From 0c0a8dd68ac99181d3353fd7d5b2e79ea4ea1d59 Mon Sep 17 00:00:00 2001 From: ScreenTinker Date: Tue, 23 Jun 2026 22:49:01 -0500 Subject: [PATCH] fix(ota): surface stuck OTA on dashboard + read APK signer correctly on API 28/29 (#139) Follow-up to the cache/backoff loop fix (aa23cf0): make a device that can't self-install visible to operators, and fix the signature-verify bug that kept the whole #139 fix from engaging on the actual Fire OS target. Dashboard surface (Phase 2): - devices gains ota_status / ota_target_version / ota_attempts / ota_updated_at via the idempotent ALTER TABLE ADD COLUMN migration (non-destructive, default-backfilled, idempotent on re-run). - The device reports ota_status (OtaThrottle.statusFor -> none | pending | manual_update_required) in device_info; the server persists it on register (the reconnect backstop). devices d.* already surfaces it to the dashboard. - Dashboard shows a non-blocking amber badge when manual_update_required ("Update available (vX) - install failed N times, manual update required"); i18n key in en.js (non-en inherits via the en fallback). Server suite +1 test. Event-driven status (Option B): - New device:ota-status WS message, emitted on STATE TRANSITIONS only (enter-backoff -> manual_update_required, clear -> none), so the badge updates promptly without waiting for a reconnect and without per-poll/heartbeat chatter. Server handler persists the same fields; an unknown/forged device_id is a safe no-op. The register-path persist stays as the reconnect backstop. Signature-verify fix (the critical piece): verifyApkSignature read the downloaded APK's signer via getPackageArchiveInfo(GET_SIGNING_CERTIFICATES).signingInfo, but that field is null for ARCHIVE files on API 28/29 (populated only from API 30). 
On Fire OS 8 (Android 9 / API 28) - the actual deployment target - this returned 0 certs from a correctly-signed APK, so every OTA was refused as "tampered," the cache was deleted, and the full APK re-downloaded every check cycle. This was the real cause of the #139 re-download loop, NOT a silent-install failure: the cache and backoff added in this branch sit behind this verify gate and never engaged on the target. Fix: below API 30, read the archive's signer via the legacy GET_SIGNATURES + .signatures (its v1/JAR cert, which IS populated on 28/29). Keep GET_SIGNING_CERTIFICATES + signingInfo for API >= 30 and for the installed-app read (which works on 28+). The archive's signer is still extracted and compared to the installed app's signer; a mismatch or zero-cert APK is still rejected. This reads the cert correctly on old APIs - it does not weaken verification. Verified on emulators: - API 28: verify now passes for a legit APK (was: 0 certs, refused). Full backoff then engages - 8.5MB pulled once, cache-hit on retries, backoff after 3, manual_update_required emitted once; clears on successful update. - API 28 negative: a re-signed (different-key) APK is still refused on cert MISMATCH - no hole opened. - API 30: unchanged path still passes (no regression). - server suite 173/173, OtaThrottleTest 7/7. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- .../com/remotedisplay/player/MainActivity.kt | 3 ++ .../player/service/OtaThrottle.kt | 14 ++++++ .../player/service/UpdateChecker.kt | 46 +++++++++++++++---- .../player/service/WebSocketService.kt | 16 +++++++ .../player/telemetry/DeviceInfo.kt | 9 ++++ .../player/service/OtaThrottleTest.kt | 15 ++++++ frontend/js/i18n/en.js | 2 + frontend/js/views/dashboard.js | 3 ++ server/db/database.js | 9 ++++ server/test/api.test.js | 26 +++++++++++ server/ws/deviceSocket.js | 22 ++++++++- 11 files changed, 155 insertions(+), 10 deletions(-) diff --git a/android/app/src/main/java/com/remotedisplay/player/MainActivity.kt b/android/app/src/main/java/com/remotedisplay/player/MainActivity.kt index f4cf854..8922f12 100644 --- a/android/app/src/main/java/com/remotedisplay/player/MainActivity.kt +++ b/android/app/src/main/java/com/remotedisplay/player/MainActivity.kt @@ -243,6 +243,9 @@ class MainActivity : AppCompatActivity() { // #139: surface OTA status (applying / backing off / manual-update-required) to the // dashboard. wsService is read lazily — it binds after this runs. updateChecker.otaLogReporter = { level, msg -> wsService?.sendLog("ota", level, msg) } + // #139 Phase 2 (Option B): announce OTA status transitions (clear / enter-backoff) so the + // dashboard badge clears/lights up promptly without waiting for a reconnect. + updateChecker.otaStatusReporter = { wsService?.sendOtaStatus() } updateChecker.startPeriodicCheck() } diff --git a/android/app/src/main/java/com/remotedisplay/player/service/OtaThrottle.kt b/android/app/src/main/java/com/remotedisplay/player/service/OtaThrottle.kt index 7db5bf7..bfa0e0e 100644 --- a/android/app/src/main/java/com/remotedisplay/player/service/OtaThrottle.kt +++ b/android/app/src/main/java/com/remotedisplay/player/service/OtaThrottle.kt @@ -57,4 +57,18 @@ object OtaThrottle { /** A check found us already on the latest. True if there was pending OTA state to clear. 
*/ fun shouldClearOnUpToDate(state: State): Boolean = state.targetVersion.isNotEmpty() + + /** + * #139 Phase 2: operator-facing status for the dashboard. + * - "none" : no update pending. + * - "manual_update_required" : capped AND still inside the backoff window — this device + * can't self-install; a human needs to update it. + * - "pending" : an update is in progress / will retry (under the cap, or the + * window has elapsed so a retry is due). + */ + fun statusFor(state: State, now: Long): String = when { + state.targetVersion.isEmpty() -> "none" + state.attempts >= MAX_INSTALL_ATTEMPTS && now - state.lastAttemptAt < BACKOFF_MS -> "manual_update_required" + else -> "pending" + } } diff --git a/android/app/src/main/java/com/remotedisplay/player/service/UpdateChecker.kt b/android/app/src/main/java/com/remotedisplay/player/service/UpdateChecker.kt index 77372ce..8efe400 100644 --- a/android/app/src/main/java/com/remotedisplay/player/service/UpdateChecker.kt +++ b/android/app/src/main/java/com/remotedisplay/player/service/UpdateChecker.kt @@ -50,6 +50,14 @@ class UpdateChecker(private val context: Context) { try { otaLogReporter?.invoke(level, message) } catch (_: Throwable) {} } + // #139 Phase 2 (Option B): announce an OTA status TRANSITION to the server (wired by + // MainActivity to WebSocketService.sendOtaStatus, which reads the just-persisted state). + // Fired ONLY at the two transitions — clear and enter-backoff — so the dashboard badge + // updates promptly without waiting for a reconnect, with no per-poll/heartbeat chatter. + // Lazy/null-safe so binding order doesn't matter, same as otaLogReporter. + var otaStatusReporter: (() -> Unit)? = null + private fun announceOtaStatus() { try { otaStatusReporter?.invoke() } catch (_: Throwable) {} } + // The PackageInstaller session reports its status (incl. STATUS_PENDING_USER_ACTION, // which Android 13+ returns for non-device-owner installers) via this broadcast. 
// Without handling it the committed session just stalls and the update never @@ -136,6 +144,7 @@ class UpdateChecker(private val context: Context) { report("info", "OTA complete: now on $currentVersion — clearing update state") config.clearOtaState() cleanupApks(null) + announceOtaStatus() // transition -> emits 'none' so the badge clears promptly } } else if (downloadUrl.isNotEmpty()) { maybeUpdate(latestVersion, "${config.serverUrl}$downloadUrl") @@ -183,6 +192,7 @@ class UpdateChecker(private val context: Context) { Log.i(TAG, "Install launched for $latestVersion (attempt ${afterLaunch.attempts}/${OtaThrottle.MAX_INSTALL_ATTEMPTS})") if (enteredBackoff) { report("warn", "Update $latestVersion available but not installing after ${afterLaunch.attempts} attempts — manual update required (backing off to one retry per ${OtaThrottle.BACKOFF_MS / 3_600_000L}h)") + announceOtaStatus() // transition -> emits 'manual_update_required' } } @@ -335,9 +345,18 @@ class UpdateChecker(private val context: Context) { private fun verifyApkSignature(apkFile: File): Boolean { return try { val pm = context.packageManager - val flags = if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.P) + // #139: getPackageArchiveInfo(GET_SIGNING_CERTIFICATES).signingInfo is NULL for + // ARCHIVE files on API 28/29 (it's only populated from API 30) — so the modern flag + // reads 0 certs from a downloaded APK and we'd wrongly REFUSE a legitimate update, + // which is the real Fire OS 8 / Android 9 OTA-loop cause. Below API 30, read the + // archive's signer via the legacy GET_SIGNATURES + .signatures (its v1/JAR cert, + // which IS populated on 28/29). This reads the cert CORRECTLY — it does not weaken + // verification: the archive's signer is still extracted and compared to the installed + // app's signer below, and a mismatch / zero-cert APK is still rejected. 
+ val archiveUsesSigningInfo = Build.VERSION.SDK_INT >= Build.VERSION_CODES.R // API 30 + val archiveFlags = if (archiveUsesSigningInfo) PackageManager.GET_SIGNING_CERTIFICATES else @Suppress("DEPRECATION") PackageManager.GET_SIGNATURES - val downloaded = pm.getPackageArchiveInfo(apkFile.absolutePath, flags) + val downloaded = pm.getPackageArchiveInfo(apkFile.absolutePath, archiveFlags) if (downloaded == null) { Log.e(TAG, "Could not parse downloaded APK") return false @@ -346,14 +365,20 @@ class UpdateChecker(private val context: Context) { Log.e(TAG, "APK package mismatch: ${downloaded.packageName} != ${context.packageName}") return false } - val installed = pm.getPackageInfo(context.packageName, flags) - val downloadedSigs = signingCertHashes(downloaded) - val installedSigs = signingCertHashes(installed) + // INSTALLED-app read: signingInfo IS populated for installed packages on API 28+, + // so keep the modern flag there (this side already worked). + val installedUsesSigningInfo = Build.VERSION.SDK_INT >= Build.VERSION_CODES.P // API 28 + val installedFlags = if (installedUsesSigningInfo) + PackageManager.GET_SIGNING_CERTIFICATES else @Suppress("DEPRECATION") PackageManager.GET_SIGNATURES + val installed = pm.getPackageInfo(context.packageName, installedFlags) + val downloadedSigs = signingCertHashes(downloaded, archiveUsesSigningInfo) + val installedSigs = signingCertHashes(installed, installedUsesSigningInfo) if (downloadedSigs.isEmpty() || installedSigs.isEmpty()) { Log.e(TAG, "Missing signing certificates (downloaded=${downloadedSigs.size}, installed=${installedSigs.size})") return false } - // Share at least one current signing certificate. + // Require at least one signer cert in common between the two hash sets (any overlap, + // not set equality); the same check applies on every API level. 
val match = downloadedSigs.any { it in installedSigs } if (!match) Log.e(TAG, "APK signing certificate does not match installed app") match @@ -363,8 +388,13 @@ class UpdateChecker(private val context: Context) { } } - private fun signingCertHashes(info: PackageInfo): Set { - val sigs: Array? = if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.P) { + // Read the signer-cert SHA-256 set from a PackageInfo. `useSigningInfo` must match the flag + // it was fetched with: GET_SIGNING_CERTIFICATES -> signingInfo.apkContentsSigners (modern; + // current signers only, not past rotated certs), GET_SIGNATURES -> legacy .signatures (the only field + // populated for ARCHIVE reads on API 28/29). Both yield the same cert for a normally-signed + // APK; the caller compares as sets so an overlapping signer still verifies. + private fun signingCertHashes(info: PackageInfo, useSigningInfo: Boolean): Set { + val sigs: Array? = if (useSigningInfo) { info.signingInfo?.apkContentsSigners } else { @Suppress("DEPRECATION") info.signatures diff --git a/android/app/src/main/java/com/remotedisplay/player/service/WebSocketService.kt b/android/app/src/main/java/com/remotedisplay/player/service/WebSocketService.kt index 59a047c..68b6e23 100644 --- a/android/app/src/main/java/com/remotedisplay/player/service/WebSocketService.kt +++ b/android/app/src/main/java/com/remotedisplay/player/service/WebSocketService.kt @@ -560,6 +560,22 @@ class WebSocketService : Service() { } catch (e: Throwable) { Log.w("WebSocketService", "sendLog: ${e.message}") } } + // #139 Phase 2 (Option B): announce an OTA status transition to the server so the dashboard + // badge updates promptly (not only on reconnect). Reads the just-persisted throttle state — + // the emit always reflects the stored truth. Called by UpdateChecker at clear / enter-backoff. 
+ fun sendOtaStatus() { + if (socket?.connected() != true) return + try { + val s = OtaThrottle.State(config.otaTargetVersion, config.otaAttempts, config.otaLastAttemptAt, config.otaBackoffReported) + socket?.emit("device:ota-status", JSONObject().apply { + put("device_id", config.deviceId) + put("ota_status", OtaThrottle.statusFor(s, System.currentTimeMillis())) + put("ota_target_version", config.otaTargetVersion) + put("ota_attempts", config.otaAttempts) + }) + } catch (e: Throwable) { Log.w("WebSocketService", "sendOtaStatus: ${e.message}") } + } + fun sendPlaybackState(contentId: String, positionSec: Float) { if (socket?.connected() != true) return try { diff --git a/android/app/src/main/java/com/remotedisplay/player/telemetry/DeviceInfo.kt b/android/app/src/main/java/com/remotedisplay/player/telemetry/DeviceInfo.kt index 7b93e22..6f0ac58 100644 --- a/android/app/src/main/java/com/remotedisplay/player/telemetry/DeviceInfo.kt +++ b/android/app/src/main/java/com/remotedisplay/player/telemetry/DeviceInfo.kt @@ -13,6 +13,8 @@ import android.os.SystemClock import android.provider.Settings import android.util.DisplayMetrics import android.view.WindowManager +import com.remotedisplay.player.data.ServerConfig +import com.remotedisplay.player.service.OtaThrottle import java.security.MessageDigest import org.json.JSONObject @@ -49,6 +51,13 @@ class DeviceInfo(private val context: Context) { put("screen_height", outH) put("render_width", renW) put("render_height", renH) + // #139 Phase 2: report OTA backoff state (alongside app_version) so the dashboard can + // flag screens stuck in manual-update-required. Read from the persisted throttle state. 
+ val cfg = ServerConfig(context) + val ota = OtaThrottle.State(cfg.otaTargetVersion, cfg.otaAttempts, cfg.otaLastAttemptAt, cfg.otaBackoffReported) + put("ota_status", OtaThrottle.statusFor(ota, System.currentTimeMillis())) + put("ota_target_version", cfg.otaTargetVersion) + put("ota_attempts", cfg.otaAttempts) } } diff --git a/android/app/src/test/java/com/remotedisplay/player/service/OtaThrottleTest.kt b/android/app/src/test/java/com/remotedisplay/player/service/OtaThrottleTest.kt index 6dae03d..e853bf1 100644 --- a/android/app/src/test/java/com/remotedisplay/player/service/OtaThrottleTest.kt +++ b/android/app/src/test/java/com/remotedisplay/player/service/OtaThrottleTest.kt @@ -79,4 +79,19 @@ class OtaThrottleTest { assertTrue(OtaThrottle.shouldClearOnUpToDate(OtaThrottle.State(targetVersion = V, attempts = 2))) assertFalse(OtaThrottle.shouldClearOnUpToDate(OtaThrottle.State())) // nothing pending } + + @Test fun statusForReflectsBackoffWindow() { + val now = 10_000L + // no target → none + assertEquals("none", OtaThrottle.statusFor(OtaThrottle.State(), now)) + // under the cap → pending + assertEquals("pending", OtaThrottle.statusFor( + OtaThrottle.State(targetVersion = V, attempts = 1, lastAttemptAt = now), now)) + // capped AND inside the window → manual update required + assertEquals("manual_update_required", OtaThrottle.statusFor( + OtaThrottle.State(targetVersion = V, attempts = MAX, lastAttemptAt = now), now + WINDOW - 1)) + // capped but window elapsed (a retry is due) → pending, not stuck + assertEquals("pending", OtaThrottle.statusFor( + OtaThrottle.State(targetVersion = V, attempts = MAX, lastAttemptAt = now), now + WINDOW + 1)) + } } diff --git a/frontend/js/i18n/en.js b/frontend/js/i18n/en.js index ab0a6b2..91eeff6 100644 --- a/frontend/js/i18n/en.js +++ b/frontend/js/i18n/en.js @@ -6,6 +6,8 @@ export default { 'device.pl_item.orphan_zone_tip': "This item's zone isn't part of the device's current layout. 
It still plays (recovered into the largest zone), but reassign it to a zone in this layout.", 'dashboard.device_orphan_tip_one': "{n} item assigned to a zone that isn't in this device's layout — open the device to reassign", 'dashboard.device_orphan_tip_other': "{n} items assigned to a zone that isn't in this device's layout — open the device to reassign", + // #139: device stuck in OTA backoff (can't self-install — e.g. Fire TV) — needs a manual update. + 'dashboard.device_ota_stuck': 'Update available (v{version}) — install failed {n}×, manual update required', // Nav (sidebar) 'nav.displays': 'Displays', 'nav.content': 'Content', diff --git a/frontend/js/views/dashboard.js b/frontend/js/views/dashboard.js index 06a65b1..4725611 100644 --- a/frontend/js/views/dashboard.js +++ b/frontend/js/views/dashboard.js @@ -117,6 +117,9 @@ function renderDeviceCard(device) {
${esc(device.name)}${device.orphan_count > 0 ? ` ${device.orphan_count} + ` : ''}${device.ota_status === 'manual_update_required' ? ` + + update ` : ''}
${device.owner_name || device.owner_email ? `
diff --git a/server/db/database.js b/server/db/database.js index e7d9f07..79b9941 100644 --- a/server/db/database.js +++ b/server/db/database.js @@ -216,6 +216,15 @@ const migrations = [ // signal, so the two differ — surfacing both explains "reports 720 but monitor sees 1080". "ALTER TABLE devices ADD COLUMN render_width INTEGER", "ALTER TABLE devices ADD COLUMN render_height INTEGER", + // #139 Phase 2: device-reported OTA backoff status, so the dashboard can flag screens that + // can't self-install (Fire TV: no device-owner path) and need a hands-on update. ADD COLUMN + // with defaults is non-destructive in SQLite, and the apply loop below swallows "duplicate + // column" — so this is idempotent and upgrades an existing populated db without data loss. + // ota_updated_at = server receipt time (s), stamped on each register persist and each device:ota-status event. + "ALTER TABLE devices ADD COLUMN ota_status TEXT DEFAULT 'none'", + "ALTER TABLE devices ADD COLUMN ota_target_version TEXT", + "ALTER TABLE devices ADD COLUMN ota_attempts INTEGER DEFAULT 0", + "ALTER TABLE devices ADD COLUMN ota_updated_at INTEGER", ]; // Apply each ALTER idempotently. A "duplicate column name" / "already exists" // error means the column is already present (expected on a migrated DB) - benign. diff --git a/server/test/api.test.js b/server/test/api.test.js index 13d3291..e31d460 100644 --- a/server/test/api.test.js +++ b/server/test/api.test.js @@ -259,6 +259,32 @@ test('device WS: wrong device_token is rejected (auth-error, never registered)', assert.ok(!got.registered, 'wrong token must not register'); }); +// #139 Phase 2 (Option B): event-driven OTA status. Registers (which, with no ota fields in +// device_info, persists ota_status='none' via the backstop), then emits a valid ota-status and +// a foreign-id one in order on the authenticated socket. 
+function deviceOtaSeq(payload, otaEvents, timeoutMs = 4000) { + return new Promise((resolve) => { + const sock = ioClient(`${BASE}/device`, { transports: ['websocket'], reconnection: false, forceNew: true }); + const finish = () => { try { sock.close(); } catch { /* */ } resolve(); }; + sock.on('connect', () => sock.emit('device:register', payload)); + sock.on('device:registered', () => { for (const e of otaEvents) sock.emit('device:ota-status', e); setTimeout(finish, 500); }); + sock.on('device:auth-error', finish); + setTimeout(finish, timeoutMs); + }); +} +test('device WS: device:ota-status persists the fields; a foreign device_id is a safe no-op (#139)', async () => { + await deviceOtaSeq( + { device_id: S.deviceId, device_token: S.deviceToken, device_info: { app_version: 'test' } }, + [ + { device_id: S.deviceId, ota_status: 'manual_update_required', ota_target_version: '1.9.1-beta6', ota_attempts: 3 }, + { device_id: 'nope-not-a-device', ota_status: 'none', ota_target_version: null, ota_attempts: 0 }, // foreign id -> no-op, no throw + ]); + const dev = await jfetch(`/api/devices/${S.deviceId}`, auth(S.jwt)); + assert.equal(dev.body.ota_status, 'manual_update_required', 'valid ota-status persisted'); + assert.equal(dev.body.ota_target_version, '1.9.1-beta6'); + assert.equal(dev.body.ota_attempts, 3, 'and the foreign-id event did not overwrite it'); +}); + // ───────────────────────── TIER 4: #92 FOLLOW-UP COVERAGE ───────────────────────── // The non-security gaps named in the self-review (issue #92): the gap-fix fields + the // cross-tenant guard (the security-relevant one), docs serving, and the token lifecycle diff --git a/server/ws/deviceSocket.js b/server/ws/deviceSocket.js index da9dc35..b15c038 100644 --- a/server/ws/deviceSocket.js +++ b/server/ws/deviceSocket.js @@ -372,8 +372,12 @@ module.exports = function setupDeviceSocket(io) { } if (device_info) { - db.prepare('UPDATE devices SET android_version = ?, app_version = ?, screen_width = ?, 
screen_height = ?, render_width = ?, render_height = ? WHERE id = ?') - .run(device_info.android_version, device_info.app_version, device_info.screen_width, device_info.screen_height, device_info.render_width ?? null, device_info.render_height ?? null, device_id); + db.prepare(`UPDATE devices SET android_version = ?, app_version = ?, screen_width = ?, screen_height = ?, render_width = ?, render_height = ?, + ota_status = ?, ota_target_version = ?, ota_attempts = ?, ota_updated_at = strftime('%s','now') WHERE id = ?`) + .run(device_info.android_version, device_info.app_version, device_info.screen_width, device_info.screen_height, device_info.render_width ?? null, device_info.render_height ?? null, + // #139 Phase 2: older APKs don't send these — default to a clean 'none' state. + device_info.ota_status ?? 'none', device_info.ota_target_version ?? null, device_info.ota_attempts ?? 0, + device_id); } heartbeat.registerConnection(device_id, socket.id); @@ -585,6 +589,20 @@ module.exports = function setupDeviceSocket(io) { }); }); + // #139 Phase 2 (Option B): event-driven OTA status. The device announces a status TRANSITION + // ('manual_update_required' on enter-backoff, 'none' on clear) so the dashboard badge updates + // promptly without waiting for a reconnect. The register path still persists these fields too + // (the reconnect backstop if a transition event is missed). Same columns + ?? defaults. + socket.on('device:ota-status', (data) => { + if (!requireDeviceAuth()) return; + const { device_id, ota_status, ota_target_version, ota_attempts } = data || {}; + // Unknown / forged / mismatched id -> no-op. WHERE id = ? also makes an unregistered id a + // 0-row update (never throws), so a stray event can't error the socket. + if (!device_id || device_id !== currentDeviceId) return; + db.prepare("UPDATE devices SET ota_status = ?, ota_target_version = ?, ota_attempts = ?, ota_updated_at = strftime('%s','now') WHERE id = ?") + .run(ota_status ?? 
'none', ota_target_version ?? null, ota_attempts ?? 0, device_id); + }); + // Play event logging (proof-of-play) socket.on('device:play-event', (data) => { if (!requireDeviceAuth()) return;