mirror of
https://github.com/screentinker/screentinker.git
synced 2026-06-29 09:23:16 -06:00
fix(#144): OTA update-check circuit-breaker + phantom guard + per-device keying
/api/update/check offered the update whenever client !== latest (raw string inequality, not semver) with no backoff. A device that can't APPLY the update (broken OTA client 1.7.12, signing/Fire OS) keeps reporting the same version and is told update_available=true on every poll; a fast poll loop saturates the event loop (prod loop-lag 49s). All requests share one NAT IP, so IP-keying is useless. server-only breaker (lib/ota-breaker.js), two independent axes: - RATE breaker (primary, immediate): a key checking >THRESHOLD (3) times within WINDOW (60s) is looping -> throttle update_available with exponential backoff (30s->2m->8m->cap 30m). Healthy devices poll ~12 min and never approach this, so rollout/stragglers are inherently safe -- NO grace-for-flood timer; slow == safe. - PHANTOM guard (immediate): unrecognized version, or a prerelease of an OLDER core (superseded old-minor beta e.g. 1.9.1-beta4), gets no-offer on the first check. A RECENT real older version (beta3 vs latest beta4; stable 1.7.12) stays offerable. - Never offers a downgrade (client >= latest -> no offer). KEYING (#144 option 3): keyed on device_id when present, else reported version. - server.js:581 accepts + logs ?device_id=, passes it to the breaker. - UpdateChecker.kt:122 appends &device_id=<config.deviceId> (existing registered id; omitted until provisioned). One-line client change. beta4+ clients get precise per-device throttling; stuck legacy clients sending only ?version= are caught by the version-keyed + rate + phantom logic. Response gains additive `reason` + `retry_after_seconds` (old clients ignore). BOUNDED STATE: a periodic sweep (startSweep, wired in server.js) evicts buckets idle > IDLE_RESET_MS so the keyed Map can't grow unbounded (churned device_ids); not reset-on-access only. SCOPE (deliberate): this targets the FAST flood + phantoms. 
The slow #144 drip (stable 1.7.12 polling ~every 12 min, ~20/hr) stays below >3/60s and is NOT throttled -- catching it needs #144 option-3 "skip-this-version after N cycles", which is intentionally NOT in this build. NOTE: carries a CLIENT/APK change -> versionCode must increment at the beta4 bump and the release keystore is required for the APK. The device_id path only helps devices that can install beta4+; the stuck legacy fleet is covered by the version-keyed path. Tests: unit (lib/ota-breaker, injected time) a-f + comparator + escalation + sweep + slow-drip-scope; HTTP integration (real endpoint, device_id passthrough). Full suite green serial AND parallel (234). OTA-only delta -- reconnect/reclaim/shed/content-ack/ block untouched. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
6642841cd8
commit
289d6b6f95
|
|
@ -119,7 +119,11 @@ class UpdateChecker(private val context: Context) {
|
||||||
Thread {
|
Thread {
|
||||||
try {
|
try {
|
||||||
val currentVersion = getAppVersion()
|
val currentVersion = getAppVersion()
|
||||||
val url = "${config.serverUrl}/api/update/check?version=$currentVersion"
|
// #144: send our stable registered device_id so the server OTA breaker can throttle
|
||||||
|
// per-device (not per-NAT-IP). Reuses the same id we register/socket with; omitted
|
||||||
|
// until provisioned (server then falls back to version-keyed).
|
||||||
|
val deviceParam = if (config.deviceId.isNotEmpty()) "&device_id=${config.deviceId}" else ""
|
||||||
|
val url = "${config.serverUrl}/api/update/check?version=$currentVersion$deviceParam"
|
||||||
Log.i(TAG, "Checking for updates: $url")
|
Log.i(TAG, "Checking for updates: $url")
|
||||||
|
|
||||||
val request = Request.Builder().url(url).build()
|
val request = Request.Builder().url(url).build()
|
||||||
|
|
|
||||||
118
server/lib/ota-breaker.js
Normal file
118
server/lib/ota-breaker.js
Normal file
|
|
@ -0,0 +1,118 @@
|
||||||
|
// #144 — OTA update-check circuit-breaker + phantom-version guard.
|
||||||
|
//
|
||||||
|
// The /api/update/check handler offered the update whenever client !== latest (raw
|
||||||
|
// string inequality, not semver). A device that can't APPLY the update (old/broken
|
||||||
|
// OTA client like 1.7.12, signing mismatch, Fire OS) keeps reporting the same old
|
||||||
|
// version and is told update_available=true on every poll. A fast poll loop (10-30s)
|
||||||
|
// then saturates the event loop (prod loop-lag 49s).
|
||||||
|
//
|
||||||
|
// Two independent axes (kept separate on purpose):
|
||||||
|
//
|
||||||
|
// 1. RATE breaker (primary, immediate). Healthy devices poll ~every 12 min, so a key
|
||||||
|
// checking MORE than THRESHOLD times within WINDOW (default >3 / 60s) is by
|
||||||
|
// definition looping -> throttle update_available for that key with exponential
|
||||||
|
// backoff. Catches the fast flood within seconds. A normally-polling device never
|
||||||
|
// approaches this rate, so rollout/straggler updates are inherently safe — there
|
||||||
|
// is deliberately NO "tolerate the flood for N minutes" grace; slow == safe.
|
||||||
|
//
|
||||||
|
// 2. PHANTOM guard (immediate). An unrecognized version, or a prerelease of an OLDER
|
||||||
|
// core (a superseded old-minor beta — e.g. 1.9.1-beta4 when latest is 1.9.2-beta3),
|
||||||
|
// gets "no offer" on the first check. A RECENT real older version (e.g. beta3 when
|
||||||
|
// latest is beta4, or stable 1.7.12) is legitimately offerable and is NOT phantom.
|
||||||
|
//
|
||||||
|
// KEYING: keyed on device_id when the client sends one (beta4+ clients -> precise
|
||||||
|
// per-device throttling), falling back to the reported VERSION when absent (legacy
|
||||||
|
// clients send only ?version=, and the site is behind NAT so IP is useless). So every
|
||||||
|
// device is covered: new clients per-device, stuck legacy clients per-version.
|
||||||
|
//
|
||||||
|
// Constants are env-tunable for ops + tests.
|
||||||
|
|
||||||
|
// Default backoff ladder: 30s -> 2m -> 8m -> cap 30m.
const DEFAULT_COOLDOWNS_MS = [30_000, 120_000, 480_000, 1_800_000];

// Read a positive base-10 integer from the environment; anything missing, non-numeric,
// zero or negative falls back to `fallback` (a zero/negative window or threshold would
// make the breaker meaningless).
function envPositiveInt(name, fallback) {
  const n = parseInt(process.env[name], 10);
  return Number.isFinite(n) && n > 0 ? n : fallback;
}

// Parse a comma-separated cooldown list. Unparseable / non-positive entries are dropped:
// a NaN entry would otherwise produce `blockedUntil = now + NaN`, and `now < NaN` is always
// false, so the breaker would silently never block. An empty result uses the defaults.
function parseCooldowns(raw) {
  if (!raw) return DEFAULT_COOLDOWNS_MS.slice();
  const parsed = String(raw)
    .split(',')
    .map(s => parseInt(s, 10))
    .filter(n => Number.isFinite(n) && n > 0);
  return parsed.length > 0 ? parsed : DEFAULT_COOLDOWNS_MS.slice();
}

const WINDOW_MS = envPositiveInt('OTA_BREAKER_WINDOW_MS', 60_000); // rate window
const THRESHOLD = envPositiveInt('OTA_BREAKER_THRESHOLD', 3); // checks/window before tripping (>THRESHOLD trips)
const COOLDOWNS_MS = parseCooldowns(process.env.OTA_BREAKER_COOLDOWNS_MS);
const IDLE_RESET_MS = envPositiveInt('OTA_BREAKER_IDLE_RESET_MS', 60 * 60 * 1000);

const state = new Map(); // key -> { hits:number[], blockedUntil, level, lastSeen }
const loggedBad = new Set(); // log unrecognized/superseded versions once
|
||||||
|
|
||||||
|
// --- minimal semver-ish parse/compare (no dependency) ---
|
||||||
|
// Parse 'MAJOR.MINOR.PATCH[-prerelease]' (surrounding whitespace tolerated).
// Returns { core: [major, minor, patch], pre: string|null }, or null when `v` is not a
// string or does not match that shape.
function parseVer(v) {
  if (typeof v !== 'string') return null;
  const m = /^(\d+)\.(\d+)\.(\d+)(?:-(.+))?$/.exec(v.trim());
  if (!m) return null;
  return { core: [+m[1], +m[2], +m[3]], pre: m[4] || null };
}

// Compare the numeric cores of two parsed versions: -1, 0 or 1.
function coreCmp(a, b) {
  for (let i = 0; i < 3; i++) {
    if (a.core[i] !== b.core[i]) return a.core[i] < b.core[i] ? -1 : 1;
  }
  return 0;
}

// Natural-order compare of two prerelease tags: digit runs compare numerically, everything
// else lexically, so 'beta10' > 'beta9'. A plain string compare ranks 'beta10' below
// 'beta9', which would classify a beta9 client as NEWER than a beta10 server and never
// offer it the update. This deliberately differs from strict SemVer (which treats 'beta10'
// as one alphanumeric identifier) because the tags compared here are of the 'betaN' form.
// Results for beta1..beta9 are identical to the plain lexical compare.
function preCmp(x, y) {
  const xs = x.split(/(\d+)/).filter(s => s !== '');
  const ys = y.split(/(\d+)/).filter(s => s !== '');
  const n = Math.min(xs.length, ys.length);
  for (let i = 0; i < n; i++) {
    const p = xs[i], q = ys[i];
    if (p === q) continue;
    if (/^\d+$/.test(p) && /^\d+$/.test(q)) {
      const d = Number(p) - Number(q);
      if (d !== 0) return d < 0 ? -1 : 1;
      continue; // numerically equal (e.g. '01' vs '1') -> keep looking
    }
    return p < q ? -1 : 1;
  }
  if (xs.length !== ys.length) return xs.length < ys.length ? -1 : 1;
  // Only numerically-equal-but-textually-different tags reach here; keep the order total.
  return x < y ? -1 : (x > y ? 1 : 0);
}

// Compare two parsed versions: core first, then release > prerelease of the same core,
// then prerelease tags in natural order. Returns -1, 0 or 1.
function cmpParsed(a, b) {
  const c = coreCmp(a, b);
  if (c !== 0) return c;
  if (a.pre === b.pre) return 0;
  if (a.pre === null) return 1; // release outranks a prerelease of the same core
  if (b.pre === null) return -1;
  return preCmp(a.pre, b.pre);
}

// Compare two version strings; null when either one cannot be parsed.
function cmp(a, b) {
  const pa = parseVer(a), pb = parseVer(b);
  return (!pa || !pb) ? null : cmpParsed(pa, pb);
}
|
||||||
|
|
||||||
|
// decide(clientVersion, latestVersion, deviceId?, now?) ->
|
||||||
|
// { update_available, reason, retry_after_seconds?, log? }
|
||||||
|
/**
 * Decide whether one update-check request should be offered the latest build.
 *
 * Stage 1 (phantom guard, stateless): a missing, unparseable, equal, newer, or
 * superseded-prerelease client version gets "no offer" immediately.
 * Stage 2 (rate breaker, stateful): an otherwise offerable client is tracked in a bucket
 * keyed by device id when given, else by its version; more than THRESHOLD checks within
 * WINDOW_MS trips a backoff taken from COOLDOWNS_MS, escalating one level per trip.
 *
 * @param {string} clientVersion version reported by the client
 * @param {string} latestVersion version the server currently ships
 * @param {?string} [deviceId]   optional device id used as the bucket key
 * @param {number} [now]         current time in ms (injectable for tests)
 * @returns {{update_available: boolean, reason: string, retry_after_seconds?: number, log?: string}}
 *          `log`, when present, is a message the caller is expected to print.
 */
function decide(clientVersion, latestVersion, deviceId = null, now = Date.now()) {
  // ---- PHANTOM / unrecognized guard (immediate, version-based, no rate state) ----
  if (!clientVersion) return { update_available: false, reason: 'no-version' };
  const pc = parseVer(clientVersion), pl = parseVer(latestVersion);
  if (!pc || !pl) {
    // Dedupe on the string form: a non-string value (e.g. an array from a repeated
    // ?version= parameter) is a fresh object on every request and would never match.
    const shown = String(clientVersion);
    return {
      update_available: false,
      reason: 'unrecognized-version',
      log: logOnce(shown, `[ota] unrecognized client version '${shown}' — no offer (latest=${latestVersion})`),
    };
  }
  const full = cmpParsed(pc, pl);
  if (full === 0) return { update_available: false, reason: 'up-to-date' };
  if (full > 0) return { update_available: false, reason: 'client-newer' }; // never offer a downgrade

  // Canonical form of the parsed version, so whitespace / leading-zero variants of the
  // same version share one log entry and one rate bucket.
  const canonical = pc.core.join('.') + (pc.pre !== null ? '-' + pc.pre : '');

  // superseded old-core prerelease (e.g. 1.9.1-beta4)
  // NOTE(review): this also matches the immediately previous beta line after a core bump
  // (e.g. 1.9.2-beta4 once latest becomes 1.9.3-beta1) — confirm that is intended.
  if (pc.pre !== null && coreCmp(pc, pl) < 0) {
    return {
      update_available: false,
      reason: 'superseded-prerelease',
      log: logOnce(canonical, `[ota] superseded prerelease '${clientVersion}' (older core than latest=${latestVersion}) — no offer`),
    };
  }

  // ---- offerable (recent real older version) -> RATE breaker, keyed per device / per version ----
  const key = deviceId ? 'd:' + deviceId : 'v:' + canonical;
  let b = state.get(key);
  if (!b) { b = { hits: [], blockedUntil: 0, level: 0, lastSeen: now }; state.set(key, b); }
  if (now - b.lastSeen > IDLE_RESET_MS) { b.hits = []; b.blockedUntil = 0; b.level = 0; } // long-quiet -> fresh
  b.lastSeen = now;

  if (now < b.blockedUntil) {
    return { update_available: false, reason: 'rate-backoff', retry_after_seconds: Math.ceil((b.blockedUntil - now) / 1000) };
  }
  if (b.blockedUntil !== 0) b.blockedUntil = 0; // cooldown elapsed -> probe window

  b.hits = b.hits.filter(t => now - t < WINDOW_MS);
  b.hits.push(now);
  if (b.hits.length > THRESHOLD) { // looping faster than a healthy device ever would
    const configured = COOLDOWNS_MS[Math.min(b.level, COOLDOWNS_MS.length - 1)];
    // A missing/NaN cooldown would make blockedUntil NaN and the backoff a no-op;
    // fall back to one rate window so a tripped key is always actually throttled.
    const cd = Number.isFinite(configured) && configured > 0 ? configured : WINDOW_MS;
    b.blockedUntil = now + cd;
    b.level++;
    b.hits = []; // require a fresh burst to re-trip after cooldown
    return {
      update_available: false,
      reason: 'rate-backoff',
      retry_after_seconds: Math.ceil(cd / 1000),
      log: `[ota] breaker tripped key=${key} (>${THRESHOLD} checks/${Math.round(WINDOW_MS / 1000)}s, looping) -> backoff ${Math.round(cd / 1000)}s [level ${b.level}]`,
    };
  }
  return { update_available: true, reason: 'offer' };
}
|
||||||
|
|
||||||
|
// Return `msg` the first time a given version is seen and undefined afterwards, so the
// caller logs each bad version once.
// The key is the string form, capped in length: a non-string value (e.g. an array from a
// repeated query parameter) is a new object on every request and would never dedupe.
// The set is capped too: this feeds from a public endpoint, so distinct garbage versions
// must not grow it without bound. When full, the oldest entry is evicted (Set iterates in
// insertion order), which at worst re-logs a long-forgotten version.
function logOnce(version, msg) {
  const MAX_TRACKED = 1000;
  const key = String(version).slice(0, 128);
  if (loggedBad.has(key)) return undefined;
  if (loggedBad.size >= MAX_TRACKED) {
    const oldest = loggedBad.values().next().value;
    loggedBad.delete(oldest);
  }
  loggedBad.add(key);
  return msg;
}
|
||||||
|
|
||||||
|
// #144: actively EVICT idle buckets so the keyed state can't grow unbounded over time
|
||||||
|
// (churned device_ids, varied versions). reset-on-access alone never deletes; this does.
|
||||||
|
// Evict every bucket whose lastSeen is more than IDLE_RESET_MS before `now`.
// Logs a one-line summary when anything was removed; returns the number evicted.
function sweep(now = Date.now()) {
  const sizeBefore = state.size;
  state.forEach((bucket, bucketKey) => {
    const idleFor = now - bucket.lastSeen;
    if (idleFor > IDLE_RESET_MS) state.delete(bucketKey);
  });
  const n = sizeBefore - state.size;
  if (n > 0) {
    console.log(`[ota] breaker swept ${n} idle bucket(s) (idle > ${Math.round(IDLE_RESET_MS / 60000)}m); ${state.size} remain`);
  }
  return n;
}
|
||||||
|
// Handle of the periodic sweep timer; null until startSweep() has run.
let sweepTimer = null;

// Start the periodic sweep (one pass every IDLE_RESET_MS) if it is not already running
// and return the timer handle. Calling it again returns the existing handle.
function startSweep() {
  if (!sweepTimer) {
    sweepTimer = setInterval(() => sweep(), IDLE_RESET_MS);
    // don't keep the process alive on this timer
    if (sweepTimer.unref) sweepTimer.unref();
  }
  return sweepTimer;
}
|
||||||
|
|
||||||
|
// Drop all breaker buckets and the logged-once version set.
function reset() {
  loggedBad.clear();
  state.clear();
}
|
||||||
|
// Number of buckets currently held in the breaker state.
function _size() {
  const { size } = state;
  return size;
}
|
||||||
|
module.exports = { decide, reset, sweep, startSweep, cmp, parseVer, _size, WINDOW_MS, THRESHOLD };
|
||||||
|
|
@ -578,28 +578,36 @@ app.use('/api/status', require('./routes/status'));
|
||||||
// route block) - leaving this comment here as a breadcrumb for the move.
|
// route block) - leaving this comment here as a breadcrumb for the move.
|
||||||
|
|
||||||
// APK version check endpoint (public, used by devices to check for updates)
|
// APK version check endpoint (public, used by devices to check for updates)
|
||||||
|
const otaBreaker = require('./lib/ota-breaker');
|
||||||
|
otaBreaker.startSweep(); // #144: periodically evict idle breaker buckets so keyed state stays bounded
|
||||||
app.get('/api/update/check', (req, res) => {
|
app.get('/api/update/check', (req, res) => {
|
||||||
const currentVersion = req.query.version;
|
const currentVersion = req.query.version;
|
||||||
const apkPath = resolveApkPath();
|
const deviceId = req.query.device_id || null; // #144: optional; beta4+ clients send it for per-device keying
|
||||||
const apkExists = apkPath !== null;
|
|
||||||
const apkSize = apkExists ? fs.statSync(apkPath).size : 0;
|
|
||||||
const apkModified = apkExists ? fs.statSync(apkPath).mtimeMs : 0;
|
|
||||||
|
|
||||||
const latestVersion = VERSION;
|
const latestVersion = VERSION;
|
||||||
const updateAvailable = currentVersion && currentVersion !== latestVersion;
|
|
||||||
|
|
||||||
// #96: log every version check so the OTA is observable - which devices check in, their
|
// #144: circuit-breaker + phantom-version guard (replaces the old string-inequality
|
||||||
// version, and whether they'll update. This diagnosability gap is part of why the 1.9.0
|
// offer). Keys per device_id when present, else per reported version. Rate-trips a
|
||||||
// relaunch failure went unseen.
|
// looping client in seconds; never offers a downgrade or a superseded/garbage version.
|
||||||
console.log(`[ota] update check from ${getClientIp(req)}: client=${currentVersion || 'unknown'} latest=${latestVersion} update_available=${!!updateAvailable} apk=${apkExists ? 'present' : 'MISSING'}`);
|
const verdict = otaBreaker.decide(currentVersion, latestVersion, deviceId);
|
||||||
|
const apkPath = resolveApkPath(); // existsSync x2 (cheap)
|
||||||
|
const apkExists = apkPath !== null;
|
||||||
|
const updateAvailable = !!verdict.update_available && apkExists; // never offer if APK missing
|
||||||
|
const apkSize = updateAvailable ? fs.statSync(apkPath).size : 0; // statSync only when actually offering (don't stat on every looped poll)
|
||||||
|
const apkModified = updateAvailable ? fs.statSync(apkPath).mtimeMs : 0;
|
||||||
|
|
||||||
|
if (verdict.log) console.log(verdict.log); // once-per-event (trip / unrecognized)
|
||||||
|
// #96: keep the per-check line observable; now also shows the breaker reason + device_id.
|
||||||
|
console.log(`[ota] update check from ${getClientIp(req)}: device=${deviceId || 'none'} client=${currentVersion || 'unknown'} latest=${latestVersion} update_available=${updateAvailable} reason=${verdict.reason} apk=${apkExists ? 'present' : 'MISSING'}`);
|
||||||
|
|
||||||
res.json({
|
res.json({
|
||||||
latest_version: latestVersion,
|
latest_version: latestVersion,
|
||||||
current_version: currentVersion || 'unknown',
|
current_version: currentVersion || 'unknown',
|
||||||
update_available: updateAvailable,
|
update_available: updateAvailable,
|
||||||
|
reason: verdict.reason, // #144: breaker decision, for observability (additive; old clients ignore)
|
||||||
download_url: '/download/apk',
|
download_url: '/download/apk',
|
||||||
apk_size: apkSize,
|
apk_size: apkSize,
|
||||||
apk_modified: apkModified,
|
apk_modified: apkModified,
|
||||||
|
...(verdict.retry_after_seconds ? { retry_after_seconds: verdict.retry_after_seconds } : {}),
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|
|
||||||
113
server/test/ota-breaker.test.js
Normal file
113
server/test/ota-breaker.test.js
Normal file
|
|
@ -0,0 +1,113 @@
|
||||||
|
'use strict';
|
||||||
|
|
||||||
|
// #144 — OTA-check circuit-breaker + phantom guard. Deterministic unit tests with
|
||||||
|
// injected `now` (no waiting), covering the required cases (a)-(f). No DB/socket;
|
||||||
|
// the breaker module is pure + in-memory.
|
||||||
|
|
||||||
|
const { test, beforeEach } = require('node:test');
|
||||||
|
const assert = require('node:assert/strict');
|
||||||
|
const ota = require('../lib/ota-breaker');
|
||||||
|
|
||||||
|
const LATEST = '1.9.2-beta4'; // simulate the beta4 server
|
||||||
|
const T0 = 1_000_000;
|
||||||
|
beforeEach(() => ota.reset());
|
||||||
|
|
||||||
|
test('semver comparator: real-older < latest, same-core beta order, equal/newer', () => {
|
||||||
|
assert.equal(ota.cmp('1.7.12', LATEST) < 0, true, '1.7.12 older');
|
||||||
|
assert.equal(ota.cmp('1.9.2-beta3', LATEST) < 0, true, 'beta3 < beta4 (same core)');
|
||||||
|
assert.equal(ota.cmp('1.9.2-beta4', LATEST), 0, 'equal');
|
||||||
|
assert.equal(ota.cmp('1.9.3', LATEST) > 0, true, 'newer core');
|
||||||
|
assert.equal(ota.cmp('banana', LATEST), null, 'garbage unparseable');
|
||||||
|
});
|
||||||
|
|
||||||
|
test('(a) PHANTOM/unrecognized -> instant no-offer, no grace, no rate state', () => {
|
||||||
|
// superseded old-core prerelease (strobe's 1.9.1-beta4) — caught on the FIRST check
|
||||||
|
let v = ota.decide('1.9.1-beta4', LATEST, null, T0);
|
||||||
|
assert.equal(v.update_available, false);
|
||||||
|
assert.equal(v.reason, 'superseded-prerelease');
|
||||||
|
// garbage string
|
||||||
|
v = ota.decide('banana', LATEST, null, T0);
|
||||||
|
assert.equal(v.update_available, false);
|
||||||
|
assert.equal(v.reason, 'unrecognized-version');
|
||||||
|
// never offer a downgrade
|
||||||
|
assert.equal(ota.decide('1.9.3', LATEST, null, T0).update_available, false);
|
||||||
|
});
|
||||||
|
|
||||||
|
test('(b) fast loop (every 15s) trips within ~3 checks / ~45s, NOT minutes', () => {
|
||||||
|
const r = (dt) => ota.decide('1.7.12', LATEST, null, T0 + dt);
|
||||||
|
assert.equal(r(0).update_available, true, 'check1 offered');
|
||||||
|
assert.equal(r(15_000).update_available, true, 'check2 offered');
|
||||||
|
assert.equal(r(30_000).update_available, true, 'check3 offered');
|
||||||
|
const trip = r(45_000);
|
||||||
|
assert.equal(trip.update_available, false, 'check4 (~45s) trips');
|
||||||
|
assert.equal(trip.reason, 'rate-backoff');
|
||||||
|
assert.ok(trip.retry_after_seconds >= 1, 'backoff has a retry hint');
|
||||||
|
});
|
||||||
|
|
||||||
|
test('(c) healthy straggler on beta3, polling every 12 min, is ALWAYS offered beta4 (rollout NOT throttled)', () => {
|
||||||
|
for (let i = 0; i < 6; i++) {
|
||||||
|
const v = ota.decide('1.9.2-beta3', LATEST, null, T0 + i * 12 * 60_000);
|
||||||
|
assert.equal(v.update_available, true, `12-min poll #${i + 1} still offered`);
|
||||||
|
assert.equal(v.reason, 'offer');
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
test('(d) a device that APPLIES the update (version advances) is never throttled', () => {
|
||||||
|
// it was looping/being offered on the old version...
|
||||||
|
ota.decide('1.7.12', LATEST, 'devX', T0);
|
||||||
|
ota.decide('1.7.12', LATEST, 'devX', T0 + 1000);
|
||||||
|
// ...then it applies -> now reports latest
|
||||||
|
const v = ota.decide(LATEST, LATEST, 'devX', T0 + 2000);
|
||||||
|
assert.equal(v.update_available, false);
|
||||||
|
assert.equal(v.reason, 'up-to-date'); // up-to-date, NOT rate-backoff
|
||||||
|
});
|
||||||
|
|
||||||
|
test('(e) device_id looping is throttled PER-DEVICE; another device on the same version is unaffected', () => {
|
||||||
|
const loopA = (dt) => ota.decide('1.7.12', LATEST, 'A', T0 + dt);
|
||||||
|
loopA(0); loopA(15_000); loopA(30_000);
|
||||||
|
assert.equal(loopA(45_000).update_available, false, 'device A trips');
|
||||||
|
// device B, same version, checking normally -> its own key, still offered
|
||||||
|
assert.equal(ota.decide('1.7.12', LATEST, 'B', T0 + 46_000).update_available, true, 'device B unaffected');
|
||||||
|
});
|
||||||
|
|
||||||
|
test('(f) legacy client without device_id is caught by the version-keyed path (and lumps per version)', () => {
|
||||||
|
// two legacy devices, no device_id, same version -> share the v:1.7.12 bucket
|
||||||
|
const v = (dt) => ota.decide('1.7.12', LATEST, null, T0 + dt);
|
||||||
|
assert.equal(v(0).update_available, true);
|
||||||
|
assert.equal(v(10_000).update_available, true);
|
||||||
|
assert.equal(v(20_000).update_available, true);
|
||||||
|
assert.equal(v(30_000).update_available, false, 'combined version-keyed rate trips without any device_id');
|
||||||
|
});
|
||||||
|
|
||||||
|
test('(scope) slow #144 drip: stable 1.7.12 polling ~every 12 min is NEVER throttled (fast-flood only)', () => {
|
||||||
|
// documents the deliberate scope: this build catches the fast flood + phantoms, NOT the
|
||||||
|
// slow 1.7.12 drip (that needs #144 option-3 skip-after-N, not included here).
|
||||||
|
for (let i = 0; i < 10; i++) {
|
||||||
|
const v = ota.decide('1.7.12', LATEST, null, T0 + i * 12 * 60_000);
|
||||||
|
assert.equal(v.update_available, true, `12-min drip poll #${i + 1} still offered (not throttled)`);
|
||||||
|
assert.equal(v.reason, 'offer');
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
test('state Map is bounded: sweep() evicts idle buckets, keeps recent', () => {
|
||||||
|
ota.decide('1.7.12', LATEST, 'old', T0); // bucket d:old, lastSeen=T0
|
||||||
|
const now = T0 + 2 * 60 * 60_000; // 2h later
|
||||||
|
ota.decide('1.7.12', LATEST, 'recent', now - 60_000); // bucket d:recent, lastSeen=now-1min
|
||||||
|
assert.equal(ota._size(), 2, 'two buckets');
|
||||||
|
const removed = ota.sweep(now);
|
||||||
|
assert.equal(removed, 1, 'the 2h-idle bucket is evicted');
|
||||||
|
assert.equal(ota._size(), 1, 'the recent bucket is kept (no unbounded growth)');
|
||||||
|
});
|
||||||
|
|
||||||
|
test('exponential backoff escalates across cooldowns (30s -> 2m)', () => {
|
||||||
|
const r = (dt) => ota.decide('1.7.12', LATEST, 'esc', T0 + dt);
|
||||||
|
r(0); r(15_000); r(30_000);
|
||||||
|
const t1 = r(45_000); // first trip
|
||||||
|
assert.equal(t1.retry_after_seconds, 30, 'first cooldown 30s');
|
||||||
|
// after the 30s cooldown elapses, flood again -> next cooldown (2m)
|
||||||
|
const base = 45_000 + 31_000;
|
||||||
|
r(base); r(base + 1000); r(base + 2000);
|
||||||
|
const t2 = r(base + 3000);
|
||||||
|
assert.equal(t2.update_available, false);
|
||||||
|
assert.equal(t2.retry_after_seconds, 120, 'second cooldown escalates to 2m');
|
||||||
|
});
|
||||||
70
server/test/ota-check.test.js
Normal file
70
server/test/ota-check.test.js
Normal file
|
|
@ -0,0 +1,70 @@
|
||||||
|
'use strict';
|
||||||
|
|
||||||
|
// #144 — HTTP integration: the real /api/update/check endpoint with the breaker wired.
|
||||||
|
// Proves end-to-end behavior + the device_id passthrough/keying. Rapid requests stay
|
||||||
|
// within the 60s rate window, so THRESHOLD(3) trips on the 4th. Unique PORT 3991.
|
||||||
|
|
||||||
|
const { test, before, after } = require('node:test');
|
||||||
|
const assert = require('node:assert/strict');
|
||||||
|
const { spawn } = require('node:child_process');
|
||||||
|
const path = require('node:path');
|
||||||
|
const os = require('node:os');
|
||||||
|
const fs = require('node:fs');
|
||||||
|
const crypto = require('node:crypto');
|
||||||
|
|
||||||
|
const PORT = 3991;
|
||||||
|
const BASE = `http://127.0.0.1:${PORT}`;
|
||||||
|
const DATA_DIR = path.join(os.tmpdir(), 'st-ota-' + crypto.randomBytes(4).toString('hex'));
|
||||||
|
const LOG = path.join(os.tmpdir(), 'st-ota-' + crypto.randomBytes(4).toString('hex') + '.log');
|
||||||
|
let proc, LATEST;
|
||||||
|
const sleep = (ms) => new Promise(r => setTimeout(r, ms));
|
||||||
|
// Call GET /api/update/check on the test server and return the parsed JSON body.
// `deviceId` is appended as device_id only when truthy.
const check = async (version, deviceId) => {
  const params = [`version=${encodeURIComponent(version)}`];
  if (deviceId) params.push(`device_id=${encodeURIComponent(deviceId)}`);
  const response = await fetch(`${BASE}/api/update/check?${params.join('&')}`);
  return response.json();
};
|
||||||
|
|
||||||
|
// Boot a real server on PORT with a throwaway DATA_DIR, wait until /api/status answers,
// then read back the server's latest version for the tests below.
before(async () => {
  // the breaker only reports update_available when an APK actually exists — give the
  // test server a dummy one (resolveApkPath checks DATA_DIR/ScreenTinker.apk).
  fs.mkdirSync(DATA_DIR, { recursive: true });
  fs.writeFileSync(path.join(DATA_DIR, 'ScreenTinker.apk'), Buffer.alloc(1024, 1));
  const logFd = fs.openSync(LOG, 'w');
  try {
    // process.execPath: run the server with the same Node binary that runs the tests
    // instead of whatever `node` happens to be first on PATH.
    proc = spawn(process.execPath, ['server.js'], {
      cwd: path.join(__dirname, '..'),
      env: { ...process.env, DATA_DIR, SELF_HOSTED: 'true', PORT: String(PORT), NODE_ENV: 'test' },
      stdio: ['ignore', logFd, logFd],
    });
  } finally {
    // The child holds its own copy of the descriptor once spawned; close ours so the
    // test process does not leak it.
    fs.closeSync(logFd);
  }
  let up = false;
  // poll for up to ~20s (80 x 250ms) until the status endpoint responds OK
  for (let i = 0; i < 80; i++) {
    try {
      const r = await fetch(BASE + '/api/status');
      if (r.ok) { up = true; break; }
    } catch { /* */ }
    await sleep(250);
  }
  if (!up) throw new Error('server did not boot:\n' + fs.readFileSync(LOG, 'utf8').slice(-2000));
  LATEST = (await check('0.0.1')).latest_version; // an ancient version reads back the server's latest
});
|
||||||
|
// Stop the test server and remove the temp data dir and log file it was given, so
// repeated runs do not pile up st-ota-* entries in the OS temp directory.
after(() => {
  try { proc.kill('SIGKILL'); } catch { /* */ }
  try { fs.rmSync(DATA_DIR, { recursive: true, force: true }); } catch { /* best-effort cleanup */ }
  try { fs.rmSync(LOG, { force: true }); } catch { /* best-effort cleanup */ }
});
|
||||||
|
|
||||||
|
test('a device already on latest gets no offer (up-to-date)', async () => {
|
||||||
|
const r = await check(LATEST);
|
||||||
|
assert.equal(r.update_available, false);
|
||||||
|
assert.equal(r.reason, 'up-to-date');
|
||||||
|
});
|
||||||
|
|
||||||
|
test('(a) phantom version (superseded old-core prerelease) -> instant no-offer over HTTP', async () => {
|
||||||
|
const r = await check('1.9.1-beta4');
|
||||||
|
assert.equal(r.update_available, false);
|
||||||
|
assert.equal(r.reason, 'superseded-prerelease');
|
||||||
|
});
|
||||||
|
|
||||||
|
test('(b/f) legacy client (no device_id) looping the same version trips the version-keyed breaker', async () => {
|
||||||
|
const v = '1.6.0'; // fresh offerable older version, no device_id
|
||||||
|
const results = [];
|
||||||
|
for (let i = 0; i < 5; i++) results.push(await check(v)); // rapid, within the 60s window
|
||||||
|
assert.ok(results.slice(0, 3).every(r => r.update_available === true), 'first 3 offered');
|
||||||
|
assert.equal(results[3].update_available, false, '4th trips');
|
||||||
|
assert.equal(results[3].reason, 'rate-backoff');
|
||||||
|
assert.ok(results[3].retry_after_seconds >= 1, 'response carries retry_after_seconds');
|
||||||
|
});
|
||||||
|
|
||||||
|
test('(e) device_id looping is throttled per-device; another device on the same version is unaffected', async () => {
|
||||||
|
const v = '1.5.0';
|
||||||
|
for (let i = 0; i < 3; i++) await check(v, 'devA');
|
||||||
|
const aTrip = await check(v, 'devA'); // devA 4th -> trips
|
||||||
|
assert.equal(aTrip.update_available, false, 'devA throttled');
|
||||||
|
const bOk = await check(v, 'devB'); // devB first check -> offered
|
||||||
|
assert.equal(bOk.update_available, true, 'devB (same version, different device) unaffected');
|
||||||
|
});
|
||||||
Loading…
Reference in a new issue