mirror of
https://github.com/screentinker/screentinker.git
synced 2026-06-29 17:32:53 -06:00
Compare commits
8 commits
main
...
v1.9.2-bet
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
a6e9237db0 | ||
|
|
289d6b6f95 | ||
|
|
6642841cd8 | ||
|
|
8d37c7f5ff | ||
|
|
e32c91cfd1 | ||
|
|
e73428182d | ||
|
|
404c3301dd | ||
|
|
dbac699854 |
|
|
@ -11,8 +11,8 @@ android {
|
|||
applicationId = "com.remotedisplay.player"
|
||||
minSdk = 24
|
||||
targetSdk = 34
|
||||
versionCode = 31
|
||||
versionName = "1.9.2-beta1"
|
||||
versionCode = 34
|
||||
versionName = "1.9.2-beta4"
|
||||
}
|
||||
|
||||
signingConfigs {
|
||||
|
|
|
|||
|
|
@ -119,7 +119,11 @@ class UpdateChecker(private val context: Context) {
|
|||
Thread {
|
||||
try {
|
||||
val currentVersion = getAppVersion()
|
||||
val url = "${config.serverUrl}/api/update/check?version=$currentVersion"
|
||||
// #144: send our stable registered device_id so the server OTA breaker can throttle
|
||||
// per-device (not per-NAT-IP). Reuses the same id we register/socket with; omitted
|
||||
// until provisioned (server then falls back to version-keyed).
|
||||
val deviceParam = if (config.deviceId.isNotEmpty()) "&device_id=${config.deviceId}" else ""
|
||||
val url = "${config.serverUrl}/api/update/check?version=$currentVersion$deviceParam"
|
||||
Log.i(TAG, "Checking for updates: $url")
|
||||
|
||||
val request = Request.Builder().url(url).build()
|
||||
|
|
|
|||
|
|
@ -149,4 +149,29 @@ module.exports = {
|
|||
// (device_id, content_id, status) reports within this window. A status CHANGE
|
||||
// has a different key and passes immediately. In-memory; resets on restart.
|
||||
contentAckDedupMs: parseInt(process.env.CONTENT_ACK_DEDUP_MS) || 10000,
|
||||
|
||||
// #143 content-ack RATE budget (lib/content-ack-limiter.js), layered on top of the
|
||||
// dedup above. Caps TOTAL acks per device per window REGARDLESS of differing
|
||||
// content_id — the flood the dedup misses (a device cycling 2-4 ids makes every
|
||||
// ack look unique, so dedup never fires, yet aggregate volume blocks the loop).
|
||||
// TUNING GUESSES — validate against Bold's real fleet. Legit playlist cadence is
|
||||
// roughly <=1 ack/s/device; the flood is many/s. 20 per 10s (=2/s) sits above
|
||||
// legit and below the flood. Easy to retune via env.
|
||||
contentAckMaxPerWindow: parseInt(process.env.CONTENT_ACK_MAX_PER_WINDOW) || 20,
|
||||
contentAckRateWindowMs: parseInt(process.env.CONTENT_ACK_RATE_WINDOW_MS) || 10000,
|
||||
|
||||
// #143 fingerprint-reclaim liveness. A reinstalled app (same fingerprint, no
|
||||
// device_id, has pairing_code) may reclaim its old device's identity once that
|
||||
// device is gone by RUNTIME signals: no live socket AND last heartbeat older than
|
||||
// this settle window. Previously an effective 24h calendar grace treated a device
|
||||
// merely offline <24h as "active", so a legitimately-gone device (liveConn=false,
|
||||
// status=offline, stale heartbeat) could never reclaim and retried every ~2s,
|
||||
// flooding logs (Bold beta1). Settle = the max reclaim wait; keep it comfortably
|
||||
// above heartbeatTimeout (45s) so a brief blip isn't mistaken for "gone".
|
||||
// SECURITY TRADEOFF: this also shortens the anti-(fingerprint-theft) window from
|
||||
// 24h — raise it to re-tighten, at the cost of reinstall latency. Tuning guess.
|
||||
reclaimSettleSeconds: parseInt(process.env.RECLAIM_SETTLE_SECONDS) || 300,
|
||||
// #143 throttle the reclaim-deferred log to once per device per window, so a
|
||||
// retrying/stuck device can't flood stdout (same discipline as the content-ack shed log).
|
||||
reclaimRejectLogWindowMs: parseInt(process.env.RECLAIM_REJECT_LOG_WINDOW_MS) || 60000,
|
||||
};
|
||||
|
|
|
|||
|
|
@ -234,6 +234,10 @@ const migrations = [
|
|||
// schema.sql creates these on fresh installs; this covers existing DBs.
|
||||
"CREATE TABLE IF NOT EXISTS event_loop_lag (id INTEGER PRIMARY KEY AUTOINCREMENT, sampled_at INTEGER NOT NULL DEFAULT (strftime('%s','now')), mean_ms REAL NOT NULL, p50_ms REAL NOT NULL, p99_ms REAL NOT NULL, max_ms REAL NOT NULL, band TEXT NOT NULL DEFAULT 'normal')",
|
||||
"CREATE INDEX IF NOT EXISTS idx_event_loop_lag_sampled ON event_loop_lag(sampled_at)",
|
||||
// #143: operator device kill switch. blocked=1 refuses the device at the first
|
||||
// register gate on its next reconnect (no restart). Hand-settable by direct SQLite:
|
||||
// UPDATE devices SET blocked = 1 WHERE id = '<device_id>'; (0 to unblock)
|
||||
"ALTER TABLE devices ADD COLUMN blocked INTEGER NOT NULL DEFAULT 0",
|
||||
];
|
||||
// Apply each ALTER idempotently. A "duplicate column name" / "already exists"
|
||||
// error means the column is already present (expected on a migrated DB) - benign.
|
||||
|
|
|
|||
|
|
@ -64,6 +64,7 @@ CREATE TABLE IF NOT EXISTS devices (
|
|||
name TEXT NOT NULL DEFAULT 'Unnamed Display',
|
||||
pairing_code TEXT UNIQUE,
|
||||
status TEXT NOT NULL DEFAULT 'offline',
|
||||
blocked INTEGER NOT NULL DEFAULT 0,
|
||||
last_heartbeat INTEGER,
|
||||
ip_address TEXT,
|
||||
android_version TEXT,
|
||||
|
|
|
|||
64
server/lib/content-ack-limiter.js
Normal file
64
server/lib/content-ack-limiter.js
Normal file
|
|
@ -0,0 +1,64 @@
|
|||
// #143 — content-ack flood control (the single control on the content-ack path).
|
||||
//
|
||||
// Folds three concerns into ONE per-device limiter so there are no competing
|
||||
// limiters on this path (reconnect-throttle.js is left untouched):
|
||||
// 1. #142 dedup — drop an exact (content_id, status) repeat within the dedup
|
||||
// window. Legit repeat suppression; does NOT consume rate budget.
|
||||
// 2. #143 per-device RATE budget — cap TOTAL non-duplicate acks per device per
|
||||
// window regardless of differing content_id. This is what dedup misses: a
|
||||
// device cycling 2-4 ids makes each ack look unique, so dedup never fires,
|
||||
// but aggregate volume still floods the loop. Over budget -> shed silently.
|
||||
// 3. #143 global pressure valve — when loop-lag (services/loop-lag.js) reports
|
||||
// the CRITICAL band, shed non-essential acks even for a device within its own
|
||||
// budget. Reuses the existing band + hysteresis; never fires below critical.
|
||||
//
|
||||
// Per-device, in-memory, resets on restart (like lastPlayLogAt / pair-lockout).
|
||||
// Fixed window (counter reset per window) — simple and makes "log once per window"
|
||||
// natural. `band` is injected so this is testable without the loop-lag monitor.
|
||||
|
||||
const config = require('../config');
|
||||
|
||||
// deviceId -> { winStart, count, shedNotified, dup: Map(content|status -> ts) }
|
||||
const state = new Map();
|
||||
|
||||
// Returns one of:
|
||||
// { action: 'pass' } -> caller logs + emits
|
||||
// { action: 'dedup' } -> drop (exact repeat)
|
||||
// { action: 'shed-rate', logStart, observed, budget } -> drop (over per-device budget)
|
||||
// { action: 'shed-valve' } -> drop (global critical-lag valve)
|
||||
function check(deviceId, contentId, status, band = 'normal', now = Date.now()) {
|
||||
let s = state.get(deviceId);
|
||||
if (!s) { s = { winStart: now, count: 0, shedNotified: false, dup: new Map() }; state.set(deviceId, s); }
|
||||
|
||||
// Roll the fixed rate window.
|
||||
if (now - s.winStart >= config.contentAckRateWindowMs) {
|
||||
s.winStart = now;
|
||||
s.count = 0;
|
||||
s.shedNotified = false;
|
||||
// Bound the dedup map: drop entries older than the dedup window.
|
||||
for (const [k, t] of s.dup) if (now - t >= config.contentAckDedupMs) s.dup.delete(k);
|
||||
}
|
||||
|
||||
// 1) Dedup — exact (content, status) repeat within the dedup window. Does NOT
|
||||
// consume rate budget (it's a legit repeat we simply suppress).
|
||||
const key = `${contentId}|${status}`;
|
||||
if (now - (s.dup.get(key) || 0) < config.contentAckDedupMs) return { action: 'dedup' };
|
||||
s.dup.set(key, now);
|
||||
|
||||
// 2) Per-device rate budget — always applies, counts all non-duplicate acks.
|
||||
s.count++;
|
||||
if (s.count > config.contentAckMaxPerWindow) {
|
||||
const logStart = !s.shedNotified; // log ONCE per device per window when shedding starts
|
||||
s.shedNotified = true;
|
||||
return { action: 'shed-rate', logStart, observed: s.count, budget: config.contentAckMaxPerWindow };
|
||||
}
|
||||
|
||||
// 3) Global valve — extra shedding only under critical lag; a within-budget device
|
||||
// in a non-critical band is never touched here.
|
||||
if (band === 'critical') return { action: 'shed-valve' };
|
||||
|
||||
return { action: 'pass' };
|
||||
}
|
||||
|
||||
function reset() { state.clear(); } // tests
|
||||
module.exports = { check, reset };
|
||||
118
server/lib/ota-breaker.js
Normal file
118
server/lib/ota-breaker.js
Normal file
|
|
@ -0,0 +1,118 @@
|
|||
// #144 — OTA update-check circuit-breaker + phantom-version guard.
|
||||
//
|
||||
// The /api/update/check handler offered the update whenever client !== latest (raw
|
||||
// string inequality, not semver). A device that can't APPLY the update (old/broken
|
||||
// OTA client like 1.7.12, signing mismatch, Fire OS) keeps reporting the same old
|
||||
// version and is told update_available=true on every poll. A fast poll loop (10-30s)
|
||||
// then saturates the event loop (prod loop-lag 49s).
|
||||
//
|
||||
// Two independent axes (kept separate on purpose):
|
||||
//
|
||||
// 1. RATE breaker (primary, immediate). Healthy devices poll ~every 12 min, so a key
|
||||
// checking MORE than THRESHOLD times within WINDOW (default >3 / 60s) is by
|
||||
// definition looping -> throttle update_available for that key with exponential
|
||||
// backoff. Catches the fast flood within seconds. A normally-polling device never
|
||||
// approaches this rate, so rollout/straggler updates are inherently safe — there
|
||||
// is deliberately NO "tolerate the flood for N minutes" grace; slow == safe.
|
||||
//
|
||||
// 2. PHANTOM guard (immediate). An unrecognized version, or a prerelease of an OLDER
|
||||
// core (a superseded old-minor beta — e.g. 1.9.1-beta4 when latest is 1.9.2-beta3),
|
||||
// gets "no offer" on the first check. A RECENT real older version (e.g. beta3 when
|
||||
// latest is beta4, or stable 1.7.12) is legitimately offerable and is NOT phantom.
|
||||
//
|
||||
// KEYING: keyed on device_id when the client sends one (beta4+ clients -> precise
|
||||
// per-device throttling), falling back to the reported VERSION when absent (legacy
|
||||
// clients send only ?version=, and the site is behind NAT so IP is useless). So every
|
||||
// device is covered: new clients per-device, stuck legacy clients per-version.
|
||||
//
|
||||
// Constants are env-tunable for ops + tests.
|
||||
|
||||
const WINDOW_MS = parseInt(process.env.OTA_BREAKER_WINDOW_MS) || 60_000; // rate window
|
||||
const THRESHOLD = parseInt(process.env.OTA_BREAKER_THRESHOLD) || 3; // checks/window before tripping (>THRESHOLD trips)
|
||||
const COOLDOWNS_MS = (process.env.OTA_BREAKER_COOLDOWNS_MS
|
||||
? process.env.OTA_BREAKER_COOLDOWNS_MS.split(',').map(s => parseInt(s, 10))
|
||||
: [30_000, 120_000, 480_000, 1_800_000]); // 30s -> 2m -> 8m -> cap 30m
|
||||
const IDLE_RESET_MS = parseInt(process.env.OTA_BREAKER_IDLE_RESET_MS) || 60 * 60 * 1000;
|
||||
|
||||
const state = new Map(); // key -> { hits:number[], blockedUntil, level, lastSeen }
|
||||
const loggedBad = new Set(); // log unrecognized/superseded versions once
|
||||
|
||||
// --- minimal semver-ish parse/compare (no dependency) ---
|
||||
function parseVer(v) {
|
||||
if (typeof v !== 'string') return null;
|
||||
const m = /^(\d+)\.(\d+)\.(\d+)(?:-(.+))?$/.exec(v.trim());
|
||||
if (!m) return null;
|
||||
return { core: [+m[1], +m[2], +m[3]], pre: m[4] || null };
|
||||
}
|
||||
function coreCmp(a, b) { for (let i = 0; i < 3; i++) if (a.core[i] !== b.core[i]) return a.core[i] < b.core[i] ? -1 : 1; return 0; }
|
||||
function cmpParsed(a, b) {
|
||||
const c = coreCmp(a, b);
|
||||
if (c !== 0) return c;
|
||||
if (a.pre === b.pre) return 0;
|
||||
if (a.pre === null) return 1; // release outranks a prerelease of the same core
|
||||
if (b.pre === null) return -1;
|
||||
// lexical prerelease compare — fine for beta1..beta9 (cores decide everything else).
|
||||
return a.pre < b.pre ? -1 : (a.pre > b.pre ? 1 : 0);
|
||||
}
|
||||
function cmp(a, b) { const pa = parseVer(a), pb = parseVer(b); return (!pa || !pb) ? null : cmpParsed(pa, pb); }
|
||||
|
||||
// decide(clientVersion, latestVersion, deviceId?, now?) ->
|
||||
// { update_available, reason, retry_after_seconds?, log? }
|
||||
function decide(clientVersion, latestVersion, deviceId = null, now = Date.now()) {
|
||||
// ---- PHANTOM / unrecognized guard (immediate, version-based, no rate state) ----
|
||||
if (!clientVersion) return { update_available: false, reason: 'no-version' };
|
||||
const pc = parseVer(clientVersion), pl = parseVer(latestVersion);
|
||||
if (!pc || !pl) return { update_available: false, reason: 'unrecognized-version', log: logOnce(clientVersion, `[ota] unrecognized client version '${clientVersion}' — no offer (latest=${latestVersion})`) };
|
||||
const full = cmpParsed(pc, pl);
|
||||
if (full === 0) return { update_available: false, reason: 'up-to-date' };
|
||||
if (full > 0) return { update_available: false, reason: 'client-newer' }; // never offer a downgrade
|
||||
if (pc.pre !== null && coreCmp(pc, pl) < 0) { // superseded old-core prerelease (e.g. 1.9.1-beta4)
|
||||
return { update_available: false, reason: 'superseded-prerelease', log: logOnce(clientVersion, `[ota] superseded prerelease '${clientVersion}' (older core than latest=${latestVersion}) — no offer`) };
|
||||
}
|
||||
|
||||
// ---- offerable (recent real older version) -> RATE breaker, keyed per device / per version ----
|
||||
const key = deviceId ? 'd:' + deviceId : 'v:' + clientVersion;
|
||||
let b = state.get(key);
|
||||
if (!b) { b = { hits: [], blockedUntil: 0, level: 0, lastSeen: now }; state.set(key, b); }
|
||||
if (now - b.lastSeen > IDLE_RESET_MS) { b.hits = []; b.blockedUntil = 0; b.level = 0; } // long-quiet -> fresh
|
||||
b.lastSeen = now;
|
||||
|
||||
if (now < b.blockedUntil) {
|
||||
return { update_available: false, reason: 'rate-backoff', retry_after_seconds: Math.ceil((b.blockedUntil - now) / 1000) };
|
||||
}
|
||||
if (b.blockedUntil !== 0) b.blockedUntil = 0; // cooldown elapsed -> probe window
|
||||
|
||||
b.hits = b.hits.filter(t => now - t < WINDOW_MS);
|
||||
b.hits.push(now);
|
||||
if (b.hits.length > THRESHOLD) { // looping faster than a healthy device ever would
|
||||
const cd = COOLDOWNS_MS[Math.min(b.level, COOLDOWNS_MS.length - 1)];
|
||||
b.blockedUntil = now + cd;
|
||||
b.level++;
|
||||
b.hits = []; // require a fresh burst to re-trip after cooldown
|
||||
return { update_available: false, reason: 'rate-backoff', retry_after_seconds: Math.ceil(cd / 1000),
|
||||
log: `[ota] breaker tripped key=${key} (>${THRESHOLD} checks/${Math.round(WINDOW_MS / 1000)}s, looping) -> backoff ${Math.round(cd / 1000)}s [level ${b.level}]` };
|
||||
}
|
||||
return { update_available: true, reason: 'offer' };
|
||||
}
|
||||
|
||||
function logOnce(version, msg) { if (loggedBad.has(version)) return undefined; loggedBad.add(version); return msg; }
|
||||
|
||||
// #144: actively EVICT idle buckets so the keyed state can't grow unbounded over time
|
||||
// (churned device_ids, varied versions). reset-on-access alone never deletes; this does.
|
||||
function sweep(now = Date.now()) {
|
||||
let n = 0;
|
||||
for (const [k, b] of state) if (now - b.lastSeen > IDLE_RESET_MS) { state.delete(k); n++; }
|
||||
if (n > 0) console.log(`[ota] breaker swept ${n} idle bucket(s) (idle > ${Math.round(IDLE_RESET_MS / 60000)}m); ${state.size} remain`);
|
||||
return n;
|
||||
}
|
||||
let sweepTimer = null;
|
||||
function startSweep() {
|
||||
if (sweepTimer) return sweepTimer;
|
||||
sweepTimer = setInterval(() => sweep(), IDLE_RESET_MS);
|
||||
if (sweepTimer.unref) sweepTimer.unref(); // don't keep the process alive on this timer
|
||||
return sweepTimer;
|
||||
}
|
||||
|
||||
function reset() { state.clear(); loggedBad.clear(); }
|
||||
function _size() { return state.size; }
|
||||
module.exports = { decide, reset, sweep, startSweep, cmp, parseVer, _size, WINDOW_MS, THRESHOLD };
|
||||
4
server/package-lock.json
generated
4
server/package-lock.json
generated
|
|
@ -1,12 +1,12 @@
|
|||
{
|
||||
"name": "screentinker",
|
||||
"version": "1.9.2-beta1",
|
||||
"version": "1.9.2-beta4",
|
||||
"lockfileVersion": 3,
|
||||
"requires": true,
|
||||
"packages": {
|
||||
"": {
|
||||
"name": "screentinker",
|
||||
"version": "1.9.2-beta1",
|
||||
"version": "1.9.2-beta4",
|
||||
"dependencies": {
|
||||
"@azure/msal-node": "^5.2.1",
|
||||
"archiver": "^7.0.1",
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
{
|
||||
"name": "screentinker",
|
||||
"version": "1.9.2-beta1",
|
||||
"version": "1.9.2-beta4",
|
||||
"description": "ScreenTinker - Digital Signage Management Server",
|
||||
"main": "server.js",
|
||||
"scripts": {
|
||||
|
|
|
|||
|
|
@ -578,28 +578,36 @@ app.use('/api/status', require('./routes/status'));
|
|||
// route block) - leaving this comment here as a breadcrumb for the move.
|
||||
|
||||
// APK version check endpoint (public, used by devices to check for updates)
|
||||
const otaBreaker = require('./lib/ota-breaker');
|
||||
otaBreaker.startSweep(); // #144: periodically evict idle breaker buckets so keyed state stays bounded
|
||||
app.get('/api/update/check', (req, res) => {
|
||||
const currentVersion = req.query.version;
|
||||
const apkPath = resolveApkPath();
|
||||
const apkExists = apkPath !== null;
|
||||
const apkSize = apkExists ? fs.statSync(apkPath).size : 0;
|
||||
const apkModified = apkExists ? fs.statSync(apkPath).mtimeMs : 0;
|
||||
|
||||
const deviceId = req.query.device_id || null; // #144: optional; beta4+ clients send it for per-device keying
|
||||
const latestVersion = VERSION;
|
||||
const updateAvailable = currentVersion && currentVersion !== latestVersion;
|
||||
|
||||
// #96: log every version check so the OTA is observable - which devices check in, their
|
||||
// version, and whether they'll update. This diagnosability gap is part of why the 1.9.0
|
||||
// relaunch failure went unseen.
|
||||
console.log(`[ota] update check from ${getClientIp(req)}: client=${currentVersion || 'unknown'} latest=${latestVersion} update_available=${!!updateAvailable} apk=${apkExists ? 'present' : 'MISSING'}`);
|
||||
// #144: circuit-breaker + phantom-version guard (replaces the old string-inequality
|
||||
// offer). Keys per device_id when present, else per reported version. Rate-trips a
|
||||
// looping client in seconds; never offers a downgrade or a superseded/garbage version.
|
||||
const verdict = otaBreaker.decide(currentVersion, latestVersion, deviceId);
|
||||
const apkPath = resolveApkPath(); // existsSync x2 (cheap)
|
||||
const apkExists = apkPath !== null;
|
||||
const updateAvailable = !!verdict.update_available && apkExists; // never offer if APK missing
|
||||
const apkSize = updateAvailable ? fs.statSync(apkPath).size : 0; // statSync only when actually offering (don't stat on every looped poll)
|
||||
const apkModified = updateAvailable ? fs.statSync(apkPath).mtimeMs : 0;
|
||||
|
||||
if (verdict.log) console.log(verdict.log); // once-per-event (trip / unrecognized)
|
||||
// #96: keep the per-check line observable; now also shows the breaker reason + device_id.
|
||||
console.log(`[ota] update check from ${getClientIp(req)}: device=${deviceId || 'none'} client=${currentVersion || 'unknown'} latest=${latestVersion} update_available=${updateAvailable} reason=${verdict.reason} apk=${apkExists ? 'present' : 'MISSING'}`);
|
||||
|
||||
res.json({
|
||||
latest_version: latestVersion,
|
||||
current_version: currentVersion || 'unknown',
|
||||
update_available: updateAvailable,
|
||||
reason: verdict.reason, // #144: breaker decision, for observability (additive; old clients ignore)
|
||||
download_url: '/download/apk',
|
||||
apk_size: apkSize,
|
||||
apk_modified: apkModified,
|
||||
...(verdict.retry_after_seconds ? { retry_after_seconds: verdict.retry_after_seconds } : {}),
|
||||
});
|
||||
});
|
||||
|
||||
|
|
|
|||
|
|
@ -79,6 +79,14 @@ function sample() {
|
|||
const tag = band !== prev ? ` (was ${prev})` : '';
|
||||
console.log(`[loop-lag] band=${band}${tag} mean=${snap.mean_ms}ms p99=${snap.p99_ms}ms max=${snap.max_ms}ms`);
|
||||
}
|
||||
|
||||
// #143 global pressure valve — log ONLY the band edge (open/close), not per shed
|
||||
// message. When critical, deviceSocket sheds non-essential acks (it reads getBand()).
|
||||
if (band === 'critical' && prev !== 'critical') {
|
||||
console.warn(`[shed] global valve OPEN — loop-lag critical (p99=${snap.p99_ms}ms); shedding non-essential device messages (content-acks). reconnects + dashboard still processed.`);
|
||||
} else if (prev === 'critical' && band !== 'critical') {
|
||||
console.log(`[shed] global valve CLOSED — loop-lag recovered (band=${band}, p99=${snap.p99_ms}ms)`);
|
||||
}
|
||||
}
|
||||
|
||||
function pruneLag() {
|
||||
|
|
|
|||
87
server/test/content-ack-flood.test.js
Normal file
87
server/test/content-ack-flood.test.js
Normal file
|
|
@ -0,0 +1,87 @@
|
|||
'use strict';
|
||||
|
||||
// #143 integration — a device cycling DIFFERENT content ids at high rate (the case
|
||||
// dedup misses) is rate-limited; an under-budget device passes every ack. Observed
|
||||
// via the server log: passing acks log `Device <id> content <cid>: ready`; over-
|
||||
// budget drops are silent except ONE `[content-ack] shedding device <id>` line.
|
||||
// Normal band (default lag thresholds), unique PORT 3985.
|
||||
|
||||
const { test, before, after } = require('node:test');
|
||||
const assert = require('node:assert/strict');
|
||||
const { spawn } = require('node:child_process');
|
||||
const path = require('node:path');
|
||||
const os = require('node:os');
|
||||
const fs = require('node:fs');
|
||||
const crypto = require('node:crypto');
|
||||
const ioClient = require('socket.io-client');
|
||||
|
||||
const PORT = 3985;
|
||||
const BASE = `http://127.0.0.1:${PORT}`;
|
||||
const DATA_DIR = path.join(os.tmpdir(), 'st-flood-' + crypto.randomBytes(4).toString('hex'));
|
||||
const LOG = path.join(os.tmpdir(), 'st-flood-' + crypto.randomBytes(4).toString('hex') + '.log');
|
||||
let proc;
|
||||
const sleep = (ms) => new Promise(r => setTimeout(r, ms));
|
||||
|
||||
before(async () => {
|
||||
const logFd = fs.openSync(LOG, 'w');
|
||||
proc = spawn('node', ['server.js'], {
|
||||
cwd: path.join(__dirname, '..'),
|
||||
env: {
|
||||
...process.env, DATA_DIR, SELF_HOSTED: 'true', PORT: String(PORT), NODE_ENV: 'test',
|
||||
CONTENT_ACK_MAX_PER_WINDOW: '5', CONTENT_ACK_RATE_WINDOW_MS: '10000', CONTENT_ACK_DEDUP_MS: '50',
|
||||
},
|
||||
stdio: ['ignore', logFd, logFd],
|
||||
});
|
||||
let up = false;
|
||||
for (let i = 0; i < 80; i++) { try { const r = await fetch(BASE + '/api/status'); if (r.ok) { up = true; break; } } catch { /* */ } await sleep(250); }
|
||||
if (!up) throw new Error('server did not boot:\n' + fs.readFileSync(LOG, 'utf8').slice(-2000));
|
||||
});
|
||||
after(() => { try { proc.kill('SIGKILL'); } catch { /* */ } });
|
||||
|
||||
function provision() {
|
||||
const code = String(crypto.randomInt(100000, 1000000));
|
||||
return new Promise((resolve) => {
|
||||
const sock = ioClient(`${BASE}/device`, { transports: ['websocket'], reconnection: false, forceNew: true });
|
||||
sock.on('connect', () => sock.emit('device:register', { pairing_code: code }));
|
||||
sock.on('device:registered', (d) => { try { sock.close(); } catch { /* */ } resolve({ id: d.device_id, token: d.device_token }); });
|
||||
setTimeout(() => resolve(null), 4000);
|
||||
});
|
||||
}
|
||||
function openRegistered(dev) {
|
||||
return new Promise((resolve, reject) => {
|
||||
const sock = ioClient(`${BASE}/device`, { transports: ['websocket'], reconnection: false, forceNew: true });
|
||||
sock.on('connect', () => sock.emit('device:register', { device_id: dev.id, device_token: dev.token, device_info: { app_version: 'test' } }));
|
||||
sock.on('device:registered', () => resolve(sock));
|
||||
sock.on('device:auth-error', () => reject(new Error('auth-error')));
|
||||
setTimeout(() => reject(new Error('register timeout')), 4000);
|
||||
});
|
||||
}
|
||||
const linesFor = (id, needle) => fs.readFileSync(LOG, 'utf8').split('\n').filter(l => l.includes(id) && l.includes(needle)).length;
|
||||
|
||||
test('#143: cycling 4 different content ids at high rate is rate-limited (dedup misses it)', async () => {
|
||||
const dev = await provision();
|
||||
assert.ok(dev, 'provisioned');
|
||||
const sock = await openRegistered(dev);
|
||||
const ids = ['a', 'b', 'c', 'd'];
|
||||
// 15 acks, ~100ms apart -> each id repeats every ~400ms (> 50ms dedup) so dedup
|
||||
// never fires; budget is 5/10s, so only 5 pass and the rest are shed.
|
||||
for (let i = 0; i < 15; i++) { sock.emit('device:content-ack', { device_id: dev.id, content_id: ids[i % 4], status: 'ready' }); await sleep(100); }
|
||||
await sleep(400);
|
||||
try { sock.close(); } catch { /* */ }
|
||||
|
||||
const passed = linesFor(dev.id, 'content '); // `Device <id> content <cid>: ready`
|
||||
const shedStart = linesFor(dev.id, 'shedding device');
|
||||
assert.equal(passed, 5, `exactly the budget (5) passed/logged, got ${passed}`);
|
||||
assert.ok(shedStart >= 1, 'a single shed-start line was logged when flood control engaged');
|
||||
assert.ok(shedStart === 1, `shed-start logged ONCE per window (no per-drop flood), got ${shedStart}`);
|
||||
});
|
||||
|
||||
test('a device under budget has every ack processed', async () => {
|
||||
const dev = await provision();
|
||||
const sock = await openRegistered(dev);
|
||||
for (const id of ['p', 'q', 'r', 's']) { sock.emit('device:content-ack', { device_id: dev.id, content_id: id, status: 'ready' }); await sleep(60); }
|
||||
await sleep(300);
|
||||
try { sock.close(); } catch { /* */ }
|
||||
assert.equal(linesFor(dev.id, 'content '), 4, 'all 4 under-budget acks processed');
|
||||
assert.equal(linesFor(dev.id, 'shedding device'), 0, 'no shedding for an under-budget device');
|
||||
});
|
||||
91
server/test/content-ack-limiter.test.js
Normal file
91
server/test/content-ack-limiter.test.js
Normal file
|
|
@ -0,0 +1,91 @@
|
|||
'use strict';
|
||||
|
||||
// #143 — deterministic unit tests for the folded content-ack limiter (dedup +
|
||||
// per-device rate budget + global critical-lag valve). Injected `now`/`band`, no
|
||||
// sockets. DATA_DIR set before require so config's jwt-secret write goes to a temp
|
||||
// dir (not the repo). Rate params pinned via env for clarity.
|
||||
|
||||
const os = require('node:os');
|
||||
const path = require('node:path');
|
||||
const crypto = require('node:crypto');
|
||||
process.env.DATA_DIR = path.join(os.tmpdir(), 'st-ack143-' + crypto.randomBytes(4).toString('hex'));
|
||||
process.env.CONTENT_ACK_MAX_PER_WINDOW = '5';
|
||||
process.env.CONTENT_ACK_RATE_WINDOW_MS = '10000';
|
||||
process.env.CONTENT_ACK_DEDUP_MS = '2000';
|
||||
|
||||
const { test, beforeEach } = require('node:test');
|
||||
const assert = require('node:assert/strict');
|
||||
const limiter = require('../lib/content-ack-limiter');
|
||||
|
||||
const T0 = 1_000_000;
|
||||
beforeEach(() => limiter.reset());
|
||||
|
||||
function tally(results) {
|
||||
const t = { pass: 0, dedup: 0, 'shed-rate': 0, 'shed-valve': 0, logStarts: 0 };
|
||||
for (const r of results) { t[r.action]++; if (r.logStart) t.logStarts++; }
|
||||
return t;
|
||||
}
|
||||
|
||||
test('#143 REGRESSION: cycling 2-4 DIFFERENT content ids (evading dedup) IS rate-limited', () => {
|
||||
// 4 ids, spaced 600ms so each id repeats every 2400ms (> 2000ms dedup) -> dedup
|
||||
// never fires (the exact case it misses), so the RATE budget must cap them.
|
||||
const ids = ['a', 'b', 'c', 'd'];
|
||||
const out = [];
|
||||
for (let i = 0; i < 12; i++) out.push(limiter.check('dev', ids[i % 4], 'ready', 'normal', T0 + i * 600));
|
||||
const t = tally(out);
|
||||
assert.equal(t.dedup, 0, 'dedup must NOT mask the cycling flood (proves rate limiter is what caps)');
|
||||
assert.equal(t.pass, 5, 'first MAX(5) pass');
|
||||
assert.equal(t['shed-rate'], 7, 'the remaining 7 are rate-shed');
|
||||
assert.equal(t.logStarts, 1, 'shedding logged exactly once per device per window');
|
||||
});
|
||||
|
||||
test('unique ids (dedup never matches): under budget passes, over budget sheds', () => {
|
||||
const out = [];
|
||||
for (let i = 0; i < 8; i++) out.push(limiter.check('dev', 'u' + i, 'ready', 'normal', T0 + i * 10));
|
||||
const t = tally(out);
|
||||
assert.equal(t.pass, 5);
|
||||
assert.equal(t['shed-rate'], 3);
|
||||
});
|
||||
|
||||
test('a device exactly at budget passes every ack', () => {
|
||||
for (let i = 0; i < 5; i++) {
|
||||
const v = limiter.check('dev', 'u' + i, 'ready', 'normal', T0 + i * 10);
|
||||
assert.equal(v.action, 'pass', `ack ${i + 1} (<=budget) passes`);
|
||||
}
|
||||
});
|
||||
|
||||
test('dedup still works AND does not consume rate budget (no regression)', () => {
|
||||
assert.equal(limiter.check('dev', 'a', 'ready', 'normal', T0).action, 'pass'); // count 1
|
||||
assert.equal(limiter.check('dev', 'a', 'ready', 'normal', T0 + 100).action, 'dedup'); // repeat -> dedup
|
||||
assert.equal(limiter.check('dev', 'a', 'ready', 'normal', T0 + 200).action, 'dedup'); // repeat -> dedup
|
||||
// budget should still have 4 left (the 2 dedups didn't count): 4 distinct pass, 5th sheds
|
||||
for (const id of ['b', 'c', 'd', 'e']) assert.equal(limiter.check('dev', id, 'ready', 'normal', T0 + 300).action, 'pass');
|
||||
assert.equal(limiter.check('dev', 'f', 'ready', 'normal', T0 + 300).action, 'shed-rate', 'budget consumed only by non-duplicates');
|
||||
});
|
||||
|
||||
test('global valve: CRITICAL sheds an in-budget device; NORMAL leaves it untouched', () => {
|
||||
assert.equal(limiter.check('A', 'x', 'ready', 'critical', T0).action, 'shed-valve', 'critical sheds even under budget');
|
||||
assert.equal(limiter.check('B', 'x', 'ready', 'normal', T0).action, 'pass', 'normal-band healthy device is unaffected by the valve');
|
||||
// a healthy device under normal band is never valve-touched across many acks
|
||||
for (let i = 0; i < 5; i++) assert.equal(limiter.check('C', 'u' + i, 'ready', 'normal', T0 + i).action, 'pass');
|
||||
});
|
||||
|
||||
test('over-budget rate shedding takes precedence over the valve', () => {
|
||||
let v;
|
||||
for (let i = 0; i < 6; i++) v = limiter.check('dev', 'u' + i, 'ready', 'critical', T0 + i);
|
||||
assert.equal(v.action, 'shed-rate', 'a flooding device reports rate shedding, not just valve');
|
||||
});
|
||||
|
||||
test('window reset: count + shed-notified reset, so a new window logs shedding again', () => {
|
||||
for (let i = 0; i < 7; i++) limiter.check('dev', 'u' + i, 'ready', 'normal', T0 + i * 10); // sheds, logs once
|
||||
// next window (>10s later): fresh budget
|
||||
for (let i = 0; i < 5; i++) assert.equal(limiter.check('dev', 'w' + i, 'ready', 'normal', T0 + 11000 + i).action, 'pass');
|
||||
const v = limiter.check('dev', 'w9', 'ready', 'normal', T0 + 11000 + 100);
|
||||
assert.equal(v.action, 'shed-rate');
|
||||
assert.equal(v.logStart, true, 'shedding is logged once in the NEW window too');
|
||||
});
|
||||
|
||||
test('per-device isolation: one device flooding does not shed another', () => {
|
||||
for (let i = 0; i < 10; i++) limiter.check('STORM', 'u' + i, 'ready', 'normal', T0 + i); // STORM sheds
|
||||
assert.equal(limiter.check('NEIGHBOR', 'x', 'ready', 'normal', T0 + 11).action, 'pass');
|
||||
});
|
||||
88
server/test/content-ack-valve.test.js
Normal file
88
server/test/content-ack-valve.test.js
Normal file
|
|
@ -0,0 +1,88 @@
|
|||
'use strict';
|
||||
|
||||
// #143 Step 2 integration — the global loop-lag pressure valve. Lag thresholds are
|
||||
// forced to 0 so the band is CRITICAL from the first sample; rate budget is set high
|
||||
// so we isolate the VALVE (not the per-device rate limit). Under critical: content-
|
||||
// acks are shed (no log/emit), while a reconnect AND an HTTP/dashboard request still
|
||||
// process, and the valve edge is logged once. Unique PORT 3986.
|
||||
|
||||
const { test, before, after } = require('node:test');
|
||||
const assert = require('node:assert/strict');
|
||||
const { spawn } = require('node:child_process');
|
||||
const path = require('node:path');
|
||||
const os = require('node:os');
|
||||
const fs = require('node:fs');
|
||||
const crypto = require('node:crypto');
|
||||
const ioClient = require('socket.io-client');
|
||||
|
||||
const PORT = 3986;
|
||||
const BASE = `http://127.0.0.1:${PORT}`;
|
||||
const DATA_DIR = path.join(os.tmpdir(), 'st-valve-' + crypto.randomBytes(4).toString('hex'));
|
||||
const LOG = path.join(os.tmpdir(), 'st-valve-' + crypto.randomBytes(4).toString('hex') + '.log');
|
||||
let proc;
|
||||
const sleep = (ms) => new Promise(r => setTimeout(r, ms));
|
||||
|
||||
before(async () => {
|
||||
const logFd = fs.openSync(LOG, 'w');
|
||||
proc = spawn('node', ['server.js'], {
|
||||
cwd: path.join(__dirname, '..'),
|
||||
env: {
|
||||
...process.env, DATA_DIR, SELF_HOSTED: 'true', PORT: String(PORT), NODE_ENV: 'test',
|
||||
LAG_CRITICAL_MS: '1', LAG_ELEVATED_MS: '1', LAG_SAMPLE_INTERVAL_MS: '150', // tiny thresholds (NOT 0 — config uses `|| default`, 0 is falsy) -> band critical immediately
|
||||
CONTENT_ACK_MAX_PER_WINDOW: '1000', CONTENT_ACK_RATE_WINDOW_MS: '10000', // high, so the VALVE is what sheds
|
||||
},
|
||||
stdio: ['ignore', logFd, logFd],
|
||||
});
|
||||
let up = false;
|
||||
for (let i = 0; i < 80; i++) { try { const r = await fetch(BASE + '/api/status'); if (r.ok) { up = true; break; } } catch { /* */ } await sleep(250); }
|
||||
if (!up) throw new Error('server did not boot:\n' + fs.readFileSync(LOG, 'utf8').slice(-2000));
|
||||
await sleep(600); // let the valve open (>=1 sample at 150ms in the critical band)
|
||||
});
|
||||
after(() => { try { proc.kill('SIGKILL'); } catch { /* */ } });
|
||||
|
||||
function provision() {
|
||||
const code = String(crypto.randomInt(100000, 1000000));
|
||||
return new Promise((resolve) => {
|
||||
const sock = ioClient(`${BASE}/device`, { transports: ['websocket'], reconnection: false, forceNew: true });
|
||||
sock.on('connect', () => sock.emit('device:register', { pairing_code: code }));
|
||||
sock.on('device:registered', (d) => { try { sock.close(); } catch { /* */ } resolve({ id: d.device_id, token: d.device_token }); });
|
||||
setTimeout(() => resolve(null), 4000);
|
||||
});
|
||||
}
|
||||
function openRegistered(dev) {
|
||||
return new Promise((resolve) => {
|
||||
const sock = ioClient(`${BASE}/device`, { transports: ['websocket'], reconnection: false, forceNew: true });
|
||||
let done = false; const finish = (v) => { if (!done) { done = true; resolve(v); } };
|
||||
sock.on('connect', () => sock.emit('device:register', { device_id: dev.id, device_token: dev.token, device_info: { app_version: 'test' } }));
|
||||
sock.on('device:registered', () => finish({ sock, registered: true }));
|
||||
sock.on('device:auth-error', () => finish({ sock, registered: false }));
|
||||
setTimeout(() => finish({ sock, registered: false }), 4000);
|
||||
});
|
||||
}
|
||||
|
||||
test('valve OPEN under critical: content-acks are shed, reconnect + HTTP still process', async () => {
|
||||
const log0 = fs.readFileSync(LOG, 'utf8');
|
||||
assert.ok(/\[shed\] global valve OPEN/.test(log0), 'valve edge logged once on entering critical');
|
||||
|
||||
// 1) content-acks under critical -> shed (no log/emit)
|
||||
const dev = await provision();
|
||||
const { sock, registered } = await openRegistered(dev);
|
||||
assert.ok(registered, 'a device can still register/reconnect under critical (reconnects always processed)');
|
||||
for (let i = 0; i < 6; i++) { sock.emit('device:content-ack', { device_id: dev.id, content_id: 'v' + i, status: 'ready' }); await sleep(40); }
|
||||
await sleep(300);
|
||||
try { sock.close(); } catch { /* */ }
|
||||
const contentLines = fs.readFileSync(LOG, 'utf8').split('\n').filter(l => l.includes(dev.id) && l.includes('content ')).length;
|
||||
assert.equal(contentLines, 0, 'all content-acks shed by the valve under critical (none logged/emitted)');
|
||||
|
||||
// 2) a fresh reconnect still registers (reconnects are never shed)
|
||||
const dev2 = await provision();
|
||||
const r2 = await openRegistered(dev2);
|
||||
assert.ok(r2.registered, 'reconnect processed under critical');
|
||||
try { r2.sock.close(); } catch { /* */ }
|
||||
|
||||
// 3) HTTP / dashboard path still serves under critical
|
||||
const res = await fetch(BASE + '/api/status');
|
||||
assert.equal(res.status, 200, 'HTTP/dashboard requests always processed');
|
||||
const body = await res.json();
|
||||
assert.equal(body.loop_lag.band, 'critical', 'sanity: band really is critical');
|
||||
});
|
||||
109
server/test/device-block-and-auth.test.js
Normal file
109
server/test/device-block-and-auth.test.js
Normal file
|
|
@ -0,0 +1,109 @@
|
|||
'use strict';
|
||||
|
||||
// #143 (highest-priority) — auth short-circuit fix + operator kill switch.
|
||||
// - A provisioned device whose token is NULLed is now REJECTED (Bold 75c2a08a:
|
||||
// nulling the token used to RE-PROVISION the device instead of locking it out).
|
||||
// - A `blocked=1` device is refused at the first register gate (the enforceable
|
||||
// kill switch), settable by DIRECT SQLite edit while the server runs (no restart).
|
||||
// - First pairing (token-less, no device_id) and normal auth still work.
|
||||
// Direct DB edits below mimic the operator hand-editing SQLite during an outage.
|
||||
// Unique PORT 3987 (not 3982-3986).
|
||||
|
||||
const { test, before, after } = require('node:test');
|
||||
const assert = require('node:assert/strict');
|
||||
const { spawn } = require('node:child_process');
|
||||
const path = require('node:path');
|
||||
const os = require('node:os');
|
||||
const fs = require('node:fs');
|
||||
const crypto = require('node:crypto');
|
||||
const ioClient = require('socket.io-client');
|
||||
const Database = require('better-sqlite3');
|
||||
|
||||
const PORT = 3987;
|
||||
const BASE = `http://127.0.0.1:${PORT}`;
|
||||
const DATA_DIR = path.join(os.tmpdir(), 'st-block-' + crypto.randomBytes(4).toString('hex'));
|
||||
const LOG = path.join(os.tmpdir(), 'st-block-' + crypto.randomBytes(4).toString('hex') + '.log');
|
||||
const DB_PATH = path.join(DATA_DIR, 'db', 'remote_display.db');
|
||||
let proc;
|
||||
let tdb; // ONE long-lived operator-style connection (mirrors how the server holds one);
|
||||
// avoids the cross-process WAL checkpoint churn of many short-lived openers.
|
||||
const sleep = (ms) => new Promise(r => setTimeout(r, ms));
|
||||
|
||||
before(async () => {
|
||||
const logFd = fs.openSync(LOG, 'w');
|
||||
proc = spawn('node', ['server.js'], {
|
||||
cwd: path.join(__dirname, '..'),
|
||||
env: { ...process.env, DATA_DIR, SELF_HOSTED: 'true', PORT: String(PORT), NODE_ENV: 'test' },
|
||||
stdio: ['ignore', logFd, logFd],
|
||||
});
|
||||
let up = false;
|
||||
for (let i = 0; i < 80; i++) { try { const r = await fetch(BASE + '/api/status'); if (r.ok) { up = true; break; } } catch { /* */ } await sleep(250); }
|
||||
if (!up) throw new Error('server did not boot:\n' + fs.readFileSync(LOG, 'utf8').slice(-2000));
|
||||
tdb = new Database(DB_PATH);
|
||||
tdb.pragma('busy_timeout = 3000');
|
||||
});
|
||||
after(() => { try { tdb && tdb.close(); } catch { /* */ } try { proc.kill('SIGKILL'); } catch { /* */ } });
|
||||
|
||||
// Operator's direct hand-edit of SQLite while the server is running (no restart) —
|
||||
// a single persistent connection, as a `sqlite3` session would be.
|
||||
function dbEdit(sql, ...params) { return tdb.prepare(sql).run(...params).changes; }
|
||||
|
||||
function register(payload) {
|
||||
return new Promise((resolve) => {
|
||||
const sock = ioClient(`${BASE}/device`, { transports: ['websocket'], reconnection: false, forceNew: true });
|
||||
const got = { registered: false, authError: false, errorMsg: null, playlist: false, newId: null, newToken: null };
|
||||
const finish = () => { try { sock.close(); } catch { /* */ } resolve(got); };
|
||||
sock.on('connect', () => sock.emit('device:register', payload));
|
||||
sock.on('device:registered', (d) => { got.registered = true; got.newId = d.device_id; got.newToken = d.device_token; setTimeout(finish, 250); });
|
||||
sock.on('device:playlist-update', () => { got.playlist = true; });
|
||||
sock.on('device:auth-error', (e) => { got.authError = true; got.errorMsg = e && e.error; finish(); });
|
||||
setTimeout(finish, 4000);
|
||||
});
|
||||
}
|
||||
async function provision() {
|
||||
const g = await register({ pairing_code: String(crypto.randomInt(100000, 1000000)) });
|
||||
return g.registered ? { id: g.newId, token: g.newToken } : null;
|
||||
}
|
||||
|
||||
test('#143 repro: a provisioned device whose token is NULLed is REJECTED (was: re-provisioned)', async () => {
|
||||
const dev = await provision();
|
||||
assert.ok(dev && dev.token, 'provisioned with a token');
|
||||
assert.equal(dbEdit('UPDATE devices SET device_token = NULL WHERE id = ?', dev.id), 1, 'operator nulled the token');
|
||||
const got = await register({ device_id: dev.id, device_token: dev.token, device_info: { app_version: 'test' } });
|
||||
assert.ok(got.authError, 'null-token device is rejected (auth-error)');
|
||||
assert.ok(!got.registered, 'and must NOT register / re-provision');
|
||||
});
|
||||
|
||||
test('kill switch: blocked=1 refuses at the first gate (no register, no playlist)', async () => {
|
||||
const dev = await provision();
|
||||
assert.equal(dbEdit('UPDATE devices SET blocked = 1 WHERE id = ?', dev.id), 1, 'operator blocked the device');
|
||||
// no server restart — the block takes effect on the very next reconnect
|
||||
const got = await register({ device_id: dev.id, device_token: dev.token, device_info: { app_version: 'test' } });
|
||||
assert.ok(got.authError && got.errorMsg === 'Device blocked', 'refused with Device blocked');
|
||||
assert.ok(!got.registered, 'no register');
|
||||
assert.ok(!got.playlist, 'no playlist build (refused at the first gate)');
|
||||
});
|
||||
|
||||
test('unblocking (blocked=0) lets the same device connect again', async () => {
|
||||
const dev = await provision();
|
||||
dbEdit('UPDATE devices SET blocked = 1 WHERE id = ?', dev.id);
|
||||
let got = await register({ device_id: dev.id, device_token: dev.token, device_info: {} });
|
||||
assert.ok(!got.registered, 'blocked first');
|
||||
dbEdit('UPDATE devices SET blocked = 0 WHERE id = ?', dev.id);
|
||||
got = await register({ device_id: dev.id, device_token: dev.token, device_info: {} });
|
||||
assert.ok(got.registered, 'unblocked -> connects normally again');
|
||||
});
|
||||
|
||||
test('the pairing/provisioning seam still works: a NEW token-less device first-pairs', async () => {
|
||||
const got = await register({ pairing_code: String(crypto.randomInt(100000, 1000000)) });
|
||||
assert.ok(got.registered, 'first pairing (no device_id) still succeeds');
|
||||
assert.ok(got.newId && got.newToken, 'a fresh device_id + token are issued');
|
||||
});
|
||||
|
||||
test('no regression: a normal device with a valid token registers + gets its playlist', async () => {
|
||||
const dev = await provision();
|
||||
const got = await register({ device_id: dev.id, device_token: dev.token, device_info: { app_version: 'test' } });
|
||||
assert.ok(got.registered, 'valid token registers');
|
||||
assert.ok(got.playlist, 'and receives its playlist');
|
||||
assert.ok(!got.authError, 'no auth error');
|
||||
});
|
||||
91
server/test/device-pairing-notify.test.js
Normal file
91
server/test/device-pairing-notify.test.js
Normal file
|
|
@ -0,0 +1,91 @@
|
|||
'use strict';
|
||||
|
||||
// #143 — server must tell a screen it's paired so it leaves the Connect page. The
|
||||
// app leaves the Connect page ONLY on 'device:paired'. /api/provision/pair pushes
|
||||
// that to a LIVE socket at pair time, but a screen paired-while-disconnected or that
|
||||
// reconnects after pairing never got it and sat on Connect forever (Bold). Fix:
|
||||
// re-emit 'device:paired' on reconnect when the device is paired (user_id set).
|
||||
// Uses the EXISTING client event — no client/protocol change. Unique PORT 3989.
|
||||
|
||||
const { test, before, after } = require('node:test');
|
||||
const assert = require('node:assert/strict');
|
||||
const { spawn } = require('node:child_process');
|
||||
const path = require('node:path');
|
||||
const os = require('node:os');
|
||||
const fs = require('node:fs');
|
||||
const crypto = require('node:crypto');
|
||||
const ioClient = require('socket.io-client');
|
||||
|
||||
const PORT = 3989;
|
||||
const BASE = `http://127.0.0.1:${PORT}`;
|
||||
const DATA_DIR = path.join(os.tmpdir(), 'st-pair-' + crypto.randomBytes(4).toString('hex'));
|
||||
const LOG = path.join(os.tmpdir(), 'st-pair-' + crypto.randomBytes(4).toString('hex') + '.log');
|
||||
let proc, JWT;
|
||||
const sleep = (ms) => new Promise(r => setTimeout(r, ms));
|
||||
|
||||
before(async () => {
|
||||
const logFd = fs.openSync(LOG, 'w');
|
||||
proc = spawn('node', ['server.js'], { cwd: path.join(__dirname, '..'), env: { ...process.env, DATA_DIR, SELF_HOSTED: 'true', PORT: String(PORT), NODE_ENV: 'test' }, stdio: ['ignore', logFd, logFd] });
|
||||
let up = false;
|
||||
for (let i = 0; i < 80; i++) { try { const r = await fetch(BASE + '/api/status'); if (r.ok) { up = true; break; } } catch { /* */ } await sleep(250); }
|
||||
if (!up) throw new Error('server did not boot:\n' + fs.readFileSync(LOG, 'utf8').slice(-2000));
|
||||
// first user -> admin (self-hosted), gives a workspace for the pair endpoint
|
||||
const r = await fetch(BASE + '/api/auth/register', { method: 'POST', headers: { 'Content-Type': 'application/json' }, body: JSON.stringify({ email: 'op@test.local', password: 'test12345', name: 'Op' }) });
|
||||
JWT = (await r.json()).token;
|
||||
});
|
||||
after(() => { try { proc.kill('SIGKILL'); } catch { /* */ } });
|
||||
|
||||
// First pairing: device registers with a pairing code -> gets its device_id + token.
|
||||
function provisionWithCode(code) {
|
||||
return new Promise((resolve) => {
|
||||
const sock = ioClient(`${BASE}/device`, { transports: ['websocket'], reconnection: false, forceNew: true });
|
||||
sock.on('connect', () => sock.emit('device:register', { pairing_code: code }));
|
||||
sock.on('device:registered', (d) => { try { sock.close(); } catch { /* */ } resolve({ id: d.device_id, token: d.device_token }); });
|
||||
setTimeout(() => resolve(null), 4000);
|
||||
});
|
||||
}
|
||||
// Operator pairs the device in the CMS (the device socket is NOT connected now).
|
||||
async function pairViaApi(code, name) {
|
||||
const r = await fetch(BASE + '/api/provision/pair', { method: 'POST', headers: { Authorization: 'Bearer ' + JWT, 'Content-Type': 'application/json' }, body: JSON.stringify({ pairing_code: code, name }) });
|
||||
return r.status;
|
||||
}
|
||||
// A reconnect (device_id + token) — collect what the server pushes.
|
||||
function reconnect(id, token) {
|
||||
return new Promise((resolve) => {
|
||||
const sock = ioClient(`${BASE}/device`, { transports: ['websocket'], reconnection: false, forceNew: true });
|
||||
const got = { registered: false, paired: false, pairedName: null, playlist: false };
|
||||
sock.on('connect', () => sock.emit('device:register', { device_id: id, device_token: token, device_info: { app_version: 'test' } }));
|
||||
sock.on('device:registered', () => { got.registered = true; });
|
||||
sock.on('device:paired', (d) => { got.paired = true; got.pairedName = d && d.name; });
|
||||
sock.on('device:playlist-update', () => { got.playlist = true; });
|
||||
setTimeout(() => { try { sock.close(); } catch { /* */ } resolve(got); }, 700);
|
||||
});
|
||||
}
|
||||
const rnd = () => String(crypto.randomInt(100000, 1000000));
|
||||
|
||||
test('#143 repro: a device paired server-side, on reconnect, RECEIVES device:paired (leaves Connect page)', async () => {
|
||||
const code = rnd();
|
||||
const dev = await provisionWithCode(code);
|
||||
assert.ok(dev && dev.id, 'provisioned (sits on Connect page, status=provisioning)');
|
||||
assert.equal(await pairViaApi(code, 'Lobby'), 200, 'operator pairs it via the CMS while the device socket is closed');
|
||||
const got = await reconnect(dev.id, dev.token);
|
||||
assert.ok(got.registered, 'device reconnects');
|
||||
assert.ok(got.paired, 'server pushes device:paired on reconnect (the exact event the client waits for)');
|
||||
assert.equal(got.pairedName, 'Lobby', 'with the paired name');
|
||||
assert.ok(got.playlist, 'and its playlist, so it can play');
|
||||
});
|
||||
|
||||
test('a device NOT yet paired gets NO device:paired on reconnect (stays on the pairing flow)', async () => {
|
||||
const code = rnd();
|
||||
const dev = await provisionWithCode(code); // provisioned but never paired
|
||||
const got = await reconnect(dev.id, dev.token);
|
||||
assert.ok(got.registered, 'it still registers');
|
||||
assert.ok(!got.paired, 'but is NOT told paired (no false pairing-complete) -> stays on Connect');
|
||||
});
|
||||
|
||||
test('the fix uses the existing client listener: device:paired (no new protocol)', async () => {
|
||||
// The repro test above asserts the client receives 'device:paired' — the same event the
|
||||
// web player (index.html) and Android (ProvisioningActivity.onPaired) already handle. This
|
||||
// test documents that no new client event/protocol was introduced (server-only fix).
|
||||
assert.ok(true);
|
||||
});
|
||||
116
server/test/fingerprint-reclaim.test.js
Normal file
116
server/test/fingerprint-reclaim.test.js
Normal file
|
|
@ -0,0 +1,116 @@
|
|||
'use strict';
|
||||
|
||||
// #143 — fingerprint-reclaim stuck loop. A device gone by every RUNTIME signal
|
||||
// (no live socket + stale heartbeat) must be reclaimable; a genuinely-live device
|
||||
// must still be rejected; the deferral log must not flood. Devices are seeded by
|
||||
// direct SQLite (mimics the real DB state + avoids the disconnect-debounce window
|
||||
// leaving a stale liveConn). Unique PORT 3988 (not 3982-3987).
|
||||
|
||||
const { test, before, after } = require('node:test');
|
||||
const assert = require('node:assert/strict');
|
||||
const { spawn } = require('node:child_process');
|
||||
const path = require('node:path');
|
||||
const os = require('node:os');
|
||||
const fs = require('node:fs');
|
||||
const crypto = require('node:crypto');
|
||||
const ioClient = require('socket.io-client');
|
||||
const Database = require('better-sqlite3');
|
||||
|
||||
const PORT = 3988;
|
||||
const BASE = `http://127.0.0.1:${PORT}`;
|
||||
const DATA_DIR = path.join(os.tmpdir(), 'st-recl-' + crypto.randomBytes(4).toString('hex'));
|
||||
const LOG = path.join(os.tmpdir(), 'st-recl-' + crypto.randomBytes(4).toString('hex') + '.log');
|
||||
const DB_PATH = path.join(DATA_DIR, 'db', 'remote_display.db');
|
||||
let proc, tdb;
|
||||
const sleep = (ms) => new Promise(r => setTimeout(r, ms));
|
||||
|
||||
before(async () => {
|
||||
const logFd = fs.openSync(LOG, 'w');
|
||||
proc = spawn('node', ['server.js'], {
|
||||
cwd: path.join(__dirname, '..'),
|
||||
env: { ...process.env, DATA_DIR, SELF_HOSTED: 'true', PORT: String(PORT), NODE_ENV: 'test', RECLAIM_SETTLE_SECONDS: '300', RECLAIM_REJECT_LOG_WINDOW_MS: '60000' },
|
||||
stdio: ['ignore', logFd, logFd],
|
||||
});
|
||||
let up = false;
|
||||
for (let i = 0; i < 80; i++) { try { const r = await fetch(BASE + '/api/status'); if (r.ok) { up = true; break; } } catch { /* */ } await sleep(250); }
|
||||
if (!up) throw new Error('server did not boot:\n' + fs.readFileSync(LOG, 'utf8').slice(-2000));
|
||||
tdb = new Database(DB_PATH); tdb.pragma('busy_timeout = 3000'); tdb.pragma('foreign_keys = OFF');
|
||||
});
|
||||
after(() => { try { tdb && tdb.close(); } catch { /* */ } try { proc.kill('SIGKILL'); } catch { /* */ } });
|
||||
|
||||
// Seed a device + its fingerprint link directly (no socket -> no lingering liveConn).
|
||||
function seedDevice(fp, { token, heartbeatAgo }) {
|
||||
const id = crypto.randomUUID();
|
||||
tdb.prepare("INSERT INTO devices (id, status, last_heartbeat, device_token) VALUES (?, 'offline', strftime('%s','now') - ?, ?)").run(id, heartbeatAgo, token);
|
||||
tdb.prepare('INSERT INTO device_fingerprints (fingerprint, device_id) VALUES (?, ?)').run(fp, id);
|
||||
return { id, token };
|
||||
}
|
||||
function staleHeartbeat(id, ago) { tdb.prepare("UPDATE devices SET last_heartbeat = strftime('%s','now') - ? WHERE id = ?").run(ago, id); }
|
||||
|
||||
function attempt(payload) { // one-shot register; resolves and closes
|
||||
return new Promise((resolve) => {
|
||||
const sock = ioClient(`${BASE}/device`, { transports: ['websocket'], reconnection: false, forceNew: true });
|
||||
const got = { registered: false, newId: null, authError: false, errorMsg: null };
|
||||
const finish = () => { try { sock.close(); } catch { /* */ } resolve(got); };
|
||||
sock.on('connect', () => sock.emit('device:register', payload));
|
||||
sock.on('device:registered', (d) => { got.registered = true; got.newId = d.device_id; setTimeout(finish, 150); });
|
||||
sock.on('device:auth-error', (e) => { got.authError = true; got.errorMsg = e && e.error; finish(); });
|
||||
setTimeout(finish, 4000);
|
||||
});
|
||||
}
|
||||
function connectLive(payload) { // keeps the socket open (live connection); caller closes
|
||||
return new Promise((resolve) => {
|
||||
const sock = ioClient(`${BASE}/device`, { transports: ['websocket'], reconnection: false, forceNew: true });
|
||||
sock.on('connect', () => sock.emit('device:register', payload));
|
||||
sock.on('device:registered', () => resolve({ sock, registered: true }));
|
||||
sock.on('device:auth-error', () => resolve({ sock, registered: false }));
|
||||
setTimeout(() => resolve({ sock, registered: false }), 4000);
|
||||
});
|
||||
}
|
||||
const rnd = () => String(crypto.randomInt(100000, 1000000));
|
||||
|
||||
test('#143 repro: a gone device (no live conn + stale heartbeat) is reclaimable', async () => {
|
||||
const fp = 'fp-gone-' + crypto.randomBytes(4).toString('hex');
|
||||
const dev = seedDevice(fp, { token: 'tok', heartbeatAgo: 99999 }); // ~27h stale, never connected
|
||||
const r = await attempt({ pairing_code: rnd(), fingerprint: fp }); // no device_id -> reclaim path
|
||||
assert.ok(r.registered, 'reclaim SUCCEEDS for a gone device');
|
||||
assert.equal(r.newId, dev.id, 'it reclaims the SAME device identity');
|
||||
assert.ok(!r.authError, 'no rejection');
|
||||
});
|
||||
|
||||
test('no regression: a genuinely live device REJECTS a fingerprint reclaim', async () => {
|
||||
const fp = 'fp-live-' + crypto.randomBytes(4).toString('hex');
|
||||
const dev = seedDevice(fp, { token: 'tok2', heartbeatAgo: 10 });
|
||||
const live = await connectLive({ device_id: dev.id, device_token: 'tok2', device_info: {} });
|
||||
assert.ok(live.registered, 'device is live (has a connection)');
|
||||
const r = await attempt({ pairing_code: rnd(), fingerprint: fp });
|
||||
assert.ok(r.authError && !r.registered, 'reclaim of a LIVE device is rejected (abuse protection intact)');
|
||||
try { live.sock.close(); } catch { /* */ }
|
||||
});
|
||||
|
||||
test('clear-on-leave: after disconnect, liveConn is cleared so a (stale) device reclaims', async () => {
|
||||
const fp = 'fp-leave-' + crypto.randomBytes(4).toString('hex');
|
||||
const dev = seedDevice(fp, { token: 'tok3', heartbeatAgo: 99999 });
|
||||
const live = await connectLive({ device_id: dev.id, device_token: 'tok3', device_info: {} });
|
||||
assert.ok(live.registered);
|
||||
// while live, reclaim is rejected (liveConn present)
|
||||
let r = await attempt({ pairing_code: rnd(), fingerprint: fp });
|
||||
assert.ok(!r.registered, 'rejected while a live connection exists');
|
||||
// leave: close + wait past the 5s offline-debounce so removeConnection runs
|
||||
try { live.sock.close(); } catch { /* */ }
|
||||
await sleep(6000);
|
||||
staleHeartbeat(dev.id, 99999); // the live register bumped last_heartbeat; re-stale it
|
||||
r = await attempt({ pairing_code: rnd(), fingerprint: fp });
|
||||
assert.ok(r.registered, 'after disconnect cleared liveConn, the gone device reclaims');
|
||||
});
|
||||
|
||||
test('log noise: a retried reclaim logs at most once per device per window', async () => {
|
||||
const fp = 'fp-log-' + crypto.randomBytes(4).toString('hex');
|
||||
const dev = seedDevice(fp, { token: 'tok4', heartbeatAgo: 5 }); // recent -> reclaim deferred
|
||||
const live = await connectLive({ device_id: dev.id, device_token: 'tok4', device_info: {} });
|
||||
for (let i = 0; i < 4; i++) { const r = await attempt({ pairing_code: rnd(), fingerprint: fp }); assert.ok(r.authError, 'each retry is deferred'); }
|
||||
try { live.sock.close(); } catch { /* */ }
|
||||
await sleep(200);
|
||||
const lines = fs.readFileSync(LOG, 'utf8').split('\n').filter(l => l.includes('reclaim deferred for ' + dev.id)).length;
|
||||
assert.ok(lines <= 1, `at most one deferral log per window (got ${lines}); no double-log / per-2s flood`);
|
||||
});
|
||||
113
server/test/ota-breaker.test.js
Normal file
113
server/test/ota-breaker.test.js
Normal file
|
|
@ -0,0 +1,113 @@
|
|||
'use strict';
|
||||
|
||||
// #144 — OTA-check circuit-breaker + phantom guard. Deterministic unit tests with
|
||||
// injected `now` (no waiting), covering the required cases (a)-(f). No DB/socket;
|
||||
// the breaker module is pure + in-memory.
|
||||
|
||||
const { test, beforeEach } = require('node:test');
|
||||
const assert = require('node:assert/strict');
|
||||
const ota = require('../lib/ota-breaker');
|
||||
|
||||
const LATEST = '1.9.2-beta4'; // simulate the beta4 server
|
||||
const T0 = 1_000_000;
|
||||
beforeEach(() => ota.reset());
|
||||
|
||||
test('semver comparator: real-older < latest, same-core beta order, equal/newer', () => {
|
||||
assert.equal(ota.cmp('1.7.12', LATEST) < 0, true, '1.7.12 older');
|
||||
assert.equal(ota.cmp('1.9.2-beta3', LATEST) < 0, true, 'beta3 < beta4 (same core)');
|
||||
assert.equal(ota.cmp('1.9.2-beta4', LATEST), 0, 'equal');
|
||||
assert.equal(ota.cmp('1.9.3', LATEST) > 0, true, 'newer core');
|
||||
assert.equal(ota.cmp('banana', LATEST), null, 'garbage unparseable');
|
||||
});
|
||||
|
||||
test('(a) PHANTOM/unrecognized -> instant no-offer, no grace, no rate state', () => {
|
||||
// superseded old-core prerelease (strobe's 1.9.1-beta4) — caught on the FIRST check
|
||||
let v = ota.decide('1.9.1-beta4', LATEST, null, T0);
|
||||
assert.equal(v.update_available, false);
|
||||
assert.equal(v.reason, 'superseded-prerelease');
|
||||
// garbage string
|
||||
v = ota.decide('banana', LATEST, null, T0);
|
||||
assert.equal(v.update_available, false);
|
||||
assert.equal(v.reason, 'unrecognized-version');
|
||||
// never offer a downgrade
|
||||
assert.equal(ota.decide('1.9.3', LATEST, null, T0).update_available, false);
|
||||
});
|
||||
|
||||
test('(b) fast loop (every 15s) trips within ~3 checks / ~45s, NOT minutes', () => {
|
||||
const r = (dt) => ota.decide('1.7.12', LATEST, null, T0 + dt);
|
||||
assert.equal(r(0).update_available, true, 'check1 offered');
|
||||
assert.equal(r(15_000).update_available, true, 'check2 offered');
|
||||
assert.equal(r(30_000).update_available, true, 'check3 offered');
|
||||
const trip = r(45_000);
|
||||
assert.equal(trip.update_available, false, 'check4 (~45s) trips');
|
||||
assert.equal(trip.reason, 'rate-backoff');
|
||||
assert.ok(trip.retry_after_seconds >= 1, 'backoff has a retry hint');
|
||||
});
|
||||
|
||||
test('(c) healthy straggler on beta3, polling every 12 min, is ALWAYS offered beta4 (rollout NOT throttled)', () => {
|
||||
for (let i = 0; i < 6; i++) {
|
||||
const v = ota.decide('1.9.2-beta3', LATEST, null, T0 + i * 12 * 60_000);
|
||||
assert.equal(v.update_available, true, `12-min poll #${i + 1} still offered`);
|
||||
assert.equal(v.reason, 'offer');
|
||||
}
|
||||
});
|
||||
|
||||
test('(d) a device that APPLIES the update (version advances) is never throttled', () => {
|
||||
// it was looping/being offered on the old version...
|
||||
ota.decide('1.7.12', LATEST, 'devX', T0);
|
||||
ota.decide('1.7.12', LATEST, 'devX', T0 + 1000);
|
||||
// ...then it applies -> now reports latest
|
||||
const v = ota.decide(LATEST, LATEST, 'devX', T0 + 2000);
|
||||
assert.equal(v.update_available, false);
|
||||
assert.equal(v.reason, 'up-to-date'); // up-to-date, NOT rate-backoff
|
||||
});
|
||||
|
||||
test('(e) device_id looping is throttled PER-DEVICE; another device on the same version is unaffected', () => {
|
||||
const loopA = (dt) => ota.decide('1.7.12', LATEST, 'A', T0 + dt);
|
||||
loopA(0); loopA(15_000); loopA(30_000);
|
||||
assert.equal(loopA(45_000).update_available, false, 'device A trips');
|
||||
// device B, same version, checking normally -> its own key, still offered
|
||||
assert.equal(ota.decide('1.7.12', LATEST, 'B', T0 + 46_000).update_available, true, 'device B unaffected');
|
||||
});
|
||||
|
||||
test('(f) legacy client without device_id is caught by the version-keyed path (and lumps per version)', () => {
|
||||
// two legacy devices, no device_id, same version -> share the v:1.7.12 bucket
|
||||
const v = (dt) => ota.decide('1.7.12', LATEST, null, T0 + dt);
|
||||
assert.equal(v(0).update_available, true);
|
||||
assert.equal(v(10_000).update_available, true);
|
||||
assert.equal(v(20_000).update_available, true);
|
||||
assert.equal(v(30_000).update_available, false, 'combined version-keyed rate trips without any device_id');
|
||||
});
|
||||
|
||||
test('(scope) slow #144 drip: stable 1.7.12 polling ~every 12 min is NEVER throttled (fast-flood only)', () => {
|
||||
// documents the deliberate scope: this build catches the fast flood + phantoms, NOT the
|
||||
// slow 1.7.12 drip (that needs #144 option-3 skip-after-N, not included here).
|
||||
for (let i = 0; i < 10; i++) {
|
||||
const v = ota.decide('1.7.12', LATEST, null, T0 + i * 12 * 60_000);
|
||||
assert.equal(v.update_available, true, `12-min drip poll #${i + 1} still offered (not throttled)`);
|
||||
assert.equal(v.reason, 'offer');
|
||||
}
|
||||
});
|
||||
|
||||
test('state Map is bounded: sweep() evicts idle buckets, keeps recent', () => {
|
||||
ota.decide('1.7.12', LATEST, 'old', T0); // bucket d:old, lastSeen=T0
|
||||
const now = T0 + 2 * 60 * 60_000; // 2h later
|
||||
ota.decide('1.7.12', LATEST, 'recent', now - 60_000); // bucket d:recent, lastSeen=now-1min
|
||||
assert.equal(ota._size(), 2, 'two buckets');
|
||||
const removed = ota.sweep(now);
|
||||
assert.equal(removed, 1, 'the 2h-idle bucket is evicted');
|
||||
assert.equal(ota._size(), 1, 'the recent bucket is kept (no unbounded growth)');
|
||||
});
|
||||
|
||||
test('exponential backoff escalates across cooldowns (30s -> 2m)', () => {
|
||||
const r = (dt) => ota.decide('1.7.12', LATEST, 'esc', T0 + dt);
|
||||
r(0); r(15_000); r(30_000);
|
||||
const t1 = r(45_000); // first trip
|
||||
assert.equal(t1.retry_after_seconds, 30, 'first cooldown 30s');
|
||||
// after the 30s cooldown elapses, flood again -> next cooldown (2m)
|
||||
const base = 45_000 + 31_000;
|
||||
r(base); r(base + 1000); r(base + 2000);
|
||||
const t2 = r(base + 3000);
|
||||
assert.equal(t2.update_available, false);
|
||||
assert.equal(t2.retry_after_seconds, 120, 'second cooldown escalates to 2m');
|
||||
});
|
||||
70
server/test/ota-check.test.js
Normal file
70
server/test/ota-check.test.js
Normal file
|
|
@ -0,0 +1,70 @@
|
|||
'use strict';
|
||||
|
||||
// #144 — HTTP integration: the real /api/update/check endpoint with the breaker wired.
|
||||
// Proves end-to-end behavior + the device_id passthrough/keying. Rapid requests stay
|
||||
// within the 60s rate window, so THRESHOLD(3) trips on the 4th. Unique PORT 3991.
|
||||
|
||||
const { test, before, after } = require('node:test');
|
||||
const assert = require('node:assert/strict');
|
||||
const { spawn } = require('node:child_process');
|
||||
const path = require('node:path');
|
||||
const os = require('node:os');
|
||||
const fs = require('node:fs');
|
||||
const crypto = require('node:crypto');
|
||||
|
||||
const PORT = 3991;
|
||||
const BASE = `http://127.0.0.1:${PORT}`;
|
||||
const DATA_DIR = path.join(os.tmpdir(), 'st-ota-' + crypto.randomBytes(4).toString('hex'));
|
||||
const LOG = path.join(os.tmpdir(), 'st-ota-' + crypto.randomBytes(4).toString('hex') + '.log');
|
||||
let proc, LATEST;
|
||||
const sleep = (ms) => new Promise(r => setTimeout(r, ms));
|
||||
const check = async (version, deviceId) => {
|
||||
const q = `version=${encodeURIComponent(version)}` + (deviceId ? `&device_id=${encodeURIComponent(deviceId)}` : '');
|
||||
const r = await fetch(`${BASE}/api/update/check?${q}`);
|
||||
return r.json();
|
||||
};
|
||||
|
||||
before(async () => {
|
||||
// the breaker only reports update_available when an APK actually exists — give the
|
||||
// test server a dummy one (resolveApkPath checks DATA_DIR/ScreenTinker.apk).
|
||||
fs.mkdirSync(DATA_DIR, { recursive: true });
|
||||
fs.writeFileSync(path.join(DATA_DIR, 'ScreenTinker.apk'), Buffer.alloc(1024, 1));
|
||||
const logFd = fs.openSync(LOG, 'w');
|
||||
proc = spawn('node', ['server.js'], { cwd: path.join(__dirname, '..'), env: { ...process.env, DATA_DIR, SELF_HOSTED: 'true', PORT: String(PORT), NODE_ENV: 'test' }, stdio: ['ignore', logFd, logFd] });
|
||||
let up = false;
|
||||
for (let i = 0; i < 80; i++) { try { const r = await fetch(BASE + '/api/status'); if (r.ok) { up = true; break; } } catch { /* */ } await sleep(250); }
|
||||
if (!up) throw new Error('server did not boot:\n' + fs.readFileSync(LOG, 'utf8').slice(-2000));
|
||||
LATEST = (await check('0.0.1')).latest_version; // an ancient version reads back the server's latest
|
||||
});
|
||||
after(() => { try { proc.kill('SIGKILL'); } catch { /* */ } });
|
||||
|
||||
test('a device already on latest gets no offer (up-to-date)', async () => {
|
||||
const r = await check(LATEST);
|
||||
assert.equal(r.update_available, false);
|
||||
assert.equal(r.reason, 'up-to-date');
|
||||
});
|
||||
|
||||
test('(a) phantom version (superseded old-core prerelease) -> instant no-offer over HTTP', async () => {
|
||||
const r = await check('1.9.1-beta4');
|
||||
assert.equal(r.update_available, false);
|
||||
assert.equal(r.reason, 'superseded-prerelease');
|
||||
});
|
||||
|
||||
test('(b/f) legacy client (no device_id) looping the same version trips the version-keyed breaker', async () => {
|
||||
const v = '1.6.0'; // fresh offerable older version, no device_id
|
||||
const results = [];
|
||||
for (let i = 0; i < 5; i++) results.push(await check(v)); // rapid, within the 60s window
|
||||
assert.ok(results.slice(0, 3).every(r => r.update_available === true), 'first 3 offered');
|
||||
assert.equal(results[3].update_available, false, '4th trips');
|
||||
assert.equal(results[3].reason, 'rate-backoff');
|
||||
assert.ok(results[3].retry_after_seconds >= 1, 'response carries retry_after_seconds');
|
||||
});
|
||||
|
||||
test('(e) device_id looping is throttled per-device; another device on the same version is unaffected', async () => {
|
||||
const v = '1.5.0';
|
||||
for (let i = 0; i < 3; i++) await check(v, 'devA');
|
||||
const aTrip = await check(v, 'devA'); // devA 4th -> trips
|
||||
assert.equal(aTrip.update_available, false, 'devA throttled');
|
||||
const bOk = await check(v, 'devB'); // devB first check -> offered
|
||||
assert.equal(bOk.update_available, true, 'devB (same version, different device) unaffected');
|
||||
});
|
||||
|
|
@ -7,6 +7,8 @@ const config = require('../config');
|
|||
const heartbeat = require('../services/heartbeat');
|
||||
const commandQueue = require('../lib/command-queue');
|
||||
const reconnectThrottle = require('../lib/reconnect-throttle');
|
||||
const contentAckLimiter = require('../lib/content-ack-limiter');
|
||||
const loopLag = require('../services/loop-lag');
|
||||
|
||||
// Debounce window for marking a device offline on socket disconnect. Brief
|
||||
// flap (Wi-Fi blip, Engine.IO ping miss, server-side eviction-then-reconnect)
|
||||
|
|
@ -29,11 +31,13 @@ const OFFLINE_DEBOUNCE_MS = 5000;
|
|||
const lastPlayLogAt = new Map();
|
||||
const PLAY_LOG_MIN_GAP_MS = 2000;
|
||||
|
||||
// #142 content-ack dedup. An older app can spam "content <id>: ready" for the same
|
||||
// item; each was logged + emitted individually (secondary load). Suppress identical
|
||||
// (device_id, content_id, status) reports within config.contentAckDedupMs. A status
|
||||
// CHANGE has a different key and passes immediately. In-memory; resets on restart.
|
||||
const lastContentAck = new Map();
|
||||
// #142 dedup + #143 per-device rate budget + global loop-lag valve for content-acks
|
||||
// all live in one control: lib/content-ack-limiter.js (required above as
|
||||
// contentAckLimiter). Kept out of this file so there is a single limiter on the path.
|
||||
|
||||
// #143 fingerprint-reclaim deferral log throttle: deviceId -> last-logged ms, so a
|
||||
// device retrying reclaim every ~2s logs at most once per reclaimRejectLogWindowMs.
|
||||
const lastReclaimRejectLogAt = new Map();
|
||||
const { getUserPlan, getUserDeviceCount } = require('../middleware/subscription');
|
||||
// Phase 2.3: deviceRoom() resolves a device_id to its workspace room so
|
||||
// dashboardNs.emit can be scoped instead of broadcast platform-wide.
|
||||
|
|
@ -263,6 +267,24 @@ module.exports = function setupDeviceSocket(io) {
|
|||
socket.on('device:register', (data) => {
|
||||
const { pairing_code, device_id, device_token, device_info, fingerprint } = data;
|
||||
|
||||
// #143 operator KILL SWITCH — the FIRST gate, before the fingerprint block,
|
||||
// the reconnect throttle, any DB writes, or playlist build. A device flagged
|
||||
// `blocked` is refused immediately. Settable by DIRECT SQLite during an outage
|
||||
// (dashboard down): UPDATE devices SET blocked = 1 WHERE id = '<device_id>';
|
||||
// The row is re-read on every register, so a hand-edited UPDATE takes effect on
|
||||
// the device's NEXT reconnect with NO server restart. Unblock: blocked = 0.
|
||||
// Unlike nulling the token (#143: that re-provisioned instead of locking out),
|
||||
// this is an explicit, enforceable block.
|
||||
if (device_id) {
|
||||
const blk = db.prepare('SELECT blocked FROM devices WHERE id = ?').get(device_id);
|
||||
if (blk && blk.blocked) {
|
||||
console.warn(`[blocked] refused device ${device_id} (operator block) from ${getClientIp(socket)}`);
|
||||
socket.emit('device:auth-error', { error: 'Device blocked' });
|
||||
process.nextTick(() => { try { socket.disconnect(true); } catch (_) { /* */ } });
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// Track device fingerprint to prevent reinstall abuse
|
||||
if (fingerprint) {
|
||||
try {
|
||||
|
|
@ -276,20 +298,31 @@ module.exports = function setupDeviceSocket(io) {
|
|||
const oldDevice = db.prepare('SELECT * FROM devices WHERE id = ?').get(existing.device_id);
|
||||
if (oldDevice) {
|
||||
// Fingerprint reclaim guard: a leaked/duplicated fingerprint shouldn't be enough
|
||||
// to take over a live device. Reject the reclaim if the device is currently
|
||||
// online OR has been online within the last 24h — by then a real reinstall has
|
||||
// had plenty of time to come back, but a credential thief is more likely caught.
|
||||
// to take over a LIVE device. #143: decide "still alive" from RUNTIME signals —
|
||||
// a live socket, OR a genuinely recent heartbeat (within the settle window). The
|
||||
// old check used `secondsSince < 24h`, which treated a device merely offline <24h
|
||||
// as "active": a legitimately-gone device (liveConn=false, status=offline, stale
|
||||
// heartbeat) could never reclaim and retried every ~2s, flooding logs (Bold beta1
|
||||
// / 2febcaa9, 1984694c, 139159eb). NOT a missing clear — liveConn IS removed on
|
||||
// disconnect + the offline-timeout sweep, and status IS set offline on both; the
|
||||
// 24h TIME gate was the cause. A device gone by every runtime signal is reclaimable.
|
||||
const liveConn = heartbeat.getConnection(existing.device_id);
|
||||
const RECLAIM_GRACE_SECONDS = 24 * 60 * 60;
|
||||
const lastBeat = oldDevice.last_heartbeat || 0;
|
||||
const secondsSince = Math.floor(Date.now() / 1000) - lastBeat;
|
||||
if (liveConn || (oldDevice.status === 'online') || secondsSince < RECLAIM_GRACE_SECONDS) {
|
||||
console.warn(`Fingerprint reclaim rejected for ${existing.device_id}: device active (status=${oldDevice.status}, ${secondsSince}s since last heartbeat, liveConn=${!!liveConn})`);
|
||||
const stillAlive = !!liveConn || secondsSince < config.reclaimSettleSeconds;
|
||||
if (stillAlive) {
|
||||
// Log at most once per device per window so a retrying/stuck device can't flood stdout.
|
||||
const nowMs = Date.now();
|
||||
if (nowMs - (lastReclaimRejectLogAt.get(existing.device_id) || 0) >= config.reclaimRejectLogWindowMs) {
|
||||
lastReclaimRejectLogAt.set(existing.device_id, nowMs);
|
||||
console.warn(`Fingerprint reclaim deferred for ${existing.device_id}: still settling (status=${oldDevice.status}, ${secondsSince}s since heartbeat, liveConn=${!!liveConn}); reclaimable after ${config.reclaimSettleSeconds}s offline`);
|
||||
}
|
||||
socket.emit('device:auth-error', {
|
||||
error: 'This display is currently active. If you reinstalled the app, the original device must be offline for 24 hours before its slot can be reclaimed.'
|
||||
error: `This display was recently active. If you reinstalled the app, retry after it has been offline for ${config.reclaimSettleSeconds} seconds.`
|
||||
});
|
||||
return;
|
||||
}
|
||||
lastReclaimRejectLogAt.delete(existing.device_id); // reclaim proceeding — clear any deferral log state
|
||||
|
||||
// Fingerprint matched — this is a reinstalled app reconnecting to its old device.
|
||||
// Issue a fresh token so the app can authenticate going forward.
|
||||
|
|
@ -353,9 +386,16 @@ module.exports = function setupDeviceSocket(io) {
|
|||
// reconnected" and reading as connection instability (#134 — there were 1415
|
||||
// "reconnected" logs against only ~30 real socket connects and 0 heartbeat timeouts).
|
||||
const isPlaylistRefresh = currentDeviceId === device_id;
|
||||
// Validate device token (skip for legacy devices that don't have a token yet)
|
||||
if (device.device_token && !validateDeviceToken(device_id, device_token)) {
|
||||
console.warn(`Invalid device token for ${device_id} from ${getClientIp(socket)} — received_len=${(device_token || '').length}, stored_len=${device.device_token.length}, received_prefix=${(device_token || '').substring(0, 8)}, stored_prefix=${device.device_token.substring(0, 8)}`);
|
||||
// #143 AUTH FIX: an already-provisioned device (it has a row — every row,
|
||||
// even `provisioning`, is created WITH a token) presenting a null/empty/
|
||||
// invalid token is NOT authenticated — reject and disconnect. The old guard
|
||||
// `device.device_token && !validate(...)` short-circuited on a NULL stored
|
||||
// token, so nulling a device's token RE-PROVISIONED it (auth skipped + a
|
||||
// fresh token minted) instead of locking it out (Bold #143 / 75c2a08a).
|
||||
// validateDeviceToken already returns false for null-stored/missing/mismatch.
|
||||
// First pairing is the pairing_code path below (no device_id) — unaffected.
|
||||
if (!validateDeviceToken(device_id, device_token)) {
|
||||
console.warn(`Invalid/missing device token for ${device_id} from ${getClientIp(socket)} — received_len=${(device_token || '').length}, has_stored_token=${!!device.device_token}`);
|
||||
socket.emit('device:auth-error', { error: 'Invalid device token' });
|
||||
return;
|
||||
}
|
||||
|
|
@ -388,12 +428,10 @@ module.exports = function setupDeviceSocket(io) {
|
|||
db.prepare("UPDATE devices SET status = 'online', last_heartbeat = strftime('%s','now'), ip_address = ?, updated_at = strftime('%s','now') WHERE id = ?")
|
||||
.run(getClientIp(socket), device_id);
|
||||
|
||||
// Generate token for legacy devices that don't have one yet
|
||||
let tokenToSend = device.device_token;
|
||||
if (!tokenToSend) {
|
||||
tokenToSend = generateDeviceToken();
|
||||
db.prepare('UPDATE devices SET device_token = ? WHERE id = ?').run(tokenToSend, device_id);
|
||||
}
|
||||
// #143: past the validateDeviceToken gate above the stored token is
|
||||
// guaranteed non-null, so we just echo it back. The old "mint a token for a
|
||||
// null-token device" path is removed — that was the re-provisioning vector.
|
||||
const tokenToSend = device.device_token;
|
||||
|
||||
if (device_info) {
|
||||
db.prepare(`UPDATE devices SET android_version = ?, app_version = ?, screen_width = ?, screen_height = ?, render_width = ?, render_height = ?,
|
||||
|
|
@ -407,6 +445,16 @@ module.exports = function setupDeviceSocket(io) {
|
|||
heartbeat.registerConnection(device_id, socket.id);
|
||||
socket.join(device_id);
|
||||
socket.emit('device:registered', { device_id, device_token: tokenToSend, status: 'online' });
|
||||
// #143: a device paired/claimed server-side (user_id set) that RECONNECTS must be told
|
||||
// it's paired — the app leaves the Connect page ONLY on 'device:paired' (web: hides the
|
||||
// setup screen; Android ProvisioningActivity.onPaired -> MainActivity). The
|
||||
// /api/provision/pair endpoint pushes device:paired to a LIVE socket at pair time
|
||||
// (server.js), but a screen paired while disconnected — or that reconnects after pairing
|
||||
// — never received it and sat on the Connect page forever showing the URL (Bold #143).
|
||||
// Re-send the exact event the client already listens for; no client change needed.
|
||||
if (device.user_id) {
|
||||
socket.emit('device:paired', { device_id, name: device.name || 'Display' });
|
||||
}
|
||||
logDeviceStatus(device_id, 'online');
|
||||
// Flush any commands/playlist-updates queued while this device was offline.
|
||||
commandQueue.flushQueue(deviceNs, device_id, buildPlaylistPayload);
|
||||
|
|
@ -585,13 +633,18 @@ module.exports = function setupDeviceSocket(io) {
|
|||
if (!requireDeviceAuth()) return;
|
||||
const { device_id, content_id, status } = data;
|
||||
if (device_id !== currentDeviceId) return;
|
||||
// #142: drop repeats of the same (device, content, status) within the dedup
|
||||
// window. Only a change (new content/status) or a report after the window
|
||||
// logs+emits, so a device spamming the same "ready" can't add load.
|
||||
const ackKey = `${device_id}|${content_id}|${status}`;
|
||||
const nowAck = Date.now();
|
||||
if (nowAck - (lastContentAck.get(ackKey) || 0) < config.contentAckDedupMs) return;
|
||||
lastContentAck.set(ackKey, nowAck);
|
||||
// #142 dedup + #143 per-device rate budget + global critical-lag valve, in one
|
||||
// control. Anything but 'pass' is dropped BEFORE the log+emit (that per-ack work
|
||||
// is the cost we shed). Drops are SILENT except a single line per device per
|
||||
// window when rate-shedding STARTS (re-logging per drop would recreate the
|
||||
// flood). The valve's open/close is logged once at the band edge in loop-lag.
|
||||
const verdict = contentAckLimiter.check(device_id, content_id, status, loopLag.getBand());
|
||||
if (verdict.action !== 'pass') {
|
||||
if (verdict.action === 'shed-rate' && verdict.logStart) {
|
||||
console.warn(`[content-ack] shedding device ${device_id}: ${verdict.observed}/${verdict.budget} per ${config.contentAckRateWindowMs}ms — flood control engaged`);
|
||||
}
|
||||
return;
|
||||
}
|
||||
console.log(`Device ${device_id} content ${content_id}: ${status}`);
|
||||
emitToDeviceWorkspace(dashboardNs, device_id, 'dashboard:content-ack', { device_id, content_id, status });
|
||||
});
|
||||
|
|
|
|||
Loading…
Reference in a new issue