diff --git a/server/config.js b/server/config.js
index 6a146a0..1051a04 100644
--- a/server/config.js
+++ b/server/config.js
@@ -134,4 +134,14 @@ module.exports = {
   reconnectWarmupMs: parseInt(process.env.RECONNECT_WARMUP_MS) || 30000,
   reconnectBandElevatedMult: parseFloat(process.env.RECONNECT_BAND_ELEVATED_MULT) || 2,
   reconnectBandCriticalMult: parseFloat(process.env.RECONNECT_BAND_CRITICAL_MULT) || 4,
+
+  // #142 device_status_log retention. A GLOBAL scheduled sweep (pruneStatusLog in
+  // db/database.js, run on startup + the heartbeat interval) deletes rows older
+  // than this across ALL devices — covering what the per-device insert-time prune
+  // in deviceSocket.js misses: removed/idle devices that never insert again, and
+  // the heartbeat.js offline_timeout insert that bypasses logDeviceStatus. Default
+  // is LOWER than the old hardcoded 7 days (the reporter's bloat happened under 7d);
+  // 2-3 days is plenty for the dashboard's 24h uptime view + diagnostics.
+  // Fractional days are accepted; an unset, unparseable or 0 value falls back to 3.
+  statusLogRetentionDays: parseFloat(process.env.STATUS_LOG_RETENTION_DAYS) || 3,
 };
diff --git a/server/db/database.js b/server/db/database.js
index 5d5c72f..cd0f920 100644
--- a/server/db/database.js
+++ b/server/db/database.js
@@ -750,6 +750,32 @@ const { applyTenantDeleteCascade } = require('../lib/tenant-cascade-migration');
   }
 })();
 
+// #142 GLOBAL device_status_log retention sweep across ALL devices. Run on startup
+// and on the heartbeat interval (services/heartbeat.js). This covers the rows the
+// per-device insert-time prune in deviceSocket.js misses: removed/idle devices that
+// never insert again, and the heartbeat offline_timeout insert that bypasses
+// logDeviceStatus. A plain time-range delete (like the play_logs prune) — runs off
+// the hot path; after the first sweep the table is small, so the cost is negligible.
+//
+// Returns the number of rows deleted. Best-effort: it never throws, so a failed
+// sweep cannot take down startup or the heartbeat tick, but the failure is logged
+// rather than swallowed. A non-finite or non-positive retention skips the sweep
+// and returns 0: a negative value would otherwise move the cutoff into the future
+// and delete every row.
+function pruneStatusLog() {
+  try {
+    const retentionDays = config.statusLogRetentionDays;
+    if (!Number.isFinite(retentionDays) || retentionDays <= 0) return 0;
+    const maxAgeSec = Math.round(retentionDays * 86400);
+    const n = db.prepare("DELETE FROM device_status_log WHERE timestamp < strftime('%s','now') - ?").run(maxAgeSec).changes;
+    if (n > 0) console.log(`[status-log] pruned ${n} row(s) older than ${retentionDays}d`);
+    return n;
+  } catch (err) {
+    console.warn('[status-log] prune failed:', err);
+    return 0;
+  }
+}
+
 // Prune old telemetry (keep last 24h worth at 15s intervals = ~5760, cap at 6000)
 function pruneTelemetry(deviceId) {
   db.prepare(`
@@ -822,4 +848,4 @@ try {
 const { verifyAndRepairSchema } = require('../lib/schema-check');
 verifyAndRepairSchema(db);
 
-module.exports = { db, pruneTelemetry, pruneScreenshots };
+module.exports = { db, pruneTelemetry, pruneScreenshots, pruneStatusLog };
diff --git a/server/services/heartbeat.js b/server/services/heartbeat.js
index e881120..7d0778e 100644
--- a/server/services/heartbeat.js
+++ b/server/services/heartbeat.js
@@ -1,4 +1,4 @@
-const { db } = require('../db/database');
+const { db, pruneStatusLog } = require('../db/database');
 const config = require('../config');
 const { deviceRoom, emitToWorkspace } = require('../lib/socket-rooms');
 
@@ -6,6 +6,10 @@ const { deviceRoom, emitToWorkspace } = require('../lib/socket-rooms');
 const deviceConnections = new Map();
 
 function startHeartbeatChecker(io) {
+  // #142: sweep stale device_status_log rows once at startup (recovers a bloated
+  // table immediately after a deploy), then again on each interval below.
+  pruneStatusLog();
+
   setInterval(() => {
     const now = Date.now();
     const dashboardNs = io.of('/dashboard');
@@ -49,6 +53,10 @@ function startHeartbeatChecker(io) {
       DELETE FROM play_logs WHERE started_at < strftime('%s','now') - (90 * 86400)
     `).run();
 
+    // #142: global device_status_log retention sweep (all devices, incl. removed/idle
+    // and the offline_timeout insert path that bypasses the per-device prune).
+    pruneStatusLog();
+
     // Cleanup: expired team invites
     db.prepare(`
       DELETE FROM team_invites WHERE expires_at < strftime('%s','now')
diff --git a/server/test/status-log-prune.test.js b/server/test/status-log-prune.test.js
new file mode 100644
index 0000000..4e7ef35
--- /dev/null
+++ b/server/test/status-log-prune.test.js
@@ -0,0 +1,53 @@
+'use strict';
+
+// #142 step 4 — global device_status_log retention sweep. Deterministic, in-process
+// (no server/port). Isolate the DB and set retention BEFORE requiring the module
+// (config reads env at load; database.js initialises a DB on load).
+
+const os = require('node:os');
+const path = require('node:path');
+const crypto = require('node:crypto');
+process.env.DATA_DIR = path.join(os.tmpdir(), 'st-statusprune-' + crypto.randomBytes(4).toString('hex'));
+process.env.STATUS_LOG_RETENTION_DAYS = '2';
+
+const { test } = require('node:test');
+const assert = require('node:assert/strict');
+const { db, pruneStatusLog } = require('../db/database');
+
+test('global sweep deletes rows older than retention across ALL devices, keeps recent', () => {
+  db.exec('DELETE FROM device_status_log'); // clean slate
+  const old = db.prepare("INSERT INTO device_status_log (device_id, status, timestamp) VALUES (?, ?, strftime('%s','now') - ?)");
+
+  // 5 days old (> 2d retention): an active device, a device NOT in the devices
+  // table (removed/idle — what the per-device insert-time prune never revisits),
+  // and the heartbeat offline_timeout status that bypasses logDeviceStatus.
+  old.run('live-dev', 'online', 5 * 86400);
+  old.run('removed-idle-dev', 'offline', 5 * 86400);
+  old.run('hb-dev', 'offline_timeout', 5 * 86400);
+  // recent (< retention): must survive, regardless of device existence / status.
+  old.run('live-dev', 'online', 0);
+  old.run('hb-dev', 'offline_timeout', 3600);
+
+  assert.equal(db.prepare('SELECT COUNT(*) c FROM device_status_log').get().c, 5, 'seeded 5 rows');
+
+  const deleted = pruneStatusLog();
+  assert.equal(deleted, 3, 'the 3 over-retention rows pruned (incl. removed-idle + offline_timeout paths)');
+
+  const remaining = db.prepare('SELECT device_id, status FROM device_status_log ORDER BY device_id').all();
+  assert.equal(remaining.length, 2);
+  // both survivors are the recent rows; no old row of any device/status survived
+  assert.deepEqual(remaining.map(r => r.device_id).sort(), ['hb-dev', 'live-dev']);
+  const oldestNow = db.prepare("SELECT MIN(timestamp) m FROM device_status_log").get().m;
+  const cutoff = Math.floor(Date.now() / 1000) - 2 * 86400;
+  assert.ok(oldestNow >= cutoff, 'no surviving row is older than the retention cutoff');
+  // Idempotence on an already-clean (non-empty) table: a repeat sweep deletes nothing.
+  assert.equal(pruneStatusLog(), 0, 'second sweep finds nothing left to delete');
+});
+
+test('sweep is safe and idempotent on an empty/already-clean table', () => {
+  db.exec('DELETE FROM device_status_log');
+  assert.equal(pruneStatusLog(), 0, 'nothing to delete -> 0, no throw');
+  // Actually exercise the idempotence the title claims: sweep again, same result.
+  assert.equal(pruneStatusLog(), 0, 'repeat sweep is also a no-op');
+  assert.equal(db.prepare('SELECT COUNT(*) c FROM device_status_log').get().c, 0, 'table still empty');
+});