screentinker/server/services/alerts.js
ScreenTinker c71c4016ca feat(email): Microsoft Graph send + alert spam protection + preferences UI
Replaces the unused EMAIL_WEBHOOK_URL stub with a real Microsoft Graph
Mail.Send pipeline via @azure/msal-node client-credentials flow. Prior
state on prod: every alert email was logged to journalctl and never
sent (21 fallback log lines per hour for the chronic-offline devices).

Four coordinated changes shipped as one commit since they're all part
of making email delivery actually work responsibly:

1. services/email.js (NEW): Graph send via plain HTTPS (no SDK), in-memory
   MSAL token cache (refresh 60s pre-expiry), graceful stdout fallback
   when GRAPH_* env vars absent. Drop-in replacement for the old webhook.
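
   A sketch of that shape, not the file's actual contents (the MSAL and
   Graph sendMail calls are the real APIs; GRAPH_SENDER and the exact
   structure are assumptions):

     const { ConfidentialClientApplication } = require('@azure/msal-node');

     const cca = new ConfidentialClientApplication({
       auth: {
         clientId: process.env.GRAPH_CLIENT_ID,
         authority: `https://login.microsoftonline.com/${process.env.GRAPH_TENANT_ID}`,
         clientSecret: process.env.GRAPH_CLIENT_SECRET,
       },
     });

     let cached = null; // { token, expiresAt }

     // Client-credentials token, cached in-memory, refreshed 60s pre-expiry.
     async function getToken() {
       if (cached && Date.now() < cached.expiresAt - 60000) return cached.token;
       const result = await cca.acquireTokenByClientCredential({
         scopes: ['https://graph.microsoft.com/.default'],
       });
       cached = { token: result.accessToken, expiresAt: result.expiresOn.getTime() };
       return cached.token;
     }

     // Mail.Send over plain HTTPS (global fetch, no Graph SDK). Falls back
     // to stdout when the GRAPH_* env vars are absent, and never throws -
     // alerts.js relies on that.
     async function sendEmail({ to, subject, text, html }) {
       if (!process.env.GRAPH_CLIENT_ID) {
         console.log(`[EMAIL fallback] to=${to} subject=${subject}\n${text}`);
         return;
       }
       try {
         const res = await fetch(
           `https://graph.microsoft.com/v1.0/users/${process.env.GRAPH_SENDER}/sendMail`,
           {
             method: 'POST',
             headers: {
               Authorization: `Bearer ${await getToken()}`,
               'Content-Type': 'application/json',
             },
             body: JSON.stringify({
               message: {
                 subject,
                 body: { contentType: 'HTML', content: html },
                 toRecipients: [{ emailAddress: { address: to } }],
               },
               saveToSentItems: false,
             }),
           }
         );
         if (!res.ok) console.error(`[EMAIL] Graph send failed: ${res.status}`);
       } catch (e) {
         console.error('[EMAIL] Graph send error:', e.message);
       }
     }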

2. services/alerts.js refactored: sequential await around sendEmail (was
   parallel fire-and-forget; first run hit Graph's MailboxConcurrency 429
   ApplicationThrottled on a 30-device backlog). Sequential at ~250ms per
   send takes 5-8s for the full backlog, well within the 60s tick. Also:
   24h long-offline cutoff to stop nagging about chronic-offline devices
   (the 20,000+ minute ones); 2-hour dedup window (was 1h) via a generic
   shouldSendAlert(type, id, windowMs) helper that future alert types
   (payment_failed, plan_limit_hit, etc.) can reuse.
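
   A hypothetical future caller reuses the helper with its own type and
   window (names illustrative only):

     // Same dedup Map, different alertType axis, custom 24h window.
     if (shouldSendAlert('payment_failed', user.id, 24 * 60 * 60 * 1000)) {
       await sendEmail({
         to: user.email,
         subject: 'Payment failed',
         text: 'Your last payment attempt did not go through.',
       });
     }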

3. Preferences UI: single checkbox in settings.js Account section bound
   to users.email_alerts. Saved via the existing Save Profile button. PUT
   /api/auth/me extended to accept email_alerts. requireAuth middleware
   SELECT now includes email_alerts so it propagates via req.user.
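
   The server side is a one-column extension; a sketch assuming an
   Express-style route (the route path and requireAuth are from this
   commit, the handler body is hypothetical):

     // PUT /api/auth/me now also persists the email_alerts boolean.
     router.put('/me', requireAuth, (req, res) => {
       const { name, email_alerts } = req.body;
       db.prepare('UPDATE users SET name = ?, email_alerts = ? WHERE id = ?')
         .run(name, email_alerts ? 1 : 0, req.user.id);
       res.json({ ok: true });
     });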

4. Dev safety net: GRAPH_DEV_RESTRICT_TO env var as an allow-list. When
   set, only listed recipients reach Graph; everyone else is suppressed
   with a log line. Prevents local dev (which often runs against fresh
   prod DB copies) from accidentally emailing real prod users. UNSET on
   prod systemd unit so production fans out normally.
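
   The check sits at the top of the send path in services/email.js; a
   sketch (comma-separated list assumed, parsing details hypothetical):

     // When GRAPH_DEV_RESTRICT_TO is set, only listed recipients reach
     // Graph; everyone else gets a suppression log line instead.
     const restrict = (process.env.GRAPH_DEV_RESTRICT_TO || '')
       .split(',').map(a => a.trim().toLowerCase()).filter(Boolean);
     if (restrict.length && !restrict.includes(to.toLowerCase())) {
       console.log(`[EMAIL] suppressed by GRAPH_DEV_RESTRICT_TO: ${to}`);
       return;
     }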

Also: package.json scripts use --env-file-if-exists=.env so local dev
picks up .env automatically (Node built-in: --env-file landed in 20.6,
the if-exists variant in 22.9; no dotenv dep). Prod runs via systemd
ExecStart and is unaffected. server/.gitignore added to keep .env out
of git.
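
A sketch of the scripts stanza (script names and entry point assumed):

  "scripts": {
    "dev": "node --watch --env-file-if-exists=.env index.js",
    "start": "node --env-file-if-exists=.env index.js"
  }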

Smoke verified end-to-end:
- Sequential send pattern verified (a prior parallel-send tick had hit
  Graph's MailboxConcurrency 429 on 30 simultaneous sends; sequential
  at ~250ms each completes the same backlog without throttling)
- 24h cutoff silenced 20/21 prod devices on the next tick
- Dev restrict suppressed the 1 within-24h send
- User-preference toggle flipped via UI -> DB; on the next tick the
  alert loop silently skipped that device (continue) before even
  reaching the dev-restrict suppression log
2026-05-12 18:16:40 -05:00


const { db } = require('../db/database');
const { sendEmail } = require('./email');
// Per-(alert_type, target_id) dedup. In-memory Map; restarts reset it, which
// at current alert volume is fine - worst case is one duplicate alert after
// a server restart. Future alert types (payment_failed, plan_limit_hit, etc.)
// share this same mechanism via the alertType axis.
const alertLastSent = new Map();
const DEFAULT_DEDUP_WINDOW_MS = 2 * 60 * 60 * 1000; // 2 hours
function shouldSendAlert(alertType, targetId, windowMs = DEFAULT_DEDUP_WINDOW_MS) {
  const key = `${alertType}:${targetId}`;
  const last = alertLastSent.get(key) || 0;
  if (Date.now() - last < windowMs) return false;
  alertLastSent.set(key, Date.now());
  return true;
}
function startAlertService(io) {
  // checkOfflineDevices is async; catch here so a thrown db error can't
  // become an unhandled rejection and take the process down.
  setInterval(
    () => checkOfflineDevices(io).catch(e => console.error('[ALERT] tick failed:', e.message)),
    60000
  );
  console.log('Alert service started');
}
async function checkOfflineDevices(io) {
  const now = Math.floor(Date.now() / 1000);
  const threshold = 300; // 5 minutes offline
  const offlineDevices = db.prepare(`
    SELECT d.id, d.name, d.user_id, d.workspace_id, d.last_heartbeat, d.status,
           u.email as owner_email, u.name as owner_name, u.email_alerts
    FROM devices d
    LEFT JOIN users u ON d.user_id = u.id
    WHERE d.status = 'offline' AND d.last_heartbeat IS NOT NULL
      AND (? - d.last_heartbeat) > ?
  `).all(now, threshold);

  for (const device of offlineDevices) {
    // Dedup: skip if we've alerted on this device within the window
    if (!shouldSendAlert('device_offline', device.id)) continue;
    // Skip if user has alerts disabled
    if (!device.email_alerts) continue;
    // Long-offline cutoff: stop nagging about devices that have been offline
    // for >24 hours. They're not a notification-worthy event anymore - either
    // the user knows, or the device is abandoned. Spares ~15 chronic-offline
    // prod devices from re-firing every 2-hour dedup window.
    const offlineHours = (now - device.last_heartbeat) / 3600;
    if (offlineHours > 24) continue;

    if (device.owner_email) {
      const offlineMinutes = Math.floor((now - device.last_heartbeat) / 60);
      const subject = `Display Offline: ${device.name}`;
      const body = `Your display "${device.name}" has been offline for ${offlineMinutes} minutes.\n\nLast heartbeat: ${new Date(device.last_heartbeat * 1000).toLocaleString()}\n\nCheck your device and network connection.\n\n- ScreenTinker`;
      // Sequential await: Microsoft Graph imposes a MailboxConcurrency limit
      // (429 ApplicationThrottled when fanning out ~20+ parallel sends from
      // one app). At ~250ms per send, a backlog of 20 devices takes ~5s -
      // well within the 60s alert tick interval. sendEmail() never throws
      // (catches Graph errors internally) so the .catch is defensive only.
      await sendEmail({
        to: device.owner_email,
        subject,
        text: body,
        html: buildAlertHtml(device.owner_name, subject, body),
      }).catch(e => console.error('[ALERT] sendEmail rejected unexpectedly:', e.message));
      // Log activity. Phase 2.2 writer-leak fix: stamp workspace_id from the
      // device so the row is tenant-queryable.
      try {
        db.prepare(
          'INSERT INTO activity_log (user_id, device_id, action, details, workspace_id) VALUES (?, ?, ?, ?, ?)'
        ).run(device.user_id, device.id, 'alert:device_offline', `${device.name} offline for ${offlineMinutes}m`, device.workspace_id || null);
      } catch {
        // Best-effort: activity logging must never break the alert loop.
      }
    }
  }
  // Clear notifications for devices that came back online
  const onlineDevices = db.prepare("SELECT id FROM devices WHERE status = 'online'").all();
  for (const device of onlineDevices) {
    alertLastSent.delete(`device_offline:${device.id}`);
  }
}
// ScreenTinker-branded HTML body for alert emails. Owns the visual template
// previously inlined in the webhook payload at sendEmailAlert.
function buildAlertHtml(recipientName, subject, body) {
  return `<div style="font-family:sans-serif;max-width:600px;margin:0 auto;padding:20px">
    <h2 style="color:#3b82f6">ScreenTinker Alert</h2>
    <p>Hi ${escapeHtml(recipientName || 'there')},</p>
    <div style="background:#f1f5f9;padding:16px;border-radius:8px;margin:16px 0">
      <strong>${escapeHtml(subject)}</strong><br><br>
      ${escapeHtml(body).replace(/\n/g, '<br>')}
    </div>
    <p style="color:#94a3b8;font-size:12px">You're receiving this because you have email alerts enabled in ScreenTinker.</p>
  </div>`;
}
function escapeHtml(s) {
  return String(s ?? '').replace(/[&<>"']/g, c =>
    ({ '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;' }[c]));
}
// Legacy export name preserved - some other modules may still call this.
// Internally delegates to sendEmail() with the ScreenTinker HTML template.
function sendEmailAlert(to, name, { subject, body }) {
  return sendEmail({
    to,
    subject,
    text: body,
    html: buildAlertHtml(name, subject, body),
  });
}
module.exports = { startAlertService, sendEmailAlert };