diff --git a/.docs/Recurring-Scrape-Setup.md b/.docs/Recurring-Scrape-Setup.md index 3737ee87..42f5242a 100644 --- a/.docs/Recurring-Scrape-Setup.md +++ b/.docs/Recurring-Scrape-Setup.md @@ -141,6 +141,13 @@ Once preflight passes, install the recurring export: This creates a managed cron entry that runs monthly (default). The entry can be updated or removed later. +For KotOR yes_general or other post-OOM catch-up, add `--salvage-before-scrape` so each run merges stale `.dce-temp` exports before incremental scrape: + +```bash +./scripts/setup-cron.sh --config config/scrape-targets.json \ + --target KotOR_discord_msgs --channel 221726893064454144 --salvage-before-scrape +``` + ### 5. Verify Installation Check that the cron job was installed: diff --git a/docs/plans/2026-06-04-081-feat-cron-salvage-before-scrape-plan.md b/docs/plans/2026-06-04-081-feat-cron-salvage-before-scrape-plan.md new file mode 100644 index 00000000..28efdd6c --- /dev/null +++ b/docs/plans/2026-06-04-081-feat-cron-salvage-before-scrape-plan.md @@ -0,0 +1,51 @@ +--- +title: "feat: Cron opt-in salvage-before-scrape" +type: feat +status: complete +date: 2026-06-04 +origin: /lfg — plan 080 deferred --salvage-before-scrape on cron installs +--- + +# feat: Cron opt-in salvage-before-scrape + +## Summary + +Add `--salvage-before-scrape` to `setup-cron.sh` so scheduled jobs can merge stale `.dce-temp` exports before incremental scrape (recommended for KotOR catch-up after OOM). + +## Problem Frame + +Operators use `--salvage-before-scrape` manually on documents scrape and validation; monthly cron (plan 079) runs plain documents scrape without salvage, leaving partial temps unmerged until a manual pass. 
+ +## Requirements + +| ID | Requirement | +|----|-------------| +| R1 | `setup-cron.sh --salvage-before-scrape` appends flag to documents scrape cron command | +| R2 | Default install unchanged (no salvage unless flag passed) | +| R3 | Usage and examples document the flag | +| R4 | `setup-cron-smoke.sh` dry-run asserts flag in preview when passed | +| R5 | Docs note KotOR/cron salvage opt-in | +| R6 | `DCE_MIN_FREE_MB=0 ./scripts/run-all-smokes.sh` → 23/23 | + +## Implementation Units + +### U1. setup-cron.sh + +**Files:** `scripts/setup-cron.sh`, `scripts/tests/setup-cron-smoke.sh` + +### U2. Docs + +**Files:** `docs/recurring-scrape-merge-readiness.md`, `docs/recurring-scrape-operator-checklist.md`, `.docs/Recurring-Scrape-Setup.md` + +## Verification + +```bash +DCE_MIN_FREE_MB=0 ./scripts/run-all-smokes.sh +``` + +## Scope Boundaries + +### Deferred + +- Live KotOR catch-up on host +- Prune stale duplicate Latest blocks from PR body diff --git a/docs/recurring-scrape-merge-readiness.md b/docs/recurring-scrape-merge-readiness.md index c2dfd905..14006464 100644 --- a/docs/recurring-scrape-merge-readiness.md +++ b/docs/recurring-scrape-merge-readiness.md @@ -191,6 +191,8 @@ DCE_MIN_FREE_MB=0 ./scripts/run-operator-validation.sh \ **Plan 080 (2026-06-04):** PR #1538 body refreshed with compact plans 070–079 operator delta and 23/23 gate. +**Plan 081 (2026-06-04):** `setup-cron.sh --salvage-before-scrape` is an opt-in flag that makes the scheduled cron job merge stale `.dce-temp` exports before each incremental scrape. + **Disk:** ~65 GiB free on `/home` (2026-05-30); large channel merges still need headroom. ## CI note (fork PRs) diff --git a/docs/recurring-scrape-operator-checklist.md b/docs/recurring-scrape-operator-checklist.md index 3a0cf691..bebf1599 100644 --- a/docs/recurring-scrape-operator-checklist.md +++ b/docs/recurring-scrape-operator-checklist.md @@ -31,6 +31,7 @@ Installed jobs are marked `# BEGIN discord-scrape` in `crontab -l`. 
Logs append ./scripts/run-documents-scrape.sh --target KotOR_discord_msgs ./scripts/run-documents-scrape.sh --target KotOR_discord_msgs --channel CHANNEL_ID ./scripts/setup-cron.sh --target KotOR_discord_msgs --channel CHANNEL_ID +# After OOM partials: add --salvage-before-scrape so cron merges stale .dce-temp before scrape ``` ## Scrape lock and salvage diff --git a/scripts/setup-cron.sh b/scripts/setup-cron.sh index ca81f580..9e7695fe 100755 --- a/scripts/setup-cron.sh +++ b/scripts/setup-cron.sh @@ -17,6 +17,7 @@ CRON_EXPRESSION="" DRY_RUN=0 REMOVE=0 SKIP_PREFLIGHT=0 +SALVAGE_BEFORE=0 TARGETS=() GUILDS=() @@ -48,6 +49,7 @@ Options: --log-file PATH Cron log file. Default: $LOG_FILE --config PATH Scrape targets JSON. Default: $CONFIG_FILE --env-file PATH Compose env file. Default: $ENV_FILE + --salvage-before-scrape Cron job merges stale .dce-temp exports before incremental scrape --skip-preflight Install the cron job without running the authenticated container preflight. --dry-run Print the cron block instead of installing it. --remove Remove the managed cron block and exit. 
@@ -56,7 +58,7 @@ Options: Examples: $(basename "$0") $(basename "$0") --target discord_dms --interval weekly --at 02:30 - $(basename "$0") --target Cline --channel 123456789012345678 --channel 234567890123456789 + $(basename "$0") --target KotOR_discord_msgs --channel 221726893064454144 --salvage-before-scrape EOF } @@ -263,6 +265,10 @@ main() { SKIP_PREFLIGHT=1 shift ;; + --salvage-before-scrape) + SALVAGE_BEFORE=1 + shift + ;; --dry-run) DRY_RUN=1 shift @@ -341,6 +347,9 @@ main() { --log-file "$LOG_FILE" ) append_target_args scrape_args + if (( SALVAGE_BEFORE == 1 )); then + scrape_args+=(--salvage-before-scrape) + fi scrape_command=$(printf '%q ' "${scrape_args[@]}") if command -v flock >/dev/null 2>&1; then lock_prefix=$(printf '%q ' "$(command -v flock)" "-n" "/tmp/${JOB_NAME}.lock") diff --git a/scripts/tests/setup-cron-smoke.sh b/scripts/tests/setup-cron-smoke.sh index f0fa828d..cf023a96 100755 --- a/scripts/tests/setup-cron-smoke.sh +++ b/scripts/tests/setup-cron-smoke.sh @@ -141,4 +141,10 @@ fi preview_custom_cron=$(run_setup --cron "15 03 * * 0" --skip-preflight --dry-run) grep -q '^15 03 \* \* 0 ' <<<"$preview_custom_cron" || { echo "expected validated custom cron in dry-run output" >&2; exit 1; } +salvage_preview=$(run_setup --salvage-before-scrape --skip-preflight --dry-run) +grep -q -- '--salvage-before-scrape' <<<"$salvage_preview" || { + echo "expected --salvage-before-scrape in cron dry-run preview" >&2 + exit 1 +} + echo "setup-cron smoke test passed"