From c0818715a8940b9031d5993943de731193cb2a7f Mon Sep 17 00:00:00 2001 From: Boden Date: Fri, 29 May 2026 16:03:22 -0500 Subject: [PATCH] feat(scrape): add operator-handoff verification script Single entrypoint runs disk summary, verify-operator-ready, and run-documents-scrape --dry-run before cron or full scrapes. --- ...9-028-feat-operator-handoff-script-plan.md | 29 ++++++ docs/recurring-scrape-merge-readiness.md | 1 + docs/recurring-scrape-operator-checklist.md | 3 +- scripts/operator-handoff.sh | 92 +++++++++++++++++++ scripts/tests/operator-handoff-smoke.sh | 42 +++++++++ 5 files changed, 166 insertions(+), 1 deletion(-) create mode 100644 docs/plans/2026-05-29-028-feat-operator-handoff-script-plan.md create mode 100755 scripts/operator-handoff.sh create mode 100755 scripts/tests/operator-handoff-smoke.sh diff --git a/docs/plans/2026-05-29-028-feat-operator-handoff-script-plan.md b/docs/plans/2026-05-29-028-feat-operator-handoff-script-plan.md new file mode 100644 index 00000000..feb4700e --- /dev/null +++ b/docs/plans/2026-05-29-028-feat-operator-handoff-script-plan.md @@ -0,0 +1,29 @@ +--- +title: "feat: Single operator handoff verification script" +type: feat +status: complete +date: 2026-05-29 +origin: /lfg — recurring scrape stack complete; one command for pre-cron verification +--- + +# feat: Single operator handoff verification script + +## Summary + +Provide `./scripts/operator-handoff.sh` that runs the canonical offline checks (optional disk summary, archive verification, documents dry-run) and prints a clear pass/fail summary before monthly cron or a full scrape. 
+ +## Requirements + +| ID | Requirement | +|----|-------------| +| R1 | Script runs `verify-operator-ready.sh` (full check, not only disk) | +| R2 | Script runs `run-documents-scrape.sh --dry-run` | +| R3 | Prints `df` for archive_root and repo root | +| R4 | `--skip-df` and `--config PATH` supported | +| R5 | `operator-handoff-smoke.sh` validates exit 0 with fixture config | +| R6 | Operator checklist links to the script | + +## Verification + +- `./scripts/tests/operator-handoff-smoke.sh` +- `DCE_MIN_FREE_MB=0 ./scripts/run-all-smokes.sh` diff --git a/docs/recurring-scrape-merge-readiness.md b/docs/recurring-scrape-merge-readiness.md index ea7dd7e2..6350781b 100644 --- a/docs/recurring-scrape-merge-readiness.md +++ b/docs/recurring-scrape-merge-readiness.md @@ -22,6 +22,7 @@ Fork branch `feat/recurring-cli-scrape` adds append-only, Docker-based increment ## Operator quick path ```bash +./scripts/operator-handoff.sh # disk + verify + archive dry-run ./scripts/verify-operator-ready.sh cp scrape.env.example scrape.env # or ./scripts/sync-token-from-gui.sh --force ./scripts/bootstrap-recurring-scrape.sh diff --git a/docs/recurring-scrape-operator-checklist.md b/docs/recurring-scrape-operator-checklist.md index 5aac4f9a..654ab8ac 100644 --- a/docs/recurring-scrape-operator-checklist.md +++ b/docs/recurring-scrape-operator-checklist.md @@ -4,7 +4,8 @@ Use this after cloning or opening the **source** repo (`DiscordChatExporter`, no ## One-time setup -0. `./scripts/verify-operator-ready.sh` — jq, compose, auth, and archive folders. +0. `./scripts/operator-handoff.sh` — recommended: disk summary, verify-operator-ready, and documents dry-run in one step. + Or `./scripts/verify-operator-ready.sh` alone for prerequisites only. 1. `cp scrape.env.example scrape.env` and set `DISCORD_TOKEN`, or `./scripts/sync-token-from-gui.sh --force` (reads GUI `Settings.dat`). 2. 
`./scripts/bootstrap-recurring-scrape.sh --dry-run` — confirm every **enabled** target has seeded JSON under `output_dir`. 3. `./scripts/bootstrap-recurring-scrape.sh` — verify archives, build image, preflight Discord. diff --git a/scripts/operator-handoff.sh b/scripts/operator-handoff.sh new file mode 100755 index 00000000..40648737 --- /dev/null +++ b/scripts/operator-handoff.sh @@ -0,0 +1,92 @@ +#!/usr/bin/env bash + +set -Eeuo pipefail + +SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd -P) +REPO_ROOT="${DCE_REPO_ROOT:-$(cd "$SCRIPT_DIR/.." && pwd -P)}" +CONFIG_PATH="${DCE_CONFIG_FILE:-$REPO_ROOT/config/scrape-targets.json}" +VERIFY_READY="$REPO_ROOT/scripts/verify-operator-ready.sh" +DOCUMENTS_SCRAPE="$REPO_ROOT/scripts/run-documents-scrape.sh" +SKIP_DF=0 # set to 1 by --skip-df; main then omits the disk summary + +usage() { + cat <&2 + exit 1 +} + +print_disk_summary() { # Prints the df -hP header and first data row for archive_root (read from the config with jq) and for REPO_ROOT. + local archive_root path + + require_command jq + archive_root=$(jq -r '.archive_root // empty' "$CONFIG_PATH") + [[ -n "$archive_root" && "$archive_root" != null ]] || die "Config is missing archive_root." + + printf 'Disk summary\n' + printf '============\n' + for path in "$archive_root" "$REPO_ROOT"; do + [[ -e "$path" ]] || continue # paths that do not exist are skipped without an error + df -hP "$path" | awk 'NR==1 || NR==2 {print}' + printf '\n' + done +} + +require_command() { # Aborts via die when "$1" is not found by command -v. + command -v "$1" >/dev/null 2>&1 || die "Required command '$1' is missing." +} + +main() { # Parses --config, --skip-df and --help, then runs the disk summary, verify-operator-ready and the documents dry-run in that order. + while (($#)); do + case "$1" in + --config) + [[ $# -ge 2 ]] || die "Missing value for --config." + CONFIG_PATH=$2 + shift 2 + ;; + --skip-df) + SKIP_DF=1 + shift + ;; + --help|-h) + usage + exit 0 + ;; + *) + die "Unknown option: $1" + ;; + esac + done + + [[ -f "$CONFIG_PATH" ]] || die "Missing config: $CONFIG_PATH" + + printf 'Operator handoff\n' + printf '================\n' + printf 'config: %s\n\n' "$CONFIG_PATH" + + if (( SKIP_DF == 0 )); then + print_disk_summary + fi + + "$VERIFY_READY" --config "$CONFIG_PATH" + "$DOCUMENTS_SCRAPE" --dry-run --config "$CONFIG_PATH" + + printf '\nHandoff complete. 
Safe to run:\n' + printf ' ./scripts/run-documents-scrape.sh\n' + printf ' ./scripts/setup-cron.sh --dry-run\n' +} + +main "$@" diff --git a/scripts/tests/operator-handoff-smoke.sh b/scripts/tests/operator-handoff-smoke.sh new file mode 100755 index 00000000..655e0701 --- /dev/null +++ b/scripts/tests/operator-handoff-smoke.sh @@ -0,0 +1,42 @@ +#!/usr/bin/env bash + +set -Eeuo pipefail + +REPO_ROOT=$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd -P) +HANDOFF="$REPO_ROOT/scripts/operator-handoff.sh" +TMP_DIR=$(mktemp -d "${TMPDIR:-/tmp}/dce-handoff-smoke.XXXXXX") +CONFIG_PATH="$TMP_DIR/config.json" +ENV_PATH="$TMP_DIR/scrape.env" + +cleanup() { + rm -rf "$TMP_DIR" +} +trap cleanup EXIT + +mkdir -p "$TMP_DIR/archive/demo" +printf '{"messages":[{"id":"1"}],"channel":{"id":"111111111111111111"}}\n' \ + >"$TMP_DIR/archive/demo/Guild - general [111111111111111111].json" + +cat >"$CONFIG_PATH" <"$ENV_PATH" + +DCE_MIN_FREE_MB=0 \ + DCE_CONFIG_FILE="$CONFIG_PATH" \ + DCE_ENV_FILE="$ENV_PATH" \ + "$HANDOFF" --config "$CONFIG_PATH" --skip-df | grep 'Handoff complete' >/dev/null # plain grep reads to EOF; grep -q exits at the first match, and under pipefail a SIGPIPE in the handoff script would fail this test intermittently + +printf 'operator-handoff-smoke: ok\n'