From 058aaa0960e815135dd9e35c9503e0d9a1649438 Mon Sep 17 00:00:00 2001 From: Boden Date: Fri, 29 May 2026 14:02:06 -0500 Subject: [PATCH] feat(scrape): add bootstrap CI smoke and operator checklist Document bootstrap-first workflow; verify dry-run in CI; live bootstrap preflight validated against Documents archives. --- .docs/Recurring-Scrape-Setup.md | 2 + .github/workflows/main.yml | 1 + ...29-015-feat-bootstrap-ci-checklist-plan.md | 38 ++++++++++++++ docs/recurring-scrape-operator-checklist.md | 33 +++++++++++++ .../tests/bootstrap-recurring-scrape-smoke.sh | 49 +++++++++++++++++++ 5 files changed, 123 insertions(+) create mode 100644 docs/plans/2026-05-29-015-feat-bootstrap-ci-checklist-plan.md create mode 100644 docs/recurring-scrape-operator-checklist.md create mode 100755 scripts/tests/bootstrap-recurring-scrape-smoke.sh diff --git a/.docs/Recurring-Scrape-Setup.md b/.docs/Recurring-Scrape-Setup.md index 7e8f3ff9..59efff07 100644 --- a/.docs/Recurring-Scrape-Setup.md +++ b/.docs/Recurring-Scrape-Setup.md @@ -11,6 +11,8 @@ This guide walks you through setting up automated recurring Discord exports usin ## Quick Start +**Fastest path:** `./scripts/bootstrap-recurring-scrape.sh` (see [operator checklist](../docs/recurring-scrape-operator-checklist.md)). + **Append-only contract (read first)** - Each target writes under its configured `output_dir` (for example `~/Documents/KotOR_discord_msgs/`). 
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index b3af227e..f76ac9a7 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -77,6 +77,7 @@ jobs: ./scripts/tests/documents-scrape-smoke.sh ./scripts/tests/verify-documents-auth-smoke.sh ./scripts/tests/scrape-here-smoke.sh + ./scripts/tests/bootstrap-recurring-scrape-smoke.sh test: # Tests need access to secrets, so we can't run them against PRs because of limited trust diff --git a/docs/plans/2026-05-29-015-feat-bootstrap-ci-checklist-plan.md b/docs/plans/2026-05-29-015-feat-bootstrap-ci-checklist-plan.md new file mode 100644 index 00000000..800d30e6 --- /dev/null +++ b/docs/plans/2026-05-29-015-feat-bootstrap-ci-checklist-plan.md @@ -0,0 +1,38 @@ +--- +title: "feat: Bootstrap CI smoke and operator checklist" +type: feat +status: completed +date: 2026-05-29 +origin: LFG — lock operator path; CI covers bootstrap; live bootstrap verification +--- + +# feat: Bootstrap CI smoke and operator checklist + +## Summary + +Add CI smoke for `bootstrap-recurring-scrape.sh`, a short operator checklist, align docs on bootstrap-first workflow, and verify live bootstrap against `scrape.env`. + +## Requirements + +| ID | Requirement | +|----|-------------| +| R1 | `bootstrap-recurring-scrape-smoke.sh` exercises `--help` and `--dry-run` | +| R2 | CI `recurring-scrape-smoke` job runs bootstrap smoke | +| R3 | `docs/recurring-scrape-operator-checklist.md` lists end-to-end steps | +| R4 | Recurring setup doc references bootstrap as primary entry | +| R5 | Live `./scripts/bootstrap-recurring-scrape.sh --skip-build` succeeds with existing `scrape.env` | + +## Implementation Units + +### U1. Bootstrap smoke + CI + +**Files:** `scripts/tests/bootstrap-recurring-scrape-smoke.sh`, `.github/workflows/main.yml` + +### U2. 
Operator checklist + docs + +**Files:** `docs/recurring-scrape-operator-checklist.md`, `.docs/Recurring-Scrape-Setup.md` + +## Verification + +- All `scripts/tests/*.sh` +- Live bootstrap (skip-build) on one target diff --git a/docs/recurring-scrape-operator-checklist.md b/docs/recurring-scrape-operator-checklist.md new file mode 100644 index 00000000..cecdd11e --- /dev/null +++ b/docs/recurring-scrape-operator-checklist.md @@ -0,0 +1,33 @@ +# Recurring scrape operator checklist + +Use this after cloning or opening the **source** repo (`DiscordChatExporter`, not the GUI zip alone). + +## One-time setup + +1. `cp scrape.env.example scrape.env` and set `DISCORD_TOKEN` (user token recommended for guild history). +2. `./scripts/bootstrap-recurring-scrape.sh --dry-run` — confirm every **enabled** target has seeded JSON under `output_dir`. +3. `./scripts/bootstrap-recurring-scrape.sh` — verify archives, build image, preflight Discord. +4. `./scripts/run-documents-scrape.sh` — first incremental append-only scrape. +5. `./scripts/prove-incremental-append.sh --target TARGET_NAME` — optional grow-only proof. + +## Monthly automation + +```bash +./scripts/setup-cron.sh --dry-run +./scripts/setup-cron.sh +``` + +Defaults: first day of month at 04:00. Override with `--interval weekly`, `--at HH:MM`, or `--cron '0 4 1 * *'`. + +## Narrow a run + +```bash +./scripts/run-documents-scrape.sh --target KotOR_discord_msgs +./scripts/setup-cron.sh --target KotOR_discord_msgs --channel CHANNEL_ID +``` + +## GUI zip only + +See `../DiscordChatExporter.linux-x64/RECURRING-SCRAPE.md` or run `../DiscordChatExporter.linux-x64/bootstrap-recurring-scrape.sh`. 
+ +Full detail: [.docs/Recurring-Scrape-Setup.md](../.docs/Recurring-Scrape-Setup.md) diff --git a/scripts/tests/bootstrap-recurring-scrape-smoke.sh b/scripts/tests/bootstrap-recurring-scrape-smoke.sh new file mode 100755 index 00000000..6f4d3b08 --- /dev/null +++ b/scripts/tests/bootstrap-recurring-scrape-smoke.sh @@ -0,0 +1,49 @@ +#!/usr/bin/env bash + +set -Eeuo pipefail + +REPO_ROOT=$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd -P) +BOOTSTRAP="$REPO_ROOT/scripts/bootstrap-recurring-scrape.sh" +TMP_DIR=$(mktemp -d "${TMPDIR:-/tmp}/dce-bootstrap-smoke.XXXXXX") + +cleanup() { + rm -rf "$TMP_DIR" +} +trap cleanup EXIT + +[[ -x "$BOOTSTRAP" ]] || { + printf 'bootstrap-recurring-scrape.sh is not executable\n' >&2 + exit 1 +} + +"$BOOTSTRAP" --help | grep -q 'bootstrap-recurring-scrape' || { + printf 'bootstrap --help missing expected text\n' >&2 + exit 1 +} + +mkdir -p "$TMP_DIR/archive/demo" +printf '{"messages":[{"id":"1","timestamp":"2020-01-01T00:00:00+00:00"}],"channel":{"id":"111111111111111111"}}\n' \ + >"$TMP_DIR/archive/demo/Guild - general [111111111111111111].json" + +cat >"$TMP_DIR/config.json" <&2 + exit 1 +} + +printf 'bootstrap-recurring-scrape-smoke: ok\n'