From 00bcbc5b2131db7553503785d30646a740efdfa5 Mon Sep 17 00:00:00 2001 From: Boden Date: Fri, 29 May 2026 14:16:10 -0500 Subject: [PATCH] feat(scrape): add verify-operator-ready host checks One command validates compose, auth, config, and seeded archives before bootstrap or cron. Includes offline smoke test (14 smokes total). --- .docs/Recurring-Scrape-Setup.md | 3 +- .github/workflows/main.yml | 4 +- ...-29-022-feat-operator-ready-verify-plan.md | 28 +++++ docs/recurring-scrape-merge-readiness.md | 7 ++ docs/recurring-scrape-operator-checklist.md | 1 + scripts/tests/verify-operator-ready-smoke.sh | 59 +++++++++ scripts/verify-operator-ready.sh | 119 ++++++++++++++++++ 7 files changed, 219 insertions(+), 2 deletions(-) create mode 100644 docs/plans/2026-05-29-022-feat-operator-ready-verify-plan.md create mode 100755 scripts/tests/verify-operator-ready-smoke.sh create mode 100755 scripts/verify-operator-ready.sh diff --git a/.docs/Recurring-Scrape-Setup.md b/.docs/Recurring-Scrape-Setup.md index ac7f7705..ad53ec59 100644 --- a/.docs/Recurring-Scrape-Setup.md +++ b/.docs/Recurring-Scrape-Setup.md @@ -11,7 +11,7 @@ This guide walks you through setting up automated recurring Discord exports usin ## Quick Start -**Fastest path:** `./scripts/bootstrap-recurring-scrape.sh` (see [operator checklist](../docs/recurring-scrape-operator-checklist.md)). +**Fastest path:** `./scripts/verify-operator-ready.sh` then `./scripts/bootstrap-recurring-scrape.sh` (see [operator checklist](../docs/recurring-scrape-operator-checklist.md)). 
**Append-only contract (read first)** @@ -356,6 +356,7 @@ With Docker/Podman, include the container smoke: | `bootstrap-recurring-scrape-smoke.sh` | yes | Bootstrap dry-run | | `audit-archive-json-smoke.sh` | yes | Invalid JSON detection | | `prove-incremental-append-smoke.sh` | yes | Offline prove snapshot/compare | +| `verify-operator-ready-smoke.sh` | yes | Host prerequisite checks | | `container-smoke.sh` | no (local) | Docker build + `help` / `list-targets`; use `--include-container` | GitHub Actions runs `./scripts/run-all-smokes.sh` via `.github/workflows/main.yml` job `recurring-scrape-smoke`. diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 0d978a58..896c313d 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -65,7 +65,9 @@ jobs: run: sudo apt-get update && sudo apt-get install -y jq - name: Run recurring scrape smoke tests - run: ./scripts/run-all-smokes.sh + run: | + chmod +x scripts/*.sh scripts/tests/*.sh + ./scripts/run-all-smokes.sh test: # Tests need access to secrets, so we can't run them against PRs because of limited trust diff --git a/docs/plans/2026-05-29-022-feat-operator-ready-verify-plan.md b/docs/plans/2026-05-29-022-feat-operator-ready-verify-plan.md new file mode 100644 index 00000000..5288317d --- /dev/null +++ b/docs/plans/2026-05-29-022-feat-operator-ready-verify-plan.md @@ -0,0 +1,28 @@ +--- +title: "feat: Operator-ready verification script" +type: feat +status: complete +date: 2026-05-29 +origin: Repeated /lfg — close loop with host prerequisite checks before cron/scrape +--- + +# feat: Operator-ready verification script + +## Summary + +Add `verify-operator-ready.sh` to check jq, container runtime, auth, config, and seeded archives in one command. Wire into docs and a smoke test. 
+ +## Requirements + +| ID | Requirement | +|----|-------------| +| R1 | `scripts/verify-operator-ready.sh` checks jq, docker compose or podman compose, token presence, valid config | +| R2 | Reports per enabled target: output_dir exists, JSON count, channel-map status | +| R3 | `--preflight TARGET` optional single-target Discord preflight | +| R4 | `scripts/tests/verify-operator-ready-smoke.sh` offline smoke | +| R5 | Document in merge-readiness and operator checklist | + +## Verification + +- `./scripts/tests/verify-operator-ready-smoke.sh` +- `./scripts/run-all-smokes.sh` diff --git a/docs/recurring-scrape-merge-readiness.md b/docs/recurring-scrape-merge-readiness.md index 59123b5c..c7bd2d1b 100644 --- a/docs/recurring-scrape-merge-readiness.md +++ b/docs/recurring-scrape-merge-readiness.md @@ -22,12 +22,19 @@ Fork branch `feat/recurring-cli-scrape` adds append-only, Docker-based increment ## Operator quick path ```bash +./scripts/verify-operator-ready.sh cp scrape.env.example scrape.env # or ./scripts/sync-token-from-gui.sh --force ./scripts/bootstrap-recurring-scrape.sh ./scripts/run-documents-scrape.sh ./scripts/setup-cron.sh --dry-run ``` +Optional Discord probe for one target: + +```bash +./scripts/verify-operator-ready.sh --preflight KotOR_discord_msgs +``` + Detail: [.docs/Recurring-Scrape-Setup.md](../.docs/Recurring-Scrape-Setup.md) · [operator checklist](recurring-scrape-operator-checklist.md) · [troubleshooting](../.docs/Recurring-Scrape-Troubleshooting.md) ## CI note (fork PRs) diff --git a/docs/recurring-scrape-operator-checklist.md b/docs/recurring-scrape-operator-checklist.md index 1817a62a..5aac4f9a 100644 --- a/docs/recurring-scrape-operator-checklist.md +++ b/docs/recurring-scrape-operator-checklist.md @@ -4,6 +4,7 @@ Use this after cloning or opening the **source** repo (`DiscordChatExporter`, no ## One-time setup +0. `./scripts/verify-operator-ready.sh` — jq, compose, auth, and archive folders. 1. 
`cp scrape.env.example scrape.env` and set `DISCORD_TOKEN`, or `./scripts/sync-token-from-gui.sh --force` (reads GUI `Settings.dat`). 2. `./scripts/bootstrap-recurring-scrape.sh --dry-run` — confirm every **enabled** target has seeded JSON under `output_dir`. 3. `./scripts/bootstrap-recurring-scrape.sh` — verify archives, build image, preflight Discord. diff --git a/scripts/tests/verify-operator-ready-smoke.sh b/scripts/tests/verify-operator-ready-smoke.sh new file mode 100755 index 00000000..d1e25866 --- /dev/null +++ b/scripts/tests/verify-operator-ready-smoke.sh @@ -0,0 +1,59 @@ +#!/usr/bin/env bash + +set -Eeuo pipefail + +REPO_ROOT=$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd -P) +VERIFY="$REPO_ROOT/scripts/verify-operator-ready.sh" +TMP_DIR=$(mktemp -d "${TMPDIR:-/tmp}/dce-op-ready-smoke.XXXXXX") +ARCHIVE_ROOT="$TMP_DIR/archive" +CONFIG_PATH="$TMP_DIR/config.json" +ENV_PATH="$TMP_DIR/scrape.env" +FAKE_DOCKER="$TMP_DIR/docker" +PATH_BACKUP="$PATH" + +cleanup() { + export PATH="$PATH_BACKUP" + rm -rf "$TMP_DIR" +} +trap cleanup EXIT + +mkdir -p "$ARCHIVE_ROOT/demo" +printf '{"messages":[{"id":"1"}],"channel":{"id":"111111111111111111"}}\n' \ + >"$ARCHIVE_ROOT/demo/Guild - general [111111111111111111].json" + +cat >"$CONFIG_PATH" <"$ENV_PATH" + +cat >"$FAKE_DOCKER" <<'EOF' +#!/usr/bin/env bash +if [[ "${1:-}" == "compose" && "${2:-}" == "version" ]]; then + exit 0 +fi +exit 1 +EOF +chmod +x "$FAKE_DOCKER" +export PATH="$TMP_DIR:$PATH_BACKUP" + +DCE_REPO_ROOT="$REPO_ROOT" DCE_CONFIG_FILE="$CONFIG_PATH" DCE_ENV_FILE="$ENV_PATH" \ + "$VERIFY" --config "$CONFIG_PATH" + +if DCE_REPO_ROOT="$REPO_ROOT" DCE_CONFIG_FILE="$CONFIG_PATH" DCE_ENV_FILE="$ENV_PATH" \ + "$VERIFY" --config "$CONFIG_PATH" --preflight demo 2>/dev/null; then + printf 'ERROR: preflight should fail without real container/token\n' >&2 + exit 1 +fi + +printf 'verify-operator-ready-smoke: ok\n' diff --git a/scripts/verify-operator-ready.sh b/scripts/verify-operator-ready.sh new file mode 100755 
index 00000000..987a96cd
--- /dev/null
+++ b/scripts/verify-operator-ready.sh
@@ -0,0 +1,169 @@
+#!/usr/bin/env bash
+#
+# verify-operator-ready.sh - host prerequisite checks for the recurring scrape.
+#
+# Runs these checks in order and exits non-zero at the first failure:
+#   1. jq is installed and the targets config exists and parses as JSON
+#   2. a compose runtime is available (docker/podman, or DCE_COMPOSE_BIN)
+#   3. a non-empty Discord token is configured or discoverable
+#   4. archive checks, delegated to scripts/verify-documents-archives.sh
+#   5. optional: preflight for one target via scripts/run-discord-scrape-host.sh
+
+set -Eeuo pipefail
+
+SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd -P)
+REPO_ROOT="${DCE_REPO_ROOT:-$(cd "$SCRIPT_DIR/.." && pwd -P)}"
+CONFIG_PATH="${DCE_CONFIG_FILE:-$REPO_ROOT/config/scrape-targets.json}"
+ENV_FILE="${DCE_ENV_FILE:-$REPO_ROOT/scrape.env}"
+HOST_RUNNER="$REPO_ROOT/scripts/run-discord-scrape-host.sh"
+VERIFY_ARCHIVES="$REPO_ROOT/scripts/verify-documents-archives.sh"
+DISCOVER="$REPO_ROOT/scripts/discover-discord-token.sh"
+PREFLIGHT_TARGET=""
+
+# Print command-line help to stdout.
+usage() {
+  cat <<'EOF'
+Usage: verify-operator-ready.sh [--config PATH] [--preflight TARGET] [--help]
+
+Checks host prerequisites for the recurring scrape: jq, a compose runtime,
+a Discord token, a valid targets config, and the target archives.
+
+Options:
+  --config PATH       Targets config (default: config/scrape-targets.json)
+  --preflight TARGET  Also run the Discord preflight for one target
+  -h, --help          Show this help
+
+Environment:
+  DCE_REPO_ROOT    Repository root (default: parent of the scripts directory)
+  DCE_CONFIG_FILE  Default targets config path
+  DCE_ENV_FILE     Env file checked for DISCORD_TOKEN (default: scrape.env)
+  DCE_COMPOSE_BIN  Compose command to report instead of auto-detecting one
+EOF
+}
+
+# Print an error message to stderr and exit with status 1.
+die() {
+  printf 'ERROR: %s\n' "$*" >&2
+  exit 1
+}
+
+# Exit with an error unless command $1 is found on PATH.
+require_command() {
+  command -v "$1" >/dev/null 2>&1 || die "Required command '$1' is missing."
+}
+
+# Report the first available compose runtime, or exit if none is found.
+# DCE_COMPOSE_BIN, when set, is reported as-is without being probed.
+resolve_compose() {
+  if [[ -n "${DCE_COMPOSE_BIN:-}" ]]; then
+    printf 'compose: %s\n' "$DCE_COMPOSE_BIN"
+    return 0
+  fi
+  if command -v docker >/dev/null 2>&1 && docker compose version >/dev/null 2>&1; then
+    printf 'compose: docker compose\n'
+    return 0
+  fi
+  if command -v docker-compose >/dev/null 2>&1; then
+    printf 'compose: docker-compose\n'
+    return 0
+  fi
+  if command -v podman >/dev/null 2>&1 && podman compose version >/dev/null 2>&1; then
+    printf 'compose: podman compose\n'
+    return 0
+  fi
+  die "Install Docker or Podman with compose support."
+}
+
+# Return 0 when $ENV_FILE assigns a non-empty value to DISCORD_TOKEN.
+# An empty assignment (DISCORD_TOKEN=, DISCORD_TOKEN="") does not count, so a
+# freshly copied template with no token filled in is not reported as ready.
+env_file_has_token() {
+  local line value
+  local pattern='^[[:space:]]*DISCORD_TOKEN=(.*)$'
+  [[ -f "$ENV_FILE" ]] || return 1
+  while IFS= read -r line || [[ -n "$line" ]]; do
+    [[ "$line" =~ $pattern ]] || continue
+    # Drop whitespace (including a trailing CR) and quote characters; whatever
+    # remains is the token text, unless it is only a trailing "# comment".
+    value=${BASH_REMATCH[1]//[[:space:]\"\']/}
+    if [[ -n "$value" && "$value" != \#* ]]; then
+      return 0
+    fi
+  done <"$ENV_FILE"
+  return 1
+}
+
+# Confirm a Discord token is available from the env file, the environment, or
+# the discovery script (tried in that order); exit with an error otherwise.
+check_auth() {
+  local token
+  if env_file_has_token; then
+    printf 'auth: scrape.env has DISCORD_TOKEN\n'
+    return 0
+  fi
+  if [[ -n "${DISCORD_TOKEN:-}" ]]; then
+    printf 'auth: DISCORD_TOKEN exported in environment\n'
+    return 0
+  fi
+  if [[ -x "$DISCOVER" ]]; then
+    # Discovery is best-effort: a failing script just means "no token found".
+    token=$("$DISCOVER" 2>/dev/null || true)
+    if [[ -n "$token" ]]; then
+      printf 'auth: token discoverable (GUI or config paths)\n'
+      return 0
+    fi
+  fi
+  die "No Discord token: set scrape.env, export DISCORD_TOKEN, or sync from GUI."
+}
+
+# Parse options, run every check, and print the next steps on success.
+main() {
+  while (($#)); do
+    case "$1" in
+      --config)
+        [[ $# -ge 2 ]] || die "Missing value for --config."
+        CONFIG_PATH=$2
+        shift 2
+        ;;
+      --preflight)
+        [[ $# -ge 2 ]] || die "Missing value for --preflight."
+        PREFLIGHT_TARGET=$2
+        shift 2
+        ;;
+      --help|-h)
+        usage
+        exit 0
+        ;;
+      *)
+        die "Unknown option: $1"
+        ;;
+    esac
+  done
+
+  require_command jq
+  [[ -f "$CONFIG_PATH" ]] || die "Missing config: $CONFIG_PATH"
+  jq empty "$CONFIG_PATH" >/dev/null 2>&1 || die "Invalid JSON config: $CONFIG_PATH"
+
+  printf 'Operator readiness checks\n'
+  printf '=========================\n'
+  resolve_compose
+  check_auth
+  printf 'config: %s\n\n' "$CONFIG_PATH"
+
+  [[ -x "$VERIFY_ARCHIVES" ]] || die "Missing or non-executable script: $VERIFY_ARCHIVES"
+  DCE_PRIMARY_CONFIG="$CONFIG_PATH" "$VERIFY_ARCHIVES" --config "$CONFIG_PATH"
+
+  if [[ -n "$PREFLIGHT_TARGET" ]]; then
+    [[ -x "$HOST_RUNNER" ]] || die "Missing or non-executable script: $HOST_RUNNER"
+    printf '\nRunning preflight for target %s...\n' "$PREFLIGHT_TARGET"
+    # NOTE(review): the config path here is fixed rather than derived from
+    # --config; presumably it is the path as seen inside the container - confirm.
+    "$HOST_RUNNER" preflight --config /config/scrape-targets.json --target "$PREFLIGHT_TARGET"
+  fi
+
+  printf '\nOperator ready. Next:\n'
+  printf ' ./scripts/run-documents-scrape.sh\n'
+  printf ' ./scripts/setup-cron.sh --dry-run\n'
+}
+
+main "$@"