From 2b39a721a96b4c7a1df708adeba0f6319ce23907 Mon Sep 17 00:00:00 2001 From: Boden Date: Fri, 29 May 2026 13:59:04 -0500 Subject: [PATCH] feat(scrape): add bootstrap-recurring-scrape one-shot operator flow Verify archives, build compose image, and preflight in one script. Forward scrape-here --help; add scrape-here-smoke to CI. --- .github/workflows/main.yml | 1 + Readme.md | 2 +- ...-05-29-014-feat-operator-bootstrap-plan.md | 42 +++++ scripts/bootstrap-recurring-scrape.sh | 144 ++++++++++++++++++ scripts/scrape-here.sh | 7 + scripts/tests/scrape-here-smoke.sh | 23 +++ 6 files changed, 218 insertions(+), 1 deletion(-) create mode 100644 docs/plans/2026-05-29-014-feat-operator-bootstrap-plan.md create mode 100755 scripts/bootstrap-recurring-scrape.sh create mode 100755 scripts/tests/scrape-here-smoke.sh diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 010e4531..b3af227e 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -76,6 +76,7 @@ jobs: ./scripts/tests/gh-approve-pr-runs-smoke.sh ./scripts/tests/documents-scrape-smoke.sh ./scripts/tests/verify-documents-auth-smoke.sh + ./scripts/tests/scrape-here-smoke.sh test: # Tests need access to secrets, so we can't run them against PRs because of limited trust diff --git a/Readme.md b/Readme.md index 38dc3571..b4a677b4 100644 --- a/Readme.md +++ b/Readme.md @@ -81,7 +81,7 @@ To learn more about the war and how you can help, [click here](https://tyrrrz.me ## See also -- [**Recurring Exports**](.docs/Recurring-Scrape-Setup.md) — automated scheduled exports using cron (Linux/macOS). If you only have the GUI zip (`DiscordChatExporter.linux-x64`), use `scripts/scrape-here.sh` from the source repository (build with `docker compose build`, then `./scripts/scrape-here.sh scrape`). +- [**Recurring Exports**](.docs/Recurring-Scrape-Setup.md) — automated scheduled exports using cron (Linux/macOS). 
From the source repo run `./scripts/bootstrap-recurring-scrape.sh` (verify, build, preflight). If you only have the GUI zip (`DiscordChatExporter.linux-x64`), run the `./bootstrap-recurring-scrape.sh` stub in that folder, or use `scripts/scrape-here.sh` from the sibling source repository. - [**Documented solutions**](docs/solutions/) — searchable learnings (append-only scrape, Docker/cron workflow); YAML frontmatter: `module`, `tags`, `problem_type` - [**Chat Analytics**](https://github.com/mlomb/chat-analytics) — solution for analyzing chat patterns of Discord users, using exports produced by **DiscordChatExporter**. - [**DiscordChatExporter-frontend**](https://github.com/slatinsky/DiscordChatExporter-frontend) — convenient viewer for exports produced by **DiscordChatExporter**. diff --git a/docs/plans/2026-05-29-014-feat-operator-bootstrap-plan.md b/docs/plans/2026-05-29-014-feat-operator-bootstrap-plan.md new file mode 100644 index 00000000..3737f176 --- /dev/null +++ b/docs/plans/2026-05-29-014-feat-operator-bootstrap-plan.md @@ -0,0 +1,42 @@ +--- +title: "feat: One-command recurring scrape bootstrap" +type: feat +status: completed +date: 2026-05-29 +origin: LFG — close operator gap between GUI zip workspace and first successful scrape/cron +--- + +# feat: One-command recurring scrape bootstrap + +## Summary + +Provide `bootstrap-recurring-scrape.sh` so operators run verify → docker build → preflight in one command, plus smoke coverage and GUI-workspace stubs. + +## Requirements + +| ID | Requirement | +|----|-------------| +| R1 | `bootstrap-recurring-scrape.sh` supports `--dry-run`, `--skip-build`, `--target` | +| R2 | `scrape-here.sh` forwards `--help` to container scrape help | +| R3 | `scrape-here-smoke.sh` in CI recurring-scrape-smoke job | +| R4 | GUI zip folder has executable bootstrap stub | +| R5 | All existing smoke tests still pass | + +## Implementation Units + +### U1. Bootstrap script + +**Files:** `scripts/bootstrap-recurring-scrape.sh` + +### U2. 
Launcher + smoke + +**Files:** `scripts/scrape-here.sh`, `scripts/tests/scrape-here-smoke.sh`, `.github/workflows/main.yml` + +### U3. GUI workspace stub + +**Files:** `../DiscordChatExporter.linux-x64/bootstrap-recurring-scrape.sh` + +## Verification + +- `./scripts/bootstrap-recurring-scrape.sh --dry-run` +- `./scripts/tests/scrape-here-smoke.sh` diff --git a/scripts/bootstrap-recurring-scrape.sh b/scripts/bootstrap-recurring-scrape.sh new file mode 100755 index 00000000..723a5d82 --- /dev/null +++ b/scripts/bootstrap-recurring-scrape.sh @@ -0,0 +1,144 @@ +#!/usr/bin/env bash + +set -Eeuo pipefail + +SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd -P) +REPO_ROOT="${DCE_REPO_ROOT:-$(cd "$SCRIPT_DIR/.." && pwd -P)}" +CONFIG_PATH="${DCE_CONFIG_FILE:-$REPO_ROOT/config/scrape-targets.json}" +ENV_FILE="${DCE_ENV_FILE:-$REPO_ROOT/scrape.env}" +COMPOSE_FILE="${DCE_COMPOSE_FILE:-$REPO_ROOT/docker-compose.yml}" +HOST_RUNNER="$REPO_ROOT/scripts/run-discord-scrape-host.sh" +VERIFY_SCRIPT="$REPO_ROOT/scripts/verify-documents-archives.sh" +SETUP_AUTH="$REPO_ROOT/scripts/setup-scrape-auth.sh" + +DRY_RUN=0 +SKIP_BUILD=0 +TARGETS=() + +usage() { + cat <&2 + exit 1 +} + +require_program() { + command -v "$1" >/dev/null 2>&1 || die "Required command '$1' is missing." +} + +resolve_compose() { + if [[ -n "${DCE_COMPOSE_BIN:-}" ]]; then + COMPOSE_BIN=("$DCE_COMPOSE_BIN") + return 0 + fi + if command -v docker-compose >/dev/null 2>&1; then + COMPOSE_BIN=(docker-compose) + return 0 + fi + if command -v docker >/dev/null 2>&1 && docker compose version >/dev/null 2>&1; then + COMPOSE_BIN=(docker compose) + return 0 + fi + if command -v podman >/dev/null 2>&1 && podman compose version >/dev/null 2>&1; then + COMPOSE_BIN=(podman compose) + return 0 + fi + die "Install Docker or Podman with compose support." 
+} + +main() { # Parse flags, verify archives, optionally build the compose image, then run the host runner preflight. + while (($#)); do + case "$1" in + --dry-run) + DRY_RUN=1 + shift + ;; + --skip-build) + SKIP_BUILD=1 + shift + ;; + --target) + [[ $# -ge 2 ]] || die "Missing value for --target." + TARGETS+=("$2") + shift 2 + ;; + --config) + [[ $# -ge 2 ]] || die "Missing value for --config." + CONFIG_PATH=$2 + shift 2 + ;; + --env-file) + [[ $# -ge 2 ]] || die "Missing value for --env-file." + ENV_FILE=$2 + shift 2 + ;; + --help|-h) + usage + exit 0 + ;; + *) + die "Unknown option: $1" + ;; + esac + done + + require_program jq + [[ -f "$CONFIG_PATH" ]] || die "Missing config: $CONFIG_PATH" + + "$VERIFY_SCRIPT" --config "$CONFIG_PATH" + + if (( DRY_RUN == 1 )); then + printf 'Dry run complete: archive paths verified under configured output_dir values.\n' + printf 'Next: cp scrape.env.example scrape.env, set DISCORD_TOKEN, then rerun without --dry-run.\n' + exit 0 + fi + + if (( SKIP_BUILD == 0 )); then + resolve_compose + (cd "$REPO_ROOT" && "${COMPOSE_BIN[@]}" -f "$COMPOSE_FILE" build) + fi + + if [[ -n "${DISCORD_TOKEN:-}" || -n "${DISCORD_TOKEN_FILE:-}" ]]; then + "$SETUP_AUTH" --env-file "$ENV_FILE" || printf 'Warning: %s failed; continuing with any existing %s.\n' "$SETUP_AUTH" "$ENV_FILE" >&2 # best-effort: report the failure, let the env-file check below decide + fi + + [[ -f "$ENV_FILE" ]] || die "Missing $ENV_FILE. Copy scrape.env.example or export DISCORD_TOKEN and run scripts/setup-scrape-auth.sh." + + local -a preflight_args=("$HOST_RUNNER" --env-file "$ENV_FILE" --compose-file "$COMPOSE_FILE" preflight) + local target + for target in ${TARGETS[@]+"${TARGETS[@]}"}; do # guarded expansion: an empty TARGETS must not trip 'set -u' on bash older than 4.4 + preflight_args+=(--target "$target") + done + + "${preflight_args[@]}" + + printf '\nBootstrap complete.\n' + printf ' Scrape now: %s\n' "$REPO_ROOT/scripts/run-documents-scrape.sh" + printf ' Install cron: %s --dry-run\n' "$REPO_ROOT/scripts/setup-cron.sh" +} + +main "$@" diff --git a/scripts/scrape-here.sh b/scripts/scrape-here.sh index f162a09b..33ea5da2 100755 --- a/scripts/scrape-here.sh +++ b/scripts/scrape-here.sh @@ -3,4 +3,11 @@ set -Eeuo pipefail REPO_ROOT=$(cd "$(dirname "${BASH_SOURCE[0]}")/.." 
&& pwd -P) + +case "${1:-}" in + --help|-h|help) + exec "$REPO_ROOT/scripts/run-discord-scrape.sh" help + ;; +esac + exec "$REPO_ROOT/scripts/run-discord-scrape-host.sh" "$@" diff --git a/scripts/tests/scrape-here-smoke.sh b/scripts/tests/scrape-here-smoke.sh new file mode 100755 index 00000000..378839fe --- /dev/null +++ b/scripts/tests/scrape-here-smoke.sh @@ -0,0 +1,23 @@ +#!/usr/bin/env bash + +set -Eeuo pipefail + +REPO_ROOT=$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd -P) +SCRAPE_HERE="$REPO_ROOT/scripts/scrape-here.sh" + +[[ -x "$SCRAPE_HERE" ]] || { + printf 'scrape-here.sh is not executable\n' >&2 + exit 1 +} + +"$SCRAPE_HERE" --help | grep -F 'run-discord-scrape.sh' >/dev/null || { # no -q: an early grep exit can SIGPIPE the producer and fail the pipeline under pipefail; -F matches the dot literally + printf 'scrape-here --help did not show scrape subcommand help\n' >&2 + exit 1 +} + +if "$SCRAPE_HERE" not-a-subcommand 2>/dev/null; then + printf 'expected failure for unknown subcommand\n' >&2 + exit 1 +fi + +printf 'scrape-here-smoke: ok\n'