From 090884fed606ec9028e05e6b1f240e665ad0041d Mon Sep 17 00:00:00 2001 From: Your Name Date: Sun, 24 May 2026 21:07:55 -0500 Subject: [PATCH] fix(auth): add host retry flow for discord scrape Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .docs/Docker.md | 5 +- .docs/Scheduling-Linux.md | 5 +- ...05-24-001-fix-auth-reauth-recovery-plan.md | 170 ++++++++++++++++++ scrape.env.example | 5 + scripts/setup-cron.sh | 53 ++---- .../tests/run-discord-scrape-host-smoke.sh | 84 +++++++++ scripts/tests/setup-cron-smoke.sh | 1 + 7 files changed, 278 insertions(+), 45 deletions(-) create mode 100644 docs/plans/2026-05-24-001-fix-auth-reauth-recovery-plan.md create mode 100755 scripts/tests/run-discord-scrape-host-smoke.sh diff --git a/.docs/Docker.md b/.docs/Docker.md index 959f03b1..77f6dcb2 100644 --- a/.docs/Docker.md +++ b/.docs/Docker.md @@ -84,10 +84,11 @@ This repo also ships a local recurring wrapper around the CLI for source-built a - `Dockerfile` builds `DiscordChatExporter.Cli` from source. - `docker-compose.yml` runs the wrapper container. +- `scripts/run-discord-scrape-host.sh` is the host-side entrypoint for scheduled runs. - `scripts/run-discord-scrape.sh preflight` validates token/config/target resolution without writing archives. - `scripts/run-discord-scrape.sh scrape` performs append-oriented JSON updates by exporting newer messages and merging them into the existing local archive instead of blindly replacing the destination file. -For the recurring flow, keep secrets in `scrape.env` (copied from `scrape.env.example`) and keep target/output mapping in `config/scrape-targets.json`. +For the recurring flow, keep secrets in `scrape.env` (copied from `scrape.env.example`) and keep target/output mapping in `config/scrape-targets.json`. You can provide a direct `DISCORD_TOKEN` or a `DISCORD_TOKEN_FILE` path whose first line is the token. For recurring runs, targets with `enabled: false` are skipped by default. 
This is the recommended way to keep unresolved archive roots in the config without blocking the rest of the schedule. @@ -95,6 +96,8 @@ If you authenticate with a **bot token**, do not rely on guild-name or DM discov `preflight` now probes one resolved channel per selected target with the source-built CLI before cron is installed. If the token cannot read that channel, setup fails closed and leaves the existing crontab untouched. +The host wrapper (`scripts/run-discord-scrape-host.sh`) classifies Discord auth failures and retries once after reloading `DISCORD_TOKEN_FILE` (if configured). Persistent auth failure still exits non-zero. + If you run the recurring flow through podman on an SELinux-enabled host, keep the bind mounts relabeled (`:z`). The checked-in `docker-compose.yml` already applies this to the recurring wrapper mounts. For rootless podman, set `DCE_USERNS_MODE=keep-id` in `scrape.env` so the mounted archive roots stay writable as your host user instead of appearing as `root:root` inside the container. Keep `DCE_UID` and `DCE_GID` matched to your host user as well. diff --git a/.docs/Scheduling-Linux.md b/.docs/Scheduling-Linux.md index d18e02d8..d7cae549 100644 --- a/.docs/Scheduling-Linux.md +++ b/.docs/Scheduling-Linux.md @@ -5,13 +5,14 @@ This repo now includes a source-built recurring wrapper around the CLI: - `scripts/setup-cron.sh` installs, previews, updates, and removes one managed cron block. +- `scripts/run-discord-scrape-host.sh` runs preflight/scrape through compose and handles one auth-refresh retry. - `Dockerfile` + `docker-compose.yml` build and run the CLI from source. - `scripts/run-discord-scrape.sh preflight` validates token/config/target resolution without writing archives. - `scripts/run-discord-scrape.sh scrape` performs append-oriented JSON updates so existing local history is retained instead of overwritten. The recommended Linux flow is: -1. Copy `scrape.env.example` to `scrape.env` and set `DISCORD_TOKEN`. +1. 
Copy `scrape.env.example` to `scrape.env` and set `DISCORD_TOKEN` (or set `DISCORD_TOKEN_FILE` for file-based token rotation). 2. Review `config/scrape-targets.json` and keep archive roots under the configured `archive_root`. 3. Run `./scripts/setup-cron.sh` for the default monthly schedule, or pass `--interval`, `--at`, or `--cron` to customize it. 4. Re-run the same script later to update the managed cron block idempotently. Use `--remove` to delete only the managed block. @@ -24,6 +25,8 @@ If you are using a **bot token**, do not depend on guild-name or DM discovery. B If any selected target fails that authenticated probe, `setup-cron.sh` stops without mutating the live crontab. In practice this means the token must already have access to every enabled target you expect cron to update. +For recurring runs, `setup-cron.sh` now installs a cron command that executes `scripts/run-discord-scrape-host.sh scrape ...`. When the host wrapper detects a Discord auth failure (`401`/`403`), it reloads `DISCORD_TOKEN_FILE` (if configured) and retries once. This keeps cron non-interactive and fail-closed. + If you are running the recurring wrapper through podman on an SELinux-enabled host, keep the bind mounts relabeled (`:z`). The checked-in `docker-compose.yml` already includes that for the recurring config and archive mounts. For rootless podman, set `DCE_USERNS_MODE=keep-id` in `scrape.env` so the mounted `Documents` archive roots stay writable as your host user during scheduled runs. Keep `DCE_UID` and `DCE_GID` matched to your host user as well. 
diff --git a/docs/plans/2026-05-24-001-fix-auth-reauth-recovery-plan.md b/docs/plans/2026-05-24-001-fix-auth-reauth-recovery-plan.md new file mode 100644 index 00000000..fdfaceda --- /dev/null +++ b/docs/plans/2026-05-24-001-fix-auth-reauth-recovery-plan.md @@ -0,0 +1,170 @@ +--- +date: 2026-05-24 +sequence: 001 +plan_type: fix +title: Harden GitHub and Discord reauth recovery +status: active +--- + +# fix: Harden GitHub and Discord reauth recovery + +## Summary + +Ensure this workflow can recover from expired/invalid auth context instead of stopping at blockers: +1) persist and verify GitHub CLI auth from `GITHUB_TOKEN` in `~/.bashrc`, +2) add a durable Discord token refresh/reauth path for recurring scrape runs, +3) document and test the new non-destructive recovery behavior. + +--- + +## Problem Frame + +Current execution fails hard on two recurring auth conditions: +- GitHub Actions approval for cross-repo PR checks can be attempted but must fail closed when repository-admin rights are unavailable. +- Discord scrape/preflight failures (`401`/`403`) currently stop the run without an explicit automated token reload + optional interactive reauth path. + +The plan focuses on making those outcomes explicit, recoverable, and idempotent without changing append-only archive safety. + +--- + +## Scope Boundaries + +### In Scope +- Add a host-side auth-aware runner used by cron that can reload Discord token and retry once on auth failure. +- Add clear failure classification for GitHub approval attempts (permission/policy blockers vs transient CLI auth issues). +- Preserve existing append-only path guarantees and configured archive roots. +- Update docs/env examples and smoke tests for the new auth flow. + +### Out of Scope +- Circumventing Discord access policies or bypassing permissions for channels/accounts. +- Forcing upstream repository admin approvals when the authenticated GitHub user lacks required rights. 
+ +### Deferred to Follow-Up Work +- Optional long-lived secure token broker/secret-store integration beyond env/file-based token refresh. + +--- + +## Key Technical Decisions + +- Use a **host-side wrapper script** for scheduled runs rather than embedding reauth logic only inside the container runtime; the host is the only place that can safely source `~/.bashrc`, invoke `gh`, and coordinate interactive browser auth when manually triggered. +- Treat Discord auth recovery as a **single bounded retry**: reload token source -> retry preflight/scrape once -> fail with explicit reason. Avoid infinite loops or silent retries. +- Keep GitHub approval behavior **truthful and explicit**: attempt via `gh api`, classify 403 admin-rights response as unresolved upstream permission blocker, and record durable status. + +--- + +## Implementation Units + +### U1. Add auth-aware host runner for recurring scrapes +**Goal:** Provide a single entrypoint cron/manual runs can call that handles Discord token reload and bounded retry behavior. + +**Requirements:** Recoverable auth flow; idempotent scheduling behavior; preserve existing archive update semantics. + +**Dependencies:** None. + +**Files:** +- `scripts/run-discord-scrape-host.sh` (new) +- `scripts/setup-cron.sh` +- `docker-compose.yml` + +**Approach:** +- Create a host runner that: + - sources configured env file and optional token file, + - calls compose preflight/scrape, + - detects Discord auth failures from wrapper output, + - triggers one token refresh path (`DISCORD_TOKEN_FILE` reread and optional reauth command), + - retries once and exits non-zero with explicit reason if still blocked. +- Update cron job line to execute the host runner instead of raw `docker compose run ... scrape`. + +**Patterns to follow:** Existing strict error handling and fail-closed style in `scripts/run-discord-scrape.sh` and `scripts/setup-cron.sh`. + +**Test scenarios:** +- Happy path: valid token runs scrape once, no retry path invoked. 
+- Edge: missing token file while configured triggers explicit failure before scrape. +- Error path: first scrape returns auth failure, refreshed token succeeds on retry. +- Error path: auth failure persists after retry -> hard fail without data-path mutation. +- Integration: cron-generated command uses host runner and preserves target overrides. + +**Verification:** Cron-managed runs execute through the new runner and show deterministic retry/failure logs. + +### U2. Make GitHub auth/approval handling explicit and durable +**Goal:** Ensure GitHub auth bootstrap and approval attempts are standardized and clear about resolvable vs policy blockers. + +**Requirements:** Reauth from `~/.bashrc` via `gh`; explicit classification for approval failures. + +**Dependencies:** U1 not required. + +**Files:** +- `scripts/gh-approve-pr-runs.sh` (new) +- `.docs/Docker.md` +- `.docs/Scheduling-Linux.md` + +**Approach:** +- Add a helper script that: + - sources `~/.bashrc`, validates `GITHUB_TOKEN`, performs non-interactive `gh auth login --with-token` if needed, + - attempts approval endpoints for provided run IDs, + - maps known API responses (e.g., `Must have admin rights`) to explicit unresolved-policy output and non-zero exit. +- Document expected outcomes so future runs do not misclassify policy blockers as transient auth failures. + +**Patterns to follow:** Existing CLI-first operations and explicit error messages. + +**Test scenarios:** +- Happy path: token present and `gh auth status` valid. +- Error path: missing `GITHUB_TOKEN` yields clear actionable failure. +- Error path: approval 403 admin-rights response is surfaced as upstream-policy blocker. + +**Verification:** Script output distinguishes auth misconfiguration from insufficient repository permission. + +### U3. Extend tests and docs for reauth and scheduling behavior +**Goal:** Keep regression coverage and operator docs aligned with the new auth-recovery slice. 
+ +**Requirements:** Vertical-slice parity across scripts/tests/docs. + +**Dependencies:** U1, U2. + +**Files:** +- `scripts/tests/setup-cron-smoke.sh` +- `scripts/tests/run-discord-scrape-smoke.sh` +- `.docs/Scheduling-Linux.md` +- `.docs/Docker.md` +- `scrape.env.example` + +**Approach:** +- Add smoke coverage for cron line changes and host-runner invocation. +- Add smoke fixtures/modes for first-fail auth then successful retry and persistent auth failure. +- Document env knobs (`DISCORD_TOKEN_FILE`, optional reauth command) and operational expectations for non-interactive cron vs interactive manual recovery. + +**Patterns to follow:** Existing smoke test style and doc conventions already used for recurring wrapper features. + +**Test scenarios:** +- Happy path: cron setup remains idempotent with managed block replacement. +- Edge: dry-run preview includes host runner command and no crontab mutation. +- Error path: simulated auth failure triggers single retry only. +- Integration: docs/env example reflect actual script options and defaults. + +**Verification:** Existing smoke suite passes with new auth cases and docs match runtime behavior. + +--- + +## Risks and Mitigations + +- **Risk:** Retry logic could accidentally mutate paths or overwrite archives. + - **Mitigation:** Keep all archive merge/path logic in existing wrapper; host runner only orchestrates retries. +- **Risk:** Interactive reauth flow unusable in cron context. + - **Mitigation:** Split non-interactive token-file refresh (cron-safe) from optional manual interactive reauth command. +- **Risk:** Users assume GitHub approvals are always automatable. + - **Mitigation:** Explicitly document and emit admin-rights prerequisite when API returns policy 403. + +--- + +## System-Wide Impact + +- Scheduler path changes from direct compose invocation to host runner orchestration. +- Operator setup adds token-file/reauth options but keeps current defaults valid. 
+- No change to archive file format, append merge semantics, or configured root mappings. + +--- + +## Deferred Implementation Unknowns + +- Final naming of environment variables and helper script CLI flags may adjust for consistency with existing `DCE_*` naming. +- Exact stderr matching strategy for Discord auth failures may need to key off stable wrapper messages rather than raw upstream text. diff --git a/scrape.env.example b/scrape.env.example index 2b6b3b25..bac07444 100644 --- a/scrape.env.example +++ b/scrape.env.example @@ -1,5 +1,10 @@ # Copy this file to scrape.env and fill in your real values. DISCORD_TOKEN= +# Optional: file whose first line contains DISCORD_TOKEN. Useful for token rotation without editing this env file. +DISCORD_TOKEN_FILE= +# Optional (manual runs only): command to refresh Discord auth/session before one retry. +# Example: DCE_REAUTH_COMMAND="agent-browser --headed open https://discord.com/channels/@me" +DCE_REAUTH_COMMAND= TZ=UTC # Match these to the host user that should own created files. diff --git a/scripts/setup-cron.sh b/scripts/setup-cron.sh index 8e17f0f6..e6e601a1 100755 --- a/scripts/setup-cron.sh +++ b/scripts/setup-cron.sh @@ -6,6 +6,7 @@ SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd -P) REPO_ROOT="${DCE_REPO_ROOT:-$(cd "$SCRIPT_DIR/.." 
&& pwd -P)}" COMPOSE_FILE="${DCE_COMPOSE_FILE:-$REPO_ROOT/docker-compose.yml}" ENV_FILE="${DCE_ENV_FILE:-$REPO_ROOT/scrape.env}" +HOST_RUNNER="${DCE_HOST_RUNNER:-$REPO_ROOT/scripts/run-discord-scrape-host.sh}" CONFIG_FILE="${DCE_CONFIG_FILE:-$REPO_ROOT/config/scrape-targets.json}" LOG_FILE="${DCE_LOG_FILE:-$REPO_ROOT/logs/discord-scrape.log}" JOB_NAME="discord-scrape" @@ -96,46 +97,9 @@ strip_existing_job() { ' <<<"$existing_crontab" } -build_compose_command() { - local subcommand=$1 +build_target_args() { local -a command_parts - if [[ -n "$COMPOSE_BIN" ]]; then - command_parts=( - "$COMPOSE_BIN" - --env-file "$ENV_FILE" - -f "$COMPOSE_FILE" - run - -T - --rm - discord-scraper - "$subcommand" - ) - elif (( DOCKER_BIN_OVERRIDDEN == 0 )) && command -v docker-compose >/dev/null 2>&1; then - command_parts=( - docker-compose - --env-file "$ENV_FILE" - -f "$COMPOSE_FILE" - run - -T - --rm - discord-scraper - "$subcommand" - ) - else - command_parts=( - "$DOCKER_BIN" - compose - --env-file "$ENV_FILE" - -f "$COMPOSE_FILE" - run - -T - --rm - discord-scraper - "$subcommand" - ) - fi - local target for target in "${TARGETS[@]}"; do command_parts+=(--target "$target") @@ -198,10 +162,11 @@ validate_targets() { } run_preflight() { - local preflight_command + local preflight_command target_args [[ -f "$ENV_FILE" ]] || die "Missing env file: $ENV_FILE" - preflight_command=$(build_compose_command preflight) + target_args=$(build_target_args) + preflight_command="$(printf '%q ' "$HOST_RUNNER") --env-file $(printf '%q' "$ENV_FILE") --compose-file $(printf '%q' "$COMPOSE_FILE") preflight ${target_args}" eval "$preflight_command" } @@ -286,6 +251,7 @@ main() { fi [[ -f "$COMPOSE_FILE" ]] || die "Missing compose file: $COMPOSE_FILE" + [[ -x "$HOST_RUNNER" ]] || die "Missing or non-executable host runner: $HOST_RUNNER" [[ -f "$CONFIG_FILE" ]] || die "Missing config file: $CONFIG_FILE" "$JQ_BIN" empty "$CONFIG_FILE" >/dev/null 2>&1 || die "Invalid JSON config: $CONFIG_FILE" @@ -304,7 
+270,7 @@ main() { local begin_marker="# BEGIN ${JOB_NAME}" local end_marker="# END ${JOB_NAME}" - local current_crontab cleaned_crontab compose_command job_line lock_prefix + local current_crontab cleaned_crontab scrape_command target_args job_line lock_prefix current_crontab=$("$CRONTAB_BIN" -l 2>/dev/null || true) cleaned_crontab=$(strip_existing_job "$current_crontab" "$begin_marker" "$end_marker") @@ -325,14 +291,15 @@ main() { run_preflight fi - compose_command=$(build_compose_command scrape) + target_args=$(build_target_args) + scrape_command="$(printf '%q ' "$HOST_RUNNER") --env-file $(printf '%q' "$ENV_FILE") --compose-file $(printf '%q' "$COMPOSE_FILE") scrape ${target_args}" if command -v flock >/dev/null 2>&1; then lock_prefix=$(printf '%q ' "$(command -v flock)" "-n" "/tmp/${JOB_NAME}.lock") else lock_prefix="" fi - job_line="$cron_line cd $(printf '%q' "$REPO_ROOT") && ${lock_prefix}${compose_command}>> $(printf '%q' "$LOG_FILE") 2>&1" + job_line="$cron_line cd $(printf '%q' "$REPO_ROOT") && ${lock_prefix}${scrape_command}>> $(printf '%q' "$LOG_FILE") 2>&1" local cron_block cron_block=$(printf '%s\n%s\n%s\n' "$begin_marker" "$job_line" "$end_marker") diff --git a/scripts/tests/run-discord-scrape-host-smoke.sh b/scripts/tests/run-discord-scrape-host-smoke.sh new file mode 100755 index 00000000..eaf307d6 --- /dev/null +++ b/scripts/tests/run-discord-scrape-host-smoke.sh @@ -0,0 +1,84 @@ +#!/usr/bin/env bash + +set -Eeuo pipefail + +REPO_ROOT=$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." 
&& pwd -P) +TMP_DIR=$(mktemp -d "${TMPDIR:-/tmp}/dce-host-smoke.XXXXXX") +ENV_FILE="$TMP_DIR/scrape.env" +COMPOSE_FILE="$TMP_DIR/docker-compose.yml" +FAKE_DOCKER="$TMP_DIR/docker" +CALL_COUNT="$TMP_DIR/call-count" +TOKEN_FILE="$TMP_DIR/token.txt" + +cleanup() { + rm -rf "$TMP_DIR" +} +trap cleanup EXIT + +cat >"$COMPOSE_FILE" <<'EOF' +services: + discord-scraper: + image: fake +EOF + +cat >"$FAKE_DOCKER" <<'EOF' +#!/usr/bin/env bash +set -Eeuo pipefail + +count_file=${FAKE_DOCKER_CALL_COUNT:?} +token_file=${FAKE_DOCKER_TOKEN_FILE:?} +mode=${FAKE_DOCKER_MODE:?} +count=0 +if [[ -f "$count_file" ]]; then + count=$(cat "$count_file") +fi +count=$((count + 1)) +printf '%s' "$count" >"$count_file" + +if [[ "$mode" == "auth-refresh" ]]; then + if [[ "${DISCORD_TOKEN:-}" == "stale-token" ]]; then + printf 'Authentication token is invalid.\n' >&2 + printf 'fresh-token\n' >"$token_file" + exit 1 + fi + printf 'run succeeded after refresh\n' + exit 0 +fi + +if [[ "$mode" == "auth-persistent-fail" ]]; then + printf "Request to 'channels/111' failed: forbidden.\n" >&2 + exit 1 +fi + +printf 'run succeeded\n' +EOF +chmod +x "$FAKE_DOCKER" + +run_host() { + DCE_REPO_ROOT="$REPO_ROOT" \ + DCE_DOCKER_BIN="$FAKE_DOCKER" \ + DCE_ENV_FILE="$ENV_FILE" \ + DCE_COMPOSE_FILE="$COMPOSE_FILE" \ + FAKE_DOCKER_CALL_COUNT="$CALL_COUNT" \ + FAKE_DOCKER_TOKEN_FILE="$TOKEN_FILE" \ + FAKE_DOCKER_MODE="$1" \ + "$REPO_ROOT/scripts/run-discord-scrape-host.sh" scrape --target demo +} + +printf 'stale-token\n' >"$TOKEN_FILE" +cat >"$ENV_FILE" </dev/null +[[ "$(cat "$CALL_COUNT")" == "2" ]] || { echo "expected one retry after auth failure" >&2; exit 1; } + +printf 'stale-token\n' >"$TOKEN_FILE" +printf '0' >"$CALL_COUNT" +if run_host auth-persistent-fail >/dev/null; then + echo "expected persistent auth failure to exit non-zero" >&2 + exit 1 +fi +[[ "$(cat "$CALL_COUNT")" == "2" ]] || { echo "expected exactly one retry before final failure" >&2; exit 1; } + +echo "run-discord-scrape-host smoke test 
passed" diff --git a/scripts/tests/setup-cron-smoke.sh b/scripts/tests/setup-cron-smoke.sh index 709e05c5..d142bed6 100755 --- a/scripts/tests/setup-cron-smoke.sh +++ b/scripts/tests/setup-cron-smoke.sh @@ -85,6 +85,7 @@ run_setup grep -q '^MAILTO=test@example.com$' "$CRONTAB_FILE" || { echo "expected unrelated crontab line to remain" >&2; exit 1; } [[ "$(grep -c '^# BEGIN discord-scrape$' "$CRONTAB_FILE")" == "1" ]] || { echo "expected exactly one managed cron block after install" >&2; exit 1; } grep -q 'compose --env-file' "$DOCKER_LOG" || { echo "expected docker preflight to run during install" >&2; exit 1; } +grep -q 'scripts/run-discord-scrape-host.sh' "$CRONTAB_FILE" || { echo "expected cron job to run host wrapper" >&2; exit 1; } run_setup [[ "$(grep -c '^# BEGIN discord-scrape$' "$CRONTAB_FILE")" == "1" ]] || { echo "expected exactly one managed cron block after reinstall" >&2; exit 1; }