From 8b54b6a498145d7adbc441b844c6e9dae4b370d4 Mon Sep 17 00:00:00 2001 From: Copilot Date: Wed, 3 Jun 2026 05:52:39 -0500 Subject: [PATCH] test(scrape): preserve-partial smoke; fix host token-file precedence Add offline regression for OOM skip preserving partial export temps. Host wrapper now prefers DISCORD_TOKEN_FILE over inherited shell tokens and always writes explicit compose env for auth-retry. All 19 smokes pass. --- ...rve-partial-smoke-kotor-validation-plan.md | 37 +++++++++++++++ docs/recurring-scrape-merge-readiness.md | 4 +- scripts/run-discord-scrape-host.sh | 28 ++++++++---- .../tests/run-discord-scrape-host-smoke.sh | 45 +++++++++++++++---- scripts/tests/run-discord-scrape-smoke.sh | 5 +++ 5 files changed, 100 insertions(+), 19 deletions(-) create mode 100644 docs/plans/2026-06-04-044-feat-preserve-partial-smoke-kotor-validation-plan.md diff --git a/docs/plans/2026-06-04-044-feat-preserve-partial-smoke-kotor-validation-plan.md b/docs/plans/2026-06-04-044-feat-preserve-partial-smoke-kotor-validation-plan.md new file mode 100644 index 00000000..326a55e8 --- /dev/null +++ b/docs/plans/2026-06-04-044-feat-preserve-partial-smoke-kotor-validation-plan.md @@ -0,0 +1,37 @@ +--- +title: "feat: Preserve-partial smoke and KotOR validation run" +type: feat +status: complete +date: 2026-06-04 +origin: /lfg — close plan 043 with regression smoke; run live KotOR validation +--- + +# feat: Preserve-partial smoke and KotOR validation run + +## Summary + +Plan 043 fixed the re-download loop but lacks offline regression for "preserve partial temp on OOM skip". Add smoke coverage, rebuild container, run KotOR validation, update merge-readiness. + +## Requirements + +| ID | Requirement | +|----|-------------| +| R1 | Smoke: fake CLI writes partial export for channel 134 then exits 134; temp dir preserved after SKIPPED | +| R2 | `run-discord-scrape-smoke.sh` and `run-all-smokes.sh` pass (19/19) | +| R3 | Rebuild image; start `run-operator-validation.sh --target KotOR_discord_msgs` with log | +| R4 | `docs/recurring-scrape-merge-readiness.md` updated with validation run status | +| R5 | PR #1538 body notes plan 044 | + +## Verification + +```bash +./scripts/tests/run-discord-scrape-smoke.sh +DCE_MIN_FREE_MB=0 ./scripts/run-all-smokes.sh +podman-compose build +DCE_MIN_FREE_MB=0 ./scripts/run-operator-validation.sh --target KotOR_discord_msgs --log-file logs/kotor-validation-20260604.log +``` + +## Out of scope + +- Waiting for yes_general multi-hour catch-up to finish inside LFG +- Container memory tuning diff --git a/docs/recurring-scrape-merge-readiness.md b/docs/recurring-scrape-merge-readiness.md index c512cbf3..67ad256f 100644 --- a/docs/recurring-scrape-merge-readiness.md +++ b/docs/recurring-scrape-merge-readiness.md @@ -111,7 +111,9 @@ DCE_MIN_FREE_MB=0 ./scripts/run-operator-validation.sh --sync-gui --per-target - | expanded_kotor_discord | pass | pass | validation-resume | | eod_discord | pass | pass | validation-resume | | DS_Discord_msgs | pass | pass | validation-resume; some channels forbidden | -| KotOR_discord_msgs | **in progress** | — | `yes_general` has ~5 years of backlog (archive cursor was Jan 2021); plan 043 preserves partial temps on OOM skip | +| KotOR_discord_msgs | **in progress** | — | plan 044 validation started 2026-06-04 (`logs/kotor-validation-20260604.log`); `yes_general` catch-up + preserve-partial smoke | + +**Plan 044 (2026-06-04):** Offline smoke asserts partial temp preserved on OOM skip (channel 134). Host wrapper always writes explicit compose env from `DISCORD_TOKEN_FILE` (fixes auth-retry when shell exports a stale `DISCORD_TOKEN`). `run-all-smokes.sh` → 19/19 pass. **KotOR / yes_general (plan 040–043):** Incremental `--after` works for all channels; most return `UNCHANGED` in seconds. `yes_general` archive last message was **2021-01-17** — the first catch-up legitimately fetches years of history. Prior bug: OOM skip **deleted** partial temp exports, causing re-download loops. Plan 043 preserves partial temps and salvages on next run. diff --git a/scripts/run-discord-scrape-host.sh b/scripts/run-discord-scrape-host.sh index a663dc76..8af05a79 100755 --- a/scripts/run-discord-scrape-host.sh +++ b/scripts/run-discord-scrape-host.sh @@ -126,7 +126,14 @@ configure_rootless_compose() { prepare_compose_env() { if [[ -f "$ENV_FILE" ]]; then load_env_file - COMPOSE_ENV_FILE="$ENV_FILE" + if [[ -n "${DISCORD_TOKEN_FILE:-}" && -f "${DISCORD_TOKEN_FILE}" ]]; then + load_token_from_file || true + elif [[ -z "${DISCORD_TOKEN:-}" ]]; then + discover_token_file || true + load_token_from_file || true + load_token_from_discover_script || true + fi + write_compose_env_temp configure_rootless_compose return 0 fi @@ -190,9 +197,13 @@ load_token_from_discover_script() { } ensure_token_present() { - if [[ -z "${DISCORD_TOKEN:-}" ]]; then + if [[ -n "${DISCORD_TOKEN_FILE:-}" && -f "${DISCORD_TOKEN_FILE}" ]]; then + load_token_from_file || true + elif [[ -z "${DISCORD_TOKEN:-}" ]]; then discover_token_file || true load_token_from_file || true + fi + if [[ -z "${DISCORD_TOKEN:-}" ]]; then load_token_from_discover_script || true fi [[ -n "${DISCORD_TOKEN:-}" ]] || die "DISCORD_TOKEN is not set. Set DISCORD_TOKEN or DISCORD_TOKEN_FILE in $ENV_FILE, export it in the shell, place a token at $REPO_ROOT/.discord-token or ~/.config/discord-scrape/token, or sign in via DiscordChatExporter GUI / Discord desktop on this machine." @@ -365,14 +376,13 @@ run_subcommand_with_retry() { printf 'Detected Discord auth failure. Refreshing token and retrying once...\n' >&2 load_token_from_file || true - load_token_from_discover_script || true - if [[ -f "$ENV_FILE" ]]; then - COMPOSE_ENV_FILE="$ENV_FILE" - elif [[ -n "${DISCORD_TOKEN:-}" ]]; then - rm -f "$COMPOSE_ENV_TEMP" - COMPOSE_ENV_TEMP="" - write_compose_env_temp + if [[ -z "${DISCORD_TOKEN:-}" ]]; then + load_token_from_discover_script || true fi + rm -f "$COMPOSE_ENV_TEMP" + COMPOSE_ENV_TEMP="" + write_compose_env_temp + COMPOSE_ENV_FILE="$COMPOSE_ENV_TEMP" try_interactive_reauth || true ensure_token_present compose_run_args run_args "$subcommand" "$@" diff --git a/scripts/tests/run-discord-scrape-host-smoke.sh b/scripts/tests/run-discord-scrape-host-smoke.sh index cbc39f8a..e4cc3ddf 100755 --- a/scripts/tests/run-discord-scrape-host-smoke.sh +++ b/scripts/tests/run-discord-scrape-host-smoke.sh @@ -35,8 +35,34 @@ fi count=$((count + 1)) printf '%s' "$count" >"$count_file" +while (($#)); do + case "$1" in + --env-file) + if [[ $# -ge 2 && -f "$2" ]]; then + local_env=$2 + while IFS='=' read -r env_key env_value || [[ -n "$env_key" ]]; do + [[ -z "$env_key" || "$env_key" =~ ^# ]] && continue + env_key=${env_key#export } + env_key=${env_key%%[[:space:]]*} + printf -v "$env_key" '%s' "$env_value" + export "$env_key" + done <"$local_env" + fi + shift 2 + ;; + *) + shift + ;; + esac +done + +token="${DISCORD_TOKEN:-}" +if [[ -z "$token" && -n "${DISCORD_TOKEN_FILE:-}" && -f "$DISCORD_TOKEN_FILE" ]]; then + token=$(head -n 1 "$DISCORD_TOKEN_FILE" | tr -d '\r') +fi + if [[ "$mode" == "auth-refresh" ]]; then - if [[ "${DISCORD_TOKEN:-}" == "stale-token" ]]; then + if [[ "$token" == "stale-token" ]]; then printf 'Authentication token is invalid.\n' >&2 printf 'fresh-token\n' >"$token_file" exit 1 @@ -58,14 +84,15 @@ run_host() { local mode=$1 local env_path=${2:-$ENV_FILE} - DCE_REPO_ROOT="$REPO_ROOT" \ - DCE_DOCKER_BIN="$FAKE_DOCKER" \ - DCE_ENV_FILE="$env_path" \ - DCE_COMPOSE_FILE="$COMPOSE_FILE" \ - FAKE_DOCKER_CALL_COUNT="$CALL_COUNT" \ - FAKE_DOCKER_TOKEN_FILE="$TOKEN_FILE" \ - FAKE_DOCKER_MODE="$mode" \ - "$REPO_ROOT/scripts/run-discord-scrape-host.sh" scrape --target demo + env -u DISCORD_TOKEN \ + DCE_REPO_ROOT="$REPO_ROOT" \ + DCE_DOCKER_BIN="$FAKE_DOCKER" \ + DCE_ENV_FILE="$env_path" \ + DCE_COMPOSE_FILE="$COMPOSE_FILE" \ + FAKE_DOCKER_CALL_COUNT="$CALL_COUNT" \ + FAKE_DOCKER_TOKEN_FILE="$TOKEN_FILE" \ + FAKE_DOCKER_MODE="$mode" \ + "$REPO_ROOT/scripts/run-discord-scrape-host.sh" scrape --target demo } run_host_with_shell_token() { diff --git a/scripts/tests/run-discord-scrape-smoke.sh b/scripts/tests/run-discord-scrape-smoke.sh index 5a026404..640cfc00 100755 --- a/scripts/tests/run-discord-scrape-smoke.sh +++ b/scripts/tests/run-discord-scrape-smoke.sh @@ -194,6 +194,7 @@ case "$subcommand" in fi if [[ "$channel" == "134" ]]; then + cp "$fixture_dir/salvage-truncated.json" "$output" echo "Aborted (core dumped)" >&2 exit 134 fi @@ -386,6 +387,10 @@ SKIP_ABORT_DEST="$ARCHIVE_ROOT/skip-abort/$DEFAULT_FILE_NAME" [[ "$(jq -r '.messages | length' "$SKIP_ABORT_DEST")" == "3" ]] || { echo "expected skip-abort to append accessible channel" >&2; exit 1; } [[ ! -e "$ARCHIVE_ROOT/skip-abort/channels/134.json" ]] || { echo "unexpected fallback file for skipped abort channel" >&2; exit 1; } grep -q 'SKIPPED.*134' "$SKIP_ABORT_LOG" || { echo "expected SKIPPED line for abort channel 134" >&2; exit 1; } +grep -q 'Preserving partial export temp' "$SKIP_ABORT_LOG" || { echo "expected partial temp preserved on abort channel 134" >&2; exit 1; } +partial_temp_dirs=( "$ARCHIVE_ROOT/skip-abort/.dce-temp"/export.134.* ) +[[ -d "${partial_temp_dirs[0]}" ]] || { echo "expected partial temp dir preserved for channel 134" >&2; exit 1; } +[[ -s "${partial_temp_dirs[0]}/export.json" ]] || { echo "expected partial export.json preserved for channel 134" >&2; exit 1; } # Salvage stale temp export smoke mkdir -p "$ARCHIVE_ROOT/salvage-stale"