mirror of
https://github.com/Tyrrrz/DiscordChatExporter.git
synced 2026-06-09 15:52:37 -06:00
test(scrape): preserve-partial smoke; fix host token-file precedence
Add an offline regression smoke test verifying that an OOM skip preserves partial export temps. The host wrapper now prefers DISCORD_TOKEN_FILE over inherited shell tokens and always writes an explicit compose env for auth-retry. All 19 smokes pass.
This commit is contained in:
parent
87537eb8b0
commit
8b54b6a498
|
|
@ -0,0 +1,37 @@
|
||||||
|
---
|
||||||
|
title: "feat: Preserve-partial smoke and KotOR validation run"
|
||||||
|
type: feat
|
||||||
|
status: complete
|
||||||
|
date: 2026-06-04
|
||||||
|
origin: /lfg — close plan 043 with regression smoke; run live KotOR validation
|
||||||
|
---
|
||||||
|
|
||||||
|
# feat: Preserve-partial smoke and KotOR validation run
|
||||||
|
|
||||||
|
## Summary
|
||||||
|
|
||||||
|
Plan 043 fixed the re-download loop but lacks an offline regression test for "preserve partial temp on OOM skip". This plan adds smoke coverage, rebuilds the container, runs the KotOR validation, and updates the merge-readiness doc.
|
||||||
|
|
||||||
|
## Requirements
|
||||||
|
|
||||||
|
| ID | Requirement |
|
||||||
|
|----|-------------|
|
||||||
|
| R1 | Smoke: fake CLI writes partial export for channel 134 then exits 134; temp dir preserved after SKIPPED |
|
||||||
|
| R2 | `run-discord-scrape-smoke.sh` and `run-all-smokes.sh` pass (19/19) |
|
||||||
|
| R3 | Rebuild image; start `run-operator-validation.sh --target KotOR_discord_msgs` with log |
|
||||||
|
| R4 | `docs/recurring-scrape-merge-readiness.md` updated with validation run status |
|
||||||
|
| R5 | PR #1538 body notes plan 044 |
|
||||||
|
|
||||||
|
## Verification
|
||||||
|
|
||||||
|
```bash
|
||||||
|
./scripts/tests/run-discord-scrape-smoke.sh
|
||||||
|
DCE_MIN_FREE_MB=0 ./scripts/run-all-smokes.sh
|
||||||
|
podman-compose build
|
||||||
|
DCE_MIN_FREE_MB=0 ./scripts/run-operator-validation.sh --target KotOR_discord_msgs --log-file logs/kotor-validation-20260604.log
|
||||||
|
```
|
||||||
|
|
||||||
|
## Out of scope
|
||||||
|
|
||||||
|
- Waiting for yes_general multi-hour catch-up to finish inside LFG
|
||||||
|
- Container memory tuning
|
||||||
|
|
@ -111,7 +111,9 @@ DCE_MIN_FREE_MB=0 ./scripts/run-operator-validation.sh --sync-gui --per-target -
|
||||||
| expanded_kotor_discord | pass | pass | validation-resume |
|
| expanded_kotor_discord | pass | pass | validation-resume |
|
||||||
| eod_discord | pass | pass | validation-resume |
|
| eod_discord | pass | pass | validation-resume |
|
||||||
| DS_Discord_msgs | pass | pass | validation-resume; some channels forbidden |
|
| DS_Discord_msgs | pass | pass | validation-resume; some channels forbidden |
|
||||||
| KotOR_discord_msgs | **in progress** | — | `yes_general` has ~5 years of backlog (archive cursor was Jan 2021); plan 043 preserves partial temps on OOM skip |
|
| KotOR_discord_msgs | **in progress** | — | plan 044 validation started 2026-06-04 (`logs/kotor-validation-20260604.log`); `yes_general` catch-up + preserve-partial smoke |
|
||||||
|
|
||||||
|
**Plan 044 (2026-06-04):** Offline smoke asserts partial temp preserved on OOM skip (channel 134). Host wrapper always writes explicit compose env from `DISCORD_TOKEN_FILE` (fixes auth-retry when shell exports a stale `DISCORD_TOKEN`). `run-all-smokes.sh` → 19/19 pass.
|
||||||
|
|
||||||
**KotOR / yes_general (plan 040–043):** Incremental `--after` works for all channels; most return `UNCHANGED` in seconds. `yes_general` archive last message was **2021-01-17** — the first catch-up legitimately fetches years of history. Prior bug: OOM skip **deleted** partial temp exports, causing re-download loops. Plan 043 preserves partial temps and salvages on next run.
|
**KotOR / yes_general (plan 040–043):** Incremental `--after` works for all channels; most return `UNCHANGED` in seconds. `yes_general` archive last message was **2021-01-17** — the first catch-up legitimately fetches years of history. Prior bug: OOM skip **deleted** partial temp exports, causing re-download loops. Plan 043 preserves partial temps and salvages on next run.
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -126,7 +126,14 @@ configure_rootless_compose() {
|
||||||
prepare_compose_env() {
|
prepare_compose_env() {
|
||||||
if [[ -f "$ENV_FILE" ]]; then
|
if [[ -f "$ENV_FILE" ]]; then
|
||||||
load_env_file
|
load_env_file
|
||||||
COMPOSE_ENV_FILE="$ENV_FILE"
|
if [[ -n "${DISCORD_TOKEN_FILE:-}" && -f "${DISCORD_TOKEN_FILE}" ]]; then
|
||||||
|
load_token_from_file || true
|
||||||
|
elif [[ -z "${DISCORD_TOKEN:-}" ]]; then
|
||||||
|
discover_token_file || true
|
||||||
|
load_token_from_file || true
|
||||||
|
load_token_from_discover_script || true
|
||||||
|
fi
|
||||||
|
write_compose_env_temp
|
||||||
configure_rootless_compose
|
configure_rootless_compose
|
||||||
return 0
|
return 0
|
||||||
fi
|
fi
|
||||||
|
|
@ -190,9 +197,13 @@ load_token_from_discover_script() {
|
||||||
}
|
}
|
||||||
|
|
||||||
ensure_token_present() {
|
ensure_token_present() {
|
||||||
if [[ -z "${DISCORD_TOKEN:-}" ]]; then
|
if [[ -n "${DISCORD_TOKEN_FILE:-}" && -f "${DISCORD_TOKEN_FILE}" ]]; then
|
||||||
|
load_token_from_file || true
|
||||||
|
elif [[ -z "${DISCORD_TOKEN:-}" ]]; then
|
||||||
discover_token_file || true
|
discover_token_file || true
|
||||||
load_token_from_file || true
|
load_token_from_file || true
|
||||||
|
fi
|
||||||
|
if [[ -z "${DISCORD_TOKEN:-}" ]]; then
|
||||||
load_token_from_discover_script || true
|
load_token_from_discover_script || true
|
||||||
fi
|
fi
|
||||||
[[ -n "${DISCORD_TOKEN:-}" ]] || die "DISCORD_TOKEN is not set. Set DISCORD_TOKEN or DISCORD_TOKEN_FILE in $ENV_FILE, export it in the shell, place a token at $REPO_ROOT/.discord-token or ~/.config/discord-scrape/token, or sign in via DiscordChatExporter GUI / Discord desktop on this machine."
|
[[ -n "${DISCORD_TOKEN:-}" ]] || die "DISCORD_TOKEN is not set. Set DISCORD_TOKEN or DISCORD_TOKEN_FILE in $ENV_FILE, export it in the shell, place a token at $REPO_ROOT/.discord-token or ~/.config/discord-scrape/token, or sign in via DiscordChatExporter GUI / Discord desktop on this machine."
|
||||||
|
|
@ -365,14 +376,13 @@ run_subcommand_with_retry() {
|
||||||
|
|
||||||
printf 'Detected Discord auth failure. Refreshing token and retrying once...\n' >&2
|
printf 'Detected Discord auth failure. Refreshing token and retrying once...\n' >&2
|
||||||
load_token_from_file || true
|
load_token_from_file || true
|
||||||
load_token_from_discover_script || true
|
if [[ -z "${DISCORD_TOKEN:-}" ]]; then
|
||||||
if [[ -f "$ENV_FILE" ]]; then
|
load_token_from_discover_script || true
|
||||||
COMPOSE_ENV_FILE="$ENV_FILE"
|
|
||||||
elif [[ -n "${DISCORD_TOKEN:-}" ]]; then
|
|
||||||
rm -f "$COMPOSE_ENV_TEMP"
|
|
||||||
COMPOSE_ENV_TEMP=""
|
|
||||||
write_compose_env_temp
|
|
||||||
fi
|
fi
|
||||||
|
rm -f "$COMPOSE_ENV_TEMP"
|
||||||
|
COMPOSE_ENV_TEMP=""
|
||||||
|
write_compose_env_temp
|
||||||
|
COMPOSE_ENV_FILE="$COMPOSE_ENV_TEMP"
|
||||||
try_interactive_reauth || true
|
try_interactive_reauth || true
|
||||||
ensure_token_present
|
ensure_token_present
|
||||||
compose_run_args run_args "$subcommand" "$@"
|
compose_run_args run_args "$subcommand" "$@"
|
||||||
|
|
|
||||||
|
|
@ -35,8 +35,34 @@ fi
|
||||||
count=$((count + 1))
|
count=$((count + 1))
|
||||||
printf '%s' "$count" >"$count_file"
|
printf '%s' "$count" >"$count_file"
|
||||||
|
|
||||||
|
while (($#)); do
|
||||||
|
case "$1" in
|
||||||
|
--env-file)
|
||||||
|
if [[ $# -ge 2 && -f "$2" ]]; then
|
||||||
|
local_env=$2
|
||||||
|
while IFS='=' read -r env_key env_value || [[ -n "$env_key" ]]; do
|
||||||
|
[[ -z "$env_key" || "$env_key" =~ ^# ]] && continue
|
||||||
|
env_key=${env_key#export }
|
||||||
|
env_key=${env_key%%[[:space:]]*}
|
||||||
|
printf -v "$env_key" '%s' "$env_value"
|
||||||
|
export "$env_key"
|
||||||
|
done <"$local_env"
|
||||||
|
fi
|
||||||
|
shift 2
|
||||||
|
;;
|
||||||
|
*)
|
||||||
|
shift
|
||||||
|
;;
|
||||||
|
esac
|
||||||
|
done
|
||||||
|
|
||||||
|
token="${DISCORD_TOKEN:-}"
|
||||||
|
if [[ -z "$token" && -n "${DISCORD_TOKEN_FILE:-}" && -f "$DISCORD_TOKEN_FILE" ]]; then
|
||||||
|
token=$(head -n 1 "$DISCORD_TOKEN_FILE" | tr -d '\r')
|
||||||
|
fi
|
||||||
|
|
||||||
if [[ "$mode" == "auth-refresh" ]]; then
|
if [[ "$mode" == "auth-refresh" ]]; then
|
||||||
if [[ "${DISCORD_TOKEN:-}" == "stale-token" ]]; then
|
if [[ "$token" == "stale-token" ]]; then
|
||||||
printf 'Authentication token is invalid.\n' >&2
|
printf 'Authentication token is invalid.\n' >&2
|
||||||
printf 'fresh-token\n' >"$token_file"
|
printf 'fresh-token\n' >"$token_file"
|
||||||
exit 1
|
exit 1
|
||||||
|
|
@ -58,14 +84,15 @@ run_host() {
|
||||||
local mode=$1
|
local mode=$1
|
||||||
local env_path=${2:-$ENV_FILE}
|
local env_path=${2:-$ENV_FILE}
|
||||||
|
|
||||||
DCE_REPO_ROOT="$REPO_ROOT" \
|
env -u DISCORD_TOKEN \
|
||||||
DCE_DOCKER_BIN="$FAKE_DOCKER" \
|
DCE_REPO_ROOT="$REPO_ROOT" \
|
||||||
DCE_ENV_FILE="$env_path" \
|
DCE_DOCKER_BIN="$FAKE_DOCKER" \
|
||||||
DCE_COMPOSE_FILE="$COMPOSE_FILE" \
|
DCE_ENV_FILE="$env_path" \
|
||||||
FAKE_DOCKER_CALL_COUNT="$CALL_COUNT" \
|
DCE_COMPOSE_FILE="$COMPOSE_FILE" \
|
||||||
FAKE_DOCKER_TOKEN_FILE="$TOKEN_FILE" \
|
FAKE_DOCKER_CALL_COUNT="$CALL_COUNT" \
|
||||||
FAKE_DOCKER_MODE="$mode" \
|
FAKE_DOCKER_TOKEN_FILE="$TOKEN_FILE" \
|
||||||
"$REPO_ROOT/scripts/run-discord-scrape-host.sh" scrape --target demo
|
FAKE_DOCKER_MODE="$mode" \
|
||||||
|
"$REPO_ROOT/scripts/run-discord-scrape-host.sh" scrape --target demo
|
||||||
}
|
}
|
||||||
|
|
||||||
run_host_with_shell_token() {
|
run_host_with_shell_token() {
|
||||||
|
|
|
||||||
|
|
@ -194,6 +194,7 @@ case "$subcommand" in
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if [[ "$channel" == "134" ]]; then
|
if [[ "$channel" == "134" ]]; then
|
||||||
|
cp "$fixture_dir/salvage-truncated.json" "$output"
|
||||||
echo "Aborted (core dumped)" >&2
|
echo "Aborted (core dumped)" >&2
|
||||||
exit 134
|
exit 134
|
||||||
fi
|
fi
|
||||||
|
|
@ -386,6 +387,10 @@ SKIP_ABORT_DEST="$ARCHIVE_ROOT/skip-abort/$DEFAULT_FILE_NAME"
|
||||||
[[ "$(jq -r '.messages | length' "$SKIP_ABORT_DEST")" == "3" ]] || { echo "expected skip-abort to append accessible channel" >&2; exit 1; }
|
[[ "$(jq -r '.messages | length' "$SKIP_ABORT_DEST")" == "3" ]] || { echo "expected skip-abort to append accessible channel" >&2; exit 1; }
|
||||||
[[ ! -e "$ARCHIVE_ROOT/skip-abort/channels/134.json" ]] || { echo "unexpected fallback file for skipped abort channel" >&2; exit 1; }
|
[[ ! -e "$ARCHIVE_ROOT/skip-abort/channels/134.json" ]] || { echo "unexpected fallback file for skipped abort channel" >&2; exit 1; }
|
||||||
grep -q 'SKIPPED.*134' "$SKIP_ABORT_LOG" || { echo "expected SKIPPED line for abort channel 134" >&2; exit 1; }
|
grep -q 'SKIPPED.*134' "$SKIP_ABORT_LOG" || { echo "expected SKIPPED line for abort channel 134" >&2; exit 1; }
|
||||||
|
grep -q 'Preserving partial export temp' "$SKIP_ABORT_LOG" || { echo "expected partial temp preserved on abort channel 134" >&2; exit 1; }
|
||||||
|
partial_temp_dirs=( "$ARCHIVE_ROOT/skip-abort/.dce-temp"/export.134.* )
|
||||||
|
[[ -d "${partial_temp_dirs[0]}" ]] || { echo "expected partial temp dir preserved for channel 134" >&2; exit 1; }
|
||||||
|
[[ -s "${partial_temp_dirs[0]}/export.json" ]] || { echo "expected partial export.json preserved for channel 134" >&2; exit 1; }
|
||||||
|
|
||||||
# Salvage stale temp export smoke
|
# Salvage stale temp export smoke
|
||||||
mkdir -p "$ARCHIVE_ROOT/salvage-stale"
|
mkdir -p "$ARCHIVE_ROOT/salvage-stale"
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue