mirror of
https://github.com/Tyrrrz/DiscordChatExporter.git
synced 2026-06-09 15:52:37 -06:00
test(scrape): preserve-partial smoke; fix host token-file precedence
Add an offline regression smoke verifying that an OOM skip preserves partial export temp directories. The host wrapper now prefers DISCORD_TOKEN_FILE over a DISCORD_TOKEN inherited from the shell and always writes an explicit compose env file for auth-retry. All 19 smokes pass.
This commit is contained in:
parent
87537eb8b0
commit
8b54b6a498
|
|
@ -0,0 +1,37 @@
|
|||
---
|
||||
title: "feat: Preserve-partial smoke and KotOR validation run"
|
||||
type: feat
|
||||
status: complete
|
||||
date: 2026-06-04
|
||||
origin: /lfg — close plan 043 with regression smoke; run live KotOR validation
|
||||
---
|
||||
|
||||
# feat: Preserve-partial smoke and KotOR validation run
|
||||
|
||||
## Summary
|
||||
|
||||
Plan 043 fixed the re-download loop but lacks an offline regression test for "preserve partial temp on OOM skip". This plan adds smoke coverage, rebuilds the container, runs the KotOR validation, and updates the merge-readiness doc.
|
||||
|
||||
## Requirements
|
||||
|
||||
| ID | Requirement |
|
||||
|----|-------------|
|
||||
| R1 | Smoke: fake CLI writes a partial export for channel 134, then exits with status 134 (abort); the temp dir is preserved after the channel is reported as SKIPPED |
|
||||
| R2 | `run-discord-scrape-smoke.sh` and `run-all-smokes.sh` pass (19/19) |
|
||||
| R3 | Rebuild image; start `run-operator-validation.sh --target KotOR_discord_msgs` with log |
|
||||
| R4 | `docs/recurring-scrape-merge-readiness.md` updated with validation run status |
|
||||
| R5 | PR #1538 body notes plan 044 |
|
||||
|
||||
## Verification
|
||||
|
||||
```bash
|
||||
./scripts/tests/run-discord-scrape-smoke.sh
|
||||
DCE_MIN_FREE_MB=0 ./scripts/run-all-smokes.sh
|
||||
podman-compose build
|
||||
DCE_MIN_FREE_MB=0 ./scripts/run-operator-validation.sh --target KotOR_discord_msgs --log-file logs/kotor-validation-20260604.log
|
||||
```
|
||||
|
||||
## Out of scope
|
||||
|
||||
- Waiting for yes_general multi-hour catch-up to finish inside LFG
|
||||
- Container memory tuning
|
||||
|
|
@ -111,7 +111,9 @@ DCE_MIN_FREE_MB=0 ./scripts/run-operator-validation.sh --sync-gui --per-target -
|
|||
| expanded_kotor_discord | pass | pass | validation-resume |
|
||||
| eod_discord | pass | pass | validation-resume |
|
||||
| DS_Discord_msgs | pass | pass | validation-resume; some channels forbidden |
|
||||
| KotOR_discord_msgs | **in progress** | — | `yes_general` has ~5 years of backlog (archive cursor was Jan 2021); plan 043 preserves partial temps on OOM skip |
|
||||
| KotOR_discord_msgs | **in progress** | — | plan 044 validation started 2026-06-04 (`logs/kotor-validation-20260604.log`); `yes_general` catch-up + preserve-partial smoke |
|
||||
|
||||
**Plan 044 (2026-06-04):** Offline smoke asserts partial temp preserved on OOM skip (channel 134). Host wrapper always writes explicit compose env from `DISCORD_TOKEN_FILE` (fixes auth-retry when shell exports a stale `DISCORD_TOKEN`). `run-all-smokes.sh` → 19/19 pass.
|
||||
|
||||
**KotOR / yes_general (plan 040–043):** Incremental `--after` works for all channels; most return `UNCHANGED` in seconds. The last message in the `yes_general` archive is dated **2021-01-17**, so the first catch-up legitimately fetches years of history. Prior bug: an OOM skip **deleted** partial temp exports, causing re-download loops. Plan 043 preserves partial temps and salvages them on the next run.
|
||||
|
||||
|
|
|
|||
|
|
@ -126,7 +126,14 @@ configure_rootless_compose() {
|
|||
prepare_compose_env() {
|
||||
if [[ -f "$ENV_FILE" ]]; then
|
||||
load_env_file
|
||||
COMPOSE_ENV_FILE="$ENV_FILE"
|
||||
if [[ -n "${DISCORD_TOKEN_FILE:-}" && -f "${DISCORD_TOKEN_FILE}" ]]; then
|
||||
load_token_from_file || true
|
||||
elif [[ -z "${DISCORD_TOKEN:-}" ]]; then
|
||||
discover_token_file || true
|
||||
load_token_from_file || true
|
||||
load_token_from_discover_script || true
|
||||
fi
|
||||
write_compose_env_temp
|
||||
configure_rootless_compose
|
||||
return 0
|
||||
fi
|
||||
|
|
@ -190,9 +197,13 @@ load_token_from_discover_script() {
|
|||
}
|
||||
|
||||
ensure_token_present() {
|
||||
if [[ -z "${DISCORD_TOKEN:-}" ]]; then
|
||||
if [[ -n "${DISCORD_TOKEN_FILE:-}" && -f "${DISCORD_TOKEN_FILE}" ]]; then
|
||||
load_token_from_file || true
|
||||
elif [[ -z "${DISCORD_TOKEN:-}" ]]; then
|
||||
discover_token_file || true
|
||||
load_token_from_file || true
|
||||
fi
|
||||
if [[ -z "${DISCORD_TOKEN:-}" ]]; then
|
||||
load_token_from_discover_script || true
|
||||
fi
|
||||
[[ -n "${DISCORD_TOKEN:-}" ]] || die "DISCORD_TOKEN is not set. Set DISCORD_TOKEN or DISCORD_TOKEN_FILE in $ENV_FILE, export it in the shell, place a token at $REPO_ROOT/.discord-token or ~/.config/discord-scrape/token, or sign in via DiscordChatExporter GUI / Discord desktop on this machine."
|
||||
|
|
@ -365,14 +376,13 @@ run_subcommand_with_retry() {
|
|||
|
||||
printf 'Detected Discord auth failure. Refreshing token and retrying once...\n' >&2
|
||||
load_token_from_file || true
|
||||
load_token_from_discover_script || true
|
||||
if [[ -f "$ENV_FILE" ]]; then
|
||||
COMPOSE_ENV_FILE="$ENV_FILE"
|
||||
elif [[ -n "${DISCORD_TOKEN:-}" ]]; then
|
||||
rm -f "$COMPOSE_ENV_TEMP"
|
||||
COMPOSE_ENV_TEMP=""
|
||||
write_compose_env_temp
|
||||
if [[ -z "${DISCORD_TOKEN:-}" ]]; then
|
||||
load_token_from_discover_script || true
|
||||
fi
|
||||
rm -f "$COMPOSE_ENV_TEMP"
|
||||
COMPOSE_ENV_TEMP=""
|
||||
write_compose_env_temp
|
||||
COMPOSE_ENV_FILE="$COMPOSE_ENV_TEMP"
|
||||
try_interactive_reauth || true
|
||||
ensure_token_present
|
||||
compose_run_args run_args "$subcommand" "$@"
|
||||
|
|
|
|||
|
|
@ -35,8 +35,34 @@ fi
|
|||
count=$((count + 1))
|
||||
printf '%s' "$count" >"$count_file"
|
||||
|
||||
while (($#)); do
|
||||
case "$1" in
|
||||
--env-file)
|
||||
if [[ $# -ge 2 && -f "$2" ]]; then
|
||||
local_env=$2
|
||||
while IFS='=' read -r env_key env_value || [[ -n "$env_key" ]]; do
|
||||
[[ -z "$env_key" || "$env_key" =~ ^# ]] && continue
|
||||
env_key=${env_key#export }
|
||||
env_key=${env_key%%[[:space:]]*}
|
||||
printf -v "$env_key" '%s' "$env_value"
|
||||
export "$env_key"
|
||||
done <"$local_env"
|
||||
fi
|
||||
shift 2
|
||||
;;
|
||||
*)
|
||||
shift
|
||||
;;
|
||||
esac
|
||||
done
|
||||
|
||||
token="${DISCORD_TOKEN:-}"
|
||||
if [[ -z "$token" && -n "${DISCORD_TOKEN_FILE:-}" && -f "$DISCORD_TOKEN_FILE" ]]; then
|
||||
token=$(head -n 1 "$DISCORD_TOKEN_FILE" | tr -d '\r')
|
||||
fi
|
||||
|
||||
if [[ "$mode" == "auth-refresh" ]]; then
|
||||
if [[ "${DISCORD_TOKEN:-}" == "stale-token" ]]; then
|
||||
if [[ "$token" == "stale-token" ]]; then
|
||||
printf 'Authentication token is invalid.\n' >&2
|
||||
printf 'fresh-token\n' >"$token_file"
|
||||
exit 1
|
||||
|
|
@ -58,14 +84,15 @@ run_host() {
|
|||
local mode=$1
|
||||
local env_path=${2:-$ENV_FILE}
|
||||
|
||||
DCE_REPO_ROOT="$REPO_ROOT" \
|
||||
DCE_DOCKER_BIN="$FAKE_DOCKER" \
|
||||
DCE_ENV_FILE="$env_path" \
|
||||
DCE_COMPOSE_FILE="$COMPOSE_FILE" \
|
||||
FAKE_DOCKER_CALL_COUNT="$CALL_COUNT" \
|
||||
FAKE_DOCKER_TOKEN_FILE="$TOKEN_FILE" \
|
||||
FAKE_DOCKER_MODE="$mode" \
|
||||
"$REPO_ROOT/scripts/run-discord-scrape-host.sh" scrape --target demo
|
||||
env -u DISCORD_TOKEN \
|
||||
DCE_REPO_ROOT="$REPO_ROOT" \
|
||||
DCE_DOCKER_BIN="$FAKE_DOCKER" \
|
||||
DCE_ENV_FILE="$env_path" \
|
||||
DCE_COMPOSE_FILE="$COMPOSE_FILE" \
|
||||
FAKE_DOCKER_CALL_COUNT="$CALL_COUNT" \
|
||||
FAKE_DOCKER_TOKEN_FILE="$TOKEN_FILE" \
|
||||
FAKE_DOCKER_MODE="$mode" \
|
||||
"$REPO_ROOT/scripts/run-discord-scrape-host.sh" scrape --target demo
|
||||
}
|
||||
|
||||
run_host_with_shell_token() {
|
||||
|
|
|
|||
|
|
@ -194,6 +194,7 @@ case "$subcommand" in
|
|||
fi
|
||||
|
||||
if [[ "$channel" == "134" ]]; then
|
||||
cp "$fixture_dir/salvage-truncated.json" "$output"
|
||||
echo "Aborted (core dumped)" >&2
|
||||
exit 134
|
||||
fi
|
||||
|
|
@ -386,6 +387,10 @@ SKIP_ABORT_DEST="$ARCHIVE_ROOT/skip-abort/$DEFAULT_FILE_NAME"
|
|||
[[ "$(jq -r '.messages | length' "$SKIP_ABORT_DEST")" == "3" ]] || { echo "expected skip-abort to append accessible channel" >&2; exit 1; }
|
||||
[[ ! -e "$ARCHIVE_ROOT/skip-abort/channels/134.json" ]] || { echo "unexpected fallback file for skipped abort channel" >&2; exit 1; }
|
||||
grep -q 'SKIPPED.*134' "$SKIP_ABORT_LOG" || { echo "expected SKIPPED line for abort channel 134" >&2; exit 1; }
|
||||
grep -q 'Preserving partial export temp' "$SKIP_ABORT_LOG" || { echo "expected partial temp preserved on abort channel 134" >&2; exit 1; }
|
||||
partial_temp_dirs=( "$ARCHIVE_ROOT/skip-abort/.dce-temp"/export.134.* )
|
||||
[[ -d "${partial_temp_dirs[0]}" ]] || { echo "expected partial temp dir preserved for channel 134" >&2; exit 1; }
|
||||
[[ -s "${partial_temp_dirs[0]}/export.json" ]] || { echo "expected partial export.json preserved for channel 134" >&2; exit 1; }
|
||||
|
||||
# Salvage stale temp export smoke
|
||||
mkdir -p "$ARCHIVE_ROOT/salvage-stale"
|
||||
|
|
|
|||
Loading…
Reference in a new issue