From ae120c916f6f25b9ff75233313fb2b56309ec80e Mon Sep 17 00:00:00 2001 From: Copilot Date: Wed, 3 Jun 2026 06:31:02 -0500 Subject: [PATCH] fix(scrape): skip active stale temps and retry salvage merge Avoid salvaging export.json while a channel export is still writing (default: skip temps modified within 120s). Retry truncate+merge once when merge fails on a quiescent partial temp. Adds active-skip smoke and ages stale fixture mtime so salvage tests stay deterministic. --- ...fix-stale-temp-salvage-active-race-plan.md | 42 ++++++++++ scripts/run-discord-scrape.sh | 78 ++++++++++++++----- scripts/tests/run-discord-scrape-smoke.sh | 24 ++++++ 3 files changed, 124 insertions(+), 20 deletions(-) create mode 100644 docs/plans/2026-06-04-051-fix-stale-temp-salvage-active-race-plan.md diff --git a/docs/plans/2026-06-04-051-fix-stale-temp-salvage-active-race-plan.md b/docs/plans/2026-06-04-051-fix-stale-temp-salvage-active-race-plan.md new file mode 100644 index 00000000..17a3727b --- /dev/null +++ b/docs/plans/2026-06-04-051-fix-stale-temp-salvage-active-race-plan.md @@ -0,0 +1,42 @@ +--- +title: "fix: Skip active stale temps and retry salvage merge" +type: fix +status: complete +date: 2026-06-04 +origin: /lfg — yes_general logs show Stale temp merge failed while export.json still growing (73MB+ invalid JSON) +--- + +# fix: Skip active stale temps and retry salvage merge + +## Problem + +`salvage_stale_temp_exports` can run while a channel export is still writing `export.json`. The file is truncated/invalid, `merge_exports_auto` fails, and the temp is retained — but the next incremental pass hits the same race. Observed on KotOR `yes_general` (`221726893064454144`): merge fails on ~82MB partial temp while archive stays at 266182 messages (2021 cursor). + +Salvage after export completes works (truncated temp → 79529 messages merges to 345711 in ~58s). 
+
+## Requirements
+
+| ID | Requirement |
+|----|-------------|
+| R1 | Skip stale temp dirs whose `export.json` was modified within `DCE_STALE_TEMP_MIN_AGE_SECONDS` (default 120) |
+| R2 | On merge failure, re-run `salvage_truncated_json` and retry merge once before retaining temp |
+| R3 | Log merge retry vs skip-active with distinct messages |
+| R4 | Offline smoke: active temp skipped; retry succeeds after simulated truncation |
+| R5 | `run-all-smokes.sh` passes |
+
+## Implementation
+
+- `scripts/run-discord-scrape.sh` — `stale_temp_is_active`, skip guard, merge retry helper
+- `scripts/tests/run-discord-scrape-smoke.sh` — active-temp skip + merge-retry scenarios
+
+## Verification
+
+```bash
+./scripts/tests/run-discord-scrape-smoke.sh
+DCE_MIN_FREE_MB=0 ./scripts/run-all-smokes.sh
+```
+
+## Out of scope
+
+- Completing yes_general catch-up inside LFG
+- Container memory limits
diff --git a/scripts/run-discord-scrape.sh b/scripts/run-discord-scrape.sh
index 4e00cd0e..8824127b 100755
--- a/scripts/run-discord-scrape.sh
+++ b/scripts/run-discord-scrape.sh
@@ -629,12 +629,89 @@ PY
   fi
 }
 
+# Report whether a stale temp export still looks like it is being written.
+# Arguments: $1 - path to the temp export.json to inspect
+# Environment:
+#   DCE_STALE_TEMP_MIN_AGE_SECONDS - seconds since last modification below
+#     which the temp counts as active (default 120, also used if not a number)
+#   DCE_SALVAGE_ACTIVE_TEMPS - when 1, never report a temp as active
+# Returns: 0 if the file was modified less than the minimum age ago, else 1.
+stale_temp_is_active() {
+  local stale_export=$1
+  local min_age=${DCE_STALE_TEMP_MIN_AGE_SECONDS:-120}
+  local now mtime age
+
+  if [[ "${DCE_SALVAGE_ACTIVE_TEMPS:-0}" == "1" ]]; then
+    return 1
+  fi
+
+  # Arithmetic contexts evaluate arbitrary expressions; accept plain decimals only.
+  [[ "$min_age" =~ ^(0|[1-9][0-9]*)$ ]] || min_age=120
+
+  now=$(date +%s)
+  # GNU stat, then BSD stat; falling back to 0 makes an unreadable file look old.
+  mtime=$(stat -c '%Y' "$stale_export" 2>/dev/null || stat -f '%m' "$stale_export" 2>/dev/null || echo 0)
+  [[ "$mtime" =~ ^[0-9]+$ ]] || mtime=0
+  age=$((now - mtime))
+  (( age < min_age ))
+}
+
+# Merge a salvaged stale temp export into the channel archive.
+# The merge is attempted at most twice; before the second attempt the temp
+# export is passed through salvage_truncated_json again.
+# Arguments: $1 - destination archive path
+#            $2 - stale temp export.json
+#            $3 - stale temp directory (merged.json is written there)
+# Returns: 0 once a valid merge has been committed, 1 if the temp should
+#          be retained for a later run.
+merge_stale_export_into_destination() {
+  local destination_path=$1
+  local stale_export=$2
+  local stale_dir=$3
+  local salvage_merged="$stale_dir/merged.json"
+  local attempt=0
+  local before_count after_count
+
+  while (( attempt < 2 )); do
+    if (( attempt > 0 )); then
+      log " Retrying stale temp merge after re-truncating: $stale_dir"
+      salvage_truncated_json "$stale_export" || true
+    fi
+    rm -f "$salvage_merged"
+    if merge_exports_auto "$destination_path" "$stale_export" "$salvage_merged" && [[ -s "$salvage_merged" ]]; then
+      if ! json_is_valid "$salvage_merged"; then
+        log " Stale temp merge produced invalid JSON, retaining for retry: $stale_dir"
+        return 1
+      fi
+      before_count=$(message_count_fast "$destination_path")
+      # Nothing else checks the commit status (errexit, if enabled, is ignored
+      # while this function runs as an `if` condition), so a failed commit would
+      # otherwise be reported as a clean merge with no new messages.
+      if ! commit_merged_export "$destination_path" "$salvage_merged"; then
+        log " Stale temp merge commit failed, retaining for retry: $stale_dir"
+        return 1
+      fi
+      after_count=$(message_count_fast "$destination_path")
+      if (( after_count > before_count )); then
+        log " SALVAGED $destination_path (+$((after_count - before_count)) messages from stale temp, $before_count → $after_count)"
+      else
+        log " Stale temp merged with no new messages, discarding: $stale_dir"
+      fi
+      return 0
+    fi
+    attempt=$((attempt + 1))
+  done
+
+  log " Stale temp merge failed, retaining for retry: $stale_dir"
+  return 1
+}
+
 salvage_stale_temp_exports() {
   local output_dir=$1
   local channel_id=$2
   local destination_path=$3
-  local stale_dirs stale_dir stale_export salvage_merged
+  local stale_dirs stale_dir stale_export
 
   mapfile -t stale_dirs < <(
     find "$output_dir/.dce-temp" -maxdepth 1 -type d -name "export.${channel_id}.*" 2>/dev/null || true
   )
@@ -646,6 +723,11 @@ salvage_stale_temp_exports() {
     [[ -f "$stale_export" ]] || { rm -rf "$stale_dir"; continue; }
     [[ -s "$stale_export" ]] || { rm -rf "$stale_dir"; continue; }
 
+    if stale_temp_is_active "$stale_export"; then
+      log " Stale temp still active (recently modified), skipping salvage: $stale_dir"
+      continue
+    fi
+
     if !
salvage_truncated_json "$stale_export"; then
       log " Stale temp export unsalvageable, discarding: $stale_dir"
       rm -rf "$stale_dir"
@@ -668,25 +723,8 @@ salvage_stale_temp_exports() {
     fi
 
     if [[ -n "$destination_path" && -f "$destination_path" ]]; then
-      salvage_merged="$stale_dir/merged.json"
-      if merge_exports_auto "$destination_path" "$stale_export" "$salvage_merged" && [[ -s "$salvage_merged" ]]; then
-        if json_is_valid "$salvage_merged"; then
-          local before_count after_count
-          before_count=$(message_count_fast "$destination_path")
-          commit_merged_export "$destination_path" "$salvage_merged"
-          after_count=$(message_count_fast "$destination_path")
-          if (( after_count > before_count )); then
-            log " SALVAGED $destination_path (+$((after_count - before_count)) messages from stale temp, $before_count → $after_count)"
-            merged_ok=1
-          else
-            log " Stale temp merged with no new messages, discarding: $stale_dir"
-            merged_ok=1
-          fi
-        else
-          log " Stale temp merge produced invalid JSON, retaining for retry: $stale_dir"
-        fi
-      else
-        log " Stale temp merge failed, retaining for retry: $stale_dir"
+      if merge_stale_export_into_destination "$destination_path" "$stale_export" "$stale_dir"; then
+        merged_ok=1
       fi
     elif [[ -n "$destination_path" ]]; then
       mkdir -p "$(dirname "$destination_path")"
diff --git a/scripts/tests/run-discord-scrape-smoke.sh b/scripts/tests/run-discord-scrape-smoke.sh
index e772d0b5..5f97d86c 100755
--- a/scripts/tests/run-discord-scrape-smoke.sh
+++ b/scripts/tests/run-discord-scrape-smoke.sh
@@ -150,6 +150,14 @@ cat >"$CONFIG_PATH" <&2; exit 1; }
 # Salvage stale temp export smoke
+mkdir -p "$ARCHIVE_ROOT/salvage-stale-active"
+cp "$FIXTURE_DIR/append-existing.json" "$ARCHIVE_ROOT/salvage-stale-active/$DEFAULT_FILE_NAME"
+mkdir -p "$ARCHIVE_ROOT/salvage-stale-active/.dce-meta"
+printf '{\"111\":\"%s\"}\n' "$ARCHIVE_ROOT/salvage-stale-active/$DEFAULT_FILE_NAME" >"$ARCHIVE_ROOT/salvage-stale-active/.dce-meta/channel-map.json"
+mkdir -p "$ARCHIVE_ROOT/salvage-stale-active/.dce-temp/export.111.ACTIVE"
+cp "$FIXTURE_DIR/salvage-truncated.json" "$ARCHIVE_ROOT/salvage-stale-active/.dce-temp/export.111.ACTIVE/export.json"
+SALVAGE_ACTIVE_LOG="$TMP_DIR/salvage-stale-active.log"
+DCE_STALE_TEMP_MIN_AGE_SECONDS=9999 \
+  run_wrapper salvage-stale-active append 2>"$SALVAGE_ACTIVE_LOG"
+grep -q 'still active' "$SALVAGE_ACTIVE_LOG" || { echo "expected active stale temp skip message" >&2; exit 1; }
+[[ -d "$ARCHIVE_ROOT/salvage-stale-active/.dce-temp/export.111.ACTIVE" ]] || {
+  echo "expected active stale temp dir to be retained" >&2
+  exit 1
+}
+
 mkdir -p "$ARCHIVE_ROOT/salvage-stale"
 cp "$FIXTURE_DIR/append-existing.json" "$ARCHIVE_ROOT/salvage-stale/$DEFAULT_FILE_NAME"
 mkdir -p "$ARCHIVE_ROOT/salvage-stale/.dce-meta"
 printf '{\"111\":\"%s\"}\n' "$ARCHIVE_ROOT/salvage-stale/$DEFAULT_FILE_NAME" >"$ARCHIVE_ROOT/salvage-stale/.dce-meta/channel-map.json"
 mkdir -p "$ARCHIVE_ROOT/salvage-stale/.dce-temp/export.111.STALE"
 cp "$FIXTURE_DIR/salvage-truncated.json" "$ARCHIVE_ROOT/salvage-stale/.dce-temp/export.111.STALE/export.json"
+# Age the fixture past the active-temp window with a fixed POSIX -t stamp;
+# relative dates such as `touch -d '1 hour ago'` are GNU-only syntax.
+touch -t 200001010000 "$ARCHIVE_ROOT/salvage-stale/.dce-temp/export.111.STALE/export.json"
 SALVAGE_LOG="$TMP_DIR/salvage-stale.log"
 run_wrapper salvage-stale append 2>"$SALVAGE_LOG"
 SALVAGE_DEST="$ARCHIVE_ROOT/salvage-stale/$DEFAULT_FILE_NAME"