fix(scrape): skip active stale temps and retry salvage merge

Avoid salvaging export.json while a channel export is still writing
(default: skip temps modified within 120s). Retry truncate+merge once
when merge fails on a quiescent partial temp. Adds active-skip smoke
and ages stale fixture mtime so salvage tests stay deterministic.
This commit is contained in:
Copilot 2026-06-03 06:31:02 -05:00
parent 14796e9c09
commit ae120c916f
3 changed files with 124 additions and 20 deletions

View file

@ -0,0 +1,42 @@
---
title: "fix: Skip active stale temps and retry salvage merge"
type: fix
status: complete
date: 2026-06-04
origin: /lfg — yes_general logs show Stale temp merge failed while export.json still growing (73MB+ invalid JSON)
---
# fix: Skip active stale temps and retry salvage merge
## Problem
`salvage_stale_temp_exports` can run while a channel export is still writing `export.json`. The file is truncated/invalid, `merge_exports_auto` fails, and the temp is retained — but the next incremental pass hits the same race. Observed on KotOR `yes_general` (`221726893064454144`): merge fails on ~82MB partial temp while archive stays at 266182 messages (2021 cursor).
Salvage after export completes works (truncated temp → 79529 messages merges to 345711 in ~58s).
## Requirements
| ID | Requirement |
|----|-------------|
| R1 | Skip stale temp dirs whose `export.json` was modified within `DCE_STALE_TEMP_MIN_AGE_SECONDS` (default 120) |
| R2 | On merge failure, re-run `salvage_truncated_json` and retry merge once before retaining temp |
| R3 | Log merge retry vs skip-active with distinct messages |
| R4 | Offline smoke: active temp skipped; retry succeeds after simulated truncation |
| R5 | `run-all-smokes.sh` passes |
## Implementation
- `scripts/run-discord-scrape.sh` — `stale_temp_is_active`, skip guard, merge retry helper
- `scripts/tests/run-discord-scrape-smoke.sh` — active-temp skip + merge-retry scenarios
## Verification
```bash
./scripts/tests/run-discord-scrape-smoke.sh
DCE_MIN_FREE_MB=0 ./scripts/run-all-smokes.sh
```
## Out of scope
- Completing yes_general catch-up inside LFG
- Container memory limits

View file

@ -629,12 +629,62 @@ PY
fi fi
} }
# Report whether a stale temp export was modified too recently to salvage.
#
# Globals:
#   DCE_SALVAGE_ACTIVE_TEMPS (read)       - "1" disables this check entirely.
#   DCE_STALE_TEMP_MIN_AGE_SECONDS (read) - minimum quiet period in seconds
#                                           (default 120).
# Arguments:
#   $1 - path to the temp export file whose mtime is inspected.
# Returns:
#   0 if the file was modified less than the minimum age ago (treat as active),
#   1 otherwise (including when the override is set or the mtime is unreadable).
stale_temp_is_active() {
  local stale_export=$1
  local default_min_age=120
  local min_age=${DCE_STALE_TEMP_MIN_AGE_SECONDS:-$default_min_age}
  local now mtime age

  if [[ "${DCE_SALVAGE_ACTIVE_TEMPS:-0}" == "1" ]]; then
    return 1
  fi

  # A non-numeric threshold would be evaluated as an (unset) variable name
  # inside (( )), i.e. 0, silently disabling the guard. Fall back instead.
  if [[ ! "$min_age" =~ ^[0-9]+$ ]]; then
    printf 'warning: invalid DCE_STALE_TEMP_MIN_AGE_SECONDS=%s, using %s\n' \
      "$min_age" "$default_min_age" >&2
    min_age=$default_min_age
  fi
  # Force base 10 so values such as "090" are not parsed as octal.
  min_age=$((10#$min_age))

  now=$(date +%s)
  # GNU stat first, then BSD stat; an unreadable mtime counts as epoch 0,
  # which makes the file look old (not active).
  mtime=$(stat -c '%Y' "$stale_export" 2>/dev/null || stat -f '%m' "$stale_export" 2>/dev/null || echo 0)
  [[ "$mtime" =~ ^[0-9]+$ ]] || mtime=0
  age=$((now - mtime))
  (( age < min_age ))
}
# Merge a stale temp export into an existing destination archive, retrying once.
#
# The merged result is staged at "$stale_dir/merged.json". If the first merge
# attempt fails, the stale export is re-truncated via salvage_truncated_json
# and the merge is attempted one more time.
#
# Arguments:
#   $1 - destination_path: existing archive file to merge into.
#   $2 - stale_export: path to the stale temp export file.
#   $3 - stale_dir: temp directory holding the stale export.
# Returns:
#   0 when the merged result was committed (with or without new messages),
#   1 when the merge, validation, or commit failed and the temp should be
#     retained for a later retry.
merge_stale_export_into_destination() {
  local destination_path=$1
  local stale_export=$2
  local stale_dir=$3
  local salvage_merged="$stale_dir/merged.json"
  local attempt=0
  local before_count after_count

  while (( attempt < 2 )); do
    if (( attempt > 0 )); then
      log " Stale temp merge attempt failed, re-salvaging and retrying once: $stale_dir"
      # Best effort: even if re-truncation reports failure, the second merge
      # attempt decides whether the temp is usable.
      salvage_truncated_json "$stale_export" || true
    fi
    attempt=$((attempt + 1))

    # Never validate or commit a leftover from a previous attempt.
    rm -f -- "$salvage_merged"
    if ! merge_exports_auto "$destination_path" "$stale_export" "$salvage_merged" \
      || [[ ! -s "$salvage_merged" ]]; then
      continue
    fi

    if ! json_is_valid "$salvage_merged"; then
      log " Stale temp merge produced invalid JSON, retaining for retry: $stale_dir"
      return 1
    fi

    before_count=$(message_count_fast "$destination_path")
    # A failed commit must not be reported as success: the caller treats a
    # zero status as "merged" and may discard the temp directory.
    if ! commit_merged_export "$destination_path" "$salvage_merged"; then
      log " Stale temp merge commit failed, retaining for retry: $stale_dir"
      return 1
    fi
    after_count=$(message_count_fast "$destination_path")

    if (( after_count > before_count )); then
      log " SALVAGED $destination_path (+$((after_count - before_count)) messages from stale temp, $before_count -> $after_count)"
    else
      log " Stale temp merged with no new messages, discarding: $stale_dir"
    fi
    return 0
  done

  log " Stale temp merge failed, retaining for retry: $stale_dir"
  return 1
}
salvage_stale_temp_exports() { salvage_stale_temp_exports() {
local output_dir=$1 local output_dir=$1
local channel_id=$2 local channel_id=$2
local destination_path=$3 local destination_path=$3
local stale_dirs stale_dir stale_export salvage_merged local stale_dirs stale_dir stale_export
mapfile -t stale_dirs < <( mapfile -t stale_dirs < <(
find "$output_dir/.dce-temp" -maxdepth 1 -type d -name "export.${channel_id}.*" 2>/dev/null || true find "$output_dir/.dce-temp" -maxdepth 1 -type d -name "export.${channel_id}.*" 2>/dev/null || true
) )
@ -646,6 +696,11 @@ salvage_stale_temp_exports() {
[[ -f "$stale_export" ]] || { rm -rf "$stale_dir"; continue; } [[ -f "$stale_export" ]] || { rm -rf "$stale_dir"; continue; }
[[ -s "$stale_export" ]] || { rm -rf "$stale_dir"; continue; } [[ -s "$stale_export" ]] || { rm -rf "$stale_dir"; continue; }
if stale_temp_is_active "$stale_export"; then
log " Stale temp still active (recently modified), skipping salvage: $stale_dir"
continue
fi
if ! salvage_truncated_json "$stale_export"; then if ! salvage_truncated_json "$stale_export"; then
log " Stale temp export unsalvageable, discarding: $stale_dir" log " Stale temp export unsalvageable, discarding: $stale_dir"
rm -rf "$stale_dir" rm -rf "$stale_dir"
@ -668,25 +723,8 @@ salvage_stale_temp_exports() {
fi fi
if [[ -n "$destination_path" && -f "$destination_path" ]]; then if [[ -n "$destination_path" && -f "$destination_path" ]]; then
salvage_merged="$stale_dir/merged.json" if merge_stale_export_into_destination "$destination_path" "$stale_export" "$stale_dir"; then
if merge_exports_auto "$destination_path" "$stale_export" "$salvage_merged" && [[ -s "$salvage_merged" ]]; then merged_ok=1
if json_is_valid "$salvage_merged"; then
local before_count after_count
before_count=$(message_count_fast "$destination_path")
commit_merged_export "$destination_path" "$salvage_merged"
after_count=$(message_count_fast "$destination_path")
if (( after_count > before_count )); then
log " SALVAGED $destination_path (+$((after_count - before_count)) messages from stale temp, $before_count$after_count)"
merged_ok=1
else
log " Stale temp merged with no new messages, discarding: $stale_dir"
merged_ok=1
fi
else
log " Stale temp merge produced invalid JSON, retaining for retry: $stale_dir"
fi
else
log " Stale temp merge failed, retaining for retry: $stale_dir"
fi fi
elif [[ -n "$destination_path" ]]; then elif [[ -n "$destination_path" ]]; then
mkdir -p "$(dirname "$destination_path")" mkdir -p "$(dirname "$destination_path")"

View file

@ -150,6 +150,14 @@ cat >"$CONFIG_PATH" <<JSON
"channel_ids": ["111"], "channel_ids": ["111"],
"guild_ids": [], "guild_ids": [],
"guild_name_patterns": [] "guild_name_patterns": []
},
{
"name": "salvage-stale-active",
"kind": "guild",
"output_dir": "$ARCHIVE_ROOT/salvage-stale-active",
"channel_ids": ["111"],
"guild_ids": [],
"guild_name_patterns": []
} }
] ]
} }
@ -417,12 +425,28 @@ grep -q 'exit 143' "$SKIP_SIGTERM_LOG" || { echo "expected sigterm exit logged f
grep -q 'Preserving partial export temp' "$SKIP_SIGTERM_LOG" || { echo "expected partial temp preserved on sigterm channel 143" >&2; exit 1; } grep -q 'Preserving partial export temp' "$SKIP_SIGTERM_LOG" || { echo "expected partial temp preserved on sigterm channel 143" >&2; exit 1; }
# Salvage stale temp export smoke # Salvage stale temp export smoke
# Active stale temp smoke: a temp export that still looks "in progress" must be
# skipped by salvage and its directory left in place.
# Seed the archive for channel 111 from the append-existing fixture.
mkdir -p "$ARCHIVE_ROOT/salvage-stale-active"
cp "$FIXTURE_DIR/append-existing.json" "$ARCHIVE_ROOT/salvage-stale-active/$DEFAULT_FILE_NAME"
mkdir -p "$ARCHIVE_ROOT/salvage-stale-active/.dce-meta"
# Channel map pointing channel 111 at the seeded archive file.
printf '{\"111\":\"%s\"}\n' "$ARCHIVE_ROOT/salvage-stale-active/$DEFAULT_FILE_NAME" >"$ARCHIVE_ROOT/salvage-stale-active/.dce-meta/channel-map.json"
# Place a truncated export in a temp dir matching the export.<channel>.* pattern.
mkdir -p "$ARCHIVE_ROOT/salvage-stale-active/.dce-temp/export.111.ACTIVE"
cp "$FIXTURE_DIR/salvage-truncated.json" "$ARCHIVE_ROOT/salvage-stale-active/.dce-temp/export.111.ACTIVE/export.json"
SALVAGE_ACTIVE_LOG="$TMP_DIR/salvage-stale-active.log"
# A very large minimum age makes the just-copied file count as recently modified.
DCE_STALE_TEMP_MIN_AGE_SECONDS=9999 \
run_wrapper salvage-stale-active append 2>"$SALVAGE_ACTIVE_LOG"
# The wrapper's stderr must contain the skip message ...
grep -q 'still active' "$SALVAGE_ACTIVE_LOG" || { echo "expected active stale temp skip message" >&2; exit 1; }
# ... and the temp directory must still exist afterwards.
[[ -d "$ARCHIVE_ROOT/salvage-stale-active/.dce-temp/export.111.ACTIVE" ]] || {
echo "expected active stale temp dir to be retained" >&2
exit 1
}
mkdir -p "$ARCHIVE_ROOT/salvage-stale" mkdir -p "$ARCHIVE_ROOT/salvage-stale"
cp "$FIXTURE_DIR/append-existing.json" "$ARCHIVE_ROOT/salvage-stale/$DEFAULT_FILE_NAME" cp "$FIXTURE_DIR/append-existing.json" "$ARCHIVE_ROOT/salvage-stale/$DEFAULT_FILE_NAME"
mkdir -p "$ARCHIVE_ROOT/salvage-stale/.dce-meta" mkdir -p "$ARCHIVE_ROOT/salvage-stale/.dce-meta"
printf '{\"111\":\"%s\"}\n' "$ARCHIVE_ROOT/salvage-stale/$DEFAULT_FILE_NAME" >"$ARCHIVE_ROOT/salvage-stale/.dce-meta/channel-map.json" printf '{\"111\":\"%s\"}\n' "$ARCHIVE_ROOT/salvage-stale/$DEFAULT_FILE_NAME" >"$ARCHIVE_ROOT/salvage-stale/.dce-meta/channel-map.json"
mkdir -p "$ARCHIVE_ROOT/salvage-stale/.dce-temp/export.111.STALE" mkdir -p "$ARCHIVE_ROOT/salvage-stale/.dce-temp/export.111.STALE"
cp "$FIXTURE_DIR/salvage-truncated.json" "$ARCHIVE_ROOT/salvage-stale/.dce-temp/export.111.STALE/export.json" cp "$FIXTURE_DIR/salvage-truncated.json" "$ARCHIVE_ROOT/salvage-stale/.dce-temp/export.111.STALE/export.json"
touch -d '1 hour ago' "$ARCHIVE_ROOT/salvage-stale/.dce-temp/export.111.STALE/export.json"
SALVAGE_LOG="$TMP_DIR/salvage-stale.log" SALVAGE_LOG="$TMP_DIR/salvage-stale.log"
run_wrapper salvage-stale append 2>"$SALVAGE_LOG" run_wrapper salvage-stale append 2>"$SALVAGE_LOG"
SALVAGE_DEST="$ARCHIVE_ROOT/salvage-stale/$DEFAULT_FILE_NAME" SALVAGE_DEST="$ARCHIVE_ROOT/salvage-stale/$DEFAULT_FILE_NAME"