mirror of
https://github.com/Tyrrrz/DiscordChatExporter.git
synced 2026-06-09 15:52:37 -06:00
feat(scrape): reclaim stale lock and proof salvage-before smoke
Add --reclaim-stale to scrape-lock-status.sh for dead-holder cleanup and extend operator-proof smoke to cover --salvage-before-scrape end-to-end.
This commit is contained in:
parent
b883943e3a
commit
e82007a2c5
|
|
@ -0,0 +1,44 @@
|
|||
---
|
||||
title: "feat: Reclaim stale scrape lock and proof salvage-before smoke"
|
||||
type: feat
|
||||
status: active
|
||||
date: 2026-06-04
|
||||
origin: /lfg — stale MyBook validation leaves lock/meta; proof lacks salvage-before smoke
|
||||
---
|
||||
|
||||
# feat: Reclaim stale scrape lock and proof salvage-before smoke
|
||||
|
||||
## Summary
|
||||
|
||||
Add a `--reclaim-stale` flag to `scrape-lock-status.sh` so operators can clear lock artifacts left behind by a dead holder, and extend `run-operator-proof-smoke.sh` to cover `--salvage-before-scrape` end-to-end.
|
||||
|
||||
## Problem Frame
|
||||
|
||||
After a crashed scrape, `{archive_root}/.dce-scrape.lock.meta` may reference a dead pid. Operators need a safe reclaim path before restarting KotOR catch-up.
|
||||
|
||||
## Requirements
|
||||
|
||||
| ID | Requirement |
|
||||
|----|-------------|
|
||||
| R1 | `scrape-lock-status.sh --reclaim-stale` removes stale `.meta` when holder pid is not running |
|
||||
| R2 | Reclaim refuses when flock is actively held or holder pid is running |
|
||||
| R3 | Reclaim also removes an orphan lock file when it exists and no process holds the flock on it |
|
||||
| R4 | `run-operator-proof-smoke.sh` covers `--salvage-before-scrape` |
|
||||
| R5 | `run-all-smokes.sh` passes |
|
||||
|
||||
## Implementation Units
|
||||
|
||||
### U1. Lock reclaim flag
|
||||
|
||||
**Files:** `scripts/scrape-lock-status.sh`, `scripts/tests/scrape-lock-status-smoke.sh`
|
||||
|
||||
### U2. Proof salvage-before smoke
|
||||
|
||||
**Files:** `scripts/tests/run-operator-proof-smoke.sh`
|
||||
|
||||
## Scope Boundaries
|
||||
|
||||
### Deferred
|
||||
|
||||
- GUI bridge doc refresh
|
||||
- Live KotOR catch-up on host
|
||||
|
|
@ -9,11 +9,13 @@ CONFIG_PATH="${DCE_CONFIG_FILE:-$REPO_ROOT/config/scrape-targets.json}"
|
|||
usage() {
|
||||
cat <<EOF
|
||||
Usage:
|
||||
$(basename "$0") [--config PATH]
|
||||
$(basename "$0") [--config PATH] [--reclaim-stale]
|
||||
|
||||
Report scrape serialization lock state for the configured archive root.
|
||||
Uses the same lock path rules as run-discord-scrape-host.sh.
|
||||
|
||||
--reclaim-stale Remove stale .meta and unheld lock file when holder pid is dead
|
||||
|
||||
Exit codes:
|
||||
0 Safe to scrape (no lock, unheld lock file, or stale reclaimable holder)
|
||||
1 Another scrape is actively holding the lock
|
||||
|
|
@ -83,7 +85,31 @@ lock_is_held() {
|
|||
return 0
|
||||
}
|
||||
|
||||
#######################################
# Remove scrape-lock artifacts left behind by a holder that is gone.
#
# Refuses (through die, which is expected not to return) when the lock is
# currently held or when the pid recorded in the meta file still belongs to
# a live process. Otherwise deletes the meta file and then the lock file,
# if each is present.
#
# Arguments:
#   $1 - path of the scrape lock file
#   $2 - path of the lock meta file that records the holder pid
# Outputs:
#   One "removed ..." line on stdout for every artifact actually deleted.
# Returns:
#   0 when nothing blocked the reclaim; does not return on refusal or when
#   a deletion fails.
#######################################
reclaim_stale_lock() {
  local lock_file=$1 meta_file=$2
  local pid

  if lock_is_held "$lock_file"; then
    die "Cannot reclaim: scrape lock is actively held."
  fi

  if [[ -f "$meta_file" ]]; then
    pid=$(read_meta_field "$meta_file" pid)
    if [[ -n "$pid" ]]; then
      # kill -0 also fails (EPERM) for a live process owned by another user,
      # so a failed probe alone does not prove the holder is dead. Where
      # /proc is available, treat an existing /proc/<pid> entry as alive too.
      if kill -0 "$pid" 2>/dev/null \
        || [[ "$pid" =~ ^[0-9]+$ && -d "/proc/$pid" ]]; then
        die "Cannot reclaim: holder pid $pid is still running."
      fi
    fi
    # Only report the removal once rm has actually succeeded.
    rm -f -- "$meta_file" \
      || die "Cannot reclaim: failed to remove lock meta: $meta_file"
    printf 'removed stale lock meta: %s\n' "$meta_file"
  fi

  # Re-check the lock right before deleting it to keep the window in which a
  # new holder could have taken it as small as possible.
  if [[ -e "$lock_file" ]] && ! lock_is_held "$lock_file"; then
    rm -f -- "$lock_file" \
      || die "Cannot reclaim: failed to remove lock file: $lock_file"
    printf 'removed unheld lock file: %s\n' "$lock_file"
  fi
}
|
||||
|
||||
main() {
|
||||
local reclaim=0
|
||||
while (($#)); do
|
||||
case "$1" in
|
||||
--config)
|
||||
|
|
@ -91,6 +117,10 @@ main() {
|
|||
CONFIG_PATH=$2
|
||||
shift 2
|
||||
;;
|
||||
--reclaim-stale)
|
||||
reclaim=1
|
||||
shift
|
||||
;;
|
||||
--help|-h)
|
||||
usage
|
||||
exit 0
|
||||
|
|
@ -136,10 +166,24 @@ main() {
|
|||
if [[ -n "$pid" ]] && ! kill -0 "$pid" 2>/dev/null; then
|
||||
printf 'state: stale (reclaimable; holder pid %s is not running)\n' "$pid"
|
||||
format_holder_line "$meta_file"
|
||||
if (( reclaim )); then
|
||||
reclaim_stale_lock "$lock_file" "$meta_file"
|
||||
printf 'state: free (stale lock reclaimed)\n'
|
||||
fi
|
||||
exit 0
|
||||
fi
|
||||
fi
|
||||
|
||||
if (( reclaim )); then
|
||||
if [[ -e "$lock_file" ]] && ! lock_is_held "$lock_file"; then
|
||||
reclaim_stale_lock "$lock_file" "$meta_file"
|
||||
printf 'state: free (orphan lock reclaimed)\n'
|
||||
exit 0
|
||||
fi
|
||||
printf 'state: free (nothing to reclaim)\n'
|
||||
exit 0
|
||||
fi
|
||||
|
||||
printf 'state: free (lock file present but not held)\n'
|
||||
format_holder_line "$meta_file"
|
||||
exit 0
|
||||
|
|
|
|||
|
|
@ -35,6 +35,19 @@ JSON
|
|||
|
||||
printf 'DISCORD_TOKEN=dummy\n' >"$ENV_PATH"
|
||||
|
||||
COMPOSE_FILE="$TMP_DIR/docker-compose.yml"
|
||||
FAKE_DOCKER="$TMP_DIR/docker"
|
||||
cat >"$COMPOSE_FILE" <<'EOF'
|
||||
services:
|
||||
discord-scraper:
|
||||
image: fake
|
||||
EOF
|
||||
cat >"$FAKE_DOCKER" <<'EOF'
|
||||
#!/usr/bin/env bash
|
||||
printf 'run succeeded\n'
|
||||
EOF
|
||||
chmod +x "$FAKE_DOCKER"
|
||||
|
||||
set +e
|
||||
output=$(
|
||||
DCE_MIN_FREE_MB=0 \
|
||||
|
|
@ -72,6 +85,30 @@ if [[ "$salvage_status" -ne 0 ]] || ! grep -q 'Salvage-only proof complete' <<<"
|
|||
exit 1
|
||||
fi
|
||||
|
||||
set +e
|
||||
salvage_before_output=$(
|
||||
DCE_MIN_FREE_MB=0 \
|
||||
DCE_CONFIG_FILE="$CONFIG_PATH" \
|
||||
DCE_ENV_FILE="$ENV_PATH" \
|
||||
DCE_SKIP_SCRAPE_LOCK=1 \
|
||||
DCE_DOCKER_BIN="$FAKE_DOCKER" \
|
||||
DCE_COMPOSE_FILE="$COMPOSE_FILE" \
|
||||
"$PROOF" --config "$CONFIG_PATH" --target demo --salvage-before-scrape 2>&1
|
||||
)
|
||||
salvage_before_status=$?
|
||||
set -e
|
||||
|
||||
if [[ "$salvage_before_status" -ne 0 ]] || ! grep -q 'salvage completed' <<<"$salvage_before_output"; then
|
||||
printf 'run-operator-proof --salvage-before-scrape failed (status=%s)\n' "$salvage_before_status" >&2
|
||||
printf '%s\n' "$salvage_before_output" >&2
|
||||
exit 1
|
||||
fi
|
||||
grep -q 'Operator proof passed for demo' <<<"$salvage_before_output" || {
|
||||
printf 'expected operator proof to pass after salvage-before scrape\n' >&2
|
||||
printf '%s\n' "$salvage_before_output" >&2
|
||||
exit 1
|
||||
}
|
||||
|
||||
command -v flock >/dev/null 2>&1 && {
|
||||
LOCK_FILE="$TMP_DIR/archive/.dce-scrape.lock"
|
||||
HOLDER_PID=""
|
||||
|
|
|
|||
|
|
@ -91,4 +91,19 @@ if [[ "$stale_status" -ne 0 ]] || ! grep -q 'state: stale (reclaimable' <<<"$sta
|
|||
exit 1
|
||||
fi
|
||||
|
||||
set +e
|
||||
reclaim_output=$("$STATUS" --config "$CONFIG_PATH" --reclaim-stale 2>&1)
|
||||
reclaim_status=$?
|
||||
set -e
|
||||
|
||||
if [[ "$reclaim_status" -ne 0 ]] || ! grep -q 'removed stale lock meta' <<<"$reclaim_output"; then
|
||||
echo "expected --reclaim-stale to remove stale meta" >&2
|
||||
printf '%s\n' "$reclaim_output" >&2
|
||||
exit 1
|
||||
fi
|
||||
[[ ! -f "${LOCK_FILE}.meta" ]] || {
|
||||
echo "expected stale meta removed after reclaim" >&2
|
||||
exit 1
|
||||
}
|
||||
|
||||
printf 'scrape-lock-status-smoke: ok\n'
|
||||
|
|
|
|||
Loading…
Reference in a new issue