mirror of
https://github.com/Tyrrrz/DiscordChatExporter.git
synced 2026-06-09 15:52:37 -06:00
feat(scrape): reclaim stale lock and proof salvage-before smoke
Add --reclaim-stale to scrape-lock-status.sh for dead-holder cleanup and extend operator-proof smoke to cover --salvage-before-scrape end-to-end.
This commit is contained in:
parent
b883943e3a
commit
e82007a2c5
|
|
@ -0,0 +1,44 @@
|
||||||
|
---
|
||||||
|
title: "feat: Reclaim stale scrape lock and proof salvage-before smoke"
|
||||||
|
type: feat
|
||||||
|
status: active
|
||||||
|
date: 2026-06-04
|
||||||
|
origin: /lfg — stale MyBook validation leaves lock/meta; proof lacks salvage-before smoke
|
||||||
|
---
|
||||||
|
|
||||||
|
# feat: Reclaim stale scrape lock and proof salvage-before smoke
|
||||||
|
|
||||||
|
## Summary
|
||||||
|
|
||||||
|
Add a `--reclaim-stale` flag to `scrape-lock-status.sh` so operators can clear lock artifacts left behind by a dead holder, and extend `run-operator-proof-smoke.sh` to cover `--salvage-before-scrape` end-to-end.
|
||||||
|
|
||||||
|
## Problem Frame
|
||||||
|
|
||||||
|
After a crashed scrape, `{archive_root}/.dce-scrape.lock.meta` may reference a dead pid. Operators need a safe reclaim path before restarting KotOR catch-up.
|
||||||
|
|
||||||
|
## Requirements
|
||||||
|
|
||||||
|
| ID | Requirement |
|
||||||
|
|----|-------------|
|
||||||
|
| R1 | `scrape-lock-status.sh --reclaim-stale` removes stale `.meta` when holder pid is not running |
|
||||||
|
| R2 | Reclaim refuses when flock is actively held or holder pid is running |
|
||||||
|
| R3 | Reclaim also removes an orphan lock file, but only after confirming that no flock is held on it and the recorded holder pid is not running |
|
||||||
|
| R4 | `run-operator-proof-smoke.sh` covers `--salvage-before-scrape` |
|
||||||
|
| R5 | `run-all-smokes.sh` passes |
|
||||||
|
|
||||||
|
## Implementation Units
|
||||||
|
|
||||||
|
### U1. Lock reclaim flag
|
||||||
|
|
||||||
|
**Files:** `scripts/scrape-lock-status.sh`, `scripts/tests/scrape-lock-status-smoke.sh`
|
||||||
|
|
||||||
|
### U2. Proof salvage-before smoke
|
||||||
|
|
||||||
|
**Files:** `scripts/tests/run-operator-proof-smoke.sh`
|
||||||
|
|
||||||
|
## Scope Boundaries
|
||||||
|
|
||||||
|
### Deferred
|
||||||
|
|
||||||
|
- GUI bridge doc refresh
|
||||||
|
- Live KotOR catch-up on host
|
||||||
|
|
@ -9,11 +9,13 @@ CONFIG_PATH="${DCE_CONFIG_FILE:-$REPO_ROOT/config/scrape-targets.json}"
|
||||||
usage() {
|
usage() {
|
||||||
cat <<EOF
|
cat <<EOF
|
||||||
Usage:
|
Usage:
|
||||||
$(basename "$0") [--config PATH]
|
$(basename "$0") [--config PATH] [--reclaim-stale]
|
||||||
|
|
||||||
Report scrape serialization lock state for the configured archive root.
|
Report scrape serialization lock state for the configured archive root.
|
||||||
Uses the same lock path rules as run-discord-scrape-host.sh.
|
Uses the same lock path rules as run-discord-scrape-host.sh.
|
||||||
|
|
||||||
|
--reclaim-stale Remove stale .meta and unheld lock file when holder pid is dead
|
||||||
|
|
||||||
Exit codes:
|
Exit codes:
|
||||||
0 Safe to scrape (no lock, unheld lock file, or stale reclaimable holder)
|
0 Safe to scrape (no lock, unheld lock file, or stale reclaimable holder)
|
||||||
1 Another scrape is actively holding the lock
|
1 Another scrape is actively holding the lock
|
||||||
|
|
@ -83,7 +85,31 @@ lock_is_held() {
|
||||||
return 0
|
return 0
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#######################################
# Remove leftover scrape-lock artifacts once their holder is confirmed gone.
# Refuses (through die) while the lock is held or while the holder pid
# recorded in the meta file is still running.
# Arguments:
#   $1 - path to the scrape lock file
#   $2 - path to the lock's meta file
# Outputs:
#   One "removed ..." line on stdout for each artifact actually deleted.
# Returns:
#   0 when the artifacts are gone or were never there; every refusal or
#   failed deletion goes through die.
#######################################
reclaim_stale_lock() {
  local lock_file=$1 meta_file=$2
  local pid=""

  if lock_is_held "$lock_file"; then
    die "Cannot reclaim: scrape lock is actively held."
  fi

  if [[ -f "$meta_file" ]]; then
    pid=$(read_meta_field "$meta_file" pid)
    # Probe only positive integers: kill treats 0 and negative operands as
    # process groups, so a corrupt record (e.g. "0") would look like a live
    # holder and block reclaim. Anything else is treated as no live holder.
    if [[ "$pid" =~ ^0*[1-9][0-9]*$ ]] && kill -0 "$pid" 2>/dev/null; then
      die "Cannot reclaim: holder pid $pid is still running."
    fi
    # Report the removal only when it really happened.
    rm -f -- "$meta_file" \
      || die "Cannot reclaim: failed to remove lock meta $meta_file"
    printf 'removed stale lock meta: %s\n' "$meta_file"
  fi

  # The held-state is checked a second time here, right before the delete.
  if [[ -e "$lock_file" ]] && ! lock_is_held "$lock_file"; then
    rm -f -- "$lock_file" \
      || die "Cannot reclaim: failed to remove lock file $lock_file"
    printf 'removed unheld lock file: %s\n' "$lock_file"
  fi
}
|
||||||
|
|
||||||
main() {
|
main() {
|
||||||
|
local reclaim=0
|
||||||
while (($#)); do
|
while (($#)); do
|
||||||
case "$1" in
|
case "$1" in
|
||||||
--config)
|
--config)
|
||||||
|
|
@ -91,6 +117,10 @@ main() {
|
||||||
CONFIG_PATH=$2
|
CONFIG_PATH=$2
|
||||||
shift 2
|
shift 2
|
||||||
;;
|
;;
|
||||||
|
--reclaim-stale)
|
||||||
|
reclaim=1
|
||||||
|
shift
|
||||||
|
;;
|
||||||
--help|-h)
|
--help|-h)
|
||||||
usage
|
usage
|
||||||
exit 0
|
exit 0
|
||||||
|
|
@ -136,10 +166,24 @@ main() {
|
||||||
if [[ -n "$pid" ]] && ! kill -0 "$pid" 2>/dev/null; then
|
if [[ -n "$pid" ]] && ! kill -0 "$pid" 2>/dev/null; then
|
||||||
printf 'state: stale (reclaimable; holder pid %s is not running)\n' "$pid"
|
printf 'state: stale (reclaimable; holder pid %s is not running)\n' "$pid"
|
||||||
format_holder_line "$meta_file"
|
format_holder_line "$meta_file"
|
||||||
|
if (( reclaim )); then
|
||||||
|
reclaim_stale_lock "$lock_file" "$meta_file"
|
||||||
|
printf 'state: free (stale lock reclaimed)\n'
|
||||||
|
fi
|
||||||
exit 0
|
exit 0
|
||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
if (( reclaim )); then
|
||||||
|
if [[ -e "$lock_file" ]] && ! lock_is_held "$lock_file"; then
|
||||||
|
reclaim_stale_lock "$lock_file" "$meta_file"
|
||||||
|
printf 'state: free (orphan lock reclaimed)\n'
|
||||||
|
exit 0
|
||||||
|
fi
|
||||||
|
printf 'state: free (nothing to reclaim)\n'
|
||||||
|
exit 0
|
||||||
|
fi
|
||||||
|
|
||||||
printf 'state: free (lock file present but not held)\n'
|
printf 'state: free (lock file present but not held)\n'
|
||||||
format_holder_line "$meta_file"
|
format_holder_line "$meta_file"
|
||||||
exit 0
|
exit 0
|
||||||
|
|
|
||||||
|
|
@ -35,6 +35,19 @@ JSON
|
||||||
|
|
||||||
printf 'DISCORD_TOKEN=dummy\n' >"$ENV_PATH"
|
printf 'DISCORD_TOKEN=dummy\n' >"$ENV_PATH"
|
||||||
|
|
||||||
|
COMPOSE_FILE="$TMP_DIR/docker-compose.yml"
|
||||||
|
FAKE_DOCKER="$TMP_DIR/docker"
|
||||||
|
cat >"$COMPOSE_FILE" <<'EOF'
|
||||||
|
services:
|
||||||
|
discord-scraper:
|
||||||
|
image: fake
|
||||||
|
EOF
|
||||||
|
cat >"$FAKE_DOCKER" <<'EOF'
|
||||||
|
#!/usr/bin/env bash
|
||||||
|
printf 'run succeeded\n'
|
||||||
|
EOF
|
||||||
|
chmod +x "$FAKE_DOCKER"
|
||||||
|
|
||||||
set +e
|
set +e
|
||||||
output=$(
|
output=$(
|
||||||
DCE_MIN_FREE_MB=0 \
|
DCE_MIN_FREE_MB=0 \
|
||||||
|
|
@ -72,6 +85,30 @@ if [[ "$salvage_status" -ne 0 ]] || ! grep -q 'Salvage-only proof complete' <<<"
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
set +e
|
||||||
|
salvage_before_output=$(
|
||||||
|
DCE_MIN_FREE_MB=0 \
|
||||||
|
DCE_CONFIG_FILE="$CONFIG_PATH" \
|
||||||
|
DCE_ENV_FILE="$ENV_PATH" \
|
||||||
|
DCE_SKIP_SCRAPE_LOCK=1 \
|
||||||
|
DCE_DOCKER_BIN="$FAKE_DOCKER" \
|
||||||
|
DCE_COMPOSE_FILE="$COMPOSE_FILE" \
|
||||||
|
"$PROOF" --config "$CONFIG_PATH" --target demo --salvage-before-scrape 2>&1
|
||||||
|
)
|
||||||
|
salvage_before_status=$?
|
||||||
|
set -e
|
||||||
|
|
||||||
|
if [[ "$salvage_before_status" -ne 0 ]] || ! grep -q 'salvage completed' <<<"$salvage_before_output"; then
|
||||||
|
printf 'run-operator-proof --salvage-before-scrape failed (status=%s)\n' "$salvage_before_status" >&2
|
||||||
|
printf '%s\n' "$salvage_before_output" >&2
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
grep -q 'Operator proof passed for demo' <<<"$salvage_before_output" || {
|
||||||
|
printf 'expected operator proof to pass after salvage-before scrape\n' >&2
|
||||||
|
printf '%s\n' "$salvage_before_output" >&2
|
||||||
|
exit 1
|
||||||
|
}
|
||||||
|
|
||||||
command -v flock >/dev/null 2>&1 && {
|
command -v flock >/dev/null 2>&1 && {
|
||||||
LOCK_FILE="$TMP_DIR/archive/.dce-scrape.lock"
|
LOCK_FILE="$TMP_DIR/archive/.dce-scrape.lock"
|
||||||
HOLDER_PID=""
|
HOLDER_PID=""
|
||||||
|
|
|
||||||
|
|
@ -91,4 +91,19 @@ if [[ "$stale_status" -ne 0 ]] || ! grep -q 'state: stale (reclaimable' <<<"$sta
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
set +e
|
||||||
|
reclaim_output=$("$STATUS" --config "$CONFIG_PATH" --reclaim-stale 2>&1)
|
||||||
|
reclaim_status=$?
|
||||||
|
set -e
|
||||||
|
|
||||||
|
if [[ "$reclaim_status" -ne 0 ]] || ! grep -q 'removed stale lock meta' <<<"$reclaim_output"; then
|
||||||
|
echo "expected --reclaim-stale to remove stale meta" >&2
|
||||||
|
printf '%s\n' "$reclaim_output" >&2
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
[[ ! -f "${LOCK_FILE}.meta" ]] || {
|
||||||
|
echo "expected stale meta removed after reclaim" >&2
|
||||||
|
exit 1
|
||||||
|
}
|
||||||
|
|
||||||
printf 'scrape-lock-status-smoke: ok\n'
|
printf 'scrape-lock-status-smoke: ok\n'
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue