mirror of
https://github.com/Tyrrrz/DiscordChatExporter.git
synced 2026-06-09 15:52:37 -06:00
fix(host): flock scrape lock prevents concurrent container exports
Overlapping run-operator-validation invocations spawned twin yes_general exports and repeated OOM skips. Host scrape now holds .dce-scrape.lock; smokes bypass via DCE_SKIP_SCRAPE_LOCK. Added lock smoke (20/20 pass).
This commit is contained in:
parent
928c0ef682
commit
b9bb4bbe64
37
docs/plans/2026-06-04-046-fix-scrape-run-lock-plan.md
Normal file
37
docs/plans/2026-06-04-046-fix-scrape-run-lock-plan.md
Normal file
|
|
@ -0,0 +1,37 @@
|
||||||
|
---
|
||||||
|
title: "fix: Scrape run lock prevents concurrent container exports"
|
||||||
|
type: fix
|
||||||
|
status: complete
|
||||||
|
date: 2026-06-04
|
||||||
|
origin: /lfg — duplicate KotOR validation runs left two yes_general exports OOM-looping
|
||||||
|
---
|
||||||
|
|
||||||
|
# fix: Scrape run lock prevents concurrent container exports
|
||||||
|
|
||||||
|
## Problem
|
||||||
|
|
||||||
|
Two overlapping `run-operator-validation.sh --target KotOR_discord_msgs` processes each started a full container scrape. Both exported `yes_general` (`221726893064454144`) with the same `--after` cursor, creating twin `.dce-temp/export.*` dirs (~29–34 MiB each) and repeated OOM skips.
|
||||||
|
|
||||||
|
Cron uses `flock`, but manual/host validation does not — overlapping runs are unguarded.
|
||||||
|
|
||||||
|
## Requirements
|
||||||
|
|
||||||
|
| ID | Requirement |
|
||||||
|
|----|-------------|
|
||||||
|
| R1 | `run-discord-scrape-host.sh scrape` acquires non-blocking `flock` on `$REPO_ROOT/.dce-scrape.lock` |
|
||||||
|
| R2 | `DCE_SKIP_SCRAPE_LOCK=1` bypasses lock (smoke tests) |
|
||||||
|
| R3 | Clear error when lock held; preflight unaffected |
|
||||||
|
| R4 | Offline smoke asserts second scrape fails while lock held |
|
||||||
|
| R5 | `run-all-smokes.sh` passes (20/20, including the new lock smoke); docs note concurrent-run hazard |
|
||||||
|
|
||||||
|
## Verification
|
||||||
|
|
||||||
|
```bash
|
||||||
|
./scripts/tests/run-discord-scrape-host-lock-smoke.sh
|
||||||
|
DCE_MIN_FREE_MB=0 ./scripts/run-all-smokes.sh
|
||||||
|
```
|
||||||
|
|
||||||
|
## Out of scope
|
||||||
|
|
||||||
|
- Completing yes_general multi-hour catch-up inside LFG
|
||||||
|
- Container memory limits / tuning
|
||||||
|
|
@ -115,6 +115,8 @@ DCE_MIN_FREE_MB=0 ./scripts/run-operator-validation.sh --sync-gui --per-target -
|
||||||
|
|
||||||
\* Audit failed before plan 045 because truncated partial exports under `.dce-temp/` were scanned as archives. After fix, audit passes while partial temps exist.
|
\* Audit failed before plan 045 because truncated partial exports under `.dce-temp/` were scanned as archives. After fix, audit passes while partial temps exist.
|
||||||
|
|
||||||
|
**Plan 046 (2026-06-04):** `run-discord-scrape-host.sh scrape` holds non-blocking `flock` on `.dce-scrape.lock` so overlapping manual/cron validation cannot spawn twin yes_general exports. Stop duplicate runs before restarting KotOR validation.
|
||||||
|
|
||||||
**Plan 045 (2026-06-04):** `audit-archive-json.sh` and `verify-documents-archives.sh` skip `*/.dce-temp/*` (in-progress partial exports). Salvage run 2026-06-03: 7 merged, 17 unchanged, 3 skipped (+5404 messages); yes_general OOM-skipped with partial temps preserved for next salvage.
|
**Plan 045 (2026-06-04):** `audit-archive-json.sh` and `verify-documents-archives.sh` skip `*/.dce-temp/*` (in-progress partial exports). Salvage run 2026-06-03: 7 merged, 17 unchanged, 3 skipped (+5404 messages); yes_general OOM-skipped with partial temps preserved for next salvage.
|
||||||
|
|
||||||
**Plan 044 (2026-06-04):** Offline smoke asserts partial temp preserved on OOM skip (channel 134). Host wrapper prefers `DISCORD_TOKEN_FILE` over inherited shell tokens. `run-all-smokes.sh` → 19/19 pass.
|
**Plan 044 (2026-06-04):** Offline smoke asserts partial temp preserved on OOM skip (channel 134). Host wrapper prefers `DISCORD_TOKEN_FILE` over inherited shell tokens. `run-all-smokes.sh` → 19/19 pass.
|
||||||
|
|
|
||||||
|
|
@ -14,6 +14,8 @@ DOCKER_BIN_OVERRIDDEN=0
|
||||||
REAUTH_COMMAND=""
|
REAUTH_COMMAND=""
|
||||||
COMPOSE_ENV_FILE=""
|
COMPOSE_ENV_FILE=""
|
||||||
COMPOSE_ENV_TEMP=""
|
COMPOSE_ENV_TEMP=""
|
||||||
|
SCRAPE_LOCK_FILE=""
|
||||||
|
SCRAPE_LOCK_FD=""
|
||||||
VERIFY_READY="$REPO_ROOT/scripts/verify-operator-ready.sh"
|
VERIFY_READY="$REPO_ROOT/scripts/verify-operator-ready.sh"
|
||||||
|
|
||||||
if [[ -n "${DCE_DOCKER_BIN:-}" ]]; then
|
if [[ -n "${DCE_DOCKER_BIN:-}" ]]; then
|
||||||
|
|
@ -56,6 +58,33 @@ cleanup_compose_env() {
|
||||||
fi
|
fi
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#######################################
# Take the host-wide scrape lock so two scrapes cannot run at the same time.
# Globals:
#   DCE_SKIP_SCRAPE_LOCK (read)    - "1" bypasses locking entirely.
#   DCE_SCRAPE_LOCK_FILE (read)    - optional override for the lock path.
#   REPO_ROOT            (read)    - default lock is $REPO_ROOT/.dce-scrape.lock.
#   SCRAPE_LOCK_FILE     (written) - resolved lock path.
#   SCRAPE_LOCK_FD       (written) - descriptor that holds the lock; left empty
#                                    whenever this function did not take it.
# Outputs:
#   A warning on stderr when flock is not installed.
# Returns:
#   0 when the lock is held or locking was skipped. Calls die when the lock
#   file cannot be opened or another process already holds the lock.
#######################################
acquire_scrape_lock() {
  if [[ "${DCE_SKIP_SCRAPE_LOCK:-0}" == "1" ]]; then
    return 0
  fi

  if ! command -v flock >/dev/null 2>&1; then
    # Stay best-effort on hosts without flock, but say so: silently running
    # unguarded hides exactly the hazard this lock exists to prevent.
    printf 'warning: flock not found; scrape lock disabled, concurrent scrapes are not prevented\n' >&2
    return 0
  fi

  SCRAPE_LOCK_FILE="${DCE_SCRAPE_LOCK_FILE:-$REPO_ROOT/.dce-scrape.lock}"

  # Append mode creates the file if needed without truncating it.
  if ! exec {SCRAPE_LOCK_FD}>>"$SCRAPE_LOCK_FILE"; then
    SCRAPE_LOCK_FD=""
    die "Cannot open scrape lock file: $SCRAPE_LOCK_FILE"
  fi

  if ! flock -n "$SCRAPE_LOCK_FD"; then
    # This descriptor never owned the lock: close it and clear the global so
    # a non-empty SCRAPE_LOCK_FD always means "this process holds the lock".
    exec {SCRAPE_LOCK_FD}>&-
    SCRAPE_LOCK_FD=""
    # flock locks vanish when the holder's descriptors close, so a leftover
    # lock file on disk does not by itself mean a scrape is running.
    die "Another scrape is already running (lock: $SCRAPE_LOCK_FILE). Wait for it to finish or confirm no scrape is active before removing the lock."
  fi
}
|
||||||
|
|
||||||
|
#######################################
# Drop the scrape lock, if this process opened a lock descriptor.
# Globals:
#   SCRAPE_LOCK_FD (read, written) - descriptor to unlock and close; reset to
#                                    the empty string afterwards.
# Returns:
#   0 when there is nothing to release.
#######################################
release_scrape_lock() {
  local held_fd="${SCRAPE_LOCK_FD:-}"

  # Nothing was opened (lock skipped or never requested): no-op.
  [[ -n "$held_fd" ]] || return 0

  # Unlock is best-effort; closing the descriptor below ends our hold anyway.
  flock -u "$held_fd" 2>/dev/null || true
  exec {held_fd}>&-
  SCRAPE_LOCK_FD=""
}
|
||||||
|
|
||||||
|
#######################################
# EXIT trap handler: release the scrape lock, then clean up the compose env.
# Globals:
#   none directly; delegates to release_scrape_lock and cleanup_compose_env.
#######################################
cleanup_on_exit() {
  # Releasing the lock is best-effort. If it returned non-zero while errexit
  # is enabled, the trap would stop here and cleanup_compose_env would never
  # run, so its failure is deliberately ignored.
  release_scrape_lock || true
  cleanup_compose_env
}
|
||||||
|
|
||||||
load_env_file() {
|
load_env_file() {
|
||||||
[[ -f "$ENV_FILE" ]] || die "Missing env file: $ENV_FILE"
|
[[ -f "$ENV_FILE" ]] || die "Missing env file: $ENV_FILE"
|
||||||
local raw_line line key value
|
local raw_line line key value
|
||||||
|
|
@ -402,7 +431,7 @@ main() {
|
||||||
local -a passthrough_args=()
|
local -a passthrough_args=()
|
||||||
local subcommand=""
|
local subcommand=""
|
||||||
|
|
||||||
trap cleanup_compose_env EXIT
|
trap cleanup_on_exit EXIT
|
||||||
|
|
||||||
while (($#)); do
|
while (($#)); do
|
||||||
case "$1" in
|
case "$1" in
|
||||||
|
|
@ -470,7 +499,11 @@ main() {
|
||||||
print_scrape_config_plan "$host_config" "Host $subcommand" "${host_targets[@]}"
|
print_scrape_config_plan "$host_config" "Host $subcommand" "${host_targets[@]}"
|
||||||
|
|
||||||
case "$subcommand" in
|
case "$subcommand" in
|
||||||
preflight|scrape)
|
preflight)
|
||||||
|
run_subcommand_with_retry "$subcommand" "${passthrough_args[@]}"
|
||||||
|
;;
|
||||||
|
scrape)
|
||||||
|
acquire_scrape_lock
|
||||||
run_subcommand_with_retry "$subcommand" "${passthrough_args[@]}"
|
run_subcommand_with_retry "$subcommand" "${passthrough_args[@]}"
|
||||||
;;
|
;;
|
||||||
esac
|
esac
|
||||||
|
|
|
||||||
87
scripts/tests/run-discord-scrape-host-lock-smoke.sh
Executable file
87
scripts/tests/run-discord-scrape-host-lock-smoke.sh
Executable file
|
|
@ -0,0 +1,87 @@
|
||||||
|
#!/usr/bin/env bash
#
# Offline smoke for the host scrape lock.
#
# A background process takes the lock file; while it is held, a `scrape` run
# of run-discord-scrape-host.sh must fail with the lock-held message. After
# the holder is killed, the same invocation must succeed. Docker is replaced
# by a stub, so nothing real is exported.
#
# Skips with exit 0 when flock is not installed.

set -Eeuo pipefail

REPO_ROOT=$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd -P)
TMP_DIR=$(mktemp -d "${TMPDIR:-/tmp}/dce-host-lock-smoke.XXXXXX")
ENV_FILE="$TMP_DIR/scrape.env"
COMPOSE_FILE="$TMP_DIR/docker-compose.yml"
FAKE_DOCKER="$TMP_DIR/docker"
LOCK_FILE="$TMP_DIR/scrape.lock"
READY_FILE="$TMP_DIR/holder.ready"
HOLDER_PID=""

# Stop the lock holder if it is still alive, then remove the scratch dir.
cleanup() {
  if [[ -n "$HOLDER_PID" ]] && kill -0 "$HOLDER_PID" 2>/dev/null; then
    kill "$HOLDER_PID" 2>/dev/null || true
    wait "$HOLDER_PID" 2>/dev/null || true
  fi
  rm -rf "$TMP_DIR"
}
trap cleanup EXIT

command -v flock >/dev/null 2>&1 || {
  echo "SKIP: flock not available"
  exit 0
}

cat >"$COMPOSE_FILE" <<'EOF'
services:
  discord-scraper:
    image: fake
EOF

# Stub docker binary: every invocation reports success.
cat >"$FAKE_DOCKER" <<'EOF'
#!/usr/bin/env bash
printf 'run succeeded\n'
EOF
chmod +x "$FAKE_DOCKER"

cat >"$ENV_FILE" <<EOF
DISCORD_TOKEN=dummy-token
EOF

# Background holder: take the lock, signal readiness, then idle.
(
  exec {lock_fd}>>"$LOCK_FILE"
  flock -n "$lock_fd" || exit 1
  : >"$READY_FILE"
  # exec replaces the subshell, so HOLDER_PID is the process that owns the
  # lock descriptor. If sleep were a forked child, killing the subshell could
  # leave it running with the lock still held.
  exec sleep 120
) &
HOLDER_PID=$!

# Wait until the holder really has the lock instead of guessing with a fixed
# delay; give up after roughly five seconds.
for ((attempt = 0; attempt < 50; attempt++)); do
  if [[ -e "$READY_FILE" ]]; then
    break
  fi
  if ! kill -0 "$HOLDER_PID" 2>/dev/null; then
    echo "lock holder exited before acquiring the lock" >&2
    exit 1
  fi
  sleep 0.1
done
if [[ ! -e "$READY_FILE" ]]; then
  echo "timed out waiting for lock holder" >&2
  exit 1
fi

# Phase 1: the scrape must fail while the lock is held.
set +e
output=$(
  DCE_REPO_ROOT="$REPO_ROOT" \
    DCE_SCRAPE_LOCK_FILE="$LOCK_FILE" \
    DCE_DOCKER_BIN="$FAKE_DOCKER" \
    DCE_ENV_FILE="$ENV_FILE" \
    DCE_COMPOSE_FILE="$COMPOSE_FILE" \
    "$REPO_ROOT/scripts/run-discord-scrape-host.sh" scrape --target demo 2>&1
)
status=$?
set -e

if [[ "$status" -eq 0 ]]; then
  echo "expected scrape to fail while lock is held" >&2
  exit 1
fi
if ! grep -q 'Another scrape is already running' <<<"$output"; then
  echo "expected lock-held error message" >&2
  printf '%s\n' "$output" >&2
  exit 1
fi

# Phase 2: release the lock by stopping the holder; the scrape must now pass.
kill "$HOLDER_PID" 2>/dev/null || true
wait "$HOLDER_PID" 2>/dev/null || true
HOLDER_PID=""

if ! DCE_REPO_ROOT="$REPO_ROOT" \
  DCE_SCRAPE_LOCK_FILE="$LOCK_FILE" \
  DCE_DOCKER_BIN="$FAKE_DOCKER" \
  DCE_ENV_FILE="$ENV_FILE" \
  DCE_COMPOSE_FILE="$COMPOSE_FILE" \
  "$REPO_ROOT/scripts/run-discord-scrape-host.sh" scrape --target demo >/dev/null; then
  echo "expected scrape to succeed after lock released" >&2
  exit 1
fi

echo "run-discord-scrape-host-lock-smoke: OK"
|
||||||
|
|
@ -85,6 +85,7 @@ run_host() {
|
||||||
local env_path=${2:-$ENV_FILE}
|
local env_path=${2:-$ENV_FILE}
|
||||||
|
|
||||||
env -u DISCORD_TOKEN \
|
env -u DISCORD_TOKEN \
|
||||||
|
DCE_SKIP_SCRAPE_LOCK=1 \
|
||||||
DCE_REPO_ROOT="$REPO_ROOT" \
|
DCE_REPO_ROOT="$REPO_ROOT" \
|
||||||
DCE_DOCKER_BIN="$FAKE_DOCKER" \
|
DCE_DOCKER_BIN="$FAKE_DOCKER" \
|
||||||
DCE_ENV_FILE="$env_path" \
|
DCE_ENV_FILE="$env_path" \
|
||||||
|
|
@ -100,6 +101,7 @@ run_host_with_shell_token() {
|
||||||
local missing_env_path=$2
|
local missing_env_path=$2
|
||||||
|
|
||||||
DCE_REPO_ROOT="$REPO_ROOT" \
|
DCE_REPO_ROOT="$REPO_ROOT" \
|
||||||
|
DCE_SKIP_SCRAPE_LOCK=1 \
|
||||||
DCE_DOCKER_BIN="$FAKE_DOCKER" \
|
DCE_DOCKER_BIN="$FAKE_DOCKER" \
|
||||||
DCE_ENV_FILE="$missing_env_path" \
|
DCE_ENV_FILE="$missing_env_path" \
|
||||||
DCE_COMPOSE_FILE="$COMPOSE_FILE" \
|
DCE_COMPOSE_FILE="$COMPOSE_FILE" \
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue