mirror of
https://github.com/Tyrrrz/DiscordChatExporter.git
synced 2026-06-10 00:02:37 -06:00
refactor(scrape): extract shared scrape-lock library
Centralize archive-root lock path, held checks, holder formatting, and reclaim helpers in scripts/lib/scrape-lock.sh. Source it from the host runner, lock status script, and operator wrappers to remove duplicated logic. Update documents-scrape smoke fake repo to include the new lib.
This commit is contained in:
parent
ad5384ecc1
commit
a88cd815f4
38
docs/plans/2026-06-04-061-refactor-scrape-lock-lib-plan.md
Normal file
38
docs/plans/2026-06-04-061-refactor-scrape-lock-lib-plan.md
Normal file
|
|
@ -0,0 +1,38 @@
|
||||||
|
---
|
||||||
|
title: "refactor: Shared scrape-lock library"
|
||||||
|
type: refactor
|
||||||
|
status: active
|
||||||
|
date: 2026-06-04
|
||||||
|
origin: /lfg — lock path and gate logic duplicated across host runner, status script, validation, documents scrape
|
||||||
|
---
|
||||||
|
|
||||||
|
# refactor: Shared scrape-lock library
|
||||||
|
|
||||||
|
## Summary
|
||||||
|
|
||||||
|
Extract `scripts/lib/scrape-lock.sh` and source it from lock-related scripts to keep archive-root lock behavior consistent.
|
||||||
|
|
||||||
|
## Requirements
|
||||||
|
|
||||||
|
| ID | Requirement |
|
||||||
|
|----|-------------|
|
||||||
|
| R1 | `lib/scrape-lock.sh` provides resolve, held check, holder formatting, reclaim helpers |
|
||||||
|
| R2 | `scrape-lock-status.sh` and `run-discord-scrape-host.sh` source the library |
|
||||||
|
| R3 | `run-documents-scrape.sh` and `run-operator-validation.sh` use shared `ensure_scrape_lock_available` |
|
||||||
|
| R4 | `run-all-smokes.sh` passes (21 smokes) |
|
||||||
|
|
||||||
|
## Implementation Units
|
||||||
|
|
||||||
|
### U1. Library extraction
|
||||||
|
|
||||||
|
**Files:** `scripts/lib/scrape-lock.sh`, consumers listed above
|
||||||
|
|
||||||
|
### U2. Smoke gate
|
||||||
|
|
||||||
|
**Verification:** `DCE_MIN_FREE_MB=0 ./scripts/run-all-smokes.sh`
|
||||||
|
|
||||||
|
## Scope Boundaries
|
||||||
|
|
||||||
|
### Deferred
|
||||||
|
|
||||||
|
- Live KotOR catch-up on host
|
||||||
132
scripts/lib/scrape-lock.sh
Normal file
132
scripts/lib/scrape-lock.sh
Normal file
|
|
@ -0,0 +1,132 @@
|
||||||
|
# Shared scrape lock path and inspection helpers.
|
||||||
|
# Sourced by run-discord-scrape-host.sh, scrape-lock-status.sh, and operator wrappers.
|
||||||
|
|
||||||
|
# Print the scrape lock file path on stdout.
# Arguments:
#   $1 - path to the scrape targets config (JSON)
#   $2 - repository root, used when the config names no archive root
# Precedence: DCE_SCRAPE_LOCK_FILE override, then the config's
# .archive_root, then the repository root.
resolve_scrape_lock_file() {
  local config_path=$1 repo_root=$2
  local lock_dir=""

  # An explicit override wins over anything read from the config.
  if [[ -n "${DCE_SCRAPE_LOCK_FILE:-}" ]]; then
    printf '%s\n' "$DCE_SCRAPE_LOCK_FILE"
    return 0
  fi

  # Best effort: a failing jq call is tolerated and its stderr is discarded.
  if [[ -f "$config_path" ]]; then
    lock_dir=$(jq -r '.archive_root // empty' "$config_path" 2>/dev/null) || true
  fi

  # Empty or literal "null" means the config gave no usable archive root.
  case "$lock_dir" in
    '' | null) lock_dir=$repo_root ;;
  esac
  printf '%s/.dce-scrape.lock\n' "$lock_dir"
}
|
||||||
|
|
||||||
|
# Print the sidecar metadata path ("<lock file>.meta") for lock file $1.
scrape_lock_meta_path() {
  local lock_file=$1
  printf '%s\n' "${lock_file}.meta"
}
|
||||||
|
|
||||||
|
# Print the value of the first "<field>=<value>" line in a lock meta file.
# Arguments:
#   $1 - meta file path
#   $2 - field name; matched literally, never as a regular expression
# Outputs: the value (everything after the first '=') plus a newline, or
#          nothing when the file is missing/unreadable or the field is absent.
# Returns: always 0, so callers under `set -e` can use it in assignments.
read_scrape_lock_meta_field() {
  local meta_file=$1 field=$2
  local line

  [[ -f "$meta_file" && -r "$meta_file" ]] || return 0

  # `|| [[ -n "$line" ]]` keeps a final line that lacks a trailing newline.
  while IFS= read -r line || [[ -n "$line" ]]; do
    # The quoted prefix makes this a literal comparison, so a field name
    # containing '.', '*' and the like cannot match a different key.
    if [[ "$line" == "$field="* ]]; then
      printf '%s\n' "${line#"$field="}"
      return 0
    fi
  done <"$meta_file"
  return 0
}
|
||||||
|
|
||||||
|
# Report whether another process currently holds the flock on a lock file.
# Arguments: $1 - lock file path
# Returns:
#   0 - the lock is held (or the file exists but cannot be opened for probing)
#   1 - the lock is free, the file does not exist, or flock is unavailable
# The probe takes the lock non-blockingly and releases it immediately.
scrape_lock_is_held() {
  local lock_file=$1
  local probe_fd # local, so the descriptor number does not leak to callers

  command -v flock >/dev/null 2>&1 || return 1

  # A lock file that does not exist cannot be held. Checking first also
  # keeps the `>>` open below from creating the file as a side effect.
  [[ -e "$lock_file" ]] || return 1

  # If the file cannot be opened its state is unknown; report "held" so
  # callers never delete a lock they were unable to inspect.
  exec {probe_fd}>>"$lock_file" || return 0

  if flock -n "$probe_fd"; then
    # Acquired it ourselves, so nobody else holds it. Release right away.
    flock -u "$probe_fd" 2>/dev/null || true
    exec {probe_fd}>&-
    return 1
  fi
  exec {probe_fd}>&-
  return 0
}
|
||||||
|
|
||||||
|
# Print a one-line description of the recorded lock holder (no trailing
# newline), suitable for appending to an error message.
# Arguments: $1 - lock meta file path
# Outputs:   nothing when the meta file is missing or records no pid.
# Returns:   0
scrape_lock_format_holder_summary() {
  local meta_file=$1
  local holder_pid="" since="" holder_cmd="" liveness="not running"

  if [[ ! -f "$meta_file" ]]; then
    return 0
  fi

  holder_pid=$(read_scrape_lock_meta_field "$meta_file" pid)
  since=$(read_scrape_lock_meta_field "$meta_file" started)
  holder_cmd=$(read_scrape_lock_meta_field "$meta_file" cmd)
  if [[ -z "$holder_pid" ]]; then
    return 0
  fi

  # Signal 0 only checks whether the pid can be signalled.
  if kill -0 "$holder_pid" 2>/dev/null; then
    liveness="running"
  fi

  printf 'Holder pid %s (%s, started %s): %s' "$holder_pid" "$liveness" "${since:-unknown}" "${holder_cmd:-unknown}"
}
|
||||||
|
|
||||||
|
# Print the recorded lock holder as status lines:
#   holder: pid <pid> (<running|not running>, started <time|unknown>)
#   cmd: <command>            (only when a command was recorded)
# Arguments: $1 - lock meta file path
# Outputs:   nothing when the meta file is missing or records no pid.
# Returns:   always 0.
scrape_lock_format_holder_lines() {
  local meta_file=$1
  local pid="" started="" cmd="" holder_state=""

  [[ -f "$meta_file" ]] || return 0
  pid=$(read_scrape_lock_meta_field "$meta_file" pid)
  started=$(read_scrape_lock_meta_field "$meta_file" started)
  cmd=$(read_scrape_lock_meta_field "$meta_file" cmd)
  [[ -n "$pid" ]] || return 0

  # Signal 0 only checks whether the pid can be signalled.
  if kill -0 "$pid" 2>/dev/null; then
    holder_state="running"
  else
    holder_state="not running"
  fi
  printf 'holder: pid %s (%s, started %s)\n' "$pid" "$holder_state" "${started:-unknown}"

  # Use `if`, not `[[ ... ]] && printf`: as the last command, a false test
  # would become the function's exit status and trip `set -e` in callers.
  if [[ -n "$cmd" ]]; then
    printf 'cmd: %s\n' "$cmd"
  fi
  return 0
}
|
||||||
|
|
||||||
|
# Delete a lock meta file whose recorded holder pid is no longer running.
# Arguments: $1 - lock meta file path
# Returns:
#   0 - the meta file was stale and has been removed
#   1 - nothing reclaimed: file missing, no pid recorded, or holder alive
scrape_lock_try_reclaim_meta() {
  local meta_file=$1
  local holder_pid

  if [[ ! -f "$meta_file" ]]; then
    return 1
  fi

  holder_pid=$(read_scrape_lock_meta_field "$meta_file" pid)

  # Without a pid there is nothing to judge; a signallable pid is not stale.
  if [[ -z "$holder_pid" ]] || kill -0 "$holder_pid" 2>/dev/null; then
    return 1
  fi

  rm -f "$meta_file"
  return 0
}
|
||||||
|
|
||||||
|
# Remove a stale lock file and its meta sidecar, reporting each removal on
# stdout.
# Arguments:
#   $1 - lock file path
#   $2 - lock meta file path
# Returns:
#   0 - finished (zero, one or both files were removed)
#   2 - refused: the lock is actively held
#   3 - refused: the pid recorded in the meta file is still running
scrape_lock_reclaim_stale_files() {
  local lock_file=$1 meta_file=$2
  local holder_pid

  if scrape_lock_is_held "$lock_file"; then
    return 2
  fi

  if [[ -f "$meta_file" ]]; then
    holder_pid=$(read_scrape_lock_meta_field "$meta_file" pid)
    # Meta is stale when it records no pid or the pid cannot be signalled.
    if [[ -z "$holder_pid" ]] || ! kill -0 "$holder_pid" 2>/dev/null; then
      rm -f "$meta_file"
      printf 'removed stale lock meta: %s\n' "$meta_file"
    else
      return 3
    fi
  fi

  # Re-probe right before deleting; skip when the file is gone or now held.
  if [[ ! -e "$lock_file" ]] || scrape_lock_is_held "$lock_file"; then
    return 0
  fi
  rm -f "$lock_file"
  printf 'removed unheld lock file: %s\n' "$lock_file"
  return 0
}
|
||||||
|
|
||||||
|
# Gate for wrappers: succeed only when the lock status script reports that a
# scrape may start.
# Arguments:
#   $1 - scrape targets config path, forwarded as `--config <path>`
#   $2 - path to the lock status script
# Returns:
#   0 - DCE_SKIP_SCRAPE_LOCK is "1", the status script is not executable,
#       or the status script exited 0
#   1 - the status script exited non-zero
ensure_scrape_lock_available() {
  local config_path=$1 status_script=$2

  # Opt-out, or no status script to ask: treat the lock as available.
  if [[ "${DCE_SKIP_SCRAPE_LOCK:-0}" == "1" || ! -x "$status_script" ]]; then
    return 0
  fi

  "$status_script" --config "$config_path" || return 1
  return 0
}
|
||||||
|
|
@ -6,6 +6,8 @@ SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd -P)
|
||||||
REPO_ROOT="${DCE_REPO_ROOT:-$(cd "$SCRIPT_DIR/.." && pwd -P)}"
|
REPO_ROOT="${DCE_REPO_ROOT:-$(cd "$SCRIPT_DIR/.." && pwd -P)}"
|
||||||
# shellcheck source=lib/scrape-run-plan.sh
|
# shellcheck source=lib/scrape-run-plan.sh
|
||||||
source "$SCRIPT_DIR/lib/scrape-run-plan.sh"
|
source "$SCRIPT_DIR/lib/scrape-run-plan.sh"
|
||||||
|
# shellcheck source=lib/scrape-lock.sh
|
||||||
|
source "$SCRIPT_DIR/lib/scrape-lock.sh"
|
||||||
COMPOSE_FILE="${DCE_COMPOSE_FILE:-$REPO_ROOT/docker-compose.yml}"
|
COMPOSE_FILE="${DCE_COMPOSE_FILE:-$REPO_ROOT/docker-compose.yml}"
|
||||||
ENV_FILE="${DCE_ENV_FILE:-$REPO_ROOT/scrape.env}"
|
ENV_FILE="${DCE_ENV_FILE:-$REPO_ROOT/scrape.env}"
|
||||||
DOCKER_BIN="${DCE_DOCKER_BIN:-docker}"
|
DOCKER_BIN="${DCE_DOCKER_BIN:-docker}"
|
||||||
|
|
@ -61,70 +63,26 @@ cleanup_compose_env() {
|
||||||
fi
|
fi
|
||||||
}
|
}
|
||||||
|
|
||||||
resolve_scrape_lock_file() {
|
|
||||||
local config_path=$1
|
|
||||||
|
|
||||||
if [[ -n "${DCE_SCRAPE_LOCK_FILE:-}" ]]; then
|
|
||||||
printf '%s\n' "$DCE_SCRAPE_LOCK_FILE"
|
|
||||||
return 0
|
|
||||||
fi
|
|
||||||
|
|
||||||
local archive_root=""
|
|
||||||
if [[ -f "$config_path" ]]; then
|
|
||||||
archive_root=$(jq -r '.archive_root // empty' "$config_path" 2>/dev/null) || true
|
|
||||||
fi
|
|
||||||
if [[ -n "$archive_root" && "$archive_root" != null ]]; then
|
|
||||||
printf '%s/.dce-scrape.lock\n' "$archive_root"
|
|
||||||
else
|
|
||||||
printf '%s/.dce-scrape.lock\n' "$REPO_ROOT"
|
|
||||||
fi
|
|
||||||
}
|
|
||||||
|
|
||||||
scrape_lock_meta_path() {
|
|
||||||
printf '%s.meta\n' "$SCRAPE_LOCK_FILE"
|
|
||||||
}
|
|
||||||
|
|
||||||
write_scrape_lock_meta() {
|
write_scrape_lock_meta() {
|
||||||
local meta_file
|
local meta_file
|
||||||
meta_file=$(scrape_lock_meta_path)
|
meta_file=$(scrape_lock_meta_path "$SCRAPE_LOCK_FILE")
|
||||||
printf 'pid=%s\nstarted=%s\ncmd=%s\n' \
|
printf 'pid=%s\nstarted=%s\ncmd=%s\n' \
|
||||||
"$$" "$(date -u +%Y-%m-%dT%H:%M:%SZ)" "$(ps -o args= -p $$ 2>/dev/null | head -c 500 || echo unknown)" >"$meta_file"
|
"$$" "$(date -u +%Y-%m-%dT%H:%M:%SZ)" "$(ps -o args= -p $$ 2>/dev/null | head -c 500 || echo unknown)" >"$meta_file"
|
||||||
}
|
}
|
||||||
|
|
||||||
remove_scrape_lock_meta() {
|
remove_scrape_lock_meta() {
|
||||||
rm -f "$(scrape_lock_meta_path)"
|
rm -f "$(scrape_lock_meta_path "$SCRAPE_LOCK_FILE")"
|
||||||
}
|
|
||||||
|
|
||||||
format_scrape_lock_holder() {
|
|
||||||
local meta_file=$1
|
|
||||||
local pid="" started="" cmd="" holder_state=""
|
|
||||||
|
|
||||||
[[ -f "$meta_file" ]] || return 0
|
|
||||||
pid=$(grep -E '^pid=' "$meta_file" | head -1 | cut -d= -f2- || true)
|
|
||||||
started=$(grep -E '^started=' "$meta_file" | head -1 | cut -d= -f2- || true)
|
|
||||||
cmd=$(grep -E '^cmd=' "$meta_file" | head -1 | cut -d= -f2- || true)
|
|
||||||
[[ -n "$pid" ]] || return 0
|
|
||||||
|
|
||||||
if kill -0 "$pid" 2>/dev/null; then
|
|
||||||
holder_state="running"
|
|
||||||
else
|
|
||||||
holder_state="not running"
|
|
||||||
fi
|
|
||||||
printf 'Holder pid %s (%s, started %s): %s' "$pid" "$holder_state" "${started:-unknown}" "${cmd:-unknown}"
|
|
||||||
}
|
}
|
||||||
|
|
||||||
try_reclaim_stale_scrape_lock() {
|
try_reclaim_stale_scrape_lock() {
|
||||||
local meta_file pid
|
local meta_file pid
|
||||||
meta_file=$(scrape_lock_meta_path)
|
meta_file=$(scrape_lock_meta_path "$SCRAPE_LOCK_FILE")
|
||||||
[[ -f "$meta_file" ]] || return 1
|
pid=$(read_scrape_lock_meta_field "$meta_file" pid)
|
||||||
pid=$(grep -E '^pid=' "$meta_file" | head -1 | cut -d= -f2- || true)
|
if scrape_lock_try_reclaim_meta "$meta_file"; then
|
||||||
[[ -n "$pid" ]] || return 1
|
printf 'WARN: reclaiming scrape lock; previous holder pid %s is not running.\n' "$pid" >&2
|
||||||
if kill -0 "$pid" 2>/dev/null; then
|
return 0
|
||||||
return 1
|
|
||||||
fi
|
fi
|
||||||
printf 'WARN: reclaiming scrape lock; previous holder pid %s is not running.\n' "$pid" >&2
|
return 1
|
||||||
remove_scrape_lock_meta
|
|
||||||
return 0
|
|
||||||
}
|
}
|
||||||
|
|
||||||
acquire_scrape_lock() {
|
acquire_scrape_lock() {
|
||||||
|
|
@ -136,7 +94,7 @@ acquire_scrape_lock() {
|
||||||
command -v flock >/dev/null 2>&1 || return 0
|
command -v flock >/dev/null 2>&1 || return 0
|
||||||
|
|
||||||
[[ -n "$config_path" ]] || config_path="$REPO_ROOT/config/scrape-targets.json"
|
[[ -n "$config_path" ]] || config_path="$REPO_ROOT/config/scrape-targets.json"
|
||||||
SCRAPE_LOCK_FILE=$(resolve_scrape_lock_file "$config_path")
|
SCRAPE_LOCK_FILE=$(resolve_scrape_lock_file "$config_path" "$REPO_ROOT")
|
||||||
mkdir -p "$(dirname "$SCRAPE_LOCK_FILE")"
|
mkdir -p "$(dirname "$SCRAPE_LOCK_FILE")"
|
||||||
|
|
||||||
exec {SCRAPE_LOCK_FD}>>"$SCRAPE_LOCK_FILE"
|
exec {SCRAPE_LOCK_FD}>>"$SCRAPE_LOCK_FILE"
|
||||||
|
|
@ -146,7 +104,7 @@ acquire_scrape_lock() {
|
||||||
return 0
|
return 0
|
||||||
fi
|
fi
|
||||||
local holder_msg=""
|
local holder_msg=""
|
||||||
holder_msg=$(format_scrape_lock_holder "$(scrape_lock_meta_path)") || true
|
holder_msg=$(scrape_lock_format_holder_summary "$(scrape_lock_meta_path "$SCRAPE_LOCK_FILE")") || true
|
||||||
if [[ -n "$holder_msg" ]]; then
|
if [[ -n "$holder_msg" ]]; then
|
||||||
die "Another scrape is already running (lock: $SCRAPE_LOCK_FILE). $holder_msg"
|
die "Another scrape is already running (lock: $SCRAPE_LOCK_FILE). $holder_msg"
|
||||||
fi
|
fi
|
||||||
|
|
|
||||||
|
|
@ -12,6 +12,8 @@ VERIFY_SCRIPT="$REPO_ROOT/scripts/verify-documents-archives.sh"
|
||||||
VERIFY_READY="$REPO_ROOT/scripts/verify-operator-ready.sh"
|
VERIFY_READY="$REPO_ROOT/scripts/verify-operator-ready.sh"
|
||||||
SETUP_AUTH="$REPO_ROOT/scripts/setup-scrape-auth.sh"
|
SETUP_AUTH="$REPO_ROOT/scripts/setup-scrape-auth.sh"
|
||||||
LOCK_STATUS="$REPO_ROOT/scripts/scrape-lock-status.sh"
|
LOCK_STATUS="$REPO_ROOT/scripts/scrape-lock-status.sh"
|
||||||
|
# shellcheck source=lib/scrape-lock.sh
|
||||||
|
source "$SCRIPT_DIR/lib/scrape-lock.sh"
|
||||||
# shellcheck source=lib/scrape-run-plan.sh
|
# shellcheck source=lib/scrape-run-plan.sh
|
||||||
source "$SCRIPT_DIR/lib/scrape-run-plan.sh"
|
source "$SCRIPT_DIR/lib/scrape-run-plan.sh"
|
||||||
|
|
||||||
|
|
@ -41,12 +43,8 @@ die() {
|
||||||
exit 1
|
exit 1
|
||||||
}
|
}
|
||||||
|
|
||||||
ensure_scrape_lock_available() {
|
require_scrape_lock_free() {
|
||||||
if [[ "${DCE_SKIP_SCRAPE_LOCK:-0}" == "1" ]]; then
|
if ! ensure_scrape_lock_available "$CONFIG_PATH" "$LOCK_STATUS"; then
|
||||||
return 0
|
|
||||||
fi
|
|
||||||
[[ -x "$LOCK_STATUS" ]] || return 0
|
|
||||||
if ! "$LOCK_STATUS" --config "$CONFIG_PATH"; then
|
|
||||||
die "Scrape lock is held; another scrape may be running. Inspect: $LOCK_STATUS --config $CONFIG_PATH"
|
die "Scrape lock is held; another scrape may be running. Inspect: $LOCK_STATUS --config $CONFIG_PATH"
|
||||||
fi
|
fi
|
||||||
}
|
}
|
||||||
|
|
@ -139,7 +137,7 @@ main() {
|
||||||
|
|
||||||
"$VERIFY_READY" --disk-only --config "$CONFIG_PATH"
|
"$VERIFY_READY" --disk-only --config "$CONFIG_PATH"
|
||||||
|
|
||||||
ensure_scrape_lock_available
|
require_scrape_lock_free
|
||||||
|
|
||||||
if (( salvage_only == 1 )); then
|
if (( salvage_only == 1 )); then
|
||||||
run_local_salvage "${passthrough[@]}"
|
run_local_salvage "${passthrough[@]}"
|
||||||
|
|
|
||||||
|
|
@ -11,6 +11,8 @@ VERIFY_READY="$REPO_ROOT/scripts/verify-operator-ready.sh"
|
||||||
DOCUMENTS_SCRAPE="$REPO_ROOT/scripts/run-documents-scrape.sh"
|
DOCUMENTS_SCRAPE="$REPO_ROOT/scripts/run-documents-scrape.sh"
|
||||||
AUDIT_JSON="$REPO_ROOT/scripts/audit-archive-json.sh"
|
AUDIT_JSON="$REPO_ROOT/scripts/audit-archive-json.sh"
|
||||||
LOCK_STATUS="$REPO_ROOT/scripts/scrape-lock-status.sh"
|
LOCK_STATUS="$REPO_ROOT/scripts/scrape-lock-status.sh"
|
||||||
|
# shellcheck source=lib/scrape-lock.sh
|
||||||
|
source "$SCRIPT_DIR/lib/scrape-lock.sh"
|
||||||
|
|
||||||
DRY_RUN=0
|
DRY_RUN=0
|
||||||
SKIP_SCRAPE=0
|
SKIP_SCRAPE=0
|
||||||
|
|
@ -91,12 +93,8 @@ audit_targets() {
|
||||||
(( failures == 0 ))
|
(( failures == 0 ))
|
||||||
}
|
}
|
||||||
|
|
||||||
ensure_scrape_lock_available() {
|
require_scrape_lock_free() {
|
||||||
if [[ "${DCE_SKIP_SCRAPE_LOCK:-0}" == "1" ]]; then
|
if ! ensure_scrape_lock_available "$CONFIG_PATH" "$LOCK_STATUS"; then
|
||||||
return 0
|
|
||||||
fi
|
|
||||||
[[ -x "$LOCK_STATUS" ]] || return 0
|
|
||||||
if ! "$LOCK_STATUS" --config "$CONFIG_PATH"; then
|
|
||||||
die "Scrape lock is held; another scrape may be running. Inspect: $LOCK_STATUS --config $CONFIG_PATH"
|
die "Scrape lock is held; another scrape may be running. Inspect: $LOCK_STATUS --config $CONFIG_PATH"
|
||||||
fi
|
fi
|
||||||
}
|
}
|
||||||
|
|
@ -292,7 +290,7 @@ main() {
|
||||||
if (( SKIP_SCRAPE )); then
|
if (( SKIP_SCRAPE )); then
|
||||||
log_step "Skip scrape requested."
|
log_step "Skip scrape requested."
|
||||||
else
|
else
|
||||||
ensure_scrape_lock_available || failures=$((failures + 1))
|
require_scrape_lock_free || failures=$((failures + 1))
|
||||||
if (( failures == 0 )); then
|
if (( failures == 0 )); then
|
||||||
if (( PER_TARGET )) && [[ -z "$TARGET" ]]; then
|
if (( PER_TARGET )) && [[ -z "$TARGET" ]]; then
|
||||||
scrape_per_target || failures=$((failures + 1))
|
scrape_per_target || failures=$((failures + 1))
|
||||||
|
|
|
||||||
|
|
@ -5,6 +5,8 @@ set -Eeuo pipefail
|
||||||
SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd -P)
|
SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd -P)
|
||||||
REPO_ROOT="${DCE_REPO_ROOT:-$(cd "$SCRIPT_DIR/.." && pwd -P)}"
|
REPO_ROOT="${DCE_REPO_ROOT:-$(cd "$SCRIPT_DIR/.." && pwd -P)}"
|
||||||
CONFIG_PATH="${DCE_CONFIG_FILE:-$REPO_ROOT/config/scrape-targets.json}"
|
CONFIG_PATH="${DCE_CONFIG_FILE:-$REPO_ROOT/config/scrape-targets.json}"
|
||||||
|
# shellcheck source=lib/scrape-lock.sh
|
||||||
|
source "$SCRIPT_DIR/lib/scrape-lock.sh"
|
||||||
|
|
||||||
usage() {
|
usage() {
|
||||||
cat <<EOF
|
cat <<EOF
|
||||||
|
|
@ -28,86 +30,6 @@ die() {
|
||||||
exit 2
|
exit 2
|
||||||
}
|
}
|
||||||
|
|
||||||
resolve_scrape_lock_file() {
|
|
||||||
local config_path=$1
|
|
||||||
|
|
||||||
if [[ -n "${DCE_SCRAPE_LOCK_FILE:-}" ]]; then
|
|
||||||
printf '%s\n' "$DCE_SCRAPE_LOCK_FILE"
|
|
||||||
return 0
|
|
||||||
fi
|
|
||||||
|
|
||||||
local archive_root=""
|
|
||||||
if [[ -f "$config_path" ]]; then
|
|
||||||
archive_root=$(jq -r '.archive_root // empty' "$config_path" 2>/dev/null) || true
|
|
||||||
fi
|
|
||||||
if [[ -n "$archive_root" && "$archive_root" != null ]]; then
|
|
||||||
printf '%s/.dce-scrape.lock\n' "$archive_root"
|
|
||||||
else
|
|
||||||
printf '%s/.dce-scrape.lock\n' "$REPO_ROOT"
|
|
||||||
fi
|
|
||||||
}
|
|
||||||
|
|
||||||
read_meta_field() {
|
|
||||||
local meta_file=$1 field=$2
|
|
||||||
grep -E "^${field}=" "$meta_file" 2>/dev/null | head -1 | cut -d= -f2- || true
|
|
||||||
}
|
|
||||||
|
|
||||||
format_holder_line() {
|
|
||||||
local meta_file=$1
|
|
||||||
local pid="" started="" cmd="" holder_state=""
|
|
||||||
|
|
||||||
[[ -f "$meta_file" ]] || return 0
|
|
||||||
pid=$(read_meta_field "$meta_file" pid)
|
|
||||||
started=$(read_meta_field "$meta_file" started)
|
|
||||||
cmd=$(read_meta_field "$meta_file" cmd)
|
|
||||||
[[ -n "$pid" ]] || return 0
|
|
||||||
|
|
||||||
if kill -0 "$pid" 2>/dev/null; then
|
|
||||||
holder_state="running"
|
|
||||||
else
|
|
||||||
holder_state="not running"
|
|
||||||
fi
|
|
||||||
printf 'holder: pid %s (%s, started %s)\n' "$pid" "$holder_state" "${started:-unknown}"
|
|
||||||
[[ -n "$cmd" ]] && printf 'cmd: %s\n' "$cmd"
|
|
||||||
}
|
|
||||||
|
|
||||||
lock_is_held() {
|
|
||||||
local lock_file=$1
|
|
||||||
|
|
||||||
command -v flock >/dev/null 2>&1 || return 1
|
|
||||||
exec {lock_probe_fd}>>"$lock_file"
|
|
||||||
if flock -n "$lock_probe_fd"; then
|
|
||||||
flock -u "$lock_probe_fd" 2>/dev/null || true
|
|
||||||
exec {lock_probe_fd}>&-
|
|
||||||
return 1
|
|
||||||
fi
|
|
||||||
exec {lock_probe_fd}>&-
|
|
||||||
return 0
|
|
||||||
}
|
|
||||||
|
|
||||||
reclaim_stale_lock() {
|
|
||||||
local lock_file=$1 meta_file=$2
|
|
||||||
|
|
||||||
if lock_is_held "$lock_file"; then
|
|
||||||
die "Cannot reclaim: scrape lock is actively held."
|
|
||||||
fi
|
|
||||||
|
|
||||||
if [[ -f "$meta_file" ]]; then
|
|
||||||
local pid
|
|
||||||
pid=$(read_meta_field "$meta_file" pid)
|
|
||||||
if [[ -n "$pid" ]] && kill -0 "$pid" 2>/dev/null; then
|
|
||||||
die "Cannot reclaim: holder pid $pid is still running."
|
|
||||||
fi
|
|
||||||
rm -f "$meta_file"
|
|
||||||
printf 'removed stale lock meta: %s\n' "$meta_file"
|
|
||||||
fi
|
|
||||||
|
|
||||||
if [[ -e "$lock_file" ]] && ! lock_is_held "$lock_file"; then
|
|
||||||
rm -f "$lock_file"
|
|
||||||
printf 'removed unheld lock file: %s\n' "$lock_file"
|
|
||||||
fi
|
|
||||||
}
|
|
||||||
|
|
||||||
main() {
|
main() {
|
||||||
local reclaim=0
|
local reclaim=0
|
||||||
while (($#)); do
|
while (($#)); do
|
||||||
|
|
@ -134,9 +56,9 @@ main() {
|
||||||
command -v jq >/dev/null 2>&1 || die "Required command 'jq' is missing."
|
command -v jq >/dev/null 2>&1 || die "Required command 'jq' is missing."
|
||||||
[[ -f "$CONFIG_PATH" ]] || die "Missing config: $CONFIG_PATH"
|
[[ -f "$CONFIG_PATH" ]] || die "Missing config: $CONFIG_PATH"
|
||||||
|
|
||||||
local lock_file meta_file
|
local lock_file meta_file reclaim_status
|
||||||
lock_file=$(resolve_scrape_lock_file "$CONFIG_PATH")
|
lock_file=$(resolve_scrape_lock_file "$CONFIG_PATH" "$REPO_ROOT")
|
||||||
meta_file="${lock_file}.meta"
|
meta_file=$(scrape_lock_meta_path "$lock_file")
|
||||||
|
|
||||||
printf 'Scrape lock status\n'
|
printf 'Scrape lock status\n'
|
||||||
printf '==================\n'
|
printf '==================\n'
|
||||||
|
|
@ -150,24 +72,24 @@ main() {
|
||||||
|
|
||||||
if ! command -v flock >/dev/null 2>&1; then
|
if ! command -v flock >/dev/null 2>&1; then
|
||||||
printf 'state: unknown (flock unavailable; lock file exists)\n'
|
printf 'state: unknown (flock unavailable; lock file exists)\n'
|
||||||
format_holder_line "$meta_file"
|
scrape_lock_format_holder_lines "$meta_file"
|
||||||
exit 0
|
exit 0
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if lock_is_held "$lock_file"; then
|
if scrape_lock_is_held "$lock_file"; then
|
||||||
printf 'state: held (active scrape)\n'
|
printf 'state: held (active scrape)\n'
|
||||||
format_holder_line "$meta_file"
|
scrape_lock_format_holder_lines "$meta_file"
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if [[ -f "$meta_file" ]]; then
|
if [[ -f "$meta_file" ]]; then
|
||||||
local pid
|
local pid
|
||||||
pid=$(read_meta_field "$meta_file" pid)
|
pid=$(read_scrape_lock_meta_field "$meta_file" pid)
|
||||||
if [[ -n "$pid" ]] && ! kill -0 "$pid" 2>/dev/null; then
|
if [[ -n "$pid" ]] && ! kill -0 "$pid" 2>/dev/null; then
|
||||||
printf 'state: stale (reclaimable; holder pid %s is not running)\n' "$pid"
|
printf 'state: stale (reclaimable; holder pid %s is not running)\n' "$pid"
|
||||||
format_holder_line "$meta_file"
|
scrape_lock_format_holder_lines "$meta_file"
|
||||||
if (( reclaim )); then
|
if (( reclaim )); then
|
||||||
reclaim_stale_lock "$lock_file" "$meta_file"
|
scrape_lock_reclaim_stale_files "$lock_file" "$meta_file" || die "Cannot reclaim stale scrape lock."
|
||||||
printf 'state: free (stale lock reclaimed)\n'
|
printf 'state: free (stale lock reclaimed)\n'
|
||||||
fi
|
fi
|
||||||
exit 0
|
exit 0
|
||||||
|
|
@ -175,8 +97,14 @@ main() {
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if (( reclaim )); then
|
if (( reclaim )); then
|
||||||
if [[ -e "$lock_file" ]] && ! lock_is_held "$lock_file"; then
|
if [[ -e "$lock_file" ]] && ! scrape_lock_is_held "$lock_file"; then
|
||||||
reclaim_stale_lock "$lock_file" "$meta_file"
|
reclaim_status=0
|
||||||
|
scrape_lock_reclaim_stale_files "$lock_file" "$meta_file" || reclaim_status=$?
|
||||||
|
if (( reclaim_status == 2 )); then
|
||||||
|
die "Cannot reclaim: scrape lock is actively held."
|
||||||
|
elif (( reclaim_status == 3 )); then
|
||||||
|
die "Cannot reclaim: lock holder pid is still running."
|
||||||
|
fi
|
||||||
printf 'state: free (orphan lock reclaimed)\n'
|
printf 'state: free (orphan lock reclaimed)\n'
|
||||||
exit 0
|
exit 0
|
||||||
fi
|
fi
|
||||||
|
|
@ -185,7 +113,7 @@ main() {
|
||||||
fi
|
fi
|
||||||
|
|
||||||
printf 'state: free (lock file present but not held)\n'
|
printf 'state: free (lock file present but not held)\n'
|
||||||
format_holder_line "$meta_file"
|
scrape_lock_format_holder_lines "$meta_file"
|
||||||
exit 0
|
exit 0
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -14,6 +14,7 @@ FAKE_REPO="$TMP_DIR/fake-repo"
|
||||||
mkdir -p "$FAKE_REPO/scripts/lib"
|
mkdir -p "$FAKE_REPO/scripts/lib"
|
||||||
cp "$REPO_ROOT/scripts/run-discord-scrape-host.sh" "$FAKE_REPO/scripts/"
|
cp "$REPO_ROOT/scripts/run-discord-scrape-host.sh" "$FAKE_REPO/scripts/"
|
||||||
cp "$REPO_ROOT/scripts/lib/scrape-run-plan.sh" "$FAKE_REPO/scripts/lib/"
|
cp "$REPO_ROOT/scripts/lib/scrape-run-plan.sh" "$FAKE_REPO/scripts/lib/"
|
||||||
|
cp "$REPO_ROOT/scripts/lib/scrape-lock.sh" "$FAKE_REPO/scripts/lib/"
|
||||||
chmod +x "$FAKE_REPO/scripts/run-discord-scrape-host.sh"
|
chmod +x "$FAKE_REPO/scripts/run-discord-scrape-host.sh"
|
||||||
|
|
||||||
COMPOSE_FILE="$TMP_DIR/docker-compose.yml"
|
COMPOSE_FILE="$TMP_DIR/docker-compose.yml"
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue