From 682094c348a989dfac02600784e86fc2d8db1f10 Mon Sep 17 00:00:00 2001 From: Copilot Date: Wed, 3 Jun 2026 06:56:56 -0500 Subject: [PATCH] feat(scrape): add scrape-lock-status diagnostic for archive-root lock Introduce read-only scrape-lock-status.sh and surface lock state during operator-handoff so shared Documents archives show active or stale holders before starting another scrape. --- ...-06-04-056-feat-scrape-lock-status-plan.md | 50 ++++++ scripts/operator-handoff.sh | 13 ++ scripts/scrape-lock-status.sh | 148 ++++++++++++++++++ scripts/tests/operator-handoff-smoke.sh | 4 + scripts/tests/scrape-lock-status-smoke.sh | 94 +++++++++++ 5 files changed, 309 insertions(+) create mode 100644 docs/plans/2026-06-04-056-feat-scrape-lock-status-plan.md create mode 100755 scripts/scrape-lock-status.sh create mode 100755 scripts/tests/scrape-lock-status-smoke.sh diff --git a/docs/plans/2026-06-04-056-feat-scrape-lock-status-plan.md b/docs/plans/2026-06-04-056-feat-scrape-lock-status-plan.md new file mode 100644 index 00000000..e5285548 --- /dev/null +++ b/docs/plans/2026-06-04-056-feat-scrape-lock-status-plan.md @@ -0,0 +1,50 @@ +--- +title: "feat: Scrape lock status diagnostic" +type: feat +status: complete +date: 2026-06-04 +origin: /lfg — plan 053 moved lock to archive_root; operators need read-only visibility before starting validation or killing stale runs +--- + +# feat: Scrape lock status diagnostic + +## Summary + +Add `scripts/scrape-lock-status.sh` to report archive-root scrape lock state (path, holder pid/cmd/started, live vs stale) and call it from `operator-handoff.sh` so handoff surfaces blocking scrapes. + +## Problem Frame + +Two checkouts can share `~/Documents` archives. A long validation holds `{archive_root}/.dce-scrape.lock` but operators only discover it when a second scrape fails. They need a read-only check before starting work. 
+ +## Requirements + +| ID | Requirement | +|----|-------------| +| R1 | `scrape-lock-status.sh --config PATH` prints lock file path and state | +| R2 | Resolves lock via `DCE_SCRAPE_LOCK_FILE` or `{archive_root}/.dce-scrape.lock` (same rules as host runner) | +| R3 | Reads `.meta` sidecar when present (pid, started, cmd) | +| R4 | Exit 0 when safe to scrape (free or stale reclaimable); exit 1 when actively held | +| R5 | `operator-handoff.sh` prints lock status section after verify-operator-ready | +| R6 | Offline smoke covers held, free, and archive-root path; `run-all-smokes.sh` passes | + +## Implementation Units + +### U1. scrape-lock-status.sh + +**Files:** `scripts/scrape-lock-status.sh` + +### U2. Operator handoff integration + +**Files:** `scripts/operator-handoff.sh`, `scripts/tests/operator-handoff-smoke.sh` + +### U3. Lock status smoke + +**Files:** `scripts/tests/scrape-lock-status-smoke.sh` + +## Scope Boundaries + +### Deferred + +- Refactoring host runner to shared lib (duplicate minimal resolve logic in status script) +- Live KotOR catch-up on host +- operator-handoff `--salvage-only` diff --git a/scripts/operator-handoff.sh b/scripts/operator-handoff.sh index 5594162f..fface363 100755 --- a/scripts/operator-handoff.sh +++ b/scripts/operator-handoff.sh @@ -9,6 +9,7 @@ source "$SCRIPT_DIR/lib/scrape-run-plan.sh" CONFIG_PATH="${DCE_CONFIG_FILE:-$REPO_ROOT/config/scrape-targets.json}" VERIFY_READY="$REPO_ROOT/scripts/verify-operator-ready.sh" DOCUMENTS_SCRAPE="$REPO_ROOT/scripts/run-documents-scrape.sh" +LOCK_STATUS="$REPO_ROOT/scripts/scrape-lock-status.sh" SKIP_DF=0 TARGET="" CHANNEL_ARGS=() @@ -101,6 +102,18 @@ main() { fi "$VERIFY_READY" --config "$CONFIG_PATH" + + if [[ -x "$LOCK_STATUS" ]]; then + printf '\n' + set +e + "$LOCK_STATUS" --config "$CONFIG_PATH" + lock_status=$? 
+ set -e + if (( lock_status == 1 )); then + printf '\nWARN: scrape lock is held; wait for the active scrape or confirm it is stale before starting another run.\n' + fi + fi + local -a dry_run_args=(--dry-run --config "$CONFIG_PATH") [[ -n "$TARGET" ]] && dry_run_args+=(--target "$TARGET") dry_run_args+=("${CHANNEL_ARGS[@]}") diff --git a/scripts/scrape-lock-status.sh b/scripts/scrape-lock-status.sh new file mode 100755 index 00000000..991a3e1d --- /dev/null +++ b/scripts/scrape-lock-status.sh @@ -0,0 +1,148 @@ +#!/usr/bin/env bash + +set -Eeuo pipefail + +SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd -P) +REPO_ROOT="${DCE_REPO_ROOT:-$(cd "$SCRIPT_DIR/.." && pwd -P)}" +CONFIG_PATH="${DCE_CONFIG_FILE:-$REPO_ROOT/config/scrape-targets.json}" + +usage() { + cat <&2 + exit 2 +} + +resolve_scrape_lock_file() { + local config_path=$1 + + if [[ -n "${DCE_SCRAPE_LOCK_FILE:-}" ]]; then + printf '%s\n' "$DCE_SCRAPE_LOCK_FILE" + return 0 + fi + + local archive_root="" + if [[ -f "$config_path" ]]; then + archive_root=$(jq -r '.archive_root // empty' "$config_path" 2>/dev/null) || true + fi + if [[ -n "$archive_root" && "$archive_root" != null ]]; then + printf '%s/.dce-scrape.lock\n' "$archive_root" + else + printf '%s/.dce-scrape.lock\n' "$REPO_ROOT" + fi +} + +read_meta_field() { + local meta_file=$1 field=$2 + grep -E "^${field}=" "$meta_file" 2>/dev/null | head -1 | cut -d= -f2- || true +} + +format_holder_line() { + local meta_file=$1 + local pid="" started="" cmd="" holder_state="" + + [[ -f "$meta_file" ]] || return 0 + pid=$(read_meta_field "$meta_file" pid) + started=$(read_meta_field "$meta_file" started) + cmd=$(read_meta_field "$meta_file" cmd) + [[ -n "$pid" ]] || return 0 + + if kill -0 "$pid" 2>/dev/null; then + holder_state="running" + else + holder_state="not running" + fi + printf 'holder: pid %s (%s, started %s)\n' "$pid" "$holder_state" "${started:-unknown}" + [[ -z "$cmd" ]] || printf 'cmd: %s\n' "$cmd" # '||' form returns 0 when cmd is empty, so callers under set -e do not abort with status 1 +} + +lock_is_held() { + local 
lock_file=$1 + + command -v flock >/dev/null 2>&1 || return 1 + exec {lock_probe_fd}>>"$lock_file" + if flock -n "$lock_probe_fd"; then + flock -u "$lock_probe_fd" 2>/dev/null || true + exec {lock_probe_fd}>&- + return 1 + fi + exec {lock_probe_fd}>&- + return 0 +} + +main() { + while (($#)); do + case "$1" in + --config) + [[ $# -ge 2 ]] || die "Missing value for --config." + CONFIG_PATH=$2 + shift 2 + ;; + --help|-h) + usage + exit 0 + ;; + *) + die "Unknown option: $1" + ;; + esac + done + + command -v jq >/dev/null 2>&1 || die "Required command 'jq' is missing." + [[ -f "$CONFIG_PATH" ]] || die "Missing config: $CONFIG_PATH" + + local lock_file meta_file + lock_file=$(resolve_scrape_lock_file "$CONFIG_PATH") + meta_file="${lock_file}.meta" + + printf 'Scrape lock status\n' + printf '==================\n' + printf 'config: %s\n' "$CONFIG_PATH" + printf 'lock: %s\n' "$lock_file" + + if [[ ! -e "$lock_file" ]]; then + printf 'state: free (no lock file)\n' + exit 0 + fi + + if ! command -v flock >/dev/null 2>&1; then + printf 'state: unknown (flock unavailable; lock file exists)\n' + format_holder_line "$meta_file" + exit 0 + fi + + if lock_is_held "$lock_file"; then + printf 'state: held (active scrape)\n' + format_holder_line "$meta_file" + exit 1 + fi + + if [[ -f "$meta_file" ]]; then + local pid + pid=$(read_meta_field "$meta_file" pid) + if [[ -n "$pid" ]] && ! kill -0 "$pid" 2>/dev/null; then + printf 'state: stale (reclaimable; holder pid %s is not running)\n' "$pid" + format_holder_line "$meta_file" + exit 0 + fi + fi + + printf 'state: free (lock file present but not held)\n' + format_holder_line "$meta_file" + exit 0 +} + +main "$@" diff --git a/scripts/tests/operator-handoff-smoke.sh b/scripts/tests/operator-handoff-smoke.sh index b05ad72d..ea6e300e 100755 --- a/scripts/tests/operator-handoff-smoke.sh +++ b/scripts/tests/operator-handoff-smoke.sh @@ -65,5 +65,9 @@ if [[ "$channel_status" -ne 0 ]] || ! 
grep -q 'Handoff complete' <<<"$channel_ou printf '%s\n' "$channel_output" >&2 exit 1 fi +if ! grep -q 'Scrape lock status' <<<"$handoff_output"; then + printf 'operator-handoff missing scrape lock status section\n' >&2 + exit 1 +fi printf 'operator-handoff-smoke: ok\n' diff --git a/scripts/tests/scrape-lock-status-smoke.sh b/scripts/tests/scrape-lock-status-smoke.sh new file mode 100755 index 00000000..d21ef2ff --- /dev/null +++ b/scripts/tests/scrape-lock-status-smoke.sh @@ -0,0 +1,94 @@ +#!/usr/bin/env bash + +set -Eeuo pipefail + +REPO_ROOT=$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd -P) +STATUS="$REPO_ROOT/scripts/scrape-lock-status.sh" +TMP_DIR=$(mktemp -d "${TMPDIR:-/tmp}/dce-lock-status-smoke.XXXXXX") +ARCHIVE_ROOT="$TMP_DIR/archive" +CONFIG_PATH="$TMP_DIR/config.json" +LOCK_FILE="$ARCHIVE_ROOT/.dce-scrape.lock" +HOLDER_PID="" + +cleanup() { + if [[ -n "$HOLDER_PID" ]] && kill -0 "$HOLDER_PID" 2>/dev/null; then + kill "$HOLDER_PID" 2>/dev/null || true + wait "$HOLDER_PID" 2>/dev/null || true + fi + rm -rf "$TMP_DIR" +} +trap cleanup EXIT + +command -v flock >/dev/null 2>&1 || { + echo "SKIP: flock not available" + exit 0 +} + +mkdir -p "$ARCHIVE_ROOT" +cat >"$CONFIG_PATH" <&1) +free_status=$? +set -e + +if [[ "$free_status" -ne 0 ]] || ! grep -q 'state: free (no lock file)' <<<"$free_output"; then + echo "expected free lock status" >&2 + printf '%s\n' "$free_output" >&2 + exit 1 +fi +if ! grep -Fq "$LOCK_FILE" <<<"$free_output"; then + echo "expected archive-root lock path in output" >&2 + exit 1 +fi + +( + exec {lock_fd}>>"$LOCK_FILE" + flock -n "$lock_fd" || exit 1 + printf 'pid=%s\nstarted=2020-01-01T00:00:00Z\ncmd=lock-status-smoke-holder\n' "$$" >"${LOCK_FILE}.meta" + sleep 120 +) & +HOLDER_PID=$! +sleep 0.2 + +set +e +held_output=$("$STATUS" --config "$CONFIG_PATH" 2>&1) +held_status=$? +set -e + +if [[ "$held_status" -ne 1 ]] || ! 
grep -q 'state: held (active scrape)' <<<"$held_output"; then + echo "expected held lock status exit 1" >&2 + printf '%s\n' "$held_output" >&2 + exit 1 +fi +if ! grep -q 'lock-status-smoke-holder' <<<"$held_output"; then + echo "expected holder cmd in status output" >&2 + exit 1 +fi + +kill "$HOLDER_PID" 2>/dev/null || true +wait "$HOLDER_PID" 2>/dev/null || true +HOLDER_PID="" + +printf 'pid=99999999\nstarted=2020-01-01T00:00:00Z\ncmd=dead-smoke-holder\n' >"${LOCK_FILE}.meta" +touch "$LOCK_FILE" + +set +e +stale_output=$("$STATUS" --config "$CONFIG_PATH" 2>&1) +stale_status=$? +set -e + +if [[ "$stale_status" -ne 0 ]] || ! grep -q 'state: stale (reclaimable' <<<"$stale_output"; then + echo "expected stale reclaimable status after holder exit" >&2 + printf '%s\n' "$stale_output" >&2 + exit 1 +fi + +printf 'scrape-lock-status-smoke: ok\n'