mirror of
https://github.com/Tyrrrz/DiscordChatExporter.git
synced 2026-06-10 00:02:37 -06:00
feat(scrape): handoff salvage-only mode and validation lock gate
Wire --salvage-only through operator-handoff and run-operator-proof, and refuse run-operator-validation scrape steps while the archive-root lock is actively held.
This commit is contained in:
parent
682094c348
commit
363749231d
|
|
@ -0,0 +1,54 @@
|
||||||
|
---
|
||||||
|
title: "feat: Operator handoff salvage-only and scrape lock gate"
|
||||||
|
type: feat
|
||||||
|
status: complete
|
||||||
|
date: 2026-06-04
|
||||||
|
origin: /lfg — plan 056 deferred operator-handoff --salvage-only; proof lacks salvage-only mode and salvage smoke
|
||||||
|
---
|
||||||
|
|
||||||
|
# feat: Operator handoff salvage-only and scrape lock gate
|
||||||
|
|
||||||
|
## Summary
|
||||||
|
|
||||||
|
Add `--salvage-only` to `operator-handoff.sh` and `run-operator-proof.sh`, fail fast in `run-operator-validation.sh` when the archive-root scrape lock is held, and extend smokes.
|
||||||
|
|
||||||
|
## Problem Frame
|
||||||
|
|
||||||
|
After stopping a crashed KotOR export, operators need a handoff entry point that merges the partial `.dce-temp` exports without running a dry-run or a Discord scrape:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
./scripts/operator-handoff.sh --salvage-only --target KotOR_discord_msgs --channel 221726893064454144
|
||||||
|
```
|
||||||
|
|
||||||
|
Validation should refuse to start a scrape while another checkout holds `{archive_root}/.dce-scrape.lock`.
|
||||||
|
|
||||||
|
## Requirements
|
||||||
|
|
||||||
|
| ID | Requirement |
|
||||||
|
|----|-------------|
|
||||||
|
| R1 | `operator-handoff.sh` accepts `--salvage-only` (runs documents salvage instead of dry-run) |
|
||||||
|
| R2 | `run-operator-proof.sh` accepts `--salvage-only` (handoff + salvage per target, no scrape/prove) |
|
||||||
|
| R3 | `run-operator-validation.sh` exits before scrape when lock is actively held |
|
||||||
|
| R4 | Lock gate is skipped when `DCE_SKIP_SCRAPE_LOCK=1` is set or `--skip-scrape` is passed |
|
||||||
|
| R5 | Smoke tests cover `--salvage-only` for both handoff and proof; `run-all-smokes.sh` passes |
|
||||||
|
|
||||||
|
## Implementation Units
|
||||||
|
|
||||||
|
### U1. Handoff and proof salvage-only
|
||||||
|
|
||||||
|
**Files:** `scripts/operator-handoff.sh`, `scripts/run-operator-proof.sh`, smokes
|
||||||
|
|
||||||
|
### U2. Validation lock gate
|
||||||
|
|
||||||
|
**Files:** `scripts/run-operator-validation.sh`
|
||||||
|
|
||||||
|
### U3. Smoke gate
|
||||||
|
|
||||||
|
**Verification:** `DCE_MIN_FREE_MB=0 ./scripts/run-all-smokes.sh`
|
||||||
|
|
||||||
|
## Scope Boundaries
|
||||||
|
|
||||||
|
### Deferred
|
||||||
|
|
||||||
|
- Live KotOR catch-up on host
|
||||||
|
- Docs refresh for new flags
|
||||||
|
|
@ -11,21 +11,24 @@ VERIFY_READY="$REPO_ROOT/scripts/verify-operator-ready.sh"
|
||||||
DOCUMENTS_SCRAPE="$REPO_ROOT/scripts/run-documents-scrape.sh"
|
DOCUMENTS_SCRAPE="$REPO_ROOT/scripts/run-documents-scrape.sh"
|
||||||
LOCK_STATUS="$REPO_ROOT/scripts/scrape-lock-status.sh"
|
LOCK_STATUS="$REPO_ROOT/scripts/scrape-lock-status.sh"
|
||||||
SKIP_DF=0
|
SKIP_DF=0
|
||||||
|
SALVAGE_ONLY=0
|
||||||
TARGET=""
|
TARGET=""
|
||||||
CHANNEL_ARGS=()
|
CHANNEL_ARGS=()
|
||||||
|
|
||||||
usage() {
|
usage() {
|
||||||
cat <<EOF
|
cat <<EOF
|
||||||
Usage:
|
Usage:
|
||||||
$(basename "$0") [--config PATH] [--skip-df] [--target NAME] [--channel ID]
|
$(basename "$0") [--config PATH] [--skip-df] [--target NAME] [--channel ID] [--salvage-only]
|
||||||
|
|
||||||
Run operator handoff checks before cron install or a full scrape:
|
Run operator handoff checks before cron install or a full scrape:
|
||||||
1. Free-space summary (archive_root + repo)
|
1. Free-space summary (archive_root + repo)
|
||||||
2. verify-operator-ready (jq, compose, auth, archives)
|
2. verify-operator-ready (jq, compose, auth, archives)
|
||||||
3. run-documents-scrape --dry-run (archive paths only)
|
3. scrape lock status (when available)
|
||||||
|
4. run-documents-scrape --dry-run OR --salvage-only
|
||||||
|
|
||||||
--target NAME Limit dry-run scrape plan to one configured target
|
--target NAME Limit documents step to one configured target
|
||||||
--channel ID With exactly one --target, limit dry-run to channel ID (repeatable)
|
--channel ID With exactly one --target, limit to channel ID (repeatable)
|
||||||
|
--salvage-only Merge stale .dce-temp exports only (no dry-run, no Discord scrape)
|
||||||
|
|
||||||
Environment:
|
Environment:
|
||||||
DCE_MIN_FREE_MB Minimum MiB free (default 1024 in verify-operator-ready)
|
DCE_MIN_FREE_MB Minimum MiB free (default 1024 in verify-operator-ready)
|
||||||
|
|
@ -69,6 +72,10 @@ main() {
|
||||||
SKIP_DF=1
|
SKIP_DF=1
|
||||||
shift
|
shift
|
||||||
;;
|
;;
|
||||||
|
--salvage-only)
|
||||||
|
SALVAGE_ONLY=1
|
||||||
|
shift
|
||||||
|
;;
|
||||||
--target)
|
--target)
|
||||||
[[ $# -ge 2 ]] || die "Missing value for --target."
|
[[ $# -ge 2 ]] || die "Missing value for --target."
|
||||||
TARGET=$2
|
TARGET=$2
|
||||||
|
|
@ -114,17 +121,26 @@ main() {
|
||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
|
|
||||||
local -a dry_run_args=(--dry-run --config "$CONFIG_PATH")
|
local -a documents_args=(--config "$CONFIG_PATH")
|
||||||
[[ -n "$TARGET" ]] && dry_run_args+=(--target "$TARGET")
|
[[ -n "$TARGET" ]] && documents_args+=(--target "$TARGET")
|
||||||
dry_run_args+=("${CHANNEL_ARGS[@]}")
|
documents_args+=("${CHANNEL_ARGS[@]}")
|
||||||
"$DOCUMENTS_SCRAPE" "${dry_run_args[@]}"
|
if (( SALVAGE_ONLY )); then
|
||||||
|
documents_args+=(--salvage-only)
|
||||||
|
"$DOCUMENTS_SCRAPE" "${documents_args[@]}"
|
||||||
|
printf '\nHandoff complete (salvage-only). Next:\n'
|
||||||
|
printf ' ./scripts/run-operator-validation.sh --salvage-before-scrape'
|
||||||
|
else
|
||||||
|
documents_args+=(--dry-run)
|
||||||
|
"$DOCUMENTS_SCRAPE" "${documents_args[@]}"
|
||||||
printf '\nHandoff complete. Safe to run:\n'
|
printf '\nHandoff complete. Safe to run:\n'
|
||||||
printf ' ./scripts/run-documents-scrape.sh'
|
printf ' ./scripts/run-documents-scrape.sh'
|
||||||
|
fi
|
||||||
[[ -n "$TARGET" ]] && printf ' --target %s' "$TARGET"
|
[[ -n "$TARGET" ]] && printf ' --target %s' "$TARGET"
|
||||||
((${#CHANNEL_ARGS[@]})) && printf ' %s' "${CHANNEL_ARGS[*]}"
|
((${#CHANNEL_ARGS[@]})) && printf ' %s' "${CHANNEL_ARGS[*]}"
|
||||||
printf '\n'
|
printf '\n'
|
||||||
|
if (( ! SALVAGE_ONLY )); then
|
||||||
printf ' ./scripts/setup-cron.sh --dry-run\n'
|
printf ' ./scripts/setup-cron.sh --dry-run\n'
|
||||||
|
fi
|
||||||
}
|
}
|
||||||
|
|
||||||
main "$@"
|
main "$@"
|
||||||
|
|
|
||||||
|
|
@ -17,12 +17,13 @@ TARGET=""
|
||||||
SYNC_GUI_FLAG=0
|
SYNC_GUI_FLAG=0
|
||||||
DRY_RUN=0
|
DRY_RUN=0
|
||||||
SALVAGE_BEFORE=0
|
SALVAGE_BEFORE=0
|
||||||
|
SALVAGE_ONLY=0
|
||||||
CHANNEL_ARGS=()
|
CHANNEL_ARGS=()
|
||||||
|
|
||||||
usage() {
|
usage() {
|
||||||
cat <<EOF
|
cat <<EOF
|
||||||
Usage:
|
Usage:
|
||||||
$(basename "$0") [--target NAME] [--channel ID] [--config PATH] [--sync-gui] [--dry-run] [--salvage-before-scrape]
|
$(basename "$0") [--target NAME] [--channel ID] [--config PATH] [--sync-gui] [--dry-run] [--salvage-only] [--salvage-before-scrape]
|
||||||
|
|
||||||
End-to-end operator proof:
|
End-to-end operator proof:
|
||||||
operator-handoff → [optional salvage] → incremental scrape → prove-incremental-append
|
operator-handoff → [optional salvage] → incremental scrape → prove-incremental-append
|
||||||
|
|
@ -30,6 +31,7 @@ End-to-end operator proof:
|
||||||
When --target is omitted, all enabled targets in the config are processed.
|
When --target is omitted, all enabled targets in the config are processed.
|
||||||
|
|
||||||
--channel ID With exactly one --target, limit scrape/prove to channel ID (repeatable)
|
--channel ID With exactly one --target, limit scrape/prove to channel ID (repeatable)
|
||||||
|
--salvage-only Handoff + merge stale .dce-temp exports only (no Discord scrape or prove)
|
||||||
--salvage-before-scrape Merge stale .dce-temp exports before incremental scrape
|
--salvage-before-scrape Merge stale .dce-temp exports before incremental scrape
|
||||||
|
|
||||||
Logs append to logs/operator-proof-<timestamp>.log
|
Logs append to logs/operator-proof-<timestamp>.log
|
||||||
|
|
@ -66,6 +68,10 @@ main() {
|
||||||
SALVAGE_BEFORE=1
|
SALVAGE_BEFORE=1
|
||||||
shift
|
shift
|
||||||
;;
|
;;
|
||||||
|
--salvage-only)
|
||||||
|
SALVAGE_ONLY=1
|
||||||
|
shift
|
||||||
|
;;
|
||||||
--channel)
|
--channel)
|
||||||
[[ $# -ge 2 ]] || die "Missing value for --channel."
|
[[ $# -ge 2 ]] || die "Missing value for --channel."
|
||||||
CHANNEL_ARGS+=(--channel "$2")
|
CHANNEL_ARGS+=(--channel "$2")
|
||||||
|
|
@ -83,6 +89,13 @@ main() {
|
||||||
|
|
||||||
[[ -f "$CONFIG_PATH" ]] || die "Missing config: $CONFIG_PATH"
|
[[ -f "$CONFIG_PATH" ]] || die "Missing config: $CONFIG_PATH"
|
||||||
|
|
||||||
|
if (( SALVAGE_ONLY == 1 && DRY_RUN == 1 )); then
|
||||||
|
die "--salvage-only cannot be combined with --dry-run."
|
||||||
|
fi
|
||||||
|
if (( SALVAGE_ONLY == 1 && SALVAGE_BEFORE == 1 )); then
|
||||||
|
die "--salvage-only and --salvage-before-scrape are mutually exclusive."
|
||||||
|
fi
|
||||||
|
|
||||||
local -a targets=()
|
local -a targets=()
|
||||||
if [[ -n "$TARGET" ]]; then
|
if [[ -n "$TARGET" ]]; then
|
||||||
targets=("$TARGET")
|
targets=("$TARGET")
|
||||||
|
|
@ -115,12 +128,17 @@ main() {
|
||||||
local -a handoff_args=(--config "$CONFIG_PATH")
|
local -a handoff_args=(--config "$CONFIG_PATH")
|
||||||
[[ -n "$TARGET" ]] && handoff_args+=(--target "$TARGET")
|
[[ -n "$TARGET" ]] && handoff_args+=(--target "$TARGET")
|
||||||
handoff_args+=("${CHANNEL_ARGS[@]}")
|
handoff_args+=("${CHANNEL_ARGS[@]}")
|
||||||
|
(( SALVAGE_ONLY )) && handoff_args+=(--salvage-only)
|
||||||
"$HANDOFF" "${handoff_args[@]}"
|
"$HANDOFF" "${handoff_args[@]}"
|
||||||
|
|
||||||
if (( DRY_RUN == 1 )); then
|
if (( DRY_RUN == 1 )); then
|
||||||
printf '\nDry run complete (no Discord scrape).\n'
|
printf '\nDry run complete (no Discord scrape).\n'
|
||||||
exit 0
|
exit 0
|
||||||
fi
|
fi
|
||||||
|
if (( SALVAGE_ONLY == 1 )); then
|
||||||
|
printf '\nSalvage-only proof complete (no Discord scrape or append proof).\n'
|
||||||
|
exit 0
|
||||||
|
fi
|
||||||
|
|
||||||
for name in "${targets[@]}"; do
|
for name in "${targets[@]}"; do
|
||||||
printf '\n--- Target: %s ---\n' "$name"
|
printf '\n--- Target: %s ---\n' "$name"
|
||||||
|
|
|
||||||
|
|
@ -10,6 +10,7 @@ SYNC_GUI="$REPO_ROOT/scripts/sync-token-from-gui.sh"
|
||||||
VERIFY_READY="$REPO_ROOT/scripts/verify-operator-ready.sh"
|
VERIFY_READY="$REPO_ROOT/scripts/verify-operator-ready.sh"
|
||||||
DOCUMENTS_SCRAPE="$REPO_ROOT/scripts/run-documents-scrape.sh"
|
DOCUMENTS_SCRAPE="$REPO_ROOT/scripts/run-documents-scrape.sh"
|
||||||
AUDIT_JSON="$REPO_ROOT/scripts/audit-archive-json.sh"
|
AUDIT_JSON="$REPO_ROOT/scripts/audit-archive-json.sh"
|
||||||
|
LOCK_STATUS="$REPO_ROOT/scripts/scrape-lock-status.sh"
|
||||||
|
|
||||||
DRY_RUN=0
|
DRY_RUN=0
|
||||||
SKIP_SCRAPE=0
|
SKIP_SCRAPE=0
|
||||||
|
|
@ -90,6 +91,16 @@ audit_targets() {
|
||||||
(( failures == 0 ))
|
(( failures == 0 ))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Gate that runs before a scrape step: abort via die when the scrape lock
# status check reports failure.
# Globals:   DCE_SKIP_SCRAPE_LOCK (read), LOCK_STATUS (read), CONFIG_PATH (read)
# Arguments: none
# Returns:   0 when the gate is bypassed or the status check succeeds;
#            otherwise calls die with a "Scrape lock is held" message.
# NOTE(review): any non-zero exit from the status script is reported as
# "lock held" — confirm the script has no other failure exit codes.
ensure_scrape_lock_available() {
|
||||||
|
# Explicit opt-out: the gate is bypassed only when the variable is exactly "1".
if [[ "${DCE_SKIP_SCRAPE_LOCK:-0}" == "1" ]]; then
|
||||||
|
return 0
|
||||||
|
fi
|
||||||
|
# No executable status script at $LOCK_STATUS: treat the gate as passed.
[[ -x "$LOCK_STATUS" ]] || return 0
|
||||||
|
# Run the status check against the active config; failure means do not scrape.
if ! "$LOCK_STATUS" --config "$CONFIG_PATH"; then
|
||||||
|
die "Scrape lock is held; another scrape may be running. Inspect: $LOCK_STATUS --config $CONFIG_PATH"
|
||||||
|
fi
|
||||||
|
}
|
||||||
|
|
||||||
run_documents_scrape() {
|
run_documents_scrape() {
|
||||||
local -a scrape_args=(--config "$CONFIG_PATH")
|
local -a scrape_args=(--config "$CONFIG_PATH")
|
||||||
scrape_args+=("${CHANNEL_ARGS[@]}")
|
scrape_args+=("${CHANNEL_ARGS[@]}")
|
||||||
|
|
@ -280,7 +291,10 @@ main() {
|
||||||
|
|
||||||
if (( SKIP_SCRAPE )); then
|
if (( SKIP_SCRAPE )); then
|
||||||
log_step "Skip scrape requested."
|
log_step "Skip scrape requested."
|
||||||
elif (( PER_TARGET )) && [[ -z "$TARGET" ]]; then
|
else
|
||||||
|
ensure_scrape_lock_available || failures=$((failures + 1))
|
||||||
|
if (( failures == 0 )); then
|
||||||
|
if (( PER_TARGET )) && [[ -z "$TARGET" ]]; then
|
||||||
scrape_per_target || failures=$((failures + 1))
|
scrape_per_target || failures=$((failures + 1))
|
||||||
else
|
else
|
||||||
if run_documents_scrape; then
|
if run_documents_scrape; then
|
||||||
|
|
@ -291,6 +305,8 @@ main() {
|
||||||
failures=$((failures + 1))
|
failures=$((failures + 1))
|
||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
|
||||||
if (( failures > 0 )); then
|
if (( failures > 0 )); then
|
||||||
log_step "Operator validation failed ($failures step(s))."
|
log_step "Operator validation failed ($failures step(s))."
|
||||||
|
|
|
||||||
|
|
@ -70,4 +70,25 @@ if ! grep -q 'Scrape lock status' <<<"$handoff_output"; then
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
set +e
|
||||||
|
salvage_output=$(
|
||||||
|
DCE_MIN_FREE_MB=0 \
|
||||||
|
DCE_CONFIG_FILE="$CONFIG_PATH" \
|
||||||
|
DCE_ENV_FILE="$ENV_PATH" \
|
||||||
|
DCE_SKIP_SCRAPE_LOCK=1 \
|
||||||
|
"$HANDOFF" --config "$CONFIG_PATH" --skip-df --salvage-only --target demo 2>&1
|
||||||
|
)
|
||||||
|
salvage_status=$?
|
||||||
|
set -e
|
||||||
|
|
||||||
|
if [[ "$salvage_status" -ne 0 ]] || ! grep -q 'Handoff complete (salvage-only)' <<<"$salvage_output"; then
|
||||||
|
printf 'operator-handoff --salvage-only failed (status=%s)\n' "$salvage_status" >&2
|
||||||
|
printf '%s\n' "$salvage_output" >&2
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
grep -q 'salvage completed' <<<"$salvage_output" || {
|
||||||
|
printf 'operator-handoff --salvage-only missing salvage completed marker\n' >&2
|
||||||
|
exit 1
|
||||||
|
}
|
||||||
|
|
||||||
printf 'operator-handoff-smoke: ok\n'
|
printf 'operator-handoff-smoke: ok\n'
|
||||||
|
|
|
||||||
|
|
@ -7,6 +7,7 @@ PROOF="$REPO_ROOT/scripts/run-operator-proof.sh"
|
||||||
TMP_DIR=$(mktemp -d "${TMPDIR:-/tmp}/dce-operator-proof-smoke.XXXXXX")
|
TMP_DIR=$(mktemp -d "${TMPDIR:-/tmp}/dce-operator-proof-smoke.XXXXXX")
|
||||||
CONFIG_PATH="$TMP_DIR/config.json"
|
CONFIG_PATH="$TMP_DIR/config.json"
|
||||||
ENV_PATH="$TMP_DIR/scrape.env"
|
ENV_PATH="$TMP_DIR/scrape.env"
|
||||||
|
mkdir -p "$TMP_DIR/logs"
|
||||||
|
|
||||||
cleanup() {
|
cleanup() {
|
||||||
rm -rf "$TMP_DIR"
|
rm -rf "$TMP_DIR"
|
||||||
|
|
@ -54,4 +55,55 @@ grep -q 'Operator proof run plan' <<<"$output" || {
|
||||||
exit 1
|
exit 1
|
||||||
}
|
}
|
||||||
|
|
||||||
|
set +e
|
||||||
|
salvage_output=$(
|
||||||
|
DCE_MIN_FREE_MB=0 \
|
||||||
|
DCE_CONFIG_FILE="$CONFIG_PATH" \
|
||||||
|
DCE_ENV_FILE="$ENV_PATH" \
|
||||||
|
DCE_SKIP_SCRAPE_LOCK=1 \
|
||||||
|
"$PROOF" --config "$CONFIG_PATH" --target demo --salvage-only 2>&1
|
||||||
|
)
|
||||||
|
salvage_status=$?
|
||||||
|
set -e
|
||||||
|
|
||||||
|
if [[ "$salvage_status" -ne 0 ]] || ! grep -q 'Salvage-only proof complete' <<<"$salvage_output"; then
|
||||||
|
printf 'run-operator-proof --salvage-only failed (status=%s)\n' "$salvage_status" >&2
|
||||||
|
printf '%s\n' "$salvage_output" >&2
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
command -v flock >/dev/null 2>&1 && {
|
||||||
|
LOCK_FILE="$TMP_DIR/archive/.dce-scrape.lock"
|
||||||
|
HOLDER_PID=""
|
||||||
|
(
|
||||||
|
exec {lock_fd}>>"$LOCK_FILE"
|
||||||
|
flock -n "$lock_fd" || exit 1
|
||||||
|
sleep 120
|
||||||
|
) &
|
||||||
|
HOLDER_PID=$!
|
||||||
|
sleep 0.2
|
||||||
|
|
||||||
|
set +e
|
||||||
|
blocked_output=$(
|
||||||
|
DCE_MIN_FREE_MB=0 \
|
||||||
|
DCE_CONFIG_FILE="$CONFIG_PATH" \
|
||||||
|
DCE_ENV_FILE="$ENV_PATH" \
|
||||||
|
DCE_LOG_DIR="$TMP_DIR/logs" \
|
||||||
|
"$REPO_ROOT/scripts/run-operator-validation.sh" \
|
||||||
|
--salvage-only --target demo --config "$CONFIG_PATH" \
|
||||||
|
--log-file "$TMP_DIR/logs/lock-blocked.log" 2>&1
|
||||||
|
)
|
||||||
|
blocked_status=$?
|
||||||
|
set -e
|
||||||
|
|
||||||
|
kill "$HOLDER_PID" 2>/dev/null || true
|
||||||
|
wait "$HOLDER_PID" 2>/dev/null || true
|
||||||
|
|
||||||
|
if [[ "$blocked_status" -eq 0 ]] || ! grep -q 'Scrape lock is held' <<<"$blocked_output"; then
|
||||||
|
printf 'expected validation to fail when scrape lock held (status=%s)\n' "$blocked_status" >&2
|
||||||
|
printf '%s\n' "$blocked_output" >&2
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
}
|
||||||
|
|
||||||
printf 'run-operator-proof-smoke: ok\n'
|
printf 'run-operator-proof-smoke: ok\n'
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue