mirror of
https://github.com/Tyrrrz/DiscordChatExporter.git
synced 2026-06-09 15:52:37 -06:00
feat(scrape): handoff salvage-only mode and validation lock gate
Wire --salvage-only through operator-handoff and run-operator-proof, and refuse run-operator-validation scrape steps while the archive-root lock is actively held.
This commit is contained in:
parent
682094c348
commit
363749231d
|
|
@ -0,0 +1,54 @@
|
|||
---
|
||||
title: "feat: Operator handoff salvage-only and scrape lock gate"
|
||||
type: feat
|
||||
status: complete
|
||||
date: 2026-06-04
|
||||
origin: /lfg — plan 056 deferred operator-handoff --salvage-only; proof lacks salvage-only mode and salvage smoke
|
||||
---
|
||||
|
||||
# feat: Operator handoff salvage-only and scrape lock gate
|
||||
|
||||
## Summary
|
||||
|
||||
Add `--salvage-only` to `operator-handoff.sh` and `run-operator-proof.sh`, fail fast in `run-operator-validation.sh` when the archive-root scrape lock is held, and extend the smoke tests to cover both changes.
|
||||
|
||||
## Problem Frame
|
||||
|
||||
After stopping a crashed KotOR export, operators need a handoff entry point that merges the partial `.dce-temp` exports without running a dry-run or contacting Discord:
|
||||
|
||||
```bash
|
||||
./scripts/operator-handoff.sh --salvage-only --target KotOR_discord_msgs --channel 221726893064454144
|
||||
```
|
||||
|
||||
Validation should refuse to start a scrape while another checkout holds `{archive_root}/.dce-scrape.lock`.
|
||||
|
||||
## Requirements
|
||||
|
||||
| ID | Requirement |
|
||||
|----|-------------|
|
||||
| R1 | `operator-handoff.sh` accepts `--salvage-only` (runs `run-documents-scrape.sh --salvage-only` instead of `--dry-run`) |
|
||||
| R2 | `run-operator-proof.sh` accepts `--salvage-only` (handoff + salvage per target, no scrape/prove) |
|
||||
| R3 | `run-operator-validation.sh` exits before scrape when lock is actively held |
|
||||
| R4 | Lock gate is skipped when `DCE_SKIP_SCRAPE_LOCK=1` is set or `--skip-scrape` is passed |
|
||||
| R5 | Smokes cover handoff and proof salvage-only; `run-all-smokes.sh` passes |
|
||||
|
||||
## Implementation Units
|
||||
|
||||
### U1. Handoff and proof salvage-only
|
||||
|
||||
**Files:** `scripts/operator-handoff.sh`, `scripts/run-operator-proof.sh`, smokes
|
||||
|
||||
### U2. Validation lock gate
|
||||
|
||||
**Files:** `scripts/run-operator-validation.sh`
|
||||
|
||||
### U3. Smoke gate
|
||||
|
||||
**Verification:** `DCE_MIN_FREE_MB=0 ./scripts/run-all-smokes.sh`
|
||||
|
||||
## Scope Boundaries
|
||||
|
||||
### Deferred
|
||||
|
||||
- Live KotOR catch-up on host
|
||||
- Docs refresh for new flags
|
||||
|
|
@ -11,21 +11,24 @@ VERIFY_READY="$REPO_ROOT/scripts/verify-operator-ready.sh"
|
|||
DOCUMENTS_SCRAPE="$REPO_ROOT/scripts/run-documents-scrape.sh"
|
||||
LOCK_STATUS="$REPO_ROOT/scripts/scrape-lock-status.sh"
|
||||
SKIP_DF=0
|
||||
SALVAGE_ONLY=0
|
||||
TARGET=""
|
||||
CHANNEL_ARGS=()
|
||||
|
||||
usage() {
|
||||
cat <<EOF
|
||||
Usage:
|
||||
$(basename "$0") [--config PATH] [--skip-df] [--target NAME] [--channel ID]
|
||||
$(basename "$0") [--config PATH] [--skip-df] [--target NAME] [--channel ID] [--salvage-only]
|
||||
|
||||
Run operator handoff checks before cron install or a full scrape:
|
||||
1. Free-space summary (archive_root + repo)
|
||||
2. verify-operator-ready (jq, compose, auth, archives)
|
||||
3. run-documents-scrape --dry-run (archive paths only)
|
||||
3. scrape lock status (when available)
|
||||
4. run-documents-scrape --dry-run OR --salvage-only
|
||||
|
||||
--target NAME Limit dry-run scrape plan to one configured target
|
||||
--channel ID With exactly one --target, limit dry-run to channel ID (repeatable)
|
||||
--target NAME Limit documents step to one configured target
|
||||
--channel ID With exactly one --target, limit to channel ID (repeatable)
|
||||
--salvage-only Merge stale .dce-temp exports only (no dry-run, no Discord scrape)
|
||||
|
||||
Environment:
|
||||
DCE_MIN_FREE_MB Minimum MiB free (default 1024 in verify-operator-ready)
|
||||
|
|
@ -69,6 +72,10 @@ main() {
|
|||
SKIP_DF=1
|
||||
shift
|
||||
;;
|
||||
--salvage-only)
|
||||
SALVAGE_ONLY=1
|
||||
shift
|
||||
;;
|
||||
--target)
|
||||
[[ $# -ge 2 ]] || die "Missing value for --target."
|
||||
TARGET=$2
|
||||
|
|
@ -114,17 +121,26 @@ main() {
|
|||
fi
|
||||
fi
|
||||
|
||||
local -a dry_run_args=(--dry-run --config "$CONFIG_PATH")
|
||||
[[ -n "$TARGET" ]] && dry_run_args+=(--target "$TARGET")
|
||||
dry_run_args+=("${CHANNEL_ARGS[@]}")
|
||||
"$DOCUMENTS_SCRAPE" "${dry_run_args[@]}"
|
||||
|
||||
local -a documents_args=(--config "$CONFIG_PATH")
|
||||
[[ -n "$TARGET" ]] && documents_args+=(--target "$TARGET")
|
||||
documents_args+=("${CHANNEL_ARGS[@]}")
|
||||
if (( SALVAGE_ONLY )); then
|
||||
documents_args+=(--salvage-only)
|
||||
"$DOCUMENTS_SCRAPE" "${documents_args[@]}"
|
||||
printf '\nHandoff complete (salvage-only). Next:\n'
|
||||
printf ' ./scripts/run-operator-validation.sh --salvage-before-scrape'
|
||||
else
|
||||
documents_args+=(--dry-run)
|
||||
"$DOCUMENTS_SCRAPE" "${documents_args[@]}"
|
||||
printf '\nHandoff complete. Safe to run:\n'
|
||||
printf ' ./scripts/run-documents-scrape.sh'
|
||||
fi
|
||||
[[ -n "$TARGET" ]] && printf ' --target %s' "$TARGET"
|
||||
((${#CHANNEL_ARGS[@]})) && printf ' %s' "${CHANNEL_ARGS[*]}"
|
||||
printf '\n'
|
||||
if (( ! SALVAGE_ONLY )); then
|
||||
printf ' ./scripts/setup-cron.sh --dry-run\n'
|
||||
fi
|
||||
}
|
||||
|
||||
main "$@"
|
||||
|
|
|
|||
|
|
@ -17,12 +17,13 @@ TARGET=""
|
|||
SYNC_GUI_FLAG=0
|
||||
DRY_RUN=0
|
||||
SALVAGE_BEFORE=0
|
||||
SALVAGE_ONLY=0
|
||||
CHANNEL_ARGS=()
|
||||
|
||||
usage() {
|
||||
cat <<EOF
|
||||
Usage:
|
||||
$(basename "$0") [--target NAME] [--channel ID] [--config PATH] [--sync-gui] [--dry-run] [--salvage-before-scrape]
|
||||
$(basename "$0") [--target NAME] [--channel ID] [--config PATH] [--sync-gui] [--dry-run] [--salvage-only] [--salvage-before-scrape]
|
||||
|
||||
End-to-end operator proof:
|
||||
operator-handoff → [optional salvage] → incremental scrape → prove-incremental-append
|
||||
|
|
@ -30,6 +31,7 @@ End-to-end operator proof:
|
|||
When --target is omitted, all enabled targets in the config are processed.
|
||||
|
||||
--channel ID With exactly one --target, limit scrape/prove to channel ID (repeatable)
|
||||
--salvage-only Handoff + merge stale .dce-temp exports only (no Discord scrape or prove)
|
||||
--salvage-before-scrape Merge stale .dce-temp exports before incremental scrape
|
||||
|
||||
Logs append to logs/operator-proof-<timestamp>.log
|
||||
|
|
@ -66,6 +68,10 @@ main() {
|
|||
SALVAGE_BEFORE=1
|
||||
shift
|
||||
;;
|
||||
--salvage-only)
|
||||
SALVAGE_ONLY=1
|
||||
shift
|
||||
;;
|
||||
--channel)
|
||||
[[ $# -ge 2 ]] || die "Missing value for --channel."
|
||||
CHANNEL_ARGS+=(--channel "$2")
|
||||
|
|
@ -83,6 +89,13 @@ main() {
|
|||
|
||||
[[ -f "$CONFIG_PATH" ]] || die "Missing config: $CONFIG_PATH"
|
||||
|
||||
if (( SALVAGE_ONLY == 1 && DRY_RUN == 1 )); then
|
||||
die "--salvage-only cannot be combined with --dry-run."
|
||||
fi
|
||||
if (( SALVAGE_ONLY == 1 && SALVAGE_BEFORE == 1 )); then
|
||||
die "--salvage-only and --salvage-before-scrape are mutually exclusive."
|
||||
fi
|
||||
|
||||
local -a targets=()
|
||||
if [[ -n "$TARGET" ]]; then
|
||||
targets=("$TARGET")
|
||||
|
|
@ -115,12 +128,17 @@ main() {
|
|||
local -a handoff_args=(--config "$CONFIG_PATH")
|
||||
[[ -n "$TARGET" ]] && handoff_args+=(--target "$TARGET")
|
||||
handoff_args+=("${CHANNEL_ARGS[@]}")
|
||||
(( SALVAGE_ONLY )) && handoff_args+=(--salvage-only)
|
||||
"$HANDOFF" "${handoff_args[@]}"
|
||||
|
||||
if (( DRY_RUN == 1 )); then
|
||||
printf '\nDry run complete (no Discord scrape).\n'
|
||||
exit 0
|
||||
fi
|
||||
if (( SALVAGE_ONLY == 1 )); then
|
||||
printf '\nSalvage-only proof complete (no Discord scrape or append proof).\n'
|
||||
exit 0
|
||||
fi
|
||||
|
||||
for name in "${targets[@]}"; do
|
||||
printf '\n--- Target: %s ---\n' "$name"
|
||||
|
|
|
|||
|
|
@ -10,6 +10,7 @@ SYNC_GUI="$REPO_ROOT/scripts/sync-token-from-gui.sh"
|
|||
VERIFY_READY="$REPO_ROOT/scripts/verify-operator-ready.sh"
|
||||
DOCUMENTS_SCRAPE="$REPO_ROOT/scripts/run-documents-scrape.sh"
|
||||
AUDIT_JSON="$REPO_ROOT/scripts/audit-archive-json.sh"
|
||||
LOCK_STATUS="$REPO_ROOT/scripts/scrape-lock-status.sh"
|
||||
|
||||
DRY_RUN=0
|
||||
SKIP_SCRAPE=0
|
||||
|
|
@ -90,6 +91,16 @@ audit_targets() {
|
|||
(( failures == 0 ))
|
||||
}
|
||||
|
||||
# Gate a scrape on the result of the scrape-lock status helper.
# Globals:   DCE_SKIP_SCRAPE_LOCK (read), LOCK_STATUS (read), CONFIG_PATH (read)
# Returns:   0 when the gate is bypassed or the helper exits 0; otherwise
#            control passes to die (defined outside this hunk; presumably it
#            prints the message and exits; confirm).
# NOTE(review): every non-zero exit from the helper is reported as "lock is
# held"; whether the helper has other failure exit codes is not visible here.
ensure_scrape_lock_available() {
|
||||
# Explicit opt-out: DCE_SKIP_SCRAPE_LOCK=1 bypasses the gate entirely.
if [[ "${DCE_SKIP_SCRAPE_LOCK:-0}" == "1" ]]; then
|
||||
return 0
|
||||
fi
|
||||
# No executable status helper at $LOCK_STATUS: nothing to consult, so allow the scrape.
[[ -x "$LOCK_STATUS" ]] || return 0
|
||||
# The helper's output is not redirected, so it appears alongside this script's own output.
if ! "$LOCK_STATUS" --config "$CONFIG_PATH"; then
|
||||
die "Scrape lock is held; another scrape may be running. Inspect: $LOCK_STATUS --config $CONFIG_PATH"
|
||||
fi
|
||||
}
|
||||
|
||||
run_documents_scrape() {
|
||||
local -a scrape_args=(--config "$CONFIG_PATH")
|
||||
scrape_args+=("${CHANNEL_ARGS[@]}")
|
||||
|
|
@ -280,7 +291,10 @@ main() {
|
|||
|
||||
if (( SKIP_SCRAPE )); then
|
||||
log_step "Skip scrape requested."
|
||||
elif (( PER_TARGET )) && [[ -z "$TARGET" ]]; then
|
||||
else
|
||||
ensure_scrape_lock_available || failures=$((failures + 1))
|
||||
if (( failures == 0 )); then
|
||||
if (( PER_TARGET )) && [[ -z "$TARGET" ]]; then
|
||||
scrape_per_target || failures=$((failures + 1))
|
||||
else
|
||||
if run_documents_scrape; then
|
||||
|
|
@ -291,6 +305,8 @@ main() {
|
|||
failures=$((failures + 1))
|
||||
fi
|
||||
fi
|
||||
fi
|
||||
fi
|
||||
|
||||
if (( failures > 0 )); then
|
||||
log_step "Operator validation failed ($failures step(s))."
|
||||
|
|
|
|||
|
|
@ -70,4 +70,25 @@ if ! grep -q 'Scrape lock status' <<<"$handoff_output"; then
|
|||
exit 1
|
||||
fi
|
||||
|
||||
set +e
|
||||
salvage_output=$(
|
||||
DCE_MIN_FREE_MB=0 \
|
||||
DCE_CONFIG_FILE="$CONFIG_PATH" \
|
||||
DCE_ENV_FILE="$ENV_PATH" \
|
||||
DCE_SKIP_SCRAPE_LOCK=1 \
|
||||
"$HANDOFF" --config "$CONFIG_PATH" --skip-df --salvage-only --target demo 2>&1
|
||||
)
|
||||
salvage_status=$?
|
||||
set -e
|
||||
|
||||
if [[ "$salvage_status" -ne 0 ]] || ! grep -q 'Handoff complete (salvage-only)' <<<"$salvage_output"; then
|
||||
printf 'operator-handoff --salvage-only failed (status=%s)\n' "$salvage_status" >&2
|
||||
printf '%s\n' "$salvage_output" >&2
|
||||
exit 1
|
||||
fi
|
||||
grep -q 'salvage completed' <<<"$salvage_output" || {
|
||||
printf 'operator-handoff --salvage-only missing salvage completed marker\n' >&2
|
||||
exit 1
|
||||
}
|
||||
|
||||
printf 'operator-handoff-smoke: ok\n'
|
||||
|
|
|
|||
|
|
@ -7,6 +7,7 @@ PROOF="$REPO_ROOT/scripts/run-operator-proof.sh"
|
|||
TMP_DIR=$(mktemp -d "${TMPDIR:-/tmp}/dce-operator-proof-smoke.XXXXXX")
|
||||
CONFIG_PATH="$TMP_DIR/config.json"
|
||||
ENV_PATH="$TMP_DIR/scrape.env"
|
||||
mkdir -p "$TMP_DIR/logs"
|
||||
|
||||
cleanup() {
|
||||
rm -rf "$TMP_DIR"
|
||||
|
|
@ -54,4 +55,55 @@ grep -q 'Operator proof run plan' <<<"$output" || {
|
|||
exit 1
|
||||
}
|
||||
|
||||
set +e
|
||||
salvage_output=$(
|
||||
DCE_MIN_FREE_MB=0 \
|
||||
DCE_CONFIG_FILE="$CONFIG_PATH" \
|
||||
DCE_ENV_FILE="$ENV_PATH" \
|
||||
DCE_SKIP_SCRAPE_LOCK=1 \
|
||||
"$PROOF" --config "$CONFIG_PATH" --target demo --salvage-only 2>&1
|
||||
)
|
||||
salvage_status=$?
|
||||
set -e
|
||||
|
||||
if [[ "$salvage_status" -ne 0 ]] || ! grep -q 'Salvage-only proof complete' <<<"$salvage_output"; then
|
||||
printf 'run-operator-proof --salvage-only failed (status=%s)\n' "$salvage_status" >&2
|
||||
printf '%s\n' "$salvage_output" >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
command -v flock >/dev/null 2>&1 && {
|
||||
LOCK_FILE="$TMP_DIR/archive/.dce-scrape.lock"
|
||||
HOLDER_PID=""
|
||||
(
|
||||
exec {lock_fd}>>"$LOCK_FILE"
|
||||
flock -n "$lock_fd" || exit 1
|
||||
sleep 120
|
||||
) &
|
||||
HOLDER_PID=$!
|
||||
sleep 0.2
|
||||
|
||||
set +e
|
||||
blocked_output=$(
|
||||
DCE_MIN_FREE_MB=0 \
|
||||
DCE_CONFIG_FILE="$CONFIG_PATH" \
|
||||
DCE_ENV_FILE="$ENV_PATH" \
|
||||
DCE_LOG_DIR="$TMP_DIR/logs" \
|
||||
"$REPO_ROOT/scripts/run-operator-validation.sh" \
|
||||
--salvage-only --target demo --config "$CONFIG_PATH" \
|
||||
--log-file "$TMP_DIR/logs/lock-blocked.log" 2>&1
|
||||
)
|
||||
blocked_status=$?
|
||||
set -e
|
||||
|
||||
kill "$HOLDER_PID" 2>/dev/null || true
|
||||
wait "$HOLDER_PID" 2>/dev/null || true
|
||||
|
||||
if [[ "$blocked_status" -eq 0 ]] || ! grep -q 'Scrape lock is held' <<<"$blocked_output"; then
|
||||
printf 'expected validation to fail when scrape lock held (status=%s)\n' "$blocked_status" >&2
|
||||
printf '%s\n' "$blocked_output" >&2
|
||||
exit 1
|
||||
fi
|
||||
}
|
||||
|
||||
printf 'run-operator-proof-smoke: ok\n'
|
||||
|
|
|
|||
Loading…
Reference in a new issue