diff --git a/docs/plans/2026-06-04-055-feat-operator-validation-salvage-plan.md b/docs/plans/2026-06-04-055-feat-operator-validation-salvage-plan.md new file mode 100644 index 00000000..2a410690 --- /dev/null +++ b/docs/plans/2026-06-04-055-feat-operator-validation-salvage-plan.md @@ -0,0 +1,55 @@ +--- +title: "feat: Salvage flags on operator validation and proof" +type: feat +status: active +date: 2026-06-04 +origin: /lfg — plan 054 added documents --salvage-only; operator-validation/proof still lack salvage entry points for yes_general catch-up +--- + +# feat: Salvage flags on operator validation and proof + +## Summary + +Add `--salvage-only` and `--salvage-before-scrape` to `run-operator-validation.sh`, and `--salvage-before-scrape` to `run-operator-proof.sh`. Each salvage pass forwards to `run-documents-scrape.sh --salvage-only`; `--salvage-before-scrape` then continues with the normal scrape path. + +## Problem Frame + +After stopping a long KotOR export, operators need: + +```bash +./scripts/run-operator-validation.sh --salvage-only --target KotOR_discord_msgs --channel 221726893064454144 +``` + +Plan 054 implemented salvage on documents-scrape, but validation/proof orchestrators do not expose it. + +## Requirements + +| ID | Requirement | +|----|-------------| +| R1 | `run-operator-validation.sh` accepts `--salvage-only` (salvage + audit, no Discord scrape) | +| R2 | `run-operator-validation.sh` accepts `--salvage-before-scrape` (salvage then scrape + audit) | +| R3 | Both flags forward `--target` / `--channel` to documents scrape | +| R4 | `--salvage-only` and `--salvage-before-scrape` are mutually exclusive | +| R5 | `run-operator-proof.sh` accepts `--salvage-before-scrape` and runs salvage before scrape+prove | +| R6 | Smokes cover validation salvage-only; `run-all-smokes.sh` passes | + +## Implementation Units + +### U1. Operator validation salvage flags + +**Files:** `scripts/run-operator-validation.sh`, `scripts/tests/run-operator-validation-smoke.sh` + +### U2. 
Operator proof salvage-before + +**Files:** `scripts/run-operator-proof.sh` + +### U3. Smoke gate + +**Verification:** `DCE_MIN_FREE_MB=0 ./scripts/run-all-smokes.sh` + +## Scope Boundaries + +### Deferred + +- Killing stale PID 3868255 on host +- Live yes_general catch-up inside LFG diff --git a/scripts/run-operator-proof.sh b/scripts/run-operator-proof.sh index 3b14a0ab..007b198f 100755 --- a/scripts/run-operator-proof.sh +++ b/scripts/run-operator-proof.sh @@ -16,19 +16,21 @@ source "$SCRIPT_DIR/lib/scrape-run-plan.sh" TARGET="" SYNC_GUI_FLAG=0 DRY_RUN=0 +SALVAGE_BEFORE=0 CHANNEL_ARGS=() usage() { cat <.log EOF @@ -60,6 +62,10 @@ main() { DRY_RUN=1 shift ;; + --salvage-before-scrape) + SALVAGE_BEFORE=1 + shift + ;; --channel) [[ $# -ge 2 ]] || die "Missing value for --channel." CHANNEL_ARGS+=(--channel "$2") @@ -120,6 +126,13 @@ main() { printf '\n--- Target: %s ---\n' "$name" local -a scrape_args=(--config "$CONFIG_PATH" --target "$name") scrape_args+=("${CHANNEL_ARGS[@]}") + if (( SALVAGE_BEFORE )); then + if ! 
"$DOCUMENTS" "${scrape_args[@]}" --salvage-only; then + failed=$((failed + 1)) + printf 'Operator proof FAILED for %s (salvage-before)\n' "$name" >&2 + continue + fi + fi if "$DOCUMENTS" "${scrape_args[@]}" && "$PROVE" "${scrape_args[@]}"; then succeeded=$((succeeded + 1)) printf 'Operator proof passed for %s\n' "$name" diff --git a/scripts/run-operator-validation.sh b/scripts/run-operator-validation.sh index 3818c91c..a4c0ed2f 100755 --- a/scripts/run-operator-validation.sh +++ b/scripts/run-operator-validation.sh @@ -13,6 +13,8 @@ AUDIT_JSON="$REPO_ROOT/scripts/audit-archive-json.sh" DRY_RUN=0 SKIP_SCRAPE=0 +SALVAGE_ONLY=0 +SALVAGE_BEFORE=0 SYNC_GUI_FLAG=0 PER_TARGET=0 CONTINUE_ON_ERROR=0 @@ -31,6 +33,8 @@ End-to-end operator validation with timestamped log: Options: --dry-run Readiness + archives only (no Discord scrape) --skip-scrape Readiness only (no scrape, no audit loop) + --salvage-only Merge stale .dce-temp exports only, then audit (no Discord scrape) + --salvage-before-scrape Run salvage-only pass before incremental scrape --sync-gui Run sync-token-from-gui.sh --force before checks --target NAME Limit scrape/audit to one configured target --channel ID With exactly one --target, limit scrape to channel ID (repeatable) @@ -86,19 +90,75 @@ audit_targets() { (( failures == 0 )) } +run_documents_scrape() { + local -a scrape_args=(--config "$CONFIG_PATH") + scrape_args+=("${CHANNEL_ARGS[@]}") + [[ -n "$TARGET" ]] && scrape_args+=(--target "$TARGET") + + if (( SALVAGE_ONLY )); then + run_step "run-documents-scrape (salvage-only)" "$DOCUMENTS_SCRAPE" "${scrape_args[@]}" --salvage-only + return $? + fi + + if (( DRY_RUN )); then + run_step "run-documents-scrape (dry-run)" "$DOCUMENTS_SCRAPE" "${scrape_args[@]}" --dry-run + return $? + fi + + if (( SALVAGE_BEFORE )); then + run_step "run-documents-scrape (salvage-before)" "$DOCUMENTS_SCRAPE" "${scrape_args[@]}" --salvage-only || return $? 
+ fi + + run_step "run-documents-scrape" "$DOCUMENTS_SCRAPE" "${scrape_args[@]}" +} + scrape_per_target() { local name failures=0 ok=0 local -a scrape_args=(--config "$CONFIG_PATH") scrape_args+=("${CHANNEL_ARGS[@]}") local -a target_names=() - if (( DRY_RUN )); then - scrape_args+=(--dry-run) - fi mapfile -t target_names < <(enabled_targets) for name in "${target_names[@]}"; do [[ -n "$name" ]] || continue log_step "Per-target begin: $name" - if ! run_step "run-documents-scrape ($name)" "$DOCUMENTS_SCRAPE" "${scrape_args[@]}" --target "$name"; then + local -a per_args=("${scrape_args[@]}" --target "$name") + if (( SALVAGE_ONLY )); then + if ! run_step "run-documents-scrape ($name salvage-only)" "$DOCUMENTS_SCRAPE" "${per_args[@]}" --salvage-only; then + log_step "Per-target failed: $name (salvage-only)" + failures=$((failures + 1)) + (( CONTINUE_ON_ERROR )) || return 1 + continue + fi + if run_step "audit-archive-json ($name)" "$AUDIT_JSON" --config "$CONFIG_PATH" --target "$name"; then + log_step "Per-target done: $name (salvage-only ok)" + ok=$((ok + 1)) + else + log_step "Per-target failed: $name (audit)" + failures=$((failures + 1)) + (( CONTINUE_ON_ERROR )) || return 1 + fi + continue + fi + if (( DRY_RUN )); then + if ! run_step "run-documents-scrape ($name)" "$DOCUMENTS_SCRAPE" "${per_args[@]}" --dry-run; then + log_step "Per-target failed: $name (dry-run)" + failures=$((failures + 1)) + (( CONTINUE_ON_ERROR )) || return 1 + continue + fi + log_step "Per-target done: $name (dry-run)" + ok=$((ok + 1)) + continue + fi + if (( SALVAGE_BEFORE )); then + if ! run_step "run-documents-scrape ($name salvage)" "$DOCUMENTS_SCRAPE" "${per_args[@]}" --salvage-only; then + log_step "Per-target failed: $name (salvage-before)" + failures=$((failures + 1)) + (( CONTINUE_ON_ERROR )) || return 1 + continue + fi + fi + if ! 
run_step "run-documents-scrape ($name)" "$DOCUMENTS_SCRAPE" "${per_args[@]}"; then log_step "Per-target failed: $name (scrape)" failures=$((failures + 1)) if (( CONTINUE_ON_ERROR == 0 )); then @@ -137,6 +197,14 @@ main() { SKIP_SCRAPE=1 shift ;; + --salvage-only) + SALVAGE_ONLY=1 + shift + ;; + --salvage-before-scrape) + SALVAGE_BEFORE=1 + shift + ;; --sync-gui) SYNC_GUI_FLAG=1 shift @@ -179,6 +247,13 @@ main() { esac done + if (( SALVAGE_ONLY == 1 && SALVAGE_BEFORE == 1 )); then + die "--salvage-only and --salvage-before-scrape are mutually exclusive." + fi + if (( SALVAGE_ONLY == 1 && DRY_RUN == 1 )); then + die "--salvage-only cannot be combined with --dry-run." + fi + mkdir -p "$LOG_DIR" if [[ -z "$LOG_FILE" ]]; then LOG_FILE="$LOG_DIR/operator-validation-$(date -u +%Y%m%dT%H%M%SZ).log" @@ -192,6 +267,8 @@ main() { if [[ -n "$TARGET" ]]; then log_step "Targets: $TARGET" ((${#CHANNEL_ARGS[@]})) && log_step "Scrape channel filter: ${CHANNEL_ARGS[*]}" + (( SALVAGE_ONLY )) && log_step "Mode: salvage-only" + (( SALVAGE_BEFORE )) && log_step "Mode: salvage-before-scrape" else log_step "Enabled targets: $(enabled_targets | paste -sd, -)" fi @@ -206,15 +283,12 @@ main() { elif (( PER_TARGET )) && [[ -z "$TARGET" ]]; then scrape_per_target || failures=$((failures + 1)) else - local -a scrape_args=(--config "$CONFIG_PATH") - scrape_args+=("${CHANNEL_ARGS[@]}") - [[ -n "$TARGET" ]] && scrape_args+=(--target "$TARGET") - if (( DRY_RUN )); then - scrape_args+=(--dry-run) - fi - run_step "run-documents-scrape" "$DOCUMENTS_SCRAPE" "${scrape_args[@]}" || failures=$((failures + 1)) - if (( DRY_RUN == 0 && failures == 0 )); then - audit_targets || failures=$((failures + 1)) + if run_documents_scrape; then + if (( DRY_RUN == 0 && failures == 0 )); then + audit_targets || failures=$((failures + 1)) + fi + else + failures=$((failures + 1)) fi fi diff --git a/scripts/tests/run-operator-validation-smoke.sh b/scripts/tests/run-operator-validation-smoke.sh index ddb6d901..2cdd3f90 
100755 --- a/scripts/tests/run-operator-validation-smoke.sh +++ b/scripts/tests/run-operator-validation-smoke.sh @@ -72,4 +72,23 @@ grep -q 'Operator validation finished successfully' "$LOG_DIR/validation.log" || exit 1 } +SALVAGE_LOG="$LOG_DIR/salvage-validation.log" +DCE_MIN_FREE_MB=0 DCE_REPO_ROOT="$REPO_ROOT" DCE_CONFIG_FILE="$CONFIG_PATH" DCE_ENV_FILE="$ENV_PATH" \ + DCE_LOG_DIR="$LOG_DIR" DCE_SKIP_SCRAPE_LOCK=1 \ + "$RUNNER" --salvage-only --target demo --config "$CONFIG_PATH" --log-file "$SALVAGE_LOG" + +grep -q 'Mode: salvage-only' "$SALVAGE_LOG" || { + printf 'ERROR: salvage-only mode not logged\n' >&2 + exit 1 +} +grep -q 'run-documents-scrape (salvage-only)' "$SALVAGE_LOG" || { + printf 'ERROR: salvage-only documents scrape step missing\n' >&2 + exit 1 +} +grep -q 'Operator validation finished successfully' "$SALVAGE_LOG" || { + printf 'ERROR: salvage-only validation did not succeed\n' >&2 + cat "$SALVAGE_LOG" >&2 + exit 1 +} + printf 'run-operator-validation-smoke: ok\n'