mirror of
https://github.com/Tyrrrz/DiscordChatExporter.git
synced 2026-06-09 15:52:37 -06:00
feat(scrape): wire salvage flags through operator validation and proof
Expose --salvage-only and --salvage-before-scrape on run-operator-validation.sh and --salvage-before-scrape on run-operator-proof.sh so operators can merge stale .dce-temp exports before or instead of incremental Discord scrapes.
This commit is contained in:
parent
8468e34e37
commit
22915770e6
|
|
@ -0,0 +1,55 @@
|
||||||
|
---
|
||||||
|
title: "feat: Salvage flags on operator validation and proof"
|
||||||
|
type: feat
|
||||||
|
status: active
|
||||||
|
date: 2026-06-04
|
||||||
|
origin: /lfg — plan 054 added documents --salvage-only; operator-validation/proof still lack salvage entry points for yes_general catch-up
|
||||||
|
---
|
||||||
|
|
||||||
|
# feat: Salvage flags on operator validation and proof
|
||||||
|
|
||||||
|
## Summary
|
||||||
|
|
||||||
|
Add `--salvage-only` and `--salvage-before-scrape` to `run-operator-validation.sh`, and `--salvage-before-scrape` to `run-operator-proof.sh`. Each flag forwards to `run-documents-scrape.sh --salvage-only`; `--salvage-before-scrape` then continues with the normal scrape path.
|
||||||
|
|
||||||
|
## Problem Frame
|
||||||
|
|
||||||
|
After stopping a long KotOR export, operators need to be able to run a salvage-only pass from the validation orchestrator:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
./scripts/run-operator-validation.sh --salvage-only --target KotOR_discord_msgs --channel 221726893064454144
|
||||||
|
```
|
||||||
|
|
||||||
|
Plan 054 implemented salvage on documents-scrape, but validation/proof orchestrators do not expose it.
|
||||||
|
|
||||||
|
## Requirements
|
||||||
|
|
||||||
|
| ID | Requirement |
|
||||||
|
|----|-------------|
|
||||||
|
| R1 | `run-operator-validation.sh` accepts `--salvage-only` (salvage + audit, no Discord scrape) |
|
||||||
|
| R2 | `run-operator-validation.sh` accepts `--salvage-before-scrape` (salvage then scrape + audit) |
|
||||||
|
| R3 | Both flags forward `--target` / `--channel` to documents scrape |
|
||||||
|
| R4 | `--salvage-only` and `--salvage-before-scrape` are mutually exclusive |
|
||||||
|
| R5 | `run-operator-proof.sh` accepts `--salvage-before-scrape` and runs salvage before scrape+prove |
|
||||||
|
| R6 | Smokes cover validation salvage-only; `run-all-smokes.sh` passes |
|
||||||
|
|
||||||
|
## Implementation Units
|
||||||
|
|
||||||
|
### U1. Operator validation salvage flags
|
||||||
|
|
||||||
|
**Files:** `scripts/run-operator-validation.sh`, `scripts/tests/run-operator-validation-smoke.sh`
|
||||||
|
|
||||||
|
### U2. Operator proof `--salvage-before-scrape` flag
|
||||||
|
|
||||||
|
**Files:** `scripts/run-operator-proof.sh`
|
||||||
|
|
||||||
|
### U3. Smoke gate
|
||||||
|
|
||||||
|
**Verification:** `DCE_MIN_FREE_MB=0 ./scripts/run-all-smokes.sh`
|
||||||
|
|
||||||
|
## Scope Boundaries
|
||||||
|
|
||||||
|
### Deferred
|
||||||
|
|
||||||
|
- Killing stale PID 3868255 on host
|
||||||
|
- Live yes_general catch-up inside LFG
|
||||||
|
|
@ -16,19 +16,21 @@ source "$SCRIPT_DIR/lib/scrape-run-plan.sh"
|
||||||
TARGET=""
|
TARGET=""
|
||||||
SYNC_GUI_FLAG=0
|
SYNC_GUI_FLAG=0
|
||||||
DRY_RUN=0
|
DRY_RUN=0
|
||||||
|
SALVAGE_BEFORE=0
|
||||||
CHANNEL_ARGS=()
|
CHANNEL_ARGS=()
|
||||||
|
|
||||||
usage() {
|
usage() {
|
||||||
cat <<EOF
|
cat <<EOF
|
||||||
Usage:
|
Usage:
|
||||||
$(basename "$0") [--target NAME] [--channel ID] [--config PATH] [--sync-gui] [--dry-run]
|
$(basename "$0") [--target NAME] [--channel ID] [--config PATH] [--sync-gui] [--dry-run] [--salvage-before-scrape]
|
||||||
|
|
||||||
End-to-end operator proof:
|
End-to-end operator proof:
|
||||||
operator-handoff → incremental scrape → prove-incremental-append
|
operator-handoff → [optional salvage] → incremental scrape → prove-incremental-append
|
||||||
|
|
||||||
When --target is omitted, all enabled targets in the config are processed.
|
When --target is omitted, all enabled targets in the config are processed.
|
||||||
|
|
||||||
--channel ID With exactly one --target, limit scrape/prove to channel ID (repeatable)
|
--channel ID With exactly one --target, limit scrape/prove to channel ID (repeatable)
|
||||||
|
--salvage-before-scrape Merge stale .dce-temp exports before incremental scrape
|
||||||
|
|
||||||
Logs append to logs/operator-proof-<timestamp>.log
|
Logs append to logs/operator-proof-<timestamp>.log
|
||||||
EOF
|
EOF
|
||||||
|
|
@ -60,6 +62,10 @@ main() {
|
||||||
DRY_RUN=1
|
DRY_RUN=1
|
||||||
shift
|
shift
|
||||||
;;
|
;;
|
||||||
|
--salvage-before-scrape)
|
||||||
|
SALVAGE_BEFORE=1
|
||||||
|
shift
|
||||||
|
;;
|
||||||
--channel)
|
--channel)
|
||||||
[[ $# -ge 2 ]] || die "Missing value for --channel."
|
[[ $# -ge 2 ]] || die "Missing value for --channel."
|
||||||
CHANNEL_ARGS+=(--channel "$2")
|
CHANNEL_ARGS+=(--channel "$2")
|
||||||
|
|
@ -120,6 +126,13 @@ main() {
|
||||||
printf '\n--- Target: %s ---\n' "$name"
|
printf '\n--- Target: %s ---\n' "$name"
|
||||||
local -a scrape_args=(--config "$CONFIG_PATH" --target "$name")
|
local -a scrape_args=(--config "$CONFIG_PATH" --target "$name")
|
||||||
scrape_args+=("${CHANNEL_ARGS[@]}")
|
scrape_args+=("${CHANNEL_ARGS[@]}")
|
||||||
|
if (( SALVAGE_BEFORE )); then
|
||||||
|
if ! "$DOCUMENTS" "${scrape_args[@]}" --salvage-only; then
|
||||||
|
failed=$((failed + 1))
|
||||||
|
printf 'Operator proof FAILED for %s (salvage-before)\n' "$name" >&2
|
||||||
|
continue
|
||||||
|
fi
|
||||||
|
fi
|
||||||
if "$DOCUMENTS" "${scrape_args[@]}" && "$PROVE" "${scrape_args[@]}"; then
|
if "$DOCUMENTS" "${scrape_args[@]}" && "$PROVE" "${scrape_args[@]}"; then
|
||||||
succeeded=$((succeeded + 1))
|
succeeded=$((succeeded + 1))
|
||||||
printf 'Operator proof passed for %s\n' "$name"
|
printf 'Operator proof passed for %s\n' "$name"
|
||||||
|
|
|
||||||
|
|
@ -13,6 +13,8 @@ AUDIT_JSON="$REPO_ROOT/scripts/audit-archive-json.sh"
|
||||||
|
|
||||||
DRY_RUN=0
|
DRY_RUN=0
|
||||||
SKIP_SCRAPE=0
|
SKIP_SCRAPE=0
|
||||||
|
SALVAGE_ONLY=0
|
||||||
|
SALVAGE_BEFORE=0
|
||||||
SYNC_GUI_FLAG=0
|
SYNC_GUI_FLAG=0
|
||||||
PER_TARGET=0
|
PER_TARGET=0
|
||||||
CONTINUE_ON_ERROR=0
|
CONTINUE_ON_ERROR=0
|
||||||
|
|
@ -31,6 +33,8 @@ End-to-end operator validation with timestamped log:
|
||||||
Options:
|
Options:
|
||||||
--dry-run Readiness + archives only (no Discord scrape)
|
--dry-run Readiness + archives only (no Discord scrape)
|
||||||
--skip-scrape Readiness only (no scrape, no audit loop)
|
--skip-scrape Readiness only (no scrape, no audit loop)
|
||||||
|
--salvage-only Merge stale .dce-temp exports only, then audit (no Discord scrape)
|
||||||
|
--salvage-before-scrape Run salvage-only pass before incremental scrape
|
||||||
--sync-gui Run sync-token-from-gui.sh --force before checks
|
--sync-gui Run sync-token-from-gui.sh --force before checks
|
||||||
--target NAME Limit scrape/audit to one configured target
|
--target NAME Limit scrape/audit to one configured target
|
||||||
--channel ID With exactly one --target, limit scrape to channel ID (repeatable)
|
--channel ID With exactly one --target, limit scrape to channel ID (repeatable)
|
||||||
|
|
@ -86,19 +90,75 @@ audit_targets() {
|
||||||
(( failures == 0 ))
|
(( failures == 0 ))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
run_documents_scrape() {
|
||||||
|
local -a scrape_args=(--config "$CONFIG_PATH")
|
||||||
|
scrape_args+=("${CHANNEL_ARGS[@]}")
|
||||||
|
[[ -n "$TARGET" ]] && scrape_args+=(--target "$TARGET")
|
||||||
|
|
||||||
|
if (( SALVAGE_ONLY )); then
|
||||||
|
run_step "run-documents-scrape (salvage-only)" "$DOCUMENTS_SCRAPE" "${scrape_args[@]}" --salvage-only
|
||||||
|
return $?
|
||||||
|
fi
|
||||||
|
|
||||||
|
if (( DRY_RUN )); then
|
||||||
|
run_step "run-documents-scrape (dry-run)" "$DOCUMENTS_SCRAPE" "${scrape_args[@]}" --dry-run
|
||||||
|
return $?
|
||||||
|
fi
|
||||||
|
|
||||||
|
if (( SALVAGE_BEFORE )); then
|
||||||
|
run_step "run-documents-scrape (salvage-before)" "$DOCUMENTS_SCRAPE" "${scrape_args[@]}" --salvage-only || return $?
|
||||||
|
fi
|
||||||
|
|
||||||
|
run_step "run-documents-scrape" "$DOCUMENTS_SCRAPE" "${scrape_args[@]}"
|
||||||
|
}
|
||||||
|
|
||||||
scrape_per_target() {
|
scrape_per_target() {
|
||||||
local name failures=0 ok=0
|
local name failures=0 ok=0
|
||||||
local -a scrape_args=(--config "$CONFIG_PATH")
|
local -a scrape_args=(--config "$CONFIG_PATH")
|
||||||
scrape_args+=("${CHANNEL_ARGS[@]}")
|
scrape_args+=("${CHANNEL_ARGS[@]}")
|
||||||
local -a target_names=()
|
local -a target_names=()
|
||||||
if (( DRY_RUN )); then
|
|
||||||
scrape_args+=(--dry-run)
|
|
||||||
fi
|
|
||||||
mapfile -t target_names < <(enabled_targets)
|
mapfile -t target_names < <(enabled_targets)
|
||||||
for name in "${target_names[@]}"; do
|
for name in "${target_names[@]}"; do
|
||||||
[[ -n "$name" ]] || continue
|
[[ -n "$name" ]] || continue
|
||||||
log_step "Per-target begin: $name"
|
log_step "Per-target begin: $name"
|
||||||
if ! run_step "run-documents-scrape ($name)" "$DOCUMENTS_SCRAPE" "${scrape_args[@]}" --target "$name"; then
|
local -a per_args=("${scrape_args[@]}" --target "$name")
|
||||||
|
if (( SALVAGE_ONLY )); then
|
||||||
|
if ! run_step "run-documents-scrape ($name salvage-only)" "$DOCUMENTS_SCRAPE" "${per_args[@]}" --salvage-only; then
|
||||||
|
log_step "Per-target failed: $name (salvage-only)"
|
||||||
|
failures=$((failures + 1))
|
||||||
|
(( CONTINUE_ON_ERROR )) || return 1
|
||||||
|
continue
|
||||||
|
fi
|
||||||
|
if run_step "audit-archive-json ($name)" "$AUDIT_JSON" --config "$CONFIG_PATH" --target "$name"; then
|
||||||
|
log_step "Per-target done: $name (salvage-only ok)"
|
||||||
|
ok=$((ok + 1))
|
||||||
|
else
|
||||||
|
log_step "Per-target failed: $name (audit)"
|
||||||
|
failures=$((failures + 1))
|
||||||
|
(( CONTINUE_ON_ERROR )) || return 1
|
||||||
|
fi
|
||||||
|
continue
|
||||||
|
fi
|
||||||
|
if (( DRY_RUN )); then
|
||||||
|
if ! run_step "run-documents-scrape ($name)" "$DOCUMENTS_SCRAPE" "${per_args[@]}" --dry-run; then
|
||||||
|
log_step "Per-target failed: $name (dry-run)"
|
||||||
|
failures=$((failures + 1))
|
||||||
|
(( CONTINUE_ON_ERROR )) || return 1
|
||||||
|
continue
|
||||||
|
fi
|
||||||
|
log_step "Per-target done: $name (dry-run)"
|
||||||
|
ok=$((ok + 1))
|
||||||
|
continue
|
||||||
|
fi
|
||||||
|
if (( SALVAGE_BEFORE )); then
|
||||||
|
if ! run_step "run-documents-scrape ($name salvage)" "$DOCUMENTS_SCRAPE" "${per_args[@]}" --salvage-only; then
|
||||||
|
log_step "Per-target failed: $name (salvage-before)"
|
||||||
|
failures=$((failures + 1))
|
||||||
|
(( CONTINUE_ON_ERROR )) || return 1
|
||||||
|
continue
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
if ! run_step "run-documents-scrape ($name)" "$DOCUMENTS_SCRAPE" "${per_args[@]}"; then
|
||||||
log_step "Per-target failed: $name (scrape)"
|
log_step "Per-target failed: $name (scrape)"
|
||||||
failures=$((failures + 1))
|
failures=$((failures + 1))
|
||||||
if (( CONTINUE_ON_ERROR == 0 )); then
|
if (( CONTINUE_ON_ERROR == 0 )); then
|
||||||
|
|
@ -137,6 +197,14 @@ main() {
|
||||||
SKIP_SCRAPE=1
|
SKIP_SCRAPE=1
|
||||||
shift
|
shift
|
||||||
;;
|
;;
|
||||||
|
--salvage-only)
|
||||||
|
SALVAGE_ONLY=1
|
||||||
|
shift
|
||||||
|
;;
|
||||||
|
--salvage-before-scrape)
|
||||||
|
SALVAGE_BEFORE=1
|
||||||
|
shift
|
||||||
|
;;
|
||||||
--sync-gui)
|
--sync-gui)
|
||||||
SYNC_GUI_FLAG=1
|
SYNC_GUI_FLAG=1
|
||||||
shift
|
shift
|
||||||
|
|
@ -179,6 +247,13 @@ main() {
|
||||||
esac
|
esac
|
||||||
done
|
done
|
||||||
|
|
||||||
|
if (( SALVAGE_ONLY == 1 && SALVAGE_BEFORE == 1 )); then
|
||||||
|
die "--salvage-only and --salvage-before-scrape are mutually exclusive."
|
||||||
|
fi
|
||||||
|
if (( SALVAGE_ONLY == 1 && DRY_RUN == 1 )); then
|
||||||
|
die "--salvage-only cannot be combined with --dry-run."
|
||||||
|
fi
|
||||||
|
|
||||||
mkdir -p "$LOG_DIR"
|
mkdir -p "$LOG_DIR"
|
||||||
if [[ -z "$LOG_FILE" ]]; then
|
if [[ -z "$LOG_FILE" ]]; then
|
||||||
LOG_FILE="$LOG_DIR/operator-validation-$(date -u +%Y%m%dT%H%M%SZ).log"
|
LOG_FILE="$LOG_DIR/operator-validation-$(date -u +%Y%m%dT%H%M%SZ).log"
|
||||||
|
|
@ -192,6 +267,8 @@ main() {
|
||||||
if [[ -n "$TARGET" ]]; then
|
if [[ -n "$TARGET" ]]; then
|
||||||
log_step "Targets: $TARGET"
|
log_step "Targets: $TARGET"
|
||||||
((${#CHANNEL_ARGS[@]})) && log_step "Scrape channel filter: ${CHANNEL_ARGS[*]}"
|
((${#CHANNEL_ARGS[@]})) && log_step "Scrape channel filter: ${CHANNEL_ARGS[*]}"
|
||||||
|
(( SALVAGE_ONLY )) && log_step "Mode: salvage-only"
|
||||||
|
(( SALVAGE_BEFORE )) && log_step "Mode: salvage-before-scrape"
|
||||||
else
|
else
|
||||||
log_step "Enabled targets: $(enabled_targets | paste -sd, -)"
|
log_step "Enabled targets: $(enabled_targets | paste -sd, -)"
|
||||||
fi
|
fi
|
||||||
|
|
@ -206,15 +283,12 @@ main() {
|
||||||
elif (( PER_TARGET )) && [[ -z "$TARGET" ]]; then
|
elif (( PER_TARGET )) && [[ -z "$TARGET" ]]; then
|
||||||
scrape_per_target || failures=$((failures + 1))
|
scrape_per_target || failures=$((failures + 1))
|
||||||
else
|
else
|
||||||
local -a scrape_args=(--config "$CONFIG_PATH")
|
if run_documents_scrape; then
|
||||||
scrape_args+=("${CHANNEL_ARGS[@]}")
|
if (( DRY_RUN == 0 && failures == 0 )); then
|
||||||
[[ -n "$TARGET" ]] && scrape_args+=(--target "$TARGET")
|
audit_targets || failures=$((failures + 1))
|
||||||
if (( DRY_RUN )); then
|
fi
|
||||||
scrape_args+=(--dry-run)
|
else
|
||||||
fi
|
failures=$((failures + 1))
|
||||||
run_step "run-documents-scrape" "$DOCUMENTS_SCRAPE" "${scrape_args[@]}" || failures=$((failures + 1))
|
|
||||||
if (( DRY_RUN == 0 && failures == 0 )); then
|
|
||||||
audit_targets || failures=$((failures + 1))
|
|
||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -72,4 +72,23 @@ grep -q 'Operator validation finished successfully' "$LOG_DIR/validation.log" ||
|
||||||
exit 1
|
exit 1
|
||||||
}
|
}
|
||||||
|
|
||||||
|
SALVAGE_LOG="$LOG_DIR/salvage-validation.log"
|
||||||
|
DCE_MIN_FREE_MB=0 DCE_REPO_ROOT="$REPO_ROOT" DCE_CONFIG_FILE="$CONFIG_PATH" DCE_ENV_FILE="$ENV_PATH" \
|
||||||
|
DCE_LOG_DIR="$LOG_DIR" DCE_SKIP_SCRAPE_LOCK=1 \
|
||||||
|
"$RUNNER" --salvage-only --target demo --config "$CONFIG_PATH" --log-file "$SALVAGE_LOG"
|
||||||
|
|
||||||
|
grep -q 'Mode: salvage-only' "$SALVAGE_LOG" || {
|
||||||
|
printf 'ERROR: salvage-only mode not logged\n' >&2
|
||||||
|
exit 1
|
||||||
|
}
|
||||||
|
grep -q 'run-documents-scrape (salvage-only)' "$SALVAGE_LOG" || {
|
||||||
|
printf 'ERROR: salvage-only documents scrape step missing\n' >&2
|
||||||
|
exit 1
|
||||||
|
}
|
||||||
|
grep -q 'Operator validation finished successfully' "$SALVAGE_LOG" || {
|
||||||
|
printf 'ERROR: salvage-only validation did not succeed\n' >&2
|
||||||
|
cat "$SALVAGE_LOG" >&2
|
||||||
|
exit 1
|
||||||
|
}
|
||||||
|
|
||||||
printf 'run-operator-validation-smoke: ok\n'
|
printf 'run-operator-validation-smoke: ok\n'
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue