mirror of
https://github.com/Tyrrrz/DiscordChatExporter.git
synced 2026-06-09 15:52:37 -06:00
feat(scrape): wire salvage flags through operator validation and proof
Expose --salvage-only and --salvage-before-scrape on run-operator-validation.sh and --salvage-before-scrape on run-operator-proof.sh so operators can merge stale .dce-temp exports before or instead of incremental Discord scrapes.
This commit is contained in:
parent
8468e34e37
commit
22915770e6
|
|
@ -0,0 +1,55 @@
|
|||
---
|
||||
title: "feat: Salvage flags on operator validation and proof"
|
||||
type: feat
|
||||
status: active
|
||||
date: 2026-06-04
|
||||
origin: /lfg — plan 054 added documents --salvage-only; operator-validation/proof still lack salvage entry points for yes_general catch-up
|
||||
---
|
||||
|
||||
# feat: Salvage flags on operator validation and proof
|
||||
|
||||
## Summary
|
||||
|
||||
Add `--salvage-only` and `--salvage-before-scrape` to `run-operator-validation.sh`, and `--salvage-before-scrape` to `run-operator-proof.sh`. Salvage passes forward to `run-documents-scrape.sh --salvage-only`; `--salvage-before-scrape` then continues into the normal scrape path.
|
||||
|
||||
## Problem Frame
|
||||
|
||||
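After stopping a long KotOR export, operators need a way to merge the stale `.dce-temp` exports and audit the result without starting a new Discord scrape, for example: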
|
||||
|
||||
```bash
|
||||
./scripts/run-operator-validation.sh --salvage-only --target KotOR_discord_msgs --channel 221726893064454144
|
||||
```
|
||||
|
||||
Plan 054 implemented salvage on documents-scrape, but validation/proof orchestrators do not expose it.
|
||||
|
||||
## Requirements
|
||||
|
||||
| ID | Requirement |
|
||||
|----|-------------|
|
||||
| R1 | `run-operator-validation.sh` accepts `--salvage-only` (salvage + audit, no Discord scrape) |
|
||||
| R2 | `run-operator-validation.sh` accepts `--salvage-before-scrape` (salvage then scrape + audit) |
|
||||
| R3 | Both flags forward `--target` / `--channel` to `run-documents-scrape.sh` |
|
||||
| R4 | `--salvage-only` and `--salvage-before-scrape` are mutually exclusive |
|
||||
| R5 | `run-operator-proof.sh` accepts `--salvage-before-scrape` and runs salvage before scrape+prove |
|
||||
| R6 | Smoke tests cover validation `--salvage-only`; `run-all-smokes.sh` passes |
|
||||
|
||||
## Implementation Units
|
||||
|
||||
### U1. Operator validation salvage flags
|
||||
|
||||
**Files:** `scripts/run-operator-validation.sh`, `scripts/tests/run-operator-validation-smoke.sh`
|
||||
|
||||
### U2. Operator proof salvage-before
|
||||
|
||||
**Files:** `scripts/run-operator-proof.sh`
|
||||
|
||||
### U3. Smoke gate
|
||||
|
||||
**Verification:** `DCE_MIN_FREE_MB=0 ./scripts/run-all-smokes.sh`
|
||||
|
||||
## Scope Boundaries
|
||||
|
||||
### Deferred
|
||||
|
||||
- Killing stale PID 3868255 on host
|
||||
- Live yes_general catch-up inside LFG
|
||||
|
|
@ -16,19 +16,21 @@ source "$SCRIPT_DIR/lib/scrape-run-plan.sh"
|
|||
TARGET=""
|
||||
SYNC_GUI_FLAG=0
|
||||
DRY_RUN=0
|
||||
SALVAGE_BEFORE=0
|
||||
CHANNEL_ARGS=()
|
||||
|
||||
usage() {
|
||||
cat <<EOF
|
||||
Usage:
|
||||
$(basename "$0") [--target NAME] [--channel ID] [--config PATH] [--sync-gui] [--dry-run]
|
||||
$(basename "$0") [--target NAME] [--channel ID] [--config PATH] [--sync-gui] [--dry-run] [--salvage-before-scrape]
|
||||
|
||||
End-to-end operator proof:
|
||||
operator-handoff → incremental scrape → prove-incremental-append
|
||||
operator-handoff → [optional salvage] → incremental scrape → prove-incremental-append
|
||||
|
||||
When --target is omitted, all enabled targets in the config are processed.
|
||||
|
||||
--channel ID With exactly one --target, limit scrape/prove to channel ID (repeatable)
|
||||
--salvage-before-scrape Merge stale .dce-temp exports before incremental scrape
|
||||
|
||||
Logs append to logs/operator-proof-<timestamp>.log
|
||||
EOF
|
||||
|
|
@ -60,6 +62,10 @@ main() {
|
|||
DRY_RUN=1
|
||||
shift
|
||||
;;
|
||||
--salvage-before-scrape)
|
||||
SALVAGE_BEFORE=1
|
||||
shift
|
||||
;;
|
||||
--channel)
|
||||
[[ $# -ge 2 ]] || die "Missing value for --channel."
|
||||
CHANNEL_ARGS+=(--channel "$2")
|
||||
|
|
@ -120,6 +126,13 @@ main() {
|
|||
printf '\n--- Target: %s ---\n' "$name"
|
||||
local -a scrape_args=(--config "$CONFIG_PATH" --target "$name")
|
||||
scrape_args+=("${CHANNEL_ARGS[@]}")
|
||||
if (( SALVAGE_BEFORE )); then
|
||||
if ! "$DOCUMENTS" "${scrape_args[@]}" --salvage-only; then
|
||||
failed=$((failed + 1))
|
||||
printf 'Operator proof FAILED for %s (salvage-before)\n' "$name" >&2
|
||||
continue
|
||||
fi
|
||||
fi
|
||||
if "$DOCUMENTS" "${scrape_args[@]}" && "$PROVE" "${scrape_args[@]}"; then
|
||||
succeeded=$((succeeded + 1))
|
||||
printf 'Operator proof passed for %s\n' "$name"
|
||||
|
|
|
|||
|
|
@ -13,6 +13,8 @@ AUDIT_JSON="$REPO_ROOT/scripts/audit-archive-json.sh"
|
|||
|
||||
DRY_RUN=0
|
||||
SKIP_SCRAPE=0
|
||||
SALVAGE_ONLY=0
|
||||
SALVAGE_BEFORE=0
|
||||
SYNC_GUI_FLAG=0
|
||||
PER_TARGET=0
|
||||
CONTINUE_ON_ERROR=0
|
||||
|
|
@ -31,6 +33,8 @@ End-to-end operator validation with timestamped log:
|
|||
Options:
|
||||
--dry-run Readiness + archives only (no Discord scrape)
|
||||
--skip-scrape Readiness only (no scrape, no audit loop)
|
||||
--salvage-only Merge stale .dce-temp exports only, then audit (no Discord scrape)
|
||||
--salvage-before-scrape Run salvage-only pass before incremental scrape
|
||||
--sync-gui Run sync-token-from-gui.sh --force before checks
|
||||
--target NAME Limit scrape/audit to one configured target
|
||||
--channel ID With exactly one --target, limit scrape to channel ID (repeatable)
|
||||
|
|
@ -86,19 +90,75 @@ audit_targets() {
|
|||
(( failures == 0 ))
|
||||
}
|
||||
|
||||
# Run the documents-scrape step in the mode selected by the global flags.
# Globals read: CONFIG_PATH, CHANNEL_ARGS, TARGET, SALVAGE_ONLY, DRY_RUN,
#   SALVAGE_BEFORE, DOCUMENTS_SCRAPE.
# Returns: the exit status of the last run_step call that was made.
# NOTE(review): run_step is defined outside this hunk; presumably it logs the
#   label and runs the remaining arguments as a command — confirm there.
run_documents_scrape() {
|
||||
local -a scrape_args=(--config "$CONFIG_PATH")
|
||||
scrape_args+=("${CHANNEL_ARGS[@]}")
|
||||
# --target is forwarded only when a target name was supplied.
[[ -n "$TARGET" ]] && scrape_args+=(--target "$TARGET")
|
||||
|
||||
# Salvage-only mode: a single --salvage-only pass, then return its status.
if (( SALVAGE_ONLY )); then
|
||||
run_step "run-documents-scrape (salvage-only)" "$DOCUMENTS_SCRAPE" "${scrape_args[@]}" --salvage-only
|
||||
return $?
|
||||
fi
|
||||
|
||||
# Dry-run mode: a single --dry-run pass, then return. Because this returns
# before the SALVAGE_BEFORE check below, no salvage pass runs under dry-run.
if (( DRY_RUN )); then
|
||||
run_step "run-documents-scrape (dry-run)" "$DOCUMENTS_SCRAPE" "${scrape_args[@]}" --dry-run
|
||||
return $?
|
||||
fi
|
||||
|
||||
# Salvage-before mode: run a --salvage-only pass first; if it fails, return
# its status without attempting the normal scrape.
if (( SALVAGE_BEFORE )); then
|
||||
run_step "run-documents-scrape (salvage-before)" "$DOCUMENTS_SCRAPE" "${scrape_args[@]}" --salvage-only || return $?
|
||||
fi
|
||||
|
||||
# Normal scrape using the shared arguments; its status is the function's result.
run_step "run-documents-scrape" "$DOCUMENTS_SCRAPE" "${scrape_args[@]}"
|
||||
}
|
||||
|
||||
scrape_per_target() {
|
||||
local name failures=0 ok=0
|
||||
local -a scrape_args=(--config "$CONFIG_PATH")
|
||||
scrape_args+=("${CHANNEL_ARGS[@]}")
|
||||
local -a target_names=()
|
||||
if (( DRY_RUN )); then
|
||||
scrape_args+=(--dry-run)
|
||||
fi
|
||||
mapfile -t target_names < <(enabled_targets)
|
||||
for name in "${target_names[@]}"; do
|
||||
[[ -n "$name" ]] || continue
|
||||
log_step "Per-target begin: $name"
|
||||
if ! run_step "run-documents-scrape ($name)" "$DOCUMENTS_SCRAPE" "${scrape_args[@]}" --target "$name"; then
|
||||
local -a per_args=("${scrape_args[@]}" --target "$name")
|
||||
if (( SALVAGE_ONLY )); then
|
||||
if ! run_step "run-documents-scrape ($name salvage-only)" "$DOCUMENTS_SCRAPE" "${per_args[@]}" --salvage-only; then
|
||||
log_step "Per-target failed: $name (salvage-only)"
|
||||
failures=$((failures + 1))
|
||||
(( CONTINUE_ON_ERROR )) || return 1
|
||||
continue
|
||||
fi
|
||||
if run_step "audit-archive-json ($name)" "$AUDIT_JSON" --config "$CONFIG_PATH" --target "$name"; then
|
||||
log_step "Per-target done: $name (salvage-only ok)"
|
||||
ok=$((ok + 1))
|
||||
else
|
||||
log_step "Per-target failed: $name (audit)"
|
||||
failures=$((failures + 1))
|
||||
(( CONTINUE_ON_ERROR )) || return 1
|
||||
fi
|
||||
continue
|
||||
fi
|
||||
if (( DRY_RUN )); then
|
||||
if ! run_step "run-documents-scrape ($name)" "$DOCUMENTS_SCRAPE" "${per_args[@]}" --dry-run; then
|
||||
log_step "Per-target failed: $name (dry-run)"
|
||||
failures=$((failures + 1))
|
||||
(( CONTINUE_ON_ERROR )) || return 1
|
||||
continue
|
||||
fi
|
||||
log_step "Per-target done: $name (dry-run)"
|
||||
ok=$((ok + 1))
|
||||
continue
|
||||
fi
|
||||
if (( SALVAGE_BEFORE )); then
|
||||
if ! run_step "run-documents-scrape ($name salvage)" "$DOCUMENTS_SCRAPE" "${per_args[@]}" --salvage-only; then
|
||||
log_step "Per-target failed: $name (salvage-before)"
|
||||
failures=$((failures + 1))
|
||||
(( CONTINUE_ON_ERROR )) || return 1
|
||||
continue
|
||||
fi
|
||||
fi
|
||||
if ! run_step "run-documents-scrape ($name)" "$DOCUMENTS_SCRAPE" "${per_args[@]}"; then
|
||||
log_step "Per-target failed: $name (scrape)"
|
||||
failures=$((failures + 1))
|
||||
if (( CONTINUE_ON_ERROR == 0 )); then
|
||||
|
|
@ -137,6 +197,14 @@ main() {
|
|||
SKIP_SCRAPE=1
|
||||
shift
|
||||
;;
|
||||
--salvage-only)
|
||||
SALVAGE_ONLY=1
|
||||
shift
|
||||
;;
|
||||
--salvage-before-scrape)
|
||||
SALVAGE_BEFORE=1
|
||||
shift
|
||||
;;
|
||||
--sync-gui)
|
||||
SYNC_GUI_FLAG=1
|
||||
shift
|
||||
|
|
@ -179,6 +247,13 @@ main() {
|
|||
esac
|
||||
done
|
||||
|
||||
if (( SALVAGE_ONLY == 1 && SALVAGE_BEFORE == 1 )); then
|
||||
die "--salvage-only and --salvage-before-scrape are mutually exclusive."
|
||||
fi
|
||||
if (( SALVAGE_ONLY == 1 && DRY_RUN == 1 )); then
|
||||
die "--salvage-only cannot be combined with --dry-run."
|
||||
fi
|
||||
|
||||
mkdir -p "$LOG_DIR"
|
||||
if [[ -z "$LOG_FILE" ]]; then
|
||||
LOG_FILE="$LOG_DIR/operator-validation-$(date -u +%Y%m%dT%H%M%SZ).log"
|
||||
|
|
@ -192,6 +267,8 @@ main() {
|
|||
if [[ -n "$TARGET" ]]; then
|
||||
log_step "Targets: $TARGET"
|
||||
((${#CHANNEL_ARGS[@]})) && log_step "Scrape channel filter: ${CHANNEL_ARGS[*]}"
|
||||
(( SALVAGE_ONLY )) && log_step "Mode: salvage-only"
|
||||
(( SALVAGE_BEFORE )) && log_step "Mode: salvage-before-scrape"
|
||||
else
|
||||
log_step "Enabled targets: $(enabled_targets | paste -sd, -)"
|
||||
fi
|
||||
|
|
@ -206,15 +283,12 @@ main() {
|
|||
elif (( PER_TARGET )) && [[ -z "$TARGET" ]]; then
|
||||
scrape_per_target || failures=$((failures + 1))
|
||||
else
|
||||
local -a scrape_args=(--config "$CONFIG_PATH")
|
||||
scrape_args+=("${CHANNEL_ARGS[@]}")
|
||||
[[ -n "$TARGET" ]] && scrape_args+=(--target "$TARGET")
|
||||
if (( DRY_RUN )); then
|
||||
scrape_args+=(--dry-run)
|
||||
fi
|
||||
run_step "run-documents-scrape" "$DOCUMENTS_SCRAPE" "${scrape_args[@]}" || failures=$((failures + 1))
|
||||
if (( DRY_RUN == 0 && failures == 0 )); then
|
||||
audit_targets || failures=$((failures + 1))
|
||||
if run_documents_scrape; then
|
||||
if (( DRY_RUN == 0 && failures == 0 )); then
|
||||
audit_targets || failures=$((failures + 1))
|
||||
fi
|
||||
else
|
||||
failures=$((failures + 1))
|
||||
fi
|
||||
fi
|
||||
|
||||
|
|
|
|||
|
|
@ -72,4 +72,23 @@ grep -q 'Operator validation finished successfully' "$LOG_DIR/validation.log" ||
|
|||
exit 1
|
||||
}
|
||||
|
||||
SALVAGE_LOG="$LOG_DIR/salvage-validation.log"
|
||||
DCE_MIN_FREE_MB=0 DCE_REPO_ROOT="$REPO_ROOT" DCE_CONFIG_FILE="$CONFIG_PATH" DCE_ENV_FILE="$ENV_PATH" \
|
||||
DCE_LOG_DIR="$LOG_DIR" DCE_SKIP_SCRAPE_LOCK=1 \
|
||||
"$RUNNER" --salvage-only --target demo --config "$CONFIG_PATH" --log-file "$SALVAGE_LOG"
|
||||
|
||||
grep -q 'Mode: salvage-only' "$SALVAGE_LOG" || {
|
||||
printf 'ERROR: salvage-only mode not logged\n' >&2
|
||||
exit 1
|
||||
}
|
||||
grep -q 'run-documents-scrape (salvage-only)' "$SALVAGE_LOG" || {
|
||||
printf 'ERROR: salvage-only documents scrape step missing\n' >&2
|
||||
exit 1
|
||||
}
|
||||
grep -q 'Operator validation finished successfully' "$SALVAGE_LOG" || {
|
||||
printf 'ERROR: salvage-only validation did not succeed\n' >&2
|
||||
cat "$SALVAGE_LOG" >&2
|
||||
exit 1
|
||||
}
|
||||
|
||||
printf 'run-operator-validation-smoke: ok\n'
|
||||
|
|
|
|||
Loading…
Reference in a new issue