From dbc887d81c93548eae251c2f1cd6e5589bb3f7cb Mon Sep 17 00:00:00 2001 From: Copilot Date: Wed, 3 Jun 2026 10:35:48 -0500 Subject: [PATCH] feat(scrape): JSON summary export for operator proof runs Auto-enable DCE_RUN_SUMMARY_* when proof scrapes, support --log-file, and recover summary JSON from the teed proof log when file write fails. --- ...3-feat-operator-proof-json-summary-plan.md | 57 +++++++++++++++++++ docs/recurring-scrape-merge-readiness.md | 2 + docs/recurring-scrape-operator-checklist.md | 1 + scripts/run-operator-proof.sh | 36 +++++++++++- scripts/tests/run-operator-proof-smoke.sh | 12 ++++ 5 files changed, 106 insertions(+), 2 deletions(-) create mode 100644 docs/plans/2026-06-04-073-feat-operator-proof-json-summary-plan.md diff --git a/docs/plans/2026-06-04-073-feat-operator-proof-json-summary-plan.md b/docs/plans/2026-06-04-073-feat-operator-proof-json-summary-plan.md new file mode 100644 index 00000000..2f57abf0 --- /dev/null +++ b/docs/plans/2026-06-04-073-feat-operator-proof-json-summary-plan.md @@ -0,0 +1,57 @@ +--- +title: "feat: Operator proof JSON scrape summary export" +type: feat +status: complete +date: 2026-06-04 +origin: /lfg — plan 072 deferred operator-proof JSON summary parity with validation +--- + +# feat: Operator proof JSON scrape summary export + +## Summary + +Mirror operator-validation JSON summary behavior in `run-operator-proof.sh`: auto-enable `DCE_RUN_SUMMARY_*` when scraping, write `*.summary.json` beside the proof log, and recover from the teed log when the file is missing. + +## Problem Frame + +Validation and host runner now emit machine-readable scrape totals (plans 069–072). `run-operator-proof.sh` is the one-target handoff → scrape → prove path but still only produces human-readable logs. Operators running KotOR yes_general proof cannot get `.summary.json` without switching to validation. 
+ +## Requirements + +| ID | Requirement | +|----|-------------| +| R1 | Optional `--log-file PATH`; default `logs/operator-proof-<timestamp>.log` | +| R2 | When scraping (not dry-run, not salvage-only), export `DCE_RUN_SUMMARY_JSON=1` and `*.summary.json` beside log | +| R3 | Log `JSON summary file:` at proof start when export enabled | +| R4 | After tee, recover summary via `recover_json_summary_if_missing` when file missing | +| R5 | Dry-run and salvage-only do not enable JSON export | +| R6 | `run-operator-proof-smoke.sh` asserts dry-run skips JSON summary | +| R7 | `DCE_MIN_FREE_MB=0 ./scripts/run-all-smokes.sh` → 22/22 | + +## Implementation Units + +### U1. Operator proof wiring + +**Files:** `scripts/run-operator-proof.sh`, `scripts/tests/run-operator-proof-smoke.sh` + +**Approach:** Match validation pattern; export env before tee; recovery after tee. + +### U2. Docs + +**Files:** `docs/recurring-scrape-merge-readiness.md`, `docs/recurring-scrape-operator-checklist.md` + +**Approach:** Plan 073 stamp; note `.summary.json` beside proof logs. + +## Verification + +```bash +DCE_MIN_FREE_MB=0 ./scripts/run-all-smokes.sh +``` + +## Scope Boundaries + +### Deferred + +- Live KotOR catch-up on host +- Per-target separate summary files in multi-target proof loops +- `print-scrape-summary.sh` CLI to pretty-print JSON diff --git a/docs/recurring-scrape-merge-readiness.md b/docs/recurring-scrape-merge-readiness.md index 5365bd0e..753e59fa 100644 --- a/docs/recurring-scrape-merge-readiness.md +++ b/docs/recurring-scrape-merge-readiness.md @@ -174,6 +174,8 @@ DCE_MIN_FREE_MB=0 ./scripts/run-operator-validation.sh \ **Plan 072 (2026-06-04):** Host runner recovers JSON summary from the captured compose run log before deleting the temp file. +**Plan 073 (2026-06-04):** Operator proof auto-writes `*.summary.json` beside proof log with tee-log recovery (parity with validation). + **Disk:** ~65 GiB free on `/home` (2026-05-30); large channel merges still need headroom. 
## CI note (fork PRs) diff --git a/docs/recurring-scrape-operator-checklist.md b/docs/recurring-scrape-operator-checklist.md index fb0a7c88..92722aad 100644 --- a/docs/recurring-scrape-operator-checklist.md +++ b/docs/recurring-scrape-operator-checklist.md @@ -56,6 +56,7 @@ Salvage then incremental scrape: ./scripts/run-documents-scrape.sh --salvage-before-scrape --target NAME [--channel ID] ./scripts/run-operator-validation.sh --salvage-before-scrape --target NAME [--channel ID] --log-file logs/scrape.log ./scripts/run-operator-proof.sh --salvage-before-scrape --sync-gui --target NAME +# When scraping, also writes logs/operator-proof-.summary.json beside the proof log ``` **KotOR yes_general** (`221726893064454144`): first catch-up after a 2021 archive cursor can take hours and may OOM; salvage preserved partials before retrying. Stop duplicate validation processes (MyBook vs Downloads checkouts share the same lock). `KotOR_discord_msgs` sets `container_memory: "8g"` in `scrape-targets.json` for single-target runs; override globally with `DCE_CONTAINER_MEMORY` in `scrape.env` if needed. Channel-scoped proof: diff --git a/scripts/run-operator-proof.sh b/scripts/run-operator-proof.sh index d5d19c8d..c3ad9d61 100755 --- a/scripts/run-operator-proof.sh +++ b/scripts/run-operator-proof.sh @@ -19,6 +19,7 @@ DRY_RUN=0 SALVAGE_BEFORE=0 SALVAGE_ONLY=0 CHANNEL_ARGS=() +LOG_FILE="" usage() { cat <.log +Logs append to logs/operator-proof-.log (or --log-file). When scraping, also writes +.summary.json unless DCE_RUN_SUMMARY_FILE is already set. EOF } @@ -77,6 +80,11 @@ main() { CHANNEL_ARGS+=(--channel "$2") shift 2 ;; + --log-file) + [[ $# -ge 2 ]] || die "Missing value for --log-file." 
+ LOG_FILE=$2 + shift 2 + ;; --help|-h) usage exit 0 @@ -106,7 +114,20 @@ main() { mkdir -p "$LOG_DIR" local log_file - log_file="$LOG_DIR/operator-proof-$(date -u +%Y%m%dT%H%M%SZ).log" + if [[ -n "$LOG_FILE" ]]; then + log_file="$LOG_FILE" + else + log_file="$LOG_DIR/operator-proof-$(date -u +%Y%m%dT%H%M%SZ).log" + fi + + local export_json_summary=0 + if (( DRY_RUN == 0 && SALVAGE_ONLY == 0 )); then + export_json_summary=1 + export DCE_RUN_SUMMARY_JSON=1 + if [[ -z "${DCE_RUN_SUMMARY_FILE:-}" ]]; then + export DCE_RUN_SUMMARY_FILE="${log_file%.log}.summary.json" + fi + fi local failed=0 succeeded=0 name @@ -118,6 +139,9 @@ main() { fi printf 'config: %s\n' "$CONFIG_PATH" print_scrape_config_plan "$CONFIG_PATH" "Operator proof" "${targets[@]}" + if (( export_json_summary )); then + printf 'JSON summary file: %s\n' "${DCE_RUN_SUMMARY_FILE:-}" + fi printf 'started: %s\n\n' "$(date -u +%Y-%m-%dT%H:%M:%SZ)" if (( SYNC_GUI_FLAG == 1 )); then @@ -165,6 +189,14 @@ main() { (( failed == 0 )) || exit 1 } 2>&1 | tee "$log_file" + if (( export_json_summary )) && [[ -n "${DCE_RUN_SUMMARY_FILE:-}" ]]; then + # shellcheck source=lib/scrape-summary-json.sh + source "$SCRIPT_DIR/lib/scrape-summary-json.sh" + if recover_json_summary_if_missing "$log_file" "$DCE_RUN_SUMMARY_FILE"; then + printf 'JSON summary recovered from log: %s\n' "$DCE_RUN_SUMMARY_FILE" + fi + fi + printf 'Log: %s\n' "$log_file" } diff --git a/scripts/tests/run-operator-proof-smoke.sh b/scripts/tests/run-operator-proof-smoke.sh index 4adb5c83..9f8cc4a9 100755 --- a/scripts/tests/run-operator-proof-smoke.sh +++ b/scripts/tests/run-operator-proof-smoke.sh @@ -67,6 +67,10 @@ grep -q 'Operator proof run plan' <<<"$output" || { echo "expected Operator proof run plan in dry-run output" >&2 exit 1 } +if grep -q 'JSON summary file:' <<<"$output"; then + echo "dry-run should not enable JSON summary export" >&2 + exit 1 +fi set +e salvage_output=$( @@ -84,6 +88,10 @@ if [[ "$salvage_status" -ne 0 ]] || ! 
grep -q 'Salvage-only proof complete' <<<" printf '%s\n' "$salvage_output" >&2 exit 1 fi +if grep -q 'JSON summary file:' <<<"$salvage_output"; then + echo "salvage-only should not enable JSON summary export" >&2 + exit 1 +fi set +e salvage_before_output=$( @@ -108,6 +116,10 @@ grep -q 'Operator proof passed for demo' <<<"$salvage_before_output" || { printf '%s\n' "$salvage_before_output" >&2 exit 1 } +grep -q 'JSON summary file:' <<<"$salvage_before_output" || { + printf 'expected JSON summary path when operator proof scrapes\n' >&2 + exit 1 +} command -v flock >/dev/null 2>&1 && { LOCK_FILE="$TMP_DIR/archive/.dce-scrape.lock"