feat(scrape): recover JSON summary from host compose run log

Reuse the shared recovery helper to restore the JSON summary from the temp
compose log before that log is deleted, when the file named by
DCE_RUN_SUMMARY_FILE is missing or empty after a successful host scrape.
This commit is contained in:
Copilot 2026-06-03 10:30:14 -05:00
parent fcea842fe3
commit 35a7416d8f
7 changed files with 146 additions and 6 deletions

View file

@ -0,0 +1,64 @@
---
title: "feat: Host runner recover JSON summary from compose log"
type: feat
status: complete
date: 2026-06-04
origin: /lfg — plan 071 deferred host-runner recovery when stdout is not teed to a persistent file
---
# feat: Host runner recover JSON summary from compose log
## Summary
`run-discord-scrape-host.sh` already tees compose output to a temporary run log. Before deleting that log on success, recover `DCE_RUN_SUMMARY_FILE` from the last `DCE_JSON_SUMMARY:` line when the file is missing or empty.
## Problem Frame
Operator validation (plan 071) recovers summaries from its teed log. Direct host scrapes (`run-discord-scrape-host.sh scrape`) capture compose stdout in a temp file but discard it after success. When the container logs `DCE_JSON_SUMMARY` but cannot write the mapped file, operators lose machine-readable totals unless they manually grep the scrollback.
## Requirements
| ID | Requirement |
|----|-------------|
| R1 | Shared `recover_json_summary_if_missing(run_log, dest_file)` skips when dest exists and is non-empty |
| R2 | `run_subcommand_with_retry` calls recovery on successful scrape/preflight runs before deleting the temp log |
| R3 | Recovery runs only when `DCE_RUN_SUMMARY_FILE` is set and file is missing or zero-length |
| R4 | Success prints `JSON summary recovered from run log:` to stderr |
| R5 | `run-operator-validation.sh` uses the shared helper instead of inline extract |
| R6 | Host smoke covers recovery from a synthetic run log |
| R7 | `DCE_MIN_FREE_MB=0 ./scripts/run-all-smokes.sh` → 22/22 |
## Implementation Units
### U1. Shared recovery helper
**Files:** `scripts/lib/scrape-summary-json.sh`, `scripts/tests/scrape-summary-json-smoke.sh`
**Approach:** Add `recover_json_summary_if_missing`; extend smoke with dest-already-exists skip case.
### U2. Host runner wiring
**Files:** `scripts/run-discord-scrape-host.sh`, `scripts/tests/run-discord-scrape-host-smoke.sh`
**Approach:** Source lib in host runner; call recovery before `rm -f "$output_file"` on both success paths in `run_subcommand_with_retry`.
### U3. Validation refactor + docs
**Files:** `scripts/run-operator-validation.sh`, `docs/recurring-scrape-merge-readiness.md`
**Approach:** Replace inline extract block with shared helper; add Plan 072 stamp.
## Verification
```bash
DCE_MIN_FREE_MB=0 ./scripts/run-all-smokes.sh
```
## Scope Boundaries
### Deferred
- Live KotOR catch-up on host
- Auto-enable JSON summary on bare `run-discord-scrape-host.sh scrape` without env vars
- Operator-proof JSON summary parity
- Merging multiple per-target summaries into one JSON artifact

View file

@ -172,6 +172,8 @@ DCE_MIN_FREE_MB=0 ./scripts/run-operator-validation.sh \
**Plan 071 (2026-06-04):** When summary file write fails, operator validation recovers JSON from the last `DCE_JSON_SUMMARY:` line in the teed log.
**Plan 072 (2026-06-04):** Host runner recovers JSON summary from the captured compose run log before deleting the temp file.
**Disk:** ~65 GiB free on `/home` (2026-05-30); large channel merges still need headroom.
## CI note (fork PRs)

View file

@ -24,3 +24,12 @@ extract_json_summary_from_log() {
mkdir -p "$(dirname "$dest_file")" mkdir -p "$(dirname "$dest_file")"
jq . <<<"$json_payload" >"$dest_file" jq . <<<"$json_payload" >"$dest_file"
} }
#######################################
# Recover the JSON summary from a run log, but only when the destination
# summary file is absent or zero-length.
# Arguments:
#   $1 - run log path, passed through to extract_json_summary_from_log
#   $2 - destination summary file
# Returns:
#   1 when either argument is missing or empty, or when the destination
#   already exists with content (nothing to recover); otherwise the exit
#   status of extract_json_summary_from_log.
#######################################
recover_json_summary_if_missing() {
  # Default to empty so a missing argument reaches the guard below instead of
  # aborting a caller that runs with `set -u`.
  local run_log=${1:-}
  local dest_file=${2:-}

  [[ -n "$run_log" && -n "$dest_file" ]] || return 1

  # Never overwrite a summary that already has content.
  if [[ -s "$dest_file" ]]; then
    return 1
  fi

  extract_json_summary_from_log "$run_log" "$dest_file"
}

View file

@ -511,6 +511,18 @@ try_interactive_reauth() {
"$reauth_script" "$reauth_script"
} }
#######################################
# After a compose run, try to rebuild the JSON summary file from the captured
# run log and announce a successful recovery on stderr.
# Globals:
#   DCE_RUN_SUMMARY_FILE (read) - summary destination; unset/empty disables
#   SCRIPT_DIR (read)           - directory containing lib/
# Arguments:
#   $1 - path of the captured compose run log
# Outputs:
#   Writes a one-line notice to stderr when a summary was recovered.
# Returns:
#   0 when no destination is configured or nothing was recovered; otherwise
#   the exit status of the notice printf.
#######################################
recover_scrape_summary_from_run_log() {
  local run_log=$1
  local summary_path=${DCE_RUN_SUMMARY_FILE:-}

  # No summary destination configured: nothing to recover.
  if [[ -z "$summary_path" ]]; then
    return 0
  fi

  # shellcheck source=lib/scrape-summary-json.sh
  source "$SCRIPT_DIR/lib/scrape-summary-json.sh"

  # A non-zero status from the helper is not treated as an error here.
  recover_json_summary_if_missing "$run_log" "$summary_path" || return 0
  printf 'JSON summary recovered from run log: %s\n' "$summary_path" >&2
}
run_subcommand_with_retry() { run_subcommand_with_retry() {
local subcommand=$1 local subcommand=$1
shift shift
@ -522,6 +534,7 @@ run_subcommand_with_retry() {
compose_run_args run_args "$subcommand" "$@" compose_run_args run_args "$subcommand" "$@"
if "${run_args[@]}" 2>&1 | tee "$output_file"; then if "${run_args[@]}" 2>&1 | tee "$output_file"; then
recover_scrape_summary_from_run_log "$output_file"
rm -f "$output_file" rm -f "$output_file"
return 0 return 0
fi fi
@ -545,6 +558,7 @@ run_subcommand_with_retry() {
compose_run_args run_args "$subcommand" "$@" compose_run_args run_args "$subcommand" "$@"
if "${run_args[@]}" 2>&1 | tee "$output_file"; then if "${run_args[@]}" 2>&1 | tee "$output_file"; then
recover_scrape_summary_from_run_log "$output_file"
rm -f "$output_file" rm -f "$output_file"
return 0 return 0
fi fi

View file

@ -327,12 +327,10 @@ main() {
local pipeline_status=${PIPESTATUS[0]} local pipeline_status=${PIPESTATUS[0]}
if (( export_json_summary )) && [[ -n "${DCE_RUN_SUMMARY_FILE:-}" ]]; then if (( export_json_summary )) && [[ -n "${DCE_RUN_SUMMARY_FILE:-}" ]]; then
if [[ ! -s "${DCE_RUN_SUMMARY_FILE}" ]]; then # shellcheck source=lib/scrape-summary-json.sh
# shellcheck source=lib/scrape-summary-json.sh source "$SCRIPT_DIR/lib/scrape-summary-json.sh"
source "$SCRIPT_DIR/lib/scrape-summary-json.sh" if recover_json_summary_if_missing "$LOG_FILE" "$DCE_RUN_SUMMARY_FILE"; then
if extract_json_summary_from_log "$LOG_FILE" "$DCE_RUN_SUMMARY_FILE"; then printf 'JSON summary recovered from log: %s\n' "$DCE_RUN_SUMMARY_FILE"
printf 'JSON summary recovered from log: %s\n' "$DCE_RUN_SUMMARY_FILE"
fi
fi fi
fi fi

View file

@ -83,6 +83,12 @@ if [[ "$mode" == "streaming" ]]; then
exit 0 exit 0
fi fi
if [[ "$mode" == "json-summary-log-only" ]]; then
printf '[2026-06-04T12:00:00Z] DCE_JSON_SUMMARY: {"version":1,"totals":{"merged":42,"unchanged":0,"created":0,"skipped":0,"skipped_oom":0,"messages_appended":7}}\n' >&2
printf 'run succeeded\n'
exit 0
fi
printf 'run succeeded\n' printf 'run succeeded\n'
EOF EOF
chmod +x "$FAKE_DOCKER" chmod +x "$FAKE_DOCKER"
@ -331,4 +337,34 @@ grep -q 'env:DCE_RUN_SUMMARY_FILE=/logs/host-smoke-summary.json' "$COMPOSE_SUMMA
exit 1 exit 1
} }
# --- Host runner recovers the JSON summary from the compose run log ---------
# The fake docker's "json-summary-log-only" mode prints a DCE_JSON_SUMMARY line
# to stderr and "run succeeded" to stdout, then exits 0 without writing any
# summary file, so the file can only appear through recovery by the host runner.
HOST_RECOVER_SUMMARY="$TMP_DIR/host-run-recovered.summary.json"
HOST_RECOVER_STDERR="$TMP_DIR/host-recover-stderr.txt"
# Start from a clean slate: no pre-existing summary file.
rm -f "$HOST_RECOVER_SUMMARY"
# Reset the call counter file handed to the fake docker via
# FAKE_DOCKER_CALL_COUNT (presumably counts docker invocations; confirm
# against the fake docker script).
printf '0' >"$CALL_COUNT"
# Run the real host script against the fake docker; stdout is discarded and
# stderr is captured so the recovery notice can be asserted below.
env -u DISCORD_TOKEN \
DCE_SKIP_SCRAPE_LOCK=1 \
DCE_REPO_ROOT="$REPO_ROOT" \
DCE_DOCKER_BIN="$FAKE_DOCKER" \
DCE_ENV_FILE="$ENV_FILE" \
DCE_COMPOSE_FILE="$COMPOSE_FILE" \
DCE_RUN_SUMMARY_FILE="$HOST_RECOVER_SUMMARY" \
FAKE_DOCKER_CALL_COUNT="$CALL_COUNT" \
FAKE_DOCKER_TOKEN_FILE="$TOKEN_FILE" \
FAKE_DOCKER_MODE=json-summary-log-only \
"$REPO_ROOT/scripts/run-discord-scrape-host.sh" scrape --target demo \
>/dev/null 2>"$HOST_RECOVER_STDERR"
# 1) The summary file must now exist and be non-empty.
[[ -s "$HOST_RECOVER_SUMMARY" ]] || {
echo "expected host runner to recover summary from compose run log" >&2
exit 1
}
# 2) Its totals must match the payload the fake docker logged
#    (merged=42, messages_appended=7).
jq -e '.totals.merged == 42 and .totals.messages_appended == 7' "$HOST_RECOVER_SUMMARY" >/dev/null || {
echo "recovered host summary JSON content mismatch" >&2
exit 1
}
# 3) The host runner must have announced the recovery on stderr.
grep -q 'JSON summary recovered from run log:' "$HOST_RECOVER_STDERR" || {
echo "expected recovery notice on stderr" >&2
cat "$HOST_RECOVER_STDERR" >&2
exit 1
}
echo "run-discord-scrape-host smoke test passed" echo "run-discord-scrape-host smoke test passed"

View file

@ -43,6 +43,23 @@ jq -e '.totals.merged == 9' "$OUT_FILE" >/dev/null || {
exit 1 exit 1
} }
# --- recover_json_summary_if_missing: skip case ------------------------------
# A destination that already holds content must not be recovered over; the
# helper signals the skip with a non-zero status.
EXISTING="$TMP_DIR/existing.summary.json"
printf '{"version":1,"totals":{"merged":1}}\n' >"$EXISTING"
if recover_json_summary_if_missing "$LOG_FILE" "$EXISTING" 2>/dev/null; then
printf 'ERROR: recover should skip when dest already non-empty\n' >&2
exit 1
fi
# --- recover_json_summary_if_missing: recovery case --------------------------
# With no destination file present, the helper must succeed and write the
# summary taken from $LOG_FILE (expected totals.merged == 9, the same value
# asserted for the direct extract earlier in this script).
RECOVER_OUT="$TMP_DIR/recover-via-helper.summary.json"
recover_json_summary_if_missing "$LOG_FILE" "$RECOVER_OUT" || {
printf 'ERROR: recover_json_summary_if_missing failed\n' >&2
exit 1
}
jq -e '.totals.merged == 9' "$RECOVER_OUT" >/dev/null || {
printf 'ERROR: recover helper wrote wrong content\n' >&2
exit 1
}
if extract_json_summary_from_log "$TMP_DIR/missing.log" "$OUT_FILE" 2>/dev/null; then if extract_json_summary_from_log "$TMP_DIR/missing.log" "$OUT_FILE" 2>/dev/null; then
printf 'ERROR: extract should fail on missing log\n' >&2 printf 'ERROR: extract should fail on missing log\n' >&2
exit 1 exit 1