mirror of
https://github.com/Tyrrrz/DiscordChatExporter.git
synced 2026-06-09 15:52:37 -06:00
feat(scrape): recover JSON summary from host compose run log
Reuse shared recover helper before deleting the temp compose log when DCE_RUN_SUMMARY_FILE is missing after a successful host scrape.
This commit is contained in:
parent
fcea842fe3
commit
35a7416d8f
|
|
@ -0,0 +1,64 @@
|
|||
---
|
||||
title: "feat: Host runner recover JSON summary from compose log"
|
||||
type: feat
|
||||
status: complete
|
||||
date: 2026-06-04
|
||||
origin: /lfg — plan 071 deferred host-runner recovery when stdout is not teed to a persistent file
|
||||
---
|
||||
|
||||
# feat: Host runner recover JSON summary from compose log
|
||||
|
||||
## Summary
|
||||
|
||||
`run-discord-scrape-host.sh` already tees compose output to a temporary run log. Before deleting that log on success, recover `DCE_RUN_SUMMARY_FILE` from the last `DCE_JSON_SUMMARY:` line when the file is missing or empty.
|
||||
|
||||
## Problem Frame
|
||||
|
||||
Operator validation (plan 071) recovers summaries from its teed log. Direct host scrapes (`run-discord-scrape-host.sh scrape`) capture compose stdout in a temp file but discard it after success. When the container logs `DCE_JSON_SUMMARY` but cannot write the mapped file, operators lose machine-readable totals unless they manually grep the scrollback.
|
||||
|
||||
## Requirements
|
||||
|
||||
| ID | Requirement |
|
||||
|----|-------------|
|
||||
| R1 | Shared `recover_json_summary_if_missing(run_log, dest_file)` skips when dest exists and is non-empty |
|
||||
| R2 | `run_subcommand_with_retry` calls recovery on successful scrape/preflight runs before deleting the temp log |
|
||||
| R3 | Recovery runs only when `DCE_RUN_SUMMARY_FILE` is set and file is missing or zero-length |
|
||||
| R4 | When recovery succeeds, the host runner prints `JSON summary recovered from run log: <path>` to stderr |
|
||||
| R5 | `run-operator-validation.sh` uses the shared helper instead of inline extract |
|
||||
| R6 | Host smoke covers recovery from a synthetic run log |
|
||||
| R7 | `DCE_MIN_FREE_MB=0 ./scripts/run-all-smokes.sh` → 22/22 |
|
||||
|
||||
## Implementation Units
|
||||
|
||||
### U1. Shared recovery helper
|
||||
|
||||
**Files:** `scripts/lib/scrape-summary-json.sh`, `scripts/tests/scrape-summary-json-smoke.sh`
|
||||
|
||||
**Approach:** Add `recover_json_summary_if_missing`; extend smoke with dest-already-exists skip case.
|
||||
|
||||
### U2. Host runner wiring
|
||||
|
||||
**Files:** `scripts/run-discord-scrape-host.sh`, `scripts/tests/run-discord-scrape-host-smoke.sh`
|
||||
|
||||
**Approach:** Source lib in host runner; call recovery before `rm -f "$output_file"` on both success paths in `run_subcommand_with_retry`.
|
||||
|
||||
### U3. Validation refactor + docs
|
||||
|
||||
**Files:** `scripts/run-operator-validation.sh`, `docs/recurring-scrape-merge-readiness.md`
|
||||
|
||||
**Approach:** Replace inline extract block with shared helper; add Plan 072 stamp.
|
||||
|
||||
## Verification
|
||||
|
||||
```bash
|
||||
DCE_MIN_FREE_MB=0 ./scripts/run-all-smokes.sh
|
||||
```
|
||||
|
||||
## Scope Boundaries
|
||||
|
||||
### Deferred
|
||||
|
||||
- Live KotOR catch-up on host
|
||||
- Auto-enable JSON summary on bare `host.sh scrape` without env vars
|
||||
- Operator-proof JSON summary parity
|
||||
- Merging multiple per-target summaries into one JSON artifact
|
||||
|
|
@ -172,6 +172,8 @@ DCE_MIN_FREE_MB=0 ./scripts/run-operator-validation.sh \
|
|||
|
||||
**Plan 071 (2026-06-04):** When summary file write fails, operator validation recovers JSON from the last `DCE_JSON_SUMMARY:` line in the teed log.
|
||||
|
||||
**Plan 072 (2026-06-04):** Host runner recovers JSON summary from the captured compose run log before deleting the temp file.
|
||||
|
||||
**Disk:** ~65 GiB free on `/home` (2026-05-30); large channel merges still need headroom.
|
||||
|
||||
## CI note (fork PRs)
|
||||
|
|
|
|||
|
|
@ -24,3 +24,12 @@ extract_json_summary_from_log() {
|
|||
mkdir -p "$(dirname "$dest_file")"
|
||||
jq . <<<"$json_payload" >"$dest_file"
|
||||
}
|
||||
|
||||
# Recover the JSON run summary from a captured run log, but only when the
# destination summary file is missing or empty.
#
# Arguments:
#   $1 - path to the captured run log
#   $2 - path of the summary file to create
# Returns:
#   1 when either argument is missing/empty or the destination already has
#   content; otherwise the exit status of extract_json_summary_from_log.
recover_json_summary_if_missing() {
  # Default to empty so a short call reaches the guard below and returns 1
  # instead of aborting the caller with "unbound variable" under `set -u`.
  local run_log=${1:-}
  local dest_file=${2:-}

  [[ -n "$run_log" && -n "$dest_file" ]] || return 1

  # A non-empty destination is left untouched. The non-zero status lets
  # callers print their "recovered" notice only when a file was written.
  if [[ -s "$dest_file" ]]; then
    return 1
  fi

  extract_json_summary_from_log "$run_log" "$dest_file"
}
|
||||
|
|
|
|||
|
|
@ -511,6 +511,18 @@ try_interactive_reauth() {
|
|||
"$reauth_script"
|
||||
}
|
||||
|
||||
# Best-effort recovery of the JSON run summary from the captured run log.
#
# Globals:
#   DCE_RUN_SUMMARY_FILE (read) - summary destination; nothing happens when
#                                 it is unset or empty
#   SCRIPT_DIR (read)           - directory that contains lib/
# Arguments:
#   $1 - path to the captured run log
# Outputs:
#   A one-line notice on stderr when the helper reports a recovery.
# Returns:
#   0 when recovery is not requested or the helper reports nothing recovered.
recover_scrape_summary_from_run_log() {
  local captured_log=$1
  local summary_path=${DCE_RUN_SUMMARY_FILE:-}

  # No summary file requested: nothing to recover.
  if [[ -z "$summary_path" ]]; then
    return 0
  fi

  # shellcheck source=lib/scrape-summary-json.sh
  source "$SCRIPT_DIR/lib/scrape-summary-json.sh"

  # A non-zero status from the helper is not treated as an error here.
  recover_json_summary_if_missing "$captured_log" "$summary_path" || return 0

  printf 'JSON summary recovered from run log: %s\n' "$summary_path" >&2
}
|
||||
|
||||
run_subcommand_with_retry() {
|
||||
local subcommand=$1
|
||||
shift
|
||||
|
|
@ -522,6 +534,7 @@ run_subcommand_with_retry() {
|
|||
|
||||
compose_run_args run_args "$subcommand" "$@"
|
||||
if "${run_args[@]}" 2>&1 | tee "$output_file"; then
|
||||
recover_scrape_summary_from_run_log "$output_file"
|
||||
rm -f "$output_file"
|
||||
return 0
|
||||
fi
|
||||
|
|
@ -545,6 +558,7 @@ run_subcommand_with_retry() {
|
|||
compose_run_args run_args "$subcommand" "$@"
|
||||
|
||||
if "${run_args[@]}" 2>&1 | tee "$output_file"; then
|
||||
recover_scrape_summary_from_run_log "$output_file"
|
||||
rm -f "$output_file"
|
||||
return 0
|
||||
fi
|
||||
|
|
|
|||
|
|
@ -327,12 +327,10 @@ main() {
|
|||
local pipeline_status=${PIPESTATUS[0]}
|
||||
|
||||
if (( export_json_summary )) && [[ -n "${DCE_RUN_SUMMARY_FILE:-}" ]]; then
|
||||
if [[ ! -s "${DCE_RUN_SUMMARY_FILE}" ]]; then
|
||||
# shellcheck source=lib/scrape-summary-json.sh
|
||||
source "$SCRIPT_DIR/lib/scrape-summary-json.sh"
|
||||
if extract_json_summary_from_log "$LOG_FILE" "$DCE_RUN_SUMMARY_FILE"; then
|
||||
printf 'JSON summary recovered from log: %s\n' "$DCE_RUN_SUMMARY_FILE"
|
||||
fi
|
||||
# shellcheck source=lib/scrape-summary-json.sh
|
||||
source "$SCRIPT_DIR/lib/scrape-summary-json.sh"
|
||||
if recover_json_summary_if_missing "$LOG_FILE" "$DCE_RUN_SUMMARY_FILE"; then
|
||||
printf 'JSON summary recovered from log: %s\n' "$DCE_RUN_SUMMARY_FILE"
|
||||
fi
|
||||
fi
|
||||
|
||||
|
|
|
|||
|
|
@ -83,6 +83,12 @@ if [[ "$mode" == "streaming" ]]; then
|
|||
exit 0
|
||||
fi
|
||||
|
||||
if [[ "$mode" == "json-summary-log-only" ]]; then
|
||||
printf '[2026-06-04T12:00:00Z] DCE_JSON_SUMMARY: {"version":1,"totals":{"merged":42,"unchanged":0,"created":0,"skipped":0,"skipped_oom":0,"messages_appended":7}}\n' >&2
|
||||
printf 'run succeeded\n'
|
||||
exit 0
|
||||
fi
|
||||
|
||||
printf 'run succeeded\n'
|
||||
EOF
|
||||
chmod +x "$FAKE_DOCKER"
|
||||
|
|
@ -331,4 +337,34 @@ grep -q 'env:DCE_RUN_SUMMARY_FILE=/logs/host-smoke-summary.json' "$COMPOSE_SUMMA
|
|||
exit 1
|
||||
}
|
||||
|
||||
# Recovery smoke: the fake docker logs a DCE_JSON_SUMMARY line but never
# writes the summary file, so the host runner has to rebuild it from the
# captured compose run log.
HOST_RECOVER_SUMMARY="$TMP_DIR/host-run-recovered.summary.json"
HOST_RECOVER_STDERR="$TMP_DIR/host-recover-stderr.txt"
rm -f "$HOST_RECOVER_SUMMARY"
printf '0' >"$CALL_COUNT"

# The runner's stderr goes to a file so the notice can be grepped below.
# Handle a failing run explicitly so that captured stderr is shown instead
# of being lost.
if ! env -u DISCORD_TOKEN \
  DCE_SKIP_SCRAPE_LOCK=1 \
  DCE_REPO_ROOT="$REPO_ROOT" \
  DCE_DOCKER_BIN="$FAKE_DOCKER" \
  DCE_ENV_FILE="$ENV_FILE" \
  DCE_COMPOSE_FILE="$COMPOSE_FILE" \
  DCE_RUN_SUMMARY_FILE="$HOST_RECOVER_SUMMARY" \
  FAKE_DOCKER_CALL_COUNT="$CALL_COUNT" \
  FAKE_DOCKER_TOKEN_FILE="$TOKEN_FILE" \
  FAKE_DOCKER_MODE=json-summary-log-only \
  "$REPO_ROOT/scripts/run-discord-scrape-host.sh" scrape --target demo \
  >/dev/null 2>"$HOST_RECOVER_STDERR"; then
  echo "host runner failed during summary recovery smoke" >&2
  cat "$HOST_RECOVER_STDERR" >&2
  exit 1
fi

[[ -s "$HOST_RECOVER_SUMMARY" ]] || {
  echo "expected host runner to recover summary from compose run log" >&2
  exit 1
}

# The totals must match the synthetic DCE_JSON_SUMMARY line emitted by the
# fake docker in json-summary-log-only mode.
jq -e '.totals.merged == 42 and .totals.messages_appended == 7' "$HOST_RECOVER_SUMMARY" >/dev/null || {
  echo "recovered host summary JSON content mismatch" >&2
  exit 1
}

grep -q 'JSON summary recovered from run log:' "$HOST_RECOVER_STDERR" || {
  echo "expected recovery notice on stderr" >&2
  cat "$HOST_RECOVER_STDERR" >&2
  exit 1
}

echo "run-discord-scrape-host smoke test passed"
|
||||
|
|
|
|||
|
|
@ -43,6 +43,23 @@ jq -e '.totals.merged == 9' "$OUT_FILE" >/dev/null || {
|
|||
exit 1
|
||||
}
|
||||
|
||||
# Skip case: a non-empty destination must make the helper return non-zero
# and must be left byte-for-byte untouched.
EXISTING="$TMP_DIR/existing.summary.json"
printf '{"version":1,"totals":{"merged":1}}\n' >"$EXISTING"
if recover_json_summary_if_missing "$LOG_FILE" "$EXISTING" 2>/dev/null; then
  printf 'ERROR: recover should skip when dest already non-empty\n' >&2
  exit 1
fi
# Status alone does not prove the file survived; check the seeded content.
jq -e '.totals.merged == 1' "$EXISTING" >/dev/null || {
  printf 'ERROR: recover modified an existing non-empty dest\n' >&2
  exit 1
}

# Recovery case: a missing destination is created from the log fixture.
RECOVER_OUT="$TMP_DIR/recover-via-helper.summary.json"
recover_json_summary_if_missing "$LOG_FILE" "$RECOVER_OUT" || {
  printf 'ERROR: recover_json_summary_if_missing failed\n' >&2
  exit 1
}
jq -e '.totals.merged == 9' "$RECOVER_OUT" >/dev/null || {
  printf 'ERROR: recover helper wrote wrong content\n' >&2
  exit 1
}
|
||||
|
||||
if extract_json_summary_from_log "$TMP_DIR/missing.log" "$OUT_FILE" 2>/dev/null; then
|
||||
printf 'ERROR: extract should fail on missing log\n' >&2
|
||||
exit 1
|
||||
|
|
|
|||
Loading…
Reference in a new issue