From 5cfb2ed144cefbec4e8fbe7f08aac7c3a5f60f9b Mon Sep 17 00:00:00 2001 From: Copilot Date: Wed, 3 Jun 2026 10:18:33 -0500 Subject: [PATCH] feat(scrape): host compose passthrough for JSON summary Mount logs/ in compose, map DCE_RUN_SUMMARY_FILE to /logs, and auto-enable JSON summary beside operator-validation log files when scraping. --- docker-compose.yml | 1 + ...feat-host-json-summary-passthrough-plan.md | 44 +++++++++++++++++ docs/recurring-scrape-merge-readiness.md | 2 + docs/recurring-scrape-operator-checklist.md | 1 + scrape.env.example | 3 +- scripts/run-discord-scrape-host.sh | 49 +++++++++++++++++++ scripts/run-operator-validation.sh | 12 +++++ .../tests/run-discord-scrape-host-smoke.sh | 27 ++++++++++ .../tests/run-operator-validation-smoke.sh | 5 ++ 9 files changed, 143 insertions(+), 1 deletion(-) create mode 100644 docs/plans/2026-06-04-070-feat-host-json-summary-passthrough-plan.md diff --git a/docker-compose.yml b/docker-compose.yml index f5aac610..76fbffda 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -19,6 +19,7 @@ services: volumes: - ./config:/config:ro,z - ./scripts/run-discord-scrape.sh:/opt/dce-scheduler/run-discord-scrape.sh:ro,z + - ./logs:/logs:z # Host path must match archive_root in config/scrape-targets.json (override on other machines). 
- ${DCE_ARCHIVE_ROOT:-/home/brunner56/Documents}:${DCE_ARCHIVE_ROOT:-/home/brunner56/Documents}:z command: ["help"] diff --git a/docs/plans/2026-06-04-070-feat-host-json-summary-passthrough-plan.md b/docs/plans/2026-06-04-070-feat-host-json-summary-passthrough-plan.md new file mode 100644 index 00000000..80d93db4 --- /dev/null +++ b/docs/plans/2026-06-04-070-feat-host-json-summary-passthrough-plan.md @@ -0,0 +1,44 @@ +--- +title: "feat: Host compose passthrough for JSON scrape summary" +type: feat +status: complete +date: 2026-06-04 +origin: /lfg — plan 069 deferred host compose passthrough; container runs could not write DCE_RUN_SUMMARY_FILE on host +--- + +# feat: Host compose passthrough for JSON scrape summary + +## Summary + +Mount repo `logs/` into the scrape container, pass `DCE_RUN_SUMMARY_*` through compose env, map host `logs/*.json` paths to `/logs/*`, and auto-enable JSON summary in `run-operator-validation.sh` when scraping. + +## Requirements + +| ID | Requirement | +|----|-------------| +| R1 | `docker-compose.yml` mounts `./logs:/logs:z` | +| R2 | `write_compose_env_temp` passes `DCE_RUN_SUMMARY_JSON` and mapped `DCE_RUN_SUMMARY_FILE` | +| R3 | Host paths under `$REPO_ROOT/logs/` map to `/logs/` inside container | +| R4 | `run-operator-validation.sh` sets `DCE_RUN_SUMMARY_JSON=1` and `*.summary.json` beside `--log-file` when scraping | +| R5 | Host smoke asserts summary env passthrough and `/logs/` mapping | +| R6 | Validation smoke asserts dry-run does not enable JSON summary | +| R7 | `DCE_MIN_FREE_MB=0 ./scripts/run-all-smokes.sh` → 21/21 | + +## Implementation Units + +### U1. 
Compose mount + host passthrough + +**Files:** `docker-compose.yml`, `scripts/run-discord-scrape-host.sh`, `scripts/run-operator-validation.sh`, smokes + +## Verification + +```bash +DCE_MIN_FREE_MB=0 ./scripts/run-all-smokes.sh +``` + +## Scope Boundaries + +### Deferred + +- Live KotOR catch-up on host +- Auto-extract JSON from tee log when file write fails diff --git a/docs/recurring-scrape-merge-readiness.md b/docs/recurring-scrape-merge-readiness.md index 54b05e3a..1e0c1195 100644 --- a/docs/recurring-scrape-merge-readiness.md +++ b/docs/recurring-scrape-merge-readiness.md @@ -166,6 +166,8 @@ DCE_MIN_FREE_MB=0 ./scripts/run-operator-validation.sh \ **Plan 069 (2026-06-04):** Optional JSON scrape run summary via `DCE_RUN_SUMMARY_JSON` / `DCE_RUN_SUMMARY_FILE`. +**Plan 070 (2026-06-04):** Compose mounts `logs/` at `/logs`; host runner passthrough; operator-validation auto-writes `*.summary.json` beside `--log-file`. + **Disk:** ~65 GiB free on `/home` (2026-05-30); large channel merges still need headroom. ## CI note (fork PRs) diff --git a/docs/recurring-scrape-operator-checklist.md b/docs/recurring-scrape-operator-checklist.md index b14954b6..fb0a7c88 100644 --- a/docs/recurring-scrape-operator-checklist.md +++ b/docs/recurring-scrape-operator-checklist.md @@ -64,6 +64,7 @@ Salvage then incremental scrape: ./scripts/run-operator-validation.sh --salvage-before-scrape \ --target KotOR_discord_msgs --channel 221726893064454144 \ --log-file logs/kotor-yes-general.log +# Also writes logs/kotor-yes-general.summary.json (machine-readable scrape totals) ./scripts/prove-incremental-append.sh \ --target KotOR_discord_msgs --channel 221726893064454144 diff --git a/scrape.env.example b/scrape.env.example index 0c882261..15283cb0 100644 --- a/scrape.env.example +++ b/scrape.env.example @@ -27,5 +27,6 @@ DCE_USERNS_MODE= # DCE_CONTAINER_MEMORY=8g # Optional: machine-readable scrape summary (run-discord-scrape.sh). 
+# Host paths under logs/ map to /logs/ in the container (see docker-compose.yml). # DCE_RUN_SUMMARY_JSON=1 -# DCE_RUN_SUMMARY_FILE=/path/to/scrape-summary.json +# DCE_RUN_SUMMARY_FILE=logs/scrape-summary.json diff --git a/scripts/run-discord-scrape-host.sh b/scripts/run-discord-scrape-host.sh index a3992371..89f3a4c8 100755 --- a/scripts/run-discord-scrape-host.sh +++ b/scripts/run-discord-scrape-host.sh @@ -45,6 +45,8 @@ Environment: DCE_CONTAINER_MEMORY Optional container memory cap (e.g. 8g, 8192m). Default 0 = unlimited. Targets may set container_memory in scrape-targets.json (used when exactly one --target is selected and this env var is unset or 0). + DCE_RUN_SUMMARY_JSON When 1, container logs DCE_JSON_SUMMARY after scrape. + DCE_RUN_SUMMARY_FILE Host path under logs/ is mapped to /logs/ inside the container. Notes: When $ENV_FILE is missing, exported DISCORD_TOKEN or DISCORD_TOKEN_FILE is used instead. @@ -66,6 +68,40 @@ cleanup_compose_env() { fi }
+# Make sure the repo logs/ directory (bind-mounted at /logs) exists.
+ensure_repo_logs_dir() {
+  mkdir -p "$REPO_ROOT/logs"
+}
+
+# Print the in-container path for summary file $1. Paths in the repo logs/
+# dir (absolute, logs/... or ./logs/...) map to /logs/ with subdirectories
+# kept; /logs/... and anything else is printed unchanged.
+map_summary_file_for_container() {
+  local host_path=$1
+  local logs_dir="$REPO_ROOT/logs"
+
+  [[ -n "$host_path" ]] || return 0
+  case "$host_path" in
+    "$logs_dir"/*) host_path="/logs/${host_path#"$logs_dir"/}" ;;
+    logs/* | ./logs/*) host_path="/logs/${host_path#*logs/}" ;;
+  esac
+  printf '%s\n' "$host_path"
+}
+
+# Create the host directory for the summary file ($1, default
+# $DCE_RUN_SUMMARY_FILE); /logs/, logs/ and ./logs/ paths use $REPO_ROOT/logs.
+ensure_summary_file_host_dir() {
+  local host_path=${1:-${DCE_RUN_SUMMARY_FILE:-}}
+
+  [[ -n "$host_path" ]] || return 0
+  case "$host_path" in
+    /logs/* | logs/* | ./logs/*)
+      host_path="$REPO_ROOT/logs/${host_path#*logs/}"
+      ;;
+  esac
+  mkdir -p "$(dirname "$host_path")"
+}
+
 write_scrape_lock_meta() { local meta_file meta_file=$(scrape_lock_meta_path "$SCRAPE_LOCK_FILE") @@ -191,6 +227,14 @@ write_compose_env_temp() { else printf 'DCE_CONTAINER_MEMORY=0\n' >>"$COMPOSE_ENV_TEMP" fi + if [[ "${DCE_RUN_SUMMARY_JSON:-0}" == "1" ]]; then + printf 'DCE_RUN_SUMMARY_JSON=1\n' >>"$COMPOSE_ENV_TEMP" + fi + if [[ -n "${DCE_RUN_SUMMARY_FILE:-}" ]]; then + local 
container_summary_file + container_summary_file=$(map_summary_file_for_container "$DCE_RUN_SUMMARY_FILE") + printf 'DCE_RUN_SUMMARY_FILE=%s\n' "$container_summary_file" >>"$COMPOSE_ENV_TEMP" + fi } configure_rootless_compose() { @@ -589,6 +633,11 @@ main() { apply_single_target_container_memory "$host_config" "${host_targets[0]}" fi + if [[ "$subcommand" == "scrape" ]]; then + ensure_repo_logs_dir + ensure_summary_file_host_dir + fi + if [[ "$subcommand" != "salvage" ]]; then prepare_compose_env fi diff --git a/scripts/run-operator-validation.sh b/scripts/run-operator-validation.sh index 06fb32b0..ece1d6fb 100755 --- a/scripts/run-operator-validation.sh +++ b/scripts/run-operator-validation.sh @@ -268,6 +268,15 @@ main() { LOG_FILE="$LOG_DIR/operator-validation-$(date -u +%Y%m%dT%H%M%SZ).log" fi + local export_json_summary=0 + if (( DRY_RUN == 0 && SKIP_SCRAPE == 0 && SALVAGE_ONLY == 0 )); then + export_json_summary=1 + export DCE_RUN_SUMMARY_JSON=1 + if [[ -z "${DCE_RUN_SUMMARY_FILE:-}" ]]; then + export DCE_RUN_SUMMARY_FILE="${LOG_FILE%.log}.summary.json" + fi + fi + local failures=0 set -o pipefail @@ -281,6 +290,9 @@ main() { else log_step "Enabled targets: $(enabled_targets | paste -sd, -)" fi + if (( export_json_summary )); then + log_step "JSON summary file: ${DCE_RUN_SUMMARY_FILE:-}" + fi if (( SYNC_GUI_FLAG )); then run_step "sync-token-from-gui" "$SYNC_GUI" --force || failures=$((failures + 1)) fi diff --git a/scripts/tests/run-discord-scrape-host-smoke.sh b/scripts/tests/run-discord-scrape-host-smoke.sh index 5b2cd512..c919a22e 100755 --- a/scripts/tests/run-discord-scrape-host-smoke.sh +++ b/scripts/tests/run-discord-scrape-host-smoke.sh @@ -216,6 +216,8 @@ while (($#)); do esac done printf 'env:DCE_CONTAINER_MEMORY=%s\n' "${DCE_CONTAINER_MEMORY:-}" >>"${FAKE_COMPOSE_ARGS_LOG:?}" +printf 'env:DCE_RUN_SUMMARY_JSON=%s\n' "${DCE_RUN_SUMMARY_JSON:-0}" >>"${FAKE_COMPOSE_ARGS_LOG:?}" +printf 'env:DCE_RUN_SUMMARY_FILE=%s\n' "${DCE_RUN_SUMMARY_FILE:-}" 
>>"${FAKE_COMPOSE_ARGS_LOG:?}" printf '%s\n' "${all_args[*]}" >>"${FAKE_COMPOSE_ARGS_LOG:?}" printf 'run succeeded\n' EOF @@ -304,4 +306,29 @@ grep -q 'env:DCE_CONTAINER_MEMORY=2g' "$COMPOSE_OVERRIDE_LOG" || { exit 1 } +SUMMARY_HOST="$REPO_ROOT/logs/host-smoke-summary.json" +mkdir -p "$REPO_ROOT/logs" +COMPOSE_SUMMARY_LOG="$TMP_DIR/compose-summary.log" +env -u DCE_CONTAINER_MEMORY -u DCE_RUN_SUMMARY_FILE \ + DCE_SKIP_SCRAPE_LOCK=1 \ + DCE_COMPOSE_BIN="$FAKE_COMPOSE" \ + DCE_REPO_ROOT="$REPO_ROOT" \ + DCE_ENV_FILE="$ENV_NO_MEM" \ + DCE_COMPOSE_FILE="$COMPOSE_FILE" \ + DCE_RUN_SUMMARY_JSON=1 \ + DCE_RUN_SUMMARY_FILE="$SUMMARY_HOST" \ + FAKE_COMPOSE_ARGS_LOG="$COMPOSE_SUMMARY_LOG" \ + "$REPO_ROOT/scripts/run-discord-scrape-host.sh" scrape \ + --config "$TARGET_MEM_CONFIG" --target demo >/dev/null +grep -q 'env:DCE_RUN_SUMMARY_JSON=1' "$COMPOSE_SUMMARY_LOG" || { + echo "expected DCE_RUN_SUMMARY_JSON in compose env passthrough" >&2 + cat "$COMPOSE_SUMMARY_LOG" >&2 + exit 1 +} +grep -q 'env:DCE_RUN_SUMMARY_FILE=/logs/host-smoke-summary.json' "$COMPOSE_SUMMARY_LOG" || { + echo "expected host logs path mapped to /logs in compose env" >&2 + cat "$COMPOSE_SUMMARY_LOG" >&2 + exit 1 +} + echo "run-discord-scrape-host smoke test passed" diff --git a/scripts/tests/run-operator-validation-smoke.sh b/scripts/tests/run-operator-validation-smoke.sh index 2cdd3f90..ea0736d2 100755 --- a/scripts/tests/run-operator-validation-smoke.sh +++ b/scripts/tests/run-operator-validation-smoke.sh @@ -72,6 +72,11 @@ grep -q 'Operator validation finished successfully' "$LOG_DIR/validation.log" || exit 1 } +if grep -q 'JSON summary file:' "$LOG_DIR/validation.log"; then + printf 'ERROR: dry-run should not enable JSON summary export\n' >&2 + exit 1 +fi + SALVAGE_LOG="$LOG_DIR/salvage-validation.log" DCE_MIN_FREE_MB=0 DCE_REPO_ROOT="$REPO_ROOT" DCE_CONFIG_FILE="$CONFIG_PATH" DCE_ENV_FILE="$ENV_PATH" \ DCE_LOG_DIR="$LOG_DIR" DCE_SKIP_SCRAPE_LOCK=1 \