feat(scrape): host compose passthrough for JSON summary

Mount logs/ in compose, map DCE_RUN_SUMMARY_FILE to /logs, and auto-enable
JSON summary beside operator-validation log files when scraping.
This commit is contained in:
Copilot 2026-06-03 10:18:33 -05:00
parent 1dda40ae1b
commit 5cfb2ed144
9 changed files with 143 additions and 1 deletions

View file

@ -19,6 +19,7 @@ services:
volumes:
- ./config:/config:ro,z
- ./scripts/run-discord-scrape.sh:/opt/dce-scheduler/run-discord-scrape.sh:ro,z
- ./logs:/logs:z
# Host path must match archive_root in config/scrape-targets.json (override on other machines).
- ${DCE_ARCHIVE_ROOT:-/home/brunner56/Documents}:${DCE_ARCHIVE_ROOT:-/home/brunner56/Documents}:z
command: ["help"]

View file

@ -0,0 +1,44 @@
---
title: "feat: Host compose passthrough for JSON scrape summary"
type: feat
status: complete
date: 2026-06-04
origin: /lfg — plan 069 deferred host compose passthrough; container runs could not write DCE_RUN_SUMMARY_FILE on host
---
# feat: Host compose passthrough for JSON scrape summary
## Summary
Mount repo `logs/` into the scrape container, pass `DCE_RUN_SUMMARY_*` through compose env, map host `logs/*.json` paths to `/logs/*`, and auto-enable JSON summary in `run-operator-validation.sh` when scraping.
## Requirements
| ID | Requirement |
|----|-------------|
| R1 | `docker-compose.yml` mounts `./logs:/logs:z` |
| R2 | `write_compose_env_temp` passes `DCE_RUN_SUMMARY_JSON` and mapped `DCE_RUN_SUMMARY_FILE` |
| R3 | Host paths under `$REPO_ROOT/logs/` map to `/logs/<basename>` inside container |
| R4 | `run-operator-validation.sh` sets `DCE_RUN_SUMMARY_JSON=1` and `*.summary.json` beside `--log-file` when scraping |
| R5 | Host smoke asserts summary env passthrough and `/logs/` mapping |
| R6 | Validation smoke asserts dry-run does not enable JSON summary |
| R7 | `DCE_MIN_FREE_MB=0 ./scripts/run-all-smokes.sh` passes all smoke tests (21/21) |
## Implementation Units
### U1. Compose mount + host passthrough
**Files:** `docker-compose.yml`, `scripts/run-discord-scrape-host.sh`, `scripts/run-operator-validation.sh`, smokes
## Verification
```bash
DCE_MIN_FREE_MB=0 ./scripts/run-all-smokes.sh
```
## Scope Boundaries
### Deferred
- Live KotOR catch-up on host
- Auto-extract JSON from tee log when file write fails

View file

@ -166,6 +166,8 @@ DCE_MIN_FREE_MB=0 ./scripts/run-operator-validation.sh \
**Plan 069 (2026-06-04):** Optional JSON scrape run summary via `DCE_RUN_SUMMARY_JSON` / `DCE_RUN_SUMMARY_FILE`.
**Plan 070 (2026-06-04):** Compose mounts `logs/` at `/logs`; host runner passthrough; operator-validation auto-writes `*.summary.json` beside `--log-file`.
**Disk:** ~65 GiB free on `/home` (2026-05-30); large channel merges still need headroom.
## CI note (fork PRs)

View file

@ -64,6 +64,7 @@ Salvage then incremental scrape:
./scripts/run-operator-validation.sh --salvage-before-scrape \
--target KotOR_discord_msgs --channel 221726893064454144 \
--log-file logs/kotor-yes-general.log
# Also writes logs/kotor-yes-general.summary.json (machine-readable scrape totals)
./scripts/prove-incremental-append.sh \
--target KotOR_discord_msgs --channel 221726893064454144

View file

@ -27,5 +27,6 @@ DCE_USERNS_MODE=
# DCE_CONTAINER_MEMORY=8g
# Optional: machine-readable scrape summary (run-discord-scrape.sh).
# Host paths under logs/ map to /logs/ in the container (see docker-compose.yml).
# DCE_RUN_SUMMARY_JSON=1
# DCE_RUN_SUMMARY_FILE=/path/to/scrape-summary.json
# DCE_RUN_SUMMARY_FILE=logs/scrape-summary.json

View file

@ -45,6 +45,8 @@ Environment:
DCE_CONTAINER_MEMORY Optional container memory cap (e.g. 8g, 8192m). Default 0 = unlimited.
Targets may set container_memory in scrape-targets.json (used when
exactly one --target is selected and this env var is unset or 0).
DCE_RUN_SUMMARY_JSON When 1, container logs DCE_JSON_SUMMARY after scrape.
DCE_RUN_SUMMARY_FILE Host path under logs/ is mapped to /logs/ inside the container.
Notes:
When $ENV_FILE is missing, exported DISCORD_TOKEN or DISCORD_TOKEN_FILE is used instead.
@ -66,6 +68,40 @@ cleanup_compose_env() {
fi
}
# Make sure the repository-level logs/ directory exists.
# Globals:   REPO_ROOT (read)
# Returns:   exit status of mkdir
ensure_repo_logs_dir() {
  local repo_logs="$REPO_ROOT/logs"
  mkdir -p "$repo_logs"
}
# Translate a host-side summary-file path into the path handed to the scrape
# container.
#
# Paths inside the repository's logs/ directory are rewritten under /logs/,
# keeping any sub-directory components so the file ends up where the caller
# asked for it on the host. Repo-relative spellings (logs/... and ./logs/...)
# are mapped the same way. Anything else is printed unchanged.
#
# Globals:   REPO_ROOT (read)
# Arguments: $1 - host path; may be empty
# Outputs:   the mapped path on stdout (nothing when $1 is empty)
# Returns:   0
map_summary_file_for_container() {
  local host_path=$1
  local logs_dir="$REPO_ROOT/logs"
  local rel=

  [[ -n "$host_path" ]] || return 0

  case "$host_path" in
    /logs/*)
      # Already expressed as a container path; pass through untouched.
      printf '%s\n' "$host_path"
      return 0
      ;;
    "$logs_dir"/*)
      rel=${host_path#"$logs_dir"/}
      ;;
    ./logs/*)
      rel=${host_path#./logs/}
      ;;
    logs/*)
      rel=${host_path#logs/}
      ;;
    *)
      # Not under logs/: leave the path as the caller wrote it.
      printf '%s\n' "$host_path"
      return 0
      ;;
  esac

  # Never let a ".." component climb out of /logs; fall back to the bare
  # file name in that case.
  case "/$rel/" in
    */../*) rel=$(basename -- "$host_path") ;;
  esac

  printf '/logs/%s\n' "$rel"
}
# Create the host directory that will hold the JSON summary file.
#
# A container-style path (/logs/...) is translated back to the matching
# location under $REPO_ROOT/logs, keeping sub-directories so that the
# directory the container will write into actually exists on the host.
#
# Globals:   REPO_ROOT (read), DCE_RUN_SUMMARY_FILE (read; used when $1 is
#            absent or empty)
# Arguments: $1 - optional summary-file path
# Returns:   0 when no path is configured, otherwise the status of mkdir
ensure_summary_file_host_dir() {
  local host_path=${1:-${DCE_RUN_SUMMARY_FILE:-}}
  local rel

  [[ -n "$host_path" ]] || return 0

  case "$host_path" in
    /logs/*)
      rel=${host_path#/logs/}
      # Refuse to follow ".." out of logs/; use the bare file name instead.
      case "/$rel/" in
        */../*) rel=$(basename -- "$host_path") ;;
      esac
      host_path="$REPO_ROOT/logs/$rel"
      ;;
  esac

  mkdir -p -- "$(dirname -- "$host_path")"
}
write_scrape_lock_meta() {
local meta_file
meta_file=$(scrape_lock_meta_path "$SCRAPE_LOCK_FILE")
@ -191,6 +227,14 @@ write_compose_env_temp() {
else
printf 'DCE_CONTAINER_MEMORY=0\n' >>"$COMPOSE_ENV_TEMP"
fi
if [[ "${DCE_RUN_SUMMARY_JSON:-0}" == "1" ]]; then
printf 'DCE_RUN_SUMMARY_JSON=1\n' >>"$COMPOSE_ENV_TEMP"
fi
if [[ -n "${DCE_RUN_SUMMARY_FILE:-}" ]]; then
local container_summary_file
container_summary_file=$(map_summary_file_for_container "$DCE_RUN_SUMMARY_FILE")
printf 'DCE_RUN_SUMMARY_FILE=%s\n' "$container_summary_file" >>"$COMPOSE_ENV_TEMP"
fi
}
configure_rootless_compose() {
@ -589,6 +633,11 @@ main() {
apply_single_target_container_memory "$host_config" "${host_targets[0]}"
fi
if [[ "$subcommand" == "scrape" ]]; then
ensure_repo_logs_dir
ensure_summary_file_host_dir
fi
if [[ "$subcommand" != "salvage" ]]; then
prepare_compose_env
fi

View file

@ -268,6 +268,15 @@ main() {
LOG_FILE="$LOG_DIR/operator-validation-$(date -u +%Y%m%dT%H%M%SZ).log"
fi
local export_json_summary=0
if (( DRY_RUN == 0 && SKIP_SCRAPE == 0 && SALVAGE_ONLY == 0 )); then
export_json_summary=1
export DCE_RUN_SUMMARY_JSON=1
if [[ -z "${DCE_RUN_SUMMARY_FILE:-}" ]]; then
export DCE_RUN_SUMMARY_FILE="${LOG_FILE%.log}.summary.json"
fi
fi
local failures=0
set -o pipefail
@ -281,6 +290,9 @@ main() {
else
log_step "Enabled targets: $(enabled_targets | paste -sd, -)"
fi
if (( export_json_summary )); then
log_step "JSON summary file: ${DCE_RUN_SUMMARY_FILE:-}"
fi
if (( SYNC_GUI_FLAG )); then
run_step "sync-token-from-gui" "$SYNC_GUI" --force || failures=$((failures + 1))
fi

View file

@ -216,6 +216,8 @@ while (($#)); do
esac
done
printf 'env:DCE_CONTAINER_MEMORY=%s\n' "${DCE_CONTAINER_MEMORY:-}" >>"${FAKE_COMPOSE_ARGS_LOG:?}"
printf 'env:DCE_RUN_SUMMARY_JSON=%s\n' "${DCE_RUN_SUMMARY_JSON:-0}" >>"${FAKE_COMPOSE_ARGS_LOG:?}"
printf 'env:DCE_RUN_SUMMARY_FILE=%s\n' "${DCE_RUN_SUMMARY_FILE:-}" >>"${FAKE_COMPOSE_ARGS_LOG:?}"
printf '%s\n' "${all_args[*]}" >>"${FAKE_COMPOSE_ARGS_LOG:?}"
printf 'run succeeded\n'
EOF
@ -304,4 +306,29 @@ grep -q 'env:DCE_CONTAINER_MEMORY=2g' "$COMPOSE_OVERRIDE_LOG" || {
exit 1
}
# --- JSON summary passthrough ------------------------------------------------
# Run the host wrapper with DCE_RUN_SUMMARY_JSON=1 and a summary file located
# under $REPO_ROOT/logs, then inspect what the fake compose binary recorded.
# The fake compose appends "env:NAME=value" lines to FAKE_COMPOSE_ARGS_LOG.
SUMMARY_HOST="$REPO_ROOT/logs/host-smoke-summary.json"
mkdir -p "$REPO_ROOT/logs"
COMPOSE_SUMMARY_LOG="$TMP_DIR/compose-summary.log"
# env -u clears any inherited values first; the explicit assignments that
# follow set the values under test.
env -u DCE_CONTAINER_MEMORY -u DCE_RUN_SUMMARY_FILE \
DCE_SKIP_SCRAPE_LOCK=1 \
DCE_COMPOSE_BIN="$FAKE_COMPOSE" \
DCE_REPO_ROOT="$REPO_ROOT" \
DCE_ENV_FILE="$ENV_NO_MEM" \
DCE_COMPOSE_FILE="$COMPOSE_FILE" \
DCE_RUN_SUMMARY_JSON=1 \
DCE_RUN_SUMMARY_FILE="$SUMMARY_HOST" \
FAKE_COMPOSE_ARGS_LOG="$COMPOSE_SUMMARY_LOG" \
"$REPO_ROOT/scripts/run-discord-scrape-host.sh" scrape \
--config "$TARGET_MEM_CONFIG" --target demo >/dev/null
# The JSON-summary flag must reach the compose environment.
grep -q 'env:DCE_RUN_SUMMARY_JSON=1' "$COMPOSE_SUMMARY_LOG" || {
echo "expected DCE_RUN_SUMMARY_JSON in compose env passthrough" >&2
cat "$COMPOSE_SUMMARY_LOG" >&2
exit 1
}
# The host path under logs/ must arrive rewritten to the /logs/ form.
grep -q 'env:DCE_RUN_SUMMARY_FILE=/logs/host-smoke-summary.json' "$COMPOSE_SUMMARY_LOG" || {
echo "expected host logs path mapped to /logs in compose env" >&2
cat "$COMPOSE_SUMMARY_LOG" >&2
exit 1
}
echo "run-discord-scrape-host smoke test passed"

View file

@ -72,6 +72,11 @@ grep -q 'Operator validation finished successfully' "$LOG_DIR/validation.log" ||
exit 1
}
# The "JSON summary file:" step line is only logged when summary export is
# enabled, so its presence in this log means export was switched on.
# NOTE(review): validation.log is presumably produced by the dry-run
# invocation earlier in this script — confirm.
if grep -q 'JSON summary file:' "$LOG_DIR/validation.log"; then
printf 'ERROR: dry-run should not enable JSON summary export\n' >&2
exit 1
fi
SALVAGE_LOG="$LOG_DIR/salvage-validation.log"
DCE_MIN_FREE_MB=0 DCE_REPO_ROOT="$REPO_ROOT" DCE_CONFIG_FILE="$CONFIG_PATH" DCE_ENV_FILE="$ENV_PATH" \
DCE_LOG_DIR="$LOG_DIR" DCE_SKIP_SCRAPE_LOCK=1 \