mirror of
https://github.com/Tyrrrz/DiscordChatExporter.git
synced 2026-06-09 15:52:37 -06:00
feat(scrape): host compose passthrough for JSON summary
Mount logs/ in compose, map DCE_RUN_SUMMARY_FILE to /logs, and auto-enable JSON summary beside operator-validation log files when scraping.
This commit is contained in:
parent
1dda40ae1b
commit
5cfb2ed144
|
|
@ -19,6 +19,7 @@ services:
|
|||
volumes:
|
||||
- ./config:/config:ro,z
|
||||
- ./scripts/run-discord-scrape.sh:/opt/dce-scheduler/run-discord-scrape.sh:ro,z
|
||||
- ./logs:/logs:z
|
||||
# Host path must match archive_root in config/scrape-targets.json (override on other machines).
|
||||
- ${DCE_ARCHIVE_ROOT:-/home/brunner56/Documents}:${DCE_ARCHIVE_ROOT:-/home/brunner56/Documents}:z
|
||||
command: ["help"]
|
||||
|
|
|
|||
|
|
@ -0,0 +1,44 @@
|
|||
---
|
||||
title: "feat: Host compose passthrough for JSON scrape summary"
|
||||
type: feat
|
||||
status: complete
|
||||
date: 2026-06-04
|
||||
origin: /lfg — plan 069 deferred host compose passthrough; container runs could not write DCE_RUN_SUMMARY_FILE on host
|
||||
---
|
||||
|
||||
# feat: Host compose passthrough for JSON scrape summary
|
||||
|
||||
## Summary
|
||||
|
||||
Mount repo `logs/` into the scrape container, pass `DCE_RUN_SUMMARY_*` through compose env, map host `logs/*.json` paths to `/logs/*`, and auto-enable JSON summary in `run-operator-validation.sh` when scraping.
|
||||
|
||||
## Requirements
|
||||
|
||||
| ID | Requirement |
|
||||
|----|-------------|
|
||||
| R1 | `docker-compose.yml` mounts `./logs:/logs:z` |
|
||||
| R2 | `write_compose_env_temp` passes `DCE_RUN_SUMMARY_JSON` and mapped `DCE_RUN_SUMMARY_FILE` |
|
||||
| R3 | Host paths under `$REPO_ROOT/logs/` map to `/logs/<basename>` inside container |
|
||||
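| R4 | When scraping (not for dry-run, skip-scrape, or salvage-only runs), `run-operator-validation.sh` exports `DCE_RUN_SUMMARY_JSON=1` and, if `DCE_RUN_SUMMARY_FILE` is unset, defaults it to `<log-file without .log>.summary.json` beside `--log-file` |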
|
||||
| R5 | Host smoke asserts summary env passthrough and `/logs/` mapping |
|
||||
| R6 | Validation smoke asserts dry-run does not enable JSON summary |
|
||||
| R7 | `DCE_MIN_FREE_MB=0 ./scripts/run-all-smokes.sh` passes all smoke tests (21/21) |
|
||||
|
||||
## Implementation Units
|
||||
|
||||
### U1. Compose mount + host passthrough
|
||||
|
||||
**Files:** `docker-compose.yml`, `scripts/run-discord-scrape-host.sh`, `scripts/run-operator-validation.sh`, smokes
|
||||
|
||||
## Verification
|
||||
|
||||
```bash
|
||||
DCE_MIN_FREE_MB=0 ./scripts/run-all-smokes.sh
|
||||
```
|
||||
|
||||
## Scope Boundaries
|
||||
|
||||
### Deferred
|
||||
|
||||
- Live KotOR catch-up on host
|
||||
- Auto-extract JSON from tee log when file write fails
|
||||
|
|
@ -166,6 +166,8 @@ DCE_MIN_FREE_MB=0 ./scripts/run-operator-validation.sh \
|
|||
|
||||
**Plan 069 (2026-06-04):** Optional JSON scrape run summary via `DCE_RUN_SUMMARY_JSON` / `DCE_RUN_SUMMARY_FILE`.
|
||||
|
||||
**Plan 070 (2026-06-04):** Compose mounts `logs/` at `/logs`; host runner passthrough; operator-validation auto-writes `*.summary.json` beside `--log-file`.
|
||||
|
||||
**Disk:** ~65 GiB free on `/home` (2026-05-30); large channel merges still need headroom.
|
||||
|
||||
## CI note (fork PRs)
|
||||
|
|
|
|||
|
|
@ -64,6 +64,7 @@ Salvage then incremental scrape:
|
|||
./scripts/run-operator-validation.sh --salvage-before-scrape \
|
||||
--target KotOR_discord_msgs --channel 221726893064454144 \
|
||||
--log-file logs/kotor-yes-general.log
|
||||
# Also writes logs/kotor-yes-general.summary.json (machine-readable scrape totals)
|
||||
|
||||
./scripts/prove-incremental-append.sh \
|
||||
--target KotOR_discord_msgs --channel 221726893064454144
|
||||
|
|
|
|||
|
|
@ -27,5 +27,6 @@ DCE_USERNS_MODE=
|
|||
# DCE_CONTAINER_MEMORY=8g
|
||||
|
||||
# Optional: machine-readable scrape summary (run-discord-scrape.sh).
|
||||
# Host paths under logs/ map to /logs/ in the container (see docker-compose.yml).
|
||||
# DCE_RUN_SUMMARY_JSON=1
|
||||
# DCE_RUN_SUMMARY_FILE=/path/to/scrape-summary.json
|
||||
# DCE_RUN_SUMMARY_FILE=logs/scrape-summary.json
|
||||
|
|
|
|||
|
|
@ -45,6 +45,8 @@ Environment:
|
|||
DCE_CONTAINER_MEMORY Optional container memory cap (e.g. 8g, 8192m). Default 0 = unlimited.
|
||||
Targets may set container_memory in scrape-targets.json (used when
|
||||
exactly one --target is selected and this env var is unset or 0).
|
||||
DCE_RUN_SUMMARY_JSON When 1, container logs DCE_JSON_SUMMARY after scrape.
|
||||
DCE_RUN_SUMMARY_FILE Host path under logs/ is mapped to /logs/ inside the container.
|
||||
|
||||
Notes:
|
||||
When $ENV_FILE is missing, exported DISCORD_TOKEN or DISCORD_TOKEN_FILE is used instead.
|
||||
|
|
@ -66,6 +68,40 @@ cleanup_compose_env() {
|
|||
fi
|
||||
}
|
||||
|
||||
#######################################
# Create the repository's logs/ directory on the host if it is missing.
# docker-compose.yml bind-mounts ./logs at /logs, so the directory must
# exist before a scrape container is started.
# Globals:   REPO_ROOT (read) - repository root on the host
# Arguments: none
# Returns:   mkdir's exit status; aborts if REPO_ROOT is unset or empty
#######################################
ensure_repo_logs_dir() {
  # Fail fast instead of expanding to "/logs" (filesystem root) when
  # REPO_ROOT is empty; "--" protects against a root that starts with "-".
  mkdir -p -- "${REPO_ROOT:?REPO_ROOT must be set}/logs"
}
|
||||
|
||||
#######################################
# Translate a host-side summary file path into the path the scrape
# container should write to.
#
# Mapping rules:
#   - empty input                -> prints nothing, returns 0
#   - /logs/...                  -> printed unchanged (already a container path)
#   - <REPO_ROOT>/logs/<sub>     -> /logs/<sub>  (subdirectories preserved)
#   - relative path that resolves (against the current directory) to
#     <REPO_ROOT>/logs/<sub>     -> /logs/<sub>
#   - anything else              -> printed unchanged
#
# Globals:   REPO_ROOT (read) - repository root on the host
# Arguments: $1 - host path of the summary file (may be empty)
# Outputs:   the container path on stdout, newline-terminated
# Returns:   0
#######################################
map_summary_file_for_container() {
  local host_path=${1:-}
  local logs_dir="$REPO_ROOT/logs"
  local abs_path rel_path

  [[ -n "$host_path" ]] || return 0

  # Already expressed as a path inside the container mount.
  if [[ "$host_path" == /logs/* ]]; then
    printf '%s\n' "$host_path"
    return 0
  fi

  # Resolve relative paths (e.g. "logs/x.json" or "./logs/x.json") against
  # the current directory so they are compared with logs_dir on equal terms.
  abs_path=${host_path#./}
  [[ "$abs_path" == /* ]] || abs_path="$PWD/$abs_path"

  if [[ "$abs_path" == "$logs_dir"/* ]]; then
    # Keep the part below logs/ so nested files land in the same place on
    # both sides of the bind mount.
    rel_path=${abs_path#"$logs_dir"/}
    # A ".." component would escape /logs inside the container; fall back
    # to the bare file name in that case.
    case "/$rel_path/" in
      */../*) rel_path=${rel_path##*/} ;;
    esac
    printf '/logs/%s\n' "$rel_path"
  else
    printf '%s\n' "$host_path"
  fi
}
|
||||
|
||||
#######################################
# Make sure the host directory that will receive the JSON summary file
# exists.
#
# A path given in container form (/logs/...) is translated back to the
# matching location under <REPO_ROOT>/logs/, keeping any subdirectories,
# so the directory the container writes into through the bind mount is
# present on the host.
#
# Globals:   REPO_ROOT (read), DCE_RUN_SUMMARY_FILE (read, used when $1
#            is not given)
# Arguments: $1 - optional summary file path (host or /logs/... form)
# Returns:   0 when no path is configured; otherwise mkdir's exit status
#######################################
ensure_summary_file_host_dir() {
  local host_path=${1:-${DCE_RUN_SUMMARY_FILE:-}}

  # Nothing configured: no directory to prepare.
  [[ -n "$host_path" ]] || return 0

  if [[ "$host_path" == /logs/* ]]; then
    # Strip only the /logs/ mount prefix; using basename here would drop
    # nested directories and leave the container's target dir missing.
    host_path="${REPO_ROOT:?REPO_ROOT must be set}/logs/${host_path#/logs/}"
  fi

  mkdir -p -- "$(dirname -- "$host_path")"
}
|
||||
|
||||
write_scrape_lock_meta() {
|
||||
local meta_file
|
||||
meta_file=$(scrape_lock_meta_path "$SCRAPE_LOCK_FILE")
|
||||
|
|
@ -191,6 +227,14 @@ write_compose_env_temp() {
|
|||
else
|
||||
printf 'DCE_CONTAINER_MEMORY=0\n' >>"$COMPOSE_ENV_TEMP"
|
||||
fi
|
||||
if [[ "${DCE_RUN_SUMMARY_JSON:-0}" == "1" ]]; then
|
||||
printf 'DCE_RUN_SUMMARY_JSON=1\n' >>"$COMPOSE_ENV_TEMP"
|
||||
fi
|
||||
if [[ -n "${DCE_RUN_SUMMARY_FILE:-}" ]]; then
|
||||
local container_summary_file
|
||||
container_summary_file=$(map_summary_file_for_container "$DCE_RUN_SUMMARY_FILE")
|
||||
printf 'DCE_RUN_SUMMARY_FILE=%s\n' "$container_summary_file" >>"$COMPOSE_ENV_TEMP"
|
||||
fi
|
||||
}
|
||||
|
||||
configure_rootless_compose() {
|
||||
|
|
@ -589,6 +633,11 @@ main() {
|
|||
apply_single_target_container_memory "$host_config" "${host_targets[0]}"
|
||||
fi
|
||||
|
||||
if [[ "$subcommand" == "scrape" ]]; then
|
||||
ensure_repo_logs_dir
|
||||
ensure_summary_file_host_dir
|
||||
fi
|
||||
|
||||
if [[ "$subcommand" != "salvage" ]]; then
|
||||
prepare_compose_env
|
||||
fi
|
||||
|
|
|
|||
|
|
@ -268,6 +268,15 @@ main() {
|
|||
LOG_FILE="$LOG_DIR/operator-validation-$(date -u +%Y%m%dT%H%M%SZ).log"
|
||||
fi
|
||||
|
||||
local export_json_summary=0
|
||||
if (( DRY_RUN == 0 && SKIP_SCRAPE == 0 && SALVAGE_ONLY == 0 )); then
|
||||
export_json_summary=1
|
||||
export DCE_RUN_SUMMARY_JSON=1
|
||||
if [[ -z "${DCE_RUN_SUMMARY_FILE:-}" ]]; then
|
||||
export DCE_RUN_SUMMARY_FILE="${LOG_FILE%.log}.summary.json"
|
||||
fi
|
||||
fi
|
||||
|
||||
local failures=0
|
||||
|
||||
set -o pipefail
|
||||
|
|
@ -281,6 +290,9 @@ main() {
|
|||
else
|
||||
log_step "Enabled targets: $(enabled_targets | paste -sd, -)"
|
||||
fi
|
||||
if (( export_json_summary )); then
|
||||
log_step "JSON summary file: ${DCE_RUN_SUMMARY_FILE:-}"
|
||||
fi
|
||||
if (( SYNC_GUI_FLAG )); then
|
||||
run_step "sync-token-from-gui" "$SYNC_GUI" --force || failures=$((failures + 1))
|
||||
fi
|
||||
|
|
|
|||
|
|
@ -216,6 +216,8 @@ while (($#)); do
|
|||
esac
|
||||
done
|
||||
printf 'env:DCE_CONTAINER_MEMORY=%s\n' "${DCE_CONTAINER_MEMORY:-}" >>"${FAKE_COMPOSE_ARGS_LOG:?}"
|
||||
printf 'env:DCE_RUN_SUMMARY_JSON=%s\n' "${DCE_RUN_SUMMARY_JSON:-0}" >>"${FAKE_COMPOSE_ARGS_LOG:?}"
|
||||
printf 'env:DCE_RUN_SUMMARY_FILE=%s\n' "${DCE_RUN_SUMMARY_FILE:-}" >>"${FAKE_COMPOSE_ARGS_LOG:?}"
|
||||
printf '%s\n' "${all_args[*]}" >>"${FAKE_COMPOSE_ARGS_LOG:?}"
|
||||
printf 'run succeeded\n'
|
||||
EOF
|
||||
|
|
@ -304,4 +306,29 @@ grep -q 'env:DCE_CONTAINER_MEMORY=2g' "$COMPOSE_OVERRIDE_LOG" || {
|
|||
exit 1
|
||||
}
|
||||
|
||||
SUMMARY_HOST="$REPO_ROOT/logs/host-smoke-summary.json"
|
||||
mkdir -p "$REPO_ROOT/logs"
|
||||
COMPOSE_SUMMARY_LOG="$TMP_DIR/compose-summary.log"
|
||||
env -u DCE_CONTAINER_MEMORY -u DCE_RUN_SUMMARY_FILE \
|
||||
DCE_SKIP_SCRAPE_LOCK=1 \
|
||||
DCE_COMPOSE_BIN="$FAKE_COMPOSE" \
|
||||
DCE_REPO_ROOT="$REPO_ROOT" \
|
||||
DCE_ENV_FILE="$ENV_NO_MEM" \
|
||||
DCE_COMPOSE_FILE="$COMPOSE_FILE" \
|
||||
DCE_RUN_SUMMARY_JSON=1 \
|
||||
DCE_RUN_SUMMARY_FILE="$SUMMARY_HOST" \
|
||||
FAKE_COMPOSE_ARGS_LOG="$COMPOSE_SUMMARY_LOG" \
|
||||
"$REPO_ROOT/scripts/run-discord-scrape-host.sh" scrape \
|
||||
--config "$TARGET_MEM_CONFIG" --target demo >/dev/null
|
||||
grep -q 'env:DCE_RUN_SUMMARY_JSON=1' "$COMPOSE_SUMMARY_LOG" || {
|
||||
echo "expected DCE_RUN_SUMMARY_JSON in compose env passthrough" >&2
|
||||
cat "$COMPOSE_SUMMARY_LOG" >&2
|
||||
exit 1
|
||||
}
|
||||
grep -q 'env:DCE_RUN_SUMMARY_FILE=/logs/host-smoke-summary.json' "$COMPOSE_SUMMARY_LOG" || {
|
||||
echo "expected host logs path mapped to /logs in compose env" >&2
|
||||
cat "$COMPOSE_SUMMARY_LOG" >&2
|
||||
exit 1
|
||||
}
|
||||
|
||||
echo "run-discord-scrape-host smoke test passed"
|
||||
|
|
|
|||
|
|
@ -72,6 +72,11 @@ grep -q 'Operator validation finished successfully' "$LOG_DIR/validation.log" ||
|
|||
exit 1
|
||||
}
|
||||
|
||||
if grep -q 'JSON summary file:' "$LOG_DIR/validation.log"; then
|
||||
printf 'ERROR: dry-run should not enable JSON summary export\n' >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
SALVAGE_LOG="$LOG_DIR/salvage-validation.log"
|
||||
DCE_MIN_FREE_MB=0 DCE_REPO_ROOT="$REPO_ROOT" DCE_CONFIG_FILE="$CONFIG_PATH" DCE_ENV_FILE="$ENV_PATH" \
|
||||
DCE_LOG_DIR="$LOG_DIR" DCE_SKIP_SCRAPE_LOCK=1 \
|
||||
|
|
|
|||
Loading…
Reference in a new issue