diff --git a/docs/plans/2026-06-04-048-feat-stream-host-scrape-output-plan.md b/docs/plans/2026-06-04-048-feat-stream-host-scrape-output-plan.md
new file mode 100644
index 00000000..0f85108f
--- /dev/null
+++ b/docs/plans/2026-06-04-048-feat-stream-host-scrape-output-plan.md
@@ -0,0 +1,42 @@
+---
+title: "feat: Stream container scrape output during host runs"
+type: feat
+status: complete
+date: 2026-06-04
+origin: /lfg — KotOR validation log frozen at ~83 lines while yes_general export ran for hours
+---
+
+# feat: Stream container scrape output during host runs
+
+## Problem
+
+`run-discord-scrape-host.sh` captures all container stdout/stderr into a temp file and only `cat`s it after the compose run exits. Long exports (e.g. KotOR `yes_general`) leave operator validation logs silent for hours even though the container is actively exporting.
+
+## Requirements
+
+| ID | Requirement |
+|----|-------------|
+| R1 | `run_subcommand_with_retry` streams compose output to stdout as it arrives while still capturing to the temp file for auth-failure detection |
+| R2 | Preserve exit-code semantics and auth-retry behavior (pipefail + `PIPESTATUS[0]`) |
+| R3 | Do not duplicate full output on success (tee replaces post-hoc `cat`) |
+| R4 | Host smoke adds a `streaming` fake-docker mode proving first line appears before command completes |
+| R5 | `run-all-smokes.sh` passes |
+
+## Implementation
+
+- **File:** `scripts/run-discord-scrape-host.sh` — replace `>"$output_file" 2>&1` + `cat` with `"${run_args[@]}" 2>&1 | tee "$output_file"` and check `${PIPESTATUS[0]}` in both initial and retry paths.
+- **File:** `scripts/tests/run-discord-scrape-host-smoke.sh` — add streaming mode assertion.
+- **File:** `scripts/tests/documents-scrape-smoke.sh` — set `DCE_SKIP_SCRAPE_LOCK=1` on both host-wrapper invocations.
+
+## Verification
+
+```bash
+./scripts/tests/run-discord-scrape-host-smoke.sh
+DCE_MIN_FREE_MB=0 ./scripts/run-all-smokes.sh
+```
+
+## Out of scope
+
+- yes_general catch-up completion
+- Container memory limits
+- Validation-level flock (host flock already exists)
diff --git a/scripts/run-discord-scrape-host.sh b/scripts/run-discord-scrape-host.sh
index 1dbb2389..42ba8dcc 100755
--- a/scripts/run-discord-scrape-host.sh
+++ b/scripts/run-discord-scrape-host.sh
@@ -390,14 +390,13 @@ run_subcommand_with_retry() {
   output_file=$(mktemp "${TMPDIR:-/tmp}/dce-host-run.XXXXXX.log")
   compose_run_args run_args "$subcommand" "$@"
 
-  if "${run_args[@]}" >"$output_file" 2>&1; then
-    cat "$output_file"
+  # Stream output live while tee keeps a copy for auth-failure detection.
+  # Success is the container command's own status (PIPESTATUS[0]), not tee's.
+  if "${run_args[@]}" 2>&1 | tee "$output_file"; [[ ${PIPESTATUS[0]} -eq 0 ]]; then
     rm -f "$output_file"
     return 0
   fi
 
-  cat "$output_file" >&2
-
   if ! is_discord_auth_failure "$output_file"; then
     rm -f "$output_file"
     die "Container run failed for '$subcommand' with a non-auth error."
@@ -416,13 +415,12 @@ run_subcommand_with_retry() {
   ensure_token_present
   compose_run_args run_args "$subcommand" "$@"
 
-  if "${run_args[@]}" >"$output_file" 2>&1; then
-    cat "$output_file"
+  # Retry attempt: same live streaming and PIPESTATUS[0] check as the first run.
+  if "${run_args[@]}" 2>&1 | tee "$output_file"; [[ ${PIPESTATUS[0]} -eq 0 ]]; then
     rm -f "$output_file"
     return 0
   fi
 
-  cat "$output_file" >&2
   rm -f "$output_file"
   die "Container run failed for '$subcommand' after one auth refresh retry."
 }
diff --git a/scripts/tests/documents-scrape-smoke.sh b/scripts/tests/documents-scrape-smoke.sh
index edd0b4d8..f9154b3f 100755
--- a/scripts/tests/documents-scrape-smoke.sh
+++ b/scripts/tests/documents-scrape-smoke.sh
@@ -37,6 +37,7 @@ MISSING_ENV="$TMP_DIR/missing-scrape.env"
 [[ ! -e "$MISSING_ENV" ]]
 
 DCE_REPO_ROOT="$FAKE_REPO" \
+  DCE_SKIP_SCRAPE_LOCK=1 \
   DCE_DOCKER_BIN="$FAKE_DOCKER" \
   DCE_ENV_FILE="$MISSING_ENV" \
   DCE_COMPOSE_FILE="$COMPOSE_FILE" \
@@ -68,6 +69,7 @@ HOST="$REPO_ROOT/scripts/run-discord-scrape-host.sh"
 
 # Prove script should fail when host would shrink archives (simulate by patching fake docker to no-op)
 DCE_REPO_ROOT="$REPO_ROOT" \
+  DCE_SKIP_SCRAPE_LOCK=1 \
   DCE_DOCKER_BIN="$FAKE_DOCKER" \
   DCE_ENV_FILE="$MISSING_ENV" \
   DCE_COMPOSE_FILE="$COMPOSE_FILE" \
diff --git a/scripts/tests/run-discord-scrape-host-smoke.sh b/scripts/tests/run-discord-scrape-host-smoke.sh
index fb2c12b5..5bbd1d0c 100755
--- a/scripts/tests/run-discord-scrape-host-smoke.sh
+++ b/scripts/tests/run-discord-scrape-host-smoke.sh
@@ -76,6 +76,13 @@ if [[ "$mode" == "auth-persistent-fail" ]]; then
   exit 1
 fi
 
+if [[ "$mode" == "streaming" ]]; then
+  printf 'streaming-line1\n'
+  sleep 0.5
+  printf 'streaming-line2\n'
+  exit 0
+fi
+
 printf 'run succeeded\n'
 EOF
 chmod +x "$FAKE_DOCKER"
@@ -144,4 +151,35 @@ printf '0' >"$CALL_COUNT"
 run_host_with_shell_token success "$MISSING_ENV" >/dev/null
 [[ "$(cat "$CALL_COUNT")" == "1" ]] || { echo "expected host wrapper to run with exported DISCORD_TOKEN when scrape.env is missing" >&2; exit 1; }
 
+STREAM_OUTPUT="$TMP_DIR/stream-output.txt"
+printf '0' >"$CALL_COUNT"
+run_host streaming >"$STREAM_OUTPUT" &
+stream_pid=$!
+# Poll for the first line and judge it from a single snapshot of the output:
+# a wrapper that buffers until the container exits would deliver both lines
+# together, so seeing line1 without line2 is what proves live streaming.
+streamed_early=0
+for _ in {1..20}; do
+  stream_snapshot=$(cat "$STREAM_OUTPUT" 2>/dev/null || true)
+  if [[ "$stream_snapshot" == *streaming-line1* ]]; then
+    [[ "$stream_snapshot" == *streaming-line2* ]] || streamed_early=1
+    break
+  fi
+  sleep 0.05
+done
+[[ "$streamed_early" == "1" ]] || {
+  echo "expected streaming-line1 before host scrape completed" >&2
+  kill "$stream_pid" 2>/dev/null || true
+  wait "$stream_pid" 2>/dev/null || true
+  exit 1
+}
+wait "$stream_pid" || {
+  echo "expected streaming host scrape to exit 0" >&2
+  exit 1
+}
+grep -q streaming-line2 "$STREAM_OUTPUT" || {
+  echo "expected streaming-line2 in host scrape output" >&2
+  exit 1
+}
+
 echo "run-discord-scrape-host smoke test passed"