From 71a443267e743e6eb692ec92f5f692ea141e8633 Mon Sep 17 00:00:00 2001 From: Copilot Date: Fri, 29 May 2026 20:34:22 -0500 Subject: [PATCH] feat(scrape): run plan, channel ledger, and all-target proof Log scrape plan/summary with per-file message deltas in the core script. Host wrappers and operator entrypoints print target lists; operator-proof defaults to all enabled targets when --target is omitted. --- ...38-feat-scrape-logging-run-summary-plan.md | 97 ++++++++++ scripts/lib/scrape-run-plan.sh | 37 ++++ scripts/operator-handoff.sh | 4 + scripts/run-discord-scrape-host.sh | 14 ++ scripts/run-discord-scrape.sh | 183 +++++++++++++++++- scripts/run-documents-scrape.sh | 8 + scripts/run-operator-proof.sh | 47 ++++- scripts/tests/documents-scrape-smoke.sh | 10 +- scripts/tests/run-discord-scrape-smoke.sh | 15 +- scripts/tests/run-operator-proof-smoke.sh | 4 + 10 files changed, 406 insertions(+), 13 deletions(-) create mode 100644 docs/plans/2026-05-29-038-feat-scrape-logging-run-summary-plan.md create mode 100644 scripts/lib/scrape-run-plan.sh diff --git a/docs/plans/2026-05-29-038-feat-scrape-logging-run-summary-plan.md b/docs/plans/2026-05-29-038-feat-scrape-logging-run-summary-plan.md new file mode 100644 index 00000000..f00a91cc --- /dev/null +++ b/docs/plans/2026-05-29-038-feat-scrape-logging-run-summary-plan.md @@ -0,0 +1,97 @@ +--- +title: "feat: Scrape logging, run summary, and default-all-targets" +type: feat +status: complete +date: 2026-05-29 +origin: /lfg — operator scripts need explicit server/file/message visibility and sane defaults +--- + +# feat: Scrape logging, run summary, and default-all-targets + +## Summary + +Make recurring scrape scripts print an upfront run plan (which guilds/servers, which output folders), per-channel file I/O with message deltas, and a final change summary. Operator entrypoints default to all enabled targets from `config/scrape-targets.json` without requiring repeated `--target` flags. 
+ +## Problem Frame + +Operators cannot tell from current logs which Discord server was scraped, which archive files were touched, or how many messages were appended vs unchanged. `run-operator-proof.sh` still hardcodes `eod_discord`. The core engine (`run-discord-scrape.sh`) logs channel IDs but not guild names, paths, or before/after counts in one place. + +## Requirements + +| ID | Requirement | +|----|-------------| +| R1 | Before scrape/preflight, log config path and every selected target with `name`, resolved guild id/name(s), and `output_dir` | +| R2 | For each channel processed, log destination file path, action (`CREATED`, `MERGED`, `UNCHANGED`, `SKIPPED`), and message counts before → after (plus fetched batch size when merged) | +| R3 | After all targets complete, print a consolidated run summary with per-file deltas and totals | +| R4 | `run-documents-scrape.sh` and host wrapper print the same run-plan header before invoking the container | +| R5 | `run-operator-proof.sh` defaults to all enabled targets (loop handoff → scrape → prove) when `--target` is omitted | +| R6 | Offline smokes pass; scrape smoke asserts summary markers exist | + +## Key Technical Decisions + +- **KTD1: Ledger in `run-discord-scrape.sh`:** Keep summary state in bash arrays inside the core script rather than a new shared library — host wrappers only need jq-based target listing; the container owns channel-level detail. +- **KTD2: Guild labels from cache + export metadata:** Resolve guild names from `load_guild_cache` at target start; enrich per-channel lines from export JSON when available. +- **KTD3: No behavior change to merge semantics:** Logging only; append-only merge and skip rules stay unchanged. + +## Implementation Units + +### U1. Core scrape ledger and summary + +**Goal:** Operator-visible run plan, per-channel I/O lines, and final summary in `run-discord-scrape.sh`. 
+ +**Requirements:** R1, R2, R3 + +**Files:** `scripts/run-discord-scrape.sh`, `scripts/tests/run-discord-scrape-smoke.sh` + +**Approach:** Add `SCRAPE_SUMMARY_ENTRIES`, `guild_name_for_id`, `describe_target_resolution`, `log_run_plan`, `record_channel_result`, `print_scrape_summary`. Call from `run_target_mode` and `scrape_target`. Preflight reuses run plan header. + +**Test scenarios:** +- Happy path: smoke run shows `Scrape run plan`, `MERGED`/`CREATED`/`UNCHANGED` lines, and `Scrape run summary` +- Edge: skipped channel appears as `SKIPPED` in summary +- Error path: failure before summary still leaves partial ledger in stderr + +**Verification:** `./scripts/tests/run-discord-scrape-smoke.sh` passes with grep for summary markers. + +### U2. Host and documents wrapper banners + +**Goal:** Host-side run plan before container execution. + +**Requirements:** R4 + +**Files:** `scripts/run-discord-scrape-host.sh`, `scripts/run-documents-scrape.sh`, `scripts/operator-handoff.sh` + +**Approach:** Shared helper pattern: jq list enabled/selected targets with output_dir; print subcommand and config paths. `operator-handoff` lists enabled targets in handoff header. + +**Test scenarios:** +- Happy path: documents-scrape dry-run output includes target list +- Integration: host smoke unchanged (no regression) + +**Verification:** `./scripts/tests/documents-scrape-smoke.sh`, `./scripts/tests/run-discord-scrape-host-smoke.sh` + +### U3. Operator proof defaults to all enabled targets + +**Goal:** Remove hardcoded `eod_discord`; loop all enabled targets when `--target` omitted. + +**Requirements:** R5 + +**Files:** `scripts/run-operator-proof.sh`, `scripts/tests/run-operator-proof-smoke.sh` (if present) + +**Approach:** When `TARGET` empty, `mapfile` enabled names from config and run handoff once then scrape+prove per target; print per-target summary at end. 
#!/usr/bin/env bash
#
# Host-side helpers: list selected/enabled targets before container runs.
#
# This file is meant to be sourced; it only defines functions.
# Both helpers shell out to `jq`, which must be available on PATH.

#######################################
# Print a human-readable run plan for a scrape targets config.
#
# Arguments:
#   $1  - path to the scrape targets JSON config
#   $2  - label used in the banner line ("=== <label> run plan ===")
#   $3+ - optional target names. When at least one is given, only those
#         names are listed; otherwise every target whose `.enabled` is not
#         `false` is listed.
# Outputs:
#   Writes the plan to stdout. Prints nothing when the config file is missing.
# Returns:
#   0 when the config is missing or the plan was printed.
#######################################
print_scrape_config_plan() {
  local config_path=$1
  local action_label=$2
  shift 2
  local -a requested_targets=("$@")
  local name output_dir enabled_count

  # The plan is informational only: a missing config is reported by the
  # callers themselves, so stay silent here.
  [[ -f "$config_path" ]] || return 0

  printf '%s\n' "=== $action_label run plan ==="
  printf 'Config: %s\n' "$config_path"

  if (( ${#requested_targets[@]} > 0 )); then
    printf 'Targets (%s selected):\n' "${#requested_targets[@]}"
    for name in "${requested_targets[@]}"; do
      # Collect matches into an array so that an unknown name yields an
      # explicit marker (instead of an empty destination) and duplicate
      # names cannot produce multi-line output.
      output_dir=$(
        jq -r --arg name "$name" '
          [.targets[] | select(.name == $name)]
          | if length == 0 then "(not found in config)"
            else (.[0].output_dir // "(no output_dir)")
            end
        ' "$config_path"
      )
      printf ' - %s → %s\n' "$name" "$output_dir"
    done
    return 0
  fi

  enabled_count=$(
    jq -r '[.targets[] | select(.enabled != false)] | length' "$config_path"
  )
  printf 'Targets (%s enabled, all will run):\n' "$enabled_count"

  # A placeholder is substituted for a missing output_dir so the second
  # tab-separated column is never empty.
  while IFS=$'\t' read -r name output_dir; do
    [[ -n "$name" ]] || continue
    printf ' - %s → %s\n' "$name" "$output_dir"
  done < <(
    jq -r '
      .targets[]
      | select(.enabled != false)
      | [.name, (.output_dir // "(no output_dir)")]
      | @tsv
    ' "$config_path"
  )
}

#######################################
# List the names of all enabled targets, one per line.
#
# Arguments:
#   $1 - path to the scrape targets JSON config
# Outputs:
#   The `.name` of every target whose `.enabled` is not `false`, on stdout.
# Returns:
#   The exit status of jq.
#######################################
enabled_target_names() {
  local config_path=$1
  jq -r '.targets[] | select(.enabled != false) | .name' "$config_path"
}
a/scripts/operator-handoff.sh b/scripts/operator-handoff.sh index 40648737..b6598b35 100755 --- a/scripts/operator-handoff.sh +++ b/scripts/operator-handoff.sh @@ -4,6 +4,8 @@ set -Eeuo pipefail SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd -P) REPO_ROOT="${DCE_REPO_ROOT:-$(cd "$SCRIPT_DIR/.." && pwd -P)}" +# shellcheck source=lib/scrape-run-plan.sh +source "$SCRIPT_DIR/lib/scrape-run-plan.sh" CONFIG_PATH="${DCE_CONFIG_FILE:-$REPO_ROOT/config/scrape-targets.json}" VERIFY_READY="$REPO_ROOT/scripts/verify-operator-ready.sh" DOCUMENTS_SCRAPE="$REPO_ROOT/scripts/run-documents-scrape.sh" @@ -76,6 +78,8 @@ main() { printf 'Operator handoff\n' printf '================\n' printf 'config: %s\n\n' "$CONFIG_PATH" + print_scrape_config_plan "$CONFIG_PATH" "Operator handoff" + printf '\n' if (( SKIP_DF == 0 )); then print_disk_summary diff --git a/scripts/run-discord-scrape-host.sh b/scripts/run-discord-scrape-host.sh index e312c7f8..a663dc76 100755 --- a/scripts/run-discord-scrape-host.sh +++ b/scripts/run-discord-scrape-host.sh @@ -4,6 +4,8 @@ set -Eeuo pipefail SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd -P) REPO_ROOT="${DCE_REPO_ROOT:-$(cd "$SCRIPT_DIR/.." 
# ---------------------------------------------------------------------------
# Scrape run ledger: run-plan header, per-channel results, and final summary.
#
# These helpers rely on functions defined elsewhere in the script:
# log, load_guild_cache, load_dm_channel_cache,
# load_archive_seed_channel_ids and resolve_configured_guilds.
# ---------------------------------------------------------------------------

# One element per processed channel. Each element is a single tab-separated
# record with eight columns:
#   target name, channel id, guild label, file path, action,
#   message count before, fetched batch size, message count after
SCRAPE_SUMMARY_ENTRIES=()

#######################################
# Clear the ledger before a new run.
# Globals: SCRAPE_SUMMARY_ENTRIES (written)
#######################################
reset_scrape_summary() {
  SCRAPE_SUMMARY_ENTRIES=()
}

#######################################
# Append one channel result to the ledger.
#
# Globals:   SCRAPE_SUMMARY_ENTRIES (appended)
# Arguments:
#   $1 - target name
#   $2 - channel id
#   $3 - guild label
#   $4 - destination file path
#   $5 - action (CREATED, MERGED, UNCHANGED, SKIPPED, ...)
#   $6 - message count before the run
#   $7 - number of messages fetched in this run
#   $8 - message count after the run
#######################################
record_channel_result() {
  # Records are read back with `IFS=$'\t' read`. Tab is IFS whitespace, so an
  # empty column would collapse and shift every later column. Substitute a
  # placeholder for empty values to keep all eight columns populated.
  local -a fields=(
    "${1:-unknown target}"
    "${2:-unknown channel}"
    "${3:-unknown guild}"
    "${4:-n/a}"
    "${5:-UNKNOWN}"
    "${6:-0}"
    "${7:-0}"
    "${8:-0}"
  )
  local tab=$'\t' newline=$'\n'
  local field record=""

  for field in "${fields[@]}"; do
    # A literal tab or newline inside a value would corrupt the record.
    field=${field//"$tab"/ }
    field=${field//"$newline"/ }
    record+="${record:+$tab}${field}"
  done

  SCRAPE_SUMMARY_ENTRIES+=("$record")
}

#######################################
# Resolve a guild id to its cached name.
#
# Arguments: $1 - guild id
# Outputs:   the cached name on stdout, or the id itself when the output of
#            load_guild_cache (read as tab-separated "id<TAB>name" rows) has
#            no matching id.
#######################################
guild_name_for_id() {
  local guild_id=$1
  local cached_id cached_name

  while IFS=$'\t' read -r cached_id cached_name; do
    if [[ "$cached_id" == "$guild_id" ]]; then
      printf '%s\n' "$cached_name"
      return 0
    fi
  done < <(load_guild_cache)

  printf '%s\n' "$guild_id"
}

#######################################
# Build a "name [id]" guild label from an export JSON file.
#
# Arguments: $1 - path to an export JSON file (may be empty or missing)
# Outputs:   "<name> [<id>]" on stdout; the name comes from the export's
#            .guild.name, or from guild_name_for_id when only .guild.id is
#            present. Prints "unknown guild" when neither can be determined.
#######################################
guild_label_from_export() {
  local export_path=$1
  local guild_id guild_name

  if [[ -z "$export_path" || ! -f "$export_path" ]]; then
    printf 'unknown guild\n'
    return 0
  fi

  guild_id=$(jq -r '.guild.id // empty' "$export_path")
  guild_name=$(jq -r '.guild.name // empty' "$export_path")

  if [[ -n "$guild_id" && -n "$guild_name" ]]; then
    printf '%s [%s]\n' "$guild_name" "$guild_id"
    return 0
  fi

  if [[ -n "$guild_id" ]]; then
    printf '%s [%s]\n' "$(guild_name_for_id "$guild_id")" "$guild_id"
    return 0
  fi

  printf 'unknown guild\n'
}

#######################################
# Describe how a target's channels will be resolved, for log output.
#
# Checks, in order: DM target, explicit channel ids in the config, channel
# ids seeded from existing archive files, then configured guilds.
#
# Arguments: $1 - one target object as a JSON string
# Outputs:   a one-line description on stdout (no trailing newline)
#######################################
describe_target_resolution() {
  local target_json=$1
  local kind target_name output_dir
  local -a configured_channel_ids=() configured_guild_ids=() seeded_channel_ids=()
  local guild_id channel_count joined_labels=""

  target_name=$(jq -r '.name' <<<"$target_json")
  output_dir=$(jq -r '.output_dir' <<<"$target_json")
  kind=$(jq -r '.kind // "guild"' <<<"$target_json")

  if [[ "$kind" == "dms" ]]; then
    channel_count=$(load_dm_channel_cache | wc -l | tr -d ' ')
    printf 'DM target (%s channel(s))' "${channel_count:-0}"
    return 0
  fi

  mapfile -t configured_channel_ids < <(jq -r '.channel_ids[]? | tostring' <<<"$target_json")
  if (( ${#configured_channel_ids[@]} > 0 )); then
    printf '%s explicit channel id(s)' "${#configured_channel_ids[@]}"
    return 0
  fi

  mapfile -t seeded_channel_ids < <(load_archive_seed_channel_ids "$output_dir" | sort -u)
  if (( ${#seeded_channel_ids[@]} > 0 )); then
    printf 'archive-seeded (%s channel file(s))' "${#seeded_channel_ids[@]}"
    return 0
  fi

  mapfile -t configured_guild_ids < <(resolve_configured_guilds "$target_json")
  if (( ${#configured_guild_ids[@]} == 0 )); then
    printf 'no guild/channel resolution (check config or token)'
    return 0
  fi

  # Join by hand: "${array[*]}" separates elements with only the FIRST
  # character of IFS, so IFS='; ' would render "A [1];B [2]" rather than the
  # intended "A [1]; B [2]".
  for guild_id in "${configured_guild_ids[@]}"; do
    joined_labels+="${joined_labels:+; }$(guild_name_for_id "$guild_id") [$guild_id]"
  done

  printf '%s' "$joined_labels"
}

#######################################
# Log the run-plan header: config path, mode, and every selected target with
# its output directory and resolved server scope.
#
# Arguments:
#   $1  - mode label
#   $2  - config path
#   $3+ - selected targets, one JSON object string per argument
# Outputs:   via log
#######################################
log_run_plan() {
  local mode=$1 config_path=$2
  shift 2
  local -a selected_targets=("$@")
  local target_json target_name output_dir resolution

  log '=== Scrape run plan ==='
  log "Config: $config_path"
  log "Mode: $mode"

  for target_json in "${selected_targets[@]}"; do
    target_name=$(jq -r '.name' <<<"$target_json")
    output_dir=$(jq -r '.output_dir' <<<"$target_json")
    resolution=$(describe_target_resolution "$target_json")
    log " Target '$target_name' → $output_dir"
    log " Server scope: $resolution"
  done
}

#######################################
# Log the consolidated run summary: one line per ledger entry plus totals.
#
# Globals:   SCRAPE_SUMMARY_ENTRIES (read)
# Outputs:   via log
#######################################
print_scrape_summary() {
  local entry target_name channel_id guild_label file_path action
  local before_count fetched_count after_count delta appended=0
  local created=0 merged=0 unchanged=0 skipped=0

  log '=== Scrape run summary ==='

  if (( ${#SCRAPE_SUMMARY_ENTRIES[@]} == 0 )); then
    log ' No channel activity recorded.'
    return 0
  fi

  for entry in "${SCRAPE_SUMMARY_ENTRIES[@]}"; do
    IFS=$'\t' read -r target_name channel_id guild_label file_path action \
      before_count fetched_count after_count <<<"$entry"

    case "$action" in
      CREATED)
        created=$((created + 1))
        delta=$((after_count - before_count))
        appended=$((appended + delta))
        log " CREATED $file_path +$delta messages (0 → $after_count) channel $channel_id $guild_label"
        ;;
      MERGED)
        merged=$((merged + 1))
        delta=$((after_count - before_count))
        appended=$((appended + delta))
        log " MERGED $file_path +$delta messages ($before_count → $after_count, fetched $fetched_count) channel $channel_id $guild_label"
        ;;
      UNCHANGED)
        unchanged=$((unchanged + 1))
        log " UNCHANGED $file_path $after_count messages channel $channel_id $guild_label"
        ;;
      SKIPPED)
        skipped=$((skipped + 1))
        log " SKIPPED channel $channel_id $guild_label (inaccessible or non-fatal export error)"
        ;;
      *)
        log " $action $file_path channel $channel_id $guild_label"
        ;;
    esac
  done

  log "Totals: $created created, $merged merged, $unchanged unchanged, $skipped skipped; +$appended messages appended"
}
+ log " Server scope: $(describe_target_resolution "$target_json")" local channel_id local skipped_channels=0 for channel_id in "${channel_ids[@]}"; do destination_path=$(resolve_destination_path "$output_dir" "$channel_id") + before_count=0 + guild_label="unknown guild" if [[ -n "$destination_path" ]]; then mkdir -p "$(dirname "$destination_path")" fi @@ -788,6 +948,8 @@ scrape_target() { if [[ -n "$destination_path" && -f "$destination_path" ]]; then jq empty "$destination_path" >/dev/null 2>&1 || die "Existing export is not valid JSON: $destination_path" assert_export_channel_identity "$destination_path" "$channel_id" + before_count=$(message_count "$destination_path") + guild_label=$(guild_label_from_export "$destination_path") fi after_id=$(last_message_id "$destination_path") @@ -796,7 +958,7 @@ scrape_target() { temp_export="$temp_dir/export.json" temp_merged="$temp_dir/merged.json" - log "Exporting channel $channel_id for target '$target_name'${after_id:+ after message $after_id}." + log "Exporting channel $channel_id for target '$target_name'${after_id:+ after message $after_id}${destination_path:+ → $destination_path}." export_status=0 export_channel_incremental "$channel_id" "$temp_export" "$after_id" || export_status=$? @@ -805,6 +967,7 @@ scrape_target() { 2) rm -rf "$temp_dir" skipped_channels=$((skipped_channels + 1)) + record_channel_result "$target_name" "$channel_id" "$guild_label" "${destination_path:-n/a}" SKIPPED "$before_count" 0 "$before_count" continue ;; *) @@ -815,6 +978,7 @@ scrape_target() { jq empty "$temp_export" >/dev/null 2>&1 || die "Incremental export is not valid JSON: $temp_export" assert_export_channel_identity "$temp_export" "$channel_id" + guild_label=$(guild_label_from_export "$temp_export") if [[ -z "$destination_path" ]]; then destination_path=$(resolve_destination_path "$output_dir" "$channel_id" "$temp_export") @@ -824,11 +988,16 @@ scrape_target() { latest_batch_count=$(message_count "$temp_export") if [[ ! 
-f "$destination_path" ]]; then mv "$temp_export" "$destination_path" + after_count=$(message_count "$destination_path") + record_channel_result "$target_name" "$channel_id" "$guild_label" "$destination_path" CREATED 0 "$after_count" "$after_count" + log " CREATED $destination_path (+$after_count messages, new archive)" rm -rf "$temp_dir" continue fi if (( latest_batch_count == 0 )); then + record_channel_result "$target_name" "$channel_id" "$guild_label" "$destination_path" UNCHANGED "$before_count" 0 "$before_count" + log " UNCHANGED $destination_path ($before_count messages, no new export data)" rm -rf "$temp_dir" continue fi @@ -838,6 +1007,9 @@ scrape_target() { jq empty "$temp_merged" >/dev/null 2>&1 || die "Merged export is not valid JSON: $temp_merged" assert_export_channel_identity "$temp_merged" "$channel_id" commit_merged_export "$destination_path" "$temp_merged" + after_count=$(message_count "$destination_path") + record_channel_result "$target_name" "$channel_id" "$guild_label" "$destination_path" MERGED "$before_count" "$latest_batch_count" "$after_count" + log " MERGED $destination_path (+$((after_count - before_count)) messages, $before_count → $after_count, fetched $latest_batch_count)" rm -rf "$temp_dir" done @@ -945,6 +1117,9 @@ run_target_mode() { CACHE_ROOT=$(mktemp -d "${TMPDIR:-/tmp}/dce-scrape.XXXXXX") trap 'rm -rf "$CACHE_ROOT"' EXIT + reset_scrape_summary + log_run_plan "$mode" "$config_path" "${selected_targets[@]}" + local target_json for target_json in "${selected_targets[@]}"; do if [[ "$mode" == "preflight" ]]; then @@ -953,6 +1128,10 @@ run_target_mode() { scrape_target "$target_json" "$defaults_json" fi done + + if [[ "$mode" == "scrape" ]]; then + print_scrape_summary + fi } main() { diff --git a/scripts/run-documents-scrape.sh b/scripts/run-documents-scrape.sh index f10ab4af..7717976e 100755 --- a/scripts/run-documents-scrape.sh +++ b/scripts/run-documents-scrape.sh @@ -11,6 +11,8 @@ 
DISCOVER_TOKEN="$REPO_ROOT/scripts/discover-discord-token.sh" VERIFY_SCRIPT="$REPO_ROOT/scripts/verify-documents-archives.sh" VERIFY_READY="$REPO_ROOT/scripts/verify-operator-ready.sh" SETUP_AUTH="$REPO_ROOT/scripts/setup-scrape-auth.sh" +# shellcheck source=lib/scrape-run-plan.sh +source "$SCRIPT_DIR/lib/scrape-run-plan.sh" usage() { cat <.log EOF } @@ -63,13 +67,30 @@ main() { esac done + [[ -f "$CONFIG_PATH" ]] || die "Missing config: $CONFIG_PATH" + + local -a targets=() + if [[ -n "$TARGET" ]]; then + targets=("$TARGET") + else + mapfile -t targets < <(enabled_target_names "$CONFIG_PATH") + ((${#targets[@]} > 0)) || die "No enabled targets in $CONFIG_PATH" + fi + mkdir -p "$LOG_DIR" local log_file log_file="$LOG_DIR/operator-proof-$(date -u +%Y%m%dT%H%M%SZ).log" + local failed=0 succeeded=0 name + { - printf 'Operator proof for target %s\n' "$TARGET" + if [[ -n "$TARGET" ]]; then + printf 'Operator proof for target %s\n' "$TARGET" + else + printf 'Operator proof for %s enabled target(s)\n' "${#targets[@]}" + fi printf 'config: %s\n' "$CONFIG_PATH" + print_scrape_config_plan "$CONFIG_PATH" "Operator proof" "${targets[@]}" printf 'started: %s\n\n' "$(date -u +%Y-%m-%dT%H:%M:%SZ)" if (( SYNC_GUI_FLAG == 1 )); then @@ -77,17 +98,27 @@ main() { "$SYNC_GUI" --force fi + "$HANDOFF" --config "$CONFIG_PATH" + if (( DRY_RUN == 1 )); then - "$HANDOFF" --config "$CONFIG_PATH" printf '\nDry run complete (no Discord scrape).\n' exit 0 fi - "$HANDOFF" --config "$CONFIG_PATH" - "$DOCUMENTS" --config "$CONFIG_PATH" --target "$TARGET" - "$PROVE" --config "$CONFIG_PATH" --target "$TARGET" + for name in "${targets[@]}"; do + printf '\n--- Target: %s ---\n' "$name" + if "$DOCUMENTS" --config "$CONFIG_PATH" --target "$name" && "$PROVE" --config "$CONFIG_PATH" --target "$name"; then + succeeded=$((succeeded + 1)) + printf 'Operator proof passed for %s\n' "$name" + else + failed=$((failed + 1)) + printf 'Operator proof FAILED for %s\n' "$name" >&2 + fi + done - printf '\nOperator 
proof succeeded for %s\n' "$TARGET" + printf '\nOperator proof summary: %s succeeded, %s failed (of %s)\n' \ + "$succeeded" "$failed" "${#targets[@]}" + (( failed == 0 )) || exit 1 } 2>&1 | tee "$log_file" printf 'Log: %s\n' "$log_file" diff --git a/scripts/tests/documents-scrape-smoke.sh b/scripts/tests/documents-scrape-smoke.sh index 8f8fd340..edd0b4d8 100755 --- a/scripts/tests/documents-scrape-smoke.sh +++ b/scripts/tests/documents-scrape-smoke.sh @@ -11,8 +11,9 @@ cleanup() { trap cleanup EXIT FAKE_REPO="$TMP_DIR/fake-repo" -mkdir -p "$FAKE_REPO/scripts" +mkdir -p "$FAKE_REPO/scripts/lib" cp "$REPO_ROOT/scripts/run-discord-scrape-host.sh" "$FAKE_REPO/scripts/" +cp "$REPO_ROOT/scripts/lib/scrape-run-plan.sh" "$FAKE_REPO/scripts/lib/" chmod +x "$FAKE_REPO/scripts/run-discord-scrape-host.sh" COMPOSE_FILE="$TMP_DIR/docker-compose.yml" @@ -73,7 +74,12 @@ DCE_REPO_ROOT="$REPO_ROOT" \ DISCORD_TOKEN=dummy \ "$PROVE" --config "$TMP_DIR/config.json" --target demo >/dev/null -"$REPO_ROOT/scripts/run-documents-scrape.sh" --dry-run --config "$TMP_DIR/config.json" >/dev/null +DOC_OUT="$TMP_DIR/documents-dry-run.log" +"$REPO_ROOT/scripts/run-documents-scrape.sh" --dry-run --config "$TMP_DIR/config.json" >"$DOC_OUT" 2>&1 +grep -q 'Documents scrape run plan' "$DOC_OUT" || { + echo "expected Documents scrape run plan in dry-run output" >&2 + exit 1 +} DCE_MIN_FREE_MB=0 DCE_CONFIG_FILE="$TMP_DIR/config.json" \ "$REPO_ROOT/scripts/verify-operator-ready.sh" --disk-only --config "$TMP_DIR/config.json" \ diff --git a/scripts/tests/run-discord-scrape-smoke.sh b/scripts/tests/run-discord-scrape-smoke.sh index 2ba560a5..f9a23dc4 100755 --- a/scripts/tests/run-discord-scrape-smoke.sh +++ b/scripts/tests/run-discord-scrape-smoke.sh @@ -210,7 +210,20 @@ run_wrapper() { "$REPO_ROOT/scripts/run-discord-scrape.sh" scrape --target "$target_name" } -run_wrapper demo initial +SCRAPE_LOG="$TMP_DIR/scrape.log" +run_wrapper demo initial 2>"$SCRAPE_LOG" +grep -q 'Scrape run plan' "$SCRAPE_LOG" || { 
+ echo "expected Scrape run plan in scrape output" >&2 + exit 1 +} +grep -q 'Scrape run summary' "$SCRAPE_LOG" || { + echo "expected Scrape run summary in scrape output" >&2 + exit 1 +} +grep -qE 'CREATED|MERGED|UNCHANGED' "$SCRAPE_LOG" || { + echo "expected channel result line in scrape output" >&2 + exit 1 +} DEST="$ARCHIVE_ROOT/demo/$DEFAULT_FILE_NAME" [[ -f "$DEST" ]] || { echo "expected destination archive missing" >&2; exit 1; } diff --git a/scripts/tests/run-operator-proof-smoke.sh b/scripts/tests/run-operator-proof-smoke.sh index be431954..58f0774e 100755 --- a/scripts/tests/run-operator-proof-smoke.sh +++ b/scripts/tests/run-operator-proof-smoke.sh @@ -49,5 +49,9 @@ if [[ "$status" -ne 0 ]] || ! grep -q 'Dry run complete' <<<"$output"; then printf '%s\n' "$output" >&2 exit 1 fi +grep -q 'Operator proof run plan' <<<"$output" || { + echo "expected Operator proof run plan in dry-run output" >&2 + exit 1 +} printf 'run-operator-proof-smoke: ok\n'