mirror of
https://github.com/Tyrrrz/DiscordChatExporter.git
synced 2026-06-09 15:52:37 -06:00
feat(scrape): optional JSON run summary for automation
Emit DCE_JSON_SUMMARY log line and/or write DCE_RUN_SUMMARY_FILE with per-channel actions and totals after scrape completes.
This commit is contained in:
parent
aa85fe50fa
commit
1dda40ae1b
|
|
@ -0,0 +1,44 @@
|
|||
---
|
||||
title: "feat: Optional JSON scrape run summary"
|
||||
type: feat
|
||||
status: complete
|
||||
date: 2026-06-04
|
||||
origin: /lfg — plan 038 deferred structured JSON run logs for operator validation and automation
|
||||
---
|
||||
|
||||
# feat: Optional JSON scrape run summary
|
||||
|
||||
## Summary
|
||||
|
||||
When `DCE_RUN_SUMMARY_JSON=1` and/or `DCE_RUN_SUMMARY_FILE` is set, emit a machine-readable scrape summary alongside the existing human log summary in `run-discord-scrape.sh`.
|
||||
|
||||
## Requirements
|
||||
|
||||
| ID | Requirement |
|
||||
|----|-------------|
|
||||
| R1 | JSON includes version, finished_at, totals, and per-channel entries matching the text summary |
|
||||
| R2 | `DCE_RUN_SUMMARY_JSON=1` logs one `DCE_JSON_SUMMARY:` line (compact JSON) |
|
||||
| R3 | `DCE_RUN_SUMMARY_FILE` writes pretty-printed JSON to that path (the parent directory is created with `mkdir -p` if it is missing) |
|
||||
| R4 | `scrape.env.example` documents both env vars |
|
||||
| R5 | `run-discord-scrape-smoke.sh` asserts a valid JSON summary file (`version == 1`, at least one channel entry, at least one merged or unchanged channel in the totals) |
|
||||
| R6 | `DCE_MIN_FREE_MB=0 ./scripts/run-all-smokes.sh` → 21/21 |
|
||||
|
||||
## Implementation Units
|
||||
|
||||
### U1. JSON writer in run-discord-scrape.sh
|
||||
|
||||
**Files:** `scripts/run-discord-scrape.sh`, `scripts/tests/run-discord-scrape-smoke.sh`, `scrape.env.example`
|
||||
|
||||
## Verification
|
||||
|
||||
```bash
|
||||
./scripts/tests/run-discord-scrape-smoke.sh
|
||||
DCE_MIN_FREE_MB=0 ./scripts/run-all-smokes.sh
|
||||
```
|
||||
|
||||
## Scope Boundaries
|
||||
|
||||
### Deferred
|
||||
|
||||
- Live KotOR catch-up on host
|
||||
- Host compose passthrough of summary file path (operators can grep `DCE_JSON_SUMMARY`)
|
||||
|
|
@ -164,6 +164,8 @@ DCE_MIN_FREE_MB=0 ./scripts/run-operator-validation.sh \
|
|||
|
||||
**Plan 068 (2026-06-04):** `verify-documents-archives` MEM column and `verify-operator-ready` target memory hints when global cap unset.
|
||||
|
||||
**Plan 069 (2026-06-04):** Optional JSON scrape run summary via `DCE_RUN_SUMMARY_JSON` / `DCE_RUN_SUMMARY_FILE`.
|
||||
|
||||
**Disk:** ~65 GiB free on `/home` (2026-05-30); large channel merges still need headroom.
|
||||
|
||||
## CI note (fork PRs)
|
||||
|
|
|
|||
|
|
@ -25,3 +25,7 @@ DCE_USERNS_MODE=
|
|||
# Per-target: set container_memory on a target in config/scrape-targets.json (single --target runs).
|
||||
# Global override (wins over config): uncomment below.
|
||||
# DCE_CONTAINER_MEMORY=8g
|
||||
|
||||
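# Optional: machine-readable scrape summary (run-discord-scrape.sh): DCE_RUN_SUMMARY_JSON=1 logs one compact "DCE_JSON_SUMMARY:" line; DCE_RUN_SUMMARY_FILE writes pretty-printed JSON to the given path.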
|
||||
# DCE_RUN_SUMMARY_JSON=1
|
||||
# DCE_RUN_SUMMARY_FILE=/path/to/scrape-summary.json
|
||||
|
|
|
|||
|
|
@ -211,7 +211,79 @@ print_scrape_summary() {
|
|||
|
||||
log "Totals: $created created, $merged merged, $unchanged unchanged, $skipped skipped; +$appended messages appended"
|
||||
if (( skipped_oom > 0 )); then
|
||||
log "Hint: for OOM/aborted channels, set DCE_CONTAINER_MEMORY=8g in scrape.env, run --salvage-before-scrape, then retry with --channel."
|
||||
log "Hint: for OOM/aborted channels, raise container memory (target container_memory in config or DCE_CONTAINER_MEMORY in scrape.env), run --salvage-before-scrape, then retry with --channel."
|
||||
fi
|
||||
write_scrape_summary_json "$created" "$merged" "$unchanged" "$skipped" "$skipped_oom" "$appended"
|
||||
}
|
||||
|
||||
#######################################
# Emit the optional machine-readable scrape summary.
# Does nothing unless DCE_RUN_SUMMARY_JSON=1 and/or DCE_RUN_SUMMARY_FILE is set.
# Globals:
#   SCRAPE_SUMMARY_ENTRIES (read) - one tab-separated record per channel:
#     target, channel_id, guild_label, file_path, action,
#     before_count, fetched_count, after_count
#   DCE_RUN_SUMMARY_JSON (read) - "1" logs one compact "DCE_JSON_SUMMARY:" line
#   DCE_RUN_SUMMARY_FILE (read) - path that receives pretty-printed JSON
# Arguments:
#   $1 created, $2 merged, $3 unchanged, $4 skipped, $5 skipped_oom,
#   $6 appended (messages appended) - numeric totals
# Returns:
#   0 on success or when the feature is disabled;
#   1 if the JSON cannot be built or the summary file cannot be written
#######################################
write_scrape_summary_json() {
  local created=$1 merged=$2 unchanged=$3 skipped=$4 skipped_oom=$5 appended=$6
  local channels_json='[]' summary_json summary_dir

  [[ "${DCE_RUN_SUMMARY_JSON:-0}" == "1" || -n "${DCE_RUN_SUMMARY_FILE:-}" ]] || return 0

  # Build the channel list in a single jq pass. Splitting on "\t" inside jq
  # keeps empty fields in place; `IFS=$'\t' read` would collapse adjacent tabs
  # (tab is IFS whitespace) and shift every later column.
  if (( ${#SCRAPE_SUMMARY_ENTRIES[@]} > 0 )); then
    channels_json=$(
      printf '%s\n' "${SCRAPE_SUMMARY_ENTRIES[@]}" \
        | jq -Rcn '
            [ inputs
              | select(length > 0)
              | split("\t")
              | (.[5] | tonumber) as $before
              | (.[7] | tonumber) as $after
              | {
                  target: .[0],
                  channel_id: .[1],
                  guild_label: .[2],
                  file_path: .[3],
                  action: .[4],
                  before_count: $before,
                  fetched_count: (.[6] | tonumber),
                  after_count: $after,
                  delta: ($after - $before)
                }
            ]'
    ) || {
      log "WARN: could not build JSON scrape summary (malformed summary entry)"
      return 1
    }
  fi

  summary_json=$(
    jq -cn \
      --arg finished_at "$(timestamp)" \
      --argjson channels "$channels_json" \
      --argjson created "$created" \
      --argjson merged "$merged" \
      --argjson unchanged "$unchanged" \
      --argjson skipped "$skipped" \
      --argjson skipped_oom "$skipped_oom" \
      --argjson messages_appended "$appended" \
      '{
        version: 1,
        finished_at: $finished_at,
        totals: {
          created: $created,
          merged: $merged,
          unchanged: $unchanged,
          skipped: $skipped,
          skipped_oom: $skipped_oom,
          messages_appended: $messages_appended
        },
        channels: $channels
      }'
  ) || {
    log "WARN: could not build JSON scrape summary (invalid totals)"
    return 1
  }

  # summary_json is already compact (jq -c above), so it is logged as-is.
  if [[ "${DCE_RUN_SUMMARY_JSON:-0}" == "1" ]]; then
    log "DCE_JSON_SUMMARY: $summary_json"
  fi

  if [[ -n "${DCE_RUN_SUMMARY_FILE:-}" ]]; then
    summary_dir=$(dirname -- "$DCE_RUN_SUMMARY_FILE")
    # Only report success when the directory exists and the write went through.
    if mkdir -p -- "$summary_dir" && jq . <<<"$summary_json" >"$DCE_RUN_SUMMARY_FILE"; then
      log "JSON summary written: $DCE_RUN_SUMMARY_FILE"
    else
      log "WARN: could not write JSON summary: $DCE_RUN_SUMMARY_FILE"
      return 1
    fi
  fi
}
|
||||
|
||||
|
|
|
|||
|
|
@ -502,5 +502,33 @@ if ( commit_merged_export "$SHRINK_EXISTING" "$SHRINK_MERGED" >/dev/null 2>&1 );
|
|||
fi
|
||||
[[ "$(jq -r '.messages | length' "$SHRINK_EXISTING")" == "2" ]] || { echo "existing archive changed after rejected shrink merge" >&2; exit 1; }
|
||||
|
||||
# --- Optional JSON run summary (DCE_RUN_SUMMARY_FILE / DCE_RUN_SUMMARY_JSON) ---

# File-only mode: the scrape must leave a parseable summary at the given path.
SUMMARY_JSON="$TMP_DIR/scrape-summary.json"
DCE_RUN_SUMMARY_FILE="$SUMMARY_JSON" \
  DISCORD_TOKEN=dummy \
  DCE_CLI_BIN="$FAKE_CLI" \
  DCE_PRIMARY_CONFIG="$CONFIG_PATH" \
  DCE_FALLBACK_CONFIG="$CONFIG_PATH" \
  FAKE_DCE_FIXTURE_DIR="$FIXTURE_DIR" \
  FAKE_DCE_MODE=append \
  "$REPO_ROOT/scripts/run-discord-scrape.sh" scrape --target idempotent 2>"$TMP_DIR/json-scrape.log"
jq -e '.version == 1 and (.channels | length) >= 1 and (.totals.merged + .totals.unchanged) >= 1' "$SUMMARY_JSON" >/dev/null || {
  echo "expected valid JSON scrape summary file" >&2
  # Best-effort dump; the file may not exist at all, which must not mask the message above.
  cat "$SUMMARY_JSON" >&2 || true
  exit 1
}

# Combined mode: both the log line and the file are requested in one run.
SUMMARY_INLINE_JSON="$TMP_DIR/scrape-summary-inline.json"
DCE_RUN_SUMMARY_JSON=1 \
  DCE_RUN_SUMMARY_FILE="$SUMMARY_INLINE_JSON" \
  DISCORD_TOKEN=dummy \
  DCE_CLI_BIN="$FAKE_CLI" \
  DCE_PRIMARY_CONFIG="$CONFIG_PATH" \
  DCE_FALLBACK_CONFIG="$CONFIG_PATH" \
  FAKE_DCE_FIXTURE_DIR="$FIXTURE_DIR" \
  FAKE_DCE_MODE=append \
  "$REPO_ROOT/scripts/run-discord-scrape.sh" scrape --target idempotent 2>"$TMP_DIR/json-inline.log"
grep -q 'DCE_JSON_SUMMARY:' "$TMP_DIR/json-inline.log" || {
  echo "expected DCE_JSON_SUMMARY line when DCE_RUN_SUMMARY_JSON=1" >&2
  exit 1
}
# The marker alone is not enough for automation: the payload after it must parse.
# NOTE(review): assumes log() only prefixes the message and appends nothing after the JSON.
sed -n 's/^.*DCE_JSON_SUMMARY: //p' "$TMP_DIR/json-inline.log" \
  | tail -n 1 \
  | jq -e '.version == 1 and (.channels | type == "array")' >/dev/null || {
  echo "expected DCE_JSON_SUMMARY payload to be valid summary JSON" >&2
  exit 1
}
jq -e '.version == 1' "$SUMMARY_INLINE_JSON" >/dev/null || {
  echo "expected JSON summary file when DCE_RUN_SUMMARY_FILE is set together with DCE_RUN_SUMMARY_JSON=1" >&2
  exit 1
}
|
||||
|
||||
echo "U1: append-only merge test coverage passed"
|
||||
|
||||
|
|
|
|||
Loading…
Reference in a new issue