diff --git a/docker-compose.yml b/docker-compose.yml index 2b31d8b0..5d573d1a 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -17,5 +17,6 @@ services: volumes: - ./config:/config:ro,z - ./scripts/run-discord-scrape.sh:/opt/dce-scheduler/run-discord-scrape.sh:ro,z + # Host path must match archive_root in config/scrape-targets.json (override on other machines). - ${DCE_ARCHIVE_ROOT:-/home/brunner56/Documents}:${DCE_ARCHIVE_ROOT:-/home/brunner56/Documents}:z command: ["help"] diff --git a/docs/plans/2026-05-29-012-fix-scrape-residual-review-plan.md b/docs/plans/2026-05-29-012-fix-scrape-residual-review-plan.md new file mode 100644 index 00000000..f3546e53 --- /dev/null +++ b/docs/plans/2026-05-29-012-fix-scrape-residual-review-plan.md @@ -0,0 +1,47 @@ +--- +title: "fix: Close recurring scrape residual review findings" +type: fix +status: completed +date: 2026-05-29 +origin: LFG — residual review findings on feat/recurring-cli-scrape (R1–R3) +--- + +# fix: Close recurring scrape residual review findings + +## Summary + +Address manual review residuals from plan 011: correct incremental cursor selection, improve guild channel discovery errors, and document portable archive mount configuration. + +## Requirements + +| ID | Requirement | +|----|-------------| +| R1 | `last_message_id` picks highest snowflake reliably across mixed digit lengths | +| R2 | `load_guild_channel_cache` surfaces CLI failure output like `load_guild_cache` | +| R3 | `docker-compose.yml` documents required `DCE_ARCHIVE_ROOT` override | +| R4 | Existing smoke tests pass after changes | + +## Implementation Units + +### U1. Fix message cursor (`last_message_id`) + +**Files:** `scripts/run-discord-scrape.sh` + +**Approach:** Replace `max_by(.id)` with a `sort_by` keyed on each ID left-padded with zeros to 22 characters, followed by `last | .id`, so that string comparison matches numeric snowflake order even when IDs differ in digit count; the original, unpadded `.id` value is what gets returned. + +### U2.
Guild channel cache diagnostics + +**Files:** `scripts/run-discord-scrape.sh` + +**Approach:** Capture `channels` CLI stderr/stdout; `die` with context on failure. + +### U3. Compose portability note + +**Files:** `docker-compose.yml` + +**Approach:** Comment above `DCE_ARCHIVE_ROOT` volume line. + +## Verification + +- `scripts/tests/run-discord-scrape-smoke.sh` +- `bash -n scripts/run-discord-scrape.sh` diff --git a/scripts/run-discord-scrape.sh b/scripts/run-discord-scrape.sh index 8d090a72..9e29c3af 100755 --- a/scripts/run-discord-scrape.sh +++ b/scripts/run-discord-scrape.sh @@ -374,7 +374,16 @@ last_message_id() { [[ -f "$export_path" ]] || return 0 jq -r ' (.messages // []) - | if length == 0 then empty else (max_by(.id) | .id) end + | if length == 0 then empty else ( + sort_by( + .id as $id + | ($id | tostring) as $s + | (22 - ($s | length)) as $pad + | if $pad > 0 then ("0" * $pad) + $s else $s end + ) + | last + | .id + ) end ' "$export_path" } @@ -508,11 +517,15 @@ load_guild_channel_cache() { local cache_file="$CACHE_ROOT/channels_${guild_id}_${include_voice}_${include_threads}.txt" if [[ ! -f "$cache_file" ]]; then - "$CLI_BIN" channels \ + local output + if ! output=$("$CLI_BIN" channels \ --guild "$guild_id" \ --include-vc "$include_voice" \ - --include-threads "$include_threads" \ - | parse_channel_listing >"$cache_file" + --include-threads "$include_threads" 2>&1); then + die "Channel discovery failed for guild $guild_id. CLI output: $output" + fi + + printf '%s\n' "$output" | parse_channel_listing >"$cache_file" fi cat "$cache_file"