mirror of
https://github.com/Tyrrrz/DiscordChatExporter.git
synced 2026-06-09 15:52:37 -06:00
fix(scrape): address residual review findings R1–R3
Use padded sort for last_message_id cursor, surface guild channel discovery errors, and document DCE_ARCHIVE_ROOT in compose.
This commit is contained in:
parent
c713ee5e64
commit
25e1a7e600
|
|
@ -17,5 +17,6 @@ services:
|
|||
volumes:
|
||||
- ./config:/config:ro,z
|
||||
- ./scripts/run-discord-scrape.sh:/opt/dce-scheduler/run-discord-scrape.sh:ro,z
|
||||
# Host path must match archive_root in config/scrape-targets.json; set DCE_ARCHIVE_ROOT to override the default on other machines.
|
||||
- ${DCE_ARCHIVE_ROOT:-/home/brunner56/Documents}:${DCE_ARCHIVE_ROOT:-/home/brunner56/Documents}:z
|
||||
command: ["help"]
|
||||
|
|
|
|||
47
docs/plans/2026-05-29-012-fix-scrape-residual-review-plan.md
Normal file
47
docs/plans/2026-05-29-012-fix-scrape-residual-review-plan.md
Normal file
|
|
@ -0,0 +1,47 @@
|
|||
---
|
||||
title: fix: Close recurring scrape residual review findings
|
||||
type: fix
|
||||
status: completed
|
||||
date: 2026-05-29
|
||||
origin: LFG — residual review findings on feat/recurring-cli-scrape (R1–R3)
|
||||
---
|
||||
|
||||
# fix: Close recurring scrape residual review findings
|
||||
|
||||
## Summary
|
||||
|
||||
Address manual review residuals from plan 011: correct incremental cursor selection, improve guild channel discovery errors, and document portable archive mount configuration.
|
||||
|
||||
## Requirements
|
||||
|
||||
| ID | Requirement |
|
||||
|----|-------------|
|
||||
| R1 | `last_message_id` reliably picks the highest snowflake ID, even when IDs differ in digit length |
|
||||
| R2 | `load_guild_channel_cache` surfaces CLI failure output, as `load_guild_cache` does |
|
||||
| R3 | `docker-compose.yml` documents required `DCE_ARCHIVE_ROOT` override |
|
||||
| R4 | Existing smoke tests pass after changes |
|
||||
|
||||
## Implementation Units
|
||||
|
||||
### U1. Fix message cursor (`last_message_id`)
|
||||
|
||||
**Files:** `scripts/run-discord-scrape.sh`
|
||||
|
||||
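**Approach:** Replace `max_by(.id)` with a `sort_by(...)` whose key is the ID converted to a string and left-padded with zeros to a fixed width of 22 characters, followed by `| last | .id`. Padding makes lexicographic string comparison agree with numeric order even when snowflakes differ in digit length. Discord IDs in one channel are typically the same length, but an unpadded `sort_by(.id)` orders strings exactly as `max_by(.id)` does and would not fix the mixed-length case.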
|
||||
|
||||
### U2. Guild channel cache diagnostics
|
||||
|
||||
**Files:** `scripts/run-discord-scrape.sh`
|
||||
|
||||
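**Approach:** Capture the combined stdout/stderr of the `channels` CLI call; if it exits non-zero, `die` with the guild ID and the captured output. On success, pipe the captured output through `parse_channel_listing` into the cache file.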
|
||||
|
||||
### U3. Compose portability note
|
||||
|
||||
**Files:** `docker-compose.yml`
|
||||
|
||||
**Approach:** Comment above `DCE_ARCHIVE_ROOT` volume line.
|
||||
|
||||
## Verification
|
||||
|
||||
- `scripts/tests/run-discord-scrape-smoke.sh`
|
||||
- `bash -n scripts/run-discord-scrape.sh`
|
||||
|
|
@ -374,7 +374,16 @@ last_message_id() {
|
|||
[[ -f "$export_path" ]] || return 0
|
||||
jq -r '
|
||||
(.messages // [])
|
||||
| if length == 0 then empty else (max_by(.id) | .id) end
|
||||
| if length == 0 then empty else (
|
||||
sort_by(
|
||||
.id as $id
|
||||
| ($id | tostring) as $s
|
||||
| (22 - ($s | length)) as $pad
|
||||
| if $pad > 0 then ("0" * $pad) + $s else $s end
|
||||
)
|
||||
| last
|
||||
| .id
|
||||
) end
|
||||
' "$export_path"
|
||||
}
|
||||
|
||||
|
|
@ -508,11 +517,15 @@ load_guild_channel_cache() {
|
|||
local cache_file="$CACHE_ROOT/channels_${guild_id}_${include_voice}_${include_threads}.txt"
|
||||
|
||||
if [[ ! -f "$cache_file" ]]; then
|
||||
"$CLI_BIN" channels \
|
||||
local output
|
||||
if ! output=$("$CLI_BIN" channels \
|
||||
--guild "$guild_id" \
|
||||
--include-vc "$include_voice" \
|
||||
--include-threads "$include_threads" \
|
||||
| parse_channel_listing >"$cache_file"
|
||||
--include-threads "$include_threads" 2>&1); then
|
||||
die "Channel discovery failed for guild $guild_id. CLI output: $output"
|
||||
fi
|
||||
|
||||
printf '%s\n' "$output" | parse_channel_listing >"$cache_file"
|
||||
fi
|
||||
|
||||
cat "$cache_file"
|
||||
|
|
|
|||
Loading…
Reference in a new issue