mirror of
https://github.com/Tyrrrz/DiscordChatExporter.git
synced 2026-06-10 00:02:37 -06:00
fix(scrape): address residual review findings R1–R3
Use padded sort for last_message_id cursor, surface guild channel discovery errors, and document DCE_ARCHIVE_ROOT in compose.
This commit is contained in:
parent
c713ee5e64
commit
25e1a7e600
|
|
@ -17,5 +17,6 @@ services:
|
||||||
volumes:
|
volumes:
|
||||||
- ./config:/config:ro,z
|
- ./config:/config:ro,z
|
||||||
- ./scripts/run-discord-scrape.sh:/opt/dce-scheduler/run-discord-scrape.sh:ro,z
|
- ./scripts/run-discord-scrape.sh:/opt/dce-scheduler/run-discord-scrape.sh:ro,z
|
||||||
|
# Host path must match archive_root in config/scrape-targets.json; set DCE_ARCHIVE_ROOT to override the default path on other machines.
|
||||||
- ${DCE_ARCHIVE_ROOT:-/home/brunner56/Documents}:${DCE_ARCHIVE_ROOT:-/home/brunner56/Documents}:z
|
- ${DCE_ARCHIVE_ROOT:-/home/brunner56/Documents}:${DCE_ARCHIVE_ROOT:-/home/brunner56/Documents}:z
|
||||||
command: ["help"]
|
command: ["help"]
|
||||||
|
|
|
||||||
47
docs/plans/2026-05-29-012-fix-scrape-residual-review-plan.md
Normal file
47
docs/plans/2026-05-29-012-fix-scrape-residual-review-plan.md
Normal file
|
|
@ -0,0 +1,47 @@
|
||||||
|
---
|
||||||
|
title: fix: Close recurring scrape residual review findings
|
||||||
|
type: fix
|
||||||
|
status: completed
|
||||||
|
date: 2026-05-29
|
||||||
|
origin: LFG — residual review findings on feat/recurring-cli-scrape (R1–R3)
|
||||||
|
---
|
||||||
|
|
||||||
|
# fix: Close recurring scrape residual review findings
|
||||||
|
|
||||||
|
## Summary
|
||||||
|
|
||||||
|
Address manual review residuals from plan 011: correct incremental cursor selection, improve guild channel discovery errors, and document portable archive mount configuration.
|
||||||
|
|
||||||
|
## Requirements
|
||||||
|
|
||||||
|
| ID | Requirement |
|
||||||
|
|----|-------------|
|
||||||
|
| R1 | `last_message_id` picks highest snowflake reliably across mixed digit lengths |
|
||||||
|
| R2 | `load_guild_channel_cache` surfaces CLI failure output, the same way `load_guild_cache` does |
|
||||||
|
| R3 | `docker-compose.yml` documents required `DCE_ARCHIVE_ROOT` override |
|
||||||
|
| R4 | Existing smoke tests pass after changes |
|
||||||
|
|
||||||
|
## Implementation Units
|
||||||
|
|
||||||
|
### U1. Fix message cursor (`last_message_id`)
|
||||||
|
|
||||||
|
**Files:** `scripts/run-discord-scrape.sh`
|
||||||
|
|
||||||
|
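**Approach:** Replace `max_by(.id)` with `sort_by(<padded id>) | last | .id`, where the sort key is the ID converted to a string and left-padded with zeros to a fixed width of 22 characters. jq compares strings lexicographically, so comparing unpadded IDs misorders snowflakes of different digit lengths (for example, `"9"` sorts after `"10"`); with equal-width keys, lexicographic order matches numeric order. The padding is what fixes the ordering — a plain `sort_by(.id)` would order strings exactly as `max_by(.id)` does.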
|
||||||
|
|
||||||
|
### U2. Guild channel cache diagnostics
|
||||||
|
|
||||||
|
**Files:** `scripts/run-discord-scrape.sh`
|
||||||
|
|
||||||
|
**Approach:** Capture `channels` CLI stderr/stdout; `die` with context on failure.
|
||||||
|
|
||||||
|
### U3. Compose portability note
|
||||||
|
|
||||||
|
**Files:** `docker-compose.yml`
|
||||||
|
|
||||||
|
**Approach:** Comment above `DCE_ARCHIVE_ROOT` volume line.
|
||||||
|
|
||||||
|
## Verification
|
||||||
|
|
||||||
|
- `scripts/tests/run-discord-scrape-smoke.sh`
|
||||||
|
- `bash -n scripts/run-discord-scrape.sh`
|
||||||
|
|
@ -374,7 +374,16 @@ last_message_id() {
|
||||||
[[ -f "$export_path" ]] || return 0
|
[[ -f "$export_path" ]] || return 0
|
||||||
jq -r '
|
jq -r '
|
||||||
(.messages // [])
|
(.messages // [])
|
||||||
| if length == 0 then empty else (max_by(.id) | .id) end
|
| if length == 0 then empty else (
|
||||||
|
sort_by(
|
||||||
|
.id as $id
|
||||||
|
| ($id | tostring) as $s
|
||||||
|
| (22 - ($s | length)) as $pad
|
||||||
|
| if $pad > 0 then ("0" * $pad) + $s else $s end
|
||||||
|
)
|
||||||
|
| last
|
||||||
|
| .id
|
||||||
|
) end
|
||||||
' "$export_path"
|
' "$export_path"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -508,11 +517,15 @@ load_guild_channel_cache() {
|
||||||
local cache_file="$CACHE_ROOT/channels_${guild_id}_${include_voice}_${include_threads}.txt"
|
local cache_file="$CACHE_ROOT/channels_${guild_id}_${include_voice}_${include_threads}.txt"
|
||||||
|
|
||||||
if [[ ! -f "$cache_file" ]]; then
|
if [[ ! -f "$cache_file" ]]; then
|
||||||
"$CLI_BIN" channels \
|
local output
|
||||||
|
if ! output=$("$CLI_BIN" channels \
|
||||||
--guild "$guild_id" \
|
--guild "$guild_id" \
|
||||||
--include-vc "$include_voice" \
|
--include-vc "$include_voice" \
|
||||||
--include-threads "$include_threads" \
|
--include-threads "$include_threads" 2>&1); then
|
||||||
| parse_channel_listing >"$cache_file"
|
die "Channel discovery failed for guild $guild_id. CLI output: $output"
|
||||||
|
fi
|
||||||
|
|
||||||
|
printf '%s\n' "$output" | parse_channel_listing >"$cache_file"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
cat "$cache_file"
|
cat "$cache_file"
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue