test(scrape): lock mixed-length snowflake cursor selection

Add a cursor-mixed-length smoke test covering the case where a string
max_by would pick the wrong --after value; the zero-padded sort_by in
last_message_id already picks the numeric maximum.
This commit is contained in:
Boden 2026-05-29 16:33:00 -05:00
parent 396832b047
commit 1e35761dbb
4 changed files with 93 additions and 0 deletions

View file

@ -0,0 +1,27 @@
---
title: "fix: Mixed-length snowflake cursor smoke"
type: fix
status: complete
date: 2026-05-29
origin: /lfg — close residual review P2 on last_message_id; lock padded sort behavior
---
# fix: Mixed-length snowflake cursor smoke
## Summary
`last_message_id` already uses zero-padded `sort_by` (not string `max_by`). Add a smoke fixture where an 18-digit ID lexicographically beats a 19-digit ID but is numerically smaller, and assert `--after` uses the true maximum.
## Requirements
| ID | Requirement |
|----|-------------|
| R1 | Fixture archive with IDs `999999999999999999` and `1000000000000000000` (unordered) |
| R2 | Smoke expects `--after` = `1000000000000000000` |
| R3 | `DCE_MIN_FREE_MB=0 ./scripts/run-all-smokes.sh` passes |
| R4 | PR #1538 notes residual P2 addressed |
## Verification
- `./scripts/tests/run-discord-scrape-smoke.sh`
- `DCE_MIN_FREE_MB=0 ./scripts/run-all-smokes.sh`

View file

@ -103,6 +103,14 @@ cat >"$CONFIG_PATH" <<JSON
"guild_ids": [], "guild_ids": [],
"guild_name_patterns": [] "guild_name_patterns": []
}, },
{
"name": "cursor-mixed-length",
"kind": "guild",
"output_dir": "$ARCHIVE_ROOT/cursor-mixed-length",
"channel_ids": ["111"],
"guild_ids": [],
"guild_name_patterns": []
},
{ {
"name": "bootstrap-map", "name": "bootstrap-map",
"kind": "guild", "kind": "guild",
@ -173,6 +181,7 @@ case "$subcommand" in
initial) cp "$fixture_dir/append-existing.json" "$output" ;; initial) cp "$fixture_dir/append-existing.json" "$output" ;;
append) cp "$fixture_dir/append-incremental.json" "$output" ;; append) cp "$fixture_dir/append-incremental.json" "$output" ;;
append-after-high-id) cp "$fixture_dir/append-after-high-id.json" "$output" ;; append-after-high-id) cp "$fixture_dir/append-after-high-id.json" "$output" ;;
append-after-mixed-length) cp "$fixture_dir/append-after-mixed-length.json" "$output" ;;
partial-write) cp "$fixture_dir/append-partial-write.json" "$output" ;; partial-write) cp "$fixture_dir/append-partial-write.json" "$output" ;;
concurrent-conflict) cp "$fixture_dir/append-concurrent-conflict.json" "$output" ;; concurrent-conflict) cp "$fixture_dir/append-concurrent-conflict.json" "$output" ;;
wrong-channel) cp "$fixture_dir/wrong-channel.json" "$output" ;; wrong-channel) cp "$fixture_dir/wrong-channel.json" "$output" ;;
@ -313,6 +322,12 @@ FAKE_DCE_EXPECT_AFTER=999 run_wrapper cursor-max-id append-after-high-id
CURSOR_DEST="$ARCHIVE_ROOT/cursor-max-id/$DEFAULT_FILE_NAME" CURSOR_DEST="$ARCHIVE_ROOT/cursor-max-id/$DEFAULT_FILE_NAME"
[[ "$(jq -r '.messages | length' "$CURSOR_DEST")" == "4" ]] || { echo "expected cursor-max-id archive to contain four messages" >&2; exit 1; } [[ "$(jq -r '.messages | length' "$CURSOR_DEST")" == "4" ]] || { echo "expected cursor-max-id archive to contain four messages" >&2; exit 1; }
# cursor-mixed-length smoke: the --after cursor must be the numerically largest
# message ID even when the archived IDs have different digit counts.
# Seed the archive directory with an existing export.
# NOTE(review): presumably this fixture holds the 18-digit 999999999999999999
# and the 19-digit 1000000000000000000 IDs — confirm against the fixture file.
mkdir -p "$ARCHIVE_ROOT/cursor-mixed-length"
cp "$FIXTURE_DIR/append-mixed-length-cursor.json" "$ARCHIVE_ROOT/cursor-mixed-length/$DEFAULT_FILE_NAME"
# Run the wrapper for this target with the append-after-mixed-length scenario.
# NOTE(review): FAKE_DCE_EXPECT_AFTER is presumably checked by the fake exporter
# against the --after value it receives — confirm in the fake exporter script.
# A plain string comparison would rank 999999999999999999 above
# 1000000000000000000, so the 19-digit value is the one expected here.
FAKE_DCE_EXPECT_AFTER=1000000000000000000 run_wrapper cursor-mixed-length append-after-mixed-length
MIXED_CURSOR_DEST="$ARCHIVE_ROOT/cursor-mixed-length/$DEFAULT_FILE_NAME"
# The archive must contain exactly three messages after the run
# (assumes two seeded messages plus one appended — TODO confirm fixture contents).
[[ "$(jq -r '.messages | length' "$MIXED_CURSOR_DEST")" == "3" ]] || { echo "expected cursor-mixed-length archive to contain three messages" >&2; exit 1; }
mkdir -p "$ARCHIVE_ROOT/bootstrap-map" mkdir -p "$ARCHIVE_ROOT/bootstrap-map"
cp "$FIXTURE_DIR/append-existing.json" "$ARCHIVE_ROOT/bootstrap-map/$DEFAULT_FILE_NAME" cp "$FIXTURE_DIR/append-existing.json" "$ARCHIVE_ROOT/bootstrap-map/$DEFAULT_FILE_NAME"
[[ ! -f "$ARCHIVE_ROOT/bootstrap-map/.dce-meta/channel-map.json" ]] || { echo "bootstrap-map should start without channel map" >&2; exit 1; } [[ ! -f "$ARCHIVE_ROOT/bootstrap-map/.dce-meta/channel-map.json" ]] || { echo "bootstrap-map should start without channel map" >&2; exit 1; }

View file

@ -0,0 +1,23 @@
{
"guild": {
"id": "222",
"name": "Fixture Guild"
},
"channel": {
"id": "111",
"name": "fixture-room",
"category": "Testing Grounds"
},
"messages": [
{
"id": "1000000000000000001",
"timestamp": "2026-01-03T00:00:00Z",
"content": "after-mixed-length-max"
}
],
"dateRange": {
"after": "1000000000000000000",
"before": null
},
"exportedAt": "2026-01-03T00:00:00Z"
}

View file

@ -0,0 +1,28 @@
{
"guild": {
"id": "222",
"name": "Fixture Guild"
},
"channel": {
"id": "111",
"name": "fixture-room",
"category": "Testing Grounds"
},
"messages": [
{
"id": "999999999999999999",
"timestamp": "2026-01-01T00:00:00Z",
"content": "eighteen-digit-high-string-order"
},
{
"id": "1000000000000000000",
"timestamp": "2026-01-02T00:00:00Z",
"content": "nineteen-digit-true-max"
}
],
"dateRange": {
"after": null,
"before": null
},
"exportedAt": "2026-01-02T00:00:00Z"
}