mirror of
https://github.com/Tyrrrz/DiscordChatExporter.git
synced 2026-06-09 15:52:37 -06:00
feat(scrape): offline prove smoke and snapshot-only mode
Add --snapshot-only and --compare-snapshots to prove-incremental-append, fix EXIT trap status, wire prove smoke into CI, and document LFG closure plan.
This commit is contained in:
parent
a2aeaaab9c
commit
10cd2a534d
1
.github/workflows/main.yml
vendored
1
.github/workflows/main.yml
vendored
|
|
@ -79,6 +79,7 @@ jobs:
|
||||||
./scripts/tests/scrape-here-smoke.sh
|
./scripts/tests/scrape-here-smoke.sh
|
||||||
./scripts/tests/bootstrap-recurring-scrape-smoke.sh
|
./scripts/tests/bootstrap-recurring-scrape-smoke.sh
|
||||||
./scripts/tests/audit-archive-json-smoke.sh
|
./scripts/tests/audit-archive-json-smoke.sh
|
||||||
|
./scripts/tests/prove-incremental-append-smoke.sh
|
||||||
|
|
||||||
test:
|
test:
|
||||||
# Tests need access to secrets, so we can't run them against PRs because of limited trust
|
# Tests need access to secrets, so we can't run them against PRs because of limited trust
|
||||||
|
|
|
||||||
52
docs/plans/2026-05-29-019-feat-lfg-closure-plan.md
Normal file
52
docs/plans/2026-05-29-019-feat-lfg-closure-plan.md
Normal file
|
|
@ -0,0 +1,52 @@
|
||||||
|
---
|
||||||
|
title: feat: LFG closure — prove smoke and workspace bridge
|
||||||
|
type: feat
|
||||||
|
status: complete
|
||||||
|
date: 2026-05-29
|
||||||
|
origin: Repeated /lfg — recurring scrape stack complete; close gaps for operators and CI
|
||||||
|
---
|
||||||
|
|
||||||
|
# feat: LFG closure — prove smoke and workspace bridge
|
||||||
|
|
||||||
|
## Summary
|
||||||
|
|
||||||
|
Recurring scrape is feature-complete on `feat/recurring-cli-scrape`. This slice adds an offline prove smoke test, documents audit/salvage in the GUI zip bridge, and refreshes the open PR summary.
|
||||||
|
|
||||||
|
## Requirements
|
||||||
|
|
||||||
|
| ID | Requirement |
|
||||||
|
|----|-------------|
|
||||||
|
| R1 | `prove-incremental-append.sh` supports `--snapshot-only` for offline verification |
|
||||||
|
| R2 | `scripts/tests/prove-incremental-append-smoke.sh` verifies that invalid JSON archives are skipped in the snapshot and that the grow-only comparison passes on growth and fails on shrinkage |
|
||||||
|
| R3 | CI `recurring-scrape-smoke` job runs prove smoke |
|
||||||
|
| R4 | `DiscordChatExporter.linux-x64/RECURRING-SCRAPE.md` mentions audit/salvage |
|
||||||
|
| R5 | PR #1538 body includes plan 018 audit/salvage summary |
|
||||||
|
|
||||||
|
## Implementation Units
|
||||||
|
|
||||||
|
### U1. Prove snapshot-only mode
|
||||||
|
|
||||||
|
**Files:** `scripts/prove-incremental-append.sh`
|
||||||
|
|
||||||
|
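Add `--snapshot-only`, which writes the snapshot TSV to the path given by `--snapshot-file` and then exits without running a Discord scrape. Add `--compare-snapshots BEFORE.tsv AFTER.tsv` so two saved snapshots can be compared offline.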
|
||||||
|
|
||||||
|
### U2. Prove smoke test
|
||||||
|
|
||||||
|
**Files:** `scripts/tests/prove-incremental-append-smoke.sh`
|
||||||
|
|
||||||
|
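Fixture archives: one valid JSON archive and one truncated (invalid) JSON archive, which the snapshot must skip. The test then grows the valid archive, takes a second snapshot, and checks that the before → after comparison passes while the reversed (shrinking) comparison fails.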
|
||||||
|
|
||||||
|
### U3. Workspace bridge
|
||||||
|
|
||||||
|
**Files:** `../DiscordChatExporter.linux-x64/RECURRING-SCRAPE.md` (a sibling directory of the repo, treated in this plan as the operator copy target; update it via `scripts/sync-workspace-bridge.sh`, or edit it directly if the path exists)
|
||||||
|
|
||||||
|
Note on paths: the bridge file lives beside the repository rather than inside it, at `DiscordChatExporter.linux-x64/RECURRING-SCRAPE.md`.
|
||||||
|
|
||||||
|
### U4. PR body refresh
|
||||||
|
|
||||||
|
Update the body of PR #1538 via `gh pr edit`, adding a "Latest" section that covers commit `a2aeaaa` and plan 019.
|
||||||
|
|
||||||
|
## Verification
|
||||||
|
|
||||||
|
- `./scripts/tests/prove-incremental-append-smoke.sh`
|
||||||
|
- All existing `scripts/tests/*.sh` pass
|
||||||
|
|
@ -13,6 +13,8 @@ usage() {
|
||||||
cat <<EOF
|
cat <<EOF
|
||||||
Usage:
|
Usage:
|
||||||
$(basename "$0") --target NAME [--config PATH]
|
$(basename "$0") --target NAME [--config PATH]
|
||||||
|
$(basename "$0") --target NAME --snapshot-only --snapshot-file PATH [--config PATH]
|
||||||
|
$(basename "$0") --compare-snapshots BEFORE.tsv AFTER.tsv
|
||||||
|
|
||||||
Record message counts for every JSON archive under the target's output_dir,
|
Record message counts for every JSON archive under the target's output_dir,
|
||||||
run one incremental scrape, then assert:
|
run one incremental scrape, then assert:
|
||||||
|
|
@ -29,7 +31,10 @@ die() {
|
||||||
}
|
}
|
||||||
|
|
||||||
cleanup() {
|
cleanup() {
|
||||||
[[ -n "$SNAPSHOT_DIR" && -d "$SNAPSHOT_DIR" ]] && rm -rf "$SNAPSHOT_DIR"
|
if [[ -n "${SNAPSHOT_DIR:-}" && -d "$SNAPSHOT_DIR" ]]; then
|
||||||
|
rm -rf "$SNAPSHOT_DIR"
|
||||||
|
fi
|
||||||
|
return 0
|
||||||
}
|
}
|
||||||
|
|
||||||
require_command() {
|
require_command() {
|
||||||
|
|
@ -110,6 +115,10 @@ compare_snapshots() {
|
||||||
|
|
||||||
main() {
|
main() {
|
||||||
local target=""
|
local target=""
|
||||||
|
local snapshot_only=0
|
||||||
|
local snapshot_file=""
|
||||||
|
local compare_before=""
|
||||||
|
local compare_after=""
|
||||||
|
|
||||||
trap cleanup EXIT
|
trap cleanup EXIT
|
||||||
|
|
||||||
|
|
@ -125,6 +134,21 @@ main() {
|
||||||
CONFIG_PATH=$2
|
CONFIG_PATH=$2
|
||||||
shift 2
|
shift 2
|
||||||
;;
|
;;
|
||||||
|
--snapshot-only)
|
||||||
|
snapshot_only=1
|
||||||
|
shift
|
||||||
|
;;
|
||||||
|
--snapshot-file)
|
||||||
|
[[ $# -ge 2 ]] || die "Missing value for --snapshot-file."
|
||||||
|
snapshot_file=$2
|
||||||
|
shift 2
|
||||||
|
;;
|
||||||
|
--compare-snapshots)
|
||||||
|
[[ $# -ge 3 ]] || die "Missing paths for --compare-snapshots."
|
||||||
|
compare_before=$2
|
||||||
|
compare_after=$3
|
||||||
|
shift 3
|
||||||
|
;;
|
||||||
--help|-h)
|
--help|-h)
|
||||||
usage
|
usage
|
||||||
exit 0
|
exit 0
|
||||||
|
|
@ -135,15 +159,32 @@ main() {
|
||||||
esac
|
esac
|
||||||
done
|
done
|
||||||
|
|
||||||
|
require_command jq
|
||||||
|
|
||||||
|
if [[ -n "$compare_before" ]]; then
|
||||||
|
[[ -f "$compare_before" ]] || die "Missing snapshot: $compare_before"
|
||||||
|
[[ -f "$compare_after" ]] || die "Missing snapshot: $compare_after"
|
||||||
|
compare_snapshots "$compare_before" "$compare_after"
|
||||||
|
printf 'Snapshot comparison passed.\n'
|
||||||
|
exit 0
|
||||||
|
fi
|
||||||
|
|
||||||
[[ -n "$target" ]] || die "--target is required."
|
[[ -n "$target" ]] || die "--target is required."
|
||||||
|
|
||||||
require_command jq
|
|
||||||
[[ -f "$CONFIG_PATH" ]] || die "Missing config: $CONFIG_PATH"
|
[[ -f "$CONFIG_PATH" ]] || die "Missing config: $CONFIG_PATH"
|
||||||
|
|
||||||
local output_dir
|
local output_dir
|
||||||
output_dir=$(target_output_dir "$target")
|
output_dir=$(target_output_dir "$target")
|
||||||
[[ -n "$output_dir" && "$output_dir" != "null" ]] || die "Unknown target: $target"
|
[[ -n "$output_dir" && "$output_dir" != "null" ]] || die "Unknown target: $target"
|
||||||
|
|
||||||
|
if (( snapshot_only )); then
|
||||||
|
[[ -n "$snapshot_file" ]] || die "--snapshot-file is required with --snapshot-only."
|
||||||
|
snapshot_archives "$output_dir" "$snapshot_file"
|
||||||
|
[[ -s "$snapshot_file" ]] || die "No seeded archives found under $output_dir"
|
||||||
|
printf 'Snapshot written: %s\n' "$snapshot_file"
|
||||||
|
exit 0
|
||||||
|
fi
|
||||||
|
|
||||||
SNAPSHOT_DIR=$(mktemp -d "${TMPDIR:-/tmp}/dce-prove-append.XXXXXX")
|
SNAPSHOT_DIR=$(mktemp -d "${TMPDIR:-/tmp}/dce-prove-append.XXXXXX")
|
||||||
local before_file="$SNAPSHOT_DIR/before.tsv"
|
local before_file="$SNAPSHOT_DIR/before.tsv"
|
||||||
local after_file="$SNAPSHOT_DIR/after.tsv"
|
local after_file="$SNAPSHOT_DIR/after.tsv"
|
||||||
|
|
|
||||||
78
scripts/tests/prove-incremental-append-smoke.sh
Executable file
78
scripts/tests/prove-incremental-append-smoke.sh
Executable file
|
|
@ -0,0 +1,78 @@
|
||||||
|
#!/usr/bin/env bash
#
# Offline smoke test for scripts/prove-incremental-append.sh.
#
# Builds a throwaway archive directory holding one valid and one truncated
# JSON archive, then checks that:
#   1. --snapshot-only records the valid archive and skips the invalid one;
#   2. --compare-snapshots accepts a before/after pair where the archive grew;
#   3. --compare-snapshots rejects the same pair in reverse order (a shrink).
#
# Requires: jq (used here to write the config file).
# Exits 0 and prints "prove-incremental-append-smoke: ok" on success;
# exits non-zero with an "ERROR: ..." line on stderr otherwise.

set -Eeuo pipefail

REPO_ROOT=$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd -P)
PROVE="$REPO_ROOT/scripts/prove-incremental-append.sh"
TMP_DIR=$(mktemp -d "${TMPDIR:-/tmp}/dce-prove-smoke.XXXXXX")
ARCHIVE_ROOT="$TMP_DIR/archive"
CONFIG_PATH="$TMP_DIR/config.json"
BEFORE="$TMP_DIR/before.tsv"
AFTER="$TMP_DIR/after.tsv"

# Fixture files: one well-formed archive that grows between snapshots, and one
# truncated file that must never appear in a snapshot.
VALID_ARCHIVE="$ARCHIVE_ROOT/demo/Guild - general [111111111111111111].json"
INVALID_ARCHIVE="$ARCHIVE_ROOT/demo/truncated [222222222222222222].json"

# Remove the scratch directory on every exit path. The ':?' guard aborts the
# expansion if TMP_DIR is somehow empty, so this can never become 'rm -rf /'.
cleanup() {
  rm -rf -- "${TMP_DIR:?}"
}
trap cleanup EXIT

# Print an error to stderr and abort the test.
fail() {
  printf 'ERROR: %s\n' "$*" >&2
  exit 1
}

command -v jq >/dev/null || fail "jq is required"

mkdir -p "$ARCHIVE_ROOT/demo"

# Initial state of the valid archive: a single message.
cat >"$VALID_ARCHIVE" <<'JSON'
{
  "guild": {"id": "1", "name": "Guild"},
  "channel": {"id": "111111111111111111", "name": "general"},
  "messages": [
    {"id": "1", "timestamp": "2020-01-01T00:00:00+00:00", "type": "Default", "content": "one"}
  ],
  "messageCount": 1
}
JSON

# Deliberately unterminated JSON.
printf '{"messages":[\n' >"$INVALID_ARCHIVE"

# Generate the config with jq rather than a heredoc so that a TMPDIR containing
# quotes or backslashes is escaped correctly and still yields valid JSON.
jq -n \
  --arg root "$ARCHIVE_ROOT" \
  --arg out "$ARCHIVE_ROOT/demo" \
  '{
    archive_root: $root,
    targets: [
      {name: "demo", kind: "guild", output_dir: $out, enabled: true}
    ]
  }' >"$CONFIG_PATH"

# NOTE(review): DCE_PRIMARY_CONFIG is presumably how the prove script locates
# its config file; that handling is outside this file - confirm.
DCE_PRIMARY_CONFIG="$CONFIG_PATH" "$PROVE" --target demo --snapshot-only --snapshot-file "$BEFORE"

# The channel IDs are literal strings, so match them with -F.
if ! grep -qF -- '111111111111111111' "$BEFORE"; then
  fail 'snapshot missing valid channel archive'
fi
if grep -qF -- '222222222222222222' "$BEFORE"; then
  fail 'invalid JSON file should be skipped in snapshot'
fi

# Grow the valid archive by one message to simulate an incremental append.
cat >"$VALID_ARCHIVE" <<'JSON'
{
  "guild": {"id": "1", "name": "Guild"},
  "channel": {"id": "111111111111111111", "name": "general"},
  "messages": [
    {"id": "1", "timestamp": "2020-01-01T00:00:00+00:00", "type": "Default", "content": "one"},
    {"id": "2", "timestamp": "2020-01-02T00:00:00+00:00", "type": "Default", "content": "two"}
  ],
  "messageCount": 2
}
JSON

DCE_PRIMARY_CONFIG="$CONFIG_PATH" "$PROVE" --target demo --snapshot-only --snapshot-file "$AFTER"

# Growth (before -> after) must be accepted; set -e aborts the test otherwise.
"$PROVE" --compare-snapshots "$BEFORE" "$AFTER"

# Swapping the arguments makes the archive appear to shrink, which must be
# rejected. stderr is silenced on purpose: the failure output is expected.
if "$PROVE" --compare-snapshots "$AFTER" "$BEFORE" 2>/dev/null; then
  fail 'shrink comparison should have failed'
fi

printf 'prove-incremental-append-smoke: ok\n'
|
||||||
Loading…
Reference in a new issue