feat(scrape): operator proof script and podman-compose smoke fix

Add run-operator-proof for one-target handoff/scrape/prove flows.
Prefer podman-compose on Podman hosts but honor DCE_DOCKER_BIN overrides
so offline smokes keep using fake compose shims.
This commit is contained in:
Boden 2026-05-29 16:20:25 -05:00
parent 3fd42d924e
commit 65c9fb2206
7 changed files with 215 additions and 0 deletions

View file

@ -0,0 +1,30 @@
---
title: feat: Live operator proof script for one target
type: feat
status: complete
date: 2026-05-29
origin: /lfg — handoff passes; prove append-only scrape on host for smallest enabled target
---
# feat: Live operator proof script for one target
## Summary
Add `scripts/run-operator-proof.sh` to run handoff, incremental scrape, and grow-only proof for a single configured target (default `eod_discord`). Validates the full operator path on the host after LFG implementation.
## Requirements
| ID | Requirement |
|----|-------------|
| R1 | `--target NAME` (default `eod_discord`), `--config PATH`, `--sync-gui`, `--dry-run` (handoff only) |
| R2 | Runs `operator-handoff.sh`, then `run-documents-scrape.sh`, then `prove-incremental-append.sh` |
| R3 | Logs to `logs/operator-proof-TIMESTAMP.log` |
| R4 | `run-operator-proof-smoke.sh` uses `--dry-run` with fixture config |
| R5 | Host run for `eod_discord` when token available |
| R6 | Prefer `podman-compose` in host runner when installed (Fedora/Podman socket) |
## Verification
- `./scripts/tests/run-operator-proof-smoke.sh`
- `DCE_MIN_FREE_MB=0 ./scripts/run-all-smokes.sh`
- `./scripts/run-operator-proof.sh --target eod_discord` on host (manual)

View file

@ -38,6 +38,13 @@ Optional Discord probe for one target:
./scripts/verify-operator-ready.sh --preflight KotOR_discord_msgs
```
Single-target live proof (handoff → scrape → grow-only check):
```bash
./scripts/run-operator-proof.sh --sync-gui --target eod_discord
./scripts/run-operator-proof.sh --dry-run # handoff only
```
Full validation with log (GUI token sync + scrape + audit):
```bash
@ -62,6 +69,8 @@ Override threshold: `DCE_MIN_FREE_MB=2048 ./scripts/verify-operator-ready.sh`
Skip check (smokes only): `DCE_MIN_FREE_MB=0`
Also enforced by `run-documents-scrape.sh`, `run-discord-scrape-host.sh` (cron), and `run-operator-validation.sh`.
**Podman hosts:** install `podman-compose` (`dnf install podman-compose`) when `docker compose` cannot reach the socket; scripts auto-prefer `podman-compose` when it is installed and `podman info` succeeds (an explicit `DCE_COMPOSE_BIN` still takes precedence).
## CI note (fork PRs)
Upstream workflows may show `action_required` for cross-repo PRs from `th3w1zard1/DiscordChatExporter` until a maintainer approves workflow runs. Local `run-all-smokes.sh` is the authoritative offline gate.

View file

@ -55,6 +55,10 @@ resolve_compose() {
COMPOSE_BIN=("$DCE_COMPOSE_BIN")
return 0
fi
if command -v podman-compose >/dev/null 2>&1 && podman info >/dev/null 2>&1; then
COMPOSE_BIN=(podman-compose)
return 0
fi
if command -v docker-compose >/dev/null 2>&1; then
COMPOSE_BIN=(docker-compose)
return 0

View file

@ -196,11 +196,30 @@ ensure_token_present() {
[[ -n "${DISCORD_TOKEN:-}" ]] || die "DISCORD_TOKEN is not set. Set DISCORD_TOKEN or DISCORD_TOKEN_FILE in $ENV_FILE, export it in the shell, place a token at $REPO_ROOT/.discord-token or ~/.config/discord-scrape/token, or sign in via DiscordChatExporter GUI / Discord desktop on this machine."
}
# Decide which standalone compose executable (if any) to use and publish the
# answer in the global COMPOSE_BIN. An empty COMPOSE_BIN means "no standalone
# compose binary selected".
#
# Precedence:
#   1. DCE_COMPOSE_BIN, when set and non-empty, is used verbatim.
#   2. When DOCKER_BIN_OVERRIDDEN is 1, nothing is selected: smoke tests inject
#      DCE_DOCKER_BIN with a fake compose shim and must never be routed
#      through podman-compose.
#   3. podman-compose, when it is on PATH and `podman info` succeeds.
#   4. Otherwise nothing is selected.
#
# Globals:  DCE_COMPOSE_BIN (read), DOCKER_BIN_OVERRIDDEN (read),
#           COMPOSE_BIN (written)
# Returns:  0 always.
resolve_compose_bin() {
  local selected=""

  if [[ -n "${DCE_COMPOSE_BIN:-}" ]]; then
    selected=$DCE_COMPOSE_BIN
  elif (( DOCKER_BIN_OVERRIDDEN == 1 )); then
    # Fake-shim smoke run: deliberately leave the selection empty.
    selected=""
  elif command -v podman-compose >/dev/null 2>&1 && podman info >/dev/null 2>&1; then
    selected=podman-compose
  fi

  COMPOSE_BIN=$selected
}
compose_run_args() {
local -n _out=$1
local subcommand=$2
shift 2
resolve_compose_bin
_out=()
if [[ -n "$COMPOSE_BIN" ]]; then
_out=(

96
scripts/run-operator-proof.sh Executable file
View file

@ -0,0 +1,96 @@
#!/usr/bin/env bash
# Single-target operator proof: handoff, incremental scrape, grow-only check.
#
# Environment overrides:
#   DCE_REPO_ROOT    repository root (default: parent of this script's directory)
#   DCE_CONFIG_FILE  scrape target config (default: <repo>/config/scrape-targets.json)
set -Eeuo pipefail

# Physical directory holding this script.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd -P)"

# Repository root: explicit override wins, otherwise one level above the script.
if [[ -n "${DCE_REPO_ROOT:-}" ]]; then
  REPO_ROOT="${DCE_REPO_ROOT}"
else
  REPO_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd -P)"
fi

# Config file: explicit override wins; --config may replace it again later.
if [[ -n "${DCE_CONFIG_FILE:-}" ]]; then
  CONFIG_PATH="${DCE_CONFIG_FILE}"
else
  CONFIG_PATH="${REPO_ROOT}/config/scrape-targets.json"
fi

# Helper scripts driven by this wrapper, plus the log directory.
HANDOFF="${REPO_ROOT}/scripts/operator-handoff.sh"
DOCUMENTS="${REPO_ROOT}/scripts/run-documents-scrape.sh"
PROVE="${REPO_ROOT}/scripts/prove-incremental-append.sh"
SYNC_GUI="${REPO_ROOT}/scripts/sync-token-from-gui.sh"
LOG_DIR="${REPO_ROOT}/logs"

# Command-line defaults; the option parser may change these.
TARGET="eod_discord"
SYNC_GUI_FLAG=0
DRY_RUN=0
# Print the command-line synopsis and a short description to stdout.
# The program name is taken from the basename of $0.
usage() {
  printf '%s\n' \
    'Usage:' \
    "$(basename "$0") [--target NAME] [--config PATH] [--sync-gui] [--dry-run]" \
    'End-to-end operator proof for one target:' \
    'operator-handoff → incremental scrape → prove-incremental-append' \
    'Logs append to logs/operator-proof-<timestamp>.log'
}
# Report a fatal error on stderr and terminate the current shell with status 1.
# All arguments are joined into a single message prefixed with "ERROR: ".
die() {
  printf '%s\n' "ERROR: $*" 1>&2
  exit 1
}
# Run the proof steps in order and stop at the first one that fails.
#
# Every step carries an explicit `|| return` because main invokes this function
# on the left-hand side of `||`, a context in which `set -e` is ignored.
# It is meant to run as a pipeline stage (a subshell), so a `die` inside it
# ends only that stage.
#
# Globals:  TARGET, CONFIG_PATH, SYNC_GUI_FLAG, DRY_RUN, SYNC_GUI, HANDOFF,
#           DOCUMENTS, PROVE (all read)
# Outputs:  progress banner and step output on stdout/stderr
# Returns:  0 on success (or completed dry run), otherwise the failing step's status
_operator_proof_steps() {
  printf 'Operator proof for target %s\n' "$TARGET"
  printf 'config: %s\n' "$CONFIG_PATH"
  printf 'started: %s\n\n' "$(date -u +%Y-%m-%dT%H:%M:%SZ)"

  if (( SYNC_GUI_FLAG == 1 )); then
    [[ -x "$SYNC_GUI" ]] || die "Missing sync-token-from-gui.sh"
    "$SYNC_GUI" --force || return
  fi

  # The handoff runs for both dry and live runs.
  "$HANDOFF" --config "$CONFIG_PATH" || return

  if (( DRY_RUN == 1 )); then
    printf '\nDry run complete (no Discord scrape).\n'
    return 0
  fi

  "$DOCUMENTS" --config "$CONFIG_PATH" --target "$TARGET" || return
  "$PROVE" --config "$CONFIG_PATH" --target "$TARGET" || return
  printf '\nOperator proof succeeded for %s\n' "$TARGET"
}

# Parse options, run the proof steps with all output mirrored to a timestamped
# log file, and report the log path whether or not the steps succeeded.
#
# Arguments: [--target NAME] [--config PATH] [--sync-gui] [--dry-run] [--help|-h]
# Globals:   TARGET, CONFIG_PATH, SYNC_GUI_FLAG, DRY_RUN (written by options),
#            LOG_DIR (read)
# Outputs:   step output plus a final "Log: <path>" line on stdout
# Returns:   0 on success; otherwise the status of the failed step (or of tee).
#            Relies on `pipefail` (enabled at the top of the script) so that a
#            failing step is not masked by tee's exit status.
main() {
  while (($#)); do
    case "$1" in
      --target)
        [[ $# -ge 2 ]] || die "Missing value for --target."
        TARGET=$2
        shift 2
        ;;
      --config)
        [[ $# -ge 2 ]] || die "Missing value for --config."
        CONFIG_PATH=$2
        shift 2
        ;;
      --sync-gui)
        SYNC_GUI_FLAG=1
        shift
        ;;
      --dry-run)
        DRY_RUN=1
        shift
        ;;
      --help|-h)
        usage
        exit 0
        ;;
      *)
        die "Unknown option: $1"
        ;;
    esac
  done

  mkdir -p "$LOG_DIR" || die "Cannot create log directory: $LOG_DIR"

  local log_file
  local status=0
  log_file="$LOG_DIR/operator-proof-$(date -u +%Y%m%dT%H%M%SZ).log"

  # Capture the status instead of letting errexit abort here, so the log path
  # below is still reported when a step fails.
  _operator_proof_steps 2>&1 | tee "$log_file" || status=$?

  printf 'Log: %s\n' "$log_file"
  return "$status"
}
# Entry point: forward the script's command-line arguments to main.
main "$@"

View file

@ -0,0 +1,53 @@
#!/usr/bin/env bash
# Offline smoke test: runs run-operator-proof.sh in --dry-run mode against a
# throwaway fixture config.
set -Eeuo pipefail

# Repository root is two levels above the directory holding this script.
REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd -P)"
PROOF="${REPO_ROOT}/scripts/run-operator-proof.sh"

# Private scratch directory holding the fixture config, env file and archive.
TMP_DIR="$(mktemp -d "${TMPDIR:-/tmp}/dce-operator-proof-smoke.XXXXXX")"
CONFIG_PATH="${TMP_DIR}/config.json"
ENV_PATH="${TMP_DIR}/scrape.env"
# Remove the scratch directory created for this smoke run.
# `--` stops option parsing in case the path starts with a dash, and `:?`
# aborts instead of running `rm -rf` on an empty or unset path.
# Globals: TMP_DIR (read)
cleanup() {
  rm -rf -- "${TMP_DIR:?}"
}
# Remove the scratch directory on every exit path.
trap cleanup EXIT

# Fixture archive: one exported channel file for the "demo" target.
mkdir -p "$TMP_DIR/archive/demo"
printf '{"messages":[{"id":"1"}],"channel":{"id":"111111111111111111"}}\n' \
  >"$TMP_DIR/archive/demo/Guild - general [111111111111111111].json"

# Fixture config pointing at the archive above.
cat >"$CONFIG_PATH" <<JSON
{
"archive_root": "$TMP_DIR/archive",
"targets": [
{
"name": "demo",
"kind": "guild",
"output_dir": "$TMP_DIR/archive/demo",
"channel_ids": ["111111111111111111"],
"enabled": true
}
]
}
JSON

# Placeholder token so the env file exists; the run below is --dry-run.
printf 'DISCORD_TOKEN=dummy\n' >"$ENV_PATH"

# Run the proof script, keeping its combined output and exit status so a
# failure can be reported here instead of aborting via errexit.
status=0
output=$(
  DCE_MIN_FREE_MB=0 \
  DCE_CONFIG_FILE="$CONFIG_PATH" \
  DCE_ENV_FILE="$ENV_PATH" \
  "$PROOF" --config "$CONFIG_PATH" --target demo --dry-run 2>&1
) || status=$?

# Pass only when the script exited cleanly and printed the dry-run marker.
if (( status != 0 )) || [[ "$output" != *'Dry run complete'* ]]; then
  printf 'run-operator-proof dry-run failed (status=%s)\n' "$status" >&2
  printf '%s\n' "$output" >&2
  exit 1
fi

printf 'run-operator-proof-smoke: ok\n'

View file

@ -38,6 +38,10 @@ resolve_compose() {
printf 'compose: %s\n' "$DCE_COMPOSE_BIN"
return 0
fi
if command -v podman-compose >/dev/null 2>&1 && podman info >/dev/null 2>&1; then
printf 'compose: podman-compose\n'
return 0
fi
if command -v docker >/dev/null 2>&1 && docker compose version >/dev/null 2>&1; then
printf 'compose: docker compose\n'
return 0