feat(operator): pass --channel through documents and validation wrappers

Operators can narrow KotOR yes_general catch-up to a single channel ID
without re-scraping every guild channel. Adds dry-run and compose arg
capture smokes for the passthrough path.
This commit is contained in:
Copilot 2026-06-03 06:18:08 -05:00
parent 5820f67caf
commit d8742c5c7b
4 changed files with 87 additions and 0 deletions

View file

@ -0,0 +1,42 @@
---
title: "feat: Pass --channel through operator scrape wrappers"
type: feat
status: complete
date: 2026-06-04
origin: /lfg — yes_general catch-up needs single-channel runs; host runner already supports --channel but documents/validation wrappers reject it
---
# feat: Pass --channel through operator scrape wrappers
## Problem
`run-discord-scrape.sh` and `run-discord-scrape-host.sh` accept a repeatable `--channel ID` (which requires exactly one `--target`), but `run-documents-scrape.sh` and `run-operator-validation.sh` reject `--channel` as an unknown option and die. As a result, operators re-running KotOR `yes_general` (`221726893064454144`) must scrape all guild channels instead of narrowing to one backlog channel.
## Requirements
| ID | Requirement |
|----|-------------|
| R1 | `run-documents-scrape.sh` accepts repeatable `--channel ID` and forwards to host preflight/scrape |
| R2 | `run-operator-validation.sh` accepts repeatable `--channel ID` and forwards to documents scrape |
| R3 | Usage text documents that `--channel` requires exactly one `--target` (enforced downstream) |
| R4 | Smoke asserts `--channel` is accepted (dry-run path) and forwarded (fake-docker arg capture) |
| R5 | `run-all-smokes.sh` passes |
## Implementation
- `scripts/run-documents-scrape.sh` — parse `--channel`, append to `passthrough`
- `scripts/run-operator-validation.sh` — parse `--channel`, append to `scrape_args` / per-target args
- `scripts/tests/documents-scrape-smoke.sh` — dry-run with `--channel`; arg capture via a fake docker binary
## Verification
```bash
./scripts/tests/documents-scrape-smoke.sh
DCE_MIN_FREE_MB=0 ./scripts/run-all-smokes.sh
```
## Out of scope
- Container memory limits for yes_general
- Completing yes_general catch-up inside LFG
- Changing core scrape channel validation logic

View file

@ -28,6 +28,7 @@ End-to-end Documents scrape workflow:
Options:
--dry-run Verify archives only; do not call Discord
--target NAME Limit preflight/scrape to one configured target
--channel ID With exactly one --target, limit scrape to channel ID (repeatable)
--config PATH Scrape target config (default: config/scrape-targets.json)
EOF
}
@ -54,6 +55,11 @@ main() {
passthrough+=(--target "$2")
shift 2
;;
--channel)
[[ $# -ge 2 ]] || die "Missing value for --channel."
passthrough+=(--channel "$2")
shift 2
;;
--config)
[[ $# -ge 2 ]] || die "Missing value for --config."
CONFIG_PATH=$2

View file

@ -17,6 +17,7 @@ SYNC_GUI_FLAG=0
PER_TARGET=0
CONTINUE_ON_ERROR=0
TARGET=""
CHANNEL_ARGS=()
LOG_FILE=""
usage() {
@ -32,6 +33,7 @@ Options:
--skip-scrape Readiness only (no scrape, no audit loop)
--sync-gui Run sync-token-from-gui.sh --force before checks
--target NAME Limit scrape/audit to one configured target
--channel ID With exactly one --target, limit scrape to channel ID (repeatable)
--per-target Scrape and audit each enabled target separately
--continue-on-error With --per-target, keep going after a target fails
--config PATH Targets JSON (default: config/scrape-targets.json)
@ -87,6 +89,7 @@ audit_targets() {
scrape_per_target() {
local name failures=0 ok=0
local -a scrape_args=(--config "$CONFIG_PATH")
scrape_args+=("${CHANNEL_ARGS[@]}")
local -a target_names=()
if (( DRY_RUN )); then
scrape_args+=(--dry-run)
@ -143,6 +146,11 @@ main() {
TARGET=$2
shift 2
;;
--channel)
[[ $# -ge 2 ]] || die "Missing value for --channel."
CHANNEL_ARGS+=(--channel "$2")
shift 2
;;
--config)
[[ $# -ge 2 ]] || die "Missing value for --config."
CONFIG_PATH=$2
@ -183,6 +191,7 @@ main() {
log_step "Operator validation started (config=$CONFIG_PATH)"
if [[ -n "$TARGET" ]]; then
log_step "Targets: $TARGET"
((${#CHANNEL_ARGS[@]})) && log_step "Scrape channel filter: ${CHANNEL_ARGS[*]}"
else
log_step "Enabled targets: $(enabled_targets | paste -sd, -)"
fi
@ -198,6 +207,7 @@ main() {
scrape_per_target || failures=$((failures + 1))
else
local -a scrape_args=(--config "$CONFIG_PATH")
scrape_args+=("${CHANNEL_ARGS[@]}")
[[ -n "$TARGET" ]] && scrape_args+=(--target "$TARGET")
if (( DRY_RUN )); then
scrape_args+=(--dry-run)

View file

@ -83,6 +83,35 @@ grep -q 'Documents scrape run plan' "$DOC_OUT" || {
exit 1
}
# --- --channel passthrough: dry-run acceptance -------------------------------
# Run the documents wrapper in --dry-run mode with a single --target and a
# --channel ID, capturing stdout and stderr together. The check passes when the
# run-plan banner appears in the captured output, i.e. the wrapper got far
# enough to print its plan with --channel on the command line.
CHANNEL_DRY="$TMP_DIR/channel-dry-run.log"
"$REPO_ROOT/scripts/run-documents-scrape.sh" --dry-run --config "$TMP_DIR/config.json" --target demo --channel 111111111111111111 >"$CHANNEL_DRY" 2>&1
grep -q 'Documents scrape run plan' "$CHANNEL_DRY" || {
echo "expected dry-run to accept --channel passthrough" >&2
exit 1
}
# --- --channel passthrough: argument capture ----------------------------------
# Overwrite the file at $FAKE_DOCKER with a stub that appends its full argument
# list (one line per invocation) to the file named by FAKE_DOCKER_ARGS_LOG and
# then prints 'run succeeded'. The heredoc delimiter is quoted, so "$*" and
# "${FAKE_DOCKER_ARGS_LOG:?}" are written literally and only expand when the
# stub runs; the ':?' makes the stub fail if that variable is unset or empty.
ARGS_LOG="$TMP_DIR/compose-args.log"
cat >"$FAKE_DOCKER" <<'EOF'
#!/usr/bin/env bash
printf '%s\n' "$*" >>"${FAKE_DOCKER_ARGS_LOG:?}"
printf 'run succeeded\n'
EOF
chmod +x "$FAKE_DOCKER"
# Placeholder credentials file; the value is a dummy, not a real token.
printf 'DISCORD_TOKEN=dummy-token\n' >"$TMP_DIR/scrape.env"
# Real (non --dry-run) invocation with the same --target/--channel pair; stdout
# is discarded. The DCE_* variables are set for this one command only.
# NOTE(review): presumably DCE_DOCKER_BIN makes the wrapper call the stub instead
# of the real docker binary, DCE_ENV_FILE points it at the dummy env file, and
# DCE_MIN_FREE_MB / DCE_SKIP_SCRAPE_LOCK relax the disk-space and lock checks --
# confirm against the wrapper scripts, which are not visible here.
DCE_MIN_FREE_MB=0 \
DCE_SKIP_SCRAPE_LOCK=1 \
DCE_DOCKER_BIN="$FAKE_DOCKER" \
FAKE_DOCKER_ARGS_LOG="$ARGS_LOG" \
DCE_ENV_FILE="$TMP_DIR/scrape.env" \
"$REPO_ROOT/scripts/run-documents-scrape.sh" --config "$TMP_DIR/config.json" --target demo --channel 111111111111111111 >/dev/null
# The channel ID must appear somewhere in the arguments the stub recorded; on
# failure the captured arguments are dumped to stderr to aid debugging.
# NOTE(review): this matches the bare ID anywhere in the log and does not verify
# that it was passed as the value of a --channel flag.
grep -q '111111111111111111' "$ARGS_LOG" || {
echo "expected --channel to reach container compose invocation" >&2
cat "$ARGS_LOG" >&2
exit 1
}
# Run the readiness check in --disk-only mode against the temp config and
# require the 'disk-only: ok' marker in its stdout.
DCE_MIN_FREE_MB=0 DCE_CONFIG_FILE="$TMP_DIR/config.json" \
"$REPO_ROOT/scripts/verify-operator-ready.sh" --disk-only --config "$TMP_DIR/config.json" \
| grep -q 'disk-only: ok'