From 1142e376b5ac315c4573babfa9b2194bd0959f8c Mon Sep 17 00:00:00 2001 From: Boden Date: Fri, 29 May 2026 15:27:39 -0500 Subject: [PATCH] fix(scrape): disk preflight before compose and skippable disk errors Fail fast when archive or repo paths lack free space (DCE_MIN_FREE_MB), treat disk-full export failures as skippable channels, and add an offline disk-space smoke. Smokes default DCE_MIN_FREE_MB=0 so CI stays portable. --- .github/workflows/main.yml | 2 +- ...05-29-025-fix-disk-space-preflight-plan.md | 28 ++++++++++ docs/recurring-scrape-merge-readiness.md | 12 +++++ scripts/run-all-smokes.sh | 2 + scripts/run-discord-scrape.sh | 4 +- scripts/tests/archive-disk-space-smoke.sh | 54 +++++++++++++++++++ .../tests/run-operator-validation-smoke.sh | 3 +- scripts/tests/verify-operator-ready-smoke.sh | 4 +- scripts/verify-operator-ready.sh | 25 +++++++++ 9 files changed, 128 insertions(+), 6 deletions(-) create mode 100644 docs/plans/2026-05-29-025-fix-disk-space-preflight-plan.md create mode 100755 scripts/tests/archive-disk-space-smoke.sh diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 896c313d..ca12cc5e 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -67,7 +67,7 @@ jobs: - name: Run recurring scrape smoke tests run: | chmod +x scripts/*.sh scripts/tests/*.sh - ./scripts/run-all-smokes.sh + DCE_MIN_FREE_MB=0 ./scripts/run-all-smokes.sh test: # Tests need access to secrets, so we can't run them against PRs because of limited trust diff --git a/docs/plans/2026-05-29-025-fix-disk-space-preflight-plan.md b/docs/plans/2026-05-29-025-fix-disk-space-preflight-plan.md new file mode 100644 index 00000000..1336364c --- /dev/null +++ b/docs/plans/2026-05-29-025-fix-disk-space-preflight-plan.md @@ -0,0 +1,28 @@ +--- +title: "fix: Disk space preflight and skippable channel failures" +type: fix +status: complete +date: 2026-05-29 +origin: Repeated /lfg — full validation failed; /home at 100% capacity during KotOR export +--- + 
+# fix: Disk space preflight and skippable channel failures + +## Summary + +Host disk reached 100% during KotOR yes_general incremental export. Add archive-root free-space checks before scrape/validation and treat disk-full export errors as skippable channels so other channels in the same target still complete. + +## Requirements + +| ID | Requirement | +|----|-------------| +| R1 | `require_archive_disk_space` in verify-operator-ready (configurable `DCE_MIN_FREE_MB`, default 1024) | +| R2 | `run-operator-validation.sh` calls disk check after readiness | +| R3 | `is_skippable_channel_export_failure` matches no-space / SQLITE_FULL / ENOSPC | +| R4 | Smoke: disk check fails when `DCE_MIN_FREE_MB` absurdly high | +| R5 | Document disk requirement in merge-readiness | + +## Verification + +- `./scripts/tests/verify-operator-ready-smoke.sh` or new disk smoke +- `./scripts/run-all-smokes.sh` diff --git a/docs/recurring-scrape-merge-readiness.md b/docs/recurring-scrape-merge-readiness.md index ddff0898..8d5882eb 100644 --- a/docs/recurring-scrape-merge-readiness.md +++ b/docs/recurring-scrape-merge-readiness.md @@ -46,6 +46,18 @@ Full validation with log (GUI token sync + scrape + audit): Detail: [.docs/Recurring-Scrape-Setup.md](../.docs/Recurring-Scrape-Setup.md) · [operator checklist](recurring-scrape-operator-checklist.md) · [troubleshooting](../.docs/Recurring-Scrape-Troubleshooting.md) +## Disk space + +Incremental merges need temporary space (often 2× the largest channel JSON). Before scraping: + +```bash +df -h ~/Documents /home/brunner56/Downloads/DiscordChatExporter +./scripts/verify-operator-ready.sh # fails below 1 GiB free by default +``` + +Override threshold: `DCE_MIN_FREE_MB=2048 ./scripts/verify-operator-ready.sh` +Skip check (smokes only): `DCE_MIN_FREE_MB=0` + ## CI note (fork PRs) Upstream workflows may show `action_required` for cross-repo PRs from `th3w1zard1/DiscordChatExporter` until a maintainer approves workflow runs. 
Local `run-all-smokes.sh` is the authoritative offline gate. diff --git a/scripts/run-all-smokes.sh b/scripts/run-all-smokes.sh index 5fe62999..5fac71a7 100755 --- a/scripts/run-all-smokes.sh +++ b/scripts/run-all-smokes.sh @@ -43,6 +43,8 @@ main() { chmod +x "$REPO_ROOT"/scripts/*.sh "$tests_dir"/*.sh 2>/dev/null || true + export DCE_MIN_FREE_MB="${DCE_MIN_FREE_MB:-0}" + local script_path failures=0 ran=0 for script_path in "$tests_dir"/*.sh; do [[ -f "$script_path" ]] || continue diff --git a/scripts/run-discord-scrape.sh b/scripts/run-discord-scrape.sh index 9e29c3af..a7a96481 100755 --- a/scripts/run-discord-scrape.sh +++ b/scripts/run-discord-scrape.sh @@ -395,7 +395,7 @@ message_count() { is_skippable_channel_export_failure() { local log_file=$1 grep -qiE \ - "failed: forbidden|failed: not found|Missing Access|403 Forbidden|404 Not Found|Cannot read message history" \ + "failed: forbidden|failed: not found|Missing Access|403 Forbidden|404 Not Found|Cannot read message history|No space left on device|SQLITE_FULL|ENOSPC|disk full|not enough space" \ "$log_file" } @@ -423,7 +423,7 @@ export_channel_incremental() { fi if is_skippable_channel_export_failure "$export_log"; then - log "Skipping channel $channel_id (forbidden or inaccessible)." + log "Skipping channel $channel_id (inaccessible or non-fatal export error)." cat "$export_log" >&2 rm -f "$export_log" return 2 diff --git a/scripts/tests/archive-disk-space-smoke.sh b/scripts/tests/archive-disk-space-smoke.sh new file mode 100755 index 00000000..2c1e41cb --- /dev/null +++ b/scripts/tests/archive-disk-space-smoke.sh @@ -0,0 +1,54 @@ +#!/usr/bin/env bash + +set -Eeuo pipefail + +REPO_ROOT=$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." 
&& pwd -P) +VERIFY="$REPO_ROOT/scripts/verify-operator-ready.sh" +TMP_DIR=$(mktemp -d "${TMPDIR:-/tmp}/dce-disk-smoke.XXXXXX") +CONFIG_PATH="$TMP_DIR/config.json" +ENV_PATH="$TMP_DIR/scrape.env" + +cleanup() { + rm -rf "$TMP_DIR" +} +trap cleanup EXIT + +mkdir -p "$TMP_DIR/archive/demo" +printf '{"messages":[{"id":"1"}],"channel":{"id":"111111111111111111"}}\n' \ + >"$TMP_DIR/archive/demo/Guild - general [111111111111111111].json" + +cat >"$CONFIG_PATH" <"$ENV_PATH" + +set +e +output=$( + DCE_MIN_FREE_MB=999999999 \ + DCE_REPO_ROOT="$REPO_ROOT" \ + DCE_CONFIG_FILE="$CONFIG_PATH" \ + DCE_ENV_FILE="$ENV_PATH" \ + "$VERIFY" --config "$CONFIG_PATH" 2>&1 +) +verify_status=$? +set -e + +if (( verify_status != 0 )) && printf '%s\n' "$output" | grep -qi 'Insufficient disk space'; then + printf 'archive-disk-space-smoke: ok\n' + exit 0 +fi + +printf 'ERROR: expected disk space check to fail with high DCE_MIN_FREE_MB (status=%s)\n' "$verify_status" >&2 +printf '%s\n' "$output" >&2 +exit 1 diff --git a/scripts/tests/run-operator-validation-smoke.sh b/scripts/tests/run-operator-validation-smoke.sh index e62362ee..ddb6d901 100755 --- a/scripts/tests/run-operator-validation-smoke.sh +++ b/scripts/tests/run-operator-validation-smoke.sh @@ -58,7 +58,8 @@ EOF chmod +x "$FAKE_DOCKER" export PATH="$TMP_DIR:$PATH_BACKUP" -DCE_REPO_ROOT="$REPO_ROOT" DCE_CONFIG_FILE="$CONFIG_PATH" DCE_ENV_FILE="$ENV_PATH" DCE_LOG_DIR="$LOG_DIR" \ +DCE_MIN_FREE_MB=0 DCE_REPO_ROOT="$REPO_ROOT" DCE_CONFIG_FILE="$CONFIG_PATH" DCE_ENV_FILE="$ENV_PATH" \ + DCE_LOG_DIR="$LOG_DIR" \ "$RUNNER" --dry-run --per-target --config "$CONFIG_PATH" --log-file "$LOG_DIR/validation.log" grep -q 'Per-target summary: 2 succeeded, 0 failed' "$LOG_DIR/validation.log" || { diff --git a/scripts/tests/verify-operator-ready-smoke.sh b/scripts/tests/verify-operator-ready-smoke.sh index d1e25866..651f85da 100755 --- a/scripts/tests/verify-operator-ready-smoke.sh +++ b/scripts/tests/verify-operator-ready-smoke.sh @@ -47,10 +47,10 @@ 
EOF chmod +x "$FAKE_DOCKER" export PATH="$TMP_DIR:$PATH_BACKUP" -DCE_REPO_ROOT="$REPO_ROOT" DCE_CONFIG_FILE="$CONFIG_PATH" DCE_ENV_FILE="$ENV_PATH" \ +DCE_MIN_FREE_MB=0 DCE_REPO_ROOT="$REPO_ROOT" DCE_CONFIG_FILE="$CONFIG_PATH" DCE_ENV_FILE="$ENV_PATH" \ "$VERIFY" --config "$CONFIG_PATH" -if DCE_REPO_ROOT="$REPO_ROOT" DCE_CONFIG_FILE="$CONFIG_PATH" DCE_ENV_FILE="$ENV_PATH" \ +if DCE_MIN_FREE_MB=0 DCE_REPO_ROOT="$REPO_ROOT" DCE_CONFIG_FILE="$CONFIG_PATH" DCE_ENV_FILE="$ENV_PATH" \ "$VERIFY" --config "$CONFIG_PATH" --preflight demo 2>/dev/null; then printf 'ERROR: preflight should fail without real container/token\n' >&2 exit 1 diff --git a/scripts/verify-operator-ready.sh b/scripts/verify-operator-ready.sh index 987a96cd..2833293f 100755 --- a/scripts/verify-operator-ready.sh +++ b/scripts/verify-operator-ready.sh @@ -51,6 +51,47 @@ resolve_compose() { die "Install Docker or Podman with compose support." } +# Fail fast when the filesystem holding archive_root or the repo checkout has +# less than DCE_MIN_FREE_MB MiB available (default 1024; 0 or less skips). +# Reads CONFIG_PATH and REPO_ROOT; exits via die on any failure. +require_archive_disk_space() { + local min_mb=${DCE_MIN_FREE_MB:-1024} + local archive_root path probe df_fields mount avail_kb need_kb + + # Validate before arithmetic: (( )) would read a word as a variable name + # (silently 0, i.e. "skip") and would evaluate subscripts inside the value. + [[ "$min_mb" =~ ^(0|-?[1-9][0-9]*)$ ]] || die "DCE_MIN_FREE_MB must be an integer number of MiB (got: $min_mb)." + + if (( min_mb <= 0 )); then + printf 'disk: check skipped (DCE_MIN_FREE_MB=%s)\n' "$min_mb" + return 0 + fi + + archive_root=$(jq -r '.archive_root // empty' "$CONFIG_PATH") + [[ -n "$archive_root" && "$archive_root" != null ]] || die "Config is missing archive_root." + need_kb=$((min_mb * 1024)) + + for path in "$archive_root" "$REPO_ROOT"; do + # A path that does not exist yet will be created on its nearest existing + # ancestor's filesystem, so measure that instead of skipping the check. + probe=$path + while [[ ! -e "$probe" && "$probe" == */* ]]; do + probe=${probe%/*} + probe=${probe:-/} + done + [[ -e "$probe" ]] || probe=. + # One df call yields both the available KiB and the mount point. + df_fields=$(df -Pk "$probe" | awk 'NR==2 {print $4, $6}') + avail_kb=${df_fields%% *} + mount=${df_fields#* } + [[ "$avail_kb" =~ ^[0-9]+$ ]] || die "Could not read free space for $path" + if (( avail_kb < need_kb )); then + die "Insufficient disk space on $mount: $((avail_kb / 1024)) MiB free, need at least ${min_mb} MiB for $path. Free space before scraping."
+ fi + printf 'disk: %s has %s MiB free (need %s MiB)\n' "$mount" "$((avail_kb / 1024))" "$min_mb" + done +} + check_auth() { if [[ -f "$ENV_FILE" ]] && grep -qE '^[[:space:]]*DISCORD_TOKEN=' "$ENV_FILE"; then printf 'auth: scrape.env has DISCORD_TOKEN\n' @@ -100,6 +141,7 @@ main() { printf 'Operator readiness checks\n' printf '=========================\n' + require_archive_disk_space resolve_compose check_auth printf 'config: %s\n\n' "$CONFIG_PATH"