fix(scrape): disk preflight before compose and skippable disk errors

Fail fast when archive or repo paths lack free space (DCE_MIN_FREE_MB),
treat disk-full export failures as skippable channels, and add an offline
disk-space smoke. Smokes default DCE_MIN_FREE_MB=0 so CI stays portable.
This commit is contained in:
Boden 2026-05-29 15:27:39 -05:00
parent a4fd78275d
commit 1142e376b5
9 changed files with 128 additions and 6 deletions

View file

@ -67,7 +67,7 @@ jobs:
- name: Run recurring scrape smoke tests
run: |
chmod +x scripts/*.sh scripts/tests/*.sh
./scripts/run-all-smokes.sh
DCE_MIN_FREE_MB=0 ./scripts/run-all-smokes.sh
test:
# Tests need access to secrets, so we can't run them against PRs because of limited trust

View file

@ -0,0 +1,28 @@
---
title: "fix: Disk space preflight and skippable channel failures"
type: fix
status: complete
date: 2026-05-29
origin: Repeated /lfg — full validation failed; /home at 100% capacity during KotOR export
---
# fix: Disk space preflight and skippable channel failures
## Summary
Host disk reached 100% during KotOR yes_general incremental export. Add archive-root free-space checks before scrape/validation and treat disk-full export errors as skippable channels so other channels in the same target still complete.
## Requirements
| ID | Requirement |
|----|-------------|
| R1 | `require_archive_disk_space` in verify-operator-ready (configurable `DCE_MIN_FREE_MB`, default 1024) |
| R2 | `run-operator-validation.sh` calls disk check after readiness |
| R3 | `is_skippable_channel_export_failure` matches no-space / SQLITE_FULL / ENOSPC |
| R4 | Smoke: disk check fails when `DCE_MIN_FREE_MB` absurdly high |
| R5 | Document disk requirement in merge-readiness |
## Verification
- `./scripts/tests/verify-operator-ready-smoke.sh` or new disk smoke
- `./scripts/run-all-smokes.sh`

View file

@ -46,6 +46,18 @@ Full validation with log (GUI token sync + scrape + audit):
Detail: [.docs/Recurring-Scrape-Setup.md](../.docs/Recurring-Scrape-Setup.md) · [operator checklist](recurring-scrape-operator-checklist.md) · [troubleshooting](../.docs/Recurring-Scrape-Troubleshooting.md)
## Disk space
Incremental merges need temporary space (often 2× the largest channel JSON). Before scraping:
```bash
df -h ~/Documents /home/brunner56/Downloads/DiscordChatExporter
./scripts/verify-operator-ready.sh # fails below 1 GiB free by default
```
Override threshold: `DCE_MIN_FREE_MB=2048 ./scripts/verify-operator-ready.sh`
Skip check (smokes only): `DCE_MIN_FREE_MB=0`
## CI note (fork PRs)
Upstream workflows may show `action_required` for cross-repo PRs from `th3w1zard1/DiscordChatExporter` until a maintainer approves workflow runs. Local `run-all-smokes.sh` is the authoritative offline gate.

View file

@ -43,6 +43,8 @@ main() {
chmod +x "$REPO_ROOT"/scripts/*.sh "$tests_dir"/*.sh 2>/dev/null || true
export DCE_MIN_FREE_MB="${DCE_MIN_FREE_MB:-0}"
local script_path failures=0 ran=0
for script_path in "$tests_dir"/*.sh; do
[[ -f "$script_path" ]] || continue

View file

@ -395,7 +395,7 @@ message_count() {
#######################################
# Decide whether a failed channel export may be skipped rather than treated
# as fatal.
# Arguments: $1 - path to the captured export log
# Returns:   0 when the log contains a known skippable failure signature
#            (access / not-found errors or disk-full errors, matched
#            case-insensitively); otherwise grep's non-zero status.
#######################################
is_skippable_channel_export_failure() {
  local log_file=$1
  # Channel is inaccessible to the current token.
  local -r access_errors='failed: forbidden|failed: not found|Missing Access|403 Forbidden|404 Not Found|Cannot read message history'
  # Export ran out of disk space.
  local -r disk_errors='No space left on device|SQLITE_FULL|ENOSPC|disk full|not enough space'

  # A single pattern argument: a second positional pattern would be read by
  # grep as a file name. `--` keeps the pattern from being parsed as an option.
  grep -qiE -- "${access_errors}|${disk_errors}" "$log_file"
}
@ -423,7 +423,7 @@ export_channel_incremental() {
fi
if is_skippable_channel_export_failure "$export_log"; then
log "Skipping channel $channel_id (forbidden or inaccessible)."
log "Skipping channel $channel_id (inaccessible or non-fatal export error)."
cat "$export_log" >&2
rm -f "$export_log"
return 2

View file

@ -0,0 +1,54 @@
#!/usr/bin/env bash
# Offline smoke: verify-operator-ready.sh must refuse to proceed when
# DCE_MIN_FREE_MB is set far above any realistic amount of free space.
# Builds a throwaway config/env/archive under a temp dir, runs the verifier,
# and expects a non-zero exit plus an "Insufficient disk space" message.
set -Eeuo pipefail

REPO_ROOT=$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd -P)
VERIFY="$REPO_ROOT/scripts/verify-operator-ready.sh"
TMP_DIR=$(mktemp -d "${TMPDIR:-/tmp}/dce-disk-smoke.XXXXXX")
CONFIG_PATH="$TMP_DIR/config.json"
ENV_PATH="$TMP_DIR/scrape.env"

# Remove the scratch directory on every exit path.
trap 'rm -rf "$TMP_DIR"' EXIT

# Minimal archive fixture: one target directory holding one exported channel.
target_dir="$TMP_DIR/archive/demo"
mkdir -p "$target_dir"
printf '{"messages":[{"id":"1"}],"channel":{"id":"111111111111111111"}}\n' \
  >"$target_dir/Guild - general [111111111111111111].json"

# Config pointing the verifier at the fixture (unquoted delimiter so the
# temp paths are expanded into the JSON).
cat >"$CONFIG_PATH" <<JSON
{
"archive_root": "$TMP_DIR/archive",
"targets": [
{
"name": "demo",
"kind": "guild",
"output_dir": "$TMP_DIR/archive/demo",
"enabled": true
}
]
}
JSON

printf 'DISCORD_TOKEN=dummy\n' >"$ENV_PATH"

# Capture combined output and exit status without letting errexit abort us.
verify_status=0
output=$(
  DCE_MIN_FREE_MB=999999999 \
    DCE_REPO_ROOT="$REPO_ROOT" \
    DCE_CONFIG_FILE="$CONFIG_PATH" \
    DCE_ENV_FILE="$ENV_PATH" \
    "$VERIFY" --config "$CONFIG_PATH" 2>&1
) || verify_status=$?

# Anything other than "failed AND mentioned insufficient disk space" is a bug.
if (( verify_status == 0 )) || ! printf '%s\n' "$output" | grep -qi 'Insufficient disk space'; then
  printf 'ERROR: expected disk space check to fail with high DCE_MIN_FREE_MB (status=%s)\n' "$verify_status" >&2
  printf '%s\n' "$output" >&2
  exit 1
fi

printf 'archive-disk-space-smoke: ok\n'
exit 0

View file

@ -58,7 +58,8 @@ EOF
chmod +x "$FAKE_DOCKER"
export PATH="$TMP_DIR:$PATH_BACKUP"
DCE_REPO_ROOT="$REPO_ROOT" DCE_CONFIG_FILE="$CONFIG_PATH" DCE_ENV_FILE="$ENV_PATH" DCE_LOG_DIR="$LOG_DIR" \
DCE_MIN_FREE_MB=0 DCE_REPO_ROOT="$REPO_ROOT" DCE_CONFIG_FILE="$CONFIG_PATH" DCE_ENV_FILE="$ENV_PATH" \
DCE_LOG_DIR="$LOG_DIR" \
"$RUNNER" --dry-run --per-target --config "$CONFIG_PATH" --log-file "$LOG_DIR/validation.log"
grep -q 'Per-target summary: 2 succeeded, 0 failed' "$LOG_DIR/validation.log" || {

View file

@ -47,10 +47,10 @@ EOF
chmod +x "$FAKE_DOCKER"
export PATH="$TMP_DIR:$PATH_BACKUP"
DCE_REPO_ROOT="$REPO_ROOT" DCE_CONFIG_FILE="$CONFIG_PATH" DCE_ENV_FILE="$ENV_PATH" \
DCE_MIN_FREE_MB=0 DCE_REPO_ROOT="$REPO_ROOT" DCE_CONFIG_FILE="$CONFIG_PATH" DCE_ENV_FILE="$ENV_PATH" \
"$VERIFY" --config "$CONFIG_PATH"
if DCE_REPO_ROOT="$REPO_ROOT" DCE_CONFIG_FILE="$CONFIG_PATH" DCE_ENV_FILE="$ENV_PATH" \
if DCE_MIN_FREE_MB=0 DCE_REPO_ROOT="$REPO_ROOT" DCE_CONFIG_FILE="$CONFIG_PATH" DCE_ENV_FILE="$ENV_PATH" \
"$VERIFY" --config "$CONFIG_PATH" --preflight demo 2>/dev/null; then
printf 'ERROR: preflight should fail without real container/token\n' >&2
exit 1

View file

@ -51,6 +51,30 @@ resolve_compose() {
die "Install Docker or Podman with compose support."
}
#######################################
# Fail fast when the filesystem holding the archive root or the repo root
# has less free space than the configured minimum.
# Globals:   DCE_MIN_FREE_MB (read) - minimum free MiB, default 1024;
#                                     zero or negative disables the check
#            CONFIG_PATH (read)     - JSON config providing .archive_root
#            REPO_ROOT (read)       - second path whose filesystem is checked
# Outputs:   one "disk: ..." status line per checked path on stdout
# Returns:   0 when the check is disabled or every path has enough space;
#            every failure is reported through die
#######################################
require_archive_disk_space() {
  local min_mb=${DCE_MIN_FREE_MB:-1024}
  local archive_root path probe label df_fields mount_point avail_kb need_kb

  # Validate before any arithmetic: inside (( )) a non-numeric string is
  # evaluated as an expression (an unset name is 0), which would silently
  # disable the check.
  [[ "$min_mb" =~ ^-?[0-9]+$ ]] \
    || die "DCE_MIN_FREE_MB must be an integer number of MiB (got '$min_mb')."

  # 10# forces base 10 so values with leading zeros are not parsed as octal.
  if [[ "$min_mb" == -* ]] || (( 10#$min_mb == 0 )); then
    printf 'disk: check skipped (DCE_MIN_FREE_MB=%s)\n' "$min_mb"
    return 0
  fi
  min_mb=$((10#$min_mb))

  archive_root=$(jq -r '.archive_root // empty' "$CONFIG_PATH") \
    || die "Could not read archive_root from $CONFIG_PATH."
  [[ -n "$archive_root" && "$archive_root" != null ]] || die "Config is missing archive_root."

  need_kb=$((min_mb * 1024))
  for path in "$archive_root" "$REPO_ROOT"; do
    if [[ "$path" == "$archive_root" ]]; then
      label=archive_root
    else
      label='repo root'
    fi

    # A path that does not exist yet will be created on the filesystem of
    # its nearest existing ancestor, so measure that instead of skipping.
    probe=$path
    while [[ ! -e "$probe" && "$probe" == */* ]]; do
      probe=${probe%/*}
      [[ -n "$probe" ]] || probe=/
    done
    [[ -e "$probe" ]] || probe=.

    # One df call per path; -P guarantees a single data line per filesystem.
    df_fields=$(df -Pk -- "$probe" | awk 'NR==2 {print $4, $6}') \
      || die "Could not read free space for $path"
    avail_kb=${df_fields%% *}
    mount_point=${df_fields#* }
    [[ "$avail_kb" =~ ^[0-9]+$ ]] || die "Could not read free space for $path"

    if (( avail_kb < need_kb )); then
      die "Insufficient disk space on $mount_point: $((avail_kb / 1024)) MiB free, need at least ${min_mb} MiB for $label ($path). Free space before scraping."
    fi
    printf 'disk: %s has %s MiB free (need %s MiB)\n' "$mount_point" "$((avail_kb / 1024))" "$min_mb"
  done
}
check_auth() {
if [[ -f "$ENV_FILE" ]] && grep -qE '^[[:space:]]*DISCORD_TOKEN=' "$ENV_FILE"; then
printf 'auth: scrape.env has DISCORD_TOKEN\n'
@ -100,6 +124,7 @@ main() {
printf 'Operator readiness checks\n'
printf '=========================\n'
require_archive_disk_space
resolve_compose
check_auth
printf 'config: %s\n\n' "$CONFIG_PATH"