mirror of
https://github.com/Tyrrrz/DiscordChatExporter.git
synced 2026-06-09 15:52:37 -06:00
feat(scrape): disk-only verify flag and documents scrape preflight
run-documents-scrape.sh checks archive free space before live Discord calls; verify-operator-ready.sh adds --disk-only for lightweight checks.
This commit is contained in:
parent
1142e376b5
commit
32b7f47d45
|
|
@ -0,0 +1,27 @@
|
||||||
|
---
|
||||||
|
title: feat: Disk preflight on documents scrape entrypoint
|
||||||
|
type: feat
|
||||||
|
status: complete
|
||||||
|
date: 2026-05-29
|
||||||
|
origin: /lfg — plan 025 added disk checks to verify-operator-ready but run-documents-scrape bypassed them
|
||||||
|
---
|
||||||
|
|
||||||
|
# feat: Disk preflight on documents scrape entrypoint
|
||||||
|
|
||||||
|
## Summary
|
||||||
|
|
||||||
|
Operators often run `./scripts/run-documents-scrape.sh` directly (and the monthly cron uses the host runner), which bypasses the disk checks in `verify-operator-ready.sh`. Run that same archive disk check before any live Discord scrape so that a full disk fails fast with a clear message.
|
||||||
|
|
||||||
|
## Requirements
|
||||||
|
|
||||||
|
| ID | Requirement |
|
||||||
|
|----|-------------|
|
||||||
|
| R1 | `verify-operator-ready.sh --disk-only` runs only config JSON validation and `require_archive_disk_space`, then exits |
|
||||||
|
| R2 | `run-documents-scrape.sh` invokes disk check before preflight/scrape (not on `--dry-run`) |
|
||||||
|
| R3 | `documents-scrape-smoke.sh` covers `--disk-only` success path with `DCE_MIN_FREE_MB=0` |
|
||||||
|
| R4 | `run-all-smokes.sh` still passes |
|
||||||
|
|
||||||
|
## Verification
|
||||||
|
|
||||||
|
- `./scripts/tests/documents-scrape-smoke.sh`
|
||||||
|
- `DCE_MIN_FREE_MB=0 ./scripts/run-all-smokes.sh`
|
||||||
|
|
@ -9,6 +9,7 @@ CONTAINER_CONFIG="${DCE_CONTAINER_CONFIG:-/config/scrape-targets.json}"
|
||||||
HOST_RUNNER="$REPO_ROOT/scripts/run-discord-scrape-host.sh"
|
HOST_RUNNER="$REPO_ROOT/scripts/run-discord-scrape-host.sh"
|
||||||
DISCOVER_TOKEN="$REPO_ROOT/scripts/discover-discord-token.sh"
|
DISCOVER_TOKEN="$REPO_ROOT/scripts/discover-discord-token.sh"
|
||||||
VERIFY_SCRIPT="$REPO_ROOT/scripts/verify-documents-archives.sh"
|
VERIFY_SCRIPT="$REPO_ROOT/scripts/verify-documents-archives.sh"
|
||||||
|
VERIFY_READY="$REPO_ROOT/scripts/verify-operator-ready.sh"
|
||||||
SETUP_AUTH="$REPO_ROOT/scripts/setup-scrape-auth.sh"
|
SETUP_AUTH="$REPO_ROOT/scripts/setup-scrape-auth.sh"
|
||||||
|
|
||||||
usage() {
|
usage() {
|
||||||
|
|
@ -74,6 +75,8 @@ main() {
|
||||||
exit 0
|
exit 0
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
"$VERIFY_READY" --disk-only --config "$CONFIG_PATH"
|
||||||
|
|
||||||
if [[ -n "${DISCORD_TOKEN:-}" || -n "${DISCORD_TOKEN_FILE:-}" ]]; then
|
if [[ -n "${DISCORD_TOKEN:-}" || -n "${DISCORD_TOKEN_FILE:-}" ]]; then
|
||||||
"$SETUP_AUTH" 2>/dev/null || true
|
"$SETUP_AUTH" 2>/dev/null || true
|
||||||
elif [[ -x "$DISCOVER_TOKEN" ]] && "$DISCOVER_TOKEN" >/dev/null 2>&1; then
|
elif [[ -x "$DISCOVER_TOKEN" ]] && "$DISCOVER_TOKEN" >/dev/null 2>&1; then
|
||||||
|
|
|
||||||
|
|
@ -75,4 +75,8 @@ DCE_REPO_ROOT="$REPO_ROOT" \
|
||||||
|
|
||||||
"$REPO_ROOT/scripts/run-documents-scrape.sh" --dry-run --config "$TMP_DIR/config.json" >/dev/null
|
"$REPO_ROOT/scripts/run-documents-scrape.sh" --dry-run --config "$TMP_DIR/config.json" >/dev/null
|
||||||
|
|
||||||
|
DCE_MIN_FREE_MB=0 DCE_CONFIG_FILE="$TMP_DIR/config.json" \
|
||||||
|
"$REPO_ROOT/scripts/verify-operator-ready.sh" --disk-only --config "$TMP_DIR/config.json" \
|
||||||
|
| grep -q 'disk-only: ok'
|
||||||
|
|
||||||
echo "documents-scrape-smoke: ok"
|
echo "documents-scrape-smoke: ok"
|
||||||
|
|
|
||||||
|
|
@ -10,14 +10,16 @@ HOST_RUNNER="$REPO_ROOT/scripts/run-discord-scrape-host.sh"
|
||||||
VERIFY_ARCHIVES="$REPO_ROOT/scripts/verify-documents-archives.sh"
|
VERIFY_ARCHIVES="$REPO_ROOT/scripts/verify-documents-archives.sh"
|
||||||
DISCOVER="$REPO_ROOT/scripts/discover-discord-token.sh"
|
DISCOVER="$REPO_ROOT/scripts/discover-discord-token.sh"
|
||||||
PREFLIGHT_TARGET=""
|
PREFLIGHT_TARGET=""
|
||||||
|
DISK_ONLY=0
|
||||||
|
|
||||||
usage() {
|
usage() {
|
||||||
cat <<EOF
|
cat <<EOF
|
||||||
Usage:
|
Usage:
|
||||||
$(basename "$0") [--config PATH] [--preflight TARGET]
|
$(basename "$0") [--config PATH] [--disk-only] [--preflight TARGET]
|
||||||
|
|
||||||
Check host prerequisites for recurring scrape:
|
Check host prerequisites for recurring scrape:
|
||||||
jq, container compose, Discord auth, valid config, seeded archives.
|
jq, container compose, Discord auth, valid config, seeded archives.
|
||||||
|
With --disk-only, only validate config JSON and archive-root free space (DCE_MIN_FREE_MB).
|
||||||
With --preflight TARGET, also run Discord preflight for one target.
|
With --preflight TARGET, also run Discord preflight for one target.
|
||||||
EOF
|
EOF
|
||||||
}
|
}
|
||||||
|
|
@ -103,6 +105,10 @@ main() {
|
||||||
CONFIG_PATH=$2
|
CONFIG_PATH=$2
|
||||||
shift 2
|
shift 2
|
||||||
;;
|
;;
|
||||||
|
--disk-only)
|
||||||
|
DISK_ONLY=1
|
||||||
|
shift
|
||||||
|
;;
|
||||||
--preflight)
|
--preflight)
|
||||||
[[ $# -ge 2 ]] || die "Missing value for --preflight."
|
[[ $# -ge 2 ]] || die "Missing value for --preflight."
|
||||||
PREFLIGHT_TARGET=$2
|
PREFLIGHT_TARGET=$2
|
||||||
|
|
@ -122,6 +128,12 @@ main() {
|
||||||
[[ -f "$CONFIG_PATH" ]] || die "Missing config: $CONFIG_PATH"
|
[[ -f "$CONFIG_PATH" ]] || die "Missing config: $CONFIG_PATH"
|
||||||
jq empty "$CONFIG_PATH" >/dev/null 2>&1 || die "Invalid JSON config: $CONFIG_PATH"
|
jq empty "$CONFIG_PATH" >/dev/null 2>&1 || die "Invalid JSON config: $CONFIG_PATH"
|
||||||
|
|
||||||
|
if (( DISK_ONLY == 1 )); then
|
||||||
|
require_archive_disk_space
|
||||||
|
printf 'disk-only: ok (config %s)\n' "$CONFIG_PATH"
|
||||||
|
exit 0
|
||||||
|
fi
|
||||||
|
|
||||||
printf 'Operator readiness checks\n'
|
printf 'Operator readiness checks\n'
|
||||||
printf '=========================\n'
|
printf '=========================\n'
|
||||||
require_archive_disk_space
|
require_archive_disk_space
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue