feat(scrape): label OOM skips and hint container memory

Classify aborted/OOM export skips as SKIPPED (OOM/aborted) in the run
summary with salvage/memory guidance; verify-operator-ready shows
configured DCE_CONTAINER_MEMORY.
This commit is contained in:
Copilot 2026-06-03 09:38:45 -05:00
parent e9a3fea9d1
commit a827e6b9bc
6 changed files with 118 additions and 3 deletions

View file

@ -0,0 +1,49 @@
---
title: "feat: OOM skip labels and operator hints in scrape summary"
type: feat
status: complete
date: 2026-06-04
origin: /lfg — plans 063–064 added DCE_CONTAINER_MEMORY docs; scrape summary still labels OOM skips as generic SKIPPED with no next-step hint
---
# feat: OOM skip labels and operator hints in scrape summary
## Summary
Distinguish OOM/aborted export skips in the scrape run summary and print a one-line operator hint when they occur. Show configured `DCE_CONTAINER_MEMORY` in `verify-operator-ready` output.
## Requirements
| ID | Requirement |
|----|-------------|
| R1 | `export_channel_incremental` classifies OOM/aborted skips (exit 134/137/139) |
| R2 | Run summary uses `SKIPPED_OOM` with distinct log line vs generic `SKIPPED` |
| R3 | When any `SKIPPED_OOM`, summary footer hints `DCE_CONTAINER_MEMORY` and `--salvage-before-scrape` |
| R4 | `verify-operator-ready.sh` prints the configured `DCE_CONTAINER_MEMORY` (read from `scrape.env`, falling back to the environment) unless it is empty or `0` |
| R5 | Smokes assert `SKIPPED_OOM` for abort fixture and memory line in verify output |
| R6 | `run-all-smokes.sh` → 21/21 |
## Implementation Units
### U1. OOM skip classification and summary hints
**Files:** `scripts/run-discord-scrape.sh`, `scripts/tests/run-discord-scrape-smoke.sh`
### U2. Verify-operator-ready memory line
**Files:** `scripts/verify-operator-ready.sh`, `scripts/tests/verify-operator-ready-smoke.sh`
## Verification
```bash
./scripts/tests/run-discord-scrape-smoke.sh
./scripts/tests/verify-operator-ready-smoke.sh
DCE_MIN_FREE_MB=0 ./scripts/run-all-smokes.sh
```
## Scope Boundaries
### Deferred
- Live KotOR catch-up on host
- Per-target memory in config JSON

View file

@ -156,6 +156,8 @@ DCE_MIN_FREE_MB=0 ./scripts/run-operator-validation.sh \
**Plan 064 (2026-06-04):** OOM, scrape-lock, and partial-temp salvage runbooks in `.docs/Recurring-Scrape-Troubleshooting.md`; GUI bridge notes `DCE_CONTAINER_MEMORY` for yes_general.
**Plan 065 (2026-06-04):** Scrape summary labels OOM skips as `SKIPPED (OOM/aborted)` with operator hint; `verify-operator-ready` prints configured container memory.
**Disk:** ~65 GiB free on `/home` (2026-05-30); large channel merges still need headroom.
## CI note (fork PRs)

View file

@ -49,9 +49,11 @@ log() {
} }
SCRAPE_SUMMARY_ENTRIES=() SCRAPE_SUMMARY_ENTRIES=()
LAST_EXPORT_SKIP_KIND=""
reset_scrape_summary() { reset_scrape_summary() {
SCRAPE_SUMMARY_ENTRIES=() SCRAPE_SUMMARY_ENTRIES=()
LAST_EXPORT_SKIP_KIND=""
} }
record_channel_result() { record_channel_result() {
@ -163,7 +165,7 @@ log_run_plan() {
print_scrape_summary() { print_scrape_summary() {
local entry target_name channel_id guild_label file_path action local entry target_name channel_id guild_label file_path action
local before_count fetched_count after_count delta appended=0 local before_count fetched_count after_count delta appended=0
local created=0 merged=0 unchanged=0 skipped=0 local created=0 merged=0 unchanged=0 skipped=0 skipped_oom=0
log '=== Scrape run summary ===' log '=== Scrape run summary ==='
@ -192,6 +194,11 @@ print_scrape_summary() {
unchanged=$((unchanged + 1)) unchanged=$((unchanged + 1))
log " UNCHANGED $file_path $after_count messages channel $channel_id $guild_label" log " UNCHANGED $file_path $after_count messages channel $channel_id $guild_label"
;; ;;
SKIPPED_OOM)
skipped=$((skipped + 1))
skipped_oom=$((skipped_oom + 1))
log " SKIPPED (OOM/aborted) channel $channel_id $guild_label (partial temp preserved under .dce-temp when present)"
;;
SKIPPED) SKIPPED)
skipped=$((skipped + 1)) skipped=$((skipped + 1))
log " SKIPPED channel $channel_id $guild_label (inaccessible or non-fatal export error)" log " SKIPPED channel $channel_id $guild_label (inaccessible or non-fatal export error)"
@ -203,6 +210,9 @@ print_scrape_summary() {
done done
log "Totals: $created created, $merged merged, $unchanged unchanged, $skipped skipped; +$appended messages appended" log "Totals: $created created, $merged merged, $unchanged unchanged, $skipped skipped; +$appended messages appended"
if (( skipped_oom > 0 )); then
log "Hint: for OOM/aborted channels, set DCE_CONTAINER_MEMORY=8g in scrape.env, run --salvage-before-scrape, then retry with --channel."
fi
} }
die() { die() {
@ -755,6 +765,8 @@ export_channel_incremental() {
local -a export_command local -a export_command
local export_log export_status=0 local export_log export_status=0
LAST_EXPORT_SKIP_KIND=""
export_command=("$CLI_BIN" export --channel "$channel_id" --format Json --output "$temp_export") export_command=("$CLI_BIN" export --channel "$channel_id" --format Json --output "$temp_export")
if [[ -n "$after_id" ]]; then if [[ -n "$after_id" ]]; then
export_command+=(--after "$after_id") export_command+=(--after "$after_id")
@ -773,6 +785,11 @@ export_channel_incremental() {
# SIGINT (130), SIGTERM (143), SIGABRT (134), SIGKILL/OOM (137), SIGSEGV (139) # SIGINT (130), SIGTERM (143), SIGABRT (134), SIGKILL/OOM (137), SIGSEGV (139)
if (( export_status == 130 || export_status == 143 || export_status == 134 || export_status == 137 || export_status == 139 )); then if (( export_status == 130 || export_status == 143 || export_status == 134 || export_status == 137 || export_status == 139 )); then
if (( export_status == 134 || export_status == 137 || export_status == 139 )); then
LAST_EXPORT_SKIP_KIND=oom
else
LAST_EXPORT_SKIP_KIND=abort
fi
log "Skipping channel $channel_id (export process aborted, exit $export_status)." log "Skipping channel $channel_id (export process aborted, exit $export_status)."
[[ -s "$export_log" ]] && cat "$export_log" >&2 [[ -s "$export_log" ]] && cat "$export_log" >&2
rm -f "$export_log" rm -f "$export_log"
@ -780,6 +797,11 @@ export_channel_incremental() {
fi fi
if is_skippable_channel_export_failure "$export_log"; then if is_skippable_channel_export_failure "$export_log"; then
if grep -qiE 'out of memory|OOM|Killed|SIGKILL|SIGABRT|Aborted \\(core dumped\\)|core dumped' "$export_log"; then
LAST_EXPORT_SKIP_KIND=oom
else
LAST_EXPORT_SKIP_KIND=access
fi
log "Skipping channel $channel_id (inaccessible or non-fatal export error)." log "Skipping channel $channel_id (inaccessible or non-fatal export error)."
cat "$export_log" >&2 cat "$export_log" >&2
rm -f "$export_log" rm -f "$export_log"
@ -1268,7 +1290,11 @@ scrape_target() {
rm -rf "$temp_dir" rm -rf "$temp_dir"
fi fi
skipped_channels=$((skipped_channels + 1)) skipped_channels=$((skipped_channels + 1))
record_channel_result "$target_name" "$channel_id" "$guild_label" "${destination_path:-n/a}" SKIPPED "$before_count" 0 "$before_count" local skip_action=SKIPPED
if [[ "${LAST_EXPORT_SKIP_KIND:-}" == oom ]]; then
skip_action=SKIPPED_OOM
fi
record_channel_result "$target_name" "$channel_id" "$guild_label" "${destination_path:-n/a}" "$skip_action" "$before_count" 0 "$before_count"
continue continue
;; ;;
*) *)

View file

@ -416,7 +416,8 @@ run_wrapper skip-abort append 2>"$SKIP_ABORT_LOG"
SKIP_ABORT_DEST="$ARCHIVE_ROOT/skip-abort/$DEFAULT_FILE_NAME" SKIP_ABORT_DEST="$ARCHIVE_ROOT/skip-abort/$DEFAULT_FILE_NAME"
[[ "$(jq -r '.messages | length' "$SKIP_ABORT_DEST")" == "3" ]] || { echo "expected skip-abort to append accessible channel" >&2; exit 1; } [[ "$(jq -r '.messages | length' "$SKIP_ABORT_DEST")" == "3" ]] || { echo "expected skip-abort to append accessible channel" >&2; exit 1; }
[[ ! -e "$ARCHIVE_ROOT/skip-abort/channels/134.json" ]] || { echo "unexpected fallback file for skipped abort channel" >&2; exit 1; } [[ ! -e "$ARCHIVE_ROOT/skip-abort/channels/134.json" ]] || { echo "unexpected fallback file for skipped abort channel" >&2; exit 1; }
grep -q 'SKIPPED.*134' "$SKIP_ABORT_LOG" || { echo "expected SKIPPED line for abort channel 134" >&2; exit 1; } grep -q 'SKIPPED (OOM/aborted).*134' "$SKIP_ABORT_LOG" || { echo "expected SKIPPED_OOM line for abort channel 134" >&2; exit 1; }
grep -q 'Hint: for OOM/aborted channels' "$SKIP_ABORT_LOG" || { echo "expected OOM operator hint in scrape summary" >&2; exit 1; }
grep -q 'Preserving partial export temp' "$SKIP_ABORT_LOG" || { echo "expected partial temp preserved on abort channel 134" >&2; exit 1; } grep -q 'Preserving partial export temp' "$SKIP_ABORT_LOG" || { echo "expected partial temp preserved on abort channel 134" >&2; exit 1; }
partial_temp_dirs=( "$ARCHIVE_ROOT/skip-abort/.dce-temp"/export.134.* ) partial_temp_dirs=( "$ARCHIVE_ROOT/skip-abort/.dce-temp"/export.134.* )
[[ -d "${partial_temp_dirs[0]}" ]] || { echo "expected partial temp dir preserved for channel 134" >&2; exit 1; } [[ -d "${partial_temp_dirs[0]}" ]] || { echo "expected partial temp dir preserved for channel 134" >&2; exit 1; }

View file

@ -37,6 +37,27 @@ JSON
printf 'DISCORD_TOKEN=dummy\n' >"$ENV_PATH" printf 'DISCORD_TOKEN=dummy\n' >"$ENV_PATH"
# Baseline check: run the verifier against the current env file and require
# the "Operator ready" marker. stderr is folded into the capture (2>&1) so the
# failure dump below shows everything the verifier printed.
mem_output=$(
DCE_MIN_FREE_MB=0 DCE_REPO_ROOT="$REPO_ROOT" DCE_CONFIG_FILE="$CONFIG_PATH" DCE_ENV_FILE="$ENV_PATH" \
"$VERIFY" --config "$CONFIG_PATH" 2>&1
)
grep -q 'Operator ready' <<<"$mem_output" || {
printf 'ERROR: verify-operator-ready failed\n' >&2
printf '%s\n' "$mem_output" >&2
exit 1
}
# Memory-line check: rewrite the env file so it also sets
# DCE_CONTAINER_MEMORY=8g, rerun the verifier, and require the
# "container memory: 8g" line in its output.
printf 'DISCORD_TOKEN=dummy\nDCE_CONTAINER_MEMORY=8g\n' >"$ENV_PATH"
mem_output=$(
DCE_MIN_FREE_MB=0 DCE_REPO_ROOT="$REPO_ROOT" DCE_CONFIG_FILE="$CONFIG_PATH" DCE_ENV_FILE="$ENV_PATH" \
"$VERIFY" --config "$CONFIG_PATH" 2>&1
)
grep -q 'container memory: 8g' <<<"$mem_output" || {
printf 'ERROR: expected container memory line in verify output\n' >&2
printf '%s\n' "$mem_output" >&2
exit 1
}
cat >"$FAKE_DOCKER" <<'EOF' cat >"$FAKE_DOCKER" <<'EOF'
#!/usr/bin/env bash #!/usr/bin/env bash
if [[ "${1:-}" == "compose" && "${2:-}" == "version" ]]; then if [[ "${1:-}" == "compose" && "${2:-}" == "version" ]]; then

View file

@ -101,6 +101,21 @@ check_auth() {
die "No Discord token: set scrape.env, export DISCORD_TOKEN, or sync from GUI." die "No Discord token: set scrape.env, export DISCORD_TOKEN, or sync from GUI."
} }
#######################################
# Report the configured container memory limit, when one is set.
#
# The value is looked up in the env file first (the last
# DCE_CONTAINER_MEMORY= line wins) and then in the process environment.
# Each candidate is normalised before it is judged: carriage returns from
# CRLF env files are dropped, surrounding whitespace is trimmed, and one pair
# of matching surrounding quotes is removed. A candidate that is empty after
# normalisation does not block the fallback to the next source.
#
# Globals:
#   ENV_FILE              (read) path of the env file to scan; may be unset
#   DCE_CONTAINER_MEMORY  (read) fallback when the file yields no value
# Arguments:
#   None
# Outputs:
#   "container memory: <value> (compose mem_limit)" on stdout; nothing when
#   no value is configured or the value is "0".
# Returns:
#   0 always; the line is informational only.
#######################################
print_container_memory() {
  local env_file="${ENV_FILE:-}"
  local mem="" candidate
  local -a candidates=()

  if [[ -n "$env_file" && -f "$env_file" ]]; then
    # grep exits 1 when the key is absent; "|| true" keeps that from aborting
    # the caller if errexit/pipefail are enabled.
    candidates+=("$(grep -E '^[[:space:]]*DCE_CONTAINER_MEMORY=' "$env_file" 2>/dev/null \
      | tail -1 | cut -d= -f2- | tr -d '\r' || true)")
  fi
  candidates+=("${DCE_CONTAINER_MEMORY:-}")

  for candidate in "${candidates[@]}"; do
    # Trim leading, then trailing, whitespace.
    candidate=${candidate#"${candidate%%[![:space:]]*}"}
    candidate=${candidate%"${candidate##*[![:space:]]}"}
    # Drop one pair of matching surrounding quotes ("8g" or '8g' -> 8g).
    case "$candidate" in
      \"*\" | \'*\') candidate=${candidate:1:${#candidate}-2} ;;
    esac
    if [[ -n "$candidate" ]]; then
      mem=$candidate
      break
    fi
  done

  # "0" is treated the same as "not configured": print nothing.
  [[ -n "$mem" && "$mem" != "0" ]] || return 0
  printf 'container memory: %s (compose mem_limit)\n' "$mem"
}
main() { main() {
while (($#)); do while (($#)); do
case "$1" in case "$1" in
@ -143,6 +158,7 @@ main() {
require_archive_disk_space require_archive_disk_space
resolve_compose resolve_compose
check_auth check_auth
print_container_memory
printf 'config: %s\n\n' "$CONFIG_PATH" printf 'config: %s\n\n' "$CONFIG_PATH"
DCE_PRIMARY_CONFIG="$CONFIG_PATH" "$VERIFY_ARCHIVES" --config "$CONFIG_PATH" DCE_PRIMARY_CONFIG="$CONFIG_PATH" "$VERIFY_ARCHIVES" --config "$CONFIG_PATH"