mirror of
https://github.com/Tyrrrz/DiscordChatExporter.git
synced 2026-06-10 00:02:37 -06:00
feat(scrape): label OOM skips and hint container memory
Classify aborted/OOM export skips as SKIPPED (OOM/aborted) in the run summary with salvage/memory guidance; verify-operator-ready shows configured DCE_CONTAINER_MEMORY.
This commit is contained in:
parent
e9a3fea9d1
commit
a827e6b9bc
|
|
@ -0,0 +1,49 @@
|
||||||
|
---
|
||||||
|
title: "feat: OOM skip labels and operator hints in scrape summary"
|
||||||
|
type: feat
|
||||||
|
status: complete
|
||||||
|
date: 2026-06-04
|
||||||
|
origin: /lfg — plan 063–064 added DCE_CONTAINER_MEMORY docs; scrape summary still labels OOM skips as generic SKIPPED with no next-step hint
|
||||||
|
---
|
||||||
|
|
||||||
|
# feat: OOM skip labels and operator hints in scrape summary
|
||||||
|
|
||||||
|
## Summary
|
||||||
|
|
||||||
|
Distinguish OOM/aborted export skips in the scrape run summary and print a one-line operator hint when they occur. Show configured `DCE_CONTAINER_MEMORY` in `verify-operator-ready` output.
|
||||||
|
|
||||||
|
## Requirements
|
||||||
|
|
||||||
|
| ID | Requirement |
|
||||||
|
|----|-------------|
|
||||||
|
| R1 | `export_channel_incremental` classifies OOM/aborted skips (exit 134/137/139) |
|
||||||
|
| R2 | Run summary uses `SKIPPED_OOM` with distinct log line vs generic `SKIPPED` |
|
||||||
|
| R3 | When any `SKIPPED_OOM`, summary footer hints `DCE_CONTAINER_MEMORY` and `--salvage-before-scrape` |
|
||||||
|
| R4 | `verify-operator-ready.sh` prints non-zero `DCE_CONTAINER_MEMORY` from `scrape.env` |
|
||||||
|
| R5 | Smokes assert `SKIPPED_OOM` for abort fixture and memory line in verify output |
|
||||||
|
| R6 | `run-all-smokes.sh` → 21/21 |
|
||||||
|
|
||||||
|
## Implementation Units
|
||||||
|
|
||||||
|
### U1. OOM skip classification and summary hints
|
||||||
|
|
||||||
|
**Files:** `scripts/run-discord-scrape.sh`, `scripts/tests/run-discord-scrape-smoke.sh`
|
||||||
|
|
||||||
|
### U2. Verify-operator-ready memory line
|
||||||
|
|
||||||
|
**Files:** `scripts/verify-operator-ready.sh`, `scripts/tests/verify-operator-ready-smoke.sh`
|
||||||
|
|
||||||
|
## Verification
|
||||||
|
|
||||||
|
```bash
|
||||||
|
./scripts/tests/run-discord-scrape-smoke.sh
|
||||||
|
./scripts/tests/verify-operator-ready-smoke.sh
|
||||||
|
DCE_MIN_FREE_MB=0 ./scripts/run-all-smokes.sh
|
||||||
|
```
|
||||||
|
|
||||||
|
## Scope Boundaries
|
||||||
|
|
||||||
|
### Deferred
|
||||||
|
|
||||||
|
- Live KotOR catch-up on host
|
||||||
|
- Per-target memory in config JSON
|
||||||
|
|
@ -156,6 +156,8 @@ DCE_MIN_FREE_MB=0 ./scripts/run-operator-validation.sh \
|
||||||
|
|
||||||
**Plan 064 (2026-06-04):** OOM, scrape-lock, and partial-temp salvage runbooks in `.docs/Recurring-Scrape-Troubleshooting.md`; GUI bridge notes `DCE_CONTAINER_MEMORY` for yes_general.
|
**Plan 064 (2026-06-04):** OOM, scrape-lock, and partial-temp salvage runbooks in `.docs/Recurring-Scrape-Troubleshooting.md`; GUI bridge notes `DCE_CONTAINER_MEMORY` for yes_general.
|
||||||
|
|
||||||
|
**Plan 065 (2026-06-04):** Scrape summary labels OOM skips as `SKIPPED (OOM/aborted)` with operator hint; `verify-operator-ready` prints configured container memory.
|
||||||
|
|
||||||
**Disk:** ~65 GiB free on `/home` (2026-05-30); large channel merges still need headroom.
|
**Disk:** ~65 GiB free on `/home` (2026-05-30); large channel merges still need headroom.
|
||||||
|
|
||||||
## CI note (fork PRs)
|
## CI note (fork PRs)
|
||||||
|
|
|
||||||
|
|
@ -49,9 +49,11 @@ log() {
|
||||||
}
|
}
|
||||||
|
|
||||||
SCRAPE_SUMMARY_ENTRIES=()
|
SCRAPE_SUMMARY_ENTRIES=()
|
||||||
|
LAST_EXPORT_SKIP_KIND=""
|
||||||
|
|
||||||
reset_scrape_summary() {
|
reset_scrape_summary() {
|
||||||
SCRAPE_SUMMARY_ENTRIES=()
|
SCRAPE_SUMMARY_ENTRIES=()
|
||||||
|
LAST_EXPORT_SKIP_KIND=""
|
||||||
}
|
}
|
||||||
|
|
||||||
record_channel_result() {
|
record_channel_result() {
|
||||||
|
|
@ -163,7 +165,7 @@ log_run_plan() {
|
||||||
print_scrape_summary() {
|
print_scrape_summary() {
|
||||||
local entry target_name channel_id guild_label file_path action
|
local entry target_name channel_id guild_label file_path action
|
||||||
local before_count fetched_count after_count delta appended=0
|
local before_count fetched_count after_count delta appended=0
|
||||||
local created=0 merged=0 unchanged=0 skipped=0
|
local created=0 merged=0 unchanged=0 skipped=0 skipped_oom=0
|
||||||
|
|
||||||
log '=== Scrape run summary ==='
|
log '=== Scrape run summary ==='
|
||||||
|
|
||||||
|
|
@ -192,6 +194,11 @@ print_scrape_summary() {
|
||||||
unchanged=$((unchanged + 1))
|
unchanged=$((unchanged + 1))
|
||||||
log " UNCHANGED $file_path $after_count messages channel $channel_id $guild_label"
|
log " UNCHANGED $file_path $after_count messages channel $channel_id $guild_label"
|
||||||
;;
|
;;
|
||||||
|
SKIPPED_OOM)
|
||||||
|
skipped=$((skipped + 1))
|
||||||
|
skipped_oom=$((skipped_oom + 1))
|
||||||
|
log " SKIPPED (OOM/aborted) channel $channel_id $guild_label (partial temp preserved under .dce-temp when present)"
|
||||||
|
;;
|
||||||
SKIPPED)
|
SKIPPED)
|
||||||
skipped=$((skipped + 1))
|
skipped=$((skipped + 1))
|
||||||
log " SKIPPED channel $channel_id $guild_label (inaccessible or non-fatal export error)"
|
log " SKIPPED channel $channel_id $guild_label (inaccessible or non-fatal export error)"
|
||||||
|
|
@ -203,6 +210,9 @@ print_scrape_summary() {
|
||||||
done
|
done
|
||||||
|
|
||||||
log "Totals: $created created, $merged merged, $unchanged unchanged, $skipped skipped; +$appended messages appended"
|
log "Totals: $created created, $merged merged, $unchanged unchanged, $skipped skipped; +$appended messages appended"
|
||||||
|
if (( skipped_oom > 0 )); then
|
||||||
|
log "Hint: for OOM/aborted channels, set DCE_CONTAINER_MEMORY=8g in scrape.env, run --salvage-before-scrape, then retry with --channel."
|
||||||
|
fi
|
||||||
}
|
}
|
||||||
|
|
||||||
die() {
|
die() {
|
||||||
|
|
@ -755,6 +765,8 @@ export_channel_incremental() {
|
||||||
local -a export_command
|
local -a export_command
|
||||||
local export_log export_status=0
|
local export_log export_status=0
|
||||||
|
|
||||||
|
LAST_EXPORT_SKIP_KIND=""
|
||||||
|
|
||||||
export_command=("$CLI_BIN" export --channel "$channel_id" --format Json --output "$temp_export")
|
export_command=("$CLI_BIN" export --channel "$channel_id" --format Json --output "$temp_export")
|
||||||
if [[ -n "$after_id" ]]; then
|
if [[ -n "$after_id" ]]; then
|
||||||
export_command+=(--after "$after_id")
|
export_command+=(--after "$after_id")
|
||||||
|
|
@ -773,6 +785,11 @@ export_channel_incremental() {
|
||||||
|
|
||||||
# SIGINT (130), SIGTERM (143), SIGABRT (134), SIGKILL/OOM (137), SIGSEGV (139)
|
# SIGINT (130), SIGTERM (143), SIGABRT (134), SIGKILL/OOM (137), SIGSEGV (139)
|
||||||
if (( export_status == 130 || export_status == 143 || export_status == 134 || export_status == 137 || export_status == 139 )); then
|
if (( export_status == 130 || export_status == 143 || export_status == 134 || export_status == 137 || export_status == 139 )); then
|
||||||
|
if (( export_status == 134 || export_status == 137 || export_status == 139 )); then
|
||||||
|
LAST_EXPORT_SKIP_KIND=oom
|
||||||
|
else
|
||||||
|
LAST_EXPORT_SKIP_KIND=abort
|
||||||
|
fi
|
||||||
log "Skipping channel $channel_id (export process aborted, exit $export_status)."
|
log "Skipping channel $channel_id (export process aborted, exit $export_status)."
|
||||||
[[ -s "$export_log" ]] && cat "$export_log" >&2
|
[[ -s "$export_log" ]] && cat "$export_log" >&2
|
||||||
rm -f "$export_log"
|
rm -f "$export_log"
|
||||||
|
|
@ -780,6 +797,11 @@ export_channel_incremental() {
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if is_skippable_channel_export_failure "$export_log"; then
|
if is_skippable_channel_export_failure "$export_log"; then
|
||||||
|
if grep -qiE 'out of memory|OOM|Killed|SIGKILL|SIGABRT|Aborted \\(core dumped\\)|core dumped' "$export_log"; then
|
||||||
|
LAST_EXPORT_SKIP_KIND=oom
|
||||||
|
else
|
||||||
|
LAST_EXPORT_SKIP_KIND=access
|
||||||
|
fi
|
||||||
log "Skipping channel $channel_id (inaccessible or non-fatal export error)."
|
log "Skipping channel $channel_id (inaccessible or non-fatal export error)."
|
||||||
cat "$export_log" >&2
|
cat "$export_log" >&2
|
||||||
rm -f "$export_log"
|
rm -f "$export_log"
|
||||||
|
|
@ -1268,7 +1290,11 @@ scrape_target() {
|
||||||
rm -rf "$temp_dir"
|
rm -rf "$temp_dir"
|
||||||
fi
|
fi
|
||||||
skipped_channels=$((skipped_channels + 1))
|
skipped_channels=$((skipped_channels + 1))
|
||||||
record_channel_result "$target_name" "$channel_id" "$guild_label" "${destination_path:-n/a}" SKIPPED "$before_count" 0 "$before_count"
|
local skip_action=SKIPPED
|
||||||
|
if [[ "${LAST_EXPORT_SKIP_KIND:-}" == oom ]]; then
|
||||||
|
skip_action=SKIPPED_OOM
|
||||||
|
fi
|
||||||
|
record_channel_result "$target_name" "$channel_id" "$guild_label" "${destination_path:-n/a}" "$skip_action" "$before_count" 0 "$before_count"
|
||||||
continue
|
continue
|
||||||
;;
|
;;
|
||||||
*)
|
*)
|
||||||
|
|
|
||||||
|
|
@ -416,7 +416,8 @@ run_wrapper skip-abort append 2>"$SKIP_ABORT_LOG"
|
||||||
SKIP_ABORT_DEST="$ARCHIVE_ROOT/skip-abort/$DEFAULT_FILE_NAME"
|
SKIP_ABORT_DEST="$ARCHIVE_ROOT/skip-abort/$DEFAULT_FILE_NAME"
|
||||||
[[ "$(jq -r '.messages | length' "$SKIP_ABORT_DEST")" == "3" ]] || { echo "expected skip-abort to append accessible channel" >&2; exit 1; }
|
[[ "$(jq -r '.messages | length' "$SKIP_ABORT_DEST")" == "3" ]] || { echo "expected skip-abort to append accessible channel" >&2; exit 1; }
|
||||||
[[ ! -e "$ARCHIVE_ROOT/skip-abort/channels/134.json" ]] || { echo "unexpected fallback file for skipped abort channel" >&2; exit 1; }
|
[[ ! -e "$ARCHIVE_ROOT/skip-abort/channels/134.json" ]] || { echo "unexpected fallback file for skipped abort channel" >&2; exit 1; }
|
||||||
grep -q 'SKIPPED.*134' "$SKIP_ABORT_LOG" || { echo "expected SKIPPED line for abort channel 134" >&2; exit 1; }
|
grep -q 'SKIPPED (OOM/aborted).*134' "$SKIP_ABORT_LOG" || { echo "expected SKIPPED_OOM line for abort channel 134" >&2; exit 1; }
|
||||||
|
grep -q 'Hint: for OOM/aborted channels' "$SKIP_ABORT_LOG" || { echo "expected OOM operator hint in scrape summary" >&2; exit 1; }
|
||||||
grep -q 'Preserving partial export temp' "$SKIP_ABORT_LOG" || { echo "expected partial temp preserved on abort channel 134" >&2; exit 1; }
|
grep -q 'Preserving partial export temp' "$SKIP_ABORT_LOG" || { echo "expected partial temp preserved on abort channel 134" >&2; exit 1; }
|
||||||
partial_temp_dirs=( "$ARCHIVE_ROOT/skip-abort/.dce-temp"/export.134.* )
|
partial_temp_dirs=( "$ARCHIVE_ROOT/skip-abort/.dce-temp"/export.134.* )
|
||||||
[[ -d "${partial_temp_dirs[0]}" ]] || { echo "expected partial temp dir preserved for channel 134" >&2; exit 1; }
|
[[ -d "${partial_temp_dirs[0]}" ]] || { echo "expected partial temp dir preserved for channel 134" >&2; exit 1; }
|
||||||
|
|
|
||||||
|
|
@ -37,6 +37,27 @@ JSON
|
||||||
|
|
||||||
printf 'DISCORD_TOKEN=dummy\n' >"$ENV_PATH"
|
printf 'DISCORD_TOKEN=dummy\n' >"$ENV_PATH"
|
||||||
|
|
||||||
|
mem_output=$(
|
||||||
|
DCE_MIN_FREE_MB=0 DCE_REPO_ROOT="$REPO_ROOT" DCE_CONFIG_FILE="$CONFIG_PATH" DCE_ENV_FILE="$ENV_PATH" \
|
||||||
|
"$VERIFY" --config "$CONFIG_PATH" 2>&1
|
||||||
|
)
|
||||||
|
grep -q 'Operator ready' <<<"$mem_output" || {
|
||||||
|
printf 'ERROR: verify-operator-ready failed\n' >&2
|
||||||
|
printf '%s\n' "$mem_output" >&2
|
||||||
|
exit 1
|
||||||
|
}
|
||||||
|
|
||||||
|
printf 'DISCORD_TOKEN=dummy\nDCE_CONTAINER_MEMORY=8g\n' >"$ENV_PATH"
|
||||||
|
mem_output=$(
|
||||||
|
DCE_MIN_FREE_MB=0 DCE_REPO_ROOT="$REPO_ROOT" DCE_CONFIG_FILE="$CONFIG_PATH" DCE_ENV_FILE="$ENV_PATH" \
|
||||||
|
"$VERIFY" --config "$CONFIG_PATH" 2>&1
|
||||||
|
)
|
||||||
|
grep -q 'container memory: 8g' <<<"$mem_output" || {
|
||||||
|
printf 'ERROR: expected container memory line in verify output\n' >&2
|
||||||
|
printf '%s\n' "$mem_output" >&2
|
||||||
|
exit 1
|
||||||
|
}
|
||||||
|
|
||||||
cat >"$FAKE_DOCKER" <<'EOF'
|
cat >"$FAKE_DOCKER" <<'EOF'
|
||||||
#!/usr/bin/env bash
|
#!/usr/bin/env bash
|
||||||
if [[ "${1:-}" == "compose" && "${2:-}" == "version" ]]; then
|
if [[ "${1:-}" == "compose" && "${2:-}" == "version" ]]; then
|
||||||
|
|
|
||||||
|
|
@ -101,6 +101,21 @@ check_auth() {
|
||||||
die "No Discord token: set scrape.env, export DISCORD_TOKEN, or sync from GUI."
|
die "No Discord token: set scrape.env, export DISCORD_TOKEN, or sync from GUI."
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Print the configured container memory limit as a single
# "container memory: <value> (compose mem_limit)" line on stdout.
#
# Value lookup, first non-empty candidate wins:
#   1. the last DCE_CONTAINER_MEMORY= assignment in $ENV_FILE (if that file exists)
#   2. the DCE_CONTAINER_MEMORY environment variable
# Each candidate is normalised before the emptiness check: carriage returns are
# dropped, surrounding whitespace is trimmed, and one pair of matching
# single/double quotes is removed. That way `DCE_CONTAINER_MEMORY="8g"` reports
# 8g, and a blank assignment in the file does not hide a value that is set in
# the environment.
#
# Prints nothing when no value is configured or the value is exactly "0".
# Always returns 0: the line is informational only.
print_container_memory() {
  local env_file="${ENV_FILE:-}"
  local candidate mem=""
  local -a candidates=()

  if [[ -n "$env_file" && -f "$env_file" ]]; then
    # tail -1: when the key is assigned more than once, the last line is used.
    # `|| true` keeps a no-match grep harmless in case errexit/pipefail is on.
    candidates+=("$(grep -E '^[[:space:]]*DCE_CONTAINER_MEMORY=' "$env_file" 2>/dev/null | tail -1 | cut -d= -f2- || true)")
  fi
  candidates+=("${DCE_CONTAINER_MEMORY:-}")

  for candidate in "${candidates[@]}"; do
    # Drop carriage returns left behind by CRLF-edited env files.
    candidate=${candidate//$'\r'/}
    # Trim leading, then trailing, whitespace.
    candidate=${candidate#"${candidate%%[![:space:]]*}"}
    candidate=${candidate%"${candidate##*[![:space:]]}"}
    # Remove one pair of matching surrounding quotes.
    case "$candidate" in
      \"*\" | \'*\') candidate=${candidate:1:${#candidate}-2} ;;
    esac
    if [[ -n "$candidate" ]]; then
      mem=$candidate
      break
    fi
  done

  [[ -n "$mem" && "$mem" != "0" ]] || return 0
  printf 'container memory: %s (compose mem_limit)\n' "$mem"
}
|
||||||
|
|
||||||
main() {
|
main() {
|
||||||
while (($#)); do
|
while (($#)); do
|
||||||
case "$1" in
|
case "$1" in
|
||||||
|
|
@ -143,6 +158,7 @@ main() {
|
||||||
require_archive_disk_space
|
require_archive_disk_space
|
||||||
resolve_compose
|
resolve_compose
|
||||||
check_auth
|
check_auth
|
||||||
|
print_container_memory
|
||||||
printf 'config: %s\n\n' "$CONFIG_PATH"
|
printf 'config: %s\n\n' "$CONFIG_PATH"
|
||||||
|
|
||||||
DCE_PRIMARY_CONFIG="$CONFIG_PATH" "$VERIFY_ARCHIVES" --config "$CONFIG_PATH"
|
DCE_PRIMARY_CONFIG="$CONFIG_PATH" "$VERIFY_ARCHIVES" --config "$CONFIG_PATH"
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue