#!/usr/bin/env bash
#
# Host-side wrapper that runs the `discord-scraper` compose service
# (subcommands `preflight` and `scrape`) or the local salvage script
# (subcommand `salvage`).
#
# Environment read by this script:
#   DCE_REPO_ROOT, DCE_COMPOSE_FILE, DCE_ENV_FILE, DCE_DOCKER_BIN,
#   DCE_COMPOSE_BIN, DCE_COMPOSE_TTY, DCE_SKIP_SCRAPE_LOCK,
#   DCE_SKIP_DISK_CHECK, DCE_REAUTH_COMMAND, DCE_USERNS_MODE, DCE_UID,
#   DCE_GID, DCE_CONTAINER_MEMORY, DCE_RUN_SUMMARY_JSON,
#   DCE_RUN_SUMMARY_FILE, DISCORD_TOKEN, DISCORD_TOKEN_FILE.
#
# NOTE(review): `die`, `apply_single_target_container_memory`,
# `print_scrape_config_plan` and the `scrape_lock_*` /
# `resolve_scrape_lock_file` / `read_scrape_lock_meta_field` helpers are not
# defined in this file; presumably they come from the two sourced libs.
set -Eeuo pipefail

SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd -P)
REPO_ROOT="${DCE_REPO_ROOT:-$(cd "$SCRIPT_DIR/.." && pwd -P)}"

# shellcheck source=lib/scrape-run-plan.sh
source "$SCRIPT_DIR/lib/scrape-run-plan.sh"
# shellcheck source=lib/scrape-lock.sh
source "$SCRIPT_DIR/lib/scrape-lock.sh"

COMPOSE_FILE="${DCE_COMPOSE_FILE:-$REPO_ROOT/docker-compose.yml}"
ENV_FILE="${DCE_ENV_FILE:-$REPO_ROOT/scrape.env}"
DOCKER_BIN="${DCE_DOCKER_BIN:-docker}"
COMPOSE_BIN="${DCE_COMPOSE_BIN:-}"
DOCKER_BIN_OVERRIDDEN=0
REAUTH_COMMAND=""
COMPOSE_ENV_FILE=""
COMPOSE_ENV_TEMP=""
SCRAPE_LOCK_FILE=""
SCRAPE_LOCK_FD=""
VERIFY_READY="$REPO_ROOT/scripts/verify-operator-ready.sh"

if [[ -n "${DCE_DOCKER_BIN:-}" ]]; then
  DOCKER_BIN_OVERRIDDEN=1
fi

#######################################
# Print the command-line help and exit.
# Arguments: $1 - exit status (optional, default 1). Help goes to stdout
#                 when the status is 0 and to stderr otherwise.
#######################################
usage() {
  local status=${1:-1}
  local script_name=${BASH_SOURCE[0]##*/}
  local -a help_lines=(
    "Usage: $script_name [--env-file PATH] [--compose-file PATH] <preflight|scrape|salvage> [ARGS...]"
    ""
    "Options:"
    "  --env-file PATH      Env file to load (current: $ENV_FILE)"
    "  --compose-file PATH  Compose file to run (current: $COMPOSE_FILE)"
    "  -h, --help           Show this help and exit"
    ""
    "Remaining arguments are forwarded to the selected subcommand."
  )

  if ((status == 0)); then
    printf '%s\n' "${help_lines[@]}"
  else
    printf '%s\n' "${help_lines[@]}" >&2
  fi
  exit "$status"
}

# Abort (via die) unless the command named by $1 is available.
require_program() {
  command -v "$1" >/dev/null 2>&1 || die "Required command '$1' is missing."
}

# Delete the temporary compose env file, if one was created and still exists.
cleanup_compose_env() {
  if [[ -n "$COMPOSE_ENV_TEMP" && -f "$COMPOSE_ENV_TEMP" ]]; then
    rm -f "$COMPOSE_ENV_TEMP"
  fi
}

# Create $REPO_ROOT/logs if it does not exist yet.
ensure_repo_logs_dir() {
  mkdir -p "$REPO_ROOT/logs"
}

#######################################
# Translate a run-summary path into the path written to the compose env file.
# Arguments: $1 - summary path; empty prints nothing.
# Outputs:   paths already under /logs/ and paths outside $REPO_ROOT/logs are
#            printed unchanged; paths under $REPO_ROOT/logs are rewritten to
#            /logs/<relative path>.
#######################################
map_summary_file_for_container() {
  local host_path=$1
  local logs_dir="$REPO_ROOT/logs"

  [[ -n "$host_path" ]] || return 0

  case "$host_path" in
    /logs/*)
      printf '%s\n' "$host_path"
      ;;
    "$logs_dir"/*)
      # Keep sub-directories so the mapped path matches the host directory
      # created by ensure_summary_file_host_dir.
      printf '/logs/%s\n' "${host_path#"$logs_dir"/}"
      ;;
    *)
      printf '%s\n' "$host_path"
      ;;
  esac
}

#######################################
# Create the directory that will hold the run-summary file.
# Arguments: $1 - summary path (optional; defaults to DCE_RUN_SUMMARY_FILE).
#                 A /logs/... path is resolved under $REPO_ROOT/logs first.
#######################################
ensure_summary_file_host_dir() {
  local host_path=${1:-${DCE_RUN_SUMMARY_FILE:-}}

  [[ -n "$host_path" ]] || return 0

  case "$host_path" in
    /logs/*)
      host_path="$REPO_ROOT/logs/${host_path#/logs/}"
      ;;
  esac
  mkdir -p "$(dirname "$host_path")"
}

# Record pid, UTC start time and command line (first 500 bytes) of this
# process in the metadata file that belongs to SCRAPE_LOCK_FILE.
write_scrape_lock_meta() {
  local meta_file
  meta_file=$(scrape_lock_meta_path "$SCRAPE_LOCK_FILE")
  printf 'pid=%s\nstarted=%s\ncmd=%s\n' \
    "$$" \
    "$(date -u +%Y-%m-%dT%H:%M:%SZ)" \
    "$(ps -o args= -p "$$" 2>/dev/null | head -c 500 || echo unknown)" \
    >"$meta_file"
}

# Remove the metadata file that belongs to SCRAPE_LOCK_FILE.
remove_scrape_lock_meta() {
  rm -f "$(scrape_lock_meta_path "$SCRAPE_LOCK_FILE")"
}

# Ask the lock library to reclaim the metadata of a previous holder.
# Returns 0 (after printing a warning) when it reports success, 1 otherwise.
try_reclaim_stale_scrape_lock() {
  local meta_file pid
  meta_file=$(scrape_lock_meta_path "$SCRAPE_LOCK_FILE")
  pid=$(read_scrape_lock_meta_field "$meta_file" pid)

  if scrape_lock_try_reclaim_meta "$meta_file"; then
    printf 'WARN: reclaiming scrape lock; previous holder pid %s is not running.\n' "$pid" >&2
    return 0
  fi
  return 1
}

#######################################
# Take the non-blocking scrape lock for a config, or die if it is busy.
# Skipped when DCE_SKIP_SCRAPE_LOCK=1 or when `flock` is not installed.
# Globals:   SCRAPE_LOCK_FILE, SCRAPE_LOCK_FD (written)
# Arguments: $1 - config path (optional; defaults to
#                 $REPO_ROOT/config/scrape-targets.json)
#######################################
acquire_scrape_lock() {
  local config_path=${1:-}
  local holder_msg=""

  if [[ "${DCE_SKIP_SCRAPE_LOCK:-0}" == "1" ]]; then
    return 0
  fi
  command -v flock >/dev/null 2>&1 || return 0

  [[ -n "$config_path" ]] || config_path="$REPO_ROOT/config/scrape-targets.json"
  SCRAPE_LOCK_FILE=$(resolve_scrape_lock_file "$config_path" "$REPO_ROOT")
  mkdir -p "$(dirname "$SCRAPE_LOCK_FILE")"
  exec {SCRAPE_LOCK_FD}>>"$SCRAPE_LOCK_FILE"

  if flock -n "$SCRAPE_LOCK_FD"; then
    write_scrape_lock_meta
    return 0
  fi
  if try_reclaim_stale_scrape_lock && flock -n "$SCRAPE_LOCK_FD"; then
    write_scrape_lock_meta
    return 0
  fi

  holder_msg=$(scrape_lock_format_holder_summary "$(scrape_lock_meta_path "$SCRAPE_LOCK_FILE")") || true

  # We never held the lock. Drop the descriptor now so the EXIT trap's
  # release_scrape_lock does not delete the active holder's metadata file.
  exec {SCRAPE_LOCK_FD}>&-
  SCRAPE_LOCK_FD=""

  if [[ -n "$holder_msg" ]]; then
    die "Another scrape is already running (lock: $SCRAPE_LOCK_FILE). $holder_msg"
  fi
  die "Another scrape is already running (lock: $SCRAPE_LOCK_FILE). Wait for it to finish or confirm no scrape is active before removing the lock."
}

# Unlock and close the lock descriptor and remove the lock metadata.
# No-op when no descriptor is open.
release_scrape_lock() {
  if [[ -z "${SCRAPE_LOCK_FD:-}" ]]; then
    return 0
  fi
  flock -u "$SCRAPE_LOCK_FD" 2>/dev/null || true
  exec {SCRAPE_LOCK_FD}>&-
  SCRAPE_LOCK_FD=""
  remove_scrape_lock_meta
}

# EXIT trap handler: release the scrape lock and remove the temp env file.
cleanup_on_exit() {
  release_scrape_lock
  cleanup_compose_env
}

#######################################
# Parse ENV_FILE and export every KEY=VALUE assignment it contains.
# Blank lines and lines starting with '#' are skipped, a leading "export "
# is accepted, and one pair of matching surrounding quotes is stripped from
# the value. Any other line is a fatal error.
#######################################
load_env_file() {
  [[ -f "$ENV_FILE" ]] || die "Missing env file: $ENV_FILE"

  local raw_line line key value
  while IFS= read -r raw_line || [[ -n "$raw_line" ]]; do
    # Trim leading and trailing whitespace without forking sed per line.
    line=${raw_line#"${raw_line%%[![:space:]]*}"}
    line=${line%"${line##*[![:space:]]}"}

    [[ -n "$line" ]] || continue
    if [[ "$line" == \#* ]]; then
      continue
    fi

    if [[ "$line" == export\ * ]]; then
      line=${line#export }
      line=${line#"${line%%[![:space:]]*}"}
    fi

    [[ "$line" =~ ^[A-Za-z_][A-Za-z0-9_]*= ]] || die "Invalid env assignment in $ENV_FILE: $raw_line"

    key=${line%%=*}
    value=${line#*=}
    if [[ "$value" =~ ^\".*\"$ ]]; then
      value=${value:1:${#value}-2}
    elif [[ "$value" =~ ^\'.*\'$ ]]; then
      value=${value:1:${#value}-2}
    fi

    printf -v "$key" '%s' "$value"
    export "$key"
  done <"$ENV_FILE"
}

#######################################
# Write a fresh temporary env file for compose from the current environment.
# Globals: COMPOSE_ENV_TEMP, COMPOSE_ENV_FILE (written)
# Optional variables are emitted only when non-empty; DCE_CONTAINER_MEMORY
# is always emitted and falls back to 0.
#######################################
write_compose_env_temp() {
  local name container_summary_file=""

  COMPOSE_ENV_TEMP=$(mktemp "${TMPDIR:-/tmp}/dce-compose-env.XXXXXX")
  COMPOSE_ENV_FILE="$COMPOSE_ENV_TEMP"

  if [[ -n "${DCE_RUN_SUMMARY_FILE:-}" ]]; then
    container_summary_file=$(map_summary_file_for_container "$DCE_RUN_SUMMARY_FILE")
  fi

  {
    for name in DISCORD_TOKEN DISCORD_TOKEN_FILE DCE_REAUTH_COMMAND \
      DCE_USERNS_MODE DCE_UID DCE_GID; do
      if [[ -n "${!name:-}" ]]; then
        printf '%s=%s\n' "$name" "${!name}"
      fi
    done

    printf 'DCE_CONTAINER_MEMORY=%s\n' "${DCE_CONTAINER_MEMORY:-0}"

    if [[ "${DCE_RUN_SUMMARY_JSON:-0}" == "1" ]]; then
      printf 'DCE_RUN_SUMMARY_JSON=1\n'
    fi
    if [[ -n "${DCE_RUN_SUMMARY_FILE:-}" ]]; then
      printf 'DCE_RUN_SUMMARY_FILE=%s\n' "$container_summary_file"
    fi
  } >"$COMPOSE_ENV_TEMP"
}

# Export DCE_USERNS_MODE=keep-id when it is unset and either DOCKER_BIN
# contains "podman" or `podman info` succeeds.
configure_rootless_compose() {
  if [[ -n "${DCE_USERNS_MODE:-}" ]]; then
    return 0
  fi
  if [[ "$DOCKER_BIN" == *podman* ]] || podman info >/dev/null 2>&1; then
    export DCE_USERNS_MODE=keep-id
  fi
}

#######################################
# Load ENV_FILE (when present), locate a token and write the compose env file.
# Dies when there is neither an env file nor any token source.
#######################################
prepare_compose_env() {
  if [[ -f "$ENV_FILE" ]]; then
    load_env_file
    if [[ -n "${DISCORD_TOKEN_FILE:-}" && -f "${DISCORD_TOKEN_FILE}" ]]; then
      load_token_from_file || true
    elif [[ -z "${DISCORD_TOKEN:-}" ]]; then
      discover_token_file || true
      load_token_from_file || true
      load_token_from_discover_script || true
    fi
    # Decide the userns mode first so the env file written next includes it.
    configure_rootless_compose
    write_compose_env_temp
    return 0
  fi

  if [[ -z "${DISCORD_TOKEN:-}" ]]; then
    discover_token_file || true
    load_token_from_file || true
    load_token_from_discover_script || true
  fi

  if [[ -n "${DISCORD_TOKEN:-}" || -n "${DISCORD_TOKEN_FILE:-}" ]]; then
    configure_rootless_compose
    write_compose_env_temp
    return 0
  fi

  die "Missing env file: $ENV_FILE (copy scrape.env.example to scrape.env), export DISCORD_TOKEN / DISCORD_TOKEN_FILE, or place a token at $REPO_ROOT/.discord-token or ~/.config/discord-scrape/token."
}

# Export DISCORD_TOKEN from the first line of DISCORD_TOKEN_FILE (CRs removed).
# Returns 1 when DISCORD_TOKEN_FILE is unset; dies when the file is missing
# or its first line is empty.
load_token_from_file() {
  local token_file=${DISCORD_TOKEN_FILE:-}
  local token_value

  [[ -n "$token_file" ]] || return 1
  [[ -f "$token_file" ]] || die "DISCORD_TOKEN_FILE does not exist: $token_file"

  token_value=$(head -n 1 "$token_file" | tr -d '\r')
  [[ -n "$token_value" ]] || die "DISCORD_TOKEN_FILE is empty: $token_file"

  export DISCORD_TOKEN="$token_value"
  return 0
}

# Keep an existing DISCORD_TOKEN_FILE, otherwise export the first token file
# found among the well-known locations. Returns 1 when none exists.
discover_token_file() {
  local candidate

  if [[ -n "${DISCORD_TOKEN_FILE:-}" && -f "${DISCORD_TOKEN_FILE}" ]]; then
    return 0
  fi

  for candidate in \
    "$REPO_ROOT/.discord-token" \
    "$HOME/.config/discord-scrape/token" \
    "$HOME/.config/discord-token"; do
    if [[ -f "$candidate" ]]; then
      export DISCORD_TOKEN_FILE="$candidate"
      return 0
    fi
  done
  return 1
}

# Export DISCORD_TOKEN from the stdout of scripts/discover-discord-token.sh.
# Returns 1 when the script is not executable, fails, or prints nothing.
load_token_from_discover_script() {
  local discover_script="$REPO_ROOT/scripts/discover-discord-token.sh"
  local token_value

  [[ -x "$discover_script" ]] || return 1
  token_value=$("$discover_script" 2>/dev/null) || return 1
  [[ -n "$token_value" ]] || return 1

  export DISCORD_TOKEN="$token_value"
  return 0
}

# Make sure DISCORD_TOKEN is set, trying the token file, the well-known
# token locations and finally the discover script. Dies when all fail.
ensure_token_present() {
  if [[ -n "${DISCORD_TOKEN_FILE:-}" && -f "${DISCORD_TOKEN_FILE}" ]]; then
    load_token_from_file || true
  elif [[ -z "${DISCORD_TOKEN:-}" ]]; then
    discover_token_file || true
    load_token_from_file || true
  fi

  if [[ -z "${DISCORD_TOKEN:-}" ]]; then
    load_token_from_discover_script || true
  fi

  [[ -n "${DISCORD_TOKEN:-}" ]] || die "DISCORD_TOKEN is not set. Set DISCORD_TOKEN or DISCORD_TOKEN_FILE in $ENV_FILE, export it in the shell, place a token at $REPO_ROOT/.discord-token or ~/.config/discord-scrape/token, or sign in via DiscordChatExporter GUI / Discord desktop on this machine."
}

# Set COMPOSE_BIN: DCE_COMPOSE_BIN when given, podman-compose when it is
# installed and `podman info` succeeds, otherwise empty.
resolve_compose_bin() {
  if [[ -n "${DCE_COMPOSE_BIN:-}" ]]; then
    COMPOSE_BIN=$DCE_COMPOSE_BIN
    return 0
  fi

  # Smoke tests inject DCE_DOCKER_BIN with a fake compose shim; never route
  # those through podman-compose.
  if ((DOCKER_BIN_OVERRIDDEN == 1)); then
    COMPOSE_BIN=""
    return 0
  fi

  if command -v podman-compose >/dev/null 2>&1 && podman info >/dev/null 2>&1; then
    COMPOSE_BIN=podman-compose
    return 0
  fi

  COMPOSE_BIN=""
}

# Print -T when DCE_COMPOSE_TTY=0, nothing otherwise.
compose_tty_flag() {
  if [[ "${DCE_COMPOSE_TTY:-1}" == "0" ]]; then
    printf '%s' '-T'
  fi
}

#######################################
# Build the compose command line for one subcommand.
# Arguments: $1   - name of the caller's array that receives the command
#            $2   - subcommand passed to the discord-scraper service
#            $3.. - extra arguments appended after the subcommand
# Launcher preference: COMPOSE_BIN, then docker-compose (only when
# DCE_DOCKER_BIN was not overridden), then "$DOCKER_BIN compose".
#######################################
compose_run_args() {
  local -n _out=$1
  local subcommand=$2
  local tty_flag
  shift 2

  tty_flag=$(compose_tty_flag)
  resolve_compose_bin

  if [[ -n "$COMPOSE_BIN" ]]; then
    _out=("$COMPOSE_BIN")
  elif ((DOCKER_BIN_OVERRIDDEN == 0)) && command -v docker-compose >/dev/null 2>&1; then
    _out=(docker-compose)
  else
    _out=("$DOCKER_BIN" compose)
  fi

  _out+=(--env-file "$COMPOSE_ENV_FILE" -f "$COMPOSE_FILE" run)
  if [[ -n "$tty_flag" ]]; then
    _out+=("$tty_flag")
  fi
  _out+=(--rm discord-scraper "$subcommand" "$@")
}

#######################################
# Validate DCE_REAUTH_COMMAND and print its resolved path.
# Arguments: $1 - candidate path; empty returns 1.
# Dies unless the path is absolute, names an executable regular file and,
# after resolving its directory physically, lies under REPO_ROOT.
#######################################
resolve_reauth_command() {
  local candidate=$1
  local resolved_dir resolved_path

  [[ -n "$candidate" ]] || return 1
  [[ "$candidate" == /* ]] || die "DCE_REAUTH_COMMAND must be an absolute path to an executable script under the repository."

  resolved_dir=$(cd "$(dirname "$candidate")" 2>/dev/null && pwd -P) ||
    die "DCE_REAUTH_COMMAND does not exist: $candidate"
  resolved_path="$resolved_dir/$(basename "$candidate")"

  [[ -f "$resolved_path" ]] || die "DCE_REAUTH_COMMAND does not exist: $candidate"
  [[ -x "$resolved_path" ]] || die "DCE_REAUTH_COMMAND is not executable: $candidate"

  case "$resolved_path" in
    "$REPO_ROOT"/*) ;;
    *)
      die "DCE_REAUTH_COMMAND must be a script inside the repository root."
      ;;
  esac

  printf '%s\n' "$resolved_path"
}

#######################################
# Run the local salvage script against the host config.
# Arguments: $1   - host config path
#            $2.. - passthrough arguments; every "--config VALUE" pair is
#                   dropped and replaced by "--config $1".
#######################################
run_local_salvage() {
  local host_config=$1
  shift
  local -a local_args=()
  local skip_next=0 arg

  for arg in "$@"; do
    if ((skip_next)); then
      skip_next=0
      continue
    fi
    if [[ "$arg" == "--config" ]]; then
      skip_next=1
      continue
    fi
    local_args+=("$arg")
  done

  DCE_PRIMARY_CONFIG="$host_config" \
    DCE_FALLBACK_CONFIG="$host_config" \
    "$SCRIPT_DIR/run-discord-scrape.sh" salvage --config "$host_config" "${local_args[@]}"
}

#######################################
# Print the host path of the config selected by the first --config argument.
# Without --config the default $REPO_ROOT/config/scrape-targets.json is
# printed. /config/* values also map to that default, relative values are
# resolved against REPO_ROOT, other absolute paths are kept.
#######################################
resolve_host_config_path() {
  local -a args=("$@")
  local i=0
  local cfg="$REPO_ROOT/config/scrape-targets.json"

  while ((i < ${#args[@]})); do
    if [[ "${args[i]}" == "--config" ]]; then
      cfg="${args[i + 1]:-}"
      case "$cfg" in
        /config/*) cfg="$REPO_ROOT/config/scrape-targets.json" ;;
        ./*) cfg="$REPO_ROOT/${cfg#./}" ;;
        /*) ;;
        *) cfg="$REPO_ROOT/$cfg" ;;
      esac
      break
    fi
    i=$((i + 1))
  done

  printf '%s\n' "$cfg"
}

# Run "$VERIFY_READY --disk-only" for the selected config, unless
# DCE_SKIP_DISK_CHECK=1 or the verify script is not executable.
run_disk_preflight_if_enabled() {
  local -a args=("$@")
  local cfg

  if [[ "${DCE_SKIP_DISK_CHECK:-0}" == 1 ]]; then
    return 0
  fi
  if [[ ! -x "$VERIFY_READY" ]]; then
    return 0
  fi

  cfg=$(resolve_host_config_path "${args[@]}")
  "$VERIFY_READY" --disk-only --config "$cfg"
}

#######################################
# Tell whether captured container output looks like a Discord auth failure.
# Arguments: $1 - file holding the captured output
# Returns:   0 when a known auth message or a stand-alone 401/403 is found.
#######################################
is_discord_auth_failure() {
  local output_file=$1

  # 401/403 must not be part of a longer digit run; otherwise numeric ids
  # that merely contain those digits would be mistaken for an HTTP status.
  grep -Eqi \
    "Authentication token is invalid|Request to 'channels/.+' failed: forbidden|failed authenticated preflight|(^|[^0-9])(401|403)([^0-9]|\$)" \
    "$output_file"
}

# Run the validated DCE_REAUTH_COMMAND script. Returns 1 when no command is
# configured, when stdin/stdout are not terminals, or when validation fails.
try_interactive_reauth() {
  local reauth_script

  [[ -n "$REAUTH_COMMAND" ]] || return 1
  [[ -t 0 && -t 1 ]] || return 1

  # die inside the command substitution only ends the subshell, so the
  # failure has to be propagated explicitly.
  reauth_script=$(resolve_reauth_command "$REAUTH_COMMAND") || return 1

  printf 'Auth failed; running DCE_REAUTH_COMMAND...\n' >&2
  "$reauth_script"
}

#######################################
# Run a compose subcommand, retrying once after a detected auth failure.
# Arguments: $1   - subcommand
#            $2.. - arguments forwarded to the container
# Dies on a non-auth failure or when the retry fails as well.
#######################################
run_subcommand_with_retry() {
  local subcommand=$1
  shift
  local -a run_args=()
  local output_file

  ensure_token_present
  # Trailing X's only: BSD mktemp does not substitute X's followed by a suffix.
  output_file=$(mktemp "${TMPDIR:-/tmp}/dce-host-run.XXXXXX")
  compose_run_args run_args "$subcommand" "$@"

  if "${run_args[@]}" 2>&1 | tee "$output_file"; then
    rm -f "$output_file"
    return 0
  fi

  if ! is_discord_auth_failure "$output_file"; then
    rm -f "$output_file"
    die "Container run failed for '$subcommand' with a non-auth error."
  fi

  printf 'Detected Discord auth failure. Refreshing token and retrying once...\n' >&2
  load_token_from_file || true
  if [[ -z "${DISCORD_TOKEN:-}" ]]; then
    load_token_from_discover_script || true
  fi
  try_interactive_reauth || true
  ensure_token_present

  # Rewrite the env file only after the reauth hook and token reload so the
  # retry does not reuse the token that just failed.
  cleanup_compose_env
  COMPOSE_ENV_TEMP=""
  write_compose_env_temp

  compose_run_args run_args "$subcommand" "$@"
  if "${run_args[@]}" 2>&1 | tee "$output_file"; then
    rm -f "$output_file"
    return 0
  fi

  rm -f "$output_file"
  die "Container run failed for '$subcommand' after one auth refresh retry."
}

# Collect the value following every --target argument.
# Arguments: $1 - name of the caller's array to fill; $2.. - arguments to scan.
collect_passthrough_targets() {
  local -n _targets_out=$1
  shift
  local -a args=("$@")
  local idx=0

  _targets_out=()
  while ((idx < ${#args[@]})); do
    if [[ "${args[idx]}" == "--target" ]]; then
      _targets_out+=("${args[idx + 1]:-}")
      idx=$((idx + 2))
      continue
    fi
    idx=$((idx + 1))
  done
}

#######################################
# Entry point: parse wrapper options, prepare the environment and dispatch
# the chosen subcommand.
# Arguments: "$@" - command line (see usage)
#######################################
main() {
  local -a passthrough_args=()
  local -a host_targets=()
  local subcommand=""
  local host_config

  trap cleanup_on_exit EXIT

  while (($#)); do
    case "$1" in
      --env-file)
        [[ $# -ge 2 ]] || die "Missing value for --env-file."
        ENV_FILE=$2
        shift 2
        ;;
      --compose-file)
        [[ $# -ge 2 ]] || die "Missing value for --compose-file."
        COMPOSE_FILE=$2
        shift 2
        ;;
      --help | -h)
        usage 0
        ;;
      preflight | scrape | salvage)
        # Only the first occurrence selects the subcommand; later ones are
        # ordinary passthrough arguments.
        if [[ -n "$subcommand" ]]; then
          passthrough_args+=("$1")
        else
          subcommand=$1
        fi
        shift
        ;;
      *)
        if [[ -z "$subcommand" ]]; then
          die "Unsupported subcommand '$1'. Use 'preflight', 'scrape', or 'salvage'."
        fi
        passthrough_args+=("$1")
        shift
        ;;
    esac
  done

  [[ -n "$subcommand" ]] || usage 1

  require_program grep
  if [[ -n "$COMPOSE_BIN" ]]; then
    require_program "$COMPOSE_BIN"
  elif ((DOCKER_BIN_OVERRIDDEN == 0)) && command -v docker-compose >/dev/null 2>&1; then
    :
  else
    require_program "$DOCKER_BIN"
  fi

  [[ -f "$COMPOSE_FILE" ]] || die "Missing compose file: $COMPOSE_FILE"

  host_config=$(resolve_host_config_path "${passthrough_args[@]}")
  collect_passthrough_targets host_targets "${passthrough_args[@]}"
  if ((${#host_targets[@]} == 1)); then
    apply_single_target_container_memory "$host_config" "${host_targets[0]}"
  fi

  if [[ "$subcommand" == "scrape" ]]; then
    ensure_repo_logs_dir
    ensure_summary_file_host_dir
  fi

  # salvage runs a local script, so it needs no compose env file.
  if [[ "$subcommand" != "salvage" ]]; then
    prepare_compose_env
  fi
  REAUTH_COMMAND="${DCE_REAUTH_COMMAND:-}"

  run_disk_preflight_if_enabled "${passthrough_args[@]}"
  print_scrape_config_plan "$host_config" "Host $subcommand" "${host_targets[@]}"

  case "$subcommand" in
    preflight)
      run_subcommand_with_retry "$subcommand" "${passthrough_args[@]}"
      ;;
    scrape)
      acquire_scrape_lock "$host_config"
      run_subcommand_with_retry "$subcommand" "${passthrough_args[@]}"
      ;;
    salvage)
      acquire_scrape_lock "$host_config"
      run_local_salvage "$host_config" "${passthrough_args[@]}"
      ;;
  esac
}

main "$@"