mirror of
https://github.com/Tyrrrz/DiscordChatExporter.git
synced 2026-06-09 15:52:37 -06:00
podman-compose and docker compose allocate a pseudo-TTY by default; always passing -T caused export progress to be block-buffered. Omit -T for operator runs and set DCE_COMPOSE_TTY=0 only for the cron job, which appends its output to a log file. Adds compose TTY smoke tests and a cron job env assertion.
370 lines
9.9 KiB
Bash
Executable file
370 lines
9.9 KiB
Bash
Executable file
#!/usr/bin/env bash
#
# Installs, previews, or removes a marker-delimited cron block that runs the
# host scrape runner on a schedule. Paths and tool names below take their
# defaults from the repository layout and can be overridden through the
# DCE_* environment variables referenced on each line.

set -Eeuo pipefail

# --- Locations ---------------------------------------------------------------
# The repository root defaults to the parent of the directory holding this
# script.
SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd -P)
REPO_ROOT="${DCE_REPO_ROOT:-$(cd "$SCRIPT_DIR/.." && pwd -P)}"

COMPOSE_FILE="${DCE_COMPOSE_FILE:-$REPO_ROOT/docker-compose.yml}"
ENV_FILE="${DCE_ENV_FILE:-$REPO_ROOT/scrape.env}"
HOST_RUNNER="${DCE_HOST_RUNNER:-$REPO_ROOT/scripts/run-discord-scrape-host.sh}"
CONFIG_FILE="${DCE_CONFIG_FILE:-$REPO_ROOT/config/scrape-targets.json}"
LOG_FILE="${DCE_LOG_FILE:-$REPO_ROOT/logs/discord-scrape.log}"

# --- Schedule and mode defaults (changed by command-line options) ------------
JOB_NAME="discord-scrape"
INTERVAL="monthly"
RUN_AT="04:00"
CRON_EXPRESSION=""
DRY_RUN=0
REMOVE=0
SKIP_PREFLIGHT=0

# --- Selections accumulated from repeatable options --------------------------
TARGETS=()
GUILDS=()
CHANNELS=()

# --- External tools ----------------------------------------------------------
JQ_BIN="${DCE_JQ_BIN:-jq}"
CRONTAB_BIN="${DCE_CRONTAB_BIN:-crontab}"
DOCKER_BIN="${DCE_DOCKER_BIN:-docker}"
COMPOSE_BIN="${DCE_COMPOSE_BIN:-}"

# Remember whether the docker binary was chosen explicitly; main() skips the
# docker-compose shortcut when it was.
DOCKER_BIN_OVERRIDDEN=0
[[ -z "${DCE_DOCKER_BIN:-}" ]] || DOCKER_BIN_OVERRIDDEN=1
|
|
|
|
#######################################
# Prints the command-line help text.
# Globals:   LOG_FILE, CONFIG_FILE, ENV_FILE (read; shown as defaults)
# Arguments: none
# Outputs:   help text on stdout
#######################################
usage() {
  printf '%s\n' \
    "Usage:" \
    "$(basename "$0") [options]" \
    "" \
    "Options:" \
    "--target NAME Restrict the cron job to one configured target. Repeatable." \
    "--guild ID Narrow a selected target to one of its allowed guild IDs. Repeatable." \
    "--channel ID Narrow a selected target to one of its allowed channel IDs. Repeatable." \
    "--interval VALUE monthly, weekly, or daily. Default: monthly" \
    "--at HH:MM Run time in 24-hour format. Default: 04:00" \
    "--cron EXPR Use an explicit five-field cron expression instead of --interval/--at." \
    "--job-name NAME Marker name for the installed cron block. Default: discord-scrape" \
    "--log-file PATH Cron log file. Default: $LOG_FILE" \
    "--config PATH Scrape targets JSON. Default: $CONFIG_FILE" \
    "--env-file PATH Compose env file. Default: $ENV_FILE" \
    "--skip-preflight Install the cron job without running the authenticated container preflight." \
    "--dry-run Print the cron block instead of installing it." \
    "--remove Remove the managed cron block and exit." \
    "--help Show this help text." \
    "" \
    "Examples:" \
    "$(basename "$0")" \
    "$(basename "$0") --target discord_dms --interval weekly --at 02:30" \
    "$(basename "$0") --target Cline --channel 123456789012345678 --channel 234567890123456789"
}
|
|
|
|
#######################################
# Reports a fatal error and terminates the script.
# Arguments: message words (joined with spaces)
# Outputs:   "ERROR: <message>" on stderr
# Returns:   never; exits with status 1
#######################################
die() {
  local message="$*"
  printf 'ERROR: %s\n' "$message" >&2
  exit 1
}
|
|
|
|
#######################################
# Aborts via die() unless the given command can be resolved by the shell.
# Arguments: $1 - command name or path to look up
#######################################
require_program() {
  local candidate=$1

  if ! command -v "$candidate" >/dev/null 2>&1; then
    die "Required command '$candidate' is missing."
  fi
}
|
|
|
|
#######################################
# Translates an interval keyword plus an HH:MM time into a five-field cron
# expression.
# Arguments: $1 - interval: monthly, weekly, or daily
#            $2 - run time as HH:MM (24-hour clock)
# Outputs:   the cron expression on stdout, without a trailing newline
# Returns:   exits through die() on a malformed time or unknown interval
#######################################
cron_from_schedule() {
  local interval=$1
  local run_at=$2
  local hh mm day_fields

  # The time is validated first, so a bad --at is reported even when the
  # interval is also wrong.
  if ! [[ "$run_at" =~ ^([0-1][0-9]|2[0-3]):([0-5][0-9])$ ]]; then
    die "--at must use HH:MM in 24-hour time."
  fi
  hh=${BASH_REMATCH[1]}
  mm=${BASH_REMATCH[2]}

  # Pick the day-of-month / month / day-of-week fields for the interval.
  case "$interval" in
    monthly) day_fields='1 * *' ;;
    weekly) day_fields='* * 0' ;;
    daily) day_fields='* * *' ;;
    *) die "Unsupported --interval '$interval'. Use monthly, weekly, or daily." ;;
  esac

  printf '%s %s %s' "$mm" "$hh" "$day_fields"
}
|
|
|
|
#######################################
# Removes the managed block (begin marker through end marker, inclusive) from
# crontab text and prints everything else unchanged.
# Arguments: $1 - existing crontab contents
#            $2 - exact begin-marker line
#            $3 - exact end-marker line
# Outputs:   crontab text without the managed block on stdout
# Returns:   0 on success; 1 (with a message on stderr) when a begin marker
#            has no matching end marker, so callers do not install a crontab
#            that silently lost every line after the begin marker
#######################################
strip_existing_job() {
  local existing_crontab=$1
  local begin_marker=$2
  local end_marker=$3

  # The markers travel through the environment rather than `awk -v`, because
  # -v applies backslash-escape processing and would alter a marker that
  # contains a backslash, so it would never match its own line.
  DCE_STRIP_BEGIN_MARKER="$begin_marker" DCE_STRIP_END_MARKER="$end_marker" awk '
    BEGIN {
      begin = ENVIRON["DCE_STRIP_BEGIN_MARKER"]
      end = ENVIRON["DCE_STRIP_END_MARKER"]
    }
    $0 == begin { skipping = 1; next }
    $0 == end { skipping = 0; next }
    !skipping { print }
    END {
      if (skipping) {
        print "ERROR: cron block \"" begin "\" has no matching end marker; refusing to drop the lines after it." > "/dev/stderr"
        exit 1
      }
    }
  ' <<<"$existing_crontab"
}
|
|
|
|
#######################################
# Validates a user-supplied --cron value.
# Accepts exactly five whitespace-separated fields built only from digits and
# the characters * , / -. Anything else ends the script through die().
# Arguments: $1 - candidate cron expression
#######################################
validate_cron_expression() {
  local expr=$1
  local -a fields=()
  local field

  # `read` below only consumes the first line, so a multi-line value would
  # pass validation while its remaining lines were later written to the
  # crontab as separate entries. Reject line breaks up front.
  if [[ "$expr" == *$'\n'* || "$expr" == *$'\r'* ]]; then
    die "--cron must be a single line."
  fi

  read -r -a fields <<<"$expr"
  ((${#fields[@]} == 5)) || die "--cron must contain exactly five fields (minute hour day month weekday)."

  # Word splitting never yields empty fields, so only the character set needs
  # checking.
  for field in "${fields[@]}"; do
    [[ "$field" =~ ^[0-9*,/-]+$ ]] || die "Invalid cron field '$field' in --cron expression."
  done
}
|
|
|
|
#######################################
# Appends the selected --target, --guild, and --channel options to a caller's
# argument array, in that order.
# Globals:   TARGETS, GUILDS, CHANNELS (read)
# Arguments: $1 - name of the array variable to extend (used as a nameref)
#######################################
append_target_args() {
  local -n _out=$1
  local idx

  for ((idx = 0; idx < ${#TARGETS[@]}; idx++)); do
    _out+=(--target "${TARGETS[idx]}")
  done

  for ((idx = 0; idx < ${#GUILDS[@]}; idx++)); do
    _out+=(--guild "${GUILDS[idx]}")
  done

  for ((idx = 0; idx < ${#CHANNELS[@]}; idx++)); do
    _out+=(--channel "${CHANNELS[idx]}")
  done
}
|
|
|
|
#######################################
# Rewrites a host config path to the path passed to the runner's --config.
# Paths under "$REPO_ROOT/config/" and relative paths under "config/" are
# mapped below /config/ with their sub-path preserved; any other path is
# printed unchanged.
# NOTE(review): presumably the compose file mounts the repository's config
# directory at /config inside the container - confirm against the compose file.
# Globals:   REPO_ROOT (read)
# Arguments: $1 - config path as given on the host
# Outputs:   the mapped path on stdout
#######################################
container_config_path() {
  local config_path=$1
  local repo_config_prefix="$REPO_ROOT/config/"

  # Strip only the directory prefix. Taking the basename here would flatten
  # nested files (config/sub/x.json -> /config/x.json) and disagree with the
  # relative-path branch below.
  if [[ "$config_path" == "$repo_config_prefix"* ]]; then
    printf '/config/%s\n' "${config_path#"$repo_config_prefix"}"
    return 0
  fi

  if [[ "$config_path" == config/* ]]; then
    printf '/config/%s\n' "${config_path#config/}"
    return 0
  fi

  printf '%s\n' "$config_path"
}
|
|
|
|
#######################################
# Creates the archive root and the output directory of every relevant target.
# With no --target selection, every target whose "enabled" is not false is
# used; otherwise exactly the targets named in TARGETS are used.
# Targets whose output_dir is missing, not a string, or empty are skipped, so
# a JSON null never turns into a directory literally named "null".
# Globals:   JQ_BIN, CONFIG_FILE, TARGETS (read)
# Returns:   exits through die() when the config has no archive_root
#######################################
ensure_target_directories() {
  local archive_root output_dir selected_targets_json

  archive_root=$("$JQ_BIN" -r '.archive_root // empty' "$CONFIG_FILE")
  [[ -n "$archive_root" ]] || die "Config is missing archive_root."
  mkdir -p -- "$archive_root"

  if (( ${#TARGETS[@]} == 0 )); then
    while IFS= read -r output_dir; do
      [[ -n "$output_dir" ]] || continue
      mkdir -p -- "$output_dir"
    done < <(
      "$JQ_BIN" -r \
        '.targets[]
          | select(.enabled != false)
          | .output_dir
          | select(type == "string" and length > 0)' \
        "$CONFIG_FILE"
    )
    return 0
  fi

  # The JSON list of requested names is only needed on this path.
  selected_targets_json=$("$JQ_BIN" -cn '$ARGS.positional' --args "${TARGETS[@]}")

  while IFS= read -r output_dir; do
    [[ -n "$output_dir" ]] || continue
    mkdir -p -- "$output_dir"
  done < <(
    "$JQ_BIN" -r \
      --argjson selected_targets "$selected_targets_json" \
      '.targets[]
        | select(.name as $name | $selected_targets | index($name))
        | .output_dir
        | select(type == "string" and length > 0)' \
      "$CONFIG_FILE"
  )
}
|
|
|
|
#######################################
# Verifies that every --target value names a target in the config file.
# The check computes which requested names are absent instead of comparing
# counts, so repeating a --target or having duplicate names in the config
# cannot cause a false failure or hide a missing target.
# Globals:   JQ_BIN, CONFIG_FILE, TARGETS (read)
# Returns:   0 when nothing was requested or all names exist; otherwise exits
#            through die(), listing the missing names
#######################################
validate_targets() {
  if (( ${#TARGETS[@]} == 0 )); then
    return 0
  fi

  local requested_targets_json missing_targets

  requested_targets_json=$("$JQ_BIN" -cn '$ARGS.positional' --args "${TARGETS[@]}") ||
    die "Unable to encode --target values as JSON."

  # Array subtraction leaves only the requested names the config lacks.
  missing_targets=$(
    "$JQ_BIN" -r \
      --argjson requested_targets "$requested_targets_json" \
      '($requested_targets | unique) - [.targets[].name] | join(", ")' \
      "$CONFIG_FILE"
  ) || die "Unable to read targets from $CONFIG_FILE."

  [[ -z "$missing_targets" ]] ||
    die "One or more --target values are missing from $CONFIG_FILE: $missing_targets"
}
|
|
|
|
#######################################
# Runs the host runner's "preflight" subcommand with the same env file,
# compose file, config, and target selection that the cron job will use.
# Globals:   ENV_FILE, HOST_RUNNER, COMPOSE_FILE, CONFIG_FILE (read)
# Returns:   the host runner's exit status; exits through die() when the env
#            file does not exist
#######################################
run_preflight() {
  local container_config
  local -a preflight_args

  if [[ ! -f "$ENV_FILE" ]]; then
    die "Missing env file: $ENV_FILE"
  fi

  container_config=$(container_config_path "$CONFIG_FILE")
  preflight_args=("$HOST_RUNNER" --env-file "$ENV_FILE" --compose-file "$COMPOSE_FILE")
  preflight_args+=(preflight --config "$container_config")
  append_target_args preflight_args

  # Execute the assembled command line as-is.
  "${preflight_args[@]}"
}
|
|
|
|
#######################################
# Entry point: parses options, validates the environment and configuration,
# then installs, previews (--dry-run), or removes (--remove) the managed cron
# block delimited by "# BEGIN <job-name>" / "# END <job-name>".
# Globals:   all script-level settings (read; option values are written back)
# Arguments: the script's command-line arguments
# Outputs:   with --dry-run, the cron block (or the cleaned crontab when
#            combined with --remove) on stdout
# Returns:   0 on success; exits through die() on invalid input
#######################################
main() {
  while (($#)); do
    case "$1" in
      --target | --guild | --channel | --interval | --at | --cron | \
        --job-name | --log-file | --config | --env-file)
        # Every option in this arm takes exactly one value.
        [[ $# -ge 2 ]] || die "Missing value for $1."
        case "$1" in
          --target) TARGETS+=("$2") ;;
          --guild) GUILDS+=("$2") ;;
          --channel) CHANNELS+=("$2") ;;
          --interval) INTERVAL=$2 ;;
          --at) RUN_AT=$2 ;;
          --cron) CRON_EXPRESSION=$2 ;;
          --job-name) JOB_NAME=$2 ;;
          --log-file) LOG_FILE=$2 ;;
          --config) CONFIG_FILE=$2 ;;
          --env-file) ENV_FILE=$2 ;;
        esac
        shift 2
        ;;
      --skip-preflight)
        SKIP_PREFLIGHT=1
        shift
        ;;
      --dry-run)
        DRY_RUN=1
        shift
        ;;
      --remove)
        REMOVE=1
        shift
        ;;
      --help | -h)
        usage
        exit 0
        ;;
      *)
        die "Unknown option: $1"
        ;;
    esac
  done

  require_program "$JQ_BIN"
  require_program "$CRONTAB_BIN"
  if [[ -n "$COMPOSE_BIN" ]]; then
    require_program "$COMPOSE_BIN"
  elif ((DOCKER_BIN_OVERRIDDEN == 0)) && command -v docker-compose >/dev/null 2>&1; then
    # A standalone docker-compose is present and docker was not chosen
    # explicitly, so the docker binary itself is not required.
    :
  else
    require_program "$DOCKER_BIN"
  fi

  [[ -f "$COMPOSE_FILE" ]] || die "Missing compose file: $COMPOSE_FILE"
  [[ -x "$HOST_RUNNER" ]] || die "Missing or non-executable host runner: $HOST_RUNNER"
  [[ -f "$CONFIG_FILE" ]] || die "Missing config file: $CONFIG_FILE"
  "$JQ_BIN" empty "$CONFIG_FILE" >/dev/null 2>&1 || die "Invalid JSON config: $CONFIG_FILE"

  validate_targets

  if (((${#GUILDS[@]} > 0 || ${#CHANNELS[@]} > 0) && ${#TARGETS[@]} != 1)); then
    die "--guild and --channel overrides require exactly one --target."
  fi

  # The job name is embedded in single-line marker comments. A line break
  # would split a marker (so the block could never be matched again) and put
  # arbitrary extra lines into the crontab.
  if [[ "$JOB_NAME" == *$'\n'* || "$JOB_NAME" == *$'\r'* ]]; then
    die "--job-name must not contain line breaks."
  fi

  local cron_line
  if [[ -n "$CRON_EXPRESSION" ]]; then
    validate_cron_expression "$CRON_EXPRESSION"
    cron_line=$CRON_EXPRESSION
  else
    cron_line=$(cron_from_schedule "$INTERVAL" "$RUN_AT")
  fi

  local begin_marker="# BEGIN ${JOB_NAME}"
  local end_marker="# END ${JOB_NAME}"
  local current_crontab cleaned_crontab scrape_command job_command job_line lock_prefix
  local -a scrape_args=()

  # A failing `crontab -l` (for example, no crontab installed yet) is treated
  # as an empty crontab.
  current_crontab=$("$CRONTAB_BIN" -l 2>/dev/null || true)
  cleaned_crontab=$(strip_existing_job "$current_crontab" "$begin_marker" "$end_marker")

  if ((REMOVE == 1)); then
    if ((DRY_RUN == 1)); then
      printf '%s\n' "$cleaned_crontab"
      exit 0
    fi

    printf '%s\n' "$cleaned_crontab" | "$CRONTAB_BIN" -
    exit 0
  fi

  mkdir -p "$(dirname "$LOG_FILE")"
  ensure_target_directories

  if ((SKIP_PREFLIGHT == 0)); then
    run_preflight
  fi

  scrape_args=(
    "$HOST_RUNNER"
    --env-file "$ENV_FILE"
    --compose-file "$COMPOSE_FILE"
    scrape
    --config "$(container_config_path "$CONFIG_FILE")"
  )
  append_target_args scrape_args
  scrape_command=$(printf '%q ' "${scrape_args[@]}")

  # When flock is available, wrap the command so a run is skipped (-n) while
  # another one still holds the per-job lock file.
  if command -v flock >/dev/null 2>&1; then
    lock_prefix=$(printf '%q ' "$(command -v flock)" "-n" "/tmp/${JOB_NAME}.lock")
  else
    lock_prefix=""
  fi

  # DCE_COMPOSE_TTY=0 is exported to the job because its output is appended
  # to a log file (presumably the host runner uses it to disable the compose
  # pseudo-TTY - confirm in the runner).
  job_command="cd $(printf '%q' "$REPO_ROOT") && DCE_COMPOSE_TTY=0 ${lock_prefix}${scrape_command}>> $(printf '%q' "$LOG_FILE") 2>&1"

  # cron turns an unescaped % in the command field into a newline and feeds
  # the remainder to the command as stdin; printf %q does not escape %, so do
  # it here for any path or target name that contains one.
  job_command=${job_command//%/\\%}
  job_line="$cron_line $job_command"

  local cron_block
  cron_block=$(printf '%s\n%s\n%s\n' "$begin_marker" "$job_line" "$end_marker")

  if ((DRY_RUN == 1)); then
    printf '%s\n' "$cron_block"
    exit 0
  fi

  # Re-install the crontab: everything that was there (minus any previous
  # managed block), a blank separator line, then the fresh block.
  {
    if [[ -n "$cleaned_crontab" ]]; then
      printf '%s\n\n' "$cleaned_crontab"
    fi
    printf '%s\n' "$cron_block"
  } | "$CRONTAB_BIN" -
}
|
|
|
|
# Run the installer, forwarding the command-line arguments unchanged.
main "$@"
|