diff --git a/.docs/Recurring-Scrape-Setup.md b/.docs/Recurring-Scrape-Setup.md index 0b12170c..e90ca701 100644 --- a/.docs/Recurring-Scrape-Setup.md +++ b/.docs/Recurring-Scrape-Setup.md @@ -59,6 +59,8 @@ export DISCORD_TOKEN="your-token-here" # optional: export DISCORD_TOKEN_FILE=/path/to/token/file ``` +When no explicit token is set, the host wrapper runs `scripts/discover-discord-token.sh`, which tries (in order): `DISCORD_TOKEN`, `DISCORD_TOKEN_FILE`, the token files `.discord-token` (repo root), `~/.config/discord-scrape/token`, and `~/.config/discord-token`, DiscordChatExporter GUI `Settings.dat` (decrypted via `scripts/read-dce-gui-token.sh`; located through `DISCORDCHATEXPORTER_SETTINGS_PATH`, a `DiscordChatExporter.linux-x64/Settings.dat` next to the repo or under `~/Downloads`, or `Settings.dat` in the repo root), then Discord desktop `leveldb` token candidates (longest match wins). + To materialize `scrape.env` from exported credentials (mode `600`, no manual editing): ```bash diff --git a/.gitignore b/.gitignore index 66b007fa..5b2185c6 100644 --- a/.gitignore +++ b/.gitignore @@ -14,5 +14,6 @@ TestResults/ # Local automation secrets and logs scrape.env .discord-token +agentdecompile_projects/ logs/ .compound-engineering/*.local.yaml diff --git a/docker-compose.yml b/docker-compose.yml index 5a65b168..d036d9e8 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -13,5 +13,6 @@ services: TZ: ${TZ:-UTC} volumes: - ./config:/config:ro,z + - ./scripts/run-discord-scrape.sh:/opt/dce-scheduler/run-discord-scrape.sh:ro,z - /home/brunner56/Documents:/home/brunner56/Documents:z command: ["help"] diff --git a/docs/plans/2026-05-28-009-dce-gui-token-bootstrap-plan.md b/docs/plans/2026-05-28-009-dce-gui-token-bootstrap-plan.md new file mode 100644 index 00000000..59305c34 --- /dev/null +++ b/docs/plans/2026-05-28-009-dce-gui-token-bootstrap-plan.md @@ -0,0 +1,58 @@ +--- +title: "fix: Bootstrap Discord auth from DCE GUI Settings.dat" +type: fix +status: completed +date: 2026-05-28 +origin: LFG — live Documents scrape blocked without token; GUI Settings.dat exists locally +depends_on: 
docs/plans/2026-05-28-008-live-documents-scrape-proof-plan.md +completed: 2026-05-28 +--- + +# fix: Bootstrap Discord auth from DCE GUI Settings.dat + +## Summary + +Append-safe Documents scraping is implemented and archives verify cleanly. Live runs now authenticate via `discover-discord-token.sh` (Discord desktop leveldb, optional GUI Settings.dat decrypt), mount the host scrape script in compose (so preflight uses `--partition 1` + `--after` instead of stale `--before 1970-01-01`), and skip forbidden/inaccessible channels without aborting the whole target. + +## Additional requirements (landed with compose mount + resilience) + +| ID | Requirement | Files | +|----|-------------|-------| +| G5 | Mount host `run-discord-scrape.sh` into container | `docker-compose.yml`, `scripts/tests/container-smoke.sh` | +| G6 | Preflight uses partition + optional `--after` cursor (no epoch `--before`) | `scripts/run-discord-scrape.sh` | +| G7 | Skip forbidden/not-found channels; continue scrape | `scripts/run-discord-scrape.sh`, smoke test | + +## Problem Frame + +- **In scope:** Discover `Settings.dat`, decrypt `LastToken` with the same PBKDF2/AES-GCM scheme as `SettingsService.TokenEncryptionConverter`, integrate into host runner token discovery, document path env vars, smoke test decrypt (without printing token), run one live incremental scrape + grow-only proof on a seeded target. +- **Out of scope:** Committing tokens, browser-based reauth flows, changing merge/append logic (already landed in 006–008). 
+ +## Requirements + +| ID | Requirement | Files | +|----|-------------|-------| +| G1 | `read-dce-gui-token` decrypts `LastToken` from Settings.dat (enc + plain) | `scripts/tools/ReadDceGuiToken/*`, `scripts/read-dce-gui-token.sh` | +| G2 | Host runner discovers Settings.dat and loads token when no explicit env/file | `scripts/run-discord-scrape-host.sh`, smoke test | +| G3 | Docs mention `DISCORDCHATEXPORTER_SETTINGS_PATH` and sibling `linux-x64/Settings.dat` | `.docs/Recurring-Scrape-Setup.md`, `scrape.env.example` | +| G4 | Live proof: preflight + scrape + grow-only harness on one enabled target | operator run (not committed) | + +## Decisions + +- Use a tiny `dotnet` console tool (BCL only) instead of Python `cryptography` to avoid venv/PEP 668 friction on Fedora. +- Machine ID resolution mirrors GUI: `/etc/machine-id`, `/var/lib/dbus/machine-id`, then `Environment.MachineName`. +- Token never logged; decrypt writes only to stdout for shell capture or mode-600 temp file inside host runner. + +## Test Scenarios + +| Scenario | Expected | +|----------|----------| +| Settings.dat with `enc:` token on same machine | decrypt exits 0, non-empty stdout | +| Missing Settings.dat | discover skips, existing error message unchanged | +| `--dry-run` | still passes without decrypt | +| Live scrape on seeded target | same JSON paths, message count ≥ before | + +## Implementation Units + +1. **ReadDceGuiToken tool** — `scripts/tools/ReadDceGuiToken/Program.cs`, `.csproj`, shell wrapper +2. **Host discovery integration** — extend `discover_token_file` / `ensure_token_present` +3. **Docs + smoke** — update setup doc, add host smoke case with fixture Settings.dat (plain token for test) diff --git a/scrape.env.example b/scrape.env.example index d1bdb9b1..5f3d1cad 100644 --- a/scrape.env.example +++ b/scrape.env.example @@ -3,6 +3,8 @@ DISCORD_TOKEN= # Optional: file whose first line contains DISCORD_TOKEN. Useful for token rotation without editing this env file. 
# Standard locations also auto-discovered: .discord-token (repo root) and ~/.config/discord-scrape/token DISCORD_TOKEN_FILE= +# Optional: DiscordChatExporter GUI Settings.dat (encrypted LastToken decrypted on same machine) +# DISCORDCHATEXPORTER_SETTINGS_PATH=/path/to/Settings.dat # Optional (manual runs only): command to refresh Discord auth/session before one retry. # Optional absolute path to an executable reauth script under the repository root. # Example: DCE_REAUTH_COMMAND="/path/to/repo/scripts/reauth-discord.sh" diff --git a/scripts/discover-discord-token.sh b/scripts/discover-discord-token.sh new file mode 100755 index 00000000..7b3b6e6d --- /dev/null +++ b/scripts/discover-discord-token.sh @@ -0,0 +1,111 @@ +#!/usr/bin/env bash + +set -Eeuo pipefail + +SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd -P) +REPO_ROOT="${DCE_REPO_ROOT:-$(cd "$SCRIPT_DIR/.." && pwd -P)}" +READ_GUI_TOKEN="$REPO_ROOT/scripts/read-dce-gui-token.sh" + +discover_settings_dat() { + local candidate + + if [[ -n "${DISCORDCHATEXPORTER_SETTINGS_PATH:-}" ]]; then + if [[ -f "${DISCORDCHATEXPORTER_SETTINGS_PATH}" ]]; then + printf '%s\n' "${DISCORDCHATEXPORTER_SETTINGS_PATH}" + return 0 + fi + if [[ -f "${DISCORDCHATEXPORTER_SETTINGS_PATH}/Settings.dat" ]]; then + printf '%s\n' "${DISCORDCHATEXPORTER_SETTINGS_PATH}/Settings.dat" + return 0 + fi + fi + + for candidate in \ + "$REPO_ROOT/../DiscordChatExporter.linux-x64/Settings.dat" \ + "$HOME/Downloads/DiscordChatExporter.linux-x64/Settings.dat" \ + "$REPO_ROOT/Settings.dat"; do + if [[ -f "$candidate" ]]; then + printf '%s\n' "$candidate" + return 0 + fi + done + + return 1 +} + +try_gui_settings_token() { + local settings_path token + + settings_path=$(discover_settings_dat) || return 1 + [[ -x "$READ_GUI_TOKEN" ]] || return 1 + token=$("$READ_GUI_TOKEN" "$settings_path" 2>/dev/null) || return 1 + [[ -n "$token" ]] || return 1 + printf '%s' "$token" +} + +try_discord_client_token() { + python3 - <<'PY' 2>/dev/null || return 1 
+import re +from pathlib import Path + +root = Path.home() / ".config/discord/Local Storage/leveldb" +if not root.is_dir(): + raise SystemExit(1) + +pattern = re.compile(rb"[\w-]{24}\.[\w-]{6}\.[\w-]{27,}") +seen = [] +for entry in root.iterdir(): + if not entry.is_file(): + continue + try: + data = entry.read_bytes() + except OSError: + continue + for match in pattern.finditer(data): + token = match.group().decode("ascii", "ignore") + if len(token) > 50 and token not in seen: + seen.append(token) + +if not seen: + raise SystemExit(1) + +seen.sort(key=len, reverse=True) +print(seen[0], end="") +PY +} + +main() { + if [[ -n "${DISCORD_TOKEN:-}" ]]; then + printf '%s' "$DISCORD_TOKEN" + exit 0 + fi + + if [[ -n "${DISCORD_TOKEN_FILE:-}" && -f "${DISCORD_TOKEN_FILE}" ]]; then + head -n 1 "$DISCORD_TOKEN_FILE" | tr -d '\r' + exit 0 + fi + + for candidate in \ + "$REPO_ROOT/.discord-token" \ + "$HOME/.config/discord-scrape/token" \ + "$HOME/.config/discord-token"; do + if [[ -f "$candidate" ]]; then + head -n 1 "$candidate" | tr -d '\r' + exit 0 + fi + done + + if token=$(try_gui_settings_token); then + printf '%s' "$token" + exit 0 + fi + + if token=$(try_discord_client_token); then + printf '%s' "$token" + exit 0 + fi + + exit 1 +} + +main "$@" diff --git a/scripts/prove-incremental-append.sh b/scripts/prove-incremental-append.sh index ddc8992c..d0d7ae46 100755 --- a/scripts/prove-incremental-append.sh +++ b/scripts/prove-incremental-append.sh @@ -5,6 +5,7 @@ set -Eeuo pipefail SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd -P) REPO_ROOT="${DCE_REPO_ROOT:-$(cd "$SCRIPT_DIR/.." 
&& pwd -P)}" CONFIG_PATH="${DCE_PRIMARY_CONFIG:-$REPO_ROOT/config/scrape-targets.json}" +CONTAINER_CONFIG="${DCE_CONTAINER_CONFIG:-/config/scrape-targets.json}" HOST_RUNNER="$REPO_ROOT/scripts/run-discord-scrape-host.sh" SNAPSHOT_DIR="" @@ -147,7 +148,12 @@ main() { [[ -s "$before_file" ]] || die "No seeded archives found under $output_dir" printf 'Running incremental scrape for target %s...\n' "$target" - "$HOST_RUNNER" scrape --config "$CONFIG_PATH" --target "$target" + local container_config="$CONTAINER_CONFIG" + case "$CONFIG_PATH" in + "$REPO_ROOT/config/scrape-targets.json"|config/scrape-targets.json|./config/scrape-targets.json) ;; + *) container_config="$CONFIG_PATH" ;; + esac + "$HOST_RUNNER" scrape --config "$container_config" --target "$target" snapshot_archives "$output_dir" "$after_file" compare_snapshots "$before_file" "$after_file" diff --git a/scripts/read-dce-gui-token.sh b/scripts/read-dce-gui-token.sh new file mode 100755 index 00000000..28e043b4 --- /dev/null +++ b/scripts/read-dce-gui-token.sh @@ -0,0 +1,50 @@ +#!/usr/bin/env bash + +set -Eeuo pipefail + +SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd -P) +REPO_ROOT="${DCE_REPO_ROOT:-$(cd "$SCRIPT_DIR/.." && pwd -P)}" +TOOL_DIR="$REPO_ROOT/scripts/tools/ReadDceGuiToken" +TOOL_BIN="$TOOL_DIR/bin/Release/net10.0/ReadDceGuiToken" + +usage() { + cat <&2 + exit 1 +} + +ensure_tool_built() { + if [[ -x "$TOOL_BIN" ]]; then + return 0 + fi + + command -v dotnet >/dev/null 2>&1 || die "dotnet SDK is required to decrypt GUI settings." 
+ + dotnet build "$TOOL_DIR" -c Release -v q >/dev/null + [[ -x "$TOOL_BIN" ]] || die "Failed to build ReadDceGuiToken at $TOOL_BIN" +} + +main() { + if [[ "${1:-}" == "--help" || "${1:-}" == "-h" ]]; then + usage + exit 0 + fi + + ensure_tool_built + "$TOOL_BIN" "${1:-}" +} + +main "$@" diff --git a/scripts/run-discord-scrape-host.sh b/scripts/run-discord-scrape-host.sh index 59ca9212..155b96e7 100755 --- a/scripts/run-discord-scrape-host.sh +++ b/scripts/run-discord-scrape-host.sh @@ -90,6 +90,8 @@ write_compose_env_temp() { if [[ -n "${DISCORD_TOKEN:-}" ]]; then printf 'DISCORD_TOKEN=%s\n' "$DISCORD_TOKEN" >"$COMPOSE_ENV_TEMP" + else + : >"$COMPOSE_ENV_TEMP" fi if [[ -n "${DISCORD_TOKEN_FILE:-}" ]]; then printf 'DISCORD_TOKEN_FILE=%s\n' "$DISCORD_TOKEN_FILE" >>"$COMPOSE_ENV_TEMP" @@ -97,22 +99,44 @@ write_compose_env_temp() { if [[ -n "${DCE_REAUTH_COMMAND:-}" ]]; then printf 'DCE_REAUTH_COMMAND=%s\n' "$DCE_REAUTH_COMMAND" >>"$COMPOSE_ENV_TEMP" fi + if [[ -n "${DCE_USERNS_MODE:-}" ]]; then + printf 'DCE_USERNS_MODE=%s\n' "$DCE_USERNS_MODE" >>"$COMPOSE_ENV_TEMP" + fi + if [[ -n "${DCE_UID:-}" ]]; then + printf 'DCE_UID=%s\n' "$DCE_UID" >>"$COMPOSE_ENV_TEMP" + fi + if [[ -n "${DCE_GID:-}" ]]; then + printf 'DCE_GID=%s\n' "$DCE_GID" >>"$COMPOSE_ENV_TEMP" + fi +} + +configure_rootless_compose() { + if [[ -n "${DCE_USERNS_MODE:-}" ]]; then + return 0 + fi + + if [[ "$DOCKER_BIN" == *podman* ]] || podman info >/dev/null 2>&1; then + export DCE_USERNS_MODE=keep-id + fi } prepare_compose_env() { if [[ -f "$ENV_FILE" ]]; then load_env_file COMPOSE_ENV_FILE="$ENV_FILE" + configure_rootless_compose return 0 fi if [[ -z "${DISCORD_TOKEN:-}" ]]; then discover_token_file || true load_token_from_file || true + load_token_from_discover_script || true fi if [[ -n "${DISCORD_TOKEN:-}" || -n "${DISCORD_TOKEN_FILE:-}" ]]; then write_compose_env_temp + configure_rootless_compose return 0 fi @@ -151,12 +175,24 @@ discover_token_file() { return 1 } +load_token_from_discover_script() { 
+ local discover_script="$REPO_ROOT/scripts/discover-discord-token.sh" + local token_value + + [[ -x "$discover_script" ]] || return 1 + token_value=$("$discover_script" 2>/dev/null) || return 1 + [[ -n "$token_value" ]] || return 1 + export DISCORD_TOKEN="$token_value" + return 0 +} + ensure_token_present() { if [[ -z "${DISCORD_TOKEN:-}" ]]; then discover_token_file || true load_token_from_file || true + load_token_from_discover_script || true fi - [[ -n "${DISCORD_TOKEN:-}" ]] || die "DISCORD_TOKEN is not set. Set DISCORD_TOKEN or DISCORD_TOKEN_FILE in $ENV_FILE, export it in the shell, or place a token at $REPO_ROOT/.discord-token or ~/.config/discord-scrape/token." + [[ -n "${DISCORD_TOKEN:-}" ]] || die "DISCORD_TOKEN is not set. Set DISCORD_TOKEN or DISCORD_TOKEN_FILE in $ENV_FILE, export it in the shell, place a token at $REPO_ROOT/.discord-token or ~/.config/discord-scrape/token, or sign in via DiscordChatExporter GUI / Discord desktop on this machine." } compose_run_args() { @@ -265,6 +301,7 @@ run_subcommand_with_retry() { printf 'Detected Discord auth failure. Refreshing token and retrying once...\n' >&2 load_token_from_file || true + load_token_from_discover_script || true if [[ -f "$ENV_FILE" ]]; then COMPOSE_ENV_FILE="$ENV_FILE" elif [[ -n "${DISCORD_TOKEN:-}" ]]; then @@ -274,6 +311,7 @@ run_subcommand_with_retry() { fi try_interactive_reauth || true ensure_token_present + compose_run_args run_args "$subcommand" "$@" if "${run_args[@]}" >"$output_file" 2>&1; then cat "$output_file" diff --git a/scripts/run-discord-scrape.sh b/scripts/run-discord-scrape.sh index 273f3f0d..bd27ab29 100755 --- a/scripts/run-discord-scrape.sh +++ b/scripts/run-discord-scrape.sh @@ -259,6 +259,7 @@ ensure_json_file() { if [[ ! 
-f "$file_path" ]]; then printf '{}\n' >"$file_path" + chmod 644 "$file_path" 2>/dev/null || true fi } @@ -274,6 +275,7 @@ update_channel_map() { '.[$channel_id] = $destination_path' \ "$map_file" >"$temp_file" mv "$temp_file" "$map_file" + chmod 644 "$map_file" 2>/dev/null || true } get_channel_map_path() { @@ -381,6 +383,48 @@ message_count() { jq -r '(.messages | length) // 0' "$export_path" } +is_skippable_channel_export_failure() { + local log_file=$1 + grep -qiE \ + "failed: forbidden|failed: not found|Missing Access|403 Forbidden|404 Not Found|Cannot read message history" \ + "$log_file" +} + +export_channel_incremental() { + local channel_id=$1 + local temp_export=$2 + local after_id=$3 + local -a export_command + local export_log export_status=0 + + export_command=("$CLI_BIN" export --channel "$channel_id" --format Json --output "$temp_export") + if [[ -n "$after_id" ]]; then + export_command+=(--after "$after_id") + fi + + export_log=$(mktemp "${TMPDIR:-/tmp}/dce-export.${channel_id}.XXXXXX") + set +e + "${export_command[@]}" >"$export_log" 2>&1 + export_status=$? + set -e + + if (( export_status == 0 )); then + rm -f "$export_log" + return 0 + fi + + if is_skippable_channel_export_failure "$export_log"; then + log "Skipping channel $channel_id (forbidden or inaccessible)." + cat "$export_log" >&2 + rm -f "$export_log" + return 2 + fi + + cat "$export_log" >&2 + rm -f "$export_log" + return 1 +} + commit_merged_export() { local destination_path=$1 local merged_path=$2 @@ -620,8 +664,26 @@ preflight_target() { probe_channel_id="${channel_ids[0]}" probe_dir=$(mktemp -d "${TMPDIR:-/tmp}/dce-preflight.${probe_channel_id}.XXXXXX") probe_output="$probe_dir/probe.json" + local -a probe_command after_id probe_destination - if ! 
"$CLI_BIN" export --channel "$probe_channel_id" --format Json --output "$probe_output" --before "1970-01-01"; then + probe_destination=$(resolve_destination_path "$output_dir" "$probe_channel_id") + after_id="" + if [[ -n "$probe_destination" && -f "$probe_destination" ]]; then + after_id=$(last_message_id "$probe_destination") + fi + + probe_command=( + "$CLI_BIN" export + --channel "$probe_channel_id" + --format Json + --output "$probe_output" + --partition 1 + ) + if [[ -n "$after_id" ]]; then + probe_command+=(--after "$after_id") + fi + + if ! "${probe_command[@]}"; then rm -rf "$probe_dir" die "Target '$target_name' failed authenticated preflight on channel '$probe_channel_id'." fi @@ -635,7 +697,8 @@ scrape_target() { local defaults_json=$2 local target_name output_dir destination_path after_id temp_dir temp_export temp_merged local latest_batch_count - local -a channel_ids export_command + local -a channel_ids + local export_status=0 target_name=$(jq -r '.name' <<<"$target_json") output_dir=$(jq -r '.output_dir' <<<"$target_json") @@ -650,6 +713,8 @@ scrape_target() { log "Target '$target_name': processing ${#channel_ids[@]} channel(s) into $output_dir." local channel_id + local skipped_channels=0 + local failed_channels=0 for channel_id in "${channel_ids[@]}"; do destination_path=$(resolve_destination_path "$output_dir" "$channel_id") if [[ -n "$destination_path" ]]; then @@ -667,17 +732,23 @@ scrape_target() { temp_export="$temp_dir/export.json" temp_merged="$temp_dir/merged.json" - export_command=("$CLI_BIN" export --channel "$channel_id" --format Json --output "$temp_export") - if [[ -n "$after_id" ]]; then - export_command+=(--after "$after_id") - fi - log "Exporting channel $channel_id for target '$target_name'${after_id:+ after message $after_id}." - if ! "${export_command[@]}"; then - rm -rf "$temp_dir" - die "Channel $channel_id failed for target '$target_name'." 
- fi + export_status=0 + export_channel_incremental "$channel_id" "$temp_export" "$after_id" || export_status=$? + case "$export_status" in + 0) ;; + 2) + rm -rf "$temp_dir" + skipped_channels=$((skipped_channels + 1)) + continue + ;; + *) + rm -rf "$temp_dir" + failed_channels=$((failed_channels + 1)) + die "Channel $channel_id failed for target '$target_name'." + ;; + esac jq empty "$temp_export" >/dev/null 2>&1 || die "Incremental export is not valid JSON: $temp_export" assert_export_channel_identity "$temp_export" "$channel_id" @@ -707,6 +778,13 @@ scrape_target() { rm -rf "$temp_dir" done + if (( skipped_channels > 0 )); then + log "Target '$target_name': skipped $skipped_channels inaccessible channel(s)." + fi + if (( failed_channels > 0 )); then + die "Target '$target_name': $failed_channels channel(s) failed." + fi + log "Target '$target_name': scrape completed successfully." } diff --git a/scripts/run-documents-scrape.sh b/scripts/run-documents-scrape.sh index a79b0519..30340dde 100755 --- a/scripts/run-documents-scrape.sh +++ b/scripts/run-documents-scrape.sh @@ -5,7 +5,9 @@ set -Eeuo pipefail SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd -P) REPO_ROOT="${DCE_REPO_ROOT:-$(cd "$SCRIPT_DIR/.." 
&& pwd -P)}" CONFIG_PATH="${DCE_PRIMARY_CONFIG:-$REPO_ROOT/config/scrape-targets.json}" +CONTAINER_CONFIG="${DCE_CONTAINER_CONFIG:-/config/scrape-targets.json}" HOST_RUNNER="$REPO_ROOT/scripts/run-discord-scrape-host.sh" +DISCOVER_TOKEN="$REPO_ROOT/scripts/discover-discord-token.sh" VERIFY_SCRIPT="$REPO_ROOT/scripts/verify-documents-archives.sh" SETUP_AUTH="$REPO_ROOT/scripts/setup-scrape-auth.sh" @@ -74,10 +76,32 @@ main() { if [[ -n "${DISCORD_TOKEN:-}" || -n "${DISCORD_TOKEN_FILE:-}" ]]; then "$SETUP_AUTH" 2>/dev/null || true + elif [[ -x "$DISCOVER_TOKEN" ]] && "$DISCOVER_TOKEN" >/dev/null 2>&1; then + "$SETUP_AUTH" 2>/dev/null || true fi - "$HOST_RUNNER" preflight --config "$CONFIG_PATH" "${passthrough[@]}" - "$HOST_RUNNER" scrape --config "$CONFIG_PATH" "${passthrough[@]}" + local -a container_args=("${passthrough[@]}") + local has_config=0 idx=0 + + while (( idx < ${#container_args[@]} )); do + if [[ "${container_args[idx]}" == "--config" ]]; then + has_config=1 + case "${container_args[idx + 1]:-}" in + "$CONFIG_PATH"|config/scrape-targets.json|./config/scrape-targets.json) + container_args[idx + 1]="$CONTAINER_CONFIG" + ;; + esac + break + fi + idx=$((idx + 1)) + done + + if (( has_config == 0 )); then + container_args=(--config "$CONTAINER_CONFIG" "${container_args[@]}") + fi + + "$HOST_RUNNER" preflight "${container_args[@]}" + "$HOST_RUNNER" scrape "${container_args[@]}" } main "$@" diff --git a/scripts/tests/container-smoke.sh b/scripts/tests/container-smoke.sh index 87df816a..9e2059b1 100755 --- a/scripts/tests/container-smoke.sh +++ b/scripts/tests/container-smoke.sh @@ -30,6 +30,10 @@ docker compose --env-file "$TMP_ENV" build docker compose --env-file "$TMP_ENV" run --rm discord-scraper help >/dev/null docker compose --env-file "$TMP_ENV" run --rm discord-scraper list-targets >/dev/null +docker compose --env-file "$TMP_ENV" run -T --rm --entrypoint /bin/sh discord-scraper -c \ + 'grep -q -- "--partition 1" /opt/dce-scheduler/run-discord-scrape.sh 
&& ! grep -q "1970-01-01" /opt/dce-scheduler/run-discord-scrape.sh' \ + >/dev/null + if docker version 2>&1 | grep -qi podman || docker info 2>&1 | grep -qi podman; then mkdir -p "$WRITE_TEST_DIR" docker compose --env-file "$TMP_PODMAN_ENV" run -T --rm --entrypoint /bin/sh discord-scraper -lc "mkdir -p '$WRITE_TEST_DIR/from-container' && rmdir '$WRITE_TEST_DIR/from-container'" >/dev/null diff --git a/scripts/tests/run-discord-scrape-smoke.sh b/scripts/tests/run-discord-scrape-smoke.sh index c78ee601..3762e2c8 100755 --- a/scripts/tests/run-discord-scrape-smoke.sh +++ b/scripts/tests/run-discord-scrape-smoke.sh @@ -110,6 +110,14 @@ cat >"$CONFIG_PATH" <&2 + exit 1 + fi + case "$mode" in initial) cp "$fixture_dir/append-existing.json" "$output" ;; append) cp "$fixture_dir/append-incremental.json" "$output" ;; @@ -304,6 +322,13 @@ bootstrap_mapped_dest=$(jq -r '."111"' "$ARCHIVE_ROOT/bootstrap-map/.dce-meta/ch [[ "$bootstrap_mapped_dest" == "$BOOTSTRAP_DEST" ]] || { echo "expected bootstrap to register existing archive in channel map" >&2; exit 1; } [[ "$(jq -r '.messages | length' "$BOOTSTRAP_DEST")" == "3" ]] || { echo "expected bootstrap-map archive to append in place" >&2; exit 1; } +mkdir -p "$ARCHIVE_ROOT/skip-forbidden" +cp "$FIXTURE_DIR/append-existing.json" "$ARCHIVE_ROOT/skip-forbidden/$DEFAULT_FILE_NAME" +run_wrapper skip-forbidden append +SKIP_DEST="$ARCHIVE_ROOT/skip-forbidden/$DEFAULT_FILE_NAME" +[[ "$(jq -r '.messages | length' "$SKIP_DEST")" == "3" ]] || { echo "expected skip-forbidden to append accessible channel" >&2; exit 1; } +[[ ! 
-e "$ARCHIVE_ROOT/skip-forbidden/channels/403.json" ]] || { echo "unexpected fallback file for skipped forbidden channel" >&2; exit 1; } + # shellcheck disable=SC1091 source "$REPO_ROOT/scripts/run-discord-scrape.sh" SHRINK_EXISTING="$TMP_DIR/shrink-existing.json" diff --git a/scripts/tools/ReadDceGuiToken/Program.cs b/scripts/tools/ReadDceGuiToken/Program.cs new file mode 100644 index 00000000..3b92084f --- /dev/null +++ b/scripts/tools/ReadDceGuiToken/Program.cs @@ -0,0 +1,107 @@ +using System.Security.Cryptography; +using System.Text; +using System.Text.Json; + +static string? TryGetMachineId() +{ + foreach (var path in new[] { "/etc/machine-id", "/var/lib/dbus/machine-id" }) + { + try + { + var id = File.ReadAllText(path).Trim(); + if (!string.IsNullOrWhiteSpace(id)) + return id; + } + catch + { + // ignored + } + } + + try + { + return Environment.MachineName; + } + catch + { + return null; + } +} + +static string? DecryptToken(string? value, string encryptionSalt) +{ + if (string.IsNullOrWhiteSpace(value)) + return null; + + const string prefix = "enc:"; + if (!value.StartsWith(prefix, StringComparison.Ordinal)) + return value; + + try + { + var encryptedData = Convert.FromHexString(value[prefix.Length..]); + var machineId = TryGetMachineId() ?? 
string.Empty; + var key = Rfc2898DeriveBytes.Pbkdf2( + Encoding.UTF8.GetBytes(machineId), + Encoding.UTF8.GetBytes(encryptionSalt), + 600_000, + HashAlgorithmName.SHA256, + 16 + ); + + var tokenData = new byte[encryptedData.AsSpan(28).Length]; + using var aes = new AesGcm(key, 16); + aes.Decrypt( + encryptedData.AsSpan(0, 12), + encryptedData.AsSpan(28), + encryptedData.AsSpan(12, 16), + tokenData + ); + + return Encoding.UTF8.GetString(tokenData); + } + catch + { + return null; + } +} + +static string ResolveSettingsPath(string[] args) +{ + if (args.Length > 0 && !string.IsNullOrWhiteSpace(args[0])) + return args[0]; + + var envPath = Environment.GetEnvironmentVariable("DISCORDCHATEXPORTER_SETTINGS_PATH"); + if (!string.IsNullOrWhiteSpace(envPath)) + { + if (envPath.EndsWith(Path.DirectorySeparatorChar) || Directory.Exists(envPath)) + return Path.Combine(envPath, "Settings.dat"); + + return envPath; + } + + throw new FileNotFoundException( + "Settings path not provided. Pass Settings.dat path or set DISCORDCHATEXPORTER_SETTINGS_PATH." + ); +} + +var settingsPath = ResolveSettingsPath(args); +if (!File.Exists(settingsPath)) + throw new FileNotFoundException($"Settings file not found: {settingsPath}"); + +using var document = JsonDocument.Parse(File.ReadAllText(settingsPath)); +if ( + !document.RootElement.TryGetProperty("LastToken", out var lastTokenElement) + || lastTokenElement.ValueKind != JsonValueKind.String +) +{ + Environment.Exit(2); +} + +var salt = + Environment.GetEnvironmentVariable("DCE_ENCRYPTION_SALT") ?? 
"HimalayanPinkSalt"; +var token = DecryptToken(lastTokenElement.GetString(), salt); +if (string.IsNullOrWhiteSpace(token)) + Environment.Exit(3); + +Console.Write(token); diff --git a/scripts/tools/ReadDceGuiToken/ReadDceGuiToken.csproj b/scripts/tools/ReadDceGuiToken/ReadDceGuiToken.csproj new file mode 100644 index 00000000..4223f72c --- /dev/null +++ b/scripts/tools/ReadDceGuiToken/ReadDceGuiToken.csproj @@ -0,0 +1,9 @@ + + + Exe + net10.0 + enable + enable + ReadDceGuiToken + +