This commit is contained in:
Ben Rucker 2026-02-12 20:39:31 -05:00 committed by GitHub
commit 2a49bb9979
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
9 changed files with 177 additions and 7 deletions

View file

@ -1,4 +1,5 @@
using System.IO; using System;
using System.IO;
using System.Linq; using System.Linq;
using System.Threading.Tasks; using System.Threading.Tasks;
using CliFx.Infrastructure; using CliFx.Infrastructure;
@ -39,4 +40,51 @@ public class SelfContainedSpecs
.Should() .Should()
.BeTrue(); .BeTrue();
} }
[Fact]
public async Task I_can_export_a_channel_and_download_all_referenced_assets_with_nested_paths()
{
    // Arrange
    using var dir = TempDir.Create();
    var filePath = Path.Combine(dir.Path, "output.html");

    // Act
    await new ExportChannelsCommand
    {
        Token = Secrets.DiscordToken,
        ChannelIds = [ChannelIds.SelfContainedTestCases],
        ExportFormat = ExportFormat.HtmlDark,
        OutputPath = filePath,
        ShouldDownloadAssets = true,
        ShouldUseNestedMediaFilePaths = true,
    }.ExecuteAsync(new FakeConsole());

    // Assert
    // Every `src` in the exported document is resolved relative to the output directory
    // and must point to a file on disk. Asserting on the set of missing files (instead of
    // a single boolean) makes a failure list the offending paths, so no debug output is needed.
    Html.Parse(await File.ReadAllTextAsync(filePath))
        .QuerySelectorAll("body [src]")
        .Select(e => e.GetAttribute("src")!)
        .Select(f => Path.GetFullPath(f, dir.Path))
        .Where(f => !File.Exists(f))
        .Should()
        .BeEmpty("all assets referenced by the export should have been downloaded");
}
} }

View file

@ -97,10 +97,17 @@ public abstract class ExportCommandBase : DiscordCommandBase
[CommandOption( [CommandOption(
"reuse-media", "reuse-media",
Description = "Reuse previously downloaded assets to avoid redundant requests." Description = "Reuse previously downloaded assets to avoid redundant requests. "
+ "Keep --media-nested consistent across runs for best results."
)] )]
public bool ShouldReuseAssets { get; init; } = false; public bool ShouldReuseAssets { get; init; } = false;
// Opt-in flag (--media-nested, off by default) that is forwarded to the export request.
// Validation in this class rejects it unless --media is also specified.
[CommandOption(
"media-nested",
Description = "Saves assets with a nested file naming convention, creating subdirectories for each distinct asset type."
)]
public bool ShouldUseNestedMediaFilePaths { get; init; } = false;
[CommandOption( [CommandOption(
"media-dir", "media-dir",
Description = "Download assets to this directory. " Description = "Download assets to this directory. "
@ -154,6 +161,12 @@ public abstract class ExportCommandBase : DiscordCommandBase
throw new CommandException("Option --reuse-media cannot be used without --media."); throw new CommandException("Option --reuse-media cannot be used without --media.");
} }
// Nested media file paths can only be enabled if the download assets option is set
if (ShouldUseNestedMediaFilePaths && !ShouldDownloadAssets)
{
throw new CommandException("Option --media-nested cannot be used without --media.");
}
// Assets directory can only be specified if the download assets option is set // Assets directory can only be specified if the download assets option is set
if (!string.IsNullOrWhiteSpace(AssetsDirPath) && !ShouldDownloadAssets) if (!string.IsNullOrWhiteSpace(AssetsDirPath) && !ShouldDownloadAssets)
{ {
@ -270,6 +283,7 @@ public abstract class ExportCommandBase : DiscordCommandBase
ShouldFormatMarkdown, ShouldFormatMarkdown,
ShouldDownloadAssets, ShouldDownloadAssets,
ShouldReuseAssets, ShouldReuseAssets,
ShouldUseNestedMediaFilePaths,
Locale, Locale,
IsUtcNormalizationEnabled IsUtcNormalizationEnabled
); );

View file

@ -1,6 +1,7 @@
using System; using System;
using System.Collections.Generic; using System.Collections.Generic;
using System.IO; using System.IO;
using System.Linq;
using System.Security.Cryptography; using System.Security.Cryptography;
using System.Text; using System.Text;
using System.Text.RegularExpressions; using System.Text.RegularExpressions;
@ -13,7 +14,11 @@ using DiscordChatExporter.Core.Utils.Extensions;
namespace DiscordChatExporter.Core.Exporting; namespace DiscordChatExporter.Core.Exporting;
internal partial class ExportAssetDownloader(string workingDirPath, bool reuse) internal partial class ExportAssetDownloader(
string workingDirPath,
bool reuse,
bool useNestedMediaFilePaths
)
{ {
private static readonly AsyncKeyedLocker<string> Locker = new(); private static readonly AsyncKeyedLocker<string> Locker = new();
@ -25,8 +30,10 @@ internal partial class ExportAssetDownloader(string workingDirPath, bool reuse)
CancellationToken cancellationToken = default CancellationToken cancellationToken = default
) )
{ {
var fileName = GetFileNameFromUrl(url); var localFilePath = useNestedMediaFilePaths
var filePath = Path.Combine(workingDirPath, fileName); ? GetFilePathFromUrl(url)
: GetFileNameFromUrlLegacy(url);
var filePath = Path.Combine(workingDirPath, localFilePath);
using var _ = await Locker.LockAsync(filePath, cancellationToken); using var _ = await Locker.LockAsync(filePath, cancellationToken);
@ -44,6 +51,9 @@ internal partial class ExportAssetDownloader(string workingDirPath, bool reuse)
{ {
// Download the file // Download the file
using var response = await Http.Client.GetAsync(url, innerCancellationToken); using var response = await Http.Client.GetAsync(url, innerCancellationToken);
var directory = Path.GetDirectoryName(filePath);
if (directory != null)
Directory.CreateDirectory(directory);
await using var output = File.Create(filePath); await using var output = File.Create(filePath);
await response.Content.CopyToAsync(output, innerCancellationToken); await response.Content.CopyToAsync(output, innerCancellationToken);
}, },
@ -54,6 +64,80 @@ internal partial class ExportAssetDownloader(string workingDirPath, bool reuse)
} }
} }
internal partial class ExportAssetDownloader
{
    // Maps an asset URL to a relative file path (using '/' separators) that mirrors the
    // structure of the URL. Discord CDN URLs keep their URL path as the directory, with
    // the remaining query parameters folded into the file name. Every other URL is
    // placed under `external/<host>/...`.
    private static string GetFilePathFromUrl(string url)
    {
        var uri = new Uri(NormalizeUrl(url));

        // If this isn't a Discord CDN URL, save the file to the `external` folder
        if (!string.Equals(uri.Host, "cdn.discordapp.com", StringComparison.OrdinalIgnoreCase))
            return GetExternalFilePath(uri);

        // Split the URL path into the directory part and the file name
        var pathAndFileName = Regex.Match(uri.AbsolutePath, @"/(.+)/([^?]*)");
        var path = pathAndFileName.Groups[1].Value;
        var fileName = pathAndFileName.Groups[2].Value;

        // A Discord CDN URL is expected to yield both parts.
        // If we encounter an unexpected Discord URL, just treat it like an external one.
        if (string.IsNullOrWhiteSpace(path) || string.IsNullOrWhiteSpace(fileName))
            return GetExternalFilePath(uri);

        return $"{path}/{Path.EscapeFileName(AppendQueryParamsToFileName(fileName, uri))}";
    }

    // Remove signature parameters from Discord CDN URLs to normalize them.
    // URLs on any other host are returned unchanged.
    private static string NormalizeUrl(string url)
    {
        var uri = new Uri(url);
        if (!string.Equals(uri.Host, "cdn.discordapp.com", StringComparison.OrdinalIgnoreCase))
            return url;

        var query = HttpUtility.ParseQueryString(uri.Query);
        query.Remove("ex");
        query.Remove("is");
        query.Remove("hm");

        var queryString = query.ToString();
        if (string.IsNullOrEmpty(queryString))
            return uri.GetLeftPart(UriPartial.Path);

        return $"{uri.GetLeftPart(UriPartial.Path)}?{queryString}";
    }

    // Stringifies the query params to be included in a filename.
    // Returns a string like "size-256_spoiler-false" (a key without a value is emitted
    // as just the key), or an empty string if there are no query params.
    private static string FormatQueryParamsForFilename(Uri uri)
    {
        var query = HttpUtility.ParseQueryString(uri.Query);

        return string.Join(
            "_",
            query
                .AllKeys.Where(key => !string.IsNullOrEmpty(key))
                .Select(key => string.IsNullOrEmpty(query[key]) ? key : $"{key}-{query[key]}")
        );
    }

    // Inserts the formatted query params between the file name and its extension,
    // e.g. "avatar.png" with "?size=256" becomes "avatar_size-256.png".
    private static string AppendQueryParamsToFileName(string fileName, Uri uri)
    {
        var queryParamsString = FormatQueryParamsForFilename(uri);
        var queryParamsSuffix = string.IsNullOrEmpty(queryParamsString)
            ? ""
            : $"_{queryParamsString}";

        var fileNameWithoutExtension = Path.GetFileNameWithoutExtension(fileName);
        var fileExtension = Path.GetExtension(fileName);

        return $"{fileNameWithoutExtension}{queryParamsSuffix}{fileExtension}";
    }

    // Builds the path for a non-Discord (or unrecognized) URL: `external/<host>/<url path>`.
    private static string GetExternalFilePath(Uri uri)
    {
        // Defensive: drop dot segments so that the result can never climb out of the
        // assets directory. Uri is expected to have resolved these already.
        var segments = uri
            .AbsolutePath.Split('/', StringSplitOptions.RemoveEmptyEntries)
            .Where(segment => segment is not ("." or ".."))
            .ToList();

        // A URL path that is empty or ends with a slash has no file name; without a
        // placeholder the result would denote a directory and the file could not be created.
        var hasFileName = segments.Count > 0 && !uri.AbsolutePath.EndsWith('/');
        var fileName = hasFileName ? segments[^1] : "index";
        if (hasFileName)
            segments.RemoveAt(segments.Count - 1);

        // Include the query params in the file name (same convention as for Discord URLs),
        // so that URLs which differ only in their query don't overwrite each other.
        var fullFileName = AppendQueryParamsToFileName(fileName, uri);

        // Host and path segments come from arbitrary URLs, so each one is escaped separately.
        // NOTE(review): a URL whose path is a prefix of another one on the same host
        // (e.g. `/a` and `/a/b`) still maps to a file and a directory of the same name.
        var parts = new[] { "external", Path.EscapeFileName(uri.Host) }
            .Concat(segments.Select(segment => Path.EscapeFileName(segment)))
            .Append(Path.EscapeFileName(fullFileName));

        return string.Join('/', parts);
    }
}
internal partial class ExportAssetDownloader internal partial class ExportAssetDownloader
{ {
private static string GetUrlHash(string url) private static string GetUrlHash(string url)
@ -80,7 +164,7 @@ internal partial class ExportAssetDownloader
.Truncate(5); .Truncate(5);
} }
private static string GetFileNameFromUrl(string url) private static string GetFileNameFromUrlLegacy(string url)
{ {
var urlHash = GetUrlHash(url); var urlHash = GetUrlHash(url);

View file

@ -21,7 +21,8 @@ internal class ExportContext(DiscordClient discord, ExportRequest request)
private readonly ExportAssetDownloader _assetDownloader = new( private readonly ExportAssetDownloader _assetDownloader = new(
request.AssetsDirPath, request.AssetsDirPath,
request.ShouldReuseAssets request.ShouldReuseAssets,
request.ShouldUseNestedMediaFilePaths
); );
public DiscordClient Discord { get; } = discord; public DiscordClient Discord { get; } = discord;

View file

@ -39,6 +39,8 @@ public partial class ExportRequest
public bool ShouldReuseAssets { get; } public bool ShouldReuseAssets { get; }
public bool ShouldUseNestedMediaFilePaths { get; }
public string? Locale { get; } public string? Locale { get; }
public CultureInfo? CultureInfo { get; } public CultureInfo? CultureInfo { get; }
@ -58,6 +60,7 @@ public partial class ExportRequest
bool shouldFormatMarkdown, bool shouldFormatMarkdown,
bool shouldDownloadAssets, bool shouldDownloadAssets,
bool shouldReuseAssets, bool shouldReuseAssets,
bool shouldUseNestedMediaFilePaths,
string? locale, string? locale,
bool isUtcNormalizationEnabled bool isUtcNormalizationEnabled
) )
@ -72,6 +75,7 @@ public partial class ExportRequest
ShouldFormatMarkdown = shouldFormatMarkdown; ShouldFormatMarkdown = shouldFormatMarkdown;
ShouldDownloadAssets = shouldDownloadAssets; ShouldDownloadAssets = shouldDownloadAssets;
ShouldReuseAssets = shouldReuseAssets; ShouldReuseAssets = shouldReuseAssets;
ShouldUseNestedMediaFilePaths = shouldUseNestedMediaFilePaths;
Locale = locale; Locale = locale;
IsUtcNormalizationEnabled = isUtcNormalizationEnabled; IsUtcNormalizationEnabled = isUtcNormalizationEnabled;

View file

@ -66,6 +66,9 @@ public partial class SettingsService()
[ObservableProperty] [ObservableProperty]
public partial bool LastShouldReuseAssets { get; set; } public partial bool LastShouldReuseAssets { get; set; }
// Last "use nested media file paths" choice; the export setup dialog reads it on
// initialization and writes it back when the dialog is confirmed.
[ObservableProperty]
public partial bool LastShouldUseNestedMediaFilePaths { get; set; }
[ObservableProperty] [ObservableProperty]
public partial string? LastAssetsDirPath { get; set; } public partial string? LastAssetsDirPath { get; set; }

View file

@ -276,6 +276,7 @@ public partial class DashboardViewModel : ViewModelBase
dialog.ShouldFormatMarkdown, dialog.ShouldFormatMarkdown,
dialog.ShouldDownloadAssets, dialog.ShouldDownloadAssets,
dialog.ShouldReuseAssets, dialog.ShouldReuseAssets,
dialog.ShouldUseNestedMediaFilePaths,
_settingsService.Locale, _settingsService.Locale,
_settingsService.IsUtcNormalizationEnabled _settingsService.IsUtcNormalizationEnabled
); );

View file

@ -67,6 +67,9 @@ public partial class ExportSetupViewModel(
[ObservableProperty] [ObservableProperty]
public partial bool ShouldReuseAssets { get; set; } public partial bool ShouldReuseAssets { get; set; }
// Whether downloaded assets should be saved using nested file paths.
// Initialized from and saved back to the settings service by this view model.
[ObservableProperty]
public partial bool ShouldUseNestedMediaFilePaths { get; set; }
[ObservableProperty] [ObservableProperty]
public partial string? AssetsDirPath { get; set; } public partial string? AssetsDirPath { get; set; }
@ -105,6 +108,7 @@ public partial class ExportSetupViewModel(
ShouldFormatMarkdown = settingsService.LastShouldFormatMarkdown; ShouldFormatMarkdown = settingsService.LastShouldFormatMarkdown;
ShouldDownloadAssets = settingsService.LastShouldDownloadAssets; ShouldDownloadAssets = settingsService.LastShouldDownloadAssets;
ShouldReuseAssets = settingsService.LastShouldReuseAssets; ShouldReuseAssets = settingsService.LastShouldReuseAssets;
ShouldUseNestedMediaFilePaths = settingsService.LastShouldUseNestedMediaFilePaths;
AssetsDirPath = settingsService.LastAssetsDirPath; AssetsDirPath = settingsService.LastAssetsDirPath;
// Show the "advanced options" section by default if any // Show the "advanced options" section by default if any
@ -183,6 +187,7 @@ public partial class ExportSetupViewModel(
settingsService.LastShouldFormatMarkdown = ShouldFormatMarkdown; settingsService.LastShouldFormatMarkdown = ShouldFormatMarkdown;
settingsService.LastShouldDownloadAssets = ShouldDownloadAssets; settingsService.LastShouldDownloadAssets = ShouldDownloadAssets;
settingsService.LastShouldReuseAssets = ShouldReuseAssets; settingsService.LastShouldReuseAssets = ShouldReuseAssets;
settingsService.LastShouldUseNestedMediaFilePaths = ShouldUseNestedMediaFilePaths;
settingsService.LastAssetsDirPath = AssetsDirPath; settingsService.LastAssetsDirPath = AssetsDirPath;
Close(true); Close(true);

View file

@ -274,6 +274,16 @@
<ToggleSwitch DockPanel.Dock="Right" IsChecked="{Binding ShouldReuseAssets}" /> <ToggleSwitch DockPanel.Dock="Right" IsChecked="{Binding ShouldReuseAssets}" />
</DockPanel> </DockPanel>
<!-- Use nested media file paths (only enabled while asset download is turned on) -->
<DockPanel
Margin="16,8"
IsEnabled="{Binding ShouldDownloadAssets}"
LastChildFill="False"
ToolTip.Tip="Uses a new organization to save media files to avoid data loss. If you change this setting from run to run, duplicate media may be downloaded.">
<TextBlock DockPanel.Dock="Left" Text="Use nested media file paths" />
<ToggleSwitch DockPanel.Dock="Right" IsChecked="{Binding ShouldUseNestedMediaFilePaths}" />
</DockPanel>
<!-- Assets path --> <!-- Assets path -->
<TextBox <TextBox
Margin="16,8" Margin="16,8"