Add an opt-in `--media-nested` option that saves downloaded assets under nested,
URL-derived paths instead of the flat legacy file names.

Review notes:
* This patch was recovered from a whitespace-collapsed copy. Indentation and blank
  context lines are reconstructed by convention, so apply it with
  `git apply --ignore-whitespace`.
* Markup in angle brackets appears to have been lost in that copy: the
  ExportSetupView.axaml hunk is left exactly as found (its body has to be restored
  from the original commit), and generic type arguments on context lines may be missing.
* The `index` lines of SelfContainedSpecs.cs and ExportAssetDownloader.cs are dropped,
  because the changes below alter the post-image of those files.
* Changes relative to the reviewed patch: removed the leftover debug output (and the
  `using System;` it needed) from the new spec; made external asset paths safe (escaped
  segments, URL hash when there is no file name or when a query is present); corrected
  the comments that did not match the code.

diff --git a/DiscordChatExporter.Cli.Tests/Specs/SelfContainedSpecs.cs b/DiscordChatExporter.Cli.Tests/Specs/SelfContainedSpecs.cs
--- a/DiscordChatExporter.Cli.Tests/Specs/SelfContainedSpecs.cs
+++ b/DiscordChatExporter.Cli.Tests/Specs/SelfContainedSpecs.cs
@@ -39,4 +39,32 @@ public class SelfContainedSpecs
             .Should()
             .BeTrue();
     }
+
+    [Fact]
+    public async Task I_can_export_a_channel_and_download_all_referenced_assets_with_nested_paths()
+    {
+        // Arrange
+        using var dir = TempDir.Create();
+        var filePath = Path.Combine(dir.Path, "output.html");
+
+        // Act
+        await new ExportChannelsCommand
+        {
+            Token = Secrets.DiscordToken,
+            ChannelIds = [ChannelIds.SelfContainedTestCases],
+            ExportFormat = ExportFormat.HtmlDark,
+            OutputPath = filePath,
+            ShouldDownloadAssets = true,
+            ShouldUseNestedMediaFilePaths = true,
+        }.ExecuteAsync(new FakeConsole());
+
+        // Assert
+        Html.Parse(await File.ReadAllTextAsync(filePath))
+            .QuerySelectorAll("body [src]")
+            .Select(e => e.GetAttribute("src")!)
+            .Select(f => Path.GetFullPath(f, dir.Path))
+            .All(File.Exists)
+            .Should()
+            .BeTrue();
+    }
 }
diff --git a/DiscordChatExporter.Cli/Commands/Base/ExportCommandBase.cs b/DiscordChatExporter.Cli/Commands/Base/ExportCommandBase.cs
index 09ab2691..7f8d7e60 100644
--- a/DiscordChatExporter.Cli/Commands/Base/ExportCommandBase.cs
+++ b/DiscordChatExporter.Cli/Commands/Base/ExportCommandBase.cs
@@ -97,10 +97,17 @@ public abstract class ExportCommandBase : DiscordCommandBase
 
     [CommandOption(
         "reuse-media",
-        Description = "Reuse previously downloaded assets to avoid redundant requests."
+        Description = "Reuse previously downloaded assets to avoid redundant requests. "
+            + "Keep --media-nested consistent across runs for best results."
     )]
     public bool ShouldReuseAssets { get; init; } = false;
 
+    [CommandOption(
+        "media-nested",
+        Description = "Saves assets with a nested file naming convention, creating subdirectories for each distinct asset type."
+    )]
+    public bool ShouldUseNestedMediaFilePaths { get; init; } = false;
+
     [CommandOption(
         "media-dir",
         Description = "Download assets to this directory. "
@@ -154,6 +161,12 @@ public abstract class ExportCommandBase : DiscordCommandBase
             throw new CommandException("Option --reuse-media cannot be used without --media.");
         }
 
+        // Nested media file paths can only be enabled if the download assets option is set
+        if (ShouldUseNestedMediaFilePaths && !ShouldDownloadAssets)
+        {
+            throw new CommandException("Option --media-nested cannot be used without --media.");
+        }
+
         // Assets directory can only be specified if the download assets option is set
         if (!string.IsNullOrWhiteSpace(AssetsDirPath) && !ShouldDownloadAssets)
         {
@@ -270,6 +283,7 @@ public abstract class ExportCommandBase : DiscordCommandBase
                                         ShouldFormatMarkdown,
                                         ShouldDownloadAssets,
                                         ShouldReuseAssets,
+                                        ShouldUseNestedMediaFilePaths,
                                         Locale,
                                         IsUtcNormalizationEnabled
                                     );
diff --git a/DiscordChatExporter.Core/Exporting/ExportAssetDownloader.cs b/DiscordChatExporter.Core/Exporting/ExportAssetDownloader.cs
--- a/DiscordChatExporter.Core/Exporting/ExportAssetDownloader.cs
+++ b/DiscordChatExporter.Core/Exporting/ExportAssetDownloader.cs
@@ -1,6 +1,7 @@
 using System;
 using System.Collections.Generic;
 using System.IO;
+using System.Linq;
 using System.Security.Cryptography;
 using System.Text;
 using System.Text.RegularExpressions;
@@ -13,7 +14,11 @@ using DiscordChatExporter.Core.Utils.Extensions;
 
 namespace DiscordChatExporter.Core.Exporting;
 
-internal partial class ExportAssetDownloader(string workingDirPath, bool reuse)
+internal partial class ExportAssetDownloader(
+    string workingDirPath,
+    bool reuse,
+    bool useNestedMediaFilePaths
+)
 {
     private static readonly AsyncKeyedLocker Locker = new();
 
@@ -25,8 +30,10 @@ internal partial class ExportAssetDownloader(string workingDirPath, bool reuse)
         CancellationToken cancellationToken = default
     )
     {
-        var fileName = GetFileNameFromUrl(url);
-        var filePath = Path.Combine(workingDirPath, fileName);
+        var localFilePath = useNestedMediaFilePaths
+            ? GetFilePathFromUrl(url)
+            : GetFileNameFromUrlLegacy(url);
+        var filePath = Path.Combine(workingDirPath, localFilePath);
 
         using var _ = await Locker.LockAsync(filePath, cancellationToken);
 
@@ -44,6 +51,10 @@ internal partial class ExportAssetDownloader(string workingDirPath, bool reuse)
                 {
                     // Download the file
                     using var response = await Http.Client.GetAsync(url, innerCancellationToken);
+                    // Nested file paths may point to a subdirectory that doesn't exist yet
+                    var directory = Path.GetDirectoryName(filePath);
+                    if (directory is not null)
+                        Directory.CreateDirectory(directory);
                     await using var output = File.Create(filePath);
                     await response.Content.CopyToAsync(output, innerCancellationToken);
                 },
@@ -54,6 +65,115 @@ internal partial class ExportAssetDownloader(string workingDirPath, bool reuse)
     }
 }
 
+internal partial class ExportAssetDownloader
+{
+    // Maps an asset URL to a relative file path that mirrors the structure of the URL.
+    // Discord CDN assets keep their CDN path, everything else goes to the `external` folder.
+    private static string GetFilePathFromUrl(string url)
+    {
+        var uri = new Uri(NormalizeUrl(url));
+
+        // If this isn't a Discord CDN URL, save the file to the `external` folder
+        if (!string.Equals(uri.Host, "cdn.discordapp.com", StringComparison.OrdinalIgnoreCase))
+        {
+            return GetExternalFilePath(uri);
+        }
+
+        // Try to extract the directory path and the file name from the URL
+        var pathAndFileName = Regex.Match(uri.AbsolutePath, @"/(.+)/([^?]*)");
+        var path = pathAndFileName.Groups[1].Value;
+        var fileName = pathAndFileName.Groups[2].Value;
+
+        // For a Discord URL, we should have matches for both of these.
+        // But if we encounter an unexpected Discord URL, just treat it like an external one.
+        if (string.IsNullOrWhiteSpace(path) || string.IsNullOrWhiteSpace(fileName))
+        {
+            return GetExternalFilePath(uri);
+        }
+
+        var fileNameWithoutExtension = Path.GetFileNameWithoutExtension(fileName);
+        var queryParamsString = FormatQueryParamsForFilename(uri);
+        var fileExtension = Path.GetExtension(fileName);
+
+        var queryParamsSuffix = string.IsNullOrEmpty(queryParamsString)
+            ? ""
+            : $"_{queryParamsString}";
+        var fullFilename = $"{fileNameWithoutExtension}{queryParamsSuffix}{fileExtension}";
+
+        return $"{path}/{Path.EscapeFileName(fullFilename)}";
+    }
+
+    // Remove signature parameters from Discord CDN URLs to normalize them
+    private static string NormalizeUrl(string url)
+    {
+        var uri = new Uri(url);
+        if (!string.Equals(uri.Host, "cdn.discordapp.com", StringComparison.OrdinalIgnoreCase))
+            return url;
+
+        var query = HttpUtility.ParseQueryString(uri.Query);
+        query.Remove("ex");
+        query.Remove("is");
+        query.Remove("hm");
+
+        var queryString = query.ToString();
+        if (string.IsNullOrEmpty(queryString))
+            return uri.GetLeftPart(UriPartial.Path);
+
+        return $"{uri.GetLeftPart(UriPartial.Path)}?{queryString}";
+    }
+
+    // Stringifies the query params to be included in a filename.
+    // Returns a string like "size-256_spoiler-false"
+    private static string FormatQueryParamsForFilename(Uri uri)
+    {
+        var query = HttpUtility.ParseQueryString(uri.Query);
+        return string.Join(
+            "_",
+            query
+                .AllKeys.Where(key => !string.IsNullOrEmpty(key))
+                .Select(key => string.IsNullOrEmpty(query[key]) ? key : $"{key}-{query[key]}")
+        );
+    }
+
+    // Builds a relative path of the form "external/{host}/{url path}" for assets that are not
+    // served from the Discord CDN (or whose Discord CDN URL has an unexpected shape).
+    private static string GetExternalFilePath(Uri uri)
+    {
+        // Escape every segment on its own, so that characters which are valid in a URL
+        // but not in a file name cannot produce an invalid path
+        var segments = uri
+            .AbsolutePath.Split('/', StringSplitOptions.RemoveEmptyEntries)
+            .Select(segment => Path.EscapeFileName(segment))
+            .ToList();
+
+        // A path that is empty or ends with a slash does not contain a file name
+        var fileName = "";
+        if (segments.Count > 0 && !uri.AbsolutePath.EndsWith('/'))
+        {
+            fileName = segments[^1];
+            segments.RemoveAt(segments.Count - 1);
+        }
+
+        // Without a file name the path would point to a directory, and URLs that only differ
+        // by their query would be saved to the same file. Use the URL hash to tell them apart.
+        if (string.IsNullOrEmpty(fileName) || !string.IsNullOrEmpty(uri.Query))
+        {
+            var urlHash = GetUrlHash(uri.AbsoluteUri);
+            var fileNameWithoutExtension = Path.GetFileNameWithoutExtension(fileName);
+            var fileExtension = Path.GetExtension(fileName);
+
+            fileName = string.IsNullOrEmpty(fileNameWithoutExtension)
+                ? $"{urlHash}{fileExtension}"
+                : $"{fileNameWithoutExtension}-{urlHash}{fileExtension}";
+        }
+
+        return string.Join(
+            '/',
+            segments.Prepend(Path.EscapeFileName(uri.Host)).Prepend("external").Append(fileName)
+        );
+    }
+}
+
 internal partial class ExportAssetDownloader
 {
     private static string GetUrlHash(string url)
@@ -80,7 +200,7 @@ internal partial class ExportAssetDownloader
             .Truncate(5);
     }
 
-    private static string GetFileNameFromUrl(string url)
+    private static string GetFileNameFromUrlLegacy(string url)
     {
         var urlHash = GetUrlHash(url);
 
diff --git a/DiscordChatExporter.Core/Exporting/ExportContext.cs b/DiscordChatExporter.Core/Exporting/ExportContext.cs
index 0ecc12b1..8f8a8f4f 100644
--- a/DiscordChatExporter.Core/Exporting/ExportContext.cs
+++ b/DiscordChatExporter.Core/Exporting/ExportContext.cs
@@ -21,7 +21,8 @@ internal class ExportContext(DiscordClient discord, ExportRequest request)
 
     private readonly ExportAssetDownloader _assetDownloader = new(
         request.AssetsDirPath,
-        request.ShouldReuseAssets
+        request.ShouldReuseAssets,
+        request.ShouldUseNestedMediaFilePaths
     );
 
     public DiscordClient Discord { get; } = discord;
diff --git a/DiscordChatExporter.Core/Exporting/ExportRequest.cs b/DiscordChatExporter.Core/Exporting/ExportRequest.cs
index c814df8a..80b6aea8 100644
--- a/DiscordChatExporter.Core/Exporting/ExportRequest.cs
+++ b/DiscordChatExporter.Core/Exporting/ExportRequest.cs
@@ -39,6 +39,8 @@ public partial class ExportRequest
 
     public bool ShouldReuseAssets { get; }
 
+    public bool ShouldUseNestedMediaFilePaths { get; }
+
     public string? Locale { get; }
 
     public CultureInfo? CultureInfo { get; }
@@ -58,6 +60,7 @@ public partial class ExportRequest
         bool shouldFormatMarkdown,
         bool shouldDownloadAssets,
         bool shouldReuseAssets,
+        bool shouldUseNestedMediaFilePaths,
         string? locale,
         bool isUtcNormalizationEnabled
     )
@@ -72,6 +75,7 @@ public partial class ExportRequest
         ShouldFormatMarkdown = shouldFormatMarkdown;
         ShouldDownloadAssets = shouldDownloadAssets;
         ShouldReuseAssets = shouldReuseAssets;
+        ShouldUseNestedMediaFilePaths = shouldUseNestedMediaFilePaths;
         Locale = locale;
         IsUtcNormalizationEnabled = isUtcNormalizationEnabled;
 
diff --git a/DiscordChatExporter.Gui/Services/SettingsService.cs b/DiscordChatExporter.Gui/Services/SettingsService.cs
index 87fb914b..e5d5087d 100644
--- a/DiscordChatExporter.Gui/Services/SettingsService.cs
+++ b/DiscordChatExporter.Gui/Services/SettingsService.cs
@@ -66,6 +66,9 @@ public partial class SettingsService()
     [ObservableProperty]
     public partial bool LastShouldReuseAssets { get; set; }
 
+    [ObservableProperty]
+    public partial bool LastShouldUseNestedMediaFilePaths { get; set; }
+
     [ObservableProperty]
     public partial string? LastAssetsDirPath { get; set; }
 
diff --git a/DiscordChatExporter.Gui/ViewModels/Components/DashboardViewModel.cs b/DiscordChatExporter.Gui/ViewModels/Components/DashboardViewModel.cs
index ebd416c2..cefd9825 100644
--- a/DiscordChatExporter.Gui/ViewModels/Components/DashboardViewModel.cs
+++ b/DiscordChatExporter.Gui/ViewModels/Components/DashboardViewModel.cs
@@ -276,6 +276,7 @@ public partial class DashboardViewModel : ViewModelBase
                                 dialog.ShouldFormatMarkdown,
                                 dialog.ShouldDownloadAssets,
                                 dialog.ShouldReuseAssets,
+                                dialog.ShouldUseNestedMediaFilePaths,
                                 _settingsService.Locale,
                                 _settingsService.IsUtcNormalizationEnabled
                             );
diff --git a/DiscordChatExporter.Gui/ViewModels/Dialogs/ExportSetupViewModel.cs b/DiscordChatExporter.Gui/ViewModels/Dialogs/ExportSetupViewModel.cs
index 4574f6ec..b03e1e3a 100644
--- a/DiscordChatExporter.Gui/ViewModels/Dialogs/ExportSetupViewModel.cs
+++ b/DiscordChatExporter.Gui/ViewModels/Dialogs/ExportSetupViewModel.cs
@@ -67,6 +67,9 @@ public partial class ExportSetupViewModel(
     [ObservableProperty]
     public partial bool ShouldReuseAssets { get; set; }
 
+    [ObservableProperty]
+    public partial bool ShouldUseNestedMediaFilePaths { get; set; }
+
     [ObservableProperty]
     public partial string? AssetsDirPath { get; set; }
 
@@ -105,6 +108,7 @@ public partial class ExportSetupViewModel(
         ShouldFormatMarkdown = settingsService.LastShouldFormatMarkdown;
         ShouldDownloadAssets = settingsService.LastShouldDownloadAssets;
         ShouldReuseAssets = settingsService.LastShouldReuseAssets;
+        ShouldUseNestedMediaFilePaths = settingsService.LastShouldUseNestedMediaFilePaths;
         AssetsDirPath = settingsService.LastAssetsDirPath;
 
         // Show the "advanced options" section by default if any
@@ -183,6 +187,7 @@ public partial class ExportSetupViewModel(
         settingsService.LastShouldFormatMarkdown = ShouldFormatMarkdown;
         settingsService.LastShouldDownloadAssets = ShouldDownloadAssets;
         settingsService.LastShouldReuseAssets = ShouldReuseAssets;
+        settingsService.LastShouldUseNestedMediaFilePaths = ShouldUseNestedMediaFilePaths;
         settingsService.LastAssetsDirPath = AssetsDirPath;
 
         Close(true);
diff --git a/DiscordChatExporter.Gui/Views/Dialogs/ExportSetupView.axaml b/DiscordChatExporter.Gui/Views/Dialogs/ExportSetupView.axaml
index f5c999e0..7db836f8 100644
--- a/DiscordChatExporter.Gui/Views/Dialogs/ExportSetupView.axaml
+++ b/DiscordChatExporter.Gui/Views/Dialogs/ExportSetupView.axaml
@@ -274,6 +274,16 @@ + + + + + +