Add flag to enable collisionless media downloads

This commit is contained in:
Ben Rucker 2026-01-02 16:51:42 -08:00
parent 95953f8cf0
commit e85ca90dd4
8 changed files with 83 additions and 5 deletions

View file

@ -101,6 +101,13 @@ public abstract class ExportCommandBase : DiscordCommandBase
)]
public bool ShouldReuseAssets { get; init; } = false;
// Opt-in switch (off by default) for the new media file naming convention.
// Option validation in this class rejects it unless asset download (--media) is also enabled.
[CommandOption(
"new-media-paths",
Description = "Saves media with a new file naming convention to prevent collisions. "
+ "Duplicate media may be downloaded if consecutive runs use different values for this flag."
)]
public bool ShouldUseNewMediaFilePaths { get; init; } = false;
[CommandOption(
"media-dir",
Description = "Download assets to this directory. "
@ -154,6 +161,12 @@ public abstract class ExportCommandBase : DiscordCommandBase
throw new CommandException("Option --reuse-media cannot be used without --media.");
}
// New media file path can only be enabled if the download assets option is set
if (ShouldUseNewMediaFilePaths && !ShouldDownloadAssets)
{
throw new CommandException("Option --new-media-paths cannot be used without --media.");
}
// Assets directory can only be specified if the download assets option is set
if (!string.IsNullOrWhiteSpace(AssetsDirPath) && !ShouldDownloadAssets)
{
@ -270,6 +283,7 @@ public abstract class ExportCommandBase : DiscordCommandBase
ShouldFormatMarkdown,
ShouldDownloadAssets,
ShouldReuseAssets,
ShouldUseNewMediaFilePaths,
Locale,
IsUtcNormalizationEnabled
);

View file

@ -13,7 +13,11 @@ using DiscordChatExporter.Core.Utils.Extensions;
namespace DiscordChatExporter.Core.Exporting;
internal partial class ExportAssetDownloader(string workingDirPath, bool reuse)
internal partial class ExportAssetDownloader(
string workingDirPath,
bool reuse,
bool useNewMediaFilePaths
)
{
private static readonly AsyncKeyedLocker<string> Locker = new();
@ -25,8 +29,10 @@ internal partial class ExportAssetDownloader(string workingDirPath, bool reuse)
CancellationToken cancellationToken = default
)
{
var fileName = GetFileNameFromUrl(url);
var filePath = Path.Combine(workingDirPath, fileName);
var localFilePath = useNewMediaFilePaths
? GetFilePathFromUrl(url)
: GetFileNameFromUrlLegacy(url);
var filePath = Path.Combine(workingDirPath, localFilePath);
using var _ = await Locker.LockAsync(filePath, cancellationToken);
@ -44,6 +50,9 @@ internal partial class ExportAssetDownloader(string workingDirPath, bool reuse)
{
// Download the file
using var response = await Http.Client.GetAsync(url, innerCancellationToken);
var directory = Path.GetDirectoryName(filePath);
if (directory != null)
Directory.CreateDirectory(directory);
await using var output = File.Create(filePath);
await response.Content.CopyToAsync(output, innerCancellationToken);
},
@ -54,6 +63,37 @@ internal partial class ExportAssetDownloader(string workingDirPath, bool reuse)
}
}
internal partial class ExportAssetDownloader
{
    // Serializes appends to the external URL log. Several URLs may be resolved at the
    // same time, and unsynchronized appends to one file can fail or interleave.
    private static readonly object ExternalUrlLogLock = new();

    /// <summary>
    /// Maps an asset URL to a relative file path intended to be unique per asset.
    /// </summary>
    /// <remarks>
    /// URLs whose host is not <c>cdn.discordapp.com</c> are placed under
    /// <c>external/{host}{path}</c> and are also recorded in <c>external_urls.txt</c>.
    /// Discord CDN URLs keep their own path, with an extra <c>{size}px</c> folder when
    /// the URL carries a <c>size</c> query parameter.
    /// NOTE(review): the query string of external URLs is ignored, so two external URLs
    /// that differ only by query still map to the same path.
    /// </remarks>
    /// <param name="url">Absolute URL of the asset.</param>
    /// <returns>A relative path using <c>/</c> as the separator.</returns>
    /// <exception cref="UriFormatException"><paramref name="url"/> is not a valid absolute URI.</exception>
    /// <exception cref="FormatException">A Discord CDN URL has no directory or file name part.</exception>
    private static string GetFilePathFromUrl(string url)
    {
        var uri = new Uri(url);

        // If this isn't a Discord CDN URL, save the file to the `media/external` folder.
        if (!string.Equals(uri.Host, "cdn.discordapp.com", StringComparison.OrdinalIgnoreCase))
        {
            LogExternalUrl(url);

            var externalPath = $"external/{uri.Host}{uri.AbsolutePath}";

            // A path ending in '/' names a directory, which cannot be created as a file.
            // Give such URLs a placeholder file name instead.
            return externalPath.EndsWith("/", StringComparison.Ordinal)
                ? externalPath + "index"
                : externalPath;
        }

        // Split the URL path into its directory part and its file name
        var pathAndFileName = Regex.Match(uri.AbsolutePath, @"/(.+)/([^?]*)");
        var path = pathAndFileName.Groups[1].Value;
        var fileName = pathAndFileName.Groups[2].Value;

        // Discord CDN URLs are expected to always have both parts (see ImageCdn)
        if (string.IsNullOrWhiteSpace(path) || string.IsNullOrWhiteSpace(fileName))
        {
            throw new FormatException("Invalid Discord URL shape");
        }

        // If there is a size parameter, add it as the final folder in the path.
        // This prevents multiple sizes of an avatar, for example, from overwriting each other.
        // The value comes straight from the URL, so it is escaped like the file name to keep
        // it from introducing extra path separators. An empty value adds no folder.
        var sizeParam = HttpUtility.ParseQueryString(uri.Query)["size"];
        var sizePathSegment = string.IsNullOrWhiteSpace(sizeParam)
            ? ""
            : $"{Path.EscapeFileName(sizeParam)}px/";

        return $"{path}/{sizePathSegment}{Path.EscapeFileName(fileName)}";
    }

    // Best-effort record of non-Discord URLs that were encountered.
    // NOTE(review): the log path is relative to the current working directory, not the
    // assets directory; this static helper has no access to the latter.
    private static void LogExternalUrl(string url)
    {
        try
        {
            lock (ExternalUrlLogLock)
            {
                File.AppendAllText("external_urls.txt", $"{url}\n");
            }
        }
        catch (Exception ex) when (ex is IOException or UnauthorizedAccessException)
        {
            // Logging must never prevent the asset itself from being resolved.
        }
    }
}
internal partial class ExportAssetDownloader
{
private static string GetUrlHash(string url)
@ -80,7 +120,7 @@ internal partial class ExportAssetDownloader
.Truncate(5);
}
private static string GetFileNameFromUrl(string url)
private static string GetFileNameFromUrlLegacy(string url)
{
var urlHash = GetUrlHash(url);

View file

@ -21,7 +21,8 @@ internal class ExportContext(DiscordClient discord, ExportRequest request)
private readonly ExportAssetDownloader _assetDownloader = new(
request.AssetsDirPath,
request.ShouldReuseAssets
request.ShouldReuseAssets,
request.ShouldUseNewMediaFilePaths
);
public DiscordClient Discord { get; } = discord;

View file

@ -39,6 +39,8 @@ public partial class ExportRequest
public bool ShouldReuseAssets { get; }
// Whether downloaded assets should be saved using the new media file path scheme.
// ExportContext passes this value on to ExportAssetDownloader.
public bool ShouldUseNewMediaFilePaths { get; }
public string? Locale { get; }
public CultureInfo? CultureInfo { get; }
@ -58,6 +60,7 @@ public partial class ExportRequest
bool shouldFormatMarkdown,
bool shouldDownloadAssets,
bool shouldReuseAssets,
bool shouldUseNewMediaFilePaths,
string? locale,
bool isUtcNormalizationEnabled
)
@ -72,6 +75,7 @@ public partial class ExportRequest
ShouldFormatMarkdown = shouldFormatMarkdown;
ShouldDownloadAssets = shouldDownloadAssets;
ShouldReuseAssets = shouldReuseAssets;
ShouldUseNewMediaFilePaths = shouldUseNewMediaFilePaths;
Locale = locale;
IsUtcNormalizationEnabled = isUtcNormalizationEnabled;

View file

@ -66,6 +66,9 @@ public partial class SettingsService()
[ObservableProperty]
public partial bool LastShouldReuseAssets { get; set; }
// Last value chosen for the "use new media file paths" option in the export setup dialog;
// the dialog reads it back to pre-populate its own toggle.
[ObservableProperty]
public partial bool LastShouldUseNewMediaFilePaths { get; set; }
[ObservableProperty]
public partial string? LastAssetsDirPath { get; set; }

View file

@ -276,6 +276,7 @@ public partial class DashboardViewModel : ViewModelBase
dialog.ShouldFormatMarkdown,
dialog.ShouldDownloadAssets,
dialog.ShouldReuseAssets,
dialog.ShouldUseNewMediaFilePaths,
_settingsService.Locale,
_settingsService.IsUtcNormalizationEnabled
);

View file

@ -67,6 +67,9 @@ public partial class ExportSetupViewModel(
[ObservableProperty]
public partial bool ShouldReuseAssets { get; set; }
// Whether the export should save media using the new file path scheme.
// Initialized from, and saved back to, SettingsService.LastShouldUseNewMediaFilePaths.
[ObservableProperty]
public partial bool ShouldUseNewMediaFilePaths { get; set; }
[ObservableProperty]
public partial string? AssetsDirPath { get; set; }
@ -105,6 +108,7 @@ public partial class ExportSetupViewModel(
ShouldFormatMarkdown = settingsService.LastShouldFormatMarkdown;
ShouldDownloadAssets = settingsService.LastShouldDownloadAssets;
ShouldReuseAssets = settingsService.LastShouldReuseAssets;
ShouldUseNewMediaFilePaths = settingsService.LastShouldUseNewMediaFilePaths;
AssetsDirPath = settingsService.LastAssetsDirPath;
// Show the "advanced options" section by default if any
@ -183,6 +187,7 @@ public partial class ExportSetupViewModel(
settingsService.LastShouldFormatMarkdown = ShouldFormatMarkdown;
settingsService.LastShouldDownloadAssets = ShouldDownloadAssets;
settingsService.LastShouldReuseAssets = ShouldReuseAssets;
settingsService.LastShouldUseNewMediaFilePaths = ShouldUseNewMediaFilePaths;
settingsService.LastAssetsDirPath = AssetsDirPath;
Close(true);

View file

@ -274,6 +274,16 @@
<ToggleSwitch DockPanel.Dock="Right" IsChecked="{Binding ShouldReuseAssets}" />
</DockPanel>
<!-- Use new media file paths (row is only enabled while asset download is switched on) -->
<DockPanel
Margin="16,8"
IsEnabled="{Binding ShouldDownloadAssets}"
LastChildFill="False"
ToolTip.Tip="Uses a new organization to save media files to avoid data loss. If you change this setting from run to run, duplicate media may be downloaded.">
<TextBlock DockPanel.Dock="Left" Text="Use new media file paths" />
<ToggleSwitch DockPanel.Dock="Right" IsChecked="{Binding ShouldUseNewMediaFilePaths}" />
</DockPanel>
<!-- Assets path -->
<TextBox
Margin="16,8"