mirror of
https://github.com/Tyrrrz/DiscordChatExporter.git
synced 2026-02-14 07:43:31 -07:00
Merge 2a4d625157 into 72f9e981de
This commit is contained in:
commit
2a49bb9979
|
|
@ -1,4 +1,5 @@
|
||||||
using System.IO;
|
using System;
|
||||||
|
using System.IO;
|
||||||
using System.Linq;
|
using System.Linq;
|
||||||
using System.Threading.Tasks;
|
using System.Threading.Tasks;
|
||||||
using CliFx.Infrastructure;
|
using CliFx.Infrastructure;
|
||||||
|
|
@ -39,4 +40,51 @@ public class SelfContainedSpecs
|
||||||
.Should()
|
.Should()
|
||||||
.BeTrue();
|
.BeTrue();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
[Fact]
public async Task I_can_export_a_channel_and_download_all_referenced_assets_with_nested_paths()
{
    // Arrange
    using var dir = TempDir.Create();
    var filePath = Path.Combine(dir.Path, "output.html");

    // Act
    await new ExportChannelsCommand
    {
        Token = Secrets.DiscordToken,
        ChannelIds = [ChannelIds.SelfContainedTestCases],
        ExportFormat = ExportFormat.HtmlDark,
        OutputPath = filePath,
        ShouldDownloadAssets = true,
        // Exercise the nested asset layout instead of the legacy flat one
        ShouldUseNestedMediaFilePaths = true,
    }.ExecuteAsync(new FakeConsole());

    // Assert
    // Every `src` attribute in the exported document must resolve (relative to
    // the export directory) to a file that actually exists on disk.
    Html.Parse(await File.ReadAllTextAsync(filePath))
        .QuerySelectorAll("body [src]")
        .Select(e => e.GetAttribute("src")!)
        .Select(f => Path.GetFullPath(f, dir.Path))
        .All(File.Exists)
        .Should()
        .BeTrue();
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -97,10 +97,17 @@ public abstract class ExportCommandBase : DiscordCommandBase
|
||||||
|
|
||||||
[CommandOption(
|
[CommandOption(
|
||||||
"reuse-media",
|
"reuse-media",
|
||||||
Description = "Reuse previously downloaded assets to avoid redundant requests."
|
Description = "Reuse previously downloaded assets to avoid redundant requests. "
|
||||||
|
+ "Keep --media-nested consistent across runs for best results."
|
||||||
)]
|
)]
|
||||||
public bool ShouldReuseAssets { get; init; } = false;
|
public bool ShouldReuseAssets { get; init; } = false;
|
||||||
|
|
||||||
|
[CommandOption(
|
||||||
|
"media-nested",
|
||||||
|
Description = "Saves assets with a nested file naming convention, creating subdirectories for each distinct asset type."
|
||||||
|
)]
|
||||||
|
public bool ShouldUseNestedMediaFilePaths { get; init; } = false;
|
||||||
|
|
||||||
[CommandOption(
|
[CommandOption(
|
||||||
"media-dir",
|
"media-dir",
|
||||||
Description = "Download assets to this directory. "
|
Description = "Download assets to this directory. "
|
||||||
|
|
@ -154,6 +161,12 @@ public abstract class ExportCommandBase : DiscordCommandBase
|
||||||
throw new CommandException("Option --reuse-media cannot be used without --media.");
|
throw new CommandException("Option --reuse-media cannot be used without --media.");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Nested media file paths can only be enabled if the download assets option is set
|
||||||
|
if (ShouldUseNestedMediaFilePaths && !ShouldDownloadAssets)
|
||||||
|
{
|
||||||
|
throw new CommandException("Option --media-nested cannot be used without --media.");
|
||||||
|
}
|
||||||
|
|
||||||
// Assets directory can only be specified if the download assets option is set
|
// Assets directory can only be specified if the download assets option is set
|
||||||
if (!string.IsNullOrWhiteSpace(AssetsDirPath) && !ShouldDownloadAssets)
|
if (!string.IsNullOrWhiteSpace(AssetsDirPath) && !ShouldDownloadAssets)
|
||||||
{
|
{
|
||||||
|
|
@ -270,6 +283,7 @@ public abstract class ExportCommandBase : DiscordCommandBase
|
||||||
ShouldFormatMarkdown,
|
ShouldFormatMarkdown,
|
||||||
ShouldDownloadAssets,
|
ShouldDownloadAssets,
|
||||||
ShouldReuseAssets,
|
ShouldReuseAssets,
|
||||||
|
ShouldUseNestedMediaFilePaths,
|
||||||
Locale,
|
Locale,
|
||||||
IsUtcNormalizationEnabled
|
IsUtcNormalizationEnabled
|
||||||
);
|
);
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,7 @@
|
||||||
using System;
|
using System;
|
||||||
using System.Collections.Generic;
|
using System.Collections.Generic;
|
||||||
using System.IO;
|
using System.IO;
|
||||||
|
using System.Linq;
|
||||||
using System.Security.Cryptography;
|
using System.Security.Cryptography;
|
||||||
using System.Text;
|
using System.Text;
|
||||||
using System.Text.RegularExpressions;
|
using System.Text.RegularExpressions;
|
||||||
|
|
@ -13,7 +14,11 @@ using DiscordChatExporter.Core.Utils.Extensions;
|
||||||
|
|
||||||
namespace DiscordChatExporter.Core.Exporting;
|
namespace DiscordChatExporter.Core.Exporting;
|
||||||
|
|
||||||
internal partial class ExportAssetDownloader(string workingDirPath, bool reuse)
|
internal partial class ExportAssetDownloader(
|
||||||
|
string workingDirPath,
|
||||||
|
bool reuse,
|
||||||
|
bool useNestedMediaFilePaths
|
||||||
|
)
|
||||||
{
|
{
|
||||||
private static readonly AsyncKeyedLocker<string> Locker = new();
|
private static readonly AsyncKeyedLocker<string> Locker = new();
|
||||||
|
|
||||||
|
|
@ -25,8 +30,10 @@ internal partial class ExportAssetDownloader(string workingDirPath, bool reuse)
|
||||||
CancellationToken cancellationToken = default
|
CancellationToken cancellationToken = default
|
||||||
)
|
)
|
||||||
{
|
{
|
||||||
var fileName = GetFileNameFromUrl(url);
|
var localFilePath = useNestedMediaFilePaths
|
||||||
var filePath = Path.Combine(workingDirPath, fileName);
|
? GetFilePathFromUrl(url)
|
||||||
|
: GetFileNameFromUrlLegacy(url);
|
||||||
|
var filePath = Path.Combine(workingDirPath, localFilePath);
|
||||||
|
|
||||||
using var _ = await Locker.LockAsync(filePath, cancellationToken);
|
using var _ = await Locker.LockAsync(filePath, cancellationToken);
|
||||||
|
|
||||||
|
|
@ -44,6 +51,9 @@ internal partial class ExportAssetDownloader(string workingDirPath, bool reuse)
|
||||||
{
|
{
|
||||||
// Download the file
|
// Download the file
|
||||||
using var response = await Http.Client.GetAsync(url, innerCancellationToken);
|
using var response = await Http.Client.GetAsync(url, innerCancellationToken);
|
||||||
|
var directory = Path.GetDirectoryName(filePath);
|
||||||
|
if (directory != null)
|
||||||
|
Directory.CreateDirectory(directory);
|
||||||
await using var output = File.Create(filePath);
|
await using var output = File.Create(filePath);
|
||||||
await response.Content.CopyToAsync(output, innerCancellationToken);
|
await response.Content.CopyToAsync(output, innerCancellationToken);
|
||||||
},
|
},
|
||||||
|
|
@ -54,6 +64,80 @@ internal partial class ExportAssetDownloader(string workingDirPath, bool reuse)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
internal partial class ExportAssetDownloader
{
    /// <summary>
    /// Maps an asset URL to a relative, slash-separated file path that mirrors the
    /// structure of the URL.
    /// </summary>
    /// <remarks>
    /// Discord CDN URLs keep their own path, with the remaining query parameters folded
    /// into the file name. Every other URL, as well as a Discord CDN URL that does not
    /// have both a directory part and a file name, is placed under the
    /// <c>external</c> folder.
    /// </remarks>
    private static string GetFilePathFromUrl(string url)
    {
        var uri = new Uri(NormalizeUrl(url));

        // If this isn't a Discord CDN URL, save the file to the `external` folder
        if (!string.Equals(uri.Host, "cdn.discordapp.com", StringComparison.OrdinalIgnoreCase))
        {
            return GetExternalFilePath(uri);
        }

        // Split the URL path into its directory part (everything between the first
        // and the last slash) and the file name (everything after the last slash)
        var pathAndFileName = Regex.Match(uri.AbsolutePath, @"/(.+)/([^?]*)");
        var path = pathAndFileName.Groups[1].Value;
        var fileName = pathAndFileName.Groups[2].Value;

        // A Discord URL should have matches for both of these (see ImageCdn).
        // If we encounter an unexpected Discord URL, just treat it like an external one.
        if (string.IsNullOrWhiteSpace(path) || string.IsNullOrWhiteSpace(fileName))
        {
            return GetExternalFilePath(uri);
        }

        return $"{path}/{Path.EscapeFileName(AppendQueryParamsToFileName(fileName, uri))}";
    }

    /// <summary>
    /// Removes the signature parameters (<c>ex</c>, <c>is</c>, <c>hm</c>) from
    /// Discord CDN URLs so that the same asset maps to the same normalized URL.
    /// URLs pointing to any other host are returned unchanged.
    /// </summary>
    private static string NormalizeUrl(string url)
    {
        var uri = new Uri(url);
        if (!string.Equals(uri.Host, "cdn.discordapp.com", StringComparison.OrdinalIgnoreCase))
            return url;

        var query = HttpUtility.ParseQueryString(uri.Query);
        query.Remove("ex");
        query.Remove("is");
        query.Remove("hm");

        // Don't leave a dangling '?' behind when no parameters remain
        var queryString = query.ToString();
        if (string.IsNullOrEmpty(queryString))
            return uri.GetLeftPart(UriPartial.Path);

        return $"{uri.GetLeftPart(UriPartial.Path)}?{queryString}";
    }

    /// <summary>
    /// Stringifies the query parameters of the URL so they can be included in a file name.
    /// </summary>
    /// <returns>
    /// A string like <c>size-256_spoiler-false</c>: pairs are joined with <c>_</c>, and
    /// each key is joined to its value with <c>-</c>. A parameter with an empty value is
    /// represented by its key alone. Returns an empty string if there are no named
    /// parameters.
    /// </returns>
    private static string FormatQueryParamsForFilename(Uri uri)
    {
        var query = HttpUtility.ParseQueryString(uri.Query);

        return string.Join(
            "_",
            query
                .AllKeys.Where(key => !string.IsNullOrEmpty(key))
                .Select(key => string.IsNullOrEmpty(query[key]) ? key : $"{key}-{query[key]}")
        );
    }

    /// <summary>
    /// Inserts the formatted query parameters of the URL between the base name and the
    /// extension of the file name, e.g. <c>avatar.png</c> becomes
    /// <c>avatar_size-256.png</c>. The result is NOT escaped.
    /// </summary>
    private static string AppendQueryParamsToFileName(string fileName, Uri uri)
    {
        var fileNameWithoutExtension = Path.GetFileNameWithoutExtension(fileName);
        var fileExtension = Path.GetExtension(fileName);

        var queryParamsString = FormatQueryParamsForFilename(uri);
        var queryParamsSuffix = string.IsNullOrEmpty(queryParamsString)
            ? ""
            : $"_{queryParamsString}";

        return $"{fileNameWithoutExtension}{queryParamsSuffix}{fileExtension}";
    }

    /// <summary>
    /// Builds the relative file path for an asset that is not served from a recognized
    /// Discord CDN location: <c>external/{host}/{url path}</c>.
    /// </summary>
    /// <remarks>
    /// The host and every path segment are escaped individually, so that characters
    /// which are legal in URLs but not in file names (for example the colon in
    /// <c>image.jpg:large</c>) cannot break the download. Query parameters are folded
    /// into the file name, the same way as for Discord CDN URLs, so that URLs which
    /// differ only by their query do not overwrite each other.
    /// </remarks>
    private static string GetExternalFilePath(Uri uri)
    {
        var segments = uri.AbsolutePath.Split('/', StringSplitOptions.RemoveEmptyEntries);

        // A URL with an empty path, or one that ends with a slash, does not name a file.
        // Fall back to a fixed file name so the result never points at a directory.
        var hasFileName = segments.Length > 0 && !uri.AbsolutePath.EndsWith('/');
        var fileName = hasFileName ? segments[^1] : "index";
        var directories = hasFileName ? segments[..^1] : segments;

        var parts = new List<string> { "external", Path.EscapeFileName(uri.Host) };
        parts.AddRange(directories.Select(segment => Path.EscapeFileName(segment)));
        parts.Add(Path.EscapeFileName(AppendQueryParamsToFileName(fileName, uri)));

        return string.Join('/', parts);
    }
}
|
||||||
|
|
||||||
internal partial class ExportAssetDownloader
|
internal partial class ExportAssetDownloader
|
||||||
{
|
{
|
||||||
private static string GetUrlHash(string url)
|
private static string GetUrlHash(string url)
|
||||||
|
|
@ -80,7 +164,7 @@ internal partial class ExportAssetDownloader
|
||||||
.Truncate(5);
|
.Truncate(5);
|
||||||
}
|
}
|
||||||
|
|
||||||
private static string GetFileNameFromUrl(string url)
|
private static string GetFileNameFromUrlLegacy(string url)
|
||||||
{
|
{
|
||||||
var urlHash = GetUrlHash(url);
|
var urlHash = GetUrlHash(url);
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -21,7 +21,8 @@ internal class ExportContext(DiscordClient discord, ExportRequest request)
|
||||||
|
|
||||||
private readonly ExportAssetDownloader _assetDownloader = new(
|
private readonly ExportAssetDownloader _assetDownloader = new(
|
||||||
request.AssetsDirPath,
|
request.AssetsDirPath,
|
||||||
request.ShouldReuseAssets
|
request.ShouldReuseAssets,
|
||||||
|
request.ShouldUseNestedMediaFilePaths
|
||||||
);
|
);
|
||||||
|
|
||||||
public DiscordClient Discord { get; } = discord;
|
public DiscordClient Discord { get; } = discord;
|
||||||
|
|
|
||||||
|
|
@ -39,6 +39,8 @@ public partial class ExportRequest
|
||||||
|
|
||||||
public bool ShouldReuseAssets { get; }
|
public bool ShouldReuseAssets { get; }
|
||||||
|
|
||||||
|
public bool ShouldUseNestedMediaFilePaths { get; }
|
||||||
|
|
||||||
public string? Locale { get; }
|
public string? Locale { get; }
|
||||||
|
|
||||||
public CultureInfo? CultureInfo { get; }
|
public CultureInfo? CultureInfo { get; }
|
||||||
|
|
@ -58,6 +60,7 @@ public partial class ExportRequest
|
||||||
bool shouldFormatMarkdown,
|
bool shouldFormatMarkdown,
|
||||||
bool shouldDownloadAssets,
|
bool shouldDownloadAssets,
|
||||||
bool shouldReuseAssets,
|
bool shouldReuseAssets,
|
||||||
|
bool shouldUseNestedMediaFilePaths,
|
||||||
string? locale,
|
string? locale,
|
||||||
bool isUtcNormalizationEnabled
|
bool isUtcNormalizationEnabled
|
||||||
)
|
)
|
||||||
|
|
@ -72,6 +75,7 @@ public partial class ExportRequest
|
||||||
ShouldFormatMarkdown = shouldFormatMarkdown;
|
ShouldFormatMarkdown = shouldFormatMarkdown;
|
||||||
ShouldDownloadAssets = shouldDownloadAssets;
|
ShouldDownloadAssets = shouldDownloadAssets;
|
||||||
ShouldReuseAssets = shouldReuseAssets;
|
ShouldReuseAssets = shouldReuseAssets;
|
||||||
|
ShouldUseNestedMediaFilePaths = shouldUseNestedMediaFilePaths;
|
||||||
Locale = locale;
|
Locale = locale;
|
||||||
IsUtcNormalizationEnabled = isUtcNormalizationEnabled;
|
IsUtcNormalizationEnabled = isUtcNormalizationEnabled;
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -66,6 +66,9 @@ public partial class SettingsService()
|
||||||
[ObservableProperty]
|
[ObservableProperty]
|
||||||
public partial bool LastShouldReuseAssets { get; set; }
|
public partial bool LastShouldReuseAssets { get; set; }
|
||||||
|
|
||||||
|
[ObservableProperty]
|
||||||
|
public partial bool LastShouldUseNestedMediaFilePaths { get; set; }
|
||||||
|
|
||||||
[ObservableProperty]
|
[ObservableProperty]
|
||||||
public partial string? LastAssetsDirPath { get; set; }
|
public partial string? LastAssetsDirPath { get; set; }
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -276,6 +276,7 @@ public partial class DashboardViewModel : ViewModelBase
|
||||||
dialog.ShouldFormatMarkdown,
|
dialog.ShouldFormatMarkdown,
|
||||||
dialog.ShouldDownloadAssets,
|
dialog.ShouldDownloadAssets,
|
||||||
dialog.ShouldReuseAssets,
|
dialog.ShouldReuseAssets,
|
||||||
|
dialog.ShouldUseNestedMediaFilePaths,
|
||||||
_settingsService.Locale,
|
_settingsService.Locale,
|
||||||
_settingsService.IsUtcNormalizationEnabled
|
_settingsService.IsUtcNormalizationEnabled
|
||||||
);
|
);
|
||||||
|
|
|
||||||
|
|
@ -67,6 +67,9 @@ public partial class ExportSetupViewModel(
|
||||||
[ObservableProperty]
|
[ObservableProperty]
|
||||||
public partial bool ShouldReuseAssets { get; set; }
|
public partial bool ShouldReuseAssets { get; set; }
|
||||||
|
|
||||||
|
[ObservableProperty]
|
||||||
|
public partial bool ShouldUseNestedMediaFilePaths { get; set; }
|
||||||
|
|
||||||
[ObservableProperty]
|
[ObservableProperty]
|
||||||
public partial string? AssetsDirPath { get; set; }
|
public partial string? AssetsDirPath { get; set; }
|
||||||
|
|
||||||
|
|
@ -105,6 +108,7 @@ public partial class ExportSetupViewModel(
|
||||||
ShouldFormatMarkdown = settingsService.LastShouldFormatMarkdown;
|
ShouldFormatMarkdown = settingsService.LastShouldFormatMarkdown;
|
||||||
ShouldDownloadAssets = settingsService.LastShouldDownloadAssets;
|
ShouldDownloadAssets = settingsService.LastShouldDownloadAssets;
|
||||||
ShouldReuseAssets = settingsService.LastShouldReuseAssets;
|
ShouldReuseAssets = settingsService.LastShouldReuseAssets;
|
||||||
|
ShouldUseNestedMediaFilePaths = settingsService.LastShouldUseNestedMediaFilePaths;
|
||||||
AssetsDirPath = settingsService.LastAssetsDirPath;
|
AssetsDirPath = settingsService.LastAssetsDirPath;
|
||||||
|
|
||||||
// Show the "advanced options" section by default if any
|
// Show the "advanced options" section by default if any
|
||||||
|
|
@ -183,6 +187,7 @@ public partial class ExportSetupViewModel(
|
||||||
settingsService.LastShouldFormatMarkdown = ShouldFormatMarkdown;
|
settingsService.LastShouldFormatMarkdown = ShouldFormatMarkdown;
|
||||||
settingsService.LastShouldDownloadAssets = ShouldDownloadAssets;
|
settingsService.LastShouldDownloadAssets = ShouldDownloadAssets;
|
||||||
settingsService.LastShouldReuseAssets = ShouldReuseAssets;
|
settingsService.LastShouldReuseAssets = ShouldReuseAssets;
|
||||||
|
settingsService.LastShouldUseNestedMediaFilePaths = ShouldUseNestedMediaFilePaths;
|
||||||
settingsService.LastAssetsDirPath = AssetsDirPath;
|
settingsService.LastAssetsDirPath = AssetsDirPath;
|
||||||
|
|
||||||
Close(true);
|
Close(true);
|
||||||
|
|
|
||||||
|
|
@ -274,6 +274,16 @@
|
||||||
<ToggleSwitch DockPanel.Dock="Right" IsChecked="{Binding ShouldReuseAssets}" />
|
<ToggleSwitch DockPanel.Dock="Right" IsChecked="{Binding ShouldReuseAssets}" />
|
||||||
</DockPanel>
|
</DockPanel>
|
||||||
|
|
||||||
|
<!-- Use nested media file paths -->
|
||||||
|
<DockPanel
|
||||||
|
Margin="16,8"
|
||||||
|
IsEnabled="{Binding ShouldDownloadAssets}"
|
||||||
|
LastChildFill="False"
|
||||||
|
ToolTip.Tip="Uses a new organization to save media files to avoid data loss. If you change this setting from run to run, duplicate media may be downloaded.">
|
||||||
|
<TextBlock DockPanel.Dock="Left" Text="Use nested media file paths" />
|
||||||
|
<ToggleSwitch DockPanel.Dock="Right" IsChecked="{Binding ShouldUseNestedMediaFilePaths}" />
|
||||||
|
</DockPanel>
|
||||||
|
|
||||||
<!-- Assets path -->
|
<!-- Assets path -->
|
||||||
<TextBox
|
<TextBox
|
||||||
Margin="16,8"
|
Margin="16,8"
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue