mirror of
https://github.com/Tyrrrz/DiscordChatExporter.git
synced 2026-02-14 07:43:31 -07:00
Merge 2a4d625157 into 72f9e981de
This commit is contained in:
commit
2a49bb9979
|
|
@ -1,4 +1,5 @@
|
|||
using System.IO;
|
||||
using System;
|
||||
using System.IO;
|
||||
using System.Linq;
|
||||
using System.Threading.Tasks;
|
||||
using CliFx.Infrastructure;
|
||||
|
|
@ -39,4 +40,51 @@ public class SelfContainedSpecs
|
|||
.Should()
|
||||
.BeTrue();
|
||||
}
|
||||
|
||||
[Fact]
public async Task I_can_export_a_channel_and_download_all_referenced_assets_with_nested_paths()
{
    // Arrange
    using var dir = TempDir.Create();
    var filePath = Path.Combine(dir.Path, "output.html");

    // Act
    // Same export as the non-nested scenario, but with nested media file paths enabled
    await new ExportChannelsCommand
    {
        Token = Secrets.DiscordToken,
        ChannelIds = [ChannelIds.SelfContainedTestCases],
        ExportFormat = ExportFormat.HtmlDark,
        OutputPath = filePath,
        ShouldDownloadAssets = true,
        ShouldUseNestedMediaFilePaths = true,
    }.ExecuteAsync(new FakeConsole());

    // Assert
    // Every `src` attribute in the exported document is resolved relative to the
    // output directory and must point to a file that exists on disk.
    Html.Parse(await File.ReadAllTextAsync(filePath))
        .QuerySelectorAll("body [src]")
        .Select(e => e.GetAttribute("src")!)
        .Select(f => Path.GetFullPath(f, dir.Path))
        .All(File.Exists)
        .Should()
        .BeTrue();
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -97,10 +97,17 @@ public abstract class ExportCommandBase : DiscordCommandBase
|
|||
|
||||
[CommandOption(
|
||||
"reuse-media",
|
||||
Description = "Reuse previously downloaded assets to avoid redundant requests."
|
||||
Description = "Reuse previously downloaded assets to avoid redundant requests. "
|
||||
+ "Keep --media-nested consistent across runs for best results."
|
||||
)]
|
||||
public bool ShouldReuseAssets { get; init; } = false;
|
||||
|
||||
/// <summary>
/// CLI switch <c>--media-nested</c>. When set, downloaded assets are saved using the
/// nested file naming convention described in the option text below. Off by default.
/// </summary>
[CommandOption(
|
||||
"media-nested",
|
||||
Description = "Saves assets with a nested file naming convention, creating subdirectories for each distinct asset type."
|
||||
)]
|
||||
public bool ShouldUseNestedMediaFilePaths { get; init; } = false;
|
||||
|
||||
[CommandOption(
|
||||
"media-dir",
|
||||
Description = "Download assets to this directory. "
|
||||
|
|
@ -154,6 +161,12 @@ public abstract class ExportCommandBase : DiscordCommandBase
|
|||
throw new CommandException("Option --reuse-media cannot be used without --media.");
|
||||
}
|
||||
|
||||
// Nested media file paths can only be enabled if the download assets option is set
|
||||
if (ShouldUseNestedMediaFilePaths && !ShouldDownloadAssets)
|
||||
{
|
||||
throw new CommandException("Option --media-nested cannot be used without --media.");
|
||||
}
|
||||
|
||||
// Assets directory can only be specified if the download assets option is set
|
||||
if (!string.IsNullOrWhiteSpace(AssetsDirPath) && !ShouldDownloadAssets)
|
||||
{
|
||||
|
|
@ -270,6 +283,7 @@ public abstract class ExportCommandBase : DiscordCommandBase
|
|||
ShouldFormatMarkdown,
|
||||
ShouldDownloadAssets,
|
||||
ShouldReuseAssets,
|
||||
ShouldUseNestedMediaFilePaths,
|
||||
Locale,
|
||||
IsUtcNormalizationEnabled
|
||||
);
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.IO;
|
||||
using System.Linq;
|
||||
using System.Security.Cryptography;
|
||||
using System.Text;
|
||||
using System.Text.RegularExpressions;
|
||||
|
|
@ -13,7 +14,11 @@ using DiscordChatExporter.Core.Utils.Extensions;
|
|||
|
||||
namespace DiscordChatExporter.Core.Exporting;
|
||||
|
||||
internal partial class ExportAssetDownloader(string workingDirPath, bool reuse)
|
||||
internal partial class ExportAssetDownloader(
|
||||
string workingDirPath,
|
||||
bool reuse,
|
||||
bool useNestedMediaFilePaths
|
||||
)
|
||||
{
|
||||
private static readonly AsyncKeyedLocker<string> Locker = new();
|
||||
|
||||
|
|
@ -25,8 +30,10 @@ internal partial class ExportAssetDownloader(string workingDirPath, bool reuse)
|
|||
CancellationToken cancellationToken = default
|
||||
)
|
||||
{
|
||||
var fileName = GetFileNameFromUrl(url);
|
||||
var filePath = Path.Combine(workingDirPath, fileName);
|
||||
var localFilePath = useNestedMediaFilePaths
|
||||
? GetFilePathFromUrl(url)
|
||||
: GetFileNameFromUrlLegacy(url);
|
||||
var filePath = Path.Combine(workingDirPath, localFilePath);
|
||||
|
||||
using var _ = await Locker.LockAsync(filePath, cancellationToken);
|
||||
|
||||
|
|
@ -44,6 +51,9 @@ internal partial class ExportAssetDownloader(string workingDirPath, bool reuse)
|
|||
{
|
||||
// Download the file
|
||||
using var response = await Http.Client.GetAsync(url, innerCancellationToken);
|
||||
var directory = Path.GetDirectoryName(filePath);
|
||||
if (directory != null)
|
||||
Directory.CreateDirectory(directory);
|
||||
await using var output = File.Create(filePath);
|
||||
await response.Content.CopyToAsync(output, innerCancellationToken);
|
||||
},
|
||||
|
|
@ -54,6 +64,80 @@ internal partial class ExportAssetDownloader(string workingDirPath, bool reuse)
|
|||
}
|
||||
}
|
||||
|
||||
internal partial class ExportAssetDownloader
{
    /// <summary>
    /// Maps an asset URL to a relative, nested file path.
    /// Discord CDN URLs keep their URL directory structure, with the remaining query
    /// parameters folded into the file name. Every other URL is placed under the
    /// <c>external</c> folder.
    /// </summary>
    /// <param name="url">Absolute URL of the asset (it is parsed with <see cref="Uri"/>).</param>
    /// <returns>A relative path that uses <c>/</c> as the separator.</returns>
    private static string GetFilePathFromUrl(string url)
    {
        var uri = new Uri(NormalizeUrl(url));

        // If this isn't a Discord CDN URL, save the file to the `external` folder.
        // This is checked first so that the path is only dissected for CDN URLs.
        if (!string.Equals(uri.Host, "cdn.discordapp.com", StringComparison.OrdinalIgnoreCase))
        {
            return GetExternalFilePath(uri);
        }

        // Try to split the URL path into its directory part (group 1) and file name (group 2).
        // The first group is greedy, so the split happens at the last slash.
        var pathAndFileName = Regex.Match(uri.AbsolutePath, @"/(.+)/([^?]*)");
        var path = pathAndFileName.Groups[1].Value;
        var fileName = pathAndFileName.Groups[2].Value;

        // A Discord CDN URL is expected to yield both parts (see ImageCdn).
        // If we encounter an unexpected Discord URL, just treat it like an external one.
        if (string.IsNullOrWhiteSpace(path) || string.IsNullOrWhiteSpace(fileName))
        {
            return GetExternalFilePath(uri);
        }

        var fileNameWithoutExtension = Path.GetFileNameWithoutExtension(fileName);
        var fileExtension = Path.GetExtension(fileName);

        // Query parameters that survived normalization are inserted between the name
        // and the extension, so URLs differing only by query get different file names.
        var queryParamsString = FormatQueryParamsForFilename(uri);
        var queryParamsSuffix = string.IsNullOrEmpty(queryParamsString)
            ? ""
            : $"_{queryParamsString}";

        var fullFilename = $"{fileNameWithoutExtension}{queryParamsSuffix}{fileExtension}";

        // Only the file name is escaped; the directory part is used as it appears in the URL.
        return $"{path}/{Path.EscapeFileName(fullFilename)}";
    }

    /// <summary>
    /// Removes the <c>ex</c>, <c>is</c> and <c>hm</c> query parameters (signature parameters)
    /// from Discord CDN URLs to normalize them. URLs on any other host are returned unchanged.
    /// </summary>
    /// <param name="url">Absolute URL to normalize.</param>
    /// <returns>The URL without the parameters listed above; without a query at all if none remain.</returns>
    private static string NormalizeUrl(string url)
    {
        var uri = new Uri(url);
        if (!string.Equals(uri.Host, "cdn.discordapp.com", StringComparison.OrdinalIgnoreCase))
            return url;

        var query = HttpUtility.ParseQueryString(uri.Query);
        query.Remove("ex");
        query.Remove("is");
        query.Remove("hm");

        // Rebuild the URL from everything up to the path plus whatever query is left
        var queryString = query.ToString();
        if (string.IsNullOrEmpty(queryString))
            return uri.GetLeftPart(UriPartial.Path);

        return $"{uri.GetLeftPart(UriPartial.Path)}?{queryString}";
    }

    /// <summary>
    /// Stringifies the query parameters of <paramref name="uri"/> so they can be included in a file name.
    /// Each parameter becomes <c>key-value</c> (or just <c>key</c> when the value is empty) and the
    /// parameters are joined with underscores, e.g. <c>size-256_spoiler-false</c>.
    /// Entries with a null or empty key are skipped.
    /// </summary>
    /// <param name="uri">URI whose query string is formatted.</param>
    /// <returns>The formatted parameters, or an empty string when there are none.</returns>
    private static string FormatQueryParamsForFilename(Uri uri)
    {
        var query = HttpUtility.ParseQueryString(uri.Query);
        return string.Join(
            "_",
            query
                .AllKeys.Where(key => !string.IsNullOrEmpty(key))
                .Select(key => string.IsNullOrEmpty(query[key]) ? key : $"{key}-{query[key]}")
        );
    }

    /// <summary>
    /// Builds the relative path for an asset that is not stored in the Discord CDN layout:
    /// <c>external/{host}{absolute path}</c>.
    /// </summary>
    /// <param name="uri">URI of the asset.</param>
    /// <returns>A relative path under the <c>external</c> folder.</returns>
    private static string GetExternalFilePath(Uri uri)
    {
        // NOTE(review): the query string is not part of the result, so two URLs that differ
        // only by query map to the same file. Confirm this is acceptable.
        // NOTE(review): a URL path ending in '/' produces a path without a file name.
        // Confirm callers never pass such URLs.
        return $"external/{uri.Host}{uri.AbsolutePath}";
    }
}
|
||||
|
||||
internal partial class ExportAssetDownloader
|
||||
{
|
||||
private static string GetUrlHash(string url)
|
||||
|
|
@ -80,7 +164,7 @@ internal partial class ExportAssetDownloader
|
|||
.Truncate(5);
|
||||
}
|
||||
|
||||
private static string GetFileNameFromUrl(string url)
|
||||
private static string GetFileNameFromUrlLegacy(string url)
|
||||
{
|
||||
var urlHash = GetUrlHash(url);
|
||||
|
||||
|
|
|
|||
|
|
@ -21,7 +21,8 @@ internal class ExportContext(DiscordClient discord, ExportRequest request)
|
|||
|
||||
private readonly ExportAssetDownloader _assetDownloader = new(
|
||||
request.AssetsDirPath,
|
||||
request.ShouldReuseAssets
|
||||
request.ShouldReuseAssets,
|
||||
request.ShouldUseNestedMediaFilePaths
|
||||
);
|
||||
|
||||
public DiscordClient Discord { get; } = discord;
|
||||
|
|
|
|||
|
|
@ -39,6 +39,8 @@ public partial class ExportRequest
|
|||
|
||||
public bool ShouldReuseAssets { get; }
|
||||
|
||||
public bool ShouldUseNestedMediaFilePaths { get; }
|
||||
|
||||
public string? Locale { get; }
|
||||
|
||||
public CultureInfo? CultureInfo { get; }
|
||||
|
|
@ -58,6 +60,7 @@ public partial class ExportRequest
|
|||
bool shouldFormatMarkdown,
|
||||
bool shouldDownloadAssets,
|
||||
bool shouldReuseAssets,
|
||||
bool shouldUseNestedMediaFilePaths,
|
||||
string? locale,
|
||||
bool isUtcNormalizationEnabled
|
||||
)
|
||||
|
|
@ -72,6 +75,7 @@ public partial class ExportRequest
|
|||
ShouldFormatMarkdown = shouldFormatMarkdown;
|
||||
ShouldDownloadAssets = shouldDownloadAssets;
|
||||
ShouldReuseAssets = shouldReuseAssets;
|
||||
ShouldUseNestedMediaFilePaths = shouldUseNestedMediaFilePaths;
|
||||
Locale = locale;
|
||||
IsUtcNormalizationEnabled = isUtcNormalizationEnabled;
|
||||
|
||||
|
|
|
|||
|
|
@ -66,6 +66,9 @@ public partial class SettingsService()
|
|||
[ObservableProperty]
|
||||
public partial bool LastShouldReuseAssets { get; set; }
|
||||
|
||||
[ObservableProperty]
|
||||
public partial bool LastShouldUseNestedMediaFilePaths { get; set; }
|
||||
|
||||
[ObservableProperty]
|
||||
public partial string? LastAssetsDirPath { get; set; }
|
||||
|
||||
|
|
|
|||
|
|
@ -276,6 +276,7 @@ public partial class DashboardViewModel : ViewModelBase
|
|||
dialog.ShouldFormatMarkdown,
|
||||
dialog.ShouldDownloadAssets,
|
||||
dialog.ShouldReuseAssets,
|
||||
dialog.ShouldUseNestedMediaFilePaths,
|
||||
_settingsService.Locale,
|
||||
_settingsService.IsUtcNormalizationEnabled
|
||||
);
|
||||
|
|
|
|||
|
|
@ -67,6 +67,9 @@ public partial class ExportSetupViewModel(
|
|||
[ObservableProperty]
|
||||
public partial bool ShouldReuseAssets { get; set; }
|
||||
|
||||
[ObservableProperty]
|
||||
public partial bool ShouldUseNestedMediaFilePaths { get; set; }
|
||||
|
||||
[ObservableProperty]
|
||||
public partial string? AssetsDirPath { get; set; }
|
||||
|
||||
|
|
@ -105,6 +108,7 @@ public partial class ExportSetupViewModel(
|
|||
ShouldFormatMarkdown = settingsService.LastShouldFormatMarkdown;
|
||||
ShouldDownloadAssets = settingsService.LastShouldDownloadAssets;
|
||||
ShouldReuseAssets = settingsService.LastShouldReuseAssets;
|
||||
ShouldUseNestedMediaFilePaths = settingsService.LastShouldUseNestedMediaFilePaths;
|
||||
AssetsDirPath = settingsService.LastAssetsDirPath;
|
||||
|
||||
// Show the "advanced options" section by default if any
|
||||
|
|
@ -183,6 +187,7 @@ public partial class ExportSetupViewModel(
|
|||
settingsService.LastShouldFormatMarkdown = ShouldFormatMarkdown;
|
||||
settingsService.LastShouldDownloadAssets = ShouldDownloadAssets;
|
||||
settingsService.LastShouldReuseAssets = ShouldReuseAssets;
|
||||
settingsService.LastShouldUseNestedMediaFilePaths = ShouldUseNestedMediaFilePaths;
|
||||
settingsService.LastAssetsDirPath = AssetsDirPath;
|
||||
|
||||
Close(true);
|
||||
|
|
|
|||
|
|
@ -274,6 +274,16 @@
|
|||
<ToggleSwitch DockPanel.Dock="Right" IsChecked="{Binding ShouldReuseAssets}" />
|
||||
</DockPanel>
|
||||
|
||||
<!-- Use nested media file paths -->
|
||||
<DockPanel
|
||||
Margin="16,8"
|
||||
IsEnabled="{Binding ShouldDownloadAssets}"
|
||||
LastChildFill="False"
|
||||
ToolTip.Tip="Uses a new organization to save media files to avoid data loss. If you change this setting from run to run, duplicate media may be downloaded.">
|
||||
<TextBlock DockPanel.Dock="Left" Text="Use nested media file paths" />
|
||||
<ToggleSwitch DockPanel.Dock="Right" IsChecked="{Binding ShouldUseNestedMediaFilePaths}" />
|
||||
</DockPanel>
|
||||
|
||||
<!-- Assets path -->
|
||||
<TextBox
|
||||
Margin="16,8"
|
||||
|
|
|
|||
Loading…
Reference in a new issue