mirror of
https://github.com/Tyrrrz/DiscordChatExporter.git
synced 2026-02-14 15:53:30 -07:00
Added explicit handling of existing export files
Previously, existing export files were simply overwritten by a new export. This has been changed: there are now several options for explicitly handling existing export files:
- Abort: aborts the export if the channel has previously been exported.
- Overwrite: removes the existing export before exporting the channel again.
- Append: appends to the existing export, which means that only messages after the last export will be exported.

The option that's used when a previous export is detected is currently hardcoded. A safeguard has been added to prevent the exporter from accidentally overwriting any existing files.

Additionally, the creation of a Snowflake from a timestamp has been improved: if a Snowflake is used to determine the messages starting at a given timestamp, it no longer represents that timestamp precisely, but is instead the latest possible Snowflake just before that timestamp. This is necessary to prevent the first Discord message in that specific millisecond from being excluded (which could previously happen).
This commit is contained in:
parent
42ebb9928e
commit
5251ae0e95
|
|
@ -30,7 +30,7 @@ public class DateRangeSpecs
|
||||||
ChannelIds = [ChannelIds.DateRangeTestCases],
|
ChannelIds = [ChannelIds.DateRangeTestCases],
|
||||||
ExportFormat = ExportFormat.Json,
|
ExportFormat = ExportFormat.Json,
|
||||||
OutputPath = file.Path,
|
OutputPath = file.Path,
|
||||||
After = Snowflake.FromDate(after),
|
After = Snowflake.FromDate(after, true),
|
||||||
}.ExecuteAsync(new FakeConsole());
|
}.ExecuteAsync(new FakeConsole());
|
||||||
|
|
||||||
// Assert
|
// Assert
|
||||||
|
|
@ -118,7 +118,7 @@ public class DateRangeSpecs
|
||||||
ExportFormat = ExportFormat.Json,
|
ExportFormat = ExportFormat.Json,
|
||||||
OutputPath = file.Path,
|
OutputPath = file.Path,
|
||||||
Before = Snowflake.FromDate(before),
|
Before = Snowflake.FromDate(before),
|
||||||
After = Snowflake.FromDate(after),
|
After = Snowflake.FromDate(after, true),
|
||||||
}.ExecuteAsync(new FakeConsole());
|
}.ExecuteAsync(new FakeConsole());
|
||||||
|
|
||||||
// Assert
|
// Assert
|
||||||
|
|
|
||||||
|
|
@ -19,8 +19,24 @@ public partial record struct Snowflake
|
||||||
{
|
{
|
||||||
public static Snowflake Zero { get; } = new(0);
|
public static Snowflake Zero { get; } = new(0);
|
||||||
|
|
||||||
public static Snowflake FromDate(DateTimeOffset instant) =>
|
/// <summary>
|
||||||
new(((ulong)instant.ToUnixTimeMilliseconds() - 1420070400000UL) << 22);
|
/// Creates and returns a Snowflake representing the given timestamp.
|
||||||
|
/// </summary>
|
||||||
|
/// <param name="timestamp">
|
||||||
|
/// The timestamp that should be returned as a Snowflake.
|
||||||
|
/// </param>
|
||||||
|
/// <param name="startTimestamp">
|
||||||
|
/// Whether the Snowflake will be used to determine the messages starting at the given timestamp.
|
||||||
|
/// If true, the Snowflake doesn't precisely represent the given timestamp. Instead, it then is the latest possible
|
||||||
|
/// Snowflake just before that timestamp.
|
||||||
|
/// This is necessary to prevent the first Discord message in that specific millisecond from being excluded.
|
||||||
|
/// </param>
|
||||||
|
/// <returns>A Snowflake representing the given timestamp.</returns>
|
||||||
|
public static Snowflake FromDate(DateTimeOffset timestamp, bool startTimestamp = false) =>
|
||||||
|
new(
|
||||||
|
(((ulong)timestamp.ToUnixTimeMilliseconds() - 1420070400000UL) << 22)
|
||||||
|
- (startTimestamp ? 1UL : 0UL)
|
||||||
|
);
|
||||||
|
|
||||||
public static Snowflake? TryParse(string? value, IFormatProvider? formatProvider = null)
|
public static Snowflake? TryParse(string? value, IFormatProvider? formatProvider = null)
|
||||||
{
|
{
|
||||||
|
|
|
||||||
|
|
@ -1,4 +1,5 @@
|
||||||
using System;
|
using System;
|
||||||
|
using System.IO;
|
||||||
using System.Threading;
|
using System.Threading;
|
||||||
using System.Threading.Tasks;
|
using System.Threading.Tasks;
|
||||||
using DiscordChatExporter.Core.Discord;
|
using DiscordChatExporter.Core.Discord;
|
||||||
|
|
@ -27,13 +28,59 @@ public class ChannelExporter(DiscordClient discord)
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
var currentPartitionIndex = 0;
|
||||||
|
// TODO: Maybe add a way to search for old files after a username change
|
||||||
|
if (File.Exists(request.OutputFilePath))
|
||||||
|
{
|
||||||
|
// TODO: Add a way for the user to choose the setting
|
||||||
|
var choice = FileExistsHandling.Abort;
|
||||||
|
|
||||||
|
switch (choice)
|
||||||
|
{
|
||||||
|
case FileExistsHandling.Abort:
|
||||||
|
Console.WriteLine("Channel aborted");
|
||||||
|
return;
|
||||||
|
case FileExistsHandling.Overwrite:
|
||||||
|
Console.WriteLine("Removing old files");
|
||||||
|
MessageExporter.RemoveExistingFiles(request.OutputFilePath);
|
||||||
|
break;
|
||||||
|
case FileExistsHandling.Append:
|
||||||
|
var lastMessageSnowflake = MessageExporter.GetLastMessageSnowflake(
|
||||||
|
request.OutputFilePath,
|
||||||
|
request.Format
|
||||||
|
);
|
||||||
|
if (lastMessageSnowflake != null)
|
||||||
|
{
|
||||||
|
request.LastPriorMessage = lastMessageSnowflake.Value;
|
||||||
|
|
||||||
|
if (!request.Channel.MayHaveMessagesAfter(request.LastPriorMessage.Value))
|
||||||
|
{
|
||||||
|
Console.WriteLine("Download already up to date");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
Console.WriteLine(
|
||||||
|
"Downloading data after " + lastMessageSnowflake.Value.ToDate()
|
||||||
|
);
|
||||||
|
currentPartitionIndex = MessageExporter.GetPartitionCount(
|
||||||
|
request.OutputFilePath
|
||||||
|
);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
throw new InvalidOperationException(
|
||||||
|
$"Unknown FileExistsHandling value '{choice}'."
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Build context
|
// Build context
|
||||||
var context = new ExportContext(discord, request);
|
var context = new ExportContext(discord, request);
|
||||||
await context.PopulateChannelsAndRolesAsync(cancellationToken);
|
await context.PopulateChannelsAndRolesAsync(cancellationToken);
|
||||||
|
|
||||||
// Initialize the exporter before further checks to ensure the file is created even if
|
// Initialize the exporter before further checks to ensure the file is created even if
|
||||||
// an exception is thrown after this point.
|
// an exception is thrown after this point.
|
||||||
await using var messageExporter = new MessageExporter(context);
|
await using var messageExporter = new MessageExporter(context, currentPartitionIndex);
|
||||||
|
|
||||||
// Check if the channel is empty
|
// Check if the channel is empty
|
||||||
if (request.Channel.IsEmpty)
|
if (request.Channel.IsEmpty)
|
||||||
|
|
@ -67,7 +114,7 @@ public class ChannelExporter(DiscordClient discord)
|
||||||
await foreach (
|
await foreach (
|
||||||
var message in discord.GetMessagesAsync(
|
var message in discord.GetMessagesAsync(
|
||||||
request.Channel.Id,
|
request.Channel.Id,
|
||||||
request.After,
|
request.LastPriorMessage ?? request.After,
|
||||||
request.Before,
|
request.Before,
|
||||||
progress,
|
progress,
|
||||||
cancellationToken
|
cancellationToken
|
||||||
|
|
|
||||||
|
|
@ -1,9 +1,12 @@
|
||||||
using System;
|
using System;
|
||||||
using System.Collections.Generic;
|
using System.Collections.Generic;
|
||||||
using System.IO;
|
using System.IO;
|
||||||
|
using System.Linq;
|
||||||
using System.Text;
|
using System.Text;
|
||||||
|
using System.Text.RegularExpressions;
|
||||||
using System.Threading;
|
using System.Threading;
|
||||||
using System.Threading.Tasks;
|
using System.Threading.Tasks;
|
||||||
|
using DiscordChatExporter.Core.Discord;
|
||||||
using DiscordChatExporter.Core.Discord.Data;
|
using DiscordChatExporter.Core.Discord.Data;
|
||||||
using DiscordChatExporter.Core.Utils.Extensions;
|
using DiscordChatExporter.Core.Utils.Extensions;
|
||||||
|
|
||||||
|
|
@ -12,6 +15,8 @@ namespace DiscordChatExporter.Core.Exporting;
|
||||||
internal partial class CsvMessageWriter(Stream stream, ExportContext context)
|
internal partial class CsvMessageWriter(Stream stream, ExportContext context)
|
||||||
: MessageWriter(stream, context)
|
: MessageWriter(stream, context)
|
||||||
{
|
{
|
||||||
|
private const int HeaderSize = 1;
|
||||||
|
|
||||||
private readonly TextWriter _writer = new StreamWriter(stream);
|
private readonly TextWriter _writer = new StreamWriter(stream);
|
||||||
|
|
||||||
private async ValueTask<string> FormatMarkdownAsync(
|
private async ValueTask<string> FormatMarkdownAsync(
|
||||||
|
|
@ -117,6 +122,51 @@ internal partial class CsvMessageWriter(Stream stream, ExportContext context)
|
||||||
await _writer.DisposeAsync();
|
await _writer.DisposeAsync();
|
||||||
await base.DisposeAsync();
|
await base.DisposeAsync();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
|
||||||
|
/// Retrieves and returns the timestamp of the last written message in the Discord channel that has been exported
|
||||||
|
/// with the CsvMessageWriter to the given file path as a Snowflake.
|
||||||
|
/// This timestamp has millisecond-level precision.
|
||||||
|
/// </summary>
|
||||||
|
/// <param name="filePath">
|
||||||
|
/// The path of the Discord channel CSV export whose last message's timestamp should be returned.
|
||||||
|
/// </param>
|
||||||
|
/// <returns>
|
||||||
|
/// The timestamp of the last written message in the Discord channel CSV export under the given path as a Snowflake.
|
||||||
|
/// Null, if the Discord channel CSV export doesn't include any message.
|
||||||
|
/// </returns>
|
||||||
|
/// <exception cref="FormatException">
|
||||||
|
/// Thrown if the file at the given path isn't a correctly formatted Discord channel CSV export.
|
||||||
|
/// </exception>
|
||||||
|
public static Snowflake? GetLastMessageDate(string filePath)
|
||||||
|
{
|
||||||
|
try
|
||||||
|
{
|
||||||
|
var fileLines = File.ReadAllLines(filePath)
|
||||||
|
.SkipWhile(string.IsNullOrWhiteSpace)
|
||||||
|
.ToArray();
|
||||||
|
if (fileLines.Length <= HeaderSize)
|
||||||
|
return null;
|
||||||
|
|
||||||
|
const string columnPattern = "(?:[^\"]?(?:\"\")?)*";
|
||||||
|
var messageDatePattern = string.Format(
|
||||||
|
"^\"{0}\",\"{0}\",\"({0})\",\"{0}\",\"{0}\",\"{0}\"$",
|
||||||
|
columnPattern
|
||||||
|
);
|
||||||
|
var messageDateRegex = new Regex(messageDatePattern);
|
||||||
|
|
||||||
|
var timestampMatch = messageDateRegex.Match(fileLines[^1]);
|
||||||
|
var timestampString = timestampMatch.Groups[1].Value;
|
||||||
|
var timestamp = DateTimeOffset.Parse(timestampString);
|
||||||
|
return Snowflake.FromDate(timestamp, true);
|
||||||
|
}
|
||||||
|
catch (Exception ex) when (ex is IndexOutOfRangeException or FormatException)
|
||||||
|
{
|
||||||
|
throw new FormatException(
|
||||||
|
"The CSV file is not correctly formatted; the last message timestamp could not be retrieved."
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
internal partial class CsvMessageWriter
|
internal partial class CsvMessageWriter
|
||||||
|
|
|
||||||
|
|
@ -30,6 +30,8 @@ public partial class ExportRequest
|
||||||
|
|
||||||
public Snowflake? Before { get; }
|
public Snowflake? Before { get; }
|
||||||
|
|
||||||
|
public Snowflake? LastPriorMessage { get; set; }
|
||||||
|
|
||||||
public PartitionLimit PartitionLimit { get; }
|
public PartitionLimit PartitionLimit { get; }
|
||||||
|
|
||||||
public MessageFilter MessageFilter { get; }
|
public MessageFilter MessageFilter { get; }
|
||||||
|
|
|
||||||
23
DiscordChatExporter.Core/Exporting/FileExistsHandling.cs
Normal file
23
DiscordChatExporter.Core/Exporting/FileExistsHandling.cs
Normal file
|
|
@ -0,0 +1,23 @@
|
||||||
|
namespace DiscordChatExporter.Core.Exporting;
|
||||||
|
|
||||||
|
/// <summary>
|
||||||
|
/// Represents the setting on how to handle the export of a channel that has already been exported.
|
||||||
|
/// </summary>
|
||||||
|
public enum FileExistsHandling
|
||||||
|
{
|
||||||
|
/// <summary>
|
||||||
|
/// If a channel had previously been exported, its export will be aborted.
|
||||||
|
/// </summary>
|
||||||
|
Abort,
|
||||||
|
|
||||||
|
/// <summary>
|
||||||
|
/// If a channel had previously been exported, the existing export will be removed, and it will be exported again.
|
||||||
|
/// </summary>
|
||||||
|
Overwrite,
|
||||||
|
|
||||||
|
/// <summary>
|
||||||
|
/// If a channel had previously been exported, the existing export will be appended, which means that only messages
|
||||||
|
/// after the last export will be exported.
|
||||||
|
/// </summary>
|
||||||
|
Append,
|
||||||
|
}
|
||||||
|
|
@ -2,14 +2,16 @@
|
||||||
using System.Collections.Generic;
|
using System.Collections.Generic;
|
||||||
using System.IO;
|
using System.IO;
|
||||||
using System.Linq;
|
using System.Linq;
|
||||||
|
using System.Text.RegularExpressions;
|
||||||
using System.Threading;
|
using System.Threading;
|
||||||
using System.Threading.Tasks;
|
using System.Threading.Tasks;
|
||||||
|
using DiscordChatExporter.Core.Discord;
|
||||||
using DiscordChatExporter.Core.Discord.Data;
|
using DiscordChatExporter.Core.Discord.Data;
|
||||||
using WebMarkupMin.Core;
|
using WebMarkupMin.Core;
|
||||||
|
|
||||||
namespace DiscordChatExporter.Core.Exporting;
|
namespace DiscordChatExporter.Core.Exporting;
|
||||||
|
|
||||||
internal class HtmlMessageWriter(Stream stream, ExportContext context, string themeName)
|
internal partial class HtmlMessageWriter(Stream stream, ExportContext context, string themeName)
|
||||||
: MessageWriter(stream, context)
|
: MessageWriter(stream, context)
|
||||||
{
|
{
|
||||||
private readonly TextWriter _writer = new StreamWriter(stream);
|
private readonly TextWriter _writer = new StreamWriter(stream);
|
||||||
|
|
@ -142,4 +144,50 @@ internal class HtmlMessageWriter(Stream stream, ExportContext context, string th
|
||||||
await _writer.DisposeAsync();
|
await _writer.DisposeAsync();
|
||||||
await base.DisposeAsync();
|
await base.DisposeAsync();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
|
||||||
|
/// Returns the statically created regex that detects and captures the timestamp of a message in a channel HTML
|
||||||
|
/// export.
|
||||||
|
/// </summary>
|
||||||
|
/// <returns>
|
||||||
|
/// The regex that detects and captures the timestamp of a message in a Discord channel HTML export.
|
||||||
|
/// </returns>
|
||||||
|
[GeneratedRegex("<span class=chatlog__timestamp title=\"([^\"]*)\">")]
|
||||||
|
private static partial Regex MessageDateRegex();
|
||||||
|
|
||||||
|
/// <summary>
/// Retrieves the approximate timestamp of the last message written to the Discord channel HTML export at the
/// given file path and returns it as a Snowflake.
/// The timestamp is read from the <c>title</c> attribute of the last <c>chatlog__timestamp</c> element in the
/// file, so the result is only as precise as that attribute (minute-level, per the original documentation).
/// </summary>
/// <param name="filePath">
/// The path of the Discord channel HTML export whose last message's timestamp should be returned.
/// </param>
/// <returns>
/// The latest possible Snowflake just before the timestamp of the last message in the HTML export.
/// Null, if the file doesn't contain any message timestamp (i.e. the export doesn't include any message).
/// </returns>
/// <exception cref="FormatException">
/// Thrown if the last message timestamp found in the file cannot be parsed as a date.
/// </exception>
public static Snowflake? GetLastMessageDate(string filePath)
{
    var fileContent = File.ReadAllText(filePath);
    var timestampMatches = MessageDateRegex().Matches(fileContent);

    // An export without messages contains no timestamp element. Indexing an empty MatchCollection would throw
    // ArgumentOutOfRangeException, so this case has to be handled explicitly.
    if (timestampMatches.Count == 0)
        return null;

    var timestampString = timestampMatches[^1].Groups[1].Value;

    // NOTE(review): parsing uses the current culture, same as before; this presumably has to match the culture
    // the export was written with — confirm against the HTML template.
    if (!DateTimeOffset.TryParse(timestampString, out var timestamp))
    {
        throw new FormatException(
            "The HTML file is not correctly formatted; the last message timestamp could not be retrieved."
        );
    }

    return Snowflake.FromDate(timestamp, true);
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -6,6 +6,7 @@ using System.Text.Encodings.Web;
|
||||||
using System.Text.Json;
|
using System.Text.Json;
|
||||||
using System.Threading;
|
using System.Threading;
|
||||||
using System.Threading.Tasks;
|
using System.Threading.Tasks;
|
||||||
|
using DiscordChatExporter.Core.Discord;
|
||||||
using DiscordChatExporter.Core.Discord.Data;
|
using DiscordChatExporter.Core.Discord.Data;
|
||||||
using DiscordChatExporter.Core.Discord.Data.Embeds;
|
using DiscordChatExporter.Core.Discord.Data.Embeds;
|
||||||
using DiscordChatExporter.Core.Markdown.Parsing;
|
using DiscordChatExporter.Core.Markdown.Parsing;
|
||||||
|
|
@ -579,4 +580,53 @@ internal class JsonMessageWriter(Stream stream, ExportContext context)
|
||||||
await _writer.DisposeAsync();
|
await _writer.DisposeAsync();
|
||||||
await base.DisposeAsync();
|
await base.DisposeAsync();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
/// Retrieves the Snowflake of the last message written to the Discord channel JSON export at the given file path.
/// The value is read from the <c>id</c> property of the last element of the root <c>messages</c> array.
/// </summary>
/// <param name="filePath">
/// The path of the Discord channel JSON export whose last message's Snowflake should be returned.
/// </param>
/// <returns>
/// The Snowflake of the last message in the Discord channel JSON export under the given path.
/// Null, if the <c>messages</c> array is empty.
/// </returns>
/// <exception cref="FormatException">
/// Thrown if the file isn't valid JSON, if it has no <c>messages</c> array, or if the last message has no
/// string <c>id</c> that can be parsed as an unsigned 64-bit integer.
/// </exception>
public static Snowflake? GetLastMessageSnowflake(string filePath)
{
    const string errorMessage =
        "The JSON file is not correctly formatted; the last message timestamp could not be retrieved.";

    try
    {
        var fileContent = File.ReadAllText(filePath);
        using var fileJson = JsonDocument.Parse(fileContent);

        // Validate the shape explicitly instead of relying on exceptions thrown by the accessors
        var rootJson = fileJson.RootElement;
        if (
            rootJson.ValueKind != JsonValueKind.Object
            || !rootJson.TryGetProperty("messages", out var messagesJson)
            || messagesJson.ValueKind != JsonValueKind.Array
        )
        {
            throw new FormatException(errorMessage);
        }

        var messageCount = messagesJson.GetArrayLength();
        if (messageCount == 0)
            return null;

        // Index the last element directly; there is no need to copy the whole array
        var lastMessageJson = messagesJson[messageCount - 1];
        if (
            lastMessageJson.ValueKind != JsonValueKind.Object
            || !lastMessageJson.TryGetProperty("id", out var idJson)
            || idJson.ValueKind != JsonValueKind.String
            || !ulong.TryParse(idJson.GetString(), out var snowflakeValue)
        )
        {
            throw new FormatException(errorMessage);
        }

        return new Snowflake(snowflakeValue);
    }
    catch (JsonException ex)
    {
        // Keep the parser error as the inner exception so the root cause isn't lost
        throw new FormatException(errorMessage, ex);
    }
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -2,13 +2,15 @@
|
||||||
using System.IO;
|
using System.IO;
|
||||||
using System.Threading;
|
using System.Threading;
|
||||||
using System.Threading.Tasks;
|
using System.Threading.Tasks;
|
||||||
|
using DiscordChatExporter.Core.Discord;
|
||||||
using DiscordChatExporter.Core.Discord.Data;
|
using DiscordChatExporter.Core.Discord.Data;
|
||||||
|
|
||||||
namespace DiscordChatExporter.Core.Exporting;
|
namespace DiscordChatExporter.Core.Exporting;
|
||||||
|
|
||||||
internal partial class MessageExporter(ExportContext context) : IAsyncDisposable
|
internal partial class MessageExporter(ExportContext context, int initialPartitionIndex = 0)
|
||||||
|
: IAsyncDisposable
|
||||||
{
|
{
|
||||||
private int _partitionIndex;
|
private int _partitionIndex = initialPartitionIndex;
|
||||||
private MessageWriter? _writer;
|
private MessageWriter? _writer;
|
||||||
|
|
||||||
public long MessagesExported { get; private set; }
|
public long MessagesExported { get; private set; }
|
||||||
|
|
@ -101,21 +103,148 @@ internal partial class MessageExporter
|
||||||
string filePath,
|
string filePath,
|
||||||
ExportFormat format,
|
ExportFormat format,
|
||||||
ExportContext context
|
ExportContext context
|
||||||
) =>
|
)
|
||||||
format switch
|
{
|
||||||
|
// Don't accidentally overwrite anything
|
||||||
|
if (File.Exists(filePath))
|
||||||
{
|
{
|
||||||
ExportFormat.PlainText => new PlainTextMessageWriter(File.Create(filePath), context),
|
throw new InvalidOperationException(
|
||||||
ExportFormat.Csv => new CsvMessageWriter(File.Create(filePath), context),
|
"Error: An exported file already exists. This should never happen."
|
||||||
ExportFormat.HtmlDark => new HtmlMessageWriter(File.Create(filePath), context, "Dark"),
|
);
|
||||||
ExportFormat.HtmlLight => new HtmlMessageWriter(
|
}
|
||||||
File.Create(filePath),
|
|
||||||
context,
|
var file = File.Create(filePath);
|
||||||
"Light"
|
return format switch
|
||||||
),
|
{
|
||||||
ExportFormat.Json => new JsonMessageWriter(File.Create(filePath), context),
|
ExportFormat.PlainText => new PlainTextMessageWriter(file, context),
|
||||||
|
ExportFormat.Csv => new CsvMessageWriter(file, context),
|
||||||
|
ExportFormat.HtmlDark => new HtmlMessageWriter(file, context, "Dark"),
|
||||||
|
ExportFormat.HtmlLight => new HtmlMessageWriter(file, context, "Light"),
|
||||||
|
ExportFormat.Json => new JsonMessageWriter(file, context),
|
||||||
_ => throw new ArgumentOutOfRangeException(
|
_ => throw new ArgumentOutOfRangeException(
|
||||||
nameof(format),
|
nameof(format),
|
||||||
$"Unknown export format '{format}'."
|
$"Unknown export format '{format}'."
|
||||||
),
|
),
|
||||||
};
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
/// <summary>
/// Retrieves the (approximate) Snowflake of the last message written to a previous export of a Discord channel
/// with the given base file path and format.
///
/// For JSON, the result comes from <c>JsonMessageWriter.GetLastMessageSnowflake</c>; for all other formats it
/// comes from the respective writer's <c>GetLastMessageDate</c>, which derives a Snowflake from the message
/// timestamp only and may therefore label slightly earlier messages as succeeding.
///
/// The number of partitions is determined via <see cref="GetPartitionCount"/>, and only the last partition file
/// is inspected.
/// </summary>
/// <param name="baseFilePath">
/// The path of the first partition of the Discord channel export whose last message should be looked up.
/// </param>
/// <param name="format">
/// The format of the Discord channel export whose last message should be looked up.
/// </param>
/// <returns>
/// The (approximate) Snowflake of the last message of the previous export.
/// Null, if no partition file exists, if the last partition file is empty, or if the format-specific reader
/// reports that the export doesn't include any message.
/// </returns>
/// <exception cref="FormatException">
/// Thrown by the format-specific reader if the last partition file isn't correctly formatted.
/// </exception>
/// <exception cref="ArgumentOutOfRangeException">
/// Thrown if the given format is not a known export format.
/// </exception>
public static Snowflake? GetLastMessageSnowflake(string baseFilePath, ExportFormat format)
{
    // GetPartitionCount returns 0 when not even the first partition exists, i.e. the channel has not been
    // exported before. Without this guard the code would ask for partition index -1.
    var partitionCount = GetPartitionCount(baseFilePath);
    if (partitionCount <= 0)
        return null;

    var lastPartitionFile = GetPartitionFilePath(baseFilePath, partitionCount - 1);

    // An empty file cannot contain any message, regardless of format
    if (new FileInfo(lastPartitionFile).Length == 0)
        return null;

    return format switch
    {
        ExportFormat.PlainText => PlainTextMessageWriter.GetLastMessageDate(lastPartitionFile),
        ExportFormat.Csv => CsvMessageWriter.GetLastMessageDate(lastPartitionFile),
        ExportFormat.HtmlDark or ExportFormat.HtmlLight => HtmlMessageWriter.GetLastMessageDate(
            lastPartitionFile
        ),
        ExportFormat.Json => JsonMessageWriter.GetLastMessageSnowflake(lastPartitionFile),
        _ => throw new ArgumentOutOfRangeException(
            nameof(format),
            $"Unknown export format '{format}'."
        ),
    };
}
|
||||||
|
|
||||||
|
/// <summary>
/// Deletes the partition files of a previous Discord channel export, starting with the first partition and
/// stopping at the first partition index for which no file exists.
/// </summary>
/// <param name="baseFilePath">
/// The path of the first partition of the Discord channel export that should be removed.
/// </param>
public static void RemoveExistingFiles(string baseFilePath)
{
    for (var partitionIndex = 0; ; partitionIndex++)
    {
        var partitionFilePath = GetPartitionFilePath(baseFilePath, partitionIndex);

        // The first missing partition marks the end of the export
        if (!File.Exists(partitionFilePath))
            return;

        File.Delete(partitionFilePath);
    }
}
|
||||||
|
|
||||||
|
/// <summary>
|
||||||
|
/// Determines and returns the number of partitions of the previously exported Discord channel with the given base
|
||||||
|
/// file path.
|
||||||
|
/// </summary>
|
||||||
|
/// <param name="baseFilePath">
|
||||||
|
/// The path of the first partition of the Discord channel export whose number of partitions should be returned.
|
||||||
|
/// </param>
|
||||||
|
/// <returns>
|
||||||
|
/// The number of partitions of the previously exported Discord channel with the given base file path.
|
||||||
|
/// </returns>
|
||||||
|
public static int GetPartitionCount(string baseFilePath)
|
||||||
|
{
|
||||||
|
// Use exponential search followed by binary search to quickly determine the number of partitions
|
||||||
|
var currentPartition = 1;
|
||||||
|
while (true)
|
||||||
|
{
|
||||||
|
var currentFilePath = GetPartitionFilePath(baseFilePath, currentPartition - 1);
|
||||||
|
if (File.Exists(currentFilePath))
|
||||||
|
{
|
||||||
|
currentPartition *= 2;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
var leftBorder = currentPartition / 2;
|
||||||
|
var rightBorder = currentPartition;
|
||||||
|
while (rightBorder - 1 > leftBorder)
|
||||||
|
{
|
||||||
|
var middle = (leftBorder + rightBorder) / 2;
|
||||||
|
var currentFilePath = GetPartitionFilePath(baseFilePath, middle - 1);
|
||||||
|
if (File.Exists(currentFilePath))
|
||||||
|
{
|
||||||
|
leftBorder = middle;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
rightBorder = middle;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return leftBorder;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1,17 +1,22 @@
|
||||||
using System.Collections.Generic;
|
using System;
|
||||||
|
using System.Collections.Generic;
|
||||||
using System.IO;
|
using System.IO;
|
||||||
using System.Linq;
|
using System.Linq;
|
||||||
|
using System.Text.RegularExpressions;
|
||||||
using System.Threading;
|
using System.Threading;
|
||||||
using System.Threading.Tasks;
|
using System.Threading.Tasks;
|
||||||
|
using DiscordChatExporter.Core.Discord;
|
||||||
using DiscordChatExporter.Core.Discord.Data;
|
using DiscordChatExporter.Core.Discord.Data;
|
||||||
using DiscordChatExporter.Core.Discord.Data.Embeds;
|
using DiscordChatExporter.Core.Discord.Data.Embeds;
|
||||||
using DiscordChatExporter.Core.Utils.Extensions;
|
using DiscordChatExporter.Core.Utils.Extensions;
|
||||||
|
|
||||||
namespace DiscordChatExporter.Core.Exporting;
|
namespace DiscordChatExporter.Core.Exporting;
|
||||||
|
|
||||||
internal class PlainTextMessageWriter(Stream stream, ExportContext context)
|
internal partial class PlainTextMessageWriter(Stream stream, ExportContext context)
|
||||||
: MessageWriter(stream, context)
|
: MessageWriter(stream, context)
|
||||||
{
|
{
|
||||||
|
private const int HeaderSize = 4;
|
||||||
|
|
||||||
private readonly TextWriter _writer = new StreamWriter(stream);
|
private readonly TextWriter _writer = new StreamWriter(stream);
|
||||||
|
|
||||||
private async ValueTask<string> FormatMarkdownAsync(
|
private async ValueTask<string> FormatMarkdownAsync(
|
||||||
|
|
@ -271,4 +276,60 @@ internal class PlainTextMessageWriter(Stream stream, ExportContext context)
|
||||||
await _writer.DisposeAsync();
|
await _writer.DisposeAsync();
|
||||||
await base.DisposeAsync();
|
await base.DisposeAsync();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
|
||||||
|
/// Returns the statically created regex that detects and captures the timestamp of a message in a channel TXT
|
||||||
|
/// export.
|
||||||
|
/// </summary>
|
||||||
|
/// <returns>
|
||||||
|
/// The regex that detects and captures the timestamp of a message in a Discord channel TXT export.
|
||||||
|
/// </returns>
|
||||||
|
[GeneratedRegex(@"^\[(.*)\] .*")]
|
||||||
|
private static partial Regex MessageDateRegex();
|
||||||
|
|
||||||
|
/// <summary>
|
||||||
|
/// Retrieves and returns the approximate timestamp of the last written message in the Discord channel that has
|
||||||
|
/// been exported with the PlainTextMessageWriter to the given file path as a Snowflake.
|
||||||
|
/// This timestamp only has minute-level precision.
|
||||||
|
/// </summary>
|
||||||
|
/// <param name="filePath">
|
||||||
|
/// The path of the Discord channel TXT export whose last message's timestamp should be returned.
|
||||||
|
/// </param>
|
||||||
|
/// <returns>
|
||||||
|
/// The approximate timestamp of the last written message in the Discord channel TXT export under the given path
|
||||||
|
/// as a Snowflake.
|
||||||
|
/// Null, if the Discord channel TXT export doesn't include any message.
|
||||||
|
/// </returns>
|
||||||
|
/// <exception cref="FormatException">
|
||||||
|
/// Thrown if the file at the given path isn't a correctly formatted Discord channel TXT export.
|
||||||
|
/// </exception>
|
||||||
|
public static Snowflake? GetLastMessageDate(string filePath)
|
||||||
|
{
|
||||||
|
var fileLines = File.ReadAllLines(filePath);
|
||||||
|
if (fileLines.SkipWhile(string.IsNullOrWhiteSpace).ToArray().Length <= HeaderSize)
|
||||||
|
return null;
|
||||||
|
|
||||||
|
var messageDateRegex = MessageDateRegex();
|
||||||
|
|
||||||
|
// Find the last line with a message timestamp
|
||||||
|
for (var i = fileLines.Length - 1; i >= HeaderSize; i--)
|
||||||
|
{
|
||||||
|
var timestampMatch = messageDateRegex.Match(fileLines[i]);
|
||||||
|
if (!timestampMatch.Success)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
var timestampString = timestampMatch.Groups[1].Value;
|
||||||
|
if (
|
||||||
|
DateTimeOffset.TryParse(timestampString, out var timestamp)
|
||||||
|
&& fileLines[i - 1] == ""
|
||||||
|
)
|
||||||
|
{
|
||||||
|
return Snowflake.FromDate(timestamp, true);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
throw new FormatException(
|
||||||
|
"The TXT file is not correctly formatted; the last message timestamp could not be retrieved."
|
||||||
|
);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -268,8 +268,8 @@ public partial class DashboardViewModel : ViewModelBase
|
||||||
dialog.OutputPath!,
|
dialog.OutputPath!,
|
||||||
dialog.AssetsDirPath,
|
dialog.AssetsDirPath,
|
||||||
dialog.SelectedFormat,
|
dialog.SelectedFormat,
|
||||||
dialog.After?.Pipe(Snowflake.FromDate),
|
dialog.After?.Pipe(timestamp => Snowflake.FromDate(timestamp, true)),
|
||||||
dialog.Before?.Pipe(Snowflake.FromDate),
|
dialog.Before?.Pipe(timestamp => Snowflake.FromDate(timestamp)),
|
||||||
dialog.PartitionLimit,
|
dialog.PartitionLimit,
|
||||||
dialog.MessageFilter,
|
dialog.MessageFilter,
|
||||||
dialog.ShouldFormatMarkdown,
|
dialog.ShouldFormatMarkdown,
|
||||||
|
|
|
||||||
|
|
@ -128,8 +128,8 @@ public partial class ExportSetupViewModel(
|
||||||
Guild!,
|
Guild!,
|
||||||
Channels!.Single(),
|
Channels!.Single(),
|
||||||
SelectedFormat,
|
SelectedFormat,
|
||||||
After?.Pipe(Snowflake.FromDate),
|
After?.Pipe(timestamp => Snowflake.FromDate(timestamp, true)),
|
||||||
Before?.Pipe(Snowflake.FromDate)
|
Before?.Pipe(timestamp => Snowflake.FromDate(timestamp))
|
||||||
);
|
);
|
||||||
|
|
||||||
var extension = SelectedFormat.GetFileExtension();
|
var extension = SelectedFormat.GetFileExtension();
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue