diff --git a/m2d/converters/event-to-message.js b/m2d/converters/event-to-message.js index dde77b7..f793f85 100644 --- a/m2d/converters/event-to-message.js +++ b/m2d/converters/event-to-message.js @@ -2,19 +2,41 @@ const Ty = require("../../types") const DiscordTypes = require("discord-api-types/v10") -const markdown = require("discord-markdown") +const chunk = require("chunk-text") +const TurndownService = require("turndown") const passthrough = require("../../passthrough") const { sync, db, discord } = passthrough /** @type {import("../../matrix/file")} */ const file = sync.require("../../matrix/file") +// https://github.com/mixmark-io/turndown/blob/97e4535ca76bb2e70d9caa2aa4d4686956b06d44/src/utilities.js#L26C28-L33C2 +const BLOCK_ELEMENTS = [ + "ADDRESS", "ARTICLE", "ASIDE", "AUDIO", "BLOCKQUOTE", "BODY", "CANVAS", + "CENTER", "DD", "DETAILS", "DIR", "DIV", "DL", "DT", "FIELDSET", "FIGCAPTION", "FIGURE", + "FOOTER", "FORM", "FRAMESET", "H1", "H2", "H3", "H4", "H5", "H6", "HEADER", + "HGROUP", "HR", "HTML", "ISINDEX", "LI", "MAIN", "MENU", "NAV", "NOFRAMES", + "NOSCRIPT", "OL", "OUTPUT", "P", "PRE", "SECTION", "SUMMARY", "TABLE", "TBODY", "TD", + "TFOOT", "TH", "THEAD", "TR", "UL" +] + +const turndownService = new TurndownService({ + hr: "----" +}) + +turndownService.addRule("strikethrough", { + filter: ["del", "s", "strike"], + replacement: function (content) { + return "~~" + content + "~~" + } +}) + /** * @param {Ty.Event.Outer} event */ function eventToMessage(event) { /** @type {(DiscordTypes.RESTPostAPIWebhookWithTokenJSONBody & {files?: {name: string, file: Buffer}[]})[]} */ - const messages = [] + let messages = [] let displayName = event.sender let avatarURL = undefined @@ -24,20 +46,51 @@ function eventToMessage(event) { // TODO: get the media repo domain and the avatar url from the matrix member event } - if (event.content.msgtype === "m.text") { - messages.push({ - content: event.content.body, - username: displayName, - avatar_url: avatarURL - }) - } else if (event.content.msgtype === "m.emote") { - messages.push({ - content: `\* _${displayName} ${event.content.body}_`, - username: displayName, - avatar_url: avatarURL + // Convert content depending on what the message is + let content = event.content.body // ultimate fallback + if (event.content.format === "org.matrix.custom.html" && event.content.formatted_body) { + let input = event.content.formatted_body + if (event.content.msgtype === "m.emote") { + input = `* ${displayName} ${input}` + } + + // Note: Element's renderers on Web and Android currently collapse whitespace, like the browser does. Turndown also collapses whitespace which is good for me. + // If later I'm using a client that doesn't collapse whitespace and I want turndown to follow suit, uncomment the following line of code, and it Just Works: + // input = input.replace(/ /g, " ") + // There is also a corresponding test to uncomment, named "event2message: whitespace is retained" + + // The matrix spec hasn't decided whether \n counts as a newline or not, but I'm going to count it, because if it's in the data it's there for a reason. + // But I should not count it if it's between block elements. + input = input.replace(/(<\/?([^ >]+)[^>]*>)?\n(<\/?([^ >]+)[^>]*>)?/g, (whole, beforeContext, beforeTag, afterContext, afterTag) => { + if (typeof beforeTag !== "string" && typeof afterTag !== "string") { + return "
" + } + beforeContext = beforeContext || "" + beforeTag = beforeTag || "" + afterContext = afterContext || "" + afterTag = afterTag || "" + if (!BLOCK_ELEMENTS.includes(beforeTag.toUpperCase()) && !BLOCK_ELEMENTS.includes(afterTag.toUpperCase())) { + return beforeContext + "
" + afterContext + } else { + return whole + } }) + + // @ts-ignore + content = turndownService.turndown(input) + + // It's optimised for commonmark, we need to replace the space-space-newline with just newline + content = content.replace(/ \n/g, "\n") } + // Split into 2000 character chunks + const chunks = chunk(content, 2000) + messages = messages.concat(chunks.map(content => ({ + content, + username: displayName, + avatar_url: avatarURL + }))) + return messages } diff --git a/m2d/converters/event-to-message.test.js b/m2d/converters/event-to-message.test.js index a45c23b..ac62bf3 100644 --- a/m2d/converters/event-to-message.test.js +++ b/m2d/converters/event-to-message.test.js @@ -4,6 +4,12 @@ const {test} = require("supertape") const {eventToMessage} = require("./event-to-message") const data = require("../../test/data") +function sameFirstContentAndWhitespace(t, a, b) { + const a2 = JSON.stringify(a[0].content) + const b2 = JSON.stringify(b[0].content) + t.equal(a2, b2) +} + test("event2message: janky test", t => { t.deepEqual( eventToMessage({ @@ -28,6 +34,165 @@ test("event2message: janky test", t => { ) }) +test("event2message: basic html is converted to markdown", t => { + t.deepEqual( + eventToMessage({ + content: { + msgtype: "m.text", + body: "wrong body", + format: "org.matrix.custom.html", + formatted_body: "this is a test of formatting" + }, + event_id: "$g07oYSZFWBkxohNEfywldwgcWj1hbhDzQ1sBAKvqOOU", + origin_server_ts: 1688301929913, + room_id: "!kLRqKKUQXcibIMtOpl:cadence.moe", + sender: "@cadence:cadence.moe", + type: "m.room.message", + unsigned: { + age: 405299 + } + }), + [{ + username: "cadence", + content: "this **is** a **_test_** of ~~formatting~~", + avatar_url: undefined + }] + ) +}) + +test("event2message: markdown syntax is escaped", t => { + t.deepEqual( + eventToMessage({ + content: { + msgtype: "m.text", + body: "wrong body", + format: "org.matrix.custom.html", + formatted_body: "this **is** an extreme \\*test\\* of" + }, + event_id: "$g07oYSZFWBkxohNEfywldwgcWj1hbhDzQ1sBAKvqOOU", + origin_server_ts: 1688301929913, + room_id: "!kLRqKKUQXcibIMtOpl:cadence.moe", + sender: "@cadence:cadence.moe", + type: "m.room.message", + unsigned: { + age: 405299 + } + }), + [{ + username: "cadence", + content: "this \\*\\*is\\*\\* an **_extreme_** \\\\\\*test\\\\\\* of", + avatar_url: undefined + }] + ) +}) + +test("event2message: html lines are bridged correctly", t => { + t.deepEqual( + eventToMessage({ + content: { + msgtype: "m.text", + body: "wrong body", + format: "org.matrix.custom.html", + formatted_body: "

paragraph one
line two
line three

paragraph two\nline two\nline three\n\nparagraph three

paragraph four\nline two
line three\nline four

paragraph five" + }, + event_id: "$g07oYSZFWBkxohNEfywldwgcWj1hbhDzQ1sBAKvqOOU", + origin_server_ts: 1688301929913, + room_id: "!kLRqKKUQXcibIMtOpl:cadence.moe", + sender: "@cadence:cadence.moe", + type: "m.room.message", + unsigned: { + age: 405299 + } + }), + [{ + username: "cadence", + content: "paragraph one\nline _two_\nline three\n\nparagraph two\nline _two_\nline three\n\nparagraph three\n\nparagraph four\nline two\nline three\nline four\n\nparagraph five", + avatar_url: undefined + }] + ) +}) + +/*test("event2message: whitespace is retained", t => { + t.deepEqual( + eventToMessage({ + content: { + msgtype: "m.text", + body: "wrong body", + format: "org.matrix.custom.html", + formatted_body: "line one: test test
line two: test test
line three: test test
line four: test test
line five" + }, + event_id: "$g07oYSZFWBkxohNEfywldwgcWj1hbhDzQ1sBAKvqOOU", + origin_server_ts: 1688301929913, + room_id: "!kLRqKKUQXcibIMtOpl:cadence.moe", + sender: "@cadence:cadence.moe", + type: "m.room.message", + unsigned: { + age: 405299 + } + }), + [{ + username: "cadence", + content: "line one: test test\nline two: **test** **test**\nline three: **test test**\nline four: test test\n line five", + avatar_url: undefined + }] + ) +})*/ + +test("event2message: whitespace is collapsed", t => { + sameFirstContentAndWhitespace( + t, + eventToMessage({ + content: { + msgtype: "m.text", + body: "wrong body", + format: "org.matrix.custom.html", + formatted_body: "line one: test test
line two: test test
line three: test test
line four: test test
line five" + }, + event_id: "$g07oYSZFWBkxohNEfywldwgcWj1hbhDzQ1sBAKvqOOU", + origin_server_ts: 1688301929913, + room_id: "!kLRqKKUQXcibIMtOpl:cadence.moe", + sender: "@cadence:cadence.moe", + type: "m.room.message", + unsigned: { + age: 405299 + } + }), + [{ + username: "cadence", + content: "line one: test test\nline two: **test** **test**\nline three: **test test**\nline four: test test\nline five", + avatar_url: undefined + }] + ) +}) + +test("event2message: lists are bridged correctly", t => { + sameFirstContentAndWhitespace( + t, + eventToMessage({ + "type": "m.room.message", + "sender": "@cadence:cadence.moe", + "content": { + "msgtype": "m.text", + "body": "* line one\n* line two\n* line three\n * nested one\n * nested two\n* line four", + "format": "org.matrix.custom.html", + "formatted_body": "\n" + }, + "origin_server_ts": 1692967314062, + "unsigned": { + "age": 112, + "transaction_id": "m1692967313951.441" + }, + "event_id": "$l-xQPY5vNJo3SNxU9d8aOWNVD1glMslMyrp4M_JEF70", + "room_id": "!BpMdOUkWWhFxmTrENV:cadence.moe" + }), + [{ + username: "cadence", + content: "* line one\n* line two\n* line three\n * nested one\n * nested two\n* line four", + avatar_url: undefined + }] + ) +}) + test("event2message: long messages are split", t => { t.deepEqual( eventToMessage({ @@ -55,3 +220,29 @@ test("event2message: long messages are split", t => { }] ) }) + +test("event2message: m.emote markdown syntax is escaped", t => { + t.deepEqual( + eventToMessage({ + content: { + msgtype: "m.emote", + body: "wrong body", + format: "org.matrix.custom.html", + formatted_body: "shows you **her** extreme \\*test\\* of" + }, + event_id: "$g07oYSZFWBkxohNEfywldwgcWj1hbhDzQ1sBAKvqOOU", + origin_server_ts: 1688301929913, + room_id: "!kLRqKKUQXcibIMtOpl:cadence.moe", + sender: "@cadence:cadence.moe", + type: "m.room.message", + unsigned: { + age: 405299 + } + }), + [{ + username: "cadence", + content: "\\* cadence shows you \\*\\*her\\*\\* **_extreme_** \\\\\\*test\\\\\\* of", + avatar_url: undefined + }] + ) +}) diff --git a/package-lock.json b/package-lock.json index 4d32bf5..dfd21ff 100644 --- a/package-lock.json +++ b/package-lock.json @@ -10,6 +10,7 @@ "license": "MIT", "dependencies": { "better-sqlite3": "^8.3.0", + "chunk-text": "^2.0.1", "cloudstorm": "^0.8.0", "discord-markdown": "git+https://git.sr.ht/~cadence/nodejs-discord-markdown#440130ef343c8183a81c7c09809731484aa3a182", "heatsync": "^2.4.1", @@ -20,7 +21,8 @@ "node-fetch": "^2.6.7", "prettier-bytes": "^1.0.4", "snowtransfer": "^0.8.0", - "try-to-catch": "^3.0.1" + "try-to-catch": "^3.0.1", + "turndown": "^7.1.2" }, "devDependencies": { "@types/node": "^18.16.0", @@ -732,6 +734,18 @@ "resolved": "https://registry.npmjs.org/chownr/-/chownr-1.1.4.tgz", "integrity": "sha512-jJ0bqzaylmJtVnNgzTeSOs8DPavpbYgEr/b0YL8/2GO3xJEhInFmhKMUnEJQjZumK7KXGFhUy89PrsJWlakBVg==" }, + "node_modules/chunk-text": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/chunk-text/-/chunk-text-2.0.1.tgz", + "integrity": "sha512-ER6TSpe2DT4wjOVOKJ3FFAYv7wE77HA/Ztz88Peiv3lq/2oVMsItYJJsVVI0xNZM8cdImOOTNqlw+LQz7gYdJg==", + "dependencies": { + "runes": "^0.4.3" + }, + "bin": { + "chunk": "bin/server.js", + "chunk-text": "bin/server.js" + } + }, "node_modules/ci-info": { "version": "3.8.0", "resolved": "https://registry.npmjs.org/ci-info/-/ci-info-3.8.0.tgz", @@ -1057,6 +1071,11 @@ "simple-markdown": "^0.7.2" } }, + "node_modules/domino": { + "version": "2.1.6", + "resolved": "https://registry.npmjs.org/domino/-/domino-2.1.6.tgz", + "integrity": "sha512-3VdM/SXBZX2omc9JF9nOPCtDaYQ67BGp5CoLpIQlO2KCAPETs8TcDHacF26jXadGbvUteZzRTeos2fhID5+ucQ==" + }, "node_modules/ee-first": { "version": "1.1.1", "resolved": "https://registry.npmjs.org/ee-first/-/ee-first-1.1.1.tgz", @@ -2646,6 +2665,14 @@ "node": "*" } }, + "node_modules/runes": { + "version": "0.4.3", + "resolved": "https://registry.npmjs.org/runes/-/runes-0.4.3.tgz", + "integrity": "sha512-K6p9y4ZyL9wPzA+PMDloNQPfoDGTiFYDvdlXznyGKgD10BJpcAosvATKrExRKOrNLgD8E7Um7WGW0lxsnOuNLg==", + "engines": { + "node": ">=4.0.0" + } + }, "node_modules/safe-buffer": { "version": "5.2.1", "resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.2.1.tgz", @@ -3216,6 +3243,14 @@ "node": "*" } }, + "node_modules/turndown": { + "version": "7.1.2", + "resolved": "https://registry.npmjs.org/turndown/-/turndown-7.1.2.tgz", + "integrity": "sha512-ntI9R7fcUKjqBP6QU8rBK2Ehyt8LAzt3UBT9JR9tgo6GtuKvyUzpayWmeMKJw1DPdXzktvtIT8m2mVXz+bL/Qg==", + "dependencies": { + "domino": "^2.1.6" + } + }, "node_modules/type-is": { "version": "1.6.18", "resolved": "https://registry.npmjs.org/type-is/-/type-is-1.6.18.tgz", diff --git a/package.json b/package.json index 155bf2e..238b9ae 100644 --- a/package.json +++ b/package.json @@ -16,6 +16,7 @@ "license": "MIT", "dependencies": { "better-sqlite3": "^8.3.0", + "chunk-text": "^2.0.1", "cloudstorm": "^0.8.0", "discord-markdown": "git+https://git.sr.ht/~cadence/nodejs-discord-markdown#440130ef343c8183a81c7c09809731484aa3a182", "heatsync": "^2.4.1", @@ -26,7 +27,8 @@ "node-fetch": "^2.6.7", "prettier-bytes": "^1.0.4", "snowtransfer": "^0.8.0", - "try-to-catch": "^3.0.1" + "try-to-catch": "^3.0.1", + "turndown": "^7.1.2" }, "devDependencies": { "@types/node": "^18.16.0", diff --git a/types.d.ts b/types.d.ts index e9b8a7a..badbbab 100644 --- a/types.d.ts +++ b/types.d.ts @@ -67,7 +67,7 @@ export namespace Event { } export type M_Room_Message = { - msgtype: "m.text" + msgtype: "m.text" | "m.emote" body: string format?: "org.matrix.custom.html" formatted_body?: string