From d679ca8d14f9770f9fdfbef3f1df00c3929ce438 Mon Sep 17 00:00:00 2001 From: ScreenTinker Date: Tue, 12 May 2026 11:55:55 -0500 Subject: [PATCH] fix(upload): re-decode multipart filename header from latin1 to utf8 in multer storage callback busboy reads the Content-Disposition filename="..." header value as latin1 by default - even with defParamCharset:'utf8' set, that option only applies to RFC 5987 encoded filename*=... params, which most clients (browsers, curl, programmatic HTTP) don't send. Modern clients send raw UTF-8 bytes for non-ASCII filenames; busboy interprets those bytes one-byte-per-char as latin1, producing a JS string like 'A-tilde + quarter-mark' for 'u-umlaut'. JS then re-encodes that string as UTF-8 on the way to SQLite, yielding 4 bytes (c3 83 c2 bc) for what should be 2 bytes (c3 bc). Classic double-encoding mojibake - shows up in the UI as 'BegrA-tilde...' instead of 'Begru-umlaut...'. Fix: in the multer filename callback, re-decode file.originalname from latin1 to utf8 to recover the original byte sequence. Mutating originalname here propagates to every route handler reading req.file.originalname (POST /, PUT /:id/replace, and any future upload route using the same middleware). This is the actual visible-mojibake bug semetra22 reported. The prior commit b677752 (NFC normalize in safeFilename) handles a separate but related case (macOS NFD clients sending decomposed forms); both fixes compose correctly - latin1->utf8 first restores the byte sequence, then NFC normalize collapses NFD into composed form. Smoke verified by sending raw UTF-8 multipart from a Node https client (no shell escaping). NFC input 'Begru-umlaut-essungsscreens.jpg' with bytes c3bc c39f arrives clean (was c383c2bc c383c29f before). NFD input 'u + combining diaeresis' arrives as composed NFC c3bc after both fixes. --- server/middleware/upload.js | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/server/middleware/upload.js b/server/middleware/upload.js index ed9f0c7..faa96a5 100644 --- a/server/middleware/upload.js +++ b/server/middleware/upload.js @@ -8,6 +8,21 @@ const storage = multer.diskStorage({ cb(null, config.contentDir); }, filename: (req, file, cb) => { + // busboy decodes the Content-Disposition filename header as latin1 by + // default. Modern clients send raw UTF-8 bytes for non-ASCII filenames + // (e.g. browsers + curl on UTF-8 locales send "Begrussungsscreens.jpg" + // with c3 bc for u-umlaut). Reading those bytes as latin1 produces the + // string "A-tilde + quarter-mark" which JS then re-encodes as 4 UTF-8 + // bytes on the way to the DB - classic double-encoding mojibake. + // + // The `defParamCharset: 'utf8'` option below only takes effect for + // RFC 5987 encoded `filename*=...` params, which most clients don't send. + // For the plain `filename="..."` case, re-decode here to recover the + // original UTF-8 byte sequence. Mutating originalname here propagates to + // every downstream consumer (route handlers reading req.file.originalname). + if (file.originalname) { + file.originalname = Buffer.from(file.originalname, 'latin1').toString('utf8'); + } const ext = path.extname(file.originalname); cb(null, `${uuidv4()}${ext}`); }