screentinker/server/middleware/upload.js
ScreenTinker d679ca8d14 fix(upload): re-decode multipart filename header from latin1 to utf8 in multer storage callback
busboy reads the Content-Disposition filename="..." header value as
latin1 by default - even with defParamCharset:'utf8' set, that option
only applies to RFC 5987 encoded filename*=... params, which most
clients (browsers, curl, programmatic HTTP) don't send. Modern clients
send raw UTF-8 bytes for non-ASCII filenames; busboy interprets those
bytes one-byte-per-char as latin1, producing a JS string like 'A-tilde
+ quarter-mark' for 'u-umlaut'. JS then re-encodes that string as UTF-8
on the way to SQLite, yielding 4 bytes (c3 83 c2 bc) for what should be
2 bytes (c3 bc). Classic double-encoding mojibake - shows up in the UI
as 'BegrA-tilde...' instead of 'Begru-umlaut...'.

Fix: in the multer filename callback, re-decode file.originalname from
latin1 to utf8 to recover the original byte sequence. Mutating
originalname here propagates to every route handler reading
req.file.originalname (POST /, PUT /:id/replace, and any future upload
route using the same middleware).

This is the actual visible-mojibake bug semetra22 reported. The prior
commit b677752 (NFC normalize in safeFilename) handles a separate but
related case (macOS NFD clients sending decomposed forms); both fixes
compose correctly - latin1->utf8 first restores the byte sequence,
then NFC normalize collapses NFD into composed form.

Smoke verified by sending raw UTF-8 multipart from a Node https client
(no shell escaping). NFC input 'Begru-umlaut-essungsscreens.jpg' with
bytes c3bc c39f arrives clean (was c383c2bc c383c29f before). NFD input
'u + combining diaeresis' arrives as composed NFC c3bc after both fixes.
2026-05-12 11:55:55 -05:00

55 lines
2.2 KiB
JavaScript

const multer = require('multer');
const path = require('path');
const { v4: uuidv4 } = require('uuid');
const config = require('../config');
/**
 * Repair a multipart filename that is UTF-8 read one byte per character
 * (latin1), and leave every other name untouched.
 *
 * The repair is applied only when both of these hold:
 *   1. every code unit is <= 0xFF, so the string can be the output of a
 *      latin1 decode at all, and
 *   2. the bytes obtained by re-encoding as latin1 form valid UTF-8.
 *
 * Names that are already correct (for example decoded from an RFC 5987
 * `filename*=` parameter, or genuine latin1 such as a lone 0xFC byte) fail
 * one of the two checks and are returned unchanged; re-decoding them blindly
 * would turn their non-ASCII characters into U+FFFD or truncate them.
 *
 * Known ambiguity: a correctly decoded name whose characters happen to spell
 * a valid UTF-8 byte sequence (e.g. the literal text "A-tilde, one-quarter")
 * is still converted.
 *
 * @param {string} name - filename as received from the multipart parser.
 * @returns {string} the recovered UTF-8 name, or `name` itself when no
 *   repair applies (including non-string or empty input).
 */
function recoverUtf8Filename(name) {
  if (typeof name !== 'string' || name === '') {
    return name;
  }
  // A code unit above 0xFF cannot come out of a latin1 decode, so the name
  // was decoded some other way and must not be touched.
  if (/[\u0100-\uffff]/.test(name)) {
    return name;
  }
  const rawBytes = Buffer.from(name, 'latin1');
  const decoded = rawBytes.toString('utf8');
  // Invalid UTF-8 decodes to U+FFFD, which never re-encodes to the original
  // bytes; an exact round trip therefore proves the bytes were valid UTF-8.
  return Buffer.from(decoded, 'utf8').equals(rawBytes) ? decoded : name;
}

// Disk storage for uploads: every file lands in config.contentDir under a
// fresh UUID, keeping only the extension of the client-supplied name.
const storage = multer.diskStorage({
  destination: (req, file, cb) => {
    cb(null, config.contentDir);
  },
  filename: (req, file, cb) => {
    // Clients commonly send raw UTF-8 bytes in the plain `filename="..."`
    // parameter, and those bytes can reach this callback decoded as latin1
    // (u-umlaut, bytes c3 bc, shows up as two characters). Stored as-is, the
    // name is double-encoded on the way to the DB.
    //
    // NOTE(review): `defParamCharset: 'utf8'` is set on the multer instance
    // below, yet latin1-decoded names were still observed here. busboy
    // documents that option as covering plain (non-extended) parameters, so
    // the installed multer may not be forwarding it - confirm.
    //
    // The repair is conditional so names that are already correct survive.
    // Mutating originalname propagates to every downstream consumer (route
    // handlers reading req.file.originalname).
    if (file.originalname) {
      file.originalname = recoverUtf8Filename(file.originalname);
    }
    // Fall back to '' so a missing originalname yields no extension instead
    // of a TypeError from path.extname.
    const ext = path.extname(file.originalname || '');
    cb(null, `${uuidv4()}${ext}`);
  }
});
// Top-level media types accepted for upload; any subtype under them passes.
const ACCEPTED_MIME_PREFIXES = ['video/', 'image/'];

/**
 * Upload filter: accept a file when its reported MIME type is `video/*` or
 * `image/*`, reject everything else.
 *
 * The former explicit list of subtypes was fully covered by these two prefix
 * checks, so it never changed the outcome and has been dropped.
 * NOTE(review): this means every subtype is accepted (e.g. image/svg+xml) -
 * confirm that is intended rather than a strict allow-list.
 *
 * @param {object} req - incoming request (unused).
 * @param {object} file - file descriptor; only `file.mimetype` is read.
 * @param {function(Error|null, boolean): void} cb - called with `(null, true)`
 *   to accept, or `(Error, false)` to reject.
 */
const fileFilter = (req, file, cb) => {
  const { mimetype } = file;
  // A missing or non-string mimetype is rejected through the callback rather
  // than throwing from `.startsWith`.
  const isMedia = typeof mimetype === 'string' &&
    ACCEPTED_MIME_PREFIXES.some((prefix) => mimetype.startsWith(prefix));

  if (isMedia) {
    cb(null, true);
    return;
  }
  cb(new Error('Only video and image files are allowed'), false);
};
// Options for the multer instance exported by this module.
//
// `defParamCharset: 'utf8'` asks for multipart filename parameters to be
// decoded as UTF-8 rather than latin1; with latin1 a name such as "für.jpg"
// arrives as the mojibake "fÃ¼r.jpg" and would be stored that way.
// NOTE(review): the filename callback in `storage` still repairs
// latin1-decoded names, so this option evidently does not cover every
// request (or is not forwarded by the installed multer) - confirm before
// relying on it alone.
const uploadOptions = {
  storage,
  fileFilter,
  // Per-file size cap taken from config.maxFileSize.
  limits: { fileSize: config.maxFileSize },
  defParamCharset: 'utf8'
};

const upload = multer(uploadOptions);

module.exports = upload;