mirror of
https://github.com/screentinker/screentinker.git
synced 2026-05-15 07:32:23 -06:00
busboy reads the Content-Disposition filename="..." header value as
latin1 by default - even with defParamCharset:'utf8' set, that option
only applies to RFC 5987 encoded filename*=... params, which most
clients (browsers, curl, programmatic HTTP) don't send. Modern clients
send raw UTF-8 bytes for non-ASCII filenames; busboy interprets those
bytes one-byte-per-char as latin1, producing a JS string like 'A-tilde
+ quarter-mark' for 'u-umlaut'. JS then re-encodes that string as UTF-8
on the way to SQLite, yielding 4 bytes (c3 83 c2 bc) for what should be
2 bytes (c3 bc). Classic double-encoding mojibake - shows up in the UI
as 'BegrA-tilde...' instead of 'Begru-umlaut...'.
Fix: in the multer filename callback, re-decode file.originalname from
latin1 to utf8 to recover the original byte sequence. Mutating
originalname here propagates to every route handler reading
req.file.originalname (POST /, PUT /:id/replace, and any future upload
route using the same middleware).
This is the actual visible-mojibake bug semetra22 reported. The prior
commit b677752 (NFC normalize in safeFilename) handles a separate but
related case (macOS NFD clients sending decomposed forms); both fixes
compose correctly - latin1->utf8 first restores the byte sequence,
then NFC normalize collapses NFD into composed form.
Smoke verified by sending raw UTF-8 multipart from a Node https client
(no shell escaping). NFC input 'Begru-umlaut-essungsscreens.jpg' with
bytes c3bc c39f arrives clean (was c383c2bc c383c29f before). NFD input
'u + combining diaeresis' arrives as composed NFC c3bc after both fixes.
55 lines
2.2 KiB
JavaScript
55 lines
2.2 KiB
JavaScript
const multer = require('multer');
|
|
const path = require('path');
|
|
const { v4: uuidv4 } = require('uuid');
|
|
const config = require('../config');
|
|
|
|
/**
 * Repair a multipart filename whose UTF-8 bytes were decoded one byte per
 * character as latin1 (e.g. "ü", bytes c3 bc, showing up as "Ã¼").
 *
 * The repair is applied only when it is provably lossless:
 *   1. every UTF-16 code unit is <= 0xFF, so the string maps 1:1 onto bytes
 *      (checked with a latin1 encode/decode round trip), and
 *   2. those bytes are valid UTF-8 (checked with a utf8 decode/encode round
 *      trip - invalid sequences decode to U+FFFD and no longer re-encode to
 *      the same bytes).
 * Any other input is returned unchanged. That covers names that were already
 * decoded correctly (they may contain characters above U+00FF, which a blind
 * latin1 re-encode would truncate) and names that really are latin1 (a blind
 * utf8 decode would turn their high bytes into U+FFFD).
 *
 * @param {string} name - Filename as reported by the multipart parser.
 * @returns {string} The repaired name, or `name` itself when no safe repair applies.
 */
function recoverUtf8Filename(name) {
  if (typeof name !== 'string' || name === '') {
    return name;
  }

  const rawBytes = Buffer.from(name, 'latin1');
  if (rawBytes.toString('latin1') !== name) {
    // Contains code units above 0xFF: cannot be a latin1 mis-decode.
    return name;
  }

  const decoded = rawBytes.toString('utf8');
  if (!Buffer.from(decoded, 'utf8').equals(rawBytes)) {
    // Bytes are not valid UTF-8: leave the name as received.
    return name;
  }

  return decoded;
}

/**
 * Disk storage engine: files are written to `config.contentDir` under a
 * freshly generated UUID, keeping only the extension of the client-supplied
 * name.
 */
const storage = multer.diskStorage({
  destination: (req, file, cb) => {
    cb(null, config.contentDir);
  },
  filename: (req, file, cb) => {
    // Clients commonly send raw UTF-8 bytes in the plain `filename="..."`
    // parameter. When those bytes get decoded as latin1, "ü" (c3 bc) becomes
    // the two-character string "Ã¼", which is then written to the DB as four
    // UTF-8 bytes - classic double-encoding mojibake.
    //
    // NOTE(review): this was observed despite `defParamCharset: 'utf8'` being
    // passed to multer below; whether that option reaches busboy depends on
    // the installed multer version - confirm. The repair is therefore guarded
    // so that names which already arrive correctly decoded are not damaged.
    //
    // Mutating originalname here is deliberate: it propagates to every
    // downstream consumer (route handlers reading req.file.originalname).
    if (file.originalname) {
      file.originalname = recoverUtf8Filename(file.originalname);
    }
    // Fall back to an empty extension rather than throwing inside the
    // callback if no original name is present.
    const ext = path.extname(file.originalname || '');
    cb(null, `${uuidv4()}${ext}`);
  }
});
|
|
|
|
/**
 * Multer file filter: lets video and image uploads through and rejects
 * everything else.
 *
 * @param {object} req - Incoming request (unused).
 * @param {object} file - Multer file descriptor; only `mimetype` is read.
 * @param {Function} cb - Called as `cb(null, true)` to accept the file, or
 *   `cb(error, false)` to reject it.
 */
const fileFilter = (req, file, cb) => {
  const { mimetype } = file;

  // Explicitly listed types. Every entry also matches one of the prefixes
  // below, so the list documents the expected formats rather than narrowing
  // what is accepted.
  const knownTypes = [
    'video/mp4',
    'video/webm',
    'video/avi',
    'video/mkv',
    'video/mov',
    'video/x-msvideo',
    'video/quicktime',
    'video/x-matroska',
    'image/jpeg',
    'image/png',
    'image/gif',
    'image/webp',
    'image/bmp'
  ];
  const mediaPrefixes = ['video/', 'image/'];

  const isAccepted =
    knownTypes.includes(mimetype) ||
    mediaPrefixes.some((prefix) => mimetype.startsWith(prefix));

  if (!isAccepted) {
    cb(new Error('Only video and image files are allowed'), false);
    return;
  }

  cb(null, true);
};
|
|
|
|
// Configured multer instance exported by this module: disk storage, the
// video/image MIME filter, and a per-file size cap from config.maxFileSize.
//
// `defParamCharset: 'utf8'` requests UTF-8 decoding of multipart header
// parameters such as the upload filename. With latin1 decoding, umlauts and
// other non-ASCII characters are mangled (e.g. the "ö" in "Größe.jpg" arrives
// as "Ã¶" and gets stored that way).
// NOTE(review): the `filename` callback in `storage` still has to repair
// latin1-decoded names, so this option alone does not cover every client -
// confirm whether the installed multer version forwards it to busboy.
const uploadOptions = {
  defParamCharset: 'utf8',
  limits: { fileSize: config.maxFileSize },
  fileFilter,
  storage
};

const upload = multer(uploadOptions);

module.exports = upload;
|