Detect content-type when extension is missing or unknown

If a file has no known extension, the content type gets set to
application/octet-stream, causing the browser to try to download the file
when viewed directly.

This quickly becomes annoying, as many of the files I interact with
have no extension, e.g. config files, log files, LICENSE files and
other random text files.

This patch uses libmagic to detect the file type and set the
content-type header. It also does this for the RSS feed and WebDAV for
the sake of completeness.

This patch does not touch the front end at all so these files still have a 'txt'
button and a type of '%' in the web UI. But when clicked on, the browser
will display the files correctly.

This feature is enabled with the existing "magic" option. I thought this
fit as the existing functionality also uses libmagic and gives file
extensions to files on upload. Tell me if it should be its own option
instead.

The code base was very confusing. This patch works, but I have no idea if
it's the way you'd like this implemented. Hopefully it's acceptable as
is.
This commit is contained in:
morganamilo 2025-06-16 17:25:22 +01:00 committed by ed
parent a1c7a095ee
commit ec05f8ccd5
3 changed files with 29 additions and 5 deletions

View file

@ -1024,7 +1024,7 @@ def add_upload(ap):
ap2.add_argument("--u2ts", metavar="TXT", type=u, default="c", help="how to timestamp uploaded files; [\033[32mc\033[0m]=client-last-modified, [\033[32mu\033[0m]=upload-time, [\033[32mfc\033[0m]=force-c, [\033[32mfu\033[0m]=force-u (volflag=u2ts)") ap2.add_argument("--u2ts", metavar="TXT", type=u, default="c", help="how to timestamp uploaded files; [\033[32mc\033[0m]=client-last-modified, [\033[32mu\033[0m]=upload-time, [\033[32mfc\033[0m]=force-c, [\033[32mfu\033[0m]=force-u (volflag=u2ts)")
ap2.add_argument("--rand", action="store_true", help="force randomized filenames, \033[33m--nrand\033[0m chars long (volflag=rand)") ap2.add_argument("--rand", action="store_true", help="force randomized filenames, \033[33m--nrand\033[0m chars long (volflag=rand)")
ap2.add_argument("--nrand", metavar="NUM", type=int, default=9, help="randomized filenames length (volflag=nrand)") ap2.add_argument("--nrand", metavar="NUM", type=int, default=9, help="randomized filenames length (volflag=nrand)")
ap2.add_argument("--magic", action="store_true", help="enable filetype detection on nameless uploads (volflag=magic)") ap2.add_argument("--magic", action="store_true", help="enable filetype detection on extensionless files (volflag=magic)")
ap2.add_argument("--df", metavar="GiB", type=u, default="0", help="ensure \033[33mGiB\033[0m free disk space by rejecting upload requests; assumes gigabytes unless a unit suffix is given: [\033[32m256m\033[0m], [\033[32m4\033[0m], [\033[32m2T\033[0m] (volflag=df)") ap2.add_argument("--df", metavar="GiB", type=u, default="0", help="ensure \033[33mGiB\033[0m free disk space by rejecting upload requests; assumes gigabytes unless a unit suffix is given: [\033[32m256m\033[0m], [\033[32m4\033[0m], [\033[32m2T\033[0m] (volflag=df)")
ap2.add_argument("--sparse", metavar="MiB", type=int, default=4, help="windows-only: minimum size of incoming uploads through up2k before they are made into sparse files") ap2.add_argument("--sparse", metavar="MiB", type=int, default=4, help="windows-only: minimum size of incoming uploads through up2k before they are made into sparse files")
ap2.add_argument("--turbo", metavar="LVL", type=int, default=0, help="configure turbo-mode in up2k client; [\033[32m-1\033[0m] = forbidden/always-off, [\033[32m0\033[0m] = default-off and warn if enabled, [\033[32m1\033[0m] = default-off, [\033[32m2\033[0m] = on, [\033[32m3\033[0m] = on and disable datecheck") ap2.add_argument("--turbo", metavar="LVL", type=int, default=0, help="configure turbo-mode in up2k client; [\033[32m-1\033[0m] = forbidden/always-off, [\033[32m0\033[0m] = default-off and warn if enabled, [\033[32m1\033[0m] = default-off, [\033[32m2\033[0m] = on, [\033[32m3\033[0m] = on and disable datecheck")

View file

@ -1413,6 +1413,8 @@ class HttpCli(object):
pass pass
for i in hits: for i in hits:
f = fsenc(os.path.join(self.vn.realpath, i["rp"])) if "magic" in self.vn.flags else None
iurl = html_escape("%s%s" % (baseurl, i["rp"]), True, True) iurl = html_escape("%s%s" % (baseurl, i["rp"]), True, True)
title = unquotep(i["rp"].split("?")[0].split("/")[-1]) title = unquotep(i["rp"].split("?")[0].split("/")[-1])
title = html_escape(title, True, True) title = html_escape(title, True, True)
@ -1420,7 +1422,7 @@ class HttpCli(object):
tag_a = str(i["tags"].get("artist") or "") tag_a = str(i["tags"].get("artist") or "")
desc = "%s - %s" % (tag_a, tag_t) if tag_t and tag_a else (tag_t or tag_a) desc = "%s - %s" % (tag_a, tag_t) if tag_t and tag_a else (tag_t or tag_a)
desc = html_escape(desc, True, True) if desc else title desc = html_escape(desc, True, True) if desc else title
mime = html_escape(guess_mime(title)) mime = html_escape(guess_mime(title, f))
lmod = formatdate(max(0, i["ts"])) lmod = formatdate(max(0, i["ts"]))
zsa = (iurl, iurl, title, desc, lmod, iurl, mime, i["sz"]) zsa = (iurl, iurl, title, desc, lmod, iurl, mime, i["sz"])
zs = ( zs = (
@ -1599,7 +1601,9 @@ class HttpCli(object):
"supportedlock": '<D:lockentry xmlns:D="DAV:"><D:lockscope><D:exclusive/></D:lockscope><D:locktype><D:write/></D:locktype></D:lockentry>', "supportedlock": '<D:lockentry xmlns:D="DAV:"><D:lockscope><D:exclusive/></D:lockscope><D:locktype><D:write/></D:locktype></D:lockentry>',
} }
if not isdir: if not isdir:
pvs["getcontenttype"] = html_escape(guess_mime(rp)) f = fsenc(os.path.join(tap, x["vp"])) if "magic" in self.vn.flags else None
pvs["getcontenttype"] = html_escape(guess_mime(rp), f)
pvs["getcontentlength"] = str(st.st_size) pvs["getcontentlength"] = str(st.st_size)
for k, v in pvs.items(): for k, v in pvs.items():
@ -4156,6 +4160,8 @@ class HttpCli(object):
mime = "text/plain; charset={}".format(self.uparam["txt"] or "utf-8") mime = "text/plain; charset={}".format(self.uparam["txt"] or "utf-8")
elif "mime" in self.uparam: elif "mime" in self.uparam:
mime = str(self.uparam.get("mime")) mime = str(self.uparam.get("mime"))
elif "magic" in self.vn.flags:
mime = guess_mime(req_path, fsenc(fs_path))
else: else:
mime = guess_mime(req_path) mime = guess_mime(req_path)

View file

@ -3152,11 +3152,11 @@ def unescape_cookie(orig: str) -> str:
return "".join(ret) return "".join(ret)
def guess_mime(url: str, fallback: str = "application/octet-stream") -> str: def guess_mime_ext(url: str) -> str:
try: try:
ext = url.rsplit(".", 1)[1].lower() ext = url.rsplit(".", 1)[1].lower()
except: except:
return fallback return None
ret = MIMES.get(ext) ret = MIMES.get(ext)
@ -3164,6 +3164,24 @@ def guess_mime(url: str, fallback: str = "application/octet-stream") -> str:
x = mimetypes.guess_type(url) x = mimetypes.guess_type(url)
ret = "application/{}".format(x[1]) if x[1] else x[0] ret = "application/{}".format(x[1]) if x[1] else x[0]
return ret
def guess_mime(url: str, path: str = None, fallback: str = "application/octet-stream") -> str:
ret = guess_mime_ext(url)
if not ret and path:
import magic
try:
with open(path, 'rb', 0) as f:
ret = magic.from_buffer(f.read(4096), mime = True)
if ret == "text/html":
# avoid serving up HTML content unless there was actually a .html extension
ret = "text/plain"
except:
pass
if not ret: if not ret:
ret = fallback ret = fallback