From 9dd5dec0936e98b2dffa1771ce6ecb1f33c492a2 Mon Sep 17 00:00:00 2001 From: ed Date: Thu, 19 Jun 2025 17:25:31 +0000 Subject: [PATCH] adjustments after #171; * move the new functionality to --rmagic * performance tweaks --- README.md | 1 + copyparty/__main__.py | 3 ++- copyparty/cfg.py | 2 ++ copyparty/httpcli.py | 27 +++++++++++++++++++-------- copyparty/util.py | 34 ++++++++++++++++------------------ tests/util.py | 2 +- 6 files changed, 41 insertions(+), 28 deletions(-) diff --git a/README.md b/README.md index 9ed4f294..e07ad4b3 100644 --- a/README.md +++ b/README.md @@ -2755,6 +2755,7 @@ set any of the following environment variables to disable its associated optiona | `PRTY_NO_CFSSL` | never attempt to generate self-signed certificates using [cfssl](https://github.com/cloudflare/cfssl) | | `PRTY_NO_FFMPEG` | **audio transcoding** goes byebye, **thumbnailing** must be handled by Pillow/libvips | | `PRTY_NO_FFPROBE` | **audio transcoding** goes byebye, **thumbnailing** must be handled by Pillow/libvips, **metadata-scanning** must be handled by mutagen | +| `PRTY_NO_MAGIC` | do not use [magic](https://pypi.org/project/python-magic/) for filetype detection | | `PRTY_NO_MUTAGEN` | do not use [mutagen](https://pypi.org/project/mutagen/) for reading metadata from media files; will fallback to ffprobe | | `PRTY_NO_PIL` | disable all [Pillow](https://pypi.org/project/pillow/)-based thumbnail support; will fallback to libvips or ffmpeg | | `PRTY_NO_PILF` | disable Pillow `ImageFont` text rendering, used for folder thumbnails | diff --git a/copyparty/__main__.py b/copyparty/__main__.py index 4ead56e8..4b050fa7 100644 --- a/copyparty/__main__.py +++ b/copyparty/__main__.py @@ -964,6 +964,7 @@ def add_general(ap, nc, srvname): ap2.add_argument("--name", metavar="TXT", type=u, default=srvname, help="server name (displayed topleft in browser and in mDNS)") ap2.add_argument("--mime", metavar="EXT=MIME", type=u, action="append", help="map file \033[33mEXT\033[0mension to \033[33mMIME\033[0mtype, for example [\033[32mjpg=image/jpeg\033[0m]") ap2.add_argument("--mimes", action="store_true", help="list default mimetype mapping and exit") + ap2.add_argument("--rmagic", action="store_true", help="do expensive analysis to improve accuracy of returned mimetypes; will make file-downloads, rss, and webdav slower (volflag=rmagic)") ap2.add_argument("--license", action="store_true", help="show licenses and exit") ap2.add_argument("--version", action="store_true", help="show versions and exit") @@ -1024,7 +1025,7 @@ def add_upload(ap): ap2.add_argument("--u2ts", metavar="TXT", type=u, default="c", help="how to timestamp uploaded files; [\033[32mc\033[0m]=client-last-modified, [\033[32mu\033[0m]=upload-time, [\033[32mfc\033[0m]=force-c, [\033[32mfu\033[0m]=force-u (volflag=u2ts)") ap2.add_argument("--rand", action="store_true", help="force randomized filenames, \033[33m--nrand\033[0m chars long (volflag=rand)") ap2.add_argument("--nrand", metavar="NUM", type=int, default=9, help="randomized filenames length (volflag=nrand)") - ap2.add_argument("--magic", action="store_true", help="enable filetype detection on extensionless files (volflag=magic)") + ap2.add_argument("--magic", action="store_true", help="enable filetype detection on nameless uploads (volflag=magic)") ap2.add_argument("--df", metavar="GiB", type=u, default="0", help="ensure \033[33mGiB\033[0m free disk space by rejecting upload requests; assumes gigabytes unless a unit suffix is given: [\033[32m256m\033[0m], [\033[32m4\033[0m], [\033[32m2T\033[0m] (volflag=df)") ap2.add_argument("--sparse", metavar="MiB", type=int, default=4, help="windows-only: minimum size of incoming uploads through up2k before they are made into sparse files") ap2.add_argument("--turbo", metavar="LVL", type=int, default=0, help="configure turbo-mode in up2k client; [\033[32m-1\033[0m] = forbidden/always-off, [\033[32m0\033[0m] = default-off and warn if enabled, [\033[32m1\033[0m] = default-off, [\033[32m2\033[0m] = on, [\033[32m3\033[0m] = on and disable datecheck") diff --git a/copyparty/cfg.py b/copyparty/cfg.py index 81e46792..c8fd0cde 100644 --- a/copyparty/cfg.py +++ b/copyparty/cfg.py @@ -52,6 +52,7 @@ def vf_bmap() -> dict[str, str]: "og_no_head", "og_s_title", "rand", + "rmagic", "rss", "wo_up_readme", "xdev", @@ -322,6 +323,7 @@ flagcats = { "dks": "per-directory accesskeys allow browsing into subdirs", "dky": 'allow seeing files (not folders) inside a specific folder\nwith "g" perm, and does not require a valid dirkey to do so', "rss": "allow '?rss' URL suffix (experimental)", + "rmagic": "expensive analysis for mimetype accuracy", "ups_who=2": "restrict viewing the list of recent uploads", "zip_who=2": "restrict access to download-as-zip/tar", "zipmaxn=9k": "reject download-as-zip if more than 9000 files", diff --git a/copyparty/httpcli.py b/copyparty/httpcli.py index e68a3327..f7f8157d 100644 --- a/copyparty/httpcli.py +++ b/copyparty/httpcli.py @@ -1412,8 +1412,12 @@ class HttpCli(object): except: pass + ap = "" + use_magic = "rmagic" in self.vn.flags + for i in hits: - f = fsenc(os.path.join(self.vn.realpath, i["rp"])) if "magic" in self.vn.flags else None + if use_magic: + ap = os.path.join(self.vn.realpath, i["rp"]) iurl = html_escape("%s%s" % (baseurl, i["rp"]), True, True) title = unquotep(i["rp"].split("?")[0].split("/")[-1]) @@ -1422,7 +1426,7 @@ class HttpCli(object): tag_a = str(i["tags"].get("artist") or "") desc = "%s - %s" % (tag_a, tag_t) if tag_t and tag_a else (tag_t or tag_a) desc = html_escape(desc, True, True) if desc else title - mime = html_escape(guess_mime(title, f)) + mime = html_escape(guess_mime(title, ap)) lmod = formatdate(max(0, i["ts"])) zsa = (iurl, iurl, title, desc, lmod, iurl, mime, i["sz"]) zs = ( @@ -1575,6 +1579,9 @@ class HttpCli(object): None, 207, "text/xml; charset=" + enc, {"Transfer-Encoding": "chunked"} ) + ap = "" + use_magic = "rmagic" in vn.flags + ret = '\n' ret = ret.format(uenc) for x in fgen: @@ -1601,9 +1608,9 @@ class HttpCli(object): "supportedlock": '', } if not isdir: - f = fsenc(os.path.join(tap, x["vp"])) if "magic" in self.vn.flags else None - - pvs["getcontenttype"] = html_escape(guess_mime(rp), f) + if use_magic: + ap = os.path.join(tap, x["vp"]) + pvs["getcontenttype"] = html_escape(guess_mime(rp, ap)) pvs["getcontentlength"] = str(st.st_size) for k, v in pvs.items(): @@ -4160,8 +4167,8 @@ class HttpCli(object): mime = "text/plain; charset={}".format(self.uparam["txt"] or "utf-8") elif "mime" in self.uparam: mime = str(self.uparam.get("mime")) - elif "magic" in self.vn.flags: - mime = guess_mime(req_path, fsenc(fs_path)) + elif "rmagic" in self.vn.flags: + mime = guess_mime(req_path, fs_path) else: mime = guess_mime(req_path) @@ -4314,7 +4321,11 @@ class HttpCli(object): if t_fd < now - sec_fd: try: st2 = os.stat(open_args[0]) - if st2.st_ino != st.st_ino or st2.st_size < sent or st2.st_size < st.st_size: + if ( + st2.st_ino != st.st_ino + or st2.st_size < sent + or st2.st_size < st.st_size + ): assert f # !rm # open new file before closing previous to avoid toctous (open may fail; cannot null f before) f2 = open(*open_args) diff --git a/copyparty/util.py b/copyparty/util.py index f0c4433d..6d9c0210 100644 --- a/copyparty/util.py +++ b/copyparty/util.py @@ -153,6 +153,14 @@ try: except: HAVE_PSUTIL = False +try: + if os.environ.get("PRTY_NO_MAGIC"): + raise Exception() + + import magic +except: + pass + if True: # pylint: disable=using-constant-test import types from collections.abc import Callable, Iterable @@ -175,8 +183,6 @@ if True: # pylint: disable=using-constant-test if TYPE_CHECKING: - import magic - from .authsrv import VFS from .broker_util import BrokerCli from .up2k import Up2k @@ -1256,8 +1262,6 @@ class Magician(object): self.magic: Optional["magic.Magic"] = None def ext(self, fpath: str) -> str: - import magic - try: if self.bad_magic: raise Exception() @@ -3152,11 +3156,13 @@ def unescape_cookie(orig: str) -> str: return "".join(ret) -def guess_mime_ext(url: str) -> str: +def guess_mime( + url: str, path: str = "", fallback: str = "application/octet-stream" +) -> str: try: ext = url.rsplit(".", 1)[1].lower() except: - return None + ext = "" ret = MIMES.get(ext) @@ -3164,22 +3170,14 @@ def guess_mime_ext(url: str) -> str: x = mimetypes.guess_type(url) ret = "application/{}".format(x[1]) if x[1] else x[0] - return ret - - -def guess_mime(url: str, path: str = None, fallback: str = "application/octet-stream") -> str: - ret = guess_mime_ext(url) - if not ret and path: - import magic - try: - with open(path, 'rb', 0) as f: - ret = magic.from_buffer(f.read(4096), mime = True) - if ret == "text/html": + with open(fsenc(path), "rb", 0) as f: + ret = magic.from_buffer(f.read(4096), mime=True) + if ret.startswith("text/htm"): # avoid serving up HTML content unless there was actually a .html extension ret = "text/plain" - except: + except Exception as ex: pass if not ret: diff --git a/tests/util.py b/tests/util.py index 4d1236d8..2492b91e 100644 --- a/tests/util.py +++ b/tests/util.py @@ -143,7 +143,7 @@ class Cfg(Namespace): def __init__(self, a=None, v=None, c=None, **ka0): ka = {} - ex = "chpw daw dav_auth dav_mac dav_rt e2d e2ds e2dsa e2t e2ts e2tsr e2v e2vu e2vp early_ban ed emp exp force_js getmod grid gsel hardlink ih ihead magic hardlink_only nid nih no_acode no_athumb no_bauth no_clone no_cp no_dav no_db_ip no_del no_dirsz no_dupe no_lifetime no_logues no_mv no_pipe no_poll no_readme no_robots no_sb_md no_sb_lg no_scandir no_tail no_tarcmp no_thumb no_vthumb no_zip nrand nsort nw og og_no_head og_s_title ohead q rand re_dirsz rss smb srch_dbg srch_excl stats uqe vague_403 vc ver wo_up_readme write_uplog xdev xlink xvol zipmaxu zs" + ex = "chpw daw dav_auth dav_mac dav_rt e2d e2ds e2dsa e2t e2ts e2tsr e2v e2vu e2vp early_ban ed emp exp force_js getmod grid gsel hardlink hardlink_only ih ihead magic nid nih no_acode no_athumb no_bauth no_clone no_cp no_dav no_db_ip no_del no_dirsz no_dupe no_lifetime no_logues no_mv no_pipe no_poll no_readme no_robots no_sb_md no_sb_lg no_scandir no_tail no_tarcmp no_thumb no_vthumb no_zip nrand nsort nw og og_no_head og_s_title ohead q rand re_dirsz rmagic rss smb srch_dbg srch_excl stats uqe vague_403 vc ver wo_up_readme write_uplog xdev xlink xvol zipmaxu zs" ka.update(**{k: False for k in ex.split()}) ex = "dav_inf dedup dotpart dotsrch hook_v no_dhash no_fastboot no_fpool no_htp no_rescan no_sendfile no_ses no_snap no_up_list no_voldump re_dhash see_dots plain_ip"