diff --git a/README.md b/README.md index 99a17780..8abaff31 100644 --- a/README.md +++ b/README.md @@ -62,6 +62,7 @@ try the **[read-only demo server](https://a.ocv.me/pub/demo/)** 👀 running fro * [periodic rescan](#periodic-rescan) - filesystem monitoring * [upload rules](#upload-rules) - set upload rules using volflags * [compress uploads](#compress-uploads) - files can be autocompressed on upload + * [other flags](#other-flags) * [database location](#database-location) - in-volume (`.hist/up2k.db`, default) or somewhere else * [metadata from audio files](#metadata-from-audio-files) - set `-e2t` to index tags on upload * [file parser plugins](#file-parser-plugins) - provide custom parsers to index additional tags, also see [./bin/mtag/README.md](./bin/mtag/README.md) @@ -264,6 +265,8 @@ some improvement ideas * [Chrome issue 1352210](https://bugs.chromium.org/p/chromium/issues/detail?id=1352210) -- plaintext http may be faster at filehashing than https (but also extremely CPU-intensive and likely to run into the above gc bugs) +* [Firefox issue 1790500](https://bugzilla.mozilla.org/show_bug.cgi?id=1790500) -- sometimes forgets to close filedescriptors during upload so the browser can crash after ~4000 files + * iPhones: the volume control doesn't work because [apple doesn't want it to](https://developer.apple.com/library/archive/documentation/AudioVideo/Conceptual/Using_HTML5_Audio_Video/Device-SpecificConsiderations/Device-SpecificConsiderations.html#//apple_ref/doc/uid/TP40009523-CH5-SW11) * *future workaround:* enable the equalizer, make it all-zero, and set a negative boost to reduce the volume * "future" because `AudioContext` is broken in the current iOS version (15.1), maybe one day... 
@@ -773,6 +776,11 @@ some examples, allows (but does not force) gz compression if client uploads to `/inc?pk` or `/inc?gz` or `/inc?gz=4` +## other flags + +* `:c,magic` enables filetype detection for nameless uploads, same as `--magic` + + ## database location in-volume (`.hist/up2k.db`, default) or somewhere else @@ -1191,9 +1199,9 @@ upload modifiers: | `Rand: 4` | `rand=4` | generate random filename with 4 characters | | `Life: 30` | `life=30` | delete file after 30 seconds | -`life` only has an effect if the volume has a lifetime, and the volume lifetime must be greater than the file's +* `life` only has an effect if the volume has a lifetime, and the volume lifetime must be greater than the file's -server behavior of `msg` can be reconfigured with `--urlform` +* server behavior of `msg` can be reconfigured with `--urlform` ## admin diff --git a/copyparty/__main__.py b/copyparty/__main__.py index cda4358e..2d939828 100755 --- a/copyparty/__main__.py +++ b/copyparty/__main__.py @@ -476,6 +476,7 @@ def run_argparse(argv: list[str], formatter: Any, retry: bool) -> argparse.Names \033[0muploads, general: \033[36mnodupe\033[35m rejects existing files (instead of symlinking them) \033[36mnosub\033[35m forces all uploads into the top folder of the vfs + \033[36mmagic$\033[35m enables filetype detection for nameless uploads \033[36mgz\033[35m allows server-side gzip of uploads with ?gz (also c,xz) \033[36mpk\033[35m forces server-side compression, optional arg: xz,9 @@ -591,6 +592,7 @@ def run_argparse(argv: list[str], formatter: Any, retry: bool) -> argparse.Names ap2.add_argument("--hardlink", action="store_true", help="prefer hardlinks instead of symlinks when possible (within same filesystem)") ap2.add_argument("--never-symlink", action="store_true", help="do not fallback to symlinks when a hardlink cannot be made") ap2.add_argument("--no-dedup", action="store_true", help="disable symlink/hardlink creation; copy file contents instead") + 
ap2.add_argument("--magic", action="store_true", help="enable filetype detection on nameless uploads") ap2.add_argument("--df", metavar="GiB", type=float, default=0, help="ensure GiB free disk space by rejecting upload requests") ap2.add_argument("--sparse", metavar="MiB", type=int, default=4, help="windows-only: minimum size of incoming uploads through up2k before they are made into sparse files") ap2.add_argument("--turbo", metavar="LVL", type=int, default=0, help="configure turbo-mode in up2k client; 0 = off and warn if enabled, 1 = off, 2 = on, 3 = on and disable datecheck") diff --git a/copyparty/authsrv.py b/copyparty/authsrv.py index d10cc075..e612675a 100644 --- a/copyparty/authsrv.py +++ b/copyparty/authsrv.py @@ -1071,7 +1071,7 @@ class AuthSrv(object): if getattr(self.args, k): vol.flags[k] = True - for ga, vf in [["no_forget", "noforget"]]: + for ga, vf in [["no_forget", "noforget"], ["magic", "magic"]]: if getattr(self.args, ga): vol.flags[vf] = True diff --git a/copyparty/httpcli.py b/copyparty/httpcli.py index 8b98d90f..3517b2ef 100644 --- a/copyparty/httpcli.py +++ b/copyparty/httpcli.py @@ -784,7 +784,8 @@ class HttpCli(object): self.log("fallthrough? thats a bug", 1) suffix = "-{:.6f}-{}".format(time.time(), self.dip()) - if not fn: + nameless = not fn + if nameless: suffix += ".bin" fn = "put" + suffix @@ -815,6 +816,28 @@ class HttpCli(object): if self.args.nw: return post_sz, sha_hex, sha_b64, remains, path, "" + if nameless and "magic" in vfs.flags: + try: + ext = self.conn.hsrv.magician.ext(path) + except Exception as ex: + self.log("filetype detection failed for [{}]: {}".format(path, ex), 6) + ext = None + + if ext: + if rnd: + fn2 = self.rand_name(fdir, "a." + ext, rnd) + else: + fn2 = fn.rsplit(".", 1)[0] + "." 
+ ext + + params["suffix"] = suffix[:-4] + with ren_open(fn, *open_a, **params) as zfw: + f, fn = zfw["orz"] + + path2 = os.path.join(fdir, fn2) + atomic_move(path, path2) + fn = fn2 + path = path2 + vfs, rem = vfs.get_dbv(rem) self.conn.hsrv.broker.say( "up2k.hash_file", diff --git a/copyparty/httpsrv.py b/copyparty/httpsrv.py index fe46eb6c..d58108f3 100644 --- a/copyparty/httpsrv.py +++ b/copyparty/httpsrv.py @@ -31,7 +31,15 @@ except ImportError: from .__init__ import MACOS, TYPE_CHECKING, EnvParams from .bos import bos from .httpconn import HttpConn -from .util import FHC, min_ex, shut_socket, spack, start_log_thrs, start_stackmon +from .util import ( + FHC, + Magician, + min_ex, + shut_socket, + spack, + start_log_thrs, + start_stackmon, +) if TYPE_CHECKING: from .broker_util import BrokerCli @@ -60,6 +68,7 @@ class HttpSrv(object): socket.setdefaulttimeout(120) nsuf = "-n{}-i{:x}".format(nid, os.getpid()) if nid else "" + self.magician = Magician() self.name = "hsrv" + nsuf self.mutex = threading.Lock() diff --git a/copyparty/util.py b/copyparty/util.py index 01b15475..beba8654 100644 --- a/copyparty/util.py +++ b/copyparty/util.py @@ -69,6 +69,7 @@ except: if TYPE_CHECKING: from .authsrv import VFS + import magic FAKE_MP = False @@ -154,22 +155,18 @@ IMPLICATIONS = [ MIMES = { - "md": "text/plain", - "txt": "text/plain", - "js": "text/javascript", "opus": "audio/ogg; codecs=opus", - "caf": "audio/x-caf", - "mp3": "audio/mpeg", - "m4a": "audio/mp4", - "jpg": "image/jpeg", } def _add_mimes() -> None: + # `mimetypes` is woefully unpopulated on windows + # but will be used as fallback on linux + for ln in """text css html csv -application json wasm xml pdf rtf zip -image webp jpeg png gif bmp -audio aac ogg wav +application json wasm xml pdf rtf zip jar fits wasm +image webp jpeg png gif bmp jxl jp2 jxs jxr tiff bpg heic heif avif +audio aac ogg wav flac ape amr video webm mp4 mpeg font woff woff2 otf ttf """.splitlines(): @@ -177,10 +174,35 @@ font woff woff2 
class Magician(object):
    """Guess a filename extension from the contents of a file.

    Uses the third-party `magic` module (python-magic), imported lazily
    inside ext() so that merely constructing a Magician does not need it.
    """

    def __init__(self) -> None:
        # becomes True once creating the extension-mode detector has failed;
        # later ext() calls then go straight to the mimetype fallback
        self.bad_magic = False
        # held while creating and while querying the detector below
        self.mutex = threading.Lock()
        # extension-mode detector, created on the first ext() call
        self.magic: Optional["magic.Magic"] = None

    def ext(self, fpath: str) -> str:
        """Return an extension (without leading dot) for the file at fpath.

        First asks an extension-mode `magic.Magic` instance; its answer is
        split on "/" and the first entry is used, after translation through
        MAGIC_MAP. If that step fails, or the answer is empty or contains a
        "?", the mimetype reported by `magic.from_file(..., mime=True)` is
        mapped to an extension instead.

        Raises:
            Exception: if no extension is known for the detected mimetype.
            Errors from importing `magic` or from the mimetype lookup itself
            are not caught here.
        """
        import magic

        try:
            if self.bad_magic:
                raise Exception("extension-mode detector is unavailable")

            with self.mutex:
                if not self.magic:
                    try:
                        self.magic = magic.Magic(uncompress=False, extension=True)
                    except Exception:
                        # remember the failure so it is not retried per upload
                        self.bad_magic = True
                        raise

                ret = self.magic.from_file(fpath)
        except Exception:
            # best-effort: any failure above means "unknown", try mimetype next
            ret = "?"

        ret = ret.split("/")[0]
        ret = MAGIC_MAP.get(ret, ret)
        if ret and "?" not in ret:
            return ret

        mime = magic.from_file(fpath, mime=True)
        return self._ext_from_mime(mime, EXTS)

    @staticmethod
    def _ext_from_mime(mime: str, known: "dict[str, str]") -> str:
        """Map a mimetype string to an extension without the leading dot.

        Anything after the first ";" or space in `mime` is dropped before
        the lookup. `known` is consulted first, then the stdlib `mimetypes`
        registry.

        Raises:
            Exception: if neither source has an extension for the mimetype.
        """
        mime = re.split("[; ]", mime, maxsplit=1)[0]

        # the original only returned when this lookup MISSED, so a hit
        # fell off the end of the function and yielded None
        ret = known.get(mime)
        if ret:
            return ret

        mg = mimetypes.guess_extension(mime)
        if mg:
            return mg[1:]

        raise Exception("no known extension for mimetype [{}]".format(mime))