mirror of
https://github.com/9001/copyparty.git
synced 2025-08-17 09:02:15 -06:00
add filetype detection for nameless uploads
This commit is contained in:
parent
df64a62a03
commit
9401b5ae13
12
README.md
12
README.md
|
@ -62,6 +62,7 @@ try the **[read-only demo server](https://a.ocv.me/pub/demo/)** 👀 running fro
|
||||||
* [periodic rescan](#periodic-rescan) - filesystem monitoring
|
* [periodic rescan](#periodic-rescan) - filesystem monitoring
|
||||||
* [upload rules](#upload-rules) - set upload rules using volflags
|
* [upload rules](#upload-rules) - set upload rules using volflags
|
||||||
* [compress uploads](#compress-uploads) - files can be autocompressed on upload
|
* [compress uploads](#compress-uploads) - files can be autocompressed on upload
|
||||||
|
* [other flags](#other-flags)
|
||||||
* [database location](#database-location) - in-volume (`.hist/up2k.db`, default) or somewhere else
|
* [database location](#database-location) - in-volume (`.hist/up2k.db`, default) or somewhere else
|
||||||
* [metadata from audio files](#metadata-from-audio-files) - set `-e2t` to index tags on upload
|
* [metadata from audio files](#metadata-from-audio-files) - set `-e2t` to index tags on upload
|
||||||
* [file parser plugins](#file-parser-plugins) - provide custom parsers to index additional tags, also see [./bin/mtag/README.md](./bin/mtag/README.md)
|
* [file parser plugins](#file-parser-plugins) - provide custom parsers to index additional tags, also see [./bin/mtag/README.md](./bin/mtag/README.md)
|
||||||
|
@ -264,6 +265,8 @@ some improvement ideas
|
||||||
|
|
||||||
* [Chrome issue 1352210](https://bugs.chromium.org/p/chromium/issues/detail?id=1352210) -- plaintext http may be faster at filehashing than https (but also extremely CPU-intensive and likely to run into the above gc bugs)
|
* [Chrome issue 1352210](https://bugs.chromium.org/p/chromium/issues/detail?id=1352210) -- plaintext http may be faster at filehashing than https (but also extremely CPU-intensive and likely to run into the above gc bugs)
|
||||||
|
|
||||||
|
* [Firefox issue 1790500](https://bugzilla.mozilla.org/show_bug.cgi?id=1790500) -- sometimes forgets to close file descriptors during upload so the browser can crash after ~4000 files
|
||||||
|
|
||||||
* iPhones: the volume control doesn't work because [apple doesn't want it to](https://developer.apple.com/library/archive/documentation/AudioVideo/Conceptual/Using_HTML5_Audio_Video/Device-SpecificConsiderations/Device-SpecificConsiderations.html#//apple_ref/doc/uid/TP40009523-CH5-SW11)
|
* iPhones: the volume control doesn't work because [apple doesn't want it to](https://developer.apple.com/library/archive/documentation/AudioVideo/Conceptual/Using_HTML5_Audio_Video/Device-SpecificConsiderations/Device-SpecificConsiderations.html#//apple_ref/doc/uid/TP40009523-CH5-SW11)
|
||||||
* *future workaround:* enable the equalizer, make it all-zero, and set a negative boost to reduce the volume
|
* *future workaround:* enable the equalizer, make it all-zero, and set a negative boost to reduce the volume
|
||||||
* "future" because `AudioContext` is broken in the current iOS version (15.1), maybe one day...
|
* "future" because `AudioContext` is broken in the current iOS version (15.1), maybe one day...
|
||||||
|
@ -773,6 +776,11 @@ some examples,
|
||||||
allows (but does not force) gz compression if client uploads to `/inc?pk` or `/inc?gz` or `/inc?gz=4`
|
allows (but does not force) gz compression if client uploads to `/inc?pk` or `/inc?gz` or `/inc?gz=4`
|
||||||
|
|
||||||
|
|
||||||
|
## other flags
|
||||||
|
|
||||||
|
* `:c,magic` enables filetype detection for nameless uploads, same as `--magic`
|
||||||
|
|
||||||
|
|
||||||
## database location
|
## database location
|
||||||
|
|
||||||
in-volume (`.hist/up2k.db`, default) or somewhere else
|
in-volume (`.hist/up2k.db`, default) or somewhere else
|
||||||
|
@ -1191,9 +1199,9 @@ upload modifiers:
|
||||||
| `Rand: 4` | `rand=4` | generate random filename with 4 characters |
|
| `Rand: 4` | `rand=4` | generate random filename with 4 characters |
|
||||||
| `Life: 30` | `life=30` | delete file after 30 seconds |
|
| `Life: 30` | `life=30` | delete file after 30 seconds |
|
||||||
|
|
||||||
`life` only has an effect if the volume has a lifetime, and the volume lifetime must be greater than the file's
|
* `life` only has an effect if the volume has a lifetime, and the volume lifetime must be greater than the file's
|
||||||
|
|
||||||
server behavior of `msg` can be reconfigured with `--urlform`
|
* server behavior of `msg` can be reconfigured with `--urlform`
|
||||||
|
|
||||||
## admin
|
## admin
|
||||||
|
|
||||||
|
|
|
@ -476,6 +476,7 @@ def run_argparse(argv: list[str], formatter: Any, retry: bool) -> argparse.Names
|
||||||
\033[0muploads, general:
|
\033[0muploads, general:
|
||||||
\033[36mnodupe\033[35m rejects existing files (instead of symlinking them)
|
\033[36mnodupe\033[35m rejects existing files (instead of symlinking them)
|
||||||
\033[36mnosub\033[35m forces all uploads into the top folder of the vfs
|
\033[36mnosub\033[35m forces all uploads into the top folder of the vfs
|
||||||
|
\033[36mmagic$\033[35m enables filetype detection for nameless uploads
|
||||||
\033[36mgz\033[35m allows server-side gzip of uploads with ?gz (also c,xz)
|
\033[36mgz\033[35m allows server-side gzip of uploads with ?gz (also c,xz)
|
||||||
\033[36mpk\033[35m forces server-side compression, optional arg: xz,9
|
\033[36mpk\033[35m forces server-side compression, optional arg: xz,9
|
||||||
|
|
||||||
|
@ -591,6 +592,7 @@ def run_argparse(argv: list[str], formatter: Any, retry: bool) -> argparse.Names
|
||||||
ap2.add_argument("--hardlink", action="store_true", help="prefer hardlinks instead of symlinks when possible (within same filesystem)")
|
ap2.add_argument("--hardlink", action="store_true", help="prefer hardlinks instead of symlinks when possible (within same filesystem)")
|
||||||
ap2.add_argument("--never-symlink", action="store_true", help="do not fallback to symlinks when a hardlink cannot be made")
|
ap2.add_argument("--never-symlink", action="store_true", help="do not fallback to symlinks when a hardlink cannot be made")
|
||||||
ap2.add_argument("--no-dedup", action="store_true", help="disable symlink/hardlink creation; copy file contents instead")
|
ap2.add_argument("--no-dedup", action="store_true", help="disable symlink/hardlink creation; copy file contents instead")
|
||||||
|
ap2.add_argument("--magic", action="store_true", help="enable filetype detection on nameless uploads")
|
||||||
ap2.add_argument("--df", metavar="GiB", type=float, default=0, help="ensure GiB free disk space by rejecting upload requests")
|
ap2.add_argument("--df", metavar="GiB", type=float, default=0, help="ensure GiB free disk space by rejecting upload requests")
|
||||||
ap2.add_argument("--sparse", metavar="MiB", type=int, default=4, help="windows-only: minimum size of incoming uploads through up2k before they are made into sparse files")
|
ap2.add_argument("--sparse", metavar="MiB", type=int, default=4, help="windows-only: minimum size of incoming uploads through up2k before they are made into sparse files")
|
||||||
ap2.add_argument("--turbo", metavar="LVL", type=int, default=0, help="configure turbo-mode in up2k client; 0 = off and warn if enabled, 1 = off, 2 = on, 3 = on and disable datecheck")
|
ap2.add_argument("--turbo", metavar="LVL", type=int, default=0, help="configure turbo-mode in up2k client; 0 = off and warn if enabled, 1 = off, 2 = on, 3 = on and disable datecheck")
|
||||||
|
|
|
@ -1071,7 +1071,7 @@ class AuthSrv(object):
|
||||||
if getattr(self.args, k):
|
if getattr(self.args, k):
|
||||||
vol.flags[k] = True
|
vol.flags[k] = True
|
||||||
|
|
||||||
for ga, vf in [["no_forget", "noforget"]]:
|
for ga, vf in [["no_forget", "noforget"], ["magic", "magic"]]:
|
||||||
if getattr(self.args, ga):
|
if getattr(self.args, ga):
|
||||||
vol.flags[vf] = True
|
vol.flags[vf] = True
|
||||||
|
|
||||||
|
|
|
@ -784,7 +784,8 @@ class HttpCli(object):
|
||||||
self.log("fallthrough? thats a bug", 1)
|
self.log("fallthrough? thats a bug", 1)
|
||||||
|
|
||||||
suffix = "-{:.6f}-{}".format(time.time(), self.dip())
|
suffix = "-{:.6f}-{}".format(time.time(), self.dip())
|
||||||
if not fn:
|
nameless = not fn
|
||||||
|
if nameless:
|
||||||
suffix += ".bin"
|
suffix += ".bin"
|
||||||
fn = "put" + suffix
|
fn = "put" + suffix
|
||||||
|
|
||||||
|
@ -815,6 +816,28 @@ class HttpCli(object):
|
||||||
if self.args.nw:
|
if self.args.nw:
|
||||||
return post_sz, sha_hex, sha_b64, remains, path, ""
|
return post_sz, sha_hex, sha_b64, remains, path, ""
|
||||||
|
|
||||||
|
if nameless and "magic" in vfs.flags:
|
||||||
|
try:
|
||||||
|
ext = self.conn.hsrv.magician.ext(path)
|
||||||
|
except Exception as ex:
|
||||||
|
self.log("filetype detection failed for [{}]: {}".format(path, ex), 6)
|
||||||
|
ext = None
|
||||||
|
|
||||||
|
if ext:
|
||||||
|
if rnd:
|
||||||
|
fn2 = self.rand_name(fdir, "a." + ext, rnd)
|
||||||
|
else:
|
||||||
|
fn2 = fn.rsplit(".", 1)[0] + "." + ext
|
||||||
|
|
||||||
|
params["suffix"] = suffix[:-4]
|
||||||
|
with ren_open(fn, *open_a, **params) as zfw:
|
||||||
|
f, fn = zfw["orz"]
|
||||||
|
|
||||||
|
path2 = os.path.join(fdir, fn2)
|
||||||
|
atomic_move(path, path2)
|
||||||
|
fn = fn2
|
||||||
|
path = path2
|
||||||
|
|
||||||
vfs, rem = vfs.get_dbv(rem)
|
vfs, rem = vfs.get_dbv(rem)
|
||||||
self.conn.hsrv.broker.say(
|
self.conn.hsrv.broker.say(
|
||||||
"up2k.hash_file",
|
"up2k.hash_file",
|
||||||
|
|
|
@ -31,7 +31,15 @@ except ImportError:
|
||||||
from .__init__ import MACOS, TYPE_CHECKING, EnvParams
|
from .__init__ import MACOS, TYPE_CHECKING, EnvParams
|
||||||
from .bos import bos
|
from .bos import bos
|
||||||
from .httpconn import HttpConn
|
from .httpconn import HttpConn
|
||||||
from .util import FHC, min_ex, shut_socket, spack, start_log_thrs, start_stackmon
|
from .util import (
|
||||||
|
FHC,
|
||||||
|
Magician,
|
||||||
|
min_ex,
|
||||||
|
shut_socket,
|
||||||
|
spack,
|
||||||
|
start_log_thrs,
|
||||||
|
start_stackmon,
|
||||||
|
)
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
from .broker_util import BrokerCli
|
from .broker_util import BrokerCli
|
||||||
|
@ -60,6 +68,7 @@ class HttpSrv(object):
|
||||||
socket.setdefaulttimeout(120)
|
socket.setdefaulttimeout(120)
|
||||||
|
|
||||||
nsuf = "-n{}-i{:x}".format(nid, os.getpid()) if nid else ""
|
nsuf = "-n{}-i{:x}".format(nid, os.getpid()) if nid else ""
|
||||||
|
self.magician = Magician()
|
||||||
|
|
||||||
self.name = "hsrv" + nsuf
|
self.name = "hsrv" + nsuf
|
||||||
self.mutex = threading.Lock()
|
self.mutex = threading.Lock()
|
||||||
|
|
|
@ -69,6 +69,7 @@ except:
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
from .authsrv import VFS
|
from .authsrv import VFS
|
||||||
|
|
||||||
|
import magic
|
||||||
|
|
||||||
FAKE_MP = False
|
FAKE_MP = False
|
||||||
|
|
||||||
|
@ -154,22 +155,18 @@ IMPLICATIONS = [
|
||||||
|
|
||||||
|
|
||||||
MIMES = {
|
MIMES = {
|
||||||
"md": "text/plain",
|
|
||||||
"txt": "text/plain",
|
|
||||||
"js": "text/javascript",
|
|
||||||
"opus": "audio/ogg; codecs=opus",
|
"opus": "audio/ogg; codecs=opus",
|
||||||
"caf": "audio/x-caf",
|
|
||||||
"mp3": "audio/mpeg",
|
|
||||||
"m4a": "audio/mp4",
|
|
||||||
"jpg": "image/jpeg",
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
def _add_mimes() -> None:
|
def _add_mimes() -> None:
|
||||||
|
# `mimetypes` is woefully unpopulated on windows
|
||||||
|
# but will be used as fallback on linux
|
||||||
|
|
||||||
for ln in """text css html csv
|
for ln in """text css html csv
|
||||||
application json wasm xml pdf rtf zip
|
application json wasm xml pdf rtf zip jar fits wasm
|
||||||
image webp jpeg png gif bmp
|
image webp jpeg png gif bmp jxl jp2 jxs jxr tiff bpg heic heif avif
|
||||||
audio aac ogg wav
|
audio aac ogg wav flac ape amr
|
||||||
video webm mp4 mpeg
|
video webm mp4 mpeg
|
||||||
font woff woff2 otf ttf
|
font woff woff2 otf ttf
|
||||||
""".splitlines():
|
""".splitlines():
|
||||||
|
@ -177,10 +174,35 @@ font woff woff2 otf ttf
|
||||||
for v in vs.strip().split():
|
for v in vs.strip().split():
|
||||||
MIMES[v] = "{}/{}".format(k, v)
|
MIMES[v] = "{}/{}".format(k, v)
|
||||||
|
|
||||||
|
for ln in """text md=plain txt=plain js=javascript
|
||||||
|
application 7z=x-7z-compressed tar=x-tar bz2=x-bzip2 gz=gzip rar=x-rar-compressed zst=zstd xz=x-xz lz=lzip cpio=x-cpio
|
||||||
|
application exe=vnd.microsoft.portable-executable msi=x-ms-installer cab=vnd.ms-cab-compressed rpm=x-rpm crx=x-chrome-extension
|
||||||
|
application epub=epub+zip mobi=x-mobipocket-ebook lit=x-ms-reader rss=rss+xml atom=atom+xml torrent=x-bittorrent
|
||||||
|
application p7s=pkcs7-signature dcm=dicom shx=vnd.shx shp=vnd.shp dbf=x-dbf gml=gml+xml gpx=gpx+xml amf=x-amf
|
||||||
|
application swf=x-shockwave-flash m3u=vnd.apple.mpegurl db3=vnd.sqlite3 sqlite=vnd.sqlite3
|
||||||
|
image jpg=jpeg xpm=x-xpixmap psd=vnd.adobe.photoshop jpf=jpx tif=tiff ico=x-icon djvu=vnd.djvu
|
||||||
|
image heic=heic-sequence heif=heif-sequence hdr=vnd.radiance svg=svg+xml
|
||||||
|
audio caf=x-caf mp3=mpeg m4a=mp4 mid=midi mpc=musepack aif=aiff au=basic qcp=qcelp
|
||||||
|
video mkv=x-matroska mov=quicktime avi=x-msvideo m4v=x-m4v ts=mp2t
|
||||||
|
video asf=x-ms-asf flv=x-flv 3gp=3gpp 3g2=3gpp2 rmvb=vnd.rn-realmedia-vbr
|
||||||
|
font ttc=collection
|
||||||
|
""".splitlines():
|
||||||
|
k, ems = ln.split(" ", 1)
|
||||||
|
for em in ems.strip().split():
|
||||||
|
ext, mime = em.split("=")
|
||||||
|
MIMES[ext] = "{}/{}".format(k, mime)
|
||||||
|
|
||||||
|
|
||||||
_add_mimes()
|
_add_mimes()
|
||||||
|
|
||||||
|
|
||||||
|
EXTS: dict[str, str] = {v: k for k, v in MIMES.items()}
|
||||||
|
|
||||||
|
EXTS["vnd.mozilla.apng"] = "png"
|
||||||
|
|
||||||
|
MAGIC_MAP = {"jpeg": "jpg"}
|
||||||
|
|
||||||
|
|
||||||
REKOBO_KEY = {
|
REKOBO_KEY = {
|
||||||
v: ln.split(" ", 1)[0]
|
v: ln.split(" ", 1)[0]
|
||||||
for ln in """
|
for ln in """
|
||||||
|
@ -625,6 +647,50 @@ class HMaccas(object):
|
||||||
return self.b(msg.encode("utf-8", "replace"))
|
return self.b(msg.encode("utf-8", "replace"))
|
||||||
|
|
||||||
|
|
||||||
|
class Magician(object):
    """Guess a filename extension from a file's contents via python-magic.

    One ``magic.Magic`` handle (opened with ``extension=True``) is created
    on first use and shared by all callers; a lock serializes both its
    creation and every lookup made through it.
    """

    def __init__(self) -> None:
        # becomes True once creating the handle has failed, so that later
        # calls skip straight to the mime-based fallback instead of retrying
        self.bad_magic = False
        # guards creation and use of self.magic
        self.mutex = threading.Lock()
        # created lazily by ext()
        self.magic: Optional["magic.Magic"] = None

    def ext(self, fpath: str) -> str:
        """Return the detected extension for the file at ``fpath``.

        Lookup order:
          1. the shared extension-mode handle; its answer is treated as a
             "/"-separated list of candidates of which the first is used
             (after translation through MAGIC_MAP), and any answer that
             contains "?" is considered a miss
          2. the mime type reported by ``magic.from_file(..., mime=True)``,
             translated through EXTS
          3. ``mimetypes.guess_extension`` for that same mime type

        The returned extension has no leading dot.

        Raises:
            ImportError: if the ``magic`` module cannot be imported.
            Exception: if none of the lookups produced an extension.
        """
        import magic

        try:
            if self.bad_magic:
                raise Exception("extension lookup through libmagic is unavailable")

            if not self.magic:
                try:
                    with self.mutex:
                        # re-check under the lock; another thread may have
                        # created the handle while we were waiting
                        if not self.magic:
                            self.magic = magic.Magic(uncompress=False, extension=True)
                except Exception:
                    self.bad_magic = True
                    raise

            with self.mutex:
                ret = self.magic.from_file(fpath)
        except Exception:
            # best-effort; any failure here just selects the mime fallback
            ret = "?"

        ret = ret.split("/")[0]
        ret = MAGIC_MAP.get(ret, ret)
        if "?" not in ret:
            return ret

        mime = magic.from_file(fpath, mime=True)
        # drop parameters such as "; charset=..." and keep the bare type
        mime = re.split("[; ]", mime, maxsplit=1)[0]

        ret = EXTS.get(mime)
        if ret:
            # previously this hit was discarded and None was returned
            return ret

        mg = mimetypes.guess_extension(mime)
        if mg:
            # guess_extension includes the leading dot; strip it
            return mg[1:]

        raise Exception("no known extension for mimetype [{}]".format(mime))
|
||||||
|
|
||||||
|
|
||||||
if WINDOWS and sys.version_info < (3, 8):
|
if WINDOWS and sys.version_info < (3, 8):
|
||||||
_popen = sp.Popen
|
_popen = sp.Popen
|
||||||
|
|
||||||
|
|
|
@ -170,6 +170,23 @@ tmpdir="$(
|
||||||
wget -O$f "$url" || curl -L "$url" >$f)
|
wget -O$f "$url" || curl -L "$url" >$f)
|
||||||
done
|
done
|
||||||
|
|
||||||
|
echo collecting python-magic
|
||||||
|
v=0.4.27
|
||||||
|
f=python-magic-$v.tar.gz
|
||||||
|
[ -e "$f" ] ||
|
||||||
|
(url=https://files.pythonhosted.org/packages/da/db/0b3e28ac047452d079d375ec6798bf76a036a08182dbb39ed38116a49130/python-magic-0.4.27.tar.gz;
|
||||||
|
wget -O$f "$url" || curl -L "$url" >$f)
|
||||||
|
|
||||||
|
tar -zxf $f
|
||||||
|
mkdir magic
|
||||||
|
mv python-magic-*/magic .
|
||||||
|
rm -rf python-magic-*
|
||||||
|
rm magic/compat.py
|
||||||
|
f=magic/__init__.py
|
||||||
|
awk '/^def _add_compat/{o=1} !o; /^_add_compat/{o=0}' <$f >t
|
||||||
|
tmv "$f"
|
||||||
|
mv magic ftp/ # doesn't provide a version label anyways
|
||||||
|
|
||||||
# enable this to dynamically remove type hints at startup,
|
# enable this to dynamically remove type hints at startup,
|
||||||
# in case a future python version can use them for performance
|
# in case a future python version can use them for performance
|
||||||
true || (
|
true || (
|
||||||
|
@ -326,6 +343,7 @@ rm have
|
||||||
f=j2/jinja2/constants.py
|
f=j2/jinja2/constants.py
|
||||||
awk '/^LOREM_IPSUM_WORDS/{o=1;print "LOREM_IPSUM_WORDS = u\"a\"";next} !o; /"""/{o=0}' <$f >t
|
awk '/^LOREM_IPSUM_WORDS/{o=1;print "LOREM_IPSUM_WORDS = u\"a\"";next} !o; /"""/{o=0}' <$f >t
|
||||||
tmv "$f"
|
tmv "$f"
|
||||||
|
rm -f j2/jinja2/async*
|
||||||
|
|
||||||
grep -rLE '^#[^a-z]*coding: utf-8' j2 |
|
grep -rLE '^#[^a-z]*coding: utf-8' j2 |
|
||||||
while IFS= read -r f; do
|
while IFS= read -r f; do
|
||||||
|
|
Loading…
Reference in a new issue