diff --git a/README.md b/README.md index 2f049e6f..42b60bc1 100644 --- a/README.md +++ b/README.md @@ -100,19 +100,24 @@ through arguments: * `-e2tsr` deletes all existing tags, so a full reindex the same arguments can be set as volume flags, in addition to `d2d` and `d2t` for disabling: -* `-v ~/music::ce2dsa:ce2tsr` does a full reindex of everything on startup -* `-v ~/music::cd2d` disables **all** indexing, even if any `-e2*` are on -* `-v ~/music::cd2t` disables all `-e2t*` (tags), does not affect `-e2d*` +* `-v ~/music::r:ce2dsa:ce2tsr` does a full reindex of everything on startup +* `-v ~/music::r:cd2d` disables **all** indexing, even if any `-e2*` are on +* `-v ~/music::r:cd2t` disables all `-e2t*` (tags), does not affect `-e2d*` `e2tsr` is probably always overkill, since `e2ds`/`e2dsa` would pick up any file modifications and cause `e2ts` to reindex those + +## metadata from audio files + `-mte` decides which tags to index and display in the browser (and also the display order), this can be changed per-volume: -* `-v ~/music::cmte=title,artist` indexes and displays *title* followed by *artist* +* `-v ~/music::r:cmte=title,artist` indexes and displays *title* followed by *artist* if you add/remove a tag from `mte` you will need to run with `-e2tsr` once to rebuild the database, otherwise only new files will be affected `-mtm` can be used to add or redefine a metadata mapping, say you have media files with `foo` and `bar` tags and you want them to display as `qux` in the browser (preferring `foo` if both are present), then do `-mtm qux=foo,bar` and now you can `-mte artist,title,qux` +tags that start with a `.` such as `.bpm` and `.dur`(ation) indicate a numeric value + see the beautiful mess of a dictionary in [mtag.py](https://github.com/9001/copyparty/blob/master/copyparty/mtag.py) for the default mappings (should cover mp3,opus,flac,m4a,wav,aif,) `--no-mutagen` disables mutagen and uses ffprobe instead, which... 
@@ -122,6 +127,15 @@ see the beautiful mess of a dictionary in [mtag.py](https://github.com/9001/copy * more importantly runs ffprobe on incoming files which is bad if your ffmpeg has a cve +## file parser plugins + +copyparty can invoke external programs to collect additional metadata for files using `mtp` (as argument or volume flag) + +* `-mtp key=~/bin/audio-key.py` will execute `~/bin/audio-key.py` with filename as argument 1 to provide the `key` tag if that does not exist in the audio metadata +* `-mtp .bpm=f,~/bin/audio-bpm.py` replaces (`f,`) any existing `.bpm` tag using the `~/bin/audio-bpm.py` program +* `-v ~/music::r:cmtp=key=~/bin/audio-key.py:cmtp=.bpm=f,~/bin/audio-bpm.py` does both of the above as a per-volume config (wow this is getting ugly) + + # client examples * javascript: dump some state into a file (two separate examples) diff --git a/bin/README.md b/bin/README.md index eea75f09..304cc701 100644 --- a/bin/README.md +++ b/bin/README.md @@ -44,4 +44,4 @@ you could replace winfsp with [dokan](https://github.com/dokan-dev/dokany/releas # [`mtag/`](mtag/) * standalone programs which perform misc. 
file analysis -* copyparty can soon Popen programs like these during file indexing to collect additional metadata +* copyparty can Popen programs like these during file indexing to collect additional metadata diff --git a/bin/mtag/README.md b/bin/mtag/README.md index 4e4f2da1..464435e0 100644 --- a/bin/mtag/README.md +++ b/bin/mtag/README.md @@ -5,4 +5,4 @@ some of these rely on libraries which are not MIT-compatible * [audio-bpm.py](./audio-bpm.py) detects the BPM of music using the BeatRoot Vamp Plugin; imports GPL2 * [audio-key.py](./audio-key.py) detects the melodic key of music using the Mixxx fork of keyfinder; imports GPL3 -run [`install-deps.sh`](install-deps.sh) to build/install most dependencies required by these programs +run [`install-deps.sh`](install-deps.sh) to build/install most dependencies required by these programs (supports windows/linux/macos) diff --git a/bin/mtag/install-deps.sh b/bin/mtag/install-deps.sh index fbdb1b3a..67be35c7 100755 --- a/bin/mtag/install-deps.sh +++ b/bin/mtag/install-deps.sh @@ -4,7 +4,7 @@ set -e # install dependencies for audio-*.py # -# linux: requires {python3,ffmpeg,fftw}-dev py3-{wheel,pip} py3-numpy{,-dev} vamp-sdk-dev +# linux: requires {python3,ffmpeg,fftw}-dev py3-{wheel,pip} py3-numpy{,-dev} vamp-sdk-dev patchelf # win64: requires msys2-mingw64 environment # macos: requires macports # @@ -174,7 +174,8 @@ install_keyfinder() { pypath="$($pybin -c 'import keyfinder; print(keyfinder.__file__)')" for pyso in "${pypath%/*}"/*.so; do [ -e "$pyso" ] || break - patchelf --set-rpath "${libpath%/*}" "$pyso" + patchelf --set-rpath "${libpath%/*}" "$pyso" || + echo "WARNING: patchelf failed (only fatal on musl-based distros)" done mv "$pypath"{,.bak} diff --git a/copyparty/__main__.py b/copyparty/__main__.py index ff5bc9d4..37bae8f7 100644 --- a/copyparty/__main__.py +++ b/copyparty/__main__.py @@ -21,7 +21,7 @@ from textwrap import dedent from .__init__ import E, WINDOWS, VT100, PY2 from .__version__ import S_VERSION, 
S_BUILD_DT, CODENAME from .svchub import SvcHub -from .util import py_desc, align_tab +from .util import py_desc, align_tab, IMPLICATIONS HAVE_SSL = True try: @@ -264,6 +264,7 @@ def main(): ap2.add_argument("-mtm", metavar="M=t,t,t", action="append", type=str, help="add/replace metadata mapping") ap2.add_argument("-mte", metavar="M,M,M", type=str, help="tags to index/display (comma-sep.)", default="circle,album,.tn,artist,title,.bpm,key,.dur,.q") + ap2.add_argument("-mtp", metavar="M=[f,]bin", action="append", type=str, help="read tag M using bin") ap2 = ap.add_argument_group('SSL/TLS options') ap2.add_argument("--http-only", action="store_true", help="disable ssl/tls") @@ -277,13 +278,7 @@ def main(): # fmt: on # propagate implications - for k1, k2 in [ - ["e2dsa", "e2ds"], - ["e2ds", "e2d"], - ["e2tsr", "e2ts"], - ["e2ts", "e2t"], - ["e2t", "e2d"], - ]: + for k1, k2 in IMPLICATIONS: if getattr(al, k1): setattr(al, k2, True) diff --git a/copyparty/authsrv.py b/copyparty/authsrv.py index de370fd3..16a77839 100644 --- a/copyparty/authsrv.py +++ b/copyparty/authsrv.py @@ -6,7 +6,7 @@ import re import threading from .__init__ import PY2, WINDOWS -from .util import undot, Pebkac, fsdec, fsenc, statdir, nuprint +from .util import IMPLICATIONS, undot, Pebkac, fsdec, fsenc, statdir, nuprint class VFS(object): @@ -200,16 +200,39 @@ class AuthSrv(object): continue lvl, uname = ln.split(" ") - if lvl in "ra": - mread[vol_dst].append(uname) - if lvl in "wa": - mwrite[vol_dst].append(uname) - if lvl == "c": - cval = True - if "=" in uname: - uname, cval = uname.split("=", 1) + self._read_vol_str( + lvl, uname, mread[vol_dst], mwrite[vol_dst], mflags[vol_dst] + ) - mflags[vol_dst][uname] = cval + def _read_vol_str(self, lvl, uname, mr, mw, mf): + if lvl == "c": + cval = True + if "=" in uname: + uname, cval = uname.split("=", 1) + + self._read_volflag(mf, uname, cval, False) + return + + if uname == "": + uname = "*" + + if lvl in "ra": + mr.append(uname) + + if lvl in "wa": + 
mw.append(uname) + + def _read_volflag(self, flags, name, value, is_list): + if name not in ["mtp"]: + flags[name] = value + return + + if not is_list: + value = [value] + elif not value: + return + + flags[name] = flags.get(name, []) + value def reload(self): """ @@ -232,7 +255,7 @@ class AuthSrv(object): if self.args.v: # list of src:dst:permset:permset:... - # permset is [rwa]username + # permset is [rwa]username or [c]flag for v_str in self.args.v: m = self.re_vol.match(v_str) if not m: @@ -249,22 +272,7 @@ class AuthSrv(object): perms = perms.split(":") for (lvl, uname) in [[x[0], x[1:]] for x in perms]: - if lvl == "c": - cval = True - if "=" in uname: - uname, cval = uname.split("=", 1) - - mflags[dst][uname] = cval - continue - - if uname == "": - uname = "*" - - if lvl in "ra": - mread[dst].append(uname) - - if lvl in "wa": - mwrite[dst].append(uname) + self._read_vol_str(lvl, uname, mread[dst], mwrite[dst], mflags[dst]) if self.args.c: for cfg_fn in self.args.c: @@ -321,10 +329,17 @@ class AuthSrv(object): if getattr(self.args, k): vol.flags[k] = True + for k1, k2 in IMPLICATIONS: + if k1 in vol.flags: + vol.flags[k2] = True + # default tag-list if unset if "mte" not in vol.flags: vol.flags["mte"] = self.args.mte + # append parsers from argv to volume-flags + self._read_volflag(vol.flags, "mtp", self.args.mtp, True) + try: v, _ = vfs.get("/", "*", False, True) if self.warn_anonwrite and os.getcwd() == v.realpath: diff --git a/copyparty/mtag.py b/copyparty/mtag.py index dab53f2c..5c58f6c9 100644 --- a/copyparty/mtag.py +++ b/copyparty/mtag.py @@ -312,3 +312,23 @@ class MTag(object): ret = {k: [0, v] for k, v in ret.items()} return self.normalize_tags(ret, md) + + def get_bin(self, parsers, abspath): + pypath = os.path.abspath(os.path.dirname(os.path.dirname(__file__))) + pypath = [str(pypath)] + [str(x) for x in sys.path if x] + pypath = str(os.pathsep.join(pypath)) + env = os.environ.copy() + env["PYTHONPATH"] = pypath + + ret = {} + for tagname, binpath 
in parsers.items(): + try: + cmd = [sys.executable, binpath, abspath] + cmd = [fsenc(x) for x in cmd] + v = sp.check_output(cmd, env=env).strip() + if v: + ret[tagname] = v.decode("utf-8") + except: + pass + + return ret diff --git a/copyparty/up2k.py b/copyparty/up2k.py index a811d476..ec2699c4 100644 --- a/copyparty/up2k.py +++ b/copyparty/up2k.py @@ -1,5 +1,6 @@ # coding: utf-8 from __future__ import print_function, unicode_literals +from os.path import abspath import re import os @@ -107,6 +108,10 @@ class Up2k(object): thr.daemon = True thr.start() + thr = threading.Thread(target=self._run_all_mtp) + thr.daemon = True + thr.start() + def log(self, msg, c=0): self.log_func("up2k", msg + "\033[K", c) @@ -219,6 +224,9 @@ class Up2k(object): _, flags = self._expr_idx_filter(flags) + a = ["\033[36m{}:\033[0m{}".format(k, v) for k, v in flags.items()] + self.log(" ".join(a)) + reg = {} path = os.path.join(ptop, ".hist", "up2k.snap") if "e2d" in flags and os.path.exists(path): @@ -435,18 +443,7 @@ class Up2k(object): mpool = False if self.mtag.prefer_mt and not self.args.no_mtag_mt: - # mp.pool.ThreadPool and concurrent.futures.ThreadPoolExecutor - # both do crazy runahead so lets reinvent another wheel - nw = os.cpu_count() if hasattr(os, "cpu_count") else 4 - if self.n_mtag_tags_added == -1: - self.log("using {}x {}".format(nw, self.mtag.backend)) - self.n_mtag_tags_added = 0 - - mpool = Queue(nw) - for _ in range(nw): - thr = threading.Thread(target=self._tag_thr, args=(mpool,)) - thr.daemon = True - thr.start() + mpool = self._start_mpool() c2 = cur.connection.cursor() c3 = cur.connection.cursor() @@ -457,16 +454,20 @@ class Up2k(object): if c2.execute(q, (w[:16],)).fetchone(): continue + if "mtp" in flags: + q = "insert into mt values (?,'t:mtp','a')" + c2.execute(q, (w[:16],)) + if rd.startswith("//") or fn.startswith("//"): rd, fn = s3dec(rd, fn) abspath = os.path.join(ptop, rd, fn) self.pp.msg = "c{} {}".format(n_left, abspath) - args = c3, entags, w, abspath 
+ args = [c3, entags, w, abspath] if not mpool: n_tags = self._tag_file(*args) else: - mpool.put(args) + mpool.put(["mtag"] + args) with self.mutex: n_tags = self.n_mtag_tags_added self.n_mtag_tags_added = 0 @@ -481,17 +482,118 @@ class Up2k(object): last_write = time.time() n_buf = 0 - if mpool: - for _ in range(mpool.maxsize): - mpool.put(None) - - mpool.join() + self._stop_mpool(mpool) c3.close() c2.close() return n_add, n_rm, True + def _run_all_mtp(self): + self.n_mtag_tags_added = 0 + for ptop, flags in self.flags.items(): + if "mtp" in flags: + self._run_one_mtp(ptop) + + def _run_one_mtp(self, ptop): + force = {} + parsers = {} + for parser in self.flags[ptop]["mtp"]: + tag, parser = parser.split("=", 1) + if parser.lower().startswith("f,"): + parser = parser[2:] + force[tag] = True + + parsers[tag] = parser + + q = "select count(w) from mt where k = 't:mtp'" + with self.mutex: + cur = self.cur[ptop] + cur = cur.connection.cursor() + wcur = cur.connection.cursor() + n_left = cur.execute(q).fetchone()[0] + + mpool = self._start_mpool() + batch_sz = mpool.maxsize * 4 + seen = [] + while True: + with self.mutex: + q = "select w from mt where k = 't:mtp' limit ?" + warks = cur.execute(q, (batch_sz,)).fetchall() + warks = [x[0] for x in warks] + warks = [x for x in warks if x not in seen] + seen = warks + jobs = [] + for w in warks: + q = "delete from mt where w = ? and k = 't:mtp'" + cur.execute(q, (w,)) + + q = "select rd, fn from up where substr(w,1,16)=? limit 1" + rd, fn = cur.execute(q, (w,)).fetchone() + rd, fn = s3dec(rd, fn) + abspath = os.path.join(ptop, rd, fn) + + q = "select k from mt where w = ?" 
+ have = cur.execute(q, (w,)).fetchall() + have = [x[0] for x in have] + + if ".dur" not in have: + # skip non-audio + n_left -= 1 + continue + + task_parsers = { + k: v for k, v in parsers.items() if k in force or k not in have + } + jobs.append([task_parsers, wcur, None, w, abspath]) + + if not jobs: + break + + with self.mutex: + msg = "mtp: {} done, {} left" + self.log(msg.format(self.n_mtag_tags_added, n_left)) + + for j in jobs: + n_left -= 1 + mpool.put(j) + + with self.mutex: + cur.connection.commit() + + self._stop_mpool(mpool) + with self.mutex: + cur.connection.commit() + wcur.close() + cur.close() + + self.log("mtp finished") + + def _start_mpool(self): + # mp.pool.ThreadPool and concurrent.futures.ThreadPoolExecutor + # both do crazy runahead so lets reinvent another wheel + nw = os.cpu_count() if hasattr(os, "cpu_count") else 4 + if self.n_mtag_tags_added == -1: + self.log("using {}x {}".format(nw, self.mtag.backend)) + self.n_mtag_tags_added = 0 + + mpool = Queue(nw) + for _ in range(nw): + thr = threading.Thread(target=self._tag_thr, args=(mpool,)) + thr.daemon = True + thr.start() + + return mpool + + def _stop_mpool(self, mpool): + if not mpool: + return + + for _ in range(mpool.maxsize): + mpool.put(None) + + mpool.join() + def _tag_thr(self, q): while True: task = q.get() @@ -500,24 +602,38 @@ class Up2k(object): return try: - write_cur, entags, wark, abspath = task - tags = self.mtag.get(abspath) + parser, write_cur, entags, wark, abspath = task + if parser == "mtag": + tags = self.mtag.get(abspath) + else: + tags = self.mtag.get_bin(parser, abspath) + vtags = [ + "\033[36m{} \033[33m{}".format(k, v) for k, v in tags.items() + ] + self.log("{}\033[0m [{}]".format(" ".join(vtags), abspath)) + with self.mutex: n = self._tag_file(write_cur, entags, wark, abspath, tags) self.n_mtag_tags_added += n except: ex = traceback.format_exc() + if parser == "mtag": + parser = self.mtag.backend + msg = "{} failed to read tags from {}:\n{}" - 
self.log(msg.format(self.mtag.backend, abspath, ex), c=3) + self.log(msg.format(parser, abspath, ex), c=3) q.task_done() def _tag_file(self, write_cur, entags, wark, abspath, tags=None): - tags = tags or self.mtag.get(abspath) - tags = {k: v for k, v in tags.items() if k in entags} - if not tags: - # indicate scanned without tags - tags = {"x": 0} + if tags is None: + tags = self.mtag.get(abspath) + + if entags: + tags = {k: v for k, v in tags.items() if k in entags} + if not tags: + # indicate scanned without tags + tags = {"x": 0} ret = 0 for k, v in tags.items(): diff --git a/copyparty/util.py b/copyparty/util.py index 9df8d130..5795a7d1 100644 --- a/copyparty/util.py +++ b/copyparty/util.py @@ -61,6 +61,15 @@ HTTPCODE = { } +IMPLICATIONS = [ + ["e2dsa", "e2ds"], + ["e2ds", "e2d"], + ["e2tsr", "e2ts"], + ["e2ts", "e2t"], + ["e2t", "e2d"], +] + + class Counter(object): def __init__(self, v=0): self.v = v