support metadata plugins

This commit is contained in:
ed 2021-03-19 00:08:31 +01:00
parent 8137a99904
commit abd65c6334
9 changed files with 240 additions and 70 deletions

View file

@ -100,19 +100,24 @@ through arguments:
* `-e2tsr` deletes all existing tags, so a full reindex
the same arguments can be set as volume flags, in addition to `d2d` and `d2t` for disabling:
* `-v ~/music::ce2dsa:ce2tsr` does a full reindex of everything on startup
* `-v ~/music::cd2d` disables **all** indexing, even if any `-e2*` are on
* `-v ~/music::cd2t` disables all `-e2t*` (tags), does not affect `-e2d*`
* `-v ~/music::r:ce2dsa:ce2tsr` does a full reindex of everything on startup
* `-v ~/music::r:cd2d` disables **all** indexing, even if any `-e2*` are on
* `-v ~/music::r:cd2t` disables all `-e2t*` (tags), does not affect `-e2d*`
`e2tsr` is probably always overkill, since `e2ds`/`e2dsa` would pick up any file modifications and cause `e2ts` to reindex those
## metadata from audio files
`-mte` decides which tags to index and display in the browser (and also the display order), this can be changed per-volume:
* `-v ~/music::cmte=title,artist` indexes and displays *title* followed by *artist*
* `-v ~/music::r:cmte=title,artist` indexes and displays *title* followed by *artist*
if you add/remove a tag from `mte` you will need to run with `-e2tsr` once to rebuild the database, otherwise only new files will be affected
`-mtm` can be used to add or redefine a metadata mapping, say you have media files with `foo` and `bar` tags and you want them to display as `qux` in the browser (preferring `foo` if both are present), then do `-mtm qux=foo,bar` and now you can `-mte artist,title,qux`
tags that start with a `.`, such as `.bpm` and `.dur`(ation), indicate a numeric value
see the beautiful mess of a dictionary in [mtag.py](https://github.com/9001/copyparty/blob/master/copyparty/mtag.py) for the default mappings (should cover mp3, opus, flac, m4a, wav, aif)
`--no-mutagen` disables mutagen and uses ffprobe instead, which...
@ -122,6 +127,15 @@ see the beautiful mess of a dictionary in [mtag.py](https://github.com/9001/copy
* more importantly runs ffprobe on incoming files which is bad if your ffmpeg has a cve
## file parser plugins
copyparty can invoke external programs to collect additional metadata for files using `mtp` (as argument or volume flag)
* `-mtp key=~/bin/audio-key.py` will execute `~/bin/audio-key.py` with filename as argument 1 to provide the `key` tag if that does not exist in the audio metadata
* `-mtp .bpm=f,~/bin/audio-bpm.py` replaces (`f,`) any existing `.bpm` tag using the `~/bin/audio-bpm.py` program
* `-v ~/music::r:cmtp=key=~/bin/audio-key.py:cmtp=.bpm=f,~/bin/audio-bpm.py` sets both of the above as a per-volume config (wow, this is getting ugly)
# client examples
* javascript: dump some state into a file (two separate examples)

View file

@ -44,4 +44,4 @@ you could replace winfsp with [dokan](https://github.com/dokan-dev/dokany/releas
# [`mtag/`](mtag/)
* standalone programs which perform misc. file analysis
* copyparty can soon Popen programs like these during file indexing to collect additional metadata
* copyparty can Popen programs like these during file indexing to collect additional metadata

View file

@ -5,4 +5,4 @@ some of these rely on libraries which are not MIT-compatible
* [audio-bpm.py](./audio-bpm.py) detects the BPM of music using the BeatRoot Vamp Plugin; imports GPL2
* [audio-key.py](./audio-key.py) detects the melodic key of music using the Mixxx fork of keyfinder; imports GPL3
run [`install-deps.sh`](install-deps.sh) to build/install most dependencies required by these programs
run [`install-deps.sh`](install-deps.sh) to build/install most dependencies required by these programs (supports windows/linux/macos)

View file

@ -4,7 +4,7 @@ set -e
# install dependencies for audio-*.py
#
# linux: requires {python3,ffmpeg,fftw}-dev py3-{wheel,pip} py3-numpy{,-dev} vamp-sdk-dev
# linux: requires {python3,ffmpeg,fftw}-dev py3-{wheel,pip} py3-numpy{,-dev} vamp-sdk-dev patchelf
# win64: requires msys2-mingw64 environment
# macos: requires macports
#
@ -174,7 +174,8 @@ install_keyfinder() {
pypath="$($pybin -c 'import keyfinder; print(keyfinder.__file__)')"
for pyso in "${pypath%/*}"/*.so; do
[ -e "$pyso" ] || break
patchelf --set-rpath "${libpath%/*}" "$pyso"
patchelf --set-rpath "${libpath%/*}" "$pyso" ||
echo "WARNING: patchelf failed (only fatal on musl-based distros)"
done
mv "$pypath"{,.bak}

View file

@ -21,7 +21,7 @@ from textwrap import dedent
from .__init__ import E, WINDOWS, VT100, PY2
from .__version__ import S_VERSION, S_BUILD_DT, CODENAME
from .svchub import SvcHub
from .util import py_desc, align_tab
from .util import py_desc, align_tab, IMPLICATIONS
HAVE_SSL = True
try:
@ -264,6 +264,7 @@ def main():
ap2.add_argument("-mtm", metavar="M=t,t,t", action="append", type=str, help="add/replace metadata mapping")
ap2.add_argument("-mte", metavar="M,M,M", type=str, help="tags to index/display (comma-sep.)",
default="circle,album,.tn,artist,title,.bpm,key,.dur,.q")
ap2.add_argument("-mtp", metavar="M=[f,]bin", action="append", type=str, help="read tag M using bin")
ap2 = ap.add_argument_group('SSL/TLS options')
ap2.add_argument("--http-only", action="store_true", help="disable ssl/tls")
@ -277,13 +278,7 @@ def main():
# fmt: on
# propagate implications
for k1, k2 in [
["e2dsa", "e2ds"],
["e2ds", "e2d"],
["e2tsr", "e2ts"],
["e2ts", "e2t"],
["e2t", "e2d"],
]:
for k1, k2 in IMPLICATIONS:
if getattr(al, k1):
setattr(al, k2, True)

View file

@ -6,7 +6,7 @@ import re
import threading
from .__init__ import PY2, WINDOWS
from .util import undot, Pebkac, fsdec, fsenc, statdir, nuprint
from .util import IMPLICATIONS, undot, Pebkac, fsdec, fsenc, statdir, nuprint
class VFS(object):
@ -200,16 +200,39 @@ class AuthSrv(object):
continue
lvl, uname = ln.split(" ")
if lvl in "ra":
mread[vol_dst].append(uname)
if lvl in "wa":
mwrite[vol_dst].append(uname)
if lvl == "c":
cval = True
if "=" in uname:
uname, cval = uname.split("=", 1)
self._read_vol_str(
lvl, uname, mread[vol_dst], mwrite[vol_dst], mflags[vol_dst]
)
mflags[vol_dst][uname] = cval
def _read_vol_str(self, lvl, uname, mr, mw, mf):
if lvl == "c":
cval = True
if "=" in uname:
uname, cval = uname.split("=", 1)
self._read_volflag(mf, uname, cval, False)
return
if uname == "":
uname = "*"
if lvl in "ra":
mr.append(uname)
if lvl in "wa":
mw.append(uname)
def _read_volflag(self, flags, name, value, is_list):
if name not in ["mtp"]:
flags[name] = value
return
if not is_list:
value = [value]
elif not value:
return
flags[name] = flags.get(name, []) + value
def reload(self):
"""
@ -232,7 +255,7 @@ class AuthSrv(object):
if self.args.v:
# list of src:dst:permset:permset:...
# permset is [rwa]username
# permset is [rwa]username or [c]flag
for v_str in self.args.v:
m = self.re_vol.match(v_str)
if not m:
@ -249,22 +272,7 @@ class AuthSrv(object):
perms = perms.split(":")
for (lvl, uname) in [[x[0], x[1:]] for x in perms]:
if lvl == "c":
cval = True
if "=" in uname:
uname, cval = uname.split("=", 1)
mflags[dst][uname] = cval
continue
if uname == "":
uname = "*"
if lvl in "ra":
mread[dst].append(uname)
if lvl in "wa":
mwrite[dst].append(uname)
self._read_vol_str(lvl, uname, mread[dst], mwrite[dst], mflags[dst])
if self.args.c:
for cfg_fn in self.args.c:
@ -321,10 +329,17 @@ class AuthSrv(object):
if getattr(self.args, k):
vol.flags[k] = True
for k1, k2 in IMPLICATIONS:
if k1 in vol.flags:
vol.flags[k2] = True
# default tag-list if unset
if "mte" not in vol.flags:
vol.flags["mte"] = self.args.mte
# append parsers from argv to volume-flags
self._read_volflag(vol.flags, "mtp", self.args.mtp, True)
try:
v, _ = vfs.get("/", "*", False, True)
if self.warn_anonwrite and os.getcwd() == v.realpath:

View file

@ -312,3 +312,23 @@ class MTag(object):
ret = {k: [0, v] for k, v in ret.items()}
return self.normalize_tags(ret, md)
def get_bin(self, parsers, abspath):
    """
    run external parser scripts against one file and collect their output

    parsers: dict of tagname -> path to a python script; each script is
             started with the current interpreter and gets abspath as
             its only argument
    abspath: path of the file to analyze

    returns a dict of tagname -> script stdout (stripped, utf-8 decoded);
    scripts which fail or print nothing are left out of the result
    """
    # PYTHONPATH for the scripts: the directory above the one holding
    # this module, followed by the non-empty entries of our own sys.path
    pypath = os.path.abspath(os.path.dirname(os.path.dirname(__file__)))
    pypath = [str(pypath)] + [str(x) for x in sys.path if x]
    pypath = str(os.pathsep.join(pypath))
    env = os.environ.copy()
    env["PYTHONPATH"] = pypath

    ret = {}
    for tagname, binpath in parsers.items():
        try:
            cmd = [fsenc(x) for x in [sys.executable, binpath, abspath]]
            v = sp.check_output(cmd, env=env).strip()
            if v:
                ret[tagname] = v.decode("utf-8")
        except Exception:
            # best-effort; a broken parser must not prevent the others
            # from running, but KeyboardInterrupt and SystemExit are
            # no longer swallowed here
            pass

    return ret

View file

@ -1,5 +1,6 @@
# coding: utf-8
from __future__ import print_function, unicode_literals
from os.path import abspath
import re
import os
@ -107,6 +108,10 @@ class Up2k(object):
thr.daemon = True
thr.start()
thr = threading.Thread(target=self._run_all_mtp)
thr.daemon = True
thr.start()
def log(self, msg, c=0):
self.log_func("up2k", msg + "\033[K", c)
@ -219,6 +224,9 @@ class Up2k(object):
_, flags = self._expr_idx_filter(flags)
a = ["\033[36m{}:\033[0m{}".format(k, v) for k, v in flags.items()]
self.log(" ".join(a))
reg = {}
path = os.path.join(ptop, ".hist", "up2k.snap")
if "e2d" in flags and os.path.exists(path):
@ -435,18 +443,7 @@ class Up2k(object):
mpool = False
if self.mtag.prefer_mt and not self.args.no_mtag_mt:
# mp.pool.ThreadPool and concurrent.futures.ThreadPoolExecutor
# both do crazy runahead so lets reinvent another wheel
nw = os.cpu_count() if hasattr(os, "cpu_count") else 4
if self.n_mtag_tags_added == -1:
self.log("using {}x {}".format(nw, self.mtag.backend))
self.n_mtag_tags_added = 0
mpool = Queue(nw)
for _ in range(nw):
thr = threading.Thread(target=self._tag_thr, args=(mpool,))
thr.daemon = True
thr.start()
mpool = self._start_mpool()
c2 = cur.connection.cursor()
c3 = cur.connection.cursor()
@ -457,16 +454,20 @@ class Up2k(object):
if c2.execute(q, (w[:16],)).fetchone():
continue
if "mtp" in flags:
q = "insert into mt values (?,'t:mtp','a')"
c2.execute(q, (w[:16],))
if rd.startswith("//") or fn.startswith("//"):
rd, fn = s3dec(rd, fn)
abspath = os.path.join(ptop, rd, fn)
self.pp.msg = "c{} {}".format(n_left, abspath)
args = c3, entags, w, abspath
args = [c3, entags, w, abspath]
if not mpool:
n_tags = self._tag_file(*args)
else:
mpool.put(args)
mpool.put(["mtag"] + args)
with self.mutex:
n_tags = self.n_mtag_tags_added
self.n_mtag_tags_added = 0
@ -481,17 +482,118 @@ class Up2k(object):
last_write = time.time()
n_buf = 0
if mpool:
for _ in range(mpool.maxsize):
mpool.put(None)
mpool.join()
self._stop_mpool(mpool)
c3.close()
c2.close()
return n_add, n_rm, True
def _run_all_mtp(self):
self.n_mtag_tags_added = 0
for ptop, flags in self.flags.items():
if "mtp" in flags:
self._run_one_mtp(ptop)
def _run_one_mtp(self, ptop):
# run the external tag parsers (the "mtp" volume flag) for the volume at
# ptop, covering each wark that still has a pending 't:mtp' row in the
# mt table; work is handed to the worker pool in batches.
# NOTE(review): indentation is lost in this view, so exactly which
# statements run while self.mutex is held should be confirmed
force = {}
parsers = {}
# each mtp entry is "tag=program" or "tag=f,program"; the "f," prefix
# marks the tag as forced, meaning the parser runs even if the file
# already has that tag (see task_parsers below)
for parser in self.flags[ptop]["mtp"]:
tag, parser = parser.split("=", 1)
if parser.lower().startswith("f,"):
parser = parser[2:]
force[tag] = True
parsers[tag] = parser
# number of pending rows, only used for the progress message
q = "select count(w) from mt where k = 't:mtp'"
with self.mutex:
cur = self.cur[ptop]
# two new cursors on the same connection; cur is used for the queries
# in this function, wcur is passed along inside each job
cur = cur.connection.cursor()
wcur = cur.connection.cursor()
n_left = cur.execute(q).fetchone()[0]
mpool = self._start_mpool()
# fetch up to four pending warks per queue slot in each round
batch_sz = mpool.maxsize * 4
seen = []
while True:
with self.mutex:
q = "select w from mt where k = 't:mtp' limit ?"
warks = cur.execute(q, (batch_sz,)).fetchall()
warks = [x[0] for x in warks]
# ignore warks which the previous round already returned; if nothing
# new is left then jobs stays empty and the loop ends below
warks = [x for x in warks if x not in seen]
seen = warks
jobs = []
for w in warks:
# remove the pending-marker for this wark
q = "delete from mt where w = ? and k = 't:mtp'"
cur.execute(q, (w,))
# w is compared against the first 16 characters of up.w to find the
# directory and filename of the file
q = "select rd, fn from up where substr(w,1,16)=? limit 1"
rd, fn = cur.execute(q, (w,)).fetchone()
rd, fn = s3dec(rd, fn)
abspath = os.path.join(ptop, rd, fn)
# names of the tags already stored for this wark
q = "select k from mt where w = ?"
have = cur.execute(q, (w,)).fetchall()
have = [x[0] for x in have]
if ".dur" not in have:
# skip non-audio
n_left -= 1
continue
# only run the parsers whose tag is forced or not stored yet
task_parsers = {
k: v for k, v in parsers.items() if k in force or k not in have
}
# five fields per job; the third is None here
# NOTE(review): presumably the entags slot read by _tag_thr, confirm
jobs.append([task_parsers, wcur, None, w, abspath])
if not jobs:
break
with self.mutex:
msg = "mtp: {} done, {} left"
self.log(msg.format(self.n_mtag_tags_added, n_left))
for j in jobs:
n_left -= 1
mpool.put(j)
with self.mutex:
cur.connection.commit()
# shut down the worker pool before the final commit and cleanup
self._stop_mpool(mpool)
with self.mutex:
cur.connection.commit()
wcur.close()
cur.close()
self.log("mtp finished")
def _start_mpool(self):
# mp.pool.ThreadPool and concurrent.futures.ThreadPoolExecutor
# both do crazy runahead so lets reinvent another wheel
nw = os.cpu_count() if hasattr(os, "cpu_count") else 4
if self.n_mtag_tags_added == -1:
self.log("using {}x {}".format(nw, self.mtag.backend))
self.n_mtag_tags_added = 0
mpool = Queue(nw)
for _ in range(nw):
thr = threading.Thread(target=self._tag_thr, args=(mpool,))
thr.daemon = True
thr.start()
return mpool
def _stop_mpool(self, mpool):
if not mpool:
return
for _ in range(mpool.maxsize):
mpool.put(None)
mpool.join()
def _tag_thr(self, q):
while True:
task = q.get()
@ -500,24 +602,38 @@ class Up2k(object):
return
try:
write_cur, entags, wark, abspath = task
tags = self.mtag.get(abspath)
parser, write_cur, entags, wark, abspath = task
if parser == "mtag":
tags = self.mtag.get(abspath)
else:
tags = self.mtag.get_bin(parser, abspath)
vtags = [
"\033[36m{} \033[33m{}".format(k, v) for k, v in tags.items()
]
self.log("{}\033[0m [{}]".format(" ".join(vtags), abspath))
with self.mutex:
n = self._tag_file(write_cur, entags, wark, abspath, tags)
self.n_mtag_tags_added += n
except:
ex = traceback.format_exc()
if parser == "mtag":
parser = self.mtag.backend
msg = "{} failed to read tags from {}:\n{}"
self.log(msg.format(self.mtag.backend, abspath, ex), c=3)
self.log(msg.format(parser, abspath, ex), c=3)
q.task_done()
def _tag_file(self, write_cur, entags, wark, abspath, tags=None):
tags = tags or self.mtag.get(abspath)
tags = {k: v for k, v in tags.items() if k in entags}
if not tags:
# indicate scanned without tags
tags = {"x": 0}
if tags is None:
tags = self.mtag.get(abspath)
if entags:
tags = {k: v for k, v in tags.items() if k in entags}
if not tags:
# indicate scanned without tags
tags = {"x": 0}
ret = 0
for k, v in tags.items():

View file

@ -61,6 +61,15 @@ HTTPCODE = {
}
# each entry is [flag, implied_flag]: if the first one is enabled then
# the second one gets enabled too. the users of this list walk it once
# from top to bottom, so a chain such as e2dsa -> e2ds -> e2d only
# resolves fully while the entries stay in this order
IMPLICATIONS = [
["e2dsa", "e2ds"],
["e2ds", "e2d"],
["e2tsr", "e2ts"],
["e2ts", "e2t"],
["e2t", "e2d"],
]
class Counter(object):
def __init__(self, v=0):
self.v = v