index xattrs as tags; closes #134

This commit is contained in:
ed 2026-01-13 02:50:32 +00:00
parent bc24604a83
commit 8240ef6151
7 changed files with 83 additions and 10 deletions

View file

@ -87,6 +87,7 @@ built in Norway 🇳🇴 with contributions from [not-norway](https://github.com
* [other flags](#other-flags)
* [database location](#database-location) - in-volume (`.hist/up2k.db`, default) or somewhere else
* [metadata from audio files](#metadata-from-audio-files) - set `-e2t` to index tags on upload
* [metadata from xattrs](#metadata-from-xattrs) - unix extended file attributes
* [file parser plugins](#file-parser-plugins) - provide custom parsers to index additional tags
* [event hooks](#event-hooks) - trigger a program on uploads, renames etc ([examples](./bin/hooks/))
* [zeromq](#zeromq) - event-hooks can send zeromq messages
@ -1877,6 +1878,20 @@ see the beautiful mess of a dictionary in [mtag.py](https://github.com/9001/copy
`--mtag-to` sets the tag-scan timeout; very high default (60 sec) to cater for zfs and other randomly-freezing filesystems. Lower values like 10 are usually safe, allowing for faster processing of tricky files
### metadata from xattrs
unix extended file attributes can be indexed into the db and made searchable;
* `--db-xattr user.foo,user.bar` will index the xattrs `user.foo` and `user.bar`,
* `--db-xattr user.foo=foo,user.bar=bar` will index them with the names `foo` and `bar`,
* `--db-xattr ~~user.foo,user.bar` will index everything *except* `user.foo` and `user.bar`,
* `--db-xattr ~~` will index everything
however, note that each tag must also be enabled with `-mte`, so here are some complete examples:
* `-e2ts --db-xattr user.foo,user.bar -mte +user.foo,user.bar`
* `-e2ts --db-xattr user.foo=foo,user.bar=bar -mte +foo,bar`
## file parser plugins
provide custom parsers to index additional tags, also see [./bin/mtag/README.md](./bin/mtag/README.md)

View file

@ -1781,10 +1781,12 @@ def add_db_metadata(ap):
ap2.add_argument("--mtag-mt", metavar="CORES", type=int, default=CORES, help="num cpu cores to use for tag scanning")
ap2.add_argument("--mtag-v", action="store_true", help="verbose tag scanning; print errors from mtp subprocesses and such")
ap2.add_argument("--mtag-vv", action="store_true", help="debug mtp settings and mutagen/FFprobe parsers")
ap2.add_argument("--db-xattr", metavar="t,t", type=u, default="", help="read file xattrs as metadata tags; [\033[32ma,b\033[0m] reads keys \033[33ma\033[0m and \033[33mb\033[0m as tags \033[33ma\033[0m and \033[33mb\033[0m, [\033[32ma=foo,b=bar\033[0m] reads keys \033[33ma\033[0m and \033[33mb\033[0m as tags \033[33mfoo\033[0m and \033[33mbar\033[0m, [\033[32m~~a,b\033[0m] does everything except \033[33ma\033[0m and \033[33mb\033[0m, [\033[32m~~\033[0m] does everything. NOTE: Each tag must also be enabled with \033[33m-mte\033[0m (volflag=db_xattr)")
ap2.add_argument("-mtm", metavar="M=t,t,t", type=u, action="append", help="\033[34mREPEATABLE:\033[0m add/replace metadata mapping")
ap2.add_argument("-mte", metavar="M,M,M", type=u, help="tags to index/display (comma-sep.); either an entire replacement list, or add/remove stuff on the default-list with +foo or /bar", default=DEF_MTE)
ap2.add_argument("-mth", metavar="M,M,M", type=u, help="tags to hide by default (comma-sep.); assign/add/remove same as \033[33m-mte\033[0m", default=DEF_MTH)
ap2.add_argument("-mtp", metavar="M=[f,]BIN", type=u, action="append", help="\033[34mREPEATABLE:\033[0m read tag \033[33mM\033[0m using program \033[33mBIN\033[0m to parse the file")
ap2.add_argument("--have-db-xattr", action="store_true", help=argparse.SUPPRESS)
def add_txt(ap):

View file

@ -2591,6 +2591,19 @@ class AuthSrv(object):
vol.check_landmarks()
if vol.flags.get("db_xattr"):
self.args.have_db_xattr = True
zs = str(vol.flags["db_xattr"])
neg = zs.startswith("~~")
if neg:
zs = zs[2:]
zsl = [x.strip() for x in zs.split(",")]
zsl = [x for x in zsl if x]
if neg:
vol.flags["db_xattr_no"] = set(zsl)
else:
vol.flags["db_xattr_yes"] = zsl
# d2d drops all database features for a volume
for grp, rm in [["d2d", "e2d"], ["d2t", "e2t"], ["d2d", "e2v"]]:
if not vol.flags.get(grp, False):

View file

@ -101,6 +101,7 @@ def vf_vmap() -> dict[str, str]:
"chmod_d",
"chmod_f",
"dbd",
"db_xattr",
"du_who",
"epilogues",
"ufavico",
@ -286,6 +287,7 @@ flagcats = {
"dotsrch": "show dotfiles in search results",
"nodotsrch": "hide dotfiles in search results (default)",
"srch_excl": "exclude search results with URL matching this regex",
"db_xattr=user.foo,user.bar": "index file xattrs as media-tags",
},
'database, audio tags\n"mte", "mth", "mtp", "mtm" all work the same as -mte, -mth, ...': {
"mte=artist,title": "media-tags to index/display",

View file

@ -465,6 +465,8 @@ class MTag(object):
"ffprobe" if args.no_mutagen or (HAVE_FFPROBE and EXE) else "mutagen"
)
self.can_ffprobe = HAVE_FFPROBE and not args.no_mtag_ff
self.read_xattrs = args.have_db_xattr
self.get = self._get_xattr if self.read_xattrs else self._get_main
mappings = args.mtm
or_ffprobe = " or FFprobe"
@ -486,7 +488,13 @@ class MTag(object):
msg = "found FFprobe but it was disabled by --no-mtag-ff"
self.log(msg, c=3)
if self.read_xattrs and not self.usable:
t = "don't have the necessary dependencies to read conventional media tags, but will read xattrs"
self.log(t)
self.usable = True
if not self.usable:
self._get = None
if EXE:
t = "copyparty.exe cannot use mutagen; need ffprobe.exe to read media tags: "
self.log(t + FFMPEG_URL)
@ -645,7 +653,36 @@ class MTag(object):
return r1
def get(self, abspath: str) -> dict[str, Union[str, float]]:
def _get_xattr(
    self, abspath: str, vf: dict[str, Any]
) -> dict[str, Union[str, float]]:
    """
    Collect tags for `abspath`, then add the file's xattrs as extra tags.

    The regular tags come from `self._get_main` when `self._get` is set;
    otherwise the result starts out empty and only xattrs are returned.

    Which xattrs are read depends on the volume flags in `vf`:

    * `db_xattr_no`: collection of xattr names to skip; every other xattr
      on the file is stored under its own name
    * `db_xattr_yes`: list of entries, each either `key` (stored as `key`)
      or `key=name` (xattr `key` stored as tag `name`)

    xattr values are decoded as UTF-8, replacing undecodable bytes.
    Failures while reading xattrs never propagate to the caller.
    """
    ret = self._get_main(abspath, vf) if self._get else {}

    if "db_xattr_no" in vf:
        try:
            neg = vf["db_xattr_no"]
            zsl = os.listxattr(abspath)
            zsl = [x for x in zsl if x not in neg]
            for xattr in zsl:
                zb = os.getxattr(abspath, xattr)
                ret[xattr] = zb.decode("utf-8", "replace")
        except Exception:
            # self.log takes one pre-formatted message plus a color code,
            # so the message must be formatted before the call
            t = "failed to read xattrs from [%s]\n%s"
            self.log(t % (abspath, min_ex()), c=3)

    elif "db_xattr_yes" in vf:
        for xattr in vf["db_xattr_yes"]:
            if "=" in xattr:
                # "key=name": read xattr `key`, store it as tag `name`
                xattr, name = xattr.split("=", 1)
            else:
                name = xattr

            try:
                zs = os.getxattr(abspath, xattr)
                ret[name] = zs.decode("utf-8", "replace")
            except Exception:
                # best-effort; a requested xattr may simply not exist
                # on this particular file
                pass

    return ret
def _get_main(
self, abspath: str, vf: dict[str, Any]
) -> dict[str, Union[str, float]]:
ext = abspath.split(".")[-1].lower()
if ext not in self.args.au_unpk:
return self._get(abspath)

View file

@ -130,6 +130,7 @@ class Mpqe(object):
self,
mtp: dict[str, MParser],
entags: set[str],
vf: dict[str, Any],
w: str,
abspath: str,
oth_tags: dict[str, Any],
@ -137,6 +138,7 @@ class Mpqe(object):
# mtp empty = mtag
self.mtp = mtp
self.entags = entags
self.vf = vf
self.w = w
self.abspath = abspath
self.oth_tags = oth_tags
@ -2199,7 +2201,7 @@ class Up2k(object):
abspath = djoin(ptop, rd, fn)
self.pp.msg = "c%d %s" % (nq, abspath)
if not mpool:
n_tags = self._tagscan_file(cur, entags, w, abspath, ip, at, un)
n_tags = self._tagscan_file(cur, entags, flags, w, abspath, ip, at, un)
else:
oth_tags = {}
if ip:
@ -2209,7 +2211,7 @@ class Up2k(object):
if un:
oth_tags["up_by"] = un
mpool.put(Mpqe({}, entags, w, abspath, oth_tags))
mpool.put(Mpqe({}, entags, flags, w, abspath, oth_tags))
with self.mutex:
n_tags = len(self._flush_mpool(cur))
@ -2316,9 +2318,10 @@ class Up2k(object):
return
entags = self.entags[ptop]
vf = self.flags[ptop]
parsers = {}
for parser in self.flags[ptop]["mtp"]:
for parser in vf["mtp"]:
try:
parser = MParser(parser)
except:
@ -2393,7 +2396,7 @@ class Up2k(object):
if un:
oth_tags["up_by"] = un
jobs.append(Mpqe(parsers, set(), w, abspath, oth_tags))
jobs.append(Mpqe(parsers, set(), vf, w, abspath, oth_tags))
in_progress[w] = True
with self.mutex:
@ -2524,7 +2527,7 @@ class Up2k(object):
return
for _ in range(mpool.maxsize):
mpool.put(Mpqe({}, set(), "", "", {}))
mpool.put(Mpqe({}, set(), {}, "", "", {}))
mpool.join()
@ -2543,7 +2546,7 @@ class Up2k(object):
t = "tag-thr: {}({})"
self.log(t.format(self.mtag.backend, qe.abspath), "90")
tags = self.mtag.get(qe.abspath) if st.st_size else {}
tags = self.mtag.get(qe.abspath, qe.vf) if st.st_size else {}
else:
if self.args.mtag_vv:
t = "tag-thr: {}({})"
@ -2576,6 +2579,7 @@ class Up2k(object):
self,
write_cur: "sqlite3.Cursor",
entags: set[str],
vf: dict[str, Any],
wark: str,
abspath: str,
ip: str,
@ -2594,7 +2598,7 @@ class Up2k(object):
return 0
try:
tags = self.mtag.get(abspath) if st.st_size else {}
tags = self.mtag.get(abspath, vf) if st.st_size else {}
except Exception as ex:
self._log_tag_err("", abspath, ex)
return 0
@ -5404,7 +5408,7 @@ class Up2k(object):
# self.log("\n " + repr([ptop, rd, fn]))
abspath = djoin(ptop, rd, fn)
try:
tags = self.mtag.get(abspath) if sz else {}
tags = self.mtag.get(abspath, self.flags[ptop]) if sz else {}
ntags1 = len(tags)
parsers = self._get_parsers(ptop, tags, abspath)
if self.args.mtag_vv:

View file

@ -164,7 +164,7 @@ class Cfg(Namespace):
ex = "ctl_re db_act forget_ip idp_cookie idp_store k304 loris no304 nosubtle qr_pin qr_wait re_maxage rproxy rsp_jtr rsp_slp s_wr_slp snap_wri theme themes turbo u2ow zipmaxn zipmaxs"
ka.update(**{k: 0 for k in ex.split()})
ex = "ah_alg bname chdir chmod_f chpw_db doctitle df epilogues exit favico ipa ipar html_head html_head_d html_head_s idp_login idp_logout lg_sba lg_sbf log_date log_fk md_sba md_sbf name og_desc og_site og_th og_title og_title_a og_title_v og_title_i opds_exts preadmes prologues readmes shr tcolor textfiles txt_eol ufavico ufavico_h unlist vname xff_src zipmaxt R RS SR"
ex = "ah_alg bname chdir chmod_f chpw_db db_xattr doctitle df epilogues exit favico ipa ipar html_head html_head_d html_head_s idp_login idp_logout lg_sba lg_sbf log_date log_fk md_sba md_sbf name og_desc og_site og_th og_title og_title_a og_title_v og_title_i opds_exts preadmes prologues readmes shr tcolor textfiles txt_eol ufavico ufavico_h unlist vname xff_src zipmaxt R RS SR"
ka.update(**{k: "" for k in ex.split()})
ex = "apnd_who ban_403 ban_404 ban_422 ban_pw ban_pwc ban_url dont_ban cachectl http_vary rss_fmt_d rss_fmt_t spinner"