From 8240ef61517cc51f72dd00a26ef4f4c612066c99 Mon Sep 17 00:00:00 2001 From: ed Date: Tue, 13 Jan 2026 02:50:32 +0000 Subject: [PATCH] index xattrs as tags; closes #134 --- README.md | 15 +++++++++++++++ copyparty/__main__.py | 2 ++ copyparty/authsrv.py | 13 +++++++++++++ copyparty/cfg.py | 2 ++ copyparty/mtag.py | 39 ++++++++++++++++++++++++++++++++++++++- copyparty/up2k.py | 20 ++++++++++++-------- tests/util.py | 2 +- 7 files changed, 83 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index f153b929..3e79ccb2 100644 --- a/README.md +++ b/README.md @@ -87,6 +87,7 @@ built in Norway 🇳🇴 with contributions from [not-norway](https://github.com * [other flags](#other-flags) * [database location](#database-location) - in-volume (`.hist/up2k.db`, default) or somewhere else * [metadata from audio files](#metadata-from-audio-files) - set `-e2t` to index tags on upload + * [metadata from xattrs](#metadata-from-xattrs) - unix extended file attributes * [file parser plugins](#file-parser-plugins) - provide custom parsers to index additional tags * [event hooks](#event-hooks) - trigger a program on uploads, renames etc ([examples](./bin/hooks/)) * [zeromq](#zeromq) - event-hooks can send zeromq messages @@ -1877,6 +1878,20 @@ see the beautiful mess of a dictionary in [mtag.py](https://github.com/9001/copy `--mtag-to` sets the tag-scan timeout; very high default (60 sec) to cater for zfs and other randomly-freezing filesystems. Lower values like 10 are usually safe, allowing for faster processing of tricky files +### metadata from xattrs + +unix extended file attributes can be indexed into the db and made searchable; + +* `--db-xattr user.foo,user.bar` will index the xattrs `user.foo` and `user.bar`, +* `--db-xattr user.foo=foo,user.bar=bar` will index them with the names `foo` and `bar`, +* `--db-xattr ~~user.foo,user.bar` will index everything *except* `user.foo` and `user.bar`, +* `--db-xattr ~~` will index everything + +however note that the tags must also be enabled with `-mte` so here are some complete examples: +* `-e2ts --db-xattr user.foo,user.bar -mte +user.foo,user.bar` +* `-e2ts --db-xattr user.foo=foo,user.bar=bar -mte +foo,bar` + + ## file parser plugins provide custom parsers to index additional tags, also see [./bin/mtag/README.md](./bin/mtag/README.md) diff --git a/copyparty/__main__.py b/copyparty/__main__.py index b2186d36..d45f165c 100644 --- a/copyparty/__main__.py +++ b/copyparty/__main__.py @@ -1781,10 +1781,12 @@ def add_db_metadata(ap): ap2.add_argument("--mtag-mt", metavar="CORES", type=int, default=CORES, help="num cpu cores to use for tag scanning") ap2.add_argument("--mtag-v", action="store_true", help="verbose tag scanning; print errors from mtp subprocesses and such") ap2.add_argument("--mtag-vv", action="store_true", help="debug mtp settings and mutagen/FFprobe parsers") + ap2.add_argument("--db-xattr", metavar="t,t", type=u, default="", help="read file xattrs as metadata tags; [\033[32ma,b\033[0m] reads keys \033[33ma\033[0m and \033[33mb\033[0m as tags \033[33ma\033[0m and \033[33mb\033[0m, [\033[32ma=foo,b=bar\033[0m] reads keys \033[33ma\033[0m and \033[33mb\033[0m as tags \033[33mfoo\033[0m and \033[33mbar\033[0m, [\033[32m~~a,b\033[0m] does everything except \033[33ma\033[0m and \033[33mb\033[0m, [\033[32m~~\033[0m] does everything. NOTE: Each tag must also be enabled with \033[33m-mte\033[0m (volflag=db_xattr)") ap2.add_argument("-mtm", metavar="M=t,t,t", type=u, action="append", help="\033[34mREPEATABLE:\033[0m add/replace metadata mapping") ap2.add_argument("-mte", metavar="M,M,M", type=u, help="tags to index/display (comma-sep.); either an entire replacement list, or add/remove stuff on the default-list with +foo or /bar", default=DEF_MTE) ap2.add_argument("-mth", metavar="M,M,M", type=u, help="tags to hide by default (comma-sep.); assign/add/remove same as \033[33m-mte\033[0m", default=DEF_MTH) ap2.add_argument("-mtp", metavar="M=[f,]BIN", type=u, action="append", help="\033[34mREPEATABLE:\033[0m read tag \033[33mM\033[0m using program \033[33mBIN\033[0m to parse the file") + ap2.add_argument("--have-db-xattr", action="store_true", help=argparse.SUPPRESS) def add_txt(ap): diff --git a/copyparty/authsrv.py b/copyparty/authsrv.py index 59830e1c..0002f2c8 100644 --- a/copyparty/authsrv.py +++ b/copyparty/authsrv.py @@ -2591,6 +2591,19 @@ class AuthSrv(object): vol.check_landmarks() + if vol.flags.get("db_xattr"): + self.args.have_db_xattr = True + zs = str(vol.flags["db_xattr"]) + neg = zs.startswith("~~") + if neg: + zs = zs[2:] + zsl = [x.strip() for x in zs.split(",")] + zsl = [x for x in zsl if x] + if neg: + vol.flags["db_xattr_no"] = set(zsl) + else: + vol.flags["db_xattr_yes"] = zsl + # d2d drops all database features for a volume for grp, rm in [["d2d", "e2d"], ["d2t", "e2t"], ["d2d", "e2v"]]: if not vol.flags.get(grp, False): diff --git a/copyparty/cfg.py b/copyparty/cfg.py index d33287e9..163fd4ad 100644 --- a/copyparty/cfg.py +++ b/copyparty/cfg.py @@ -101,6 +101,7 @@ def vf_vmap() -> dict[str, str]: "chmod_d", "chmod_f", "dbd", + "db_xattr", "du_who", "epilogues", "ufavico", @@ -286,6 +287,7 @@ flagcats = { "dotsrch": "show dotfiles in search results", "nodotsrch": "hide dotfiles in search results (default)", "srch_excl": "exclude search results with URL matching this regex", + "db_xattr=user.foo,user.bar": "index file xattrs as media-tags", }, 'database, audio tags\n"mte", "mth", "mtp", "mtm" all work the same as -mte, -mth, ...': { "mte=artist,title": "media-tags to index/display", diff --git a/copyparty/mtag.py b/copyparty/mtag.py index 41c28a09..cedc0c27 100644 --- a/copyparty/mtag.py +++ b/copyparty/mtag.py @@ -465,6 +465,8 @@ class MTag(object): "ffprobe" if args.no_mutagen or (HAVE_FFPROBE and EXE) else "mutagen" ) self.can_ffprobe = HAVE_FFPROBE and not args.no_mtag_ff + self.read_xattrs = args.have_db_xattr + self.get = self._get_xattr if self.read_xattrs else self._get_main mappings = args.mtm or_ffprobe = " or FFprobe" @@ -486,7 +488,13 @@ class MTag(object): msg = "found FFprobe but it was disabled by --no-mtag-ff" self.log(msg, c=3) + if self.read_xattrs and not self.usable: + t = "don't have the necessary dependencies to read conventional media tags, but will read xattrs" + self.log(t) + self.usable = True + if not self.usable: + self._get = None if EXE: t = "copyparty.exe cannot use mutagen; need ffprobe.exe to read media tags: " self.log(t + FFMPEG_URL) @@ -645,7 +653,36 @@ class MTag(object): return r1 - def get(self, abspath: str) -> dict[str, Union[str, float]]: + def _get_xattr( + self, abspath: str, vf: dict[str, Any] + ) -> dict[str, Union[str, float]]: + ret = self._get_main(abspath, vf) if self._get else {} + if "db_xattr_no" in vf: + try: + neg = vf["db_xattr_no"] + zsl = os.listxattr(abspath) + zsl = [x for x in zsl if x not in neg] + for xattr in zsl: + zb = os.getxattr(abspath, xattr) + ret[xattr] = zb.decode("utf-8", "replace") + except: + self.log("failed to read xattrs from [%s]\n%s", abspath, min_ex(), 3) + elif "db_xattr_yes" in vf: + for xattr in vf["db_xattr_yes"]: + if "=" in xattr: + xattr, name = xattr.split("=", 1) + else: + name = xattr + try: + zs = os.getxattr(abspath, xattr) + ret[name] = zs.decode("utf-8", "replace") + except: + pass + return ret + + def _get_main( + self, abspath: str, vf: dict[str, Any] + ) -> dict[str, Union[str, float]]: ext = abspath.split(".")[-1].lower() if ext not in self.args.au_unpk: return self._get(abspath) diff --git a/copyparty/up2k.py b/copyparty/up2k.py index 06696cf4..3d0be53c 100644 --- a/copyparty/up2k.py +++ b/copyparty/up2k.py @@ -130,6 +130,7 @@ class Mpqe(object): self, mtp: dict[str, MParser], entags: set[str], + vf: dict[str, Any], w: str, abspath: str, oth_tags: dict[str, Any], @@ -137,6 +138,7 @@ class Mpqe(object): # mtp empty = mtag self.mtp = mtp self.entags = entags + self.vf = vf self.w = w self.abspath = abspath self.oth_tags = oth_tags @@ -2199,7 +2201,7 @@ class Up2k(object): abspath = djoin(ptop, rd, fn) self.pp.msg = "c%d %s" % (nq, abspath) if not mpool: - n_tags = self._tagscan_file(cur, entags, w, abspath, ip, at, un) + n_tags = self._tagscan_file(cur, entags, flags, w, abspath, ip, at, un) else: oth_tags = {} if ip: @@ -2209,7 +2211,7 @@ class Up2k(object): if un: oth_tags["up_by"] = un - mpool.put(Mpqe({}, entags, w, abspath, oth_tags)) + mpool.put(Mpqe({}, entags, flags, w, abspath, oth_tags)) with self.mutex: n_tags = len(self._flush_mpool(cur)) @@ -2316,9 +2318,10 @@ class Up2k(object): return entags = self.entags[ptop] + vf = self.flags[ptop] parsers = {} - for parser in self.flags[ptop]["mtp"]: + for parser in vf["mtp"]: try: parser = MParser(parser) except: @@ -2393,7 +2396,7 @@ class Up2k(object): if un: oth_tags["up_by"] = un - jobs.append(Mpqe(parsers, set(), w, abspath, oth_tags)) + jobs.append(Mpqe(parsers, set(), vf, w, abspath, oth_tags)) in_progress[w] = True with self.mutex: @@ -2524,7 +2527,7 @@ class Up2k(object): return for _ in range(mpool.maxsize): - mpool.put(Mpqe({}, set(), "", "", {})) + mpool.put(Mpqe({}, set(), {}, "", "", {})) mpool.join() @@ -2543,7 +2546,7 @@ class Up2k(object): t = "tag-thr: {}({})" self.log(t.format(self.mtag.backend, qe.abspath), "90") - tags = self.mtag.get(qe.abspath) if st.st_size else {} + tags = self.mtag.get(qe.abspath, qe.vf) if st.st_size else {} else: if self.args.mtag_vv: t = "tag-thr: {}({})" @@ -2576,6 +2579,7 @@ class Up2k(object): self, write_cur: "sqlite3.Cursor", entags: set[str], + vf: dict[str, Any], wark: str, abspath: str, ip: str, @@ -2594,7 +2598,7 @@ class Up2k(object): return 0 try: - tags = self.mtag.get(abspath) if st.st_size else {} + tags = self.mtag.get(abspath, vf) if st.st_size else {} except Exception as ex: self._log_tag_err("", abspath, ex) return 0 @@ -5404,7 +5408,7 @@ class Up2k(object): # self.log("\n " + repr([ptop, rd, fn])) abspath = djoin(ptop, rd, fn) try: - tags = self.mtag.get(abspath) if sz else {} + tags = self.mtag.get(abspath, self.flags[ptop]) if sz else {} ntags1 = len(tags) parsers = self._get_parsers(ptop, tags, abspath) if self.args.mtag_vv: diff --git a/tests/util.py b/tests/util.py index 931d98f0..bc66512c 100644 --- a/tests/util.py +++ b/tests/util.py @@ -164,7 +164,7 @@ class Cfg(Namespace): ex = "ctl_re db_act forget_ip idp_cookie idp_store k304 loris no304 nosubtle qr_pin qr_wait re_maxage rproxy rsp_jtr rsp_slp s_wr_slp snap_wri theme themes turbo u2ow zipmaxn zipmaxs" ka.update(**{k: 0 for k in ex.split()}) - ex = "ah_alg bname chdir chmod_f chpw_db doctitle df epilogues exit favico ipa ipar html_head html_head_d html_head_s idp_login idp_logout lg_sba lg_sbf log_date log_fk md_sba md_sbf name og_desc og_site og_th og_title og_title_a og_title_v og_title_i opds_exts preadmes prologues readmes shr tcolor textfiles txt_eol ufavico ufavico_h unlist vname xff_src zipmaxt R RS SR" + ex = "ah_alg bname chdir chmod_f chpw_db db_xattr doctitle df epilogues exit favico ipa ipar html_head html_head_d html_head_s idp_login idp_logout lg_sba lg_sbf log_date log_fk md_sba md_sbf name og_desc og_site og_th og_title og_title_a og_title_v og_title_i opds_exts preadmes prologues readmes shr tcolor textfiles txt_eol ufavico ufavico_h unlist vname xff_src zipmaxt R RS SR" ka.update(**{k: "" for k in ex.split()}) ex = "apnd_who ban_403 ban_404 ban_422 ban_pw ban_pwc ban_url dont_ban cachectl http_vary rss_fmt_d rss_fmt_t spinner"