From edba7fffd31a2f0a5f862bbcbe8b9b99494eec20 Mon Sep 17 00:00:00 2001 From: ed Date: Fri, 25 Jul 2025 18:35:28 +0000 Subject: [PATCH] add landmarks (#182) --- README.md | 6 ++++- copyparty/authsrv.py | 59 ++++++++++++++++++++++++++++++++++++++++---- copyparty/cfg.py | 1 + copyparty/up2k.py | 4 +++ 4 files changed, 64 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 5f193349..48c5f6fa 100644 --- a/README.md +++ b/README.md @@ -1605,7 +1605,7 @@ config file example: w: * # anyone can upload here rw: ed # only user "ed" can read-write flags: - e2ds: # filesystem indexing is required for many of these: + e2ds # filesystem indexing is required for many of these: sz: 1k-3m # accept upload only if filesize in this range df: 4g # free disk space cannot go lower than this vmaxb: 1g # volume can never exceed 1 GiB @@ -1662,6 +1662,8 @@ this can instead be kept in a single place using the `--hist` argument, or the ` by default, the per-volume `up2k.db` sqlite3-database for `-e2d` and `-e2t` is stored next to the thumbnails according to the `--hist` option, but the global-option `--dbpath` and/or volflag `dbpath` can be used to put the database somewhere else +if your storage backend is unreliable (NFS or bad HDDs), you can specify one or more "landmarks" to look for before doing anything database-related. A landmark is a file which is always expected to exist inside the volume. This avoids spurious filesystem rescans in the event of an outage. One line per landmark (see example below) + note: * putting the hist-folders on an SSD is strongly recommended for performance * markdown edits are always stored in a local `.hist` subdirectory @@ -1679,6 +1681,8 @@ config file example: flags: hist: - # restore the default (/mnt/nas/pics/.hist/) hist: /mnt/nas/cache/pics/ # can be absolute path + landmark: me.jpg # /mnt/nas/pics/me.jpg must be readable to enable db + landmark: info/a.txt^=ok # and this textfile must start with "ok" ``` diff --git a/copyparty/authsrv.py b/copyparty/authsrv.py index c0f89b1d..7feedc64 100644 --- a/copyparty/authsrv.py +++ b/copyparty/authsrv.py @@ -386,20 +386,20 @@ class VFS(object): self.adot: dict[str, list[str]] = {} self.js_ls = {} self.js_htm = "" + self.all_vols: dict[str, VFS] = {} # flattened recursive + self.all_nodes: dict[str, VFS] = {} # also jumpvols/shares if realpath: rp = realpath + ("" if realpath.endswith(os.sep) else os.sep) vp = vpath + ("/" if vpath else "") self.histpath = os.path.join(realpath, ".hist") # db / thumbcache self.dbpath = self.histpath - self.all_vols = {vpath: self} # flattened recursive - self.all_nodes = {vpath: self} # also jumpvols/shares + self.all_vols[vpath] = self + self.all_nodes[vpath] = self self.all_aps = [(rp, [self])] self.all_vps = [(vp, self)] else: self.histpath = self.dbpath = "" - self.all_vols = {} - self.all_nodes = {} self.all_aps = [] self.all_vps = [] @@ -868,6 +868,53 @@ class VFS(object): return self + def check_landmarks(self) -> bool: + if self.dbv: + return True + + vps = self.flags.get("landmark") or [] + if not vps: + return True + + failed = "" + for vp in vps: + if "^=" in vp: + vp, zs = vp.split("^=", 1) + expect = zs.encode("utf-8") + else: + expect = b"" + + if self.log: + t = "checking [/%s] landmark [%s]" + self.log("vfs", t % (self.vpath, vp), 6) + + ap = "?" + try: + ap = self.canonical(vp) + with open(ap, "rb") as f: + buf = f.read(4096) + if not buf.startswith(expect): + t = "file [%s] does not start with the expected bytes %s" + failed = t % (ap, expect) + break + except Exception as ex: + t = "%r while trying to read [%s] => [%s]" + failed = t % (ex, vp, ap) + break + + if not failed: + return True + + if self.log: + t = "WARNING: landmark verification failed; %s; will now disable up2k database for volume [/%s]" + self.log("vfs", t % (failed, self.vpath), 3) + + for rm in "e2d e2t e2v".split(): + self.flags = {k: v for k, v in self.flags.items() if not k.startswith(rm)} + self.flags["d2d"] = True + self.flags["d2t"] = True + return False + if WINDOWS: re_vol = re.compile(r"^([a-zA-Z]:[\\/][^:]*|[^:]*):([^:]*):(.*)$") @@ -1501,7 +1548,7 @@ class AuthSrv(object): flags[name] = True return - zs = "ext_th mtp on403 on404 xbu xau xiu xbc xac xbr xar xbd xad xm xban" + zs = "ext_th landmark mtp on403 on404 xbu xau xiu xbc xac xbr xar xbd xad xm xban" if name not in zs.split(): if value is True: t = "└─add volflag [{}] = {} ({})" @@ -2237,6 +2284,8 @@ class AuthSrv(object): t = "WARNING: volume [/%s]: invalid value specified for ext-th: %s" self.log(t % (vol.vpath, etv), 3) + vol.check_landmarks() + # d2d drops all database features for a volume for grp, rm in [["d2d", "e2d"], ["d2t", "e2t"], ["d2d", "e2v"]]: if not vol.flags.get(grp, False): diff --git a/copyparty/cfg.py b/copyparty/cfg.py index 24f748bc..640259ed 100644 --- a/copyparty/cfg.py +++ b/copyparty/cfg.py @@ -222,6 +222,7 @@ flagcats = { "d2d": "disables all database stuff, overrides -e2*", "hist=/tmp/cdb": "puts thumbnails and indexes at that location", "dbpath=/tmp/cdb": "puts indexes at that location", + "landmark=foo": "disable db if file foo doesn't exist", "scan=60": "scan for new files every 60sec, same as --re-maxage", "nohash=\\.iso$": "skips hashing file contents if path matches *.iso", "noidx=\\.iso$": "fully ignores the contents at paths matching *.iso", diff --git a/copyparty/up2k.py b/copyparty/up2k.py index 78b83389..42b44f79 100644 --- a/copyparty/up2k.py +++ b/copyparty/up2k.py @@ -1379,6 +1379,10 @@ class Up2k(object): t = "volume /%s at [%s] is empty; will not be indexed as this could be due to an offline filesystem" self.log(t % (vol.vpath, rtop), 6) return True, False + if not vol.check_landmarks(): + t = "volume /%s at [%s] will not be indexed due to bad landmarks" + self.log(t % (vol.vpath, rtop), 6) + return True, False n_add, _, _ = self._build_dir( db,