separate histpath and dbpath options (#149)

the up2k databases are, by default, stored in a `.hist` subfolder
inside each volume, next to thumbnails and transcoded audio

add a new option for storing the databases in a separate location,
making it possible to tune the underlying filesystem for optimal
performance characteristics

the `--hist` global-option and `hist` volflag still behave like
before, but `--dbpath` and volflag `dbpath` will override the
histpath for the up2k-db and up2k-snap exclusively
This commit is contained in:
ed 2025-03-30 16:08:28 +00:00
parent 19ee64e5e3
commit e1b9ac631f
9 changed files with 87 additions and 19 deletions

View file

@ -331,7 +331,8 @@ roughly sorted by chance of encounter
* `--th-ff-jpg` may fix video thumbnails on some FFmpeg versions (macos, some linux) * `--th-ff-jpg` may fix video thumbnails on some FFmpeg versions (macos, some linux)
* `--th-ff-swr` may fix audio thumbnails on some FFmpeg versions * `--th-ff-swr` may fix audio thumbnails on some FFmpeg versions
* if the `up2k.db` (filesystem index) is on a samba-share or network disk, you'll get unpredictable behavior if the share is disconnected for a bit * if the `up2k.db` (filesystem index) is on a samba-share or network disk, you'll get unpredictable behavior if the share is disconnected for a bit
* use `--hist` or the `hist` volflag (`-v [...]:c,hist=/tmp/foo`) to place the db on a local disk instead * use `--hist` or the `hist` volflag (`-v [...]:c,hist=/tmp/foo`) to place the db and thumbnails on a local disk instead
* or, if you only want to move the db (and not the thumbnails), then use `--dbpath` or the `dbpath` volflag
* all volumes must exist / be available on startup; up2k (mtp especially) gets funky otherwise * all volumes must exist / be available on startup; up2k (mtp especially) gets funky otherwise
* probably more, pls let me know * probably more, pls let me know
@ -384,7 +385,8 @@ same order here too
* this is an msys2 bug, the regular windows edition of python is fine * this is an msys2 bug, the regular windows edition of python is fine
* VirtualBox: sqlite throws `Disk I/O Error` when running in a VM and the up2k database is in a vboxsf * VirtualBox: sqlite throws `Disk I/O Error` when running in a VM and the up2k database is in a vboxsf
* use `--hist` or the `hist` volflag (`-v [...]:c,hist=/tmp/foo`) to place the db inside the vm instead * use `--hist` or the `hist` volflag (`-v [...]:c,hist=/tmp/foo`) to place the db and thumbnails inside the vm instead
* or, if you only want to move the db (and not the thumbnails), then use `--dbpath` or the `dbpath` volflag
* also happens on mergerfs, so put the db elsewhere * also happens on mergerfs, so put the db elsewhere
* Ubuntu: dragging files from certain folders into firefox or chrome is impossible * Ubuntu: dragging files from certain folders into firefox or chrome is impossible
@ -1594,6 +1596,8 @@ copyparty creates a subfolder named `.hist` inside each volume where it stores t
this can instead be kept in a single place using the `--hist` argument, or the `hist=` volflag, or a mix of both: this can instead be kept in a single place using the `--hist` argument, or the `hist=` volflag, or a mix of both:
* `--hist ~/.cache/copyparty -v ~/music::r:c,hist=-` sets `~/.cache/copyparty` as the default place to put volume info, but `~/music` gets the regular `.hist` subfolder (`-` restores default behavior) * `--hist ~/.cache/copyparty -v ~/music::r:c,hist=-` sets `~/.cache/copyparty` as the default place to put volume info, but `~/music` gets the regular `.hist` subfolder (`-` restores default behavior)
by default, the per-volume `up2k.db` sqlite3-database for `-e2d` and `-e2t` is stored next to the thumbnails according to the `--hist` option, but the global-option `--dbpath` and/or volflag `dbpath` can be used to put the database somewhere else
note: note:
* putting the hist-folders on an SSD is strongly recommended for performance * putting the hist-folders on an SSD is strongly recommended for performance
* markdown edits are always stored in a local `.hist` subdirectory * markdown edits are always stored in a local `.hist` subdirectory

View file

@ -1393,6 +1393,7 @@ def add_db_general(ap, hcores):
ap2.add_argument("-e2vu", action="store_true", help="on hash mismatch: update the database with the new hash") ap2.add_argument("-e2vu", action="store_true", help="on hash mismatch: update the database with the new hash")
ap2.add_argument("-e2vp", action="store_true", help="on hash mismatch: panic and quit copyparty") ap2.add_argument("-e2vp", action="store_true", help="on hash mismatch: panic and quit copyparty")
ap2.add_argument("--hist", metavar="PATH", type=u, default="", help="where to store volume data (db, thumbs); default is a folder named \".hist\" inside each volume (volflag=hist)") ap2.add_argument("--hist", metavar="PATH", type=u, default="", help="where to store volume data (db, thumbs); default is a folder named \".hist\" inside each volume (volflag=hist)")
ap2.add_argument("--dbpath", metavar="PATH", type=u, default="", help="override where the volume databases are to be placed; default is the same as \033[33m--hist\033[0m (volflag=dbpath)")
ap2.add_argument("--no-hash", metavar="PTN", type=u, default="", help="regex: disable hashing of matching absolute-filesystem-paths during e2ds folder scans (volflag=nohash)") ap2.add_argument("--no-hash", metavar="PTN", type=u, default="", help="regex: disable hashing of matching absolute-filesystem-paths during e2ds folder scans (volflag=nohash)")
ap2.add_argument("--no-idx", metavar="PTN", type=u, default=noidx, help="regex: disable indexing of matching absolute-filesystem-paths during e2ds folder scans (volflag=noidx)") ap2.add_argument("--no-idx", metavar="PTN", type=u, default=noidx, help="regex: disable indexing of matching absolute-filesystem-paths during e2ds folder scans (volflag=noidx)")
ap2.add_argument("--no-dirsz", action="store_true", help="do not show total recursive size of folders in listings, show inode size instead; slightly faster (volflag=nodirsz)") ap2.add_argument("--no-dirsz", action="store_true", help="do not show total recursive size of folders in listings, show inode size instead; slightly faster (volflag=nodirsz)")

View file

@ -360,6 +360,7 @@ class VFS(object):
self.badcfg1 = False self.badcfg1 = False
self.nodes: dict[str, VFS] = {} # child nodes self.nodes: dict[str, VFS] = {} # child nodes
self.histtab: dict[str, str] = {} # all realpath->histpath self.histtab: dict[str, str] = {} # all realpath->histpath
self.dbpaths: dict[str, str] = {} # all realpath->dbpath
self.dbv: Optional[VFS] = None # closest full/non-jump parent self.dbv: Optional[VFS] = None # closest full/non-jump parent
self.lim: Optional[Lim] = None # upload limits; only set for dbv self.lim: Optional[Lim] = None # upload limits; only set for dbv
self.shr_src: Optional[tuple[VFS, str]] = None # source vfs+rem of a share self.shr_src: Optional[tuple[VFS, str]] = None # source vfs+rem of a share
@ -381,12 +382,13 @@ class VFS(object):
rp = realpath + ("" if realpath.endswith(os.sep) else os.sep) rp = realpath + ("" if realpath.endswith(os.sep) else os.sep)
vp = vpath + ("/" if vpath else "") vp = vpath + ("/" if vpath else "")
self.histpath = os.path.join(realpath, ".hist") # db / thumbcache self.histpath = os.path.join(realpath, ".hist") # db / thumbcache
self.dbpath = self.histpath
self.all_vols = {vpath: self} # flattened recursive self.all_vols = {vpath: self} # flattened recursive
self.all_nodes = {vpath: self} # also jumpvols/shares self.all_nodes = {vpath: self} # also jumpvols/shares
self.all_aps = [(rp, self)] self.all_aps = [(rp, self)]
self.all_vps = [(vp, self)] self.all_vps = [(vp, self)]
else: else:
self.histpath = "" self.histpath = self.dbpath = ""
self.all_vols = {} self.all_vols = {}
self.all_nodes = {} self.all_nodes = {}
self.all_aps = [] self.all_aps = []
@ -461,17 +463,23 @@ class VFS(object):
def _copy_flags(self, name: str) -> dict[str, Any]: def _copy_flags(self, name: str) -> dict[str, Any]:
flags = {k: v for k, v in self.flags.items()} flags = {k: v for k, v in self.flags.items()}
hist = flags.get("hist") hist = flags.get("hist")
if hist and hist != "-": if hist and hist != "-":
zs = "{}/{}".format(hist.rstrip("/"), name) zs = "{}/{}".format(hist.rstrip("/"), name)
flags["hist"] = os.path.expandvars(os.path.expanduser(zs)) flags["hist"] = os.path.expandvars(os.path.expanduser(zs))
dbp = flags.get("dbpath")
if dbp and dbp != "-":
zs = "{}/{}".format(dbp.rstrip("/"), name)
flags["dbpath"] = os.path.expandvars(os.path.expanduser(zs))
return flags return flags
def bubble_flags(self) -> None: def bubble_flags(self) -> None:
if self.dbv: if self.dbv:
for k, v in self.dbv.flags.items(): for k, v in self.dbv.flags.items():
if k not in ["hist"]: if k not in ("hist", "dbpath"):
self.flags[k] = v self.flags[k] = v
for n in self.nodes.values(): for n in self.nodes.values():
@ -1759,7 +1767,7 @@ class AuthSrv(object):
pass pass
elif vflag: elif vflag:
vflag = os.path.expandvars(os.path.expanduser(vflag)) vflag = os.path.expandvars(os.path.expanduser(vflag))
vol.histpath = uncyg(vflag) if WINDOWS else vflag vol.histpath = vol.dbpath = uncyg(vflag) if WINDOWS else vflag
elif self.args.hist: elif self.args.hist:
for nch in range(len(hid)): for nch in range(len(hid)):
hpath = os.path.join(self.args.hist, hid[: nch + 1]) hpath = os.path.join(self.args.hist, hid[: nch + 1])
@ -1780,12 +1788,45 @@ class AuthSrv(object):
with open(powner, "wb") as f: with open(powner, "wb") as f:
f.write(me) f.write(me)
vol.histpath = hpath vol.histpath = vol.dbpath = hpath
break break
vol.histpath = absreal(vol.histpath) vol.histpath = absreal(vol.histpath)
for vol in vfs.all_vols.values():
hid = self.hid_cache[vol.realpath]
vflag = vol.flags.get("dbpath")
if vflag == "-":
pass
elif vflag:
vflag = os.path.expandvars(os.path.expanduser(vflag))
vol.dbpath = uncyg(vflag) if WINDOWS else vflag
elif self.args.dbpath:
for nch in range(len(hid)):
hpath = os.path.join(self.args.dbpath, hid[: nch + 1])
bos.makedirs(hpath)
powner = os.path.join(hpath, "owner.txt")
try:
with open(powner, "rb") as f:
owner = f.read().rstrip()
except:
owner = None
me = afsenc(vol.realpath).rstrip()
if owner not in [None, me]:
continue
if owner is None:
with open(powner, "wb") as f:
f.write(me)
vol.dbpath = hpath
break
vol.dbpath = absreal(vol.dbpath)
if vol.dbv: if vol.dbv:
if bos.path.exists(os.path.join(vol.histpath, "up2k.db")): if bos.path.exists(os.path.join(vol.dbpath, "up2k.db")):
promote.append(vol) promote.append(vol)
vol.dbv = None vol.dbv = None
else: else:
@ -1800,9 +1841,7 @@ class AuthSrv(object):
"\n the following jump-volumes were generated to assist the vfs.\n As they contain a database (probably from v0.11.11 or older),\n they are promoted to full volumes:" "\n the following jump-volumes were generated to assist the vfs.\n As they contain a database (probably from v0.11.11 or older),\n they are promoted to full volumes:"
] ]
for vol in promote: for vol in promote:
ta.append( ta.append(" /%s (%s) (%s)" % (vol.vpath, vol.realpath, vol.dbpath))
" /{} ({}) ({})".format(vol.vpath, vol.realpath, vol.histpath)
)
self.log("\n\n".join(ta) + "\n", c=3) self.log("\n\n".join(ta) + "\n", c=3)
@ -1813,13 +1852,27 @@ class AuthSrv(object):
is_shr = shr and zv.vpath.split("/")[0] == shr is_shr = shr and zv.vpath.split("/")[0] == shr
if histp and not is_shr and histp in rhisttab: if histp and not is_shr and histp in rhisttab:
zv2 = rhisttab[histp] zv2 = rhisttab[histp]
t = "invalid config; multiple volumes share the same histpath (database location):\n histpath: %s\n volume 1: /%s [%s]\n volume 2: %s [%s]" t = "invalid config; multiple volumes share the same histpath (database+thumbnails location):\n histpath: %s\n volume 1: /%s [%s]\n volume 2: %s [%s]"
t = t % (histp, zv2.vpath, zv2.realpath, zv.vpath, zv.realpath) t = t % (histp, zv2.vpath, zv2.realpath, zv.vpath, zv.realpath)
self.log(t, 1) self.log(t, 1)
raise Exception(t) raise Exception(t)
rhisttab[histp] = zv rhisttab[histp] = zv
vfs.histtab[zv.realpath] = histp vfs.histtab[zv.realpath] = histp
rdbpaths = {}
vfs.dbpaths = {}
for zv in vfs.all_vols.values():
dbp = zv.dbpath
is_shr = shr and zv.vpath.split("/")[0] == shr
if dbp and not is_shr and dbp in rdbpaths:
zv2 = rdbpaths[dbp]
t = "invalid config; multiple volumes share the same dbpath (database location):\n dbpath: %s\n volume 1: /%s [%s]\n volume 2: %s [%s]"
t = t % (dbp, zv2.vpath, zv2.realpath, zv.vpath, zv.realpath)
self.log(t, 1)
raise Exception(t)
rdbpaths[dbp] = zv
vfs.dbpaths[zv.realpath] = dbp
for vol in vfs.all_vols.values(): for vol in vfs.all_vols.values():
use = False use = False
for k in ["zipmaxn", "zipmaxs"]: for k in ["zipmaxn", "zipmaxs"]:

View file

@ -205,6 +205,7 @@ flagcats = {
"d2v": "disables file verification, overrides -e2v*", "d2v": "disables file verification, overrides -e2v*",
"d2d": "disables all database stuff, overrides -e2*", "d2d": "disables all database stuff, overrides -e2*",
"hist=/tmp/cdb": "puts thumbnails and indexes at that location", "hist=/tmp/cdb": "puts thumbnails and indexes at that location",
"dbpath=/tmp/cdb": "puts indexes at that location",
"scan=60": "scan for new files every 60sec, same as --re-maxage", "scan=60": "scan for new files every 60sec, same as --re-maxage",
"nohash=\\.iso$": "skips hashing file contents if path matches *.iso", "nohash=\\.iso$": "skips hashing file contents if path matches *.iso",
"noidx=\\.iso$": "fully ignores the contents at paths matching *.iso", "noidx=\\.iso$": "fully ignores the contents at paths matching *.iso",

View file

@ -763,7 +763,7 @@ class SvcHub(object):
vl = [os.path.expandvars(os.path.expanduser(x)) for x in vl] vl = [os.path.expandvars(os.path.expanduser(x)) for x in vl]
setattr(al, k, vl) setattr(al, k, vl)
for k in "lo hist ssl_log".split(" "): for k in "lo hist dbpath ssl_log".split(" "):
vs = getattr(al, k) vs = getattr(al, k)
if vs: if vs:
vs = os.path.expandvars(os.path.expanduser(vs)) vs = os.path.expandvars(os.path.expanduser(vs))

View file

@ -134,9 +134,9 @@ class U2idx(object):
assert sqlite3 # type: ignore # !rm assert sqlite3 # type: ignore # !rm
ptop = vn.realpath ptop = vn.realpath
histpath = self.asrv.vfs.histtab.get(ptop) histpath = self.asrv.vfs.dbpaths.get(ptop)
if not histpath: if not histpath:
self.log("no histpath for %r" % (ptop,)) self.log("no dbpath for %r" % (ptop,))
return None return None
db_path = os.path.join(histpath, "up2k.db") db_path = os.path.join(histpath, "up2k.db")

View file

@ -94,7 +94,7 @@ VF_AFFECTS_INDEXING = set(zsg.split(" "))
SBUSY = "cannot receive uploads right now;\nserver busy with %s.\nPlease wait; the client will retry..." SBUSY = "cannot receive uploads right now;\nserver busy with %s.\nPlease wait; the client will retry..."
HINT_HISTPATH = "you could try moving the database to another location (preferably an SSD or NVME drive) using either the --hist argument (global option for all volumes), or the hist volflag (just for this volume)" HINT_HISTPATH = "you could try moving the database to another location (preferably an SSD or NVME drive) using either the --hist argument (global option for all volumes), or the hist volflag (just for this volume), or, if you want to keep the thumbnails in the current location and only move the database itself, then use --dbpath or volflag dbpath"
NULLSTAT = os.stat_result((0, -1, -1, 0, 0, 0, 0, 0, 0, 0)) NULLSTAT = os.stat_result((0, -1, -1, 0, 0, 0, 0, 0, 0, 0))
@ -1096,9 +1096,9 @@ class Up2k(object):
self, ptop: str, flags: dict[str, Any] self, ptop: str, flags: dict[str, Any]
) -> Optional[tuple["sqlite3.Cursor", str]]: ) -> Optional[tuple["sqlite3.Cursor", str]]:
"""mutex(main,reg) me""" """mutex(main,reg) me"""
histpath = self.vfs.histtab.get(ptop) histpath = self.vfs.dbpaths.get(ptop)
if not histpath: if not histpath:
self.log("no histpath for %r" % (ptop,)) self.log("no dbpath for %r" % (ptop,))
return None return None
db_path = os.path.join(histpath, "up2k.db") db_path = os.path.join(histpath, "up2k.db")
@ -1344,12 +1344,15 @@ class Up2k(object):
] ]
excl += [absreal(x) for x in excl] excl += [absreal(x) for x in excl]
excl += list(self.vfs.histtab.values()) excl += list(self.vfs.histtab.values())
excl += list(self.vfs.dbpaths.values())
if WINDOWS: if WINDOWS:
excl = [x.replace("/", "\\") for x in excl] excl = [x.replace("/", "\\") for x in excl]
else: else:
# ~/.wine/dosdevices/z:/ and such # ~/.wine/dosdevices/z:/ and such
excl.extend(("/dev", "/proc", "/run", "/sys")) excl.extend(("/dev", "/proc", "/run", "/sys"))
excl = list({k: 1 for k in excl})
if self.args.re_dirsz: if self.args.re_dirsz:
db.c.execute("delete from ds") db.c.execute("delete from ds")
db.n += 1 db.n += 1
@ -5102,7 +5105,7 @@ class Up2k(object):
def _snap_reg(self, ptop: str, reg: dict[str, dict[str, Any]]) -> None: def _snap_reg(self, ptop: str, reg: dict[str, dict[str, Any]]) -> None:
now = time.time() now = time.time()
histpath = self.vfs.histtab.get(ptop) histpath = self.vfs.dbpaths.get(ptop)
if not histpath: if not histpath:
return return

View file

@ -1546,6 +1546,12 @@ def vol_san(vols: list["VFS"], txt: bytes) -> bytes:
txt = txt.replace(bap.replace(b"\\", b"\\\\"), bvp) txt = txt.replace(bap.replace(b"\\", b"\\\\"), bvp)
txt = txt.replace(bhp.replace(b"\\", b"\\\\"), bvph) txt = txt.replace(bhp.replace(b"\\", b"\\\\"), bvph)
if vol.histpath != vol.dbpath:
bdp = vol.dbpath.encode("utf-8")
bdph = b"$db(/" + bvp + b")"
txt = txt.replace(bdp, bdph)
txt = txt.replace(bdp.replace(b"\\", b"\\\\"), bdph)
if txt != txt0: if txt != txt0:
txt += b"\r\nNOTE: filepaths sanitized; see serverlog for correct values" txt += b"\r\nNOTE: filepaths sanitized; see serverlog for correct values"

View file

@ -135,7 +135,7 @@ class Cfg(Namespace):
ex = "dav_inf dedup dotpart dotsrch hook_v no_dhash no_fastboot no_fpool no_htp no_rescan no_sendfile no_ses no_snap no_up_list no_voldump re_dhash plain_ip" ex = "dav_inf dedup dotpart dotsrch hook_v no_dhash no_fastboot no_fpool no_htp no_rescan no_sendfile no_ses no_snap no_up_list no_voldump re_dhash plain_ip"
ka.update(**{k: True for k in ex.split()}) ka.update(**{k: True for k in ex.split()})
ex = "ah_cli ah_gen css_browser hist ipu js_browser js_other mime mimes no_forget no_hash no_idx nonsus_urls og_tpl og_ua ua_nodoc ua_nozip" ex = "ah_cli ah_gen css_browser dbpath hist ipu js_browser js_other mime mimes no_forget no_hash no_idx nonsus_urls og_tpl og_ua ua_nodoc ua_nozip"
ka.update(**{k: None for k in ex.split()}) ka.update(**{k: None for k in ex.split()})
ex = "hash_mt hsortn safe_dedup srch_time u2abort u2j u2sz" ex = "hash_mt hsortn safe_dedup srch_time u2abort u2j u2sz"