From d6bf300d80a0385700c8a77dd1927db786b98b63 Mon Sep 17 00:00:00 2001 From: ed Date: Thu, 10 Jun 2021 01:27:04 +0200 Subject: [PATCH] option to store state out-of-volume (mostly untested) --- README.md | 12 +++++++++ copyparty/__main__.py | 1 + copyparty/authsrv.py | 54 +++++++++++++++++++++++++++++++++++++++-- copyparty/broker_mpw.py | 4 +++ copyparty/broker_thr.py | 4 +++ copyparty/httpconn.py | 2 +- copyparty/httpsrv.py | 2 +- copyparty/svchub.py | 4 +-- copyparty/th_cli.py | 5 ++-- copyparty/th_srv.py | 33 ++++++++++++++----------- copyparty/u2idx.py | 17 ++++++------- copyparty/up2k.py | 24 ++++++++++-------- tests/test_httpcli.py | 1 + tests/test_vfs.py | 4 +-- 14 files changed, 123 insertions(+), 44 deletions(-) diff --git a/README.md b/README.md index 7329e9d4..0c5dd2fd 100644 --- a/README.md +++ b/README.md @@ -301,6 +301,18 @@ the same arguments can be set as volume flags, in addition to `d2d` and `d2t` fo the rescan button in the admin panel has no effect unless the volume has `-e2ds` or higher +## database location + +copyparty creates a subfolder named `.hist` inside each volume where it stores the database, thumbnails, and some other stuff + +this can instead be kept in a single place using the `--hist` argument, or the `hist=` volume flag, or a mix of both: +* `--hist ~/.cache/copyparty -v ~/music::r:chist=-` sets `~/.cache/copyparty` as the default place to put volume info, but `~/music` gets the regular `.hist` subfolder (`-` restores default behavior) + +btw, +* markdown edits are always stored in a local `.hist` subdirectory +* on windows the volflag path is cyglike, so `/c/temp` means `C:\temp` + + ## metadata from audio files `-mte` decides which tags to index and display in the browser (and also the display order), this can be changed per-volume: diff --git a/copyparty/__main__.py b/copyparty/__main__.py index e9540e9c..6c2a369d 100644 --- a/copyparty/__main__.py +++ b/copyparty/__main__.py @@ -285,6 +285,7 @@ def run_argparse(argv, formatter): ap2.add_argument("-e2t", action="store_true", help="enable metadata indexing") ap2.add_argument("-e2ts", action="store_true", help="enable metadata scanner, sets -e2t") ap2.add_argument("-e2tsr", action="store_true", help="rescan all metadata, sets -e2ts") + ap2.add_argument("--hist", metavar="PATH", type=str, help="where to store volume state") ap2.add_argument("--no-mutagen", action="store_true", help="use ffprobe for tags instead") ap2.add_argument("--no-mtag-mt", action="store_true", help="disable tag-read parallelism") ap2.add_argument("-mtm", metavar="M=t,t,t", action="append", type=str, help="add/replace metadata mapping") diff --git a/copyparty/authsrv.py b/copyparty/authsrv.py index 25aa905b..48c43da8 100644 --- a/copyparty/authsrv.py +++ b/copyparty/authsrv.py @@ -5,6 +5,8 @@ import re import os import sys import stat +import base64 +import hashlib import threading from .__init__ import WINDOWS @@ -22,7 +24,14 @@ class VFS(object): self.uadm = uadm # users who are regular admins self.flags = flags # config switches self.nodes = {} # child nodes - self.all_vols = {vpath: self} if realpath else {} # flattened recursive + self.histtab = None # all realpath->histpath + + if realpath: + self.histpath = os.path.join(realpath, ".hist") # db / thumbcache + self.all_vols = {vpath: self} # flattened recursive + else: + self.histpath = None + self.all_vols = {} def __repr__(self): return "VFS({})".format( @@ -273,7 +282,8 @@ class AuthSrv(object): self.reload() def log(self, msg, c=0): - self.log_func("auth", msg, c) + if self.log_func: + self.log_func("auth", msg, c) def laggy_iter(self, iterable): """returns [value,isFinalValue]""" @@ -467,6 +477,46 @@ class AuthSrv(object): ) raise Exception("invalid config") + for vol in vfs.all_vols.values(): + hid = hashlib.sha512(fsenc(vol.realpath)).digest() + hid = base64.b32encode(hid).decode("ascii").lower() + vflag = vol.flags.get("hist") + if vflag == "-": + pass + elif vflag: + if WINDOWS and vflag.startswith("/"): + vflag = "{}:\\{}".format(vflag[1], vflag[3:]) + vol.histpath = vflag + elif self.args.hist: + for nch in range(len(hid)): + hpath = os.path.join(self.args.hist, hid[: nch + 1]) + try: + os.makedirs(hpath) + except: + pass + + powner = os.path.join(hpath, "owner.txt") + try: + with open(powner, "rb") as f: + owner = f.read().rstrip() + except: + owner = None + + me = fsenc(vol.realpath).rstrip() + if owner not in [None, me]: + continue + + if owner is None: + with open(powner, "wb") as f: + f.write(me) + + vol.histpath = hpath + break + + vol.histpath = os.path.realpath(vol.histpath) + + vfs.histtab = {v.realpath: v.histpath for v in vfs.all_vols.values()} + all_mte = {} errors = False for vol in vfs.all_vols.values(): diff --git a/copyparty/broker_mpw.py b/copyparty/broker_mpw.py index 6dd03627..8e27a5a9 100644 --- a/copyparty/broker_mpw.py +++ b/copyparty/broker_mpw.py @@ -1,5 +1,6 @@ # coding: utf-8 from __future__ import print_function, unicode_literals +from copyparty.authsrv import AuthSrv import sys import time @@ -34,6 +35,9 @@ class MpWorker(object): if not FAKE_MP: signal.signal(signal.SIGINT, self.signal_handler) + # starting to look like a good idea + self.authsrv = AuthSrv(args, None, False) + # instantiate all services here (TODO: inheritance?) self.httpsrv = HttpSrv(self, True) self.httpsrv.disconnect_func = self.httpdrop diff --git a/copyparty/broker_thr.py b/copyparty/broker_thr.py index 643f1f65..b98e336f 100644 --- a/copyparty/broker_thr.py +++ b/copyparty/broker_thr.py @@ -3,6 +3,7 @@ from __future__ import print_function, unicode_literals import threading +from .authsrv import AuthSrv from .httpsrv import HttpSrv from .broker_util import ExceptionalQueue, try_exec @@ -17,6 +18,9 @@ class BrokerThr(object): self.mutex = threading.Lock() + # starting to look like a good idea + self.authsrv = AuthSrv(self.args, None, False) + # instantiate all services here (TODO: inheritance?) self.httpsrv = HttpSrv(self) self.httpsrv.disconnect_func = self.httpdrop diff --git a/copyparty/httpconn.py b/copyparty/httpconn.py index cb2162dd..59fe9d40 100644 --- a/copyparty/httpconn.py +++ b/copyparty/httpconn.py @@ -71,7 +71,7 @@ class HttpConn(object): def get_u2idx(self): if not self.u2idx: - self.u2idx = U2idx(self.args, self.log_func) + self.u2idx = U2idx(self.args, self.log_func, self.auth.vfs) return self.u2idx diff --git a/copyparty/httpsrv.py b/copyparty/httpsrv.py index 6f3375fd..a757e3ae 100644 --- a/copyparty/httpsrv.py +++ b/copyparty/httpsrv.py @@ -40,6 +40,7 @@ class HttpSrv(object): self.is_mp = is_mp self.args = broker.args self.log = broker.log + self.auth = broker.authsrv self.disconnect_func = None self.mutex = threading.Lock() @@ -47,7 +48,6 @@ class HttpSrv(object): self.clients = {} self.workload = 0 self.workload_thr_alive = False - self.auth = AuthSrv(self.args, self.log) env = jinja2.Environment() env.loader = jinja2.FileSystemLoader(os.path.join(E.mod, "web")) diff --git a/copyparty/svchub.py b/copyparty/svchub.py index 3e123efc..654ec2c6 100644 --- a/copyparty/svchub.py +++ b/copyparty/svchub.py @@ -44,7 +44,7 @@ class SvcHub(object): # initiate all services to manage self.tcpsrv = TcpSrv(self) - self.up2k = Up2k(self, auth.vfs.all_vols) + self.up2k = Up2k(self, auth.vfs) self.thumbsrv = None if not args.no_thumb: @@ -54,7 +54,7 @@ class SvcHub(object): msg = "setting --th-no-webp because either libwebp is not available or your Pillow is too old" self.log("thumb", msg, c=3) - self.thumbsrv = ThumbSrv(self, auth.vfs.all_vols) + self.thumbsrv = ThumbSrv(self, auth.vfs) else: msg = "need Pillow to create thumbnails; for example:\n{}{} -m pip install --user Pillow\n" self.log( diff --git a/copyparty/th_cli.py b/copyparty/th_cli.py index fa9285fc..18b04b31 100644 --- a/copyparty/th_cli.py +++ b/copyparty/th_cli.py @@ -2,7 +2,6 @@ from __future__ import print_function, unicode_literals import os -import time from .util import Cooldown from .th_srv import thumb_path, THUMBABLE, FMT_FF @@ -12,6 +11,7 @@ class ThumbCli(object): def __init__(self, broker): self.broker = broker self.args = broker.args + self.hist = broker.authsrv.vfs.histtab # cache on both sides for less broker spam self.cooldown = Cooldown(self.args.th_poke) @@ -30,7 +30,8 @@ class ThumbCli(object): if fmt == "w" and self.args.th_no_webp: fmt = "j" - tpath = thumb_path(ptop, rem, mtime, fmt) + hist = self.hist[ptop] + tpath = thumb_path(hist, rem, mtime, fmt) ret = None try: st = os.stat(tpath) diff --git a/copyparty/th_srv.py b/copyparty/th_srv.py index 138f4ac6..b3540a15 100644 --- a/copyparty/th_srv.py +++ b/copyparty/th_srv.py @@ -74,7 +74,7 @@ if HAVE_FFMPEG and HAVE_FFPROBE: THUMBABLE.update(FMT_FF) -def thumb_path(ptop, rem, mtime, fmt): +def thumb_path(hist, rem, mtime, fmt): # base16 = 16 = 256 # b64-lc = 38 = 1444 # base64 = 64 = 4096 @@ -95,15 +95,16 @@ def thumb_path(ptop, rem, mtime, fmt): h = hashlib.sha512(fsenc(fn)).digest()[:24] fn = base64.urlsafe_b64encode(h).decode("ascii")[:24] - return "{}/.hist/th/{}/{}.{:x}.{}".format( - ptop, rd, fn, int(mtime), "webp" if fmt == "w" else "jpg" + return "{}/th/{}/{}.{:x}.{}".format( + hist, rd, fn, int(mtime), "webp" if fmt == "w" else "jpg" ) class ThumbSrv(object): - def __init__(self, hub, vols): + def __init__(self, hub, vfs): self.hub = hub - self.vols = [v.realpath for v in vols.values()] + self.vols = [v.realpath for v in vfs.all_vols.values()] + self.hist = vfs.histtab self.args = hub.args self.log_func = hub.log @@ -153,7 +154,8 @@ class ThumbSrv(object): return not self.nthr def get(self, ptop, rem, mtime, fmt): - tpath = thumb_path(ptop, rem, mtime, fmt) + hist = self.hist[ptop] + tpath = thumb_path(hist, rem, mtime, fmt) abspath = os.path.join(ptop, rem) cond = threading.Condition() with self.mutex: @@ -319,26 +321,29 @@ class ThumbSrv(object): interval = self.args.th_clean while True: time.sleep(interval) - for vol in self.vols: - vol += "/.hist/th" - self.log("\033[Jcln {}/\033[A".format(vol)) - self.clean(vol) + for vol, hist in self.hist.items(): + if hist.startswith(vol): + self.log("\033[Jcln {}/\033[A".format(hist)) + else: + self.log("\033[Jcln {} ({})/\033[A".format(hist, vol)) + + self.clean(hist) self.log("\033[Jcln ok") - def clean(self, vol): - # self.log("cln {}".format(vol)) + def clean(self, hist): + # self.log("cln {}".format(hist)) maxage = self.args.th_maxage now = time.time() prev_b64 = None prev_fp = None try: - ents = os.listdir(vol) + ents = os.listdir(hist) except: return for f in sorted(ents): - fp = os.path.join(vol, f) + fp = os.path.join(hist, f) cmp = fp.lower().replace("\\", "/") # "top" or b64 prefix/full (a folder) diff --git a/copyparty/u2idx.py b/copyparty/u2idx.py index 1e2c3612..00230011 100644 --- a/copyparty/u2idx.py +++ b/copyparty/u2idx.py @@ -7,7 +7,7 @@ import time import threading from datetime import datetime -from .util import u8safe, s3dec, html_escape, Pebkac +from .util import s3dec, Pebkac from .up2k import up2k_wark_from_hashlist @@ -19,9 +19,11 @@ except: class U2idx(object): - def __init__(self, args, log_func): + def __init__(self, args, log_func, vfs): self.args = args self.log_func = log_func + self.vfs = vfs + self.timeout = args.srch_time if not HAVE_SQLITE3: @@ -60,10 +62,11 @@ class U2idx(object): if cur: return cur - cur = _open(ptop) - if not cur: + db_path = os.path.join(self.vfs.histtab[ptop], "up2k.db") + if not os.path.exists(db_path): return None + cur = sqlite3.connect(db_path).cursor() self.cur[ptop] = cur return cur @@ -262,9 +265,3 @@ class U2idx(object): if identifier == self.active_id: self.active_cur.connection.interrupt() - - -def _open(ptop): - db_path = os.path.join(ptop, ".hist", "up2k.db") - if os.path.exists(db_path): - return sqlite3.connect(db_path).cursor() diff --git a/copyparty/up2k.py b/copyparty/up2k.py index 2b2e4763..cf321a11 100644 --- a/copyparty/up2k.py +++ b/copyparty/up2k.py @@ -48,8 +48,10 @@ class Up2k(object): * ~/.config flatfiles for active jobs """ - def __init__(self, hub, all_vols): + def __init__(self, hub, vfs): self.hub = hub + self.vfs = vfs + # TODO stop passing around vfs, do auth or broker instead self.args = hub.args self.log_func = hub.log @@ -94,10 +96,12 @@ class Up2k(object): self.log("could not initialize sqlite3, will use in-memory registry only") if self.args.no_fastboot: - self.deferred_init(all_vols) + self.deferred_init(vfs.all_vols) else: t = threading.Thread( - target=self.deferred_init, args=(all_vols,), name="up2k-deferred-init" + target=self.deferred_init, + args=(vfs.all_vols,), + name="up2k-deferred-init", ) t.daemon = True t.start() @@ -294,7 +298,7 @@ class Up2k(object): return have_e2d def register_vpath(self, ptop, flags): - db_path = os.path.join(ptop, ".hist", "up2k.db") + db_path = os.path.join(self.vfs.histtab[ptop], "up2k.db") if ptop in self.registry: try: return [self.cur[ptop], db_path] @@ -314,7 +318,7 @@ class Up2k(object): self.log(" ".join(sorted(a)) + "\033[0m") reg = {} - path = os.path.join(ptop, ".hist", "up2k.snap") + path = os.path.join(self.vfs.histtab[ptop], "up2k.snap") if "e2d" in flags and os.path.exists(path): with gzip.GzipFile(path, "rb") as f: j = f.read().decode("utf-8") @@ -338,7 +342,7 @@ class Up2k(object): return None try: - os.mkdir(os.path.join(ptop, ".hist")) + os.makedirs(self.vfs.histtab[ptop]) except: pass @@ -379,7 +383,7 @@ class Up2k(object): def _build_dir(self, dbw, top, excl, cdir): self.pp.msg = "a{} {}".format(self.pp.n, cdir) - histdir = os.path.join(top, ".hist") + histdir = self.vfs.histtab[top] ret = 0 g = statdir(self.log, not self.args.no_scandir, False, cdir) for iname, inf in sorted(g): @@ -928,7 +932,7 @@ class Up2k(object): def _create_v3(self, cur): """ collision in 2^(n/2) files where n = bits (6 bits/ch) - 10*6/2 = 2^30 = 1'073'741'824, 24.1mb idx + 10*6/2 = 2^30 = 1'073'741'824, 24.1mb idx 1<<(3*10) 12*6/2 = 2^36 = 68'719'476'736, 24.8mb idx 16*6/2 = 2^48 = 281'474'976'710'656, 26.1mb idx """ @@ -1366,7 +1370,7 @@ class Up2k(object): except: pass - path = os.path.join(k, ".hist", "up2k.snap") + path = os.path.join(self.vfs.histtab[k], "up2k.snap") if not reg: if k not in prev or prev[k] is not None: prev[k] = None @@ -1380,7 +1384,7 @@ class Up2k(object): return try: - os.mkdir(os.path.join(k, ".hist")) + os.makedirs(self.vfs.histtab[k]) except: pass diff --git a/tests/test_httpcli.py b/tests/test_httpcli.py index 7e740763..e99fed81 100644 --- a/tests/test_httpcli.py +++ b/tests/test_httpcli.py @@ -37,6 +37,7 @@ class Cfg(Namespace): nih=True, mtp=[], mte="a", + hist=None, **{k: False for k in "e2d e2ds e2dsa e2t e2ts e2tsr".split()} ) diff --git a/tests/test_vfs.py b/tests/test_vfs.py index 23fe5919..0936a317 100644 --- a/tests/test_vfs.py +++ b/tests/test_vfs.py @@ -18,8 +18,8 @@ from copyparty import util class Cfg(Namespace): def __init__(self, a=[], v=[], c=None): ex = {k: False for k in "e2d e2ds e2dsa e2t e2ts e2tsr".split()} - ex["mtp"] = [] - ex["mte"] = "a" + ex2 = {"mtp": [], "mte": "a", "hist": None} + ex.update(ex2) super(Cfg, self).__init__(a=a, v=v, c=c, **ex)