From 660705a4369d48227ba8361e347c79ed574a9ca6 Mon Sep 17 00:00:00 2001
From: ed
Date: Wed, 27 Jul 2022 11:48:47 +0200
Subject: [PATCH] defer volume reindexing on db activity

---
 README.md             | 12 +++++++++++
 copyparty/__main__.py |  1 +
 copyparty/up2k.py     | 47 ++++++++++++++++++++++++++++++++++++-------
 docs/notes.sh         |  2 +-
 4 files changed, 54 insertions(+), 8 deletions(-)

diff --git a/README.md b/README.md
index 0de1a8e3..d84c586c 100644
--- a/README.md
+++ b/README.md
@@ -57,6 +57,8 @@ try the **[read-only demo server](https://a.ocv.me/pub/demo/)** 👀 running fro
 * [server config](#server-config) - using arguments or config files, or a mix of both
     * [ftp-server](#ftp-server) - an FTP server can be started using `--ftp 3921`
     * [file indexing](#file-indexing)
+        * [exclude-patterns](#exclude-patterns)
+        * [periodic rescan](#periodic-rescan) - filesystem monitoring;
     * [upload rules](#upload-rules) - set upload rules using volume flags
         * [compress uploads](#compress-uploads) - files can be autocompressed on upload
     * [database location](#database-location) - in-volume (`.hist/up2k.db`, default) or somewhere else
@@ -681,6 +683,8 @@ note:
 * `e2tsr` is probably always overkill, since `e2ds`/`e2dsa` would pick up any file modifications and `e2ts` would then reindex those, unless there is a new copyparty version with new parsers and the release note says otherwise
 * the rescan button in the admin panel has no effect unless the volume has `-e2ds` or higher
 
+### exclude-patterns
+
 to save some time, you can provide a regex pattern for filepaths to only index by filename/path/size/last-modified (and not the hash of the file contents) by setting `--no-hash \.iso$` or the volume-flag `:c,nohash=\.iso$`, this has the following consequences:
 * initial indexing is way faster, especially when the volume is on a network disk
 * makes it impossible to [file-search](#file-search)
@@ -690,6 +694,14 @@ similarly, you can fully ignore files/folders using `--no-idx [...]` and `:c,noi
 
 if you set `--no-hash [...]` globally, you can enable hashing for specific volumes using flag `:c,nohash=`
 
+### periodic rescan
+
+filesystem monitoring; if copyparty is not the only software doing stuff on your filesystem, you may want to enable periodic rescans to keep the index up to date
+
+argument `--re-maxage 60` will rescan all volumes every 60 sec, same as volflag `:c,scan=60` to specify it per-volume
+
+uploads are disabled while a rescan is happening, so rescans will be delayed by `--db-act` (default 10 sec) when there is write-activity going on (uploads, renames, ...)
+
 
 
 ## upload rules
diff --git a/copyparty/__main__.py b/copyparty/__main__.py
index e5807ddb..79d7f3bc 100644
--- a/copyparty/__main__.py
+++ b/copyparty/__main__.py
@@ -596,6 +596,7 @@ def run_argparse(argv: list[str], formatter: Any, retry: bool) -> argparse.Names
     ap2.add_argument("--no-hash", metavar="PTN", type=u, help="regex: disable hashing of matching paths during e2ds folder scans")
     ap2.add_argument("--no-idx", metavar="PTN", type=u, help="regex: disable indexing of matching paths during e2ds folder scans")
     ap2.add_argument("--re-maxage", metavar="SEC", type=int, default=0, help="disk rescan volume interval, 0=off, can be set per-volume with the 'scan' volflag")
+    ap2.add_argument("--db-act", metavar="SEC", type=float, default=10, help="defer any scheduled volume reindexing until SEC seconds after last db write (uploads, renames, ...)")
     ap2.add_argument("--srch-time", metavar="SEC", type=int, default=30, help="search deadline -- terminate searches running for more than SEC seconds")
     ap2.add_argument("--srch-hits", metavar="N", type=int, default=7999, help="max search results to allow clients to fetch; 125 results will be shown initially")
diff --git a/copyparty/up2k.py b/copyparty/up2k.py
index 07c2d7f8..6beedf1c 100644
--- a/copyparty/up2k.py
+++ b/copyparty/up2k.py
@@ -107,6 +107,7 @@ class Up2k(object):
         self.pp: Optional[ProgressPrinter] = None
         self.rescan_cond = threading.Condition()
         self.need_rescan: set[str] = set()
+        self.db_act = 0.0
         self.registry: dict[str, dict[str, dict[str, Any]]] = {}
         self.flags: dict[str, dict[str, Any]] = {}
@@ -262,10 +263,15 @@
                 continue
 
             if self.pp:
-                cooldown = now + 5
+                cooldown = now + 1
                 continue
 
-            timeout = now + 9001
+            if self.args.no_lifetime:
+                timeout = now + 9001
+            else:
+                # important; not deferred by db_act
+                timeout = self._check_lifetimes()
+
             with self.mutex:
                 for vp, vol in sorted(self.asrv.vfs.all_vols.items()):
                     maxage = vol.flags.get("scan")
@@ -281,6 +287,20 @@
                     timeout = min(timeout, deadline)
 
+            if self.db_act > now - self.args.db_act:
+                # recent db activity; defer volume rescan
+                act_timeout = self.db_act + self.args.db_act
+                if self.need_rescan:
+                    timeout = now
+
+                if timeout < act_timeout:
+                    timeout = act_timeout
+                    t = "volume rescan deferred {:.1f} sec, due to database activity"
+                    self.log(t.format(timeout - now))
+
+                continue
+
             with self.mutex:
                 vols = list(sorted(self.need_rescan))
                 self.need_rescan.clear()
 
@@ -296,9 +316,10 @@
             for v in vols:
                 volage[v] = now
-            if self.args.no_lifetime:
-                continue
-
+    def _check_lifetimes(self) -> float:
+        now = time.time()
+        timeout = now + 9001
+        if now:  # diff-golf
             for vp, vol in sorted(self.asrv.vfs.all_vols.items()):
                 lifetime = vol.flags.get("lifetime")
                 if not lifetime:
                     continue
@@ -345,6 +366,8 @@
                 if hits:
                     timeout = min(timeout, now + lifetime - (now - hits[0]))
 
+        return timeout
+
     def _vis_job_progress(self, job: dict[str, Any]) -> str:
         perc = 100 - (len(job["need"]) * 100.0 / len(job["hash"]))
         path = os.path.join(job["ptop"], job["prel"], job["name"])
@@ -1117,6 +1140,7 @@
     ) -> int:
         assert self.pp and self.mtag
 
+        flags = self.flags[ptop]
         mpool: Optional[Queue[Mpqe]] = None
         if self.mtag.prefer_mt and self.args.mtag_mt > 1:
             mpool = self._start_mpool()
@@ -1140,6 +1164,11 @@
             if rd.startswith("//") or fn.startswith("//"):
                 rd, fn = s3dec(rd, fn)
 
+            if "mtp" in flags:
+                q = "insert into mt values (?,'t:mtp','a')"
+                with self.mutex:
+                    cur.execute(q, (w[:16],))
+
             abspath = os.path.join(ptop, rd, fn)
             self.pp.msg = "c{} {}".format(nq, abspath)
             if not mpool:
@@ -1670,9 +1699,8 @@
             self._job_volchk(cj)
 
         cj["name"] = sanitize_fn(cj["name"], "", [".prologue.html", ".epilogue.html"])
-        cj["poke"] = time.time()
+        cj["poke"] = now = self.db_act = time.time()
         wark = self._get_wark(cj)
-        now = time.time()
         job = None
         pdir = djoin(cj["ptop"], cj["prel"])
         try:
@@ -1932,6 +1960,7 @@
         self, ptop: str, wark: str, chash: str
     ) -> tuple[int, list[int], str, float, bool]:
         with self.mutex:
+            self.db_act = time.time()
             job = self.registry[ptop].get(wark)
             if not job:
                 known = " ".join([x for x in self.registry[ptop].keys()])
@@ -1982,6 +2011,7 @@
 
     def confirm_chunk(self, ptop: str, wark: str, chash: str) -> tuple[int, str]:
         with self.mutex:
+            self.db_act = time.time()
             try:
                 job = self.registry[ptop][wark]
                 pdir = os.path.join(job["ptop"], job["prel"])
@@ -2016,6 +2046,7 @@
             self._finish_upload(ptop, wark)
 
     def _finish_upload(self, ptop: str, wark: str) -> None:
+        self.db_act = time.time()
         try:
             job = self.registry[ptop][wark]
             pdir = os.path.join(job["ptop"], job["prel"])
@@ -2158,6 +2189,7 @@
     def _handle_rm(
         self, uname: str, ip: str, vpath: str
     ) -> tuple[int, list[str], list[str]]:
+        self.db_act = time.time()
         try:
             permsets = [[True, False, False, True]]
             vn, rem = self.asrv.vfs.get(vpath, uname, *permsets[0])
@@ -2242,6 +2274,7 @@
         return n_files, ok + ok2, ng + ng2
 
     def handle_mv(self, uname: str, svp: str, dvp: str) -> str:
+        self.db_act = time.time()
         svn, srem = self.asrv.vfs.get(svp, uname, True, False, True)
         svn, srem = svn.get_dbv(srem)
         sabs = svn.canonical(srem, False)
diff --git a/docs/notes.sh b/docs/notes.sh
index 7fad74f5..54054214 100644
--- a/docs/notes.sh
+++ b/docs/notes.sh
@@ -185,7 +185,7 @@ brew install python@2
 pip install virtualenv
 
 # readme toc
-cat README.md | awk 'function pr() { if (!h) {return}; if (/^ *[*!#|]/||!s) {printf "%s\n",h;h=0;return}; if (/.../) {printf "%s - %s\n",h,$0;h=0}; }; /^#/{s=1;pr()} /^#* *(file indexing|install on android|dev env setup|just the sfx|complete release|optional gpl stuff)|`$/{s=0} /^#/{lv=length($1);sub(/[^ ]+ /,"");bab=$0;gsub(/ /,"-",bab); h=sprintf("%" ((lv-1)*4+1) "s [%s](#%s)", "*",$0,bab);next} !h{next} {sub(/ .*/,"");sub(/[:,]$/,"")} {pr()}' > toc; grep -E '^## readme toc' -B1000 -A2 <README.md >p1; grep -E '^## quickstart' -B2 -A999999 <README.md >p2; (cat p1; grep quickstart -A1000 <toc; cat p2) >README.md; rm p1 p2 toc
+cat README.md | awk 'function pr() { if (!h) {return}; if (/^ *[*!#|]/||!s) {printf "%s\n",h;h=0;return}; if (/.../) {printf "%s - %s\n",h,$0;h=0}; }; /^#/{s=1;pr()} /^#* *(file indexing|exclude-patterns|install on android|dev env setup|just the sfx|complete release|optional gpl stuff)|`$/{s=0} /^#/{lv=length($1);sub(/[^ ]+ /,"");bab=$0;gsub(/ /,"-",bab); h=sprintf("%" ((lv-1)*4+1) "s [%s](#%s)", "*",$0,bab);next} !h{next} {sub(/ .*/,"");sub(/[:,]$/,"")} {pr()}' > toc; grep -E '^## readme toc' -B1000 -A2 <README.md >p1; grep -E '^## quickstart' -B2 -A999999 <README.md >p2; (cat p1; grep quickstart -A1000 <toc; cat p2) >README.md; rm p1 p2 toc
 
 # fix firefox phantom breakpoints,
 # suggestions from bugtracker, doesnt work (debugger is not attachable)
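
note: the scheduler change above boils down to two timestamps; a volume rescan becomes due `--re-maxage` (or volflag `scan`) seconds after the previous scan, but is pushed back until the database has been idle for `--db-act` seconds, while lifetimes are checked separately ("not deferred by db_act"). a minimal standalone sketch of that deferral idea follows -- the `RescanScheduler` name and its members are made up for illustration and are not copyparty's actual classes:

# standalone sketch of the rescan-deferral idea in the patch above;
# illustrative names only, not copyparty code
import threading
import time


class RescanScheduler:
    def __init__(self, maxage: float, db_act: float) -> None:
        self.maxage = maxage   # like --re-maxage / volflag "scan": rescan interval
        self.db_act = db_act   # like --db-act: required db quiet-time before a rescan
        self.last_act = 0.0    # when the db was last written to
        self.last_scan = 0.0   # when the volume was last rescanned
        self.cond = threading.Condition()

    def on_db_write(self) -> None:
        # call this on uploads, renames, deletes, ...
        with self.cond:
            self.last_act = time.time()
            self.cond.notify()

    def run(self) -> None:
        while True:
            now = time.time()
            due = self.last_scan + self.maxage    # when a rescan is scheduled
            quiet = self.last_act + self.db_act   # when the db counts as idle
            if now >= due and now >= quiet:
                print("rescanning volume")
                self.last_scan = time.time()
                continue

            # sleep until the rescan is both due and no longer deferred by db activity
            wait = max(due, quiet) - now
            with self.cond:
                self.cond.wait(wait)

with the defaults in this patch, `--re-maxage 60` together with `--db-act 10` means a volume is rescanned roughly once a minute, but never within 10 sec of an upload, rename or delete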