defer volume reindexing on db activity

This commit is contained in:
ed 2022-07-27 11:48:47 +02:00
parent 74a3f97671
commit 660705a436
4 changed files with 54 additions and 8 deletions

View file

@ -57,6 +57,8 @@ try the **[read-only demo server](https://a.ocv.me/pub/demo/)** 👀 running fro
* [server config](#server-config) - using arguments or config files, or a mix of both
* [ftp-server](#ftp-server) - an FTP server can be started using `--ftp 3921`
* [file indexing](#file-indexing)
* [exclude-patterns](#exclude-patterns)
* [periodic rescan](#periodic-rescan) - filesystem monitoring;
* [upload rules](#upload-rules) - set upload rules using volume flags
* [compress uploads](#compress-uploads) - files can be autocompressed on upload
* [database location](#database-location) - in-volume (`.hist/up2k.db`, default) or somewhere else
@ -681,6 +683,8 @@ note:
* `e2tsr` is probably always overkill, since `e2ds`/`e2dsa` would pick up any file modifications and `e2ts` would then reindex those, unless there is a new copyparty version with new parsers and the release note says otherwise
* the rescan button in the admin panel has no effect unless the volume has `-e2ds` or higher
### exclude-patterns
to save some time, you can provide a regex pattern for filepaths which should only be indexed by filename/path/size/last-modified (and not by the hash of the file contents) by setting `--no-hash \.iso$` or the volume-flag `:c,nohash=\.iso$`; this has the following consequences:
* initial indexing is way faster, especially when the volume is on a network disk
* makes it impossible to [file-search](#file-search)
@ -690,6 +694,14 @@ similarly, you can fully ignore files/folders using `--no-idx [...]` and `:c,noi
if you set `--no-hash [...]` globally, you can enable hashing for specific volumes using flag `:c,nohash=`
### periodic rescan
filesystem monitoring; if copyparty is not the only software doing stuff on your filesystem, you may want to enable periodic rescans to keep the index up to date
the argument `--re-maxage 60` will rescan all volumes every 60 sec; to set the interval per-volume instead, use the volflag `:c,scan=60`
uploads are disabled while a rescan is happening, so a scheduled rescan will be deferred until `--db-act` seconds (default 10) after the most recent write-activity (uploads, renames, ...)
## upload rules

View file

@ -596,6 +596,7 @@ def run_argparse(argv: list[str], formatter: Any, retry: bool) -> argparse.Names
ap2.add_argument("--no-hash", metavar="PTN", type=u, help="regex: disable hashing of matching paths during e2ds folder scans")
ap2.add_argument("--no-idx", metavar="PTN", type=u, help="regex: disable indexing of matching paths during e2ds folder scans")
ap2.add_argument("--re-maxage", metavar="SEC", type=int, default=0, help="disk rescan volume interval, 0=off, can be set per-volume with the 'scan' volflag")
ap2.add_argument("--db-act", metavar="SEC", type=float, default=10, help="defer any scheduled volume reindexing until SEC seconds after last db write (uploads, renames, ...)")
ap2.add_argument("--srch-time", metavar="SEC", type=int, default=30, help="search deadline -- terminate searches running for more than SEC seconds")
ap2.add_argument("--srch-hits", metavar="N", type=int, default=7999, help="max search results to allow clients to fetch; 125 results will be shown initially")

View file

@ -107,6 +107,7 @@ class Up2k(object):
self.pp: Optional[ProgressPrinter] = None
self.rescan_cond = threading.Condition()
self.need_rescan: set[str] = set()
self.db_act = 0.0
self.registry: dict[str, dict[str, dict[str, Any]]] = {}
self.flags: dict[str, dict[str, Any]] = {}
@ -262,10 +263,15 @@ class Up2k(object):
continue
if self.pp:
cooldown = now + 5
cooldown = now + 1
continue
timeout = now + 9001
if self.args.no_lifetime:
timeout = now + 9001
else:
# important; not deferred by db_act
timeout = self._check_lifetimes()
with self.mutex:
for vp, vol in sorted(self.asrv.vfs.all_vols.items()):
maxage = vol.flags.get("scan")
@ -281,6 +287,20 @@ class Up2k(object):
timeout = min(timeout, deadline)
if self.db_act > now - self.args.db_act:
# recent db activity; defer volume rescan
act_timeout = self.db_act + self.args.db_act
if self.need_rescan:
timeout = now
if timeout < act_timeout:
timeout = act_timeout
t = "volume rescan deferred {:.1f} sec, due to database activity"
self.log(t.format(timeout - now))
continue
with self.mutex:
vols = list(sorted(self.need_rescan))
self.need_rescan.clear()
@ -296,9 +316,10 @@ class Up2k(object):
for v in vols:
volage[v] = now
if self.args.no_lifetime:
continue
def _check_lifetimes(self) -> float:
now = time.time()
timeout = now + 9001
if now: # diff-golf
for vp, vol in sorted(self.asrv.vfs.all_vols.items()):
lifetime = vol.flags.get("lifetime")
if not lifetime:
@ -345,6 +366,8 @@ class Up2k(object):
if hits:
timeout = min(timeout, now + lifetime - (now - hits[0]))
return timeout
def _vis_job_progress(self, job: dict[str, Any]) -> str:
perc = 100 - (len(job["need"]) * 100.0 / len(job["hash"]))
path = os.path.join(job["ptop"], job["prel"], job["name"])
@ -1117,6 +1140,7 @@ class Up2k(object):
) -> int:
assert self.pp and self.mtag
flags = self.flags[ptop]
mpool: Optional[Queue[Mpqe]] = None
if self.mtag.prefer_mt and self.args.mtag_mt > 1:
mpool = self._start_mpool()
@ -1140,6 +1164,11 @@ class Up2k(object):
if rd.startswith("//") or fn.startswith("//"):
rd, fn = s3dec(rd, fn)
if "mtp" in flags:
q = "insert into mt values (?,'t:mtp','a')"
with self.mutex:
cur.execute(q, (w[:16],))
abspath = os.path.join(ptop, rd, fn)
self.pp.msg = "c{} {}".format(nq, abspath)
if not mpool:
@ -1670,9 +1699,8 @@ class Up2k(object):
self._job_volchk(cj)
cj["name"] = sanitize_fn(cj["name"], "", [".prologue.html", ".epilogue.html"])
cj["poke"] = time.time()
cj["poke"] = now = self.db_act = time.time()
wark = self._get_wark(cj)
now = time.time()
job = None
pdir = djoin(cj["ptop"], cj["prel"])
try:
@ -1932,6 +1960,7 @@ class Up2k(object):
self, ptop: str, wark: str, chash: str
) -> tuple[int, list[int], str, float, bool]:
with self.mutex:
self.db_act = time.time()
job = self.registry[ptop].get(wark)
if not job:
known = " ".join([x for x in self.registry[ptop].keys()])
@ -1982,6 +2011,7 @@ class Up2k(object):
def confirm_chunk(self, ptop: str, wark: str, chash: str) -> tuple[int, str]:
with self.mutex:
self.db_act = time.time()
try:
job = self.registry[ptop][wark]
pdir = os.path.join(job["ptop"], job["prel"])
@ -2016,6 +2046,7 @@ class Up2k(object):
self._finish_upload(ptop, wark)
def _finish_upload(self, ptop: str, wark: str) -> None:
self.db_act = time.time()
try:
job = self.registry[ptop][wark]
pdir = os.path.join(job["ptop"], job["prel"])
@ -2158,6 +2189,7 @@ class Up2k(object):
def _handle_rm(
self, uname: str, ip: str, vpath: str
) -> tuple[int, list[str], list[str]]:
self.db_act = time.time()
try:
permsets = [[True, False, False, True]]
vn, rem = self.asrv.vfs.get(vpath, uname, *permsets[0])
@ -2242,6 +2274,7 @@ class Up2k(object):
return n_files, ok + ok2, ng + ng2
def handle_mv(self, uname: str, svp: str, dvp: str) -> str:
self.db_act = time.time()
svn, srem = self.asrv.vfs.get(svp, uname, True, False, True)
svn, srem = svn.get_dbv(srem)
sabs = svn.canonical(srem, False)

View file

@ -185,7 +185,7 @@ brew install python@2
pip install virtualenv
# readme toc
cat README.md | awk 'function pr() { if (!h) {return}; if (/^ *[*!#|]/||!s) {printf "%s\n",h;h=0;return}; if (/.../) {printf "%s - %s\n",h,$0;h=0}; }; /^#/{s=1;pr()} /^#* *(file indexing|install on android|dev env setup|just the sfx|complete release|optional gpl stuff)|`$/{s=0} /^#/{lv=length($1);sub(/[^ ]+ /,"");bab=$0;gsub(/ /,"-",bab); h=sprintf("%" ((lv-1)*4+1) "s [%s](#%s)", "*",$0,bab);next} !h{next} {sub(/ .*/,"");sub(/[:,]$/,"")} {pr()}' > toc; grep -E '^## readme toc' -B1000 -A2 <README.md >p1; grep -E '^## quickstart' -B2 -A999999 <README.md >p2; (cat p1; grep quickstart -A1000 <toc; cat p2) >README.md; rm p1 p2 toc
cat README.md | awk 'function pr() { if (!h) {return}; if (/^ *[*!#|]/||!s) {printf "%s\n",h;h=0;return}; if (/.../) {printf "%s - %s\n",h,$0;h=0}; }; /^#/{s=1;pr()} /^#* *(file indexing|exclude-patterns|install on android|dev env setup|just the sfx|complete release|optional gpl stuff)|`$/{s=0} /^#/{lv=length($1);sub(/[^ ]+ /,"");bab=$0;gsub(/ /,"-",bab); h=sprintf("%" ((lv-1)*4+1) "s [%s](#%s)", "*",$0,bab);next} !h{next} {sub(/ .*/,"");sub(/[:,]$/,"")} {pr()}' > toc; grep -E '^## readme toc' -B1000 -A2 <README.md >p1; grep -E '^## quickstart' -B2 -A999999 <README.md >p2; (cat p1; grep quickstart -A1000 <toc; cat p2) >README.md; rm p1 p2 toc
# fix firefox phantom breakpoints,
# suggestions from bugtracker, doesn't work (debugger is not attachable)