mirror of
https://github.com/9001/copyparty.git
synced 2025-08-18 01:22:13 -06:00
optimize indexer for huge filesystems
This commit is contained in:
parent
b9cf8f3973
commit
b0fd8bf7d4
|
@ -29,6 +29,8 @@ from .util import (
|
||||||
atomic_move,
|
atomic_move,
|
||||||
quotep,
|
quotep,
|
||||||
vsplit,
|
vsplit,
|
||||||
|
w8b64enc,
|
||||||
|
w8b64dec,
|
||||||
s3enc,
|
s3enc,
|
||||||
s3dec,
|
s3dec,
|
||||||
rmdirs,
|
rmdirs,
|
||||||
|
@ -479,11 +481,18 @@ class Up2k(object):
|
||||||
if WINDOWS:
|
if WINDOWS:
|
||||||
excl = [x.replace("/", "\\") for x in excl]
|
excl = [x.replace("/", "\\") for x in excl]
|
||||||
|
|
||||||
n_add = self._build_dir(dbw, top, set(excl), top, nohash, [])
|
n_add = n_rm = 0
|
||||||
n_rm = self._drop_lost(dbw[0], top)
|
try:
|
||||||
|
n_add = self._build_dir(dbw, top, set(excl), top, nohash, [])
|
||||||
|
n_rm = self._drop_lost(dbw[0], top)
|
||||||
|
except:
|
||||||
|
m = "failed to index volume [{}]:\n{}"
|
||||||
|
self.log(m.format(top, min_ex()), c=1)
|
||||||
|
|
||||||
if dbw[1]:
|
if dbw[1]:
|
||||||
self.log("commit {} new files".format(dbw[1]))
|
self.log("commit {} new files".format(dbw[1]))
|
||||||
dbw[0].connection.commit()
|
|
||||||
|
dbw[0].connection.commit()
|
||||||
|
|
||||||
return True, n_add or n_rm or do_vac
|
return True, n_add or n_rm or do_vac
|
||||||
|
|
||||||
|
@ -498,6 +507,7 @@ class Up2k(object):
|
||||||
self.pp.msg = "a{} {}".format(self.pp.n, cdir)
|
self.pp.msg = "a{} {}".format(self.pp.n, cdir)
|
||||||
histpath = self.asrv.vfs.histtab[top]
|
histpath = self.asrv.vfs.histtab[top]
|
||||||
ret = 0
|
ret = 0
|
||||||
|
seen_files = {}
|
||||||
g = statdir(self.log_func, not self.args.no_scandir, False, cdir)
|
g = statdir(self.log_func, not self.args.no_scandir, False, cdir)
|
||||||
for iname, inf in sorted(g):
|
for iname, inf in sorted(g):
|
||||||
abspath = os.path.join(cdir, iname)
|
abspath = os.path.join(cdir, iname)
|
||||||
|
@ -507,9 +517,14 @@ class Up2k(object):
|
||||||
if abspath in excl or abspath == histpath:
|
if abspath in excl or abspath == histpath:
|
||||||
continue
|
continue
|
||||||
# self.log(" dir: {}".format(abspath))
|
# self.log(" dir: {}".format(abspath))
|
||||||
ret += self._build_dir(dbw, top, excl, abspath, nohash, seen)
|
try:
|
||||||
|
ret += self._build_dir(dbw, top, excl, abspath, nohash, seen)
|
||||||
|
except:
|
||||||
|
m = "failed to index subdir [{}]:\n{}"
|
||||||
|
self.log(m.format(abspath, min_ex()), c=1)
|
||||||
else:
|
else:
|
||||||
# self.log("file: {}".format(abspath))
|
# self.log("file: {}".format(abspath))
|
||||||
|
seen_files[iname] = 1
|
||||||
rp = abspath[len(top) + 1 :]
|
rp = abspath[len(top) + 1 :]
|
||||||
if WINDOWS:
|
if WINDOWS:
|
||||||
rp = rp.replace("\\", "/").strip("/")
|
rp = rp.replace("\\", "/").strip("/")
|
||||||
|
@ -568,34 +583,65 @@ class Up2k(object):
|
||||||
dbw[0].connection.commit()
|
dbw[0].connection.commit()
|
||||||
dbw[1] = 0
|
dbw[1] = 0
|
||||||
dbw[2] = time.time()
|
dbw[2] = time.time()
|
||||||
|
|
||||||
|
# drop missing files
|
||||||
|
rd = cdir[len(top) + 1 :].strip("/")
|
||||||
|
if WINDOWS:
|
||||||
|
rd = rd.replace("\\", "/").strip("/")
|
||||||
|
|
||||||
|
q = "select fn from up where rd = ?"
|
||||||
|
try:
|
||||||
|
c = dbw[0].execute(q, (rd,))
|
||||||
|
except:
|
||||||
|
c = dbw[0].execute(q, ("//" + w8b64enc(rd),))
|
||||||
|
|
||||||
|
hits = [w8b64dec(x[2:]) if x.startswith("//") else x for (x,) in c]
|
||||||
|
rm_files = [x for x in hits if x not in seen_files]
|
||||||
|
n_rm = len(rm_files)
|
||||||
|
for fn in rm_files:
|
||||||
|
self.db_rm(dbw[0], rd, fn)
|
||||||
|
|
||||||
|
if n_rm:
|
||||||
|
self.log("forgot {} deleted files".format(n_rm))
|
||||||
|
|
||||||
return ret
|
return ret
|
||||||
|
|
||||||
def _drop_lost(self, cur, top):
|
def _drop_lost(self, cur, top):
|
||||||
rm = []
|
rm = []
|
||||||
|
n_rm = 0
|
||||||
nchecked = 0
|
nchecked = 0
|
||||||
nfiles = next(cur.execute("select count(w) from up"))[0]
|
# `_build_dir` did all the files, now do dirs
|
||||||
c = cur.execute("select rd, fn from up")
|
ndirs = next(cur.execute("select count(distinct rd) from up"))[0]
|
||||||
for drd, dfn in c:
|
c = cur.execute("select distinct rd from up order by rd desc")
|
||||||
|
for (drd,) in c:
|
||||||
nchecked += 1
|
nchecked += 1
|
||||||
if drd.startswith("//") or dfn.startswith("//"):
|
if drd.startswith("//"):
|
||||||
drd, dfn = s3dec(drd, dfn)
|
rd = w8b64dec(drd[2:])
|
||||||
|
else:
|
||||||
|
rd = drd
|
||||||
|
|
||||||
abspath = os.path.join(top, drd, dfn)
|
abspath = os.path.join(top, rd)
|
||||||
# almost zero overhead dw
|
self.pp.msg = "b{} {}".format(ndirs - nchecked, abspath)
|
||||||
self.pp.msg = "b{} {}".format(nfiles - nchecked, abspath)
|
|
||||||
try:
|
try:
|
||||||
if not bos.path.exists(abspath):
|
if os.path.isdir(abspath):
|
||||||
rm.append([drd, dfn])
|
continue
|
||||||
except Exception as ex:
|
except:
|
||||||
self.log("stat-rm: {} @ [{}]".format(repr(ex), abspath))
|
pass
|
||||||
|
|
||||||
if rm:
|
rm.append(drd)
|
||||||
self.log("forgetting {} deleted files".format(len(rm)))
|
|
||||||
for rd, fn in rm:
|
|
||||||
# self.log("{} / {}".format(rd, fn))
|
|
||||||
self.db_rm(cur, rd, fn)
|
|
||||||
|
|
||||||
return len(rm)
|
if not rm:
|
||||||
|
return 0
|
||||||
|
|
||||||
|
q = "select count(w) from up where rd = ?"
|
||||||
|
for rd in rm:
|
||||||
|
n_rm += next(cur.execute(q, (rd,)))[0]
|
||||||
|
|
||||||
|
self.log("forgetting {} deleted dirs, {} files".format(len(rm), n_rm))
|
||||||
|
for rd in rm:
|
||||||
|
cur.execute("delete from up where rd = ?", (rd,))
|
||||||
|
|
||||||
|
return n_rm
|
||||||
|
|
||||||
def _build_tags_index(self, vol):
|
def _build_tags_index(self, vol):
|
||||||
ptop = vol.realpath
|
ptop = vol.realpath
|
||||||
|
|
Loading…
Reference in a new issue