show total directory size in listings

sizes are computed during `-e2ds` indexing, and new uploads are counted as they arrive, but a rescan is necessary after a move or delete
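
Under the hood this is a single new sqlite table; a minimal standalone sketch of the schema (copied from `_add_ds_tab` in the diff below), assuming a plain in-memory `sqlite3` handle instead of copyparty's up2k database:

import sqlite3

# rd = volume-relative folder path, sz = total size of the folder's
# contents (recursive), nf = number of files inside it (recursive)
db = sqlite3.connect(":memory:")
db.execute("create table ds (rd text, sz int, nf int)")
db.execute("create index ds_rd on ds(rd)")
db.execute("insert into ds values (?,?,?)", ("music/flac", 123456789, 42))

# rendering a listing then costs one indexed lookup per folder:
print(db.execute("select sz, nf from ds where rd=? limit 1", ("music/flac",)).fetchone())
# (123456789, 42)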
ed 2024-09-15 23:01:18 +00:00
parent 7d64879ba8
commit 427597b603
7 changed files with 101 additions and 33 deletions

View file

@@ -1968,6 +1968,7 @@ below are some tweaks roughly ordered by usefulness:
  * and also makes thumbnails load faster, regardless of e2d/e2t
* `--dedup` enables deduplication and thus avoids writing to the HDD if someone uploads a dupe
* `--safe-dedup 1` makes deduplication much faster during upload by skipping verification of file contents; safe if there is no other software editing/moving the files in the volumes
+ * `--no-dirsz` shows the size of folder inodes instead of the total size of the contents, giving about 30% faster folder listings
* `--no-hash .` when indexing a network-disk if you don't care about the actual filehashes and only want the names/tags searchable
* if your volumes are on a network-disk such as NFS / SMB / s3, specifying larger values for `--iobuf` and/or `--s-rd-sz` and/or `--s-wr-sz` may help; try setting all of them to `524288` or `1048576` or `4194304`
* `--no-htp --hash-mt=0 --mtag-mt=1 --th-mt=1` minimizes the number of threads; can help in some eccentric environments (like the vscode debugger)

View file

@@ -1362,6 +1362,8 @@ def add_db_general(ap, hcores):
    ap2.add_argument("--hist", metavar="PATH", type=u, default="", help="where to store volume data (db, thumbs); default is a folder named \".hist\" inside each volume (volflag=hist)")
    ap2.add_argument("--no-hash", metavar="PTN", type=u, default="", help="regex: disable hashing of matching absolute-filesystem-paths during e2ds folder scans (volflag=nohash)")
    ap2.add_argument("--no-idx", metavar="PTN", type=u, default=noidx, help="regex: disable indexing of matching absolute-filesystem-paths during e2ds folder scans (volflag=noidx)")
+    ap2.add_argument("--no-dirsz", action="store_true", help="do not show total recursive size of folders in listings, show inode size instead; slightly faster (volflag=nodirsz)")
+    ap2.add_argument("--re-dirsz", action="store_true", help="if the directory-sizes in the UI are bonkers, use this along with \033[33m-e2dsa\033[0m to rebuild the index from scratch")
    ap2.add_argument("--no-dhash", action="store_true", help="disable rescan acceleration; do full database integrity check -- makes the db ~5%% smaller and bootup/rescans 3~10x slower")
    ap2.add_argument("--re-dhash", action="store_true", help="force a cache rebuild on startup; enable this once if it gets out of sync (should never be necessary)")
    ap2.add_argument("--no-forget", action="store_true", help="never forget indexed files, even when deleted from disk -- makes it impossible to ever upload the same file twice -- only useful for offloading uploads to a cloud service or something (volflag=noforget)")

View file

@@ -13,6 +13,7 @@ def vf_bmap() -> dict[str, str]:
        "dav_rt": "davrt",
        "ed": "dots",
        "hardlink_only": "hardlinkonly",
+        "no_dirsz": "nodirsz",
        "no_dupe": "nodupe",
        "no_forget": "noforget",
        "no_pipe": "nopipe",

View file

@@ -5214,13 +5214,23 @@ class HttpCli(object):
                    fe["tags"] = tags

            if icur:
+                for fe in dirs:
+                    fe["tags"] = ODict()
+
                lmte = list(mte)
                if self.can_admin:
                    lmte.extend(("up_ip", ".up_at"))

+                if "nodirsz" not in vf:
+                    tagset.add(".files")
+                    vdir = "%s/" % (rd,) if rd else ""
+                    q = "select sz, nf from ds where rd=? limit 1"
+                    for fe in dirs:
+                        hit = icur.execute(q, (vdir + fe["name"],)).fetchone()
+                        if hit:
+                            (fe["sz"], fe["tags"][".files"]) = hit
+
                taglist = [k for k in lmte if k in tagset]
-                for fe in dirs:
-                    fe["tags"] = ODict()
            else:
                taglist = list(tagset)
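
A standalone sketch of what the new branch does: when `nodirsz` is not set, each subfolder in the listing gets its recursive size and a `.files` count from one indexed `ds` lookup. The cursor and `dirs` entries below are hypothetical stand-ins for copyparty's real structures:

import sqlite3

icur = sqlite3.connect(":memory:").cursor()
icur.execute("create table ds (rd text, sz int, nf int)")
icur.execute("insert into ds values ('pics/cats', 9001, 7)")

rd = "pics"  # the folder being listed
dirs = [{"name": "cats", "tags": {}}, {"name": "dogs", "tags": {}}]

vdir = "%s/" % (rd,) if rd else ""
q = "select sz, nf from ds where rd=? limit 1"
for fe in dirs:
    hit = icur.execute(q, (vdir + fe["name"],)).fetchone()
    if hit:
        fe["sz"], fe["tags"][".files"] = hit

print(dirs)  # cats gains sz=9001 and tags={'.files': 7}; dogs has no ds row yet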

View file

@@ -1204,6 +1204,10 @@ class Up2k(object):
            # ~/.wine/dosdevices/z:/ and such
            excl.extend(("/dev", "/proc", "/run", "/sys"))

+        if self.args.re_dirsz:
+            db.c.execute("delete from ds")
+            db.n += 1
+
        rtop = absreal(top)
        n_add = n_rm = 0
        try:
@@ -1212,7 +1216,7 @@
                self.log(t % (vol.vpath, rtop), 6)
                return True, False

-            n_add = self._build_dir(
+            n_add, _, _ = self._build_dir(
                db,
                top,
                set(excl),
@@ -1286,17 +1290,18 @@
        cst: os.stat_result,
        dev: int,
        xvol: bool,
-    ) -> int:
+    ) -> tuple[int, int, int]:
        if xvol and not rcdir.startswith(top):
            self.log("skip xvol: [{}] -> [{}]".format(cdir, rcdir), 6)
-            return 0
+            return 0, 0, 0

        if rcdir in seen:
            t = "bailing from symlink loop,\n prev: {}\n curr: {}\n from: {}"
            self.log(t.format(seen[-1], rcdir, cdir), 3)
-            return 0
+            return 0, 0, 0

-        ret = 0
+        # total-files-added, total-num-files, recursive-size
+        tfa = tnf = rsz = 0
        seen = seen + [rcdir]
        unreg: list[str] = []
        files: list[tuple[int, int, str]] = []
@@ -1321,7 +1326,7 @@
        partials = set([x[0] for x in gl if "PARTIAL" in x[0]])
        for iname, inf in gl:
            if self.stop:
-                return -1
+                return -1, 0, 0

            rp = rds + iname
            abspath = cdirs + iname
@@ -1358,7 +1363,7 @@
                    continue

            # self.log(" dir: {}".format(abspath))
            try:
-                ret += self._build_dir(
+                i1, i2, i3 = self._build_dir(
                    db,
                    top,
                    excl,
@@ -1373,6 +1378,9 @@
                    dev,
                    xvol,
                )
+                tfa += i1
+                tnf += i2
+                rsz += i3
            except:
                t = "failed to index subdir [{}]:\n{}"
                self.log(t.format(abspath, min_ex()), c=1)
@@ -1391,6 +1399,7 @@
                # placeholder for unfinished upload
                continue

+            rsz += sz
            files.append((sz, lmod, iname))
            liname = iname.lower()
            if (
@@ -1412,6 +1421,15 @@
                ):
                    cv = iname

+        if not self.args.no_dirsz:
+            tnf += len(files)
+            q = "select sz, nf from ds where rd=? limit 1"
+            db_sz, db_nf = db.c.execute(q, (rd,)).fetchone() or (-1, -1)
+            if rsz != db_sz or tnf != db_nf:
+                db.c.execute("delete from ds where rd=?", (rd,))
+                db.c.execute("insert into ds values (?,?,?)", (rd, rsz, tnf))
+                db.n += 1
+
        # folder of 1000 files = ~1 MiB RAM best-case (tiny filenames);
        # free up stuff we're done with before dhashing
        gl = []
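
Taken together, the hunks above turn `_build_dir` into an accumulator: each recursion returns `(tfa, tnf, rsz)`, the parent folds the child totals into its own, and the result is cached in `ds` whenever it disagrees with the stored row. A standalone sketch of just the accumulation, using plain `os.scandir` and none of the real code's exclusion, symlink, or database handling:

import os

def scan(cdir: str) -> tuple[int, int]:
    # returns (tnf, rsz): recursive filecount and bytesize, like tnf/rsz above
    tnf = rsz = 0
    for it in os.scandir(cdir):
        if it.is_dir(follow_symlinks=False):
            i2, i3 = scan(it.path)
            tnf += i2
            rsz += i3
        elif it.is_file(follow_symlinks=False):
            tnf += 1
            rsz += it.stat().st_size

    # here the real code compares (rsz, tnf) against this folder's ds row
    # and rewrites the row if they differ
    return tnf, rsz

print(scan("."))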
@@ -1435,7 +1453,7 @@
            c = db.c.execute(sql, (drd, dhash))
            if c.fetchone():
-                return ret
+                return tfa, tnf, rsz

        if cv and rd:
            # mojibake not supported (for performance / simplicity):
@@ -1452,7 +1470,7 @@
        seen_files = set([x[2] for x in files])  # for dropcheck
        for sz, lmod, fn in files:
            if self.stop:
-                return -1
+                return -1, 0, 0

            rp = rds + fn
            abspath = cdirs + fn
@@ -1485,7 +1503,7 @@
                    )
                    self.log(t)
                    self.db_rm(db.c, rd, fn, 0)
-                    tfa += 1
+                    tfa += 1
                    db.n += 1
                    in_db = []
            else:
@@ -1510,7 +1528,7 @@
                    continue

                if not hashes:
-                    return -1
+                    return -1, 0, 0

                wark = up2k_wark_from_hashlist(self.salt, sz, hashes)
@@ -1521,7 +1539,7 @@
            # skip upload hooks by not providing vflags
            self.db_add(db.c, {}, rd, fn, lmod, sz, "", "", wark, "", "", ip, at)
            db.n += 1
-            ret += 1
+            tfa += 1
            td = time.time() - db.t
            if db.n >= 4096 or td >= 60:
                self.log("commit {} new files".format(db.n))
@@ -1534,33 +1552,38 @@
            db.c.execute("insert into dh values (?,?)", (drd, dhash))  # type: ignore

        if self.stop:
-            return -1
+            return -1, 0, 0

        # drop shadowed folders
        for sh_rd in unreg:
            n = 0
-            q = "select count(w) from up where (rd=? or rd like ?||'%') and +at == 0"
+            q = "select count(w) from up where (rd=? or rd like ?||'/%') and +at == 0"
            for sh_erd in [sh_rd, "//" + w8b64enc(sh_rd)]:
                try:
-                    n = db.c.execute(q, (sh_erd, sh_erd + "/")).fetchone()[0]
+                    erd_erd = (sh_erd, sh_erd)
+                    n = db.c.execute(q, erd_erd).fetchone()[0]
                    break
                except:
                    pass

+            assert erd_erd  # type: ignore  # !rm
            if n:
                t = "forgetting {} shadowed autoindexed files in [{}] > [{}]"
                self.log(t.format(n, top, sh_rd))
-                assert sh_erd  # type: ignore  # !rm
-                q = "delete from dh where (d = ? or d like ?||'%')"
-                db.c.execute(q, (sh_erd, sh_erd + "/"))
-                q = "delete from up where (rd=? or rd like ?||'%') and +at == 0"
-                db.c.execute(q, (sh_erd, sh_erd + "/"))
-                ret += n
+                q = "delete from dh where (d = ? or d like ?||'/%')"
+                db.c.execute(q, erd_erd)
+                q = "delete from up where (rd=? or rd like ?||'/%') and +at == 0"
+                db.c.execute(q, erd_erd)
+                tfa += n

+            q = "delete from ds where (rd=? or rd like ?||'/%')"
+            db.c.execute(q, erd_erd)

        if n4g:
-            return ret
+            return tfa, tnf, rsz

        # drop missing files
        q = "select fn from up where rd = ?"
@@ -1578,7 +1601,7 @@
        if n_rm:
            self.log("forgot {} deleted files".format(n_rm))

-        return ret
+        return tfa, tnf, rsz

    def _drop_lost(self, cur: "sqlite3.Cursor", top: str, excl: list[str]) -> int:
        rm = []
@@ -1796,13 +1819,13 @@
            return 0

        with self.mutex:
+            q = "update up set w=?, sz=?, mt=? where rd=? and fn=?"
            for rd, fn, w, sz, mt in rewark:
-                q = "update up set w = ?, sz = ?, mt = ? where rd = ? and fn = ? limit 1"
                cur.execute(q, (w, sz, int(mt), rd, fn))

-            for _, _, w in f404:
-                q = "delete from up where w = ? limit 1"
-                cur.execute(q, (w,))
+            if f404:
+                q = "delete from up where rd=? and fn=? and +w=?"
+                cur.executemany(q, f404)

            cur.connection.commit()
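
A minimal sketch of the reworked `f404` cleanup above: files that turned out to be missing are now deleted by exact `(rd, fn, wark)` in a single `executemany` batch, rather than one `delete ... where w=? limit 1` per wark, which could match a different copy of a file sharing the same wark. The table and rows below are hypothetical stand-ins:

import sqlite3

cur = sqlite3.connect(":memory:").cursor()
cur.execute("create table up (w text, rd text, fn text)")
cur.execute("insert into up values ('wark1', 'music', 'gone.flac')")

f404 = [("music", "gone.flac", "wark1")]  # (rd, fn, wark) of vanished files
if f404:
    q = "delete from up where rd=? and fn=? and +w=?"  # +w stops sqlite using an index on w
    cur.executemany(q, f404)
cur.connection.commit()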
@@ -2478,6 +2501,7 @@
            self._add_xiu_tab(cur)
            self._add_cv_tab(cur)
            self._add_idx_up_vp(cur, db_path)
+            self._add_ds_tab(cur)

        try:
            nfiles = next(cur.execute("select count(w) from up"))[0]
@@ -2591,6 +2615,7 @@
        self._add_dhash_tab(cur)
        self._add_xiu_tab(cur)
        self._add_cv_tab(cur)
+        self._add_ds_tab(cur)
        self.log("created DB at {}".format(db_path))
        return cur
@@ -2684,6 +2709,22 @@
        cur.connection.commit()
        cur.execute("vacuum")

+    def _add_ds_tab(self, cur: "sqlite3.Cursor") -> None:
+        # v5d -> v5e
+        try:
+            cur.execute("select rd, sz from ds limit 1").fetchone()
+            return
+        except:
+            pass
+
+        for cmd in [
+            r"create table ds (rd text, sz int, nf int)",
+            r"create index ds_rd on ds(rd)",
+        ]:
+            cur.execute(cmd)
+
+        cur.connection.commit()
+
    def wake_rescanner(self):
        with self.rescan_cond:
            self.rescan_cond.notify_all()
@@ -3693,6 +3734,19 @@
            except:
                pass

+        if "nodirsz" not in vflags:
+            try:
+                q = "update ds set nf=nf+1, sz=sz+? where rd=?"
+                q2 = "insert into ds values(?,?,1)"
+                while True:
+                    if not db.execute(q, (sz, rd)).rowcount:
+                        db.execute(q2, (rd, sz))
+                    if not rd:
+                        break
+                    rd = rd.rsplit("/", 1)[0] if "/" in rd else ""
+            except:
+                pass
+
    def handle_rm(
        self,
        uname: str,
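
And this is the upload-time half of the commit message: each finished upload bumps `nf`/`sz` for its folder and for every ancestor up to the volume root (whose `rd` is the empty string), inserting rows that don't exist yet. A standalone sketch, assuming an in-memory db in place of the up2k cursor:

import sqlite3

db = sqlite3.connect(":memory:")
db.execute("create table ds (rd text, sz int, nf int)")

def count_upload(rd: str, sz: int) -> None:
    q = "update ds set nf=nf+1, sz=sz+? where rd=?"
    q2 = "insert into ds values(?,?,1)"
    while True:
        if not db.execute(q, (sz, rd)).rowcount:
            db.execute(q2, (rd, sz))  # first file seen under this folder
        if not rd:
            break  # "" is the volume root; done
        rd = rd.rsplit("/", 1)[0] if "/" in rd else ""

count_upload("music/flac", 1024)
print(db.execute("select * from ds order by rd").fetchall())
# [('', 1024, 1), ('music', 1024, 1), ('music/flac', 1024, 1)]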

View file

@@ -331,7 +331,7 @@ MAGIC_MAP = {"jpeg": "jpg"}

DEF_EXP = "self.ip self.ua self.uname self.host cfg.name cfg.logout vf.scan vf.thsize hdr.cf_ipcountry srv.itime srv.htime"
-DEF_MTE = "circle,album,.tn,artist,title,.bpm,key,.dur,.q,.vq,.aq,vc,ac,fmt,res,.fps,ahash,vhash"
+DEF_MTE = ".files,circle,album,.tn,artist,title,.bpm,key,.dur,.q,.vq,.aq,vc,ac,fmt,res,.fps,ahash,vhash"
DEF_MTH = ".vq,.aq,vc,ac,fmt,res,.fps"
@@ -482,8 +482,8 @@ VERSIONS = (
try:
-    _b64_enc_tl = bytes.maketrans(b'+/', b'-_')
-    _b64_dec_tl = bytes.maketrans(b'-_', b'+/')
+    _b64_enc_tl = bytes.maketrans(b"+/", b"-_")
+    _b64_dec_tl = bytes.maketrans(b"-_", b"+/")

    def ub64enc(bs: bytes) -> bytes:
        x = binascii.b2a_base64(bs, newline=False)

View file

@@ -122,7 +122,7 @@ class Cfg(Namespace):
    def __init__(self, a=None, v=None, c=None, **ka0):
        ka = {}

-        ex = "chpw daw dav_auth dav_inf dav_mac dav_rt e2d e2ds e2dsa e2t e2ts e2tsr e2v e2vu e2vp early_ban ed emp exp force_js getmod grid gsel hardlink ih ihead magic hardlink_only nid nih no_acode no_athumb no_dav no_db_ip no_del no_dupe no_lifetime no_logues no_mv no_pipe no_poll no_readme no_robots no_sb_md no_sb_lg no_scandir no_tarcmp no_thumb no_vthumb no_zip nrand nw og og_no_head og_s_title q rand smb srch_dbg stats uqe vague_403 vc ver write_uplog xdev xlink xvol zs"
+        ex = "chpw daw dav_auth dav_inf dav_mac dav_rt e2d e2ds e2dsa e2t e2ts e2tsr e2v e2vu e2vp early_ban ed emp exp force_js getmod grid gsel hardlink ih ihead magic hardlink_only nid nih no_acode no_athumb no_dav no_db_ip no_del no_dirsz no_dupe no_lifetime no_logues no_mv no_pipe no_poll no_readme no_robots no_sb_md no_sb_lg no_scandir no_tarcmp no_thumb no_vthumb no_zip nrand nw og og_no_head og_s_title q rand re_dirsz smb srch_dbg stats uqe vague_403 vc ver write_uplog xdev xlink xvol zs"
        ka.update(**{k: False for k in ex.split()})

        ex = "dedup dotpart dotsrch hook_v no_dhash no_fastboot no_fpool no_htp no_rescan no_sendfile no_ses no_snap no_up_list no_voldump re_dhash plain_ip"