show total directory size in listings

Sizes are computed during `-e2ds` indexing, and new uploads
are counted, but a rescan is necessary after a move or delete.
This commit is contained in:
ed 2024-09-15 23:01:18 +00:00
parent 7d64879ba8
commit 427597b603
7 changed files with 101 additions and 33 deletions

View file

@ -1968,6 +1968,7 @@ below are some tweaks roughly ordered by usefulness:
* and also makes thumbnails load faster, regardless of e2d/e2t
* `--dedup` enables deduplication and thus avoids writing to the HDD if someone uploads a dupe
* `--safe-dedup 1` makes deduplication much faster during upload by skipping verification of file contents; safe if there is no other software editing/moving the files in the volumes
* `--no-dirsz` shows the size of folder inodes instead of the total size of the contents, giving about 30% faster folder listings
* `--no-hash .` when indexing a network-disk if you don't care about the actual filehashes and only want the names/tags searchable
* if your volumes are on a network-disk such as NFS / SMB / s3, specifying larger values for `--iobuf` and/or `--s-rd-sz` and/or `--s-wr-sz` may help; try setting all of them to `524288` or `1048576` or `4194304`
* `--no-htp --hash-mt=0 --mtag-mt=1 --th-mt=1` minimizes the number of threads; can help in some eccentric environments (like the vscode debugger)

View file

@ -1362,6 +1362,8 @@ def add_db_general(ap, hcores):
ap2.add_argument("--hist", metavar="PATH", type=u, default="", help="where to store volume data (db, thumbs); default is a folder named \".hist\" inside each volume (volflag=hist)")
ap2.add_argument("--no-hash", metavar="PTN", type=u, default="", help="regex: disable hashing of matching absolute-filesystem-paths during e2ds folder scans (volflag=nohash)")
ap2.add_argument("--no-idx", metavar="PTN", type=u, default=noidx, help="regex: disable indexing of matching absolute-filesystem-paths during e2ds folder scans (volflag=noidx)")
ap2.add_argument("--no-dirsz", action="store_true", help="do not show total recursive size of folders in listings, show inode size instead; slightly faster (volflag=nodirsz)")
ap2.add_argument("--re-dirsz", action="store_true", help="if the directory-sizes in the UI are bonkers, use this along with \033[33m-e2dsa\033[0m to rebuild the index from scratch")
ap2.add_argument("--no-dhash", action="store_true", help="disable rescan acceleration; do full database integrity check -- makes the db ~5%% smaller and bootup/rescans 3~10x slower")
ap2.add_argument("--re-dhash", action="store_true", help="force a cache rebuild on startup; enable this once if it gets out of sync (should never be necessary)")
ap2.add_argument("--no-forget", action="store_true", help="never forget indexed files, even when deleted from disk -- makes it impossible to ever upload the same file twice -- only useful for offloading uploads to a cloud service or something (volflag=noforget)")

View file

@ -13,6 +13,7 @@ def vf_bmap() -> dict[str, str]:
"dav_rt": "davrt",
"ed": "dots",
"hardlink_only": "hardlinkonly",
"no_dirsz": "nodirsz",
"no_dupe": "nodupe",
"no_forget": "noforget",
"no_pipe": "nopipe",

View file

@ -5214,13 +5214,23 @@ class HttpCli(object):
fe["tags"] = tags
if icur:
for fe in dirs:
fe["tags"] = ODict()
lmte = list(mte)
if self.can_admin:
lmte.extend(("up_ip", ".up_at"))
taglist = [k for k in lmte if k in tagset]
if "nodirsz" not in vf:
tagset.add(".files")
vdir = "%s/" % (rd,) if rd else ""
q = "select sz, nf from ds where rd=? limit 1"
for fe in dirs:
fe["tags"] = ODict()
hit = icur.execute(q, (vdir + fe["name"],)).fetchone()
if hit:
(fe["sz"], fe["tags"][".files"]) = hit
taglist = [k for k in lmte if k in tagset]
else:
taglist = list(tagset)

View file

@ -1204,6 +1204,10 @@ class Up2k(object):
# ~/.wine/dosdevices/z:/ and such
excl.extend(("/dev", "/proc", "/run", "/sys"))
if self.args.re_dirsz:
db.c.execute("delete from ds")
db.n += 1
rtop = absreal(top)
n_add = n_rm = 0
try:
@ -1212,7 +1216,7 @@ class Up2k(object):
self.log(t % (vol.vpath, rtop), 6)
return True, False
n_add = self._build_dir(
n_add, _, _ = self._build_dir(
db,
top,
set(excl),
@ -1286,17 +1290,18 @@ class Up2k(object):
cst: os.stat_result,
dev: int,
xvol: bool,
) -> int:
) -> tuple[int, int, int]:
if xvol and not rcdir.startswith(top):
self.log("skip xvol: [{}] -> [{}]".format(cdir, rcdir), 6)
return 0
return 0, 0, 0
if rcdir in seen:
t = "bailing from symlink loop,\n prev: {}\n curr: {}\n from: {}"
self.log(t.format(seen[-1], rcdir, cdir), 3)
return 0
return 0, 0, 0
ret = 0
# total-files-added, total-num-files, recursive-size
tfa = tnf = rsz = 0
seen = seen + [rcdir]
unreg: list[str] = []
files: list[tuple[int, int, str]] = []
@ -1321,7 +1326,7 @@ class Up2k(object):
partials = set([x[0] for x in gl if "PARTIAL" in x[0]])
for iname, inf in gl:
if self.stop:
return -1
return -1, 0, 0
rp = rds + iname
abspath = cdirs + iname
@ -1358,7 +1363,7 @@ class Up2k(object):
continue
# self.log(" dir: {}".format(abspath))
try:
ret += self._build_dir(
i1, i2, i3 = self._build_dir(
db,
top,
excl,
@ -1373,6 +1378,9 @@ class Up2k(object):
dev,
xvol,
)
tfa += i1
tnf += i2
rsz += i3
except:
t = "failed to index subdir [{}]:\n{}"
self.log(t.format(abspath, min_ex()), c=1)
@ -1391,6 +1399,7 @@ class Up2k(object):
# placeholder for unfinished upload
continue
rsz += sz
files.append((sz, lmod, iname))
liname = iname.lower()
if (
@ -1412,6 +1421,15 @@ class Up2k(object):
):
cv = iname
if not self.args.no_dirsz:
tnf += len(files)
q = "select sz, nf from ds where rd=? limit 1"
db_sz, db_nf = db.c.execute(q, (rd,)).fetchone() or (-1, -1)
if rsz != db_sz or tnf != db_nf:
db.c.execute("delete from ds where rd=?", (rd,))
db.c.execute("insert into ds values (?,?,?)", (rd, rsz, tnf))
db.n += 1
# folder of 1000 files = ~1 MiB RAM best-case (tiny filenames);
# free up stuff we're done with before dhashing
gl = []
@ -1435,7 +1453,7 @@ class Up2k(object):
c = db.c.execute(sql, (drd, dhash))
if c.fetchone():
return ret
return tfa, tnf, rsz
if cv and rd:
# mojibake not supported (for performance / simplicity):
@ -1452,7 +1470,7 @@ class Up2k(object):
seen_files = set([x[2] for x in files]) # for dropcheck
for sz, lmod, fn in files:
if self.stop:
return -1
return -1, 0, 0
rp = rds + fn
abspath = cdirs + fn
@ -1485,7 +1503,7 @@ class Up2k(object):
)
self.log(t)
self.db_rm(db.c, rd, fn, 0)
ret += 1
tfa += 1
db.n += 1
in_db = []
else:
@ -1510,7 +1528,7 @@ class Up2k(object):
continue
if not hashes:
return -1
return -1, 0, 0
wark = up2k_wark_from_hashlist(self.salt, sz, hashes)
@ -1521,7 +1539,7 @@ class Up2k(object):
# skip upload hooks by not providing vflags
self.db_add(db.c, {}, rd, fn, lmod, sz, "", "", wark, "", "", ip, at)
db.n += 1
ret += 1
tfa += 1
td = time.time() - db.t
if db.n >= 4096 or td >= 60:
self.log("commit {} new files".format(db.n))
@ -1534,33 +1552,38 @@ class Up2k(object):
db.c.execute("insert into dh values (?,?)", (drd, dhash)) # type: ignore
if self.stop:
return -1
return -1, 0, 0
# drop shadowed folders
for sh_rd in unreg:
n = 0
q = "select count(w) from up where (rd=? or rd like ?||'%') and +at == 0"
q = "select count(w) from up where (rd=? or rd like ?||'/%') and +at == 0"
for sh_erd in [sh_rd, "//" + w8b64enc(sh_rd)]:
try:
n = db.c.execute(q, (sh_erd, sh_erd + "/")).fetchone()[0]
erd_erd = (sh_erd, sh_erd)
n = db.c.execute(q, erd_erd).fetchone()[0]
break
except:
pass
assert erd_erd # type: ignore # !rm
if n:
t = "forgetting {} shadowed autoindexed files in [{}] > [{}]"
self.log(t.format(n, top, sh_rd))
assert sh_erd # type: ignore # !rm
q = "delete from dh where (d = ? or d like ?||'%')"
db.c.execute(q, (sh_erd, sh_erd + "/"))
q = "delete from dh where (d = ? or d like ?||'/%')"
db.c.execute(q, erd_erd)
q = "delete from up where (rd=? or rd like ?||'%') and +at == 0"
db.c.execute(q, (sh_erd, sh_erd + "/"))
ret += n
q = "delete from up where (rd=? or rd like ?||'/%') and +at == 0"
db.c.execute(q, erd_erd)
tfa += n
q = "delete from ds where (rd=? or rd like ?||'/%')"
db.c.execute(q, erd_erd)
if n4g:
return ret
return tfa, tnf, rsz
# drop missing files
q = "select fn from up where rd = ?"
@ -1578,7 +1601,7 @@ class Up2k(object):
if n_rm:
self.log("forgot {} deleted files".format(n_rm))
return ret
return tfa, tnf, rsz
def _drop_lost(self, cur: "sqlite3.Cursor", top: str, excl: list[str]) -> int:
rm = []
@ -1796,13 +1819,13 @@ class Up2k(object):
return 0
with self.mutex:
q = "update up set w=?, sz=?, mt=? where rd=? and fn=?"
for rd, fn, w, sz, mt in rewark:
q = "update up set w = ?, sz = ?, mt = ? where rd = ? and fn = ? limit 1"
cur.execute(q, (w, sz, int(mt), rd, fn))
for _, _, w in f404:
q = "delete from up where w = ? limit 1"
cur.execute(q, (w,))
if f404:
q = "delete from up where rd=? and fn=? and +w=?"
cur.executemany(q, f404)
cur.connection.commit()
@ -2478,6 +2501,7 @@ class Up2k(object):
self._add_xiu_tab(cur)
self._add_cv_tab(cur)
self._add_idx_up_vp(cur, db_path)
self._add_ds_tab(cur)
try:
nfiles = next(cur.execute("select count(w) from up"))[0]
@ -2591,6 +2615,7 @@ class Up2k(object):
self._add_dhash_tab(cur)
self._add_xiu_tab(cur)
self._add_cv_tab(cur)
self._add_ds_tab(cur)
self.log("created DB at {}".format(db_path))
return cur
@ -2684,6 +2709,22 @@ class Up2k(object):
cur.connection.commit()
cur.execute("vacuum")
def _add_ds_tab(self, cur: "sqlite3.Cursor") -> None:
# v5d -> v5e
try:
cur.execute("select rd, sz from ds limit 1").fetchone()
return
except:
pass
for cmd in [
r"create table ds (rd text, sz int, nf int)",
r"create index ds_rd on ds(rd)",
]:
cur.execute(cmd)
cur.connection.commit()
def wake_rescanner(self):
    """Wake every thread blocked on the rescan condition variable."""
    cond = self.rescan_cond
    cond.acquire()
    try:
        cond.notify_all()
    finally:
        cond.release()
@ -3693,6 +3734,19 @@ class Up2k(object):
except:
pass
if "nodirsz" not in vflags:
try:
q = "update ds set nf=nf+1, sz=sz+? where rd=?"
q2 = "insert into ds values(?,?,1)"
while True:
if not db.execute(q, (sz, rd)).rowcount:
db.execute(q2, (rd, sz))
if not rd:
break
rd = rd.rsplit("/", 1)[0] if "/" in rd else ""
except:
pass
def handle_rm(
self,
uname: str,

View file

@ -331,7 +331,7 @@ MAGIC_MAP = {"jpeg": "jpg"}
DEF_EXP = "self.ip self.ua self.uname self.host cfg.name cfg.logout vf.scan vf.thsize hdr.cf_ipcountry srv.itime srv.htime"
DEF_MTE = "circle,album,.tn,artist,title,.bpm,key,.dur,.q,.vq,.aq,vc,ac,fmt,res,.fps,ahash,vhash"
DEF_MTE = ".files,circle,album,.tn,artist,title,.bpm,key,.dur,.q,.vq,.aq,vc,ac,fmt,res,.fps,ahash,vhash"
DEF_MTH = ".vq,.aq,vc,ac,fmt,res,.fps"
@ -482,8 +482,8 @@ VERSIONS = (
try:
_b64_enc_tl = bytes.maketrans(b'+/', b'-_')
_b64_dec_tl = bytes.maketrans(b'-_', b'+/')
_b64_enc_tl = bytes.maketrans(b"+/", b"-_")
_b64_dec_tl = bytes.maketrans(b"-_", b"+/")
def ub64enc(bs: bytes) -> bytes:
x = binascii.b2a_base64(bs, newline=False)

View file

@ -122,7 +122,7 @@ class Cfg(Namespace):
def __init__(self, a=None, v=None, c=None, **ka0):
ka = {}
ex = "chpw daw dav_auth dav_inf dav_mac dav_rt e2d e2ds e2dsa e2t e2ts e2tsr e2v e2vu e2vp early_ban ed emp exp force_js getmod grid gsel hardlink ih ihead magic hardlink_only nid nih no_acode no_athumb no_dav no_db_ip no_del no_dupe no_lifetime no_logues no_mv no_pipe no_poll no_readme no_robots no_sb_md no_sb_lg no_scandir no_tarcmp no_thumb no_vthumb no_zip nrand nw og og_no_head og_s_title q rand smb srch_dbg stats uqe vague_403 vc ver write_uplog xdev xlink xvol zs"
ex = "chpw daw dav_auth dav_inf dav_mac dav_rt e2d e2ds e2dsa e2t e2ts e2tsr e2v e2vu e2vp early_ban ed emp exp force_js getmod grid gsel hardlink ih ihead magic hardlink_only nid nih no_acode no_athumb no_dav no_db_ip no_del no_dirsz no_dupe no_lifetime no_logues no_mv no_pipe no_poll no_readme no_robots no_sb_md no_sb_lg no_scandir no_tarcmp no_thumb no_vthumb no_zip nrand nw og og_no_head og_s_title q rand re_dirsz smb srch_dbg stats uqe vague_403 vc ver write_uplog xdev xlink xvol zs"
ka.update(**{k: False for k in ex.split()})
ex = "dedup dotpart dotsrch hook_v no_dhash no_fastboot no_fpool no_htp no_rescan no_sendfile no_ses no_snap no_up_list no_voldump re_dhash plain_ip"