diff --git a/README.md b/README.md index 89161c8e..35e317d0 100644 --- a/README.md +++ b/README.md @@ -1968,6 +1968,7 @@ below are some tweaks roughly ordered by usefulness: * and also makes thumbnails load faster, regardless of e2d/e2t * `--dedup` enables deduplication and thus avoids writing to the HDD if someone uploads a dupe * `--safe-dedup 1` makes deduplication much faster during upload by skipping verification of file contents; safe if there is no other software editing/moving the files in the volumes +* `--no-dirsz` shows the size of folder inodes instead of the total size of the contents, giving about 30% faster folder listings * `--no-hash .` when indexing a network-disk if you don't care about the actual filehashes and only want the names/tags searchable * if your volumes are on a network-disk such as NFS / SMB / s3, specifying larger values for `--iobuf` and/or `--s-rd-sz` and/or `--s-wr-sz` may help; try setting all of them to `524288` or `1048576` or `4194304` * `--no-htp --hash-mt=0 --mtag-mt=1 --th-mt=1` minimizes the number of threads; can help in some eccentric environments (like the vscode debugger) diff --git a/copyparty/__main__.py b/copyparty/__main__.py index 0d58cddb..c4f2ef75 100644 --- a/copyparty/__main__.py +++ b/copyparty/__main__.py @@ -1362,6 +1362,8 @@ def add_db_general(ap, hcores): ap2.add_argument("--hist", metavar="PATH", type=u, default="", help="where to store volume data (db, thumbs); default is a folder named \".hist\" inside each volume (volflag=hist)") ap2.add_argument("--no-hash", metavar="PTN", type=u, default="", help="regex: disable hashing of matching absolute-filesystem-paths during e2ds folder scans (volflag=nohash)") ap2.add_argument("--no-idx", metavar="PTN", type=u, default=noidx, help="regex: disable indexing of matching absolute-filesystem-paths during e2ds folder scans (volflag=noidx)") + ap2.add_argument("--no-dirsz", action="store_true", help="do not show total recursive size of folders in listings, show inode size instead; slightly faster (volflag=nodirsz)") + ap2.add_argument("--re-dirsz", action="store_true", help="if the directory-sizes in the UI are bonkers, use this along with \033[33m-e2dsa\033[0m to rebuild the index from scratch") ap2.add_argument("--no-dhash", action="store_true", help="disable rescan acceleration; do full database integrity check -- makes the db ~5%% smaller and bootup/rescans 3~10x slower") ap2.add_argument("--re-dhash", action="store_true", help="force a cache rebuild on startup; enable this once if it gets out of sync (should never be necessary)") ap2.add_argument("--no-forget", action="store_true", help="never forget indexed files, even when deleted from disk -- makes it impossible to ever upload the same file twice -- only useful for offloading uploads to a cloud service or something (volflag=noforget)") diff --git a/copyparty/cfg.py b/copyparty/cfg.py index deea73ab..9ea72c4f 100644 --- a/copyparty/cfg.py +++ b/copyparty/cfg.py @@ -13,6 +13,7 @@ def vf_bmap() -> dict[str, str]: "dav_rt": "davrt", "ed": "dots", "hardlink_only": "hardlinkonly", + "no_dirsz": "nodirsz", "no_dupe": "nodupe", "no_forget": "noforget", "no_pipe": "nopipe", diff --git a/copyparty/httpcli.py b/copyparty/httpcli.py index 4d719c8f..4a0a15f5 100644 --- a/copyparty/httpcli.py +++ b/copyparty/httpcli.py @@ -5214,13 +5214,23 @@ class HttpCli(object): fe["tags"] = tags if icur: + for fe in dirs: + fe["tags"] = ODict() + lmte = list(mte) if self.can_admin: lmte.extend(("up_ip", ".up_at")) + if "nodirsz" not in vf: + tagset.add(".files") + vdir = "%s/" % (rd,) if rd else "" + q = "select sz, nf from ds where rd=? limit 1" + for fe in dirs: + hit = icur.execute(q, (vdir + fe["name"],)).fetchone() + if hit: + (fe["sz"], fe["tags"][".files"]) = hit + taglist = [k for k in lmte if k in tagset] - for fe in dirs: - fe["tags"] = ODict() else: taglist = list(tagset) diff --git a/copyparty/up2k.py b/copyparty/up2k.py index e652077e..85171f4f 100644 --- a/copyparty/up2k.py +++ b/copyparty/up2k.py @@ -1204,6 +1204,10 @@ class Up2k(object): # ~/.wine/dosdevices/z:/ and such excl.extend(("/dev", "/proc", "/run", "/sys")) + if self.args.re_dirsz: + db.c.execute("delete from ds") + db.n += 1 + rtop = absreal(top) n_add = n_rm = 0 try: @@ -1212,7 +1216,7 @@ class Up2k(object): self.log(t % (vol.vpath, rtop), 6) return True, False - n_add = self._build_dir( + n_add, _, _ = self._build_dir( db, top, set(excl), @@ -1286,17 +1290,18 @@ class Up2k(object): cst: os.stat_result, dev: int, xvol: bool, - ) -> int: + ) -> tuple[int, int, int]: if xvol and not rcdir.startswith(top): self.log("skip xvol: [{}] -> [{}]".format(cdir, rcdir), 6) - return 0 + return 0, 0, 0 if rcdir in seen: t = "bailing from symlink loop,\n prev: {}\n curr: {}\n from: {}" self.log(t.format(seen[-1], rcdir, cdir), 3) - return 0 + return 0, 0, 0 - ret = 0 + # total-files-added, total-num-files, recursive-size + tfa = tnf = rsz = 0 seen = seen + [rcdir] unreg: list[str] = [] files: list[tuple[int, int, str]] = [] @@ -1321,7 +1326,7 @@ class Up2k(object): partials = set([x[0] for x in gl if "PARTIAL" in x[0]]) for iname, inf in gl: if self.stop: - return -1 + return -1, 0, 0 rp = rds + iname abspath = cdirs + iname @@ -1358,7 +1363,7 @@ class Up2k(object): continue # self.log(" dir: {}".format(abspath)) try: - ret += self._build_dir( + i1, i2, i3 = self._build_dir( db, top, excl, @@ -1373,6 +1378,9 @@ class Up2k(object): dev, xvol, ) + tfa += i1 + tnf += i2 + rsz += i3 except: t = "failed to index subdir [{}]:\n{}" self.log(t.format(abspath, min_ex()), c=1) @@ -1391,6 +1399,7 @@ class Up2k(object): # placeholder for unfinished upload continue + rsz += sz files.append((sz, lmod, iname)) liname = iname.lower() if ( @@ -1412,6 +1421,15 @@ class Up2k(object): ): cv = iname + if not self.args.no_dirsz: + tnf += len(files) + q = "select sz, nf from ds where rd=? limit 1" + db_sz, db_nf = db.c.execute(q, (rd,)).fetchone() or (-1, -1) + if rsz != db_sz or tnf != db_nf: + db.c.execute("delete from ds where rd=?", (rd,)) + db.c.execute("insert into ds values (?,?,?)", (rd, rsz, tnf)) + db.n += 1 + # folder of 1000 files = ~1 MiB RAM best-case (tiny filenames); # free up stuff we're done with before dhashing gl = [] @@ -1435,7 +1453,7 @@ class Up2k(object): c = db.c.execute(sql, (drd, dhash)) if c.fetchone(): - return ret + return tfa, tnf, rsz if cv and rd: # mojibake not supported (for performance / simplicity): @@ -1452,7 +1470,7 @@ class Up2k(object): seen_files = set([x[2] for x in files]) # for dropcheck for sz, lmod, fn in files: if self.stop: - return -1 + return -1, 0, 0 rp = rds + fn abspath = cdirs + fn @@ -1485,7 +1503,7 @@ class Up2k(object): ) self.log(t) self.db_rm(db.c, rd, fn, 0) - ret += 1 + tfa += 1 db.n += 1 in_db = [] else: @@ -1510,7 +1528,7 @@ class Up2k(object): continue if not hashes: - return -1 + return -1, 0, 0 wark = up2k_wark_from_hashlist(self.salt, sz, hashes) @@ -1521,7 +1539,7 @@ class Up2k(object): # skip upload hooks by not providing vflags self.db_add(db.c, {}, rd, fn, lmod, sz, "", "", wark, "", "", ip, at) db.n += 1 - ret += 1 + tfa += 1 td = time.time() - db.t if db.n >= 4096 or td >= 60: self.log("commit {} new files".format(db.n)) @@ -1534,33 +1552,38 @@ class Up2k(object): db.c.execute("insert into dh values (?,?)", (drd, dhash)) # type: ignore if self.stop: - return -1 + return -1, 0, 0 # drop shadowed folders for sh_rd in unreg: n = 0 - q = "select count(w) from up where (rd=? or rd like ?||'%') and +at == 0" + q = "select count(w) from up where (rd=? or rd like ?||'/%') and +at == 0" for sh_erd in [sh_rd, "//" + w8b64enc(sh_rd)]: try: - n = db.c.execute(q, (sh_erd, sh_erd + "/")).fetchone()[0] + erd_erd = (sh_erd, sh_erd) + n = db.c.execute(q, erd_erd).fetchone()[0] break except: pass + assert erd_erd # type: ignore # !rm + if n: t = "forgetting {} shadowed autoindexed files in [{}] > [{}]" self.log(t.format(n, top, sh_rd)) - assert sh_erd # type: ignore # !rm - q = "delete from dh where (d = ? or d like ?||'%')" - db.c.execute(q, (sh_erd, sh_erd + "/")) + q = "delete from dh where (d = ? or d like ?||'/%')" + db.c.execute(q, erd_erd) - q = "delete from up where (rd=? or rd like ?||'%') and +at == 0" - db.c.execute(q, (sh_erd, sh_erd + "/")) - ret += n + q = "delete from up where (rd=? or rd like ?||'/%') and +at == 0" + db.c.execute(q, erd_erd) + tfa += n + + q = "delete from ds where (rd=? or rd like ?||'/%')" + db.c.execute(q, erd_erd) if n4g: - return ret + return tfa, tnf, rsz # drop missing files q = "select fn from up where rd = ?" @@ -1578,7 +1601,7 @@ class Up2k(object): if n_rm: self.log("forgot {} deleted files".format(n_rm)) - return ret + return tfa, tnf, rsz def _drop_lost(self, cur: "sqlite3.Cursor", top: str, excl: list[str]) -> int: rm = [] @@ -1796,13 +1819,13 @@ class Up2k(object): return 0 with self.mutex: + q = "update up set w=?, sz=?, mt=? where rd=? and fn=?" for rd, fn, w, sz, mt in rewark: - q = "update up set w = ?, sz = ?, mt = ? where rd = ? and fn = ? limit 1" cur.execute(q, (w, sz, int(mt), rd, fn)) - for _, _, w in f404: - q = "delete from up where w = ? limit 1" - cur.execute(q, (w,)) + if f404: + q = "delete from up where rd=? and fn=? and +w=?" + cur.executemany(q, f404) cur.connection.commit() @@ -2478,6 +2501,7 @@ class Up2k(object): self._add_xiu_tab(cur) self._add_cv_tab(cur) self._add_idx_up_vp(cur, db_path) + self._add_ds_tab(cur) try: nfiles = next(cur.execute("select count(w) from up"))[0] @@ -2591,6 +2615,7 @@ class Up2k(object): self._add_dhash_tab(cur) self._add_xiu_tab(cur) self._add_cv_tab(cur) + self._add_ds_tab(cur) self.log("created DB at {}".format(db_path)) return cur @@ -2684,6 +2709,22 @@ class Up2k(object): cur.connection.commit() cur.execute("vacuum") + def _add_ds_tab(self, cur: "sqlite3.Cursor") -> None: + # v5d -> v5e + try: + cur.execute("select rd, sz from ds limit 1").fetchone() + return + except: + pass + + for cmd in [ + r"create table ds (rd text, sz int, nf int)", + r"create index ds_rd on ds(rd)", + ]: + cur.execute(cmd) + + cur.connection.commit() + def wake_rescanner(self): with self.rescan_cond: self.rescan_cond.notify_all() @@ -3693,6 +3734,19 @@ class Up2k(object): except: pass + if "nodirsz" not in vflags: + try: + q = "update ds set nf=nf+1, sz=sz+? where rd=?" + q2 = "insert into ds values(?,?,1)" + while True: + if not db.execute(q, (sz, rd)).rowcount: + db.execute(q2, (rd, sz)) + if not rd: + break + rd = rd.rsplit("/", 1)[0] if "/" in rd else "" + except: + pass + def handle_rm( self, uname: str, diff --git a/copyparty/util.py b/copyparty/util.py index 3d1d62c8..bebcc57b 100644 --- a/copyparty/util.py +++ b/copyparty/util.py @@ -331,7 +331,7 @@ MAGIC_MAP = {"jpeg": "jpg"} DEF_EXP = "self.ip self.ua self.uname self.host cfg.name cfg.logout vf.scan vf.thsize hdr.cf_ipcountry srv.itime srv.htime" -DEF_MTE = "circle,album,.tn,artist,title,.bpm,key,.dur,.q,.vq,.aq,vc,ac,fmt,res,.fps,ahash,vhash" +DEF_MTE = ".files,circle,album,.tn,artist,title,.bpm,key,.dur,.q,.vq,.aq,vc,ac,fmt,res,.fps,ahash,vhash" DEF_MTH = ".vq,.aq,vc,ac,fmt,res,.fps" @@ -482,8 +482,8 @@ VERSIONS = ( try: - _b64_enc_tl = bytes.maketrans(b'+/', b'-_') - _b64_dec_tl = bytes.maketrans(b'-_', b'+/') + _b64_enc_tl = bytes.maketrans(b"+/", b"-_") + _b64_dec_tl = bytes.maketrans(b"-_", b"+/") def ub64enc(bs: bytes) -> bytes: x = binascii.b2a_base64(bs, newline=False) diff --git a/tests/util.py b/tests/util.py index bb80673c..098803c3 100644 --- a/tests/util.py +++ b/tests/util.py @@ -122,7 +122,7 @@ class Cfg(Namespace): def __init__(self, a=None, v=None, c=None, **ka0): ka = {} - ex = "chpw daw dav_auth dav_inf dav_mac dav_rt e2d e2ds e2dsa e2t e2ts e2tsr e2v e2vu e2vp early_ban ed emp exp force_js getmod grid gsel hardlink ih ihead magic hardlink_only nid nih no_acode no_athumb no_dav no_db_ip no_del no_dupe no_lifetime no_logues no_mv no_pipe no_poll no_readme no_robots no_sb_md no_sb_lg no_scandir no_tarcmp no_thumb no_vthumb no_zip nrand nw og og_no_head og_s_title q rand smb srch_dbg stats uqe vague_403 vc ver write_uplog xdev xlink xvol zs" + ex = "chpw daw dav_auth dav_inf dav_mac dav_rt e2d e2ds e2dsa e2t e2ts e2tsr e2v e2vu e2vp early_ban ed emp exp force_js getmod grid gsel hardlink ih ihead magic hardlink_only nid nih no_acode no_athumb no_dav no_db_ip no_del no_dirsz no_dupe no_lifetime no_logues no_mv no_pipe no_poll no_readme no_robots no_sb_md no_sb_lg no_scandir no_tarcmp no_thumb no_vthumb no_zip nrand nw og og_no_head og_s_title q rand re_dirsz smb srch_dbg stats uqe vague_403 vc ver write_uplog xdev xlink xvol zs" ka.update(**{k: False for k in ex.split()}) ex = "dedup dotpart dotsrch hook_v no_dhash no_fastboot no_fpool no_htp no_rescan no_sendfile no_ses no_snap no_up_list no_voldump re_dhash plain_ip"