From cba9e5b669b10f3af4f1d9f32266f4a564c91d8b Mon Sep 17 00:00:00 2001 From: ed Date: Fri, 15 Apr 2022 19:13:53 +0200 Subject: [PATCH] add hardlinks (symlink alternative) for up2k dedup --- copyparty/__main__.py | 4 ++- copyparty/svchub.py | 3 ++ copyparty/up2k.py | 77 +++++++++++++++++++++++++++---------------- 3 files changed, 55 insertions(+), 29 deletions(-) diff --git a/copyparty/__main__.py b/copyparty/__main__.py index a65ca640..2662b0ac 100644 --- a/copyparty/__main__.py +++ b/copyparty/__main__.py @@ -426,7 +426,9 @@ def run_argparse(argv, formatter): ap2.add_argument("--unpost", metavar="SEC", type=int, default=3600*12, help="grace period where uploads can be deleted by the uploader, even without delete permissions; 0=disabled") ap2.add_argument("--no-fpool", action="store_true", help="disable file-handle pooling -- instead, repeatedly close and reopen files during upload") ap2.add_argument("--use-fpool", action="store_true", help="force file-handle pooling, even if copyparty thinks you're better off without") - ap2.add_argument("--no-symlink", action="store_true", help="duplicate file contents instead") + ap2.add_argument("--hardlink", action="store_true", help="prefer hardlinks instead of symlinks when possible (same filesystem)") + ap2.add_argument("--never-symlink", action="store_true", help="do not fallback to symlinks when a hardlink cannot be made") + ap2.add_argument("--no-dedup", action="store_true", help="disable symlink/hardlink creation; copy file contents instead") ap2.add_argument("--reg-cap", metavar="N", type=int, default=9000, help="max number of uploads to keep in memory when running without -e2d") ap2 = ap.add_argument_group('network options') diff --git a/copyparty/svchub.py b/copyparty/svchub.py index dd20b479..ae50df65 100644 --- a/copyparty/svchub.py +++ b/copyparty/svchub.py @@ -74,6 +74,9 @@ class SvcHub(object): ch = "abcdefghijklmnopqrstuvwx"[int(args.theme / 2)] args.theme = "{0}{1} {0} {1}".format(ch, bri) + if not args.hardlink and args.never_symlink: + args.no_dedup = True + # initiate all services to manage self.asrv = AuthSrv(self.args, self.log) if args.ls: diff --git a/copyparty/up2k.py b/copyparty/up2k.py index 17023f6d..d1a9abdd 100644 --- a/copyparty/up2k.py +++ b/copyparty/up2k.py @@ -1240,6 +1240,11 @@ class Up2k(object): wark = self._get_wark(cj) now = time.time() job = None + try: + dev = bos.stat(os.path.join(cj["ptop"], cj["prel"])).st_dev + except: + dev = 0 + with self.mutex: cur = self.cur.get(cj["ptop"]) reg = self.registry[cj["ptop"]] @@ -1251,37 +1256,42 @@ class Up2k(object): q = r"select * from up where substr(w,1,16) = ? and w = ?" argv = (wark[:16], wark) + alts = [] cur = cur.execute(q, argv) for _, dtime, dsize, dp_dir, dp_fn, ip, at in cur: if dp_dir.startswith("//") or dp_fn.startswith("//"): dp_dir, dp_fn = s3dec(dp_dir, dp_fn) - if job and (dp_dir != cj["prel"] or dp_fn != cj["name"]): + dp_abs = "/".join([cj["ptop"], dp_dir, dp_fn]) + try: + st = bos.stat(dp_abs) + if stat.S_ISLNK(st.st_mode): + # broken symlink + raise Exception() + except: continue - dp_abs = "/".join([cj["ptop"], dp_dir, dp_fn]) - # relying on this to fail on broken symlinks - try: - sz = bos.path.getsize(dp_abs) - except: - sz = 0 - - if sz: - # self.log("--- " + wark + " " + dp_abs + " found file", 4) - job = { - "name": dp_fn, - "prel": dp_dir, - "vtop": cj["vtop"], - "ptop": cj["ptop"], - "size": dsize, - "lmod": dtime, - "addr": ip, - "at": at, - "hash": [], - "need": [], - "busy": {}, - } + j = { + "name": dp_fn, + "prel": dp_dir, + "vtop": cj["vtop"], + "ptop": cj["ptop"], + "size": dsize, + "lmod": dtime, + "addr": ip, + "at": at, + "hash": [], + "need": [], + "busy": {}, + } + score = ( + (3 if st.st_dev == dev else 0) + + (2 if dp_dir == cj["prel"] else 0) + + (1 if dp_fn == cj["name"] else 0) + ) + alts.append([score, -len(alts), j]) + job = sorted(alts, reverse=True)[0][2] if alts else None if job and wark in reg: # self.log("pop " + wark + " " + job["name"] + " handle_json db", 4) del reg[wark] @@ -1422,14 +1432,14 @@ class Up2k(object): linked = False try: - if self.args.no_symlink: + if self.args.no_dedup: raise Exception("disabled in config") lsrc = src ldst = dst fs1 = bos.stat(os.path.dirname(src)).st_dev fs2 = bos.stat(os.path.dirname(dst)).st_dev - if fs1 == 0: + if fs1 == 0 or fs2 == 0: # py2 on winxp or other unsupported combination raise OSError() elif fs1 == fs2: @@ -1450,10 +1460,21 @@ class Up2k(object): lsrc = nsrc[nc:] hops = len(ndst[nc:]) - 1 lsrc = "../" * hops + "/".join(lsrc) - os.symlink(fsenc(lsrc), fsenc(ldst)) - linked = True + + try: + if self.args.hardlink: + os.link(fsenc(src), fsenc(dst)) + linked = True + except Exception as ex: + self.log("cannot hardlink: " + repr(ex)) + if self.args.never_symlink: + raise Exception("symlink-fallback disabled in cfg") + + if not linked: + os.symlink(fsenc(lsrc), fsenc(ldst)) + linked = True except Exception as ex: - self.log("cannot symlink; creating copy: " + repr(ex)) + self.log("cannot link; creating copy: " + repr(ex)) shutil.copy2(fsenc(src), fsenc(dst)) if lmod and (not linked or SYMTIME):