diff --git a/copyparty/cfg.py b/copyparty/cfg.py index 7c8f1e5d..313eb8c5 100644 --- a/copyparty/cfg.py +++ b/copyparty/cfg.py @@ -133,8 +133,8 @@ flagcats = { "nodupe": "rejects existing files (instead of symlinking them)", "hardlink": "does dedup with hardlinks instead of symlinks", "neversymlink": "disables symlink fallback; full copy instead", - "safededup": "verify on-disk data before using it for dedup", "copydupes": "disables dedup, always saves full copies of dupes", + "safededup": "verify on-disk data before using it for dedup", "sparse": "force use of sparse files, mainly for s3-backed storage", "daw": "enable full WebDAV write support (dangerous);\nPUT-operations will now \033[1;31mOVERWRITE\033[0;35m existing files", "nosub": "forces all uploads into the top folder of the vfs", diff --git a/copyparty/up2k.py b/copyparty/up2k.py index eebe7507..c4add624 100644 --- a/copyparty/up2k.py +++ b/copyparty/up2k.py @@ -3111,9 +3111,17 @@ class Up2k(object): verbose: bool = True, rm: bool = False, lmod: float = 0, + fsrc: Optional[str] = None, ) -> None: + if src == dst or (fsrc and fsrc == dst): + t = "symlinking a file to itself?? orig(%s) fsrc(%s) link(%s)" + raise Exception(t % (src, fsrc, dst)) + if verbose: - self.log("linking dupe:\n {0}\n {1}".format(src, dst)) + t = "linking dupe:\n point-to: {0}\n link-loc: {1}" + if fsrc: + t += "\n data-src: {2}" + self.log(t.format(src, dst, fsrc)) if self.args.nw: return @@ -3121,7 +3129,7 @@ class Up2k(object): linked = False try: if "copydupes" in flags: - raise Exception("disabled in config") + raise Exception("dedup is disabled in config") lsrc = src ldst = dst @@ -3177,7 +3185,15 @@ class Up2k(object): linked = True except Exception as ex: self.log("cannot link; creating copy: " + repr(ex)) - shutil.copy2(fsenc(src), fsenc(dst)) + if bos.path.isfile(src): + csrc = src + elif fsrc and bos.path.isfile(fsrc): + csrc = fsrc + else: + t = "BUG: no valid sources to link from! 
orig(%s) fsrc(%s) link(%s)" + self.log(t, 1) + raise Exception(t % (src, fsrc, dst)) + shutil.copy2(fsenc(csrc), fsenc(dst)) if lmod and (not linked or SYMTIME): times = (int(time.time()), int(lmod)) @@ -4258,7 +4274,13 @@ class Up2k(object): except: pass - self._symlink(dabs, alink, flags, False, lmod=lmod or 0) + # this creates a link pointing from dabs to alink; alink may + # not exist yet, which becomes problematic if the symlinking + # fails and it has to fall back on hardlinking/copying files + # (for example --no-dedup in a volume with symlinked dupes); + # fsrc=sabs is then a source that currently resolves to copy + + self._symlink(dabs, alink, flags, False, lmod=lmod or 0, fsrc=sabs) return len(full) + len(links) diff --git a/tests/test_dedup.py b/tests/test_dedup.py index c92aea5d..0b341d5b 100644 --- a/tests/test_dedup.py +++ b/tests/test_dedup.py @@ -51,15 +51,21 @@ class TestDedup(unittest.TestCase): ] # (data, chash, wark) - # 3072 uploads in total - self.ctr = 3072 + self.ctr = 336 if quick else 2016 # estimated total num uploads self.conn = None fstab = None for e2d in [True, False]: self.args = Cfg(v=[".::A"], a=[], e2d=e2d) for dn1, fn1, f1 in product(dirnames, filenames, files): + cm1 = (dn1, fn1, f1) for dn2, fn2, f2 in product(dirnames, filenames, files): + cm2 = (dn2, fn2, f2) + if cm1 == cm2: + continue for dn3, fn3, f3 in product(dirnames, filenames, files): + cm3 = (dn3, fn3, f3) + if cm3 in (cm1, cm2): + continue self.reset() if self.conn: fstab = self.conn.hsrv.hub.up2k.fstab diff --git a/tests/test_mv.py b/tests/test_mv.py new file mode 100644 index 00000000..707618f9 --- /dev/null +++ b/tests/test_mv.py @@ -0,0 +1,198 @@ +#!/usr/bin/env python3 +# coding: utf-8 +from __future__ import print_function, unicode_literals + +import json +import os +import shutil +import tempfile +import unittest +from itertools import product + +from copyparty.__init__ import PY2 +from copyparty.authsrv import AuthSrv +from copyparty.httpcli import HttpCli 
+from tests import util as tu
+from tests.util import Cfg
+
+"""
+TODO inject tags into db and verify ls
+"""
+
+
+class TestDedup(unittest.TestCase):
+    """Move folders that contain duplicate uploads and verify the files survive.
+
+    Every testcase uploads the same 3-byte file ("one") into four distinct
+    folders through the up2k handshake/chunk protocol, moves one folder with
+    a `?move=` POST, and then GETs all four copies from the locations they
+    are expected to live at afterwards.
+
+    NOTE(review): tests/test_dedup.py also declares a class named TestDedup;
+    the name is kept here so test selection by name keeps working, but a
+    rename (e.g. TestMv) may be worth considering.
+    """
+
+    def setUp(self):
+        """Allocate the scratch directory that all testcases run inside."""
+        self.td = tu.get_ramdisk()
+
+    def tearDown(self):
+        """Leave the scratch tree (reset() chdir'ed into it), then delete it."""
+        os.chdir(tempfile.gettempdir())
+        shutil.rmtree(self.td)
+
+    def reset(self):
+        """Recreate an empty `vfs` folder, chdir into it, and return its path."""
+        td = os.path.join(self.td, "vfs")
+        if os.path.exists(td):
+            shutil.rmtree(td)
+        os.mkdir(td)
+        os.chdir(td)
+        return td
+
+    def cinit(self):
+        """(Re)create the AuthSrv and virtual connection for `self.args`.
+
+        If a previous connection exists, its up2k instance is shut down
+        first and its fstab object is carried over to the new connection.
+        """
+        if self.conn:
+            self.fstab = self.conn.hsrv.hub.up2k.fstab
+            self.conn.hsrv.hub.up2k.shutdown()
+        self.asrv = AuthSrv(self.args, self.log)
+        self.conn = tu.VHttpConn(self.args, self.asrv, self.log, b"", True)
+        if self.fstab:
+            self.conn.hsrv.hub.up2k.fstab = self.fstab
+
+    def _roundtrip(self, buf):
+        """Run one raw HTTP request through HttpCli; return the raw reply bytes."""
+        HttpCli(self.conn.setbuf(buf)).run()
+        return self.conn.s._reply
+
+    def test(self):
+        """Run the move matrix: dedup mode x volume layout x source x target."""
+        if PY2:
+            raise unittest.SkipTest()
+
+        # tc_e2d = [True, False]  # maybe-TODO only known symlinks are translated
+        tc_e2d = [True]
+        # "sym-no" uploads with the default config and then switches to
+        # no_dedup right before the move
+        tc_dedup = ["sym", "no", "sym-no"]
+        tc_vols = [["::A"], ["::A", "d1:d1:A"]]
+        dirs = ["d1", "d1/d2", "d1/d2/d3", "d1/d4"]
+        files = [
+            (
+                "one",
+                "BfcDQQeKz2oG1CPSFyD5ZD1flTYm2IoCY23DqeeVgq6w",
+                "XMbpLRqVdtGmgggqjUI6uSoNMTqZVX4K6zr74XA1BRKc",
+            )
+        ]
+        # (data, chash, wark)
+
+        self.conn = None
+        self.fstab = None
+        self.ctr = 0  # 2304
+        tcgen = product(tc_e2d, tc_dedup, tc_vols, dirs, ["d9", "../d9"])
+        for e2d, dedup, vols, mv_from, dst in tcgen:
+            if "/" not in mv_from and dst.startswith(".."):
+                continue  # would move past top of fs
+            if len(vols) > 1 and mv_from == "d1":
+                continue  # cannot move a vol
+
+            # print(e2d, dedup, vols, mv_from, dst)
+            ka = {"e2d": e2d}
+            if dedup == "hard":
+                # currently unreachable; "hard" is not listed in tc_dedup
+                ka["hardlink"] = True
+            elif dedup == "no":
+                ka["no_dedup"] = True
+            self.args = Cfg(v=vols[:], a=[], **ka)
+
+            for ups in product(dirs, dirs, dirs, dirs):
+                if len(set(ups)) < 4:
+                    continue  # not unique
+
+                t = "e2d:%s dedup:%s vols:%d from:%s to:%s"
+                t = t % (e2d, dedup, len(vols), mv_from, dst)
+                print("\n\n\033[0;7m# files:", ups, t, "\033[0m")
+
+                self.reset()
+                self.cinit()
+
+                # only the very first upload is expected to transfer data
+                for up in ups:
+                    self.do_post(up, "fn", files[0], up == ups[0])
+
+                restore_args = None
+                if dedup == "sym-no":
+                    restore_args = self.args
+                    ka = {"e2d": e2d, "no_dedup": True}
+                    self.args = Cfg(v=vols[:], a=[], **ka)
+                    self.cinit()
+
+                # climb one level for a sibling rename ("d9") and two levels
+                # for "../d9"; every step must continue from the previous
+                # result, otherwise the second step changes nothing and the
+                # "../d9" cases silently repeat the "d9" cases
+                mv_to = mv_from
+                for _ in range(2 if dst.startswith("../") else 1):
+                    mv_to = mv_to.rsplit("/", 1)[0] if "/" in mv_to else ""
+                mv_to += "/" + dst.lstrip("./")
+
+                self.do_mv(mv_from, mv_to)
+
+                for dirpath in ups:
+                    if dirpath == mv_from:
+                        dirpath = mv_to
+                    elif dirpath.startswith(mv_from + "/"):
+                        # only true descendants follow the moved folder;
+                        # the "/" keeps e.g. "d1/d2x" from matching "d1/d2"
+                        dirpath = mv_to + dirpath[len(mv_from) :]
+                    _, body = self.curl(dirpath + "/fn")
+                    self.assertEqual(body, "one")
+
+                if restore_args:
+                    self.args = restore_args
+
+    def do_mv(self, src, dst):
+        """POST a move of `src` to `dst`; assert the reply is 201 with body "k"."""
+        hdr = "POST /%s?move=/%s HTTP/1.1\r\nConnection: close\r\nContent-Length: 0\r\n\r\n"
+        buf = (hdr % (src, dst)).encode("utf-8")
+        print("MV [%s] => [%s]" % (src, dst))
+        ret = self._roundtrip(buf).decode("utf-8").split("\r\n\r\n", 1)
+        print("MV <-- ", ret)
+        self.assertIn(" 201 Created", ret[0])
+        self.assertEqual("k\r\n", ret[1])
+        return ret
+
+    def do_post(self, dn, fn, fi, first):
+        """Upload file `fi` as `fn` into folder `dn` and read it back.
+
+        `fi` is a (data, chash, wark) tuple. When `first` is set, the server
+        must keep the original filename and must request the chunk;
+        otherwise either outcome is accepted.
+        """
+        print("\n# do_post", self.ctr, repr((dn, fn, fi, first)))
+        self.ctr -= 1
+
+        data, chash, wark = fi
+        hs = self.handshake(dn, fn, fi)
+        self.assertEqual(hs["wark"], wark)
+
+        sfn = hs["name"]
+        if sfn == fn:
+            print("using original name " + fn)
+        else:
+            print(fn + " got renamed to " + sfn)
+            if first:
+                raise Exception("wait what")
+
+        if hs["hash"]:
+            # server is missing this chunk; send it
+            self.assertEqual(hs["hash"][0], chash)
+            self.put_chunk(dn, wark, chash, data)
+        elif first:
+            raise Exception("found first; %r, %r" % ((dn, fn, fi), hs))
+
+        _, body = self.curl("%s/%s" % (dn, sfn))
+        self.assertEqual(body, data)
+
+    def handshake(self, dn, fn, fi):
+        """Send the up2k handshake for `fi` in folder `dn`; return the parsed JSON reply."""
+        hdr = "POST /%s/ HTTP/1.1\r\nConnection: close\r\nContent-Type: text/plain\r\nContent-Length: %d\r\n\r\n"
+        # size follows the payload instead of being pinned to 3 bytes
+        size = len(fi[0].encode("utf-8"))
+        msg = {"name": fn, "size": size, "lmod": 1234567890, "life": 0, "hash": [fi[1]]}
+        buf = json.dumps(msg).encode("utf-8")
+        buf = (hdr % (dn, len(buf))).encode("utf-8") + buf
+        print("HS -->", buf)
+        ret = self._roundtrip(buf).decode("utf-8").split("\r\n\r\n", 1)
+        print("HS <--", ret)
+        return json.loads(ret[1])
+
+    def put_chunk(self, dn, wark, chash, data):
+        """Upload the single chunk `data` for `wark`; assert the server replies "thank"."""
+        msg = [
+            "POST /%s/ HTTP/1.1" % (dn,),
+            "Connection: close",
+            "Content-Type: application/octet-stream",
+            "Content-Length: %d" % (len(data.encode("utf-8")),),
+            "X-Up2k-Hash: " + chash,
+            "X-Up2k-Wark: " + wark,
+            "",
+            data,
+        ]
+        buf = "\r\n".join(msg).encode("utf-8")
+        print("PUT -->", buf)
+        ret = self._roundtrip(buf).decode("utf-8").split("\r\n\r\n", 1)
+        self.assertEqual(ret[1], "thank")
+
+    def curl(self, url, binary=False):
+        """GET `url` and return [headers, body]; body stays bytes when `binary` is set."""
+        h = "GET /%s HTTP/1.1\r\nConnection: close\r\n\r\n"
+        reply = self._roundtrip((h % (url,)).encode("utf-8"))
+        if binary:
+            h, b = reply.split(b"\r\n\r\n", 1)
+            return [h.decode("utf-8"), b]
+
+        return reply.decode("utf-8").split("\r\n\r\n", 1)
+
+    def log(self, src, msg, c=0):
+        """Logger callback handed to AuthSrv/VHttpConn; prints the message only."""
+        print(msg)