mirror of
https://github.com/9001/copyparty.git
synced 2025-08-17 09:02:15 -06:00
fix mv with --no-dedup
in volumes with dupes;
If --no-dedup was enabled in a volume which already contained symlinked duplicate files, renaming/moving folders could fail. This is due to folder contents being moved one file at a time (which is how symlink breakage is prevented), except the links are moved assuming the final directory layout, meaning they may be intermittently broken during the move. With no-dedup, the symlinks are converted into full files as each symlink is encountered, but a temporarily broken symlink would crash the procedure. Fix this by giving `_symlink` a new parameter `fsrc`, which is a known valid inode for data copying purposes.
This commit is contained in:
parent
6e671c5245
commit
4401de0413
|
@ -133,8 +133,8 @@ flagcats = {
|
||||||
"nodupe": "rejects existing files (instead of symlinking them)",
|
"nodupe": "rejects existing files (instead of symlinking them)",
|
||||||
"hardlink": "does dedup with hardlinks instead of symlinks",
|
"hardlink": "does dedup with hardlinks instead of symlinks",
|
||||||
"neversymlink": "disables symlink fallback; full copy instead",
|
"neversymlink": "disables symlink fallback; full copy instead",
|
||||||
"safededup": "verify on-disk data before using it for dedup",
|
|
||||||
"copydupes": "disables dedup, always saves full copies of dupes",
|
"copydupes": "disables dedup, always saves full copies of dupes",
|
||||||
|
"safededup": "verify on-disk data before using it for dedup",
|
||||||
"sparse": "force use of sparse files, mainly for s3-backed storage",
|
"sparse": "force use of sparse files, mainly for s3-backed storage",
|
||||||
"daw": "enable full WebDAV write support (dangerous);\nPUT-operations will now \033[1;31mOVERWRITE\033[0;35m existing files",
|
"daw": "enable full WebDAV write support (dangerous);\nPUT-operations will now \033[1;31mOVERWRITE\033[0;35m existing files",
|
||||||
"nosub": "forces all uploads into the top folder of the vfs",
|
"nosub": "forces all uploads into the top folder of the vfs",
|
||||||
|
|
|
@ -3111,9 +3111,17 @@ class Up2k(object):
|
||||||
verbose: bool = True,
|
verbose: bool = True,
|
||||||
rm: bool = False,
|
rm: bool = False,
|
||||||
lmod: float = 0,
|
lmod: float = 0,
|
||||||
|
fsrc: Optional[str] = None,
|
||||||
) -> None:
|
) -> None:
|
||||||
|
if src == dst or (fsrc and fsrc == dst):
|
||||||
|
t = "symlinking a file to itself?? orig(%s) fsrc(%s) link(%s)"
|
||||||
|
raise Exception(t % (src, fsrc, dst))
|
||||||
|
|
||||||
if verbose:
|
if verbose:
|
||||||
self.log("linking dupe:\n {0}\n {1}".format(src, dst))
|
t = "linking dupe:\n point-to: {0}\n link-loc: {1}"
|
||||||
|
if fsrc:
|
||||||
|
t += "\n data-src: {2}"
|
||||||
|
self.log(t.format(src, dst, fsrc))
|
||||||
|
|
||||||
if self.args.nw:
|
if self.args.nw:
|
||||||
return
|
return
|
||||||
|
@ -3121,7 +3129,7 @@ class Up2k(object):
|
||||||
linked = False
|
linked = False
|
||||||
try:
|
try:
|
||||||
if "copydupes" in flags:
|
if "copydupes" in flags:
|
||||||
raise Exception("disabled in config")
|
raise Exception("dedup is disabled in config")
|
||||||
|
|
||||||
lsrc = src
|
lsrc = src
|
||||||
ldst = dst
|
ldst = dst
|
||||||
|
@ -3177,7 +3185,15 @@ class Up2k(object):
|
||||||
linked = True
|
linked = True
|
||||||
except Exception as ex:
|
except Exception as ex:
|
||||||
self.log("cannot link; creating copy: " + repr(ex))
|
self.log("cannot link; creating copy: " + repr(ex))
|
||||||
shutil.copy2(fsenc(src), fsenc(dst))
|
if bos.path.isfile(src):
|
||||||
|
csrc = src
|
||||||
|
elif fsrc and bos.path.isfile(fsrc):
|
||||||
|
csrc = fsrc
|
||||||
|
else:
|
||||||
|
t = "BUG: no valid sources to link from! orig(%s) fsrc(%s) link(%s)"
|
||||||
|
self.log(t, 1)
|
||||||
|
raise Exception(t % (src, fsrc, dst))
|
||||||
|
shutil.copy2(fsenc(csrc), fsenc(dst))
|
||||||
|
|
||||||
if lmod and (not linked or SYMTIME):
|
if lmod and (not linked or SYMTIME):
|
||||||
times = (int(time.time()), int(lmod))
|
times = (int(time.time()), int(lmod))
|
||||||
|
@ -4258,7 +4274,13 @@ class Up2k(object):
|
||||||
except:
|
except:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
self._symlink(dabs, alink, flags, False, lmod=lmod or 0)
|
# this creates a link pointing from dabs to alink; alink may
|
||||||
|
# not exist yet, which becomes problematic if the symlinking
|
||||||
|
# fails and it has to fall back on hardlinking/copying files
|
||||||
|
# (for example --no-dedup in a volume with symlinked dupes);
|
||||||
|
# fsrc=sabs is then a source that currently resolves to copy
|
||||||
|
|
||||||
|
self._symlink(dabs, alink, flags, False, lmod=lmod or 0, fsrc=sabs)
|
||||||
|
|
||||||
return len(full) + len(links)
|
return len(full) + len(links)
|
||||||
|
|
||||||
|
|
|
@ -51,15 +51,21 @@ class TestDedup(unittest.TestCase):
|
||||||
]
|
]
|
||||||
# (data, chash, wark)
|
# (data, chash, wark)
|
||||||
|
|
||||||
# 3072 uploads in total
|
self.ctr = 336 if quick else 2016 # estimated total num uploads
|
||||||
self.ctr = 3072
|
|
||||||
self.conn = None
|
self.conn = None
|
||||||
fstab = None
|
fstab = None
|
||||||
for e2d in [True, False]:
|
for e2d in [True, False]:
|
||||||
self.args = Cfg(v=[".::A"], a=[], e2d=e2d)
|
self.args = Cfg(v=[".::A"], a=[], e2d=e2d)
|
||||||
for dn1, fn1, f1 in product(dirnames, filenames, files):
|
for dn1, fn1, f1 in product(dirnames, filenames, files):
|
||||||
|
cm1 = (dn1, fn1, f1)
|
||||||
for dn2, fn2, f2 in product(dirnames, filenames, files):
|
for dn2, fn2, f2 in product(dirnames, filenames, files):
|
||||||
|
cm2 = (dn2, fn2, f2)
|
||||||
|
if cm1 == cm2:
|
||||||
|
continue
|
||||||
for dn3, fn3, f3 in product(dirnames, filenames, files):
|
for dn3, fn3, f3 in product(dirnames, filenames, files):
|
||||||
|
cm3 = (dn3, fn3, f3)
|
||||||
|
if cm3 in (cm1, cm2):
|
||||||
|
continue
|
||||||
self.reset()
|
self.reset()
|
||||||
if self.conn:
|
if self.conn:
|
||||||
fstab = self.conn.hsrv.hub.up2k.fstab
|
fstab = self.conn.hsrv.hub.up2k.fstab
|
||||||
|
|
198
tests/test_mv.py
Normal file
198
tests/test_mv.py
Normal file
|
@ -0,0 +1,198 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
# coding: utf-8
|
||||||
|
from __future__ import print_function, unicode_literals
|
||||||
|
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import shutil
|
||||||
|
import tempfile
|
||||||
|
import unittest
|
||||||
|
from itertools import product
|
||||||
|
|
||||||
|
from copyparty.__init__ import PY2
|
||||||
|
from copyparty.authsrv import AuthSrv
|
||||||
|
from copyparty.httpcli import HttpCli
|
||||||
|
from tests import util as tu
|
||||||
|
from tests.util import Cfg
|
||||||
|
|
||||||
|
"""
|
||||||
|
TODO inject tags into db and verify ls
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
class TestDedup(unittest.TestCase):
|
||||||
|
def setUp(self):
|
||||||
|
self.td = tu.get_ramdisk()
|
||||||
|
|
||||||
|
def tearDown(self):
|
||||||
|
os.chdir(tempfile.gettempdir())
|
||||||
|
shutil.rmtree(self.td)
|
||||||
|
|
||||||
|
def reset(self):
|
||||||
|
td = os.path.join(self.td, "vfs")
|
||||||
|
if os.path.exists(td):
|
||||||
|
shutil.rmtree(td)
|
||||||
|
os.mkdir(td)
|
||||||
|
os.chdir(td)
|
||||||
|
return td
|
||||||
|
|
||||||
|
def cinit(self):
|
||||||
|
if self.conn:
|
||||||
|
self.fstab = self.conn.hsrv.hub.up2k.fstab
|
||||||
|
self.conn.hsrv.hub.up2k.shutdown()
|
||||||
|
self.asrv = AuthSrv(self.args, self.log)
|
||||||
|
self.conn = tu.VHttpConn(self.args, self.asrv, self.log, b"", True)
|
||||||
|
if self.fstab:
|
||||||
|
self.conn.hsrv.hub.up2k.fstab = self.fstab
|
||||||
|
|
||||||
|
def test(self):
|
||||||
|
if PY2:
|
||||||
|
raise unittest.SkipTest()
|
||||||
|
|
||||||
|
# tc_e2d = [True, False] # maybe-TODO only known symlinks are translated
|
||||||
|
tc_e2d = [True]
|
||||||
|
tc_dedup = ["sym", "no", "sym-no"]
|
||||||
|
tc_vols = [["::A"], ["::A", "d1:d1:A"]]
|
||||||
|
dirs = ["d1", "d1/d2", "d1/d2/d3", "d1/d4"]
|
||||||
|
files = [
|
||||||
|
(
|
||||||
|
"one",
|
||||||
|
"BfcDQQeKz2oG1CPSFyD5ZD1flTYm2IoCY23DqeeVgq6w",
|
||||||
|
"XMbpLRqVdtGmgggqjUI6uSoNMTqZVX4K6zr74XA1BRKc",
|
||||||
|
)
|
||||||
|
]
|
||||||
|
# (data, chash, wark)
|
||||||
|
|
||||||
|
self.conn = None
|
||||||
|
self.fstab = None
|
||||||
|
self.ctr = 0 # 2304
|
||||||
|
tcgen = product(tc_e2d, tc_dedup, tc_vols, dirs, ["d9", "../d9"])
|
||||||
|
for e2d, dedup, vols, mv_from, dst in tcgen:
|
||||||
|
if "/" not in mv_from and dst.startswith(".."):
|
||||||
|
continue # would move past top of fs
|
||||||
|
if len(vols) > 1 and mv_from == "d1":
|
||||||
|
continue # cannot move a vol
|
||||||
|
|
||||||
|
# print(e2d, dedup, vols, mv_from, dst)
|
||||||
|
ka = {"e2d": e2d}
|
||||||
|
if dedup == "hard":
|
||||||
|
ka["hardlink"] = True
|
||||||
|
elif dedup == "no":
|
||||||
|
ka["no_dedup"] = True
|
||||||
|
self.args = Cfg(v=vols[:], a=[], **ka)
|
||||||
|
|
||||||
|
for u1, u2, u3, u4 in product(dirs, dirs, dirs, dirs):
|
||||||
|
ups = (u1, u2, u3, u4)
|
||||||
|
if len(set(ups)) < 4:
|
||||||
|
continue # not unique
|
||||||
|
|
||||||
|
t = "e2d:%s dedup:%s vols:%d from:%s to:%s"
|
||||||
|
t = t % (e2d, dedup, len(vols), mv_from, dst)
|
||||||
|
print("\n\n\033[0;7m# files:", ups, t, "\033[0m")
|
||||||
|
|
||||||
|
self.reset()
|
||||||
|
self.cinit()
|
||||||
|
|
||||||
|
for up in [u1, u2, u3, u4]:
|
||||||
|
self.do_post(up, "fn", files[0], up == u1)
|
||||||
|
|
||||||
|
restore_args = None
|
||||||
|
if dedup == "sym-no":
|
||||||
|
restore_args = self.args
|
||||||
|
ka = {"e2d": e2d, "no_dedup": True}
|
||||||
|
self.args = Cfg(v=vols[:], a=[], **ka)
|
||||||
|
self.cinit()
|
||||||
|
|
||||||
|
mv_to = mv_from
|
||||||
|
for _ in range(2 if dst.startswith("../") else 1):
|
||||||
|
mv_to = mv_from.rsplit("/", 1)[0] if "/" in mv_from else ""
|
||||||
|
mv_to += "/" + dst.lstrip("./")
|
||||||
|
|
||||||
|
self.do_mv(mv_from, mv_to)
|
||||||
|
|
||||||
|
for dirpath in [u1, u2, u3, u4]:
|
||||||
|
if dirpath == mv_from:
|
||||||
|
dirpath = mv_to
|
||||||
|
elif dirpath.startswith(mv_from):
|
||||||
|
dirpath = mv_to + dirpath[len(mv_from) :]
|
||||||
|
h, b = self.curl(dirpath + "/fn")
|
||||||
|
self.assertEqual(b, "one")
|
||||||
|
|
||||||
|
if restore_args:
|
||||||
|
self.args = restore_args
|
||||||
|
|
||||||
|
def do_mv(self, src, dst):
|
||||||
|
hdr = "POST /%s?move=/%s HTTP/1.1\r\nConnection: close\r\nContent-Length: 0\r\n\r\n"
|
||||||
|
buf = (hdr % (src, dst)).encode("utf-8")
|
||||||
|
print("MV [%s] => [%s]" % (src, dst))
|
||||||
|
HttpCli(self.conn.setbuf(buf)).run()
|
||||||
|
ret = self.conn.s._reply.decode("utf-8").split("\r\n\r\n", 1)
|
||||||
|
print("MV <-- ", ret)
|
||||||
|
self.assertIn(" 201 Created", ret[0])
|
||||||
|
self.assertEqual("k\r\n", ret[1])
|
||||||
|
return ret
|
||||||
|
|
||||||
|
def do_post(self, dn, fn, fi, first):
|
||||||
|
print("\n# do_post", self.ctr, repr((dn, fn, fi, first)))
|
||||||
|
self.ctr -= 1
|
||||||
|
|
||||||
|
data, chash, wark = fi
|
||||||
|
hs = self.handshake(dn, fn, fi)
|
||||||
|
self.assertEqual(hs["wark"], wark)
|
||||||
|
|
||||||
|
sfn = hs["name"]
|
||||||
|
if sfn == fn:
|
||||||
|
print("using original name " + fn)
|
||||||
|
else:
|
||||||
|
print(fn + " got renamed to " + sfn)
|
||||||
|
if first:
|
||||||
|
raise Exception("wait what")
|
||||||
|
|
||||||
|
if hs["hash"]:
|
||||||
|
self.assertEqual(hs["hash"][0], chash)
|
||||||
|
self.put_chunk(dn, wark, chash, data)
|
||||||
|
elif first:
|
||||||
|
raise Exception("found first; %r, %r" % ((dn, fn, fi), hs))
|
||||||
|
|
||||||
|
h, b = self.curl("%s/%s" % (dn, sfn))
|
||||||
|
self.assertEqual(b, data)
|
||||||
|
|
||||||
|
def handshake(self, dn, fn, fi):
|
||||||
|
hdr = "POST /%s/ HTTP/1.1\r\nConnection: close\r\nContent-Type: text/plain\r\nContent-Length: %d\r\n\r\n"
|
||||||
|
msg = {"name": fn, "size": 3, "lmod": 1234567890, "life": 0, "hash": [fi[1]]}
|
||||||
|
buf = json.dumps(msg).encode("utf-8")
|
||||||
|
buf = (hdr % (dn, len(buf))).encode("utf-8") + buf
|
||||||
|
print("HS -->", buf)
|
||||||
|
HttpCli(self.conn.setbuf(buf)).run()
|
||||||
|
ret = self.conn.s._reply.decode("utf-8").split("\r\n\r\n", 1)
|
||||||
|
print("HS <--", ret)
|
||||||
|
return json.loads(ret[1])
|
||||||
|
|
||||||
|
def put_chunk(self, dn, wark, chash, data):
|
||||||
|
msg = [
|
||||||
|
"POST /%s/ HTTP/1.1" % (dn,),
|
||||||
|
"Connection: close",
|
||||||
|
"Content-Type: application/octet-stream",
|
||||||
|
"Content-Length: 3",
|
||||||
|
"X-Up2k-Hash: " + chash,
|
||||||
|
"X-Up2k-Wark: " + wark,
|
||||||
|
"",
|
||||||
|
data,
|
||||||
|
]
|
||||||
|
buf = "\r\n".join(msg).encode("utf-8")
|
||||||
|
print("PUT -->", buf)
|
||||||
|
HttpCli(self.conn.setbuf(buf)).run()
|
||||||
|
ret = self.conn.s._reply.decode("utf-8").split("\r\n\r\n", 1)
|
||||||
|
self.assertEqual(ret[1], "thank")
|
||||||
|
|
||||||
|
def curl(self, url, binary=False):
|
||||||
|
h = "GET /%s HTTP/1.1\r\nConnection: close\r\n\r\n"
|
||||||
|
HttpCli(self.conn.setbuf((h % (url,)).encode("utf-8"))).run()
|
||||||
|
if binary:
|
||||||
|
h, b = self.conn.s._reply.split(b"\r\n\r\n", 1)
|
||||||
|
return [h.decode("utf-8"), b]
|
||||||
|
|
||||||
|
return self.conn.s._reply.decode("utf-8").split("\r\n\r\n", 1)
|
||||||
|
|
||||||
|
def log(self, src, msg, c=0):
|
||||||
|
print(msg)
|
Loading…
Reference in a new issue