mirror of
https://github.com/9001/copyparty.git
synced 2025-08-17 09:02:15 -06:00
fix mv with --no-dedup
in volumes with dupes;
If --no-dedup was enabled in a volume which already contained symlinked duplicate files, renaming/moving folders could fail. This is due to folder contents being moved one file at a time (which is how symlink breakage is prevented), except the links are moved assuming the final directory layout, meaning they may be intermittently broken during the move. With no-dedup, the symlinks are converted into full files as each symlink is encountered, but a temporarily broken symlink would crash the procedure. Fix this by giving `_symlink` a new parameter `fsrc`, which is a known valid inode for data copying purposes.
This commit is contained in:
parent
6e671c5245
commit
4401de0413
|
@ -133,8 +133,8 @@ flagcats = {
|
|||
"nodupe": "rejects existing files (instead of symlinking them)",
|
||||
"hardlink": "does dedup with hardlinks instead of symlinks",
|
||||
"neversymlink": "disables symlink fallback; full copy instead",
|
||||
"safededup": "verify on-disk data before using it for dedup",
|
||||
"copydupes": "disables dedup, always saves full copies of dupes",
|
||||
"safededup": "verify on-disk data before using it for dedup",
|
||||
"sparse": "force use of sparse files, mainly for s3-backed storage",
|
||||
"daw": "enable full WebDAV write support (dangerous);\nPUT-operations will now \033[1;31mOVERWRITE\033[0;35m existing files",
|
||||
"nosub": "forces all uploads into the top folder of the vfs",
|
||||
|
|
|
@ -3111,9 +3111,17 @@ class Up2k(object):
|
|||
verbose: bool = True,
|
||||
rm: bool = False,
|
||||
lmod: float = 0,
|
||||
fsrc: Optional[str] = None,
|
||||
) -> None:
|
||||
if src == dst or (fsrc and fsrc == dst):
|
||||
t = "symlinking a file to itself?? orig(%s) fsrc(%s) link(%s)"
|
||||
raise Exception(t % (src, fsrc, dst))
|
||||
|
||||
if verbose:
|
||||
self.log("linking dupe:\n {0}\n {1}".format(src, dst))
|
||||
t = "linking dupe:\n point-to: {0}\n link-loc: {1}"
|
||||
if fsrc:
|
||||
t += "\n data-src: {2}"
|
||||
self.log(t.format(src, dst, fsrc))
|
||||
|
||||
if self.args.nw:
|
||||
return
|
||||
|
@ -3121,7 +3129,7 @@ class Up2k(object):
|
|||
linked = False
|
||||
try:
|
||||
if "copydupes" in flags:
|
||||
raise Exception("disabled in config")
|
||||
raise Exception("dedup is disabled in config")
|
||||
|
||||
lsrc = src
|
||||
ldst = dst
|
||||
|
@ -3177,7 +3185,15 @@ class Up2k(object):
|
|||
linked = True
|
||||
except Exception as ex:
|
||||
self.log("cannot link; creating copy: " + repr(ex))
|
||||
shutil.copy2(fsenc(src), fsenc(dst))
|
||||
if bos.path.isfile(src):
|
||||
csrc = src
|
||||
elif fsrc and bos.path.isfile(fsrc):
|
||||
csrc = fsrc
|
||||
else:
|
||||
t = "BUG: no valid sources to link from! orig(%s) fsrc(%s) link(%s)"
|
||||
self.log(t, 1)
|
||||
raise Exception(t % (src, fsrc, dst))
|
||||
shutil.copy2(fsenc(csrc), fsenc(dst))
|
||||
|
||||
if lmod and (not linked or SYMTIME):
|
||||
times = (int(time.time()), int(lmod))
|
||||
|
@ -4258,7 +4274,13 @@ class Up2k(object):
|
|||
except:
|
||||
pass
|
||||
|
||||
self._symlink(dabs, alink, flags, False, lmod=lmod or 0)
|
||||
# this creates a link pointing from dabs to alink; alink may
|
||||
# not exist yet, which becomes problematic if the symlinking
|
||||
# fails and it has to fall back on hardlinking/copying files
|
||||
# (for example --no-dedup in a volume with symlinked dupes);
|
||||
# fsrc=sabs is then a source that currently resolves to copy
|
||||
|
||||
self._symlink(dabs, alink, flags, False, lmod=lmod or 0, fsrc=sabs)
|
||||
|
||||
return len(full) + len(links)
|
||||
|
||||
|
|
|
@ -51,15 +51,21 @@ class TestDedup(unittest.TestCase):
|
|||
]
|
||||
# (data, chash, wark)
|
||||
|
||||
# 3072 uploads in total
|
||||
self.ctr = 3072
|
||||
self.ctr = 336 if quick else 2016 # estimated total num uploads
|
||||
self.conn = None
|
||||
fstab = None
|
||||
for e2d in [True, False]:
|
||||
self.args = Cfg(v=[".::A"], a=[], e2d=e2d)
|
||||
for dn1, fn1, f1 in product(dirnames, filenames, files):
|
||||
cm1 = (dn1, fn1, f1)
|
||||
for dn2, fn2, f2 in product(dirnames, filenames, files):
|
||||
cm2 = (dn2, fn2, f2)
|
||||
if cm1 == cm2:
|
||||
continue
|
||||
for dn3, fn3, f3 in product(dirnames, filenames, files):
|
||||
cm3 = (dn3, fn3, f3)
|
||||
if cm3 in (cm1, cm2):
|
||||
continue
|
||||
self.reset()
|
||||
if self.conn:
|
||||
fstab = self.conn.hsrv.hub.up2k.fstab
|
||||
|
|
198
tests/test_mv.py
Normal file
198
tests/test_mv.py
Normal file
|
@ -0,0 +1,198 @@
|
|||
#!/usr/bin/env python3
|
||||
# coding: utf-8
|
||||
from __future__ import print_function, unicode_literals
|
||||
|
||||
import json
|
||||
import os
|
||||
import shutil
|
||||
import tempfile
|
||||
import unittest
|
||||
from itertools import product
|
||||
|
||||
from copyparty.__init__ import PY2
|
||||
from copyparty.authsrv import AuthSrv
|
||||
from copyparty.httpcli import HttpCli
|
||||
from tests import util as tu
|
||||
from tests.util import Cfg
|
||||
|
||||
"""
|
||||
TODO inject tags into db and verify ls
|
||||
"""
|
||||
|
||||
|
||||
class TestDedup(unittest.TestCase):
    """Move folders that contain duplicate uploads and verify the files.

    The same payload is uploaded into four directories through the up2k
    handshake/chunk requests, one directory is then moved with a
    ``?move=`` request, and every copy is fetched from its expected
    location afterwards to confirm the contents are still readable.
    """

    def setUp(self):
        """Obtain the scratch directory that holds the test volume."""
        self.td = tu.get_ramdisk()

    def tearDown(self):
        """Leave the scratch directory before deleting it."""
        # reset() chdir's into the volume; step out so rmtree is not
        # removing the current working directory
        os.chdir(tempfile.gettempdir())
        shutil.rmtree(self.td)

    def reset(self):
        """Recreate an empty ``vfs`` folder, chdir into it, return its path."""
        td = os.path.join(self.td, "vfs")
        if os.path.exists(td):
            shutil.rmtree(td)
        os.mkdir(td)
        os.chdir(td)
        return td

    def cinit(self):
        """Build a fresh AuthSrv and virtual connection from ``self.args``.

        If a previous connection exists, its up2k instance is shut down
        and its ``fstab`` object is carried over to the new connection.
        """
        if self.conn:
            self.fstab = self.conn.hsrv.hub.up2k.fstab
            self.conn.hsrv.hub.up2k.shutdown()
        self.asrv = AuthSrv(self.args, self.log)
        self.conn = tu.VHttpConn(self.args, self.asrv, self.log, b"", True)
        if self.fstab:
            self.conn.hsrv.hub.up2k.fstab = self.fstab

    def test(self):
        """Run the move scenario for every dedup mode, volume layout and
        upload order.

        Dedup modes:
          * ``sym``    - upload and move with default arguments
          * ``no``     - upload and move with ``no_dedup`` enabled
          * ``sym-no`` - upload with default arguments, then reinitialise
                         with ``no_dedup`` enabled before moving
        """
        if PY2:
            raise unittest.SkipTest()

        # tc_e2d = [True, False]  # maybe-TODO only known symlinks are translated
        tc_e2d = [True]
        tc_dedup = ["sym", "no", "sym-no"]
        tc_vols = [["::A"], ["::A", "d1:d1:A"]]
        dirs = ["d1", "d1/d2", "d1/d2/d3", "d1/d4"]
        files = [
            (
                "one",
                "BfcDQQeKz2oG1CPSFyD5ZD1flTYm2IoCY23DqeeVgq6w",
                "XMbpLRqVdtGmgggqjUI6uSoNMTqZVX4K6zr74XA1BRKc",
            )
        ]
        # (data, chash, wark)

        self.conn = None
        self.fstab = None
        self.ctr = 0  # 2304; decremented per upload, only shown in log output
        tcgen = product(tc_e2d, tc_dedup, tc_vols, dirs, ["d9", "../d9"])
        for e2d, dedup, vols, mv_from, dst in tcgen:
            if "/" not in mv_from and dst.startswith(".."):
                continue  # would move past top of fs
            if len(vols) > 1 and mv_from == "d1":
                continue  # cannot move a vol

            ka = {"e2d": e2d}
            if dedup == "hard":
                # "hard" is not currently listed in tc_dedup
                ka["hardlink"] = True
            elif dedup == "no":
                ka["no_dedup"] = True
            self.args = Cfg(v=vols[:], a=[], **ka)

            for u1, u2, u3, u4 in product(dirs, dirs, dirs, dirs):
                ups = (u1, u2, u3, u4)
                if len(set(ups)) < 4:
                    continue  # not unique

                t = "e2d:%s dedup:%s vols:%d from:%s to:%s"
                t = t % (e2d, dedup, len(vols), mv_from, dst)
                print("\n\n\033[0;7m# files:", ups, t, "\033[0m")

                self.reset()
                self.cinit()

                # upload the same file into each folder; only the first
                # upload is required to be a brand new file
                for up in ups:
                    self.do_post(up, "fn", files[0], up == u1)

                restore_args = None
                if dedup == "sym-no":
                    # the uploads above ran with default arguments; switch
                    # to no_dedup before performing the move
                    restore_args = self.args
                    ka = {"e2d": e2d, "no_dedup": True}
                    self.args = Cfg(v=vols[:], a=[], **ka)
                    self.cinit()

                # NOTE(review): each pass recomputes from mv_from instead of
                # from the previous result, so "../d9" currently resolves to
                # the same destination as "d9" (a sibling of mv_from);
                # behaviour kept as-is, confirm the intent before changing
                mv_to = mv_from
                for _ in range(2 if dst.startswith("../") else 1):
                    mv_to = mv_from.rsplit("/", 1)[0] if "/" in mv_from else ""
                mv_to += "/" + dst.lstrip("./")

                self.do_mv(mv_from, mv_to)

                # every upload must be readable afterwards; folders at or
                # below mv_from are expected under mv_to instead
                for dirpath in ups:
                    if dirpath == mv_from:
                        dirpath = mv_to
                    elif dirpath.startswith(mv_from):
                        dirpath = mv_to + dirpath[len(mv_from) :]
                    _, b = self.curl(dirpath + "/fn")
                    self.assertEqual(b, "one")

                if restore_args:
                    self.args = restore_args

    def do_mv(self, src, dst):
        """POST a move of ``src`` to ``dst`` and assert that it succeeded.

        Returns the reply split into ``[headers, body]``.
        """
        hdr = "POST /%s?move=/%s HTTP/1.1\r\nConnection: close\r\nContent-Length: 0\r\n\r\n"
        buf = (hdr % (src, dst)).encode("utf-8")
        print("MV [%s] => [%s]" % (src, dst))
        HttpCli(self.conn.setbuf(buf)).run()
        ret = self.conn.s._reply.decode("utf-8").split("\r\n\r\n", 1)
        print("MV <-- ", ret)
        self.assertIn(" 201 Created", ret[0])
        self.assertEqual("k\r\n", ret[1])
        return ret

    def do_post(self, dn, fn, fi, first):
        """Upload ``fi`` as ``dn/fn`` and verify it can be read back.

        ``fi`` is a ``(data, chash, wark)`` tuple.  When ``first`` is true
        the upload must keep its original name and the handshake must
        request the chunk; otherwise an Exception is raised.
        """
        print("\n# do_post", self.ctr, repr((dn, fn, fi, first)))
        self.ctr -= 1

        data, chash, wark = fi
        hs = self.handshake(dn, fn, fi)
        self.assertEqual(hs["wark"], wark)

        sfn = hs["name"]
        if sfn == fn:
            print("using original name " + fn)
        else:
            print(fn + " got renamed to " + sfn)
            if first:
                raise Exception("wait what")

        if hs["hash"]:
            # the handshake lists chunks it still wants; send the only one
            self.assertEqual(hs["hash"][0], chash)
            self.put_chunk(dn, wark, chash, data)
        elif first:
            raise Exception("found first; %r, %r" % ((dn, fn, fi), hs))

        _, b = self.curl("%s/%s" % (dn, sfn))
        self.assertEqual(b, data)

    def handshake(self, dn, fn, fi):
        """POST the upload handshake for ``fi`` into ``dn``.

        Returns the decoded JSON reply.
        """
        hdr = "POST /%s/ HTTP/1.1\r\nConnection: close\r\nContent-Type: text/plain\r\nContent-Length: %d\r\n\r\n"
        # announce the real payload size rather than a fixed number so the
        # helper stays correct if other payloads are added to the test
        size = len(fi[0].encode("utf-8"))
        msg = {"name": fn, "size": size, "lmod": 1234567890, "life": 0, "hash": [fi[1]]}
        buf = json.dumps(msg).encode("utf-8")
        buf = (hdr % (dn, len(buf))).encode("utf-8") + buf
        print("HS -->", buf)
        HttpCli(self.conn.setbuf(buf)).run()
        ret = self.conn.s._reply.decode("utf-8").split("\r\n\r\n", 1)
        print("HS <--", ret)
        return json.loads(ret[1])

    def put_chunk(self, dn, wark, chash, data):
        """POST ``data`` as chunk ``chash`` of upload ``wark`` into ``dn``
        and assert that the reply body is ``thank``."""
        msg = [
            "POST /%s/ HTTP/1.1" % (dn,),
            "Connection: close",
            "Content-Type: application/octet-stream",
            # must match the number of body bytes actually sent
            "Content-Length: %d" % (len(data.encode("utf-8")),),
            "X-Up2k-Hash: " + chash,
            "X-Up2k-Wark: " + wark,
            "",
            data,
        ]
        buf = "\r\n".join(msg).encode("utf-8")
        print("PUT -->", buf)
        HttpCli(self.conn.setbuf(buf)).run()
        ret = self.conn.s._reply.decode("utf-8").split("\r\n\r\n", 1)
        self.assertEqual(ret[1], "thank")

    def curl(self, url, binary=False):
        """GET ``url`` and return ``[headers, body]``.

        The body is returned as bytes when ``binary`` is true, otherwise
        the whole reply is decoded as UTF-8 before splitting.
        """
        h = "GET /%s HTTP/1.1\r\nConnection: close\r\n\r\n"
        HttpCli(self.conn.setbuf((h % (url,)).encode("utf-8"))).run()
        if binary:
            h, b = self.conn.s._reply.split(b"\r\n\r\n", 1)
            return [h.decode("utf-8"), b]

        return self.conn.s._reply.decode("utf-8").split("\r\n\r\n", 1)

    def log(self, src, msg, c=0):
        """Logger callback handed to AuthSrv and VHttpConn; prints ``msg``."""
        print(msg)
|
Loading…
Reference in a new issue