https://github.com/9001/copyparty.git
fix chunkpost-handshake race (affects --no-dedup only);
a handshake arriving in the middle of the final chunk could cause dupes to become empty -- in the worst case leading to loss of data
commit c98fff1647 (parent 0009e31bd3)
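Roughly, the race reads like this from the diff below: a handshake for a file the server already knows (same wark) is satisfied from the existing upload's path, and with --no-dedup that duplicate is a full copy rather than a symlink; if the handshake lands while the original's final chunk is still open for writing, the copy can end up incomplete or empty. The fix tracks which absolute paths still have a write handle open (the new FHC.aps set), passes that set into up2k.handle_json and up2k.finish_upload as busy_aps, and rejects a handshake whose source file is still busy with HTTP 422 "source file busy; please try again later"; both the CLI client (bin/up2k.py) and the browser uploader then schedule a recheck instead of giving up. A minimal sketch of the guard, using simplified stand-in names rather than copyparty's actual classes:

    # sketch only; OpenPathTracker / dedupe_handshake are illustrative names,
    # not copyparty's API (copyparty uses FHC.aps, Up2k.busy_aps, handle_json)
    import threading

    class OpenPathTracker:
        """remembers absolute paths that currently have a write handle open"""

        def __init__(self) -> None:
            self.aps: set[str] = set()
            self.mutex = threading.Lock()

        def put(self, path: str) -> None:
            # a chunk-writer checked out a handle for this path
            with self.mutex:
                self.aps.add(path)

        def close(self, path: str) -> None:
            # every handle for this path has been closed
            with self.mutex:
                self.aps.discard(path)

    class SourceBusy(Exception):
        """would map to HTTP 422: source file busy; please try again later"""

    def dedupe_handshake(src: str, wark: str, reg: dict, busy_aps: set) -> None:
        # refuse to duplicate an upload whose source file is still being written;
        # the client treats this like "partial upload exists" and retries later
        if src in busy_aps or (wark in reg and "done" not in reg[wark]):
            raise SourceBusy("source file busy; please try again later")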
@@ -3,7 +3,7 @@ from __future__ import print_function, unicode_literals

 """
 up2k.py: upload to copyparty
-2023-01-13, v1.2, ed <irc.rizon.net>, MIT-Licensed
+2023-03-05, v1.3, ed <irc.rizon.net>, MIT-Licensed
 https://github.com/9001/copyparty/blob/hovudstraum/bin/up2k.py

 - dependencies: requests

@@ -520,7 +520,11 @@ def handshake(ar, file, search):
         except Exception as ex:
             em = str(ex).split("SSLError(")[-1].split("\nURL: ")[0].strip()

-            if sc == 422 or "<pre>partial upload exists at a different" in txt:
+            if (
+                sc == 422
+                or "<pre>partial upload exists at a different" in txt
+                or "<pre>source file busy; please try again" in txt
+            ):
                 file.recheck = True
                 return [], False
             elif sc == 409 or "<pre>upload rejected, file already exists" in txt:

@@ -1714,7 +1714,7 @@ class HttpCli(object):
         except:
             raise Pebkac(500, min_ex())

-        x = self.conn.hsrv.broker.ask("up2k.handle_json", body)
+        x = self.conn.hsrv.broker.ask("up2k.handle_json", body, self.u2fh.aps)
         ret = x.get()
         if self.is_vproxied:
             if "purl" in ret:

@@ -1884,17 +1884,10 @@ class HttpCli(object):
             with self.mutex:
                 self.u2fh.close(path)

-        # windows cant rename open files
-        if ANYWIN and path != fin_path and not self.args.nw:
-            self.conn.hsrv.broker.ask("up2k.finish_upload", ptop, wark).get()
-
-        if not ANYWIN and not num_left:
-            times = (int(time.time()), int(lastmod))
-            self.log("no more chunks, setting times {}".format(times))
-            try:
-                bos.utime(fin_path, times)
-            except:
-                self.log("failed to utime ({}, {})".format(fin_path, times))
+        if not num_left and not self.args.nw:
+            self.conn.hsrv.broker.ask(
+                "up2k.finish_upload", ptop, wark, self.u2fh.aps
+            ).get()

         cinf = self.headers.get("x-up2k-stat", "")

@@ -149,12 +149,9 @@ class SvcHub(object):
             self.log("root", t.format(args.j))

         if not args.no_fpool and args.j != 1:
-            t = "WARNING: --use-fpool combined with multithreading is untested and can probably cause undefined behavior"
-            if ANYWIN:
-                t = 'windows cannot do multithreading without --no-fpool, so enabling that -- note that upload performance will suffer if you have microsoft defender "real-time protection" enabled, so you probably want to use -j 1 instead'
-                args.no_fpool = True
-
-            self.log("root", t, c=3)
+            t = "WARNING: ignoring --use-fpool because multithreading (-j{}) is enabled"
+            self.log("root", t.format(args.j), c=3)
+            args.no_fpool = True

         bri = "zy"[args.theme % 2 :][:1]
         ch = "abcdefghijklmnopqrstuvwx"[int(args.theme / 2)]

@@ -124,6 +124,7 @@ class Up2k(object):
         self.droppable: dict[str, list[str]] = {}
         self.volstate: dict[str, str] = {}
         self.vol_act: dict[str, float] = {}
+        self.busy_aps: set[str] = set()
         self.dupesched: dict[str, list[tuple[str, str, float]]] = {}
         self.snap_persist_interval = 300  # persist unfinished index every 5 min
         self.snap_discard_interval = 21600  # drop unfinished after 6 hours inactivity

@@ -161,12 +162,6 @@ class Up2k(object):
             t = "could not initialize sqlite3, will use in-memory registry only"
             self.log(t, 3)

-        if ANYWIN:
-            # usually fails to set lastmod too quickly
-            self.lastmod_q: list[tuple[str, int, tuple[int, int], bool]] = []
-            self.lastmod_q2 = self.lastmod_q[:]
-            Daemon(self._lastmodder, "up2k-lastmod")
-
         self.fstab = Fstab(self.log_func)
         self.gen_fk = self._gen_fk if self.args.log_fk else gen_filekey

@@ -2113,7 +2108,8 @@ class Up2k(object):
         if cj["ptop"] not in self.registry:
             raise Pebkac(410, "location unavailable")

-    def handle_json(self, cj: dict[str, Any]) -> dict[str, Any]:
+    def handle_json(self, cj: dict[str, Any], busy_aps: set[str]) -> dict[str, Any]:
+        self.busy_aps = busy_aps
         try:
             # bit expensive; 3.9=10x 3.11=2x
             if self.mutex.acquire(timeout=10):

@@ -2287,6 +2283,13 @@ class Up2k(object):
                 else:
                     # symlink to the client-provided name,
                     # returning the previous upload info
+                    if src in self.busy_aps or (
+                        wark in reg and "done" not in reg[wark]
+                    ):
+                        raise Pebkac(
+                            422, "source file busy; please try again later"
+                        )
+
                     job = deepcopy(job)
                     job["wark"] = wark
                     job["at"] = cj.get("at") or time.time()

@@ -2505,10 +2508,7 @@ class Up2k(object):

         if lmod and (not linked or SYMTIME):
             times = (int(time.time()), int(lmod))
-            if ANYWIN:
-                self.lastmod_q.append((dst, 0, times, False))
-            else:
-                bos.utime(dst, times, False)
+            bos.utime(dst, times, False)

     def handle_chunk(
         self, ptop: str, wark: str, chash: str

@@ -2589,13 +2589,10 @@ class Up2k(object):
                 self.regdrop(ptop, wark)
                 return ret, dst

-        # windows cant rename open files
-        if not ANYWIN or src == dst:
-            self._finish_upload(ptop, wark)
-
         return ret, dst

-    def finish_upload(self, ptop: str, wark: str) -> None:
+    def finish_upload(self, ptop: str, wark: str, busy_aps: set[str]) -> None:
+        self.busy_aps = busy_aps
         with self.mutex:
             self._finish_upload(ptop, wark)

@@ -2608,6 +2605,10 @@ class Up2k(object):
         except Exception as ex:
             raise Pebkac(500, "finish_upload, wark, " + repr(ex))

+        if job["need"]:
+            t = "finish_upload {} with remaining chunks {}"
+            raise Pebkac(500, t.format(wark, job["need"]))
+
         # self.log("--- " + wark + " " + dst + " finish_upload atomic " + dst, 4)
         atomic_move(src, dst)

@@ -2615,14 +2616,15 @@ class Up2k(object):
         vflags = self.flags[ptop]

         times = (int(time.time()), int(job["lmod"]))
-        if ANYWIN:
-            z1 = (dst, job["size"], times, job["sprs"])
-            self.lastmod_q.append(z1)
-        elif not job["hash"]:
-            try:
-                bos.utime(dst, times)
-            except:
-                pass
+        self.log(
+            "no more chunks, setting times {} ({}) on {}".format(
+                times, bos.path.getsize(dst), dst
+            )
+        )
+        try:
+            bos.utime(dst, times)
+        except:
+            self.log("failed to utime ({}, {})".format(dst, times))

         zs = "prel name lmod size ptop vtop wark host user addr"
         z2 = [job[x] for x in zs.split()]

@@ -2643,6 +2645,7 @@ class Up2k(object):
         if self.idx_wark(vflags, *z2):
             del self.registry[ptop][wark]
         else:
+            self.registry[ptop][wark]["done"] = 1
             self.regdrop(ptop, wark)

         if wake_sr:

@@ -3426,27 +3429,6 @@ class Up2k(object):
         if not job["hash"]:
             self._finish_upload(job["ptop"], job["wark"])

-    def _lastmodder(self) -> None:
-        while True:
-            ready = self.lastmod_q2
-            self.lastmod_q2 = self.lastmod_q
-            self.lastmod_q = []
-
-            time.sleep(1)
-            for path, sz, times, sparse in ready:
-                self.log("lmod: setting times {} on {}".format(times, path))
-                try:
-                    bos.utime(path, times, False)
-                except:
-                    t = "lmod: failed to utime ({}, {}):\n{}"
-                    self.log(t.format(path, times, min_ex()))
-
-                if sparse and self.args.sparse and self.args.sparse * 1024 * 1024 <= sz:
-                    try:
-                        sp.check_call(["fsutil", "sparse", "setflag", path, "0"])
-                    except:
-                        self.log("could not unsparse [{}]".format(path), 3)
-
     def _snapshot(self) -> None:
         slp = self.snap_persist_interval
         while True:

@@ -668,6 +668,7 @@ class FHC(object):

     def __init__(self) -> None:
         self.cache: dict[str, FHC.CE] = {}
+        self.aps: set[str] = set()

     def close(self, path: str) -> None:
         try:

@@ -679,6 +680,7 @@ class FHC(object):
             fh.close()

         del self.cache[path]
+        self.aps.remove(path)

     def clean(self) -> None:
         if not self.cache:

|
||||||
return self.cache[path].fhs.pop()
|
return self.cache[path].fhs.pop()
|
||||||
|
|
||||||
def put(self, path: str, fh: typing.BinaryIO) -> None:
|
def put(self, path: str, fh: typing.BinaryIO) -> None:
|
||||||
|
self.aps.add(path)
|
||||||
try:
|
try:
|
||||||
ce = self.cache[path]
|
ce = self.cache[path]
|
||||||
ce.fhs.append(fh)
|
ce.fhs.append(fh)
|
||||||
|
|
|
@@ -2382,16 +2382,17 @@ function up2k_init(subtle) {
                 }

                 var err_pend = rsp.indexOf('partial upload exists at a different') + 1,
+                    err_srcb = rsp.indexOf('source file busy; please try again') + 1,
                     err_plug = rsp.indexOf('upload blocked by x') + 1,
                     err_dupe = rsp.indexOf('upload rejected, file already exists') + 1;

-                if (err_pend || err_plug || err_dupe) {
+                if (err_pend || err_srcb || err_plug || err_dupe) {
                     err = rsp;
                     ofs = err.indexOf('\n/');
                     if (ofs !== -1) {
                         err = err.slice(0, ofs + 1) + linksplit(err.slice(ofs + 2).trimEnd()).join(' ');
                     }
-                    if (!t.rechecks && err_pend) {
+                    if (!t.rechecks && (err_pend || err_srcb)) {
                         t.rechecks = 0;
                         t.want_recheck = true;
                     }