From 6f8a588c4d473069baa4e42c1caef028a6bf4e98 Mon Sep 17 00:00:00 2001 From: ed Date: Tue, 13 Feb 2024 19:24:06 +0000 Subject: [PATCH] up2k: fix a mostly-harmless race as each chunk is written to the file, httpcli calls up2k.confirm_chunk to register the chunk as completed, and the reply indicates whether that was the final outstanding chunk, in which case httpcli closes the file descriptors since there's nothing more to write the issue is that the final chunk is registered as completed before the file descriptors are closed, meaning there could be writes that haven't finished flushing to disk yet if the client decides to issue another handshake during this window, up2k sees that all chunks are complete and calls up2k.finish_upload even as some threads might still be flushing the final writes to disk so the conditions to hit this bug were as follows (all must be true): * multiprocessing is disabled * there is a reverse-proxy * a client has several idle connections and reuses one of those * the server's filesystem is EXTREMELY slow, to the point where closing a file takes over 30 seconds the fix is to stop handshakes from being processed while a file is being closed, which is unfortunately a small bottleneck in that it prohibits initiating another upload while one is being finalized, but the required complexity to handle this better is probably not worth it (a separate mutex for each upload session or something like that) this issue is mostly harmless, partially because it is super tricky to hit (only aware of it happening synthetically), and because there is usually no harmful consequences; the worst-case is if this were to happen exactly as the server OS decides to crash, which would make the file appear to be fully uploaded even though it's missing some data (all extremely unlikely, but not impossible) there is no performance impact; if anything it should now accept new tcp connections slightly faster thanks to more granular locking --- copyparty/httpcli.py | 15 +++++++++------ copyparty/httpconn.py | 2 +- copyparty/httpsrv.py | 3 ++- tests/util.py | 1 + 4 files changed, 13 insertions(+), 8 deletions(-) diff --git a/copyparty/httpcli.py b/copyparty/httpcli.py index ef7b7908..153a6763 100644 --- a/copyparty/httpcli.py +++ b/copyparty/httpcli.py @@ -115,7 +115,7 @@ class HttpCli(object): self.t0 = time.time() self.conn = conn - self.mutex = conn.mutex # mypy404 + self.u2mutex = conn.u2mutex # mypy404 self.s = conn.s self.sr = conn.sr self.ip = conn.addr[0] @@ -1988,8 +1988,11 @@ class HttpCli(object): except: raise Pebkac(500, min_ex()) - x = self.conn.hsrv.broker.ask("up2k.handle_json", body, self.u2fh.aps) - ret = x.get() + # not to protect u2fh, but to prevent handshakes while files are closing + with self.u2mutex: + x = self.conn.hsrv.broker.ask("up2k.handle_json", body, self.u2fh.aps) + ret = x.get() + if self.is_vproxied: if "purl" in ret: ret["purl"] = self.args.SR + ret["purl"] @@ -2094,7 +2097,7 @@ class HttpCli(object): f = None fpool = not self.args.no_fpool and sprs if fpool: - with self.mutex: + with self.u2mutex: try: f = self.u2fh.pop(path) except: @@ -2137,7 +2140,7 @@ class HttpCli(object): if not fpool: f.close() else: - with self.mutex: + with self.u2mutex: self.u2fh.put(path, f) except: # maybe busted handle (eg. disk went full) @@ -2156,7 +2159,7 @@ class HttpCli(object): return False if not num_left and fpool: - with self.mutex: + with self.u2mutex: self.u2fh.close(path) if not num_left and not self.args.nw: diff --git a/copyparty/httpconn.py b/copyparty/httpconn.py index 90f40a93..bf3690ab 100644 --- a/copyparty/httpconn.py +++ b/copyparty/httpconn.py @@ -50,7 +50,7 @@ class HttpConn(object): self.addr = addr self.hsrv = hsrv - self.mutex: threading.Lock = hsrv.mutex # mypy404 + self.u2mutex: threading.Lock = hsrv.u2mutex # mypy404 self.args: argparse.Namespace = hsrv.args # mypy404 self.E: EnvParams = self.args.E self.asrv: AuthSrv = hsrv.asrv # mypy404 diff --git a/copyparty/httpsrv.py b/copyparty/httpsrv.py index 6e7daf9e..40bd108f 100644 --- a/copyparty/httpsrv.py +++ b/copyparty/httpsrv.py @@ -117,6 +117,7 @@ class HttpSrv(object): self.bound: set[tuple[str, int]] = set() self.name = "hsrv" + nsuf self.mutex = threading.Lock() + self.u2mutex = threading.Lock() self.stopping = False self.tp_nthr = 0 # actual @@ -220,7 +221,7 @@ class HttpSrv(object): def periodic(self) -> None: while True: time.sleep(2 if self.tp_ncli or self.ncli else 10) - with self.mutex: + with self.u2mutex, self.mutex: self.u2fh.clean() if self.tp_q: self.tp_ncli = max(self.ncli, self.tp_ncli - 2) diff --git a/tests/util.py b/tests/util.py index a91c1cce..3f0967ee 100644 --- a/tests/util.py +++ b/tests/util.py @@ -243,6 +243,7 @@ class VHttpConn(object): self.log_func = log self.log_src = "a" self.mutex = threading.Lock() + self.u2mutex = threading.Lock() self.nbyte = 0 self.nid = None self.nreq = -1