From 19bcf90e9f0a30afc773a884cb2ee91493336fcb Mon Sep 17 00:00:00 2001
From: ed
Date: Sat, 12 Dec 2020 00:35:54 +0100
Subject: [PATCH] support uploads with huge filenames

---
 copyparty/httpcli.py | 32 +++++++++-----
 copyparty/up2k.py    | 27 ++++++------
 copyparty/util.py    | 96 ++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 129 insertions(+), 26 deletions(-)

diff --git a/copyparty/httpcli.py b/copyparty/httpcli.py
index 1f8fb7e1..248901a4 100644
--- a/copyparty/httpcli.py
+++ b/copyparty/httpcli.py
@@ -568,24 +568,24 @@ class HttpCli(object):
                     self.log("discarding incoming file without filename")
                     # fallthrough
 
-                fn = os.devnull
                 if p_file and not nullwrite:
                     fdir = os.path.join(vfs.realpath, rem)
-                    fn = os.path.join(fdir, sanitize_fn(p_file))
+                    fname = sanitize_fn(p_file)
 
                     if not os.path.isdir(fsenc(fdir)):
                         raise Pebkac(404, "that folder does not exist")
 
-                    # TODO broker which avoid this race and
-                    # provides a new filename if taken (same as up2k)
-                    if os.path.exists(fsenc(fn)):
-                        fn += ".{:.6f}-{}".format(time.time(), self.addr[0])
-                        # using current-time instead of t0 cause clients
-                        # may reuse a name for multiple files in one post
+                    suffix = ".{:.6f}-{}".format(time.time(), self.addr[0])
+                    open_args = {"fdir": fdir, "suffix": suffix}
+                else:
+                    open_args = {}
+                    fname = os.devnull
+                    fdir = ""
 
                 try:
-                    with open(fsenc(fn), "wb") as f:
-                        self.log("writing to {0}".format(fn))
+                    with ren_open(fname, "wb", **open_args) as f:
+                        f, fname = f["orz"]
+                        self.log("writing to {}/{}".format(fdir, fname))
                         sz, sha512_hex, _ = hashcopy(self.conn, p_data, f)
                         if sz == 0:
                             raise Pebkac(400, "empty files in post")
@@ -594,8 +594,16 @@ class HttpCli(object):
                         self.conn.nbyte += sz
 
                 except Pebkac:
-                    if fn != os.devnull:
-                        os.rename(fsenc(fn), fsenc(fn + ".PARTIAL"))
+                    if fname != os.devnull:
+                        fp = os.path.join(fdir, fname)
+                        suffix = ".PARTIAL"
+                        try:
+                            os.rename(fsenc(fp), fsenc(fp + suffix))
+                        except OSError:
+                            # too long once suffixed; shorten the destination
+                            # only, the source must keep naming the real file
+                            fp2 = fp[: -len(suffix)] + suffix
+                            os.rename(fsenc(fp), fsenc(fp2))
 
                     raise
 
diff --git a/copyparty/up2k.py b/copyparty/up2k.py
index b3dc1271..198a9c3a 100644
--- a/copyparty/up2k.py
+++ b/copyparty/up2k.py
@@ -13,7 +13,7 @@ import threading
 from copy import deepcopy
 
 from .__init__ import WINDOWS
-from .util import Pebkac, Queue, fsenc, sanitize_fn
+from .util import Pebkac, Queue, fsenc, sanitize_fn, ren_open
 
 
 class Up2k(object):
@@ -68,9 +68,11 @@ class Up2k(object):
                         # symlink to the client-provided name,
                         # returning the previous upload info
                         job = deepcopy(job)
-                        suffix = self._suffix(dst, now, job["addr"])
-                        job["name"] = cj["name"] + suffix
-                        self._symlink(src, dst + suffix)
+                        job["rdir"] = cj["rdir"]
+                        job["name"] = self._untaken(cj["rdir"], cj["name"], now, cj["addr"])
+                        dst = os.path.join(job["rdir"], job["name"])
+                        os.unlink(fsenc(dst))  # TODO ed pls
+                        self._symlink(src, dst)
             else:
                 job = {
                     "wark": wark,
@@ -85,9 +87,6 @@ class Up2k(object):
                     "hash": deepcopy(cj["hash"]),
                 }
 
-                path = os.path.join(job["rdir"], job["name"])
-                job["name"] += self._suffix(path, now, cj["addr"])
-
                 # one chunk may occur multiple times in a file;
                 # filter to unique values for the list of missing chunks
                 # (preserve order to reduce disk thrashing)
@@ -108,13 +107,12 @@ class Up2k(object):
                 "wark": wark,
             }
 
-    def _suffix(self, fpath, ts, ip):
+    def _untaken(self, fdir, fname, ts, ip):
         # TODO broker which avoid this race and
         # provides a new filename if taken (same as bup)
-        if not os.path.exists(fsenc(fpath)):
-            return ""
-
-        return ".{:.6f}-{}".format(ts, ip)
+        suffix = ".{:.6f}-{}".format(ts, ip)
+        with ren_open(fname, "wb", fdir=fdir, suffix=suffix) as f:
+            return f["orz"][1]
 
     def _symlink(self, src, dst):
         # TODO store this in linktab so we never delete src if there are links to it
@@ -218,8 +216,9 @@ class Up2k(object):
 
     def _new_upload(self, job):
         self.registry[job["wark"]] = job
-        path = os.path.join(job["rdir"], job["name"])
-        with open(fsenc(path), "wb") as f:
+        suffix = ".{:.6f}-{}".format(job["t0"], job["addr"])
+        with ren_open(job["name"], "wb", fdir=job["rdir"], suffix=suffix) as f:
+            f, job["name"] = f["orz"]
             f.seek(job["size"] - 1)
             f.write(b"e")
 
diff --git a/copyparty/util.py b/copyparty/util.py
index b1cab228..f735a55b 100644
--- a/copyparty/util.py
+++ b/copyparty/util.py
@@ -2,6 +2,8 @@
 from __future__ import print_function, unicode_literals
 
 import re
+import os
 import sys
 import time
+import errno
 import base64
@@ -10,6 +12,7 @@ import hashlib
 import platform
 import threading
 import mimetypes
+import contextlib
 import subprocess as sp  # nosec
 
 from .__init__ import PY2, WINDOWS
@@ -96,6 +99,99 @@ class Unrecv(object):
         self.buf = buf + self.buf
 
 
+@contextlib.contextmanager
+def ren_open(fname, *args, **kwargs):
+    """
+    open() which survives filenames the filesystem rejects as too long,
+    by shortening the name until the open succeeds
+
+    fname is the wanted filename, or an already-open file object
+    (passed through as-is and left open for the caller to close);
+    args and kwargs go to open(), except for these optional kwargs:
+      fdir: folder to create the file in
+      suffix: appended to the filename if that name is already taken
+
+    a shortened name gets a hash of the original name spliced in,
+    and the original name is written to fn-trunc.HASH.txt
+
+    yields {"orz": [fileobj, fname]} where fname is the name used;
+    reraises the open() error if the name cannot be shortened further
+    """
+    # no keyword-only arguments in py2, so take them from kwargs
+    fdir = kwargs.pop("fdir", None)
+    suffix = kwargs.pop("suffix", None)
+
+    if hasattr(fname, "write"):
+        # already a file object; open() would just reject it
+        yield {"orz": [fname, fname]}
+        return
+
+    orig_name = fname
+    bname = fname
+    ext = ""
+    while True:
+        ofs = bname.rfind(".")
+        if ofs < 0 or ofs < len(bname) - 7:
+            # doesn't look like an extension anymore
+            break
+
+        ext = bname[ofs:] + ext
+        bname = bname[:ofs]
+
+    b64 = ""
+    while True:
+        fpath = os.path.join(fdir, fname) if fdir else fname
+        if suffix and os.path.exists(fsenc(fpath)):
+            fpath += suffix
+            fname += suffix
+            ext += suffix
+
+        try:
+            # only open() is guarded; the yield happens after the loop
+            # so that errors from the caller are never retried here
+            f = open(fsenc(fpath), *args, **kwargs)
+            break
+        except (IOError, OSError) as ex_:
+            # open() raises IOError on py2 and OSError on py3
+            ex = ex_
+            if ex.errno != errno.ENAMETOOLONG:
+                raise
+
+        if not b64:
+            b64 = (bname + ext).encode("utf-8", "replace")
+            b64 = hashlib.sha512(b64).digest()[:12]
+            b64 = base64.urlsafe_b64encode(b64).decode("utf-8").rstrip("=")
+
+        badlen = len(fname)
+        while len(fname) >= badlen:
+            if len(bname) < 8:
+                raise ex
+
+            if len(bname) > len(ext):
+                # drop the last letter of the filename
+                bname = bname[:-1]
+            else:
+                try:
+                    # drop the leftmost sub-extension
+                    _, ext = ext.split(".", 1)
+                except ValueError:
+                    # okay do the first letter then
+                    ext = "." + ext[2:]
+
+            fname = "{}~{}{}".format(bname, b64, ext)
+
+    with f:
+        if b64:
+            fp2 = "fn-trunc.{}.txt".format(b64)
+            if fdir:
+                fp2 = os.path.join(fdir, fp2)
+
+            with open(fsenc(fp2), "wb") as f2:
+                f2.write(orig_name.encode("utf-8"))
+
+        yield {"orz": [f, fname]}
+
+
 class MultipartParser(object):
     def __init__(self, log_func, sr, http_headers):
         self.sr = sr