support uploads with huge filenames

ed 2020-12-12 00:35:54 +01:00
parent dae9c00742
commit 19bcf90e9f
3 changed files with 104 additions and 26 deletions
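
The key addition is a ren_open() helper in util.py (shown in full below): it retries open() whenever the OS rejects a filename as too long, trimming the base name while keeping the extension chain, and splicing in a 16-character tag derived from the sha512 of the original name so shortened names cannot collide; the untruncated name is preserved in a fn-trunc.<tag>.txt sidecar file next to the upload. A minimal standalone sketch of that naming scheme (shorten() and keep are illustrative, not part of the commit):

    import base64
    import hashlib
    import os

    def shorten(fname, keep=48):
        # same tag construction as ren_open's b64: first 12 bytes of the
        # sha512 of the original name, urlsafe-base64, padding stripped
        tag = hashlib.sha512(fname.encode("utf-8", "replace")).digest()[:12]
        tag = base64.urlsafe_b64encode(tag).decode("utf-8").rstrip("=")
        bname, ext = os.path.splitext(fname)  # ren_open collects multi-part extensions
        return "{}~{}{}".format(bname[:keep], tag, ext)

    print(shorten("a" * 300 + ".txt"))  # aaa...aaa~<16-char tag>.txt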

copyparty/httpcli.py

@@ -568,24 +568,24 @@ class HttpCli(object):
                 self.log("discarding incoming file without filename")
                 # fallthrough
 
-            fn = os.devnull
             if p_file and not nullwrite:
                 fdir = os.path.join(vfs.realpath, rem)
-                fn = os.path.join(fdir, sanitize_fn(p_file))
+                fname = sanitize_fn(p_file)
 
                 if not os.path.isdir(fsenc(fdir)):
                     raise Pebkac(404, "that folder does not exist")
 
                 # TODO broker which avoids this race and
                 # provides a new filename if taken (same as up2k)
-                if os.path.exists(fsenc(fn)):
-                    fn += ".{:.6f}-{}".format(time.time(), self.addr[0])
+
+                # using current-time instead of t0 since clients
+                # may reuse a name for multiple files in one post
+                suffix = ".{:.6f}-{}".format(time.time(), self.addr[0])
+                open_args = {"fdir": fdir, "suffix": suffix}
+            else:
+                open_args = {}
+                fname = os.devnull
+                fdir = ""
 
             try:
-                with open(fsenc(fn), "wb") as f:
-                    self.log("writing to {0}".format(fn))
+                with ren_open(fname, "wb", **open_args) as f:
+                    f, fname = f["orz"]
+                    self.log("writing to {}/{}".format(fdir, fname))
                     sz, sha512_hex, _ = hashcopy(self.conn, p_data, f)
                     if sz == 0:
                         raise Pebkac(400, "empty files in post")
@@ -594,8 +594,14 @@ class HttpCli(object):
 
                 self.conn.nbyte += sz
             except Pebkac:
-                if fn != os.devnull:
-                    os.rename(fsenc(fn), fsenc(fn + ".PARTIAL"))
+                if fname != os.devnull:
+                    fp = os.path.join(fdir, fname)
+                    suffix = ".PARTIAL"
+                    try:
+                        os.rename(fsenc(fp), fsenc(fp + suffix))
+                    except OSError:
+                        # possibly the name was already at the length limit;
+                        # shorten it to make room for the suffix and retry
+                        fp = fp[: -len(suffix)]
+                        os.rename(fsenc(fp), fsenc(fp + suffix))
 
                 raise

copyparty/up2k.py

@@ -13,7 +13,7 @@ import threading
 from copy import deepcopy
 
 from .__init__ import WINDOWS
-from .util import Pebkac, Queue, fsenc, sanitize_fn
+from .util import Pebkac, Queue, fsenc, sanitize_fn, ren_open
 
 
 class Up2k(object):
@@ -68,9 +68,11 @@ class Up2k(object):
             # symlink to the client-provided name,
             # returning the previous upload info
             job = deepcopy(job)
-            suffix = self._suffix(dst, now, job["addr"])
-            job["name"] = cj["name"] + suffix
-            self._symlink(src, dst + suffix)
+            job["rdir"] = cj["rdir"]
+            job["name"] = self._untaken(cj["rdir"], cj["name"], now, cj["addr"])
+            dst = os.path.join(job["rdir"], job["name"])
+            os.unlink(fsenc(dst))  # TODO ed pls
+            self._symlink(src, dst)
         else:
             job = {
                 "wark": wark,
@@ -85,9 +87,6 @@ class Up2k(object):
                 "hash": deepcopy(cj["hash"]),
             }
 
-            path = os.path.join(job["rdir"], job["name"])
-            job["name"] += self._suffix(path, now, cj["addr"])
-
         # one chunk may occur multiple times in a file;
         # filter to unique values for the list of missing chunks
         # (preserve order to reduce disk thrashing)
@@ -108,13 +107,12 @@ class Up2k(object):
             "wark": wark,
         }
 
-    def _suffix(self, fpath, ts, ip):
+    def _untaken(self, fdir, fname, ts, ip):
         # TODO broker which avoids this race and
         # provides a new filename if taken (same as bup)
-        if not os.path.exists(fsenc(fpath)):
-            return ""
-
-        return ".{:.6f}-{}".format(ts, ip)
+        suffix = ".{:.6f}-{}".format(ts, ip)
+        with ren_open(fname, "wb", fdir=fdir, suffix=suffix) as f:
+            return f["orz"][1]
 
     def _symlink(self, src, dst):
         # TODO store this in linktab so we never delete src if there are links to it
@@ -218,8 +216,9 @@ class Up2k(object):
     def _new_upload(self, job):
         self.registry[job["wark"]] = job
-        path = os.path.join(job["rdir"], job["name"])
-        with open(fsenc(path), "wb") as f:
+        suffix = ".{:.6f}-{}".format(job["t0"], job["addr"])
+        with ren_open(job["name"], "wb", fdir=job["rdir"], suffix=suffix) as f:
+            f, job["name"] = f["orz"]
             f.seek(job["size"] - 1)
             f.write(b"e")
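
One detail worth noting in _new_upload: the upload target is still preallocated to its final size by seeking to the last offset and writing a single byte, with ren_open now supplying the name. A standalone illustration of that preallocation trick (preallocate() is an illustrative name, not from the commit):

    import os

    def preallocate(path, size):
        # reserve the full file size up front, as _new_upload does:
        # seek to the final byte and write a single "e"
        with open(path, "wb") as f:
            if size > 0:
                f.seek(size - 1)
                f.write(b"e")

    preallocate("/tmp/demo.bin", 1024)
    assert os.path.getsize("/tmp/demo.bin") == 1024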

copyparty/util.py

@@ -2,6 +2,7 @@
 from __future__ import print_function, unicode_literals
 
 import re
 import os
+import sys
 import time
 import base64
@@ -10,6 +11,7 @@ import hashlib
 import platform
 import threading
 import mimetypes
+import contextlib
 import subprocess as sp  # nosec
 
 from .__init__ import PY2, WINDOWS
@@ -96,6 +98,77 @@ class Unrecv(object):
         self.buf = buf + self.buf
 
 
+@contextlib.contextmanager
+def ren_open(fname, *args, fdir=None, suffix=None, **kwargs):
+    if hasattr(fname, "write"):
+        with open(fname, *args, **kwargs) as f:
+            yield {"orz": [f, fname]}
+        return
+
+    orig_name = fname
+    bname = fname
+    ext = ""
+    while True:
+        ofs = bname.rfind(".")
+        if ofs < 0 or ofs < len(bname) - 7:
+            # doesn't look like an extension anymore
+            break
+
+        ext = bname[ofs:] + ext
+        bname = bname[:ofs]
+
+    b64 = ""
+    while True:
+        try:
+            if fdir:
+                fpath = os.path.join(fdir, fname)
+            else:
+                fpath = fname
+
+            if suffix and os.path.exists(fpath):
+                fpath += suffix
+                fname += suffix
+                ext += suffix
+
+            with open(fsenc(fpath), *args, **kwargs) as f:
+                if b64:
+                    # the name was truncated; keep the original
+                    # in a sidecar textfile next to the upload
+                    fp2 = "fn-trunc.{}.txt".format(b64)
+                    fp2 = os.path.join(fdir, fp2)
+                    with open(fsenc(fp2), "wb") as f2:
+                        f2.write(orig_name.encode("utf-8"))
+
+                yield {"orz": [f, fname]}
+                return
+
+        except OSError as ex_:
+            ex = ex_
+            if ex.errno != 36:  # ENAMETOOLONG on linux
+                raise
+
+        if not b64:
+            b64 = (bname + ext).encode("utf-8", "replace")
+            b64 = hashlib.sha512(b64).digest()[:12]
+            b64 = base64.urlsafe_b64encode(b64).decode("utf-8").rstrip("=")
+
+        badlen = len(fname)
+        while len(fname) >= badlen:
+            if len(bname) < 8:
+                raise ex
+
+            if len(bname) > len(ext):
+                # drop the last letter of the filename
+                bname = bname[:-1]
+            else:
+                try:
+                    # drop the leftmost sub-extension
+                    _, ext = ext.split(".", 1)
+                except ValueError:
+                    # okay do the first letter then
+                    ext = "." + ext[2:]
+
+            fname = "{}~{}{}".format(bname, b64, ext)
+
+
 class MultipartParser(object):
     def __init__(self, log_func, sr, http_headers):
         self.sr = sr
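
For reference, the calling convention adopted in httpcli.py and up2k.py above: ren_open yields a dict whose "orz" entry carries the open file object together with the name that was actually created, which may differ from the requested one if it had to be shortened or suffixed. A usage sketch (the directory, filename, and address are example values):

    import time

    from copyparty.util import ren_open  # the helper added above

    fdir = "/tmp"               # example directory
    fname = "x" * 300 + ".txt"  # a name most filesystems would reject
    suffix = ".{:.6f}-{}".format(time.time(), "127.0.0.1")  # ts-ip, as in the diff
    with ren_open(fname, "wb", fdir=fdir, suffix=suffix) as f:
        f, fname = f["orz"]  # file object + the (possibly shortened) final name
        f.write(b"hello")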