From c6acd3a90461411f0d7743d6a9993a8a36ec01bf Mon Sep 17 00:00:00 2001 From: ed Date: Sat, 23 Mar 2024 16:35:14 +0000 Subject: [PATCH] add option `--s-rd-sz` (socket read size): counterpart of `--s-wr-sz`, which existed already; the default (256 KiB) appears optimal in the most popular scenario (linux host with storage on local physical disk, usually NVMe); it was previously 32 KiB, so large uploads should now use 17% less CPU; also adds sanity checks for the values of `--iobuf`, `--s-rd-sz`, `--s-wr-sz`; also adds a file-overwrite feature (`?replace`) for multipart posts --- README.md | 3 ++ copyparty/__main__.py | 1 + copyparty/httpcli.py | 23 ++++++++--- copyparty/svchub.py | 20 +++++++++ copyparty/util.py | 22 ++++++---- docs/bufsize.txt | 96 +++++++++++++++++++++++++++++++++++++++++++ docs/devnotes.md | 1 + tests/util.py | 3 +- 8 files changed, 155 insertions(+), 14 deletions(-) create mode 100644 docs/bufsize.txt diff --git a/README.md b/README.md index 2b382aac..92b3e5ee 100644 --- a/README.md +++ b/README.md @@ -1291,6 +1291,8 @@ you may experience poor upload performance this way, but that can sometimes be f someone has also tested geesefs in combination with [gocryptfs](https://nuetzlich.net/gocryptfs/) with surprisingly good results, getting 60 MiB/s upload speeds on a gbit line, but JuiceFS won with 80 MiB/s using its built-in encryption +you may improve performance by specifying larger values for `--iobuf` / `--s-rd-sz` / `--s-wr-sz` + ## hiding from google @@ -1740,6 +1742,7 @@ below are some tweaks roughly ordered by usefulness: * `--hist` pointing to a fast location (ssd) will make directory listings and searches faster when `-e2d` or `-e2t` is set * and also makes thumbnails load faster, regardless of e2d/e2t * `--no-hash .` when indexing a network-disk if you don't care about the actual filehashes and only want the names/tags searchable +* if your volumes are on a network-disk such as NFS / SMB / s3, specifying larger values for `--iobuf` and/or `--s-rd-sz` and/or `--s-wr-sz` 
may help; try setting all of them to `524288` or `1048576` or `4194304` * `--no-htp --hash-mt=0 --mtag-mt=1 --th-mt=1` minimizes the number of threads; can help in some eccentric environments (like the vscode debugger) * `-j0` enables multiprocessing (actual multithreading), can reduce latency to `20+80/numCores` percent and generally improve performance in cpu-intensive workloads, for example: * lots of connections (many users or heavy clients) diff --git a/copyparty/__main__.py b/copyparty/__main__.py index 1998eff7..a115eeaf 100755 --- a/copyparty/__main__.py +++ b/copyparty/__main__.py @@ -917,6 +917,7 @@ def add_network(ap): ap2.add_argument("--freebind", action="store_true", help="allow listening on IPs which do not yet exist, for example if the network interfaces haven't finished going up. Only makes sense for IPs other than '0.0.0.0', '127.0.0.1', '::', and '::1'. May require running as root (unless net.ipv6.ip_nonlocal_bind)") ap2.add_argument("--s-thead", metavar="SEC", type=int, default=120, help="socket timeout (read request header)") ap2.add_argument("--s-tbody", metavar="SEC", type=float, default=186, help="socket timeout (read/write request/response bodies). Use 60 on fast servers (default is extremely safe). 
Disable with 0 if reverse-proxied for a 2%% speed boost") + ap2.add_argument("--s-rd-sz", metavar="B", type=int, default=256*1024, help="socket read size in bytes (indirectly affects filesystem writes; recommendation: keep equal-to or lower-than \033[33m--iobuf\033[0m)") ap2.add_argument("--s-wr-sz", metavar="B", type=int, default=256*1024, help="socket write size in bytes") ap2.add_argument("--s-wr-slp", metavar="SEC", type=float, default=0, help="debug: socket write delay in seconds") ap2.add_argument("--rsp-slp", metavar="SEC", type=float, default=0, help="debug: response delay in seconds") diff --git a/copyparty/httpcli.py b/copyparty/httpcli.py index b13f2dd4..a1a5088a 100644 --- a/copyparty/httpcli.py +++ b/copyparty/httpcli.py @@ -1610,15 +1610,16 @@ class HttpCli(object): return enc or "utf-8" def get_body_reader(self) -> tuple[Generator[bytes, None, None], int]: + bufsz = self.args.s_rd_sz if "chunked" in self.headers.get("transfer-encoding", "").lower(): - return read_socket_chunked(self.sr), -1 + return read_socket_chunked(self.sr, bufsz), -1 remains = int(self.headers.get("content-length", -1)) if remains == -1: self.keepalive = False - return read_socket_unbounded(self.sr), remains + return read_socket_unbounded(self.sr, bufsz), remains else: - return read_socket(self.sr, remains), remains + return read_socket(self.sr, bufsz, remains), remains def dump_to_file(self, is_put: bool) -> tuple[int, str, str, int, str, str]: # post_sz, sha_hex, sha_b64, remains, path, url @@ -1921,7 +1922,7 @@ class HttpCli(object): return "%s %s n%s" % (spd1, spd2, self.conn.nreq) def handle_post_multipart(self) -> bool: - self.parser = MultipartParser(self.log, self.sr, self.headers) + self.parser = MultipartParser(self.log, self.args, self.sr, self.headers) self.parser.parse() file0: list[tuple[str, Optional[str], Generator[bytes, None, None]]] = [] @@ -2150,7 +2151,7 @@ class HttpCli(object): self.log("writing {} #{} @{} len {}".format(path, chash, cstart, remains)) - 
reader = read_socket(self.sr, remains) + reader = read_socket(self.sr, self.args.s_rd_sz, remains) f = None fpool = not self.args.no_fpool and sprs @@ -2438,6 +2439,18 @@ class HttpCli(object): suffix = "-{:.6f}-{}".format(time.time(), dip) open_args = {"fdir": fdir, "suffix": suffix} + if "replace" in self.uparam: + abspath = os.path.join(fdir, fname) + if not self.can_delete: + self.log("user not allowed to overwrite with ?replace") + elif bos.path.exists(abspath): + try: + bos.unlink(abspath) + t = "overwriting file with new upload: %s" + except: + t = "toctou while deleting for ?replace: %s" + self.log(t % (abspath,)) + # reserve destination filename with ren_open(fname, "wb", fdir=fdir, suffix=suffix) as zfw: fname = zfw["orz"][1] diff --git a/copyparty/svchub.py b/copyparty/svchub.py index 2b7be552..028dcab9 100644 --- a/copyparty/svchub.py +++ b/copyparty/svchub.py @@ -173,6 +173,26 @@ class SvcHub(object): self.log("root", t.format(args.j), c=3) args.no_fpool = True + for name, arg in ( + ("iobuf", "iobuf"), + ("s-rd-sz", "s_rd_sz"), + ("s-wr-sz", "s_wr_sz"), + ): + zi = getattr(args, arg) + if zi < 32768: + t = "WARNING: expect very poor performance because you specified a very low value (%d) for --%s" + self.log("root", t % (zi, name), 3) + zi = 2 + zi2 = 2 ** (zi - 1).bit_length() + if zi != zi2: + zi3 = 2 ** ((zi - 1).bit_length() - 1) + t = "WARNING: expect poor performance because --%s is not a power-of-two; consider using %d or %d instead of %d" + self.log("root", t % (name, zi2, zi3, zi), 3) + + if args.s_rd_sz > args.iobuf: + t = "WARNING: --s-rd-sz (%d) is larger than --iobuf (%d); this may lead to reduced performance" + self.log("root", t % (args.s_rd_sz, args.iobuf), 3) + bri = "zy"[args.theme % 2 :][:1] ch = "abcdefghijklmnopqrstuvwx"[int(args.theme / 2)] args.theme = "{0}{1} {0} {1}".format(ch, bri) diff --git a/copyparty/util.py b/copyparty/util.py index 18cf14a2..dcb91017 100644 --- a/copyparty/util.py +++ b/copyparty/util.py @@ -1400,10 
+1400,15 @@ def ren_open( class MultipartParser(object): def __init__( - self, log_func: "NamedLogger", sr: Unrecv, http_headers: dict[str, str] + self, + log_func: "NamedLogger", + args: argparse.Namespace, + sr: Unrecv, + http_headers: dict[str, str], ): self.sr = sr self.log = log_func + self.args = args self.headers = http_headers self.re_ctype = re.compile(r"^content-type: *([^; ]+)", re.IGNORECASE) @@ -1502,7 +1507,7 @@ class MultipartParser(object): def _read_data(self) -> Generator[bytes, None, None]: blen = len(self.boundary) - bufsz = 32 * 1024 + bufsz = self.args.s_rd_sz while True: try: buf = self.sr.recv(bufsz) @@ -2243,10 +2248,11 @@ def shut_socket(log: "NamedLogger", sck: socket.socket, timeout: int = 3) -> Non sck.close() -def read_socket(sr: Unrecv, total_size: int) -> Generator[bytes, None, None]: +def read_socket( + sr: Unrecv, bufsz: int, total_size: int +) -> Generator[bytes, None, None]: remains = total_size while remains > 0: - bufsz = 32 * 1024 if bufsz > remains: bufsz = remains @@ -2260,16 +2266,16 @@ def read_socket(sr: Unrecv, total_size: int) -> Generator[bytes, None, None]: yield buf -def read_socket_unbounded(sr: Unrecv) -> Generator[bytes, None, None]: +def read_socket_unbounded(sr: Unrecv, bufsz: int) -> Generator[bytes, None, None]: try: while True: - yield sr.recv(32 * 1024) + yield sr.recv(bufsz) except: return def read_socket_chunked( - sr: Unrecv, log: Optional["NamedLogger"] = None + sr: Unrecv, bufsz: int, log: Optional["NamedLogger"] = None ) -> Generator[bytes, None, None]: err = "upload aborted: expected chunk length, got [{}] |{}| instead" while True: @@ -2303,7 +2309,7 @@ def read_socket_chunked( if log: log("receiving %d byte chunk" % (chunklen,)) - for chunk in read_socket(sr, chunklen): + for chunk in read_socket(sr, bufsz, chunklen): yield chunk x = sr.recv_ex(2, False) diff --git a/docs/bufsize.txt b/docs/bufsize.txt new file mode 100644 index 00000000..ce4aa053 --- /dev/null +++ b/docs/bufsize.txt @@ -0,0 +1,96 @@ 
+notes from testing various buffer sizes of files and sockets + +summary: + +download-folder-as-tar: would be 7% faster with --iobuf 65536 (but got 20% faster in v1.11.2) + +download-folder-as-zip: optimal with default --iobuf 262144 + +download-file-over-https: optimal with default --iobuf 262144 + +put-large-file: optimal with default --iobuf 262144, --s-rd-sz 262144 (and got 14% faster in v1.11.2) + +post-large-file: optimal with default --iobuf 262144, --s-rd-sz 262144 (and got 18% faster in v1.11.2) + +---- + +oha -z10s -c1 --ipv4 --insecure http://127.0.0.1:3923/bigs/?tar + 3.3 req/s 1.11.1 + 4.3 4.0 3.3 req/s 1.12.2 + 64 256 512 --iobuf 256 (prefer smaller) + 32 32 32 --s-rd-sz + +oha -z10s -c1 --ipv4 --insecure http://127.0.0.1:3923/bigs/?zip + 2.9 req/s 1.11.1 + 2.5 2.9 2.9 req/s 1.12.2 + 64 256 512 --iobuf 256 (prefer bigger) + 32 32 32 --s-rd-sz + +oha -z10s -c1 --ipv4 --insecure http://127.0.0.1:3923/pairdupes/?tar + 8.3 req/s 1.11.1 + 8.4 8.4 8.5 req/s 1.12.2 + 64 256 512 --iobuf 256 (prefer bigger) + 32 32 32 --s-rd-sz + +oha -z10s -c1 --ipv4 --insecure http://127.0.0.1:3923/pairdupes/?zip + 13.9 req/s 1.11.1 + 14.1 14.0 13.8 req/s 1.12.2 + 64 256 512 --iobuf 256 (prefer smaller) + 32 32 32 --s-rd-sz + +oha -z10s -c1 --ipv4 --insecure http://127.0.0.1:3923/pairdupes/987a +5260 req/s 1.11.1 +5246 5246 5280 5268 req/s 1.12.2 + 64 256 512 256 --iobuf dontcare + 32 32 32 512 --s-rd-sz dontcare + +oha -z10s -c1 --ipv4 --insecure https://127.0.0.1:3923/pairdupes/987a +4445 req/s 1.11.1 +4462 4494 4444 req/s 1.12.2 + 64 256 512 --iobuf dontcare + 32 32 32 --s-rd-sz + +oha -z10s -c1 --ipv4 --insecure http://127.0.0.1:3923/bigs/gssc-02-cannonball-skydrift/track10.cdda.flac + 95 req/s 1.11.1 + 95 97 req/s 1.12.2 + 64 512 --iobuf dontcare + 32 32 --s-rd-sz + +oha -z10s -c1 --ipv4 --insecure https://127.0.0.1:3923/bigs/gssc-02-cannonball-skydrift/track10.cdda.flac + 15.4 req/s 1.11.1 + 15.4 15.3 14.9 15.4 req/s 1.12.2 + 64 256 512 512 --iobuf 256 (prefer smaller, 
and smaller than s-wr-sz) + 32 32 32 32 --s-rd-sz + 256 256 256 512 --s-wr-sz + +---- + +python3 ~/dev/old/copyparty\ v1.11.1\ dont\ ban\ the\ pipes.py -q -i 127.0.0.1 -v .::A --daw +python3 ~/dev/copyparty/dist/copyparty-sfx.py -q -i 127.0.0.1 -v .::A --daw --iobuf $((1024*512)) + +oha -z10s -c1 --ipv4 --insecure -mPUT -r0 -D ~/Music/gssc-02-cannonball-skydrift/track10.cdda.flac http://127.0.0.1:3923/a.bin +10.8 req/s 1.11.1 +10.8 11.5 11.8 12.1 12.2 12.3 req/s new + 512 512 512 512 512 256 --iobuf 256 + 32 64 128 256 512 256 --s-rd-sz 256 (prefer bigger) + +---- + +buildpost() { +b=--jeg-er-grensestaven; +printf -- "$b\r\nContent-Disposition: form-data; name=\"act\"\r\n\r\nbput\r\n$b\r\nContent-Disposition: form-data; name=\"f\"; filename=\"a.bin\"\r\nContent-Type: audio/mpeg\r\n\r\n" +cat "$1" +printf -- "\r\n${b}--\r\n" +} +buildpost ~/Music/gssc-02-cannonball-skydrift/track10.cdda.flac >big.post +buildpost ~/Music/bottomtext.txt >smol.post + +oha -z10s -c1 --ipv4 --insecure -mPOST -r0 -T 'multipart/form-data; boundary=jeg-er-grensestaven' -D big.post http://127.0.0.1:3923/?replace +9.6 11.2 11.3 11.1 10.9 req/s v1.11.2 +512 512 256 128 256 --iobuf 256 + 32 512 256 128 128 --s-rd-sz 256 + +oha -z10s -c1 --ipv4 --insecure -mPOST -r0 -T 'multipart/form-data; boundary=jeg-er-grensestaven' -D smol.post http://127.0.0.1:3923/?replace +2445 2414 2401 2437 + 256 128 256 256 --iobuf 256 + 128 128 256 64 --s-rd-sz 128 (but use 256 since big posts are more important) diff --git a/docs/devnotes.md b/docs/devnotes.md index 72f331c0..5c80f711 100644 --- a/docs/devnotes.md +++ b/docs/devnotes.md @@ -164,6 +164,7 @@ authenticate using header `Cookie: cppwd=foo` or url param `&pw=foo` | PUT | `?xz` | (binary data) | compress with xz and write into file at URL | | mPOST | | `f=FILE` | upload `FILE` into the folder at URL | | mPOST | `?j` | `f=FILE` | ...and reply with json | +| mPOST | `?replace` | `f=FILE` | ...and overwrite existing files | | mPOST | | `act=mkdir`, `name=foo` 
| create directory `foo` at URL | | POST | `?delete` | | delete URL recursively | | jPOST | `?delete` | `["/foo","/bar"]` | delete `/foo` and `/bar` recursively | diff --git a/tests/util.py b/tests/util.py index 8cfa61ea..415ac5a0 100644 --- a/tests/util.py +++ b/tests/util.py @@ -155,7 +155,8 @@ class Cfg(Namespace): mth={}, mtp=[], rm_retry="0/0", - s_wr_sz=512 * 1024, + s_rd_sz=256 * 1024, + s_wr_sz=256 * 1024, sort="href", srch_hits=99999, th_crop="y",