diff --git a/README.md b/README.md index 2b382aac..92b3e5ee 100644 --- a/README.md +++ b/README.md @@ -1291,6 +1291,8 @@ you may experience poor upload performance this way, but that can sometimes be f someone has also tested geesefs in combination with [gocryptfs](https://nuetzlich.net/gocryptfs/) with surprisingly good results, getting 60 MiB/s upload speeds on a gbit line, but JuiceFS won with 80 MiB/s using its built-in encryption +you may improve performance by specifying larger values for `--iobuf` / `--s-rd-sz` / `--s-wr-sz` + ## hiding from google @@ -1740,6 +1742,7 @@ below are some tweaks roughly ordered by usefulness: * `--hist` pointing to a fast location (ssd) will make directory listings and searches faster when `-e2d` or `-e2t` is set * and also makes thumbnails load faster, regardless of e2d/e2t * `--no-hash .` when indexing a network-disk if you don't care about the actual filehashes and only want the names/tags searchable +* if your volumes are on a network-disk such as NFS / SMB / s3, specifying larger values for `--iobuf` and/or `--s-rd-sz` and/or `--s-wr-sz` may help; try setting all of them to `524288` or `1048576` or `4194304` * `--no-htp --hash-mt=0 --mtag-mt=1 --th-mt=1` minimizes the number of threads; can help in some eccentric environments (like the vscode debugger) * `-j0` enables multiprocessing (actual multithreading), can reduce latency to `20+80/numCores` percent and generally improve performance in cpu-intensive workloads, for example: * lots of connections (many users or heavy clients) diff --git a/copyparty/__main__.py b/copyparty/__main__.py index 1998eff7..a115eeaf 100755 --- a/copyparty/__main__.py +++ b/copyparty/__main__.py @@ -917,6 +917,7 @@ def add_network(ap): ap2.add_argument("--freebind", action="store_true", help="allow listening on IPs which do not yet exist, for example if the network interfaces haven't finished going up. Only makes sense for IPs other than '0.0.0.0', '127.0.0.1', '::', and '::1'. May require running as root (unless net.ipv6.ip_nonlocal_bind)") ap2.add_argument("--s-thead", metavar="SEC", type=int, default=120, help="socket timeout (read request header)") ap2.add_argument("--s-tbody", metavar="SEC", type=float, default=186, help="socket timeout (read/write request/response bodies). Use 60 on fast servers (default is extremely safe). Disable with 0 if reverse-proxied for a 2%% speed boost") + ap2.add_argument("--s-rd-sz", metavar="B", type=int, default=256*1024, help="socket read size in bytes (indirectly affects filesystem writes; recommendation: keep equal-to or lower-than \033[33m--iobuf\033[0m)") ap2.add_argument("--s-wr-sz", metavar="B", type=int, default=256*1024, help="socket write size in bytes") ap2.add_argument("--s-wr-slp", metavar="SEC", type=float, default=0, help="debug: socket write delay in seconds") ap2.add_argument("--rsp-slp", metavar="SEC", type=float, default=0, help="debug: response delay in seconds") diff --git a/copyparty/httpcli.py b/copyparty/httpcli.py index b13f2dd4..a1a5088a 100644 --- a/copyparty/httpcli.py +++ b/copyparty/httpcli.py @@ -1610,15 +1610,16 @@ class HttpCli(object): return enc or "utf-8" def get_body_reader(self) -> tuple[Generator[bytes, None, None], int]: + bufsz = self.args.s_rd_sz if "chunked" in self.headers.get("transfer-encoding", "").lower(): - return read_socket_chunked(self.sr), -1 + return read_socket_chunked(self.sr, bufsz), -1 remains = int(self.headers.get("content-length", -1)) if remains == -1: self.keepalive = False - return read_socket_unbounded(self.sr), remains + return read_socket_unbounded(self.sr, bufsz), remains else: - return read_socket(self.sr, remains), remains + return read_socket(self.sr, bufsz, remains), remains def dump_to_file(self, is_put: bool) -> tuple[int, str, str, int, str, str]: # post_sz, sha_hex, sha_b64, remains, path, url @@ -1921,7 +1922,7 @@ class HttpCli(object): return "%s %s n%s" % (spd1, spd2, self.conn.nreq) def handle_post_multipart(self) -> bool: - self.parser = MultipartParser(self.log, self.sr, self.headers) + self.parser = MultipartParser(self.log, self.args, self.sr, self.headers) self.parser.parse() file0: list[tuple[str, Optional[str], Generator[bytes, None, None]]] = [] @@ -2150,7 +2151,7 @@ class HttpCli(object): self.log("writing {} #{} @{} len {}".format(path, chash, cstart, remains)) - reader = read_socket(self.sr, remains) + reader = read_socket(self.sr, self.args.s_rd_sz, remains) f = None fpool = not self.args.no_fpool and sprs @@ -2438,6 +2439,18 @@ class HttpCli(object): suffix = "-{:.6f}-{}".format(time.time(), dip) open_args = {"fdir": fdir, "suffix": suffix} + if "replace" in self.uparam: + abspath = os.path.join(fdir, fname) + if not self.can_delete: + self.log("user not allowed to overwrite with ?replace") + elif bos.path.exists(abspath): + try: + bos.unlink(abspath) + t = "overwriting file with new upload: %s" + except: + t = "toctou while deleting for ?replace: %s" + self.log(t % (abspath,)) + # reserve destination filename with ren_open(fname, "wb", fdir=fdir, suffix=suffix) as zfw: fname = zfw["orz"][1] diff --git a/copyparty/svchub.py b/copyparty/svchub.py index 2b7be552..028dcab9 100644 --- a/copyparty/svchub.py +++ b/copyparty/svchub.py @@ -173,6 +173,26 @@ class SvcHub(object): self.log("root", t.format(args.j), c=3) args.no_fpool = True + for name, arg in ( + ("iobuf", "iobuf"), + ("s-rd-sz", "s_rd_sz"), + ("s-wr-sz", "s_wr_sz"), + ): + zi = getattr(args, arg) + if zi < 32768: + t = "WARNING: expect very poor performance because you specified a very low value (%d) for --%s" + self.log("root", t % (zi, name), 3) + zi = 2 + zi2 = 2 ** (zi - 1).bit_length() + if zi != zi2: + zi3 = 2 ** ((zi - 1).bit_length() - 1) + t = "WARNING: expect poor performance because --%s is not a power-of-two; consider using %d or %d instead of %d" + self.log("root", t % (name, zi2, zi3, zi), 3) + + if args.s_rd_sz > args.iobuf: + t = "WARNING: --s-rd-sz (%d) is larger than --iobuf (%d); this may lead to reduced performance" + self.log("root", t % (args.s_rd_sz, args.iobuf), 3) + bri = "zy"[args.theme % 2 :][:1] ch = "abcdefghijklmnopqrstuvwx"[int(args.theme / 2)] args.theme = "{0}{1} {0} {1}".format(ch, bri) diff --git a/copyparty/util.py b/copyparty/util.py index 18cf14a2..dcb91017 100644 --- a/copyparty/util.py +++ b/copyparty/util.py @@ -1400,10 +1400,15 @@ def ren_open( class MultipartParser(object): def __init__( - self, log_func: "NamedLogger", sr: Unrecv, http_headers: dict[str, str] + self, + log_func: "NamedLogger", + args: argparse.Namespace, + sr: Unrecv, + http_headers: dict[str, str], ): self.sr = sr self.log = log_func + self.args = args self.headers = http_headers self.re_ctype = re.compile(r"^content-type: *([^; ]+)", re.IGNORECASE) @@ -1502,7 +1507,7 @@ class MultipartParser(object): def _read_data(self) -> Generator[bytes, None, None]: blen = len(self.boundary) - bufsz = 32 * 1024 + bufsz = self.args.s_rd_sz while True: try: buf = self.sr.recv(bufsz) @@ -2243,10 +2248,11 @@ def shut_socket(log: "NamedLogger", sck: socket.socket, timeout: int = 3) -> Non sck.close() -def read_socket(sr: Unrecv, total_size: int) -> Generator[bytes, None, None]: +def read_socket( + sr: Unrecv, bufsz: int, total_size: int +) -> Generator[bytes, None, None]: remains = total_size while remains > 0: - bufsz = 32 * 1024 if bufsz > remains: bufsz = remains @@ -2260,16 +2266,16 @@ def read_socket(sr: Unrecv, total_size: int) -> Generator[bytes, None, None]: yield buf -def read_socket_unbounded(sr: Unrecv) -> Generator[bytes, None, None]: +def read_socket_unbounded(sr: Unrecv, bufsz: int) -> Generator[bytes, None, None]: try: while True: - yield sr.recv(32 * 1024) + yield sr.recv(bufsz) except: return def read_socket_chunked( - sr: Unrecv, log: Optional["NamedLogger"] = None + sr: Unrecv, bufsz: int, log: Optional["NamedLogger"] = None ) -> Generator[bytes, None, None]: err = "upload aborted: expected chunk length, got [{}] |{}| instead" while True: @@ -2303,7 +2309,7 @@ def read_socket_chunked( if log: log("receiving %d byte chunk" % (chunklen,)) - for chunk in read_socket(sr, chunklen): + for chunk in read_socket(sr, bufsz, chunklen): yield chunk x = sr.recv_ex(2, False) diff --git a/docs/bufsize.txt b/docs/bufsize.txt new file mode 100644 index 00000000..ce4aa053 --- /dev/null +++ b/docs/bufsize.txt @@ -0,0 +1,96 @@ +notes from testing various buffer sizes of files and sockets + +summary: + +download-folder-as-tar: would be 7% faster with --iobuf 65536 (but got 20% faster in v1.11.2) + +download-folder-as-zip: optimal with default --iobuf 262144 + +download-file-over-https: optimal with default --iobuf 262144 + +put-large-file: optimal with default --iobuf 262144, --s-rd-sz 262144 (and got 14% faster in v1.11.2) + +post-large-file: optimal with default --iobuf 262144, --s-rd-sz 262144 (and got 18% faster in v1.11.2) + +---- + +oha -z10s -c1 --ipv4 --insecure http://127.0.0.1:3923/bigs/?tar + 3.3 req/s 1.11.1 + 4.3 4.0 3.3 req/s 1.12.2 + 64 256 512 --iobuf 256 (prefer smaller) + 32 32 32 --s-rd-sz + +oha -z10s -c1 --ipv4 --insecure http://127.0.0.1:3923/bigs/?zip + 2.9 req/s 1.11.1 + 2.5 2.9 2.9 req/s 1.12.2 + 64 256 512 --iobuf 256 (prefer bigger) + 32 32 32 --s-rd-sz + +oha -z10s -c1 --ipv4 --insecure http://127.0.0.1:3923/pairdupes/?tar + 8.3 req/s 1.11.1 + 8.4 8.4 8.5 req/s 1.12.2 + 64 256 512 --iobuf 256 (prefer bigger) + 32 32 32 --s-rd-sz + +oha -z10s -c1 --ipv4 --insecure http://127.0.0.1:3923/pairdupes/?zip + 13.9 req/s 1.11.1 + 14.1 14.0 13.8 req/s 1.12.2 + 64 256 512 --iobuf 256 (prefer smaller) + 32 32 32 --s-rd-sz + +oha -z10s -c1 --ipv4 --insecure http://127.0.0.1:3923/pairdupes/987a +5260 req/s 1.11.1 +5246 5246 5280 5268 req/s 1.12.2 + 64 256 512 256 --iobuf dontcare + 32 32 32 512 --s-rd-sz dontcare + +oha -z10s -c1 --ipv4 --insecure https://127.0.0.1:3923/pairdupes/987a +4445 req/s 1.11.1 +4462 4494 4444 req/s 1.12.2 + 64 256 512 --iobuf dontcare + 32 32 32 --s-rd-sz + +oha -z10s -c1 --ipv4 --insecure http://127.0.0.1:3923/bigs/gssc-02-cannonball-skydrift/track10.cdda.flac + 95 req/s 1.11.1 + 95 97 req/s 1.12.2 + 64 512 --iobuf dontcare + 32 32 --s-rd-sz + +oha -z10s -c1 --ipv4 --insecure https://127.0.0.1:3923/bigs/gssc-02-cannonball-skydrift/track10.cdda.flac + 15.4 req/s 1.11.1 + 15.4 15.3 14.9 15.4 req/s 1.12.2 + 64 256 512 512 --iobuf 256 (prefer smaller, and smaller than s-wr-sz) + 32 32 32 32 --s-rd-sz + 256 256 256 512 --s-wr-sz + +---- + +python3 ~/dev/old/copyparty\ v1.11.1\ dont\ ban\ the\ pipes.py -q -i 127.0.0.1 -v .::A --daw +python3 ~/dev/copyparty/dist/copyparty-sfx.py -q -i 127.0.0.1 -v .::A --daw --iobuf $((1024*512)) + +oha -z10s -c1 --ipv4 --insecure -mPUT -r0 -D ~/Music/gssc-02-cannonball-skydrift/track10.cdda.flac http://127.0.0.1:3923/a.bin +10.8 req/s 1.11.1 +10.8 11.5 11.8 12.1 12.2 12.3 req/s new + 512 512 512 512 512 256 --iobuf 256 + 32 64 128 256 512 256 --s-rd-sz 256 (prefer bigger) + +---- + +buildpost() { +b=--jeg-er-grensestaven; +printf -- "$b\r\nContent-Disposition: form-data; name=\"act\"\r\n\r\nbput\r\n$b\r\nContent-Disposition: form-data; name=\"f\"; filename=\"a.bin\"\r\nContent-Type: audio/mpeg\r\n\r\n" +cat "$1" +printf -- "\r\n${b}--\r\n" +} +buildpost ~/Music/gssc-02-cannonball-skydrift/track10.cdda.flac >big.post +buildpost ~/Music/bottomtext.txt >smol.post + +oha -z10s -c1 --ipv4 --insecure -mPOST -r0 -T 'multipart/form-data; boundary=jeg-er-grensestaven' -D big.post http://127.0.0.1:3923/?replace +9.6 11.2 11.3 11.1 10.9 req/s v1.11.2 +512 512 256 128 256 --iobuf 256 + 32 512 256 128 128 --s-rd-sz 256 + +oha -z10s -c1 --ipv4 --insecure -mPOST -r0 -T 'multipart/form-data; boundary=jeg-er-grensestaven' -D smol.post http://127.0.0.1:3923/?replace +2445 2414 2401 2437 + 256 128 256 256 --iobuf 256 + 128 128 256 64 --s-rd-sz 128 (but use 256 since big posts are more important) diff --git a/docs/devnotes.md b/docs/devnotes.md index 72f331c0..5c80f711 100644 --- a/docs/devnotes.md +++ b/docs/devnotes.md @@ -164,6 +164,7 @@ authenticate using header `Cookie: cppwd=foo` or url param `&pw=foo` | PUT | `?xz` | (binary data) | compress with xz and write into file at URL | | mPOST | | `f=FILE` | upload `FILE` into the folder at URL | | mPOST | `?j` | `f=FILE` | ...and reply with json | +| mPOST | `?replace` | `f=FILE` | ...and overwrite existing files | | mPOST | | `act=mkdir`, `name=foo` | create directory `foo` at URL | | POST | `?delete` | | delete URL recursively | | jPOST | `?delete` | `["/foo","/bar"]` | delete `/foo` and `/bar` recursively | diff --git a/tests/util.py b/tests/util.py index 8cfa61ea..415ac5a0 100644 --- a/tests/util.py +++ b/tests/util.py @@ -155,7 +155,8 @@ class Cfg(Namespace): mth={}, mtp=[], rm_retry="0/0", - s_wr_sz=512 * 1024, + s_rd_sz=256 * 1024, + s_wr_sz=256 * 1024, sort="href", srch_hits=99999, th_crop="y",