add option --s-rd-sz (socket read size):

counterpart of `--s-wr-sz` which existed already

the default (256 KiB) appears optimal in the most popular scenario
(linux host with storage on local physical disk, usually NVMe)

was previously 32 KiB, so large uploads should now use 17% less CPU

also adds sanchecks for values of `--iobuf`, `--s-rd-sz`, `--s-wr-sz`

also adds file-overwrite feature for multipart posts
This commit is contained in:
ed 2024-03-23 16:35:14 +00:00
parent 2b24c50eb7
commit c6acd3a904
8 changed files with 155 additions and 14 deletions

View file

@ -1291,6 +1291,8 @@ you may experience poor upload performance this way, but that can sometimes be f
someone has also tested geesefs in combination with [gocryptfs](https://nuetzlich.net/gocryptfs/) with surprisingly good results, getting 60 MiB/s upload speeds on a gbit line, but JuiceFS won with 80 MiB/s using its built-in encryption
you may improve performance by specifying larger values for `--iobuf` / `--s-rd-sz` / `--s-wr-sz`
## hiding from google
@ -1740,6 +1742,7 @@ below are some tweaks roughly ordered by usefulness:
* `--hist` pointing to a fast location (ssd) will make directory listings and searches faster when `-e2d` or `-e2t` is set
* and also makes thumbnails load faster, regardless of e2d/e2t
* `--no-hash .` when indexing a network-disk if you don't care about the actual filehashes and only want the names/tags searchable
* if your volumes are on a network-disk such as NFS / SMB / s3, specifying larger values for `--iobuf` and/or `--s-rd-sz` and/or `--s-wr-sz` may help; try setting all of them to `524288` or `1048576` or `4194304`
* `--no-htp --hash-mt=0 --mtag-mt=1 --th-mt=1` minimizes the number of threads; can help in some eccentric environments (like the vscode debugger)
* `-j0` enables multiprocessing (actual multithreading), can reduce latency to `20+80/numCores` percent and generally improve performance in cpu-intensive workloads, for example:
* lots of connections (many users or heavy clients)

View file

@ -917,6 +917,7 @@ def add_network(ap):
ap2.add_argument("--freebind", action="store_true", help="allow listening on IPs which do not yet exist, for example if the network interfaces haven't finished going up. Only makes sense for IPs other than '0.0.0.0', '127.0.0.1', '::', and '::1'. May require running as root (unless net.ipv6.ip_nonlocal_bind)")
ap2.add_argument("--s-thead", metavar="SEC", type=int, default=120, help="socket timeout (read request header)")
ap2.add_argument("--s-tbody", metavar="SEC", type=float, default=186, help="socket timeout (read/write request/response bodies). Use 60 on fast servers (default is extremely safe). Disable with 0 if reverse-proxied for a 2%% speed boost")
ap2.add_argument("--s-rd-sz", metavar="B", type=int, default=256*1024, help="socket read size in bytes (indirectly affects filesystem writes; recommendation: keep equal-to or lower-than \033[33m--iobuf\033[0m)")
ap2.add_argument("--s-wr-sz", metavar="B", type=int, default=256*1024, help="socket write size in bytes")
ap2.add_argument("--s-wr-slp", metavar="SEC", type=float, default=0, help="debug: socket write delay in seconds")
ap2.add_argument("--rsp-slp", metavar="SEC", type=float, default=0, help="debug: response delay in seconds")

View file

@ -1610,15 +1610,16 @@ class HttpCli(object):
return enc or "utf-8"
def get_body_reader(self) -> tuple[Generator[bytes, None, None], int]:
bufsz = self.args.s_rd_sz
if "chunked" in self.headers.get("transfer-encoding", "").lower():
return read_socket_chunked(self.sr), -1
return read_socket_chunked(self.sr, bufsz), -1
remains = int(self.headers.get("content-length", -1))
if remains == -1:
self.keepalive = False
return read_socket_unbounded(self.sr), remains
return read_socket_unbounded(self.sr, bufsz), remains
else:
return read_socket(self.sr, remains), remains
return read_socket(self.sr, bufsz, remains), remains
def dump_to_file(self, is_put: bool) -> tuple[int, str, str, int, str, str]:
# post_sz, sha_hex, sha_b64, remains, path, url
@ -1921,7 +1922,7 @@ class HttpCli(object):
return "%s %s n%s" % (spd1, spd2, self.conn.nreq)
def handle_post_multipart(self) -> bool:
self.parser = MultipartParser(self.log, self.sr, self.headers)
self.parser = MultipartParser(self.log, self.args, self.sr, self.headers)
self.parser.parse()
file0: list[tuple[str, Optional[str], Generator[bytes, None, None]]] = []
@ -2150,7 +2151,7 @@ class HttpCli(object):
self.log("writing {} #{} @{} len {}".format(path, chash, cstart, remains))
reader = read_socket(self.sr, remains)
reader = read_socket(self.sr, self.args.s_rd_sz, remains)
f = None
fpool = not self.args.no_fpool and sprs
@ -2438,6 +2439,18 @@ class HttpCli(object):
suffix = "-{:.6f}-{}".format(time.time(), dip)
open_args = {"fdir": fdir, "suffix": suffix}
if "replace" in self.uparam:
abspath = os.path.join(fdir, fname)
if not self.can_delete:
self.log("user not allowed to overwrite with ?replace")
elif bos.path.exists(abspath):
try:
bos.unlink(abspath)
t = "overwriting file with new upload: %s"
except:
t = "toctou while deleting for ?replace: %s"
self.log(t % (abspath,))
# reserve destination filename
with ren_open(fname, "wb", fdir=fdir, suffix=suffix) as zfw:
fname = zfw["orz"][1]

View file

@ -173,6 +173,26 @@ class SvcHub(object):
self.log("root", t.format(args.j), c=3)
args.no_fpool = True
for name, arg in (
("iobuf", "iobuf"),
("s-rd-sz", "s_rd_sz"),
("s-wr-sz", "s_wr_sz"),
):
zi = getattr(args, arg)
if zi < 32768:
t = "WARNING: expect very poor performance because you specified a very low value (%d) for --%s"
self.log("root", t % (zi, name), 3)
zi = 2
zi2 = 2 ** (zi - 1).bit_length()
if zi != zi2:
zi3 = 2 ** ((zi - 1).bit_length() - 1)
t = "WARNING: expect poor performance because --%s is not a power-of-two; consider using %d or %d instead of %d"
self.log("root", t % (name, zi2, zi3, zi), 3)
if args.s_rd_sz > args.iobuf:
t = "WARNING: --s-rd-sz (%d) is larger than --iobuf (%d); this may lead to reduced performance"
self.log("root", t % (args.s_rd_sz, args.iobuf), 3)
bri = "zy"[args.theme % 2 :][:1]
ch = "abcdefghijklmnopqrstuvwx"[int(args.theme / 2)]
args.theme = "{0}{1} {0} {1}".format(ch, bri)

View file

@ -1400,10 +1400,15 @@ def ren_open(
class MultipartParser(object):
def __init__(
self, log_func: "NamedLogger", sr: Unrecv, http_headers: dict[str, str]
self,
log_func: "NamedLogger",
args: argparse.Namespace,
sr: Unrecv,
http_headers: dict[str, str],
):
self.sr = sr
self.log = log_func
self.args = args
self.headers = http_headers
self.re_ctype = re.compile(r"^content-type: *([^; ]+)", re.IGNORECASE)
@ -1502,7 +1507,7 @@ class MultipartParser(object):
def _read_data(self) -> Generator[bytes, None, None]:
blen = len(self.boundary)
bufsz = 32 * 1024
bufsz = self.args.s_rd_sz
while True:
try:
buf = self.sr.recv(bufsz)
@ -2243,10 +2248,11 @@ def shut_socket(log: "NamedLogger", sck: socket.socket, timeout: int = 3) -> Non
sck.close()
def read_socket(sr: Unrecv, total_size: int) -> Generator[bytes, None, None]:
def read_socket(
sr: Unrecv, bufsz: int, total_size: int
) -> Generator[bytes, None, None]:
remains = total_size
while remains > 0:
bufsz = 32 * 1024
if bufsz > remains:
bufsz = remains
@ -2260,16 +2266,16 @@ def read_socket(sr: Unrecv, total_size: int) -> Generator[bytes, None, None]:
yield buf
def read_socket_unbounded(sr: Unrecv) -> Generator[bytes, None, None]:
def read_socket_unbounded(sr: Unrecv, bufsz: int) -> Generator[bytes, None, None]:
try:
while True:
yield sr.recv(32 * 1024)
yield sr.recv(bufsz)
except:
return
def read_socket_chunked(
sr: Unrecv, log: Optional["NamedLogger"] = None
sr: Unrecv, bufsz: int, log: Optional["NamedLogger"] = None
) -> Generator[bytes, None, None]:
err = "upload aborted: expected chunk length, got [{}] |{}| instead"
while True:
@ -2303,7 +2309,7 @@ def read_socket_chunked(
if log:
log("receiving %d byte chunk" % (chunklen,))
for chunk in read_socket(sr, chunklen):
for chunk in read_socket(sr, bufsz, chunklen):
yield chunk
x = sr.recv_ex(2, False)

96
docs/bufsize.txt Normal file
View file

@ -0,0 +1,96 @@
notes from testing various buffer sizes of files and sockets
summary:
download-folder-as-tar: would be 7% faster with --iobuf 65536 (but got 20% faster in v1.11.2)
download-folder-as-zip: optimal with default --iobuf 262144
download-file-over-https: optimal with default --iobuf 262144
put-large-file: optimal with default --iobuf 262144, --s-rd-sz 262144 (and got 14% faster in v1.11.2)
post-large-file: optimal with default --iobuf 262144, --s-rd-sz 262144 (and got 18% faster in v1.11.2)
----
oha -z10s -c1 --ipv4 --insecure http://127.0.0.1:3923/bigs/?tar
3.3 req/s 1.11.1
4.3 4.0 3.3 req/s 1.12.2
64 256 512 --iobuf 256 (prefer smaller)
32 32 32 --s-rd-sz
oha -z10s -c1 --ipv4 --insecure http://127.0.0.1:3923/bigs/?zip
2.9 req/s 1.11.1
2.5 2.9 2.9 req/s 1.12.2
64 256 512 --iobuf 256 (prefer bigger)
32 32 32 --s-rd-sz
oha -z10s -c1 --ipv4 --insecure http://127.0.0.1:3923/pairdupes/?tar
8.3 req/s 1.11.1
8.4 8.4 8.5 req/s 1.12.2
64 256 512 --iobuf 256 (prefer bigger)
32 32 32 --s-rd-sz
oha -z10s -c1 --ipv4 --insecure http://127.0.0.1:3923/pairdupes/?zip
13.9 req/s 1.11.1
14.1 14.0 13.8 req/s 1.12.2
64 256 512 --iobuf 256 (prefer smaller)
32 32 32 --s-rd-sz
oha -z10s -c1 --ipv4 --insecure http://127.0.0.1:3923/pairdupes/987a
5260 req/s 1.11.1
5246 5246 5280 5268 req/s 1.12.2
64 256 512 256 --iobuf dontcare
32 32 32 512 --s-rd-sz dontcare
oha -z10s -c1 --ipv4 --insecure https://127.0.0.1:3923/pairdupes/987a
4445 req/s 1.11.1
4462 4494 4444 req/s 1.12.2
64 256 512 --iobuf dontcare
32 32 32 --s-rd-sz
oha -z10s -c1 --ipv4 --insecure http://127.0.0.1:3923/bigs/gssc-02-cannonball-skydrift/track10.cdda.flac
95 req/s 1.11.1
95 97 req/s 1.12.2
64 512 --iobuf dontcare
32 32 --s-rd-sz
oha -z10s -c1 --ipv4 --insecure https://127.0.0.1:3923/bigs/gssc-02-cannonball-skydrift/track10.cdda.flac
15.4 req/s 1.11.1
15.4 15.3 14.9 15.4 req/s 1.12.2
64 256 512 512 --iobuf 256 (prefer smaller, and smaller than s-wr-sz)
32 32 32 32 --s-rd-sz
256 256 256 512 --s-wr-sz
----
python3 ~/dev/old/copyparty\ v1.11.1\ dont\ ban\ the\ pipes.py -q -i 127.0.0.1 -v .::A --daw
python3 ~/dev/copyparty/dist/copyparty-sfx.py -q -i 127.0.0.1 -v .::A --daw --iobuf $((1024*512))
oha -z10s -c1 --ipv4 --insecure -mPUT -r0 -D ~/Music/gssc-02-cannonball-skydrift/track10.cdda.flac http://127.0.0.1:3923/a.bin
10.8 req/s 1.11.1
10.8 11.5 11.8 12.1 12.2 12.3 req/s new
512 512 512 512 512 256 --iobuf 256
32 64 128 256 512 256 --s-rd-sz 256 (prefer bigger)
----
buildpost() {
b=--jeg-er-grensestaven;
printf -- "$b\r\nContent-Disposition: form-data; name=\"act\"\r\n\r\nbput\r\n$b\r\nContent-Disposition: form-data; name=\"f\"; filename=\"a.bin\"\r\nContent-Type: audio/mpeg\r\n\r\n"
cat "$1"
printf -- "\r\n${b}--\r\n"
}
buildpost ~/Music/gssc-02-cannonball-skydrift/track10.cdda.flac >big.post
buildpost ~/Music/bottomtext.txt >smol.post
oha -z10s -c1 --ipv4 --insecure -mPOST -r0 -T 'multipart/form-data; boundary=jeg-er-grensestaven' -D big.post http://127.0.0.1:3923/?replace
9.6 11.2 11.3 11.1 10.9 req/s v1.11.2
512 512 256 128 256 --iobuf 256
32 512 256 128 128 --s-rd-sz 256
oha -z10s -c1 --ipv4 --insecure -mPOST -r0 -T 'multipart/form-data; boundary=jeg-er-grensestaven' -D smol.post http://127.0.0.1:3923/?replace
2445 2414 2401 2437
256 128 256 256 --iobuf 256
128 128 256 64 --s-rd-sz 128 (but use 256 since big posts are more important)

View file

@ -164,6 +164,7 @@ authenticate using header `Cookie: cppwd=foo` or url param `&pw=foo`
| PUT | `?xz` | (binary data) | compress with xz and write into file at URL |
| mPOST | | `f=FILE` | upload `FILE` into the folder at URL |
| mPOST | `?j` | `f=FILE` | ...and reply with json |
| mPOST | `?replace` | `f=FILE` | ...and overwrite existing files |
| mPOST | | `act=mkdir`, `name=foo` | create directory `foo` at URL |
| POST | `?delete` | | delete URL recursively |
| jPOST | `?delete` | `["/foo","/bar"]` | delete `/foo` and `/bar` recursively |

View file

@ -155,7 +155,8 @@ class Cfg(Namespace):
mth={},
mtp=[],
rm_retry="0/0",
s_wr_sz=512 * 1024,
s_rd_sz=256 * 1024,
s_wr_sz=256 * 1024,
sort="href",
srch_hits=99999,
th_crop="y",