add option --iobuf (file r/w buffer size):

the default (256 KiB) appears optimal in the most popular scenario
(linux host with storage on local physical disk, usually NVMe)

was previously a mix of 64 and 512 KiB;
now the same value is enforced everywhere

download-as-tar is now 20% faster with the default value
This commit is contained in:
ed 2024-03-23 16:17:40 +00:00
parent d30ae8453d
commit 2b24c50eb7
11 changed files with 37 additions and 23 deletions

View file

@ -869,6 +869,7 @@ def add_fs(ap):
ap2 = ap.add_argument_group("filesystem options") ap2 = ap.add_argument_group("filesystem options")
rm_re_def = "5/0.1" if ANYWIN else "0/0" rm_re_def = "5/0.1" if ANYWIN else "0/0"
ap2.add_argument("--rm-retry", metavar="T/R", type=u, default=rm_re_def, help="if a file cannot be deleted because it is busy, continue trying for \033[33mT\033[0m seconds, retry every \033[33mR\033[0m seconds; disable with 0/0 (volflag=rm_retry)") ap2.add_argument("--rm-retry", metavar="T/R", type=u, default=rm_re_def, help="if a file cannot be deleted because it is busy, continue trying for \033[33mT\033[0m seconds, retry every \033[33mR\033[0m seconds; disable with 0/0 (volflag=rm_retry)")
ap2.add_argument("--iobuf", metavar="BYTES", type=int, default=256*1024, help="file I/O buffer-size; if your volumes are on a network drive, try increasing to \033[32m524288\033[0m or even \033[32m4194304\033[0m (and let me know if that improves your performance)")
def add_upload(ap): def add_upload(ap):

View file

@ -218,7 +218,7 @@ class FtpFs(AbstractedFS):
raise FSE("Cannot open existing file for writing") raise FSE("Cannot open existing file for writing")
self.validpath(ap) self.validpath(ap)
return open(fsenc(ap), mode) return open(fsenc(ap), mode, self.args.iobuf)
def chdir(self, path: str) -> None: def chdir(self, path: str) -> None:
nwd = join(self.cwd, path) nwd = join(self.cwd, path)

View file

@ -174,7 +174,6 @@ class HttpCli(object):
self.parser: Optional[MultipartParser] = None self.parser: Optional[MultipartParser] = None
# end placeholders # end placeholders
self.bufsz = 1024 * 32
self.html_head = "" self.html_head = ""
def log(self, msg: str, c: Union[int, str] = 0) -> None: def log(self, msg: str, c: Union[int, str] = 0) -> None:
@ -1641,7 +1640,7 @@ class HttpCli(object):
bos.makedirs(fdir) bos.makedirs(fdir)
open_ka: dict[str, Any] = {"fun": open} open_ka: dict[str, Any] = {"fun": open}
open_a = ["wb", 512 * 1024] open_a = ["wb", self.args.iobuf]
# user-request || config-force # user-request || config-force
if ("gz" in vfs.flags or "xz" in vfs.flags) and ( if ("gz" in vfs.flags or "xz" in vfs.flags) and (
@ -1900,7 +1899,7 @@ class HttpCli(object):
f.seek(ofs) f.seek(ofs)
with open(fp, "wb") as fo: with open(fp, "wb") as fo:
while nrem: while nrem:
buf = f.read(min(nrem, 512 * 1024)) buf = f.read(min(nrem, self.args.iobuf))
if not buf: if not buf:
break break
@ -2162,7 +2161,7 @@ class HttpCli(object):
except: except:
pass pass
f = f or open(fsenc(path), "rb+", 512 * 1024) f = f or open(fsenc(path), "rb+", self.args.iobuf)
try: try:
f.seek(cstart[0]) f.seek(cstart[0])
@ -2185,7 +2184,8 @@ class HttpCli(object):
) )
ofs = 0 ofs = 0
while ofs < chunksize: while ofs < chunksize:
bufsz = min(chunksize - ofs, 4 * 1024 * 1024) bufsz = max(4 * 1024 * 1024, self.args.iobuf)
bufsz = min(chunksize - ofs, bufsz)
f.seek(cstart[0] + ofs) f.seek(cstart[0] + ofs)
buf = f.read(bufsz) buf = f.read(bufsz)
for wofs in cstart[1:]: for wofs in cstart[1:]:
@ -2482,7 +2482,7 @@ class HttpCli(object):
v2 = lim.dfv - lim.dfl v2 = lim.dfv - lim.dfl
max_sz = min(v1, v2) if v1 and v2 else v1 or v2 max_sz = min(v1, v2) if v1 and v2 else v1 or v2
with ren_open(tnam, "wb", 512 * 1024, **open_args) as zfw: with ren_open(tnam, "wb", self.args.iobuf, **open_args) as zfw:
f, tnam = zfw["orz"] f, tnam = zfw["orz"]
tabspath = os.path.join(fdir, tnam) tabspath = os.path.join(fdir, tnam)
self.log("writing to {}".format(tabspath)) self.log("writing to {}".format(tabspath))
@ -2778,7 +2778,7 @@ class HttpCli(object):
if bos.path.exists(fp): if bos.path.exists(fp):
wunlink(self.log, fp, vfs.flags) wunlink(self.log, fp, vfs.flags)
with open(fsenc(fp), "wb", 512 * 1024) as f: with open(fsenc(fp), "wb", self.args.iobuf) as f:
sz, sha512, _ = hashcopy(p_data, f, self.args.s_wr_slp) sz, sha512, _ = hashcopy(p_data, f, self.args.s_wr_slp)
if lim: if lim:
@ -3010,8 +3010,7 @@ class HttpCli(object):
upper = gzip_orig_sz(fs_path) upper = gzip_orig_sz(fs_path)
else: else:
open_func = open open_func = open
# 512 kB is optimal for huge files, use 64k open_args = [fsenc(fs_path), "rb", self.args.iobuf]
open_args = [fsenc(fs_path), "rb", 64 * 1024]
use_sendfile = ( use_sendfile = (
# fmt: off # fmt: off
not self.tls not self.tls
@ -3146,6 +3145,7 @@ class HttpCli(object):
bgen = packer( bgen = packer(
self.log, self.log,
self.args,
fgen, fgen,
utf8="utf" in uarg, utf8="utf" in uarg,
pre_crc="crc" in uarg, pre_crc="crc" in uarg,
@ -3223,7 +3223,7 @@ class HttpCli(object):
sz_md = 0 sz_md = 0
lead = b"" lead = b""
fullfile = b"" fullfile = b""
for buf in yieldfile(fs_path): for buf in yieldfile(fs_path, self.args.iobuf):
if sz_md < max_sz: if sz_md < max_sz:
fullfile += buf fullfile += buf
else: else:
@ -3296,7 +3296,7 @@ class HttpCli(object):
if fullfile: if fullfile:
self.s.sendall(fullfile) self.s.sendall(fullfile)
else: else:
for buf in yieldfile(fs_path): for buf in yieldfile(fs_path, self.args.iobuf):
self.s.sendall(html_bescape(buf)) self.s.sendall(html_bescape(buf))
self.s.sendall(html[1]) self.s.sendall(html[1])

View file

@ -1,6 +1,7 @@
# coding: utf-8 # coding: utf-8
from __future__ import print_function, unicode_literals from __future__ import print_function, unicode_literals
import argparse
import re import re
import stat import stat
import tarfile import tarfile
@ -44,11 +45,12 @@ class StreamTar(StreamArc):
def __init__( def __init__(
self, self,
log: "NamedLogger", log: "NamedLogger",
args: argparse.Namespace,
fgen: Generator[dict[str, Any], None, None], fgen: Generator[dict[str, Any], None, None],
cmp: str = "", cmp: str = "",
**kwargs: Any **kwargs: Any
): ):
super(StreamTar, self).__init__(log, fgen) super(StreamTar, self).__init__(log, args, fgen)
self.ci = 0 self.ci = 0
self.co = 0 self.co = 0
@ -126,7 +128,7 @@ class StreamTar(StreamArc):
inf.gid = 0 inf.gid = 0
self.ci += inf.size self.ci += inf.size
with open(fsenc(src), "rb", 512 * 1024) as fo: with open(fsenc(src), "rb", self.args.iobuf) as fo:
self.tar.addfile(inf, fo) self.tar.addfile(inf, fo)
def _gen(self) -> None: def _gen(self) -> None:

View file

@ -1,6 +1,7 @@
# coding: utf-8 # coding: utf-8
from __future__ import print_function, unicode_literals from __future__ import print_function, unicode_literals
import argparse
import os import os
import tempfile import tempfile
from datetime import datetime from datetime import datetime
@ -20,10 +21,12 @@ class StreamArc(object):
def __init__( def __init__(
self, self,
log: "NamedLogger", log: "NamedLogger",
args: argparse.Namespace,
fgen: Generator[dict[str, Any], None, None], fgen: Generator[dict[str, Any], None, None],
**kwargs: Any **kwargs: Any
): ):
self.log = log self.log = log
self.args = args
self.fgen = fgen self.fgen = fgen
self.stopped = False self.stopped = False

View file

@ -1,6 +1,7 @@
# coding: utf-8 # coding: utf-8
from __future__ import print_function, unicode_literals from __future__ import print_function, unicode_literals
import argparse
import calendar import calendar
import stat import stat
import time import time
@ -218,12 +219,13 @@ class StreamZip(StreamArc):
def __init__( def __init__(
self, self,
log: "NamedLogger", log: "NamedLogger",
args: argparse.Namespace,
fgen: Generator[dict[str, Any], None, None], fgen: Generator[dict[str, Any], None, None],
utf8: bool = False, utf8: bool = False,
pre_crc: bool = False, pre_crc: bool = False,
**kwargs: Any **kwargs: Any
) -> None: ) -> None:
super(StreamZip, self).__init__(log, fgen) super(StreamZip, self).__init__(log, args, fgen)
self.utf8 = utf8 self.utf8 = utf8
self.pre_crc = pre_crc self.pre_crc = pre_crc
@ -248,7 +250,7 @@ class StreamZip(StreamArc):
crc = 0 crc = 0
if self.pre_crc: if self.pre_crc:
for buf in yieldfile(src): for buf in yieldfile(src, self.args.iobuf):
crc = zlib.crc32(buf, crc) crc = zlib.crc32(buf, crc)
crc &= 0xFFFFFFFF crc &= 0xFFFFFFFF
@ -257,7 +259,7 @@ class StreamZip(StreamArc):
buf = gen_hdr(None, name, sz, ts, self.utf8, crc, self.pre_crc) buf = gen_hdr(None, name, sz, ts, self.utf8, crc, self.pre_crc)
yield self._ct(buf) yield self._ct(buf)
for buf in yieldfile(src): for buf in yieldfile(src, self.args.iobuf):
if not self.pre_crc: if not self.pre_crc:
crc = zlib.crc32(buf, crc) crc = zlib.crc32(buf, crc)

View file

@ -340,6 +340,9 @@ class Tftpd(object):
if not self.args.tftp_nols and bos.path.isdir(ap): if not self.args.tftp_nols and bos.path.isdir(ap):
return self._ls(vpath, "", 0, True) return self._ls(vpath, "", 0, True)
if not a:
a = [self.args.iobuf]
return open(ap, mode, *a, **ka) return open(ap, mode, *a, **ka)
def _mkdir(self, vpath: str, *a) -> None: def _mkdir(self, vpath: str, *a) -> None:

View file

@ -3920,7 +3920,7 @@ class Up2k(object):
csz = up2k_chunksize(fsz) csz = up2k_chunksize(fsz)
ret = [] ret = []
suffix = " MB, {}".format(path) suffix = " MB, {}".format(path)
with open(fsenc(path), "rb", 512 * 1024) as f: with open(fsenc(path), "rb", self.args.iobuf) as f:
if self.mth and fsz >= 1024 * 512: if self.mth and fsz >= 1024 * 512:
tlt = self.mth.hash(f, fsz, csz, self.pp, prefix, suffix) tlt = self.mth.hash(f, fsz, csz, self.pp, prefix, suffix)
ret = [x[0] for x in tlt] ret = [x[0] for x in tlt]

View file

@ -2361,10 +2361,11 @@ def build_netmap(csv: str):
return NetMap(ips, cidrs, True) return NetMap(ips, cidrs, True)
def yieldfile(fn: str) -> Generator[bytes, None, None]: def yieldfile(fn: str, bufsz: int) -> Generator[bytes, None, None]:
with open(fsenc(fn), "rb", 512 * 1024) as f: readsz = min(bufsz, 128 * 1024)
with open(fsenc(fn), "rb", bufsz) as f:
while True: while True:
buf = f.read(128 * 1024) buf = f.read(readsz)
if not buf: if not buf:
break break

View file

@ -234,8 +234,9 @@ def u8(gen):
def yieldfile(fn): def yieldfile(fn):
with open(fn, "rb") as f: s = 64 * 1024
for block in iter(lambda: f.read(64 * 1024), b""): with open(fn, "rb", s * 4) as f:
for block in iter(lambda: f.read(s), b""):
yield block yield block

View file

@ -147,6 +147,7 @@ class Cfg(Namespace):
dbd="wal", dbd="wal",
fk_salt="a" * 16, fk_salt="a" * 16,
idp_gsep=re.compile("[|:;+,]"), idp_gsep=re.compile("[|:;+,]"),
iobuf=256 * 1024,
lang="eng", lang="eng",
log_badpwd=1, log_badpwd=1,
logout=573, logout=573,