add option --iobuf (file r/w buffer size):

the default (256 KiB) appears optimal in the most popular scenario
(linux host with storage on local physical disk, usually NVMe)

was previously a mix of 64 and 512 KiB;
now the same value is enforced everywhere

download-as-tar is now 20% faster with the default value
This commit is contained in:
ed 2024-03-23 16:17:40 +00:00
parent d30ae8453d
commit 2b24c50eb7
11 changed files with 37 additions and 23 deletions

View file

@ -869,6 +869,7 @@ def add_fs(ap):
ap2 = ap.add_argument_group("filesystem options") ap2 = ap.add_argument_group("filesystem options")
rm_re_def = "5/0.1" if ANYWIN else "0/0" rm_re_def = "5/0.1" if ANYWIN else "0/0"
ap2.add_argument("--rm-retry", metavar="T/R", type=u, default=rm_re_def, help="if a file cannot be deleted because it is busy, continue trying for \033[33mT\033[0m seconds, retry every \033[33mR\033[0m seconds; disable with 0/0 (volflag=rm_retry)") ap2.add_argument("--rm-retry", metavar="T/R", type=u, default=rm_re_def, help="if a file cannot be deleted because it is busy, continue trying for \033[33mT\033[0m seconds, retry every \033[33mR\033[0m seconds; disable with 0/0 (volflag=rm_retry)")
ap2.add_argument("--iobuf", metavar="BYTES", type=int, default=256*1024, help="file I/O buffer-size; if your volumes are on a network drive, try increasing to \033[32m524288\033[0m or even \033[32m4194304\033[0m (and let me know if that improves your performance)")
def add_upload(ap): def add_upload(ap):

View file

@ -218,7 +218,7 @@ class FtpFs(AbstractedFS):
raise FSE("Cannot open existing file for writing") raise FSE("Cannot open existing file for writing")
self.validpath(ap) self.validpath(ap)
return open(fsenc(ap), mode) return open(fsenc(ap), mode, self.args.iobuf)
def chdir(self, path: str) -> None: def chdir(self, path: str) -> None:
nwd = join(self.cwd, path) nwd = join(self.cwd, path)

View file

@ -174,7 +174,6 @@ class HttpCli(object):
self.parser: Optional[MultipartParser] = None self.parser: Optional[MultipartParser] = None
# end placeholders # end placeholders
self.bufsz = 1024 * 32
self.html_head = "" self.html_head = ""
def log(self, msg: str, c: Union[int, str] = 0) -> None: def log(self, msg: str, c: Union[int, str] = 0) -> None:
@ -1641,7 +1640,7 @@ class HttpCli(object):
bos.makedirs(fdir) bos.makedirs(fdir)
open_ka: dict[str, Any] = {"fun": open} open_ka: dict[str, Any] = {"fun": open}
open_a = ["wb", 512 * 1024] open_a = ["wb", self.args.iobuf]
# user-request || config-force # user-request || config-force
if ("gz" in vfs.flags or "xz" in vfs.flags) and ( if ("gz" in vfs.flags or "xz" in vfs.flags) and (
@ -1900,7 +1899,7 @@ class HttpCli(object):
f.seek(ofs) f.seek(ofs)
with open(fp, "wb") as fo: with open(fp, "wb") as fo:
while nrem: while nrem:
buf = f.read(min(nrem, 512 * 1024)) buf = f.read(min(nrem, self.args.iobuf))
if not buf: if not buf:
break break
@ -2162,7 +2161,7 @@ class HttpCli(object):
except: except:
pass pass
f = f or open(fsenc(path), "rb+", 512 * 1024) f = f or open(fsenc(path), "rb+", self.args.iobuf)
try: try:
f.seek(cstart[0]) f.seek(cstart[0])
@ -2185,7 +2184,8 @@ class HttpCli(object):
) )
ofs = 0 ofs = 0
while ofs < chunksize: while ofs < chunksize:
bufsz = min(chunksize - ofs, 4 * 1024 * 1024) bufsz = max(4 * 1024 * 1024, self.args.iobuf)
bufsz = min(chunksize - ofs, bufsz)
f.seek(cstart[0] + ofs) f.seek(cstart[0] + ofs)
buf = f.read(bufsz) buf = f.read(bufsz)
for wofs in cstart[1:]: for wofs in cstart[1:]:
@ -2482,7 +2482,7 @@ class HttpCli(object):
v2 = lim.dfv - lim.dfl v2 = lim.dfv - lim.dfl
max_sz = min(v1, v2) if v1 and v2 else v1 or v2 max_sz = min(v1, v2) if v1 and v2 else v1 or v2
with ren_open(tnam, "wb", 512 * 1024, **open_args) as zfw: with ren_open(tnam, "wb", self.args.iobuf, **open_args) as zfw:
f, tnam = zfw["orz"] f, tnam = zfw["orz"]
tabspath = os.path.join(fdir, tnam) tabspath = os.path.join(fdir, tnam)
self.log("writing to {}".format(tabspath)) self.log("writing to {}".format(tabspath))
@ -2778,7 +2778,7 @@ class HttpCli(object):
if bos.path.exists(fp): if bos.path.exists(fp):
wunlink(self.log, fp, vfs.flags) wunlink(self.log, fp, vfs.flags)
with open(fsenc(fp), "wb", 512 * 1024) as f: with open(fsenc(fp), "wb", self.args.iobuf) as f:
sz, sha512, _ = hashcopy(p_data, f, self.args.s_wr_slp) sz, sha512, _ = hashcopy(p_data, f, self.args.s_wr_slp)
if lim: if lim:
@ -3010,8 +3010,7 @@ class HttpCli(object):
upper = gzip_orig_sz(fs_path) upper = gzip_orig_sz(fs_path)
else: else:
open_func = open open_func = open
# 512 kB is optimal for huge files, use 64k open_args = [fsenc(fs_path), "rb", self.args.iobuf]
open_args = [fsenc(fs_path), "rb", 64 * 1024]
use_sendfile = ( use_sendfile = (
# fmt: off # fmt: off
not self.tls not self.tls
@ -3146,6 +3145,7 @@ class HttpCli(object):
bgen = packer( bgen = packer(
self.log, self.log,
self.args,
fgen, fgen,
utf8="utf" in uarg, utf8="utf" in uarg,
pre_crc="crc" in uarg, pre_crc="crc" in uarg,
@ -3223,7 +3223,7 @@ class HttpCli(object):
sz_md = 0 sz_md = 0
lead = b"" lead = b""
fullfile = b"" fullfile = b""
for buf in yieldfile(fs_path): for buf in yieldfile(fs_path, self.args.iobuf):
if sz_md < max_sz: if sz_md < max_sz:
fullfile += buf fullfile += buf
else: else:
@ -3296,7 +3296,7 @@ class HttpCli(object):
if fullfile: if fullfile:
self.s.sendall(fullfile) self.s.sendall(fullfile)
else: else:
for buf in yieldfile(fs_path): for buf in yieldfile(fs_path, self.args.iobuf):
self.s.sendall(html_bescape(buf)) self.s.sendall(html_bescape(buf))
self.s.sendall(html[1]) self.s.sendall(html[1])

View file

@ -1,6 +1,7 @@
# coding: utf-8 # coding: utf-8
from __future__ import print_function, unicode_literals from __future__ import print_function, unicode_literals
import argparse
import re import re
import stat import stat
import tarfile import tarfile
@ -44,11 +45,12 @@ class StreamTar(StreamArc):
def __init__( def __init__(
self, self,
log: "NamedLogger", log: "NamedLogger",
args: argparse.Namespace,
fgen: Generator[dict[str, Any], None, None], fgen: Generator[dict[str, Any], None, None],
cmp: str = "", cmp: str = "",
**kwargs: Any **kwargs: Any
): ):
super(StreamTar, self).__init__(log, fgen) super(StreamTar, self).__init__(log, args, fgen)
self.ci = 0 self.ci = 0
self.co = 0 self.co = 0
@ -126,7 +128,7 @@ class StreamTar(StreamArc):
inf.gid = 0 inf.gid = 0
self.ci += inf.size self.ci += inf.size
with open(fsenc(src), "rb", 512 * 1024) as fo: with open(fsenc(src), "rb", self.args.iobuf) as fo:
self.tar.addfile(inf, fo) self.tar.addfile(inf, fo)
def _gen(self) -> None: def _gen(self) -> None:

View file

@ -1,6 +1,7 @@
# coding: utf-8 # coding: utf-8
from __future__ import print_function, unicode_literals from __future__ import print_function, unicode_literals
import argparse
import os import os
import tempfile import tempfile
from datetime import datetime from datetime import datetime
@ -20,10 +21,12 @@ class StreamArc(object):
def __init__( def __init__(
self, self,
log: "NamedLogger", log: "NamedLogger",
args: argparse.Namespace,
fgen: Generator[dict[str, Any], None, None], fgen: Generator[dict[str, Any], None, None],
**kwargs: Any **kwargs: Any
): ):
self.log = log self.log = log
self.args = args
self.fgen = fgen self.fgen = fgen
self.stopped = False self.stopped = False

View file

@ -1,6 +1,7 @@
# coding: utf-8 # coding: utf-8
from __future__ import print_function, unicode_literals from __future__ import print_function, unicode_literals
import argparse
import calendar import calendar
import stat import stat
import time import time
@ -218,12 +219,13 @@ class StreamZip(StreamArc):
def __init__( def __init__(
self, self,
log: "NamedLogger", log: "NamedLogger",
args: argparse.Namespace,
fgen: Generator[dict[str, Any], None, None], fgen: Generator[dict[str, Any], None, None],
utf8: bool = False, utf8: bool = False,
pre_crc: bool = False, pre_crc: bool = False,
**kwargs: Any **kwargs: Any
) -> None: ) -> None:
super(StreamZip, self).__init__(log, fgen) super(StreamZip, self).__init__(log, args, fgen)
self.utf8 = utf8 self.utf8 = utf8
self.pre_crc = pre_crc self.pre_crc = pre_crc
@ -248,7 +250,7 @@ class StreamZip(StreamArc):
crc = 0 crc = 0
if self.pre_crc: if self.pre_crc:
for buf in yieldfile(src): for buf in yieldfile(src, self.args.iobuf):
crc = zlib.crc32(buf, crc) crc = zlib.crc32(buf, crc)
crc &= 0xFFFFFFFF crc &= 0xFFFFFFFF
@ -257,7 +259,7 @@ class StreamZip(StreamArc):
buf = gen_hdr(None, name, sz, ts, self.utf8, crc, self.pre_crc) buf = gen_hdr(None, name, sz, ts, self.utf8, crc, self.pre_crc)
yield self._ct(buf) yield self._ct(buf)
for buf in yieldfile(src): for buf in yieldfile(src, self.args.iobuf):
if not self.pre_crc: if not self.pre_crc:
crc = zlib.crc32(buf, crc) crc = zlib.crc32(buf, crc)

View file

@ -340,6 +340,9 @@ class Tftpd(object):
if not self.args.tftp_nols and bos.path.isdir(ap): if not self.args.tftp_nols and bos.path.isdir(ap):
return self._ls(vpath, "", 0, True) return self._ls(vpath, "", 0, True)
if not a:
a = [self.args.iobuf]
return open(ap, mode, *a, **ka) return open(ap, mode, *a, **ka)
def _mkdir(self, vpath: str, *a) -> None: def _mkdir(self, vpath: str, *a) -> None:

View file

@ -3920,7 +3920,7 @@ class Up2k(object):
csz = up2k_chunksize(fsz) csz = up2k_chunksize(fsz)
ret = [] ret = []
suffix = " MB, {}".format(path) suffix = " MB, {}".format(path)
with open(fsenc(path), "rb", 512 * 1024) as f: with open(fsenc(path), "rb", self.args.iobuf) as f:
if self.mth and fsz >= 1024 * 512: if self.mth and fsz >= 1024 * 512:
tlt = self.mth.hash(f, fsz, csz, self.pp, prefix, suffix) tlt = self.mth.hash(f, fsz, csz, self.pp, prefix, suffix)
ret = [x[0] for x in tlt] ret = [x[0] for x in tlt]

View file

@ -2361,10 +2361,11 @@ def build_netmap(csv: str):
return NetMap(ips, cidrs, True) return NetMap(ips, cidrs, True)
def yieldfile(fn: str) -> Generator[bytes, None, None]: def yieldfile(fn: str, bufsz: int) -> Generator[bytes, None, None]:
with open(fsenc(fn), "rb", 512 * 1024) as f: readsz = min(bufsz, 128 * 1024)
with open(fsenc(fn), "rb", bufsz) as f:
while True: while True:
buf = f.read(128 * 1024) buf = f.read(readsz)
if not buf: if not buf:
break break

View file

@ -234,8 +234,9 @@ def u8(gen):
def yieldfile(fn): def yieldfile(fn):
with open(fn, "rb") as f: s = 64 * 1024
for block in iter(lambda: f.read(64 * 1024), b""): with open(fn, "rb", s * 4) as f:
for block in iter(lambda: f.read(s), b""):
yield block yield block

View file

@ -147,6 +147,7 @@ class Cfg(Namespace):
dbd="wal", dbd="wal",
fk_salt="a" * 16, fk_salt="a" * 16,
idp_gsep=re.compile("[|:;+,]"), idp_gsep=re.compile("[|:;+,]"),
iobuf=256 * 1024,
lang="eng", lang="eng",
log_badpwd=1, log_badpwd=1,
logout=573, logout=573,