up to 2.6x faster download-as-zip

when there are lots of files, especially small ones;
also reduces CPU load by at least 15%
This commit is contained in:
ed 2023-09-05 22:57:03 +00:00
parent 0f9877201b
commit 6420c4bd03
2 changed files with 17 additions and 6 deletions

View file

@ -276,6 +276,7 @@ class StreamZip(StreamArc):
def gen(self) -> Generator[bytes, None, None]: def gen(self) -> Generator[bytes, None, None]:
errf: dict[str, Any] = {} errf: dict[str, Any] = {}
errors = [] errors = []
mbuf = b""
try: try:
for f in self.fgen: for f in self.fgen:
if "err" in f: if "err" in f:
@ -284,13 +285,20 @@ class StreamZip(StreamArc):
try: try:
for x in self.ser(f): for x in self.ser(f):
yield x mbuf += x
if len(mbuf) >= 16384:
yield mbuf
mbuf = b""
except GeneratorExit: except GeneratorExit:
raise raise
except: except:
ex = min_ex(5, True).replace("\n", "\n-- ") ex = min_ex(5, True).replace("\n", "\n-- ")
errors.append((f["vp"], ex)) errors.append((f["vp"], ex))
if mbuf:
yield mbuf
mbuf = b""
if errors: if errors:
errf, txt = errdesc(errors) errf, txt = errdesc(errors)
self.log("\n".join(([repr(errf)] + txt[1:]))) self.log("\n".join(([repr(errf)] + txt[1:])))
@ -300,20 +308,23 @@ class StreamZip(StreamArc):
cdir_pos = self.pos cdir_pos = self.pos
for name, sz, ts, crc, h_pos in self.items: for name, sz, ts, crc, h_pos in self.items:
buf = gen_hdr(h_pos, name, sz, ts, self.utf8, crc, self.pre_crc) buf = gen_hdr(h_pos, name, sz, ts, self.utf8, crc, self.pre_crc)
yield self._ct(buf) mbuf += self._ct(buf)
if len(mbuf) >= 16384:
yield mbuf
mbuf = b""
cdir_end = self.pos cdir_end = self.pos
_, need_64 = gen_ecdr(self.items, cdir_pos, cdir_end) _, need_64 = gen_ecdr(self.items, cdir_pos, cdir_end)
if need_64: if need_64:
ecdir64_pos = self.pos ecdir64_pos = self.pos
buf = gen_ecdr64(self.items, cdir_pos, cdir_end) buf = gen_ecdr64(self.items, cdir_pos, cdir_end)
yield self._ct(buf) mbuf += self._ct(buf)
buf = gen_ecdr64_loc(ecdir64_pos) buf = gen_ecdr64_loc(ecdir64_pos)
yield self._ct(buf) mbuf += self._ct(buf)
ecdr, _ = gen_ecdr(self.items, cdir_pos, cdir_end) ecdr, _ = gen_ecdr(self.items, cdir_pos, cdir_end)
yield self._ct(ecdr) yield mbuf + self._ct(ecdr)
finally: finally:
if errf: if errf:
bos.unlink(errf["ap"]) bos.unlink(errf["ap"])

View file

@ -2125,7 +2125,7 @@ def list_ips() -> list[str]:
def yieldfile(fn: str) -> Generator[bytes, None, None]: def yieldfile(fn: str) -> Generator[bytes, None, None]:
with open(fsenc(fn), "rb", 512 * 1024) as f: with open(fsenc(fn), "rb", 512 * 1024) as f:
while True: while True:
buf = f.read(64 * 1024) buf = f.read(128 * 1024)
if not buf: if not buf:
break break