From 7d64879ba8308c7e201fb20113c07c0c377af70b Mon Sep 17 00:00:00 2001 From: ed Date: Sun, 15 Sep 2024 17:46:43 +0000 Subject: [PATCH] more optimizations, * 5% less cpu load from clients fetching thumbnails * and slight improvement to up2k stuff --- copyparty/__init__.py | 1 + copyparty/__main__.py | 10 ++++--- copyparty/authsrv.py | 15 ++++++---- copyparty/httpcli.py | 10 +++---- copyparty/httpsrv.py | 6 ++-- copyparty/svchub.py | 4 +-- copyparty/th_srv.py | 21 ++++++++++---- copyparty/up2k.py | 16 +++++------ copyparty/util.py | 64 ++++++++++++++++++++++++++++++------------- 9 files changed, 94 insertions(+), 53 deletions(-) diff --git a/copyparty/__init__.py b/copyparty/__init__.py index dc99255f..641414e7 100644 --- a/copyparty/__init__.py +++ b/copyparty/__init__.py @@ -19,6 +19,7 @@ if True: from typing import Any, Callable PY2 = sys.version_info < (3,) +PY36 = sys.version_info > (3, 6) if not PY2: unicode: Callable[[Any], str] = str else: diff --git a/copyparty/__main__.py b/copyparty/__main__.py index 323691f0..0d58cddb 100644 --- a/copyparty/__main__.py +++ b/copyparty/__main__.py @@ -27,6 +27,7 @@ from .__init__ import ( EXE, MACOS, PY2, + PY36, VT100, WINDOWS, E, @@ -54,6 +55,7 @@ from .util import ( Daemon, align_tab, ansi_re, + b64enc, dedent, min_ex, pybin, @@ -267,7 +269,7 @@ def get_fk_salt() -> str: with open(fp, "rb") as f: ret = f.read().strip() except: - ret = base64.b64encode(os.urandom(18)) + ret = b64enc(os.urandom(18)) with open(fp, "wb") as f: f.write(ret + b"\n") @@ -280,7 +282,7 @@ def get_dk_salt() -> str: with open(fp, "rb") as f: ret = f.read().strip() except: - ret = base64.b64encode(os.urandom(30)) + ret = b64enc(os.urandom(30)) with open(fp, "wb") as f: f.write(ret + b"\n") @@ -293,7 +295,7 @@ def get_ah_salt() -> str: with open(fp, "rb") as f: ret = f.read().strip() except: - ret = base64.b64encode(os.urandom(18)) + ret = b64enc(os.urandom(18)) with open(fp, "wb") as f: f.write(ret + b"\n") @@ -1759,7 +1761,7 @@ def main(argv: Optional[list[str]] = None, rsrc: Optional[str] = None) -> None: print("error: python2 cannot --smb") return - if sys.version_info < (3, 6): + if not PY36: al.no_scandir = True if not hasattr(os, "sendfile"): diff --git a/copyparty/authsrv.py b/copyparty/authsrv.py index 0808095c..d6ade06c 100644 --- a/copyparty/authsrv.py +++ b/copyparty/authsrv.py @@ -855,6 +855,7 @@ class AuthSrv(object): self.idp_accs: dict[str, list[str]] = {} # username->groupnames self.idp_usr_gh: dict[str, str] = {} # username->group-header-value (cache) + self.hid_cache: dict[str, str] = {} self.mutex = threading.Lock() self.reload() @@ -1550,8 +1551,8 @@ class AuthSrv(object): if s_pw: # gotta reuse the "account" for all shares with this pw, # so do a light scramble as this appears in the web-ui - zs = ub64enc(hashlib.sha512(s_pw.encode("utf-8")).digest())[4:16] - sun = "s_%s" % (zs.decode("utf-8"),) + zb = hashlib.sha512(s_pw.encode("utf-8")).digest() + sun = "s_%s" % (ub64enc(zb)[4:16].decode("ascii"),) acct[sun] = s_pw else: sun = "*" @@ -1656,8 +1657,12 @@ class AuthSrv(object): promote = [] demote = [] for vol in vfs.all_vols.values(): - zb = hashlib.sha512(afsenc(vol.realpath)).digest() - hid = base64.b32encode(zb).decode("ascii").lower() + hid = self.hid_cache.get(vol.realpath) + if not hid: + zb = hashlib.sha512(afsenc(vol.realpath)).digest() + hid = base64.b32encode(zb).decode("ascii").lower() + self.hid_cache[vol.realpath] = hid + vflag = vol.flags.get("hist") if vflag == "-": pass @@ -2286,7 +2291,7 @@ class AuthSrv(object): q = "insert into us values (?,?,?)" for uname in self.acct: if uname not in ases: - sid = ub64enc(os.urandom(blen)).decode("utf-8") + sid = ub64enc(os.urandom(blen)).decode("ascii") cur.execute(q, (uname, sid, int(time.time()))) ases[uname] = sid n.append(uname) diff --git a/copyparty/httpcli.py b/copyparty/httpcli.py index c2097fd5..4d719c8f 100644 --- a/copyparty/httpcli.py +++ b/copyparty/httpcli.py @@ -2,7 +2,6 @@ from __future__ import print_function, unicode_literals import argparse # typechk -import base64 import calendar import copy import errno @@ -58,6 +57,7 @@ from .util import ( absreal, alltrace, atomic_move, + b64dec, exclude_dotfiles, formatdate, fsenc, @@ -503,7 +503,7 @@ class HttpCli(object): ): try: zb = zso.split(" ")[1].encode("ascii") - zs = base64.b64decode(zb).decode("utf-8") + zs = b64dec(zb).decode("utf-8") # try "pwd", "x:pwd", "pwd:x" for bauth in [zs] + zs.split(":", 1)[::-1]: if bauth in self.asrv.sesa: @@ -2506,7 +2506,7 @@ class HttpCli(object): logpwd = "" elif self.args.log_badpwd == 2: zb = hashlib.sha512(pwd.encode("utf-8", "replace")).digest() - logpwd = "%" + base64.b64encode(zb[:12]).decode("utf-8") + logpwd = "%" + ub64enc(zb[:12]).decode("ascii") if pwd != "x": self.log("invalid password: {}".format(logpwd), 3) @@ -5364,7 +5364,7 @@ class HttpCli(object): fmt = vn.flags.get("og_th", "j") th_base = ujoin(url_base, quotep(thumb)) query = "th=%s&cache" % (fmt,) - query = ub64enc(query.encode("utf-8")).decode("utf-8") + query = ub64enc(query.encode("utf-8")).decode("ascii") # discord looks at file extension, not content-type... query += "/th.jpg" if "j" in fmt else "/th.webp" j2a["og_thumb"] = "%s/.uqe/%s" % (th_base, query) @@ -5373,7 +5373,7 @@ class HttpCli(object): j2a["og_file"] = file if og_fn: og_fn_q = quotep(og_fn) - query = ub64enc(b"raw").decode("utf-8") + query = ub64enc(b"raw").decode("ascii") query += "/%s" % (og_fn_q,) j2a["og_url"] = ujoin(url_base, og_fn_q) j2a["og_raw"] = j2a["og_url"] + "/.uqe/" + query diff --git a/copyparty/httpsrv.py b/copyparty/httpsrv.py index 7661e990..8271b12f 100644 --- a/copyparty/httpsrv.py +++ b/copyparty/httpsrv.py @@ -1,7 +1,6 @@ # coding: utf-8 from __future__ import print_function, unicode_literals -import base64 import math import os import re @@ -75,6 +74,7 @@ from .util import ( spack, start_log_thrs, start_stackmon, + ub64enc, ) if TYPE_CHECKING: @@ -543,8 +543,8 @@ class HttpSrv(object): except: pass - v = base64.urlsafe_b64encode(spack(b">xxL", int(v))) - self.cb_v = v.decode("ascii")[-4:] + # spack gives 4 lsb, take 3 lsb, get 4 ch + self.cb_v = ub64enc(spack(b">L", int(v))[1:]).decode("ascii") self.cb_ts = time.time() return self.cb_v diff --git a/copyparty/svchub.py b/copyparty/svchub.py index d9e3c8bd..d63d7f3d 100644 --- a/copyparty/svchub.py +++ b/copyparty/svchub.py @@ -2,7 +2,6 @@ from __future__ import print_function, unicode_literals import argparse -import base64 import errno import gzip import logging @@ -67,6 +66,7 @@ from .util import ( pybin, start_log_thrs, start_stackmon, + ub64enc, ) if TYPE_CHECKING: @@ -1297,5 +1297,5 @@ class SvcHub(object): zs = "{}\n{}".format(VERSIONS, alltrace()) zb = zs.encode("utf-8", "replace") zb = gzip.compress(zb) - zs = base64.b64encode(zb).decode("ascii") + zs = ub64enc(zb).decode("ascii") self.log("stacks", zs) diff --git a/copyparty/th_srv.py b/copyparty/th_srv.py index 4121c395..3c3ccd99 100644 --- a/copyparty/th_srv.py +++ b/copyparty/th_srv.py @@ -1,7 +1,6 @@ # coding: utf-8 from __future__ import print_function, unicode_literals -import base64 import hashlib import logging import os @@ -27,6 +26,7 @@ from .util import ( min_ex, runcmd, statdir, + ub64enc, vsplit, wrename, wunlink, @@ -109,6 +109,9 @@ except: HAVE_VIPS = False +th_dir_cache = {} + + def thumb_path(histpath: str, rem: str, mtime: float, fmt: str, ffa: set[str]) -> str: # base16 = 16 = 256 # b64-lc = 38 = 1444 @@ -122,14 +125,20 @@ def thumb_path(histpath: str, rem: str, mtime: float, fmt: str, ffa: set[str]) - if ext in ffa and fmt[:2] in ("wf", "jf"): fmt = fmt.replace("f", "") - rd += "\n" + fmt - h = hashlib.sha512(afsenc(rd)).digest() - b64 = base64.urlsafe_b64encode(h).decode("ascii")[:24] - rd = ("%s/%s/" % (b64[:2], b64[2:4])).lower() + b64 + dcache = th_dir_cache + rd_key = rd + "\n" + fmt + rd = dcache.get(rd_key) + if not rd: + h = hashlib.sha512(afsenc(rd_key)).digest() + b64 = ub64enc(h).decode("ascii")[:24] + rd = ("%s/%s/" % (b64[:2], b64[2:4])).lower() + b64 + if len(dcache) > 9001: + dcache.clear() + dcache[rd_key] = rd # could keep original filenames but this is safer re pathlen h = hashlib.sha512(afsenc(fn)).digest() - fn = base64.urlsafe_b64encode(h).decode("ascii")[:24] + fn = ub64enc(h).decode("ascii")[:24] if fmt in ("opus", "caf", "mp3"): cat = "ac" diff --git a/copyparty/up2k.py b/copyparty/up2k.py index 62d5183f..e652077e 100644 --- a/copyparty/up2k.py +++ b/copyparty/up2k.py @@ -1,7 +1,6 @@ # coding: utf-8 from __future__ import print_function, unicode_literals -import base64 import errno import gzip import hashlib @@ -61,6 +60,7 @@ from .util import ( sfsenc, spack, statdir, + ub64enc, unhumanize, vjoin, vsplit, @@ -1156,7 +1156,7 @@ class Up2k(object): zsl = [x[len(prefix) :] for x in zsl] zsl.sort() zb = hashlib.sha1("\n".join(zsl).encode("utf-8", "replace")).digest() - vcfg = base64.urlsafe_b64encode(zb[:18]).decode("ascii") + vcfg = ub64enc(zb[:18]).decode("ascii") c = cur.execute("select v from kv where k = 'volcfg'") try: @@ -1425,7 +1425,7 @@ class Up2k(object): zh.update(cv.encode("utf-8", "replace")) zh.update(spack(b" None: txt = t0 or "" digest = hashlib.sha512(db_path.encode("utf-8", "replace")).digest() - stackname = base64.urlsafe_b64encode(digest[:9]).decode("utf-8") + stackname = ub64enc(digest[:9]).decode("ascii") stackpath = os.path.join(E.cfg, "stack-%s.txt" % (stackname,)) t = " the filesystem at %s may not support locking, or is otherwise incompatible with sqlite\n\n %s\n\n" @@ -4458,8 +4458,7 @@ class Up2k(object): rem -= len(buf) digest = hashobj.digest()[:33] - digest = base64.urlsafe_b64encode(digest) - ret.append(digest.decode("utf-8")) + ret.append(ub64enc(digest).decode("ascii")) return ret, st @@ -4923,11 +4922,10 @@ def up2k_wark_from_hashlist(salt: str, filesize: int, hashes: list[str]) -> str: vstr = "\n".join(values) wark = hashlib.sha512(vstr.encode("utf-8")).digest()[:33] - wark = base64.urlsafe_b64encode(wark) - return wark.decode("ascii") + return ub64enc(wark).decode("ascii") def up2k_wark_from_metadata(salt: str, sz: int, lastmod: int, rd: str, fn: str) -> str: ret = sfsenc("%s\n%d\n%d\n%s\n%s" % (salt, lastmod, sz, rd, fn)) - ret = base64.urlsafe_b64encode(hashlib.sha512(ret).digest()) + ret = ub64enc(hashlib.sha512(ret).digest()) return ("#%s" % (ret.decode("ascii"),))[:44] diff --git a/copyparty/util.py b/copyparty/util.py index 7a060bea..3d1d62c8 100644 --- a/copyparty/util.py +++ b/copyparty/util.py @@ -3,7 +3,7 @@ from __future__ import print_function, unicode_literals import argparse import base64 -import contextlib +import binascii import errno import hashlib import hmac @@ -30,13 +30,10 @@ from collections import Counter from ipaddress import IPv4Address, IPv4Network, IPv6Address, IPv6Network from queue import Queue -from .__init__ import ANYWIN, EXE, MACOS, PY2, TYPE_CHECKING, VT100, WINDOWS +from .__init__ import ANYWIN, EXE, MACOS, PY2, PY36, TYPE_CHECKING, VT100, WINDOWS from .__version__ import S_BUILD_DT, S_VERSION from .stolen import surrogateescape -ub64dec = base64.urlsafe_b64decode -ub64enc = base64.urlsafe_b64encode - try: from datetime import datetime, timezone @@ -64,7 +61,7 @@ if PY2: if sys.version_info >= (3, 7) or ( - sys.version_info >= (3, 6) and platform.python_implementation() == "CPython" + PY36 and platform.python_implementation() == "CPython" ): ODict = dict else: @@ -212,7 +209,7 @@ else: FS_ENCODING = sys.getfilesystemencoding() -SYMTIME = sys.version_info > (3, 6) and os.utime in os.supports_follow_symlinks +SYMTIME = PY36 and os.utime in os.supports_follow_symlinks META_NOBOTS = '\n' @@ -484,6 +481,38 @@ VERSIONS = ( ) +try: + _b64_enc_tl = bytes.maketrans(b'+/', b'-_') + _b64_dec_tl = bytes.maketrans(b'-_', b'+/') + + def ub64enc(bs: bytes) -> bytes: + x = binascii.b2a_base64(bs, newline=False) + return x.translate(_b64_enc_tl) + + def ub64dec(bs: bytes) -> bytes: + bs = bs.translate(_b64_dec_tl) + return binascii.a2b_base64(bs) + + def b64enc(bs: bytes) -> bytes: + return binascii.b2a_base64(bs, newline=False) + + def b64dec(bs: bytes) -> bytes: + return binascii.a2b_base64(bs) + + zb = b">>>????" + zb2 = base64.urlsafe_b64encode(zb) + if zb2 != ub64enc(zb) or zb != ub64dec(zb2): + raise Exception("bad smoke") + +except Exception as ex: + ub64enc = base64.urlsafe_b64encode # type: ignore + ub64dec = base64.urlsafe_b64decode # type: ignore + b64enc = base64.b64encode # type: ignore + b64dec = base64.b64decode # type: ignore + if not PY36: + print("using fallback base64 codec due to %r" % (ex,)) + + class Daemon(threading.Thread): def __init__( self, @@ -1028,7 +1057,7 @@ class MTHash(object): ofs += len(buf) bdig = hashobj.digest()[:33] - udig = base64.urlsafe_b64encode(bdig).decode("utf-8") + udig = ub64enc(bdig).decode("ascii") return nch, udig, ofs0, chunk_sz @@ -1054,7 +1083,7 @@ class HMaccas(object): self.cache = {} zb = hmac.new(self.key, msg, hashlib.sha512).digest() - zs = base64.urlsafe_b64encode(zb)[: self.retlen].decode("utf-8") + zs = ub64enc(zb)[: self.retlen].decode("ascii") self.cache[msg] = zs return zs @@ -1459,8 +1488,7 @@ def ren_open(fname: str, *args: Any, **kwargs: Any) -> tuple[typing.IO[Any], str if not b64: zs = ("%s\n%s" % (orig_name, suffix)).encode("utf-8", "replace") - zs = hashlib.sha512(zs).digest()[:12] - b64 = base64.urlsafe_b64encode(zs).decode("utf-8") + b64 = ub64enc(hashlib.sha512(zs).digest()[:12]).decode("ascii") badlen = len(fname) while len(fname) >= badlen: @@ -1766,9 +1794,8 @@ def rand_name(fdir: str, fn: str, rnd: int) -> str: nc = rnd + extra nb = (6 + 6 * nc) // 8 - zb = os.urandom(nb) - zb = base64.urlsafe_b64encode(zb) - fn = zb[:nc].decode("utf-8") + ext + zb = ub64enc(os.urandom(nb)) + fn = zb[:nc].decode("ascii") + ext ok = not os.path.exists(fsenc(os.path.join(fdir, fn))) return fn @@ -1781,7 +1808,7 @@ def gen_filekey(alg: int, salt: str, fspath: str, fsize: int, inode: int) -> str zs = "%s %s" % (salt, fspath) zb = zs.encode("utf-8", "replace") - return base64.urlsafe_b64encode(hashlib.sha512(zb).digest()).decode("ascii") + return ub64enc(hashlib.sha512(zb).digest()).decode("ascii") def gen_filekey_dbg( @@ -2263,12 +2290,12 @@ w8enc = _w8enc3 if not PY2 else _w8enc2 def w8b64dec(txt: str) -> str: """decodes base64(filesystem-bytes) to wtf8""" - return w8dec(base64.urlsafe_b64decode(txt.encode("ascii"))) + return w8dec(ub64dec(txt.encode("ascii"))) def w8b64enc(txt: str) -> str: """encodes wtf8 to base64(filesystem-bytes)""" - return base64.urlsafe_b64encode(w8enc(txt)).decode("ascii") + return ub64enc(w8enc(txt)).decode("ascii") if not PY2 and WINDOWS: @@ -2644,8 +2671,7 @@ def hashcopy( if slp: time.sleep(slp) - digest = hashobj.digest()[:33] - digest_b64 = base64.urlsafe_b64encode(digest).decode("utf-8") + digest_b64 = ub64enc(hashobj.digest()[:33]).decode("ascii") return tlen, hashobj.hexdigest(), digest_b64