diff --git a/copyparty/__main__.py b/copyparty/__main__.py index 7314f9a2..c57fde07 100644 --- a/copyparty/__main__.py +++ b/copyparty/__main__.py @@ -65,6 +65,7 @@ from .util import ( load_resource, min_ex, pybin, + read_utf8, termsize, wrap, ) @@ -255,8 +256,7 @@ def get_srvname(verbose) -> str: if verbose: lprint("using hostname from {}\n".format(fp)) try: - with open(fp, "rb") as f: - ret = f.read().decode("utf-8", "replace").strip() + return read_utf8(None, fp, True).strip() except: ret = "" namelen = 5 @@ -265,47 +265,18 @@ def get_srvname(verbose) -> str: ret = re.sub("[234567=]", "", ret)[:namelen] with open(fp, "wb") as f: f.write(ret.encode("utf-8") + b"\n") - - return ret + return ret -def get_fk_salt() -> str: - fp = os.path.join(E.cfg, "fk-salt.txt") +def get_salt(name: str, nbytes: int) -> str: + fp = os.path.join(E.cfg, "%s-salt.txt" % (name,)) try: - with open(fp, "rb") as f: - ret = f.read().strip() + return read_utf8(None, fp, True).strip() except: - ret = b64enc(os.urandom(18)) + ret = b64enc(os.urandom(nbytes)) with open(fp, "wb") as f: f.write(ret + b"\n") - - return ret.decode("utf-8") - - -def get_dk_salt() -> str: - fp = os.path.join(E.cfg, "dk-salt.txt") - try: - with open(fp, "rb") as f: - ret = f.read().strip() - except: - ret = b64enc(os.urandom(30)) - with open(fp, "wb") as f: - f.write(ret + b"\n") - - return ret.decode("utf-8") - - -def get_ah_salt() -> str: - fp = os.path.join(E.cfg, "ah-salt.txt") - try: - with open(fp, "rb") as f: - ret = f.read().strip() - except: - ret = b64enc(os.urandom(18)) - with open(fp, "wb") as f: - f.write(ret + b"\n") - - return ret.decode("utf-8") + return ret.decode("utf-8") def ensure_locale() -> None: @@ -1552,9 +1523,9 @@ def run_argparse( cert_path = os.path.join(E.cfg, "cert.pem") - fk_salt = get_fk_salt() - dk_salt = get_dk_salt() - ah_salt = get_ah_salt() + fk_salt = get_salt("fk", 18) + dk_salt = get_salt("dk", 30) + ah_salt = get_salt("ah", 18) # alpine peaks at 5 threads for some reason, # all others scale past that (but try to avoid SMT), diff --git a/copyparty/authsrv.py b/copyparty/authsrv.py index ee2c7434..db9c1cdd 100644 --- a/copyparty/authsrv.py +++ b/copyparty/authsrv.py @@ -33,6 +33,7 @@ from .util import ( get_df, humansize, odfusion, + read_utf8, relchk, statdir, ub64enc, @@ -2547,8 +2548,8 @@ class AuthSrv(object): if not bos.path.exists(ap): pwdb = {} else: - with open(ap, "r", encoding="utf-8") as f: - pwdb = json.load(f) + jtxt = read_utf8(self.log, ap, True) + pwdb = json.loads(jtxt) pwdb = [x for x in pwdb if x[0] != uname] pwdb.append((uname, self.defpw[uname], hpw)) @@ -2571,8 +2572,8 @@ class AuthSrv(object): if not self.args.chpw or not bos.path.exists(ap): return - with open(ap, "r", encoding="utf-8") as f: - pwdb = json.load(f) + jtxt = read_utf8(self.log, ap, True) + pwdb = json.loads(jtxt) useen = set() urst = set() @@ -3068,8 +3069,9 @@ def expand_config_file( ipath += " -> " + fp ret.append("#\033[36m opening cfg file{}\033[0m".format(ipath)) - with open(fp, "rb") as f: - for oln in [x.decode("utf-8").rstrip() for x in f]: + cfg_lines = read_utf8(log, fp, True).split("\n") + if True: # diff-golf + for oln in [x.rstrip() for x in cfg_lines]: ln = oln.split(" #")[0].strip() if ln.startswith("% "): pad = " " * len(oln.split("%")[0]) diff --git a/copyparty/httpcli.py b/copyparty/httpcli.py index 116b7320..f439d6c1 100644 --- a/copyparty/httpcli.py +++ b/copyparty/httpcli.py @@ -87,6 +87,7 @@ from .util import ( quotep, rand_name, read_header, + read_utf8, read_socket, read_socket_chunked, read_socket_unbounded, @@ -870,8 +871,7 @@ class HttpCli(object): html = html.replace("%", "", 1) if html.startswith("@"): - with open(html[1:], "rb") as f: - html = f.read().decode("utf-8") + html = read_utf8(self.log, html[1:], True) if html.startswith("%"): html = html[1:] @@ -3740,8 +3740,7 @@ class HttpCli(object): continue fn = "%s/%s" % (abspath, fn) if bos.path.isfile(fn): - with open(fsenc(fn), "rb") as f: - logues[n] = f.read().decode("utf-8") + logues[n] = read_utf8(self.log, fsenc(fn), False) if "exp" in vn.flags: logues[n] = self._expand( logues[n], vn.flags.get("exp_lg") or [] @@ -3762,9 +3761,8 @@ class HttpCli(object): for fn in fns: fn = "%s/%s" % (abspath, fn) if bos.path.isfile(fn): - with open(fsenc(fn), "rb") as f: - txt = f.read().decode("utf-8") - break + txt = read_utf8(self.log, fsenc(fn), False) + break if txt and "exp" in vn.flags: txt = self._expand(txt, vn.flags.get("exp_md") or []) @@ -6254,9 +6252,7 @@ class HttpCli(object): docpath = os.path.join(abspath, doc) sz = bos.path.getsize(docpath) if sz < 1024 * self.args.txt_max: - with open(fsenc(docpath), "rb") as f: - doctxt = f.read().decode("utf-8", "replace") - + doctxt = read_utf8(self.log, fsenc(docpath), False) if doc.lower().endswith(".md") and "exp" in vn.flags: doctxt = self._expand(doctxt, vn.flags.get("exp_md") or []) else: diff --git a/copyparty/util.py b/copyparty/util.py index c17c451e..692d3dfa 100644 --- a/copyparty/util.py +++ b/copyparty/util.py @@ -594,6 +594,38 @@ except Exception as ex: print("using fallback base64 codec due to %r" % (ex,)) +class NotUTF8(Exception): + pass + + +def read_utf8(log: Optional["NamedLogger"], ap: Union[str, bytes], strict: bool) -> str: + with open(ap, "rb") as f: + buf = f.read() + + try: + return buf.decode("utf-8", "strict") + except UnicodeDecodeError as ex: + eo = ex.start + eb = buf[eo : eo + 1] + + if not strict: + t = "WARNING: The file [%s] is not using the UTF-8 character encoding; some characters in the file will be skipped/ignored. The first unreadable character was byte %r at offset %d. Please convert this file to UTF-8 by opening the file in your text-editor and saving it as UTF-8." + t = t % (ap, eb, eo) + if log: + log(t, 3) + else: + print(t) + return buf.decode("utf-8", "replace") + + t = "ERROR: The file [%s] is not using the UTF-8 character encoding, and cannot be loaded. The first unreadable character was byte %r at offset %d. Please convert this file to UTF-8 by opening the file in your text-editor and saving it as UTF-8." + t = t % (ap, eb, eo) + if log: + log(t, 3) + else: + print(t) + raise NotUTF8(t) + + class Daemon(threading.Thread): def __init__( self,