mirror of
https://github.com/9001/copyparty.git
synced 2025-08-17 09:02:15 -06:00
improve errmsg when reading non-utf8 files (#143)
previously, the native python-error was printed when reading the contents of a textfile using the wrong character encoding; while technically correct, it could be confusing for end-users. add a helper to produce a more helpful errormessage when someone (for example) tries to load a latin-1 config file
This commit is contained in:
parent
12fcb42201
commit
25974d660d
|
@ -65,6 +65,7 @@ from .util import (
|
|||
load_resource,
|
||||
min_ex,
|
||||
pybin,
|
||||
read_utf8,
|
||||
termsize,
|
||||
wrap,
|
||||
)
|
||||
|
@ -255,8 +256,7 @@ def get_srvname(verbose) -> str:
|
|||
if verbose:
|
||||
lprint("using hostname from {}\n".format(fp))
|
||||
try:
|
||||
with open(fp, "rb") as f:
|
||||
ret = f.read().decode("utf-8", "replace").strip()
|
||||
return read_utf8(None, fp, True).strip()
|
||||
except:
|
||||
ret = ""
|
||||
namelen = 5
|
||||
|
@ -265,47 +265,18 @@ def get_srvname(verbose) -> str:
|
|||
ret = re.sub("[234567=]", "", ret)[:namelen]
|
||||
with open(fp, "wb") as f:
|
||||
f.write(ret.encode("utf-8") + b"\n")
|
||||
|
||||
return ret
|
||||
return ret
|
||||
|
||||
|
||||
def get_fk_salt() -> str:
|
||||
fp = os.path.join(E.cfg, "fk-salt.txt")
|
||||
def get_salt(name: str, nbytes: int) -> str:
|
||||
fp = os.path.join(E.cfg, "%s-salt.txt" % (name,))
|
||||
try:
|
||||
with open(fp, "rb") as f:
|
||||
ret = f.read().strip()
|
||||
return read_utf8(None, fp, True).strip()
|
||||
except:
|
||||
ret = b64enc(os.urandom(18))
|
||||
ret = b64enc(os.urandom(nbytes))
|
||||
with open(fp, "wb") as f:
|
||||
f.write(ret + b"\n")
|
||||
|
||||
return ret.decode("utf-8")
|
||||
|
||||
|
||||
def get_dk_salt() -> str:
|
||||
fp = os.path.join(E.cfg, "dk-salt.txt")
|
||||
try:
|
||||
with open(fp, "rb") as f:
|
||||
ret = f.read().strip()
|
||||
except:
|
||||
ret = b64enc(os.urandom(30))
|
||||
with open(fp, "wb") as f:
|
||||
f.write(ret + b"\n")
|
||||
|
||||
return ret.decode("utf-8")
|
||||
|
||||
|
||||
def get_ah_salt() -> str:
|
||||
fp = os.path.join(E.cfg, "ah-salt.txt")
|
||||
try:
|
||||
with open(fp, "rb") as f:
|
||||
ret = f.read().strip()
|
||||
except:
|
||||
ret = b64enc(os.urandom(18))
|
||||
with open(fp, "wb") as f:
|
||||
f.write(ret + b"\n")
|
||||
|
||||
return ret.decode("utf-8")
|
||||
return ret.decode("utf-8")
|
||||
|
||||
|
||||
def ensure_locale() -> None:
|
||||
|
@ -1552,9 +1523,9 @@ def run_argparse(
|
|||
|
||||
cert_path = os.path.join(E.cfg, "cert.pem")
|
||||
|
||||
fk_salt = get_fk_salt()
|
||||
dk_salt = get_dk_salt()
|
||||
ah_salt = get_ah_salt()
|
||||
fk_salt = get_salt("fk", 18)
|
||||
dk_salt = get_salt("dk", 30)
|
||||
ah_salt = get_salt("ah", 18)
|
||||
|
||||
# alpine peaks at 5 threads for some reason,
|
||||
# all others scale past that (but try to avoid SMT),
|
||||
|
|
|
@ -33,6 +33,7 @@ from .util import (
|
|||
get_df,
|
||||
humansize,
|
||||
odfusion,
|
||||
read_utf8,
|
||||
relchk,
|
||||
statdir,
|
||||
ub64enc,
|
||||
|
@ -2547,8 +2548,8 @@ class AuthSrv(object):
|
|||
if not bos.path.exists(ap):
|
||||
pwdb = {}
|
||||
else:
|
||||
with open(ap, "r", encoding="utf-8") as f:
|
||||
pwdb = json.load(f)
|
||||
jtxt = read_utf8(self.log, ap, True)
|
||||
pwdb = json.loads(jtxt)
|
||||
|
||||
pwdb = [x for x in pwdb if x[0] != uname]
|
||||
pwdb.append((uname, self.defpw[uname], hpw))
|
||||
|
@ -2571,8 +2572,8 @@ class AuthSrv(object):
|
|||
if not self.args.chpw or not bos.path.exists(ap):
|
||||
return
|
||||
|
||||
with open(ap, "r", encoding="utf-8") as f:
|
||||
pwdb = json.load(f)
|
||||
jtxt = read_utf8(self.log, ap, True)
|
||||
pwdb = json.loads(jtxt)
|
||||
|
||||
useen = set()
|
||||
urst = set()
|
||||
|
@ -3068,8 +3069,9 @@ def expand_config_file(
|
|||
ipath += " -> " + fp
|
||||
ret.append("#\033[36m opening cfg file{}\033[0m".format(ipath))
|
||||
|
||||
with open(fp, "rb") as f:
|
||||
for oln in [x.decode("utf-8").rstrip() for x in f]:
|
||||
cfg_lines = read_utf8(log, fp, True).split("\n")
|
||||
if True: # diff-golf
|
||||
for oln in [x.rstrip() for x in cfg_lines]:
|
||||
ln = oln.split(" #")[0].strip()
|
||||
if ln.startswith("% "):
|
||||
pad = " " * len(oln.split("%")[0])
|
||||
|
|
|
@ -87,6 +87,7 @@ from .util import (
|
|||
quotep,
|
||||
rand_name,
|
||||
read_header,
|
||||
read_utf8,
|
||||
read_socket,
|
||||
read_socket_chunked,
|
||||
read_socket_unbounded,
|
||||
|
@ -870,8 +871,7 @@ class HttpCli(object):
|
|||
html = html.replace("%", "", 1)
|
||||
|
||||
if html.startswith("@"):
|
||||
with open(html[1:], "rb") as f:
|
||||
html = f.read().decode("utf-8")
|
||||
html = read_utf8(self.log, html[1:], True)
|
||||
|
||||
if html.startswith("%"):
|
||||
html = html[1:]
|
||||
|
@ -3740,8 +3740,7 @@ class HttpCli(object):
|
|||
continue
|
||||
fn = "%s/%s" % (abspath, fn)
|
||||
if bos.path.isfile(fn):
|
||||
with open(fsenc(fn), "rb") as f:
|
||||
logues[n] = f.read().decode("utf-8")
|
||||
logues[n] = read_utf8(self.log, fsenc(fn), False)
|
||||
if "exp" in vn.flags:
|
||||
logues[n] = self._expand(
|
||||
logues[n], vn.flags.get("exp_lg") or []
|
||||
|
@ -3762,9 +3761,8 @@ class HttpCli(object):
|
|||
for fn in fns:
|
||||
fn = "%s/%s" % (abspath, fn)
|
||||
if bos.path.isfile(fn):
|
||||
with open(fsenc(fn), "rb") as f:
|
||||
txt = f.read().decode("utf-8")
|
||||
break
|
||||
txt = read_utf8(self.log, fsenc(fn), False)
|
||||
break
|
||||
|
||||
if txt and "exp" in vn.flags:
|
||||
txt = self._expand(txt, vn.flags.get("exp_md") or [])
|
||||
|
@ -6254,9 +6252,7 @@ class HttpCli(object):
|
|||
docpath = os.path.join(abspath, doc)
|
||||
sz = bos.path.getsize(docpath)
|
||||
if sz < 1024 * self.args.txt_max:
|
||||
with open(fsenc(docpath), "rb") as f:
|
||||
doctxt = f.read().decode("utf-8", "replace")
|
||||
|
||||
doctxt = read_utf8(self.log, fsenc(docpath), False)
|
||||
if doc.lower().endswith(".md") and "exp" in vn.flags:
|
||||
doctxt = self._expand(doctxt, vn.flags.get("exp_md") or [])
|
||||
else:
|
||||
|
|
|
@ -594,6 +594,38 @@ except Exception as ex:
|
|||
print("using fallback base64 codec due to %r" % (ex,))
|
||||
|
||||
|
||||
class NotUTF8(Exception):
    """Raised by read_utf8 when a file is not valid UTF-8 and strict=True."""


def read_utf8(log: Optional["NamedLogger"], ap: Union[str, bytes], strict: bool) -> str:
    """Read the whole file at `ap` and return its contents decoded as UTF-8.

    If the file is valid UTF-8, the decoded text is returned and nothing
    is logged. Otherwise a human-friendly message is emitted, naming the
    file, the first offending byte and its offset, and then:

    * strict=False: the text is returned with every undecodable byte
      replaced by U+FFFD (the "replace" error handler)
    * strict=True: NotUTF8 is raised, carrying the same message

    Args:
        log: callable invoked as log(msg, 3); if falsy, the message is
            sent to print() instead.
        ap: path of the file to read; str or bytes.
        strict: whether invalid UTF-8 is an error (True) or a warning (False).

    Raises:
        NotUTF8: the file is not valid UTF-8 and strict is True.
        OSError: propagated from open() / read() unchanged.
    """
    with open(ap, "rb") as f:
        buf = f.read()

    try:
        return buf.decode("utf-8", "strict")
    except UnicodeDecodeError as ex:
        # remember where decoding first failed, and the byte that caused it
        eo = ex.start
        eb = buf[eo : eo + 1]

    # a bytes path would otherwise be rendered as b'...' inside the
    # message (and trips BytesWarning under `python -b`), so make it text
    dap = ap.decode("utf-8", "replace") if isinstance(ap, bytes) else ap

    if strict:
        t = "ERROR: The file [%s] is not using the UTF-8 character encoding, and cannot be loaded. The first unreadable character was byte %r at offset %d. Please convert this file to UTF-8 by opening the file in your text-editor and saving it as UTF-8."
    else:
        t = "WARNING: The file [%s] is not using the UTF-8 character encoding; some characters in the file will be skipped/ignored. The first unreadable character was byte %r at offset %d. Please convert this file to UTF-8 by opening the file in your text-editor and saving it as UTF-8."

    t = t % (dap, eb, eo)
    if log:
        log(t, 3)
    else:
        print(t)

    if strict:
        raise NotUTF8(t)

    return buf.decode("utf-8", "replace")
|
||||
|
||||
|
||||
class Daemon(threading.Thread):
|
||||
def __init__(
|
||||
self,
|
||||
|
|
Loading…
Reference in a new issue