mirror of
https://github.com/9001/copyparty.git
synced 2025-08-17 09:02:15 -06:00
improve errmsg when reading non-utf8 files (#143)
Previously, the native python-error was printed when reading the contents of a textfile using the wrong character encoding; while technically correct, it could be confusing for end-users. Add a helper to produce a more helpful errormessage when someone (for example) tries to load a latin-1 config file.
This commit is contained in:
parent
12fcb42201
commit
25974d660d
|
@ -65,6 +65,7 @@ from .util import (
|
||||||
load_resource,
|
load_resource,
|
||||||
min_ex,
|
min_ex,
|
||||||
pybin,
|
pybin,
|
||||||
|
read_utf8,
|
||||||
termsize,
|
termsize,
|
||||||
wrap,
|
wrap,
|
||||||
)
|
)
|
||||||
|
@ -255,8 +256,7 @@ def get_srvname(verbose) -> str:
|
||||||
if verbose:
|
if verbose:
|
||||||
lprint("using hostname from {}\n".format(fp))
|
lprint("using hostname from {}\n".format(fp))
|
||||||
try:
|
try:
|
||||||
with open(fp, "rb") as f:
|
return read_utf8(None, fp, True).strip()
|
||||||
ret = f.read().decode("utf-8", "replace").strip()
|
|
||||||
except:
|
except:
|
||||||
ret = ""
|
ret = ""
|
||||||
namelen = 5
|
namelen = 5
|
||||||
|
@ -265,47 +265,18 @@ def get_srvname(verbose) -> str:
|
||||||
ret = re.sub("[234567=]", "", ret)[:namelen]
|
ret = re.sub("[234567=]", "", ret)[:namelen]
|
||||||
with open(fp, "wb") as f:
|
with open(fp, "wb") as f:
|
||||||
f.write(ret.encode("utf-8") + b"\n")
|
f.write(ret.encode("utf-8") + b"\n")
|
||||||
|
return ret
|
||||||
return ret
|
|
||||||
|
|
||||||
|
|
||||||
def get_fk_salt() -> str:
|
def get_salt(name: str, nbytes: int) -> str:
|
||||||
fp = os.path.join(E.cfg, "fk-salt.txt")
|
fp = os.path.join(E.cfg, "%s-salt.txt" % (name,))
|
||||||
try:
|
try:
|
||||||
with open(fp, "rb") as f:
|
return read_utf8(None, fp, True).strip()
|
||||||
ret = f.read().strip()
|
|
||||||
except:
|
except:
|
||||||
ret = b64enc(os.urandom(18))
|
ret = b64enc(os.urandom(nbytes))
|
||||||
with open(fp, "wb") as f:
|
with open(fp, "wb") as f:
|
||||||
f.write(ret + b"\n")
|
f.write(ret + b"\n")
|
||||||
|
return ret.decode("utf-8")
|
||||||
return ret.decode("utf-8")
|
|
||||||
|
|
||||||
|
|
||||||
def get_dk_salt() -> str:
|
|
||||||
fp = os.path.join(E.cfg, "dk-salt.txt")
|
|
||||||
try:
|
|
||||||
with open(fp, "rb") as f:
|
|
||||||
ret = f.read().strip()
|
|
||||||
except:
|
|
||||||
ret = b64enc(os.urandom(30))
|
|
||||||
with open(fp, "wb") as f:
|
|
||||||
f.write(ret + b"\n")
|
|
||||||
|
|
||||||
return ret.decode("utf-8")
|
|
||||||
|
|
||||||
|
|
||||||
def get_ah_salt() -> str:
|
|
||||||
fp = os.path.join(E.cfg, "ah-salt.txt")
|
|
||||||
try:
|
|
||||||
with open(fp, "rb") as f:
|
|
||||||
ret = f.read().strip()
|
|
||||||
except:
|
|
||||||
ret = b64enc(os.urandom(18))
|
|
||||||
with open(fp, "wb") as f:
|
|
||||||
f.write(ret + b"\n")
|
|
||||||
|
|
||||||
return ret.decode("utf-8")
|
|
||||||
|
|
||||||
|
|
||||||
def ensure_locale() -> None:
|
def ensure_locale() -> None:
|
||||||
|
@ -1552,9 +1523,9 @@ def run_argparse(
|
||||||
|
|
||||||
cert_path = os.path.join(E.cfg, "cert.pem")
|
cert_path = os.path.join(E.cfg, "cert.pem")
|
||||||
|
|
||||||
fk_salt = get_fk_salt()
|
fk_salt = get_salt("fk", 18)
|
||||||
dk_salt = get_dk_salt()
|
dk_salt = get_salt("dk", 30)
|
||||||
ah_salt = get_ah_salt()
|
ah_salt = get_salt("ah", 18)
|
||||||
|
|
||||||
# alpine peaks at 5 threads for some reason,
|
# alpine peaks at 5 threads for some reason,
|
||||||
# all others scale past that (but try to avoid SMT),
|
# all others scale past that (but try to avoid SMT),
|
||||||
|
|
|
@ -33,6 +33,7 @@ from .util import (
|
||||||
get_df,
|
get_df,
|
||||||
humansize,
|
humansize,
|
||||||
odfusion,
|
odfusion,
|
||||||
|
read_utf8,
|
||||||
relchk,
|
relchk,
|
||||||
statdir,
|
statdir,
|
||||||
ub64enc,
|
ub64enc,
|
||||||
|
@ -2547,8 +2548,8 @@ class AuthSrv(object):
|
||||||
if not bos.path.exists(ap):
|
if not bos.path.exists(ap):
|
||||||
pwdb = {}
|
pwdb = {}
|
||||||
else:
|
else:
|
||||||
with open(ap, "r", encoding="utf-8") as f:
|
jtxt = read_utf8(self.log, ap, True)
|
||||||
pwdb = json.load(f)
|
pwdb = json.loads(jtxt)
|
||||||
|
|
||||||
pwdb = [x for x in pwdb if x[0] != uname]
|
pwdb = [x for x in pwdb if x[0] != uname]
|
||||||
pwdb.append((uname, self.defpw[uname], hpw))
|
pwdb.append((uname, self.defpw[uname], hpw))
|
||||||
|
@ -2571,8 +2572,8 @@ class AuthSrv(object):
|
||||||
if not self.args.chpw or not bos.path.exists(ap):
|
if not self.args.chpw or not bos.path.exists(ap):
|
||||||
return
|
return
|
||||||
|
|
||||||
with open(ap, "r", encoding="utf-8") as f:
|
jtxt = read_utf8(self.log, ap, True)
|
||||||
pwdb = json.load(f)
|
pwdb = json.loads(jtxt)
|
||||||
|
|
||||||
useen = set()
|
useen = set()
|
||||||
urst = set()
|
urst = set()
|
||||||
|
@ -3068,8 +3069,9 @@ def expand_config_file(
|
||||||
ipath += " -> " + fp
|
ipath += " -> " + fp
|
||||||
ret.append("#\033[36m opening cfg file{}\033[0m".format(ipath))
|
ret.append("#\033[36m opening cfg file{}\033[0m".format(ipath))
|
||||||
|
|
||||||
with open(fp, "rb") as f:
|
cfg_lines = read_utf8(log, fp, True).split("\n")
|
||||||
for oln in [x.decode("utf-8").rstrip() for x in f]:
|
if True: # diff-golf
|
||||||
|
for oln in [x.rstrip() for x in cfg_lines]:
|
||||||
ln = oln.split(" #")[0].strip()
|
ln = oln.split(" #")[0].strip()
|
||||||
if ln.startswith("% "):
|
if ln.startswith("% "):
|
||||||
pad = " " * len(oln.split("%")[0])
|
pad = " " * len(oln.split("%")[0])
|
||||||
|
|
|
@ -87,6 +87,7 @@ from .util import (
|
||||||
quotep,
|
quotep,
|
||||||
rand_name,
|
rand_name,
|
||||||
read_header,
|
read_header,
|
||||||
|
read_utf8,
|
||||||
read_socket,
|
read_socket,
|
||||||
read_socket_chunked,
|
read_socket_chunked,
|
||||||
read_socket_unbounded,
|
read_socket_unbounded,
|
||||||
|
@ -870,8 +871,7 @@ class HttpCli(object):
|
||||||
html = html.replace("%", "", 1)
|
html = html.replace("%", "", 1)
|
||||||
|
|
||||||
if html.startswith("@"):
|
if html.startswith("@"):
|
||||||
with open(html[1:], "rb") as f:
|
html = read_utf8(self.log, html[1:], True)
|
||||||
html = f.read().decode("utf-8")
|
|
||||||
|
|
||||||
if html.startswith("%"):
|
if html.startswith("%"):
|
||||||
html = html[1:]
|
html = html[1:]
|
||||||
|
@ -3740,8 +3740,7 @@ class HttpCli(object):
|
||||||
continue
|
continue
|
||||||
fn = "%s/%s" % (abspath, fn)
|
fn = "%s/%s" % (abspath, fn)
|
||||||
if bos.path.isfile(fn):
|
if bos.path.isfile(fn):
|
||||||
with open(fsenc(fn), "rb") as f:
|
logues[n] = read_utf8(self.log, fsenc(fn), False)
|
||||||
logues[n] = f.read().decode("utf-8")
|
|
||||||
if "exp" in vn.flags:
|
if "exp" in vn.flags:
|
||||||
logues[n] = self._expand(
|
logues[n] = self._expand(
|
||||||
logues[n], vn.flags.get("exp_lg") or []
|
logues[n], vn.flags.get("exp_lg") or []
|
||||||
|
@ -3762,9 +3761,8 @@ class HttpCli(object):
|
||||||
for fn in fns:
|
for fn in fns:
|
||||||
fn = "%s/%s" % (abspath, fn)
|
fn = "%s/%s" % (abspath, fn)
|
||||||
if bos.path.isfile(fn):
|
if bos.path.isfile(fn):
|
||||||
with open(fsenc(fn), "rb") as f:
|
txt = read_utf8(self.log, fsenc(fn), False)
|
||||||
txt = f.read().decode("utf-8")
|
break
|
||||||
break
|
|
||||||
|
|
||||||
if txt and "exp" in vn.flags:
|
if txt and "exp" in vn.flags:
|
||||||
txt = self._expand(txt, vn.flags.get("exp_md") or [])
|
txt = self._expand(txt, vn.flags.get("exp_md") or [])
|
||||||
|
@ -6254,9 +6252,7 @@ class HttpCli(object):
|
||||||
docpath = os.path.join(abspath, doc)
|
docpath = os.path.join(abspath, doc)
|
||||||
sz = bos.path.getsize(docpath)
|
sz = bos.path.getsize(docpath)
|
||||||
if sz < 1024 * self.args.txt_max:
|
if sz < 1024 * self.args.txt_max:
|
||||||
with open(fsenc(docpath), "rb") as f:
|
doctxt = read_utf8(self.log, fsenc(docpath), False)
|
||||||
doctxt = f.read().decode("utf-8", "replace")
|
|
||||||
|
|
||||||
if doc.lower().endswith(".md") and "exp" in vn.flags:
|
if doc.lower().endswith(".md") and "exp" in vn.flags:
|
||||||
doctxt = self._expand(doctxt, vn.flags.get("exp_md") or [])
|
doctxt = self._expand(doctxt, vn.flags.get("exp_md") or [])
|
||||||
else:
|
else:
|
||||||
|
|
|
@ -594,6 +594,38 @@ except Exception as ex:
|
||||||
print("using fallback base64 codec due to %r" % (ex,))
|
print("using fallback base64 codec due to %r" % (ex,))
|
||||||
|
|
||||||
|
|
||||||
|
class NotUTF8(Exception):
    """Raised by read_utf8 in strict mode when a file is not valid UTF-8."""


def read_utf8(log: Optional["NamedLogger"], ap: Union[str, bytes], strict: bool) -> str:
    """Read the whole file at `ap` and return its contents decoded as UTF-8.

    If the file is not valid UTF-8, a human-friendly message naming the file,
    the first offending byte and its offset is sent to `log` at level 3
    (or printed if `log` is None), and then:

    * strict=False: the text is returned with each undecodable byte
      substituted by the "replace" error handler (U+FFFD)
    * strict=True: NotUTF8 is raised, carrying the same message

    Errors from opening or reading the file itself are not handled here.
    """
    with open(ap, "rb") as f:
        buf = f.read()

    try:
        return buf.decode("utf-8", "strict")
    except UnicodeDecodeError as ex:
        eo = ex.start
        eb = buf[eo : eo + 1]

        # `ap` may be a bytes path; decode it for display so the message
        # shows [/some/path] rather than [b'/some/path']
        name = ap.decode("utf-8", "replace") if isinstance(ap, bytes) else ap

        if strict:
            t = "ERROR: The file [%s] is not using the UTF-8 character encoding, and cannot be loaded."
        else:
            t = "WARNING: The file [%s] is not using the UTF-8 character encoding; some characters in the file will be skipped/ignored."

        t += " The first unreadable character was byte %r at offset %d. Please convert this file to UTF-8 by opening the file in your text-editor and saving it as UTF-8."
        t = t % (name, eb, eo)

        if log:
            log(t, 3)
        else:
            print(t)

        if strict:
            # raised inside the handler so the original UnicodeDecodeError
            # stays attached as the exception context
            raise NotUTF8(t)

        return buf.decode("utf-8", "replace")
|
||||||
|
|
||||||
|
|
||||||
class Daemon(threading.Thread):
|
class Daemon(threading.Thread):
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
|
|
Loading…
Reference in a new issue