From 63e089a5f0f6969a9dde36d72d06798afa451e24 Mon Sep 17 00:00:00 2001 From: ed Date: Wed, 12 Jun 2019 16:39:43 +0000 Subject: [PATCH] support mojibake and py3.2 --- README.md | 4 +- copyparty/__init__.py | 2 +- copyparty/__main__.py | 2 +- copyparty/authsrv.py | 12 ++--- copyparty/httpcli.py | 73 +++++++++++-------------- copyparty/httpconn.py | 2 +- copyparty/httpsrv.py | 2 +- copyparty/mpsrv.py | 2 +- copyparty/msgsvc.py | 2 +- copyparty/stolen/surrogateescape.py | 69 ++++++++++-------------- copyparty/tcpsrv.py | 10 ++-- copyparty/util.py | 84 +++++++++++++++++++++++------ docs/notes.sh | 4 +- setup.py | 1 + tests/test_vfs.py | 2 +- 15 files changed, 150 insertions(+), 121 deletions(-) diff --git a/README.md b/README.md index aa7c2956..f19ffc56 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ turn your phone or raspi into a portable file server with resumable uploads/downloads using IE6 or any other browser -* server runs on anything with `py2.7` or `py3.3+` +* server runs on anything with `py2.7` or `py3.2+` * *resumable* uploads need `firefox 12+` / `chrome 6+` / `safari 6+` / `IE 10+` * code standard: `black` @@ -31,7 +31,7 @@ summary: it works # dependencies * `jinja2` - * pulls in `markupsafe` + * pulls in `markupsafe` as of v2.7; use jinja 2.6 on py3.2 optional, enables thumbnails: * `Pillow` (requires py2.7 or py3.5+) diff --git a/copyparty/__init__.py b/copyparty/__init__.py index 1d427577..046fc070 100644 --- a/copyparty/__init__.py +++ b/copyparty/__init__.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # coding: utf-8 -from __future__ import print_function +from __future__ import print_function, unicode_literals import platform import sys diff --git a/copyparty/__main__.py b/copyparty/__main__.py index 6bfa045a..2d1c63ad 100644 --- a/copyparty/__main__.py +++ b/copyparty/__main__.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # coding: utf-8 -from __future__ import print_function +from __future__ import print_function, unicode_literals """copyparty: http file sharing hub (py2/py3)""" __author__ = "ed " diff --git a/copyparty/authsrv.py b/copyparty/authsrv.py index 0885f998..9b4b7b34 100644 --- a/copyparty/authsrv.py +++ b/copyparty/authsrv.py @@ -1,12 +1,12 @@ #!/usr/bin/env python # coding: utf-8 -from __future__ import print_function +from __future__ import print_function, unicode_literals import os import threading from .__init__ import PY2 -from .util import undot, Pebkac +from .util import undot, Pebkac, fsdec, fsenc class VFS(object): @@ -90,12 +90,12 @@ class VFS(object): if rem: rp += "/" + rem - return os.path.realpath(rp) + return fsdec(os.path.realpath(fsenc(rp))) def ls(self, rem, uname): """return user-readable [fsdir,real,virt] items at vpath""" abspath = self.canonical(rem) - real = os.listdir(abspath) + real = [fsdec(x) for x in os.listdir(fsenc(abspath))] real.sort() if rem: virt_vis = [] @@ -182,7 +182,7 @@ class AuthSrv(object): raise Exception('invalid mountpoint "{}"'.format(vol_dst)) # cfg files override arguments and previous files - vol_src = os.path.abspath(vol_src) + vol_src = fsdec(os.path.abspath(fsenc(vol_src))) vol_dst = vol_dst.strip("/") mount[vol_dst] = vol_src mread[vol_dst] = [] @@ -217,7 +217,7 @@ class AuthSrv(object): # list of src:dst:permset:permset:... # permset is [rwa]username for src, dst, perms in [x.split(":", 2) for x in self.args.v]: - src = os.path.abspath(src) + src = fsdec(os.path.abspath(fsenc(src))) dst = dst.strip("/") mount[dst] = src mread[dst] = [] diff --git a/copyparty/httpcli.py b/copyparty/httpcli.py index b460d519..0fe53e2c 100644 --- a/copyparty/httpcli.py +++ b/copyparty/httpcli.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # coding: utf-8 -from __future__ import print_function +from __future__ import print_function, unicode_literals import os import stat @@ -14,11 +14,6 @@ from .util import * # noqa # pylint: disable=unused-wildcard-import if not PY2: unicode = str - from urllib.parse import unquote_plus - from urllib.parse import quote_plus -else: - from urllib import unquote_plus # pylint: disable=no-name-in-module - from urllib import quote_plus class HttpCli(object): @@ -76,8 +71,6 @@ class HttpCli(object): if self.uname: self.rvol = self.auth.vfs.user_tree(self.uname, readable=True) self.wvol = self.auth.vfs.user_tree(self.uname, writable=True) - self.log(self.rvol) - self.log(self.wvol) # split req into vpath + uparam uparam = {} @@ -100,7 +93,7 @@ class HttpCli(object): uparam[k.lower()] = True self.uparam = uparam - self.vpath = unquote_plus(vpath) + self.vpath = unquotep(vpath) try: if mode == "GET": @@ -108,7 +101,7 @@ class HttpCli(object): elif mode == "POST": self.handle_post() else: - self.loud_reply(u'invalid HTTP mode "{0}"'.format(mode)) + self.loud_reply('invalid HTTP mode "{0}"'.format(mode)) except Pebkac as ex: self.loud_reply(str(ex)) @@ -119,16 +112,16 @@ class HttpCli(object): def reply(self, body, status="200 OK", mime="text/html", headers=[]): # TODO something to reply with user-supplied values safely response = [ - u"HTTP/1.1 " + status, - u"Connection: Keep-Alive", - u"Content-Type: " + mime, - u"Content-Length: " + str(len(body)), + "HTTP/1.1 " + status, + "Connection: Keep-Alive", + "Content-Type: " + mime, + "Content-Length: " + str(len(body)), ] for k, v in self.out_headers.items(): response.append("{}: {}".format(k, v)) response.extend(headers) - response_str = u"\r\n".join(response).encode("utf-8") + response_str = "\r\n".join(response).encode("utf-8") if self.ok: self.s.send(response_str + b"\r\n\r\n" + body) @@ -143,7 +136,7 @@ class HttpCli(object): self.log("GET " + self.req) # "embedded" resources - if self.vpath.startswith(u".cpr"): + if self.vpath.startswith(".cpr"): static_path = os.path.join(E.mod, "web/", self.vpath[5:]) if os.path.isfile(static_path): @@ -193,11 +186,11 @@ class HttpCli(object): act = self.parser.require("act", 64) - if act == u"bput": + if act == "bput": self.handle_plain_upload() return - if act == u"login": + if act == "login": self.handle_login() return @@ -208,10 +201,10 @@ class HttpCli(object): self.parser.drop() if pwd in self.auth.iuser: - msg = u"login ok" + msg = "login ok" else: - msg = u"naw dude" - pwd = u"x" # nosec + msg = "naw dude" + pwd = "x" # nosec h = ["Set-Cookie: cppwd={}; Path=/".format(pwd)] html = self.conn.tpl_msg.render(h1=msg, h2='ack', redir="/") @@ -235,7 +228,7 @@ class HttpCli(object): # TODO broker which avoid this race # and provides a new filename if taken - if os.path.exists(fn): + if os.path.exists(fsenc(fn)): fn += ".{:.6f}".format(time.time()) with open(fn, "wb") as f: @@ -254,10 +247,10 @@ class HttpCli(object): if not self.ok: status = "ERROR" - msg = u"{0} // {1} bytes // {2:.3f} MiB/s\n".format(status, sz_total, spd) + msg = "{0} // {1} bytes // {2:.3f} MiB/s\n".format(status, sz_total, spd) for sz, sha512 in files: - msg += u"sha512: {0} // {1} bytes\n".format(sha512[:56], sz) + msg += "sha512: {0} // {1} bytes\n".format(sha512[:56], sz) # truncated SHA-512 prevents length extension attacks; # using SHA-512/224, optionally SHA-512/256 = :64 @@ -276,10 +269,10 @@ class HttpCli(object): with open(log_fn, "wb") as f: f.write( ( - u"\n".join( + "\n".join( unicode(x) for x in [ - u":".join(unicode(x) for x in self.addr), + ":".join(unicode(x) for x in self.addr), msg.rstrip(), ] ) @@ -288,7 +281,7 @@ class HttpCli(object): ) def tx_file(self, path): - sz = os.path.getsize(path) + sz = os.path.getsize(fsenc(path)) mime = mimetypes.guess_type(path)[0] header = "HTTP/1.1 200 OK\r\nConnection: Keep-Alive\r\nContent-Type: {}\r\nContent-Length: {}\r\n\r\n".format( mime, sz @@ -299,7 +292,7 @@ class HttpCli(object): if self.ok: self.s.send(header) - with open(path, "rb") as f: + with open(fsenc(path), "rb") as f: while self.ok: buf = f.read(4096) if not buf: @@ -321,20 +314,20 @@ class HttpCli(object): self.loud_reply("TODO jupper {}".format(self.vpath)) def tx_browser(self): - vpath = u"" - vpnodes = [[u"/", u"/"]] + vpath = "" + vpnodes = [["/", "/"]] for node in self.vpath.split("/"): - vpath += u"/" + node - vpnodes.append([quote_plus(vpath, safe="/") + "/", cgi.escape(node)]) + vpath += "/" + node + vpnodes.append([quotep(vpath) + "/", cgi.escape(node)]) vn, rem = self.auth.vfs.get(self.vpath, self.uname, True, False) abspath = vn.canonical(rem) - if not os.path.exists(abspath): + if not os.path.exists(fsenc(abspath)): print(abspath) raise Pebkac("404 not found") - if not os.path.isdir(abspath): + if not os.path.isdir(fsenc(abspath)): return self.tx_file(abspath) fsroot, vfs_ls, vfs_virt = vn.ls(rem, self.uname) @@ -348,7 +341,7 @@ class HttpCli(object): href = vpath + "/" + fn fspath = fsroot + "/" + fn - inf = os.stat(fspath) + inf = os.stat(fsenc(fspath)) is_dir = stat.S_ISDIR(inf.st_mode) if is_dir: @@ -361,13 +354,7 @@ class HttpCli(object): dt = datetime.utcfromtimestamp(inf.st_mtime) dt = dt.strftime("%Y-%m-%d %H:%M:%S") - item = [ - margin, - quote_plus(href, safe="/"), - cgi.escape(fn, quote=True), - sz, - dt, - ] + item = [margin, quotep(href), cgi.escape(fn, quote=True), sz, dt] if is_dir: dirs.append(item) else: @@ -377,4 +364,4 @@ class HttpCli(object): html = self.conn.tpl_browser.render( vpnodes=vpnodes, files=dirs, can_upload=self.writable ) - self.reply(html.encode("utf-8")) + self.reply(html.encode("utf-8", "replace")) diff --git a/copyparty/httpconn.py b/copyparty/httpconn.py index ab87db70..27744c72 100644 --- a/copyparty/httpconn.py +++ b/copyparty/httpconn.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # coding: utf-8 -from __future__ import print_function +from __future__ import print_function, unicode_literals import os import jinja2 diff --git a/copyparty/httpsrv.py b/copyparty/httpsrv.py index 7aa58472..4058f12c 100644 --- a/copyparty/httpsrv.py +++ b/copyparty/httpsrv.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # coding: utf-8 -from __future__ import print_function +from __future__ import print_function, unicode_literals import time import threading diff --git a/copyparty/mpsrv.py b/copyparty/mpsrv.py index 5c3d7c46..84c04fa5 100644 --- a/copyparty/mpsrv.py +++ b/copyparty/mpsrv.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # coding: utf-8 -from __future__ import print_function +from __future__ import print_function, unicode_literals import sys import time diff --git a/copyparty/msgsvc.py b/copyparty/msgsvc.py index eab543d1..d18554f1 100644 --- a/copyparty/msgsvc.py +++ b/copyparty/msgsvc.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # coding: utf-8 -from __future__ import print_function +from __future__ import print_function, unicode_literals class MsgSvc(object): diff --git a/copyparty/stolen/surrogateescape.py b/copyparty/stolen/surrogateescape.py index 0dcc9fa6..d8830dfc 100644 --- a/copyparty/stolen/surrogateescape.py +++ b/copyparty/stolen/surrogateescape.py @@ -2,7 +2,10 @@ This is Victor Stinner's pure-Python implementation of PEP 383: the "surrogateescape" error handler of Python 3. -Source: misc/python/surrogateescape.py in https://bitbucket.org/haypo/misc +Scissored from the python-future module to avoid 4.4MB of additional dependencies: +https://github.com/PythonCharmers/python-future/blob/e12549c42ed3a38ece45b9d88c75f5f3ee4d658d/src/future/utils/surrogateescape.py + +Original source: misc/python/surrogateescape.py in https://bitbucket.org/haypo/misc """ # This code is released under the Python license and the BSD 2-clause license @@ -10,33 +13,33 @@ Source: misc/python/surrogateescape.py in https://bitbucket.org/haypo/misc import codecs import sys -from future import utils +PY3 = sys.version_info[0] > 2 +FS_ERRORS = "surrogateescape" -FS_ERRORS = 'surrogateescape' - -# # -- Python 2/3 compatibility ------------------------------------- -# FS_ERRORS = 'my_surrogateescape' def u(text): - if utils.PY3: + if PY3: return text else: - return text.decode('unicode_escape') + return text.decode("unicode_escape") + def b(data): - if utils.PY3: - return data.encode('latin1') + if PY3: + return data.encode("latin1") else: return data -if utils.PY3: + +if PY3: _unichr = chr bytes_chr = lambda code: bytes((code,)) else: _unichr = unichr bytes_chr = chr + def surrogateescape_handler(exc): """ Pure Python implementation of the PEP 383: the "surrogateescape" error @@ -44,7 +47,7 @@ def surrogateescape_handler(exc): character U+DCxx on decoding, and these are translated into the original bytes on encoding. """ - mystring = exc.object[exc.start:exc.end] + mystring = exc.object[exc.start : exc.end] try: if isinstance(exc, UnicodeDecodeError): @@ -75,9 +78,6 @@ def replace_surrogate_encode(mystring): """ decoded = [] for ch in mystring: - # if utils.PY3: - # code = ch - # else: code = ord(ch) # The following magic comes from Py3.3's Python/codecs.c file: @@ -114,16 +114,12 @@ def replace_surrogate_decode(mybytes): elif code <= 0x7F: decoded.append(_unichr(code)) else: - # # It may be a bad byte - # # Try swallowing it. - # continue - # print("RAISE!") raise NotASurrogateError return str().join(decoded) def encodefilename(fn): - if FS_ENCODING == 'ascii': + if FS_ENCODING == "ascii": # ASCII encoder of Python 2 expects that the error handler returns a # Unicode string encodable to ASCII, whereas our surrogateescape error # handler has to return bytes in 0x80-0xFF range. @@ -135,12 +131,12 @@ def encodefilename(fn): elif 0xDC80 <= code <= 0xDCFF: ch = bytes_chr(code - 0xDC00) else: - raise UnicodeEncodeError(FS_ENCODING, - fn, index, index+1, - 'ordinal not in range(128)') + raise UnicodeEncodeError( + FS_ENCODING, fn, index, index + 1, "ordinal not in range(128)" + ) encoded.append(ch) return bytes().join(encoded) - elif FS_ENCODING == 'utf-8': + elif FS_ENCODING == "utf-8": # UTF-8 encoder of Python 2 encodes surrogates, so U+DC80-U+DCFF # doesn't go through our error handler encoded = [] @@ -152,19 +148,22 @@ def encodefilename(fn): encoded.append(ch) else: raise UnicodeEncodeError( - FS_ENCODING, - fn, index, index+1, 'surrogates not allowed') + FS_ENCODING, fn, index, index + 1, "surrogates not allowed" + ) else: - ch_utf8 = ch.encode('utf-8') + ch_utf8 = ch.encode("utf-8") encoded.append(ch_utf8) return bytes().join(encoded) else: return fn.encode(FS_ENCODING, FS_ERRORS) + def decodefilename(fn): return fn.decode(FS_ENCODING, FS_ERRORS) -FS_ENCODING = 'ascii'; fn = b('[abc\xff]'); encoded = u('[abc\udcff]') + +FS_ENCODING = sys.getfilesystemencoding() +# FS_ENCODING = "ascii"; fn = b("[abc\xff]"); encoded = u("[abc\udcff]") # FS_ENCODING = 'cp932'; fn = b('[abc\x81\x00]'); encoded = u('[abc\udc81\x00]') # FS_ENCODING = 'UTF-8'; fn = b('[abc\xff]'); encoded = u('[abc\udcff]') @@ -178,21 +177,9 @@ def register_surrogateescape(): """ Registers the surrogateescape error handler on Python 2 (only) """ - if utils.PY3: + if PY3: return try: codecs.lookup_error(FS_ERRORS) except LookupError: codecs.register_error(FS_ERRORS, surrogateescape_handler) - - -if __name__ == '__main__': - pass - # # Tests: - # register_surrogateescape() - - # b = decodefilename(fn) - # assert b == encoded, "%r != %r" % (b, encoded) - # c = encodefilename(b) - # assert c == fn, '%r != %r' % (c, fn) - # # print("ok") diff --git a/copyparty/tcpsrv.py b/copyparty/tcpsrv.py index 430c7a74..5de0e88a 100644 --- a/copyparty/tcpsrv.py +++ b/copyparty/tcpsrv.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # coding: utf-8 -from __future__ import print_function +from __future__ import print_function, unicode_literals import sys import time @@ -31,8 +31,10 @@ class TcpSrv(object): try: s.connect(("10.255.255.255", 1)) ip = s.getsockname()[0] - except OSError: - pass + except (OSError, socket.error) as ex: + if ex.errno != 101: + raise + s.close() self.log("root", "available @ http://{0}:{1}/".format(ip, self.args.p)) @@ -41,7 +43,7 @@ class TcpSrv(object): self.srv.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) try: self.srv.bind((self.args.i, self.args.p)) - except OSError as ex: + except (OSError, socket.error) as ex: if ex.errno != 98: raise diff --git a/copyparty/util.py b/copyparty/util.py index dcd31dfb..8be58230 100644 --- a/copyparty/util.py +++ b/copyparty/util.py @@ -1,10 +1,25 @@ #!/usr/bin/env python # coding: utf-8 -from __future__ import print_function +from __future__ import print_function, unicode_literals import re +import sys import hashlib +from .__init__ import PY2 + +if not PY2: + from urllib.parse import unquote_to_bytes as unquote + from urllib.parse import quote_from_bytes as quote +else: + from urllib import unquote # pylint: disable=no-name-in-module + from urllib import quote + +from .stolen import surrogateescape + +surrogateescape.register_surrogateescape() +FS_ENCODING = sys.getfilesystemencoding() + class Unrecv(object): """ @@ -103,18 +118,18 @@ class MultipartParser(object): # this breaks on firefox uploads that contain \" # since firefox escapes " but forgets to escape \ # so it'll truncate after the \ - ret = u"" + ret = "" esc = False for ch in fn: if esc: - if ch in [u'"', u"\\"]: - ret += u'"' + if ch in ['"', "\\"]: + ret += '"' else: ret += esc + ch esc = False - elif ch == u"\\": + elif ch == "\\": esc = True - elif ch == u'"': + elif ch == '"': break else: ret += ch @@ -204,7 +219,7 @@ class MultipartParser(object): # discard junk before the first boundary for junk in self._read_data(): self.log( - u"discarding preamble: [{}]".format(junk.decode("utf-8", "ignore")) + "discarding preamble: [{}]".format(junk.decode("utf-8", "replace")) ) # nice, now make it fast @@ -220,7 +235,7 @@ class MultipartParser(object): if p_field != field_name: raise Pebkac('expected field "{}", got "{}"'.format(field_name, p_field)) - return self._read_value(p_data, max_len).decode("utf-8", "ignore") + return self._read_value(p_data, max_len).decode("utf-8", "surrogateescape") def drop(self): """discards the remaining multipart body""" @@ -261,29 +276,64 @@ def read_header(sr): ret += buf - return ret[:-4].decode("utf-8", "replace").split("\r\n") + return ret[:-4].decode("utf-8", "surrogateescape").split("\r\n") def undot(path): ret = [] - for node in path.split(u"/"): - if node in [u"", u"."]: + for node in path.split("/"): + if node in ["", "."]: continue - if node == u"..": + if node == "..": if ret: ret.pop() continue ret.append(node) - return u"/".join(ret) + return "/".join(ret) def sanitize_fn(fn): return fn.replace("\\", "/").split("/")[-1].strip() +def quotep(txt): + """url quoter which deals with bytes correctly""" + btxt = fsenc(txt) + quot1 = quote(btxt, safe=b"/") + if not PY2: + quot1 = quot1.encode('ascii') + + quot2 = quot1.replace(b" ", b"+") + return fsdec(quot2) + + +def unquotep(txt): + """url unquoter which deals with bytes correctly""" + btxt = fsenc(txt) + unq1 = btxt.replace(b"+", b" ") + unq2 = unquote(unq1) + return fsdec(unq2) + + +def fsdec(txt): + """decodes filesystem-bytes to wtf8""" + if PY2: + return surrogateescape.decodefilename(txt) + + return txt.decode(FS_ENCODING, "surrogateescape") + + +def fsenc(txt): + """encodes wtf8 to filesystem-bytes""" + if PY2: + return surrogateescape.encodefilename(txt) + + return txt.encode(FS_ENCODING, "surrogateescape") + + def hashcopy(actor, fin, fout): u32_lim = int((2 ** 31) * 0.9) hashobj = hashlib.sha512() @@ -302,10 +352,10 @@ def hashcopy(actor, fin, fout): def unescape_cookie(orig): # mw=idk; doot=qwe%2Crty%3Basd+fgh%2Bjkl%25zxc%26vbn # qwe,rty;asd fgh+jkl%zxc&vbn - ret = u"" - esc = u"" + ret = "" + esc = "" for ch in orig: - if ch == u"%": + if ch == "%": if len(esc) > 0: ret += esc esc = ch @@ -317,7 +367,7 @@ def unescape_cookie(orig): ret += chr(int(esc[1:], 16)) except: ret += esc - esc = u"" + esc = "" else: ret += ch diff --git a/docs/notes.sh b/docs/notes.sh index 94e03b89..fe721497 100644 --- a/docs/notes.sh +++ b/docs/notes.sh @@ -38,7 +38,9 @@ avg() { awk 'function pr(ncsz) {if (nsmp>0) {printf "%3s %s\n", csz, sum/nsmp} c ## ## bad filenames -echo hi > 'qwe,rty;asd fgh+jkl%zxc&vbn "rty'"'"'uio&asd fgh'.html +dirs=("$HOME/vfs/ほげ" "$HOME/vfs/ほげ/ぴよ" "$HOME/vfs/$(printf \\xed\\x91)" "$HOME/vfs/$(printf \\xed\\x91/\\xed\\x92)") +mkdir -p "${dirs[@]}" +for dir in "${dirs[@]}"; do for fn in ふが "$(printf \\xed\\x93)" 'qwe,rty;asd fgh+jkl%zxc&vbn "rty'"'"'uio&asd fgh'; do echo "$dir" > "$dir/$fn.html"; done; done ## diff --git a/setup.py b/setup.py index 3d701a73..875ea3f0 100755 --- a/setup.py +++ b/setup.py @@ -168,6 +168,7 @@ args = { "Programming Language :: Python :: 2", "Programming Language :: Python :: 2.7", "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.2", "Programming Language :: Python :: 3.3", "Programming Language :: Python :: 3.4", "Programming Language :: Python :: 3.5", diff --git a/tests/test_vfs.py b/tests/test_vfs.py index 18baad21..188d4aa1 100644 --- a/tests/test_vfs.py +++ b/tests/test_vfs.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # coding: utf-8 -from __future__ import print_function +from __future__ import print_function, unicode_literals import os import json