support mojibake and py3.2

2025-10-10 10:32:19 -06:00 · 2019-06-12 16:39:43 +00:00 · 2019-06-12 16:39:43 +00:00 · 63e089a5f0
parent bf95527e92
commit 63e089a5f0
15 changed files with 150 additions and 121 deletions
--- a/README.md
+++ b/README.md
@ -7,7 +7,7 @@
 turn your phone or raspi into a portable file server with resumable uploads/downloads using IE6 or any other browser
-* server runs on anything with `py2.7` or `py3.3+`
+* server runs on anything with `py2.7` or `py3.2+`
 * *resumable* uploads need `firefox 12+` / `chrome 6+` / `safari 6+` / `IE 10+`
 * code standard: `black`
@ -31,7 +31,7 @@ summary: it works
 # dependencies
 * `jinja2`
-  * pulls in `markupsafe`
+  * pulls in `markupsafe` as of v2.7; use jinja 2.6 on py3.2
 optional, enables thumbnails:
 * `Pillow` (requires py2.7 or py3.5+)
--- a/copyparty/init.py
+++ b/copyparty/init.py
@ -1,6 +1,6 @@
 #!/usr/bin/env python
 # coding: utf-8
-from __future__ import print_function
+from __future__ import print_function, unicode_literals
 import platform
 import sys
--- a/copyparty/main.py
+++ b/copyparty/main.py
@ -1,6 +1,6 @@
 #!/usr/bin/env python
 # coding: utf-8
-from __future__ import print_function
+from __future__ import print_function, unicode_literals
 """copyparty: http file sharing hub (py2/py3)"""
 __author__ = "ed <copyparty@ocv.me>"
--- a/copyparty/authsrv.py
+++ b/copyparty/authsrv.py
@ -1,12 +1,12 @@
 #!/usr/bin/env python
 # coding: utf-8
-from __future__ import print_function
+from __future__ import print_function, unicode_literals
 import os
 import threading
 from .__init__ import PY2
-from .util import undot, Pebkac
+from .util import undot, Pebkac, fsdec, fsenc
 class VFS(object):
@ -90,12 +90,12 @@ class VFS(object):
        if rem:
            rp += "/" + rem
-        return os.path.realpath(rp)
+        return fsdec(os.path.realpath(fsenc(rp)))
    def ls(self, rem, uname):
        """return user-readable [fsdir,real,virt] items at vpath"""
        abspath = self.canonical(rem)
-        real = os.listdir(abspath)
+        real = [fsdec(x) for x in os.listdir(fsenc(abspath))]
        real.sort()
        if rem:
            virt_vis = []
@ -182,7 +182,7 @@ class AuthSrv(object):
                    raise Exception('invalid mountpoint "{}"'.format(vol_dst))
                # cfg files override arguments and previous files
-                vol_src = os.path.abspath(vol_src)
+                vol_src = fsdec(os.path.abspath(fsenc(vol_src)))
                vol_dst = vol_dst.strip("/")
                mount[vol_dst] = vol_src
                mread[vol_dst] = []
@ -217,7 +217,7 @@ class AuthSrv(object):
            # list of src:dst:permset:permset:...
            # permset is [rwa]username
            for src, dst, perms in [x.split(":", 2) for x in self.args.v]:
-                src = os.path.abspath(src)
+                src = fsdec(os.path.abspath(fsenc(src)))
                dst = dst.strip("/")
                mount[dst] = src
                mread[dst] = []
--- a/copyparty/httpcli.py
+++ b/copyparty/httpcli.py
@ -1,6 +1,6 @@
 #!/usr/bin/env python
 # coding: utf-8
-from __future__ import print_function
+from __future__ import print_function, unicode_literals
 import os
 import stat
@ -14,11 +14,6 @@ from .util import *  # noqa  # pylint: disable=unused-wildcard-import
 if not PY2:
    unicode = str
    from urllib.parse import unquote_plus
    from urllib.parse import quote_plus
 else:
    from urllib import unquote_plus  # pylint: disable=no-name-in-module
    from urllib import quote_plus
 class HttpCli(object):
@ -76,8 +71,6 @@ class HttpCli(object):
        if self.uname:
            self.rvol = self.auth.vfs.user_tree(self.uname, readable=True)
            self.wvol = self.auth.vfs.user_tree(self.uname, writable=True)
            self.log(self.rvol)
            self.log(self.wvol)
        # split req into vpath + uparam
        uparam = {}
@ -100,7 +93,7 @@ class HttpCli(object):
                    uparam[k.lower()] = True
        self.uparam = uparam
-        self.vpath = unquote_plus(vpath)
+        self.vpath = unquotep(vpath)
        try:
            if mode == "GET":
@ -108,7 +101,7 @@ class HttpCli(object):
            elif mode == "POST":
                self.handle_post()
            else:
-                self.loud_reply(u'invalid HTTP mode "{0}"'.format(mode))
+                self.loud_reply('invalid HTTP mode "{0}"'.format(mode))
        except Pebkac as ex:
            self.loud_reply(str(ex))
@ -119,16 +112,16 @@ class HttpCli(object):
    def reply(self, body, status="200 OK", mime="text/html", headers=[]):
        # TODO something to reply with user-supplied values safely
        response = [
-            u"HTTP/1.1 " + status,
+            "HTTP/1.1 " + status,
-            u"Connection: Keep-Alive",
+            "Connection: Keep-Alive",
-            u"Content-Type: " + mime,
+            "Content-Type: " + mime,
-            u"Content-Length: " + str(len(body)),
+            "Content-Length: " + str(len(body)),
        ]
        for k, v in self.out_headers.items():
            response.append("{}: {}".format(k, v))
        response.extend(headers)
-        response_str = u"\r\n".join(response).encode("utf-8")
+        response_str = "\r\n".join(response).encode("utf-8")
        if self.ok:
            self.s.send(response_str + b"\r\n\r\n" + body)
@ -143,7 +136,7 @@ class HttpCli(object):
        self.log("GET  " + self.req)
        # "embedded" resources
-        if self.vpath.startswith(u".cpr"):
+        if self.vpath.startswith(".cpr"):
            static_path = os.path.join(E.mod, "web/", self.vpath[5:])
            if os.path.isfile(static_path):
@ -193,11 +186,11 @@ class HttpCli(object):
        act = self.parser.require("act", 64)
-        if act == u"bput":
+        if act == "bput":
            self.handle_plain_upload()
            return
-        if act == u"login":
+        if act == "login":
            self.handle_login()
            return
@ -208,10 +201,10 @@ class HttpCli(object):
        self.parser.drop()
        if pwd in self.auth.iuser:
-            msg = u"login ok"
+            msg = "login ok"
        else:
-            msg = u"naw dude"
+            msg = "naw dude"
-            pwd = u"x"  # nosec
+            pwd = "x"  # nosec
        h = ["Set-Cookie: cppwd={}; Path=/".format(pwd)]
        html = self.conn.tpl_msg.render(h1=msg, h2='<a href="/">ack</a>', redir="/")
@ -235,7 +228,7 @@ class HttpCli(object):
                # TODO broker which avoids this race
                # and provides a new filename if taken
-                if os.path.exists(fn):
+                if os.path.exists(fsenc(fn)):
                    fn += ".{:.6f}".format(time.time())
            with open(fn, "wb") as f:
@ -254,10 +247,10 @@ class HttpCli(object):
        if not self.ok:
            status = "ERROR"
-        msg = u"{0} // {1} bytes // {2:.3f} MiB/s\n".format(status, sz_total, spd)
+        msg = "{0} // {1} bytes // {2:.3f} MiB/s\n".format(status, sz_total, spd)
        for sz, sha512 in files:
-            msg += u"sha512: {0} // {1} bytes\n".format(sha512[:56], sz)
+            msg += "sha512: {0} // {1} bytes\n".format(sha512[:56], sz)
            # truncated SHA-512 prevents length extension attacks;
            # using SHA-512/224, optionally SHA-512/256 = :64
@ -276,10 +269,10 @@ class HttpCli(object):
            with open(log_fn, "wb") as f:
                f.write(
                    (
-                        u"\n".join(
+                        "\n".join(
                            unicode(x)
                            for x in [
-                                u":".join(unicode(x) for x in self.addr),
+                                ":".join(unicode(x) for x in self.addr),
                                msg.rstrip(),
                            ]
                        )
@ -288,7 +281,7 @@ class HttpCli(object):
                )
    def tx_file(self, path):
-        sz = os.path.getsize(path)
+        sz = os.path.getsize(fsenc(path))
        mime = mimetypes.guess_type(path)[0]
        header = "HTTP/1.1 200 OK\r\nConnection: Keep-Alive\r\nContent-Type: {}\r\nContent-Length: {}\r\n\r\n".format(
            mime, sz
@ -299,7 +292,7 @@ class HttpCli(object):
        if self.ok:
            self.s.send(header)
-        with open(path, "rb") as f:
+        with open(fsenc(path), "rb") as f:
            while self.ok:
                buf = f.read(4096)
                if not buf:
@ -321,20 +314,20 @@ class HttpCli(object):
        self.loud_reply("TODO jupper {}".format(self.vpath))
    def tx_browser(self):
-        vpath = u""
+        vpath = ""
-        vpnodes = [[u"/", u"/"]]
+        vpnodes = [["/", "/"]]
        for node in self.vpath.split("/"):
-            vpath += u"/" + node
+            vpath += "/" + node
-            vpnodes.append([quote_plus(vpath, safe="/") + "/", cgi.escape(node)])
+            vpnodes.append([quotep(vpath) + "/", cgi.escape(node)])
        vn, rem = self.auth.vfs.get(self.vpath, self.uname, True, False)
        abspath = vn.canonical(rem)
-        if not os.path.exists(abspath):
+        if not os.path.exists(fsenc(abspath)):
            print(abspath)
            raise Pebkac("404 not found")
-        if not os.path.isdir(abspath):
+        if not os.path.isdir(fsenc(abspath)):
            return self.tx_file(abspath)
        fsroot, vfs_ls, vfs_virt = vn.ls(rem, self.uname)
@ -348,7 +341,7 @@ class HttpCli(object):
                href = vpath + "/" + fn
            fspath = fsroot + "/" + fn
-            inf = os.stat(fspath)
+            inf = os.stat(fsenc(fspath))
            is_dir = stat.S_ISDIR(inf.st_mode)
            if is_dir:
@ -361,13 +354,7 @@ class HttpCli(object):
            dt = datetime.utcfromtimestamp(inf.st_mtime)
            dt = dt.strftime("%Y-%m-%d %H:%M:%S")
-            item = [
+            item = [margin, quotep(href), cgi.escape(fn, quote=True), sz, dt]
                margin,
                quote_plus(href, safe="/"),
                cgi.escape(fn, quote=True),
                sz,
                dt,
            ]
            if is_dir:
                dirs.append(item)
            else:
@ -377,4 +364,4 @@ class HttpCli(object):
        html = self.conn.tpl_browser.render(
            vpnodes=vpnodes, files=dirs, can_upload=self.writable
        )
-        self.reply(html.encode("utf-8"))
+        self.reply(html.encode("utf-8", "replace"))
--- a/copyparty/httpconn.py
+++ b/copyparty/httpconn.py
@ -1,6 +1,6 @@
 #!/usr/bin/env python
 # coding: utf-8
-from __future__ import print_function
+from __future__ import print_function, unicode_literals
 import os
 import jinja2
--- a/copyparty/httpsrv.py
+++ b/copyparty/httpsrv.py
@ -1,6 +1,6 @@
 #!/usr/bin/env python
 # coding: utf-8
-from __future__ import print_function
+from __future__ import print_function, unicode_literals
 import time
 import threading
--- a/copyparty/mpsrv.py
+++ b/copyparty/mpsrv.py
@ -1,6 +1,6 @@
 #!/usr/bin/env python
 # coding: utf-8
-from __future__ import print_function
+from __future__ import print_function, unicode_literals
 import sys
 import time
--- a/copyparty/msgsvc.py
+++ b/copyparty/msgsvc.py
@ -1,6 +1,6 @@
 #!/usr/bin/env python
 # coding: utf-8
-from __future__ import print_function
+from __future__ import print_function, unicode_literals
 class MsgSvc(object):
--- a/copyparty/stolen/surrogateescape.py
+++ b/copyparty/stolen/surrogateescape.py
@ -2,7 +2,10 @@
 This is Victor Stinner's pure-Python implementation of PEP 383: the "surrogateescape" error
 handler of Python 3.
-Source: misc/python/surrogateescape.py in https://bitbucket.org/haypo/misc
+Scissored from the python-future module to avoid 4.4MB of additional dependencies:
 https://github.com/PythonCharmers/python-future/blob/e12549c42ed3a38ece45b9d88c75f5f3ee4d658d/src/future/utils/surrogateescape.py
 Original source: misc/python/surrogateescape.py in https://bitbucket.org/haypo/misc
 """
 # This code is released under the Python license and the BSD 2-clause license
@ -10,33 +13,33 @@ Source: misc/python/surrogateescape.py in https://bitbucket.org/haypo/misc
 import codecs
 import sys
-from future import utils
+PY3 = sys.version_info[0] > 2
 FS_ERRORS = "surrogateescape"
 FS_ERRORS = 'surrogateescape'
 #     # -- Python 2/3 compatibility -------------------------------------
 #     FS_ERRORS = 'my_surrogateescape'
 def u(text):
-    if utils.PY3:
+    if PY3:
        return text
    else:
-        return text.decode('unicode_escape')
+        return text.decode("unicode_escape")
 def b(data):
-    if utils.PY3:
+    if PY3:
-        return data.encode('latin1')
+        return data.encode("latin1")
    else:
        return data
-if utils.PY3:
+
 if PY3:
    _unichr = chr
    bytes_chr = lambda code: bytes((code,))
 else:
    _unichr = unichr
    bytes_chr = chr
 def surrogateescape_handler(exc):
    """
    Pure Python implementation of the PEP 383: the "surrogateescape" error
@ -44,7 +47,7 @@ def surrogateescape_handler(exc):
    character U+DCxx on decoding, and these are translated into the
    original bytes on encoding.
    """
-    mystring = exc.object[exc.start:exc.end]
+    mystring = exc.object[exc.start : exc.end]
    try:
        if isinstance(exc, UnicodeDecodeError):
@ -75,9 +78,6 @@ def replace_surrogate_encode(mystring):
    """
    decoded = []
    for ch in mystring:
        # if utils.PY3:
        #     code = ch
        # else:
        code = ord(ch)
        # The following magic comes from Py3.3's Python/codecs.c file:
@ -114,16 +114,12 @@ def replace_surrogate_decode(mybytes):
        elif code <= 0x7F:
            decoded.append(_unichr(code))
        else:
            # # It may be a bad byte
            # # Try swallowing it.
            # continue
            # print("RAISE!")
            raise NotASurrogateError
    return str().join(decoded)
 def encodefilename(fn):
-    if FS_ENCODING == 'ascii':
+    if FS_ENCODING == "ascii":
        # ASCII encoder of Python 2 expects that the error handler returns a
        # Unicode string encodable to ASCII, whereas our surrogateescape error
        # handler has to return bytes in 0x80-0xFF range.
@ -135,12 +131,12 @@ def encodefilename(fn):
            elif 0xDC80 <= code <= 0xDCFF:
                ch = bytes_chr(code - 0xDC00)
            else:
-                raise UnicodeEncodeError(FS_ENCODING,
+                raise UnicodeEncodeError(
-                    fn, index, index+1,
+                    FS_ENCODING, fn, index, index + 1, "ordinal not in range(128)"
-                    'ordinal not in range(128)')
+                )
            encoded.append(ch)
        return bytes().join(encoded)
-    elif FS_ENCODING == 'utf-8':
+    elif FS_ENCODING == "utf-8":
        # UTF-8 encoder of Python 2 encodes surrogates, so U+DC80-U+DCFF
        # doesn't go through our error handler
        encoded = []
@ -152,19 +148,22 @@ def encodefilename(fn):
                    encoded.append(ch)
                else:
                    raise UnicodeEncodeError(
-                        FS_ENCODING,
+                        FS_ENCODING, fn, index, index + 1, "surrogates not allowed"
-                        fn, index, index+1, 'surrogates not allowed')
+                    )
            else:
-                ch_utf8 = ch.encode('utf-8')
+                ch_utf8 = ch.encode("utf-8")
                encoded.append(ch_utf8)
        return bytes().join(encoded)
    else:
        return fn.encode(FS_ENCODING, FS_ERRORS)
 def decodefilename(fn):
    """decode filesystem-bytes to text using FS_ENCODING and the FS_ERRORS handler"""
    decoded = fn.decode(FS_ENCODING, FS_ERRORS)
    return decoded
-FS_ENCODING = 'ascii'; fn = b('[abc\xff]'); encoded = u('[abc\udcff]')
+
 FS_ENCODING = sys.getfilesystemencoding()
 # FS_ENCODING = "ascii"; fn = b("[abc\xff]"); encoded = u("[abc\udcff]")
 # FS_ENCODING = 'cp932'; fn = b('[abc\x81\x00]'); encoded = u('[abc\udc81\x00]')
 # FS_ENCODING = 'UTF-8'; fn = b('[abc\xff]'); encoded = u('[abc\udcff]')
@ -178,21 +177,9 @@ def register_surrogateescape():
    """
    Registers the surrogateescape error handler on Python 2 (only)
    """
-    if utils.PY3:
+    if PY3:
        return
    try:
        codecs.lookup_error(FS_ERRORS)
    except LookupError:
        codecs.register_error(FS_ERRORS, surrogateescape_handler)
 if __name__ == '__main__':
    pass
    # # Tests:
    # register_surrogateescape()
    # b = decodefilename(fn)
    # assert b == encoded, "%r != %r" % (b, encoded)
    # c = encodefilename(b)
    # assert c == fn, '%r != %r' % (c, fn)
    # # print("ok")
--- a/copyparty/tcpsrv.py
+++ b/copyparty/tcpsrv.py
@ -1,6 +1,6 @@
 #!/usr/bin/env python
 # coding: utf-8
-from __future__ import print_function
+from __future__ import print_function, unicode_literals
 import sys
 import time
@ -31,8 +31,10 @@ class TcpSrv(object):
            try:
                s.connect(("10.255.255.255", 1))
                ip = s.getsockname()[0]
-            except OSError:
+            except (OSError, socket.error) as ex:
-                pass
+                if ex.errno != 101:
                    raise
            s.close()
        self.log("root", "available @ http://{0}:{1}/".format(ip, self.args.p))
@ -41,7 +43,7 @@ class TcpSrv(object):
        self.srv.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        try:
            self.srv.bind((self.args.i, self.args.p))
-        except OSError as ex:
+        except (OSError, socket.error) as ex:
            if ex.errno != 98:
                raise
--- a/copyparty/util.py
+++ b/copyparty/util.py
@ -1,10 +1,25 @@
 #!/usr/bin/env python
 # coding: utf-8
-from __future__ import print_function
+from __future__ import print_function, unicode_literals
 import re
 import sys
 import hashlib
 from .__init__ import PY2
 if not PY2:
    from urllib.parse import unquote_to_bytes as unquote
    from urllib.parse import quote_from_bytes as quote
 else:
    from urllib import unquote  # pylint: disable=no-name-in-module
    from urllib import quote
 from .stolen import surrogateescape
 surrogateescape.register_surrogateescape()
 FS_ENCODING = sys.getfilesystemencoding()
 class Unrecv(object):
    """
@ -103,18 +118,18 @@ class MultipartParser(object):
            # this breaks on firefox uploads that contain \"
            # since firefox escapes " but forgets to escape \
            # so it'll truncate after the \
-            ret = u""
+            ret = ""
            esc = False
            for ch in fn:
                if esc:
-                    if ch in [u'"', u"\\"]:
+                    if ch in ['"', "\\"]:
-                        ret += u'"'
+                        ret += '"'
                    else:
                        ret += esc + ch
                    esc = False
-                elif ch == u"\\":
+                elif ch == "\\":
                    esc = True
-                elif ch == u'"':
+                elif ch == '"':
                    break
                else:
                    ret += ch
@ -204,7 +219,7 @@ class MultipartParser(object):
        # discard junk before the first boundary
        for junk in self._read_data():
            self.log(
-                u"discarding preamble: [{}]".format(junk.decode("utf-8", "ignore"))
+                "discarding preamble: [{}]".format(junk.decode("utf-8", "replace"))
            )
        # nice, now make it fast
@ -220,7 +235,7 @@ class MultipartParser(object):
        if p_field != field_name:
            raise Pebkac('expected field "{}", got "{}"'.format(field_name, p_field))
-        return self._read_value(p_data, max_len).decode("utf-8", "ignore")
+        return self._read_value(p_data, max_len).decode("utf-8", "surrogateescape")
    def drop(self):
        """discards the remaining multipart body"""
@ -261,29 +276,64 @@ def read_header(sr):
        ret += buf
-    return ret[:-4].decode("utf-8", "replace").split("\r\n")
+    return ret[:-4].decode("utf-8", "surrogateescape").split("\r\n")
 def undot(path):
    ret = []
-    for node in path.split(u"/"):
+    for node in path.split("/"):
-        if node in [u"", u"."]:
+        if node in ["", "."]:
            continue
-        if node == u"..":
+        if node == "..":
            if ret:
                ret.pop()
            continue
        ret.append(node)
-    return u"/".join(ret)
+    return "/".join(ret)
 def sanitize_fn(fn):
    """reduce a filename to its final path component

    backslashes are treated as path separators too, and surrounding
    whitespace is stripped from the result
    """
    normalized = fn.replace("\\", "/")
    basename = normalized.rsplit("/", 1)[-1]
    return basename.strip()
 def quotep(txt):
    """percent-encode a path for use in a URL, going via filesystem-bytes

    the text is converted with fsenc before quoting and the quoted result
    is passed back through fsdec; "/" is left unescaped, and any literal
    space left in the quoted bytes is replaced with "+"
    """
    quoted = quote(fsenc(txt), safe=b"/")
    if not PY2:
        # the py3 quoter yields text; the replace below needs bytes
        quoted = quoted.encode("ascii")

    return fsdec(quoted.replace(b" ", b"+"))
 def unquotep(txt):
    """decode a url-quoted path, going via filesystem-bytes

    "+" is turned into a space before the percent-escapes are resolved,
    so an escaped plus (%2B) comes out as a literal "+"
    """
    raw = fsenc(txt).replace(b"+", b" ")
    return fsdec(unquote(raw))
 def fsdec(txt):
    """turn filesystem-bytes into text (wtf8)

    py3 decodes with the native "surrogateescape" error handler;
    py2 goes through the bundled surrogateescape module instead
    """
    if not PY2:
        return txt.decode(FS_ENCODING, "surrogateescape")

    return surrogateescape.decodefilename(txt)
 def fsenc(txt):
    """turn text (wtf8) back into filesystem-bytes

    py3 encodes with the native "surrogateescape" error handler;
    py2 goes through the bundled surrogateescape module instead
    """
    if not PY2:
        return txt.encode(FS_ENCODING, "surrogateescape")

    return surrogateescape.encodefilename(txt)
 def hashcopy(actor, fin, fout):
    u32_lim = int((2 ** 31) * 0.9)
    hashobj = hashlib.sha512()
@ -302,10 +352,10 @@ def hashcopy(actor, fin, fout):
 def unescape_cookie(orig):
    # mw=idk; doot=qwe%2Crty%3Basd+fgh%2Bjkl%25zxc%26vbn  # qwe,rty;asd fgh+jkl%zxc&vbn
-    ret = u""
+    ret = ""
-    esc = u""
+    esc = ""
    for ch in orig:
-        if ch == u"%":
+        if ch == "%":
            if len(esc) > 0:
                ret += esc
            esc = ch
@ -317,7 +367,7 @@ def unescape_cookie(orig):
                    ret += chr(int(esc[1:], 16))
                except:
                    ret += esc
-                    esc = u""
+                    esc = ""
        else:
            ret += ch
--- a/docs/notes.sh
+++ b/docs/notes.sh
@ -38,7 +38,9 @@ avg() { awk 'function pr(ncsz) {if (nsmp>0) {printf "%3s %s\n", csz, sum/nsmp} c
 ##
 ## bad filenames
-echo hi > 'qwe,rty;asd fgh+jkl%zxc&vbn <qwe>"rty'"'"'uio&asd&nbsp;fgh'.html
+dirs=("$HOME/vfs/ほげ" "$HOME/vfs/ほげ/ぴよ" "$HOME/vfs/$(printf \\xed\\x91)" "$HOME/vfs/$(printf \\xed\\x91/\\xed\\x92)")
 mkdir -p "${dirs[@]}"
 for dir in "${dirs[@]}"; do for fn in ふが "$(printf \\xed\\x93)" 'qwe,rty;asd fgh+jkl%zxc&vbn <qwe>"rty'"'"'uio&asd&nbsp;fgh'; do echo "$dir" > "$dir/$fn.html"; done; done
 ##
--- a/setup.py
+++ b/setup.py
@ -168,6 +168,7 @@ args = {
        "Programming Language :: Python :: 2",
        "Programming Language :: Python :: 2.7",
        "Programming Language :: Python :: 3",
        "Programming Language :: Python :: 3.2",
        "Programming Language :: Python :: 3.3",
        "Programming Language :: Python :: 3.4",
        "Programming Language :: Python :: 3.5",
--- a/tests/test_vfs.py
+++ b/tests/test_vfs.py
@ -1,6 +1,6 @@
 #!/usr/bin/env python
 # coding: utf-8
-from __future__ import print_function
+from __future__ import print_function, unicode_literals
 import os
 import json