support mojibake and py3.2

This commit is contained in:
ed 2019-06-12 16:39:43 +00:00
parent bf95527e92
commit 63e089a5f0
15 changed files with 150 additions and 121 deletions

View file

@ -7,7 +7,7 @@
turn your phone or raspi into a portable file server with resumable uploads/downloads using IE6 or any other browser turn your phone or raspi into a portable file server with resumable uploads/downloads using IE6 or any other browser
* server runs on anything with `py2.7` or `py3.3+` * server runs on anything with `py2.7` or `py3.2+`
* *resumable* uploads need `firefox 12+` / `chrome 6+` / `safari 6+` / `IE 10+` * *resumable* uploads need `firefox 12+` / `chrome 6+` / `safari 6+` / `IE 10+`
* code standard: `black` * code standard: `black`
@ -31,7 +31,7 @@ summary: it works
# dependencies # dependencies
* `jinja2` * `jinja2`
* pulls in `markupsafe` * pulls in `markupsafe` as of v2.7; use jinja 2.6 on py3.2
optional, enables thumbnails: optional, enables thumbnails:
* `Pillow` (requires py2.7 or py3.5+) * `Pillow` (requires py2.7 or py3.5+)

View file

@ -1,6 +1,6 @@
#!/usr/bin/env python #!/usr/bin/env python
# coding: utf-8 # coding: utf-8
from __future__ import print_function from __future__ import print_function, unicode_literals
import platform import platform
import sys import sys

View file

@ -1,6 +1,6 @@
#!/usr/bin/env python #!/usr/bin/env python
# coding: utf-8 # coding: utf-8
from __future__ import print_function from __future__ import print_function, unicode_literals
"""copyparty: http file sharing hub (py2/py3)""" """copyparty: http file sharing hub (py2/py3)"""
__author__ = "ed <copyparty@ocv.me>" __author__ = "ed <copyparty@ocv.me>"

View file

@ -1,12 +1,12 @@
#!/usr/bin/env python #!/usr/bin/env python
# coding: utf-8 # coding: utf-8
from __future__ import print_function from __future__ import print_function, unicode_literals
import os import os
import threading import threading
from .__init__ import PY2 from .__init__ import PY2
from .util import undot, Pebkac from .util import undot, Pebkac, fsdec, fsenc
class VFS(object): class VFS(object):
@ -90,12 +90,12 @@ class VFS(object):
if rem: if rem:
rp += "/" + rem rp += "/" + rem
return os.path.realpath(rp) return fsdec(os.path.realpath(fsenc(rp)))
def ls(self, rem, uname): def ls(self, rem, uname):
"""return user-readable [fsdir,real,virt] items at vpath""" """return user-readable [fsdir,real,virt] items at vpath"""
abspath = self.canonical(rem) abspath = self.canonical(rem)
real = os.listdir(abspath) real = [fsdec(x) for x in os.listdir(fsenc(abspath))]
real.sort() real.sort()
if rem: if rem:
virt_vis = [] virt_vis = []
@ -182,7 +182,7 @@ class AuthSrv(object):
raise Exception('invalid mountpoint "{}"'.format(vol_dst)) raise Exception('invalid mountpoint "{}"'.format(vol_dst))
# cfg files override arguments and previous files # cfg files override arguments and previous files
vol_src = os.path.abspath(vol_src) vol_src = fsdec(os.path.abspath(fsenc(vol_src)))
vol_dst = vol_dst.strip("/") vol_dst = vol_dst.strip("/")
mount[vol_dst] = vol_src mount[vol_dst] = vol_src
mread[vol_dst] = [] mread[vol_dst] = []
@ -217,7 +217,7 @@ class AuthSrv(object):
# list of src:dst:permset:permset:... # list of src:dst:permset:permset:...
# permset is [rwa]username # permset is [rwa]username
for src, dst, perms in [x.split(":", 2) for x in self.args.v]: for src, dst, perms in [x.split(":", 2) for x in self.args.v]:
src = os.path.abspath(src) src = fsdec(os.path.abspath(fsenc(src)))
dst = dst.strip("/") dst = dst.strip("/")
mount[dst] = src mount[dst] = src
mread[dst] = [] mread[dst] = []

View file

@ -1,6 +1,6 @@
#!/usr/bin/env python #!/usr/bin/env python
# coding: utf-8 # coding: utf-8
from __future__ import print_function from __future__ import print_function, unicode_literals
import os import os
import stat import stat
@ -14,11 +14,6 @@ from .util import * # noqa # pylint: disable=unused-wildcard-import
if not PY2: if not PY2:
unicode = str unicode = str
from urllib.parse import unquote_plus
from urllib.parse import quote_plus
else:
from urllib import unquote_plus # pylint: disable=no-name-in-module
from urllib import quote_plus
class HttpCli(object): class HttpCli(object):
@ -76,8 +71,6 @@ class HttpCli(object):
if self.uname: if self.uname:
self.rvol = self.auth.vfs.user_tree(self.uname, readable=True) self.rvol = self.auth.vfs.user_tree(self.uname, readable=True)
self.wvol = self.auth.vfs.user_tree(self.uname, writable=True) self.wvol = self.auth.vfs.user_tree(self.uname, writable=True)
self.log(self.rvol)
self.log(self.wvol)
# split req into vpath + uparam # split req into vpath + uparam
uparam = {} uparam = {}
@ -100,7 +93,7 @@ class HttpCli(object):
uparam[k.lower()] = True uparam[k.lower()] = True
self.uparam = uparam self.uparam = uparam
self.vpath = unquote_plus(vpath) self.vpath = unquotep(vpath)
try: try:
if mode == "GET": if mode == "GET":
@ -108,7 +101,7 @@ class HttpCli(object):
elif mode == "POST": elif mode == "POST":
self.handle_post() self.handle_post()
else: else:
self.loud_reply(u'invalid HTTP mode "{0}"'.format(mode)) self.loud_reply('invalid HTTP mode "{0}"'.format(mode))
except Pebkac as ex: except Pebkac as ex:
self.loud_reply(str(ex)) self.loud_reply(str(ex))
@ -119,16 +112,16 @@ class HttpCli(object):
def reply(self, body, status="200 OK", mime="text/html", headers=[]): def reply(self, body, status="200 OK", mime="text/html", headers=[]):
# TODO something to reply with user-supplied values safely # TODO something to reply with user-supplied values safely
response = [ response = [
u"HTTP/1.1 " + status, "HTTP/1.1 " + status,
u"Connection: Keep-Alive", "Connection: Keep-Alive",
u"Content-Type: " + mime, "Content-Type: " + mime,
u"Content-Length: " + str(len(body)), "Content-Length: " + str(len(body)),
] ]
for k, v in self.out_headers.items(): for k, v in self.out_headers.items():
response.append("{}: {}".format(k, v)) response.append("{}: {}".format(k, v))
response.extend(headers) response.extend(headers)
response_str = u"\r\n".join(response).encode("utf-8") response_str = "\r\n".join(response).encode("utf-8")
if self.ok: if self.ok:
self.s.send(response_str + b"\r\n\r\n" + body) self.s.send(response_str + b"\r\n\r\n" + body)
@ -143,7 +136,7 @@ class HttpCli(object):
self.log("GET " + self.req) self.log("GET " + self.req)
# "embedded" resources # "embedded" resources
if self.vpath.startswith(u".cpr"): if self.vpath.startswith(".cpr"):
static_path = os.path.join(E.mod, "web/", self.vpath[5:]) static_path = os.path.join(E.mod, "web/", self.vpath[5:])
if os.path.isfile(static_path): if os.path.isfile(static_path):
@ -193,11 +186,11 @@ class HttpCli(object):
act = self.parser.require("act", 64) act = self.parser.require("act", 64)
if act == u"bput": if act == "bput":
self.handle_plain_upload() self.handle_plain_upload()
return return
if act == u"login": if act == "login":
self.handle_login() self.handle_login()
return return
@ -208,10 +201,10 @@ class HttpCli(object):
self.parser.drop() self.parser.drop()
if pwd in self.auth.iuser: if pwd in self.auth.iuser:
msg = u"login ok" msg = "login ok"
else: else:
msg = u"naw dude" msg = "naw dude"
pwd = u"x" # nosec pwd = "x" # nosec
h = ["Set-Cookie: cppwd={}; Path=/".format(pwd)] h = ["Set-Cookie: cppwd={}; Path=/".format(pwd)]
html = self.conn.tpl_msg.render(h1=msg, h2='<a href="/">ack</a>', redir="/") html = self.conn.tpl_msg.render(h1=msg, h2='<a href="/">ack</a>', redir="/")
@ -235,7 +228,7 @@ class HttpCli(object):
# TODO broker which avoid this race # TODO broker which avoid this race
# and provides a new filename if taken # and provides a new filename if taken
if os.path.exists(fn): if os.path.exists(fsenc(fn)):
fn += ".{:.6f}".format(time.time()) fn += ".{:.6f}".format(time.time())
with open(fn, "wb") as f: with open(fn, "wb") as f:
@ -254,10 +247,10 @@ class HttpCli(object):
if not self.ok: if not self.ok:
status = "ERROR" status = "ERROR"
msg = u"{0} // {1} bytes // {2:.3f} MiB/s\n".format(status, sz_total, spd) msg = "{0} // {1} bytes // {2:.3f} MiB/s\n".format(status, sz_total, spd)
for sz, sha512 in files: for sz, sha512 in files:
msg += u"sha512: {0} // {1} bytes\n".format(sha512[:56], sz) msg += "sha512: {0} // {1} bytes\n".format(sha512[:56], sz)
# truncated SHA-512 prevents length extension attacks; # truncated SHA-512 prevents length extension attacks;
# using SHA-512/224, optionally SHA-512/256 = :64 # using SHA-512/224, optionally SHA-512/256 = :64
@ -276,10 +269,10 @@ class HttpCli(object):
with open(log_fn, "wb") as f: with open(log_fn, "wb") as f:
f.write( f.write(
( (
u"\n".join( "\n".join(
unicode(x) unicode(x)
for x in [ for x in [
u":".join(unicode(x) for x in self.addr), ":".join(unicode(x) for x in self.addr),
msg.rstrip(), msg.rstrip(),
] ]
) )
@ -288,7 +281,7 @@ class HttpCli(object):
) )
def tx_file(self, path): def tx_file(self, path):
sz = os.path.getsize(path) sz = os.path.getsize(fsenc(path))
mime = mimetypes.guess_type(path)[0] mime = mimetypes.guess_type(path)[0]
header = "HTTP/1.1 200 OK\r\nConnection: Keep-Alive\r\nContent-Type: {}\r\nContent-Length: {}\r\n\r\n".format( header = "HTTP/1.1 200 OK\r\nConnection: Keep-Alive\r\nContent-Type: {}\r\nContent-Length: {}\r\n\r\n".format(
mime, sz mime, sz
@ -299,7 +292,7 @@ class HttpCli(object):
if self.ok: if self.ok:
self.s.send(header) self.s.send(header)
with open(path, "rb") as f: with open(fsenc(path), "rb") as f:
while self.ok: while self.ok:
buf = f.read(4096) buf = f.read(4096)
if not buf: if not buf:
@ -321,20 +314,20 @@ class HttpCli(object):
self.loud_reply("TODO jupper {}".format(self.vpath)) self.loud_reply("TODO jupper {}".format(self.vpath))
def tx_browser(self): def tx_browser(self):
vpath = u"" vpath = ""
vpnodes = [[u"/", u"/"]] vpnodes = [["/", "/"]]
for node in self.vpath.split("/"): for node in self.vpath.split("/"):
vpath += u"/" + node vpath += "/" + node
vpnodes.append([quote_plus(vpath, safe="/") + "/", cgi.escape(node)]) vpnodes.append([quotep(vpath) + "/", cgi.escape(node)])
vn, rem = self.auth.vfs.get(self.vpath, self.uname, True, False) vn, rem = self.auth.vfs.get(self.vpath, self.uname, True, False)
abspath = vn.canonical(rem) abspath = vn.canonical(rem)
if not os.path.exists(abspath): if not os.path.exists(fsenc(abspath)):
print(abspath) print(abspath)
raise Pebkac("404 not found") raise Pebkac("404 not found")
if not os.path.isdir(abspath): if not os.path.isdir(fsenc(abspath)):
return self.tx_file(abspath) return self.tx_file(abspath)
fsroot, vfs_ls, vfs_virt = vn.ls(rem, self.uname) fsroot, vfs_ls, vfs_virt = vn.ls(rem, self.uname)
@ -348,7 +341,7 @@ class HttpCli(object):
href = vpath + "/" + fn href = vpath + "/" + fn
fspath = fsroot + "/" + fn fspath = fsroot + "/" + fn
inf = os.stat(fspath) inf = os.stat(fsenc(fspath))
is_dir = stat.S_ISDIR(inf.st_mode) is_dir = stat.S_ISDIR(inf.st_mode)
if is_dir: if is_dir:
@ -361,13 +354,7 @@ class HttpCli(object):
dt = datetime.utcfromtimestamp(inf.st_mtime) dt = datetime.utcfromtimestamp(inf.st_mtime)
dt = dt.strftime("%Y-%m-%d %H:%M:%S") dt = dt.strftime("%Y-%m-%d %H:%M:%S")
item = [ item = [margin, quotep(href), cgi.escape(fn, quote=True), sz, dt]
margin,
quote_plus(href, safe="/"),
cgi.escape(fn, quote=True),
sz,
dt,
]
if is_dir: if is_dir:
dirs.append(item) dirs.append(item)
else: else:
@ -377,4 +364,4 @@ class HttpCli(object):
html = self.conn.tpl_browser.render( html = self.conn.tpl_browser.render(
vpnodes=vpnodes, files=dirs, can_upload=self.writable vpnodes=vpnodes, files=dirs, can_upload=self.writable
) )
self.reply(html.encode("utf-8")) self.reply(html.encode("utf-8", "replace"))

View file

@ -1,6 +1,6 @@
#!/usr/bin/env python #!/usr/bin/env python
# coding: utf-8 # coding: utf-8
from __future__ import print_function from __future__ import print_function, unicode_literals
import os import os
import jinja2 import jinja2

View file

@ -1,6 +1,6 @@
#!/usr/bin/env python #!/usr/bin/env python
# coding: utf-8 # coding: utf-8
from __future__ import print_function from __future__ import print_function, unicode_literals
import time import time
import threading import threading

View file

@ -1,6 +1,6 @@
#!/usr/bin/env python #!/usr/bin/env python
# coding: utf-8 # coding: utf-8
from __future__ import print_function from __future__ import print_function, unicode_literals
import sys import sys
import time import time

View file

@ -1,6 +1,6 @@
#!/usr/bin/env python #!/usr/bin/env python
# coding: utf-8 # coding: utf-8
from __future__ import print_function from __future__ import print_function, unicode_literals
class MsgSvc(object): class MsgSvc(object):

View file

@ -2,7 +2,10 @@
This is Victor Stinner's pure-Python implementation of PEP 383: the "surrogateescape" error This is Victor Stinner's pure-Python implementation of PEP 383: the "surrogateescape" error
handler of Python 3. handler of Python 3.
Source: misc/python/surrogateescape.py in https://bitbucket.org/haypo/misc Scissored from the python-future module to avoid 4.4MB of additional dependencies:
https://github.com/PythonCharmers/python-future/blob/e12549c42ed3a38ece45b9d88c75f5f3ee4d658d/src/future/utils/surrogateescape.py
Original source: misc/python/surrogateescape.py in https://bitbucket.org/haypo/misc
""" """
# This code is released under the Python license and the BSD 2-clause license # This code is released under the Python license and the BSD 2-clause license
@ -10,33 +13,33 @@ Source: misc/python/surrogateescape.py in https://bitbucket.org/haypo/misc
import codecs import codecs
import sys import sys
from future import utils PY3 = sys.version_info[0] > 2
FS_ERRORS = "surrogateescape"
FS_ERRORS = 'surrogateescape'
# # -- Python 2/3 compatibility -------------------------------------
# FS_ERRORS = 'my_surrogateescape'
def u(text): def u(text):
if utils.PY3: if PY3:
return text return text
else: else:
return text.decode('unicode_escape') return text.decode("unicode_escape")
def b(data): def b(data):
if utils.PY3: if PY3:
return data.encode('latin1') return data.encode("latin1")
else: else:
return data return data
if utils.PY3:
if PY3:
_unichr = chr _unichr = chr
bytes_chr = lambda code: bytes((code,)) bytes_chr = lambda code: bytes((code,))
else: else:
_unichr = unichr _unichr = unichr
bytes_chr = chr bytes_chr = chr
def surrogateescape_handler(exc): def surrogateescape_handler(exc):
""" """
Pure Python implementation of the PEP 383: the "surrogateescape" error Pure Python implementation of the PEP 383: the "surrogateescape" error
@ -44,7 +47,7 @@ def surrogateescape_handler(exc):
character U+DCxx on decoding, and these are translated into the character U+DCxx on decoding, and these are translated into the
original bytes on encoding. original bytes on encoding.
""" """
mystring = exc.object[exc.start:exc.end] mystring = exc.object[exc.start : exc.end]
try: try:
if isinstance(exc, UnicodeDecodeError): if isinstance(exc, UnicodeDecodeError):
@ -75,9 +78,6 @@ def replace_surrogate_encode(mystring):
""" """
decoded = [] decoded = []
for ch in mystring: for ch in mystring:
# if utils.PY3:
# code = ch
# else:
code = ord(ch) code = ord(ch)
# The following magic comes from Py3.3's Python/codecs.c file: # The following magic comes from Py3.3's Python/codecs.c file:
@ -114,16 +114,12 @@ def replace_surrogate_decode(mybytes):
elif code <= 0x7F: elif code <= 0x7F:
decoded.append(_unichr(code)) decoded.append(_unichr(code))
else: else:
# # It may be a bad byte
# # Try swallowing it.
# continue
# print("RAISE!")
raise NotASurrogateError raise NotASurrogateError
return str().join(decoded) return str().join(decoded)
def encodefilename(fn): def encodefilename(fn):
if FS_ENCODING == 'ascii': if FS_ENCODING == "ascii":
# ASCII encoder of Python 2 expects that the error handler returns a # ASCII encoder of Python 2 expects that the error handler returns a
# Unicode string encodable to ASCII, whereas our surrogateescape error # Unicode string encodable to ASCII, whereas our surrogateescape error
# handler has to return bytes in 0x80-0xFF range. # handler has to return bytes in 0x80-0xFF range.
@ -135,12 +131,12 @@ def encodefilename(fn):
elif 0xDC80 <= code <= 0xDCFF: elif 0xDC80 <= code <= 0xDCFF:
ch = bytes_chr(code - 0xDC00) ch = bytes_chr(code - 0xDC00)
else: else:
raise UnicodeEncodeError(FS_ENCODING, raise UnicodeEncodeError(
fn, index, index+1, FS_ENCODING, fn, index, index + 1, "ordinal not in range(128)"
'ordinal not in range(128)') )
encoded.append(ch) encoded.append(ch)
return bytes().join(encoded) return bytes().join(encoded)
elif FS_ENCODING == 'utf-8': elif FS_ENCODING == "utf-8":
# UTF-8 encoder of Python 2 encodes surrogates, so U+DC80-U+DCFF # UTF-8 encoder of Python 2 encodes surrogates, so U+DC80-U+DCFF
# doesn't go through our error handler # doesn't go through our error handler
encoded = [] encoded = []
@ -152,19 +148,22 @@ def encodefilename(fn):
encoded.append(ch) encoded.append(ch)
else: else:
raise UnicodeEncodeError( raise UnicodeEncodeError(
FS_ENCODING, FS_ENCODING, fn, index, index + 1, "surrogates not allowed"
fn, index, index+1, 'surrogates not allowed') )
else: else:
ch_utf8 = ch.encode('utf-8') ch_utf8 = ch.encode("utf-8")
encoded.append(ch_utf8) encoded.append(ch_utf8)
return bytes().join(encoded) return bytes().join(encoded)
else: else:
return fn.encode(FS_ENCODING, FS_ERRORS) return fn.encode(FS_ENCODING, FS_ERRORS)
def decodefilename(fn): def decodefilename(fn):
return fn.decode(FS_ENCODING, FS_ERRORS) return fn.decode(FS_ENCODING, FS_ERRORS)
FS_ENCODING = 'ascii'; fn = b('[abc\xff]'); encoded = u('[abc\udcff]')
FS_ENCODING = sys.getfilesystemencoding()
# FS_ENCODING = "ascii"; fn = b("[abc\xff]"); encoded = u("[abc\udcff]")
# FS_ENCODING = 'cp932'; fn = b('[abc\x81\x00]'); encoded = u('[abc\udc81\x00]') # FS_ENCODING = 'cp932'; fn = b('[abc\x81\x00]'); encoded = u('[abc\udc81\x00]')
# FS_ENCODING = 'UTF-8'; fn = b('[abc\xff]'); encoded = u('[abc\udcff]') # FS_ENCODING = 'UTF-8'; fn = b('[abc\xff]'); encoded = u('[abc\udcff]')
@ -178,21 +177,9 @@ def register_surrogateescape():
""" """
Registers the surrogateescape error handler on Python 2 (only) Registers the surrogateescape error handler on Python 2 (only)
""" """
if utils.PY3: if PY3:
return return
try: try:
codecs.lookup_error(FS_ERRORS) codecs.lookup_error(FS_ERRORS)
except LookupError: except LookupError:
codecs.register_error(FS_ERRORS, surrogateescape_handler) codecs.register_error(FS_ERRORS, surrogateescape_handler)
if __name__ == '__main__':
pass
# # Tests:
# register_surrogateescape()
# b = decodefilename(fn)
# assert b == encoded, "%r != %r" % (b, encoded)
# c = encodefilename(b)
# assert c == fn, '%r != %r' % (c, fn)
# # print("ok")

View file

@ -1,6 +1,6 @@
#!/usr/bin/env python #!/usr/bin/env python
# coding: utf-8 # coding: utf-8
from __future__ import print_function from __future__ import print_function, unicode_literals
import sys import sys
import time import time
@ -31,8 +31,10 @@ class TcpSrv(object):
try: try:
s.connect(("10.255.255.255", 1)) s.connect(("10.255.255.255", 1))
ip = s.getsockname()[0] ip = s.getsockname()[0]
except OSError: except (OSError, socket.error) as ex:
pass if ex.errno != 101:
raise
s.close() s.close()
self.log("root", "available @ http://{0}:{1}/".format(ip, self.args.p)) self.log("root", "available @ http://{0}:{1}/".format(ip, self.args.p))
@ -41,7 +43,7 @@ class TcpSrv(object):
self.srv.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) self.srv.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
try: try:
self.srv.bind((self.args.i, self.args.p)) self.srv.bind((self.args.i, self.args.p))
except OSError as ex: except (OSError, socket.error) as ex:
if ex.errno != 98: if ex.errno != 98:
raise raise

View file

@ -1,10 +1,25 @@
#!/usr/bin/env python #!/usr/bin/env python
# coding: utf-8 # coding: utf-8
from __future__ import print_function from __future__ import print_function, unicode_literals
import re import re
import sys
import hashlib import hashlib
from .__init__ import PY2
if not PY2:
from urllib.parse import unquote_to_bytes as unquote
from urllib.parse import quote_from_bytes as quote
else:
from urllib import unquote # pylint: disable=no-name-in-module
from urllib import quote
from .stolen import surrogateescape
surrogateescape.register_surrogateescape()
FS_ENCODING = sys.getfilesystemencoding()
class Unrecv(object): class Unrecv(object):
""" """
@ -103,18 +118,18 @@ class MultipartParser(object):
# this breaks on firefox uploads that contain \" # this breaks on firefox uploads that contain \"
# since firefox escapes " but forgets to escape \ # since firefox escapes " but forgets to escape \
# so it'll truncate after the \ # so it'll truncate after the \
ret = u"" ret = ""
esc = False esc = False
for ch in fn: for ch in fn:
if esc: if esc:
if ch in [u'"', u"\\"]: if ch in ['"', "\\"]:
ret += u'"' ret += '"'
else: else:
ret += esc + ch ret += esc + ch
esc = False esc = False
elif ch == u"\\": elif ch == "\\":
esc = True esc = True
elif ch == u'"': elif ch == '"':
break break
else: else:
ret += ch ret += ch
@ -204,7 +219,7 @@ class MultipartParser(object):
# discard junk before the first boundary # discard junk before the first boundary
for junk in self._read_data(): for junk in self._read_data():
self.log( self.log(
u"discarding preamble: [{}]".format(junk.decode("utf-8", "ignore")) "discarding preamble: [{}]".format(junk.decode("utf-8", "replace"))
) )
# nice, now make it fast # nice, now make it fast
@ -220,7 +235,7 @@ class MultipartParser(object):
if p_field != field_name: if p_field != field_name:
raise Pebkac('expected field "{}", got "{}"'.format(field_name, p_field)) raise Pebkac('expected field "{}", got "{}"'.format(field_name, p_field))
return self._read_value(p_data, max_len).decode("utf-8", "ignore") return self._read_value(p_data, max_len).decode("utf-8", "surrogateescape")
def drop(self): def drop(self):
"""discards the remaining multipart body""" """discards the remaining multipart body"""
@ -261,29 +276,64 @@ def read_header(sr):
ret += buf ret += buf
return ret[:-4].decode("utf-8", "replace").split("\r\n") return ret[:-4].decode("utf-8", "surrogateescape").split("\r\n")
def undot(path): def undot(path):
ret = [] ret = []
for node in path.split(u"/"): for node in path.split("/"):
if node in [u"", u"."]: if node in ["", "."]:
continue continue
if node == u"..": if node == "..":
if ret: if ret:
ret.pop() ret.pop()
continue continue
ret.append(node) ret.append(node)
return u"/".join(ret) return "/".join(ret)
def sanitize_fn(fn): def sanitize_fn(fn):
return fn.replace("\\", "/").split("/")[-1].strip() return fn.replace("\\", "/").split("/")[-1].strip()
def quotep(txt):
"""url quoter which deals with bytes correctly"""
btxt = fsenc(txt)
quot1 = quote(btxt, safe=b"/")
if not PY2:
quot1 = quot1.encode('ascii')
quot2 = quot1.replace(b" ", b"+")
return fsdec(quot2)
def unquotep(txt):
"""url unquoter which deals with bytes correctly"""
btxt = fsenc(txt)
unq1 = btxt.replace(b"+", b" ")
unq2 = unquote(unq1)
return fsdec(unq2)
def fsdec(txt):
"""decodes filesystem-bytes to wtf8"""
if PY2:
return surrogateescape.decodefilename(txt)
return txt.decode(FS_ENCODING, "surrogateescape")
def fsenc(txt):
"""encodes wtf8 to filesystem-bytes"""
if PY2:
return surrogateescape.encodefilename(txt)
return txt.encode(FS_ENCODING, "surrogateescape")
def hashcopy(actor, fin, fout): def hashcopy(actor, fin, fout):
u32_lim = int((2 ** 31) * 0.9) u32_lim = int((2 ** 31) * 0.9)
hashobj = hashlib.sha512() hashobj = hashlib.sha512()
@ -302,10 +352,10 @@ def hashcopy(actor, fin, fout):
def unescape_cookie(orig): def unescape_cookie(orig):
# mw=idk; doot=qwe%2Crty%3Basd+fgh%2Bjkl%25zxc%26vbn # qwe,rty;asd fgh+jkl%zxc&vbn # mw=idk; doot=qwe%2Crty%3Basd+fgh%2Bjkl%25zxc%26vbn # qwe,rty;asd fgh+jkl%zxc&vbn
ret = u"" ret = ""
esc = u"" esc = ""
for ch in orig: for ch in orig:
if ch == u"%": if ch == "%":
if len(esc) > 0: if len(esc) > 0:
ret += esc ret += esc
esc = ch esc = ch
@ -317,7 +367,7 @@ def unescape_cookie(orig):
ret += chr(int(esc[1:], 16)) ret += chr(int(esc[1:], 16))
except: except:
ret += esc ret += esc
esc = u"" esc = ""
else: else:
ret += ch ret += ch

View file

@ -38,7 +38,9 @@ avg() { awk 'function pr(ncsz) {if (nsmp>0) {printf "%3s %s\n", csz, sum/nsmp} c
## ##
## bad filenames ## bad filenames
echo hi > 'qwe,rty;asd fgh+jkl%zxc&vbn <qwe>"rty'"'"'uio&asd&nbsp;fgh'.html dirs=("$HOME/vfs/ほげ" "$HOME/vfs/ほげ/ぴよ" "$HOME/vfs/$(printf \\xed\\x91)" "$HOME/vfs/$(printf \\xed\\x91/\\xed\\x92)")
mkdir -p "${dirs[@]}"
for dir in "${dirs[@]}"; do for fn in ふが "$(printf \\xed\\x93)" 'qwe,rty;asd fgh+jkl%zxc&vbn <qwe>"rty'"'"'uio&asd&nbsp;fgh'; do echo "$dir" > "$dir/$fn.html"; done; done
## ##

View file

@ -168,6 +168,7 @@ args = {
"Programming Language :: Python :: 2", "Programming Language :: Python :: 2",
"Programming Language :: Python :: 2.7", "Programming Language :: Python :: 2.7",
"Programming Language :: Python :: 3", "Programming Language :: Python :: 3",
"Programming Language :: Python :: 3.2",
"Programming Language :: Python :: 3.3", "Programming Language :: Python :: 3.3",
"Programming Language :: Python :: 3.4", "Programming Language :: Python :: 3.4",
"Programming Language :: Python :: 3.5", "Programming Language :: Python :: 3.5",

View file

@ -1,6 +1,6 @@
#!/usr/bin/env python #!/usr/bin/env python
# coding: utf-8 # coding: utf-8
from __future__ import print_function from __future__ import print_function, unicode_literals
import os import os
import json import json