import codecs


def hexler(binary):
    """Return `binary` with line breaks escaped so it fits on one log line.

    Carriage returns and line feeds are replaced by the two-character
    sequences backslash-r and backslash-n; everything else is unchanged.
    Despite the name, no hex dump is produced.
    """
    return binary.replace("\r", "\\r").replace("\n", "\\n")


def register_wtf8():
    """Register a codec named "wtf-8" with the `codecs` module.

    The codec is UTF-8 combined with the "surrogateescape" error handler,
    so byte sequences that are not valid UTF-8 survive a decode/encode
    round trip instead of raising.
    """

    def wtf8_enc(text, errors="strict"):
        # `errors` exists only so callers may pass it, as the codec API
        # allows; surrogateescape is always used
        return str(text).encode("utf-8", "surrogateescape"), len(text)

    def wtf8_dec(binary, errors="strict"):
        # same as above: `errors` is accepted and ignored
        return bytes(binary).decode("utf-8", "surrogateescape"), len(binary)

    codec = codecs.CodecInfo(wtf8_enc, wtf8_dec, name="wtf-8")

    def wtf8_search(encoding_name):
        # a search function must return None for names it does not own,
        # otherwise every unknown encoding would resolve to this codec;
        # the name arrives normalized, with "-" possibly turned into "_"
        if encoding_name.lower().replace("-", "_") == "wtf_8":
            return codec

        return None

    codecs.register(wtf8_search)


# character substitution tables; both start empty and are filled in by main()
bad_good = {}  # original character -> replacement character
good_bad = {}  # replacement character -> original character


def enwin(txt):
    """Return `txt` with every character found in `bad_good` replaced.

    Characters without an entry are kept; with an empty table the text
    comes back unchanged.
    """
    return "".join(bad_good.get(ch, ch) for ch in txt)


def dewin(txt):
    """Return `txt` with every character found in `good_bad` replaced.

    This is the inverse of `enwin` as long as `good_bad` is the reverse
    mapping of `bad_good`.
    """
    return "".join(good_bad.get(ch, ch) for ch in txt)
-273,6 +316,9 @@ class Gateway(object): raise def listdir(self, path): + if bad_good: + path = dewin(path) + web_path = self.quotep("/" + "/".join([self.web_root, path])) + "?dots" r = self.sendreq("GET", web_path) if r.status != 200: @@ -295,11 +341,14 @@ class Gateway(object): raise def download_file_range(self, path, ofs1, ofs2): + if bad_good: + path = dewin(path) + web_path = self.quotep("/" + "/".join([self.web_root, path])) + "?raw" hdr_range = "bytes={}-{}".format(ofs1, ofs2 - 1) info( "DL {:4.0f}K\033[36m{:>9}-{:<9}\033[0m{}".format( - (ofs2 - ofs1) / 1024.0, ofs1, ofs2 - 1, path + (ofs2 - ofs1) / 1024.0, ofs1, ofs2 - 1, hexler(path) ) ) @@ -318,7 +367,7 @@ class Gateway(object): ret = [] remainder = b"" ptn = re.compile( - r"^(-|DIR)]+>([^<]+)([^<]+)([^<]+)$" + r'^(-|DIR)]* href="([^"]+)"[^>]*>([^<]+)([^<]+)([^<]+)$' ) while True: @@ -340,8 +389,13 @@ class Gateway(object): # print(line) continue - ftype, fname, fsize, fdate = m.groups() - fname = html_dec(fname) + ftype, furl, fname, fsize, fdate = m.groups() + fname = furl.rstrip("/").split("/")[-1] + fname = unquote(fname) + fname = fname.decode("wtf-8") + if bad_good: + fname = enwin(fname) + sz = 1 ts = 60 * 60 * 24 * 2 try: @@ -405,7 +459,11 @@ class CPPF(Operations): cache_path, cache1 = cn.tag cache2 = cache1 + len(cn.data) msg += "\n{:<2} {:>7} {:>10}:{:<9} {}".format( - n, len(cn.data), cache1, cache2, cache_path + n, + len(cn.data), + cache1, + cache2, + cache_path.replace("\r", "\\r").replace("\n", "\\n"), ) return msg @@ -636,7 +694,7 @@ class CPPF(Operations): def _readdir(self, path, fh=None): path = path.strip("/") - log("readdir [{}] [{}]".format(path, fh)) + log("readdir [{}] [{}]".format(hexler(path), fh)) ret = self.gw.listdir(path) if not self.n_dircache: @@ -663,7 +721,11 @@ class CPPF(Operations): path = path.strip("/") ofs2 = offset + length file_sz = self.getattr(path)["st_size"] - log("read {} |{}| {}:{} max {}".format(path, length, offset, ofs2, file_sz)) + log( + "read {} |{}| 
{}:{} max {}".format( + hexler(path), length, offset, ofs2, file_sz + ) + ) if ofs2 > file_sz: ofs2 = file_sz log("truncate to |{}| :{}".format(ofs2 - offset, ofs2)) @@ -702,7 +764,9 @@ class CPPF(Operations): return ret def getattr(self, path, fh=None): - log("getattr [{}]".format(path)) + log("getattr [{}]".format(hexler(path))) + if WINDOWS: + path = enwin(path) # windows occasionally decodes f0xx to xx path = path.strip("/") try: @@ -725,11 +789,20 @@ class CPPF(Operations): dents = self._readdir(dirpath) for cache_name, cache_stat, _ in dents: + # if "qw" in cache_name and "qw" in fname: + # info( + # "cmp\n [{}]\n [{}]\n\n{}\n".format( + # hexler(cache_name), + # hexler(fname), + # "\n".join(traceback.format_stack()[:-1]), + # ) + # ) + if cache_name == fname: # dbg("=" + repr(cache_stat)) return cache_stat - info("=ENOENT ({})".format(path)) + info("=ENOENT ({})".format(hexler(path))) raise FuseOSError(errno.ENOENT) access = None @@ -799,24 +872,24 @@ class CPPF(Operations): raise FuseOSError(errno.ENOENT) def open(self, path, flags): - dbg("open [{}] [{}]".format(path, flags)) + dbg("open [{}] [{}]".format(hexler(path), flags)) return self._open(path) def opendir(self, path): - dbg("opendir [{}]".format(path)) + dbg("opendir [{}]".format(hexler(path))) return self._open(path) def flush(self, path, fh): - dbg("flush [{}] [{}]".format(path, fh)) + dbg("flush [{}] [{}]".format(hexler(path), fh)) def release(self, ino, fi): - dbg("release [{}] [{}]".format(ino, fi)) + dbg("release [{}] [{}]".format(hexler(ino), fi)) def releasedir(self, ino, fi): - dbg("releasedir [{}] [{}]".format(ino, fi)) + dbg("releasedir [{}] [{}]".format(hexler(ino), fi)) def access(self, path, mode): - dbg("access [{}] [{}]".format(path, mode)) + dbg("access [{}] [{}]".format(hexler(path), mode)) try: x = self.getattr(path) if x["st_mode"] <= 0: @@ -838,7 +911,7 @@ def main(): # linux generally does 128k so the cache is a slowdown, # windows likes to use 4k and 64k so cache is required, 
# value is numChunks (1~3M each) to keep in the cache - nf = 24 if WINDOWS or MACOS else 0 + nf = 24 # dircache is always a boost, # only want to disable it for tests etc, @@ -889,6 +962,20 @@ def main(): if WINDOWS: os.system("") + for ch in '<>:"\\|?*': + # microsoft maps illegal characters to f0xx + # (e000 to f8ff is basic-plane private-use) + bad_good[ch] = chr(ord(ch) + 0xF000) + + for n in range(0, 0x100): + # map surrogateescape to another private-use area + bad_good[chr(n + 0xDC00)] = chr(n + 0xF100) + + for k, v in bad_good.items(): + good_bad[v] = k + + register_wtf8() + try: with open("/etc/fuse.conf", "rb") as f: allow_other = b"\nuser_allow_other" in f.read() @@ -899,7 +986,7 @@ def main(): if not MACOS: args["nonempty"] = True - FUSE(CPPF(ar), ar.local_path, **args) + FUSE(CPPF(ar), ar.local_path, encoding="wtf-8", **args) if __name__ == "__main__":