mirror of
https://github.com/9001/copyparty.git
synced 2025-08-17 00:52:16 -06:00
fuse.py: support mojibake on windows
This commit is contained in:
parent
b1222edb27
commit
a90c49b8fb
|
@ -32,6 +32,7 @@ import time
|
||||||
import stat
|
import stat
|
||||||
import errno
|
import errno
|
||||||
import struct
|
import struct
|
||||||
|
import codecs
|
||||||
import builtins
|
import builtins
|
||||||
import platform
|
import platform
|
||||||
import argparse
|
import argparse
|
||||||
|
@ -41,7 +42,7 @@ import http.client # py2: httplib
|
||||||
import urllib.parse
|
import urllib.parse
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from urllib.parse import quote_from_bytes as quote
|
from urllib.parse import quote_from_bytes as quote
|
||||||
|
from urllib.parse import unquote_to_bytes as unquote
|
||||||
|
|
||||||
WINDOWS = sys.platform == "win32"
|
WINDOWS = sys.platform == "win32"
|
||||||
MACOS = platform.system() == "Darwin"
|
MACOS = platform.system() == "Darwin"
|
||||||
|
@ -106,6 +107,47 @@ def null_log(msg):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
def hexler(binary):
|
||||||
|
return binary.replace("\r", "\\r").replace("\n", "\\n")
|
||||||
|
return " ".join(["{}\033[36m{:02x}\033[0m".format(b, ord(b)) for b in binary])
|
||||||
|
return " ".join(map(lambda b: format(ord(b), "02x"), binary))
|
||||||
|
|
||||||
|
|
||||||
|
def register_wtf8():
|
||||||
|
def wtf8_enc(text):
|
||||||
|
return str(text).encode("utf-8", "surrogateescape"), len(text)
|
||||||
|
|
||||||
|
def wtf8_dec(binary):
|
||||||
|
return bytes(binary).decode("utf-8", "surrogateescape"), len(binary)
|
||||||
|
|
||||||
|
def wtf8_search(encoding_name):
|
||||||
|
return codecs.CodecInfo(wtf8_enc, wtf8_dec, name="wtf-8")
|
||||||
|
|
||||||
|
codecs.register(wtf8_search)
|
||||||
|
|
||||||
|
|
||||||
|
bad_good = {}
|
||||||
|
good_bad = {}
|
||||||
|
|
||||||
|
|
||||||
|
def enwin(txt):
|
||||||
|
return "".join([bad_good.get(x, x) for x in txt])
|
||||||
|
|
||||||
|
for bad, good in bad_good.items():
|
||||||
|
txt = txt.replace(bad, good)
|
||||||
|
|
||||||
|
return txt
|
||||||
|
|
||||||
|
|
||||||
|
def dewin(txt):
|
||||||
|
return "".join([good_bad.get(x, x) for x in txt])
|
||||||
|
|
||||||
|
for bad, good in bad_good.items():
|
||||||
|
txt = txt.replace(good, bad)
|
||||||
|
|
||||||
|
return txt
|
||||||
|
|
||||||
|
|
||||||
class RecentLog(object):
|
class RecentLog(object):
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
self.mtx = threading.Lock()
|
self.mtx = threading.Lock()
|
||||||
|
@ -169,6 +211,8 @@ def html_dec(txt):
|
||||||
txt.replace("<", "<")
|
txt.replace("<", "<")
|
||||||
.replace(">", ">")
|
.replace(">", ">")
|
||||||
.replace(""", '"')
|
.replace(""", '"')
|
||||||
|
.replace(" ", "\r")
|
||||||
|
.replace(" ", "\n")
|
||||||
.replace("&", "&")
|
.replace("&", "&")
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -213,8 +257,7 @@ class Gateway(object):
|
||||||
self.conns = {}
|
self.conns = {}
|
||||||
|
|
||||||
def quotep(self, path):
|
def quotep(self, path):
|
||||||
# TODO: mojibake support
|
path = path.encode("wtf-8")
|
||||||
path = path.encode("utf-8", "ignore")
|
|
||||||
return quote(path, safe="/")
|
return quote(path, safe="/")
|
||||||
|
|
||||||
def getconn(self, tid=None):
|
def getconn(self, tid=None):
|
||||||
|
@ -273,6 +316,9 @@ class Gateway(object):
|
||||||
raise
|
raise
|
||||||
|
|
||||||
def listdir(self, path):
|
def listdir(self, path):
|
||||||
|
if bad_good:
|
||||||
|
path = dewin(path)
|
||||||
|
|
||||||
web_path = self.quotep("/" + "/".join([self.web_root, path])) + "?dots"
|
web_path = self.quotep("/" + "/".join([self.web_root, path])) + "?dots"
|
||||||
r = self.sendreq("GET", web_path)
|
r = self.sendreq("GET", web_path)
|
||||||
if r.status != 200:
|
if r.status != 200:
|
||||||
|
@ -295,11 +341,14 @@ class Gateway(object):
|
||||||
raise
|
raise
|
||||||
|
|
||||||
def download_file_range(self, path, ofs1, ofs2):
|
def download_file_range(self, path, ofs1, ofs2):
|
||||||
|
if bad_good:
|
||||||
|
path = dewin(path)
|
||||||
|
|
||||||
web_path = self.quotep("/" + "/".join([self.web_root, path])) + "?raw"
|
web_path = self.quotep("/" + "/".join([self.web_root, path])) + "?raw"
|
||||||
hdr_range = "bytes={}-{}".format(ofs1, ofs2 - 1)
|
hdr_range = "bytes={}-{}".format(ofs1, ofs2 - 1)
|
||||||
info(
|
info(
|
||||||
"DL {:4.0f}K\033[36m{:>9}-{:<9}\033[0m{}".format(
|
"DL {:4.0f}K\033[36m{:>9}-{:<9}\033[0m{}".format(
|
||||||
(ofs2 - ofs1) / 1024.0, ofs1, ofs2 - 1, path
|
(ofs2 - ofs1) / 1024.0, ofs1, ofs2 - 1, hexler(path)
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -318,7 +367,7 @@ class Gateway(object):
|
||||||
ret = []
|
ret = []
|
||||||
remainder = b""
|
remainder = b""
|
||||||
ptn = re.compile(
|
ptn = re.compile(
|
||||||
r"^<tr><td>(-|DIR)</td><td><a [^>]+>([^<]+)</a></td><td>([^<]+)</td><td>([^<]+)</td></tr>$"
|
r'^<tr><td>(-|DIR)</td><td><a[^>]* href="([^"]+)"[^>]*>([^<]+)</a></td><td>([^<]+)</td><td>([^<]+)</td></tr>$'
|
||||||
)
|
)
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
|
@ -340,8 +389,13 @@ class Gateway(object):
|
||||||
# print(line)
|
# print(line)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
ftype, fname, fsize, fdate = m.groups()
|
ftype, furl, fname, fsize, fdate = m.groups()
|
||||||
fname = html_dec(fname)
|
fname = furl.rstrip("/").split("/")[-1]
|
||||||
|
fname = unquote(fname)
|
||||||
|
fname = fname.decode("wtf-8")
|
||||||
|
if bad_good:
|
||||||
|
fname = enwin(fname)
|
||||||
|
|
||||||
sz = 1
|
sz = 1
|
||||||
ts = 60 * 60 * 24 * 2
|
ts = 60 * 60 * 24 * 2
|
||||||
try:
|
try:
|
||||||
|
@ -405,7 +459,11 @@ class CPPF(Operations):
|
||||||
cache_path, cache1 = cn.tag
|
cache_path, cache1 = cn.tag
|
||||||
cache2 = cache1 + len(cn.data)
|
cache2 = cache1 + len(cn.data)
|
||||||
msg += "\n{:<2} {:>7} {:>10}:{:<9} {}".format(
|
msg += "\n{:<2} {:>7} {:>10}:{:<9} {}".format(
|
||||||
n, len(cn.data), cache1, cache2, cache_path
|
n,
|
||||||
|
len(cn.data),
|
||||||
|
cache1,
|
||||||
|
cache2,
|
||||||
|
cache_path.replace("\r", "\\r").replace("\n", "\\n"),
|
||||||
)
|
)
|
||||||
return msg
|
return msg
|
||||||
|
|
||||||
|
@ -636,7 +694,7 @@ class CPPF(Operations):
|
||||||
|
|
||||||
def _readdir(self, path, fh=None):
|
def _readdir(self, path, fh=None):
|
||||||
path = path.strip("/")
|
path = path.strip("/")
|
||||||
log("readdir [{}] [{}]".format(path, fh))
|
log("readdir [{}] [{}]".format(hexler(path), fh))
|
||||||
|
|
||||||
ret = self.gw.listdir(path)
|
ret = self.gw.listdir(path)
|
||||||
if not self.n_dircache:
|
if not self.n_dircache:
|
||||||
|
@ -663,7 +721,11 @@ class CPPF(Operations):
|
||||||
path = path.strip("/")
|
path = path.strip("/")
|
||||||
ofs2 = offset + length
|
ofs2 = offset + length
|
||||||
file_sz = self.getattr(path)["st_size"]
|
file_sz = self.getattr(path)["st_size"]
|
||||||
log("read {} |{}| {}:{} max {}".format(path, length, offset, ofs2, file_sz))
|
log(
|
||||||
|
"read {} |{}| {}:{} max {}".format(
|
||||||
|
hexler(path), length, offset, ofs2, file_sz
|
||||||
|
)
|
||||||
|
)
|
||||||
if ofs2 > file_sz:
|
if ofs2 > file_sz:
|
||||||
ofs2 = file_sz
|
ofs2 = file_sz
|
||||||
log("truncate to |{}| :{}".format(ofs2 - offset, ofs2))
|
log("truncate to |{}| :{}".format(ofs2 - offset, ofs2))
|
||||||
|
@ -702,7 +764,9 @@ class CPPF(Operations):
|
||||||
return ret
|
return ret
|
||||||
|
|
||||||
def getattr(self, path, fh=None):
|
def getattr(self, path, fh=None):
|
||||||
log("getattr [{}]".format(path))
|
log("getattr [{}]".format(hexler(path)))
|
||||||
|
if WINDOWS:
|
||||||
|
path = enwin(path) # windows occasionally decodes f0xx to xx
|
||||||
|
|
||||||
path = path.strip("/")
|
path = path.strip("/")
|
||||||
try:
|
try:
|
||||||
|
@ -725,11 +789,20 @@ class CPPF(Operations):
|
||||||
dents = self._readdir(dirpath)
|
dents = self._readdir(dirpath)
|
||||||
|
|
||||||
for cache_name, cache_stat, _ in dents:
|
for cache_name, cache_stat, _ in dents:
|
||||||
|
# if "qw" in cache_name and "qw" in fname:
|
||||||
|
# info(
|
||||||
|
# "cmp\n [{}]\n [{}]\n\n{}\n".format(
|
||||||
|
# hexler(cache_name),
|
||||||
|
# hexler(fname),
|
||||||
|
# "\n".join(traceback.format_stack()[:-1]),
|
||||||
|
# )
|
||||||
|
# )
|
||||||
|
|
||||||
if cache_name == fname:
|
if cache_name == fname:
|
||||||
# dbg("=" + repr(cache_stat))
|
# dbg("=" + repr(cache_stat))
|
||||||
return cache_stat
|
return cache_stat
|
||||||
|
|
||||||
info("=ENOENT ({})".format(path))
|
info("=ENOENT ({})".format(hexler(path)))
|
||||||
raise FuseOSError(errno.ENOENT)
|
raise FuseOSError(errno.ENOENT)
|
||||||
|
|
||||||
access = None
|
access = None
|
||||||
|
@ -799,24 +872,24 @@ class CPPF(Operations):
|
||||||
raise FuseOSError(errno.ENOENT)
|
raise FuseOSError(errno.ENOENT)
|
||||||
|
|
||||||
def open(self, path, flags):
|
def open(self, path, flags):
|
||||||
dbg("open [{}] [{}]".format(path, flags))
|
dbg("open [{}] [{}]".format(hexler(path), flags))
|
||||||
return self._open(path)
|
return self._open(path)
|
||||||
|
|
||||||
def opendir(self, path):
|
def opendir(self, path):
|
||||||
dbg("opendir [{}]".format(path))
|
dbg("opendir [{}]".format(hexler(path)))
|
||||||
return self._open(path)
|
return self._open(path)
|
||||||
|
|
||||||
def flush(self, path, fh):
|
def flush(self, path, fh):
|
||||||
dbg("flush [{}] [{}]".format(path, fh))
|
dbg("flush [{}] [{}]".format(hexler(path), fh))
|
||||||
|
|
||||||
def release(self, ino, fi):
|
def release(self, ino, fi):
|
||||||
dbg("release [{}] [{}]".format(ino, fi))
|
dbg("release [{}] [{}]".format(hexler(ino), fi))
|
||||||
|
|
||||||
def releasedir(self, ino, fi):
|
def releasedir(self, ino, fi):
|
||||||
dbg("releasedir [{}] [{}]".format(ino, fi))
|
dbg("releasedir [{}] [{}]".format(hexler(ino), fi))
|
||||||
|
|
||||||
def access(self, path, mode):
|
def access(self, path, mode):
|
||||||
dbg("access [{}] [{}]".format(path, mode))
|
dbg("access [{}] [{}]".format(hexler(path), mode))
|
||||||
try:
|
try:
|
||||||
x = self.getattr(path)
|
x = self.getattr(path)
|
||||||
if x["st_mode"] <= 0:
|
if x["st_mode"] <= 0:
|
||||||
|
@ -838,7 +911,7 @@ def main():
|
||||||
# linux generally does 128k so the cache is a slowdown,
|
# linux generally does 128k so the cache is a slowdown,
|
||||||
# windows likes to use 4k and 64k so cache is required,
|
# windows likes to use 4k and 64k so cache is required,
|
||||||
# value is numChunks (1~3M each) to keep in the cache
|
# value is numChunks (1~3M each) to keep in the cache
|
||||||
nf = 24 if WINDOWS or MACOS else 0
|
nf = 24
|
||||||
|
|
||||||
# dircache is always a boost,
|
# dircache is always a boost,
|
||||||
# only want to disable it for tests etc,
|
# only want to disable it for tests etc,
|
||||||
|
@ -889,6 +962,20 @@ def main():
|
||||||
if WINDOWS:
|
if WINDOWS:
|
||||||
os.system("")
|
os.system("")
|
||||||
|
|
||||||
|
for ch in '<>:"\\|?*':
|
||||||
|
# microsoft maps illegal characters to f0xx
|
||||||
|
# (e000 to f8ff is basic-plane private-use)
|
||||||
|
bad_good[ch] = chr(ord(ch) + 0xF000)
|
||||||
|
|
||||||
|
for n in range(0, 0x100):
|
||||||
|
# map surrogateescape to another private-use area
|
||||||
|
bad_good[chr(n + 0xDC00)] = chr(n + 0xF100)
|
||||||
|
|
||||||
|
for k, v in bad_good.items():
|
||||||
|
good_bad[v] = k
|
||||||
|
|
||||||
|
register_wtf8()
|
||||||
|
|
||||||
try:
|
try:
|
||||||
with open("/etc/fuse.conf", "rb") as f:
|
with open("/etc/fuse.conf", "rb") as f:
|
||||||
allow_other = b"\nuser_allow_other" in f.read()
|
allow_other = b"\nuser_allow_other" in f.read()
|
||||||
|
@ -899,7 +986,7 @@ def main():
|
||||||
if not MACOS:
|
if not MACOS:
|
||||||
args["nonempty"] = True
|
args["nonempty"] = True
|
||||||
|
|
||||||
FUSE(CPPF(ar), ar.local_path, **args)
|
FUSE(CPPF(ar), ar.local_path, encoding="wtf-8", **args)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|
Loading…
Reference in a new issue