diff --git a/copyparty/__main__.py b/copyparty/__main__.py
index fe356d14..a69e060d 100644
--- a/copyparty/__main__.py
+++ b/copyparty/__main__.py
@@ -128,7 +128,7 @@ def main():
     )
     ap.add_argument("-i", metavar="IP", type=str, default="0.0.0.0", help="ip to bind")
     ap.add_argument("-p", metavar="PORT", type=int, default=3923, help="port to bind")
-    ap.add_argument("-nc", metavar="NUM", type=int, default=16, help="max num clients")
+    ap.add_argument("-nc", metavar="NUM", type=int, default=64, help="max num clients")
     ap.add_argument(
         "-j", metavar="CORES", type=int, default=1, help="max num cpu cores"
     )
diff --git a/copyparty/httpcli.py b/copyparty/httpcli.py
index c2977325..43ca2b36 100644
--- a/copyparty/httpcli.py
+++ b/copyparty/httpcli.py
@@ -16,9 +16,6 @@ from .util import *  # noqa  # pylint: disable=unused-wildcard-import
 
 if not PY2:
     unicode = str
-    from html import escape as html_escape
-else:
-    from cgi import escape as html_escape  # pylint: disable=no-name-in-module
 
 
 class HttpCli(object):
@@ -125,6 +122,11 @@ class HttpCli(object):
         self.uparam = uparam
         self.vpath = unquotep(vpath)
 
+        ua = self.headers.get("user-agent", "")
+        if ua.startswith("rclone/"):
+            uparam["raw"] = True
+            uparam["dots"] = True
+
         try:
             if self.mode in ["GET", "HEAD"]:
                 return self.handle_get() and self.keepalive
@@ -141,7 +143,7 @@ class HttpCli(object):
             try:
                 # self.log("pebkac at httpcli.run #2: " + repr(ex))
                 self.keepalive = self._check_nonfatal(ex)
-                self.loud_reply(str(ex), status=ex.code)
+                self.loud_reply("{}: {}".format(str(ex), self.vpath), status=ex.code)
                 return self.keepalive
             except Pebkac:
                 return False
@@ -180,7 +182,8 @@ class HttpCli(object):
         self.send_headers(len(body), status, mime, headers)
 
         try:
-            self.s.sendall(body)
+            if self.mode != "HEAD":
+                self.s.sendall(body)
         except:
             raise Pebkac(400, "client d/c while replying body")
 
@@ -188,7 +191,7 @@ class HttpCli(object):
 
     def loud_reply(self, body, *args, **kwargs):
         self.log(body.rstrip())
-        self.reply(b"<pre>" + body.encode("utf-8"), *list(args), **kwargs)
+        self.reply(b"<pre>" + body.encode("utf-8") + b"\r\n", *list(args), **kwargs)
 
     def handle_get(self):
         logmsg = "{:4} {}".format(self.mode, self.req)
@@ -493,7 +496,7 @@ class HttpCli(object):
         vpath = "{}/{}".format(self.vpath, sanitized).lstrip("/")
         html = self.conn.tpl_msg.render(
             h2='<a href="/{}">go to /{}</a>'.format(
-                quotep(vpath), html_escape(vpath, quote=False)
+                quotep(vpath), html_escape(vpath)
             ),
             pre="aight",
             click=True,
@@ -527,7 +530,7 @@ class HttpCli(object):
         vpath = "{}/{}".format(self.vpath, sanitized).lstrip("/")
         html = self.conn.tpl_msg.render(
             h2='<a href="/{}?edit">go to /{}?edit</a>'.format(
-                quotep(vpath), html_escape(vpath, quote=False)
+                quotep(vpath), html_escape(vpath)
             ),
             pre="aight",
             click=True,
@@ -621,7 +624,7 @@ class HttpCli(object):
 
         html = self.conn.tpl_msg.render(
             h2='<a href="/{}">return to /{}</a>'.format(
-                quotep(self.vpath), html_escape(self.vpath, quote=False)
+                quotep(self.vpath), html_escape(self.vpath)
             ),
             pre=msg,
         )
@@ -938,7 +941,7 @@ class HttpCli(object):
 
         targs = {
             "edit": "edit" in self.uparam,
-            "title": html_escape(self.vpath, quote=False),
+            "title": html_escape(self.vpath),
             "lastmod": int(ts_md * 1000),
             "md": "",
         }
@@ -979,7 +982,7 @@ class HttpCli(object):
                 else:
                     vpath += "/" + node
 
-                vpnodes.append([quotep(vpath) + "/", html_escape(node, quote=False)])
+                vpnodes.append([quotep(vpath) + "/", html_escape(node)])
 
         vn, rem = self.auth.vfs.get(
             self.vpath, self.uname, self.readable, self.writable
@@ -1054,7 +1057,7 @@ class HttpCli(object):
             dt = datetime.utcfromtimestamp(inf.st_mtime)
             dt = dt.strftime("%Y-%m-%d %H:%M:%S")
 
-            item = [margin, quotep(href), html_escape(fn, quote=False), sz, dt]
+            item = [margin, quotep(href), html_escape(fn), sz, dt]
             if is_dir:
                 dirs.append(item)
             else:
@@ -1119,7 +1122,7 @@ class HttpCli(object):
             ts=ts,
             prologue=logues[0],
             epilogue=logues[1],
-            title=html_escape(self.vpath, quote=False),
+            title=html_escape(self.vpath),
             srv_info=" /// ".join(srv_info),
         )
         self.reply(html.encode("utf-8", "replace"))
diff --git a/copyparty/util.py b/copyparty/util.py
index 3b366729..a3a4ae16 100644
--- a/copyparty/util.py
+++ b/copyparty/util.py
@@ -335,18 +335,18 @@ def read_header(sr):
 
 
 def humansize(sz, terse=False):
-    for unit in ['B', 'KiB', 'MiB', 'GiB', 'TiB']:
+    for unit in ["B", "KiB", "MiB", "GiB", "TiB"]:
         if sz < 1024:
             break
-         
-        sz /= 1024.
-   
-    ret = ' '.join([str(sz)[:4].rstrip('.'), unit])
-    
+
+        sz /= 1024.0
+
+    ret = " ".join([str(sz)[:4].rstrip("."), unit])
+
     if not terse:
         return ret
-    
-    return ret.replace('iB', '').replace(' ', '')
+
+    return ret.replace("iB", "").replace(" ", "")
 
 
 def undot(path):
@@ -398,6 +398,21 @@ def exclude_dotfiles(filepaths):
             yield fpath
 
 
+def html_escape(s, quote=False):
+    """html.escape but also newlines"""
+    s = (
+        s.replace("&", "&amp;")
+        .replace("<", "&lt;")
+        .replace(">", "&gt;")
+        .replace("\r", "&#13;")
+        .replace("\n", "&#10;")
+    )
+    if quote:
+        s = s.replace('"', "&quot;").replace("'", "&#x27;")
+
+    return s
+
+
 def quotep(txt):
     """url quoter which deals with bytes correctly"""
     btxt = w8enc(txt)
@@ -412,8 +427,8 @@ def quotep(txt):
 def unquotep(txt):
     """url unquoter which deals with bytes correctly"""
     btxt = w8enc(txt)
-    unq1 = btxt.replace(b"+", b" ")
-    unq2 = unquote(unq1)
+    # btxt = btxt.replace(b"+", b" ")
+    unq2 = unquote(btxt)
     return w8dec(unq2)
 
 
diff --git a/scripts/speedtest-fs.py b/scripts/speedtest-fs.py
new file mode 100644
index 00000000..a5a3f552
--- /dev/null
+++ b/scripts/speedtest-fs.py
@@ -0,0 +1,164 @@
+#!/usr/bin/env python
+
+import os
+import sys
+import stat
+import time
+import signal
+import traceback
+import threading
+from queue import Queue
+
+
+"""speedtest-fs: filesystem performance estimate"""
+__author__ = "ed "
+__copyright__ = 2020
+__license__ = "MIT"
+__url__ = "https://github.com/9001/copyparty/"
+
+
+def get_spd(nbyte, nsec):
+    if not nsec:
+        return "0.000 MB   0.000 sec   0.000 MB/s"
+
+    mb = nbyte / (1024 * 1024.0)
+    spd = mb / nsec
+
+    return f"{mb:.3f} MB   {nsec:.3f} sec   {spd:.3f} MB/s"
+
+
+class Inf(object):
+    def __init__(self, t0):
+        self.msgs = []
+        self.errors = []
+        self.reports = []
+        self.mtx_msgs = threading.Lock()
+        self.mtx_reports = threading.Lock()
+
+        self.n_byte = 0
+        self.n_sec = 0
+        self.n_done = 0
+        self.t0 = t0
+
+        thr = threading.Thread(target=self.print_msgs)
+        thr.daemon = True
+        thr.start()
+
+    def msg(self, fn, n_read):
+        with self.mtx_msgs:
+            self.msgs.append(f"{fn} {n_read}")
+
+    def err(self, fn):
+        with self.mtx_reports:
+            self.errors.append(f"{fn}\n{traceback.format_exc()}")
+
+    def print_msgs(self):
+        while True:
+            time.sleep(0.02)
+            with self.mtx_msgs:
+                msgs = self.msgs
+                self.msgs = []
+
+            if not msgs:
+                continue
+
+            msgs = msgs[-64:]
+            msgs = [f"{get_spd(self.n_byte, self.n_sec)}   {x}" for x in msgs]
+            print("\n".join(msgs))
+
+    def report(self, fn, n_byte, n_sec):
+        with self.mtx_reports:
+            self.reports.append([n_byte, n_sec, fn])
+            self.n_byte += n_byte
+            self.n_sec += n_sec
+
+    def done(self):
+        with self.mtx_reports:
+            self.n_done += 1
+
+
+def get_files(dir_path):
+    for fn in os.listdir(dir_path):
+        fn = os.path.join(dir_path, fn)
+        st = os.stat(fn).st_mode
+
+        if stat.S_ISDIR(st):
+            yield from get_files(fn)
+
+        if stat.S_ISREG(st):
+            yield fn
+
+
+def worker(q, inf, read_sz):
+    while True:
+        fn = q.get()
+        if not fn:
+            break
+
+        n_read = 0
+        try:
+            t0 = time.time()
+            with open(fn, "rb") as f:
+                while True:
+                    buf = f.read(read_sz)
+                    if not buf:
+                        break
+
+                    n_read += len(buf)
+                    inf.msg(fn, n_read)
+
+            inf.report(fn, n_read, time.time() - t0)
+        except:
+            inf.err(fn)
+
+    inf.done()
+
+
+def sighandler(signo, frame):
+    os._exit(0)
+
+
+def main():
+    signal.signal(signal.SIGINT, sighandler)
+
+    root = "."
+    if len(sys.argv) > 1:
+        root = sys.argv[1]
+
+    t0 = time.time()
+    q = Queue(256)
+    inf = Inf(t0)
+
+    num_threads = 8
+    read_sz = 32 * 1024
+    for _ in range(num_threads):
+        thr = threading.Thread(target=worker, args=(q, inf, read_sz,))
+        thr.daemon = True
+        thr.start()
+
+    for fn in get_files(root):
+        q.put(fn)
+
+    for _ in range(num_threads):
+        q.put(None)
+
+    while inf.n_done < num_threads:
+        time.sleep(0.1)
+
+    t2 = time.time()
+    print("\n")
+
+    log = inf.reports
+    log.sort()
+    for nbyte, nsec, fn in log[-64:]:
+        print(f"{get_spd(nbyte, nsec)}   {fn}")
+
+    print()
+    print("\n".join(inf.errors))
+
+    print(get_spd(inf.n_byte, t2 - t0))
+
+
+if __name__ == "__main__":
+    main()
+