From 69517e4624f4112ec8b1162e318b456a941902df Mon Sep 17 00:00:00 2001 From: ed Date: Thu, 2 May 2024 22:49:27 +0000 Subject: [PATCH] add general-purpose query-string parcelling; currently only being used to workaround discord discarding query strings in opengraph tags, but i'm sure there will be plenty more wonderful usecases for this atrocity --- copyparty/__main__.py | 3 +- copyparty/authsrv.py | 3 ++ copyparty/httpcli.py | 71 +++++++++++++++++++++++++++---------------- copyparty/util.py | 3 ++ tests/util.py | 2 +- 5 files changed, 53 insertions(+), 29 deletions(-) diff --git a/copyparty/__main__.py b/copyparty/__main__.py index 370db2ae..57da50f9 100644 --- a/copyparty/__main__.py +++ b/copyparty/__main__.py @@ -1267,6 +1267,7 @@ def add_og(ap): ap2.add_argument("--og-title", metavar="TXT", type=u, default="", help="fallback title if there is nothing in the \033[33m-e2t\033[0m database (volflag=og_site)") ap2.add_argument("--og-desc", metavar="TXT", type=u, default="", help="description text; same for all files, disable with [\033[32m-\033[0m] (volflag=og_desc)") ap2.add_argument("--og-site", metavar="TXT", type=u, default="", help="sitename; defaults to \033[33m--name\033[0m, disable with [\033[32m-\033[0m] (volflag=og_site)") + ap2.add_argument("--uqe", action="store_true", help="query-string parceling; translate a request for \033[33m/foo/.uqe/BASE64\033[0m into \033[33m/foo?TEXT\033[0m, or \033[33m/foo/?TEXT\033[0m if the first character in \033[33mTEXT\033[0m is a slash. Automatically enabled for \033[33m--og\033[0m") def add_ui(ap, retry): @@ -1281,7 +1282,7 @@ def add_ui(ap, retry): ap2.add_argument("--mpmc", metavar="URL", type=u, default="", help="change the mediaplayer-toggle mouse cursor; URL to a folder with {2..5}.png inside (or disable with [\033[32m.\033[0m])") ap2.add_argument("--js-browser", metavar="L", type=u, help="URL to additional JS to include") ap2.add_argument("--css-browser", metavar="L", type=u, help="URL to additional CSS to include") - ap2.add_argument("--html-head", metavar="TXT", type=u, default="", help="text to append to the of all HTML pages; can be @PATH to send the contents of a file at PATH, and/or begin with % to render as jinja2 template (volflag=html_head)") + ap2.add_argument("--html-head", metavar="TXT", type=u, default="", help="text to append to the of all HTML pages; can be @PATH to send the contents of a file at PATH, and/or begin with %% to render as jinja2 template (volflag=html_head)") ap2.add_argument("--ih", action="store_true", help="if a folder contains index.html, show that instead of the directory listing by default (can be changed in the client settings UI, or add ?v to URL for override)") ap2.add_argument("--textfiles", metavar="CSV", type=u, default="txt,nfo,diz,cue,readme", help="file extensions to present as plaintext") ap2.add_argument("--txt-max", metavar="KiB", type=int, default=64, help="max size of embedded textfiles on ?doc= (anything bigger will be lazy-loaded by JS)") diff --git a/copyparty/authsrv.py b/copyparty/authsrv.py index b9ffda58..e2a95aa3 100644 --- a/copyparty/authsrv.py +++ b/copyparty/authsrv.py @@ -1777,6 +1777,9 @@ class AuthSrv(object): t = 'volume "/%s" has invalid %stry [%s]' raise Exception(t % (vol.vpath, k, vol.flags.get(k + "try"))) + if vol.flags.get("og"): + self.args.uqe = True + for k1, k2 in IMPLICATIONS: if k1 in vol.flags: vol.flags[k2] = True diff --git a/copyparty/httpcli.py b/copyparty/httpcli.py index 121ed535..ff82daa7 100644 --- a/copyparty/httpcli.py +++ b/copyparty/httpcli.py @@ -84,6 +84,8 @@ from .util import ( sanitize_vpath, sendfile_kern, sendfile_py, + ub64dec, + ub64enc, ujoin, undot, unescape_cookie, @@ -371,6 +373,21 @@ class HttpCli(object): if "&" in self.req and "?" not in self.req: self.hint = "did you mean '?' instead of '&'" + if self.args.uqe and "/.uqe/" in self.req: + try: + vpath, query = self.req.split("?")[0].split("/.uqe/") + query = query.split("/")[0] # discard trailing junk + # (usually a "filename" to trick discord into behaving) + query = ub64dec(query.encode("utf-8")).decode("utf-8", "replace") + if query.startswith("/"): + self.req = "%s/?%s" % (vpath, query[1:]) + else: + self.req = "%s?%s" % (vpath, query) + except Exception as ex: + t = "bad uqe in request [%s]: %r" % (self.req, ex) + self.loud_reply(t, status=400) + return False + # split req into vpath + uparam uparam = {} if "?" not in self.req: @@ -437,7 +454,8 @@ class HttpCli(object): cookie_pw = "" if len(uparam) > 10 or len(cookies) > 50: - raise Pebkac(400, "u wot m8") + self.loud_reply("u wot m8", status=400) + return False self.uparam = uparam self.cookies = cookies @@ -4075,27 +4093,6 @@ class HttpCli(object): return True def tx_browser(self) -> bool: - vn = self.vn - rem = self.rem - - add_og = "og" in vn.flags - if add_og: - if ".og-raw/" in rem: - # sad workaround: discord strips ?raw=1 so give it a unique url instead - self.uparam["raw"] = True - self.vpath = self.vpath.replace(".og-raw/", "") - vn, rem = self.asrv.vfs.get(self.vpath, self.uname, False, False) - self.vn = vn - self.rem = rem - if "th" in self.uparam or "raw" in self.uparam: - og_ua = add_og = False - elif self.args.og_ua: - og_ua = add_og = self.args.og_ua.search(self.ua) - else: - og_ua = False - add_og = True - og_fn = "" - vpath = "" vpnodes = [["", "/"]] if self.vpath: @@ -4107,6 +4104,8 @@ class HttpCli(object): vpnodes.append([quotep(vpath) + "/", html_escape(node, crlf=True)]) + vn = self.vn + rem = self.rem abspath = vn.dcanonical(rem) dbv, vrem = vn.get_dbv(rem) @@ -4137,6 +4136,17 @@ class HttpCli(object): e2d = "e2d" in vn.flags e2t = "e2t" in vn.flags + add_og = "og" in vn.flags + if add_og: + if "th" in self.uparam or "raw" in self.uparam: + og_ua = add_og = False + elif self.args.og_ua: + og_ua = add_og = self.args.og_ua.search(self.ua) + else: + og_ua = False + add_og = True + og_fn = "" + if "b" in self.uparam: self.out_headers["X-Robots-Tag"] = "noindex, nofollow" @@ -4774,16 +4784,23 @@ class HttpCli(object): j2a["og_is_au"] = is_au if thumb: fmt = vn.flags.get("og_th", "j") - zs = ujoin(url_base, quotep(thumb)) - j2a["og_thumb"] = "%s?th=%s&cache" % (zs, fmt) + th_base = ujoin(url_base, quotep(thumb)) + query = "th=%s&cache" % (fmt,) + query = ub64enc(query.encode("utf-8")).decode("utf-8") + # discord looks at file extension, not content-type... + query += "/a.jpg" if "j" in fmt else "/a.webp" + j2a["og_thumb"] = "%s/.uqe/%s" % (th_base, query) j2a["og_fn"] = og_fn j2a["og_file"] = file if og_fn: og_fn_q = quotep(og_fn) + query = ub64enc(b"raw").decode("utf-8") + if "." in og_fn: + query += "/a.%s" % (og_fn.split(".")[-1]) + j2a["og_url"] = ujoin(url_base, og_fn_q) - j2a["og_raw"] = ujoin(url_base, vjoin(".og-raw", og_fn_q)) - # discord strips ?raw so it always downloads the html... orz + j2a["og_raw"] = j2a["og_url"] + "/.uqe/" + query else: j2a["og_url"] = j2a["og_raw"] = url_base @@ -4836,7 +4853,7 @@ class HttpCli(object): pass zs = '\t' - oghs = [zs % (k, v) for k, v in ogh.items()] + oghs = [zs % (k, html_escape(str(v))) for k, v in ogh.items()] zs = self.html_head + "\n%s\n" % ("\n".join(oghs),) self.html_head = zs.replace("\n\n", "\n") diff --git a/copyparty/util.py b/copyparty/util.py index 45975b64..643fc91e 100644 --- a/copyparty/util.py +++ b/copyparty/util.py @@ -35,6 +35,9 @@ from .__init__ import ANYWIN, EXE, MACOS, PY2, TYPE_CHECKING, VT100, WINDOWS from .__version__ import S_BUILD_DT, S_VERSION from .stolen import surrogateescape +ub64dec = base64.urlsafe_b64decode +ub64enc = base64.urlsafe_b64encode + try: from datetime import datetime, timezone diff --git a/tests/util.py b/tests/util.py index a6b044d5..01161e28 100644 --- a/tests/util.py +++ b/tests/util.py @@ -110,7 +110,7 @@ class Cfg(Namespace): def __init__(self, a=None, v=None, c=None, **ka0): ka = {} - ex = "daw dav_auth dav_inf dav_mac dav_rt e2d e2ds e2dsa e2t e2ts e2tsr e2v e2vu e2vp early_ban ed emp exp force_js getmod grid hardlink ih ihead magic never_symlink nid nih no_acode no_athumb no_dav no_dedup no_del no_dupe no_lifetime no_logues no_mv no_pipe no_readme no_robots no_sb_md no_sb_lg no_scandir no_tarcmp no_thumb no_vthumb no_zip nrand nw og og_no_head q rand smb srch_dbg stats vague_403 vc ver xdev xlink xvol" + ex = "daw dav_auth dav_inf dav_mac dav_rt e2d e2ds e2dsa e2t e2ts e2tsr e2v e2vu e2vp early_ban ed emp exp force_js getmod grid hardlink ih ihead magic never_symlink nid nih no_acode no_athumb no_dav no_dedup no_del no_dupe no_lifetime no_logues no_mv no_pipe no_readme no_robots no_sb_md no_sb_lg no_scandir no_tarcmp no_thumb no_vthumb no_zip nrand nw og og_no_head q rand smb srch_dbg stats uqe vague_403 vc ver xdev xlink xvol" ka.update(**{k: False for k in ex.split()}) ex = "dotpart dotsrch no_dhash no_fastboot no_rescan no_sendfile no_snap no_voldump re_dhash plain_ip"