add general-purpose query-string parceling;

currently only being used to work around discord discarding
query strings in opengraph tags, but I'm sure there will be
plenty more wonderful use cases for this atrocity
This commit is contained in:
ed 2024-05-02 22:49:27 +00:00
parent ea270ab9f2
commit 69517e4624
5 changed files with 53 additions and 29 deletions

View file

@ -1267,6 +1267,7 @@ def add_og(ap):
ap2.add_argument("--og-title", metavar="TXT", type=u, default="", help="fallback title if there is nothing in the \033[33m-e2t\033[0m database (volflag=og_site)") ap2.add_argument("--og-title", metavar="TXT", type=u, default="", help="fallback title if there is nothing in the \033[33m-e2t\033[0m database (volflag=og_site)")
ap2.add_argument("--og-desc", metavar="TXT", type=u, default="", help="description text; same for all files, disable with [\033[32m-\033[0m] (volflag=og_desc)") ap2.add_argument("--og-desc", metavar="TXT", type=u, default="", help="description text; same for all files, disable with [\033[32m-\033[0m] (volflag=og_desc)")
ap2.add_argument("--og-site", metavar="TXT", type=u, default="", help="sitename; defaults to \033[33m--name\033[0m, disable with [\033[32m-\033[0m] (volflag=og_site)") ap2.add_argument("--og-site", metavar="TXT", type=u, default="", help="sitename; defaults to \033[33m--name\033[0m, disable with [\033[32m-\033[0m] (volflag=og_site)")
ap2.add_argument("--uqe", action="store_true", help="query-string parceling; translate a request for \033[33m/foo/.uqe/BASE64\033[0m into \033[33m/foo?TEXT\033[0m, or \033[33m/foo/?TEXT\033[0m if the first character in \033[33mTEXT\033[0m is a slash. Automatically enabled for \033[33m--og\033[0m")
def add_ui(ap, retry): def add_ui(ap, retry):
@ -1281,7 +1282,7 @@ def add_ui(ap, retry):
ap2.add_argument("--mpmc", metavar="URL", type=u, default="", help="change the mediaplayer-toggle mouse cursor; URL to a folder with {2..5}.png inside (or disable with [\033[32m.\033[0m])") ap2.add_argument("--mpmc", metavar="URL", type=u, default="", help="change the mediaplayer-toggle mouse cursor; URL to a folder with {2..5}.png inside (or disable with [\033[32m.\033[0m])")
ap2.add_argument("--js-browser", metavar="L", type=u, help="URL to additional JS to include") ap2.add_argument("--js-browser", metavar="L", type=u, help="URL to additional JS to include")
ap2.add_argument("--css-browser", metavar="L", type=u, help="URL to additional CSS to include") ap2.add_argument("--css-browser", metavar="L", type=u, help="URL to additional CSS to include")
ap2.add_argument("--html-head", metavar="TXT", type=u, default="", help="text to append to the <head> of all HTML pages; can be @PATH to send the contents of a file at PATH, and/or begin with % to render as jinja2 template (volflag=html_head)") ap2.add_argument("--html-head", metavar="TXT", type=u, default="", help="text to append to the <head> of all HTML pages; can be @PATH to send the contents of a file at PATH, and/or begin with %% to render as jinja2 template (volflag=html_head)")
ap2.add_argument("--ih", action="store_true", help="if a folder contains index.html, show that instead of the directory listing by default (can be changed in the client settings UI, or add ?v to URL for override)") ap2.add_argument("--ih", action="store_true", help="if a folder contains index.html, show that instead of the directory listing by default (can be changed in the client settings UI, or add ?v to URL for override)")
ap2.add_argument("--textfiles", metavar="CSV", type=u, default="txt,nfo,diz,cue,readme", help="file extensions to present as plaintext") ap2.add_argument("--textfiles", metavar="CSV", type=u, default="txt,nfo,diz,cue,readme", help="file extensions to present as plaintext")
ap2.add_argument("--txt-max", metavar="KiB", type=int, default=64, help="max size of embedded textfiles on ?doc= (anything bigger will be lazy-loaded by JS)") ap2.add_argument("--txt-max", metavar="KiB", type=int, default=64, help="max size of embedded textfiles on ?doc= (anything bigger will be lazy-loaded by JS)")

View file

@ -1777,6 +1777,9 @@ class AuthSrv(object):
t = 'volume "/%s" has invalid %stry [%s]' t = 'volume "/%s" has invalid %stry [%s]'
raise Exception(t % (vol.vpath, k, vol.flags.get(k + "try"))) raise Exception(t % (vol.vpath, k, vol.flags.get(k + "try")))
if vol.flags.get("og"):
self.args.uqe = True
for k1, k2 in IMPLICATIONS: for k1, k2 in IMPLICATIONS:
if k1 in vol.flags: if k1 in vol.flags:
vol.flags[k2] = True vol.flags[k2] = True

View file

@ -84,6 +84,8 @@ from .util import (
sanitize_vpath, sanitize_vpath,
sendfile_kern, sendfile_kern,
sendfile_py, sendfile_py,
ub64dec,
ub64enc,
ujoin, ujoin,
undot, undot,
unescape_cookie, unescape_cookie,
@ -371,6 +373,21 @@ class HttpCli(object):
if "&" in self.req and "?" not in self.req: if "&" in self.req and "?" not in self.req:
self.hint = "did you mean '?' instead of '&'" self.hint = "did you mean '?' instead of '&'"
if self.args.uqe and "/.uqe/" in self.req:
try:
vpath, query = self.req.split("?")[0].split("/.uqe/")
query = query.split("/")[0] # discard trailing junk
# (usually a "filename" to trick discord into behaving)
query = ub64dec(query.encode("utf-8")).decode("utf-8", "replace")
if query.startswith("/"):
self.req = "%s/?%s" % (vpath, query[1:])
else:
self.req = "%s?%s" % (vpath, query)
except Exception as ex:
t = "bad uqe in request [%s]: %r" % (self.req, ex)
self.loud_reply(t, status=400)
return False
# split req into vpath + uparam # split req into vpath + uparam
uparam = {} uparam = {}
if "?" not in self.req: if "?" not in self.req:
@ -437,7 +454,8 @@ class HttpCli(object):
cookie_pw = "" cookie_pw = ""
if len(uparam) > 10 or len(cookies) > 50: if len(uparam) > 10 or len(cookies) > 50:
raise Pebkac(400, "u wot m8") self.loud_reply("u wot m8", status=400)
return False
self.uparam = uparam self.uparam = uparam
self.cookies = cookies self.cookies = cookies
@ -4075,27 +4093,6 @@ class HttpCli(object):
return True return True
def tx_browser(self) -> bool: def tx_browser(self) -> bool:
vn = self.vn
rem = self.rem
add_og = "og" in vn.flags
if add_og:
if ".og-raw/" in rem:
# sad workaround: discord strips ?raw=1 so give it a unique url instead
self.uparam["raw"] = True
self.vpath = self.vpath.replace(".og-raw/", "")
vn, rem = self.asrv.vfs.get(self.vpath, self.uname, False, False)
self.vn = vn
self.rem = rem
if "th" in self.uparam or "raw" in self.uparam:
og_ua = add_og = False
elif self.args.og_ua:
og_ua = add_og = self.args.og_ua.search(self.ua)
else:
og_ua = False
add_og = True
og_fn = ""
vpath = "" vpath = ""
vpnodes = [["", "/"]] vpnodes = [["", "/"]]
if self.vpath: if self.vpath:
@ -4107,6 +4104,8 @@ class HttpCli(object):
vpnodes.append([quotep(vpath) + "/", html_escape(node, crlf=True)]) vpnodes.append([quotep(vpath) + "/", html_escape(node, crlf=True)])
vn = self.vn
rem = self.rem
abspath = vn.dcanonical(rem) abspath = vn.dcanonical(rem)
dbv, vrem = vn.get_dbv(rem) dbv, vrem = vn.get_dbv(rem)
@ -4137,6 +4136,17 @@ class HttpCli(object):
e2d = "e2d" in vn.flags e2d = "e2d" in vn.flags
e2t = "e2t" in vn.flags e2t = "e2t" in vn.flags
add_og = "og" in vn.flags
if add_og:
if "th" in self.uparam or "raw" in self.uparam:
og_ua = add_og = False
elif self.args.og_ua:
og_ua = add_og = self.args.og_ua.search(self.ua)
else:
og_ua = False
add_og = True
og_fn = ""
if "b" in self.uparam: if "b" in self.uparam:
self.out_headers["X-Robots-Tag"] = "noindex, nofollow" self.out_headers["X-Robots-Tag"] = "noindex, nofollow"
@ -4774,16 +4784,23 @@ class HttpCli(object):
j2a["og_is_au"] = is_au j2a["og_is_au"] = is_au
if thumb: if thumb:
fmt = vn.flags.get("og_th", "j") fmt = vn.flags.get("og_th", "j")
zs = ujoin(url_base, quotep(thumb)) th_base = ujoin(url_base, quotep(thumb))
j2a["og_thumb"] = "%s?th=%s&cache" % (zs, fmt) query = "th=%s&cache" % (fmt,)
query = ub64enc(query.encode("utf-8")).decode("utf-8")
# discord looks at file extension, not content-type...
query += "/a.jpg" if "j" in fmt else "/a.webp"
j2a["og_thumb"] = "%s/.uqe/%s" % (th_base, query)
j2a["og_fn"] = og_fn j2a["og_fn"] = og_fn
j2a["og_file"] = file j2a["og_file"] = file
if og_fn: if og_fn:
og_fn_q = quotep(og_fn) og_fn_q = quotep(og_fn)
query = ub64enc(b"raw").decode("utf-8")
if "." in og_fn:
query += "/a.%s" % (og_fn.split(".")[-1])
j2a["og_url"] = ujoin(url_base, og_fn_q) j2a["og_url"] = ujoin(url_base, og_fn_q)
j2a["og_raw"] = ujoin(url_base, vjoin(".og-raw", og_fn_q)) j2a["og_raw"] = j2a["og_url"] + "/.uqe/" + query
# discord strips ?raw so it always downloads the html... orz
else: else:
j2a["og_url"] = j2a["og_raw"] = url_base j2a["og_url"] = j2a["og_raw"] = url_base
@ -4836,7 +4853,7 @@ class HttpCli(object):
pass pass
zs = '\t<meta property="%s" content="%s">' zs = '\t<meta property="%s" content="%s">'
oghs = [zs % (k, v) for k, v in ogh.items()] oghs = [zs % (k, html_escape(str(v))) for k, v in ogh.items()]
zs = self.html_head + "\n%s\n" % ("\n".join(oghs),) zs = self.html_head + "\n%s\n" % ("\n".join(oghs),)
self.html_head = zs.replace("\n\n", "\n") self.html_head = zs.replace("\n\n", "\n")

View file

@ -35,6 +35,9 @@ from .__init__ import ANYWIN, EXE, MACOS, PY2, TYPE_CHECKING, VT100, WINDOWS
from .__version__ import S_BUILD_DT, S_VERSION from .__version__ import S_BUILD_DT, S_VERSION
from .stolen import surrogateescape from .stolen import surrogateescape
ub64dec = base64.urlsafe_b64decode
ub64enc = base64.urlsafe_b64encode
try: try:
from datetime import datetime, timezone from datetime import datetime, timezone

View file

@ -110,7 +110,7 @@ class Cfg(Namespace):
def __init__(self, a=None, v=None, c=None, **ka0): def __init__(self, a=None, v=None, c=None, **ka0):
ka = {} ka = {}
ex = "daw dav_auth dav_inf dav_mac dav_rt e2d e2ds e2dsa e2t e2ts e2tsr e2v e2vu e2vp early_ban ed emp exp force_js getmod grid hardlink ih ihead magic never_symlink nid nih no_acode no_athumb no_dav no_dedup no_del no_dupe no_lifetime no_logues no_mv no_pipe no_readme no_robots no_sb_md no_sb_lg no_scandir no_tarcmp no_thumb no_vthumb no_zip nrand nw og og_no_head q rand smb srch_dbg stats vague_403 vc ver xdev xlink xvol" ex = "daw dav_auth dav_inf dav_mac dav_rt e2d e2ds e2dsa e2t e2ts e2tsr e2v e2vu e2vp early_ban ed emp exp force_js getmod grid hardlink ih ihead magic never_symlink nid nih no_acode no_athumb no_dav no_dedup no_del no_dupe no_lifetime no_logues no_mv no_pipe no_readme no_robots no_sb_md no_sb_lg no_scandir no_tarcmp no_thumb no_vthumb no_zip nrand nw og og_no_head q rand smb srch_dbg stats uqe vague_403 vc ver xdev xlink xvol"
ka.update(**{k: False for k in ex.split()}) ka.update(**{k: False for k in ex.split()})
ex = "dotpart dotsrch no_dhash no_fastboot no_rescan no_sendfile no_snap no_voldump re_dhash plain_ip" ex = "dotpart dotsrch no_dhash no_fastboot no_rescan no_sendfile no_snap no_voldump re_dhash plain_ip"