add og / opengraph / discord embeds

2025-11-24 07:23:22 -07:00 · 2024-05-01 23:40:56 +00:00 · 2024-05-01 23:40:56 +00:00 · ea270ab9f2
parent b6cf2d3089
commit ea270ab9f2
9 changed files with 248 additions and 18 deletions
--- a/README.md
+++ b/README.md
@ -60,6 +60,7 @@ turn almost any device into a file server with resumable uploads/downloads using
    * [tftp server](#tftp-server) - a TFTP server (read/write) can be started using `--tftp 3969`
    * [smb server](#smb-server) - unsafe, slow, not recommended for wan
    * [browser ux](#browser-ux) - tweaking the ui
+    * [opengraph](#opengraph) - discord and social-media embeds
    * [file indexing](#file-indexing) - enables dedup and music search ++
        * [exclude-patterns](#exclude-patterns) - to save some time
        * [filesystem guards](#filesystem-guards) - avoid traversing into other filesystems
@ -234,6 +235,7 @@ also see [comparison to similar software](./docs/versus.md)
 * client support
  * ☑ [folder sync](#folder-sync)
  * ☑ [curl-friendly](https://user-images.githubusercontent.com/241032/215322619-ea5fd606-3654-40ad-94ee-2bc058647bb2.png)
+  * ☑ [opengraph](#opengraph) (discord embeds)
 * markdown
  * ☑ [viewer](#markdown-viewer)
  * ☑ editor (sure why not)
@ -1070,6 +1072,21 @@ tweaking the ui
 see [./docs/rice](./docs/rice) for more, including how to add stuff (css/`<meta>`/...) to the html `<head>` tag


+## opengraph
+
+discord and social-media embeds
+
+can be enabled globally with `--og` or per-volume with volflag `og`
+
+note that this disables hotlinking because the opengraph spec demands it; to sneak past this intentional limitation, you can enable opengraph selectively by user-agent, for example `--og-ua '(Discord|Twitter|Slack)bot'` (or volflag `og_ua`)
+
+you can also hotlink files regardless by appending `?raw` to the url
+
+if you want to entirely replace the copyparty response with your own jinja2 template, give the template filepath to `--og-tpl` or volflag `og_tpl` (all members of `HttpCli` are available through the `this` object)
+
+because discord (and maybe others) strip query args such as `?raw`, opengraph is incompatible with filekeys and dirkeys
+
+
 ## file indexing

 enables dedup and music search ++
--- a/copyparty/main.py
+++ b/copyparty/main.py
@ -1257,6 +1257,18 @@ def add_txt(ap):
    ap2.add_argument("--exp-lg", metavar="V,V,V", type=u, default=DEF_EXP, help="comma/space-separated list of placeholders to expand in prologue/epilogue files (volflag=exp_lg)")


+def add_og(ap):  # registers the opengraph / discord-embed options; each one has a same-named volflag
+    ap2 = ap.add_argument_group('og / open graph / discord-embed options')
+    ap2.add_argument("--og", action="store_true", help="disable hotlinking and return an html document instead; this is required by open-graph, but can also be useful on its own (volflag=og)")
+    ap2.add_argument("--og-ua", metavar="RE", type=u, default="", help="only disable hotlinking / engage OG behavior if the useragent matches regex \033[33mRE\033[0m (volflag=og_ua)")
+    ap2.add_argument("--og-tpl", metavar="PATH", type=u, default="", help="do not return the regular copyparty html, but instead load the jinja2 template at \033[33mPATH\033[0m (if path contains 'EXT' then EXT will be replaced with the requested file's extension) (volflag=og_tpl)")
+    ap2.add_argument("--og-no-head", action="store_true", help="do not automatically add OG entries into <head> (useful if you're doing this yourself in a template or such) (volflag=og_no_head)")
+    ap2.add_argument("--og-th", metavar="FMT", type=u, default="jf3", help="thumbnail format; j=jpeg, jf=jpeg-uncropped, jf3=jpeg-uncropped-large, w=webm, ... (volflag=og_th)")
+    ap2.add_argument("--og-title", metavar="TXT", type=u, default="", help="fallback title if there is nothing in the \033[33m-e2t\033[0m database (volflag=og_title)")
+    ap2.add_argument("--og-desc", metavar="TXT", type=u, default="", help="description text; same for all files, disable with [\033[32m-\033[0m] (volflag=og_desc)")
+    ap2.add_argument("--og-site", metavar="TXT", type=u, default="", help="sitename; defaults to \033[33m--name\033[0m, disable with [\033[32m-\033[0m] (volflag=og_site)")
+
+
 def add_ui(ap, retry):
    ap2 = ap.add_argument_group('ui options')
    ap2.add_argument("--grid", action="store_true", help="show grid/thumbnails by default (volflag=grid)")
@ -1357,6 +1369,7 @@ def run_argparse(
    add_hooks(ap)
    add_stats(ap)
    add_txt(ap)
+    add_og(ap)
    add_ui(ap, retry)
    add_admin(ap)
    add_logging(ap)
--- a/copyparty/authsrv.py
+++ b/copyparty/authsrv.py
@ -1727,7 +1727,11 @@ class AuthSrv(object):
            if self.args.e2d or "e2ds" in vol.flags:
                vol.flags["e2d"] = True

-            for ga, vf in [["no_hash", "nohash"], ["no_idx", "noidx"]]:
+            for ga, vf in [
+                ["no_hash", "nohash"],
+                ["no_idx", "noidx"],
+                ["og_ua", "og_ua"],
+            ]:
                if vf in vol.flags:
                    ptn = re.compile(vol.flags.pop(vf))
                else:
--- a/copyparty/cfg.py
+++ b/copyparty/cfg.py
@ -39,6 +39,8 @@ def vf_bmap() -> dict[str, str]:
        "magic",
        "no_sb_md",
        "no_sb_lg",
+        "og",
+        "og_no_head",
        "rand",
        "xdev",
        "xlink",
@ -65,6 +67,12 @@ def vf_vmap() -> dict[str, str]:
        "lg_sbf",
        "md_sbf",
        "nrand",
+        "og_desc",
+        "og_site",
+        "og_th",
+        "og_title",
+        "og_tpl",
+        "og_ua",
        "mv_retry",
        "rm_retry",
        "sort",
--- a/copyparty/httpcli.py
+++ b/copyparty/httpcli.py
@ -84,6 +84,7 @@ from .util import (
    sanitize_vpath,
    sendfile_kern,
    sendfile_py,
+    ujoin,
    undot,
    unescape_cookie,
    unquotep,
@ -4074,6 +4075,27 @@ class HttpCli(object):
        return True

    def tx_browser(self) -> bool:
+        vn = self.vn
+        rem = self.rem
+
+        add_og = "og" in vn.flags
+        if add_og:
+            if ".og-raw/" in rem:
+                # sad workaround: discord strips ?raw=1 so give it a unique url instead
+                self.uparam["raw"] = True
+                self.vpath = self.vpath.replace(".og-raw/", "")
+                vn, rem = self.asrv.vfs.get(self.vpath, self.uname, False, False)
+                self.vn = vn
+                self.rem = rem
+            if "th" in self.uparam or "raw" in self.uparam:
+                og_ua = add_og = False
+            elif self.args.og_ua:
+                og_ua = add_og = self.args.og_ua.search(self.ua)
+            else:
+                og_ua = False
+                add_og = True
+            og_fn = ""
+
        vpath = ""
        vpnodes = [["", "/"]]
        if self.vpath:
@ -4085,8 +4107,6 @@ class HttpCli(object):

                vpnodes.append([quotep(vpath) + "/", html_escape(node, crlf=True)])

-        vn = self.vn
-        rem = self.rem
        abspath = vn.dcanonical(rem)
        dbv, vrem = vn.get_dbv(rem)

@ -4117,7 +4137,6 @@ class HttpCli(object):
        e2d = "e2d" in vn.flags
        e2t = "e2t" in vn.flags

-        self.html_head += vn.flags.get("html_head", "")
        if "b" in self.uparam:
            self.out_headers["X-Robots-Tag"] = "noindex, nofollow"

@ -4125,13 +4144,15 @@ class HttpCli(object):
        is_dk = False
        fk_pass = False
        icur = None
-        if is_dir and (e2t or e2d):
+        if (e2t or e2d) and (is_dir or add_og):
            idx = self.conn.get_u2idx()
            if idx and hasattr(idx, "p_end"):
                icur = idx.get_cur(dbv)

        th_fmt = self.uparam.get("th")
-        if self.can_read or (self.can_get and vn.flags.get("dk")):
+        if self.can_read or (
+            self.can_get and (vn.flags.get("dk") or "fk" not in vn.flags)
+        ):
            if th_fmt is not None:
                nothumb = "dthumb" in dbv.flags
                if is_dir:
@ -4178,7 +4199,7 @@ class HttpCli(object):
        elif self.can_write and th_fmt is not None:
            return self.tx_svg("upload\nonly")

-        elif self.can_get and self.avn:
+        if not self.can_read and self.can_get and self.avn:
            axs = self.avn.axs
            if self.uname not in axs.uhtml:
                pass
@ -4224,6 +4245,17 @@ class HttpCli(object):
                    self.log(t % (correct, got, self.req, abspath), 6)
                    return self.tx_404()

+            if add_og:
+                if og_ua or self.host not in self.headers.get("referer", ""):
+                    self.vpath, og_fn = vsplit(self.vpath)
+                    vpath = self.vpath
+                    vn, rem = self.asrv.vfs.get(self.vpath, self.uname, False, False)
+                    abspath = vn.dcanonical(rem)
+                    dbv, vrem = vn.get_dbv(rem)
+                    is_dir = stat.S_ISDIR(st.st_mode)
+                    is_dk = True
+                    vpnodes.pop()
+
            if (
                (abspath.endswith(".md") or self.can_delete)
                and "nohtml" not in vn.flags
@ -4235,6 +4267,7 @@ class HttpCli(object):
            ):
                return self.tx_md(vn, abspath)

+            if not add_og or not og_fn:
                return self.tx_file(
                    abspath, None if st.st_size or "nopipe" in vn.flags else vn.realpath
                )
@ -4284,7 +4317,11 @@ class HttpCli(object):
        is_ls = "ls" in self.uparam
        is_js = self.args.force_js or self.cookies.get("js") == "y"

-        if not is_ls and (self.ua.startswith("curl/") or self.ua.startswith("fetch")):
+        if (
+            not is_ls
+            and not add_og
+            and (self.ua.startswith("curl/") or self.ua.startswith("fetch"))
+        ):
            self.uparam["ls"] = "v"
            is_ls = True

@ -4409,7 +4446,7 @@ class HttpCli(object):

        for k in ["zip", "tar"]:
            v = self.uparam.get(k)
-            if v is not None:
+            if v is not None and (not add_og or not og_fn):
                return self.tx_zip(k, v, self.vpath, vn, rem, [])

        fsroot, vfs_ls, vfs_virt = vn.ls(
@ -4423,6 +4460,10 @@ class HttpCli(object):
        ls_names = [x[0] for x in vfs_ls]
        ls_names.extend(list(vfs_virt.keys()))

+        if add_og and og_fn and not self.can_read:
+            ls_names = [og_fn]
+            is_js = True
+
        # check for old versions of files,
        # [num-backups, most-recent, hist-path]
        hist: dict[str, tuple[int, float, str]] = {}
@ -4684,6 +4725,121 @@ class HttpCli(object):
        if "mth" in vn.flags:
            j2a["def_hcols"] = list(vn.flags["mth"])

+        if add_og and "raw" not in self.uparam:
+            j2a["this"] = self
+            cgv["og_fn"] = og_fn
+            if og_fn and vn.flags.get("og_tpl"):
+                tpl = vn.flags["og_tpl"]
+                if "EXT" in tpl:
+                    zs = og_fn.split(".")[-1].lower()
+                    tpl2 = tpl.replace("EXT", zs)
+                    if os.path.exists(tpl2):
+                        tpl = tpl2
+                with self.conn.hsrv.mutex:
+                    if tpl not in self.conn.hsrv.j2:
+                        tdir, tname = os.path.split(tpl)
+                        j2env = jinja2.Environment()
+                        j2env.loader = jinja2.FileSystemLoader(tdir)
+                        self.conn.hsrv.j2[tpl] = j2env.get_template(tname)
+            thumb = ""
+            is_pic = is_vid = is_au = False
+            covernames = self.args.th_coversd
+            for fn in ls_names:
+                if fn.lower() in covernames:
+                    thumb = fn
+                    break
+            if og_fn:
+                ext = og_fn.split(".")[-1].lower()
+                if ext in self.thumbcli.thumbable:
+                    is_pic = (
+                        ext in self.thumbcli.fmt_pil
+                        or ext in self.thumbcli.fmt_vips
+                        or ext in self.thumbcli.fmt_ffi
+                    )
+                    is_vid = ext in self.thumbcli.fmt_ffv
+                    is_au = ext in self.thumbcli.fmt_ffa
+                    if not thumb or not is_au:
+                        thumb = og_fn
+                file = next((x for x in files if x["name"] == og_fn), None)
+            else:
+                file = None
+
+            url_base = "%s://%s/%s" % (
+                "https" if self.is_https else "http",
+                self.host,
+                self.args.RS + quotep(vpath),
+            )
+            j2a["og_is_pic"] = is_pic
+            j2a["og_is_vid"] = is_vid
+            j2a["og_is_au"] = is_au
+            if thumb:
+                fmt = vn.flags.get("og_th", "j")
+                zs = ujoin(url_base, quotep(thumb))
+                j2a["og_thumb"] = "%s?th=%s&cache" % (zs, fmt)
+
+            j2a["og_fn"] = og_fn
+            j2a["og_file"] = file
+            if og_fn:
+                og_fn_q = quotep(og_fn)
+                j2a["og_url"] = ujoin(url_base, og_fn_q)
+                j2a["og_raw"] = ujoin(url_base, vjoin(".og-raw", og_fn_q))
+                # discord strips ?raw so it always downloads the html... orz
+            else:
+                j2a["og_url"] = j2a["og_raw"] = url_base
+
+            if not vn.flags.get("og_no_head"):
+                ogh = {"twitter:card": "summary"}
+
+                if thumb:
+                    ogh["og:image"] = j2a["og_thumb"]
+
+                zso = vn.flags.get("og_title")
+                if zso:
+                    ogh["og:title"] = str(zso)
+
+                zso = vn.flags.get("og_desc") or ""
+                if zso != "-":
+                    ogh["og:description"] = str(zso)
+
+                zs = vn.flags.get("og_site") or self.args.name
+                if zs not in ("", "-"):
+                    ogh["og:site_name"] = zs
+
+                tagmap = {}
+                if is_au:
+                    ogh["og:type"] = "music.song"
+                    ogh["og:audio"] = j2a["og_raw"]
+                    tagmap = {
+                        "title": "og:title",
+                        "artist": "og:music:musician",
+                        "album": "og:music:album",
+                        ".dur": "og:music:duration",
+                    }
+                elif is_vid:
+                    ogh["og:type"] = "video.other"
+                    ogh["og:video"] = j2a["og_raw"]
+                    tagmap = {
+                        "title": "og:title",
+                        ".dur": "og:video:duration",
+                    }
+                elif is_pic:
+                    ogh["og:type"] = "video.other"
+                    ogh["og:image"] = j2a["og_raw"]
+
+                for tag, hname in tagmap.items():
+                    try:
+                        v = file["tags"][tag]
+                        if not v:
+                            continue
+                        ogh[hname] = int(v) if tag == ".dur" else v
+                    except:
+                        pass
+
+                # og values come from volflags and file tags (untrusted); escape them for use inside an html attribute
+                zs = '\t<meta property="%s" content="%s">'
+                oghs = [zs % (k, html_escape(str(v), quot=True, crlf=True)) for k, v in ogh.items()]
+                self.html_head = (self.html_head + "\n%s\n" % ("\n".join(oghs),)).replace("\n\n", "\n")
+
        html = self.j2s(tpl, **j2a)
        self.reply(html.encode("utf-8", "replace"))
        return True
--- a/copyparty/svchub.py
+++ b/copyparty/svchub.py
@ -526,7 +526,7 @@ class SvcHub(object):
        al.exp_md = odfusion(exp, al.exp_md.replace(" ", ","))
        al.exp_lg = odfusion(exp, al.exp_lg.replace(" ", ","))

-        for k in ["no_hash", "no_idx"]:
+        for k in ["no_hash", "no_idx", "og_ua"]:
            ptn = getattr(self.args, k)
            if ptn:
                setattr(self.args, k, re.compile(ptn))
--- a/copyparty/util.py
+++ b/copyparty/util.py
@ -2031,6 +2031,7 @@ def vsplit(vpath: str) -> tuple[str, str]:
    return vpath.rsplit("/", 1)  # type: ignore


+# vpath-join
 def vjoin(rd: str, fn: str) -> str:
    if rd and fn:
        return rd + "/" + fn
@ -2038,6 +2039,14 @@ def vjoin(rd: str, fn: str) -> str:
        return rd or fn


+# url-join
+def ujoin(rd: str, fn: str) -> str:
+    """join two url fragments with exactly one slash between them; if either is empty, return the other as-is"""
+    if not (rd and fn):
+        return rd or fn
+    return rd.rstrip("/") + "/" + fn.lstrip("/")
+
+
 def _w8dec2(txt: bytes) -> str:
    """decodes filesystem-bytes to wtf8"""
    return surrogateescape.decodefilename(txt)
--- a/copyparty/web/browser.js
+++ b/copyparty/web/browser.js
@ -1,6 +1,8 @@
 "use strict";

-var XHR = XMLHttpRequest;
+var XHR = XMLHttpRequest,
+	img_re = /\.(a?png|avif|bmp|gif|heif|jpe?g|jfif|svg|webp|webm|mkv|mp4)(\?|$)/i;
+
 var Ls = {
 	"eng": {
 		"tt": "English",
@ -1419,6 +1421,12 @@ var ACtx = !IPHONE && (window.AudioContext || window.webkitAudioContext),
 	dk, mp;


+if (window.og_fn) {
+	hash0 = 1;
+	hist_replace(vsplit(get_evpath())[0]);
+}
+
+
 var mpl = (function () {
 	var have_mctl = 'mediaSession' in navigator && window.MediaMetadata;

@ -3289,6 +3297,21 @@ function scan_hash(v) {
 function eval_hash() {
 	window.onpopstate = treectl.onpopfun;

+	if (hash0 && window.og_fn) {
+		var all = msel.getall(), mi;
+		for (var a = 0; a < all.length; a++)
+			if (og_fn == uricom_dec(vsplit(all[a].vp)[1].split('?')[0])) {
+				mi = all[a];
+				break;
+			}
+		// mi stays undefined when og_fn is not in the listing, so guard every mi.id access
+		if (mi && img_re.exec(og_fn))
+			hash0 = '#g' + mi.id;
+
+		if (mi && ebi('a' + mi.id))
+			hash0 = '#a' + mi.id;
+	}
+
 	var v = hash0;
 	hash0 = null;
 	if (!v)
@ -4756,7 +4779,7 @@ var thegrid = (function () {
 			aplay = ebi('a' + fid),
 			atext = ebi('t' + fid),
 			is_txt = atext && showfile.getlang(href),
-			is_img = /\.(a?png|avif|bmp|gif|heif|jpe?g|jfif|svg|webp|webm|mkv|mp4)(\?|$)/i.test(href),
+			is_img = img_re.test(href),
 			is_dir = href.endsWith('/'),
 			is_srch = !!ebi('unsearch'),
 			in_tree = is_dir && treectl.find(oth.textContent.slice(0, -1)),
--- a/tests/util.py
+++ b/tests/util.py
@ -110,13 +110,13 @@ class Cfg(Namespace):
    def __init__(self, a=None, v=None, c=None, **ka0):
        ka = {}

-        ex = "daw dav_auth dav_inf dav_mac dav_rt e2d e2ds e2dsa e2t e2ts e2tsr e2v e2vu e2vp early_ban ed emp exp force_js getmod grid hardlink ih ihead magic never_symlink nid nih no_acode no_athumb no_dav no_dedup no_del no_dupe no_lifetime no_logues no_mv no_pipe no_readme no_robots no_sb_md no_sb_lg no_scandir no_tarcmp no_thumb no_vthumb no_zip nrand nw q rand smb srch_dbg stats vague_403 vc ver xdev xlink xvol"
+        ex = "daw dav_auth dav_inf dav_mac dav_rt e2d e2ds e2dsa e2t e2ts e2tsr e2v e2vu e2vp early_ban ed emp exp force_js getmod grid hardlink ih ihead magic never_symlink nid nih no_acode no_athumb no_dav no_dedup no_del no_dupe no_lifetime no_logues no_mv no_pipe no_readme no_robots no_sb_md no_sb_lg no_scandir no_tarcmp no_thumb no_vthumb no_zip nrand nw og og_no_head q rand smb srch_dbg stats vague_403 vc ver xdev xlink xvol"
        ka.update(**{k: False for k in ex.split()})

        ex = "dotpart dotsrch no_dhash no_fastboot no_rescan no_sendfile no_snap no_voldump re_dhash plain_ip"
        ka.update(**{k: True for k in ex.split()})

-        ex = "ah_cli ah_gen css_browser hist js_browser no_forget no_hash no_idx nonsus_urls"
+        ex = "ah_cli ah_gen css_browser hist js_browser no_forget no_hash no_idx nonsus_urls og_tpl og_ua"
        ka.update(**{k: None for k in ex.split()})

        ex = "hash_mt srch_time u2abort u2j"
@ -128,7 +128,7 @@ class Cfg(Namespace):
        ex = "db_act df k304 loris re_maxage rproxy rsp_jtr rsp_slp s_wr_slp snap_wri theme themes turbo"
        ka.update(**{k: 0 for k in ex.split()})

-        ex = "ah_alg bname doctitle exit favico idp_h_usr html_head lg_sbf log_fk md_sbf name textfiles unlist vname R RS SR"
+        ex = "ah_alg bname doctitle exit favico idp_h_usr html_head lg_sbf log_fk md_sbf name og_desc og_site og_th og_title textfiles unlist vname R RS SR"
        ka.update(**{k: "" for k in ex.split()})

        ex = "grp on403 on404 xad xar xau xban xbd xbr xbu xiu xm"