google isn't taking the hint

specifically google, but also some others, have started ignoring
rel="nofollow" while also understanding just enough javascript to
try viewing binary files as text
This commit is contained in:
ed 2025-03-23 21:21:41 +00:00
parent de2c978842
commit 99f63adf58
5 changed files with 16 additions and 2 deletions

View file

@ -40,6 +40,7 @@ from .cfg import flagcats, onedash
from .svchub import SvcHub from .svchub import SvcHub
from .util import ( from .util import (
APPLESAN_TXT, APPLESAN_TXT,
BAD_BOTS,
DEF_EXP, DEF_EXP,
DEF_MTE, DEF_MTE,
DEF_MTH, DEF_MTH,
@ -1244,6 +1245,7 @@ def add_optouts(ap):
ap2.add_argument("--zipmaxt", metavar="TXT", type=u, default="", help="custom errormessage when download size exceeds max (volflag=zipmaxt)") ap2.add_argument("--zipmaxt", metavar="TXT", type=u, default="", help="custom errormessage when download size exceeds max (volflag=zipmaxt)")
ap2.add_argument("--zipmaxu", action="store_true", help="authenticated users bypass the zip size limit (volflag=zipmaxu)") ap2.add_argument("--zipmaxu", action="store_true", help="authenticated users bypass the zip size limit (volflag=zipmaxu)")
ap2.add_argument("--zip-who", metavar="LVL", type=int, default=3, help="who can download as zip/tar? [\033[32m0\033[0m]=nobody, [\033[32m1\033[0m]=admins, [\033[32m2\033[0m]=authenticated-with-read-access, [\033[32m3\033[0m]=everyone-with-read-access (volflag=zip_who)\n\033[1;31mWARNING:\033[0m if a nested volume has a more restrictive value than a parent volume, then this will be \033[33mignored\033[0m if the download is initiated from the parent, more lenient volume") ap2.add_argument("--zip-who", metavar="LVL", type=int, default=3, help="who can download as zip/tar? [\033[32m0\033[0m]=nobody, [\033[32m1\033[0m]=admins, [\033[32m2\033[0m]=authenticated-with-read-access, [\033[32m3\033[0m]=everyone-with-read-access (volflag=zip_who)\n\033[1;31mWARNING:\033[0m if a nested volume has a more restrictive value than a parent volume, then this will be \033[33mignored\033[0m if the download is initiated from the parent, more lenient volume")
ap2.add_argument("--ua-nozip", metavar="PTN", type=u, default=BAD_BOTS, help="regex of user-agents to reject from download-as-zip/tar; disable with [\033[32mno\033[0m] or blank")
ap2.add_argument("--no-zip", action="store_true", help="disable download as zip/tar; same as \033[33m--zip-who=0\033[0m") ap2.add_argument("--no-zip", action="store_true", help="disable download as zip/tar; same as \033[33m--zip-who=0\033[0m")
ap2.add_argument("--no-tarcmp", action="store_true", help="disable download as compressed tar (?tar=gz, ?tar=bz2, ?tar=xz, ?tar=gz:9, ...)") ap2.add_argument("--no-tarcmp", action="store_true", help="disable download as compressed tar (?tar=gz, ?tar=bz2, ?tar=xz, ?tar=gz:9, ...)")
ap2.add_argument("--no-lifetime", action="store_true", help="do not allow clients (or server config) to schedule an upload to be deleted after a given time") ap2.add_argument("--no-lifetime", action="store_true", help="do not allow clients (or server config) to schedule an upload to be deleted after a given time")
@ -1434,6 +1436,7 @@ def add_txt(ap):
ap2.add_argument("--exp", action="store_true", help="enable textfile expansion -- replace {{self.ip}} and such; see \033[33m--help-exp\033[0m (volflag=exp)") ap2.add_argument("--exp", action="store_true", help="enable textfile expansion -- replace {{self.ip}} and such; see \033[33m--help-exp\033[0m (volflag=exp)")
ap2.add_argument("--exp-md", metavar="V,V,V", type=u, default=DEF_EXP, help="comma/space-separated list of placeholders to expand in markdown files; add/remove stuff on the default list with +hdr_foo or /vf.scan (volflag=exp_md)") ap2.add_argument("--exp-md", metavar="V,V,V", type=u, default=DEF_EXP, help="comma/space-separated list of placeholders to expand in markdown files; add/remove stuff on the default list with +hdr_foo or /vf.scan (volflag=exp_md)")
ap2.add_argument("--exp-lg", metavar="V,V,V", type=u, default=DEF_EXP, help="comma/space-separated list of placeholders to expand in prologue/epilogue files (volflag=exp_lg)") ap2.add_argument("--exp-lg", metavar="V,V,V", type=u, default=DEF_EXP, help="comma/space-separated list of placeholders to expand in prologue/epilogue files (volflag=exp_lg)")
ap2.add_argument("--ua-nodoc", metavar="PTN", type=u, default=BAD_BOTS, help="regex of user-agents to reject from viewing documents through ?doc=[...]; disable with [\033[32mno\033[0m] or blank")
def add_og(ap): def add_og(ap):

View file

@ -3807,6 +3807,9 @@ class HttpCli(object):
return "download-as-zip/tar is admin-only on this server" return "download-as-zip/tar is admin-only on this server"
elif lvl <= 2 and self.uname in ("", "*"): elif lvl <= 2 and self.uname in ("", "*"):
return "you must be authenticated to download-as-zip/tar on this server" return "you must be authenticated to download-as-zip/tar on this server"
elif self.args.ua_nozip and self.args.ua_nozip.search(self.ua):
t = "this URL contains no valuable information for bots/crawlers"
raise Pebkac(403, t)
return "" return ""
def tx_res(self, req_path: str) -> bool: def tx_res(self, req_path: str) -> bool:
@ -6291,6 +6294,10 @@ class HttpCli(object):
doc = self.uparam.get("doc") if self.can_read else None doc = self.uparam.get("doc") if self.can_read else None
if doc: if doc:
zp = self.args.ua_nodoc
if zp and zp.search(self.ua):
t = "this URL contains no valuable information for bots/crawlers"
raise Pebkac(403, t)
j2a["docname"] = doc j2a["docname"] = doc
doctxt = None doctxt = None
dfn = lnames.get(doc.lower()) dfn = lnames.get(doc.lower())

View file

@ -769,7 +769,8 @@ class SvcHub(object):
vs = os.path.expandvars(os.path.expanduser(vs)) vs = os.path.expandvars(os.path.expanduser(vs))
setattr(al, k, vs) setattr(al, k, vs)
for k in "dav_ua1 sus_urls nonsus_urls".split(" "): zs = "dav_ua1 sus_urls nonsus_urls ua_nodoc ua_nozip"
for k in zs.split(" "):
vs = getattr(al, k) vs = getattr(al, k)
if not vs or vs == "no": if not vs or vs == "no":
setattr(al, k, None) setattr(al, k, None)

View file

@ -245,6 +245,9 @@ SYMTIME = PY36 and os.utime in os.supports_follow_symlinks
META_NOBOTS = '<meta name="robots" content="noindex, nofollow">\n' META_NOBOTS = '<meta name="robots" content="noindex, nofollow">\n'
# smart enough to understand javascript while also ignoring rel="nofollow"
BAD_BOTS = r"Barkrowler|bingbot|BLEXBot|Googlebot|GPTBot|PetalBot|SeekportBot|SemrushBot|YandexBot"
FFMPEG_URL = "https://www.gyan.dev/ffmpeg/builds/ffmpeg-git-full.7z" FFMPEG_URL = "https://www.gyan.dev/ffmpeg/builds/ffmpeg-git-full.7z"
URL_PRJ = "https://github.com/9001/copyparty" URL_PRJ = "https://github.com/9001/copyparty"

View file

@ -135,7 +135,7 @@ class Cfg(Namespace):
ex = "dav_inf dedup dotpart dotsrch hook_v no_dhash no_fastboot no_fpool no_htp no_rescan no_sendfile no_ses no_snap no_up_list no_voldump re_dhash plain_ip" ex = "dav_inf dedup dotpart dotsrch hook_v no_dhash no_fastboot no_fpool no_htp no_rescan no_sendfile no_ses no_snap no_up_list no_voldump re_dhash plain_ip"
ka.update(**{k: True for k in ex.split()}) ka.update(**{k: True for k in ex.split()})
ex = "ah_cli ah_gen css_browser hist ipu js_browser js_other mime mimes no_forget no_hash no_idx nonsus_urls og_tpl og_ua" ex = "ah_cli ah_gen css_browser hist ipu js_browser js_other mime mimes no_forget no_hash no_idx nonsus_urls og_tpl og_ua ua_nodoc ua_nozip"
ka.update(**{k: None for k in ex.split()}) ka.update(**{k: None for k in ex.split()})
ex = "hash_mt hsortn safe_dedup srch_time u2abort u2j u2sz" ex = "hash_mt hsortn safe_dedup srch_time u2abort u2j u2sz"