thumbnail pregen; closes #1381

This commit is contained in:
ed 2026-03-23 01:57:32 +00:00
parent 5f3b76c8f8
commit 7d6b037dcc
10 changed files with 209 additions and 6 deletions

View file

@ -771,6 +771,7 @@ to show `/icons/exe.png` and `/icons/elf.gif` as the thumbnail for all `.exe` an
note:
* heif/heifs/heic/heics images usually require the `libvips` [optional dependency](#optional-dependencies) but this is not possible with the docker-images due to [legal reasons](docs/bad-codecs.md)
* if you do not want thumbnails to be generated on-the-fly, and instead wish to generate all of them on server startup, then see [thumbnail pregen](#thumbnail-pregen)
config file example:
@ -829,6 +830,7 @@ cool trick: download a folder by appending url-params `?tar&opus` or `?tar&mp3`
* and url-param `&nodot` skips dotfiles/dotfolders; they are included by default if your account has permission to see them
* and url-params `&j` / `&w` produce jpeg/webm thumbnails/spectrograms instead of the original audio/video/images (`&p` for audio waveforms)
* can also be used to pregenerate thumbnails; combine with `--th-maxage=9999999` or `--th-clean=0`
* but now there is also a real [thumbnail pregen](#thumbnail-pregen) so just use that
## uploading
@ -1906,6 +1908,30 @@ this is **cosmetic only!** the files are still easily accessible in many ways, f
> also see the [--unlist](https://copyparty.eu/cli/#g-unlist) option which is somewhat similar -- `unlist` applies to the whole volume instead of just one folder; however, while dothidden also affects sftp and ftp, the `unlist` option is http/https-only
## thumbnail pregen
if you want to pre-generate everything on startup (usually a bad idea);
by default, thumbnails are created on-the-fly when a client needs them, and then cached on the server for [--th-maxage](https://copyparty.eu/cli/#g-th-maxage) seconds (default is one week), so most thumbnails only need to be created once, and are then eventually deleted from the cache to preserve disk space
but if you need every thumbnail instantly available when a folder is viewed, then first increase the thumbnail expiration time to something really big, and then set global-option `th-pregen` and volflag `th_pregen` to a comma-separated list of thumbnail formats to automatically generate on server startup;
the full list of all possible formats is: `j,jf,jf3,j3,w,wf,wf3,w3,x,xf,xf3,x3,opus,mp3,flac,wav` and I'll explain what those mean soon
* `j` = jpeg cropped, `jf` = jpeg uncropped, `jf3` = jpeg uncropped triplesize, `j3` = jpeg cropped triplesize
* `w` = webm cropped, `wf` = webm uncropped, ..., `x` = jxl cropped, `xf` = jxl uncropped, ...
* and yes, audio-transcodes are technically thumbnails according to copyparty -- don't think too much about it ( ゚ ヮ゚)
* unlike thumbnails, the expiry time for audio-transcodes is configured with [--ac-maxage](https://copyparty.eu/cli/#g-ac-maxage)
anyways, obviously you **do not** want to pregenerate flac/wav because they're HUGE, and everything else also gets pretty big because it all adds up;
* each regular thumbnail ( j, jf, w, wf, x, xf ) takes about 16 KiB of disk space
* each triplesize thumb ( j3, jf3, w3, wf3, x3, xf3 ) takes about 96 KiB
* each opus / mp3 audiotranscode takes... idk, 6 MiB? depends on song length
so a thousand pictures converted to every possible regular-size image format (`j,jf,w,wf,x,xf`) takes **94 MiB,** and every possible 3x-size (`jf3,j3,wf3,w3,xf3,x3`) takes **562 MiB,** alternatively **656 MiB** in total for all, so that's why the default is to *not* pregenerate on startup, but instead do on-demand with a cache
## database location
in-volume (`.hist/up2k.db`, default) or somewhere else

View file

@ -1660,7 +1660,7 @@ def add_shutdown(ap):
ap2 = ap.add_argument_group("shutdown options")
ap2.add_argument("--ign-ebind", action="store_true", help="continue running even if it's impossible to listen on some of the requested endpoints")
ap2.add_argument("--ign-ebind-all", action="store_true", help="continue running even if it's impossible to receive connections at all")
ap2.add_argument("--exit", metavar="WHEN", type=u, default="", help="shutdown after \033[33mWHEN\033[0m has finished; [\033[32mcfg\033[0m] config parsing, [\033[32midx\033[0m] volscan + multimedia indexing")
ap2.add_argument("--exit", metavar="WHEN", type=u, default="", help="shutdown after \033[33mWHEN\033[0m has finished; [\033[32mcfg\033[0m] config parsing, [\033[32midx\033[0m] volscan + multimedia indexing, [\033[32mthgen\033[0m] thumbnail-pregen")
def add_logging(ap):
@ -1726,6 +1726,8 @@ def add_thumbnail(ap):
ap2.add_argument("--th-poke", metavar="SEC", type=int, default=300, help="activity labeling cooldown -- avoids doing keepalive pokes (updating the mtime) on thumbnail folders more often than \033[33mSEC\033[0m seconds")
ap2.add_argument("--th-clean", metavar="SEC", type=int, default=43200, help="cleanup interval; 0=disabled")
ap2.add_argument("--th-maxage", metavar="SEC", type=int, default=604800, help="max folder age -- folders which haven't been poked for longer than \033[33m--th-poke\033[0m seconds will get deleted every \033[33m--th-clean\033[0m seconds")
ap2.add_argument("--th-pregen", metavar="F,F", type=u, default="", help="pregenerate thumbnails on startup; \033[33mF,F\033[0m is comma-separated list of formats; example: [\033[32mj,jf,w,w3,wf,wf3,x,xf\033[0m] NOTE: remember to set \033[33m--th-maxage 123456789\033[0m (volflag=th_pregen)")
ap2.add_argument("--th-pre-rl", metavar="SEC", type=int, default=30, help="while pregen is running, ratelimit the thumbnailer logger to one message every \033[33mSEC\033[0m seconds (only works with \033[33m-j1\033[0m); set 0 to disable ratelimit")
ap2.add_argument("--th-covers", metavar="N,N", type=u, default="folder.png,folder.jpg,cover.png,cover.jpg", help="folder thumbnails to stat/look for; enabling \033[33m-e2d\033[0m will make these case-insensitive, and try them as dotfiles (.folder.jpg), and also automatically select thumbnails for all folders that contain pics, even if none match this pattern")
ap2.add_argument("--th-spec-p", metavar="N", type=u, default=1, help="for music, do spectrograms or embedded coverart? [\033[32m0\033[0m]=only-art, [\033[32m1\033[0m]=prefer-art, [\033[32m2\033[0m]=only-spec")
# https://pillow.readthedocs.io/en/stable/handbook/image-file-formats.html

View file

@ -160,6 +160,13 @@ class BrokerMp(object):
else:
raise Exception("what is " + str(dest))
def say1(self, dest: str, *args: Any) -> None:
    """
    queue a message for exactly one recipient: the first entry in self.procs

    dest is the target name; the remaining positional args are
    forwarded as a list alongside it
    """
    first = self.procs[0]
    payload = (0, dest, list(args))
    first.q_pend.put(payload)
def periodic(self) -> None:
while True:
time.sleep(1)

View file

@ -150,6 +150,7 @@ def vf_vmap() -> dict[str, str]:
"tail_tmax",
"tail_who",
"tcolor",
"th_pregen",
"th_qv",
"th_qvx",
"th_spec_p",
@ -317,6 +318,7 @@ flagcats = {
"aconvt": "convert-to-audio timeout in seconds",
"th_spec_p=1": "make spectrograms? 0=never 1=fallback 2=always",
"ext_th=s=/b.png": "use /b.png as thumbnail for file-extension s",
"th_pregen=w,wf": "pregenerate thumbs for these formats",
},
"handlers\n(better explained in --help-handlers)": {
"on404=PY": "handle 404s by executing PY file",

View file

@ -55,10 +55,12 @@ except SyntaxError:
)
sys.exit(1)
from .authsrv import LEELOO_DALLAS
from .httpconn import HttpConn
from .ico import Ico
from .metrics import Metrics
from .mtag import HAVE_FFMPEG
from .sutil import gfilter2
from .th_cli import ThumbCli
from .th_srv import HAVE_PIL, HAVE_VIPS
from .u2idx import U2idx
@ -657,3 +659,73 @@ class HttpSrv(object):
self.tdli = dli
self.tdls = dls
def pregen_thumbs(self) -> None:
    """
    hand self._pregen_thumbs to a Daemon named "th_pregen"

    NOTE(review): Daemon is defined elsewhere; presumably it runs the
    target in a background thread so this call does not block -- confirm
    """
    worker = self._pregen_thumbs
    Daemon(worker, "th_pregen")
def _pregen_thumbs(self) -> None:
    """
    walk every volume that has a `th_pregen` volflag and push each of its
    files through gfilter2 once per listed format

    sequence:
      1. refuse to start if another run is still marked as active
      2. wait until up2k reports that it has finished at least once
      3. optionally swap the thumbsrv logger for its ratelimited variant
      4. consume the gfilter2 generator for each volume, pausing every
         256 files for as long as up2k says it is busy
      5. restore the logger, log the totals, and request shutdown if
         `--exit thgen` was given

    the "active" marker is always cleared on the way out (including the
    early return and any exception), otherwise a single aborted run
    would make every later call report "already running"
    """

    def log(msg, n):
        self.log("thumb-pregen", msg, n)

    def set_thumb_logger(attr):
        # best-effort: not every broker exposes hub.thumbsrv
        # (the --th-pre-rl help text says this only works with -j1)
        try:
            thsrv = self.broker.hub.thumbsrv
            thsrv.log = getattr(thsrv, attr)
        except Exception:
            pass

    if getattr(self, "_pregen", False):
        log("already running", 1)
        return

    self._pregen = True
    try:
        # is_busy returns ( currently-busy , have-finished-at-least-once )
        for n in range(9999999):
            x = self.broker.ask("up2k.is_busy")
            _, inited = x.get()
            if inited:
                break
            if not n:
                log("waiting for up2k to finish initializing", 6)
            # poll less and less often the longer we have been waiting
            time.sleep(1 if n < 10 else 5 if n < 300 else 15)

        if not self.thumbcli:
            log("no thumbcli", 1)
            return

        ratelimit = bool(self.args.th_pre_rl)
        if ratelimit:
            set_thumb_logger("_slog")

        nfiles = 0
        t0 = time.time()
        scandir = not self.args.no_scandir
        try:
            for vn in self.asrv.vfs.all_nodes.values():
                fmts = vn.flags.get("th_pregen", "")
                if not fmts:
                    continue

                log("starting for volume /%s" % (vn.vpath,), 6)
                g = vn.walk(
                    "x", "/", [], LEELOO_DALLAS, [True], 2, scandir, False, False
                )
                g = gfilter2(g, self, vn.vpath, fmts.split(","))
                for _ in g:
                    nfiles += 1
                    if nfiles % 256:
                        continue

                    # every 256 files: yield to up2k while it is indexing
                    now = time.time()
                    for n in range(9999999):
                        x = self.broker.ask("up2k.is_busy")
                        busy, _ = x.get()
                        if not busy:
                            if n:
                                # don't count the time spent waiting
                                t0 += time.time() - now
                            break
                        if not n:
                            log("waiting for up2k to finish indexing", 6)
                        time.sleep(5)
        finally:
            # put the regular logger back even if the scan blew up
            if ratelimit:
                set_thumb_logger("_log")

        t = "finished; %d files in %d seconds"
        log(t % (nfiles, time.time() - t0), 6)
    finally:
        self._pregen = False

    if self.args.exit == "thgen":
        self.broker.say("sigterm")

View file

@ -6,11 +6,11 @@ import tempfile
from datetime import datetime
from .__init__ import CORES
from .authsrv import VFS, AuthSrv
from .authsrv import LEELOO_DALLAS, VFS, AuthSrv
from .bos import bos
from .th_cli import ThumbCli
from .th_srv import TH_CH
from .util import UTC, vjoin, vol_san
from .util import UTC, sigblock, vjoin, vol_san
if True: # pylint: disable=using-constant-test
from typing import Any, Generator, Optional
@ -42,6 +42,17 @@ class StreamArc(object):
self.stopped = True
# thread pools which are currently alive; gfilter and gfilter2 add
# their executor here while running so that shutdown can reach it
_pools = {}


def close_pools() -> None:
    """
    ask every registered thread pool to shut down without waiting for it

    pending futures are cancelled where the interpreter supports it;
    `cancel_futures` only exists on python 3.9 and newer, so on older
    versions fall back to a plain non-blocking shutdown instead of
    silently leaving the pool running

    best-effort: any other failure from a pool is ignored
    """
    for p in list(_pools):
        try:
            p.shutdown(wait=False, cancel_futures=True)
        except TypeError:
            # executor does not know the cancel_futures keyword
            try:
                p.shutdown(wait=False)
            except Exception:
                pass
        except Exception:
            pass
def gfilter(
fgen: Generator[dict[str, Any], None, None],
thumbcli: ThumbCli,
@ -52,7 +63,8 @@ def gfilter(
from concurrent.futures import ThreadPoolExecutor
pend = []
with ThreadPoolExecutor(max_workers=CORES) as tp:
with ThreadPoolExecutor(max_workers=CORES, initializer=sigblock) as tp:
_pools[tp] = 1
try:
for f in fgen:
task = tp.submit(enthumb, thumbcli, uname, vtop, f, fmt)
@ -79,6 +91,61 @@ def gfilter(
except:
pass
thumbcli.log("gfilter flushed")
_pools.pop(tp, None)
def gfilter2(
    fgen: Generator[
        tuple[
            "VFS",
            str,
            str,
            str,
            list[tuple[str, os.stat_result]],
            list[tuple[str, os.stat_result]],
            dict[str, "VFS"],
        ],
        None,
        None,
    ],
    hsrv: "HttpSrv",
    vtop: str,
    fmts: list[str],
) -> Generator[dict[str, Any], None, None]:
    """
    run enthumb for every file produced by fgen, once per format in fmts,
    on a thread pool with CORES workers

    fgen yields 7-tuples of which only the vpath (3rd) and the file list
    (5th) are used; folders with "/.hist/" in their vpath are skipped

    yields one dict per submitted (file, format) pair, in submission
    order: the value returned by the enthumb task, or the plain
    {"vp", "st"} input dict if the task raised or did not finish within
    600 seconds; pairs which could not be submitted yield nothing

    only `Exception` is swallowed; a bare except around the yield would
    also eat the GeneratorExit raised by closing this generator, which
    makes python raise "generator ignored GeneratorExit"
    """
    from collections import deque
    from concurrent.futures import ThreadPoolExecutor

    pend = deque()
    with ThreadPoolExecutor(max_workers=CORES, initializer=sigblock) as tp:
        # register so close_pools can reach this executor during shutdown
        _pools[tp] = 1
        try:
            for _, _, vpath, _, files, _, _ in fgen:
                if "/.hist/" in vpath:
                    continue

                for fi in files:
                    vp = vpath + "/" + fi[0] if vpath else fi[0]
                    for fmt in fmts:
                        try:
                            f = {"vp": vp, "st": fi[1]}
                            task = tp.submit(
                                enthumb, hsrv.thumbcli, LEELOO_DALLAS, vtop, f, fmt
                            )
                            pend.append((task, f))
                            # hand out the oldest result as soon as it is
                            # ready, or when the backlog grows too large
                            if pend[0][0].done() or len(pend) > CORES * 4:
                                task, f = pend.popleft()
                                try:
                                    f = task.result(600)
                                except Exception:
                                    pass
                                yield f
                        except Exception:
                            pass

            # flush whatever is still in flight
            while pend:
                task, f = pend.popleft()
                try:
                    f = task.result(600)
                except Exception:
                    pass
                yield f
        finally:
            # unregister even when the consumer stops early
            _pools.pop(tp, None)
def enthumb(

View file

@ -35,6 +35,7 @@ from .cert import ensure_cert
from .fsutil import ramdisk_chk
from .mtag import HAVE_FFMPEG, HAVE_FFPROBE, HAVE_MUTAGEN
from .pwhash import HAVE_ARGON2
from .sutil import close_pools as sutil_close_pools
from .tcpsrv import TcpSrv
from .th_srv import (
H_PIL_AVIF,
@ -489,6 +490,8 @@ class SvcHub(object):
for nm in args.ipr_u.values():
nm.mutex = threading.Lock()
self._reload_thumbsrv()
def _db_onfail_ses(self) -> None:
self.args.no_ses = True
@ -1478,8 +1481,17 @@ class SvcHub(object):
self.log("root", "reload done")
t += "\n\nchanges to global options (if any) require a restart of copyparty to take effect"
self.broker.reload()
self._reload_thumbsrv()
return t
def _reload_thumbsrv(self) -> None:
    """
    request thumbnail pregeneration if any volume has a non-empty
    `th_pregen` volflag; does nothing when there is no thumbsrv

    the request is the broker message "httpsrv.pregen_thumbs", sent with
    the broker's say1 when it has one, otherwise with its regular say
    """
    if not self.thumbsrv:
        return

    vols = self.asrv.vfs.all_nodes.values()
    if not any(vn.flags.get("th_pregen", "") for vn in vols):
        return

    fun = getattr(self.broker, "say1", self.broker.say)
    fun("httpsrv.pregen_thumbs")
def _reload_sessions(self) -> None:
with self.asrv.mutex:
self.asrv.load_sessions(True)
@ -1564,6 +1576,7 @@ class SvcHub(object):
if self.thumbsrv:
self.thumbsrv.shutdown()
sutil_close_pools()
for n in range(200): # 10s
time.sleep(0.05)

View file

@ -256,6 +256,9 @@ class ThumbSrv(object):
self.args = hub.args
self.log_func = hub.log
self.log = self._log
self.nextlog = 0
self.poke_cd = Cooldown(self.args.th_poke)
self.mutex = threading.Lock()
@ -345,7 +348,14 @@ class ThumbSrv(object):
for zss in [self.fmt_ffi, self.fmt_ffv, self.fmt_ffa]:
self.thumbable |= zss
def log(self, msg: str, c: Union[int, str] = 0) -> None:
def _log(self, msg: str, c: Union[int, str] = 0) -> None:
    """pass msg and c straight through to self.log_func, tagged as "thumb" """
    emit = self.log_func
    emit("thumb", msg, c)
def _slog(self, msg: str, c: Union[int, str] = 0) -> None:
    """
    ratelimited counterpart of the plain "thumb" logger

    a message whose c is 0 or 6 is dropped while the current time is
    still before self.nextlog; every message that does get through
    (whatever its c) moves self.nextlog to now + args.th_pre_rl

    NOTE(review): 0 and 6 are presumably the colours used for routine
    output, so that warnings and errors are never dropped -- confirm
    """
    ts = time.time()
    droppable = c in (0, 6)
    if droppable and ts < self.nextlog:
        return

    self.nextlog = ts + self.args.th_pre_rl
    self.log_func("thumb", msg, c)
def shutdown(self) -> None:

View file

@ -337,6 +337,10 @@ class Up2k(object):
if not self.stop:
self.log("uploads are now possible", 2)
def is_busy(self) -> tuple[bool, object]:
    """
    returns ( currently-busy , have-finished-at-least-once )

    the first value is bool(self.pp); the second is self.gt1 handed
    back as-is, so callers should only rely on its truthiness
    """
    return bool(self.pp), self.gt1
def get_state(self, get_q: bool, uname: str) -> str:
mtpq: Union[int, str] = 0
ups = []

View file

@ -164,7 +164,7 @@ class Cfg(Namespace):
ex = "ctl_re db_act forget_ip idp_cookie idp_store k304 loris no304 nosubtle qr_pin qr_wait re_maxage rproxy rsp_jtr rsp_slp s_wr_slp snap_wri theme themes turbo u2ow zipmaxn zipmaxs"
ka.update(**{k: 0 for k in ex.split()})
ex = "ah_alg bname chdir chmod_f chpw_db db_xattr doctitle df epilogues exit favico fika ipa ipar html_head html_head_d html_head_s idp_login idp_logout lg_sba lg_sbf log_date log_fk md_sba md_sbf name og_desc og_site og_th og_title og_title_a og_title_v og_title_i opds_exts preadmes prologues readmes shr shr1 shr_site site smsg tcolor textfiles txt_eol ufavico ufavico_h unlist up_site vc_url vname xff_src zipmaxt R RS SR"
ex = "ah_alg bname chdir chmod_f chpw_db db_xattr doctitle df epilogues exit favico fika ipa ipar html_head html_head_d html_head_s idp_login idp_logout lg_sba lg_sbf log_date log_fk md_sba md_sbf name og_desc og_site og_th og_title og_title_a og_title_v og_title_i opds_exts preadmes prologues readmes shr shr1 shr_site site smsg tcolor textfiles th_pregen txt_eol ufavico ufavico_h unlist up_site vc_url vname xff_src zipmaxt R RS SR"
ka.update(**{k: "" for k in ex.split()})
ex = "apnd_who ban_403 ban_404 ban_422 ban_pw ban_pwc ban_url dont_ban cachectl http_vary rcm rss_fmt_d rss_fmt_t spinner"