mirror of
https://github.com/9001/copyparty.git
synced 2025-08-17 09:02:15 -06:00
EPUB Thumbnailing support
This commit is contained in:
parent
db2a03409c
commit
1c37c13c44
|
@ -1443,13 +1443,13 @@ def add_thumbnail(ap):
|
|||
# https://pillow.readthedocs.io/en/stable/handbook/image-file-formats.html
|
||||
# https://github.com/libvips/libvips
|
||||
# ffmpeg -hide_banner -demuxers | awk '/^ D /{print$2}' | while IFS= read -r x; do ffmpeg -hide_banner -h demuxer=$x; done | grep -E '^Demuxer |extensions:'
|
||||
ap2.add_argument("--th-r-pil", metavar="T,T", type=u, default="avif,avifs,blp,bmp,cbz,dcx,dds,dib,emf,eps,fits,flc,fli,fpx,gif,heic,heics,heif,heifs,icns,ico,im,j2p,j2k,jp2,jpeg,jpg,jpx,pbm,pcx,pgm,png,pnm,ppm,psd,qoi,sgi,spi,tga,tif,tiff,webp,wmf,xbm,xpm", help="image formats to decode using pillow")
|
||||
ap2.add_argument("--th-r-pil", metavar="T,T", type=u, default="avif,avifs,blp,bmp,cbz,dcx,dds,dib,emf,eps,epub,fits,flc,fli,fpx,gif,heic,heics,heif,heifs,icns,ico,im,j2p,j2k,jp2,jpeg,jpg,jpx,pbm,pcx,pgm,png,pnm,ppm,psd,qoi,sgi,spi,tga,tif,tiff,webp,wmf,xbm,xpm", help="image formats to decode using pillow")
|
||||
ap2.add_argument("--th-r-vips", metavar="T,T", type=u, default="avif,exr,fit,fits,fts,gif,hdr,heic,jp2,jpeg,jpg,jpx,jxl,nii,pfm,pgm,png,ppm,svg,tif,tiff,webp", help="image formats to decode using pyvips")
|
||||
ap2.add_argument("--th-r-ffi", metavar="T,T", type=u, default="apng,avif,avifs,bmp,cbz,dds,dib,fit,fits,fts,gif,hdr,heic,heics,heif,heifs,icns,ico,jp2,jpeg,jpg,jpx,jxl,pbm,pcx,pfm,pgm,png,pnm,ppm,psd,qoi,sgi,tga,tif,tiff,webp,xbm,xpm", help="image formats to decode using ffmpeg")
|
||||
ap2.add_argument("--th-r-ffi", metavar="T,T", type=u, default="apng,avif,avifs,bmp,cbz,dds,dib,epub,fit,fits,fts,gif,hdr,heic,heics,heif,heifs,icns,ico,jp2,jpeg,jpg,jpx,jxl,pbm,pcx,pfm,pgm,png,pnm,ppm,psd,qoi,sgi,tga,tif,tiff,webp,xbm,xpm", help="image formats to decode using ffmpeg")
|
||||
ap2.add_argument("--th-r-ffv", metavar="T,T", type=u, default="3gp,asf,av1,avc,avi,flv,h264,h265,hevc,m4v,mjpeg,mjpg,mkv,mov,mp4,mpeg,mpeg2,mpegts,mpg,mpg2,mts,nut,ogm,ogv,rm,ts,vob,webm,wmv", help="video formats to decode using ffmpeg")
|
||||
ap2.add_argument("--th-r-ffa", metavar="T,T", type=u, default="aac,ac3,aif,aiff,alac,alaw,amr,apac,ape,au,bonk,dfpwm,dts,flac,gsm,ilbc,it,itgz,itxz,itz,m4a,mdgz,mdxz,mdz,mo3,mod,mp2,mp3,mpc,mptm,mt2,mulaw,oga,ogg,okt,opus,ra,s3m,s3gz,s3xz,s3z,tak,tta,ulaw,wav,wma,wv,xm,xmgz,xmxz,xmz,xpk", help="audio formats to decode using ffmpeg")
|
||||
ap2.add_argument("--th-spec-cnv", metavar="T", type=u, default="it,itgz,itxz,itz,mdgz,mdxz,mdz,mo3,mod,s3m,s3gz,s3xz,s3z,xm,xmgz,xmxz,xmz,xpk", help="audio formats which provoke https://trac.ffmpeg.org/ticket/10797 (huge ram usage for s3xmodit spectrograms)")
|
||||
ap2.add_argument("--au-unpk", metavar="E=F.C", type=u, default="mdz=mod.zip, mdgz=mod.gz, mdxz=mod.xz, s3z=s3m.zip, s3gz=s3m.gz, s3xz=s3m.xz, xmz=xm.zip, xmgz=xm.gz, xmxz=xm.xz, itz=it.zip, itgz=it.gz, itxz=it.xz, cbz=jpg.cbz", help="audio/image formats to decompress before passing to ffmpeg")
|
||||
ap2.add_argument("--au-unpk", metavar="E=F.C", type=u, default="mdz=mod.zip, mdgz=mod.gz, mdxz=mod.xz, s3z=s3m.zip, s3gz=s3m.gz, s3xz=s3m.xz, xmz=xm.zip, xmgz=xm.gz, xmxz=xm.xz, itz=it.zip, itgz=it.gz, itxz=it.xz, cbz=jpg.cbz, epub=jpg.epub", help="audio/image formats to decompress before passing to ffmpeg")
|
||||
|
||||
|
||||
def add_transcoding(ap):
|
||||
|
|
|
@ -29,7 +29,7 @@ from .util import (
|
|||
)
|
||||
|
||||
if True: # pylint: disable=using-constant-test
|
||||
from typing import Any, Optional, Union
|
||||
from typing import Any, Optional, Union, IO
|
||||
|
||||
from .util import NamedLogger, RootLogger
|
||||
|
||||
|
@ -60,9 +60,23 @@ def have_ff(scmd: str) -> bool:
|
|||
else:
|
||||
return bool(shutil.which(scmd))
|
||||
|
||||
def expat_is_secure():
|
||||
"""
|
||||
From the python xml docs:
|
||||
|
||||
An attacker can abuse XML features to carry out denial of service attacks, access local files, generate network connections to other machines, or circumvent firewalls.
|
||||
Expat versions lower that 2.6.0 may be vulnerable to “billion laughs”, “quadratic blowup” and “large tokens”. Python may be vulnerable if it uses such older versions of Expat as a system-provided library. Check pyexpat.EXPAT_VERSION.
|
||||
"""
|
||||
import pyexpat
|
||||
# expat_2.7.1
|
||||
if len(pyexpat.EXPAT_VERSION) < 11:
|
||||
return False
|
||||
major, minor, patch = (int(x) for x in pyexpat.EXPAT_VERSION[6:].split("."))
|
||||
return major > 2 or major == 2 and minor >= 6
|
||||
|
||||
HAVE_FFMPEG = not os.environ.get("PRTY_NO_FFMPEG") and have_ff("ffmpeg")
|
||||
HAVE_FFPROBE = not os.environ.get("PRTY_NO_FFPROBE") and have_ff("ffprobe")
|
||||
HAVE_SECURE_EXPAT = os.environ.get("PRTY_ALLOW_INSECURE_EXPAT") or expat_is_secure()
|
||||
|
||||
CBZ_PICS = set("png jpg jpeg gif bmp tga tif tiff webp avif".split())
|
||||
CBZ_01 = re.compile(r"(^|[^0-9v])0+[01]\b")
|
||||
|
@ -176,6 +190,10 @@ def au_unpk(
|
|||
raise Exception("no images inside cbz")
|
||||
fi = zf.open(using)
|
||||
|
||||
elif pk == "epub":
|
||||
if HAVE_SECURE_EXPAT:
|
||||
fi = get_cover_from_epub(log, abspath)
|
||||
|
||||
else:
|
||||
raise Exception("unknown compression %s" % (pk,))
|
||||
|
||||
|
@ -365,6 +383,70 @@ def parse_ffprobe(txt: str) -> tuple[dict[str, tuple[int, Any]], dict[str, list[
|
|||
return zd, md
|
||||
|
||||
|
||||
def get_cover_from_epub(log: "NamedLogger", abspath: str) -> IO[bytes] | None:
|
||||
import zipfile
|
||||
import xml.etree.ElementTree as ElTree
|
||||
try:
|
||||
from urlparse import urljoin # Python2
|
||||
except ImportError:
|
||||
from urllib.parse import urljoin # Python3
|
||||
|
||||
with zipfile.ZipFile(abspath, "r") as z:
|
||||
# First open the container file to find the package document (.opf file)
|
||||
try:
|
||||
container_root = ElTree.parse(z.open("META-INF/container.xml"))
|
||||
except KeyError:
|
||||
log(f"epub: no container file found in {abspath}")
|
||||
return None
|
||||
|
||||
# https://www.w3.org/TR/epub-33/#sec-container.xml-rootfile-elem
|
||||
container_namesapce = {"": "urn:oasis:names:tc:opendocument:xmlns:container"}
|
||||
# One file could contain multiple package documents, default to the first one
|
||||
rootfile_path = container_root\
|
||||
.find("./rootfiles/rootfile", container_namesapce)\
|
||||
.get("full-path")
|
||||
|
||||
# Then open the first package document to find the path of the cover image
|
||||
try:
|
||||
package_root = ElTree.parse(z.open(rootfile_path))
|
||||
except KeyError:
|
||||
log(f"epub: no package document found in {abspath}")
|
||||
return None
|
||||
|
||||
# https://www.w3.org/TR/epub-33/#sec-package-doc
|
||||
package_namespaces = {"": "http://www.idpf.org/2007/opf"}
|
||||
# https://www.w3.org/TR/epub-33/#sec-cover-image
|
||||
coverimage_path_node = package_root\
|
||||
.find("./manifest/item[@properties='cover-image']", package_namespaces)
|
||||
if coverimage_path_node:
|
||||
coverimage_path = coverimage_path_node.get("href")
|
||||
else:
|
||||
# This might be an EPUB2 file, try the legacy way of specifying covers
|
||||
coverimage_path = _get_cover_from_epub2(log, package_root, package_namespaces)
|
||||
|
||||
# This url is either absolute (in the .epub) or relative to the package document
|
||||
adjusted_cover_path = urljoin(rootfile_path, coverimage_path)
|
||||
|
||||
return z.open(adjusted_cover_path)
|
||||
|
||||
|
||||
def _get_cover_from_epub2(log: "NamedLogger", package_root, package_namespaces) -> str | None:
|
||||
# <meta name="cover" content="id-to-cover-image"> in <metadata>, then
|
||||
# <item> in <manifest>
|
||||
cover_id = package_root \
|
||||
.find("./metadata/meta[@name='cover']", package_namespaces) \
|
||||
.get("content")
|
||||
if not cover_id:
|
||||
return None
|
||||
|
||||
for node in package_root.iterfind("./manifest/item", package_namespaces):
|
||||
if node.get("id") == cover_id:
|
||||
cover_path = node.get("href")
|
||||
return cover_path
|
||||
|
||||
return None
|
||||
|
||||
|
||||
class MTag(object):
|
||||
def __init__(self, log_func: "RootLogger", args: argparse.Namespace) -> None:
|
||||
self.log_func = log_func
|
||||
|
@ -407,6 +489,9 @@ class MTag(object):
|
|||
self.log(msg.format(or_ffprobe, " " * 37, pyname), c=1)
|
||||
return
|
||||
|
||||
if not HAVE_SECURE_EXPAT:
|
||||
self.log("expat version is missing critical security fixes; epub thumbnails will not be available", c=3)
|
||||
|
||||
# https://picard-docs.musicbrainz.org/downloads/MusicBrainz_Picard_Tag_Map.html
|
||||
tagmap = {
|
||||
"album": ["album", "talb", "\u00a9alb", "original-album", "toal"],
|
||||
|
|
Loading…
Reference in a new issue