Switch to internal defused XML implementation

This commit is contained in:
AppleTheGolden 2025-08-10 20:04:15 +02:00
parent 73eed49b23
commit 988cccd2b8
No known key found for this signature in database
GPG key ID: F6AC8A62154C42AA
3 changed files with 9 additions and 26 deletions

View file

@ -2274,7 +2274,6 @@ force-enable features with known issues on your OS/env by setting any of the fo
|-----------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| `PRTY_FORCE_MP` | force-enable multiprocessing (real multithreading) on MacOS and other broken platforms |
| `PRTY_FORCE_MAGIC` | use [magic](https://pypi.org/project/python-magic/) on Windows (you will segfault) |
| `PRTY_ALLOW_INSECURE_EXPAT` | allow using expat versions (bundled with python) that are vulnerable to xml attacks, [see the python docs](https://docs.python.org/3/library/xml.html#xml-security) |
# packages

View file

@ -65,6 +65,9 @@ DXMLParser = _DXMLParser
def parse_xml(txt: str) -> ET.Element:
    """
    Turn XML text into an xml.etree.ElementTree.Element.

    The text is run through DXMLParser rather than the stock parser, so
    some unsafe parts of the XML are defused while parsing.
    """
    defused = DXMLParser()
    defused.feed(txt)
    root = defused.close()
    return root  # type: ignore

View file

@ -60,23 +60,8 @@ def have_ff(scmd: str) -> bool:
else:
return bool(shutil.which(scmd))
def expat_is_secure() -> bool:
    """
    Report whether the Expat library used by this Python is new enough (>= 2.6.0).

    From the python xml docs:
    An attacker can abuse XML features to carry out denial of service attacks, access local files, generate network connections to other machines, or circumvent firewalls.
    Expat versions lower than 2.6.0 may be vulnerable to billion laughs, quadratic blowup and large tokens. Python may be vulnerable if it uses such older versions of Expat as a system-provided library. Check pyexpat.EXPAT_VERSION.

    Returns:
        True when pyexpat.EXPAT_VERSION reports version 2.6 or newer.
        False when the version is older, cannot be parsed, or pyexpat
        cannot be imported at all.
    """
    try:
        import pyexpat
    except ImportError:
        # no expat available; treat as "not known to be secure"
        return False

    # EXPAT_VERSION looks like "expat_2.7.1"; keep only what follows the prefix
    _, _, version = pyexpat.EXPAT_VERSION.partition("_")
    try:
        # only major and minor matter for the 2.6 threshold, so extra
        # components or suffixes after them cannot break the check
        major, minor = (int(part) for part in version.split(".")[:2])
    except ValueError:
        # unexpected format (missing or non-numeric components): fail closed
        return False

    return (major, minor) >= (2, 6)
# ffmpeg / ffprobe count as available only when they are not disabled through
# the PRTY_NO_FFMPEG / PRTY_NO_FFPROBE environment variables and have_ff() finds them
HAVE_FFMPEG = not os.environ.get("PRTY_NO_FFMPEG") and have_ff("ffmpeg")
HAVE_FFPROBE = not os.environ.get("PRTY_NO_FFPROBE") and have_ff("ffprobe")
# truthy when PRTY_ALLOW_INSECURE_EXPAT is set to a non-empty value (the check
# is then skipped entirely) or when expat_is_secure() accepts the expat version
HAVE_SECURE_EXPAT = os.environ.get("PRTY_ALLOW_INSECURE_EXPAT") or expat_is_secure()
# image file extensions; presumably the picture formats considered inside
# cbz archives -- NOTE(review): confirm against the code that uses it
CBZ_PICS = set("png jpg jpeg gif bmp tga tif tiff webp avif".split())
# matches one or more zeros followed by a final 0 or 1 at a word boundary,
# preceded by start-of-string or a character that is neither a digit nor "v"
CBZ_01 = re.compile(r"(^|[^0-9v])0+[01]\b")
@ -191,8 +176,7 @@ def au_unpk(
fi = zf.open(using)
elif pk == "epub":
if HAVE_SECURE_EXPAT:
fi = get_cover_from_epub(log, abspath)
fi = get_cover_from_epub(log, abspath)
else:
raise Exception("unknown compression %s" % (pk,))
@ -385,7 +369,7 @@ def parse_ffprobe(txt: str) -> tuple[dict[str, tuple[int, Any]], dict[str, list[
def get_cover_from_epub(log: "NamedLogger", abspath: str) -> IO[bytes] | None:
import zipfile
import xml.etree.ElementTree as ElTree
from .dxml import parse_xml
try:
from urlparse import urljoin # Python2
except ImportError:
@ -394,21 +378,21 @@ def get_cover_from_epub(log: "NamedLogger", abspath: str) -> IO[bytes] | None:
with zipfile.ZipFile(abspath, "r") as z:
# First open the container file to find the package document (.opf file)
try:
container_root = ElTree.parse(z.open("META-INF/container.xml"))
container_root = parse_xml(z.read("META-INF/container.xml").decode())
except KeyError:
log(f"epub: no container file found in {abspath}")
return None
# https://www.w3.org/TR/epub-33/#sec-container.xml-rootfile-elem
container_namesapce = {"": "urn:oasis:names:tc:opendocument:xmlns:container"}
container_namespace = {"": "urn:oasis:names:tc:opendocument:xmlns:container"}
# One file could contain multiple package documents, default to the first one
rootfile_path = container_root\
.find("./rootfiles/rootfile", container_namesapce)\
.find("./rootfiles/rootfile", container_namespace)\
.get("full-path")
# Then open the first package document to find the path of the cover image
try:
package_root = ElTree.parse(z.open(rootfile_path))
package_root = parse_xml(z.read(rootfile_path).decode())
except KeyError:
log(f"epub: no package document found in {abspath}")
return None
@ -489,9 +473,6 @@ class MTag(object):
self.log(msg.format(or_ffprobe, " " * 37, pyname), c=1)
return
if not HAVE_SECURE_EXPAT:
self.log("expat version is missing critical security fixes; epub thumbnails will not be available", c=3)
# https://picard-docs.musicbrainz.org/downloads/MusicBrainz_Picard_Tag_Map.html
tagmap = {
"album": ["album", "talb", "\u00a9alb", "original-album", "toal"],