Switch to internal defused XML implementation

This commit is contained in:
AppleTheGolden 2025-08-10 20:04:15 +02:00
parent 73eed49b23
commit 988cccd2b8
No known key found for this signature in database
GPG key ID: F6AC8A62154C42AA
3 changed files with 9 additions and 26 deletions

View file

@ -2274,7 +2274,6 @@ force-enable features with known issues on your OS/env by setting any of the fo
|-----------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------| |-----------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| `PRTY_FORCE_MP` | force-enable multiprocessing (real multithreading) on MacOS and other broken platforms | | `PRTY_FORCE_MP` | force-enable multiprocessing (real multithreading) on MacOS and other broken platforms |
| `PRTY_FORCE_MAGIC` | use [magic](https://pypi.org/project/python-magic/) on Windows (you will segfault) | | `PRTY_FORCE_MAGIC` | use [magic](https://pypi.org/project/python-magic/) on Windows (you will segfault) |
| `PRTY_ALLOW_INSECURE_EXPAT` | allow using expat versions (bundled with python) that are vulnerable to xml attacks, [see the python docs](https://docs.python.org/3/library/xml.html#xml-security) |
# packages # packages

View file

@ -65,6 +65,9 @@ DXMLParser = _DXMLParser
def parse_xml(txt: str) -> ET.Element: def parse_xml(txt: str) -> ET.Element:
"""
Parse XML into an xml.etree.ElementTree.Element while defusing some unsafe parts.
"""
parser = DXMLParser() parser = DXMLParser()
parser.feed(txt) parser.feed(txt)
return parser.close() # type: ignore return parser.close() # type: ignore

View file

@ -60,23 +60,8 @@ def have_ff(scmd: str) -> bool:
else: else:
return bool(shutil.which(scmd)) return bool(shutil.which(scmd))
def expat_is_secure():
"""
From the python xml docs:
An attacker can abuse XML features to carry out denial of service attacks, access local files, generate network connections to other machines, or circumvent firewalls.
Expat versions lower than 2.6.0 may be vulnerable to billion laughs, quadratic blowup and large tokens. Python may be vulnerable if it uses such older versions of Expat as a system-provided library. Check pyexpat.EXPAT_VERSION.
"""
import pyexpat
# expat_2.7.1
if len(pyexpat.EXPAT_VERSION) < 11:
return False
major, minor, patch = (int(x) for x in pyexpat.EXPAT_VERSION[6:].split("."))
return major > 2 or major == 2 and minor >= 6
HAVE_FFMPEG = not os.environ.get("PRTY_NO_FFMPEG") and have_ff("ffmpeg") HAVE_FFMPEG = not os.environ.get("PRTY_NO_FFMPEG") and have_ff("ffmpeg")
HAVE_FFPROBE = not os.environ.get("PRTY_NO_FFPROBE") and have_ff("ffprobe") HAVE_FFPROBE = not os.environ.get("PRTY_NO_FFPROBE") and have_ff("ffprobe")
HAVE_SECURE_EXPAT = os.environ.get("PRTY_ALLOW_INSECURE_EXPAT") or expat_is_secure()
CBZ_PICS = set("png jpg jpeg gif bmp tga tif tiff webp avif".split()) CBZ_PICS = set("png jpg jpeg gif bmp tga tif tiff webp avif".split())
CBZ_01 = re.compile(r"(^|[^0-9v])0+[01]\b") CBZ_01 = re.compile(r"(^|[^0-9v])0+[01]\b")
@ -191,8 +176,7 @@ def au_unpk(
fi = zf.open(using) fi = zf.open(using)
elif pk == "epub": elif pk == "epub":
if HAVE_SECURE_EXPAT: fi = get_cover_from_epub(log, abspath)
fi = get_cover_from_epub(log, abspath)
else: else:
raise Exception("unknown compression %s" % (pk,)) raise Exception("unknown compression %s" % (pk,))
@ -385,7 +369,7 @@ def parse_ffprobe(txt: str) -> tuple[dict[str, tuple[int, Any]], dict[str, list[
def get_cover_from_epub(log: "NamedLogger", abspath: str) -> IO[bytes] | None: def get_cover_from_epub(log: "NamedLogger", abspath: str) -> IO[bytes] | None:
import zipfile import zipfile
import xml.etree.ElementTree as ElTree from .dxml import parse_xml
try: try:
from urlparse import urljoin # Python2 from urlparse import urljoin # Python2
except ImportError: except ImportError:
@ -394,21 +378,21 @@ def get_cover_from_epub(log: "NamedLogger", abspath: str) -> IO[bytes] | None:
with zipfile.ZipFile(abspath, "r") as z: with zipfile.ZipFile(abspath, "r") as z:
# First open the container file to find the package document (.opf file) # First open the container file to find the package document (.opf file)
try: try:
container_root = ElTree.parse(z.open("META-INF/container.xml")) container_root = parse_xml(z.read("META-INF/container.xml").decode())
except KeyError: except KeyError:
log(f"epub: no container file found in {abspath}") log(f"epub: no container file found in {abspath}")
return None return None
# https://www.w3.org/TR/epub-33/#sec-container.xml-rootfile-elem # https://www.w3.org/TR/epub-33/#sec-container.xml-rootfile-elem
container_namesapce = {"": "urn:oasis:names:tc:opendocument:xmlns:container"} container_namespace = {"": "urn:oasis:names:tc:opendocument:xmlns:container"}
# One file could contain multiple package documents, default to the first one # One file could contain multiple package documents, default to the first one
rootfile_path = container_root\ rootfile_path = container_root\
.find("./rootfiles/rootfile", container_namesapce)\ .find("./rootfiles/rootfile", container_namespace)\
.get("full-path") .get("full-path")
# Then open the first package document to find the path of the cover image # Then open the first package document to find the path of the cover image
try: try:
package_root = ElTree.parse(z.open(rootfile_path)) package_root = parse_xml(z.read(rootfile_path).decode())
except KeyError: except KeyError:
log(f"epub: no package document found in {abspath}") log(f"epub: no package document found in {abspath}")
return None return None
@ -489,9 +473,6 @@ class MTag(object):
self.log(msg.format(or_ffprobe, " " * 37, pyname), c=1) self.log(msg.format(or_ffprobe, " " * 37, pyname), c=1)
return return
if not HAVE_SECURE_EXPAT:
self.log("expat version is missing critical security fixes; epub thumbnails will not be available", c=3)
# https://picard-docs.musicbrainz.org/downloads/MusicBrainz_Picard_Tag_Map.html # https://picard-docs.musicbrainz.org/downloads/MusicBrainz_Picard_Tag_Map.html
tagmap = { tagmap = {
"album": ["album", "talb", "\u00a9alb", "original-album", "toal"], "album": ["album", "talb", "\u00a9alb", "original-album", "toal"],