From 988cccd2b86d1515856251ece75a19a7547001a7 Mon Sep 17 00:00:00 2001 From: AppleTheGolden Date: Sun, 10 Aug 2025 20:04:15 +0200 Subject: [PATCH] Switch to internal defused XML implementation --- README.md | 1 - copyparty/dxml.py | 3 +++ copyparty/mtag.py | 31 ++++++------------------------- 3 files changed, 9 insertions(+), 26 deletions(-) diff --git a/README.md b/README.md index 264ef5d1..077aa58b 100644 --- a/README.md +++ b/README.md @@ -2274,7 +2274,6 @@ force-enable features with known issues on your OS/env by setting any of the fo |-----------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------| | `PRTY_FORCE_MP` | force-enable multiprocessing (real multithreading) on MacOS and other broken platforms | | `PRTY_FORCE_MAGIC` | use [magic](https://pypi.org/project/python-magic/) on Windows (you will segfault) | -| `PRTY_ALLOW_INSECURE_EXPAT` | allow using expat versions (bundled with python) that are vulnerable to xml attacks, [see the python docs](https://docs.python.org/3/library/xml.html#xml-security) | # packages diff --git a/copyparty/dxml.py b/copyparty/dxml.py index b49f060b..6271da8e 100644 --- a/copyparty/dxml.py +++ b/copyparty/dxml.py @@ -65,6 +65,9 @@ DXMLParser = _DXMLParser def parse_xml(txt: str) -> ET.Element: + """ + Parse XML into an xml.etree.ElementTree.Element while defusing some unsafe parts. + """ parser = DXMLParser() parser.feed(txt) return parser.close() # type: ignore diff --git a/copyparty/mtag.py b/copyparty/mtag.py index 6bfdb52e..427c8f16 100644 --- a/copyparty/mtag.py +++ b/copyparty/mtag.py @@ -60,23 +60,8 @@ def have_ff(scmd: str) -> bool: else: return bool(shutil.which(scmd)) -def expat_is_secure(): - """ - From the python xml docs: - - An attacker can abuse XML features to carry out denial of service attacks, access local files, generate network connections to other machines, or circumvent firewalls. - Expat versions lower that 2.6.0 may be vulnerable to “billion laughs”, “quadratic blowup” and “large tokens”. Python may be vulnerable if it uses such older versions of Expat as a system-provided library. Check pyexpat.EXPAT_VERSION. - """ - import pyexpat - # expat_2.7.1 - if len(pyexpat.EXPAT_VERSION) < 11: - return False - major, minor, patch = (int(x) for x in pyexpat.EXPAT_VERSION[6:].split(".")) - return major > 2 or major == 2 and minor >= 6 - HAVE_FFMPEG = not os.environ.get("PRTY_NO_FFMPEG") and have_ff("ffmpeg") HAVE_FFPROBE = not os.environ.get("PRTY_NO_FFPROBE") and have_ff("ffprobe") -HAVE_SECURE_EXPAT = os.environ.get("PRTY_ALLOW_INSECURE_EXPAT") or expat_is_secure() CBZ_PICS = set("png jpg jpeg gif bmp tga tif tiff webp avif".split()) CBZ_01 = re.compile(r"(^|[^0-9v])0+[01]\b") @@ -191,8 +176,7 @@ def au_unpk( fi = zf.open(using) elif pk == "epub": - if HAVE_SECURE_EXPAT: - fi = get_cover_from_epub(log, abspath) + fi = get_cover_from_epub(log, abspath) else: raise Exception("unknown compression %s" % (pk,)) @@ -385,7 +369,7 @@ def parse_ffprobe(txt: str) -> tuple[dict[str, tuple[int, Any]], dict[str, list[ def get_cover_from_epub(log: "NamedLogger", abspath: str) -> IO[bytes] | None: import zipfile - import xml.etree.ElementTree as ElTree + from .dxml import parse_xml try: from urlparse import urljoin # Python2 except ImportError: @@ -394,21 +378,21 @@ def get_cover_from_epub(log: "NamedLogger", abspath: str) -> IO[bytes] | None: with zipfile.ZipFile(abspath, "r") as z: # First open the container file to find the package document (.opf file) try: - container_root = ElTree.parse(z.open("META-INF/container.xml")) + container_root = parse_xml(z.read("META-INF/container.xml").decode()) except KeyError: log(f"epub: no container file found in {abspath}") return None # https://www.w3.org/TR/epub-33/#sec-container.xml-rootfile-elem - container_namesapce = {"": "urn:oasis:names:tc:opendocument:xmlns:container"} + container_namespace = {"": "urn:oasis:names:tc:opendocument:xmlns:container"} # One file could contain multiple package documents, default to the first one rootfile_path = container_root\ - .find("./rootfiles/rootfile", container_namesapce)\ + .find("./rootfiles/rootfile", container_namespace)\ .get("full-path") # Then open the first package document to find the path of the cover image try: - package_root = ElTree.parse(z.open(rootfile_path)) + package_root = parse_xml(z.read(rootfile_path).decode()) except KeyError: log(f"epub: no package document found in {abspath}") return None @@ -489,9 +473,6 @@ class MTag(object): self.log(msg.format(or_ffprobe, " " * 37, pyname), c=1) return - if not HAVE_SECURE_EXPAT: - self.log("expat version is missing critical security fixes; epub thumbnails will not be available", c=3) - # https://picard-docs.musicbrainz.org/downloads/MusicBrainz_Picard_Tag_Map.html tagmap = { "album": ["album", "talb", "\u00a9alb", "original-album", "toal"],