ffprobe parser less bad

2025-11-24 07:23:22 -07:00 · 2021-05-27 01:41:12 +02:00 · 2021-05-27 01:41:12 +02:00 · 69dc433e1c
parent c880cd848c
commit 69dc433e1c
4 changed files with 124 additions and 89 deletions
--- a/copyparty/main.py
+++ b/copyparty/main.py
@ -268,7 +268,7 @@ def run_argparse(argv, formatter):
    ap2.add_argument("--no-mtag-mt", action="store_true", help="disable tag-read parallelism")
    ap2.add_argument("-mtm", metavar="M=t,t,t", action="append", type=str, help="add/replace metadata mapping")
    ap2.add_argument("-mte", metavar="M,M,M", type=str, help="tags to index/display (comma-sep.)",
-        default="circle,album,.tn,artist,title,.bpm,key,.dur,.q")
+        default="circle,album,.tn,artist,title,.bpm,key,.dur,.q,.vq,.aq,acodec,vcodec,res,.fps")
    ap2.add_argument("-mtp", metavar="M=[f,]bin", action="append", type=str, help="read tag M using bin")
    ap2.add_argument("--srch-time", metavar="SEC", type=int, default=30, help="search deadline")

--- a/copyparty/mtag.py
+++ b/copyparty/mtag.py
@ -30,95 +30,135 @@ HAVE_FFMPEG = have_ff("ffmpeg")
 HAVE_FFPROBE = have_ff("ffprobe")


-def parse_ffprobe(stdout, logger, require_audio=True):
-    txt = [x.rstrip("\r") for x in stdout.split("\n")]
+def ffprobe(abspath):
+    cmd = [
+        b"ffprobe",
+        b"-hide_banner",
+        b"-show_streams",
+        b"-show_format",
+        b"--",
+        fsenc(abspath),
+    ]
+    p = sp.Popen(cmd, stdout=sp.PIPE, stderr=sp.PIPE)
+    r = p.communicate()
+    txt = r[0].decode("utf-8", "replace")
+    return parse_ffprobe(txt)

-    """
-    note:
-        tags which contain newline will be truncated on first \n,
-        ffprobe emits \n and spacepads the : to align visually
-    note:
-        the Stream ln always mentions Audio: if audio
-        the Stream ln usually has kb/s, is more accurate
-        the Duration ln always has kb/s
-        the Metadata: after Chapter may contain BPM info,
-        title : Tempo: 126.0

-    Input #0, wav,
-        Metadata:
-        date : <OK>
-        Duration:
-        Chapter #
-        Metadata:
-            title : <NG>
-
-    Input #0, mp3,
-        Metadata:
-        album : <OK>
-        Duration:
-        Stream #0:0: Audio:
-        Stream #0:1: Video:
-        Metadata:
-            comment : <NG>
-    """
-
-    ptn_md_beg = re.compile("^( +)Metadata:$")
-    ptn_md_kv = re.compile("^( +)([^:]+) *: (.*)")
-    ptn_dur = re.compile("^ *Duration: ([^ ]+)(, |$)")
-    ptn_br1 = re.compile("^ *Duration: .*, bitrate: ([0-9]+) kb/s(, |$)")
-    ptn_br2 = re.compile("^ *Stream.*: Audio:.* ([0-9]+) kb/s(, |$)")
-    ptn_audio = re.compile("^ *Stream .*: Audio: ")
-    ptn_au_parent = re.compile("^ *(Input #|Stream .*: Audio: )")
-
-    ret = {}
-    md = {}
-    in_md = False
-    is_audio = False
-    au_parent = False
-    for ln in txt:
-        m = ptn_md_kv.match(ln)
-        if m and in_md and len(m.group(1)) == in_md:
-            _, k, v = [x.strip() for x in m.groups()]
-            if k != "" and v != "":
-                md[k] = [v]
+def parse_ffprobe(txt):
+    """ffprobe -show_format -show_streams"""
+    streams = []
+    g = None
+    for ln in [x.rstrip("\r") for x in txt.split("\n")]:
+        try:
+            k, v = ln.split("=", 1)
+            g[k] = v
            continue
-        else:
-            in_md = False
+        except:
+            pass

-        m = ptn_md_beg.match(ln)
-        if m and au_parent:
-            in_md = len(m.group(1)) + 2
+        if ln == "[STREAM]":
+            g = {}
+            streams.append(g)
+
+        if ln == "[FORMAT]":
+            g = {"codec_type": "format"}  # heh
+            streams.append(g)
+
+    ret = {}  # processed
+    md = {}  # raw tags
+
+    have = {}
+    for strm in streams:
+        typ = strm.get("codec_type")
+        if typ in have:
            continue

-        au_parent = bool(ptn_au_parent.search(ln))
+        have[typ] = True

-        if ptn_audio.search(ln):
-            is_audio = True
+        if typ == "audio":
+            kvm = [
+                ["codec_name", "ac"],
+                ["channel_layout", "chs"],
+                ["sample_rate", ".hz"],
+                ["bit_rate", ".aq"],
+                ["duration", ".dur"],
+            ]

-        m = ptn_dur.search(ln)
-        if m:
-            sec = 0
-            tstr = m.group(1)
-            if tstr.lower() != "n/a":
+        if typ == "video":
+            if strm.get("DISPOSITION:attached_pic") == "1" or strm.get(
+                "duration_ts"
+            ) in ["1", "N/A"]:
+                continue
+
+            kvm = [
+                ["codec_name", "vc"],
+                ["pix_fmt", "pixfmt"],
+                ["r_frame_rate", ".fps"],
+                ["bit_rate", ".vq"],
+                ["width", ".resw"],
+                ["height", ".resh"],
+                ["duration", ".dur"],
+            ]
+
+        if typ == "format":
+            kvm = [["duration", ".dur"], ["bit_rate", ".q"]]
+
+        for sk, rk in kvm:
+            v = strm.get(sk)
+            if v is None:
+                continue
+
+            if rk.startswith("."):
                try:
-                    tf = tstr.split(",")[0].split(".")[0].split(":")
-                    for f in tf:
-                        sec *= 60
-                        sec += int(f)
+                    v = float(v)
+                    v2 = ret.get(rk)
+                    if v2 is None or v > v2:
+                        ret[rk] = v
                except:
-                    logger("invalid timestr from ffprobe: [{}]".format(tstr), c=3)
+                    # sqlite doesnt care but the code below does
+                    if v not in ["N/A"]:
+                        ret[rk] = v
+            else:
+                ret[rk] = v

-            ret[".dur"] = sec
-            m = ptn_br1.search(ln)
-            if m:
-                ret[".q"] = m.group(1)
+    for strm in streams:
+        for k, v in strm.items():
+            if not k.startswith("TAG:"):
+                continue

-        m = ptn_br2.search(ln)
-        if m:
-            ret[".q"] = m.group(1)
+            k = k[4:].strip()
+            v = v.strip()
+            if k and v:
+                md[k] = [v]

-    if not is_audio and require_audio:
-        return {}, {}
+    for k in [".q", ".vq", ".aq"]:
+        if k in ret:
+            ret[k] /= 1000  # bit_rate=320000
+
+    for k in [".q", ".vq", ".aq", ".resw", ".resh"]:
+        if k in ret:
+            ret[k] = int(ret[k])
+
+    if ".fps" in ret:
+        fps = ret[".fps"]
+        if "/" in fps:
+            fa, fb = fps.split("/")
+            fps = int(fa) * 1.0 / int(fb)
+
+        if fps < 1000:
+            ret[".fps"] = round(fps, 3)
+        else:
+            del ret[".fps"]
+
+    if ".dur" in ret:
+        if ret[".dur"] < 0.1:
+            del ret[".dur"]
+            if ".q" in ret:
+                del ret[".q"]
+
+    if ".resw" in ret and ".resh" in ret:
+        ret["res"] = "{}x{}".format(ret[".resw"], ret[".resh"])

    ret = {k: [0, v] for k, v in ret.items()}

@ -325,11 +365,7 @@ class MTag(object):
        return self.normalize_tags(ret, md)

    def get_ffprobe(self, abspath):
-        cmd = [b"ffprobe", b"-hide_banner", b"--", fsenc(abspath)]
-        p = sp.Popen(cmd, stdout=sp.PIPE, stderr=sp.PIPE)
-        r = p.communicate()
-        txt = r[1].decode("utf-8", "replace")
-        ret, md = parse_ffprobe(txt, self.log)
+        ret, md = ffprobe(abspath)
        return self.normalize_tags(ret, md)

    def get_bin(self, parsers, abspath):
--- a/copyparty/th_srv.py
+++ b/copyparty/th_srv.py
@ -9,7 +9,7 @@ import subprocess as sp

 from .__init__ import PY2
 from .util import fsenc, Queue, Cooldown
-from .mtag import HAVE_FFMPEG, HAVE_FFPROBE, parse_ffprobe
+from .mtag import HAVE_FFMPEG, HAVE_FFPROBE, ffprobe


 if not PY2:
@ -214,11 +214,7 @@ class ThumbSrv(object):
            im.save(tpath)

    def conv_ffmpeg(self, abspath, tpath):
-        cmd = [b"ffprobe", b"-hide_banner", b"--", fsenc(abspath)]
-        p = sp.Popen(cmd, stdout=sp.PIPE, stderr=sp.PIPE)
-        r = p.communicate()
-        txt = r[1].decode("utf-8", "replace")
-        ret, _ = parse_ffprobe(txt, self.log, False)
+        ret, _ = run_ffprobe(abspath)

        dur = ret[".dur"][1]
        seek = "{:.0f}".format(dur / 3)
@ -265,6 +261,8 @@ class ThumbSrv(object):
                self.log("cln {}/".format(vol))
                self.clean(vol)

+            self.log("cln ok")
+
    def clean(self, vol):
        # self.log("cln {}".format(vol))
        maxage = self.args.th_maxage
--- a/copyparty/up2k.py
+++ b/copyparty/up2k.py
@ -740,7 +740,8 @@ class Up2k(object):
                    vtags = [
                        "\033[36m{} \033[33m{}".format(k, v) for k, v in tags.items()
                    ]
-                    self.log("{}\033[0m [{}]".format(" ".join(vtags), abspath))
+                    if vtags:
+                        self.log("{}\033[0m [{}]".format(" ".join(vtags), abspath))

                with self.mutex:
                    self.pending_tags.append([entags, wark, abspath, tags])