From 69dc433e1c3c6a38a61e1c7c023acfc62811e406 Mon Sep 17 00:00:00 2001 From: ed Date: Thu, 27 May 2021 01:41:12 +0200 Subject: [PATCH] ffprobe parser less bad --- copyparty/__main__.py | 2 +- copyparty/mtag.py | 198 +++++++++++++++++++++++++----------------- copyparty/th_srv.py | 10 +-- copyparty/up2k.py | 3 +- 4 files changed, 124 insertions(+), 89 deletions(-) diff --git a/copyparty/__main__.py b/copyparty/__main__.py index 6f8a3763..64b0e2cf 100644 --- a/copyparty/__main__.py +++ b/copyparty/__main__.py @@ -268,7 +268,7 @@ def run_argparse(argv, formatter): ap2.add_argument("--no-mtag-mt", action="store_true", help="disable tag-read parallelism") ap2.add_argument("-mtm", metavar="M=t,t,t", action="append", type=str, help="add/replace metadata mapping") ap2.add_argument("-mte", metavar="M,M,M", type=str, help="tags to index/display (comma-sep.)", - default="circle,album,.tn,artist,title,.bpm,key,.dur,.q") + default="circle,album,.tn,artist,title,.bpm,key,.dur,.q,.vq,.aq,acodec,vcodec,res,.fps") ap2.add_argument("-mtp", metavar="M=[f,]bin", action="append", type=str, help="read tag M using bin") ap2.add_argument("--srch-time", metavar="SEC", type=int, default=30, help="search deadline") diff --git a/copyparty/mtag.py b/copyparty/mtag.py index 2e9370f7..ad14cb38 100644 --- a/copyparty/mtag.py +++ b/copyparty/mtag.py @@ -30,95 +30,135 @@ HAVE_FFMPEG = have_ff("ffmpeg") HAVE_FFPROBE = have_ff("ffprobe") -def parse_ffprobe(stdout, logger, require_audio=True): - txt = [x.rstrip("\r") for x in stdout.split("\n")] +def ffprobe(abspath): + cmd = [ + b"ffprobe", + b"-hide_banner", + b"-show_streams", + b"-show_format", + b"--", + fsenc(abspath), + ] + p = sp.Popen(cmd, stdout=sp.PIPE, stderr=sp.PIPE) + r = p.communicate() + txt = r[0].decode("utf-8", "replace") + return parse_ffprobe(txt) - """ - note: - tags which contain newline will be truncated on first \n, - ffprobe emits \n and spacepads the : to align visually - note: - the Stream ln always mentions Audio: if audio 
- the Stream ln usually has kb/s, is more accurate - the Duration ln always has kb/s - the Metadata: after Chapter may contain BPM info, - title : Tempo: 126.0 - Input #0, wav, - Metadata: - date : - Duration: - Chapter # - Metadata: - title : - - Input #0, mp3, - Metadata: - album : - Duration: - Stream #0:0: Audio: - Stream #0:1: Video: - Metadata: - comment : - """ - - ptn_md_beg = re.compile("^( +)Metadata:$") - ptn_md_kv = re.compile("^( +)([^:]+) *: (.*)") - ptn_dur = re.compile("^ *Duration: ([^ ]+)(, |$)") - ptn_br1 = re.compile("^ *Duration: .*, bitrate: ([0-9]+) kb/s(, |$)") - ptn_br2 = re.compile("^ *Stream.*: Audio:.* ([0-9]+) kb/s(, |$)") - ptn_audio = re.compile("^ *Stream .*: Audio: ") - ptn_au_parent = re.compile("^ *(Input #|Stream .*: Audio: )") - - ret = {} - md = {} - in_md = False - is_audio = False - au_parent = False - for ln in txt: - m = ptn_md_kv.match(ln) - if m and in_md and len(m.group(1)) == in_md: - _, k, v = [x.strip() for x in m.groups()] - if k != "" and v != "": - md[k] = [v] +def parse_ffprobe(txt): + """ffprobe -show_format -show_streams""" + streams = [] + g = None + for ln in [x.rstrip("\r") for x in txt.split("\n")]: + try: + k, v = ln.split("=", 1) + g[k] = v continue - else: - in_md = False + except: + pass - m = ptn_md_beg.match(ln) - if m and au_parent: - in_md = len(m.group(1)) + 2 + if ln == "[STREAM]": + g = {} + streams.append(g) + + if ln == "[FORMAT]": + g = {"codec_type": "format"} # heh + streams.append(g) + + ret = {} # processed + md = {} # raw tags + + have = {} + for strm in streams: + typ = strm.get("codec_type") + if typ in have: continue - au_parent = bool(ptn_au_parent.search(ln)) + have[typ] = True - if ptn_audio.search(ln): - is_audio = True + if typ == "audio": + kvm = [ + ["codec_name", "ac"], + ["channel_layout", "chs"], + ["sample_rate", ".hz"], + ["bit_rate", ".aq"], + ["duration", ".dur"], + ] - m = ptn_dur.search(ln) - if m: - sec = 0 - tstr = m.group(1) - if tstr.lower() != "n/a": + if typ == 
"video": + if strm.get("DISPOSITION:attached_pic") == "1" or strm.get( + "duration_ts" + ) in ["1", "N/A"]: + continue + + kvm = [ + ["codec_name", "vc"], + ["pix_fmt", "pixfmt"], + ["r_frame_rate", ".fps"], + ["bit_rate", ".vq"], + ["width", ".resw"], + ["height", ".resh"], + ["duration", ".dur"], + ] + + if typ == "format": + kvm = [["duration", ".dur"], ["bit_rate", ".q"]] + + for sk, rk in kvm: + v = strm.get(sk) + if v is None: + continue + + if rk.startswith("."): try: - tf = tstr.split(",")[0].split(".")[0].split(":") - for f in tf: - sec *= 60 - sec += int(f) + v = float(v) + v2 = ret.get(rk) + if v2 is None or v > v2: + ret[rk] = v except: - logger("invalid timestr from ffprobe: [{}]".format(tstr), c=3) + # sqlite doesnt care but the code below does + if v not in ["N/A"]: + ret[rk] = v + else: + ret[rk] = v - ret[".dur"] = sec - m = ptn_br1.search(ln) - if m: - ret[".q"] = m.group(1) + for strm in streams: + for k, v in strm.items(): + if not k.startswith("TAG:"): + continue - m = ptn_br2.search(ln) - if m: - ret[".q"] = m.group(1) + k = k[4:].strip() + v = v.strip() + if k and v: + md[k] = [v] - if not is_audio and require_audio: - return {}, {} + for k in [".q", ".vq", ".aq"]: + if k in ret: + ret[k] /= 1000 # bit_rate=320000 + + for k in [".q", ".vq", ".aq", ".resw", ".resh"]: + if k in ret: + ret[k] = int(ret[k]) + + if ".fps" in ret: + fps = ret[".fps"] + if "/" in fps: + fa, fb = fps.split("/") + fps = int(fa) * 1.0 / int(fb) + + if fps < 1000: + ret[".fps"] = round(fps, 3) + else: + del ret[".fps"] + + if ".dur" in ret: + if ret[".dur"] < 0.1: + del ret[".dur"] + if ".q" in ret: + del ret[".q"] + + if ".resw" in ret and ".resh" in ret: + ret["res"] = "{}x{}".format(ret[".resw"], ret[".resh"]) ret = {k: [0, v] for k, v in ret.items()} @@ -325,11 +365,7 @@ class MTag(object): return self.normalize_tags(ret, md) def get_ffprobe(self, abspath): - cmd = [b"ffprobe", b"-hide_banner", b"--", fsenc(abspath)] - p = sp.Popen(cmd, stdout=sp.PIPE, 
stderr=sp.PIPE) - r = p.communicate() - txt = r[1].decode("utf-8", "replace") - ret, md = parse_ffprobe(txt, self.log) + ret, md = ffprobe(abspath) return self.normalize_tags(ret, md) def get_bin(self, parsers, abspath): diff --git a/copyparty/th_srv.py b/copyparty/th_srv.py index 5eb5f1b4..2d630699 100644 --- a/copyparty/th_srv.py +++ b/copyparty/th_srv.py @@ -9,7 +9,7 @@ import subprocess as sp from .__init__ import PY2 from .util import fsenc, Queue, Cooldown -from .mtag import HAVE_FFMPEG, HAVE_FFPROBE, parse_ffprobe +from .mtag import HAVE_FFMPEG, HAVE_FFPROBE, ffprobe if not PY2: @@ -214,11 +214,7 @@ class ThumbSrv(object): im.save(tpath) def conv_ffmpeg(self, abspath, tpath): - cmd = [b"ffprobe", b"-hide_banner", b"--", fsenc(abspath)] - p = sp.Popen(cmd, stdout=sp.PIPE, stderr=sp.PIPE) - r = p.communicate() - txt = r[1].decode("utf-8", "replace") - ret, _ = parse_ffprobe(txt, self.log, False) + ret, _ = ffprobe(abspath) dur = ret[".dur"][1] seek = "{:.0f}".format(dur / 3) @@ -265,6 +261,8 @@ class ThumbSrv(object): self.log("cln {}/".format(vol)) self.clean(vol) + self.log("cln ok") + def clean(self, vol): # self.log("cln {}".format(vol)) maxage = self.args.th_maxage diff --git a/copyparty/up2k.py b/copyparty/up2k.py index 93042185..9fd7c9ee 100644 --- a/copyparty/up2k.py +++ b/copyparty/up2k.py @@ -740,7 +740,8 @@ class Up2k(object): vtags = [ "\033[36m{} \033[33m{}".format(k, v) for k, v in tags.items() ] - self.log("{}\033[0m [{}]".format(" ".join(vtags), abspath)) + if vtags: + self.log("{}\033[0m [{}]".format(" ".join(vtags), abspath)) with self.mutex: self.pending_tags.append([entags, wark, abspath, tags])