From 452450e4519225c52f9d2b2fe55da8bb9cbdb7c1 Mon Sep 17 00:00:00 2001 From: ed Date: Sun, 8 Aug 2021 20:30:12 +0200 Subject: [PATCH] improve youtube parser --- bin/mtag/res/yt-ipr.conf | 37 +++++++++++++++++++++++++++++++++++++ bin/mtag/res/yt-ipr.user.js | 31 +++++++++++++++++++++---------- bin/mtag/yt-ipr.py | 25 +++++++++++++++++++------ 3 files changed, 77 insertions(+), 16 deletions(-) create mode 100644 bin/mtag/res/yt-ipr.conf diff --git a/bin/mtag/res/yt-ipr.conf b/bin/mtag/res/yt-ipr.conf new file mode 100644 index 00000000..f95f0aee --- /dev/null +++ b/bin/mtag/res/yt-ipr.conf @@ -0,0 +1,37 @@ +# example config file to use copyparty as a youtube manifest collector, +# use with copyparty like: python copyparty.py -c yt-ipr.conf +# +# see docs/example.conf for a better explanation of the syntax, but +# newlines are block separators, so adding blank lines inside a volume definition is bad +# (use comments as separators instead) + + +# create user ed, password wark +u ed:wark + + +# create a volume at /ytm which stores files at ./srv/ytm +./srv/ytm +/ytm +# write-only, but read-write for user ed +w +rw ed +# rescan the volume on startup +c e2dsa +# collect tags from all new files since last scan +c e2ts +# optionally enable compression to make the files 50% smaller +c pk +# only allow uploads which are between 16k and 1m large +c sz=16k-1m +# allow up to 10 uploads over 5 minutes from each ip +c maxn=10,300 +# move uploads into subfolders: YEAR-MONTH / DAY-HOUR / +c rotf=%Y-%m/%d-%H +# add the parser and tell copyparty what tags it can expect from it +c mtp=yt-id,yt-title,yt-author,yt-channel,yt-views,yt-private,yt-manifest,yt-expires=bin/mtag/yt-ipr.py +# decide which tags we want to index and in what order +c mte=yt-id,yt-title,yt-author,yt-channel,yt-views,yt-private,yt-manifest,yt-expires + + +# create any other volumes you'd like down here, or merge this with an existing config file diff --git a/bin/mtag/res/yt-ipr.user.js b/bin/mtag/res/yt-ipr.user.js index b81947e0..10435246 100644 --- a/bin/mtag/res/yt-ipr.user.js +++ b/bin/mtag/res/yt-ipr.user.js @@ -7,23 +7,34 @@ // ==/UserScript== function main() { + var server = 'https://127.0.0.1:3923/ytm', + interval = 60; // sec + var sent = {}; - function send(txt) { - if (sent[txt]) + function send(txt, mf_url, desc) { + if (sent[mf_url]) return; - fetch('https://127.0.0.1:3923/playerdata?_=' + Date.now(), { method: "PUT", body: txt }); - console.log('[yt-ipr] yeet %d bytes', txt.length); - sent[txt] = 1; + fetch(server + '?_=' + Date.now(), { method: "PUT", body: txt }); + console.log('[yt-ipr] yeet %d bytes, %s', txt.length, desc); + sent[mf_url] = 1; } function collect() { - setTimeout(collect, 60 * 1000); - var pd = document.querySelector('ytd-watch-flexy'); - if (pd) - send(JSON.stringify(pd.playerData)); + setTimeout(collect, interval * 1000); + try { + var pd = document.querySelector('ytd-watch-flexy').playerData, + mu = pd.streamingData.dashManifestUrl || pd.streamingData.hlsManifestUrl, + desc = pd.videoDetails.videoId + ', ' + pd.videoDetails.title; + + if (mu.length) + send(JSON.stringify(pd), mu, desc); + } + catch (ex) { + console.log("[yt-ipr]", ex); + } } - setTimeout(collect, 5000); + collect(); } var scr = document.createElement('script'); diff --git a/bin/mtag/yt-ipr.py b/bin/mtag/yt-ipr.py index a7eba6bf..7d656c7f 100644 --- a/bin/mtag/yt-ipr.py +++ b/bin/mtag/yt-ipr.py @@ -9,11 +9,14 @@ from datetime import datetime """ youtube initial player response -example usage: - -v srv/playerdata:playerdata:w - :c,e2tsr:c,e2dsa - :c,mtp=yt-id,yt-title,yt-author,yt-channel,yt-views,yt-private,yt-expires=bin/mtag/yt-ipr.py - :c,mte=yt-id,yt-title,yt-author,yt-channel,yt-views,yt-private,yt-expires +it's probably best to use this through a config file; see res/yt-ipr.conf + +but if you want to use plain arguments instead then: + -v srv/ytm:ytm:w:rw,ed + :c,e2ts:c,e2dsa + :c,sz=16k-1m:c,maxn=10,300:c,rotf=%Y-%m/%d-%H + :c,mtp=yt-id,yt-title,yt-author,yt-channel,yt-views,yt-private,yt-manifest,yt-expires=bin/mtag/yt-ipr.py + :c,mte=yt-id,yt-title,yt-author,yt-channel,yt-views,yt-private,yt-manifest,yt-expires see res/yt-ipr.user.js for the example userscript to go with this """ @@ -44,6 +47,12 @@ def main(): et = datetime.utcfromtimestamp(int(et)) et = et.strftime("%Y-%m-%d, %H:%M") + mf = [] + if "dashManifestUrl" in sd: + mf.append("dash") + if "hlsManifestUrl" in sd: + mf.append("hls") + r = { "yt-id": vd["videoId"], "yt-title": vd["title"], @@ -52,10 +61,14 @@ def main(): "yt-views": vd["viewCount"], "yt-private": vd["isPrivate"], # "yt-expires": sd["expiresInSeconds"], + "yt-manifest": ",".join(mf), "yt-expires": et, } print(json.dumps(r)) if __name__ == "__main__": - main() + try: + main() + except: + pass