improve youtube parser

This commit is contained in:
ed 2021-08-08 20:30:12 +02:00
parent 419dd2d1c7
commit 452450e451
3 changed files with 77 additions and 16 deletions

37
bin/mtag/res/yt-ipr.conf Normal file
View file

@ -0,0 +1,37 @@
# example config file to use copyparty as a youtube manifest collector,
# use with copyparty like: python copyparty.py -c yt-ipr.conf
#
# see docs/example.conf for a better explanation of the syntax; in short,
# blank lines are block separators, so adding blank lines inside a volume definition is bad
# (use comment lines as separators instead)
# create user ed, password wark
u ed:wark
# create a volume at /ytm which stores files at ./srv/ytm
./srv/ytm
/ytm
# write-only, but read-write for user ed
w
rw ed
# rescan the volume on startup
c e2dsa
# collect tags from all new files since last scan
c e2ts
# optionally enable compression to make the files 50% smaller
c pk
# only allow uploads which are between 16k and 1m in size
c sz=16k-1m
# allow up to 10 uploads per 300 seconds (5 minutes) from each ip
c maxn=10,300
# move uploads into subfolders: YEAR-MONTH / DAY-HOUR / <upload>
c rotf=%Y-%m/%d-%H
# add the parser and tell copyparty what tags it can expect from it
c mtp=yt-id,yt-title,yt-author,yt-channel,yt-views,yt-private,yt-manifest,yt-expires=bin/mtag/yt-ipr.py
# decide which tags we want to index and in what order
c mte=yt-id,yt-title,yt-author,yt-channel,yt-views,yt-private,yt-manifest,yt-expires
# create any other volumes you'd like down here, or merge this with an existing config file

View file

@ -7,23 +7,34 @@
// ==/UserScript==
function main() {
var server = 'https://127.0.0.1:3923/ytm',
interval = 60; // sec
var sent = {};
function send(txt) {
if (sent[txt])
function send(txt, mf_url, desc) {
if (sent[mf_url])
return;
fetch('https://127.0.0.1:3923/playerdata?_=' + Date.now(), { method: "PUT", body: txt });
console.log('[yt-ipr] yeet %d bytes', txt.length);
sent[txt] = 1;
fetch(server + '?_=' + Date.now(), { method: "PUT", body: txt });
console.log('[yt-ipr] yeet %d bytes, %s', txt.length, desc);
sent[mf_url] = 1;
}
function collect() {
setTimeout(collect, 60 * 1000);
var pd = document.querySelector('ytd-watch-flexy');
if (pd)
send(JSON.stringify(pd.playerData));
setTimeout(collect, interval * 1000);
try {
var pd = document.querySelector('ytd-watch-flexy').playerData,
mu = pd.streamingData.dashManifestUrl || pd.streamingData.hlsManifestUrl,
desc = pd.videoDetails.videoId + ', ' + pd.videoDetails.title;
if (mu.length)
send(JSON.stringify(pd), mu, desc);
}
catch (ex) {
console.log("[yt-ipr]", ex);
}
}
setTimeout(collect, 5000);
collect();
}
var scr = document.createElement('script');

View file

@ -9,11 +9,14 @@ from datetime import datetime
"""
youtube initial player response
example usage:
-v srv/playerdata:playerdata:w
:c,e2tsr:c,e2dsa
:c,mtp=yt-id,yt-title,yt-author,yt-channel,yt-views,yt-private,yt-expires=bin/mtag/yt-ipr.py
:c,mte=yt-id,yt-title,yt-author,yt-channel,yt-views,yt-private,yt-expires
it's probably best to use this through a config file; see res/yt-ipr.conf
but if you want to use plain arguments instead then:
-v srv/ytm:ytm:w:rw,ed
:c,e2ts:c,e2dsa
:c,sz=16k-1m:c,maxn=10,300:c,rotf=%Y-%m/%d-%H
:c,mtp=yt-id,yt-title,yt-author,yt-channel,yt-views,yt-private,yt-manifest,yt-expires=bin/mtag/yt-ipr.py
:c,mte=yt-id,yt-title,yt-author,yt-channel,yt-views,yt-private,yt-manifest,yt-expires
see res/yt-ipr.user.js for the example userscript to go with this
"""
@ -44,6 +47,12 @@ def main():
et = datetime.utcfromtimestamp(int(et))
et = et.strftime("%Y-%m-%d, %H:%M")
mf = []
if "dashManifestUrl" in sd:
mf.append("dash")
if "hlsManifestUrl" in sd:
mf.append("hls")
r = {
"yt-id": vd["videoId"],
"yt-title": vd["title"],
@ -52,10 +61,14 @@ def main():
"yt-views": vd["viewCount"],
"yt-private": vd["isPrivate"],
# "yt-expires": sd["expiresInSeconds"],
"yt-manifest": ",".join(mf),
"yt-expires": et,
}
print(json.dumps(r))
if __name__ == "__main__":
main()
try:
main()
except:
pass