mirror of
https://github.com/9001/copyparty.git
synced 2025-08-17 09:02:15 -06:00
up2k-hook-ytid: add mp4/webm/mkv metadata scanner
This commit is contained in:
parent
5dc75ebb67
commit
76adac7c72
|
@ -2,29 +2,140 @@
|
|||
// assumes all files dropped into the uploader have a youtube-id somewhere in the filename,
|
||||
// locates the youtube-ids and passes them to an API which returns a list of IDs which should be uploaded
|
||||
//
|
||||
// also tries to find the youtube-id in the embedded metadata
|
||||
//
|
||||
// assumes copyparty is behind nginx as /ytq is a standalone service which must be rproxied in place
|
||||
|
||||
/**
 * up2k hook entry point: decides whether dropped files go through the
 * youtube-id filter before being passed further down the hook chain.
 *
 * If `up2k.uc.fsearch` is truthy (presumably file-search mode -- confirm),
 * the lists are forwarded unmodified to the next hook and its return value
 * is returned. Otherwise the async worker `a_up2k_namefilter` is started;
 * it is not awaited, so this function returns undefined immediately.
 *
 * @param {Array} good_files - dropped files; the worker reads each entry as a [file, name] pair
 * @param {Array} nil_files - forwarded unchanged
 * @param {Array} bad_files - forwarded unchanged
 * @param {Function[]} hooks - remaining hook chain; hooks[0] is invoked with the three lists and hooks.slice(1)
 * @returns {*} whatever hooks[0] returns in passthru mode, otherwise undefined
 */
function up2k_namefilter(good_files, nil_files, bad_files, hooks) {
    if (up2k.uc.fsearch)
        return hooks[0](good_files, nil_files, bad_files, hooks.slice(1));

    // the worker runs in the background; surface a rejection (for example a
    // failed file read) instead of leaving it as an unhandled promise rejection
    a_up2k_namefilter(good_files, nil_files, bad_files, hooks).catch((ex) => {
        toast.err(0, `sorry, analyzing your files failed ;_;\n\n${ex}`);
    });
}
|
||||
|
||||
/**
 * Find the first occurrence of a byte pattern inside a byte buffer
 * (an indexOf for byte sequences rather than single bytes).
 *
 * @param {Uint8Array} buf - haystack; needs `indexOf`, `byteLength` and numeric indexing
 * @param {Uint8Array} ptn - needle; elements are compared with `!==`
 * @returns {number} offset of the first full match within `buf`, or -1 if there is none
 */
function bstrpos(buf, ptn) {
    var ofs = 0,
        ch0 = ptn[0],
        sz = buf.byteLength;

    while (true) {
        // jump to the next position that holds the first pattern byte
        ofs = buf.indexOf(ch0, ofs);
        if (ofs < 0 || ofs >= sz)
            return -1;

        // compare the rest of the pattern; an index past the end of buf reads
        // as undefined, which never equals a pattern byte, so a match that
        // would run off the end of the buffer is rejected
        for (var a = 1; a < ptn.length; a++)
            if (buf[ofs + a] !== ptn[a])
                break;

        // the loop ran to completion, so every byte matched
        if (a === ptn.length)
            return ofs;

        // false candidate; resume the search one byte further
        ++ofs;
    }
}
|
||||
|
||||
async function a_up2k_namefilter(good_files, nil_files, bad_files, hooks) {
|
||||
var t0 = Date.now(),
|
||||
yt_ids = new Set(),
|
||||
textdec = new TextDecoder('latin1'),
|
||||
md_ptn = new TextEncoder().encode('youtube.com/watch?v='),
|
||||
file_ids = [], // all IDs found for each good_files
|
||||
mofs = 0,
|
||||
mnchk = 0,
|
||||
mfile = '';
|
||||
|
||||
for (var a = 0; a < good_files.length; a++) {
|
||||
var [fobj, name] = good_files[a],
|
||||
sz = fobj.size,
|
||||
ids = [],
|
||||
id_ok = false,
|
||||
m;
|
||||
|
||||
// all IDs found in this file
|
||||
file_ids.push(ids);
|
||||
|
||||
// look for ID in filename; reduce the
|
||||
// metadata-scan intensity if the id looks safe
|
||||
m = /[\[(-]([\w-]{11})[\])]?\.(?:mp4|webm|mkv)$/i.exec(name);
|
||||
id_ok = !!m;
|
||||
|
||||
while (true) {
|
||||
// fuzzy catch-all;
|
||||
// some ytdl fork did %(title)-%(id).%(ext) ...
|
||||
m = /(?:^|[^\w])([\w-]{11})(?:$|[^\w-])/.exec(name);
|
||||
if (!m)
|
||||
break;
|
||||
|
||||
yt_ids.add(m[1]);
|
||||
name = name.replace(m[1], '');
|
||||
yt_ids.add(m[1]);
|
||||
ids.push(m[1]);
|
||||
}
|
||||
|
||||
// look for IDs in video metadata,
|
||||
if (/\.(mp4|webm|mkv)$/i.exec(name)) {
|
||||
toast.show('inf r', 0, `analyzing file ${a + 1} / ${good_files.length} :\n${name}\n\nhave analysed ${++mnchk} files in ${(Date.now() - t0) / 1000} seconds, ${humantime((good_files.length - (a + 1)) * (((Date.now() - t0) / 1000) / mnchk))} remaining,\n\nbiggest offset so far is ${mofs}, in this file:\n\n${mfile}`);
|
||||
|
||||
// check the first and last 128 MiB (512 MiB if no ID was found in the filename), at most half the file each;
|
||||
// pWxOroN5WCo.mkv @ 6edb98 (6.92M)
|
||||
// Nf-nN1wF5Xo.mp4 @ 4a98034 (74.6M)
|
||||
var chunksz = 1024 * 1024 * 2, // byte
|
||||
aspan = id_ok ? 128 : 512; // MiB
|
||||
|
||||
aspan = parseInt(Math.min(sz / 2, aspan * 1024 * 1024) / chunksz) * chunksz;
|
||||
|
||||
for (var side = 0; side < 2; side++) {
|
||||
var ofs = side ? Math.max(0, sz - aspan) : 0,
|
||||
nchunks = aspan / chunksz;
|
||||
|
||||
for (var chunk = 0; chunk < nchunks; chunk++) {
|
||||
var bchunk = await fobj.slice(ofs, ofs + chunksz + 16).arrayBuffer(),
|
||||
uchunk = new Uint8Array(bchunk, 0, bchunk.byteLength),
|
||||
bofs = bstrpos(uchunk, md_ptn),
|
||||
absofs = Math.min(ofs + bofs, (sz - ofs) + bofs),
|
||||
txt = bofs < 0 ? '' : textdec.decode(uchunk.subarray(bofs)),
|
||||
m;
|
||||
|
||||
//console.log(`side ${ side }, chunk ${ chunk }, ofs ${ ofs }, bchunk ${ bchunk.byteLength }, txt ${ txt.length }`);
|
||||
while (true) {
|
||||
// mkv/webm have [a-z] immediately after url
|
||||
m = /(youtube\.com\/watch\?v=[\w-]{11})/.exec(txt);
|
||||
if (!m)
|
||||
break;
|
||||
|
||||
txt = txt.replace(m[1], '');
|
||||
m = m[1].slice(-11);
|
||||
|
||||
console.log(`found ${m} @${bofs}, ${name} `);
|
||||
yt_ids.add(m);
|
||||
if (!has(ids, m))
|
||||
ids.push(m);
|
||||
|
||||
// bail after next iteration
|
||||
chunk = nchunks - 1;
|
||||
side = 9;
|
||||
|
||||
if (mofs < absofs) {
|
||||
mofs = absofs;
|
||||
mfile = name;
|
||||
}
|
||||
}
|
||||
ofs += chunksz;
|
||||
if (ofs >= sz)
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (false) {
|
||||
var msg = `finished analysing ${mnchk} files in ${(Date.now() - t0) / 1000} seconds,\n\nbiggest offset was ${mofs} in this file:\n\n${mfile}`,
|
||||
mfun = function () { toast.ok(0, msg); };
|
||||
|
||||
mfun();
|
||||
setTimeout(mfun, 200);
|
||||
|
||||
return hooks[0]([], [], [], hooks.slice(1));
|
||||
}
|
||||
|
||||
toast.inf(5, `running query for ${yt_ids.size} videos...`);
|
||||
|
@ -34,48 +145,65 @@ function up2k_namefilter(good_files, nil_files, bad_files, hooks) {
|
|||
xhr.setRequestHeader('Content-Type', 'text/plain');
|
||||
xhr.onload = xhr.onerror = function () {
|
||||
if (this.status != 200)
|
||||
return toast.err(0, `sorry, database query failed ;_;\n\nplease let us know so we can look at it, thx!!\n\nerror ${this.status}: ${(this.response && this.response.err) || this.responseText}`);
|
||||
return toast.err(0, `sorry, database query failed; _; \n\nplease let us know so we can look at it, thx!!\n\nerror ${this.status}: ${(this.response && this.response.err) || this.responseText} `);
|
||||
|
||||
var new_lists = [],
|
||||
ptn = new RegExp(this.responseText.trim().split('\n').join('|') || '\n'),
|
||||
nothing_to_do = true,
|
||||
n_skip = 0;
|
||||
process_id_list(this.responseText);
|
||||
};
|
||||
xhr.send(Array.from(yt_ids).join('\n'));
|
||||
|
||||
for (var lst of file_lists) {
|
||||
var keep = [];
|
||||
new_lists.push(keep);
|
||||
setTimeout(function () { process_id_list('Nf-nN1wF5Xo\n'); }, 500);
|
||||
|
||||
for (var ent of lst)
|
||||
if (ptn.exec(ent[1]))
|
||||
keep.push(ent);
|
||||
else
|
||||
n_skip++;
|
||||
function process_id_list(txt) {
|
||||
var wanted_ids = new Set(txt.trim().split('\n')),
|
||||
wanted_names = new Set(), // basenames with a wanted ID
|
||||
wanted_files = new Set(); // filedrops
|
||||
|
||||
if (keep.length)
|
||||
nothing_to_do = false;
|
||||
for (var a = 0; a < good_files.length; a++) {
|
||||
var name = good_files[a][1];
|
||||
for (var b = 0; b < file_ids[a].length; b++)
|
||||
if (wanted_ids.has(file_ids[a][b])) {
|
||||
wanted_files.add(good_files[a]);
|
||||
|
||||
var m = /(.*)\.(mp4|webm|mkv)$/i.exec(name);
|
||||
if (m)
|
||||
wanted_names.add(m[1]);
|
||||
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// add all files with the same basename as each explicitly wanted file
|
||||
// (infojson/chatlog/etc when ID was discovered from metadata)
|
||||
for (var a = 0; a < good_files.length; a++) {
|
||||
var name = good_files[a][1];
|
||||
for (var b = 0; b < 3; b++) {
|
||||
name = name.replace(/\.[^\.]+$/, '');
|
||||
if (wanted_names.has(name)) {
|
||||
wanted_files.add(good_files[a]);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
function upload_filtered() {
|
||||
if (nothing_to_do)
|
||||
if (!wanted_files.size)
|
||||
return modal.alert('Good news -- turns out we already have all those.\n\nBut thank you for checking in!');
|
||||
|
||||
[good_files, nil_files, bad_files] = new_lists;
|
||||
hooks[0](good_files, nil_files, bad_files, hooks.slice(1));
|
||||
hooks[0](Array.from(wanted_files), nil_files, bad_files, hooks.slice(1));
|
||||
}
|
||||
|
||||
// override path: calls the next hook with good_files, nil_files and bad_files
// plus the remaining hooks, without applying the wanted-ID filter
function upload_all() {
|
||||
hooks[0](good_files, nil_files, bad_files, hooks.slice(1));
|
||||
}
|
||||
|
||||
var msg = `you added ${good_files.length} files; ${n_skip} of them were skipped --\neither because we already have them,\nor because there is no youtube-ID in your filename.\n\n<code>OK</code> / <code>Enter</code> = continue uploading the ${new_lists[0].length} files we definitely need\n\n<code>Cancel</code> / <code>ESC</code> = override the filter; upload ALL the files you added`;
|
||||
var n_skip = good_files.length - wanted_files.size,
|
||||
msg = `you added ${good_files.length} files; ${n_skip} of them were skipped --\neither because we already have them,\nor because there is no youtube-ID in your filename.\n\n<code>OK</code> / <code>Enter</code> = continue uploading just the ${wanted_files.size} files we definitely need\n\n<code>Cancel</code> / <code>ESC</code> = override the filter; upload ALL the files you added`;
|
||||
|
||||
if (!n_skip)
|
||||
upload_filtered();
|
||||
else
|
||||
modal.confirm(msg, upload_filtered, upload_all);
|
||||
|
||||
};
|
||||
xhr.send(Array.from(yt_ids).join('\n'));
|
||||
}
|
||||
|
||||
up2k_hooks.push(function () {
|
||||
|
|
|
@ -132,7 +132,7 @@ class Up2k(object):
|
|||
self.mem_cur = None
|
||||
self.sqlite_ver = None
|
||||
self.no_expr_idx = False
|
||||
self.timeout = int(max(self.args.srch_time, 5) * 1.2)
|
||||
self.timeout = int(max(self.args.srch_time, 5) * 1.2) + 1
|
||||
self.spools: set[tempfile.SpooledTemporaryFile[bytes]] = set()
|
||||
if HAVE_SQLITE3:
|
||||
# mojibake detector
|
||||
|
@ -1178,6 +1178,7 @@ class Up2k(object):
|
|||
params: tuple[Any, ...],
|
||||
flt: int,
|
||||
) -> tuple[tempfile.SpooledTemporaryFile[bytes], int]:
|
||||
"""mutex me"""
|
||||
n = 0
|
||||
c2 = cur.connection.cursor()
|
||||
tf = tempfile.SpooledTemporaryFile(1024 * 1024 * 8, "w+b", prefix="cpp-tq-")
|
||||
|
|
Loading…
Reference in a new issue