run tag scrapers in parallel on new uploads

This commit is contained in:
ed 2021-10-27 00:47:50 +02:00
parent 6b737bf1d7
commit 6dade82d2c
5 changed files with 18 additions and 16 deletions

View file

@ -546,6 +546,8 @@ and there are *two* editors
* you can link a particular timestamp in an audio file by adding it to the URL, such as `&20` / `&20s` / `&1m20` / `&t=1:20` after the `.../#af-c8960dab`
* get a plaintext file listing by adding `?ls=t` to a URL, or a compact colored one with `?ls=v` (for unix terminals)
* if you are using media hotkeys to switch songs and are getting tired of seeing the OSD popup which Windows doesn't let you disable, consider https://ocv.me/dev/?media-osd-bgone.ps1
* click the bottom-left `π` to open a javascript prompt for debugging
@ -716,7 +718,7 @@ that'll run the command `notify-send` with the path to the uploaded file as the
note that it will only trigger on new unique files, not dupes
and it will occupy the parsing threads, so fork anything expensive, or if you want to intentionally queue/singlethread you can combine it with `--no-mtag-mt`
and it will occupy the parsing threads, so fork anything expensive, or if you want to intentionally queue/singlethread you can combine it with `--mtag-mt 1`
if this becomes popular maybe there should be a less janky way to do it actually

View file

@ -208,6 +208,8 @@ def run_argparse(argv, formatter):
except:
fk_salt = "hunter2"
cores = os.cpu_count() if hasattr(os, "cpu_count") else 4
sects = [
[
"accounts",
@ -333,7 +335,7 @@ def run_argparse(argv, formatter):
ap2 = ap.add_argument_group('general options')
ap2.add_argument("-c", metavar="PATH", type=u, action="append", help="add config file")
ap2.add_argument("-nc", metavar="NUM", type=int, default=64, help="max num clients")
ap2.add_argument("-j", metavar="CORES", type=int, default=1, help="max num cpu cores")
ap2.add_argument("-j", metavar="CORES", type=int, default=1, help="max num cpu cores, 0=all")
ap2.add_argument("-a", metavar="ACCT", type=u, action="append", help="add account, USER:PASS; example [ed:wark")
ap2.add_argument("-v", metavar="VOL", type=u, action="append", help="add volume, SRC:DST:FLAG; example [.::r], [/mnt/nas/music:/music:r:aed")
ap2.add_argument("-ed", action="store_true", help="enable ?dots")
@ -402,7 +404,7 @@ def run_argparse(argv, formatter):
ap2.add_argument("--no-thumb", action="store_true", help="disable all thumbnails")
ap2.add_argument("--no-vthumb", action="store_true", help="disable video thumbnails")
ap2.add_argument("--th-size", metavar="WxH", default="320x256", help="thumbnail res")
ap2.add_argument("--th-mt", metavar="CORES", type=int, default=0, help="max num cpu cores to use, 0=all")
ap2.add_argument("--th-mt", metavar="CORES", type=int, default=cores, help="num cpu cores to use for generating thumbnails")
ap2.add_argument("--th-no-crop", action="store_true", help="dynamic height; show full image")
ap2.add_argument("--th-no-jpg", action="store_true", help="disable jpg output")
ap2.add_argument("--th-no-webp", action="store_true", help="disable webp output")
@ -428,8 +430,8 @@ def run_argparse(argv, formatter):
ap2.add_argument("-e2ts", action="store_true", help="enable metadata scanner, sets -e2t")
ap2.add_argument("-e2tsr", action="store_true", help="rescan all metadata, sets -e2ts")
ap2.add_argument("--no-mutagen", action="store_true", help="use FFprobe for tags instead")
ap2.add_argument("--no-mtag-mt", action="store_true", help="disable tag-read parallelism")
ap2.add_argument("--no-mtag-ff", action="store_true", help="never use FFprobe as tag reader")
ap2.add_argument("--mtag-mt", metavar="CORES", type=int, default=cores, help="num cpu cores to use for tag scanning")
ap2.add_argument("-mtm", metavar="M=t,t,t", type=u, action="append", help="add/replace metadata mapping")
ap2.add_argument("-mte", metavar="M,M,M", type=u, help="tags to index/display (comma-sep.)",
default="circle,album,.tn,artist,title,.bpm,key,.dur,.q,.vq,.aq,vc,ac,res,.fps,ahash,vhash")

View file

@ -105,9 +105,7 @@ class ThumbSrv(object):
self.mutex = threading.Lock()
self.busy = {}
self.stopping = False
self.nthr = self.args.th_mt
if not self.nthr:
self.nthr = os.cpu_count() if hasattr(os, "cpu_count") else 4
self.nthr = max(1, self.args.th_mt)
self.q = Queue(self.nthr * 4)
for n in range(self.nthr):
@ -130,7 +128,7 @@ class ThumbSrv(object):
self.log(msg, c=3)
if self.args.th_clean:
t = threading.Thread(target=self.cleaner, name="thumb-cleaner")
t = threading.Thread(target=self.cleaner, name="thumb.cln")
t.daemon = True
t.start()

View file

@ -131,9 +131,11 @@ class Up2k(object):
thr.start()
if self.mtag:
thr = threading.Thread(target=self._tagger, name="up2k-tagger")
thr.daemon = True
thr.start()
for n in range(max(1, self.args.mtag_mt)):
name = "tagger-{}".format(n)
thr = threading.Thread(target=self._tagger, name=name)
thr.daemon = True
thr.start()
thr = threading.Thread(target=self._run_all_mtp, name="up2k-mtp-init")
thr.daemon = True
@ -700,7 +702,7 @@ class Up2k(object):
return n_add, n_rm, False
mpool = False
if self.mtag.prefer_mt and not self.args.no_mtag_mt:
if self.mtag.prefer_mt and self.args.mtag_mt > 1:
mpool = self._start_mpool()
conn = sqlite3.connect(db_path, timeout=15)
@ -933,9 +935,7 @@ class Up2k(object):
def _start_mpool(self):
# mp.pool.ThreadPool and concurrent.futures.ThreadPoolExecutor
# both do crazy runahead so lets reinvent another wheel
nw = os.cpu_count() if hasattr(os, "cpu_count") else 4
if self.args.no_mtag_mt:
nw = 1
nw = max(1, self.args.mtag_mt)
if self.pending_tags is None:
self.log("using {}x {}".format(nw, self.mtag.backend))

View file

@ -445,7 +445,7 @@ def log_thrs(log, ival, name):
tv = [x.name for x in threading.enumerate()]
tv = [
x.split("-")[0]
if x.startswith("httpconn-") or x.startswith("thumb-")
if x.split("-")[0] in ["httpconn", "thumb", "tagger"]
else "listen"
if "-listen-" in x
else x