add dbtool

This commit is contained in:
ed 2021-05-30 16:49:08 +00:00
parent 292c18b3d0
commit 2528729971
2 changed files with 213 additions and 0 deletions

View file

@ -45,3 +45,18 @@ you could replace winfsp with [dokan](https://github.com/dokan-dev/dokany/releas
# [`mtag/`](mtag/)
* standalone programs which perform misc. file analysis
* copyparty can Popen programs like these during file indexing to collect additional metadata
# [`dbtool.py`](dbtool.py)
upgrade utility which can show db info and help transfer data between databases, for example when a new version of copyparty recommends to wipe the DB and reindex because it now collects additional metadata during analysis, but you have some really expensive `-mtp` parsers and want to copy over the tags from the old db
for that example (upgrading to v0.11.0), first move the old db aside, launch copyparty, let it rebuild the db until the point where it starts running mtp (colored messages as it adds the mtp tags), then CTRL-C and patch in the old mtp tags from the old db instead
so assuming you have `-mtp` parsers to provide the tags `key` and `.bpm`:
```
~/bin/dbtool.py -ls up2k.db
~/bin/dbtool.py -src up2k.db.v0.10.22 up2k.db -cmp
~/bin/dbtool.py -src up2k.db.v0.10.22 up2k.db -rm-mtp-flag -copy key
~/bin/dbtool.py -src up2k.db.v0.10.22 up2k.db -rm-mtp-flag -copy .bpm -vac
```

198
bin/dbtool.py Executable file
View file

@ -0,0 +1,198 @@
#!/usr/bin/env python3
import os
import sys
import sqlite3
import argparse
DB_VER = 3
def die(msg):
print("\033[31m\n" + msg + "\n\033[0m")
sys.exit(1)
def read_ver(db):
for tab in ["ki", "kv"]:
try:
c = db.execute(r"select v from {} where k = 'sver'".format(tab))
except:
continue
rows = c.fetchall()
if rows:
return int(rows[0][0])
return "corrupt"
def ls(db):
nfiles = next(db.execute("select count(w) from up"))[0]
ntags = next(db.execute("select count(w) from mt"))[0]
print(f"{nfiles} files")
print(f"{ntags} tags\n")
print("number of occurences for each tag,")
print(" 'x' = file has no tags")
print(" 't:mtp' = the mtp flag (file not mtp processed yet)")
print()
for k, nk in db.execute("select k, count(k) from mt group by k order by k"):
print(f"{nk:9} {k}")
def compare(n1, d1, n2, d2, verbose):
nt = next(d1.execute("select count(w) from up"))[0]
n = 0
miss = 0
for w, rd, fn in d1.execute("select w, rd, fn from up"):
n += 1
if n % 25_000 == 0:
m = f"\033[36mchecked {n:,} of {nt:,} files in {n1} against {n2}\033[0m"
print(m)
q = "select w from up where substr(w,1,16) = ?"
hit = d2.execute(q, (w[:16],)).fetchone()
if not hit:
miss += 1
if verbose:
print(f"file in {n1} missing in {n2}: [{w}] {rd}/{fn}")
print(f" {miss} files in {n1} missing in {n2}\n")
nt = next(d1.execute("select count(w) from mt"))[0]
n = 0
miss = {}
nmiss = 0
for w, k, v in d1.execute("select * from mt"):
n += 1
if n % 100_000 == 0:
m = f"\033[36mchecked {n:,} of {nt:,} tags in {n1} against {n2}, so far {nmiss} missing tags\033[0m"
print(m)
v2 = d2.execute("select v from mt where w = ? and +k = ?", (w, k)).fetchone()
if v2:
v2 = v2[0]
# if v != v2 and v2 and k in [".bpm", "key"] and n2 == "src":
# print(f"{w} [{rd}/{fn}] {k} = [{v}] / [{v2}]")
if v2 is not None:
if k.startswith("."):
try:
diff = abs(float(v) - float(v2))
if diff > float(v) / 0.9:
v2 = None
else:
v2 = v
except:
pass
if v != v2:
v2 = None
if v2 is None:
nmiss += 1
try:
miss[k] += 1
except:
miss[k] = 1
if verbose:
q = "select rd, fn from up where substr(w,1,16) = ?"
rd, fn = d1.execute(q, (w,)).fetchone()
print(f"missing in {n2}: [{w}] [{rd}/{fn}] {k} = {v}")
for k, v in sorted(miss.items()):
if v:
print(f"{n1} has {v:6} more {k:<6} tags than {n2}")
print(f"in total, {nmiss} missing tags in {n2}\n")
def copy_mtp(d1, d2, tag, rm):
nt = next(d1.execute("select count(w) from mt where k = ?", (tag,)))[0]
n = 0
ndone = 0
for w, k, v in d1.execute("select * from mt where k = ?", (tag,)):
n += 1
if n % 25_000 == 0:
m = f"\033[36m{n:,} of {nt:,} tags checked, so far {ndone} copied\033[0m"
print(m)
hit = d2.execute("select v from mt where w = ? and +k = ?", (w, k)).fetchone()
if hit:
hit = hit[0]
if hit != v:
ndone += 1
if hit is not None:
d2.execute("delete from mt where w = ? and +k = ?", (w, k))
d2.execute("insert into mt values (?,?,?)", (w, k, v))
if rm:
d2.execute("delete from mt where w = ? and +k = 't:mtp'", (w,))
d2.commit()
print(f"copied {ndone} {tag} tags over")
def main():
os.system("")
print()
ap = argparse.ArgumentParser()
ap.add_argument("db", help="database to work on")
ap.add_argument("-src", metavar="DB", type=str, help="database to copy from")
ap2 = ap.add_argument_group("informational / read-only stuff")
ap2.add_argument("-v", action="store_true", help="verbose")
ap2.add_argument("-ls", action="store_true", help="list summary for db")
ap2.add_argument("-cmp", action="store_true", help="compare databases")
ap2 = ap.add_argument_group("options which modify target db")
ap2.add_argument("-copy", metavar="TAG", type=str, help="mtp tag to copy over")
ap2.add_argument(
"-rm-mtp-flag",
action="store_true",
help="when an mtp tag is copied over, also mark that as done, so copyparty won't run mtp on it",
)
ap2.add_argument("-vac", action="store_true", help="optimize DB")
ar = ap.parse_args()
for v in [ar.db, ar.src]:
if v and not os.path.exists(v):
die("database must exist")
db = sqlite3.connect(ar.db)
ds = sqlite3.connect(ar.src) if ar.src else None
for d, n in [[ds, "src"], [db, "dst"]]:
if not d:
continue
ver = read_ver(d)
if ver == "corrupt":
die("{} database appears to be corrupt, sorry")
if ver != DB_VER:
m = f"{n} db is version {ver}, this tool only supports version {DB_VER}, please upgrade it with copyparty first"
die(m)
if ar.ls:
ls(db)
if ar.cmp:
if not ds:
die("need src db to compare against")
compare("src", ds, "dst", db, ar.v)
compare("dst", db, "src", ds, ar.v)
if ar.copy:
copy_mtp(ds, db, ar.copy, ar.rm_mtp_flag)
if __name__ == "__main__":
main()