mirror of
https://github.com/9001/copyparty.git
synced 2025-08-17 00:52:16 -06:00
add -e2v (file integrity checker)
This commit is contained in:
parent
3683984c8d
commit
48b957f1d5
|
@ -663,8 +663,11 @@ through arguments:
|
||||||
* `-e2t` enables metadata indexing on upload
|
* `-e2t` enables metadata indexing on upload
|
||||||
* `-e2ts` also scans for tags in all files that don't have tags yet
|
* `-e2ts` also scans for tags in all files that don't have tags yet
|
||||||
* `-e2tsr` also deletes all existing tags, doing a full reindex
|
* `-e2tsr` also deletes all existing tags, doing a full reindex
|
||||||
|
* `-e2v` verifies file integrity at startup, comparing hashes from the db
|
||||||
|
* `-e2vu` patches the database with the new hashes from the filesystem
|
||||||
|
* `-e2vp` panics and kills copyparty instead
|
||||||
|
|
||||||
the same arguments can be set as volume flags, in addition to `d2d`, `d2ds`, `d2t`, `d2ts` for disabling:
|
the same arguments can be set as volume flags, in addition to `d2d`, `d2ds`, `d2t`, `d2ts`, `d2v` for disabling:
|
||||||
* `-v ~/music::r:c,e2dsa,e2tsr` does a full reindex of everything on startup
|
* `-v ~/music::r:c,e2dsa,e2tsr` does a full reindex of everything on startup
|
||||||
* `-v ~/music::r:c,d2d` disables **all** indexing, even if any `-e2*` are on
|
* `-v ~/music::r:c,d2d` disables **all** indexing, even if any `-e2*` are on
|
||||||
* `-v ~/music::r:c,d2t` disables all `-e2t*` (tags), does not affect `-e2d*`
|
* `-v ~/music::r:c,d2t` disables all `-e2t*` (tags), does not affect `-e2d*`
|
||||||
|
|
|
@ -89,4 +89,7 @@ def main():
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
try:
|
||||||
main()
|
main()
|
||||||
|
except:
|
||||||
|
pass
|
||||||
|
|
|
@ -394,6 +394,7 @@ def run_argparse(argv: list[str], formatter: Any, retry: bool) -> argparse.Names
|
||||||
\033[36md2ts\033[35m disables metadata collection for existing files
|
\033[36md2ts\033[35m disables metadata collection for existing files
|
||||||
\033[36md2ds\033[35m disables onboot indexing, overrides -e2ds*
|
\033[36md2ds\033[35m disables onboot indexing, overrides -e2ds*
|
||||||
\033[36md2t\033[35m disables metadata collection, overrides -e2t*
|
\033[36md2t\033[35m disables metadata collection, overrides -e2t*
|
||||||
|
\033[36md2v\033[35m disables file verification, overrides -e2v*
|
||||||
\033[36md2d\033[35m disables all database stuff, overrides -e2*
|
\033[36md2d\033[35m disables all database stuff, overrides -e2*
|
||||||
\033[36mnohash=\\.iso$\033[35m skips hashing file contents if path matches *.iso
|
\033[36mnohash=\\.iso$\033[35m skips hashing file contents if path matches *.iso
|
||||||
\033[36mnoidx=\\.iso$\033[35m fully ignores the contents at paths matching *.iso
|
\033[36mnoidx=\\.iso$\033[35m fully ignores the contents at paths matching *.iso
|
||||||
|
@ -586,6 +587,9 @@ def run_argparse(argv: list[str], formatter: Any, retry: bool) -> argparse.Names
|
||||||
ap2.add_argument("-e2d", action="store_true", help="enable up2k database, making files searchable + enables upload deduplocation")
|
ap2.add_argument("-e2d", action="store_true", help="enable up2k database, making files searchable + enables upload deduplocation")
|
||||||
ap2.add_argument("-e2ds", action="store_true", help="scan writable folders for new files on startup; sets -e2d")
|
ap2.add_argument("-e2ds", action="store_true", help="scan writable folders for new files on startup; sets -e2d")
|
||||||
ap2.add_argument("-e2dsa", action="store_true", help="scans all folders on startup; sets -e2ds")
|
ap2.add_argument("-e2dsa", action="store_true", help="scans all folders on startup; sets -e2ds")
|
||||||
|
ap2.add_argument("-e2v", action="store_true", help="verify file integrity; rehash all files and compare with db")
|
||||||
|
ap2.add_argument("-e2vu", action="store_true", help="on hash mismatch: update the database with the new hash")
|
||||||
|
ap2.add_argument("-e2vp", action="store_true", help="on hash mismatch: panic and quit copyparty")
|
||||||
ap2.add_argument("--hist", metavar="PATH", type=u, help="where to store volume data (db, thumbs)")
|
ap2.add_argument("--hist", metavar="PATH", type=u, help="where to store volume data (db, thumbs)")
|
||||||
ap2.add_argument("--no-hash", metavar="PTN", type=u, help="regex: disable hashing of matching paths during e2ds folder scans")
|
ap2.add_argument("--no-hash", metavar="PTN", type=u, help="regex: disable hashing of matching paths during e2ds folder scans")
|
||||||
ap2.add_argument("--no-idx", metavar="PTN", type=u, help="regex: disable indexing of matching paths during e2ds folder scans")
|
ap2.add_argument("--no-idx", metavar="PTN", type=u, help="regex: disable indexing of matching paths during e2ds folder scans")
|
||||||
|
|
|
@ -1008,7 +1008,7 @@ class AuthSrv(object):
|
||||||
if ptn:
|
if ptn:
|
||||||
vol.flags[vf] = re.compile(ptn)
|
vol.flags[vf] = re.compile(ptn)
|
||||||
|
|
||||||
for k in ["e2t", "e2ts", "e2tsr"]:
|
for k in ["e2t", "e2ts", "e2tsr", "e2v", "e2vu", "e2vp"]:
|
||||||
if getattr(self.args, k):
|
if getattr(self.args, k):
|
||||||
vol.flags[k] = True
|
vol.flags[k] = True
|
||||||
|
|
||||||
|
@ -1030,7 +1030,7 @@ class AuthSrv(object):
|
||||||
self._read_volflag(vol.flags, "mtp", self.args.mtp, True)
|
self._read_volflag(vol.flags, "mtp", self.args.mtp, True)
|
||||||
|
|
||||||
# d2d drops all database features for a volume
|
# d2d drops all database features for a volume
|
||||||
for grp, rm in [["d2d", "e2d"], ["d2t", "e2t"]]:
|
for grp, rm in [["d2d", "e2d"], ["d2t", "e2t"], ["d2d", "e2v"]]:
|
||||||
if not vol.flags.get(grp, False):
|
if not vol.flags.get(grp, False):
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
@ -1052,6 +1052,12 @@ class AuthSrv(object):
|
||||||
|
|
||||||
vol.flags = {k: v for k, v in vol.flags.items() if not k.startswith(rm)}
|
vol.flags = {k: v for k, v in vol.flags.items() if not k.startswith(rm)}
|
||||||
|
|
||||||
|
for grp, rm in [["d2v", "e2v"]]:
|
||||||
|
if not vol.flags.get(grp, False):
|
||||||
|
continue
|
||||||
|
|
||||||
|
vol.flags = {k: v for k, v in vol.flags.items() if not k.startswith(rm)}
|
||||||
|
|
||||||
# verify tags mentioned by -mt[mp] are used by -mte
|
# verify tags mentioned by -mt[mp] are used by -mte
|
||||||
local_mtp = {}
|
local_mtp = {}
|
||||||
local_only_mtp = {}
|
local_only_mtp = {}
|
||||||
|
|
|
@ -9,6 +9,7 @@ import math
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
import shutil
|
import shutil
|
||||||
|
import signal
|
||||||
import stat
|
import stat
|
||||||
import subprocess as sp
|
import subprocess as sp
|
||||||
import threading
|
import threading
|
||||||
|
@ -434,10 +435,36 @@ class Up2k(object):
|
||||||
if vac:
|
if vac:
|
||||||
need_vac[vol] = True
|
need_vac[vol] = True
|
||||||
|
|
||||||
if "e2ts" not in vol.flags:
|
if "e2v" in vol.flags:
|
||||||
t = "online, idle"
|
t = "online (integrity-check pending)"
|
||||||
else:
|
elif "e2ts" in vol.flags:
|
||||||
t = "online (tags pending)"
|
t = "online (tags pending)"
|
||||||
|
else:
|
||||||
|
t = "online, idle"
|
||||||
|
|
||||||
|
self.volstate[vol.vpath] = t
|
||||||
|
|
||||||
|
# file contents verification
|
||||||
|
for vol in vols:
|
||||||
|
if self.stop:
|
||||||
|
break
|
||||||
|
|
||||||
|
if "e2v" not in vol.flags:
|
||||||
|
continue
|
||||||
|
|
||||||
|
t = "online (verifying integrity)"
|
||||||
|
self.volstate[vol.vpath] = t
|
||||||
|
self.log("{} [{}]".format(t, vol.realpath))
|
||||||
|
|
||||||
|
nmod = self._verify_integrity(vol)
|
||||||
|
if nmod:
|
||||||
|
self.log("modified {} entries in the db".format(nmod), 3)
|
||||||
|
need_vac[vol] = True
|
||||||
|
|
||||||
|
if "e2ts" in vol.flags:
|
||||||
|
t = "online (tags pending)"
|
||||||
|
else:
|
||||||
|
t = "online, idle"
|
||||||
|
|
||||||
self.volstate[vol.vpath] = t
|
self.volstate[vol.vpath] = t
|
||||||
|
|
||||||
|
@ -736,7 +763,9 @@ class Up2k(object):
|
||||||
self.log("file: {}".format(abspath))
|
self.log("file: {}".format(abspath))
|
||||||
|
|
||||||
try:
|
try:
|
||||||
hashes = self._hashlist_from_file(abspath)
|
hashes = self._hashlist_from_file(
|
||||||
|
abspath, "a{}, ".format(self.pp.n)
|
||||||
|
)
|
||||||
except Exception as ex:
|
except Exception as ex:
|
||||||
self.log("hash: {} @ [{}]".format(repr(ex), abspath))
|
self.log("hash: {} @ [{}]".format(repr(ex), abspath))
|
||||||
continue
|
continue
|
||||||
|
@ -816,6 +845,106 @@ class Up2k(object):
|
||||||
|
|
||||||
return n_rm
|
return n_rm
|
||||||
|
|
||||||
|
def _verify_integrity(self, vol: VFS) -> int:
|
||||||
|
"""expensive; blocks database access until finished"""
|
||||||
|
ptop = vol.realpath
|
||||||
|
assert self.pp and self.mtag
|
||||||
|
|
||||||
|
cur = self.cur[ptop]
|
||||||
|
rei = vol.flags.get("noidx")
|
||||||
|
reh = vol.flags.get("nohash")
|
||||||
|
e2vu = "e2vu" in vol.flags
|
||||||
|
e2vp = "e2vp" in vol.flags
|
||||||
|
|
||||||
|
excl = [
|
||||||
|
d[len(vol.vpath) :].lstrip("/")
|
||||||
|
for d in self.asrv.vfs.all_vols
|
||||||
|
if d != vol.vpath and (d.startswith(vol.vpath + "/") or not vol.vpath)
|
||||||
|
]
|
||||||
|
qexa: list[str] = []
|
||||||
|
pexa: list[str] = []
|
||||||
|
for vpath in excl:
|
||||||
|
qexa.append("up.rd != ? and not up.rd like ?||'%'")
|
||||||
|
pexa.extend([vpath, vpath])
|
||||||
|
|
||||||
|
pex = tuple(pexa)
|
||||||
|
qex = " and ".join(qexa)
|
||||||
|
if qex:
|
||||||
|
qex = " where " + qex
|
||||||
|
|
||||||
|
rewark: list[tuple[str, str, str, int, int]] = []
|
||||||
|
|
||||||
|
with self.mutex:
|
||||||
|
b_left = 0
|
||||||
|
n_left = 0
|
||||||
|
q = "select sz from up" + qex
|
||||||
|
for (sz,) in cur.execute(q, pex):
|
||||||
|
b_left += sz # sum() can overflow according to docs
|
||||||
|
n_left += 1
|
||||||
|
|
||||||
|
q = "select w, mt, sz, rd, fn from up" + qex
|
||||||
|
for w, mt, sz, drd, dfn in cur.execute(q, pex):
|
||||||
|
if self.stop:
|
||||||
|
return -1
|
||||||
|
|
||||||
|
n_left -= 1
|
||||||
|
b_left -= sz
|
||||||
|
if drd.startswith("//") or dfn.startswith("//"):
|
||||||
|
rd, fn = s3dec(drd, dfn)
|
||||||
|
else:
|
||||||
|
rd = drd
|
||||||
|
fn = dfn
|
||||||
|
|
||||||
|
abspath = os.path.join(ptop, rd, fn)
|
||||||
|
if rei and rei.search(abspath):
|
||||||
|
continue
|
||||||
|
|
||||||
|
nohash = reh.search(abspath) if reh else False
|
||||||
|
|
||||||
|
pf = "v{}, {:.0f}+".format(n_left, b_left / 1024 / 1024)
|
||||||
|
self.pp.msg = pf + abspath
|
||||||
|
|
||||||
|
st = bos.stat(abspath)
|
||||||
|
sz2 = st.st_size
|
||||||
|
mt2 = int(st.st_mtime)
|
||||||
|
|
||||||
|
if nohash:
|
||||||
|
w2 = up2k_wark_from_metadata(self.salt, sz2, mt2, rd, fn)
|
||||||
|
else:
|
||||||
|
if sz2 > 1024 * 1024 * 32:
|
||||||
|
self.log("file: {}".format(abspath))
|
||||||
|
|
||||||
|
try:
|
||||||
|
hashes = self._hashlist_from_file(abspath, pf)
|
||||||
|
except Exception as ex:
|
||||||
|
self.log("hash: {} @ [{}]".format(repr(ex), abspath))
|
||||||
|
continue
|
||||||
|
|
||||||
|
w2 = up2k_wark_from_hashlist(self.salt, sz2, hashes)
|
||||||
|
|
||||||
|
if w == w2:
|
||||||
|
continue
|
||||||
|
|
||||||
|
rewark.append((drd, dfn, w2, sz2, mt2))
|
||||||
|
|
||||||
|
t = "hash mismatch: {}\n db: {} ({} byte, {})\n fs: {} ({} byte, {})"
|
||||||
|
t = t.format(abspath, w, sz, mt, w2, sz2, mt2)
|
||||||
|
self.log(t, 1)
|
||||||
|
|
||||||
|
if e2vp and rewark:
|
||||||
|
self.hub.retcode = 1
|
||||||
|
os.kill(os.getpid(), signal.SIGTERM)
|
||||||
|
raise Exception("{} files have incorrect hashes".format(len(rewark)))
|
||||||
|
|
||||||
|
if not e2vu:
|
||||||
|
return 0
|
||||||
|
|
||||||
|
for rd, fn, w, sz, mt in rewark:
|
||||||
|
q = "update up set w = ?, sz = ?, mt = ? where rd = ? and fn = ? limit 1"
|
||||||
|
cur.execute(q, (w, sz, int(mt), rd, fn))
|
||||||
|
|
||||||
|
return len(rewark)
|
||||||
|
|
||||||
def _build_tags_index(self, vol: VFS) -> tuple[int, int, bool]:
|
def _build_tags_index(self, vol: VFS) -> tuple[int, int, bool]:
|
||||||
ptop = vol.realpath
|
ptop = vol.realpath
|
||||||
with self.mutex:
|
with self.mutex:
|
||||||
|
@ -2225,14 +2354,15 @@ class Up2k(object):
|
||||||
|
|
||||||
return wark
|
return wark
|
||||||
|
|
||||||
def _hashlist_from_file(self, path: str) -> list[str]:
|
def _hashlist_from_file(self, path: str, prefix: str = "") -> list[str]:
|
||||||
fsz = bos.path.getsize(path)
|
fsz = bos.path.getsize(path)
|
||||||
csz = up2k_chunksize(fsz)
|
csz = up2k_chunksize(fsz)
|
||||||
ret = []
|
ret = []
|
||||||
with open(fsenc(path), "rb", 512 * 1024) as f:
|
with open(fsenc(path), "rb", 512 * 1024) as f:
|
||||||
while fsz > 0:
|
while fsz > 0:
|
||||||
if self.pp:
|
if self.pp:
|
||||||
self.pp.msg = "{} MB, {}".format(int(fsz / 1024 / 1024), path)
|
mb = int(fsz / 1024 / 1024)
|
||||||
|
self.pp.msg = "{}{} MB, {}".format(prefix, mb, path)
|
||||||
|
|
||||||
hashobj = hashlib.sha512()
|
hashobj = hashlib.sha512()
|
||||||
rem = min(csz, fsz)
|
rem = min(csz, fsz)
|
||||||
|
|
|
@ -137,6 +137,9 @@ IMPLICATIONS = [
|
||||||
["e2tsr", "e2ts"],
|
["e2tsr", "e2ts"],
|
||||||
["e2ts", "e2t"],
|
["e2ts", "e2t"],
|
||||||
["e2t", "e2d"],
|
["e2t", "e2d"],
|
||||||
|
["e2vu", "e2v"],
|
||||||
|
["e2vp", "e2v"],
|
||||||
|
["e2v", "e2d"],
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue