mirror of
https://github.com/9001/copyparty.git
synced 2025-08-16 16:42:13 -06:00
add -e2v (file integrity checker)
This commit is contained in:
parent
3683984c8d
commit
48b957f1d5
|
@ -663,8 +663,11 @@ through arguments:
|
|||
* `-e2t` enables metadata indexing on upload
|
||||
* `-e2ts` also scans for tags in all files that don't have tags yet
|
||||
* `-e2tsr` also deletes all existing tags, doing a full reindex
|
||||
* `-e2v` verifies file integrity at startup, comparing hashes from the db
|
||||
* `-e2vu` patches the database with the new hashes from the filesystem
|
||||
* `-e2vp` panics and kills copyparty instead
|
||||
|
||||
the same arguments can be set as volume flags, in addition to `d2d`, `d2ds`, `d2t`, `d2ts` for disabling:
|
||||
the same arguments can be set as volume flags, in addition to `d2d`, `d2ds`, `d2t`, `d2ts`, `d2v` for disabling:
|
||||
* `-v ~/music::r:c,e2dsa,e2tsr` does a full reindex of everything on startup
|
||||
* `-v ~/music::r:c,d2d` disables **all** indexing, even if any `-e2*` are on
|
||||
* `-v ~/music::r:c,d2t` disables all `-e2t*` (tags), does not affect `-e2d*`
|
||||
|
|
|
@ -89,4 +89,7 @@ def main():
|
|||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
try:
|
||||
main()
|
||||
except:
|
||||
pass
|
||||
|
|
|
@ -394,6 +394,7 @@ def run_argparse(argv: list[str], formatter: Any, retry: bool) -> argparse.Names
|
|||
\033[36md2ts\033[35m disables metadata collection for existing files
|
||||
\033[36md2ds\033[35m disables onboot indexing, overrides -e2ds*
|
||||
\033[36md2t\033[35m disables metadata collection, overrides -e2t*
|
||||
\033[36md2v\033[35m disables file verification, overrides -e2v*
|
||||
\033[36md2d\033[35m disables all database stuff, overrides -e2*
|
||||
\033[36mnohash=\\.iso$\033[35m skips hashing file contents if path matches *.iso
|
||||
\033[36mnoidx=\\.iso$\033[35m fully ignores the contents at paths matching *.iso
|
||||
|
@ -586,6 +587,9 @@ def run_argparse(argv: list[str], formatter: Any, retry: bool) -> argparse.Names
|
|||
ap2.add_argument("-e2d", action="store_true", help="enable up2k database, making files searchable + enables upload deduplocation")
|
||||
ap2.add_argument("-e2ds", action="store_true", help="scan writable folders for new files on startup; sets -e2d")
|
||||
ap2.add_argument("-e2dsa", action="store_true", help="scans all folders on startup; sets -e2ds")
|
||||
ap2.add_argument("-e2v", action="store_true", help="verify file integrity; rehash all files and compare with db")
|
||||
ap2.add_argument("-e2vu", action="store_true", help="on hash mismatch: update the database with the new hash")
|
||||
ap2.add_argument("-e2vp", action="store_true", help="on hash mismatch: panic and quit copyparty")
|
||||
ap2.add_argument("--hist", metavar="PATH", type=u, help="where to store volume data (db, thumbs)")
|
||||
ap2.add_argument("--no-hash", metavar="PTN", type=u, help="regex: disable hashing of matching paths during e2ds folder scans")
|
||||
ap2.add_argument("--no-idx", metavar="PTN", type=u, help="regex: disable indexing of matching paths during e2ds folder scans")
|
||||
|
|
|
@ -1008,7 +1008,7 @@ class AuthSrv(object):
|
|||
if ptn:
|
||||
vol.flags[vf] = re.compile(ptn)
|
||||
|
||||
for k in ["e2t", "e2ts", "e2tsr"]:
|
||||
for k in ["e2t", "e2ts", "e2tsr", "e2v", "e2vu", "e2vp"]:
|
||||
if getattr(self.args, k):
|
||||
vol.flags[k] = True
|
||||
|
||||
|
@ -1030,7 +1030,7 @@ class AuthSrv(object):
|
|||
self._read_volflag(vol.flags, "mtp", self.args.mtp, True)
|
||||
|
||||
# d2d drops all database features for a volume
|
||||
for grp, rm in [["d2d", "e2d"], ["d2t", "e2t"]]:
|
||||
for grp, rm in [["d2d", "e2d"], ["d2t", "e2t"], ["d2d", "e2v"]]:
|
||||
if not vol.flags.get(grp, False):
|
||||
continue
|
||||
|
||||
|
@ -1052,6 +1052,12 @@ class AuthSrv(object):
|
|||
|
||||
vol.flags = {k: v for k, v in vol.flags.items() if not k.startswith(rm)}
|
||||
|
||||
for grp, rm in [["d2v", "e2v"]]:
|
||||
if not vol.flags.get(grp, False):
|
||||
continue
|
||||
|
||||
vol.flags = {k: v for k, v in vol.flags.items() if not k.startswith(rm)}
|
||||
|
||||
# verify tags mentioned by -mt[mp] are used by -mte
|
||||
local_mtp = {}
|
||||
local_only_mtp = {}
|
||||
|
|
|
@ -9,6 +9,7 @@ import math
|
|||
import os
|
||||
import re
|
||||
import shutil
|
||||
import signal
|
||||
import stat
|
||||
import subprocess as sp
|
||||
import threading
|
||||
|
@ -434,10 +435,36 @@ class Up2k(object):
|
|||
if vac:
|
||||
need_vac[vol] = True
|
||||
|
||||
if "e2ts" not in vol.flags:
|
||||
t = "online, idle"
|
||||
else:
|
||||
if "e2v" in vol.flags:
|
||||
t = "online (integrity-check pending)"
|
||||
elif "e2ts" in vol.flags:
|
||||
t = "online (tags pending)"
|
||||
else:
|
||||
t = "online, idle"
|
||||
|
||||
self.volstate[vol.vpath] = t
|
||||
|
||||
# file contents verification
|
||||
for vol in vols:
|
||||
if self.stop:
|
||||
break
|
||||
|
||||
if "e2v" not in vol.flags:
|
||||
continue
|
||||
|
||||
t = "online (verifying integrity)"
|
||||
self.volstate[vol.vpath] = t
|
||||
self.log("{} [{}]".format(t, vol.realpath))
|
||||
|
||||
nmod = self._verify_integrity(vol)
|
||||
if nmod:
|
||||
self.log("modified {} entries in the db".format(nmod), 3)
|
||||
need_vac[vol] = True
|
||||
|
||||
if "e2ts" in vol.flags:
|
||||
t = "online (tags pending)"
|
||||
else:
|
||||
t = "online, idle"
|
||||
|
||||
self.volstate[vol.vpath] = t
|
||||
|
||||
|
@ -736,7 +763,9 @@ class Up2k(object):
|
|||
self.log("file: {}".format(abspath))
|
||||
|
||||
try:
|
||||
hashes = self._hashlist_from_file(abspath)
|
||||
hashes = self._hashlist_from_file(
|
||||
abspath, "a{}, ".format(self.pp.n)
|
||||
)
|
||||
except Exception as ex:
|
||||
self.log("hash: {} @ [{}]".format(repr(ex), abspath))
|
||||
continue
|
||||
|
@ -816,6 +845,106 @@ class Up2k(object):
|
|||
|
||||
return n_rm
|
||||
|
||||
def _verify_integrity(self, vol: VFS) -> int:
    """Rehash the files listed in the db for `vol` and compare with the db.

    expensive; blocks database access until finished (self.mutex is held
    for the whole scan)

    Files whose path matches the volume's `noidx` pattern are skipped.
    Files matching `nohash` are compared using the wark derived from their
    metadata (size, mtime, path) instead of their contents.  Files that
    cannot be stat'ed or hashed are logged and skipped.

    Returns the number of db rows that were rewritten (only possible with
    the `e2vu` volflag), 0 if nothing was rewritten, or -1 if the scan was
    interrupted because self.stop was set.

    With the `e2vp` volflag, any mismatch sets self.hub.retcode to 1,
    sends SIGTERM to this process and raises Exception.
    """
    ptop = vol.realpath
    assert self.pp and self.mtag

    cur = self.cur[ptop]
    rei = vol.flags.get("noidx")
    reh = vol.flags.get("nohash")
    e2vu = "e2vu" in vol.flags
    e2vp = "e2vp" in vol.flags

    # volumes nested below this one, as paths relative to this volume;
    # their rows are left out of the scan
    excl = [
        d[len(vol.vpath) :].lstrip("/")
        for d in self.asrv.vfs.all_vols
        if d != vol.vpath and (d.startswith(vol.vpath + "/") or not vol.vpath)
    ]
    qexa: list[str] = []
    pexa: list[str] = []
    for vpath in excl:
        # match the nested volume itself (rd == vpath) and anything below
        # it (rd like vpath/%); a bare '%' suffix would also drop sibling
        # folders that merely share the prefix, e.g. "music2" for "music"
        qexa.append("up.rd != ? and not up.rd like ?||'/%'")
        pexa.extend([vpath, vpath])

    pex = tuple(pexa)
    qex = " and ".join(qexa)
    if qex:
        qex = " where " + qex

    # rows to rewrite: (db rd, db fn, new wark, new size, new mtime)
    rewark: list[tuple[str, str, str, int, int]] = []

    with self.mutex:
        # first pass: totals for the progress indicator
        b_left = 0
        n_left = 0
        q = "select sz from up" + qex
        for (sz,) in cur.execute(q, pex):
            b_left += sz  # sum() can overflow according to docs
            n_left += 1

        q = "select w, mt, sz, rd, fn from up" + qex
        for w, mt, sz, drd, dfn in cur.execute(q, pex):
            if self.stop:
                return -1

            n_left -= 1
            b_left -= sz
            # a leading "//" marks names that were encoded for storage
            if drd.startswith("//") or dfn.startswith("//"):
                rd, fn = s3dec(drd, dfn)
            else:
                rd = drd
                fn = dfn

            abspath = os.path.join(ptop, rd, fn)
            if rei and rei.search(abspath):
                continue

            nohash = reh.search(abspath) if reh else False

            pf = "v{}, {:.0f}+".format(n_left, b_left / 1024 / 1024)
            self.pp.msg = pf + abspath

            # a file that is in the db but gone (or unreadable) on disk
            # must not abort the whole scan; handle it the same way as a
            # hashing failure below
            try:
                st = bos.stat(abspath)
            except Exception as ex:
                self.log("stat: {} @ [{}]".format(repr(ex), abspath))
                continue

            sz2 = st.st_size
            mt2 = int(st.st_mtime)

            if nohash:
                w2 = up2k_wark_from_metadata(self.salt, sz2, mt2, rd, fn)
            else:
                if sz2 > 1024 * 1024 * 32:
                    self.log("file: {}".format(abspath))

                try:
                    hashes = self._hashlist_from_file(abspath, pf)
                except Exception as ex:
                    self.log("hash: {} @ [{}]".format(repr(ex), abspath))
                    continue

                w2 = up2k_wark_from_hashlist(self.salt, sz2, hashes)

            if w == w2:
                continue

            # keep the db-side (possibly encoded) names so the update
            # below can address the row
            rewark.append((drd, dfn, w2, sz2, mt2))

            t = "hash mismatch: {}\n  db: {} ({} byte, {})\n  fs: {} ({} byte, {})"
            t = t.format(abspath, w, sz, mt, w2, sz2, mt2)
            self.log(t, 1)

        if e2vp and rewark:
            self.hub.retcode = 1
            os.kill(os.getpid(), signal.SIGTERM)
            raise Exception("{} files have incorrect hashes".format(len(rewark)))

        if not e2vu:
            return 0

        # no "limit 1": UPDATE ... LIMIT is a syntax error unless sqlite
        # was built with SQLITE_ENABLE_UPDATE_DELETE_LIMIT, and rd + fn
        # already identifies the file
        q = "update up set w = ?, sz = ?, mt = ? where rd = ? and fn = ?"
        for rd, fn, w, sz, mt in rewark:
            cur.execute(q, (w, sz, int(mt), rd, fn))

        # NOTE(review): nothing is committed here; presumably the caller
        # commits (it schedules a vacuum when rows were modified) -- confirm
        return len(rewark)
|
||||
|
||||
def _build_tags_index(self, vol: VFS) -> tuple[int, int, bool]:
|
||||
ptop = vol.realpath
|
||||
with self.mutex:
|
||||
|
@ -2225,14 +2354,15 @@ class Up2k(object):
|
|||
|
||||
return wark
|
||||
|
||||
def _hashlist_from_file(self, path: str) -> list[str]:
|
||||
def _hashlist_from_file(self, path: str, prefix: str = "") -> list[str]:
|
||||
fsz = bos.path.getsize(path)
|
||||
csz = up2k_chunksize(fsz)
|
||||
ret = []
|
||||
with open(fsenc(path), "rb", 512 * 1024) as f:
|
||||
while fsz > 0:
|
||||
if self.pp:
|
||||
self.pp.msg = "{} MB, {}".format(int(fsz / 1024 / 1024), path)
|
||||
mb = int(fsz / 1024 / 1024)
|
||||
self.pp.msg = "{}{} MB, {}".format(prefix, mb, path)
|
||||
|
||||
hashobj = hashlib.sha512()
|
||||
rem = min(csz, fsz)
|
||||
|
|
|
@ -137,6 +137,9 @@ IMPLICATIONS = [
|
|||
["e2tsr", "e2ts"],
|
||||
["e2ts", "e2t"],
|
||||
["e2t", "e2d"],
|
||||
["e2vu", "e2v"],
|
||||
["e2vp", "e2v"],
|
||||
["e2v", "e2d"],
|
||||
]
|
||||
|
||||
|
||||
|
|
Loading…
Reference in a new issue