adding --no-hash
This commit is contained in:
parent d6bf300d80
commit 1078d933b4
README.md | 15
@@ -296,9 +296,16 @@ the same arguments can be set as volume flags, in addition to `d2d` and `d2t` fo
 * `-v ~/music::r:cd2d` disables **all** indexing, even if any `-e2*` are on
 * `-v ~/music::r:cd2t` disables all `-e2t*` (tags), does not affect `-e2d*`
 
-`e2tsr` is probably always overkill, since `e2ds`/`e2dsa` would pick up any file modifications and cause `e2ts` to reindex those
+note:
+* `e2tsr` is probably always overkill, since `e2ds`/`e2dsa` would pick up any file modifications and cause `e2ts` to reindex those
+* the rescan button in the admin panel has no effect unless the volume has `-e2ds` or higher
 
-the rescan button in the admin panel has no effect unless the volume has `-e2ds` or higher
+you can choose to only index filename/path/size/last-modified (and not the hash of the file contents) by setting `--no-hash` or the volume-flag `cnhash`, this has the following consequences:
+* initial indexing is way faster, especially when the volume is on a networked disk
+* makes it impossible to [file-search](#file-search)
+* if someone uploads the same file contents, the upload will not be detected as a dupe, so it will not get symlinked or rejected
+
+if you set `--no-hash`, you can enable hashing for specific volumes using flag `cehash`
 
 
 ## database location
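not part of the diff: a minimal sketch of the hashing precedence described in the README text above. The flag names `nhash`/`ehash` inside the dict are assumptions derived from the `c`-prefix volflag convention (`cnhash`, `cehash`); copyparty's actual resolution lives in AuthSrv and uses an internal `dhash` flag, as shown further down in this diff.

```python
# sketch only: mirrors the documented precedence, not copyparty's real code
def wants_hashing(no_hash_cli, volflags):
    """True if file contents should be hashed for this volume."""
    if "nhash" in volflags:
        return False  # volume opted out via cnhash
    if no_hash_cli and "ehash" not in volflags:
        return False  # global --no-hash, and no cehash override
    return True


print(wants_hashing(True, {"ehash": True}))   # True:  cehash re-enables hashing
print(wants_hashing(True, {}))                # False: --no-hash wins
print(wants_hashing(False, {"nhash": True}))  # False: cnhash disables it locally
```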
@@ -308,9 +315,9 @@ copyparty creates a subfolder named `.hist` inside each volume where it stores t
 this can instead be kept in a single place using the `--hist` argument, or the `hist=` volume flag, or a mix of both:
 * `--hist ~/.cache/copyparty -v ~/music::r:chist=-` sets `~/.cache/copyparty` as the default place to put volume info, but `~/music` gets the regular `.hist` subfolder (`-` restores default behavior)
 
-btw,
+note:
 * markdown edits are always stored in a local `.hist` subdirectory
-* on windows the volflag path is cyglike, so `/c/temp` means `C:\temp`
+* on windows the volflag path is cyglike, so `/c/temp` means `C:\temp` but use regular paths for `--hist`
 
 
 ## metadata from audio files
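not part of the diff: a rough sketch of how a volume's state directory could be resolved under the rules documented above. The helper names and return values are illustrative assumptions; the real code derives a per-volume subfolder under the `--hist` base rather than using it directly.

```python
# sketch only, assuming simplified semantics:
# - hist="-" (volflag) forces the default .hist inside the volume
# - a hist= path on the volume wins over --hist
# - otherwise --hist provides the base directory for volume state
import os


def cyglike_to_win(path):
    # "/c/temp" -> "C:\temp" (the volflag form on windows)
    drive, _, rest = path.lstrip("/").partition("/")
    return "{}:\\{}".format(drive.upper(), rest.replace("/", "\\"))


def resolve_hist_base(vol_realpath, hist_volflag, hist_cli):
    if hist_volflag == "-" or (not hist_volflag and not hist_cli):
        return os.path.join(vol_realpath, ".hist")
    return hist_volflag or hist_cli


print(resolve_hist_base("/home/ed/music", "-", "/home/ed/.cache/copyparty"))
print(resolve_hist_base("/home/ed/music", None, "/home/ed/.cache/copyparty"))
print(cyglike_to_win("/c/temp"))
```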
@@ -286,6 +286,7 @@ def run_argparse(argv, formatter):
     ap2.add_argument("-e2ts", action="store_true", help="enable metadata scanner, sets -e2t")
     ap2.add_argument("-e2tsr", action="store_true", help="rescan all metadata, sets -e2ts")
     ap2.add_argument("--hist", metavar="PATH", type=str, help="where to store volume state")
+    ap2.add_argument("--no-hash", action="store_true", help="disable hashing during e2ds folder scans")
     ap2.add_argument("--no-mutagen", action="store_true", help="use ffprobe for tags instead")
     ap2.add_argument("--no-mtag-mt", action="store_true", help="disable tag-read parallelism")
     ap2.add_argument("-mtm", metavar="M=t,t,t", action="append", type=str, help="add/replace metadata mapping")
@@ -31,7 +31,7 @@ class VFS(object):
             self.all_vols = {vpath: self}  # flattened recursive
         else:
             self.histpath = None
-            self.all_vols = {}
+            self.all_vols = None
 
     def __repr__(self):
         return "VFS({})".format(
@@ -41,9 +41,10 @@ class VFS(object):
             )
         )
 
-    def _trk(self, vol):
-        self.all_vols[vol.vpath] = vol
-        return vol
+    def get_all_vols(self, outdict):
+        for v in self.nodes.values():
+            v.get_all_vols(outdict)
+            outdict[v.vpath] = v
 
     def add(self, src, dst):
         """get existing, or add new path to the vfs"""
@@ -55,19 +56,18 @@ class VFS(object):
             name, dst = dst.split("/", 1)
             if name in self.nodes:
                 # exists; do not manipulate permissions
-                return self._trk(self.nodes[name].add(src, dst))
+                return self.nodes[name].add(src, dst)
 
             vn = VFS(
-                "{}/{}".format(self.realpath, name),
+                os.path.join(self.realpath, name),
                 "{}/{}".format(self.vpath, name).lstrip("/"),
                 self.uread,
                 self.uwrite,
                 self.uadm,
                 self.flags,
             )
-            self._trk(vn)
             self.nodes[name] = vn
-            return self._trk(vn.add(src, dst))
+            return vn.add(src, dst)
 
         if dst in self.nodes:
             # leaf exists; return as-is
@@ -77,7 +77,7 @@ class VFS(object):
         vp = "{}/{}".format(self.vpath, dst).lstrip("/")
         vn = VFS(src, vp)
         self.nodes[dst] = vn
-        return self._trk(vn)
+        return vn
 
     def _find(self, vpath):
         """return [vfs,remainder]"""
@@ -462,6 +462,9 @@ class AuthSrv(object):
             v.uadm = madm[dst]
             v.flags = mflags[dst]
 
+        vfs.all_vols = {}
+        vfs.get_all_vols(vfs.all_vols)
+
         missing_users = {}
         for d in [mread, mwrite]:
             for _, ul in d.items():
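not part of the diff: a self-contained sketch of the flattening pattern introduced here. Instead of registering every volume while the tree is being built (the removed `_trk` helper), the finished tree is walked once and collected into a flat dict keyed by vpath, which is what `vfs.get_all_vols(vfs.all_vols)` does above. The `Node` class below is illustrative, not copyparty's `VFS`.

```python
class Node(object):
    def __init__(self, vpath):
        self.vpath = vpath
        self.nodes = {}  # name -> child Node

    def get_all_vols(self, outdict):
        # same recursion shape as the VFS method in the diff:
        # collect every descendant into one flat dict
        for v in self.nodes.values():
            v.get_all_vols(outdict)
            outdict[v.vpath] = v


root = Node("")
root.nodes["music"] = Node("music")
root.nodes["music"].nodes["cd1"] = Node("music/cd1")

all_vols = {}
root.get_all_vols(all_vols)
print(sorted(all_vols))  # ['music', 'music/cd1']
```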
@@ -526,6 +529,10 @@ class AuthSrv(object):
             if self.args.e2d or "e2ds" in vol.flags:
                 vol.flags["e2d"] = True
 
+            if self.args.no_hash:
+                if "ehash" not in vol.flags:
+                    vol.flags["dhash"] = True
+
             for k in ["e2t", "e2ts", "e2tsr"]:
                 if getattr(self.args, k):
                     vol.flags[k] = True
@@ -7,7 +7,7 @@ import time
 import threading
 from datetime import datetime
 
-from .util import s3dec, Pebkac
+from .util import s3dec, Pebkac, min_ex
 from .up2k import up2k_wark_from_hashlist
 
 
@@ -54,8 +54,8 @@ class U2idx(object):
 
         try:
             return self.run_query(vols, uq, uv)[0]
-        except Exception as ex:
-            raise Pebkac(500, repr(ex))
+        except:
+            raise Pebkac(500, min_ex())
 
     def get_cur(self, ptop):
         cur = self.cur.get(ptop)
@@ -245,6 +245,7 @@ class U2idx(object):
                     hit["tags"] = tags
 
+                ret.extend(sret)
                 # print("[{}] {}".format(ptop, sret))
 
             done_flag.append(True)
             self.active_id = None
@@ -359,6 +359,7 @@ class Up2k(object):
     def _build_file_index(self, vol, all_vols):
         do_vac = False
         top = vol.realpath
+        nohash = "dhash" in vol.flags
         with self.mutex:
             cur, _ = self.register_vpath(top, vol.flags)
 
@@ -373,7 +374,7 @@ class Up2k(object):
             if WINDOWS:
                 excl = [x.replace("/", "\\") for x in excl]
 
-            n_add = self._build_dir(dbw, top, set(excl), top)
+            n_add = self._build_dir(dbw, top, set(excl), top, nohash)
             n_rm = self._drop_lost(dbw[0], top)
             if dbw[1]:
                 self.log("commit {} new files".format(dbw[1]))
@@ -381,7 +382,7 @@ class Up2k(object):
 
         return True, n_add or n_rm or do_vac
 
-    def _build_dir(self, dbw, top, excl, cdir):
+    def _build_dir(self, dbw, top, excl, cdir, nohash):
         self.pp.msg = "a{} {}".format(self.pp.n, cdir)
         histdir = self.vfs.histtab[top]
         ret = 0
@@ -389,16 +390,17 @@ class Up2k(object):
         for iname, inf in sorted(g):
             abspath = os.path.join(cdir, iname)
             lmod = int(inf.st_mtime)
+            sz = inf.st_size
             if stat.S_ISDIR(inf.st_mode):
                 if abspath in excl or abspath == histdir:
                     continue
                 # self.log(" dir: {}".format(abspath))
-                ret += self._build_dir(dbw, top, excl, abspath)
+                ret += self._build_dir(dbw, top, excl, abspath, nohash)
             else:
                 # self.log("file: {}".format(abspath))
                 rp = abspath[len(top) :].replace("\\", "/").strip("/")
                 rd, fn = rp.rsplit("/", 1) if "/" in rp else ["", rp]
-                sql = "select * from up where rd = ? and fn = ?"
+                sql = "select w, mt, sz from up where rd = ? and fn = ?"
                 try:
                     c = dbw[0].execute(sql, (rd, fn))
                 except:
@@ -407,18 +409,18 @@ class Up2k(object):
                 in_db = list(c.fetchall())
                 if in_db:
                     self.pp.n -= 1
-                    _, dts, dsz, _, _ = in_db[0]
+                    dw, dts, dsz = in_db[0]
                     if len(in_db) > 1:
                         m = "WARN: multiple entries: [{}] => [{}] |{}|\n{}"
                         rep_db = "\n".join([repr(x) for x in in_db])
                         self.log(m.format(top, rp, len(in_db), rep_db))
                         dts = -1
 
-                    if dts == lmod and dsz == inf.st_size:
+                    if dts == lmod and dsz == sz and (nohash or dw[0] != "#"):
                         continue
 
                     m = "reindex [{}] => [{}] ({}/{}) ({}/{})".format(
-                        top, rp, dts, lmod, dsz, inf.st_size
+                        top, rp, dts, lmod, dsz, sz
                     )
                     self.log(m)
                     self.db_rm(dbw[0], rd, fn)
@@ -427,7 +429,11 @@ class Up2k(object):
                     in_db = None
 
                 self.pp.msg = "a{} {}".format(self.pp.n, abspath)
-                if inf.st_size > 1024 * 1024:
+
+                if nohash:
+                    wark = up2k_wark_from_metadata(self.salt, sz, lmod, rd, fn)
+                else:
+                    if sz > 1024 * 1024:
                         self.log("file: {}".format(abspath))
 
                     try:
@@ -436,8 +442,9 @@ class Up2k(object):
                         self.log("hash: {} @ [{}]".format(repr(ex), abspath))
                         continue
 
-                    wark = up2k_wark_from_hashlist(self.salt, inf.st_size, hashes)
-                    self.db_add(dbw[0], wark, rd, fn, lmod, inf.st_size)
+                    wark = up2k_wark_from_hashlist(self.salt, sz, hashes)
+
+                self.db_add(dbw[0], wark, rd, fn, lmod, sz)
                 dbw[1] += 1
                 ret += 1
                 td = time.time() - dbw[2]
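not part of the diff: a condensed sketch of the skip-or-reindex decision that `_build_dir` now makes per file. `db_row` stands in for the `(w, mt, sz)` row returned by the new `select w, mt, sz from up ...` query; metadata-only warks start with `#`, so a file indexed while `--no-hash` was active gets re-hashed once hashing is enabled again.

```python
# sketch only; mirrors the condition `dts == lmod and dsz == sz and (nohash or dw[0] != "#")`
def needs_reindex(db_row, lmod, sz, nohash):
    if db_row is None:
        return True  # not in the database yet

    dw, dts, dsz = db_row
    if dts != lmod or dsz != sz:
        return True  # mtime or size changed on disk

    # entry is up to date; only rebuild it if we can now produce
    # a content-hash wark where a "#"-prefixed metadata wark is stored
    return not nohash and dw.startswith("#")


print(needs_reindex(("#meta-wark", 100, 5), 100, 5, nohash=False))  # True: upgrade to content hash
print(needs_reindex(("#meta-wark", 100, 5), 100, 5, nohash=True))   # False: keep metadata-only entry
```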
@@ -1466,9 +1473,12 @@ def up2k_wark_from_hashlist(salt, filesize, hashes):
     ident.extend(hashes)
     ident = "\n".join(ident)
 
-    hasher = hashlib.sha512()
-    hasher.update(ident.encode("utf-8"))
-    digest = hasher.digest()[:32]
+    wark = hashlib.sha512(ident.encode("utf-8")).digest()
+    wark = base64.urlsafe_b64encode(wark)
+    return wark.decode("ascii")[:43]
 
-    wark = base64.urlsafe_b64encode(digest)
-    return wark.decode("utf-8").rstrip("=")
+
+def up2k_wark_from_metadata(salt, sz, lastmod, rd, fn):
+    ret = fsenc("{}\n{}\n{}\n{}\n{}".format(salt, lastmod, sz, rd, fn))
+    ret = base64.urlsafe_b64encode(hashlib.sha512(ret).digest())
+    return "#{}".format(ret[:42].decode("ascii"))
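not part of the diff: a standalone sketch of the two identifier ("wark") shapes after this change. It mirrors the functions above but uses plain strings instead of copyparty's `fsenc()`; both forms come out 43 characters long, and the metadata form carries a leading `#` so it can be told apart from a content-hash wark.

```python
import base64
import hashlib


def wark_from_hashes(salt, filesize, hashes):
    # content wark: salt + size + per-chunk hashes
    ident = "\n".join([salt, str(filesize)] + hashes)
    digest = hashlib.sha512(ident.encode("utf-8")).digest()
    return base64.urlsafe_b64encode(digest).decode("ascii")[:43]


def wark_from_metadata(salt, sz, lastmod, rd, fn):
    # hashless wark: salt + mtime + size + path, prefixed with "#"
    ident = "{}\n{}\n{}\n{}\n{}".format(salt, lastmod, sz, rd, fn).encode("utf-8")
    b64 = base64.urlsafe_b64encode(hashlib.sha512(ident).digest())
    return "#{}".format(b64[:42].decode("ascii"))


w1 = wark_from_hashes("salt", 3, ["chunkhash1", "chunkhash2"])
w2 = wark_from_metadata("salt", 3, 1600000000, "subdir", "song.mp3")
print(len(w1), len(w2), w2[0])  # 43 43 '#'
```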
@@ -254,6 +254,17 @@ def trace(*args, **kwargs):
     nuprint(msg)
 
 
+def min_ex():
+    et, ev, tb = sys.exc_info()
+    tb = traceback.extract_tb(tb, 2)
+    ex = [
+        "{} @ {} <{}>: {}".format(fp.split(os.sep)[-1], ln, fun, txt)
+        for fp, ln, fun, txt in tb
+    ]
+    ex.append("{}: {}".format(et.__name__, ev))
+    return "\n".join(ex)
+
+
 @contextlib.contextmanager
 def ren_open(fname, *args, **kwargs):
     fdir = kwargs.pop("fdir", None)
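not part of the diff: a quick standalone demo of the `min_ex()` helper added above. It condenses the current exception into "file @ line <function>: source" lines plus the exception type and message, which is what u2idx now puts into `Pebkac(500, ...)` instead of `repr(ex)`. The body is copied from the hunk; the surrounding demo code and the sample output are illustrative.

```python
import os
import sys
import traceback


def min_ex():
    et, ev, tb = sys.exc_info()
    tb = traceback.extract_tb(tb, 2)
    ex = [
        "{} @ {} <{}>: {}".format(fp.split(os.sep)[-1], ln, fun, txt)
        for fp, ln, fun, txt in tb
    ]
    ex.append("{}: {}".format(et.__name__, ev))
    return "\n".join(ex)


def boom():
    return 1 // 0


try:
    boom()
except:
    print(min_ex())
    # prints something like:
    #   demo.py @ 23 <module>: boom()
    #   demo.py @ 19 <boom>: return 1 // 0
    #   ZeroDivisionError: integer division or modulo by zero
```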
@@ -38,6 +38,7 @@ class Cfg(Namespace):
         mtp=[],
         mte="a",
         hist=None,
+        no_hash=False,
         **{k: False for k in "e2d e2ds e2dsa e2t e2ts e2tsr".split()}
     )
 
@@ -18,7 +18,7 @@ from copyparty import util
 class Cfg(Namespace):
     def __init__(self, a=[], v=[], c=None):
         ex = {k: False for k in "e2d e2ds e2dsa e2t e2ts e2tsr".split()}
-        ex2 = {"mtp": [], "mte": "a", "hist": None}
+        ex2 = {"mtp": [], "mte": "a", "hist": None, "no_hash": False}
         ex.update(ex2)
         super(Cfg, self).__init__(a=a, v=v, c=c, **ex)
 
@@ -60,7 +60,7 @@ def get_ramdisk():
 
     if os.path.exists("/Volumes"):
         # hdiutil eject /Volumes/cptd/
-        devname, _ = chkcmd("hdiutil", "attach", "-nomount", "ram://65536")
+        devname, _ = chkcmd("hdiutil", "attach", "-nomount", "ram://131072")
        devname = devname.strip()
         print("devname: [{}]".format(devname))
         for _ in range(10):
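a note on the size (an assumption, not stated in the diff): hdiutil's `ram://N` argument is a count of 512-byte sectors, so this bumps the macOS test ramdisk from 65536 sectors (about 32 MiB) to 131072 sectors (about 64 MiB).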