mirror of
https://github.com/9001/copyparty.git
synced 2025-08-17 09:02:15 -06:00
adding --no-hash
This commit is contained in:
parent
d6bf300d80
commit
1078d933b4
15
README.md
15
README.md
|
@ -296,9 +296,16 @@ the same arguments can be set as volume flags, in addition to `d2d` and `d2t` fo
|
||||||
* `-v ~/music::r:cd2d` disables **all** indexing, even if any `-e2*` are on
|
* `-v ~/music::r:cd2d` disables **all** indexing, even if any `-e2*` are on
|
||||||
* `-v ~/music::r:cd2t` disables all `-e2t*` (tags), does not affect `-e2d*`
|
* `-v ~/music::r:cd2t` disables all `-e2t*` (tags), does not affect `-e2d*`
|
||||||
|
|
||||||
`e2tsr` is probably always overkill, since `e2ds`/`e2dsa` would pick up any file modifications and cause `e2ts` to reindex those
|
note:
|
||||||
|
* `e2tsr` is probably always overkill, since `e2ds`/`e2dsa` would pick up any file modifications and cause `e2ts` to reindex those
|
||||||
|
* the rescan button in the admin panel has no effect unless the volume has `-e2ds` or higher
|
||||||
|
|
||||||
the rescan button in the admin panel has no effect unless the volume has `-e2ds` or higher
|
you can choose to only index filename/path/size/last-modified (and not the hash of the file contents) by setting `--no-hash` or the volume-flag `cdhash`; this has the following consequences:
|
||||||
|
* initial indexing is way faster, especially when the volume is on a networked disk
|
||||||
|
* makes it impossible to [file-search](#file-search)
|
||||||
|
* if someone uploads the same file contents, the upload will not be detected as a dupe, so it will not get symlinked or rejected
|
||||||
|
|
||||||
|
if you set `--no-hash`, you can enable hashing for specific volumes using the volume-flag `cehash`
|
||||||
|
|
||||||
|
|
||||||
## database location
|
## database location
|
||||||
|
@ -308,9 +315,9 @@ copyparty creates a subfolder named `.hist` inside each volume where it stores t
|
||||||
this can instead be kept in a single place using the `--hist` argument, or the `hist=` volume flag, or a mix of both:
|
this can instead be kept in a single place using the `--hist` argument, or the `hist=` volume flag, or a mix of both:
|
||||||
* `--hist ~/.cache/copyparty -v ~/music::r:chist=-` sets `~/.cache/copyparty` as the default place to put volume info, but `~/music` gets the regular `.hist` subfolder (`-` restores default behavior)
|
* `--hist ~/.cache/copyparty -v ~/music::r:chist=-` sets `~/.cache/copyparty` as the default place to put volume info, but `~/music` gets the regular `.hist` subfolder (`-` restores default behavior)
|
||||||
|
|
||||||
btw,
|
note:
|
||||||
* markdown edits are always stored in a local `.hist` subdirectory
|
* markdown edits are always stored in a local `.hist` subdirectory
|
||||||
* on windows the volflag path is cyglike, so `/c/temp` means `C:\temp`
|
* on windows the volflag path is cyglike, so `/c/temp` means `C:\temp`, but use regular paths for `--hist`
|
||||||
|
|
||||||
|
|
||||||
## metadata from audio files
|
## metadata from audio files
|
||||||
|
|
|
@ -286,6 +286,7 @@ def run_argparse(argv, formatter):
|
||||||
ap2.add_argument("-e2ts", action="store_true", help="enable metadata scanner, sets -e2t")
|
ap2.add_argument("-e2ts", action="store_true", help="enable metadata scanner, sets -e2t")
|
||||||
ap2.add_argument("-e2tsr", action="store_true", help="rescan all metadata, sets -e2ts")
|
ap2.add_argument("-e2tsr", action="store_true", help="rescan all metadata, sets -e2ts")
|
||||||
ap2.add_argument("--hist", metavar="PATH", type=str, help="where to store volume state")
|
ap2.add_argument("--hist", metavar="PATH", type=str, help="where to store volume state")
|
||||||
|
ap2.add_argument("--no-hash", action="store_true", help="disable hashing during e2ds folder scans")
|
||||||
ap2.add_argument("--no-mutagen", action="store_true", help="use ffprobe for tags instead")
|
ap2.add_argument("--no-mutagen", action="store_true", help="use ffprobe for tags instead")
|
||||||
ap2.add_argument("--no-mtag-mt", action="store_true", help="disable tag-read parallelism")
|
ap2.add_argument("--no-mtag-mt", action="store_true", help="disable tag-read parallelism")
|
||||||
ap2.add_argument("-mtm", metavar="M=t,t,t", action="append", type=str, help="add/replace metadata mapping")
|
ap2.add_argument("-mtm", metavar="M=t,t,t", action="append", type=str, help="add/replace metadata mapping")
|
||||||
|
|
|
@ -31,7 +31,7 @@ class VFS(object):
|
||||||
self.all_vols = {vpath: self} # flattened recursive
|
self.all_vols = {vpath: self} # flattened recursive
|
||||||
else:
|
else:
|
||||||
self.histpath = None
|
self.histpath = None
|
||||||
self.all_vols = {}
|
self.all_vols = None
|
||||||
|
|
||||||
def __repr__(self):
|
def __repr__(self):
|
||||||
return "VFS({})".format(
|
return "VFS({})".format(
|
||||||
|
@ -41,9 +41,10 @@ class VFS(object):
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
def _trk(self, vol):
|
def get_all_vols(self, outdict):
|
||||||
self.all_vols[vol.vpath] = vol
|
for v in self.nodes.values():
|
||||||
return vol
|
v.get_all_vols(outdict)
|
||||||
|
outdict[v.vpath] = v
|
||||||
|
|
||||||
def add(self, src, dst):
|
def add(self, src, dst):
|
||||||
"""get existing, or add new path to the vfs"""
|
"""get existing, or add new path to the vfs"""
|
||||||
|
@ -55,19 +56,18 @@ class VFS(object):
|
||||||
name, dst = dst.split("/", 1)
|
name, dst = dst.split("/", 1)
|
||||||
if name in self.nodes:
|
if name in self.nodes:
|
||||||
# exists; do not manipulate permissions
|
# exists; do not manipulate permissions
|
||||||
return self._trk(self.nodes[name].add(src, dst))
|
return self.nodes[name].add(src, dst)
|
||||||
|
|
||||||
vn = VFS(
|
vn = VFS(
|
||||||
"{}/{}".format(self.realpath, name),
|
os.path.join(self.realpath, name),
|
||||||
"{}/{}".format(self.vpath, name).lstrip("/"),
|
"{}/{}".format(self.vpath, name).lstrip("/"),
|
||||||
self.uread,
|
self.uread,
|
||||||
self.uwrite,
|
self.uwrite,
|
||||||
self.uadm,
|
self.uadm,
|
||||||
self.flags,
|
self.flags,
|
||||||
)
|
)
|
||||||
self._trk(vn)
|
|
||||||
self.nodes[name] = vn
|
self.nodes[name] = vn
|
||||||
return self._trk(vn.add(src, dst))
|
return vn.add(src, dst)
|
||||||
|
|
||||||
if dst in self.nodes:
|
if dst in self.nodes:
|
||||||
# leaf exists; return as-is
|
# leaf exists; return as-is
|
||||||
|
@ -77,7 +77,7 @@ class VFS(object):
|
||||||
vp = "{}/{}".format(self.vpath, dst).lstrip("/")
|
vp = "{}/{}".format(self.vpath, dst).lstrip("/")
|
||||||
vn = VFS(src, vp)
|
vn = VFS(src, vp)
|
||||||
self.nodes[dst] = vn
|
self.nodes[dst] = vn
|
||||||
return self._trk(vn)
|
return vn
|
||||||
|
|
||||||
def _find(self, vpath):
|
def _find(self, vpath):
|
||||||
"""return [vfs,remainder]"""
|
"""return [vfs,remainder]"""
|
||||||
|
@ -462,6 +462,9 @@ class AuthSrv(object):
|
||||||
v.uadm = madm[dst]
|
v.uadm = madm[dst]
|
||||||
v.flags = mflags[dst]
|
v.flags = mflags[dst]
|
||||||
|
|
||||||
|
vfs.all_vols = {}
|
||||||
|
vfs.get_all_vols(vfs.all_vols)
|
||||||
|
|
||||||
missing_users = {}
|
missing_users = {}
|
||||||
for d in [mread, mwrite]:
|
for d in [mread, mwrite]:
|
||||||
for _, ul in d.items():
|
for _, ul in d.items():
|
||||||
|
@ -526,6 +529,10 @@ class AuthSrv(object):
|
||||||
if self.args.e2d or "e2ds" in vol.flags:
|
if self.args.e2d or "e2ds" in vol.flags:
|
||||||
vol.flags["e2d"] = True
|
vol.flags["e2d"] = True
|
||||||
|
|
||||||
|
if self.args.no_hash:
|
||||||
|
if "ehash" not in vol.flags:
|
||||||
|
vol.flags["dhash"] = True
|
||||||
|
|
||||||
for k in ["e2t", "e2ts", "e2tsr"]:
|
for k in ["e2t", "e2ts", "e2tsr"]:
|
||||||
if getattr(self.args, k):
|
if getattr(self.args, k):
|
||||||
vol.flags[k] = True
|
vol.flags[k] = True
|
||||||
|
|
|
@ -7,7 +7,7 @@ import time
|
||||||
import threading
|
import threading
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
|
||||||
from .util import s3dec, Pebkac
|
from .util import s3dec, Pebkac, min_ex
|
||||||
from .up2k import up2k_wark_from_hashlist
|
from .up2k import up2k_wark_from_hashlist
|
||||||
|
|
||||||
|
|
||||||
|
@ -54,8 +54,8 @@ class U2idx(object):
|
||||||
|
|
||||||
try:
|
try:
|
||||||
return self.run_query(vols, uq, uv)[0]
|
return self.run_query(vols, uq, uv)[0]
|
||||||
except Exception as ex:
|
except:
|
||||||
raise Pebkac(500, repr(ex))
|
raise Pebkac(500, min_ex())
|
||||||
|
|
||||||
def get_cur(self, ptop):
|
def get_cur(self, ptop):
|
||||||
cur = self.cur.get(ptop)
|
cur = self.cur.get(ptop)
|
||||||
|
@ -245,6 +245,7 @@ class U2idx(object):
|
||||||
hit["tags"] = tags
|
hit["tags"] = tags
|
||||||
|
|
||||||
ret.extend(sret)
|
ret.extend(sret)
|
||||||
|
# print("[{}] {}".format(ptop, sret))
|
||||||
|
|
||||||
done_flag.append(True)
|
done_flag.append(True)
|
||||||
self.active_id = None
|
self.active_id = None
|
||||||
|
|
|
@ -359,6 +359,7 @@ class Up2k(object):
|
||||||
def _build_file_index(self, vol, all_vols):
|
def _build_file_index(self, vol, all_vols):
|
||||||
do_vac = False
|
do_vac = False
|
||||||
top = vol.realpath
|
top = vol.realpath
|
||||||
|
nohash = "dhash" in vol.flags
|
||||||
with self.mutex:
|
with self.mutex:
|
||||||
cur, _ = self.register_vpath(top, vol.flags)
|
cur, _ = self.register_vpath(top, vol.flags)
|
||||||
|
|
||||||
|
@ -373,7 +374,7 @@ class Up2k(object):
|
||||||
if WINDOWS:
|
if WINDOWS:
|
||||||
excl = [x.replace("/", "\\") for x in excl]
|
excl = [x.replace("/", "\\") for x in excl]
|
||||||
|
|
||||||
n_add = self._build_dir(dbw, top, set(excl), top)
|
n_add = self._build_dir(dbw, top, set(excl), top, nohash)
|
||||||
n_rm = self._drop_lost(dbw[0], top)
|
n_rm = self._drop_lost(dbw[0], top)
|
||||||
if dbw[1]:
|
if dbw[1]:
|
||||||
self.log("commit {} new files".format(dbw[1]))
|
self.log("commit {} new files".format(dbw[1]))
|
||||||
|
@ -381,7 +382,7 @@ class Up2k(object):
|
||||||
|
|
||||||
return True, n_add or n_rm or do_vac
|
return True, n_add or n_rm or do_vac
|
||||||
|
|
||||||
def _build_dir(self, dbw, top, excl, cdir):
|
def _build_dir(self, dbw, top, excl, cdir, nohash):
|
||||||
self.pp.msg = "a{} {}".format(self.pp.n, cdir)
|
self.pp.msg = "a{} {}".format(self.pp.n, cdir)
|
||||||
histdir = self.vfs.histtab[top]
|
histdir = self.vfs.histtab[top]
|
||||||
ret = 0
|
ret = 0
|
||||||
|
@ -389,16 +390,17 @@ class Up2k(object):
|
||||||
for iname, inf in sorted(g):
|
for iname, inf in sorted(g):
|
||||||
abspath = os.path.join(cdir, iname)
|
abspath = os.path.join(cdir, iname)
|
||||||
lmod = int(inf.st_mtime)
|
lmod = int(inf.st_mtime)
|
||||||
|
sz = inf.st_size
|
||||||
if stat.S_ISDIR(inf.st_mode):
|
if stat.S_ISDIR(inf.st_mode):
|
||||||
if abspath in excl or abspath == histdir:
|
if abspath in excl or abspath == histdir:
|
||||||
continue
|
continue
|
||||||
# self.log(" dir: {}".format(abspath))
|
# self.log(" dir: {}".format(abspath))
|
||||||
ret += self._build_dir(dbw, top, excl, abspath)
|
ret += self._build_dir(dbw, top, excl, abspath, nohash)
|
||||||
else:
|
else:
|
||||||
# self.log("file: {}".format(abspath))
|
# self.log("file: {}".format(abspath))
|
||||||
rp = abspath[len(top) :].replace("\\", "/").strip("/")
|
rp = abspath[len(top) :].replace("\\", "/").strip("/")
|
||||||
rd, fn = rp.rsplit("/", 1) if "/" in rp else ["", rp]
|
rd, fn = rp.rsplit("/", 1) if "/" in rp else ["", rp]
|
||||||
sql = "select * from up where rd = ? and fn = ?"
|
sql = "select w, mt, sz from up where rd = ? and fn = ?"
|
||||||
try:
|
try:
|
||||||
c = dbw[0].execute(sql, (rd, fn))
|
c = dbw[0].execute(sql, (rd, fn))
|
||||||
except:
|
except:
|
||||||
|
@ -407,18 +409,18 @@ class Up2k(object):
|
||||||
in_db = list(c.fetchall())
|
in_db = list(c.fetchall())
|
||||||
if in_db:
|
if in_db:
|
||||||
self.pp.n -= 1
|
self.pp.n -= 1
|
||||||
_, dts, dsz, _, _ = in_db[0]
|
dw, dts, dsz = in_db[0]
|
||||||
if len(in_db) > 1:
|
if len(in_db) > 1:
|
||||||
m = "WARN: multiple entries: [{}] => [{}] |{}|\n{}"
|
m = "WARN: multiple entries: [{}] => [{}] |{}|\n{}"
|
||||||
rep_db = "\n".join([repr(x) for x in in_db])
|
rep_db = "\n".join([repr(x) for x in in_db])
|
||||||
self.log(m.format(top, rp, len(in_db), rep_db))
|
self.log(m.format(top, rp, len(in_db), rep_db))
|
||||||
dts = -1
|
dts = -1
|
||||||
|
|
||||||
if dts == lmod and dsz == inf.st_size:
|
if dts == lmod and dsz == sz and (nohash or dw[0] != "#"):
|
||||||
continue
|
continue
|
||||||
|
|
||||||
m = "reindex [{}] => [{}] ({}/{}) ({}/{})".format(
|
m = "reindex [{}] => [{}] ({}/{}) ({}/{})".format(
|
||||||
top, rp, dts, lmod, dsz, inf.st_size
|
top, rp, dts, lmod, dsz, sz
|
||||||
)
|
)
|
||||||
self.log(m)
|
self.log(m)
|
||||||
self.db_rm(dbw[0], rd, fn)
|
self.db_rm(dbw[0], rd, fn)
|
||||||
|
@ -427,17 +429,22 @@ class Up2k(object):
|
||||||
in_db = None
|
in_db = None
|
||||||
|
|
||||||
self.pp.msg = "a{} {}".format(self.pp.n, abspath)
|
self.pp.msg = "a{} {}".format(self.pp.n, abspath)
|
||||||
if inf.st_size > 1024 * 1024:
|
|
||||||
self.log("file: {}".format(abspath))
|
|
||||||
|
|
||||||
try:
|
if nohash:
|
||||||
hashes = self._hashlist_from_file(abspath)
|
wark = up2k_wark_from_metadata(self.salt, sz, lmod, rd, fn)
|
||||||
except Exception as ex:
|
else:
|
||||||
self.log("hash: {} @ [{}]".format(repr(ex), abspath))
|
if sz > 1024 * 1024:
|
||||||
continue
|
self.log("file: {}".format(abspath))
|
||||||
|
|
||||||
wark = up2k_wark_from_hashlist(self.salt, inf.st_size, hashes)
|
try:
|
||||||
self.db_add(dbw[0], wark, rd, fn, lmod, inf.st_size)
|
hashes = self._hashlist_from_file(abspath)
|
||||||
|
except Exception as ex:
|
||||||
|
self.log("hash: {} @ [{}]".format(repr(ex), abspath))
|
||||||
|
continue
|
||||||
|
|
||||||
|
wark = up2k_wark_from_hashlist(self.salt, sz, hashes)
|
||||||
|
|
||||||
|
self.db_add(dbw[0], wark, rd, fn, lmod, sz)
|
||||||
dbw[1] += 1
|
dbw[1] += 1
|
||||||
ret += 1
|
ret += 1
|
||||||
td = time.time() - dbw[2]
|
td = time.time() - dbw[2]
|
||||||
|
@ -1466,9 +1473,12 @@ def up2k_wark_from_hashlist(salt, filesize, hashes):
|
||||||
ident.extend(hashes)
|
ident.extend(hashes)
|
||||||
ident = "\n".join(ident)
|
ident = "\n".join(ident)
|
||||||
|
|
||||||
hasher = hashlib.sha512()
|
wark = hashlib.sha512(ident.encode("utf-8")).digest()
|
||||||
hasher.update(ident.encode("utf-8"))
|
wark = base64.urlsafe_b64encode(wark)
|
||||||
digest = hasher.digest()[:32]
|
return wark.decode("ascii")[:43]
|
||||||
|
|
||||||
wark = base64.urlsafe_b64encode(digest)
|
|
||||||
return wark.decode("utf-8").rstrip("=")
|
def up2k_wark_from_metadata(salt, sz, lastmod, rd, fn):
    """Build a file identifier from metadata only, without reading file contents.

    The salt, last-modified time, size, directory and filename are joined
    with newlines, passed through ``fsenc``, hashed with SHA-512 and encoded
    as urlsafe base64.  The result is ``#`` followed by the first 42
    characters of that encoding, so the returned string is 43 characters.
    """
    ident = "{}\n{}\n{}\n{}\n{}".format(salt, lastmod, sz, rd, fn)
    digest = hashlib.sha512(fsenc(ident)).digest()
    b64 = base64.urlsafe_b64encode(digest)
    # the leading "#" marks the identifier as derived from metadata
    return "#{}".format(b64[:42].decode("ascii"))
|
||||||
|
|
|
@ -254,6 +254,17 @@ def trace(*args, **kwargs):
|
||||||
nuprint(msg)
|
nuprint(msg)
|
||||||
|
|
||||||
|
|
||||||
|
def min_ex():
    """Return a compact, multi-line summary of the exception being handled.

    Reads the active exception from ``sys.exc_info()``, so it is meant to be
    called from inside an ``except`` block.  The result has one line per
    traceback entry (at most two entries are taken), each formatted as
    ``<file> @ <line> <<function>>: <source text>``, followed by a final
    ``<ExceptionType>: <message>`` line.

    When no exception is being handled, ``"NoneType: None"`` is returned
    (the same wording ``traceback.format_exc`` uses) instead of failing
    with AttributeError on the missing exception type.
    """
    et, ev, tb = sys.exc_info()
    if et is None:
        # nothing is being handled; et.__name__ below would raise
        return "NoneType: None"

    ex = [
        # only the file name is kept so each line stays short
        "{} @ {} <{}>: {}".format(os.path.basename(fp), ln, fun, txt)
        for fp, ln, fun, txt in traceback.extract_tb(tb, 2)
    ]
    ex.append("{}: {}".format(et.__name__, ev))
    return "\n".join(ex)
|
||||||
|
|
||||||
|
|
||||||
@contextlib.contextmanager
|
@contextlib.contextmanager
|
||||||
def ren_open(fname, *args, **kwargs):
|
def ren_open(fname, *args, **kwargs):
|
||||||
fdir = kwargs.pop("fdir", None)
|
fdir = kwargs.pop("fdir", None)
|
||||||
|
|
|
@ -38,6 +38,7 @@ class Cfg(Namespace):
|
||||||
mtp=[],
|
mtp=[],
|
||||||
mte="a",
|
mte="a",
|
||||||
hist=None,
|
hist=None,
|
||||||
|
no_hash=False,
|
||||||
**{k: False for k in "e2d e2ds e2dsa e2t e2ts e2tsr".split()}
|
**{k: False for k in "e2d e2ds e2dsa e2t e2ts e2tsr".split()}
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
|
@ -18,7 +18,7 @@ from copyparty import util
|
||||||
class Cfg(Namespace):
|
class Cfg(Namespace):
|
||||||
def __init__(self, a=[], v=[], c=None):
|
def __init__(self, a=[], v=[], c=None):
|
||||||
ex = {k: False for k in "e2d e2ds e2dsa e2t e2ts e2tsr".split()}
|
ex = {k: False for k in "e2d e2ds e2dsa e2t e2ts e2tsr".split()}
|
||||||
ex2 = {"mtp": [], "mte": "a", "hist": None}
|
ex2 = {"mtp": [], "mte": "a", "hist": None, "no_hash": False}
|
||||||
ex.update(ex2)
|
ex.update(ex2)
|
||||||
super(Cfg, self).__init__(a=a, v=v, c=c, **ex)
|
super(Cfg, self).__init__(a=a, v=v, c=c, **ex)
|
||||||
|
|
||||||
|
|
|
@ -60,7 +60,7 @@ def get_ramdisk():
|
||||||
|
|
||||||
if os.path.exists("/Volumes"):
|
if os.path.exists("/Volumes"):
|
||||||
# hdiutil eject /Volumes/cptd/
|
# hdiutil eject /Volumes/cptd/
|
||||||
devname, _ = chkcmd("hdiutil", "attach", "-nomount", "ram://65536")
|
devname, _ = chkcmd("hdiutil", "attach", "-nomount", "ram://131072")
|
||||||
devname = devname.strip()
|
devname = devname.strip()
|
||||||
print("devname: [{}]".format(devname))
|
print("devname: [{}]".format(devname))
|
||||||
for _ in range(10):
|
for _ in range(10):
|
||||||
|
|
Loading…
Reference in a new issue