mirror of
https://github.com/9001/copyparty.git
synced 2025-08-17 17:12:13 -06:00
skip indexing files by regex
This commit is contained in:
parent
eb05cb6c6e
commit
2f021a0c2b
|
@ -596,12 +596,14 @@ note:
|
||||||
* `e2tsr` is probably always overkill, since `e2ds`/`e2dsa` would pick up any file modifications and `e2ts` would then reindex those, unless there is a new copyparty version with new parsers and the release note says otherwise
|
* `e2tsr` is probably always overkill, since `e2ds`/`e2dsa` would pick up any file modifications and `e2ts` would then reindex those, unless there is a new copyparty version with new parsers and the release note says otherwise
|
||||||
* the rescan button in the admin panel has no effect unless the volume has `-e2ds` or higher
|
* the rescan button in the admin panel has no effect unless the volume has `-e2ds` or higher
|
||||||
|
|
||||||
to save some time, you can choose to only index filename/path/size/last-modified (and not the hash of the file contents) by setting `--no-hash` or the volume-flag `:c,dhash`, this has the following consequences:
|
to save some time, you can provide a regex pattern for filepaths which should only be indexed by filename/path/size/last-modified (and not by the hash of the file contents), by setting `--no-hash \.iso$` or the volume-flag `:c,nohash=\.iso$`; this has the following consequences for the matching files:
|
||||||
* initial indexing is way faster, especially when the volume is on a network disk
|
* initial indexing is way faster, especially when the volume is on a network disk
|
||||||
* makes it impossible to [file-search](#file-search)
|
* makes it impossible to [file-search](#file-search)
|
||||||
* if someone uploads the same file contents, the upload will not be detected as a dupe, so it will not get symlinked or rejected
|
* if someone uploads the same file contents, the upload will not be detected as a dupe, so it will not get symlinked or rejected
|
||||||
|
|
||||||
if you set `--no-hash`, you can enable hashing for specific volumes using flag `:c,ehash`
|
similarly, you can fully ignore files/folders matching a regex pattern by setting `--no-idx [...]` or the volume-flag `:c,noidx=\.iso$`
|
||||||
|
|
||||||
|
if you set `--no-hash [...]` globally, you can enable hashing for specific volumes by giving them an empty pattern with the volume-flag `:c,nohash=`
|
||||||
|
|
||||||
|
|
||||||
## upload rules
|
## upload rules
|
||||||
|
@ -851,7 +853,7 @@ below are some tweaks roughly ordered by usefulness:
|
||||||
* `-q` disables logging and can help a bunch, even when combined with `-lo` to redirect logs to file
|
* `-q` disables logging and can help a bunch, even when combined with `-lo` to redirect logs to file
|
||||||
* `--http-only` or `--https-only` (unless you want to support both protocols) will reduce the delay before a new connection is established
|
* `--http-only` or `--https-only` (unless you want to support both protocols) will reduce the delay before a new connection is established
|
||||||
* `--hist` pointing to a fast location (ssd) will make directory listings and searches faster when `-e2d` or `-e2t` is set
|
* `--hist` pointing to a fast location (ssd) will make directory listings and searches faster when `-e2d` or `-e2t` is set
|
||||||
* `--no-hash` when indexing a network-disk if you don't care about the actual filehashes and only want the names/tags searchable
|
* `--no-hash .` when indexing a network-disk if you don't care about the actual filehashes and only want the names/tags searchable
|
||||||
* `-j` enables multiprocessing (actual multithreading) and can make copyparty perform better in cpu-intensive workloads, for example:
|
* `-j` enables multiprocessing (actual multithreading) and can make copyparty perform better in cpu-intensive workloads, for example:
|
||||||
* huge amount of short-lived connections
|
* huge amount of short-lived connections
|
||||||
* really heavy traffic (downloads/uploads)
|
* really heavy traffic (downloads/uploads)
|
||||||
|
|
|
@ -276,7 +276,8 @@ def run_argparse(argv, formatter):
|
||||||
\033[36me2d\033[35m sets -e2d (all -e2* args can be set using ce2* volflags)
|
\033[36me2d\033[35m sets -e2d (all -e2* args can be set using ce2* volflags)
|
||||||
\033[36md2t\033[35m disables metadata collection, overrides -e2t*
|
\033[36md2t\033[35m disables metadata collection, overrides -e2t*
|
||||||
\033[36md2d\033[35m disables all database stuff, overrides -e2*
|
\033[36md2d\033[35m disables all database stuff, overrides -e2*
|
||||||
\033[36mdhash\033[35m disables file hashing on initial scans, also ehash
|
\033[36mnohash=\\.iso$\033[35m skips hashing file contents if path matches *.iso
|
||||||
|
\033[36mnoidx=\\.iso$\033[35m fully ignores the contents at paths matching *.iso
|
||||||
\033[36mhist=/tmp/cdb\033[35m puts thumbnails and indexes at that location
|
\033[36mhist=/tmp/cdb\033[35m puts thumbnails and indexes at that location
|
||||||
\033[36mscan=60\033[35m scan for new files every 60sec, same as --re-maxage
|
\033[36mscan=60\033[35m scan for new files every 60sec, same as --re-maxage
|
||||||
|
|
||||||
|
@ -412,7 +413,8 @@ def run_argparse(argv, formatter):
|
||||||
ap2.add_argument("-e2ds", action="store_true", help="enable up2k db-scanner, sets -e2d")
|
ap2.add_argument("-e2ds", action="store_true", help="enable up2k db-scanner, sets -e2d")
|
||||||
ap2.add_argument("-e2dsa", action="store_true", help="scan all folders (for search), sets -e2ds")
|
ap2.add_argument("-e2dsa", action="store_true", help="scan all folders (for search), sets -e2ds")
|
||||||
ap2.add_argument("--hist", metavar="PATH", type=u, help="where to store volume data (db, thumbs)")
|
ap2.add_argument("--hist", metavar="PATH", type=u, help="where to store volume data (db, thumbs)")
|
||||||
ap2.add_argument("--no-hash", action="store_true", help="disable hashing during e2ds folder scans")
|
ap2.add_argument("--no-hash", metavar="PTN", type=u, help="regex: disable hashing of matching paths during e2ds folder scans")
|
||||||
|
ap2.add_argument("--no-idx", metavar="PTN", type=u, help="regex: disable indexing of matching paths during e2ds folder scans")
|
||||||
ap2.add_argument("--re-int", metavar="SEC", type=int, default=30, help="disk rescan check interval")
|
ap2.add_argument("--re-int", metavar="SEC", type=int, default=30, help="disk rescan check interval")
|
||||||
ap2.add_argument("--re-maxage", metavar="SEC", type=int, default=0, help="disk rescan volume interval, 0=off, can be set per-volume with the 'scan' volflag")
|
ap2.add_argument("--re-maxage", metavar="SEC", type=int, default=0, help="disk rescan volume interval, 0=off, can be set per-volume with the 'scan' volflag")
|
||||||
ap2.add_argument("--srch-time", metavar="SEC", type=int, default=30, help="search deadline")
|
ap2.add_argument("--srch-time", metavar="SEC", type=int, default=30, help="search deadline")
|
||||||
|
|
|
@ -865,9 +865,14 @@ class AuthSrv(object):
|
||||||
if self.args.e2d or "e2ds" in vol.flags:
|
if self.args.e2d or "e2ds" in vol.flags:
|
||||||
vol.flags["e2d"] = True
|
vol.flags["e2d"] = True
|
||||||
|
|
||||||
if self.args.no_hash:
|
for ga, vf in [["no_hash", "nohash"], ["no_idx", "noidx"]]:
|
||||||
if "ehash" not in vol.flags:
|
if vf in vol.flags:
|
||||||
vol.flags["dhash"] = True
|
ptn = vol.flags.pop(vf)
|
||||||
|
else:
|
||||||
|
ptn = getattr(self.args, ga)
|
||||||
|
|
||||||
|
if ptn:
|
||||||
|
vol.flags[vf] = re.compile(ptn)
|
||||||
|
|
||||||
for k in ["e2t", "e2ts", "e2tsr"]:
|
for k in ["e2t", "e2ts", "e2tsr"]:
|
||||||
if getattr(self.args, k):
|
if getattr(self.args, k):
|
||||||
|
|
|
@ -466,7 +466,8 @@ class Up2k(object):
|
||||||
def _build_file_index(self, vol, all_vols):
|
def _build_file_index(self, vol, all_vols):
|
||||||
do_vac = False
|
do_vac = False
|
||||||
top = vol.realpath
|
top = vol.realpath
|
||||||
nohash = "dhash" in vol.flags
|
rei = vol.flags.get("noidx")
|
||||||
|
reh = vol.flags.get("nohash")
|
||||||
with self.mutex:
|
with self.mutex:
|
||||||
cur, _ = self.register_vpath(top, vol.flags)
|
cur, _ = self.register_vpath(top, vol.flags)
|
||||||
|
|
||||||
|
@ -483,7 +484,7 @@ class Up2k(object):
|
||||||
|
|
||||||
n_add = n_rm = 0
|
n_add = n_rm = 0
|
||||||
try:
|
try:
|
||||||
n_add = self._build_dir(dbw, top, set(excl), top, nohash, [])
|
n_add = self._build_dir(dbw, top, set(excl), top, rei, reh, [])
|
||||||
n_rm = self._drop_lost(dbw[0], top)
|
n_rm = self._drop_lost(dbw[0], top)
|
||||||
except:
|
except:
|
||||||
m = "failed to index volume [{}]:\n{}"
|
m = "failed to index volume [{}]:\n{}"
|
||||||
|
@ -496,7 +497,7 @@ class Up2k(object):
|
||||||
|
|
||||||
return True, n_add or n_rm or do_vac
|
return True, n_add or n_rm or do_vac
|
||||||
|
|
||||||
def _build_dir(self, dbw, top, excl, cdir, nohash, seen):
|
def _build_dir(self, dbw, top, excl, cdir, rei, reh, seen):
|
||||||
rcdir = absreal(cdir) # a bit expensive but worth
|
rcdir = absreal(cdir) # a bit expensive but worth
|
||||||
if rcdir in seen:
|
if rcdir in seen:
|
||||||
m = "bailing from symlink loop,\n prev: {}\n curr: {}\n from: {}"
|
m = "bailing from symlink loop,\n prev: {}\n curr: {}\n from: {}"
|
||||||
|
@ -511,6 +512,10 @@ class Up2k(object):
|
||||||
g = statdir(self.log_func, not self.args.no_scandir, False, cdir)
|
g = statdir(self.log_func, not self.args.no_scandir, False, cdir)
|
||||||
for iname, inf in sorted(g):
|
for iname, inf in sorted(g):
|
||||||
abspath = os.path.join(cdir, iname)
|
abspath = os.path.join(cdir, iname)
|
||||||
|
if rei and rei.search(abspath):
|
||||||
|
continue
|
||||||
|
|
||||||
|
nohash = reh.search(abspath) if reh else False
|
||||||
lmod = int(inf.st_mtime)
|
lmod = int(inf.st_mtime)
|
||||||
sz = inf.st_size
|
sz = inf.st_size
|
||||||
if stat.S_ISDIR(inf.st_mode):
|
if stat.S_ISDIR(inf.st_mode):
|
||||||
|
@ -518,7 +523,7 @@ class Up2k(object):
|
||||||
continue
|
continue
|
||||||
# self.log(" dir: {}".format(abspath))
|
# self.log(" dir: {}".format(abspath))
|
||||||
try:
|
try:
|
||||||
ret += self._build_dir(dbw, top, excl, abspath, nohash, seen)
|
ret += self._build_dir(dbw, top, excl, abspath, rei, reh, seen)
|
||||||
except:
|
except:
|
||||||
m = "failed to index subdir [{}]:\n{}"
|
m = "failed to index subdir [{}]:\n{}"
|
||||||
self.log(m.format(abspath, min_ex()), c=1)
|
self.log(m.format(abspath, min_ex()), c=1)
|
||||||
|
|
|
@ -48,7 +48,8 @@ class Cfg(Namespace):
|
||||||
mte="a",
|
mte="a",
|
||||||
mth="",
|
mth="",
|
||||||
hist=None,
|
hist=None,
|
||||||
no_hash=False,
|
no_idx=None,
|
||||||
|
no_hash=None,
|
||||||
css_browser=None,
|
css_browser=None,
|
||||||
**{k: False for k in "e2d e2ds e2dsa e2t e2ts e2tsr".split()}
|
**{k: False for k in "e2d e2ds e2dsa e2t e2ts e2tsr".split()}
|
||||||
)
|
)
|
||||||
|
|
|
@ -23,7 +23,8 @@ class Cfg(Namespace):
|
||||||
"mte": "a",
|
"mte": "a",
|
||||||
"mth": "",
|
"mth": "",
|
||||||
"hist": None,
|
"hist": None,
|
||||||
"no_hash": False,
|
"no_idx": None,
|
||||||
|
"no_hash": None,
|
||||||
"css_browser": None,
|
"css_browser": None,
|
||||||
"no_voldump": True,
|
"no_voldump": True,
|
||||||
"no_logues": False,
|
"no_logues": False,
|
||||||
|
|
Loading…
Reference in a new issue