mirror of
https://github.com/9001/copyparty.git
synced 2025-08-17 00:52:16 -06:00
previously, when moving or renaming a symlink to a file (or a folder with symlinks inside), the dedup setting would decide whether those links would be expanded into full files or not

with dedup disabled (which is the default), all symlinks would be expanded during a move operation

now, the dedup setting is ignored when files/folders are moved, but it still applies when uploading or copying files/folders

* absolute symlinks are moved as-is
* relative symlinks are rewritten as necessary, assuming both source and destination are known in the db
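The rewrite for relative symlinks amounts to resolving the link target against the old parent directory, then re-relativizing it against the new one. A minimal sketch of that idea (helper name and arguments are hypothetical, not copyparty's actual code):

import os

def move_symlink(src_ap, dst_ap):
    # sketch of the move behavior described above;
    # src_ap/dst_ap are hypothetical absolute source/destination paths
    tgt = os.readlink(src_ap)
    if os.path.isabs(tgt):
        os.rename(src_ap, dst_ap)  # absolute symlinks are moved as-is
        return
    # resolve against the old location, re-relativize against the new one
    abs_tgt = os.path.normpath(os.path.join(os.path.dirname(src_ap), tgt))
    new_tgt = os.path.relpath(abs_tgt, os.path.dirname(dst_ap))
    os.remove(src_ap)
    os.symlink(new_tgt, dst_ap)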
5439 lines
178 KiB
Python
# coding: utf-8
from __future__ import print_function, unicode_literals

import errno
import gzip
import hashlib
import json
import math
import os
import re
import shutil
import stat
import subprocess as sp
import tempfile
import threading
import time
import traceback
from copy import deepcopy

from queue import Queue

from .__init__ import ANYWIN, PY2, TYPE_CHECKING, WINDOWS, E
from .authsrv import LEELOO_DALLAS, SEESLOG, VFS, AuthSrv
from .bos import bos
from .cfg import vf_bmap, vf_cmap, vf_vmap
from .fsutil import Fstab
from .mtag import MParser, MTag
from .util import (
    HAVE_SQLITE3,
    SYMTIME,
    VF_CAREFUL,
    Daemon,
    MTHash,
    Pebkac,
    ProgressPrinter,
    absreal,
    alltrace,
    atomic_move,
    db_ex_chk,
    dir_is_empty,
    djoin,
    fsenc,
    gen_filekey,
    gen_filekey_dbg,
    hidedir,
    humansize,
    min_ex,
    pathmod,
    quotep,
    rand_name,
    ren_open,
    rmdirs,
    rmdirs_up,
    runhook,
    runihook,
    s2hms,
    s3dec,
    s3enc,
    sanitize_fn,
    sfsenc,
    spack,
    statdir,
    ub64enc,
    unhumanize,
    vjoin,
    vsplit,
    w8b64dec,
    w8b64enc,
    wunlink,
)

try:
    from pathlib import Path
except:
    pass

if HAVE_SQLITE3:
    import sqlite3

DB_VER = 5

if True:  # pylint: disable=using-constant-test
    from typing import Any, Optional, Pattern, Union

if TYPE_CHECKING:
    from .svchub import SvcHub

zsg = "avif,avifs,bmp,gif,heic,heics,heif,heifs,ico,j2p,j2k,jp2,jpeg,jpg,jpx,png,tga,tif,tiff,webp"
CV_EXTS = set(zsg.split(","))

zsg = "nohash noidx xdev xvol"
VF_AFFECTS_INDEXING = set(zsg.split(" "))


SBUSY = "cannot receive uploads right now;\nserver busy with %s.\nPlease wait; the client will retry..."

HINT_HISTPATH = "you could try moving the database to another location (preferably an SSD or NVME drive) using either the --hist argument (global option for all volumes), or the hist volflag (just for this volume)"


NULLSTAT = os.stat_result((0, -1, -1, 0, 0, 0, 0, 0, 0, 0))


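# Dbw: tiny write-batcher around a db cursor; c=cursor, n=uncommitted row
# changes, nf=new files since last commit, t=time of last commit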
class Dbw(object):
    def __init__(self, c: "sqlite3.Cursor", n: int, nf: int, t: float) -> None:
        self.c = c
        self.n = n
        self.nf = nf
        self.t = t


class Mpqe(object):
    """pending files to tag-scan"""

    def __init__(
        self,
        mtp: dict[str, MParser],
        entags: set[str],
        w: str,
        abspath: str,
        oth_tags: dict[str, Any],
    ):
        # mtp empty = mtag
        self.mtp = mtp
        self.entags = entags
        self.w = w
        self.abspath = abspath
        self.oth_tags = oth_tags


class Up2k(object):
    def __init__(self, hub: "SvcHub") -> None:
        self.hub = hub
        self.asrv: AuthSrv = hub.asrv
        self.args = hub.args
        self.log_func = hub.log

        self.vfs = self.asrv.vfs
        self.acct = self.asrv.acct
        self.iacct = self.asrv.iacct
        self.grps = self.asrv.grps

        self.salt = self.args.warksalt
        self.r_hash = re.compile("^[0-9a-zA-Z_-]{44}$")

        self.gid = 0
        self.gt0 = 0
        self.gt1 = 0
        self.stop = False
        self.mutex = threading.Lock()
        self.reload_mutex = threading.Lock()
        self.reload_flag = 0
        self.reloading = False
        self.blocked: Optional[str] = None
        self.pp: Optional[ProgressPrinter] = None
        self.rescan_cond = threading.Condition()
        self.need_rescan: set[str] = set()
        self.db_act = 0.0

        self.reg_mutex = threading.Lock()
        self.registry: dict[str, dict[str, dict[str, Any]]] = {}
        self.flags: dict[str, dict[str, Any]] = {}
        self.droppable: dict[str, list[str]] = {}
        self.volnfiles: dict["sqlite3.Cursor", int] = {}
        self.volsize: dict["sqlite3.Cursor", int] = {}
        self.volstate: dict[str, str] = {}
        self.vol_act: dict[str, float] = {}
        self.busy_aps: dict[str, int] = {}
        self.dupesched: dict[str, list[tuple[str, str, float]]] = {}
        self.snap_prev: dict[str, Optional[tuple[int, float]]] = {}

        self.mtag: Optional[MTag] = None
        self.entags: dict[str, set[str]] = {}
        self.mtp_parsers: dict[str, dict[str, MParser]] = {}
        self.pending_tags: list[tuple[set[str], str, str, dict[str, Any]]] = []
        self.hashq: Queue[
            tuple[str, str, dict[str, Any], str, str, str, float, str, bool]
        ] = Queue()
        self.tagq: Queue[tuple[str, str, str, str, int, str, float]] = Queue()
        self.tag_event = threading.Condition()
        self.hashq_mutex = threading.Lock()
        self.n_hashq = 0
        self.n_tagq = 0
        self.mpool_used = False

        self.xiu_ptn = re.compile(r"(?:^|,)i([0-9]+)")
        self.xiu_busy = False  # currently running hook
        self.xiu_asleep = True  # needs rescan_cond poke to schedule self
        self.fx_backlog: list[tuple[str, dict[str, str], str]] = []

        self.cur: dict[str, "sqlite3.Cursor"] = {}
        self.mem_cur = None
        self.sqlite_ver = None
        self.no_expr_idx = False
        self.timeout = int(max(self.args.srch_time, 50) * 1.2) + 1
        self.spools: set[tempfile.SpooledTemporaryFile[bytes]] = set()
        if HAVE_SQLITE3:
            # mojibake detector
            assert sqlite3  # type: ignore  # !rm
            self.mem_cur = self._orz(":memory:")
            self.mem_cur.execute(r"create table a (b text)")
            self.sqlite_ver = tuple([int(x) for x in sqlite3.sqlite_version.split(".")])
            if self.sqlite_ver < (3, 9):
                self.no_expr_idx = True
        else:
            t = "could not initialize sqlite3, will use in-memory registry only"
            self.log(t, 3)

        self.fstab = Fstab(self.log_func, self.args)
        self.gen_fk = self._gen_fk if self.args.log_fk else gen_filekey

        if self.args.hash_mt < 2:
            self.mth: Optional[MTHash] = None
        else:
            self.mth = MTHash(self.args.hash_mt)

        if self.args.no_fastboot:
            self.deferred_init()

    def init_vols(self) -> None:
        if self.args.no_fastboot:
            return

        Daemon(self.deferred_init, "up2k-deferred-init")

    def unpp(self) -> None:
        self.gt1 = time.time()
        if self.pp:
            self.pp.end = True
            self.pp = None

    def reload(self, rescan_all_vols: bool) -> None:
        n = 2 if rescan_all_vols else 1
        with self.reload_mutex:
            if self.reload_flag < n:
                self.reload_flag = n
                with self.rescan_cond:
                    self.rescan_cond.notify_all()

    def _reload_thr(self) -> None:
        while self.pp:
            time.sleep(0.1)
        while True:
            with self.reload_mutex:
                if not self.reload_flag:
                    break
                rav = self.reload_flag == 2
                self.reload_flag = 0
            gt1 = self.gt1
            with self.mutex:
                self._reload(rav)
            while gt1 == self.gt1 or self.pp:
                time.sleep(0.1)

        self.reloading = False

    def _reload(self, rescan_all_vols: bool) -> None:
        """mutex(main) me"""
        self.log("reload #{} scheduled".format(self.gid + 1))
        all_vols = self.asrv.vfs.all_vols

        with self.reg_mutex:
            scan_vols = [
                k for k, v in all_vols.items() if v.realpath not in self.registry
            ]

        if rescan_all_vols:
            scan_vols = list(all_vols.keys())

        self._rescan(all_vols, scan_vols, True, False)

    def deferred_init(self) -> None:
        all_vols = self.asrv.vfs.all_vols
        have_e2d = self.init_indexes(all_vols, [], False)

        if self.stop:
            # up-mt consistency not guaranteed if init is interrupted;
            # drop caches for a full scan on next boot
            with self.mutex, self.reg_mutex:
                self._drop_caches()

            self.unpp()
            return

        if not self.pp and self.args.exit == "idx":
            return self.hub.sigterm()

        if self.hub.is_dut:
            return

        Daemon(self._snapshot, "up2k-snapshot")
        if have_e2d:
            Daemon(self._hasher, "up2k-hasher")
            Daemon(self._sched_rescan, "up2k-rescan")
            if self.mtag:
                for n in range(max(1, self.args.mtag_mt)):
                    Daemon(self._tagger, "tagger-{}".format(n))

    def log(self, msg: str, c: Union[int, str] = 0) -> None:
        if self.pp:
            msg += "\033[K"

        self.log_func("up2k", msg, c)

    def _gen_fk(self, alg: int, salt: str, fspath: str, fsize: int, inode: int) -> str:
        return gen_filekey_dbg(
            alg, salt, fspath, fsize, inode, self.log, self.args.log_fk
        )

    def _block(self, why: str) -> None:
        self.blocked = why
        self.log("uploads temporarily blocked due to " + why, 3)

    def _unblock(self) -> None:
        if self.blocked is not None:
            self.blocked = None
            if not self.stop:
                self.log("uploads are now possible", 2)

    def get_state(self, get_q: bool, uname: str) -> str:
        mtpq: Union[int, str] = 0
        ups = []
        up_en = not self.args.no_up_list
        q = "select count(w) from mt where k = 't:mtp'"
        got_lock = False if PY2 else self.mutex.acquire(timeout=0.5)
        if got_lock:
            try:
                for cur in self.cur.values() if get_q else []:
                    try:
                        mtpq += cur.execute(q).fetchone()[0]
                    except:
                        pass
                if uname and up_en:
                    ups = self._active_uploads(uname)
            finally:
                self.mutex.release()
        else:
            mtpq = "(?)"
            if up_en:
                ups = [(1, 0, 0, time.time(), "cannot show list (server too busy)")]
                if PY2:
                    ups = []

        ups.sort(reverse=True)

        ret = {
            "volstate": self.volstate,
            "scanning": bool(self.pp),
            "hashq": self.n_hashq,
            "tagq": self.n_tagq,
            "mtpq": mtpq,
            "ups": ups,
            "dbwu": "{:.2f}".format(self.db_act),
            "dbwt": "{:.2f}".format(
                min(1000 * 24 * 60 * 60 - 1, time.time() - self.db_act)
            ),
        }
        return json.dumps(ret, separators=(",\n", ": "))

    def _active_uploads(self, uname: str) -> list[tuple[float, int, int, str]]:
        ret = []
        for vtop in self.vfs.aread.get(uname) or []:
            vfs = self.vfs.all_vols.get(vtop)
            if not vfs:  # dbv only
                continue
            ptop = vfs.realpath
            tab = self.registry.get(ptop)
            if not tab:
                continue
            for job in tab.values():
                ineed = len(job["need"])
                ihash = len(job["hash"])
                if ineed == ihash or not ineed:
                    continue

                zt = (
                    ineed / ihash,
                    job["size"],
                    int(job["t0c"]),
                    int(job["poke"]),
                    djoin(vtop, job["prel"], job["name"]),
                )
                ret.append(zt)
        return ret

    def find_job_by_ap(self, ptop: str, ap: str) -> str:
        try:
            if ANYWIN:
                ap = ap.replace("\\", "/")

            vp = ap[len(ptop) :].strip("/")
            dn, fn = vsplit(vp)
            with self.reg_mutex:
                tab2 = self.registry[ptop]
                for job in tab2.values():
                    if job["prel"] == dn and job["name"] == fn:
                        return json.dumps(job, separators=(",\n", ": "))
        except:
            pass

        return "{}"

    def get_unfinished_by_user(self, uname, ip) -> str:
        if PY2 or not self.reg_mutex.acquire(timeout=2):
            return '[{"timeout":1}]'

        ret: list[tuple[int, str, int, int, int]] = []
        userset = set([(uname or "\n"), "*"])
        try:
            for ptop, tab2 in self.registry.items():
                cfg = self.flags.get(ptop, {}).get("u2abort", 1)
                if not cfg:
                    continue
                addr = (ip or "\n") if cfg in (1, 2) else ""
                user = userset if cfg in (1, 3) else None
                for job in tab2.values():
                    if (
                        "done" in job
                        or (user and job["user"] not in user)
                        or (addr and addr != job["addr"])
                    ):
                        continue

                    zt5 = (
                        int(job["t0"]),
                        djoin(job["vtop"], job["prel"], job["name"]),
                        job["size"],
                        len(job["need"]),
                        len(job["hash"]),
                    )
                    ret.append(zt5)
        finally:
            self.reg_mutex.release()

        if ANYWIN:
            ret = [(x[0], x[1].replace("\\", "/"), x[2], x[3], x[4]) for x in ret]

        ret.sort(reverse=True)
        ret2 = [
            {
                "at": at,
                "vp": "/" + quotep(vp),
                "pd": 100 - ((nn * 100) // (nh or 1)),
                "sz": sz,
            }
            for (at, vp, sz, nn, nh) in ret
        ]
        return json.dumps(ret2, separators=(",\n", ": "))

    def get_unfinished(self) -> str:
        if PY2 or not self.reg_mutex.acquire(timeout=0.5):
            return ""

        ret: dict[str, tuple[int, int]] = {}
        try:
            for ptop, tab2 in self.registry.items():
                nbytes = 0
                nfiles = 0
                for job in tab2.values():
                    if "done" in job:
                        continue

                    nfiles += 1
                    try:
                        # close enough on average
                        nbytes += len(job["need"]) * job["size"] // len(job["hash"])
                    except:
                        pass

                ret[ptop] = (nbytes, nfiles)
        finally:
            self.reg_mutex.release()

        return json.dumps(ret, separators=(",\n", ": "))

    def get_volsize(self, ptop: str) -> tuple[int, int]:
        with self.reg_mutex:
            return self._get_volsize(ptop)

    def get_volsizes(self, ptops: list[str]) -> list[tuple[int, int]]:
        ret = []
        with self.reg_mutex:
            for ptop in ptops:
                ret.append(self._get_volsize(ptop))

        return ret

    def _get_volsize(self, ptop: str) -> tuple[int, int]:
        if "e2ds" not in self.flags.get(ptop, {}):
            return (0, 0)

        cur = self.cur[ptop]
        nbytes = self.volsize[cur]
        nfiles = self.volnfiles[cur]
        for j in list(self.registry.get(ptop, {}).values()):
            nbytes += j["size"]
            nfiles += 1

        return (nbytes, nfiles)

    def rescan(
        self, all_vols: dict[str, VFS], scan_vols: list[str], wait: bool, fscan: bool
    ) -> str:
        with self.mutex:
            return self._rescan(all_vols, scan_vols, wait, fscan)

    def _rescan(
        self, all_vols: dict[str, VFS], scan_vols: list[str], wait: bool, fscan: bool
    ) -> str:
        """mutex(main) me"""
        if not wait and self.pp:
            return "cannot initiate; scan is already in progress"

        self.gid += 1
        Daemon(
            self.init_indexes,
            "up2k-rescan-{}".format(scan_vols[0] if scan_vols else "all"),
            (all_vols, scan_vols, fscan, self.gid),
        )
        return ""

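    # _sched_rescan: background scheduler; wakes on rescan_cond pokes, then
    # runs scheduled reloads, lifetime/share/forget-ip expiry and xiu hooks,
    # deferring volume rescans while the db has recent write activity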
    def _sched_rescan(self) -> None:
        volage = {}
        cooldown = timeout = time.time() + 3.0
        while not self.stop:
            now = time.time()
            timeout = max(timeout, cooldown)
            wait = timeout - time.time()
            # self.log("SR in {:.2f}".format(wait), 5)
            with self.rescan_cond:
                self.rescan_cond.wait(wait)

            if self.stop:
                return

            with self.reload_mutex:
                if self.reload_flag and not self.reloading:
                    self.reloading = True
                    zs = "up2k-reload-%d" % (self.gid,)
                    Daemon(self._reload_thr, zs)

            now = time.time()
            if now < cooldown:
                # self.log("SR: cd - now = {:.2f}".format(cooldown - now), 5)
                timeout = cooldown  # wakeup means stuff to do, forget timeout
                continue

            if self.pp:
                # self.log("SR: pp; cd := 1", 5)
                cooldown = now + 1
                continue

            cooldown = now + 3
            # self.log("SR", 5)

            if self.args.no_lifetime and not self.args.shr:
                timeout = now + 9001
            else:
                # important; not deferred by db_act
                timeout = self._check_lifetimes()
                timeout = min(self._check_forget_ip(), timeout)
                try:
                    if self.args.shr:
                        timeout = min(self._check_shares(), timeout)
                except Exception as ex:
                    timeout = min(timeout, now + 60)
                    t = "could not check for expiring shares: %r"
                    self.log(t % (ex,), 1)

            try:
                timeout = min(timeout, now + self._check_xiu())
            except Exception as ex:
                if "closed cursor" in str(ex):
                    self.log("sched_rescan: lost db")
                    return
                raise

            with self.mutex:
                for vp, vol in sorted(self.vfs.all_vols.items()):
                    maxage = vol.flags.get("scan")
                    if not maxage:
                        continue

                    if vp not in volage:
                        volage[vp] = now

                    deadline = volage[vp] + maxage
                    if deadline <= now:
                        self.need_rescan.add(vp)

                    timeout = min(timeout, deadline)

            if self.db_act > now - self.args.db_act and self.need_rescan:
                # recent db activity; defer volume rescan
                act_timeout = self.db_act + self.args.db_act
                if self.need_rescan:
                    timeout = now

                if timeout < act_timeout:
                    timeout = act_timeout
                    t = "volume rescan deferred {:.1f} sec, due to database activity"
                    self.log(t.format(timeout - now))

                continue

            with self.mutex:
                vols = list(sorted(self.need_rescan))
                self.need_rescan.clear()

            if vols:
                cooldown = now + 10
                err = self.rescan(self.vfs.all_vols, vols, False, False)
                if err:
                    for v in vols:
                        self.need_rescan.add(v)

                    continue

                for v in vols:
                    volage[v] = now

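    # forget_ip: scrub uploader-IPs from the db once they are older than the
    # volume's forget_ip volflag (minutes); returns the next check deadline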
    def _check_forget_ip(self) -> float:
        now = time.time()
        timeout = now + 9001
        for vp, vol in sorted(self.vfs.all_vols.items()):
            maxage = vol.flags["forget_ip"]
            if not maxage:
                continue

            cur = self.cur.get(vol.realpath)
            if not cur:
                continue

            cutoff = now - maxage * 60

            for _ in range(2):
                q = "select ip, at from up where ip > '' order by +at limit 1"
                hits = cur.execute(q).fetchall()
                if not hits:
                    break

                remains = hits[0][1] - cutoff
                if remains > 0:
                    timeout = min(timeout, now + remains)
                    break

                q = "update up set ip = '' where ip > '' and at <= %d"
                cur.execute(q % (cutoff,))
                zi = cur.rowcount
                cur.connection.commit()

                t = "forget-ip(%d) removed %d IPs from db [/%s]"
                self.log(t % (maxage, zi, vol.vpath))

                timeout = min(timeout, now + 900)

        return timeout

    def _check_lifetimes(self) -> float:
        now = time.time()
        timeout = now + 9001
        for vp, vol in sorted(self.vfs.all_vols.items()):
            lifetime = vol.flags.get("lifetime")
            if not lifetime:
                continue

            cur = self.cur.get(vol.realpath)
            if not cur:
                continue

            nrm = 0
            deadline = time.time() - lifetime
            timeout = min(timeout, now + lifetime)
            q = "select rd, fn from up where at > 0 and at < ? limit 100"
            while True:
                with self.mutex:
                    hits = cur.execute(q, (deadline,)).fetchall()

                if not hits:
                    break

                for rd, fn in hits:
                    if rd.startswith("//") or fn.startswith("//"):
                        rd, fn = s3dec(rd, fn)

                    fvp = ("%s/%s" % (rd, fn)).strip("/")
                    if vp:
                        fvp = "%s/%s" % (vp, fvp)

                    self._handle_rm(LEELOO_DALLAS, "", fvp, [], True, False)
                    nrm += 1

            if nrm:
                self.log("%d files graduated in /%s" % (nrm, vp))

            if timeout < 10:
                continue

            q = "select at from up where at > 0 order by at limit 1"
            with self.mutex:
                hits = cur.execute(q).fetchone()

            if hits:
                timeout = min(timeout, now + lifetime - (now - hits[0]))

        return timeout

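    # shares expire in two steps: once t1 is reached the share is disabled
    # (vfs reload); once it is shr-rt minutes past t1 the rows are deleted
    # from the shares-db and the share can no longer be revived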
    def _check_shares(self) -> float:
        assert sqlite3  # type: ignore  # !rm

        now = time.time()
        timeout = now + 9001
        maxage = self.args.shr_rt * 60
        low = now - maxage

        vn = self.vfs.nodes.get(self.args.shr.strip("/"))
        active = vn and vn.nodes

        db = sqlite3.connect(self.args.shr_db, timeout=2)
        cur = db.cursor()
        reload = False  # set below if any active shares just expired

        q = "select k from sh where t1 and t1 <= ?"
        rm = [x[0] for x in cur.execute(q, (now,))] if active else []
        if rm:
            assert vn and vn.nodes  # type: ignore
            # self.log("chk_shr: %d" % (len(rm),))
            zss = set(rm)
            rm = [zs for zs in vn.nodes if zs in zss]
            reload = bool(rm)
            if reload:
                self.log("disabling expired shares %s" % (rm,))

        rm = [x[0] for x in cur.execute(q, (low,))]
        if rm:
            self.log("forgetting expired shares %s" % (rm,))
            cur.executemany("delete from sh where k=?", [(x,) for x in rm])
            cur.executemany("delete from sf where k=?", [(x,) for x in rm])
            db.commit()

        if reload:
            Daemon(self.hub.reload, "sharedrop", (False, False))

        q = "select min(t1) from sh where t1 > ?"
        (earliest,) = cur.execute(q, (1,)).fetchone()
        if earliest:
            # deadline for revoking regular access
            timeout = min(timeout, earliest + maxage)

        (earliest,) = cur.execute(q, (now - 2,)).fetchone()
        if earliest:
            # deadline for revival; drop entirely
            timeout = min(timeout, earliest)

        cur.close()
        db.close()

        if self.args.shr_v:
            self.log("next shr_chk = %d (%d)" % (timeout, timeout - time.time()))

        return timeout

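    # xiu ("execute when idle after upload") hooks are batched per
    # cooldown-value; a hook with cooldown c fires once its volume has
    # been idle for c seconds since the last upload activity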
    def _check_xiu(self) -> float:
        if self.xiu_busy:
            return 2

        ret = 9001
        for _, vol in sorted(self.vfs.all_vols.items()):
            rp = vol.realpath
            cur = self.cur.get(rp)
            if not cur:
                continue

            with self.mutex:
                q = "select distinct c from iu"
                cds = cur.execute(q).fetchall()
                if not cds:
                    continue

            run_cds: list[int] = []
            for cd in sorted([x[0] for x in cds]):
                delta = cd - (time.time() - self.vol_act[rp])
                if delta > 0:
                    ret = min(ret, delta)
                    break

                run_cds.append(cd)

            if run_cds:
                self.xiu_busy = True
                Daemon(self._run_xius, "xiu", (vol, run_cds))
                return 2

        return ret

    def _run_xius(self, vol: VFS, cds: list[int]):
        for cd in cds:
            self._run_xiu(vol, cd)

        self.xiu_busy = False
        self.xiu_asleep = True

    def _run_xiu(self, vol: VFS, cd: int):
        rp = vol.realpath
        cur = self.cur[rp]

        # t0 = time.time()
        with self.mutex:
            q = "select w,rd,fn from iu where c={} limit 80386"
            wrfs = cur.execute(q.format(cd)).fetchall()
            if not wrfs:
                return

            # dont wanna rebox so use format instead of prepared
            q = "delete from iu where w=? and +rd=? and +fn=? and +c={}"
            cur.executemany(q.format(cd), wrfs)
            cur.connection.commit()

            q = "select * from up where substr(w,1,16)=? and +rd=? and +fn=?"
            ups = []
            for wrf in wrfs:
                up = cur.execute(q, wrf).fetchone()
                if up:
                    ups.append(up)

        # t1 = time.time()
        # self.log("mapped {} warks in {:.3f} sec".format(len(wrfs), t1 - t0))
        # "mapped 10989 warks in 0.126 sec"

        cmds = self.flags[rp]["xiu"]
        for cmd in cmds:
            m = self.xiu_ptn.search(cmd)
            ccd = int(m.group(1)) if m else 5
            if ccd != cd:
                continue

            self.log("xiu: %d# %r" % (len(wrfs), cmd))
            runihook(self.log, self.args.hook_v, cmd, vol, ups)

    def _vis_job_progress(self, job: dict[str, Any]) -> str:
        perc = 100 - (len(job["need"]) * 100.0 / (len(job["hash"]) or 1))
        path = djoin(job["ptop"], job["prel"], job["name"])
        return "{:5.1f}% {}".format(perc, path)

    def _vis_reg_progress(self, reg: dict[str, dict[str, Any]]) -> list[str]:
        ret = []
        for _, job in reg.items():
            if job["need"]:
                ret.append(self._vis_job_progress(job))

        return ret

    def _expr_idx_filter(self, flags: dict[str, Any]) -> tuple[bool, dict[str, Any]]:
        if not self.no_expr_idx:
            return False, flags

        ret = {k: v for k, v in flags.items() if not k.startswith("e2t")}
        if len(ret) == len(flags):
            return False, flags

        return True, ret

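    # init_indexes: (re)scans volumes in phases: e2ds file indexing, e2v
    # integrity checks, e2ts tag scanning, then optional vacuum and wal
    # checkpointing; returns whether any volume has e2d enabled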
    def init_indexes(
        self, all_vols: dict[str, VFS], scan_vols: list[str], fscan: bool, gid: int = 0
    ) -> bool:
        if not gid:
            with self.mutex:
                gid = self.gid

        self.gt0 = time.time()

        nspin = 0
        while True:
            nspin += 1
            if nspin > 1:
                time.sleep(0.1)

            with self.mutex:
                if gid != self.gid:
                    return False

                if self.pp:
                    continue

                self.pp = ProgressPrinter(self.log, self.args)
                if not self.hub.is_dut:
                    self.pp.start()

            break

        if gid:
            self.log("reload #%d running" % (gid,))

        self.vfs = self.asrv.vfs
        self.acct = self.asrv.acct
        self.iacct = self.asrv.iacct
        self.grps = self.asrv.grps

        have_e2d = self.args.idp_h_usr or self.args.chpw or self.args.shr
        vols = list(all_vols.values())
        t0 = time.time()

        if self.no_expr_idx:
            modified = False
            for vol in vols:
                m, f = self._expr_idx_filter(vol.flags)
                if m:
                    vol.flags = f
                    modified = True

            if modified:
                msg = "disabling -e2t because your sqlite belongs in a museum"
                self.log(msg, c=3)

        live_vols = []
        with self.mutex, self.reg_mutex:
            # only need to protect register_vpath but all in one go feels right
            for vol in vols:
                try:
                    bos.makedirs(vol.realpath)  # gonna happen at snap anyways
                    dir_is_empty(self.log_func, not self.args.no_scandir, vol.realpath)
                except Exception as ex:
                    self.volstate[vol.vpath] = "OFFLINE (cannot access folder)"
                    self.log("cannot access %s: %r" % (vol.realpath, ex), c=1)
                    continue

                if scan_vols and vol.vpath not in scan_vols:
                    continue

                if not self.register_vpath(vol.realpath, vol.flags):
                    # self.log("db not enable for {}".format(m, vol.realpath))
                    continue

                live_vols.append(vol)

                if vol.vpath not in self.volstate:
                    self.volstate[vol.vpath] = "OFFLINE (pending initialization)"

        vols = live_vols
        need_vac = {}

        need_mtag = False
        for vol in vols:
            if "e2t" in vol.flags:
                need_mtag = True

        if need_mtag and not self.mtag:
            self.mtag = MTag(self.log_func, self.args)
            if not self.mtag.usable:
                self.mtag = None

        # e2ds(a) volumes first
        if next((zv for zv in vols if "e2ds" in zv.flags), None):
            self._block("indexing")

        if self.args.re_dhash or [zv for zv in vols if "e2tsr" in zv.flags]:
            self.args.re_dhash = False
            with self.mutex, self.reg_mutex:
                self._drop_caches()

        for vol in vols:
            if self.stop or gid != self.gid:
                break

            en = set(vol.flags.get("mte", {}))
            self.entags[vol.realpath] = en

            if "e2d" in vol.flags:
                have_e2d = True

            if "e2ds" in vol.flags or fscan:
                self.volstate[vol.vpath] = "busy (hashing files)"
                _, vac = self._build_file_index(vol, list(all_vols.values()))
                if vac:
                    need_vac[vol] = True

            if "e2v" in vol.flags:
                t = "online (integrity-check pending)"
            elif "e2ts" in vol.flags:
                t = "online (tags pending)"
            else:
                t = "online, idle"

            self.volstate[vol.vpath] = t

        self._unblock()

        # file contents verification
        for vol in vols:
            if self.stop:
                break

            if "e2v" not in vol.flags:
                continue

            t = "online (verifying integrity)"
            self.volstate[vol.vpath] = t
            self.log("{} [{}]".format(t, vol.realpath))

            nmod = self._verify_integrity(vol)
            if nmod:
                self.log("modified {} entries in the db".format(nmod), 3)
                need_vac[vol] = True

            if "e2ts" in vol.flags:
                t = "online (tags pending)"
            else:
                t = "online, idle"

            self.volstate[vol.vpath] = t

        # open the rest + do any e2ts(a)
        needed_mutagen = False
        for vol in vols:
            if self.stop:
                break

            if "e2ts" not in vol.flags:
                continue

            t = "online (reading tags)"
            self.volstate[vol.vpath] = t
            self.log("{} [{}]".format(t, vol.realpath))

            nadd, nrm, success = self._build_tags_index(vol)
            if not success:
                needed_mutagen = True

            if nadd or nrm:
                need_vac[vol] = True

            self.volstate[vol.vpath] = "online (mtp soon)"

        for vol in need_vac:
            with self.mutex, self.reg_mutex:
                reg = self.register_vpath(vol.realpath, vol.flags)

            assert reg  # !rm
            cur, _ = reg
            with self.mutex:
                cur.connection.commit()
                cur.execute("vacuum")

        if self.stop:
            return False

        for vol in all_vols.values():
            if vol.flags["dbd"] == "acid":
                continue

            with self.mutex, self.reg_mutex:
                reg = self.register_vpath(vol.realpath, vol.flags)

            try:
                assert reg  # !rm
                cur, db_path = reg
                if bos.path.getsize(db_path + "-wal") < 1024 * 1024 * 5:
                    continue
            except:
                continue

            try:
                with self.mutex:
                    cur.execute("pragma wal_checkpoint(truncate)")
                    try:
                        cur.execute("commit")  # absolutely necessary! for some reason
                    except:
                        pass

                    cur.connection.commit()  # this one maybe not
            except Exception as ex:
                self.log("checkpoint failed: {}".format(ex), 3)

        if self.stop:
            return False

        self.pp.end = True

        msg = "{} volumes in {:.2f} sec"
        self.log(msg.format(len(vols), time.time() - t0))

        if needed_mutagen:
            msg = "could not read tags because no backends are available (Mutagen or FFprobe)"
            self.log(msg, c=1)

        t = "online (running mtp)" if self.mtag else "online, idle"
        for vol in vols:
            self.volstate[vol.vpath] = t

        if self.mtag:
            Daemon(self._run_all_mtp, "up2k-mtp-scan", (gid,))
        else:
            self.unpp()

        return have_e2d

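    # register_vpath: loads the up2k.snap upload-registry for a volume and
    # opens its up2k.db, applying the dbd preset (journal_mode + synchronous)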
    def register_vpath(
        self, ptop: str, flags: dict[str, Any]
    ) -> Optional[tuple["sqlite3.Cursor", str]]:
        """mutex(main,reg) me"""
        histpath = self.vfs.histtab.get(ptop)
        if not histpath:
            self.log("no histpath for %r" % (ptop,))
            return None

        db_path = os.path.join(histpath, "up2k.db")
        if ptop in self.registry:
            try:
                return self.cur[ptop], db_path
            except:
                return None

        vpath = "?"
        for k, v in self.vfs.all_vols.items():
            if v.realpath == ptop:
                vpath = k

        _, flags = self._expr_idx_filter(flags)
        n4g = bool(flags.get("noforget"))

        ft = "\033[0;32m{}{:.0}"
        ff = "\033[0;35m{}{:.0}"
        fv = "\033[0;36m{}:\033[90m{}"
        zs = "ext_th_d html_head mv_re_r mv_re_t rm_re_r rm_re_t srch_re_dots srch_re_nodot zipmax"
        fx = set(zs.split())
        fd = vf_bmap()
        fd.update(vf_cmap())
        fd.update(vf_vmap())
        fd = {v: k for k, v in fd.items()}
        fl = {
            k: v
            for k, v in flags.items()
            if k not in fd
            or (
                v != getattr(self.args, fd[k])
                and str(v) != str(getattr(self.args, fd[k]))
            )
        }
        for k1, k2 in vf_cmap().items():
            if k1 not in fl or k1 in fx:
                continue
            if str(fl[k1]) == str(getattr(self.args, k2)):
                del fl[k1]
            else:
                fl[k1] = ",".join(x for x in fl[k1])
        a = [
            (ft if v is True else ff if v is False else fv).format(k, str(v))
            for k, v in fl.items()
            if k not in fx
        ]
        if not a:
            a = ["\033[90mall-default"]

        if a:
            zs = " ".join(sorted(a))
            zs = zs.replace("90mre.compile(", "90m(")  # nohash
            self.log("/{} {}".format(vpath + ("/" if vpath else ""), zs), "35")

        reg = {}
        drp = None
        emptylist = []
        dotpart = "." if self.args.dotpart else ""
        snap = os.path.join(histpath, "up2k.snap")
        if bos.path.exists(snap):
            with gzip.GzipFile(snap, "rb") as f:
                j = f.read().decode("utf-8")

            reg2 = json.loads(j)
            try:
                drp = reg2["droppable"]
                reg2 = reg2["registry"]
            except:
                pass

            reg = reg2  # diff-golf

            if reg2 and "dwrk" not in reg2[next(iter(reg2))]:
                for job in reg2.values():
                    job["dwrk"] = job["wark"]

            rm = []
            for k, job in reg2.items():
                job["ptop"] = ptop
                is_done = "done" in job
                if is_done:
                    job["need"] = job["hash"] = emptylist
                else:
                    if "need" not in job:
                        job["need"] = []
                    if "hash" not in job:
                        job["hash"] = []

                if is_done:
                    fp = djoin(ptop, job["prel"], job["name"])
                else:
                    fp = djoin(ptop, job["prel"], dotpart + job["name"] + ".PARTIAL")

                if bos.path.exists(fp):
                    if is_done:
                        continue
                    job["poke"] = time.time()
                    job["busy"] = {}
                else:
                    self.log("ign deleted file in snap: %r" % (fp,))
                    if not n4g:
                        rm.append(k)

            for x in rm:
                del reg2[x]

            # optimize pre-1.15.4 entries
            if next((x for x in reg.values() if "done" in x and "poke" in x), None):
                zsl = "host tnam busy sprs poke t0c".split()
                for job in reg.values():
                    if "done" in job:
                        for k in zsl:
                            job.pop(k, None)

            if drp is None:
                drp = [k for k, v in reg.items() if not v["need"]]
            else:
                drp = [x for x in drp if x in reg]

            t = "loaded snap {} |{}| ({})".format(snap, len(reg.keys()), len(drp or []))
            ta = [t] + self._vis_reg_progress(reg)
            self.log("\n".join(ta))

        self.flags[ptop] = flags
        self.vol_act[ptop] = 0.0
        self.registry[ptop] = reg
        self.droppable[ptop] = drp or []
        self.regdrop(ptop, "")
        if not HAVE_SQLITE3 or "e2d" not in flags or "d2d" in flags:
            return None

        try:
            if bos.makedirs(histpath):
                hidedir(histpath)
        except Exception as ex:
            t = "failed to initialize volume '/%s': %s"
            self.log(t % (vpath, ex), 1)
            return None

        try:
            cur = self._open_db_wd(db_path)

            # speeds measured uploading 520 small files on a WD20SPZX (SMR 2.5" 5400rpm 4kb)
            dbd = flags["dbd"]
            if dbd == "acid":
                # 217.5s; python-defaults
                zs = "delete"
                sync = "full"
            elif dbd == "swal":
                # 88.0s; still 99.9% safe (can lose a bit of data on OS crash)
                zs = "wal"
                sync = "full"
            elif dbd == "yolo":
                # 2.7s; may lose entire db on OS crash
                zs = "wal"
                sync = "off"
            else:
                # 4.1s; corruption-safe but more likely to lose wal
                zs = "wal"
                sync = "normal"

            try:
                amode = cur.execute("pragma journal_mode=" + zs).fetchone()[0]
                if amode.lower() != zs.lower():
                    t = "sqlite failed to set journal_mode {}; got {}"
                    raise Exception(t.format(zs, amode))
            except Exception as ex:
                if sync != "off":
                    sync = "full"
                t = "reverting to sync={} because {}"
                self.log(t.format(sync, ex))

            cur.execute("pragma synchronous=" + sync)
            cur.connection.commit()

            self._verify_db_cache(cur, vpath)

            self.cur[ptop] = cur
            self.volsize[cur] = 0
            self.volnfiles[cur] = 0

            return cur, db_path
        except:
            msg = "ERROR: cannot use database at [%s]:\n%s\n\033[33mhint: %s\n"
            self.log(msg % (db_path, traceback.format_exc(), HINT_HISTPATH), 1)

        return None

    def _verify_db_cache(self, cur: "sqlite3.Cursor", vpath: str) -> None:
        # check if list of intersecting volumes changed since last use; drop caches if so
        prefix = (vpath + "/").lstrip("/")
        vps = [x for x in self.vfs.all_vols if x.startswith(prefix)]
        vps.sort()
        seed = [x[len(prefix) :] for x in vps]

        # also consider volflags which affect indexing
        for vp in vps:
            vf = self.vfs.all_vols[vp].flags
            vf = {k: v for k, v in vf.items() if k in VF_AFFECTS_INDEXING}
            seed.append(str(sorted(vf.items())))

        zb = hashlib.sha1("\n".join(seed).encode("utf-8", "replace")).digest()
        vcfg = ub64enc(zb[:18]).decode("ascii")

        c = cur.execute("select v from kv where k = 'volcfg'")
        try:
            (oldcfg,) = c.fetchone()
        except:
            oldcfg = ""

        if oldcfg != vcfg:
            cur.execute("delete from kv where k = 'volcfg'")
            cur.execute("delete from dh")
            cur.execute("delete from cv")
            cur.execute("insert into kv values ('volcfg',?)", (vcfg,))
            cur.connection.commit()

    def _build_file_index(self, vol: VFS, all_vols: list[VFS]) -> tuple[bool, bool]:
        do_vac = False
        top = vol.realpath
        rei = vol.flags.get("noidx")
        reh = vol.flags.get("nohash")
        n4g = bool(vol.flags.get("noforget"))
        ffat = "fat32" in vol.flags
        cst = bos.stat(top)
        dev = cst.st_dev if vol.flags.get("xdev") else 0

        with self.mutex:
            with self.reg_mutex:
                reg = self.register_vpath(top, vol.flags)

            assert reg and self.pp  # !rm
            cur, db_path = reg

            db = Dbw(cur, 0, 0, time.time())
            self.pp.n = next(db.c.execute("select count(w) from up"))[0]

            excl = [
                vol.realpath + "/" + d.vpath[len(vol.vpath) :].lstrip("/")
                for d in all_vols
                if d != vol and (d.vpath.startswith(vol.vpath + "/") or not vol.vpath)
            ]
            excl += [absreal(x) for x in excl]
            excl += list(self.vfs.histtab.values())
            if WINDOWS:
                excl = [x.replace("/", "\\") for x in excl]
            else:
                # ~/.wine/dosdevices/z:/ and such
                excl.extend(("/dev", "/proc", "/run", "/sys"))

            if self.args.re_dirsz:
                db.c.execute("delete from ds")
                db.n += 1

            rtop = absreal(top)
            n_add = n_rm = 0
            try:
                if dir_is_empty(self.log_func, not self.args.no_scandir, rtop):
                    t = "volume /%s at [%s] is empty; will not be indexed as this could be due to an offline filesystem"
                    self.log(t % (vol.vpath, rtop), 6)
                    return True, False

                n_add, _, _ = self._build_dir(
                    db,
                    top,
                    set(excl),
                    top,
                    rtop,
                    rei,
                    reh,
                    n4g,
                    ffat,
                    [],
                    cst,
                    dev,
                    bool(vol.flags.get("xvol")),
                )
                if not n4g:
                    n_rm = self._drop_lost(db.c, top, excl)
            except Exception as ex:
                t = "failed to index volume [{}]:\n{}"
                self.log(t.format(top, min_ex()), c=1)
                if db_ex_chk(self.log, ex, db_path):
                    self.hub.log_stacks()

            if db.n:
                self.log("commit %d new files; %d updates" % (db.nf, db.n))

            if self.args.no_dhash:
                if db.c.execute("select d from dh").fetchone():
                    db.c.execute("delete from dh")
                    self.log("forgetting dhashes in {}".format(top))
            elif n_add or n_rm:
                self._set_tagscan(db.c, True)

            db.c.connection.commit()

            if (
                vol.flags.get("vmaxb")
                or vol.flags.get("vmaxn")
                or (self.args.stats and not self.args.nos_vol)
            ):
                zs = "select count(sz), sum(sz) from up"
                vn, vb = db.c.execute(zs).fetchone()
                vb = vb or 0
                vb += vn * 2048
                self.volsize[db.c] = vb
                self.volnfiles[db.c] = vn
                vmaxb = unhumanize(vol.flags.get("vmaxb") or "0")
                vmaxn = unhumanize(vol.flags.get("vmaxn") or "0")
                t = "{:>5} / {:>5} ( {:>5} / {:>5} files) in {}".format(
                    humansize(vb, True),
                    humansize(vmaxb, True),
                    humansize(vn, True).rstrip("B"),
                    humansize(vmaxn, True).rstrip("B"),
                    vol.realpath,
                )
                self.log(t)

            return True, bool(n_add or n_rm or do_vac)

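    # _build_dir: recursive filesystem indexer; returns (files-added,
    # files-total, bytes-total) for the subtree, skipping other volumes,
    # unfinished uploads, and (with xdev/xvol) other devices/filesystems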
    def _build_dir(
        self,
        db: Dbw,
        top: str,
        excl: set[str],
        cdir: str,
        rcdir: str,
        rei: Optional[Pattern[str]],
        reh: Optional[Pattern[str]],
        n4g: bool,
        ffat: bool,
        seen: list[str],
        cst: os.stat_result,
        dev: int,
        xvol: bool,
    ) -> tuple[int, int, int]:
        if xvol and not rcdir.startswith(top):
            self.log("skip xvol: %r -> %r" % (cdir, rcdir), 6)
            return 0, 0, 0

        if rcdir in seen:
            t = "bailing from symlink loop,\n prev: %r\n curr: %r\n from: %r"
            self.log(t % (seen[-1], rcdir, cdir), 3)
            return 0, 0, 0

        # total-files-added, total-num-files, recursive-size
        tfa = tnf = rsz = 0
        seen = seen + [rcdir]
        unreg: list[str] = []
        files: list[tuple[int, int, str]] = []
        fat32 = True
        cv = ""

        th_cvd = self.args.th_coversd
        th_cvds = self.args.th_coversd_set

        assert self.pp and self.mem_cur  # !rm
        self.pp.msg = "a%d %s" % (self.pp.n, cdir)

        rd = cdir[len(top) :].strip("/")
        if WINDOWS:
            rd = rd.replace("\\", "/").strip("/")

        rds = rd + "/" if rd else ""
        cdirs = cdir + os.sep

        g = statdir(self.log_func, not self.args.no_scandir, True, cdir, False)
        gl = sorted(g)
        partials = set([x[0] for x in gl if "PARTIAL" in x[0]])
        for iname, inf in gl:
            if self.stop:
                return -1, 0, 0

            rp = rds + iname
            abspath = cdirs + iname

            if rei and rei.search(abspath):
                unreg.append(rp)
                continue

            lmod = int(inf.st_mtime)
            if stat.S_ISLNK(inf.st_mode):
                try:
                    inf = bos.stat(abspath)
                except:
                    continue

            sz = inf.st_size
            if fat32 and not ffat and inf.st_mtime % 2:
                fat32 = False

            if stat.S_ISDIR(inf.st_mode):
                rap = absreal(abspath)
                if (
                    dev
                    and inf.st_dev != dev
                    and not (ANYWIN and bos.stat(rap).st_dev == dev)
                ):
                    self.log("skip xdev %s->%s: %r" % (dev, inf.st_dev, abspath), 6)
                    continue
                if abspath in excl or rap in excl:
                    unreg.append(rp)
                    continue
                if iname == ".th" and bos.path.isdir(os.path.join(abspath, "top")):
                    # abandoned or foreign, skip
                    continue
                # self.log(" dir: {}".format(abspath))
                try:
                    i1, i2, i3 = self._build_dir(
                        db,
                        top,
                        excl,
                        abspath,
                        rap,
                        rei,
                        reh,
                        n4g,
                        fat32,
                        seen,
                        inf,
                        dev,
                        xvol,
                    )
                    tfa += i1
                    tnf += i2
                    rsz += i3
                except:
                    t = "failed to index subdir %r:\n%s"
                    self.log(t % (abspath, min_ex()), 1)
            elif not stat.S_ISREG(inf.st_mode):
                self.log("skip type-0%o file %r" % (inf.st_mode, abspath))
            else:
                # self.log("file: {}".format(abspath))
                if rp.endswith(".PARTIAL") and time.time() - lmod < 60:
                    # rescan during upload
                    continue

                if not sz and (
                    "%s.PARTIAL" % (iname,) in partials
                    or ".%s.PARTIAL" % (iname,) in partials
                ):
                    # placeholder for unfinished upload
                    continue

                rsz += sz
                files.append((sz, lmod, iname))
                liname = iname.lower()
                if (
                    sz
                    and (
                        liname in th_cvds
                        or (
                            not cv
                            and liname.rsplit(".", 1)[-1] in CV_EXTS
                            and not iname.startswith(".")
                        )
                    )
                    and (
                        not cv
                        or liname not in th_cvds
                        or cv.lower() not in th_cvds
                        or th_cvd.index(liname) < th_cvd.index(cv.lower())
                    )
                ):
                    cv = iname

        if not self.args.no_dirsz:
            tnf += len(files)
            q = "select sz, nf from ds where rd=? limit 1"
            try:
                db_sz, db_nf = db.c.execute(q, (rd,)).fetchone() or (-1, -1)
                if rsz != db_sz or tnf != db_nf:
                    db.c.execute("delete from ds where rd=?", (rd,))
                    db.c.execute("insert into ds values (?,?,?)", (rd, rsz, tnf))
                    db.n += 1
            except:
                pass  # mojibake rd

        # folder of 1000 files = ~1 MiB RAM best-case (tiny filenames);
        # free up stuff we're done with before dhashing
        gl = []
        partials.clear()
        if not self.args.no_dhash:
            if len(files) < 9000:
                zh = hashlib.sha1(str(files).encode("utf-8", "replace"))
            else:
                zh = hashlib.sha1()
                _ = [zh.update(str(x).encode("utf-8", "replace")) for x in files]

            zh.update(cv.encode("utf-8", "replace"))
            zh.update(spack(b"<d", cst.st_mtime))
            dhash = ub64enc(zh.digest()[:12]).decode("ascii")
            sql = "select d from dh where d=? and +h=?"
            try:
                c = db.c.execute(sql, (rd, dhash))
                drd = rd
            except:
                drd = "//" + w8b64enc(rd)
                c = db.c.execute(sql, (drd, dhash))

            if c.fetchone():
                return tfa, tnf, rsz

        if cv and rd:
            # mojibake not supported (for performance / simplicity):
            try:
                q = "select * from cv where rd=? and dn=? and +fn=?"
                crd, cdn = rd.rsplit("/", 1) if "/" in rd else ("", rd)
                if not db.c.execute(q, (crd, cdn, cv)).fetchone():
                    db.c.execute("delete from cv where rd=? and dn=?", (crd, cdn))
                    db.c.execute("insert into cv values (?,?,?)", (crd, cdn, cv))
                    db.n += 1
            except Exception as ex:
                self.log("cover %r/%r failed: %s" % (rd, cv, ex), 6)

        seen_files = set([x[2] for x in files])  # for dropcheck
        for sz, lmod, fn in files:
            if self.stop:
                return -1, 0, 0

            rp = rds + fn
            abspath = cdirs + fn
            nohash = reh.search(abspath) if reh else False

            sql = "select w, mt, sz, ip, at from up where rd = ? and fn = ?"
            try:
                c = db.c.execute(sql, (rd, fn))
            except:
                c = db.c.execute(sql, s3enc(self.mem_cur, rd, fn))

            in_db = list(c.fetchall())
            if in_db:
                self.pp.n -= 1
                dw, dts, dsz, ip, at = in_db[0]
                if len(in_db) > 1:
                    t = "WARN: multiple entries: %r => %r |%d|\n%r"
                    rep_db = "\n".join([repr(x) for x in in_db])
                    self.log(t % (top, rp, len(in_db), rep_db))
                    dts = -1

                if fat32 and abs(dts - lmod) == 1:
                    dts = lmod

                if dts == lmod and dsz == sz and (nohash or dw[0] != "#" or not sz):
                    continue

                t = "reindex %r => %r mtime(%s/%s) size(%s/%s)"
                self.log(t % (top, rp, dts, lmod, dsz, sz))
                self.db_rm(db.c, rd, fn, 0)
                tfa += 1
                db.n += 1
                in_db = []
            else:
                dw = ""
                ip = ""
                at = 0

            self.pp.msg = "a%d %s" % (self.pp.n, abspath)

            if nohash or not sz:
                wark = up2k_wark_from_metadata(self.salt, sz, lmod, rd, fn)
            else:
                if sz > 1024 * 1024:
                    self.log("file: %r" % (abspath,))

                try:
                    hashes, _ = self._hashlist_from_file(
                        abspath, "a{}, ".format(self.pp.n)
                    )
                except Exception as ex:
                    self.log("hash: %r @ %r" % (ex, abspath))
                    continue

                if not hashes:
                    return -1, 0, 0

                wark = up2k_wark_from_hashlist(self.salt, sz, hashes)

            if dw and dw != wark:
                ip = ""
                at = 0

            # skip upload hooks by not providing vflags
            self.db_add(db.c, {}, rd, fn, lmod, sz, "", "", wark, wark, "", "", ip, at)
            db.n += 1
            db.nf += 1
            tfa += 1
            td = time.time() - db.t
            if db.n >= 4096 or td >= 60:
                self.log("commit %d new files; %d updates" % (db.nf, db.n))
                db.c.connection.commit()
                db.n = db.nf = 0
                db.t = time.time()

        if not self.args.no_dhash:
            db.c.execute("delete from dh where d = ?", (drd,))  # type: ignore
            db.c.execute("insert into dh values (?,?)", (drd, dhash))  # type: ignore

        if self.stop:
            return -1, 0, 0

        # drop shadowed folders
        for sh_rd in unreg:
            n = 0
            q = "select count(w) from up where (rd=? or rd like ?||'/%')"
            for sh_erd in [sh_rd, "//" + w8b64enc(sh_rd)]:
                try:
                    erd_erd = (sh_erd, sh_erd)
                    n = db.c.execute(q, erd_erd).fetchone()[0]
                    break
                except:
                    pass

            assert erd_erd  # type: ignore  # !rm

            if n:
                t = "forgetting %d shadowed autoindexed files in %r > %r"
                self.log(t % (n, top, sh_rd))

                q = "delete from dh where (d = ? or d like ?||'/%')"
                db.c.execute(q, erd_erd)

                q = "delete from up where (rd=? or rd like ?||'/%')"
                db.c.execute(q, erd_erd)
                tfa += n

            q = "delete from ds where (rd=? or rd like ?||'/%')"
            db.c.execute(q, erd_erd)

        if n4g:
            return tfa, tnf, rsz

        # drop missing files
        q = "select fn from up where rd = ?"
        try:
            c = db.c.execute(q, (rd,))
        except:
            c = db.c.execute(q, ("//" + w8b64enc(rd),))

        hits = [w8b64dec(x[2:]) if x.startswith("//") else x for (x,) in c]
        rm_files = [x for x in hits if x not in seen_files]
        n_rm = len(rm_files)
        for fn in rm_files:
            self.db_rm(db.c, rd, fn, 0)

        if n_rm:
            self.log("forgot {} deleted files".format(n_rm))

        return tfa, tnf, rsz

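    # _drop_lost: forget db entries whose backing dirs/files have vanished;
    # three passes: missing dirs, shadowed deleted files, stale cover entries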
    def _drop_lost(self, cur: "sqlite3.Cursor", top: str, excl: list[str]) -> int:
        rm = []
        n_rm = 0
        nchecked = 0
        assert self.pp

        # `_build_dir` did all unshadowed files; first do dirs:
        ndirs = next(cur.execute("select count(distinct rd) from up"))[0]
        c = cur.execute("select distinct rd from up order by rd desc")
        for (drd,) in c:
            nchecked += 1
            if drd.startswith("//"):
                rd = w8b64dec(drd[2:])
            else:
                rd = drd

            abspath = djoin(top, rd)
            self.pp.msg = "b%d %s" % (ndirs - nchecked, abspath)
            try:
                if os.path.isdir(abspath):
                    continue
            except:
                pass

            rm.append(drd)

        if rm:
            q = "select count(w) from up where rd = ?"
            for rd in rm:
                n_rm += next(cur.execute(q, (rd,)))[0]

            self.log("forgetting {} deleted dirs, {} files".format(len(rm), n_rm))
            for rd in rm:
                cur.execute("delete from dh where d = ?", (rd,))
                cur.execute("delete from up where rd = ?", (rd,))

        # then shadowed deleted files
        n_rm2 = 0
        c2 = cur.connection.cursor()
        excl = [x[len(top) + 1 :] for x in excl if x.startswith(top + "/")]
        q = "select rd, fn from up where (rd = ? or rd like ?||'%') order by rd"
        for rd in excl:
            for erd in [rd, "//" + w8b64enc(rd)]:
                try:
                    c = cur.execute(q, (erd, erd + "/"))
                    break
                except:
                    pass

            crd = "///"
            cdc: set[str] = set()
            for drd, dfn in c:
                rd, fn = s3dec(drd, dfn)

                if crd != rd:
                    crd = rd
                    try:
                        cdc = set(os.listdir(djoin(top, rd)))
                    except:
                        cdc.clear()

                if fn not in cdc:
                    q = "delete from up where rd = ? and fn = ?"
                    c2.execute(q, (drd, dfn))
                    n_rm2 += 1

        if n_rm2:
            self.log("forgetting {} shadowed deleted files".format(n_rm2))

        c2.connection.commit()

        # then covers
        n_rm3 = 0
        qu = "select 1 from up where rd=? and fn=? limit 1"
        q = "delete from cv where rd=? and dn=? and +fn=?"
        for crd, cdn, fn in cur.execute("select * from cv"):
            urd = vjoin(crd, cdn)
            if not c2.execute(qu, (urd, fn)).fetchone():
                c2.execute(q, (crd, cdn, fn))
                n_rm3 += 1

        if n_rm3:
            self.log("forgetting {} deleted covers".format(n_rm3))

        c2.connection.commit()
        c2.close()
        return n_rm + n_rm2

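    # _verify_integrity: recompute each file's wark (content hash, or
    # metadata-derived id for nohash files) and compare with the db; e2vu
    # updates mismatching rows in-place, e2vp terminates the server instead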
    def _verify_integrity(self, vol: VFS) -> int:
        """expensive; blocks database access until finished"""
        ptop = vol.realpath
        assert self.pp

        cur = self.cur[ptop]
        rei = vol.flags.get("noidx")
        reh = vol.flags.get("nohash")
        e2vu = "e2vu" in vol.flags
        e2vp = "e2vp" in vol.flags

        excl = [
            d[len(vol.vpath) :].lstrip("/")
            for d in self.vfs.all_vols
            if d != vol.vpath and (d.startswith(vol.vpath + "/") or not vol.vpath)
        ]
        qexa: list[str] = []
        pexa: list[str] = []
        for vpath in excl:
            qexa.append("up.rd != ? and not up.rd like ?||'%'")
            pexa.extend([vpath, vpath])

        pex: tuple[Any, ...] = tuple(pexa)
        qex = " and ".join(qexa)
        if qex:
            qex = " where " + qex

        rewark: list[tuple[str, str, str, int, int]] = []
        f404: list[tuple[str, str, str]] = []

        with self.mutex:
            b_left = 0
            n_left = 0
            q = "select sz from up" + qex
            for (sz,) in cur.execute(q, pex):
                b_left += sz  # sum() can overflow according to docs
                n_left += 1

            tf, _ = self._spool_warks(cur, "select w, rd, fn from up" + qex, pex, 0)

        with gzip.GzipFile(mode="rb", fileobj=tf) as gf:
            for zb in gf:
                if self.stop:
                    return -1

                zs = zb[:-1].decode("utf-8").replace("\x00\x02", "\n")
                w, drd, dfn = zs.split("\x00\x01")
                with self.mutex:
                    q = "select mt, sz from up where rd=? and fn=? and +w=?"
                    try:
                        mt, sz = cur.execute(q, (drd, dfn, w)).fetchone()
                    except:
                        # file moved/deleted since spooling
                        continue

                n_left -= 1
                b_left -= sz
                if drd.startswith("//") or dfn.startswith("//"):
                    rd, fn = s3dec(drd, dfn)
                else:
                    rd = drd
                    fn = dfn

                abspath = djoin(ptop, rd, fn)
                if rei and rei.search(abspath):
                    continue

                nohash = reh.search(abspath) if reh else False

                pf = "v{}, {:.0f}+".format(n_left, b_left / 1024 / 1024)
                self.pp.msg = pf + abspath

                try:
                    stl = bos.lstat(abspath)
                    st = bos.stat(abspath) if stat.S_ISLNK(stl.st_mode) else stl
                except Exception as ex:
                    self.log("missing file: %r" % (abspath,), 3)
                    f404.append((drd, dfn, w))
                    continue

                mt2 = int(stl.st_mtime)
                sz2 = st.st_size

                if nohash or not sz2:
                    w2 = up2k_wark_from_metadata(self.salt, sz2, mt2, rd, fn)
                else:
                    if sz2 > 1024 * 1024 * 32:
                        self.log("file: %r" % (abspath,))

                    try:
                        hashes, _ = self._hashlist_from_file(abspath, pf)
                    except Exception as ex:
                        self.log("hash: %r @ %r" % (ex, abspath))
                        continue

                    if not hashes:
                        return -1

                    w2 = up2k_wark_from_hashlist(self.salt, sz2, hashes)

                if w == w2:
                    continue

                # symlink mtime was inconsistent before v1.9.4; check if that's it
                if st != stl and (nohash or not sz2):
                    mt2b = int(st.st_mtime)
                    w2b = up2k_wark_from_metadata(self.salt, sz2, mt2b, rd, fn)
                    if w == w2b:
                        continue

                rewark.append((drd, dfn, w2, sz2, mt2))

                t = "hash mismatch: %r\n db: %s (%d byte, %d)\n fs: %s (%d byte, %d)"
                self.log(t % (abspath, w, sz, mt, w2, sz2, mt2), 1)

        if e2vp and (rewark or f404):
            self.hub.retcode = 1
            Daemon(self.hub.sigterm)
            t = "in volume /%s: %s files missing, %s files have incorrect hashes"
            t = t % (vol.vpath, len(f404), len(rewark))
            self.log(t, 1)
            raise Exception(t)

        if not e2vu or (not rewark and not f404):
            return 0

        with self.mutex:
            q = "update up set w=?, sz=?, mt=? where rd=? and fn=?"
            for rd, fn, w, sz, mt in rewark:
                cur.execute(q, (w, sz, int(mt), rd, fn))

            if f404:
                q = "delete from up where rd=? and fn=? and +w=?"
                cur.executemany(q, f404)

            cur.connection.commit()

        return len(rewark) + len(f404)

    def _build_tags_index(self, vol: VFS) -> tuple[int, int, bool]:
        ptop = vol.realpath
        with self.mutex, self.reg_mutex:
            reg = self.register_vpath(ptop, vol.flags)

        assert reg and self.pp
        cur = self.cur[ptop]

        if not self.args.no_dhash:
            with self.mutex:
                c = cur.execute("select k from kv where k = 'tagscan'")
                if not c.fetchone():
                    return 0, 0, bool(self.mtag)

        ret = self._build_tags_index_2(ptop)

        with self.mutex:
            self._set_tagscan(cur, False)
            cur.connection.commit()

        return ret

    def _drop_caches(self) -> None:
        """mutex(main,reg) me"""
        self.log("dropping caches for a full filesystem scan")
        for vol in self.vfs.all_vols.values():
            reg = self.register_vpath(vol.realpath, vol.flags)
            if not reg:
                continue

            cur, _ = reg
            self._set_tagscan(cur, True)
            cur.execute("delete from dh")
            cur.execute("delete from cv")
            cur.connection.commit()

    def _set_tagscan(self, cur: "sqlite3.Cursor", need: bool) -> bool:
        if self.args.no_dhash:
            return False

        c = cur.execute("select k from kv where k = 'tagscan'")
        if bool(c.fetchone()) == need:
            return False

        if need:
            cur.execute("insert into kv values ('tagscan',1)")
        else:
            cur.execute("delete from kv where k = 'tagscan'")

        return True

    def _build_tags_index_2(self, ptop: str) -> tuple[int, int, bool]:
        entags = self.entags[ptop]
        flags = self.flags[ptop]
        cur = self.cur[ptop]

        n_add = 0
        n_rm = 0
        if "e2tsr" in flags:
            with self.mutex:
                n_rm = cur.execute("select count(w) from mt").fetchone()[0]
                if n_rm:
                    self.log("discarding {} media tags for a full rescan".format(n_rm))
                    cur.execute("delete from mt")

        # integrity: drop tags for tracks that were deleted
        if "e2t" in flags:
            with self.mutex:
                n = 0
                c2 = cur.connection.cursor()
                up_q = "select w from up where substr(w,1,16) = ?"
                rm_q = "delete from mt where w = ?"
                for (w,) in cur.execute("select w from mt"):
                    if not c2.execute(up_q, (w,)).fetchone():
                        c2.execute(rm_q, (w[:16],))
                        n += 1

                c2.close()
                if n:
                    t = "discarded media tags for {} deleted files"
                    self.log(t.format(n))
                    n_rm += n

        with self.mutex:
            cur.connection.commit()

        # bail if a volflag disables indexing
        if "d2t" in flags or "d2d" in flags:
            return 0, n_rm, True

        # add tags for new files
        if "e2ts" in flags:
            if not self.mtag:
                return 0, n_rm, False

            nq = 0
            with self.mutex:
                tf, nq = self._spool_warks(
                    cur, "select w from up order by rd, fn", (), 1
                )

            if not nq:
                # self.log("tags ok")
                self._unspool(tf)
                return 0, n_rm, True

            if nq == -1:
                return -1, -1, True

            with gzip.GzipFile(mode="rb", fileobj=tf) as gf:
                n_add = self._e2ts_q(gf, nq, cur, ptop, entags)

            self._unspool(tf)

        return n_add, n_rm, True

    def _e2ts_q(
        self,
        qf: gzip.GzipFile,
        nq: int,
        cur: "sqlite3.Cursor",
        ptop: str,
        entags: set[str],
    ) -> int:
        assert self.pp and self.mtag

        flags = self.flags[ptop]
        mpool: Optional[Queue[Mpqe]] = None
        if self.mtag.prefer_mt and self.args.mtag_mt > 1:
            mpool = self._start_mpool()

        n_add = 0
        n_buf = 0
        last_write = time.time()
        for bw in qf:
            if self.stop:
                return -1

            w = bw[:-1].decode("ascii")

            with self.mutex:
                try:
                    q = "select rd, fn, ip, at from up where substr(w,1,16)=? and +w=?"
                    rd, fn, ip, at = cur.execute(q, (w[:16], w)).fetchone()
                except:
                    # file modified/deleted since spooling
                    continue

                if rd.startswith("//") or fn.startswith("//"):
                    rd, fn = s3dec(rd, fn)

                if "mtp" in flags:
                    q = "insert into mt values (?,'t:mtp','a')"
                    cur.execute(q, (w[:16],))

            abspath = djoin(ptop, rd, fn)
            self.pp.msg = "c%d %s" % (nq, abspath)
            if not mpool:
                n_tags = self._tagscan_file(cur, entags, w, abspath, ip, at)
            else:
                if ip:
                    oth_tags = {"up_ip": ip, "up_at": at}
                else:
                    oth_tags = {}

                mpool.put(Mpqe({}, entags, w, abspath, oth_tags))
                with self.mutex:
                    n_tags = len(self._flush_mpool(cur))

            n_add += n_tags
            n_buf += n_tags
            nq -= 1

            td = time.time() - last_write
            if n_buf >= 4096 or td >= self.timeout / 2:
                self.log("commit {} new tags".format(n_buf))
                with self.mutex:
                    cur.connection.commit()

                last_write = time.time()
                n_buf = 0

        if mpool:
            self._stop_mpool(mpool)
            with self.mutex:
                n_add += len(self._flush_mpool(cur))

        with self.mutex:
            cur.connection.commit()

        return n_add

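    # the full set of warks to scan can be much larger than what is
    # comfortable to hold in memory, so it is streamed into a gzip-compressed
    # spooled tempfile (one wark per line) and replayed later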
    def _spool_warks(
        self,
        cur: "sqlite3.Cursor",
        q: str,
        params: tuple[Any, ...],
        flt: int,
    ) -> tuple[tempfile.SpooledTemporaryFile[bytes], int]:
        """mutex(main) me"""
        n = 0
        c2 = cur.connection.cursor()
        tf = tempfile.SpooledTemporaryFile(1024 * 1024 * 8, "w+b", prefix="cpp-tq-")
        with gzip.GzipFile(mode="wb", fileobj=tf) as gf:
            for row in cur.execute(q, params):
                if self.stop:
                    return tf, -1

                if flt == 1:
                    q = "select w from mt where w = ?"
                    if c2.execute(q, (row[0][:16],)).fetchone():
                        continue

                zs = "\x00\x01".join(row).replace("\n", "\x00\x02")
                gf.write((zs + "\n").encode("utf-8"))
                n += 1

        c2.close()
        tf.seek(0)
        self.spools.add(tf)
        return tf, n

    def _unspool(self, tf: tempfile.SpooledTemporaryFile[bytes]) -> None:
        try:
            self.spools.remove(tf)
        except:
            return

        try:
            tf.close()
        except Exception as ex:
            self.log("failed to delete spool: {}".format(ex), 3)

    def _flush_mpool(self, wcur: "sqlite3.Cursor") -> list[str]:
        ret = []
        for x in self.pending_tags:
            self._tag_file(wcur, *x)
            ret.append(x[1])

        self.pending_tags = []
        return ret

    def _run_all_mtp(self, gid: int) -> None:
        t0 = time.time()
        for ptop, flags in self.flags.items():
            if "mtp" in flags:
                if ptop not in self.entags:
                    t = "skipping mtp for unavailable volume {}"
                    self.log(t.format(ptop), 1)
                else:
                    self._run_one_mtp(ptop, gid)

            vtop = "\n"
            for vol in self.vfs.all_vols.values():
                if vol.realpath == ptop:
                    vtop = vol.vpath
            if "running mtp" in self.volstate.get(vtop, ""):
                self.volstate[vtop] = "online, idle"

        td = time.time() - t0
        msg = "mtp finished in {:.2f} sec ({})"
        self.log(msg.format(td, s2hms(td, True)))

        self.unpp()
        if self.args.exit == "idx":
            self.hub.sigterm()

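    # one mtp pass over a single volume: files needing external parsers are
    # marked with a "t:mtp" placeholder tag in the db; batches of those are
    # dispatched to the worker pool and the placeholder is removed as each
    # file finishes, so progress survives a restart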
    def _run_one_mtp(self, ptop: str, gid: int) -> None:
        if gid != self.gid:
            return

        entags = self.entags[ptop]

        parsers = {}
        for parser in self.flags[ptop]["mtp"]:
            try:
                parser = MParser(parser)
            except:
                self.log("invalid argument (could not find program): " + parser, 1)
                return

            for tag in entags:
                if tag in parser.tags:
                    parsers[parser.tag] = parser

        if self.args.mtag_vv:
            t = "parsers for {}: \033[0m{}"
            self.log(t.format(ptop, list(parsers.keys())), "90")

        self.mtp_parsers[ptop] = parsers

        q = "select count(w) from mt where k = 't:mtp'"
        with self.mutex:
            cur = self.cur[ptop]
            cur = cur.connection.cursor()
            wcur = cur.connection.cursor()
            n_left = cur.execute(q).fetchone()[0]

        mpool = self._start_mpool()
        batch_sz = mpool.maxsize * 3
        t_prev = time.time()
        n_prev = n_left
        n_done = 0
        to_delete = {}
        in_progress = {}
        while True:
            did_nothing = True
            with self.mutex:
                if gid != self.gid:
                    break

                q = "select w from mt where k = 't:mtp' limit ?"
                zq = cur.execute(q, (batch_sz,)).fetchall()
                warks = [str(x[0]) for x in zq]
                jobs = []
                for w in warks:
                    if w in in_progress:
                        continue

                    q = "select rd, fn, ip, at from up where substr(w,1,16)=? limit 1"
                    rd, fn, ip, at = cur.execute(q, (w,)).fetchone()
                    rd, fn = s3dec(rd, fn)
                    abspath = djoin(ptop, rd, fn)

                    q = "select k from mt where w = ?"
                    zq = cur.execute(q, (w,)).fetchall()
                    have: dict[str, Union[str, float]] = {x[0]: 1 for x in zq}

                    did_nothing = False
                    parsers = self._get_parsers(ptop, have, abspath)
                    if not parsers:
                        to_delete[w] = True
                        n_left -= 1
                        continue

                    if next((x for x in parsers.values() if x.pri), None):
                        q = "select k, v from mt where w = ?"
                        zq2 = cur.execute(q, (w,)).fetchall()
                        oth_tags = {str(k): v for k, v in zq2}
                    else:
                        oth_tags = {}

                    if ip:
                        oth_tags["up_ip"] = ip
                        oth_tags["up_at"] = at

                    jobs.append(Mpqe(parsers, set(), w, abspath, oth_tags))
                    in_progress[w] = True

            with self.mutex:
                done = self._flush_mpool(wcur)
                for w in done:
                    to_delete[w] = True
                    did_nothing = False
                    in_progress.pop(w)
                    n_done += 1

                for w in to_delete:
                    q = "delete from mt where w = ? and +k = 't:mtp'"
                    cur.execute(q, (w,))

                to_delete = {}

            if not warks:
                break

            if did_nothing:
                with self.tag_event:
                    self.tag_event.wait(0.2)

            if not jobs:
                continue

            try:
                now = time.time()
                s = ((now - t_prev) / (n_prev - n_left)) * n_left
                h, s = divmod(s, 3600)
                m, s = divmod(s, 60)
                n_prev = n_left
                t_prev = now
            except:
                h = 1
                m = 1

            msg = "mtp: {} done, {} left, eta {}h {:02d}m"
            with self.mutex:
                msg = msg.format(n_done, n_left, int(h), int(m))
                self.log(msg, c=6)

            for j in jobs:
                n_left -= 1
                mpool.put(j)

        with self.mutex:
            cur.connection.commit()

        self._stop_mpool(mpool)
        with self.mutex:
            done = self._flush_mpool(wcur)
            for w in done:
                q = "delete from mt where w = ? and +k = 't:mtp'"
                cur.execute(q, (w,))

            cur.connection.commit()
            if n_done:
                self.log("mtp: scanned {} files in {}".format(n_done, ptop), c=6)
                cur.execute("vacuum")

        wcur.close()
        cur.close()

    def _get_parsers(
        self, ptop: str, have: dict[str, Union[str, float]], abspath: str
    ) -> dict[str, MParser]:
        try:
            all_parsers = self.mtp_parsers[ptop]
        except:
            if self.args.mtag_vv:
                self.log("no mtp defined for {}".format(ptop), "90")
            return {}

        entags = self.entags[ptop]
        parsers = {}
        for k, v in all_parsers.items():
            if "ac" in entags or ".aq" in entags:
                if "ac" in have or ".aq" in have:
                    # is audio, require non-audio?
                    if v.audio == "n":
                        if self.args.mtag_vv:
                            t = "skip mtp {}; want no-audio, got audio"
                            self.log(t.format(k), "90")
                        continue
                # is not audio, require audio?
                elif v.audio == "y":
                    if self.args.mtag_vv:
                        t = "skip mtp {}; want audio, got no-audio"
                        self.log(t.format(k), "90")
                    continue

            if v.ext:
                match = False
                for ext in v.ext:
                    if abspath.lower().endswith("." + ext):
                        match = True
                        break

                if not match:
                    if self.args.mtag_vv:
                        t = "skip mtp {}; want file-ext {}, got {}"
                        self.log(t.format(k, v.ext, abspath.rsplit(".")[-1]), "90")
                    continue

            parsers[k] = v

        parsers = {k: v for k, v in parsers.items() if v.force or k not in have}
        return parsers

    def _start_mpool(self) -> Queue[Mpqe]:
        # mp.pool.ThreadPool and concurrent.futures.ThreadPoolExecutor
        # both do crazy runahead so lets reinvent another wheel
        nw = max(1, self.args.mtag_mt)
        assert self.mtag  # !rm
        if not self.mpool_used:
            self.mpool_used = True
            self.log("using {}x {}".format(nw, self.mtag.backend))

        mpool: Queue[Mpqe] = Queue(nw)
        for _ in range(nw):
            Daemon(self._tag_thr, "up2k-mpool", (mpool,))

        return mpool

    def _stop_mpool(self, mpool: Queue[Mpqe]) -> None:
        if not mpool:
            return

        for _ in range(mpool.maxsize):
            mpool.put(Mpqe({}, set(), "", "", {}))

        mpool.join()

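    # worker-thread main loop; an Mpqe with an empty wark (as posted by
    # _stop_mpool above) is the shutdown sentinel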
    def _tag_thr(self, q: Queue[Mpqe]) -> None:
        assert self.mtag
        while True:
            qe = q.get()
            if not qe.w:
                q.task_done()
                return

            try:
                st = bos.stat(qe.abspath)
                if not qe.mtp:
                    if self.args.mtag_vv:
                        t = "tag-thr: {}({})"
                        self.log(t.format(self.mtag.backend, qe.abspath), "90")

                    tags = self.mtag.get(qe.abspath) if st.st_size else {}
                else:
                    if self.args.mtag_vv:
                        t = "tag-thr: {}({})"
                        self.log(t.format(list(qe.mtp.keys()), qe.abspath), "90")

                    tags = self.mtag.get_bin(qe.mtp, qe.abspath, qe.oth_tags)
                    vtags = [
                        "\033[36m{} \033[33m{}".format(k, v) for k, v in tags.items()
                    ]
                    if vtags:
                        self.log("{}\033[0m [{}]".format(" ".join(vtags), qe.abspath))

                with self.mutex:
                    self.pending_tags.append((qe.entags, qe.w, qe.abspath, tags))
            except:
                ex = traceback.format_exc()
                self._log_tag_err(qe.mtp or self.mtag.backend, qe.abspath, ex)
            finally:
                if qe.mtp:
                    with self.tag_event:
                        self.tag_event.notify_all()

                q.task_done()

    def _log_tag_err(self, parser: Any, abspath: str, ex: Any) -> None:
        msg = "%s failed to read tags from %r:\n%s" % (parser, abspath, ex)
        self.log(msg.lstrip(), c=1 if "<Signals.SIG" in msg else 3)

    def _tagscan_file(
        self,
        write_cur: "sqlite3.Cursor",
        entags: set[str],
        wark: str,
        abspath: str,
        ip: str,
        at: float,
    ) -> int:
        """will mutex(main)"""
        assert self.mtag  # !rm

        try:
            st = bos.stat(abspath)
        except:
            return 0

        if not stat.S_ISREG(st.st_mode):
            return 0

        try:
            tags = self.mtag.get(abspath) if st.st_size else {}
        except Exception as ex:
            self._log_tag_err("", abspath, ex)
            return 0

        if ip:
            tags["up_ip"] = ip
            tags["up_at"] = at

        with self.mutex:
            return self._tag_file(write_cur, entags, wark, abspath, tags)

    def _tag_file(
        self,
        write_cur: "sqlite3.Cursor",
        entags: set[str],
        wark: str,
        abspath: str,
        tags: dict[str, Union[str, float]],
    ) -> int:
        """mutex(main) me"""
        assert self.mtag  # !rm

        if not bos.path.isfile(abspath):
            return 0

        if entags:
            tags = {k: v for k, v in tags.items() if k in entags}
            if not tags:
                # indicate scanned without tags
                tags = {"x": 0}

        if not tags:
            return 0

        # drop all existing rows for these keys in a single statement
        # (previously this same multi-key delete ran once per key)
        q = "delete from mt where w = ? and ({})".format(
            " or ".join(["+k = ?"] * len(tags))
        )
        args = [wark[:16]] + list(tags.keys())
        write_cur.execute(q, tuple(args))

        ret = 0
        for k, v in tags.items():
            q = "insert into mt values (?,?,?)"
            write_cur.execute(q, (wark[:16], k, v))
            ret += 1

        self._set_tagscan(write_cur, True)
        return ret

    def _trace(self, msg: str) -> None:
        self.log("ST: {}".format(msg))

    def _open_db_wd(self, db_path: str) -> "sqlite3.Cursor":
        ok: list[int] = []
        if not self.hub.is_dut:
            Daemon(self._open_db_timeout, "opendb_watchdog", [db_path, ok])

        try:
            return self._open_db(db_path)
        finally:
            ok.append(1)

    def _open_db_timeout(self, db_path, ok: list[int]) -> None:
        # give it plenty of time due to the count statement (and wisdom from byte's box)
        for _ in range(60):
            time.sleep(1)
            if ok:
                return

        t = "WARNING:\n\n initializing an up2k database is taking longer than one minute; something has probably gone wrong:\n\n"
        self._log_sqlite_incompat(db_path, t)

    def _log_sqlite_incompat(self, db_path, t0) -> None:
        txt = t0 or ""
        digest = hashlib.sha512(db_path.encode("utf-8", "replace")).digest()
        stackname = ub64enc(digest[:9]).decode("ascii")
        stackpath = os.path.join(E.cfg, "stack-%s.txt" % (stackname,))

        t = " the filesystem at %s may not support locking, or is otherwise incompatible with sqlite\n\n %s\n\n"
        t += " PS: if you think this is a bug and wish to report it, please include your configuration + the following file: %s\n"
        txt += t % (db_path, HINT_HISTPATH, stackpath)
        self.log(txt, 3)

        try:
            stk = alltrace()
            with open(stackpath, "wb") as f:
                f.write(stk.encode("utf-8", "replace"))
        except Exception as ex:
            self.log("warning: failed to write %s: %s" % (stackpath, ex), 3)

        if self.args.q:
            t = "-" * 72
            raise Exception("%s\n%s\n%s" % (t, txt, t))

    def _orz(self, db_path: str) -> "sqlite3.Cursor":
        assert sqlite3  # type: ignore # !rm
        c = sqlite3.connect(
            db_path, timeout=self.timeout, check_same_thread=False
        ).cursor()
        # c.connection.set_trace_callback(self._trace)
        return c

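    # open-or-upgrade: a v4 db is backed up and upgraded in-place, a current
    # db gets the incremental v5a..v5e migrations, and anything unreadable
    # or too old is backed up and recreated from scratch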
    def _open_db(self, db_path: str) -> "sqlite3.Cursor":
        existed = bos.path.exists(db_path)
        cur = self._orz(db_path)
        ver = self._read_ver(cur)
        if not existed and ver is None:
            return self._try_create_db(db_path, cur)

        if ver == 4:
            try:
                t = "creating backup before upgrade: "
                cur = self._backup_db(db_path, cur, ver, t)
                self._upgrade_v4(cur)
                ver = 5
            except:
                self.log("WARN: failed to upgrade from v4", 3)

        if ver == DB_VER:
            self._add_dhash_tab(cur)
            self._add_xiu_tab(cur)
            self._add_cv_tab(cur)
            self._add_idx_up_vp(cur, db_path)
            self._add_ds_tab(cur)

            try:
                nfiles = next(cur.execute("select count(w) from up"))[0]
                self.log(" {} |{}|".format(db_path, nfiles), "90")
                return cur
            except:
                self.log("WARN: could not list files; DB corrupt?\n" + min_ex())

        if (ver or 0) > DB_VER:
            t = "database is version {}, this copyparty only supports versions <= {}"
            raise Exception(t.format(ver, DB_VER))

        msg = "creating new DB (old is bad); backup: "
        if ver:
            msg = "creating new DB (too old to upgrade); backup: "

        cur = self._backup_db(db_path, cur, ver, msg)
        db = cur.connection
        cur.close()
        db.close()
        self._delete_db(db_path)
        return self._try_create_db(db_path, None)

    def _delete_db(self, db_path: str):
        for suf in ("", "-shm", "-wal", "-journal"):
            try:
                bos.unlink(db_path + suf)
            except:
                if not suf:
                    raise

    def _backup_db(
        self, db_path: str, cur: "sqlite3.Cursor", ver: Optional[int], msg: str
    ) -> "sqlite3.Cursor":
        assert sqlite3  # type: ignore # !rm
        bak = "{}.bak.{:x}.v{}".format(db_path, int(time.time()), ver)
        self.log(msg + bak)
        try:
            c2 = sqlite3.connect(bak)
            with c2:
                cur.connection.backup(c2)
            return cur
        except:
            t = "native sqlite3 backup failed; using fallback method:\n"
            self.log(t + min_ex())
        finally:
            c2.close()  # type: ignore

        db = cur.connection
        cur.close()
        db.close()

        shutil.copy2(fsenc(db_path), fsenc(bak))
        return self._orz(db_path)

    def _read_ver(self, cur: "sqlite3.Cursor") -> Optional[int]:
        for tab in ["ki", "kv"]:
            try:
                c = cur.execute(r"select v from {} where k = 'sver'".format(tab))
            except:
                continue

            rows = c.fetchall()
            if rows:
                return int(rows[0][0])
        return None

    def _try_create_db(
        self, db_path: str, cur: Optional["sqlite3.Cursor"]
    ) -> "sqlite3.Cursor":
        try:
            return self._create_db(db_path, cur)
        except:
            try:
                self._delete_db(db_path)
            except:
                pass
            raise

    def _create_db(
        self, db_path: str, cur: Optional["sqlite3.Cursor"]
    ) -> "sqlite3.Cursor":
        """
        collision in 2^(n/2) files where n = bits (6 bits/ch)
          10*6/2 = 2^30 =       1'073'741'824, 24.1mb idx  1<<(3*10)
          12*6/2 = 2^36 =      68'719'476'736, 24.8mb idx
          16*6/2 = 2^48 = 281'474'976'710'656, 26.1mb idx
        """
        if not cur:
            cur = self._orz(db_path)

        idx = r"create index up_w on up(substr(w,1,16))"
        if self.no_expr_idx:
            idx = r"create index up_w on up(w)"

        for cmd in [
            r"create table up (w text, mt int, sz int, rd text, fn text, ip text, at int)",
            r"create index up_vp on up(rd, fn)",
            r"create index up_fn on up(fn)",
            r"create index up_ip on up(ip)",
            r"create index up_at on up(at)",
            idx,
            r"create table mt (w text, k text, v int)",
            r"create index mt_w on mt(w)",
            r"create index mt_k on mt(k)",
            r"create index mt_v on mt(v)",
            r"create table kv (k text, v int)",
            r"insert into kv values ('sver', {})".format(DB_VER),
        ]:
            cur.execute(cmd)

        self._add_dhash_tab(cur)
        self._add_xiu_tab(cur)
        self._add_cv_tab(cur)
        self._add_ds_tab(cur)
        self.log("created DB at {}".format(db_path))
        return cur

    def _upgrade_v4(self, cur: "sqlite3.Cursor") -> None:
        for cmd in [
            r"alter table up add column ip text",
            r"alter table up add column at int",
            r"create index up_ip on up(ip)",
            r"update kv set v=5 where k='sver'",
        ]:
            cur.execute(cmd)

        cur.connection.commit()

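    # each _add_*_tab migration below probes for its target schema with a
    # throwaway select; if the probe succeeds the migration already ran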
    def _add_dhash_tab(self, cur: "sqlite3.Cursor") -> None:
        # v5 -> v5a
        try:
            cur.execute("select d, h from dh limit 1").fetchone()
            return
        except:
            pass

        for cmd in [
            r"create table dh (d text, h text)",
            r"create index dh_d on dh(d)",
            r"insert into kv values ('tagscan',1)",
        ]:
            cur.execute(cmd)

        cur.connection.commit()

    def _add_xiu_tab(self, cur: "sqlite3.Cursor") -> None:
        # v5a -> v5b
        # store rd+fn rather than warks to support nohash vols
        try:
            # probe must match the schema created below, or the table
            # would get dropped and rebuilt on every startup
            cur.execute("select c, w, rd, fn from iu limit 1").fetchone()
            return
        except:
            pass

        try:
            cur.execute("drop table iu")
        except:
            pass

        for cmd in [
            r"create table iu (c int, w text, rd text, fn text)",
            r"create index iu_c on iu(c)",
            r"create index iu_w on iu(w)",
        ]:
            cur.execute(cmd)

        cur.connection.commit()

    def _add_cv_tab(self, cur: "sqlite3.Cursor") -> None:
        # v5b -> v5c
        try:
            cur.execute("select rd, dn, fn from cv limit 1").fetchone()
            return
        except:
            pass

        for cmd in [
            r"create table cv (rd text, dn text, fn text)",
            r"create index cv_i on cv(rd, dn)",
        ]:
            cur.execute(cmd)

        try:
            cur.execute("delete from dh")
        except:
            pass

        cur.connection.commit()

    def _add_idx_up_vp(self, cur: "sqlite3.Cursor", db_path: str) -> None:
        # v5c -> v5d
        try:
            cur.execute("drop index up_rd")
        except:
            return

        for cmd in [
            r"create index up_vp on up(rd, fn)",
            r"create index up_at on up(at)",
        ]:
            self.log("upgrading db [%s]: %s" % (db_path, cmd[:18]))
            cur.execute(cmd)

        self.log("upgrading db [%s]: writing to disk..." % (db_path,))
        cur.connection.commit()
        cur.execute("vacuum")

    def _add_ds_tab(self, cur: "sqlite3.Cursor") -> None:
        # v5d -> v5e
        try:
            cur.execute("select rd, sz from ds limit 1").fetchone()
            return
        except:
            pass

        for cmd in [
            r"create table ds (rd text, sz int, nf int)",
            r"create index ds_rd on ds(rd)",
        ]:
            cur.execute(cmd)

        cur.connection.commit()

    def wake_rescanner(self):
        with self.rescan_cond:
            self.rescan_cond.notify_all()

    def handle_json(
        self, cj: dict[str, Any], busy_aps: dict[str, int]
    ) -> dict[str, Any]:
        # busy_aps is u2fh (always undefined if -j0) so this is safe
        self.busy_aps = busy_aps
        if self.reload_flag or self.reloading:
            raise Pebkac(503, SBUSY % ("fs-reload",))

        got_lock = False
        try:
            # bit expensive; 3.9=10x 3.11=2x
            if self.mutex.acquire(timeout=10):
                got_lock = True
                with self.reg_mutex:
                    ret = self._handle_json(cj)
            else:
                raise Pebkac(503, SBUSY % (self.blocked or "[unknown]",))
        except TypeError:
            if not PY2:
                raise
            # py2 Lock.acquire has no timeout arg; block indefinitely
            with self.mutex, self.reg_mutex:
                ret = self._handle_json(cj)
        finally:
            if got_lock:
                self.mutex.release()

        if self.fx_backlog:
            self.do_fx_backlog()

        return ret

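    # the up2k handshake: look up the wark in all eligible volumes, score
    # dedup candidates (same device / same folder / same name), forget any
    # db rows that no longer match the filesystem, then either link to an
    # existing copy, resume a partial upload, or start a new job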
    def _handle_json(self, cj: dict[str, Any], depth: int = 1) -> dict[str, Any]:
        if depth > 16:
            raise Pebkac(500, "too many xbu relocs, giving up")

        ptop = cj["ptop"]
        if not self.register_vpath(ptop, cj["vcfg"]):
            if ptop not in self.registry:
                raise Pebkac(410, "location unavailable")

        cj["name"] = sanitize_fn(cj["name"], "")
        cj["poke"] = now = self.db_act = self.vol_act[ptop] = time.time()
        wark = dwark = self._get_wark(cj)
        job = None
        pdir = djoin(ptop, cj["prel"])
        inc_ap = djoin(pdir, cj["name"])
        try:
            dev = bos.stat(pdir).st_dev
        except:
            dev = 0

        # check if filesystem supports sparse files;
        # refuse out-of-order / multithreaded uploading if sprs False
        sprs = self.fstab.get(pdir) != "ng"

        if True:
            jcur = self.cur.get(ptop)
            reg = self.registry[ptop]
            vfs = self.vfs.all_vols[cj["vtop"]]
            n4g = bool(vfs.flags.get("noforget"))
            noclone = bool(vfs.flags.get("noclone"))
            rand = vfs.flags.get("rand") or cj.get("rand")
            lost: list[tuple["sqlite3.Cursor", str, str]] = []

            safe_dedup = vfs.flags.get("safededup") or 50
            data_ok = safe_dedup < 10 or n4g

            vols = [(ptop, jcur)] if jcur else []
            if vfs.flags.get("xlink"):
                vols += [(k, v) for k, v in self.cur.items() if k != ptop]

            if noclone:
                wark = up2k_wark_from_metadata(
                    self.salt, cj["size"], cj["lmod"], cj["prel"], cj["name"]
                )

            zi = cj["lmod"]
            bad_mt = zi <= 0 or zi > 0xAAAAAAAA
            if bad_mt or vfs.flags.get("up_ts", "") == "fu":
                # force upload time rather than last-modified
                cj["lmod"] = int(time.time())
                if zi and bad_mt:
                    t = "ignoring impossible last-modified time from client: %s"
                    self.log(t % (zi,), 6)

            alts: list[tuple[int, int, dict[str, Any], "sqlite3.Cursor", str, str]] = []
            for ptop, cur in vols:
                allv = self.vfs.all_vols
                cvfs = next((v for v in allv.values() if v.realpath == ptop), vfs)
                vtop = cj["vtop"] if cur == jcur else cvfs.vpath

                if self.no_expr_idx:
                    q = r"select * from up where w = ?"
                    argv = [dwark]
                else:
                    q = r"select * from up where substr(w,1,16)=? and +w=?"
                    argv = [dwark[:16], dwark]

                c2 = cur.execute(q, tuple(argv))
                for _, dtime, dsize, dp_dir, dp_fn, ip, at in c2:
                    if dp_dir.startswith("//") or dp_fn.startswith("//"):
                        dp_dir, dp_fn = s3dec(dp_dir, dp_fn)

                    dp_abs = djoin(ptop, dp_dir, dp_fn)
                    if noclone and dp_abs != inc_ap:
                        continue

                    try:
                        st = bos.stat(dp_abs)
                        if stat.S_ISLNK(st.st_mode):
                            # broken symlink
                            raise Exception()
                        if st.st_size != dsize:
                            t = "candidate ignored (db/fs desync): {}, size fs={} db={}, mtime fs={} db={}, file: {}"
                            t = t.format(
                                dwark, st.st_size, dsize, st.st_mtime, dtime, dp_abs
                            )
                            self.log(t)
                            raise Exception()
                    except Exception as ex:
                        if n4g:
                            st = NULLSTAT
                        else:
                            lost.append((cur, dp_dir, dp_fn))
                            continue

                    j = {
                        "name": dp_fn,
                        "prel": dp_dir,
                        "vtop": vtop,
                        "ptop": ptop,
                        "sprs": sprs,  # dontcare; finished anyways
                        "size": dsize,
                        "lmod": dtime,
                        "host": cj["host"],
                        "user": cj["user"],
                        "addr": ip,
                        "at": at,
                    }
                    for k in ["life"]:
                        if k in cj:
                            j[k] = cj[k]

                    # offset of 1st diff in vpaths
                    zig = (
                        n + 1
                        for n, (c1, c2) in enumerate(
                            zip(dp_dir + "\r", cj["prel"] + "\n")
                        )
                        if c1 != c2
                    )
                    score = (
                        (6969 if st.st_dev == dev else 0)
                        + (3210 if dp_dir == cj["prel"] else next(zig))
                        + (1 if dp_fn == cj["name"] else 0)
                    )
                    alts.append((score, -len(alts), j, cur, dp_dir, dp_fn))

            job = None
            for dupe in sorted(alts, reverse=True):
                rj = dupe[2]
                orig_ap = djoin(rj["ptop"], rj["prel"], rj["name"])
                if data_ok or inc_ap == orig_ap:
                    data_ok = True
                    job = rj
                    break
                else:
                    self.log("asserting contents of %r" % (orig_ap,))
                    hashes2, st = self._hashlist_from_file(orig_ap)
                    wark2 = up2k_wark_from_hashlist(self.salt, st.st_size, hashes2)
                    if dwark != wark2:
                        t = "will not dedup (fs index desync): fs=%s, db=%s, file: %r\n%s"
                        self.log(t % (wark2, dwark, orig_ap, rj))
                        lost.append(dupe[3:])
                        continue
                    data_ok = True
                    job = rj
                    break

            if job:
                if wark in reg:
                    del reg[wark]
                job["hash"] = job["need"] = []
                job["done"] = 1
                job["busy"] = {}

            if lost:
                c2 = None
                for cur, dp_dir, dp_fn in lost:
                    t = "forgetting desynced db entry: %r"
                    self.log(t % ("/" + vjoin(vjoin(vfs.vpath, dp_dir), dp_fn)))
                    self.db_rm(cur, dp_dir, dp_fn, cj["size"])
                    if c2 and c2 != cur:
                        c2.connection.commit()

                    c2 = cur

                assert c2  # !rm
                c2.connection.commit()

            cur = jcur
            ptop = None  # use cj or job as appropriate

            if not job and wark in reg:
                # ensure the files haven't been edited or deleted
                path = ""
                st = None
                rj = reg[wark]
                names = [rj[x] for x in ["name", "tnam"] if x in rj]
                for fn in names:
                    path = djoin(rj["ptop"], rj["prel"], fn)
                    try:
                        st = bos.stat(path)
                        if st.st_size > 0 or "done" in rj:
                            # upload completed or both present
                            break
                    except:
                        # missing; restart
                        if not self.args.nw and not n4g:
                            t = "forgetting deleted partial upload at %r"
                            self.log(t % (path,))
                            del reg[wark]
                        break

                inc_ap = djoin(cj["ptop"], cj["prel"], cj["name"])
                orig_ap = djoin(rj["ptop"], rj["prel"], rj["name"])

                if self.args.nw or n4g or not st or "done" not in rj:
                    pass

                elif st.st_size != rj["size"]:
                    t = "will not dedup (fs index desync): %s, size fs=%d db=%d, mtime fs=%d db=%d, file: %r\n%s"
                    t = t % (
                        wark,
                        st.st_size,
                        rj["size"],
                        st.st_mtime,
                        rj["lmod"],
                        path,
                        rj,
                    )
                    self.log(t)
                    del reg[wark]

                elif inc_ap != orig_ap and not data_ok and "done" in reg[wark]:
                    self.log("asserting contents of %r" % (orig_ap,))
                    hashes2, _ = self._hashlist_from_file(orig_ap)
                    wark2 = up2k_wark_from_hashlist(self.salt, st.st_size, hashes2)
                    if wark != wark2:
                        t = "will not dedup (fs index desync): fs=%s, idx=%s, file: %r\n%s"
                        self.log(t % (wark2, wark, orig_ap, rj))
                        del reg[wark]

            if job or wark in reg:
                job = job or reg[wark]
                if (
                    job["ptop"] != cj["ptop"]
                    or job["prel"] != cj["prel"]
                    or job["name"] != cj["name"]
                ):
                    # file contents match, but not the path
                    src = djoin(job["ptop"], job["prel"], job["name"])
                    dst = djoin(cj["ptop"], cj["prel"], cj["name"])
                    vsrc = djoin(job["vtop"], job["prel"], job["name"])
                    vsrc = vsrc.replace("\\", "/")  # just for prints anyways
                    if "done" not in job:
                        self.log("unfinished:\n %r\n %r" % (src, dst))
                        err = "partial upload exists at a different location; please resume uploading here instead:\n"
                        err += "/" + quotep(vsrc) + " "

                        # registry is size-constrained + can only contain one unique wark;
                        # let want_recheck trigger symlink (if still in reg) or reupload
                        if cur:
                            dupe = (cj["prel"], cj["name"], cj["lmod"])
                            try:
                                if dupe not in self.dupesched[src]:
                                    self.dupesched[src].append(dupe)
                            except:
                                self.dupesched[src] = [dupe]

                        raise Pebkac(422, err)

                    elif "nodupe" in vfs.flags:
                        self.log("dupe-reject:\n %r\n %r" % (src, dst))
                        err = "upload rejected, file already exists:\n"
                        err += "/" + quotep(vsrc) + " "
                        raise Pebkac(409, err)
                    else:
                        # symlink to the client-provided name,
                        # returning the previous upload info
                        psrc = src + ".PARTIAL"
                        if self.args.dotpart:
                            m = re.match(r"(.*[\\/])(.*)", psrc)
                            if m:  # always true but...
                                zs1, zs2 = m.groups()
                                psrc = zs1 + "." + zs2

                        if (
                            src in self.busy_aps
                            or psrc in self.busy_aps
                            or (wark in reg and "done" not in reg[wark])
                        ):
                            raise Pebkac(
                                422, "source file busy; please try again later"
                            )

                        job = deepcopy(job)
                        job["wark"] = wark
                        job["dwrk"] = dwark
                        job["at"] = cj.get("at") or now
                        zs = "vtop ptop prel name lmod host user addr poke"
                        for k in zs.split():
                            job[k] = cj.get(k) or ""
                        for k in ("life", "replace"):
                            if k in cj:
                                job[k] = cj[k]

                        pdir = djoin(cj["ptop"], cj["prel"])
                        if rand:
                            job["name"] = rand_name(
                                pdir, cj["name"], vfs.flags["nrand"]
                            )

                        dst = djoin(job["ptop"], job["prel"], job["name"])
                        xbu = vfs.flags.get("xbu")
                        if xbu:
                            vp = djoin(job["vtop"], job["prel"], job["name"])
                            hr = runhook(
                                self.log,
                                None,
                                self,
                                "xbu.up2k.dupe",
                                xbu,  # type: ignore
                                dst,
                                vp,
                                job["host"],
                                job["user"],
                                self.vfs.get_perms(job["vtop"], job["user"]),
                                job["lmod"],
                                job["size"],
                                job["addr"],
                                job["at"],
                                "",
                            )
                            if not hr:
                                t = "upload blocked by xbu server config: %r" % (dst,)
                                self.log(t, 1)
                                raise Pebkac(403, t)
                            if hr.get("reloc"):
                                x = pathmod(self.vfs, dst, vp, hr["reloc"])
                                if x:
                                    zvfs = vfs
                                    pdir, _, job["name"], (vfs, rem) = x
                                    dst = os.path.join(pdir, job["name"])
                                    job["vcfg"] = vfs.flags
                                    job["ptop"] = vfs.realpath
                                    job["vtop"] = vfs.vpath
                                    job["prel"] = rem
                                    if zvfs.vpath != vfs.vpath:
                                        # print(json.dumps(job, sort_keys=True, indent=4))
                                        job["hash"] = cj["hash"]
                                        self.log("xbu reloc1:%d..." % (depth,), 6)
                                        return self._handle_json(job, depth + 1)

                        job["name"] = self._untaken(pdir, job, now)
                        dst = djoin(job["ptop"], job["prel"], job["name"])

                        if not self.args.nw:
                            dvf: dict[str, Any] = vfs.flags
                            try:
                                dvf = self.flags[job["ptop"]]
                                self._symlink(src, dst, dvf, lmod=cj["lmod"], rm=True)
                            except:
                                if bos.path.exists(dst):
                                    wunlink(self.log, dst, dvf)
                                if not n4g:
                                    raise

                        if cur and not self.args.nw:
                            zs = "prel name lmod size ptop vtop wark dwrk host user addr at"
                            a = [job[x] for x in zs.split()]
                            self.db_add(cur, vfs.flags, *a)
                            cur.connection.commit()
                elif wark in reg:
                    # checks out, but client may have hopped IPs
                    job["addr"] = cj["addr"]

            if not job:
                ap1 = djoin(cj["ptop"], cj["prel"])
                if rand:
                    cj["name"] = rand_name(ap1, cj["name"], vfs.flags["nrand"])

                if vfs.lim:
                    ap2, cj["prel"] = vfs.lim.all(
                        cj["addr"],
                        cj["prel"],
                        cj["size"],
                        cj["ptop"],
                        ap1,
                        self.hub.broker,
                        reg,
                        "up2k._get_volsize",
                    )
                    bos.makedirs(ap2)
                    vfs.lim.nup(cj["addr"])
                    vfs.lim.bup(cj["addr"], cj["size"])

                job = {
                    "wark": wark,
                    "dwrk": dwark,
                    "t0": now,
                    "sprs": sprs,
                    "hash": deepcopy(cj["hash"]),
                    "need": [],
                    "busy": {},
                }
                # client-provided, sanitized by _get_wark: name, size, lmod
                zs = "vtop ptop prel name size lmod host user addr poke"
                for k in zs.split():
                    job[k] = cj[k]

                for k in ["life", "replace"]:
                    if k in cj:
                        job[k] = cj[k]

                # one chunk may occur multiple times in a file;
                # filter to unique values for the list of missing chunks
                # (preserve order to reduce disk thrashing)
                lut = set()
                for k in cj["hash"]:
                    if k not in lut:
                        job["need"].append(k)
                        lut.add(k)

                try:
                    ret = self._new_upload(job, vfs, depth)
                    if ret:
                        return ret  # xbu recursed
                except:
                    self.registry[job["ptop"]].pop(job["wark"], None)
                    raise

            purl = "{}/{}".format(job["vtop"], job["prel"]).strip("/")
            purl = "/{}/".format(purl) if purl else "/"

            ret = {
                "name": job["name"],
                "purl": purl,
                "size": job["size"],
                "lmod": job["lmod"],
                "sprs": job.get("sprs", sprs),
                "hash": job["need"],
                "dwrk": dwark,
                "wark": wark,
            }

            if (
                not ret["hash"]
                and "fk" in vfs.flags
                and not self.args.nw
                and (cj["user"] in vfs.axs.uread or cj["user"] in vfs.axs.upget)
            ):
                alg = 2 if "fka" in vfs.flags else 1
                ap = absreal(djoin(job["ptop"], job["prel"], job["name"]))
                ino = 0 if ANYWIN else bos.stat(ap).st_ino
                fk = self.gen_fk(alg, self.args.fk_salt, ap, job["size"], ino)
                ret["fk"] = fk[: vfs.flags["fk"]]

            if (
                not ret["hash"]
                and cur
                and cj.get("umod")
                and int(cj["lmod"]) != int(job["lmod"])
                and not self.args.nw
                and cj["user"] in vfs.axs.uwrite
                and cj["user"] in vfs.axs.udel
            ):
                sql = "update up set mt=? where substr(w,1,16)=? and +rd=? and +fn=?"
                try:
                    cur.execute(sql, (cj["lmod"], dwark[:16], job["prel"], job["name"]))
                    cur.connection.commit()

                    ap = djoin(job["ptop"], job["prel"], job["name"])
                    times = (int(time.time()), int(cj["lmod"]))
                    bos.utime(ap, times, False)

                    self.log("touched %r from %d to %d" % (ap, job["lmod"], cj["lmod"]))
                except Exception as ex:
                    self.log("umod failed, %r" % (ex,), 3)

            return ret

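    # decide the final filename: optionally overwrite an existing file when
    # the client requested "replace" (for replace=mt, only if the client
    # copy is at least as new), otherwise let ren_open find an available
    # name using the timestamp+iphash suffix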
    def _untaken(self, fdir: str, job: dict[str, Any], ts: float) -> str:
        fname = job["name"]
        ip = job["addr"]

        if self.args.nw:
            return fname

        fp = djoin(fdir, fname)

        ow = job.get("replace") and bos.path.exists(fp)
        if ow and "mt" in str(job["replace"]).lower():
            mts = bos.stat(fp).st_mtime
            mtc = job["lmod"]
            if mtc < mts:
                t = "will not overwrite; server %d sec newer than client; %d > %d %r"
                self.log(t % (mts - mtc, mts, mtc, fp))
                ow = False

        if ow:
            self.log("replacing existing file at %r" % (fp,))
            cur = None
            ptop = job["ptop"]
            vf = self.flags.get(ptop) or {}
            st = bos.stat(fp)
            try:
                vrel = vjoin(job["prel"], fname)
                xlink = bool(vf.get("xlink"))
                cur, wark, _, _, _, _ = self._find_from_vpath(ptop, vrel)
                self._forget_file(ptop, vrel, cur, wark, True, st.st_size, xlink)
            except Exception as ex:
                self.log("skipping replace-relink: %r" % (ex,))
            finally:
                if cur:
                    cur.connection.commit()

            wunlink(self.log, fp, vf)

        if self.args.plain_ip:
            dip = ip.replace(":", ".")
        else:
            dip = self.hub.iphash.s(ip)

        suffix = "-%.6f-%s" % (ts, dip)
        f, ret = ren_open(fname, "wb", fdir=fdir, suffix=suffix)
        f.close()
        return ret

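    # dedup link strategy: hardlink when the volume asks for it, otherwise a
    # symlink made as relative as the common path prefix allows (absolute
    # across filesystems), and a full copy as the last resort; the dedup
    # volflag gates all of this except when called for a move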
    def _symlink(
        self,
        src: str,
        dst: str,
        flags: dict[str, Any],
        verbose: bool = True,
        rm: bool = False,
        lmod: float = 0,
        fsrc: Optional[str] = None,
        is_mv: bool = False,
    ) -> None:
        if src == dst or (fsrc and fsrc == dst):
            t = "symlinking a file to itself?? orig(%s) fsrc(%s) link(%s)"
            raise Exception(t % (src, fsrc, dst))

        if verbose:
            t = "linking dupe:\n point-to: {0!r}\n link-loc: {1!r}"
            if fsrc:
                t += "\n data-src: {2!r}"
            self.log(t.format(src, dst, fsrc))

        if self.args.nw:
            return

        linked = False
        try:
            if not is_mv and not flags.get("dedup"):
                raise Exception("dedup is disabled in config")

            lsrc = src
            ldst = dst
            fs1 = bos.stat(os.path.dirname(src)).st_dev
            fs2 = bos.stat(os.path.dirname(dst)).st_dev
            if fs1 == 0 or fs2 == 0:
                # py2 on winxp or other unsupported combination
                raise OSError(errno.ENOSYS, "filesystem does not have st_dev")
            elif fs1 == fs2:
                # same fs; make symlink as relative as possible
                spl = r"[\\/]" if WINDOWS else "/"
                nsrc = re.split(spl, src)
                ndst = re.split(spl, dst)
                nc = 0
                for a, b in zip(nsrc, ndst):
                    if a != b:
                        break
                    nc += 1
                if nc > 1:
                    zsl = nsrc[nc:]
                    hops = len(ndst[nc:]) - 1
                    lsrc = "../" * hops + "/".join(zsl)

            if WINDOWS:
                lsrc = lsrc.replace("/", "\\")
                ldst = ldst.replace("/", "\\")

            if rm and bos.path.exists(dst):
                wunlink(self.log, dst, flags)

            try:
                if "hardlink" in flags:
                    os.link(fsenc(absreal(src)), fsenc(dst))
                    linked = True
            except Exception as ex:
                self.log("cannot hardlink: " + repr(ex))
                if "hardlinkonly" in flags:
                    raise Exception("symlink-fallback disabled in cfg")

            if not linked:
                if ANYWIN:
                    Path(ldst).symlink_to(lsrc)
                    if not bos.path.exists(dst):
                        try:
                            wunlink(self.log, dst, flags)
                        except:
                            pass
                        t = "the created symlink [%s] did not resolve to [%s]"
                        raise Exception(t % (ldst, lsrc))
                else:
                    os.symlink(fsenc(lsrc), fsenc(ldst))

                linked = True
        except Exception as ex:
            self.log("cannot link; creating copy: " + repr(ex))
            if bos.path.isfile(src):
                csrc = src
            elif fsrc and bos.path.isfile(fsrc):
                csrc = fsrc
            else:
                t = "BUG: no valid sources to link from! orig(%r) fsrc(%r) link(%r)"
                t = t % (src, fsrc, dst)
                self.log(t, 1)
                raise Exception(t)
            shutil.copy2(fsenc(csrc), fsenc(dst))

        if lmod and (not linked or SYMTIME):
            times = (int(time.time()), int(lmod))
            bos.utime(dst, times, False)

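    # validate an incoming chunk batch: all hashes after the first may be
    # sent as prefixes and are expanded against the job's chunk list, and
    # stitched chunks must be directly adjacent in the file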
    def handle_chunks(
        self, ptop: str, wark: str, chashes: list[str]
    ) -> tuple[list[str], int, list[list[int]], str, float, int, bool]:
        with self.mutex, self.reg_mutex:
            self.db_act = self.vol_act[ptop] = time.time()
            job = self.registry[ptop].get(wark)
            if not job:
                known = " ".join([x for x in self.registry[ptop].keys()])
                self.log("unknown wark [{}], known: {}".format(wark, known))
                raise Pebkac(400, "unknown wark" + SEESLOG)

            if "t0c" not in job:
                job["t0c"] = time.time()

            if len(chashes) > 1 and len(chashes[1]) < 44:
                # first hash is full-length; expand remaining ones
                uniq = []
                lut = set()
                for chash in job["hash"]:
                    if chash not in lut:
                        uniq.append(chash)
                        lut.add(chash)
                try:
                    nchunk = uniq.index(chashes[0])
                except:
                    raise Pebkac(400, "unknown chunk0 [%s]" % (chashes[0],))
                expanded = [chashes[0]]
                for prefix in chashes[1:]:
                    nchunk += 1
                    chash = uniq[nchunk]
                    if not chash.startswith(prefix):
                        t = "next sibling chunk does not start with expected prefix [%s]: [%s]"
                        raise Pebkac(400, t % (prefix, chash))
                    expanded.append(chash)
                chashes = expanded

            for chash in chashes:
                if chash not in job["need"]:
                    msg = "chash = {} , need:\n".format(chash)
                    msg += "\n".join(job["need"])
                    self.log(msg)
                    t = "already got that (%s) but thanks??"
                    if chash not in job["hash"]:
                        t = "unknown chunk wtf: %s"
                    raise Pebkac(400, t % (chash,))

                if chash in job["busy"]:
                    nh = len(job["hash"])
                    idx = job["hash"].index(chash)
                    t = "that chunk is already being written to:\n {}\n {} {}/{}\n {}"
                    raise Pebkac(400, t.format(wark, chash, idx, nh, job["name"]))

            assert chash  # type: ignore # !rm
            chunksize = up2k_chunksize(job["size"])

            coffsets = []
            nchunks = []
            for chash in chashes:
                nchunk = [n for n, v in enumerate(job["hash"]) if v == chash]
                if not nchunk:
                    raise Pebkac(400, "unknown chunk %s" % (chash,))

                ofs = [chunksize * x for x in nchunk]
                coffsets.append(ofs)
                nchunks.append(nchunk)

            for ofs1, ofs2 in zip(coffsets, coffsets[1:]):
                gap = (ofs2[0] - ofs1[0]) - chunksize
                if gap:
                    t = "only sibling chunks can be stitched; gap of %d bytes between offsets %d and %d in %s"
                    raise Pebkac(400, t % (gap, ofs1[0], ofs2[0], job["name"]))

            path = djoin(job["ptop"], job["prel"], job["tnam"])

            if not job["sprs"]:
                cur_sz = bos.path.getsize(path)
                if coffsets[0][0] > cur_sz:
                    t = "please upload sequentially using one thread;\nserver filesystem does not support sparse files.\n file: {}\n chunk: {}\n cofs: {}\n flen: {}"
                    t = t.format(job["name"], nchunks[0][0], coffsets[0][0], cur_sz)
                    raise Pebkac(400, t)

            job["busy"][chash] = 1

            job["poke"] = time.time()

            return chashes, chunksize, coffsets, path, job["lmod"], job["size"], job["sprs"]

    def fast_confirm_chunks(
        self, ptop: str, wark: str, chashes: list[str]
    ) -> tuple[int, str]:
        if not self.mutex.acquire(False):
            return -1, ""
        if not self.reg_mutex.acquire(False):
            self.mutex.release()
            return -1, ""
        try:
            return self._confirm_chunks(ptop, wark, chashes, chashes)
        finally:
            self.reg_mutex.release()
            self.mutex.release()

    def confirm_chunks(
        self, ptop: str, wark: str, written: list[str], locked: list[str]
    ) -> tuple[int, str]:
        with self.mutex, self.reg_mutex:
            return self._confirm_chunks(ptop, wark, written, locked)

    def _confirm_chunks(
        self, ptop: str, wark: str, written: list[str], locked: list[str]
    ) -> tuple[int, str]:
        if True:
            self.db_act = self.vol_act[ptop] = time.time()
            try:
                job = self.registry[ptop][wark]
                pdir = djoin(job["ptop"], job["prel"])
                src = djoin(pdir, job["tnam"])
                dst = djoin(pdir, job["name"])
            except Exception as ex:
                return -2, "confirm_chunk, wark(%r)" % (ex,)  # type: ignore

            for chash in locked:
                job["busy"].pop(chash, None)

            try:
                for chash in written:
                    job["need"].remove(chash)
            except Exception as ex:
                # dead tcp connections can get here by timeout (OK)
                return -2, "confirm_chunk, chash(%s) %r" % (chash, ex)  # type: ignore

            ret = len(job["need"])
            if ret > 0:
                return ret, src

            if self.args.nw:
                self.regdrop(ptop, wark)

            return ret, dst

    def finish_upload(self, ptop: str, wark: str, busy_aps: dict[str, int]) -> None:
        self.busy_aps = busy_aps
        with self.mutex, self.reg_mutex:
            self._finish_upload(ptop, wark)

        if self.fx_backlog:
            self.do_fx_backlog()

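    # all chunks confirmed: move the tempfile into place, restore the
    # client-provided mtime, index the wark (or park it in the registry as
    # done), then materialize any queued dupes as links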
    def _finish_upload(self, ptop: str, wark: str) -> None:
        """mutex(main,reg) me"""
        try:
            job = self.registry[ptop][wark]
            pdir = djoin(job["ptop"], job["prel"])
            src = djoin(pdir, job["tnam"])
            dst = djoin(pdir, job["name"])
        except Exception as ex:
            self.log(min_ex(), 1)
            raise Pebkac(500, "finish_upload, wark, %r%s" % (ex, SEESLOG))

        if job["need"]:
            self.log(min_ex(), 1)
            t = "finish_upload %s with remaining chunks %s%s"
            raise Pebkac(500, t % (wark, job["need"], SEESLOG))

        upt = job.get("at") or time.time()
        vflags = self.flags[ptop]

        atomic_move(self.log, src, dst, vflags)

        times = (int(time.time()), int(job["lmod"]))
        t = "no more chunks, setting times %s (%d) on %r"
        self.log(t % (times, bos.path.getsize(dst), dst))
        try:
            bos.utime(dst, times)
        except:
            self.log("failed to utime (%r, %s)" % (dst, times))

        zs = "prel name lmod size ptop vtop wark dwrk host user addr"
        z2 = [job[x] for x in zs.split()]
        wake_sr = False
        try:
            flt = job["life"]
            vfs = self.vfs.all_vols[job["vtop"]]
            vlt = vfs.flags["lifetime"]
            if vlt and flt > 1 and flt < vlt:
                upt -= vlt - flt
                wake_sr = True
                t = "using client lifetime; at={:.0f} ({}-{})"
                self.log(t.format(upt, vlt, flt))
        except:
            pass

        z2.append(upt)
        if self.idx_wark(vflags, *z2):
            del self.registry[ptop][wark]
        else:
            for k in "host tnam busy sprs poke t0c".split():
                del job[k]
            job["t0"] = int(job["t0"])
            job["hash"] = []
            job["done"] = 1
            self.regdrop(ptop, wark)

        if wake_sr:
            with self.rescan_cond:
                self.rescan_cond.notify_all()

        dupes = self.dupesched.pop(dst, [])
        if not dupes:
            return

        cur = self.cur.get(ptop)
        for rd, fn, lmod in dupes:
            d2 = djoin(ptop, rd, fn)
            if os.path.exists(d2):
                continue

            self._symlink(dst, d2, self.flags[ptop], lmod=lmod)
            if cur:
                self.db_add(cur, vflags, rd, fn, lmod, *z2[3:])

        if cur:
            cur.connection.commit()

    def regdrop(self, ptop: str, wark: str) -> None:
        """mutex(main,reg) me"""
        olds = self.droppable[ptop]
        if wark:
            olds.append(wark)

        if len(olds) <= self.args.reg_cap:
            return

        n = len(olds) - int(self.args.reg_cap / 2)
        t = "up2k-registry [{}] has {} droppables; discarding {}"
        self.log(t.format(ptop, len(olds), n))
        for k in olds[:n]:
            self.registry[ptop].pop(k, None)
        self.droppable[ptop] = olds[n:]

    def idx_wark(
        self,
        vflags: dict[str, Any],
        rd: str,
        fn: str,
        lmod: float,
        sz: int,
        ptop: str,
        vtop: str,
        wark: str,
        dwark: str,
        host: str,
        usr: str,
        ip: str,
        at: float,
        skip_xau: bool = False,
    ) -> bool:
        cur = self.cur.get(ptop)
        if not cur:
            return False

        self.db_act = self.vol_act[ptop] = time.time()
        try:
            self.db_add(
                cur,
                vflags,
                rd,
                fn,
                lmod,
                sz,
                ptop,
                vtop,
                wark,
                dwark,
                host,
                usr,
                ip,
                at,
                skip_xau,
            )
            cur.connection.commit()
        except Exception as ex:
            x = self.register_vpath(ptop, {})
            assert x  # !rm
            db_ex_chk(self.log, ex, x[1])
            raise

        if "e2t" in self.flags[ptop]:
            self.tagq.put((ptop, dwark, rd, fn, sz, ip, at))
            self.n_tagq += 1

        return True

    def db_rm(self, db: "sqlite3.Cursor", rd: str, fn: str, sz: int) -> None:
        sql = "delete from up where rd = ? and fn = ?"
        try:
            r = db.execute(sql, (rd, fn))
        except:
            assert self.mem_cur  # !rm
            r = db.execute(sql, s3enc(self.mem_cur, rd, fn))

        if r.rowcount:
            self.volsize[db] -= sz
            self.volnfiles[db] -= 1

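    # single-file index write: replaces any existing row for the same path,
    # updates per-directory size/count stats, fires the xau/xiu hooks, and
    # keeps the folder-cover table in sync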
    def db_add(
        self,
        db: "sqlite3.Cursor",
        vflags: dict[str, Any],
        rd: str,
        fn: str,
        ts: float,
        sz: int,
        ptop: str,
        vtop: str,
        wark: str,
        dwark: str,
        host: str,
        usr: str,
        ip: str,
        at: float,
        skip_xau: bool = False,
    ) -> None:
        """mutex(main) me"""
        self.db_rm(db, rd, fn, sz)

        if not ip:
            db_ip = ""
        else:
            # plugins may expect this to look like an actual IP
            db_ip = "1.1.1.1" if "no_db_ip" in vflags else ip

        sql = "insert into up values (?,?,?,?,?,?,?)"
        v = (dwark, int(ts), sz, rd, fn, db_ip, int(at or 0))
        try:
            db.execute(sql, v)
        except:
            assert self.mem_cur  # !rm
            rd, fn = s3enc(self.mem_cur, rd, fn)
            v = (dwark, int(ts), sz, rd, fn, db_ip, int(at or 0))
            db.execute(sql, v)

        self.volsize[db] += sz
        self.volnfiles[db] += 1

        xau = False if skip_xau else vflags.get("xau")
        dst = djoin(ptop, rd, fn)
        if xau:
            hr = runhook(
                self.log,
                None,
                self,
                "xau.up2k",
                xau,
                dst,
                djoin(vtop, rd, fn),
                host,
                usr,
                self.vfs.get_perms(djoin(vtop, rd, fn), usr),
                ts,
                sz,
                ip,
                at or time.time(),
                "",
            )
            if not hr:
                t = "upload blocked by xau server config"
                self.log(t, 1)
                wunlink(self.log, dst, vflags)
                self.registry[ptop].pop(wark, None)
                raise Pebkac(403, t)

        xiu = vflags.get("xiu")
        if xiu:
            cds: set[int] = set()
            for cmd in xiu:
                m = self.xiu_ptn.search(cmd)
                cds.add(int(m.group(1)) if m else 5)

            q = "insert into iu values (?,?,?,?)"
            for cd in cds:
                # one for each unique cooldown duration
                try:
                    db.execute(q, (cd, dwark[:16], rd, fn))
                except:
                    assert self.mem_cur  # !rm
                    rd, fn = s3enc(self.mem_cur, rd, fn)
                    db.execute(q, (cd, dwark[:16], rd, fn))

            if self.xiu_asleep:
                self.xiu_asleep = False
                with self.rescan_cond:
                    self.rescan_cond.notify_all()

        if rd and sz and fn.lower() in self.args.th_coversd_set:
            # wasteful; db_add will re-index actual covers
            # but that won't catch existing files
            crd, cdn = rd.rsplit("/", 1) if "/" in rd else ("", rd)
            try:
                q = "select fn from cv where rd=? and dn=?"
                db_cv = db.execute(q, (crd, cdn)).fetchone()[0]
                db_lcv = db_cv.lower()
                if db_lcv in self.args.th_coversd_set:
                    idx_db = self.args.th_coversd.index(db_lcv)
                    idx_fn = self.args.th_coversd.index(fn.lower())
                    add_cv = idx_fn < idx_db
                else:
                    add_cv = True
            except:
                add_cv = True

            if add_cv:
                try:
                    db.execute("delete from cv where rd=? and dn=?", (crd, cdn))
                    db.execute("insert into cv values (?,?,?)", (crd, cdn, fn))
                except:
                    pass

        if "nodirsz" not in vflags:
            try:
                q = "update ds set nf=nf+1, sz=sz+? where rd=?"
                q2 = "insert into ds values(?,?,1)"
                while True:
                    if not db.execute(q, (sz, rd)).rowcount:
                        db.execute(q2, (rd, sz))
                    if not rd:
                        break
                    rd = rd.rsplit("/", 1)[0] if "/" in rd else ""
            except:
                pass

    def handle_rm(
        self,
        uname: str,
        ip: str,
        vpaths: list[str],
        lim: list[int],
        rm_up: bool,
        unpost: bool,
    ) -> str:
        n_files = 0
        ok = {}
        ng = {}
        for vp in vpaths:
            if lim and lim[0] <= 0:
                self.log("hit delete limit of {} files".format(lim[1]), 3)
                break

            a, b, c = self._handle_rm(uname, ip, vp, lim, rm_up, unpost)
            n_files += a
            for k in b:
                ok[k] = 1
            for k in c:
                ng[k] = 1

        ng = {k: 1 for k in ng if k not in ok}
        iok = len(ok)
        ing = len(ng)

        return "deleted {} files (and {}/{} folders)".format(n_files, iok, iok + ing)

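    # delete one vpath (file or folder): for unpost, ownership and age are
    # verified against the registry/db instead of filesystem permissions;
    # each file is run through the xbd hook, forgotten from the db, then
    # unlinked, with xad fired afterwards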
    def _handle_rm(
        self, uname: str, ip: str, vpath: str, lim: list[int], rm_up: bool, unpost: bool
    ) -> tuple[int, list[str], list[str]]:
        self.db_act = time.time()
        partial = ""
        if not unpost:
            permsets = [[True, False, False, True]]
            vn0, rem0 = self.vfs.get(vpath, uname, *permsets[0])
            vn, rem = vn0.get_dbv(rem0)
        else:
            # unpost with missing permissions? verify with db
            permsets = [[False, True]]
            vn0, rem0 = self.vfs.get(vpath, uname, *permsets[0])
            vn, rem = vn0.get_dbv(rem0)
            ptop = vn.realpath
            with self.mutex, self.reg_mutex:
                abrt_cfg = self.flags.get(ptop, {}).get("u2abort", 1)
                addr = (ip or "\n") if abrt_cfg in (1, 2) else ""
                user = ((uname or "\n"), "*") if abrt_cfg in (1, 3) else None
                reg = self.registry.get(ptop, {}) if abrt_cfg else {}
                for wark, job in reg.items():
                    if (addr and addr != job["addr"]) or (
                        user and job["user"] not in user
                    ):
                        continue
                    jrem = djoin(job["prel"], job["name"])
                    if ANYWIN:
                        jrem = jrem.replace("\\", "/")
                    if jrem == rem:
                        if job["ptop"] != ptop:
                            t = "job.ptop [%s] != vol.ptop [%s] ??"
                            raise Exception(t % (job["ptop"], ptop))
                        partial = vn.canonical(vjoin(job["prel"], job["tnam"]))
                        break
            if partial:
                dip = ip
                dat = time.time()
            else:
                if not self.args.unpost:
                    t = "the unpost feature is disabled in server config"
                    raise Pebkac(400, t)

                _, _, _, _, dip, dat = self._find_from_vpath(ptop, rem)

            t = "you cannot delete this: "
            if not dip:
                t += "file not found"
            elif dip != ip:
                t += "not uploaded by (You)"
            elif dat < time.time() - self.args.unpost:
                t += "uploaded too long ago"
            else:
                t = ""

            if t:
                raise Pebkac(400, t)

        ptop = vn.realpath
        atop = vn.canonical(rem, False)
        self.vol_act[ptop] = self.db_act
        adir, fn = os.path.split(atop)
        try:
            st = bos.lstat(atop)
            is_dir = stat.S_ISDIR(st.st_mode)
        except:
            raise Pebkac(400, "file not found on disk (already deleted?)")

        scandir = not self.args.no_scandir
        if is_dir:
            # note: deletion inside shares would require a rewrite here;
            # shares necessitate get_dbv which is incompatible with walk
            g = vn0.walk("", rem0, [], uname, permsets, 2, scandir, True)
            if unpost:
                raise Pebkac(400, "cannot unpost folders")
        elif stat.S_ISLNK(st.st_mode) or stat.S_ISREG(st.st_mode):
            voldir = vsplit(rem)[0]
            vpath_dir = vsplit(vpath)[0]
            g = [(vn, voldir, vpath_dir, adir, [(fn, 0)], [], {})]  # type: ignore
        else:
            self.log("rm: skip type-0%o file %r" % (st.st_mode, atop))
            return 0, [], []

        xbd = vn.flags.get("xbd")
        xad = vn.flags.get("xad")
        n_files = 0
        for dbv, vrem, _, adir, files, rd, vd in g:
            for fn in [x[0] for x in files]:
                if lim:
                    lim[0] -= 1
                    if lim[0] < 0:
                        self.log("hit delete limit of {} files".format(lim[1]), 3)
                        break

                abspath = djoin(adir, fn)
                st = stl = bos.lstat(abspath)
                if stat.S_ISLNK(st.st_mode):
                    try:
                        st = bos.stat(abspath)
                    except:
                        pass

                volpath = ("%s/%s" % (vrem, fn)).strip("/")
                vpath = ("%s/%s" % (dbv.vpath, volpath)).strip("/")
                self.log("rm %r\n %r" % (vpath, abspath))
                if not unpost:
                    # recursion-only sanchk
                    _ = dbv.get(volpath, uname, *permsets[0])

                if xbd:
                    if not runhook(
                        self.log,
                        None,
                        self,
                        "xbd",
                        xbd,
                        abspath,
                        vpath,
                        "",
                        uname,
                        self.vfs.get_perms(vpath, uname),
                        stl.st_mtime,
                        st.st_size,
                        ip,
                        time.time(),
                        "",
                    ):
                        t = "delete blocked by xbd server config: %r"
                        self.log(t % (abspath,), 1)
                        continue

                n_files += 1
                with self.mutex, self.reg_mutex:
                    cur = None
                    try:
                        ptop = dbv.realpath
                        xlink = bool(dbv.flags.get("xlink"))
                        cur, wark, _, _, _, _ = self._find_from_vpath(ptop, volpath)
                        self._forget_file(
                            ptop, volpath, cur, wark, True, st.st_size, xlink
                        )
                    finally:
                        if cur:
                            cur.connection.commit()

                wunlink(self.log, abspath, dbv.flags)
                if partial:
                    wunlink(self.log, partial, dbv.flags)
                    partial = ""
                if xad:
                    runhook(
                        self.log,
                        None,
                        self,
                        "xad",
                        xad,
                        abspath,
                        vpath,
                        "",
                        uname,
                        self.vfs.get_perms(vpath, uname),
                        stl.st_mtime,
                        st.st_size,
                        ip,
                        time.time(),
                        "",
                    )

        if is_dir:
            ok, ng = rmdirs(self.log_func, scandir, True, atop, 1)
        else:
            ok = ng = []

        if rm_up:
            ok2, ng2 = rmdirs_up(os.path.dirname(atop), ptop)
        else:
            ok2 = ng2 = []

        return n_files, ok + ok2, ng + ng2

    def handle_cp(self, uname: str, ip: str, svp: str, dvp: str) -> str:
        if svp == dvp or dvp.startswith(svp + "/"):
            raise Pebkac(400, "cp: cannot copy parent into subfolder")

        svn, srem = self.vfs.get(svp, uname, True, False)
        dvn, drem = self.vfs.get(dvp, uname, False, True)
        svn_dbv, _ = svn.get_dbv(srem)
        sabs = svn.canonical(srem, False)
        curs: set["sqlite3.Cursor"] = set()
        self.db_act = self.vol_act[svn_dbv.realpath] = time.time()

        st = bos.stat(sabs)
        if stat.S_ISREG(st.st_mode) or stat.S_ISLNK(st.st_mode):
            with self.mutex:
                try:
                    ret = self._cp_file(uname, ip, svp, dvp, curs)
                finally:
                    for v in curs:
                        v.connection.commit()

            return ret

        if not stat.S_ISDIR(st.st_mode):
            raise Pebkac(400, "cannot copy type-0%o file" % (st.st_mode,))

        permsets = [[True, False]]
        scandir = not self.args.no_scandir

        # if user can see dotfiles in target volume, only include
        # dots from source vols where user also has the dot perm
        dots = 1 if uname in dvn.axs.udot else 2

        # don't use svn_dbv; would skip subvols due to _ls `if not rem:`
        g = svn.walk("", srem, [], uname, permsets, dots, scandir, True)
        with self.mutex:
            try:
                for dbv, vrem, _, atop, files, rd, vd in g:
                    for fn in files:
                        self.db_act = self.vol_act[dbv.realpath] = time.time()
                        svpf = "/".join(x for x in [dbv.vpath, vrem, fn[0]] if x)
                        if not svpf.startswith(svp + "/"):  # assert
                            self.log(min_ex(), 1)
                            t = "cp: bug at %r, top %r%s"
                            raise Pebkac(500, t % (svpf, svp, SEESLOG))

                        dvpf = dvp + svpf[len(svp) :]
                        self._cp_file(uname, ip, svpf, dvpf, curs)

                    for v in curs:
                        v.connection.commit()
                    curs.clear()
            finally:
                for v in curs:
                    v.connection.commit()

        return "k"

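    # per-file copy: the destination is created as a link via _symlink when
    # possible; a cross-device target (EXDEV) falls back to a plain copy,
    # and the db row plus its media tags follow the file into the
    # destination volume's index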
    def _cp_file(
        self, uname: str, ip: str, svp: str, dvp: str, curs: set["sqlite3.Cursor"]
    ) -> str:
        """mutex(main) me; will mutex(reg)"""
        svn, srem = self.vfs.get(svp, uname, True, False)
        svn_dbv, srem_dbv = svn.get_dbv(srem)

        dvn, drem = self.vfs.get(dvp, uname, False, True)
        dvn, drem = dvn.get_dbv(drem)

        sabs = svn.canonical(srem, False)
        dabs = dvn.canonical(drem)
        drd, dfn = vsplit(drem)

        if bos.path.exists(dabs):
            raise Pebkac(400, "cp2: target file exists")

        st = stl = bos.lstat(sabs)
        if stat.S_ISLNK(stl.st_mode):
            is_link = True
            try:
                st = bos.stat(sabs)
            except:
                pass  # broken symlink; keep as-is
        elif not stat.S_ISREG(st.st_mode):
            self.log("skipping type-0%o file %r" % (st.st_mode, sabs))
            return ""
        else:
            is_link = False

        ftime = stl.st_mtime
        fsize = st.st_size

        xbc = svn.flags.get("xbc")
        xac = dvn.flags.get("xac")
        if xbc:
            if not runhook(
                self.log,
                None,
                self,
                "xbc",
                xbc,
                sabs,
                svp,
                "",
                uname,
                self.vfs.get_perms(svp, uname),
                ftime,
                fsize,
                ip,
                time.time(),
                "",
            ):
                t = "copy blocked by xbc server config: %r" % (svp,)
                self.log(t, 1)
                raise Pebkac(405, t)

        bos.makedirs(os.path.dirname(dabs))

        c1, w, ftime_, fsize_, ip, at = self._find_from_vpath(
            svn_dbv.realpath, srem_dbv
        )
        c2 = self.cur.get(dvn.realpath)

        if w:
            assert c1  # !rm
            if c2 and c2 != c1:
                self._copy_tags(c1, c2, w)

            curs.add(c1)

            if c2:
                self.db_add(
                    c2,
                    {},  # skip upload hooks
                    drd,
                    dfn,
                    ftime,
                    fsize,
                    dvn.realpath,
                    dvn.vpath,
                    w,
                    w,
                    "",
                    "",
                    ip or "",
                    at or 0,
                )
                curs.add(c2)
        else:
            self.log("not found in src db: %r" % (svp,))

        try:
            if is_link and st != stl:
                # relink non-broken symlinks to still work after the copy,
                # but only resolve 1st level to maintain relativity
                dlink = bos.readlink(sabs)
                dlink = os.path.join(os.path.dirname(sabs), dlink)
                dlink = bos.path.abspath(dlink)
                self._symlink(dlink, dabs, dvn.flags, lmod=ftime)
            else:
                self._symlink(sabs, dabs, dvn.flags, lmod=ftime)

        except OSError as ex:
            if ex.errno != errno.EXDEV:
                raise

            self.log("using plain copy (%s):\n %r\n %r" % (ex.strerror, sabs, dabs))
            b1, b2 = fsenc(sabs), fsenc(dabs)
            is_link = os.path.islink(b1)  # due to _relink
            try:
                shutil.copy2(b1, b2)
            except:
                try:
                    wunlink(self.log, dabs, dvn.flags)
                except:
                    pass

                if not is_link:
                    raise

                # broken symlink? keep it as-is
                try:
                    zb = os.readlink(b1)
                    os.symlink(zb, b2)
                except:
                    wunlink(self.log, dabs, dvn.flags)
                    raise

            if is_link:
                try:
                    times = (int(time.time()), int(ftime))
                    bos.utime(dabs, times, False)
                except:
                    pass

        if xac:
            runhook(
                self.log,
                None,
                self,
                "xac",
                xac,
                dabs,
                dvp,
                "",
                uname,
                self.vfs.get_perms(dvp, uname),
                ftime,
                fsize,
                ip,
                time.time(),
                "",
            )

        return "k"

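    # worked example of the 1st-level resolve above (made-up paths, not
    # from the original source): copying a relative symlink
    #   /mnt/a/sub/s.bin -> "../orig.bin"
    # reads the link text, joins it onto /mnt/a/sub, and normalizes to
    # /mnt/a/orig.bin; the new link at the destination points at that
    # same target without resolving any further link levels behind it.
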
    def handle_mv(self, uname: str, ip: str, svp: str, dvp: str) -> str:
        if svp == dvp or dvp.startswith(svp + "/"):
            raise Pebkac(400, "mv: cannot move parent into subfolder")

        svn, srem = self.vfs.get(svp, uname, True, False, True)
        jail, jail_rem = svn.get_dbv(srem)
        sabs = svn.canonical(srem, False)
        curs: set["sqlite3.Cursor"] = set()
        self.db_act = self.vol_act[jail.realpath] = time.time()

        if not jail_rem:
            raise Pebkac(400, "mv: cannot move a mountpoint")

        st = bos.lstat(sabs)
        if stat.S_ISREG(st.st_mode) or stat.S_ISLNK(st.st_mode):
            with self.mutex:
                try:
                    ret = self._mv_file(uname, ip, svp, dvp, curs)
                finally:
                    for v in curs:
                        v.connection.commit()

            return ret

        if not stat.S_ISDIR(st.st_mode):
            raise Pebkac(400, "cannot move type-0%o file" % (st.st_mode,))

        permsets = [[True, False, True]]
        scandir = not self.args.no_scandir

        # following symlinks is too scary
        g = svn.walk("", srem, [], uname, permsets, 2, scandir, True)
        for dbv, vrem, _, atop, files, rd, vd in g:
            if dbv != jail:
                # fail early (prevent partial moves)
                raise Pebkac(400, "mv: source folder contains other volumes")

        g = svn.walk("", srem, [], uname, permsets, 2, scandir, True)
        with self.mutex:
            try:
                for dbv, vrem, _, atop, files, rd, vd in g:
                    if dbv != jail:
                        # the actual check (avoid toctou)
                        raise Pebkac(400, "mv: source folder contains other volumes")

                    for fn in files:
                        self.db_act = self.vol_act[dbv.realpath] = time.time()
                        svpf = "/".join(x for x in [dbv.vpath, vrem, fn[0]] if x)
                        if not svpf.startswith(svp + "/"):  # assert
                            self.log(min_ex(), 1)
                            t = "mv: bug at %r, top %r%s"
                            raise Pebkac(500, t % (svpf, svp, SEESLOG))

                        dvpf = dvp + svpf[len(svp) :]
                        self._mv_file(uname, ip, svpf, dvpf, curs)

                    for v in curs:
                        v.connection.commit()
                    curs.clear()
            finally:
                for v in curs:
                    v.connection.commit()

        rm_ok, rm_ng = rmdirs(self.log_func, scandir, True, sabs, 1)

        for zsl in (rm_ok, rm_ng):
            for ap in reversed(zsl):
                if not ap.startswith(sabs):
                    self.log(min_ex(), 1)
                    t = "mv_d: bug at %r, top %r%s"
                    raise Pebkac(500, t % (ap, sabs, SEESLOG))

                rem = ap[len(sabs) :].replace(os.sep, "/").lstrip("/")
                vp = vjoin(dvp, rem)
                try:
                    dvn, drem = self.vfs.get(vp, uname, False, True)
                    bos.mkdir(dvn.canonical(drem))
                except:
                    pass

        return "k"

    def _mv_file(
        self, uname: str, ip: str, svp: str, dvp: str, curs: set["sqlite3.Cursor"]
    ) -> str:
        """mutex(main) me; will mutex(reg)"""
        svn, srem = self.vfs.get(svp, uname, True, False, True)
        svn, srem = svn.get_dbv(srem)

        dvn, drem = self.vfs.get(dvp, uname, False, True)
        dvn, drem = dvn.get_dbv(drem)

        sabs = svn.canonical(srem, False)
        dabs = dvn.canonical(drem)
        drd, dfn = vsplit(drem)

        n1 = svp.split("/")[-1]
        n2 = dvp.split("/")[-1]
        if n1.startswith(".") or n2.startswith("."):
            if self.args.no_dot_mv:
                raise Pebkac(400, "moving dotfiles is disabled in server config")
            elif self.args.no_dot_ren and n1 != n2:
                raise Pebkac(400, "renaming dotfiles is disabled in server config")

        if bos.path.exists(dabs):
            raise Pebkac(400, "mv2: target file exists")

        is_link = is_dirlink = False
        st = stl = bos.lstat(sabs)
        if stat.S_ISLNK(stl.st_mode):
            is_link = True
            try:
                st = bos.stat(sabs)
                is_dirlink = stat.S_ISDIR(st.st_mode)
            except:
                pass  # broken symlink; keep as-is

        ftime = stl.st_mtime
        fsize = st.st_size

        xbr = svn.flags.get("xbr")
        xar = dvn.flags.get("xar")
        if xbr:
            if not runhook(
                self.log,
                None,
                self,
                "xbr",
                xbr,
                sabs,
                svp,
                "",
                uname,
                self.vfs.get_perms(svp, uname),
                ftime,
                fsize,
                ip,
                time.time(),
                "",
            ):
                t = "move blocked by xbr server config: %r" % (svp,)
                self.log(t, 1)
                raise Pebkac(405, t)

        is_xvol = svn.realpath != dvn.realpath

        bos.makedirs(os.path.dirname(dabs))

        if is_dirlink:
            dlabs = absreal(sabs)
            t = "moving symlink from %r to %r, target %r"
            self.log(t % (sabs, dabs, dlabs))
            mt = bos.path.getmtime(sabs, False)
            wunlink(self.log, sabs, svn.flags)
            self._symlink(dlabs, dabs, dvn.flags, False, lmod=mt)

            # folders are too scary, schedule rescan of both vols
            self.need_rescan.add(svn.vpath)
            self.need_rescan.add(dvn.vpath)
            with self.rescan_cond:
                self.rescan_cond.notify_all()

            if xar:
                runhook(
                    self.log,
                    None,
                    self,
                    "xar.ln",
                    xar,
                    dabs,
                    dvp,
                    "",
                    uname,
                    self.vfs.get_perms(dvp, uname),
                    ftime,
                    fsize,
                    ip,
                    time.time(),
                    "",
                )

            return "k"

        c1, w, ftime_, fsize_, ip, at = self._find_from_vpath(svn.realpath, srem)
        c2 = self.cur.get(dvn.realpath)

        has_dupes = False
        if w:
            assert c1  # !rm
            if c2 and c2 != c1:
                self._copy_tags(c1, c2, w)

            xlink = bool(svn.flags.get("xlink"))

            with self.reg_mutex:
                has_dupes = self._forget_file(
                    svn.realpath, srem, c1, w, is_xvol, fsize_ or fsize, xlink
                )

            if not is_xvol:
                has_dupes = self._relink(w, svn.realpath, srem, dabs, c1, xlink)

            curs.add(c1)

            if c2:
                self.db_add(
                    c2,
                    {},  # skip upload hooks
                    drd,
                    dfn,
                    ftime,
                    fsize,
                    dvn.realpath,
                    dvn.vpath,
                    w,
                    w,
                    "",
                    "",
                    ip or "",
                    at or 0,
                )
                curs.add(c2)
        else:
            self.log("not found in src db: %r" % (svp,))

        try:
            if is_xvol and has_dupes:
                raise OSError(errno.EXDEV, "src is symlink")

            if is_link and st != stl:
                # relink non-broken symlinks to still work after the move,
                # but only resolve 1st level to maintain relativity
                dlink = bos.readlink(sabs)
                dlink = os.path.join(os.path.dirname(sabs), dlink)
                dlink = bos.path.abspath(dlink)
                self._symlink(dlink, dabs, dvn.flags, lmod=ftime, is_mv=True)
                wunlink(self.log, sabs, svn.flags)
            else:
                atomic_move(self.log, sabs, dabs, svn.flags)

        except OSError as ex:
            if ex.errno != errno.EXDEV:
                raise

            self.log("using copy+delete (%s):\n %r\n %r" % (ex.strerror, sabs, dabs))
            b1, b2 = fsenc(sabs), fsenc(dabs)
            is_link = os.path.islink(b1)  # due to _relink
            try:
                shutil.copy2(b1, b2)
            except:
                try:
                    wunlink(self.log, dabs, dvn.flags)
                except:
                    pass

                if not is_link:
                    raise

                # broken symlink? keep it as-is
                try:
                    zb = os.readlink(b1)
                    os.symlink(zb, b2)
                except:
                    wunlink(self.log, dabs, dvn.flags)
                    raise

            if is_link:
                try:
                    times = (int(time.time()), int(ftime))
                    bos.utime(dabs, times, False)
                except:
                    pass

            wunlink(self.log, sabs, svn.flags)

        if xar:
            runhook(
                self.log,
                None,
                self,
                "xar.mv",
                xar,
                dabs,
                dvp,
                "",
                uname,
                self.vfs.get_perms(dvp, uname),
                ftime,
                fsize,
                ip,
                time.time(),
                "",
            )

        return "k"

    def _copy_tags(
        self, csrc: "sqlite3.Cursor", cdst: "sqlite3.Cursor", wark: str
    ) -> None:
        """copy all tags for wark from src-db to dst-db"""
        w = wark[:16]

        if cdst.execute("select * from mt where w=? limit 1", (w,)).fetchone():
            return  # existing tags in dest db

        for _, k, v in csrc.execute("select * from mt where w=?", (w,)):
            cdst.execute("insert into mt values(?,?,?)", (w, k, v))

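    # illustration with made-up values (the mt layout is inferred from
    # the queries above: one (wark-prefix, key, value) row per tag):
    #   ("8nFKW1q5jZFWjzFS", "artist", "lidell")
    #   ("8nFKW1q5jZFWjzFS", ".dur", 247.3)
    # copying tags is just replaying such rows into the dest db, unless
    # the dest already has at least one row for that wark.
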
    def _find_from_vpath(
        self, ptop: str, vrem: str
    ) -> tuple[
        Optional["sqlite3.Cursor"],
        Optional[str],
        Optional[int],
        Optional[int],
        str,
        Optional[int],
    ]:
        cur = self.cur.get(ptop)
        if not cur:
            return None, None, None, None, "", None

        rd, fn = vsplit(vrem)
        q = "select w, mt, sz, ip, at from up where rd=? and fn=? limit 1"
        try:
            c = cur.execute(q, (rd, fn))
        except:
            assert self.mem_cur  # !rm
            c = cur.execute(q, s3enc(self.mem_cur, rd, fn))

        hit = c.fetchone()
        if hit:
            wark, ftime, fsize, ip, at = hit
            return cur, wark, ftime, fsize, ip, at
        return cur, None, None, None, "", None

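    # editor's note: the except-branch above is assumed to handle
    # filenames which are not valid utf-8; those are stored in the db
    # in an escaped form by s3enc, and decoded again wherever a
    # "//"-prefixed rd/fn shows up (see s3dec in _relink below).
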
    def _forget_file(
        self,
        ptop: str,
        vrem: str,
        cur: Optional["sqlite3.Cursor"],
        wark: Optional[str],
        drop_tags: bool,
        sz: int,
        xlink: bool,
    ) -> bool:
        """
        mutex(main,reg) me
        forgets file in db, fixes symlinks, does not delete
        """
        srd, sfn = vsplit(vrem)
        has_dupes = False
        self.log("forgetting %r" % (vrem,))
        if wark and cur:
            self.log("found {} in db".format(wark))
            if drop_tags:
                if self._relink(wark, ptop, vrem, "", cur, xlink):
                    has_dupes = True
                    drop_tags = False

            if drop_tags:
                q = "delete from mt where w=?"
                cur.execute(q, (wark[:16],))

            self.db_rm(cur, srd, sfn, sz)

        reg = self.registry.get(ptop)
        if reg:
            vdir = vsplit(vrem)[0]
            wark = wark or next(
                (
                    x
                    for x, y in reg.items()
                    if sfn in [y["name"], y.get("tnam")] and y["prel"] == vdir
                ),
                "",
            )
            job = reg.get(wark) if wark else None
            if job:
                if job["need"]:
                    t = "forgetting partial upload {} ({})"
                    p = self._vis_job_progress(job)
                    self.log(t.format(wark, p))

                src = djoin(ptop, vrem)
                zi = len(self.dupesched.pop(src, []))
                if zi:
                    t = "...and forgetting %d links in dupesched"
                    self.log(t % (zi,))

                assert wark
                del reg[wark]

        return has_dupes

    def _relink(
        self,
        wark: str,
        sptop: str,
        srem: str,
        dabs: str,
        vcur: Optional["sqlite3.Cursor"],
        xlink: bool,
    ) -> int:
        """
        update symlinks from file at svn/srem to dabs (rename),
        or to first remaining full if no dabs (delete)
        """
        dupes = []
        sabs = djoin(sptop, srem)

        if self.no_expr_idx:
            q = r"select rd, fn from up where w = ?"
            argv = (wark,)
        else:
            q = r"select rd, fn from up where substr(w,1,16)=? and +w=?"
            argv = (wark[:16], wark)

        for ptop, cur in self.cur.items():
            if not xlink and cur and cur != vcur:
                continue
            for rd, fn in cur.execute(q, argv):
                if rd.startswith("//") or fn.startswith("//"):
                    rd, fn = s3dec(rd, fn)

                dvrem = vjoin(rd, fn).strip("/")
                if ptop != sptop or srem != dvrem:
                    dupes.append([ptop, dvrem])
                    self.log("found %s dupe: %r %r" % (wark, ptop, dvrem))

        if not dupes:
            return 0

        full: dict[str, tuple[str, str]] = {}
        links: dict[str, tuple[str, str]] = {}
        for ptop, vp in dupes:
            ap = djoin(ptop, vp)
            try:
                d = links if bos.path.islink(ap) else full
                d[ap] = (ptop, vp)
            except:
                self.log("relink: not found: %r" % (ap,))

        # self.log("full:\n" + "\n".join(" {:90}: {}".format(*x) for x in full.items()))
        # self.log("links:\n" + "\n".join(" {:90}: {}".format(*x) for x in links.items()))
        if not dabs and not full and links:
            # deleting final remaining full copy; swap it with a symlink
            slabs = list(sorted(links.keys()))[0]
            ptop, rem = links.pop(slabs)
            self.log("linkswap %r and %r" % (sabs, slabs))
            mt = bos.path.getmtime(slabs, False)
            flags = self.flags.get(ptop) or {}
            atomic_move(self.log, sabs, slabs, flags)
            bos.utime(slabs, (int(time.time()), int(mt)), False)
            self._symlink(slabs, sabs, flags, False, is_mv=True)
            full[slabs] = (ptop, rem)
            sabs = slabs

        if not dabs:
            dabs = list(sorted(full.keys()))[0]

        for alink, parts in links.items():
            lmod = 0.0
            try:
                faulty = False
                ldst = alink
                try:
                    for n in range(40):  # MAXSYMLINKS
                        zs = bos.readlink(ldst)
                        ldst = os.path.join(os.path.dirname(ldst), zs)
                        ldst = bos.path.abspath(ldst)
                        if not bos.path.islink(ldst):
                            break

                        if ldst == sabs:
                            t = "relink because level %d would break:"
                            self.log(t % (n,), 6)
                            faulty = True
                except Exception as ex:
                    self.log("relink because walk failed: %s; %r" % (ex, ex), 3)
                    faulty = True

                zs = absreal(alink)
                if ldst != zs:
                    t = "relink because computed != actual destination:\n %r\n %r"
                    self.log(t % (ldst, zs), 3)
                    ldst = zs
                    faulty = True

                if bos.path.islink(ldst):
                    raise Exception("broken symlink: %s" % (alink,))

                if alink != sabs and ldst != sabs and not faulty:
                    continue  # original symlink OK; leave it be

            except Exception as ex:
                t = "relink because symlink verification failed: %s; %r"
                self.log(t % (ex, ex), 3)

            self.log("relinking %r to %r" % (alink, dabs))
            flags = self.flags.get(parts[0]) or {}
            try:
                lmod = bos.path.getmtime(alink, False)
                wunlink(self.log, alink, flags)
            except:
                pass

            # this creates a link at alink, pointing to dabs; dabs may
            # not exist yet, which becomes problematic if the symlinking
            # fails and it has to fall back on hardlinking/copying files
            # (for example a volume with symlinked dupes but no --dedup);
            # fsrc=sabs is then a source that currently resolves to a copy

            self._symlink(
                dabs, alink, flags, False, lmod=lmod or 0, fsrc=sabs, is_mv=True
            )

        return len(full) + len(links)

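    # worked example (made-up paths): deleting the last full copy
    # /v/a/f.bin while /v/b/f.bin and /v/c/f.bin are symlinks to it;
    # the linkswap above moves the file into the lexicographically
    # first link location (/v/b/f.bin), leaves a symlink behind at
    # /v/a/f.bin, and the loop then re-points any remaining symlink
    # whose resolved chain would still end up at the old location.
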
    def _get_wark(self, cj: dict[str, Any]) -> str:
        if len(cj["name"]) > 1024 or len(cj["hash"]) > 512 * 1024:  # 16TiB
            raise Pebkac(400, "name or numchunks not according to spec")

        for k in cj["hash"]:
            if not self.r_hash.match(k):
                raise Pebkac(
                    400, "at least one hash is not according to spec: {}".format(k)
                )

        # try to use client-provided timestamp, don't care if it fails somehow
        try:
            cj["lmod"] = int(cj["lmod"])
        except:
            cj["lmod"] = int(time.time())

        if cj["hash"]:
            wark = up2k_wark_from_hashlist(self.salt, cj["size"], cj["hash"])
        else:
            wark = up2k_wark_from_metadata(
                self.salt, cj["size"], cj["lmod"], cj["prel"], cj["name"]
            )

        return wark

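    # editor's arithmetic for the "16TiB" comment above: 512*1024
    # chunk-hashes at an assumed 32 MiB chunksize (the point where
    # up2k_chunksize relaxes its chunk-count cap) is
    # 524288 * 32 MiB = 16 TiB of file data.
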
    def _hashlist_from_file(
        self, path: str, prefix: str = ""
    ) -> tuple[list[str], os.stat_result]:
        st = bos.stat(path)
        fsz = st.st_size
        csz = up2k_chunksize(fsz)
        ret = []
        suffix = " MB, {}".format(path)
        with open(fsenc(path), "rb", self.args.iobuf) as f:
            if self.mth and fsz >= 1024 * 512:
                tlt = self.mth.hash(f, fsz, csz, self.pp, prefix, suffix)
                ret = [x[0] for x in tlt]
                fsz = 0

            while fsz > 0:
                # same as `hash_at` except for `imutex` / bufsz
                if self.stop:
                    return [], st

                if self.pp:
                    mb = fsz // (1024 * 1024)
                    self.pp.msg = prefix + str(mb) + suffix

                hashobj = hashlib.sha512()
                rem = min(csz, fsz)
                fsz -= rem
                while rem > 0:
                    buf = f.read(min(rem, 64 * 1024))
                    if not buf:
                        raise Exception("EOF at " + str(f.tell()))

                    hashobj.update(buf)
                    rem -= len(buf)

                digest = hashobj.digest()[:33]
                ret.append(ub64enc(digest).decode("ascii"))

        return ret, st

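    # format note (editor): each chunk hash above is the first 33 bytes
    # of a sha512 digest, base64-encoded; 33 bytes encode to exactly 44
    # characters with no padding, matching the length of the full-file
    # wark produced by up2k_wark_from_hashlist below.
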
    def _new_upload(self, job: dict[str, Any], vfs: VFS, depth: int) -> dict[str, str]:
        pdir = djoin(job["ptop"], job["prel"])
        if not job["size"]:
            try:
                inf = bos.stat(djoin(pdir, job["name"]))
                if stat.S_ISREG(inf.st_mode):
                    job["lmod"] = inf.st_size
                    return {}
            except:
                pass

        vf = self.flags[job["ptop"]]
        xbu = vf.get("xbu")
        ap_chk = djoin(pdir, job["name"])
        vp_chk = djoin(job["vtop"], job["prel"], job["name"])
        if xbu:
            hr = runhook(
                self.log,
                None,
                self,
                "xbu.up2k",
                xbu,
                ap_chk,
                vp_chk,
                job["host"],
                job["user"],
                self.vfs.get_perms(vp_chk, job["user"]),
                job["lmod"],
                job["size"],
                job["addr"],
                job["t0"],
                "",
            )
            if not hr:
                t = "upload blocked by xbu server config: %r" % (vp_chk,)
                self.log(t, 1)
                raise Pebkac(403, t)
            if hr.get("reloc"):
                x = pathmod(self.vfs, ap_chk, vp_chk, hr["reloc"])
                if x:
                    zvfs = vfs
                    pdir, _, job["name"], (vfs, rem) = x
                    job["vcfg"] = vf = vfs.flags
                    job["ptop"] = vfs.realpath
                    job["vtop"] = vfs.vpath
                    job["prel"] = rem
                    if zvfs.vpath != vfs.vpath:
                        self.log("xbu reloc2:%d..." % (depth,), 6)
                        return self._handle_json(job, depth + 1)

        job["name"] = self._untaken(pdir, job, job["t0"])
        self.registry[job["ptop"]][job["wark"]] = job

        tnam = job["name"] + ".PARTIAL"
        if self.args.dotpart:
            tnam = "." + tnam

        if self.args.nw:
            job["tnam"] = tnam
            if not job["hash"]:
                del self.registry[job["ptop"]][job["wark"]]
            return {}

        if self.args.plain_ip:
            dip = job["addr"].replace(":", ".")
        else:
            dip = self.hub.iphash.s(job["addr"])

        suffix = "-%.6f-%s" % (job["t0"], dip)
        f, job["tnam"] = ren_open(tnam, "wb", fdir=pdir, suffix=suffix)
        try:
            abspath = djoin(pdir, job["tnam"])
            sprs = job["sprs"]
            sz = job["size"]
            relabel = False
            if (
                ANYWIN
                and sprs
                and self.args.sparse
                and self.args.sparse * 1024 * 1024 <= sz
            ):
                try:
                    sp.check_call(["fsutil", "sparse", "setflag", abspath])
                except:
                    self.log("could not sparse %r" % (abspath,), 3)
                    relabel = True
                    sprs = False

            if not ANYWIN and sprs and sz > 1024 * 1024:
                fs = self.fstab.get(pdir)
                if fs == "ok":
                    pass
                elif "nosparse" in vf:
                    t = "volflag 'nosparse' is preventing creation of sparse files for uploads to [%s]"
                    self.log(t % (job["ptop"],))
                    relabel = True
                    sprs = False
                elif "sparse" in vf:
                    t = "volflag 'sparse' is forcing creation of sparse files for uploads to [%s]"
                    self.log(t % (job["ptop"],))
                    relabel = True
                else:
                    relabel = True
                    f.seek(1024 * 1024 - 1)
                    f.write(b"e")
                    f.flush()
                    try:
                        nblk = bos.stat(abspath).st_blocks
                        sprs = nblk < 2048
                    except:
                        sprs = False

            if relabel:
                t = "sparse files {} on {} filesystem at {}"
                nv = "ok" if sprs else "ng"
                self.log(t.format(nv, self.fstab.get(pdir), pdir))
                self.fstab.relabel(pdir, nv)
                job["sprs"] = sprs

            if job["hash"] and sprs:
                f.seek(sz - 1)
                f.write(b"e")
        finally:
            f.close()

        if not job["hash"]:
            self._finish_upload(job["ptop"], job["wark"])

        return {}

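    # probe arithmetic (editor's note): the fallback branch above writes
    # a single byte at offset 1 MiB - 1, then decides the filesystem
    # supports sparse files if fewer than 2048 blocks were allocated;
    # st_blocks counts 512-byte blocks, so 2048 of them is the full 1 MiB.
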
    def _snapshot(self) -> None:
        slp = self.args.snap_wri
        if not slp or self.args.no_snap:
            return

        while True:
            time.sleep(slp)
            if self.pp:
                slp = 5
            else:
                slp = self.args.snap_wri
                self.do_snapshot()

    def do_snapshot(self) -> None:
        with self.mutex, self.reg_mutex:
            for k, reg in self.registry.items():
                self._snap_reg(k, reg)

    def _snap_reg(self, ptop: str, reg: dict[str, dict[str, Any]]) -> None:
        now = time.time()
        histpath = self.vfs.histtab.get(ptop)
        if not histpath:
            return

        idrop = self.args.snap_drop * 60
        rm = [x for x in reg.values() if x["need"] and now - x["poke"] >= idrop]

        if self.args.nw:
            lost = []
        else:
            lost = [
                x
                for x in reg.values()
                if x["need"]
                and not bos.path.exists(djoin(x["ptop"], x["prel"], x["name"]))
            ]

        if rm or lost:
            t = "dropping {} abandoned, {} deleted uploads in {}"
            t = t.format(len(rm), len(lost), ptop)
            rm.extend(lost)
            vis = [self._vis_job_progress(x) for x in rm]
            self.log("\n".join([t] + vis))

        for job in rm:
            del reg[job["wark"]]
            try:
                # remove the filename reservation
                path = djoin(job["ptop"], job["prel"], job["name"])
                if bos.path.getsize(path) == 0:
                    bos.unlink(path)
            except:
                pass

            try:
                if len(job["hash"]) == len(job["need"]):
                    # PARTIAL is empty, delete that too
                    path = djoin(job["ptop"], job["prel"], job["tnam"])
                    bos.unlink(path)
            except:
                pass

        if self.args.nw or self.args.no_snap:
            return

        path = os.path.join(histpath, "up2k.snap")
        if not reg:
            if ptop not in self.snap_prev or self.snap_prev[ptop] is not None:
                self.snap_prev[ptop] = None
                if bos.path.exists(path):
                    bos.unlink(path)
            return

        newest = float(
            max(x["t0"] if "done" in x else x["poke"] for _, x in reg.items())
            if reg
            else 0
        )
        etag = (len(reg), newest)
        if etag == self.snap_prev.get(ptop):
            return

        if bos.makedirs(histpath):
            hidedir(histpath)

        path2 = "{}.{}".format(path, os.getpid())
        body = {"droppable": self.droppable[ptop], "registry": reg}
        j = json.dumps(body, sort_keys=True, separators=(",\n", ": ")).encode("utf-8")
        # j = re.sub(r'"(need|hash)": \[\],\n', "", j)  # bytes=slow, utf8=hungry
        j = j.replace(b'"need": [],\n', b"")  # surprisingly optimal
        j = j.replace(b'"hash": [],\n', b"")
        with gzip.GzipFile(path2, "wb") as f:
            f.write(j)

        atomic_move(self.log, path2, path, VF_CAREFUL)

        self.log("snap: %s |%d| %.2fs" % (path, len(reg), time.time() - now))
        self.snap_prev[ptop] = etag

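    # rough shape of the snapshot written above (illustrative, made-up
    # values; the keys come from the json.dumps call):
    #   {"droppable": ["kQpV..."],
    #    "registry": {"kQpV...": {"name": "a.bin", "need": [...], ...}}}
    # gzipped into up2k.snap, with empty need/hash lists stripped out
    # textually to keep the file small.
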
    def _tagger(self) -> None:
        with self.mutex:
            self.n_tagq += 1

        assert self.mtag
        while True:
            with self.mutex:
                self.n_tagq -= 1

            ptop, wark, rd, fn, sz, ip, at = self.tagq.get()
            if "e2t" not in self.flags[ptop]:
                continue

            # self.log("\n " + repr([ptop, rd, fn]))
            abspath = djoin(ptop, rd, fn)
            try:
                tags = self.mtag.get(abspath) if sz else {}
                ntags1 = len(tags)
                parsers = self._get_parsers(ptop, tags, abspath)
                if self.args.mtag_vv:
                    t = "parsers({}): {}\n{} {} tags: {}".format(
                        ptop, list(parsers.keys()), ntags1, self.mtag.backend, tags
                    )
                    self.log(t)

                if parsers:
                    tags["up_ip"] = ip
                    tags["up_at"] = at
                    tags.update(self.mtag.get_bin(parsers, abspath, tags))
            except Exception as ex:
                self._log_tag_err("", abspath, ex)
                continue

            with self.mutex:
                cur = self.cur[ptop]
                if not cur:
                    self.log("no cursor to write tags with??", c=1)
                    continue

                # TODO is undef if vol 404 on startup
                entags = self.entags.get(ptop)
                if not entags:
                    self.log("no entags okay.jpg", c=3)
                    continue

                self._tag_file(cur, entags, wark, abspath, tags)
                cur.connection.commit()

            self.log("tagged %r (%d+%d)" % (abspath, ntags1, len(tags) - ntags1))

    def _hasher(self) -> None:
        with self.hashq_mutex:
            self.n_hashq += 1

        while True:
            with self.hashq_mutex:
                self.n_hashq -= 1
                # self.log("hashq {}".format(self.n_hashq))

            task = self.hashq.get()
            if len(task) != 9:
                raise Exception("invalid hash task")

            try:
                if not self._hash_t(task) and self.stop:
                    return
            except Exception as ex:
                self.log("failed to hash %r: %s" % (task, ex), 1)

    def _hash_t(
        self, task: tuple[str, str, dict[str, Any], str, str, str, float, str, bool]
    ) -> bool:
        ptop, vtop, flags, rd, fn, ip, at, usr, skip_xau = task
        # self.log("hashq {} pop {}/{}/{}".format(self.n_hashq, ptop, rd, fn))
        with self.mutex, self.reg_mutex:
            if not self.register_vpath(ptop, flags):
                return True

        abspath = djoin(ptop, rd, fn)
        self.log("hashing %r" % (abspath,))
        inf = bos.stat(abspath)
        if not inf.st_size:
            wark = up2k_wark_from_metadata(
                self.salt, inf.st_size, int(inf.st_mtime), rd, fn
            )
        else:
            hashes, _ = self._hashlist_from_file(abspath)
            if not hashes:
                return False

            wark = up2k_wark_from_hashlist(self.salt, inf.st_size, hashes)

        with self.mutex, self.reg_mutex:
            self.idx_wark(
                self.flags[ptop],
                rd,
                fn,
                inf.st_mtime,
                inf.st_size,
                ptop,
                vtop,
                wark,
                wark,
                "",
                usr,
                ip,
                at,
                skip_xau,
            )

        if at and time.time() - at > 30:
            with self.rescan_cond:
                self.rescan_cond.notify_all()

        if self.fx_backlog:
            self.do_fx_backlog()

        return True

    def hash_file(
        self,
        ptop: str,
        vtop: str,
        flags: dict[str, Any],
        rd: str,
        fn: str,
        ip: str,
        at: float,
        usr: str,
        skip_xau: bool = False,
    ) -> None:
        if "e2d" not in flags:
            return

        if self.n_hashq > 1024:
            t = "%d files in hashq; taking a nap"
            self.log(t % (self.n_hashq,), 6)

            for _ in range(self.n_hashq // 1024):
                time.sleep(0.1)
                if self.n_hashq < 1024:
                    break

        zt = (ptop, vtop, flags, rd, fn, ip, at, usr, skip_xau)
        with self.hashq_mutex:
            self.hashq.put(zt)
            self.n_hashq += 1

    def do_fx_backlog(self):
        with self.mutex, self.reg_mutex:
            todo = self.fx_backlog
            self.fx_backlog = []
            for act, hr, req_vp in todo:
                self.hook_fx(act, hr, req_vp)

    def hook_fx(self, act: str, hr: dict[str, str], req_vp: str) -> None:
        bad = [k for k in hr if k != "vp"]
        if bad:
            t = "got unsupported key in %s from hook: %s"
            raise Exception(t % (act, bad))

        for fvp in hr.get("vp") or []:
            # expect vpath including filename; either absolute
            # or relative to the client's vpath (request url)
            if fvp.startswith("/"):
                fvp, fn = vsplit(fvp[1:])
                fvp = "/" + fvp
            else:
                fvp, fn = vsplit(fvp)

            x = pathmod(self.vfs, "", req_vp, {"vp": fvp, "fn": fn})
            if not x:
                t = "hook_fx(%s): failed to resolve %r based on %r"
                self.log(t % (act, fvp, req_vp))
                continue

            ap, rd, fn, (vn, rem) = x
            vp = vjoin(rd, fn)
            if not vp:
                raise Exception("hook_fx: blank vp from pathmod")

            if act == "idx":
                rd = rd[len(vn.vpath) :].strip("/")
                self.hash_file(
                    vn.realpath, vn.vpath, vn.flags, rd, fn, "", time.time(), "", True
                )

            if act == "del":
                self._handle_rm(LEELOO_DALLAS, "", vp, [], False, False)

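    # resolution example (made-up values): with req_vp "pics/up.jpg",
    # a hook returning {"vp": ["thumbs/t.jpg"]} resolves relative to
    # "pics", while {"vp": ["/thumbs/t.jpg"]} is an absolute vpath from
    # the server root; pathmod turns either into an (abspath, vpath)
    # pair which is then indexed or deleted depending on act.
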
    def shutdown(self) -> None:
        self.stop = True

        if self.mth:
            self.mth.stop = True

        # in case we're killed early
        for x in list(self.spools):
            self._unspool(x)

        if not self.args.no_snap:
            self.log("writing snapshot")
            self.do_snapshot()

        t0 = time.time()
        while self.pp:
            time.sleep(0.1)
            if time.time() - t0 >= 1:
                break

        # if there is time
        for x in list(self.spools):
            self._unspool(x)

        for cur in self.cur.values():
            db = cur.connection
            try:
                db.interrupt()
            except:
                pass

            cur.close()
            db.close()

        if self.mem_cur:
            db = self.mem_cur.connection
            self.mem_cur.close()
            db.close()

        self.registry = {}


def up2k_chunksize(filesize: int) -> int:
    chunksize = 1024 * 1024
    stepsize = 512 * 1024
    while True:
        for mul in [1, 2]:
            nchunks = math.ceil(filesize * 1.0 / chunksize)
            if nchunks <= 256 or (chunksize >= 32 * 1024 * 1024 and nchunks <= 4096):
                return chunksize

            chunksize += stepsize
            stepsize *= mul

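# hand-computed examples of the progression above (editor's table):
#   <= 256 MiB -> 1 MiB chunks
#      1 GiB   -> 4 MiB  (256 chunks)
#      8 GiB   -> 32 MiB (256 chunks)
#    128 GiB   -> 32 MiB (4096 chunks; the relaxed cap for big chunks)
# so chunk counts stay between 1 and 4096 for any supported filesize.

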
def up2k_wark_from_hashlist(salt: str, filesize: int, hashes: list[str]) -> str:
    """server-reproducible file identifier, independent of name or location"""
    values = [salt, str(filesize)] + hashes
    vstr = "\n".join(values)

    wark = hashlib.sha512(vstr.encode("utf-8")).digest()[:33]
    return ub64enc(wark).decode("ascii")

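# example (made-up hashes): up2k_wark_from_hashlist("salt", 3, ["aa", "bb"])
# sha512-hashes "salt\n3\naa\nbb", keeps the first 33 bytes, and base64s
# them into a 44-character identifier; identical content yields an
# identical wark regardless of filename or location, which is what makes
# dedup possible.

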
def up2k_wark_from_metadata(salt: str, sz: int, lastmod: int, rd: str, fn: str) -> str:
    ret = sfsenc("%s\n%d\n%d\n%s\n%s" % (salt, lastmod, sz, rd, fn))
    ret = ub64enc(hashlib.sha512(ret).digest())
    return ("#%s" % (ret.decode("ascii"),))[:44]

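# editor's note: the "#" prefix (and the trim to the same 44-char length)
# presumably marks this as a metadata-based wark so it cannot collide with
# the content-based ones above; _get_wark falls back to it when the client
# provides no chunk hashes.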