mirror of
https://github.com/9001/copyparty.git
synced 2025-08-17 09:02:15 -06:00
sqlite3 as up2k db + build index on boot + rproxy ip fix
This commit is contained in:
parent
de724a1ff3
commit
7bd2b9c23a
2
.vscode/launch.json
vendored
2
.vscode/launch.json
vendored
|
@ -12,6 +12,8 @@
|
||||||
//"-nw",
|
//"-nw",
|
||||||
"-ed",
|
"-ed",
|
||||||
"-emp",
|
"-emp",
|
||||||
|
"-e2d",
|
||||||
|
"-e2s",
|
||||||
"-a",
|
"-a",
|
||||||
"ed:wark",
|
"ed:wark",
|
||||||
"-v",
|
"-v",
|
||||||
|
|
|
@ -144,8 +144,7 @@ roughly sorted by priority
|
||||||
* `os.copy_file_range` for up2k cloning
|
* `os.copy_file_range` for up2k cloning
|
||||||
* support pillow-simd
|
* support pillow-simd
|
||||||
* cache sha512 chunks on client
|
* cache sha512 chunks on client
|
||||||
* ~~symlink existing files on upload~~
|
* persist unfinished up2k uploads too
|
||||||
* ok at runtime, up2k db still not persisted
|
|
||||||
* comment field
|
* comment field
|
||||||
* ~~look into android thumbnail cache file format~~ bad idea
|
* ~~look into android thumbnail cache file format~~ bad idea
|
||||||
* figure out the deal with pixel3a not being connectable as hotspot
|
* figure out the deal with pixel3a not being connectable as hotspot
|
||||||
|
|
|
@ -105,17 +105,22 @@ def main():
|
||||||
epilog=dedent(
|
epilog=dedent(
|
||||||
"""
|
"""
|
||||||
-a takes username:password,
|
-a takes username:password,
|
||||||
-v takes src:dst:permset:permset:... where "permset" is
|
-v takes src:dst:permset:permset:cflag:cflag:...
|
||||||
accesslevel followed by username (no separator)
|
where "permset" is accesslevel followed by username (no separator)
|
||||||
|
and "cflag" is config flags to set on this volume
|
||||||
|
|
||||||
|
list of cflags:
|
||||||
|
cnodupe rejects existing files (instead of symlinking them)
|
||||||
|
|
||||||
example:\033[35m
|
example:\033[35m
|
||||||
-a ed:hunter2 -v .::r:aed -v ../inc:dump:w:aed \033[36m
|
-a ed:hunter2 -v .::r:aed -v ../inc:dump:w:aed:cnodupe \033[36m
|
||||||
mount current directory at "/" with
|
mount current directory at "/" with
|
||||||
* r (read-only) for everyone
|
* r (read-only) for everyone
|
||||||
* a (read+write) for ed
|
* a (read+write) for ed
|
||||||
mount ../inc at "/dump" with
|
mount ../inc at "/dump" with
|
||||||
* w (write-only) for everyone
|
* w (write-only) for everyone
|
||||||
* a (read+write) for ed \033[0m
|
* a (read+write) for ed
|
||||||
|
* reject duplicate files \033[0m
|
||||||
|
|
||||||
if no accounts or volumes are configured,
|
if no accounts or volumes are configured,
|
||||||
current folder will be read/write for everyone
|
current folder will be read/write for everyone
|
||||||
|
@ -125,6 +130,7 @@ def main():
|
||||||
"""
|
"""
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
|
# fmt: off
|
||||||
ap.add_argument("-c", metavar="PATH", type=str, action="append", help="add config file")
|
ap.add_argument("-c", metavar="PATH", type=str, action="append", help="add config file")
|
||||||
ap.add_argument("-i", metavar="IP", type=str, default="0.0.0.0", help="ip to bind")
|
ap.add_argument("-i", metavar="IP", type=str, default="0.0.0.0", help="ip to bind")
|
||||||
ap.add_argument("-p", metavar="PORT", type=int, default=3923, help="port to bind")
|
ap.add_argument("-p", metavar="PORT", type=int, default=3923, help="port to bind")
|
||||||
|
@ -135,12 +141,15 @@ def main():
|
||||||
ap.add_argument("-q", action="store_true", help="quiet")
|
ap.add_argument("-q", action="store_true", help="quiet")
|
||||||
ap.add_argument("-ed", action="store_true", help="enable ?dots")
|
ap.add_argument("-ed", action="store_true", help="enable ?dots")
|
||||||
ap.add_argument("-emp", action="store_true", help="enable markdown plugins")
|
ap.add_argument("-emp", action="store_true", help="enable markdown plugins")
|
||||||
|
ap.add_argument("-e2d", action="store_true", help="enable up2k database")
|
||||||
|
ap.add_argument("-e2s", action="store_true", help="enable up2k db-scanner")
|
||||||
ap.add_argument("-mcr", metavar="SEC", type=int, default=60, help="md-editor mod-chk rate")
|
ap.add_argument("-mcr", metavar="SEC", type=int, default=60, help="md-editor mod-chk rate")
|
||||||
ap.add_argument("-nw", action="store_true", help="disable writes (benchmark)")
|
ap.add_argument("-nw", action="store_true", help="disable writes (benchmark)")
|
||||||
ap.add_argument("-nih", action="store_true", help="no info hostname")
|
ap.add_argument("-nih", action="store_true", help="no info hostname")
|
||||||
ap.add_argument("-nid", action="store_true", help="no info disk-usage")
|
ap.add_argument("-nid", action="store_true", help="no info disk-usage")
|
||||||
ap.add_argument("--no-sendfile", action="store_true", help="disable sendfile")
|
ap.add_argument("--no-sendfile", action="store_true", help="disable sendfile")
|
||||||
al = ap.parse_args()
|
al = ap.parse_args()
|
||||||
|
# fmt: on
|
||||||
|
|
||||||
SvcHub(al).run()
|
SvcHub(al).run()
|
||||||
|
|
||||||
|
|
|
@ -258,6 +258,7 @@ class AuthSrv(object):
|
||||||
with open(cfg_fn, "rb") as f:
|
with open(cfg_fn, "rb") as f:
|
||||||
self._parse_config_file(f, user, mread, mwrite, mflags, mount)
|
self._parse_config_file(f, user, mread, mwrite, mflags, mount)
|
||||||
|
|
||||||
|
self.all_writable = []
|
||||||
if not mount:
|
if not mount:
|
||||||
# -h says our defaults are CWD at root and read/write for everyone
|
# -h says our defaults are CWD at root and read/write for everyone
|
||||||
vfs = VFS(os.path.abspath("."), "", ["*"], ["*"])
|
vfs = VFS(os.path.abspath("."), "", ["*"], ["*"])
|
||||||
|
@ -280,6 +281,11 @@ class AuthSrv(object):
|
||||||
v.uread = mread[dst]
|
v.uread = mread[dst]
|
||||||
v.uwrite = mwrite[dst]
|
v.uwrite = mwrite[dst]
|
||||||
v.flags = mflags[dst]
|
v.flags = mflags[dst]
|
||||||
|
if v.uwrite:
|
||||||
|
self.all_writable.append(v)
|
||||||
|
|
||||||
|
if vfs.uwrite and vfs not in self.all_writable:
|
||||||
|
self.all_writable.append(vfs)
|
||||||
|
|
||||||
missing_users = {}
|
missing_users = {}
|
||||||
for d in [mread, mwrite]:
|
for d in [mread, mwrite]:
|
||||||
|
|
|
@ -28,6 +28,7 @@ class HttpCli(object):
|
||||||
self.conn = conn
|
self.conn = conn
|
||||||
self.s = conn.s
|
self.s = conn.s
|
||||||
self.sr = conn.sr
|
self.sr = conn.sr
|
||||||
|
self.ip = conn.addr[0]
|
||||||
self.addr = conn.addr
|
self.addr = conn.addr
|
||||||
self.args = conn.args
|
self.args = conn.args
|
||||||
self.auth = conn.auth
|
self.auth = conn.auth
|
||||||
|
@ -42,7 +43,7 @@ class HttpCli(object):
|
||||||
self.log_func(self.log_src, msg)
|
self.log_func(self.log_src, msg)
|
||||||
|
|
||||||
def _check_nonfatal(self, ex):
|
def _check_nonfatal(self, ex):
|
||||||
return ex.code in [404]
|
return ex.code < 400 or ex.code == 404
|
||||||
|
|
||||||
def _assert_safe_rem(self, rem):
|
def _assert_safe_rem(self, rem):
|
||||||
# sanity check to prevent any disasters
|
# sanity check to prevent any disasters
|
||||||
|
@ -85,7 +86,8 @@ class HttpCli(object):
|
||||||
|
|
||||||
v = self.headers.get("x-forwarded-for", None)
|
v = self.headers.get("x-forwarded-for", None)
|
||||||
if v is not None and self.conn.addr[0] in ["127.0.0.1", "::1"]:
|
if v is not None and self.conn.addr[0] in ["127.0.0.1", "::1"]:
|
||||||
self.log_src = self.conn.set_rproxy(v.split(",")[0])
|
self.ip = v.split(",")[0]
|
||||||
|
self.log_src = self.conn.set_rproxy(self.ip)
|
||||||
|
|
||||||
self.uname = "*"
|
self.uname = "*"
|
||||||
if "cookie" in self.headers:
|
if "cookie" in self.headers:
|
||||||
|
@ -305,7 +307,7 @@ class HttpCli(object):
|
||||||
vfs, rem = self.conn.auth.vfs.get(self.vpath, self.uname, False, True)
|
vfs, rem = self.conn.auth.vfs.get(self.vpath, self.uname, False, True)
|
||||||
fdir = os.path.join(vfs.realpath, rem)
|
fdir = os.path.join(vfs.realpath, rem)
|
||||||
|
|
||||||
addr = self.conn.addr[0].replace(":", ".")
|
addr = self.ip.replace(":", ".")
|
||||||
fn = "put-{:.6f}-{}.bin".format(time.time(), addr)
|
fn = "put-{:.6f}-{}.bin".format(time.time(), addr)
|
||||||
path = os.path.join(fdir, fn)
|
path = os.path.join(fdir, fn)
|
||||||
|
|
||||||
|
@ -384,9 +386,10 @@ class HttpCli(object):
|
||||||
|
|
||||||
vfs, rem = self.conn.auth.vfs.get(self.vpath, self.uname, False, True)
|
vfs, rem = self.conn.auth.vfs.get(self.vpath, self.uname, False, True)
|
||||||
|
|
||||||
body["vdir"] = self.vpath
|
body["vtop"] = vfs.vpath
|
||||||
body["rdir"] = os.path.join(vfs.realpath, rem)
|
body["ptop"] = vfs.realpath
|
||||||
body["addr"] = self.addr[0]
|
body["prel"] = rem
|
||||||
|
body["addr"] = self.ip
|
||||||
body["flag"] = vfs.flags
|
body["flag"] = vfs.flags
|
||||||
|
|
||||||
x = self.conn.hsrv.broker.put(True, "up2k.handle_json", body)
|
x = self.conn.hsrv.broker.put(True, "up2k.handle_json", body)
|
||||||
|
@ -409,7 +412,10 @@ class HttpCli(object):
|
||||||
except KeyError:
|
except KeyError:
|
||||||
raise Pebkac(400, "need hash and wark headers for binary POST")
|
raise Pebkac(400, "need hash and wark headers for binary POST")
|
||||||
|
|
||||||
x = self.conn.hsrv.broker.put(True, "up2k.handle_chunk", wark, chash)
|
vfs, _ = self.conn.auth.vfs.get(self.vpath, self.uname, False, True)
|
||||||
|
ptop = vfs.realpath
|
||||||
|
|
||||||
|
x = self.conn.hsrv.broker.put(True, "up2k.handle_chunk", ptop, wark, chash)
|
||||||
response = x.get()
|
response = x.get()
|
||||||
chunksize, cstart, path, lastmod = response
|
chunksize, cstart, path, lastmod = response
|
||||||
|
|
||||||
|
@ -454,7 +460,7 @@ class HttpCli(object):
|
||||||
|
|
||||||
self.log("clone {} done".format(cstart[0]))
|
self.log("clone {} done".format(cstart[0]))
|
||||||
|
|
||||||
x = self.conn.hsrv.broker.put(True, "up2k.confirm_chunk", wark, chash)
|
x = self.conn.hsrv.broker.put(True, "up2k.confirm_chunk", ptop, wark, chash)
|
||||||
num_left = x.get()
|
num_left = x.get()
|
||||||
|
|
||||||
if not WINDOWS and num_left == 0:
|
if not WINDOWS and num_left == 0:
|
||||||
|
@ -576,7 +582,7 @@ class HttpCli(object):
|
||||||
if not os.path.isdir(fsenc(fdir)):
|
if not os.path.isdir(fsenc(fdir)):
|
||||||
raise Pebkac(404, "that folder does not exist")
|
raise Pebkac(404, "that folder does not exist")
|
||||||
|
|
||||||
suffix = ".{:.6f}-{}".format(time.time(), self.addr[0])
|
suffix = ".{:.6f}-{}".format(time.time(), self.ip)
|
||||||
open_args = {"fdir": fdir, "suffix": suffix}
|
open_args = {"fdir": fdir, "suffix": suffix}
|
||||||
else:
|
else:
|
||||||
open_args = {}
|
open_args = {}
|
||||||
|
@ -638,7 +644,7 @@ class HttpCli(object):
|
||||||
"\n".join(
|
"\n".join(
|
||||||
unicode(x)
|
unicode(x)
|
||||||
for x in [
|
for x in [
|
||||||
":".join(unicode(x) for x in self.addr),
|
":".join(unicode(x) for x in [self.ip, self.addr[1]]),
|
||||||
msg.rstrip(),
|
msg.rstrip(),
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
|
@ -895,7 +901,7 @@ class HttpCli(object):
|
||||||
open_func = open
|
open_func = open
|
||||||
# 512 kB is optimal for huge files, use 64k
|
# 512 kB is optimal for huge files, use 64k
|
||||||
open_args = [fsenc(fs_path), "rb", 64 * 1024]
|
open_args = [fsenc(fs_path), "rb", 64 * 1024]
|
||||||
if hasattr(os, 'sendfile'):
|
if hasattr(os, "sendfile"):
|
||||||
use_sendfile = not self.args.no_sendfile
|
use_sendfile = not self.args.no_sendfile
|
||||||
|
|
||||||
#
|
#
|
||||||
|
@ -1021,6 +1027,9 @@ class HttpCli(object):
|
||||||
if abspath.endswith(".md") and "raw" not in self.uparam:
|
if abspath.endswith(".md") and "raw" not in self.uparam:
|
||||||
return self.tx_md(abspath)
|
return self.tx_md(abspath)
|
||||||
|
|
||||||
|
if abspath.endswith("{0}.hist{0}up2k.db".format(os.sep)):
|
||||||
|
raise Pebkac(403)
|
||||||
|
|
||||||
return self.tx_file(abspath)
|
return self.tx_file(abspath)
|
||||||
|
|
||||||
fsroot, vfs_ls, vfs_virt = vn.ls(rem, self.uname)
|
fsroot, vfs_ls, vfs_virt = vn.ls(rem, self.uname)
|
||||||
|
|
|
@ -65,6 +65,7 @@ class HttpConn(object):
|
||||||
color = 34
|
color = 34
|
||||||
self.rproxy = ip
|
self.rproxy = ip
|
||||||
|
|
||||||
|
self.ip = ip
|
||||||
self.log_src = "{} \033[{}m{}".format(ip, color, self.addr[1]).ljust(26)
|
self.log_src = "{} \033[{}m{}".format(ip, color, self.addr[1]).ljust(26)
|
||||||
return self.log_src
|
return self.log_src
|
||||||
|
|
||||||
|
|
|
@ -9,6 +9,7 @@ from datetime import datetime, timedelta
|
||||||
import calendar
|
import calendar
|
||||||
|
|
||||||
from .__init__ import PY2, WINDOWS, MACOS, VT100
|
from .__init__ import PY2, WINDOWS, MACOS, VT100
|
||||||
|
from .authsrv import AuthSrv
|
||||||
from .tcpsrv import TcpSrv
|
from .tcpsrv import TcpSrv
|
||||||
from .up2k import Up2k
|
from .up2k import Up2k
|
||||||
from .util import mp
|
from .util import mp
|
||||||
|
@ -38,6 +39,10 @@ class SvcHub(object):
|
||||||
self.tcpsrv = TcpSrv(self)
|
self.tcpsrv = TcpSrv(self)
|
||||||
self.up2k = Up2k(self)
|
self.up2k = Up2k(self)
|
||||||
|
|
||||||
|
if self.args.e2d and self.args.e2s:
|
||||||
|
auth = AuthSrv(self.args, self.log)
|
||||||
|
self.up2k.build_indexes(auth.all_writable)
|
||||||
|
|
||||||
# decide which worker impl to use
|
# decide which worker impl to use
|
||||||
if self.check_mp_enable():
|
if self.check_mp_enable():
|
||||||
from .broker_mp import BrokerMp as Broker
|
from .broker_mp import BrokerMp as Broker
|
||||||
|
|
|
@ -6,6 +6,7 @@ import os
|
||||||
import re
|
import re
|
||||||
import time
|
import time
|
||||||
import math
|
import math
|
||||||
|
import stat
|
||||||
import shutil
|
import shutil
|
||||||
import base64
|
import base64
|
||||||
import hashlib
|
import hashlib
|
||||||
|
@ -13,7 +14,15 @@ import threading
|
||||||
from copy import deepcopy
|
from copy import deepcopy
|
||||||
|
|
||||||
from .__init__ import WINDOWS
|
from .__init__ import WINDOWS
|
||||||
from .util import Pebkac, Queue, fsenc, sanitize_fn, ren_open
|
from .util import Pebkac, Queue, fsdec, fsenc, sanitize_fn, ren_open
|
||||||
|
|
||||||
|
HAVE_SQLITE3 = False
|
||||||
|
try:
|
||||||
|
import sqlite3
|
||||||
|
|
||||||
|
HAVE_SQLITE3 = True
|
||||||
|
except:
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
class Up2k(object):
|
class Up2k(object):
|
||||||
|
@ -22,20 +31,21 @@ class Up2k(object):
|
||||||
* documentation
|
* documentation
|
||||||
* registry persistence
|
* registry persistence
|
||||||
* ~/.config flatfiles for active jobs
|
* ~/.config flatfiles for active jobs
|
||||||
* wark->path database for finished uploads
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, broker):
|
def __init__(self, broker):
|
||||||
self.broker = broker
|
self.broker = broker
|
||||||
self.args = broker.args
|
self.args = broker.args
|
||||||
self.log = broker.log
|
self.log = broker.log
|
||||||
|
self.persist = self.args.e2d
|
||||||
|
|
||||||
# config
|
# config
|
||||||
self.salt = "hunter2" # TODO: config
|
self.salt = "hunter2" # TODO: config
|
||||||
|
|
||||||
# state
|
# state
|
||||||
self.registry = {}
|
|
||||||
self.mutex = threading.Lock()
|
self.mutex = threading.Lock()
|
||||||
|
self.registry = {}
|
||||||
|
self.db = {}
|
||||||
|
|
||||||
if WINDOWS:
|
if WINDOWS:
|
||||||
# usually fails to set lastmod too quickly
|
# usually fails to set lastmod too quickly
|
||||||
|
@ -47,57 +57,234 @@ class Up2k(object):
|
||||||
# static
|
# static
|
||||||
self.r_hash = re.compile("^[0-9a-zA-Z_-]{43}$")
|
self.r_hash = re.compile("^[0-9a-zA-Z_-]{43}$")
|
||||||
|
|
||||||
|
if self.persist and not HAVE_SQLITE3:
|
||||||
|
m = "could not initialize sqlite3, will use in-memory registry only"
|
||||||
|
self.log("up2k", m)
|
||||||
|
|
||||||
|
def register_vpath(self, ptop):
    """Start tracking uploads for the volume rooted at `ptop`.

    Creates the in-memory registry for the volume and, when persistence
    is enabled and sqlite3 could be imported, opens (or creates) the
    database at `<ptop>/.hist/up2k.db`.

    Returns the database connection if one was opened by this call.
    Returns None if the volume was already registered, if persistence
    is unavailable, or if the database could not be opened (the failure
    is logged rather than raised).
    """
    with self.mutex:
        if ptop in self.registry:
            return None

        self.registry[ptop] = {}
        if not self.persist or not HAVE_SQLITE3:
            return None

        histdir = os.path.join(ptop, ".hist")
        try:
            os.mkdir(histdir)
        except OSError:
            # typically "already exists"; a genuinely unusable folder
            # shows up below when the database fails to open
            pass

        if ptop in self.db:
            return None

        db_path = os.path.join(histdir, "up2k.db")
        try:
            db = self._open_db(db_path)
            self.db[ptop] = db
            return db
        except Exception as ex:
            # best-effort: the volume keeps working with the
            # in-memory registry only
            m = "failed to open [{}]: {}".format(ptop, repr(ex))
            self.log("up2k", m)

        return None
|
||||||
|
|
||||||
|
def build_indexes(self, writeables):
    """Index the files of every volume in `writeables` into its database.

    `writeables` is a list of objects with a `realpath` attribute (the
    volume root on disk). Volumes for which register_vpath does not
    return a database are skipped. For the others, the tree is scanned,
    entries for files which no longer exist are dropped, and the result
    is committed.
    """
    tops = [vol.realpath for vol in writeables]
    for top in tops:
        db = self.register_vpath(top)
        if not db:
            continue

        # stay out of all other volume roots while walking this one;
        # not filtered on startswith(top) since a root may be reachable
        # from inside this volume through a symlink
        excl = {d for d in tops if d != top}

        # [db, number of uncommitted changes]
        self._build_dir([db, 0], top, excl, top)
        self._drop_lost(db, top)
        db.commit()
|
||||||
|
|
||||||
|
def _build_dir(self, dbw, top, excl, cdir):
    """Recursively index all files below `cdir` into the volume database.

    dbw:  two-item list [db, n] shared across the recursion; `db` is the
          database connection and `n` counts changes since the last
          commit (updated in place, reset on every intermediate commit)
    top:  root of the volume; paths are stored relative to this
    excl: absolute paths of directories which must not be entered
    cdir: the directory scanned by this call

    Files whose size and mtime match their database entry are left
    alone; everything else is hashed and (re)inserted. Entries which
    cannot be read are logged and skipped so that one bad file does
    not abort the whole scan.
    """
    histdir = os.path.join(top, ".hist")
    for inode in [fsdec(x) for x in os.listdir(fsenc(cdir))]:
        abspath = os.path.join(cdir, inode)
        try:
            inf = os.stat(fsenc(abspath))
        except EnvironmentError as ex:
            # broken symlink, or deleted after listdir
            self.log("up2k", "cannot stat [{}]: {}".format(abspath, repr(ex)))
            continue

        if stat.S_ISDIR(inf.st_mode):
            if abspath in excl or abspath == histdir:
                continue

            self.log("up2k", "dir: {}".format(abspath))
            self._build_dir(dbw, top, excl, abspath)
            continue

        # volume-relative path with forward slashes, no leading slash
        rp = abspath[len(top) :].replace("\\", "/").strip("/")
        c = dbw[0].execute("select * from up where rp = ?", (rp,))
        in_db = c.fetchall()
        if in_db:
            _, dts, dsz, _ = in_db[0]
            if len(in_db) > 1:
                m = "WARN: multiple entries: [{}] => [{}] ({})"
                self.log("up2k", m.format(top, rp, len(in_db)))
                # impossible timestamp; forces a reindex below
                dts = -1

            if dts == inf.st_mtime and dsz == inf.st_size:
                continue

            m = "reindex [{}] => [{}] ({}/{}) ({}/{})".format(
                top, rp, dts, inf.st_mtime, dsz, inf.st_size
            )
            self.log("up2k", m)
            self.db_rm(dbw[0], rp)
            dbw[1] += 1

        self.log("up2k", "file: {}".format(abspath))
        try:
            hashes = self._hashlist_from_file(abspath)
        except EnvironmentError as ex:
            # unreadable (permissions, vanished mid-scan)
            self.log("up2k", "cannot hash [{}]: {}".format(abspath, repr(ex)))
            continue

        wark = self._wark_from_hashlist(inf.st_size, hashes)
        self.db_add(dbw[0], wark, rp, inf.st_mtime, inf.st_size)
        dbw[1] += 1
        if dbw[1] > 1024:
            # commit in batches while scanning large volumes
            dbw[0].commit()
            dbw[1] = 0
|
||||||
|
|
||||||
|
def _drop_lost(self, db, top):
    """Remove database entries for files which no longer exist on disk.

    db:  database connection of the volume
    top: root of the volume; stored paths are relative to this

    Does not commit; that is left to the caller.
    """
    # collect first, delete afterwards, so the table
    # is not modified while the select is being iterated
    rm = []
    for (drp,) in db.execute("select rp from up"):
        abspath = os.path.join(top, drp)
        # fsenc like every other filesystem call on indexed paths
        if not os.path.exists(fsenc(abspath)):
            rm.append(drp)

    if not rm:
        return

    self.log("up2k", "forgetting {} deleted files".format(len(rm)))
    for rp in rm:
        self.db_rm(db, rp)
|
||||||
|
|
||||||
|
def _open_db(self, db_path):
|
||||||
|
conn = sqlite3.connect(db_path, check_same_thread=False)
|
||||||
|
try:
|
||||||
|
c = conn.execute(r"select * from kv where k = 'sver'")
|
||||||
|
rows = c.fetchall()
|
||||||
|
if rows:
|
||||||
|
ver = rows[0][1]
|
||||||
|
else:
|
||||||
|
self.log("up2k", "WARN: no sver in kv, DB corrupt?")
|
||||||
|
ver = "unknown"
|
||||||
|
|
||||||
|
if ver == "1":
|
||||||
|
try:
|
||||||
|
nfiles = next(conn.execute("select count(w) from up"))[0]
|
||||||
|
self.log("up2k", "found DB at {} |{}|".format(db_path, nfiles))
|
||||||
|
return conn
|
||||||
|
except Exception as ex:
|
||||||
|
m = "WARN: could not list files, DB corrupt?\n " + repr(ex)
|
||||||
|
self.log("up2k", m)
|
||||||
|
|
||||||
|
m = "REPLACING unsupported DB (v.{}) at {}".format(ver, db_path)
|
||||||
|
self.log("up2k", m)
|
||||||
|
conn.close()
|
||||||
|
os.unlink(db_path)
|
||||||
|
conn = sqlite3.connect(db_path, check_same_thread=False)
|
||||||
|
except:
|
||||||
|
pass
|
||||||
|
|
||||||
|
# sqlite is variable-width only, no point in using char/nchar/varchar
|
||||||
|
for cmd in [
|
||||||
|
r"create table kv (k text, v text)",
|
||||||
|
r"create table up (w text, mt int, sz int, rp text)",
|
||||||
|
r"insert into kv values ('sver', '1')",
|
||||||
|
r"create index up_w on up(w)",
|
||||||
|
]:
|
||||||
|
conn.execute(cmd)
|
||||||
|
|
||||||
|
conn.commit()
|
||||||
|
self.log("up2k", "created DB at {}".format(db_path))
|
||||||
|
return conn
|
||||||
|
|
||||||
def handle_json(self, cj):
|
def handle_json(self, cj):
|
||||||
|
self.register_vpath(cj["ptop"])
|
||||||
cj["name"] = sanitize_fn(cj["name"])
|
cj["name"] = sanitize_fn(cj["name"])
|
||||||
wark = self._get_wark(cj)
|
wark = self._get_wark(cj)
|
||||||
now = time.time()
|
now = time.time()
|
||||||
|
job = None
|
||||||
with self.mutex:
|
with self.mutex:
|
||||||
# TODO use registry persistence here to symlink any matching wark
|
db = self.db.get(cj["ptop"], None)
|
||||||
if wark in self.registry:
|
reg = self.registry[cj["ptop"]]
|
||||||
job = self.registry[wark]
|
if wark not in reg and db:
|
||||||
if job["rdir"] != cj["rdir"] or job["name"] != cj["name"]:
|
cur = db.execute(r"select * from up where w = ?", (wark,))
|
||||||
src = os.path.join(job["rdir"], job["name"])
|
for _, dtime, dsize, dp_rel in cur:
|
||||||
dst = os.path.join(cj["rdir"], cj["name"])
|
dp_abs = os.path.join(cj["ptop"], dp_rel).replace("\\", "/")
|
||||||
|
# relying on path.exists to return false on broken symlinks
|
||||||
|
if os.path.exists(dp_abs):
|
||||||
|
try:
|
||||||
|
prel, name = dp_rel.rsplit("/", 1)
|
||||||
|
except:
|
||||||
|
prel = ""
|
||||||
|
name = dp_rel
|
||||||
|
|
||||||
|
job = {
|
||||||
|
"name": name,
|
||||||
|
"prel": prel,
|
||||||
|
"vtop": cj["vtop"],
|
||||||
|
"ptop": cj["ptop"],
|
||||||
|
"flag": cj["flag"],
|
||||||
|
"size": dsize,
|
||||||
|
"lmod": dtime,
|
||||||
|
"hash": [],
|
||||||
|
"need": [],
|
||||||
|
}
|
||||||
|
break
|
||||||
|
|
||||||
|
if job or wark in reg:
|
||||||
|
job = job or reg[wark]
|
||||||
|
if job["prel"] != cj["prel"] or job["name"] != cj["name"]:
|
||||||
|
src = os.path.join(job["ptop"], job["prel"], job["name"])
|
||||||
|
dst = os.path.join(cj["ptop"], cj["prel"], cj["name"])
|
||||||
|
vsrc = os.path.join(job["vtop"], job["prel"], job["name"])
|
||||||
|
vsrc = vsrc.replace("\\", "/") # just for prints anyways
|
||||||
if job["need"]:
|
if job["need"]:
|
||||||
self.log("up2k", "unfinished:\n {0}\n {1}".format(src, dst))
|
self.log("up2k", "unfinished:\n {0}\n {1}".format(src, dst))
|
||||||
err = "partial upload exists at a different location; please resume uploading here instead:\n{0}{1} ".format(
|
err = "partial upload exists at a different location; please resume uploading here instead:\n"
|
||||||
job["vdir"], job["name"]
|
err += vsrc + " "
|
||||||
)
|
|
||||||
raise Pebkac(400, err)
|
raise Pebkac(400, err)
|
||||||
elif "nodupe" in job["flag"]:
|
elif "nodupe" in job["flag"]:
|
||||||
self.log("up2k", "dupe-reject:\n {0}\n {1}".format(src, dst))
|
self.log("up2k", "dupe-reject:\n {0}\n {1}".format(src, dst))
|
||||||
err = "upload rejected, file already exists:\n{0}{1} ".format(
|
err = "upload rejected, file already exists:\n " + vsrc + " "
|
||||||
job["vdir"], job["name"]
|
|
||||||
)
|
|
||||||
raise Pebkac(400, err)
|
raise Pebkac(400, err)
|
||||||
else:
|
else:
|
||||||
# symlink to the client-provided name,
|
# symlink to the client-provided name,
|
||||||
# returning the previous upload info
|
# returning the previous upload info
|
||||||
job = deepcopy(job)
|
job = deepcopy(job)
|
||||||
job["rdir"] = cj["rdir"]
|
for k in ["ptop", "vtop", "prel"]:
|
||||||
job["name"] = self._untaken(cj["rdir"], cj["name"], now, cj["addr"])
|
job[k] = cj[k]
|
||||||
dst = os.path.join(job["rdir"], job["name"])
|
|
||||||
|
pdir = os.path.join(cj["ptop"], cj["prel"])
|
||||||
|
job["name"] = self._untaken(pdir, cj["name"], now, cj["addr"])
|
||||||
|
dst = os.path.join(job["ptop"], job["prel"], job["name"])
|
||||||
os.unlink(fsenc(dst)) # TODO ed pls
|
os.unlink(fsenc(dst)) # TODO ed pls
|
||||||
self._symlink(src, dst)
|
self._symlink(src, dst)
|
||||||
else:
|
|
||||||
|
if not job:
|
||||||
job = {
|
job = {
|
||||||
"wark": wark,
|
"wark": wark,
|
||||||
"t0": now,
|
"t0": now,
|
||||||
"addr": cj["addr"],
|
|
||||||
"vdir": cj["vdir"],
|
|
||||||
"rdir": cj["rdir"],
|
|
||||||
"flag": cj["flag"],
|
|
||||||
# client-provided, sanitized by _get_wark:
|
|
||||||
"name": cj["name"],
|
|
||||||
"size": cj["size"],
|
|
||||||
"lmod": cj["lmod"],
|
|
||||||
"hash": deepcopy(cj["hash"]),
|
"hash": deepcopy(cj["hash"]),
|
||||||
|
"need": [],
|
||||||
}
|
}
|
||||||
|
# client-provided, sanitized by _get_wark: name, size, lmod
|
||||||
|
for k in [
|
||||||
|
"addr",
|
||||||
|
"vtop",
|
||||||
|
"ptop",
|
||||||
|
"prel",
|
||||||
|
"flag",
|
||||||
|
"name",
|
||||||
|
"size",
|
||||||
|
"lmod",
|
||||||
|
]:
|
||||||
|
job[k] = cj[k]
|
||||||
|
|
||||||
# one chunk may occur multiple times in a file;
|
# one chunk may occur multiple times in a file;
|
||||||
# filter to unique values for the list of missing chunks
|
# filter to unique values for the list of missing chunks
|
||||||
# (preserve order to reduce disk thrashing)
|
# (preserve order to reduce disk thrashing)
|
||||||
job["need"] = []
|
|
||||||
lut = {}
|
lut = {}
|
||||||
for k in cj["hash"]:
|
for k in cj["hash"]:
|
||||||
if k not in lut:
|
if k not in lut:
|
||||||
|
@ -149,36 +336,47 @@ class Up2k(object):
|
||||||
self.log("up2k", "cannot symlink; creating copy: " + repr(ex))
|
self.log("up2k", "cannot symlink; creating copy: " + repr(ex))
|
||||||
shutil.copy2(fsenc(src), fsenc(dst))
|
shutil.copy2(fsenc(src), fsenc(dst))
|
||||||
|
|
||||||
def handle_chunk(self, ptop, wark, chash):
    """Look up where an incoming chunk of an unfinished upload belongs.

    ptop:  root of the volume the upload lives in
    wark:  identifier of the upload
    chash: hash of the chunk about to be received

    Returns [chunksize, offsets, path, lastmod], where `offsets` lists
    every position in the file at `path` where this chunk occurs.

    Raises Pebkac 400 if the upload or the chunk is unknown, and
    Pebkac 200 if the chunk has already been received.
    """
    with self.mutex:
        # a volume which was never registered has no uploads either;
        # report that as an unknown wark rather than a KeyError
        job = self.registry.get(ptop, {}).get(wark)
        if not job:
            raise Pebkac(400, "unknown wark")

        if chash not in job["need"]:
            raise Pebkac(200, "already got that but thanks??")

        # one chunk may occur multiple times in a file
        nchunk = [n for n, v in enumerate(job["hash"]) if v == chash]
        if not nchunk:
            raise Pebkac(400, "unknown chunk")

        chunksize = self._get_chunksize(job["size"])
        ofs = [chunksize * x for x in nchunk]

        path = os.path.join(job["ptop"], job["prel"], job["name"])

        return [chunksize, ofs, path, job["lmod"]]
|
||||||
|
|
||||||
def confirm_chunk(self, ptop, wark, chash):
    """Mark a chunk of an unfinished upload as received.

    ptop:  root of the volume the upload lives in
    wark:  identifier of the upload
    chash: hash of the chunk which was just written

    Returns the number of chunks still missing. When that reaches
    zero, the file's lastmod is queued for restoration (on Windows)
    and, if the volume has a database, the file is recorded there
    and dropped from the in-memory registry.

    Raises Pebkac 400 if the upload is unknown, and Pebkac 200 if
    the chunk had already been confirmed.
    """
    with self.mutex:
        # same lookup and error reporting as handle_chunk, rather
        # than a raw KeyError for an unregistered volume or wark
        job = self.registry.get(ptop, {}).get(wark)
        if not job:
            raise Pebkac(400, "unknown wark")

        # two requests may deliver the same chunk at the same time;
        # the slower one must not die on list.remove
        if chash not in job["need"]:
            raise Pebkac(200, "already got that but thanks??")

        job["need"].remove(chash)
        ret = len(job["need"])
        if ret > 0:
            return ret

        if WINDOWS:
            path = os.path.join(job["ptop"], job["prel"], job["name"])
            self.lastmod_q.put([path, (int(time.time()), int(job["lmod"]))])

        db = self.db.get(job["ptop"], None)
        if db:
            rp = os.path.join(job["prel"], job["name"]).replace("\\", "/")
            # replace any previous entry at this path
            self.db_rm(db, rp)
            self.db_add(db, job["wark"], rp, job["lmod"], job["size"])
            db.commit()
            # in-memory registry is reserved for unfinished uploads
            del self.registry[ptop][wark]

        return ret
|
||||||
|
|
||||||
def _get_chunksize(self, filesize):
|
def _get_chunksize(self, filesize):
|
||||||
|
@ -193,6 +391,14 @@ class Up2k(object):
|
||||||
chunksize += stepsize
|
chunksize += stepsize
|
||||||
stepsize *= mul
|
stepsize *= mul
|
||||||
|
|
||||||
|
def db_rm(self, db, rp):
    """Delete all entries for the volume-relative path `rp` from `db` (no commit)."""
    params = (rp,)
    db.execute("delete from up where rp = ?", params)
|
||||||
|
|
||||||
|
def db_add(self, db, wark, rp, ts, sz):
    """Insert one file into table `up` of `db` (no commit).

    wark: file identifier
    rp:   path relative to the volume root
    ts:   last-modified timestamp
    sz:   size in bytes
    """
    # columns are named so the statement does not
    # depend on the order they were declared in
    db.execute(
        "insert into up (w, mt, sz, rp) values (?,?,?,?)",
        (wark, ts, sz, rp),
    )
|
||||||
|
|
||||||
def _get_wark(self, cj):
|
def _get_wark(self, cj):
|
||||||
if len(cj["name"]) > 1024 or len(cj["hash"]) > 512 * 1024: # 16TiB
|
if len(cj["name"]) > 1024 or len(cj["hash"]) > 512 * 1024: # 16TiB
|
||||||
raise Pebkac(400, "name or numchunks not according to spec")
|
raise Pebkac(400, "name or numchunks not according to spec")
|
||||||
|
@ -209,9 +415,13 @@ class Up2k(object):
|
||||||
except:
|
except:
|
||||||
cj["lmod"] = int(time.time())
|
cj["lmod"] = int(time.time())
|
||||||
|
|
||||||
# server-reproducible file identifier, independent of name or location
|
wark = self._wark_from_hashlist(cj["size"], cj["hash"])
|
||||||
ident = [self.salt, str(cj["size"])]
|
return wark
|
||||||
ident.extend(cj["hash"])
|
|
||||||
|
def _wark_from_hashlist(self, filesize, hashes):
|
||||||
|
""" server-reproducible file identifier, independent of name or location """
|
||||||
|
ident = [self.salt, str(filesize)]
|
||||||
|
ident.extend(hashes)
|
||||||
ident = "\n".join(ident)
|
ident = "\n".join(ident)
|
||||||
|
|
||||||
hasher = hashlib.sha512()
|
hasher = hashlib.sha512()
|
||||||
|
@ -221,10 +431,34 @@ class Up2k(object):
|
||||||
wark = base64.urlsafe_b64encode(digest)
|
wark = base64.urlsafe_b64encode(digest)
|
||||||
return wark.decode("utf-8").rstrip("=")
|
return wark.decode("utf-8").rstrip("=")
|
||||||
|
|
||||||
|
def _hashlist_from_file(self, path):
    """Return the list of chunk hashes for the file at `path`.

    The file is split into chunks of the size given by _get_chunksize.
    Each chunk is identified by the first 32 bytes of its sha512
    digest, urlsafe-base64 encoded with the padding stripped.

    Raises Exception if the file ends before the size measured at the
    start has been read (for example if it was truncated meanwhile).
    """
    # fsenc like the other filesystem calls on indexed paths
    fspath = fsenc(path)
    fsz = os.path.getsize(fspath)
    csz = self._get_chunksize(fsz)
    ret = []
    with open(fspath, "rb", 512 * 1024) as f:
        while fsz > 0:
            hashobj = hashlib.sha512()
            # the final chunk may be shorter than the others
            rem = min(csz, fsz)
            fsz -= rem
            while rem > 0:
                buf = f.read(min(rem, 64 * 1024))
                if not buf:
                    raise Exception("EOF at " + str(f.tell()))

                hashobj.update(buf)
                rem -= len(buf)

            digest = hashobj.digest()[:32]
            digest = base64.urlsafe_b64encode(digest)
            ret.append(digest.decode("utf-8").rstrip("="))

    return ret
|
||||||
|
|
||||||
def _new_upload(self, job):
|
def _new_upload(self, job):
|
||||||
self.registry[job["wark"]] = job
|
self.registry[job["ptop"]][job["wark"]] = job
|
||||||
suffix = ".{:.6f}-{}".format(job["t0"], job["addr"])
|
suffix = ".{:.6f}-{}".format(job["t0"], job["addr"])
|
||||||
with ren_open(job["name"], "wb", fdir=job["rdir"], suffix=suffix) as f:
|
pdir = os.path.join(job["ptop"], job["prel"])
|
||||||
|
with ren_open(job["name"], "wb", fdir=pdir, suffix=suffix) as f:
|
||||||
f, job["name"] = f["orz"]
|
f, job["name"] = f["orz"]
|
||||||
f.seek(job["size"] - 1)
|
f.seek(job["size"] - 1)
|
||||||
f.write(b"e")
|
f.write(b"e")
|
||||||
|
|
|
@ -712,3 +712,6 @@ class Pebkac(Exception):
|
||||||
def __init__(self, code, msg=None):
|
def __init__(self, code, msg=None):
|
||||||
super(Pebkac, self).__init__(msg or HTTPCODE[code])
|
super(Pebkac, self).__init__(msg or HTTPCODE[code])
|
||||||
self.code = code
|
self.code = code
|
||||||
|
|
||||||
|
def __repr__(self):
|
||||||
|
return "Pebkac({}, {})".format(self.code, repr(self.args))
|
||||||
|
|
|
@ -672,7 +672,10 @@ function up2k_init(have_crypto) {
|
||||||
var rsp = (xhr.responseText + '');
|
var rsp = (xhr.responseText + '');
|
||||||
if (rsp.indexOf('partial upload exists') !== -1 ||
|
if (rsp.indexOf('partial upload exists') !== -1 ||
|
||||||
rsp.indexOf('file already exists') !== -1) {
|
rsp.indexOf('file already exists') !== -1) {
|
||||||
err = rsp.slice(5);
|
err = rsp;
|
||||||
|
var ofs = err.lastIndexOf(' : ');
|
||||||
|
if (ofs > 0)
|
||||||
|
err = err.slice(0, ofs);
|
||||||
}
|
}
|
||||||
if (err != "") {
|
if (err != "") {
|
||||||
ebi('f{0}t'.format(t.n)).innerHTML = "ERROR";
|
ebi('f{0}t'.format(t.n)).innerHTML = "ERROR";
|
||||||
|
|
Loading…
Reference in a new issue