mirror of
https://github.com/9001/copyparty.git
synced 2025-08-17 09:02:15 -06:00
sqlite and misc optimizations:
* fix exponentially slow upload handshakes caused by the lack of an rd+fn sqlite index; this became apparent after a volume hit 200k files
* listing big folders is 5% faster due to `_quotep3b`
* optimize `unquote`; 20% faster, but it is only used rarely
* reindexing on startup is 150x faster in some rare cases (same filename in MANY folders)

the database is now around 10% larger (likely worst-case)
This commit is contained in:
parent
2927bbb2d6
commit
d67e9cc507
|
@ -5143,7 +5143,6 @@ class HttpCli(object):
|
||||||
dirs.append(item)
|
dirs.append(item)
|
||||||
else:
|
else:
|
||||||
files.append(item)
|
files.append(item)
|
||||||
item["rd"] = rem
|
|
||||||
|
|
||||||
if is_dk and not vf.get("dks"):
|
if is_dk and not vf.get("dks"):
|
||||||
dirs = []
|
dirs = []
|
||||||
|
@ -5166,16 +5165,10 @@ class HttpCli(object):
|
||||||
add_up_at = ".up_at" in mte
|
add_up_at = ".up_at" in mte
|
||||||
is_admin = self.can_admin
|
is_admin = self.can_admin
|
||||||
tagset: set[str] = set()
|
tagset: set[str] = set()
|
||||||
for fe in files:
|
rd = vrem
|
||||||
|
for fe in files if icur else []:
|
||||||
|
assert icur # !rm
|
||||||
fn = fe["name"]
|
fn = fe["name"]
|
||||||
rd = fe["rd"]
|
|
||||||
del fe["rd"]
|
|
||||||
if not icur:
|
|
||||||
continue
|
|
||||||
|
|
||||||
if vn != dbv:
|
|
||||||
_, rd = vn.get_dbv(rd)
|
|
||||||
|
|
||||||
erd_efn = (rd, fn)
|
erd_efn = (rd, fn)
|
||||||
q = "select mt.k, mt.v from up inner join mt on mt.w = substr(up.w,1,16) where up.rd = ? and up.fn = ? and +mt.k != 'x'"
|
q = "select mt.k, mt.v from up inner join mt on mt.w = substr(up.w,1,16) where up.rd = ? and up.fn = ? and +mt.k != 'x'"
|
||||||
try:
|
try:
|
||||||
|
|
|
@ -1313,6 +1313,9 @@ class Up2k(object):
|
||||||
if WINDOWS:
|
if WINDOWS:
|
||||||
rd = rd.replace("\\", "/").strip("/")
|
rd = rd.replace("\\", "/").strip("/")
|
||||||
|
|
||||||
|
rds = rd + "/" if rd else ""
|
||||||
|
cdirs = cdir + os.sep
|
||||||
|
|
||||||
g = statdir(self.log_func, not self.args.no_scandir, True, cdir)
|
g = statdir(self.log_func, not self.args.no_scandir, True, cdir)
|
||||||
gl = sorted(g)
|
gl = sorted(g)
|
||||||
partials = set([x[0] for x in gl if "PARTIAL" in x[0]])
|
partials = set([x[0] for x in gl if "PARTIAL" in x[0]])
|
||||||
|
@ -1320,8 +1323,8 @@ class Up2k(object):
|
||||||
if self.stop:
|
if self.stop:
|
||||||
return -1
|
return -1
|
||||||
|
|
||||||
rp = vjoin(rd, iname)
|
rp = rds + iname
|
||||||
abspath = os.path.join(cdir, iname)
|
abspath = cdirs + iname
|
||||||
|
|
||||||
if rei and rei.search(abspath):
|
if rei and rei.search(abspath):
|
||||||
unreg.append(rp)
|
unreg.append(rp)
|
||||||
|
@ -1451,8 +1454,8 @@ class Up2k(object):
|
||||||
if self.stop:
|
if self.stop:
|
||||||
return -1
|
return -1
|
||||||
|
|
||||||
rp = vjoin(rd, fn)
|
rp = rds + fn
|
||||||
abspath = os.path.join(cdir, fn)
|
abspath = cdirs + fn
|
||||||
nohash = reh.search(abspath) if reh else False
|
nohash = reh.search(abspath) if reh else False
|
||||||
|
|
||||||
sql = "select w, mt, sz, ip, at from up where rd = ? and fn = ?"
|
sql = "select w, mt, sz, ip, at from up where rd = ? and fn = ?"
|
||||||
|
@ -1536,7 +1539,7 @@ class Up2k(object):
|
||||||
# drop shadowed folders
|
# drop shadowed folders
|
||||||
for sh_rd in unreg:
|
for sh_rd in unreg:
|
||||||
n = 0
|
n = 0
|
||||||
q = "select count(w) from up where (rd = ? or rd like ?||'%') and at == 0"
|
q = "select count(w) from up where (rd=? or rd like ?||'%') and +at == 0"
|
||||||
for sh_erd in [sh_rd, "//" + w8b64enc(sh_rd)]:
|
for sh_erd in [sh_rd, "//" + w8b64enc(sh_rd)]:
|
||||||
try:
|
try:
|
||||||
n = db.c.execute(q, (sh_erd, sh_erd + "/")).fetchone()[0]
|
n = db.c.execute(q, (sh_erd, sh_erd + "/")).fetchone()[0]
|
||||||
|
@ -1552,7 +1555,7 @@ class Up2k(object):
|
||||||
q = "delete from dh where (d = ? or d like ?||'%')"
|
q = "delete from dh where (d = ? or d like ?||'%')"
|
||||||
db.c.execute(q, (sh_erd, sh_erd + "/"))
|
db.c.execute(q, (sh_erd, sh_erd + "/"))
|
||||||
|
|
||||||
q = "delete from up where (rd = ? or rd like ?||'%') and at == 0"
|
q = "delete from up where (rd=? or rd like ?||'%') and +at == 0"
|
||||||
db.c.execute(q, (sh_erd, sh_erd + "/"))
|
db.c.execute(q, (sh_erd, sh_erd + "/"))
|
||||||
ret += n
|
ret += n
|
||||||
|
|
||||||
|
@ -1650,7 +1653,7 @@ class Up2k(object):
|
||||||
|
|
||||||
# then covers
|
# then covers
|
||||||
n_rm3 = 0
|
n_rm3 = 0
|
||||||
qu = "select 1 from up where rd=? and +fn=? limit 1"
|
qu = "select 1 from up where rd=? and fn=? limit 1"
|
||||||
q = "delete from cv where rd=? and dn=? and +fn=?"
|
q = "delete from cv where rd=? and dn=? and +fn=?"
|
||||||
for crd, cdn, fn in cur.execute("select * from cv"):
|
for crd, cdn, fn in cur.execute("select * from cv"):
|
||||||
urd = vjoin(crd, cdn)
|
urd = vjoin(crd, cdn)
|
||||||
|
@ -2471,12 +2474,10 @@ class Up2k(object):
|
||||||
self.log("WARN: failed to upgrade from v4", 3)
|
self.log("WARN: failed to upgrade from v4", 3)
|
||||||
|
|
||||||
if ver == DB_VER:
|
if ver == DB_VER:
|
||||||
try:
|
self._add_dhash_tab(cur)
|
||||||
self._add_cv_tab(cur)
|
self._add_xiu_tab(cur)
|
||||||
self._add_xiu_tab(cur)
|
self._add_cv_tab(cur)
|
||||||
self._add_dhash_tab(cur)
|
self._add_idx_up_vp(cur, db_path)
|
||||||
except:
|
|
||||||
pass
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
nfiles = next(cur.execute("select count(w) from up"))[0]
|
nfiles = next(cur.execute("select count(w) from up"))[0]
|
||||||
|
@ -2573,9 +2574,10 @@ class Up2k(object):
|
||||||
|
|
||||||
for cmd in [
|
for cmd in [
|
||||||
r"create table up (w text, mt int, sz int, rd text, fn text, ip text, at int)",
|
r"create table up (w text, mt int, sz int, rd text, fn text, ip text, at int)",
|
||||||
r"create index up_rd on up(rd)",
|
r"create index up_vp on up(rd, fn)",
|
||||||
r"create index up_fn on up(fn)",
|
r"create index up_fn on up(fn)",
|
||||||
r"create index up_ip on up(ip)",
|
r"create index up_ip on up(ip)",
|
||||||
|
r"create index up_at on up(at)",
|
||||||
idx,
|
idx,
|
||||||
r"create table mt (w text, k text, v int)",
|
r"create table mt (w text, k text, v int)",
|
||||||
r"create index mt_w on mt(w)",
|
r"create index mt_w on mt(w)",
|
||||||
|
@ -2605,6 +2607,12 @@ class Up2k(object):
|
||||||
|
|
||||||
def _add_dhash_tab(self, cur: "sqlite3.Cursor") -> None:
|
def _add_dhash_tab(self, cur: "sqlite3.Cursor") -> None:
|
||||||
# v5 -> v5a
|
# v5 -> v5a
|
||||||
|
try:
|
||||||
|
cur.execute("select d, h from dh limit 1").fetchone()
|
||||||
|
return
|
||||||
|
except:
|
||||||
|
pass
|
||||||
|
|
||||||
for cmd in [
|
for cmd in [
|
||||||
r"create table dh (d text, h text)",
|
r"create table dh (d text, h text)",
|
||||||
r"create index dh_d on dh(d)",
|
r"create index dh_d on dh(d)",
|
||||||
|
@ -2658,6 +2666,24 @@ class Up2k(object):
|
||||||
|
|
||||||
cur.connection.commit()
|
cur.connection.commit()
|
||||||
|
|
||||||
|
def _add_idx_up_vp(self, cur: "sqlite3.Cursor", db_path: str) -> None:
    """Upgrade the indexes of the `up` table from schema v5c to v5d.

    Drops the single-column `up_rd` index, creates the composite `up_vp`
    index on (rd, fn) and the `up_at` index on (at), then commits and
    vacuums the database.

    The `up_rd` index doubles as the "not yet upgraded" marker: if it
    cannot be dropped, the upgrade is skipped and nothing else is touched.

    Args:
        cur: cursor on the database to upgrade.
        db_path: location of the database; only used in log messages.
    """
    # v5c -> v5d
    try:
        cur.execute("drop index up_rd")
    except Exception:
        # most likely "no such index", meaning this db was already upgraded;
        # not a bare except, so ctrl-c / SystemExit are no longer mistaken
        # for "nothing to do"
        return

    for cmd in [
        r"create index up_vp on up(rd, fn)",
        r"create index up_at on up(at)",
    ]:
        # the first 18 chars are exactly "create index up_xx"
        self.log("upgrading db [%s]: %s" % (db_path, cmd[:18]))
        cur.execute(cmd)

    self.log("upgrading db [%s]: writing to disk..." % (db_path,))
    cur.connection.commit()
    # vacuum is issued after the commit, once no transaction is pending
    cur.execute("vacuum")
|
||||||
|
|
||||||
def wake_rescanner(self):
|
def wake_rescanner(self):
|
||||||
with self.rescan_cond:
|
with self.rescan_cond:
|
||||||
self.rescan_cond.notify_all()
|
self.rescan_cond.notify_all()
|
||||||
|
|
|
@ -164,12 +164,8 @@ except ImportError:
|
||||||
|
|
||||||
if not PY2:
|
if not PY2:
|
||||||
from io import BytesIO
|
from io import BytesIO
|
||||||
from urllib.parse import quote_from_bytes as quote
|
|
||||||
from urllib.parse import unquote_to_bytes as unquote
|
|
||||||
else:
|
else:
|
||||||
from StringIO import StringIO as BytesIO # type: ignore
|
from StringIO import StringIO as BytesIO # type: ignore
|
||||||
from urllib import quote # type: ignore # pylint: disable=no-name-in-module
|
|
||||||
from urllib import unquote # type: ignore # pylint: disable=no-name-in-module
|
|
||||||
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
@ -488,19 +484,6 @@ VERSIONS = (
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
_: Any = (mp, BytesIO, quote, unquote, SQLITE_VER, JINJA_VER, PYFTPD_VER, PARTFTPY_VER)
|
|
||||||
__all__ = [
|
|
||||||
"mp",
|
|
||||||
"BytesIO",
|
|
||||||
"quote",
|
|
||||||
"unquote",
|
|
||||||
"SQLITE_VER",
|
|
||||||
"JINJA_VER",
|
|
||||||
"PYFTPD_VER",
|
|
||||||
"PARTFTPY_VER",
|
|
||||||
]
|
|
||||||
|
|
||||||
|
|
||||||
class Daemon(threading.Thread):
|
class Daemon(threading.Thread):
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
|
@ -2074,6 +2057,8 @@ def html_bescape(s: bytes, quot: bool = False, crlf: bool = False) -> bytes:
|
||||||
|
|
||||||
def _quotep2(txt: str) -> str:
|
def _quotep2(txt: str) -> str:
|
||||||
"""url quoter which deals with bytes correctly"""
|
"""url quoter which deals with bytes correctly"""
|
||||||
|
if not txt:
|
||||||
|
return ""
|
||||||
btxt = w8enc(txt)
|
btxt = w8enc(txt)
|
||||||
quot = quote(btxt, safe=b"/")
|
quot = quote(btxt, safe=b"/")
|
||||||
return w8dec(quot.replace(b" ", b"+")) # type: ignore
|
return w8dec(quot.replace(b" ", b"+")) # type: ignore
|
||||||
|
@ -2081,18 +2066,61 @@ def _quotep2(txt: str) -> str:
|
||||||
|
|
||||||
def _quotep3(txt: str) -> str:
|
def _quotep3(txt: str) -> str:
|
||||||
"""url quoter which deals with bytes correctly"""
|
"""url quoter which deals with bytes correctly"""
|
||||||
|
if not txt:
|
||||||
|
return ""
|
||||||
btxt = w8enc(txt)
|
btxt = w8enc(txt)
|
||||||
quot = quote(btxt, safe=b"/").encode("utf-8")
|
quot = quote(btxt, safe=b"/").encode("utf-8")
|
||||||
return w8dec(quot.replace(b" ", b"+"))
|
return w8dec(quot.replace(b" ", b"+"))
|
||||||
|
|
||||||
|
|
||||||
quotep = _quotep3 if not PY2 else _quotep2
|
if not PY2:
|
||||||
|
# bytes which are emitted as-is by _quotep3b; everything else becomes %XX
_uqsb = b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_.-~/"

# lookup table: byte value (int) -> its url-quoted form (bytes).
# note that space (0x20) is not in _uqsb, so it is emitted as "%20";
# the table is indexed with the ints obtained by iterating over bytes,
# so a bytes key such as b" " could never be hit and is not added
_uqtl = {
    n: bytes((n,)) if n in _uqsb else ("%%%02X" % (n,)).encode("ascii")
    for n in range(256)
}


def _quotep3b(txt: str) -> str:
    """url quoter which deals with bytes correctly

    Converts txt to bytes with w8enc, replaces every byte outside of
    _uqsb with its uppercase %XX escape, and converts the result back
    with w8dec. An empty input returns "" without any conversion.
    """
    if not txt:
        return ""

    btxt = w8enc(txt)

    # rstrip removes trailing safe bytes and stops at the first unsafe one,
    # so a non-empty remainder means at least one byte needs escaping;
    # fully-safe input skips the per-byte loop entirely
    if btxt.rstrip(_uqsb):
        lut = _uqtl
        btxt = b"".join([lut[ch] for ch in btxt])

    return w8dec(btxt)
|
||||||
|
|
||||||
|
quotep = _quotep3b
|
||||||
|
|
||||||
|
# every accepted hex digit, in both cases
_hexd = "0123456789ABCDEFabcdef"

# lookup table: two-character hex pair (bytes) -> the single byte it encodes
_hex2b = {(a + b).encode(): bytes.fromhex(a + b) for a in _hexd for b in _hexd}


def unquote(btxt: bytes) -> bytes:
    """Decode the %XX escapes in btxt.

    A "%" which is not followed by two hex digits is kept verbatim,
    so malformed input is passed through rather than rejected.

    Args:
        btxt: the url-quoted bytes.

    Returns:
        The decoded bytes; btxt itself if it contains no "%" at all.
    """
    chunks = btxt.split(b"%")
    if len(chunks) == 1:
        # no escapes at all; skip the rebuild
        return btxt

    h2b = _hex2b
    parts = iter(chunks)
    # whatever precedes the first "%" is never part of an escape
    ret = [next(parts)]
    for item in parts:
        c = h2b.get(item[:2])
        if c is None:
            # not a valid escape; restore the "%" that split() consumed
            ret.append(b"%")
            ret.append(item)
        else:
            ret.append(c)
            ret.append(item[2:])

    return b"".join(ret)
|
||||||
|
|
||||||
|
from urllib.parse import quote_from_bytes as quote
|
||||||
|
else:
|
||||||
|
from urllib import quote # type: ignore # pylint: disable=no-name-in-module
|
||||||
|
from urllib import unquote # type: ignore # pylint: disable=no-name-in-module
|
||||||
|
|
||||||
|
quotep = _quotep2
|
||||||
|
|
||||||
|
|
||||||
def unquotep(txt: str) -> str:
|
def unquotep(txt: str) -> str:
|
||||||
"""url unquoter which deals with bytes correctly"""
|
"""url unquoter which deals with bytes correctly"""
|
||||||
btxt = w8enc(txt)
|
btxt = w8enc(txt)
|
||||||
# btxt = btxt.replace(b"+", b" ")
|
|
||||||
unq2 = unquote(btxt)
|
unq2 = unquote(btxt)
|
||||||
return w8dec(unq2)
|
return w8dec(unq2)
|
||||||
|
|
||||||
|
@ -3521,3 +3549,16 @@ class WrongPostKey(Pebkac):
|
||||||
self.got = got
|
self.got = got
|
||||||
self.fname = fname
|
self.fname = fname
|
||||||
self.datagen = datagen
|
self.datagen = datagen
|
||||||
|
|
||||||
|
|
||||||
|
_: Any = (mp, BytesIO, quote, unquote, SQLITE_VER, JINJA_VER, PYFTPD_VER, PARTFTPY_VER)
|
||||||
|
__all__ = [
|
||||||
|
"mp",
|
||||||
|
"BytesIO",
|
||||||
|
"quote",
|
||||||
|
"unquote",
|
||||||
|
"SQLITE_VER",
|
||||||
|
"JINJA_VER",
|
||||||
|
"PYFTPD_VER",
|
||||||
|
"PARTFTPY_VER",
|
||||||
|
]
|
||||||
|
|
38
tests/test_utils.py
Normal file
38
tests/test_utils.py
Normal file
|
@ -0,0 +1,38 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
# coding: utf-8
|
||||||
|
from __future__ import print_function, unicode_literals
|
||||||
|
|
||||||
|
import unittest
|
||||||
|
|
||||||
|
from copyparty.__main__ import PY2
|
||||||
|
from copyparty.util import w8enc
|
||||||
|
from tests import util as tu
|
||||||
|
|
||||||
|
|
||||||
|
class TestUtils(unittest.TestCase):
    """Compares the optimized url-quoting helpers against reference implementations."""

    def cmp(self, orig, t1, t2):
        """Fail the test unless t1 == t2, showing the input and both results.

        orig is the input which produced t1 and t2; it is str in
        test_quotep and bytes in test_unquote.
        """
        if t1 == t2:
            return

        # w8enc is only applied to str elsewhere (presumably str -> bytes),
        # so bytes input is shown as-is rather than risking an unrelated
        # error which would hide the actual mismatch
        if not isinstance(orig, bytes):
            orig = w8enc(orig)

        self.fail("\n%r\n%r\n%r\n" % (orig, t1, t2))

    def test_quotep(self):
        """_quotep3b must produce the same output as _quotep3 for random input."""
        if PY2:
            raise unittest.SkipTest("python3 only")

        from copyparty.util import _quotep3, _quotep3b, w8dec

        txt = w8dec(tu.randbytes(8192))
        self.cmp(txt, _quotep3(txt), _quotep3b(txt))

    def test_unquote(self):
        """copyparty's unquote must agree with urllib's unquote_to_bytes."""
        if PY2:
            raise unittest.SkipTest("python3 only")

        from urllib.parse import unquote_to_bytes as u2b

        from copyparty.util import unquote

        for btxt in (
            tu.randbytes(8192),
            br"%ed%91qw,er;ty%20as df?gh+jkl%zxc&vbn <qwe>\"rty'uio&asd fgh",
        ):
            self.cmp(btxt, unquote(btxt), u2b(btxt))
|
|
@ -3,6 +3,7 @@
|
||||||
from __future__ import print_function, unicode_literals
|
from __future__ import print_function, unicode_literals
|
||||||
|
|
||||||
import os
|
import os
|
||||||
|
import random
|
||||||
import re
|
import re
|
||||||
import shutil
|
import shutil
|
||||||
import socket
|
import socket
|
||||||
|
@ -49,6 +50,10 @@ from copyparty.util import FHC, CachedDict, Garda, Unrecv
|
||||||
init_E(E)
|
init_E(E)
|
||||||
|
|
||||||
|
|
||||||
|
def randbytes(n):
    """Return n pseudo-random bytes from the `random` module.

    Intended for generating test input; not suitable for secrets.
    """
    if not n:
        # getrandbits(0) raises ValueError before python 3.9
        return b""

    return random.getrandbits(n * 8).to_bytes(n, "little")
|
||||||
|
|
||||||
|
|
||||||
def runcmd(argv):
|
def runcmd(argv):
|
||||||
p = sp.Popen(argv, stdout=sp.PIPE, stderr=sp.PIPE)
|
p = sp.Popen(argv, stdout=sp.PIPE, stderr=sp.PIPE)
|
||||||
stdout, stderr = p.communicate()
|
stdout, stderr = p.communicate()
|
||||||
|
|
Loading…
Reference in a new issue