mirror of
https://github.com/9001/copyparty.git
synced 2025-08-17 09:02:15 -06:00
sqlite and misc optimizations:
* fix exponentially slow upload handshakes caused by the lack of an rd+fn sqlite index; this became apparent after a volume hit 200k files
* listing big folders is 5% faster due to `_quotep3b`
* optimize `unquote`; it is 20% faster, but only rarely used
* reindex on startup is 150x faster in some rare cases (same filename in MANY folders)

the database is now around 10% larger (likely worst-case)
This commit is contained in:
parent
2927bbb2d6
commit
d67e9cc507
|
@ -5143,7 +5143,6 @@ class HttpCli(object):
|
|||
dirs.append(item)
|
||||
else:
|
||||
files.append(item)
|
||||
item["rd"] = rem
|
||||
|
||||
if is_dk and not vf.get("dks"):
|
||||
dirs = []
|
||||
|
@ -5166,16 +5165,10 @@ class HttpCli(object):
|
|||
add_up_at = ".up_at" in mte
|
||||
is_admin = self.can_admin
|
||||
tagset: set[str] = set()
|
||||
for fe in files:
|
||||
rd = vrem
|
||||
for fe in files if icur else []:
|
||||
assert icur # !rm
|
||||
fn = fe["name"]
|
||||
rd = fe["rd"]
|
||||
del fe["rd"]
|
||||
if not icur:
|
||||
continue
|
||||
|
||||
if vn != dbv:
|
||||
_, rd = vn.get_dbv(rd)
|
||||
|
||||
erd_efn = (rd, fn)
|
||||
q = "select mt.k, mt.v from up inner join mt on mt.w = substr(up.w,1,16) where up.rd = ? and up.fn = ? and +mt.k != 'x'"
|
||||
try:
|
||||
|
|
|
@ -1313,6 +1313,9 @@ class Up2k(object):
|
|||
if WINDOWS:
|
||||
rd = rd.replace("\\", "/").strip("/")
|
||||
|
||||
rds = rd + "/" if rd else ""
|
||||
cdirs = cdir + os.sep
|
||||
|
||||
g = statdir(self.log_func, not self.args.no_scandir, True, cdir)
|
||||
gl = sorted(g)
|
||||
partials = set([x[0] for x in gl if "PARTIAL" in x[0]])
|
||||
|
@ -1320,8 +1323,8 @@ class Up2k(object):
|
|||
if self.stop:
|
||||
return -1
|
||||
|
||||
rp = vjoin(rd, iname)
|
||||
abspath = os.path.join(cdir, iname)
|
||||
rp = rds + iname
|
||||
abspath = cdirs + iname
|
||||
|
||||
if rei and rei.search(abspath):
|
||||
unreg.append(rp)
|
||||
|
@ -1451,8 +1454,8 @@ class Up2k(object):
|
|||
if self.stop:
|
||||
return -1
|
||||
|
||||
rp = vjoin(rd, fn)
|
||||
abspath = os.path.join(cdir, fn)
|
||||
rp = rds + fn
|
||||
abspath = cdirs + fn
|
||||
nohash = reh.search(abspath) if reh else False
|
||||
|
||||
sql = "select w, mt, sz, ip, at from up where rd = ? and fn = ?"
|
||||
|
@ -1536,7 +1539,7 @@ class Up2k(object):
|
|||
# drop shadowed folders
|
||||
for sh_rd in unreg:
|
||||
n = 0
|
||||
q = "select count(w) from up where (rd = ? or rd like ?||'%') and at == 0"
|
||||
q = "select count(w) from up where (rd=? or rd like ?||'%') and +at == 0"
|
||||
for sh_erd in [sh_rd, "//" + w8b64enc(sh_rd)]:
|
||||
try:
|
||||
n = db.c.execute(q, (sh_erd, sh_erd + "/")).fetchone()[0]
|
||||
|
@ -1552,7 +1555,7 @@ class Up2k(object):
|
|||
q = "delete from dh where (d = ? or d like ?||'%')"
|
||||
db.c.execute(q, (sh_erd, sh_erd + "/"))
|
||||
|
||||
q = "delete from up where (rd = ? or rd like ?||'%') and at == 0"
|
||||
q = "delete from up where (rd=? or rd like ?||'%') and +at == 0"
|
||||
db.c.execute(q, (sh_erd, sh_erd + "/"))
|
||||
ret += n
|
||||
|
||||
|
@ -1650,7 +1653,7 @@ class Up2k(object):
|
|||
|
||||
# then covers
|
||||
n_rm3 = 0
|
||||
qu = "select 1 from up where rd=? and +fn=? limit 1"
|
||||
qu = "select 1 from up where rd=? and fn=? limit 1"
|
||||
q = "delete from cv where rd=? and dn=? and +fn=?"
|
||||
for crd, cdn, fn in cur.execute("select * from cv"):
|
||||
urd = vjoin(crd, cdn)
|
||||
|
@ -2471,12 +2474,10 @@ class Up2k(object):
|
|||
self.log("WARN: failed to upgrade from v4", 3)
|
||||
|
||||
if ver == DB_VER:
|
||||
try:
|
||||
self._add_cv_tab(cur)
|
||||
self._add_xiu_tab(cur)
|
||||
self._add_dhash_tab(cur)
|
||||
except:
|
||||
pass
|
||||
self._add_xiu_tab(cur)
|
||||
self._add_cv_tab(cur)
|
||||
self._add_idx_up_vp(cur, db_path)
|
||||
|
||||
try:
|
||||
nfiles = next(cur.execute("select count(w) from up"))[0]
|
||||
|
@ -2573,9 +2574,10 @@ class Up2k(object):
|
|||
|
||||
for cmd in [
|
||||
r"create table up (w text, mt int, sz int, rd text, fn text, ip text, at int)",
|
||||
r"create index up_rd on up(rd)",
|
||||
r"create index up_vp on up(rd, fn)",
|
||||
r"create index up_fn on up(fn)",
|
||||
r"create index up_ip on up(ip)",
|
||||
r"create index up_at on up(at)",
|
||||
idx,
|
||||
r"create table mt (w text, k text, v int)",
|
||||
r"create index mt_w on mt(w)",
|
||||
|
@ -2605,6 +2607,12 @@ class Up2k(object):
|
|||
|
||||
def _add_dhash_tab(self, cur: "sqlite3.Cursor") -> None:
|
||||
# v5 -> v5a
|
||||
try:
|
||||
cur.execute("select d, h from dh limit 1").fetchone()
|
||||
return
|
||||
except:
|
||||
pass
|
||||
|
||||
for cmd in [
|
||||
r"create table dh (d text, h text)",
|
||||
r"create index dh_d on dh(d)",
|
||||
|
@ -2658,6 +2666,24 @@ class Up2k(object):
|
|||
|
||||
cur.connection.commit()
|
||||
|
||||
def _add_idx_up_vp(self, cur: "sqlite3.Cursor", db_path: str) -> None:
    """Upgrade the `up` table indexes from schema v5c to v5d.

    Drops the single-column `up_rd` index and replaces it with the
    composite `up_vp` index on (rd, fn), and also adds `up_at` on (at).
    Afterwards the changes are committed and the database is vacuumed.

    If `up_rd` cannot be dropped, nothing else is attempted and the
    method returns silently.

    Args:
        cur: cursor on the database to upgrade
        db_path: only used in the log messages
    """
    # v5c -> v5d
    try:
        cur.execute("drop index up_rd")
    except Exception:
        # best-effort, but unlike a bare `except:` this lets
        # KeyboardInterrupt / SystemExit propagate
        return

    for cmd in [
        r"create index up_vp on up(rd, fn)",
        r"create index up_at on up(at)",
    ]:
        # cmd[:18] is just the "create index <name>" prefix
        self.log("upgrading db [%s]: %s" % (db_path, cmd[:18]))
        cur.execute(cmd)

    self.log("upgrading db [%s]: writing to disk..." % (db_path,))
    cur.connection.commit()
    # vacuum runs after the commit, when no transaction is pending
    cur.execute("vacuum")
|
||||
|
||||
def wake_rescanner(self):
|
||||
with self.rescan_cond:
|
||||
self.rescan_cond.notify_all()
|
||||
|
|
|
@ -164,12 +164,8 @@ except ImportError:
|
|||
|
||||
if not PY2:
|
||||
from io import BytesIO
|
||||
from urllib.parse import quote_from_bytes as quote
|
||||
from urllib.parse import unquote_to_bytes as unquote
|
||||
else:
|
||||
from StringIO import StringIO as BytesIO # type: ignore
|
||||
from urllib import quote # type: ignore # pylint: disable=no-name-in-module
|
||||
from urllib import unquote # type: ignore # pylint: disable=no-name-in-module
|
||||
|
||||
|
||||
try:
|
||||
|
@ -488,19 +484,6 @@ VERSIONS = (
|
|||
)
|
||||
|
||||
|
||||
_: Any = (mp, BytesIO, quote, unquote, SQLITE_VER, JINJA_VER, PYFTPD_VER, PARTFTPY_VER)
|
||||
__all__ = [
|
||||
"mp",
|
||||
"BytesIO",
|
||||
"quote",
|
||||
"unquote",
|
||||
"SQLITE_VER",
|
||||
"JINJA_VER",
|
||||
"PYFTPD_VER",
|
||||
"PARTFTPY_VER",
|
||||
]
|
||||
|
||||
|
||||
class Daemon(threading.Thread):
|
||||
def __init__(
|
||||
self,
|
||||
|
@ -2074,6 +2057,8 @@ def html_bescape(s: bytes, quot: bool = False, crlf: bool = False) -> bytes:
|
|||
|
||||
def _quotep2(txt: str) -> str:
    """url quoter which deals with bytes correctly

    An empty `txt` yields "". Otherwise `txt` is converted with `w8enc`,
    percent-quoted with "/" left unescaped, any spaces remaining in the
    quoted result are replaced with "+", and the result is converted
    back with `w8dec`.
    """
    if txt:
        escaped = quote(w8enc(txt), safe=b"/")
        return w8dec(escaped.replace(b" ", b"+"))  # type: ignore

    return ""
|
||||
|
@ -2081,18 +2066,61 @@ def _quotep2(txt: str) -> str:
|
|||
|
||||
def _quotep3(txt: str) -> str:
    """url quoter which deals with bytes correctly

    An empty `txt` yields "". Otherwise `txt` is converted with `w8enc`,
    percent-quoted with "/" left unescaped, the quoted text is encoded
    as utf-8, any spaces remaining in it are replaced with "+", and the
    result is converted back with `w8dec`.
    """
    if txt:
        escaped = quote(w8enc(txt), safe=b"/")
        raw = escaped.encode("utf-8")
        return w8dec(raw.replace(b" ", b"+"))

    return ""
|
||||
|
||||
|
||||
quotep = _quotep3 if not PY2 else _quotep2
|
||||
if not PY2:
|
||||
# bytes which are emitted as-is by _quotep3b
_uqsb = b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_.-~/"

# byte value (int) -> its quoted form; safe bytes map to themselves,
# everything else (including the space, 0x20) maps to b"%XX"
_uqtl = {
    n: ("%%%02X" % (n,) if n not in _uqsb else chr(n)).encode("utf-8")
    for n in range(256)
}
# NOTE: the former `_uqtl[b" "] = b"+"` entry was removed; iterating
# over bytes yields ints, so a bytes key could never be looked up and
# spaces have always been emitted as "%20" by this table


def _quotep3b(txt: str) -> str:
    """url quoter which deals with bytes correctly

    An empty `txt` yields "". Otherwise `txt` is converted with `w8enc`;
    if it consists solely of bytes from `_uqsb` it is passed through
    unchanged, else each byte is replaced using the `_uqtl` table.
    The result is converted back with `w8dec`.
    """
    if not txt:
        return ""

    btxt = w8enc(txt)
    # rstrip leaves something behind only if at least one byte
    # is outside the safe set, so clean input skips the rebuild
    if btxt.rstrip(_uqsb):
        lut = _uqtl
        btxt = b"".join([lut[ch] for ch in btxt])

    return w8dec(btxt)


quotep = _quotep3b
|
||||
|
||||
# every two-character hex pair (upper, lower and mixed case)
# mapped to the single byte it represents
_hexd = "0123456789ABCDEFabcdef"
_hex2b = {
    (hi + lo).encode(): bytes.fromhex(hi + lo)
    for hi in _hexd
    for lo in _hexd
}


def unquote(btxt: bytes) -> bytes:
    """Decode %XX escapes in `btxt`.

    A "%" which is not followed by two hex digits is kept verbatim.
    """
    lookup = _hex2b
    pieces = btxt.split(b"%")

    # whatever precedes the first "%" needs no decoding
    out = [pieces[0]]
    push = out.append
    for chunk in pieces[1:]:
        decoded = lookup.get(chunk[:2])
        if decoded is not None:
            push(decoded)
            push(chunk[2:])
        else:
            # not a valid escape; put the "%" back
            push(b"%")
            push(chunk)

    return b"".join(out)
|
||||
|
||||
from urllib.parse import quote_from_bytes as quote
|
||||
else:
|
||||
from urllib import quote # type: ignore # pylint: disable=no-name-in-module
|
||||
from urllib import unquote # type: ignore # pylint: disable=no-name-in-module
|
||||
|
||||
quotep = _quotep2
|
||||
|
||||
|
||||
def unquotep(txt: str) -> str:
    """url unquoter which deals with bytes correctly

    Converts `txt` with `w8enc`, decodes it with `unquote`,
    and converts the result back with `w8dec`.
    """
    # note: "+" is not translated to a space here
    return w8dec(unquote(w8enc(txt)))
|
||||
|
||||
|
@ -3521,3 +3549,16 @@ class WrongPostKey(Pebkac):
|
|||
self.got = got
|
||||
self.fname = fname
|
||||
self.datagen = datagen
|
||||
|
||||
|
||||
_: Any = (mp, BytesIO, quote, unquote, SQLITE_VER, JINJA_VER, PYFTPD_VER, PARTFTPY_VER)
|
||||
__all__ = [
|
||||
"mp",
|
||||
"BytesIO",
|
||||
"quote",
|
||||
"unquote",
|
||||
"SQLITE_VER",
|
||||
"JINJA_VER",
|
||||
"PYFTPD_VER",
|
||||
"PARTFTPY_VER",
|
||||
]
|
||||
|
|
38
tests/test_utils.py
Normal file
38
tests/test_utils.py
Normal file
|
@ -0,0 +1,38 @@
|
|||
#!/usr/bin/env python3
|
||||
# coding: utf-8
|
||||
from __future__ import print_function, unicode_literals
|
||||
|
||||
import unittest
|
||||
|
||||
from copyparty.__main__ import PY2
|
||||
from copyparty.util import w8enc
|
||||
from tests import util as tu
|
||||
|
||||
|
||||
class TestUtils(unittest.TestCase):
    """Compares the url quoting helpers in copyparty.util against reference implementations."""

    def cmp(self, orig, t1, t2):
        """Raise with the input and both outputs if `t1` and `t2` differ."""
        if t1 == t2:
            return

        raise Exception("\n%r\n%r\n%r\n" % (w8enc(orig), t1, t2))

    def test_quotep(self):
        """_quotep3b must give the same output as _quotep3 for random input."""
        if PY2:
            raise unittest.SkipTest()

        from copyparty.util import _quotep3, _quotep3b, w8dec

        sample = w8dec(tu.randbytes(8192))
        self.cmp(sample, _quotep3(sample), _quotep3b(sample))

    def test_unquote(self):
        """unquote must agree with urllib's unquote_to_bytes."""
        if PY2:
            raise unittest.SkipTest()

        from urllib.parse import unquote_to_bytes as u2b

        from copyparty.util import unquote

        # random data plus a handcrafted string with valid and invalid escapes
        samples = [
            tu.randbytes(8192),
            br"%ed%91qw,er;ty%20as df?gh+jkl%zxc&vbn <qwe>\"rty'uio&asd fgh",
        ]
        for btxt in samples:
            self.cmp(btxt, unquote(btxt), u2b(btxt))
|
|
@ -3,6 +3,7 @@
|
|||
from __future__ import print_function, unicode_literals
|
||||
|
||||
import os
|
||||
import random
|
||||
import re
|
||||
import shutil
|
||||
import socket
|
||||
|
@ -49,6 +50,10 @@ from copyparty.util import FHC, CachedDict, Garda, Unrecv
|
|||
init_E(E)
|
||||
|
||||
|
||||
def randbytes(n):
    """Return `n` pseudo-random bytes.

    Uses the `random` module, so the output is reproducible after
    `random.seed()` and is not cryptographically secure.
    """
    if not n:
        # random.getrandbits(0) raises ValueError before Python 3.9
        return b""

    return random.getrandbits(n * 8).to_bytes(n, "little")
|
||||
|
||||
|
||||
def runcmd(argv):
|
||||
p = sp.Popen(argv, stdout=sp.PIPE, stderr=sp.PIPE)
|
||||
stdout, stderr = p.communicate()
|
||||
|
|
Loading…
Reference in a new issue