add prometheus metrics; closes #49

ed 2023-08-20 17:58:06 +00:00
parent 7df890d964
commit fc0405c8f3
9 changed files with 293 additions and 7 deletions

View file

@ -71,6 +71,7 @@ turn almost any device into a file server with resumable uploads/downloads using
* [themes](#themes)
* [complete examples](#complete-examples)
* [reverse-proxy](#reverse-proxy) - running copyparty next to other websites
* [prometheus](#prometheus) - metrics/stats can be enabled
* [packages](#packages) - the party might be closer than you think
* [arch package](#arch-package) - now [available on aur](https://aur.archlinux.org/packages/copyparty) maintained by [@icxes](https://github.com/icxes)
* [fedora package](#fedora-package) - now [available on copr-pypi](https://copr.fedorainfracloud.org/coprs/g/copr/PyPI/)
@ -1008,6 +1009,9 @@ you can also set transaction limits which apply per-IP and per-volume, but these
* `:c,maxn=250,3600` allows 250 files over 1 hour from each IP (tracked per-volume)
* `:c,maxb=1g,300` allows 1 GiB total over 5 minutes from each IP (tracked per-volume)
notes:
* `vmaxb` and `vmaxn` require either the `e2ds` volflag or the `-e2dsa` global-option
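* for example (same volflag syntax as above), `:c,vmaxb=1g` caps a volume at 1 GiB total and `:c,vmaxn=50000` at 50000 files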
## compress uploads
@ -1238,6 +1242,51 @@ example webserver configs:
* [apache2 config](contrib/apache/copyparty.conf) -- location-based
## prometheus
metrics/stats can be enabled at `/.cpr/s/metrics` for grafana / prometheus / etc.

this is disabled by default (enable it with `--stats`) since it makes startup a tiny bit slower

the endpoint is only accessible by `admin` accounts, meaning the `a` in `rwmda` of the following example commandline: `python3 -m copyparty -a ed:wark -v /mnt/nas::rwmda,ed`

follow any guide for scraping `node_exporter` with prometheus, but have it read from copyparty instead; example `/etc/prometheus/prometheus.yml` below
```yaml
scrape_configs:
  - job_name: copyparty
    metrics_path: /.cpr/s/metrics
    basic_auth:
      password: wark
    static_configs:
      - targets: ['192.168.123.1:3923']
```
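to sanity-check the endpoint by hand before wiring up prometheus, a quick scrape from python works too; a minimal sketch, assuming the server from the example above and that the password is passed via copyparty's `?pw=` url parameter:

```python
# fetch the metrics endpoint and print the first few lines;
# server address and password taken from the yaml example above
import urllib.request

url = "http://192.168.123.1:3923/.cpr/s/metrics?pw=wark"
with urllib.request.urlopen(url) as r:
    print("\n".join(r.read().decode("utf-8").splitlines()[:8]))
```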
currently the following metrics are available:
* `cpp_uptime` in seconds
* `cpp_bans` number of banned IPs
and these are available per-volume only:
* `cpp_disk_size` total HDD size in MiB
* `cpp_disk_free` free HDD space in MiB
and these are per-volume and `total`:
* `cpp_vol_mib` size of all files in MiB
* `cpp_vol_files` number of files
* `cpp_dupe_mib` amount of MiB presumably saved by deduplication
* `cpp_dupe_files` number of dupe files
* `cpp_unf_mib` currently unfinished / incoming uploads
some of the metrics have additional requirements to function correctly:
* `cpp_vol_mib` and `cpp_vol_files` require either the `e2ds` volflag or the `-e2dsa` global-option
the following options are available to disable some of the metrics:
* `--nos-hdd` disables `cpp_disk_*` which can prevent spinning up HDDs
* `--nos-vol` disables `cpp_vol_*` which reduces server startup time
* `--nos-dup` disables `cpp_dupe_*` which reduces the server load caused by prometheus queries
* `--nos-unf` disables `cpp_unf_mib` for no particular purpose
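for reference, the endpoint replies in the standard prometheus text exposition format; a hypothetical excerpt for a single volume mounted at `/` could look like this:

```
# HELP cpp_uptime time since last server restart
# TYPE cpp_uptime counter
cpp_uptime 86400.000
# HELP cpp_vol_mib total MiB in volume
# TYPE cpp_vol_mib gauge
cpp_vol_mib{vol="/"} 69120.000
cpp_vol_mib{vol="total"} 69120.000
```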
# packages
the party might be closer than you think

View file

@ -727,8 +727,11 @@ def get_sects():
things to check if it does not work at all:
  * is there a firewall blocking port 5353 on either the server or client?
    (for example, clients may be able to send queries to copyparty,
     but the replies could get lost)
  * is multicast accidentally disabled on either the server or client?
    (look for mDNS log messages saying "new client on [...]")
  * the router/switch must be multicast and igmp capable
@ -959,6 +962,15 @@ def add_hooks(ap):
ap2.add_argument("--xban", metavar="CMD", type=u, action="append", help="execute CMD if someone gets banned (pw/404)")
def add_stats(ap):
ap2 = ap.add_argument_group('grafana/prometheus metrics endpoint')
ap2.add_argument("--stats", action="store_true", help="enable stats at /.cpr/s/metrics for admin accounts")
ap2.add_argument("--nos-hdd", action="store_true", help="disable disk-space metrics (used/free space)")
ap2.add_argument("--nos-vol", action="store_true", help="disable volume size metrics (num files, total bytes, vmaxb/vmaxn)")
ap2.add_argument("--nos-dup", action="store_true", help="disable dupe-files metrics (good idea; very slow)")
ap2.add_argument("--nos-unf", action="store_true", help="disable unfinished-uploads metrics")
def add_yolo(ap):
ap2 = ap.add_argument_group('yolo options')
ap2.add_argument("--allow-csrf", action="store_true", help="disable csrf protections; let other domains/sites impersonate you through cross-site requests")
@ -1208,6 +1220,7 @@ def run_argparse(
add_yolo(ap)
add_handlers(ap)
add_hooks(ap)
add_stats(ap)
add_ui(ap, retry)
add_admin(ap)
add_logging(ap)
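so, extending the readme's example commandline, a hypothetical invocation with metrics enabled (and the slow dupe scan turned off) would be `python3 -m copyparty --stats --nos-dup -a ed:wark -v /mnt/nas::rwmda,ed`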

View file

@ -822,6 +822,9 @@ class HttpCli(object):
self.reply(b"", 301, headers=h)
return True
if self.vpath == ".cpr/s/metrics":
return self.conn.hsrv.metrics.tx(self)
path_base = os.path.join(self.E.mod, "web")
static_path = absreal(os.path.join(path_base, self.vpath[5:]))
if static_path in self.conn.hsrv.statics:

View file

@ -56,6 +56,7 @@ except SyntaxError:
    sys.exit(1)

from .httpconn import HttpConn
from .metrics import Metrics
from .u2idx import U2idx
from .util import (
    E_SCK,
@ -99,6 +100,7 @@ class HttpSrv(object):
        # redefine in case of multiprocessing
        socket.setdefaulttimeout(120)
        self.t0 = time.time()

        nsuf = "-n{}-i{:x}".format(nid, os.getpid()) if nid else ""
        self.magician = Magician()
        self.nm = NetMap([], {})
@ -122,6 +124,7 @@ class HttpSrv(object):
        self.t_periodic: Optional[threading.Thread] = None
        self.u2fh = FHC()
        self.metrics = Metrics(self)
        self.srvs: list[socket.socket] = []
        self.ncli = 0  # exact
        self.clients: set[HttpConn] = set()  # laggy

copyparty/metrics.py Normal file
View file

@ -0,0 +1,163 @@
# coding: utf-8
from __future__ import print_function, unicode_literals

import json
import time

from .__init__ import TYPE_CHECKING
from .util import Pebkac, get_df, unhumanize

if TYPE_CHECKING:
    from .httpcli import HttpCli
    from .httpsrv import HttpSrv


class Metrics(object):
    def __init__(self, hsrv: "HttpSrv") -> None:
        self.hsrv = hsrv

    def tx(self, cli: "HttpCli") -> bool:
        if not cli.avol:
            raise Pebkac(403, "not allowed for user " + cli.uname)

        args = cli.args
        if not args.stats:
            raise Pebkac(403, "the stats feature is not enabled in server config")

        conn = cli.conn
        vfs = conn.asrv.vfs
        allvols = list(sorted(vfs.all_vols.items()))

        idx = conn.get_u2idx()
        if not idx or not hasattr(idx, "p_end"):
            idx = None

        ret: list[str] = []

        def add(name: str, typ: str, v: str, desc: str) -> None:
            # emit a fully-described metric: HELP + TYPE + one sample
            zs = "# HELP %s %s\n# TYPE %s %s\n%s %s"
            ret.append(zs % (name, desc, name, typ, name, v))

        def addh(name: str, typ: str, desc: str) -> None:
            # emit just the HELP/TYPE header; samples are added with addv
            zs = "# HELP %s %s\n# TYPE %s %s"
            ret.append(zs % (name, desc, name, typ))

        def addv(name: str, v: str) -> None:
            ret.append("%s %s" % (name, v))

        v = "{:.3f}".format(time.time() - self.hsrv.t0)
        add("cpp_uptime", "counter", v, "time since last server restart")

        v = str(len(conn.bans or []))
        add("cpp_bans", "counter", v, "number of banned IPs")

        if not args.nos_hdd:
            addh("cpp_disk_size", "gauge", "total HDD size (MiB) of volume")
            addh("cpp_disk_free", "gauge", "free HDD space (MiB) in volume")
            for vpath, vol in allvols:
                free, total = get_df(vol.realpath)
                v = "{:.3f}".format(total / 1048576.0)
                addv('cpp_disk_size{vol="/%s"}' % (vpath), v)
                v = "{:.3f}".format(free / 1048576.0)
                addv('cpp_disk_free{vol="/%s"}' % (vpath), v)

        if idx and not args.nos_vol:
            addh("cpp_vol_mib", "gauge", "total MiB in volume")
            addh("cpp_vol_files", "gauge", "total num files in volume")
            addh("cpp_vol_mib_free", "gauge", "free space (vmaxb) in volume")
            addh("cpp_vol_files_free", "gauge", "free space (vmaxn) in volume")
            tnbytes = 0
            tnfiles = 0

            volsizes = []
            try:
                ptops = [x.realpath for _, x in allvols]
                x = self.hsrv.broker.ask("up2k.get_volsizes", ptops)
                volsizes = x.get()
            except Exception as ex:
                cli.log("tx_stats get_volsizes: {!r}".format(ex), 3)

            for (vpath, vol), (nbytes, nfiles) in zip(allvols, volsizes):
                tnbytes += nbytes
                tnfiles += nfiles
                v = "{:.3f}".format(nbytes / 1048576.0)
                addv('cpp_vol_mib{vol="/%s"}' % (vpath), v)
                addv('cpp_vol_files{vol="/%s"}' % (vpath), str(nfiles))

                if vol.flags.get("vmaxb") or vol.flags.get("vmaxn"):
                    zi = unhumanize(vol.flags.get("vmaxb") or "0")
                    if zi:
                        v = "{:.3f}".format((zi - nbytes) / 1048576.0)
                        addv('cpp_vol_mib_free{vol="/%s"}' % (vpath), v)

                    zi = unhumanize(vol.flags.get("vmaxn") or "0")
                    if zi:
                        v = str(zi - nfiles)
                        addv('cpp_vol_files_free{vol="/%s"}' % (vpath), v)

            if volsizes:
                v = "{:.3f}".format(tnbytes / 1048576.0)
                addv('cpp_vol_mib{vol="total"}', v)
                addv('cpp_vol_files{vol="total"}', str(tnfiles))

        if idx and not args.nos_dup:
            addh("cpp_dupe_mib", "gauge", "num dupe MiB in volume")
            addh("cpp_dupe_files", "gauge", "num dupe files in volume")
            tnbytes = 0
            tnfiles = 0
            for vpath, vol in allvols:
                cur = idx.get_cur(vol.realpath)
                if not cur:
                    continue

                nbytes = 0
                nfiles = 0
                q = "select sz, count(*)-1 c from up group by w having c"
                for sz, c in cur.execute(q):
                    nbytes += sz * c
                    nfiles += c

                tnbytes += nbytes
                tnfiles += nfiles
                v = "{:.3f}".format(nbytes / 1048576.0)
                addv('cpp_dupe_mib{vol="/%s"}' % (vpath), v)
                addv('cpp_dupe_files{vol="/%s"}' % (vpath), str(nfiles))

            v = "{:.3f}".format(tnbytes / 1048576.0)
            addv('cpp_dupe_mib{vol="total"}', v)
            addv('cpp_dupe_files{vol="total"}', str(tnfiles))

        if not args.nos_unf:
            addh("cpp_unf_mib", "gauge", "incoming/unfinished uploads (MiB)")
            addh("cpp_unf_files", "gauge", "incoming/unfinished uploads (num files)")
            tnbytes = 0
            tnfiles = 0
            try:
                x = self.hsrv.broker.ask("up2k.get_unfinished")
                xs = x.get()
                xj = json.loads(xs)
                for ptop, (nbytes, nfiles) in xj.items():
                    tnbytes += nbytes
                    tnfiles += nfiles
                    vol = next((x[1] for x in allvols if x[1].realpath == ptop), None)
                    if not vol:
                        t = "tx_stats get_unfinished: could not map {}"
                        cli.log(t.format(ptop), 3)
                        continue

                    v = "{:.3f}".format(nbytes / 1048576.0)
                    addv('cpp_unf_mib{vol="/%s"}' % (vol.vpath), v)
                    addv('cpp_unf_files{vol="/%s"}' % (vol.vpath), str(nfiles))

                v = "{:.3f}".format(tnbytes / 1048576.0)
                addv('cpp_unf_mib{vol="total"}', v)
                addv('cpp_unf_files{vol="total"}', str(tnfiles))
            except Exception as ex:
                cli.log("tx_stats get_unfinished: {!r}".format(ex), 3)

        cli.reply("\n".join(ret).encode("utf-8"), mime="text/plain")
        return True

View file

@ -267,11 +267,49 @@ class Up2k(object):
        }
        return json.dumps(ret, indent=4)

    def get_unfinished(self) -> str:
        if PY2 or not self.mutex.acquire(timeout=0.5):
            return "{}"

        ret: dict[str, tuple[int, int]] = {}
        try:
            for ptop, tab2 in self.registry.items():
                nbytes = 0
                nfiles = 0
                drp = self.droppable.get(ptop, {})
                for wark, job in tab2.items():
                    if wark in drp:
                        continue

                    nfiles += 1
                    try:
                        # close enough on average; assumes uniform chunk sizes
                        nbytes += len(job["need"]) * job["size"] // len(job["hash"])
                    except:
                        pass

                ret[ptop] = (nbytes, nfiles)
        finally:
            self.mutex.release()

        return json.dumps(ret, indent=4)

    def get_volsize(self, ptop: str) -> tuple[int, int]:
        with self.mutex:
            return self._get_volsize(ptop)

    def get_volsizes(self, ptops: list[str]) -> list[tuple[int, int]]:
        ret = []
        with self.mutex:
            for ptop in ptops:
                ret.append(self._get_volsize(ptop))

        return ret

    def _get_volsize(self, ptop: str) -> tuple[int, int]:
        if "e2ds" not in self.flags.get(ptop, {}):
            return (0, 0)

        cur = self.cur[ptop]
        nbytes = self.volsize[cur]
        nfiles = self.volnfiles[cur]
@ -946,7 +984,11 @@ class Up2k(object):
        db.c.connection.commit()

        if (
            vol.flags.get("vmaxb")
            or vol.flags.get("vmaxn")
            or (self.args.stats and not self.args.nos_vol)
        ):
            zs = "select count(sz), sum(sz) from up"
            vn, vb = db.c.execute(zs).fetchone()
            vb = vb or 0
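the remaining-bytes estimate in `get_unfinished` assumes the chunks of an upload are all roughly the same size; a quick sanity check of that arithmetic, with hypothetical numbers:

```python
# hypothetical unfinished upload: 512 MiB split into 64 hashed chunks,
# 16 of which the server still needs; mirrors need * size // hash above
size, nhash, nneed = 512 * 1024 * 1024, 64, 16
print(nneed * size // nhash)  # 134217728 bytes, i.e. 128 MiB still missing
```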

View file

@ -21,6 +21,7 @@ copyparty/httpconn.py,
copyparty/httpsrv.py,
copyparty/ico.py,
copyparty/mdns.py,
copyparty/metrics.py,
copyparty/mtag.py,
copyparty/multicast.py,
copyparty/pwhash.py,

View file

@ -12,7 +12,7 @@ import tempfile
import unittest

from tests import util as tu
from tests.util import Cfg, eprint

from copyparty.authsrv import AuthSrv
from copyparty.httpcli import HttpCli
@ -93,7 +93,7 @@ class TestHttpCli(unittest.TestCase):
        res = "ok " + fp in ret
        print("[{}] {} {} = {}".format(fp, rok, wok, res))
        if rok != res:
            eprint("\033[33m{}\n# {}\033[0m".format(ret, furl))
            self.fail()

        # file browser: html
@ -101,7 +101,7 @@ class TestHttpCli(unittest.TestCase):
        res = "'{}'".format(self.fn) in ret
        print(res)
        if rok != res:
            eprint("\033[33m{}\n# {}\033[0m".format(ret, durl))
            self.fail()

        # file browser: json
@ -110,7 +110,7 @@ class TestHttpCli(unittest.TestCase):
        res = '"{}"'.format(self.fn) in ret
        print(res)
        if rok != res:
            eprint("\033[33m{}\n# {}\033[0m".format(ret, url))
            self.fail()

        # tar
@ -132,7 +132,9 @@ class TestHttpCli(unittest.TestCase):
        if durl.split("/")[-1] in self.can_read:
            ref = [x for x in vfiles if self.in_dive(top + "/" + durl, x)]
            for f in ref:
                ok = f in tar_ok
                pr = print if ok else eprint
                pr("{}: {}".format("ok" if ok else "NG", f))

            ref.sort()
            tar_ok.sort()
            self.assertEqual(ref, tar_ok)

View file

@ -1,3 +1,7 @@
#!/usr/bin/env python3
# coding: utf-8
from __future__ import print_function, unicode_literals

import os
import re
import sys
@ -23,6 +27,12 @@ def nah(*a, **ka):
    return False


def eprint(*a, **ka):
    ka["file"] = sys.stderr
    print(*a, **ka)
    sys.stderr.flush()


if MACOS:
    import posixpath
@ -114,7 +124,7 @@ class Cfg(Namespace):
        ex = "df loris re_maxage rproxy rsp_jtr rsp_slp s_wr_slp theme themes turbo"
        ka.update(**{k: 0 for k in ex.split()})

        ex = "ah_alg bname doctitle favico html_head lg_sbf log_fk md_sbf mth name textfiles unlist vname R RS SR"
        ka.update(**{k: "" for k in ex.split()})

        ex = "on403 on404 xad xar xau xban xbd xbr xbu xiu xm"