mirror of
https://github.com/9001/copyparty.git
synced 2025-08-17 09:02:15 -06:00
add more prometheus metrics; breaking changes:
* cpp_uptime is now a gauge
* cpp_bans is now cpp_active_bans (and also a gauge)

and other related fixes:
* stop emitting invalid cpp_disk_size/free for offline volumes
* support overriding the spec-mandatory mimetype with ?mime=foo
This commit is contained in:
parent
2e85a25614
commit
4b720f4150
22
README.md
22
README.md
|
@ -1304,8 +1304,23 @@ scrape_configs:
|
||||||
```
|
```
|
||||||
|
|
||||||
currently the following metrics are available,
|
currently the following metrics are available,
|
||||||
* `cpp_uptime_seconds`
|
* `cpp_uptime_seconds` time since last copyparty restart
|
||||||
* `cpp_bans` number of banned IPs
|
* `cpp_boot_unixtime_seconds` same but as an absolute timestamp
|
||||||
|
* `cpp_http_conns` number of open http(s) connections
|
||||||
|
* `cpp_http_reqs` number of http(s) requests handled
|
||||||
|
* `cpp_sus_reqs` number of 403/422/malicious requests
|
||||||
|
* `cpp_active_bans` number of currently banned IPs
|
||||||
|
* `cpp_total_bans` number of IPs banned since last restart
|
||||||
|
|
||||||
|
these are available unless `--nos-vst` is specified:
|
||||||
|
* `cpp_db_idle_seconds` time since last database activity (upload/rename/delete)
|
||||||
|
* `cpp_db_act_seconds` same but as an absolute timestamp
|
||||||
|
* `cpp_idle_vols` number of volumes which are idle / ready
|
||||||
|
* `cpp_busy_vols` number of volumes which are busy / indexing
|
||||||
|
* `cpp_offline_vols` number of volumes which are offline / unavailable
|
||||||
|
* `cpp_hashing_files` number of files queued for hashing / indexing
|
||||||
|
* `cpp_tagq_files` number of files queued for metadata scanning
|
||||||
|
* `cpp_mtpq_files` number of files queued for plugin-based analysis
|
||||||
|
|
||||||
and these are available per-volume only:
|
and these are available per-volume only:
|
||||||
* `cpp_disk_size_bytes` total HDD size
|
* `cpp_disk_size_bytes` total HDD size
|
||||||
|
@ -1324,9 +1339,12 @@ some of the metrics have additional requirements to function correctly,
|
||||||
the following options are available to disable some of the metrics:
|
the following options are available to disable some of the metrics:
|
||||||
* `--nos-hdd` disables `cpp_disk_*` which can prevent spinning up HDDs
|
* `--nos-hdd` disables `cpp_disk_*` which can prevent spinning up HDDs
|
||||||
* `--nos-vol` disables `cpp_vol_*` which reduces server startup time
|
* `--nos-vol` disables `cpp_vol_*` which reduces server startup time
|
||||||
|
* `--nos-vst` disables volume state, reducing the worst-case prometheus query time by 0.5 sec
|
||||||
* `--nos-dup` disables `cpp_dupe_*` which reduces the server load caused by prometheus queries
|
* `--nos-dup` disables `cpp_dupe_*` which reduces the server load caused by prometheus queries
|
||||||
* `--nos-unf` disables `cpp_unf_*` for no particular purpose
|
* `--nos-unf` disables `cpp_unf_*` for no particular purpose
|
||||||
|
|
||||||
|
note: the following metrics are counted incorrectly if multiprocessing is enabled with `-j`: `cpp_http_conns`, `cpp_http_reqs`, `cpp_sus_reqs`, `cpp_active_bans`, `cpp_total_bans`
|
||||||
|
|
||||||
|
|
||||||
# packages
|
# packages
|
||||||
|
|
||||||
|
|
|
@ -1014,6 +1014,7 @@ def add_stats(ap):
|
||||||
ap2.add_argument("--stats", action="store_true", help="enable openmetrics at /.cpr/metrics for admin accounts")
|
ap2.add_argument("--stats", action="store_true", help="enable openmetrics at /.cpr/metrics for admin accounts")
|
||||||
ap2.add_argument("--nos-hdd", action="store_true", help="disable disk-space metrics (used/free space)")
|
ap2.add_argument("--nos-hdd", action="store_true", help="disable disk-space metrics (used/free space)")
|
||||||
ap2.add_argument("--nos-vol", action="store_true", help="disable volume size metrics (num files, total bytes, vmaxb/vmaxn)")
|
ap2.add_argument("--nos-vol", action="store_true", help="disable volume size metrics (num files, total bytes, vmaxb/vmaxn)")
|
||||||
|
ap2.add_argument("--nos-vst", action="store_true", help="disable volume state metrics (indexing, analyzing, activity)")
|
||||||
ap2.add_argument("--nos-dup", action="store_true", help="disable dupe-files metrics (good idea; very slow)")
|
ap2.add_argument("--nos-dup", action="store_true", help="disable dupe-files metrics (good idea; very slow)")
|
||||||
ap2.add_argument("--nos-unf", action="store_true", help="disable unfinished-uploads metrics")
|
ap2.add_argument("--nos-unf", action="store_true", help="disable unfinished-uploads metrics")
|
||||||
|
|
||||||
|
|
|
@ -92,6 +92,12 @@ class FtpAuth(DummyAuthorizer):
|
||||||
if bonk:
|
if bonk:
|
||||||
logging.warning("client banned: invalid passwords")
|
logging.warning("client banned: invalid passwords")
|
||||||
bans[ip] = bonk
|
bans[ip] = bonk
|
||||||
|
try:
|
||||||
|
# only possible if multiprocessing disabled
|
||||||
|
self.hub.broker.httpsrv.bans[ip] = bonk
|
||||||
|
self.hub.broker.httpsrv.nban += 1
|
||||||
|
except:
|
||||||
|
pass
|
||||||
|
|
||||||
raise AuthenticationFailed("Authentication failed.")
|
raise AuthenticationFailed("Authentication failed.")
|
||||||
|
|
||||||
|
|
|
@ -277,6 +277,8 @@ class HttpCli(object):
|
||||||
|
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
self.conn.hsrv.nreq += 1
|
||||||
|
|
||||||
self.ua = self.headers.get("user-agent", "")
|
self.ua = self.headers.get("user-agent", "")
|
||||||
self.is_rclone = self.ua.startswith("rclone/")
|
self.is_rclone = self.ua.startswith("rclone/")
|
||||||
|
|
||||||
|
@ -567,6 +569,7 @@ class HttpCli(object):
|
||||||
return self.conn.iphash.s(self.ip)
|
return self.conn.iphash.s(self.ip)
|
||||||
|
|
||||||
def cbonk(self, g: Garda, v: str, reason: str, descr: str) -> bool:
|
def cbonk(self, g: Garda, v: str, reason: str, descr: str) -> bool:
|
||||||
|
self.conn.hsrv.nsus += 1
|
||||||
if not g.lim:
|
if not g.lim:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
@ -590,6 +593,7 @@ class HttpCli(object):
|
||||||
):
|
):
|
||||||
self.log("client banned: %s" % (descr,), 1)
|
self.log("client banned: %s" % (descr,), 1)
|
||||||
self.conn.hsrv.bans[ip] = bonk
|
self.conn.hsrv.bans[ip] = bonk
|
||||||
|
self.conn.hsrv.nban += 1
|
||||||
return True
|
return True
|
||||||
|
|
||||||
return False
|
return False
|
||||||
|
|
|
@ -128,6 +128,9 @@ class HttpSrv(object):
|
||||||
|
|
||||||
self.u2fh = FHC()
|
self.u2fh = FHC()
|
||||||
self.metrics = Metrics(self)
|
self.metrics = Metrics(self)
|
||||||
|
self.nreq = 0
|
||||||
|
self.nsus = 0
|
||||||
|
self.nban = 0
|
||||||
self.srvs: list[socket.socket] = []
|
self.srvs: list[socket.socket] = []
|
||||||
self.ncli = 0 # exact
|
self.ncli = 0 # exact
|
||||||
self.clients: set[HttpConn] = set() # laggy
|
self.clients: set[HttpConn] = set() # laggy
|
||||||
|
|
|
@ -34,14 +34,23 @@ class Metrics(object):
|
||||||
|
|
||||||
ret: list[str] = []
|
ret: list[str] = []
|
||||||
|
|
||||||
def addc(k: str, unit: str, v: str, desc: str) -> None:
|
def addc(k: str, v: str, desc: str) -> None:
|
||||||
if unit:
|
zs = "# TYPE %s counter\n# HELP %s %s\n%s_created %s\n%s_total %s"
|
||||||
k += "_" + unit
|
ret.append(zs % (k, k, desc, k, int(self.hsrv.t0), k, v))
|
||||||
zs = "# TYPE %s counter\n# UNIT %s %s\n# HELP %s %s\n%s_created %s\n%s_total %s"
|
|
||||||
ret.append(zs % (k, k, unit, k, desc, k, int(self.hsrv.t0), k, v))
|
def adduc(k: str, unit: str, v: str, desc: str) -> None:
|
||||||
else:
|
k += "_" + unit
|
||||||
zs = "# TYPE %s counter\n# HELP %s %s\n%s_created %s\n%s_total %s"
|
zs = "# TYPE %s counter\n# UNIT %s %s\n# HELP %s %s\n%s_created %s\n%s_total %s"
|
||||||
ret.append(zs % (k, k, desc, k, int(self.hsrv.t0), k, v))
|
ret.append(zs % (k, k, unit, k, desc, k, int(self.hsrv.t0), k, v))
|
||||||
|
|
||||||
|
def addg(k: str, v: str, desc: str) -> None:
|
||||||
|
zs = "# TYPE %s gauge\n# HELP %s %s\n%s %s"
|
||||||
|
ret.append(zs % (k, k, desc, k, v))
|
||||||
|
|
||||||
|
def addug(k: str, unit: str, v: str, desc: str) -> None:
|
||||||
|
k += "_" + unit
|
||||||
|
zs = "# TYPE %s gauge\n# UNIT %s %s\n# HELP %s %s\n%s %s"
|
||||||
|
ret.append(zs % (k, k, unit, k, desc, k, v))
|
||||||
|
|
||||||
def addh(k: str, typ: str, desc: str) -> None:
|
def addh(k: str, typ: str, desc: str) -> None:
|
||||||
zs = "# TYPE %s %s\n# HELP %s %s"
|
zs = "# TYPE %s %s\n# HELP %s %s"
|
||||||
|
@ -54,17 +63,75 @@ class Metrics(object):
|
||||||
def addv(k: str, v: str) -> None:
|
def addv(k: str, v: str) -> None:
|
||||||
ret.append("%s %s" % (k, v))
|
ret.append("%s %s" % (k, v))
|
||||||
|
|
||||||
|
t = "time since last copyparty restart"
|
||||||
v = "{:.3f}".format(time.time() - self.hsrv.t0)
|
v = "{:.3f}".format(time.time() - self.hsrv.t0)
|
||||||
addc("cpp_uptime", "seconds", v, "time since last server restart")
|
addug("cpp_uptime", "seconds", v, t)
|
||||||
|
|
||||||
|
# timestamps are gauges because initial value is not zero
|
||||||
|
t = "unixtime of last copyparty restart"
|
||||||
|
v = "{:.3f}".format(self.hsrv.t0)
|
||||||
|
addug("cpp_boot_unixtime", "seconds", v, t)
|
||||||
|
|
||||||
|
t = "number of open http(s) client connections"
|
||||||
|
addg("cpp_http_conns", str(self.hsrv.ncli), t)
|
||||||
|
|
||||||
|
t = "number of http(s) requests since last restart"
|
||||||
|
addc("cpp_http_reqs", str(self.hsrv.nreq), t)
|
||||||
|
|
||||||
|
t = "number of 403/422/malicious reqs since restart"
|
||||||
|
addc("cpp_sus_reqs", str(self.hsrv.nsus), t)
|
||||||
|
|
||||||
v = str(len(conn.bans or []))
|
v = str(len(conn.bans or []))
|
||||||
addc("cpp_bans", "", v, "number of banned IPs")
|
addg("cpp_active_bans", v, "number of currently banned IPs")
|
||||||
|
|
||||||
|
t = "number of IPs banned since last restart"
|
||||||
|
addg("cpp_total_bans", str(self.hsrv.nban), t)
|
||||||
|
|
||||||
|
if not args.nos_vst:
|
||||||
|
x = self.hsrv.broker.ask("up2k.get_state")
|
||||||
|
vs = json.loads(x.get())
|
||||||
|
|
||||||
|
nvidle = 0
|
||||||
|
nvbusy = 0
|
||||||
|
nvoffline = 0
|
||||||
|
for v in vs["volstate"].values():
|
||||||
|
if v == "online, idle":
|
||||||
|
nvidle += 1
|
||||||
|
elif "OFFLINE" in v:
|
||||||
|
nvoffline += 1
|
||||||
|
else:
|
||||||
|
nvbusy += 1
|
||||||
|
|
||||||
|
addg("cpp_idle_vols", str(nvidle), "number of idle/ready volumes")
|
||||||
|
addg("cpp_busy_vols", str(nvbusy), "number of busy/indexing volumes")
|
||||||
|
addg("cpp_offline_vols", str(nvoffline), "number of offline volumes")
|
||||||
|
|
||||||
|
t = "time since last database activity (upload/rename/delete)"
|
||||||
|
addug("cpp_db_idle", "seconds", str(vs["dbwt"]), t)
|
||||||
|
|
||||||
|
t = "unixtime of last database activity (upload/rename/delete)"
|
||||||
|
addug("cpp_db_act", "seconds", str(vs["dbwu"]), t)
|
||||||
|
|
||||||
|
t = "number of files queued for hashing/indexing"
|
||||||
|
addg("cpp_hashing_files", str(vs["hashq"]), t)
|
||||||
|
|
||||||
|
t = "number of files queued for metadata scanning"
|
||||||
|
addg("cpp_tagq_files", str(vs["tagq"]), t)
|
||||||
|
|
||||||
|
try:
|
||||||
|
t = "number of files queued for plugin-based analysis"
|
||||||
|
addg("cpp_mtpq_files", str(int(vs["mtpq"])), t)
|
||||||
|
except:
|
||||||
|
pass
|
||||||
|
|
||||||
if not args.nos_hdd:
|
if not args.nos_hdd:
|
||||||
addbh("cpp_disk_size_bytes", "total HDD size of volume")
|
addbh("cpp_disk_size_bytes", "total HDD size of volume")
|
||||||
addbh("cpp_disk_free_bytes", "free HDD space in volume")
|
addbh("cpp_disk_free_bytes", "free HDD space in volume")
|
||||||
for vpath, vol in allvols:
|
for vpath, vol in allvols:
|
||||||
free, total = get_df(vol.realpath)
|
free, total = get_df(vol.realpath)
|
||||||
|
if free is None or total is None:
|
||||||
|
continue
|
||||||
|
|
||||||
addv('cpp_disk_size_bytes{vol="/%s"}' % (vpath), str(total))
|
addv('cpp_disk_size_bytes{vol="/%s"}' % (vpath), str(total))
|
||||||
addv('cpp_disk_free_bytes{vol="/%s"}' % (vpath), str(free))
|
addv('cpp_disk_free_bytes{vol="/%s"}' % (vpath), str(free))
|
||||||
|
|
||||||
|
@ -161,5 +228,6 @@ class Metrics(object):
|
||||||
ret.append("# EOF")
|
ret.append("# EOF")
|
||||||
|
|
||||||
mime = "application/openmetrics-text; version=1.0.0; charset=utf-8"
|
mime = "application/openmetrics-text; version=1.0.0; charset=utf-8"
|
||||||
|
mime = cli.uparam.get("mime") or mime
|
||||||
cli.reply("\n".join(ret).encode("utf-8"), mime=mime)
|
cli.reply("\n".join(ret).encode("utf-8"), mime=mime)
|
||||||
return True
|
return True
|
||||||
|
|
|
@ -266,6 +266,7 @@ class Up2k(object):
|
||||||
"hashq": self.n_hashq,
|
"hashq": self.n_hashq,
|
||||||
"tagq": self.n_tagq,
|
"tagq": self.n_tagq,
|
||||||
"mtpq": mtpq,
|
"mtpq": mtpq,
|
||||||
|
"dbwu": "{:.2f}".format(self.db_act),
|
||||||
"dbwt": "{:.2f}".format(
|
"dbwt": "{:.2f}".format(
|
||||||
min(1000 * 24 * 60 * 60 - 1, time.time() - self.db_act)
|
min(1000 * 24 * 60 * 60 - 1, time.time() - self.db_act)
|
||||||
),
|
),
|
||||||
|
|
Loading…
Reference in a new issue