diff --git a/README.md b/README.md index ef333e44..a3345d0a 100644 --- a/README.md +++ b/README.md @@ -98,6 +98,7 @@ turn almost any device into a file server with resumable uploads/downloads using * [prometheus](#prometheus) - metrics/stats can be enabled * [other extremely specific features](#other-extremely-specific-features) - you'll never find a use for these * [custom mimetypes](#custom-mimetypes) - change the association of a file extension + * [GDPR compliance](#gdpr-compliance) - imagine using copyparty professionally... * [feature chickenbits](#feature-chickenbits) - buggy feature? rip it out * [packages](#packages) - the party might be closer than you think * [arch package](#arch-package) - now [available on aur](https://aur.archlinux.org/packages/copyparty) maintained by [@icxes](https://github.com/icxes) @@ -2103,6 +2104,18 @@ in a config file, this is the same as: run copyparty with `--mimes` to list all the default mappings +### GDPR compliance + +imagine using copyparty professionally... **TINLA/IANAL; EU laws are hella confusing** + +* remember to disable logging, or configure log rotation to an acceptable timeframe with `-lo cpp-%Y-%m%d.txt.xz` or similar + +* if running with the database enabled (recommended), then have it forget uploader-IPs after some time using `--forget-ip 43200` + * don't set it too low; [unposting](#unpost) a file is no longer possible after this takes effect + +* if you actually *are* a lawyer then I'm open to feedback, would be fun + + ### feature chickenbits buggy feature? 
rip it out by setting any of the following environment variables to disable its associated bell or whistle, diff --git a/copyparty/__main__.py b/copyparty/__main__.py index e4cb3ef3..2b242251 100644 --- a/copyparty/__main__.py +++ b/copyparty/__main__.py @@ -1269,7 +1269,7 @@ def add_optouts(ap): ap2.add_argument("--no-tarcmp", action="store_true", help="disable download as compressed tar (?tar=gz, ?tar=bz2, ?tar=xz, ?tar=gz:9, ...)") ap2.add_argument("--no-lifetime", action="store_true", help="do not allow clients (or server config) to schedule an upload to be deleted after a given time") ap2.add_argument("--no-pipe", action="store_true", help="disable race-the-beam (lockstep download of files which are currently being uploaded) (volflag=nopipe)") - ap2.add_argument("--no-db-ip", action="store_true", help="do not write uploader IPs into the database") + ap2.add_argument("--no-db-ip", action="store_true", help="do not write uploader-IP into the database; will also disable unpost, you may want \033[32m--forget-ip\033[0m instead (volflag=no_db_ip)") def add_safety(ap): @@ -1419,6 +1419,7 @@ def add_db_general(ap, hcores): ap2.add_argument("--no-dhash", action="store_true", help="disable rescan acceleration; do full database integrity check -- makes the db ~5%% smaller and bootup/rescans 3~10x slower") ap2.add_argument("--re-dhash", action="store_true", help="force a cache rebuild on startup; enable this once if it gets out of sync (should never be necessary)") ap2.add_argument("--no-forget", action="store_true", help="never forget indexed files, even when deleted from disk -- makes it impossible to ever upload the same file twice -- only useful for offloading uploads to a cloud service or something (volflag=noforget)") + ap2.add_argument("--forget-ip", metavar="MIN", type=int, default=0, help="remove uploader-IP from database (and make unpost impossible) \033[33mMIN\033[0m minutes after upload, for GDPR reasons. Default [\033[32m0\033[0m] is never-forget. 
[\033[32m1440\033[0m]=day, [\033[32m10080\033[0m]=week, [\033[32m43200\033[0m]=month. (volflag=forget_ip)") ap2.add_argument("--dbd", metavar="PROFILE", default="wal", help="database durability profile; sets the tradeoff between robustness and speed, see \033[33m--help-dbd\033[0m (volflag=dbd)") ap2.add_argument("--xlink", action="store_true", help="on upload: check all volumes for dupes, not just the target volume (probably buggy, not recommended) (volflag=xlink)") ap2.add_argument("--hash-mt", metavar="CORES", type=int, default=hcores, help="num cpu cores to use for file hashing; set 0 or 1 for single-core hashing") diff --git a/copyparty/authsrv.py b/copyparty/authsrv.py index 6ccaafe7..a4b34a71 100644 --- a/copyparty/authsrv.py +++ b/copyparty/authsrv.py @@ -1960,7 +1960,8 @@ class AuthSrv(object): if k not in vol.flags: vol.flags[k] = getattr(self.args, k) - for k in ("nrand", "u2abort", "ups_who", "zip_who"): + zs = "forget_ip nrand u2abort ups_who zip_who" + for k in zs.split(): if k in vol.flags: vol.flags[k] = int(vol.flags[k]) diff --git a/copyparty/cfg.py b/copyparty/cfg.py index fbf053dc..2d4a375b 100644 --- a/copyparty/cfg.py +++ b/copyparty/cfg.py @@ -43,6 +43,7 @@ def vf_bmap() -> dict[str, str]: "gsel", "hardlink", "magic", + "no_db_ip", "no_sb_md", "no_sb_lg", "nsort", @@ -73,6 +74,7 @@ def vf_vmap() -> dict[str, str]: } for k in ( "dbd", + "forget_ip", "hsortn", "html_head", "lg_sbf", @@ -198,6 +200,8 @@ flagcats = { "nohash=\\.iso$": "skips hashing file contents if path matches *.iso", "noidx=\\.iso$": "fully ignores the contents at paths matching *.iso", "noforget": "don't forget files when deleted from disk", + "forget_ip=43200": "forget uploader-IP after 30 days (GDPR)", + "no_db_ip": "never store uploader-IP in the db; disables unpost", "fat32": "avoid excessive reindexing on android sdcardfs", "dbd=[acid|swal|wal|yolo]": "database speed-durability tradeoff", "xlink": "cross-volume dupe detection / linking (dangerous)", diff --git 
def _check_forget_ip(self) -> float:
    """Blank out stored uploader IPs that outlived a volume's ``forget_ip`` limit.

    For every volume in ``self.vfs.all_vols`` that has a non-zero
    ``forget_ip`` flag (a number of minutes; multiplied by 60 below) and a
    database cursor registered in ``self.cur`` under its ``realpath``, rows
    of the ``up`` table whose ``at`` value is at or before the cutoff get
    their ``ip`` column set to the empty string, and the change is
    committed and logged.

    Returns:
        Unix timestamp at which this check wants to run again. It is never
        more than 9001 seconds ahead, and at most 900 seconds ahead once
        any volume has the feature enabled and a usable cursor.
    """
    now = time.time()
    timeout = now + 9001

    # Both statements are loop-invariant, so build them once.
    # NOTE(review): the unary "+" on the sort column stops sqlite from
    # using an index on "at" for this ordering -- presumably deliberate;
    # confirm before removing.
    q_oldest = "select at from up where ip > '' order by +at limit 1"
    q_forget = "update up set ip = '' where ip > '' and at <= ?"

    # sorted() gives a stable volume order; only the volume object is used
    for _, vol in sorted(self.vfs.all_vols.items()):
        maxage = vol.flags["forget_ip"]
        if not maxage:
            # 0 means this volume never forgets
            continue

        cur = self.cur.get(vol.realpath)
        if not cur:
            # no database cursor for this volume; nothing to clean
            continue

        cutoff = now - maxage * 60

        # pass 1 purges expired IPs (if any); pass 2 looks at the oldest
        # surviving row to learn when the next purge is due
        for _pass in range(2):
            oldest = cur.execute(q_oldest).fetchone()
            if not oldest:
                break

            remains = oldest[0] - cutoff
            if remains > 0:
                timeout = min(timeout, now + remains)
                break

            # bind the cutoff as whole seconds rather than formatting it
            # into the SQL text
            cur.execute(q_forget, (int(cutoff),))
            zi = cur.rowcount
            cur.connection.commit()

            t = "forget-ip(%d) removed %d IPs from db [/%s]"
            self.log(t % (maxage, zi, vol.vpath))

        # re-check at least every 15 minutes while the feature is active
        timeout = min(timeout, now + 900)

    return timeout
theme themes turbo" + ex = "db_act forget_ip k304 loris no304 re_maxage rproxy rsp_jtr rsp_slp s_wr_slp snap_wri theme themes turbo" ka.update(**{k: 0 for k in ex.split()}) ex = "ah_alg bname chpw_db doctitle df exit favico idp_h_usr ipa html_head lg_sba lg_sbf log_fk md_sba md_sbf name og_desc og_site og_th og_title og_title_a og_title_v og_title_i shr tcolor textfiles unlist vname xff_src R RS SR"