mirror of
https://github.com/9001/copyparty.git
synced 2025-08-17 09:02:15 -06:00
add reflink-based dedup; closes #201
This commit is contained in:
parent
674fc1fe08
commit
df9feabcf8
|
@ -1439,12 +1439,17 @@ if you enable deduplication with `--dedup` then it'll create a symlink instead o
|
||||||
**warning:** when enabling dedup, you should also:
|
**warning:** when enabling dedup, you should also:
|
||||||
* enable indexing with `-e2dsa` or volflag `e2dsa` (see [file indexing](#file-indexing) section below); strongly recommended
|
* enable indexing with `-e2dsa` or volflag `e2dsa` (see [file indexing](#file-indexing) section below); strongly recommended
|
||||||
* ...and/or `--hardlink-only` to use hardlink-based deduplication instead of symlinks; see explanation below
|
* ...and/or `--hardlink-only` to use hardlink-based deduplication instead of symlinks; see explanation below
|
||||||
|
* ...and/or `--reflink` to use CoW/reflink-based dedup (much safer than hardlink, but OS/FS-dependent)
|
||||||
|
|
||||||
it will not be safe to rename/delete files if you only enable dedup and none of the above; if you enable indexing then it is not *necessary* to also do hardlinks (but you may still want to)
|
it will not be safe to rename/delete files if you only enable dedup and none of the above; if you enable indexing then it is not *necessary* to also do hardlinks (but you may still want to)
|
||||||
|
|
||||||
by default, deduplication is done based on symlinks (symbolic links); these are tiny files which are pointers to the nearest full copy of the file
|
by default, deduplication is done based on symlinks (symbolic links); these are tiny files which are pointers to the nearest full copy of the file
|
||||||
|
|
||||||
you can choose to use hardlinks instead of softlinks, globally with `--hardlink-only` or volflag `hardlinkonly`;
|
you can choose to use hardlinks instead of softlinks, globally with `--hardlink-only` or volflag `hardlinkonly`, and you can choose to use reflinks with `--reflink` or volflag `reflink`
|
||||||
|
|
||||||
|
advantages of using reflinks (CoW, copy-on-write):
|
||||||
|
* entirely safe (when your filesystem supports it correctly); either file can be edited or deleted without affecting other copies
|
||||||
|
* caveat: reflinks require linux 5.3 or newer, python 3.14 or newer, and a filesystem with CoW support (btrfs is probably ok, maybe xfs too, but zfs had bugs)
|
||||||
|
|
||||||
advantages of using hardlinks:
|
advantages of using hardlinks:
|
||||||
* hardlinks are more compatible with other software; they behave entirely like regular files
|
* hardlinks are more compatible with other software; they behave entirely like regular files
|
||||||
|
|
|
@ -1056,6 +1056,7 @@ def add_upload(ap):
|
||||||
ap2.add_argument("--safe-dedup", metavar="N", type=int, default=50, help="how careful to be when deduplicating files; [\033[32m1\033[0m] = just verify the filesize, [\033[32m50\033[0m] = verify file contents have not been altered (volflag=safededup)")
|
ap2.add_argument("--safe-dedup", metavar="N", type=int, default=50, help="how careful to be when deduplicating files; [\033[32m1\033[0m] = just verify the filesize, [\033[32m50\033[0m] = verify file contents have not been altered (volflag=safededup)")
|
||||||
ap2.add_argument("--hardlink", action="store_true", help="enable hardlink-based dedup; will fallback on symlinks when that is impossible (across filesystems) (volflag=hardlink)")
|
ap2.add_argument("--hardlink", action="store_true", help="enable hardlink-based dedup; will fallback on symlinks when that is impossible (across filesystems) (volflag=hardlink)")
|
||||||
ap2.add_argument("--hardlink-only", action="store_true", help="do not fallback to symlinks when a hardlink cannot be made (volflag=hardlinkonly)")
|
ap2.add_argument("--hardlink-only", action="store_true", help="do not fallback to symlinks when a hardlink cannot be made (volflag=hardlinkonly)")
|
||||||
|
ap2.add_argument("--reflink", action="store_true", help="enable reflink-based dedup; will fallback on full copies when that is impossible (non-CoW filesystem) (volflag=reflink)")
|
||||||
ap2.add_argument("--no-dupe", action="store_true", help="reject duplicate files during upload; only matches within the same volume (volflag=nodupe)")
|
ap2.add_argument("--no-dupe", action="store_true", help="reject duplicate files during upload; only matches within the same volume (volflag=nodupe)")
|
||||||
ap2.add_argument("--no-clone", action="store_true", help="do not use existing data on disk to satisfy dupe uploads; reduces server HDD reads in exchange for much more network load (volflag=noclone)")
|
ap2.add_argument("--no-clone", action="store_true", help="do not use existing data on disk to satisfy dupe uploads; reduces server HDD reads in exchange for much more network load (volflag=noclone)")
|
||||||
ap2.add_argument("--no-snap", action="store_true", help="disable snapshots -- forget unfinished uploads on shutdown; don't create .hist/up2k.snap files -- abandoned/interrupted uploads must be cleaned up manually")
|
ap2.add_argument("--no-snap", action="store_true", help="disable snapshots -- forget unfinished uploads on shutdown; don't create .hist/up2k.snap files -- abandoned/interrupted uploads must be cleaned up manually")
|
||||||
|
|
|
@ -2124,6 +2124,7 @@ class AuthSrv(object):
|
||||||
all_mte = {}
|
all_mte = {}
|
||||||
errors = False
|
errors = False
|
||||||
free_umask = False
|
free_umask = False
|
||||||
|
have_reflink = False
|
||||||
for vol in vfs.all_nodes.values():
|
for vol in vfs.all_nodes.values():
|
||||||
if (self.args.e2ds and vol.axs.uwrite) or self.args.e2dsa:
|
if (self.args.e2ds and vol.axs.uwrite) or self.args.e2dsa:
|
||||||
vol.flags["e2ds"] = True
|
vol.flags["e2ds"] = True
|
||||||
|
@ -2207,6 +2208,9 @@ class AuthSrv(object):
|
||||||
if "unlistcr" in vol.flags or "unlistcw" in vol.flags:
|
if "unlistcr" in vol.flags or "unlistcw" in vol.flags:
|
||||||
self.args.have_unlistc = True
|
self.args.have_unlistc = True
|
||||||
|
|
||||||
|
if "reflink" in vol.flags:
|
||||||
|
have_reflink = True
|
||||||
|
|
||||||
zs = str(vol.flags.get("tcolor", "")).lstrip("#")
|
zs = str(vol.flags.get("tcolor", "")).lstrip("#")
|
||||||
if len(zs) == 3: # fc5 => ffcc55
|
if len(zs) == 3: # fc5 => ffcc55
|
||||||
vol.flags["tcolor"] = "".join([x * 2 for x in zs])
|
vol.flags["tcolor"] = "".join([x * 2 for x in zs])
|
||||||
|
@ -2571,6 +2575,13 @@ class AuthSrv(object):
|
||||||
t = "WARNING! The following IdP volumes are mounted below another volume where other users can read and/or write files. This is a SECURITY HAZARD!! When copyparty is restarted, it will not know about these IdP volumes yet. These volumes will then be accessible by an unexpected set of permissions UNTIL one of the users associated with their volume sends a request to the server. RECOMMENDATION: You should create a restricted volume where nobody can read/write files, and make sure that all IdP volumes are configured to appear somewhere below that volume."
|
t = "WARNING! The following IdP volumes are mounted below another volume where other users can read and/or write files. This is a SECURITY HAZARD!! When copyparty is restarted, it will not know about these IdP volumes yet. These volumes will then be accessible by an unexpected set of permissions UNTIL one of the users associated with their volume sends a request to the server. RECOMMENDATION: You should create a restricted volume where nobody can read/write files, and make sure that all IdP volumes are configured to appear somewhere below that volume."
|
||||||
self.log(t + "".join(self.idp_err), 1)
|
self.log(t + "".join(self.idp_err), 1)
|
||||||
|
|
||||||
|
if have_reflink:
|
||||||
|
t = "WARNING: Reflink-based dedup was requested, but %s. This will not work; files will be full copies instead."
|
||||||
|
if sys.version_info < (3, 14):
|
||||||
|
self.log(t % "your python version is not new enough", 1)
|
||||||
|
if not sys.platform.startswith("linux"):
|
||||||
|
self.log(t % "your OS is not Linux", 1)
|
||||||
|
|
||||||
self.vfs = vfs
|
self.vfs = vfs
|
||||||
self.acct = acct
|
self.acct = acct
|
||||||
self.defpw = defpw
|
self.defpw = defpw
|
||||||
|
|
|
@ -52,6 +52,7 @@ def vf_bmap() -> dict[str, str]:
|
||||||
"og_no_head",
|
"og_no_head",
|
||||||
"og_s_title",
|
"og_s_title",
|
||||||
"rand",
|
"rand",
|
||||||
|
"reflink",
|
||||||
"rmagic",
|
"rmagic",
|
||||||
"rss",
|
"rss",
|
||||||
"wo_up_readme",
|
"wo_up_readme",
|
||||||
|
@ -168,6 +169,7 @@ flagcats = {
|
||||||
"dedup": "enable symlink-based file deduplication",
|
"dedup": "enable symlink-based file deduplication",
|
||||||
"hardlink": "enable hardlink-based file deduplication,\nwith fallback on symlinks when that is impossible",
|
"hardlink": "enable hardlink-based file deduplication,\nwith fallback on symlinks when that is impossible",
|
||||||
"hardlinkonly": "dedup with hardlink only, never symlink;\nmake a full copy if hardlink is impossible",
|
"hardlinkonly": "dedup with hardlink only, never symlink;\nmake a full copy if hardlink is impossible",
|
||||||
|
"reflink": "enable reflink-based file deduplication,\nwith fallback on full copy when that is impossible",
|
||||||
"safededup": "verify on-disk data before using it for dedup",
|
"safededup": "verify on-disk data before using it for dedup",
|
||||||
"noclone": "take dupe data from clients, even if available on HDD",
|
"noclone": "take dupe data from clients, even if available on HDD",
|
||||||
"nodupe": "rejects existing files (instead of linking/cloning them)",
|
"nodupe": "rejects existing files (instead of linking/cloning them)",
|
||||||
|
|
|
@ -3476,6 +3476,8 @@ class Up2k(object):
|
||||||
|
|
||||||
linked = False
|
linked = False
|
||||||
try:
|
try:
|
||||||
|
if "reflink" in flags:
|
||||||
|
raise Exception("reflink")
|
||||||
if not is_mv and not flags.get("dedup"):
|
if not is_mv and not flags.get("dedup"):
|
||||||
raise Exception("dedup is disabled in config")
|
raise Exception("dedup is disabled in config")
|
||||||
|
|
||||||
|
@ -3532,7 +3534,8 @@ class Up2k(object):
|
||||||
|
|
||||||
linked = True
|
linked = True
|
||||||
except Exception as ex:
|
except Exception as ex:
|
||||||
self.log("cannot link; creating copy: " + repr(ex))
|
if str(ex) != "reflink":
|
||||||
|
self.log("cannot link; creating copy: " + repr(ex))
|
||||||
if bos.path.isfile(src):
|
if bos.path.isfile(src):
|
||||||
csrc = src
|
csrc = src
|
||||||
elif fsrc and bos.path.isfile(fsrc):
|
elif fsrc and bos.path.isfile(fsrc):
|
||||||
|
|
|
@ -143,7 +143,7 @@ class Cfg(Namespace):
|
||||||
def __init__(self, a=None, v=None, c=None, **ka0):
|
def __init__(self, a=None, v=None, c=None, **ka0):
|
||||||
ka = {}
|
ka = {}
|
||||||
|
|
||||||
ex = "chpw cookie_lax daw dav_auth dav_mac dav_rt e2d e2ds e2dsa e2t e2ts e2tsr e2v e2vu e2vp early_ban ed emp exp force_js getmod grid gsel hardlink hardlink_only ih ihead magic nid nih no_acode no_athumb no_bauth no_clone no_cp no_dav no_db_ip no_del no_dirsz no_dupe no_lifetime no_logues no_mv no_pipe no_poll no_readme no_robots no_sb_md no_sb_lg no_scandir no_tail no_tarcmp no_thumb no_vthumb no_zip nrand nsort nw og og_no_head og_s_title ohead q rand re_dirsz rmagic rss smb srch_dbg srch_excl stats uqe vague_403 vc ver wo_up_readme write_uplog xdev xlink xvol zipmaxu zs"
|
ex = "chpw cookie_lax daw dav_auth dav_mac dav_rt e2d e2ds e2dsa e2t e2ts e2tsr e2v e2vu e2vp early_ban ed emp exp force_js getmod grid gsel hardlink hardlink_only ih ihead magic nid nih no_acode no_athumb no_bauth no_clone no_cp no_dav no_db_ip no_del no_dirsz no_dupe no_lifetime no_logues no_mv no_pipe no_poll no_readme no_robots no_sb_md no_sb_lg no_scandir no_tail no_tarcmp no_thumb no_vthumb no_zip nrand nsort nw og og_no_head og_s_title ohead q rand re_dirsz reflink rmagic rss smb srch_dbg srch_excl stats uqe vague_403 vc ver wo_up_readme write_uplog xdev xlink xvol zipmaxu zs"
|
||||||
ka.update(**{k: False for k in ex.split()})
|
ka.update(**{k: False for k in ex.split()})
|
||||||
|
|
||||||
ex = "dav_inf dedup dotpart dotsrch hook_v no_dhash no_fastboot no_fpool no_htp no_rescan no_sendfile no_ses no_snap no_up_list no_voldump re_dhash see_dots plain_ip"
|
ex = "dav_inf dedup dotpart dotsrch hook_v no_dhash no_fastboot no_fpool no_htp no_rescan no_sendfile no_ses no_snap no_up_list no_voldump re_dhash see_dots plain_ip"
|
||||||
|
|
Loading…
Reference in a new issue