recover from file access errors when zipping

This commit is contained in:
ed 2021-03-27 17:16:59 +01:00
parent 5ccca3fbd5
commit b38533b0cc
3 changed files with 105 additions and 41 deletions

View file

@ -1,6 +1,8 @@
import os
import tarfile import tarfile
import threading import threading
from .sutil import errdesc
from .util import Queue, fsenc from .util import Queue, fsenc
@ -22,6 +24,7 @@ class StreamTar(object):
self.co = 0 self.co = 0
self.qfile = QFile() self.qfile = QFile()
self.fgen = fgen self.fgen = fgen
self.errf = None
# python 3.8 changed to PAX_FORMAT as default, # python 3.8 changed to PAX_FORMAT as default,
# waste of space and don't care about the new features # waste of space and don't care about the new features
@ -42,23 +45,40 @@ class StreamTar(object):
yield buf yield buf
yield None yield None
if self.errf:
os.unlink(self.errf["ap"])
def ser(self, f):
    """Append a single file to the tar stream.

    `f` is a dict providing "vp" (the member name to store in the
    archive), "ap" (the path to read the data from) and "st" (a stat
    result supplying mode, size and mtime).

    The member is always stored with uid/gid 0, and its size is added
    to self.ci before the file is opened. Any exception from open() or
    from tarfile propagates to the caller.
    """
    name, src, fsi = f["vp"], f["ap"], f["st"]

    member = tarfile.TarInfo(name=name)
    member.mode = fsi.st_mode
    member.size = fsi.st_size
    member.mtime = fsi.st_mtime
    # ownership on the source filesystem is not carried into the archive
    member.uid = member.gid = 0

    self.ci += member.size

    # 512 KiB read buffer; the handle gets its own name so that it does
    # not shadow the `f` argument
    fh = open(fsenc(src), "rb", 512 * 1024)
    with fh:
        self.tar.addfile(member, fh)
def _gen(self): def _gen(self):
errors = []
for f in self.fgen: for f in self.fgen:
name = f["vp"] if "err" in f:
src = f["ap"] errors.append([f["vp"], f["err"]])
fsi = f["st"] continue
inf = tarfile.TarInfo(name=name) try:
inf.mode = fsi.st_mode self.ser(f)
inf.size = fsi.st_size except Exception as ex:
inf.mtime = fsi.st_mtime errors.append([f["vp"], repr(ex)])
inf.uid = 0
inf.gid = 0
self.ci += inf.size if errors:
with open(fsenc(src), "rb", 512 * 1024) as f: self.errf = errdesc(errors)
self.tar.addfile(inf, f) self.ser(self.errf)
self.tar.close() self.tar.close()
self.qfile.q.put(None) self.qfile.q.put(None)

21
copyparty/sutil.py Normal file
View file

@ -0,0 +1,21 @@
import os
import time
import tempfile
def errdesc(errors):
    """Write a plain-text report of archive failures to a temporary file.

    Args:
        errors: iterable of (filename, error description) pairs; each
            pair becomes one " file:" / "error:" stanza in the report.

    Returns:
        A dict with the keys "vp" (report filename, which embeds the
        current unix time), "ap" (path of the temporary file) and "st"
        (the os.stat result for that file).

    The temporary file is created with delete=False and is made
    read-only, so the caller is responsible for unlinking "ap" once it
    is done with the report. If anything fails after the file has been
    created, the file is removed again before the exception propagates.
    """
    report = ["copyparty failed to add the following files to the archive:", ""]
    for fn, err in errors:
        report.extend([" file: {}".format(fn), "error: {}".format(err), ""])

    # encode before creating the file so a failure here leaves nothing behind
    body = "\r\n".join(report).encode("utf-8", "replace")

    tf = tempfile.NamedTemporaryFile(prefix="copyparty-", delete=False)
    tf_path = tf.name
    ok = False
    try:
        with tf:
            tf.write(body)

        os.chmod(tf_path, 0o444)
        ret = {
            "vp": "archive-errors-{}.txt".format(int(time.time())),
            "ap": tf_path,
            "st": os.stat(tf_path),
        }
        ok = True
        return ret
    finally:
        if not ok:
            # delete=False means nothing else will ever clean this up;
            # removal is best-effort so the original error is not masked
            try:
                os.unlink(tf_path)
            except OSError:
                pass

View file

@ -1,8 +1,10 @@
import os
import time import time
import zlib import zlib
import struct import struct
from datetime import datetime from datetime import datetime
from .sutil import errdesc
from .util import yieldfile, sanitize_fn from .util import yieldfile, sanitize_fn
@ -187,43 +189,61 @@ class StreamZip(object):
self.pos += len(buf) self.pos += len(buf)
return buf return buf
def gen(self): def ser(self, f):
for f in self.fgen: name = f["vp"]
name = f["vp"] src = f["ap"]
src = f["ap"] st = f["st"]
st = f["st"]
sz = st.st_size sz = st.st_size
ts = st.st_mtime + 1 ts = st.st_mtime + 1
crc = None crc = None
if self.pre_crc: if self.pre_crc:
crc = 0 crc = 0
for buf in yieldfile(src):
crc = zlib.crc32(buf, crc)
crc &= 0xFFFFFFFF
h_pos = self.pos
buf = gen_hdr(None, name, sz, ts, self.utf8, crc, self.pre_crc)
yield self._ct(buf)
crc = crc or 0
for buf in yieldfile(src): for buf in yieldfile(src):
if not self.pre_crc: crc = zlib.crc32(buf, crc)
crc = zlib.crc32(buf, crc)
yield self._ct(buf)
crc &= 0xFFFFFFFF crc &= 0xFFFFFFFF
self.items.append([name, sz, ts, crc, h_pos]) h_pos = self.pos
buf = gen_hdr(None, name, sz, ts, self.utf8, crc, self.pre_crc)
yield self._ct(buf)
z64 = sz >= 4 * 1024 * 1024 * 1024 crc = crc or 0
for buf in yieldfile(src):
if not self.pre_crc:
crc = zlib.crc32(buf, crc)
if z64 or not self.pre_crc: yield self._ct(buf)
buf = gen_fdesc(sz, crc, z64)
yield self._ct(buf) crc &= 0xFFFFFFFF
self.items.append([name, sz, ts, crc, h_pos])
z64 = sz >= 4 * 1024 * 1024 * 1024
if z64 or not self.pre_crc:
buf = gen_fdesc(sz, crc, z64)
yield self._ct(buf)
def gen(self):
errors = []
for f in self.fgen:
if "err" in f:
errors.append([f["vp"], f["err"]])
continue
try:
for x in self.ser(f):
yield x
except Exception as ex:
errors.append([f["vp"], repr(ex)])
if errors:
errf = errdesc(errors)
print(repr(errf))
for x in self.ser(errf):
yield x
cdir_pos = self.pos cdir_pos = self.pos
for name, sz, ts, crc, h_pos in self.items: for name, sz, ts, crc, h_pos in self.items:
@ -242,3 +262,6 @@ class StreamZip(object):
ecdr, _ = gen_ecdr(self.items, cdir_pos, cdir_end) ecdr, _ = gen_ecdr(self.items, cdir_pos, cdir_end)
yield self._ct(ecdr) yield self._ct(ecdr)
if errors:
os.unlink(errf["ap"])