recover from file access errors when zipping

This commit is contained in:
ed 2021-03-27 17:16:59 +01:00
parent 5ccca3fbd5
commit b38533b0cc
3 changed files with 105 additions and 41 deletions

View file

@ -1,6 +1,8 @@
import os
import tarfile
import threading
from .sutil import errdesc
from .util import Queue, fsenc
@ -22,6 +24,7 @@ class StreamTar(object):
self.co = 0
self.qfile = QFile()
self.fgen = fgen
self.errf = None
# python 3.8 changed to PAX_FORMAT as default,
# waste of space and don't care about the new features
@ -42,23 +45,40 @@ class StreamTar(object):
yield buf
yield None
if self.errf:
os.unlink(self.errf["ap"])
def ser(self, f):
name = f["vp"]
src = f["ap"]
fsi = f["st"]
inf = tarfile.TarInfo(name=name)
inf.mode = fsi.st_mode
inf.size = fsi.st_size
inf.mtime = fsi.st_mtime
inf.uid = 0
inf.gid = 0
self.ci += inf.size
with open(fsenc(src), "rb", 512 * 1024) as f:
self.tar.addfile(inf, f)
def _gen(self):
errors = []
for f in self.fgen:
name = f["vp"]
src = f["ap"]
fsi = f["st"]
if "err" in f:
errors.append([f["vp"], f["err"]])
continue
inf = tarfile.TarInfo(name=name)
inf.mode = fsi.st_mode
inf.size = fsi.st_size
inf.mtime = fsi.st_mtime
inf.uid = 0
inf.gid = 0
try:
self.ser(f)
except Exception as ex:
errors.append([f["vp"], repr(ex)])
self.ci += inf.size
with open(fsenc(src), "rb", 512 * 1024) as f:
self.tar.addfile(inf, f)
if errors:
self.errf = errdesc(errors)
self.ser(self.errf)
self.tar.close()
self.qfile.q.put(None)

21
copyparty/sutil.py Normal file
View file

@ -0,0 +1,21 @@
import os
import time
import tempfile
def errdesc(errors):
report = ["copyparty failed to add the following files to the archive:", ""]
for fn, err in errors:
report.extend([" file: {}".format(fn), "error: {}".format(err), ""])
with tempfile.NamedTemporaryFile(prefix="copyparty-", delete=False) as tf:
tf_path = tf.name
tf.write("\r\n".join(report).encode("utf-8", "replace"))
os.chmod(tf_path, 0o444)
return {
"vp": "archive-errors-{}.txt".format(int(time.time())),
"ap": tf_path,
"st": os.stat(tf_path),
}

View file

@ -1,8 +1,10 @@
import os
import time
import zlib
import struct
from datetime import datetime
from .sutil import errdesc
from .util import yieldfile, sanitize_fn
@ -187,43 +189,61 @@ class StreamZip(object):
self.pos += len(buf)
return buf
def gen(self):
for f in self.fgen:
name = f["vp"]
src = f["ap"]
st = f["st"]
def ser(self, f):
name = f["vp"]
src = f["ap"]
st = f["st"]
sz = st.st_size
ts = st.st_mtime + 1
sz = st.st_size
ts = st.st_mtime + 1
crc = None
if self.pre_crc:
crc = 0
for buf in yieldfile(src):
crc = zlib.crc32(buf, crc)
crc &= 0xFFFFFFFF
h_pos = self.pos
buf = gen_hdr(None, name, sz, ts, self.utf8, crc, self.pre_crc)
yield self._ct(buf)
crc = crc or 0
crc = None
if self.pre_crc:
crc = 0
for buf in yieldfile(src):
if not self.pre_crc:
crc = zlib.crc32(buf, crc)
yield self._ct(buf)
crc = zlib.crc32(buf, crc)
crc &= 0xFFFFFFFF
self.items.append([name, sz, ts, crc, h_pos])
h_pos = self.pos
buf = gen_hdr(None, name, sz, ts, self.utf8, crc, self.pre_crc)
yield self._ct(buf)
z64 = sz >= 4 * 1024 * 1024 * 1024
crc = crc or 0
for buf in yieldfile(src):
if not self.pre_crc:
crc = zlib.crc32(buf, crc)
if z64 or not self.pre_crc:
buf = gen_fdesc(sz, crc, z64)
yield self._ct(buf)
yield self._ct(buf)
crc &= 0xFFFFFFFF
self.items.append([name, sz, ts, crc, h_pos])
z64 = sz >= 4 * 1024 * 1024 * 1024
if z64 or not self.pre_crc:
buf = gen_fdesc(sz, crc, z64)
yield self._ct(buf)
def gen(self):
errors = []
for f in self.fgen:
if "err" in f:
errors.append([f["vp"], f["err"]])
continue
try:
for x in self.ser(f):
yield x
except Exception as ex:
errors.append([f["vp"], repr(ex)])
if errors:
errf = errdesc(errors)
print(repr(errf))
for x in self.ser(errf):
yield x
cdir_pos = self.pos
for name, sz, ts, crc, h_pos in self.items:
@ -242,3 +262,6 @@ class StreamZip(object):
ecdr, _ = gen_ecdr(self.items, cdir_pos, cdir_end)
yield self._ct(ecdr)
if errors:
os.unlink(errf["ap"])