recover from file access errors when zipping

This commit is contained in:
ed 2021-03-27 17:16:59 +01:00
parent 5ccca3fbd5
commit b38533b0cc
3 changed files with 105 additions and 41 deletions

View file

@ -1,6 +1,8 @@
import os
import tarfile import tarfile
import threading import threading
from .sutil import errdesc
from .util import Queue, fsenc from .util import Queue, fsenc
@ -22,6 +24,7 @@ class StreamTar(object):
self.co = 0 self.co = 0
self.qfile = QFile() self.qfile = QFile()
self.fgen = fgen self.fgen = fgen
self.errf = None
# python 3.8 changed to PAX_FORMAT as default, # python 3.8 changed to PAX_FORMAT as default,
# waste of space and don't care about the new features # waste of space and don't care about the new features
@ -42,9 +45,10 @@ class StreamTar(object):
yield buf yield buf
yield None yield None
if self.errf:
os.unlink(self.errf["ap"])
def _gen(self): def ser(self, f):
for f in self.fgen:
name = f["vp"] name = f["vp"]
src = f["ap"] src = f["ap"]
fsi = f["st"] fsi = f["st"]
@ -60,5 +64,21 @@ class StreamTar(object):
with open(fsenc(src), "rb", 512 * 1024) as f: with open(fsenc(src), "rb", 512 * 1024) as f:
self.tar.addfile(inf, f) self.tar.addfile(inf, f)
def _gen(self):
    """Serialize every entry from self.fgen into the tar stream.

    Entries that already carry an "err" key are not serialized; their
    virtual path and error text are collected instead.  Entries for which
    self.ser() raises are collected the same way (using repr() of the
    exception) and the loop carries on with the next entry.  If anything
    was collected, an error report is built with errdesc(), remembered in
    self.errf and serialized as one more entry.

    A None sentinel is always pushed onto self.qfile.q when this method
    exits, including when it exits through an exception.
    """
    errors = []
    try:
        for f in self.fgen:
            if "err" in f:
                errors.append([f["vp"], f["err"]])
                continue

            try:
                self.ser(f)
            except Exception as ex:
                errors.append([f["vp"], repr(ex)])

        if errors:
            self.errf = errdesc(errors)
            self.ser(self.errf)

        # Only finalize the archive on the success path; after an unexpected
        # failure the stream is left unterminated rather than being closed
        # as if it were complete.
        self.tar.close()
    finally:
        # NOTE(review): presumably the reader of self.qfile.q treats None as
        # end-of-stream; without this it would never be told that no more
        # data is coming if errdesc(), ser() or close() raised above.
        self.qfile.q.put(None)

21
copyparty/sutil.py Normal file
View file

@ -0,0 +1,21 @@
import os
import time
import tempfile
def errdesc(errors):
    """Write a plain-text report of archive errors to a temporary file.

    errors is an iterable of (filename, error-text) pairs.  The report is
    CRLF-separated, UTF-8 encoded (unencodable characters are replaced) and
    stored in a new temp file whose name starts with "copyparty-".  The file
    is made read-only (0o444) and is NOT deleted automatically; the caller
    owns it and is expected to remove it via the returned "ap" path.

    Returns a dict with:
      "vp": archive-internal name, "archive-errors-<unix time>.txt"
      "ap": filesystem path of the temp file
      "st": os.stat() result for that file

    If writing, chmod or stat fails, the temp file is removed (best effort)
    before the exception is re-raised, since the caller never learns its
    path in that case and could not clean it up.
    """
    report = ["copyparty failed to add the following files to the archive:", ""]
    for fn, err in errors:
        report.extend([" file: {}".format(fn), "error: {}".format(err), ""])

    body = "\r\n".join(report).encode("utf-8", "replace")

    tf_path = None
    try:
        with tempfile.NamedTemporaryFile(prefix="copyparty-", delete=False) as tf:
            tf_path = tf.name
            tf.write(body)

        os.chmod(tf_path, 0o444)
        st = os.stat(tf_path)
    except Exception:
        # delete=False means nobody else will remove the file for us
        if tf_path is not None:
            try:
                os.unlink(tf_path)
            except OSError:
                pass  # best effort; the original error is the one to report

        raise

    return {
        "vp": "archive-errors-{}.txt".format(int(time.time())),
        "ap": tf_path,
        "st": st,
    }

View file

@ -1,8 +1,10 @@
import os
import time import time
import zlib import zlib
import struct import struct
from datetime import datetime from datetime import datetime
from .sutil import errdesc
from .util import yieldfile, sanitize_fn from .util import yieldfile, sanitize_fn
@ -187,8 +189,7 @@ class StreamZip(object):
self.pos += len(buf) self.pos += len(buf)
return buf return buf
def gen(self): def ser(self, f):
for f in self.fgen:
name = f["vp"] name = f["vp"]
src = f["ap"] src = f["ap"]
st = f["st"] st = f["st"]
@ -225,6 +226,25 @@ class StreamZip(object):
buf = gen_fdesc(sz, crc, z64) buf = gen_fdesc(sz, crc, z64)
yield self._ct(buf) yield self._ct(buf)
def gen(self):
errors = []
for f in self.fgen:
if "err" in f:
errors.append([f["vp"], f["err"]])
continue
try:
for x in self.ser(f):
yield x
except Exception as ex:
errors.append([f["vp"], repr(ex)])
if errors:
errf = errdesc(errors)
print(repr(errf))
for x in self.ser(errf):
yield x
cdir_pos = self.pos cdir_pos = self.pos
for name, sz, ts, crc, h_pos in self.items: for name, sz, ts, crc, h_pos in self.items:
buf = gen_hdr(h_pos, name, sz, ts, self.utf8, crc, self.pre_crc) buf = gen_hdr(h_pos, name, sz, ts, self.utf8, crc, self.pre_crc)
@ -242,3 +262,6 @@ class StreamZip(object):
ecdr, _ = gen_ecdr(self.items, cdir_pos, cdir_end) ecdr, _ = gen_ecdr(self.items, cdir_pos, cdir_end)
yield self._ct(ecdr) yield self._ct(ecdr)
if errors:
os.unlink(errf["ap"])