support repetitive files

This commit is contained in:
ed 2019-07-02 22:58:31 +00:00
parent 1b43ed9432
commit 3794aa7ac7
2 changed files with 49 additions and 20 deletions

View file

@ -282,7 +282,7 @@ class HttpCli(object):
x = self.conn.hsrv.broker.put(True, "up2k.handle_chunk", wark, chash) x = self.conn.hsrv.broker.put(True, "up2k.handle_chunk", wark, chash)
response = x.get() response = x.get()
chunksize, ofs, path = response chunksize, cstart, path = response
if self.args.nw: if self.args.nw:
path = os.devnull path = os.devnull
@ -290,21 +290,40 @@ class HttpCli(object):
if remains > chunksize: if remains > chunksize:
raise Pebkac(400, "your chunk is too big to fit") raise Pebkac(400, "your chunk is too big to fit")
self.log("writing {} #{} @{} len {}".format(path, chash, ofs, remains)) self.log("writing {} #{} @{} len {}".format(path, chash, cstart, remains))
reader = read_socket(self.sr, remains) reader = read_socket(self.sr, remains)
with open(path, "rb+") as f: with open(path, "rb+", 512 * 1024) as f:
f.seek(ofs) f.seek(cstart[0])
post_sz, _, sha_b64 = hashcopy(self.conn, reader, f) post_sz, _, sha_b64 = hashcopy(self.conn, reader, f)
if sha_b64 != chash: if sha_b64 != chash:
raise Pebkac( raise Pebkac(
400, 400,
"your chunk got corrupted somehow:\n{} expected,\n{} received ({} bytes)".format( "your chunk got corrupted somehow:\n{} expected,\n{} received ({} bytes)".format(
chash, sha_b64, post_sz chash, sha_b64, post_sz
), ),
) )
if len(cstart) > 1:
self.log(
"clone {} to {}".format(
cstart[0], " & ".join(str(x) for x in cstart[1:])
)
)
ofs = 0
while ofs < chunksize:
bufsz = min(chunksize - ofs, 4 * 1024 * 1024)
f.seek(cstart[0] + ofs)
buf = f.read(bufsz)
for wofs in cstart[1:]:
f.seek(wofs + ofs)
f.write(buf)
ofs += len(buf)
self.log("clone {} done".format(cstart[0]))
x = self.conn.hsrv.broker.put(True, "up2k.confirm_chunk", wark, chash) x = self.conn.hsrv.broker.put(True, "up2k.confirm_chunk", wark, chash)
response = x.get() response = x.get()
@ -652,3 +671,4 @@ class HttpCli(object):
) )
self.reply(html.encode("utf-8", "replace")) self.reply(html.encode("utf-8", "replace"))
return True return True

View file

@ -56,15 +56,24 @@ class Up2k(object):
"name": cj["name"], "name": cj["name"],
"size": cj["size"], "size": cj["size"],
"hash": deepcopy(cj["hash"]), "hash": deepcopy(cj["hash"]),
# upload state
"pend": deepcopy(cj["hash"]),
} }
# one chunk may occur multiple times in a file;
# filter to unique values for the list of missing chunks
# (preserve order to reduce disk thrashing)
job["need"] = []
lut = {}
for k in cj["hash"]:
if k not in lut:
job["need"].append(k)
lut[k] = 1
self._new_upload(job) self._new_upload(job)
return { return {
"name": job["name"], "name": job["name"],
"size": job["size"], "size": job["size"],
"hash": job["pend"], "hash": job["need"],
"wark": wark, "wark": wark,
} }
@ -74,16 +83,15 @@ class Up2k(object):
if not job: if not job:
raise Pebkac(404, "unknown wark") raise Pebkac(404, "unknown wark")
if chash not in job["pend"]: if chash not in job["need"]:
raise Pebkac(200, "already got that but thanks??") raise Pebkac(200, "already got that but thanks??")
try: nchunk = [n for n, v in enumerate(job["hash"]) if v == chash]
nchunk = job["hash"].index(chash) if not nchunk:
except ValueError:
raise Pebkac(404, "unknown chunk") raise Pebkac(404, "unknown chunk")
chunksize = self._get_chunksize(job["size"]) chunksize = self._get_chunksize(job["size"])
ofs = nchunk * chunksize ofs = [chunksize * x for x in nchunk]
path = os.path.join(job["vdir"], job["name"]) path = os.path.join(job["vdir"], job["name"])
@ -91,7 +99,7 @@ class Up2k(object):
def confirm_chunk(self, wark, chash): def confirm_chunk(self, wark, chash):
with self.mutex: with self.mutex:
self.registry[wark]["pend"].remove(chash) self.registry[wark]["need"].remove(chash)
def _get_chunksize(self, filesize): def _get_chunksize(self, filesize):
chunksize = 1024 * 1024 chunksize = 1024 * 1024
@ -131,3 +139,4 @@ class Up2k(object):
with open(path, "wb") as f: with open(path, "wb") as f:
f.seek(job["size"] - 1) f.seek(job["size"] - 1)
f.write(b"e") f.write(b"e")