#!/usr/bin/env python3
"""
up2k.py: upload files and folders to a copyparty server
2021, ed, MIT-Licensed
https://github.com/9001/copyparty/blob/hovudstraum/bin/up2k.py

- dependencies: requests
- supports python 2.7 and 3.3 through 3.10

- no parallel hashing / uploads yet, so browsers are faster
- almost zero error-handling
- but if something breaks just try again and it'll autoresume

recovered from the patch "add up2k.py" (commit e7fd871f, ed, 2021-09-27)
which created bin/up2k.py with mode 100755
"""

# NOTE(review): the top of the file was not fully recoverable; on python 2
# this presumably also wants `from __future__ import print_function,
# unicode_literals` as the first statement -- confirm against upstream

import os
import sys
import stat
import math
import time
import base64
import hashlib
import argparse
import platform
import threading

try:
    import requests
except ImportError:
    # third-party dependency; main() reports this instead of dying on import
    requests = None


# from copyparty/__init__.py
PY2 = sys.version_info[0] == 2
if PY2:
    from Queue import Queue

    sys.dont_write_bytecode = True
    bytes = str
else:
    from queue import Queue

    unicode = str

# False, or the windows version as a list of ints
WINDOWS = False
if platform.system() == "Windows":
    WINDOWS = [int(x) for x in platform.version().split(".")]

VT100 = not WINDOWS or WINDOWS >= [10, 0, 14393]
# introduced in anniversary update


class File(object):
    """an up2k upload candidate; a file on disk plus its upload state"""

    def __init__(self, top, rel, size, lmod):
        """
        top:  the location given on the commandline (bytes)
        rel:  path of the file relative to top (bytes)
        size: filesize in bytes
        lmod: last-modified timestamp, as returned by stat
        """
        self.top = top
        self.rel = rel.replace(b"\\", b"/")  # always forward slashes
        self.size = size
        self.lmod = lmod

        # joined from the unmodified rel so native separators are kept
        self.abs = os.path.join(top, rel)
        self.name = self.rel.split(b"/")[-1].decode("utf-8", "replace")

        # set by get_hashlist
        self.cids = []  # [ hash, ofs, sz ]
        self.kchunks = {}  # hash: [ ofs, sz ]

        # set by handshake
        self.ucids = []  # chunks which need to be uploaded
        self.wark = None
        self.url = None

        # set by upload
        self.uploading = []  # chunks currently being uploaded

        # m = "size({}) lmod({}) top({}) rel({}) abs({}) name({})"
        # print(m.format(self.size, self.lmod, self.top, self.rel, self.abs, self.name))


class FileSlice(object):
    """
    file-like object exposing one chunk of a File, with all offsets
    relative to the start of the chunk; used as the POST body in upload()
    """

    # https://stackoverflow.com/questions/4359495/what-is-exactly-a-file-like-object-in-python
    # IOBase, RawIOBase, BufferedIOBase
    # (these are borrowed as-is from the underlying file object)
    _BORROWED = (
        "close __enter__ __exit__ __iter__ isatty __next__ readable seekable writable"
    )

    def __init__(self, file, cid):
        """
        file: the File to read from
        cid:  key into file.kchunks selecting which chunk to expose
        """
        self.car, self.len = file.kchunks[cid]  # chunk offset in file, chunk size
        self.cdr = self.car + self.len  # offset of the first byte after the chunk
        self.ofs = 0  # read position relative to self.car
        self.f = open(file.abs, "rb", 512 * 1024)
        self.f.seek(self.car)

        try:
            for fun in self._BORROWED.split():
                setattr(self, fun, getattr(self.f, fun))
        except AttributeError:
            pass  # py27 probably; its file objects lack __next__ and onwards

    @property
    def closed(self):
        """live view of the underlying file's state (not a one-time copy)"""
        return self.f.closed

    def tell(self):
        """returns the read position relative to the start of the chunk"""
        return self.ofs

    def seek(self, ofs, wh=0):
        """
        moves the read position; wh is 0 (from chunk start), 1 (from
        current position) or 2 (from chunk end), and the result is clamped
        to the chunk so that end-of-chunk is a valid position

        returns the new position, like io file objects do
        """
        if wh == 1:
            ofs = self.ofs + ofs
        elif wh == 2:
            ofs = self.len + ofs  # provided ofs is negative

        ofs = max(0, min(ofs, self.len))

        self.ofs = ofs
        self.f.seek(self.car + ofs)
        return ofs

    def read(self, sz=-1):
        """
        reads up to sz bytes without crossing the end of the chunk;
        a negative or missing sz means the rest of the chunk
        """
        rem = self.len - self.ofs
        if sz is None or sz < 0:
            sz = rem

        ret = self.f.read(min(sz, rem))
        self.ofs += len(ret)
        return ret


def statdir(top):
    """non-recursive listing of directory contents, along with stat() info"""
    if hasattr(os, "scandir"):
        dh = os.scandir(top)
        try:
            for fh in dh:
                yield [os.path.join(top, fh.name), fh.stat()]
        finally:
            # scandir only became a context manager (and got close) in py3.6
            if hasattr(dh, "close"):
                dh.close()
    else:
        for name in os.listdir(top):
            abspath = os.path.join(top, name)
            yield [abspath, os.stat(abspath)]


def walkdir(top):
    """recursive statdir; yields [abspath, stat] for everything except folders"""
    for ap, inf in statdir(top):
        if stat.S_ISDIR(inf.st_mode):
            for x in walkdir(ap):
                yield x
        else:
            yield ap, inf


def walkdirs(tops):
    """recursive statdir for a list of tops, yields [top, relpath, stat]"""
    for top in tops:
        if os.path.isdir(top):
            for ap, inf in walkdir(top):
                # strip the top and the separator which follows it
                yield top, ap[len(top) + 1 :], inf
        else:
            # a single file; its parent folder takes the role of top
            sep = "{}".format(os.sep).encode("ascii")
            d, n = top.rsplit(sep, 1)
            yield d, n, os.stat(top)


# from copyparty/util.py
def humansize(sz, terse=False):
    """
    formats a bytecount for display, "1.5 KiB" or (terse) "1.5K";
    the number is truncated to 4 characters, not rounded
    """
    for unit in ["B", "KiB", "MiB", "GiB", "TiB"]:
        if sz < 1024:
            break

        sz /= 1024.0

    ret = " ".join([str(sz)[:4].rstrip("."), unit])

    if not terse:
        return ret

    return ret.replace("iB", "").replace(" ", "")


# from copyparty/up2k.py
def up2k_chunksize(filesize):
    """
    returns the chunksize to use for a file of the given size; starts at
    1 MiB and grows until the file fits in 256 chunks or the chunksize
    has reached 32 MiB
    """
    chunksize = 1024 * 1024
    stepsize = 512 * 1024
    while True:
        # the stepsize doubles on every other increment
        for mul in [1, 2]:
            nchunks = math.ceil(filesize * 1.0 / chunksize)
            if nchunks <= 256 or chunksize >= 32 * 1024 * 1024:
                return chunksize

            chunksize += stepsize
            stepsize *= mul


# mostly from copyparty/up2k.py
def get_hashlist(file, pcb):
    # type: (File, any) -> None
    """
    hashes the file chunk by chunk and stores the result in file.cids and
    file.kchunks; each chunk-id is the urlsafe base64 of the first 33
    bytes of the sha512 of that chunk

    pcb is not used yet

    raises Exception if the file ends before file.size bytes were read
    """
    chunk_sz = up2k_chunksize(file.size)
    file_rem = file.size
    file_ofs = 0
    ret = []
    with open(file.abs, "rb", 512 * 1024) as f:
        while file_rem > 0:
            hashobj = hashlib.sha512()
            # the final chunk is whatever remains of the file
            chunk_sz = chunk_rem = min(chunk_sz, file_rem)
            while chunk_rem > 0:
                buf = f.read(min(chunk_rem, 64 * 1024))
                if not buf:
                    raise Exception("EOF at " + str(f.tell()))

                hashobj.update(buf)
                chunk_rem -= len(buf)

            digest = hashobj.digest()[:33]
            digest = base64.urlsafe_b64encode(digest).decode("utf-8")

            ret.append([digest, file_ofs, chunk_sz])
            file_ofs += chunk_sz
            file_rem -= chunk_sz

    file.cids = ret
    file.kchunks = {k: [v1, v2] for k, v1, v2 in ret}


def handshake(url, file, pw, cert):
    # type: (str, File, any, any) -> List[str]
    """
    posts the chunk-ids and metadata of the file to the server, then
    stores the url, name and wark from the reply on the file

    url:  server url including destination folder; only used for the
          first handshake of a file, after which file.url is used
    pw:   password to send as the cppwd cookie, or None
    cert: passed to requests as verify=

    returns the "hash" list from the reply, which Ctl treats as the
    chunk-ids that still need to be uploaded

    raises Exception with the response text if the reply is not json
    """
    req = {
        "hash": [x[0] for x in file.cids],
        "name": file.name,
        "lmod": file.lmod,
        "size": file.size,
    }
    headers = {"Content-Type": "text/plain"}  # wtf ed
    if pw:
        headers["Cookie"] = "=".join(["cppwd", pw])

    if file.url:
        url = file.url
    elif b"/" in file.rel:
        # mirror the local subfolders below the destination folder
        url += file.rel.rsplit(b"/", 1)[0].decode("utf-8", "replace")

    rsp = requests.post(url, headers=headers, json=req, verify=cert)
    try:
        r = rsp.json()
    except ValueError:
        # json decoding errors are ValueErrors; show what the server said
        raise Exception(rsp.text)

    try:
        pre, url = url.split("://")
        pre += "://"
    except ValueError:
        pre = ""  # url without a scheme

    # scheme + host from the url that was posted to, path from the reply
    file.url = pre + url.split("/")[0] + r["purl"]
    file.name = r["name"]
    file.wark = r["wark"]

    return r["hash"]


def upload(file, cid, pw, cert):
    # type: (File, str, any, any) -> None
    """
    posts one chunk of the file to file.url (so handshake must have run)

    cid:  chunk-id, a key in file.kchunks
    pw:   password to send as the cppwd cookie, or None
    cert: passed to requests as verify=

    raises Exception if the response is falsy (requests: an http error)
    """
    headers = {
        "X-Up2k-Hash": cid,
        "X-Up2k-Wark": file.wark,
        "Content-Type": "application/octet-stream",
    }
    if pw:
        headers["Cookie"] = "=".join(["cppwd", pw])

    f = FileSlice(file, cid)
    try:
        r = requests.post(file.url, headers=headers, data=f, verify=cert)
        if not r:
            raise Exception(repr(r))
    finally:
        f.f.close()


class Daemon(threading.Thread):
    """a Thread with the daemon flag set; not used by anything in this file yet"""

    def __init__(self):
        threading.Thread.__init__(self)
        self.daemon = True


class Ctl(object):
    """
    the uploader; the constructor does all of the work
    """

    def __init__(self, ar):
        """
        ar: the parsed commandline; reads url, files, a, td, te
            (url and files are normalized in-place)
        """
        self.ar = ar
        ar.url = ar.url.rstrip("/") + "/"
        ar.files = [
            os.path.abspath(os.path.realpath(x.encode("utf-8"))) for x in ar.files
        ]

        print("\nscanning {} locations".format(len(ar.files)))

        # first pass; just count files and bytes
        nfiles = 0
        nbytes = 0
        for _, _, inf in walkdirs(ar.files):
            nfiles += 1
            nbytes += inf.st_size

        print("found {} files, {}\n".format(nfiles, humansize(nbytes)))

        # value for the verify= argument of requests
        cert = None
        if ar.td:
            cert = False
        if ar.te:
            cert = ar.te

        # second pass; hash and upload one file at a time
        self.filegen = walkdirs(ar.files)
        for nf, (top, rel, inf) in enumerate(self.filegen):
            file = File(top, rel, inf.st_size, inf.st_mtime)
            upath = file.abs.decode("utf-8", "replace")

            print("{} {}\n hash...".format(nfiles - nf, upath))
            get_hashlist(file, None)

            # handshake again after each round of uploads,
            # until the server has no more chunks to ask for
            while True:
                print(" hs...")
                up = handshake(ar.url, file, ar.a, cert)
                file.ucids = up
                if not up:
                    break

                print("{} {}".format(nfiles - nf, upath))
                ncs = len(up)
                for nc, cid in enumerate(up):
                    print(" {} up {}".format(ncs - nc, cid))
                    upload(file, cid, ar.a, cert)

            print(" ok!")


def main():
    """parses the commandline and runs the uploader"""
    if requests is None:
        sys.exit("this script needs the 'requests' package (python -m pip install requests)")

    time.strptime("19970815", "%Y%m%d")  # python#7980
    if WINDOWS:
        os.system("rem")  # enables colors

    # fmt: off
    ap = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    ap.add_argument("url", type=unicode, help="server url, including destination folder")
    ap.add_argument("files", type=unicode, nargs="+", help="files and/or folders to process")
    ap.add_argument("-a", metavar="PASSWORD", help="password")
    ap.add_argument("-te", metavar="PEM_FILE", help="certificate to expect/verify")
    ap.add_argument("-td", action="store_true", help="disable certificate check")
    # ap.add_argument("-j", type=int, default=2, help="parallel connections")
    # ap.add_argument("-nh", action="store_true", help="disable hashing while uploading")
    # fmt: on

    Ctl(ap.parse_args())


if __name__ == "__main__":
    main()