#!/usr/bin/env python3
from __future__ import print_function, unicode_literals

"""
up2k.py: upload to copyparty
2021-09-30, v0.4, ed, MIT-Licensed
https://github.com/9001/copyparty/blob/hovudstraum/bin/up2k.py

- dependencies: requests
- supports python 2.7 and 3.3 through 3.10
- almost zero error-handling
- but if something breaks just try again and it'll autoresume
"""

import os
import sys
import stat
import math
import time
import base64
import hashlib
import argparse
import platform
import threading

import requests


# from copyparty/__init__.py
PY2 = sys.version_info[0] == 2
if PY2:
    from Queue import Queue

    sys.dont_write_bytecode = True
    bytes = str
else:
    from queue import Queue

    unicode = str

WINDOWS = False
if platform.system() == "Windows":
    WINDOWS = [int(x) for x in platform.version().split(".")]

VT100 = not WINDOWS or WINDOWS >= [10, 0, 14393]  # introduced in anniversary update


req_ses = requests.Session()


class File(object):
    """an up2k upload task; represents a single file"""

    def __init__(self, top, rel, size, lmod):
        self.top = top  # type: bytes
        self.rel = rel.replace(b"\\", b"/")  # type: bytes
        self.size = size  # type: int
        self.lmod = lmod  # type: float

        self.abs = os.path.join(top, rel)  # type: bytes
        self.name = self.rel.split(b"/")[-1].decode("utf-8", "replace")  # type: str

        # set by get_hashlist
        self.cids = []  # type: list[tuple[str, int, int]]  # [ hash, ofs, sz ]
        self.kchunks = {}  # type: dict[str, tuple[int, int]]  # hash: [ ofs, sz ]

        # set by handshake
        self.ucids = []  # type: list[str]  # chunks which need to be uploaded
        self.wark = None  # type: str
        self.url = None  # type: str

        # set by upload
        self.up_b = 0  # type: int
        self.up_c = 0  # type: int

        # m = "size({}) lmod({}) top({}) rel({}) abs({}) name({})"
        # eprint(m.format(self.size, self.lmod, self.top, self.rel, self.abs, self.name))


class FileSlice(object):
    """file-like object providing a fixed window into a file"""

    def __init__(self, file, cid):
        # type: (File, str) -> FileSlice

        self.car, self.len = file.kchunks[cid]
        self.cdr = self.car + self.len
        self.ofs = 0  # type: int
        self.f = open(file.abs, "rb", 512 * 1024)
        self.f.seek(self.car)

        # https://stackoverflow.com/questions/4359495/what-is-exactly-a-file-like-object-in-python
        # IOBase, RawIOBase, BufferedIOBase
        funs = "close closed __enter__ __exit__ __iter__ isatty __next__ readable seekable writable"
        try:
            for fun in funs.split():
                setattr(self, fun, getattr(self.f, fun))
        except:
            pass  # py27 probably

    def tell(self):
        return self.ofs

    def seek(self, ofs, wh=0):
        if wh == 1:
            ofs = self.ofs + ofs
        elif wh == 2:
            ofs = self.len + ofs  # provided ofs is negative

        if ofs < 0:
            ofs = 0
        elif ofs >= self.len:
            ofs = self.len - 1

        self.ofs = ofs
        self.f.seek(self.car + ofs)

    def read(self, sz):
        sz = min(sz, self.len - self.ofs)
        ret = self.f.read(sz)
        self.ofs += len(ret)
        return ret


def eprint(*a, **ka):
    ka["file"] = sys.stderr
    if not PY2:
        ka["flush"] = True

    print(*a, **ka)
    if PY2:
        sys.stderr.flush()


def statdir(top):
    """non-recursive listing of directory contents, along with stat() info"""

    if hasattr(os, "scandir"):
        with os.scandir(top) as dh:
            for fh in dh:
                yield [os.path.join(top, fh.name), fh.stat()]
    else:
        for name in os.listdir(top):
            abspath = os.path.join(top, name)
            yield [abspath, os.stat(abspath)]


def walkdir(top):
    """recursive statdir"""

    for ap, inf in statdir(top):
        if stat.S_ISDIR(inf.st_mode):
            for x in walkdir(ap):
                yield x
        else:
            yield ap, inf


def walkdirs(tops):
    """recursive statdir for a list of tops, yields [top, relpath, stat]"""

    for top in tops:
        if os.path.isdir(top):
            for ap, inf in walkdir(top):
                yield top, ap[len(top) + 1 :], inf
        else:
            sep = "{}".format(os.sep).encode("ascii")
            d, n = top.rsplit(sep, 1)
            yield d, n, os.stat(top)


# from copyparty/util.py
def humansize(sz, terse=False):
    """picks a sensible unit for the given extent"""

    for unit in ["B", "KiB", "MiB", "GiB", "TiB"]:
        if sz < 1024:
            break

        sz /= 1024.0

    ret = " ".join([str(sz)[:4].rstrip("."), unit])
    if not terse:
        return ret

    return ret.replace("iB", "").replace(" ", "")


# from copyparty/up2k.py
def up2k_chunksize(filesize):
    """gives the correct chunksize for up2k hashing"""

    chunksize = 1024 * 1024
    stepsize = 512 * 1024
    while True:
        for mul in [1, 2]:
            nchunks = math.ceil(filesize * 1.0 / chunksize)
            if nchunks <= 256 or chunksize >= 32 * 1024 * 1024:
                return chunksize

            chunksize += stepsize
            stepsize *= mul


# mostly from copyparty/up2k.py
def get_hashlist(file, pcb):
    # type: (File, any) -> None
    """generates the up2k hashlist from file contents, inserts it into `file`"""

    chunk_sz = up2k_chunksize(file.size)
    file_rem = file.size
    file_ofs = 0
    ret = []
    with open(file.abs, "rb", 512 * 1024) as f:
        while file_rem > 0:
            hashobj = hashlib.sha512()
            chunk_sz = chunk_rem = min(chunk_sz, file_rem)
            while chunk_rem > 0:
                buf = f.read(min(chunk_rem, 64 * 1024))
                if not buf:
                    raise Exception("EOF at " + str(f.tell()))

                hashobj.update(buf)
                chunk_rem -= len(buf)

            digest = hashobj.digest()[:33]
            digest = base64.urlsafe_b64encode(digest).decode("utf-8")

            ret.append([digest, file_ofs, chunk_sz])
            file_ofs += chunk_sz
            file_rem -= chunk_sz

            if pcb:
                pcb(file, file_ofs)

    file.cids = ret
    file.kchunks = {k: [v1, v2] for k, v1, v2 in ret}
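

# a quick sanity-check of the chunking scheme above (values obtained by
# tracing up2k_chunksize by hand):
#
#   up2k_chunksize(100 * 1024 * 1024)   # 100 MiB -> 1 MiB chunks (100 total)
#   up2k_chunksize(1024 * 1024 * 1024)  # 1 GiB   -> 4 MiB chunks (exactly 256)
#
# so a file is covered by roughly 256 chunks until the 32 MiB cap kicks in,
# and each chunk is identified by the first 33 bytes of its sha512,
# urlsafe-base64 encoded (44 chars)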


def handshake(req_ses, url, file, pw, search):
    # type: (requests.Session, str, File, any, bool) -> list[str]
    """
    performs a handshake with the server; reply is:
      if search, a list of search results
      otherwise, a list of chunks to upload
    """

    req = {
        "hash": [x[0] for x in file.cids],
        "name": file.name,
        "lmod": file.lmod,
        "size": file.size,
    }
    if search:
        req["srch"] = 1

    headers = {"Content-Type": "text/plain"}  # wtf ed
    if pw:
        headers["Cookie"] = "=".join(["cppwd", pw])

    if file.url:
        url = file.url
    elif b"/" in file.rel:
        url += file.rel.rsplit(b"/", 1)[0].decode("utf-8", "replace")

    r = req_ses.post(url, headers=headers, json=req)
    try:
        r = r.json()
    except:
        raise Exception(r.text)

    if search:
        return r["hits"]

    try:
        pre, url = url.split("://")
        pre += "://"
    except:
        pre = ""

    file.url = pre + url.split("/")[0] + r["purl"]
    file.name = r["name"]
    file.wark = r["wark"]

    return r["hash"]
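

# roughly what a handshake looks like on the wire (field names are from the
# code above; the example values are made up):
#
#   request:  POST /inc/subfolder
#             {"name": "song.flac", "size": 2254540, "lmod": 1632992475.0,
#              "hash": ["kRJvghKY...", "jprLnLkc...", "BjHHGfDf..."]}
#
#   reply:    {"purl": "/inc/subfolder/", "name": "song.flac",
#              "wark": "y44M...", "hash": ["jprLnLkc..."]}
#
# the reply "hash" only lists the chunks the server still wants (an empty
# list means the file is already complete server-side); "wark" identifies
# the upload and is echoed back as X-Up2k-Wark with every chunk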


def upload(req_ses, file, cid, pw):
    # type: (requests.Session, File, str, any) -> None
    """upload one specific chunk, `cid` (a chunk-hash)"""

    headers = {
        "X-Up2k-Hash": cid,
        "X-Up2k-Wark": file.wark,
        "Content-Type": "application/octet-stream",
    }
    if pw:
        headers["Cookie"] = "=".join(["cppwd", pw])

    f = FileSlice(file, cid)
    try:
        r = req_ses.post(file.url, headers=headers, data=f)
        if not r:
            raise Exception(repr(r))

        _ = r.content
    finally:
        f.f.close()


class Daemon(threading.Thread):
    def __init__(self, *a, **ka):
        threading.Thread.__init__(self, *a, **ka)
        self.daemon = True


class Ctl(object):
    """
    this will be the coordinator which runs everything in parallel
    (hashing, handshakes, uploads)  but right now it's p dumb
    """

    def __init__(self, ar):
        self.ar = ar
        ar.url = ar.url.rstrip("/") + "/"
        ar.files = [
            os.path.abspath(os.path.realpath(x.encode("utf-8"))) for x in ar.files
        ]

        eprint("\nscanning {} locations".format(len(ar.files)))

        nfiles = 0
        nbytes = 0
        for _, _, inf in walkdirs(ar.files):
            nfiles += 1
            nbytes += inf.st_size

        eprint("found {} files, {}\n".format(nfiles, humansize(nbytes)))
        self.nfiles = nfiles
        self.nbytes = nbytes

        if ar.td:
            req_ses.verify = False
        if ar.te:
            req_ses.verify = ar.te

        self.filegen = walkdirs(ar.files)
        if ar.safe:
            self.safe()
        else:
            self.fancy()

    def safe(self):
        """minimal basic slow boring fallback codepath"""

        search = self.ar.s
        for nf, (top, rel, inf) in enumerate(self.filegen):
            file = File(top, rel, inf.st_size, inf.st_mtime)
            upath = file.abs.decode("utf-8", "replace")

            print("{} {}\n  hash...".format(self.nfiles - nf, upath))
            get_hashlist(file, None)

            while True:
                print("  hs...")
                hs = handshake(req_ses, self.ar.url, file, self.ar.a, search)
                if search:
                    if hs:
                        for hit in hs:
                            print("  found: {}{}".format(self.ar.url, hit["rp"]))
                    else:
                        print("  NOT found")

                    break

                file.ucids = hs
                if not hs:
                    break

                print("{} {}".format(self.nfiles - nf, upath))
                ncs = len(hs)
                for nc, cid in enumerate(hs):
                    print("  {} up {}".format(ncs - nc, cid))
                    upload(req_ses, file, cid, self.ar.a)

            print("  ok!")

    def fancy(self):
        self.hash_f = 0
        self.hash_c = 0
        self.hash_b = 0
        self.up_f = 0
        self.up_c = 0
        self.up_b = 0

        self.hasher_busy = 1
        self.handshaker_busy = 0
        self.uploader_busy = 0

        self.mutex = threading.Lock()
        self.q_handshake = Queue()  # type: Queue[File]
        self.q_recheck = Queue()  # type: Queue[File]  # partial upload exists [...]
        self.q_upload = Queue()  # type: Queue[tuple[File, str]]

        Daemon(target=self.hasher).start()
        for _ in range(self.ar.j):
            Daemon(target=self.handshaker).start()
            Daemon(target=self.uploader).start()

        while True:
            time.sleep(0.1)
            with self.mutex:
                if (
                    self.q_handshake.empty()
                    and self.q_upload.empty()
                    and not self.hasher_busy
                    and not self.handshaker_busy
                    and not self.uploader_busy
                ):
                    break

    def cb_hasher(self, file, ofs):
        eprint(".", end="")

    def hasher(self):
        for nf, (top, rel, inf) in enumerate(self.filegen):
            file = File(top, rel, inf.st_size, inf.st_mtime)
            upath = file.abs.decode("utf-8", "replace")

            # hold back hashing if it gets too far ahead of the uploads
            while True:
                with self.mutex:
                    if (
                        self.hash_b - self.up_b < 1024 * 1024 * 128
                        and self.hash_c - self.up_c < 64
                        and (
                            not self.ar.nh
                            or (
                                self.q_upload.empty()
                                and self.q_handshake.empty()
                                and not self.uploader_busy
                            )
                        )
                    ):
                        break

                time.sleep(0.05)

            eprint("\n{:6d} hash {}\n".format(self.nfiles - nf, upath), end="")
            get_hashlist(file, self.cb_hasher)
            with self.mutex:
                self.hash_f += 1
                self.hash_c += len(file.cids)
                self.hash_b += file.size

            self.q_handshake.put(file)

        self.hasher_busy = 0

    def handshaker(self):
        search = self.ar.s
        q = self.q_handshake
        while True:
            file = q.get()
            if not file:
                if q == self.q_handshake:
                    q = self.q_recheck
                    q.put(None)
                    continue

                self.q_upload.put(None)
                break

            with self.mutex:
                self.handshaker_busy += 1

            upath = file.abs.decode("utf-8", "replace")
            eprint("\n  handshake {}\n".format(upath), end="")

            # if the server says a partial upload of this file already
            # exists, defer it onto q_recheck instead of aborting the run
            try:
                hs = handshake(req_ses, self.ar.url, file, self.ar.a, search)
            except Exception as ex:
                if q == self.q_handshake and "partial upload exists" in str(ex):
                    self.q_recheck.put(file)
                    hs = []
                else:
                    raise

            if search:
                if hs:
                    for hit in hs:
                        eprint("     found: {}{}".format(self.ar.url, hit["rp"]))
                else:
                    eprint("  NOT found {}".format(upath))

                with self.mutex:
                    self.up_f += 1
                    self.up_c += len(file.cids)
                    self.up_b += file.size
                    self.handshaker_busy -= 1

                continue

            with self.mutex:
                if not hs:
                    # all chunks done
                    self.up_f += 1
                    self.up_c += len(file.cids) - file.up_c
                    self.up_b += file.size - file.up_b

                if hs and self.up_c:
                    # some chunks failed
                    self.up_c -= len(hs)
                    file.up_c -= len(hs)
                    for cid in hs:
                        sz = file.kchunks[cid][1]
                        self.up_b -= sz
                        file.up_b -= sz

                file.ucids = hs
                self.handshaker_busy -= 1

            for cid in hs:
                self.q_upload.put([file, cid])
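
    # once the uploader (below) has pushed the last outstanding chunk of a
    # file, it puts the file back on q_handshake; the server then responds
    # with an empty chunklist, which is what triggers "all chunks done" above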

    def uploader(self):
        while True:
            task = self.q_upload.get()
            if not task:
                break

            eprint("*", end="")
            with self.mutex:
                self.uploader_busy += 1

            file, cid = task
            upload(req_ses, file, cid, self.ar.a)

            with self.mutex:
                sz = file.kchunks[cid][1]
                file.ucids = [x for x in file.ucids if x != cid]
                if not file.ucids:
                    self.q_handshake.put(file)

                file.up_b += sz
                self.up_b += sz
                file.up_c += 1
                self.up_c += 1
                self.uploader_busy -= 1


def main():
    time.strptime("19970815", "%Y%m%d")  # python#7980
    if WINDOWS:
        os.system("rem")  # enables colors

    # fmt: off
    ap = app = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    ap.add_argument("url", type=unicode, help="server url, including destination folder")
    ap.add_argument("files", type=unicode, nargs="+", help="files and/or folders to process")
    ap.add_argument("-a", metavar="PASSWORD", help="password")
    ap.add_argument("-s", action="store_true", help="file-search (disables upload)")
    ap = app.add_argument_group("performance tweaks")
    ap.add_argument("-j", type=int, metavar="THREADS", default=4, help="parallel connections")
    ap.add_argument("-nh", action="store_true", help="disable hashing while uploading")
    ap.add_argument("--safe", action="store_true", help="use simple fallback approach")
    ap = app.add_argument_group("tls")
    ap.add_argument("-te", metavar="PEM_FILE", help="certificate to expect/verify")
    ap.add_argument("-td", action="store_true", help="disable certificate check")
    # fmt: on

    Ctl(app.parse_args())


if __name__ == "__main__":
    main()
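

# example invocations (urls and filenames are placeholders; 3923 is the
# default copyparty port):
#
#   python3 up2k.py http://127.0.0.1:3923/inc/ song.flac some-folder/
#   python3 up2k.py -a hunter2 -j 8 https://example.com/dump/ big.iso
#   python3 up2k.py -s http://127.0.0.1:3923/music/ maybe-a-dupe.opus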