#!/usr/bin/env python3
from __future__ import print_function, unicode_literals
"""
|
|
up2k.py: upload to copyparty
|
|
2021-09-30, v0.5, ed <irc.rizon.net>, MIT-Licensed
|
|
https://github.com/9001/copyparty/blob/hovudstraum/bin/up2k.py
|
|
|
|
- dependencies: requests
|
|
- supports python 2.7 and 3.3 through 3.10
|
|
|
|
- almost zero error-handling
|
|
- but if something breaks just try again and it'll autoresume
|
|
"""

import os
import sys
import stat
import math
import time
import atexit
import signal
import base64
import hashlib
import argparse
import platform
import threading

import requests


# from copyparty/__init__.py
PY2 = sys.version_info[0] == 2
if PY2:
    from Queue import Queue

    sys.dont_write_bytecode = True
    bytes = str
else:
    from queue import Queue

    unicode = str

WINDOWS = False
if platform.system() == "Windows":
    WINDOWS = [int(x) for x in platform.version().split(".")]

VT100 = not WINDOWS or WINDOWS >= [10, 0, 14393]
# introduced in anniversary update


req_ses = requests.Session()


class File(object):
    """an up2k upload task; represents a single file"""

    def __init__(self, top, rel, size, lmod):
        self.top = top  # type: bytes
        self.rel = rel.replace(b"\\", b"/")  # type: bytes
        self.size = size  # type: int
        self.lmod = lmod  # type: float

        self.abs = os.path.join(top, rel)  # type: bytes
        self.name = self.rel.split(b"/")[-1].decode("utf-8", "replace")  # type: str

        # set by get_hashlist
        self.cids = []  # type: list[tuple[str, int, int]]  # [ hash, ofs, sz ]
        self.kchunks = {}  # type: dict[str, tuple[int, int]]  # hash: [ ofs, sz ]

        # set by handshake
        self.ucids = []  # type: list[str]  # chunks which need to be uploaded
        self.wark = None  # type: str
        self.url = None  # type: str

        # set by upload
        self.up_b = 0  # type: int
        self.up_c = 0  # type: int

        # m = "size({}) lmod({}) top({}) rel({}) abs({}) name({})"
        # eprint(m.format(self.size, self.lmod, self.top, self.rel, self.abs, self.name))


class FileSlice(object):
    """file-like object providing a fixed window into a file"""

    def __init__(self, file, cid):
        # type: (File, str) -> FileSlice

        self.car, self.len = file.kchunks[cid]
        self.cdr = self.car + self.len
        self.ofs = 0  # type: int
        self.f = open(file.abs, "rb", 512 * 1024)
        self.f.seek(self.car)

        # https://stackoverflow.com/questions/4359495/what-is-exactly-a-file-like-object-in-python
        # IOBase, RawIOBase, BufferedIOBase
        funs = "close closed __enter__ __exit__ __iter__ isatty __next__ readable seekable writable"
        try:
            for fun in funs.split():
                setattr(self, fun, getattr(self.f, fun))
        except:
            pass  # py27 probably

    def tell(self):
        return self.ofs

    def seek(self, ofs, wh=0):
        if wh == 1:
            ofs = self.ofs + ofs
        elif wh == 2:
            ofs = self.len + ofs  # provided ofs is negative

        if ofs < 0:
            ofs = 0
        elif ofs >= self.len:
            ofs = self.len - 1

        self.ofs = ofs
        self.f.seek(self.car + ofs)

    def read(self, sz):
        sz = min(sz, self.len - self.ofs)
        ret = self.f.read(sz)
        self.ofs += len(ret)
        return ret
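

# minimal sketch of how a FileSlice behaves (hypothetical file and chunk-id):
#   fs = FileSlice(file, cid)  # cid maps to (offset, size) via file.kchunks
#   buf = fs.read(65536)       # reads file.abs starting at the chunk offset
#   fs.seek(0, 2)              # seeks are clamped to stay inside the window
# upload() hands one of these to requests as the POST body, so only the
# bytes belonging to that one chunk ever leave the file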


def eprint(*a, **ka):
    ka["file"] = sys.stderr
    if not PY2:
        ka["flush"] = True

    print(*a, **ka)
    if PY2:
        sys.stderr.flush()


def termsize():
    import os

    env = os.environ

    def ioctl_GWINSZ(fd):
        try:
            import fcntl, termios, struct

            cr = struct.unpack("hh", fcntl.ioctl(fd, termios.TIOCGWINSZ, "1234"))
        except:
            return
        return cr

    cr = ioctl_GWINSZ(0) or ioctl_GWINSZ(1) or ioctl_GWINSZ(2)
    if not cr:
        try:
            fd = os.open(os.ctermid(), os.O_RDONLY)
            cr = ioctl_GWINSZ(fd)
            os.close(fd)
        except:
            pass
    if not cr:
        try:
            cr = (env["LINES"], env["COLUMNS"])
        except:
            cr = (25, 80)
    return int(cr[1]), int(cr[0])


class CTermsize(object):
    def __init__(self):
        self.ev = False
        self.margin = None
        self.g = None
        self.w, self.h = termsize()

        try:
            signal.signal(signal.SIGWINCH, self.ev_sig)
        except:
            return

        thr = threading.Thread(target=self.worker)
        thr.daemon = True
        thr.start()

    def worker(self):
        while True:
            time.sleep(0.5)
            if not self.ev:
                continue

            self.ev = False
            self.w, self.h = termsize()

            if self.margin is not None:
                self.scroll_region(self.margin)

    def ev_sig(self, *a, **ka):
        self.ev = True

    def scroll_region(self, margin):
        self.margin = margin
        if margin is None:
            self.g = None
            # save cursor, reset the scroll region, restore cursor
            eprint("\033[s\033[r\033[u")
        else:
            # reserve the bottom `margin` rows as a status area by
            # limiting the scroll region to the rows above it
            self.g = 1 + self.h - margin
            m = "{}\033[{}A".format("\n" * margin, margin)
            eprint("{}\033[s\033[1;{}r\033[u".format(m, self.g - 1), end="")


ss = CTermsize()


def statdir(top):
    """non-recursive listing of directory contents, along with stat() info"""
    if hasattr(os, "scandir"):
        with os.scandir(top) as dh:
            for fh in dh:
                yield [os.path.join(top, fh.name), fh.stat()]
    else:
        for name in os.listdir(top):
            abspath = os.path.join(top, name)
            yield [abspath, os.stat(abspath)]


def walkdir(top):
    """recursive statdir"""
    for ap, inf in statdir(top):
        if stat.S_ISDIR(inf.st_mode):
            for x in walkdir(ap):
                yield x
        else:
            yield ap, inf


def walkdirs(tops):
    """recursive statdir for a list of tops, yields [top, relpath, stat]"""
    for top in tops:
        if os.path.isdir(top):
            for ap, inf in walkdir(top):
                yield top, ap[len(top) + 1 :], inf
        else:
            sep = "{}".format(os.sep).encode("ascii")
            d, n = top.rsplit(sep, 1)
            yield d, n, os.stat(top)
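

# for reference, walkdirs([b"/home/ed/music"]) would yield tuples like
#   (b"/home/ed/music", b"album/01.flac", <stat_result>)
# (hypothetical paths; the top/relpath split is what later builds the
# server-side destination path)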


# from copyparty/util.py
def humansize(sz, terse=False):
    """picks a sensible unit for the given extent"""
    for unit in ["B", "KiB", "MiB", "GiB", "TiB"]:
        if sz < 1024:
            break

        sz /= 1024.0

    ret = " ".join([str(sz)[:4].rstrip("."), unit])

    if not terse:
        return ret

    return ret.replace("iB", "").replace(" ", "")
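

# e.g. humansize(1536) == "1.5 KiB" and humansize(1536, True) == "1.5K";
# values below 1024 stay in bytes: humansize(999) == "999 B"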


# from copyparty/up2k.py
def up2k_chunksize(filesize):
    """gives the correct chunksize for up2k hashing"""
    chunksize = 1024 * 1024
    stepsize = 512 * 1024
    while True:
        for mul in [1, 2]:
            nchunks = math.ceil(filesize * 1.0 / chunksize)
            if nchunks <= 256 or chunksize >= 32 * 1024 * 1024:
                return chunksize

            chunksize += stepsize
            stepsize *= mul
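

# sanity-checking the growth pattern above: the loop aims for at most 256
# chunks, with the chunksize capped at 32 MiB, so (values derived from the
# loop) a 256 MiB file gets 1 MiB chunks, a 1 GiB file gets 4 MiB chunks,
# and files past 8 GiB hit the 32 MiB cap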


# mostly from copyparty/up2k.py
def get_hashlist(file, pcb):
    # type: (File, any) -> None
    """generates the up2k hashlist from file contents, inserts it into `file`"""

    chunk_sz = up2k_chunksize(file.size)
    file_rem = file.size
    file_ofs = 0
    ret = []
    with open(file.abs, "rb", 512 * 1024) as f:
        while file_rem > 0:
            hashobj = hashlib.sha512()
            chunk_sz = chunk_rem = min(chunk_sz, file_rem)
            while chunk_rem > 0:
                buf = f.read(min(chunk_rem, 64 * 1024))
                if not buf:
                    raise Exception("EOF at " + str(f.tell()))

                hashobj.update(buf)
                chunk_rem -= len(buf)

            digest = hashobj.digest()[:33]
            digest = base64.urlsafe_b64encode(digest).decode("utf-8")

            ret.append([digest, file_ofs, chunk_sz])
            file_ofs += chunk_sz
            file_rem -= chunk_sz

            if pcb:
                pcb(file, file_ofs)

    file.cids = ret
    file.kchunks = {k: [v1, v2] for k, v1, v2 in ret}
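

# note: truncating the sha512 to 33 bytes means every chunk-id encodes to
# exactly 44 urlsafe-base64 characters with no padding (33 * 8 == 44 * 6)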


def handshake(req_ses, url, file, pw, search):
    # type: (requests.Session, str, File, any, bool) -> list[str]
    """
    performs a handshake with the server; reply is:
      if search, a list of search results
      otherwise, a list of chunks to upload
    """

    req = {
        "hash": [x[0] for x in file.cids],
        "name": file.name,
        "lmod": file.lmod,
        "size": file.size,
    }
    if search:
        req["srch"] = 1

    headers = {"Content-Type": "text/plain"}  # wtf ed
    if pw:
        headers["Cookie"] = "=".join(["cppwd", pw])

    if file.url:
        url = file.url
    elif b"/" in file.rel:
        url += file.rel.rsplit(b"/", 1)[0].decode("utf-8", "replace")

    r = req_ses.post(url, headers=headers, json=req)
    try:
        r = r.json()
    except:
        raise Exception(r.text)

    if search:
        return r["hits"]

    try:
        pre, url = url.split("://")
        pre += "://"
    except:
        pre = ""

    file.url = pre + url.split("/")[0] + r["purl"]
    file.name = r["name"]
    file.wark = r["wark"]

    return r["hash"]
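

# rough shape of the handshake exchange (field values are hypothetical):
#   -> {"hash": ["Zw6L...", ...], "name": "a.flac", "lmod": 1633000000, "size": 104857600}
#   <- {"purl": "/inc/", "name": "a.flac", "wark": "y6NV...", "hash": ["Zw6L...", ...]}
# the returned "hash" lists only the chunks the server still wants, which is
# what makes interrupted uploads resume for free on the next handshake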


def upload(req_ses, file, cid, pw):
    # type: (requests.Session, File, str, any) -> None
    """upload one specific chunk, `cid` (a chunk-hash)"""

    headers = {
        "X-Up2k-Hash": cid,
        "X-Up2k-Wark": file.wark,
        "Content-Type": "application/octet-stream",
    }
    if pw:
        headers["Cookie"] = "=".join(["cppwd", pw])

    f = FileSlice(file, cid)
    try:
        r = req_ses.post(file.url, headers=headers, data=f)
        if not r:
            raise Exception(repr(r))

        _ = r.content
    finally:
        f.f.close()
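

# handing the FileSlice to requests as `data` makes it stream the chunk from
# disk in small reads instead of buffering up to 32 MiB per upload thread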


class Daemon(threading.Thread):
    def __init__(self, *a, **ka):
        threading.Thread.__init__(self, *a, **ka)
        self.daemon = True


class Ctl(object):
    """
    this will be the coordinator which runs everything in parallel
    (hashing, handshakes, uploads) but right now it's pretty dumb
    """
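
    # pipeline: hasher -> q_handshake -> handshaker -> q_upload -> uploader;
    # the uploader re-queues a file for a final handshake once all its chunks
    # are sent, so completion is confirmed by the server before it counts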

    def __init__(self, ar):
        self.ar = ar
        ar.url = ar.url.rstrip("/") + "/"
        ar.files = [
            os.path.abspath(os.path.realpath(x.encode("utf-8"))) for x in ar.files
        ]

        eprint("\nscanning {} locations".format(len(ar.files)))

        nfiles = 0
        nbytes = 0
        for _, _, inf in walkdirs(ar.files):
            nfiles += 1
            nbytes += inf.st_size

        eprint("found {} files, {}\n".format(nfiles, humansize(nbytes)))
        self.nfiles = nfiles
        self.nbytes = nbytes

        if ar.td:
            req_ses.verify = False
        if ar.te:
            req_ses.verify = ar.te

        self.filegen = walkdirs(ar.files)
        if ar.safe:
            self.safe()
        else:
            self.fancy()

    def safe(self):
        """minimal basic slow boring fallback codepath"""
        search = self.ar.s
        for nf, (top, rel, inf) in enumerate(self.filegen):
            file = File(top, rel, inf.st_size, inf.st_mtime)
            upath = file.abs.decode("utf-8", "replace")

            print("{} {}\n hash...".format(self.nfiles - nf, upath))
            get_hashlist(file, None)

            while True:
                print(" hs...")
                hs = handshake(req_ses, self.ar.url, file, self.ar.a, search)
                if search:
                    if hs:
                        for hit in hs:
                            print(" found: {}{}".format(self.ar.url, hit["rp"]))
                    else:
                        print(" NOT found")
                    break

                file.ucids = hs
                if not hs:
                    break

                print("{} {}".format(self.nfiles - nf, upath))
                ncs = len(hs)
                for nc, cid in enumerate(hs):
                    print(" {} up {}".format(ncs - nc, cid))
                    upload(req_ses, file, cid, self.ar.a)

            print(" ok!")

    def fancy(self):
        self.hash_f = 0
        self.hash_c = 0
        self.hash_b = 0
        self.up_f = 0
        self.up_c = 0
        self.up_b = 0
        self.hasher_busy = 1
        self.handshaker_busy = 0
        self.uploader_busy = 0

        self.mutex = threading.Lock()
        self.q_handshake = Queue()  # type: Queue[File]
        self.q_recheck = Queue()  # type: Queue[File]  # partial upload exists [...]
        self.q_upload = Queue()  # type: Queue[tuple[File, str]]

        self.cb_hasher = self._cb_hasher_basic
        self.cb_uploader = self._cb_uploader_basic
        self.st_hash = [None, "(idle, starting...)"]  # type: tuple[File, int]
        self.st_up = [None, "(idle, starting...)"]  # type: tuple[File, int]
        if VT100:
            self.cb_hasher = self._cb_hasher_vt100
            self.cb_uploader = self._cb_uploader_vt100
            atexit.register(self.cleanup_vt100)
            ss.scroll_region(3)
            # eprint("\033[s\033[{}Hhello from g\033[u".format(ss.g))

        Daemon(target=self.hasher).start()
        for _ in range(self.ar.j):
            Daemon(target=self.handshaker).start()
            Daemon(target=self.uploader).start()

        while True:
            time.sleep(0.07)
            with self.mutex:
                if (
                    self.q_handshake.empty()
                    and self.q_upload.empty()
                    and not self.hasher_busy
                    and not self.handshaker_busy
                    and not self.uploader_busy
                ):
                    break

            if VT100:
                maxlen = ss.w - len(str(self.nfiles)) - 14
                txt = "\033[s\033[{}H".format(ss.g)
                for y, k, st, f, c, b in [
                    [0, "hash", self.st_hash, self.hash_f, self.hash_c, self.hash_b],
                    [1, "send", self.st_up, self.up_f, self.up_c, self.up_b],
                ]:
                    txt += "\033[{}H{}:".format(ss.g + y, k)
                    file, arg = st
                    if not file:
                        txt += " {}\033[K".format(arg)
                    else:
                        if y:
                            p = 100 * file.up_b / file.size
                        else:
                            p = 100 * arg / file.size

                        name = file.abs.decode("utf-8", "replace")[-maxlen:]

                        m = "{:6.1f}% {} {}\033[K"
                        txt += m.format(p, self.nfiles - f, name)

                eprint(txt + "\033[u", end="")

    def cleanup_vt100(self):
        ss.scroll_region(None)
        eprint("\033[J", end="")

    def _cb_hasher_basic(self, file, ofs):
        eprint(".", end="")

    def _cb_uploader_basic(self, file, cid):
        eprint("*", end="")

    def _cb_hasher_vt100(self, file, ofs):
        self.st_hash = [file, ofs]

    def _cb_uploader_vt100(self, file, cid):
        self.st_up = [file, cid]

    def hasher(self):
        for nf, (top, rel, inf) in enumerate(self.filegen):
            file = File(top, rel, inf.st_size, inf.st_mtime)
            while True:
                with self.mutex:
                    if (
                        self.hash_b - self.up_b < 1024 * 1024 * 128  # stay <128 MiB ahead of uploads
                        and self.hash_c - self.up_c < 64  # and <64 chunks ahead
                        and (
                            not self.ar.nh
                            or (
                                self.q_upload.empty()
                                and self.q_handshake.empty()
                                and not self.uploader_busy
                            )
                        )
                    ):
                        break

                time.sleep(0.05)

            if not VT100:
                upath = file.abs.decode("utf-8", "replace")
                eprint("\n{:6d} hash {}\n".format(self.nfiles - nf, upath), end="")

            get_hashlist(file, self.cb_hasher)
            with self.mutex:
                self.hash_f += 1
                self.hash_c += len(file.cids)
                self.hash_b += file.size

            self.q_handshake.put(file)

        self.hasher_busy = 0
        self.st_hash = [None, "(finished)"]

    def handshaker(self):
        search = self.ar.s
        q = self.q_handshake
        while True:
            file = q.get()
            if not file:
                if q == self.q_handshake:
                    q = self.q_recheck
                    q.put(None)
                    continue

                self.q_upload.put(None)
                break

            with self.mutex:
                self.handshaker_busy += 1

            upath = file.abs.decode("utf-8", "replace")
            if not VT100:
                eprint("\n handshake {}\n".format(upath), end="")

            try:
                hs = handshake(req_ses, self.ar.url, file, self.ar.a, search)
            except Exception as ex:
                if q == self.q_handshake and "<pre>partial upload exists" in str(ex):
                    self.q_recheck.put(file)
                    hs = []
                else:
                    raise

            if search:
                if hs:
                    for hit in hs:
                        m = "found: {}\n {}{}\n"
                        print(m.format(upath, self.ar.url, hit["rp"]), end="")
                else:
                    print("NOT found: {}\n".format(upath), end="")

                with self.mutex:
                    self.up_f += 1
                    self.up_c += len(file.cids)
                    self.up_b += file.size
                    self.handshaker_busy -= 1

                continue

            with self.mutex:
                if not hs:
                    # all chunks done
                    self.up_f += 1
                    self.up_c += len(file.cids) - file.up_c
                    self.up_b += file.size - file.up_b

                if hs and file.up_c:
                    # some chunks failed
                    self.up_c -= len(hs)
                    file.up_c -= len(hs)
                    for cid in hs:
                        sz = file.kchunks[cid][1]
                        self.up_b -= sz
                        file.up_b -= sz

                file.ucids = hs
                self.handshaker_busy -= 1

            if not hs and VT100:
                print("uploaded {}".format(upath))
            for cid in hs:
                self.q_upload.put([file, cid])

    def uploader(self):
        while True:
            task = self.q_upload.get()
            if not task:
                self.st_up = [None, "(finished)"]
                break

            with self.mutex:
                self.uploader_busy += 1

            file, cid = task
            upload(req_ses, file, cid, self.ar.a)

            with self.mutex:
                sz = file.kchunks[cid][1]
                file.ucids = [x for x in file.ucids if x != cid]
                if not file.ucids:
                    self.q_handshake.put(file)

                file.up_b += sz
                self.up_b += sz
                file.up_c += 1
                self.up_c += 1
                self.uploader_busy -= 1

            self.cb_uploader(file, cid)


def main():
    time.strptime("19970815", "%Y%m%d")  # python#7980
    if WINDOWS:
        os.system("rem")  # enables colors

    # fmt: off
    ap = app = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    ap.add_argument("url", type=unicode, help="server url, including destination folder")
    ap.add_argument("files", type=unicode, nargs="+", help="files and/or folders to process")
    ap.add_argument("-a", metavar="PASSWORD", help="password")
    ap.add_argument("-s", action="store_true", help="file-search (disables upload)")
    ap = app.add_argument_group("performance tweaks")
    ap.add_argument("-j", type=int, metavar="THREADS", default=4, help="parallel connections")
    ap.add_argument("-nh", action="store_true", help="disable hashing while uploading")
    ap.add_argument("--safe", action="store_true", help="use simple fallback approach")
    ap = app.add_argument_group("tls")
    ap.add_argument("-te", metavar="PEM_FILE", help="certificate to expect/verify")
    ap.add_argument("-td", action="store_true", help="disable certificate check")
    # fmt: on

    Ctl(app.parse_args())


if __name__ == "__main__":
    main()