diff --git a/copyparty/__init__.py b/copyparty/__init__.py index 641414e7..28dbc8ba 100644 --- a/copyparty/__init__.py +++ b/copyparty/__init__.py @@ -54,6 +54,7 @@ except: class EnvParams(object): def __init__(self) -> None: + self.pkg = None self.t0 = time.time() self.mod = "" self.cfg = "" diff --git a/copyparty/__main__.py b/copyparty/__main__.py index c4f2ef75..56d6f443 100644 --- a/copyparty/__main__.py +++ b/copyparty/__main__.py @@ -57,6 +57,7 @@ from .util import ( ansi_re, b64enc, dedent, + has_resource, min_ex, pybin, termsize, @@ -216,6 +217,7 @@ def init_E(EE: EnvParams) -> None: raise Exception("could not find a writable path for config") + E.pkg = sys.modules[__package__] E.mod = os.path.dirname(os.path.realpath(__file__)) if E.mod.endswith("__init__"): E.mod = os.path.dirname(E.mod) @@ -325,8 +327,7 @@ def ensure_locale() -> None: def ensure_webdeps() -> None: - ap = os.path.join(E.mod, "web/deps/mini-fa.woff") - if os.path.exists(ap): + if has_resource(E, "web/deps/mini-fa.woff"): return warn( diff --git a/copyparty/httpcli.py b/copyparty/httpcli.py index 53a6e34b..7abd37b5 100644 --- a/copyparty/httpcli.py +++ b/copyparty/httpcli.py @@ -68,12 +68,15 @@ from .util import ( get_spd, guess_mime, gzip_orig_sz, + gzip_file_orig_sz, + has_resource, hashcopy, hidedir, html_bescape, html_escape, humansize, ipnorm, + load_resource, loadpy, log_reloc, min_ex, @@ -93,6 +96,7 @@ from .util import ( sanitize_vpath, sendfile_kern, sendfile_py, + stat_resource, ub64dec, ub64enc, ujoin, @@ -1093,12 +1097,11 @@ class HttpCli(object): if self.vpath == ".cpr/metrics": return self.conn.hsrv.metrics.tx(self) - path_base = os.path.join(self.E.mod, "web") - static_path = absreal(os.path.join(path_base, self.vpath[5:])) + static_path = os.path.join("web", self.vpath[5:]) if static_path in self.conn.hsrv.statics: - return self.tx_file(static_path) + return self.tx_res(static_path) - if not static_path.startswith(path_base): + if not undot(static_path).startswith("web"): t 
def tx_res(self, req_path: str) -> bool:
    """Send a bundled package resource (a file below ``web/``) as the reply.

    Looks up ``req_path`` and ``req_path + ".gz"`` through the resource
    helpers, answers with ``self.tx_404()`` when neither exists, honours
    conditional requests through ``self._chk_lastmod`` and serves the
    gzip edition either as-is or decompressed, depending on the client.

    Args:
        req_path: resource name relative to the package, without ``.gz``.

    Returns:
        The result of ``self.tx_404()`` when the resource is missing,
        False when ``sendfile_py`` reports unsent bytes, True otherwise.
    """
    status = 200
    logmsg = "{:4} {} ".format("", self.req)
    logtail = ""

    editions = {}
    file_ts = 0

    if has_resource(self.E, req_path):
        st = stat_resource(self.E, req_path)
        if st:
            file_ts = max(file_ts, st.st_mtime)
        editions["plain"] = req_path

    gz_path = req_path + ".gz"
    if has_resource(self.E, gz_path):
        st = stat_resource(self.E, gz_path)
        # Compare against the plain edition BEFORE folding the gz mtime
        # into file_ts; doing it afterwards makes the comparison always
        # false, which hid every stat-able .gz (gz-only resources => 404).
        # A gz that cannot be stat'ed, or is not older than the plain
        # edition, is usable.
        if not st or st.st_mtime >= file_ts:
            editions[".gz"] = gz_path
        if st:
            file_ts = max(file_ts, st.st_mtime)

    if not editions:
        return self.tx_404()

    #
    # if-modified

    if file_ts > 0:
        file_lastmod, do_send = self._chk_lastmod(int(file_ts))
        self.out_headers["Last-Modified"] = file_lastmod
        if not do_send:
            status = 304

        if self.can_write:
            self.out_headers["X-Lastmod3"] = str(int(file_ts * 1000))
    else:
        # no timestamp available, so the conditional request cannot be honoured
        do_send = True

    #
    # Accept-Encoding and UA decides which edition to send

    decompress = False
    supported_editions = [
        x.strip()
        for x in self.headers.get("accept-encoding", "").lower().split(",")
    ]
    if ".gz" in editions:
        is_compressed = True
        selected_edition = ".gz"

        if "gzip" not in supported_editions:
            decompress = True
        else:
            if re.match(r"MSIE [4-6]\.", self.ua) and " SV1" not in self.ua:
                decompress = True

        if not decompress:
            self.out_headers["Content-Encoding"] = "gzip"
    else:
        is_compressed = False
        selected_edition = "plain"

    res_path = editions[selected_edition]
    logmsg += "{} ".format(selected_edition.lstrip("."))

    raw = load_resource(self.E, res_path)
    res = raw
    try:
        if decompress:
            file_sz = gzip_file_orig_sz(raw)
            # GzipFile(fileobj=...) instead of gzip.open(): the latter only
            # accepts a filename on python2
            res = gzip.GzipFile(fileobj=raw, mode="rb")
        else:
            raw.seek(0, os.SEEK_END)
            file_sz = raw.tell()
            raw.seek(0, os.SEEK_SET)

        #
        # send reply

        if is_compressed:
            self.out_headers["Cache-Control"] = "max-age=604869"
        else:
            self.permit_caching()

        if "txt" in self.uparam:
            mime = "text/plain; charset={}".format(self.uparam["txt"] or "utf-8")
        elif "mime" in self.uparam:
            mime = str(self.uparam.get("mime"))
        else:
            mime = guess_mime(req_path)

        logmsg += unicode(status) + logtail

        if self.mode == "HEAD" or not do_send:
            if self.do_log:
                self.log(logmsg)

            self.send_headers(length=file_sz, status=status, mime=mime)
            return True

        ret = True
        self.send_headers(length=file_sz, status=status, mime=mime)
        remains = sendfile_py(
            self.log,
            0,
            file_sz,
            res,
            self.s,
            self.args.s_wr_sz,
            self.args.s_wr_slp,
            not self.args.no_poll,
        )

        if remains > 0:
            logmsg += " \033[31m" + unicode(file_sz - remains) + "\033[0m"
            ret = False

        spd = self._spd(file_sz - remains)
        if self.do_log:
            self.log("{}, {}".format(logmsg, spd))

        return ret
    finally:
        # closing the gzip wrapper does not close the stream it wraps,
        # so both are released explicitly on every exit path
        if res is not raw:
            res.close()
        raw.close()
def load_jinja2_resource(E: "EnvParams", name: str):
    """Return the text of template ``name`` from the package's ``web`` folder.

    Used as the load function of a ``jinja2.FunctionLoader``.

    Args:
        E: environment holding the package and module path to search.
        name: template filename relative to ``web``.

    Returns:
        Whatever ``read()`` returns for the resource opened in mode ``"r"``.
    """
    # the context manager closes the handle; previously it was left open
    # until garbage collection for every template loaded
    with load_resource(E, os.path.join("web", name), "r") as f:
        return f.read()
def gzip_orig_sz(fn: str) -> int:
    """Return the uncompressed size stored in the trailer of gzip file ``fn``."""
    with open(fsenc(fn), "rb") as f:
        return gzip_file_orig_sz(f)


def gzip_file_orig_sz(f) -> int:
    """Return the uncompressed size stored in the trailer of a gzip stream.

    Reads the last four bytes of ``f`` (the ISIZE field) and puts the
    stream position back where it was, also when the read fails.

    Args:
        f: seekable binary file object positioned anywhere in a gzip file.

    Returns:
        The ISIZE value as an unsigned 32-bit integer; per the gzip format
        this is the original size modulo 2**32.

    Raises:
        Whatever ``seek``/``read``/``struct.unpack`` raise when the stream
        holds fewer than four bytes.
    """
    start = f.tell()
    try:
        f.seek(-4, os.SEEK_END)
        rv = f.read(4)
    finally:
        f.seek(start, os.SEEK_SET)

    # ISIZE is little-endian in the file; a bare "I" would decode it in
    # native byte order and give garbage on big-endian hosts
    return sunpack(b"<I", rv)[0]  # type: ignore
try:
    import importlib.resources as impresources

    if not hasattr(impresources, "files"):
        # python 3.7 / 3.8 ship importlib.resources without files();
        # treat that like a missing module so the backport gets a chance
        raise ImportError("importlib.resources.files is unavailable")
except ImportError:
    try:
        import importlib_resources as impresources
    except ImportError:
        impresources = None
try:
    import pkg_resources
except ImportError:
    pkg_resources = None


def _pkg_files(E: "EnvParams"):
    """Return the importlib traversable root of ``E.pkg``, or None if unavailable."""
    if not impresources or not E.pkg:
        return None
    try:
        return impresources.files(E.pkg)
    except ImportError:
        return None


def _pkg_name(E: "EnvParams") -> str:
    """Return the name of ``E.pkg`` for pkg_resources lookups, or "" if unusable."""
    if not pkg_resources or not E.pkg:
        return ""
    return E.pkg.__name__


def _pkg_resource_exists(pkg: str, name: str) -> bool:
    """Tell whether pkg_resources can see ``name`` inside package ``pkg``."""
    if not pkg_resources or not pkg:
        return False
    try:
        return pkg_resources.resource_exists(pkg, name)
    except NotImplementedError:
        return False


def stat_resource(E: "EnvParams", name: str):
    """Return ``os.stat`` of ``name`` below ``E.mod``, or None if it is not there.

    Only the filesystem is consulted; a resource that is reachable solely
    through importlib / pkg_resources yields None.
    """
    path = os.path.join(E.mod, name)
    try:
        return os.stat(fsenc(path))
    except (OSError, ValueError):
        return None


def has_resource(E: "EnvParams", name: str) -> bool:
    """Tell whether ``name`` exists as a file or directory in any source.

    Sources are tried in order: importlib resources of ``E.pkg``,
    pkg_resources, then the filesystem below ``E.mod``.
    """
    resources = _pkg_files(E)
    if resources is not None:
        res = resources.joinpath(name)
        if res.is_file() or res.is_dir():
            return True

    if _pkg_resource_exists(_pkg_name(E), name):
        return True

    return os.path.exists(os.path.join(E.mod, name))


def load_resource(E: "EnvParams", name: str, mode="rb", encoding="utf-8"):
    """Open resource ``name`` and return a file object for it.

    Sources are tried in the same order as in ``has_resource``; the
    filesystem fallback raises the usual ``open`` errors when the file
    does not exist.

    Args:
        E: environment holding the package (``pkg``) and its path (``mod``).
        name: resource name relative to the package.
        mode: ``open``-style mode; text mode is assumed when it has no "b".
        encoding: text encoding, only used in text mode. Pinned to UTF-8 by
            default so decoding does not depend on the locale.

    Returns:
        An open file object; the caller is responsible for closing it.
    """
    text = "b" not in mode

    resources = _pkg_files(E)
    if resources is not None:
        res = resources.joinpath(name)
        if res.is_file():
            if text:
                return res.open(mode, encoding=encoding)
            return res.open(mode)

    pkg_name = _pkg_name(E)
    if _pkg_resource_exists(pkg_name, name) and not pkg_resources.resource_isdir(
        pkg_name, name
    ):
        stream = pkg_resources.resource_stream(pkg_name, name)
        if text:
            stream = io.TextIOWrapper(stream, encoding=encoding)
        return stream

    path = os.path.join(E.mod, name)
    if text:
        # io.open accepts encoding= on python2 as well
        return io.open(path, mode, encoding=encoding)
    return open(path, mode)


def walk_resources(E: "EnvParams", name: str):
    """Walk resource directory ``name`` like ``os.walk``, merging all sources.

    The filesystem below ``E.mod`` is preferred as the primary walker,
    then importlib resources, then pkg_resources; entries which only the
    remaining sources know about are merged into each directory.

    Args:
        E: environment holding the package (``pkg``) and its path (``mod``).
        name: directory to walk, relative to the package.

    Yields:
        ``(base, dirs, files)`` tuples where ``base`` is always relative to
        the package (it starts with ``name``), so it can be handed straight
        to ``has_resource`` / ``load_resource`` / ``stat_resource``.
        Nothing is yielded when the directory exists in no source.
    """
    iresources = _pkg_files(E)
    pkg_name = _pkg_name(E)
    base_path = os.path.join(E.mod, name)

    def walk_fs():
        # os.walk reports absolute paths; translate them to package-relative
        # ones so every walker speaks the same language
        for base, dirs, files in os.walk(base_path):
            if base == base_path:
                rel = name
            else:
                rel = os.path.join(name, os.path.relpath(base, base_path))
            yield (rel, dirs, files)

    def walk_idirs(base, root):
        # breadth-first walk of an importlib traversable
        queue = [(base, root)]
        while queue:
            b, r = queue.pop(0)
            d = []
            f = []
            for e in r.iterdir():
                if e.is_dir():
                    d.append(e.name)
                    queue.append((os.path.join(b, e.name), e))
                elif e.is_file():
                    f.append(e.name)
            yield (b, d, f)

    def walk_pdirs(base):
        # breadth-first walk through pkg_resources
        queue = [base]
        while queue:
            b = queue.pop(0)
            d = []
            f = []
            for e in pkg_resources.resource_listdir(pkg_name, b):
                ep = os.path.join(b, e)
                # resource_isdir wants the path of the entry, not its bare name
                if pkg_resources.resource_isdir(pkg_name, ep):
                    d.append(e)
                    queue.append(ep)
                else:
                    f.append(e)
            yield (b, d, f)

    def walk_single(relbase, dirs, files, skip_ires, skip_pres):
        # work on copies so the primary walker's own lists stay untouched
        dirs = list(dirs)
        files = list(files)

        ires_dirs = []
        if not skip_ires and iresources is not None:
            iresbase = iresources.joinpath(relbase)
            if iresbase.is_dir():
                for ientry in iresbase.iterdir():
                    if ientry.is_dir():
                        if ientry.name not in dirs:
                            dirs.append(ientry.name)
                            ires_dirs.append(ientry.name)
                    elif ientry.is_file() and ientry.name not in files:
                        files.append(ientry.name)

        pres_dirs = []
        if (
            not skip_pres
            and _pkg_resource_exists(pkg_name, relbase)
            and pkg_resources.resource_isdir(pkg_name, relbase)
        ):
            for pentry in pkg_resources.resource_listdir(pkg_name, relbase):
                ppath = os.path.join(relbase, pentry)
                if pkg_resources.resource_isdir(pkg_name, ppath):
                    if pentry not in dirs:
                        dirs.append(pentry)
                        pres_dirs.append(pentry)
                elif pentry not in files:
                    files.append(pentry)

        # the extra directories are already part of dirs; adding the
        # ires/pres lists again would list them twice
        yield (relbase, dirs, files)

        # directories the primary walker cannot see must be walked here
        for d in ires_dirs:
            sub = iresources.joinpath(relbase).joinpath(d)
            for ibase, idirs, ifiles in walk_idirs(os.path.join(relbase, d), sub):
                for item in walk_single(ibase, idirs, ifiles, True, skip_pres):
                    yield item

        for d in pres_dirs:
            for item in walk_pdirs(os.path.join(relbase, d)):
                yield item

    skip_ires = skip_pres = False
    if os.path.isdir(base_path):
        walker = walk_fs()
    elif iresources is not None and iresources.joinpath(name).is_dir():
        walker = walk_idirs(name, iresources.joinpath(name))
        skip_ires = True
    elif _pkg_resource_exists(pkg_name, name) and pkg_resources.resource_isdir(
        pkg_name, name
    ):
        walker = walk_pdirs(name)
        skip_pres = True
    else:
        # unknown everywhere; behave like os.walk on a missing directory
        return

    # explicit loops rather than "yield from", which python2 cannot parse
    for base, dirs, files in walker:
        for item in walk_single(base, dirs, files, skip_ires, skip_pres):
            yield item