Python 3.7 package resources support (#98)

add support for reading webdeps and jinja templates through either
importlib_resources or pkg_resources, which removes the need to
extract them to a temporary folder on the filesystem

* util: add helper functions to abstract embedded resource access
* http*: serve embedded resources through resource abstraction
* main: check webdeps through resource abstraction
* httpconn: remove unused method `respath(name)`
* use __package__ to find package resources
* util: use importlib_resources backport if available
* pass E.pkg as module object for importlib_resources compatibility
* util: add pkg_resources compatibility to resource abstraction
Shiz 2024-09-19 11:00:34 +02:00 committed by GitHub
parent 678675a9a6
commit a462a644fb
6 changed files with 342 additions and 27 deletions
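
For context, a rough sketch of how callers are expected to use the new helpers from util.py; the read_webdep wrapper below is hypothetical and not part of the commit (only has_resource, load_resource and stat_resource are), and the import path assumes the copyparty package layout shown in the diffs:

    import os

    from copyparty.util import has_resource, load_resource, stat_resource

    def read_webdep(E, name):
        # E is the global EnvParams instance; the path resolves either from
        # E.mod on the filesystem, or from inside a zip/wheel through
        # importlib_resources / pkg_resources
        rel = os.path.join("web", "deps", name)
        if not has_resource(E, rel):
            return None
        st = stat_resource(E, rel)  # None when the package is not on disk
        with load_resource(E, rel, "rb") as f:
            return f.read(), (st.st_mtime if st else 0)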

copyparty/__init__.py

@@ -54,6 +54,7 @@ except:
class EnvParams(object):
def __init__(self) -> None:
self.pkg = None
self.t0 = time.time()
self.mod = ""
self.cfg = ""

copyparty/__main__.py

@@ -57,6 +57,7 @@ from .util import (
ansi_re,
b64enc,
dedent,
has_resource,
min_ex,
pybin,
termsize,
@@ -216,6 +217,7 @@ def init_E(EE: EnvParams) -> None:
raise Exception("could not find a writable path for config")
E.pkg = sys.modules[__package__]
E.mod = os.path.dirname(os.path.realpath(__file__))
if E.mod.endswith("__init__"):
E.mod = os.path.dirname(E.mod)
@@ -325,8 +327,7 @@ def ensure_locale() -> None:
def ensure_webdeps() -> None:
ap = os.path.join(E.mod, "web/deps/mini-fa.woff")
if os.path.exists(ap):
if has_resource(E, "web/deps/mini-fa.woff"):
return
warn(

copyparty/httpcli.py

@@ -68,12 +68,15 @@ from .util import (
get_spd,
guess_mime,
gzip_orig_sz,
gzip_file_orig_sz,
has_resource,
hashcopy,
hidedir,
html_bescape,
html_escape,
humansize,
ipnorm,
load_resource,
loadpy,
log_reloc,
min_ex,
@@ -93,6 +96,7 @@ from .util import (
sanitize_vpath,
sendfile_kern,
sendfile_py,
stat_resource,
ub64dec,
ub64enc,
ujoin,
@@ -1093,12 +1097,11 @@ class HttpCli(object):
if self.vpath == ".cpr/metrics":
return self.conn.hsrv.metrics.tx(self)
path_base = os.path.join(self.E.mod, "web")
static_path = absreal(os.path.join(path_base, self.vpath[5:]))
static_path = os.path.join("web", self.vpath[5:])
if static_path in self.conn.hsrv.statics:
return self.tx_file(static_path)
return self.tx_res(static_path)
if not static_path.startswith(path_base):
if not undot(static_path).startswith("web"):
t = "malicious user; attempted path traversal [{}] => [{}]"
self.log(t.format(self.vpath, static_path), 1)
self.cbonk(self.conn.hsrv.gmal, self.req, "trav", "path traversal")
@@ -3300,6 +3303,129 @@ class HttpCli(object):
return txt
def tx_res(self, req_path: str) -> bool:
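# like tx_file, but serves bundled web/ assets through the resource
# abstraction; picks the plain or .gz edition based on Accept-Encoding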
status = 200
logmsg = "{:4} {} ".format("", self.req)
logtail = ""
editions = {}
file_ts = 0
if has_resource(self.E, req_path):
st = stat_resource(self.E, req_path)
if st:
file_ts = max(file_ts, st.st_mtime)
editions["plain"] = req_path
if has_resource(self.E, req_path + ".gz"):
st = stat_resource(self.E, req_path + ".gz")
if st:
file_ts = max(file_ts, st.st_mtime)
if not st or st.st_mtime > file_ts:
editions[".gz"] = req_path + ".gz"
if not editions:
return self.tx_404()
#
# if-modified
if file_ts > 0:
file_lastmod, do_send = self._chk_lastmod(int(file_ts))
self.out_headers["Last-Modified"] = file_lastmod
if not do_send:
status = 304
if self.can_write:
self.out_headers["X-Lastmod3"] = str(int(file_ts * 1000))
else:
do_send = True
#
# Accept-Encoding and UA decides which edition to send
decompress = False
supported_editions = [
x.strip()
for x in self.headers.get("accept-encoding", "").lower().split(",")
]
if ".gz" in editions:
is_compressed = True
selected_edition = ".gz"
if "gzip" not in supported_editions:
decompress = True
else:
if re.match(r"MSIE [4-6]\.", self.ua) and " SV1" not in self.ua:
decompress = True
if not decompress:
self.out_headers["Content-Encoding"] = "gzip"
else:
is_compressed = False
selected_edition = "plain"
res_path = editions[selected_edition]
logmsg += "{} ".format(selected_edition.lstrip("."))
res = load_resource(self.E, res_path)
if decompress:
file_sz = gzip_file_orig_sz(res)
res = gzip.open(res)
else:
res.seek(0, os.SEEK_END)
file_sz = res.tell()
res.seek(0, os.SEEK_SET)
#
# send reply
if is_compressed:
self.out_headers["Cache-Control"] = "max-age=604869"
else:
self.permit_caching()
if "txt" in self.uparam:
mime = "text/plain; charset={}".format(self.uparam["txt"] or "utf-8")
elif "mime" in self.uparam:
mime = str(self.uparam.get("mime"))
else:
mime = guess_mime(req_path)
logmsg += unicode(status) + logtail
if self.mode == "HEAD" or not do_send:
res.close()
if self.do_log:
self.log(logmsg)
self.send_headers(length=file_sz, status=status, mime=mime)
return True
ret = True
self.send_headers(length=file_sz, status=status, mime=mime)
remains = sendfile_py(
self.log,
0,
file_sz,
res,
self.s,
self.args.s_wr_sz,
self.args.s_wr_slp,
not self.args.no_poll,
)
if remains > 0:
logmsg += " \033[31m" + unicode(file_sz - remains) + "\033[0m"
ret = False
spd = self._spd(file_sz - remains)
if self.do_log:
self.log("{}, {}".format(logmsg, spd))
return ret
def tx_file(self, req_path: str, ptop: Optional[str] = None) -> bool:
status = 200
logmsg = "{:4} {} ".format("", self.req)
@@ -3815,15 +3941,11 @@ class HttpCli(object):
return self.tx_404(True)
tpl = "mde" if "edit2" in self.uparam else "md"
html_path = os.path.join(self.E.mod, "web", "{}.html".format(tpl))
template = self.j2j(tpl)
st = bos.stat(fs_path)
ts_md = st.st_mtime
st = bos.stat(html_path)
ts_html = st.st_mtime
max_sz = 1024 * self.args.txt_max
sz_md = 0
lead = b""
@@ -3857,7 +3979,7 @@ class HttpCli(object):
fullfile = html_bescape(fullfile)
sz_md = len(lead) + len(fullfile)
file_ts = int(max(ts_md, ts_html, self.E.t0))
file_ts = int(max(ts_md, self.E.t0))
file_lastmod, do_send = self._chk_lastmod(file_ts)
self.out_headers["Last-Modified"] = file_lastmod
self.out_headers.update(NO_CACHE)
@@ -3896,7 +4018,7 @@ class HttpCli(object):
zs = template.render(**targs).encode("utf-8", "replace")
html = zs.split(boundary.encode("utf-8"))
if len(html) != 2:
raise Exception("boundary appears in " + html_path)
raise Exception("boundary appears in " + tpl)
self.send_headers(sz_md + len(html[0]) + len(html[1]), status)

copyparty/httpconn.py

@@ -103,9 +103,6 @@ class HttpConn(object):
self.log_src = ("%s \033[%dm%d" % (ip, color, self.addr[1])).ljust(26)
return self.log_src
def respath(self, res_name: str) -> str:
return os.path.join(self.E.mod, "web", res_name)
def log(self, msg: str, c: Union[int, str] = 0) -> None:
self.log_func(self.log_src, msg, c)

copyparty/httpsrv.py

@@ -68,13 +68,17 @@ from .util import (
NetMap,
absreal,
build_netmap,
has_resource,
ipnorm,
load_resource,
min_ex,
shut_socket,
spack,
start_log_thrs,
start_stackmon,
stat_resource,
ub64enc,
walk_resources,
)
if TYPE_CHECKING:
@@ -91,6 +95,10 @@ if not hasattr(socket, "AF_UNIX"):
setattr(socket, "AF_UNIX", -9001)
def load_jinja2_resource(E: EnvParams, name: str):
return load_resource(E, os.path.join("web", name), "r").read()
class HttpSrv(object):
"""
handles incoming connections using HttpConn to process http,
@@ -153,7 +161,7 @@ class HttpSrv(object):
self.u2idx_n = 0
env = jinja2.Environment()
env.loader = jinja2.FileSystemLoader(os.path.join(self.E.mod, "web"))
env.loader = jinja2.FunctionLoader(lambda f: load_jinja2_resource(self.E, f))
jn = [
"splash",
"shares",
@@ -166,8 +174,7 @@ class HttpSrv(object):
"cf",
]
self.j2 = {x: env.get_template(x + ".html") for x in jn}
zs = os.path.join(self.E.mod, "web", "deps", "prism.js.gz")
self.prism = os.path.exists(zs)
self.prism = has_resource(self.E, os.path.join("web", "deps", "prism.js.gz"))
self.ipa_nm = build_netmap(self.args.ipa)
self.xff_nm = build_netmap(self.args.xff_src)
@@ -210,9 +217,9 @@ class HttpSrv(object):
pass
def _build_statics(self) -> None:
for dp, _, df in os.walk(os.path.join(self.E.mod, "web")):
for dp, _, df in walk_resources(self.E, "web"):
for fn in df:
ap = absreal(os.path.join(dp, fn))
ap = os.path.join(dp, fn)
self.statics.add(ap)
if ap.endswith(".gz"):
self.statics.add(ap[:-3])
@@ -536,10 +543,20 @@ class HttpSrv(object):
v = self.E.t0
try:
with os.scandir(os.path.join(self.E.mod, "web")) as dh:
for fh in dh:
inf = fh.stat()
for (base, dirs, files) in walk_resources(self.E, "web"):
inf = stat_resource(self.E, base)
if inf:
v = max(v, inf.st_mtime)
for d in dirs:
inf = stat_resource(self.E, os.path.join(base, d))
if inf:
v = max(v, inf.st_mtime)
for f in files:
inf = stat_resource(self.E, os.path.join(base, f))
if inf:
v = max(v, inf.st_mtime)
# only do top-level
break
except:
pass
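
For reference, jinja2.FunctionLoader (which replaces the FileSystemLoader above) calls the supplied function with the template name, e.g. "splash.html", and expects the template source as a string, or None when the template does not exist. A minimal standalone sketch of that contract; the inline template dict merely stands in for load_jinja2_resource:

    import jinja2

    # stand-in for load_jinja2_resource: template name -> source text
    templates = {"splash.html": "<h1>{{ title }}</h1>"}

    env = jinja2.Environment()
    env.loader = jinja2.FunctionLoader(lambda name: templates.get(name))
    print(env.get_template("splash.html").render(title="copyparty"))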

copyparty/util.py

@@ -7,6 +7,7 @@ import binascii
import errno
import hashlib
import hmac
import io
import json
import logging
import math
@@ -30,7 +31,17 @@ from collections import Counter
from ipaddress import IPv4Address, IPv4Network, IPv6Address, IPv6Network
from queue import Queue
from .__init__ import ANYWIN, EXE, MACOS, PY2, PY36, TYPE_CHECKING, VT100, WINDOWS
from .__init__ import (
ANYWIN,
EXE,
MACOS,
PY2,
PY36,
TYPE_CHECKING,
VT100,
WINDOWS,
EnvParams,
)
from .__version__ import S_BUILD_DT, S_VERSION
from .stolen import surrogateescape
@@ -3407,9 +3418,14 @@ def loadpy(ap: str, hot: bool) -> Any:
def gzip_orig_sz(fn: str) -> int:
with open(fsenc(fn), "rb") as f:
f.seek(-4, 2)
rv = f.read(4)
return sunpack(b"I", rv)[0] # type: ignore
return gzip_file_orig_sz(f)
def gzip_file_orig_sz(f) -> int:
start = f.tell()
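# ISIZE: the last 4 bytes of a gzip file hold the uncompressed size mod 2**32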
f.seek(-4, 2)
rv = f.read(4)
f.seek(start, 0)
return sunpack(b"I", rv)[0] # type: ignore
def align_tab(lines: list[str]) -> list[str]:
@@ -3545,6 +3561,167 @@ def hidedir(dp) -> None:
pass
try:
import importlib.resources as impresources
except ImportError:
try:
import importlib_resources as impresources
except ImportError:
impresources = None
try:
import pkg_resources
except ImportError:
pkg_resources = None
def _pkg_resource_exists(pkg: str, name: str) -> bool:
if not pkg_resources:
return False
try:
return pkg_resources.resource_exists(pkg, name)
except NotImplementedError:
return False
def stat_resource(E: EnvParams, name: str):
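# only filesystem-backed resources can be stat()ed; returns None for
# resources that live inside a zip/egg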
path = os.path.join(E.mod, name)
if os.path.exists(path):
return os.stat(fsenc(path))
return None
def has_resource(E: EnvParams, name: str):
if impresources:
try:
resources = impresources.files(E.pkg)
except ImportError:
pass
else:
res = resources.joinpath(name)
if res.is_file() or res.is_dir():
return True
if pkg_resources:
if _pkg_resource_exists(E.pkg.__name__, name):
return True
return os.path.exists(os.path.join(E.mod, name))
def load_resource(E: EnvParams, name: str, mode="rb"):
if impresources:
try:
resources = impresources.files(E.pkg)
except ImportError:
pass
else:
res = resources.joinpath(name)
if res.is_file():
return res.open(mode)
if pkg_resources:
if _pkg_resource_exists(E.pkg.__name__, name) and not pkg_resources.resource_isdir(E.pkg.__name__, name):
stream = pkg_resources.resource_stream(E.pkg.__name__, name)
if 'b' not in mode:
stream = io.TextIOWrapper(stream)
return stream
return open(os.path.join(E.mod, name), mode)
def walk_resources(E: EnvParams, name: str):
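# os.walk-style generator over the resource tree at `name`, merging whatever
# is visible on the filesystem (E.mod), through importlib_resources, and
# through pkg_resources into (base, dirs, files) tuples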
def walk_idirs(base, r):
queue = [(base, r)]
while queue:
(b, r) = queue.pop(0)
d = []
f = []
for e in r.iterdir():
if e.is_dir():
d.append(e.name)
queue.append((os.path.join(b, e.name), e))
elif e.is_file():
f.append(e.name)
yield (b, d, f)
def walk_pdirs(base):
queue = [base]
while queue:
b = queue.pop(0)
d = []
f = []
for e in pkg_resources.resource_listdir(E.pkg.__name__, b):
if pkg_resources.resource_isdir(E.pkg.__name__, os.path.join(b, e)):
d.append(e)
queue.append(os.path.join(b, e))
else:
f.append(e)
yield (b, d, f)
if impresources:
try:
iresources = impresources.files(E.pkg)
except ImportError:
iresources = None
else:
iresources = None
base_path = os.path.join(E.mod, name)
def walk_single(base, dirs, files, normalize_base=False, skip_ires=False, skip_pres=False):
if normalize_base:
if base != base_path:
relbase = os.path.relpath(base, base_path)
else:
relbase = name
else:
relbase = base
ires_dirs = []
if not skip_ires and iresources:
iresbase = iresources.joinpath(relbase)
if iresbase.is_dir():
for ientry in iresbase.iterdir():
if ientry.is_dir() and ientry.name not in dirs:
dirs.append(ientry.name)
ires_dirs.append(ientry.name)
elif ientry.is_file() and ientry.name not in files:
files.append(ientry.name)
pres_dirs = []
if not skip_pres and _pkg_resource_exists(E.pkg.__name__, relbase) and pkg_resources.resource_isdir(E.pkg.__name__, relbase):
for pentry in pkg_resources.resource_listdir(E.pkg.__name__, relbase):
ppath = os.path.join(relbase, pentry)
if pkg_resources.resource_isdir(E.pkg.__name__, ppath):
if pentry not in dirs:
dirs.append(pentry)
pres_dirs.append(pentry)
else:
if pentry not in files:
files.append(pentry)
yield (base, dirs + ires_dirs + pres_dirs, files)
for d in ires_dirs:
for (ibase, idirs, ifiles) in walk_idirs(os.path.join(relbase, d), iresources.joinpath(relbase, d)):
yield from walk_single(ibase, idirs, ifiles, normalize_base=False, skip_ires=True, skip_pres=skip_pres)
for d in pres_dirs:
for (pbase, pdirs, pfiles) in walk_pdirs(os.path.join(relbase, d)):
yield (pbase, pdirs, pfiles)
normalize_base = False
skip_ires = skip_pres = False
if os.path.isdir(base_path):
walker = os.walk(base_path)
normalize_base = True
elif iresources and iresources.joinpath(name).is_dir():
walker = walk_idirs(name, iresources.joinpath(name))
skip_ires = True
elif pkg_resources and _pkg_resource_exists(E.pkg.__name__, name) and pkg_resources.resource_isdir(E.pkg.__name__, name):
walker = walk_pdirs(name)
skip_pres = True
for (base, dirs, files) in walker:
yield from walk_single(base, dirs, files, normalize_base=normalize_base, skip_ires=skip_ires, skip_pres=skip_pres)
class Pebkac(Exception):
def __init__(
self, code: int, msg: Optional[str] = None, log: Optional[str] = None