From 2d6eb63fce7201cd5fa68380f7e346b37a46cd61 Mon Sep 17 00:00:00 2001
From: ed
Date: Mon, 12 Aug 2024 19:55:17 +0000
Subject: [PATCH] scripts/uncomment: python 3.12 support;

`tokenize.FSTRING_MIDDLE` was introduced, changing the representation
of `f"x{{y"` from `STRING(f"x{{y")` to:

* `FSTRING_START('f"')`
* `FSTRING_MIDDLE('x{')`
* `FSTRING_MIDDLE('y')`
* `FSTRING_END('"')`

each literal `{` (encoded as `{{` in the input) now appears as a
single `{` as the final character of its `FSTRING_MIDDLE`, with
additional consecutive `FSTRING_MIDDLE` tokens if necessary

regular interpolating `{` are encoded as separate `OP` tokens

the fact that the literal `{` is encoded as a single `{` instead of
`{{` breaks the assumption that the string-value of each token maps
directly to the original code

fix this by replacing `{` with `{{` and `}` with `}}` in
`FSTRING_MIDDLE` tokens, and not adding whitespace after
`FSTRING_MIDDLE` tokens
---
 scripts/uncomment.py | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/scripts/uncomment.py b/scripts/uncomment.py
index 273db5ea..11dfcf90 100644
--- a/scripts/uncomment.py
+++ b/scripts/uncomment.py
@@ -8,6 +8,14 @@ import sys
 import tokenize
 
 
+# tokenize.FSTRING_MIDDLE only exists on python 3.12 and later;
+# older versions get a sentinel which no real token type will equal
+try:
+    FSTRING_MIDDLE = tokenize.FSTRING_MIDDLE
+except AttributeError:
+    FSTRING_MIDDLE = -9001
+
+
 def uncomment(fpath):
     """modified https://stackoverflow.com/a/62074206"""
 
@@ -31,7 +39,8 @@ def uncomment(fpath):
         if start_line > last_lineno:
             last_col = 0
 
-        if start_col > last_col:
+        # no padding after FSTRING_MIDDLE; its text is shorter than the source (`{{` becomes `{`)
+        if start_col > last_col and prev_toktype != FSTRING_MIDDLE:
             out += " " * (start_col - last_col)
 
         is_legalese = (
@@ -48,6 +57,11 @@ def uncomment(fpath):
                 out += token_string
             else:
                 out += '"a"'
+        elif token_type == FSTRING_MIDDLE:
+            # literal braces arrive unescaped; double them again to get valid source
+            out += token_string.replace(r"{", r"{{").replace(r"}", r"}}")
+            if not code and token_string.strip():
+                code = True
         elif token_type != tokenize.COMMENT:
             out += token_string
             if not code and token_string.strip():