diff --git a/Lib/_pyrepl/reader.py b/Lib/_pyrepl/reader.py index 7fc2422dac9c3f..b168c0aa427869 100644 --- a/Lib/_pyrepl/reader.py +++ b/Lib/_pyrepl/reader.py @@ -27,9 +27,8 @@ from dataclasses import dataclass, field, fields from _colorize import can_colorize, ANSIColors - from . import commands, console, input -from .utils import wlen, unbracket, disp_str +from .utils import wlen, unbracket, disp_str, gen_colors from .trace import trace @@ -38,8 +37,7 @@ from .types import Callback, SimpleContextManager, KeySpec, CommandName -# syntax classes: - +# syntax classes SYNTAX_WHITESPACE, SYNTAX_WORD, SYNTAX_SYMBOL = range(3) @@ -144,16 +142,17 @@ class Reader: Instance variables of note include: * buffer: - A *list* (*not* a string at the moment :-) containing all the - characters that have been entered. + A per-character list containing all the characters that have been + entered. Does not include color information. * console: Hopefully encapsulates the OS dependent stuff. * pos: A 0-based index into 'buffer' for where the insertion point is. * screeninfo: - Ahem. This list contains some info needed to move the - insertion point around reasonably efficiently. + A list of screen position tuples. Each list element is a tuple + representing information on visible line length for a given line. + Allows for efficient skipping of color escape sequences. * cxy, lxy: the position of the insertion point in screen ... 
* syntax_table: @@ -316,6 +315,11 @@ def calc_screen(self) -> list[str]: pos -= offset prompt_from_cache = (offset and self.buffer[offset - 1] != "\n") + + if self.can_colorize: + colors = list(gen_colors(self.get_unicode())) + else: + colors = None lines = "".join(self.buffer[offset:]).split("\n") cursor_found = False lines_beyond_cursor = 0 @@ -343,7 +347,7 @@ def calc_screen(self) -> list[str]: screeninfo.append((0, [])) pos -= line_len + 1 prompt, prompt_len = self.process_prompt(prompt) - chars, char_widths = disp_str(line) + chars, char_widths = disp_str(line, colors, offset) wrapcount = (sum(char_widths) + prompt_len) // self.console.width trace("wrapcount = {wrapcount}", wrapcount=wrapcount) if wrapcount == 0 or not char_widths: @@ -567,6 +571,7 @@ def insert(self, text: str | list[str]) -> None: def update_cursor(self) -> None: """Move the cursor to reflect changes in self.pos""" self.cxy = self.pos2xy() + trace("update_cursor({pos}) = {cxy}", pos=self.pos, cxy=self.cxy) self.console.move_cursor(*self.cxy) def after_command(self, cmd: Command) -> None: diff --git a/Lib/_pyrepl/utils.py b/Lib/_pyrepl/utils.py index 7437fbe1ab9371..8b57c05368ec00 100644 --- a/Lib/_pyrepl/utils.py +++ b/Lib/_pyrepl/utils.py @@ -2,12 +2,56 @@ import unicodedata import functools +from idlelib import colorizer +from typing import cast, Iterator, Literal, Match, NamedTuple, Pattern, Self +from _colorize import ANSIColors + from .types import CharBuffer, CharWidths from .trace import trace ANSI_ESCAPE_SEQUENCE = re.compile(r"\x1b\[[ -@]*[A-~]") ZERO_WIDTH_BRACKET = re.compile(r"\x01.*?\x02") ZERO_WIDTH_TRANS = str.maketrans({"\x01": "", "\x02": ""}) +COLORIZE_RE: Pattern[str] = colorizer.prog +IDENTIFIER_RE: Pattern[str] = colorizer.idprog +IDENTIFIERS_AFTER = {"def", "class"} +COLORIZE_GROUP_NAME_MAP: dict[str, str] = colorizer.prog_group_name_to_tag + +type ColorTag = ( + Literal["KEYWORD"] + | Literal["BUILTIN"] + | Literal["COMMENT"] + | Literal["STRING"] + | 
Literal["DEFINITION"] + | Literal["SYNC"] +) + + +class Span(NamedTuple): + """Span indexing that's inclusive on both ends.""" + + start: int + end: int + + @classmethod + def from_re(cls, m: Match[str], group: int | str) -> Self: + re_span = m.span(group) + return cls(re_span[0], re_span[1] - 1) + + +class ColorSpan(NamedTuple): + span: Span + tag: ColorTag + + +TAG_TO_ANSI: dict[ColorTag, str] = { + "KEYWORD": ANSIColors.BOLD_BLUE, + "BUILTIN": ANSIColors.CYAN, + "COMMENT": ANSIColors.RED, + "STRING": ANSIColors.GREEN, + "DEFINITION": ANSIColors.BOLD_WHITE, + "SYNC": ANSIColors.RESET, +} @functools.cache @@ -41,17 +85,61 @@ def unbracket(s: str, including_content: bool = False) -> str: return s.translate(ZERO_WIDTH_TRANS) -def disp_str(buffer: str) -> tuple[CharBuffer, CharWidths]: - r"""Decompose the input buffer into a printable variant. +def gen_colors(buffer: str) -> Iterator[ColorSpan]: + """Returns a list of index spans to color using the given color tag. + + The input `buffer` should be a valid start of a Python code block, i.e. + it cannot be a block starting in the middle of a multiline string. + """ + for match in COLORIZE_RE.finditer(buffer): + yield from gen_color_spans(match) + + +def gen_color_spans(re_match: Match[str]) -> Iterator[ColorSpan]: + """Generate non-empty color spans.""" + for tag, data in re_match.groupdict().items(): + if not data: + continue + span = Span.from_re(re_match, tag) + tag = COLORIZE_GROUP_NAME_MAP.get(tag, tag) + yield ColorSpan(span, cast(ColorTag, tag)) + if data in IDENTIFIERS_AFTER: + if name_match := IDENTIFIER_RE.match(re_match.string, span.end + 1): + span = Span.from_re(name_match, 1) + yield ColorSpan(span, "DEFINITION") + + +def disp_str( + buffer: str, colors: list[ColorSpan] | None = None, start_index: int = 0 +) -> tuple[CharBuffer, CharWidths]: + r"""Decompose the input buffer into a printable variant with applied colors. 
Returns a tuple of two lists: - - the first list is the input buffer, character by character; + - the first list is the input buffer, character by character, with color + escape codes added (while those codes contain multiple ASCII characters, + each code is considered atomic *and is attached for the corresponding + visible character*); - the second list is the visible width of each character in the input buffer. + Note on colors: + - The `colors` list, if provided, is partially consumed within. We're using + a list and not a generator since we need to hold onto the current + unfinished span between calls to disp_str in case of multiline strings. + - The `colors` list is computed from the start of the input block. `buffer` + is only a subset of that input block, a single line within. This is why + we need `start_index` to inform us which position is the start of `buffer` + actually within user input. This allows us to match color spans correctly. + Examples: >>> utils.disp_str("a = 9") (['a', ' ', '=', ' ', '9'], [1, 1, 1, 1, 1]) + + >>> line = "while 1:" + >>> colors = list(utils.gen_colors(line)) + >>> utils.disp_str(line, colors=colors) + (['\x1b[1;34mw', 'h', 'i', 'l', 'e\x1b[0m', ' ', '1', ':'], [1, 1, 1, 1, 1, 1, 1, 1]) + """ chars: CharBuffer = [] char_widths: CharWidths = [] @@ -59,7 +147,20 @@ def disp_str(buffer: str) -> tuple[CharBuffer, CharWidths]: if not buffer: return chars, char_widths - for c in buffer: + while colors and colors[0].span.end < start_index: + # move past irrelevant spans + colors.pop(0) + + pre_color = "" + post_color = "" + if colors and colors[0].span.start < start_index: + # looks like we're continuing a previous color (e.g. 
a multiline str) + pre_color = TAG_TO_ANSI[colors[0].tag] + + for i, c in enumerate(buffer, start_index): + if colors and colors[0].span.start == i: # new color starts now + pre_color = TAG_TO_ANSI[colors[0].tag] + if c == "\x1a": # CTRL-Z on Windows chars.append(c) char_widths.append(2) @@ -73,5 +174,19 @@ def disp_str(buffer: str) -> tuple[CharBuffer, CharWidths]: else: chars.append(c) char_widths.append(str_width(c)) + + if colors and colors[0].span.end == i: # current color ends now + post_color = TAG_TO_ANSI["SYNC"] + colors.pop(0) + + chars[-1] = pre_color + chars[-1] + post_color + pre_color = "" + post_color = "" + + if colors and colors[0].span.start < i and colors[0].span.end > i: + # even though the current color should be continued, reset it for now. + # the next call to `disp_str()` will revive it. + chars[-1] += TAG_TO_ANSI["SYNC"] + trace("disp_str({buffer}) = {s}, {b}", buffer=repr(buffer), s=chars, b=char_widths) return chars, char_widths diff --git a/Lib/test/test_pyrepl/test_reader.py b/Lib/test/test_pyrepl/test_reader.py index 109cb603ae88b6..9ad2ffe5d1685c 100644 --- a/Lib/test/test_pyrepl/test_reader.py +++ b/Lib/test/test_pyrepl/test_reader.py @@ -1,14 +1,20 @@ import itertools import functools import rlcompleter +from textwrap import dedent from unittest import TestCase from unittest.mock import MagicMock from .support import handle_all_events, handle_events_narrow_console from .support import ScreenEqualMixin, code_to_events -from .support import prepare_reader, prepare_console +from .support import prepare_reader, prepare_console, reader_force_colors from _pyrepl.console import Event from _pyrepl.reader import Reader +from _pyrepl.utils import TAG_TO_ANSI + + +colors = {k[0].lower(): v for k, v in TAG_TO_ANSI.items() if k != "SYNC"} +colors["z"] = TAG_TO_ANSI["SYNC"] class TestReader(ScreenEqualMixin, TestCase): @@ -123,8 +129,9 @@ def test_setpos_for_xy_simple(self): def test_control_characters(self): code = 'flag = "🏳️‍🌈"' events = 
code_to_events(code) - reader, _ = handle_all_events(events) + reader, _ = handle_all_events(events, prepare_reader=reader_force_colors) self.assert_screen_equal(reader, 'flag = "🏳️\\u200d🌈"', clean=True) + self.assert_screen_equal(reader, 'flag = {s}"🏳️\\u200d🌈"{z}'.format(**colors)) def test_setpos_from_xy_multiple_lines(self): # fmt: off @@ -355,3 +362,60 @@ def test_setpos_from_xy_for_non_printing_char(self): reader, _ = handle_all_events(events) reader.setpos_from_xy(8, 0) self.assertEqual(reader.pos, 7) + + def test_syntax_highlighting_basic(self): + code = dedent( + """\ + import re, sys + def funct(case: str = sys.platform) -> None: + match = re.search( + "(me)", + ''' + Come on + Come on now + You know that it's time to emerge + ''', + ) + match case: + case "emscripten": print("on the web") + case "ios" | "android": print("on the phone") + case _: print('arms around', match.group(1)) + """ + ) + expected = dedent( + """\ + {k}import{z} re, sys + {a}{k}def{z} {d}funct{z}(case: {b}str{z} = sys.platform) -> {k}None{z}: + match = re.search( + {s}"(me)"{z}, + {s}'''{z} + {s} Come on{z} + {s} Come on now{z} + {s} You know that it's time to emerge{z} + {s} '''{z}, + ) + {k}match{z} case: + {k}case{z} {s}"emscripten"{z}: {b}print{z}({s}"on the web"{z}) + {k}case{z} {s}"ios"{z} | {s}"android"{z}: {b}print{z}({s}"on the phone"{z}) + {k}case{z} {k}_{z}: {b}print{z}({s}'arms around'{z}, match.group(1)) + """ + ) + expected_sync = expected.format(a="", **colors) + events = code_to_events(code) + reader, _ = handle_all_events(events, prepare_reader=reader_force_colors) + self.assert_screen_equal(reader, code, clean=True) + self.assert_screen_equal(reader, expected_sync) + self.assertEqual(reader.pos, 2**7 + 2**8) + self.assertEqual(reader.cxy, (0, 14)) + + async_msg = "{k}async{z} ".format(**colors) + expected_async = expected.format(a=async_msg, **colors) + more_events = itertools.chain( + code_to_events(code), + [Event(evt="key", data="up", raw=bytearray(b"\x1bOA"))] 
* 13, + code_to_events("async "), + ) + reader, _ = handle_all_events(more_events, prepare_reader=reader_force_colors) + self.assert_screen_equal(reader, expected_async) + self.assertEqual(reader.pos, 21) + self.assertEqual(reader.cxy, (6, 1)) diff --git a/Lib/test/test_pyrepl/test_windows_console.py b/Lib/test/test_pyrepl/test_windows_console.py index 07eaccd1124cd6..a62a2e316c3f3e 100644 --- a/Lib/test/test_pyrepl/test_windows_console.py +++ b/Lib/test/test_pyrepl/test_windows_console.py @@ -11,7 +11,8 @@ from unittest import TestCase from unittest.mock import MagicMock, call -from .support import handle_all_events, code_to_events +from .support import handle_all_events, code_to_events, reader_no_colors +from .support import prepare_reader as default_prepare_reader try: from _pyrepl.console import Event, Console @@ -46,14 +47,22 @@ def console(self, events, **kwargs) -> Console: setattr(console, key, val) return console - def handle_events(self, events: Iterable[Event], **kwargs): - return handle_all_events(events, partial(self.console, **kwargs)) + def handle_events( + self, + events: Iterable[Event], + prepare_console=None, + prepare_reader=None, + **kwargs, + ): + prepare_console = prepare_console or partial(self.console, **kwargs) + prepare_reader = prepare_reader or default_prepare_reader + return handle_all_events(events, prepare_console, prepare_reader) def handle_events_narrow(self, events): return self.handle_events(events, width=5) - def handle_events_short(self, events): - return self.handle_events(events, height=1) + def handle_events_short(self, events, **kwargs): + return self.handle_events(events, height=1, **kwargs) def handle_events_height_3(self, events): return self.handle_events(events, height=3) @@ -248,7 +257,9 @@ def test_resize_bigger_on_multiline_function(self): # fmt: on events = itertools.chain(code_to_events(code)) - reader, console = self.handle_events_short(events) + reader, console = self.handle_events_short( + events, 
prepare_reader=reader_no_colors + ) console.height = 2 console.getheightwidth = MagicMock(lambda _: (2, 80)) diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-03-21-19-03-42.gh-issue-131507.q9fvyM.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-03-21-19-03-42.gh-issue-131507.q9fvyM.rst new file mode 100644 index 00000000000000..354a116c53371b --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-03-21-19-03-42.gh-issue-131507.q9fvyM.rst @@ -0,0 +1 @@ +PyREPL now supports syntax highlighting. Contributed by Łukasz Langa.