Skip to content

gh-131878: Fix input of unicode characters with two or more code points in new pyrepl on Windows #131901

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 12 commits into
base: main
Choose a base branch
from
Open
12 changes: 4 additions & 8 deletions Lib/_pyrepl/base_eventqueue.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,18 +69,14 @@ def insert(self, event: Event) -> None:
trace('added event {event}', event=event)
self.events.append(event)

def push(self, char: int | bytes | str) -> None:
def push(self, char: int | bytes) -> None:
"""
Processes a character by updating the buffer and handling special key mappings.
"""
assert isinstance(char, (int, bytes))
ord_char = char if isinstance(char, int) else ord(char)
if ord_char > 255:
assert isinstance(char, str)
char = bytes(char.encode(self.encoding, "replace"))
self.buf.extend(char)
else:
char = bytes(bytearray((ord_char,)))
self.buf.append(ord_char)
char = bytes((ord_char,))
self.buf.append(ord_char)

if char in self.keymap:
if self.keymap is self.compiled_keymap:
Expand Down
3 changes: 2 additions & 1 deletion Lib/_pyrepl/windows_console.py
Original file line number Diff line number Diff line change
Expand Up @@ -468,7 +468,8 @@ def get_event(self, block: bool = True) -> Event | None:
return None
elif self.__vt_support:
# If virtual terminal is enabled, scanning VT sequences
self.event_queue.push(rec.Event.KeyEvent.uChar.UnicodeChar)
for char in raw_key.encode(self.event_queue.encoding, "replace"):
self.event_queue.push(char)
continue

if key_event.dwControlKeyState & ALT_ACTIVE:
Expand Down
78 changes: 60 additions & 18 deletions Lib/test/test_pyrepl/test_eventqueue.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ def test_push_with_key_in_keymap(self, mock_keymap):
mock_keymap.compile_keymap.return_value = {"a": "b"}
eq = self.make_eventqueue()
eq.keymap = {b"a": "b"}
eq.push("a")
eq.push(b"a")
mock_keymap.compile_keymap.assert_called()
self.assertEqual(eq.events[0].evt, "key")
self.assertEqual(eq.events[0].data, "b")
Expand All @@ -63,7 +63,7 @@ def test_push_without_key_in_keymap(self, mock_keymap):
mock_keymap.compile_keymap.return_value = {"a": "b"}
eq = self.make_eventqueue()
eq.keymap = {b"c": "d"}
eq.push("a")
eq.push(b"a")
mock_keymap.compile_keymap.assert_called()
self.assertEqual(eq.events[0].evt, "key")
self.assertEqual(eq.events[0].data, "a")
Expand All @@ -73,13 +73,13 @@ def test_push_with_keymap_in_keymap(self, mock_keymap):
mock_keymap.compile_keymap.return_value = {"a": "b"}
eq = self.make_eventqueue()
eq.keymap = {b"a": {b"b": "c"}}
eq.push("a")
eq.push(b"a")
mock_keymap.compile_keymap.assert_called()
self.assertTrue(eq.empty())
eq.push("b")
eq.push(b"b")
self.assertEqual(eq.events[0].evt, "key")
self.assertEqual(eq.events[0].data, "c")
eq.push("d")
eq.push(b"d")
self.assertEqual(eq.events[1].evt, "key")
self.assertEqual(eq.events[1].data, "d")

Expand All @@ -88,32 +88,32 @@ def test_push_with_keymap_in_keymap_and_escape(self, mock_keymap):
mock_keymap.compile_keymap.return_value = {"a": "b"}
eq = self.make_eventqueue()
eq.keymap = {b"a": {b"b": "c"}}
eq.push("a")
eq.push(b"a")
mock_keymap.compile_keymap.assert_called()
self.assertTrue(eq.empty())
eq.flush_buf()
eq.push("\033")
eq.push(b"\033")
self.assertEqual(eq.events[0].evt, "key")
self.assertEqual(eq.events[0].data, "\033")
eq.push("b")
eq.push(b"b")
self.assertEqual(eq.events[1].evt, "key")
self.assertEqual(eq.events[1].data, "b")

def test_push_special_key(self):
eq = self.make_eventqueue()
eq.keymap = {}
eq.push("\x1b")
eq.push("[")
eq.push("A")
eq.push(b"\x1b")
eq.push(b"[")
eq.push(b"A")
self.assertEqual(eq.events[0].evt, "key")
self.assertEqual(eq.events[0].data, "\x1b")

def test_push_unrecognized_escape_sequence(self):
eq = self.make_eventqueue()
eq.keymap = {}
eq.push("\x1b")
eq.push("[")
eq.push("Z")
eq.push(b"\x1b")
eq.push(b"[")
eq.push(b"Z")
self.assertEqual(len(eq.events), 3)
self.assertEqual(eq.events[0].evt, "key")
self.assertEqual(eq.events[0].data, "\x1b")
Expand All @@ -122,12 +122,54 @@ def test_push_unrecognized_escape_sequence(self):
self.assertEqual(eq.events[2].evt, "key")
self.assertEqual(eq.events[2].data, "Z")

def test_push_unicode_character(self):
def test_push_unicode_character_as_str(self):
eq = self.make_eventqueue()
eq.keymap = {}
eq.push("ч")
self.assertEqual(eq.events[0].evt, "key")
self.assertEqual(eq.events[0].data, "ч")
with self.assertRaises(AssertionError):
eq.push("ч")
with self.assertRaises(AssertionError):
eq.push("ñ")

def test_push_unicode_character_two_bytes(self):
eq = self.make_eventqueue()
eq.keymap = {}

encoded = "ч".encode(eq.encoding, "replace")
self.assertEqual(len(encoded), 2)

eq.push(encoded[0])
e = eq.get()
self.assertIsNone(e)

eq.push(encoded[1])
e = eq.get()
self.assertEqual(e.evt, "key")
self.assertEqual(e.data, "ч")

def test_push_single_chars_and_unicode_character_as_str(self):
eq = self.make_eventqueue()
eq.keymap = {}

def _event(evt, data, raw=None):
r = raw if raw is not None else data.encode(eq.encoding)
e = Event(evt, data, r)
return e

def _push(keys):
for k in keys:
eq.push(k)

self.assertIsInstance("ñ", str)

# If an exception happens during push, the existing events must be
# preserved and we can continue to push.
_push(b"b")
with self.assertRaises(AssertionError):
_push("ñ")
_push(b"a")

self.assertEqual(eq.get(), _event("key", "b"))
self.assertEqual(eq.get(), _event("key", "a"))


@unittest.skipIf(support.MS_WINDOWS, "No Unix event queue on Windows")
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Fix support for Unicode characters with two or more code points on Windows in
the new REPL.
Loading