Skip to content

Commit 18f6901

Browse files
Changed dict key-matching regex to capture any valid dict key (#920)
* Changed dict key-matching regex to capture any valid dict key
* Dict key-matching regex no longer matches beyond the end of a key
* Updated regex to handle str, bytes, int, float, and tuple dict keys
* Added comments to regex using the re.VERBOSE flag
* Added test case for a string dict key being typed inside []
1 parent e69cfe3 commit 18f6901

File tree

2 files changed

+61
-2
lines changed

2 files changed

+61
-2
lines changed

bpython/line.py

Lines changed: 47 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@
44
Python code, and return None, or a tuple of the start index, end index, and the
55
word."""
66

7+
import re
8+
79
from itertools import chain
810
from typing import Optional, NamedTuple
911

@@ -34,7 +36,41 @@ def current_word(cursor_offset: int, line: str) -> Optional[LinePart]:
3436
return LinePart(start, end, word)
3537

3638

37-
_current_dict_key_re = LazyReCompile(r"""[\w_][\w0-9._]*\[([\w0-9._(), '"]*)""")
39+
# pieces of regex to match repr() of several hashable built-in types
40+
_match_all_dict_keys = r"""[^\]]*"""
41+
42+
# https://docs.python.org/3/reference/lexical_analysis.html#string-and-bytes-literals
43+
_match_single_quote_str_bytes = r"""
44+
# bytes repr() begins with `b` character; bytes and str begin with `'`
45+
b?'
46+
# match escape sequence; this handles `\'` in the string repr()
47+
(?:\\['"nabfrtvxuU\\]|
48+
# or match any non-`\` and non-single-quote character (most of the string)
49+
[^'\\])*
50+
# matches hanging `\` or ending `'` if one is present
51+
[\\']?
52+
"""
53+
54+
# bytes and str repr() only use double quotes if the string contains 1 or more
55+
# `'` characters and exactly 0 `"` characters
56+
_match_double_quote_str_bytes = r"""
57+
# bytes repr() begins with `b` character
58+
b?"
59+
# string continues until a `"` character is reached
60+
[^"]*
61+
# end matching at closing double-quote if one is present
62+
"?"""
63+
64+
# match valid identifier name followed by `[` character
65+
_match_dict_before_key = r"""[\w_][\w0-9._]*\["""
66+
67+
_current_dict_key_re = LazyReCompile(
68+
f"{_match_dict_before_key}((?:"
69+
f"{_match_single_quote_str_bytes}|"
70+
f"{_match_double_quote_str_bytes}|"
71+
f"{_match_all_dict_keys}|)*)",
72+
re.VERBOSE,
73+
)
3874

3975

4076
def current_dict_key(cursor_offset: int, line: str) -> Optional[LinePart]:
@@ -45,7 +81,16 @@ def current_dict_key(cursor_offset: int, line: str) -> Optional[LinePart]:
4581
return None
4682

4783

48-
_current_dict_re = LazyReCompile(r"""([\w_][\w0-9._]*)\[([\w0-9._(), '"]*)""")
84+
# capture valid identifier name if followed by `[` character
85+
_capture_dict_name = r"""([\w_][\w0-9._]*)\["""
86+
87+
_current_dict_re = LazyReCompile(
88+
f"{_capture_dict_name}((?:"
89+
f"{_match_single_quote_str_bytes}|"
90+
f"{_match_double_quote_str_bytes}|"
91+
f"{_match_all_dict_keys}|)*)",
92+
re.VERBOSE,
93+
)
4994

5095

5196
def current_dict(cursor_offset: int, line: str) -> Optional[LinePart]:

bpython/test/test_line_properties.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -178,10 +178,24 @@ def test_simple(self):
178178
self.assertAccess("asdf[<(>|]")
179179
self.assertAccess("asdf[<(1>|]")
180180
self.assertAccess("asdf[<(1,>|]")
181+
self.assertAccess("asdf[<(1,)>|]")
181182
self.assertAccess("asdf[<(1, >|]")
182183
self.assertAccess("asdf[<(1, 2)>|]")
183184
# TODO self.assertAccess('d[d[<12|>')
184185
self.assertAccess("d[<'a>|")
186+
self.assertAccess("object.dict['a'bcd'], object.dict[<'abc>|")
187+
self.assertAccess("object.dict[<'a'bcd'>|], object.dict['abc")
188+
self.assertAccess(r"object.dict[<'a\'\\\"\n\\'>|")
189+
self.assertAccess("object.dict[<\"abc'>|")
190+
self.assertAccess("object.dict[<(1, 'apple', 2.134>|]")
191+
self.assertAccess("object.dict[<(1, 'apple', 2.134)>|]")
192+
self.assertAccess("object.dict[<-1000>|")
193+
self.assertAccess("object.dict[<-0.23948>|")
194+
self.assertAccess("object.dict[<'\U0001ffff>|")
195+
self.assertAccess(r"object.dict[<'a\'\\\"\n\\'>|]")
196+
self.assertAccess(r"object.dict[<'a\'\\\"\n\\|[[]'>")
197+
self.assertAccess('object.dict[<"a]bc[|]">]')
198+
self.assertAccess("object.dict[<'abcd[]>|")
185199

186200

187201
class TestCurrentDict(LineTestCase):

0 commit comments

Comments
 (0)