Skip to content

Changed dict key-matching regex to capture any valid dict key #920

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 13 commits into from
Oct 6, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 47 additions & 2 deletions bpython/line.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
Python code, and return None, or a tuple of the start index, end index, and the
word."""

import re

from itertools import chain
from typing import Optional, NamedTuple

Expand Down Expand Up @@ -34,7 +36,41 @@ def current_word(cursor_offset: int, line: str) -> Optional[LinePart]:
return LinePart(start, end, word)


# NOTE(review): this looks like the pre-change pattern from the diff;
# `_current_dict_key_re` is assigned again further down, and that later
# assignment is the one that stays bound.
_current_dict_key_re = LazyReCompile(r"""[\w_][\w0-9._]*\[([\w0-9._(), '"]*)""")
# Pieces of regex used to match the repr() of several hashable built-in types.
# Each piece is a fragment rather than a complete pattern: the fragments are
# spliced into the full patterns below, which are compiled with re.VERBOSE, so
# the line breaks and `#` comments inside the multi-line fragments are not part
# of what gets matched.

# Fallback piece: any run (possibly empty) of characters other than `]`.
_match_all_dict_keys = r"""[^\]]*"""

# Single-quoted str/bytes literal that may still be unterminated.
# https://docs.python.org/3/reference/lexical_analysis.html#string-and-bytes-literals
_match_single_quote_str_bytes = r"""
# bytes repr() begins with `b` character; bytes and str begin with `'`
b?'
# match escape sequence; this handles `\'` in the string repr()
(?:\\['"nabfrtvxuU\\]|
# or match any non-`\` and non-single-quote character (most of the string)
[^'\\])*
# matches hanging `\` or ending `'` if one is present
[\\']?
"""

# Double-quoted str/bytes literal that may still be unterminated.
# The repr() of bytes and str only uses double quotes if the value contains one
# or more `'` characters and no `"` characters, so this piece does no escape
# handling: the literal simply runs up to the next `"`.
_match_double_quote_str_bytes = r"""
# bytes repr() begins with `b` character
b?"
# string continues until a `"` character is reached
[^"]*
# end matching at closing double-quote if one is present
"?"""

# Match a name followed by a `[` character. The name is one word character
# followed by any number of word characters or `.`, so dotted attribute access
# such as `object.dict[` is accepted as well.
_match_dict_before_key = r"""[\w_][\w0-9._]*\["""

# Full pattern for the key being typed: the `name[` prefix, then one capture
# group holding any sequence of quoted str/bytes literals and other non-`]`
# text. The trailing empty alternative lets the group match nothing at all.
_current_dict_key_re = LazyReCompile(
    _match_dict_before_key
    + "((?:"
    + "|".join(
        (
            _match_single_quote_str_bytes,
            _match_double_quote_str_bytes,
            _match_all_dict_keys,
            "",
        )
    )
    + ")*)",
    re.VERBOSE,
)


def current_dict_key(cursor_offset: int, line: str) -> Optional[LinePart]:
Expand All @@ -45,7 +81,16 @@ def current_dict_key(cursor_offset: int, line: str) -> Optional[LinePart]:
return None


# NOTE(review): this looks like the pre-change pattern from the diff;
# `_current_dict_re` is assigned again further down, and that later assignment
# is the one that stays bound.
_current_dict_re = LazyReCompile(r"""([\w_][\w0-9._]*)\[([\w0-9._(), '"]*)""")
# First capture group: the name that appears directly in front of a `[`
# character (word characters and `.`, so dotted access is included).
_capture_dict_name = r"""([\w_][\w0-9._]*)\["""

# Full pattern: the captured name, the `[`, then a second capture group holding
# any sequence of quoted str/bytes literals and other non-`]` text. The
# trailing empty alternative lets that second group match nothing at all.
_current_dict_re = LazyReCompile(
    _capture_dict_name
    + "((?:"
    + "|".join(
        (
            _match_single_quote_str_bytes,
            _match_double_quote_str_bytes,
            _match_all_dict_keys,
            "",
        )
    )
    + ")*)",
    re.VERBOSE,
)


def current_dict(cursor_offset: int, line: str) -> Optional[LinePart]:
Expand Down
14 changes: 14 additions & 0 deletions bpython/test/test_line_properties.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,10 +178,24 @@ def test_simple(self):
self.assertAccess("asdf[<(>|]")
self.assertAccess("asdf[<(1>|]")
self.assertAccess("asdf[<(1,>|]")
self.assertAccess("asdf[<(1,)>|]")
self.assertAccess("asdf[<(1, >|]")
self.assertAccess("asdf[<(1, 2)>|]")
# TODO self.assertAccess('d[d[<12|>')
self.assertAccess("d[<'a>|")
self.assertAccess("object.dict['a'bcd'], object.dict[<'abc>|")
self.assertAccess("object.dict[<'a'bcd'>|], object.dict['abc")
self.assertAccess(r"object.dict[<'a\'\\\"\n\\'>|")
self.assertAccess("object.dict[<\"abc'>|")
self.assertAccess("object.dict[<(1, 'apple', 2.134>|]")
self.assertAccess("object.dict[<(1, 'apple', 2.134)>|]")
self.assertAccess("object.dict[<-1000>|")
self.assertAccess("object.dict[<-0.23948>|")
self.assertAccess("object.dict[<'\U0001ffff>|")
self.assertAccess(r"object.dict[<'a\'\\\"\n\\'>|]")
self.assertAccess(r"object.dict[<'a\'\\\"\n\\|[[]'>")
self.assertAccess('object.dict[<"a]bc[|]">]')
self.assertAccess("object.dict[<'abcd[]>|")


class TestCurrentDict(LineTestCase):
Expand Down