From 125130bbad26f91ad393b573805484940a65d689 Mon Sep 17 00:00:00 2001 From: ptmcg Date: Wed, 1 Jan 2025 09:14:40 -0600 Subject: [PATCH 01/42] Fix typo in examples/README.md, and add table of contents --- examples/README.md | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/examples/README.md b/examples/README.md index 73f3fd0c..e67e1103 100644 --- a/examples/README.md +++ b/examples/README.md @@ -3,6 +3,16 @@ This directory contains a number of examples of parsers created using pyparsing. They fall into a few general categories (several examples include supporting railroad diagrams): + +* [Pyparsing tutorial and language feature demonstrations](#pyparsing-tutorial-and-language-feature-demonstrations) +* [Language parsers](#language-parsers) +* [Domain Specific Language parsers](#domain-specific-language-parsers) +* [Search and query language parsers](#search-and-query-language-parsers) +* [Data format parsers](#data-format-parsers) +* [Logical and arithmetic infix notation parsers and examples](#logical-and-arithmetic-infix-notation-parsers-and-examples) +* [Helpful utilities](#helpful-utilities) + + ## Pyparsing tutorial and language feature demonstrations * Hello World! 
* [greeting.py](./greeting.py) @@ -18,7 +28,7 @@ categories (several examples include supporting railroad diagrams): * Unicode text handling * [tag_metadata.py](./tag_metadata.py) [(diagram)](./tag_metadata_diagram.html) * chemical formulas - * [chemical_formula.py](./chemical_formula.py) + * [chemical_formulas.py](./chemical_formulas.py) * [complex_chemical_formulas.py](./complex_chemical_formulas.py) * API checker * [apicheck.py](./apicheck.py) [(diagram)](./apicheck_diagram.html) From 0619b15402c5724d7af3b1bd2d3325fe5b6c7c56 Mon Sep 17 00:00:00 2001 From: ptmcg Date: Thu, 2 Jan 2025 09:39:32 -0600 Subject: [PATCH 02/42] Add dev support for Python 3.14 (must disable matplotlib tests at this time) --- CHANGES | 5 +++++ pyparsing/__init__.py | 4 ++-- pyproject.toml | 1 + tox.ini | 4 ++-- 4 files changed, 10 insertions(+), 4 deletions(-) diff --git a/CHANGES b/CHANGES index abb74f38..3637d354 100644 --- a/CHANGES +++ b/CHANGES @@ -12,6 +12,11 @@ the new function names before the old functions are completely removed. (Big hel Devin J. Pohly in structuring the code to enable this peaceful transition.) +Version 3.2.2 - under development +--------------------------------- +- Added support for Python 3.14. 
+ + Version 3.2.1 - December, 2024 ------------------------------ - Updated generated railroad diagrams to make non-terminal elements links to their related diff --git a/pyparsing/__init__.py b/pyparsing/__init__.py index 726c76cb..2c87695a 100644 --- a/pyparsing/__init__.py +++ b/pyparsing/__init__.py @@ -120,8 +120,8 @@ def __repr__(self): return f"{__name__}.{type(self).__name__}({', '.join('{}={!r}'.format(*nv) for nv in zip(self._fields, self))})" -__version_info__ = version_info(3, 2, 1, "final", 1) -__version_time__ = "31 Dec 2024 20:41 UTC" +__version_info__ = version_info(3, 2, 2, "final", 1) +__version_time__ = "02 Jan 2025 15:36 UTC" __version__ = __version_info__.__version__ __versionTime__ = __version_time__ __author__ = "Paul McGuire " diff --git a/pyproject.toml b/pyproject.toml index 5ea5a25f..de32a97c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -22,6 +22,7 @@ classifiers = [ "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", "Programming Language :: Python :: 3.13", + "Programming Language :: Python :: 3.14", "Programming Language :: Python :: 3 :: Only", "Programming Language :: Python :: Implementation :: CPython", "Programming Language :: Python :: Implementation :: PyPy", diff --git a/tox.ini b/tox.ini index 35a26fe4..4aac8629 100644 --- a/tox.ini +++ b/tox.ini @@ -1,13 +1,13 @@ [tox] skip_missing_interpreters=true envlist = - py{39,310,311,312,313,py3},mypy-check + py{39,310,311,312,313,314,py3},mypy-check isolated_build = True [testenv] deps= pytest - matplotlib; implementation_name != 'pypy' + matplotlib; implementation_name != 'pypy' and python_version <'3.14' extras=diagrams commands= pytest tests {posargs} From d43ae3a443b9addb75837a06a5a512519aeead0d Mon Sep 17 00:00:00 2001 From: ptmcg Date: Sat, 4 Jan 2025 03:16:24 -0600 Subject: [PATCH 03/42] Code cleanup in And handling of Ellipsis terms --- pyparsing/__init__.py | 2 +- pyparsing/core.py | 36 +++++++++++++++++++++--------------- 
tests/test_unit.py | 6 ++++++ 3 files changed, 28 insertions(+), 16 deletions(-) diff --git a/pyparsing/__init__.py b/pyparsing/__init__.py index 2c87695a..734233cd 100644 --- a/pyparsing/__init__.py +++ b/pyparsing/__init__.py @@ -121,7 +121,7 @@ def __repr__(self): __version_info__ = version_info(3, 2, 2, "final", 1) -__version_time__ = "02 Jan 2025 15:36 UTC" +__version_time__ = "04 Jan 2025 08:43 UTC" __version__ = __version_info__.__version__ __versionTime__ = __version_time__ __author__ = "Paul McGuire " diff --git a/pyparsing/core.py b/pyparsing/core.py index b884e2d4..8656a438 100644 --- a/pyparsing/core.py +++ b/pyparsing/core.py @@ -4036,25 +4036,31 @@ def _generateDefaultName(self) -> str: return "-" def __init__( - self, exprs_arg: typing.Iterable[ParserElement], savelist: bool = True + self, + exprs_arg: typing.Iterable[Union[ParserElement, str]], + savelist: bool = True, ): - exprs: list[ParserElement] = list(exprs_arg) - if exprs and Ellipsis in exprs: - tmp: list[ParserElement] = [] - for i, expr in enumerate(exprs): - if expr is not Ellipsis: - tmp.append(expr) - continue + # instantiate exprs as a list, converting strs to ParserElements + exprs: list[ParserElement] = [ + self._literalStringClass(e) if isinstance(e, str) else e for e in exprs_arg + ] - if i < len(exprs) - 1: - skipto_arg: ParserElement = typing.cast( - ParseExpression, (Empty() + exprs[i + 1]) - ).exprs[-1] - tmp.append(SkipTo(skipto_arg)("_skipped*")) - continue + # convert any Ellipsis elements to SkipTo + if Ellipsis in exprs: + # Ellipsis cannot be the last element + if exprs[-1] is Ellipsis: raise Exception("cannot construct And with sequence ending in ...") - exprs[:] = tmp + + tmp: list[ParserElement] = [] + for cur_expr, next_expr in zip(exprs, exprs[1:]): + if cur_expr is Ellipsis: + tmp.append(SkipTo(next_expr)("_skipped*")) + else: + tmp.append(cur_expr) + + exprs[:-1] = tmp + super().__init__(exprs, savelist) if self.exprs: self.mayReturnEmpty = all(e.mayReturnEmpty for 
e in self.exprs) diff --git a/tests/test_unit.py b/tests/test_unit.py index 48e7b24f..9251f668 100644 --- a/tests/test_unit.py +++ b/tests/test_unit.py @@ -1934,6 +1934,12 @@ def test(expr, test_string, expected_list, expected_dict): ["start", "red", "", "end"], {"_skipped": ["missing "]}, ) + test( + e, + "start 456 end", + ["start", "", "456", "end"], + {"_skipped": ["missing "]}, + ) test( e, "start end", From c5eb7015ee20fef3dd4ab82c50e74821525e2642 Mon Sep 17 00:00:00 2001 From: ptmcg Date: Sat, 4 Jan 2025 03:23:36 -0600 Subject: [PATCH 04/42] Tighten up some return type annotations --- pyparsing/helpers.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyparsing/helpers.py b/pyparsing/helpers.py index f781e871..4e9807ca 100644 --- a/pyparsing/helpers.py +++ b/pyparsing/helpers.py @@ -276,7 +276,7 @@ def one_of( ) -def dict_of(key: ParserElement, value: ParserElement) -> ParserElement: +def dict_of(key: ParserElement, value: ParserElement) -> Dict: """Helper to easily and clearly define a dictionary by specifying the respective patterns for the key and value. Takes care of defining the :class:`Dict`, :class:`ZeroOrMore`, and @@ -691,7 +691,7 @@ def infix_notation( op_list: list[InfixNotationOperatorSpec], lpar: Union[str, ParserElement] = Suppress("("), rpar: Union[str, ParserElement] = Suppress(")"), -) -> ParserElement: +) -> Forward: """Helper method for constructing grammars of expressions made up of operators working in a precedence hierarchy. Operators may be unary or binary, left- or right-associative. 
Parse actions can also be From 5e648398da6fcbf422337ec96b346014cc541f35 Mon Sep 17 00:00:00 2001 From: ptmcg Date: Sat, 4 Jan 2025 04:04:28 -0600 Subject: [PATCH 05/42] Guard against absence of sys._getframe (if run in non-CPython implementation) --- pyparsing/core.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/pyparsing/core.py b/pyparsing/core.py index 8656a438..47e2a72c 100644 --- a/pyparsing/core.py +++ b/pyparsing/core.py @@ -6162,11 +6162,17 @@ def autoname_elements() -> None: Utility to simplify mass-naming of parser elements, for generating railroad diagram with named subdiagrams. """ - calling_frame = sys._getframe(1) + + # guard against _getframe not being implemented in the current Python + getframe_fn = getattr(sys, "_getframe", lambda _: None) + calling_frame = getframe_fn(1) if calling_frame is None: return + + # find all locals in the calling frame that are ParserElements calling_frame = typing.cast(types.FrameType, calling_frame) for name, var in calling_frame.f_locals.items(): + # if no custom name defined, set the name to the var name if isinstance(var, ParserElement) and not var.customName: var.set_name(name) From 970f9468f60205e063d1393eb70c8c3f6e65741c Mon Sep 17 00:00:00 2001 From: ptmcg Date: Sat, 4 Jan 2025 04:08:05 -0600 Subject: [PATCH 06/42] Position '^' markers using f-string notation instead of old-style " " multiplication --- pyparsing/core.py | 2 +- pyparsing/exceptions.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pyparsing/core.py b/pyparsing/core.py index 47e2a72c..bfa94001 100644 --- a/pyparsing/core.py +++ b/pyparsing/core.py @@ -355,7 +355,7 @@ def _default_start_debug_action( ( f"{cache_hit_str}Match {expr} at loc {loc}({lineno(loc, instring)},{col(loc, instring)})\n" f" {line(loc, instring)}\n" - f" {' ' * (col(loc, instring) - 1)}^" + f" {'^':>{col(loc, instring)}}" ) ) diff --git a/pyparsing/exceptions.py b/pyparsing/exceptions.py index 57a1579d..3ff8f9b5 100644 --- 
a/pyparsing/exceptions.py +++ b/pyparsing/exceptions.py @@ -87,7 +87,7 @@ def explain_exception(exc: Exception, depth: int = 16) -> str: ret: list[str] = [] if isinstance(exc, ParseBaseException): ret.append(exc.line) - ret.append(f"{' ' * (exc.column - 1)}^") + ret.append(f"{'^':>{exc.column}}") ret.append(f"{type(exc).__name__}: {exc}") if depth <= 0 or exc.__traceback__ is None: From 8de05337485a2d7adf430fb78c0adcc6b306eeca Mon Sep 17 00:00:00 2001 From: ptmcg Date: Sat, 4 Jan 2025 19:24:52 -0600 Subject: [PATCH 07/42] Fix exception messages for MatchFirst and Or expressions - fixes Issue #592 --- CHANGES | 3 +++ pyparsing/__init__.py | 2 +- pyparsing/core.py | 6 ++++-- tests/test_unit.py | 20 ++++++++++++++++++++ 4 files changed, 28 insertions(+), 3 deletions(-) diff --git a/CHANGES b/CHANGES index 3637d354..2c3400af 100644 --- a/CHANGES +++ b/CHANGES @@ -14,6 +14,9 @@ Devin J. Pohly in structuring the code to enable this peaceful transition.) Version 3.2.2 - under development --------------------------------- +- Better exception message for `MatchFirst` and `Or` expressions, showing all alternatives + rather than just the first one. Fixes Issue #592, reported by Focke, thanks! + - Added support for Python 3.14. 
diff --git a/pyparsing/__init__.py b/pyparsing/__init__.py index 734233cd..15ce647c 100644 --- a/pyparsing/__init__.py +++ b/pyparsing/__init__.py @@ -121,7 +121,7 @@ def __repr__(self): __version_info__ = version_info(3, 2, 2, "final", 1) -__version_time__ = "04 Jan 2025 08:43 UTC" +__version_time__ = "05 Jan 2025 01:20 UTC" __version__ = __version_info__.__version__ __versionTime__ = __version_time__ __author__ = "Paul McGuire " diff --git a/pyparsing/core.py b/pyparsing/core.py index bfa94001..7f570d50 100644 --- a/pyparsing/core.py +++ b/pyparsing/core.py @@ -4292,7 +4292,8 @@ def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType: if maxException is not None: # infer from this check that all alternatives failed at the current position # so emit this collective error message instead of any single error message - if maxExcLoc == loc: + parse_start_loc = self.preParse(instring, loc) + if maxExcLoc == parse_start_loc: maxException.msg = self.errmsg or "" raise maxException @@ -4399,7 +4400,8 @@ def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType: if maxException is not None: # infer from this check that all alternatives failed at the current position # so emit this collective error message instead of any individual error message - if maxExcLoc == loc: + parse_start_loc = self.preParse(instring, loc) + if maxExcLoc == parse_start_loc: maxException.msg = self.errmsg or "" raise maxException diff --git a/tests/test_unit.py b/tests/test_unit.py index 9251f668..23c08a88 100644 --- a/tests/test_unit.py +++ b/tests/test_unit.py @@ -10657,6 +10657,26 @@ def testForwardsDoProperStreamlining(self): "123", "Expected W:(A-Za-z), found '123'", ), + ( + "prefix" + (pp.Regex("a").set_name("a") | pp.Regex("b").set_name("b")), + "prefixc", + "Expected {a | b}, found 'c'", + ), + ( + "prefix" + (pp.Regex("a").set_name("a") | pp.Regex("b").set_name("b")), + "prefix c", + "Expected {a | b}, found 'c'", + ), + ( + "prefix" + 
(pp.Regex("a").set_name("a") ^ pp.Regex("b").set_name("b")), "prefixc", "Expected {a ^ b}, found 'c'", ), ( "prefix" + (pp.Regex("a").set_name("a") ^ pp.Regex("b").set_name("b")), "prefix c", "Expected {a ^ b}, found 'c'", ), ) def test_exception_messages(self, tests=test_exception_messages_tests): From b8f58013434e81269d4ea88d0016277d5bbea424 Mon Sep 17 00:00:00 2001 From: ptmcg Date: Sun, 5 Jan 2025 19:59:28 -0600 Subject: [PATCH 08/42] Add show_hidden parameter to create_diagram() --- CHANGES | 8 ++ docs/HowToUsePyparsing.rst | 2 + examples/delta_time_diagram.html | 134 ++++++++++--------------------- examples/tag_metadata.py | 6 +- pyparsing/__init__.py | 2 +- pyparsing/core.py | 4 + pyparsing/diagram/__init__.py | 10 +++ 7 files changed, 74 insertions(+), 92 deletions(-) diff --git a/CHANGES b/CHANGES index 2c3400af..7a3b443c 100644 --- a/CHANGES +++ b/CHANGES @@ -17,6 +17,14 @@ Version 3.2.2 - under development - Better exception message for `MatchFirst` and `Or` expressions, showing all alternatives rather than just the first one. Fixes Issue #592, reported by Focke, thanks! +- Added optional argument `show_hidden` to `create_diagram` to show + elements that are used internally by pyparsing, but are not part of the actual + parser grammar. For instance, the `Tag` class can insert values into the parsed + results but it does not actually parse any input, so by default it is not included + in a railroad diagram. By calling `create_diagram` with `show_hidden` = `True`, + these internal elements will be included. (You can see this in the tag_metadata.py + script in the examples directory.) + - Added support for Python 3.14. 
diff --git a/docs/HowToUsePyparsing.rst b/docs/HowToUsePyparsing.rst index f23047f0..7ec83448 100644 --- a/docs/HowToUsePyparsing.rst +++ b/docs/HowToUsePyparsing.rst @@ -1489,6 +1489,8 @@ This will result in the railroad diagram being written to ``street_address_diagr - ``show_groups`` - bool flag whether groups should be highlighted with an unlabeled surrounding box +- ``show_hidden`` - bool flag whether internal pyparsing elements that are normally omitted in diagrams should be shown (default=False) + - ``embed`` - bool flag whether generated HTML should omit <HEAD>, <BODY>, and <DOCTYPE> tags to embed the resulting HTML in an enclosing HTML source (such as PyScript HTML) diff --git a/examples/delta_time_diagram.html b/examples/delta_time_diagram.html index 16dbbe38..71689063 100644 --- a/examples/delta_time_diagram.html +++ b/examples/delta_time_diagram.html @@ -18,34 +18,32 @@
-

time and day

+

time and day

- + - - -time referencetime reference -time_ref_presenttime_ref_present - - - - - -on_on_ -day referenceday reference - -day referenceday reference - - - - - -at_at_ -time of daytime of day -time_ref_presenttime_ref_present -
-
- -
-

time_ref_present

-
-
- - - - -Tag:time_ref_present=True
diff --git a/examples/tag_metadata.py b/examples/tag_metadata.py index 2da39b4d..18f32673 100644 --- a/examples/tag_metadata.py +++ b/examples/tag_metadata.py @@ -25,7 +25,11 @@ import contextlib with contextlib.suppress(Exception): - greeting.create_diagram("tag_metadata_diagram.html", vertical=3) + greeting.create_diagram( + "tag_metadata_diagram.html", + vertical=3, + show_hidden=True + ) greeting.run_tests( """\ diff --git a/pyparsing/__init__.py b/pyparsing/__init__.py index 15ce647c..832b53df 100644 --- a/pyparsing/__init__.py +++ b/pyparsing/__init__.py @@ -121,7 +121,7 @@ def __repr__(self): __version_info__ = version_info(3, 2, 2, "final", 1) -__version_time__ = "05 Jan 2025 01:20 UTC" +__version_time__ = "06 Jan 2025 01:52 UTC" __version__ = __version_info__.__version__ __versionTime__ = __version_time__ __author__ = "Paul McGuire " diff --git a/pyparsing/core.py b/pyparsing/core.py index 7f570d50..53f99c38 100644 --- a/pyparsing/core.py +++ b/pyparsing/core.py @@ -2264,6 +2264,7 @@ def create_diagram( show_results_names: bool = False, show_groups: bool = False, embed: bool = False, + show_hidden: bool = False, **kwargs, ) -> None: """ @@ -2278,6 +2279,7 @@ def create_diagram( - ``show_results_names`` - bool flag whether diagram should show annotations for defined results names - ``show_groups`` - bool flag whether groups should be highlighted with an unlabeled surrounding box + - ``show_hidden`` - bool flag to show diagram elements for internal elements that are usually hidden - ``embed`` - bool flag whether generated HTML should omit , , and tags to embed the resulting HTML in an enclosing HTML source - ``head`` - str containing additional HTML to insert into the section of the generated code; @@ -2303,6 +2305,7 @@ def create_diagram( vertical=vertical, show_results_names=show_results_names, show_groups=show_groups, + show_hidden=show_hidden, diagram_kwargs=kwargs, ) if not isinstance(output_html, (str, Path)): @@ -3830,6 +3833,7 @@ def __init__(self, 
tag_name: str, value: Any = True): self.tag_name = tag_name self.tag_value = value self.add_parse_action(self._add_tag) + self.show_in_diagram = False def _add_tag(self, tokens: ParseResults): tokens[self.tag_name] = self.tag_value diff --git a/pyparsing/diagram/__init__.py b/pyparsing/diagram/__init__.py index 56526b74..3b3dfeef 100644 --- a/pyparsing/diagram/__init__.py +++ b/pyparsing/diagram/__init__.py @@ -226,6 +226,7 @@ def to_railroad( vertical: int = 3, show_results_names: bool = False, show_groups: bool = False, + show_hidden: bool = False, ) -> list[NamedDiagram]: """ Convert a pyparsing element tree into a list of diagrams. This is the recommended entrypoint to diagram @@ -238,6 +239,8 @@ def to_railroad( included in the diagram :param show_groups - bool to indicate whether groups should be highlighted with an unlabeled surrounding box + :param show_hidden - bool to indicate whether internal elements that are typically hidden + should be shown """ # Convert the whole tree underneath the root lookup = ConverterState(diagram_kwargs=diagram_kwargs or {}) @@ -248,6 +251,7 @@ def to_railroad( vertical=vertical, show_results_names=show_results_names, show_groups=show_groups, + show_hidden=show_hidden, ) root_id = id(element) @@ -453,6 +457,7 @@ def _inner( name_hint: str = None, show_results_names: bool = False, show_groups: bool = False, + show_hidden: bool = False, ) -> typing.Optional[EditablePartial]: ret = fn( element, @@ -463,6 +468,7 @@ def _inner( name_hint, show_results_names, show_groups, + show_hidden, ) # apply annotation for results name, if present @@ -555,6 +561,7 @@ def _to_diagram_element( name_hint=propagated_name, show_results_names=show_results_names, show_groups=show_groups, + show_hidden=show_hidden, ) # If the element isn't worth extracting, we always treat it as the first time we say it @@ -641,6 +648,7 @@ def _to_diagram_element( name_hint, show_results_names, show_groups, + show_hidden, ] return _to_diagram_element( 
(~element.not_ender.expr + element.expr)[1, ...].set_name(element.name), @@ -657,6 +665,7 @@ def _to_diagram_element( name_hint, show_results_names, show_groups, + show_hidden, ] return _to_diagram_element( (~element.not_ender.expr + element.expr)[...].set_name(element.name), @@ -707,6 +716,7 @@ def _to_diagram_element( index=i, show_results_names=show_results_names, show_groups=show_groups, + show_hidden=show_hidden, ) # Some elements don't need to be shown in the diagram From b683e30475de2663c37c5c2e454af5ad5757e262 Mon Sep 17 00:00:00 2001 From: "FeRD (Frank Dana)" Date: Tue, 7 Jan 2025 20:00:30 -0500 Subject: [PATCH 09/42] Docs: Update ParseException's docstring example --- pyparsing/exceptions.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/pyparsing/exceptions.py b/pyparsing/exceptions.py index 3ff8f9b5..6312a998 100644 --- a/pyparsing/exceptions.py +++ b/pyparsing/exceptions.py @@ -272,12 +272,11 @@ class ParseException(ParseBaseException): try: integer.parse_string("ABC") except ParseException as pe: - print(pe) - print(f"column: {pe.column}") + print(pe, f"column: {pe.column}") prints:: - Expected integer (at char 0), (line:1, col:1) column: 1 + Expected integer, found 'ABC' (at char 0), (line:1, col:1) column: 1 """ From 4068022ec34b85003f314fe3c86fd8f72b4f2cf8 Mon Sep 17 00:00:00 2001 From: "FeRD (Frank Dana)" Date: Tue, 7 Jan 2025 00:36:24 -0500 Subject: [PATCH 10/42] Docs: Correct encoding damage in What's New 3.2 The mention of `complex_chemical_formulas.py` being included "to add parsing capability for formulas such as..." was followed by a string of line-noise that looked nothing like a chemical formula, and was clearly not UTF-8 text or anything close to it. Replace with real UTF-8 from the actual source file. 
Signed-off-by: FeRD (Frank Dana) --- docs/whats_new_in_3_2.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/whats_new_in_3_2.rst b/docs/whats_new_in_3_2.rst index daf85ad4..c210d800 100644 --- a/docs/whats_new_in_3_2.rst +++ b/docs/whats_new_in_3_2.rst @@ -117,7 +117,7 @@ New / Enhanced Examples Robert Nystrom's "Crafting Interpreters" (http://craftinginterpreters.com/). - Added ``complex_chemical_formulas.py`` example, to add parsing capability for - formulas such as "3(C₆Hâ‚…OH)â‚‚". + formulas such as "Ba(BrO₃)₂·H₂O". - Updated ``tag_emitter.py`` to use new ``Tag`` class, introduced in pyparsing 3.1.3. From 56a1957161973fade7662350f9359c20083c9e7e Mon Sep 17 00:00:00 2001 From: "FeRD (Frank Dana)" Date: Wed, 8 Jan 2025 03:07:52 -0500 Subject: [PATCH 11/42] Add return type annotations to __init__ methods Dunder init methods return None, which seems pretty obvious and easily inferred, but unless they're annotated that way, running `mypy --strict` will complain. (Including in external subclasses.) --- pyparsing/actions.py | 2 +- pyparsing/core.py | 104 +++++++++++++++++----------------- pyparsing/diagram/__init__.py | 8 +-- pyparsing/exceptions.py | 4 +- pyparsing/results.py | 2 +- pyparsing/util.py | 2 +- 6 files changed, 61 insertions(+), 61 deletions(-) diff --git a/pyparsing/actions.py b/pyparsing/actions.py index f491aab9..0153cc71 100644 --- a/pyparsing/actions.py +++ b/pyparsing/actions.py @@ -22,7 +22,7 @@ class OnlyOnce: Note: parse action signature must include all 3 arguments. """ - def __init__(self, method_call: Callable[[str, int, ParseResults], Any]): + def __init__(self, method_call: Callable[[str, int, ParseResults], Any]) -> None: from .core import _trim_arity self.callable = _trim_arity(method_call) diff --git a/pyparsing/core.py b/pyparsing/core.py index 53f99c38..83c93ec6 100644 --- a/pyparsing/core.py +++ b/pyparsing/core.py @@ -246,7 +246,7 @@ class _ParseActionIndexError(Exception): ParserElement parseImpl methods. 
""" - def __init__(self, msg: str, exc: BaseException): + def __init__(self, msg: str, exc: BaseException) -> None: self.msg: str = msg self.exc: BaseException = exc @@ -454,7 +454,7 @@ class DebugActions(NamedTuple): debug_match: typing.Optional[DebugSuccessAction] debug_fail: typing.Optional[DebugExceptionAction] - def __init__(self, savelist: bool = False): + def __init__(self, savelist: bool = False) -> None: self.parseAction: list[ParseAction] = list() self.failAction: typing.Optional[ParseFailAction] = None self.customName: str = None # type: ignore[assignment] @@ -2355,7 +2355,7 @@ def create_diagram( class _PendingSkip(ParserElement): # internal placeholder class to hold a place were '...' is added to a parser element, # once another ParserElement is added, this placeholder will be replaced with a SkipTo - def __init__(self, expr: ParserElement, must_skip: bool = False): + def __init__(self, expr: ParserElement, must_skip: bool = False) -> None: super().__init__() self.anchor = expr self.must_skip = must_skip @@ -2398,7 +2398,7 @@ class Token(ParserElement): matching patterns. """ - def __init__(self): + def __init__(self) -> None: super().__init__(savelist=False) def _generateDefaultName(self) -> str: @@ -2410,7 +2410,7 @@ class NoMatch(Token): A token that will never match. """ - def __init__(self): + def __init__(self) -> None: super().__init__() self.mayReturnEmpty = True self.mayIndexError = False @@ -2452,7 +2452,7 @@ def __new__(cls, match_string: str = "", *, matchString: str = ""): def __getnewargs__(self): return (self.match,) - def __init__(self, match_string: str = "", *, matchString: str = ""): + def __init__(self, match_string: str = "", *, matchString: str = "") -> None: super().__init__() match_string = matchString or match_string self.match = match_string @@ -2478,7 +2478,7 @@ class Empty(Literal): An empty token, will always match. 
""" - def __init__(self, match_string="", *, matchString=""): + def __init__(self, match_string="", *, matchString="") -> None: super().__init__("") self.mayReturnEmpty = True self.mayIndexError = False @@ -2537,7 +2537,7 @@ def __init__( *, matchString: str = "", identChars: typing.Optional[str] = None, - ): + ) -> None: super().__init__() identChars = identChars or ident_chars if identChars is None: @@ -2631,7 +2631,7 @@ class CaselessLiteral(Literal): (Contrast with example for :class:`CaselessKeyword`.) """ - def __init__(self, match_string: str = "", *, matchString: str = ""): + def __init__(self, match_string: str = "", *, matchString: str = "") -> None: match_string = matchString or match_string super().__init__(match_string.upper()) # Preserve the defining literal. @@ -2663,7 +2663,7 @@ def __init__( *, matchString: str = "", identChars: typing.Optional[str] = None, - ): + ) -> None: identChars = identChars or ident_chars match_string = matchString or match_string super().__init__(match_string, identChars, caseless=True) @@ -2711,7 +2711,7 @@ def __init__( *, maxMismatches: int = 1, caseless=False, - ): + ) -> None: maxMismatches = max_mismatches if max_mismatches is not None else maxMismatches super().__init__() self.match_string = match_string @@ -2837,7 +2837,7 @@ def __init__( bodyChars: typing.Optional[str] = None, asKeyword: bool = False, excludeChars: typing.Optional[str] = None, - ): + ) -> None: initChars = initChars or init_chars bodyChars = bodyChars or body_chars asKeyword = asKeyword or as_keyword @@ -3021,7 +3021,7 @@ def __init__( *, asKeyword: bool = False, excludeChars: typing.Optional[str] = None, - ): + ) -> None: asKeyword = asKeyword or as_keyword excludeChars = excludeChars or exclude_chars super().__init__( @@ -3063,7 +3063,7 @@ def __init__( *, asGroupList: bool = False, asMatch: bool = False, - ): + ) -> None: """The parameters ``pattern`` and ``flags`` are passed to the ``re.compile()`` function as-is. 
See the Python `re module <https://docs.python.org/3/library/re.html>`_ module for an @@ -3261,7 +3261,7 @@ def __init__( unquoteResults: bool = True, endQuoteChar: typing.Optional[str] = None, convertWhitespaceEscapes: bool = True, - ): + ) -> None: super().__init__() esc_char = escChar or esc_char esc_quote = escQuote or esc_quote @@ -3468,7 +3468,7 @@ def __init__( exact: int = 0, *, notChars: str = "", - ): + ) -> None: super().__init__() self.skipWhitespace = False self.notChars = not_chars or notChars @@ -3555,7 +3555,7 @@ class White(Token): "\u3000": "<IDEOGRAPHIC_SPACE>", } - def __init__(self, ws: str = " \t\r\n", min: int = 1, max: int = 0, exact: int = 0): + def __init__(self, ws: str = " \t\r\n", min: int = 1, max: int = 0, exact: int = 0) -> None: super().__init__() self.matchWhite = ws self.set_whitespace_chars( @@ -3597,7 +3597,7 @@ def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType: class PositionToken(Token): - def __init__(self): + def __init__(self) -> None: super().__init__() self.mayReturnEmpty = True self.mayIndexError = False @@ -3608,7 +3608,7 @@ class GoToColumn(PositionToken): tabular report scraping. 
""" - def __init__(self, colno: int): + def __init__(self, colno: int) -> None: super().__init__() self.col = colno @@ -3660,7 +3660,7 @@ class LineStart(PositionToken): """ - def __init__(self): + def __init__(self) -> None: super().__init__() self.leave_whitespace() self.orig_whiteChars = set() | self.whiteChars @@ -3691,7 +3691,7 @@ class LineEnd(PositionToken): parse string """ - def __init__(self): + def __init__(self) -> None: super().__init__() self.whiteChars.discard("\n") self.set_whitespace_chars(self.whiteChars, copy_defaults=False) @@ -3714,7 +3714,7 @@ class StringStart(PositionToken): string """ - def __init__(self): + def __init__(self) -> None: super().__init__() self.set_name("start of text") @@ -3731,7 +3731,7 @@ class StringEnd(PositionToken): Matches if current position is at the end of the parse string """ - def __init__(self): + def __init__(self) -> None: super().__init__() self.set_name("end of text") @@ -3756,7 +3756,7 @@ class WordStart(PositionToken): a line. """ - def __init__(self, word_chars: str = printables, *, wordChars: str = printables): + def __init__(self, word_chars: str = printables, *, wordChars: str = printables) -> None: wordChars = word_chars if wordChars == printables else wordChars super().__init__() self.wordChars = set(wordChars) @@ -3781,7 +3781,7 @@ class WordEnd(PositionToken): of a line. 
""" - def __init__(self, word_chars: str = printables, *, wordChars: str = printables): + def __init__(self, word_chars: str = printables, *, wordChars: str = printables) -> None: wordChars = word_chars if wordChars == printables else wordChars super().__init__() self.wordChars = set(wordChars) @@ -3825,7 +3825,7 @@ class Tag(Token): - enthusiastic: True """ - def __init__(self, tag_name: str, value: Any = True): + def __init__(self, tag_name: str, value: Any = True) -> None: super().__init__() self.mayReturnEmpty = True self.mayIndexError = False @@ -3847,7 +3847,7 @@ class ParseExpression(ParserElement): post-processing parsed tokens. """ - def __init__(self, exprs: typing.Iterable[ParserElement], savelist: bool = False): + def __init__(self, exprs: typing.Iterable[ParserElement], savelist: bool = False) -> None: super().__init__(savelist) self.exprs: list[ParserElement] if isinstance(exprs, _generatorType): @@ -4032,7 +4032,7 @@ class And(ParseExpression): """ class _ErrorStop(Empty): - def __init__(self, *args, **kwargs): + def __init__(self, *args, **kwargs) -> None: super().__init__(*args, **kwargs) self.leave_whitespace() @@ -4043,7 +4043,7 @@ def __init__( self, exprs_arg: typing.Iterable[Union[ParserElement, str]], savelist: bool = True, - ): + ) -> None: # instantiate exprs as a list, converting strs to ParserElements exprs: list[ParserElement] = [ self._literalStringClass(e) if isinstance(e, str) else e for e in exprs_arg @@ -4199,7 +4199,7 @@ class Or(ParseExpression): [['123'], ['3.1416'], ['789']] """ - def __init__(self, exprs: typing.Iterable[ParserElement], savelist: bool = False): + def __init__(self, exprs: typing.Iterable[ParserElement], savelist: bool = False) -> None: super().__init__(exprs, savelist) if self.exprs: self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs) @@ -4355,7 +4355,7 @@ class MatchFirst(ParseExpression): print(number.search_string("123 3.1416 789")) # Better -> [['123'], ['3.1416'], ['789']] """ - def 
__init__(self, exprs: typing.Iterable[ParserElement], savelist: bool = False): + def __init__(self, exprs: typing.Iterable[ParserElement], savelist: bool = False) -> None: super().__init__(exprs, savelist) if self.exprs: self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs) @@ -4503,7 +4503,7 @@ class Each(ParseExpression): - size: 20 """ - def __init__(self, exprs: typing.Iterable[ParserElement], savelist: bool = True): + def __init__(self, exprs: typing.Iterable[ParserElement], savelist: bool = True) -> None: super().__init__(exprs, savelist) if self.exprs: self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs) @@ -4624,7 +4624,7 @@ class ParseElementEnhance(ParserElement): post-processing parsed tokens. """ - def __init__(self, expr: Union[ParserElement, str], savelist: bool = False): + def __init__(self, expr: Union[ParserElement, str], savelist: bool = False) -> None: super().__init__(savelist) if isinstance(expr, str_type): expr_str = typing.cast(str, expr) @@ -4736,20 +4736,20 @@ class IndentedBlock(ParseElementEnhance): """ class _Indent(Empty): - def __init__(self, ref_col: int): + def __init__(self, ref_col: int) -> None: super().__init__() self.errmsg = f"expected indent at column {ref_col}" self.add_condition(lambda s, l, t: col(l, s) == ref_col) class _IndentGreater(Empty): - def __init__(self, ref_col: int): + def __init__(self, ref_col: int) -> None: super().__init__() self.errmsg = f"expected indent at column greater than {ref_col}" self.add_condition(lambda s, l, t: col(l, s) > ref_col) def __init__( self, expr: ParserElement, *, recursive: bool = False, grouped: bool = True - ): + ) -> None: super().__init__(expr, savelist=True) # if recursive: # raise NotImplementedError("IndentedBlock with recursive is not implemented") @@ -4804,7 +4804,7 @@ class AtStringStart(ParseElementEnhance): # raises ParseException """ - def __init__(self, expr: Union[ParserElement, str]): + def __init__(self, expr: Union[ParserElement, str]) -> None: 
super().__init__(expr) self.callPreparse = False @@ -4837,7 +4837,7 @@ class AtLineStart(ParseElementEnhance): """ - def __init__(self, expr: Union[ParserElement, str]): + def __init__(self, expr: Union[ParserElement, str]) -> None: super().__init__(expr) self.callPreparse = False @@ -4870,7 +4870,7 @@ class FollowedBy(ParseElementEnhance): [['shape', 'SQUARE'], ['color', 'BLACK'], ['posn', 'upper left']] """ - def __init__(self, expr: Union[ParserElement, str]): + def __init__(self, expr: Union[ParserElement, str]) -> None: super().__init__(expr) self.mayReturnEmpty = True @@ -4913,7 +4913,7 @@ class PrecededBy(ParseElementEnhance): """ - def __init__(self, expr: Union[ParserElement, str], retreat: int = 0): + def __init__(self, expr: Union[ParserElement, str], retreat: int = 0) -> None: super().__init__(expr) self.expr = self.expr().leave_whitespace() self.mayReturnEmpty = True @@ -5031,7 +5031,7 @@ class NotAny(ParseElementEnhance): integer = Word(nums) + ~Char(".") """ - def __init__(self, expr: Union[ParserElement, str]): + def __init__(self, expr: Union[ParserElement, str]) -> None: super().__init__(expr) # do NOT use self.leave_whitespace(), don't want to propagate to exprs # self.leave_whitespace() @@ -5056,7 +5056,7 @@ def __init__( stop_on: typing.Optional[Union[ParserElement, str]] = None, *, stopOn: typing.Optional[Union[ParserElement, str]] = None, - ): + ) -> None: super().__init__(expr) stopOn = stopOn or stop_on self.saveAsList = True @@ -5177,7 +5177,7 @@ def __init__( stop_on: typing.Optional[Union[ParserElement, str]] = None, *, stopOn: typing.Optional[Union[ParserElement, str]] = None, - ): + ) -> None: super().__init__(expr, stopOn=stopOn or stop_on) self.mayReturnEmpty = True @@ -5201,7 +5201,7 @@ def __init__( max: typing.Optional[int] = None, *, allow_trailing_delim: bool = False, - ): + ) -> None: """Helper to define a delimited list of expressions - the delimiter defaults to ','. 
By default, the list elements and delimiters can have intervening whitespace, and comments, but this can be @@ -5308,7 +5308,7 @@ class Opt(ParseElementEnhance): def __init__( self, expr: Union[ParserElement, str], default: Any = __optionalNotMatched - ): + ) -> None: super().__init__(expr, savelist=False) self.saveAsList = self.expr.saveAsList self.defaultValue = default @@ -5413,7 +5413,7 @@ def __init__( fail_on: typing.Optional[Union[ParserElement, str]] = None, *, failOn: typing.Optional[Union[ParserElement, str]] = None, - ): + ) -> None: super().__init__(other) failOn = failOn or fail_on self.ignoreExpr = ignore @@ -5524,7 +5524,7 @@ class Forward(ParseElementEnhance): parser created using ``Forward``. """ - def __init__(self, other: typing.Optional[Union[ParserElement, str]] = None): + def __init__(self, other: typing.Optional[Union[ParserElement, str]] = None) -> None: self.caller_frame = traceback.extract_stack(limit=2)[0] super().__init__(other, savelist=False) # type: ignore[arg-type] self.lshift_line = None @@ -5764,7 +5764,7 @@ class TokenConverter(ParseElementEnhance): Abstract subclass of :class:`ParseElementEnhance`, for converting parsed results. 
""" - def __init__(self, expr: Union[ParserElement, str], savelist=False): + def __init__(self, expr: Union[ParserElement, str], savelist=False) -> None: super().__init__(expr) # , savelist) self.saveAsList = False @@ -5795,7 +5795,7 @@ def __init__( adjacent: bool = True, *, joinString: typing.Optional[str] = None, - ): + ) -> None: super().__init__(expr) joinString = joinString if joinString is not None else join_string # suppress whitespace-stripping in contained parse expressions, but re-enable it on the Combine itself @@ -5847,7 +5847,7 @@ class Group(TokenConverter): # -> ['fn', ['a', 'b', '100']] """ - def __init__(self, expr: ParserElement, aslist: bool = False): + def __init__(self, expr: ParserElement, aslist: bool = False) -> None: super().__init__(expr) self.saveAsList = True self._asPythonList = aslist @@ -5905,7 +5905,7 @@ class Dict(TokenConverter): See more examples at :class:`ParseResults` of accessing fields by results name. """ - def __init__(self, expr: ParserElement, asdict: bool = False): + def __init__(self, expr: ParserElement, asdict: bool = False) -> None: super().__init__(expr) self.saveAsList = True self._asPythonDict = asdict @@ -5981,7 +5981,7 @@ class Suppress(TokenConverter): (See also :class:`DelimitedList`.) 
""" - def __init__(self, expr: Union[ParserElement, str], savelist: bool = False): + def __init__(self, expr: Union[ParserElement, str], savelist: bool = False) -> None: if expr is ...: expr = _PendingSkip(NoMatch()) super().__init__(expr) diff --git a/pyparsing/diagram/__init__.py b/pyparsing/diagram/__init__.py index 3b3dfeef..526cf386 100644 --- a/pyparsing/diagram/__init__.py +++ b/pyparsing/diagram/__init__.py @@ -120,7 +120,7 @@ class EachItem(railroad.Group): all_label = "[ALL]" - def __init__(self, *items): + def __init__(self, *items) -> None: choice_item = railroad.Choice(len(items) - 1, *items) one_or_more_item = railroad.OneOrMore(item=choice_item) super().__init__(one_or_more_item, label=self.all_label) @@ -131,7 +131,7 @@ class AnnotatedItem(railroad.Group): Simple subclass of Group that creates an annotation label """ - def __init__(self, label: str, item): + def __init__(self, label: str, item) -> None: super().__init__(item=item, label=f"[{label}]" if label else "") @@ -144,7 +144,7 @@ class EditablePartial(Generic[T]): # We need this here because the railroad constructors actually transform the data, so can't be called until the # entire tree is assembled - def __init__(self, func: Callable[..., T], args: list, kwargs: dict): + def __init__(self, func: Callable[..., T], args: list, kwargs: dict) -> None: self.func = func self.args = args self.kwargs = kwargs @@ -352,7 +352,7 @@ class ConverterState: Stores some state that persists between recursions into the element tree """ - def __init__(self, diagram_kwargs: typing.Optional[dict] = None): + def __init__(self, diagram_kwargs: typing.Optional[dict] = None) -> None: #: A dictionary mapping ParserElements to state relating to them self._element_diagram_states: dict[int, ElementState] = {} #: A dictionary mapping ParserElement IDs to subdiagrams generated from them diff --git a/pyparsing/exceptions.py b/pyparsing/exceptions.py index 6312a998..fe07a855 100644 --- a/pyparsing/exceptions.py +++ 
b/pyparsing/exceptions.py @@ -52,7 +52,7 @@ def __init__( loc: int = 0, msg: typing.Optional[str] = None, elem=None, - ): + ) -> None: if msg is None: msg, pstr = pstr, "" @@ -306,7 +306,7 @@ class RecursiveGrammarException(Exception): Deprecated: only used by deprecated method ParserElement.validate. """ - def __init__(self, parseElementList): + def __init__(self, parseElementList) -> None: self.parseElementTrace = parseElementList def __str__(self) -> str: diff --git a/pyparsing/results.py b/pyparsing/results.py index be834b7e..95623035 100644 --- a/pyparsing/results.py +++ b/pyparsing/results.py @@ -23,7 +23,7 @@ class _ParseResultsWithOffset: tup: tuple[ParseResults, int] __slots__ = ["tup"] - def __init__(self, p1: ParseResults, p2: int): + def __init__(self, p1: ParseResults, p2: int) -> None: self.tup: tuple[ParseResults, int] = (p1, p2) def __getitem__(self, i): diff --git a/pyparsing/util.py b/pyparsing/util.py index 03a60d4f..6930bee6 100644 --- a/pyparsing/util.py +++ b/pyparsing/util.py @@ -193,7 +193,7 @@ class _GroupConsecutive: (3, iter(['p', 'q', 'r', 's'])) """ - def __init__(self): + def __init__(self) -> None: self.prev = 0 self.counter = itertools.count() self.value = -1 From 74b2b6dc151c8a24fdcc8dea830e9e48ebb10468 Mon Sep 17 00:00:00 2001 From: ptmcg Date: Wed, 8 Jan 2025 22:19:54 -0600 Subject: [PATCH 12/42] Update CHANGES to include notes on new type annotations. Plus some black reformatting. --- CHANGES | 3 +++ pyparsing/__init__.py | 2 +- pyparsing/core.py | 32 ++++++++++++++++++++++++-------- 3 files changed, 28 insertions(+), 9 deletions(-) diff --git a/CHANGES b/CHANGES index 7a3b443c..219f06f3 100644 --- a/CHANGES +++ b/CHANGES @@ -17,6 +17,9 @@ Version 3.2.2 - under development - Better exception message for `MatchFirst` and `Or` expressions, showing all alternatives rather than just the first one. Fixes Issue #592, reported by Focke, thanks! 
+- Added return type annotation of "-> None" for all `__init__()` methods, to satisfy + `mypy --strict` type checking. PR submitted by FeRD, thank you! + - Added optional argument `show_hidden` to create_diagram` to show elements that are used internally by pyparsing, but are not part of the actual parser grammar. For instance, the `Tag` class can insert values into the parsed diff --git a/pyparsing/__init__.py b/pyparsing/__init__.py index 832b53df..b8d29309 100644 --- a/pyparsing/__init__.py +++ b/pyparsing/__init__.py @@ -121,7 +121,7 @@ def __repr__(self): __version_info__ = version_info(3, 2, 2, "final", 1) -__version_time__ = "06 Jan 2025 01:52 UTC" +__version_time__ = "09 Jan 2025 04:14 UTC" __version__ = __version_info__.__version__ __versionTime__ = __version_time__ __author__ = "Paul McGuire " diff --git a/pyparsing/core.py b/pyparsing/core.py index 83c93ec6..813e1141 100644 --- a/pyparsing/core.py +++ b/pyparsing/core.py @@ -3555,7 +3555,9 @@ class White(Token): "\u3000": "", } - def __init__(self, ws: str = " \t\r\n", min: int = 1, max: int = 0, exact: int = 0) -> None: + def __init__( + self, ws: str = " \t\r\n", min: int = 1, max: int = 0, exact: int = 0 + ) -> None: super().__init__() self.matchWhite = ws self.set_whitespace_chars( @@ -3756,7 +3758,9 @@ class WordStart(PositionToken): a line. """ - def __init__(self, word_chars: str = printables, *, wordChars: str = printables) -> None: + def __init__( + self, word_chars: str = printables, *, wordChars: str = printables + ) -> None: wordChars = word_chars if wordChars == printables else wordChars super().__init__() self.wordChars = set(wordChars) @@ -3781,7 +3785,9 @@ class WordEnd(PositionToken): of a line. 
""" - def __init__(self, word_chars: str = printables, *, wordChars: str = printables) -> None: + def __init__( + self, word_chars: str = printables, *, wordChars: str = printables + ) -> None: wordChars = word_chars if wordChars == printables else wordChars super().__init__() self.wordChars = set(wordChars) @@ -3847,7 +3853,9 @@ class ParseExpression(ParserElement): post-processing parsed tokens. """ - def __init__(self, exprs: typing.Iterable[ParserElement], savelist: bool = False) -> None: + def __init__( + self, exprs: typing.Iterable[ParserElement], savelist: bool = False + ) -> None: super().__init__(savelist) self.exprs: list[ParserElement] if isinstance(exprs, _generatorType): @@ -4199,7 +4207,9 @@ class Or(ParseExpression): [['123'], ['3.1416'], ['789']] """ - def __init__(self, exprs: typing.Iterable[ParserElement], savelist: bool = False) -> None: + def __init__( + self, exprs: typing.Iterable[ParserElement], savelist: bool = False + ) -> None: super().__init__(exprs, savelist) if self.exprs: self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs) @@ -4355,7 +4365,9 @@ class MatchFirst(ParseExpression): print(number.search_string("123 3.1416 789")) # Better -> [['123'], ['3.1416'], ['789']] """ - def __init__(self, exprs: typing.Iterable[ParserElement], savelist: bool = False) -> None: + def __init__( + self, exprs: typing.Iterable[ParserElement], savelist: bool = False + ) -> None: super().__init__(exprs, savelist) if self.exprs: self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs) @@ -4503,7 +4515,9 @@ class Each(ParseExpression): - size: 20 """ - def __init__(self, exprs: typing.Iterable[ParserElement], savelist: bool = True) -> None: + def __init__( + self, exprs: typing.Iterable[ParserElement], savelist: bool = True + ) -> None: super().__init__(exprs, savelist) if self.exprs: self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs) @@ -5524,7 +5538,9 @@ class Forward(ParseElementEnhance): parser created using ``Forward``. 
""" - def __init__(self, other: typing.Optional[Union[ParserElement, str]] = None) -> None: + def __init__( + self, other: typing.Optional[Union[ParserElement, str]] = None + ) -> None: self.caller_frame = traceback.extract_stack(limit=2)[0] super().__init__(other, savelist=False) # type: ignore[arg-type] self.lshift_line = None From b6719a63262af8b32c78564b25aad95f65ffdfb4 Mon Sep 17 00:00:00 2001 From: ptmcg Date: Wed, 8 Jan 2025 22:49:29 -0600 Subject: [PATCH 13/42] Update CHANGES to include notes on new type annotations. Plus some black reformatting. --- CHANGES | 5 +++++ pyparsing/__init__.py | 2 +- pyparsing/helpers.py | 25 ++++++++++++++++++------- tests/test_unit.py | 40 ++++++++++++++++++++++++++++++++++++++++ 4 files changed, 64 insertions(+), 8 deletions(-) diff --git a/CHANGES b/CHANGES index 219f06f3..0d52c21c 100644 --- a/CHANGES +++ b/CHANGES @@ -14,6 +14,11 @@ Devin J. Pohly in structuring the code to enable this peaceful transition.) Version 3.2.2 - under development --------------------------------- +- Fixed bug in `nested_expr` where nested contents were stripped of whitespace when + the default whitespace characters were cleared (raised in this StackOverflow + question https://stackoverflow.com/questions/79327649 by Ben Alan). Also addressed + bug in resolving PEP8 compliant argument name and legacy argument name. + - Better exception message for `MatchFirst` and `Or` expressions, showing all alternatives rather than just the first one. Fixes Issue #592, reported by Focke, thanks! 
diff --git a/pyparsing/__init__.py b/pyparsing/__init__.py index b8d29309..8800f850 100644 --- a/pyparsing/__init__.py +++ b/pyparsing/__init__.py @@ -121,7 +121,7 @@ def __repr__(self): __version_info__ = version_info(3, 2, 2, "final", 1) -__version_time__ = "09 Jan 2025 04:14 UTC" +__version_time__ = "09 Jan 2025 04:49 UTC" __version__ = __version_info__.__version__ __versionTime__ = __version_time__ __author__ = "Paul McGuire " diff --git a/pyparsing/helpers.py b/pyparsing/helpers.py index 4e9807ca..82ca4107 100644 --- a/pyparsing/helpers.py +++ b/pyparsing/helpers.py @@ -411,13 +411,16 @@ def locatedExpr(expr: ParserElement) -> ParserElement: ) +_NO_IGNORE_EXPR_GIVEN = NoMatch() + + def nested_expr( opener: Union[str, ParserElement] = "(", closer: Union[str, ParserElement] = ")", content: typing.Optional[ParserElement] = None, - ignore_expr: ParserElement = quoted_string(), + ignore_expr: ParserElement = _NO_IGNORE_EXPR_GIVEN, *, - ignoreExpr: ParserElement = quoted_string(), + ignoreExpr: ParserElement = _NO_IGNORE_EXPR_GIVEN, ) -> ParserElement: """Helper method for defining nested lists enclosed in opening and closing delimiters (``"("`` and ``")"`` are the default). 
@@ -487,7 +490,10 @@ def nested_expr( dec_to_hex (int) args: [['char', 'hchar']] """ if ignoreExpr != ignore_expr: - ignoreExpr = ignore_expr if ignoreExpr == quoted_string() else ignoreExpr + ignoreExpr = ignore_expr if ignoreExpr is _NO_IGNORE_EXPR_GIVEN else ignoreExpr + if ignoreExpr is _NO_IGNORE_EXPR_GIVEN: + ignoreExpr = quoted_string() + if opener == closer: raise ValueError("opening and closing strings cannot be the same") if content is None: @@ -504,11 +510,11 @@ def nested_expr( exact=1, ) ) - ).set_parse_action(lambda t: t[0].strip()) + ) else: content = empty.copy() + CharsNotIn( opener + closer + ParserElement.DEFAULT_WHITE_CHARS - ).set_parse_action(lambda t: t[0].strip()) + ) else: if ignoreExpr is not None: content = Combine( @@ -518,7 +524,7 @@ def nested_expr( + ~Literal(closer) + CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS, exact=1) ) - ).set_parse_action(lambda t: t[0].strip()) + ) else: content = Combine( OneOrMore( @@ -526,11 +532,16 @@ def nested_expr( + ~Literal(closer) + CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS, exact=1) ) - ).set_parse_action(lambda t: t[0].strip()) + ) else: raise ValueError( "opening and closing arguments must be strings if no content expression is given" ) + if ParserElement.DEFAULT_WHITE_CHARS: + content.set_parse_action( + lambda t: t[0].strip(ParserElement.DEFAULT_WHITE_CHARS) + ) + ret = Forward() if ignoreExpr is not None: ret <<= Group( diff --git a/tests/test_unit.py b/tests/test_unit.py index 23c08a88..36f8fa4c 100644 --- a/tests/test_unit.py +++ b/tests/test_unit.py @@ -5361,6 +5361,46 @@ def testNestedExpressions2(self): msg="using different openers and closers shouldn't affect resulting ParseResults", ) + def testNestedExpressions3(self): + + prior_ws_chars = pp.ParserElement.DEFAULT_WHITE_CHARS + with resetting(pp.ParserElement, "DEFAULT_WHITE_CHARS"): + pp.ParserElement.set_default_whitespace_chars('') + + input_str = dedent( + """\ + selector + { + a:b; + c:d; + selector + { + a:b; + c:d; + } + y:z; 
+ }""" + ) + + print(ppt.with_line_numbers(input_str, 1, 100)) + + nested_result = pp.nested_expr('{', '}').parse_string("{" + input_str + "}").asList() + expected_result = [ + [ + 'selector\n', + [ + '\n a:b;\n c:d;\n selector\n ', + [ + '\n a:b;\n c:d;\n ' + ], + '\n y:z;\n' + ] + ] + ] + self.assertEqual(nested_result, expected_result) + + self.assertEqual(pp.ParserElement.DEFAULT_WHITE_CHARS, prior_ws_chars) + def testWordMinMaxArgs(self): parsers = [ "A" + pp.Word(pp.nums), From 24c5b1863d7977d5046bcee199cd274d6e055f09 Mon Sep 17 00:00:00 2001 From: ptmcg Date: Wed, 8 Jan 2025 23:04:18 -0600 Subject: [PATCH 14/42] Fix unit test bug that did not restore pyparsing state after changing default whitespace characters --- tests/test_unit.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/test_unit.py b/tests/test_unit.py index 36f8fa4c..25952da6 100644 --- a/tests/test_unit.py +++ b/tests/test_unit.py @@ -5364,7 +5364,7 @@ def testNestedExpressions2(self): def testNestedExpressions3(self): prior_ws_chars = pp.ParserElement.DEFAULT_WHITE_CHARS - with resetting(pp.ParserElement, "DEFAULT_WHITE_CHARS"): + with ppt.reset_pyparsing_context(): pp.ParserElement.set_default_whitespace_chars('') input_str = dedent( @@ -5399,6 +5399,7 @@ def testNestedExpressions3(self): ] self.assertEqual(nested_result, expected_result) + # make sure things have been put back properly self.assertEqual(pp.ParserElement.DEFAULT_WHITE_CHARS, prior_ws_chars) def testWordMinMaxArgs(self): From 5db8be432b2ccef2db9f603d5d7745be32403710 Mon Sep 17 00:00:00 2001 From: ptmcg Date: Thu, 23 Jan 2025 12:06:24 -0600 Subject: [PATCH 15/42] Updated ebnf.py to current pyparsing styles, optimized regex for quoted strings --- examples/ebnf.py | 129 ++++---- examples/ebnf_diagram.html | 656 +++++++++++++++++++++++++++++++++++++ examples/ebnftest.py | 85 ++--- 3 files changed, 762 insertions(+), 108 deletions(-) create mode 100644 examples/ebnf_diagram.html diff --git a/examples/ebnf.py 
b/examples/ebnf.py index 4843d40c..df8668bb 100644 --- a/examples/ebnf.py +++ b/examples/ebnf.py @@ -1,14 +1,16 @@ # This module tries to implement ISO 14977 standard with pyparsing. # pyparsing version 1.1 or greater is required. +from typing import Any # ISO 14977 standardize The Extended Backus-Naur Form(EBNF) syntax. # You can read a final draft version here: # https://www.cl.cam.ac.uk/~mgk25/iso-ebnf.html # # Submitted 2004 by Seo Sanghyeon +# Updated to current pyparsing styles 2025 by Paul McGuire # -from pyparsing import * +import pyparsing as pp all_names = """ @@ -27,147 +29,160 @@ syntax """.split() +LBRACK, RBRACK, LBRACE, RBRACE, LPAR, RPAR, DASH, STAR, EQ, SEMI = pp.Suppress.using_each( + "[]{}()-*=;" +) -integer = Word(nums) -meta_identifier = Word(alphas, alphanums + "_") -terminal_string = Suppress("'") + CharsNotIn("'") + Suppress("'") ^ Suppress( - '"' -) + CharsNotIn('"') + Suppress('"') -definitions_list = Forward() -optional_sequence = Suppress("[") + definitions_list + Suppress("]") -repeated_sequence = Suppress("{") + definitions_list + Suppress("}") -grouped_sequence = Suppress("(") + definitions_list + Suppress(")") +integer = pp.common.integer +meta_identifier = pp.common.identifier +terminal_string = pp.Regex( + r'"[^"]*"' + r"|" + r"'[^']*'" +).add_parse_action(pp.remove_quotes) + +definitions_list = pp.Forward() +optional_sequence = LBRACK + definitions_list + RBRACK +repeated_sequence = LBRACE + definitions_list + RBRACE +grouped_sequence = LPAR + definitions_list + RPAR syntactic_primary = ( optional_sequence - ^ repeated_sequence - ^ grouped_sequence - ^ meta_identifier - ^ terminal_string + | repeated_sequence + | grouped_sequence + | meta_identifier + | terminal_string ) -syntactic_factor = Optional(integer + Suppress("*")) + syntactic_primary -syntactic_term = syntactic_factor + Optional(Suppress("-") + syntactic_factor) -single_definition = delimitedList(syntactic_term, ",") -definitions_list << delimitedList(single_definition, 
"|") -syntax_rule = meta_identifier + Suppress("=") + definitions_list + Suppress(";") +syntactic_factor = pp.Optional(integer + STAR) + syntactic_primary +syntactic_term = syntactic_factor + pp.Optional(DASH + syntactic_factor) +single_definition = pp.DelimitedList(syntactic_term, ",") +definitions_list <<= pp.DelimitedList(single_definition, "|") +syntax_rule = meta_identifier + EQ + definitions_list + SEMI ebnfComment = ( - ("(*" + ZeroOrMore(CharsNotIn("*") | ("*" + ~Literal(")"))) + "*)") + ("(*" + (pp.CharsNotIn("*") | ("*" + ~pp.Literal(")")))[...] + "*)") .streamline() .setName("ebnfComment") ) -syntax = OneOrMore(syntax_rule) +syntax = syntax_rule[1, ...] syntax.ignore(ebnfComment) -def do_integer(str, loc, toks): +def do_integer(toks): return int(toks[0]) -def do_meta_identifier(str, loc, toks): +def do_meta_identifier(toks): if toks[0] in symbol_table: return symbol_table[toks[0]] else: forward_count.value += 1 - symbol_table[toks[0]] = Forward() + symbol_table[toks[0]] = pp.Forward() return symbol_table[toks[0]] -def do_terminal_string(str, loc, toks): - return Literal(toks[0]) +def do_terminal_string(toks): + return pp.Literal(toks[0]) -def do_optional_sequence(str, loc, toks): - return Optional(toks[0]) +def do_optional_sequence(toks): + return pp.Optional(toks[0]) -def do_repeated_sequence(str, loc, toks): - return ZeroOrMore(toks[0]) +def do_repeated_sequence(toks): + return pp.ZeroOrMore(toks[0]) -def do_grouped_sequence(str, loc, toks): - return Group(toks[0]) +def do_grouped_sequence(toks): + return pp.Group(toks[0]) -def do_syntactic_primary(str, loc, toks): +def do_syntactic_primary(toks): return toks[0] -def do_syntactic_factor(str, loc, toks): - if len(toks) == 2: +def do_syntactic_factor(toks): + if len(toks) == 2 and toks[0] > 1: # integer * syntactic_primary - return And([toks[1]] * toks[0]) + return pp.And([toks[1]] * toks[0]) else: # syntactic_primary return [toks[0]] -def do_syntactic_term(str, loc, toks): +def do_syntactic_term(toks): 
if len(toks) == 2: # syntactic_factor - syntactic_factor - return NotAny(toks[1]) + toks[0] + return pp.NotAny(toks[1]) + toks[0] else: # syntactic_factor return [toks[0]] -def do_single_definition(str, loc, toks): +def do_single_definition(toks): toks = toks.asList() if len(toks) > 1: # syntactic_term , syntactic_term , ... - return And(toks) + return pp.And(toks) else: # syntactic_term return [toks[0]] -def do_definitions_list(str, loc, toks): +def do_definitions_list(toks): toks = toks.asList() if len(toks) > 1: # single_definition | single_definition | ... - return Or(toks) + return pp.Or(toks) else: # single_definition return [toks[0]] -def do_syntax_rule(str, loc, toks): +def do_syntax_rule(toks): # meta_identifier = definitions_list ; assert toks[0].expr is None, "Duplicate definition" forward_count.value -= 1 - toks[0] << toks[1] + toks[0] <<= toks[1] return [toks[0]] -def do_syntax(str, loc, toks): +def do_syntax(): # syntax_rule syntax_rule ... return symbol_table -symbol_table = {} - +symbol_table: dict[str, pp.Forward] = {} class forward_count: pass - - forward_count.value = 0 + for name in all_names: expr = vars()[name] action = vars()["do_" + name] - expr.setName(name) - expr.setParseAction(action) - # ~ expr.setDebug() + expr.set_name(name) + expr.add_parse_action(action) + # expr.setDebug() -def parse(ebnf, given_table={}): +def parse(ebnf, given_table=None): + given_table = given_table or {} symbol_table.clear() symbol_table.update(given_table) forward_count.value = 0 - table = syntax.parseString(ebnf)[0] - assert forward_count.value == 0, "Missing definition" + table = syntax.parse_string(ebnf, parse_all=True)[0] + # assert forward_count.value == 0, "Missing definition" for name in table: expr = table[name] - expr.setName(name) - # ~ expr.setDebug() + expr.set_name(name) + # expr.set_debug() return table + + +if __name__ == '__main__': + try: + syntax.create_diagram("ebnf_diagram.html") + except Exception as e: + print("Failed to create diagram 
for EBNF syntax parser" + f" - {type(e).__name__}: {e}") diff --git a/examples/ebnf_diagram.html b/examples/ebnf_diagram.html new file mode 100644 index 00000000..74ec4443 --- /dev/null +++ b/examples/ebnf_diagram.html @@ -0,0 +1,656 @@ + + + + + + + + + + + + + + + +
+

syntax

+
+
+ + + + + +syntax_rulesyntax_rule + +
+
+ +
+

syntax_rule

+
+
+ + + + + +meta_identifiermeta_identifier + +'=' +[suppress] +definitions_listdefinitions_list + +';' +[suppress] +
+
+ +
+

definitions_list

+
+
+ + + + + + + +single_definitionsingle_definition + + + + + +'|' +[suppress] +single_definitionsingle_definition + +
+
+ +
+

single_definition

+
+
+ + + + + + +syntactic_termsyntactic_term + + + + + +',' +[suppress] +syntactic_termsyntactic_term + +
+
+ +
+

syntactic_term

+
+
+ + + + + +syntactic_factorsyntactic_factor + + + + +'-' +[suppress] +syntactic_factorsyntactic_factor +
+
+ +
+

syntactic_factor

+
+
+ + + + + + + + +integerinteger + +'*' +[suppress] +syntactic_primarysyntactic_primary +
+
+ +
+

integer

+
+
+ + + + +W:(0-9) +
+
+ +
+

syntactic_primary

+
+
+ + + + + +optional_sequenceoptional_sequence +repeated_sequencerepeated_sequence +grouped_sequencegrouped_sequence +meta_identifiermeta_identifier +terminal_stringterminal_string +
+
+ +
+

optional_sequence

+
+
+ + + + + + +'[' +[suppress] +definitions_listdefinitions_list + +']' +[suppress] +
+
+ +
+

repeated_sequence

+
+
+ + + + + + +'{' +[suppress] +definitions_listdefinitions_list + +'}' +[suppress] +
+
+ +
+

grouped_sequence

+
+
+ + + + + + +'(' +[suppress] +definitions_listdefinitions_list + +')' +[suppress] +
+
+ +
+

meta_identifier

+
+
+ + + + +W:(A-Z_a-zªµºÀ-Ö..., 0-9A-Z_a-zªµ·...) +
+
+ +
+

terminal_string

+
+
+ + + + +"[^"]*"|'[^']*' +
+
+ + + + diff --git a/examples/ebnftest.py b/examples/ebnftest.py index 7b1ff759..4737e0e0 100644 --- a/examples/ebnftest.py +++ b/examples/ebnftest.py @@ -6,70 +6,53 @@ # Submitted 2004 by Seo Sanghyeon # print("Importing pyparsing...") -from pyparsing import * +import pyparsing as pp print("Constructing EBNF parser with pyparsing...") import ebnf grammar = """ -syntax = (syntax_rule), {(syntax_rule)}; -syntax_rule = meta_identifier, '=', definitions_list, ';'; -definitions_list = single_definition, {'|', single_definition}; -single_definition = syntactic_term, {',', syntactic_term}; -syntactic_term = syntactic_factor,['-', syntactic_factor]; -syntactic_factor = [integer, '*'], syntactic_primary; -syntactic_primary = optional_sequence | repeated_sequence | - grouped_sequence | meta_identifier | terminal_string; -optional_sequence = '[', definitions_list, ']'; -repeated_sequence = '{', definitions_list, '}'; -grouped_sequence = '(', definitions_list, ')'; -(* -terminal_string = "'", character - "'", {character - "'"}, "'" | - '"', character - '"', {character - '"'}, '"'; - meta_identifier = letter, {letter | digit}; -integer = digit, {digit}; -*) + (* + ISO 14977 standardize The Extended Backus-Naur Form(EBNF) syntax. 
+ You can read a final draft version here: + https://www.cl.cam.ac.uk/~mgk25/iso-ebnf.html + *) + syntax = (syntax_rule), {(syntax_rule)}; + syntax_rule = meta_identifier, '=', definitions_list, ';'; + definitions_list = single_definition, {'|', single_definition}; + single_definition = syntactic_term, {',', syntactic_term}; + syntactic_term = syntactic_factor,['-', syntactic_factor]; + syntactic_factor = [integer, '*'], syntactic_primary; + syntactic_primary = optional_sequence | repeated_sequence | + grouped_sequence | meta_identifier | terminal_string; + optional_sequence = '[', definitions_list, ']'; + repeated_sequence = '{', definitions_list, '}'; + grouped_sequence = '(', definitions_list, ')'; + (* + terminal_string = "'", character - "'", {character - "'"}, "'" | + '"', character - '"', {character - '"'}, '"'; + meta_identifier = letter, {letter | digit}; + integer = digit, {digit}; + *) """ -table = {} -# ~ table['character'] = Word(printables, exact=1) -# ~ table['letter'] = Word(alphas + '_', exact=1) -# ~ table['digit'] = Word(nums, exact=1) -table["terminal_string"] = sglQuotedString -table["meta_identifier"] = Word(alphas + "_", alphas + "_" + nums) -table["integer"] = Word(nums) +table: dict[str, pp.ParserElement] = {} +# table['character'] = Char(printables) +# table['letter'] = Char(alphas + '_') +# table['digit'] = Char(nums) +table["terminal_string"] = pp.sgl_quoted_string | pp.dbl_quoted_string +table["meta_identifier"] = pp.Word(pp.alphas + "_", pp.alphas + "_" + pp.nums) +table["integer"] = pp.common.integer print("Parsing EBNF grammar with EBNF parser...") parsers = ebnf.parse(grammar, table) ebnf_parser = parsers["syntax"] -commentcharcount = 0 -commentlocs = set() - - -def tallyCommentChars(s, l, t): - global commentcharcount, commentlocs - # only count this comment if we haven't seen it before - if l not in commentlocs: - charCount = len(t[0]) - len(list(filter(str.isspace, t[0]))) - commentcharcount += charCount - commentlocs.add(l) - 
return l, t - - -# ordinarily, these lines wouldn't be necessary, but we are doing extra stuff with the comment expression -ebnf.ebnfComment.setParseAction(tallyCommentChars) ebnf_parser.ignore(ebnf.ebnfComment) -print("Parsing EBNF grammar with generated EBNF parser...\n") -parsed_chars = ebnf_parser.parseString(grammar) -parsed_char_len = len(parsed_chars) +ebnf_parser.create_diagram("ebnftest_diagram.html") -print("],\n".join(str(parsed_chars.asList()).split("],"))) - -# ~ grammar_length = len(grammar) - len(filter(str.isspace, grammar))-commentcharcount - -# ~ assert parsed_char_len == grammar_length - -print("Ok!") +print("Parsing EBNF grammar with generated EBNF parser...\n") +parsed_chars = ebnf_parser.parse_string(grammar, parse_all=True) +print("\n".join(str(pc) for pc in parsed_chars.as_list())) From 81178184bde2d7ee55d66b9b75aff6fec3213987 Mon Sep 17 00:00:00 2001 From: ptmcg Date: Fri, 24 Jan 2025 00:50:03 -0600 Subject: [PATCH 16/42] Fixed change in ebnf.py to make it compatible with unit tests; redo missing definition detection and assert message --- examples/ebnf.py | 29 +++++++++++++---------------- examples/ebnftest.py | 15 ++++++++------- tests/test_unit.py | 21 +++++++++++++++++++++ 3 files changed, 42 insertions(+), 23 deletions(-) diff --git a/examples/ebnf.py b/examples/ebnf.py index df8668bb..96749f7e 100644 --- a/examples/ebnf.py +++ b/examples/ebnf.py @@ -33,8 +33,8 @@ "[]{}()-*=;" ) -integer = pp.common.integer -meta_identifier = pp.common.identifier +integer = pp.common.integer() +meta_identifier = pp.common.identifier() terminal_string = pp.Regex( r'"[^"]*"' r"|" @@ -76,7 +76,6 @@ def do_meta_identifier(toks): if toks[0] in symbol_table: return symbol_table[toks[0]] else: - forward_count.value += 1 symbol_table[toks[0]] = pp.Forward() return symbol_table[toks[0]] @@ -142,7 +141,6 @@ def do_definitions_list(toks): def do_syntax_rule(toks): # meta_identifier = definitions_list ; assert toks[0].expr is None, "Duplicate definition" - 
forward_count.value -= 1 toks[0] <<= toks[1] return [toks[0]] @@ -152,12 +150,6 @@ def do_syntax(): return symbol_table -symbol_table: dict[str, pp.Forward] = {} - -class forward_count: - pass -forward_count.value = 0 - for name in all_names: expr = vars()[name] action = vars()["do_" + name] @@ -166,17 +158,22 @@ class forward_count: # expr.setDebug() -def parse(ebnf, given_table=None): +symbol_table: dict[str, pp.Forward] = {} + + +def parse(ebnf, given_table=None, *, enable_debug=False): given_table = given_table or {} symbol_table.clear() symbol_table.update(given_table) - forward_count.value = 0 table = syntax.parse_string(ebnf, parse_all=True)[0] - # assert forward_count.value == 0, "Missing definition" - for name in table: - expr = table[name] + missing_definitions = [ + k for k, v in table.items() + if k not in given_table and v.expr is None + ] + assert not missing_definitions, f"Missing definitions for {missing_definitions}" + for name, expr in table.items(): expr.set_name(name) - # expr.set_debug() + expr.set_debug(enable_debug) return table diff --git a/examples/ebnftest.py b/examples/ebnftest.py index 4737e0e0..88b88bf1 100644 --- a/examples/ebnftest.py +++ b/examples/ebnftest.py @@ -37,13 +37,14 @@ *) """ -table: dict[str, pp.ParserElement] = {} -# table['character'] = Char(printables) -# table['letter'] = Char(alphas + '_') -# table['digit'] = Char(nums) -table["terminal_string"] = pp.sgl_quoted_string | pp.dbl_quoted_string -table["meta_identifier"] = pp.Word(pp.alphas + "_", pp.alphas + "_" + pp.nums) -table["integer"] = pp.common.integer +table: dict[str, pp.ParserElement] = { + # "character": pp.Char(pp.printables), + # "letter": pp.Char(pp.alphas + '_'), + # "digit": pp.Char(nums), + "terminal_string": pp.sgl_quoted_string | pp.dbl_quoted_string, + "meta_identifier": pp.Word(pp.alphas + "_", pp.alphas + "_" + pp.nums), + "integer": pp.common.integer, +} print("Parsing EBNF grammar with EBNF parser...") parsers = ebnf.parse(grammar, table) diff 
--git a/tests/test_unit.py b/tests/test_unit.py index 25952da6..b42fa3db 100644 --- a/tests/test_unit.py +++ b/tests/test_unit.py @@ -995,6 +995,27 @@ def testParseEBNF(self): "failed to tokenize grammar correctly", ) + def testParseEBNFmissingDefinitions(self): + """ + Test detection of missing definitions in EBNF + """ + from examples import ebnf + + grammar = """ + (* + EBNF for number_words.py + *) + number = [thousands, [and]], [hundreds, [and]], [one_to_99]; + """ + + with self.assertRaisesRegex( + AssertionError, + r"Missing definitions for \['thousands', 'and', 'hundreds', 'one_to_99']" + ): + ebnf.parse(grammar) + + + def testParseIDL(self): from examples import idlParse From 22507fe2a372bc423941347859737d9e886a36d4 Mon Sep 17 00:00:00 2001 From: ptmcg Date: Fri, 24 Jan 2025 01:21:06 -0600 Subject: [PATCH 17/42] Fixed bug in number_words.py, added diagram; added example EBNF-generated parser for number_words example --- CHANGES | 4 + examples/README.md | 9 +- examples/ebnf_number_parser_diagram.html | 531 +++++++++++++++++++++++ examples/ebnf_number_words.py | 77 ++++ examples/number_words.py | 47 +- examples/number_words_diagram.html | 271 ++++++------ 6 files changed, 793 insertions(+), 146 deletions(-) create mode 100644 examples/ebnf_number_parser_diagram.html create mode 100644 examples/ebnf_number_words.py diff --git a/CHANGES b/CHANGES index 0d52c21c..9ce042f4 100644 --- a/CHANGES +++ b/CHANGES @@ -33,6 +33,10 @@ Version 3.2.2 - under development these internal elements will be included. (You can see this in the tag_metadata.py script in the examples directory.) +- Fixed bug in number_words.py example. Also added ebnf_number_words.py to demonstrate + using the ebnf.py EBNF parser generator to build a similar parser directly from + EBNF. + - Added support for Python 3.14. 
diff --git a/examples/README.md b/examples/README.md index e67e1103..efb50325 100644 --- a/examples/README.md +++ b/examples/README.md @@ -69,12 +69,15 @@ categories (several examples include supporting railroad diagrams): * rosetta code * [rosettacode.py](./rosettacode.py) [(diagram)](./rosettacode_diagram.html) ## Domain Specific Language parsers - * adventureEngine + * adventureEngine - interactive fiction parser and game runner * [adventureEngine.py](./adventureEngine.py) [(diagram)](./adventure_game_parser_diagram.html) - * pgn + * pgn - Chess notation parser * [pgn.py](./pgn.py) - * TAP + * TAP - Test results parser * [TAP.py](./TAP.py) [(diagram)](./TAP_diagram.html) + * EBNF - Extended Backus-Naur Format parser (and compiler to a running pyparsing parser) + * [ebnf.py](./ebnf.py) [(diagram)](./ebnf_diagram.html) + * [ebnf_number_words.py](./ebnf_number_words.py) [(diagram)](./ebnf_number_parser_diagram.html) ## Search and query language parsers * basic search * [searchparser.py](./searchparser.py) [demo](./searchParserAppDemo.py) diff --git a/examples/ebnf_number_parser_diagram.html b/examples/ebnf_number_parser_diagram.html new file mode 100644 index 00000000..2f1b534b --- /dev/null +++ b/examples/ebnf_number_parser_diagram.html @@ -0,0 +1,531 @@ + + + + + + + + + + + + + + + +
+

number

+
+
+ + + + + + + + + +thousandsthousands + + +andand + + + +hundredshundreds + + +andand + + +one_to_99one_to_99 +
+
+ +
+

thousands

+
+
+ + + + + + +one_to_99one_to_99 +'thousand' +
+
+ +
+

one_to_99

+
+
+ + + + + + +unitsunits +teensteens +tenten + +multiples_of_tenmultiples_of_ten + + + + + +'-' +unitsunits +
+
+ +
+

units

+
+
+ + + + + + +'one' +'two' +'three' +'four' +'five' +'six' +'seven' +'eight' +'nine' +
+
+ +
+

teens

+
+
+ + + + + + +'eleven' +'twelve' +'thirteen' +'fourteen' +'fifteen' +'sixteen' +'seventeen' +'eighteen' +'nineteen' +
+
+ +
+

ten

+
+
+ + + + + +'ten' +
+
+ +
+

multiples_of_ten

+
+
+ + + + + + +'twenty' +'thirty' +'forty' +'fifty' +'sixty' +'seventy' +'eighty' +'ninety' +
+
+ +
+

and

+
+
+ + + + + + +'and' +'-' +
+
+ +
+

hundreds

+
+
+ + + + + + +hundreds_multhundreds_mult +'hundred' +
+
+ +
+

hundreds_mult

+
+
+ + + + + + +unitsunits +teensteens + +multiples_of_tenmultiples_of_ten + + +'-' +unitsunits +
+
+ + + + diff --git a/examples/ebnf_number_words.py b/examples/ebnf_number_words.py new file mode 100644 index 00000000..8d6b46f2 --- /dev/null +++ b/examples/ebnf_number_words.py @@ -0,0 +1,77 @@ +# +# ebnftest_number_parser.py +# +# BNF from number_parser.py: +# +# optional_and ::= ["and" | "-"] +# optional_dash ::= ["-"] +# units ::= "one" | "two" | "three" | ... | "nine" +# tens ::= "twenty" | "thirty" | ... | "ninety" +# one_to_99 ::= units | ten | teens | (tens [optional_dash units]) +# ten ::= "ten" +# teens ::= "eleven" | "twelve" | ... | "nineteen" +# hundreds ::= (units | teens_only | tens optional_dash units) "hundred" +# thousands ::= one_to_99 "thousand" +# +# # number from 1-999,999 +# number ::= [thousands [optional_and]] [hundreds[optional_and]] one_to_99 +# | [thousands [optional_and]] hundreds +# | thousands +# + +import ebnf + +grammar = """ + (* + EBNF for number_words.py + *) + number = [thousands, [and]], [hundreds, [and]], [one_to_99]; + thousands = one_to_99, "thousand"; + hundreds_mult = units | teens | multiples_of_ten, ["-"], units; + hundreds = hundreds_mult, "hundred"; + teens = + "eleven" + | "twelve" + | "thirteen" + | "fourteen" + | "fifteen" + | "sixteen" + | "seventeen" + | "eighteen" + | "nineteen" + ; + one_to_99 = units | teens | ten | multiples_of_ten, [["-"], units]; + ten = "ten"; + multiples_of_ten = "twenty" | "thirty" | "forty" | "fifty" | "sixty" | "seventy" | "eighty" | "ninety"; + units = "one" | "two" | "three" | "four" | "five" | "six" | "seven" | "eight" | "nine"; + and = "and" | "-"; + """ + +parsers = ebnf.parse(grammar) +number_parser = parsers["number"] + +try: + number_parser.create_diagram("ebnf_number_parser_diagram.html") +except Exception as e: + print("Failed to create diagram for EBNF-generated number parser" + f" - {type(e).__name__}: {e}") + +number_parser.run_tests( + """ + one + seven + twelve + twenty six + forty-two + two hundred + twelve hundred + one hundred and eleven + seven thousand and six + 
twenty five hundred and one + ninety nine thousand nine hundred and ninety nine + + # invalid + twenty hundred + """, + full_dump=False +) \ No newline at end of file diff --git a/examples/number_words.py b/examples/number_words.py index aa3ea09f..8eeb577d 100644 --- a/examples/number_words.py +++ b/examples/number_words.py @@ -12,22 +12,22 @@ # # # BNF: -""" - optional_and ::= ["and" | "-"] - optional_dash ::= ["-"] - units ::= one | two | three | ... | nine - teens ::= ten | teens_only - tens ::= twenty | thirty | ... | ninety - one_to_99 ::= units | teens | (tens [optional_dash units]) - teens_only ::= eleven | twelve | ... | nineteen - hundreds ::= (units | teens_only | tens optional_dash units) "hundred" - thousands ::= one_to_99 "thousand" - - # number from 1-999,999 - number ::= [thousands [optional_and]] [hundreds[optional_and]] one_to_99 - | [thousands [optional_and]] hundreds - | thousands -""" +# optional_and ::= ["and" | "-"] +# optional_dash ::= ["-"] +# units ::= "one" | "two" | "three" | ... | "nine" +# ten ::= "ten" +# tens ::= "twenty" | "thirty" | ... | "ninety" +# one_to_99 ::= units | ten | teens | (tens [optional_dash units]) +# teens ::= "eleven" | "twelve" | ... 
| "nineteen" +# hundreds ::= (units | teens | tens optional_dash units) "hundred" +# thousands ::= one_to_99 "thousand" +# +# # number from 1-999,999 +# number ::= [thousands [optional_and]] [hundreds[optional_and]] one_to_99 +# | [thousands [optional_and]] hundreds +# | thousands +# + import pyparsing as pp from operator import mul @@ -70,24 +70,26 @@ def multiply(t): opt_and = pp.Opt((pp.CaselessKeyword("and") | "-").suppress()).set_name("'and/-'") units = define_numeric_word_range("one two three four five six seven eight nine", 1, 9) -teens_only = define_numeric_word_range( +teens = define_numeric_word_range( "eleven twelve thirteen fourteen fifteen sixteen seventeen eighteen nineteen", 11, 19, ) ten = define_numeric_word_range("ten", 10) -teens = ten | teens_only tens = define_numeric_word_range( "twenty thirty forty fifty sixty seventy eighty ninety", 20, 90, 10 ) -one_to_99 = (units | teens | (tens + pp.Opt(opt_dash + units))).set_name("1-99") -one_to_99.add_parse_action(sum) hundred = define_numeric_word_range("hundred", 100) thousand = define_numeric_word_range("thousand", 1000) -hundreds = (units | teens_only | (tens + opt_dash + units)) + hundred +one_to_99_except_tens = (units | teens | (tens + opt_dash + units)).set_name("1-99 except tens") +one_to_99_except_tens.add_parse_action(sum) +one_to_99 = (one_to_99_except_tens | ten | tens).set_name("1-99") +one_to_99.add_parse_action(sum) + +hundreds = one_to_99_except_tens + hundred hundreds.set_name("100s") one_to_999 = ( @@ -128,6 +130,9 @@ def multiply(t): two hundred twelve hundred one hundred and eleven + seven thousand and six + twenty five hundred + twenty five hundred and one ninety nine thousand nine hundred and ninety nine nine hundred thousand nine hundred and ninety nine nine hundred and ninety nine thousand nine hundred and ninety nine diff --git a/examples/number_words_diagram.html b/examples/number_words_diagram.html index 626f7cb8..d7784206 100644 --- a/examples/number_words_diagram.html +++ 
b/examples/number_words_diagram.html @@ -18,7 +18,7 @@
-

numeric_words

+

numeric_words

@@ -30,22 +30,22 @@

numeric_words

-1000s1000s -'and/-''and/-' +1000s1000s +'and/-''and/-' -100s100s -'and/-''and/-' -1-991-99 +100s100s +'and/-''and/-' +1-991-99 -1000s1000s -'and/-''and/-' -100s100s -1000s1000s +
+
+ +
+

1-99

+
+
+ - -'one' -'two' -'three' -'four' -'five' -'six' -'seven' -'eight' -'nine'