From bc7aa69a5395620162d65e096565101672bc2c0d Mon Sep 17 00:00:00 2001 From: ptmcg Date: Sun, 13 Oct 2024 05:05:25 -0500 Subject: [PATCH 01/31] Prep for 3.2.1 work --- pyparsing/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyparsing/__init__.py b/pyparsing/__init__.py index 543ceb62..6b6ca26d 100644 --- a/pyparsing/__init__.py +++ b/pyparsing/__init__.py @@ -120,7 +120,7 @@ def __repr__(self): return f"{__name__}.{type(self).__name__}({', '.join('{}={!r}'.format(*nv) for nv in zip(self._fields, self))})" -__version_info__ = version_info(3, 2, 0, "final", 1) +__version_info__ = version_info(3, 2, 1, "final", 1) __version_time__ = "13 Oct 2024 09:46 UTC" __version__ = __version_info__.__version__ __versionTime__ = __version_time__ From de626a45db18ed42d8c4dfaa478fc3ef96147fd3 Mon Sep 17 00:00:00 2001 From: ptmcg Date: Sun, 13 Oct 2024 11:05:08 -0500 Subject: [PATCH 02/31] Add config for readthedocs --- readthedocs.yaml | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) create mode 100644 readthedocs.yaml diff --git a/readthedocs.yaml b/readthedocs.yaml new file mode 100644 index 00000000..5e69f092 --- /dev/null +++ b/readthedocs.yaml @@ -0,0 +1,36 @@ +# Read the Docs configuration file for Sphinx projects +# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details + +# Required +version: 2 + +# Set the OS, Python version and other tools you might need +build: + os: ubuntu-22.04 + tools: + python: "3.12" + # You can also specify other tool versions: + # nodejs: "20" + # rust: "1.70" + # golang: "1.20" + +# Build documentation in the "docs/" directory with Sphinx +sphinx: + configuration: docs/conf.py + # You can configure Sphinx to use a different builder, for instance use the dirhtml builder for simpler URLs + # builder: "dirhtml" + builder: "classic" + # Fail on all warnings to avoid broken references + # fail_on_warning: true + +# Optionally build your docs in additional formats such as PDF 
and ePub +# formats: +# - pdf +# - epub + +# Optional but recommended, declare the Python requirements required +# to build your documentation +# See https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html +# python: +# install: +# - requirements: docs/requirements.txt From 344a6b72d15819bc0008affbf41b29d8ac7e18d1 Mon Sep 17 00:00:00 2001 From: ptmcg Date: Sun, 13 Oct 2024 11:08:17 -0500 Subject: [PATCH 03/31] Fix typo in config for readthedocs --- readthedocs.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/readthedocs.yaml b/readthedocs.yaml index 5e69f092..38318dbb 100644 --- a/readthedocs.yaml +++ b/readthedocs.yaml @@ -19,7 +19,7 @@ sphinx: configuration: docs/conf.py # You can configure Sphinx to use a different builder, for instance use the dirhtml builder for simpler URLs # builder: "dirhtml" - builder: "classic" + builder: "html" # Fail on all warnings to avoid broken references # fail_on_warning: true From d319ad4a85f49396c6f9060ac2a7bac76e14fd07 Mon Sep 17 00:00:00 2001 From: ptmcg Date: Sun, 20 Oct 2024 01:25:01 -0500 Subject: [PATCH 04/31] Use _getframe depth arg instead of walking frame stack with f_back --- pyparsing/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyparsing/core.py b/pyparsing/core.py index 4f43c3bf..c42accec 100644 --- a/pyparsing/core.py +++ b/pyparsing/core.py @@ -6157,7 +6157,7 @@ def autoname_elements() -> None: Utility to simplify mass-naming of parser elements, for generating railroad diagram with named subdiagrams. 
""" - calling_frame = sys._getframe().f_back + calling_frame = sys._getframe(1) if calling_frame is None: return calling_frame = typing.cast(types.FrameType, calling_frame) From 8dae684dd3afe7692add5eb66c1bc093c33419d7 Mon Sep 17 00:00:00 2001 From: ptmcg Date: Sun, 20 Oct 2024 01:45:56 -0500 Subject: [PATCH 05/31] More robust detection of presence or support for matplotlib tests --- tests/test_matplotlib_cases.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test_matplotlib_cases.py b/tests/test_matplotlib_cases.py index d2c1bce9..65d4013f 100644 --- a/tests/test_matplotlib_cases.py +++ b/tests/test_matplotlib_cases.py @@ -6,10 +6,10 @@ import pytest -if platform.python_implementation() == "PyPy": - mpl_mathtext = None -else: +try: import matplotlib.mathtext as mpl_mathtext +except ImportError: + mpl_mathtext = None # fmt: off @pytest.mark.parametrize( From 7808f932421171c5f0fcfe02cd56ed0cf7103f3c Mon Sep 17 00:00:00 2001 From: ptmcg Date: Sun, 20 Oct 2024 01:59:28 -0500 Subject: [PATCH 06/31] Fix detection of free threaded Python and JIT enabled in unit tests --- tests/test_unit.py | 29 ++++++++++++++++++++++++----- 1 file changed, 24 insertions(+), 5 deletions(-) diff --git a/tests/test_unit.py b/tests/test_unit.py index 62b20b8d..7d291990 100644 --- a/tests/test_unit.py +++ b/tests/test_unit.py @@ -40,6 +40,14 @@ JYTHON_ENV = python_impl == "Jython" PYPY_ENV = python_impl == "PyPy" +# global flags for Python config settings +_config_vars = sysconfig.get_config_vars() +_config_args = set( + shlex.split(_config_vars.get("CONFIG_ARGS", "")) +) +PYTHON_JIT_ENABLED = "--enable-experimental-jit" in _config_args +PYTHON_FREE_THREADED = _config_vars.get("Py_GIL_DISABLED", 0) == 1 + # get full stack traces during testing pp.ParserElement.verbose_stacktrace = True @@ -152,11 +160,15 @@ def runTest(self): pp.__version__, pp.__version_time__, ) - python_jit_enabled = "--enable-experimental-jit" in shlex.split( - 
sysconfig.get_config_vars().get("CONFIG_ARGS", "") - ) + config_options = [] + if PYTHON_JIT_ENABLED: + config_options.append("JIT enabled") + if PYTHON_FREE_THREADED: + config_options.append("free_threaded") + config_options_str = f" ({','.join(config_options)})" print( - f"Python version {sys.version} {'(JIT enabled)' if python_jit_enabled else ''}" + f"Python version {sys.version}" + f"{config_options_str if config_options else ''}" ) print(f"__version_info__ : {pp.__version_info__}") print(f"__version_info__ repr: {repr(pp.__version_info__)}") @@ -10507,7 +10519,14 @@ def testExpressionDefaultStrings(self): self.assertEqual("(0-9)", repr(expr)) def testEmptyExpressionsAreHandledProperly(self): - from pyparsing.diagram import to_railroad + try: + from pyparsing.diagram import to_railroad + except ModuleNotFoundError as mnfe: + print("Failed 'from pyparsing.diagram import to_railroad'" + f"\n {type(mnfe).__name__}: {mnfe}") + if mnfe.__cause__: + print(f"\n {type(mnfe.__cause__).__name__}: {mnfe.__cause__}") + self.skipTest("Failed 'from pyparsing.diagram import to_railroad'") for cls in (pp.And, pp.Or, pp.MatchFirst, pp.Each): print("testing empty", cls.__name__) From ec0a86684043dccdae3ffca6bbb3a5756c3ad1a0 Mon Sep 17 00:00:00 2001 From: ptmcg Date: Sun, 20 Oct 2024 02:00:56 -0500 Subject: [PATCH 07/31] Doc cleanup --- docs/HowToUsePyparsing.rst | 60 ++++++++++++++++++++++---------------- docs/whats_new_in_3_1.rst | 14 ++++++--- 2 files changed, 45 insertions(+), 29 deletions(-) diff --git a/docs/HowToUsePyparsing.rst b/docs/HowToUsePyparsing.rst index fe8ea3cd..f23047f0 100644 --- a/docs/HowToUsePyparsing.rst +++ b/docs/HowToUsePyparsing.rst @@ -374,6 +374,14 @@ methods for code to use are: basic element can be referenced multiple times and given different names within a complex grammar. +.. 
_using_each: + +- ``using_each(list_of_symbols)`` a short-cut for defining a number of + symbols of a particular ``ParserElement`` subclass:: + + LBRACK, RBRACK, LBRACE, RBRACE, LPAR, RPAR = Suppress.using_each("[]{}()") + AND, OR, NOT = Keyword.using_each("and or not".split()) + .. _set_parse_action: - ``set_parse_action(*fn)`` - specify one or more functions to call after successful @@ -412,7 +420,7 @@ methods for code to use are: A nice short-cut for calling ``set_parse_action`` is to use it as a decorator:: - identifier = Word(alphas, alphanums+"_") + identifier = Word(alphas, alphanums + "_") @identifier.set_parse_action def resolve_identifier(results: ParseResults): @@ -463,9 +471,11 @@ methods for code to use are: when trying to match this element - ``validate()`` - function to verify that the defined grammar does not - contain infinitely recursive constructs (``validate()`` is deprecated, and + contain infinitely recursive constructs. + + *(``validate()`` is deprecated, and will be removed in a future pyparsing release. Pyparsing now supports - left-recursive parsers, which this function attempted to catch.) + left-recursive parsers, which this function attempted to catch.)* .. _parse_with_tabs: @@ -613,7 +623,7 @@ Basic ParserElement subclasses ``SkipTo`` can also be written using ``...``:: - LBRACE, RBRACE = map(Literal, "{}") + LBRACE, RBRACE = Literal.using_each("{}") brace_expr = LBRACE + SkipTo(RBRACE) + RBRACE # can also be written as @@ -1423,27 +1433,27 @@ access them using code like the following:: The following language ranges are defined. 
-========================== ================= ================================================ +========================== ================= ======================================================== Unicode set Alternate names Description --------------------------- ----------------- ------------------------------------------------ -Arabic العربية -Chinese 中文 -CJK Union of Chinese, Japanese, and Korean sets -Cyrillic кириллица -Devanagari देवनागरी -Greek Ελληνικά -Hangul Korean, 한국어 -Hebrew עִברִית -Japanese 日本語 Union of Kanji, Katakana, and Hiragana sets -Japanese.Hiragana ひらがな -Japanese.Kanji 漢字 -Japanese.Katakana カタカナ -Latin1 All Unicode characters up to code point 255 -LatinA -LatinB -Thai ไทย -BasicMultilingualPlane BMP All Unicode characters up to code point 65535 -========================== ================= ================================================ +-------------------------- ----------------- -------------------------------------------------------- +``Arabic`` العربية +``Chinese`` 中文 +``CJK`` Union of Chinese, Japanese, and Korean sets +``Cyrillic`` кириллица +``Devanagari`` देवनागरी +``Greek`` Ελληνικά +``Hangul`` Korean, 한국어 +``Hebrew`` עִברִית +``Japanese`` 日本語 Union of Kanji, Katakana, and Hiragana sets +``Japanese.Hiragana`` ひらがな +``Japanese.Kanji`` 漢字 +``Japanese.Katakana`` カタカナ +``Latin1`` All Unicode characters up to code point 0xff (255) +``LatinA`` Unicode characters for code points 0x100-0x17f (256-383) +``LatinB`` Unicode characters for code points 0x180-0x24f (384-591) +``Thai`` ไทย +``BasicMultilingualPlane`` BMP All Unicode characters up to code point 0xffff (65535) +========================== ================= ======================================================== The base ``unicode`` class also includes definitions based on all Unicode code points up to ``sys.maxunicode``. This set will include emojis, wingdings, and many other specialized and typographical variant characters. 
@@ -1493,7 +1503,7 @@ Example ------- You can view an example railroad diagram generated from `a pyparsing grammar for SQL SELECT statements <_static/sql_railroad.html>`_ (generated from -`examples/select_parser.py <../examples/select_parser.py>`_). +`examples/select_parser.py `_). Naming tip ---------- diff --git a/docs/whats_new_in_3_1.rst b/docs/whats_new_in_3_1.rst index 1d22dbf3..cc97e5c7 100644 --- a/docs/whats_new_in_3_1.rst +++ b/docs/whats_new_in_3_1.rst @@ -130,7 +130,7 @@ API Changes ident = ppu.Greek.identifier # or - # ident = ppu.Ελληνικά.identifier + # ident = ppu.Ελληνικά.identifier - Added bool ``embed`` argument to ``ParserElement.create_diagram()``. When passed as True, the resulting diagram will omit the ````, @@ -198,7 +198,7 @@ Fixed Bugs - Updated ``create_diagram()`` code to be compatible with railroad-diagrams package version 3.0. -- Fixed bug in pyparsing.common.url, when input URL is not alone +- Fixed bug in ``pyparsing.common.url``, when input URL is not alone on an input line. - Fixed bug in srange, when parsing escaped '/' and '\' inside a @@ -262,8 +262,14 @@ Fixed Bugs New / Enhanced Examples ======================= - Added example ``mongodb_query_expression.py``, to convert human-readable infix query - expressions (such as ``a==100 and b>=200``) and transform them into the equivalent - query argument for the pymongo package (``{'$and': [{'a': 100}, {'b': {'$gte': 200}}]}``). + expressions, such as:: + + a==100 and b>=200 + + and transform them into an equivalent query argument for the pymongo package:: + + {'$and': [{'a': 100}, {'b': {'$gte': 200}}]} + Supports many equality and inequality operators - see the docstring for the ``transform_query`` function for many more examples. 
From 96d44647450463a7b9c0962b03f3aeb0d1f165d5 Mon Sep 17 00:00:00 2001 From: ptmcg Date: Sun, 20 Oct 2024 02:51:01 -0500 Subject: [PATCH 08/31] Fix up missing warning prefixes --- pyparsing/__init__.py | 2 +- pyparsing/core.py | 9 ++++++--- pyparsing/helpers.py | 3 ++- 3 files changed, 9 insertions(+), 5 deletions(-) diff --git a/pyparsing/__init__.py b/pyparsing/__init__.py index 6b6ca26d..14effa7e 100644 --- a/pyparsing/__init__.py +++ b/pyparsing/__init__.py @@ -121,7 +121,7 @@ def __repr__(self): __version_info__ = version_info(3, 2, 1, "final", 1) -__version_time__ = "13 Oct 2024 09:46 UTC" +__version_time__ = "20 Oct 2024 07:48 UTC" __version__ = __version_info__.__version__ __versionTime__ = __version_time__ __author__ = "Paul McGuire " diff --git a/pyparsing/core.py b/pyparsing/core.py index c42accec..ecf704f4 100644 --- a/pyparsing/core.py +++ b/pyparsing/core.py @@ -5558,7 +5558,8 @@ def __or__(self, other) -> ParserElement: not in self.suppress_warnings_ ): warnings.warn( - "using '<<' operator with '|' is probably an error, use '<<='", + "warn_on_match_first_with_lshift_operator:" + " using '<<' operator with '|' is probably an error, use '<<='", stacklevel=2, ) ret = super().__or__(other) @@ -5572,7 +5573,8 @@ def __del__(self): and Diagnostics.warn_on_assignment_to_Forward not in self.suppress_warnings_ ): warnings.warn_explicit( - "Forward defined here but no expression attached later using '<<=' or '<<'", + "warn_on_assignment_to_Forward:" + " Forward defined here but no expression attached later using '<<=' or '<<'", UserWarning, filename=self.caller_frame.filename, lineno=self.caller_frame.lineno, @@ -5600,7 +5602,8 @@ def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType: else: stacklevel = 2 warnings.warn( - "Forward expression was never assigned a value, will not parse any input", + "warn_on_parse_using_empty_Forward:" + " Forward expression was never assigned a value, will not parse any input", stacklevel=stacklevel, ) if 
not ParserElement._left_recursion_enabled: diff --git a/pyparsing/helpers.py b/pyparsing/helpers.py index d2bd05f3..950a1963 100644 --- a/pyparsing/helpers.py +++ b/pyparsing/helpers.py @@ -199,7 +199,8 @@ def one_of( and __diag__.warn_on_multiple_string_args_to_oneof ): warnings.warn( - "More than one string argument passed to one_of, pass" + "warn_on_multiple_string_args_to_oneof:" + " More than one string argument passed to one_of, pass" " choices as a list or space-delimited string", stacklevel=2, ) From 98ad3e19d611cdfc0919d8fd52ba542dd7e6d779 Mon Sep 17 00:00:00 2001 From: Gabriel Gerlero Date: Wed, 13 Nov 2024 18:21:02 -0300 Subject: [PATCH 09/31] Improve performance of cpp_style_comment --- pyparsing/helpers.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pyparsing/helpers.py b/pyparsing/helpers.py index 950a1963..c41f1f6e 100644 --- a/pyparsing/helpers.py +++ b/pyparsing/helpers.py @@ -1023,9 +1023,7 @@ def checkUnindent(s, l, t): dbl_slash_comment = Regex(r"//(?:\\\n|[^\n])*").set_name("// comment") "Comment of the form ``// ... 
(to end of line)``" -cpp_style_comment = Combine( - Regex(r"/\*(?:[^*]|\*(?!/))*") + "*/" | dbl_slash_comment -).set_name("C++ style comment") +cpp_style_comment = Regex(r"(?:/\*(?:[^*]|\*(?!/))*\*/)|(?://(?:\\\n|[^\n])*)").set_name("C++ style comment") "Comment of either form :class:`c_style_comment` or :class:`dbl_slash_comment`" java_style_comment = cpp_style_comment From 8c6ae45000650329e4731286bc2e32f7ec4245cf Mon Sep 17 00:00:00 2001 From: ptmcg Date: Mon, 18 Nov 2024 04:49:15 -0600 Subject: [PATCH 10/31] Replicate fix in cpp_style_comment to c_style_comment; tighten up test cases --- CHANGES | 6 ++ pyparsing/helpers.py | 9 ++- tests/test_unit.py | 131 +++++++++++++++++++++++++++++++------------ 3 files changed, 107 insertions(+), 39 deletions(-) diff --git a/CHANGES b/CHANGES index 66484eed..71095188 100644 --- a/CHANGES +++ b/CHANGES @@ -12,6 +12,12 @@ the new function names before the old functions are completely removed. (Big hel Devin J. Pohly in structuring the code to enable this peaceful transition.) +Version 3.2.1 - in development +------------------------------ +- Improved performance of `cpp_style_comment` and `c_style_comment` Regex expressions. + PR submitted by Gabriel Gerlero, nice work, thanks! + + Version 3.2.0 - October, 2024 ------------------------------- - Discontinued support for Python 3.6, 3.7, and 3.8. 
Adopted new Python features from diff --git a/pyparsing/helpers.py b/pyparsing/helpers.py index c41f1f6e..399b70cf 100644 --- a/pyparsing/helpers.py +++ b/pyparsing/helpers.py @@ -1010,8 +1010,9 @@ def checkUnindent(s, l, t): return smExpr.set_name("indented block") -# it's easy to get these comment structures wrong - they're very common, so may as well make them available -c_style_comment = Combine(Regex(r"/\*(?:[^*]|\*(?!/))*") + "*/").set_name( +# it's easy to get these comment structures wrong - they're very common, +# so may as well make them available +c_style_comment = Regex(r"/\*(?:[^*]|\*(?!/))*\*\/").set_name( "C style comment" ) "Comment of the form ``/* ... */``" @@ -1023,7 +1024,9 @@ def checkUnindent(s, l, t): dbl_slash_comment = Regex(r"//(?:\\\n|[^\n])*").set_name("// comment") "Comment of the form ``// ... (to end of line)``" -cpp_style_comment = Regex(r"(?:/\*(?:[^*]|\*(?!/))*\*/)|(?://(?:\\\n|[^\n])*)").set_name("C++ style comment") +cpp_style_comment = Regex( + r"(?:/\*(?:[^*]|\*(?!/))*\*\/)|(?://(?:\\\n|[^\n])*)" +).set_name("C++ style comment") "Comment of either form :class:`c_style_comment` or :class:`dbl_slash_comment`" java_style_comment = cpp_style_comment diff --git a/tests/test_unit.py b/tests/test_unit.py index 7d291990..d37df659 100644 --- a/tests/test_unit.py +++ b/tests/test_unit.py @@ -1327,24 +1327,6 @@ def testQuotedStrings(self): f"quoted string escaped quote failure ({[str(s[0]) for s in allStrings]})", ) - print( - "testing catastrophic RE backtracking in implementation of dblQuotedString" - ) - for expr, test_string in [ - (pp.dblQuotedString, '"' + "\\xff" * 500), - (pp.sglQuotedString, "'" + "\\xff" * 500), - (pp.quotedString, '"' + "\\xff" * 500), - (pp.quotedString, "'" + "\\xff" * 500), - (pp.QuotedString('"'), '"' + "\\xff" * 500), - (pp.QuotedString("'"), "'" + "\\xff" * 500), - ]: - with self.subTest(expr=expr, test_string=test_string): - expr.parseString(test_string + test_string[0], parseAll=True) - try: - 
expr.parseString(test_string, parseAll=True) - except Exception: - continue - # test invalid endQuoteChar with self.subTest(): with self.assertRaises( @@ -1493,13 +1475,13 @@ def testCaselessOneOf(self): "Aa" * 4, "".join(res), "caseless1 CaselessLiteral return failed" ) - def testCommentParser(self): - print("verify processing of C and HTML comments") - testdata = """ + def testCStyleCommentParser(self): + print("verify processing of C-style /* */ comments") + testdata = f""" /* */ /** **/ /**/ - /***/ + /*{'*' * 1_000_000}*/ /****/ /* /*/ /** /*/ @@ -1508,14 +1490,30 @@ def testCommentParser(self): ablsjdflj */ """ - found_lines = [ - pp.lineno(s, testdata) for t, s, e in pp.cStyleComment.scanString(testdata) - ] - self.assertEqual( - list(range(11))[2:], - found_lines, - f"only found C comments on lines {found_lines}", - ) + for test_expr in (pp.c_style_comment, pp.cpp_style_comment, pp.java_style_comment): + with self.subTest("parse test - /* */ comments", test_expr=test_expr): + found_matches = [ + len(t[0]) for t, s, e in test_expr.scanString(testdata) + ] + self.assertEqual( + [5, 7, 4, 1000004, 6, 6, 7, 8, 33], + found_matches, + f"only found {test_expr} lengths {found_matches}", + ) + + found_lines = [ + pp.lineno(s, testdata) for t, s, e in test_expr.scanString(testdata) + ] + self.assertEqual( + [2, 3, 4, 5, 6, 7, 8, 9, 10], + found_lines, + f"only found {test_expr} on lines {found_lines}", + ) + + def testHtmlCommentParser(self): + print("verify processing of HTML comments") + + test_expr = pp.html_comment testdata = """ @@ -1531,27 +1529,88 @@ def testCommentParser(self): ablsjdflj --> """ + found_matches = [ + len(t[0]) for t, s, e in test_expr.scanString(testdata) + ] + self.assertEqual( + [8, 10, 7, 8, 9, 9, 10, 11, 79], + found_matches, + f"only found {test_expr} lengths {found_matches}", + ) + found_lines = [ pp.lineno(s, testdata) for t, s, e in pp.htmlComment.scanString(testdata) ] self.assertEqual( - list(range(11))[2:], + [2, 3, 4, 5, 6, 7, 8, 
9, 10], found_lines, f"only found HTML comments on lines {found_lines}", ) + def testDoubleSlashCommentParser(self): + print("verify processing of C++ and Java comments - // comments") + # test C++ single line comments that have line terminated with '\' (should continue comment to following line) - testSource = r""" + testdata = r""" // comment1 // comment2 \ still comment 2 // comment 3 """ - self.assertEqual( - 41, - len(pp.cppStyleComment.searchString(testSource)[1][0]), - r"failed to match single-line comment with '\' at EOL", + for test_expr in (pp.dbl_slash_comment, pp.cpp_style_comment, pp.java_style_comment): + with self.subTest("parse test - // comments", test_expr=test_expr): + found_matches = [ + len(t[0]) for t, s, e in test_expr.scanString(testdata) + ] + self.assertEqual( + [11, 41, 12], + found_matches, + f"only found {test_expr} lengths {found_matches}", + ) + + found_lines = [ + pp.lineno(s, testdata) for t, s, e in test_expr.scanString(testdata) + ] + self.assertEqual( + [2, 3, 5], + found_lines, + f"only found {test_expr} on lines {found_lines}", + ) + + def testReCatastrophicBacktrackingInQuotedStringParsers(self): + # reported by webpentest - 2016-04-28 + print( + "testing catastrophic RE backtracking in implementation of quoted string parsers" + ) + for expr, test_string in [ + (pp.dblQuotedString, '"' + "\\xff" * 500), + (pp.sglQuotedString, "'" + "\\xff" * 500), + (pp.quotedString, '"' + "\\xff" * 500), + (pp.quotedString, "'" + "\\xff" * 500), + (pp.QuotedString('"'), '"' + "\\xff" * 500), + (pp.QuotedString("'"), "'" + "\\xff" * 500), + ]: + with self.subTest("Test catastrophic RE backtracking", expr=expr): + try: + expr.parse_string(test_string) + except pp.ParseException: + continue + + def testReCatastrophicBacktrackingInCommentParsers(self): + print( + "testing catastrophic RE backtracking in implementation of comment parsers" ) + for expr, test_string in [ + (pp.c_style_comment, f"/*{'*' * 500}"), + (pp.cpp_style_comment, f"/*{'*' * 
500}"), + (pp.java_style_comment, f"/*{'*' * 500}"), + (pp.html_comment, f"<-- {'-' * 500}") + ]: + with self.subTest("Test catastrophic RE backtracking", expr=expr): + try: + expr.parse_string(test_string) + except pp.ParseException: + continue def testParseExpressionResults(self): a = pp.Word("a", pp.alphas).setName("A") From 61ac5e33e3316397d609aa79ada7a672b9421f7f Mon Sep 17 00:00:00 2001 From: ptmcg Date: Mon, 18 Nov 2024 04:53:09 -0600 Subject: [PATCH 11/31] Update latest version date --- pyparsing/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyparsing/__init__.py b/pyparsing/__init__.py index 14effa7e..3b0c262b 100644 --- a/pyparsing/__init__.py +++ b/pyparsing/__init__.py @@ -121,7 +121,7 @@ def __repr__(self): __version_info__ = version_info(3, 2, 1, "final", 1) -__version_time__ = "20 Oct 2024 07:48 UTC" +__version_time__ = "18 Nov 2024 10:52 UTC" __version__ = __version_info__.__version__ __versionTime__ = __version_time__ __author__ = "Paul McGuire " From 7847e1c42ce07abcb882e684e8703ae3e8354acb Mon Sep 17 00:00:00 2001 From: ptmcg Date: Mon, 18 Nov 2024 12:34:59 -0600 Subject: [PATCH 12/31] Add missing type annotations in actions.py --- CHANGES | 3 +++ pyparsing/actions.py | 41 +++++++++++++++++++++++++++-------------- pyparsing/core.py | 7 +------ 3 files changed, 31 insertions(+), 20 deletions(-) diff --git a/CHANGES b/CHANGES index 71095188..ccddcfa9 100644 --- a/CHANGES +++ b/CHANGES @@ -17,6 +17,9 @@ Version 3.2.1 - in development - Improved performance of `cpp_style_comment` and `c_style_comment` Regex expressions. PR submitted by Gabriel Gerlero, nice work, thanks! +- Add missing type annotations to `match_only_at_col`, `replace_with`, `remove_quotes`, + `with_attribute`, and `with_class`. Issue #585 reported by rafrafrek. 
+ Version 3.2.0 - October, 2024 ------------------------------- diff --git a/pyparsing/actions.py b/pyparsing/actions.py index 1d2dce99..b584048c 100644 --- a/pyparsing/actions.py +++ b/pyparsing/actions.py @@ -1,21 +1,34 @@ # actions.py +from __future__ import annotations + +from typing import Union, Callable, Any from .exceptions import ParseException from .util import col, replaced_by_pep8 +from .results import ParseResults + + +ParseAction = Union[ + Callable[[], Any], + Callable[[ParseResults], Any], + Callable[[int, ParseResults], Any], + Callable[[str, int, ParseResults], Any], +] class OnlyOnce: """ Wrapper for parse actions, to ensure they are only called once. + Note: parse action signature must include all 3 arguments. """ - def __init__(self, method_call): + def __init__(self, method_call: Callable[[str, int, ParseResults], Any]): from .core import _trim_arity self.callable = _trim_arity(method_call) self.called = False - def __call__(self, s, l, t): + def __call__(self, s: str, l: int, t: ParseResults) -> ParseResults: if not self.called: results = self.callable(s, l, t) self.called = True @@ -30,20 +43,20 @@ def reset(self): self.called = False -def match_only_at_col(n): +def match_only_at_col(n: int) -> ParseAction: """ Helper method for defining parse actions that require matching at a specific column in the input text. """ - def verify_col(strg, locn, toks): + def verify_col(strg: str, locn: int, toks: ParseResults) -> None: if col(locn, strg) != n: raise ParseException(strg, locn, f"matched token not at column {n}") return verify_col -def replace_with(repl_str): +def replace_with(repl_str: str) -> ParseAction: """ Helper method for common parse actions that simply return a literal value. 
Especially useful when used with @@ -60,7 +73,7 @@ def replace_with(repl_str): return lambda s, l, t: [repl_str] -def remove_quotes(s, l, t): +def remove_quotes(s: str, l: int, t: ParseResults) -> Any: """ Helper parse action for removing quotation marks from parsed quoted strings. @@ -77,7 +90,7 @@ def remove_quotes(s, l, t): return t[0][1:-1] -def with_attribute(*args, **attr_dict): +def with_attribute(*args: tuple[str, str], **attr_dict) -> ParseAction: """ Helper to create a validating parse action to be used with start tags created with :class:`make_xml_tags` or @@ -133,17 +146,17 @@ def with_attribute(*args, **attr_dict): 1 4 0 1 0 1,3 2,3 1,1 """ + attrs_list: list[tuple[str, str]] = [] if args: - attrs = args[:] + attrs_list.extend(args) else: - attrs = attr_dict.items() - attrs = [(k, v) for k, v in attrs] + attrs_list.extend(attr_dict.items()) - def pa(s, l, tokens): - for attrName, attrValue in attrs: + def pa(s: str, l: int, tokens: ParseResults) -> None: + for attrName, attrValue in attrs_list: if attrName not in tokens: raise ParseException(s, l, "no matching attribute " + attrName) - if attrValue != with_attribute.ANY_VALUE and tokens[attrName] != attrValue: + if attrValue != with_attribute.ANY_VALUE and tokens[attrName] != attrValue: # type: ignore [attr-defined] raise ParseException( s, l, @@ -156,7 +169,7 @@ def pa(s, l, tokens): with_attribute.ANY_VALUE = object() # type: ignore [attr-defined] -def with_class(classname, namespace=""): +def with_class(classname: str, namespace: str = "") -> ParseAction: """ Simplified version of :class:`with_attribute` when matching on a div class - made difficult because ``class`` is diff --git a/pyparsing/core.py b/pyparsing/core.py index ecf704f4..777165ca 100644 --- a/pyparsing/core.py +++ b/pyparsing/core.py @@ -215,12 +215,7 @@ def _should_enable_warnings( _generatorType = types.GeneratorType ParseImplReturnType = tuple[int, Any] PostParseReturnType = Union[ParseResults, Sequence[ParseResults]] -ParseAction 
= Union[ - Callable[[], Any], - Callable[[ParseResults], Any], - Callable[[int, ParseResults], Any], - Callable[[str, int, ParseResults], Any], -] + ParseCondition = Union[ Callable[[], bool], Callable[[ParseResults], bool], From 1721d4982a9832f4f9ad930f01796a5f011a6d72 Mon Sep 17 00:00:00 2001 From: Gabriel Gerlero Date: Sat, 23 Nov 2024 14:28:19 -0300 Subject: [PATCH 13/31] Use non-capturing groups in common.fnumber and common.ieee_float --- pyparsing/common.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyparsing/common.py b/pyparsing/common.py index 649aad00..e4651108 100644 --- a/pyparsing/common.py +++ b/pyparsing/common.py @@ -210,14 +210,14 @@ class pyparsing_common: """any numeric expression, returns the corresponding Python type""" fnumber = ( - Regex(r"[+-]?\d+\.?\d*([eE][+-]?\d+)?") + Regex(r"[+-]?\d+\.?\d*(?:[eE][+-]?\d+)?") .set_name("fnumber") .set_parse_action(convert_to_float) ) """any int or real number, returned as float""" ieee_float = ( - Regex(r"(?i)[+-]?((\d+\.?\d*(e[+-]?\d+)?)|nan|inf(inity)?)") + Regex(r"(?i:[+-]?(?:(?:\d+\.?\d*(?:e[+-]?\d+)?)|nan|inf(?:inity)?))") .set_name("ieee_float") .set_parse_action(convert_to_float) ) From 533adf471f85b570006871e60a2e585fcda5b085 Mon Sep 17 00:00:00 2001 From: ptmcg Date: Mon, 25 Nov 2024 08:52:45 -0600 Subject: [PATCH 14/31] Update CHANGES to reflect updates in PR --- CHANGES | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/CHANGES b/CHANGES index ccddcfa9..2e485e9c 100644 --- a/CHANGES +++ b/CHANGES @@ -14,8 +14,9 @@ Devin J. Pohly in structuring the code to enable this peaceful transition.) Version 3.2.1 - in development ------------------------------ -- Improved performance of `cpp_style_comment` and `c_style_comment` Regex expressions. - PR submitted by Gabriel Gerlero, nice work, thanks! +- Improved performance of `cpp_style_comment`, `c_style_comment`, `common.fnumber` + and `common.ieee_float` Regex expressions. 
PRs submitted by Gabriel Gerlero, + nice work, thanks! - Add missing type annotations to `match_only_at_col`, `replace_with`, `remove_quotes`, `with_attribute`, and `with_class`. Issue #585 reported by rafrafrek. From 9f018cbe8ecabe8dd02605b4e2e39c282323fb4b Mon Sep 17 00:00:00 2001 From: ptmcg Date: Mon, 9 Dec 2024 07:56:40 -0600 Subject: [PATCH 15/31] Suppress LOOKAHEADs defined in infix_notation parsers to make railroad diagrams a little less ugly --- CHANGES | 4 + examples/lox_parser.py | 2 +- examples/lox_parser_diagram.html | 2872 ++++++++++++++++++++++++++++++ pyparsing/__init__.py | 2 +- pyparsing/core.py | 1 + pyparsing/diagram/__init__.py | 12 +- pyparsing/helpers.py | 41 +- 7 files changed, 2919 insertions(+), 15 deletions(-) create mode 100644 examples/lox_parser_diagram.html diff --git a/CHANGES b/CHANGES index 2e485e9c..34008e5b 100644 --- a/CHANGES +++ b/CHANGES @@ -14,6 +14,10 @@ Devin J. Pohly in structuring the code to enable this peaceful transition.) Version 3.2.1 - in development ------------------------------ +- Simplified railroad diagrams emitted for parsers using `infix_notation`, by hiding + lookahead terms. Updated `lox_parser.py` example and added `lox_parser_diagram.html` + diagram file. + - Improved performance of `cpp_style_comment`, `c_style_comment`, `common.fnumber` and `common.ieee_float` Regex expressions. PRs submitted by Gabriel Gerlero, nice work, thanks! 
diff --git a/examples/lox_parser.py b/examples/lox_parser.py index a9fdcd2b..4e356565 100644 --- a/examples/lox_parser.py +++ b/examples/lox_parser.py @@ -230,5 +230,5 @@ class Circle { if __name__ == '__main__': - program.create_diagram("lox_program_parser.html", vertical=3) + program.create_diagram("lox_parser_diagram.html", vertical=2, show_groups=True) main() diff --git a/examples/lox_parser_diagram.html b/examples/lox_parser_diagram.html new file mode 100644 index 00000000..1d70ff3c --- /dev/null +++ b/examples/lox_parser_diagram.html @@ -0,0 +1,2872 @@ + + + + + + + + + + + + + + + +
+

program

+
+
+ + + + + + + +declaration + +
+
+ +
+

declaration

+
+
+ + + + + + +class_decl +fun_decl +var_decl +statement +
+
+ +
+

class_decl

+
+
+ + + + + + +CLASS +identifier + + + +'<' +identifier +LBRACE + + + + +function +property_ +class_decl + +RBRACE +
+
+ +
+

CLASS

+
+
+ + + + +'class' +
+
+ +
+

function

+
+
+ + + + + +identifier +LPAR + + +parameters +RPAR +block +
+
+ +
+

parameters

+
+
+ + + + + + +identifier + + + + + +',' +[suppress] +identifier + +
+
+ +
+

block

+
+
+ + + + + + + +LBRACE + + + +declaration + +RBRACE +
+
+ +
+

LBRACE

+
+
+ + + + + +'{' +[suppress] +
+
+ +
+

property_

+
+
+ + + + + +identifier +block +
+
+ +
+

RBRACE

+
+
+ + + + + +'}' +[suppress] +
+
+ +
+

fun_decl

+
+
+ + + + + +FUN +identifier +LPAR + + +parameters +RPAR +block +
+
+ +
+

FUN

+
+
+ + + + +'fun' +
+
+ +
+

var_decl

+
+
+ + + + + +VAR +identifier + + + +EQ +expression +SEMI +
+
+ +
+

VAR

+
+
+ + + + +'var' +
+
+ +
+

expression

+
+
+ + + + + + +assignment +arith_expression +function +
+
+ +
+

assignment

+
+
+ + + + + + + +call +identifier +EQ + +assignment +arith_expression +
+
+ +
+

call

+
+
+ + + + + +primary + + + +LPAR + + +arguments +RPAR + +'.' +identifier + +
+
+ +
+

primary

+
+
+ + + + + +TRUE +FALSE +NIL +THIS +number +string +identifier + +SUPER +'.' +identifier +
+
+ +
+

arguments

+
+
+ + + + + + +expression + + + + + +',' +[suppress] +expression + +
+
+ +
+

EQ

+
+
+ + + + + +'=' +[suppress] +
+
+ +
+

arith_expression

+
+
+ + + + + +'or' term +
+
+ +
+

'or' term

+
+
+ + + + + + + + +'and' term + + +OR +'and' term + +'and' term +
+
+ +
+

'and' term

+
+
+ + + + + + + + +!= | == term + + +AND +!= | == term + +!= | == term +
+
+ +
+

!= | == term

+
+
+ + + + + + + + +>= | > | <= | < term + + +!= | == +>= | > | <= | < term + +>= | > | <= | < term +
+
+ +
+

>= | > | <= | < term

+
+
+ + + + + + + + +- | + term + + +>= | > | <= | < +- | + term + +- | + term +
+
+ +
+

- | + term

+
+
+ + + + + + + + +/ | * term + + +- | + +/ | * term + +/ | * term +
+
+ +
+

/ | * term

+
+
+ + + + + + + + +! | - term + + +/ | * +! | - term + +! | - term +
+
+ +
+

! | - term

+
+
+ + + + + + + + + + +! | - +! | - term +call +TRUE +FALSE +NIL +THIS +number +string +identifier + +SUPER +'.' +identifier +nested_arith_operand +
+
+ +
+

! | -

+
+
+ + + + +Re:('[!\-]') +
+
+ +
+

TRUE

+
+
+ + + + +'true' +
+
+ +
+

FALSE

+
+
+ + + + +'false' +
+
+ +
+

NIL

+
+
+ + + + +'nil' +
+
+ +
+

THIS

+
+
+ + + + +'this' +
+
+ +
+

number

+
+
+ + + + +Re:('\d+(?:\.\d+)?') +
+
+ +
+

string

+
+
+ + + + +string enclosed in '"' +
+
+ +
+

SUPER

+
+
+ + + + +'super' +
+
+ +
+

identifier

+
+
+ + + + +W:(A-Z_a-z, '0-9A-Z_a-z) +
+
+ +
+

nested_arith_operand

+
+
+ + + + + + +'(' +[suppress] +arith_expression + +')' +[suppress] +
+
+ +
+

/ | *

+
+
+ + + + +Re:('[/*]') +
+
+ +
+

- | +

+
+
+ + + + +Re:('[\-+]') +
+
+ +
+

>= | > | <= | <

+
+
+ + + + +Re:('>=|>|<=|<') +
+
+ +
+

!= | ==

+
+
+ + + + +Re:('!=|==') +
+
+ +
+

AND

+
+
+ + + + +'and' +
+
+ +
+

OR

+
+
+ + + + +'or' +
+
+ +
+

statement

+
+
+ + + + + + + +expr_statement +for_statement +if_statement +print_statement +return_statement +while_statement +block +
+
+ +
+

expr_statement

+
+
+ + + + + +expression +';' +
+
+ +
+

for_statement

+
+
+ + + + + +FOR +LPAR + + + +var_decl +expr_statement +';' + + +expression +';' + + +expression +RPAR +statement +
+
+ +
+

FOR

+
+
+ + + + +'for' +
+
+ +
+

if_statement

+
+
+ + + + + +IF +LPAR +expression +RPAR +statement + + + +ELSE +statement +
+
+ +
+

IF

+
+
+ + + + +'if' +
+
+ +
+

ELSE

+
+
+ + + + +'else' +
+
+ +
+

print_statement

+
+
+ + + + + +PRINT +expression +SEMI +
+
+ +
+

PRINT

+
+
+ + + + +'print' +
+
+ +
+

return_statement

+
+
+ + + + + +RETURN + + +expression +SEMI +
+
+ +
+

RETURN

+
+
+ + + + +'return' +
+
+ +
+

SEMI

+
+
+ + + + + +';' +[suppress] +
+
+ +
+

while_statement

+
+
+ + + + + +WHILE +LPAR +expression +RPAR +statement +
+
+ +
+

WHILE

+
+
+ + + + +'while' +
+
+ +
+

LPAR

+
+
+ + + + + +'(' +[suppress] +
+
+ +
+

RPAR

+
+
+ + + + + +')' +[suppress] +
+
+ + + + diff --git a/pyparsing/__init__.py b/pyparsing/__init__.py index 3b0c262b..e51e0d1a 100644 --- a/pyparsing/__init__.py +++ b/pyparsing/__init__.py @@ -121,7 +121,7 @@ def __repr__(self): __version_info__ = version_info(3, 2, 1, "final", 1) -__version_time__ = "18 Nov 2024 10:52 UTC" +__version_time__ = "09 Dec 2024 13:50 UTC" __version__ = __version_info__.__version__ __versionTime__ = __version_time__ __author__ = "Paul McGuire " diff --git a/pyparsing/core.py b/pyparsing/core.py index 777165ca..b884e2d4 100644 --- a/pyparsing/core.py +++ b/pyparsing/core.py @@ -481,6 +481,7 @@ def __init__(self, savelist: bool = False): self.callPreparse = True self.callDuringTry = False self.suppress_warnings_: list[Diagnostics] = [] + self.show_in_diagram = True def suppress_warning(self, warning_type: Diagnostics) -> ParserElement: """ diff --git a/pyparsing/diagram/__init__.py b/pyparsing/diagram/__init__.py index 7926f2c3..6f8bf7df 100644 --- a/pyparsing/diagram/__init__.py +++ b/pyparsing/diagram/__init__.py @@ -99,7 +99,7 @@ class AnnotatedItem(railroad.Group): """ def __init__(self, label: str, item): - super().__init__(item=item, label=f"[{label}]") + super().__init__(item=item, label=f"[{label}]" if label else "") class EditablePartial(Generic[T]): @@ -461,6 +461,7 @@ def _to_diagram_element( name_hint: str = None, show_results_names: bool = False, show_groups: bool = False, + show_hidden: bool = False, ) -> typing.Optional[EditablePartial]: """ Recursively converts a PyParsing Element to a railroad Element @@ -472,8 +473,9 @@ def _to_diagram_element( do so :param name_hint: If provided, this will override the generated name :param show_results_names: bool flag indicating whether to add annotations for results names - :returns: The converted version of the input element, but as a Partial that hasn't yet been constructed :param show_groups: bool flag indicating whether to show groups using bounding box + :param show_hidden: bool flag indicating whether to show 
elements that are typically hidden + :returns: The converted version of the input element, but as a Partial that hasn't yet been constructed """ exprs = element.recurse() name = name_hint or element.customName or type(element).__name__ @@ -532,6 +534,12 @@ def _to_diagram_element( # Recursively convert child elements # Here we find the most relevant Railroad element for matching pyparsing Element # We use ``items=[]`` here to hold the place for where the child elements will go once created + + # see if this element is normally hidden, and whether hidden elements are desired + # if not, just return None + if not element.show_in_diagram and not show_hidden: + return None + if isinstance(element, pyparsing.And): # detect And's created with ``expr*N`` notation - for these use a OneOrMore with a repeat # (all will have the same name, and resultsName) diff --git a/pyparsing/helpers.py b/pyparsing/helpers.py index 399b70cf..c4dd3eaa 100644 --- a/pyparsing/helpers.py +++ b/pyparsing/helpers.py @@ -799,6 +799,8 @@ def parseImpl(self, instring, loc, doActions=True): pa: typing.Optional[ParseAction] opExpr1: ParserElement opExpr2: ParserElement + matchExpr: ParserElement + match_lookahead: ParserElement for operDef in op_list: opExpr, arity, rightLeftAssoc, pa = (operDef + (None,))[:4] # type: ignore[assignment] if isinstance(opExpr, str_type): @@ -822,46 +824,63 @@ def parseImpl(self, instring, loc, doActions=True): thisExpr: ParserElement = Forward().set_name(term_name) thisExpr = typing.cast(Forward, thisExpr) + match_lookahead = And([]) if rightLeftAssoc is OpAssoc.LEFT: if arity == 1: - matchExpr = _FB(lastExpr + opExpr) + Group(lastExpr + opExpr[1, ...]) + match_lookahead = _FB(lastExpr + opExpr) + matchExpr = Group(lastExpr + opExpr[1, ...]) elif arity == 2: if opExpr is not None: - matchExpr = _FB(lastExpr + opExpr + lastExpr) + Group( + match_lookahead = _FB(lastExpr + opExpr + lastExpr) + matchExpr = Group( lastExpr + (opExpr + lastExpr)[1, ...] 
) else: - matchExpr = _FB(lastExpr + lastExpr) + Group(lastExpr[2, ...]) + match_lookahead = _FB(lastExpr + lastExpr) + matchExpr = Group(lastExpr[2, ...]) elif arity == 3: - matchExpr = _FB( - lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr - ) + Group(lastExpr + OneOrMore(opExpr1 + lastExpr + opExpr2 + lastExpr)) + match_lookahead = _FB(lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr) + matchExpr = Group(lastExpr + (opExpr1 + lastExpr + opExpr2 + lastExpr)[1, ...]) elif rightLeftAssoc is OpAssoc.RIGHT: if arity == 1: # try to avoid LR with this extra test if not isinstance(opExpr, Opt): opExpr = Opt(opExpr) - matchExpr = _FB(opExpr.expr + thisExpr) + Group(opExpr + thisExpr) + match_lookahead = _FB(opExpr.expr + thisExpr) + matchExpr = Group(opExpr + thisExpr) elif arity == 2: if opExpr is not None: - matchExpr = _FB(lastExpr + opExpr + thisExpr) + Group( + match_lookahead = _FB(lastExpr + opExpr + thisExpr) + matchExpr = Group( lastExpr + (opExpr + thisExpr)[1, ...] ) else: - matchExpr = _FB(lastExpr + thisExpr) + Group( + match_lookahead = _FB(lastExpr + thisExpr) + matchExpr = Group( lastExpr + thisExpr[1, ...] 
) elif arity == 3: - matchExpr = _FB( + match_lookahead = _FB( lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr - ) + Group(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr) + ) + matchExpr = Group(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr) + + # suppress lookahead expr from railroad diagrams + match_lookahead.show_in_diagram = False + + # TODO - determine why this statement can't be included in the following + # if pa block + matchExpr = match_lookahead + matchExpr + if pa: if isinstance(pa, (tuple, list)): matchExpr.set_parse_action(*pa) else: matchExpr.set_parse_action(pa) + thisExpr <<= (matchExpr | lastExpr).setName(term_name) lastExpr = thisExpr + ret <<= lastExpr root_expr.set_name("base_expr") return ret From dc98e0c0509595a3a0a0cde70b35a6fc7e6a0e24 Mon Sep 17 00:00:00 2001 From: Federico <96267363+dalps@users.noreply.github.com> Date: Sat, 14 Dec 2024 19:19:28 +0100 Subject: [PATCH 16/31] fix typo and unused names --- examples/simpleArith.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/examples/simpleArith.py b/examples/simpleArith.py index 99b7ce10..6ee1d31a 100644 --- a/examples/simpleArith.py +++ b/examples/simpleArith.py @@ -27,7 +27,7 @@ # To use the infixNotation helper: # 1. Define the "atom" operand term of the grammar. # For this simple grammar, the smallest operand is either -# and integer or a variable. This will be the first argument +# an integer or a variable. This will be the first argument # to the infixNotation method. # 2. Define a list of tuples for each level of operator # precedence. 
Each tuple is of the form @@ -51,8 +51,8 @@ expr = infixNotation( operand, [ - ("!", 1, opAssoc.LEFT), - ("^", 2, opAssoc.RIGHT), + (factop, 1, opAssoc.LEFT), + (expop, 2, opAssoc.RIGHT), (signop, 1, opAssoc.RIGHT), (multop, 2, opAssoc.LEFT), (plusop, 2, opAssoc.LEFT), From e1eb1267a4778554dd970bd9bd9bb14b9feb301b Mon Sep 17 00:00:00 2001 From: ptmcg Date: Tue, 24 Dec 2024 05:24:17 -0600 Subject: [PATCH 17/31] Make non-terminal diagram elements links to their respective subdiagrams --- CHANGES | 4 ++++ pyparsing/diagram/__init__.py | 8 +++++--- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/CHANGES b/CHANGES index 34008e5b..32a4a28b 100644 --- a/CHANGES +++ b/CHANGES @@ -14,6 +14,10 @@ Devin J. Pohly in structuring the code to enable this peaceful transition.) Version 3.2.1 - in development ------------------------------ +- Updated generated railroad diagrams to make non-terminal elements links to their related + sub-diagrams. This _greatly_ improves navigation of the diagram, especially for + large, complex parsers. + - Simplified railroad diagrams emitted for parsers using `infix_notation`, by hiding lookahead terms. Updated `lox_parser.py` example and added `lox_parser_diagram.html` diagram file. diff --git a/pyparsing/diagram/__init__.py b/pyparsing/diagram/__init__.py index 6f8bf7df..d1a7c1d9 100644 --- a/pyparsing/diagram/__init__.py +++ b/pyparsing/diagram/__init__.py @@ -40,7 +40,7 @@ {{ body | safe }} {% for diagram in diagrams %}
-

{{ diagram.title }}

+

{{ diagram.title }}

{{ diagram.text }}
{{ diagram.svg }} @@ -447,7 +447,7 @@ def _visible_exprs(exprs: Iterable[pyparsing.ParserElement]): return [ e for e in exprs - if not (e.customName or e.resultsName or isinstance(e, non_diagramming_exprs)) + if not isinstance(e, non_diagramming_exprs) ] @@ -693,8 +693,10 @@ def _to_diagram_element( if el_id in lookup and lookup[el_id].extract and lookup[el_id].complete: lookup.extract_into_diagram(el_id) if ret is not None: + text = lookup.diagrams[el_id].kwargs["name"] + href = f"#{text}" ret = EditablePartial.from_call( - railroad.NonTerminal, text=lookup.diagrams[el_id].kwargs["name"] + railroad.NonTerminal, text=text, href=href ) return ret From 5ad8b3f3d6a7586cfcc3ad3a2547744b448faa30 Mon Sep 17 00:00:00 2001 From: ptmcg Date: Tue, 24 Dec 2024 05:32:30 -0600 Subject: [PATCH 18/31] Make non-terminal diagram elements links to their respective subdiagrams --- pyparsing/diagram/__init__.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/pyparsing/diagram/__init__.py b/pyparsing/diagram/__init__.py index d1a7c1d9..af85658f 100644 --- a/pyparsing/diagram/__init__.py +++ b/pyparsing/diagram/__init__.py @@ -360,7 +360,7 @@ def extract_into_diagram(self, el_id: int): # Replace the original definition of this element with a regular block if position.parent: - ret = EditablePartial.from_call(railroad.NonTerminal, text=position.name) + ret = EditablePartial.from_call(railroad.NonTerminal, text=position.name, href=f"#{position.name}") if "item" in position.parent.kwargs: position.parent.kwargs["item"] = ret elif "items" in position.parent.kwargs: @@ -520,14 +520,15 @@ def _to_diagram_element( # so we have to extract it into a new diagram. 
looked_up = lookup[el_id] looked_up.mark_for_extraction(el_id, lookup, name=name_hint) - ret = EditablePartial.from_call(railroad.NonTerminal, text=looked_up.name) + ret = EditablePartial.from_call(railroad.NonTerminal, text=looked_up.name, href=f"#{looked_up.name}") return ret elif el_id in lookup.diagrams: # If we have seen the element at least twice before, and have already extracted it into a subdiagram, we # just put in a marker element that refers to the sub-diagram + text = lookup.diagrams[el_id].kwargs["name"] ret = EditablePartial.from_call( - railroad.NonTerminal, text=lookup.diagrams[el_id].kwargs["name"] + railroad.NonTerminal, text=text, href=f"#{text}" ) return ret From 764f7e2ac202183787719d7de816db67a9e5a692 Mon Sep 17 00:00:00 2001 From: ptmcg Date: Tue, 24 Dec 2024 12:12:18 -0600 Subject: [PATCH 19/31] Make sure href links in railroad diagrams are valid bookmarks --- pyparsing/diagram/__init__.py | 58 ++++++++++++++++++++++++++++++----- 1 file changed, 50 insertions(+), 8 deletions(-) diff --git a/pyparsing/diagram/__init__.py b/pyparsing/diagram/__init__.py index af85658f..72d9d316 100644 --- a/pyparsing/diagram/__init__.py +++ b/pyparsing/diagram/__init__.py @@ -1,6 +1,7 @@ # mypy: ignore-errors from __future__ import annotations +import itertools import railroad import pyparsing import dataclasses @@ -40,7 +41,7 @@ {{ body | safe }} {% for diagram in diagrams %}
-

{{ diagram.title }}

+

{{ diagram.title }}

{{ diagram.text }}
{{ diagram.svg }} @@ -56,6 +57,31 @@ template = Template(jinja2_template_source) +_bookmark_lookup = {} +_bookmark_ids = itertools.count(start=1) + +def _make_bookmark(s: str) -> str: + """ + Converts a string into a valid HTML bookmark (ID or anchor name). + """ + if s in _bookmark_lookup: + return _bookmark_lookup[s] + + # Replace invalid characters with hyphens and ensure only valid characters + bookmark = re.sub(r'[^a-zA-Z0-9-]+', '-', s) + + # Ensure it starts with a letter by adding 'z' if necessary + if not bookmark[:1].isalpha(): + bookmark = f"z{bookmark}" + + # Convert to lowercase and strip hyphens + bookmark = bookmark.lower().strip('-') + + _bookmark_lookup[s] = f"{bookmark}-{next(_bookmark_ids)}" + + return bookmark + + def _collapse_verbose_regex(regex_str: str) -> str: collapsed = pyparsing.Regex(r"#.*").suppress().transform_string(regex_str) collapsed = re.sub(r"\s*\n\s*", "", collapsed) @@ -72,6 +98,12 @@ class NamedDiagram: index: int diagram: railroad.DiagramItem = None + @property + def bookmark(self): + bookmark = _make_bookmark(self.name) + print("returning bookmark", bookmark) + return bookmark + T = TypeVar("T") @@ -162,7 +194,11 @@ def railroad_to_html(diagrams: list[NamedDiagram], embed=False, **kwargs) -> str title = diagram.name if diagram.index == 0: title += " (root)" - data.append({"title": title, "text": "", "svg": io.getvalue()}) + data.append( + { + "title": title, "text": "", "svg": io.getvalue(), "bookmark": diagram.bookmark + } + ) return template.render(diagrams=data, embed=embed, **kwargs) @@ -336,6 +372,12 @@ def __delitem__(self, key: int): def __contains__(self, key: int): return key in self._element_diagram_states + def get(self, key, default=None): + try: + return self[key] + except KeyError: + return default + def generate_unnamed(self) -> int: """ Generate a number used in the name of an otherwise unnamed diagram @@ -360,7 +402,7 @@ def extract_into_diagram(self, el_id: int): # Replace the original definition of this 
element with a regular block if position.parent: - ret = EditablePartial.from_call(railroad.NonTerminal, text=position.name, href=f"#{position.name}") + ret = EditablePartial.from_call(railroad.NonTerminal, text=position.name, href=f"#{_make_bookmark(position.name)}") if "item" in position.parent.kwargs: position.parent.kwargs["item"] = ret elif "items" in position.parent.kwargs: @@ -515,12 +557,12 @@ def _to_diagram_element( # If the element isn't worth extracting, we always treat it as the first time we say it if _worth_extracting(element): - if el_id in lookup and lookup[el_id].name is not None: + looked_up = lookup.get(el_id) + if looked_up and looked_up.name is not None: # If we've seen this element exactly once before, we are only just now finding out that it's a duplicate, # so we have to extract it into a new diagram. - looked_up = lookup[el_id] looked_up.mark_for_extraction(el_id, lookup, name=name_hint) - ret = EditablePartial.from_call(railroad.NonTerminal, text=looked_up.name, href=f"#{looked_up.name}") + ret = EditablePartial.from_call(railroad.NonTerminal, text=looked_up.name, href=f"#{_make_bookmark(looked_up.name)}") return ret elif el_id in lookup.diagrams: @@ -528,7 +570,7 @@ def _to_diagram_element( # just put in a marker element that refers to the sub-diagram text = lookup.diagrams[el_id].kwargs["name"] ret = EditablePartial.from_call( - railroad.NonTerminal, text=text, href=f"#{text}" + railroad.NonTerminal, text=text, href=f"#{_make_bookmark(text)}" ) return ret @@ -695,7 +737,7 @@ def _to_diagram_element( lookup.extract_into_diagram(el_id) if ret is not None: text = lookup.diagrams[el_id].kwargs["name"] - href = f"#{text}" + href = f"#{_make_bookmark(text)}" ret = EditablePartial.from_call( railroad.NonTerminal, text=text, href=href ) From 22b4d2d3e97bdd5938cbbaf704761519e29b3804 Mon Sep 17 00:00:00 2001 From: ptmcg Date: Wed, 25 Dec 2024 01:32:01 -0600 Subject: [PATCH 20/31] Remove diagnostic print statement --- pyparsing/diagram/__init__.py 
| 1 - 1 file changed, 1 deletion(-) diff --git a/pyparsing/diagram/__init__.py b/pyparsing/diagram/__init__.py index 72d9d316..f65d46ae 100644 --- a/pyparsing/diagram/__init__.py +++ b/pyparsing/diagram/__init__.py @@ -101,7 +101,6 @@ class NamedDiagram: @property def bookmark(self): bookmark = _make_bookmark(self.name) - print("returning bookmark", bookmark) return bookmark From 2224099f637905a7ce6b4fd4f3f8ad5086d1c4e7 Mon Sep 17 00:00:00 2001 From: ptmcg Date: Wed, 25 Dec 2024 01:52:32 -0600 Subject: [PATCH 21/31] Modify generated bookmarks index in railroad diagrams --- pyparsing/diagram/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyparsing/diagram/__init__.py b/pyparsing/diagram/__init__.py index f65d46ae..8948b330 100644 --- a/pyparsing/diagram/__init__.py +++ b/pyparsing/diagram/__init__.py @@ -77,7 +77,7 @@ def _make_bookmark(s: str) -> str: # Convert to lowercase and strip hyphens bookmark = bookmark.lower().strip('-') - _bookmark_lookup[s] = f"{bookmark}-{next(_bookmark_ids)}" + _bookmark_lookup[s] = f"{bookmark}-{next(_bookmark_ids):04d}" return bookmark From 4bb26472131e8fe9d2230138b48ad5d2b9c93466 Mon Sep 17 00:00:00 2001 From: ptmcg Date: Wed, 25 Dec 2024 03:09:18 -0600 Subject: [PATCH 22/31] Remove modification of Regex exprs when generating railroad diags; also short-circuit _collapse_verbose_regex when there is no newline in the regex pattern --- pyparsing/diagram/__init__.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pyparsing/diagram/__init__.py b/pyparsing/diagram/__init__.py index 8948b330..4e101a5b 100644 --- a/pyparsing/diagram/__init__.py +++ b/pyparsing/diagram/__init__.py @@ -83,7 +83,9 @@ def _make_bookmark(s: str) -> str: def _collapse_verbose_regex(regex_str: str) -> str: - collapsed = pyparsing.Regex(r"#.*").suppress().transform_string(regex_str) + if "\n" not in regex_str: + return regex_str + collapsed = pyparsing.Regex(r"#.*$").suppress().transform_string(regex_str) 
collapsed = re.sub(r"\s*\n\s*", "", collapsed) return collapsed @@ -669,10 +671,8 @@ def _to_diagram_element( elif len(exprs) > 0 and not element_results_name: ret = EditablePartial.from_call(railroad.Group, item="", label=name) elif isinstance(element, pyparsing.Regex): - patt = _collapse_verbose_regex(element.pattern) - element.pattern = patt - element._defaultName = None - ret = EditablePartial.from_call(railroad.Terminal, element.defaultName) + collapsed_patt = _collapse_verbose_regex(element.pattern) + ret = EditablePartial.from_call(railroad.Terminal, collapsed_patt) elif len(exprs) > 0: ret = EditablePartial.from_call(railroad.Sequence, items=[]) else: From 471801e30a4432a79535e8a3a21685dc19e934c5 Mon Sep 17 00:00:00 2001 From: ptmcg Date: Mon, 30 Dec 2024 08:25:13 -0600 Subject: [PATCH 23/31] Fixed cacheing bug in diagram generation; modified names for inner elements of infix_notation expressions --- CHANGES | 4 ++-- pyparsing/__init__.py | 2 +- pyparsing/diagram/__init__.py | 18 +++++++++--------- pyparsing/helpers.py | 15 +++++++-------- tests/test_diagram.py | 8 ++++---- tests/test_unit.py | 10 +++++----- 6 files changed, 28 insertions(+), 29 deletions(-) diff --git a/CHANGES b/CHANGES index 32a4a28b..0b5917d5 100644 --- a/CHANGES +++ b/CHANGES @@ -19,8 +19,8 @@ Version 3.2.1 - in development large, complex parsers. - Simplified railroad diagrams emitted for parsers using `infix_notation`, by hiding - lookahead terms. Updated `lox_parser.py` example and added `lox_parser_diagram.html` - diagram file. + lookahead terms. Renamed internally generated expressions for clarity, and improved + diagramming. - Improved performance of `cpp_style_comment`, `c_style_comment`, `common.fnumber` and `common.ieee_float` Regex expressions. 
PRs submitted by Gabriel Gerlero, diff --git a/pyparsing/__init__.py b/pyparsing/__init__.py index e51e0d1a..59e34cce 100644 --- a/pyparsing/__init__.py +++ b/pyparsing/__init__.py @@ -121,7 +121,7 @@ def __repr__(self): __version_info__ = version_info(3, 2, 1, "final", 1) -__version_time__ = "09 Dec 2024 13:50 UTC" +__version_time__ = "30 Dec 2024 14:19 UTC" __version__ = __version_info__.__version__ __versionTime__ = __version_time__ __author__ = "Paul McGuire " diff --git a/pyparsing/diagram/__init__.py b/pyparsing/diagram/__init__.py index 4e101a5b..56526b74 100644 --- a/pyparsing/diagram/__init__.py +++ b/pyparsing/diagram/__init__.py @@ -77,7 +77,7 @@ def _make_bookmark(s: str) -> str: # Convert to lowercase and strip hyphens bookmark = bookmark.lower().strip('-') - _bookmark_lookup[s] = f"{bookmark}-{next(_bookmark_ids):04d}" + _bookmark_lookup[s] = bookmark = f"{bookmark}-{next(_bookmark_ids):04d}" return bookmark @@ -403,7 +403,8 @@ def extract_into_diagram(self, el_id: int): # Replace the original definition of this element with a regular block if position.parent: - ret = EditablePartial.from_call(railroad.NonTerminal, text=position.name, href=f"#{_make_bookmark(position.name)}") + href = f"#{_make_bookmark(position.name)}" + ret = EditablePartial.from_call(railroad.NonTerminal, text=position.name, href=href) if "item" in position.parent.kwargs: position.parent.kwargs["item"] = ret elif "items" in position.parent.kwargs: @@ -534,7 +535,7 @@ def _to_diagram_element( element, ( # pyparsing.TokenConverter, - # pyparsing.Forward, + pyparsing.Forward, pyparsing.Located, ), ): @@ -563,7 +564,8 @@ def _to_diagram_element( # If we've seen this element exactly once before, we are only just now finding out that it's a duplicate, # so we have to extract it into a new diagram. 
looked_up.mark_for_extraction(el_id, lookup, name=name_hint) - ret = EditablePartial.from_call(railroad.NonTerminal, text=looked_up.name, href=f"#{_make_bookmark(looked_up.name)}") + href = f"#{_make_bookmark(looked_up.name)}" + ret = EditablePartial.from_call(railroad.NonTerminal, text=looked_up.name, href=href) return ret elif el_id in lookup.diagrams: @@ -618,7 +620,9 @@ def _to_diagram_element( if show_groups: ret = EditablePartial.from_call(AnnotatedItem, label="", item="") else: - ret = EditablePartial.from_call(railroad.Sequence, items=[]) + ret = EditablePartial.from_call( + railroad.Group, item=None, label=element_results_name + ) elif isinstance(element, pyparsing.TokenConverter): label = type(element).__name__.lower() if label == "tokenconverter": @@ -659,10 +663,6 @@ def _to_diagram_element( *args, ) ret = EditablePartial.from_call(railroad.ZeroOrMore, item="") - elif isinstance(element, pyparsing.Group): - ret = EditablePartial.from_call( - railroad.Group, item=None, label=element_results_name - ) elif isinstance(element, pyparsing.Empty) and not element.customName: # Skip unnamed "Empty" elements ret = None diff --git a/pyparsing/helpers.py b/pyparsing/helpers.py index c4dd3eaa..b723b6b4 100644 --- a/pyparsing/helpers.py +++ b/pyparsing/helpers.py @@ -780,19 +780,19 @@ def parseImpl(self, instring, loc, doActions=True): _FB.__name__ = "FollowedBy>" ret = Forward() + ret.set_name(f"{base_expr.name}_expression") if isinstance(lpar, str): lpar = Suppress(lpar) if isinstance(rpar, str): rpar = Suppress(rpar) + nested_expr = (lpar + ret + rpar).set_name(f"nested_{ret.name}") + # if lpar and rpar are not suppressed, wrap in group if not (isinstance(lpar, Suppress) and isinstance(rpar, Suppress)): - lastExpr = base_expr | Group(lpar + ret + rpar).set_name( - f"nested_{base_expr.name}" - ) + lastExpr = base_expr | Group(nested_expr) else: - lastExpr = base_expr | (lpar + ret + rpar).set_name(f"nested_{base_expr.name}") - root_expr = lastExpr + lastExpr = 
base_expr | nested_expr arity: int rightLeftAssoc: opAssoc @@ -812,9 +812,9 @@ def parseImpl(self, instring, loc, doActions=True): "if numterms=3, opExpr must be a tuple or list of two expressions" ) opExpr1, opExpr2 = opExpr - term_name = f"{opExpr1}{opExpr2} term" + term_name = f"{opExpr1}{opExpr2} operations" else: - term_name = f"{opExpr} term" + term_name = f"{opExpr} operations" if not 1 <= arity <= 3: raise ValueError("operator must be unary (1), binary (2), or ternary (3)") @@ -882,7 +882,6 @@ def parseImpl(self, instring, loc, doActions=True): lastExpr = thisExpr ret <<= lastExpr - root_expr.set_name("base_expr") return ret diff --git a/tests/test_diagram.py b/tests/test_diagram.py index 441d05e6..d9e331c5 100644 --- a/tests/test_diagram.py +++ b/tests/test_diagram.py @@ -88,8 +88,8 @@ def generate_railroad( def test_example_rr_diags(self): subtests = [ ("jsonObject", jsonObject, 8), - ("boolExpr", boolExpr, 7), - ("simpleSQL", simpleSQL, 22), + ("boolExpr", boolExpr, 6), + ("simpleSQL", simpleSQL, 20), ("calendars", calendars, 13), ] for label, example_expr, expected_rr_len in subtests: @@ -132,9 +132,9 @@ def test_nested_forward_with_inner_name_only(self): outer <<= inner railroad = self.generate_railroad(outer, "inner_only") - assert len(railroad) == 2 + assert len(railroad) == 1 railroad = self.generate_railroad(outer, "inner_only", show_results_names=True) - assert len(railroad) == 2 + assert len(railroad) == 1 def test_each_grammar(self): diff --git a/tests/test_unit.py b/tests/test_unit.py index d37df659..48e7b24f 100644 --- a/tests/test_unit.py +++ b/tests/test_unit.py @@ -6381,10 +6381,10 @@ def testSetName(self): a | b | c d | e | f {a | b | c | d | e | f} - Forward: + | - term - + | - term - Forward: ?: term - ?: term + W:(0-9)_expression + + | - operations + W:(0-9)_expression + ?: operations Forward: {a | b | c [{d | e | f : ...}]...} int [, int]... (len) int... 
@@ -9121,7 +9121,7 @@ def testDelimitedListName(self): ], ) self.assertEqual( - "Forward: + | - term [, Forward: + | - term]...", + "var_expression [, var_expression]...", str(pp.delimitedList(math)), ) From dde1a025d0783d4e90ce0ee089c9b7cf89f50c78 Mon Sep 17 00:00:00 2001 From: ptmcg Date: Mon, 30 Dec 2024 08:57:06 -0600 Subject: [PATCH 24/31] Added generated diagrams for many of the examples --- CHANGES | 2 + examples/TAP.py | 10 +- examples/TAP_diagram.html | 645 +++ examples/adventureEngine.py | 8 +- examples/adventure_game_parser_diagram.html | 885 ++++ examples/antlr_grammar.py | 13 +- examples/antlr_grammar_diagram.html | 4160 ++++++++++++++++ examples/apicheck.py | 61 +- examples/apicheck_diagram.html | 226 + examples/bf.py | 14 +- examples/bf_diagram.html | 128 + examples/chemical_formulas.html | 158 + examples/chemical_formulas.py | 44 +- examples/complex_chemical_formulas.py | 63 +- .../complex_chemical_formulas_diagram.html | 538 ++ examples/decaf_parser.py | 81 +- examples/decaf_parser_diagram.html | 4263 ++++++++++++++++ examples/delta_time.py | 10 +- examples/delta_time_diagram.html | 1910 ++++++++ examples/directx_x_file_parser.html | 270 + examples/directx_x_file_parser.py | 25 +- examples/lox_parser.py | 6 +- examples/lox_parser_diagram.html | 660 +-- examples/lua_parser.py | 7 + examples/lua_parser_diagram.html | 4363 +++++++++++++++++ examples/lucene_grammar.py | 28 +- examples/lucene_grammar_diagram.html | 867 ++++ examples/mongodb_query_expression.html | 2325 +++++++++ examples/mongodb_query_expression.py | 15 +- examples/number_words.py | 9 +- examples/number_words_diagram.html | 604 +++ examples/parse_python_value.html | 711 +++ examples/parse_python_value.py | 5 +- examples/roman_numerals.py | 9 +- examples/roman_numerals_diagram.html | 689 +++ examples/rosettacode.py | 5 +- examples/rosettacode_diagram.html | 2076 ++++++++ examples/select_parser.py | 27 +- examples/tag_metadata.py | 22 +- examples/tag_metadata_diagram.html | 86 + 40 files 
changed, 25524 insertions(+), 504 deletions(-) create mode 100644 examples/TAP_diagram.html create mode 100644 examples/adventure_game_parser_diagram.html create mode 100644 examples/antlr_grammar_diagram.html create mode 100644 examples/apicheck_diagram.html create mode 100644 examples/bf_diagram.html create mode 100644 examples/chemical_formulas.html create mode 100644 examples/complex_chemical_formulas_diagram.html create mode 100644 examples/decaf_parser_diagram.html create mode 100644 examples/delta_time_diagram.html create mode 100644 examples/directx_x_file_parser.html create mode 100644 examples/lua_parser_diagram.html create mode 100644 examples/lucene_grammar_diagram.html create mode 100644 examples/mongodb_query_expression.html create mode 100644 examples/number_words_diagram.html create mode 100644 examples/parse_python_value.html create mode 100644 examples/roman_numerals_diagram.html create mode 100644 examples/rosettacode_diagram.html create mode 100644 examples/tag_metadata_diagram.html diff --git a/CHANGES b/CHANGES index 0b5917d5..a82e6e4e 100644 --- a/CHANGES +++ b/CHANGES @@ -22,6 +22,8 @@ Version 3.2.1 - in development lookahead terms. Renamed internally generated expressions for clarity, and improved diagramming. +- Added generated diagrams for many of the examples. + - Improved performance of `cpp_style_comment`, `c_style_comment`, `common.fnumber` and `common.ieee_float` Regex expressions. PRs submitted by Gabriel Gerlero, nice work, thanks! 
diff --git a/examples/TAP.py b/examples/TAP.py index b41e9510..dfa16195 100644 --- a/examples/TAP.py +++ b/examples/TAP.py @@ -37,6 +37,7 @@ restOfLine, FollowedBy, empty, + autoname_elements, ) __all__ = ["tapOutputParser", "TAPTest", "TAPSummary"] @@ -52,7 +53,7 @@ OK, NOT_OK = map(Literal, ["ok", "not ok"]) testStatus = OK | NOT_OK -description = Regex("[^#\n]+") +description = Regex(r"[^#\n]+") description.setParseAction(lambda t: t[0].lstrip("- ")) TODO, SKIP = map(CaselessLiteral, "TODO SKIP".split()) @@ -79,6 +80,8 @@ OneOrMore((testLine | bailLine) + NL) )("tests") +autoname_elements() + class TAPTest: def __init__(self, results): @@ -170,6 +173,11 @@ def summary(self, showPassed=False, showAll=False): def main(): + import contextlib + + with contextlib.suppress(Exception): + tapOutputParser.create_diagram("TAP_diagram.html", vertical=3) + test1 = """\ 1..4 ok 1 - Input file opened diff --git a/examples/TAP_diagram.html b/examples/TAP_diagram.html new file mode 100644 index 00000000..fbf6f23d --- /dev/null +++ b/examples/TAP_diagram.html @@ -0,0 +1,645 @@ + + + + + + + + + + + + + + + +
+

tapOutputParser

+
+
+ + + + + + + + + + + +planplan +plan +NLNL + + + + +testLinetestLine +bailLinebailLine +NLNL + +tests + +[ALL] +
+
+ +
+

plan

+
+
+ + + + + +'1..' +W:(0-9) +
+
+ +
+

testLine

+
+
+ + + + + + + + + + +'#' +[suppress] +emptyempty +rest of linerest of line +NLNL + + +'ok' +'not ok' + + +integerinteger + + +descriptiondescription + + +directivedirective +
+
+ +
+

integer

+
+
+ + + + +W:(0-9) +
+
+ +
+

description

+
+
+ + + + +[^#\n]+ +
+
+ +
+

directive

+
+
+ + + + + + +'#' +[suppress] + + +TODOTODO +rest of linerest of line + + +SKIPSKIP +[LOOKAHEAD] +rest of linerest of line +
+
+ +
+

TODO

+
+
+ + + + +'TODO' +
+
+ +
+

SKIP

+
+
+ + + + +'SKIP' +
+
+ +
+

bailLine

+
+
+ + + + + +'Bail out!' +emptyempty + + +rest of linerest of line +
+
+ +
+

empty

+
+
+ + + + +Empty +
+
+ +
+

rest of line

+
+
+ + + + +.* +
+
+ +
+

NL

+
+
+ + + + + +end of lineend of line +[suppress] +
+
+ +
+

end of line

+
+
+ + + + +LineEnd +
+
+ + + + diff --git a/examples/adventureEngine.py b/examples/adventureEngine.py index 7010181f..c4d155b6 100644 --- a/examples/adventureEngine.py +++ b/examples/adventureEngine.py @@ -5,9 +5,10 @@ # Updated 2023 - using PEP8 API names # -import pyparsing as pp +import contextlib import random import string +import pyparsing as pp def a_or_an(item): @@ -508,7 +509,10 @@ def make_bnf(self): | doorsCommand | helpCommand | quitCommand - )("command") + )("command").set_name("command") + + with contextlib.suppress(Exception): + parser.create_diagram("adventure_game_parser_diagram.html", vertical=2, show_groups=True) return parser diff --git a/examples/adventure_game_parser_diagram.html b/examples/adventure_game_parser_diagram.html new file mode 100644 index 00000000..e5ff5ca0 --- /dev/null +++ b/examples/adventure_game_parser_diagram.html @@ -0,0 +1,885 @@ + + + + + + + + + + + + + + + +
+

command

+
+
+ + + + + + +INVENTORY | INV | IINVENTORY | INV | I + +USE | UUSE | U +item_refitem_ref + + +IN | ONIN | ON + + +item_refitem_ref + +OPEN | OOPEN | O +item_refitem_ref + +CLOSE | CLCLOSE | CL +item_refitem_ref + +DROP | LEAVEDROP | LEAVE +item_refitem_ref + + +TAKE | PICKUPTAKE | PICKUP + +'PICK' +'UP' +item_refitem_ref + + + + +MOVE | GOMOVE | GO + +NORTH | NNORTH | N +SOUTH | SSOUTH | S +EAST | EEAST | E +WEST | WWEST | W +LOOK | LLOOK | L + +EXAMINE | EX | XEXAMINE | EX | X +item_refitem_ref +DOORSDOORS +HELP | H | ?HELP | H | ? +QUIT | QQUIT | Q +
+
+ +
+

INVENTORY | INV | I

+
+
+ + + + +INVENTORY|INV|I +
+
+ +
+

USE | U

+
+
+ + + + +USE|U +
+
+ +
+

item_ref

+
+
+ + + + + +W:(A-Za-z) + +
+
+ +
+

IN | ON

+
+
+ + + + +IN|ON +
+
+ +
+

OPEN | O

+
+
+ + + + +OPEN|O +
+
+ +
+

CLOSE | CL

+
+
+ + + + +CLOSE|CL +
+
+ +
+

DROP | LEAVE

+
+
+ + + + +DROP|LEAVE +
+
+ +
+

TAKE | PICKUP

+
+
+ + + + +TAKE|PICKUP +
+
+ +
+

MOVE | GO

+
+
+ + + + +MOVE|GO +
+
+ +
+

NORTH | N

+
+
+ + + + +NORTH|N +
+
+ +
+

SOUTH | S

+
+
+ + + + +SOUTH|S +
+
+ +
+

EAST | E

+
+
+ + + + +EAST|E +
+
+ +
+

WEST | W

+
+
+ + + + +WEST|W +
+
+ +
+

LOOK | L

+
+
+ + + + +LOOK|L +
+
+ +
+

EXAMINE | EX | X

+
+
+ + + + +EXAMINE|EX|X +
+
+ +
+

DOORS

+
+
+ + + + +'DOORS' +
+
+ +
+

HELP | H | ?

+
+
+ + + + +HELP|H|\? +
+
+ +
+

QUIT | Q

+
+
+ + + + +QUIT|Q +
+
+ + + + diff --git a/examples/antlr_grammar.py b/examples/antlr_grammar.py index 49151eee..566dd0ef 100644 --- a/examples/antlr_grammar.py +++ b/examples/antlr_grammar.py @@ -35,6 +35,7 @@ alphanums, delimitedList, Char, + autoname_elements, ) # http://www.antlr.org/grammar/ANTLR/ANTLRv3.g @@ -75,11 +76,13 @@ PROTECTED, PUBLIC, PRIVATE, -) = map( - Keyword, +) = list( + Keyword.using_each( """src scope options tokens fragment id lexer parser grammar tree catch finally throws protected public private """.split(), + ) ) + KEYWORD = MatchFirst(keywords) # Tokens @@ -252,6 +255,7 @@ grammarDef = grammarHeading + Group(OneOrMore(rule))("rules") +autoname_elements() def grammar(): return grammarDef @@ -341,6 +345,10 @@ def antlrConverter(antlrGrammarTree): if __name__ == "__main__": + import contextlib + + with contextlib.suppress(Exception): + grammarDef.create_diagram("antlr_grammar_diagram.html", vertical=2, show_groups=True) text = """\ grammar SimpleCalc; @@ -379,7 +387,6 @@ def antlrConverter(antlrGrammarTree): """ - grammar().validate() antlrGrammarTree = grammar().parseString(text) print(antlrGrammarTree.dump()) pyparsingRules = antlrConverter(antlrGrammarTree) diff --git a/examples/antlr_grammar_diagram.html b/examples/antlr_grammar_diagram.html new file mode 100644 index 00000000..d6ec6ddf --- /dev/null +++ b/examples/antlr_grammar_diagram.html @@ -0,0 +1,4160 @@ + + + + + + + + + + + + + + + +
+

grammarDef

+
+
+ + + + + + + +C style commentC style comment + + +grammarTypegrammarType +GRAMMARGRAMMAR + +W:(A-Z, 0-9A-Z_a-z) +W:(a-z, 0-9A-Z_a-z) +SEMISEMI + + +optionsSpecoptionsSpec + + +tokensSpectokensSpec + + + +attrScopeattrScope + + + + +actionaction + + + +rulerule + +
+
+ +
+

C style comment

+
+
+ + + + +/\*(?:[^*]|\*(?!/))*\*\/ +
+
+ +
+

grammarType

+
+
+ + + + + +LEXERLEXER +PARSERPARSER +TREETREE +
+
+ +
+

LEXER

+
+
+ + + + +'lexer' +
+
+ +
+

PARSER

+
+
+ + + + +'parser' +
+
+ +
+

TREE

+
+
+ + + + +'tree' +
+
+ +
+

GRAMMAR

+
+
+ + + + +'grammar' +
+
+ +
+

optionsSpec

+
+
+ + + + + + +OPTIONS_OPTIONS_ +[suppress] +LBRACELBRACE + + + +optionoption +SEMISEMI + +RBRACERBRACE +
+
+ +
+

option

+
+
+ + + + + + + +W:(A-Z, 0-9A-Z_a-z) +W:(a-z, 0-9A-Z_a-z) +EQEQ + +W:(A-Z, 0-9A-Z_a-z) +W:(a-z, 0-9A-Z_a-z) + + +"'" +[suppress] + + + + +'\\' + +n | r | t | b | f | \ | " | > | 'n | r | t | b | f | \ | " | > | ' + +'u' +W:(0-9A-Fa-f){4} +(!-~) + + + + +"'" +[suppress] +'\\' +[NOT] +(!-~) + +[combine] + +"'" +[suppress] + + +"'" +[suppress] + + +'\\' + +n | r | t | b | f | \ | " | > | 'n | r | t | b | f | \ | " | > | ' + +'u' +W:(0-9A-Fa-f){4} +(!-~) + + + +APOSAPOS +BSLASHBSLASH +[NOT] +(!-~) + +"'" +[suppress] +W:(0-9) +ss +
+
+ +
+

s

+
+
+ + + + +'*' +
+
+ +
+

tokensSpec

+
+
+ + + + + + +TOKENS_TOKENS_ +[suppress] +LBRACELBRACE + + +tokenSpectokenSpec + +RBRACERBRACE +
+
+ +
+

TOKENS_

+
+
+ + + + +'tokens' +
+
+ +
+

tokenSpec

+
+
+ + + + + + + +W:(A-Z, 0-9A-Z_a-z) +EQEQ + + + +"'" +[suppress] + + + + +'\\' + +n | r | t | b | f | \ | " | > | 'n | r | t | b | f | \ | " | > | ' + +'u' +W:(0-9A-Fa-f){4} +(!-~) + + + + +"'" +[suppress] +'\\' +[NOT] +(!-~) + +[combine] + +"'" +[suppress] + + +"'" +[suppress] + + +'\\' + +n | r | t | b | f | \ | " | > | 'n | r | t | b | f | \ | " | > | ' + +'u' +W:(0-9A-Fa-f){4} +(!-~) + + + +APOSAPOS +BSLASHBSLASH +[NOT] +(!-~) + +"'" +[suppress] +SEMISEMI +
+
+ +
+

EQ

+
+
+ + + + + +'=' +[suppress] +
+
+ +
+

attrScope

+
+
+ + + + + + +SCOPE_SCOPE_ +[suppress] +idid +NESTED_ACTIONNESTED_ACTION + + +'?' +
+
+ +
+

NESTED_ACTION

+
+
+ + + + + + +LBRACELBRACE + + + + +NESTED_ACTIONNESTED_ACTION + + +'//' +[suppress] + +'$ANTLR' +[suppress] + +SRC_SRC_ +[suppress] + + +'"' +[suppress] + + + + + + +BSLASHBSLASH +[suppress] +APOSAPOS + +BSLASHBSLASH +[suppress] + + +BSLASHBSLASH +[suppress] + + +APOSAPOS +QUOTEQUOTE +[NOT] +SGL_PRINTABLESGL_PRINTABLE + + + +BSLASHBSLASH +QUOTEQUOTE +[NOT] +SGL_PRINTABLESGL_PRINTABLE + + +'"' +[suppress] +W:(0-9) + + + + + + +EOLEOL +[NOT] +W:(!-~) + +EOLEOL +C style commentC style comment +ACTION_STRING_LITERALACTION_STRING_LITERAL +ACTION_CHAR_LITERALACTION_CHAR_LITERAL + +RBRACERBRACE +
+
+ +
+

LBRACE

+
+
+ + + + + +'{' +[suppress] +
+
+ +
+

SRC_

+
+
+ + + + +'src' +
+
+ +
+

EOL

+
+
+ + + + + +end of lineend of line +[suppress] +
+
+ +
+

end of line

+
+
+ + + + +LineEnd +
+
+ +
+

ACTION_STRING_LITERAL

+
+
+ + + + + +QUOTEQUOTE + + + + + + +BSLASHBSLASH +[suppress] +APOSAPOS + +BSLASHBSLASH +[suppress] + + +BSLASHBSLASH +[suppress] + + +APOSAPOS +QUOTEQUOTE +[NOT] +SGL_PRINTABLESGL_PRINTABLE + + + +BSLASHBSLASH +QUOTEQUOTE +[NOT] +SGL_PRINTABLESGL_PRINTABLE + +QUOTEQUOTE +
+
+ +
+

ACTION_CHAR_LITERAL

+
+
+ + + + + +APOSAPOS + + + +BSLASHBSLASH +[suppress] +APOSAPOS + +BSLASHBSLASH +[suppress] + + +BSLASHBSLASH +[suppress] + + +APOSAPOS +QUOTEQUOTE +[NOT] +SGL_PRINTABLESGL_PRINTABLE + + + +BSLASHBSLASH +APOSAPOS +[NOT] +SGL_PRINTABLESGL_PRINTABLE +APOSAPOS +
+
+ +
+

QUOTE

+
+
+ + + + + +'"' +[suppress] +
+
+ +
+

RBRACE

+
+
+ + + + + +'}' +[suppress] +
+
+ +
+

action

+
+
+ + + + + +ATAT + + + +actionScopeNameactionScopeName + +'::' +[suppress] +idid +NESTED_ACTIONNESTED_ACTION + + +'?' +
+
+ +
+

actionScopeName

+
+
+ + + + + +TOKEN_REFTOKEN_REF +RULE_REFRULE_REF +'lexer' +'parser' +
+
+ +
+

rule

+
+
+ + + + + + + + +C style commentC style comment + + +modifiermodifier + +W:(A-Z, 0-9A-Z_a-z) +W:(a-z, 0-9A-Z_a-z) + + +'!' + + + +LBRACKLBRACK + + + + +NESTED_ARG_ACTIONNESTED_ARG_ACTION +ACTION_STRING_LITERALACTION_STRING_LITERAL +ACTION_CHAR_LITERALACTION_CHAR_LITERAL + +RBRACKRBRACK + + + + +'returns' +[suppress] + +LBRACKLBRACK + + + + +NESTED_ARG_ACTIONNESTED_ARG_ACTION +ACTION_STRING_LITERALACTION_STRING_LITERAL +ACTION_CHAR_LITERALACTION_CHAR_LITERAL + +RBRACKRBRACK + + +throwsSpecthrowsSpec + + +optionsSpecoptionsSpec + + +ruleScopeSpecruleScopeSpec + + + +ruleActionruleAction + +COLONCOLON + + + +elementelement + +rewriterewrite + + + + + +VERTVERT + + + +elementelement + +rewriterewrite + +SEMISEMI + + +exceptionGroupexceptionGroup +
+
+ +
+

modifier

+
+
+ + + + + +PROTECTEDPROTECTED +PUBLICPUBLIC +PRIVATEPRIVATE +FRAGMENTFRAGMENT +
+
+ +
+

PROTECTED

+
+
+ + + + +'protected' +
+
+ +
+

PUBLIC

+
+
+ + + + +'public' +
+
+ +
+

PRIVATE

+
+
+ + + + +'private' +
+
+ +
+

FRAGMENT

+
+
+ + + + +'fragment' +
+
+ +
+

NESTED_ARG_ACTION

+
+
+ + + + + + +LBRACKLBRACK + + + + +NESTED_ARG_ACTIONNESTED_ARG_ACTION +ACTION_STRING_LITERALACTION_STRING_LITERAL +ACTION_CHAR_LITERALACTION_CHAR_LITERAL + +RBRACKRBRACK +
+
+ +
+

throwsSpec

+
+
+ + + + + + +THROWSTHROWS +[suppress] + + +idid + + + + + +',' +[suppress] +idid + +
+
+ +
+

THROWS

+
+
+ + + + +'throws' +
+
+ +
+

ruleScopeSpec

+
+
+ + + + + + + +SCOPE_SCOPE_ +[suppress] +NESTED_ACTIONNESTED_ACTION + + +'?' + + +SCOPE_SCOPE_ +[suppress] + + +idid + + + + + +',' +[suppress] +idid + +SEMISEMI + + +SCOPE_SCOPE_ +[suppress] +NESTED_ACTIONNESTED_ACTION + + +'?' + +SCOPE_SCOPE_ +[suppress] + + +idid + + + + + +',' +[suppress] +idid + +SEMISEMI +
+
+ +
+

SCOPE_

+
+
+ + + + +'scope' +
+
+ +
+

ruleAction

+
+
+ + + + + +ATAT +idid +NESTED_ACTIONNESTED_ACTION + + +'?' +
+
+ +
+

AT

+
+
+ + + + + +'@' +[suppress] +
+
+ +
+

id

+
+
+ + + + + +TOKEN_REFTOKEN_REF +RULE_REFRULE_REF +
+
+ +
+

element

+
+
+ + + + + + +elementNoOptionSpecelementNoOptionSpec +
+
+ +
+

elementNoOptionSpec

+
+
+ + + + + + + +W:(A-Z, 0-9A-Z_a-z) +W:(a-z, 0-9A-Z_a-z) += | +== | += + + + + + +"'" +[suppress] + + +'\\' + +n | r | t | b | f | \ | " | > | 'n | r | t | b | f | \ | " | > | ' + +'u' +W:(0-9A-Fa-f){4} +(!-~) + + + +APOSAPOS +BSLASHBSLASH +[NOT] +(!-~) + +"'" +[suppress] +RANGERANGE + + +"'" +[suppress] + + +'\\' + +n | r | t | b | f | \ | " | > | 'n | r | t | b | f | \ | " | > | ' + +'u' +W:(0-9A-Fa-f){4} +(!-~) + + + +APOSAPOS +BSLASHBSLASH +[NOT] +(!-~) + +"'" +[suppress] + + +^ | !^ | ! +terminalterminal + +TILTIL + +CHAR_LITERALCHAR_LITERAL +TOKEN_REFTOKEN_REF +STRING_LITERALSTRING_LITERAL +blockblock + + +^ | !^ | ! + +RULE_REFRULE_REF + + + +LBRACKLBRACK + + + + +NESTED_ARG_ACTIONNESTED_ARG_ACTION +ACTION_STRING_LITERALACTION_STRING_LITERAL +ACTION_CHAR_LITERALACTION_CHAR_LITERAL + +RBRACKRBRACK + + +^ | !^ | ! + + +? | * | +? | * | + + + +W:(A-Z, 0-9A-Z_a-z) +W:(a-z, 0-9A-Z_a-z) += | +== | += +blockblock + + +? | * | +? | * | + + + + + + + +"'" +[suppress] + + +'\\' + +n | r | t | b | f | \ | " | > | 'n | r | t | b | f | \ | " | > | ' + +'u' +W:(0-9A-Fa-f){4} +(!-~) + + + +APOSAPOS +BSLASHBSLASH +[NOT] +(!-~) + +"'" +[suppress] +RANGERANGE + + +"'" +[suppress] + + +'\\' + +n | r | t | b | f | \ | " | > | 'n | r | t | b | f | \ | " | > | ' + +'u' +W:(0-9A-Fa-f){4} +(!-~) + + + +APOSAPOS +BSLASHBSLASH +[NOT] +(!-~) + +"'" +[suppress] + + +^ | !^ | ! +terminalterminal + +TILTIL + +CHAR_LITERALCHAR_LITERAL +TOKEN_REFTOKEN_REF +STRING_LITERALSTRING_LITERAL +blockblock + + +^ | !^ | ! + +RULE_REFRULE_REF + + + +LBRACKLBRACK + + + + +NESTED_ARG_ACTIONNESTED_ARG_ACTION +ACTION_STRING_LITERALACTION_STRING_LITERAL +ACTION_CHAR_LITERALACTION_CHAR_LITERAL + +RBRACKRBRACK + + +^ | !^ | ! + + +? | * | +? | * | + +ebnfebnf +ACTIONACTION + +ROOTROOT +LPARLPAR +elementelement +elementelement + + + +elementelement + +RPARRPAR + + +? | * | +? | * | + +
+
+ +
+

= | +=

+
+
+ + + + +=|\+= +
+
+ +
+

terminal

+
+
+ + + + + + +CHAR_LITERALCHAR_LITERAL + +TOKEN_REFTOKEN_REF + + +NESTED_ARG_ACTIONNESTED_ARG_ACTION +STRING_LITERALSTRING_LITERAL +'.' + + +^ | !^ | ! +
+
+ +
+

CHAR_LITERAL

+
+
+ + + + + +APOSAPOS +LITERAL_CHARLITERAL_CHAR +APOSAPOS +
+
+ +
+

LITERAL_CHAR

+
+
+ + + + + +ESCESC + + + +APOSAPOS +BSLASHBSLASH +[NOT] +SGL_PRINTABLESGL_PRINTABLE +
+
+ +
+

ESC

+
+
+ + + + + +BSLASHBSLASH + +n | r | t | b | f | \ | " | > | 'n | r | t | b | f | \ | " | > | ' + +'u' +W:(0-9A-Fa-f){4} +SGL_PRINTABLESGL_PRINTABLE +
+
+ +
+

SGL_PRINTABLE

+
+
+ + + + +(!-~) +
+
+ +
+

STRING_LITERAL

+
+
+ + + + + +APOSAPOS + + + + +'\\' + +n | r | t | b | f | \ | " | > | 'n | r | t | b | f | \ | " | > | ' + +'u' +W:(0-9A-Fa-f){4} +(!-~) + + + + +"'" +[suppress] +'\\' +[NOT] +(!-~) + +[combine] +APOSAPOS +
+
+ +
+

n | r | t | b | f | \ | " | > | '

+
+
+ + + + +[nrtbf\\">'] +
+
+ +
+

block

+
+
+ + + + + + +LPARLPAR + + + + + + + +OPTIONS_OPTIONS_ +[suppress] + +'{' +[suppress] + + + +optionoption +SEMISEMI + + +'}' +[suppress] +COLONCOLON + + + + + +elementelement + +rewriterewrite + + + + + +VERTVERT + + + +elementelement + +rewriterewrite + +RPARRPAR +
+
+ +
+

OPTIONS_

+
+
+ + + + +'options' +
+
+ +
+

COLON

+
+
+ + + + + +':' +[suppress] +
+
+ +
+

RANGE

+
+
+ + + + + +'..' +[suppress] +
+
+ +
+

APOS

+
+
+ + + + + +"'" +[suppress] +
+
+ +
+

BSLASH

+
+
+ + + + +'\\' +
+
+ +
+

TIL

+
+
+ + + + + +'~' +[suppress] +
+
+ +
+

TOKEN_REF

+
+
+ + + + +W:(A-Z, 0-9A-Z_a-z) +
+
+ +
+

RULE_REF

+
+
+ + + + +W:(a-z, 0-9A-Z_a-z) +
+
+ +
+

LBRACK

+
+
+ + + + + +'[' +[suppress] +
+
+ +
+

RBRACK

+
+
+ + + + + +']' +[suppress] +
+
+ +
+

^ | !

+
+
+ + + + +[\^!] +
+
+ +
+

ebnf

+
+
+ + + + + +blockblock + + + +? | * | +? | * | + +'=>' +
+
+ +
+

ACTION

+
+
+ + + + + +NESTED_ACTIONNESTED_ACTION + + +'?' +
+
+ +
+

ROOT

+
+
+ + + + + +'^' +[suppress] +
+
+ +
+

LPAR

+
+
+ + + + + +'(' +[suppress] +
+
+ +
+

RPAR

+
+
+ + + + + +')' +[suppress] +
+
+ +
+

? | * | +

+
+
+ + + + +[?*+] +
+
+ +
+

VERT

+
+
+ + + + + +'|' +[suppress] +
+
+ +
+

rewrite

+
+
+ + + + + + +'TODO REWRITE RULES TODO' +
+
+ +
+

SEMI

+
+
+ + + + + +';' +[suppress] +
+
+ +
+

exceptionGroup

+
+
+ + + + + + + +exceptionHandlerexceptionHandler + + + +finallyClausefinallyClause +finallyClausefinallyClause +
+
+ +
+

exceptionHandler

+
+
+ + + + + + +CATCHCATCH +[suppress] +NESTED_ARG_ACTIONNESTED_ARG_ACTION +NESTED_ACTIONNESTED_ACTION + + +'?' +
+
+ +
+

CATCH

+
+
+ + + + +'catch' +
+
+ +
+

finallyClause

+
+
+ + + + + + +FINALLYFINALLY +[suppress] +NESTED_ACTIONNESTED_ACTION + + +'?' +
+
+ +
+

FINALLY

+
+
+ + + + +'finally' +
+
+ + + + diff --git a/examples/apicheck.py b/examples/apicheck.py index 358dd6f2..97010d19 100644 --- a/examples/apicheck.py +++ b/examples/apicheck.py @@ -32,29 +32,38 @@ def apiProc(name, numargs): ] ) -test = """[ procname1 $par1 $par2 ] - other code here - [ procname1 $par1 $par2 $par3 ] - more code here - [ procname1 $par1 ] - [ procname3 ${arg with spaces} $par2 ]""" - - -# now explicitly iterate through the scanner using next(), so that -# we can trap ParseSyntaxException's that would be raised due to -# an incorrect number of arguments. If an exception does occur, -# then see how we reset the input text and scanner to advance to the -# next line of source code -api_scanner = apiRef.scanString(test) -while 1: - try: - t, s, e = next(api_scanner) - print(f"found {t.procname} on line {lineno(s, test)}") - except ParseSyntaxException as pe: - print(f"invalid arg count on line {pe.lineno}") - print(f"{pe.lineno} : {pe.line}") - # reset api scanner to start after this exception location - test = "\n" * (pe.lineno - 1) + test[pe.loc + 1:] - api_scanner = apiRef.scanString(test) - except StopIteration: - break +autoname_elements() + +if __name__ == '__main__': + + import contextlib + + with contextlib.suppress(Exception): + apiRef.create_diagram("apicheck_diagram.html", vertical=9, show_groups=True) + + test = """[ procname1 $par1 $par2 ] + other code here + [ procname1 $par1 $par2 $par3 ] + more code here + [ procname1 $par1 ] + [ procname3 ${arg with spaces} $par2 ]""" + + + # now explicitly iterate through the scanner using next(), so that + # we can trap ParseSyntaxException's that would be raised due to + # an incorrect number of arguments. 
If an exception does occur, + # then see how we reset the input text and scanner to advance to the + # next line of source code + api_scanner = apiRef.scanString(test) + while 1: + try: + t, s, e = next(api_scanner) + print(f"found {t.procname} on line {lineno(s, test)}") + except ParseSyntaxException as pe: + print(f"invalid arg count on line {pe.lineno}") + print(f"{pe.lineno} : {pe.line}") + # reset api scanner to start after this exception location + test = "\n" * (pe.lineno - 1) + test[pe.loc + 1:] + api_scanner = apiRef.scanString(test) + except StopIteration: + break diff --git a/examples/apicheck_diagram.html b/examples/apicheck_diagram.html new file mode 100644 index 00000000..6729d519 --- /dev/null +++ b/examples/apicheck_diagram.html @@ -0,0 +1,226 @@ + + + + + + + + + + + + + + + +
+

apiRef

+
+
+ + + + + + +'[' +[LOOKAHEAD] + + +LBRACKLBRACK +'procname1' +'$' +identident +'$' +identident +RBRACKRBRACK + +LBRACKLBRACK +'procname2' +'$' +identident +RBRACKRBRACK + +LBRACKLBRACK +'procname3' +'$' +identident +'$' +identident +RBRACKRBRACK +
+
+ +
+

LBRACK

+
+
+ + + + + +'[' +[suppress] +
+
+ +
+

ident

+
+
+ + + + + +W:(A-Za-z, 0-9A-Z_a-z) +quoted string, starting with { ending with } +
+
+ +
+

RBRACK

+
+
+ + + + + +']' +[suppress] +
+
+ + + + diff --git a/examples/bf.py b/examples/bf.py index 76144295..b8ff1aca 100644 --- a/examples/bf.py +++ b/examples/bf.py @@ -150,10 +150,14 @@ def run_program(tokens): t.execute(bf) print() +if __name__ == '__main__': -# generate railroad diagram -program_expr.create_diagram("bf.html") + # generate railroad diagram + import contextlib -# execute an example BF program -hw = "+[-->-[>>+>-----<<]<--<---]>-.>>>+.>>..+++[.>]<<<<.+++.------.<<-.>>>>+." -program_expr.parse_string(hw) + with contextlib.suppress(Exception): + program_expr.create_diagram("bf_diagram.html") + + # execute an example BF program + hw = "+[-->-[>>+>-----<<]<--<---]>-.>>>+.>>..+++[.>]<<<<.+++.------.<<-.>>>>+." + program_expr.parse_string(hw) diff --git a/examples/bf_diagram.html b/examples/bf_diagram.html new file mode 100644 index 00000000..9696b0ff --- /dev/null +++ b/examples/bf_diagram.html @@ -0,0 +1,128 @@ + + + + + + + + + + + + + + + +
+

program

+
+
+ + + + + + + +instructioninstruction + +
+
+ +
+

instruction

+
+
+ + + + + + +'+' +'-' +'<' +'>' +',' +'.' + + +'[' + + + +instructioninstruction + +']' +
+
+ + + + diff --git a/examples/chemical_formulas.html b/examples/chemical_formulas.html new file mode 100644 index 00000000..12f1fb7e --- /dev/null +++ b/examples/chemical_formulas.html @@ -0,0 +1,158 @@ + + + + + + + + + + + + + + + +
+

chemical_formula

+
+
+ + + + + + + +elementelement + + +subscriptsubscript + +
+
+ +
+

element

+
+
+ + + + +W:(A-Z, a-z){1,2} +
+
+ +
+

subscript

+
+
+ + + + +W:(₀-₉) +
+
+ + + + diff --git a/examples/chemical_formulas.py b/examples/chemical_formulas.py index 60577fff..80a8c969 100644 --- a/examples/chemical_formulas.py +++ b/examples/chemical_formulas.py @@ -106,23 +106,29 @@ def cvt_subscript_int(s): element_ref = pp.Group(element("symbol") + pp.Optional(subscript_int, default=1)("qty")) formula = element_ref[1, ...].set_name("chemical_formula") -formula.run_tests( - """\ - # sodium chloride - NaCl - # hydrogen hydroxide - H₂O - # phenol - C₆H₅OH - # ethanol - C₂H₅OH - # decanol - C₁₀H₂₁OH - """, - full_dump=False, - post_parse=lambda _, tokens: - f"Molecular weight: {sum_atomic_weights_by_results_name_with_converted_ints(tokens)}", -) -formula.create_diagram("chemical_formulas.html") -print() +if __name__ == '__main__': + import contextlib + + with contextlib.suppress(Exception): + formula.create_diagram("chemical_formulas.html") + + formula.run_tests( + """\ + # sodium chloride + NaCl + # hydrogen hydroxide + H₂O + # phenol + C₆H₅OH + # ethanol + C₂H₅OH + # decanol + C₁₀H₂₁OH + """, + full_dump=False, + post_parse=lambda _, tokens: + f"Molecular weight: {sum_atomic_weights_by_results_name_with_converted_ints(tokens)}", + ) + + print() diff --git a/examples/complex_chemical_formulas.py b/examples/complex_chemical_formulas.py index 91ecaef5..3c470155 100644 --- a/examples/complex_chemical_formulas.py +++ b/examples/complex_chemical_formulas.py @@ -108,9 +108,8 @@ def element_ref_sum(s, l, t): # of one or more element_ref's formula = element_ref[1, ...].set_name("chemical_formula") -# create railroad diagram for this parser +# set names on unnamed expressions for better diagram output pp.autoname_elements() -formula.create_diagram("complex_chemical_formulas.html") def molecular_weight(c: Counter) -> float: @@ -123,29 +122,37 @@ def molecular_weight(c: Counter) -> float: """ return sum(table_of_elements[k] * v for k, v in c.items()) - -formula.run_tests( - """\ - NaCl - HOH - H₂O - H₂O₂ - C₆H₅OH - C₁₀H₂₁OH - (C₆H₅OH)₂ - 3(C₆H₅OH)₂ - 
C(OH)₆ - CH₃(CH₂)₂OH - (CH₃)₃CH - CH₃(CH₂)₅CH₃ - Ba(BrO₃)₂·H₂O - Ba(BrO₃)₂·2(H₂O) - """, - full_dump=False, - post_parse=( - lambda _, tokens: - f"Molecular counts/weight: {dict(tokens[0])}" - f", {molecular_weight(tokens[0]):.3f}" - ), -) -print() +if __name__ == '__main__': + import contextlib + + # create railroad diagram for this parser + with contextlib.suppress(Exception): + formula.create_diagram( + "complex_chemical_formulas_diagram.html", vertical=2, show_groups=True + ) + + formula.run_tests( + """\ + NaCl + HOH + H₂O + H₂O₂ + C₆H₅OH + C₁₀H₂₁OH + (C₆H₅OH)₂ + 3(C₆H₅OH)₂ + C(OH)₆ + CH₃(CH₂)₂OH + (CH₃)₃CH + CH₃(CH₂)₅CH₃ + Ba(BrO₃)₂·H₂O + Ba(BrO₃)₂·2(H₂O) + """, + full_dump=False, + post_parse=( + lambda _, tokens: + f"Molecular counts/weight: {dict(tokens[0])}" + f", {molecular_weight(tokens[0]):.3f}" + ), + ) + print() diff --git a/examples/complex_chemical_formulas_diagram.html b/examples/complex_chemical_formulas_diagram.html new file mode 100644 index 00000000..9c0aa308 --- /dev/null +++ b/examples/complex_chemical_formulas_diagram.html @@ -0,0 +1,538 @@ + + + + + + + + + + + + + + + +
+

chemical_formula

+
+
+ + + + + +element_expressionelement_expression + +
+
+ +
+

element_expression

+
+
+ + + + + +[Suppress:(= | ·)] operations[Suppress:(= | ·)] operations +
+
+ +
+

[Suppress:(= | ·)] operations

+
+
+ + + + + + + + +integer operationsinteger operations + + +optional_separatoroptional_separator +integer operationsinteger operations + +integer operationsinteger operations +
+
+ +
+

integer operations

+
+
+ + + + + + + + + + +integerinteger +integer operationsinteger operations +subscript operationssubscript operations +
+
+ +
+

integer

+
+
+ + + + +W:(0-9) +
+
+ +
+

subscript operations

+
+
+ + + + + + + + + +elementelement +nested_element_expressionnested_element_expression + +subscriptsubscript + +elementelement +nested_element_expressionnested_element_expression +
+
+ +
+

nested_element_expression

+
+
+ + + + + + +'(' +[suppress] +element_expressionelement_expression + +')' +[suppress] +
+
+ +
+

subscript

+
+
+ + + + +W:(₀-₉) +
+
+ +
+

element

+
+
+ + + + +He|Ho|Hf|Hg|Hs|H|Li|Be|Br|Ba|Bi|Bk|Bh|B|Cl|Ca|Cr|Co|Cu|Cd|Cs|Ce|Cm|Cf|Cn|C|Ne|Na|Ni|Nb|Nd|Np|No|Nh|N|Os|Og|O|Fe|Fr|Fm|Fl|F|Mg|Al|Si|Pd|Pr|Pm|Pt|Pb|Po|Pa|Pu|P|Sc|Se|Sr|Sn|Sb|Sm|Sg|S|Ar|Kr|K|Ti|V|Mn|Zn|Ga|Ge|As|Rb|Yb|Y|Zr|Mo|Tc|Ru|Rh|Ag|In|Te|Ir|I|Xe|La|Eu|Gd|Tb|Dy|Er|Tm|Lu|Ta|W|Re|Au|Tl|At|Rn|Ra|Ac|Th|U|Am|Es|Md|Lr|Rf|Db|Mt|Ds|Rg|Mc|Lv|Ts +
+
+ +
+

optional_separator

+
+
+ + + + + + + += | ·= | · +[suppress] +
+
+ +
+

= | ·

+
+
+ + + + +[=·] +
+
+ + + + diff --git a/examples/decaf_parser.py b/examples/decaf_parser.py index e60b69a4..46a1347a 100644 --- a/examples/decaf_parser.py +++ b/examples/decaf_parser.py @@ -47,17 +47,19 @@ pp.ParserElement.enable_packrat() # keywords -_keywords = ( +keywords_ = ( VOID, INT, DOUBLE, BOOL, STRING, CLASS, INTERFACE, NULL, THIS, EXTENDS, IMPLEMENTS, FOR, WHILE, IF, ELSE, RETURN, BREAK, NEW, NEWARRAY, PRINT, READINTEGER, READLINE, TRUE, FALSE, -) = pp.Keyword.using_each( - """ - void int double bool string class interface null this extends implements or while - if else return break new NewArray Print ReadInteger ReadLine true false - """.split(), +) = list( + pp.Keyword.using_each( + """ + void int double bool string class interface null this extends implements or while + if else return break new NewArray Print ReadInteger ReadLine true false + """.split(), + ) ) -keywords = pp.MatchFirst(_keywords) +keywords = pp.MatchFirst(keywords_).set_name("any_keyword") ( LPAR, RPAR, LBRACE, RBRACE, LBRACK, RBRACK, DOT, EQ, COMMA, SEMI @@ -100,7 +102,7 @@ new_array = pp.Group(NEWARRAY + LPAR + expr + COMMA + type_ + RPAR) rvalue = constant | call | read_integer | read_line | new_statement | new_array | ident arith_expr = pp.infix_notation( - rvalue, + rvalue.set_name("rvalue"), [ ("-", 1, pp.OpAssoc.RIGHT,), (pp.one_of("* / %"), 2, pp.OpAssoc.LEFT,), @@ -108,7 +110,7 @@ ], ) comparison_expr = pp.infix_notation( - arith_expr, + arith_expr.set_name("arith_expr"), [ ("!", 1, pp.OpAssoc.RIGHT,), (pp.one_of("< > <= >="), 2, pp.OpAssoc.LEFT,), @@ -215,31 +217,42 @@ program = pp.Group(decl)[1, ...] 
decaf_parser = program -stmt.runTests("""\ - sin(30); - a = 1; - b = 1 + 1; - b = 1 != 2 && false; - print("A"); - a.b = 100; - a.b = 100.0; - a[100] = b; - a[0][0] = 2; - a = 0x1234; -""" -) +pp.autoname_elements() + +if __name__ == '__main__': + import contextlib + + # create railroad diagram for this parser + with contextlib.suppress(Exception): + program.create_diagram( + "decaf_parser_diagram.html", vertical=2, show_groups=True + ) -test_program = """ - void getenv(string var); - int main(string[] args) { - if (a > 100) { - Print(a, " is too big"); - } else if (a < 100) { - Print(a, " is too small"); - } else { - Print(a, "just right!"); + stmt.runTests("""\ + sin(30); + a = 1; + b = 1 + 1; + b = 1 != 2 && false; + print("A"); + a.b = 100; + a.b = 100.0; + a[100] = b; + a[0][0] = 2; + a = 0x1234; + """ + ) + + test_program = """ + void getenv(string var); + int main(string[] args) { + if (a > 100) { + Print(a, " is too big"); + } else if (a < 100) { + Print(a, " is too small"); + } else { + Print(a, "just right!"); + } } - } -""" + """ -print(decaf_parser.parse_string(test_program).dump()) + print(decaf_parser.parse_string(test_program).dump()) diff --git a/examples/decaf_parser_diagram.html b/examples/decaf_parser_diagram.html new file mode 100644 index 00000000..21790d73 --- /dev/null +++ b/examples/decaf_parser_diagram.html @@ -0,0 +1,4263 @@ + + + + + + + + + + + + + + + +
+

program

+
+
+ + + + + + +decldecl + +
+
+ +
+

decl

+
+
+ + + + + +variable_declvariable_decl +function_declfunction_decl +class_declclass_decl +interface_declinterface_decl +prototypeprototype +
+
+ +
+

variable_decl

+
+
+ + + + + +type_type_ + +any_keywordany_keyword +[NOT] +identifieridentifier +SEMISEMI +
+
+ +
+

type_

+
+
+ + + + + + + +INTINT +DOUBLEDOUBLE +BOOLBOOL +STRINGSTRING +identident + + + +'[]' + +
+
+ +
+

ident

+
+
+ + + + + + +any_keywordany_keyword +[NOT] +identifieridentifier +
+
+ +
+

function_decl

+
+
+ + + + + + + + + + +INTINT +DOUBLEDOUBLE +BOOLBOOL +STRINGSTRING +identident + + + +'[]' + +'void' + + +any_keywordany_keyword +[NOT] +identifieridentifier +LPARLPAR + + + + +type_type_ + +any_keywordany_keyword +[NOT] +identifieridentifier + + + + + +',' +[suppress] +type_type_ + +any_keywordany_keyword +[NOT] +identifieridentifier + +RPARRPAR + + +LBRACELBRACE + + + +variable_declvariable_decl + + + + +stmtstmt + +RBRACERBRACE +
+
+ +
+

stmt

+
+
+ + + + + + +if_stmtif_stmt +while_stmtwhile_stmt +for_stmtfor_stmt +break_stmtbreak_stmt +return_stmtreturn_stmt +print_stmtprint_stmt +stmt_blockstmt_block + + +exprexpr +SEMISEMI +
+
+ +
+

if_stmt

+
+
+ + + + + + +'if' +LPARLPAR + +exprexpr +RPARRPAR + +stmtstmt + + + + +ELSEELSE +stmtstmt +
+
+ +
+

expr

+
+
+ + + + + + +assignmentassignment +callcall +THISTHIS +arith_expr_expressionarith_expr_expression +arith_exprarith_expr +lvaluelvalue +real numberreal number +TRUETRUE +FALSEFALSE +hex_constanthex_constant +integerinteger +string enclosed in double quotesstring enclosed in double quotes +NULLNULL +read_integerread_integer +read_lineread_line +new_statementnew_statement +new_arraynew_array +
+
+ +
+

assignment

+
+
+ + + + + + + + + + +any_keywordany_keyword +[NOT] +identifieridentifier + + +LPARLPAR +exprexpr +RPARRPAR + + + + +DOTDOT + +identident +expr_parensexpr_parens + + + + + +LBRACKLBRACK +exprexpr +RBRACKRBRACK + +EQEQ +exprexpr +
+
+ +
+

expr_parens

+
+
+ + + + + + +LPARLPAR +exprexpr +RPARRPAR +
+
+ +
+

EQ

+
+
+ + + + + +'=' +[suppress] +
+
+ +
+

call

+
+
+ + + + + + + + + +any_keywordany_keyword +[NOT] +identifieridentifier +LPARLPAR + + + + +exprexpr + + + + + +',' +[suppress] +exprexpr + +RPARRPAR + + + + +LPARLPAR +exprexpr +RPARRPAR + + + + +DOTDOT + +any_keywordany_keyword +[NOT] +identifieridentifier + +LPARLPAR + + + + +exprexpr + + + + + +',' +[suppress] +exprexpr + +RPARRPAR +
+
+ +
+

arith_expr_expression

+
+
+ + + + + +|| operations|| operations +
+
+ +
+

|| operations

+
+
+ + + + + + + + +&& operations&& operations + + +|||| +&& operations&& operations + +&& operations&& operations +
+
+ +
+

&& operations

+
+
+ + + + + + + + +== | != operations== | != operations + + +&&&& +== | != operations== | != operations + +== | != operations== | != operations +
+
+ +
+

== | != operations

+
+
+ + + + + + + + +<= | < | >= | > operations<= | < | >= | > operations + + +== | !=== | != +<= | < | >= | > operations<= | < | >= | > operations + +<= | < | >= | > operations<= | < | >= | > operations +
+
+ +
+

<= | < | >= | > operations

+
+
+ + + + + + + + +'!' operations'!' operations + + +<= | < | >= | ><= | < | >= | > +'!' operations'!' operations + +'!' operations'!' operations +
+
+ +
+

'!' operations

+
+
+ + + + + + + + + + +'!' +'!' operations'!' operations +arith_exprarith_expr +nested_arith_expr_expressionnested_arith_expr_expression +
+
+ +
+

arith_expr

+
+
+ + + + + ++ | - operations+ | - operations +
+
+ +
+

+ | - operations

+
+
+ + + + + + + + +* | / | % operations* | / | % operations + + ++ | -+ | - +* | / | % operations* | / | % operations + +* | / | % operations* | / | % operations +
+
+ +
+

* | / | % operations

+
+
+ + + + + + + + +'-' operations'-' operations + + +* | / | %* | / | % +'-' operations'-' operations + +'-' operations'-' operations +
+
+ +
+

'-' operations

+
+
+ + + + + + + + + + +'-' +'-' operations'-' operations +real numberreal number +TRUETRUE +FALSEFALSE +hex_constanthex_constant +integerinteger +string enclosed in double quotesstring enclosed in double quotes +NULLNULL +callcall +read_integerread_integer +read_lineread_line +new_statementnew_statement +new_arraynew_array +identident +nested_rvalue_expressionnested_rvalue_expression +
+
+ +
+

string enclosed in double quotes

+
+
+ + + + + + +"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))* +'"' +[combine] +
+
+ +
+

read_integer

+
+
+ + + + + + +READINTEGERREADINTEGER +LPARLPAR +RPARRPAR +
+
+ +
+

read_line

+
+
+ + + + + + +READLINEREADLINE +LPARLPAR +RPARRPAR +
+
+ +
+

new_statement

+
+
+ + + + + + +NEWNEW + +any_keywordany_keyword +[NOT] +identifieridentifier +
+
+ +
+

new_array

+
+
+ + + + + + +NEWARRAYNEWARRAY +LPARLPAR +exprexpr +COMMACOMMA +type_type_ +RPARRPAR +
+
+ +
+

COMMA

+
+
+ + + + + +',' +[suppress] +
+
+ +
+

nested_rvalue_expression

+
+
+ + + + + + +'(' +[suppress] +arith_exprarith_expr + +')' +[suppress] +
+
+ +
+

* | / | %

+
+
+ + + + +[*/%] +
+
+ +
+

+ | -

+
+
+ + + + +[+\-] +
+
+ +
+

nested_arith_expr_expression

+
+
+ + + + + + +'(' +[suppress] +arith_expr_expressionarith_expr_expression + +')' +[suppress] +
+
+ +
+

<= | < | >= | >

+
+
+ + + + +<=|<|>=|> +
+
+ +
+

== | !=

+
+
+ + + + +==|!= +
+
+ +
+

&&

+
+
+ + + + +\&\& +
+
+ +
+

||

+
+
+ + + + +\|\| +
+
+ +
+

lvalue

+
+
+ + + + + + +identident +expr_parensexpr_parens + + + + +DOTDOT + +identident +expr_parensexpr_parens + + + + + +LBRACKLBRACK +exprexpr +RBRACKRBRACK + +
+
+ +
+

DOT

+
+
+ + + + + +'.' +[suppress] +
+
+ +
+

LBRACK

+
+
+ + + + + +'[' +[suppress] +
+
+ +
+

RBRACK

+
+
+ + + + + +']' +[suppress] +
+
+ +
+

real number

+
+
+ + + + +[+-]?(?:\d+\.\d*|\.\d+) +
+
+ +
+

hex_constant

+
+
+ + + + +0[xX][0-9a-fA-F]+ +
+
+ +
+

integer

+
+
+ + + + +W:(0-9) +
+
+ +
+

while_stmt

+
+
+ + + + + + +'while' +LPARLPAR +exprexpr +RPARRPAR +stmtstmt +
+
+ +
+

for_stmt

+
+
+ + + + + + +'or' +LPARLPAR + + +exprexpr +SEMISEMI +exprexpr +SEMISEMI + + +exprexpr +RPARRPAR +stmtstmt +
+
+ +
+

break_stmt

+
+
+ + + + + + +'break' +SEMISEMI +
+
+ +
+

return_stmt

+
+
+ + + + + + +'return' +exprexpr +SEMISEMI +
+
+ +
+

print_stmt

+
+
+ + + + + + +'Print' +LPARLPAR + + + + + +exprexpr + + + + + +',' +[suppress] +exprexpr + +RPARRPAR +SEMISEMI +
+
+ +
+

stmt_block

+
+
+ + + + + + +LBRACELBRACE + + + +variable_declvariable_decl + + + + +stmtstmt + +RBRACERBRACE +
+
+ +
+

class_decl

+
+
+ + + + + + +CLASSCLASS + + +any_keywordany_keyword +[NOT] +identifieridentifier + + + +EXTENDSEXTENDS + +any_keywordany_keyword +[NOT] +identifieridentifier + + + +IMPLEMENTSIMPLEMENTS + + + +any_keywordany_keyword +[NOT] +identifieridentifier + + + + + +',' +[suppress] + +any_keywordany_keyword +[NOT] +identifieridentifier + +LBRACELBRACE + + + +fieldfield + +RBRACERBRACE +
+
+ +
+

field

+
+
+ + + + + +variable_declvariable_decl +function_declfunction_decl +
+
+ +
+

interface_decl

+
+
+ + + + + + +INTERFACEINTERFACE + + +any_keywordany_keyword +[NOT] +identifieridentifier +LBRACELBRACE + + + +prototypeprototype + +RBRACERBRACE +
+
+ +
+

LBRACE

+
+
+ + + + + +'{' +[suppress] +
+
+ +
+

prototype

+
+
+ + + + + + + + + + +INTINT +DOUBLEDOUBLE +BOOLBOOL +STRINGSTRING +identident + + + +'[]' + +'void' + + +any_keywordany_keyword +[NOT] +identifieridentifier +LPARLPAR + + + + +type_type_ + +any_keywordany_keyword +[NOT] +identifieridentifier + + + + + +',' +[suppress] +type_type_ + +any_keywordany_keyword +[NOT] +identifieridentifier + +RPARRPAR +SEMISEMI +
+
+ +
+

LPAR

+
+
+ + + + + +'(' +[suppress] +
+
+ +
+

any_keyword

+
+
+ + + + + +VOIDVOID +INTINT +DOUBLEDOUBLE +BOOLBOOL +STRINGSTRING +CLASSCLASS +INTERFACEINTERFACE +NULLNULL +THISTHIS +EXTENDSEXTENDS +IMPLEMENTSIMPLEMENTS +FORFOR +WHILEWHILE +IFIF +ELSEELSE +RETURNRETURN +BREAKBREAK +NEWNEW +NEWARRAYNEWARRAY +PRINTPRINT +READINTEGERREADINTEGER +READLINEREADLINE +TRUETRUE +FALSEFALSE +
+
+ +
+

VOID

+
+
+ + + + +'void' +
+
+ +
+

INT

+
+
+ + + + +'int' +
+
+ +
+

DOUBLE

+
+
+ + + + +'double' +
+
+ +
+

BOOL

+
+
+ + + + +'bool' +
+
+ +
+

STRING

+
+
+ + + + +'string' +
+
+ +
+

CLASS

+
+
+ + + + +'class' +
+
+ +
+

INTERFACE

+
+
+ + + + +'interface' +
+
+ +
+

NULL

+
+
+ + + + +'null' +
+
+ +
+

THIS

+
+
+ + + + +'this' +
+
+ +
+

EXTENDS

+
+
+ + + + +'extends' +
+
+ +
+

IMPLEMENTS

+
+
+ + + + +'implements' +
+
+ +
+

FOR

+
+
+ + + + +'or' +
+
+ +
+

WHILE

+
+
+ + + + +'while' +
+
+ +
+

IF

+
+
+ + + + +'if' +
+
+ +
+

ELSE

+
+
+ + + + +'else' +
+
+ +
+

RETURN

+
+
+ + + + +'return' +
+
+ +
+

BREAK

+
+
+ + + + +'break' +
+
+ +
+

NEW

+
+
+ + + + +'new' +
+
+ +
+

NEWARRAY

+
+
+ + + + +'NewArray' +
+
+ +
+

PRINT

+
+
+ + + + +'Print' +
+
+ +
+

READINTEGER

+
+
+ + + + +'ReadInteger' +
+
+ +
+

READLINE

+
+
+ + + + +'ReadLine' +
+
+ +
+

TRUE

+
+
+ + + + +'true' +
+
+ +
+

FALSE

+
+
+ + + + +'false' +
+
+ +
+

identifier

+
+
+ + + + +W:(A-Z_a-zªµºÀ-Ö..., 0-9A-Z_a-zªµ·...) +
+
+ +
+

RPAR

+
+
+ + + + + +')' +[suppress] +
+
+ +
+

SEMI

+
+
+ + + + + +';' +[suppress] +
+
+ +
+

RBRACE

+
+
+ + + + + +'}' +[suppress] +
+
+ + + + diff --git a/examples/delta_time.py b/examples/delta_time.py index 7bc29278..4bb3c0fa 100644 --- a/examples/delta_time.py +++ b/examples/delta_time.py @@ -359,10 +359,7 @@ def _remove_temp_keys(t: pp.ParseResults) -> None: time_expression = time_and_day -_GENERATE_DIAGRAM = False -if _GENERATE_DIAGRAM: - pp.autoname_elements() - time_expression.create_diagram("delta_time.html") +pp.autoname_elements() def demo(): @@ -590,4 +587,9 @@ def main() -> int: if __name__ == "__main__": + import contextlib + + with contextlib.suppress(Exception): + time_expression.create_diagram("delta_time_diagram.html", vertical=3, show_results_names=True, show_groups=True) + exit(main()) diff --git a/examples/delta_time_diagram.html b/examples/delta_time_diagram.html new file mode 100644 index 00000000..16dbbe38 --- /dev/null +++ b/examples/delta_time_diagram.html @@ -0,0 +1,1910 @@ + + + + + + + + + + + + + + + +
+

time and day

+
+
+ + + + + + +time referencetime reference +time_ref_presenttime_ref_present + + + + + +on_on_ +day referenceday reference + +day referenceday reference + + + + + +at_at_ +time of daytime of day +time_ref_presenttime_ref_present +
+
+ +
+

time reference

+
+
+ + + + + +time of daytime of day +relative timerelative time +
+
+ +
+

time of day

+
+
+ + + + + +noonnoon +midnightmidnight +nownow +0000 time0000 time +timespectimespec +
+
+ +
+

noon

+
+
+ + + + +'noon' +
+
+ +
+

midnight

+
+
+ + + + +'midnight' +
+
+ +
+

0000 time

+
+
+ + + + + + +numbered_time_unitsnumbered_time_units +[NOT] +HHMMHHMM +
+
+ +
+

numbered_time_units

+
+
+ + + + + +W:(0-9) +any_time_unitsany_time_units +
+
+ +
+

any_time_units

+
+
+ + + + + +'week' +'weeks' +'day' +'days' +'hour' +'hours' +'minute' +'minutes' +'second' +'seconds' +
+
+ +
+

HHMM

+
+
+ + + + +\b([01]\d|2[0-3])([0-5]\d)\b +
+
+ +
+

timespec

+
+
+ + + + + + +numericnumeric +'HH' + + + +o_clocko_clock + +COLONCOLON + +numericnumeric +'MM' + + + +COLONCOLON + +numericnumeric +'SS' + + +'AM' +'PM' +'ampm' +
+
+ +
+

numeric

+
+
+ + + + + +integerinteger +'one' +'two' +'three' +'four' +'five' +'six' +'seven' +'eight' +'nine' +'ten' +'eleven' +'twelve' +'thirteen' +'fourteen' +'fifteen' +'sixteen' +'seventeen' +'eighteen' +'nineteen' +'twenty' +'twenty-one' +'twenty-two' +'twenty-three' +'twenty-four' +
+
+ +
+

integer

+
+
+ + + + +W:(0-9) +
+
+ +
+

o_clock

+
+
+ + + + +"o'clock" +
+
+ +
+

COLON

+
+
+ + + + + +':' +[suppress] +
+
+ +
+

relative time

+
+
+ + + + + + + +qtyqty +'qty' + +time unittime unit +'units' + + +'ago' +'dir' + + + +'from' +'before' +'after' +'dir' + + +time of daytime of day +'ref_time' + + +'in' +'dir' + +qtyqty +'qty' + +time unittime unit +'units' +
+
+ +
+

qty

+
+
+ + + + + +qty_expressionqty_expression +
+
+ +
+

qty_expression

+
+
+ + + + + + + +adverb_adverb_ + +integerinteger +'one' +'two' +'three' +'four' +'five' +'six' +'seven' +'eight' +'nine' +'ten' +'eleven' +'twelve' +'thirteen' +'fourteen' +'fifteen' +'sixteen' +'seventeen' +'eighteen' +'nineteen' +'twenty' +'twenty-one' +'twenty-two' +'twenty-three' +'twenty-four' +couplecouple +a_qtya_qty +the_the_ +
+
+ +
+

adverb_

+
+
+ + + + + + +'just' +'only' +'exactly' +[suppress] +
+
+ +
+

couple

+
+
+ + + + + + + +'a' +'couple' + + +'of' +
+
+ +
+

a_qty

+
+
+ + + + + +a_a_ +an_an_ +
+
+ +
+

a_

+
+
+ + + + +'a' +
+
+ +
+

an_

+
+
+ + + + +'an' +
+
+ +
+

the_

+
+
+ + + + +'the' +
+
+ +
+

time unit

+
+
+ + + + + +'hour' +'hours' +'minute' +'minutes' +'second' +'seconds' +
+
+ +
+

on_

+
+
+ + + + +'on' +
+
+ +
+

day reference

+
+
+ + + + + +relative dayrelative day +absolute dayabsolute day +
+
+ +
+

relative day

+
+
+ + + + + + + +'in' +'dir' + +qtyqty +'qty' + + +'day' +'days' +'week' +'weeks' +'units' + + +qtyqty +'qty' + + +'day' +'days' +'week' +'weeks' +'units' + + +'ago' +'dir' + + + +'from' +'before' +'after' +'dir' + +absolute dayabsolute day +'ref_day' +
+
+ +
+

absolute day

+
+
+ + + + + +'today' +'tomorrow' +'yesterday' + +'now' +Tag:time_ref_present=True + + + + + +next_next_ +last_last_ +'dir' + +weekday_nameweekday_name +'day_name' +
+
+ +
+

weekday_name

+
+
+ + + + + +'Monday' +'Tuesday' +'Wednesday' +'Thursday' +'Friday' +'Saturday' +'Sunday' +
+
+ +
+

today

+
+
+ + + + +'today' +
+
+ +
+

tomorrow

+
+
+ + + + +'tomorrow' +
+
+ +
+

yesterday

+
+
+ + + + +'yesterday' +
+
+ +
+

now

+
+
+ + + + +'now' +
+
+ +
+

weekday_reference

+
+
+ + + + + + + + + +next_next_ +last_last_ +'dir' + +weekday_nameweekday_name +'day_name' +
+
+ +
+

next_

+
+
+ + + + +'next' +
+
+ +
+

last_

+
+
+ + + + +'last' +
+
+ +
+

at_

+
+
+ + + + +'at' +
+
+ +
+

time_ref_present

+
+
+ + + + +Tag:time_ref_present=True +
+
+ + + + diff --git a/examples/directx_x_file_parser.html b/examples/directx_x_file_parser.html new file mode 100644 index 00000000..047584a1 --- /dev/null +++ b/examples/directx_x_file_parser.html @@ -0,0 +1,270 @@ + + + + + + + + + + + + + + + +
+

template_defn

+
+
+ + + + + +'template' + +identifieridentifier +'name' + +'{' +[suppress] + + + +<[0-9a-fA-F]{8}(-[0-9a-fA-F]{4}){3}-[0-9a-fA-F]{12}> +'uuid' + + + + + + + + + + + +'array' + + +'WORD' +'DWORD' +'FLOAT' +'DOUBLE' +'CHAR' +'UCHAR' +'BYTE' +'STRING' +'CSTRING' +'UNICODE' +identifieridentifier +'element_type' +type +'type' + +identifieridentifier +'name' + + + + + + +'[' +[suppress] + +W:(1-9, 0-9) +identifieridentifier + +']' +[suppress] + +'dims' + + + +'WORD' +'DWORD' +'FLOAT' +'DOUBLE' +'CHAR' +'UCHAR' +'BYTE' +'STRING' +'CSTRING' +'UNICODE' +identifieridentifier +'type' + +identifieridentifier +'name' + +';' +[suppress] + +'members' + + + + + + +'[' +[suppress] +'...' + +']' +[suppress] +[combine] +'open_template' + + + + + +'[' +[suppress] + + + + + + +'WORD' +'DWORD' +'FLOAT' +'DOUBLE' +'CHAR' +'UCHAR' +'BYTE' +'STRING' +'CSTRING' +'UNICODE' +identifieridentifier +'type' + + + +<[0-9a-fA-F]{8}(-[0-9a-fA-F]{4}){3}-[0-9a-fA-F]{12}> +'uuid' + + + + + +',' +[suppress] + + + + +'WORD' +'DWORD' +'FLOAT' +'DOUBLE' +'CHAR' +'UCHAR' +'BYTE' +'STRING' +'CSTRING' +'UNICODE' +identifieridentifier +'type' + + + +<[0-9a-fA-F]{8}(-[0-9a-fA-F]{4}){3}-[0-9a-fA-F]{12}> +'uuid' + + +']' +[suppress] +'restrictions' + +'}' +[suppress] +
+
+ +
+

identifier

+
+
+ + + + +W:(A-Za-z, 0-9A-Z_a-z) +
+
+ + + + diff --git a/examples/directx_x_file_parser.py b/examples/directx_x_file_parser.py index 2208f7a2..65364793 100644 --- a/examples/directx_x_file_parser.py +++ b/examples/directx_x_file_parser.py @@ -89,6 +89,8 @@ def make_template_parser(template_defn: pp.ParseResults) -> pp.ParserElement: expr = pp.Group(pp.DelimitedList(expr, max=dim) + SEMI) member_parsers.append(expr(member.name)) + pp.autoname_elements() + return ( pp.Keyword(template_defn.name)("type") + ident("name") @@ -99,6 +101,14 @@ def make_template_parser(template_defn: pp.ParseResults) -> pp.ParserElement: if __name__ == "__main__": + import contextlib + + with contextlib.suppress(Exception): + # create railroad diagram + directx_template_defn.create_diagram( + "directx_x_file_parser.html", show_results_names=True, show_groups=False + ) + sample = """ some stuff... @@ -155,12 +165,6 @@ def make_template_parser(template_defn: pp.ParseResults) -> pp.ParserElement: ) # print() - # create railroad diagram - pp.autoname_elements() - directx_template_defn.create_diagram( - "directx_x_file_parser.html", show_results_names=True, show_groups=False - ) - vector_template = directx_template_defn.parse_string( """\ template Vector { @@ -173,9 +177,12 @@ def make_template_parser(template_defn: pp.ParseResults) -> pp.ParserElement: """ ) vector_parser = make_template_parser(vector_template) - vector_parser.create_diagram( - "directx_x_vector_parser.html", show_results_names=True, show_groups=False - ) + + with contextlib.suppress(Exception): + vector_parser.create_diagram( + "directx_x_vector_parser.html", show_results_names=True, show_groups=False + ) + v = vector_parser.parse_string('Vector p1 {"datum_A"; 1.0; 3.0; 5.0;}') print(v.dump()) diff --git a/examples/lox_parser.py b/examples/lox_parser.py index 4e356565..d5050d13 100644 --- a/examples/lox_parser.py +++ b/examples/lox_parser.py @@ -230,5 +230,9 @@ class Circle { if __name__ == '__main__': - program.create_diagram("lox_parser_diagram.html", 
vertical=2, show_groups=True) + import contextlib + + with contextlib.suppress(Exception): + program.create_diagram("lox_parser_diagram.html", vertical=2, show_groups=True) + main() diff --git a/examples/lox_parser_diagram.html b/examples/lox_parser_diagram.html index 1d70ff3c..1d1bf65b 100644 --- a/examples/lox_parser_diagram.html +++ b/examples/lox_parser_diagram.html @@ -18,7 +18,7 @@
-

program

+

program

@@ -28,7 +28,7 @@

program

-declaration +declarationdeclaration + + + + + + + + +
+

script

+
+
+ + + + + + + +statstat + +
+
+ +
+

stat

+
+
+ + + + + + + +assignment_statassignment_stat +do_statdo_stat +while_statwhile_stat +repeat_statrepeat_stat +for_loop_statfor_loop_stat +for_seq_statfor_seq_stat +func_call_statfunc_call_stat +if_statif_stat +function_deffunction_def +
+
+ +
+

assignment_stat

+
+
+ + + + + + + +LOCALLOCAL +varlist1varlist1 +EQEQ +explist1explist1 +
+
+ +
+

varlist1

+
+
+ + + + + + +varvar + + + + + +',' +[suppress] +varvar + +
+
+ +
+

var

+
+
+ + + + + + + +var_partvar_part + + + + + +'.' +[suppress] +var_partvar_part + +
+
+ +
+

var_part

+
+
+ + + + + + + +var_atomvar_atom +index_refindex_ref +functioncallfunctioncall +namename +exp_groupexp_group +
+
+ +
+

var_atom

+
+
+ + + + + +functioncallfunctioncall +namename +exp_groupexp_group +
+
+ +
+

functioncall

+
+
+ + + + + + +prefixexpprefixexp + + + +COLONCOLON +namename + +argsargs +
+
+ +
+

prefixexp

+
+
+ + + + + +namename +exp_groupexp_group +
+
+ +
+

name

+
+
+ + + + + + + + +keywordkeyword +[NOT] +identifieridentifier + + + + +'.' + +keywordkeyword +[NOT] +identifieridentifier + +[combine] +
+
+ +
+

keyword

+
+
+ + + + + +'return' +'break' +'do' +'end' +'while' +'if' +'then' +'elseif' +'else' +'for' +'in' +'function' +'local' +'repeat' +'until' +'nil' +'false' +'true' +'and' +'or' +'not' +
+
+ +
+

identifier

+
+
+ + + + +W:(A-Z_a-zªµºÀ-Ö..., 0-9A-Z_a-zªµ·...) +
+
+ +
+

exp_group

+
+
+ + + + + + +LPARLPAR +expexp +RPARRPAR +
+
+ +
+

exp

+
+
+ + + + + +'or' operations'or' operations +
+
+ +
+

'or' operations

+
+
+ + + + + + + + +'and' operations'and' operations + + +OROR +'and' operations'and' operations + +'and' operations'and' operations +
+
+ +
+

'and' operations

+
+
+ + + + + + + + +<= | < | >= | > | ~= | == operations<= | < | >= | > | ~= | == operations + + +ANDAND +<= | < | >= | > | ~= | == operations<= | < | >= | > | ~= | == operations + +<= | < | >= | > | ~= | == operations<= | < | >= | > | ~= | == operations +
+
+ +
+

<= | < | >= | > | ~= | == operations

+
+
+ + + + + + + + +'|' operations'|' operations + + +<= | < | >= | > | ~= | ==<= | < | >= | > | ~= | == +'|' operations'|' operations + +'|' operations'|' operations +
+
+ +
+

'|' operations

+
+
+ + + + + + + + +'~' operations'~' operations + + +'|' +'~' operations'~' operations + +'~' operations'~' operations +
+
+ +
+

'~' operations

+
+
+ + + + + + + + +'&' operations'&' operations + + +'~' +'&' operations'&' operations + +'&' operations'&' operations +
+
+ +
+

'&' operations

+
+
+ + + + + + + + +<< | >> operations<< | >> operations + + +'&' +<< | >> operations<< | >> operations + +<< | >> operations<< | >> operations +
+
+ +
+

<< | >> operations

+
+
+ + + + + + + + +'..' operations'..' operations + + +<< | >><< | >> +'..' operations'..' operations + +'..' operations'..' operations +
+
+ +
+

'..' operations

+
+
+ + + + + + + + ++ | - operations+ | - operations + + +'..' ++ | - operations+ | - operations + ++ | - operations+ | - operations +
+
+ +
+

+ | - operations

+
+
+ + + + + + + + +* | // | / | % operations* | // | / | % operations + + ++ | -+ | - +* | // | / | % operations* | // | / | % operations + +* | // | / | % operations* | // | / | % operations +
+
+ +
+

* | // | / | % operations

+
+
+ + + + + + + + +not op operationsnot op operations + + +* | // | / | %* | // | / | % +not op operationsnot op operations + +not op operationsnot op operations +
+
+ +
+

not op operations

+
+
+ + + + + + + + + + +not opnot op +not op operationsnot op operations +'^' operations'^' operations +
+
+ +
+

not op

+
+
+ + + + + +NOTNOT +# | - | ~# | - | ~ +
+
+ +
+

NOT

+
+
+ + + + +'not' +
+
+ +
+

# | - | ~

+
+
+ + + + +[#\-~] +
+
+ +
+

'^' operations

+
+
+ + + + + + + + + +NILNIL +FALSEFALSE +TRUETRUE +real number with scientific notationreal number with scientific notation +real numberreal number +signed integersigned integer +string enclosed in "'" +string enclosed in '"' +multiline_stringmultiline_string +ELLIPSISELLIPSIS +functioncallfunctioncall +varvar +tableconstructortableconstructor +nested_exp_atom_expressionnested_exp_atom_expression + + +'^' + +NILNIL +FALSEFALSE +TRUETRUE +real number with scientific notationreal number with scientific notation +real numberreal number +signed integersigned integer +string enclosed in "'" +string enclosed in '"' +multiline_stringmultiline_string +ELLIPSISELLIPSIS +functioncallfunctioncall +varvar +tableconstructortableconstructor +nested_exp_atom_expressionnested_exp_atom_expression + +NILNIL +FALSEFALSE +TRUETRUE +real number with scientific notationreal number with scientific notation +real numberreal number +signed integersigned integer +string enclosed in "'" +string enclosed in '"' +multiline_stringmultiline_string +ELLIPSISELLIPSIS +functioncallfunctioncall +varvar +tableconstructortableconstructor +nested_exp_atom_expressionnested_exp_atom_expression +
+
+ +
+

tableconstructor

+
+
+ + + + + + +LBRACELBRACE + + +field_listfield_list +RBRACERBRACE +
+
+ +
+

LBRACE

+
+
+ + + + + +'{' +[suppress] +
+
+ +
+

field_list

+
+
+ + + + + + +fieldfield + + + + + +fieldsepfieldsep +[suppress] +fieldfield + + + + +fieldsepfieldsep +[suppress] +
+
+ +
+

field

+
+
+ + + + + + + +LBRACKLBRACK +expexp +RBRACKRBRACK +EQEQ + +expexp + +namename +EQEQ + +expexp +expexp +
+
+ +
+

fieldsep

+
+
+ + + + + +COMMACOMMA +SEMISEMI +
+
+ +
+

RBRACE

+
+
+ + + + + +'}' +[suppress] +
+
+ +
+

nested_exp_atom_expression

+
+
+ + + + + + +'(' +[suppress] +expexp + +')' +[suppress] +
+
+ +
+

NIL

+
+
+ + + + +'nil' +
+
+ +
+

FALSE

+
+
+ + + + +'false' +
+
+ +
+

TRUE

+
+
+ + + + +'true' +
+
+ +
+

real number with scientific notation

+
+
+ + + + +[+-]?(?:\d+(?:[eE][+-]?\d+)|(?:\d+\.\d*|\.\d+)(?:[eE][+-]?\d+)?) +
+
+ +
+

real number

+
+
+ + + + +[+-]?(?:\d+\.\d*|\.\d+) +
+
+ +
+

signed integer

+
+
+ + + + +[+-]?\d+ +
+
+ +
+

* | // | / | %

+
+
+ + + + +\*|//|/|% +
+
+ +
+

+ | -

+
+
+ + + + +[+\-] +
+
+ +
+

<< | >>

+
+
+ + + + +<<|>> +
+
+ +
+

<= | < | >= | > | ~= | ==

+
+
+ + + + +<=|<|>=|>|\~=|== +
+
+ +
+

AND

+
+
+ + + + +'and' +
+
+ +
+

OR

+
+
+ + + + +'or' +
+
+ +
+

args

+
+
+ + + + + + +LPARLPAR + + +explist1explist1 +RPARRPAR +tableconstructortableconstructor +string enclosed in "'" +string enclosed in '"' +multiline_stringmultiline_string +
+
+ +
+

explist1

+
+
+ + + + + + +expexp + + + + + +',' +[suppress] +expexp + +
+
+ +
+

multiline_string

+
+
+ + + + +quoted string, starting with [[ ending with ]] +
+
+ +
+

index_ref

+
+
+ + + + + + +LBRACKLBRACK +expexp +RBRACKRBRACK +
+
+ +
+

LBRACK

+
+
+ + + + + +'[' +[suppress] +
+
+ +
+

RBRACK

+
+
+ + + + + +']' +[suppress] +
+
+ +
+

do_stat

+
+
+ + + + + +DODO +blockblock +ENDEND +
+
+ +
+

block

+
+
+ + + + + + + + +statstat +OPT_SEMIOPT_SEMI + + + + +laststatlaststat +OPT_SEMIOPT_SEMI +
+
+ +
+

OPT_SEMI

+
+
+ + + + + + + +SEMISEMI +[suppress] +
+
+ +
+

SEMI

+
+
+ + + + + +';' +[suppress] +
+
+ +
+

laststat

+
+
+ + + + + + + +RETURNRETURN +explist1explist1 +BREAKBREAK +
+
+ +
+

RETURN

+
+
+ + + + +'return' +
+
+ +
+

BREAK

+
+
+ + + + +'break' +
+
+ +
+

while_stat

+
+
+ + + + + +WHILEWHILE +expexp +blockblock +ENDEND +
+
+ +
+

WHILE

+
+
+ + + + +'while' +
+
+ +
+

repeat_stat

+
+
+ + + + + +REPEATREPEAT +blockblock +UNTILUNTIL +expexp +
+
+ +
+

REPEAT

+
+
+ + + + +'repeat' +
+
+ +
+

UNTIL

+
+
+ + + + +'until' +
+
+ +
+

for_loop_stat

+
+
+ + + + + +FORFOR +namename +EQEQ +expexp +COMMACOMMA +expexp + + + +COMMACOMMA +expexp +DODO +blockblock +ENDEND +
+
+ +
+

EQ

+
+
+ + + + +'=' +
+
+ +
+

for_seq_stat

+
+
+ + + + + +FORFOR +namelistnamelist +ININ +explist1explist1 +DODO +blockblock +ENDEND +
+
+ +
+

FOR

+
+
+ + + + +'for' +
+
+ +
+

namelist

+
+
+ + + + + + +namename + + + + + +',' +[suppress] +namename + +
+
+ +
+

IN

+
+
+ + + + +'in' +
+
+ +
+

DO

+
+
+ + + + +'do' +
+
+ +
+

func_call_stat

+
+
+ + + + + + + +LOCALLOCAL +functioncallfunctioncall +
+
+ +
+

if_stat

+
+
+ + + + + +IFIF +expexp +THENTHEN +blockblock + + + + + +ELSEIFELSEIF +expexp +THENTHEN +blockblock + + + + + +ELSEELSE +blockblock +ENDEND +
+
+ +
+

IF

+
+
+ + + + +'if' +
+
+ +
+

ELSEIF

+
+
+ + + + +'elseif' +
+
+ +
+

THEN

+
+
+ + + + +'then' +
+
+ +
+

ELSE

+
+
+ + + + +'else' +
+
+ +
+

function_def

+
+
+ + + + + + + +LOCALLOCAL +FUNCTIONFUNCTION +funcnamefuncname + + +LPARLPAR +parlistparlist +RPARRPAR +blockblock +ENDEND +
+
+ +
+

LOCAL

+
+
+ + + + +'local' +
+
+ +
+

FUNCTION

+
+
+ + + + +'function' +
+
+ +
+

funcname

+
+
+ + + + + + + +namename +COLONCOLON +namename +namename +
+
+ +
+

COLON

+
+
+ + + + + +':' +[suppress] +
+
+ +
+

LPAR

+
+
+ + + + + +'(' +[suppress] +
+
+ +
+

parlist

+
+
+ + + + + + +namelistnamelist + + + +COMMACOMMA +ELLIPSISELLIPSIS +ELLIPSISELLIPSIS +
+
+ +
+

COMMA

+
+
+ + + + + +',' +[suppress] +
+
+ +
+

ELLIPSIS

+
+
+ + + + +'...' +
+
+ +
+

RPAR

+
+
+ + + + + +')' +[suppress] +
+
+ +
+

END

+
+
+ + + + +'end' +
+
+ + + + diff --git a/examples/lucene_grammar.py b/examples/lucene_grammar.py index e79a3aa4..91310a41 100644 --- a/examples/lucene_grammar.py +++ b/examples/lucene_grammar.py @@ -18,7 +18,7 @@ and_, or_, not_, to_ = pp.CaselessKeyword.using_each("AND OR NOT TO".split()) keyword = and_ | or_ | not_ | to_ -expression = pp.Forward() +expression = pp.Forward().set_name("query expression") valid_word = pp.Regex( r'([a-zA-Z0-9_.-]|\\\\|\\([+\-!(){}\[\]^"~*?:]|\|\||&&))' @@ -37,18 +37,18 @@ number = ppc.fnumber() fuzzy_modifier = TILDE + pp.Opt(number, default=0.5)("fuzzy") -term = pp.Forward().set_name("field") +term = pp.Forward().set_name("term") field_name = valid_word().set_name("fieldname") -incl_range_search = pp.Group(LBRACK - term("lower") + to_ + term("upper") + RBRACK) -excl_range_search = pp.Group(LBRACE - term("lower") + to_ + term("upper") + RBRACE) -range_search = incl_range_search("incl_range") | excl_range_search("excl_range") -boost = CARAT - number("boost") +incl_range_search = pp.Group(LBRACK - term("lower") + to_ + term("upper") + RBRACK).set_name("incl_range_search") +excl_range_search = pp.Group(LBRACE - term("lower") + to_ + term("upper") + RBRACE).set_name("excl_range_search") +range_search = (incl_range_search("incl_range") | excl_range_search("excl_range")).set_name("range_search") +boost = (CARAT - number("boost")).set_name("boost") -string_expr = pp.Group(string + proximity_modifier) | string -word_expr = pp.Group(valid_word + fuzzy_modifier) | valid_word +string_expr = (pp.Group(string + proximity_modifier) | string).set_name("string_expr") +word_expr = (pp.Group(valid_word + fuzzy_modifier) | valid_word).set_name("word_expr") term <<= ( ~keyword - + pp.Opt(field_name("field") + COLON) + + pp.Opt(field_name("field") + COLON).set_name("field") + (word_expr | string_expr | range_search | pp.Group(LPAR + expression + RPAR)) + pp.Opt(boost) ) @@ -66,7 +66,7 @@ pp.OpAssoc.LEFT, ), ], -).set_name("query expression") +) def main(): @@ -367,5 +367,13 
@@ def main(): sys.exit(1) + if __name__ == "__main__": + import contextlib + + with contextlib.suppress(Exception): + expression.create_diagram( + "lucene_grammar_diagram.html", vertical=2, show_groups=True + ) + main() diff --git a/examples/lucene_grammar_diagram.html b/examples/lucene_grammar_diagram.html new file mode 100644 index 00000000..ab378c9a --- /dev/null +++ b/examples/lucene_grammar_diagram.html @@ -0,0 +1,867 @@ + + + + + + + + + + + + + + + +
+

query expression

+
+
+ + + + + +term_expressionterm_expression +
+
+ +
+

term_expression

+
+
+ + + + + +or operationsor operations +
+
+ +
+

or operations

+
+
+ + + + + + + + +{'AND' | '&&'} operations{'AND' | '&&'} operations + + +oror +{'AND' | '&&'} operations{'AND' | '&&'} operations + +{'AND' | '&&'} operations{'AND' | '&&'} operations +
+
+ +
+

{'AND' | '&&'} operations

+
+
+ + + + + + + + +{'NOT' | '!'} operations{'NOT' | '!'} operations + + + +'AND' +'&&' +{'NOT' | '!'} operations{'NOT' | '!'} operations + +{'NOT' | '!'} operations{'NOT' | '!'} operations +
+
+ +
+

{'NOT' | '!'} operations

+
+
+ + + + + + + + + + + +'NOT' +'!' +{'NOT' | '!'} operations{'NOT' | '!'} operations +{'+' | '-'} operations{'+' | '-'} operations +
+
+ +
+

{'+' | '-'} operations

+
+
+ + + + + + + + + + + +'+' +'-' +{'+' | '-'} operations{'+' | '-'} operations +termterm +nested_term_expressionnested_term_expression +
+
+ +
+

term

+
+
+ + + + + + + + +'AND' +'OR' +'NOT' +'TO' +[NOT] +fieldfield + + + +wordword +'~' + + +fnumberfnumber +wordword + + +string enclosed in '"' + + +'~' +integerinteger +string enclosed in '"' +incl_range_searchincl_range_search +excl_range_searchexcl_range_search + + + +'(' +[suppress] +query expressionquery expression + +')' +[suppress] + + +boostboost +
+
+ +
+

field

+
+
+ + + + + + + +fieldnamefieldname +':' +
+
+ +
+

fieldname

+
+
+ + + + +([a-zA-Z0-9_.-]|\\\\|\\([+\-!(){}\[\]^"~*?:]|\|\||&&))([a-zA-Z0-9*_+.-]|\\\\|\\([+\-!(){}\[\]^"~*?:]|\|\||&&)|\*|\?)* +
+
+ +
+

fnumber

+
+
+ + + + +[+-]?\d+\.?\d*(?:[eE][+-]?\d+)? +
+
+ +
+

word

+
+
+ + + + +([a-zA-Z0-9_.-]|\\\\|\\([+\-!(){}\[\]^"~*?:]|\|\||&&))([a-zA-Z0-9*_+.-]|\\\\|\\([+\-!(){}\[\]^"~*?:]|\|\||&&)|\*|\?)* +
+
+ +
+

integer

+
+
+ + + + +W:(0-9) +
+
+ +
+

incl_range_search

+
+
+ + + + + + +'[' +termterm +'TO' +termterm +']' +
+
+ +
+

excl_range_search

+
+
+ + + + + + +'{' +termterm +'TO' +termterm +'}' +
+
+ +
+

boost

+
+
+ + + + + +'^' +fnumberfnumber +
+
+ +
+

nested_term_expression

+
+
+ + + + + + +'(' +[suppress] +term_expressionterm_expression + +')' +[suppress] +
+
+ +
+

or

+
+
+ + + + + + + +'OR' +'||' +
+
+ + + + diff --git a/examples/mongodb_query_expression.html b/examples/mongodb_query_expression.html new file mode 100644 index 00000000..1e53d13c --- /dev/null +++ b/examples/mongodb_query_expression.html @@ -0,0 +1,2325 @@ + + + + + + + + + + + + + + + +
+

boolean_comparison_operand_expression

+
+
+ + + + + +{'or' | '∨'} operations{'or' | '∨'} operations +
+
+ +
+

{'or' | '∨'} operations

+
+
+ + + + + + + + +{'and' | '∧'} operations{'and' | '∧'} operations + + +OR_OPOR_OP +{'and' | '∧'} operations{'and' | '∧'} operations + +{'and' | '∧'} operations{'and' | '∧'} operations +
+
+ +
+

{'and' | '∧'} operations

+
+
+ + + + + + + + +{'not' ~{{'in' | 'like'}}} operations{'not' ~{{'in' | 'like'}}} operations + + +AND_OPAND_OP +{'not' ~{{'in' | 'like'}}} operations{'not' ~{{'in' | 'like'}}} operations + +{'not' ~{{'in' | 'like'}}} operations{'not' ~{{'in' | 'like'}}} operations +
+
+ +
+

{'not' ~{{'in' | 'like'}}} operations

+
+
+ + + + + + + + + + +NOT_OPNOT_OP +{'not' ~{{'in' | 'like'}}} operations{'not' ~{{'in' | 'like'}}} operations +arith_comparison_operand_expressionarith_comparison_operand_expression +identident +nested_boolean_comparison_operand_expressionnested_boolean_comparison_operand_expression +
+
+ +
+

NOT_OP

+
+
+ + + + + +NOTNOT + + +ININ +LIKELIKE +[NOT] +
+
+ +
+

arith_comparison_operand_expression

+
+
+ + + + + +contain_operator operationscontain_operator operations +
+
+ +
+

contain_operator operations

+
+
+ + + + + + + + +like_operator operationslike_operator operations + + +contain_operatorcontain_operator +like_operator operationslike_operator operations + +like_operator operationslike_operator operations +
+
+ +
+

like_operator operations

+
+
+ + + + + + + + +== | = | != | ≠ operations== | = | != | ≠ operations + + +like_operatorlike_operator +== | = | != | ≠ operations== | = | != | ≠ operations + +== | = | != | ≠ operations== | = | != | ≠ operations +
+
+ +
+

== | = | != | ≠ operations

+
+
+ + + + + + + + +<= | >= | < | > | ≤ | ≥ operations<= | >= | < | > | ≤ | ≥ operations + + +== | = | != | ≠== | = | != | ≠ +<= | >= | < | > | ≤ | ≥ operations<= | >= | < | > | ≤ | ≥ operations + +<= | >= | < | > | ≤ | ≥ operations<= | >= | < | > | ≤ | ≥ operations +
+
+ +
+

<= | >= | < | > | ≤ | ≥ operations

+
+
+ + + + + + + + +Combine:({'search' 'for'}) operationsCombine:({'search' 'for'}) operations + + +<= | >= | < | > | ≤ | ≥<= | >= | < | > | ≤ | ≥ +Combine:({'search' 'for'}) operationsCombine:({'search' 'for'}) operations + +Combine:({'search' 'for'}) operationsCombine:({'search' 'for'}) operations +
+
+ +
+

Combine:({'search' 'for'}) operations

+
+
+ + + + + + + + + + +SEARCH_FORSEARCH_FOR +Combine:({'search' 'for'}) operationsCombine:({'search' 'for'}) operations +identident +string enclosed in '"' +string enclosed in "'" +date_timedate_time +datedate +real number with scientific notationreal number with scientific notation +real numberreal number +signed integersigned integer +operand_listoperand_list +nested_arith_comparison_operand_expressionnested_arith_comparison_operand_expression +
+
+ +
+

SEARCH_FOR

+
+
+ + + + + + +SEARCHSEARCH +FORFOR +[combine] +
+
+ +
+

SEARCH

+
+
+ + + + +'search' +
+
+ +
+

FOR

+
+
+ + + + +'for' +
+
+ +
+

ident

+
+
+ + + + + + +identifieridentifier + + + + + +'.' + +identifieridentifier +integerinteger + + +'[' +[suppress] +integerinteger + +']' +[suppress] + +[combine] +
+
+ +
+

identifier

+
+
+ + + + +W:(A-Z_a-zªµºÀ-Ö..., 0-9A-Z_a-zªµ·...) +
+
+ +
+

integer

+
+
+ + + + +W:(0-9) +
+
+ +
+

operand_list

+
+
+ + + + + + +LBRACKLBRACK + + + + +operandoperand + + + + + +',' +[suppress] +operandoperand + +RBRACKRBRACK +
+
+ +
+

LBRACK

+
+
+ + + + + +'[' +[suppress] +
+
+ +
+

operand

+
+
+ + + + + +identident +string enclosed in '"' +string enclosed in "'" +date_timedate_time +datedate +real number with scientific notationreal number with scientific notation +real numberreal number +signed integersigned integer +
+
+ +
+

date_time

+
+
+ + + + +\d{4}(/|-)\d{2}(\1)\d{2} \d{2}:\d{2}(:\d{2}(\.\d+)?)? +
+
+ +
+

date

+
+
+ + + + +\d{4}(/|-)\d{2}(\1)\d{2} +
+
+ +
+

real number with scientific notation

+
+
+ + + + +[+-]?(?:\d+(?:[eE][+-]?\d+)|(?:\d+\.\d*|\.\d+)(?:[eE][+-]?\d+)?) +
+
+ +
+

real number

+
+
+ + + + +[+-]?(?:\d+\.\d*|\.\d+) +
+
+ +
+

signed integer

+
+
+ + + + +[+-]?\d+ +
+
+ +
+

RBRACK

+
+
+ + + + + +']' +[suppress] +
+
+ +
+

nested_arith_comparison_operand_expression

+
+
+ + + + + + +'(' +[suppress] +arith_comparison_operand_expressionarith_comparison_operand_expression + +')' +[suppress] +
+
+ +
+

<= | >= | < | > | ≤ | ≥

+
+
+ + + + +<=|>=|<|>|≤|≥ +
+
+ +
+

== | = | != | ≠

+
+
+ + + + +==|=|!=|≠ +
+
+ +
+

like_operator

+
+
+ + + + + +LIKELIKE +NOT_LIKENOT_LIKE +'=~' +
+
+ +
+

NOT_LIKE

+
+
+ + + + + + +NOTNOT +LIKELIKE +[combine] +
+
+ +
+

LIKE

+
+
+ + + + +'like' +
+
+ +
+

contain_operator

+
+
+ + + + + +ININ +NOT_INNOT_IN +CONTAINS_ALLCONTAINS_ALL +CONTAINS_NONECONTAINS_NONE +CONTAINS_ANYCONTAINS_ANY +⊇ | ∈ | ∉⊇ | ∈ | ∉ +
+
+ +
+

NOT_IN

+
+
+ + + + + + +NOTNOT +ININ +[combine] +
+
+ +
+

NOT

+
+
+ + + + +'not' +
+
+ +
+

IN

+
+
+ + + + +'in' +
+
+ +
+

CONTAINS_ALL

+
+
+ + + + + + +CONTAINSCONTAINS +ALLALL +[combine] +
+
+ +
+

ALL

+
+
+ + + + +'all' +
+
+ +
+

CONTAINS_NONE

+
+
+ + + + + + +CONTAINSCONTAINS +NONENONE +[combine] +
+
+ +
+

NONE

+
+
+ + + + +'none' +
+
+ +
+

CONTAINS_ANY

+
+
+ + + + + + +CONTAINSCONTAINS +ANYANY +[combine] +
+
+ +
+

CONTAINS

+
+
+ + + + +'contains' +
+
+ +
+

ANY

+
+
+ + + + +'any' +
+
+ +
+

⊇ | ∈ | ∉

+
+
+ + + + +[⊇∈∉] +
+
+ +
+

nested_boolean_comparison_operand_expression

+
+
+ + + + + + +'(' +[suppress] +boolean_comparison_operand_expressionboolean_comparison_operand_expression + +')' +[suppress] +
+
+ +
+

AND_OP

+
+
+ + + + + +ANDAND +'∧' +
+
+ +
+

AND

+
+
+ + + + +'and' +
+
+ +
+

OR_OP

+
+
+ + + + + +OROR +'∨' +
+
+ +
+

OR

+
+
+ + + + +'or' +
+
+ + + + diff --git a/examples/mongodb_query_expression.py b/examples/mongodb_query_expression.py index f42386dc..081ff854 100644 --- a/examples/mongodb_query_expression.py +++ b/examples/mongodb_query_expression.py @@ -357,7 +357,7 @@ def unary_op(tokens): (SEARCH_FOR, 1, pp.OpAssoc.RIGHT, unary_op), (pp.one_of("<= >= < > ≤ ≥"), 2, pp.OpAssoc.LEFT, binary_comparison_op), (pp.one_of("= == != ≠"), 2, pp.OpAssoc.LEFT, binary_eq_neq), - (LIKE | NOT_LIKE | "=~", 2, pp.OpAssoc.LEFT, regex_comparison_op), + ((LIKE | NOT_LIKE | "=~").set_name("like_operator"), 2, pp.OpAssoc.LEFT, regex_comparison_op), ( ( IN @@ -366,7 +366,7 @@ def unary_op(tokens): | CONTAINS_NONE | CONTAINS_ANY | pp.one_of("⊇ ∈ ∉") - ), + ).set_name("contain_operator"), 2, pp.OpAssoc.LEFT, binary_array_comparison_op, @@ -657,5 +657,14 @@ def main(): if __name__ == "__main__": - query_condition_expr.create_diagram("mongodb_query_expression.html") + import contextlib + + with contextlib.suppress(Exception): + query_condition_expr.create_diagram( + "mongodb_query_expression.html", + vertical=3, + show_results_names=True, + show_groups=True + ) + main() diff --git a/examples/number_words.py b/examples/number_words.py index 181740fd..aa3ea09f 100644 --- a/examples/number_words.py +++ b/examples/number_words.py @@ -112,6 +112,12 @@ def multiply(t): if __name__ == "__main__": + import contextlib + + with contextlib.suppress(Exception): + # create railroad diagram + numeric_expression.create_diagram("number_words_diagram.html", vertical=5) + numeric_expression.run_tests( """ one @@ -132,6 +138,3 @@ def multiply(t): """, postParse=lambda _, s: "{:,}".format(s[0]), ) - - # create railroad diagram - numeric_expression.create_diagram("numeric_words_diagram.html", vertical=5) diff --git a/examples/number_words_diagram.html b/examples/number_words_diagram.html new file mode 100644 index 00000000..626f7cb8 --- /dev/null +++ b/examples/number_words_diagram.html @@ -0,0 +1,604 @@ + + + + + + + + + + + + + + + +
+

numeric_words

+
+
+ + + + + + + + + +1000s1000s +'and/-''and/-' + + + +100s100s +'and/-''and/-' +1-991-99 + + + + +1000s1000s +'and/-''and/-' +100s100s +1000s1000s +
+
+ +
+

1000s

+
+
+ + + + + +1-9991-999 +thousandthousand +
+
+ +
+

1-999

+
+
+ + + + + + + + + +100s100s +'and/-''and/-' +1-991-99 +100s100s +
+
+ +
+

100s

+
+
+ + + + + + +'one' +'two' +'three' +'four' +'five' +'six' +'seven' +'eight' +'nine' +'eleven' +'twelve' +'thirteen' +'fourteen' +'fifteen' +'sixteen' +'seventeen' +'eighteen' +'nineteen' + +twenty-ninetytwenty-ninety +'-''-' +one-nineone-nine +hundredhundred +
+
+ +
+

'-'

+
+
+ + + + + + + +'-' +[suppress] +
+
+ +
+

hundred

+
+
+ + + + + +'hundred' +
+
+ +
+

'and/-'

+
+
+ + + + + + + + +'and' +'-' +[suppress] +
+
+ +
+

1-99

+
+
+ + + + + +'one' +'two' +'three' +'four' +'five' +'six' +'seven' +'eight' +'nine' +'ten' +'eleven' +'twelve' +'thirteen' +'fourteen' +'fifteen' +'sixteen' +'seventeen' +'eighteen' +'nineteen' + +twenty-ninetytwenty-ninety + + + +'-''-' +one-nineone-nine +
+
+ +
+

twenty-ninety

+
+
+ + + + + +'twenty' +'thirty' +'forty' +'fifty' +'sixty' +'seventy' +'eighty' +'ninety' +
+
+ +
+

one-nine

+
+
+ + + + + +'one' +'two' +'three' +'four' +'five' +'six' +'seven' +'eight' +'nine' +
+
+ +
+

thousand

+
+
+ + + + + +'thousand' +
+
+ + + + diff --git a/examples/parse_python_value.html b/examples/parse_python_value.html new file mode 100644 index 00000000..2f04103b --- /dev/null +++ b/examples/parse_python_value.html @@ -0,0 +1,711 @@ + + + + + + + + + + + + + + + +
+

list_item

+
+
+ + + + + +realreal +integerinteger +quoted string using single or double quotesquoted string using single or double quotes +unicode string literalunicode string literal +True | FalseTrue | False +'None' + +list_exprlist_expr +tuple_exprtuple_expr +set_exprset_expr +dict_exprdict_expr +
+
+ +
+

real

+
+
+ + + + +[+-]?\d+\.\d*([Ee][+-]?\d+)? +
+
+ +
+

integer

+
+
+ + + + +[+-]?\d+ +
+
+ +
+

quoted string using single or double quotes

+
+
+ + + + + + +double quoted stringdouble quoted string +single quoted stringsingle quoted string +[combine] +
+
+ +
+

double quoted string

+
+
+ + + + + +"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))* +'"' +
+
+ +
+

single quoted string

+
+
+ + + + + +'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))* +"'" +
+
+ +
+

unicode string literal

+
+
+ + + + + + +'u' +quoted string using single or double quotesquoted string using single or double quotes +[combine] +
+
+ +
+

True | False

+
+
+ + + + +\b(?:True|False)\b +
+
+ +
+

list_expr

+
+
+ + + + + + + +'[' +[suppress] + + + + +list_itemlist_item + + + + + +',' +[suppress] +list_itemlist_item + + + + +',' +[suppress] + +']' +[suppress] +
+
+ +
+

tuple_expr

+
+
+ + + + + + + +'(' +[suppress] + + + + +list_itemlist_item + + + + + +',' +[suppress] +list_itemlist_item + + + + +',' +[suppress] + +')' +[suppress] +
+
+ +
+

set_expr

+
+
+ + + + + + + +'{' +[suppress] + + +list_itemlist_item + + + + + +',' +[suppress] +list_itemlist_item + + + + +',' +[suppress] + +'}' +[suppress] +
+
+ +
+

dict_expr

+
+
+ + + + + + + +'{' +[suppress] + + + + +dict_entrydict_entry + + + + + +',' +[suppress] +dict_entrydict_entry + + + + +',' +[suppress] + +'}' +[suppress] +
+
+ +
+

dict_entry

+
+
+ + + + + +list_itemlist_item + +':' +[suppress] +list_itemlist_item +
+
+ + + + diff --git a/examples/parse_python_value.py b/examples/parse_python_value.py index cb4288fe..e23d7d66 100644 --- a/examples/parse_python_value.py +++ b/examples/parse_python_value.py @@ -63,6 +63,10 @@ dict_str.add_parse_action(cvtDict) if __name__ == "__main__": + import contextlib + + with contextlib.suppress(Exception): + list_item.create_diagram("parse_python_value.html") tests = """['a', 100, ('A', [101,102]), 3.14, [ +2.718, 'xyzzy', -1.414] ] [{0: [2], 1: []}, {0: [], 1: [], 2: []}, {0: [1, 2]}] @@ -77,4 +81,3 @@ 'a quoted string'""" list_item.run_tests(tests) - list_item.create_diagram("parse_python_value.html") diff --git a/examples/roman_numerals.py b/examples/roman_numerals.py index 25772130..10a13ec4 100644 --- a/examples/roman_numerals.py +++ b/examples/roman_numerals.py @@ -56,9 +56,6 @@ def roman_numeral_literal(numeral_string, value): ).set_parse_action(sum) pp.autoname_elements() -# uncomment to generate railroad diagram -# roman_numeral.create_diagram("romanNumerals.html") - # unit tests def make_roman_numeral(n): @@ -86,6 +83,12 @@ def add_digits(n, limit, c, s): def main(): + import contextlib + + with contextlib.suppress(Exception): + # generate railroad diagram + roman_numeral.create_diagram("roman_numerals_diagram.html") + # make a string of all roman numerals from I to MMMMM tests = " ".join(make_roman_numeral(i) for i in range(1, 5000 + 1)) diff --git a/examples/roman_numerals_diagram.html b/examples/roman_numerals_diagram.html new file mode 100644 index 00000000..55a79453 --- /dev/null +++ b/examples/roman_numerals_diagram.html @@ -0,0 +1,689 @@ + + + + + + + + + + + + + + + +
+

roman_numeral

+
+
+ + + + + + + + +onethousandonethousand + + + + +ninehundredninehundred +fourhundredfourhundred + + + +fivehundredfivehundred + + + +onehundredonehundred + + + +onehundredonehundred + + +onehundredonehundred + + + +ninetyninety +fortyforty + + + +fiftyfifty + + + +tenten + + + +tenten + + +tenten + + + +ninenine +fourfour + + + +fivefive + + + +oneone + + + +oneone + + +oneone +
+
+ +
+

onethousand

+
+
+ + + + +'M' +
+
+ +
+

ninehundred

+
+
+ + + + +'CM' +
+
+ +
+

fourhundred

+
+
+ + + + +'CD' +
+
+ +
+

fivehundred

+
+
+ + + + +'D' +
+
+ +
+

onehundred

+
+
+ + + + +'C' +
+
+ +
+

ninety

+
+
+ + + + +'XC' +
+
+ +
+

forty

+
+
+ + + + +'XL' +
+
+ +
+

fifty

+
+
+ + + + +'L' +
+
+ +
+

ten

+
+
+ + + + +'X' +
+
+ +
+

nine

+
+
+ + + + +'IX' +
+
+ +
+

four

+
+
+ + + + +'IV' +
+
+ +
+

five

+
+
+ + + + +'V' +
+
+ +
+

one

+
+
+ + + + +'I' +
+
+ + + + diff --git a/examples/rosettacode.py b/examples/rosettacode.py index ab11a1a8..1437b7f7 100644 --- a/examples/rosettacode.py +++ b/examples/rosettacode.py @@ -276,10 +276,13 @@ def main(): + import contextlib import sys + sys.setrecursionlimit(2000) - program.create_diagram("rosettacode_diagram.html") + with contextlib.suppress(Exception): + program.create_diagram("rosettacode_diagram.html") success, report = program.run_tests(tests) assert success diff --git a/examples/rosettacode_diagram.html b/examples/rosettacode_diagram.html new file mode 100644 index 00000000..31c5cf79 --- /dev/null +++ b/examples/rosettacode_diagram.html @@ -0,0 +1,2076 @@ + + + + + + + + + + + + + + + +
+

program

+
+
+ + + + + + + +stmtstmt + +
+
+ +
+

stmt

+
+
+ + + + + +statementstatement +
+
+ +
+

statement

+
+
+ + + + + +empty_statementempty_statement +assignment_stmtassignment_stmt +while_stmtwhile_stmt +if_stmtif_stmt +print_stmtprint_stmt +putc_stmtputc_stmt +stmt_liststmt_list +
+
+ +
+

empty_statement

+
+
+ + + + +SEMISEMI +
+
+ +
+

assignment_stmt

+
+
+ + + + + +identifieridentifier +EQEQ +arith_operand_expressionarith_operand_expression +SEMISEMI +
+
+ +
+

identifier

+
+
+ + + + + + + + +'while' +'if' +'print' +'putc' +'else' +[NOT] +ident_nameident_name +[combine] +
+
+ +
+

ident_name

+
+
+ + + + +W:(A-Z_a-zªµºÀ-Ö..., 0-9A-Z_a-zªµ·...) +
+
+ +
+

EQ

+
+
+ + + + +'=' +
+
+ +
+

arith_operand_expression

+
+
+ + + + + +|| operations|| operations +
+
+ +
+

|| operations

+
+
+ + + + + + + + +&& operations&& operations + + +|||| +&& operations&& operations + +&& operations&& operations +
+
+ +
+

&& operations

+
+
+ + + + + + + + +== | != operations== | != operations + + +&&&& +== | != operations== | != operations + +== | != operations== | != operations +
+
+ +
+

== | != operations

+
+
+ + + + + + + + +<= | < | >= | > operations<= | < | >= | > operations + + +== | !=== | != +<= | < | >= | > operations<= | < | >= | > operations + +<= | < | >= | > operations<= | < | >= | > operations +
+
+ +
+

<= | < | >= | > operations

+
+
+ + + + + + + + ++ | - operations+ | - operations + + +<= | < | >= | ><= | < | >= | > ++ | - operations+ | - operations + ++ | - operations+ | - operations +
+
+ +
+

+ | - operations

+
+
+ + + + + + + + +* | / | % operations* | / | % operations + + ++ | -+ | - +* | / | % operations* | / | % operations + +* | / | % operations* | / | % operations +
+
+ +
+

* | / | % operations

+
+
+ + + + + + + + ++ | - | ! operations+ | - | ! operations + + +* | / | %* | / | % ++ | - | ! operations+ | - | ! operations + ++ | - | ! operations+ | - | ! operations +
+
+ +
+

+ | - | ! operations

+
+
+ + + + + + + + + + ++ | - | !+ | - | ! ++ | - | ! operations+ | - | ! operations +identifieridentifier +integerinteger +charchar +nested_arith_operand_expressionnested_arith_operand_expression +
+
+ +
+

+ | - | !

+
+
+ + + + +[+\-!] +
+
+ +
+

integer

+
+
+ + + + +W:(0-9) +
+
+ +
+

char

+
+
+ + + + +'\\?.' +
+
+ +
+

nested_arith_operand_expression

+
+
+ + + + + + +'(' +[suppress] +arith_operand_expressionarith_operand_expression + +')' +[suppress] +
+
+ +
+

* | / | %

+
+
+ + + + +[*/%] +
+
+ +
+

+ | -

+
+
+ + + + +[+\-] +
+
+ +
+

<= | < | >= | >

+
+
+ + + + +<=|<|>=|> +
+
+ +
+

== | !=

+
+
+ + + + +==|!= +
+
+ +
+

&&

+
+
+ + + + +\&\& +
+
+ +
+

||

+
+
+ + + + +\|\| +
+
+ +
+

while_stmt

+
+
+ + + + + +WHILEWHILE +paren_exprparen_expr +stmtstmt +
+
+ +
+

WHILE

+
+
+ + + + +'while' +
+
+ +
+

paren_expr

+
+
+ + + + + +LPARLPAR +arith_operand_expressionarith_operand_expression +RPARRPAR +
+
+ +
+

if_stmt

+
+
+ + + + + +IFIF +paren_exprparen_expr +stmtstmt + + + +ELSEELSE +stmtstmt +
+
+ +
+

IF

+
+
+ + + + +'if' +
+
+ +
+

ELSE

+
+
+ + + + +'else' +
+
+ +
+

print_stmt

+
+
+ + + + + +PRINTPRINT + + +LPARLPAR +prt_listprt_list +RPARRPAR +SEMISEMI +
+
+ +
+

PRINT

+
+
+ + + + +'print' +
+
+ +
+

LPAR

+
+
+ + + + + +'(' +[suppress] +
+
+ +
+

prt_list

+
+
+ + + + + + + +quoted stringquoted string +arith_operand_expressionarith_operand_expression + + + + + +',' +[suppress] + +quoted stringquoted string +arith_operand_expressionarith_operand_expression + +
+
+ +
+

quoted string

+
+
+ + + + +string enclosed in '"' +
+
+ +
+

RPAR

+
+
+ + + + + +')' +[suppress] +
+
+ +
+

putc_stmt

+
+
+ + + + + +PUTCPUTC +paren_exprparen_expr +SEMISEMI +
+
+ +
+

PUTC

+
+
+ + + + +'putc' +
+
+ +
+

SEMI

+
+
+ + + + + +';' +[suppress] +
+
+ +
+

stmt_list

+
+
+ + + + + +LBRACELBRACE + + + +stmtstmt + +RBRACERBRACE +
+
+ +
+

LBRACE

+
+
+ + + + + +'{' +[suppress] +
+
+ +
+

RBRACE

+
+
+ + + + + +'}' +[suppress] +
+
+ + + + diff --git a/examples/select_parser.py b/examples/select_parser.py index 79257eba..1f3ae4a5 100644 --- a/examples/select_parser.py +++ b/examples/select_parser.py @@ -8,7 +8,7 @@ from pyparsing import ( pyparsing_common, ParserElement, OpAssoc, CaselessKeyword, Combine, Forward, Group, Literal, MatchFirst, Optional, QuotedString, Regex, Suppress, Word, - alphanums, alphas, DelimitedList, infix_notation, nums, one_of, rest_of_line + alphanums, alphas, DelimitedList, infix_notation, nums, one_of, rest_of_line, autoname_elements ) # fmt: on @@ -29,12 +29,12 @@ } vars().update(keywords) -any_keyword = MatchFirst(keywords.values()) +any_keyword = MatchFirst(keywords.values()).set_name("any_keyword") quoted_identifier = QuotedString('"', esc_quote='""') identifier = (~any_keyword + Word(alphas, alphanums + "_")).set_parse_action( pyparsing_common.downcase_tokens -) | quoted_identifier +).set_name("identifier") | quoted_identifier collation_name = identifier.copy() column_name = identifier.copy() column_alias = identifier.copy() @@ -67,6 +67,9 @@ bind_parameter = Word("?", nums) | Combine(one_of(": @ $") + parameter_name) type_name = one_of("TEXT REAL INTEGER BLOB NULL") +def concat_qualified_column(t): + t[0][:] = ["".join(t[0])] + expr_term = ( CAST + LPAR + expr + AS + type_name + RPAR | EXISTS + LPAR + select_stmt + RPAR @@ -77,10 +80,10 @@ | literal_value | bind_parameter | Group( - identifier("col_db") + DOT + identifier("col_tab") + DOT + identifier("col") - ) - | Group(identifier("col_tab") + DOT + identifier("col")) - | Group(identifier("col")) + identifier("col_db") + DOT + identifier("col_tab") + DOT + identifier("col"), + ).add_parse_action(concat_qualified_column).set_name("db_table_column name") + | Group(identifier("col_tab") + DOT + identifier("col")).add_parse_action(concat_qualified_column).set_name("table_column name") + | Group(identifier("col")).set_name("column name") ) NOT_NULL = Group(NOT + NULL) @@ -93,17 +96,17 @@ UNARY, BINARY, TERNARY = 
1, 2, 3 expr <<= infix_notation( - expr_term, + expr_term.set_name("expr_term"), [ (one_of("- + ~") | NOT, UNARY, OpAssoc.RIGHT), - (ISNULL | NOTNULL | NOT_NULL, UNARY, OpAssoc.LEFT), + ((ISNULL | NOTNULL | NOT_NULL).set_name("null_comparison_operator"), UNARY, OpAssoc.LEFT), ("||", BINARY, OpAssoc.LEFT), (one_of("* / %"), BINARY, OpAssoc.LEFT), (one_of("+ -"), BINARY, OpAssoc.LEFT), (one_of("<< >> & |"), BINARY, OpAssoc.LEFT), (one_of("< <= > >="), BINARY, OpAssoc.LEFT), ( - one_of("= == != <>") + (one_of("= == != <>") | IS | IN | LIKE @@ -114,7 +117,7 @@ | NOT_LIKE | NOT_GLOB | NOT_MATCH - | NOT_REGEXP, + | NOT_REGEXP).set_name("comparison_operator"), BINARY, OpAssoc.LEFT, ), @@ -232,7 +235,7 @@ def main(): SELECT * FROM abcd WHERE blobby == x'C0FFEE' -- hex SELECT * FROM abcd WHERE ff NOT IN (1,2,4,5) SELECT * FROM abcd WHERE ff not between 3 and 9 - SELECT * FROM abcd WHERE ff not like 'bob%' + SELECT * FROM abcd WHERE db_name.tab_name.ff not like 'bob%' """ success, _ = select_stmt.run_tests(tests) diff --git a/examples/tag_metadata.py b/examples/tag_metadata.py index cd851e28..2da39b4d 100644 --- a/examples/tag_metadata.py +++ b/examples/tag_metadata.py @@ -21,13 +21,17 @@ greeting = "Hello," + (latin | greek | japanese) + end_punc -greeting.run_tests( - """\ - Hello, World. - Hello, World! - Hello, κόσμος? - Hello, 世界! - """ -) +if __name__ == '__main__': + import contextlib + + with contextlib.suppress(Exception): + greeting.create_diagram("tag_metadata_diagram.html", vertical=3) -greeting.create_diagram("tag_metadata_diagram.html") + greeting.run_tests( + """\ + Hello, World. + Hello, World! + Hello, κόσμος? + Hello, 世界! + """ + ) diff --git a/examples/tag_metadata_diagram.html b/examples/tag_metadata_diagram.html new file mode 100644 index 00000000..d253f8e0 --- /dev/null +++ b/examples/tag_metadata_diagram.html @@ -0,0 +1,86 @@ + + + + + + + + + + + + + + + +
+

+
+
+ + + + + +'Hello,' + + +W:(A-Za-zªµºÀ-ÖØ...) +Tag:alphabet='Latin' + +W:(Ͱ-ʹͶͷͺ-ͽͿΆΈ-Ί...) +Tag:alphabet='Greek' + +W:(々〆〱-〵〻〼ぁ-ゖゝ-ゟ...) +Tag:alphabet='Japanese' + + +'.' +Tag:mood='normal' + +'!' +Tag:mood='excited' + +'?' +Tag:mood='curious' +
+
+ + + + From 00d7aed694b8c0971c2f853e09af211419b9017d Mon Sep 17 00:00:00 2001 From: ptmcg Date: Mon, 30 Dec 2024 18:43:42 -0600 Subject: [PATCH 25/31] Fix nested expression diagram element generation --- .../complex_chemical_formulas_diagram.html | 28 +++++++++---------- examples/decaf_parser_diagram.html | 28 +++++++++---------- examples/lox_parser_diagram.html | 4 +-- examples/lua_parser_diagram.html | 8 +++--- examples/lucene_grammar_diagram.html | 4 +-- examples/mongodb_query_expression.html | 12 ++++---- examples/rosettacode_diagram.html | 4 +-- pyparsing/helpers.py | 26 ++++++++--------- 8 files changed, 55 insertions(+), 59 deletions(-) diff --git a/examples/complex_chemical_formulas_diagram.html b/examples/complex_chemical_formulas_diagram.html index 9c0aa308..939bcb6a 100644 --- a/examples/complex_chemical_formulas_diagram.html +++ b/examples/complex_chemical_formulas_diagram.html @@ -257,22 +257,22 @@

integer

subscript operations

- + - - - - - -elementelement -nested_element_expressionnested_element_expression - -subscriptsubscript - -elementelement -nested_element_expressionnested_element_expression +
+
+ +
+

rbrack

+
+
+ + + + + +']' +[suppress] +
+
+ +
+

tuple_expr

+
+
+ + + + + + +lparenlparen + + + + + +list_itemlist_item + + + +',' +[suppress] +list_itemlist_item + + + + +',' +[suppress] + +list_itemlist_item +commacomma +rparenrparen +
+
+ +
+

lparen

+
+
+ + + + + +'(' +[suppress] +
+
+ +
+

rparen

+
+
+ + + + + +')' +[suppress] +
+
+ +
+

set_expr

+
+
+ - - - -'{' -[suppress] - - -list_itemlist_item - - - - - -',' -[suppress] -list_itemlist_item - - - - -',' -[suppress] - -'}' -[suppress] +
+
+ +
+

lbrace

+
+
+ + + + + +'{' +[suppress] +
+
+ +
+

dict_entry

+
+
+ + + + + +list_itemlist_item +coloncolon +list_itemlist_item +
+
+ +
+

colon

+
+
+ + + + + +':' +[suppress]