diff --git a/CHANGES b/CHANGES index 66484eed..abb74f38 100644 --- a/CHANGES +++ b/CHANGES @@ -12,6 +12,28 @@ the new function names before the old functions are completely removed. (Big hel Devin J. Pohly in structuring the code to enable this peaceful transition.) +Version 3.2.1 - December, 2024 +------------------------------ +- Updated generated railroad diagrams to make non-terminal elements links to their related + sub-diagrams. This _greatly_ improves navigation of the diagram, especially for + large, complex parsers. + +- Simplified railroad diagrams emitted for parsers using `infix_notation`, by hiding + lookahead terms. Renamed internally generated expressions for clarity, and improved + diagramming. + +- Improved performance of `cpp_style_comment`, `c_style_comment`, `common.fnumber` + and `common.ieee_float` Regex expressions. PRs submitted by Gabriel Gerlero, + nice work, thanks! + +- Add missing type annotations to `match_only_at_col`, `replace_with`, `remove_quotes`, + `with_attribute`, and `with_class`. Issue #585 reported by rafrafrek. + +- Added generated diagrams for many of the examples. + +- Replaced old examples/0README.html file with examples/README.md file. + + Version 3.2.0 - October, 2024 ------------------------------- - Discontinued support for Python 3.6, 3.7, and 3.8. Adopted new Python features from diff --git a/docs/HowToUsePyparsing.rst b/docs/HowToUsePyparsing.rst index fe8ea3cd..f23047f0 100644 --- a/docs/HowToUsePyparsing.rst +++ b/docs/HowToUsePyparsing.rst @@ -374,6 +374,14 @@ methods for code to use are: basic element can be referenced multiple times and given different names within a complex grammar. +.. _using_each: + +- ``using_each(list_of_symbols)`` a short-cut for defining a number of + symbols of a particular ``ParserElement`` subclass:: + + LBRACK, RBRACK, LBRACE, RBRACE, LPAR, RPAR = Suppress.using_each("[]{}()") + AND, OR, NOT = Keyword.using_each("and or not".split()) + .. _set_parse_action: - ``set_parse_action(*fn)`` - specify one or more functions to call after successful @@ -412,7 +420,7 @@ methods for code to use are: A nice short-cut for calling ``set_parse_action`` is to use it as a decorator:: - identifier = Word(alphas, alphanums+"_") + identifier = Word(alphas, alphanums + "_") @identifier.set_parse_action def resolve_identifier(results: ParseResults): @@ -463,9 +471,11 @@ methods for code to use are: when trying to match this element - ``validate()`` - function to verify that the defined grammar does not - contain infinitely recursive constructs (``validate()`` is deprecated, and + contain infinitely recursive constructs. + + *(``validate()`` is deprecated, and will be removed in a future pyparsing release. Pyparsing now supports - left-recursive parsers, which this function attempted to catch.) + left-recursive parsers, which this function attempted to catch.)* .. _parse_with_tabs: @@ -613,7 +623,7 @@ Basic ParserElement subclasses ``SkipTo`` can also be written using ``...``:: - LBRACE, RBRACE = map(Literal, "{}") + LBRACE, RBRACE = Literal.using_each("{}") brace_expr = LBRACE + SkipTo(RBRACE) + RBRACE # can also be written as @@ -1423,27 +1433,27 @@ access them using code like the following:: The following language ranges are defined. 
-========================== ================= ================================================ +========================== ================= ======================================================== Unicode set Alternate names Description --------------------------- ----------------- ------------------------------------------------ -Arabic العربية -Chinese 中文 -CJK Union of Chinese, Japanese, and Korean sets -Cyrillic кириллица -Devanagari देवनागरी -Greek Ελληνικά -Hangul Korean, 한국어 -Hebrew עִברִית -Japanese 日本語 Union of Kanji, Katakana, and Hiragana sets -Japanese.Hiragana ひらがな -Japanese.Kanji 漢字 -Japanese.Katakana カタカナ -Latin1 All Unicode characters up to code point 255 -LatinA -LatinB -Thai ไทย -BasicMultilingualPlane BMP All Unicode characters up to code point 65535 -========================== ================= ================================================ +-------------------------- ----------------- -------------------------------------------------------- +``Arabic`` العربية +``Chinese`` 中文 +``CJK`` Union of Chinese, Japanese, and Korean sets +``Cyrillic`` кириллица +``Devanagari`` देवनागरी +``Greek`` Ελληνικά +``Hangul`` Korean, 한국어 +``Hebrew`` עִברִית +``Japanese`` 日本語 Union of Kanji, Katakana, and Hiragana sets +``Japanese.Hiragana`` ひらがな +``Japanese.Kanji`` 漢字 +``Japanese.Katakana`` カタカナ +``Latin1`` All Unicode characters up to code point 0x7f (255) +``LatinA`` Unicode characters for code points 0x100-0x17f (256-383) +``LatinB`` Unicode characters for code points 0x180-0x24f (384-591) +``Thai`` ไทย +``BasicMultilingualPlane`` BMP All Unicode characters up to code point 0xffff (65535) +========================== ================= ======================================================== The base ``unicode`` class also includes definitions based on all Unicode code points up to ``sys.maxunicode``. This set will include emojis, wingdings, and many other specialized and typographical variant characters. @@ -1493,7 +1503,7 @@ Example ------- You can view an example railroad diagram generated from `a pyparsing grammar for SQL SELECT statements <_static/sql_railroad.html>`_ (generated from -`examples/select_parser.py <../examples/select_parser.py>`_). +`examples/select_parser.py `_). Naming tip ---------- diff --git a/docs/whats_new_in_3_1.rst b/docs/whats_new_in_3_1.rst index 1d22dbf3..cc97e5c7 100644 --- a/docs/whats_new_in_3_1.rst +++ b/docs/whats_new_in_3_1.rst @@ -130,7 +130,7 @@ API Changes ident = ppu.Greek.identifier # or - # ident = ppu.Ελληνικά.identifier + # ident = ppu.Ελληνικά.identifier - Added bool ``embed`` argument to ``ParserElement.create_diagram()``. When passed as True, the resulting diagram will omit the ````, @@ -198,7 +198,7 @@ Fixed Bugs - Updated ``create_diagram()`` code to be compatible with railroad-diagrams package version 3.0. -- Fixed bug in pyparsing.common.url, when input URL is not alone +- Fixed bug in ``pyparsing.common.url``, when input URL is not alone on an input line. - Fixed bug in srange, when parsing escaped '/' and '\' inside a @@ -262,8 +262,14 @@ Fixed Bugs New / Enhanced Examples ======================= - Added example ``mongodb_query_expression.py``, to convert human-readable infix query - expressions (such as ``a==100 and b>=200``) and transform them into the equivalent - query argument for the pymongo package (``{'$and': [{'a': 100}, {'b': {'$gte': 200}}]}``). 
+ expressions, such as:: + + a==100 and b>=200 + + and transform them into an equivalent query argument for the pymongo package:: + + {'$and': [{'a': 100}, {'b': {'$gte': 200}}]} + Supports many equality and inequality operators - see the docstring for the ``transform_query`` function for many more examples. diff --git a/examples/0README.html b/examples/0README.html deleted file mode 100644 index c8a125ec..00000000 --- a/examples/0README.html +++ /dev/null @@ -1,296 +0,0 @@ - -pyparsing Examples - -

-pyparsing Examples
-
-This directory contains a number of Python scripts that can get you started in learning to use pyparsing.
-
-LICENSE
-
- -The files and source code in this examples directory are covered under the same LICENSE as -the core pyparsing package. You will find this license in the LICENSE file of the pyparsing package. - - - - - diff --git a/examples/README.md b/examples/README.md new file mode 100644 index 00000000..73f3fd0c --- /dev/null +++ b/examples/README.md @@ -0,0 +1,110 @@ +# Pyparsing Examples + +This directory contains a number of examples of parsers created using pyparsing. They fall into a few general +categories (several examples include supporting railroad diagrams): + +## Pyparsing tutorial and language feature demonstrations + * Hello World! + * [greeting.py](./greeting.py) + * [greetingInGreek.py](./greetingInGreek.py) + * [greetingInKorean.py](./greetingInKorean.py) + * [hola_mundo.py](./hola_mundo.py) + * left recursion + * [left_recursion.py](./left_recursion.py) + * macro expansion + * [macro_expander.py](./macro_expander.py) + * Roman numerals + * [roman_numerals.py](./roman_numerals.py) + * Unicode text handling + * [tag_metadata.py](./tag_metadata.py) [(diagram)](./tag_metadata_diagram.html) + * chemical formulas + * [chemical_formula.py](./chemical_formula.py) + * [complex_chemical_formulas.py](./complex_chemical_formulas.py) + * API checker + * [apicheck.py](./apicheck.py) [(diagram)](./apicheck_diagram.html) + * scan_string examples + * [scanExamples.py](./scanExamples.py) + * transform_string examples + * [include_preprocessor.py](./include_preprocessor.py) + * [macro_expander.py](./macro_expander.py) + * [nested_markup.py](./nested_markup.py) + * parse actions and conditions + * [shapes.py](./shapes.py) + * [number_words.py](./number_words.py) [(diagram)](./number_words_diagram.html) + * [wordsToNum.py](./wordsToNum.py) + * [range_check.py](./range_check.py) + * [one_to_ninety_nine.py](./one_to_ninety_nine.py) + * railroad diagrams + * [railroad_diagram_demo.py](./railroad_diagram_demo.py) [(diagram)](./railroad_diagram_demo.html) + * web page scraping + * [getNTPserversNew.py](./getNTPserversNew.py) + * [html_stripper.py](./html_stripper.py) + * [html_table_parser.py](./html_table_parser.py) + * [urlExtractorNew.py](./urlExtractorNew.py) +## Language parsers + * C + * [oc.py](./oc.py) + * lua + * [lua_parser.py](./lua_parser.py) [(diagram)](./lua_parser_diagram.html) + * lox + * [lox_parser.py](./lox_parser.py) [(diagram)](./lox_parser_diagram.html) + * verilog + * [verilog_parse.py](./verilog_parse.py) + * brainf*ck + * [bf.py](./bf.py) [(diagram)](./bf_diagram.html) + * decaf + * [decaf_parser.py](./decaf_parser.py) [(diagram)](./decaf_parser_diagram.html) + * S-expression + * [sexpParser.py](./sexpParser.py) + * rosetta code + * [rosettacode.py](./rosettacode.py) [(diagram)](./rosettacode_diagram.html) +## Domain Specific Language parsers + * adventureEngine + * [adventureEngine.py](./adventureEngine.py) [(diagram)](./adventure_game_parser_diagram.html) + * pgn + * [pgn.py](./pgn.py) + * TAP + * [TAP.py](./TAP.py) [(diagram)](./TAP_diagram.html) +## Search and query language parsers + * basic search + * [searchparser.py](./searchparser.py) [demo](./searchParserAppDemo.py) + * lucene + * [lucene_grammar.py](./lucene_grammar.py) [(diagram)](./lucene_grammar_diagram.html) + * mongodb query + * [mongodb_query_expression.py](./mongodb_query_expression.py) [(diagram)](./mongodb_query_expression.html) + * SQL + * [select_parser.py](./select_parser.py) (SELECT statements) + * [sql2dot.py](./sql2dot.py) (TABLE DML statements) + * BigQuery view + * 
[bigquery_view_parser.py](./bigquery_view_parser.py) +## Data format parsers + * JSON + * [jsonParser.py](./jsonParser.py) + * protobuf + * [protobuf_parser.py](./protobuf_parser.py) + * stackish + * [stackish.py](./stackish.py) + * CORBA IDL + * [idlparse.py](./idlparse.py) +## Logical and arithmetic infix notation parsers and examples + * [fourFn.py](./fourFn.py) + * [simpleArith.py](./simpleArith.py) + * [eval_arith.py](./eval_arith.py) + * [simpleCalc.py](./simpleCalc.py) + * [LAparser.py](./LAparser.py) (linear algebra) + * [simpleBool.py](./simpleBool.py) +## Helpful utilities + * parse time expressions ("2pm the day after tomorrow") + * [delta_time.py](./delta_time.py) [(diagram)](./delta_time_diagram.html) + * invert regex (generate sample strings matching a regex) + * [inv_regex.py](./inv_regex.py) + * email addresses + * [email_address_parser.py](./email_address_parser.py) + * Excel cell formula + * [excel_expr.py](./excel_expr.py) + * ctypes interfaces code generator from C include.h file + * [gen_ctypes.py](./gen_ctypes.py) + * log file parsing + * [httpServerLogPaser.py](./httpServerLogPaser.py) + + diff --git a/examples/TAP.py b/examples/TAP.py index b41e9510..dfa16195 100644 --- a/examples/TAP.py +++ b/examples/TAP.py @@ -37,6 +37,7 @@ restOfLine, FollowedBy, empty, + autoname_elements, ) __all__ = ["tapOutputParser", "TAPTest", "TAPSummary"] @@ -52,7 +53,7 @@ OK, NOT_OK = map(Literal, ["ok", "not ok"]) testStatus = OK | NOT_OK -description = Regex("[^#\n]+") +description = Regex(r"[^#\n]+") description.setParseAction(lambda t: t[0].lstrip("- ")) TODO, SKIP = map(CaselessLiteral, "TODO SKIP".split()) @@ -79,6 +80,8 @@ OneOrMore((testLine | bailLine) + NL) )("tests") +autoname_elements() + class TAPTest: def __init__(self, results): @@ -170,6 +173,11 @@ def summary(self, showPassed=False, showAll=False): def main(): + import contextlib + + with contextlib.suppress(Exception): + tapOutputParser.create_diagram("TAP_diagram.html", vertical=3) + test1 = """\ 1..4 ok 1 - Input file opened diff --git a/examples/TAP_diagram.html b/examples/TAP_diagram.html new file mode 100644 index 00000000..fbf6f23d --- /dev/null +++ b/examples/TAP_diagram.html @@ -0,0 +1,645 @@ + + + + + + + + + + + + + + + +
+ [generated railroad diagram markup omitted: tapOutputParser, with sub-diagrams for plan, testLine, integer, description, directive, TODO, SKIP, bailLine, empty, rest of line, NL, and end of line]
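For reference, the guarded diagram-generation pattern that TAP.py and the other updated examples follow is sketched below; the tiny ``greeting`` parser is a made-up stand-in, and the guard simply skips diagram creation if the optional railroad-diagrams dependency is not installed:

    import contextlib

    import pyparsing as pp

    greeting = (pp.Word(pp.alphas) + "," + pp.Word(pp.alphas) + "!").set_name("greeting")

    if __name__ == "__main__":
        # create_diagram() needs the optional railroad-diagrams package;
        # suppress any exception so the example still runs without it
        with contextlib.suppress(Exception):
            greeting.create_diagram("greeting_diagram.html", vertical=3)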
+ + + + diff --git a/examples/adventureEngine.py b/examples/adventureEngine.py index 7010181f..c4d155b6 100644 --- a/examples/adventureEngine.py +++ b/examples/adventureEngine.py @@ -5,9 +5,10 @@ # Updated 2023 - using PEP8 API names # -import pyparsing as pp +import contextlib import random import string +import pyparsing as pp def a_or_an(item): @@ -508,7 +509,10 @@ def make_bnf(self): | doorsCommand | helpCommand | quitCommand - )("command") + )("command").set_name("command") + + with contextlib.suppress(Exception): + parser.create_diagram("adventure_game_parser_diagram.html", vertical=2, show_groups=True) return parser diff --git a/examples/adventure_game_parser_diagram.html b/examples/adventure_game_parser_diagram.html new file mode 100644 index 00000000..e5ff5ca0 --- /dev/null +++ b/examples/adventure_game_parser_diagram.html @@ -0,0 +1,885 @@ + + + + + + + + + + + + + + + +
+ [generated railroad diagram markup omitted: command, with sub-diagrams for item_ref and the INVENTORY, USE, OPEN, CLOSE, DROP, TAKE, MOVE, LOOK, EXAMINE, DOORS, HELP, and QUIT command keywords]
+ + + + diff --git a/examples/antlr_grammar.py b/examples/antlr_grammar.py index 49151eee..566dd0ef 100644 --- a/examples/antlr_grammar.py +++ b/examples/antlr_grammar.py @@ -35,6 +35,7 @@ alphanums, delimitedList, Char, + autoname_elements, ) # http://www.antlr.org/grammar/ANTLR/ANTLRv3.g @@ -75,11 +76,13 @@ PROTECTED, PUBLIC, PRIVATE, -) = map( - Keyword, +) = list( + Keyword.using_each( """src scope options tokens fragment id lexer parser grammar tree catch finally throws protected public private """.split(), + ) ) + KEYWORD = MatchFirst(keywords) # Tokens @@ -252,6 +255,7 @@ grammarDef = grammarHeading + Group(OneOrMore(rule))("rules") +autoname_elements() def grammar(): return grammarDef @@ -341,6 +345,10 @@ def antlrConverter(antlrGrammarTree): if __name__ == "__main__": + import contextlib + + with contextlib.suppress(Exception): + grammarDef.create_diagram("antlr_grammar_diagram.html", vertical=2, show_groups=True) text = """\ grammar SimpleCalc; @@ -379,7 +387,6 @@ def antlrConverter(antlrGrammarTree): """ - grammar().validate() antlrGrammarTree = grammar().parseString(text) print(antlrGrammarTree.dump()) pyparsingRules = antlrConverter(antlrGrammarTree) diff --git a/examples/antlr_grammar_diagram.html b/examples/antlr_grammar_diagram.html new file mode 100644 index 00000000..d6ec6ddf --- /dev/null +++ b/examples/antlr_grammar_diagram.html @@ -0,0 +1,4160 @@ + + + + + + + + + + + + + + + +
+ [generated railroad diagram markup omitted: grammarDef, with sub-diagrams for grammarType, optionsSpec, tokensSpec, attrScope, action, rule, element, terminal, block, ebnf, rewrite, exceptionGroup, and the ANTLR keyword/punctuation expressions]
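As a quick illustration of the ``using_each`` shortcut adopted in antlr_grammar.py and decaf_parser.py above, a minimal sketch (the symbols here are invented, not taken from either parser):

    import pyparsing as pp

    # one Suppress instance per punctuation character
    LBRACE, RBRACE, LPAR, RPAR = pp.Suppress.using_each("{}()")

    # one Keyword instance per word; a MatchFirst of them makes a handy
    # "any keyword" expression for negative lookaheads
    AND, OR, NOT = pp.Keyword.using_each("and or not".split())
    any_keyword = pp.MatchFirst([AND, OR, NOT]).set_name("any_keyword")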
+ + + + diff --git a/examples/apicheck.py b/examples/apicheck.py index 358dd6f2..97010d19 100644 --- a/examples/apicheck.py +++ b/examples/apicheck.py @@ -32,29 +32,38 @@ def apiProc(name, numargs): ] ) -test = """[ procname1 $par1 $par2 ] - other code here - [ procname1 $par1 $par2 $par3 ] - more code here - [ procname1 $par1 ] - [ procname3 ${arg with spaces} $par2 ]""" - - -# now explicitly iterate through the scanner using next(), so that -# we can trap ParseSyntaxException's that would be raised due to -# an incorrect number of arguments. If an exception does occur, -# then see how we reset the input text and scanner to advance to the -# next line of source code -api_scanner = apiRef.scanString(test) -while 1: - try: - t, s, e = next(api_scanner) - print(f"found {t.procname} on line {lineno(s, test)}") - except ParseSyntaxException as pe: - print(f"invalid arg count on line {pe.lineno}") - print(f"{pe.lineno} : {pe.line}") - # reset api scanner to start after this exception location - test = "\n" * (pe.lineno - 1) + test[pe.loc + 1:] - api_scanner = apiRef.scanString(test) - except StopIteration: - break +autoname_elements() + +if __name__ == '__main__': + + import contextlib + + with contextlib.suppress(Exception): + apiRef.create_diagram("apicheck_diagram.html", vertical=9, show_groups=True) + + test = """[ procname1 $par1 $par2 ] + other code here + [ procname1 $par1 $par2 $par3 ] + more code here + [ procname1 $par1 ] + [ procname3 ${arg with spaces} $par2 ]""" + + + # now explicitly iterate through the scanner using next(), so that + # we can trap ParseSyntaxException's that would be raised due to + # an incorrect number of arguments. If an exception does occur, + # then see how we reset the input text and scanner to advance to the + # next line of source code + api_scanner = apiRef.scanString(test) + while 1: + try: + t, s, e = next(api_scanner) + print(f"found {t.procname} on line {lineno(s, test)}") + except ParseSyntaxException as pe: + print(f"invalid arg count on line {pe.lineno}") + print(f"{pe.lineno} : {pe.line}") + # reset api scanner to start after this exception location + test = "\n" * (pe.lineno - 1) + test[pe.loc + 1:] + api_scanner = apiRef.scanString(test) + except StopIteration: + break diff --git a/examples/apicheck_diagram.html b/examples/apicheck_diagram.html new file mode 100644 index 00000000..6729d519 --- /dev/null +++ b/examples/apicheck_diagram.html @@ -0,0 +1,226 @@ + + + + + + + + + + + + + + + +
+ [generated railroad diagram markup omitted: apiRef, with sub-diagrams for LBRACK, ident, and RBRACK]
+ + + + diff --git a/examples/bf.py b/examples/bf.py index 76144295..b8ff1aca 100644 --- a/examples/bf.py +++ b/examples/bf.py @@ -150,10 +150,14 @@ def run_program(tokens): t.execute(bf) print() +if __name__ == '__main__': -# generate railroad diagram -program_expr.create_diagram("bf.html") + # generate railroad diagram + import contextlib -# execute an example BF program -hw = "+[-->-[>>+>-----<<]<--<---]>-.>>>+.>>..+++[.>]<<<<.+++.------.<<-.>>>>+." -program_expr.parse_string(hw) + with contextlib.suppress(Exception): + program_expr.create_diagram("bf_diagram.html") + + # execute an example BF program + hw = "+[-->-[>>+>-----<<]<--<---]>-.>>>+.>>..+++[.>]<<<<.+++.------.<<-.>>>>+." + program_expr.parse_string(hw) diff --git a/examples/bf_diagram.html b/examples/bf_diagram.html new file mode 100644 index 00000000..9696b0ff --- /dev/null +++ b/examples/bf_diagram.html @@ -0,0 +1,128 @@ + + + + + + + + + + + + + + + +
+ [generated railroad diagram markup omitted: program and instruction]
+ + + + diff --git a/examples/chemical_formulas.html b/examples/chemical_formulas.html new file mode 100644 index 00000000..12f1fb7e --- /dev/null +++ b/examples/chemical_formulas.html @@ -0,0 +1,158 @@ + + + + + + + + + + + + + + + +
+ [generated railroad diagram markup omitted: chemical_formula, element, and subscript]
+ + + + diff --git a/examples/chemical_formulas.py b/examples/chemical_formulas.py index 60577fff..80a8c969 100644 --- a/examples/chemical_formulas.py +++ b/examples/chemical_formulas.py @@ -106,23 +106,29 @@ def cvt_subscript_int(s): element_ref = pp.Group(element("symbol") + pp.Optional(subscript_int, default=1)("qty")) formula = element_ref[1, ...].set_name("chemical_formula") -formula.run_tests( - """\ - # sodium chloride - NaCl - # hydrogen hydroxide - H₂O - # phenol - C₆H₅OH - # ethanol - C₂H₅OH - # decanol - C₁₀H₂₁OH - """, - full_dump=False, - post_parse=lambda _, tokens: - f"Molecular weight: {sum_atomic_weights_by_results_name_with_converted_ints(tokens)}", -) -formula.create_diagram("chemical_formulas.html") -print() +if __name__ == '__main__': + import contextlib + + with contextlib.suppress(Exception): + formula.create_diagram("chemical_formulas.html") + + formula.run_tests( + """\ + # sodium chloride + NaCl + # hydrogen hydroxide + H₂O + # phenol + C₆H₅OH + # ethanol + C₂H₅OH + # decanol + C₁₀H₂₁OH + """, + full_dump=False, + post_parse=lambda _, tokens: + f"Molecular weight: {sum_atomic_weights_by_results_name_with_converted_ints(tokens)}", + ) + + print() diff --git a/examples/complex_chemical_formulas.py b/examples/complex_chemical_formulas.py index 91ecaef5..3c470155 100644 --- a/examples/complex_chemical_formulas.py +++ b/examples/complex_chemical_formulas.py @@ -108,9 +108,8 @@ def element_ref_sum(s, l, t): # of one or more element_ref's formula = element_ref[1, ...].set_name("chemical_formula") -# create railroad diagram for this parser +# set names on unnamed expressions for better diagram output pp.autoname_elements() -formula.create_diagram("complex_chemical_formulas.html") def molecular_weight(c: Counter) -> float: @@ -123,29 +122,37 @@ def molecular_weight(c: Counter) -> float: """ return sum(table_of_elements[k] * v for k, v in c.items()) - -formula.run_tests( - """\ - NaCl - HOH - H₂O - H₂O₂ - C₆H₅OH - C₁₀H₂₁OH - (C₆H₅OH)₂ - 3(C₆H₅OH)₂ - C(OH)₆ - CH₃(CH₂)₂OH - (CH₃)₃CH - CH₃(CH₂)₅CH₃ - Ba(BrO₃)₂·H₂O - Ba(BrO₃)₂·2(H₂O) - """, - full_dump=False, - post_parse=( - lambda _, tokens: - f"Molecular counts/weight: {dict(tokens[0])}" - f", {molecular_weight(tokens[0]):.3f}" - ), -) -print() +if __name__ == '__main__': + import contextlib + + # create railroad diagram for this parser + with contextlib.suppress(Exception): + formula.create_diagram( + "complex_chemical_formulas_diagram.html", vertical=2, show_groups=True + ) + + formula.run_tests( + """\ + NaCl + HOH + H₂O + H₂O₂ + C₆H₅OH + C₁₀H₂₁OH + (C₆H₅OH)₂ + 3(C₆H₅OH)₂ + C(OH)₆ + CH₃(CH₂)₂OH + (CH₃)₃CH + CH₃(CH₂)₅CH₃ + Ba(BrO₃)₂·H₂O + Ba(BrO₃)₂·2(H₂O) + """, + full_dump=False, + post_parse=( + lambda _, tokens: + f"Molecular counts/weight: {dict(tokens[0])}" + f", {molecular_weight(tokens[0]):.3f}" + ), + ) + print() diff --git a/examples/complex_chemical_formulas_diagram.html b/examples/complex_chemical_formulas_diagram.html new file mode 100644 index 00000000..939bcb6a --- /dev/null +++ b/examples/complex_chemical_formulas_diagram.html @@ -0,0 +1,538 @@ + + + + + + + + + + + + + + + +
+ [generated railroad diagram markup omitted: chemical_formula, with sub-diagrams for element_expression, integer and subscript operations, nested_element, element, subscript, and optional_separator]
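The ``run_tests``/``post_parse`` combination used in the two chemical-formula examples reduces to a sketch like this; the small ``addition`` parser is hypothetical:

    import pyparsing as pp

    integer = pp.Word(pp.nums).set_name("integer")
    addition = (integer + "+" + integer).set_name("addition")

    # post_parse is called with (test_string, results); a returned string
    # is appended to that test's output
    addition.run_tests(
        """\
        1 + 2
        10 + 20
        """,
        full_dump=False,
        post_parse=lambda _, toks: f"sum = {int(toks[0]) + int(toks[2])}",
    )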
+ + + + diff --git a/examples/decaf_parser.py b/examples/decaf_parser.py index e60b69a4..46a1347a 100644 --- a/examples/decaf_parser.py +++ b/examples/decaf_parser.py @@ -47,17 +47,19 @@ pp.ParserElement.enable_packrat() # keywords -_keywords = ( +keywords_ = ( VOID, INT, DOUBLE, BOOL, STRING, CLASS, INTERFACE, NULL, THIS, EXTENDS, IMPLEMENTS, FOR, WHILE, IF, ELSE, RETURN, BREAK, NEW, NEWARRAY, PRINT, READINTEGER, READLINE, TRUE, FALSE, -) = pp.Keyword.using_each( - """ - void int double bool string class interface null this extends implements or while - if else return break new NewArray Print ReadInteger ReadLine true false - """.split(), +) = list( + pp.Keyword.using_each( + """ + void int double bool string class interface null this extends implements or while + if else return break new NewArray Print ReadInteger ReadLine true false + """.split(), + ) ) -keywords = pp.MatchFirst(_keywords) +keywords = pp.MatchFirst(keywords_).set_name("any_keyword") ( LPAR, RPAR, LBRACE, RBRACE, LBRACK, RBRACK, DOT, EQ, COMMA, SEMI @@ -100,7 +102,7 @@ new_array = pp.Group(NEWARRAY + LPAR + expr + COMMA + type_ + RPAR) rvalue = constant | call | read_integer | read_line | new_statement | new_array | ident arith_expr = pp.infix_notation( - rvalue, + rvalue.set_name("rvalue"), [ ("-", 1, pp.OpAssoc.RIGHT,), (pp.one_of("* / %"), 2, pp.OpAssoc.LEFT,), @@ -108,7 +110,7 @@ ], ) comparison_expr = pp.infix_notation( - arith_expr, + arith_expr.set_name("arith_expr"), [ ("!", 1, pp.OpAssoc.RIGHT,), (pp.one_of("< > <= >="), 2, pp.OpAssoc.LEFT,), @@ -215,31 +217,42 @@ program = pp.Group(decl)[1, ...] decaf_parser = program -stmt.runTests("""\ - sin(30); - a = 1; - b = 1 + 1; - b = 1 != 2 && false; - print("A"); - a.b = 100; - a.b = 100.0; - a[100] = b; - a[0][0] = 2; - a = 0x1234; -""" -) +pp.autoname_elements() + +if __name__ == '__main__': + import contextlib + + # create railroad diagram for this parser + with contextlib.suppress(Exception): + program.create_diagram( + "decaf_parser_diagram.html", vertical=2, show_groups=True + ) -test_program = """ - void getenv(string var); - int main(string[] args) { - if (a > 100) { - Print(a, " is too big"); - } else if (a < 100) { - Print(a, " is too small"); - } else { - Print(a, "just right!"); + stmt.runTests("""\ + sin(30); + a = 1; + b = 1 + 1; + b = 1 != 2 && false; + print("A"); + a.b = 100; + a.b = 100.0; + a[100] = b; + a[0][0] = 2; + a = 0x1234; + """ + ) + + test_program = """ + void getenv(string var); + int main(string[] args) { + if (a > 100) { + Print(a, " is too big"); + } else if (a < 100) { + Print(a, " is too small"); + } else { + Print(a, "just right!"); + } } - } -""" + """ -print(decaf_parser.parse_string(test_program).dump()) + print(decaf_parser.parse_string(test_program).dump()) diff --git a/examples/decaf_parser_diagram.html b/examples/decaf_parser_diagram.html new file mode 100644 index 00000000..f1802c14 --- /dev/null +++ b/examples/decaf_parser_diagram.html @@ -0,0 +1,4263 @@ + + + + + + + + + + + + + + + +
+ [generated railroad diagram markup omitted: program, with sub-diagrams for decl, variable_decl, function_decl, class_decl, interface_decl, prototype, stmt, expr, the infix-notation operation tiers, and the Decaf keyword/punctuation expressions]
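The ``set_name`` calls added to the infix_notation operands in decaf_parser.py follow this general shape, shown here with a made-up arithmetic grammar rather than the Decaf rvalue:

    import pyparsing as pp

    operand = (pp.common.number | pp.common.identifier).set_name("operand")

    arith_expr = pp.infix_notation(
        operand,
        [
            ("-", 1, pp.OpAssoc.RIGHT),
            (pp.one_of("* / %"), 2, pp.OpAssoc.LEFT),
            (pp.one_of("+ -"), 2, pp.OpAssoc.LEFT),
        ],
    ).set_name("arith_expr")

    # naming both the operand and the resulting expression keeps the
    # generated railroad diagram's operation tiers readable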
+ + + + diff --git a/examples/delta_time.py b/examples/delta_time.py index 7bc29278..4bb3c0fa 100644 --- a/examples/delta_time.py +++ b/examples/delta_time.py @@ -359,10 +359,7 @@ def _remove_temp_keys(t: pp.ParseResults) -> None: time_expression = time_and_day -_GENERATE_DIAGRAM = False -if _GENERATE_DIAGRAM: - pp.autoname_elements() - time_expression.create_diagram("delta_time.html") +pp.autoname_elements() def demo(): @@ -590,4 +587,9 @@ def main() -> int: if __name__ == "__main__": + import contextlib + + with contextlib.suppress(Exception): + time_expression.create_diagram("delta_time_diagram.html", vertical=3, show_results_names=True, show_groups=True) + exit(main()) diff --git a/examples/delta_time_diagram.html b/examples/delta_time_diagram.html new file mode 100644 index 00000000..16dbbe38 --- /dev/null +++ b/examples/delta_time_diagram.html @@ -0,0 +1,1910 @@ + + + + + + + + + + + + + + + +
+ [generated railroad diagram markup omitted: time and day, with sub-diagrams for time reference, time of day, timespec, relative time, qty_expression, time unit, day reference, relative and absolute day, weekday_reference, and time_ref_present]
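A sketch of the ``autoname_elements()`` usage adopted in delta_time.py and several other examples; the little key/value grammar is invented for illustration:

    import contextlib

    import pyparsing as pp

    key = pp.Word(pp.alphas)
    value = pp.Word(pp.alphanums)
    assignment = key("name") + "=" + value("value")

    # pick up the variable names above ("key", "value", "assignment") as
    # element names, so the diagram shows them instead of anonymous nodes
    pp.autoname_elements()

    with contextlib.suppress(Exception):
        assignment.create_diagram(
            "assignment_diagram.html", vertical=3, show_results_names=True
        )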
+ + + + diff --git a/examples/directx_x_file_parser.html b/examples/directx_x_file_parser.html new file mode 100644 index 00000000..047584a1 --- /dev/null +++ b/examples/directx_x_file_parser.html @@ -0,0 +1,270 @@ + + + + + + + + + + + + + + + +
+ [generated railroad diagram markup omitted: template_defn and identifier]
+ + + + diff --git a/examples/directx_x_file_parser.py b/examples/directx_x_file_parser.py index 2208f7a2..65364793 100644 --- a/examples/directx_x_file_parser.py +++ b/examples/directx_x_file_parser.py @@ -89,6 +89,8 @@ def make_template_parser(template_defn: pp.ParseResults) -> pp.ParserElement: expr = pp.Group(pp.DelimitedList(expr, max=dim) + SEMI) member_parsers.append(expr(member.name)) + pp.autoname_elements() + return ( pp.Keyword(template_defn.name)("type") + ident("name") @@ -99,6 +101,14 @@ def make_template_parser(template_defn: pp.ParseResults) -> pp.ParserElement: if __name__ == "__main__": + import contextlib + + with contextlib.suppress(Exception): + # create railroad diagram + directx_template_defn.create_diagram( + "directx_x_file_parser.html", show_results_names=True, show_groups=False + ) + sample = """ some stuff... @@ -155,12 +165,6 @@ def make_template_parser(template_defn: pp.ParseResults) -> pp.ParserElement: ) # print() - # create railroad diagram - pp.autoname_elements() - directx_template_defn.create_diagram( - "directx_x_file_parser.html", show_results_names=True, show_groups=False - ) - vector_template = directx_template_defn.parse_string( """\ template Vector { @@ -173,9 +177,12 @@ def make_template_parser(template_defn: pp.ParseResults) -> pp.ParserElement: """ ) vector_parser = make_template_parser(vector_template) - vector_parser.create_diagram( - "directx_x_vector_parser.html", show_results_names=True, show_groups=False - ) + + with contextlib.suppress(Exception): + vector_parser.create_diagram( + "directx_x_vector_parser.html", show_results_names=True, show_groups=False + ) + v = vector_parser.parse_string('Vector p1 {"datum_A"; 1.0; 3.0; 5.0;}') print(v.dump()) diff --git a/examples/lox_parser.py b/examples/lox_parser.py index a9fdcd2b..d5050d13 100644 --- a/examples/lox_parser.py +++ b/examples/lox_parser.py @@ -230,5 +230,9 @@ class Circle { if __name__ == '__main__': - program.create_diagram("lox_program_parser.html", vertical=3) + import contextlib + + with contextlib.suppress(Exception): + program.create_diagram("lox_parser_diagram.html", vertical=2, show_groups=True) + main() diff --git a/examples/lox_parser_diagram.html b/examples/lox_parser_diagram.html new file mode 100644 index 00000000..de0c8778 --- /dev/null +++ b/examples/lox_parser_diagram.html @@ -0,0 +1,2872 @@ + + + + + + + + + + + + + + + +
[generated railroad diagram markup for the Lox grammar omitted - sub-diagrams for program, declaration, class_decl, fun_decl, var_decl, expression, statement, and related rules]
+
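The directx and lox changes above (like most of the examples touched in this patch) wrap `create_diagram()` in `contextlib.suppress(Exception)` so the demos still run when the optional railroad-diagrams/jinja2 dependencies are missing. A minimal standalone sketch of that pattern, using a hypothetical toy grammar and output file name:

    import contextlib

    import pyparsing as pp

    # hypothetical grammar, just to have something to diagram
    greeting = (pp.Keyword("hello") + pp.Word(pp.alphas)("name")).set_name("greeting")

    if __name__ == "__main__":
        # diagram generation needs the optional railroad-diagrams and jinja2 packages;
        # suppress any failure so the example still runs without them
        with contextlib.suppress(Exception):
            greeting.create_diagram("greeting_diagram.html", show_results_names=True)

        print(greeting.parse_string("hello world").dump())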
+ + + + diff --git a/examples/lua_parser.py b/examples/lua_parser.py index 792243c7..ec64fcc3 100644 --- a/examples/lua_parser.py +++ b/examples/lua_parser.py @@ -264,6 +264,13 @@ lua_script.ignore(lua_comment) if __name__ == "__main__": + import contextlib + + with contextlib.suppress(Exception): + lua_script.create_diagram( + "lua_parser_diagram.html", vertical=2, show_groups=True + ) + sample = r""" function test(x) local t = {foo=1, bar=2, arg=x} diff --git a/examples/lua_parser_diagram.html b/examples/lua_parser_diagram.html new file mode 100644 index 00000000..7ebe0d93 --- /dev/null +++ b/examples/lua_parser_diagram.html @@ -0,0 +1,4363 @@ + + + + + + + + + + + + + + + +
[generated railroad diagram markup for the Lua grammar omitted - sub-diagrams for script, stat, exp, tableconstructor, function_def, and related rules]
+
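Several of these example scripts (the directx parser above, and roman_numerals later in this patch) rely on `pp.autoname_elements()` to name every parser element defined in the calling scope before generating a diagram. A short hedged sketch of how that is typically used, with a hypothetical mini-grammar:

    import pyparsing as pp

    LBRACK, RBRACK = pp.Suppress("["), pp.Suppress("]")
    integer = pp.Word(pp.nums)
    int_list = LBRACK + pp.Opt(pp.DelimitedList(integer)) + RBRACK

    # name each element after its variable name (integer, int_list, ...) so a
    # railroad diagram shows readable, linked sub-diagrams instead of anonymous terms
    pp.autoname_elements()

    print(int_list.parse_string("[1, 2, 3]").as_list())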
+ + + + diff --git a/examples/lucene_grammar.py b/examples/lucene_grammar.py index e79a3aa4..91310a41 100644 --- a/examples/lucene_grammar.py +++ b/examples/lucene_grammar.py @@ -18,7 +18,7 @@ and_, or_, not_, to_ = pp.CaselessKeyword.using_each("AND OR NOT TO".split()) keyword = and_ | or_ | not_ | to_ -expression = pp.Forward() +expression = pp.Forward().set_name("query expression") valid_word = pp.Regex( r'([a-zA-Z0-9_.-]|\\\\|\\([+\-!(){}\[\]^"~*?:]|\|\||&&))' @@ -37,18 +37,18 @@ number = ppc.fnumber() fuzzy_modifier = TILDE + pp.Opt(number, default=0.5)("fuzzy") -term = pp.Forward().set_name("field") +term = pp.Forward().set_name("term") field_name = valid_word().set_name("fieldname") -incl_range_search = pp.Group(LBRACK - term("lower") + to_ + term("upper") + RBRACK) -excl_range_search = pp.Group(LBRACE - term("lower") + to_ + term("upper") + RBRACE) -range_search = incl_range_search("incl_range") | excl_range_search("excl_range") -boost = CARAT - number("boost") +incl_range_search = pp.Group(LBRACK - term("lower") + to_ + term("upper") + RBRACK).set_name("incl_range_search") +excl_range_search = pp.Group(LBRACE - term("lower") + to_ + term("upper") + RBRACE).set_name("excl_range_search") +range_search = (incl_range_search("incl_range") | excl_range_search("excl_range")).set_name("range_search") +boost = (CARAT - number("boost")).set_name("boost") -string_expr = pp.Group(string + proximity_modifier) | string -word_expr = pp.Group(valid_word + fuzzy_modifier) | valid_word +string_expr = (pp.Group(string + proximity_modifier) | string).set_name("string_expr") +word_expr = (pp.Group(valid_word + fuzzy_modifier) | valid_word).set_name("word_expr") term <<= ( ~keyword - + pp.Opt(field_name("field") + COLON) + + pp.Opt(field_name("field") + COLON).set_name("field") + (word_expr | string_expr | range_search | pp.Group(LPAR + expression + RPAR)) + pp.Opt(boost) ) @@ -66,7 +66,7 @@ pp.OpAssoc.LEFT, ), ], -).set_name("query expression") +) def main(): @@ -367,5 +367,13 @@ def main(): sys.exit(1) + if __name__ == "__main__": + import contextlib + + with contextlib.suppress(Exception): + expression.create_diagram( + "lucene_grammar_diagram.html", vertical=2, show_groups=True + ) + main() diff --git a/examples/lucene_grammar_diagram.html b/examples/lucene_grammar_diagram.html new file mode 100644 index 00000000..bd09106b --- /dev/null +++ b/examples/lucene_grammar_diagram.html @@ -0,0 +1,867 @@ + + + + + + + + + + + + + + + +
[generated railroad diagram markup for the Lucene query grammar omitted - sub-diagrams for query expression, term, field, range searches, boost, and related rules]
+
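The lucene_grammar changes above attach explicit `set_name()` calls to composite expressions (`range_search`, `boost`, `word_expr`, and so on) so that they show up as named, linked sub-diagrams. A small illustration of the same idea on a hypothetical expression, not the actual Lucene grammar:

    import pyparsing as pp

    LBRACK, RBRACK, DOTS = pp.Suppress("["), pp.Suppress("]"), pp.Suppress("..")
    integer = pp.Word(pp.nums).set_name("integer")

    # naming the composite makes diagrams (and error messages) refer to
    # "range_expression" instead of an auto-generated expression dump
    range_expression = (
        LBRACK + integer("lower") + DOTS + integer("upper") + RBRACK
    ).set_name("range_expression")

    print(range_expression.parse_string("[1..10]").dump())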
diff --git a/examples/mongodb_query_expression.html b/examples/mongodb_query_expression.html new file mode 100644 index 00000000..1058ebea --- /dev/null +++ b/examples/mongodb_query_expression.html @@ -0,0 +1,2325 @@
[generated railroad diagram markup for the MongoDB query expression grammar omitted - sub-diagrams for the boolean and arithmetic comparison operator levels, operands, and related rules]
+
+ + + + diff --git a/examples/mongodb_query_expression.py b/examples/mongodb_query_expression.py index f42386dc..081ff854 100644 --- a/examples/mongodb_query_expression.py +++ b/examples/mongodb_query_expression.py @@ -357,7 +357,7 @@ def unary_op(tokens): (SEARCH_FOR, 1, pp.OpAssoc.RIGHT, unary_op), (pp.one_of("<= >= < > ≤ ≥"), 2, pp.OpAssoc.LEFT, binary_comparison_op), (pp.one_of("= == != ≠"), 2, pp.OpAssoc.LEFT, binary_eq_neq), - (LIKE | NOT_LIKE | "=~", 2, pp.OpAssoc.LEFT, regex_comparison_op), + ((LIKE | NOT_LIKE | "=~").set_name("like_operator"), 2, pp.OpAssoc.LEFT, regex_comparison_op), ( ( IN @@ -366,7 +366,7 @@ def unary_op(tokens): | CONTAINS_NONE | CONTAINS_ANY | pp.one_of("⊇ ∈ ∉") - ), + ).set_name("contain_operator"), 2, pp.OpAssoc.LEFT, binary_array_comparison_op, @@ -657,5 +657,14 @@ def main(): if __name__ == "__main__": - query_condition_expr.create_diagram("mongodb_query_expression.html") + import contextlib + + with contextlib.suppress(Exception): + query_condition_expr.create_diagram( + "mongodb_query_expression.html", + vertical=3, + show_results_names=True, + show_groups=True + ) + main() diff --git a/examples/number_words.py b/examples/number_words.py index 181740fd..aa3ea09f 100644 --- a/examples/number_words.py +++ b/examples/number_words.py @@ -112,6 +112,12 @@ def multiply(t): if __name__ == "__main__": + import contextlib + + with contextlib.suppress(Exception): + # create railroad diagram + numeric_expression.create_diagram("number_words_diagram.html", vertical=5) + numeric_expression.run_tests( """ one @@ -132,6 +138,3 @@ def multiply(t): """, postParse=lambda _, s: "{:,}".format(s[0]), ) - - # create railroad diagram - numeric_expression.create_diagram("numeric_words_diagram.html", vertical=5) diff --git a/examples/number_words_diagram.html b/examples/number_words_diagram.html new file mode 100644 index 00000000..626f7cb8 --- /dev/null +++ b/examples/number_words_diagram.html @@ -0,0 +1,604 @@ + + + + + + + + + + + + + + + +
[generated railroad diagram markup for the number-words grammar omitted - sub-diagrams for numeric_words, 1000s, 100s, 1-99, and related rules]
+
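In the mongodb_query_expression changes above, the operator expressions passed to `infix_notation` are given explicit names (`like_operator`, `contain_operator`) so each precedence level gets a readable label in the diagram. A compact hedged sketch of that technique with a hypothetical boolean grammar:

    import pyparsing as pp

    AND, OR, NOT = pp.Keyword.using_each("and or not".split())
    operand = ~(AND | OR | NOT) + pp.Word(pp.alphas).set_name("identifier")

    bool_expr = pp.infix_notation(
        operand,
        [
            (NOT.set_name("not_operator"), 1, pp.OpAssoc.RIGHT),
            (AND.set_name("and_operator"), 2, pp.OpAssoc.LEFT),
            (OR.set_name("or_operator"), 2, pp.OpAssoc.LEFT),
        ],
    ).set_name("bool_expression")

    print(bool_expr.parse_string("a and not b or c").as_list())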
diff --git a/examples/parse_python_value.html b/examples/parse_python_value.html new file mode 100644 index 00000000..232e88ba --- /dev/null +++ b/examples/parse_python_value.html @@ -0,0 +1,1050 @@
[generated railroad diagram markup for the Python value parser omitted - sub-diagrams for list_item, list_expr, tuple_expr, set_expr, dict_expr, and related rules]
+
+ + + + diff --git a/examples/parse_python_value.py b/examples/parse_python_value.py index cb4288fe..5431accd 100644 --- a/examples/parse_python_value.py +++ b/examples/parse_python_value.py @@ -3,36 +3,36 @@ # Copyright, 2006, by Paul McGuire # import pyparsing as pp +from pyparsing import ParseResults, autoname_elements - -cvtBool = lambda t: t[0] == "True" -cvtInt = lambda toks: int(toks[0]) -cvtReal = lambda toks: float(toks[0]) -cvtTuple = lambda toks: tuple(toks.as_list()) -cvtSet = lambda toks: set(toks.as_list()) -cvtDict = lambda toks: dict(toks.as_list()) -cvtList = lambda toks: [toks.as_list()] +convert_bool = lambda t: t[0] == "True" +convert_int = lambda toks: int(toks[0]) +convert_real = lambda toks: float(toks[0]) +convert_tuple = lambda toks: tuple(toks.as_list()) +convert_set = lambda toks: set(toks.as_list()) +convert_dict = lambda toks: dict(toks.as_list()) +convert_list = lambda toks: [toks.as_list()] # define punctuation as suppressed literals lparen, rparen, lbrack, rbrack, lbrace, rbrace, colon, comma = pp.Suppress.using_each("()[]{}:,") -integer = pp.Regex(r"[+-]?\d+").set_name("integer").add_parse_action(cvtInt) -real = pp.Regex(r"[+-]?\d+\.\d*([Ee][+-]?\d+)?").set_name("real").add_parse_action(cvtReal) +integer = pp.Regex(r"[+-]?\d+").set_name("integer").add_parse_action(convert_int) +real = pp.Regex(r"[+-]?\d+\.\d*([Ee][+-]?\d+)?").set_name("real").add_parse_action(convert_real) + +# containers must be defined using a Forward, since they get parsed recursively tuple_str = pp.Forward().set_name("tuple_expr") list_str = pp.Forward().set_name("list_expr") set_str = pp.Forward().set_name("set_expr") dict_str = pp.Forward().set_name("dict_expr") -unistr = pp.unicodeString().add_parse_action(lambda t: t[0][2:-1]) quoted_str = pp.quotedString().add_parse_action(lambda t: t[0][1:-1]) -bool_literal = pp.oneOf("True False", as_keyword=True).add_parse_action(cvtBool) +bool_literal = pp.oneOf("True False", as_keyword=True).add_parse_action(convert_bool) none_literal = pp.Keyword("None").add_parse_action(pp.replace_with(None)) list_item = ( real | integer | quoted_str - | unistr | bool_literal | none_literal | pp.Group(list_str) @@ -41,40 +41,111 @@ | dict_str ).set_name("list_item") +# tuple must have a comma-separated list of 2 or more items, with optional +# trailing comma, or a single item with required trailing comma tuple_str <<= ( - lparen + pp.Opt(pp.DelimitedList(list_item, allow_trailing_delim=True)) + rparen + lparen + pp.Opt( + pp.DelimitedList(list_item, min=2, allow_trailing_delim=True) + | list_item + comma + ) + + rparen ) -tuple_str.add_parse_action(cvtTuple) +tuple_str.add_parse_action(convert_tuple) set_str <<= ( lbrace + pp.DelimitedList(list_item, allow_trailing_delim=True) + rbrace ) -set_str.add_parse_action(cvtSet) +set_str.add_parse_action(convert_set) list_str <<= ( lbrack + pp.Opt(pp.DelimitedList(list_item, allow_trailing_delim=True)) + rbrack ) -list_str.add_parse_action(cvtList, lambda t: t[0]) +list_str.add_parse_action(convert_list, lambda t: t[0]) dict_entry = pp.Group(list_item + colon + list_item).set_name("dict_entry") dict_str <<= ( lbrace + pp.Opt(pp.DelimitedList(dict_entry, allow_trailing_delim=True)) + rbrace ) -dict_str.add_parse_action(cvtDict) +dict_str.add_parse_action(convert_dict) -if __name__ == "__main__": +python_value = list_item + +autoname_elements() + +def main(): + from ast import literal_eval + import contextlib + + with contextlib.suppress(Exception): + list_item.create_diagram("parse_python_value.html") + + 
non_list_tests = """\ + # dict of str to int or dict + { 'A':1, 'B':2, 'C': {'a': 1.2, 'b': 3.4} } + + # dict of str or tuple keys + {'A':1, 'B':2, (1, 2): {'a', 1.2, 'b', 3.4}} + + # empty dict + {} + + # set of mixed types + {1, 2, 11, "blah"} + + # empty set + {()} + + # a tuple of mixed types + ('A', 100, -2.71828, {'b':99}) + + # a tuple with just one value + ('A',) + + # empty tuple + () - tests = """['a', 100, ('A', [101,102]), 3.14, [ +2.718, 'xyzzy', -1.414] ] - [{0: [2], 1: []}, {0: [], 1: [], 2: []}, {0: [1, 2]}] - { 'A':1, 'B':2, 'C': {'a': 1.2, 'b': 3.4} } - { 1, 2, 11, "blah" } - { 'A':1, 'B':2, 'C': {'a', 1.2, 'b', 3.4} } - 3.14159 - 42 - 6.02E23 - 6.02e+023 - 1.0e-7 - 'a quoted string'""" - - list_item.run_tests(tests) - list_item.create_diagram("parse_python_value.html") + # float + 3.14159 + + # int + 42 + + # float in scientific notation + 6.02E23 + 6.02e+023 + 1.0e-7 + + # quoted string + 'a quoted string' + """ + + list_tests = """\ + # list of mixed types + ['a', 100, ('A', [101,102]), 3.14, [ +2.718, 'xyzzy', -1.414] ] + + # list of dicts + [{0: [2], 1: []}, {0: [], 1: [], 2: []}, {0: [1, 2]}] + + # empty list + [] + """ + + def validate_parsed_value(test_str: str, result: ParseResults) -> bool: + python_value = literal_eval(test_str) + return python_value == result[0] + + def validate_parsed_list(test_str: str, result: ParseResults) -> bool: + python_value = literal_eval(test_str) + return python_value == result.as_list()[0] + + success1, report_1 = list_item.run_tests(non_list_tests) + success1 = success1 and all(validate_parsed_value(*rpt) for rpt in report_1) + + success2, report_2 = list_item.run_tests(list_tests) + success2 = success2 and all(validate_parsed_list(*rpt) for rpt in report_2) + + assert success1 and success2 + + +if __name__ == "__main__": + main() diff --git a/examples/railroad_diagram_demo.py b/examples/railroad_diagram_demo.py index b8442fd2..a34d7f6d 100644 --- a/examples/railroad_diagram_demo.py +++ b/examples/railroad_diagram_demo.py @@ -32,7 +32,7 @@ ).setName("grammar") -grammar.create_diagram("railroad_diagram_demo.html", vertical=6, show_results_names=True) +grammar.create_diagram("railroad_diagram_demo.html", vertical=4, show_results_names=True) test = """\ 1 2 3 diff --git a/examples/range_check.py b/examples/range_check.py index 046fe792..fcc96f60 100644 --- a/examples/range_check.py +++ b/examples/range_check.py @@ -57,21 +57,21 @@ def ranged_value( year = ranged_value(integer, 2000, None, "year") SLASH = pp.Suppress("/") -dateExpr = year("year") + SLASH + month("month") + pp.Opt(SLASH + day("day")) -dateExpr.set_name("date") +date_expr = year("year") + SLASH + month("month") + pp.Opt(SLASH + day("day")) +date_expr.set_name("date") # convert date fields to datetime (also validates dates as truly valid dates) -dateExpr.set_parse_action(lambda t: date(t.year, t.month, t.day or 1)) +date_expr.set_parse_action(lambda t: date(t.year, t.month, t.day or 1)) # add range checking on dates min_date = date(2002, 1, 1) max_date = date.today() -date_expr = ranged_value(dateExpr, min_date, max_date, "date") +range_checked_date_expr = ranged_value(date_expr, min_date, max_date, "date") -date_expr.create_diagram("range_check.html") +range_checked_date_expr.create_diagram("range_check.html") # tests of valid dates -success_valid_tests, _ = date_expr.run_tests( +success_valid_tests, _ = range_checked_date_expr.run_tests( """ # valid date 2011/5/8 @@ -85,7 +85,7 @@ def ranged_value( ) # tests of invalid dates -success_invalid_tests, _ = 
date_expr.run_tests( +success_invalid_tests, _ = range_checked_date_expr.run_tests( """ # all values are in range, but date is too early 2001/1/1 diff --git a/examples/roman_numerals.py b/examples/roman_numerals.py index 25772130..10a13ec4 100644 --- a/examples/roman_numerals.py +++ b/examples/roman_numerals.py @@ -56,9 +56,6 @@ def roman_numeral_literal(numeral_string, value): ).set_parse_action(sum) pp.autoname_elements() -# uncomment to generate railroad diagram -# roman_numeral.create_diagram("romanNumerals.html") - # unit tests def make_roman_numeral(n): @@ -86,6 +83,12 @@ def add_digits(n, limit, c, s): def main(): + import contextlib + + with contextlib.suppress(Exception): + # generate railroad diagram + roman_numeral.create_diagram("roman_numerals_diagram.html") + # make a string of all roman numerals from I to MMMMM tests = " ".join(make_roman_numeral(i) for i in range(1, 5000 + 1)) diff --git a/examples/roman_numerals_diagram.html b/examples/roman_numerals_diagram.html new file mode 100644 index 00000000..55a79453 --- /dev/null +++ b/examples/roman_numerals_diagram.html @@ -0,0 +1,689 @@ + + + + + + + + + + + + + + + +
[generated railroad diagram markup for the Roman numerals parser omitted - sub-diagrams for roman_numeral and its digit elements]
+
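The parse_python_value and range_check updates above capture the `(success, report)` pair returned by `run_tests()` and assert on it, instead of only printing the results. A minimal hedged sketch of that pattern, using hypothetical integer tests:

    import pyparsing as pp

    integer = pp.common.signed_integer

    # run_tests() returns (overall_success, [(test_string, result_or_exception), ...]),
    # which lets example scripts fail loudly instead of silently printing errors
    success, report = integer.run_tests("""\
        # a positive value
        42

        # a negative value
        -13
        """)

    assert success
    for test_str, result in report:
        print(test_str.strip(), "->", result)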
+ + + + diff --git a/examples/rosettacode.py b/examples/rosettacode.py index ab11a1a8..1437b7f7 100644 --- a/examples/rosettacode.py +++ b/examples/rosettacode.py @@ -276,10 +276,13 @@ def main(): + import contextlib import sys + sys.setrecursionlimit(2000) - program.create_diagram("rosettacode_diagram.html") + with contextlib.suppress(Exception): + program.create_diagram("rosettacode_diagram.html") success, report = program.run_tests(tests) assert success diff --git a/examples/rosettacode_diagram.html b/examples/rosettacode_diagram.html new file mode 100644 index 00000000..0335bf93 --- /dev/null +++ b/examples/rosettacode_diagram.html @@ -0,0 +1,2076 @@ + + + + + + + + + + + + + + + +
[generated railroad diagram markup for the rosettacode example grammar omitted - sub-diagrams for program, stmt, assignment_stmt, arith_operand_expression, and related rules]
+
+ + + + diff --git a/examples/select_parser.py b/examples/select_parser.py index 79257eba..1f3ae4a5 100644 --- a/examples/select_parser.py +++ b/examples/select_parser.py @@ -8,7 +8,7 @@ from pyparsing import ( pyparsing_common, ParserElement, OpAssoc, CaselessKeyword, Combine, Forward, Group, Literal, MatchFirst, Optional, QuotedString, Regex, Suppress, Word, - alphanums, alphas, DelimitedList, infix_notation, nums, one_of, rest_of_line + alphanums, alphas, DelimitedList, infix_notation, nums, one_of, rest_of_line, autoname_elements ) # fmt: on @@ -29,12 +29,12 @@ } vars().update(keywords) -any_keyword = MatchFirst(keywords.values()) +any_keyword = MatchFirst(keywords.values()).set_name("any_keyword") quoted_identifier = QuotedString('"', esc_quote='""') identifier = (~any_keyword + Word(alphas, alphanums + "_")).set_parse_action( pyparsing_common.downcase_tokens -) | quoted_identifier +).set_name("identifier") | quoted_identifier collation_name = identifier.copy() column_name = identifier.copy() column_alias = identifier.copy() @@ -67,6 +67,9 @@ bind_parameter = Word("?", nums) | Combine(one_of(": @ $") + parameter_name) type_name = one_of("TEXT REAL INTEGER BLOB NULL") +def concat_qualified_column(t): + t[0][:] = ["".join(t[0])] + expr_term = ( CAST + LPAR + expr + AS + type_name + RPAR | EXISTS + LPAR + select_stmt + RPAR @@ -77,10 +80,10 @@ | literal_value | bind_parameter | Group( - identifier("col_db") + DOT + identifier("col_tab") + DOT + identifier("col") - ) - | Group(identifier("col_tab") + DOT + identifier("col")) - | Group(identifier("col")) + identifier("col_db") + DOT + identifier("col_tab") + DOT + identifier("col"), + ).add_parse_action(concat_qualified_column).set_name("db_table_column name") + | Group(identifier("col_tab") + DOT + identifier("col")).add_parse_action(concat_qualified_column).set_name("table_column name") + | Group(identifier("col")).set_name("column name") ) NOT_NULL = Group(NOT + NULL) @@ -93,17 +96,17 @@ UNARY, BINARY, TERNARY = 1, 2, 3 expr <<= infix_notation( - expr_term, + expr_term.set_name("expr_term"), [ (one_of("- + ~") | NOT, UNARY, OpAssoc.RIGHT), - (ISNULL | NOTNULL | NOT_NULL, UNARY, OpAssoc.LEFT), + ((ISNULL | NOTNULL | NOT_NULL).set_name("null_comparison_operator"), UNARY, OpAssoc.LEFT), ("||", BINARY, OpAssoc.LEFT), (one_of("* / %"), BINARY, OpAssoc.LEFT), (one_of("+ -"), BINARY, OpAssoc.LEFT), (one_of("<< >> & |"), BINARY, OpAssoc.LEFT), (one_of("< <= > >="), BINARY, OpAssoc.LEFT), ( - one_of("= == != <>") + (one_of("= == != <>") | IS | IN | LIKE @@ -114,7 +117,7 @@ | NOT_LIKE | NOT_GLOB | NOT_MATCH - | NOT_REGEXP, + | NOT_REGEXP).set_name("comparison_operator"), BINARY, OpAssoc.LEFT, ), @@ -232,7 +235,7 @@ def main(): SELECT * FROM abcd WHERE blobby == x'C0FFEE' -- hex SELECT * FROM abcd WHERE ff NOT IN (1,2,4,5) SELECT * FROM abcd WHERE ff not between 3 and 9 - SELECT * FROM abcd WHERE ff not like 'bob%' + SELECT * FROM abcd WHERE db_name.tab_name.ff not like 'bob%' """ success, _ = select_stmt.run_tests(tests) diff --git a/examples/simpleArith.py b/examples/simpleArith.py index 99b7ce10..6ee1d31a 100644 --- a/examples/simpleArith.py +++ b/examples/simpleArith.py @@ -27,7 +27,7 @@ # To use the infixNotation helper: # 1. Define the "atom" operand term of the grammar. # For this simple grammar, the smallest operand is either -# and integer or a variable. This will be the first argument +# an integer or a variable. This will be the first argument # to the infixNotation method. # 2. 
Define a list of tuples for each level of operator # precedence. Each tuple is of the form @@ -51,8 +51,8 @@ expr = infixNotation( operand, [ - ("!", 1, opAssoc.LEFT), - ("^", 2, opAssoc.RIGHT), + (factop, 1, opAssoc.LEFT), + (expop, 2, opAssoc.RIGHT), (signop, 1, opAssoc.RIGHT), (multop, 2, opAssoc.LEFT), (plusop, 2, opAssoc.LEFT), diff --git a/examples/tag_metadata.py b/examples/tag_metadata.py index cd851e28..2da39b4d 100644 --- a/examples/tag_metadata.py +++ b/examples/tag_metadata.py @@ -21,13 +21,17 @@ greeting = "Hello," + (latin | greek | japanese) + end_punc -greeting.run_tests( - """\ - Hello, World. - Hello, World! - Hello, κόσμος? - Hello, 世界! - """ -) +if __name__ == '__main__': + import contextlib + + with contextlib.suppress(Exception): + greeting.create_diagram("tag_metadata_diagram.html", vertical=3) -greeting.create_diagram("tag_metadata_diagram.html") + greeting.run_tests( + """\ + Hello, World. + Hello, World! + Hello, κόσμος? + Hello, 世界! + """ + ) diff --git a/examples/tag_metadata_diagram.html b/examples/tag_metadata_diagram.html new file mode 100644 index 00000000..d253f8e0 --- /dev/null +++ b/examples/tag_metadata_diagram.html @@ -0,0 +1,86 @@ + + + + + + + + + + + + + + + +
[generated railroad diagram markup for the tag_metadata greeting parser omitted]
+
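The select_parser change above adds a `concat_qualified_column` parse action so that a grouped `db.table.column` reference is collapsed back into a single dotted string. A hedged variant of that idea (suppressing the dots and re-joining explicitly, rather than the example's exact implementation):

    import pyparsing as pp

    DOT = pp.Suppress(".")
    identifier = pp.Word(pp.alphas, pp.alphanums + "_")

    def concat_qualified_name(tokens):
        # rewrite the grouped name parts in place as one dotted string
        tokens[0][:] = [".".join(tokens[0])]

    qualified_column = pp.Group(
        identifier("table") + DOT + identifier("column")
    ).add_parse_action(concat_qualified_name)

    print(qualified_column.parse_string("orders.total").as_list())  # [['orders.total']]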
+ + + + diff --git a/examples/wordsToNum.py b/examples/wordsToNum.py index 99eddab8..a830fd24 100644 --- a/examples/wordsToNum.py +++ b/examples/wordsToNum.py @@ -8,85 +8,92 @@ from functools import reduce -def makeLit(s, val): +def make_literal_converter(s, val): ret = pp.CaselessLiteral(s) - return ret.setParseAction(pp.replaceWith(val)) - - -unitDefinitions = [ - ("zero", 0), - ("oh", 0), - ("zip", 0), - ("zilch", 0), - ("nada", 0), - ("bupkis", 0), - ("one", 1), - ("two", 2), - ("three", 3), - ("four", 4), - ("five", 5), - ("six", 6), - ("seven", 7), - ("eight", 8), - ("nine", 9), - ("ten", 10), - ("eleven", 11), - ("twelve", 12), - ("thirteen", 13), - ("fourteen", 14), - ("fifteen", 15), - ("sixteen", 16), - ("seventeen", 17), - ("eighteen", 18), - ("nineteen", 19), -] + return ret.set_parse_action(pp.replaceWith(val)) + + +unit_definitions = { + "zero": 0, + "oh": 0, + "zip": 0, + "zilch": 0, + "nada": 0, + "bupkis": 0, + "one": 1, + "two": 2, + "three": 3, + "four": 4, + "five": 5, + "six": 6, + "seven": 7, + "eight": 8, + "nine": 9, + "ten": 10, + "eleven": 11, + "twelve": 12, + "thirteen": 13, + "fourteen": 14, + "fifteen": 15, + "sixteen": 16, + "seventeen": 17, + "eighteen": 18, + "nineteen": 19, +} units = pp.MatchFirst( - makeLit(s, v) for s, v in sorted(unitDefinitions, key=lambda d: -len(d[0])) + make_literal_converter(s, v) + for s, v in sorted(unit_definitions.items(), key=lambda d: -len(d[0])) ) -tensDefinitions = [ - ("ten", 10), - ("twenty", 20), - ("thirty", 30), - ("forty", 40), - ("fourty", 40), # for the spelling-challenged... - ("fifty", 50), - ("sixty", 60), - ("seventy", 70), - ("eighty", 80), - ("ninety", 90), -] -tens = pp.MatchFirst(makeLit(s, v) for s, v in tensDefinitions) - -hundreds = makeLit("hundred", 100) - -majorDefinitions = [ - ("thousand", int(1e3)), - ("million", int(1e6)), - ("billion", int(1e9)), - ("trillion", int(1e12)), - ("quadrillion", int(1e15)), - ("quintillion", int(1e18)), -] -mag = pp.MatchFirst(makeLit(s, v) for s, v in majorDefinitions) +tens_definitions = { + "ten": 10, + "twenty": 20, + "thirty": 30, + "forty": 40, + "fourty": 40, # for the spelling-challenged... + "fifty": 50, + "sixty": 60, + "seventy": 70, + "eighty": 80, + "ninety": 90, +} +tens = pp.MatchFirst( + make_literal_converter(s, v) + for s, v in tens_definitions.items() +) + +hundreds = make_literal_converter("hundred", 100) + +major_definitions = { + "thousand": int(1e3), + "million": int(1e6), + "billion": int(1e9), + "trillion": int(1e12), + "quadrillion": int(1e15), + "quintillion": int(1e18), +} +mag = pp.MatchFirst( + make_literal_converter(s, v) + for s, v in major_definitions.items() +) wordprod = lambda t: reduce(mul, t) numPart = ( ( ( - (units + pp.Optional(hundreds)).setParseAction(wordprod) + pp.Optional(tens) - ).setParseAction(sum) + (units + pp.Optional(hundreds)).set_parse_action(wordprod) + pp.Optional(tens) + ).set_parse_action(sum) ^ tens ) + pp.Optional(units) -).setParseAction(sum) -numWords = ( - (numPart + pp.Optional(mag)).setParseAction(wordprod)[1, ...] -).setParseAction(sum) -numWords.setName("num word parser") +).set_parse_action(sum) +num_words = ( + (numPart + pp.Optional(mag)).set_parse_action(wordprod)[1, ...] +).set_parse_action(sum) +num_words.setName("num word parser") -numWords.ignore(pp.Literal("-")) -numWords.ignore(pp.CaselessLiteral("and")) +num_words.ignore(pp.Literal("-")) +num_words.ignore(pp.CaselessLiteral("and")) tests = """ one hundred twenty hundred, None @@ -108,7 +115,7 @@ def makeLit(s, val): # use '| ...' 
to indicate "if omitted, skip to next" logic test_expr = ( - (numWords("result") | ...) + (num_words("result") | ...) + "," + (pp.pyparsing_common.integer("expected") | "None") ) diff --git a/pyparsing/__init__.py b/pyparsing/__init__.py index 543ceb62..726c76cb 100644 --- a/pyparsing/__init__.py +++ b/pyparsing/__init__.py @@ -120,8 +120,8 @@ def __repr__(self): return f"{__name__}.{type(self).__name__}({', '.join('{}={!r}'.format(*nv) for nv in zip(self._fields, self))})" -__version_info__ = version_info(3, 2, 0, "final", 1) -__version_time__ = "13 Oct 2024 09:46 UTC" +__version_info__ = version_info(3, 2, 1, "final", 1) +__version_time__ = "31 Dec 2024 20:41 UTC" __version__ = __version_info__.__version__ __versionTime__ = __version_time__ __author__ = "Paul McGuire " diff --git a/pyparsing/actions.py b/pyparsing/actions.py index 1d2dce99..f491aab9 100644 --- a/pyparsing/actions.py +++ b/pyparsing/actions.py @@ -1,21 +1,34 @@ # actions.py +from __future__ import annotations + +from typing import Union, Callable, Any from .exceptions import ParseException from .util import col, replaced_by_pep8 +from .results import ParseResults + + +ParseAction = Union[ + Callable[[], Any], + Callable[[ParseResults], Any], + Callable[[int, ParseResults], Any], + Callable[[str, int, ParseResults], Any], +] class OnlyOnce: """ Wrapper for parse actions, to ensure they are only called once. + Note: parse action signature must include all 3 arguments. """ - def __init__(self, method_call): + def __init__(self, method_call: Callable[[str, int, ParseResults], Any]): from .core import _trim_arity self.callable = _trim_arity(method_call) self.called = False - def __call__(self, s, l, t): + def __call__(self, s: str, l: int, t: ParseResults) -> ParseResults: if not self.called: results = self.callable(s, l, t) self.called = True @@ -30,20 +43,20 @@ def reset(self): self.called = False -def match_only_at_col(n): +def match_only_at_col(n: int) -> ParseAction: """ Helper method for defining parse actions that require matching at a specific column in the input text. """ - def verify_col(strg, locn, toks): + def verify_col(strg: str, locn: int, toks: ParseResults) -> None: if col(locn, strg) != n: raise ParseException(strg, locn, f"matched token not at column {n}") return verify_col -def replace_with(repl_str): +def replace_with(repl_str: str) -> ParseAction: """ Helper method for common parse actions that simply return a literal value. Especially useful when used with @@ -60,7 +73,7 @@ def replace_with(repl_str): return lambda s, l, t: [repl_str] -def remove_quotes(s, l, t): +def remove_quotes(s: str, l: int, t: ParseResults) -> Any: """ Helper parse action for removing quotation marks from parsed quoted strings. 
@@ -77,7 +90,7 @@ def remove_quotes(s, l, t): return t[0][1:-1] -def with_attribute(*args, **attr_dict): +def with_attribute(*args: tuple[str, str], **attr_dict) -> ParseAction: """ Helper to create a validating parse action to be used with start tags created with :class:`make_xml_tags` or @@ -133,17 +146,17 @@ def with_attribute(*args, **attr_dict): 1 4 0 1 0 1,3 2,3 1,1 """ + attrs_list: list[tuple[str, str]] = [] if args: - attrs = args[:] + attrs_list.extend(args) else: - attrs = attr_dict.items() - attrs = [(k, v) for k, v in attrs] + attrs_list.extend(attr_dict.items()) - def pa(s, l, tokens): - for attrName, attrValue in attrs: + def pa(s: str, l: int, tokens: ParseResults) -> None: + for attrName, attrValue in attrs_list: if attrName not in tokens: raise ParseException(s, l, "no matching attribute " + attrName) - if attrValue != with_attribute.ANY_VALUE and tokens[attrName] != attrValue: + if attrValue != with_attribute.ANY_VALUE and tokens[attrName] != attrValue: # type: ignore [attr-defined] raise ParseException( s, l, @@ -156,7 +169,7 @@ def pa(s, l, tokens): with_attribute.ANY_VALUE = object() # type: ignore [attr-defined] -def with_class(classname, namespace=""): +def with_class(classname: str, namespace: str = "") -> ParseAction: """ Simplified version of :class:`with_attribute` when matching on a div class - made difficult because ``class`` is diff --git a/pyparsing/common.py b/pyparsing/common.py index 649aad00..e4651108 100644 --- a/pyparsing/common.py +++ b/pyparsing/common.py @@ -210,14 +210,14 @@ class pyparsing_common: """any numeric expression, returns the corresponding Python type""" fnumber = ( - Regex(r"[+-]?\d+\.?\d*([eE][+-]?\d+)?") + Regex(r"[+-]?\d+\.?\d*(?:[eE][+-]?\d+)?") .set_name("fnumber") .set_parse_action(convert_to_float) ) """any int or real number, returned as float""" ieee_float = ( - Regex(r"(?i)[+-]?((\d+\.?\d*(e[+-]?\d+)?)|nan|inf(inity)?)") + Regex(r"(?i:[+-]?(?:(?:\d+\.?\d*(?:e[+-]?\d+)?)|nan|inf(?:inity)?))") .set_name("ieee_float") .set_parse_action(convert_to_float) ) diff --git a/pyparsing/core.py b/pyparsing/core.py index 4f43c3bf..b884e2d4 100644 --- a/pyparsing/core.py +++ b/pyparsing/core.py @@ -215,12 +215,7 @@ def _should_enable_warnings( _generatorType = types.GeneratorType ParseImplReturnType = tuple[int, Any] PostParseReturnType = Union[ParseResults, Sequence[ParseResults]] -ParseAction = Union[ - Callable[[], Any], - Callable[[ParseResults], Any], - Callable[[int, ParseResults], Any], - Callable[[str, int, ParseResults], Any], -] + ParseCondition = Union[ Callable[[], bool], Callable[[ParseResults], bool], @@ -486,6 +481,7 @@ def __init__(self, savelist: bool = False): self.callPreparse = True self.callDuringTry = False self.suppress_warnings_: list[Diagnostics] = [] + self.show_in_diagram = True def suppress_warning(self, warning_type: Diagnostics) -> ParserElement: """ @@ -5558,7 +5554,8 @@ def __or__(self, other) -> ParserElement: not in self.suppress_warnings_ ): warnings.warn( - "using '<<' operator with '|' is probably an error, use '<<='", + "warn_on_match_first_with_lshift_operator:" + " using '<<' operator with '|' is probably an error, use '<<='", stacklevel=2, ) ret = super().__or__(other) @@ -5572,7 +5569,8 @@ def __del__(self): and Diagnostics.warn_on_assignment_to_Forward not in self.suppress_warnings_ ): warnings.warn_explicit( - "Forward defined here but no expression attached later using '<<=' or '<<'", + "warn_on_assignment_to_Forward:" + " Forward defined here but no expression attached later using '<<=' or 
'<<'", UserWarning, filename=self.caller_frame.filename, lineno=self.caller_frame.lineno, @@ -5600,7 +5598,8 @@ def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType: else: stacklevel = 2 warnings.warn( - "Forward expression was never assigned a value, will not parse any input", + "warn_on_parse_using_empty_Forward:" + " Forward expression was never assigned a value, will not parse any input", stacklevel=stacklevel, ) if not ParserElement._left_recursion_enabled: @@ -6157,7 +6156,7 @@ def autoname_elements() -> None: Utility to simplify mass-naming of parser elements, for generating railroad diagram with named subdiagrams. """ - calling_frame = sys._getframe().f_back + calling_frame = sys._getframe(1) if calling_frame is None: return calling_frame = typing.cast(types.FrameType, calling_frame) diff --git a/pyparsing/diagram/__init__.py b/pyparsing/diagram/__init__.py index 7926f2c3..56526b74 100644 --- a/pyparsing/diagram/__init__.py +++ b/pyparsing/diagram/__init__.py @@ -1,6 +1,7 @@ # mypy: ignore-errors from __future__ import annotations +import itertools import railroad import pyparsing import dataclasses @@ -40,7 +41,7 @@ {{ body | safe }} {% for diagram in diagrams %}
-        <h1 class="railroad-heading">{{ diagram.title }}</h1>
+        <h1 class="railroad-heading" id="{{ diagram.bookmark }}">{{ diagram.title }}</h1>
{{ diagram.text }}
{{ diagram.svg }} @@ -56,8 +57,35 @@ template = Template(jinja2_template_source) +_bookmark_lookup = {} +_bookmark_ids = itertools.count(start=1) + +def _make_bookmark(s: str) -> str: + """ + Converts a string into a valid HTML bookmark (ID or anchor name). + """ + if s in _bookmark_lookup: + return _bookmark_lookup[s] + + # Replace invalid characters with hyphens and ensure only valid characters + bookmark = re.sub(r'[^a-zA-Z0-9-]+', '-', s) + + # Ensure it starts with a letter by adding 'z' if necessary + if not bookmark[:1].isalpha(): + bookmark = f"z{bookmark}" + + # Convert to lowercase and strip hyphens + bookmark = bookmark.lower().strip('-') + + _bookmark_lookup[s] = bookmark = f"{bookmark}-{next(_bookmark_ids):04d}" + + return bookmark + + def _collapse_verbose_regex(regex_str: str) -> str: - collapsed = pyparsing.Regex(r"#.*").suppress().transform_string(regex_str) + if "\n" not in regex_str: + return regex_str + collapsed = pyparsing.Regex(r"#.*$").suppress().transform_string(regex_str) collapsed = re.sub(r"\s*\n\s*", "", collapsed) return collapsed @@ -72,6 +100,11 @@ class NamedDiagram: index: int diagram: railroad.DiagramItem = None + @property + def bookmark(self): + bookmark = _make_bookmark(self.name) + return bookmark + T = TypeVar("T") @@ -99,7 +132,7 @@ class AnnotatedItem(railroad.Group): """ def __init__(self, label: str, item): - super().__init__(item=item, label=f"[{label}]") + super().__init__(item=item, label=f"[{label}]" if label else "") class EditablePartial(Generic[T]): @@ -162,7 +195,11 @@ def railroad_to_html(diagrams: list[NamedDiagram], embed=False, **kwargs) -> str title = diagram.name if diagram.index == 0: title += " (root)" - data.append({"title": title, "text": "", "svg": io.getvalue()}) + data.append( + { + "title": title, "text": "", "svg": io.getvalue(), "bookmark": diagram.bookmark + } + ) return template.render(diagrams=data, embed=embed, **kwargs) @@ -336,6 +373,12 @@ def __delitem__(self, key: int): def __contains__(self, key: int): return key in self._element_diagram_states + def get(self, key, default=None): + try: + return self[key] + except KeyError: + return default + def generate_unnamed(self) -> int: """ Generate a number used in the name of an otherwise unnamed diagram @@ -360,7 +403,8 @@ def extract_into_diagram(self, el_id: int): # Replace the original definition of this element with a regular block if position.parent: - ret = EditablePartial.from_call(railroad.NonTerminal, text=position.name) + href = f"#{_make_bookmark(position.name)}" + ret = EditablePartial.from_call(railroad.NonTerminal, text=position.name, href=href) if "item" in position.parent.kwargs: position.parent.kwargs["item"] = ret elif "items" in position.parent.kwargs: @@ -447,7 +491,7 @@ def _visible_exprs(exprs: Iterable[pyparsing.ParserElement]): return [ e for e in exprs - if not (e.customName or e.resultsName or isinstance(e, non_diagramming_exprs)) + if not isinstance(e, non_diagramming_exprs) ] @@ -461,6 +505,7 @@ def _to_diagram_element( name_hint: str = None, show_results_names: bool = False, show_groups: bool = False, + show_hidden: bool = False, ) -> typing.Optional[EditablePartial]: """ Recursively converts a PyParsing Element to a railroad Element @@ -472,8 +517,9 @@ def _to_diagram_element( do so :param name_hint: If provided, this will override the generated name :param show_results_names: bool flag indicating whether to add annotations for results names - :returns: The converted version of the input element, but as a Partial that hasn't yet been 
constructed :param show_groups: bool flag indicating whether to show groups using bounding box + :param show_hidden: bool flag indicating whether to show elements that are typically hidden + :returns: The converted version of the input element, but as a Partial that hasn't yet been constructed """ exprs = element.recurse() name = name_hint or element.customName or type(element).__name__ @@ -489,7 +535,7 @@ def _to_diagram_element( element, ( # pyparsing.TokenConverter, - # pyparsing.Forward, + pyparsing.Forward, pyparsing.Located, ), ): @@ -513,25 +559,33 @@ def _to_diagram_element( # If the element isn't worth extracting, we always treat it as the first time we say it if _worth_extracting(element): - if el_id in lookup and lookup[el_id].name is not None: + looked_up = lookup.get(el_id) + if looked_up and looked_up.name is not None: # If we've seen this element exactly once before, we are only just now finding out that it's a duplicate, # so we have to extract it into a new diagram. - looked_up = lookup[el_id] looked_up.mark_for_extraction(el_id, lookup, name=name_hint) - ret = EditablePartial.from_call(railroad.NonTerminal, text=looked_up.name) + href = f"#{_make_bookmark(looked_up.name)}" + ret = EditablePartial.from_call(railroad.NonTerminal, text=looked_up.name, href=href) return ret elif el_id in lookup.diagrams: # If we have seen the element at least twice before, and have already extracted it into a subdiagram, we # just put in a marker element that refers to the sub-diagram + text = lookup.diagrams[el_id].kwargs["name"] ret = EditablePartial.from_call( - railroad.NonTerminal, text=lookup.diagrams[el_id].kwargs["name"] + railroad.NonTerminal, text=text, href=f"#{_make_bookmark(text)}" ) return ret # Recursively convert child elements # Here we find the most relevant Railroad element for matching pyparsing Element # We use ``items=[]`` here to hold the place for where the child elements will go once created + + # see if this element is normally hidden, and whether hidden elements are desired + # if not, just return None + if not element.show_in_diagram and not show_hidden: + return None + if isinstance(element, pyparsing.And): # detect And's created with ``expr*N`` notation - for these use a OneOrMore with a repeat # (all will have the same name, and resultsName) @@ -566,7 +620,9 @@ def _to_diagram_element( if show_groups: ret = EditablePartial.from_call(AnnotatedItem, label="", item="") else: - ret = EditablePartial.from_call(railroad.Sequence, items=[]) + ret = EditablePartial.from_call( + railroad.Group, item=None, label=element_results_name + ) elif isinstance(element, pyparsing.TokenConverter): label = type(element).__name__.lower() if label == "tokenconverter": @@ -607,10 +663,6 @@ def _to_diagram_element( *args, ) ret = EditablePartial.from_call(railroad.ZeroOrMore, item="") - elif isinstance(element, pyparsing.Group): - ret = EditablePartial.from_call( - railroad.Group, item=None, label=element_results_name - ) elif isinstance(element, pyparsing.Empty) and not element.customName: # Skip unnamed "Empty" elements ret = None @@ -619,10 +671,8 @@ def _to_diagram_element( elif len(exprs) > 0 and not element_results_name: ret = EditablePartial.from_call(railroad.Group, item="", label=name) elif isinstance(element, pyparsing.Regex): - patt = _collapse_verbose_regex(element.pattern) - element.pattern = patt - element._defaultName = None - ret = EditablePartial.from_call(railroad.Terminal, element.defaultName) + collapsed_patt = _collapse_verbose_regex(element.pattern) + ret = 
EditablePartial.from_call(railroad.Terminal, collapsed_patt) elif len(exprs) > 0: ret = EditablePartial.from_call(railroad.Sequence, items=[]) else: @@ -685,8 +735,10 @@ def _to_diagram_element( if el_id in lookup and lookup[el_id].extract and lookup[el_id].complete: lookup.extract_into_diagram(el_id) if ret is not None: + text = lookup.diagrams[el_id].kwargs["name"] + href = f"#{_make_bookmark(text)}" ret = EditablePartial.from_call( - railroad.NonTerminal, text=lookup.diagrams[el_id].kwargs["name"] + railroad.NonTerminal, text=text, href=href ) return ret diff --git a/pyparsing/helpers.py b/pyparsing/helpers.py index d2bd05f3..f781e871 100644 --- a/pyparsing/helpers.py +++ b/pyparsing/helpers.py @@ -199,7 +199,8 @@ def one_of( and __diag__.warn_on_multiple_string_args_to_oneof ): warnings.warn( - "More than one string argument passed to one_of, pass" + "warn_on_multiple_string_args_to_oneof:" + " More than one string argument passed to one_of, pass" " choices as a list or space-delimited string", stacklevel=2, ) @@ -779,25 +780,27 @@ def parseImpl(self, instring, loc, doActions=True): _FB.__name__ = "FollowedBy>" ret = Forward() + ret.set_name(f"{base_expr.name}_expression") if isinstance(lpar, str): lpar = Suppress(lpar) if isinstance(rpar, str): rpar = Suppress(rpar) + nested_expr = (lpar + ret + rpar).set_name(f"nested_{base_expr.name}") + # if lpar and rpar are not suppressed, wrap in group if not (isinstance(lpar, Suppress) and isinstance(rpar, Suppress)): - lastExpr = base_expr | Group(lpar + ret + rpar).set_name( - f"nested_{base_expr.name}" - ) + lastExpr = base_expr | Group(nested_expr) else: - lastExpr = base_expr | (lpar + ret + rpar).set_name(f"nested_{base_expr.name}") - root_expr = lastExpr + lastExpr = base_expr | nested_expr arity: int rightLeftAssoc: opAssoc pa: typing.Optional[ParseAction] opExpr1: ParserElement opExpr2: ParserElement + matchExpr: ParserElement + match_lookahead: ParserElement for operDef in op_list: opExpr, arity, rightLeftAssoc, pa = (operDef + (None,))[:4] # type: ignore[assignment] if isinstance(opExpr, str_type): @@ -809,9 +812,9 @@ def parseImpl(self, instring, loc, doActions=True): "if numterms=3, opExpr must be a tuple or list of two expressions" ) opExpr1, opExpr2 = opExpr - term_name = f"{opExpr1}{opExpr2} term" + term_name = f"{opExpr1}{opExpr2} operations" else: - term_name = f"{opExpr} term" + term_name = f"{opExpr} operations" if not 1 <= arity <= 3: raise ValueError("operator must be unary (1), binary (2), or ternary (3)") @@ -821,48 +824,62 @@ def parseImpl(self, instring, loc, doActions=True): thisExpr: ParserElement = Forward().set_name(term_name) thisExpr = typing.cast(Forward, thisExpr) + match_lookahead = And([]) if rightLeftAssoc is OpAssoc.LEFT: if arity == 1: - matchExpr = _FB(lastExpr + opExpr) + Group(lastExpr + opExpr[1, ...]) + match_lookahead = _FB(lastExpr + opExpr) + matchExpr = Group(lastExpr + opExpr[1, ...]) elif arity == 2: if opExpr is not None: - matchExpr = _FB(lastExpr + opExpr + lastExpr) + Group( - lastExpr + (opExpr + lastExpr)[1, ...] 
- ) + match_lookahead = _FB(lastExpr + opExpr + lastExpr) + matchExpr = Group(lastExpr + (opExpr + lastExpr)[1, ...]) else: - matchExpr = _FB(lastExpr + lastExpr) + Group(lastExpr[2, ...]) + match_lookahead = _FB(lastExpr + lastExpr) + matchExpr = Group(lastExpr[2, ...]) elif arity == 3: - matchExpr = _FB( + match_lookahead = _FB( lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr - ) + Group(lastExpr + OneOrMore(opExpr1 + lastExpr + opExpr2 + lastExpr)) + ) + matchExpr = Group( + lastExpr + (opExpr1 + lastExpr + opExpr2 + lastExpr)[1, ...] + ) elif rightLeftAssoc is OpAssoc.RIGHT: if arity == 1: # try to avoid LR with this extra test if not isinstance(opExpr, Opt): opExpr = Opt(opExpr) - matchExpr = _FB(opExpr.expr + thisExpr) + Group(opExpr + thisExpr) + match_lookahead = _FB(opExpr.expr + thisExpr) + matchExpr = Group(opExpr + thisExpr) elif arity == 2: if opExpr is not None: - matchExpr = _FB(lastExpr + opExpr + thisExpr) + Group( - lastExpr + (opExpr + thisExpr)[1, ...] - ) + match_lookahead = _FB(lastExpr + opExpr + thisExpr) + matchExpr = Group(lastExpr + (opExpr + thisExpr)[1, ...]) else: - matchExpr = _FB(lastExpr + thisExpr) + Group( - lastExpr + thisExpr[1, ...] - ) + match_lookahead = _FB(lastExpr + thisExpr) + matchExpr = Group(lastExpr + thisExpr[1, ...]) elif arity == 3: - matchExpr = _FB( + match_lookahead = _FB( lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr - ) + Group(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr) + ) + matchExpr = Group(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr) + + # suppress lookahead expr from railroad diagrams + match_lookahead.show_in_diagram = False + + # TODO - determine why this statement can't be included in the following + # if pa block + matchExpr = match_lookahead + matchExpr + if pa: if isinstance(pa, (tuple, list)): matchExpr.set_parse_action(*pa) else: matchExpr.set_parse_action(pa) + thisExpr <<= (matchExpr | lastExpr).setName(term_name) lastExpr = thisExpr + ret <<= lastExpr - root_expr.set_name("base_expr") return ret @@ -1009,10 +1026,9 @@ def checkUnindent(s, l, t): return smExpr.set_name("indented block") -# it's easy to get these comment structures wrong - they're very common, so may as well make them available -c_style_comment = Combine(Regex(r"/\*(?:[^*]|\*(?!/))*") + "*/").set_name( - "C style comment" -) +# it's easy to get these comment structures wrong - they're very common, +# so may as well make them available +c_style_comment = Regex(r"/\*(?:[^*]|\*(?!/))*\*\/").set_name("C style comment") "Comment of the form ``/* ... */``" html_comment = Regex(r"").set_name("HTML comment") @@ -1022,8 +1038,8 @@ def checkUnindent(s, l, t): dbl_slash_comment = Regex(r"//(?:\\\n|[^\n])*").set_name("// comment") "Comment of the form ``// ... 
(to end of line)``" -cpp_style_comment = Combine( - Regex(r"/\*(?:[^*]|\*(?!/))*") + "*/" | dbl_slash_comment +cpp_style_comment = Regex( + r"(?:/\*(?:[^*]|\*(?!/))*\*\/)|(?://(?:\\\n|[^\n])*)" ).set_name("C++ style comment") "Comment of either form :class:`c_style_comment` or :class:`dbl_slash_comment`" diff --git a/pyparsing/results.py b/pyparsing/results.py index 24584783..be834b7e 100644 --- a/pyparsing/results.py +++ b/pyparsing/results.py @@ -523,6 +523,7 @@ def as_list(self, *, flatten: bool = False) -> list: result_list = result.as_list() print(type(result_list), result_list) # -> ['sldkj', 'lsdkj', 'sldkj'] """ + def flattened(pr): to_visit = collections.deque([*self]) while to_visit: diff --git a/pyparsing/util.py b/pyparsing/util.py index 1487019c..03a60d4f 100644 --- a/pyparsing/util.py +++ b/pyparsing/util.py @@ -192,6 +192,7 @@ class _GroupConsecutive: (2, iter(['m'])) (3, iter(['p', 'q', 'r', 's'])) """ + def __init__(self): self.prev = 0 self.counter = itertools.count() diff --git a/readthedocs.yaml b/readthedocs.yaml new file mode 100644 index 00000000..38318dbb --- /dev/null +++ b/readthedocs.yaml @@ -0,0 +1,36 @@ +# Read the Docs configuration file for Sphinx projects +# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details + +# Required +version: 2 + +# Set the OS, Python version and other tools you might need +build: + os: ubuntu-22.04 + tools: + python: "3.12" + # You can also specify other tool versions: + # nodejs: "20" + # rust: "1.70" + # golang: "1.20" + +# Build documentation in the "docs/" directory with Sphinx +sphinx: + configuration: docs/conf.py + # You can configure Sphinx to use a different builder, for instance use the dirhtml builder for simpler URLs + # builder: "dirhtml" + builder: "html" + # Fail on all warnings to avoid broken references + # fail_on_warning: true + +# Optionally build your docs in additional formats such as PDF and ePub +# formats: +# - pdf +# - epub + +# Optional but recommended, declare the Python requirements required +# to build your documentation +# See https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html +# python: +# install: +# - requirements: docs/requirements.txt diff --git a/tests/test_diagram.py b/tests/test_diagram.py index 441d05e6..d9e331c5 100644 --- a/tests/test_diagram.py +++ b/tests/test_diagram.py @@ -88,8 +88,8 @@ def generate_railroad( def test_example_rr_diags(self): subtests = [ ("jsonObject", jsonObject, 8), - ("boolExpr", boolExpr, 7), - ("simpleSQL", simpleSQL, 22), + ("boolExpr", boolExpr, 6), + ("simpleSQL", simpleSQL, 20), ("calendars", calendars, 13), ] for label, example_expr, expected_rr_len in subtests: @@ -132,9 +132,9 @@ def test_nested_forward_with_inner_name_only(self): outer <<= inner railroad = self.generate_railroad(outer, "inner_only") - assert len(railroad) == 2 + assert len(railroad) == 1 railroad = self.generate_railroad(outer, "inner_only", show_results_names=True) - assert len(railroad) == 2 + assert len(railroad) == 1 def test_each_grammar(self): diff --git a/tests/test_examples.py b/tests/test_examples.py index bb51f054..8ea7c3a7 100644 --- a/tests/test_examples.py +++ b/tests/test_examples.py @@ -19,6 +19,9 @@ def _run(self, name): def test_numerics(self): self._run("numerics") + def test_parse_python_value(self): + self._run("parse_python_value") + def test_tap(self): self._run("TAP") diff --git a/tests/test_matplotlib_cases.py b/tests/test_matplotlib_cases.py index d2c1bce9..65d4013f 100644 --- a/tests/test_matplotlib_cases.py +++ 
b/tests/test_matplotlib_cases.py @@ -6,10 +6,10 @@ import pytest -if platform.python_implementation() == "PyPy": - mpl_mathtext = None -else: +try: import matplotlib.mathtext as mpl_mathtext +except ImportError: + mpl_mathtext = None # fmt: off @pytest.mark.parametrize( diff --git a/tests/test_unit.py b/tests/test_unit.py index 62b20b8d..48e7b24f 100644 --- a/tests/test_unit.py +++ b/tests/test_unit.py @@ -40,6 +40,14 @@ JYTHON_ENV = python_impl == "Jython" PYPY_ENV = python_impl == "PyPy" +# global flags for Python config settings +_config_vars = sysconfig.get_config_vars() +_config_args = set( + shlex.split(_config_vars.get("CONFIG_ARGS", "")) +) +PYTHON_JIT_ENABLED = "--enable-experimental-jit" in _config_args +PYTHON_FREE_THREADED = _config_vars.get("Py_GIL_DISABLED", 0) == 1 + # get full stack traces during testing pp.ParserElement.verbose_stacktrace = True @@ -152,11 +160,15 @@ def runTest(self): pp.__version__, pp.__version_time__, ) - python_jit_enabled = "--enable-experimental-jit" in shlex.split( - sysconfig.get_config_vars().get("CONFIG_ARGS", "") - ) + config_options = [] + if PYTHON_JIT_ENABLED: + config_options.append("JIT enabled") + if PYTHON_FREE_THREADED: + config_options.append("free_threaded") + config_options_str = f" ({','.join(config_options)})" print( - f"Python version {sys.version} {'(JIT enabled)' if python_jit_enabled else ''}" + f"Python version {sys.version}" + f"{config_options_str if config_options else ''}" ) print(f"__version_info__ : {pp.__version_info__}") print(f"__version_info__ repr: {repr(pp.__version_info__)}") @@ -1315,24 +1327,6 @@ def testQuotedStrings(self): f"quoted string escaped quote failure ({[str(s[0]) for s in allStrings]})", ) - print( - "testing catastrophic RE backtracking in implementation of dblQuotedString" - ) - for expr, test_string in [ - (pp.dblQuotedString, '"' + "\\xff" * 500), - (pp.sglQuotedString, "'" + "\\xff" * 500), - (pp.quotedString, '"' + "\\xff" * 500), - (pp.quotedString, "'" + "\\xff" * 500), - (pp.QuotedString('"'), '"' + "\\xff" * 500), - (pp.QuotedString("'"), "'" + "\\xff" * 500), - ]: - with self.subTest(expr=expr, test_string=test_string): - expr.parseString(test_string + test_string[0], parseAll=True) - try: - expr.parseString(test_string, parseAll=True) - except Exception: - continue - # test invalid endQuoteChar with self.subTest(): with self.assertRaises( @@ -1481,13 +1475,13 @@ def testCaselessOneOf(self): "Aa" * 4, "".join(res), "caseless1 CaselessLiteral return failed" ) - def testCommentParser(self): - print("verify processing of C and HTML comments") - testdata = """ + def testCStyleCommentParser(self): + print("verify processing of C-style /* */ comments") + testdata = f""" /* */ /** **/ /**/ - /***/ + /*{'*' * 1_000_000}*/ /****/ /* /*/ /** /*/ @@ -1496,14 +1490,30 @@ def testCommentParser(self): ablsjdflj */ """ - found_lines = [ - pp.lineno(s, testdata) for t, s, e in pp.cStyleComment.scanString(testdata) - ] - self.assertEqual( - list(range(11))[2:], - found_lines, - f"only found C comments on lines {found_lines}", - ) + for test_expr in (pp.c_style_comment, pp.cpp_style_comment, pp.java_style_comment): + with self.subTest("parse test - /* */ comments", test_expr=test_expr): + found_matches = [ + len(t[0]) for t, s, e in test_expr.scanString(testdata) + ] + self.assertEqual( + [5, 7, 4, 1000004, 6, 6, 7, 8, 33], + found_matches, + f"only found {test_expr} lengths {found_matches}", + ) + + found_lines = [ + pp.lineno(s, testdata) for t, s, e in test_expr.scanString(testdata) + ] + 
self.assertEqual( + [2, 3, 4, 5, 6, 7, 8, 9, 10], + found_lines, + f"only found {test_expr} on lines {found_lines}", + ) + + def testHtmlCommentParser(self): + print("verify processing of HTML comments") + + test_expr = pp.html_comment testdata = """ @@ -1519,27 +1529,88 @@ def testCommentParser(self): ablsjdflj --> """ + found_matches = [ + len(t[0]) for t, s, e in test_expr.scanString(testdata) + ] + self.assertEqual( + [8, 10, 7, 8, 9, 9, 10, 11, 79], + found_matches, + f"only found {test_expr} lengths {found_matches}", + ) + found_lines = [ pp.lineno(s, testdata) for t, s, e in pp.htmlComment.scanString(testdata) ] self.assertEqual( - list(range(11))[2:], + [2, 3, 4, 5, 6, 7, 8, 9, 10], found_lines, f"only found HTML comments on lines {found_lines}", ) + def testDoubleSlashCommentParser(self): + print("verify processing of C++ and Java comments - // comments") + # test C++ single line comments that have line terminated with '\' (should continue comment to following line) - testSource = r""" + testdata = r""" // comment1 // comment2 \ still comment 2 // comment 3 """ - self.assertEqual( - 41, - len(pp.cppStyleComment.searchString(testSource)[1][0]), - r"failed to match single-line comment with '\' at EOL", + for test_expr in (pp.dbl_slash_comment, pp.cpp_style_comment, pp.java_style_comment): + with self.subTest("parse test - // comments", test_expr=test_expr): + found_matches = [ + len(t[0]) for t, s, e in test_expr.scanString(testdata) + ] + self.assertEqual( + [11, 41, 12], + found_matches, + f"only found {test_expr} lengths {found_matches}", + ) + + found_lines = [ + pp.lineno(s, testdata) for t, s, e in test_expr.scanString(testdata) + ] + self.assertEqual( + [2, 3, 5], + found_lines, + f"only found {test_expr} on lines {found_lines}", + ) + + def testReCatastrophicBacktrackingInQuotedStringParsers(self): + # reported by webpentest - 2016-04-28 + print( + "testing catastrophic RE backtracking in implementation of quoted string parsers" ) + for expr, test_string in [ + (pp.dblQuotedString, '"' + "\\xff" * 500), + (pp.sglQuotedString, "'" + "\\xff" * 500), + (pp.quotedString, '"' + "\\xff" * 500), + (pp.quotedString, "'" + "\\xff" * 500), + (pp.QuotedString('"'), '"' + "\\xff" * 500), + (pp.QuotedString("'"), "'" + "\\xff" * 500), + ]: + with self.subTest("Test catastrophic RE backtracking", expr=expr): + try: + expr.parse_string(test_string) + except pp.ParseException: + continue + + def testReCatastrophicBacktrackingInCommentParsers(self): + print( + "testing catastrophic RE backtracking in implementation of comment parsers" + ) + for expr, test_string in [ + (pp.c_style_comment, f"/*{'*' * 500}"), + (pp.cpp_style_comment, f"/*{'*' * 500}"), + (pp.java_style_comment, f"/*{'*' * 500}"), + (pp.html_comment, f"<-- {'-' * 500}") + ]: + with self.subTest("Test catastrophic RE backtracking", expr=expr): + try: + expr.parse_string(test_string) + except pp.ParseException: + continue def testParseExpressionResults(self): a = pp.Word("a", pp.alphas).setName("A") @@ -6310,10 +6381,10 @@ def testSetName(self): a | b | c d | e | f {a | b | c | d | e | f} - Forward: + | - term - + | - term - Forward: ?: term - ?: term + W:(0-9)_expression + + | - operations + W:(0-9)_expression + ?: operations Forward: {a | b | c [{d | e | f : ...}]...} int [, int]... (len) int... 
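The renamed ``infix_notation`` sub-expressions in the expected strings above (``..._expression``, ``... operations``) come from grammars of roughly this shape; the snippet below is a hypothetical reconstruction for illustration, not the actual test fixture::

    import pyparsing as pp

    # under 3.2.1, infix_notation names its result "<base>_expression" and each
    # precedence level "<operators> operations"
    arith = pp.infix_notation(
        pp.Word(pp.nums),
        [
            (pp.one_of("+ -"), 2, pp.opAssoc.LEFT),
            (("?", ":"), 3, pp.opAssoc.RIGHT),
        ],
    )
    print(arith.name)                        # -> W:(0-9)_expression
    print(arith.parse_string("1 + 2 ? 3 : 4"))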
@@ -9050,7 +9121,7 @@ def testDelimitedListName(self): ], ) self.assertEqual( - "Forward: + | - term [, Forward: + | - term]...", + "var_expression [, var_expression]...", str(pp.delimitedList(math)), ) @@ -10507,7 +10578,14 @@ def testExpressionDefaultStrings(self): self.assertEqual("(0-9)", repr(expr)) def testEmptyExpressionsAreHandledProperly(self): - from pyparsing.diagram import to_railroad + try: + from pyparsing.diagram import to_railroad + except ModuleNotFoundError as mnfe: + print("Failed 'from pyparsing.diagram import to_railroad'" + f"\n {type(mnfe).__name__}: {mnfe}") + if mnfe.__cause__: + print(f"\n {type(mnfe.__cause__).__name__}: {mnfe.__cause__}") + self.skipTest("Failed 'from pyparsing.diagram import to_railroad'") for cls in (pp.And, pp.Or, pp.MatchFirst, pp.Each): print("testing empty", cls.__name__)
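The diagram-related changes above (bookmark anchors, ``NonTerminal`` hrefs, hidden lookahead terms) can be exercised end to end with a small sketch like the following; it assumes the optional railroad-diagrams and Jinja2 dependencies are installed, and the output filename is arbitrary::

    import pyparsing as pp

    LBRACK, RBRACK = pp.Suppress.using_each("[]")
    value = pp.Forward()
    item = pp.common.integer | pp.QuotedString('"')
    value <<= item | pp.Group(LBRACK + pp.Optional(pp.DelimitedList(value)) + RBRACK)
    pp.autoname_elements()

    # in 3.2.1, each NonTerminal in the generated HTML is a link to its
    # sub-diagram, using the bookmark/href plumbing added in pyparsing/diagram
    value.create_diagram("value_diagram.html", show_results_names=True)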