diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index c8c79452..9967a08e 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -6,53 +6,43 @@ on: pull_request: paths: - - .github/workflows/cis.yml + - .github/workflows/ci.yml - pyparsing/* - pyproject.toml - tox.ini +permissions: + contents: read + jobs: tests: name: Unit tests runs-on: ${{ matrix.os || 'ubuntu-latest' }} strategy: matrix: + os: ["ubuntu-latest"] + toxenv: [py] + python-version: ["3.9", "3.10", "3.11", "3.12", "3.13.0-rc.2"] include: - - python-version: "3.6" - toxenv: py36 - - python-version: "3.7" - toxenv: py37 - - python-version: "3.8" - toxenv: py38 - - python-version: "3.9" - toxenv: py39 - - python-version: "3.10" - toxenv: py310 - - python-version: "3.10" - toxenv: py310 + - python-version: "3.12" os: macos-latest - - python-version: "pypy-3.7" - toxenv: pypy3 + - python-version: "3.11" + toxenv: mypy-check + - python-version: "pypy-3.9" env: - TOXENV: ${{ matrix.toxenv }} + TOXENV: ${{ matrix.toxenv || 'py' }} steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 + uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} - name: Install dependencies run: | python -m pip install --upgrade pip - python -m pip install tox codecov railroad-diagrams Jinja2 + python -m pip install tox railroad-diagrams Jinja2 - name: Test - run: tox -e ALL - - - name: Upload coverage to Codecov - if: ${{ matrix.os == 'ubuntu-latest' && matrix.python-version == '3.10' }} - env: - CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} - run: codecov + run: tox diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 799985da..b2808c23 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -3,4 +3,4 @@ repos: rev: stable hooks: - id: black - language_version: python3.6 + language_version: python3.9 diff --git a/CHANGES b/CHANGES index e4b399dc..b98feb11 100644 --- a/CHANGES +++ b/CHANGES @@ -2,17 +2,619 @@ Change Log ========== -Version 3.0.9 - (in development) ---------------- +RELEASE PLANNING NOTES: + +In the pyparsing release 3.3.0, use of many of the pre-PEP8 methods (such as +`ParserElement.parseString`) will start to raise `DeprecationWarnings`. I plan to +completely drop the pre-PEP8 methods in pyparsing 4.0, though we won't see that release +until some time in 2026. So there is plenty of time to convert existing parsers to +the new function names before the old functions are completely removed. (Big help from +Devin J. Pohly in structuring the code to enable this peaceful transition.) + +=========================================================================================== + The version 3.3.0 release will begin emitting `DeprecationWarnings` for pyparsing methods + that have been renamed to PEP8-compliant names (introduced in pyparsing 3.0.0, in August, + 2021, with legacy names retained as aliases). In preparation, I have added in pyparsing + 3.2.2 a utility for finding and replacing the legacy method names with the new names. + This utility is located at `pyparsing/tools/cvt_pep8_names.py`. This script will scan all + Python files specified on the command line, and if the `-u` option is selected, will + replace all occurrences of the old method names with the new PEP8-compliant names, + updating the files in place. 
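+
+ As an illustration (a before/after sketch; `expr`, `data`, and `fn` are
+ placeholder names), the conversion performed by this utility amounts to
+ renames such as:
+
+     # legacy camelCase names (pre-PEP8)
+     result = expr.parseString(data)
+     expr.setParseAction(fn)
+
+     # equivalent PEP8-compliant names (pyparsing 3.0.0 and later)
+     result = expr.parse_string(data)
+     expr.set_parse_action(fn)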
+ + Here is an example that converts all the files in the pyparsing `/examples` directory: + + python -m pyparsing.tools.cvt_pyparsing_pep8_names -u examples/*.py + + The new names are compatible with pyparsing versions 3.0.0 and later. +=========================================================================================== + + +Required Python versions by pyparsing version +--------------------------------------------- + ++--------------------------------------------------+-------------------+ +| pyparsing version | Required Python | ++==================================================+===================+ +| 3.2.0 - later | 3.9 or later | +| 3.0.8 - 3.1.4 | 3.6.8 or later | +| 3.0.0 - 3.0.7 (these versions are discouraged) | 3.6 or later | +| 2.4.7 | 2.7 or later | +| 1.5.7 | 2.6 - 2.7 | ++--------------------------------------------------+-------------------+ + + +Version 3.2.4 - September, 2025 +------------------------------- +- Barring any catastrophic bugs in this release, this will be the last release in + the 3.2.x line. The next release, 3.3.0, will begin emitting `DeprecationWarnings` + when the pre-PEP8 methods are used (see header notes above for more information, + including available automation for converting any existing code using + pyparsing with the old names). + +- Fixed bug when using a copy of a `Word` expression (either by using the explicit + `copy()` method, or attaching a results name), and setting a new expression name, + a raised `ParseException` still used the original expression name. Also affected + `Regex` expressions with `as_match` or `as_group_list` = True. Reported by + Waqas Ilyas, in Issue #612 - good catch! + +- Fixed type annotation for `replace_with`, to accept `Any` type. Fixes Issue #602, + reported by esquonk. + +- Added locking around potential race condition in `ParserElement.reset_cache`, as + well as other cache-related methods. Fixes Issue #604, reported by CarlosDescalziIM. + +- Substantial update to docstrings and doc generation in preparation for 3.3.0, + great effort by FeRD, thanks! + +- Notable addition by FeRD to convert docstring examples to work with doctest! This + was long overdue, thanks so much! + + +Version 3.2.3 - March, 2025 +--------------------------- +- Fixed bug released in 3.2.2 in which `nested_expr` could overwrite parse actions + for defined content, and could truncate list of items within a nested list. + Fixes Issue #600, reported by hoxbro and luisglft, with helpful diag logs and + repro code. + + +Version 3.2.2 - March, 2025 +--------------------------- +- Released `cvt_pyparsing_pep8_names.py` conversion utility to upgrade pyparsing-based + programs and libraries that use legacy camelCase names to use the new PEP8-compliant + snake_case method names. The converter can also be imported into other scripts as + + from pyparsing.tools.cvt_pyparsing_pep8_names import pep8_converter + +- Fixed bug in `nested_expr` where nested contents were stripped of whitespace when + the default whitespace characters were cleared (raised in this StackOverflow + question https://stackoverflow.com/questions/79327649 by Ben Alan). Also addressed + bug in resolving PEP8 compliant argument name and legacy argument name. + +- Fixed bug in `rest_of_line` and the underlying `Regex` class, in which matching a + pattern that could match an empty string (such as `".*"` or `"[A-Z]*"` would not raise + a `ParseException` at or beyond the end of the input string. 
This could cause an + infinite parsing loop when parsing `rest_of_line` at the end of the input string. + Reported by user Kylotan, thanks! (Issue #593) + +- Enhancements and extra input validation for `pyparsing.util.make_compressed_re` - see + usage in `examples/complex_chemical_formulas.py` and result in the generated railroad + diagram `examples/complex_chemical_formulas_diagram.html`. Properly escapes characters + like "." and "*" that have special meaning in regular expressions. + +- Fixed bug in `one_of()` to properly escape characters that are regular expression markers + (such as '*', '+', '?', etc.) before building the internal regex. + +- Better exception message for `MatchFirst` and `Or` expressions, showing all alternatives + rather than just the first one. Fixes Issue #592, reported by Focke, thanks! + +- Added return type annotation of "-> None" for all `__init__()` methods, to satisfy + `mypy --strict` type checking. PR submitted by FeRD, thank you! + +- Added optional argument `show_hidden` to `create_diagram` to show + elements that are used internally by pyparsing, but are not part of the actual + parser grammar. For instance, the `Tag` class can insert values into the parsed + results but it does not actually parse any input, so by default it is not included + in a railroad diagram. By calling `create_diagram` with `show_hidden` = `True`, + these internal elements will be included. (You can see this in the tag_metadata.py + script in the examples directory.) + +- Fixed bug in `number_words.py` example. Also added `ebnf_number_words.py` to demonstrate + using the `ebnf.py` EBNF parser generator to build a similar parser directly from + EBNF. + +- Fixed syntax warning raised in `bigquery_view_parser.py`, invalid escape sequence "\s". + Reported by sameer-google, nice catch! (Issue #598) + +- Added support for Python 3.14. + + +Version 3.2.1 - December, 2024 +------------------------------ +- Updated generated railroad diagrams to make non-terminal elements links to their related + sub-diagrams. This _greatly_ improves navigation of the diagram, especially for + large, complex parsers. + +- Simplified railroad diagrams emitted for parsers using `infix_notation`, by hiding + lookahead terms. Renamed internally generated expressions for clarity, and improved + diagramming. + +- Improved performance of `cpp_style_comment`, `c_style_comment`, `common.fnumber` + and `common.ieee_float` Regex expressions. PRs submitted by Gabriel Gerlero, + nice work, thanks! + +- Add missing type annotations to `match_only_at_col`, `replace_with`, `remove_quotes`, + `with_attribute`, and `with_class`. Issue #585 reported by rafrafrek. + +- Added generated diagrams for many of the examples. + +- Replaced old `examples/0README.html` file with `examples/README.md` file. + + +Version 3.2.0 - October, 2024 +------------------------------- +- Discontinued support for Python 3.6, 3.7, and 3.8. Adopted new Python features from + Python versions 3.7-3.9: + - Updated type annotations to use built-in container types instead of names + imported from the `typing` module (e.g., `list[str]` vs `List[str]`). + - Reworked portions of the packrat cache to leverage insertion-preserving ordering + in dicts (including removal of uses of `OrderedDict`). + - Changed `pdb.set_trace()` call in `ParserElement.set_break()` to `breakpoint()`. + - Converted `typing.NamedTuple` to `dataclasses.dataclass` in railroad diagramming + code. + - Added `from __future__ import annotations` to clean up some type annotations. 
+ (with assistance from ISyncWithFoo, issue #535, thanks for the help!) + +- POSSIBLE BREAKING CHANGES + + The following bugfixes may result in subtle changes in the results returned or + exceptions raised by pyparsing. + + - Fixed code in `ParseElementEnhance` subclasses that + replaced detailed exception messages raised in contained expressions with a + less-specific and less-informative generic exception message and location. + + If your code has conditional logic based on the message content in raised + `ParseExceptions`, this bugfix may require changes in your code. + + - Fixed bug in `transform_string()` where whitespace + in the input string was not properly preserved in the output string. + + If your code uses `transform_string`, this bugfix may require changes in + your code. + + - Fixed bug where an `IndexError` raised in a parse action was + incorrectly handled as an `IndexError` raised as part of the `ParserElement` + parsing methods, and reraised as a `ParseException`. Now an `IndexError` + that raises inside a parse action will properly propagate out as an `IndexError`. + (Issue #573, reported by August Karlstedt, thanks!) + + If your code raises `IndexError`s in parse actions, this bugfix may require + changes in your code. + +- FIXES AND NEW FEATURES + + - Added type annotations to remainder of `pyparsing` package, and added `mypy` + run to `tox.ini`, so that type annotations are now run as part of pyparsing's CI. + Addresses Issue #373, raised by Iwan Aucamp, thanks! + + - Exception message format can now be customized, by overriding + `ParseBaseException.format_message`: + + def custom_exception_message(exc) -> str: + found_phrase = f", found {exc.found}" if exc.found else "" + return f"{exc.lineno}:{exc.column} {exc.msg}{found_phrase}" + + ParseBaseException.formatted_message = custom_exception_message + + (PR #571 submitted by Odysseyas Krystalakos, nice work!) + + - `run_tests` now detects if an exception is raised in a parse action, and will + report it with an enhanced error message, with the exception type, string, + and parse action name. + + - `QuotedString` now handles translation of escaped integer, hex, octal, and + Unicode sequences to their corresponding characters. + + - Fixed the displayed output of `Regex` terms to deduplicate repeated backslashes, + for easier reading in debugging, printing, and railroad diagrams. + + - Fixed (or at least reduced) elusive bug when generating railroad diagrams, + where some diagram elements were just empty blocks. Fix submitted by RoDuth, + thanks a ton! + + - Fixed railroad diagrams that get generated with a parser containing a Regex element + defined using a verbose pattern - the pattern gets flattened and comments removed + before creating the corresponding diagram element. + + - Defined a more performant regular expression used internally by `common_html_entity`. + + - `Regex` instances can now be created using a callable that takes no arguments + and just returns a string or a compiled regular expression, so that creating complex + regular expression patterns can be deferred until they are actually used for the first + time in the parser. + + - Added optional `flatten` Boolean argument to `ParseResults.as_list()`, to + return the parsed values in a flattened list. + + - Added `indent` and `base_1` arguments to `pyparsing.testing.with_line_numbers`. When + using `with_line_numbers` inside a parse action, set `base_1`=False, since the + reported `loc` value is 0-based. 
`indent` can be a leading string (typically of + spaces or tabs) to indent the numbered string passed to `with_line_numbers`. + Added while working on #557, reported by Bernd Wechner. + +- NEW/ENHANCED EXAMPLES + + - Added query syntax to `mongodb_query_expression.py` with: + - better support for array fields ("contains all", + "contains any", and "contains none") + - "like" and "not like" operators to support SQL "%" wildcard matching + and "=~" operator to support regex matching + - text search using "search for" + - dates and datetimes as query values + - `a[0]` style array referencing + + - Added `lox_parser.py` example, a parser for the Lox language used as a tutorial in + Robert Nystrom's "Crafting Interpreters" (http://craftinginterpreters.com/). + With helpful corrections from RoDuth. + + - Added `complex_chemical_formulas.py` example, to add parsing capability for + formulas such as "3(C₆H₅OH)₂". + + - Updated `tag_emitter.py` to use new `Tag` class, introduced in pyparsing + 3.1.3. + + +Version 3.1.4 - August, 2024 +---------------------------- +- Fixed a regression introduced in pyparsing 3.1.3, addition of a type annotation that + referenced `re.Pattern`. Since this type was introduced in Python 3.7, using this type + definition broke Python 3.6 installs of pyparsing 3.1.3. PR submitted by Felix Fontein, + nice work! + + +Version 3.1.3 - August, 2024 +---------------------------- +- Added new `Tag` ParserElement, for inserting metadata into the parsed results. + This allows a parser to add metadata or annotations to the parsed tokens. + The `Tag` element also accepts an optional `value` parameter, defaulting to `True`. + See the new `tag_metadata.py` example in the `examples` directory. + + Example: + + # add tag indicating mood + end_punc = "." | ("!" + Tag("enthusiastic")) + greeting = "Hello" + Word(alphas) + end_punc + + result = greeting.parse_string("Hello World.") + print(result.dump()) + + result = greeting.parse_string("Hello World!") + print(result.dump()) + + prints: + + ['Hello', 'World', '.'] + + ['Hello', 'World', '!'] + - enthusiastic: True + +- Added example `mongodb_query_expression.py`, to convert human-readable infix query + expressions (such as `a==100 and b>=200`) and transform them into the equivalent + query argument for the pymongo package (`{'$and': [{'a': 100}, {'b': {'$gte': 200}}]}`). + Supports many equality and inequality operators - see the docstring for the + `transform_query` function for more examples. + +- Fixed issue where PEP8 compatibility names for `ParserElement` static methods were + not themselves defined as `staticmethods`. When called using a `ParserElement` instance, + this resulted in a `TypeError` exception. Reported by eylenburg (#548). + +- To address a compatibility issue in RDFLib, added a property setter for the + `ParserElement.name` property, to call `ParserElement.set_name`. + +- Modified `ParserElement.set_name()` to accept a None value, to clear the defined + name and corresponding error message for a `ParserElement`. + +- Updated railroad diagram generation for `ZeroOrMore` and `OneOrMore` expressions with + `stop_on` expressions, while investigating #558, reported by user Gu_f. + +- Added `` tag to HTML generated for railroad diagrams to force UTF-8 encoding + with older browsers, to better display Unicode parser characters. 
+ +- Fixed some cosmetics/bugs in railroad diagrams: + - fixed groups being shown even when `show_groups`=False + - show results names as quoted strings when `show_results_names`=True + - only use integer loop counter if repetition > 2 + +- Some type annotations added for parse action related methods, thanks August + Karlstedt (#551). + +- Added exception type to `trace_parse_action` exception output, while investigating + SO question posted by medihack. + +- Added `set_name` calls to internal expressions generated in `infix_notation`, for + improved railroad diagramming. + +- `delta_time`, `lua_parser`, `decaf_parser`, and `roman_numerals` examples cleaned up + to use latest PEP8 names and add minor enhancements. + +- Fixed bug (and corresponding test code) in `delta_time` example that did not handle + weekday references in time expressions (like "Monday at 4pm") when the weekday was + the same as the current weekday. + +- Minor performance speedup in `trim_arity`, to benefit any parsers using parse actions. + +- Added early testing support for Python 3.13 with JIT enabled. + + +Version 3.1.2 - March, 2024 +--------------------------- +- Added `ieee_float` expression to `pyparsing.common`, which parses float values, + plus "NaN", "Inf", "Infinity". PR submitted by Bob Peterson (#538). + +- Updated pep8 synonym wrappers for better type checking compatibility. PR submitted + by Ricardo Coccioli (#507). + +- Fixed empty error message bug, PR submitted by InSync (#534). This _should_ return + pyparsing's exception messages to a former, more helpful form. If you have code that + parses the exception messages returned by pyparsing, this may require some code + changes. + +- Added unit tests to test for exception message contents, with enhancement to + `pyparsing.testing.assertRaisesParseException` to accept an expected exception message. + +- Updated example `select_parser.py` to use PEP8 names and added Groups for better retrieval + of parsed values from multiple SELECT clauses. + +- Added example `email_address_parser.py`, as suggested by John Byrd (#539). + +- Added example `directx_x_file_parser.py` to parse DirectX template definitions, and + generate a Pyparsing parser from a template to parse .x files. + +- Some code refactoring to reduce code nesting, PRs submitted by InSync. + +- All internal string expressions using '%' string interpolation and `str.format()` + converted to f-strings. + + +Version 3.1.1 - July, 2023 +-------------------------- +- Fixed regression in Word(min), reported by Ricardo Coccioli, good catch! (Issue #502) + +- Fixed bug in bad exception messages raised by Forward expressions. PR submitted + by Kyle Sunden, thanks for your patience and collaboration on this (#493). + +- Fixed regression in SkipTo, where ignored expressions were not checked when looking + for the target expression. Reported by catcombo, Issue #500. + +- Fixed type annotation for enable_packrat, PR submitted by Mike Urbach, thanks! (Issue #498) + +- Some general internal code cleanup. (Instigated by Michal Čihař, Issue #488) + + +Version 3.1.0 - June, 2023 +-------------------------- +- Added `tag_emitter.py` to examples. This example demonstrates how to insert + tags into your parsed results that are not part of the original parsed text. + + +Version 3.1.0b2 - May, 2023 +--------------------------- +- Updated `create_diagram()` code to be compatible with railroad-diagrams package + version 3.0. Fixes Issue #477 (railroad diagrams generated with black bars), + reported by Sam Morley-Short. 
+ +- Fixed bug in `NotAny`, where parse actions on the negated expr were not being run. + This could cause `NotAny` to incorrectly fail if the expr would normally match, + but would fail to match if a condition used as a parse action returned False. + Fixes Issue #482, raised by byaka, thank you! + +- Fixed `create_diagram()` to accept keyword args, to be passed through to the + `template.render()` method to generate the output HTML (PR submitted by Aussie Schnore, + good catch!) + +- Fixed bug in `python_quoted_string` regex. + +- Added `examples/bf.py` Brainf*ck parser/executor example. Illustrates using + a pyparsing grammar to parse language syntax, and attach executable AST nodes to + the parsed results. + + +Version 3.1.0b1 - April, 2023 +----------------------------- +- Added support for Python 3.12. + +- API CHANGE: A slight change has been implemented when unquoting a quoted string + parsed using the `QuotedString` class. Formerly, when unquoting and processing + whitespace markers such as \t and \n, these substitutions would occur first, and + then any additional '\' escaping would be done on the resulting string. This would + parse "\\n" as "\". Now escapes and whitespace markers are all processed + in a single pass working left to right, so the quoted string "\\n" would get unquoted + to "\n" (a backslash followed by "n"). Fixes issue #474 raised by jakeanq, + thanks! + +- Added named field "url" to `pyparsing.common.url`, returning the entire + parsed URL string. + +- Fixed bug when parse actions returned an empty string for an expression that + had a results name, that the results name was not saved. That is: + + expr = Literal("X").add_parse_action(lambda tokens: "")("value") + result = expr.parse_string("X") + print(result["value"]) + + would raise a `KeyError`. Now empty strings will be saved with the associated + results name. Raised in Issue #470 by Nicco Kunzmann, thank you. + +- Fixed bug in `SkipTo` where ignore expressions were not properly handled while + scanning for the target expression. Issue #475, reported by elkniwt, thanks + (this bug has been there for a looooong time!). + +- Updated `ci.yml` permissions to limit default access to source - submitted by Joyce + Brum of Google. Thanks so much! + +- Updated the `lucene_grammar.py` example (better support for '*' and '?' wildcards) + and corrected the test cases - brought to my attention by Elijah Nicol, good catch! + + +Version 3.1.0a1 - March, 2023 +----------------------------- +- API ENHANCEMENT: `Optional(expr)` may now be written as `expr | ""` + + This will make this code: + + "{" + Optional(Literal("A") | Literal("a")) + "}" + + writable as: + + "{" + (Literal("A") | Literal("a") | "") + "}" + + Some related changes implemented as part of this work: + - `Literal("")` now internally generates an `Empty()` (and no longer raises an exception) + - `Empty` is now a subclass of `Literal` + + Suggested by Antony Lee (issue #412), PR (#413) by Devin J. Pohly. + +- Added new class property `identifier` to all Unicode set classes in `pyparsing.unicode`, + using the class's values for `cls.identchars` and `cls.identbodychars`. Now Unicode-aware + parsers that formerly wrote: + + ppu = pyparsing.unicode + ident = Word(ppu.Greek.identchars, ppu.Greek.identbodychars) + + can now write: + + ident = ppu.Greek.identifier + # or + # ident = ppu.Ελληνικά.identifier + +- `ParseResults` now has a new method `deepcopy()`, in addition to the current + `copy()` method. 
`copy()` only makes a shallow copy - any contained `ParseResults` + are copied as references - changes in the copy will be seen as changes in the original. + In many cases, a shallow copy is sufficient, but some applications require a deep copy. + `deepcopy()` makes a deeper copy: any contained `ParseResults` or other mappings or + containers are built with copies from the original, and do not get changed if the + original is later changed. Addresses issue #463, reported by Bryn Pickering. + +- Reworked `delimited_list` function into the new `DelimitedList` class. + `DelimitedList` has the same constructor interface as `delimited_list`, and + in this release, `delimited_list` changes from a function to a synonym for + `DelimitedList`. `delimited_list` and the older `delimitedList` method will be + deprecated in a future release, in favor of `DelimitedList`. + +- Error messages from `MatchFirst` and `Or` expressions will try to give more details + if one of the alternatives matches better than the others, but still fails. + Question raised in Issue #464 by msdemlei, thanks! + +- Added new class method `ParserElement.using_each`, to simplify code + that creates a sequence of `Literals`, `Keywords`, or other `ParserElement` + subclasses. + + For instance, to define suppressible punctuation, you would previously + write: + + LPAR, RPAR, LBRACE, RBRACE, SEMI = map(Suppress, "(){};") + + You can now write: + + LPAR, RPAR, LBRACE, RBRACE, SEMI = Suppress.using_each("(){};") + + `using_each` will also accept optional keyword args, which it will + pass through to the class initializer. Here is an expression for + single-letter variable names that might be used in an algebraic + expression: + + algebra_var = MatchFirst( + Char.using_each(string.ascii_lowercase, as_keyword=True) + ) + +- Added new builtin `python_quoted_string`, which will match any form + of single-line or multiline quoted strings defined in Python. (Inspired + by discussion with Andreas Schörgenhumer in Issue #421.) + +- Extended `expr[]` notation for repetition of `expr` to accept a + slice, where the slice's stop value indicates a `stop_on` + expression: + + test = "BEGIN aaa bbb ccc END" + BEGIN, END = Keyword.using_each("BEGIN END".split()) + body_word = Word(alphas) + + expr = BEGIN + Group(body_word[...:END]) + END + # equivalent to + # expr = BEGIN + Group(ZeroOrMore(body_word, stop_on=END)) + END + + print(expr.parse_string(test)) + + Prints: + + ['BEGIN', ['aaa', 'bbb', 'ccc'], 'END'] + +- `ParserElement.validate()` is deprecated. It predates the support for left-recursive + parsers, and was prone to false positives (warning that a grammar was invalid when + it was in fact valid). It will be removed in a future pyparsing release. In its + place, developers should use debugging and analytical tools, such as `ParserElement.set_debug()` + and `ParserElement.create_diagram()`. + (Raised in Issue #444, thanks Andrea Micheli!) + +- Added bool `embed` argument to `ParserElement.create_diagram()`. + When passed as True, the resulting diagram will omit the ``, + ``, and `` tags so that it can be embedded in other + HTML source. (Useful when embedding a call to `create_diagram()` in + a PyScript HTML page.) + +- Added `recurse` argument to `ParserElement.set_debug` to set the + debug flag on an expression and all of its sub-expressions. Requested + by multimeric in Issue #399. + +- Added '·' (Unicode MIDDLE DOT) to the set of Latin1.identbodychars. + +- Fixed bug in `Word` when `max=2`. 
Also added performance enhancement + when specifying `exact` argument. Reported in issue #409 by + panda-34, nice catch! + +- `Word` arguments are now validated if `min` and `max` are both + given, that `min` <= `max`; raises `ValueError` if values are invalid. + +- Fixed bug in srange, when parsing escaped '/' and '\' inside a + range set. + +- Fixed exception messages for some `ParserElements` with custom names, + which instead showed their contained expression names. + +- Fixed bug in pyparsing.common.url, when input URL is not alone + on an input line. Fixes Issue #459, reported by David Kennedy. + +- Multiple added and corrected type annotations. With much help from + Stephen Rosen, thanks! + +- Some documentation and error message clarifications on pyparsing's + keyword logic, cited by Basil Peace. + +- General docstring cleanup for Sphinx doc generation, PRs submitted + by Devin J. Pohly. A dirty job, but someone has to do it - much + appreciated! + +- `invRegex.py` example renamed to `inv_regex.py` and updated to PEP-8 + variable and method naming. PR submitted by Ross J. Duff, thanks! + +- Removed examples `sparser.py` and `pymicko.py`, since each included its + own GPL license in the header. Since this conflicts with pyparsing's + MIT license, they were removed from the distribution to avoid + confusion among those making use of them in their own projects. + + +Version 3.0.9 - May, 2022 +------------------------- - Added Unicode set `BasicMultilingualPlane` (may also be referenced as `BMP`) representing the Basic Multilingual Plane (Unicode characters up to code point 65535). Can be used to parse most language characters, but omits emojis, wingdings, etc. Raised in discussion with Dave Tapley (issue #392). -- To address mypy confusion of pyparsing.Optional and typing.Optional +- To address mypy confusion of `pyparsing.Optional` and `typing.Optional` resulting in `error: "_SpecialForm" not callable` message - reported in issue #365, fixed the import in exceptions.py. Nice + reported in issue #365, fixed the import in `exceptions.py`. Nice sleuthing by Iwan Aucamp and Dominic Davis-Foster, thank you! (Removed definitions of `OptionalType`, `DictType`, and `IterableType` and replaced them with `typing.Optional`, `typing.Dict`, and @@ -24,13 +626,13 @@ Version 3.0.9 - (in development) - Removed use of deprecated `pkg_resources` package in railroad diagramming code (issue #391). -- Updated bigquery_view_parser.py example to parse examples at +- Updated `bigquery_view_parser.py` example to parse examples at https://cloud.google.com/bigquery/docs/reference/legacy-sql -Version 3.0.8 - ---------------- -- API CHANGE: modified pyproject.toml to require Python version +Version 3.0.8 - April, 2022 +--------------------------- +- API CHANGE: modified `pyproject.toml` to require Python version 3.6.8 or later for pyparsing 3.x. Earlier minor versions of 3.6 fail in evaluating the `version_info` class (implemented using `typing.NamedTuple`). If you are using an earlier version of Python @@ -39,7 +641,7 @@ Version 3.0.8 - - Improved pyparsing import time by deferring regex pattern compiles. PR submitted by Anthony Sottile to fix issue #362, thanks! -- Updated build to use flit, PR by Michał Górny, added BUILDING.md +- Updated build to use flit, PR by Michał Górny, added `BUILDING.md` doc and removed old Windows build scripts - nice cleanup work! - More type-hinting added for all arithmetic and logical operator @@ -65,8 +667,8 @@ Version 3.0.8 - Serhiy Storchaka, thank you. 
-Version 3.0.7 - ---------------- +Version 3.0.7 - January, 2022 +----------------------------- - Fixed bug #345, in which delimitedList changed expressions in place using `expr.streamline()`. Reported by Kim Gräsman, thanks! @@ -130,8 +732,8 @@ Version 3.0.7 - - Additional type annotations on public methods. -Version 3.0.6 - ---------------- +Version 3.0.6 - November, 2021 +------------------------------ - Added `suppress_warning()` method to individually suppress a warning on a specific ParserElement. Used to refactor `original_text_for` to preserve internal results names, which, while undocumented, had been adopted by @@ -141,8 +743,8 @@ Version 3.0.6 - parse expression. -Version 3.0.5 - ---------------- +Version 3.0.5 - November, 2021 +------------------------------ - Added return type annotations for `col`, `line`, and `lineno`. - Fixed bug when `warn_ungrouped_named_tokens_in_collection` warning was raised @@ -157,8 +759,8 @@ Version 3.0.5 - minor bug where separating line was not included after a test failure. -Version 3.0.4 - ---------------- +Version 3.0.4 - October, 2021 +----------------------------- - Fixed bug in which `Dict` classes did not correctly return tokens as nested `ParseResults`, reported by and fix identified by Bu Sun Kim, many thanks!!! @@ -186,8 +788,8 @@ Version 3.0.4 - elements. -Version 3.0.3 - ---------------- +Version 3.0.3 - October, 2021 +----------------------------- - Fixed regex typo in `one_of` fix for `as_keyword=True`. - Fixed a whitespace-skipping bug, Issue #319, introduced as part of the revert @@ -198,8 +800,8 @@ Version 3.0.3 - are longer than others. -Version 3.0.2 - ---------------- +Version 3.0.2 - October, 2021 +----------------------------- - Reverted change in behavior with `LineStart` and `StringStart`, which changed the interpretation of when and how `LineStart` and `StringStart` should match when a line starts with spaces. In 3.0.0, the `xxxStart` expressions were not @@ -233,8 +835,8 @@ Version 3.0.2 - the `IndentedBlock` with `grouped=False`. -Version 3.0.1 - ---------------- +Version 3.0.1 - October, 2021 +----------------------------- - Fixed bug where `Word(max=n)` did not match word groups less than length 'n'. Thanks to Joachim Metz for catching this! @@ -245,15 +847,15 @@ Version 3.0.1 - even when not enabled. -Version 3.0.0 - ---------------- +Version 3.0.0 - October, 2021 +----------------------------- - A consolidated list of all the changes in the 3.0.0 release can be found in `docs/whats_new_in_3_0_0.rst`. (https://github.com/pyparsing/pyparsing/blob/master/docs/whats_new_in_3_0_0.rst) -Version 3.0.0.final - ---------------------- +Version 3.0.0.final - October, 2021 +----------------------------------- - Added support for python `-W` warning option to call `enable_all_warnings`() at startup. Also detects setting of `PYPARSINGENABLEALLWARNINGS` environment variable to any non-blank value. (If using `-Wd` for testing, but wishing to disable pyparsing warnings, add @@ -290,8 +892,8 @@ Version 3.0.0.final - `a` will get named "a", while `b` will keep its name "bbb". -Version 3.0.0rc2 - ------------------- +Version 3.0.0rc2 - October, 2021 +-------------------------------- - Added `url` expression to `pyparsing_common`. (Sample code posted by Wolfgang Fahl, very nice!) @@ -904,8 +1506,8 @@ Version 3.0.0a1 - April, 2020 a few. -Version 2.4.7 - March, 2020 (April, actually) ---------------------------------------------- +Version 2.4.7 - April, 2020 +--------------------------- - Backport of selected fixes from 3.0.0 work: . 
Each bug with Regex expressions . And expressions not properly constructing with generator diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 2fd54094..d3d44030 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1,4 +1,4 @@ -# CONTRIBUTING +# Contributing to Pyparsing Thank you for your interest in working on pyparsing! Pyparsing has become a popular module for creating simple text parsing and data scraping applications. It has been incorporated in several widely-used packages, and is @@ -34,6 +34,53 @@ If you have a question on using pyparsing, there are a number of resources avail other open and closed issues. Or post your question on SO or reddit. But don't wait until you are desperate and frustrated - just ask! :) +## Submitting examples + +If you have an example you wish to submit, please follow these guidelines. + +- **License - Submitted example code must be available for distribution with the rest of pyparsing under the MIT + open source license.** + +- Please follow PEP8 name and coding guidelines, and use the black formatter + to auto-format code. + +- Examples should import pyparsing and the common namespace classes as: + + ```python + import pyparsing as pp + # if necessary + ppc = pp.pyparsing_common + ppu = pp.pyparsing_unicode + ``` + +- Submitted examples _must_ be Python 3.6.8 or later compatible. + (It is acceptable if examples use Python features added after 3.6) + +- Where possible use operators to create composite parse expressions: + + ```python + expr = expr_a + expr_b | expr_c + ``` + + instead of: + + ```python + expr = pp.MatchFirst([pp.And([expr_a, expr_b]), expr_c]) + ``` + + Exception: if using a generator to create an expression: + + ```python + import keyword + python_keywords = keyword.kwlist + any_keyword = pp.MatchFirst(pp.Keyword(kw) + for kw in python_keywords)) + ``` + +- Learn [Common Pitfalls When Writing Parsers][pitfalls] and + how to avoid them when developing new examples. + +- See additional notes under [Some coding points](#some-coding-points). ## Submitting changes @@ -49,11 +96,11 @@ intended on prior versions of Python (currently back to Python 3.6.8). ## Some design points -- Minimize additions to the module namespace. Over time, pyparsing's namespace has acquired a *lot* of names. +- Minimize additions to the module namespace. Over time, pyparsing's namespace has acquired a _lot_ of names. New features have been encapsulated into namespace classes to try to hold back the name flooding when importing pyparsing. -- New operator overloads for ParserElement will need to show broad applicability, and should be related to +- New operator overloads for ParserElement will need to show broad applicability, and should be related to parser construction. - Performance tuning should focus on parse time performance. Optimizing parser definition performance is secondary. @@ -69,59 +116,87 @@ These coding styles are encouraged whether submitting code for core pyparsing or name casing. I had just finished several years of Java and Smalltalk development, and camel case seemed to be the future trend in coding styles. As of version 3.0.0, pyparsing is moving over to PEP8 naming, while maintaining compatibility with existing parser code by defining synonyms using the legacy names. 
These names will be - retained until a future release (probably 4.0), to provide a migration path for current pyparsing-dependent + retained until a future release (probably 4.0), to provide a migration path for current pyparsing-dependent applications - DO NOT MODIFY OR REMOVE THESE NAMES. See more information at the [PEP8 wiki page](https://github.com/pyparsing/pyparsing/wiki/PEP-8-planning). - If you wish to submit a new example, please follow PEP8 name and coding guidelines, and use the black formatter - to auto-format code. Example code must be available for distribution with the rest of pyparsing under the MIT - open source license. - - No backslashes for line continuations. - Continuation lines for expressions in ()'s should start with the continuing operator: + Continuation lines for expressions in `()`'s should start with the continuing operator: - really_long_line = (something - + some_other_long_thing - + even_another_long_thing) + ```python + really_long_line = (something + + some_other_long_thing + + even_another_long_thing) + ``` - Maximum line length is 120 characters. (Black will override this.) - Changes to core pyparsing must be compatible back to Py3.6 without conditionalizing. Later Py3 features may be used in examples by way of illustration. -- str.format() statements should use named format arguments (unless this proves to be a slowdown at parse time). +- `str.format()` statements should use named format arguments (unless this proves to be a slowdown at parse time). - List, tuple, and dict literals should include a trailing comma after the last element, which reduces changeset clutter when another element gets added to the end. -- Examples should import pyparsing and the common namespace classes as: - - import pyparsing as pp - # if necessary - ppc = pp.pyparsing_common - ppu = pp.pyparsing_unicode - - Submitted examples *must* be Python 3.6.8 or later compatible. - -- Where possible use operators to create composite parse expressions: - - expr = expr_a + expr_b | expr_c - - instead of: - - expr = pp.MatchFirst([pp.And([expr_a, expr_b]), expr_c]) - - Exception: if using a generator to create an expression: - - import keyword - python_keywords = keyword.kwlist - any_keyword = pp.MatchFirst(pp.Keyword(kw) - for kw in python_keywords)) +- New features should be accompanied by updates to `unitTests.py` and a bullet in the CHANGES file. -- Learn [Common Pitfalls When Writing Parsers](https://github.com/pyparsing/pyparsing/wiki/Common-Pitfalls-When-Writing-Parsers) and - how to avoid them when developing new examples. - -- New features should be accompanied by updates to unitTests.py and a bullet in the CHANGES file. - -- Do not modify pyparsing_archive.py. This file is kept as a reference artifact from when pyparsing was distributed +- Do not modify `pyparsing_archive.py`. This file is kept as a reference artifact from when pyparsing was distributed as a single source file. + +## Some documentation points + +- The docstrings in pyparsing (which are generated into the package's + API documentation by Sphinx) make heavy use of doctests for their + example code. This allows examples to be tested and verified as + working, and ensures that any changes to the code which affect + output are accompanied by corresponding changes in the examples. + +- The codebase's docstring tests can be verified by running the + command `make doctest` from the `docs/` directory. The output + should ideally look something like this: + + ```console + $ make doctest + [...documentation build...] 
+ running tests... + + Document: pyparsing + ------------------- + 1 item passed all tests: + 204 tests in default + 204 tests in 1 item. + 204 passed. + Test passed. + + Document: whats_new_in_3_1 + -------------------------- + 1 item passed all tests: + 15 tests in default + 15 tests in 1 item. + 15 passed. + Test passed. + + Doctest summary + =============== + 219 tests + 0 failures in tests + 0 failures in setup code + 0 failures in cleanup code + ``` + + Any failed tests will be displayed in detail. + +- Much more information about doctests can be found in the + [Pyparsing documentation][pyparsing-docs], in the chapter titled + "Writing doctest examples". Even if you have never worked with them + before, it should guide you through everything you need to know in + order to write Pyparsing doctest examples. If you are already familiar + with doctests and with `sphinx.ext.doctest` in general, you may wish + to skip over the introductory content and go straight to the section + on "Doctests in Pyparsing" which covers some issues specific to the + project. + + +[pitfalls]: https://github.com/pyparsing/pyparsing/wiki/Common-Pitfalls-When-Writing-Parsers +[pyparsing-docs]: https://pyparsing-docs.readthedocs.io/en/latest/ diff --git a/README.rst b/README.rst index f51c9ddd..cfb9889f 100644 --- a/README.rst +++ b/README.rst @@ -1,7 +1,7 @@ PyParsing -- A Python Parsing Module ==================================== -|Build Status| |Coverage| +|Version| |Build Status| |Coverage| |License| |Python Versions| |Snyk Score| Introduction ============ @@ -26,7 +26,7 @@ Here is a program to parse ``"Hello, World!"`` (or any greeting of the form from pyparsing import Word, alphas greet = Word(alphas) + "," + Word(alphas) + "!" hello = "Hello, World!" - print(hello, "->", greet.parseString(hello)) + print(hello, "->", greet.parse_string(hello)) The program outputs the following:: @@ -36,7 +36,7 @@ The Python representation of the grammar is quite readable, owing to the self-explanatory class names, and the use of '+', '|' and '^' operator definitions. -The parsed results returned from ``parseString()`` is a collection of type +The parsed results returned from ``parse_string()`` is a collection of type ``ParseResults``, which can be accessed as a nested list, a dictionary, or an object with named attributes. @@ -63,7 +63,7 @@ entire directory of examples can be found `here `__ file. +MIT License. See header of the `pyparsing __init__.py `__ file. History ======= @@ -72,5 +72,22 @@ See `CHANGES `__ fil .. |Build Status| image:: https://github.com/pyparsing/pyparsing/actions/workflows/ci.yml/badge.svg :target: https://github.com/pyparsing/pyparsing/actions/workflows/ci.yml + .. |Coverage| image:: https://codecov.io/gh/pyparsing/pyparsing/branch/master/graph/badge.svg :target: https://codecov.io/gh/pyparsing/pyparsing + +.. |Version| image:: https://img.shields.io/pypi/v/pyparsing?style=flat-square + :target: https://pypi.org/project/pyparsing/ + :alt: Version + +.. |License| image:: https://img.shields.io/pypi/l/pyparsing.svg?style=flat-square + :target: https://pypi.org/project/pyparsing/ + :alt: License + +.. |Python Versions| image:: https://img.shields.io/pypi/pyversions/pyparsing.svg?style=flat-square + :target: https://pypi.org/project/python-liquid/ + :alt: Python versions + +.. 
|Snyk Score| image:: https://snyk.io//advisor/python/pyparsing/badge.svg + :target: https://snyk.io//advisor/python/pyparsing + :alt: pyparsing diff --git a/docs/CONTRIBUTING.md b/docs/CONTRIBUTING.md new file mode 120000 index 00000000..44fcc634 --- /dev/null +++ b/docs/CONTRIBUTING.md @@ -0,0 +1 @@ +../CONTRIBUTING.md \ No newline at end of file diff --git a/docs/HowToUsePyparsing.rst b/docs/HowToUsePyparsing.rst index fb28b7d9..cea9241d 100644 --- a/docs/HowToUsePyparsing.rst +++ b/docs/HowToUsePyparsing.rst @@ -5,10 +5,10 @@ Using the pyparsing module :author: Paul McGuire :address: ptmcg.pm+pyparsing@gmail.com -:revision: 3.0.0 -:date: October, 2021 +:revision: 3.2.0 +:date: October, 2024 -:copyright: Copyright |copy| 2003-2022 Paul McGuire. +:copyright: Copyright |copy| 2003-2024 Paul McGuire. .. |copy| unicode:: 0xA9 @@ -19,8 +19,6 @@ Using the pyparsing module expressions, processing custom application language commands, or extracting data from formatted reports. -.. sectnum:: :depth: 4 - .. contents:: :depth: 4 Note: While this content is still valid, there are more detailed @@ -36,15 +34,15 @@ directory of the pyparsing GitHub repo. **Note**: *In pyparsing 3.0, many method and function names which were originally written using camelCase have been converted to PEP8-compatible snake_case. So ``parseString()`` is being renamed to ``parse_string()``, -``delimitedList`` to ``delimited_list``, and so on. You may see the old +``delimitedList`` to DelimitedList_, and so on. You may see the old names in legacy parsers, and they will be supported for a time with synonyms, but the synonyms will be removed in a future release.* *If you are using this documentation, but working with a 2.4.x version of pyparsing, you'll need to convert methods and arguments from the documented snake_case -names to the legacy camelCase names. In pyparsing 3.0.x, both forms are +names to the legacy camelCase names. In pyparsing 3.x, both forms are supported, but the legacy forms are deprecated; they will be dropped in a -future release.* +future 4.0 release.* ----------- @@ -58,8 +56,8 @@ To parse an incoming data string, the client code must follow these steps: this to a program variable. Optional results names or parse actions can also be defined at this time. -2. Call ``parse_string()`` or ``scan_string()`` on this variable, passing in - the string to +2. Call ``parse_string()``, ``scan_string()``, or ``search_string()`` + on this variable, passing in the string to be parsed. During the matching process, whitespace between tokens is skipped by default (although this can be changed). When token matches occur, any defined parse action methods are @@ -182,7 +180,7 @@ Usage notes - ``expr[... ,n]`` is equivalent to ``expr*(0, n)`` (read as "0 to n instances of expr") - - ``expr[...]`` and ``expr[0, ...]`` are equivalent to ``ZeroOrMore(expr)`` + - ``expr[...]``, ``expr[0, ...]`` and ``expr * ...`` are equivalent to ``ZeroOrMore(expr)`` - ``expr[1, ...]`` is equivalent to ``OneOrMore(expr)`` @@ -192,6 +190,11 @@ Usage notes occurrences. If this behavior is desired, then write ``expr[..., n] + ~expr``. +- ``[]`` notation will also accept a stop expression using ':' slice + notation: + + - ``expr[...:end_expr]`` is equivalent to ``ZeroOrMore(expr, stop_on=end_expr)`` + - MatchFirst_ expressions are matched left-to-right, and the first match found will skip all later expressions within, so be sure to define less-specific patterns after more-specific patterns. 
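+
+  For example, a minimal sketch of this ordering rule (the comparison
+  operators here are purely illustrative)::
+
+    import pyparsing as pp
+
+    # wrong order: "<" matches first, leaving the trailing "=" unparsed
+    op = pp.Literal("<") | pp.Literal("<=")
+    print(op.parse_string("<="))   # -> ['<']
+
+    # right order: the longer, more specific alternative is tried first
+    op = pp.Literal("<=") | pp.Literal("<")
+    print(op.parse_string("<="))   # -> ['<=']
+
+  (The ``one_of`` helper generates such an expression and handles this
+  reordering automatically.)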
@@ -229,7 +232,7 @@ Usage notes - Punctuation may be significant for matching, but is rarely of much interest in the parsed results. Use the ``suppress()`` method to keep these tokens from cluttering up your returned lists of - tokens. For example, ``delimited_list()`` matches a succession of + tokens. For example, DelimitedList_ matches a succession of one or more expressions, separated by delimiters (commas by default), but only returns a list of the actual expressions - the delimiters are used for parsing, but are suppressed from the @@ -263,6 +266,9 @@ Usage notes Classes ======= +All the pyparsing classes can be found in this +`UML class diagram <_static/pyparsingClassDiagram_3.0.9.jpg>`_. + Classes in the pyparsing module ------------------------------- @@ -348,10 +354,12 @@ methods for code to use are: ^ FAIL: Expected numeric digits, found end of text (at char 4), (line:1, col:5) +.. _set_results_name: + - ``set_results_name(string, list_all_matches=False)`` - name to be given to tokens matching the element; if multiple tokens within - a repetition group (such as ``ZeroOrMore`` or ``delimited_list``) the + a repetition group (such as ZeroOrMore_ or DelimitedList_) the default is to return only the last matching token - if ``list_all_matches`` is set to True, then a list of all the matching tokens is returned. @@ -364,6 +372,14 @@ methods for code to use are: basic element can be referenced multiple times and given different names within a complex grammar. +.. _using_each: + +- ``using_each(list_of_symbols)`` a short-cut for defining a number of + symbols of a particular ``ParserElement`` subclass:: + + LBRACK, RBRACK, LBRACE, RBRACE, LPAR, RPAR = Suppress.using_each("[]{}()") + AND, OR, NOT = Keyword.using_each("and or not".split()) + .. _set_parse_action: - ``set_parse_action(*fn)`` - specify one or more functions to call after successful @@ -402,7 +418,7 @@ methods for code to use are: A nice short-cut for calling ``set_parse_action`` is to use it as a decorator:: - identifier = Word(alphas, alphanums+"_") + identifier = Word(alphas, alphanums + "_") @identifier.set_parse_action def resolve_identifier(results: ParseResults): @@ -449,11 +465,15 @@ methods for code to use are: repeatedly to specify multiple expressions; useful to specify patterns of comment syntax, for example -- ``set_debug(debug_flag=True)`` - function to enable/disable tracing output +- ``set_debug(flag=True)`` - function to enable/disable tracing output when trying to match this element - ``validate()`` - function to verify that the defined grammar does not - contain infinitely recursive constructs + contain infinitely recursive constructs. + + *(``validate()`` is deprecated, and + will be removed in a future pyparsing release. Pyparsing now supports + left-recursive parsers, which this function attempted to catch.)* .. _parse_with_tabs: @@ -495,7 +515,7 @@ Basic ParserElement subclasses defined keyword - ``CaselessKeyword`` - similar to Keyword_, but with caseless matching - behavior + behavior as described in CaselessLiteral_. .. _Word: @@ -535,16 +555,21 @@ Basic ParserElement subclasses - ``max`` - indicating a maximum length of matching characters - - ``exact`` - indicating an exact length of matching characters + - ``exact`` - indicating an exact length of matching characters; + if ``exact`` is specified, it will override any values for ``min`` or ``max`` - If ``exact`` is specified, it will override any values for ``min`` or ``max``. 
+ - ``as_keyword`` - indicating that preceding and following characters must + be whitespace or non-keyword characters - Sometimes you want to define a word using all - characters in a range except for one or two of them; you can do this - with the new ``exclude_chars`` argument. This is helpful if you want to define - a word with all ``printables`` except for a single delimiter character, such - as '.'. Previously, you would have to create a custom string to pass to Word. - With this change, you can just create ``Word(printables, exclude_chars='.')``. + - ``exclude_chars`` - a string of characters that should be excluded from + init_chars and body_chars + + Sometimes you want to define a word using all + characters in a range except for one or two of them; you can do this + with the ``exclude_chars`` argument. This is helpful if you want to define + a word with all ``printables`` except for a single delimiter character, such + as '.'. Previously, you would have to create a custom string to pass to Word. + With this change, you can just create ``Word(printables, exclude_chars='.')``. - ``Char`` - a convenience form of ``Word`` that will match just a single character from a string of matching characters:: @@ -596,7 +621,7 @@ Basic ParserElement subclasses ``SkipTo`` can also be written using ``...``:: - LBRACE, RBRACE = map(Literal, "{}") + LBRACE, RBRACE = Literal.using_each("{}") brace_expr = LBRACE + SkipTo(RBRACE) + RBRACE # can also be written as @@ -675,17 +700,37 @@ Expression subclasses parse element is not found in the input string; parse action will only be called if a match is found, or if a default is specified. + An optional element ``expr`` can also be expressed using ``expr | ""``. + (``Opt`` was formerly named ``Optional``, but since the standard Python library module ``typing`` now defines ``Optional``, the pyparsing class has been renamed to ``Opt``. A compatibility synonym ``Optional`` is defined, but will be removed in a future release.) +.. _ZeroOrMore: + - ``ZeroOrMore`` - similar to ``Opt``, but can be repeated; ``ZeroOrMore(expr)`` can also be written as ``expr[...]``. -- ``OneOrMore`` - similar to ``ZeroOrMore``, but at least one match must +.. _OneOrMore: + +- ``OneOrMore`` - similar to ZeroOrMore_, but at least one match must be present; ``OneOrMore(expr)`` can also be written as ``expr[1, ...]``. +.. _DelimitedList: + +- ``DelimitedList`` - used for + matching one or more occurrences of ``expr``, separated by ``delim``. + By default, the delimiters are suppressed, so the returned results contain + only the separate list elements. Can optionally specify ``combine=True``, + indicating that the expressions and delimiters should be returned as one + combined value (useful for scoped variables, such as ``"a.b.c"``, or + ``"a::b::c"``, or paths such as ``"a/b/c"``). Can also optionally specify ``min` and ``max`` + restrictions on the length of the list, and + ``allow_trailing_delim`` to accept a trailing delimiter at the end of the list. + +.. _FollowedBy: + - ``FollowedBy`` - a lookahead expression, requires matching of the given expressions, but does not advance the parsing position within the input string @@ -734,6 +779,8 @@ Expression operators equivalent to ``OneOrMore(expr)``, and ``expr[..., 3]`` is equivalent to "up to 3 instances of ``expr``". 
+- ``[:stop_on]`` - specifies a stopping expression for the current repetition (may be combined + with ``...`` or ``min, max``), as in ``Keyword("start") + Word(alphas)[...:Keyword("end")] + Keyword("end")`` Positional subclasses --------------------- @@ -769,7 +816,7 @@ Special subclasses ------------------ - ``Group`` - causes the matched tokens to be enclosed in a list; - useful in repeated elements like ``ZeroOrMore`` and ``OneOrMore`` to + useful in repeated elements like ZeroOrMore_ and OneOrMore_ to break up matched tokens into groups for each repeated pattern - ``Dict`` - like ``Group``, but also constructs a dictionary, using the @@ -781,6 +828,10 @@ Special subclasses program, insert it into the ``Forward`` object using the ``<<=`` operator (see fourFn.py_ for an example). +- ``Tag`` - a non-parsing token that always matches, and inserts + a tag and value into the current parsed tokens; useful for adding + metadata or annotations to parsed results (see `examples/tag_example.py <../examples/tag_example.py>`_). + Other classes ------------- @@ -799,7 +850,7 @@ Other classes - elements can be deleted using ``del`` - - the ``-1``th element can be extracted and removed in a single operation + - the last element can be extracted and removed in a single operation using ``pop()``, or any element can be extracted and removed using ``pop(n)`` @@ -813,7 +864,8 @@ Other classes ['abc', ['100', '200', '300'], 'end'] If the ``Group`` is constructed using ``aslist=True``, the resulting tokens - will be a Python list instead of a ParseResults_. + will be a Python list instead of a ParseResults_. In this case, the returned value will + no longer support the extended features or methods of a ParseResults_. - as a dictionary @@ -825,8 +877,9 @@ Other classes input text - in addition to ParseResults_ listed as ``[ [ a1, b1, c1, ...], [ a2, b2, c2, ...] ]`` it also acts as a dictionary with entries defined as ``{ a1 : [ b1, c1, ... ] }, { a2 : [ b2, c2, ... ] }``; this is especially useful when processing tabular data where the first column contains a key - value for that line of data; when constructed with ``aslist=True``, will - return an actual Python ``dict`` instead of a ParseResults_. + value for that line of data; when constructed with ``asdict=True``, will + return an actual Python ``dict`` instead of a ParseResults_. In this case, the returned value will + no longer support the extended features or methods of a ParseResults_. - list elements that are deleted using ``del`` will still be accessible by their dictionary keys @@ -858,6 +911,10 @@ Other classes (The ``pprint`` module is especially good at printing out the nested contents given by ``as_list()``.) + If a ParseResults_ is built with expressions that use results names (see _set_results_name) or + using the ``Dict`` class, then those names and values can be extracted as a Python + dict using ``as_dict()``. + Finally, ParseResults_ can be viewed by calling ``dump()``. ``dump()`` will first show the ``as_list()`` output, followed by an indented structure listing parsed tokens that have been assigned results names. 
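+
+  For example, a small sketch of these different views of the parsed
+  results (the results names ``name`` and ``age`` are illustrative)::
+
+    import pyparsing as pp
+
+    entry = pp.Word(pp.alphas)("name") + pp.Word(pp.nums)("age")
+    result = entry.parse_string("Bob 32")
+
+    print(result.as_list())   # -> ['Bob', '32']
+    print(result.as_dict())   # -> {'name': 'Bob', 'age': '32'}
+    print(result.dump())      # shows the list form, then the results
+                              # names on indented "- name: value" lines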
@@ -939,7 +996,7 @@ Exception classes and Troubleshooting expr = pp.Word(pp.alphanums).set_name("word").set_debug() print(ppt.with_line_numbers(data)) - expr[...].parseString(data) + expr[...].parse_string(data) prints:: @@ -1009,15 +1066,6 @@ Miscellaneous attributes and methods Helper methods -------------- -- ``delimited_list(expr, delim=',')`` - convenience function for - matching one or more occurrences of expr, separated by delim. - By default, the delimiters are suppressed, so the returned results contain - only the separate list elements. Can optionally specify ``combine=True``, - indicating that the expressions and delimiters should be returned as one - combined value (useful for scoped variables, such as ``"a.b.c"``, or - ``"a::b::c"``, or paths such as ``"a/b/c"``). Can also optionally specify - ``allow_trailing_delim`` to accept a trailing delimiter at the end of the list. - - ``counted_array(expr)`` - convenience function for a pattern where a list of instances of the given expression is preceded by an integer giving the count of elements in the list. Returns an expression that parses the leading integer, @@ -1079,6 +1127,22 @@ Helper methods this expression to parse input strings, or incorporate it into a larger, more complex grammar. + Here is an ``infix_notation`` definition for 4-function arithmetic, + taking numbers or variables as operands. The order of definition of + the operators follows the standard precedence of operations for + arithmetic:: + + number = pp.common.number() + variable = pp.common.identifier() + arithmetic_expression = pp.infix_notation( + number | variable, + [ + ("-", 1, pp.OpAssoc.RIGHT), + (pp.one_of("* /"), 2, pp.OpAssoc.LEFT), + (pp.one_of("+ -"), 2, pp.OpAssoc.LEFT), + ] + ) + ``infix_notation`` also supports optional arguments ``lpar`` and ``rpar``, to parse groups with symbols other than "(" and ")". They may be passed as strings (in which case they will be converted to ``Suppress`` objects, and suppressed from @@ -1089,7 +1153,7 @@ Helper methods expr = infix_notation(int_expr, [ - (one_of("+ -"), 2, opAssoc.LEFT), + (one_of("+ -"), 2, OpAssoc.LEFT), ], lpar="<", rpar=">" @@ -1104,7 +1168,7 @@ Helper methods expr = infix_notation(int_expr, [ - (one_of("+ -"), 2, opAssoc.LEFT), + (one_of("+ -"), 2, OpAssoc.LEFT), ], lpar=Literal("<"), rpar=Literal(">") @@ -1247,9 +1311,9 @@ Helper parse actions ``ParseException`` if matching at a different column number; useful when parsing tabular data -- ``common.convert_to_integer()`` - converts all matched tokens to uppercase +- ``common.convert_to_integer()`` - converts all matched tokens to int -- ``common.convert_to_float()`` - converts all matched tokens to uppercase +- ``common.convert_to_float()`` - converts all matched tokens to float - ``common.convert_to_date()`` - converts matched token to a datetime.date @@ -1265,7 +1329,7 @@ Helper parse actions Common string and token constants --------------------------------- -- ``alphas`` - same as ``string.letters`` +- ``alphas`` - same as ``string.ascii_letters`` - ``nums`` - same as ``string.digits`` @@ -1275,6 +1339,19 @@ Common string and token constants ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþ +.. 
_identchars: + +- ``identchars`` - a string containing characters that are valid as initial identifier characters:: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyzª + µºÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ + +- ``identbodychars`` - a string containing characters that are valid as identifier body characters (those following a + valid leading identifier character as given in identchars_):: + + 0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyzª + µ·ºÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ + - ``printables`` - same as ``string.printable``, minus the space (``' '``) character - ``empty`` - a global ``Empty()``; will always match @@ -1287,13 +1364,15 @@ Common string and token constants - ``quoted_string`` - ``sgl_quoted_string | dbl_quoted_string`` +- ``python_quoted_string`` - ``quoted_string | multiline quoted string`` + - ``c_style_comment`` - a comment block delimited by ``'/*'`` and ``'*/'`` sequences; can span multiple lines, but does not support nesting of comments - ``html_comment`` - a comment block delimited by ``'<!--'`` and ``'-->'`` sequences; can span multiple lines, but does not support nesting of comments -- ``comma_separated_list`` - similar to ``delimited_list``, except that the +- ``comma_separated_list`` - similar to DelimitedList_, except that the list expressions can be any text value, or a quoted string; quoted strings can safely include commas without incorrectly breaking the string into two tokens @@ -1318,6 +1397,8 @@ Common string and token constants - ``common.fnumber`` - any numeric expression; parsed tokens are converted to float +- ``common.ieee_float`` - any floating-point literal (int, real number, infinity, or NaN), returned as float + - ``common.identifier`` - a programming identifier (follows Python's syntax convention of leading alpha or "_", followed by 0 or more alpha, num, or "_") @@ -1350,27 +1431,27 @@ access them using code like the following:: The following language ranges are defined. 
-========================== ================= ================================================ +========================== ================= ======================================================== Unicode set Alternate names Description --------------------------- ----------------- ------------------------------------------------ -Arabic العربية -Chinese 中文 -Cyrillic кириллица -Greek Ελληνικά -Hebrew עִברִית -Japanese 日本語 Union of Kanji, Katakana, and Hiragana sets -Japanese.Kanji 漢字 -Japanese.Katakana カタカナ -Japanese.Hiragana ひらがな -Hangul Korean, 한국어 -Latin1 All Unicode characters up to code point 255 -LatinA -LatinB -Thai ไทย -Devanagari देवनागरी -BasicMultilingualPlane BMP All Unicode characters up to code point 65535 -CJK Union of Chinese, Japanese, and Korean sets -========================== ================= ================================================ +-------------------------- ----------------- -------------------------------------------------------- +``Arabic`` العربية +``Chinese`` 中文 +``CJK`` Union of Chinese, Japanese, and Korean sets +``Cyrillic`` кириллица +``Devanagari`` देवनागरी +``Greek`` Ελληνικά +``Hangul`` Korean, 한국어 +``Hebrew`` עִברִית +``Japanese`` 日本語 Union of Kanji, Katakana, and Hiragana sets +``Japanese.Hiragana`` ひらがな +``Japanese.Kanji`` 漢字 +``Japanese.Katakana`` カタカナ +``Latin1`` All Unicode characters up to code point 0xff (255) +``LatinA`` Unicode characters for code points 0x100-0x17f (256-383) +``LatinB`` Unicode characters for code points 0x180-0x24f (384-591) +``Thai`` ไทย +``BasicMultilingualPlane`` BMP All Unicode characters up to code point 0xffff (65535) +========================== ================= ======================================================== The base ``unicode`` class also includes definitions based on all Unicode code points up to ``sys.maxunicode``. This set will include emojis, wingdings, and many other specialized and typographical variant characters. @@ -1396,13 +1477,33 @@ Create your parser as you normally would. Then call ``create_diagram()``, passin This will result in the railroad diagram being written to ``street_address_diagram.html``. -Diagrams usually will vertically wrap expressions containing more than 3 terms. You can override this by -passing the `vertical` argument to `create_diagram` with a larger value. +`create_diagram` takes the following arguments: + +- ``output_html`` (str or file-like object) - output target for generated diagram HTML + +- ``vertical`` (int) - threshold for formatting multiple alternatives vertically instead of horizontally (default=3) + +- ``show_results_names`` - bool flag whether diagram should show annotations for defined results names + +- ``show_groups`` - bool flag whether groups should be highlighted with an unlabeled surrounding box + +- ``show_hidden`` - bool flag whether internal pyparsing elements that are normally omitted in diagrams should be shown (default=False) + +- ``embed`` - bool flag whether generated HTML should omit ``<html>``, ``<head>``, and ``<body>`` tags to embed + the resulting HTML in an enclosing HTML source (such as PyScript HTML) + +- ``head`` - str containing additional HTML to insert into the ``<head>`` section of the generated code; + can be used to insert custom CSS styling + +- ``body`` - str containing additional HTML to insert at the beginning of the ``<body>`` section of the + generated code + Example ------- You can view an example railroad diagram generated from `a pyparsing grammar for
+SQL SELECT statements <_static/sql_railroad.html>`_ (generated from +`examples/select_parser.py `_). Naming tip ---------- diff --git a/docs/Writing_Doctests.rst b/docs/Writing_Doctests.rst new file mode 100644 index 00000000..4e4e2b56 --- /dev/null +++ b/docs/Writing_Doctests.rst @@ -0,0 +1,443 @@ +======================== +Writing doctest examples +======================== + +Doctest support is provided in Sphinx by the extension +`sphinx.ext.doctest`_, and its documentation is one +useful resource for working with the pyparsing doctests. + +.. _sphinx.ext.doctest: https://www.sphinx-doc.org/en/master/usage/extensions/doctest.html + + +Types of doctests +================= + +There are two basic forms of doctest, and both are used extensively +in the Pyparsing documentation. Which one to use for a given example +is a decision that needs to be made when writing it, but there are +some factors that usually make the correct choice an obvious one. + +Doctest type 1: ``testcode`` / ``testoutput`` blocks +---------------------------------------------------- + +The first form involves one or potentially two separate code blocks. +The ``testcode`` block contains all of the input code in the form of +a standard Python script. This can optionally be paired with a +second ``testoutput`` block, which if present will contain the output +for the preceding ``testcode`` block. + +An example of a ``testcode`` / ``testoutput`` pair, from the docstring +for ``ParserElement.__add__``: + + +.. code-block:: rst + + Example: + + .. testcode:: + + greet = Word(alphas) + "," + Word(alphas) + "!" + hello = "Hello, World!" + print(hello, "->", greet.parse_string(hello)) + + prints: + + .. testoutput:: + + Hello, World! -> ['Hello', ',', 'World', '!'] + +Examples written like this will be formatted in the rendered HTML/Latex/etc. +documentation **exactly** as if they'd been written as normal code blocks. +There is no visible difference between the code above and this code without +doctest support: + +.. code-block:: rst + + Example:: + + greet = Word(alphas) + "," + Word(alphas) + "!" + hello = "Hello, World!" + print(hello, "->", greet.parse_string(hello)) + + prints:: + + Hello, World! -> ['Hello', ',', 'World', '!'] + +However, the advantage to writing doctests is that when ``make doctest`` +is run from the ``docs/`` directory, the doctest extension will execute +each ``testcode`` block, and verify that its output exactly matches the +``testoutput`` block (if present). + +Any deviations will be displayed in "ndiff" format. This enhancement +to the standard unified diff will (sometimes) indicate where in each +line the differences occur. (The character-difference highlighting is +frustratingly inconsistent. But at worst ndiff is equivalent to unified +diff, so it's still worth using.) + +Testing examples with doctest allows the code used to demonstrate the +pyparsing API to be verified against the *actual* API as it's currently +implemented, and ensures that examples stay current and relevant. + +Not all ``testcode`` blocks need a corresponding ``testoutput`` — if a +``testcode`` block is included on its own, the code inside the block will +still be executed, but its output won't be verified. This can be useful +when displaying code that doesn't require demonstration of its output +(or doesn't output anything), as the extension will still verify that +the code can be run without error. 
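For instance, a standalone ``testcode`` block might look like this (a minimal
sketch; the parser inside the block is invented for the example — the directive
itself is standard ``sphinx.ext.doctest`` usage):

.. code-block:: rst

    .. testcode::

        # executed during `make doctest`, but its output is not checked
        wd = Word(alphas)
        wd.parse_string("tokens")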
+ +It's also possible to include a *hidden* ``testoutput`` block, which will +be verified against the preceding ``testcode`` but won't be displayed in the +documentation. To hide a ``testoutput`` block (or a ``testcode`` block, +for that matter), add the ``:hide:`` option as an argument to the +directive, i.e.: + +.. code-block:: rst + + .. testoutput:: + :hide: + + """Output that won't be shown, but will be verified against the + preceding testcode block.""" + +Doctest type 2: ``doctest`` interactive blocks +---------------------------------------------- + +The second type of doctest is a ``doctest`` block, which takes the form of +an interactive Python REPL session in standard format (using ``>>>`` and +``...`` markers for input lines). + +With these tests, output is interleaved with the code, which can be much +easier to follow when there are multiple lines producing output. If an +example contains multiple ``print()`` calls, rather than first +displaying all of the code in a ``testcode`` block, then all of the +output in a ``testoutput`` block, consider using a ``doctest`` session +so that the reader can follow along each step as it occurs. + +A typical ``doctest`` example can be found in the ``ParserElement.ignore`` +docstring: + +.. code-block:: rst + + Example: + + .. doctest:: + + >>> patt = Word(alphas)[...] + >>> print(patt.parse_string('ablaj /* comment */ lskjd')) + ['ablaj'] + + >>> patt = Word(alphas)[...].ignore(c_style_comment) + >>> print(patt.parse_string('ablaj /* comment */ lskjd')) + ['ablaj', 'lskjd'] + + +Setup code for doctest blocks +============================= + +The doctest extension is configured with extensive setup code +which is run before each test block. It can be viewed in the +:download:`docs/conf.py <../docs/conf.py>` file — look for the +``doctest_global_setup`` variable near the end of the file. + +The setup code is intended to make any useful symbols available +to the tests without them having to be included in each and every +doctest block. If additional modules are needed, feel free to add +them to the global setup. When writing doctests, Pyparsing classes +can be invoked directly, or as members of the ``pp`` alias namespace. +Either way, the definition of those symbols can be assumed without +explicitly importing/defining them. + +When using symbols from other aliased namespaces, however, it's a +good idea to establish the alias for the reader at the start of the +example code. Even though these are both defined in the global setup, +showing the establishing lines before referencing ``ppc`` or ``ppu`` +in an example makes that example clearer: + +.. code-block:: py + + ppc = pp.pyparsing_common + ppu = pp.pyparsing_unicode + +However, because those symbols *are* provided by default, they don't +need to be explicitly established for **every** example. Feel free +to omit them after the first use, when writing multiple examples for +a given class or function. + +Documenting exceptions +====================== + +Code that will trigger an exception can be both demonstrated and +verified using doctests (of either type), although when a ``testoutput`` +block will demonstrate an exception it should be the only output in +that block — doctest does not support mixing regular output and +exceptions. + +Both the ``IGNORE_EXCEPTION_DETAIL`` and ``ELLIPSIS`` doctest options +are enabled by default, which make demonstrating exceptions far more +convenient. 
Ignoring exception detail means that the full traceback +for an exception can be omitted, as well as the fully-qualified name +of the exception class. As long as the ``Traceback...`` line and the +exception class name match, the doctest will pass. (The exception +message is also verified by default, but read on for more about that.) + +This example code, from the ``ParserElement.set_name`` docstring, will +actually output a long traceback, followed by an exception of type +``pyparsing.exceptions.ParseException``. But because the ignore-detail +option is enabled, the doctest will pass with this abbreviated form: + +.. code-block:: rst + + .. doctest:: + + >>> integer = Word(nums) + >>> integer.parse_string("ABC") + Traceback (most recent call last): + ParseException: Expected W:(0-9) (at char 0), (line:1, col:1) + +Relaxing doctest output validation +================================== + +For even more flexibility in demonstrating output, the ``ELLIPSIS`` +option (enabled by default) means that parts of the output can be +replaced with an ellipsis (three periods, ``...``) which will validate +against any output. + +This is an extremely useful tool when the exact output of the code is +unpredictable (for example, when messages include line and column +numbers, or variable data like the current date or a directory path). +The code above could also be written like this, and it would still +pass the doctest: + +.. code-block:: rst + + .. doctest:: + + >>> integer = Word(nums) + >>> integer.parse_string("ABC") + Traceback (most recent call last): + ParseException: Expected W:(0-9) ... + +While this is necessary in some situations, it shouldn't be overused. +The more precisely a doctest validates the output of its example, +the more useful it is, so think twice before employing an ellipsis in +doctest output. + +Normalizing whitespace checks +============================= + +Another method of relaxing doctest checks that doesn't impact the +test's ability to validate output is the ``NORMALIZE_WHITESPACE`` +option. This option isn't enabled by default, but can be turned on +for any doctest block with a directive argument: + +.. code-block:: rst + + .. testoutput:: + :options: +NORMALIZE_WHITESPACE + +(Note the preceding ``+`` sign, which adds the option to the default +set instead of replacing the default options.) + +With normalization activated, any combination of spaces, tabs, and +newlines will compare equal to any other combination. + +One advantage this has is permitting long messages to be wrapped +over several lines in the example output. In this example from the +``Keyword`` class docstring, the exception message at the end would +normally be printed as one long line. To make the example readable +without excessive horizontal scrolling, ``NORMALIZE_WHITESPACE`` +allows the example output to be broken into multiple lines: + +.. code-block:: rst + + .. doctest:: + :options: +NORMALIZE_WHITESPACE + + >>> Keyword("start").parse_string("start") + ParseResults(['start'], {}) + >>> Keyword("start").parse_string("starting") + Traceback (most recent call last): + ParseException: Expected Keyword 'start', + keyword was immediately followed by keyword character, + found 'ing' (at char 5), (line:1, col:6) + +Doctests in the Pyparsing codebase +================================== + +While the preceding is generally applicable to doctests in any +codebase, there are some issues specific to Pyparsing doctests that +you should be aware of. 
+ +``run_tests()`` output +---------------------- + +There is one scenario in the pyparsing documentation where the +``NORMALIZE_WHITESPACE`` option *must* be used. + +When the example code uses the ``ParserElement.run_tests()`` method, +the output will consist of test strings and matches potentially +separated by two blank lines each. (Unless each test is preceded +by a comment, in which case there will be only one blank line.) + +Since ReStructuredText will collapse multiple blank lines in embedded +code, the only way to get the ``run_tests`` output to validate against +the example is to enable ``NORMALIZE_WHITESPACE`` and collapse the +multiple blank lines in the expected output, as well. + +Also, "any whitespace compares equal" doesn't mean that *no* +whitespace will be accepted, so the beginning of the ``testoutput`` +block MUST include an extra blank line at the start, in order +to match the leading 2 (or 1) blank lines in the output. + +So, a valid ``run_tests`` output block consists of the ``testoutput`` +directive, the ``:options: +NORMALIZE_WHITESPACE`` argument, then +**TWO blank lines** followed by the output to be verified. This +example, from the ``ParserElement.run_tests`` docstring itself, +demonstrates the required format: + +.. code-block:: rst + :linenos: + :emphasize-lines: 17,21,22,26,27 + + Failing example: + + .. testcode:: + + number_expr = pyparsing_common.number.copy() + result = number_expr.run_tests(''' + 100Z + 3.14.159 + ''', failure_tests=True) + print("Success" if result[0] else "Failed!") + + prints: + + .. testoutput:: + :options: +NORMALIZE_WHITESPACE + + + 100Z + 100Z + ^ + ParseException: Expected end of text, found 'Z' ... + + 3.14.159 + 3.14.159 + ^ + ParseException: Expected end of text, found '.' ... + FAIL: Expected end of text, found '.' ... + Success + +Note in particular: + +- The extra blank line (line 17) before the first line of output, which + is required to match the *two* blank lines in the actual output. + +- Only one blank line (line 22) separating the two tests' output. + The real output will again contain two blank lines. + +- The use of ellipses to abbreviate the expected output (lines 21, 26, 27). + +- Exception messages mixed with normal output. + + In this case that presents no problems, because ``run_tests()`` catches + any exceptions generated and prints their messages as normal output. + Doctest places no restrictions on normal output; the restrictions apply + only when an exception is raised and a traceback is triggered. + + By the same token, ``IGNORE_EXCEPTION_DETAIL`` is not applicable here + (there are no exceptions in the expected string, only regular output), + so the normal string-matching rules apply when comparing expected output + to actual output. + +Two final notes about failing doctests +-------------------------------------- + +There are two things to watch out for, when attempting to address +doctest failures during a ``make doctest`` run. + +Code location references are not useful +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Due to the uncommon structure of the pyparsing namespace (with the +symbols from all of the package's files imported into the top-level +``pyparsing`` namespace, and documented there rather than at their +"home" locations where they're defined), the doctest output for a +failing test will not display the correct source location for the +code. Every failing test will be preceded by a reference similar to: + +.. 
code-block:: + + File "../pyparsing/core.py", line ?, in default + +However, this will be followed by a listing of the code that +produced the failing test. So as long as we write examples +which are not too generic and are sufficiently distinct from +each other (which is good practice anyway), it should be easy +enough to find the failing code. + +Diffs on failing tests will include *ALL* differences +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +When ``doctest`` displays the NDIFF-format differences between the +expected output and the actual output, it will indicate **EVERY** +difference between them — even the differences that would otherwise +be ignored. The ``IGNORE_EXCEPTION_DETAIL``, ``ELLIPSIS``, and +``NORMALIZE_WHITESPACE`` options do not apply when NDIFF is generating +the comparison output for a failed test. + +What this means is that, even though NDIFF flags an ellipsized +section of text as a difference from the actual output, or marks a +difference where an output line has been split into two when the +``NORMALIZE_WHITESPACE`` option is enabled, those differences WILL be +ignored when determining whether the doctest passes. It's important to +focus on the differences that *wouldn't* otherwise be ignored, and +just trust that correcting those differences will result in a passing +test. + +For example, consider this failing test: + +.. code-block:: shell-session + :linenos: + :emphasize-lines: 20-23 + + $ make doctest + ... + File "../pyparsing/core.py", line ?, in default + Failed example: + data_word = Word(alphas) + label = data_word + FollowedBy(':') + + attr_expr = ( + label + Suppress(':') + + OneOrMore(data_word, stop_on=label + ).set_parse_action(' '.join)) + + print(attr_expr.parse_string("color: RED")) + + text = "shape: SQUARE posn: upper left color: light blue texture: burlap" + + # print attributes as plain groups + print(attr_expr[1, ...].parse_string(text)) + Differences (ndiff with -expected +actual): + - ['color', "RED"] + ? ^ ^ + + ['color', 'RED'] + ? ^ ^ + + ['shape', 'SQUARE', 'posn', 'upper left', 'color', 'light blue', 'texture', 'burlap'] + - ['shape', 'SQUARE', + - ... 'texture', 'burlap'] + ********************************************************************** + 1 item had failures: + 1 of 208 in default + 208 tests in 1 item. + 207 passed and 1 failed. + ***Test Failed*** 1 failure. + +The **only** significant difference is the highlighted one: The wrong +quotes used around the word ``"RED"`` in the expected output. Once that's +changed to ``'RED'``, the doctest will pass. The remaining diff line(s), +where the expected output uses an ellipsis and is split over two lines +(with ``NORMALIZE_WHITESPACE`` enabled), will not fail despite being +shown as differing from the actual output. (Technically it *does* differ, +after all. The configuration simply ignores that difference.) diff --git a/docs/_static/pyparsing.css b/docs/_static/pyparsing.css new file mode 100644 index 00000000..dab72a2d --- /dev/null +++ b/docs/_static/pyparsing.css @@ -0,0 +1,52 @@ +/* Deprecated items get a red border spanning the entire length of the doc + * (class, function, etc.), and the message itself has a faint red + * background shading, but no border. 
*/ +dl.py:where(.class,.exception,.method,.function):has(> dd > div.deprecated) +{ + margin-inline-start: -0.6rem; + border-inline-start: 0.4rem solid #f00; + padding: 0 0.6rem 0 0.2rem; +} + +span.deprecated { + background-color: #fee; + font-weight: bold; + text-decoration: #f00 underline; + padding: 0 0.5rem; +} + +/* Added and changed get a blue or orange (respectively) border next to + * the message only, plus a light background shade of the same color + * (again, only on the message, not the rest of the doc). */ +div.versionadded, div.versionchanged +{ + border-inline-start: 0.4rem solid transparent; +} + +div.versionchanged p, +div.versionadded p, +span.versionmodified { + line-height: initial; + padding-bottom: 0.2rem; + padding-top: 0.2rem; +} +span.versionmodified { + padding-inline-start: 0.5rem; + padding-inline-end: 0.2rem; + margin-inline-end: 0.5rem; + line-height: 130%; +} + +div.versionchanged p, +div.versionadded p { + padding-inline-start: 0.5rem; +} +span.versionmodified { + margin-inline-start: -0.5rem; /* Make up for padding above */ +} + +div.versionadded { border-color: #2d67f3; } +div.versionadded span.added { background-color: #d1e5ff; } + +div.versionchanged { border-color: #ff9800; } +div.versionchanged span.changed { background-color: #ffddac; } diff --git a/docs/_static/pyparsingClassDiagram_3.0.0.jpg b/docs/_static/pyparsingClassDiagram_3.0.0.jpg deleted file mode 100644 index f65e5f1a..00000000 Binary files a/docs/_static/pyparsingClassDiagram_3.0.0.jpg and /dev/null differ diff --git a/docs/_static/pyparsingClassDiagram_3.0.9.jpg b/docs/_static/pyparsingClassDiagram_3.0.9.jpg new file mode 100644 index 00000000..d92feed4 Binary files /dev/null and b/docs/_static/pyparsingClassDiagram_3.0.9.jpg differ diff --git a/docs/conf.py b/docs/conf.py index ce571f9b..642c16c5 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -1,19 +1,20 @@ -# -# Configuration file for the Sphinx documentation builder. -# +"""Configuration file for the Sphinx documentation builder.""" + # This file does only contain a selection of the most common options. For a # full list see the documentation: # https://www.sphinx-doc.org/en/master/usage/configuration.html -# -- Path setup -------------------------------------------------------------- +# pylint: disable=all +import doctest +import os +import sys + +# -- Path setup -------------------------------------------------------------- # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. # -import os -import sys - sys.path.insert(0, os.path.abspath("..")) from pyparsing import __version__ as pyparsing_version @@ -21,7 +22,7 @@ # -- Project information ----------------------------------------------------- project = "PyParsing" -copyright = "2018-2021, Paul T. McGuire" +copyright = "2018-2024, Paul T. McGuire" author = "Paul T. McGuire" # The short X.Y version @@ -41,16 +42,19 @@ # ones. extensions = [ "sphinx.ext.autodoc", + "sphinx.ext.doctest", + "myst_parser", ] # Add any paths that contain templates here, relative to this directory. templates_path = ["_templates"] # The suffix(es) of source filenames. -# You can specify multiple suffix as a list of string: -# -# source_suffix = ['.rst', '.md'] -source_suffix = ".rst" +# You can specify multiple suffixes: +source_suffix = { + '.rst': 'restructuredtext', + '.md': 'markdown', +} # The master toctree document. 
master_doc = "index" @@ -60,7 +64,7 @@ # # This is also used if you do content translation via gettext catalogs. # Usually you set "language" from the command line for these cases. -language = None +language = "en" # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. @@ -82,13 +86,20 @@ # further. For a list of options available for each theme, see the # documentation. # -# html_theme_options = {} +html_theme_options = { + 'github_user': 'pyparsing', + 'github_repo': 'pyparsing', +} # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". html_static_path = ["_static"] +html_css_files = { + "pyparsing.css": "*", +} + # Custom sidebar templates, must be a dictionary that maps document names # to template names. # @@ -97,7 +108,22 @@ # default: ``['localtoc.html', 'relations.html', 'sourcelink.html', # 'searchbox.html']``. # -# html_sidebars = {} +html_sidebars = { + '**': [ + 'about.html', + 'searchfield.html', + 'navigation.html', + 'relations.html', + 'donate.html', + ], + 'pyparsing': [ + 'about.html', + 'searchfield.html', + 'localtoc.html', + 'relations.html', + 'donate.html', + ], +} # -- Options for HTMLHelp output --------------------------------------------- @@ -183,4 +209,42 @@ epub_exclude_files = ["search.html"] +# -- Domain configuration ---------------------------------------------------- + +python_use_unqualified_type_names = True +python_display_short_literal_types = True +python_maximum_signature_line_length = 100 +add_module_names = False +toc_object_entries_show_parents = 'hide' + # -- Extension configuration ------------------------------------------------- +autodoc_class_signature = 'mixed' +autodoc_mock_imports = ['railroad'] +autodoc_preserve_defaults = True +autodoc_default_options = { + 'class-doc-from': 'both', + 'undoc-members': True, + 'show-inheritance': True, +} + +doctest_global_setup = ''' +import math +import string +import pprint +import pyparsing +import pyparsing.common +ppc = pyparsing.common +ppu = pyparsing.unicode +import pyparsing.util +import pyparsing as pp +from pyparsing import * +''' + +doctest_default_flags = ( + doctest.ELLIPSIS + | doctest.IGNORE_EXCEPTION_DETAIL + | doctest.DONT_ACCEPT_TRUE_FOR_1 + | doctest.REPORT_NDIFF +) + +myst_heading_anchors = 3 diff --git a/docs/index.rst b/docs/index.rst index 65f05571..10574be9 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -7,13 +7,18 @@ Welcome to PyParsing's documentation! ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Release v\ |version| +.. rubric:: Contents + .. toctree:: :maxdepth: 2 - :caption: Contents: - whats_new_in_3_0_0 HowToUsePyparsing - modules + whats_new_in_3_2 + whats_new_in_3_1 + whats_new_in_3_0_0 + pyparsing + CONTRIBUTING + Writing_Doctests CODE_OF_CONDUCT diff --git a/docs/make_sphinx_docs.bat b/docs/make_sphinx_docs.bat new file mode 100644 index 00000000..340aca3d --- /dev/null +++ b/docs/make_sphinx_docs.bat @@ -0,0 +1 @@ +sphinx-build.exe -E -b html . _build diff --git a/docs/modules.rst b/docs/modules.rst deleted file mode 100644 index 6163a45a..00000000 --- a/docs/modules.rst +++ /dev/null @@ -1,7 +0,0 @@ -pyparsing -========= - -.. 
toctree:: - :maxdepth: 4 - - pyparsing diff --git a/docs/pyparsing.rst b/docs/pyparsing.rst index 6d51a78d..6102a44a 100644 --- a/docs/pyparsing.rst +++ b/docs/pyparsing.rst @@ -1,7 +1,22 @@ -pyparsing module -================ +************* +pyparsing API +************* .. automodule:: pyparsing :members: - :special-members: - :show-inheritance: + :special-members: __add__,__sub__,__div__,__mul__,__and__,__or__,__xor__,__lshift__,__invert__,__call__,__getitem__,__str__ + :exclude-members: __init__,__repr__,parseImpl,parseImpl_regex,parseImplAsGroupList,parseImplAsMatch,postParse,preParse + +Module ``pyparsing.diagram`` +---------------------------- + +.. automodule:: pyparsing.diagram + :members: + +.. 'hidden' prevents the toctree from appearing at the bottom of the page + +.. toctree:: + :maxdepth: 2 + :hidden: + + self diff --git a/docs/pyparsing_class_diagram.puml b/docs/pyparsing_class_diagram.puml index cf8d1ebb..f90f99e2 100644 --- a/docs/pyparsing_class_diagram.puml +++ b/docs/pyparsing_class_diagram.puml @@ -22,7 +22,6 @@ class globals { quoted_string sgl_quoted_string dbl_quoted_string -delimited_list() counted_array() match_previous_literal() match_previous_expr() @@ -185,6 +184,7 @@ class Each class OneOrMore class ZeroOrMore +class DelimitedList class SkipTo class Group class Forward { @@ -246,6 +246,7 @@ ParseElementEnhance <|-- Located ParseElementEnhance <|--- _MultipleMatch _MultipleMatch <|-- OneOrMore _MultipleMatch <|-- ZeroOrMore +ParseElementEnhance <|-- DelimitedList ParseElementEnhance <|--- NotAny ParseElementEnhance <|--- FollowedBy ParseElementEnhance <|--- PrecededBy diff --git a/docs/pyparsing_class_diagram.svg b/docs/pyparsing_class_diagram.svg deleted file mode 100644 index 9a9e7ac3..00000000 --- a/docs/pyparsing_class_diagram.svg +++ /dev/null @@ -1,836 +0,0 @@ -[836 lines of SVG markup removed: the source of the "pyparsing 3.0.9 Class Diagram" (dated May, 2022), superseded by pyparsingClassDiagram_3.0.9.jpg]
\ No newline at end of file diff --git a/docs/requirements.txt b/docs/requirements.txt new file mode 100644 index 00000000..6185f18b --- /dev/null +++ b/docs/requirements.txt @@ -0,0 +1,3 @@ +sphinx < 8.2 +myst-parser + diff --git a/docs/whats_new_in_3_0_0.rst b/docs/whats_new_in_3_0_0.rst index 10651cda..3068958f 100644 --- a/docs/whats_new_in_3_0_0.rst +++ b/docs/whats_new_in_3_0_0.rst @@ -4,13 +4,11 @@ What's New in Pyparsing 3.0.0 :author: Paul McGuire -:date: April, 2022 +:date: May, 2022 :abstract: This document summarizes the changes made in the 3.0.0 release of pyparsing. - (Updated to reflect changes up to 3.0.8) - -.. sectnum:: :depth: 4 + (Updated to reflect changes up to 3.0.10) .. 
contents:: :depth: 4 @@ -62,6 +60,20 @@ generator for documenting pyparsing parsers.:: # save as HTML parser.create_diagram('parser_rr_diag.html') +``create_diagram`` accepts these named arguments: + +- ``vertical`` (int) - threshold for formatting multiple alternatives vertically + instead of horizontally (default=3) +- ``show_results_names`` - bool flag whether diagram should show annotations for + defined results names +- ``show_groups`` - bool flag whether groups should be highlighted with an unlabeled surrounding box +- ``embed`` - bool flag whether generated HTML should omit ``<html>``, ``<head>``, and ``<body>`` tags to embed + the resulting HTML in an enclosing HTML source (new in 3.0.10) +- ``head`` - str containing additional HTML to insert into the ``<head>`` section of the + generated code; can be used to insert custom CSS styling +- ``body`` - str containing additional HTML to insert at the beginning of the ``<body>`` section of the + generated code + To use this new feature, install the supporting diagramming packages using:: pip install pyparsing[diagrams] @@ -200,7 +212,7 @@ just namespaces, to add some helpful behavior: (**currently not working on PyPy**) Support for yielding native Python ``list`` and ``dict`` types in place of ``ParseResults`` -------------------------------------------------------------------------------- +------------------------------------------------------------------------------------------- To support parsers that are intended to generate native Python collection types such as lists and dicts, the ``Group`` and ``Dict`` classes now accept an additional boolean keyword argument ``aslist`` and ``asdict`` respectively. See @@ -226,7 +238,7 @@ This is the mechanism used internally by the ``Group`` class when defined using ``aslist=True``. New Located class to replace ``locatedExpr`` helper method ------------------------------------------------------- +---------------------------------------------------------- The new ``Located`` class will replace the current ``locatedExpr`` method for marking parsed results with the start and end locations of the parsed data in the input string. ``locatedExpr`` had several bugs, and returned its results @@ -279,7 +291,7 @@ leading whitespace.:: [This is a fix to behavior that was added in 3.0.0, but was actually a regression from 2.4.x.] New ``IndentedBlock`` class to replace ``indentedBlock`` helper method --------------------------------------------------------------- +---------------------------------------------------------------------- The new ``IndentedBlock`` class will replace the current ``indentedBlock`` method for defining indented blocks of text, similar to Python source code. Using ``IndentedBlock``, the expression instance itself keeps track of the indent stack, @@ -392,7 +404,7 @@ Other new features common fields in URLs. See the updated ``urlExtractorNew.py`` file in the ``examples`` directory. Submitted by Wolfgang Fahl. -- ``delimited_list`` now supports an additional flag ``allow_trailing_delim``, +- ``DelimitedList`` now supports an additional flag ``allow_trailing_delim``, to optionally parse an additional delimiter at the end of the list. Submitted by Kazantcev Andrey. 
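  For example, a minimal sketch (the delimiters are suppressed by default)::

      DelimitedList(Word(alphas), allow_trailing_delim=True).parse_string("a, b, c,")
      # -> ['a', 'b', 'c']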
@@ -661,7 +673,8 @@ counted_array countedArray cpp_style_comment cppStyleComment dbl_quoted_string dblQuotedString dbl_slash_comment dblSlashComment -delimited_list delimitedList +DelimitedList delimitedList +DelimitedList delimited_list dict_of dictOf html_comment htmlComment infix_notation infixNotation @@ -798,4 +811,4 @@ to jdufresne, klahnakoski, mattcarmody, ckeygusuz, tmiguelt, and toonarmycaptain to name just a few. Thanks also to Michael Milton and Max Fischer, who added some -significant new features to pyparsing. \ No newline at end of file +significant new features to pyparsing. diff --git a/docs/whats_new_in_3_1.rst b/docs/whats_new_in_3_1.rst new file mode 100644 index 00000000..45914c2f --- /dev/null +++ b/docs/whats_new_in_3_1.rst @@ -0,0 +1,326 @@ +============================= +What's New in Pyparsing 3.1.0 +============================= + +:author: Paul McGuire + +:date: October, 2024 + +:abstract: This document summarizes the changes made + in the 3.1.x releases of pyparsing. + +.. contents:: :depth: 4 + + +Supported Python versions +========================= +- Added support for Python 3.12. + +- All internal string expressions using '%' string interpolation and ``str.format()`` + converted to f-strings. + + +New Features +============ +- Added new ``Tag`` ParserElement, for inserting metadata into the parsed results. + This allows a parser to add metadata or annotations to the parsed tokens. + The ``Tag`` element also accepts an optional ``value`` parameter, defaulting to ``True``. + See the new ``tag_metadata.py`` example in the ``examples`` directory. + + Example: + + .. doctest:: + + >>> # add tag indicating mood + >>> end_punc = "." | ("!" + Tag("enthusiastic")) + >>> greeting = "Hello" + Word(alphas) + end_punc + + >>> result = greeting.parse_string("Hello World.") + >>> print(result.dump()) + ['Hello', 'World', '.'] + + >>> result = greeting.parse_string("Hello World!") + >>> print(result.dump()) + ['Hello', 'World', '!'] + - enthusiastic: True + +- Extended ``expr[]`` notation for repetition of ``expr`` to accept a + slice, where the slice's stop value indicates a ``stop_on`` + expression: + + .. testcode:: + + test = "BEGIN aaa bbb ccc END" + BEGIN, END = Keyword.using_each("BEGIN END".split()) + body_word = Word(alphas) + + # new slice syntax support + expr = BEGIN + Group(body_word[...:END]) + END + # equivalent to + # BEGIN + Group(ZeroOrMore(body_word, stop_on=END)) + END + + print(expr.parse_string(test)) + + Prints: + + .. testoutput:: + + ['BEGIN', ['aaa', 'bbb', 'ccc'], 'END'] + +- Added new class method ``ParserElement.using_each``, to simplify code + that creates a sequence of ``Literals``, ``Keywords``, or other ``ParserElement`` + subclasses. + + For instance, to define suppressible punctuation, you would previously + write:: + + LPAR, RPAR, LBRACE, RBRACE, SEMI = map(Suppress, "(){};") + + You can now write:: + + LPAR, RPAR, LBRACE, RBRACE, SEMI = Suppress.using_each("(){};") + + ``using_each`` will also accept optional keyword args, which it will + pass through to the class initializer. Here is an expression for + single-letter variable names that might be used in an algebraic + expression: + + .. testcode:: + + algebra_var = MatchFirst( + Char.using_each(string.ascii_lowercase, as_keyword=True) + ) + +- Added new builtin ``python_quoted_string``, which will match any form + of single-line or multiline quoted strings defined in Python. 
+ +- ``Word`` arguments are now validated when ``min`` and ``max`` are both + given, to ensure that ``min`` <= ``max``; raises ``ValueError`` if they are not. + +- Added '·' (Unicode MIDDLE DOT) to the set of ``Latin1.identbodychars``. + +- Added ``ieee_float`` expression to ``pyparsing.common``, which parses float values, + plus "NaN", "Inf", "Infinity". + +- Minor performance speedup in ``trim_arity``, to benefit any parsers using parse actions. + + +API Changes +=========== +- ``Optional(expr)`` may now be written as ``expr | ""`` + + This will make this code: + + .. testcode:: + + "{" + Optional(Literal("A") | Literal("a")) + "}" + + writable as: + + .. testcode:: + + "{" + (Literal("A") | Literal("a") | "") + "}" + + Some related changes implemented as part of this work: + - ``Literal("")`` now internally generates an ``Empty()`` (and no longer raises an exception) + - ``Empty`` is now a subclass of ``Literal`` + +- Added new class property ``identifier`` to all Unicode set classes in ``pyparsing.unicode``, + using the class's values for ``cls.identchars`` and ``cls.identbodychars``. Now Unicode-aware + parsers that formerly wrote: + + .. testcode:: + + ppu = pyparsing.unicode + ident = Word(ppu.Greek.identchars, ppu.Greek.identbodychars) + + can now write: + + .. testcode:: + + ident = ppu.Greek.identifier + # or + ident = ppu.Ελληνικά.identifier + +- Added bool ``embed`` argument to ``ParserElement.create_diagram()``. + When passed as True, the resulting diagram will omit the ``<html>``, + ``<head>``, and ``<body>`` tags so that it can be embedded in other + HTML source. (Useful when embedding a call to ``create_diagram()`` in + a PyScript HTML page.) + +- Added ``recurse`` argument to ``ParserElement.set_debug`` to set the + debug flag on an expression and all of its sub-expressions. + +- Reworked ``delimited_list`` function into the new ``DelimitedList`` class. + ``DelimitedList`` has the same constructor interface as ``delimited_list``, and + in this release, ``delimited_list`` changes from a function to a synonym for + ``DelimitedList``. ``delimited_list`` and the older ``delimitedList`` method will be + deprecated in a future release, in favor of ``DelimitedList``. + +- ``ParseResults`` now has a new method ``deepcopy()``, in addition to the current + ``copy()`` method. ``copy()`` only makes a shallow copy - any contained ``ParseResults`` + are copied as references - changes in the copy will be seen as changes in the original. + In many cases, a shallow copy is sufficient, but some applications require a deep copy. + ``deepcopy()`` makes a deeper copy: any contained ``ParseResults`` or other mappings or + containers are built with copies from the original, and do not get changed if the + original is later changed (see the sketch following this list). + +- Added named field "url" to ``pyparsing.common.url``, returning the entire + parsed URL string. + +- Added exception type to ``trace_parse_action`` exception output. + +- Added ``<meta charset="UTF-8">`` tag to HTML generated for railroad diagrams to force UTF-8 encoding + with older browsers, to better display Unicode parser characters. + +- To address a compatibility issue in RDFLib, added a property setter for the + ``ParserElement.name`` property, to call ``ParserElement.set_name``. + +- Modified ``ParserElement.set_name()`` to accept a None value, to clear the defined + name and corresponding error message for a ``ParserElement``. + +- Updated railroad diagram generation for ``ZeroOrMore`` and ``OneOrMore`` expressions with + ``stop_on`` expressions. 
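A minimal sketch of the ``copy()``/``deepcopy()`` difference described above
(the expression and values are invented for the example)::

    result = (Group(Word(alphas)) + Group(Word(alphas))).parse_string("ab cd")
    shallow = result.copy()
    deep = result.deepcopy()
    result[0][0] = "XX"      # mutate a nested ParseResults in the original
    print(shallow[0][0])     # -> 'XX'  (nested results are shared references)
    print(deep[0][0])        # -> 'ab'  (fully independent copy)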
+ + +Discontinued Features +===================== + +Python 2.x no longer supported +------------------------------ +Removed Py2.x support and other deprecated features. Pyparsing +now requires Python 3.6.8 or later. If you are using an earlier +version of Python, you must use a Pyparsing 2.4.x version. + +Other discontinued / deprecated features +---------------------------------------- +- ``ParserElement.validate()`` is deprecated. It predates the support for left-recursive + parsers, and was prone to false positives (warning that a grammar was invalid when + it was in fact valid). It will be removed in a future pyparsing release. In its + place, developers should use debugging and analytical tools, such as ``ParserElement.set_debug()`` + and ``ParserElement.create_diagram()``. + + +Fixed Bugs +========== +- Updated ``ci.yml`` permissions to limit default access to source. + +- Updated ``create_diagram()`` code to be compatible with railroad-diagrams package + version 3.0. + +- Fixed bug in ``pyparsing.common.url``, when input URL is not alone + on an input line. + +- Fixed bug in srange, when parsing escaped '/' and '\' inside a + range set. + +- Fixed exception messages for some ``ParserElements`` with custom names, + which instead showed their contained expression names. + +- Fixed bug in ``Word`` when ``max=2``. Also added performance enhancement + when specifying ``exact`` argument. + +- Fixed bug when parse actions returned an empty string for an expression that + had a results name, that the results name was not saved. That is: + + .. doctest:: + + >>> expr = Literal("X").add_parse_action(lambda tokens: "")("value") + >>> result = expr.parse_string("X") + >>> result["value"] + '' + + would raise a ``KeyError``. Now empty strings will be saved with the associated + results name. + +- Fixed bug in ``SkipTo`` where ignore expressions were not properly handled while + scanning for the target expression. + +- Fixed bug in ``NotAny``, where parse actions on the negated expr were not being run. + This could cause ``NotAny`` to incorrectly fail if the expr would normally match, + but would fail to match if a condition used as a parse action returned False. + +- Fixed ``create_diagram()`` to accept keyword args, to be passed through to the + ``template.render()`` method to generate the output HTML. + +- Fixed bug in ``python_quoted_string`` regex. + +- Fixed regression in Word(min). + +- Fixed bug in bad exception messages raised by Forward expressions. + +- Fixed regression in SkipTo, where ignored expressions were not checked when looking + for the target expression. + +- Updated pep8 synonym wrappers for better type checking compatibility. + +- Fixed empty error message bug. This _should_ return + pyparsing's exception messages to a former, more helpful form. If you have code that + parses the exception messages returned by pyparsing, this may require some code + changes. + +- Fixed issue where PEP8 compatibility names for ``ParserElement`` static methods were + not themselves defined as ``staticmethods``. When called using a ``ParserElement`` instance, + this resulted in a ``TypeError`` exception. 
+ +- Fixed some cosmetics/bugs in railroad diagrams: + + - fixed groups being shown even when ``show_groups`` = False + + - show results names as quoted strings when ``show_results_names`` = True + + - only use integer loop counter if repetition > 2 + + +New / Enhanced Examples +======================= +- Added example ``mongodb_query_expression.py``, to convert human-readable infix query + expressions, such as:: + + a==100 and b>=200 + + and transform them into an equivalent query argument for the pymongo package:: + + {'$and': [{'a': 100}, {'b': {'$gte': 200}}]} + + Supports many equality and inequality operators - see the docstring for the + ``transform_query`` function for many more examples. + +- ``invRegex.py`` example renamed to ``inv_regex.py`` and updated to PEP-8 + variable and method naming. + +- Removed examples ``sparser.py`` and ``pymicko.py``, since each included its + own GPL license in the header. Since this conflicts with pyparsing's + MIT license, they were removed from the distribution to avoid + confusion among those making use of them in their own projects. + +- Updated the ``lucene_grammar.py`` example (better support for '*' and '?' wildcards) + and corrected the test cases! + +- Added ``bf.py`` Brainf*ck parser/executor example. Illustrates using + a pyparsing grammar to parse language syntax, and attach executable AST nodes to + the parsed results. + +- Added ``tag_emitter.py`` to examples. This example demonstrates how to insert + tags into your parsed results that are not part of the original parsed text. + +- Updated example ``select_parser.py`` to use PEP8 names and added Groups for better retrieval + of parsed values from multiple SELECT clauses. + +- Added example ``email_address_parser.py``. + +- Added example ``directx_x_file_parser.py`` to parse DirectX template definitions, and + generate a Pyparsing parser from a template to parse .x files. + +- ``delta_time``, ``lua_parser``, ``decaf_parser``, and ``roman_numerals`` examples cleaned up + to use latest PEP8 names and add minor enhancements. + +- Fixed bug (and corresponding test code) in ``delta_time`` example that did not handle + weekday references in time expressions (like "Monday at 4pm") when the weekday was + the same as the current weekday. + + +Acknowledgments +=============== +Again, thanks to the many contributors who submitted issues, questions, suggestions, +and PRs. diff --git a/docs/whats_new_in_3_2.rst b/docs/whats_new_in_3_2.rst new file mode 100644 index 00000000..3e1f3ef2 --- /dev/null +++ b/docs/whats_new_in_3_2.rst @@ -0,0 +1,127 @@ +============================= +What's New in Pyparsing 3.2.0 +============================= + +:author: Paul McGuire + +:date: October, 2024 + +:abstract: This document summarizes the changes made + in the 3.2.x releases of pyparsing. + +.. contents:: :depth: 4 + + +Supported Python versions +========================= + +- Added support for Python 3.13. + +- Python versions before 3.9 are no longer supported. + Removed legacy Py2.x support and other deprecated features. Pyparsing + now requires Python 3.9 or later. If you are using an earlier 3.x + version of Python, use pyparsing 3.1; for Python 2.x, use Pyparsing + 2.4.7. + + +New Features +============ + +- Added type annotations to remainder of ``pyparsing`` package, and added ``mypy`` + run to ``tox.ini``, so that type annotations are now run as part of pyparsing's CI. 
+ +- Exception message format can now be customized, by overriding + ``ParseBaseException.formatted_message``:: + + def custom_exception_message(exc) -> str: + found_phrase = f", found {exc.found}" if exc.found else "" + return f"{exc.lineno}:{exc.column} {exc.msg}{found_phrase}" + + ParseBaseException.formatted_message = custom_exception_message + +- ``run_tests`` now detects if an exception is raised in a parse action, and will + report it with an enhanced error message, with the exception type, string, + and parse action name. + +- ``QuotedString`` now handles translation of escaped integer, hex, octal, and + Unicode sequences to their corresponding characters. + +- Defined a more performant regular expression used internally by ``common_html_entity``. + +- ``Regex`` instances can now be created using a callable that takes no arguments + and just returns a string or a compiled regular expression, so that creating complex + regular expression patterns can be deferred until they are actually used for the first + time in the parser. + +- Fixed the displayed output of ``Regex`` terms to deduplicate repeated backslashes, + for easier reading in debugging, printing, and railroad diagrams. + +- Fixed railroad diagrams that get generated with a parser containing a Regex element + defined using a verbose pattern - the pattern gets flattened and comments removed + before creating the corresponding diagram element. + + +API Changes +=========== + +Possible breaking changes +------------------------- +- Fixed code in ``ParseElementEnhance`` subclasses that + replaced detailed exception messages raised in contained expressions with a + less-specific and less-informative generic exception message and location. + + If your code has conditional logic based on the message content in raised + ``ParseExceptions``, this bugfix may require changes in your code. + +- Fixed bug in ``transform_string()`` where whitespace + in the input string was not properly preserved in the output string. + + If your code uses ``transform_string``, this bugfix may require changes in + your code. + +- Fixed bug where an ``IndexError`` raised in a parse action was + incorrectly handled as an ``IndexError`` raised as part of the ``ParserElement`` + parsing methods, and reraised as a ``ParseException``. Now an ``IndexError`` + that raises inside a parse action will properly propagate out as an ``IndexError``. + + If your code raises ``IndexError`` in parse actions, this bugfix may require + changes in your code. + + +Additional API changes +---------------------- +- Added optional ``flatten`` Boolean argument to ``ParseResults.as_list()``, to + return the parsed values in a flattened list. + +- Added ``indent`` and ``base_1`` arguments to ``pyparsing.testing.with_line_numbers``. When + using ``with_line_numbers`` inside a parse action, set ``base_1=False``, since the + reported ``loc`` value is 0-based. ``indent`` can be a leading string (typically of + spaces or tabs) to indent the numbered string passed to ``with_line_numbers``. 
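As a small sketch of the deferred-``Regex`` feature described under New Features
above (the ``build_pattern`` helper name is invented for this example)::

    import re
    import pyparsing as pp

    def build_pattern():
        # not compiled until the parser first uses this expression
        return re.compile(r"\d{4}-\d{2}-\d{2}")

    iso_date = pp.Regex(build_pattern)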
+ + +New / Enhanced Examples +======================= +- Added query syntax to ``mongodb_query_expression.py`` with: + + - better support for array fields ("contains all", + "contains any", and "contains none") + - "like" and "not like" operators to support SQL "%" wildcard matching + and "=~" operator to support regex matching + - text search using "search for" + - dates and datetimes as query values + - ``a[0]`` style array referencing + +- Added ``lox_parser.py`` example, a parser for the Lox language used as a tutorial in + Robert Nystrom's "Crafting Interpreters" (http://craftinginterpreters.com/). + +- Added ``complex_chemical_formulas.py`` example, to add parsing capability for + formulas such as "Ba(BrO₃)₂·H₂O". + +- Updated ``tag_emitter.py`` to use new ``Tag`` class, introduced in pyparsing + 3.1.3. + + +Acknowledgments +=============== +Again, thanks to the many contributors who submitted issues, questions, suggestions, +and PRs. diff --git a/docs/whats_new_in_x_x_template.rst.txt b/docs/whats_new_in_x_x_template.rst.txt new file mode 100644 index 00000000..1befde71 --- /dev/null +++ b/docs/whats_new_in_x_x_template.rst.txt @@ -0,0 +1,43 @@ +============================= +What's New in Pyparsing 3.x.0 +============================= + +:author: Paul McGuire + +:date: month, year + +:abstract: This document summarizes the changes made + in the 3.x.x releases of pyparsing. + +.. contents:: :depth: 4 + + +Supported Python versions +========================= + +New Features +============ + +API Changes +=========== + +Discontinued Features +===================== + + +Other discontinued features +--------------------------- + + +Fixed Bugs +========== + + +New / Enhanced Examples +======================= + + +Acknowledgments +=============== +Again, thanks to the many contributors who submitted issues, questions, suggestions, +and PRs. diff --git a/examples/0README.html b/examples/0README.html deleted file mode 100644 index 617c16e5..00000000 --- a/examples/0README.html +++ /dev/null @@ -1,295 +0,0 @@ - -pyparsing Examples - -

-  [... 295 lines of the old HTML-formatted examples index removed here: a
-   bulleted list of the example scripts (greeting.py, wordsToNum.py, fourFn.py,
-   simpleSQL.py, ebnf.py, etc.), each with a short description and contributor
-   credit; superseded by the Markdown examples/README.md added below ...]
- - diff --git a/examples/README.md b/examples/README.md new file mode 100644 index 00000000..efb50325 --- /dev/null +++ b/examples/README.md @@ -0,0 +1,123 @@ +# Pyparsing Examples + +This directory contains a number of examples of parsers created using pyparsing. They fall into a few general +categories (several examples include supporting railroad diagrams): + + +* [Pyparsing tutorial and language feature demonstrations](#pyparsing-tutorial-and-language-feature-demonstrations) +* [Language parsers](#language-parsers) +* [Domain Specific Language parsers](#domain-specific-language-parsers) +* [Search and query language parsers](#search-and-query-language-parsers) +* [Data format parsers](#data-format-parsers) +* [Logical and arithmetic infix notation parsers and examples](#logical-and-arithmetic-infix-notation-parsers-and-examples) +* [Helpful utilities](#helpful-utilities) + + +## Pyparsing tutorial and language feature demonstrations + * Hello World! + * [greeting.py](./greeting.py) + * [greetingInGreek.py](./greetingInGreek.py) + * [greetingInKorean.py](./greetingInKorean.py) + * [hola_mundo.py](./hola_mundo.py) + * left recursion + * [left_recursion.py](./left_recursion.py) + * macro expansion + * [macro_expander.py](./macro_expander.py) + * Roman numerals + * [roman_numerals.py](./roman_numerals.py) + * Unicode text handling + * [tag_metadata.py](./tag_metadata.py) [(diagram)](./tag_metadata_diagram.html) + * chemical formulas + * [chemical_formulas.py](./chemical_formulas.py) + * [complex_chemical_formulas.py](./complex_chemical_formulas.py) + * API checker + * [apicheck.py](./apicheck.py) [(diagram)](./apicheck_diagram.html) + * scan_string examples + * [scanExamples.py](./scanExamples.py) + * transform_string examples + * [include_preprocessor.py](./include_preprocessor.py) + * [macro_expander.py](./macro_expander.py) + * [nested_markup.py](./nested_markup.py) + * parse actions and conditions + * [shapes.py](./shapes.py) + * [number_words.py](./number_words.py) [(diagram)](./number_words_diagram.html) + * [wordsToNum.py](./wordsToNum.py) + * [range_check.py](./range_check.py) + * [one_to_ninety_nine.py](./one_to_ninety_nine.py) + * railroad diagrams + * [railroad_diagram_demo.py](./railroad_diagram_demo.py) [(diagram)](./railroad_diagram_demo.html) + * web page scraping + * [getNTPserversNew.py](./getNTPserversNew.py) + * [html_stripper.py](./html_stripper.py) + * [html_table_parser.py](./html_table_parser.py) + * [urlExtractorNew.py](./urlExtractorNew.py) +## Language parsers + * C + * [oc.py](./oc.py) + * lua + * [lua_parser.py](./lua_parser.py) [(diagram)](./lua_parser_diagram.html) + * lox + * [lox_parser.py](./lox_parser.py) [(diagram)](./lox_parser_diagram.html) + * verilog + * [verilog_parse.py](./verilog_parse.py) + * brainf*ck + * [bf.py](./bf.py) [(diagram)](./bf_diagram.html) + * decaf + * [decaf_parser.py](./decaf_parser.py) [(diagram)](./decaf_parser_diagram.html) + * S-expression + * [sexpParser.py](./sexpParser.py) + * rosetta code + * [rosettacode.py](./rosettacode.py) [(diagram)](./rosettacode_diagram.html) +## Domain Specific Language parsers + * adventureEngine - interactive fiction parser and game runner + * [adventureEngine.py](./adventureEngine.py) [(diagram)](./adventure_game_parser_diagram.html) + * pgn - Chess notation parser + * [pgn.py](./pgn.py) + * TAP - Test results parser + * [TAP.py](./TAP.py) [(diagram)](./TAP_diagram.html) + * EBNF - Extended Backus-Naur Format parser (and compiler to a running pyparsing parser) + * [ebnf.py](./ebnf.py) 
[(diagram)](./ebnf_diagram.html)
+    * [ebnf_number_words.py](./ebnf_number_words.py) [(diagram)](./ebnf_number_parser_diagram.html)
+## Search and query language parsers
+  * basic search
+    * [searchparser.py](./searchparser.py) [demo](./searchParserAppDemo.py)
+  * lucene
+    * [lucene_grammar.py](./lucene_grammar.py) [(diagram)](./lucene_grammar_diagram.html)
+  * mongodb query
+    * [mongodb_query_expression.py](./mongodb_query_expression.py) [(diagram)](./mongodb_query_expression.html)
+  * SQL
+    * [select_parser.py](./select_parser.py) (SELECT statements)
+    * [sql2dot.py](./sql2dot.py) (TABLE DML statements)
+  * BigQuery view
+    * [bigquery_view_parser.py](./bigquery_view_parser.py)
+## Data format parsers
+  * JSON
+    * [jsonParser.py](./jsonParser.py)
+  * protobuf
+    * [protobuf_parser.py](./protobuf_parser.py)
+  * stackish
+    * [stackish.py](./stackish.py)
+  * CORBA IDL
+    * [idlparse.py](./idlparse.py)
+## Logical and arithmetic infix notation parsers and examples
+  * [fourFn.py](./fourFn.py)
+  * [simpleArith.py](./simpleArith.py)
+  * [eval_arith.py](./eval_arith.py)
+  * [SimpleCalc.py](./SimpleCalc.py)
+  * [LAparser.py](./LAparser.py) (linear algebra)
+  * [simpleBool.py](./simpleBool.py)
+## Helpful utilities
+  * parse time expressions ("2pm the day after tomorrow")
+    * [delta_time.py](./delta_time.py) [(diagram)](./delta_time_diagram.html)
+  * invert regex (generate sample strings matching a regex)
+    * [inv_regex.py](./inv_regex.py)
+  * email addresses
+    * [email_address_parser.py](./email_address_parser.py)
+  * Excel cell formula
+    * [excel_expr.py](./excel_expr.py)
+  * ctypes interfaces code generator from C include.h file
+    * [gen_ctypes.py](./gen_ctypes.py)
+  * log file parsing
+    * [httpServerLogParser.py](./httpServerLogParser.py)
+
+
diff --git a/examples/SimpleCalc.py b/examples/SimpleCalc.py
index 7ace9aea..23b2893d 100644
--- a/examples/SimpleCalc.py
+++ b/examples/SimpleCalc.py
@@ -52,7 +52,7 @@
 #     elif op[0].isalpha():
 #         if op in variables:
 #             return variables[op]
-#         raise Exception("invalid identifier '%s'" % op)
+#         raise Exception(f"invalid identifier {op!r}")
 #     else:
 #         return float( op )
diff --git a/examples/TAP.py b/examples/TAP.py
index 788a656a..dfa16195 100644
--- a/examples/TAP.py
+++ b/examples/TAP.py
@@ -37,6 +37,7 @@
     restOfLine,
     FollowedBy,
     empty,
+    autoname_elements,
 )

 __all__ = ["tapOutputParser", "TAPTest", "TAPSummary"]
@@ -52,7 +53,7 @@
 OK, NOT_OK = map(Literal, ["ok", "not ok"])
 testStatus = OK | NOT_OK

-description = Regex("[^#\n]+")
+description = Regex(r"[^#\n]+")
 description.setParseAction(lambda t: t[0].lstrip("- "))

 TODO, SKIP = map(CaselessLiteral, "TODO SKIP".split())
@@ -79,6 +80,8 @@
     OneOrMore((testLine | bailLine) + NL)
 )("tests")

+autoname_elements()
+

 class TAPTest:
     def __init__(self, results):
@@ -148,15 +151,15 @@ def summary(self, showPassed=False, showAll=False):
         testListStr = lambda tl: "[" + ",".join(str(t.num) for t in tl) + "]"
         summaryText = []
         if showPassed or showAll:
-            summaryText.append("PASSED: %s" % testListStr(self.passedTests))
+            summaryText.append(f"PASSED: {testListStr(self.passedTests)}")
         if self.failedTests or showAll:
-            summaryText.append("FAILED: %s" % testListStr(self.failedTests))
+            summaryText.append(f"FAILED: {testListStr(self.failedTests)}")
         if self.skippedTests or showAll:
-            summaryText.append("SKIPPED: %s" % testListStr(self.skippedTests))
+            summaryText.append(f"SKIPPED: {testListStr(self.skippedTests)}")
         if self.todoTests or showAll:
-            summaryText.append("TODO: %s" % testListStr(self.todoTests))
+            summaryText.append(f"TODO: 
{testListStr(self.todoTests)}") if self.bonusTests or showAll: - summaryText.append("BONUS: %s" % testListStr(self.bonusTests)) + summaryText.append(f"BONUS: {testListStr(self.bonusTests)}") if self.passedSuite: summaryText.append("PASSED") else: @@ -170,6 +173,11 @@ def summary(self, showPassed=False, showAll=False): def main(): + import contextlib + + with contextlib.suppress(Exception): + tapOutputParser.create_diagram("TAP_diagram.html", vertical=3) + test1 = """\ 1..4 ok 1 - Input file opened diff --git a/examples/TAP_diagram.html b/examples/TAP_diagram.html new file mode 100644 index 00000000..fbf6f23d --- /dev/null +++ b/examples/TAP_diagram.html @@ -0,0 +1,645 @@ + + + + + + + + + + + + + + + +
+ [... generated railroad-diagram HTML/SVG omitted: diagrams for tapOutputParser
+  and its sub-expressions (plan, testLine, integer, description, directive,
+  TODO, SKIP, bailLine, empty, rest of line, NL, end of line) ...]
+ + + + diff --git a/examples/adventureEngine.py b/examples/adventureEngine.py index efc096c7..b258b7c7 100644 --- a/examples/adventureEngine.py +++ b/examples/adventureEngine.py @@ -2,39 +2,45 @@ # Copyright 2005-2006, Paul McGuire # # Updated 2012 - latest pyparsing API +# Updated 2023 - using PEP8 API names # -from pyparsing import * +import contextlib import random import string +import pyparsing as pp -def aOrAn(item): - if item.desc[0] in "aeiou": +def a_or_an(item): + if item.desc.startswith(tuple("aeiou")): return "an " + item.desc else: return "a " + item.desc - -def enumerateItems(l): - if len(l) == 0: +def enumerate_items(items_list): + if not items_list: return "nothing" + *all_but_last, last = items_list out = [] - if len(l) > 1: - out.append(", ".join(aOrAn(item) for item in l[:-1])) + if all_but_last: + out.append(", ".join(a_or_an(item) for item in all_but_last)) + if len(all_but_last) > 1: + out[-1] += ',' out.append("and") - out.append(aOrAn(l[-1])) + out.append(a_or_an(last)) return " ".join(out) - -def enumerateDoors(l): - if len(l) == 0: +def enumerate_doors(doors_list): + if not doors_list: return "" + *all_but_last, last = doors_list out = [] - if len(l) > 1: - out.append(", ".join(l[:-1])) + if all_but_last: + out.append(", ".join(all_but_last)) + if len(all_but_last) > 1: + out[-1] += ',' out.append("and") - out.append(l[-1]) + out.append(last) return " ".join(out) @@ -57,10 +63,10 @@ def enter(self, player): if self.gameOver: player.gameOver = True - def addItem(self, it): + def add_item(self, it): self.inv.append(it) - def removeItem(self, it): + def remove_item(self, it): self.inv.remove(it) def describe(self): @@ -71,9 +77,9 @@ def describe(self): is_form = "are" else: is_form = "is" - print("There {} {} here.".format(is_form, enumerateItems(visibleItems))) + print(f"There {is_form} {enumerate_items(visibleItems)} here.") else: - print("You see %s." 
% (enumerateItems(visibleItems))) + print(f"You see {enumerate_items(visibleItems)}.") class Exit(Room): @@ -135,16 +141,16 @@ def __init__(self, desc, contents=None): else: self.contents = [] - def openItem(self, player): + def open_item(self, player): if not self.isOpened: self.isOpened = not self.isOpened if self.contents is not None: for item in self.contents: - player.room.addItem(item) + player.room.add_item(item) self.contents = [] self.desc = "open " + self.desc - def closeItem(self, player): + def close_item(self, player): if self.isOpened: self.isOpened = not self.isOpened if self.desc.startswith("open "): @@ -159,15 +165,15 @@ def __init__(self, verb, verbProg): self.verbProg = verbProg @staticmethod - def helpDescription(): + def help_description(): return "" - def _doCommand(self, player): + def _do_command(self, player): pass def __call__(self, player): print(self.verbProg.capitalize() + "...") - self._doCommand(player) + self._do_command(player) class MoveCommand(Command): @@ -176,11 +182,11 @@ def __init__(self, quals): self.direction = quals.direction[0] @staticmethod - def helpDescription(): + def help_description(): return """MOVE or GO - go NORTH, SOUTH, EAST, or WEST (can abbreviate as 'GO N' and 'GO W', or even just 'E' and 'S')""" - def _doCommand(self, player): + def _do_command(self, player): rm = player.room nextRoom = rm.doors[ { @@ -202,20 +208,20 @@ def __init__(self, quals): self.subject = quals.item @staticmethod - def helpDescription(): + def help_description(): return "TAKE or PICKUP or PICK UP - pick up an object (but some are deadly)" - def _doCommand(self, player): + def _do_command(self, player): rm = player.room subj = Item.items[self.subject] if subj in rm.inv and subj.isVisible: if subj.isTakeable: - rm.removeItem(subj) + rm.remove_item(subj) player.take(subj) else: print(subj.cantTakeMessage) else: - print("There is no %s here." % subj) + print(f"There is no {subj} here.") class DropCommand(Command): @@ -224,17 +230,17 @@ def __init__(self, quals): self.subject = quals.item @staticmethod - def helpDescription(): + def help_description(): return "DROP or LEAVE - drop an object (but fragile items may break)" - def _doCommand(self, player): + def _do_command(self, player): rm = player.room subj = Item.items[self.subject] if subj in player.inv: - rm.addItem(subj) + rm.add_item(subj) player.drop(subj) else: - print("You don't have %s." % (aOrAn(subj))) + print(f"You don't have {a_or_an(subj)}.") class InventoryCommand(Command): @@ -242,11 +248,11 @@ def __init__(self, quals): super().__init__("INV", "taking inventory") @staticmethod - def helpDescription(): + def help_description(): return "INVENTORY or INV or I - lists what items you have" - def _doCommand(self, player): - print("You have %s." 
% enumerateItems(player.inv)) + def _do_command(self, player): + print(f"You have {enumerate_items(player.inv)}.") class LookCommand(Command): @@ -254,24 +260,48 @@ def __init__(self, quals): super().__init__("LOOK", "looking") @staticmethod - def helpDescription(): + def help_description(): return "LOOK or L - describes the current room and any objects in it" - def _doCommand(self, player): + def _do_command(self, player): player.room.describe() +class ExamineCommand(Command): + def __init__(self, quals): + super().__init__("EXAMINE", "examining") + self.subject = Item.items[quals.item] + + @staticmethod + def help_description(): + return "EXAMINE or EX or X - look closely at an object" + + def _do_command(self, player): + msg = random.choice( + [ + "It's {}.", + "It's just {}.", + "It's a beautiful {1}.", + "It's a rare and beautiful {1}.", + "It's a rare {1}.", + "Just {}, nothing special...", + "{0}, just {0}." + ] + ) + print(msg.format(a_or_an(self.subject), self.subject).capitalize()) + + class DoorsCommand(Command): def __init__(self, quals): super().__init__("DOORS", "looking for doors") @staticmethod - def helpDescription(): + def help_description(): return "DOORS - display what doors are visible from this room" - def _doCommand(self, player): + def _do_command(self, player): rm = player.room - numDoors = sum([1 for r in rm.doors if r is not None]) + numDoors = sum(1 for r in rm.doors if r is not None) if numDoors == 0: reply = "There are no doors in any direction." else: @@ -284,8 +314,7 @@ def _doCommand(self, player): for i, d in enumerate(rm.doors) if d is not None ] - # ~ print doorNames - reply += enumerateDoors(doorNames) + reply += enumerate_doors(doorNames) reply += "." print(reply) @@ -300,10 +329,10 @@ def __init__(self, quals): self.target = None @staticmethod - def helpDescription(): + def help_description(): return "USE or U - use an object, optionally IN or ON another object" - def _doCommand(self, player): + def _do_command(self, player): rm = player.room availItems = rm.inv + player.inv if self.subject in availItems: @@ -312,7 +341,7 @@ def _doCommand(self, player): else: print("You can't use that here.") else: - print("There is no %s here to use." % self.subject) + print(f"There is no {self.subject} here to use.") class OpenCommand(Command): @@ -321,22 +350,22 @@ def __init__(self, quals): self.subject = Item.items[quals.item] @staticmethod - def helpDescription(): + def help_description(): return "OPEN or O - open an object" - def _doCommand(self, player): + def _do_command(self, player): rm = player.room availItems = rm.inv + player.inv if self.subject in availItems: if self.subject.isOpenable: if not self.subject.isOpened: - self.subject.openItem(player) + self.subject.open_item(player) else: print("It's already open.") else: print("You can't open that.") else: - print("There is no %s here to open." 
% self.subject) + print(f"There is no {self.subject} here to open.") class CloseCommand(Command): @@ -345,22 +374,22 @@ def __init__(self, quals): self.subject = Item.items[quals.item] @staticmethod - def helpDescription(): + def help_description(): return "CLOSE or CL - close an object" - def _doCommand(self, player): + def _do_command(self, player): rm = player.room availItems = rm.inv + player.inv if self.subject in availItems: if self.subject.isOpenable: if self.subject.isOpened: - self.subject.closeItem(player) + self.subject.close_item(player) else: print("You can't close that, it's not open.") else: print("You can't close that.") else: - print("There is no %s here to close." % self.subject) + print(f"There is no {self.subject} here to close.") class QuitCommand(Command): @@ -368,10 +397,10 @@ def __init__(self, quals): super().__init__("QUIT", "quitting") @staticmethod - def helpDescription(): + def help_description(): return "QUIT or Q - ends the game" - def _doCommand(self, player): + def _do_command(self, player): print("Ok....") player.gameOver = True @@ -381,10 +410,10 @@ def __init__(self, quals): super().__init__("HELP", "helping") @staticmethod - def helpDescription(): + def help_description(): return "HELP or H or ? - displays this help message" - def _doCommand(self, player): + def _do_command(self, player): print("Enter any of the following commands (not case sensitive):") for cmd in [ InventoryCommand, @@ -395,42 +424,43 @@ def _doCommand(self, player): CloseCommand, MoveCommand, LookCommand, + ExamineCommand, DoorsCommand, QuitCommand, HelpCommand, ]: - print(" - %s" % cmd.helpDescription()) + print(f" - {cmd.help_description()}") print() -class AppParseException(ParseException): +class AppParseException(pp.ParseException): pass class Parser: def __init__(self): - self.bnf = self.makeBNF() + self.bnf = self.make_bnf() - def makeBNF(self): - invVerb = oneOf("INV INVENTORY I", caseless=True) - dropVerb = oneOf("DROP LEAVE", caseless=True) - takeVerb = oneOf("TAKE PICKUP", caseless=True) | ( - CaselessLiteral("PICK") + CaselessLiteral("UP") + def make_bnf(self): + invVerb = pp.one_of("INV INVENTORY I", caseless=True) + dropVerb = pp.one_of("DROP LEAVE", caseless=True) + takeVerb = pp.one_of("TAKE PICKUP", caseless=True) | ( + pp.CaselessLiteral("PICK") + pp.CaselessLiteral("UP") ) - moveVerb = oneOf("MOVE GO", caseless=True) | empty - useVerb = oneOf("USE U", caseless=True) - openVerb = oneOf("OPEN O", caseless=True) - closeVerb = oneOf("CLOSE CL", caseless=True) - quitVerb = oneOf("QUIT Q", caseless=True) - lookVerb = oneOf("LOOK L", caseless=True) - doorsVerb = CaselessLiteral("DOORS") - helpVerb = oneOf("H HELP ?", caseless=True) - - itemRef = OneOrMore(Word(alphas)).setParseAction(self.validateItemName) - nDir = oneOf("N NORTH", caseless=True).setParseAction(replaceWith("N")) - sDir = oneOf("S SOUTH", caseless=True).setParseAction(replaceWith("S")) - eDir = oneOf("E EAST", caseless=True).setParseAction(replaceWith("E")) - wDir = oneOf("W WEST", caseless=True).setParseAction(replaceWith("W")) + moveVerb = pp.one_of("MOVE GO", caseless=True) | pp.Empty() + useVerb = pp.one_of("USE U", caseless=True) + openVerb = pp.one_of("OPEN O", caseless=True) + closeVerb = pp.one_of("CLOSE CL", caseless=True) + quitVerb = pp.one_of("QUIT Q", caseless=True) + lookVerb = pp.one_of("LOOK L", caseless=True) + doorsVerb = pp.CaselessLiteral("DOORS") + helpVerb = pp.one_of("H HELP ?", caseless=True).set_name("HELP | H | ?") + + itemRef = 
pp.OneOrMore(pp.Word(pp.alphas)).set_parse_action(self.validate_item_name).setName("item_ref") + nDir = pp.one_of("N NORTH", caseless=True).set_parse_action(pp.replace_with("N")) + sDir = pp.one_of("S SOUTH", caseless=True).set_parse_action(pp.replace_with("S")) + eDir = pp.one_of("E EAST", caseless=True).set_parse_action(pp.replace_with("E")) + wDir = pp.one_of("W WEST", caseless=True).set_parse_action(pp.replace_with("W")) moveDirection = nDir | sDir | eDir | wDir invCommand = invVerb @@ -439,32 +469,34 @@ def makeBNF(self): useCommand = ( useVerb + itemRef("usedObj") - + Optional(oneOf("IN ON", caseless=True)) - + Optional(itemRef, default=None)("targetObj") + + pp.Opt(pp.one_of("IN ON", caseless=True)) + + pp.Opt(itemRef, default=None)("targetObj") ) openCommand = openVerb + itemRef("item") closeCommand = closeVerb + itemRef("item") - moveCommand = moveVerb + moveDirection("direction") + moveCommand = (moveVerb | "") + moveDirection("direction") quitCommand = quitVerb lookCommand = lookVerb - doorsCommand = doorsVerb + examineCommand = pp.one_of("EXAMINE EX X", caseless=True) + itemRef("item") + doorsCommand = doorsVerb.setName("DOORS") helpCommand = helpVerb # attach command classes to expressions - invCommand.setParseAction(InventoryCommand) - dropCommand.setParseAction(DropCommand) - takeCommand.setParseAction(TakeCommand) - useCommand.setParseAction(UseCommand) - openCommand.setParseAction(OpenCommand) - closeCommand.setParseAction(CloseCommand) - moveCommand.setParseAction(MoveCommand) - quitCommand.setParseAction(QuitCommand) - lookCommand.setParseAction(LookCommand) - doorsCommand.setParseAction(DoorsCommand) - helpCommand.setParseAction(HelpCommand) + invCommand.set_parse_action(InventoryCommand) + dropCommand.set_parse_action(DropCommand) + takeCommand.set_parse_action(TakeCommand) + useCommand.set_parse_action(UseCommand) + openCommand.set_parse_action(OpenCommand) + closeCommand.set_parse_action(CloseCommand) + moveCommand.set_parse_action(MoveCommand) + quitCommand.set_parse_action(QuitCommand) + lookCommand.set_parse_action(LookCommand) + examineCommand.set_parse_action(ExamineCommand) + doorsCommand.set_parse_action(DoorsCommand) + helpCommand.set_parse_action(HelpCommand) # define parser using all command expressions - return ( + parser = pp.ungroup( invCommand | useCommand | openCommand @@ -473,24 +505,35 @@ def makeBNF(self): | takeCommand | moveCommand | lookCommand + | examineCommand | doorsCommand | helpCommand | quitCommand - )("command") + LineEnd() + )("command").set_name("command") + + with contextlib.suppress(Exception): + parser.create_diagram( + "adventure_game_parser_diagram.html", + vertical=3, + show_groups=True, + show_results_names=True + ) - def validateItemName(self, s, l, t): + return parser + + def validate_item_name(self, s, l, t): iname = " ".join(t) if iname not in Item.items: - raise AppParseException(s, l, "No such item '%s'." % iname) + raise AppParseException(s, l, f"No such item '{iname}'.") return iname - def parseCmd(self, cmdstr): + def parse_cmd(self, cmdstr): try: - ret = self.bnf.parseString(cmdstr) + ret = self.bnf.parse_string(cmdstr) return ret except AppParseException as pe: print(pe.msg) - except ParseException as pe: + except pp.ParseException as pe: print( random.choice( [ @@ -522,7 +565,7 @@ def moveTo(self, rm): def take(self, it): if it.isDeadly: - print("Aaaagh!...., the %s killed me!" 
% it) + print(f"Aaaagh!...., the {it} killed me!") self.gameOver = True else: self.inv.append(it) @@ -595,7 +638,7 @@ def createRooms(rm): def putItemInRoom(i, r): if isinstance(r, str): r = rooms[r] - r.addItem(Item.items[i]) + r.add_item(Item.items[i]) def playGame(p, startRoom): @@ -604,98 +647,98 @@ def playGame(p, startRoom): p.moveTo(startRoom) while not p.gameOver: cmdstr = input(">> ") - cmd = parser.parseCmd(cmdstr) + cmd = parser.parse_cmd(cmdstr) if cmd is not None: cmd.command(p) print() print("You ended the game with:") for i in p.inv: - print(" -", aOrAn(i)) - - -# ==================== -# start game definition -roomMap = """ - d-Z - | - f-c-e - . | - q + + + + + + + + + + + + + +
+ [... generated railroad-diagram HTML/SVG (adventure_game_parser_diagram.html)
+  omitted: diagrams for the game's "command" expression and its alternatives
+  (INVENTORY, USE, OPEN, CLOSE, DROP, TAKE, MOVE, LOOK, EXAMINE, DOORS, HELP,
+  QUIT), plus sub-expressions such as item_ref and the direction keywords ...]
+ + + + diff --git a/examples/antlr_grammar.py b/examples/antlr_grammar.py index 49151eee..566dd0ef 100644 --- a/examples/antlr_grammar.py +++ b/examples/antlr_grammar.py @@ -35,6 +35,7 @@ alphanums, delimitedList, Char, + autoname_elements, ) # http://www.antlr.org/grammar/ANTLR/ANTLRv3.g @@ -75,11 +76,13 @@ PROTECTED, PUBLIC, PRIVATE, -) = map( - Keyword, +) = list( + Keyword.using_each( """src scope options tokens fragment id lexer parser grammar tree catch finally throws protected public private """.split(), + ) ) + KEYWORD = MatchFirst(keywords) # Tokens @@ -252,6 +255,7 @@ grammarDef = grammarHeading + Group(OneOrMore(rule))("rules") +autoname_elements() def grammar(): return grammarDef @@ -341,6 +345,10 @@ def antlrConverter(antlrGrammarTree): if __name__ == "__main__": + import contextlib + + with contextlib.suppress(Exception): + grammarDef.create_diagram("antlr_grammar_diagram.html", vertical=2, show_groups=True) text = """\ grammar SimpleCalc; @@ -379,7 +387,6 @@ def antlrConverter(antlrGrammarTree): """ - grammar().validate() antlrGrammarTree = grammar().parseString(text) print(antlrGrammarTree.dump()) pyparsingRules = antlrConverter(antlrGrammarTree) diff --git a/examples/antlr_grammar_diagram.html b/examples/antlr_grammar_diagram.html new file mode 100644 index 00000000..d6ec6ddf --- /dev/null +++ b/examples/antlr_grammar_diagram.html @@ -0,0 +1,4160 @@ + + + + + + + + + + + + + + + +
+ [... ~4,100 lines of generated railroad-diagram HTML/SVG omitted: diagrams for
+  grammarDef and the other ANTLR grammar expressions (grammarType, optionsSpec,
+  tokensSpec, attrScope, action, rule, modifier, throwsSpec, ruleScopeSpec,
+  element, terminal, block, ebnf, rewrite, exceptionGroup, finallyClause, and
+  the various punctuation tokens) ...]
+ + + + diff --git a/examples/apicheck.py b/examples/apicheck.py index 366ad066..97010d19 100644 --- a/examples/apicheck.py +++ b/examples/apicheck.py @@ -32,29 +32,38 @@ def apiProc(name, numargs): ] ) -test = """[ procname1 $par1 $par2 ] - other code here - [ procname1 $par1 $par2 $par3 ] - more code here - [ procname1 $par1 ] - [ procname3 ${arg with spaces} $par2 ]""" - - -# now explicitly iterate through the scanner using next(), so that -# we can trap ParseSyntaxException's that would be raised due to -# an incorrect number of arguments. If an exception does occur, -# then see how we reset the input text and scanner to advance to the -# next line of source code -api_scanner = apiRef.scanString(test) -while 1: - try: - t, s, e = next(api_scanner) - print("found %s on line %d" % (t.procname, lineno(s, test))) - except ParseSyntaxException as pe: - print("invalid arg count on line", pe.lineno) - print(pe.lineno, ":", pe.line) - # reset api scanner to start after this exception location - test = "\n" * (pe.lineno - 1) + test[pe.loc + 1 :] - api_scanner = apiRef.scanString(test) - except StopIteration: - break +autoname_elements() + +if __name__ == '__main__': + + import contextlib + + with contextlib.suppress(Exception): + apiRef.create_diagram("apicheck_diagram.html", vertical=9, show_groups=True) + + test = """[ procname1 $par1 $par2 ] + other code here + [ procname1 $par1 $par2 $par3 ] + more code here + [ procname1 $par1 ] + [ procname3 ${arg with spaces} $par2 ]""" + + + # now explicitly iterate through the scanner using next(), so that + # we can trap ParseSyntaxException's that would be raised due to + # an incorrect number of arguments. If an exception does occur, + # then see how we reset the input text and scanner to advance to the + # next line of source code + api_scanner = apiRef.scanString(test) + while 1: + try: + t, s, e = next(api_scanner) + print(f"found {t.procname} on line {lineno(s, test)}") + except ParseSyntaxException as pe: + print(f"invalid arg count on line {pe.lineno}") + print(f"{pe.lineno} : {pe.line}") + # reset api scanner to start after this exception location + test = "\n" * (pe.lineno - 1) + test[pe.loc + 1:] + api_scanner = apiRef.scanString(test) + except StopIteration: + break diff --git a/examples/apicheck_diagram.html b/examples/apicheck_diagram.html new file mode 100644 index 00000000..6729d519 --- /dev/null +++ b/examples/apicheck_diagram.html @@ -0,0 +1,226 @@ + + + + + + + + + + + + + + + +
+ [... generated railroad-diagram HTML/SVG omitted: diagrams for apiRef and its
+  sub-expressions (LBRACK, ident, RBRACK) ...]
diff --git a/examples/bf.py b/examples/bf.py
new file mode 100644
index 00000000..b8ff1aca
--- /dev/null
+++ b/examples/bf.py
@@ -0,0 +1,163 @@
+# bf.py
+#
+# Brainf*ck interpreter demo
+#
+# BF instructions (symbols):
+#   + - increment value at the current pointer
+#   - - decrement value at the current pointer
+#   > - increment pointer
+#   < - decrement pointer
+#   , - input new byte value, store at the current pointer
+#   . - output the byte at the current pointer
+#   [] - evaluate value at current pointer, if nonzero, execute all statements in []'s and repeat
+#
+import pyparsing as pp
+
+# define the basic parser
+
+# define Literals for each symbol in the BF language
+PLUS, MINUS, GT, LT, INP, OUT, LBRACK, RBRACK = pp.Literal.using_each("+-><,.[]")
+
+# use a pyparsing Forward for the recursive definition of an instruction that can
+# itself contain instructions
+instruction_expr = pp.Forward().set_name("instruction")
+
+# define a LOOP expression for the instructions enclosed in brackets; use a
+# pyparsing Group to wrap the instructions in a sub-list
+LOOP = pp.Group(LBRACK + instruction_expr[...] + RBRACK)
+
+# use '<<=' operator to insert expression definition into existing Forward
+instruction_expr <<= PLUS | MINUS | GT | LT | INP | OUT | LOOP
+
+program_expr = instruction_expr[...].set_name("program")
+
+# ignore everything that is not a BF symbol
+ignore_chars = pp.Word(pp.printables, exclude_chars="+-<>,.[]")
+program_expr.ignore(ignore_chars)
+
+
+class BFEngine:
+    """
+    Brainf*ck execution environment, with a memory array and pointer.
+    """
+    def __init__(self, memory_size: int = 1024):
+        self._ptr = 0
+        self._memory_size = memory_size
+        self._memory = [0] * self._memory_size
+
+    @property
+    def ptr(self):
+        return self._ptr
+
+    @ptr.setter
+    def ptr(self, value):
+        self._ptr = value % self._memory_size
+
+    @property
+    def at_ptr(self):
+        return self._memory[self._ptr]
+
+    @at_ptr.setter
+    def at_ptr(self, value):
+        self._memory[self._ptr] = value % 256
+
+    def output_value_at_ptr(self):
+        print(chr(self.at_ptr), end="")
+
+    def input_value(self):
+        input_char = input() or "\0"
+        self.at_ptr = ord(input_char[0])
+
+    def reset(self):
+        self._ptr = 0
+        self._memory[:] = [0] * self._memory_size
+
+    def dump_state(self):
+        for i in range(30):
+            print(f"{self._memory[i]:3d} ", end="")
+        print()
+
+        if self.ptr < 30:
+            print(f"  {'    ' * self.ptr}^")
+
+
+# define executable classes for each instruction
+
+class Instruction:
+    """Abstract class for all instruction classes to implement."""
+    def __init__(self, tokens):
+        self.tokens = tokens
+
+    def execute(self, bf_engine: BFEngine):
+        raise NotImplementedError()
+
+
+class IncrPtr(Instruction):
+    def execute(self, bf_engine: BFEngine):
+        bf_engine.ptr += 1
+
+
+class DecrPtr(Instruction):
+    def execute(self, bf_engine: BFEngine):
+        bf_engine.ptr -= 1
+
+
+class IncrPtrValue(Instruction):
+    def execute(self, bf_engine: BFEngine):
+        bf_engine.at_ptr += 1
+
+
+class DecrPtrValue(Instruction):
+    def execute(self, bf_engine: BFEngine):
+        bf_engine.at_ptr -= 1
+
+
+class OutputPtrValue(Instruction):
+    def execute(self, bf_engine: BFEngine):
+        bf_engine.output_value_at_ptr()
+
+
+class InputPtrValue(Instruction):
+    def execute(self, bf_engine: BFEngine):
+        bf_engine.input_value()
+
+
+class RunInstructionLoop(Instruction):
+    def __init__(self, tokens):
+        super().__init__(tokens)
+        self.instructions = self.tokens[0][1:-1]
+
+    def execute(self, bf_engine: BFEngine):
+        while bf_engine.at_ptr:
+            for i in self.instructions:
+                i.execute(bf_engine)
+
+
+# add parse actions to all BF instruction expressions
+PLUS.add_parse_action(IncrPtrValue)
+MINUS.add_parse_action(DecrPtrValue)
+GT.add_parse_action(IncrPtr)
+LT.add_parse_action(DecrPtr)
+OUT.add_parse_action(OutputPtrValue)
+INP.add_parse_action(InputPtrValue)
+LOOP.add_parse_action(RunInstructionLoop)
+
+
+@program_expr.add_parse_action
+def run_program(tokens):
+    bf = BFEngine()
+    for t in tokens:
+        t.execute(bf)
+    print()
+
+if __name__ == '__main__':
+
+    # generate railroad diagram
+    import contextlib
+
+    with contextlib.suppress(Exception):
+        program_expr.create_diagram("bf_diagram.html")
+
+    # execute an example BF program
+    hw = "+[-->-[>>+>-----<<]<--<---]>-.>>>+.>>..+++[.>]<<<<.+++.------.<<-.>>>>+."
+    program_expr.parse_string(hw)
diff --git a/examples/bf_diagram.html b/examples/bf_diagram.html
new file mode 100644
index 00000000..9696b0ff
--- /dev/null
+++ b/examples/bf_diagram.html
@@ -0,0 +1,128 @@
+<!-- generated railroad diagram page for bf.py, showing the "program" and
+     "instruction" rules; boilerplate HTML/SVG markup not reproduced here -->
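Two details in bf.py above are easy to miss: `Literal.using_each` yields one `Literal` per character of its argument, in order, so the unpacked names must line up with the symbol string; and all of the execution wiring lives in parse actions, so parsing a program also runs it. A minimal sketch, not part of the diff (the `from bf import ...` line assumes the examples directory is on `sys.path`):

    import pyparsing as pp

    # one Literal per character, in the same order as the string
    PLUS, MINUS, GT, LT, INP, OUT, LBRACK, RBRACK = pp.Literal.using_each("+-><,.[]")
    assert GT.parse_string(">")[0] == ">"
    assert LT.parse_string("<")[0] == "<"

    # importing bf attaches the parse actions, so parse_string also executes
    # the program: 72 '+' instructions then '.' prints "H"
    from bf import program_expr
    program_expr.parse_string("+" * 72 + ".")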
+ + + + diff --git a/examples/bigquery_view_parser.py b/examples/bigquery_view_parser.py index 9215225e..2f1b4d5d 100644 --- a/examples/bigquery_view_parser.py +++ b/examples/bigquery_view_parser.py @@ -1709,7 +1709,7 @@ def print_(*args): ], ], [ - """\ + r""" SELECT /* Replace white spaces in the title with underscores. */ REGEXP_REPLACE(title, r'\s+', '_') AS regexp_title, revisions diff --git a/examples/booleansearchparser.py b/examples/booleansearchparser.py index c901db14..503a4cdf 100644 --- a/examples/booleansearchparser.py +++ b/examples/booleansearchparser.py @@ -90,11 +90,14 @@ Suppress, OneOrMore, one_of, + ParserElement, ) import re +ParserElement.enablePackrat() # Updated on 02 Dec 2021 according to ftp://ftp.unicode.org/Public/UNIDATA/Blocks.txt +# (includes characters not found in the BasicMultilingualPlane) alphabet_ranges = [ # CYRILIC: https://en.wikipedia.org/wiki/Cyrillic_(Unicode_block) [int("0400", 16), int("04FF", 16)], @@ -322,6 +325,7 @@ class ParserTest(BooleanSearchParser): """ def Test(self): + # fmt: off exprs = { "0": "help", "1": "help or hulp", @@ -363,93 +367,28 @@ def Test(self): texts_matcheswith = { "halp thinks he needs help": [ - "25", - "22", - "20", - "21", - "11", - "17", - "16", - "23", - "34", - "1", - "0", - "5", - "7", - "6", - "9", - "8", + "25", "22", "20", "21", "11", "17", "16", "23", "34", "1", + "0", "5", "7", "6", "9", "8", ], "he needs halp": ["24", "25", "20", "11", "10", "12", "34", "6"], "help": ["25", "20", "12", "17", "16", "1", "0", "5", "6"], "help hilp": [ - "25", - "22", - "20", - "32", - "21", - "12", - "17", - "16", - "19", - "31", - "23", - "1", - "0", - "5", - "4", - "7", - "6", - "9", - "8", - "33", + "25", "22", "20", "32", "21", "12", "17", "16", "19", "31", + "23", "1", "0", "5", "4", "7", "6", "9", "8", "33", ], "help me please hulp": [ - "30", - "25", - "27", - "20", - "13", - "12", - "15", - "14", - "17", - "16", - "19", - "18", - "23", - "29", - "1", - "0", - "3", - "2", - "5", - "4", - "6", - "9", + "30", "25", "27", "20", "13", "12", "15", "14", "17", "16", + "19", "18", "23", "29", "1", "0", "3", "2", "5", "4", "6", "9", ], "helper": ["20", "10", "12", "16"], "hulp hilp": [ - "25", - "27", - "20", - "21", - "10", - "12", - "14", - "17", - "19", - "23", - "1", - "5", - "4", - "7", - "6", - "9", + "25", "27", "20", "21", "10", "12", "14", "17", "19", "23", + "1", "5", "4", "7", "6", "9", ], "nothing": ["25", "10", "12"], "안녕하세요, 당신은 어떠세요?": ["10", "12", "25", "35"], } + # fmt: on all_ok = True for text, matches in texts_matcheswith.items(): @@ -459,7 +398,9 @@ def Test(self): _matches.append(_id) test_passed = sorted(matches) == sorted(_matches) - if not test_passed: + if test_passed: + print("Passed", repr(text)) + else: print("Failed", repr(text), "expected", matches, "matched", _matches) all_ok = all_ok and test_passed @@ -490,7 +431,9 @@ def Test(self): _matches.append(_id) test_passed = sorted(matches) == sorted(_matches) - if not test_passed: + if test_passed: + print("Passed", repr(text)) + else: print("Failed", repr(text), "expected", matches, "matched", _matches) all_ok = all_ok and test_passed @@ -498,10 +441,13 @@ def Test(self): return all_ok -if __name__ == "__main__": +def main(): if ParserTest().Test(): print("All tests OK") - exit(0) else: print("One or more tests FAILED") - exit(1) + raise Exception("One or more tests FAILED") + + +if __name__ == "__main__": + main() diff --git a/examples/btpyparse.py b/examples/btpyparse.py index 3531761d..be5cb0b4 100644 --- a/examples/btpyparse.py +++ 
b/examples/btpyparse.py @@ -30,7 +30,7 @@ def __init__(self, name): self.name = name def __repr__(self): - return 'Macro("%s")' % self.name + return f'Macro("{self.name}")' def __eq__(self, other): return self.name == other.name diff --git a/examples/builtin_parse_action_demo.py b/examples/builtin_parse_action_demo.py index 36b3a98b..fed6e2a3 100644 --- a/examples/builtin_parse_action_demo.py +++ b/examples/builtin_parse_action_demo.py @@ -5,14 +5,13 @@ # Simple example of using builtin functions as parse actions. # -from pyparsing import * - -integer = Word(nums).setParseAction(lambda t: int(t[0])) +import pyparsing as pp +ppc = pp.common # make an expression that will match a list of ints (which # will be converted to actual ints by the parse action attached # to integer) -nums = OneOrMore(integer) +nums = ppc.integer[...] test = "2 54 34 2 211 66 43 2 0" @@ -20,10 +19,9 @@ # try each of these builtins as parse actions for fn in (sum, max, min, len, sorted, reversed, list, tuple, set, any, all): - fn_name = fn.__name__ if fn is reversed: # reversed returns an iterator, we really want to show the list of items fn = lambda x: list(reversed(x)) # show how each builtin works as a free-standing parse action - print(fn_name, nums.setParseAction(fn).parseString(test)) + print(fn.__name__, nums.set_parse_action(fn).parse_string(test)) diff --git a/examples/chemicalFormulas.py b/examples/chemicalFormulas.py deleted file mode 100644 index d4c87cd9..00000000 --- a/examples/chemicalFormulas.py +++ /dev/null @@ -1,121 +0,0 @@ -# -# chemicalFormulas.py -# -# Copyright (c) 2003,2019 Paul McGuire -# - -import pyparsing as pp - -atomicWeight = { - "O": 15.9994, - "H": 1.00794, - "Na": 22.9897, - "Cl": 35.4527, - "C": 12.0107, -} - -digits = "0123456789" - -# Version 1 -element = pp.Word(pp.alphas.upper(), pp.alphas.lower(), max=2).set_name("element") -# for stricter matching, use this Regex instead -# element = Regex("A[cglmrstu]|B[aehikr]?|C[adeflmorsu]?|D[bsy]|" -# "E[rsu]|F[emr]?|G[ade]|H[efgos]?|I[nr]?|Kr?|L[airu]|" -# "M[dgnot]|N[abdeiop]?|Os?|P[abdmortu]?|R[abefghnu]|" -# "S[bcegimnr]?|T[abcehilm]|U(u[bhopqst])?|V|W|Xe|Yb?|Z[nr]") -elementRef = pp.Group(element + pp.Optional(pp.Word(digits), default="1")) -formula = elementRef[...] - - -def sum_atomic_weights(element_list): - return sum(atomicWeight[elem] * int(qty) for elem, qty in element_list) - - -formula.runTests( - """\ - H2O - C6H5OH - NaCl - """, - fullDump=False, - postParse=lambda _, tokens: "Molecular weight: {}".format( - sum_atomic_weights(tokens) - ), -) -print() - -# Version 2 - access parsed items by results name -elementRef = pp.Group( - element("symbol") + pp.Optional(pp.Word(digits), default="1")("qty") -) -formula = elementRef[...] 
- - -def sum_atomic_weights_by_results_name(element_list): - return sum(atomicWeight[elem.symbol] * int(elem.qty) for elem in element_list) - - -formula.runTests( - """\ - H2O - C6H5OH - NaCl - """, - fullDump=False, - postParse=lambda _, tokens: "Molecular weight: {}".format( - sum_atomic_weights_by_results_name(tokens) - ), -) -print() - -# Version 3 - convert integers during parsing process -integer = pp.Word(digits).setParseAction(lambda t: int(t[0])).setName("integer") -elementRef = pp.Group(element("symbol") + pp.Optional(integer, default=1)("qty")) -formula = elementRef[...].setName("chemical_formula") - - -def sum_atomic_weights_by_results_name_with_converted_ints(element_list): - return sum(atomicWeight[elem.symbol] * int(elem.qty) for elem in element_list) - - -formula.runTests( - """\ - H2O - C6H5OH - NaCl - """, - fullDump=False, - postParse=lambda _, tokens: "Molecular weight: {}".format( - sum_atomic_weights_by_results_name_with_converted_ints(tokens) - ), -) -print() - -# Version 4 - parse and convert integers as subscript digits -subscript_digits = "₀₁₂₃₄₅₆₇₈₉" -subscript_int_map = {e[1]: e[0] for e in enumerate(subscript_digits)} - - -def cvt_subscript_int(s): - ret = 0 - for c in s[0]: - ret = ret * 10 + subscript_int_map[c] - return ret - - -subscript_int = pp.Word(subscript_digits).addParseAction(cvt_subscript_int).set_name("subscript") - -elementRef = pp.Group(element("symbol") + pp.Optional(subscript_int, default=1)("qty")) -formula = elementRef[1, ...].setName("chemical_formula") -formula.runTests( - """\ - H₂O - C₆H₅OH - NaCl - """, - fullDump=False, - postParse=lambda _, tokens: "Molecular weight: {}".format( - sum_atomic_weights_by_results_name_with_converted_ints(tokens) - ), -) -print() diff --git a/examples/chemical_formulas.html b/examples/chemical_formulas.html new file mode 100644 index 00000000..12f1fb7e --- /dev/null +++ b/examples/chemical_formulas.html @@ -0,0 +1,158 @@ + + + + + + + + + + + + + + + +
+<!-- generated railroad diagram page for chemical_formulas.py, showing the
+     "chemical_formula", "element" (W:(A-Z, a-z){1,2}), and "subscript" (W:(₀-₉))
+     rules; boilerplate HTML/SVG markup not reproduced here -->
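The chemical_formulas.py rewrite below folds Unicode subscript digits (as in "C₆H₅OH") into ordinary ints with a parse action. The conversion itself is plain Python and can be sanity-checked standalone; this sketch repeats the same logic as the diff, outside of any parser:

    subscript_digits = "₀₁₂₃₄₅₆₇₈₉"
    subscript_int_map = {digit: value for value, digit in enumerate(subscript_digits)}

    def cvt_subscript_int(tokens):
        # accumulate place values left to right, e.g. "₂₅" -> 2*10 + 5 -> 25
        ret = 0
        for c in tokens[0]:
            ret = ret * 10 + subscript_int_map[c]
        return ret

    assert cvt_subscript_int(["₁₀"]) == 10
    assert cvt_subscript_int(["₂₅"]) == 25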
+ + + + diff --git a/examples/chemical_formulas.py b/examples/chemical_formulas.py new file mode 100644 index 00000000..80a8c969 --- /dev/null +++ b/examples/chemical_formulas.py @@ -0,0 +1,134 @@ +# +# chemicalFormulas.py +# +# Copyright (c) 2003,2019 Paul McGuire +# + +import pyparsing as pp + +atomic_weight = { + "O": 15.9994, + "H": 1.00794, + "Na": 22.9897, + "Cl": 35.4527, + "C": 12.0107, +} + +digits = "0123456789" + +# Version 1 +element = pp.Word(pp.alphas.upper(), pp.alphas.lower(), max=2).set_name("element") +# for stricter matching, use this Regex instead +# element = Regex("A[cglmrstu]|B[aehikr]?|C[adeflmorsu]?|D[bsy]|" +# "E[rsu]|F[emr]?|G[ade]|H[efgos]?|I[nr]?|Kr?|L[airu]|" +# "M[dgnot]|N[abdeiop]?|Os?|P[abdmortu]?|R[abefghnu]|" +# "S[bcegimnr]?|T[abcehilm]|U(u[bhopqst])?|V|W|Xe|Yb?|Z[nr]") +element_ref = pp.Group(element + pp.Optional(pp.Word(digits), default="1")) +formula = element_ref[...] + + +def sum_atomic_weights(element_list): + return sum(atomic_weight[elem] * int(qty) for elem, qty in element_list) + + +formula.run_tests( + """\ + NaCl + H2O + C6H5OH + """, + full_dump=False, + post_parse=lambda _, tokens: f"Molecular weight: {sum_atomic_weights(tokens)}", +) +print() + + +# Version 2 - access parsed items by results name +element_ref = pp.Group( + element("symbol") + pp.Optional(pp.Word(digits), default="1")("qty") +) +formula = element_ref[...] + + +def sum_atomic_weights_by_results_name(element_list): + return sum(atomic_weight[elem.symbol] * int(elem.qty) for elem in element_list) + + +formula.run_tests( + """\ + NaCl + H2O + C6H5OH + """, + full_dump=False, + post_parse=lambda _, tokens: + f"Molecular weight: {sum_atomic_weights_by_results_name(tokens)}", +) +print() + +# Version 3 - convert integers during parsing process +integer = pp.Word(digits).set_name("integer") +integer.add_parse_action(lambda t: int(t[0])) +element_ref = pp.Group(element("symbol") + pp.Optional(integer, default=1)("qty")) +formula = element_ref[...].set_name("chemical_formula") + + +def sum_atomic_weights_by_results_name_with_converted_ints(element_list): + return sum(atomic_weight[elem.symbol] * int(elem.qty) for elem in element_list) + + +formula.run_tests( + """\ + NaCl + H2O + C6H5OH + """, + full_dump=False, + post_parse=lambda _, tokens: + f"Molecular weight: {sum_atomic_weights_by_results_name_with_converted_ints(tokens)}", +) +print() + +# Version 4 - parse and convert integers as subscript digits +subscript_digits = "₀₁₂₃₄₅₆₇₈₉" +subscript_int_map = {digit: value for value, digit in enumerate(subscript_digits)} + + +def cvt_subscript_int(s): + ret = 0 + for c in s[0]: + ret = ret * 10 + subscript_int_map[c] + return ret + + +subscript_int = pp.Word(subscript_digits).set_name("subscript") +subscript_int.add_parse_action(cvt_subscript_int) + +element_ref = pp.Group(element("symbol") + pp.Optional(subscript_int, default=1)("qty")) +formula = element_ref[1, ...].set_name("chemical_formula") + +if __name__ == '__main__': + import contextlib + + with contextlib.suppress(Exception): + formula.create_diagram("chemical_formulas.html") + + formula.run_tests( + """\ + # sodium chloride + NaCl + # hydrogen hydroxide + H₂O + # phenol + C₆H₅OH + # ethanol + C₂H₅OH + # decanol + C₁₀H₂₁OH + """, + full_dump=False, + post_parse=lambda _, tokens: + f"Molecular weight: {sum_atomic_weights_by_results_name_with_converted_ints(tokens)}", + ) + + print() diff --git a/examples/complex_chemical_formulas.py b/examples/complex_chemical_formulas.py new file mode 100644 index 00000000..f2d240b0 --- 
/dev/null +++ b/examples/complex_chemical_formulas.py @@ -0,0 +1,159 @@ +# +# complex_chemical_formulas.py +# +# Example that expands on the basic chemical_formulas.py parser to +# include grouped multiplication notation, such as "3(C₆H₅OH)₂". +# +# Copyright (c) 2024, Paul McGuire +# + +from collections import Counter + +import pyparsing as pp + +ppc = pp.common + +# fmt: off +table_of_elements: dict[str, float] = { + "H": 1.007, "He": 4.002, "Li": 6.941, "Be": 9.012, "B": 10.811, "C": 12.011, + "N": 14.007, "O": 15.999, "F": 18.998, "Ne": 20.18, "Na": 22.99, "Mg": 24.305, + "Al": 26.982, "Si": 28.086, "P": 30.974, "S": 32.065, "Cl": 35.453, "Ar": 39.948, + "K": 39.098, "Ca": 40.078, "Sc": 44.956, "Ti": 47.867, "V": 50.942, "Cr": 51.996, + "Mn": 54.938, "Fe": 55.845, "Co": 58.933, "Ni": 58.693, "Cu": 63.546, "Zn": 65.38, + "Ga": 69.723, "Ge": 72.64, "As": 74.922, "Se": 78.96, "Br": 79.904, "Kr": 83.798, + "Rb": 85.468, "Sr": 87.62, "Y": 88.906, "Zr": 91.224, "Nb": 92.906, "Mo": 95.96, + "Tc": 98.0, "Ru": 101.07, "Rh": 102.906, "Pd": 106.42, "Ag": 107.868, + "Cd": 112.411, "In": 114.818, "Sn": 118.71, "Sb": 121.76, "Te": 127.6, + "I": 126.904, "Xe": 131.293, "Cs": 132.905, "Ba": 137.327, "La": 138.905, + "Ce": 140.116, "Pr": 140.908, "Nd": 144.242, "Pm": 145.0, "Sm": 150.36, + "Eu": 151.964, "Gd": 157.25, "Tb": 158.925, "Dy": 162.5, "Ho": 164.93, + "Er": 167.259, "Tm": 168.934, "Yb": 173.054, "Lu": 174.967, "Hf": 178.49, + "Ta": 180.948, "W": 183.84, "Re": 186.207, "Os": 190.23, "Ir": 192.217, + "Pt": 195.084, "Au": 196.967, "Hg": 200.59, "Tl": 204.383, "Pb": 207.2, + "Bi": 208.98, "Po": 210.0, "At": 210.0, "Rn": 222.0, "Fr": 223.0, "Ra": 226.0, + "Ac": 227.0, "Th": 232.038, "Pa": 231.036, "U": 238.029, "Np": 237.0, + "Pu": 244.0, "Am": 243.0, "Cm": 247.0, "Bk": 247.0, "Cf": 251.0, "Es": 252.0, + "Fm": 257.0, "Md": 258.0, "No": 259.0, "Lr": 262.0, "Rf": 261.0, "Db": 262.0, + "Sg": 266.0, "Bh": 264.0, "Hs": 267.0, "Mt": 268.0, "Ds": 271.0, "Rg": 272.0, + "Cn": 285.0, "Nh": 284.0, "Fl": 289.0, "Mc": 288.0, "Lv": 292.0, "Ts": 295.0, + "Og": 294.0, +} +# fmt: on + +# basic parser elements +# - element - a chemical symbol, corresponding to one of the entries +# in table_of_elements +# - subcript_int - an integer made up of subscript digits +# (a normal integer definition uses the one defined in pyparsing.common) +# +# element = pp.one_of(table_of_elements).set_name("element") +element = pp.Regex(pp.util.make_compressed_re(table_of_elements)).set_name("element") +element.add_parse_action(lambda t: Counter([t[0]])) + +subscript_digits = "₀₁₂₃₄₅₆₇₈₉" +subscript_int = pp.Word(subscript_digits).set_name("subscript") + +# define mapping of the int value of each subscript digit +subscript_int_map = {digit: value for value, digit in enumerate(subscript_digits)} + +@subscript_int.add_parse_action +def convert_subscript_int(s: pp.ParseResults) -> int: + ret = 0 + for c in s[0]: + ret = ret * 10 + subscript_int_map[c] + return ret + +# +# parse actions used internally by the infix_notation expression +# + +def lmult(s, l, t): + """ + Multiply + """ + *terms, qty = t[0] + return sum(qty * terms, Counter()) + + +def rmult(s, l, t): + """ + Multiply + """ + qty, *terms = t[0] + return sum(qty * terms, Counter()) + + +def element_ref_sum(s, l, t): + """ + Add multiple consecutive element references + """ + return sum(t[0], Counter()) + + +# optional separator in some chemical formulas +optional_separator = pp.Optional(pp.one_of("= ·").suppress()) + +# define infix expression, where multipliers and subscripts 
+# are treated like operators, so that grouping in ()'s gets +# properly handled, even when they are nested +element_ref = pp.infix_notation( + element, + [ + (subscript_int, 1, pp.OpAssoc.LEFT, lmult), + (ppc.integer, 1, pp.OpAssoc.RIGHT, rmult), + (optional_separator, 2, pp.OpAssoc.LEFT, element_ref_sum), + ], +) + +# define the overall parser for a chemical formula, made up +# of one or more element_ref's +formula = element_ref[1, ...].set_name("chemical_formula") + +# set names on unnamed expressions for better diagram output +pp.autoname_elements() + + +def molecular_weight(c: Counter) -> float: + """ + Compute overall molecular weight of a chemical formula, + whose elements have been parsed into a Counter containing + chemical symbols and counts of each element, using + the table_of_elements dict to map chemical symbols to + each element's atomic weight. + """ + return sum(table_of_elements[k] * v for k, v in c.items()) + +if __name__ == '__main__': + import contextlib + + # create railroad diagram for this parser + with contextlib.suppress(Exception): + formula.create_diagram( + "complex_chemical_formulas_diagram.html", vertical=2, show_groups=True + ) + + formula.run_tests( + """\ + NaCl + HOH + H₂O + H₂O₂ + C₆H₅OH + C₁₀H₂₁OH + (C₆H₅OH)₂ + 3(C₆H₅OH)₂ + C(OH)₆ + CH₃(CH₂)₂OH + (CH₃)₃CH + CH₃(CH₂)₅CH₃ + Ba(BrO₃)₂·H₂O + Ba(BrO₃)₂·2(H₂O) + """, + full_dump=False, + post_parse=( + lambda _, tokens: + f"Molecular counts/weight: {dict(tokens[0])}" + f", {molecular_weight(tokens[0]):.3f}" + ), + ) + print() diff --git a/examples/complex_chemical_formulas_diagram.html b/examples/complex_chemical_formulas_diagram.html new file mode 100644 index 00000000..77f13b55 --- /dev/null +++ b/examples/complex_chemical_formulas_diagram.html @@ -0,0 +1,538 @@ + + + + + + + + + + + + + + + +
+<!-- generated railroad diagram page for complex_chemical_formulas.py, showing
+     "chemical_formula", "element_expression", the integer/subscript/separator
+     operation tiers produced by infix_notation, "nested_element", "subscript"
+     (W:(₀-₉)), the compressed "element" regex, and "optional_separator" ([=·]);
+     boilerplate HTML/SVG markup not reproduced here -->
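The `lmult`/`rmult`/`element_ref_sum` parse actions in complex_chemical_formulas.py above work because each parsed element becomes a `collections.Counter`: `qty * terms` repeats a list of Counters, and `sum(..., Counter())` adds the per-element counts together. A standalone sketch of that arithmetic (the weights dict is a hand-copied subset of `table_of_elements`):

    from collections import Counter

    # "(C₆H₅OH)₂" parses the group into one Counter, then multiplies it out
    phenol = Counter({"C": 6, "H": 6, "O": 1})
    doubled = sum(2 * [phenol], Counter())
    assert doubled == Counter({"C": 12, "H": 12, "O": 2})

    # molecular_weight() then reduces the Counter against the weights table
    weights = {"C": 12.011, "H": 1.007, "O": 15.999}
    print(sum(weights[sym] * qty for sym, qty in doubled.items()))  # ~188.214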
+ + + + diff --git a/examples/cpp_enum_parser.py b/examples/cpp_enum_parser.py index 26dde7c3..1b015097 100644 --- a/examples/cpp_enum_parser.py +++ b/examples/cpp_enum_parser.py @@ -9,7 +9,7 @@ # # -from pyparsing import * +import pyparsing as pp # sample string with enums and other stuff sample = """ @@ -35,19 +35,19 @@ """ # syntax we don't want to see in the final parse tree -LBRACE, RBRACE, EQ, COMMA = map(Suppress, "{}=,") -_enum = Suppress("enum") -identifier = Word(alphas, alphanums + "_") -integer = Word(nums) -enumValue = Group(identifier("name") + Optional(EQ + integer("value"))) -enumList = Group(enumValue + ZeroOrMore(COMMA + enumValue)) +LBRACE, RBRACE, EQ, COMMA = pp.Suppress.using_each("{}=,") +_enum = pp.Suppress("enum") +identifier = pp.Word(pp.alphas + "_", pp.alphanums + "_") +integer = pp.Word(pp.nums) +enumValue = pp.Group(identifier("name") + pp.Optional(EQ + integer("value"))) +enumList = pp.Group(enumValue + (COMMA + enumValue)[...]) enum = _enum + identifier("enum") + LBRACE + enumList("names") + RBRACE # find instances of enums ignoring other syntax -for item, start, stop in enum.scanString(sample): - id = 0 +for item, start, stop in enum.scan_string(sample): + idx = 0 for entry in item.names: if entry.value != "": - id = int(entry.value) - print("%s_%s = %d" % (item.enum.upper(), entry.name.upper(), id)) - id += 1 + idx = int(entry.value) + print(f"{item.enum.upper()}_{entry.name.upper()} = {idx}") + idx += 1 diff --git a/examples/datetimeParseActions.py b/examples/datetime_parse_actions.py similarity index 63% rename from examples/datetimeParseActions.py rename to examples/datetime_parse_actions.py index f7c4fc98..b1121418 100644 --- a/examples/datetimeParseActions.py +++ b/examples/datetime_parse_actions.py @@ -1,84 +1,79 @@ -# parseActions.py -# -# A sample program a parser to match a date string of the form "YYYY/MM/DD", -# and return it as a datetime, or raise an exception if not a valid date. -# -# Copyright 2012, Paul T. 
McGuire -# -from datetime import datetime -import pyparsing as pp -from pyparsing import pyparsing_common as ppc - -# define an integer string, and a parse action to convert it -# to an integer at parse time -integer = pp.Word(pp.nums).setName("integer") - - -def convertToInt(tokens): - # no need to test for validity - we can't get here - # unless tokens[0] contains all numeric digits - return int(tokens[0]) - - -integer.setParseAction(convertToInt) -# or can be written as one line as -# integer = Word(nums).setParseAction(lambda t: int(t[0])) - -# define a pattern for a year/month/day date -date_expr = integer("year") + "/" + integer("month") + "/" + integer("day") -date_expr.ignore(pp.pythonStyleComment) - - -def convertToDatetime(s, loc, tokens): - try: - # note that the year, month, and day fields were already - # converted to ints from strings by the parse action defined - # on the integer expression above - return datetime(tokens.year, tokens.month, tokens.day).date() - except Exception as ve: - errmsg = "'%s/%s/%s' is not a valid date, %s" % ( - tokens.year, - tokens.month, - tokens.day, - ve, - ) - raise pp.ParseException(s, loc, errmsg) - - -date_expr.setParseAction(convertToDatetime) - - -date_expr.runTests( - """\ - 2000/1/1 - - # invalid month - 2000/13/1 - - # 1900 was not a leap year - 1900/2/29 - - # but 2000 was - 2000/2/29 - """ -) - - -# if dates conform to ISO8601, use definitions in pyparsing_common -date_expr = ppc.iso8601_date.setParseAction(ppc.convertToDate()) -date_expr.ignore(pp.pythonStyleComment) - -date_expr.runTests( - """\ - 2000-01-01 - - # invalid month - 2000-13-01 - - # 1900 was not a leap year - 1900-02-29 - - # but 2000 was - 2000-02-29 - """ -) +# parseActions.py +# +# A sample parser to match a date string of the form "YYYY/MM/DD", +# and return it as a datetime, or raise an exception if not a valid date. +# +# Copyright 2012, Paul T. 
McGuire
+#
+from datetime import datetime
+import pyparsing as pp
+from pyparsing import pyparsing_common as ppc
+
+# define an integer string, and a parse action to convert it
+# to an integer at parse time
+integer = pp.Word(pp.nums).set_name("integer")
+
+
+def convert_to_int(tokens):
+    # no need to test for validity - we can't get here
+    # unless tokens[0] contains all numeric digits
+    return int(tokens[0])
+
+
+integer.set_parse_action(convert_to_int)
+# or can be written as one line as
+# integer = Word(nums).set_parse_action(lambda t: int(t[0]))
+
+# define a pattern for a year/month/day date
+date_expr = integer("year") + "/" + integer("month") + "/" + integer("day")
+date_expr.ignore(pp.python_style_comment)
+
+
+def convert_to_datetime(s, loc, tokens):
+    try:
+        # note that the year, month, and day fields were already
+        # converted to ints from strings by the parse action defined
+        # on the integer expression above
+        return datetime(tokens.year, tokens.month, tokens.day).date()
+    except Exception as ve:
+        errmsg = f"'{tokens.year}/{tokens.month}/{tokens.day}' is not a valid date, {ve}"
+        raise pp.ParseException(s, loc, errmsg)
+
+
+date_expr.set_parse_action(convert_to_datetime)
+
+
+date_expr.run_tests(
+    """\
+    2000/1/1
+
+    # invalid month
+    2000/13/1
+
+    # 1900 was not a leap year
+    1900/2/29
+
+    # but 2000 was
+    2000/2/29
+    """
+)
+
+
+# if dates conform to ISO8601, use definitions in pyparsing_common
+date_expr = ppc.iso8601_date.set_parse_action(ppc.convert_to_date())
+date_expr.ignore(pp.python_style_comment)
+
+date_expr.run_tests(
+    """\
+    2000-01-01
+
+    # invalid month
+    2000-13-01
+
+    # 1900 was not a leap year
+    1900-02-29
+
+    # but 2000 was
+    2000-02-29
+    """
+)
diff --git a/examples/decaf_parser.py b/examples/decaf_parser.py
index d0a376df..46a1347a 100644
--- a/examples/decaf_parser.py
+++ b/examples/decaf_parser.py
@@ -9,6 +9,7 @@
 #
 # Copyright 2018, Paul McGuire
 #
+# fmt: off
 """
 Program ::= Decl+
 Decl ::= VariableDecl | FunctionDecl | ClassDecl | InterfaceDecl
@@ -43,71 +44,56 @@
 import pyparsing as pp
 from pyparsing import pyparsing_common as ppc
 
-pp.ParserElement.enablePackrat()
+pp.ParserElement.enable_packrat()
 
 # keywords
-keywords = (
-    VOID,
-    INT,
-    DOUBLE,
-    BOOL,
-    STRING,
-    CLASS,
-    INTERFACE,
-    NULL,
-    THIS,
-    EXTENDS,
-    IMPLEMENTS,
-    FOR,
-    WHILE,
-    IF,
-    ELSE,
-    RETURN,
-    BREAK,
-    NEW,
-    NEWARRAY,
-    PRINT,
-    READINTEGER,
-    READLINE,
-    TRUE,
-    FALSE,
-) = map(
-    pp.Keyword,
-    """void int double bool string class interface null this extends implements or while
-    if else return break new NewArray Print ReadInteger ReadLine true false""".split(),
+keywords_ = (
+    VOID, INT, DOUBLE, BOOL, STRING, CLASS, INTERFACE, NULL, THIS, EXTENDS,
+    IMPLEMENTS, FOR, WHILE, IF, ELSE, RETURN, BREAK, NEW, NEWARRAY,
+    PRINT, READINTEGER, READLINE, TRUE, FALSE,
+) = list(
+    pp.Keyword.using_each(
+        """
+        void int double bool string class interface null this extends implements for while
+        if else return break new NewArray Print ReadInteger ReadLine true false
+        """.split(),
+    )
 )
-keywords = pp.MatchFirst(list(keywords))
+keywords = pp.MatchFirst(keywords_).set_name("any_keyword")
 
-LPAR, RPAR, LBRACE, RBRACE, LBRACK, RBRACK, DOT, EQ, COMMA, SEMI = map(
-    pp.Suppress, "(){}[].=,;"
+(
+    LPAR, RPAR, LBRACE, RBRACE, LBRACK, RBRACK, DOT, EQ, COMMA, SEMI
+) = pp.Suppress.using_each("(){}[].=,;")
+
+hex_constant = pp.Regex(r"0[xX][0-9a-fA-F]+").add_parse_action(
+    lambda t: int(t[0][2:], 16)
 )
-hexConstant = pp.Regex(r"0[xX][0-9a-fA-F]+").addParseAction(lambda t: int(t[0][2:], 16))
-intConstant = hexConstant | ppc.integer -doubleConstant = ppc.real -boolConstant = TRUE | FALSE -stringConstant = pp.dblQuotedString +int_constant = hex_constant | ppc.integer +double_constant = ppc.real +bool_constant = TRUE | FALSE +string_constant = pp.dbl_quoted_string null = NULL -constant = doubleConstant | boolConstant | intConstant | stringConstant | null -ident = ~keywords + pp.Word(pp.alphas, pp.alphanums + "_") -type_ = pp.Group((INT | DOUBLE | BOOL | STRING | ident) + pp.ZeroOrMore("[]")) +constant = double_constant | bool_constant | int_constant | string_constant | null +ident = ~keywords + ppc.identifier +type_ = pp.Group((INT | DOUBLE | BOOL | STRING | ident) + pp.Literal("[]")[...]) variable = type_ + ident variable_decl = variable + SEMI expr = pp.Forward() expr_parens = pp.Group(LPAR + expr + RPAR) -actuals = pp.Optional(pp.delimitedList(expr)) +actuals = pp.DelimitedList(expr) | "" call = pp.Group( ident("call_ident") + LPAR + actuals("call_args") + RPAR - | (expr_parens + pp.ZeroOrMore(DOT + ident))("call_ident_expr") + | (expr_parens + (DOT + ident)[...])("call_ident_expr") + LPAR + actuals("call_args") + RPAR ) lvalue = ( (ident | expr_parens) - + pp.ZeroOrMore(DOT + (ident | expr_parens)) - + pp.ZeroOrMore(LBRACK + expr + RBRACK) + + (DOT + (ident | expr_parens))[...] + + (LBRACK + expr + RBRACK)[...] ) assignment = pp.Group(lvalue("lhs") + EQ + expr("rhs")) read_integer = pp.Group(READINTEGER + LPAR + RPAR) @@ -115,54 +101,22 @@ new_statement = pp.Group(NEW + ident) new_array = pp.Group(NEWARRAY + LPAR + expr + COMMA + type_ + RPAR) rvalue = constant | call | read_integer | read_line | new_statement | new_array | ident -arith_expr = pp.infixNotation( - rvalue, +arith_expr = pp.infix_notation( + rvalue.set_name("rvalue"), [ - ( - "-", - 1, - pp.opAssoc.RIGHT, - ), - ( - pp.oneOf("* / %"), - 2, - pp.opAssoc.LEFT, - ), - ( - pp.oneOf("+ -"), - 2, - pp.opAssoc.LEFT, - ), + ("-", 1, pp.OpAssoc.RIGHT,), + (pp.one_of("* / %"), 2, pp.OpAssoc.LEFT,), + (pp.one_of("+ -"), 2, pp.OpAssoc.LEFT,), ], ) -comparison_expr = pp.infixNotation( - arith_expr, +comparison_expr = pp.infix_notation( + arith_expr.set_name("arith_expr"), [ - ( - "!", - 1, - pp.opAssoc.RIGHT, - ), - ( - pp.oneOf("< > <= >="), - 2, - pp.opAssoc.LEFT, - ), - ( - pp.oneOf("== !="), - 2, - pp.opAssoc.LEFT, - ), - ( - pp.oneOf("&&"), - 2, - pp.opAssoc.LEFT, - ), - ( - pp.oneOf("||"), - 2, - pp.opAssoc.LEFT, - ), + ("!", 1, pp.OpAssoc.RIGHT,), + (pp.one_of("< > <= >="), 2, pp.OpAssoc.LEFT,), + (pp.one_of("== !="), 2, pp.OpAssoc.LEFT,), + (pp.one_of("&&"), 2, pp.OpAssoc.LEFT,), + (pp.one_of("||"), 2, pp.OpAssoc.LEFT,), ], ) expr <<= ( @@ -183,7 +137,7 @@ print_stmt = pp.Group( PRINT("statement") + LPAR - + pp.Group(pp.Optional(pp.delimitedList(expr)))("args") + + pp.Group(pp.DelimitedList(expr) | "")("args") + RPAR + SEMI ) @@ -192,11 +146,11 @@ for_stmt = pp.Group( FOR("statement") + LPAR - + pp.Optional(expr) + + (expr | "") + SEMI + expr + SEMI - + pp.Optional(expr) + + (expr | "") + RPAR + stmt ) @@ -207,10 +161,10 @@ + pp.Group(expr)("condition") + RPAR + pp.Group(stmt)("then_statement") - + pp.Group(pp.Optional(ELSE + stmt))("else_statement") + + pp.Group((ELSE + stmt | ""))("else_statement") ) stmt_block = pp.Group( - LBRACE + pp.ZeroOrMore(variable_decl) + pp.ZeroOrMore(stmt) + RBRACE + LBRACE + variable_decl[...] + stmt[...] 
+ RBRACE ) stmt <<= ( if_stmt @@ -223,7 +177,7 @@ | pp.Group(expr + SEMI) ) -formals = pp.Optional(pp.delimitedList(variable)) +formals = pp.DelimitedList(variable) | "" prototype = pp.Group( (type_ | VOID)("return_type") + ident("function_name") @@ -245,50 +199,60 @@ INTERFACE + ident("interface_name") + LBRACE - + pp.ZeroOrMore(prototype)("prototypes") + + prototype[...]("prototypes") + RBRACE )("interface") field = variable_decl | function_decl class_decl = pp.Group( CLASS + ident("class_name") - + pp.Optional(EXTENDS + ident)("extends") - + pp.Optional(IMPLEMENTS + pp.delimitedList(ident))("implements") + + (EXTENDS + ident | "")("extends") + + (IMPLEMENTS + pp.DelimitedList(ident) | "")("implements") + LBRACE - + pp.ZeroOrMore(field)("fields") + + field[...]("fields") + RBRACE )("class_decl") decl = variable_decl | function_decl | class_decl | interface_decl | prototype -program = pp.OneOrMore(pp.Group(decl)) +program = pp.Group(decl)[1, ...] decaf_parser = program -stmt.runTests( - """\ - sin(30); - a = 1; - b = 1 + 1; - b = 1 != 2 && false; - print("A"); - a.b = 100; - a.b = 100.0; - a[100] = b; - a[0][0] = 2; - a = 0x1234; -""" -) +pp.autoname_elements() + +if __name__ == '__main__': + import contextlib -test_program = """ - void getenv(string var); - int main(string[] args) { - if (a > 100) { - Print(a, " is too big"); - } else if (a < 100) { - Print(a, " is too small"); - } else { - Print(a, "just right!"); + # create railroad diagram for this parser + with contextlib.suppress(Exception): + program.create_diagram( + "decaf_parser_diagram.html", vertical=2, show_groups=True + ) + + stmt.runTests("""\ + sin(30); + a = 1; + b = 1 + 1; + b = 1 != 2 && false; + print("A"); + a.b = 100; + a.b = 100.0; + a[100] = b; + a[0][0] = 2; + a = 0x1234; + """ + ) + + test_program = """ + void getenv(string var); + int main(string[] args) { + if (a > 100) { + Print(a, " is too big"); + } else if (a < 100) { + Print(a, " is too small"); + } else { + Print(a, "just right!"); + } } - } -""" + """ -print(decaf_parser.parseString(test_program).dump()) + print(decaf_parser.parse_string(test_program).dump()) diff --git a/examples/decaf_parser_diagram.html b/examples/decaf_parser_diagram.html new file mode 100644 index 00000000..f1802c14 --- /dev/null +++ b/examples/decaf_parser_diagram.html @@ -0,0 +1,4263 @@ + + + + + + + + + + + + + + + +
+<!-- generated railroad diagram page for decaf_parser.py, showing "program",
+     "decl", "variable_decl", "function_decl", "class_decl", "interface_decl",
+     "prototype", the statement rules (if/while/for/break/return/print and
+     statement blocks), the expression rules with their infix_notation
+     operation tiers, and the keyword literals; boilerplate HTML/SVG markup
+     not reproduced here -->
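One idiom repeated throughout the decaf_parser.py diff above: `pp.Optional(pp.delimitedList(expr))` becomes `pp.DelimitedList(expr) | ""`, relying on pyparsing promoting an empty-string operand to an expression that matches nothing (the same `| ""` idiom the diff uses for `formals`, `actuals`, and the optional `extends`/`implements` clauses). A minimal sketch of the equivalence:

    import pyparsing as pp

    arg = pp.Word(pp.alphas)
    actuals = pp.DelimitedList(arg) | ""

    print(actuals.parse_string("a, b, c"))  # -> ['a', 'b', 'c']
    print(actuals.parse_string(""))         # -> []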
+ + + + diff --git a/examples/delta_time.py b/examples/delta_time.py index 2f9466cb..4bb3c0fa 100644 --- a/examples/delta_time.py +++ b/examples/delta_time.py @@ -1,4 +1,4 @@ -# deltaTime.py +# delta_time.py # # Parser to convert a conversational time reference such as "in a minute" or # "noon tomorrow" and convert it to a Python datetime. The returned @@ -11,7 +11,12 @@ # BNF: # time_and_day ::= time_reference [day_reference] | day_reference 'at' absolute_time_of_day # day_reference ::= absolute_day_reference | relative_day_reference -# absolute_day_reference ::= 'today' | 'tomorrow' | 'yesterday' | ('next' | 'last') weekday_name +# absolute_day_reference ::= 'today' | 'tomorrow' | 'yesterday' | ['next' | 'last'] weekday_name +# (if weekday_name is given and is the same as the reference weekday: +# if 'next' is given, use 7 days after the reference time +# else if 'last' is given, use 7 days before the reference time +# else, use the reference time) +# # relative_day_reference ::= 'in' qty day_units # | qty day_units 'ago' # | 'qty day_units ('from' | 'before' | 'after') absolute_day_reference @@ -26,131 +31,159 @@ # absolute_time ::= 24hour_time | hour ("o'clock" | ':' minute) ('AM'|'PM') # # qty ::= integer | integer_words | 'a couple of' | 'a' | 'the' +# weekday_name ::= 'Monday' | ... | 'Sunday' # # Copyright 2010, 2019 by Paul McGuire # -from datetime import datetime, time, timedelta -import pyparsing as pp import calendar +from datetime import datetime, time as datetime_time, timedelta + +import pyparsing as pp __all__ = ["time_expression"] +_WEEKDAY_NAMES = list(calendar.day_name) +_DAY_NUM_BY_NAME = {d: i for i, d in enumerate(_WEEKDAY_NAMES)} + + # basic grammar definitions -def make_integer_word_expr(int_name, int_value): - return pp.CaselessKeyword(int_name).addParseAction(pp.replaceWith(int_value)) +def _make_integer_word_expr(int_name: str, int_value: int) -> pp.CaselessKeyword: + return pp.CaselessKeyword( + int_name, ident_chars=pp.srange("[A-Za-z-]") + ).add_parse_action(pp.replace_with(int_value)) integer_word = pp.MatchFirst( - make_integer_word_expr(int_str, int_value) + _make_integer_word_expr(int_str, int_value) for int_value, int_str in enumerate( "one two three four five six seven eight nine ten" " eleven twelve thirteen fourteen fifteen sixteen" - " seventeen eighteen nineteen twenty".split(), + " seventeen eighteen nineteen twenty twenty-one" + " twenty-two twenty-three twenty-four".split(), start=1, ) -).setName("integer_word") +).set_name("integer_word") integer = pp.pyparsing_common.integer | integer_word -integer.setName("numeric") +integer.set_name("numeric") CK = pp.CaselessKeyword CL = pp.CaselessLiteral -today, tomorrow, yesterday, noon, midnight, now = map( - CK, "today tomorrow yesterday noon midnight now".split() +today, tomorrow, yesterday, noon, midnight, now = CK.using_each( + "today tomorrow yesterday noon midnight now".split() ) -def plural(s): - return CK(s) | CK(s + "s").addParseAction(pp.replaceWith(s)) +def _now(): + return datetime.now().replace(microsecond=0) + + +def _singular_or_plural(s: str) -> pp.ParserElement: + return CK(s) | CK(s + "s").add_parse_action(pp.replace_with(s)) -week, day, hour, minute, second = map(plural, "week day hour minute second".split()) +week, day, hour, minute, second = map( + _singular_or_plural, "week day hour minute second".split() +) time_units = hour | minute | second -any_time_units = (week | day | time_units).setName("time_units") +any_time_units = (week | day | time_units).set_name("any_time_units") am = 
CL("am") pm = CL("pm") COLON = pp.Suppress(":") -in_ = CK("in").setParseAction(pp.replaceWith(1)) -from_ = CK("from").setParseAction(pp.replaceWith(1)) -before = CK("before").setParseAction(pp.replaceWith(-1)) -after = CK("after").setParseAction(pp.replaceWith(1)) -ago = CK("ago").setParseAction(pp.replaceWith(-1)) -next_ = CK("next").setParseAction(pp.replaceWith(1)) -last_ = CK("last").setParseAction(pp.replaceWith(-1)) +in_ = CK("in").set_parse_action(pp.replace_with(1)) +from_ = CK("from").set_parse_action(pp.replace_with(1)) +before = CK("before").set_parse_action(pp.replace_with(-1)) +after = CK("after").set_parse_action(pp.replace_with(1)) +ago = CK("ago").set_parse_action(pp.replace_with(-1)) +next_ = CK("next").set_parse_action( + pp.replace_with(1), lambda t: t.__setitem__("next_present", True) +) +last_ = CK("last").set_parse_action(pp.replace_with(-1)) at_ = CK("at") on_ = CK("on") +a_ = CK("a") +an_ = CK("an") +of_ = CK("of") +the_ = CK("the") +adverb_ = pp.MatchFirst(CK.using_each("just only exactly".split())).suppress() couple = ( - (pp.Optional(CK("a")) + CK("couple") + pp.Optional(CK("of"))) - .setParseAction(pp.replaceWith(2)) - .setName("couple") + (pp.Opt(CK("a")) + CK("couple") + pp.Opt(CK("of"))) + .set_parse_action(pp.replace_with(2)) + .set_name("couple") ) -a_qty = (CK("a") | CK("an")).setParseAction(pp.replaceWith(1)) -the_qty = CK("the").setParseAction(pp.replaceWith(1)) +a_qty = (a_ | an_).set_parse_action(pp.replace_with(1)) +the_qty = the_.set_parse_action(pp.replace_with(1)) qty = pp.ungroup( - (integer | couple | a_qty | the_qty).setName("qty_expression") -).setName("qty") -time_ref_present = pp.Empty().addParseAction(pp.replaceWith(True))("time_ref_present") - + (pp.Opt(adverb_) + (integer | couple | a_qty | the_qty)).set_name("qty_expression") +).set_name("qty") +time_ref_present = pp.Tag("time_ref_present") + +# get weekday names from the calendar module +weekday_names = list(calendar.day_name) +weekday_name = pp.MatchFirst(CK.using_each(weekday_names)).set_name("weekday_name") + +# expressions for military 2400 time +_24hour_time = ~(pp.Word(pp.nums) + any_time_units).set_name( + "numbered_time_units" +) + pp.Regex( + r"\b([01]\d|2[0-3])([0-5]\d)\b", + as_group_list=True +).set_name("HHMM").add_parse_action( + lambda t: [int(t[0][0]), int(t[0][1])] +) +_24hour_time.set_name("0000 time") -def fill_24hr_time_fields(t): +@_24hour_time.add_parse_action +def _fill_24hr_time_fields(t: pp.ParseResults) -> None: t["HH"] = t[0] t["MM"] = t[1] t["SS"] = 0 - t["ampm"] = ("am", "pm")[t.HH >= 12] + t["ampm"] = "am" if t.HH < 12 else "pm" +ampm = am | pm +o_clock = CK("o'clock", ident_chars=pp.srange("[A-Za-z']")) +timespec = ( + integer("HH") + + pp.Opt(o_clock | COLON + integer("MM") + pp.Opt(COLON + integer("SS"))) + + (am | pm)("ampm") +) -def fill_default_time_fields(t): +@timespec.add_parse_action +def _fill_default_time_fields(t: pp.ParseResults) -> None: for fld in "HH MM SS".split(): if fld not in t: t[fld] = 0 -weekday_name_list = list(calendar.day_name) -weekday_name = pp.oneOf(weekday_name_list).setName("weekday_name") - -_24hour_time = ~(integer + any_time_units).setName("numbered_time_units") + pp.Word(pp.nums, exact=4).setName("HHMM").addParseAction( - lambda t: [int(t[0][:2]), int(t[0][2:])], fill_24hr_time_fields -) -_24hour_time.setName("0000 time") -ampm = am | pm -timespec = ( - integer("HH") - + pp.Optional( - CK("o'clock") | COLON + integer("MM") + pp.Optional(COLON + integer("SS")) - ) - + (am | pm)("ampm") 
-).addParseAction(fill_default_time_fields) absolute_time = _24hour_time | timespec -absolute_time.setName("absolute time") +absolute_time.set_name("absolute time") absolute_time_of_day = noon | midnight | now | absolute_time -absolute_time_of_day.setName("time of day") +absolute_time_of_day.set_name("time of day") - -def add_computed_time(t): - if t[0] in "now noon midnight".split(): +@absolute_time_of_day.add_parse_action +def _add_computed_time(t: pp.ParseResults) -> None: + initial_word = t[0] + if initial_word in "now noon midnight".split(): t["computed_time"] = { - "now": datetime.now().time().replace(microsecond=0), - "noon": time(hour=12), - "midnight": time(), - }[t[0]] + "now": _now().time(), + "noon": datetime_time(hour=12), + "midnight": datetime_time(hour=0), + }[initial_word] else: t["HH"] = {"am": int(t["HH"]) % 12, "pm": int(t["HH"]) % 12 + 12}[t.ampm] - t["computed_time"] = time(hour=t.HH, minute=t.MM, second=t.SS) - - -absolute_time_of_day.addParseAction(add_computed_time) + t["computed_time"] = datetime_time(hour=t.HH, minute=t.MM, second=t.SS) # relative_time_reference ::= qty time_units ('ago' | ('from' | 'before' | 'after') absolute_time_of_day) # | 'in' qty time_units -time_units = (hour | minute | second).setName("time unit") +time_units = (hour | minute | second).set_name("time unit") relative_time_reference = ( ( qty("qty") @@ -162,69 +195,72 @@ def add_computed_time(t): ) ) | in_("dir") + qty("qty") + time_units("units") -).setName("relative time") - +).set_name("relative time") -def compute_relative_time(t): +@relative_time_reference.add_parse_action +def _compute_relative_time(t: pp.ParseResults) -> None: if "ref_time" not in t: - t["ref_time"] = datetime.now().time().replace(microsecond=0) + t["ref_time"] = _now().time().replace(microsecond=0) else: t["ref_time"] = t.ref_time.computed_time delta_seconds = {"hour": 3600, "minute": 60, "second": 1}[t.units] * t.qty t["time_delta"] = timedelta(seconds=t.dir * delta_seconds) -relative_time_reference.addParseAction(compute_relative_time) - time_reference = absolute_time_of_day | relative_time_reference -time_reference.setName("time reference") +time_reference.set_name("time reference") - -def add_default_time_ref_fields(t): +@time_reference.add_parse_action +def _add_default_time_ref_fields(t: pp.ParseResults) -> None: if "time_delta" not in t: t["time_delta"] = timedelta() -time_reference.addParseAction(add_default_time_ref_fields) - # absolute_day_reference ::= 'today' | 'tomorrow' | 'yesterday' | ('next' | 'last') weekday_name # day_units ::= 'days' | 'weeks' day_units = day | week -weekday_reference = pp.Optional(next_ | last_, 1)("dir") + weekday_name("day_name") +weekday_reference = pp.Opt(next_ | last_, 1)("dir") + weekday_name("day_name") -def convert_abs_day_reference_to_date(t): - now = datetime.now().replace(microsecond=0) +absolute_day_reference = ( + today | tomorrow | yesterday | (now + time_ref_present) | weekday_reference +) +absolute_day_reference.set_name("absolute day") + +@absolute_day_reference.add_parse_action +def _convert_abs_day_reference_to_date(t: pp.ParseResults) -> None: + now_ref = _now().replace(microsecond=0) # handle day reference by weekday name if "day_name" in t: - todaynum = now.weekday() - daynames = [n.lower() for n in weekday_name_list] - nameddaynum = daynames.index(t.day_name.lower()) + today_num = now_ref.weekday() + day_names = [n.lower() for n in weekday_names] + named_day_num = day_names.index(t.day_name.lower()) # compute difference in days - if current weekday name is 
referenced, then # computed 0 offset is changed to 7 if t.dir > 0: - daydiff = (nameddaynum + 7 - todaynum) % 7 or 7 + if today_num != named_day_num or t.next_present: + day_diff = (named_day_num + 7 - today_num) % 7 or 7 + else: + day_diff = 0 else: - daydiff = -((todaynum + 7 - nameddaynum) % 7 or 7) - t["abs_date"] = datetime(now.year, now.month, now.day) + timedelta(daydiff) + day_diff = -((today_num + 7 - named_day_num) % 7 or 7) + t["abs_date"] = datetime(now_ref.year, now_ref.month, now_ref.day) + timedelta( + days=day_diff + ) else: name = t[0] t["abs_date"] = { - "now": now, - "today": datetime(now.year, now.month, now.day), - "yesterday": datetime(now.year, now.month, now.day) + timedelta(days=-1), - "tomorrow": datetime(now.year, now.month, now.day) + timedelta(days=+1), + "now": now_ref, + "today": datetime(now_ref.year, now_ref.month, now_ref.day), + "yesterday": datetime(now_ref.year, now_ref.month, now_ref.day) + + timedelta(days=-1), + "tomorrow": datetime(now_ref.year, now_ref.month, now_ref.day) + + timedelta(days=+1), }[name] -absolute_day_reference = ( - today | tomorrow | yesterday | now + time_ref_present | weekday_reference -) -absolute_day_reference.addParseAction(convert_abs_day_reference_to_date) -absolute_day_reference.setName("absolute day") - # relative_day_reference ::= 'in' qty day_units # | qty day_units # ('ago' @@ -234,11 +270,11 @@ def convert_abs_day_reference_to_date(t): ) + day_units("units") + ( ago("dir") | ((from_ | before | after)("dir") + absolute_day_reference("ref_day")) ) -relative_day_reference.setName("relative day") - +relative_day_reference.set_name("relative day") -def compute_relative_date(t): - now = datetime.now().replace(microsecond=0) +@relative_day_reference.add_parse_action +def _compute_relative_date(t: pp.ParseResults) -> None: + now = _now().replace(microsecond=0) if "ref_day" in t: t["computed_date"] = t.ref_day else: @@ -247,36 +283,35 @@ def compute_relative_date(t): t["date_delta"] = timedelta(days=day_diff) -relative_day_reference.addParseAction(compute_relative_date) - # combine expressions for absolute and relative day references day_reference = relative_day_reference | absolute_day_reference -day_reference.setName("day reference") - +day_reference.set_name("day reference") -def add_default_date_fields(t): +@day_reference.add_parse_action +def _add_default_date_fields(t: pp.ParseResults) -> None: if "date_delta" not in t: t["date_delta"] = timedelta() -day_reference.addParseAction(add_default_date_fields) - # combine date and time expressions into single overall parser -time_and_day = time_reference + time_ref_present + pp.Optional( - pp.Optional(on_) + day_reference -) | day_reference + pp.Optional(at_ + absolute_time_of_day + time_ref_present) -time_and_day.setName("time and day") +time_and_day = time_reference + time_ref_present + pp.Opt( + pp.Opt(on_) + day_reference +) | day_reference + pp.Opt(pp.Opt(at_) + absolute_time_of_day + time_ref_present) +time_and_day.set_name("time and day") + # parse actions for total time_and_day expression -def save_original_string(s, l, t): +@time_and_day.add_parse_action +def _save_original_string(s: str, _: int, t: pp.ParseResults) -> None: # save original input string and reference time t["original"] = " ".join(s.strip().split()) - t["relative_to"] = datetime.now().replace(microsecond=0) + t["relative_to"] = _now().replace(microsecond=0) -def compute_timestamp(t): +@time_and_day.add_parse_action +def _compute_timestamp(t: pp.ParseResults) -> None: # accumulate values from parsed time 
and day subexpressions - fill in defaults for omitted parts - now = datetime.now().replace(microsecond=0) + now = _now().replace(microsecond=0) if "computed_time" not in t: t["computed_time"] = t.ref_time or now.time() if "abs_date" not in t: @@ -304,7 +339,8 @@ def compute_timestamp(t): t["time_offset"] = t.computed_dt - t.relative_to -def remove_temp_keys(t): +@time_and_day.add_parse_action +def _remove_temp_keys(t: pp.ParseResults) -> None: # strip out keys that are just used internally all_keys = list(t.keys()) for k in all_keys: @@ -317,76 +353,129 @@ def remove_temp_keys(t): ): del t[k] - -time_and_day.addParseAction(save_original_string, compute_timestamp, remove_temp_keys) + # delete list elements - just return keys + del t[:] time_expression = time_and_day +pp.autoname_elements() + -def main(): - current_time = datetime.now() - # test grammar - tests = """\ - today - tomorrow - yesterday - the day before yesterday - the day after tomorrow - 2 weeks after today - in a couple of days - a couple of days from now - a couple of days from today - in a day - 3 days ago - 3 days from now - a day ago - an hour ago - in 2 weeks - in 3 days at 5pm - now - 10 minutes ago - 10 minutes from now - in 10 minutes - in a minute - in a couple of minutes - 20 seconds ago - in 30 seconds - in an hour - in a couple hours - in a couple days - 20 seconds before noon - ten seconds before noon tomorrow - noon - midnight - noon tomorrow - 6am tomorrow - 0800 yesterday - 1700 tomorrow - 12:15 AM today - 3pm 2 days from today - a week from today - a week from now - three weeks ago - noon next Sunday - noon Sunday - noon last Sunday - 2pm next Sunday - next Sunday at 2pm - last Sunday at 2pm - 10 seconds ago - 100 seconds ago - 1000 seconds ago - 10000 seconds ago +def demo(): """ + Demonstrate using the time_expression parser, and accessing + the parsed results. + - parse a complex time expression + - show all fields that are accessible in the results + - show an example of using one of the results fields in Python + """ + + # - parse a complex time expression + example_expr = "10 seconds before noon tomorrow" + result = time_expression.parse_string(example_expr) + + # - show all fields that are accessible in the results + print(f"\nDemo: Results of parsing {example_expr!r}", end="") + print(result.dump(include_list=False)) + + # - show an example of using one of the results fields in Python + print("Computed time:", result.computed_dt) + + +def run_all_tests() -> bool: + import itertools + from typing import Dict + + def make_weekday_time_references() -> Dict[str, timedelta]: + def offset_weekday( + day_name: str, offset_dir: int, next_present: bool = False + ) -> timedelta: + """ + Compute a timedelta for a reference to a weekday by name, relative to + the current weekday. + + If the current day is Monday: + "next Monday" will be one week in the future + "last Monday" will be one week in the past + "Monday" will be the current day + "next Tuesday" and "Tuesday" will be one day in the future + "last Tuesday" will be 6 days in the past + ... 
and similar for all other weekdays + """ + to_day_num = _DAY_NUM_BY_NAME[day_name] + from_day_num = current_time.weekday() + + if to_day_num != from_day_num: + if offset_dir == 1: + return timedelta(days=(to_day_num + 7 - from_day_num) % 7) + else: + return timedelta(days=-((from_day_num + 7 - to_day_num) % 7)) + else: + if offset_dir == 1: + if next_present: + return timedelta(days=7) + else: + return timedelta() + else: + return timedelta(days=-7) + + def next_weekday_by_name( + day_name: str, *, next_present: bool = False + ) -> timedelta: + return offset_weekday(day_name, 1, next_present) + + def prev_weekday_by_name(day_name: str, **_) -> timedelta: + return offset_weekday(day_name, -1) + + # add test_time_exprs for various times, forward and backward to a weekday by name + # define lists of expression terms to generate permutations of times, weekdays, + # and next/last + times = [("noon", 12), ("2am", 2), ("2pm", 14), ("1500", 15)] + rels = ["", "next", "last"] + weekday_rel_func = { + "": next_weekday_by_name, + "next": next_weekday_by_name, + "last": prev_weekday_by_name, + } + + weekday_test_cases = {} + for (timestr, timehours), rel, dayname in itertools.product( + times, rels, _WEEKDAY_NAMES + ): + next_or_prev_weekday_func = weekday_rel_func[rel] + expected_offset = ( + timedelta(hours=timehours) - time_of_day + ) + next_or_prev_weekday_func(dayname, next_present=rel == "next") + + # times such as "noon last Friday" or just "noon Friday" + weekday_test_cases[f"{timestr} {rel} {dayname}"] = expected_offset + # times such as "next Tuesday at 4pm" or just "Tuesday at 4pm" + weekday_test_cases[f"{rel} {dayname} at {timestr}"] = expected_offset + # times such as "next Tuesday 4pm" or just "Tuesday 4pm" + weekday_test_cases[f"{rel} {dayname} {timestr}"] = expected_offset + + return weekday_test_cases + + # get the current time as a timedelta, to compare with parsed times + current_time = _now() time_of_day = timedelta( hours=current_time.hour, minutes=current_time.minute, seconds=current_time.second, ) - expected = { + + # generate a dict of time expressions and corresponding offset from + # the current time + # fmt: off + test_time_exprs = { "now": timedelta(0), + "midnight": -time_of_day, + "noon": timedelta(hours=12) - time_of_day, + "today": -time_of_day, + "tomorrow": timedelta(days=1) - time_of_day, + "yesterday": timedelta(days=-1) - time_of_day, "10 seconds ago": timedelta(seconds=-10), "100 seconds ago": timedelta(seconds=-100), "1000 seconds ago": timedelta(seconds=-1000), @@ -413,14 +502,8 @@ def main(): "2 weeks after today": timedelta(days=14) - time_of_day, "in 2 weeks": timedelta(days=14) - time_of_day, "the day after tomorrow": timedelta(days=2) - time_of_day, - "tomorrow": timedelta(days=1) - time_of_day, "the day before yesterday": timedelta(days=-2) - time_of_day, - "8am the day after tomorrow": timedelta(days=+2) - - time_of_day - + timedelta(hours=8), - "yesterday": timedelta(days=-1) - time_of_day, - "today": -time_of_day, - "midnight": -time_of_day, + "8am the day after tomorrow": timedelta(days=+2) - time_of_day + timedelta(hours=8), "in a day": timedelta(days=1) - time_of_day, "3 days ago": timedelta(days=-3) - time_of_day, "noon tomorrow": timedelta(days=1) - time_of_day + timedelta(hours=12), @@ -429,28 +512,84 @@ def main(): "1700 tomorrow": timedelta(days=1) - time_of_day + timedelta(hours=17), "12:15 AM today": -time_of_day + timedelta(minutes=15), "3pm 2 days from today": timedelta(days=2) - time_of_day + timedelta(hours=15), - "ten seconds before noon 
tomorrow": timedelta(days=1) - - time_of_day - + timedelta(hours=12) - + timedelta(seconds=-10), - "20 seconds before noon": -time_of_day - + timedelta(hours=12) - + timedelta(seconds=-20), + "ten seconds before noon tomorrow": ( + timedelta(days=1) + - time_of_day + + timedelta(hours=12) + + timedelta(seconds=-10) + ), + "20 seconds before noon": -time_of_day + timedelta(hours=12) + timedelta(seconds=-20), "in 3 days at 5pm": timedelta(days=3) - time_of_day + timedelta(hours=17), + "20 hours from now": timedelta(hours=20), + "twenty hours from now": timedelta(hours=20), + "twenty-four hours from now": timedelta(days=1), + "Twenty-four hours from now": timedelta(days=1), + "just twenty-four hours from now": timedelta(days=1), + "in just 10 seconds": timedelta(seconds=10), + "in just a couple of hours": timedelta(hours=2), + "in exactly 1 hour": timedelta(hours=1), + "only one hour from now": timedelta(hours=1), + "only a couple of days ago": timedelta(days=-2) - time_of_day, } + # fmt: on + + # add expressions using weekday names + test_time_exprs.update(make_weekday_time_references()) - def verify_offset(instring, parsed): + def verify_offset(test_time_str: str, parsed: pp.ParseResults) -> None: + """ + Function to compare computed offset time with expected offset as defined + in times dict. + """ + # allow up to a 1-second time discrepancy due to test processing time time_epsilon = timedelta(seconds=1) - if instring in expected: - # allow up to a second time discrepancy due to test processing time - if (parsed.time_offset - expected[instring]) <= time_epsilon: - parsed["verify_offset"] = "PASS" - else: - parsed["verify_offset"] = "FAIL" + expected_offset = test_time_exprs[test_time_str] + offset_error = parsed.time_offset - expected_offset + + # add helpful test results in case of a test failure + parsed["_testing_expected_offset"] = expected_offset + parsed["_testing_observed_offset"] = parsed.time_offset + parsed["_testing_offset_error"] = offset_error + parsed["_testing_abs_offset_error"] = abs(offset_error) - print("(relative to %s)" % datetime.now()) - time_expression.runTests(tests, postParse=verify_offset) + if abs(offset_error) <= time_epsilon: + parsed["_testing_verify_offset"] = "PASS" + else: + parsed["_testing_verify_offset"] = "FAIL" + + # run all test cases + print(f"(relative to {_now()})") + success, report = time_expression.run_tests( + list(test_time_exprs), post_parse=verify_offset + ) + assert success + + # collect all tests that failed to compute the expected time (relative to + # the current time) + fails = [] + for test, rpt in report: + if rpt._testing_verify_offset != "PASS": + fails.append((test, rpt)) + + if fails: + print(f"\nFAILED ({len(fails)}/{len(test_time_exprs)} tests)") + print("\n".join(f"- {test}" for test, _ in fails)) + else: + print(f"\nPASSED ({len(test_time_exprs)} tests)") + + return not fails + + +def main() -> int: + tests_pass = run_all_tests() + demo() + return 0 if tests_pass else 1 if __name__ == "__main__": - main() + import contextlib + + with contextlib.suppress(Exception): + time_expression.create_diagram("delta_time_diagram.html", vertical=3, show_results_names=True, show_groups=True) + + exit(main()) diff --git a/examples/delta_time_diagram.html b/examples/delta_time_diagram.html new file mode 100644 index 00000000..71689063 --- /dev/null +++ b/examples/delta_time_diagram.html @@ -0,0 +1,1864 @@ + + + + + + + + + + + + + + + +
[... 1,864 added lines of generated railroad-diagram HTML (delta_time_diagram.html) omitted ...]
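The delta_time.py rework above converts its parse actions to the decorator style (`@expr.add_parse_action`) and leaves each successful parse carrying `computed_dt` and `time_offset` results names. A minimal usage sketch, not part of the patch, assuming the script is run from the examples directory:

    from delta_time import time_expression  # examples/delta_time.py

    result = time_expression.parse_string("10 minutes from now")
    print(result.computed_dt)   # absolute datetime, relative to the moment of parsing
    print(result.time_offset)   # timedelta, roughly timedelta(seconds=600)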
diff --git a/examples/dfmparse.py b/examples/dfmparse.py
index 5d9b1b14..cc5a0aa2 100644
--- a/examples/dfmparse.py
+++ b/examples/dfmparse.py
@@ -100,7 +100,7 @@ def to_chr(x):
 # a single matched pair of quotes around it.
 delphi_string = Combine(
     OneOrMore(CONCAT | pound_char | unquoted_sglQuotedString), adjacent=False
-).setParseAction(lambda s, l, t: "'%s'" % t[0])
+).setParseAction(lambda s, l, t: f"'{t[0]}'")
 
 string_value = delphi_string | base16_value
 
@@ -219,9 +219,10 @@ def main(testfiles=None, action=printer):
         except Exception:
             failures.append(f)
 
+    nl = "\n"
     if failures:
-        print("\nfailed while processing %s" % ", ".join(failures))
-    print("\nsucceeded on %d of %d files" % (success, len(testfiles)))
+        print(f"{nl}failed while processing {', '.join(failures)}")
+    print(f"{nl}succeeded on {success} of {len(testfiles)} files")
 
     if len(retval) == 1 and len(testfiles) == 1:
         # if only one file is parsed, return the parseResults directly
diff --git a/examples/directx_x_file_parser.html b/examples/directx_x_file_parser.html
new file mode 100644
index 00000000..047584a1
--- /dev/null
+++ b/examples/directx_x_file_parser.html
@@ -0,0 +1,270 @@
[... 270 added lines of generated railroad-diagram HTML (directx_x_file_parser.html) omitted ...]
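The diagram above is drawn from the `template_defn` expression in `examples/directx_x_file_parser.py`, whose diff follows. For orientation, here is a sketch (not part of the patch, and assuming the module is importable) of what that expression matches and the results names it exposes:

    from directx_x_file_parser import directx_template_defn

    result = directx_template_defn.parse_string('''
        template Vector {
            <3D82AB5E-62DA-11cf-AB39-0020AF71E434>
            FLOAT x;
            FLOAT y;
            FLOAT z;
        }
    ''')
    print(result.name)          # -> Vector
    print(result.uuid)          # -> 3D82AB5E-62DA-11cf-AB39-0020AF71E434
    print(len(result.members))  # -> 3
    print(result.closed)        # -> True (no '[ ... ]' marker and no restrictions)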
+ + + + diff --git a/examples/directx_x_file_parser.py b/examples/directx_x_file_parser.py new file mode 100644 index 00000000..65364793 --- /dev/null +++ b/examples/directx_x_file_parser.py @@ -0,0 +1,203 @@ +# +# directx_x_file_parser.py +# +# Parses .x files used for DirectX. +# Based on format documentation at http://paulbourke.net/dataformats/directx/ +# +# Copyright 2024, Paul McGuire +# +import pyparsing as pp + + +LBRACE, RBRACE, LBRACK, RBRACK, SEMI = pp.Suppress.using_each("{}[];") + +ident = pp.Word(pp.alphas, pp.alphanums + "_").set_name("identifier") +integer = pp.Word("123456789", pp.nums).add_parse_action(lambda t: int(t[0])) + +# scalar_type = pp.one_of( +# "WORD DWORD FLOAT DOUBLE CHAR UCHAR BYTE STRING CSTRING UNICODE", as_keyword=True +# ).set_name("base_type") +scalar_type = pp.MatchFirst( + pp.Keyword.using_each( + "WORD DWORD FLOAT DOUBLE CHAR UCHAR BYTE STRING CSTRING UNICODE".split() + ) +).set_name("scalar_type") +type_ref = scalar_type | ident + +ARRAY = pp.Keyword("array") +array_type_ref = pp.Group(ARRAY + type_ref("element_type")) +array_dim = LBRACK + (integer | ident) + RBRACK +member_defn = pp.Group( + ( + array_type_ref("type") + ident("name") + array_dim[...]("dims") + | type_ref("type") + ident("name") + ) + + SEMI +) + +TEMPLATE = pp.Keyword("template") +uuid = pp.Regex( + r"<[0-9a-fA-F]{8}(-[0-9a-fA-F]{4}){3}-[0-9a-fA-F]{12}>" +).set_parse_action(lambda t: t[0][1:-1]) +open_template_indicator = pp.Combine(LBRACK + "..." + RBRACK, adjacent=False) +restriction = pp.Group(type_ref("type") + pp.Optional(uuid)("uuid")) +template_restrictions = LBRACK + pp.DelimitedList(restriction) + RBRACK +directx_template_defn = ( + TEMPLATE + + ident("name") + + LBRACE + + pp.Optional(uuid)("uuid") + + member_defn[...]("members") + + pp.Optional( + open_template_indicator.set_parse_action(lambda: True), default=False + )("open_template") + + pp.Optional(template_restrictions)("restrictions") + + RBRACE +).set_name("template_defn") +directx_template_defn.add_parse_action( + lambda t: t.__setitem__("closed", not (t.open_template or t.restrictions)) +) + +directx_template_defn.ignore(pp.cpp_style_comment) + + +def make_template_parser(template_defn: pp.ParseResults) -> pp.ParserElement: + """ + Create a pyparsing parser from a DirectX template definition. + (Limited to templates containing scalar types, or arrays of scalars.) + """ + float_ = pp.common.real + type_map = { + "WORD": integer, + "DWORD": integer, + "FLOAT": float_, + "DOUBLE": float_, + "CHAR": integer, + "UCHAR": integer, + "BYTE": integer, + "STRING": pp.QuotedString('"'), + "CSTRING": pp.QuotedString('"'), + "UNICODE": pp.QuotedString('"'), + } + member_parsers = [] + for member in template_defn.members: + if member.type in type_map: + expr = pp.ungroup(type_map[member.type] + SEMI) + elif member.dims: + expr = type_map[member.type.element_type] + for dim in member.dims: + expr = pp.Group(pp.DelimitedList(expr, max=dim) + SEMI) + member_parsers.append(expr(member.name)) + + pp.autoname_elements() + + return ( + pp.Keyword(template_defn.name)("type") + + ident("name") + + LBRACE + + pp.Group(pp.And(member_parsers))("fields") + + RBRACE + ) + + +if __name__ == "__main__": + import contextlib + + with contextlib.suppress(Exception): + # create railroad diagram + directx_template_defn.create_diagram( + "directx_x_file_parser.html", show_results_names=True, show_groups=False + ) + + + sample = """ + some stuff... 
+ + template Mesh { + <3D82AB44-62DA-11cf-AB39-0020AF71E433> + DWORD nVertices; + array Vector vertices[nVertices]; + DWORD nFaces; + array MeshFace faces[nFaces]; + [ ... ] // An open template + } + + template PolyArray { + <3D82AB44-62DA-11cf-AB39-0020AF71E433> + DWORD nPolys; + array FLOAT polys[nPolys][3]; + } + + template Vector { + <3D82AB5E-62DA-11cf-AB39-0020AF71E434> + FLOAT x; + FLOAT y; + FLOAT z; + } // A closed template + + template FileSystem { + <3D82AB5E-62DA-11cf-AB39-0020AF71E435> + STRING name; + [ Directory <3D82AB5E-62DA-11cf-AB39-0020AF71E436>, File <3D82AB5E-62DA-11cf-AB39-0020AF71E437> ] // A restricted template + } + + more stuff... + + template mytemp { + DWORD myvar; + DWORD myvar2; + } + + template container { + DWORD count; + array mytemp tempArray[count]; + } + """ + + for template in directx_template_defn.search_string(sample): + # print(template.dump()) + print( + f"Name: {template.name!r}" + f" UUID: {template.uuid}" + f" Open: {template.open_template!r}" + f" Closed: {template.closed!r}" + f" Restricted: {bool(template.restrictions)}" + ) + # print() + + vector_template = directx_template_defn.parse_string( + """\ + template Vector { + <3D82AB5E-62DA-11cf-AB39-0020AF71E434> + STRING label; + FLOAT x; + FLOAT y; + FLOAT z; + } + """ + ) + vector_parser = make_template_parser(vector_template) + + with contextlib.suppress(Exception): + vector_parser.create_diagram( + "directx_x_vector_parser.html", show_results_names=True, show_groups=False + ) + + v = vector_parser.parse_string('Vector p1 {"datum_A"; 1.0; 3.0; 5.0;}') + print(v.dump()) + + vector_template = directx_template_defn.parse_string( + """\ + template Vector { + <3D82AB5E-62DA-11cf-AB39-0020AF71E434> + STRING label; + array FLOAT coords[3]; + } + """ + ) + vector_parser = make_template_parser(vector_template) + vector_parser.create_diagram( + "directx_x_vector_parser.html", show_results_names=True, show_groups=False + ) + v = vector_parser.parse_string('Vector p1 {"datum_A"; 1.0, 3.0, 5.0;}') + print(v.dump()) diff --git a/examples/ebnf.py b/examples/ebnf.py index 4843d40c..96749f7e 100644 --- a/examples/ebnf.py +++ b/examples/ebnf.py @@ -1,14 +1,16 @@ # This module tries to implement ISO 14977 standard with pyparsing. # pyparsing version 1.1 or greater is required. +from typing import Any # ISO 14977 standardize The Extended Backus-Naur Form(EBNF) syntax. 
# You can read a final draft version here: # https://www.cl.cam.ac.uk/~mgk25/iso-ebnf.html # # Submitted 2004 by Seo Sanghyeon +# Updated to current pyparsing styles 2025 by Paul McGuire # -from pyparsing import * +import pyparsing as pp all_names = """ @@ -27,147 +29,157 @@ syntax """.split() +LBRACK, RBRACK, LBRACE, RBRACE, LPAR, RPAR, DASH, STAR, EQ, SEMI = pp.Suppress.using_each( + "[]{}()-*=;" +) -integer = Word(nums) -meta_identifier = Word(alphas, alphanums + "_") -terminal_string = Suppress("'") + CharsNotIn("'") + Suppress("'") ^ Suppress( - '"' -) + CharsNotIn('"') + Suppress('"') -definitions_list = Forward() -optional_sequence = Suppress("[") + definitions_list + Suppress("]") -repeated_sequence = Suppress("{") + definitions_list + Suppress("}") -grouped_sequence = Suppress("(") + definitions_list + Suppress(")") +integer = pp.common.integer() +meta_identifier = pp.common.identifier() +terminal_string = pp.Regex( + r'"[^"]*"' + r"|" + r"'[^']*'" +).add_parse_action(pp.remove_quotes) + +definitions_list = pp.Forward() +optional_sequence = LBRACK + definitions_list + RBRACK +repeated_sequence = LBRACE + definitions_list + RBRACE +grouped_sequence = LPAR + definitions_list + RPAR syntactic_primary = ( optional_sequence - ^ repeated_sequence - ^ grouped_sequence - ^ meta_identifier - ^ terminal_string + | repeated_sequence + | grouped_sequence + | meta_identifier + | terminal_string ) -syntactic_factor = Optional(integer + Suppress("*")) + syntactic_primary -syntactic_term = syntactic_factor + Optional(Suppress("-") + syntactic_factor) -single_definition = delimitedList(syntactic_term, ",") -definitions_list << delimitedList(single_definition, "|") -syntax_rule = meta_identifier + Suppress("=") + definitions_list + Suppress(";") +syntactic_factor = pp.Optional(integer + STAR) + syntactic_primary +syntactic_term = syntactic_factor + pp.Optional(DASH + syntactic_factor) +single_definition = pp.DelimitedList(syntactic_term, ",") +definitions_list <<= pp.DelimitedList(single_definition, "|") +syntax_rule = meta_identifier + EQ + definitions_list + SEMI ebnfComment = ( - ("(*" + ZeroOrMore(CharsNotIn("*") | ("*" + ~Literal(")"))) + "*)") + ("(*" + (pp.CharsNotIn("*") | ("*" + ~pp.Literal(")")))[...] + "*)") .streamline() .setName("ebnfComment") ) -syntax = OneOrMore(syntax_rule) +syntax = syntax_rule[1, ...] 
syntax.ignore(ebnfComment) -def do_integer(str, loc, toks): +def do_integer(toks): return int(toks[0]) -def do_meta_identifier(str, loc, toks): +def do_meta_identifier(toks): if toks[0] in symbol_table: return symbol_table[toks[0]] else: - forward_count.value += 1 - symbol_table[toks[0]] = Forward() + symbol_table[toks[0]] = pp.Forward() return symbol_table[toks[0]] -def do_terminal_string(str, loc, toks): - return Literal(toks[0]) +def do_terminal_string(toks): + return pp.Literal(toks[0]) -def do_optional_sequence(str, loc, toks): - return Optional(toks[0]) +def do_optional_sequence(toks): + return pp.Optional(toks[0]) -def do_repeated_sequence(str, loc, toks): - return ZeroOrMore(toks[0]) +def do_repeated_sequence(toks): + return pp.ZeroOrMore(toks[0]) -def do_grouped_sequence(str, loc, toks): - return Group(toks[0]) +def do_grouped_sequence(toks): + return pp.Group(toks[0]) -def do_syntactic_primary(str, loc, toks): +def do_syntactic_primary(toks): return toks[0] -def do_syntactic_factor(str, loc, toks): - if len(toks) == 2: +def do_syntactic_factor(toks): + if len(toks) == 2 and toks[0] > 1: # integer * syntactic_primary - return And([toks[1]] * toks[0]) + return pp.And([toks[1]] * toks[0]) else: # syntactic_primary return [toks[0]] -def do_syntactic_term(str, loc, toks): +def do_syntactic_term(toks): if len(toks) == 2: # syntactic_factor - syntactic_factor - return NotAny(toks[1]) + toks[0] + return pp.NotAny(toks[1]) + toks[0] else: # syntactic_factor return [toks[0]] -def do_single_definition(str, loc, toks): +def do_single_definition(toks): toks = toks.asList() if len(toks) > 1: # syntactic_term , syntactic_term , ... - return And(toks) + return pp.And(toks) else: # syntactic_term return [toks[0]] -def do_definitions_list(str, loc, toks): +def do_definitions_list(toks): toks = toks.asList() if len(toks) > 1: # single_definition | single_definition | ... - return Or(toks) + return pp.Or(toks) else: # single_definition return [toks[0]] -def do_syntax_rule(str, loc, toks): +def do_syntax_rule(toks): # meta_identifier = definitions_list ; assert toks[0].expr is None, "Duplicate definition" - forward_count.value -= 1 - toks[0] << toks[1] + toks[0] <<= toks[1] return [toks[0]] -def do_syntax(str, loc, toks): +def do_syntax(): # syntax_rule syntax_rule ... 
return symbol_table -symbol_table = {} - - -class forward_count: - pass - - -forward_count.value = 0 for name in all_names: expr = vars()[name] action = vars()["do_" + name] - expr.setName(name) - expr.setParseAction(action) - # ~ expr.setDebug() + expr.set_name(name) + expr.add_parse_action(action) + # expr.setDebug() + + +symbol_table: dict[str, pp.Forward] = {} -def parse(ebnf, given_table={}): +def parse(ebnf, given_table=None, *, enable_debug=False): + given_table = given_table or {} symbol_table.clear() symbol_table.update(given_table) - forward_count.value = 0 - table = syntax.parseString(ebnf)[0] - assert forward_count.value == 0, "Missing definition" - for name in table: - expr = table[name] - expr.setName(name) - # ~ expr.setDebug() + table = syntax.parse_string(ebnf, parse_all=True)[0] + missing_definitions = [ + k for k, v in table.items() + if k not in given_table and v.expr is None + ] + assert not missing_definitions, f"Missing definitions for {missing_definitions}" + for name, expr in table.items(): + expr.set_name(name) + expr.set_debug(enable_debug) return table + + +if __name__ == '__main__': + try: + syntax.create_diagram("ebnf_diagram.html") + except Exception as e: + print("Failed to create diagram for EBNF syntax parser" + f" - {type(e).__name__}: {e}") diff --git a/examples/ebnf_diagram.html b/examples/ebnf_diagram.html new file mode 100644 index 00000000..74ec4443 --- /dev/null +++ b/examples/ebnf_diagram.html @@ -0,0 +1,656 @@ + + + + + + + + + + + + + + + +
[... 656 added lines of generated railroad-diagram HTML (ebnf_diagram.html) omitted ...]
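As diagrammed above, the modernized `ebnf.py` returns the parsed grammar as a dict of pyparsing expressions keyed by rule name, and its optional second argument seeds rules that the EBNF text leaves undefined. A short sketch (an illustration only, assuming `examples/ebnf.py` is importable as `ebnf`):

    import pyparsing as pp
    import ebnf  # examples/ebnf.py

    # supply a ready-made expression for 'word'; define 'greeting' in EBNF
    seed = {"word": pp.Word(pp.alphas)}
    parsers = ebnf.parse('greeting = word, ",", word, "!";', seed)

    greeting = parsers["greeting"]
    print(greeting.parse_string("Hello, World!"))  # -> ['Hello', ',', 'World', '!']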
diff --git a/examples/ebnf_number_parser_diagram.html b/examples/ebnf_number_parser_diagram.html
new file mode 100644
index 00000000..2f1b534b
--- /dev/null
+++ b/examples/ebnf_number_parser_diagram.html
@@ -0,0 +1,531 @@
[... 531 added lines of generated railroad-diagram HTML (ebnf_number_parser_diagram.html) omitted ...]
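This diagram comes from `examples/ebnf_number_words.py`, added next, which generates a parser for spelled-out numbers from an EBNF grammar. Because `ebnf.parse()` hands back ordinary pyparsing expressions, the generated parser can be extended like any other. The sketch below is illustrative only (the word-value table and folding logic are not part of the patch); it attaches a parse action that folds the matched words into an integer:

    from ebnf_number_words import number_parser  # module below; importing it also runs its run_tests demo

    word_values = {w: i + 1 for i, w in enumerate(
        "one two three four five six seven eight nine ten eleven twelve"
        " thirteen fourteen fifteen sixteen seventeen eighteen nineteen".split()
    )}
    word_values |= {w: 10 * (i + 2) for i, w in enumerate(
        "twenty thirty forty fifty sixty seventy eighty ninety".split()
    )}
    word_values |= {"hundred": 100, "thousand": 1000}

    def fold_number_words(t):
        # accumulate hundred/thousand groups left to right
        total = current = 0
        for word in t:
            if word in ("and", "-"):
                continue
            value = word_values[word]
            if value == 100:
                current *= 100
            elif value == 1000:
                total += current * 1000
                current = 0
            else:
                current += value
        return total + current

    number_parser.add_parse_action(fold_number_words)
    print(number_parser.parse_string("ninety nine thousand nine hundred and ninety nine")[0])
    # -> 99999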
diff --git a/examples/ebnf_number_words.py b/examples/ebnf_number_words.py new file mode 100644 index 00000000..8d6b46f2 --- /dev/null +++ b/examples/ebnf_number_words.py @@ -0,0 +1,77 @@ +# +# ebnf_number_words.py +# +# BNF from number_parser.py: +# +# optional_and ::= ["and" | "-"] +# optional_dash ::= ["-"] +# units ::= "one" | "two" | "three" | ... | "nine" +# tens ::= "twenty" | "thirty" | ... | "ninety" +# one_to_99 ::= units | ten | teens | (tens [optional_dash units]) +# ten ::= "ten" +# teens ::= "eleven" | "twelve" | ... | "nineteen" +# hundreds ::= (units | teens_only | tens optional_dash units) "hundred" +# thousands ::= one_to_99 "thousand" +# +# # number from 1-999,999 +# number ::= [thousands [optional_and]] [hundreds[optional_and]] one_to_99 +# | [thousands [optional_and]] hundreds +# | thousands +# +import ebnf + +grammar = """ + (* + EBNF for number_words.py + *) + number = [thousands, [and]], [hundreds, [and]], [one_to_99]; + thousands = one_to_99, "thousand"; + hundreds_mult = units | teens | multiples_of_ten, ["-"], units; + hundreds = hundreds_mult, "hundred"; + teens = + "eleven" + | "twelve" + | "thirteen" + | "fourteen" + | "fifteen" + | "sixteen" + | "seventeen" + | "eighteen" + | "nineteen" + ; + one_to_99 = units | teens | ten | multiples_of_ten, [["-"], units]; + ten = "ten"; + multiples_of_ten = "twenty" | "thirty" | "forty" | "fifty" | "sixty" | "seventy" | "eighty" | "ninety"; + units = "one" | "two" | "three" | "four" | "five" | "six" | "seven" | "eight" | "nine"; + and = "and" | "-"; + """ + +parsers = ebnf.parse(grammar) +number_parser = parsers["number"] + +try: + number_parser.create_diagram("ebnf_number_parser_diagram.html") +except Exception as e: + print("Failed to create diagram for EBNF-generated number parser" + f" - {type(e).__name__}: {e}") + +number_parser.run_tests( + """ + one + seven + twelve + twenty six + forty-two + two hundred + twelve hundred + one hundred and eleven + seven thousand and six + twenty five hundred and one + ninety nine thousand nine hundred and ninety nine + + # invalid + twenty hundred + """, + full_dump=False +) \ No newline at end of file diff --git a/examples/ebnftest.py b/examples/ebnftest.py index 7b1ff759..88b88bf1 100644 --- a/examples/ebnftest.py +++ b/examples/ebnftest.py @@ -6,70 +6,54 @@ # Submitted 2004 by Seo Sanghyeon # print("Importing pyparsing...") -from pyparsing import * +import pyparsing as pp print("Constructing EBNF parser with pyparsing...") import ebnf grammar = """ -syntax = (syntax_rule), {(syntax_rule)}; -syntax_rule = meta_identifier, '=', definitions_list, ';'; -definitions_list = single_definition, {'|', single_definition}; -single_definition = syntactic_term, {',', syntactic_term}; -syntactic_term = syntactic_factor,['-', syntactic_factor]; -syntactic_factor = [integer, '*'], syntactic_primary; -syntactic_primary = optional_sequence | repeated_sequence | - grouped_sequence | meta_identifier | terminal_string; -optional_sequence = '[', definitions_list, ']'; -repeated_sequence = '{', definitions_list, '}'; -grouped_sequence = '(', definitions_list, ')'; -(* -terminal_string = "'", character - "'", {character - "'"}, "'" | - '"', character - '"', {character - '"'}, '"'; - meta_identifier = letter, {letter | digit}; -integer = digit, {digit}; -*) + (* + ISO 14977 standardizes the Extended Backus-Naur Form (EBNF) syntax. 
+ You can read a final draft version here: + https://www.cl.cam.ac.uk/~mgk25/iso-ebnf.html + *) + syntax = (syntax_rule), {(syntax_rule)}; + syntax_rule = meta_identifier, '=', definitions_list, ';'; + definitions_list = single_definition, {'|', single_definition}; + single_definition = syntactic_term, {',', syntactic_term}; + syntactic_term = syntactic_factor,['-', syntactic_factor]; + syntactic_factor = [integer, '*'], syntactic_primary; + syntactic_primary = optional_sequence | repeated_sequence | + grouped_sequence | meta_identifier | terminal_string; + optional_sequence = '[', definitions_list, ']'; + repeated_sequence = '{', definitions_list, '}'; + grouped_sequence = '(', definitions_list, ')'; + (* + terminal_string = "'", character - "'", {character - "'"}, "'" | + '"', character - '"', {character - '"'}, '"'; + meta_identifier = letter, {letter | digit}; + integer = digit, {digit}; + *) """ -table = {} -# ~ table['character'] = Word(printables, exact=1) -# ~ table['letter'] = Word(alphas + '_', exact=1) -# ~ table['digit'] = Word(nums, exact=1) -table["terminal_string"] = sglQuotedString -table["meta_identifier"] = Word(alphas + "_", alphas + "_" + nums) -table["integer"] = Word(nums) +table: dict[str, pp.ParserElement] = { + # "character": pp.Char(pp.printables), + # "letter": pp.Char(pp.alphas + '_'), + # "digit": pp.Char(nums), + "terminal_string": pp.sgl_quoted_string | pp.dbl_quoted_string, + "meta_identifier": pp.Word(pp.alphas + "_", pp.alphas + "_" + pp.nums), + "integer": pp.common.integer, +} print("Parsing EBNF grammar with EBNF parser...") parsers = ebnf.parse(grammar, table) ebnf_parser = parsers["syntax"] -commentcharcount = 0 -commentlocs = set() - - -def tallyCommentChars(s, l, t): - global commentcharcount, commentlocs - # only count this comment if we haven't seen it before - if l not in commentlocs: - charCount = len(t[0]) - len(list(filter(str.isspace, t[0]))) - commentcharcount += charCount - commentlocs.add(l) - return l, t - - -# ordinarily, these lines wouldn't be necessary, but we are doing extra stuff with the comment expression -ebnf.ebnfComment.setParseAction(tallyCommentChars) ebnf_parser.ignore(ebnf.ebnfComment) -print("Parsing EBNF grammar with generated EBNF parser...\n") -parsed_chars = ebnf_parser.parseString(grammar) -parsed_char_len = len(parsed_chars) +ebnf_parser.create_diagram("ebnftest_diagram.html") -print("],\n".join(str(parsed_chars.asList()).split("],"))) - -# ~ grammar_length = len(grammar) - len(filter(str.isspace, grammar))-commentcharcount - -# ~ assert parsed_char_len == grammar_length - -print("Ok!") +print("Parsing EBNF grammar with generated EBNF parser...\n") +parsed_chars = ebnf_parser.parse_string(grammar, parse_all=True) +print("\n".join(str(pc) for pc in parsed_chars.as_list())) diff --git a/examples/email_address_parser.py b/examples/email_address_parser.py new file mode 100644 index 00000000..f364f953 --- /dev/null +++ b/examples/email_address_parser.py @@ -0,0 +1,46 @@ +# +# email_address_parser.py +# +# email address parser based on RFC 5322 BNF segments +# - see https://datatracker.ietf.org/doc/html/rfc5322#section-3.4. +# +# The returned parse results include named fields 'account' and 'domain' +# for emails of the form `account@domain`. +# +# Copyright 2024, by Paul McGuire +# +from pyparsing import Regex + +email_address = Regex( + # RFC5322 email address + r"""(?P(?:(?:\"[\w\s()<>[\].,;:@"]+\")|[!#-'*+\-/-9=?A-Z\^-~.]+))""" + "@" + r"""(?P(?:(?:(?!-)[!#-'*+\-/-9=?A-Z\^-~]{1,63}(? 
>= != = <> LT GT LE GE EQ NE") -comp_expr = infixNotation( +comparisonop = one_of("< <= > >= != = <> LT GT LE GE EQ NE") +comp_expr = infix_notation( arith_expr, [ - (comparisonop, 2, opAssoc.LEFT, EvalComparisonOp), + (comparisonop, 2, OpAssoc.LEFT, EvalComparisonOp), ], ) diff --git a/examples/excelExpr.py b/examples/excelExpr.py deleted file mode 100644 index 311a5a41..00000000 --- a/examples/excelExpr.py +++ /dev/null @@ -1,106 +0,0 @@ -# excelExpr.py -# -# Copyright 2010, Paul McGuire -# -# A partial implementation of a parser of Excel formula expressions. -# -from pyparsing import ( - CaselessKeyword, - Suppress, - Word, - alphas, - alphanums, - nums, - Optional, - Group, - oneOf, - Forward, - infixNotation, - opAssoc, - dblQuotedString, - delimitedList, - Combine, - Literal, - QuotedString, - ParserElement, - pyparsing_common as ppc, -) - -ParserElement.enablePackrat() - -EQ, LPAR, RPAR, COLON, COMMA = map(Suppress, "=():,") -EXCL, DOLLAR = map(Literal, "!$") -sheetRef = Word(alphas, alphanums) | QuotedString("'", escQuote="''") -colRef = Optional(DOLLAR) + Word(alphas, max=2) -rowRef = Optional(DOLLAR) + Word(nums) -cellRef = Combine( - Group(Optional(sheetRef + EXCL)("sheet") + colRef("col") + rowRef("row")) -) - -cellRange = ( - Group(cellRef("start") + COLON + cellRef("end"))("range") - | cellRef - | Word(alphas, alphanums) -) - -expr = Forward() - -COMPARISON_OP = oneOf("< = > >= <= != <>") -condExpr = expr + COMPARISON_OP + expr - -ifFunc = ( - CaselessKeyword("if") - - LPAR - + Group(condExpr)("condition") - + COMMA - + Group(expr)("if_true") - + COMMA - + Group(expr)("if_false") - + RPAR -) - - -def stat_function(name): - return Group(CaselessKeyword(name) + Group(LPAR + delimitedList(expr) + RPAR)) - - -sumFunc = stat_function("sum") -minFunc = stat_function("min") -maxFunc = stat_function("max") -aveFunc = stat_function("ave") -funcCall = ifFunc | sumFunc | minFunc | maxFunc | aveFunc - -multOp = oneOf("* /") -addOp = oneOf("+ -") -numericLiteral = ppc.number -operand = numericLiteral | funcCall | cellRange | cellRef -arithExpr = infixNotation( - operand, - [ - (multOp, 2, opAssoc.LEFT), - (addOp, 2, opAssoc.LEFT), - ], -) - -textOperand = dblQuotedString | cellRef -textExpr = infixNotation( - textOperand, - [ - ("&", 2, opAssoc.LEFT), - ], -) - -expr <<= arithExpr | textExpr - - -(EQ + expr).runTests( - """\ - =3*A7+5 - =3*Sheet1!$A$7+5 - =3*'Sheet 1'!$A$7+5 - =3*'O''Reilly''s sheet'!$A$7+5 - =if(Sum(A1:A25)>42,Min(B1:B25),if(Sum(C1:C25)>3.14, (Min(C1:C25)+3)*18,Max(B1:B25))) - =sum(a1:a25,10,min(b1,c2,d3)) - =if("T"&a2="TTime", "Ready", "Not ready") -""" -) diff --git a/examples/excel_expr.py b/examples/excel_expr.py new file mode 100644 index 00000000..0877e543 --- /dev/null +++ b/examples/excel_expr.py @@ -0,0 +1,93 @@ +# excelExpr.py +# +# Copyright 2010, Paul McGuire +# +# A partial implementation of a parser of Excel formula expressions. 
+# +import pyparsing as pp +ppc = pp.common + +pp.ParserElement.enable_packrat() + +EQ, LPAR, RPAR, COLON, COMMA = pp.Suppress.using_each("=():,") +EXCL, DOLLAR = pp.Literal.using_each("!$") +sheet_ref = pp.Word(pp.alphas, pp.alphanums) | pp.QuotedString("'", escQuote="''") +col_ref = pp.Opt(DOLLAR) + pp.Word(pp.alphas, max=2) +row_ref = pp.Opt(DOLLAR) + pp.Word(pp.nums) +cell_ref = pp.Combine( + pp.Group(pp.Opt(sheet_ref + EXCL)("sheet") + col_ref("col") + row_ref("row")) +) + +cell_range = ( + pp.Group(cell_ref("start") + COLON + cell_ref("end"))("range") + | cell_ref + | pp.Word(pp.alphas, pp.alphanums) +) + +expr = pp.Forward() + +COMPARISON_OP = pp.one_of("< = > >= <= != <>") +cond_expr = expr + COMPARISON_OP + expr + +if_func = ( + pp.CaselessKeyword("if") + - LPAR + + pp.Group(cond_expr)("condition") + + COMMA + + pp.Group(expr)("if_true") + + COMMA + + pp.Group(expr)("if_false") + + RPAR +) + + +def stat_function(name): + return pp.Group(pp.CaselessKeyword(name) + pp.Group(LPAR + pp.DelimitedList(expr) + RPAR)) + + +sum_func = stat_function("sum") +min_func = stat_function("min") +max_func = stat_function("max") +ave_func = stat_function("ave") +func_call = if_func | sum_func | min_func | max_func | ave_func + +mult_op = pp.one_of("* /") +add_op = pp.one_of("+ -") +numeric_literal = ppc.number +operand = numeric_literal | func_call | cell_range | cell_ref +arith_expr = pp.infix_notation( + operand, + [ + (mult_op, 2, pp.OpAssoc.LEFT), + (add_op, 2, pp.OpAssoc.LEFT), + ], +) + +text_operand = pp.dbl_quoted_string | cell_ref +text_expr = pp.infix_notation( + text_operand, + [ + ("&", 2, pp.OpAssoc.LEFT), + ], +) + +expr <<= arith_expr | text_expr + + +def main(): + success, report = (EQ + expr).run_tests( + """\ + =3*A7+5 + =3*Sheet1!$A$7+5 + =3*'Sheet 1'!$A$7+5 + =3*'O''Reilly''s sheet'!$A$7+5 + =if(Sum(A1:A25)>42,Min(B1:B25),if(Sum(C1:C25)>3.14, (Min(C1:C25)+3)*18,Max(B1:B25))) + =sum(a1:a25,10,min(b1,c2,d3)) + =if("T"&a2="TTime", "Ready", "Not ready") + """ + ) + assert success + + +if __name__ == '__main__': + main() diff --git a/examples/fourFn.py b/examples/fourFn.py index e448fbb8..ebf3bd6d 100644 --- a/examples/fourFn.py +++ b/examples/fourFn.py @@ -6,7 +6,7 @@ # Extended test cases, simplified pushFirst method. 
# Removed unnecessary expr.suppress() call (thanks Nathaniel Peterson!), and added Group # Changed fnumber to use a Regex, which is now the preferred method -# Reformatted to latest pypyparsing features, support multiple and variable args to functions +# Reformatted to latest pyparsing features, support multiple and variable args to functions # # Copyright 2003-2019 by Paul McGuire # diff --git a/examples/gen_ctypes.py b/examples/gen_ctypes.py index 176644f3..65d2b21d 100644 --- a/examples/gen_ctypes.py +++ b/examples/gen_ctypes.py @@ -44,16 +44,16 @@ "void": "None", } -LPAR, RPAR, LBRACE, RBRACE, COMMA, SEMI = map(Suppress, "(){},;") -ident = Word(alphas, alphanums + "_") +LPAR, RPAR, LBRACE, RBRACE, COMMA, SEMI = Suppress.using_each("(){},;") +ident = pyparsing_common.identifier integer = Regex(r"[+-]?\d+") hexinteger = Regex(r"0x[0-9a-fA-F]+") const = Suppress("const") -primitiveType = oneOf(t for t in typemap if not t.endswith("*")) +primitiveType = one_of(t for t in typemap if not t.endswith("*")) structType = Suppress("struct") + ident vartype = ( - Optional(const) + (primitiveType | structType | ident) + Optional(Word("*")("ptr")) + Opt(const) + (primitiveType | structType | ident) + Opt(Word("*")("ptr")) ) @@ -64,14 +64,14 @@ def normalizetype(t): # ~ return ret -vartype.setParseAction(normalizetype) +vartype.set_parse_action(normalizetype) -arg = Group(vartype("argtype") + Optional(ident("argname"))) +arg = Group(vartype("argtype") + Opt(ident("argname"))) func_def = ( vartype("fn_type") + ident("fn_name") + LPAR - + Optional(delimitedList(arg | "..."))("fn_args") + + Opt(DelimitedList(arg | "..."))("fn_args") + RPAR + SEMI ) @@ -82,7 +82,7 @@ def derivefields(t): t["varargs"] = True -func_def.setParseAction(derivefields) +func_def.set_parse_action(derivefields) fn_typedef = "typedef" + func_def var_typedef = "typedef" + primitiveType("primType") + ident("name") + SEMI @@ -90,10 +90,10 @@ def derivefields(t): enum_def = ( Keyword("enum") + LBRACE - + delimitedList(Group(ident("name") + "=" + (hexinteger | integer)("value")))( + + DelimitedList(Group(ident("name") + "=" + (hexinteger | integer)("value")))( "evalues" ) - + Optional(COMMA) + + Opt(COMMA) + RBRACE ) @@ -130,18 +130,18 @@ def typeAsCtypes(typestr): if typestr in typemap: return typemap[typestr] if typestr.endswith("*"): - return "POINTER(%s)" % typeAsCtypes(typestr.rstrip(" *")) + return f"POINTER({typeAsCtypes(typestr.rstrip(' *'))})" return typestr # scan input header text for primitive typedefs -for td, _, _ in var_typedef.scanString(c_header): +for td, _, _ in var_typedef.scan_string(c_header): typedefs.append((td.name, td.primType)) # add typedef type to typemap to map to itself typemap[td.name] = td.name # scan input header text for function typedefs -fn_typedefs = fn_typedef.searchString(c_header) +fn_typedefs = fn_typedef.search_string(c_header) # add each function typedef to typemap to map to itself for fntd in fn_typedefs: typemap[fntd.fn_name] = fntd.fn_name @@ -149,7 +149,7 @@ def typeAsCtypes(typestr): # scan input header text, and keep running list of user-defined types for fn, _, _ in ( cStyleComment.suppress() | fn_typedef.suppress() | func_def -).scanString(c_header): +).scan_string(c_header): if not fn: continue getUDType(fn.fn_type) @@ -160,8 +160,8 @@ def typeAsCtypes(typestr): functions.append(fn) # scan input header text for enums -enum_def.ignore(cppStyleComment) -for en_, _, _ in enum_def.scanString(c_header): +enum_def.ignore(cpp_style_comment) +for en_, _, _ in enum_def.scan_string(c_header): 
for ev in en_.evalues: enum_constants.append((ev.name, ev.value)) @@ -178,7 +178,7 @@ def typeAsCtypes(typestr): ) ) for udtype in user_defined_types: - print("class %s(Structure): pass" % typemap[udtype]) + print(f"class {typemap[udtype]}(Structure): pass") print() print("# constant definitions") @@ -192,7 +192,7 @@ def typeAsCtypes(typestr): print("{}.restype = {}".format(prefix, typeAsCtypes(fn.fn_type))) if fn.varargs: - print("# warning - %s takes variable argument list" % prefix) + print(f"# warning - {prefix} takes variable argument list") del fn.fn_args[-1] if fn.fn_args.asList() != [["void"]]: @@ -202,4 +202,4 @@ def typeAsCtypes(typestr): ) ) else: - print("%s.argtypes = ()" % (prefix)) + print(f"{prefix}.argtypes = ()") diff --git a/examples/getNTPserversNew.py b/examples/getNTPserversNew.py index 5fcd9d15..8c4c94f3 100644 --- a/examples/getNTPserversNew.py +++ b/examples/getNTPserversNew.py @@ -13,8 +13,8 @@ integer = pp.Word(pp.nums) ipAddress = ppc.ipv4_address() -hostname = pp.delimitedList(pp.Word(pp.alphas, pp.alphanums + "-_"), ".", combine=True) -tdStart, tdEnd = pp.makeHTMLTags("td") +hostname = pp.DelimitedList(pp.Word(pp.alphas, pp.alphanums + "-_"), ".", combine=True) +tdStart, tdEnd = pp.make_html_tags("td") timeServerPattern = ( tdStart + hostname("hostname") @@ -33,6 +33,6 @@ serverListHTML = serverListPage.read().decode("UTF-8") addrs = {} -for srvr, startloc, endloc in timeServerPattern.scanString(serverListHTML): - print("{} ({}) - {}".format(srvr.ipAddr, srvr.hostname.strip(), srvr.loc.strip())) +for srvr, startloc, endloc in timeServerPattern.scan_string(serverListHTML): + print(f"{srvr.ipAddr} ({srvr.hostname.strip()}) - {srvr.loc.strip()}") addrs[srvr.ipAddr] = srvr.loc diff --git a/examples/greeting.py b/examples/greeting.py index 28a534ae..17a7b2ab 100644 --- a/examples/greeting.py +++ b/examples/greeting.py @@ -8,16 +8,16 @@ import pyparsing as pp # define grammar -greet = pp.Word(pp.alphas) + "," + pp.Word(pp.alphas) + pp.oneOf("! ? .") +greet = pp.Word(pp.alphas) + "," + pp.Word(pp.alphas) + pp.one_of("! ? .") # input string hello = "Hello, World!" # parse input string -print(hello, "->", greet.parseString(hello)) +print(hello, "->", greet.parse_string(hello)) # parse a bunch of input strings -greet.runTests( +greet.run_tests( """\ Hello, World! Ahoy, Matey! diff --git a/examples/greetingInGreek.py b/examples/greetingInGreek.py index ed98e9ad..aa8272a6 100644 --- a/examples/greetingInGreek.py +++ b/examples/greetingInGreek.py @@ -15,4 +15,4 @@ hello = "Καλημέρα, κόσμε!" # parse input string -print(greet.parseString(hello)) +print(greet.parse_string(hello)) diff --git a/examples/greetingInKorean.py b/examples/greetingInKorean.py index 00ea9bc9..d2c0b634 100644 --- a/examples/greetingInKorean.py +++ b/examples/greetingInKorean.py @@ -7,14 +7,14 @@ # from pyparsing import Word, pyparsing_unicode as ppu -koreanChars = ppu.Korean.alphas -koreanWord = Word(koreanChars, min=2) +korean_chars = ppu.한국어.alphas +korean_word = Word(korean_chars, min=2) # define grammar -greet = koreanWord + "," + koreanWord + "!" +greet = korean_word + "," + korean_word + "!" # input string hello = "안녕, 여러분!" # "Hello, World!" 
in Korean # parse input string -print(greet.parseString(hello)) +print(greet.parse_string(hello)) diff --git a/examples/holaMundo.py b/examples/hola_mundo.py similarity index 69% rename from examples/holaMundo.py rename to examples/hola_mundo.py index bb66ca24..d44bb351 100644 --- a/examples/holaMundo.py +++ b/examples/hola_mundo.py @@ -1,67 +1,73 @@ -# escrito por Marco Alfonso, 2004 Noviembre - -# importamos los símbolos requeridos desde el módulo -from pyparsing import ( - Word, - alphas, - oneOf, - nums, - Group, - OneOrMore, - pyparsing_unicode as ppu, -) - -# usamos las letras en latin1, que incluye las como 'ñ', 'á', 'é', etc. -alphas = ppu.Latin1.alphas - -# Aqui decimos que la gramatica "saludo" DEBE contener -# una palabra compuesta de caracteres alfanumericos -# (Word(alphas)) mas una ',' mas otra palabra alfanumerica, -# mas '!' y esos seian nuestros tokens -saludo = Word(alphas) + "," + Word(alphas) + oneOf("! . ?") -tokens = saludo.parseString("Hola, Mundo !") - -# Ahora parseamos una cadena, "Hola, Mundo!", -# el metodo parseString, nos devuelve una lista con los tokens -# encontrados, en caso de no haber errores... -for i, token in enumerate(tokens): - print("Token %d -> %s" % (i, token)) - -# imprimimos cada uno de los tokens Y listooo!!, he aquí a salida -# Token 0 -> Hola -# Token 1 -> , -# Token 2-> Mundo -# Token 3 -> ! - -# ahora cambia el parseador, aceptando saludos con mas que una sola palabra antes que ',' -saludo = Group(OneOrMore(Word(alphas))) + "," + Word(alphas) + oneOf("! . ?") -tokens = saludo.parseString("Hasta mañana, Mundo !") - -for i, token in enumerate(tokens): - print("Token %d -> %s" % (i, token)) - -# Ahora parseamos algunas cadenas, usando el metodo runTests -saludo.runTests( - """\ - Hola, Mundo! - Hasta mañana, Mundo ! -""", - fullDump=False, -) - -# Por supuesto, se pueden "reutilizar" gramáticas, por ejemplo: -numimag = Word(nums) + "i" -numreal = Word(nums) -numcomplex = numreal + "+" + numimag -print(numcomplex.parseString("3+5i")) - -# Funcion para cambiar a complejo numero durante parsear: -def hace_python_complejo(t): - valid_python = "".join(t).replace("i", "j") - return complex(valid_python) - - -numcomplex.setParseAction(hace_python_complejo) -print(numcomplex.parseString("3+5i")) - -# Excelente!!, bueno, los dejo, me voy a seguir tirando código... +# escrito por Marco Alfonso, 2004 Noviembre + +# importamos los símbolos requeridos desde el módulo +from pyparsing import ( + Word, + one_of, + nums, + Group, + OneOrMore, + Opt, + pyparsing_unicode as ppu, +) + +# usamos las letras en latin1, que incluye las como 'ñ', 'á', 'é', etc. +alphas = ppu.Latin1.alphas + +# Aqui decimos que la gramatica "saludo" DEBE contener +# una palabra compuesta de caracteres alfanumericos +# (Word(alphas)) mas una ',' mas otra palabra alfanumerica, +# mas '!' y esos seian nuestros tokens +saludo = Word(alphas) + "," + Word(alphas) + one_of("! . ?") +tokens = saludo.parse_string("Hola, Mundo !") + +# Ahora parseamos una cadena, "Hola, Mundo!", +# el metodo parseString, nos devuelve una lista con los tokens +# encontrados, en caso de no haber errores... +for i, token in enumerate(tokens): + print(f"Token {i} -> {token}") + +# imprimimos cada uno de los tokens Y listooo!!, he aquí a salida +# Token 0 -> Hola +# Token 1 -> , +# Token 2-> Mundo +# Token 3 -> ! + +# ahora cambia el parseador, aceptando saludos con mas que una sola palabra antes que ',' +saludo = Group(OneOrMore(Word(alphas))) + "," + Word(alphas) + one_of("! . 
?") +tokens = saludo.parse_string("Hasta mañana, Mundo !") + +for i, token in enumerate(tokens): + print(f"Token {i} -> {token}") + +# Ahora parseamos algunas cadenas, usando el metodo runTests +saludo.run_tests("""\ + Hola, Mundo! + Hasta mañana, Mundo ! + """, + fullDump=False, +) + +# Por supuesto, se pueden "reutilizar" gramáticas, por ejemplo: +numimag = Word(nums) + "i" +numreal = Word(nums) +numcomplex = numimag | numreal + Opt("+" + numimag) + +# Funcion para cambiar a complejo numero durante parsear: +def hace_python_complejo(t): + valid_python = "".join(t).replace("i", "j") + for tipo in (int, complex): + try: + return tipo(valid_python) + except ValueError: + pass + + +numcomplex.set_parse_action(hace_python_complejo) +numcomplex.run_tests("""\ + 3 + 5i + 3+5i +""") + +# Excelente!!, bueno, los dejo, me voy a seguir tirando código... diff --git a/examples/htmlStripper.py b/examples/htmlStripper.py deleted file mode 100644 index 6a209fad..00000000 --- a/examples/htmlStripper.py +++ /dev/null @@ -1,42 +0,0 @@ -# -# htmlStripper.py -# -# Sample code for stripping HTML markup tags and scripts from -# HTML source files. -# -# Copyright (c) 2006, 2016, Paul McGuire -# -from urllib.request import urlopen -from pyparsing import ( - makeHTMLTags, - commonHTMLEntity, - replaceHTMLEntity, - htmlComment, - anyOpenTag, - anyCloseTag, - LineEnd, - replaceWith, -) - -scriptOpen, scriptClose = makeHTMLTags("script") -scriptBody = scriptOpen + scriptOpen.tag_body + scriptClose -commonHTMLEntity.setParseAction(replaceHTMLEntity) - -# get some HTML -targetURL = "https://wiki.python.org/moin/PythonDecoratorLibrary" -with urlopen(targetURL) as targetPage: - targetHTML = targetPage.read().decode("UTF-8") - -# first pass, strip out tags and translate entities -firstPass = ( - (htmlComment | scriptBody | commonHTMLEntity | anyOpenTag | anyCloseTag) - .suppress() - .transformString(targetHTML) -) - -# first pass leaves many blank lines, collapse these down -repeatedNewlines = LineEnd() * (2,) -repeatedNewlines.setParseAction(replaceWith("\n\n")) -secondPass = repeatedNewlines.transformString(firstPass) - -print(secondPass) diff --git a/examples/html_stripper.py b/examples/html_stripper.py new file mode 100644 index 00000000..92d38c75 --- /dev/null +++ b/examples/html_stripper.py @@ -0,0 +1,58 @@ +# +# html_stripper.py +# +# Sample code for stripping HTML markup tags and scripts from +# HTML source files. +# +# Copyright (c) 2006, 2016, 2023, Paul McGuire +# +from urllib.request import urlopen +from pyparsing import ( + LineEnd, + quoted_string, + make_html_tags, + common_html_entity, + replace_html_entity, + html_comment, + any_open_tag, + any_close_tag, + replace_with, +) + +# if