diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index c8c79452..39b78521 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -6,53 +6,43 @@ on: pull_request: paths: - - .github/workflows/cis.yml + - .github/workflows/ci.yml - pyparsing/* - pyproject.toml - tox.ini +permissions: + contents: read + jobs: tests: name: Unit tests runs-on: ${{ matrix.os || 'ubuntu-latest' }} strategy: matrix: + os: ["ubuntu-latest"] + toxenv: [py, pyparsing_packaging] + python-version: ["3.7", "3.8", "3.9", "3.10", "3.11"] include: - - python-version: "3.6" - toxenv: py36 - - python-version: "3.7" - toxenv: py37 - - python-version: "3.8" - toxenv: py38 - - python-version: "3.9" - toxenv: py39 - - python-version: "3.10" - toxenv: py310 - - python-version: "3.10" - toxenv: py310 + - python-version: "3.11" os: macos-latest - - python-version: "pypy-3.7" - toxenv: pypy3 + - python-version: "3.11" + toxenv: mypy-test + - python-version: "pypy-3.9" env: - TOXENV: ${{ matrix.toxenv }} + TOXENV: ${{ matrix.toxenv || 'py' }} steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 + uses: actions/setup-python@v4 with: python-version: ${{ matrix.python-version }} - name: Install dependencies run: | python -m pip install --upgrade pip - python -m pip install tox codecov railroad-diagrams Jinja2 + python -m pip install tox railroad-diagrams Jinja2 - name: Test - run: tox -e ALL - - - name: Upload coverage to Codecov - if: ${{ matrix.os == 'ubuntu-latest' && matrix.python-version == '3.10' }} - env: - CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} - run: codecov + run: tox diff --git a/CHANGES b/CHANGES index 8fabb270..b45934b2 100644 --- a/CHANGES +++ b/CHANGES @@ -2,8 +2,246 @@ Change Log ========== -Version 3.0.9 - ---------------- +NOTE: In the future release 3.2.0, use of many of the pre-PEP8 methods (such as +`ParserElement.parseString`) will start to raise `DeprecationWarnings`. 3.2.0 should +get released some time later in 2023. I currently plan to completely +drop the pre-PEP8 methods in pyparsing 4.0, though we won't see that release until +at least late 2023 if not 2024. So there is plenty of time to convert existing parsers to +the new function names before the old functions are completely removed. (Big +help from Devin J. Pohly in structuring the code to enable this peaceful transition.) + +Version 3.2.0 will also discontinue support for Python versions 3.6 and 3.7. + + +Version 3.1.1 - July, 2023 +-------------------------- +- Fixed regression in Word(min), reported by Ricardo Coccioli, good catch! (Issue #502) + +- Fixed bug in bad exception messages raised by Forward expressions. PR submitted + by Kyle Sunden, thanks for your patience and collaboration on this (#493). + +- Fixed regression in SkipTo, where ignored expressions were not checked when looking + for the target expression. Reported by catcombo, Issue #500. + +- Fixed type annotation for enable_packrat, PR submitted by Mike Urbach, thanks! (Issue #498) + +- Some general internal code cleanup. (Instigated by Michal Čihař, Issue #488) + + +Version 3.1.0 - June, 2023 +-------------------------- +- Added `tag_emitter.py` to examples. This example demonstrates how to insert + tags into your parsed results that are not part of the original parsed text. + + +Version 3.1.0b2 - May, 2023 +--------------------------- +- Updated `create_diagram()` code to be compatible with railroad-diagrams package + version 3.0. 
Fixes Issue #477 (railroad diagrams generated with black bars), + reported by Sam Morley-Short. + +- Fixed bug in `NotAny`, where parse actions on the negated expr were not being run. + This could cause `NotAny` to incorrectly fail if the expr would normally match, + but would fail to match if a condition used as a parse action returned False. + Fixes Issue #482, raised by byaka, thank you! + +- Fixed `create_diagram()` to accept keyword args, to be passed through to the + `template.render()` method to generate the output HTML (PR submitted by Aussie Schnore, + good catch!) + +- Fixed bug in `python_quoted_string` regex. + +- Added `examples/bf.py` Brainf*ck parser/executor example. Illustrates using + a pyparsing grammar to parse language syntax, and attach executable AST nodes to + the parsed results. + + +Version 3.1.0b1 - April, 2023 +----------------------------- +- Added support for Python 3.12. + +- API CHANGE: A slight change has been implemented when unquoting a quoted string + parsed using the `QuotedString` class. Formerly, when unquoting and processing + whitespace markers such as \t and \n, these substitutions would occur first, and + then any additional '\' escaping would be done on the resulting string. This would + parse "\\n" as "\". Now escapes and whitespace markers are all processed + in a single pass working left to right, so the quoted string "\\n" would get unquoted + to "\n" (a backslash followed by "n"). Fixes issue #474 raised by jakeanq, + thanks! + +- Added named field "url" to `pyparsing.common.url`, returning the entire + parsed URL string. + +- Fixed bug when parse actions returned an empty string for an expression that + had a results name, that the results name was not saved. That is: + + expr = Literal("X").add_parse_action(lambda tokens: "")("value") + result = expr.parse_string("X") + print(result["value"]) + + would raise a `KeyError`. Now empty strings will be saved with the associated + results name. Raised in Issue #470 by Nicco Kunzmann, thank you. + +- Fixed bug in `SkipTo` where ignore expressions were not properly handled while + scanning for the target expression. Issue #475, reported by elkniwt, thanks + (this bug has been there for a looooong time!). + +- Updated `ci.yml` permissions to limit default access to source - submitted by Joyce + Brum of Google. Thanks so much! + +- Updated the `lucene_grammar.py` example (better support for '*' and '?' wildcards) + and corrected the test cases - brought to my attention by Elijah Nicol, good catch! + + +Version 3.1.0a1 - March, 2023 +----------------------------- +- API ENHANCEMENT: `Optional(expr)` may now be written as `expr | ""` + + This will make this code: + + "{" + Optional(Literal("A") | Literal("a")) + "}" + + writable as: + + "{" + (Literal("A") | Literal("a") | "") + "}" + + Some related changes implemented as part of this work: + - `Literal("")` now internally generates an `Empty()` (and no longer raises an exception) + - `Empty` is now a subclass of `Literal` + + Suggested by Antony Lee (issue #412), PR (#413) by Devin J. Pohly. + +- Added new class property `identifier` to all Unicode set classes in `pyparsing.unicode`, + using the class's values for `cls.identchars` and `cls.identbodychars`. 
Now Unicode-aware + parsers that formerly wrote: + + ppu = pyparsing.unicode + ident = Word(ppu.Greek.identchars, ppu.Greek.identbodychars) + + can now write: + + ident = ppu.Greek.identifier + # or + # ident = ppu.Ελληνικά.identifier + +- `ParseResults` now has a new method `deepcopy()`, in addition to the current + `copy()` method. `copy()` only makes a shallow copy - any contained `ParseResults` + are copied as references - changes in the copy will be seen as changes in the original. + In many cases, a shallow copy is sufficient, but some applications require a deep copy. + `deepcopy()` makes a deeper copy: any contained `ParseResults` or other mappings or + containers are built with copies from the original, and do not get changed if the + original is later changed. Addresses issue #463, reported by Bryn Pickering. + +- Reworked `delimited_list` function into the new `DelimitedList` class. + `DelimitedList` has the same constructor interface as `delimited_list`, and + in this release, `delimited_list` changes from a function to a synonym for + `DelimitedList`. `delimited_list` and the older `delimitedList` method will be + deprecated in a future release, in favor of `DelimitedList`. + +- Error messages from `MatchFirst` and `Or` expressions will try to give more details + if one of the alternatives matches better than the others, but still fails. + Question raised in Issue #464 by msdemlei, thanks! + +- Added new class method `ParserElement.using_each`, to simplify code + that creates a sequence of `Literals`, `Keywords`, or other `ParserElement` + subclasses. + + For instance, to define suppressible punctuation, you would previously + write: + + LPAR, RPAR, LBRACE, RBRACE, SEMI = map(Suppress, "(){};") + + You can now write: + + LPAR, RPAR, LBRACE, RBRACE, SEMI = Suppress.using_each("(){};") + + `using_each` will also accept optional keyword args, which it will + pass through to the class initializer. Here is an expression for + single-letter variable names that might be used in an algebraic + expression: + + algebra_var = MatchFirst( + Char.using_each(string.ascii_lowercase, as_keyword=True) + ) + +- Added new builtin `python_quoted_string`, which will match any form + of single-line or multiline quoted strings defined in Python. (Inspired + by discussion with Andreas Schörgenhumer in Issue #421.) + +- Extended `expr[]` notation for repetition of `expr` to accept a + slice, where the slice's stop value indicates a `stop_on` + expression: + + test = "BEGIN aaa bbb ccc END" + BEGIN, END = Keyword.using_each("BEGIN END".split()) + body_word = Word(alphas) + + expr = BEGIN + Group(body_word[...:END]) + END + # equivalent to + # expr = BEGIN + Group(ZeroOrMore(body_word, stop_on=END)) + END + + print(expr.parse_string(test)) + + Prints: + + ['BEGIN', ['aaa', 'bbb', 'ccc'], 'END'] + +- `ParserElement.validate()` is deprecated. It predates the support for left-recursive + parsers, and was prone to false positives (warning that a grammar was invalid when + it was in fact valid). It will be removed in a future pyparsing release. In its + place, developers should use debugging and analytical tools, such as `ParserElement.set_debug()` + and `ParserElement.create_diagram()`. + (Raised in Issue #444, thanks Andrea Micheli!) + +- Added bool `embed` argument to `ParserElement.create_diagram()`. + When passed as True, the resulting diagram will omit the ``, + ``, and `` tags so that it can be embedded in other + HTML source. 
(Useful when embedding a call to `create_diagram()` in + a PyScript HTML page.) + +- Added `recurse` argument to `ParserElement.set_debug` to set the + debug flag on an expression and all of its sub-expressions. Requested + by multimeric in Issue #399. + +- Added '·' (Unicode MIDDLE DOT) to the set of Latin1.identbodychars. + +- Fixed bug in `Word` when `max=2`. Also added performance enhancement + when specifying `exact` argument. Reported in issue #409 by + panda-34, nice catch! + +- `Word` arguments are now validated if `min` and `max` are both + given, that `min` <= `max`; raises `ValueError` if values are invalid. + +- Fixed bug in srange, when parsing escaped '/' and '\' inside a + range set. + +- Fixed exception messages for some `ParserElements` with custom names, + which instead showed their contained expression names. + +- Fixed bug in pyparsing.common.url, when input URL is not alone + on an input line. Fixes Issue #459, reported by David Kennedy. + +- Multiple added and corrected type annotations. With much help from + Stephen Rosen, thanks! + +- Some documentation and error message clarifications on pyparsing's + keyword logic, cited by Basil Peace. + +- General docstring cleanup for Sphinx doc generation, PRs submitted + by Devin J. Pohly. A dirty job, but someone has to do it - much + appreciated! + +- `invRegex.py` example renamed to `inv_regex.py` and updated to PEP-8 + variable and method naming. PR submitted by Ross J. Duff, thanks! + +- Removed examples `sparser.py` and `pymicko.py`, since each included its + own GPL license in the header. Since this conflicts with pyparsing's + MIT license, they were removed from the distribution to avoid + confusion among those making use of them in their own projects. + + +Version 3.0.9 - May, 2022 +------------------------- - Added Unicode set `BasicMultilingualPlane` (may also be referenced as `BMP`) representing the Basic Multilingual Plane (Unicode characters up to code point 65535). Can be used to parse @@ -12,7 +250,7 @@ Version 3.0.9 - - To address mypy confusion of `pyparsing.Optional` and `typing.Optional` resulting in `error: "_SpecialForm" not callable` message - reported in issue #365, fixed the import in exceptions.py. Nice + reported in issue #365, fixed the import in `exceptions.py`. Nice sleuthing by Iwan Aucamp and Dominic Davis-Foster, thank you! (Removed definitions of `OptionalType`, `DictType`, and `IterableType` and replaced them with `typing.Optional`, `typing.Dict`, and @@ -24,13 +262,13 @@ Version 3.0.9 - - Removed use of deprecated `pkg_resources` package in railroad diagramming code (issue #391). -- Updated bigquery_view_parser.py example to parse examples at +- Updated `bigquery_view_parser.py` example to parse examples at https://cloud.google.com/bigquery/docs/reference/legacy-sql -Version 3.0.8 - ---------------- -- API CHANGE: modified pyproject.toml to require Python version +Version 3.0.8 - April, 2022 +--------------------------- +- API CHANGE: modified `pyproject.toml` to require Python version 3.6.8 or later for pyparsing 3.x. Earlier minor versions of 3.6 fail in evaluating the `version_info` class (implemented using `typing.NamedTuple`). If you are using an earlier version of Python @@ -39,7 +277,7 @@ Version 3.0.8 - - Improved pyparsing import time by deferring regex pattern compiles. PR submitted by Anthony Sottile to fix issue #362, thanks! 
-- Updated build to use flit, PR by Michał Górny, added BUILDING.md +- Updated build to use flit, PR by Michał Górny, added `BUILDING.md` doc and removed old Windows build scripts - nice cleanup work! - More type-hinting added for all arithmetic and logical operator @@ -65,8 +303,8 @@ Version 3.0.8 - Serhiy Storchaka, thank you. -Version 3.0.7 - ---------------- +Version 3.0.7 - January, 2022 +----------------------------- - Fixed bug #345, in which delimitedList changed expressions in place using `expr.streamline()`. Reported by Kim Gräsman, thanks! @@ -130,8 +368,8 @@ Version 3.0.7 - - Additional type annotations on public methods. -Version 3.0.6 - ---------------- +Version 3.0.6 - November, 2021 +------------------------------ - Added `suppress_warning()` method to individually suppress a warning on a specific ParserElement. Used to refactor `original_text_for` to preserve internal results names, which, while undocumented, had been adopted by @@ -141,8 +379,8 @@ Version 3.0.6 - parse expression. -Version 3.0.5 - ---------------- +Version 3.0.5 - November, 2021 +------------------------------ - Added return type annotations for `col`, `line`, and `lineno`. - Fixed bug when `warn_ungrouped_named_tokens_in_collection` warning was raised @@ -157,8 +395,8 @@ Version 3.0.5 - minor bug where separating line was not included after a test failure. -Version 3.0.4 - ---------------- +Version 3.0.4 - October, 2021 +----------------------------- - Fixed bug in which `Dict` classes did not correctly return tokens as nested `ParseResults`, reported by and fix identified by Bu Sun Kim, many thanks!!! @@ -186,8 +424,8 @@ Version 3.0.4 - elements. -Version 3.0.3 - ---------------- +Version 3.0.3 - October, 2021 +----------------------------- - Fixed regex typo in `one_of` fix for `as_keyword=True`. - Fixed a whitespace-skipping bug, Issue #319, introduced as part of the revert @@ -198,8 +436,8 @@ Version 3.0.3 - are longer than others. -Version 3.0.2 - ---------------- +Version 3.0.2 - October, 2021 +----------------------------- - Reverted change in behavior with `LineStart` and `StringStart`, which changed the interpretation of when and how `LineStart` and `StringStart` should match when a line starts with spaces. In 3.0.0, the `xxxStart` expressions were not @@ -233,8 +471,8 @@ Version 3.0.2 - the `IndentedBlock` with `grouped=False`. -Version 3.0.1 - ---------------- +Version 3.0.1 - October, 2021 +----------------------------- - Fixed bug where `Word(max=n)` did not match word groups less than length 'n'. Thanks to Joachim Metz for catching this! @@ -245,15 +483,15 @@ Version 3.0.1 - even when not enabled. -Version 3.0.0 - ---------------- +Version 3.0.0 - October, 2021 +----------------------------- - A consolidated list of all the changes in the 3.0.0 release can be found in `docs/whats_new_in_3_0_0.rst`. (https://github.com/pyparsing/pyparsing/blob/master/docs/whats_new_in_3_0_0.rst) -Version 3.0.0.final - ---------------------- +Version 3.0.0.final - October, 2021 +----------------------------------- - Added support for python `-W` warning option to call `enable_all_warnings`() at startup. Also detects setting of `PYPARSINGENABLEALLWARNINGS` environment variable to any non-blank value. (If using `-Wd` for testing, but wishing to disable pyparsing warnings, add @@ -290,8 +528,8 @@ Version 3.0.0.final - `a` will get named "a", while `b` will keep its name "bbb". 
-Version 3.0.0rc2 - ------------------- +Version 3.0.0rc2 - October, 2021 +-------------------------------- - Added `url` expression to `pyparsing_common`. (Sample code posted by Wolfgang Fahl, very nice!) @@ -904,8 +1142,8 @@ Version 3.0.0a1 - April, 2020 a few. -Version 2.4.7 - March, 2020 (April, actually) ---------------------------------------------- +Version 2.4.7 - April, 2020 +--------------------------- - Backport of selected fixes from 3.0.0 work: . Each bug with Regex expressions . And expressions not properly constructing with generator diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 2fd54094..a2da1a57 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -34,6 +34,45 @@ If you have a question on using pyparsing, there are a number of resources avail other open and closed issues. Or post your question on SO or reddit. But don't wait until you are desperate and frustrated - just ask! :) +## Submitting examples + +If you have an example you wish to submit, please follow these guidelines. + +- **License - Submitted example code must be available for distribution with the rest of pyparsing under the MIT + open source license.** + +- Please follow PEP8 name and coding guidelines, and use the black formatter + to auto-format code. + +- Examples should import pyparsing and the common namespace classes as: + + import pyparsing as pp + # if necessary + ppc = pp.pyparsing_common + ppu = pp.pyparsing_unicode + +- Submitted examples *must* be Python 3.6.8 or later compatible. (It is acceptable if examples use Python + features added after 3.6) + +- Where possible use operators to create composite parse expressions: + + expr = expr_a + expr_b | expr_c + + instead of: + + expr = pp.MatchFirst([pp.And([expr_a, expr_b]), expr_c]) + + Exception: if using a generator to create an expression: + + import keyword + python_keywords = keyword.kwlist + any_keyword = pp.MatchFirst(pp.Keyword(kw) + for kw in python_keywords)) + +- Learn [Common Pitfalls When Writing Parsers](https://github.com/pyparsing/pyparsing/wiki/Common-Pitfalls-When-Writing-Parsers) and + how to avoid them when developing new examples. + +- See additional notes under [Some Coding Points](#some-coding-points). ## Submitting changes @@ -73,10 +112,6 @@ These coding styles are encouraged whether submitting code for core pyparsing or applications - DO NOT MODIFY OR REMOVE THESE NAMES. See more information at the [PEP8 wiki page](https://github.com/pyparsing/pyparsing/wiki/PEP-8-planning). - If you wish to submit a new example, please follow PEP8 name and coding guidelines, and use the black formatter - to auto-format code. Example code must be available for distribution with the rest of pyparsing under the MIT - open source license. - - No backslashes for line continuations. Continuation lines for expressions in ()'s should start with the continuing operator: @@ -94,33 +129,6 @@ These coding styles are encouraged whether submitting code for core pyparsing or - List, tuple, and dict literals should include a trailing comma after the last element, which reduces changeset clutter when another element gets added to the end. -- Examples should import pyparsing and the common namespace classes as: - - import pyparsing as pp - # if necessary - ppc = pp.pyparsing_common - ppu = pp.pyparsing_unicode - - Submitted examples *must* be Python 3.6.8 or later compatible. 
- -- Where possible use operators to create composite parse expressions: - - expr = expr_a + expr_b | expr_c - - instead of: - - expr = pp.MatchFirst([pp.And([expr_a, expr_b]), expr_c]) - - Exception: if using a generator to create an expression: - - import keyword - python_keywords = keyword.kwlist - any_keyword = pp.MatchFirst(pp.Keyword(kw) - for kw in python_keywords)) - -- Learn [Common Pitfalls When Writing Parsers](https://github.com/pyparsing/pyparsing/wiki/Common-Pitfalls-When-Writing-Parsers) and - how to avoid them when developing new examples. - - New features should be accompanied by updates to unitTests.py and a bullet in the CHANGES file. - Do not modify pyparsing_archive.py. This file is kept as a reference artifact from when pyparsing was distributed diff --git a/README.rst b/README.rst index f51c9ddd..24d603c7 100644 --- a/README.rst +++ b/README.rst @@ -1,7 +1,7 @@ PyParsing -- A Python Parsing Module ==================================== -|Build Status| |Coverage| +|Version| |Build Status| |Coverage| |License| |Python Versions| |Snyk Score| Introduction ============ @@ -63,7 +63,7 @@ entire directory of examples can be found `here `__ file. +MIT License. See header of the `pyparsing __init__.py `__ file. History ======= @@ -72,5 +72,22 @@ See `CHANGES `__ fil .. |Build Status| image:: https://github.com/pyparsing/pyparsing/actions/workflows/ci.yml/badge.svg :target: https://github.com/pyparsing/pyparsing/actions/workflows/ci.yml + .. |Coverage| image:: https://codecov.io/gh/pyparsing/pyparsing/branch/master/graph/badge.svg :target: https://codecov.io/gh/pyparsing/pyparsing + +.. |Version| image:: https://img.shields.io/pypi/v/pyparsing?style=flat-square + :target: https://pypi.org/project/pyparsing/ + :alt: Version + +.. |License| image:: https://img.shields.io/pypi/l/pyparsing.svg?style=flat-square + :target: https://pypi.org/project/pyparsing/ + :alt: License + +.. |Python Versions| image:: https://img.shields.io/pypi/pyversions/pyparsing.svg?style=flat-square + :target: https://pypi.org/project/python-liquid/ + :alt: Python versions + +.. |Snyk Score| image:: https://snyk.io//advisor/python/pyparsing/badge.svg + :target: https://snyk.io//advisor/python/pyparsing + :alt: pyparsing diff --git a/docs/HowToUsePyparsing.rst b/docs/HowToUsePyparsing.rst index fb28b7d9..3dc1725d 100644 --- a/docs/HowToUsePyparsing.rst +++ b/docs/HowToUsePyparsing.rst @@ -5,10 +5,10 @@ Using the pyparsing module :author: Paul McGuire :address: ptmcg.pm+pyparsing@gmail.com -:revision: 3.0.0 -:date: October, 2021 +:revision: 3.1.1 +:date: July, 2023 -:copyright: Copyright |copy| 2003-2022 Paul McGuire. +:copyright: Copyright |copy| 2003-2023 Paul McGuire. .. |copy| unicode:: 0xA9 @@ -36,13 +36,13 @@ directory of the pyparsing GitHub repo. **Note**: *In pyparsing 3.0, many method and function names which were originally written using camelCase have been converted to PEP8-compatible snake_case. So ``parseString()`` is being renamed to ``parse_string()``, -``delimitedList`` to ``delimited_list``, and so on. You may see the old +``delimitedList`` to DelimitedList_, and so on. You may see the old names in legacy parsers, and they will be supported for a time with synonyms, but the synonyms will be removed in a future release.* *If you are using this documentation, but working with a 2.4.x version of pyparsing, you'll need to convert methods and arguments from the documented snake_case -names to the legacy camelCase names. In pyparsing 3.0.x, both forms are +names to the legacy camelCase names. 
In pyparsing 3.0.x and 3.1.x, both forms are supported, but the legacy forms are deprecated; they will be dropped in a future release.* @@ -58,8 +58,8 @@ To parse an incoming data string, the client code must follow these steps: this to a program variable. Optional results names or parse actions can also be defined at this time. -2. Call ``parse_string()`` or ``scan_string()`` on this variable, passing in - the string to +2. Call ``parse_string()``, ``scan_string()``, or ``search_string()`` + on this variable, passing in the string to be parsed. During the matching process, whitespace between tokens is skipped by default (although this can be changed). When token matches occur, any defined parse action methods are @@ -182,7 +182,7 @@ Usage notes - ``expr[... ,n]`` is equivalent to ``expr*(0, n)`` (read as "0 to n instances of expr") - - ``expr[...]`` and ``expr[0, ...]`` are equivalent to ``ZeroOrMore(expr)`` + - ``expr[...]``, ``expr[0, ...]`` and ``expr * ...`` are equivalent to ``ZeroOrMore(expr)`` - ``expr[1, ...]`` is equivalent to ``OneOrMore(expr)`` @@ -192,6 +192,11 @@ Usage notes occurrences. If this behavior is desired, then write ``expr[..., n] + ~expr``. +- ``[]`` notation will also accept a stop expression using ':' slice + notation: + + - ``expr[...:end_expr]`` is equivalent to ``ZeroOrMore(expr, stop_on=end_expr)`` + - MatchFirst_ expressions are matched left-to-right, and the first match found will skip all later expressions within, so be sure to define less-specific patterns after more-specific patterns. @@ -229,7 +234,7 @@ Usage notes - Punctuation may be significant for matching, but is rarely of much interest in the parsed results. Use the ``suppress()`` method to keep these tokens from cluttering up your returned lists of - tokens. For example, ``delimited_list()`` matches a succession of + tokens. For example, DelimitedList_ matches a succession of one or more expressions, separated by delimiters (commas by default), but only returns a list of the actual expressions - the delimiters are used for parsing, but are suppressed from the @@ -263,6 +268,9 @@ Usage notes Classes ======= +All the pyparsing classes can be found in this +`UML class diagram <_static/pyparsingClassDiagram_3.0.9.jpg>`_. + Classes in the pyparsing module ------------------------------- @@ -348,10 +356,12 @@ methods for code to use are: ^ FAIL: Expected numeric digits, found end of text (at char 4), (line:1, col:5) +.. _set_results_name: + - ``set_results_name(string, list_all_matches=False)`` - name to be given to tokens matching the element; if multiple tokens within - a repetition group (such as ``ZeroOrMore`` or ``delimited_list``) the + a repetition group (such as ZeroOrMore_ or DelimitedList_) the default is to return only the last matching token - if ``list_all_matches`` is set to True, then a list of all the matching tokens is returned. @@ -449,11 +459,13 @@ methods for code to use are: repeatedly to specify multiple expressions; useful to specify patterns of comment syntax, for example -- ``set_debug(debug_flag=True)`` - function to enable/disable tracing output +- ``set_debug(flag=True)`` - function to enable/disable tracing output when trying to match this element - ``validate()`` - function to verify that the defined grammar does not - contain infinitely recursive constructs + contain infinitely recursive constructs (``validate()`` is deprecated, and + will be removed in a future pyparsing release. Pyparsing now supports + left-recursive parsers, which this function attempted to catch.) 
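For example (a minimal sketch), tracing can be turned on for a single expression with ``set_debug()``::

    import pyparsing as pp

    num = pp.Word(pp.nums).set_name("num").set_debug()
    num.parse_string("42")   # prints match-attempt/matched trace lines for "num"
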
.. _parse_with_tabs: @@ -495,7 +507,7 @@ Basic ParserElement subclasses defined keyword - ``CaselessKeyword`` - similar to Keyword_, but with caseless matching - behavior + behavior as described in CaselessLiteral_. .. _Word: @@ -535,16 +547,21 @@ Basic ParserElement subclasses - ``max`` - indicating a maximum length of matching characters - - ``exact`` - indicating an exact length of matching characters + - ``exact`` - indicating an exact length of matching characters; + if ``exact`` is specified, it will override any values for ``min`` or ``max`` + + - ``as_keyword`` - indicating that preceding and following characters must + be whitespace or non-keyword characters - If ``exact`` is specified, it will override any values for ``min`` or ``max``. + - ``exclude_chars`` - a string of characters that should be excluded from + init_chars and body_chars - Sometimes you want to define a word using all - characters in a range except for one or two of them; you can do this - with the new ``exclude_chars`` argument. This is helpful if you want to define - a word with all ``printables`` except for a single delimiter character, such - as '.'. Previously, you would have to create a custom string to pass to Word. - With this change, you can just create ``Word(printables, exclude_chars='.')``. + Sometimes you want to define a word using all + characters in a range except for one or two of them; you can do this + with the ``exclude_chars`` argument. This is helpful if you want to define + a word with all ``printables`` except for a single delimiter character, such + as '.'. Previously, you would have to create a custom string to pass to Word. + With this change, you can just create ``Word(printables, exclude_chars='.')``. - ``Char`` - a convenience form of ``Word`` that will match just a single character from a string of matching characters:: @@ -675,17 +692,37 @@ Expression subclasses parse element is not found in the input string; parse action will only be called if a match is found, or if a default is specified. + An optional element ``expr`` can also be expressed using ``expr | ""``. + (``Opt`` was formerly named ``Optional``, but since the standard Python library module ``typing`` now defines ``Optional``, the pyparsing class has been renamed to ``Opt``. A compatibility synonym ``Optional`` is defined, but will be removed in a future release.) +.. _ZeroOrMore: + - ``ZeroOrMore`` - similar to ``Opt``, but can be repeated; ``ZeroOrMore(expr)`` can also be written as ``expr[...]``. -- ``OneOrMore`` - similar to ``ZeroOrMore``, but at least one match must +.. _OneOrMore: + +- ``OneOrMore`` - similar to ZeroOrMore_, but at least one match must be present; ``OneOrMore(expr)`` can also be written as ``expr[1, ...]``. +.. _DelimitedList: + +- ``DelimitedList`` - used for + matching one or more occurrences of ``expr``, separated by ``delim``. + By default, the delimiters are suppressed, so the returned results contain + only the separate list elements. Can optionally specify ``combine=True``, + indicating that the expressions and delimiters should be returned as one + combined value (useful for scoped variables, such as ``"a.b.c"``, or + ``"a::b::c"``, or paths such as ``"a/b/c"``). Can also optionally specify ``min` and ``max`` + restrictions on the length of the list, and + ``allow_trailing_delim`` to accept a trailing delimiter at the end of the list. + +.. 
_FollowedBy: + - ``FollowedBy`` - a lookahead expression, requires matching of the given expressions, but does not advance the parsing position within the input string @@ -769,7 +806,7 @@ Special subclasses ------------------ - ``Group`` - causes the matched tokens to be enclosed in a list; - useful in repeated elements like ``ZeroOrMore`` and ``OneOrMore`` to + useful in repeated elements like ZeroOrMore_ and OneOrMore_ to break up matched tokens into groups for each repeated pattern - ``Dict`` - like ``Group``, but also constructs a dictionary, using the @@ -813,7 +850,8 @@ Other classes ['abc', ['100', '200', '300'], 'end'] If the ``Group`` is constructed using ``aslist=True``, the resulting tokens - will be a Python list instead of a ParseResults_. + will be a Python list instead of a ParseResults_. In this case, the returned value will + no longer support the extended features or methods of a ParseResults_. - as a dictionary @@ -825,8 +863,9 @@ Other classes input text - in addition to ParseResults_ listed as ``[ [ a1, b1, c1, ...], [ a2, b2, c2, ...] ]`` it also acts as a dictionary with entries defined as ``{ a1 : [ b1, c1, ... ] }, { a2 : [ b2, c2, ... ] }``; this is especially useful when processing tabular data where the first column contains a key - value for that line of data; when constructed with ``aslist=True``, will - return an actual Python ``dict`` instead of a ParseResults_. + value for that line of data; when constructed with ``asdict=True``, will + return an actual Python ``dict`` instead of a ParseResults_. In this case, the returned value will + no longer support the extended features or methods of a ParseResults_. - list elements that are deleted using ``del`` will still be accessible by their dictionary keys @@ -858,6 +897,10 @@ Other classes (The ``pprint`` module is especially good at printing out the nested contents given by ``as_list()``.) + If a ParseResults_ is built with expressions that use results names (see _set_results_name) or + using the ``Dict`` class, then those names and values can be extracted as a Python + dict using ``as_dict()``. + Finally, ParseResults_ can be viewed by calling ``dump()``. ``dump()`` will first show the ``as_list()`` output, followed by an indented structure listing parsed tokens that have been assigned results names. @@ -1009,15 +1052,6 @@ Miscellaneous attributes and methods Helper methods -------------- -- ``delimited_list(expr, delim=',')`` - convenience function for - matching one or more occurrences of expr, separated by delim. - By default, the delimiters are suppressed, so the returned results contain - only the separate list elements. Can optionally specify ``combine=True``, - indicating that the expressions and delimiters should be returned as one - combined value (useful for scoped variables, such as ``"a.b.c"``, or - ``"a::b::c"``, or paths such as ``"a/b/c"``). Can also optionally specify - ``allow_trailing_delim`` to accept a trailing delimiter at the end of the list. - - ``counted_array(expr)`` - convenience function for a pattern where an list of instances of the given expression are preceded by an integer giving the count of elements in the list. 
Returns an expression that parses the leading integer, @@ -1247,9 +1281,9 @@ Helper parse actions ``ParseException`` if matching at a different column number; useful when parsing tabular data -- ``common.convert_to_integer()`` - converts all matched tokens to uppercase +- ``common.convert_to_integer()`` - converts all matched tokens to int -- ``common.convert_to_float()`` - converts all matched tokens to uppercase +- ``common.convert_to_float()`` - converts all matched tokens to float - ``common.convert_to_date()`` - converts matched token to a datetime.date @@ -1275,6 +1309,19 @@ Common string and token constants ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþ +.. _identchars: + +- ``identchars`` - a string containing characters that are valid as initial identifier characters:: + + ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyzª + µºÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ + +- ``identbodychars`` - a string containing characters that are valid as identifier body characters (those following a + valid leading identifier character as given in identchars_):: + + 0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyzª + µ·ºÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ + - ``printables`` - same as ``string.printable``, minus the space (``' '``) character - ``empty`` - a global ``Empty()``; will always match @@ -1287,13 +1334,15 @@ Common string and token constants - ``quoted_string`` - ``sgl_quoted_string | dbl_quoted_string`` +- ``python_quoted_string`` - ``quoted_string | multiline quoted string`` + - ``c_style_comment`` - a comment block delimited by ``'/*'`` and ``'*/'`` sequences; can span multiple lines, but does not support nesting of comments - ``html_comment`` - a comment block delimited by ``''`` sequences; can span multiple lines, but does not support nesting of comments -- ``comma_separated_list`` - similar to ``delimited_list``, except that the +- ``comma_separated_list`` - similar to DelimitedList_, except that the list expressions can be any text value, or a quoted string; quoted strings can safely include commas without incorrectly breaking the string into two tokens @@ -1355,21 +1404,21 @@ Unicode set Alternate names Description -------------------------- ----------------- ------------------------------------------------ Arabic العربية Chinese 中文 +CJK Union of Chinese, Japanese, and Korean sets Cyrillic кириллица +Devanagari देवनागरी Greek Ελληνικά +Hangul Korean, 한국어 Hebrew עִברִית Japanese 日本語 Union of Kanji, Katakana, and Hiragana sets +Japanese.Hiragana ひらがな Japanese.Kanji 漢字 Japanese.Katakana カタカナ -Japanese.Hiragana ひらがな -Hangul Korean, 한국어 Latin1 All Unicode characters up to code point 255 LatinA LatinB Thai ไทย -Devanagari देवनागरी BasicMultilingualPlane BMP All Unicode characters up to code point 65535 -CJK Union of Chinese, Japanese, and Korean sets ========================== ================= ================================================ The base ``unicode`` class also includes definitions based on all Unicode code points up to ``sys.maxunicode``. This @@ -1396,13 +1445,31 @@ Create your parser as you normally would. Then call ``create_diagram()``, passin This will result in the railroad diagram being written to ``street_address_diagram.html``. -Diagrams usually will vertically wrap expressions containing more than 3 terms. You can override this by -passing the `vertical` argument to `create_diagram` with a larger value. 
+`create_diagrams` takes the following arguments: + +- ``output_html`` (str or file-like object) - output target for generated diagram HTML + +- ``vertical`` (int) - threshold for formatting multiple alternatives vertically instead of horizontally (default=3) + +- ``show_results_names`` - bool flag whether diagram should show annotations for defined results names + +- ``show_groups`` - bool flag whether groups should be highlighted with an unlabeled surrounding box + +- ``embed`` - bool flag whether generated HTML should omit , , and tags to embed + the resulting HTML in an enclosing HTML source (such as PyScript HTML) + +- ``head`` - str containing additional HTML to insert into the section of the generated code; + can be used to insert custom CSS styling + +- ``body`` - str containing additional HTML to insert at the beginning of the section of the + generated code + Example ------- You can view an example railroad diagram generated from `a pyparsing grammar for -SQL SELECT statements <_static/sql_railroad.html>`_. +SQL SELECT statements <_static/sql_railroad.html>`_ (generated from +`examples/select_parser.py <../examples/select_parser.py>`_). Naming tip ---------- diff --git a/docs/_static/pyparsingClassDiagram_3.0.0.jpg b/docs/_static/pyparsingClassDiagram_3.0.0.jpg deleted file mode 100644 index f65e5f1a..00000000 Binary files a/docs/_static/pyparsingClassDiagram_3.0.0.jpg and /dev/null differ diff --git a/docs/_static/pyparsingClassDiagram_3.0.9.jpg b/docs/_static/pyparsingClassDiagram_3.0.9.jpg new file mode 100644 index 00000000..d92feed4 Binary files /dev/null and b/docs/_static/pyparsingClassDiagram_3.0.9.jpg differ diff --git a/docs/conf.py b/docs/conf.py index ce571f9b..5f5bd8a0 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -21,7 +21,7 @@ # -- Project information ----------------------------------------------------- project = "PyParsing" -copyright = "2018-2021, Paul T. McGuire" +copyright = "2018-2022, Paul T. McGuire" author = "Paul T. McGuire" # The short X.Y version diff --git a/docs/make_sphinx_docs.bat b/docs/make_sphinx_docs.bat new file mode 100644 index 00000000..341fd671 --- /dev/null +++ b/docs/make_sphinx_docs.bat @@ -0,0 +1 @@ +sphinx-build.exe -M html . 
_build diff --git a/docs/pyparsing_class_diagram.puml b/docs/pyparsing_class_diagram.puml index cf8d1ebb..f90f99e2 100644 --- a/docs/pyparsing_class_diagram.puml +++ b/docs/pyparsing_class_diagram.puml @@ -22,7 +22,6 @@ class globals { quoted_string sgl_quoted_string dbl_quoted_string -delimited_list() counted_array() match_previous_literal() match_previous_expr() @@ -185,6 +184,7 @@ class Each class OneOrMore class ZeroOrMore +class DelimitedList class SkipTo class Group class Forward { @@ -246,6 +246,7 @@ ParseElementEnhance <|-- Located ParseElementEnhance <|--- _MultipleMatch _MultipleMatch <|-- OneOrMore _MultipleMatch <|-- ZeroOrMore +ParseElementEnhance <|-- DelimitedList ParseElementEnhance <|--- NotAny ParseElementEnhance <|--- FollowedBy ParseElementEnhance <|--- PrecededBy diff --git a/docs/pyparsing_class_diagram.svg b/docs/pyparsing_class_diagram.svg deleted file mode 100644 index 9a9e7ac3..00000000 --- a/docs/pyparsing_class_diagram.svg +++ /dev/null @@ -1,836 +0,0 @@ -corecommonunicodeglobalsquoted_stringsgl_quoted_stringdbl_quoted_stringcommon_html_entityclass OpAssocclass IndentedBlockc_style_commenthtml_commentrest_of_linedbl_slash_commentcpp_style_commentjava_style_commentpython_style_commentdelimited_list()counted_array()match_previous_literal()match_previous_expr()one_of()dict_of()original_text_for()ungroup()nested_expr()make_html_tags()make_xml_tags()replace_html_entity()infix_notation()match_only_at_col()replace_with()remove_quotes()with_attribute()with_class()trace_parse_action()condition_as_parse_action()srange()token_map()autoname_elements()ParseResultsclass Listfrom_dict()__getitem__()__setitem__()__contains__()__len__()__bool__()__iter__()__reversed__()__getattr__()__add__()__getstate__()__setstate__()__getnewargs__()__dir__()as_dict()as_list()dump()get_name()items()keys()values()haskeys()pop()get()insert()append()extend()clear()copy()get_name()pprint()ParseBaseExceptionlinelinenocolumnparser_elementexplain_exception()explain()mark_input_line()ParseExceptionParseFatalExceptionParseSyntaxExceptionParserElementname: strresults_name: strenable_packrat()enable_left_recursion()disable_memoization()set_default_whitespace_chars()inline_literals_using()reset_cache() verbose_stacktrace operator + () -> Andoperator - () -> And.ErrorStopoperator | () -> MatchFirstoperator ^ () -> Oroperator & () -> Eachoperator ~ () -> NotAnyoperator [] () -> _MultipleMatchadd_condition()add_parse_action()set_parse_action()copy()ignore(expr)leave_whitespace()parse_with_tabs()suppress()set_break()set_debug()set_debug_actions()set_name()set_results_name()parse_string()scan_string()search_string()transform_string()split()run_tests()recurse()create_diagram()TokenParseExpressionexprs: list[ParserElement]ParseElementEnhanceexpr: ParserElement_PositionTokenCharWhiteWordKeywordset_default_keyword_chars(chars: str)CaselessKeywordEmptyLiteralRegexNoMatchCharsNotInQuotedStringAndOrMatchFirstEachOneOrMoreZeroOrMoreSkipToGroupForwardoperator <<= ()LineStartLineEndStringStartStringEndWordStartWordEnd_MultipleMatchFollowedByPrecededByAtLineStartAtStringStartTokenConverterLocatedOptCombineDictSuppressCloseMatchCaselessLiteralNotAny comma_separated_listintegerhex_integersigned_integerfractionmixed_integerrealsci_realnumberfnumberidentifieripv4_addressipv6_addressmac_addressiso8601_dateiso8601_datetimeuuidurlconvert_to_integer()convert_to_float()convert_to_date()convert_to_datetime()strip_html_tags()upcase_tokens()downcase_tokens()unicode_setprintables: stralphas: strnums: stralphanums: stridentchars: 
stridentbodychars: strLatin1LatinALatinBBasicMultilingualPlaneChineseThaiJapaneseclass Kanjiclass Hiraganaclass KatakanaGreekHangulArabicDevanagariHebrewCyrillicCJKClass Diagrampyparsing 3.0.9May, 2022 \ No newline at end of file diff --git a/docs/whats_new_in_3_0_0.rst b/docs/whats_new_in_3_0_0.rst index 10651cda..2f4fe3de 100644 --- a/docs/whats_new_in_3_0_0.rst +++ b/docs/whats_new_in_3_0_0.rst @@ -4,11 +4,11 @@ What's New in Pyparsing 3.0.0 :author: Paul McGuire -:date: April, 2022 +:date: May, 2022 :abstract: This document summarizes the changes made in the 3.0.0 release of pyparsing. - (Updated to reflect changes up to 3.0.8) + (Updated to reflect changes up to 3.0.10) .. sectnum:: :depth: 4 @@ -62,6 +62,20 @@ generator for documenting pyparsing parsers.:: # save as HTML parser.create_diagram('parser_rr_diag.html') +``create_diagram`` accepts these named arguments: + +- ``vertical`` (int) - threshold for formatting multiple alternatives vertically + instead of horizontally (default=3) +- ``show_results_names`` - bool flag whether diagram should show annotations for + defined results names +- ``show_groups`` - bool flag whether groups should be highlighted with an unlabeled surrounding box +- ``embed`` - bool flag whether generated HTML should omit ````, ````, and ```` tags to embed + the resulting HTML in an enclosing HTML source (new in 3.0.10) +- ``head`` - str containing additional HTML to insert into the ```` section of the + generated code; can be used to insert custom CSS styling +- ``body`` - str containing additional HTML to insert at the beginning of the ```` section of the + generated code + To use this new feature, install the supporting diagramming packages using:: pip install pyparsing[diagrams] @@ -200,7 +214,7 @@ just namespaces, to add some helpful behavior: (**currently not working on PyPy**) Support for yielding native Python ``list`` and ``dict`` types in place of ``ParseResults`` -------------------------------------------------------------------------------- +------------------------------------------------------------------------------------------- To support parsers that are intended to generate native Python collection types such as lists and dicts, the ``Group`` and ``Dict`` classes now accept an additional boolean keyword argument ``aslist`` and ``asdict`` respectively. See @@ -226,7 +240,7 @@ This is the mechanism used internally by the ``Group`` class when defined using ``aslist=True``. New Located class to replace ``locatedExpr`` helper method ------------------------------------------------------- +---------------------------------------------------------- The new ``Located`` class will replace the current ``locatedExpr`` method for marking parsed results with the start and end locations of the parsed data in the input string. ``locatedExpr`` had several bugs, and returned its results @@ -279,7 +293,7 @@ leading whitespace.:: [This is a fix to behavior that was added in 3.0.0, but was actually a regression from 2.4.x.] New ``IndentedBlock`` class to replace ``indentedBlock`` helper method --------------------------------------------------------------- +---------------------------------------------------------------------- The new ``IndentedBlock`` class will replace the current ``indentedBlock`` method for defining indented blocks of text, similar to Python source code. Using ``IndentedBlock``, the expression instance itself keeps track of the indent stack, @@ -392,7 +406,7 @@ Other new features common fields in URLs. 
See the updated ``urlExtractorNew.py`` file in the ``examples`` directory. Submitted by Wolfgang Fahl. -- ``delimited_list`` now supports an additional flag ``allow_trailing_delim``, +- ``DelimitedList`` now supports an additional flag ``allow_trailing_delim``, to optionally parse an additional delimiter at the end of the list. Submitted by Kazantcev Andrey. @@ -661,7 +675,8 @@ counted_array countedArray cpp_style_comment cppStyleComment dbl_quoted_string dblQuotedString dbl_slash_comment dblSlashComment -delimited_list delimitedList +DelimitedList delimitedList +DelimitedList delimited_list dict_of dictOf html_comment htmlComment infix_notation infixNotation diff --git a/examples/0README.html b/examples/0README.html index 617c16e5..ba5bab06 100644 --- a/examples/0README.html +++ b/examples/0README.html @@ -21,12 +21,12 @@

pyparsing Examples

-  • holaMundo.py ~ submission by Marco Alfonso
+  • hola_mundo.py ~ submission by Marco Alfonso
    "Hello, World!" example translated to Spanish, from Marco Alfonso's blog.
-  • chemicalFormulas.py
+  • chemical_formulas.py
    Simple example to demonstrate the use of ParseResults returned from parseString(). Parses a chemical formula (such as "H2O" or "C6H5OH"), and walks the returned list of tokens to calculate the molecular weight.
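    As a rough sketch of the idea only (this is not the example file itself, and the
    atomic-weight table is an illustrative subset), such a parser can be written as:

        import pyparsing as pp

        atomic_weight = {"H": 1.008, "C": 12.011, "O": 15.999}

        element = pp.Word(pp.alphas.upper(), pp.alphas.lower())
        count = pp.Opt(pp.Word(pp.nums), default="1")
        formula = pp.OneOrMore(pp.Group(element + count))

        tokens = formula.parse_string("C6H5OH", parse_all=True)
        # walk the grouped (symbol, count) tokens and total the weights
        print(sum(atomic_weight[sym] * int(qty) for sym, qty in tokens))  # 94.113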
@@ -141,17 +141,6 @@

    pyparsing Examples

-  • sparser.py ~ submission by Tim Cera
-    A configurable parser module that can be configured with a list of tuples, giving a high-level definition for parsing common sets
-    of water table data files. Tim had to contend with several different styles of data file formats, each with slight variations of its own.
-    Tim created a configurable parser (or "SPECIFIED parser" - hence the name "sparser"), that simply works from a config variable listing
-    the field names and data types, and implicitly, their order in the source data file.
-
-    See mayport_florida_8720220_data_def.txt for an example configuration file.

  • romanNumerals.py
    A Roman numeral generator and parser example, showing the power of parse actions to compile Roman numerals into their integer values.
@@ -256,26 +245,22 @@

    pyparsing Examples

  • builtin_parse_action_demo.py
    -New in version 1.5.7
    Demonstration of using builtins (min, max, sum, len, etc.) as parse actions.
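    As a minimal sketch of the technique (not the demo file itself): a one-argument
    builtin can be attached directly as a parse action, and is called with the matched tokens:

        import pyparsing as pp
        ppc = pp.pyparsing_common

        # sum() receives the already-converted integer tokens
        total = pp.OneOrMore(ppc.integer).add_parse_action(sum)
        print(total.parse_string("1 2 3 4"))  # [10]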
  • antlr_grammar.py ~ submission by Luca DellOlio
    -New in version 1.5.7
    Pyparsing example parsing ANTLR .a files and generating a working pyparsing parser.
  • shapes.py
    -New in version 1.5.7
    Parse actions example for a simple shape definition syntax, returning the matched tokens as domain objects instead of just strings.
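    The underlying pattern (shown here as a hypothetical miniature, not the shapes.py code)
    is to pass a class as the parse action, so each match is returned as a domain object:

        import pyparsing as pp

        class Circle:
            def __init__(self, tokens):
                self.radius = float(tokens.radius)

            def __repr__(self):
                return f"Circle(radius={self.radius})"

        circle_expr = pp.CaselessKeyword("circle") + pp.pyparsing_common.number("radius")
        circle_expr.set_parse_action(Circle)
        print(circle_expr.parse_string("CIRCLE 2.5"))  # [Circle(radius=2.5)]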
  • datetimeParseActions.py
    -New in version 1.5.7
    Parse actions example showing a parse action returning a datetime object instead of string tokens, and doing validation of the tokens, raising a ParseException if the given YYYY/MM/DD string does not represent a valid date. @@ -283,7 +268,6 @@
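    In outline (a sketch of the approach, not the example file), conversion and validation
    both happen inside the parse action:

        import pyparsing as pp
        from datetime import date

        ymd = pp.Regex(r"\d{4}/\d{2}/\d{2}")

        def to_date(s, loc, tokens):
            try:
                return date(*map(int, tokens[0].split("/")))
            except ValueError as ve:
                # e.g. "2023/02/30" becomes a ParseException, not a bad date
                raise pp.ParseException(s, loc, str(ve)) from None

        ymd.add_parse_action(to_date)
        print(ymd.parse_string("2023/07/14"))  # [datetime.date(2023, 7, 14)]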

    pyparsing Examples

  • position.py
    -New in version 1.5.7
    Demonstration of a couple of different ways to capture the location a particular expression was found within the overall input string.
  • diff --git a/examples/adventureEngine.py b/examples/adventureEngine.py index efc096c7..4f27d793 100644 --- a/examples/adventureEngine.py +++ b/examples/adventureEngine.py @@ -2,39 +2,44 @@ # Copyright 2005-2006, Paul McGuire # # Updated 2012 - latest pyparsing API +# Updated 2023 - using PEP8 API names # -from pyparsing import * +import pyparsing as pp import random import string -def aOrAn(item): - if item.desc[0] in "aeiou": +def a_or_an(item): + if item.desc.startswith(tuple("aeiou")): return "an " + item.desc else: return "a " + item.desc - -def enumerateItems(l): - if len(l) == 0: +def enumerate_items(items_list): + if not items_list: return "nothing" + *all_but_last, last = items_list out = [] - if len(l) > 1: - out.append(", ".join(aOrAn(item) for item in l[:-1])) + if all_but_last: + out.append(", ".join(a_or_an(item) for item in all_but_last)) + if len(all_but_last) > 1: + out[-1] += ',' out.append("and") - out.append(aOrAn(l[-1])) + out.append(a_or_an(last)) return " ".join(out) - -def enumerateDoors(l): - if len(l) == 0: +def enumerate_doors(doors_list): + if not doors_list: return "" + *all_but_last, last = doors_list out = [] - if len(l) > 1: - out.append(", ".join(l[:-1])) + if all_but_last: + out.append(", ".join(all_but_last)) + if len(all_but_last) > 1: + out[-1] += ',' out.append("and") - out.append(l[-1]) + out.append(last) return " ".join(out) @@ -57,10 +62,10 @@ def enter(self, player): if self.gameOver: player.gameOver = True - def addItem(self, it): + def add_item(self, it): self.inv.append(it) - def removeItem(self, it): + def remove_item(self, it): self.inv.remove(it) def describe(self): @@ -71,9 +76,9 @@ def describe(self): is_form = "are" else: is_form = "is" - print("There {} {} here.".format(is_form, enumerateItems(visibleItems))) + print("There {} {} here.".format(is_form, enumerate_items(visibleItems))) else: - print("You see %s." % (enumerateItems(visibleItems))) + print("You see %s." 
% (enumerate_items(visibleItems))) class Exit(Room): @@ -135,16 +140,16 @@ def __init__(self, desc, contents=None): else: self.contents = [] - def openItem(self, player): + def open_item(self, player): if not self.isOpened: self.isOpened = not self.isOpened if self.contents is not None: for item in self.contents: - player.room.addItem(item) + player.room.add_item(item) self.contents = [] self.desc = "open " + self.desc - def closeItem(self, player): + def close_item(self, player): if self.isOpened: self.isOpened = not self.isOpened if self.desc.startswith("open "): @@ -159,15 +164,15 @@ def __init__(self, verb, verbProg): self.verbProg = verbProg @staticmethod - def helpDescription(): + def help_description(): return "" - def _doCommand(self, player): + def _do_command(self, player): pass def __call__(self, player): print(self.verbProg.capitalize() + "...") - self._doCommand(player) + self._do_command(player) class MoveCommand(Command): @@ -176,11 +181,11 @@ def __init__(self, quals): self.direction = quals.direction[0] @staticmethod - def helpDescription(): + def help_description(): return """MOVE or GO - go NORTH, SOUTH, EAST, or WEST (can abbreviate as 'GO N' and 'GO W', or even just 'E' and 'S')""" - def _doCommand(self, player): + def _do_command(self, player): rm = player.room nextRoom = rm.doors[ { @@ -202,15 +207,15 @@ def __init__(self, quals): self.subject = quals.item @staticmethod - def helpDescription(): + def help_description(): return "TAKE or PICKUP or PICK UP - pick up an object (but some are deadly)" - def _doCommand(self, player): + def _do_command(self, player): rm = player.room subj = Item.items[self.subject] if subj in rm.inv and subj.isVisible: if subj.isTakeable: - rm.removeItem(subj) + rm.remove_item(subj) player.take(subj) else: print(subj.cantTakeMessage) @@ -224,17 +229,17 @@ def __init__(self, quals): self.subject = quals.item @staticmethod - def helpDescription(): + def help_description(): return "DROP or LEAVE - drop an object (but fragile items may break)" - def _doCommand(self, player): + def _do_command(self, player): rm = player.room subj = Item.items[self.subject] if subj in player.inv: - rm.addItem(subj) + rm.add_item(subj) player.drop(subj) else: - print("You don't have %s." % (aOrAn(subj))) + print("You don't have %s." % (a_or_an(subj))) class InventoryCommand(Command): @@ -242,11 +247,11 @@ def __init__(self, quals): super().__init__("INV", "taking inventory") @staticmethod - def helpDescription(): + def help_description(): return "INVENTORY or INV or I - lists what items you have" - def _doCommand(self, player): - print("You have %s." % enumerateItems(player.inv)) + def _do_command(self, player): + print("You have %s." % enumerate_items(player.inv)) class LookCommand(Command): @@ -254,24 +259,48 @@ def __init__(self, quals): super().__init__("LOOK", "looking") @staticmethod - def helpDescription(): + def help_description(): return "LOOK or L - describes the current room and any objects in it" - def _doCommand(self, player): + def _do_command(self, player): player.room.describe() +class ExamineCommand(Command): + def __init__(self, quals): + super().__init__("EXAMINE", "examining") + self.subject = Item.items[quals.item] + + @staticmethod + def help_description(): + return "EXAMINE or EX or X - look closely at an object" + + def _do_command(self, player): + msg = random.choice( + [ + "It's {}.", + "It's just {}.", + "It's a beautiful {1}.", + "It's a rare and beautiful {1}.", + "It's a rare {1}.", + "Just {}, nothing special...", + "{0}, just {0}." 
+ ] + ) + print(msg.format(a_or_an(self.subject), self.subject).capitalize()) + + class DoorsCommand(Command): def __init__(self, quals): super().__init__("DOORS", "looking for doors") @staticmethod - def helpDescription(): + def help_description(): return "DOORS - display what doors are visible from this room" - def _doCommand(self, player): + def _do_command(self, player): rm = player.room - numDoors = sum([1 for r in rm.doors if r is not None]) + numDoors = sum(1 for r in rm.doors if r is not None) if numDoors == 0: reply = "There are no doors in any direction." else: @@ -284,8 +313,7 @@ def _doCommand(self, player): for i, d in enumerate(rm.doors) if d is not None ] - # ~ print doorNames - reply += enumerateDoors(doorNames) + reply += enumerate_doors(doorNames) reply += "." print(reply) @@ -300,10 +328,10 @@ def __init__(self, quals): self.target = None @staticmethod - def helpDescription(): + def help_description(): return "USE or U - use an object, optionally IN or ON another object" - def _doCommand(self, player): + def _do_command(self, player): rm = player.room availItems = rm.inv + player.inv if self.subject in availItems: @@ -321,16 +349,16 @@ def __init__(self, quals): self.subject = Item.items[quals.item] @staticmethod - def helpDescription(): + def help_description(): return "OPEN or O - open an object" - def _doCommand(self, player): + def _do_command(self, player): rm = player.room availItems = rm.inv + player.inv if self.subject in availItems: if self.subject.isOpenable: if not self.subject.isOpened: - self.subject.openItem(player) + self.subject.open_item(player) else: print("It's already open.") else: @@ -345,16 +373,16 @@ def __init__(self, quals): self.subject = Item.items[quals.item] @staticmethod - def helpDescription(): + def help_description(): return "CLOSE or CL - close an object" - def _doCommand(self, player): + def _do_command(self, player): rm = player.room availItems = rm.inv + player.inv if self.subject in availItems: if self.subject.isOpenable: if self.subject.isOpened: - self.subject.closeItem(player) + self.subject.close_item(player) else: print("You can't close that, it's not open.") else: @@ -368,10 +396,10 @@ def __init__(self, quals): super().__init__("QUIT", "quitting") @staticmethod - def helpDescription(): + def help_description(): return "QUIT or Q - ends the game" - def _doCommand(self, player): + def _do_command(self, player): print("Ok....") player.gameOver = True @@ -381,10 +409,10 @@ def __init__(self, quals): super().__init__("HELP", "helping") @staticmethod - def helpDescription(): + def help_description(): return "HELP or H or ? 
- displays this help message" - def _doCommand(self, player): + def _do_command(self, player): print("Enter any of the following commands (not case sensitive):") for cmd in [ InventoryCommand, @@ -395,42 +423,43 @@ def _doCommand(self, player): CloseCommand, MoveCommand, LookCommand, + ExamineCommand, DoorsCommand, QuitCommand, HelpCommand, ]: - print(" - %s" % cmd.helpDescription()) + print(" - %s" % cmd.help_description()) print() -class AppParseException(ParseException): +class AppParseException(pp.ParseException): pass class Parser: def __init__(self): - self.bnf = self.makeBNF() + self.bnf = self.make_bnf() - def makeBNF(self): - invVerb = oneOf("INV INVENTORY I", caseless=True) - dropVerb = oneOf("DROP LEAVE", caseless=True) - takeVerb = oneOf("TAKE PICKUP", caseless=True) | ( - CaselessLiteral("PICK") + CaselessLiteral("UP") + def make_bnf(self): + invVerb = pp.one_of("INV INVENTORY I", caseless=True) + dropVerb = pp.one_of("DROP LEAVE", caseless=True) + takeVerb = pp.one_of("TAKE PICKUP", caseless=True) | ( + pp.CaselessLiteral("PICK") + pp.CaselessLiteral("UP") ) - moveVerb = oneOf("MOVE GO", caseless=True) | empty - useVerb = oneOf("USE U", caseless=True) - openVerb = oneOf("OPEN O", caseless=True) - closeVerb = oneOf("CLOSE CL", caseless=True) - quitVerb = oneOf("QUIT Q", caseless=True) - lookVerb = oneOf("LOOK L", caseless=True) - doorsVerb = CaselessLiteral("DOORS") - helpVerb = oneOf("H HELP ?", caseless=True) - - itemRef = OneOrMore(Word(alphas)).setParseAction(self.validateItemName) - nDir = oneOf("N NORTH", caseless=True).setParseAction(replaceWith("N")) - sDir = oneOf("S SOUTH", caseless=True).setParseAction(replaceWith("S")) - eDir = oneOf("E EAST", caseless=True).setParseAction(replaceWith("E")) - wDir = oneOf("W WEST", caseless=True).setParseAction(replaceWith("W")) + moveVerb = pp.one_of("MOVE GO", caseless=True) | pp.Empty() + useVerb = pp.one_of("USE U", caseless=True) + openVerb = pp.one_of("OPEN O", caseless=True) + closeVerb = pp.one_of("CLOSE CL", caseless=True) + quitVerb = pp.one_of("QUIT Q", caseless=True) + lookVerb = pp.one_of("LOOK L", caseless=True) + doorsVerb = pp.CaselessLiteral("DOORS") + helpVerb = pp.one_of("H HELP ?", caseless=True) + + itemRef = pp.OneOrMore(pp.Word(pp.alphas)).set_parse_action(self.validate_item_name).setName("item_ref") + nDir = pp.one_of("N NORTH", caseless=True).set_parse_action(pp.replace_with("N")) + sDir = pp.one_of("S SOUTH", caseless=True).set_parse_action(pp.replace_with("S")) + eDir = pp.one_of("E EAST", caseless=True).set_parse_action(pp.replace_with("E")) + wDir = pp.one_of("W WEST", caseless=True).set_parse_action(pp.replace_with("W")) moveDirection = nDir | sDir | eDir | wDir invCommand = invVerb @@ -439,32 +468,34 @@ def makeBNF(self): useCommand = ( useVerb + itemRef("usedObj") - + Optional(oneOf("IN ON", caseless=True)) - + Optional(itemRef, default=None)("targetObj") + + pp.Opt(pp.one_of("IN ON", caseless=True)) + + pp.Opt(itemRef, default=None)("targetObj") ) openCommand = openVerb + itemRef("item") closeCommand = closeVerb + itemRef("item") - moveCommand = moveVerb + moveDirection("direction") + moveCommand = (moveVerb | "") + moveDirection("direction") quitCommand = quitVerb lookCommand = lookVerb - doorsCommand = doorsVerb + examineCommand = pp.one_of("EXAMINE EX X", caseless=True) + itemRef("item") + doorsCommand = doorsVerb.setName("DOORS") helpCommand = helpVerb # attach command classes to expressions - invCommand.setParseAction(InventoryCommand) - dropCommand.setParseAction(DropCommand) - 
takeCommand.setParseAction(TakeCommand) - useCommand.setParseAction(UseCommand) - openCommand.setParseAction(OpenCommand) - closeCommand.setParseAction(CloseCommand) - moveCommand.setParseAction(MoveCommand) - quitCommand.setParseAction(QuitCommand) - lookCommand.setParseAction(LookCommand) - doorsCommand.setParseAction(DoorsCommand) - helpCommand.setParseAction(HelpCommand) + invCommand.set_parse_action(InventoryCommand) + dropCommand.set_parse_action(DropCommand) + takeCommand.set_parse_action(TakeCommand) + useCommand.set_parse_action(UseCommand) + openCommand.set_parse_action(OpenCommand) + closeCommand.set_parse_action(CloseCommand) + moveCommand.set_parse_action(MoveCommand) + quitCommand.set_parse_action(QuitCommand) + lookCommand.set_parse_action(LookCommand) + examineCommand.set_parse_action(ExamineCommand) + doorsCommand.set_parse_action(DoorsCommand) + helpCommand.set_parse_action(HelpCommand) # define parser using all command expressions - return ( + parser = pp.ungroup( invCommand | useCommand | openCommand @@ -473,24 +504,27 @@ def makeBNF(self): | takeCommand | moveCommand | lookCommand + | examineCommand | doorsCommand | helpCommand | quitCommand - )("command") + LineEnd() + )("command") - def validateItemName(self, s, l, t): + return parser + + def validate_item_name(self, s, l, t): iname = " ".join(t) if iname not in Item.items: raise AppParseException(s, l, "No such item '%s'." % iname) return iname - def parseCmd(self, cmdstr): + def parse_cmd(self, cmdstr): try: - ret = self.bnf.parseString(cmdstr) + ret = self.bnf.parse_string(cmdstr) return ret except AppParseException as pe: print(pe.msg) - except ParseException as pe: + except pp.ParseException as pe: print( random.choice( [ @@ -595,7 +629,7 @@ def createRooms(rm): def putItemInRoom(i, r): if isinstance(r, str): r = rooms[r] - r.addItem(Item.items[i]) + r.add_item(Item.items[i]) def playGame(p, startRoom): @@ -604,98 +638,98 @@ def playGame(p, startRoom): p.moveTo(startRoom) while not p.gameOver: cmdstr = input(">> ") - cmd = parser.parseCmd(cmdstr) + cmd = parser.parse_cmd(cmdstr) if cmd is not None: cmd.command(p) print() print("You ended the game with:") for i in p.inv: - print(" -", aOrAn(i)) - - -# ==================== -# start game definition -roomMap = """ - d-Z - | - f-c-e - . | - q - increment pointer +# < - decrement pointer +# , - input new byte value, store at the current pointer +# . - output the byte at the current pointer +# [] - evaluate value at current pointer, if nonzero, execute all statements in []'s and repeat +# +import pyparsing as pp + +# define the basic parser + +# define Literals for each symbol in the BF langauge +PLUS, MINUS, GT, LT, INP, OUT, LBRACK, RBRACK = pp.Literal.using_each("+-<>,.[]") + +# use a pyparsing Forward for the recursive definition of an instruction that can +# itself contain instructions +instruction_expr = pp.Forward().set_name("instruction") + +# define a LOOP expression for the instructions enclosed in brackets; use a +# pyparsing Group to wrap the instructions in a sub-list +LOOP = pp.Group(LBRACK + instruction_expr[...] 
+ RBRACK) + +# use '<<=' operator to insert expression definition into existing Forward +instruction_expr <<= PLUS | MINUS | GT | LT | INP | OUT | LOOP + +program_expr = instruction_expr[...].set_name("program") + +# ignore everything that is not a BF symbol +ignore_chars = pp.Word(pp.printables, exclude_chars="+-<>,.[]") +program_expr.ignore(ignore_chars) + + +class BFEngine: + """ + Brainf*ck execution environment, with a memory array and pointer. + """ + def __init__(self, memory_size: int = 1024): + self._ptr = 0 + self._memory_size = memory_size + self._memory = [0] * self._memory_size + + @property + def ptr(self): + return self._ptr + + @ptr.setter + def ptr(self, value): + self._ptr = value % self._memory_size + + @property + def at_ptr(self): + return self._memory[self._ptr] + + @at_ptr.setter + def at_ptr(self, value): + self._memory[self._ptr] = value % 256 + + def output_value_at_ptr(self): + print(chr(self.at_ptr), end="") + + def input_value(self): + input_char = input() or "\0" + self.at_ptr = ord(input_char[0]) + + def reset(self): + self._ptr = 0 + self._memory[:] = [0] * self._memory_size + + def dump_state(self): + for i in range(30): + print(f"{self._memory[i]:3d} ", end="") + print() + + if self.ptr < 30: + print(f" {' ' * self.ptr}^") + + +# define executable classes for each instruction + +class Instruction: + """Abstract class for all instruction classes to implement.""" + def __init__(self, tokens): + self.tokens = tokens + + def execute(self, bf_engine: BFEngine): + raise NotImplementedError() + + +class IncrPtr(Instruction): + def execute(self, bf_engine: BFEngine): + bf_engine.ptr += 1 + + +class DecrPtr(Instruction): + def execute(self, bf_engine: BFEngine): + bf_engine.ptr -= 1 + + +class IncrPtrValue(Instruction): + def execute(self, bf_engine: BFEngine): + bf_engine.at_ptr += 1 + + +class DecrPtrValue(Instruction): + def execute(self, bf_engine: BFEngine): + bf_engine.at_ptr -= 1 + + +class OutputPtrValue(Instruction): + def execute(self, bf_engine: BFEngine): + bf_engine.output_value_at_ptr() + + +class InputPtrValue(Instruction): + def execute(self, bf_engine: BFEngine): + bf_engine.input_value() + + +class RunInstructionLoop(Instruction): + def __init__(self, tokens): + super().__init__(tokens) + self.instructions = self.tokens[0][1:-1] + + def execute(self, bf_engine: BFEngine): + while bf_engine.at_ptr: + for i in self.instructions: + i.execute(bf_engine) + + +# add parse actions to all BF instruction expressions +PLUS.add_parse_action(IncrPtrValue) +MINUS.add_parse_action(DecrPtrValue) +GT.add_parse_action(IncrPtr) +LT.add_parse_action(DecrPtr) +OUT.add_parse_action(OutputPtrValue) +INP.add_parse_action(InputPtrValue) +LOOP.add_parse_action(RunInstructionLoop) + + +@program_expr.add_parse_action +def run_program(tokens): + bf = BFEngine() + for t in tokens: + t.execute(bf) + print() + + +# generate railroad diagram +program_expr.create_diagram("bf.html") + +# execute an example BF program +hw = "+[-->-[>>+>-----<<]<--<---]>-.>>>+.>>..+++[.>]<<<<.+++.------.<<-.>>>>+." 
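+# (hw is a compact BF program that should print a "Hello World"-style greeting;
+#  because run_program is attached as a parse action, the parse_string() call below
+#  both parses and executes it - e.g. program_expr.parse_string("++++++++[>++++++++<-]>+.")
+#  would print a single "A", since 8 * 8 + 1 == 65)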
+program_expr.parse_string(hw) diff --git a/examples/booleansearchparser.py b/examples/booleansearchparser.py index c901db14..503a4cdf 100644 --- a/examples/booleansearchparser.py +++ b/examples/booleansearchparser.py @@ -90,11 +90,14 @@ Suppress, OneOrMore, one_of, + ParserElement, ) import re +ParserElement.enablePackrat() # Updated on 02 Dec 2021 according to ftp://ftp.unicode.org/Public/UNIDATA/Blocks.txt +# (includes characters not found in the BasicMultilingualPlane) alphabet_ranges = [ # CYRILIC: https://en.wikipedia.org/wiki/Cyrillic_(Unicode_block) [int("0400", 16), int("04FF", 16)], @@ -322,6 +325,7 @@ class ParserTest(BooleanSearchParser): """ def Test(self): + # fmt: off exprs = { "0": "help", "1": "help or hulp", @@ -363,93 +367,28 @@ def Test(self): texts_matcheswith = { "halp thinks he needs help": [ - "25", - "22", - "20", - "21", - "11", - "17", - "16", - "23", - "34", - "1", - "0", - "5", - "7", - "6", - "9", - "8", + "25", "22", "20", "21", "11", "17", "16", "23", "34", "1", + "0", "5", "7", "6", "9", "8", ], "he needs halp": ["24", "25", "20", "11", "10", "12", "34", "6"], "help": ["25", "20", "12", "17", "16", "1", "0", "5", "6"], "help hilp": [ - "25", - "22", - "20", - "32", - "21", - "12", - "17", - "16", - "19", - "31", - "23", - "1", - "0", - "5", - "4", - "7", - "6", - "9", - "8", - "33", + "25", "22", "20", "32", "21", "12", "17", "16", "19", "31", + "23", "1", "0", "5", "4", "7", "6", "9", "8", "33", ], "help me please hulp": [ - "30", - "25", - "27", - "20", - "13", - "12", - "15", - "14", - "17", - "16", - "19", - "18", - "23", - "29", - "1", - "0", - "3", - "2", - "5", - "4", - "6", - "9", + "30", "25", "27", "20", "13", "12", "15", "14", "17", "16", + "19", "18", "23", "29", "1", "0", "3", "2", "5", "4", "6", "9", ], "helper": ["20", "10", "12", "16"], "hulp hilp": [ - "25", - "27", - "20", - "21", - "10", - "12", - "14", - "17", - "19", - "23", - "1", - "5", - "4", - "7", - "6", - "9", + "25", "27", "20", "21", "10", "12", "14", "17", "19", "23", + "1", "5", "4", "7", "6", "9", ], "nothing": ["25", "10", "12"], "안녕하세요, 당신은 어떠세요?": ["10", "12", "25", "35"], } + # fmt: on all_ok = True for text, matches in texts_matcheswith.items(): @@ -459,7 +398,9 @@ def Test(self): _matches.append(_id) test_passed = sorted(matches) == sorted(_matches) - if not test_passed: + if test_passed: + print("Passed", repr(text)) + else: print("Failed", repr(text), "expected", matches, "matched", _matches) all_ok = all_ok and test_passed @@ -490,7 +431,9 @@ def Test(self): _matches.append(_id) test_passed = sorted(matches) == sorted(_matches) - if not test_passed: + if test_passed: + print("Passed", repr(text)) + else: print("Failed", repr(text), "expected", matches, "matched", _matches) all_ok = all_ok and test_passed @@ -498,10 +441,13 @@ def Test(self): return all_ok -if __name__ == "__main__": +def main(): if ParserTest().Test(): print("All tests OK") - exit(0) else: print("One or more tests FAILED") - exit(1) + raise Exception("One or more tests FAILED") + + +if __name__ == "__main__": + main() diff --git a/examples/builtin_parse_action_demo.py b/examples/builtin_parse_action_demo.py index 36b3a98b..fed6e2a3 100644 --- a/examples/builtin_parse_action_demo.py +++ b/examples/builtin_parse_action_demo.py @@ -5,14 +5,13 @@ # Simple example of using builtin functions as parse actions. 
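+# (each builtin is applied to the list of parsed integer tokens, so sum() totals
+#  them, len() counts them, sorted() returns them in ascending order, and so on)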
# -from pyparsing import * - -integer = Word(nums).setParseAction(lambda t: int(t[0])) +import pyparsing as pp +ppc = pp.common # make an expression that will match a list of ints (which # will be converted to actual ints by the parse action attached # to integer) -nums = OneOrMore(integer) +nums = ppc.integer[...] test = "2 54 34 2 211 66 43 2 0" @@ -20,10 +19,9 @@ # try each of these builtins as parse actions for fn in (sum, max, min, len, sorted, reversed, list, tuple, set, any, all): - fn_name = fn.__name__ if fn is reversed: # reversed returns an iterator, we really want to show the list of items fn = lambda x: list(reversed(x)) # show how each builtin works as a free-standing parse action - print(fn_name, nums.setParseAction(fn).parseString(test)) + print(fn.__name__, nums.set_parse_action(fn).parse_string(test)) diff --git a/examples/chemicalFormulas.py b/examples/chemicalFormulas.py deleted file mode 100644 index d4c87cd9..00000000 --- a/examples/chemicalFormulas.py +++ /dev/null @@ -1,121 +0,0 @@ -# -# chemicalFormulas.py -# -# Copyright (c) 2003,2019 Paul McGuire -# - -import pyparsing as pp - -atomicWeight = { - "O": 15.9994, - "H": 1.00794, - "Na": 22.9897, - "Cl": 35.4527, - "C": 12.0107, -} - -digits = "0123456789" - -# Version 1 -element = pp.Word(pp.alphas.upper(), pp.alphas.lower(), max=2).set_name("element") -# for stricter matching, use this Regex instead -# element = Regex("A[cglmrstu]|B[aehikr]?|C[adeflmorsu]?|D[bsy]|" -# "E[rsu]|F[emr]?|G[ade]|H[efgos]?|I[nr]?|Kr?|L[airu]|" -# "M[dgnot]|N[abdeiop]?|Os?|P[abdmortu]?|R[abefghnu]|" -# "S[bcegimnr]?|T[abcehilm]|U(u[bhopqst])?|V|W|Xe|Yb?|Z[nr]") -elementRef = pp.Group(element + pp.Optional(pp.Word(digits), default="1")) -formula = elementRef[...] - - -def sum_atomic_weights(element_list): - return sum(atomicWeight[elem] * int(qty) for elem, qty in element_list) - - -formula.runTests( - """\ - H2O - C6H5OH - NaCl - """, - fullDump=False, - postParse=lambda _, tokens: "Molecular weight: {}".format( - sum_atomic_weights(tokens) - ), -) -print() - -# Version 2 - access parsed items by results name -elementRef = pp.Group( - element("symbol") + pp.Optional(pp.Word(digits), default="1")("qty") -) -formula = elementRef[...] 
- - -def sum_atomic_weights_by_results_name(element_list): - return sum(atomicWeight[elem.symbol] * int(elem.qty) for elem in element_list) - - -formula.runTests( - """\ - H2O - C6H5OH - NaCl - """, - fullDump=False, - postParse=lambda _, tokens: "Molecular weight: {}".format( - sum_atomic_weights_by_results_name(tokens) - ), -) -print() - -# Version 3 - convert integers during parsing process -integer = pp.Word(digits).setParseAction(lambda t: int(t[0])).setName("integer") -elementRef = pp.Group(element("symbol") + pp.Optional(integer, default=1)("qty")) -formula = elementRef[...].setName("chemical_formula") - - -def sum_atomic_weights_by_results_name_with_converted_ints(element_list): - return sum(atomicWeight[elem.symbol] * int(elem.qty) for elem in element_list) - - -formula.runTests( - """\ - H2O - C6H5OH - NaCl - """, - fullDump=False, - postParse=lambda _, tokens: "Molecular weight: {}".format( - sum_atomic_weights_by_results_name_with_converted_ints(tokens) - ), -) -print() - -# Version 4 - parse and convert integers as subscript digits -subscript_digits = "₀₁₂₃₄₅₆₇₈₉" -subscript_int_map = {e[1]: e[0] for e in enumerate(subscript_digits)} - - -def cvt_subscript_int(s): - ret = 0 - for c in s[0]: - ret = ret * 10 + subscript_int_map[c] - return ret - - -subscript_int = pp.Word(subscript_digits).addParseAction(cvt_subscript_int).set_name("subscript") - -elementRef = pp.Group(element("symbol") + pp.Optional(subscript_int, default=1)("qty")) -formula = elementRef[1, ...].setName("chemical_formula") -formula.runTests( - """\ - H₂O - C₆H₅OH - NaCl - """, - fullDump=False, - postParse=lambda _, tokens: "Molecular weight: {}".format( - sum_atomic_weights_by_results_name_with_converted_ints(tokens) - ), -) -print() diff --git a/examples/chemical_formulas.py b/examples/chemical_formulas.py new file mode 100644 index 00000000..16d4bb43 --- /dev/null +++ b/examples/chemical_formulas.py @@ -0,0 +1,119 @@ +# +# chemicalFormulas.py +# +# Copyright (c) 2003,2019 Paul McGuire +# + +import pyparsing as pp + +atomic_weight = { + "O": 15.9994, + "H": 1.00794, + "Na": 22.9897, + "Cl": 35.4527, + "C": 12.0107, +} + +digits = "0123456789" + +# Version 1 +element = pp.Word(pp.alphas.upper(), pp.alphas.lower(), max=2).set_name("element") +# for stricter matching, use this Regex instead +# element = Regex("A[cglmrstu]|B[aehikr]?|C[adeflmorsu]?|D[bsy]|" +# "E[rsu]|F[emr]?|G[ade]|H[efgos]?|I[nr]?|Kr?|L[airu]|" +# "M[dgnot]|N[abdeiop]?|Os?|P[abdmortu]?|R[abefghnu]|" +# "S[bcegimnr]?|T[abcehilm]|U(u[bhopqst])?|V|W|Xe|Yb?|Z[nr]") +element_ref = pp.Group(element + pp.Opt(pp.Word(digits), default="1")) +formula = element_ref[...] + + +def sum_atomic_weights(element_list): + return sum(atomic_weight[elem] * int(qty) for elem, qty in element_list) + + +formula.run_tests( + """\ + H2O + C6H5OH + NaCl + """, + full_dump=False, + post_parse=lambda _, tokens: f"Molecular weight: {sum_atomic_weights(tokens)}", +) +print() + + +# Version 2 - access parsed items by results name +element_ref = pp.Group( + element("symbol") + pp.Opt(pp.Word(digits), default="1")("qty") +) +formula = element_ref[...] 
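+# with the results names above, each parsed element group exposes elem.symbol and
+# elem.qty (used by sum_atomic_weights_by_results_name() below)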
+ + +def sum_atomic_weights_by_results_name(element_list): + return sum(atomic_weight[elem.symbol] * int(elem.qty) for elem in element_list) + + +formula.run_tests( + """\ + H2O + C6H5OH + NaCl + """, + full_dump=False, + post_parse=lambda _, tokens: + f"Molecular weight: {sum_atomic_weights_by_results_name(tokens)}", +) +print() + +# Version 3 - convert integers during parsing process +integer = pp.Word(digits).set_name("integer") +integer.add_parse_action(lambda t: int(t[0])) +element_ref = pp.Group(element("symbol") + pp.Opt(integer, default=1)("qty")) +formula = element_ref[...].set_name("chemical_formula") + + +def sum_atomic_weights_by_results_name_with_converted_ints(element_list): + return sum(atomic_weight[elem.symbol] * int(elem.qty) for elem in element_list) + + +formula.run_tests( + """\ + H2O + C6H5OH + NaCl + """, + full_dump=False, + post_parse=lambda _, tokens: + f"Molecular weight: {sum_atomic_weights_by_results_name_with_converted_ints(tokens)}", +) +print() + +# Version 4 - parse and convert integers as subscript digits +subscript_digits = "₀₁₂₃₄₅₆₇₈₉" +subscript_int_map = {e[1]: e[0] for e in enumerate(subscript_digits)} + + +def cvt_subscript_int(s): + ret = 0 + for c in s[0]: + ret = ret * 10 + subscript_int_map[c] + return ret + + +subscript_int = pp.Word(subscript_digits).set_name("subscript") +subscript_int.add_parse_action(cvt_subscript_int) + +element_ref = pp.Group(element("symbol") + pp.Opt(subscript_int, default=1)("qty")) +formula = element_ref[1, ...].set_name("chemical_formula") +formula.run_tests( + """\ + H₂O + C₆H₅OH + NaCl + """, + full_dump=False, + post_parse=lambda _, tokens: + f"Molecular weight: {sum_atomic_weights_by_results_name_with_converted_ints(tokens)}", +) +print() diff --git a/examples/cpp_enum_parser.py b/examples/cpp_enum_parser.py index 26dde7c3..77eb3a73 100644 --- a/examples/cpp_enum_parser.py +++ b/examples/cpp_enum_parser.py @@ -9,7 +9,7 @@ # # -from pyparsing import * +import pyparsing as pp # sample string with enums and other stuff sample = """ @@ -35,19 +35,19 @@ """ # syntax we don't want to see in the final parse tree -LBRACE, RBRACE, EQ, COMMA = map(Suppress, "{}=,") -_enum = Suppress("enum") -identifier = Word(alphas, alphanums + "_") -integer = Word(nums) -enumValue = Group(identifier("name") + Optional(EQ + integer("value"))) -enumList = Group(enumValue + ZeroOrMore(COMMA + enumValue)) +LBRACE, RBRACE, EQ, COMMA = pp.Suppress.using_each("{}=,") +_enum = pp.Suppress("enum") +identifier = pp.Word(pp.alphas + "_", pp.alphanums + "_") +integer = pp.Word(pp.nums) +enumValue = pp.Group(identifier("name") + pp.Optional(EQ + integer("value"))) +enumList = pp.Group(enumValue + (COMMA + enumValue)[...]) enum = _enum + identifier("enum") + LBRACE + enumList("names") + RBRACE # find instances of enums ignoring other syntax -for item, start, stop in enum.scanString(sample): - id = 0 +for item, start, stop in enum.scan_string(sample): + idx = 0 for entry in item.names: if entry.value != "": - id = int(entry.value) - print("%s_%s = %d" % (item.enum.upper(), entry.name.upper(), id)) - id += 1 + idx = int(entry.value) + print("%s_%s = %d" % (item.enum.upper(), entry.name.upper(), idx)) + idx += 1 diff --git a/examples/datetimeParseActions.py b/examples/datetime_parse_actions.py similarity index 75% rename from examples/datetimeParseActions.py rename to examples/datetime_parse_actions.py index f7c4fc98..ff386562 100644 --- a/examples/datetimeParseActions.py +++ b/examples/datetime_parse_actions.py @@ -1,84 +1,84 @@ -# parseActions.py -# -# 
A sample program a parser to match a date string of the form "YYYY/MM/DD", -# and return it as a datetime, or raise an exception if not a valid date. -# -# Copyright 2012, Paul T. McGuire -# -from datetime import datetime -import pyparsing as pp -from pyparsing import pyparsing_common as ppc - -# define an integer string, and a parse action to convert it -# to an integer at parse time -integer = pp.Word(pp.nums).setName("integer") - - -def convertToInt(tokens): - # no need to test for validity - we can't get here - # unless tokens[0] contains all numeric digits - return int(tokens[0]) - - -integer.setParseAction(convertToInt) -# or can be written as one line as -# integer = Word(nums).setParseAction(lambda t: int(t[0])) - -# define a pattern for a year/month/day date -date_expr = integer("year") + "/" + integer("month") + "/" + integer("day") -date_expr.ignore(pp.pythonStyleComment) - - -def convertToDatetime(s, loc, tokens): - try: - # note that the year, month, and day fields were already - # converted to ints from strings by the parse action defined - # on the integer expression above - return datetime(tokens.year, tokens.month, tokens.day).date() - except Exception as ve: - errmsg = "'%s/%s/%s' is not a valid date, %s" % ( - tokens.year, - tokens.month, - tokens.day, - ve, - ) - raise pp.ParseException(s, loc, errmsg) - - -date_expr.setParseAction(convertToDatetime) - - -date_expr.runTests( - """\ - 2000/1/1 - - # invalid month - 2000/13/1 - - # 1900 was not a leap year - 1900/2/29 - - # but 2000 was - 2000/2/29 - """ -) - - -# if dates conform to ISO8601, use definitions in pyparsing_common -date_expr = ppc.iso8601_date.setParseAction(ppc.convertToDate()) -date_expr.ignore(pp.pythonStyleComment) - -date_expr.runTests( - """\ - 2000-01-01 - - # invalid month - 2000-13-01 - - # 1900 was not a leap year - 1900-02-29 - - # but 2000 was - 2000-02-29 - """ -) +# parseActions.py +# +# A sample program a parser to match a date string of the form "YYYY/MM/DD", +# and return it as a datetime, or raise an exception if not a valid date. +# +# Copyright 2012, Paul T. 
McGuire +# +from datetime import datetime +import pyparsing as pp +from pyparsing import pyparsing_common as ppc + +# define an integer string, and a parse action to convert it +# to an integer at parse time +integer = pp.Word(pp.nums).set_name("integer") + + +def convert_to_int(tokens): + # no need to test for validity - we can't get here + # unless tokens[0] contains all numeric digits + return int(tokens[0]) + + +integer.set_parse_action(convert_to_int) +# or can be written as one line as +# integer = Word(nums).set_parse_action(lambda t: int(t[0])) + +# define a pattern for a year/month/day date +date_expr = integer("year") + "/" + integer("month") + "/" + integer("day") +date_expr.ignore(pp.python_style_comment) + + +def convert_to_datetime(s, loc, tokens): + try: + # note that the year, month, and day fields were already + # converted to ints from strings by the parse action defined + # on the integer expression above + return datetime(tokens.year, tokens.month, tokens.day).date() + except Exception as ve: + errmsg = "'%s/%s/%s' is not a valid date, %s" % ( + tokens.year, + tokens.month, + tokens.day, + ve, + ) + raise pp.ParseException(s, loc, errmsg) + + +date_expr.set_parse_action(convert_to_datetime) + + +date_expr.run_tests( + """\ + 2000/1/1 + + # invalid month + 2000/13/1 + + # 1900 was not a leap year + 1900/2/29 + + # but 2000 was + 2000/2/29 + """ +) + + +# if dates conform to ISO8601, use definitions in pyparsing_common +date_expr = ppc.iso8601_date.set_parse_action(ppc.convert_to_date()) +date_expr.ignore(pp.python_style_comment) + +date_expr.run_tests( + """\ + 2000-01-01 + + # invalid month + 2000-13-01 + + # 1900 was not a leap year + 1900-02-29 + + # but 2000 was + 2000-02-29 + """ +) diff --git a/examples/delta_time.py b/examples/delta_time.py index 2f9466cb..cdd58f48 100644 --- a/examples/delta_time.py +++ b/examples/delta_time.py @@ -39,7 +39,7 @@ # basic grammar definitions def make_integer_word_expr(int_name, int_value): - return pp.CaselessKeyword(int_name).addParseAction(pp.replaceWith(int_value)) + return pp.CaselessKeyword(int_name).add_parse_action(pp.replaceWith(int_value)) integer_word = pp.MatchFirst( @@ -50,52 +50,54 @@ def make_integer_word_expr(int_name, int_value): " seventeen eighteen nineteen twenty".split(), start=1, ) -).setName("integer_word") +).set_name("integer_word") integer = pp.pyparsing_common.integer | integer_word -integer.setName("numeric") +integer.set_name("numeric") CK = pp.CaselessKeyword CL = pp.CaselessLiteral -today, tomorrow, yesterday, noon, midnight, now = map( - CK, "today tomorrow yesterday noon midnight now".split() +today, tomorrow, yesterday, noon, midnight, now = CK.using_each( + "today tomorrow yesterday noon midnight now".split() ) def plural(s): - return CK(s) | CK(s + "s").addParseAction(pp.replaceWith(s)) + return CK(s) | CK(s + "s").add_parse_action(pp.replaceWith(s)) week, day, hour, minute, second = map(plural, "week day hour minute second".split()) time_units = hour | minute | second -any_time_units = (week | day | time_units).setName("time_units") +any_time_units = (week | day | time_units).set_name("any_time_units") am = CL("am") pm = CL("pm") COLON = pp.Suppress(":") -in_ = CK("in").setParseAction(pp.replaceWith(1)) -from_ = CK("from").setParseAction(pp.replaceWith(1)) -before = CK("before").setParseAction(pp.replaceWith(-1)) -after = CK("after").setParseAction(pp.replaceWith(1)) -ago = CK("ago").setParseAction(pp.replaceWith(-1)) -next_ = CK("next").setParseAction(pp.replaceWith(1)) -last_ = 
CK("last").setParseAction(pp.replaceWith(-1)) +in_ = CK("in").set_parse_action(pp.replaceWith(1)) +from_ = CK("from").set_parse_action(pp.replaceWith(1)) +before = CK("before").set_parse_action(pp.replaceWith(-1)) +after = CK("after").set_parse_action(pp.replaceWith(1)) +ago = CK("ago").set_parse_action(pp.replaceWith(-1)) +next_ = CK("next").set_parse_action(pp.replaceWith(1)) +last_ = CK("last").set_parse_action(pp.replaceWith(-1)) at_ = CK("at") on_ = CK("on") couple = ( - (pp.Optional(CK("a")) + CK("couple") + pp.Optional(CK("of"))) - .setParseAction(pp.replaceWith(2)) - .setName("couple") + (pp.Opt(CK("a")) + CK("couple") + pp.Opt(CK("of"))) + .set_parse_action(pp.replaceWith(2)) + .set_name("couple") ) -a_qty = (CK("a") | CK("an")).setParseAction(pp.replaceWith(1)) -the_qty = CK("the").setParseAction(pp.replaceWith(1)) +a_qty = (CK("a") | CK("an")).set_parse_action(pp.replaceWith(1)) +the_qty = CK("the").set_parse_action(pp.replaceWith(1)) qty = pp.ungroup( - (integer | couple | a_qty | the_qty).setName("qty_expression") -).setName("qty") -time_ref_present = pp.Empty().addParseAction(pp.replaceWith(True))("time_ref_present") + (integer | couple | a_qty | the_qty).set_name("qty_expression") +).set_name("qty") +time_ref_present = pp.Empty().add_parse_action(pp.replace_with(True))( + "time_ref_present" +) def fill_24hr_time_fields(t): @@ -111,26 +113,28 @@ def fill_default_time_fields(t): t[fld] = 0 +# get weekday names from the calendar module weekday_name_list = list(calendar.day_name) -weekday_name = pp.oneOf(weekday_name_list).setName("weekday_name") +weekday_name = pp.one_of(weekday_name_list).set_name("weekday_name") -_24hour_time = ~(integer + any_time_units).setName("numbered_time_units") + pp.Word(pp.nums, exact=4).setName("HHMM").addParseAction( +# expressions for military 2400 time +_24hour_time = ~(pp.Word(pp.nums) + any_time_units).set_name("numbered_time_units") + pp.Word( + pp.nums, exact=4, as_keyword=True +).set_name("HHMM").add_parse_action( lambda t: [int(t[0][:2]), int(t[0][2:])], fill_24hr_time_fields ) -_24hour_time.setName("0000 time") +_24hour_time.set_name("0000 time") ampm = am | pm timespec = ( integer("HH") - + pp.Optional( - CK("o'clock") | COLON + integer("MM") + pp.Optional(COLON + integer("SS")) - ) + + pp.Opt(CK("o'clock") | COLON + integer("MM") + pp.Opt(COLON + integer("SS"))) + (am | pm)("ampm") -).addParseAction(fill_default_time_fields) +).add_parse_action(fill_default_time_fields) absolute_time = _24hour_time | timespec -absolute_time.setName("absolute time") +absolute_time.set_name("absolute time") absolute_time_of_day = noon | midnight | now | absolute_time -absolute_time_of_day.setName("time of day") +absolute_time_of_day.set_name("time of day") def add_computed_time(t): @@ -145,12 +149,12 @@ def add_computed_time(t): t["computed_time"] = time(hour=t.HH, minute=t.MM, second=t.SS) -absolute_time_of_day.addParseAction(add_computed_time) +absolute_time_of_day.add_parse_action(add_computed_time) # relative_time_reference ::= qty time_units ('ago' | ('from' | 'before' | 'after') absolute_time_of_day) # | 'in' qty time_units -time_units = (hour | minute | second).setName("time unit") +time_units = (hour | minute | second).set_name("time unit") relative_time_reference = ( ( qty("qty") @@ -162,7 +166,7 @@ def add_computed_time(t): ) ) | in_("dir") + qty("qty") + time_units("units") -).setName("relative time") +).set_name("relative time") def compute_relative_time(t): @@ -174,10 +178,10 @@ def compute_relative_time(t): t["time_delta"] = 
timedelta(seconds=t.dir * delta_seconds) -relative_time_reference.addParseAction(compute_relative_time) +relative_time_reference.add_parse_action(compute_relative_time) time_reference = absolute_time_of_day | relative_time_reference -time_reference.setName("time reference") +time_reference.set_name("time reference") def add_default_time_ref_fields(t): @@ -185,13 +189,13 @@ def add_default_time_ref_fields(t): t["time_delta"] = timedelta() -time_reference.addParseAction(add_default_time_ref_fields) +time_reference.add_parse_action(add_default_time_ref_fields) # absolute_day_reference ::= 'today' | 'tomorrow' | 'yesterday' | ('next' | 'last') weekday_name # day_units ::= 'days' | 'weeks' day_units = day | week -weekday_reference = pp.Optional(next_ | last_, 1)("dir") + weekday_name("day_name") +weekday_reference = pp.Opt(next_ | last_, 1)("dir") + weekday_name("day_name") def convert_abs_day_reference_to_date(t): @@ -222,8 +226,8 @@ def convert_abs_day_reference_to_date(t): absolute_day_reference = ( today | tomorrow | yesterday | now + time_ref_present | weekday_reference ) -absolute_day_reference.addParseAction(convert_abs_day_reference_to_date) -absolute_day_reference.setName("absolute day") +absolute_day_reference.add_parse_action(convert_abs_day_reference_to_date) +absolute_day_reference.set_name("absolute day") # relative_day_reference ::= 'in' qty day_units # | qty day_units @@ -234,7 +238,7 @@ def convert_abs_day_reference_to_date(t): ) + day_units("units") + ( ago("dir") | ((from_ | before | after)("dir") + absolute_day_reference("ref_day")) ) -relative_day_reference.setName("relative day") +relative_day_reference.set_name("relative day") def compute_relative_date(t): @@ -247,11 +251,11 @@ def compute_relative_date(t): t["date_delta"] = timedelta(days=day_diff) -relative_day_reference.addParseAction(compute_relative_date) +relative_day_reference.add_parse_action(compute_relative_date) # combine expressions for absolute and relative day references day_reference = relative_day_reference | absolute_day_reference -day_reference.setName("day reference") +day_reference.set_name("day reference") def add_default_date_fields(t): @@ -259,13 +263,13 @@ def add_default_date_fields(t): t["date_delta"] = timedelta() -day_reference.addParseAction(add_default_date_fields) +day_reference.add_parse_action(add_default_date_fields) # combine date and time expressions into single overall parser -time_and_day = time_reference + time_ref_present + pp.Optional( - pp.Optional(on_) + day_reference -) | day_reference + pp.Optional(at_ + absolute_time_of_day + time_ref_present) -time_and_day.setName("time and day") +time_and_day = time_reference + time_ref_present + pp.Opt( + pp.Opt(on_) + day_reference +) | day_reference + pp.Opt(at_ + absolute_time_of_day + time_ref_present) +time_and_day.set_name("time and day") # parse actions for total time_and_day expression def save_original_string(s, l, t): @@ -318,12 +322,13 @@ def remove_temp_keys(t): del t[k] -time_and_day.addParseAction(save_original_string, compute_timestamp, remove_temp_keys) +time_and_day.add_parse_action(save_original_string, compute_timestamp, remove_temp_keys) time_expression = time_and_day +# fmt: off def main(): current_time = datetime.now() # test grammar @@ -415,9 +420,7 @@ def main(): "the day after tomorrow": timedelta(days=2) - time_of_day, "tomorrow": timedelta(days=1) - time_of_day, "the day before yesterday": timedelta(days=-2) - time_of_day, - "8am the day after tomorrow": timedelta(days=+2) - - time_of_day - + timedelta(hours=8), + 
"8am the day after tomorrow": timedelta(days=+2) - time_of_day + timedelta(hours=8), "yesterday": timedelta(days=-1) - time_of_day, "today": -time_of_day, "midnight": -time_of_day, @@ -430,14 +433,13 @@ def main(): "12:15 AM today": -time_of_day + timedelta(minutes=15), "3pm 2 days from today": timedelta(days=2) - time_of_day + timedelta(hours=15), "ten seconds before noon tomorrow": timedelta(days=1) - - time_of_day - + timedelta(hours=12) - + timedelta(seconds=-10), - "20 seconds before noon": -time_of_day - + timedelta(hours=12) - + timedelta(seconds=-20), + - time_of_day + + timedelta(hours=12) + + timedelta(seconds=-10), + "20 seconds before noon": -time_of_day + timedelta(hours=12) + timedelta(seconds=-20), "in 3 days at 5pm": timedelta(days=3) - time_of_day + timedelta(hours=17), } + # fmt: on def verify_offset(instring, parsed): time_epsilon = timedelta(seconds=1) @@ -449,7 +451,19 @@ def verify_offset(instring, parsed): parsed["verify_offset"] = "FAIL" print("(relative to %s)" % datetime.now()) - time_expression.runTests(tests, postParse=verify_offset) + success, report = time_expression.runTests(tests, postParse=verify_offset) + assert success + + fails = [] + for test, rpt in report: + if rpt.get("verify_offset", "PASS") != "PASS": + fails.append((test, rpt)) + + if fails: + print("\nFAILED") + print("\n".join("- " + test for test, rpt in fails)) + + assert not fails if __name__ == "__main__": diff --git a/examples/eval_arith.py b/examples/eval_arith.py index 613e7280..3a19ae04 100644 --- a/examples/eval_arith.py +++ b/examples/eval_arith.py @@ -13,9 +13,9 @@ nums, alphas, Combine, - oneOf, - opAssoc, - infixNotation, + one_of, + OpAssoc, + infix_notation, Literal, ParserElement, ) @@ -143,28 +143,28 @@ def eval(self): variable = Word(alphas, exact=1) operand = real | integer | variable -signop = oneOf("+ -") -multop = oneOf("* /") -plusop = oneOf("+ -") +signop = one_of("+ -") +multop = one_of("* /") +plusop = one_of("+ -") expop = Literal("**") # use parse actions to attach EvalXXX constructors to sub-expressions operand.setParseAction(EvalConstant) -arith_expr = infixNotation( +arith_expr = infix_notation( operand, [ - (signop, 1, opAssoc.RIGHT, EvalSignOp), - (expop, 2, opAssoc.LEFT, EvalPowerOp), - (multop, 2, opAssoc.LEFT, EvalMultOp), - (plusop, 2, opAssoc.LEFT, EvalAddOp), + (signop, 1, OpAssoc.RIGHT, EvalSignOp), + (expop, 2, OpAssoc.LEFT, EvalPowerOp), + (multop, 2, OpAssoc.LEFT, EvalMultOp), + (plusop, 2, OpAssoc.LEFT, EvalAddOp), ], ) -comparisonop = oneOf("< <= > >= != = <> LT GT LE GE EQ NE") -comp_expr = infixNotation( +comparisonop = one_of("< <= > >= != = <> LT GT LE GE EQ NE") +comp_expr = infix_notation( arith_expr, [ - (comparisonop, 2, opAssoc.LEFT, EvalComparisonOp), + (comparisonop, 2, OpAssoc.LEFT, EvalComparisonOp), ], ) diff --git a/examples/excelExpr.py b/examples/excelExpr.py deleted file mode 100644 index 311a5a41..00000000 --- a/examples/excelExpr.py +++ /dev/null @@ -1,106 +0,0 @@ -# excelExpr.py -# -# Copyright 2010, Paul McGuire -# -# A partial implementation of a parser of Excel formula expressions. 
-# -from pyparsing import ( - CaselessKeyword, - Suppress, - Word, - alphas, - alphanums, - nums, - Optional, - Group, - oneOf, - Forward, - infixNotation, - opAssoc, - dblQuotedString, - delimitedList, - Combine, - Literal, - QuotedString, - ParserElement, - pyparsing_common as ppc, -) - -ParserElement.enablePackrat() - -EQ, LPAR, RPAR, COLON, COMMA = map(Suppress, "=():,") -EXCL, DOLLAR = map(Literal, "!$") -sheetRef = Word(alphas, alphanums) | QuotedString("'", escQuote="''") -colRef = Optional(DOLLAR) + Word(alphas, max=2) -rowRef = Optional(DOLLAR) + Word(nums) -cellRef = Combine( - Group(Optional(sheetRef + EXCL)("sheet") + colRef("col") + rowRef("row")) -) - -cellRange = ( - Group(cellRef("start") + COLON + cellRef("end"))("range") - | cellRef - | Word(alphas, alphanums) -) - -expr = Forward() - -COMPARISON_OP = oneOf("< = > >= <= != <>") -condExpr = expr + COMPARISON_OP + expr - -ifFunc = ( - CaselessKeyword("if") - - LPAR - + Group(condExpr)("condition") - + COMMA - + Group(expr)("if_true") - + COMMA - + Group(expr)("if_false") - + RPAR -) - - -def stat_function(name): - return Group(CaselessKeyword(name) + Group(LPAR + delimitedList(expr) + RPAR)) - - -sumFunc = stat_function("sum") -minFunc = stat_function("min") -maxFunc = stat_function("max") -aveFunc = stat_function("ave") -funcCall = ifFunc | sumFunc | minFunc | maxFunc | aveFunc - -multOp = oneOf("* /") -addOp = oneOf("+ -") -numericLiteral = ppc.number -operand = numericLiteral | funcCall | cellRange | cellRef -arithExpr = infixNotation( - operand, - [ - (multOp, 2, opAssoc.LEFT), - (addOp, 2, opAssoc.LEFT), - ], -) - -textOperand = dblQuotedString | cellRef -textExpr = infixNotation( - textOperand, - [ - ("&", 2, opAssoc.LEFT), - ], -) - -expr <<= arithExpr | textExpr - - -(EQ + expr).runTests( - """\ - =3*A7+5 - =3*Sheet1!$A$7+5 - =3*'Sheet 1'!$A$7+5 - =3*'O''Reilly''s sheet'!$A$7+5 - =if(Sum(A1:A25)>42,Min(B1:B25),if(Sum(C1:C25)>3.14, (Min(C1:C25)+3)*18,Max(B1:B25))) - =sum(a1:a25,10,min(b1,c2,d3)) - =if("T"&a2="TTime", "Ready", "Not ready") -""" -) diff --git a/examples/excel_expr.py b/examples/excel_expr.py new file mode 100644 index 00000000..0877e543 --- /dev/null +++ b/examples/excel_expr.py @@ -0,0 +1,93 @@ +# excelExpr.py +# +# Copyright 2010, Paul McGuire +# +# A partial implementation of a parser of Excel formula expressions. 
+# +import pyparsing as pp +ppc = pp.common + +pp.ParserElement.enable_packrat() + +EQ, LPAR, RPAR, COLON, COMMA = pp.Suppress.using_each("=():,") +EXCL, DOLLAR = pp.Literal.using_each("!$") +sheet_ref = pp.Word(pp.alphas, pp.alphanums) | pp.QuotedString("'", escQuote="''") +col_ref = pp.Opt(DOLLAR) + pp.Word(pp.alphas, max=2) +row_ref = pp.Opt(DOLLAR) + pp.Word(pp.nums) +cell_ref = pp.Combine( + pp.Group(pp.Opt(sheet_ref + EXCL)("sheet") + col_ref("col") + row_ref("row")) +) + +cell_range = ( + pp.Group(cell_ref("start") + COLON + cell_ref("end"))("range") + | cell_ref + | pp.Word(pp.alphas, pp.alphanums) +) + +expr = pp.Forward() + +COMPARISON_OP = pp.one_of("< = > >= <= != <>") +cond_expr = expr + COMPARISON_OP + expr + +if_func = ( + pp.CaselessKeyword("if") + - LPAR + + pp.Group(cond_expr)("condition") + + COMMA + + pp.Group(expr)("if_true") + + COMMA + + pp.Group(expr)("if_false") + + RPAR +) + + +def stat_function(name): + return pp.Group(pp.CaselessKeyword(name) + pp.Group(LPAR + pp.DelimitedList(expr) + RPAR)) + + +sum_func = stat_function("sum") +min_func = stat_function("min") +max_func = stat_function("max") +ave_func = stat_function("ave") +func_call = if_func | sum_func | min_func | max_func | ave_func + +mult_op = pp.one_of("* /") +add_op = pp.one_of("+ -") +numeric_literal = ppc.number +operand = numeric_literal | func_call | cell_range | cell_ref +arith_expr = pp.infix_notation( + operand, + [ + (mult_op, 2, pp.OpAssoc.LEFT), + (add_op, 2, pp.OpAssoc.LEFT), + ], +) + +text_operand = pp.dbl_quoted_string | cell_ref +text_expr = pp.infix_notation( + text_operand, + [ + ("&", 2, pp.OpAssoc.LEFT), + ], +) + +expr <<= arith_expr | text_expr + + +def main(): + success, report = (EQ + expr).run_tests( + """\ + =3*A7+5 + =3*Sheet1!$A$7+5 + =3*'Sheet 1'!$A$7+5 + =3*'O''Reilly''s sheet'!$A$7+5 + =if(Sum(A1:A25)>42,Min(B1:B25),if(Sum(C1:C25)>3.14, (Min(C1:C25)+3)*18,Max(B1:B25))) + =sum(a1:a25,10,min(b1,c2,d3)) + =if("T"&a2="TTime", "Ready", "Not ready") + """ + ) + assert success + + +if __name__ == '__main__': + main() diff --git a/examples/gen_ctypes.py b/examples/gen_ctypes.py index 176644f3..0eb0b7b7 100644 --- a/examples/gen_ctypes.py +++ b/examples/gen_ctypes.py @@ -44,16 +44,16 @@ "void": "None", } -LPAR, RPAR, LBRACE, RBRACE, COMMA, SEMI = map(Suppress, "(){},;") -ident = Word(alphas, alphanums + "_") +LPAR, RPAR, LBRACE, RBRACE, COMMA, SEMI = Suppress.using_each("(){},;") +ident = pyparsing_common.identifier integer = Regex(r"[+-]?\d+") hexinteger = Regex(r"0x[0-9a-fA-F]+") const = Suppress("const") -primitiveType = oneOf(t for t in typemap if not t.endswith("*")) +primitiveType = one_of(t for t in typemap if not t.endswith("*")) structType = Suppress("struct") + ident vartype = ( - Optional(const) + (primitiveType | structType | ident) + Optional(Word("*")("ptr")) + Opt(const) + (primitiveType | structType | ident) + Opt(Word("*")("ptr")) ) @@ -64,14 +64,14 @@ def normalizetype(t): # ~ return ret -vartype.setParseAction(normalizetype) +vartype.set_parse_action(normalizetype) -arg = Group(vartype("argtype") + Optional(ident("argname"))) +arg = Group(vartype("argtype") + Opt(ident("argname"))) func_def = ( vartype("fn_type") + ident("fn_name") + LPAR - + Optional(delimitedList(arg | "..."))("fn_args") + + Opt(DelimitedList(arg | "..."))("fn_args") + RPAR + SEMI ) @@ -82,7 +82,7 @@ def derivefields(t): t["varargs"] = True -func_def.setParseAction(derivefields) +func_def.set_parse_action(derivefields) fn_typedef = "typedef" + func_def var_typedef = "typedef" + 
primitiveType("primType") + ident("name") + SEMI @@ -90,10 +90,10 @@ def derivefields(t): enum_def = ( Keyword("enum") + LBRACE - + delimitedList(Group(ident("name") + "=" + (hexinteger | integer)("value")))( + + DelimitedList(Group(ident("name") + "=" + (hexinteger | integer)("value")))( "evalues" ) - + Optional(COMMA) + + Opt(COMMA) + RBRACE ) @@ -135,13 +135,13 @@ def typeAsCtypes(typestr): # scan input header text for primitive typedefs -for td, _, _ in var_typedef.scanString(c_header): +for td, _, _ in var_typedef.scan_string(c_header): typedefs.append((td.name, td.primType)) # add typedef type to typemap to map to itself typemap[td.name] = td.name # scan input header text for function typedefs -fn_typedefs = fn_typedef.searchString(c_header) +fn_typedefs = fn_typedef.search_string(c_header) # add each function typedef to typemap to map to itself for fntd in fn_typedefs: typemap[fntd.fn_name] = fntd.fn_name @@ -149,7 +149,7 @@ def typeAsCtypes(typestr): # scan input header text, and keep running list of user-defined types for fn, _, _ in ( cStyleComment.suppress() | fn_typedef.suppress() | func_def -).scanString(c_header): +).scan_string(c_header): if not fn: continue getUDType(fn.fn_type) @@ -160,8 +160,8 @@ def typeAsCtypes(typestr): functions.append(fn) # scan input header text for enums -enum_def.ignore(cppStyleComment) -for en_, _, _ in enum_def.scanString(c_header): +enum_def.ignore(cpp_style_comment) +for en_, _, _ in enum_def.scan_string(c_header): for ev in en_.evalues: enum_constants.append((ev.name, ev.value)) diff --git a/examples/getNTPserversNew.py b/examples/getNTPserversNew.py index 5fcd9d15..8c4c94f3 100644 --- a/examples/getNTPserversNew.py +++ b/examples/getNTPserversNew.py @@ -13,8 +13,8 @@ integer = pp.Word(pp.nums) ipAddress = ppc.ipv4_address() -hostname = pp.delimitedList(pp.Word(pp.alphas, pp.alphanums + "-_"), ".", combine=True) -tdStart, tdEnd = pp.makeHTMLTags("td") +hostname = pp.DelimitedList(pp.Word(pp.alphas, pp.alphanums + "-_"), ".", combine=True) +tdStart, tdEnd = pp.make_html_tags("td") timeServerPattern = ( tdStart + hostname("hostname") @@ -33,6 +33,6 @@ serverListHTML = serverListPage.read().decode("UTF-8") addrs = {} -for srvr, startloc, endloc in timeServerPattern.scanString(serverListHTML): - print("{} ({}) - {}".format(srvr.ipAddr, srvr.hostname.strip(), srvr.loc.strip())) +for srvr, startloc, endloc in timeServerPattern.scan_string(serverListHTML): + print(f"{srvr.ipAddr} ({srvr.hostname.strip()}) - {srvr.loc.strip()}") addrs[srvr.ipAddr] = srvr.loc diff --git a/examples/greeting.py b/examples/greeting.py index 28a534ae..17a7b2ab 100644 --- a/examples/greeting.py +++ b/examples/greeting.py @@ -8,16 +8,16 @@ import pyparsing as pp # define grammar -greet = pp.Word(pp.alphas) + "," + pp.Word(pp.alphas) + pp.oneOf("! ? .") +greet = pp.Word(pp.alphas) + "," + pp.Word(pp.alphas) + pp.one_of("! ? .") # input string hello = "Hello, World!" # parse input string -print(hello, "->", greet.parseString(hello)) +print(hello, "->", greet.parse_string(hello)) # parse a bunch of input strings -greet.runTests( +greet.run_tests( """\ Hello, World! Ahoy, Matey! diff --git a/examples/greetingInGreek.py b/examples/greetingInGreek.py index ed98e9ad..aa8272a6 100644 --- a/examples/greetingInGreek.py +++ b/examples/greetingInGreek.py @@ -15,4 +15,4 @@ hello = "Καλημέρα, κόσμε!" 
# parse input string -print(greet.parseString(hello)) +print(greet.parse_string(hello)) diff --git a/examples/greetingInKorean.py b/examples/greetingInKorean.py index 00ea9bc9..d2c0b634 100644 --- a/examples/greetingInKorean.py +++ b/examples/greetingInKorean.py @@ -7,14 +7,14 @@ # from pyparsing import Word, pyparsing_unicode as ppu -koreanChars = ppu.Korean.alphas -koreanWord = Word(koreanChars, min=2) +korean_chars = ppu.한국어.alphas +korean_word = Word(korean_chars, min=2) # define grammar -greet = koreanWord + "," + koreanWord + "!" +greet = korean_word + "," + korean_word + "!" # input string hello = "안녕, 여러분!" # "Hello, World!" in Korean # parse input string -print(greet.parseString(hello)) +print(greet.parse_string(hello)) diff --git a/examples/holaMundo.py b/examples/hola_mundo.py similarity index 69% rename from examples/holaMundo.py rename to examples/hola_mundo.py index bb66ca24..d44bb351 100644 --- a/examples/holaMundo.py +++ b/examples/hola_mundo.py @@ -1,67 +1,73 @@ -# escrito por Marco Alfonso, 2004 Noviembre - -# importamos los símbolos requeridos desde el módulo -from pyparsing import ( - Word, - alphas, - oneOf, - nums, - Group, - OneOrMore, - pyparsing_unicode as ppu, -) - -# usamos las letras en latin1, que incluye las como 'ñ', 'á', 'é', etc. -alphas = ppu.Latin1.alphas - -# Aqui decimos que la gramatica "saludo" DEBE contener -# una palabra compuesta de caracteres alfanumericos -# (Word(alphas)) mas una ',' mas otra palabra alfanumerica, -# mas '!' y esos seian nuestros tokens -saludo = Word(alphas) + "," + Word(alphas) + oneOf("! . ?") -tokens = saludo.parseString("Hola, Mundo !") - -# Ahora parseamos una cadena, "Hola, Mundo!", -# el metodo parseString, nos devuelve una lista con los tokens -# encontrados, en caso de no haber errores... -for i, token in enumerate(tokens): - print("Token %d -> %s" % (i, token)) - -# imprimimos cada uno de los tokens Y listooo!!, he aquí a salida -# Token 0 -> Hola -# Token 1 -> , -# Token 2-> Mundo -# Token 3 -> ! - -# ahora cambia el parseador, aceptando saludos con mas que una sola palabra antes que ',' -saludo = Group(OneOrMore(Word(alphas))) + "," + Word(alphas) + oneOf("! . ?") -tokens = saludo.parseString("Hasta mañana, Mundo !") - -for i, token in enumerate(tokens): - print("Token %d -> %s" % (i, token)) - -# Ahora parseamos algunas cadenas, usando el metodo runTests -saludo.runTests( - """\ - Hola, Mundo! - Hasta mañana, Mundo ! -""", - fullDump=False, -) - -# Por supuesto, se pueden "reutilizar" gramáticas, por ejemplo: -numimag = Word(nums) + "i" -numreal = Word(nums) -numcomplex = numreal + "+" + numimag -print(numcomplex.parseString("3+5i")) - -# Funcion para cambiar a complejo numero durante parsear: -def hace_python_complejo(t): - valid_python = "".join(t).replace("i", "j") - return complex(valid_python) - - -numcomplex.setParseAction(hace_python_complejo) -print(numcomplex.parseString("3+5i")) - -# Excelente!!, bueno, los dejo, me voy a seguir tirando código... +# escrito por Marco Alfonso, 2004 Noviembre + +# importamos los símbolos requeridos desde el módulo +from pyparsing import ( + Word, + one_of, + nums, + Group, + OneOrMore, + Opt, + pyparsing_unicode as ppu, +) + +# usamos las letras en latin1, que incluye las como 'ñ', 'á', 'é', etc. +alphas = ppu.Latin1.alphas + +# Aqui decimos que la gramatica "saludo" DEBE contener +# una palabra compuesta de caracteres alfanumericos +# (Word(alphas)) mas una ',' mas otra palabra alfanumerica, +# mas '!' 
y esos seian nuestros tokens +saludo = Word(alphas) + "," + Word(alphas) + one_of("! . ?") +tokens = saludo.parse_string("Hola, Mundo !") + +# Ahora parseamos una cadena, "Hola, Mundo!", +# el metodo parseString, nos devuelve una lista con los tokens +# encontrados, en caso de no haber errores... +for i, token in enumerate(tokens): + print(f"Token {i} -> {token}") + +# imprimimos cada uno de los tokens Y listooo!!, he aquí a salida +# Token 0 -> Hola +# Token 1 -> , +# Token 2-> Mundo +# Token 3 -> ! + +# ahora cambia el parseador, aceptando saludos con mas que una sola palabra antes que ',' +saludo = Group(OneOrMore(Word(alphas))) + "," + Word(alphas) + one_of("! . ?") +tokens = saludo.parse_string("Hasta mañana, Mundo !") + +for i, token in enumerate(tokens): + print(f"Token {i} -> {token}") + +# Ahora parseamos algunas cadenas, usando el metodo runTests +saludo.run_tests("""\ + Hola, Mundo! + Hasta mañana, Mundo ! + """, + fullDump=False, +) + +# Por supuesto, se pueden "reutilizar" gramáticas, por ejemplo: +numimag = Word(nums) + "i" +numreal = Word(nums) +numcomplex = numimag | numreal + Opt("+" + numimag) + +# Funcion para cambiar a complejo numero durante parsear: +def hace_python_complejo(t): + valid_python = "".join(t).replace("i", "j") + for tipo in (int, complex): + try: + return tipo(valid_python) + except ValueError: + pass + + +numcomplex.set_parse_action(hace_python_complejo) +numcomplex.run_tests("""\ + 3 + 5i + 3+5i +""") + +# Excelente!!, bueno, los dejo, me voy a seguir tirando código... diff --git a/examples/htmlStripper.py b/examples/htmlStripper.py deleted file mode 100644 index 6a209fad..00000000 --- a/examples/htmlStripper.py +++ /dev/null @@ -1,42 +0,0 @@ -# -# htmlStripper.py -# -# Sample code for stripping HTML markup tags and scripts from -# HTML source files. -# -# Copyright (c) 2006, 2016, Paul McGuire -# -from urllib.request import urlopen -from pyparsing import ( - makeHTMLTags, - commonHTMLEntity, - replaceHTMLEntity, - htmlComment, - anyOpenTag, - anyCloseTag, - LineEnd, - replaceWith, -) - -scriptOpen, scriptClose = makeHTMLTags("script") -scriptBody = scriptOpen + scriptOpen.tag_body + scriptClose -commonHTMLEntity.setParseAction(replaceHTMLEntity) - -# get some HTML -targetURL = "https://wiki.python.org/moin/PythonDecoratorLibrary" -with urlopen(targetURL) as targetPage: - targetHTML = targetPage.read().decode("UTF-8") - -# first pass, strip out tags and translate entities -firstPass = ( - (htmlComment | scriptBody | commonHTMLEntity | anyOpenTag | anyCloseTag) - .suppress() - .transformString(targetHTML) -) - -# first pass leaves many blank lines, collapse these down -repeatedNewlines = LineEnd() * (2,) -repeatedNewlines.setParseAction(replaceWith("\n\n")) -secondPass = repeatedNewlines.transformString(firstPass) - -print(secondPass) diff --git a/examples/html_stripper.py b/examples/html_stripper.py new file mode 100644 index 00000000..92d38c75 --- /dev/null +++ b/examples/html_stripper.py @@ -0,0 +1,58 @@ +# +# html_stripper.py +# +# Sample code for stripping HTML markup tags and scripts from +# HTML source files. +# +# Copyright (c) 2006, 2016, 2023, Paul McGuire +# +from urllib.request import urlopen +from pyparsing import ( + LineEnd, + quoted_string, + make_html_tags, + common_html_entity, + replace_html_entity, + html_comment, + any_open_tag, + any_close_tag, + replace_with, +) + +# if