|
6 | 6 | #
|
7 | 7 | from __future__ import annotations
|
8 | 8 |
|
9 |
| -import abc |
10 | 9 | import argparse
|
11 | 10 | import ast
|
12 | 11 | import builtins as bltns
|
13 |
| -import collections |
14 | 12 | import contextlib
|
15 | 13 | import dataclasses as dc
|
16 | 14 | import enum
|
|
57 | 55 | ClassDict, ModuleDict, FunctionKind,
|
58 | 56 | CALLABLE, STATIC_METHOD, CLASS_METHOD, METHOD_INIT, METHOD_NEW,
|
59 | 57 | GETTER, SETTER)
|
| 58 | +from libclinic.language import Language, PythonLanguage |
| 59 | +from libclinic.block_parser import Block, BlockParser |
60 | 60 |
|
61 | 61 |
|
62 | 62 | # TODO:
|
@@ -144,96 +144,6 @@ def __init__(self) -> None:
|
144 | 144 | self.unlock: list[str] = []
|
145 | 145 |
|
146 | 146 |
|
147 |
class Language(metaclass=abc.ABCMeta):
    """
    Abstract base describing how Clinic blocks are marked up in a host file.

    Subclasses supply the four class-level template strings and implement
    render().  validate() checks that the templates contain exactly the
    replacement fields they are allowed to contain.
    """

    # Marker templates; each is a str.format-style template that is
    # checked by validate().
    start_line = ""
    body_prefix = ""
    stop_line = ""
    checksum_line = ""

    def __init__(self, filename: str) -> None:
        """Remember the name of the file this language object is bound to."""
        self.filename = filename

    @abc.abstractmethod
    def render(
            self,
            clinic: Clinic,
            signatures: Iterable[Module | Class | Function]
    ) -> str:
        """Produce the output text for *signatures*; subclasses must override."""
        ...

    def parse_line(self, line: str) -> None:
        """Per-line hook; the base implementation does nothing."""
        ...

    def validate(self) -> None:
        """
        Check the marker templates of this language.

        start_line and stop_line must contain {dsl_name} exactly once and
        no other field.  checksum_line must contain {dsl_name} exactly once
        plus exactly one of {arguments} (if present) or {checksum}.
        Problems are reported through fail().
        """
        def assert_only_one(attr: str, *additional_fields: str) -> None:
            """
            Verify the template stored at getattr(self, attr).

            The permitted fields are 'dsl_name' plus additional_fields.
            Every permitted field has to occur exactly once, and no
            other field may occur at all.

            Given  self.fmt = "{dsl_name} {a} {b}":

              assert_only_one('fmt', 'a', 'b')       -> passes
              assert_only_one('fmt', 'a')            -> fails, {b} is not permitted
              assert_only_one('fmt', 'a', 'b', 'c')  -> fails, {c} is missing

            Given  self.fmt2 = "{dsl_name} {a} {a}":

              assert_only_one('fmt2', 'a')           -> fails, {a} occurs twice
            """
            allowed = ['dsl_name', *additional_fields]
            template: str = getattr(self, attr)
            counter = libclinic.FormatCounterFormatter()
            counter.format(template)

            def report(field_name: str, *, must_be_present: bool) -> None:
                # Pick the message that matches the kind of violation.
                if must_be_present:
                    fail("{} {} must contain {{{}}} exactly once!".format(
                        self.__class__.__name__, attr, field_name))
                else:
                    fail("{} {} must not contain {{{}}}!".format(
                        self.__class__.__name__, attr, field_name))

            # First pass: every field that occurs in the template must be
            # permitted and must not be repeated.
            for seen, occurrences in counter.counts.items():
                if seen not in allowed:
                    report(seen, must_be_present=False)
                elif occurrences > 1:
                    report(seen, must_be_present=True)

            # Second pass: every permitted field must occur exactly once.
            for expected in allowed:
                if counter.counts.get(expected) != 1:
                    report(expected, must_be_present=True)

        assert_only_one('start_line')
        assert_only_one('stop_line')

        # The checksum line carries either {arguments} or {checksum}.
        if "{arguments}" in self.checksum_line:
            checksum_field = "arguments"
        else:
            checksum_field = "checksum"
        assert_only_one('checksum_line', checksum_field)
225 |
| - |
226 |
| - |
227 |
| - |
228 |
class PythonLanguage(Language):
    """
    Marker templates for Clinic blocks embedded in Python source.

    Every marker, and the prefix of every body line, starts with '#'.
    """

    language = "Python"

    # Line that opens a block; {dsl_name} is the name between the brackets.
    start_line = "#/*[{dsl_name} input]"
    # Prefix carried by each line of the block body.
    body_prefix = "#"
    # Line that closes the block body.
    stop_line = "#[{dsl_name} start generated code]*/"
    # Line that closes the generated output and carries its arguments.
    checksum_line = "#/*[{dsl_name} end generated code: {arguments}]*/"
235 |
| - |
236 |
| - |
237 | 147 | ParamTuple = tuple["Parameter", ...]
|
238 | 148 |
|
239 | 149 |
|
@@ -1646,250 +1556,6 @@ def render_function(
|
1646 | 1556 | return clinic.get_destination('block').dump()
|
1647 | 1557 |
|
1648 | 1558 |
|
1649 |
| -@dc.dataclass(slots=True, repr=False) |
1650 |
| -class Block: |
1651 |
| - r""" |
1652 |
| - Represents a single block of text embedded in |
1653 |
| - another file. If dsl_name is None, the block represents |
1654 |
| - verbatim text, raw original text from the file, in |
1655 |
| - which case "input" will be the only non-false member. |
1656 |
| - If dsl_name is not None, the block represents a Clinic |
1657 |
| - block. |
1658 |
| -
|
1659 |
| - input is always str, with embedded \n characters. |
1660 |
| - input represents the original text from the file; |
1661 |
| - if it's a Clinic block, it is the original text with |
1662 |
| - the body_prefix and redundant leading whitespace removed. |
1663 |
| -
|
1664 |
| - dsl_name is either str or None. If str, it's the text |
1665 |
| - found on the start line of the block between the square |
1666 |
| - brackets. |
1667 |
| -
|
1668 |
| - signatures is a list. |
1669 |
| - It may only contain clinic.Module, clinic.Class, and |
1670 |
| - clinic.Function objects. At the moment it should |
1671 |
| - contain at most one of each. |
1672 |
| -
|
1673 |
| - output is either str or None. If str, it's the output |
1674 |
| - from this block, with embedded '\n' characters. |
1675 |
| -
|
1676 |
| - indent is a str. It's the leading whitespace |
1677 |
| - that was found on every line of input. (If body_prefix is |
1678 |
| - not empty, this is the indent *after* removing the |
1679 |
| - body_prefix.) |
1680 |
| -
|
1681 |
| - "indent" is different from the concept of "preindent" |
1682 |
| - (which is not stored as state on Block objects). |
1683 |
| - "preindent" is the whitespace that |
1684 |
| - was found in front of every line of input *before* the |
1685 |
| - "body_prefix" (see the Language object). If body_prefix |
1686 |
| - is empty, preindent must always be empty too. |
1687 |
| -
|
1688 |
| - To illustrate the difference between "indent" and "preindent": |
1689 |
| -
|
1690 |
| - Assume that '_' represents whitespace. |
1691 |
| - If the block processed was in a Python file, and looked like this: |
1692 |
| - ____#/*[python] |
1693 |
| - ____#__for a in range(20): |
1694 |
| - ____#____print(a) |
1695 |
| - ____#[python]*/ |
1696 |
| - "preindent" would be "____" and "indent" would be "__". |
1697 |
| -
|
1698 |
| - """ |
1699 |
| - input: str |
1700 |
| - dsl_name: str | None = None |
1701 |
| - signatures: list[Module | Class | Function] = dc.field(default_factory=list) |
1702 |
| - output: Any = None # TODO: Very dynamic; probably untypeable in its current form? |
1703 |
| - indent: str = '' |
1704 |
| - |
1705 |
| - def __repr__(self) -> str: |
1706 |
| - dsl_name = self.dsl_name or "text" |
1707 |
| - def summarize(s: object) -> str: |
1708 |
| - s = repr(s) |
1709 |
| - if len(s) > 30: |
1710 |
| - return s[:26] + "..." + s[0] |
1711 |
| - return s |
1712 |
| - parts = ( |
1713 |
| - repr(dsl_name), |
1714 |
| - f"input={summarize(self.input)}", |
1715 |
| - f"output={summarize(self.output)}" |
1716 |
| - ) |
1717 |
| - return f"<clinic.Block {' '.join(parts)}>" |
1718 |
| - |
1719 |
| - |
1720 |
class BlockParser:
    """
    Block-oriented parser for Argument Clinic.
    Iterator, yields Block objects.

    Verbatim text and Clinic blocks are yielded alternately, in file
    order; an empty verbatim block at the very start is skipped.
    """

    def __init__(
            self,
            input: str,
            language: Language,
            *,
            verify: bool = True
    ) -> None:
        r"""
        "input" should be a str object
        with embedded \n characters.

        "language" should be a Language object.

        "verify" controls whether the checksum recorded on an end
        marker line is compared against the output found in the file.
        """
        language.validate()

        # Lines are stored reversed so that pop() hands them out in file
        # order and extend() can push lines back for re-reading.
        self.input = collections.deque(reversed(input.splitlines(keepends=True)))
        self.block_start_line_number = self.line_number = 0

        self.language = language
        before, _, after = language.start_line.partition('{dsl_name}')
        assert _ == '{dsl_name}'
        self.find_start_re = libclinic.create_regex(before, after,
                                                    whole_line=False)
        self.start_re = libclinic.create_regex(before, after)
        self.verify = verify
        # The checksum regex is cached for the most recent dsl_name.
        self.last_checksum_re: re.Pattern[str] | None = None
        self.last_dsl_name: str | None = None
        # Set by parse_verbatim_block() when it runs into a start line;
        # consumed by the next call to __next__().
        self.dsl_name: str | None = None
        self.first_block = True

    def __iter__(self) -> BlockParser:
        return self

    def __next__(self) -> Block:
        """
        Return the next Block, or raise StopIteration when input is exhausted.

        A ClinicError raised while parsing a Clinic block is annotated
        with the filename and current line number, then re-raised.
        """
        while True:
            if not self.input:
                raise StopIteration

            if self.dsl_name:
                try:
                    return_value = self.parse_clinic_block(self.dsl_name)
                except ClinicError as exc:
                    exc.filename = self.language.filename
                    exc.lineno = self.line_number
                    raise
                self.dsl_name = None
                self.first_block = False
                return return_value
            block = self.parse_verbatim_block()
            if self.first_block and not block.input:
                # Nothing precedes the first start line; don't yield an
                # empty verbatim block for it.
                continue
            self.first_block = False
            return block

    def is_start_line(self, line: str) -> str | None:
        """Return the dsl name if *line* (ignoring leading whitespace) is a start line, else None."""
        match = self.start_re.match(line.lstrip())
        return match.group(1) if match else None

    def _line(self, lookahead: bool = False) -> str:
        """
        Pop and return the next input line, advancing line_number.

        Unless *lookahead* is true, the line is also passed to
        language.parse_line().
        """
        self.line_number += 1
        line = self.input.pop()
        if not lookahead:
            self.language.parse_line(line)
        return line

    def parse_verbatim_block(self) -> Block:
        """
        Collect lines up to (not including) the next start line.

        If a start line is found it is consumed and its dsl name is
        stored in self.dsl_name for the following parse_clinic_block().
        """
        lines = []
        self.block_start_line_number = self.line_number

        while self.input:
            line = self._line()
            dsl_name = self.is_start_line(line)
            if dsl_name:
                self.dsl_name = dsl_name
                break
            lines.append(line)

        return Block("".join(lines))

    def parse_clinic_block(self, dsl_name: str) -> Block:
        """
        Parse the body, and any existing output, of a *dsl_name* block.

        The body runs until the stop line (or the next start line).  The
        lines after it are then scanned for an end marker (checksum) line.
        If one is found, the lines in between become the block's output
        and, when self.verify is true, their checksum is compared with
        the one recorded on the marker.  Otherwise the scanned lines are
        pushed back and the block's output is None.

        Malformed stop lines, malformed marker arguments and checksum
        mismatches are reported through fail().
        """
        in_lines = []
        self.block_start_line_number = self.line_number + 1
        stop_line = self.language.stop_line.format(dsl_name=dsl_name)
        body_prefix = self.language.body_prefix.format(dsl_name=dsl_name)

        def is_stop_line(line: str) -> bool:
            # make sure to recognize stop line even if it
            # doesn't end with EOL (it could be the very end of the file)
            if line.startswith(stop_line):
                remainder = line.removeprefix(stop_line)
                if remainder and not remainder.isspace():
                    fail(f"Garbage after stop line: {remainder!r}")
                return True
            else:
                # gh-92256: don't allow incorrectly formatted stop lines
                if line.lstrip().startswith(stop_line):
                    fail(f"Whitespace is not allowed before the stop line: {line!r}")
                return False

        # consume body of program
        while self.input:
            line = self._line()
            if is_stop_line(line) or self.is_start_line(line):
                break
            if body_prefix:
                line = line.lstrip()
                assert line.startswith(body_prefix)
                line = line.removeprefix(body_prefix)
            in_lines.append(line)

        # consume output and checksum line, if present.
        if self.last_dsl_name == dsl_name:
            checksum_re = self.last_checksum_re
        else:
            # Fill in dsl_name but keep {arguments} as a placeholder, so
            # the text on either side of it can be turned into a regex.
            before, _, after = self.language.checksum_line.format(dsl_name=dsl_name, arguments='{arguments}').partition('{arguments}')
            assert _ == '{arguments}'
            checksum_re = libclinic.create_regex(before, after, word=False)
            self.last_dsl_name = dsl_name
            self.last_checksum_re = checksum_re
        assert checksum_re is not None

        # scan forward for checksum line
        out_lines = []
        arguments = None
        while self.input:
            line = self._line(lookahead=True)
            match = checksum_re.match(line.lstrip())
            arguments = match.group(1) if match else None
            if arguments:
                break
            out_lines.append(line)
            if self.is_start_line(line):
                break

        output: str | None
        output = "".join(out_lines)
        if arguments:
            # Here "line" is the marker line that produced "arguments".
            d: dict[str, str] = {}
            for field in shlex.split(arguments):
                name, equals, value = field.partition('=')
                if not equals:
                    fail(f"Mangled Argument Clinic marker line: {line!r}")
                d[name.strip()] = value.strip()

            if self.verify:
                # A marker with an "input" field keeps the output checksum
                # under "output"; otherwise it is under "checksum".
                checksum_key = 'output' if 'input' in d else 'checksum'
                checksum = d.get(checksum_key)
                if checksum is None:
                    # Report a marker without the expected field as a
                    # Clinic failure instead of letting KeyError escape.
                    fail(f"Mangled Argument Clinic marker line: {line!r}")

                computed = libclinic.compute_checksum(output, len(checksum))
                if checksum != computed:
                    fail("Checksum mismatch! "
                         f"Expected {checksum!r}, computed {computed!r}. "
                         "Suggested fix: remove all generated code including "
                         "the end marker, or use the '-f' option.")
        else:
            # put back output
            output_lines = output.splitlines(keepends=True)
            self.line_number -= len(output_lines)
            self.input.extend(reversed(output_lines))
            output = None

        return Block("".join(in_lines), dsl_name, output=output)
1891 |
| - |
1892 |
| - |
1893 | 1559 | @dc.dataclass(slots=True, frozen=True)
|
1894 | 1560 | class Include:
|
1895 | 1561 | """
|
|
0 commit comments