Skip to content

Commit b54d7c8

Browse files
authored
gh-113317, AC: Add libclinic.block_parser module (#116819)
* Move Block and BlockParser classes to a new libclinic.block_parser module.
* Move Language and PythonLanguage classes to a new libclinic.language module.
1 parent bae6579 commit b54d7c8

File tree

3 files changed

+361
-336
lines changed

3 files changed

+361
-336
lines changed

Tools/clinic/clinic.py

+2-336
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,9 @@
66
#
77
from __future__ import annotations
88

9-
import abc
109
import argparse
1110
import ast
1211
import builtins as bltns
13-
import collections
1412
import contextlib
1513
import dataclasses as dc
1614
import enum
@@ -57,6 +55,8 @@
5755
ClassDict, ModuleDict, FunctionKind,
5856
CALLABLE, STATIC_METHOD, CLASS_METHOD, METHOD_INIT, METHOD_NEW,
5957
GETTER, SETTER)
58+
from libclinic.language import Language, PythonLanguage
59+
from libclinic.block_parser import Block, BlockParser
6060

6161

6262
# TODO:
@@ -144,96 +144,6 @@ def __init__(self) -> None:
144144
self.unlock: list[str] = []
145145

146146

147-
class Language(metaclass=abc.ABCMeta):
    """
    Abstract base for the host languages that clinic blocks are embedded in.

    A concrete language provides the marker templates declared below and
    implements render().  validate() checks that the templates contain the
    expected replacement fields.
    """

    # Marker templates.  They are empty here; subclasses override them.
    start_line = ""
    body_prefix = ""
    stop_line = ""
    checksum_line = ""

    def __init__(self, filename: str) -> None:
        """Remember the name of the file this language instance is used for."""
        self.filename = filename

    @abc.abstractmethod
    def render(
        self,
        clinic: Clinic,
        signatures: Iterable[Module | Class | Function]
    ) -> str:
        """Produce the text for *signatures*; every subclass must implement this."""
        ...

    def parse_line(self, line: str) -> None:
        """Per-line hook; the base implementation does nothing."""
        ...

    def validate(self) -> None:
        """
        Check the replacement fields used by the marker templates.

        start_line and stop_line must each contain {dsl_name} exactly once
        and nothing else.  checksum_line must contain {dsl_name} plus exactly
        one {arguments} field if "{arguments}" occurs in it, otherwise exactly
        one {checksum} field.  Any violation is reported through fail().
        """
        def assert_only_one(
            attr: str,
            *additional_fields: str
        ) -> None:
            """
            Ensures that the string found at getattr(self, attr)
            contains exactly one formatter replacement string for
            each valid field.  The list of valid fields is
            ['dsl_name'] extended by additional_fields.

            e.g.
                self.fmt = "{dsl_name} {a} {b}"

                # this passes
                self.assert_only_one('fmt', 'a', 'b')

                # this fails, the format string has a {b} in it
                self.assert_only_one('fmt', 'a')

                # this fails, the format string doesn't have a {c} in it
                self.assert_only_one('fmt', 'a', 'b', 'c')

                # this fails, the format string has two {a}s in it,
                # it must contain exactly one
                self.fmt2 = '{dsl_name} {a} {a}'
                self.assert_only_one('fmt2', 'a')
            """
            wanted = ['dsl_name', *additional_fields]
            template: str = getattr(self, attr)
            # After format(), counter.counts is read below as a mapping of
            # field name -> number of occurrences in the template.
            counter = libclinic.FormatCounterFormatter()
            counter.format(template)

            def complain(field_name: str, *, expected_once: bool) -> None:
                # expected_once: the field is required exactly once but is
                # missing or repeated; otherwise the field is not allowed.
                if expected_once:
                    message = "{} {} must contain {{{}}} exactly once!"
                else:
                    message = "{} {} must not contain {{{}}}!"
                fail(message.format(self.__class__.__name__, attr, field_name))

            # First pass: look at the fields that are actually present.
            for field_name, occurrences in counter.counts.items():
                if field_name not in wanted:
                    complain(field_name, expected_once=False)
                elif occurrences > 1:
                    complain(field_name, expected_once=True)

            # Second pass: every wanted field has to show up exactly once.
            for field_name in wanted:
                if counter.counts.get(field_name) != 1:
                    complain(field_name, expected_once=True)

        for marker in ('start_line', 'stop_line'):
            assert_only_one(marker)

        if "{arguments}" in self.checksum_line:
            assert_only_one('checksum_line', "arguments")
        else:
            assert_only_one('checksum_line', "checksum")
225-
226-
227-
228-
class PythonLanguage(Language):
    """Marker templates for clinic blocks written inside '#' comment lines."""

    language = 'Python'

    # Opening marker of a block; {dsl_name} is filled in with the DSL's name.
    start_line = "#/*[{dsl_name} input]"
    # Prefix carried by every body line of the block.
    body_prefix = "#"
    # Marker that closes the block body.
    stop_line = "#[{dsl_name} start generated code]*/"
    # Marker that follows the generated output; {arguments} carries its fields.
    checksum_line = "#/*[{dsl_name} end generated code: {arguments}]*/"
235-
236-
237147
# Alias for a variable-length tuple of Parameter objects; the element type is
# named as a string rather than referenced directly.
ParamTuple = tuple["Parameter", ...]
238148

239149

@@ -1646,250 +1556,6 @@ def render_function(
16461556
return clinic.get_destination('block').dump()
16471557

16481558

1649-
@dc.dataclass(slots=True, repr=False)
1650-
class Block:
1651-
r"""
1652-
Represents a single block of text embedded in
1653-
another file. If dsl_name is None, the block represents
1654-
verbatim text, raw original text from the file, in
1655-
which case "input" will be the only non-false member.
1656-
If dsl_name is not None, the block represents a Clinic
1657-
block.
1658-
1659-
input is always str, with embedded \n characters.
1660-
input represents the original text from the file;
1661-
if it's a Clinic block, it is the original text with
1662-
the body_prefix and redundant leading whitespace removed.
1663-
1664-
dsl_name is either str or None. If str, it's the text
1665-
found on the start line of the block between the square
1666-
brackets.
1667-
1668-
signatures is a list.
1669-
It may only contain clinic.Module, clinic.Class, and
1670-
clinic.Function objects. At the moment it should
1671-
contain at most one of each.
1672-
1673-
output is either str or None. If str, it's the output
1674-
from this block, with embedded '\n' characters.
1675-
1676-
indent is a str. It's the leading whitespace
1677-
that was found on every line of input. (If body_prefix is
1678-
not empty, this is the indent *after* removing the
1679-
body_prefix.)
1680-
1681-
"indent" is different from the concept of "preindent"
1682-
(which is not stored as state on Block objects).
1683-
"preindent" is the whitespace that
1684-
was found in front of every line of input *before* the
1685-
"body_prefix" (see the Language object). If body_prefix
1686-
is empty, preindent must always be empty too.
1687-
1688-
To illustrate the difference between "indent" and "preindent":
1689-
1690-
Assume that '_' represents whitespace.
1691-
If the block processed was in a Python file, and looked like this:
1692-
____#/*[python]
1693-
____#__for a in range(20):
1694-
____#____print(a)
1695-
____#[python]*/
1696-
"preindent" would be "____" and "indent" would be "__".
1697-
1698-
"""
1699-
input: str
1700-
dsl_name: str | None = None
1701-
signatures: list[Module | Class | Function] = dc.field(default_factory=list)
1702-
output: Any = None # TODO: Very dynamic; probably untypeable in its current form?
1703-
indent: str = ''
1704-
1705-
def __repr__(self) -> str:
1706-
dsl_name = self.dsl_name or "text"
1707-
def summarize(s: object) -> str:
1708-
s = repr(s)
1709-
if len(s) > 30:
1710-
return s[:26] + "..." + s[0]
1711-
return s
1712-
parts = (
1713-
repr(dsl_name),
1714-
f"input={summarize(self.input)}",
1715-
f"output={summarize(self.output)}"
1716-
)
1717-
return f"<clinic.Block {' '.join(parts)}>"
1718-
1719-
1720-
class BlockParser:
    """
    Block-oriented parser for Argument Clinic.
    Iterator, yields Block objects.

    Iteration alternates between verbatim text and Clinic blocks: a
    verbatim block ends at a start line, and the Clinic block that follows
    is returned by the next call.  An empty verbatim block at the very
    beginning of the input is skipped.
    """

    def __init__(
        self,
        input: str,
        language: Language,
        *,
        verify: bool = True
    ) -> None:
        """
        "input" should be a str object with embedded newline characters.

        "language" should be a Language object; it is validated first.

        "verify" controls whether the checksum found on a block's checksum
        line is compared against the checksum of the block's output.
        """
        language.validate()

        # The lines are stored in reverse order, so the next line is taken
        # with pop() and lines can be pushed back with extend().
        self.input = collections.deque(reversed(input.splitlines(keepends=True)))
        self.block_start_line_number = self.line_number = 0

        self.language = language
        before, separator, after = language.start_line.partition('{dsl_name}')
        assert separator == '{dsl_name}'
        self.find_start_re = libclinic.create_regex(before, after,
                                                    whole_line=False)
        self.start_re = libclinic.create_regex(before, after)
        self.verify = verify
        # Checksum regex of the most recent block, reused while consecutive
        # blocks share the same dsl_name.
        self.last_checksum_re: re.Pattern[str] | None = None
        self.last_dsl_name: str | None = None
        # Name of the Clinic block whose start line has just been consumed,
        # or None when the next thing to parse is verbatim text.
        self.dsl_name: str | None = None
        self.first_block = True

    def __iter__(self) -> BlockParser:
        return self

    def __next__(self) -> Block:
        """
        Return the next Block, or raise StopIteration once the input is used up.

        A ClinicError raised while parsing a Clinic block is annotated with
        the language's filename and the current line number, then re-raised.
        """
        while True:
            if not self.input:
                raise StopIteration

            if self.dsl_name:
                try:
                    return_value = self.parse_clinic_block(self.dsl_name)
                except ClinicError as exc:
                    exc.filename = self.language.filename
                    exc.lineno = self.line_number
                    raise
                self.dsl_name = None
                self.first_block = False
                return return_value
            block = self.parse_verbatim_block()
            if self.first_block and not block.input:
                # Nothing precedes the first start line: skip the empty block.
                continue
            self.first_block = False
            return block

    def is_start_line(self, line: str) -> str | None:
        """Return group 1 of the start-line match for *line*, or None if it does not match."""
        match = self.start_re.match(line.lstrip())
        return match.group(1) if match else None

    def _line(self, lookahead: bool = False) -> str:
        """
        Take the next line off the input and count it.

        Unless *lookahead* is true, the line is also passed to the
        language's parse_line() hook.
        """
        self.line_number += 1
        line = self.input.pop()
        if not lookahead:
            self.language.parse_line(line)
        return line

    def parse_verbatim_block(self) -> Block:
        """
        Collect lines up to the next start line and return them as a Block.

        The start line itself is consumed but not included; its dsl_name is
        stored in self.dsl_name so that __next__() parses the Clinic block next.
        """
        lines = []
        self.block_start_line_number = self.line_number

        while self.input:
            line = self._line()
            dsl_name = self.is_start_line(line)
            if dsl_name:
                self.dsl_name = dsl_name
                break
            lines.append(line)

        return Block("".join(lines))

    def parse_clinic_block(self, dsl_name: str) -> Block:
        """
        Parse the Clinic block named *dsl_name* whose start line was just consumed.

        The body is read up to the stop line.  The lines after it are then
        scanned for a checksum line.  If one is found, the lines in between
        become the block's output and, when self.verify is true, their
        checksum is compared with the one on the checksum line.  If none is
        found, the scanned lines are pushed back onto the input and the
        block's output is None.

        Problems with the markers or the checksum are reported through fail().
        """
        in_lines = []
        self.block_start_line_number = self.line_number + 1
        stop_line = self.language.stop_line.format(dsl_name=dsl_name)
        body_prefix = self.language.body_prefix.format(dsl_name=dsl_name)

        def is_stop_line(line: str) -> bool:
            # make sure to recognize stop line even if it
            # doesn't end with EOL (it could be the very end of the file)
            if line.startswith(stop_line):
                remainder = line.removeprefix(stop_line)
                if remainder and not remainder.isspace():
                    fail(f"Garbage after stop line: {remainder!r}")
                return True
            else:
                # gh-92256: don't allow incorrectly formatted stop lines
                if line.lstrip().startswith(stop_line):
                    fail(f"Whitespace is not allowed before the stop line: {line!r}")
                return False

        # consume body of program
        while self.input:
            line = self._line()
            if is_stop_line(line) or self.is_start_line(line):
                break
            if body_prefix:
                line = line.lstrip()
                assert line.startswith(body_prefix)
                line = line.removeprefix(body_prefix)
            in_lines.append(line)

        # consume output and checksum line, if present.
        if self.last_dsl_name == dsl_name:
            checksum_re = self.last_checksum_re
        else:
            # Fill in dsl_name but keep the {arguments} placeholder, so the
            # template can be split around it.
            template = self.language.checksum_line.format(
                dsl_name=dsl_name, arguments='{arguments}')
            before, separator, after = template.partition('{arguments}')
            assert separator == '{arguments}'
            checksum_re = libclinic.create_regex(before, after, word=False)
            self.last_dsl_name = dsl_name
            self.last_checksum_re = checksum_re
        assert checksum_re is not None

        # scan forward for checksum line
        out_lines = []
        arguments = None
        while self.input:
            line = self._line(lookahead=True)
            match = checksum_re.match(line.lstrip())
            arguments = match.group(1) if match else None
            if arguments:
                break
            out_lines.append(line)
            if self.is_start_line(line):
                break

        output: str | None
        output = "".join(out_lines)
        if arguments:
            d = {}
            for field in shlex.split(arguments):
                name, equals, value = field.partition('=')
                if not equals:
                    fail(f"Mangled Argument Clinic marker line: {line!r}")
                d[name.strip()] = value.strip()

            if self.verify:
                # A marker that carries an 'input' field keeps the output
                # checksum under 'output'; otherwise it is under 'checksum'.
                checksum_key = 'output' if 'input' in d else 'checksum'
                if checksum_key not in d:
                    # Report a marker without the expected field as a
                    # mangled line instead of letting a KeyError escape.
                    fail(f"Mangled Argument Clinic marker line: {line!r}")
                checksum = d[checksum_key]

                computed = libclinic.compute_checksum(output, len(checksum))
                if checksum != computed:
                    fail("Checksum mismatch! "
                         f"Expected {checksum!r}, computed {computed!r}. "
                         "Suggested fix: remove all generated code including "
                         "the end marker, or use the '-f' option.")
        else:
            # put back output
            output_lines = output.splitlines(keepends=True)
            self.line_number -= len(output_lines)
            self.input.extend(reversed(output_lines))
            output = None

        return Block("".join(in_lines), dsl_name, output=output)
1891-
1892-
18931559
@dc.dataclass(slots=True, frozen=True)
18941560
class Include:
18951561
"""

0 commit comments

Comments
 (0)