Skip to content

Commit 8f5e9bd

Browse files
authored
(Nearly) complete dot_parser coverage (#482)
* parser: remove unused get_port() * Test repr for P_AttrList and DefaultStatement * Parser: Add expand_attr_lists() A helper function for `ParserElement`s that can accept `attr_list` as part of their grammar. Assigning a results_name to the `attr_list` component and passing it to this function will return either a combined dict of all the attributes in all parsed attr_lists, or an empty dict if none were found. This can then be included in `pydot.core` class constructor calls via `**attrs`. * Parser: Improve defaults node parsing Rename `attr_stmt` to `default_stmt`, apply results names, and rewrite `push_default_stmt` to make use of `expand_attr_lists()` * Parser: Improve subgraph parsing Again, add results names to grammar and use `expand_attr_lists()` * Parser: Improve edge parsing Slightly more complex than the previous parsing improvements, due to the complexity present in `push_edge_stmt()`. It's now far _less_ complex, and drops a bunch of ugly hackery. Still, the meat of the change is the same: add results names, use `expand_attr_lists()`. * Parser: improve node parsing This one's kind of a big deal. Yes, add results names and make use of `expand_attr_lists()`. But beyond that, all parsing of `port` values is eliminated — the `node_id` is now defined as a `DelimitedList`, consisting of between 1 and 3 `ID` strings separated by colons. The `DelimitedList` parser element will combine them into a single string. We were always passing the node name strings in to the `pydot.core.Node` constructor as a single string, anyway -- the logic to parse out port values lives in _that_ code. So there's no reason to duplicate it in the parser, other than to validate that the string parsed has a _plausible_ format. As a result, `do_node_ports()` is dropped as it's no longer ever called.
* Parser: Clean up update_parent_graph_hierarchy The function contained a ton of dead code, being far too complex for its only REAL purpose: To ensure that subgraphs on complex edges have the correct parent graph hierarchy. (The obvious question here would be: "Wait, aren't those subgraphs stored as `FrozenDict`s? How can they be 'updated'?" Well, it turns out each `FrozenDict` representing a subgraph has a `parent_graph` key in its dictionary that points to the _actual_ `Subgraph` object it was created from (since the `Subgraph` had itself as its own parent, when it was first initialized and then frozen). Those `Subgraph` objects don't get garbage collected, because they're still referenced by the `FrozenDict` they were created from. That's probably a bad thing, and maybe we should focus on _deleting_ them instead of reparenting them, but for now this is the way the code's been doing it.) * Parser: Streamline add_elements() Rewrite this crucial function to remove a _ton_ of dead code, including all of the defaults-handling that never actually did anything (and _shouldn't_ do anything, as it was based on a misguided premise: That defaults should be copied directly into the attributes of any individual graph elements they apply to. We don't want that!) As a result, also drop `add_defaults()` which is no longer used. * Parser: Drop unused AttributeDict import * Parser: Type adjustment * Ruff fixes
1 parent 3a062bb commit 8f5e9bd

File tree

2 files changed

+87
-192
lines changed

2 files changed

+87
-192
lines changed

src/pydot/dot_parser.py

Lines changed: 67 additions & 192 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@
4141
)
4242

4343
import pydot.core
44-
from pydot.classes import AttributeDict, FrozenDict
44+
from pydot.classes import FrozenDict
4545

4646
__author__ = ["Michael Krause", "Ero Carrera"]
4747
__license__ = "MIT"
@@ -132,120 +132,43 @@ def push_top_graph_stmt(toks: ParseResults) -> list[pydot.core.Dot]:
132132
return top_graphs
133133

134134

135-
def update_parent_graph_hierarchy(
136-
g: Any, parent_graph: Any = None, level: int = 0
137-
) -> None:
138-
if parent_graph is None:
139-
parent_graph = g
135+
def update_parent_graph_hierarchy(g: pydot.core.Dot) -> None:
136+
for edge_groups in g.obj_dict.get("edges", {}).values():
137+
for edge in edge_groups:
138+
assert isinstance(edge, dict)
139+
endpoints = edge.get("points", [])
140+
for ep in endpoints:
141+
if isinstance(ep, FrozenDict):
142+
ep["parent_graph"].set_parent_graph(g)
140143

141-
for key_name in ("edges",):
142-
if isinstance(g, FrozenDict):
143-
item_dict = g
144-
else:
145-
item_dict = g.obj_dict
146-
147-
if key_name not in item_dict:
148-
continue
149-
150-
for key, objs in item_dict[key_name].items():
151-
for obj in objs:
152-
if (
153-
"parent_graph" in obj
154-
and obj["parent_graph"].get_parent_graph() == g
155-
):
156-
if obj["parent_graph"] is g:
157-
pass
158-
else:
159-
obj["parent_graph"].set_parent_graph(parent_graph)
160-
161-
if key_name == "edges" and len(key) == 2:
162-
for idx, vertex in enumerate(obj["points"]):
163-
if isinstance(
164-
vertex,
165-
(
166-
pydot.core.Graph,
167-
pydot.core.Subgraph,
168-
pydot.core.Cluster,
169-
),
170-
):
171-
vertex.set_parent_graph(parent_graph)
172-
if isinstance(vertex, FrozenDict):
173-
if vertex["parent_graph"] is g:
174-
pass
175-
else:
176-
vertex["parent_graph"].set_parent_graph(
177-
parent_graph
178-
)
179-
180-
181-
def add_defaults(element: Any, defaults: dict[Any, Any]) -> None:
182-
d = element.__dict__
183-
for key, value in defaults.items():
184-
if not d.get(key):
185-
d[key] = value
186-
187-
188-
def add_elements(
189-
g: Any,
190-
toks: ParseResults | list[Any],
191-
defaults_graph: AttributeDict | None = None,
192-
defaults_node: AttributeDict | None = None,
193-
defaults_edge: AttributeDict | None = None,
194-
) -> None:
195-
if defaults_graph is None:
196-
defaults_graph = {}
197-
if defaults_node is None:
198-
defaults_node = {}
199-
if defaults_edge is None:
200-
defaults_edge = {}
201-
202-
for elm_idx, element in enumerate(toks):
144+
145+
def add_elements(g: Any, toks: ParseResults) -> None:
146+
for element in toks:
203147
if isinstance(element, (pydot.core.Subgraph, pydot.core.Cluster)):
204-
add_defaults(element, defaults_graph)
205148
g.add_subgraph(element)
206-
207149
elif isinstance(element, pydot.core.Node):
208-
add_defaults(element, defaults_node)
209150
g.add_node(element)
210-
211151
elif isinstance(element, pydot.core.Edge):
212-
add_defaults(element, defaults_edge)
213152
g.add_edge(element)
214-
215153
elif isinstance(element, ParseResults):
216-
for e in element:
217-
add_elements(
218-
g,
219-
[e],
220-
defaults_graph,
221-
defaults_node,
222-
defaults_edge,
223-
)
224-
154+
add_elements(g, element)
225155
elif isinstance(element, DefaultStatement):
226-
if element.default_type == "graph":
227-
default_graph_attrs = pydot.core.Node("graph", **element.attrs)
228-
g.add_node(default_graph_attrs)
229-
230-
elif element.default_type == "node":
231-
default_node_attrs = pydot.core.Node("node", **element.attrs)
232-
g.add_node(default_node_attrs)
233-
234-
elif element.default_type == "edge":
235-
default_edge_attrs = pydot.core.Node("edge", **element.attrs)
236-
g.add_node(default_edge_attrs)
237-
defaults_edge.update(element.attrs)
238-
239-
else:
240-
raise ValueError(
241-
f"Unknown DefaultStatement: {element.default_type}"
242-
)
243-
244-
elif isinstance(element, P_AttrList):
156+
default_node = pydot.core.Node(
157+
element.default_type, **element.attrs
158+
)
159+
g.add_node(default_node)
160+
else:
161+
assert isinstance(element, P_AttrList)
245162
g.obj_dict["attributes"].update(element.attrs)
246163

247-
else:
248-
raise ValueError(f"Unknown element statement: {element}")
164+
165+
def expand_attr_lists(attr_l: Any) -> dict[str, Any]:
166+
if not isinstance(attr_l, ParseResults):
167+
return {}
168+
attrs = {}
169+
for alist in attr_l:
170+
attrs.update(alist.attrs)
171+
return attrs
249172

250173

251174
def push_graph_stmt(toks: ParseResults) -> pydot.core.Subgraph:
@@ -256,105 +179,51 @@ def push_graph_stmt(toks: ParseResults) -> pydot.core.Subgraph:
256179

257180

258181
def push_subgraph_stmt(toks: ParseResults) -> pydot.core.Subgraph:
259-
g = pydot.core.Subgraph("")
260-
for e in toks:
261-
if len(e) == 3:
262-
e[2].set_name(e[1])
263-
if e[0] == "subgraph":
264-
e[2].obj_dict["show_keyword"] = True
265-
return e[2] # type: ignore
266-
else:
267-
if e[0] == "subgraph":
268-
e[1].obj_dict["show_keyword"] = True
269-
return e[1] # type: ignore
270-
182+
assert "keyword" in toks
183+
id_ = str(toks.id)
184+
show_kw = "keyword" in toks
185+
g = pydot.core.Subgraph(id_)
186+
g.obj_dict["show_keyword"] = show_kw
187+
if isinstance(toks.contents, ParseResults):
188+
add_elements(g, toks.contents)
271189
return g
272190

273191

274192
def push_default_stmt(toks: ParseResults) -> DefaultStatement:
275-
# The pydot class instances should be marked as
276-
# default statements to be inherited by actual
277-
# graphs, nodes and edges.
278-
#
279-
default_type = toks[0][0]
280-
if len(toks) > 1:
281-
attrs = toks[1].attrs
282-
else:
283-
attrs = {}
284-
285-
if default_type in ["graph", "node", "edge"]:
286-
return DefaultStatement(default_type, attrs)
287-
else:
288-
raise ValueError(f"Unknown default statement: {toks}")
193+
default_type = toks.dtype
194+
attrs = expand_attr_lists(toks.attr_l)
195+
return DefaultStatement(str(default_type), attrs)
289196

290197

291198
def push_attr_list(toks: ParseResults) -> P_AttrList:
292199
p = P_AttrList(toks)
293200
return p
294201

295202

296-
def get_port(node: Any) -> Any:
297-
if len(node) > 1:
298-
if isinstance(node[1], ParseResults):
299-
if len(node[1][0]) == 2:
300-
if node[1][0][0] == ":":
301-
return node[1][0][1]
302-
303-
return None
304-
305-
306-
def do_node_ports(node: Any) -> str:
307-
node_port = ""
308-
if len(node) > 1:
309-
node_port = "".join([str(a) + str(b) for a, b in node[1]])
310-
311-
return node_port
312-
313-
314203
def push_edge_stmt(toks: ParseResults) -> list[pydot.core.Edge]:
315-
tok_attrs = [a for a in toks if isinstance(a, P_AttrList)]
316-
attrs = {}
317-
for a in tok_attrs:
318-
attrs.update(a.attrs)
319-
320-
e = []
204+
endpoints = list(toks.endpoints)
205+
attrs = expand_attr_lists(toks.attr_l)
321206

322207
def make_endpoint(
323208
ep: pydot.core.Common | list[Any] | str,
324209
) -> FrozenDict | str:
325-
if isinstance(ep, (list, tuple)) and len(ep) == 1:
326-
# This is a hack for the Group()ed edge_point definition
327-
ep = ep[0]
328210
if isinstance(ep, pydot.core.Subgraph):
329211
return FrozenDict(ep.obj_dict)
330-
if isinstance(ep, (list, tuple)):
331-
return str(ep[0]) + do_node_ports(ep)
332212
return str(ep)
333213

334-
endpoints = [t for t in toks.as_list() if not isinstance(t, P_AttrList)]
335-
214+
edges = []
336215
n_prev = make_endpoint(endpoints[0])
337216
for endpoint in endpoints[1:]:
338217
n_next = make_endpoint(endpoint)
339-
e.append(pydot.core.Edge(n_prev, n_next, **attrs))
218+
edges.append(pydot.core.Edge(n_prev, n_next, **attrs))
340219
n_prev = n_next
341-
342-
return e
220+
return edges
343221

344222

345223
def push_node_stmt(toks: ParseResults) -> pydot.core.Node:
346-
if len(toks) == 2:
347-
attrs = toks[1].attrs
348-
else:
349-
attrs = {}
350-
351-
node_name = toks[0]
352-
if isinstance(node_name, list) or isinstance(node_name, tuple):
353-
if len(node_name) > 0:
354-
node_name = node_name[0]
355-
356-
n = pydot.core.Node(str(node_name), **attrs)
357-
return n
224+
node_name = toks.name
225+
attrs = expand_attr_lists(toks.attr_l)
226+
return pydot.core.Node(str(node_name), **attrs)
358227

359228

360229
class GraphParser:
@@ -394,20 +263,21 @@ class GraphParser:
394263

395264
righthand_id = float_number | ID
396265

397-
port = Group(Group(colon + ID) + Group(colon + ID)) | Group(
398-
Group(colon + ID)
399-
)
400-
401-
node_id = ID + Optional(port)
266+
node_id = DelimitedList(ID, delim=":", min=1, max=3, combine=True)
402267
a_list = OneOrMore(
403268
ID + Optional(equals + righthand_id) + Optional(comma.suppress())
404269
)
405-
406270
attr_list = OneOrMore(
407271
lbrack.suppress() + Optional(a_list) + rbrack.suppress()
408272
)
273+
node_stmt = (
274+
node_id("name")
275+
+ Optional(attr_list("attr_l"))
276+
+ Optional(semi.suppress())
277+
)
409278

410-
attr_stmt = Group(graph_ | node_ | edge_) + attr_list
279+
default_type = graph_ | node_ | edge_
280+
default_stmt = default_type("dtype") + attr_list("attr_l")
411281

412282
stmt_list = Forward()
413283
graph_stmt = Group(
@@ -417,20 +287,25 @@ class GraphParser:
417287
+ Optional(semi.suppress())
418288
)
419289

420-
subgraph = Group(subgraph_ + Optional(ID) + graph_stmt)
421-
422-
edgeop = Literal("--") | Literal("->")
423-
edge_point = Group(subgraph | graph_stmt | node_id)
424-
edge_stmt = DelimitedList(edge_point, delim=edgeop, min=2) + Optional(
425-
attr_list
290+
subgraph = (
291+
subgraph_("keyword") + Optional(ID("id")) + graph_stmt("contents")
426292
)
427293

428-
node_stmt = node_id + Optional(attr_list) + Optional(semi.suppress())
294+
edgeop = Literal("--") | Literal("->")
295+
edge_point = subgraph | graph_stmt | node_id
296+
edge_stmt = DelimitedList(edge_point, delim=edgeop, min=2)(
297+
"endpoints"
298+
) + Optional(attr_list("attr_l"))
429299

430300
assignment = ID + equals + righthand_id
431301

432302
stmt = (
433-
assignment | edge_stmt | attr_stmt | subgraph | graph_stmt | node_stmt
303+
assignment
304+
| edge_stmt
305+
| default_stmt
306+
| subgraph
307+
| graph_stmt
308+
| node_stmt
434309
)
435310
stmt_list <<= OneOrMore(stmt + Optional(semi.suppress()))
436311

@@ -456,7 +331,7 @@ class GraphParser:
456331
a_list.setParseAction(push_attr_list)
457332
edge_stmt.setParseAction(push_edge_stmt)
458333
node_stmt.setParseAction(push_node_stmt)
459-
attr_stmt.setParseAction(push_default_stmt)
334+
default_stmt.setParseAction(push_default_stmt)
460335

461336
subgraph.setParseAction(push_subgraph_stmt)
462337
graph_stmt.setParseAction(push_graph_stmt)

test/test_parser.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,26 @@ def test_edge_subgraph_explicit() -> None:
5959
assert edge.to_string() == expected
6060

6161

62+
def test_AttrList_repr() -> None:
63+
parser = GraphParser.attr_list("a_list")
64+
res = parser.parse_string("[color=red, shape=square]")
65+
assert isinstance(res, pp.ParseResults)
66+
a_list = res.a_list
67+
assert isinstance(a_list, pp.ParseResults)
68+
assert len(a_list) == 1
69+
repr_str = repr(a_list[0])
70+
assert repr_str == "P_AttrList({'color': 'red', 'shape': 'square'})"
71+
72+
73+
def test_DefaultStatement_repr() -> None:
74+
parser = GraphParser.default_stmt("defaults")
75+
res = parser.parse_string("node [color=blue];")
76+
assert isinstance(res, pp.ParseResults)
77+
defaults = res.defaults
78+
repr_str = repr(defaults)
79+
assert repr_str == "DefaultStatement(node, {'color': 'blue'})"
80+
81+
6282
def test_strict_graph_parsing() -> None:
6383
res = dot_parser.parse_dot_data("strict graph G { a; b; }")
6484
assert isinstance(res, list)

0 commit comments

Comments
 (0)