Skip to content

Commit f18cbcb

Browse files
authored
Parser: Support +-concatenated quoted strings (#490)
* Parser: Support +-concatenated quoted strings. Fixes #488.
* Add tests for +-concatenated strings.
1 parent 015c0dc commit f18cbcb

File tree

3 files changed

+92
-12
lines changed

3 files changed

+92
-12
lines changed

src/pydot/dot_parser.py

Lines changed: 41 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -36,8 +36,8 @@
3636
cStyleComment,
3737
lineno,
3838
nums,
39-
pyparsing_unicode,
4039
restOfLine,
40+
unicode,
4141
)
4242

4343
import pydot.core
@@ -180,6 +180,28 @@ def push_dbl_quoted(toks: ParseResults) -> str:
180180
return s
181181

182182

183+
def push_ID(toks: ParseResults) -> str:
    """Collapse whichever ID alternative matched into a single ID string.

    The results names are tested in a fixed order and the first one present
    decides the outcome:

    * ``"concat"``: every piece has its first and last character (the
      surrounding double quotes) removed, the pieces are joined, and the
      result is wrapped in one new pair of double quotes.
    * ``"dbl_quoted"`` / ``"ident"``: the stored value is returned as ``str``.
    * ``"html"``: the stored value is returned unchanged.

    Args:
        toks: Parse results carrying one of the results names above.

    Returns:
        The ID as a single string.

    Raises:
        AssertionError: If none of the names is present, or the ``"html"``
            value is not a string.
    """
    # "concat" is tested before "dbl_quoted" on purpose.
    # NOTE(review): the concatenated pieces are themselves double-quoted
    # strings, so "dbl_quoted" is presumably also set in that case -- confirm.
    if "concat" in toks:
        joined = "".join(piece[1:-1] for piece in toks.concat)
        return f'"{joined}"'
    if "dbl_quoted" in toks:
        return str(toks.dbl_quoted)
    if "ident" in toks:
        return str(toks.ident)
    # (by process of elimination, HTML)
    if "html" not in toks or not isinstance(toks.html, str):
        # Raised explicitly instead of via `assert` so the check is not
        # stripped when Python runs with -O; the exception type is unchanged.
        raise AssertionError("push_ID: expected an 'html' string result")
    return toks.html
195+
196+
197+
def push_node_id(toks: ParseResults) -> str:
    """Join the parts of a node ID into one colon-separated string.

    Args:
        toks: Iterable of groups, each carrying an ``"id_part"`` result.

    Returns:
        The ``str()`` of every group's ``id_part``, joined with ``":"``.
        An empty ``toks`` yields an empty string.

    Raises:
        AssertionError: If a group has no ``"id_part"`` result.
    """
    parts = []
    for group in toks:
        if "id_part" not in group:
            # Raised explicitly instead of via `assert` so the check is not
            # stripped when Python runs with -O; the exception type is
            # unchanged.
            raise AssertionError("push_node_id: group has no 'id_part' result")
        parts.append(str(group.id_part))
    return ":".join(parts)
203+
204+
183205
def push_graph_stmt(toks: ParseResults) -> pydot.core.Subgraph:
184206
g = pydot.core.Subgraph("")
185207
g.obj_dict["show_keyword"] = False
@@ -258,25 +280,33 @@ class GraphParser:
258280
edge_ = CaselessLiteral("edge")
259281

260282
# token definitions
261-
identifier = Word(
262-
pyparsing_unicode.BasicMultilingualPlane.alphanums + "_."
283+
identifier = Word(unicode.BasicMultilingualPlane.alphanums + "_.")
284+
285+
double_quoted = (
286+
QuotedString('"', multiline=True, unquote_results=False, esc_char="\\")
287+
.set_results_name("dbl_quoted")
288+
.set_parse_action(push_dbl_quoted)
263289
)
264290

265-
double_quoted_string = QuotedString(
266-
'"', multiline=True, unquote_results=False, esc_char="\\"
291+
concat_string = DelimitedList(
292+
double_quoted, delim="+", min=2, combine=False
267293
)
268294

269295
ID = (
270-
identifier
271-
| HTML()
272-
| double_quoted_string("dbl_quoted").set_parse_action(push_dbl_quoted)
273-
)
296+
concat_string("concat")
297+
| double_quoted
298+
| identifier("ident")
299+
| HTML().set_results_name("html")
300+
).set_parse_action(push_ID)
274301

275302
float_number = Combine(Optional(minus) + OneOrMore(Word(nums + ".")))
276303

277304
righthand_id = float_number | ID
278305

279-
node_id = DelimitedList(ID, delim=":", min=1, max=3, combine=True)
306+
node_id = DelimitedList(
307+
Group(ID("id_part")), delim=":", min=1, max=3, combine=False
308+
).set_parse_action(push_node_id)
309+
280310
a_list = OneOrMore(
281311
ID + Optional(equals + righthand_id) + Optional(comma.suppress())
282312
)
@@ -363,8 +393,7 @@ def parse_dot_data(s: str) -> list[pydot.core.Dot] | None:
363393
@rtype: `list` of `pydot.core.Dot`
364394
"""
365395
try:
366-
graphparser = GraphParser.parser
367-
tokens = graphparser.parse_string(s)
396+
tokens = GraphParser.parser.parse_string(s)
368397
return list(tokens)
369398
except ParseException as err:
370399
print(err.line)

test/graphs/b488.dot

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
2+
digraph G {
3+
"concatenated" + "name" [color="bl" + "ue"];
4+
"concatenated with " + "ports" [color="or\
5+
an" + "ge"];
6+
"concatenated" + " with ports":p45:sw -> "\
7+
concatenated\
8+
name":ne [penwidth=5, arrows="b" + "o" + "t" + "h"];
9+
"con" + "catenated" [label="this is a long" +
10+
"long label" + " that just goes on \
11+
and on and on"];
12+
"con" +
13+
"catenated" -> "back\
14+
slashed" [shape=square];
15+
}

test/test_parser.py

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,3 +117,39 @@ def test_backslash_continuations() -> None:
117117
assert nodes[0].get_name() == '"my very long node name"'
118118
assert nodes[1].get_name() == '"my indented and wrapped node name"'
119119
assert nodes[2].get_name() == '"my node name containing \\ backslash"'
120+
121+
122+
def test_plus_concatenation() -> None:
    """Check that "+"-concatenated quoted strings parse as single IDs.

    The DOT source mixes "+" concatenation with backslash-newline
    continuations, in node names, in attribute values and in an edge endpoint
    that also carries ports.
    """
    # Continuation lines sit at the common indent so that, after dedent, they
    # start at column 0 and add no stray whitespace to the joined strings.
    src = textwrap.dedent(r"""
        digraph G {
            "my" + "concatenated" + "name";
            "myconcatenated" + " with " + "ports" [color="r" + "ed"];
            "my\
        concatenated" + " with ports":p45:sw -> "my\
        concatenated\
        name":ne [penwidth=5, arrows="b" + "o" + "t" + "h"];
            "con" + "catenated" [label="this is a long\
        long label" + " that just goes on \
        and on and on"];
        }""")
    res = dot_parser.parse_dot_data(src)
    assert isinstance(res, list)
    assert len(res) == 1
    graph = res[0]
    nodes = graph.get_nodes()
    edges = graph.get_edges()

    assert len(nodes) == 3
    assert nodes[0].get_name() == '"myconcatenatedname"'
    assert nodes[1].get_name() == '"myconcatenated with ports"'
    assert nodes[1].get("color") == '"red"'
    assert nodes[2].get_name() == '"concatenated"'
    assert nodes[2].get("label") == (
        '"this is a longlong label that just goes on and on and on"'
    )

    assert len(edges) == 1
    edge = edges[0]
    assert edge.get_source() == f"{nodes[1].get_name()}:p45:sw"
    assert edge.get_destination() == '"myconcatenatedname":ne'
    assert edge.get("arrows") == '"both"'

0 commit comments

Comments
 (0)