Skip to content

Commit ce1a0a0

Browse files
committed
WIP: support quantifiers
1 parent 7336f6c commit ce1a0a0

File tree

1 file changed

+154
-6
lines changed

1 file changed

+154
-6
lines changed

src/test/modules/rpr/rpr_prefer.c

Lines changed: 154 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -159,7 +159,6 @@ pretty_print(Node *parsed)
159159
}
160160
}
161161

162-
#if 0
163162
static bool
164163
has_variable(List *id_str)
165164
{
@@ -182,7 +181,153 @@ has_variable(List *id_str)
182181

183182
return false;
184183
}
185-
#endif
184+
185+
static void
186+
expand_worker(PL **result, IDStr *prefix, PL *terms, int remaining, bool reluctant)
187+
{
188+
if (remaining == 0)
189+
{
190+
/*
191+
* Base case. Expand the provided prefix with terms, allowing the
192+
* final term to be empty.
193+
*/
194+
for (PL *q = terms; q; q = q->next)
195+
{
196+
IDStr *qs = (IDStr *) q->node;
197+
IDStr *paren;
198+
199+
paren = list_make1(makeString("("));
200+
paren = list_concat(paren, list_copy(prefix));
201+
paren = list_concat(paren, list_copy(qs));
202+
paren = lappend(paren, makeString(")"));
203+
204+
*result = lappend(*result, paren);
205+
}
206+
207+
return;
208+
}
209+
210+
/*
211+
* Order here depends on whether the quantifier is greedy or reluctant --
212+
* for the greedy case, superstrings sort before their substrings, and
213+
* vice-versa for the reluctant case. Crucially, this is not the same as
214+
* sorting by length, which is why it's implemented recursively.
215+
*/
216+
217+
for (PL *q = terms; q; q = q->next)
218+
{
219+
IDStr *qs = (IDStr *) q->node;
220+
IDStr *new;
221+
IDStr *paren;
222+
223+
new = list_copy(prefix);
224+
new = list_concat(new, list_copy(qs));
225+
226+
paren = list_make1(makeString("("));
227+
paren = list_concat(paren, list_copy(new));
228+
paren = lappend(paren, makeString(")"));
229+
230+
if (reluctant)
231+
*result = lappend(*result, paren);
232+
233+
/*
234+
* Empty matches may not appear in the middle of the identifier string;
235+
* skip expansion unless the term had a variable.
236+
*/
237+
if (has_variable(qs))
238+
expand_worker(result, new, terms, remaining - 1, reluctant);
239+
240+
if (!reluctant)
241+
*result = lappend(*result, paren);
242+
}
243+
}
244+
245+
static PL *
246+
expand_factor(PL *primary, RowPatternQuantifier *quant)
247+
{
248+
PL *result = NIL;
249+
PL *prefixes;
250+
int min = 0;
251+
int max;
252+
int expansions;
253+
254+
if (!quant->max)
255+
mmfatal(ET_ERROR, "infinite quantifiers not yet supported");
256+
257+
if (quant->min)
258+
min = intValue(quant->min);
259+
if (quant->max)
260+
max = intValue(quant->max);
261+
262+
if (max == 0)
263+
mmfatal(ET_ERROR, "maximum must be greater than zero");
264+
if (max < min)
265+
mmfatal(ET_ERROR, "maximum may not be less than minimum");
266+
267+
/*
268+
* Build the "prefix" set. All identifier strings that are returned must
269+
* start with one of these. The list is generated in preferment order.
270+
*
271+
* By rule, an empty match -- a string that cannot advance the state
272+
* machine, for which has_variable() will return false -- may only appear
273+
* in the prefix set before the `min` index, or at the `max` index.
274+
*
275+
* For min of 0 or 1, our only prefix is the empty set.
276+
*/
277+
prefixes = list_make1(NIL);
278+
279+
for (int i = 1; i < min; i++)
280+
{
281+
PL *acc = NIL;
282+
283+
for (PL *p = prefixes; p; p = p->next)
284+
{
285+
for (PL *q = primary; q; q = q->next)
286+
{
287+
IDStr *ps = (IDStr *) p->node;
288+
IDStr *qs = (IDStr *) q->node;
289+
IDStr *new;
290+
291+
new = list_copy(ps);
292+
new = list_concat(new, qs);
293+
294+
acc = lappend(acc, new);
295+
}
296+
}
297+
298+
prefixes = acc;
299+
}
300+
301+
/* Now build the full PL. */
302+
expansions = max - min;
303+
if (min == 0)
304+
expansions--; /* we handle the empty set case explicitly */
305+
306+
for (PL *p = prefixes; p; p = p->next)
307+
{
308+
IDStr *ps = (IDStr *) p->node;
309+
310+
if (min == 0 && quant->reluctant)
311+
{
312+
IDStr *empty = list_make1(makeString("("));
313+
empty = lappend(empty, makeString(")"));
314+
315+
result = lappend(result, empty);
316+
}
317+
318+
expand_worker(&result, ps, primary, expansions, quant->reluctant);
319+
320+
if (min == 0 && !quant->reluctant)
321+
{
322+
IDStr *empty = list_make1(makeString("("));
323+
empty = lappend(empty, makeString(")"));
324+
325+
result = lappend(result, empty);
326+
}
327+
}
328+
329+
return result;
330+
}
186331

187332
static PL *
188333
parenthesized_language(Node *n)
@@ -276,11 +421,14 @@ parenthesized_language(Node *n)
276421
case T_RowPatternFactor:
277422
RowPatternFactor *f = (RowPatternFactor *) n;
278423

279-
if ((!f->quantifier->min || intValue(f->quantifier->min) != 1)
280-
|| (!f->quantifier->max || intValue(f->quantifier->max) != 1))
281-
mmfatal(ET_ERROR, "quantifiers other than 1 not yet supported");
282-
283424
result = parenthesized_language(f->primary);
425+
426+
if (!f->quantifier->min
427+
|| intValue(f->quantifier->min) != 1
428+
|| !f->quantifier->max
429+
|| intValue(f->quantifier->max) != 1)
430+
result = expand_factor(result, f->quantifier);
431+
284432
break;
285433

286434
default:

0 commit comments

Comments
 (0)