Skip to content

Commit 886cce4

Browse files
committed
Address review comments in the test suite.
1 parent 0221e33 commit 886cce4

File tree

1 file changed

+113
-71
lines changed

1 file changed

+113
-71
lines changed

test-suite/shared/src/test/scala/org/scalajs/testsuite/javalib/util/regex/RegexEngineTest.scala

Lines changed: 113 additions & 71 deletions
Original file line number | Diff line number | Diff line change
@@ -308,7 +308,11 @@ class RegexEngineTest {
308308
assertMatches("a\\+", "a+")
309309
assertMatches("a\\{3}", "a{3}")
310310
assertMatches("\\(5\\)", "(5)")
311+
312+
// Escapes for characters that are syntax characters only when using Comments (the escapes work regardless)
313+
assertMatches("\\ \\\t\\\n\\\u000B\\\f\\\r", " \t\n\u000B\f\r")
311314
assertMatches("\\ \\\t\\\n\\\u000B\\\f\\\r", Comments, " \t\n\u000B\f\r")
315+
assertMatches("\\#abc", "#abc")
312316
assertMatches("\\#abc", Comments, "#abc")
313317

314318
// Letter escapes for special chars
@@ -327,7 +331,10 @@ class RegexEngineTest {
327331
assertMatches("\\c_", "\u001F")
328332
assertMatches("\\c?", 0x007f.toChar.toString())
329333

330-
// More control escapes that are not really meant to be used
334+
/* More control escapes that are not really meant to be used.
335+
* In general, '\cx' means `x ^ 0x40`, as explained at
336+
* https://stackoverflow.com/questions/35208570/java-regular-expression-cx-control-characters
337+
*/
331338
assertMatches("\\cb", 0x0022.toChar.toString())
332339
assertMatches("\\c" + GClefHigh, "" + (0xd834 ^ 0x40).toChar)
333340
assertMatches("\\c" + GClef, GClefHigh + (0xdd1e ^ 0x40).toChar)
@@ -433,8 +440,10 @@ class RegexEngineTest {
433440
assertFind(repeatedSupplementaryCodePoint, "bca\uD834\uDD1E\uDD1Edef", 2, 5)
434441

435442
// After quotes, a quantifier applies to the last code point (sic!)
436-
assertFind("a\\Qbc\\d\\E*", "aaabc\\dbc\\dbc", 2, 7)
437-
assertFind("a\\Qbc\\b\\E*", "aaabc\\bbc\\bbc", 2, 8)
443+
val repeatedQuote = compile("a\\Qbc\\d\\E*")
444+
assertFind(repeatedQuote, "aaabc\\dbc\\dbc", 2, 7)
445+
assertFind(repeatedQuote, "aaabc\\ddc", 2, 8)
446+
assertFind(repeatedQuote, "aaabc\\bc", 2, 6)
438447

439448
val repeatedQuoteEndingWithSupplementaryCodePoint = compile("a\\Qbc\\\uD834\uDD1E\\E*")
440449
assertFind(repeatedQuoteEndingWithSupplementaryCodePoint, "aaabc\\\uD834\uDD1E\uD834\uDD1Ebc", 2, 10)
@@ -443,29 +452,54 @@ class RegexEngineTest {
443452
}
444453

445454
@Test def lazyQuantifiers(): Unit = {
446-
val starLazy = compile("ba*?")
447-
assertMatches(starLazy, "b")
448-
assertMatches(starLazy, "ba")
449-
assertMatches(starLazy, "baaaaa")
450-
assertFind(starLazy, "cbaaassefaa", 1, 2)
451-
assertFind(starLazy, "cbsssefaaaaa", 1, 2)
452-
assertNotFind(starLazy, "qsessqsssddff")
453-
454-
val plusLazy = compile("ba+?")
455-
assertMatches(plusLazy, "ba")
456-
assertMatches(plusLazy, "baaaa")
457-
assertNotFind(plusLazy, "b")
458-
assertFind(plusLazy, "cbaaassefaa", 1, 3)
459-
assertFind(plusLazy, "cbsssefbaaa", 7, 9)
460-
assertNotFind(plusLazy, "qsebsqsbsddfb")
461-
462-
val questionLazy = compile("ba??")
463-
assertMatches(questionLazy, "b")
464-
assertMatches(questionLazy, "ba")
465-
assertNotMatches(questionLazy, "baa")
466-
assertFind(questionLazy, "cbaaassefaa", 1, 2)
467-
assertFind(questionLazy, "cbssefbaaa", 1, 2)
468-
assertNotFind(questionLazy, "qsessqsssddff")
455+
val starLazy = compile("a[bc]*?b")
456+
assertMatches(starLazy, "ab")
457+
assertMatches(starLazy, "abbbb")
458+
assertMatches(starLazy, "abccbb")
459+
assertFind(starLazy, "abbb", 0, 2)
460+
assertFind(starLazy, "accbbbccb", 0, 4)
461+
assertNotFind(starLazy, "accc")
462+
463+
val starLazyAtEnd = compile("ba*?")
464+
assertMatches(starLazyAtEnd, "b")
465+
assertMatches(starLazyAtEnd, "ba")
466+
assertMatches(starLazyAtEnd, "baaaaa")
467+
assertFind(starLazyAtEnd, "cbaaassefaa", 1, 2)
468+
assertFind(starLazyAtEnd, "cbsssefaaaaa", 1, 2)
469+
assertNotFind(starLazyAtEnd, "qsessqsssddff")
470+
471+
val plusLazy = compile("a[bc]+?b")
472+
assertMatches(plusLazy, "abb")
473+
assertMatches(plusLazy, "acb")
474+
assertMatches(plusLazy, "abbbcccbb")
475+
assertFind(plusLazy, "abbb", 0, 3)
476+
assertFind(plusLazy, "accbbbccb", 0, 4)
477+
assertNotFind(plusLazy, "accc")
478+
assertNotFind(plusLazy, "ab")
479+
480+
val plusLazyAtEnd = compile("ba+?")
481+
assertMatches(plusLazyAtEnd, "ba")
482+
assertMatches(plusLazyAtEnd, "baaaa")
483+
assertNotFind(plusLazyAtEnd, "b")
484+
assertFind(plusLazyAtEnd, "cbaaassefaa", 1, 3)
485+
assertFind(plusLazyAtEnd, "cbsssefbaaa", 7, 9)
486+
assertNotFind(plusLazyAtEnd, "qsebsqsbsddfb")
487+
488+
val questionLazy = compile("a[bc]??b")
489+
assertMatches(questionLazy, "ab")
490+
assertMatches(questionLazy, "abb")
491+
assertMatches(questionLazy, "acb")
492+
assertFind(questionLazy, "abbb", 0, 2)
493+
assertFind(questionLazy, "acbbbccb", 0, 3)
494+
assertNotFind(questionLazy, "accbb")
495+
496+
val questionLazyAtEnd = compile("ba??")
497+
assertMatches(questionLazyAtEnd, "b")
498+
assertMatches(questionLazyAtEnd, "ba")
499+
assertNotMatches(questionLazyAtEnd, "baa")
500+
assertFind(questionLazyAtEnd, "cbaaassefaa", 1, 2)
501+
assertFind(questionLazyAtEnd, "cbssefbaaa", 1, 2)
502+
assertNotFind(questionLazyAtEnd, "qsessqsssddff")
469503
}
470504

471505
@Test def possessiveQuantifiers(): Unit = {
@@ -477,7 +511,7 @@ class RegexEngineTest {
477511
val plusPossessive = compile("ab++[bd]")
478512
assertFind(plusPossessive, " a abbbb abbbdba ", 11, 16)
479513
assertFind(plusPossessive, " a abbbb adba abdd ", 17, 20)
480-
assertNotFind(plusPossessive, " a abbbb dab adba ")
514+
assertNotFind(plusPossessive, " a ad abbbb dab adba ")
481515

482516
val questionPossessive = compile("ab?+[bd]")
483517
assertFind(questionPossessive, " a ab abb abb ", 9, 12)
@@ -576,7 +610,7 @@ class RegexEngineTest {
576610
assertMatches(dotAll, "\u2029")
577611

578612
assertNotMatches(dotAll, "\r\n")
579-
assertFind(dotUnixLines, "\r\n", 0, 1)
613+
assertFind(dotAll, "\r\n", 0, 1)
580614

581615
val dotAllUnixLines = compile(".", DotAll | UnixLines)
582616

@@ -594,7 +628,7 @@ class RegexEngineTest {
594628
assertMatches(dotAllUnixLines, "\u2029")
595629

596630
assertNotMatches(dotAllUnixLines, "\r\n")
597-
assertFind(dotUnixLines, "\r\n", 0, 1)
631+
assertFind(dotAllUnixLines, "\r\n", 0, 1)
598632

599633
// Test case for #1847, and for the (?s) leading flag
600634
val codeMatcher = Pattern.compile("(?s).*<code>(.*?)</code>.*")
@@ -683,7 +717,7 @@ class RegexEngineTest {
683717
}
684718

685719
@Test def comments(): Unit = {
686-
val abc = compile(
720+
val lotsOfComments = compile(
687721
" \ta # a comment is interrupted by \r" +
688722
"b # or \n" +
689723
"c # or \u0085" +
@@ -695,7 +729,17 @@ class RegexEngineTest {
695729
"i",
696730
Comments)
697731

698-
assertMatches(abc, "abc\u0085d\u2028e\u2029fghi")
732+
assertMatches(lotsOfComments, "abc\u0085d\u2028e\u2029fghi")
733+
734+
// We can still match against whitespace in the input
735+
assertMatches("\ta\\ b\t", Comments, "a b")
736+
assertMatches("\ta.b\t", Comments, "a b")
737+
assertMatches("\ta[\\ c]b\t", Comments, "a b")
738+
739+
// We can still match against '#' in the input
740+
assertMatches("\ta\\#b\t", Comments, "a#b")
741+
assertMatches("\ta.b\t", Comments, "a#b")
742+
assertMatches("\ta[\\#c]b\t", Comments, "a#b")
699743
}
700744

701745
@Test def predefinedCharacterClasses(): Unit = {
@@ -1571,18 +1615,18 @@ class RegexEngineTest {
15711615
assertNotMatches(an_and_ks, "A")
15721616
assertNotMatches(an_and_ks, "N")
15731617

1574-
val az_butNot_def = compile("[a-z&&[^dfh]]")
1575-
assertMatches(az_butNot_def, "a")
1576-
assertMatches(az_butNot_def, "c")
1577-
assertMatches(az_butNot_def, "e")
1578-
assertMatches(az_butNot_def, "i")
1579-
assertMatches(az_butNot_def, "r")
1580-
assertNotMatches(az_butNot_def, "d")
1581-
assertNotMatches(az_butNot_def, "f")
1582-
assertNotMatches(az_butNot_def, "h")
1583-
assertNotMatches(az_butNot_def, "A")
1584-
assertNotMatches(az_butNot_def, "0")
1585-
assertNotMatches(az_butNot_def, "\n")
1618+
val az_butNot_dfh = compile("[a-z&&[^dfh]]")
1619+
assertMatches(az_butNot_dfh, "a")
1620+
assertMatches(az_butNot_dfh, "c")
1621+
assertMatches(az_butNot_dfh, "e")
1622+
assertMatches(az_butNot_dfh, "i")
1623+
assertMatches(az_butNot_dfh, "r")
1624+
assertNotMatches(az_butNot_dfh, "d")
1625+
assertNotMatches(az_butNot_dfh, "f")
1626+
assertNotMatches(az_butNot_dfh, "h")
1627+
assertNotMatches(az_butNot_dfh, "A")
1628+
assertNotMatches(az_butNot_dfh, "0")
1629+
assertNotMatches(az_butNot_dfh, "\n")
15861630

15871631
val az_butNot_mp = compile("[a-z&&[^m-p]]")
15881632
assertMatches(az_butNot_mp, "a")
@@ -1877,40 +1921,20 @@ class RegexEngineTest {
18771921
assertFind(lineBreakUnixLines, "ab\n\ncd", 2, 3)
18781922
}
18791923

1880-
@Test def matchesWithNonGreedyOperators(): Unit = {
1881-
val opt = compile("ab??")
1882-
assertMatches(opt, "a")
1883-
assertMatches(opt, "ab")
1884-
assertFind(opt, "ab", 0, 1)
1885-
1886-
val star = compile("ab*?")
1887-
assertMatches(star, "a")
1888-
assertMatches(star, "ab")
1889-
assertMatches(star, "abbbbbb")
1890-
assertFind(star, "abbbb", 0, 1)
1891-
1892-
val plus = compile("ab+?")
1893-
assertNotFind(plus, "a")
1894-
assertMatches(plus, "ab")
1895-
assertMatches(plus, "abb")
1896-
assertMatches(plus, "abbbbbb")
1897-
assertFind(plus, "abbbb", 0, 2)
1898-
}
1899-
19001924
@Test def namedCaptureGroups(): Unit = {
1901-
val named = compile(raw".*((?<pizza>Pizza).*?)+")
1925+
val named = compile(".*((?<pizza>Pizza).*?)+")
19021926
val m = assertMatchesAndGroupsEquals(named, "PizzaWithPizza", "Pizza", "Pizza")
19031927
assertEquals("Pizza", m.group("pizza"))
19041928

1905-
val ref = compile(raw"(?<pizza>Pizza)\k<pizza>*?")
1929+
val ref = compile("(?<pizza>Pizza)\\k<pizza>*?")
19061930
assertMatches(ref, "Pizza")
19071931
assertMatches(ref, "PizzaPizza")
19081932
assertMatches(ref, "PizzaPizzaPizza")
19091933
assertNotMatches(ref, "PizzaPizzicatoPizza")
19101934

1911-
assertSyntaxError("""(?<A>a?)\k<B>?""", "named capturing group <B> does not exit", 12)
1935+
assertSyntaxError("(?<A>a?)\\k<B>?", "named capturing group <B> does not exit", 12)
19121936

1913-
assertSyntaxError("""(?<A>a?)(?<A>dupe)""", "named capturing group <A> is already defined", 12)
1937+
assertSyntaxError("(?<A>a?)(?<A>dupe)", "named capturing group <A> is already defined", 12)
19141938
}
19151939

19161940
@Test def recursiveCapturingGroups(): Unit = {
@@ -1922,18 +1946,18 @@ class RegexEngineTest {
19221946
assertNotMatches(rec, "aaa")
19231947

19241948
// The JVM kind of supports "back references" to later groups, but we don't
1925-
assertSyntaxErrorInJS("""(a?\2?)(b?\1?)""", "numbered capturing group <2> does not exist", 4)
1949+
assertSyntaxErrorInJS("(a?\\2?)(b?\\1?)", "numbered capturing group <2> does not exist", 4)
19261950

19271951
// The JVM tolerates "back references" to non-existing groups, but we don't
1928-
assertSyntaxErrorInJS("""(a?\3?)(b?\1?)""", "numbered capturing group <3> does not exist", 4)
1952+
assertSyntaxErrorInJS("(a?\\3?)(b?\\1?)", "numbered capturing group <3> does not exist", 4)
19291953

1930-
val namedRec = compile(raw"(?<A>a?\k<A>?)\k<A>")
1954+
val namedRec = compile("(?<A>a?\\k<A>?)\\k<A>")
19311955
assertMatches(namedRec, "aa")
19321956
assertMatches(namedRec, "")
19331957
assertNotMatches(namedRec, "ab")
19341958
assertNotMatches(namedRec, "a")
19351959

1936-
assertSyntaxError("""(?<A>a?\k<B>?)(?<B>b?\k<A>?)""", "named capturing group <B> does not exit", 11)
1960+
assertSyntaxError("(?<A>a?\\k<B>?)(?<B>b?\\k<A>?)", "named capturing group <B> does not exit", 11)
19371961
}
19381962

19391963
@Test def backReferenceLimit(): Unit = {
@@ -2015,6 +2039,24 @@ class RegexEngineTest {
20152039
* When the groups are fetched in the original code, we check the groups
20162040
* here. Otherwise, we don't, even if there are capturing groups in the
20172041
* regex.
2042+
*
2043+
* These tests only really test that the regexes still work, but not that
2044+
* they work *in the same way* as before. In fact, they don't for some
2045+
* corner cases. By inspection, all the regexes below use features in 4
2046+
* categories:
2047+
*
2048+
* - Features whose semantics are equivalent in `js.RegExp` and `Pattern`,
2049+
* notably ASCII characters, repeaters, classes of ASCII characters, the
2050+
* '\d' character class, the '^' and '$' boundary matchers (without
2051+
* multiline).
2052+
* - The '.', which *is* different: it matches '\x85' in `js.RegExp` but not
2053+
* in `Pattern`; this was judged acceptable as unlikely to cause a real
2054+
* difference in practice.
2055+
* - One regex uses the `CASE_INSENSITIVE` flag with a pattern that contains only
2056+
* ASCII letters: it now really only matches other ASCII letters; this was
2057+
* judged acceptable as probably the intended meaning anyway.
2058+
* - One regex uses '\s' and '\S', for which we obtained confirmation from
2059+
* the maintainer that the change in semantics was not an issue.
20182060
*/
20192061
@Test def regexesFoundInLibraries(): Unit = {
20202062
// scalastyle:off line.size.limit

0 commit comments

Comments
 (0)