Skip to content

Commit 43902c4

Browse files
committed
Add more tests for Unicode case-insensitivity in regexes.
1 parent c0b6ce2 commit 43902c4

File tree

1 file changed

+45
-1
lines changed

1 file changed

+45
-1
lines changed

test-suite/shared/src/test/scala/org/scalajs/testsuite/javalib/util/regex/RegexEngineTest.scala

Lines changed: 45 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1976,8 +1976,52 @@ class RegexEngineTest {
19761976
val s = compile("s", CaseInsensitive | UnicodeCase)
19771977
assertMatches(s, "s")
19781978
assertMatches(s, "S")
1979-
assertMatches(s, "\u017F") // ſ LATIN SMALL LETTER LONG S
1979+
assertMatches(s, "\u017F") // ſ LATIN SMALL LETTER LONG S; 017F folds to 's'
19801980
assertNotMatches(s, "t")
1981+
1982+
val ranges = compile("[g-l\uFB00\u0175-\u0182\u0540-\u0550\u1F68-\u1F8E\u1FAA-\u1FAF\u2126]",
1983+
CaseInsensitive | UnicodeCase)
1984+
// g-l
1985+
assertMatches(ranges, "H")
1986+
assertMatches(ranges, "\u212A") // K KELVIN SIGN, folds to 'k'
1987+
// FB00
1988+
assertMatches(ranges, "\uFB00") // ﬀ LATIN SMALL LIGATURE FF
1989+
// 0175-0182 (contains 017F which folds to 's')
1990+
if (!executingInJVM) {
1991+
// https://bugs.openjdk.org/browse/JDK-8360459
1992+
assertMatches(ranges, "s")
1993+
assertMatches(ranges, "S")
1994+
}
1995+
assertMatches(ranges, "\u017F")
1996+
assertMatches(ranges, "\u0180") // in range; does not participate in case folding
1997+
// 0540-0550
1998+
assertMatches(ranges, "\u0547") // in range
1999+
assertMatches(ranges, "\u0577") // 0547 folds to 0577
2000+
// 1F68-1F8E
2001+
assertMatches(ranges, "\u1F65") // 1F6D folds to 1F65
2002+
assertMatches(ranges, "\u1F6D") // in range
2003+
assertMatches(ranges, "\u1F82") // 1F8A folds to 1F82, and 1F82 is also in range
2004+
// 1FAA-1FAF
2005+
assertMatches(ranges, "\u1FA4") // 1FAC folds to 1FA4 only in simple case folding
2006+
// 2126
2007+
assertMatches(ranges, "\u2126") // in the set
2008+
assertMatches(ranges, "\u03C9") // 2126 folds to 03C9
2009+
assertMatches(ranges, "\u03A9") // 03A9 also folds to 03C9
2010+
// No matches
2011+
assertNotMatches(ranges, "t")
2012+
assertNotMatches(ranges, "ff") // U+FB00 (ﬀ) would match the two-letter string "ff" only under full case folding
2013+
2014+
// Demonstrate that the JVM recognizes 017F as folding to 's' if the range is ASCII
2015+
val rangeWithASCII_S = compile("[P-U]", CaseInsensitive | UnicodeCase)
2016+
assertMatches(rangeWithASCII_S, "s")
2017+
assertMatches(rangeWithASCII_S, "S")
2018+
assertMatches(rangeWithASCII_S, "\u017F")
2019+
2020+
// Demonstrate that the JVM recognizes 017F as folding to 's' if it is not a range
2021+
val nonRangeWith_017F = compile("[\u017F\u0184]", CaseInsensitive | UnicodeCase)
2022+
assertMatches(nonRangeWith_017F, "s")
2023+
assertMatches(nonRangeWith_017F, "S")
2024+
assertMatches(nonRangeWith_017F, "\u017F")
19812025
}
19822026

19832027
@Test def wordBoundary(): Unit = {

0 commit comments

Comments
 (0)