@@ -1976,8 +1976,52 @@ class RegexEngineTest {
1976
1976
val s = compile(" s" , CaseInsensitive | UnicodeCase )
1977
1977
assertMatches(s, " s" )
1978
1978
assertMatches(s, " S" )
1979
- assertMatches(s, " \u017F " ) // ſ LATIN SMALL LETTER LONG S
1979
+ assertMatches(s, " \u017F " ) // ſ LATIN SMALL LETTER LONG S; 017F folds to 's'
1980
1980
assertNotMatches(s, " t" )
1981
+
1982
+ val ranges = compile(" [g-l\uFB00\u0175 -\u0182\u0540 -\u0550\u1F68 -\u1F8E\u1FAA -\u1FAF\u2126 ]" ,
1983
+ CaseInsensitive | UnicodeCase )
1984
+ // g-l
1985
+ assertMatches(ranges, " H" )
1986
+ assertMatches(ranges, " \u212A " ) // K KELVIN SIGN, folds to 'k'
1987
+ // FB00
1988
+ assertMatches(ranges, " \uFB00 " ) // ff LATIN SMALL LIGATURE FF
1989
+ // 0175-0182 (contains 017F which folds to 's')
1990
+ if (! executingInJVM) {
1991
+ // https://bugs.openjdk.org/browse/JDK-8360459
1992
+ assertMatches(ranges, " s" )
1993
+ assertMatches(ranges, " S" )
1994
+ }
1995
+ assertMatches(ranges, " \u017F " )
1996
+ assertMatches(ranges, " \u0180 " ) // in range, so it matches directly without case folding (0180 is itself the simple fold of 0243)
1997
+ // 0540-0550
1998
+ assertMatches(ranges, " \u0547 " ) // in range
1999
+ assertMatches(ranges, " \u0577 " ) // 0547 folds to 0577
2000
+ // 1F68-1F8E
2001
+ assertMatches(ranges, " \u1F65 " ) // 1F6D folds to 1F65
2002
+ assertMatches(ranges, " \u1F6D " ) // in range
2003
+ assertMatches(ranges, " \u1F82 " ) // 1F8A folds to 1F82, and 1F82 is also in range
2004
+ // 1FAA-1FAF
2005
+ assertMatches(ranges, " \u1FA4 " ) // 1FAC folds to 1FA4 only in simple case folding
2006
+ // 2126
2007
+ assertMatches(ranges, " \u2126 " ) // in the set
2008
+ assertMatches(ranges, " \u03C9 " ) // 2126 folds to 03C9
2009
+ assertMatches(ranges, " \u03A9 " ) // 03A9 also folds to 03C9
2010
+ // No matches
2011
+ assertNotMatches(ranges, " t" )
2012
+ assertNotMatches(ranges, " ff" ) // ff FB00 would only match with full case folding
2013
+
2014
+ // Demonstrate that the JVM recognizes 017F as folding to 's' if the range is ASCII
2015
+ val rangeWithASCII_S = compile(" [P-U]" , CaseInsensitive | UnicodeCase )
2016
+ assertMatches(rangeWithASCII_S, " s" )
2017
+ assertMatches(rangeWithASCII_S, " S" )
2018
+ assertMatches(rangeWithASCII_S, " \u017F " )
2019
+
2020
+ // Demonstrate that the JVM recognizes 017F as folding to 's' if it is not a range
2021
+ val nonRangeWith_017F = compile(" [\u017F\u0184 ]" , CaseInsensitive | UnicodeCase )
2022
+ assertMatches(nonRangeWith_017F, " s" )
2023
+ assertMatches(nonRangeWith_017F, " S" )
2024
+ assertMatches(nonRangeWith_017F, " \u017F " )
1981
2025
}
1982
2026
1983
2027
@ Test def wordBoundary (): Unit = {
0 commit comments