12
12
13
13
package java .util .regex
14
14
15
- import scala .annotation .switch
16
-
17
15
import scala .scalajs .js
18
16
19
- import java .util .ScalaOps ._
20
-
21
- final class Pattern private (jsRegExp : js.RegExp , _pattern : String , _flags : Int )
22
- extends Serializable {
17
+ final class Pattern private [regex] (
18
+ _pattern : String ,
19
+ _flags : Int ,
20
+ jsPattern : String ,
21
+ jsFlags : String ,
22
+ private [regex] val groupCount : Int ,
23
+ namedGroups : js.Dictionary [Int ]
24
+ ) extends Serializable {
23
25
24
26
import Pattern ._
25
27
28
+ /** Compile the native RegExp once.
29
+ *
30
+ * In `newJSRegExp()`, we clone that native RegExp using
31
+ * `new js.RegExp(jsRegExpBlueprint)`, which the JS engine hopefully
32
+ * optimizes by reusing the compiled internal representation of the RegExp.
33
+ * Otherwise, well, there's not much we can do about it.
34
+ */
35
+ private [this ] val jsRegExpBlueprint = new js.RegExp (jsPattern, jsFlags + " g" )
36
+
37
+ /** Another version of the RegExp that is used by `Matcher.matches()`.
38
+ *
39
+ * It forces `^` and `$` at the beginning and end of the pattern so that
40
+ * only entire inputs are matched. In addition, it does not have the 'g'
41
+ * flag, so that it can be repeatedly used without managing `lastIndex`.
42
+ *
43
+ * Since that RegExp is only used locally within `matches()`, and not stored
44
+ * in the `Matcher`, we can always reuse the same instance.
45
+ */
46
+ private [regex] lazy val jsRegExpForMatches : js.RegExp =
47
+ new js.RegExp (" ^" + jsPattern + " $" , jsFlags)
48
+
26
49
def pattern (): String = _pattern
27
50
def flags (): Int = _flags
28
51
29
- private def jsPattern : String = jsRegExp.source
30
-
31
- private def jsFlags : String = {
32
- (if (jsRegExp.global) " g" else " " ) +
33
- (if (jsRegExp.ignoreCase) " i" else " " ) +
34
- (if (jsRegExp.multiline) " m" else " " )
35
- }
36
-
37
- private [regex] lazy val groupCount : Int =
38
- new js.RegExp (" |" + jsPattern).exec(" " ).length - 1
39
-
40
52
private [regex] lazy val groupStartMapper : GroupStartMapper =
41
- GroupStartMapper (jsPattern, jsFlags)
53
+ GroupStartMapper (jsPattern, jsFlags + " g " )
42
54
43
55
override def toString (): String = pattern()
44
56
45
57
private [regex] def newJSRegExp (): js.RegExp = {
46
- val r = new js.RegExp (jsRegExp )
47
- if (r ne jsRegExp ) {
58
+ val r = new js.RegExp (jsRegExpBlueprint )
59
+ if (r ne jsRegExpBlueprint ) {
48
60
r
49
61
} else {
50
62
/* Workaround for the PhantomJS 1.x bug
51
63
* https://github.com/ariya/phantomjs/issues/11494
52
- * which causes new js.RegExp(jsRegExp) to return the same object,
53
- * rather than a new one.
54
- * We therefore reconstruct the pattern and flags used to create
55
- * jsRegExp and create a new one from there.
64
+ * which causes new js.RegExp(jsRegExpBlueprint) to return the same
65
+ * object, rather than a new one.
66
+ * In that case, we reconstruct a new js.RegExp from scratch.
56
67
*/
57
- new js.RegExp (jsPattern, jsFlags)
68
+ new js.RegExp (jsPattern, jsFlags + " g " )
58
69
}
59
70
}
60
71
72
+ private [regex] def namedGroup (name : String ): Int = {
73
+ namedGroups.getOrElse(name, {
74
+ throw new IllegalArgumentException (s " No group with name < $name> " )
75
+ })
76
+ }
77
+
61
78
def matcher (input : CharSequence ): Matcher =
62
79
new Matcher (this , input, 0 , input.length)
63
80
@@ -123,27 +140,8 @@ object Pattern {
123
140
final val CANON_EQ = 0x80
124
141
final val UNICODE_CHARACTER_CLASS = 0x100
125
142
126
- def compile (regex : String , flags : Int ): Pattern = {
127
- val (jsPattern, flags1) = {
128
- if ((flags & LITERAL ) != 0 ) {
129
- (quote(regex), flags)
130
- } else {
131
- trySplitHack(regex, flags) orElse
132
- tryFlagHack(regex, flags) getOrElse
133
- (regex, flags)
134
- }
135
- }
136
-
137
- val jsFlags = {
138
- " g" +
139
- (if ((flags1 & CASE_INSENSITIVE ) != 0 ) " i" else " " ) +
140
- (if ((flags1 & MULTILINE ) != 0 ) " m" else " " )
141
- }
142
-
143
- val jsRegExp = new js.RegExp (jsPattern, jsFlags)
144
-
145
- new Pattern (jsRegExp, regex, flags1)
146
- }
143
+ def compile (regex : String , flags : Int ): Pattern =
144
+ PatternCompiler .compile(regex, flags)
147
145
148
146
def compile (regex : String ): Pattern =
149
147
compile(regex, 0 )
@@ -152,66 +150,14 @@ object Pattern {
152
150
compile(regex).matcher(input).matches()
153
151
154
152
def quote (s : String ): String = {
155
- var result = " "
156
- var i = 0
157
- while (i < s.length) {
158
- val c = s.charAt(i)
159
- result += ((c : @ switch) match {
160
- case '\\ ' | '.' | '(' | ')' | '[' | ']' | '{' | '}' | '|'
161
- | '?' | '*' | '+' | '^' | '$' => " \\ " + c
162
- case _ => c
163
- })
164
- i += 1
153
+ var result = " \\ Q"
154
+ var start = 0
155
+ var end = s.indexOf(" \\ E" , start)
156
+ while (end >= 0 ) {
157
+ result += s.substring(start, end) + " \\ E\\\\ E\\ Q"
158
+ start = end + 2
159
+ end = s.indexOf(" \\ E" , start)
165
160
}
166
- result
161
+ result + s.substring(start) + " \\ E "
167
162
}
168
-
169
- /** This is a hack to support StringLike.split().
170
- * It replaces occurrences of \Q<char>\E by quoted(<char>)
171
- */
172
- @ inline
173
- private def trySplitHack (pat : String , flags : Int ) = {
174
- val m = splitHackPat.exec(pat)
175
- if (m != null )
176
- Some ((quote(m(1 ).get), flags))
177
- else
178
- None
179
- }
180
-
181
- @ inline
182
- private def tryFlagHack (pat : String , flags0 : Int ) = {
183
- val m = flagHackPat.exec(pat)
184
- if (m != null ) {
185
- val newPat = pat.substring(m(0 ).get.length) // cut off the flag specifiers
186
- var flags = flags0
187
- for (chars <- m(1 )) {
188
- for (i <- 0 until chars.length())
189
- flags |= charToFlag(chars.charAt(i))
190
- }
191
- for (chars <- m(2 )) {
192
- for (i <- 0 until chars.length())
193
- flags &= ~ charToFlag(chars.charAt(i))
194
- }
195
- Some ((newPat, flags))
196
- } else
197
- None
198
- }
199
-
200
- private def charToFlag (c : Char ) = (c : @ switch) match {
201
- case 'i' => CASE_INSENSITIVE
202
- case 'd' => UNIX_LINES
203
- case 'm' => MULTILINE
204
- case 's' => DOTALL
205
- case 'u' => UNICODE_CASE
206
- case 'x' => COMMENTS
207
- case 'U' => UNICODE_CHARACTER_CLASS
208
- case _ => throw new IllegalArgumentException (" bad in-pattern flag" )
209
- }
210
-
211
- /** matches \Q<char>\E to support StringLike.split */
212
- private val splitHackPat = new js.RegExp (" ^\\\\ Q(.|\\ n|\\ r)\\\\ E$" )
213
-
214
- /** regex to match flag specifiers in regex. E.g. (?u), (?-i), (?U-i) */
215
- private val flagHackPat =
216
- new js.RegExp (" ^\\ (\\ ?([idmsuxU]*)(?:-([idmsuxU]*))?\\ )" )
217
163
}
0 commit comments