@@ -14,13 +14,18 @@ package java.util.regex
14
14
15
15
import scala .annotation .{tailrec , switch }
16
16
17
- import java .util .HashMap
18
-
19
17
import scala .scalajs .js
20
18
21
- /** The goal of a `GroupStartMapper` is to retrieve the start position of each
22
- * group of a matching regular expression where only the strings of the
23
- * matched groups are known.
19
+ import Pattern .IndicesArray
20
+
21
+ /** The goal of an `IndicesBuilder` is to retrieve the start and end positions
22
+ * of each group of a matching regular expression.
23
+ *
24
+ * This is essentially a polyfill for the 'd' flag of `js.RegExp`, which is
25
+ * a Stage 4 proposal scheduled for inclusion in ECMAScript 2022. Without that
26
+ * flag, `js.RegExp` only provides the substrings matched by capturing groups,
27
+ * but not their positions. We implement the positions on top of that.
28
+ *
24
29
* For that, we use the following observation:
25
30
* If the regex /A(B)\1/ matches a string at a given index,
26
31
* then /(A)(B)\2/ matches the same string at the same index.
@@ -38,7 +43,7 @@ import scala.scalajs.js
38
43
* - It computes the start and end of every group thanks to the groups before it
39
44
* - It builds and returns the mapping of previous group number -> (start, end)
40
45
*
41
- * The `pattern` that is parsed by `GroupStartMapper ` is the *compiled* JS
46
+ * The `pattern` that is parsed by `IndicesBuilder ` is the *compiled* JS
42
47
* pattern produced by `PatternCompiler`, not the original Java pattern. This
43
48
* means that we can simplify a number of things with the knowledge that:
44
49
*
@@ -53,13 +58,13 @@ import scala.scalajs.js
53
58
*
54
59
* @author Mikaël Mayer
55
60
*/
56
- private [regex] class GroupStartMapper private (pattern : String , flags : String ,
57
- node : GroupStartMapper .Node , groupCount : Int ,
61
+ private [regex] class IndicesBuilder private (pattern : String , flags : String ,
62
+ node : IndicesBuilder .Node , groupCount : Int ,
58
63
jsRegExpForFind : js.RegExp , jsRegExpForMatches : js.RegExp ) {
59
64
60
- import GroupStartMapper ._
65
+ import IndicesBuilder ._
61
66
62
- def apply (forMatches : Boolean , string : String , index : Int ): js. Array [ Int ] = {
67
+ def apply (forMatches : Boolean , string : String , index : Int ): IndicesArray = {
63
68
val regExp =
64
69
if (forMatches) jsRegExpForMatches
65
70
else jsRegExpForFind
@@ -73,31 +78,35 @@ private[regex] class GroupStartMapper private (pattern: String, flags: String,
73
78
s " Original pattern ' $pattern' with flags ' $flags' did match however. " )
74
79
}
75
80
76
- // Prepare a `groupStartMap` array with the correct length filled with -1
77
- val len = groupCount + 1 // index 0 is not used
78
- val groupStartMap = new js.Array [Int ](len)
79
- var i = 0
81
+ val start = index // by definition
82
+ val end = start + allMatchResult(0 ).get.length()
83
+
84
+ // Prepare an `indices` array with the correct length filled with undefined
85
+ val len = groupCount + 1 // index 0 is the whole match
86
+ val indices = new IndicesArray (len)
87
+ indices(0 ) = js.Tuple2 (start, end)
88
+ var i = 1
80
89
while (i != len) {
81
- groupStartMap (i) = - 1
90
+ indices (i) = js.undefined
82
91
i += 1
83
92
}
84
93
85
- node.propagateFromStart (allMatchResult, groupStartMap, index )
94
+ node.propagate (allMatchResult, indices, start, end )
86
95
87
- groupStartMap
96
+ indices
88
97
}
89
98
}
90
99
91
- private [regex] object GroupStartMapper {
92
- def apply (pattern : String , flags : String ): GroupStartMapper = {
100
+ private [regex] object IndicesBuilder {
101
+ def apply (pattern : String , flags : String ): IndicesBuilder = {
93
102
val parser = new Parser (pattern)
94
103
val node = parser.parseTopLevel()
95
104
node.setNewGroup(1 )
96
105
val allMatchingPattern = node.buildRegex(parser.groupNodeMap)
97
106
val jsRegExpForFind = new js.RegExp (allMatchingPattern, flags + " g" )
98
107
val jsRegExpForMatches =
99
108
new js.RegExp (Pattern .wrapJSPatternForMatches(allMatchingPattern), flags)
100
- new GroupStartMapper (pattern, flags, node, parser.parsedGroupCount,
109
+ new IndicesBuilder (pattern, flags, node, parser.parsedGroupCount,
101
110
jsRegExpForFind, jsRegExpForMatches)
102
111
}
103
112
@@ -155,16 +164,16 @@ private[regex] object GroupStartMapper {
155
164
* `end`, while other nodes propagate the `start`.
156
165
*/
157
166
def propagate (matchResult : js.RegExp .ExecResult ,
158
- groupStartMap : js. Array [ Int ] , start : Int , end : Int ): Unit
167
+ indices : IndicesArray , start : Int , end : Int ): Unit
159
168
160
169
/** Propagates the appropriate positions to the descendants of this node
161
170
* from its end position.
162
171
*/
163
172
final def propagateFromEnd (matchResult : js.RegExp .ExecResult ,
164
- groupStartMap : js. Array [ Int ] , end : Int ): Unit = {
173
+ indices : IndicesArray , end : Int ): Unit = {
165
174
166
175
val start = matchResult(newGroup).fold(- 1 )(matched => end - matched.length)
167
- propagate(matchResult, groupStartMap , start, end)
176
+ propagate(matchResult, indices , start, end)
168
177
}
169
178
170
179
/** Propagates the appropriate positions to the descendants of this node
@@ -173,10 +182,10 @@ private[regex] object GroupStartMapper {
173
182
* @return the end position of this node, as a convenience for `SequenceNode.propagate`
174
183
*/
175
184
final def propagateFromStart (matchResult : js.RegExp .ExecResult ,
176
- groupStartMap : js. Array [ Int ] , start : Int ): Int = {
185
+ indices : IndicesArray , start : Int ): Int = {
177
186
178
187
val end = matchResult(newGroup).fold(- 1 )(matched => start + matched.length)
179
- propagate(matchResult, groupStartMap , start, end)
188
+ propagate(matchResult, indices , start, end)
180
189
end
181
190
}
182
191
}
@@ -190,15 +199,15 @@ private[regex] object GroupStartMapper {
190
199
" (" + inner.buildRegex(groupNodeMap) + " )"
191
200
192
201
def propagate (matchResult : js.RegExp .ExecResult ,
193
- groupStartMap : js. Array [ Int ] , start : Int , end : Int ): Unit = {
202
+ indices : IndicesArray , start : Int , end : Int ): Unit = {
194
203
/* #3901: A GroupNode within a negative look-ahead node may receive
195
204
* `start != -1` from above, yet not match anything itself. We must
196
205
* always keep the default `undefined` if this group node does not match
197
206
* anything.
198
207
*/
199
208
if (matchResult(newGroup).isDefined)
200
- groupStartMap (number) = start
201
- inner.propagate(matchResult, groupStartMap , start, end)
209
+ indices (number) = js. Tuple2 ( start, end)
210
+ inner.propagate(matchResult, indices , start, end)
202
211
}
203
212
}
204
213
@@ -217,11 +226,11 @@ private[regex] object GroupStartMapper {
217
226
" ((" + indicator + inner.buildRegex(groupNodeMap) + " ))"
218
227
219
228
def propagate (matchResult : js.RegExp .ExecResult ,
220
- groupStartMap : js. Array [ Int ] , start : Int , end : Int ): Unit = {
229
+ indices : IndicesArray , start : Int , end : Int ): Unit = {
221
230
if (isLookBehind)
222
- inner.propagateFromEnd(matchResult, groupStartMap , end)
231
+ inner.propagateFromEnd(matchResult, indices , end)
223
232
else
224
- inner.propagateFromStart(matchResult, groupStartMap , start)
233
+ inner.propagateFromStart(matchResult, indices , start)
225
234
}
226
235
}
227
236
@@ -236,8 +245,8 @@ private[regex] object GroupStartMapper {
236
245
" (" + inner.buildRegex(groupNodeMap) + repeater + " )"
237
246
238
247
def propagate (matchResult : js.RegExp .ExecResult ,
239
- groupStartMap : js. Array [ Int ] , start : Int , end : Int ): Unit = {
240
- inner.propagateFromEnd(matchResult, groupStartMap , end)
248
+ indices : IndicesArray , start : Int , end : Int ): Unit = {
249
+ inner.propagateFromEnd(matchResult, indices , end)
241
250
}
242
251
}
243
252
@@ -247,7 +256,7 @@ private[regex] object GroupStartMapper {
247
256
" (" + regex + " )"
248
257
249
258
def propagate (matchResult : js.RegExp .ExecResult ,
250
- groupStartMap : js. Array [ Int ] , start : Int , end : Int ): Unit = {
259
+ indices : IndicesArray , start : Int , end : Int ): Unit = {
251
260
// nothing to do
252
261
}
253
262
}
@@ -262,7 +271,7 @@ private[regex] object GroupStartMapper {
262
271
}
263
272
264
273
def propagate (matchResult : js.RegExp .ExecResult ,
265
- groupStartMap : js. Array [ Int ] , start : Int , end : Int ): Unit = {
274
+ indices : IndicesArray , start : Int , end : Int ): Unit = {
266
275
// nothing to do
267
276
}
268
277
}
@@ -292,13 +301,13 @@ private[regex] object GroupStartMapper {
292
301
}
293
302
294
303
def propagate (matchResult : js.RegExp .ExecResult ,
295
- groupStartMap : js. Array [ Int ] , start : Int , end : Int ): Unit = {
304
+ indices : IndicesArray , start : Int , end : Int ): Unit = {
296
305
val len = sequence.length
297
306
var i = 0
298
307
var nextStart = start
299
308
while (i != len) {
300
309
nextStart =
301
- sequence(i).propagateFromStart(matchResult, groupStartMap , nextStart)
310
+ sequence(i).propagateFromStart(matchResult, indices , nextStart)
302
311
i += 1
303
312
}
304
313
}
@@ -333,11 +342,11 @@ private[regex] object GroupStartMapper {
333
342
}
334
343
335
344
def propagate (matchResult : js.RegExp .ExecResult ,
336
- groupStartMap : js. Array [ Int ] , start : Int , end : Int ): Unit = {
345
+ indices : IndicesArray , start : Int , end : Int ): Unit = {
337
346
val len = alternatives.length
338
347
var i = 0
339
348
while (i != len) {
340
- alternatives(i).propagate(matchResult, groupStartMap , start, end)
349
+ alternatives(i).propagate(matchResult, indices , start, end)
341
350
i += 1
342
351
}
343
352
}
0 commit comments