@@ -14,13 +14,18 @@ package java.util.regex
14
14
15
15
import scala .annotation .{tailrec , switch }
16
16
17
- import java .util .HashMap
18
-
19
17
import scala .scalajs .js
20
18
21
- /** The goal of a `GroupStartMapper` is to retrieve the start position of each
22
- * group of a matching regular expression where only the strings of the
23
- * matched groups are known.
19
+ import Pattern .IndicesArray
20
+
21
+ /** The goal of an `IndicesBuilder` is to retrieve the start and end positions
22
+ * of each group of a matching regular expression.
23
+ *
24
+ * This is essentially a polyfill for the 'd' flag of `js.RegExp`, which
25
+ * should make its way into ECMAScript 2021. Without that flag, `js.RegExp`
26
+ * only provides the substrings matched by capturing groups, but not their
27
+ * positions. We implement the positions on top of that.
28
+ *
24
29
* For that, we use the following observation:
25
30
* If the regex /A(B)\1/ matches a string at a given index,
26
31
* then /(A)(B)\2/ matches the same string at the same index.
@@ -38,7 +43,7 @@ import scala.scalajs.js
38
43
* - It computes the start of every group thanks to the groups before it
39
44
* - It builds and returns the mapping of previous group number -> (start, end)
40
45
*
41
- * The `pattern` that is parsed by `GroupStartMapper ` is the *compiled* JS
46
+ * The `pattern` that is parsed by `IndicesBuilder ` is the *compiled* JS
42
47
* pattern produced by `PatternCompiler`, not the original Java pattern. This
43
48
* means that we can simplify a number of things with the knowledge that:
44
49
*
@@ -53,13 +58,13 @@ import scala.scalajs.js
53
58
*
54
59
* @author Mikaël Mayer
55
60
*/
56
- private [regex] class GroupStartMapper private (pattern : String , flags : String ,
57
- node : GroupStartMapper .Node , groupCount : Int ,
61
+ private [regex] class IndicesBuilder private (pattern : String , flags : String ,
62
+ node : IndicesBuilder .Node , groupCount : Int ,
58
63
jsRegExpForFind : js.RegExp , jsRegExpForMatches : js.RegExp ) {
59
64
60
- import GroupStartMapper ._
65
+ import IndicesBuilder ._
61
66
62
- def apply (forMatches : Boolean , string : String , index : Int ): js. Array [ Int ] = {
67
+ def apply (forMatches : Boolean , string : String , index : Int ): IndicesArray = {
63
68
val regExp =
64
69
if (forMatches) jsRegExpForMatches
65
70
else jsRegExpForFind
@@ -73,30 +78,34 @@ private[regex] class GroupStartMapper private (pattern: String, flags: String,
73
78
s " Original pattern ' $pattern' with flags ' $flags' did match however. " )
74
79
}
75
80
76
- // Prepare a `groupStartMap` array with the correct length filled with -1
77
- val len = groupCount + 1 // index 0 is not used
78
- val groupStartMap = new js.Array [Int ](len)
79
- var i = 0
81
+ val start = index // by definition
82
+ val end = start + allMatchResult(0 ).get.length()
83
+
84
+ // Prepare an `indices` array with the correct length filled with undefined
85
+ val len = groupCount + 1 // index 0 is the whole match
86
+ val indices = new IndicesArray (len)
87
+ indices(0 ) = js.Tuple2 (start, end)
88
+ var i = 1
80
89
while (i != len) {
81
- groupStartMap (i) = - 1
90
+ indices (i) = js.undefined
82
91
i += 1
83
92
}
84
93
85
- node.propagateFromStart (allMatchResult, groupStartMap, index )
94
+ node.propagate (allMatchResult, indices, start, end )
86
95
87
- groupStartMap
96
+ indices
88
97
}
89
98
}
90
99
91
- private [regex] object GroupStartMapper {
92
- def apply (pattern : String , flags : String ): GroupStartMapper = {
100
+ private [regex] object IndicesBuilder {
101
+ def apply (pattern : String , flags : String ): IndicesBuilder = {
93
102
val parser = new Parser (pattern)
94
103
val node = parser.parseTopLevel()
95
104
node.setNewGroup(1 )
96
105
val allMatchingPattern = node.buildRegex(parser.groupNodeMap)
97
106
val jsRegExpForFind = new js.RegExp (allMatchingPattern, flags + " g" )
98
107
val jsRegExpForMatches = new js.RegExp (" ^" + allMatchingPattern + " $" , flags)
99
- new GroupStartMapper (pattern, flags, node, parser.parsedGroupCount,
108
+ new IndicesBuilder (pattern, flags, node, parser.parsedGroupCount,
100
109
jsRegExpForFind, jsRegExpForMatches)
101
110
}
102
111
@@ -154,16 +163,16 @@ private[regex] object GroupStartMapper {
154
163
* `end`, while other nodes propagate the `start`.
155
164
*/
156
165
def propagate (matchResult : js.RegExp .ExecResult ,
157
- groupStartMap : js. Array [ Int ] , start : Int , end : Int ): Unit
166
+ indices : IndicesArray , start : Int , end : Int ): Unit
158
167
159
168
/** Propagates the appropriate positions to the descendants of this node
160
169
* from its end position.
161
170
*/
162
171
final def propagateFromEnd (matchResult : js.RegExp .ExecResult ,
163
- groupStartMap : js. Array [ Int ] , end : Int ): Unit = {
172
+ indices : IndicesArray , end : Int ): Unit = {
164
173
165
174
val start = matchResult(newGroup).fold(- 1 )(matched => end - matched.length)
166
- propagate(matchResult, groupStartMap , start, end)
175
+ propagate(matchResult, indices , start, end)
167
176
}
168
177
169
178
/** Propagates the appropriate positions to the descendants of this node
@@ -172,10 +181,10 @@ private[regex] object GroupStartMapper {
172
181
* @return the end position of this node, as a convenience for `SequenceNode.propagate`
173
182
*/
174
183
final def propagateFromStart (matchResult : js.RegExp .ExecResult ,
175
- groupStartMap : js. Array [ Int ] , start : Int ): Int = {
184
+ indices : IndicesArray , start : Int ): Int = {
176
185
177
186
val end = matchResult(newGroup).fold(- 1 )(matched => start + matched.length)
178
- propagate(matchResult, groupStartMap , start, end)
187
+ propagate(matchResult, indices , start, end)
179
188
end
180
189
}
181
190
}
@@ -189,15 +198,15 @@ private[regex] object GroupStartMapper {
189
198
" (" + inner.buildRegex(groupNodeMap) + " )"
190
199
191
200
def propagate (matchResult : js.RegExp .ExecResult ,
192
- groupStartMap : js. Array [ Int ] , start : Int , end : Int ): Unit = {
201
+ indices : IndicesArray , start : Int , end : Int ): Unit = {
193
202
/* #3901: A GroupNode within a negative look-ahead node may receive
194
203
* `start != -1` from above, yet not match anything itself. We must
195
204
* always keep the default `-1` if this group node does not match
196
205
* anything.
197
206
*/
198
207
if (matchResult(newGroup).isDefined)
199
- groupStartMap (number) = start
200
- inner.propagate(matchResult, groupStartMap , start, end)
208
+ indices (number) = js. Tuple2 ( start, end)
209
+ inner.propagate(matchResult, indices , start, end)
201
210
}
202
211
}
203
212
@@ -216,11 +225,11 @@ private[regex] object GroupStartMapper {
216
225
" ((" + indicator + inner.buildRegex(groupNodeMap) + " ))"
217
226
218
227
def propagate (matchResult : js.RegExp .ExecResult ,
219
- groupStartMap : js. Array [ Int ] , start : Int , end : Int ): Unit = {
228
+ indices : IndicesArray , start : Int , end : Int ): Unit = {
220
229
if (isLookBehind)
221
- inner.propagateFromEnd(matchResult, groupStartMap , end)
230
+ inner.propagateFromEnd(matchResult, indices , end)
222
231
else
223
- inner.propagateFromStart(matchResult, groupStartMap , start)
232
+ inner.propagateFromStart(matchResult, indices , start)
224
233
}
225
234
}
226
235
@@ -235,8 +244,8 @@ private[regex] object GroupStartMapper {
235
244
" (" + inner.buildRegex(groupNodeMap) + repeater + " )"
236
245
237
246
def propagate (matchResult : js.RegExp .ExecResult ,
238
- groupStartMap : js. Array [ Int ] , start : Int , end : Int ): Unit = {
239
- inner.propagateFromEnd(matchResult, groupStartMap , end)
247
+ indices : IndicesArray , start : Int , end : Int ): Unit = {
248
+ inner.propagateFromEnd(matchResult, indices , end)
240
249
}
241
250
}
242
251
@@ -246,7 +255,7 @@ private[regex] object GroupStartMapper {
246
255
" (" + regex + " )"
247
256
248
257
def propagate (matchResult : js.RegExp .ExecResult ,
249
- groupStartMap : js. Array [ Int ] , start : Int , end : Int ): Unit = {
258
+ indices : IndicesArray , start : Int , end : Int ): Unit = {
250
259
// nothing to do
251
260
}
252
261
}
@@ -261,7 +270,7 @@ private[regex] object GroupStartMapper {
261
270
}
262
271
263
272
def propagate (matchResult : js.RegExp .ExecResult ,
264
- groupStartMap : js. Array [ Int ] , start : Int , end : Int ): Unit = {
273
+ indices : IndicesArray , start : Int , end : Int ): Unit = {
265
274
// nothing to do
266
275
}
267
276
}
@@ -291,13 +300,13 @@ private[regex] object GroupStartMapper {
291
300
}
292
301
293
302
def propagate (matchResult : js.RegExp .ExecResult ,
294
- groupStartMap : js. Array [ Int ] , start : Int , end : Int ): Unit = {
303
+ indices : IndicesArray , start : Int , end : Int ): Unit = {
295
304
val len = sequence.length
296
305
var i = 0
297
306
var nextStart = start
298
307
while (i != len) {
299
308
nextStart =
300
- sequence(i).propagateFromStart(matchResult, groupStartMap , nextStart)
309
+ sequence(i).propagateFromStart(matchResult, indices , nextStart)
301
310
i += 1
302
311
}
303
312
}
@@ -332,11 +341,11 @@ private[regex] object GroupStartMapper {
332
341
}
333
342
334
343
def propagate (matchResult : js.RegExp .ExecResult ,
335
- groupStartMap : js. Array [ Int ] , start : Int , end : Int ): Unit = {
344
+ indices : IndicesArray , start : Int , end : Int ): Unit = {
336
345
val len = alternatives.length
337
346
var i = 0
338
347
while (i != len) {
339
- alternatives(i).propagate(matchResult, groupStartMap , start, end)
348
+ alternatives(i).propagate(matchResult, indices , start, end)
340
349
i += 1
341
350
}
342
351
}
0 commit comments