@@ -14,13 +14,18 @@ package java.util.regex
14
14
15
15
import scala .annotation .{tailrec , switch }
16
16
17
- import java .util .HashMap
18
-
19
17
import scala .scalajs .js
20
18
21
- /** The goal of a `GroupStartMapper` is to retrieve the start position of each
22
- * group of a matching regular expression where only the strings of the
23
- * matched groups are known.
19
+ import Pattern .IndicesArray
20
+
21
+ /** The goal of an `IndicesBuilder` is to retrieve the start and end positions
22
+ * of each group of a matching regular expression.
23
+ *
24
+ * This is essentially a polyfill for the 'd' flag of `js.RegExp`, which is
25
+ * a Stage 4 proposal scheduled for inclusion in ECMAScript 2022. Without that
26
+ * flag, `js.RegExp` only provides the substrings matched by capturing groups,
27
+ * but not their positions. We implement the positions on top of that.
28
+ *
24
29
* For that, we use the following observation:
25
30
* If the regex /A(B)\1/ matches a string at a given index,
26
31
* then /(A)(B)\2/ matches the same string at the same index.
@@ -38,7 +43,7 @@ import scala.scalajs.js
38
43
* - It computes the start of every group thanks to the groups before it
39
44
* - It builds and returns the mapping of previous group number -> start
40
45
*
41
- * The `pattern` that is parsed by `GroupStartMapper ` is the *compiled* JS
46
+ * The `pattern` that is parsed by `IndicesBuilder ` is the *compiled* JS
42
47
* pattern produced by `PatternCompiler`, not the original Java pattern. This
43
48
* means that we can simplify a number of things with the knowledge that:
44
49
*
@@ -53,13 +58,13 @@ import scala.scalajs.js
53
58
*
54
59
* @author Mikaël Mayer
55
60
*/
56
- private [regex] class GroupStartMapper private (pattern : String , flags : String ,
57
- node : GroupStartMapper .Node , groupCount : Int ,
61
+ private [regex] class IndicesBuilder private (pattern : String , flags : String ,
62
+ node : IndicesBuilder .Node , groupCount : Int ,
58
63
jsRegExpForFind : js.RegExp , jsRegExpForMatches : js.RegExp ) {
59
64
60
- import GroupStartMapper ._
65
+ import IndicesBuilder ._
61
66
62
- def apply (forMatches : Boolean , string : String , index : Int ): js. Array [ Int ] = {
67
+ def apply (forMatches : Boolean , string : String , index : Int ): IndicesArray = {
63
68
val regExp =
64
69
if (forMatches) jsRegExpForMatches
65
70
else jsRegExpForFind
@@ -73,31 +78,42 @@ private[regex] class GroupStartMapper private (pattern: String, flags: String,
73
78
s " Original pattern ' $pattern' with flags ' $flags' did match however. " )
74
79
}
75
80
76
- // Prepare a `groupStartMap` array with the correct length filled with -1
77
- val len = groupCount + 1 // index 0 is not used
78
- val groupStartMap = new js.Array [Int ](len)
79
- var i = 0
81
+ val start = index // by definition
82
+ val end = start + allMatchResult(0 ).get.length()
83
+
84
+ /* Initialize the `indices` array with:
85
+ * - `[start, end]` at index 0, which represents the whole match, and
86
+ * - `undefined` in the other slots.
87
+ *
88
+ * We explicitly store `undefined` in the other slots to prevent the array
89
+ * from containing *empty* slots. That would make it a sparse array, which
90
+ * is less efficient.
91
+ */
92
+ val len = groupCount + 1
93
+ val indices = new IndicesArray (len)
94
+ indices(0 ) = js.Tuple2 (start, end)
95
+ var i = 1
80
96
while (i != len) {
81
- groupStartMap (i) = - 1
97
+ indices (i) = js.undefined
82
98
i += 1
83
99
}
84
100
85
- node.propagateFromStart (allMatchResult, groupStartMap, index )
101
+ node.propagate (allMatchResult, indices, start, end )
86
102
87
- groupStartMap
103
+ indices
88
104
}
89
105
}
90
106
91
- private [regex] object GroupStartMapper {
92
- def apply (pattern : String , flags : String ): GroupStartMapper = {
107
+ private [regex] object IndicesBuilder {
108
+ def apply (pattern : String , flags : String ): IndicesBuilder = {
93
109
val parser = new Parser (pattern)
94
110
val node = parser.parseTopLevel()
95
111
node.setNewGroup(1 )
96
112
val allMatchingPattern = node.buildRegex(parser.groupNodeMap)
97
113
val jsRegExpForFind = new js.RegExp (allMatchingPattern, flags + " g" )
98
114
val jsRegExpForMatches =
99
115
new js.RegExp (Pattern .wrapJSPatternForMatches(allMatchingPattern), flags)
100
- new GroupStartMapper (pattern, flags, node, parser.parsedGroupCount,
116
+ new IndicesBuilder (pattern, flags, node, parser.parsedGroupCount,
101
117
jsRegExpForFind, jsRegExpForMatches)
102
118
}
103
119
@@ -155,16 +171,16 @@ private[regex] object GroupStartMapper {
155
171
* `end`, while other nodes propagate the `start`.
156
172
*/
157
173
def propagate (matchResult : js.RegExp .ExecResult ,
158
- groupStartMap : js. Array [ Int ] , start : Int , end : Int ): Unit
174
+ indices : IndicesArray , start : Int , end : Int ): Unit
159
175
160
176
/** Propagates the appropriate positions to the descendants of this node
161
177
* from its end position.
162
178
*/
163
179
final def propagateFromEnd (matchResult : js.RegExp .ExecResult ,
164
- groupStartMap : js. Array [ Int ] , end : Int ): Unit = {
180
+ indices : IndicesArray , end : Int ): Unit = {
165
181
166
182
val start = matchResult(newGroup).fold(- 1 )(matched => end - matched.length)
167
- propagate(matchResult, groupStartMap , start, end)
183
+ propagate(matchResult, indices , start, end)
168
184
}
169
185
170
186
/** Propagates the appropriate positions to the descendants of this node
@@ -173,10 +189,10 @@ private[regex] object GroupStartMapper {
173
189
* @return the end position of this node, as a convenience for `SequenceNode.propagate`
174
190
*/
175
191
final def propagateFromStart (matchResult : js.RegExp .ExecResult ,
176
- groupStartMap : js. Array [ Int ] , start : Int ): Int = {
192
+ indices : IndicesArray , start : Int ): Int = {
177
193
178
194
val end = matchResult(newGroup).fold(- 1 )(matched => start + matched.length)
179
- propagate(matchResult, groupStartMap , start, end)
195
+ propagate(matchResult, indices , start, end)
180
196
end
181
197
}
182
198
}
@@ -190,15 +206,15 @@ private[regex] object GroupStartMapper {
190
206
" (" + inner.buildRegex(groupNodeMap) + " )"
191
207
192
208
def propagate (matchResult : js.RegExp .ExecResult ,
193
- groupStartMap : js. Array [ Int ] , start : Int , end : Int ): Unit = {
209
+ indices : IndicesArray , start : Int , end : Int ): Unit = {
194
210
/* #3901: A GroupNode within a negative look-ahead node may receive
195
211
* `start != -1` from above, yet not match anything itself. We must
196
212
* always keep the default `-1` if this group node does not match
197
213
* anything.
198
214
*/
199
215
if (matchResult(newGroup).isDefined)
200
- groupStartMap (number) = start
201
- inner.propagate(matchResult, groupStartMap , start, end)
216
+ indices (number) = js. Tuple2 ( start, end)
217
+ inner.propagate(matchResult, indices , start, end)
202
218
}
203
219
}
204
220
@@ -217,11 +233,11 @@ private[regex] object GroupStartMapper {
217
233
" ((" + indicator + inner.buildRegex(groupNodeMap) + " ))"
218
234
219
235
def propagate (matchResult : js.RegExp .ExecResult ,
220
- groupStartMap : js. Array [ Int ] , start : Int , end : Int ): Unit = {
236
+ indices : IndicesArray , start : Int , end : Int ): Unit = {
221
237
if (isLookBehind)
222
- inner.propagateFromEnd(matchResult, groupStartMap , end)
238
+ inner.propagateFromEnd(matchResult, indices , end)
223
239
else
224
- inner.propagateFromStart(matchResult, groupStartMap , start)
240
+ inner.propagateFromStart(matchResult, indices , start)
225
241
}
226
242
}
227
243
@@ -236,8 +252,8 @@ private[regex] object GroupStartMapper {
236
252
" (" + inner.buildRegex(groupNodeMap) + repeater + " )"
237
253
238
254
def propagate (matchResult : js.RegExp .ExecResult ,
239
- groupStartMap : js. Array [ Int ] , start : Int , end : Int ): Unit = {
240
- inner.propagateFromEnd(matchResult, groupStartMap , end)
255
+ indices : IndicesArray , start : Int , end : Int ): Unit = {
256
+ inner.propagateFromEnd(matchResult, indices , end)
241
257
}
242
258
}
243
259
@@ -247,7 +263,7 @@ private[regex] object GroupStartMapper {
247
263
" (" + regex + " )"
248
264
249
265
def propagate (matchResult : js.RegExp .ExecResult ,
250
- groupStartMap : js. Array [ Int ] , start : Int , end : Int ): Unit = {
266
+ indices : IndicesArray , start : Int , end : Int ): Unit = {
251
267
// nothing to do
252
268
}
253
269
}
@@ -262,7 +278,7 @@ private[regex] object GroupStartMapper {
262
278
}
263
279
264
280
def propagate (matchResult : js.RegExp .ExecResult ,
265
- groupStartMap : js. Array [ Int ] , start : Int , end : Int ): Unit = {
281
+ indices : IndicesArray , start : Int , end : Int ): Unit = {
266
282
// nothing to do
267
283
}
268
284
}
@@ -292,13 +308,13 @@ private[regex] object GroupStartMapper {
292
308
}
293
309
294
310
def propagate (matchResult : js.RegExp .ExecResult ,
295
- groupStartMap : js. Array [ Int ] , start : Int , end : Int ): Unit = {
311
+ indices : IndicesArray , start : Int , end : Int ): Unit = {
296
312
val len = sequence.length
297
313
var i = 0
298
314
var nextStart = start
299
315
while (i != len) {
300
316
nextStart =
301
- sequence(i).propagateFromStart(matchResult, groupStartMap , nextStart)
317
+ sequence(i).propagateFromStart(matchResult, indices , nextStart)
302
318
i += 1
303
319
}
304
320
}
@@ -333,11 +349,11 @@ private[regex] object GroupStartMapper {
333
349
}
334
350
335
351
def propagate (matchResult : js.RegExp .ExecResult ,
336
- groupStartMap : js. Array [ Int ] , start : Int , end : Int ): Unit = {
352
+ indices : IndicesArray , start : Int , end : Int ): Unit = {
337
353
val len = alternatives.length
338
354
var i = 0
339
355
while (i != len) {
340
- alternatives(i).propagate(matchResult, groupStartMap , start, end)
356
+ alternatives(i).propagate(matchResult, indices , start, end)
341
357
i += 1
342
358
}
343
359
}
0 commit comments