Skip to content

Commit a73746b

Browse files
authored
Merge pull request scala-js#4470 from sjrd/regex-followup
Clean up j.u.regex.* and use the native 'd' flag when available
2 parents 5df7d64 + 5c76e19 commit a73746b

File tree

7 files changed

+214
-176
lines changed

7 files changed

+214
-176
lines changed

javalib/src/main/scala/java/util/regex/GroupStartMapper.scala renamed to javalib/src/main/scala/java/util/regex/IndicesBuilder.scala

+55-39
Original file line numberDiff line numberDiff line change
@@ -14,13 +14,18 @@ package java.util.regex
1414

1515
import scala.annotation.{tailrec, switch}
1616

17-
import java.util.HashMap
18-
1917
import scala.scalajs.js
2018

21-
/** The goal of a `GroupStartMapper` is to retrieve the start position of each
22-
* group of a matching regular expression where only the strings of the
23-
* matched groups are known.
19+
import Pattern.IndicesArray
20+
21+
/** The goal of an `IndicesBuilder` is to retrieve the start and end positions
22+
* of each group of a matching regular expression.
23+
*
24+
* This is essentially a polyfill for the 'd' flag of `js.RegExp`, which is
25+
* a Stage 4 proposal scheduled for inclusion in ECMAScript 2022. Without that
26+
* flag, `js.RegExp` only provides the substrings matched by capturing groups,
27+
* but not their positions. We implement the positions on top of that.
28+
*
2429
* For that, we use the following observation:
2530
* If the regex /A(B)\1/ matches a string at a given index,
2631
* then /(A)(B)\2/ matches the same string at the same index.
@@ -38,7 +43,7 @@ import scala.scalajs.js
3843
* - It computes the start of every group thanks to the groups before it
3944
* - It builds and returns the mapping of previous group number -> start
4045
*
41-
* The `pattern` that is parsed by `GroupStartMapper` is the *compiled* JS
46+
* The `pattern` that is parsed by `IndicesBuilder` is the *compiled* JS
4247
* pattern produced by `PatternCompiler`, not the original Java pattern. This
4348
* means that we can simplify a number of things with the knowledge that:
4449
*
@@ -53,13 +58,13 @@ import scala.scalajs.js
5358
*
5459
* @author Mikaël Mayer
5560
*/
56-
private[regex] class GroupStartMapper private (pattern: String, flags: String,
57-
node: GroupStartMapper.Node, groupCount: Int,
61+
private[regex] class IndicesBuilder private (pattern: String, flags: String,
62+
node: IndicesBuilder.Node, groupCount: Int,
5863
jsRegExpForFind: js.RegExp, jsRegExpForMatches: js.RegExp) {
5964

60-
import GroupStartMapper._
65+
import IndicesBuilder._
6166

62-
def apply(forMatches: Boolean, string: String, index: Int): js.Array[Int] = {
67+
def apply(forMatches: Boolean, string: String, index: Int): IndicesArray = {
6368
val regExp =
6469
if (forMatches) jsRegExpForMatches
6570
else jsRegExpForFind
@@ -73,31 +78,42 @@ private[regex] class GroupStartMapper private (pattern: String, flags: String,
7378
s"Original pattern '$pattern' with flags '$flags' did match however.")
7479
}
7580

76-
// Prepare a `groupStartMap` array with the correct length filled with -1
77-
val len = groupCount + 1 // index 0 is not used
78-
val groupStartMap = new js.Array[Int](len)
79-
var i = 0
81+
val start = index // by definition
82+
val end = start + allMatchResult(0).get.length()
83+
84+
/* Initialize the `indices` array with:
85+
* - `[start, end]` at index 0, which represents the whole match, and
86+
* - `undefined` in the other slots.
87+
*
88+
* We explicitly store `undefined` in the other slots to prevent the array
89+
* from containing *empty* slots. That would make it a sparse array, which
90+
* is less efficient.
91+
*/
92+
val len = groupCount + 1
93+
val indices = new IndicesArray(len)
94+
indices(0) = js.Tuple2(start, end)
95+
var i = 1
8096
while (i != len) {
81-
groupStartMap(i) = -1
97+
indices(i) = js.undefined
8298
i += 1
8399
}
84100

85-
node.propagateFromStart(allMatchResult, groupStartMap, index)
101+
node.propagate(allMatchResult, indices, start, end)
86102

87-
groupStartMap
103+
indices
88104
}
89105
}
90106

91-
private[regex] object GroupStartMapper {
92-
def apply(pattern: String, flags: String): GroupStartMapper = {
107+
private[regex] object IndicesBuilder {
108+
def apply(pattern: String, flags: String): IndicesBuilder = {
93109
val parser = new Parser(pattern)
94110
val node = parser.parseTopLevel()
95111
node.setNewGroup(1)
96112
val allMatchingPattern = node.buildRegex(parser.groupNodeMap)
97113
val jsRegExpForFind = new js.RegExp(allMatchingPattern, flags + "g")
98114
val jsRegExpForMatches =
99115
new js.RegExp(Pattern.wrapJSPatternForMatches(allMatchingPattern), flags)
100-
new GroupStartMapper(pattern, flags, node, parser.parsedGroupCount,
116+
new IndicesBuilder(pattern, flags, node, parser.parsedGroupCount,
101117
jsRegExpForFind, jsRegExpForMatches)
102118
}
103119

@@ -155,16 +171,16 @@ private[regex] object GroupStartMapper {
155171
* `end`, while other nodes propagate the `start`.
156172
*/
157173
def propagate(matchResult: js.RegExp.ExecResult,
158-
groupStartMap: js.Array[Int], start: Int, end: Int): Unit
174+
indices: IndicesArray, start: Int, end: Int): Unit
159175

160176
/** Propagates the appropriate positions to the descendants of this node
161177
* from its end position.
162178
*/
163179
final def propagateFromEnd(matchResult: js.RegExp.ExecResult,
164-
groupStartMap: js.Array[Int], end: Int): Unit = {
180+
indices: IndicesArray, end: Int): Unit = {
165181

166182
val start = matchResult(newGroup).fold(-1)(matched => end - matched.length)
167-
propagate(matchResult, groupStartMap, start, end)
183+
propagate(matchResult, indices, start, end)
168184
}
169185

170186
/** Propagates the appropriate positions to the descendants of this node
@@ -173,10 +189,10 @@ private[regex] object GroupStartMapper {
173189
* @return the end position of this node, as a convenience for `SequenceNode.propagate`
174190
*/
175191
final def propagateFromStart(matchResult: js.RegExp.ExecResult,
176-
groupStartMap: js.Array[Int], start: Int): Int = {
192+
indices: IndicesArray, start: Int): Int = {
177193

178194
val end = matchResult(newGroup).fold(-1)(matched => start + matched.length)
179-
propagate(matchResult, groupStartMap, start, end)
195+
propagate(matchResult, indices, start, end)
180196
end
181197
}
182198
}
@@ -190,15 +206,15 @@ private[regex] object GroupStartMapper {
190206
"(" + inner.buildRegex(groupNodeMap) + ")"
191207

192208
def propagate(matchResult: js.RegExp.ExecResult,
193-
groupStartMap: js.Array[Int], start: Int, end: Int): Unit = {
209+
indices: IndicesArray, start: Int, end: Int): Unit = {
194210
/* #3901: A GroupNode within a negative look-ahead node may receive
195211
* `start != -1` from above, yet not match anything itself. We must
196212
* always keep the default `-1` if this group node does not match
197213
* anything.
198214
*/
199215
if (matchResult(newGroup).isDefined)
200-
groupStartMap(number) = start
201-
inner.propagate(matchResult, groupStartMap, start, end)
216+
indices(number) = js.Tuple2(start, end)
217+
inner.propagate(matchResult, indices, start, end)
202218
}
203219
}
204220

@@ -217,11 +233,11 @@ private[regex] object GroupStartMapper {
217233
"((" + indicator + inner.buildRegex(groupNodeMap) + "))"
218234

219235
def propagate(matchResult: js.RegExp.ExecResult,
220-
groupStartMap: js.Array[Int], start: Int, end: Int): Unit = {
236+
indices: IndicesArray, start: Int, end: Int): Unit = {
221237
if (isLookBehind)
222-
inner.propagateFromEnd(matchResult, groupStartMap, end)
238+
inner.propagateFromEnd(matchResult, indices, end)
223239
else
224-
inner.propagateFromStart(matchResult, groupStartMap, start)
240+
inner.propagateFromStart(matchResult, indices, start)
225241
}
226242
}
227243

@@ -236,8 +252,8 @@ private[regex] object GroupStartMapper {
236252
"(" + inner.buildRegex(groupNodeMap) + repeater + ")"
237253

238254
def propagate(matchResult: js.RegExp.ExecResult,
239-
groupStartMap: js.Array[Int], start: Int, end: Int): Unit = {
240-
inner.propagateFromEnd(matchResult, groupStartMap, end)
255+
indices: IndicesArray, start: Int, end: Int): Unit = {
256+
inner.propagateFromEnd(matchResult, indices, end)
241257
}
242258
}
243259

@@ -247,7 +263,7 @@ private[regex] object GroupStartMapper {
247263
"(" + regex + ")"
248264

249265
def propagate(matchResult: js.RegExp.ExecResult,
250-
groupStartMap: js.Array[Int], start: Int, end: Int): Unit = {
266+
indices: IndicesArray, start: Int, end: Int): Unit = {
251267
// nothing to do
252268
}
253269
}
@@ -262,7 +278,7 @@ private[regex] object GroupStartMapper {
262278
}
263279

264280
def propagate(matchResult: js.RegExp.ExecResult,
265-
groupStartMap: js.Array[Int], start: Int, end: Int): Unit = {
281+
indices: IndicesArray, start: Int, end: Int): Unit = {
266282
// nothing to do
267283
}
268284
}
@@ -292,13 +308,13 @@ private[regex] object GroupStartMapper {
292308
}
293309

294310
def propagate(matchResult: js.RegExp.ExecResult,
295-
groupStartMap: js.Array[Int], start: Int, end: Int): Unit = {
311+
indices: IndicesArray, start: Int, end: Int): Unit = {
296312
val len = sequence.length
297313
var i = 0
298314
var nextStart = start
299315
while (i != len) {
300316
nextStart =
301-
sequence(i).propagateFromStart(matchResult, groupStartMap, nextStart)
317+
sequence(i).propagateFromStart(matchResult, indices, nextStart)
302318
i += 1
303319
}
304320
}
@@ -333,11 +349,11 @@ private[regex] object GroupStartMapper {
333349
}
334350

335351
def propagate(matchResult: js.RegExp.ExecResult,
336-
groupStartMap: js.Array[Int], start: Int, end: Int): Unit = {
352+
indices: IndicesArray, start: Int, end: Int): Unit = {
337353
val len = alternatives.length
338354
var i = 0
339355
while (i != len) {
340-
alternatives(i).propagate(matchResult, groupStartMap, start, end)
356+
alternatives(i).propagate(matchResult, indices, start, end)
341357
i += 1
342358
}
343359
}

0 commit comments

Comments
 (0)