Skip to content

Commit 6bc51c1

Browse files
committed
minor #15852 [CssSelector] Optimize regexs matching simple selectors (stof)
This PR was merged into the 2.3 branch.

Discussion
----------

[CssSelector] Optimize regexs matching simple selectors

| Q             | A
| ------------- | ---
| Bug fix?      | no
| New feature?  | no
| BC breaks?    | no
| Deprecations? | no
| Tests pass?   | yes
| Fixed tickets | n/a
| License       | MIT
| Doc PR        | n/a

These shortcut parsers are applied first when converting a CSS selector to XPath, to be faster for simple selectors (tag matching, class matching with an optional tag, id matching with an optional tag).

None of the regexes defined here can match anything more by backtracking inside an identifier, so backtracking only slows down the regex engine when the regex does not match (e.g. for any more complex selector, or even for simple selectors without a namespace or without a tag name). Making the quantifiers possessive solves this issue.

I also turned some capturing groups (around the namespace and the namespace delimiter) into non-capturing groups, as we don't care about them in the output (they are only there to make that part optional).

Commits
-------

d5abe0b [CssSelector] Optimize regexs matching simple selectors
2 parents 6947f69 + 877e88b commit 6bc51c1

File tree

3 files changed

+20
-23
lines changed

3 files changed

+20
-23
lines changed

Parser/Shortcut/ClassParser.php

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -33,15 +33,14 @@ public function parse($source)
3333
{
3434
// Matches an optional namespace, optional element, and required class
3535
// $source = 'test|input.ab6bd_field';
36-
// $matches = array (size=5)
37-
// 0 => string 'test:input.ab6bd_field' (length=22)
38-
// 1 => string 'test:' (length=5)
39-
// 2 => string 'test' (length=4)
40-
// 3 => string 'input' (length=5)
41-
// 4 => string 'ab6bd_field' (length=11)
42-
if (preg_match('/^(([a-z]+)\|)?([\w-]+|\*)?\.([\w-]+)$/i', trim($source), $matches)) {
36+
// $matches = array (size=4)
37+
// 0 => string 'test|input.ab6bd_field' (length=22)
38+
// 1 => string 'test' (length=4)
39+
// 2 => string 'input' (length=5)
40+
// 3 => string 'ab6bd_field' (length=11)
41+
if (preg_match('/^(?:([a-z]++)\|)?+([\w-]++|\*)?+\.([\w-]++)$/i', trim($source), $matches)) {
4342
return array(
44-
new SelectorNode(new ClassNode(new ElementNode($matches[2] ?: null, $matches[3] ?: null), $matches[4])),
43+
new SelectorNode(new ClassNode(new ElementNode($matches[1] ?: null, $matches[2] ?: null), $matches[3])),
4544
);
4645
}
4746

Parser/Shortcut/ElementParser.php

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -32,13 +32,12 @@ public function parse($source)
3232
{
3333
// Matches an optional namespace, required element or `*`
3434
// $source = 'testns|testel';
35-
// $matches = array (size=4)
36-
// 0 => string 'testns:testel' (length=13)
37-
// 1 => string 'testns:' (length=7)
38-
// 2 => string 'testns' (length=6)
39-
// 3 => string 'testel' (length=6)
40-
if (preg_match('/^(([a-z]+)\|)?([\w-]+|\*)$/i', trim($source), $matches)) {
41-
return array(new SelectorNode(new ElementNode($matches[2] ?: null, $matches[3])));
35+
// $matches = array (size=3)
36+
// 0 => string 'testns|testel' (length=13)
37+
// 1 => string 'testns' (length=6)
38+
// 2 => string 'testel' (length=6)
39+
if (preg_match('/^(?:([a-z]++)\|)?([\w-]++|\*)$/i', trim($source), $matches)) {
40+
return array(new SelectorNode(new ElementNode($matches[1] ?: null, $matches[2])));
4241
}
4342

4443
return array();

Parser/Shortcut/HashParser.php

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -33,15 +33,14 @@ public function parse($source)
3333
{
3434
// Matches an optional namespace, optional element, and required id
3535
// $source = 'test|input#ab6bd_field';
36-
// $matches = array (size=5)
37-
// 0 => string 'test:input#ab6bd_field' (length=22)
38-
// 1 => string 'test:' (length=5)
39-
// 2 => string 'test' (length=4)
40-
// 3 => string 'input' (length=5)
41-
// 4 => string 'ab6bd_field' (length=11)
42-
if (preg_match('/^(([a-z]+)\|)?([\w-]+|\*)?#([\w-]+)$/i', trim($source), $matches)) {
36+
// $matches = array (size=4)
37+
// 0 => string 'test|input#ab6bd_field' (length=22)
38+
// 1 => string 'test' (length=4)
39+
// 2 => string 'input' (length=5)
40+
// 3 => string 'ab6bd_field' (length=11)
41+
if (preg_match('/^(?:([a-z]++)\|)?+([\w-]++|\*)?+#([\w-]++)$/i', trim($source), $matches)) {
4342
return array(
44-
new SelectorNode(new HashNode(new ElementNode($matches[2] ?: null, $matches[3] ?: null), $matches[4])),
43+
new SelectorNode(new HashNode(new ElementNode($matches[1] ?: null, $matches[2] ?: null), $matches[3])),
4544
);
4645
}
4746

0 commit comments

Comments
 (0)