Skip to content

Commit 151af3f

Browse files
[DomCrawler] Optimize DomCrawler::relativize()
1 parent 5b26e33 commit 151af3f

File tree

1 file changed

+42
-52
lines changed

1 file changed

+42
-52
lines changed

src/Symfony/Component/DomCrawler/Crawler.php

+42-52
Original file line number | Diff line number | Diff line change
@@ -860,18 +860,42 @@ private function relativize($xpath)
860860
// We cannot simply drop
861861
$nonMatchingExpression = 'a[name() = "b"]';
862862

863-
// Split any unions into individual expressions.
864-
foreach ($this->splitUnionParts($xpath) as $expression) {
865-
$expression = trim($expression);
866-
$parenthesis = '';
867-
868-
// If the union is inside some braces, we need to preserve the opening braces and apply
869-
// the change only inside it.
870-
if (preg_match('/^[\(\s*]+/', $expression, $matches)) {
871-
$parenthesis = $matches[0];
872-
$expression = substr($expression, strlen($parenthesis));
863+
$xpathLen = strlen($xpath);
864+
$openedBrackets = 0;
865+
$lastUnion = strspn($xpath, " \t\n\r\0\x0B");
866+
867+
for ($i = $lastUnion; $i <= $xpathLen; ++$i) {
868+
$i += strcspn($xpath, '"\'[]|', $i);
869+
870+
if ($i < $xpathLen) {
871+
switch ($xpath[$i]) {
872+
case '"':
873+
case "'":
874+
if (false === $i = strpos($xpath, $xpath[$i], $i + 1)) {
875+
return $xpath; // The XPath expression is invalid
876+
}
877+
continue 2;
878+
case '[':
879+
case ']':
880+
$openedBrackets += '[' === $xpath[$i] ? 1 : -1;
881+
continue 2;
882+
}
883+
}
884+
if ($openedBrackets) {
885+
continue;
873886
}
874887

888+
if ('(' === $xpath[$lastUnion]) {
889+
// If the union is inside some braces, we need to preserve the opening braces and apply
890+
// the change only inside it.
891+
$j = 1 + strspn($xpath, "( \t\n\r\0\x0B", $lastUnion + 1);
892+
$parenthesis = substr($xpath, $lastUnion, $j);
893+
$lastUnion += $j;
894+
} else {
895+
$parenthesis = '';
896+
}
897+
$expression = rtrim(substr($xpath, $lastUnion, $i - $lastUnion));
898+
875899
// BC for Symfony 2.4 and lower, where elements were added in a fake _root parent
876900
if (0 === strpos($expression, '/_root/')) {
877901
$expression = './'.substr($expression, 7);
@@ -880,7 +904,7 @@ private function relativize($xpath)
880904
}
881905

882906
// add prefix before absolute element selector
883-
if (empty($expression)) {
907+
if ('' === $expression) {
884908
$expression = $nonMatchingExpression;
885909
} elseif (0 === strpos($expression, '//')) {
886910
$expression = 'descendant-or-self::'.substr($expression, 2);
@@ -898,58 +922,24 @@ private function relativize($xpath)
898922
// '.' is the fake root element in Symfony 2.4 and lower, which is excluded from results
899923
$expression = $nonMatchingExpression;
900924
} elseif (0 === strpos($expression, 'descendant::')) {
901-
$expression = 'descendant-or-self::'.substr($expression, strlen('descendant::'));
925+
$expression = 'descendant-or-self::'.substr($expression, 12);
902926
} elseif (preg_match('/^(ancestor|ancestor-or-self|attribute|following|following-sibling|namespace|parent|preceding|preceding-sibling)::/', $expression)) {
903927
// the fake root has no parent, preceding or following nodes and also no attributes (even no namespace attributes)
904928
$expression = $nonMatchingExpression;
905929
} elseif (0 !== strpos($expression, 'descendant-or-self::')) {
906930
$expression = 'self::'.$expression;
907931
}
908932
$expressions[] = $parenthesis.$expression;
909-
}
910933

911-
return implode(' | ', $expressions);
912-
}
913-
914-
/**
915-
* Splits the XPath into parts that are separated by the union operator.
916-
*
917-
* @param string $xpath
918-
*
919-
* @return string[]
920-
*/
921-
private function splitUnionParts($xpath)
922-
{
923-
// Split any unions into individual expressions. We need to iterate
924-
// through the string to correctly parse opening/closing quotes and
925-
// braces which is not possible with regular expressions.
926-
$unionParts = array();
927-
$inSingleQuotedString = false;
928-
$inDoubleQuotedString = false;
929-
$openedBrackets = 0;
930-
$lastUnion = 0;
931-
$xpathLength = strlen($xpath);
932-
for ($i = 0; $i < $xpathLength; ++$i) {
933-
$char = $xpath[$i];
934-
935-
if ($char === "'" && !$inDoubleQuotedString) {
936-
$inSingleQuotedString = !$inSingleQuotedString;
937-
} elseif ($char === '"' && !$inSingleQuotedString) {
938-
$inDoubleQuotedString = !$inDoubleQuotedString;
939-
} elseif (!$inSingleQuotedString && !$inDoubleQuotedString) {
940-
if ($char === '[') {
941-
++$openedBrackets;
942-
} elseif ($char === ']') {
943-
--$openedBrackets;
944-
} elseif ($char === '|' && $openedBrackets === 0) {
945-
$unionParts[] = substr($xpath, $lastUnion, $i - $lastUnion);
946-
$lastUnion = $i + 1;
947-
}
934+
if ($i === $xpathLen) {
935+
return implode(' | ', $expressions);
948936
}
937+
938+
$i += strspn($xpath, " \t\n\r\0\x0B", $i + 1);
939+
$lastUnion = $i + 1;
949940
}
950-
$unionParts[] = substr($xpath, $lastUnion);
951941

952-
return $unionParts;
942+
return $xpath; // The XPath expression is invalid
953943
}
954944

955945
/**

0 commit comments

Comments
 (0)