Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions src/Symfony/Component/DomCrawler/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ CHANGELOG
---

* Add `CrawlerSelectorCount` test constraint
* Add argument `$normalizeWhitespace` to `Crawler::innerText()`
* Make `Crawler::innerText()` return the first non-empty text

6.0
---
Expand Down
27 changes: 24 additions & 3 deletions src/Symfony/Component/DomCrawler/Crawler.php
Original file line number Diff line number Diff line change
Expand Up @@ -553,18 +553,34 @@ public function text(string $default = null, bool $normalizeWhitespace = true):
$text = $this->getNode(0)->nodeValue;

if ($normalizeWhitespace) {
return trim(preg_replace("/(?:[ \n\r\t\x0C]{2,}+|[\n\r\t\x0C])/", ' ', $text), " \n\r\t\x0C");
return $this->normalizeWhitespace($text);
}

return $text;
}

/**
 * Returns the first non-empty text that is a direct child of the current node,
 * ignoring any text nested inside child elements.
 *
 * When whitespace normalization is disabled, the raw value of the first direct
 * text node is returned as-is, even if it consists only of whitespace.
 *
 * @param bool $normalizeWhitespace Whether whitespaces should be trimmed and normalized to single spaces
 *
 * @return string The matching text, or an empty string when no direct text node qualifies
 */
public function innerText(/* bool $normalizeWhitespace = true */): string
{
    // NOTE(review): the argument is kept out of the signature and read dynamically,
    // presumably so overrides declared without it stay compatible - confirm.
    $normalizeWhitespace = 1 <= \func_num_args() ? func_get_arg(0) : true;

    foreach ($this->getNode(0)->childNodes as $childNode) {
        // CDATA sections carry character data just like plain text nodes, so both count as inner text.
        if (\XML_TEXT_NODE !== $childNode->nodeType && \XML_CDATA_SECTION_NODE !== $childNode->nodeType) {
            continue;
        }

        // Without normalization the very first text node wins, whitespace-only or not.
        if (!$normalizeWhitespace) {
            return $childNode->nodeValue;
        }

        // With normalization, whitespace-only nodes are skipped until real text is found.
        if ('' !== trim($childNode->nodeValue)) {
            return $this->normalizeWhitespace($childNode->nodeValue);
        }
    }

    return '';
}

/**
Expand Down Expand Up @@ -1189,4 +1205,9 @@ private function isValidHtml5Heading(string $heading): bool
{
return 1 === preg_match('/^\x{FEFF}?\s*(<!--[^>]*?-->\s*)*$/u', $heading);
}

/**
 * Collapses whitespace in the given string.
 *
 * Every run of two or more whitespace characters (space, LF, CR, tab, form feed) and
 * every single LF, CR, tab or form feed is replaced by one space; the result is then
 * stripped of leading and trailing characters from that same set.
 */
private function normalizeWhitespace(string $string): string
{
    $collapsed = preg_replace("/(?:[ \n\r\t\x0C]{2,}+|[\n\r\t\x0C])/", ' ', $string);

    return trim($collapsed, " \n\r\t\x0C");
}
}
61 changes: 54 additions & 7 deletions src/Symfony/Component/DomCrawler/Tests/AbstractCrawlerTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -348,12 +348,56 @@ public function testText()
$this->assertSame('my value', $this->createTestCrawler(null)->filterXPath('//ol')->text('my value'));
}

/**
 * Provides the datasets for testInnerText().
 *
 * Each dataset holds, in order: the XPath query selecting one element of the test
 * document, the expected result of Crawler::text(), of Crawler::innerText() and of
 * Crawler::innerText(false).
 *
 * @return array<string, array{string, string, string, string}>
 */
public static function provideInnerTextExamples(): array
{
    return [
        'text before child element' => [
            '//*[@id="complex-elements"]/*[@class="one"]', // XPath query
            'Parent text Child text', // Result of Crawler::text()
            'Parent text', // Result of Crawler::innerText()
            ' Parent text ', // Result of Crawler::innerText(false)
        ],
        'text after child element' => [
            '//*[@id="complex-elements"]/*[@class="two"]',
            'Child text Parent text',
            'Parent text',
            ' ',
        ],
        'text on both sides of child element' => [
            '//*[@id="complex-elements"]/*[@class="three"]',
            'Parent text Child text Parent text',
            'Parent text',
            ' Parent text ',
        ],
        'only whitespace around child element' => [
            '//*[@id="complex-elements"]/*[@class="four"]',
            'Child text',
            '',
            ' ',
        ],
        'only whitespace between child elements' => [
            '//*[@id="complex-elements"]/*[@class="five"]',
            'Child text Another child',
            '',
            ' ',
        ],
    ];
}

/**
 * Checks Crawler::text(), Crawler::innerText() and Crawler::innerText(false)
 * against the expectations supplied for one element of the test document.
 *
 * @dataProvider provideInnerTextExamples
 */
public function testInnerText(
    string $xPathQuery,
    string $expectedText,
    string $expectedInnerText,
    string $expectedInnerTextNormalizeWhitespaceFalse,
): void {
    $crawler = $this->createTestCrawler()->filterXPath($xPathQuery);

    // The query must select exactly one element so every assertion targets a known node.
    self::assertCount(1, $crawler);

    self::assertSame($expectedText, $crawler->text());
    self::assertSame($expectedInnerText, $crawler->innerText());
    self::assertSame($expectedInnerTextNormalizeWhitespaceFalse, $crawler->innerText(false));
}

public function testHtml()
Expand Down Expand Up @@ -1265,9 +1309,12 @@ public function createTestCrawler($uri = null)
<div id="child2" xmlns:foo="http://example.com"></div>
</div>
<div id="sibling"><img /></div>
<div id="complex-element">
Parent text
<span>Child text</span>
<div id="complex-elements">
<div class="one"> Parent text <span>Child text</span> </div>
<div class="two"> <span>Child text</span> Parent text </div>
<div class="three"> Parent text <span>Child text</span> Parent text </div>
<div class="four"> <span>Child text</span> </div>
<div class="five"><span>Child text</span> <span>Another child</span></div>
</div>
</body>
</html>
Expand Down