From bd7fb5178b998e4f863d388850e93be70a5b9344 Mon Sep 17 00:00:00 2001 From: Nicolas Grekas Date: Wed, 20 Aug 2025 17:25:58 +0200 Subject: [PATCH] [DomCrawler] Use the native HTML5 parser on PHP 8.4 --- UPGRADE-7.4.md | 10 ++ .../Component/BrowserKit/AbstractBrowser.php | 7 ++ src/Symfony/Component/BrowserKit/CHANGELOG.md | 1 + .../Component/BrowserKit/composer.json | 1 + src/Symfony/Component/DomCrawler/CHANGELOG.md | 5 + src/Symfony/Component/DomCrawler/Crawler.php | 48 +++++-- ...rawlerTestCase.php => CrawlerTestCase.php} | 118 +++++++++++++++--- .../Tests/Html5ParserCrawlerTest.php | 102 --------------- .../Tests/LegacyHtml5ParserCrawlerTest.php | 79 ++++++++++++ .../Tests/LegacyParserCrawlerTest.php | 98 +++++++++++++++ .../Tests/NativeParserCrawlerTest.php | 71 ----------- .../Component/DomCrawler/composer.json | 1 + 12 files changed, 340 insertions(+), 201 deletions(-) rename src/Symfony/Component/DomCrawler/Tests/{AbstractCrawlerTestCase.php => CrawlerTestCase.php} (93%) delete mode 100644 src/Symfony/Component/DomCrawler/Tests/Html5ParserCrawlerTest.php create mode 100644 src/Symfony/Component/DomCrawler/Tests/LegacyHtml5ParserCrawlerTest.php create mode 100644 src/Symfony/Component/DomCrawler/Tests/LegacyParserCrawlerTest.php delete mode 100644 src/Symfony/Component/DomCrawler/Tests/NativeParserCrawlerTest.php diff --git a/UPGRADE-7.4.md b/UPGRADE-7.4.md index ff4dde989237d..eea0503433f63 100644 --- a/UPGRADE-7.4.md +++ b/UPGRADE-7.4.md @@ -8,6 +8,11 @@ Read more about this in the [Symfony documentation](https://symfony.com/doc/7.4/ If you're upgrading from a version below 7.3, follow the [7.3 upgrade guide](UPGRADE-7.3.md) first. 
+BrowserKit +---------- + + * Deprecate `AbstractBrowser::useHtml5Parser()`; Symfony 8 will unconditionally use the native HTML5 parser + Cache ----- @@ -29,6 +34,11 @@ DoctrineBridge * Deprecate `UniqueEntity::getRequiredOptions()` and `UniqueEntity::getDefaultOption()` +DomCrawler +---------- + + * Disabling HTML5 parsing is deprecated; Symfony 8 will unconditionally use the native HTML5 parser + FrameworkBundle --------------- diff --git a/src/Symfony/Component/BrowserKit/AbstractBrowser.php b/src/Symfony/Component/BrowserKit/AbstractBrowser.php index fefd7c155a82d..bcb3b4b2e6399 100644 --- a/src/Symfony/Component/BrowserKit/AbstractBrowser.php +++ b/src/Symfony/Component/BrowserKit/AbstractBrowser.php @@ -46,6 +46,7 @@ abstract class AbstractBrowser /** @psalm-var TResponse */ protected object $response; protected Crawler $crawler; + /** @deprecated since Symfony 7.4, to be removed in Symfony 8 */ protected bool $useHtml5Parser = true; protected bool $insulated = false; protected ?string $redirect; @@ -204,10 +205,16 @@ public function getCrawler(): Crawler /** * Sets whether parsing should be done using "masterminds/html5". * + * @deprecated since Symfony 7.4, Symfony 8 will unconditionally use the native HTML5 parser + * * @return $this */ public function useHtml5Parser(bool $useHtml5Parser): static { + if (\PHP_VERSION_ID >= 80400) { + trigger_deprecation('symfony/browser-kit', '7.4', 'Method "%s()" is deprecated. 
Symfony 8 will unconditionally use the native HTML5 parser.', __METHOD__); + } + $this->useHtml5Parser = $useHtml5Parser; return $this; diff --git a/src/Symfony/Component/BrowserKit/CHANGELOG.md b/src/Symfony/Component/BrowserKit/CHANGELOG.md index d078c1068abf9..ad57c9536237b 100644 --- a/src/Symfony/Component/BrowserKit/CHANGELOG.md +++ b/src/Symfony/Component/BrowserKit/CHANGELOG.md @@ -6,6 +6,7 @@ CHANGELOG * Add `isFirstPage()` and `isLastPage()` methods to the History class for checking navigation boundaries * Add PHPUnit constraints: `BrowserHistoryIsOnFirstPage` and `BrowserHistoryIsOnLastPage` + * Deprecate `AbstractBrowser::useHtml5Parser()`; Symfony 8 will unconditionally use the native HTML5 parser 6.4 --- diff --git a/src/Symfony/Component/BrowserKit/composer.json b/src/Symfony/Component/BrowserKit/composer.json index b2e6761dab249..a1235775e1c8e 100644 --- a/src/Symfony/Component/BrowserKit/composer.json +++ b/src/Symfony/Component/BrowserKit/composer.json @@ -17,6 +17,7 @@ ], "require": { "php": ">=8.2", + "symfony/deprecation-contracts": "^2.5|^3", "symfony/dom-crawler": "^6.4|^7.0|^8.0" }, "require-dev": { diff --git a/src/Symfony/Component/DomCrawler/CHANGELOG.md b/src/Symfony/Component/DomCrawler/CHANGELOG.md index 53395956f3be9..56e736e8be888 100644 --- a/src/Symfony/Component/DomCrawler/CHANGELOG.md +++ b/src/Symfony/Component/DomCrawler/CHANGELOG.md @@ -1,6 +1,11 @@ CHANGELOG ========= +7.4 +--- + + * Disabling HTML5 parsing is deprecated; Symfony 8 will unconditionally use the native HTML5 parser + 7.0 --- diff --git a/src/Symfony/Component/DomCrawler/Crawler.php b/src/Symfony/Component/DomCrawler/Crawler.php index 7550da83d4fdb..0c025f57f9b78 100644 --- a/src/Symfony/Component/DomCrawler/Crawler.php +++ b/src/Symfony/Component/DomCrawler/Crawler.php @@ -62,10 +62,14 @@ public function __construct( \DOMNodeList|\DOMNode|array|string|null $node = null, protected ?string $uri = null, ?string $baseHref = null, - bool $useHtml5Parser = true, + 
private bool $useHtml5Parser = true, ) { + if (\PHP_VERSION_ID >= 80400 && !$useHtml5Parser) { + trigger_deprecation('symfony/dom-crawler', '7.4', 'Disabling HTML5 parsing is deprecated. Symfony 8 will unconditionally use the native HTML5 parser.'); + } + $this->baseHref = $baseHref ?: $uri; - $this->html5Parser = $useHtml5Parser ? new HTML5(['disable_html_ns' => true]) : null; + $this->html5Parser = \PHP_VERSION_ID < 80400 && $useHtml5Parser ? new HTML5(['disable_html_ns' => true]) : null; $this->cachedNamespaces = new \ArrayObject(); $this->add($node); @@ -1081,23 +1085,41 @@ private function supportsEncoding(string $encoding): bool private function parseXhtml(string $htmlContent, string $charset = 'UTF-8'): \DOMDocument { - if ('UTF-8' === $charset && preg_match('//u', $htmlContent)) { - $htmlContent = ''.$htmlContent; - } else { - $htmlContent = $this->convertToHtmlEntities($htmlContent, $charset); + if (\PHP_VERSION_ID < 80400 || !$this->useHtml5Parser) { + if ('UTF-8' === $charset && preg_match('//u', $htmlContent)) { + $htmlContent = ''.$htmlContent; + } else { + $htmlContent = $this->convertToHtmlEntities($htmlContent, $charset); + } + + $internalErrors = libxml_use_internal_errors(true); + + $dom = new \DOMDocument('1.0', $charset); + $dom->validateOnParse = true; + + if ('' !== trim($htmlContent)) { + @$dom->loadHTML($htmlContent); + } + + libxml_use_internal_errors($internalErrors); + + return $dom; } - $internalErrors = libxml_use_internal_errors(true); + $document = @\Dom\HTMLDocument::createFromString($htmlContent, \Dom\HTML_NO_DEFAULT_NS, $charset); + $htmlContent = $document->saveXml(); + $charset = $document->inputEncoding; $dom = new \DOMDocument('1.0', $charset); - $dom->validateOnParse = true; + $dom->loadXML($htmlContent); - if ('' !== trim($htmlContent)) { - @$dom->loadHTML($htmlContent); + // Register id attributes as ID attributes for getElementById to work + foreach ((new \DOMXPath($dom))->query('//*[@id]') as $element) { + if ($element 
instanceof \DOMElement) { + $element->setIdAttribute('id', true); + } } - libxml_use_internal_errors($internalErrors); - return $dom; } @@ -1216,7 +1238,7 @@ private function canParseHtml5String(string $content): bool return false; } - if (false === ($pos = stripos($content, ''))) { + if (false === $pos = stripos($content, '')) { return false; } diff --git a/src/Symfony/Component/DomCrawler/Tests/AbstractCrawlerTestCase.php b/src/Symfony/Component/DomCrawler/Tests/CrawlerTestCase.php similarity index 93% rename from src/Symfony/Component/DomCrawler/Tests/AbstractCrawlerTestCase.php rename to src/Symfony/Component/DomCrawler/Tests/CrawlerTestCase.php index f365dd2909931..25af30c93d95b 100644 --- a/src/Symfony/Component/DomCrawler/Tests/AbstractCrawlerTestCase.php +++ b/src/Symfony/Component/DomCrawler/Tests/CrawlerTestCase.php @@ -12,6 +12,8 @@ namespace Symfony\Component\DomCrawler\Tests; use PHPUnit\Framework\Attributes\DataProvider; +use PHPUnit\Framework\Attributes\Group; +use PHPUnit\Framework\Attributes\IgnoreDeprecations; use PHPUnit\Framework\Attributes\RequiresPhpExtension; use PHPUnit\Framework\Error\Notice; use PHPUnit\Framework\TestCase; @@ -20,13 +22,16 @@ use Symfony\Component\DomCrawler\Image; use Symfony\Component\DomCrawler\Link; -abstract class AbstractCrawlerTestCase extends TestCase +class CrawlerTestCase extends TestCase { - abstract public static function getDoctype(): string; + public static function getDoctype(): string + { + return ''; + } - protected function createCrawler($node = null, ?string $uri = null, ?string $baseHref = null, bool $useHtml5Parser = true) + protected function createCrawler($node = null, ?string $uri = null, ?string $baseHref = null) { - return new Crawler($node, $uri, $baseHref, $useHtml5Parser); + return new Crawler($node, $uri, $baseHref, \PHP_VERSION_ID >= 80400); } public function testConstructor() @@ -34,7 +39,7 @@ public function testConstructor() $crawler = $this->createCrawler(); $this->assertCount(0, 
$crawler, '__construct() returns an empty crawler'); - $doc = new \DOMDocument(); + $doc = $this->createDomDocument(); $node = $doc->createElement('test'); $crawler = $this->createCrawler($node); @@ -236,7 +241,7 @@ public function testAddNode() public function testClear() { - $doc = new \DOMDocument(); + $doc = $this->createDomDocument(); $node = $doc->createElement('test'); $crawler = $this->createCrawler($node); @@ -407,7 +412,7 @@ public function testInnerText( public function testHtml() { $this->assertEquals('Bar', $this->createTestCrawler()->filterXPath('//a[5]')->html()); - $this->assertEquals('', trim(preg_replace('~>\s+<~', '><', $this->createTestCrawler()->filterXPath('//form[@id="FooFormId"]')->html()))); + $this->assertEquals('', trim(preg_replace('~>\s+<~', '><', $this->createTestCrawler()->filterXPath('//form[@id="FooFormId"]')->html()))); try { $this->createTestCrawler()->filterXPath('//ol')->html(); @@ -421,9 +426,9 @@ public function testHtml() public function testEmojis() { - $crawler = $this->createCrawler('

Hey 👋

'); + $crawler = $this->createCrawler('

Hey 👋

'); - $this->assertSame('

Hey 👋

', $crawler->html()); + $this->assertSame('

Hey 👋

', $crawler->html()); } public function testExtract() @@ -448,7 +453,7 @@ public function testFilterXpathComplexQueries() $this->assertCount(1, $crawler->filterXPath('./body')); $this->assertCount(1, $crawler->filterXPath('.//body')); $this->assertCount(6, $crawler->filterXPath('.//input')); - $this->assertCount(4, $crawler->filterXPath('//form')->filterXPath('//button | //input')); + $this->assertCount(7, $crawler->filterXPath('//form')->filterXPath('//button | //input')); $this->assertCount(1, $crawler->filterXPath('body')); $this->assertCount(8, $crawler->filterXPath('//button | //input')); $this->assertCount(1, $crawler->filterXPath('//body')); @@ -530,6 +535,16 @@ public function testFilterXPathWithAnUrl() $this->assertSame('Music', $crawler->text()); } + public function testCaseSentivity() + { + $crawler = $this->createTestXmlCrawler(); + + $crawler = $crawler->filterXPath('//*[local-name() = "CaseSensitiveTag"]'); + $this->assertCount(1, $crawler); + $this->assertSame('Some Content', $crawler->text()); + $this->assertSame('CaseSensitiveTag', $crawler->nodeName()); + } + public function testFilterXPathWithFakeRoot() { $crawler = $this->createTestCrawler(); @@ -1290,10 +1305,82 @@ public function testAddHtmlContentUnsupportedCharset() $this->assertEquals('Žťčýů', $crawler->filterXPath('//p')->text()); } - public function createTestCrawler($uri = null) + public function testAddXmlContentWithErrors() { - $dom = new \DOMDocument(); - $dom->loadHTML($this->getDoctype().' 
+ $internalErrors = libxml_use_internal_errors(true); + + $crawler = $this->createCrawler(); + $crawler->addXmlContent(<<<'EOF' + + + + + + + + + EOF, + 'UTF-8' + ); + + $this->assertGreaterThan(1, libxml_get_errors()); + + libxml_clear_errors(); + libxml_use_internal_errors($internalErrors); + } + + #[IgnoreDeprecations] + #[Group('legacy')] + public function testHtml5ParserNotSameAsNativeParserForSpecificHtml() + { + // Html who create a bug specific to the DOM extension (see https://github.com/symfony/symfony/issues/28596) + $html = '

Foo

'; + + $html5Crawler = new Crawler(null, null, null, true); + $html5Crawler->add($html); + + $nativeCrawler = new Crawler(null, null, null, false); + $nativeCrawler->add($html); + + $this->assertNotEquals($nativeCrawler->filterXPath('//h1')->text(), $html5Crawler->filterXPath('//h1')->text(), 'Native parser and Html5 parser must be different'); + } + + public function testAddHtml5() + { + // Ensure a bug specific to the DOM extension is fixed (see https://github.com/symfony/symfony/issues/28596) + $crawler = $this->createCrawler(); + $crawler->add($this->getDoctype().'

Foo

'); + $this->assertEquals('Foo', $crawler->filterXPath('//h1')->text(), '->add() adds nodes from a string'); + } + + #[DataProvider('html5Provider')] + public function testHtml5ParserParseContentStartingWithValidHeading(string $content) + { + $crawler = $this->createCrawler(); + $crawler->addHtmlContent($content); + self::assertEquals( + 'Foo', + $crawler->filterXPath('//h1')->text(), + '->addHtmlContent() parses valid HTML with comment before doctype' + ); + } + + public static function html5Provider(): iterable + { + $html = self::getDoctype().'

Foo

'; + $BOM = \chr(0xEF).\chr(0xBB).\chr(0xBF); + + yield 'BOM first' => [$BOM.$html]; + yield 'Single comment' => [''.$html]; + yield 'Multiline comment' => ["".$html]; + yield 'Several comments' => [' '.$html]; + yield 'Whitespaces' => [' '.$html]; + yield 'All together' => [$BOM.' '.$html]; + } + + protected function createTestCrawler($uri = null) + { + $html = $this->getDoctype().'
Foo @@ -1352,9 +1439,9 @@ public function createTestCrawler($uri = null) - '); + '; - return $this->createCrawler($dom, $uri); + return $this->createCrawler($html, $uri); } protected function createTestXmlCrawler($uri = null) @@ -1369,6 +1456,7 @@ protected function createTestXmlCrawler($uri = null) widescreen Music + Some Content '; return $this->createCrawler($xml, $uri); diff --git a/src/Symfony/Component/DomCrawler/Tests/Html5ParserCrawlerTest.php b/src/Symfony/Component/DomCrawler/Tests/Html5ParserCrawlerTest.php deleted file mode 100644 index ca6d0f4ce4804..0000000000000 --- a/src/Symfony/Component/DomCrawler/Tests/Html5ParserCrawlerTest.php +++ /dev/null @@ -1,102 +0,0 @@ - - * - * For the full copyright and license information, please view the LICENSE - * file that was distributed with this source code. - */ - -namespace Symfony\Component\DomCrawler\Tests; - -use PHPUnit\Framework\Attributes\DataProvider; -use PHPUnit\Framework\Attributes\TestWith; - -class Html5ParserCrawlerTest extends AbstractCrawlerTestCase -{ - public static function getDoctype(): string - { - return ''; - } - - public function testAddHtml5() - { - // Ensure a bug specific to the DOM extension is fixed (see https://github.com/symfony/symfony/issues/28596) - $crawler = $this->createCrawler(); - $crawler->add($this->getDoctype().'

Foo

'); - $this->assertEquals('Foo', $crawler->filterXPath('//h1')->text(), '->add() adds nodes from a string'); - } - - #[DataProvider('validHtml5Provider')] - public function testHtml5ParserParseContentStartingWithValidHeading(string $content) - { - $crawler = $this->createCrawler(); - $crawler->addHtmlContent($content); - self::assertEquals( - 'Foo', - $crawler->filterXPath('//h1')->text(), - '->addHtmlContent() parses valid HTML with comment before doctype' - ); - } - - #[DataProvider('invalidHtml5Provider')] - public function testHtml5ParserWithInvalidHeadedContent(string $content) - { - $crawler = $this->createCrawler(); - $crawler->addHtmlContent($content); - self::assertSame('', $crawler->filterXPath('//h1')->text(), '->addHtmlContent failed as expected'); - } - - public function testHtml5ParserNotSameAsNativeParserForSpecificHtml() - { - // Html who create a bug specific to the DOM extension (see https://github.com/symfony/symfony/issues/28596) - $html = $this->getDoctype().'

Foo

'; - - $html5Crawler = $this->createCrawler(null, null, null, true); - $html5Crawler->add($html); - - $nativeCrawler = $this->createCrawler(null, null, null, false); - $nativeCrawler->add($html); - - $this->assertNotEquals($nativeCrawler->filterXPath('//h1')->text(), $html5Crawler->filterXPath('//h1')->text(), 'Native parser and Html5 parser must be different'); - } - - #[TestWith([true])] - #[TestWith([false])] - public function testHasHtml5Parser(bool $useHtml5Parser) - { - $crawler = $this->createCrawler(null, null, null, $useHtml5Parser); - - $r = new \ReflectionProperty($crawler::class, 'html5Parser'); - $html5Parser = $r->getValue($crawler); - - if ($useHtml5Parser) { - $this->assertInstanceOf(\Masterminds\HTML5::class, $html5Parser, 'Html5Parser must be a Masterminds\HTML5 instance'); - } else { - $this->assertNull($html5Parser, 'Html5Parser must be null'); - } - } - - public static function validHtml5Provider(): iterable - { - $html = self::getDoctype().'

Foo

'; - $BOM = \chr(0xEF).\chr(0xBB).\chr(0xBF); - - yield 'BOM first' => [$BOM.$html]; - yield 'Single comment' => [''.$html]; - yield 'Multiline comment' => ["".$html]; - yield 'Several comments' => [' '.$html]; - yield 'Whitespaces' => [' '.$html]; - yield 'All together' => [$BOM.' '.$html]; - } - - public static function invalidHtml5Provider(): iterable - { - $html = self::getDoctype().'

Foo

'; - - yield 'Text' => ['hello world'.$html]; - yield 'Text between comments' => [' test '.$html]; - } -} diff --git a/src/Symfony/Component/DomCrawler/Tests/LegacyHtml5ParserCrawlerTest.php b/src/Symfony/Component/DomCrawler/Tests/LegacyHtml5ParserCrawlerTest.php new file mode 100644 index 0000000000000..22a31012d5295 --- /dev/null +++ b/src/Symfony/Component/DomCrawler/Tests/LegacyHtml5ParserCrawlerTest.php @@ -0,0 +1,79 @@ + + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code. + */ + +namespace Symfony\Component\DomCrawler\Tests; + +use PHPUnit\Framework\Attributes\DataProvider; +use PHPUnit\Framework\Attributes\RequiresPhp; +use Symfony\Component\DomCrawler\Crawler; + +#[RequiresPhp('<8.4')] +class LegacyHtml5ParserCrawlerTest extends CrawlerTestCase +{ + public function testHtml() + { + $this->assertEquals('Bar', $this->createTestCrawler()->filterXPath('//a[5]')->html()); + $this->assertEquals('', trim(preg_replace('~>\s+<~', '><', $this->createTestCrawler()->filterXPath('//form[@id="FooFormId"]')->html()))); + + try { + $this->createTestCrawler()->filterXPath('//ol')->html(); + $this->fail('->html() throws an \InvalidArgumentException if the node list is empty'); + } catch (\InvalidArgumentException $e) { + $this->assertTrue(true, '->html() throws an \InvalidArgumentException if the node list is empty'); + } + + $this->assertSame('my value', $this->createTestCrawler(null)->filterXPath('//ol')->html('my value')); + } + + public function testFilterXpathComplexQueries() + { + $crawler = $this->createTestCrawler()->filterXPath('//body'); + + $this->assertCount(0, $crawler->filterXPath('/input')); + $this->assertCount(0, $crawler->filterXPath('/body')); + $this->assertCount(1, $crawler->filterXPath('./body')); + $this->assertCount(1, $crawler->filterXPath('.//body')); + $this->assertCount(6, $crawler->filterXPath('.//input')); + $this->assertCount(4, 
$crawler->filterXPath('//form')->filterXPath('//button | //input')); + $this->assertCount(1, $crawler->filterXPath('body')); + $this->assertCount(8, $crawler->filterXPath('//button | //input')); + $this->assertCount(1, $crawler->filterXPath('//body')); + $this->assertCount(1, $crawler->filterXPath('descendant-or-self::body')); + $this->assertCount(1, $crawler->filterXPath('//div[@id="parent"]')->filterXPath('./div'), 'A child selection finds only the current div'); + $this->assertCount(3, $crawler->filterXPath('//div[@id="parent"]')->filterXPath('descendant::div'), 'A descendant selector matches the current div and its child'); + $this->assertCount(3, $crawler->filterXPath('//div[@id="parent"]')->filterXPath('//div'), 'A descendant selector matches the current div and its child'); + $this->assertCount(5, $crawler->filterXPath('(//a | //div)//img')); + $this->assertCount(7, $crawler->filterXPath('((//a | //div)//img | //ul)')); + $this->assertCount(7, $crawler->filterXPath('( ( //a | //div )//img | //ul )')); + $this->assertCount(1, $crawler->filterXPath("//a[./@href][((./@id = 'Klausi|Claudiu' or normalize-space(string(.)) = 'Klausi|Claudiu' or ./@title = 'Klausi|Claudiu' or ./@rel = 'Klausi|Claudiu') or .//img[./@alt = 'Klausi|Claudiu'])]")); + } + + #[DataProvider('invalidHtml5Provider')] + public function testHtml5ParserWithInvalidHeadedContent(string $content) + { + $crawler = $this->createCrawler(); + $crawler->addHtmlContent($content); + self::assertSame('', $crawler->filterXPath('//h1')->text(), '->addHtmlContent failed as expected'); + } + + public static function invalidHtml5Provider(): iterable + { + $html = self::getDoctype().'

Foo

'; + + yield 'Text' => ['hello world'.$html]; + yield 'Text between comments' => [' test '.$html]; + } + + protected function createCrawler($node = null, ?string $uri = null, ?string $baseHref = null) + { + return new Crawler($node, $uri, $baseHref, true); + } +} diff --git a/src/Symfony/Component/DomCrawler/Tests/LegacyParserCrawlerTest.php b/src/Symfony/Component/DomCrawler/Tests/LegacyParserCrawlerTest.php new file mode 100644 index 0000000000000..9957fcd12486e --- /dev/null +++ b/src/Symfony/Component/DomCrawler/Tests/LegacyParserCrawlerTest.php @@ -0,0 +1,98 @@ + + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code. + */ + +namespace Symfony\Component\DomCrawler\Tests; + +use PHPUnit\Framework\Attributes\RequiresPhp; + +#[RequiresPhp('<8.4')] +class LegacyParserCrawlerTest extends CrawlerTestCase +{ + public static function getDoctype(): string + { + return ''; + } + + public function testHtml() + { + $this->assertEquals('Bar', $this->createTestCrawler()->filterXPath('//a[5]')->html()); + $this->assertEquals('', trim(preg_replace('~>\s+<~', '><', $this->createTestCrawler()->filterXPath('//form[@id="FooFormId"]')->html()))); + + try { + $this->createTestCrawler()->filterXPath('//ol')->html(); + $this->fail('->html() throws an \InvalidArgumentException if the node list is empty'); + } catch (\InvalidArgumentException $e) { + $this->assertTrue(true, '->html() throws an \InvalidArgumentException if the node list is empty'); + } + + $this->assertSame('my value', $this->createTestCrawler(null)->filterXPath('//ol')->html('my value')); + } + + public function testFilterXpathComplexQueries() + { + $crawler = $this->createTestCrawler()->filterXPath('//body'); + + $this->assertCount(0, $crawler->filterXPath('/input')); + $this->assertCount(0, $crawler->filterXPath('/body')); + $this->assertCount(1, $crawler->filterXPath('./body')); + $this->assertCount(1, $crawler->filterXPath('.//body')); + 
$this->assertCount(6, $crawler->filterXPath('.//input')); + $this->assertCount(4, $crawler->filterXPath('//form')->filterXPath('//button | //input')); + $this->assertCount(1, $crawler->filterXPath('body')); + $this->assertCount(8, $crawler->filterXPath('//button | //input')); + $this->assertCount(1, $crawler->filterXPath('//body')); + $this->assertCount(1, $crawler->filterXPath('descendant-or-self::body')); + $this->assertCount(1, $crawler->filterXPath('//div[@id="parent"]')->filterXPath('./div'), 'A child selection finds only the current div'); + $this->assertCount(3, $crawler->filterXPath('//div[@id="parent"]')->filterXPath('descendant::div'), 'A descendant selector matches the current div and its child'); + $this->assertCount(3, $crawler->filterXPath('//div[@id="parent"]')->filterXPath('//div'), 'A descendant selector matches the current div and its child'); + $this->assertCount(5, $crawler->filterXPath('(//a | //div)//img')); + $this->assertCount(7, $crawler->filterXPath('((//a | //div)//img | //ul)')); + $this->assertCount(7, $crawler->filterXPath('( ( //a | //div )//img | //ul )')); + $this->assertCount(1, $crawler->filterXPath("//a[./@href][((./@id = 'Klausi|Claudiu' or normalize-space(string(.)) = 'Klausi|Claudiu' or ./@title = 'Klausi|Claudiu' or ./@rel = 'Klausi|Claudiu') or .//img[./@alt = 'Klausi|Claudiu'])]")); + } + + public function testAddHtmlContentWithErrors() + { + $internalErrors = libxml_use_internal_errors(true); + + $crawler = $this->createCrawler(); + $crawler->addHtmlContent(<<<'EOF' + + + + + +
+ + + + EOF, + 'UTF-8' + ); + + $errors = libxml_get_errors(); + $this->assertCount(1, $errors); + $this->assertEquals("Unexpected end tag : body\n", $errors[0]->message); + + libxml_clear_errors(); + libxml_use_internal_errors($internalErrors); + } + + #[RequiresPhp('8.4')] + public function testAddHtml5() + { + } + + #[RequiresPhp('8.4')] + public function testHtml5ParserParseContentStartingWithValidHeading(string $content) + { + } +} diff --git a/src/Symfony/Component/DomCrawler/Tests/NativeParserCrawlerTest.php b/src/Symfony/Component/DomCrawler/Tests/NativeParserCrawlerTest.php deleted file mode 100644 index 37467b1704425..0000000000000 --- a/src/Symfony/Component/DomCrawler/Tests/NativeParserCrawlerTest.php +++ /dev/null @@ -1,71 +0,0 @@ - - * - * For the full copyright and license information, please view the LICENSE - * file that was distributed with this source code. - */ - -namespace Symfony\Component\DomCrawler\Tests; - -class NativeParserCrawlerTest extends AbstractCrawlerTestCase -{ - public static function getDoctype(): string - { - return ''; - } - - public function testAddHtmlContentWithErrors() - { - $internalErrors = libxml_use_internal_errors(true); - - $crawler = $this->createCrawler(); - $crawler->addHtmlContent(<<<'EOF' - - - - - -
- - - - EOF, - 'UTF-8' - ); - - $errors = libxml_get_errors(); - $this->assertCount(1, $errors); - $this->assertEquals("Unexpected end tag : body\n", $errors[0]->message); - - libxml_clear_errors(); - libxml_use_internal_errors($internalErrors); - } - - public function testAddXmlContentWithErrors() - { - $internalErrors = libxml_use_internal_errors(true); - - $crawler = $this->createCrawler(); - $crawler->addXmlContent(<<<'EOF' - - - - - - - - - EOF, - 'UTF-8' - ); - - $this->assertGreaterThan(1, libxml_get_errors()); - - libxml_clear_errors(); - libxml_use_internal_errors($internalErrors); - } -} diff --git a/src/Symfony/Component/DomCrawler/composer.json b/src/Symfony/Component/DomCrawler/composer.json index 0e5c984d09be2..2a43aa20e3258 100644 --- a/src/Symfony/Component/DomCrawler/composer.json +++ b/src/Symfony/Component/DomCrawler/composer.json @@ -17,6 +17,7 @@ ], "require": { "php": ">=8.2", + "symfony/deprecation-contracts": "^2.5|^3", "symfony/polyfill-ctype": "~1.8", "symfony/polyfill-mbstring": "~1.0", "masterminds/html5": "^2.6"