12
12
namespace Symfony \Component \DomCrawler \Tests ;
13
13
14
14
use PHPUnit \Framework \Attributes \DataProvider ;
15
+ use PHPUnit \Framework \Attributes \Group ;
16
+ use PHPUnit \Framework \Attributes \IgnoreDeprecations ;
15
17
use PHPUnit \Framework \Attributes \RequiresPhpExtension ;
16
18
use PHPUnit \Framework \Error \Notice ;
17
19
use PHPUnit \Framework \TestCase ;
20
22
use Symfony \Component \DomCrawler \Image ;
21
23
use Symfony \Component \DomCrawler \Link ;
22
24
23
- abstract class AbstractCrawlerTestCase extends TestCase
25
+ class CrawlerTestCase extends TestCase
24
26
{
25
/**
 * Supplies the doctype declaration that the HTML fixtures in this test case are prefixed with.
 */
public static function getDoctype(): string
{
    $doctype = '<!DOCTYPE html> ';

    return $doctype;
}
26
31
27
/**
 * Builds the Crawler instance used by the tests.
 *
 * The first three arguments are forwarded unchanged to the Crawler constructor;
 * the fourth constructor argument is true only when running on PHP 8.4 or newer.
 *
 * @param mixed $node initial content handed to the Crawler constructor
 */
protected function createCrawler($node = null, ?string $uri = null, ?string $baseHref = null)
{
    $onPhp84OrNewer = \PHP_VERSION_ID >= 80400;

    return new Crawler($node, $uri, $baseHref, $onPhp84OrNewer);
}
31
36
32
37
public function testConstructor ()
33
38
{
34
39
$ crawler = $ this ->createCrawler ();
35
40
$ this ->assertCount (0 , $ crawler , '__construct() returns an empty crawler ' );
36
41
37
- $ doc = new \ DOMDocument ();
42
+ $ doc = $ this -> createDomDocument ();
38
43
$ node = $ doc ->createElement ('test ' );
39
44
40
45
$ crawler = $ this ->createCrawler ($ node );
@@ -236,7 +241,7 @@ public function testAddNode()
236
241
237
242
public function testClear ()
238
243
{
239
- $ doc = new \ DOMDocument ();
244
+ $ doc = $ this -> createDomDocument ();
240
245
$ node = $ doc ->createElement ('test ' );
241
246
242
247
$ crawler = $ this ->createCrawler ($ node );
@@ -407,7 +412,7 @@ public function testInnerText(
407
412
public function testHtml ()
408
413
{
409
414
$ this ->assertEquals ('<img alt="Bar"> ' , $ this ->createTestCrawler ()->filterXPath ('//a[5] ' )->html ());
410
- $ this ->assertEquals ('<input type="text" value="TextValue" name="TextName"><input type="submit" value="FooValue" name="FooName" id="FooId"><input type="button" value="BarValue" name="BarName" id="BarId"><button value="ButtonValue" name="ButtonName" id="ButtonId"></button> ' , trim (preg_replace ('~>\s+<~ ' , '>< ' , $ this ->createTestCrawler ()->filterXPath ('//form[@id="FooFormId"] ' )->html ())));
415
+ $ this ->assertEquals ('<input type="text" value="TextValue" name="TextName"><input type="submit" value="FooValue" name="FooName" id="FooId"><input type="button" value="BarValue" name="BarName" id="BarId"><button value="ButtonValue" name="ButtonName" id="ButtonId"><input type="submit" value="FooBarValue" name="FooBarName" form="FooFormId"><input type="text" value="FooTextValue" name="FooTextName" form="FooFormId"><input type="image" alt="ImageAlt" form="FooFormId">< /button> ' , trim (preg_replace ('~>\s+<~ ' , '>< ' , $ this ->createTestCrawler ()->filterXPath ('//form[@id="FooFormId"] ' )->html ())));
411
416
412
417
try {
413
418
$ this ->createTestCrawler ()->filterXPath ('//ol ' )->html ();
@@ -421,9 +426,9 @@ public function testHtml()
421
426
422
427
/**
 * Checks that markup containing an emoji is returned unchanged by html().
 */
public function testEmojis()
{
    // Input and expectation are the same string: html() must round-trip it verbatim.
    // The markup must not contain "< body>" (not a tag) nor trailing whitespace.
    $markup = '<head></head><body><p>Hey 👋</p></body>';

    $crawler = $this->createCrawler($markup);

    $this->assertSame($markup, $crawler->html());
}
428
433
429
434
public function testExtract ()
@@ -448,7 +453,7 @@ public function testFilterXpathComplexQueries()
448
453
$ this ->assertCount (1 , $ crawler ->filterXPath ('./body ' ));
449
454
$ this ->assertCount (1 , $ crawler ->filterXPath ('.//body ' ));
450
455
$ this ->assertCount (6 , $ crawler ->filterXPath ('.//input ' ));
451
- $ this ->assertCount (4 , $ crawler ->filterXPath ('//form ' )->filterXPath ('//button | //input ' ));
456
+ $ this ->assertCount (7 , $ crawler ->filterXPath ('//form ' )->filterXPath ('//button | //input ' ));
452
457
$ this ->assertCount (1 , $ crawler ->filterXPath ('body ' ));
453
458
$ this ->assertCount (8 , $ crawler ->filterXPath ('//button | //input ' ));
454
459
$ this ->assertCount (1 , $ crawler ->filterXPath ('//body ' ));
@@ -530,6 +535,16 @@ public function testFilterXPathWithAnUrl()
530
535
$ this ->assertSame ('Music ' , $ crawler ->text ());
531
536
}
532
537
538
/**
 * Checks that a mixed-case element of the XML fixture is found by its exact
 * local name and that its name and text are reported unchanged.
 */
public function testCaseSentivity()
{
    $matches = $this->createTestXmlCrawler()->filterXPath('//*[local-name() = "CaseSensitiveTag"]');

    $this->assertCount(1, $matches);
    // Expected values mirror the fixture element exactly (no surrounding whitespace).
    $this->assertSame('Some Content', $matches->text());
    $this->assertSame('CaseSensitiveTag', $matches->nodeName());
}
547
+
533
548
public function testFilterXPathWithFakeRoot ()
534
549
{
535
550
$ crawler = $ this ->createTestCrawler ();
@@ -1290,10 +1305,82 @@ public function testAddHtmlContentUnsupportedCharset()
1290
1305
$ this ->assertEquals ('Žťčýů ' , $ crawler ->filterXPath ('//p ' )->text ());
1291
1306
}
1292
1307
1293
- public function createTestCrawler ( $ uri = null )
1308
/**
 * Checks that adding malformed XML content leaves more than one error in
 * libxml's internal error buffer.
 */
public function testAddXmlContentWithErrors()
{
    // Collect libxml errors internally so they can be inspected afterwards.
    $internalErrors = libxml_use_internal_errors(true);

    try {
        $crawler = $this->createCrawler();
        $crawler->addXmlContent(<<<'EOF'
            <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
            <html>
            <head>
            </head>
            <body>
            <nav><a href="#"><a href="#"></nav>
            </body>
            </html>
            EOF,
            'UTF-8'
        );

        // libxml_get_errors() returns an array; comparing the array itself with an
        // integer always succeeds, so the number of collected errors is compared instead.
        $this->assertGreaterThan(1, \count(libxml_get_errors()));
    } finally {
        // Restore the global libxml state even when the assertion fails.
        libxml_clear_errors();
        libxml_use_internal_errors($internalErrors);
    }
}
1331
+
1332
/**
 * Checks that crawlers built with the fourth constructor argument set to true
 * and to false extract different text for the same nested h1/p markup.
 */
#[IgnoreDeprecations]
#[Group('legacy')]
public function testHtml5ParserNotSameAsNativeParserForSpecificHtml()
{
    // HTML that triggers a bug specific to the DOM extension (see https://github.com/symfony/symfony/issues/28596)
    $html = '<!DOCTYPE html><html><body><h1><p>Foo</p></h1></body></html>';

    $html5Crawler = new Crawler(null, null, null, true);
    $html5Crawler->add($html);

    $nativeCrawler = new Crawler(null, null, null, false);
    $nativeCrawler->add($html);

    $this->assertNotEquals(
        $nativeCrawler->filterXPath('//h1')->text(),
        $html5Crawler->filterXPath('//h1')->text(),
        'Native parser and Html5 parser must be different'
    );
}
1347
+
1348
/**
 * Checks that a paragraph nested inside a heading stays inside that heading
 * after the markup is added through add().
 */
public function testAddHtml5()
{
    // Ensure a bug specific to the DOM extension is fixed (see https://github.com/symfony/symfony/issues/28596)
    $crawler = $this->createCrawler();
    $crawler->add($this->getDoctype().'<html><body><h1><p>Foo</p></h1></body></html>');

    // The expectation is the exact text of the fixture paragraph, without padding.
    $this->assertSame('Foo', $crawler->filterXPath('//h1')->text(), '->add() adds nodes from a string');
}
1355
+
1356
/**
 * Checks that the h1 text is still extracted when the document is preceded by
 * the leading content supplied by html5Provider().
 *
 * @param string $content complete HTML document, including its leading content
 */
#[DataProvider('html5Provider')]
public function testHtml5ParserParseContentStartingWithValidHeading(string $content)
{
    $crawler = $this->createCrawler();
    $crawler->addHtmlContent($content);

    self::assertSame(
        'Foo',
        $crawler->filterXPath('//h1')->text(),
        '->addHtmlContent() parses valid HTML with comment before doctype'
    );
}
1367
+
1368
/**
 * Data sets for testHtml5ParserParseContentStartingWithValidHeading(): the same
 * document prefixed with a byte order mark, comments, whitespace, or a mix of them.
 *
 * @return iterable<string, array{string}>
 */
public static function html5Provider(): iterable
{
    // UTF-8 byte order mark (EF BB BF).
    $bom = \chr(0xEF).\chr(0xBB).\chr(0xBF);
    $document = self::getDoctype().'<html><body><h1><p>Foo</p></h1></body></html> ';

    yield from [
        'BOM first ' => [$bom.$document],
        'Single comment ' => ['<!-- comment --> '.$document],
        'Multiline comment ' => ["<!-- \n multiline comment \n --> ".$document],
        'Several comments ' => ['<!--c--> <!--cc--> '.$document],
        'Whitespaces ' => [' '.$document],
        'All together ' => [$bom.' <!--c--> '.$document],
    ];
}
1380
+
1381
+ protected function createTestCrawler ($ uri = null )
1382
+ {
1383
+ $ html = $ this ->getDoctype ().'
1297
1384
<html>
1298
1385
<body>
1299
1386
<a href="foo">Foo</a>
@@ -1352,9 +1439,9 @@ public function createTestCrawler($uri = null)
1352
1439
</div>
1353
1440
</body>
1354
1441
</html>
1355
- ') ;
1442
+ ' ;
1356
1443
1357
- return $ this ->createCrawler ($ dom , $ uri );
1444
+ return $ this ->createCrawler ($ html , $ uri );
1358
1445
}
1359
1446
1360
1447
protected function createTestXmlCrawler ($ uri = null )
@@ -1369,6 +1456,7 @@ protected function createTestXmlCrawler($uri = null)
1369
1456
<yt:aspectRatio>widescreen</yt:aspectRatio>
1370
1457
</media:group>
1371
1458
<media:category label="Music" scheme="http://gdata.youtube.com/schemas/2007/categories.cat">Music</media:category>
1459
+ <CaseSensitiveTag>Some Content</CaseSensitiveTag>
1372
1460
</entry> ' ;
1373
1461
1374
1462
return $ this ->createCrawler ($ xml , $ uri );
0 commit comments