Skip to content

Commit d0f98ad

Browse files
[HtmlSanitizer] Use the native HTML5 parser when using PHP 8.4+
1 parent 3b7a33f commit d0f98ad

File tree

16 files changed

+217
-97
lines changed

16 files changed

+217
-97
lines changed

.github/workflows/psalm.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ jobs:
2020
runs-on: ubuntu-24.04
2121

2222
env:
23-
php-version: '8.2'
23+
php-version: '8.4'
2424
steps:
2525
- name: Setup PHP
2626
uses: shivammathur/setup-php@v2

UPGRADE-7.4.md

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ Cache
1616
Console
1717
-------
1818

19-
* Deprecate `Symfony\Component\Console\Application::add()` in favor of `Symfony\Component\Console\Application::addCommand()`
19+
* Deprecate `Symfony\Component\Console\Application::add()` in favor of `addCommand()`
2020

2121
DependencyInjection
2222
-------------------
@@ -32,7 +32,15 @@ DoctrineBridge
3232
FrameworkBundle
3333
---------------
3434

35-
* Deprecate `Symfony\Bundle\FrameworkBundle\Console\Application::add()` in favor of `Symfony\Bundle\FrameworkBundle\Console\Application::addCommand()`
35+
* Deprecate `Symfony\Bundle\FrameworkBundle\Console\Application::add()` in favor of `addCommand()`
36+
37+
HtmlSanitizer
38+
-------------
39+
40+
* Use the native HTML5 parser when using PHP 8.4+
41+
* Deprecate `MastermindsParser`; use `NativeParser` instead
42+
* [BC BREAK] `ParserInterface::parse()` can now return `\Dom\Node|\DOMNode|null` instead of just `\DOMNode|null`
43+
* Add argument `$context` to `ParserInterface::parse()`
3644

3745
HttpClient
3846
----------

src/Symfony/Component/HtmlSanitizer/CHANGELOG.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,14 @@
11
CHANGELOG
22
=========
33

4+
7.4
5+
---
6+
7+
* Use the native HTML5 parser when using PHP 8.4+
8+
* Deprecate `MastermindsParser`; use `NativeParser` instead
9+
* [BC BREAK] `ParserInterface::parse()` can now return `\Dom\Node|\DOMNode|null` instead of just `\DOMNode|null`
10+
* Add argument `$context` to `ParserInterface::parse()`
11+
412
7.2
513
---
614

src/Symfony/Component/HtmlSanitizer/HtmlSanitizer.php

Lines changed: 9 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
namespace Symfony\Component\HtmlSanitizer;
1313

1414
use Symfony\Component\HtmlSanitizer\Parser\MastermindsParser;
15+
use Symfony\Component\HtmlSanitizer\Parser\NativeParser;
1516
use Symfony\Component\HtmlSanitizer\Parser\ParserInterface;
1617
use Symfony\Component\HtmlSanitizer\Reference\W3CReference;
1718
use Symfony\Component\HtmlSanitizer\TextSanitizer\StringSanitizer;
@@ -34,24 +35,20 @@ public function __construct(
3435
?ParserInterface $parser = null,
3536
) {
3637
$this->config = $config;
37-
$this->parser = $parser ?? new MastermindsParser();
38+
$this->parser = $parser ?? (\PHP_VERSION_ID < 80400 ? new MastermindsParser() : new NativeParser());
3839
}
3940

4041
public function sanitize(string $input): string
4142
{
42-
return $this->sanitizeWithContext(W3CReference::CONTEXT_BODY, $input);
43+
return $this->sanitizeFor(W3CReference::CONTEXT_BODY, $input);
4344
}
4445

4546
public function sanitizeFor(string $element, string $input): string
4647
{
47-
return $this->sanitizeWithContext(
48-
W3CReference::CONTEXTS_MAP[StringSanitizer::htmlLower($element)] ?? W3CReference::CONTEXT_BODY,
49-
$input
50-
);
51-
}
48+
$element = StringSanitizer::htmlLower($element);
49+
$context = W3CReference::CONTEXTS_MAP[$element] ?? W3CReference::CONTEXT_BODY;
50+
$element = isset(W3CReference::BODY_ELEMENTS[$element]) ? $element : $context;
5251

53-
private function sanitizeWithContext(string $context, string $input): string
54-
{
5552
// Text context: early return with HTML encoding
5653
if (W3CReference::CONTEXT_TEXT === $context) {
5754
return StringSanitizer::encodeHtmlEntities($input);
@@ -71,11 +68,11 @@ private function sanitizeWithContext(string $context, string $input): string
7168
return '';
7269
}
7370

74-
// Remove NULL character
75-
$input = str_replace(\chr(0), '', $input);
71+
// Remove NULL character and HTML entities for null byte
72+
$input = str_replace([\chr(0), '&#0;', '&#x00;', '&#X00;', '&#000;'], '', $input);
7673

7774
// Parse as HTML
78-
if (!$parsed = $this->parser->parse($input)) {
75+
if ('' === trim($input) || !$parsed = $this->parser->parse($input, $element)) {
7976
return '';
8077
}
8178

src/Symfony/Component/HtmlSanitizer/Parser/MastermindsParser.php

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,15 +14,20 @@
1414
use Masterminds\HTML5;
1515

1616
/**
17+
* @deprecated since Symfony 7.4, use `NativeParser` instead
18+
*
1719
* @author Titouan Galopin <galopintitouan@gmail.com>
1820
*/
1921
final class MastermindsParser implements ParserInterface
2022
{
2123
public function __construct(private array $defaultOptions = [])
2224
{
25+
if (\PHP_VERSION_ID >= 80400) {
26+
trigger_deprecation('symfony/html-sanitizer', '7.4', '"%s" is deprecated since Symfony 7.4 and will be removed in 8.0. Use the "NativeParser" instead.', self::class);
27+
}
2328
}
2429

25-
public function parse(string $html): ?\DOMNode
30+
public function parse(string $html, string $context = 'body'): ?\DOMNode
2631
{
2732
return (new HTML5($this->defaultOptions))->loadHTMLFragment($html);
2833
}
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
<?php
2+
3+
/*
4+
* This file is part of the Symfony package.
5+
*
6+
* (c) Fabien Potencier <fabien@symfony.com>
7+
*
8+
* For the full copyright and license information, please view the LICENSE
9+
* file that was distributed with this source code.
10+
*/
11+
12+
namespace Symfony\Component\HtmlSanitizer\Parser;
13+
14+
/**
 * Parser using PHP 8.4's new Dom API.
 *
 * The fragment is wrapped in the requested context element and handed to
 * \Dom\HTMLDocument, then the first element named after the context is
 * returned as the root of the parsed tree.
 */
final class NativeParser implements ParserInterface
{
    /**
     * @throws \LogicException When running on a PHP version older than 8.4
     */
    public function __construct()
    {
        if (\PHP_VERSION_ID < 80400) {
            throw new \LogicException(self::class.' requires PHP 8.4 or higher.');
        }
    }

    /**
     * Parses an HTML fragment inside the given context element.
     *
     * @param string $html    The HTML fragment to parse
     * @param string $context The name of the element the fragment is wrapped in
     *
     * @return \Dom\Node|null The first element named $context, or null when no
     *                        such element exists in the parsed document or when
     *                        it has no child nodes
     */
    public function parse(string $html, string $context = 'body'): ?\Dom\Node
    {
        // NOTE(review): the "@" presumably silences the warnings the HTML5 parser
        // reports for malformed markup, which is expected for untrusted input.
        $document = @\Dom\HTMLDocument::createFromString(\sprintf('<!DOCTYPE html><%s>%s</%1$s>', $context, $html));
        $element = $document->getElementsByTagName($context)->item(0);

        // item(0) is null when the document contains no element named $context,
        // e.g. when the parser discarded the wrapper tag (a table-internal tag
        // such as "td" or "tr" is ignored outside of a table). The nullsafe call
        // turns that case into the documented null result instead of a fatal
        // "Call to a member function hasChildNodes() on null" error.
        return $element?->hasChildNodes() ? $element : null;
    }
}

src/Symfony/Component/HtmlSanitizer/Parser/ParserInterface.php

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@ interface ParserInterface
2222
* Parses the given string and returns a DOM node tree.
2323
*
2424
* This method must return null if the string cannot be parsed as HTML.
25+
*
26+
* @param string $context The name of the context element in which the HTML is parsed
2527
*/
26-
public function parse(string $html): ?\DOMNode;
28+
public function parse(string $html/* , string $context = 'body' */): \Dom\Node|\DOMNode|null;
2729
}

src/Symfony/Component/HtmlSanitizer/Tests/HtmlSanitizerAllTest.php

Lines changed: 59 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -64,8 +64,11 @@ public static function provideSanitizeHead()
6464
}
6565

6666
#[DataProvider('provideSanitizeBody')]
67-
public function testSanitizeBody(string $input, string $expected)
67+
public function testSanitizeBody(string $input, string $expected, ?string $legacyExpected = null)
6868
{
69+
if (\PHP_VERSION_ID < 80400) {
70+
$expected = $legacyExpected ?? $expected;
71+
}
6972
$this->assertSame($expected, $this->createSanitizer()->sanitize($input));
7073
}
7174

@@ -83,6 +86,7 @@ public static function provideSanitizeBody()
8386
],
8487
[
8588
'< Hello',
89+
'&lt; Hello',
8690
' Hello',
8791
],
8892
[
@@ -127,6 +131,7 @@ public static function provideSanitizeBody()
127131
],
128132
[
129133
'<<a href="javascript:evil"/>a href="javascript:evil"/>',
134+
'&lt;<a>a href&#61;&#34;javascript:evil&#34;/&gt;</a>',
130135
'<a>a href&#61;&#34;javascript:evil&#34;/&gt;</a>',
131136
],
132137
[
@@ -163,10 +168,12 @@ public static function provideSanitizeBody()
163168
],
164169
[
165170
'<<img src="javascript:evil"/>iframe src="javascript:evil"/>',
171+
'&lt;<img />iframe src&#61;&#34;javascript:evil&#34;/&gt;',
166172
'<img />iframe src&#61;&#34;javascript:evil&#34;/&gt;',
167173
],
168174
[
169175
'<<img src="javascript:evil"/>img src="javascript:evil"/>',
176+
'&lt;<img />img src&#61;&#34;javascript:evil&#34;/&gt;',
170177
'<img />img src&#61;&#34;javascript:evil&#34;/&gt;',
171178
],
172179
[
@@ -211,10 +218,12 @@ public static function provideSanitizeBody()
211218
],
212219
[
213220
'<IMG SRC=&#0000106&#0000097&#0000118&#0000097&#0000115&#0000099&#0000114&#0000105&#0000112&#0000116&#0000058&#0000097&#0000108&#0000101&#0000114&#0000116&#0000040&#0000039&#0000088&#0000083&#0000083&#0000039&#0000041>',
221+
'<img />',
214222
'<img src="&amp;#0000106&amp;#0000097&amp;#0000118&amp;#0000097&amp;#0000115&amp;#0000099&amp;#0000114&amp;#0000105&amp;#0000112&amp;#0000116&amp;#0000058&amp;#0000097&amp;#0000108&amp;#0000101&amp;#0000114&amp;#0000116&amp;#0000040&amp;#0000039&amp;#0000088&amp;#0000083&amp;#0000083&amp;#0000039&amp;#0000041" />',
215223
],
216224
[
217225
'<IMG SRC=&#x6A&#x61&#x76&#x61&#x73&#x63&#x72&#x69&#x70&#x74&#x3A&#x61&#x6C&#x65&#x72&#x74&#x28&#x27&#x58&#x53&#x53&#x27&#x29>',
226+
'<img />',
218227
'<img src="&amp;#x6A&amp;#x61&amp;#x76&amp;#x61&amp;#x73&amp;#x63&amp;#x72&amp;#x69&amp;#x70&amp;#x74&amp;#x3A&amp;#x61&amp;#x6C&amp;#x65&amp;#x72&amp;#x74&amp;#x28&amp;#x27&amp;#x58&amp;#x53&amp;#x53&amp;#x27&amp;#x29" />',
219228
],
220229
[
@@ -233,10 +242,6 @@ public static function provideSanitizeBody()
233242
'<svg/onload=alert(\'XSS\')>',
234243
'',
235244
],
236-
[
237-
'<BODY BACKGROUND="javascript:alert(\'XSS\')">',
238-
'<body></body>',
239-
],
240245
[
241246
'<BGSOUND SRC="javascript:alert(\'XSS\');">',
242247
'<bgsound></bgsound>',
@@ -350,10 +355,6 @@ public static function provideSanitizeBody()
350355
'Lorem ipsum <br>dolor sit amet <br />consectetur adipisicing.',
351356
'Lorem ipsum <br />dolor sit amet <br />consectetur adipisicing.',
352357
],
353-
[
354-
'<caption>Lorem ipsum</caption>',
355-
'<caption>Lorem ipsum</caption>',
356-
],
357358
[
358359
'<code>Lorem ipsum</code>',
359360
'<code>Lorem ipsum</code>',
@@ -529,41 +530,84 @@ public static function provideSanitizeBody()
529530
],
530531
[
531532
'<table>Lorem ipsum</table>',
533+
'Lorem ipsum<table></table>',
532534
'<table>Lorem ipsum</table>',
533535
],
536+
[
537+
'<ul>Lorem ipsum</ul>',
538+
'<ul>Lorem ipsum</ul>',
539+
],
540+
];
541+
542+
foreach ($cases as $case) {
543+
yield $case[0] => $case;
544+
}
545+
}
546+
547+
#[DataProvider('provideSanitizeTable')]
548+
public function testSanitizeTable(string $input, string $expected, ?string $legacyExpected = null)
549+
{
550+
if (\PHP_VERSION_ID < 80400) {
551+
$expected = $legacyExpected ?? $expected;
552+
}
553+
554+
$this->assertSame($expected, $this->createSanitizer()->sanitizeFor('table', $input));
555+
}
556+
557+
public static function provideSanitizeTable(): iterable
558+
{
559+
return [
560+
[
561+
'<caption>Lorem ipsum</caption>',
562+
'<caption>Lorem ipsum</caption>',
563+
],
534564
[
535565
'<tbody>Lorem ipsum</tbody>',
566+
'<tbody></tbody>',
536567
'<tbody>Lorem ipsum</tbody>',
537568
],
538569
[
539570
'<td>Lorem ipsum</td>',
571+
'<tbody><tr><td>Lorem ipsum</td></tr></tbody>',
540572
'<td>Lorem ipsum</td>',
541573
],
542574
[
543575
'<tfoot>Lorem ipsum</tfoot>',
576+
'<tfoot></tfoot>',
544577
'<tfoot>Lorem ipsum</tfoot>',
545578
],
546579
[
547580
'<thead>Lorem ipsum</thead>',
581+
'<thead></thead>',
548582
'<thead>Lorem ipsum</thead>',
549583
],
550584
[
551585
'<th>Lorem ipsum</th>',
586+
'<tbody><tr><th>Lorem ipsum</th></tr></tbody>',
552587
'<th>Lorem ipsum</th>',
553588
],
554589
[
555590
'<tr>Lorem ipsum</tr>',
591+
'<tbody><tr></tr></tbody>',
556592
'<tr>Lorem ipsum</tr>',
557593
],
594+
];
595+
}
596+
597+
#[DataProvider('provideSanitizeHtml')]
598+
public function testSanitizeHtml(string $input, string $expected)
599+
{
600+
$this->assertSame($expected, $this->createSanitizer()->sanitizeFor('html', $input));
601+
}
602+
603+
public static function provideSanitizeHtml(): iterable
604+
{
605+
return [
558606
[
559-
'<ul>Lorem ipsum</ul>',
560-
'<ul>Lorem ipsum</ul>',
607+
'<BODY BACKGROUND="javascript:alert(\'XSS\')">',
608+
'<body></body>',
561609
],
562610
];
563-
564-
foreach ($cases as $case) {
565-
yield $case[0] => $case;
566-
}
567611
}
568612

569613
public function testUnlimitedLength()

src/Symfony/Component/HtmlSanitizer/Tests/HtmlSanitizerCustomTest.php

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,8 @@ public function testSanitizeForHead()
2525
;
2626

2727
$this->assertSame(
28-
' world',
29-
(new HtmlSanitizer($config))->sanitizeFor('head', '<div style="width: 100px">Hello</div> world')
28+
'',
29+
(new HtmlSanitizer($config))->sanitizeFor('head', '<div style="width: 100px">Hello world</div>')
3030
);
3131
}
3232

@@ -65,8 +65,8 @@ public function testSanitizeDeepNestedString()
6565

6666
public function testSanitizeNullByte()
6767
{
68-
$this->assertSame('Null byte', $this->sanitize(new HtmlSanitizerConfig(), "Null byte\0"));
69-
$this->assertSame('Null byte', $this->sanitize(new HtmlSanitizerConfig(), 'Null byte&#0;'));
68+
$this->assertSame('Null byte', $this->sanitize(new HtmlSanitizerConfig(), "Null byte\0"));
69+
$this->assertSame('Null byte', $this->sanitize(new HtmlSanitizerConfig(), 'Null byte&#0;'));
7070
}
7171

7272
public function testSanitizeDefaultBody()

src/Symfony/Component/HtmlSanitizer/Tests/Parser/MastermindsParserTest.php

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,13 @@
1111

1212
namespace Symfony\Component\HtmlSanitizer\Tests\Parser;
1313

14+
use PHPUnit\Framework\Attributes\Group;
15+
use PHPUnit\Framework\Attributes\IgnoreDeprecations;
1416
use PHPUnit\Framework\TestCase;
1517
use Symfony\Component\HtmlSanitizer\Parser\MastermindsParser;
1618

19+
#[IgnoreDeprecations]
20+
#[Group('legacy')]
1721
class MastermindsParserTest extends TestCase
1822
{
1923
public function testParseValid()

0 commit comments

Comments
 (0)