Skip to content

Commit 04b6207

Browse files
committed
[HtmlSanitizer] Add ability to sanitize a whole document
1 parent d83167d commit 04b6207

File tree

6 files changed

+123
-47
lines changed

6 files changed

+123
-47
lines changed

src/Symfony/Component/HtmlSanitizer/HtmlSanitizer.php

Lines changed: 10 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -25,15 +25,14 @@ final class HtmlSanitizer implements HtmlSanitizerInterface
2525
private ParserInterface $parser;
2626

2727
/**
28-
* @var array<string, DomVisitor>
28+
* @var ?DomVisitor
2929
*/
30-
private array $domVisitors = [];
30+
private ?DomVisitor $domVisitor = null;
3131

3232
public function __construct(
3333
private HtmlSanitizerConfig $config,
3434
?ParserInterface $parser = null,
3535
) {
36-
$this->config = $config;
3736
$this->parser = $parser ?? new MastermindsParser();
3837
}
3938

@@ -58,7 +57,7 @@ private function sanitizeWithContext(string $context, string $input): string
5857
}
5958

6059
// Other context: build a DOM visitor
61-
$this->domVisitors[$context] ??= $this->createDomVisitorForContext($context);
60+
$this->domVisitor ??= $this->createDomVisitor();
6261

6362
// Prevent DOS attack induced by extremely long HTML strings
6463
if (-1 !== $this->config->getMaxInputLength() && \strlen($input) > $this->config->getMaxInputLength()) {
@@ -80,7 +79,9 @@ private function sanitizeWithContext(string $context, string $input): string
8079
}
8180

8281
// Visit the DOM tree and render the sanitized nodes
83-
return $this->domVisitors[$context]->visit($parsed)?->render() ?? '';
82+
$sanitized = $this->domVisitor->visit($context, $parsed)?->render() ?? '';
83+
84+
return W3CReference::CONTEXT_DOCUMENT === $context ? '<!DOCTYPE html>'.$sanitized : $sanitized;
8485
}
8586

8687
private function isValidUtf8(string $html): bool
@@ -89,50 +90,20 @@ private function isValidUtf8(string $html): bool
8990
return '' === $html || preg_match('//u', $html);
9091
}
9192

92-
private function createDomVisitorForContext(string $context): DomVisitor
93+
private function createDomVisitor(): DomVisitor
9394
{
9495
$elementsConfig = [];
9596

96-
// Head: only a few elements are allowed
97-
if (W3CReference::CONTEXT_HEAD === $context) {
98-
foreach ($this->config->getAllowedElements() as $allowedElement => $allowedAttributes) {
99-
if (\array_key_exists($allowedElement, W3CReference::HEAD_ELEMENTS)) {
100-
$elementsConfig[$allowedElement] = $allowedAttributes;
101-
}
102-
}
103-
104-
foreach ($this->config->getBlockedElements() as $blockedElement => $v) {
105-
if (\array_key_exists($blockedElement, W3CReference::HEAD_ELEMENTS)) {
106-
$elementsConfig[$blockedElement] = HtmlSanitizerAction::Block;
107-
}
108-
}
109-
110-
foreach ($this->config->getDroppedElements() as $droppedElement => $v) {
111-
if (\array_key_exists($droppedElement, W3CReference::HEAD_ELEMENTS)) {
112-
$elementsConfig[$droppedElement] = HtmlSanitizerAction::Drop;
113-
}
114-
}
115-
116-
return new DomVisitor($this->config, $elementsConfig);
117-
}
118-
119-
// Body: allow any configured element that isn't in <head>
12097
foreach ($this->config->getAllowedElements() as $allowedElement => $allowedAttributes) {
121-
if (!\array_key_exists($allowedElement, W3CReference::HEAD_ELEMENTS)) {
122-
$elementsConfig[$allowedElement] = $allowedAttributes;
123-
}
98+
$elementsConfig[$allowedElement] = $allowedAttributes;
12499
}
125100

126101
foreach ($this->config->getBlockedElements() as $blockedElement => $v) {
127-
if (!\array_key_exists($blockedElement, W3CReference::HEAD_ELEMENTS)) {
128-
$elementsConfig[$blockedElement] = HtmlSanitizerAction::Block;
129-
}
102+
$elementsConfig[$blockedElement] = HtmlSanitizerAction::Block;
130103
}
131104

132105
foreach ($this->config->getDroppedElements() as $droppedElement => $v) {
133-
if (!\array_key_exists($droppedElement, W3CReference::HEAD_ELEMENTS)) {
134-
$elementsConfig[$droppedElement] = HtmlSanitizerAction::Drop;
135-
}
106+
$elementsConfig[$droppedElement] = HtmlSanitizerAction::Drop;
136107
}
137108

138109
return new DomVisitor($this->config, $elementsConfig);

src/Symfony/Component/HtmlSanitizer/README.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,12 @@ $sanitizer->sanitize($userInput);
9898
// Sanitize the given string for use in a <head> tag
9999
$sanitizer->sanitizeFor('head', $userInput);
100100

101+
// Sanitize the given string for use in a <body> tag
102+
$sanitizer->sanitizeFor('body', $userInput);
103+
104+
// Sanitize the given string as a whole document (including <head> and <body>)
105+
$sanitizer->sanitizeFor('document', $userInput);
106+
101107
// Sanitize the given string for use in another tag
102108
$sanitizer->sanitizeFor('title', $userInput); // Will encode as HTML entities
103109
$sanitizer->sanitizeFor('textarea', $userInput); // Will encode as HTML entities

src/Symfony/Component/HtmlSanitizer/Reference/W3CReference.php

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,13 +28,16 @@ final class W3CReference
2828
* A parent element name can be passed as an argument to {@see HtmlSanitizer::sanitizeFor()}.
2929
* When doing so, depending on the given context, different elements will be allowed.
3030
*/
31+
public const CONTEXT_DOCUMENT = 'document';
3132
public const CONTEXT_HEAD = 'head';
3233
public const CONTEXT_BODY = 'body';
3334
public const CONTEXT_TEXT = 'text';
3435

3536
// Which context to apply depending on the passed parent element name
3637
public const CONTEXTS_MAP = [
38+
'document' => self::CONTEXT_DOCUMENT,
3739
'head' => self::CONTEXT_HEAD,
40+
'body' => self::CONTEXT_BODY,
3841
'textarea' => self::CONTEXT_TEXT,
3942
'title' => self::CONTEXT_TEXT,
4043
];

src/Symfony/Component/HtmlSanitizer/Tests/HtmlSanitizerAllTest.php

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,32 @@ private function createSanitizer(): HtmlSanitizer
3131
);
3232
}
3333

34+
/**
35+
* @dataProvider provideSanitizeDocument
36+
*/
37+
public function testSanitizeDocument(string $input, string $expected)
38+
{
39+
$this->assertSame($expected, $this->createSanitizer()->sanitizeFor('document', $input));
40+
}
41+
42+
public static function provideSanitizeDocument()
43+
{
44+
$heads = iterator_to_array(self::provideSanitizeHead());
45+
$bodies = iterator_to_array(self::provideSanitizeBody());
46+
47+
$cases = [];
48+
foreach ($heads as $head) {
49+
foreach ($bodies as $body) {
50+
$cases[] = [
51+
'<!DOCTYPE html><html><head>'.$head[0].'</head><body>'.$body[0].'</body></html>',
52+
'<!DOCTYPE html><html><head>'.$head[1].'</head><body>'.$body[1].'</body></html>',
53+
];
54+
}
55+
}
56+
57+
return $cases;
58+
}
59+
3460
/**
3561
* @dataProvider provideSanitizeHead
3662
*/

src/Symfony/Component/HtmlSanitizer/Visitor/DomVisitor.php

Lines changed: 74 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313

1414
use Symfony\Component\HtmlSanitizer\HtmlSanitizerAction;
1515
use Symfony\Component\HtmlSanitizer\HtmlSanitizerConfig;
16+
use Symfony\Component\HtmlSanitizer\Reference\W3CReference;
1617
use Symfony\Component\HtmlSanitizer\TextSanitizer\StringSanitizer;
1718
use Symfony\Component\HtmlSanitizer\Visitor\AttributeSanitizer\AttributeSanitizerInterface;
1819
use Symfony\Component\HtmlSanitizer\Visitor\Model\Cursor;
@@ -51,6 +52,13 @@ final class DomVisitor
5152
*/
5253
private array $attributeSanitizers = [];
5354

55+
/**
56+
* Registry of element configurations for each sanitization context used in the document.
57+
*
58+
* @var array<string, array<string, HtmlSanitizerAction|array<string, bool>>> $elementsConfigByContext
59+
*/
60+
private array $elementsConfigByContext = [];
61+
5462
/**
5563
* @param array<string, HtmlSanitizerAction|array<string, bool>> $elementsConfig Registry of allowed/blocked elements:
5664
* * If an element is present as a key and contains an array, the element should be allowed
@@ -75,9 +83,9 @@ public function __construct(
7583
$this->defaultAction = $config->getDefaultAction();
7684
}
7785

78-
public function visit(\DOMDocumentFragment $domNode): ?NodeInterface
86+
public function visit(?string $context, \DOMDocumentFragment $domNode): ?NodeInterface
7987
{
80-
$cursor = new Cursor(new DocumentNode());
88+
$cursor = new Cursor([$context], new DocumentNode());
8189
$this->visitChildren($domNode, $cursor);
8290

8391
return $cursor->node;
@@ -87,24 +95,35 @@ private function visitNode(\DOMNode $domNode, Cursor $cursor): void
8795
{
8896
$nodeName = StringSanitizer::htmlLower($domNode->nodeName);
8997

98+
if (array_key_exists($nodeName, W3CReference::CONTEXTS_MAP)) {
99+
$cursor->contextsPath[] = $nodeName;
100+
}
101+
90102
// Visit recursively if the node was not dropped
91103
if ($this->enterNode($nodeName, $domNode, $cursor)) {
92104
$this->visitChildren($domNode, $cursor);
93105
$cursor->node = $cursor->node->getParent();
94106
}
107+
108+
if (array_key_exists($nodeName, W3CReference::CONTEXTS_MAP)) {
109+
array_pop($cursor->contextsPath);
110+
}
95111
}
96112

97113
private function enterNode(string $domNodeName, \DOMNode $domNode, Cursor $cursor): bool
98114
{
99-
if (!\array_key_exists($domNodeName, $this->elementsConfig)) {
115+
$context = array_reverse($cursor->contextsPath)[0] ?? 'body';
116+
$this->elementsConfigByContext[$context] ??= $this->createContextElementsConfig($context);
117+
118+
if (!\array_key_exists($domNodeName, $this->elementsConfigByContext[$context])) {
100119
$action = $this->defaultAction;
101120
$allowedAttributes = [];
102121
} else {
103-
if (\is_array($this->elementsConfig[$domNodeName])) {
122+
if (\is_array($this->elementsConfigByContext[$context][$domNodeName])) {
104123
$action = HtmlSanitizerAction::Allow;
105-
$allowedAttributes = $this->elementsConfig[$domNodeName];
124+
$allowedAttributes = $this->elementsConfigByContext[$context][$domNodeName];
106125
} else {
107-
$action = $this->elementsConfig[$domNodeName];
126+
$action = $this->elementsConfigByContext[$context][$domNodeName];
108127
$allowedAttributes = [];
109128
}
110129
}
@@ -185,4 +204,53 @@ private function setAttributes(string $domNodeName, \DOMNode $domNode, Node $nod
185204
}
186205
}
187206
}
207+
208+
private function createContextElementsConfig(string $context): array
209+
{
210+
$elementsConfig = [];
211+
212+
// Head: only a few elements are allowed
213+
if (W3CReference::CONTEXT_HEAD === $context) {
214+
foreach ($this->config->getAllowedElements() as $allowedElement => $allowedAttributes) {
215+
if (\array_key_exists($allowedElement, W3CReference::HEAD_ELEMENTS)) {
216+
$elementsConfig[$allowedElement] = $allowedAttributes;
217+
}
218+
}
219+
220+
foreach ($this->config->getBlockedElements() as $blockedElement => $v) {
221+
if (\array_key_exists($blockedElement, W3CReference::HEAD_ELEMENTS)) {
222+
$elementsConfig[$blockedElement] = HtmlSanitizerAction::Block;
223+
}
224+
}
225+
226+
foreach ($this->config->getDroppedElements() as $droppedElement => $v) {
227+
if (\array_key_exists($droppedElement, W3CReference::HEAD_ELEMENTS)) {
228+
$elementsConfig[$droppedElement] = HtmlSanitizerAction::Drop;
229+
}
230+
}
231+
232+
return $elementsConfig;
233+
}
234+
235+
// Body: allow any configured element that isn't in <head>
236+
foreach ($this->config->getAllowedElements() as $allowedElement => $allowedAttributes) {
237+
if (!\array_key_exists($allowedElement, W3CReference::HEAD_ELEMENTS)) {
238+
$elementsConfig[$allowedElement] = $allowedAttributes;
239+
}
240+
}
241+
242+
foreach ($this->config->getBlockedElements() as $blockedElement => $v) {
243+
if (!\array_key_exists($blockedElement, W3CReference::HEAD_ELEMENTS)) {
244+
$elementsConfig[$blockedElement] = HtmlSanitizerAction::Block;
245+
}
246+
}
247+
248+
foreach ($this->config->getDroppedElements() as $droppedElement => $v) {
249+
if (!\array_key_exists($droppedElement, W3CReference::HEAD_ELEMENTS)) {
250+
$elementsConfig[$droppedElement] = HtmlSanitizerAction::Drop;
251+
}
252+
}
253+
254+
return $elementsConfig;
255+
}
188256
}

src/Symfony/Component/HtmlSanitizer/Visitor/Model/Cursor.php

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,9 @@
2020
*/
2121
final class Cursor
2222
{
23-
public function __construct(public ?NodeInterface $node)
24-
{
23+
public function __construct(
24+
public array $contextsPath,
25+
public ?NodeInterface $node
26+
) {
2527
}
2628
}

0 commit comments

Comments
 (0)