Skip to content

Commit 67e23f0

Browse files
committed
[HtmlSanitizer] Introduce HtmlSanitizer component
1 parent b2e7fcd commit 67e23f0

31 files changed

+3800
-0
lines changed
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
/Tests export-ignore
2+
/phpunit.xml.dist export-ignore
3+
/.gitattributes export-ignore
4+
/.gitignore export-ignore
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
composer.lock
2+
phpunit.xml
3+
vendor/
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
CHANGELOG
2+
=========
3+
4+
6.1.0
5+
-----
6+
7+
* added the component as experimental
Lines changed: 132 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,132 @@
1+
<?php
2+
3+
/*
4+
* This file is part of the Symfony package.
5+
*
6+
* (c) Fabien Potencier <fabien@symfony.com>
7+
*
8+
* For the full copyright and license information, please view the LICENSE
9+
* file that was distributed with this source code.
10+
*/
11+
12+
namespace Symfony\Component\HtmlSanitizer;
13+
14+
use Symfony\Component\HtmlSanitizer\Parser\MastermindsParser;
15+
use Symfony\Component\HtmlSanitizer\Parser\ParserInterface;
16+
use Symfony\Component\HtmlSanitizer\Reference\W3CReference;
17+
use Symfony\Component\HtmlSanitizer\TextSanitizer\StringSanitizer;
18+
use Symfony\Component\HtmlSanitizer\Visitor\DomVisitor;
19+
use function Symfony\Component\String\u;
20+
21+
/**
 * Sanitizes untrusted HTML strings according to an HtmlSanitizerConfig.
 *
 * The input is parsed into a DOM by the configured parser, walked by a
 * DomVisitor built for the target context and rendered back to a string.
 *
 * @author Titouan Galopin <galopintitouan@gmail.com>
 */
class HtmlSanitizer
{
    private HtmlSanitizerConfig $config;
    private int $maxInputLength;
    private ParserInterface $parser;

    /**
     * Visitors already built by this instance, indexed by context.
     *
     * @var array<string, DomVisitor>
     */
    private array $domVisitors = [];

    /**
     * @param HtmlSanitizerConfig  $config         Cloned on construction, so later changes to the caller's
     *                                             instance do not affect this sanitizer
     * @param int                  $maxInputLength Inputs whose length (as reported by the String component)
     *                                             exceeds this value are truncated before being parsed
     * @param ParserInterface|null $parser         Parser to use; a MastermindsParser is created when null
     */
    public function __construct(HtmlSanitizerConfig $config, int $maxInputLength = 20000, ?ParserInterface $parser = null)
    {
        $this->config = clone $config;
        $this->maxInputLength = $maxInputLength;
        $this->parser = $parser ?? new MastermindsParser();
    }

    /**
     * Sanitizes the given input for the body context.
     */
    public function sanitize(string $input): string
    {
        return $this->sanitizeWithContext(W3CReference::CONTEXT_BODY, $input);
    }

    /**
     * Sanitizes the given input for the context associated with $element.
     *
     * The context is looked up in W3CReference::CONTEXTS_MAP; elements that
     * are not listed there are handled with the body context.
     */
    public function sanitizeFor(string $element, string $input): string
    {
        return $this->sanitizeWithContext(W3CReference::CONTEXTS_MAP[$element] ?? W3CReference::CONTEXT_BODY, $input);
    }

    /**
     * Runs the sanitization pipeline for one context.
     *
     * Returns an empty string when the input is not valid UTF-8 or when the
     * parser throws.
     */
    private function sanitizeWithContext(string $context, string $input): string
    {
        // Text context: early return with HTML encoding
        if (W3CReference::CONTEXT_TEXT === $context) {
            return StringSanitizer::encodeHtmlEntities($input);
        }

        // Other context: build the DOM visitor once and reuse it afterwards
        $this->domVisitors[$context] ??= $this->createDomVisitorForContext($context);

        /*
         * Only operate on valid UTF-8 strings. This is necessary to prevent cross
         * site scripting issues on Internet Explorer 6. Idea from Drupal (filter_xss).
         *
         * This check must happen before u() is called: u() builds a UnicodeString,
         * which throws on malformed UTF-8 instead of letting us return ''.
         */
        if (!$this->isValidUtf8($input)) {
            return '';
        }

        // Prevent DOS attack induced by extremely long HTML strings
        $uInput = u($input);
        if ($uInput->length() > $this->maxInputLength) {
            $input = $uInput->slice(0, $this->maxInputLength)->toString();
        }

        // Remove NULL character
        $input = str_replace(\chr(0), '', $input);

        try {
            $parsed = $this->parser->parse($input);
        } catch (\Exception) {
            // Unparsable input is dropped entirely rather than surfaced to the caller
            return '';
        }

        return $this->domVisitors[$context]->visit($parsed)->render();
    }

    /**
     * Tells whether the given string is empty or valid UTF-8.
     */
    private function isValidUtf8(string $html): bool
    {
        // preg_match() fails silently on strings containing invalid UTF-8.
        return '' === $html || 1 === preg_match('/^./us', $html);
    }

    /**
     * Builds the visitor for a context from the configured elements.
     *
     * In the head context only elements listed in W3CReference::HEAD_ELEMENTS
     * are kept; in every other context only elements NOT listed there are kept.
     * Allowed elements map to their allowed attributes, blocked elements to false.
     */
    private function createDomVisitorForContext(string $context): DomVisitor
    {
        $forHead = W3CReference::CONTEXT_HEAD === $context;
        $allowedNodes = [];

        foreach ($this->config->getAllowedElements() as $allowedElement => $allowedAttributes) {
            // Keep the element when "is a head element" matches "building for head"
            if ($forHead === \in_array($allowedElement, W3CReference::HEAD_ELEMENTS, true)) {
                $allowedNodes[$allowedElement] = $allowedAttributes;
            }
        }

        // Blocked elements are registered after allowed ones, so they win on conflict
        foreach ($this->config->getBlockedElements() as $blockedElement => $v) {
            if ($forHead === \in_array($blockedElement, W3CReference::HEAD_ELEMENTS, true)) {
                $allowedNodes[$blockedElement] = false;
            }
        }

        return new DomVisitor($allowedNodes, $this->config);
    }
}

0 commit comments

Comments
 (0)