diff --git a/html_sanitizer.rst b/html_sanitizer.rst new file mode 100644 index 00000000000..ab7ba7bc875 --- /dev/null +++ b/html_sanitizer.rst @@ -0,0 +1,1027 @@ +HTML Sanitizer +============== + +.. versionadded:: 6.1 + + The HTML Sanitizer component was introduced in Symfony 6.1. + +The HTML Sanitizer component aims at sanitizing/cleaning untrusted HTML +code (e.g. created by a WYSIWYG editor in the browser) into HTML that can +be trusted. It is based on the `HTML Sanitizer W3C Standard Proposal`_. + +The HTML sanitizer creates a new HTML structure from scratch, taking only +the elements and attributes that are allowed by configuration. This means +that the returned HTML is very predictable (it only contains allowed +elements), but it does not work well with badly formatted input (e.g. +invalid HTML). The sanitizer is targeted at two use cases: + +* Preventing security attacks based on XSS or other technologies relying on + execution of malicious code on the visitors' browsers; +* Generating HTML that always respects a certain format (only certain + tags, attributes, hosts, etc.) to be able to consistently style the + resulting output with CSS. This also protects your application against + attacks related to e.g. changing the CSS of the whole page. + +.. _html-sanitizer-installation: + +Installation +------------ + +You can install the HTML Sanitizer component with: + +.. code-block:: terminal + + $ composer require symfony/html-sanitizer + +Basic Usage +----------- + +Use the :class:`Symfony\\Component\\HtmlSanitizer\\HtmlSanitizer` class to +sanitize the HTML. In the Symfony framework, this class is available as the +``html_sanitizer`` service. This service will be :doc:`autowired </service_container/autowiring>` +automatically when type-hinting for +:class:`Symfony\\Component\\HtmlSanitizer\\HtmlSanitizerInterface`: + +.. configuration-block:: + + .. code-block:: php-symfony + + // src/Controller/BlogPostController.php + namespace App\Controller; + + // ... 
+ use Symfony\Component\HtmlSanitizer\HtmlSanitizerInterface; + + class BlogPostController extends AbstractController + { + public function createAction(HtmlSanitizerInterface $htmlSanitizer, Request $request): Response + { + $unsafeContents = $request->request->get('post_contents'); + + $safeContents = $htmlSanitizer->sanitize($unsafeContents); + // ... proceed using the safe HTML + } + } + + .. code-block:: php-standalone + + use Symfony\Component\HtmlSanitizer\HtmlSanitizer; + use Symfony\Component\HtmlSanitizer\HtmlSanitizerConfig; + + $htmlSanitizer = new HtmlSanitizer( + (new HtmlSanitizerConfig())->allowSafeElements() + ); + + // unsafe HTML (e.g. from a WYSIWYG editor in the browser) + $unsafePostContents = ...; + + $safePostContents = $htmlSanitizer->sanitize($unsafePostContents); + // ... proceed using the safe HTML + +.. note:: + + The default configuration of the HTML sanitizer allows all "safe" + elements and attributes, as defined by the `W3C Standard Proposal`_. In + practice, this means that the resulting code will not contain any + scripts, styles or other elements that can cause the website to behave + or look different. Later in this article, you'll learn how to + :ref:`fully customize the HTML sanitizer <html-sanitizer-configuration>`. + +Sanitizing HTML for a Specific Context +-------------------------------------- + +The default :method:`Symfony\\Component\\HtmlSanitizer\\HtmlSanitizer::sanitize` +method cleans the HTML code for usage in the ``<body>`` element. 
Using the +:method:`Symfony\\Component\\HtmlSanitizer\\HtmlSanitizer::sanitizeFor` +method, you can instruct the HTML sanitizer to customize this for the +``<head>`` or a more specific HTML tag:: + + // tags not allowed in <head> will be removed + $safeInput = $htmlSanitizer->sanitizeFor('head', $userInput); + + // encodes the returned HTML using HTML entities + $safeInput = $htmlSanitizer->sanitizeFor('title', $userInput); + $safeInput = $htmlSanitizer->sanitizeFor('textarea', $userInput); + + // uses the <body> context, removing tags only allowed in <head> + $safeInput = $htmlSanitizer->sanitizeFor('body', $userInput); + $safeInput = $htmlSanitizer->sanitizeFor('section', $userInput); + +Sanitizing HTML from Form Input +------------------------------- + +The HTML sanitizer component directly integrates with Symfony Forms, to +sanitize the form input before it is processed by your application. + +You can enable the sanitizer in ``TextType`` forms, or any form extending +this type (such as ``TextareaType``), using the ``sanitize_html`` option:: + + // src/Form/BlogPostType.php + namespace App\Form; + + // ... + class BlogPostType extends AbstractType + { + // ... + + public function configureOptions(OptionsResolver $resolver): void + { + $resolver->setDefaults([ + 'sanitize_html' => true, + // use the "sanitizer" option to use a custom sanitizer (see below) + //'sanitizer' => 'app.post_sanitizer', + ]); + } + } + +.. _html-sanitizer-twig: + +Sanitizing HTML in Twig Templates +--------------------------------- + +Besides sanitizing user input, you can also sanitize HTML code before +outputting it in a Twig template using the ``sanitize_html()`` filter: + +.. code-block:: twig + + {{ post.body|sanitize_html }} + + {# you can also use a custom sanitizer (see below) #} + {{ post.body|sanitize_html('app.post_sanitizer') }} + +.. _html-sanitizer-configuration: + +Configuration +------------- + +The behavior of the HTML sanitizer can be fully customized. 
This allows you +to explicitly state which elements, attributes and even attribute values +are allowed. + +You can do this by defining a new HTML sanitizer in the configuration: + +.. configuration-block:: + + .. code-block:: yaml + + # config/packages/html_sanitizer.yaml + framework: + html_sanitizer: + sanitizers: + app.post_sanitizer: + block_elements: + - h1 + + .. code-block:: xml + + + + + + + + + + + + + + + .. code-block:: php + + // config/packages/framework.php + use Symfony\Config\FrameworkConfig; + + return static function (FrameworkConfig $framework) { + $framework->htmlSanitizer() + ->sanitizer('app.post_sanitizer') + ->blockElement('h1') + ; + }; + + .. code-block:: php-standalone + + use Symfony\Component\HtmlSanitizer\HtmlSanitizer; + use Symfony\Component\HtmlSanitizer\HtmlSanitizerConfig; + + $postSanitizer = new HtmlSanitizer( + (new HtmlSanitizerConfig()) + ->blockElement('h1') + ); + +This configuration defines a new ``html_sanitizer.sanitizer.app.post_sanitizer`` +service. This service will be :doc:`autowired </service_container/autowiring>` +for services having an ``HtmlSanitizerInterface $appPostSanitizer`` parameter. + +Allow Element Baselines +~~~~~~~~~~~~~~~~~~~~~~~ + +You can start the custom HTML sanitizer by using one of the two baselines: + +Static elements + All elements and attributes on the baseline allow lists from the + `W3C Standard Proposal`_ (this does not include scripts). +Safe elements + All elements and attributes from the "static elements" list, excluding + elements and attributes that can also lead to CSS + injection/click-jacking. + +.. configuration-block:: + + .. code-block:: yaml + + # config/packages/html_sanitizer.yaml + framework: + html_sanitizer: + sanitizers: + app.post_sanitizer: + # enable either of these + allow_safe_elements: true + allow_all_static_elements: true + + .. code-block:: xml + + + + + + + + + + + + + + .. 
code-block:: php + + // config/packages/framework.php + use Symfony\Config\FrameworkConfig; + + return static function (FrameworkConfig $framework) { + $framework->htmlSanitizer() + ->sanitizer('app.post_sanitizer') + // enable either of these + ->allowSafeElements(true) + ->allowAllStaticElements(true) + ; + }; + + .. code-block:: php-standalone + + use Symfony\Component\HtmlSanitizer\HtmlSanitizer; + use Symfony\Component\HtmlSanitizer\HtmlSanitizerConfig; + + $postSanitizer = new HtmlSanitizer( + (new HtmlSanitizerConfig()) + // enable either of these + ->allowSafeElements() + ->allowAllStaticElements() + ); + +Allow Elements +~~~~~~~~~~~~~~ + +This adds elements to the allow list. For each element, you can also +specify the allowed attributes on that element. If not given, all allowed +attributes from the `W3C Standard Proposal`_ are allowed. + +.. configuration-block:: + + .. code-block:: yaml + + # config/packages/html_sanitizer.yaml + framework: + html_sanitizer: + sanitizers: + app.post_sanitizer: + # ... + allow_elements: + # allow the
<article> element and 2 attributes + article: ['class', 'data-attr'] + # allow the <img> element and preserve the src attribute + img: 'src' + # allow the <h1> element with all safe attributes + h1: '*' + + .. code-block:: xml + + + + + + + + + + + + class + data-attr + + + + + src + + + + + * + + + + + + + .. code-block:: php + + // config/packages/framework.php + use Symfony\Config\FrameworkConfig; + + return static function (FrameworkConfig $framework) { + $framework->htmlSanitizer() + ->sanitizer('app.post_sanitizer') + // allow the
<article> element and 2 attributes + ->allowElement('article') + ->attribute('class') + ->attribute('data-attr') + + // allow the <img> element and preserve the src attribute + ->allowElement('img') + ->attribute('src') + + // allow the <h1> element with all safe attributes + ->allowElement('h1', '*') + ; + }; + + .. code-block:: php-standalone + + use Symfony\Component\HtmlSanitizer\HtmlSanitizer; + use Symfony\Component\HtmlSanitizer\HtmlSanitizerConfig; + + $postSanitizer = new HtmlSanitizer( + (new HtmlSanitizerConfig()) + // allow the
<article> element and 2 attributes + ->allowElement('article', ['class', 'data-attr']) + + // allow the <img> element and preserve the src attribute + ->allowElement('img', 'src') + + // allow the <h1> element with all safe attributes + ->allowElement('h1') + ); + +Block and Drop Elements +~~~~~~~~~~~~~~~~~~~~~~~ + +You can also block (the element will be removed, but its children +will be kept) or drop (the element and its children will be removed) +elements. + +This can also be used to remove elements from the allow list. + +.. configuration-block:: + + .. code-block:: yaml + + # config/packages/html_sanitizer.yaml + framework: + html_sanitizer: + sanitizers: + app.post_sanitizer: + # ... + + # remove
<div>, but process the children + block_elements: ['div'] + # remove <figure> and its children + drop_elements: ['figure'] + + .. code-block:: xml + + + + + + + + + div + + + figure + + + + + .. code-block:: php + + // config/packages/framework.php + use Symfony\Config\FrameworkConfig; + + return static function (FrameworkConfig $framework) { + $framework->htmlSanitizer() + ->sanitizer('app.post_sanitizer') + // remove
<div>, but process the children + ->blockElement('div') + // remove <figure> and its children + ->dropElement('figure') + ; + }; + + .. code-block:: php-standalone + + use Symfony\Component\HtmlSanitizer\HtmlSanitizer; + use Symfony\Component\HtmlSanitizer\HtmlSanitizerConfig; + + $postSanitizer = new HtmlSanitizer( + (new HtmlSanitizerConfig()) + // remove
<div>, but process the children + ->blockElement('div') + // remove <figure> and its children + ->dropElement('figure') + ); + +Allow Attributes +~~~~~~~~~~~~~~~~ + +Using this option, you can specify which attributes will be preserved in +the returned HTML. The attribute will be allowed on the given elements, or +on all elements allowed *before this setting*. + +.. configuration-block:: + + .. code-block:: yaml + + # config/packages/html_sanitizer.yaml + framework: + html_sanitizer: + sanitizers: + app.post_sanitizer: + # ... + allow_attributes: + # allow "src" on