Skip to content

Commit 2c5cf19

Browse files
petebacondarwin authored and mhevery committed
fix(core): use appropriate inert document strategy for Firefox & Safari (#22077)
Both Firefox and Safari are vulnerable to XSS if we use an inert document created via `document.implementation.createHTMLDocument()`. We now check for those vulnerabilities and use a DOMParser or XHR strategy if needed. Further, the platform-server has its own library for parsing HTML, so we sniff for that (by checking whether DOMParser exists) and fall back to the standard strategy. Thanks to @cure53 for the heads-up on this issue.
1 parent 0dacf6d commit 2c5cf19

File tree

3 files changed

+248
-80
lines changed

3 files changed

+248
-80
lines changed

packages/platform-browser/src/security/html_sanitizer.ts

+36-80
Original file line number | Diff line number | Diff line change
@@ -10,35 +10,9 @@ import {isDevMode} from '@angular/core';
1010

1111
import {DomAdapter, getDOM} from '../dom/dom_adapter';
1212

13+
import {InertBodyHelper} from './inert_body';
1314
import {sanitizeSrcset, sanitizeUrl} from './url_sanitizer';
1415

15-
/** A <body> element that can be safely used to parse untrusted HTML. Lazily initialized below. */
16-
let inertElement: HTMLElement|null = null;
17-
/** Lazily initialized to make sure the DOM adapter gets set before use. */
18-
let DOM: DomAdapter = null !;
19-
20-
/** Returns an HTML element that is guaranteed to not execute code when creating elements in it. */
21-
function getInertElement() {
22-
if (inertElement) return inertElement;
23-
DOM = getDOM();
24-
25-
// Prefer using <template> element if supported.
26-
const templateEl = DOM.createElement('template');
27-
if ('content' in templateEl) return templateEl;
28-
29-
const doc = DOM.createHtmlDocument();
30-
inertElement = DOM.querySelector(doc, 'body');
31-
if (inertElement == null) {
32-
// usually there should be only one body element in the document, but IE doesn't have any, so we
33-
// need to create one.
34-
const html = DOM.createElement('html', doc);
35-
inertElement = DOM.createElement('body', doc);
36-
DOM.appendChild(html, inertElement);
37-
DOM.appendChild(doc, html);
38-
}
39-
return inertElement;
40-
}
41-
4216
function tagSet(tags: string): {[k: string]: boolean} {
4317
const res: {[k: string]: boolean} = {};
4418
for (const t of tags.split(',')) res[t] = true;
@@ -121,53 +95,54 @@ class SanitizingHtmlSerializer {
12195
// because characters were re-encoded.
12296
public sanitizedSomething = false;
12397
private buf: string[] = [];
98+
private DOM = getDOM();
12499

125100
sanitizeChildren(el: Element): string {
126101
// This cannot use a TreeWalker, as it has to run on Angular's various DOM adapters.
127102
// However this code never accesses properties off of `document` before deleting its contents
128103
// again, so it shouldn't be vulnerable to DOM clobbering.
129-
let current: Node = el.firstChild !;
104+
let current: Node = this.DOM.firstChild(el) !;
130105
while (current) {
131-
if (DOM.isElementNode(current)) {
106+
if (this.DOM.isElementNode(current)) {
132107
this.startElement(current as Element);
133-
} else if (DOM.isTextNode(current)) {
134-
this.chars(DOM.nodeValue(current) !);
108+
} else if (this.DOM.isTextNode(current)) {
109+
this.chars(this.DOM.nodeValue(current) !);
135110
} else {
136111
// Strip non-element, non-text nodes.
137112
this.sanitizedSomething = true;
138113
}
139-
if (DOM.firstChild(current)) {
140-
current = DOM.firstChild(current) !;
114+
if (this.DOM.firstChild(current)) {
115+
current = this.DOM.firstChild(current) !;
141116
continue;
142117
}
143118
while (current) {
144119
// Leaving the element. Walk up and to the right, closing tags as we go.
145-
if (DOM.isElementNode(current)) {
120+
if (this.DOM.isElementNode(current)) {
146121
this.endElement(current as Element);
147122
}
148123

149-
let next = checkClobberedElement(current, DOM.nextSibling(current) !);
124+
let next = this.checkClobberedElement(current, this.DOM.nextSibling(current) !);
150125

151126
if (next) {
152127
current = next;
153128
break;
154129
}
155130

156-
current = checkClobberedElement(current, DOM.parentElement(current) !);
131+
current = this.checkClobberedElement(current, this.DOM.parentElement(current) !);
157132
}
158133
}
159134
return this.buf.join('');
160135
}
161136

162137
private startElement(element: Element) {
163-
const tagName = DOM.nodeName(element).toLowerCase();
138+
const tagName = this.DOM.nodeName(element).toLowerCase();
164139
if (!VALID_ELEMENTS.hasOwnProperty(tagName)) {
165140
this.sanitizedSomething = true;
166141
return;
167142
}
168143
this.buf.push('<');
169144
this.buf.push(tagName);
170-
DOM.attributeMap(element).forEach((value: string, attrName: string) => {
145+
this.DOM.attributeMap(element).forEach((value: string, attrName: string) => {
171146
const lower = attrName.toLowerCase();
172147
if (!VALID_ATTRS.hasOwnProperty(lower)) {
173148
this.sanitizedSomething = true;
@@ -186,7 +161,7 @@ class SanitizingHtmlSerializer {
186161
}
187162

188163
private endElement(current: Element) {
189-
const tagName = DOM.nodeName(current).toLowerCase();
164+
const tagName = this.DOM.nodeName(current).toLowerCase();
190165
if (VALID_ELEMENTS.hasOwnProperty(tagName) && !VOID_ELEMENTS.hasOwnProperty(tagName)) {
191166
this.buf.push('</');
192167
this.buf.push(tagName);
@@ -195,14 +170,14 @@ class SanitizingHtmlSerializer {
195170
}
196171

197172
private chars(chars: string) { this.buf.push(encodeEntities(chars)); }
198-
}
199173

200-
function checkClobberedElement(node: Node, nextNode: Node): Node {
201-
if (nextNode && DOM.contains(node, nextNode)) {
202-
throw new Error(
203-
`Failed to sanitize html because the element is clobbered: ${DOM.getOuterHTML(node)}`);
174+
checkClobberedElement(node: Node, nextNode: Node): Node {
175+
if (nextNode && this.DOM.contains(node, nextNode)) {
176+
throw new Error(
177+
`Failed to sanitize html because the element is clobbered: ${this.DOM.getOuterHTML(node)}`);
178+
}
179+
return nextNode;
204180
}
205-
return nextNode;
206181
}
207182

208183
// Regular Expressions for parsing tags and attributes
@@ -233,33 +208,20 @@ function encodeEntities(value: string) {
233208
.replace(/>/g, '&gt;');
234209
}
235210

236-
/**
237-
* When IE9-11 comes across an unknown namespaced attribute e.g. 'xlink:foo' it adds 'xmlns:ns1'
238-
* attribute to declare ns1 namespace and prefixes the attribute with 'ns1' (e.g. 'ns1:xlink:foo').
239-
*
240-
* This is undesirable since we don't want to allow any of these custom attributes. This method
241-
* strips them all.
242-
*/
243-
function stripCustomNsAttrs(el: Element) {
244-
DOM.attributeMap(el).forEach((_, attrName) => {
245-
if (attrName === 'xmlns:ns1' || attrName.indexOf('ns1:') === 0) {
246-
DOM.removeAttribute(el, attrName);
247-
}
248-
});
249-
for (const n of DOM.childNodesAsList(el)) {
250-
if (DOM.isElementNode(n)) stripCustomNsAttrs(n as Element);
251-
}
252-
}
211+
let inertBodyHelper: InertBodyHelper;
253212

254213
/**
255214
* Sanitizes the given unsafe, untrusted HTML fragment, and returns HTML text that is safe to add to
256215
* the DOM in a browser environment.
257216
*/
258217
export function sanitizeHtml(defaultDoc: any, unsafeHtmlInput: string): string {
218+
const DOM = getDOM();
219+
let inertBodyElement: HTMLElement|null = null;
259220
try {
260-
const containerEl = getInertElement();
221+
inertBodyHelper = inertBodyHelper || new InertBodyHelper(defaultDoc, DOM);
261222
// Make sure unsafeHtml is actually a string (TypeScript types are not enforced at runtime).
262223
let unsafeHtml = unsafeHtmlInput ? String(unsafeHtmlInput) : '';
224+
inertBodyElement = inertBodyHelper.getInertBodyElement(unsafeHtml);
263225

264226
// mXSS protection. Repeatedly parse the document to make sure it stabilizes, so that a browser
265227
// trying to auto-correct incorrect HTML cannot cause formerly inert HTML to become dangerous.
@@ -273,31 +235,25 @@ export function sanitizeHtml(defaultDoc: any, unsafeHtmlInput: string): string {
273235
mXSSAttempts--;
274236

275237
unsafeHtml = parsedHtml;
276-
DOM.setInnerHTML(containerEl, unsafeHtml);
277-
if (defaultDoc.documentMode) {
278-
// strip custom-namespaced attributes on IE<=11
279-
stripCustomNsAttrs(containerEl);
280-
}
281-
parsedHtml = DOM.getInnerHTML(containerEl);
238+
parsedHtml = DOM.getInnerHTML(inertBodyElement);
239+
inertBodyElement = inertBodyHelper.getInertBodyElement(unsafeHtml);
282240
} while (unsafeHtml !== parsedHtml);
283241

284242
const sanitizer = new SanitizingHtmlSerializer();
285-
const safeHtml = sanitizer.sanitizeChildren(DOM.getTemplateContent(containerEl) || containerEl);
286-
287-
// Clear out the body element.
288-
const parent = DOM.getTemplateContent(containerEl) || containerEl;
289-
for (const child of DOM.childNodesAsList(parent)) {
290-
DOM.removeChild(parent, child);
291-
}
292-
243+
const safeHtml =
244+
sanitizer.sanitizeChildren(DOM.getTemplateContent(inertBodyElement) || inertBodyElement);
293245
if (isDevMode() && sanitizer.sanitizedSomething) {
294246
DOM.log('WARNING: sanitizing HTML stripped some content (see http://g.co/ng/security#xss).');
295247
}
296248

297249
return safeHtml;
298-
} catch (e) {
250+
} finally {
299251
// In case anything goes wrong, clear out inertElement to reset the entire DOM structure.
300-
inertElement = null;
301-
throw e;
252+
if (inertBodyElement) {
253+
const parent = DOM.getTemplateContent(inertBodyElement) || inertBodyElement;
254+
for (const child of DOM.childNodesAsList(parent)) {
255+
DOM.removeChild(parent, child);
256+
}
257+
}
302258
}
303259
}
packages/platform-browser/src/security/inert_body.ts

Original file line number | Diff line number | Diff line change
@@ -0,0 +1,171 @@
1+
/**
2+
* @license
3+
* Copyright Google Inc. All Rights Reserved.
4+
*
5+
* Use of this source code is governed by an MIT-style license that can be
6+
* found in the LICENSE file at https://angular.io/license
7+
*/
8+
9+
import {DomAdapter, getDOM} from '../dom/dom_adapter';
10+
11+
/**
12+
* This helper class is used to get hold of an inert tree of DOM elements containing dirty HTML
13+
* that needs sanitizing.
14+
* Depending upon browser support we must use one of three strategies for doing this.
15+
* Support: Safari 10.x -> XHR strategy
16+
* Support: Firefox -> DomParser strategy
17+
* Default: InertDocument strategy
18+
*/
19+
export class InertBodyHelper {
20+
private inertBodyElement: HTMLElement;
21+
22+
constructor(private defaultDoc: any, private DOM: DomAdapter) {
23+
const inertDocument = this.DOM.createHtmlDocument();
24+
this.inertBodyElement = inertDocument.body;
25+
26+
if (this.inertBodyElement == null) {
27+
// usually there should be only one body element in the document, but IE doesn't have any, so
28+
// we need to create one.
29+
const inertHtml = this.DOM.createElement('html', inertDocument);
30+
this.inertBodyElement = this.DOM.createElement('body', inertDocument);
31+
this.DOM.appendChild(inertHtml, this.inertBodyElement);
32+
this.DOM.appendChild(inertDocument, inertHtml);
33+
}
34+
35+
this.DOM.setInnerHTML(
36+
this.inertBodyElement, '<svg><g onload="this.parentNode.remove()"></g></svg>');
37+
if (this.inertBodyElement.querySelector && !this.inertBodyElement.querySelector('svg')) {
38+
// We just hit the Safari 10.1 bug - which allows JS to run inside the SVG G element
39+
// so use the XHR strategy.
40+
this.getInertBodyElement = this.getInertBodyElement_XHR;
41+
return;
42+
}
43+
44+
this.DOM.setInnerHTML(
45+
this.inertBodyElement, '<svg><p><style><img src="</style><img src=x onerror=alert(1)//">');
46+
if (this.inertBodyElement.querySelector && this.inertBodyElement.querySelector('svg img')) {
47+
// We just hit the Firefox bug - which prevents the inner img JS from being sanitized
48+
// so use the DOMParser strategy, if it is available.
49+
// If the DOMParser is not available then we are not in Firefox (Server/WebWorker?) so we
50+
// fall through to the default strategy below.
51+
if (isDOMParserAvailable()) {
52+
this.getInertBodyElement = this.getInertBodyElement_DOMParser;
53+
return;
54+
}
55+
}
56+
57+
// None of the bugs were hit so it is safe for us to use the default InertDocument strategy
58+
this.getInertBodyElement = this.getInertBodyElement_InertDocument;
59+
}
60+
61+
/**
62+
* Get an inert DOM element containing DOM created from the dirty HTML string provided.
63+
* The implementation of this is determined in the constructor, when the class is instantiated.
64+
*/
65+
getInertBodyElement: (html: string) => HTMLElement | null;
66+
67+
/**
68+
* Use XHR to create and fill an inert body element (on Safari 10.1)
69+
* See
70+
* https://github.com/cure53/DOMPurify/blob/a992d3a75031cb8bb032e5ea8399ba972bdf9a65/src/purify.js#L439-L449
71+
*/
72+
private getInertBodyElement_XHR(html: string) {
73+
// We add these extra elements to ensure that the rest of the content is parsed as expected
74+
// e.g. leading whitespace is maintained and tags like `<meta>` do not get hoisted to the
75+
// `<head>` tag.
76+
html = '<body><remove></remove>' + html + '</body>';
77+
try {
78+
html = encodeURI(html);
79+
} catch (e) {
80+
return null;
81+
}
82+
const xhr = new XMLHttpRequest();
83+
xhr.responseType = 'document';
84+
xhr.open('GET', 'data:text/html;charset=utf-8,' + html, false);
85+
xhr.send(null);
86+
const body: HTMLBodyElement = xhr.response.body;
87+
body.removeChild(body.firstChild !);
88+
return body;
89+
}
90+
91+
/**
92+
* Use DOMParser to create and fill an inert body element (on Firefox)
93+
* See https://github.com/cure53/DOMPurify/releases/tag/0.6.7
94+
*
95+
*/
96+
private getInertBodyElement_DOMParser(html: string) {
97+
// We add these extra elements to ensure that the rest of the content is parsed as expected
98+
// e.g. leading whitespace is maintained and tags like `<meta>` do not get hoisted to the
99+
// `<head>` tag.
100+
html = '<body><remove></remove>' + html + '</body>';
101+
try {
102+
const body = new (window as any)
103+
.DOMParser()
104+
.parseFromString(html, 'text/html')
105+
.body as HTMLBodyElement;
106+
body.removeChild(body.firstChild !);
107+
return body;
108+
} catch (e) {
109+
return null;
110+
}
111+
}
112+
113+
/**
114+
* Use an HTML5 `template` element, if supported, or an inert body element created via
115+
* `createHtmlDocument` to create and fill an inert DOM element.
116+
* This is the default sane strategy to use if the browser does not require one of the specialised
117+
* strategies above.
118+
*/
119+
private getInertBodyElement_InertDocument(html: string) {
120+
// Prefer using <template> element if supported.
121+
const templateEl = this.DOM.createElement('template');
122+
if ('content' in templateEl) {
123+
this.DOM.setInnerHTML(templateEl, html);
124+
return templateEl;
125+
}
126+
127+
this.DOM.setInnerHTML(this.inertBodyElement, html);
128+
129+
// Support: IE 9-11 only
130+
// strip custom-namespaced attributes on IE<=11
131+
if (this.defaultDoc.documentMode) {
132+
this.stripCustomNsAttrs(this.inertBodyElement);
133+
}
134+
135+
return this.inertBodyElement;
136+
}
137+
138+
/**
139+
* When IE9-11 comes across an unknown namespaced attribute e.g. 'xlink:foo' it adds 'xmlns:ns1'
140+
* attribute to declare ns1 namespace and prefixes the attribute with 'ns1' (e.g.
141+
* 'ns1:xlink:foo').
142+
*
143+
* This is undesirable since we don't want to allow any of these custom attributes. This method
144+
* strips them all.
145+
*/
146+
private stripCustomNsAttrs(el: Element) {
147+
this.DOM.attributeMap(el).forEach((_, attrName) => {
148+
if (attrName === 'xmlns:ns1' || attrName.indexOf('ns1:') === 0) {
149+
this.DOM.removeAttribute(el, attrName);
150+
}
151+
});
152+
for (const n of this.DOM.childNodesAsList(el)) {
153+
if (this.DOM.isElementNode(n)) this.stripCustomNsAttrs(n as Element);
154+
}
155+
}
156+
}
157+
158+
/**
159+
* We need to determine whether the DOMParser exists in the global context.
160+
* The try-catch is because, on some browsers, trying to access this property
161+
* on window can actually throw an error.
162+
*
163+
* @suppress {uselessCode}
164+
*/
165+
function isDOMParserAvailable() {
166+
try {
167+
return !!(window as any).DOMParser;
168+
} catch (e) {
169+
return false;
170+
}
171+
}

0 commit comments

Comments
 (0)