@@ -10,35 +10,9 @@ import {isDevMode} from '@angular/core';
10
10
11
11
import { DomAdapter , getDOM } from '../dom/dom_adapter' ;
12
12
13
+ import { InertBodyHelper } from './inert_body' ;
13
14
import { sanitizeSrcset , sanitizeUrl } from './url_sanitizer' ;
14
15
15
- /** A <body> element that can be safely used to parse untrusted HTML. Lazily initialized below. */
16
- let inertElement : HTMLElement | null = null ;
17
- /** Lazily initialized to make sure the DOM adapter gets set before use. */
18
- let DOM : DomAdapter = null ! ;
19
-
20
- /** Returns an HTML element that is guaranteed to not execute code when creating elements in it. */
21
- function getInertElement ( ) {
22
- if ( inertElement ) return inertElement ;
23
- DOM = getDOM ( ) ;
24
-
25
- // Prefer using <template> element if supported.
26
- const templateEl = DOM . createElement ( 'template' ) ;
27
- if ( 'content' in templateEl ) return templateEl ;
28
-
29
- const doc = DOM . createHtmlDocument ( ) ;
30
- inertElement = DOM . querySelector ( doc , 'body' ) ;
31
- if ( inertElement == null ) {
32
- // usually there should be only one body element in the document, but IE doesn't have any, so we
33
- // need to create one.
34
- const html = DOM . createElement ( 'html' , doc ) ;
35
- inertElement = DOM . createElement ( 'body' , doc ) ;
36
- DOM . appendChild ( html , inertElement ) ;
37
- DOM . appendChild ( doc , html ) ;
38
- }
39
- return inertElement ;
40
- }
41
-
42
16
function tagSet ( tags : string ) : { [ k : string ] : boolean } {
43
17
const res : { [ k : string ] : boolean } = { } ;
44
18
for ( const t of tags . split ( ',' ) ) res [ t ] = true ;
@@ -121,53 +95,54 @@ class SanitizingHtmlSerializer {
121
95
// because characters were re-encoded.
122
96
public sanitizedSomething = false ;
123
97
private buf : string [ ] = [ ] ;
98
+ private DOM = getDOM ( ) ;
124
99
125
100
sanitizeChildren ( el : Element ) : string {
126
101
// This cannot use a TreeWalker, as it has to run on Angular's various DOM adapters.
127
102
// However this code never accesses properties off of `document` before deleting its contents
128
103
// again, so it shouldn't be vulnerable to DOM clobbering.
129
- let current : Node = el . firstChild ! ;
104
+ let current : Node = this . DOM . firstChild ( el ) ! ;
130
105
while ( current ) {
131
- if ( DOM . isElementNode ( current ) ) {
106
+ if ( this . DOM . isElementNode ( current ) ) {
132
107
this . startElement ( current as Element ) ;
133
- } else if ( DOM . isTextNode ( current ) ) {
134
- this . chars ( DOM . nodeValue ( current ) ! ) ;
108
+ } else if ( this . DOM . isTextNode ( current ) ) {
109
+ this . chars ( this . DOM . nodeValue ( current ) ! ) ;
135
110
} else {
136
111
// Strip non-element, non-text nodes.
137
112
this . sanitizedSomething = true ;
138
113
}
139
- if ( DOM . firstChild ( current ) ) {
140
- current = DOM . firstChild ( current ) ! ;
114
+ if ( this . DOM . firstChild ( current ) ) {
115
+ current = this . DOM . firstChild ( current ) ! ;
141
116
continue ;
142
117
}
143
118
while ( current ) {
144
119
// Leaving the element. Walk up and to the right, closing tags as we go.
145
- if ( DOM . isElementNode ( current ) ) {
120
+ if ( this . DOM . isElementNode ( current ) ) {
146
121
this . endElement ( current as Element ) ;
147
122
}
148
123
149
- let next = checkClobberedElement ( current , DOM . nextSibling ( current ) ! ) ;
124
+ let next = this . checkClobberedElement ( current , this . DOM . nextSibling ( current ) ! ) ;
150
125
151
126
if ( next ) {
152
127
current = next ;
153
128
break ;
154
129
}
155
130
156
- current = checkClobberedElement ( current , DOM . parentElement ( current ) ! ) ;
131
+ current = this . checkClobberedElement ( current , this . DOM . parentElement ( current ) ! ) ;
157
132
}
158
133
}
159
134
return this . buf . join ( '' ) ;
160
135
}
161
136
162
137
private startElement ( element : Element ) {
163
- const tagName = DOM . nodeName ( element ) . toLowerCase ( ) ;
138
+ const tagName = this . DOM . nodeName ( element ) . toLowerCase ( ) ;
164
139
if ( ! VALID_ELEMENTS . hasOwnProperty ( tagName ) ) {
165
140
this . sanitizedSomething = true ;
166
141
return ;
167
142
}
168
143
this . buf . push ( '<' ) ;
169
144
this . buf . push ( tagName ) ;
170
- DOM . attributeMap ( element ) . forEach ( ( value : string , attrName : string ) => {
145
+ this . DOM . attributeMap ( element ) . forEach ( ( value : string , attrName : string ) => {
171
146
const lower = attrName . toLowerCase ( ) ;
172
147
if ( ! VALID_ATTRS . hasOwnProperty ( lower ) ) {
173
148
this . sanitizedSomething = true ;
@@ -186,7 +161,7 @@ class SanitizingHtmlSerializer {
186
161
}
187
162
188
163
private endElement ( current : Element ) {
189
- const tagName = DOM . nodeName ( current ) . toLowerCase ( ) ;
164
+ const tagName = this . DOM . nodeName ( current ) . toLowerCase ( ) ;
190
165
if ( VALID_ELEMENTS . hasOwnProperty ( tagName ) && ! VOID_ELEMENTS . hasOwnProperty ( tagName ) ) {
191
166
this . buf . push ( '</' ) ;
192
167
this . buf . push ( tagName ) ;
@@ -195,14 +170,14 @@ class SanitizingHtmlSerializer {
195
170
}
196
171
197
172
private chars ( chars : string ) { this . buf . push ( encodeEntities ( chars ) ) ; }
198
- }
199
173
200
- function checkClobberedElement ( node : Node , nextNode : Node ) : Node {
201
- if ( nextNode && DOM . contains ( node , nextNode ) ) {
202
- throw new Error (
203
- `Failed to sanitize html because the element is clobbered: ${ DOM . getOuterHTML ( node ) } ` ) ;
174
+ checkClobberedElement ( node : Node , nextNode : Node ) : Node {
175
+ if ( nextNode && this . DOM . contains ( node , nextNode ) ) {
176
+ throw new Error (
177
+ `Failed to sanitize html because the element is clobbered: ${ this . DOM . getOuterHTML ( node ) } ` ) ;
178
+ }
179
+ return nextNode ;
204
180
}
205
- return nextNode ;
206
181
}
207
182
208
183
// Regular Expressions for parsing tags and attributes
@@ -233,33 +208,20 @@ function encodeEntities(value: string) {
233
208
. replace ( / > / g, '&gt;' ) ;
234
209
}
235
210
236
- /**
237
- * When IE9-11 comes across an unknown namespaced attribute e.g. 'xlink:foo' it adds 'xmlns:ns1'
238
- * attribute to declare ns1 namespace and prefixes the attribute with 'ns1' (e.g. 'ns1:xlink:foo').
239
- *
240
- * This is undesirable since we don't want to allow any of these custom attributes. This method
241
- * strips them all.
242
- */
243
- function stripCustomNsAttrs ( el : Element ) {
244
- DOM . attributeMap ( el ) . forEach ( ( _ , attrName ) => {
245
- if ( attrName === 'xmlns:ns1' || attrName . indexOf ( 'ns1:' ) === 0 ) {
246
- DOM . removeAttribute ( el , attrName ) ;
247
- }
248
- } ) ;
249
- for ( const n of DOM . childNodesAsList ( el ) ) {
250
- if ( DOM . isElementNode ( n ) ) stripCustomNsAttrs ( n as Element ) ;
251
- }
252
- }
211
+ let inertBodyHelper : InertBodyHelper ;
253
212
254
213
/**
255
214
* Sanitizes the given unsafe, untrusted HTML fragment, and returns HTML text that is safe to add to
256
215
* the DOM in a browser environment.
257
216
*/
258
217
export function sanitizeHtml ( defaultDoc : any , unsafeHtmlInput : string ) : string {
218
+ const DOM = getDOM ( ) ;
219
+ let inertBodyElement : HTMLElement | null = null ;
259
220
try {
260
- const containerEl = getInertElement ( ) ;
221
+ inertBodyHelper = inertBodyHelper || new InertBodyHelper ( defaultDoc , DOM ) ;
261
222
// Make sure unsafeHtml is actually a string (TypeScript types are not enforced at runtime).
262
223
let unsafeHtml = unsafeHtmlInput ? String ( unsafeHtmlInput ) : '' ;
224
+ inertBodyElement = inertBodyHelper . getInertBodyElement ( unsafeHtml ) ;
263
225
264
226
// mXSS protection. Repeatedly parse the document to make sure it stabilizes, so that a browser
265
227
// trying to auto-correct incorrect HTML cannot cause formerly inert HTML to become dangerous.
@@ -273,31 +235,25 @@ export function sanitizeHtml(defaultDoc: any, unsafeHtmlInput: string): string {
273
235
mXSSAttempts -- ;
274
236
275
237
unsafeHtml = parsedHtml ;
276
- DOM . setInnerHTML ( containerEl , unsafeHtml ) ;
277
- if ( defaultDoc . documentMode ) {
278
- // strip custom-namespaced attributes on IE<=11
279
- stripCustomNsAttrs ( containerEl ) ;
280
- }
281
- parsedHtml = DOM . getInnerHTML ( containerEl ) ;
238
+ parsedHtml = DOM . getInnerHTML ( inertBodyElement ) ;
239
+ inertBodyElement = inertBodyHelper . getInertBodyElement ( unsafeHtml ) ;
282
240
} while ( unsafeHtml !== parsedHtml ) ;
283
241
284
242
const sanitizer = new SanitizingHtmlSerializer ( ) ;
285
- const safeHtml = sanitizer . sanitizeChildren ( DOM . getTemplateContent ( containerEl ) || containerEl ) ;
286
-
287
- // Clear out the body element.
288
- const parent = DOM . getTemplateContent ( containerEl ) || containerEl ;
289
- for ( const child of DOM . childNodesAsList ( parent ) ) {
290
- DOM . removeChild ( parent , child ) ;
291
- }
292
-
243
+ const safeHtml =
244
+ sanitizer . sanitizeChildren ( DOM . getTemplateContent ( inertBodyElement ) || inertBodyElement ) ;
293
245
if ( isDevMode ( ) && sanitizer . sanitizedSomething ) {
294
246
DOM . log ( 'WARNING: sanitizing HTML stripped some content (see http://g.co/ng/security#xss).' ) ;
295
247
}
296
248
297
249
return safeHtml ;
298
- } catch ( e ) {
250
+ } finally {
299
251
// Runs on success and on failure: remove every child of the inert body element (or of its template content).
300
- inertElement = null ;
301
- throw e ;
252
+ if ( inertBodyElement ) {
253
+ const parent = DOM . getTemplateContent ( inertBodyElement ) || inertBodyElement ;
254
+ for ( const child of DOM . childNodesAsList ( parent ) ) {
255
+ DOM . removeChild ( parent , child ) ;
256
+ }
257
+ }
302
258
}
303
259
}
0 commit comments