@@ -10,35 +10,9 @@ import {isDevMode} from '@angular/core';
10
10
11
11
import { DomAdapter , getDOM } from '../dom/dom_adapter' ;
12
12
13
+ import { InertBodyHelper } from './inert_body' ;
13
14
import { sanitizeSrcset , sanitizeUrl } from './url_sanitizer' ;
14
15
15
- /** A <body> element that can be safely used to parse untrusted HTML. Lazily initialized below. */
16
- let inertElement : HTMLElement | null = null ;
17
- /** Lazily initialized to make sure the DOM adapter gets set before use. */
18
- let DOM : DomAdapter = null ! ;
19
-
20
- /** Returns an HTML element that is guaranteed to not execute code when creating elements in it. */
21
- function getInertElement ( ) {
22
- if ( inertElement ) return inertElement ;
23
- DOM = getDOM ( ) ;
24
-
25
- // Prefer using <template> element if supported.
26
- const templateEl = DOM . createElement ( 'template' ) ;
27
- if ( 'content' in templateEl ) return templateEl ;
28
-
29
- const doc = DOM . createHtmlDocument ( ) ;
30
- inertElement = DOM . querySelector ( doc , 'body' ) ;
31
- if ( inertElement == null ) {
32
- // usually there should be only one body element in the document, but IE doesn't have any, so we
33
- // need to create one.
34
- const html = DOM . createElement ( 'html' , doc ) ;
35
- inertElement = DOM . createElement ( 'body' , doc ) ;
36
- DOM . appendChild ( html , inertElement ) ;
37
- DOM . appendChild ( doc , html ) ;
38
- }
39
- return inertElement ;
40
- }
41
-
42
16
function tagSet ( tags : string ) : { [ k : string ] : boolean } {
43
17
const res : { [ k : string ] : boolean } = { } ;
44
18
for ( const t of tags . split ( ',' ) ) res [ t ] = true ;
@@ -121,53 +95,54 @@ class SanitizingHtmlSerializer {
121
95
// because characters were re-encoded.
122
96
public sanitizedSomething = false ;
123
97
private buf : string [ ] = [ ] ;
98
+ private DOM = getDOM ( ) ;
124
99
125
100
sanitizeChildren ( el : Element ) : string {
126
101
// This cannot use a TreeWalker, as it has to run on Angular's various DOM adapters.
127
102
// However this code never accesses properties off of `document` before deleting its contents
128
103
// again, so it shouldn't be vulnerable to DOM clobbering.
129
- let current : Node = el . firstChild ! ;
104
+ let current : Node = this . DOM . firstChild ( el ) ! ;
130
105
while ( current ) {
131
- if ( DOM . isElementNode ( current ) ) {
106
+ if ( this . DOM . isElementNode ( current ) ) {
132
107
this . startElement ( current as Element ) ;
133
- } else if ( DOM . isTextNode ( current ) ) {
134
- this . chars ( DOM . nodeValue ( current ) ! ) ;
108
+ } else if ( this . DOM . isTextNode ( current ) ) {
109
+ this . chars ( this . DOM . nodeValue ( current ) ! ) ;
135
110
} else {
136
111
// Strip non-element, non-text nodes.
137
112
this . sanitizedSomething = true ;
138
113
}
139
- if ( DOM . firstChild ( current ) ) {
140
- current = DOM . firstChild ( current ) ! ;
114
+ if ( this . DOM . firstChild ( current ) ) {
115
+ current = this . DOM . firstChild ( current ) ! ;
141
116
continue ;
142
117
}
143
118
while ( current ) {
144
119
// Leaving the element. Walk up and to the right, closing tags as we go.
145
- if ( DOM . isElementNode ( current ) ) {
120
+ if ( this . DOM . isElementNode ( current ) ) {
146
121
this . endElement ( current as Element ) ;
147
122
}
148
123
149
- let next = checkClobberedElement ( current , DOM . nextSibling ( current ) ! ) ;
124
+ let next = this . checkClobberedElement ( current , this . DOM . nextSibling ( current ) ! ) ;
150
125
151
126
if ( next ) {
152
127
current = next ;
153
128
break ;
154
129
}
155
130
156
- current = checkClobberedElement ( current , DOM . parentElement ( current ) ! ) ;
131
+ current = this . checkClobberedElement ( current , this . DOM . parentElement ( current ) ! ) ;
157
132
}
158
133
}
159
134
return this . buf . join ( '' ) ;
160
135
}
161
136
162
137
private startElement ( element : Element ) {
163
- const tagName = DOM . nodeName ( element ) . toLowerCase ( ) ;
138
+ const tagName = this . DOM . nodeName ( element ) . toLowerCase ( ) ;
164
139
if ( ! VALID_ELEMENTS . hasOwnProperty ( tagName ) ) {
165
140
this . sanitizedSomething = true ;
166
141
return ;
167
142
}
168
143
this . buf . push ( '<' ) ;
169
144
this . buf . push ( tagName ) ;
170
- DOM . attributeMap ( element ) . forEach ( ( value : string , attrName : string ) => {
145
+ this . DOM . attributeMap ( element ) . forEach ( ( value : string , attrName : string ) => {
171
146
const lower = attrName . toLowerCase ( ) ;
172
147
if ( ! VALID_ATTRS . hasOwnProperty ( lower ) ) {
173
148
this . sanitizedSomething = true ;
@@ -186,7 +161,7 @@ class SanitizingHtmlSerializer {
186
161
}
187
162
188
163
private endElement ( current : Element ) {
189
- const tagName = DOM . nodeName ( current ) . toLowerCase ( ) ;
164
+ const tagName = this . DOM . nodeName ( current ) . toLowerCase ( ) ;
190
165
if ( VALID_ELEMENTS . hasOwnProperty ( tagName ) && ! VOID_ELEMENTS . hasOwnProperty ( tagName ) ) {
191
166
this . buf . push ( '</' ) ;
192
167
this . buf . push ( tagName ) ;
@@ -195,14 +170,14 @@ class SanitizingHtmlSerializer {
195
170
}
196
171
197
172
private chars ( chars : string ) { this . buf . push ( encodeEntities ( chars ) ) ; }
198
- }
199
173
200
- function checkClobberedElement ( node : Node , nextNode : Node ) : Node {
201
- if ( nextNode && DOM . contains ( node , nextNode ) ) {
202
- throw new Error (
203
- `Failed to sanitize html because the element is clobbered: ${ DOM . getOuterHTML ( node ) } ` ) ;
174
+ checkClobberedElement ( node : Node , nextNode : Node ) : Node {
175
+ if ( nextNode && this . DOM . contains ( node , nextNode ) ) {
176
+ throw new Error (
177
+ `Failed to sanitize html because the element is clobbered: ${ this . DOM . getOuterHTML ( node ) } ` ) ;
178
+ }
179
+ return nextNode ;
204
180
}
205
- return nextNode ;
206
181
}
207
182
208
183
// Regular Expressions for parsing tags and attributes
@@ -232,33 +207,20 @@ function encodeEntities(value: string) {
232
207
. replace ( / > / g, '>' ) ;
233
208
}
234
209
235
- /**
236
- * When IE9-11 comes across an unknown namespaced attribute e.g. 'xlink:foo' it adds 'xmlns:ns1'
237
- * attribute to declare ns1 namespace and prefixes the attribute with 'ns1' (e.g. 'ns1:xlink:foo').
238
- *
239
- * This is undesirable since we don't want to allow any of these custom attributes. This method
240
- * strips them all.
241
- */
242
- function stripCustomNsAttrs ( el : Element ) {
243
- DOM . attributeMap ( el ) . forEach ( ( _ , attrName ) => {
244
- if ( attrName === 'xmlns:ns1' || attrName . indexOf ( 'ns1:' ) === 0 ) {
245
- DOM . removeAttribute ( el , attrName ) ;
246
- }
247
- } ) ;
248
- for ( const n of DOM . childNodesAsList ( el ) ) {
249
- if ( DOM . isElementNode ( n ) ) stripCustomNsAttrs ( n as Element ) ;
250
- }
251
- }
210
+ let inertBodyHelper : InertBodyHelper ;
252
211
253
212
/**
254
213
* Sanitizes the given unsafe, untrusted HTML fragment, and returns HTML text that is safe to add to
255
214
* the DOM in a browser environment.
256
215
*/
257
216
export function sanitizeHtml ( defaultDoc : any , unsafeHtmlInput : string ) : string {
217
+ const DOM = getDOM ( ) ;
218
+ let inertBodyElement : HTMLElement | null = null ;
258
219
try {
259
- const containerEl = getInertElement ( ) ;
220
+ inertBodyHelper = inertBodyHelper || new InertBodyHelper ( defaultDoc , DOM ) ;
260
221
// Make sure unsafeHtml is actually a string (TypeScript types are not enforced at runtime).
261
222
let unsafeHtml = unsafeHtmlInput ? String ( unsafeHtmlInput ) : '' ;
223
+ inertBodyElement = inertBodyHelper . getInertBodyElement ( unsafeHtml ) ;
262
224
263
225
// mXSS protection. Repeatedly parse the document to make sure it stabilizes, so that a browser
264
226
// trying to auto-correct incorrect HTML cannot cause formerly inert HTML to become dangerous.
@@ -272,31 +234,25 @@ export function sanitizeHtml(defaultDoc: any, unsafeHtmlInput: string): string {
272
234
mXSSAttempts -- ;
273
235
274
236
unsafeHtml = parsedHtml ;
275
- DOM . setInnerHTML ( containerEl , unsafeHtml ) ;
276
- if ( defaultDoc . documentMode ) {
277
- // strip custom-namespaced attributes on IE<=11
278
- stripCustomNsAttrs ( containerEl ) ;
279
- }
280
- parsedHtml = DOM . getInnerHTML ( containerEl ) ;
237
+ parsedHtml = DOM . getInnerHTML ( inertBodyElement ) ;
238
+ inertBodyElement = inertBodyHelper . getInertBodyElement ( unsafeHtml ) ;
281
239
} while ( unsafeHtml !== parsedHtml ) ;
282
240
283
241
const sanitizer = new SanitizingHtmlSerializer ( ) ;
284
- const safeHtml = sanitizer . sanitizeChildren ( DOM . getTemplateContent ( containerEl ) || containerEl ) ;
285
-
286
- // Clear out the body element.
287
- const parent = DOM . getTemplateContent ( containerEl ) || containerEl ;
288
- for ( const child of DOM . childNodesAsList ( parent ) ) {
289
- DOM . removeChild ( parent , child ) ;
290
- }
291
-
242
+ const safeHtml =
243
+ sanitizer . sanitizeChildren ( DOM . getTemplateContent ( inertBodyElement ) || inertBodyElement ) ;
292
244
if ( isDevMode ( ) && sanitizer . sanitizedSomething ) {
293
245
DOM . log ( 'WARNING: sanitizing HTML stripped some content (see http://g.co/ng/security#xss).' ) ;
294
246
}
295
247
296
248
return safeHtml ;
297
- } catch ( e ) {
249
+ } finally {
298
250
// In case anything goes wrong, clear out inertElement to reset the entire DOM structure.
299
- inertElement = null ;
300
- throw e ;
251
+ if ( inertBodyElement ) {
252
+ const parent = DOM . getTemplateContent ( inertBodyElement ) || inertBodyElement ;
253
+ for ( const child of DOM . childNodesAsList ( parent ) ) {
254
+ DOM . removeChild ( parent , child ) ;
255
+ }
256
+ }
301
257
}
302
258
}
0 commit comments