Skip to content

Commit 3256fe6

Browse files
committed
Backport XSS patches from 3.0-stable
1 parent 2586d92 commit 3256fe6

File tree

1 file changed

+89
-74
lines changed

1 file changed

+89
-74
lines changed

system/core/Security.php

Lines changed: 89 additions & 74 deletions
Original file line numberDiff line numberDiff line change
@@ -355,7 +355,7 @@ public function xss_clean($str, $is_image = FALSE)
355355
$words = array(
356356
'javascript', 'expression', 'vbscript', 'jscript', 'wscript',
357357
'vbs', 'script', 'base64', 'applet', 'alert', 'document',
358-
'write', 'cookie', 'window', 'confirm', 'prompt'
358+
'write', 'cookie', 'window', 'confirm', 'prompt', 'eval'
359359
);
360360

361361
foreach ($words as $word)
@@ -399,12 +399,8 @@ public function xss_clean($str, $is_image = FALSE)
399399
}
400400
}
401401
while($original !== $str);
402-
403402
unset($original);
404403

405-
// Remove evil attributes such as style, onclick and xmlns
406-
$str = $this->_remove_evil_attributes($str, $is_image);
407-
408404
/*
409405
* Sanitize naughty HTML elements
410406
*
@@ -414,8 +410,29 @@ public function xss_clean($str, $is_image = FALSE)
414410
* So this: <blink>
415411
* Becomes: &lt;blink&gt;
416412
*/
417-
$naughty = 'alert|prompt|confirm|applet|audio|basefont|base|behavior|bgsound|blink|body|embed|expression|form|frameset|frame|head|html|ilayer|iframe|input|button|select|isindex|layer|link|meta|keygen|object|plaintext|style|script|textarea|title|math|video|svg|xml|xss';
418-
$str = preg_replace_callback('#<(/*\s*)('.$naughty.')([^><]*)([><]*)#is', array($this, '_sanitize_naughty_html'), $str);
413+
$pattern = '#'
414+
.'<((?<slash>/*\s*)(?<tagName>[a-z0-9]+)(?=[^a-z0-9]|$)' // tag start and name, followed by a non-tag character
415+
.'[^\s\042\047a-z0-9>/=]*' // a valid attribute character immediately after the tag would count as a separator
416+
// optional attributes
417+
.'(?<attributes>(?:[\s\042\047/=]*' // non-attribute characters, excluding > (tag close) for obvious reasons
418+
.'[^\s\042\047>/=]+' // attribute characters
419+
// optional attribute-value
420+
.'(?:\s*=' // attribute-value separator
421+
.'(?:[^\s\042\047=><`]+|\s*\042[^\042]*\042|\s*\047[^\047]*\047|\s*(?U:[^\s\042\047=><`]*))' // single, double or non-quoted value
422+
.')?' // end optional attribute-value group
423+
.')*)' // end optional attributes group
424+
.'[^>]*)(?<closeTag>\>)?#isS';
425+
426+
// Note: It would be nice to optimize this for speed, BUT
427+
// only matching the naughty elements here results in
428+
// false positives and in turn - vulnerabilities!
429+
do
430+
{
431+
$old_str = $str;
432+
$str = preg_replace_callback($pattern, array($this, '_sanitize_naughty_html'), $str);
433+
}
434+
while ($old_str !== $str);
435+
unset($old_str);
419436

420437
/*
421438
* Sanitize naughty scripting elements
@@ -626,82 +643,80 @@ protected function _compact_exploded_words($matches)
626643

627644
// --------------------------------------------------------------------
628645

629-
/*
630-
* Remove Evil HTML Attributes (like event handlers and style)
646+
/**
647+
* Sanitize Naughty HTML
631648
*
632-
* It removes the evil attribute and either:
633-
* - Everything up until a space
634-
* For example, everything between the pipes:
635-
* <a |style=document.write('hello');alert('world');| class=link>
636-
* - Everything inside the quotes
637-
* For example, everything between the pipes:
638-
* <a |style="document.write('hello'); alert('world');"| class="link">
649+
* Callback function for xss_clean() to remove naughty HTML elements
639650
*
640-
* @param string $str The string to check
641-
* @param boolean $is_image TRUE if this is an image
642-
* @return string The string with the evil attributes removed
651+
* @param array
652+
* @return string
643653
*/
644-
protected function _remove_evil_attributes($str, $is_image)
654+
protected function _sanitize_naughty_html($matches)
645655
{
646-
// All javascript event handlers (e.g. onload, onclick, onmouseover), style, and xmlns
647-
$evil_attributes = array('on\w*', 'style', 'xmlns', 'formaction', 'form', 'xlink:href', 'FSCommand', 'seekSegmentTime');
656+
static $naughty_tags = array(
657+
'alert', 'prompt', 'confirm', 'applet', 'audio', 'basefont', 'base', 'behavior', 'bgsound',
658+
'blink', 'body', 'embed', 'expression', 'form', 'frameset', 'frame', 'head', 'html', 'ilayer',
659+
'iframe', 'input', 'button', 'select', 'isindex', 'layer', 'link', 'meta', 'keygen', 'object',
660+
'plaintext', 'style', 'script', 'textarea', 'title', 'math', 'video', 'svg', 'xml', 'xss'
661+
);
648662

649-
if ($is_image === TRUE)
663+
static $evil_attributes = array(
664+
'on\w+', 'style', 'xmlns', 'formaction', 'form', 'xlink:href', 'FSCommand', 'seekSegmentTime'
665+
);
666+
667+
// First, escape unclosed tags
668+
if (empty($matches['closeTag']))
650669
{
651-
/*
652-
* Adobe Photoshop puts XML metadata into JFIF images,
653-
* including namespacing, so we have to allow this for images.
654-
*/
655-
unset($evil_attributes[array_search('xmlns', $evil_attributes)]);
670+
return '&lt;'.$matches[1];
656671
}
657-
658-
do {
659-
$count = 0;
660-
$attribs = array();
661-
662-
// find occurrences of illegal attribute strings with quotes (042 and 047 are octal quotes)
663-
preg_match_all('/(?<!\w)('.implode('|', $evil_attributes).')\s*=\s*(\042|\047)([^\\2]*?)(\\2)/is', $str, $matches, PREG_SET_ORDER);
664-
665-
foreach ($matches as $attr)
666-
{
667-
$attribs[] = preg_quote($attr[0], '/');
668-
}
669-
670-
// find occurrences of illegal attribute strings without quotes
671-
preg_match_all('/(?<!\w)('.implode('|', $evil_attributes).')\s*=\s*([^\s>]*)/is', $str, $matches, PREG_SET_ORDER);
672-
673-
foreach ($matches as $attr)
672+
// Is the element that we caught naughty? If so, escape it
673+
elseif (in_array(strtolower($matches['tagName']), $naughty_tags, TRUE))
674+
{
675+
return '&lt;'.$matches[1].'&gt;';
676+
}
677+
// For other tags, see if their attributes are "evil" and strip those
678+
elseif (isset($matches['attributes']))
679+
{
680+
// We'll need to catch all attributes separately first
681+
$pattern = '#'
682+
.'([\s\042\047/=]*)' // non-attribute characters, excluding > (tag close) for obvious reasons
683+
.'(?<name>[^\s\042\047>/=]+)' // attribute characters
684+
// optional attribute-value
685+
.'(?:\s*=(?<value>[^\s\042\047=><`]+|\s*\042[^\042]*\042|\s*\047[^\047]*\047|\s*(?U:[^\s\042\047=><`]*)))' // attribute-value separator
686+
.'#i';
687+
688+
if ($count = preg_match_all($pattern, $matches['attributes'], $attributes, PREG_SET_ORDER | PREG_OFFSET_CAPTURE))
674689
{
675-
$attribs[] = preg_quote($attr[0], '/');
676-
}
690+
// Since we'll be using substr_replace() below, we
691+
// need to handle the attributes in reverse order,
692+
// so we don't damage the string.
693+
for ($i = $count - 1; $i > -1; $i--)
694+
{
695+
if (
696+
// Is it indeed an "evil" attribute?
697+
preg_match('#^('.implode('|', $evil_attributes).')$#i', $attributes[$i]['name'][0])
698+
// Or an attribute not starting with a letter? Some parsers get confused by that
699+
OR ! ctype_alpha($attributes[$i]['name'][0][0])
700+
// Does it have an equals sign, but no value and not quoted? Strip that too!
701+
OR (trim($attributes[$i]['value'][0]) === '')
702+
)
703+
{
704+
$matches['attributes'] = substr_replace(
705+
$matches['attributes'],
706+
' [removed]',
707+
$attributes[$i][0][1],
708+
strlen($attributes[$i][0][0])
709+
);
710+
}
711+
}
677712

678-
// replace illegal attribute strings that are inside an html tag
679-
if (count($attribs) > 0)
680-
{
681-
$str = preg_replace('/(<?)(\/?[^><]+?)([^A-Za-z<>\-])(.*?)('.implode('|', $attribs).')(.*?)([\s><]?)([><]*)/i', '$1$2 $4$6$7$8', $str, -1, $count);
713+
// Note: This will strip some non-space characters and/or
714+
// reduce multiple spaces between attributes.
715+
return '<'.$matches['slash'].$matches['tagName'].' '.trim($matches['attributes']).'>';
682716
}
683-
684717
}
685-
while ($count);
686-
687-
return $str;
688-
}
689718

690-
// --------------------------------------------------------------------
691-
692-
/**
693-
* Sanitize Naughty HTML
694-
*
695-
* Callback function for xss_clean() to remove naughty HTML elements
696-
*
697-
* @param array
698-
* @return string
699-
*/
700-
protected function _sanitize_naughty_html($matches)
701-
{
702-
return '&lt;'.$matches[1].$matches[2].$matches[3] // encode opening brace
703-
// encode captured opening or closing brace to prevent recursive vectors:
704-
.str_replace(array('>', '<'), array('&gt;', '&lt;'), $matches[4]);
719+
return $matches[0];
705720
}
706721

707722
// --------------------------------------------------------------------
@@ -724,7 +739,7 @@ protected function _js_link_removal($match)
724739
preg_replace(
725740
'#href=.*?(?:(?:alert|prompt|confirm)(?:\(|&\#40;)|javascript:|livescript:|mocha:|charset=|window\.|document\.|\.cookie|<script|<xss|data\s*:)#si',
726741
'',
727-
$this->_filter_attributes(str_replace(array('<', '>'), '', $match[1]))
742+
$this->_filter_attributes($match[1])
728743
),
729744
$match[0]
730745
);
@@ -748,9 +763,9 @@ protected function _js_img_removal($match)
748763
return str_replace(
749764
$match[1],
750765
preg_replace(
751-
'#src=.*?(?:(?:alert|prompt|confirm)(?:\(|&\#40;)|javascript:|livescript:|mocha:|charset=|window\.|document\.|\.cookie|<script|<xss|base64\s*,)#si',
766+
'#src=.*?(?:(?:alert|prompt|confirm|eval)(?:\(|&\#40;)|javascript:|livescript:|mocha:|charset=|window\.|document\.|\.cookie|<script|<xss|base64\s*,)#si',
752767
'',
753-
$this->_filter_attributes(str_replace(array('<', '>'), '', $match[1]))
768+
$this->_filter_attributes($match[1])
754769
),
755770
$match[0]
756771
);

0 commit comments

Comments
 (0)