Skip to content

Commit c9880b3

Browse files
committed
Harden xss_clean()
1 parent e50aaa3 commit c9880b3

File tree

1 file changed

+40
-28
lines changed

1 file changed

+40
-28
lines changed

system/core/Security.php

Lines changed: 40 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -677,43 +677,55 @@ protected function _sanitize_naughty_html($matches)
677677
// For other tags, see if their attributes are "evil" and strip those
678678
elseif (isset($matches['attributes']))
679679
{
680-
// We'll need to catch all attributes separately first
681-
$pattern = '#'
682-
.'([\s\042\047/=]*)' // non-attribute characters, excluding > (tag close) for obvious reasons
680+
// We'll store the already fitlered attributes here
681+
$attributes = array();
682+
683+
// Attribute-catching pattern
684+
$attributes_pattern = '#'
683685
.'(?<name>[^\s\042\047>/=]+)' // attribute characters
684686
// optional attribute-value
685687
.'(?:\s*=(?<value>[^\s\042\047=><`]+|\s*\042[^\042]*\042|\s*\047[^\047]*\047|\s*(?U:[^\s\042\047=><`]*)))' // attribute-value separator
686688
.'#i';
687689

688-
if ($count = preg_match_all($pattern, $matches['attributes'], $attributes, PREG_SET_ORDER | PREG_OFFSET_CAPTURE))
690+
// Blacklist pattern for evil attribute names
691+
$is_evil_pattern = '#^('.implode('|', $evil_attributes).')$#i';
692+
693+
// Each iteration filters a single attribute
694+
do
689695
{
690-
// Since we'll be using substr_replace() below, we
691-
// need to handle the attributes in reverse order,
692-
// so we don't damage the string.
693-
for ($i = $count - 1; $i > -1; $i--)
696+
// Strip any non-alpha characters that may preceed an attribute.
697+
// Browsers often parse these incorrectly and that has been a
698+
// of numerous XSS issues we've had.
699+
$matches['attributes'] = preg_replace('#^[^a-z]+#i', '', $matches['attributes']);
700+
701+
if ( ! preg_match($attributes_pattern, $matches['attributes'], $attribute, PREG_OFFSET_CAPTURE))
694702
{
695-
if (
696-
// Is it indeed an "evil" attribute?
697-
preg_match('#^('.implode('|', $evil_attributes).')$#i', $attributes[$i]['name'][0])
698-
// Or an attribute not starting with a letter? Some parsers get confused by that
699-
OR ! ctype_alpha($attributes[$i]['name'][0][0])
700-
// Does it have an equals sign, but no value and not quoted? Strip that too!
701-
OR (trim($attributes[$i]['value'][0]) === '')
702-
)
703-
{
704-
$matches['attributes'] = substr_replace(
705-
$matches['attributes'],
706-
' [removed]',
707-
$attributes[$i][0][1],
708-
strlen($attributes[$i][0][0])
709-
);
710-
}
703+
// No (valid) attribute found? Discard everything else inside the tag
704+
break;
711705
}
712706

713-
// Note: This will strip some non-space characters and/or
714-
// reduce multiple spaces between attributes.
715-
return '<'.$matches['slash'].$matches['tagName'].' '.trim($matches['attributes']).'>';
707+
if (
708+
// Is it indeed an "evil" attribute?
709+
preg_match($is_evil_pattern, $attribute['name'][0])
710+
// Or does it have an equals sign, but no value and not quoted? Strip that too!
711+
OR (trim($attribute['value'][0]) === '')
712+
)
713+
{
714+
$attributes[] = 'xss=removed';
715+
}
716+
else
717+
{
718+
$attributes[] = $attribute[0][0];
719+
}
720+
721+
$matches['attributes'] = substr($matches['attributes'], $attribute[0][1] + strlen($attribute[0][0]));
716722
}
723+
while ($matches['attributes'] !== '');
724+
725+
$attributes = empty($attributes)
726+
? ''
727+
: ' '.implode(' ', $attributes);
728+
return '<'.$matches['slash'].$matches['tagName'].$attributes.'>';
717729
}
718730

719731
return $matches[0];
@@ -887,4 +899,4 @@ protected function _csrf_set_hash()
887899
}
888900

889901
/* End of file Security.php */
890-
/* Location: ./system/core/Security.php */
902+
/* Location: ./system/core/Security.php */

0 commit comments

Comments
 (0)