Skip to content

Commit c38e3b6

Browse files
committed
Tweaking the xss filter for IE <comment> tags, parameter injection, and weird html5 attributes.
1 parent fbcf88b commit c38e3b6

File tree

1 file changed

+41
-50
lines changed

1 file changed

+41
-50
lines changed

system/core/Security.php

Lines changed: 41 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,8 @@ class CI_Security {
7777
'-moz-binding' => '[removed]',
7878
'<!--' => '&lt;!--',
7979
'-->' => '--&gt;',
80-
'<![CDATA[' => '&lt;![CDATA['
80+
'<![CDATA[' => '&lt;![CDATA[',
81+
'<comment>' => '&lt;comment&gt;'
8182
);
8283

8384
/* never allowed, regex replacement */
@@ -475,15 +476,7 @@ public function xss_hash()
475476
{
476477
if ($this->_xss_hash == '')
477478
{
478-
if (phpversion() >= 4.2)
479-
{
480-
mt_srand();
481-
}
482-
else
483-
{
484-
mt_srand(hexdec(substr(md5(microtime()), -8)) & 0x7fffffff);
485-
}
486-
479+
mt_srand();
487480
$this->_xss_hash = md5(time() + mt_rand(0, 1999999999));
488481
}
489482

@@ -497,48 +490,26 @@ public function xss_hash()
497490
*
498491
* This function is a replacement for html_entity_decode()
499492
*
500-
* In some versions of PHP the native function does not work
501-
* when UTF-8 is the specified character set, so this gives us
502-
* a work-around. More info here:
503-
* http://bugs.php.net/bug.php?id=25670
504-
*
505-
* NOTE: html_entity_decode() has a bug in some PHP versions when UTF-8 is the
506-
* character set, and the PHP developers said they were not back porting the
507-
* fix to versions other than PHP 5.x.
493+
* html_entity_decode() is not used by itself because, although it is
494+
* not technically correct to leave out the semicolon at the end of an
495+
* entity, most browsers will still interpret the entity correctly.
496+
* html_entity_decode() does not convert entities without semicolons,
497+
* so we are left with our own little solution here. Bummer.
508498
*
509499
* @param string
510500
* @param string
511501
* @return string
512502
*/
513503
public function entity_decode($str, $charset='UTF-8')
514504
{
515-
if (stristr($str, '&') === FALSE) return $str;
516-
517-
// The reason we are not using html_entity_decode() by itself is because
518-
// while it is not technically correct to leave out the semicolon
519-
// at the end of an entity most browsers will still interpret the entity
520-
// correctly. html_entity_decode() does not convert entities without
521-
// semicolons, so we are left with our own little solution here. Bummer.
522-
523-
if (function_exists('html_entity_decode') &&
524-
(strtolower($charset) != 'utf-8'))
525-
{
526-
$str = html_entity_decode($str, ENT_COMPAT, $charset);
527-
$str = preg_replace('~&#x(0*[0-9a-f]{2,5})~ei', 'chr(hexdec("\\1"))', $str);
528-
return preg_replace('~&#([0-9]{2,4})~e', 'chr(\\1)', $str);
529-
}
530-
531-
// Numeric Entities
532-
$str = preg_replace('~&#x(0*[0-9a-f]{2,5});{0,1}~ei', 'chr(hexdec("\\1"))', $str);
533-
$str = preg_replace('~&#([0-9]{2,4});{0,1}~e', 'chr(\\1)', $str);
534-
535-
// Literal Entities - Slightly slow so we do another check
536505
if (stristr($str, '&') === FALSE)
537506
{
538-
$str = strtr($str, array_flip(get_html_translation_table(HTML_ENTITIES)));
507+
return $str;
539508
}
540509

541-
return $str;
510+
$str = html_entity_decode($str, ENT_COMPAT, $charset);
511+
$str = preg_replace('~&#x(0*[0-9a-f]{2,5})~ei', 'chr(hexdec("\\1"))', $str);
512+
return preg_replace('~&#([0-9]{2,4})~e', 'chr(\\1)', $str);
542513
}
543514

544515
// --------------------------------------------------------------------
@@ -632,25 +603,45 @@ protected function _compact_exploded_words($matches)
632603
protected function _remove_evil_attributes($str, $is_image)
633604
{
634605
// All javascript event handlers (e.g. onload, onclick, onmouseover), style, xmlns, and formaction
635-
$evil_attributes = array('on\w*', 'style', 'xmlns');
606+
$evil_attributes = array('on\w*', 'style', 'xmlns', 'formaction');
636607

637608
if ($is_image === TRUE)
638609
{
639610
/*
640-
* Adobe Photoshop puts XML metadata into JFIF images,
611+
* Adobe Photoshop puts XML metadata into JFIF images,
641612
* including namespacing, so we have to allow this for images.
642613
*/
643614
unset($evil_attributes[array_search('xmlns', $evil_attributes)]);
644615
}
645-
616+
646617
do {
647-
$str = preg_replace(
648-
"#<(/?[^><]+?)([^A-Za-z\-])(".implode('|', $evil_attributes).")(\s*=\s*)([\"][^>]*?[\"]|[\'][^>]*?[\']|[^>]*?)([\s><])([><]*)#i",
649-
"<$1$6",
650-
$str, -1, $count
651-
);
652-
} while ($count);
618+
$count = 0;
619+
$attribs = array();
620+
621+
// find occurrences of illegal attribute strings without quotes
622+
preg_match_all("/(".implode('|', $evil_attributes).")\s*=\s*([^\s]*)/is", $str, $matches, PREG_SET_ORDER);
623+
624+
foreach ($matches as $attr)
625+
{
626+
$attribs[] = preg_quote($attr[0], '/');
627+
}
628+
629+
// find occurrences of illegal attribute strings with quotes (\042 and \047 are the octal escapes for the double and single quote characters)
630+
preg_match_all("/(".implode('|', $evil_attributes).")\s*=\s*(\042|\047)([^\\2]*?)(\\2)/is", $str, $matches, PREG_SET_ORDER);
653631

632+
foreach ($matches as $attr)
633+
{
634+
$attribs[] = preg_quote($attr[0], '/');
635+
}
636+
637+
// replace illegal attribute strings that are inside an html tag
638+
if (count($attribs) > 0)
639+
{
640+
$str = preg_replace("/<(\/?[^><]+?)([^A-Za-z\-])(".implode('|', $attribs).")([\s><])([><]*)/i", '<$1$2$4$5', $str, -1, $count);
641+
}
642+
643+
} while ($count);
644+
654645
return $str;
655646
}
656647

0 commit comments

Comments
 (0)