@@ -77,7 +77,8 @@ class CI_Security {
77
77
'-moz-binding ' => '[removed] ' ,
78
78
'<!-- ' => '<!-- ' ,
79
79
'--> ' => '--> ' ,
80
- '<![CDATA[ ' => '<![CDATA[ '
80
+ '<![CDATA[ ' => '<![CDATA[ ' ,
81
+ '<comment> ' => '<comment> '
81
82
);
82
83
83
84
/* never allowed, regex replacement */
@@ -475,15 +476,7 @@ public function xss_hash()
475
476
{
476
477
if ($ this ->_xss_hash == '' )
477
478
{
478
- if (phpversion () >= 4.2 )
479
- {
480
- mt_srand ();
481
- }
482
- else
483
- {
484
- mt_srand (hexdec (substr (md5 (microtime ()), -8 )) & 0x7fffffff );
485
- }
486
-
479
+ mt_srand ();
487
480
$ this ->_xss_hash = md5 (time () + mt_rand (0 , 1999999999 ));
488
481
}
489
482
@@ -497,48 +490,26 @@ public function xss_hash()
497
490
*
498
491
* This function is a replacement for html_entity_decode()
499
492
*
500
- * In some versions of PHP the native function does not work
501
- * when UTF-8 is the specified character set, so this gives us
502
- * a work-around. More info here:
503
- * http://bugs.php.net/bug.php?id=25670
504
- *
505
- * NOTE: html_entity_decode() has a bug in some PHP versions when UTF-8 is the
506
- * character set, and the PHP developers said they were not back porting the
507
- * fix to versions other than PHP 5.x.
493
+ * The reason we are not using html_entity_decode() by itself is because
494
+ * while it is not technically correct to leave out the semicolon
495
+ * at the end of an entity most browsers will still interpret the entity
496
+ * correctly. html_entity_decode() does not convert entities without
497
+ * semicolons, so we are left with our own little solution here. Bummer.
508
498
*
509
499
* @param string
510
500
* @param string
511
501
* @return string
512
502
*/
513
503
public function entity_decode($str, $charset='UTF-8')
{
	// Without an ampersand there is no entity to decode.
	if (stristr($str, '&') === FALSE)
	{
		return $str;
	}

	// Named entities and semicolon-terminated numeric entities.
	$str = html_entity_decode($str, ENT_COMPAT, $charset);

	// Numeric entities written without the trailing semicolon are left alone
	// by html_entity_decode(), so they are converted here. Callbacks are used
	// instead of the /e (eval) modifier, which no longer exists in PHP 7+.
	$str = preg_replace_callback('~&#x(0*[0-9a-f]{2,5})~i', array($this, '_decode_hex_entity'), $str);
	return preg_replace_callback('~&#([0-9]{2,4})~', array($this, '_decode_dec_entity'), $str);
}

/**
 * preg_replace_callback() helper for entity_decode(): turns the hex digits
 * captured from a "&#x..." entity into the corresponding single byte.
 *
 * @param	array	regex match; index 1 holds the hex digits
 * @return	string
 */
protected function _decode_hex_entity($match)
{
	return chr(hexdec($match[1]));
}

/**
 * preg_replace_callback() helper for entity_decode(): turns the decimal
 * digits captured from a "&#..." entity into the corresponding single byte.
 *
 * @param	array	regex match; index 1 holds the decimal digits
 * @return	string
 */
protected function _decode_dec_entity($match)
{
	// The explicit cast reads the digits as decimal even with leading zeros.
	return chr((int) $match[1]);
}
543
514
544
515
// --------------------------------------------------------------------
@@ -632,25 +603,45 @@ protected function _compact_exploded_words($matches)
632
603
protected function _remove_evil_attributes($str, $is_image)
{
	// Attributes stripped from every tag: all javascript event handlers
	// (e.g. onload, onclick, onmouseover), style, xmlns and formaction.
	// Entries are regex fragments, not literal names.
	$evil_attributes = array('on\w*', 'style', 'xmlns', 'formaction');

	if ($is_image === TRUE)
	{
		/*
		 * Adobe Photoshop puts XML metadata into JFIF images,
		 * including namespacing, so we have to allow this for images.
		 */
		unset($evil_attributes[array_search('xmlns', $evil_attributes)]);
	}

	$names = implode('|', $evil_attributes);

	// Repeat until a pass removes nothing, since removing one attribute can
	// expose another one to the tag-level pattern below.
	do {
		$count = 0;
		$attribs = array();

		// Quoted values are collected FIRST (042 and 047 are the octal codes
		// for double and single quote). They must come before the unquoted
		// matches in the alternation below, otherwise a value containing
		// whitespace is only removed up to the first space.
		// Note: "\2" inside the character class is not a backreference; the
		// lazy quantifier followed by the real backreference finds the quote.
		preg_match_all("/(".$names.")\s*=\s*(\042|\047)([^\\2]*?)(\\2)/is", $str, $quoted, PREG_SET_ORDER);

		// Unquoted values end at whitespace or at the end of the tag.
		preg_match_all("/(".$names.")\s*=\s*([^\s>]*)/is", $str, $unquoted, PREG_SET_ORDER);

		foreach (array($quoted, $unquoted) as $matches)
		{
			foreach ($matches as $attr)
			{
				$attribs[] = preg_quote($attr[0], '/');
			}
		}

		// Remove the collected attribute strings, but only where they sit
		// inside an html tag.
		if (count($attribs) > 0)
		{
			$str = preg_replace("/<(\/?[^><]+?)([^A-Za-z\-])(".implode('|', $attribs).")([\s><])([><]*)/i", '<$1$2$4$5', $str, -1, $count);
		}

	} while ($count);

	return $str;
}
656
647
0 commit comments