@@ -677,43 +677,55 @@ protected function _sanitize_naughty_html($matches)
677
677
// For other tags, see if their attributes are "evil" and strip those
678
678
elseif (isset ($ matches ['attributes ' ]))
679
679
{
680
- // We'll need to catch all attributes separately first
681
- $ pattern = '# '
682
- .'([\s\042\047/=]*) ' // non-attribute characters, excluding > (tag close) for obvious reasons
680
+ // We'll store the already fitlered attributes here
681
+ $ attributes = array ();
682
+
683
+ // Attribute-catching pattern
684
+ $ attributes_pattern = '# '
683
685
.'(?<name>[^\s\042\047>/=]+) ' // attribute characters
684
686
// optional attribute-value
685
687
.'(?:\s*=(?<value>[^\s\042\047=><`]+|\s*\042[^\042]*\042|\s*\047[^\047]*\047|\s*(?U:[^\s\042\047=><`]*))) ' // attribute-value separator
686
688
.'#i ' ;
687
689
688
- if ($ count = preg_match_all ($ pattern , $ matches ['attributes ' ], $ attributes , PREG_SET_ORDER | PREG_OFFSET_CAPTURE ))
690
+ // Blacklist pattern for evil attribute names
691
+ $ is_evil_pattern = '#^( ' .implode ('| ' , $ evil_attributes ).')$#i ' ;
692
+
693
+ // Each iteration filters a single attribute
694
+ do
689
695
{
690
- // Since we'll be using substr_replace() below, we
691
- // need to handle the attributes in reverse order,
692
- // so we don't damage the string.
693
- for ($ i = $ count - 1 ; $ i > -1 ; $ i --)
696
+ // Strip any non-alpha characters that may preceed an attribute.
697
+ // Browsers often parse these incorrectly and that has been a
698
+ // of numerous XSS issues we've had.
699
+ $ matches ['attributes ' ] = preg_replace ('#^[^a-z]+#i ' , '' , $ matches ['attributes ' ]);
700
+
701
+ if ( ! preg_match ($ attributes_pattern , $ matches ['attributes ' ], $ attribute , PREG_OFFSET_CAPTURE ))
694
702
{
695
- if (
696
- // Is it indeed an "evil" attribute?
697
- preg_match ('#^( ' .implode ('| ' , $ evil_attributes ).')$#i ' , $ attributes [$ i ]['name ' ][0 ])
698
- // Or an attribute not starting with a letter? Some parsers get confused by that
699
- OR ! ctype_alpha ($ attributes [$ i ]['name ' ][0 ][0 ])
700
- // Does it have an equals sign, but no value and not quoted? Strip that too!
701
- OR (trim ($ attributes [$ i ]['value ' ][0 ]) === '' )
702
- )
703
- {
704
- $ matches ['attributes ' ] = substr_replace (
705
- $ matches ['attributes ' ],
706
- ' [removed] ' ,
707
- $ attributes [$ i ][0 ][1 ],
708
- strlen ($ attributes [$ i ][0 ][0 ])
709
- );
710
- }
703
+ // No (valid) attribute found? Discard everything else inside the tag
704
+ break ;
711
705
}
712
706
713
- // Note: This will strip some non-space characters and/or
714
- // reduce multiple spaces between attributes.
715
- return '< ' .$ matches ['slash ' ].$ matches ['tagName ' ].' ' .trim ($ matches ['attributes ' ]).'> ' ;
707
+ if (
708
+ // Is it indeed an "evil" attribute?
709
+ preg_match ($ is_evil_pattern , $ attribute ['name ' ][0 ])
710
+ // Or does it have an equals sign, but no value and not quoted? Strip that too!
711
+ OR (trim ($ attribute ['value ' ][0 ]) === '' )
712
+ )
713
+ {
714
+ $ attributes [] = 'xss=removed ' ;
715
+ }
716
+ else
717
+ {
718
+ $ attributes [] = $ attribute [0 ][0 ];
719
+ }
720
+
721
+ $ matches ['attributes ' ] = substr ($ matches ['attributes ' ], $ attribute [0 ][1 ] + strlen ($ attribute [0 ][0 ]));
716
722
}
723
+ while ($ matches ['attributes ' ] !== '' );
724
+
725
+ $ attributes = empty ($ attributes )
726
+ ? ''
727
+ : ' ' .implode (' ' , $ attributes );
728
+ return '< ' .$ matches ['slash ' ].$ matches ['tagName ' ].$ attributes .'> ' ;
717
729
}
718
730
719
731
return $ matches [0 ];
@@ -887,4 +899,4 @@ protected function _csrf_set_hash()
887
899
}
888
900
889
901
/* End of file Security.php */
890
- /* Location: ./system/core/Security.php */
902
+ /* Location: ./system/core/Security.php */
0 commit comments