@@ -79,16 +79,14 @@ final class Mbstring
79
79
['μ ' , 's ' , 'ι ' , 'σ ' , 'β ' , 'θ ' , 'φ ' , 'π ' , 'κ ' , 'ρ ' , 'ε ' , "\xE1\xB9\xA1" , 'ι ' ],
80
80
];
81
81
82
- private const CHARACTERS = " \f\n\r\t\v\x00\u{00A0}\u{1680}\u{2000}\u{2001}\u{2002}\u{2003}\u{2004}\u{2005}\u{2006}\u{2007}\u{2008}\u{2009}\u{200A}\u{2028}\u{2029}\u{202F}\u{205F}\u{3000}\u{0085}\u{180E}" ;
83
-
84
82
private static $ encodingList = ['ASCII ' , 'UTF-8 ' ];
85
83
private static $ language = 'neutral ' ;
86
84
private static $ internalEncoding = 'UTF-8 ' ;
87
85
88
86
public static function mb_convert_encoding ($ s , $ toEncoding , $ fromEncoding = null )
89
87
{
90
88
if (\is_array ($ s )) {
91
- if (PHP_VERSION_ID < 70200 ) {
89
+ if (\ PHP_VERSION_ID < 70200 ) {
92
90
trigger_error ('mb_convert_encoding() expects parameter 1 to be string, array given ' , \E_USER_WARNING );
93
91
94
92
return null ;
@@ -987,68 +985,73 @@ private static function getEncoding($encoding)
987
985
988
986
/**
 * Strips the given characters from both ends of a string.
 *
 * @param string      $string     The string to trim
 * @param string|null $characters Characters to strip; null selects the default
 *                                whitespace list built by mb_internal_trim()
 * @param string|null $encoding   Encoding of $string and $characters; null selects
 *                                the internal encoding
 *
 * @return string The trimmed string
 */
public static function mb_trim(string $string, ?string $characters = null, ?string $encoding = null): string
{
    // %s and %1$s both receive the same character-class body: one leading
    // run, one trailing run. "D" makes "$" match only at the very end and
    // "u" makes the class operate on code points.
    $pattern = '{^[%s]+|[%1$s]+$}Du';

    return self::mb_internal_trim($pattern, $string, $characters, $encoding, __FUNCTION__);
}
992
990
993
991
/**
 * Strips the given characters from the beginning of a string.
 *
 * @param string      $string     The string to trim
 * @param string|null $characters Characters to strip; null selects the default
 *                                whitespace list built by mb_internal_trim()
 * @param string|null $encoding   Encoding of $string and $characters; null selects
 *                                the internal encoding
 *
 * @return string The trimmed string
 */
public static function mb_ltrim(string $string, ?string $characters = null, ?string $encoding = null): string
{
    // Anchored at the start only; "u" makes the class operate on code points.
    $pattern = '{^[%s]+}Du';

    return self::mb_internal_trim($pattern, $string, $characters, $encoding, __FUNCTION__);
}
997
995
998
996
/**
 * Strips the given characters from the end of a string.
 *
 * @param string      $string     The string to trim
 * @param string|null $characters Characters to strip; null selects the default
 *                                whitespace list built by mb_internal_trim()
 * @param string|null $encoding   Encoding of $string and $characters; null selects
 *                                the internal encoding
 *
 * @return string The trimmed string
 */
public static function mb_rtrim(string $string, ?string $characters = null, ?string $encoding = null): string
{
    // The "u" modifier is required here, exactly as in mb_trim()/mb_ltrim():
    // mb_internal_trim() hands the regex UTF-8 data, and without "u" the
    // character class is matched byte by byte. A multibyte character in the
    // class then contributes its individual bytes, so a trailing character
    // that merely shares a byte with one of them (e.g. the 0x80 of "À") would
    // be cut in half and the result would no longer be valid UTF-8.
    return self::mb_internal_trim('{[%s]+$}Du', $string, $characters, $encoding, __FUNCTION__);
}
1002
1000
1003
/**
 * Shared implementation behind mb_trim(), mb_ltrim() and mb_rtrim().
 *
 * The subject and the character list are brought to UTF-8, the characters are
 * substituted into $regex as the body of a character class, matches are
 * removed, and the result is converted back to the requested encoding.
 *
 * @param string      $regex      sprintf() template of a complete PCRE pattern
 *                                (delimiters and modifiers included) with a
 *                                placeholder for the character-class body
 * @param string      $string     The string to trim
 * @param string|null $characters Characters to strip; null selects the default
 *                                whitespace list, '' disables trimming
 * @param string|null $encoding   Encoding of the inputs; null selects the
 *                                internal encoding
 * @param string      $function   Name of the public caller, used as the prefix
 *                                of the invalid-encoding error message
 *
 * @return string The trimmed string
 *
 * @throws \ValueError When an explicitly given $encoding is rejected by assertEncoding()
 */
private static function mb_internal_trim(string $regex, string $string, ?string $characters, ?string $encoding, string $function): string
{
    if (null !== $encoding) {
        self::assertEncoding($encoding, $function.'(): Argument #3 ($encoding) must be a valid encoding, "%s" given');
    } else {
        $encoding = self::mb_internal_encoding();
    }

    // An empty character list means there is nothing to strip.
    if ('' === $characters) {
        if (null === $encoding) {
            return $string;
        }

        return self::mb_convert_encoding($string, $encoding);
    }

    // Null means "already UTF-8, no conversion back needed at the end".
    $outputEncoding = 'UTF-8' === $encoding ? null : $encoding;

    if ('UTF-8' === $encoding) {
        // preg_match('//u', ...) fails on malformed UTF-8; in that case let
        // iconv drop the invalid sequences so the "u" regex below can run.
        if (!preg_match('//u', $string)) {
            $string = @iconv('UTF-8', 'UTF-8//IGNORE', $string);
        }
        if (null !== $characters && !preg_match('//u', $characters)) {
            $characters = @iconv('UTF-8', 'UTF-8//IGNORE', $characters);
        }
    } else {
        $string = iconv($encoding, 'UTF-8//IGNORE', $string);

        if (null !== $characters) {
            $characters = iconv($encoding, 'UTF-8//IGNORE', $characters);
        }
    }

    // The default list is already regex-safe ("\\0" is the regex escape for
    // NUL); caller-supplied characters must be quoted before they are placed
    // inside the character class.
    $classBody = null === $characters
        ? "\\0 \f\n\r\t\v\u{00A0}\u{1680}\u{2000}\u{2001}\u{2002}\u{2003}\u{2004}\u{2005}\u{2006}\u{2007}\u{2008}\u{2009}\u{200A}\u{2028}\u{2029}\u{202F}\u{205F}\u{3000}\u{0085}\u{180E}"
        : preg_quote($characters);

    $trimmed = preg_replace(sprintf($regex, $classBody), '', $string);

    if (null === $outputEncoding) {
        return $trimmed;
    }

    return iconv('UTF-8', $outputEncoding.'//IGNORE', $trimmed);
}
1040
1043
1041
1044
/**
 * Ensures that $encoding is accepted by mb_check_encoding().
 *
 * @param string $encoding    Encoding name to validate
 * @param string $errorFormat sprintf() template for the exception message;
 *                            receives $encoding as its only argument
 *
 * @throws \ValueError When the encoding is not valid
 */
private static function assertEncoding(string $encoding, string $errorFormat): void
{
    try {
        $isValid = @self::mb_check_encoding('', $encoding);
    } catch (\ValueError $e) {
        // Treated the same as a false result so that a single, uniformly
        // worded error is raised below.
        $isValid = false;
    }

    // A false return (rather than an exception) is the BC path for
    // PHP 7.3 and lower.
    if ($isValid) {
        return;
    }

    throw new \ValueError(sprintf($errorFormat, $encoding));
}
1054
1057
}
0 commit comments