Skip to content

Commit 8740a07

Browse files
Improve mb_*trim polyfills
1 parent 74b87a0 commit 8740a07

File tree

1 file changed

+34
-31
lines changed

1 file changed

+34
-31
lines changed

Mbstring.php

+34-31
Original file line numberDiff line numberDiff line change
@@ -79,16 +79,14 @@ final class Mbstring
7979
['μ', 's', 'ι', 'σ', 'β', 'θ', 'φ', 'π', 'κ', 'ρ', 'ε', "\xE1\xB9\xA1", 'ι'],
8080
];
8181

82-
private const CHARACTERS = " \f\n\r\t\v\x00\u{00A0}\u{1680}\u{2000}\u{2001}\u{2002}\u{2003}\u{2004}\u{2005}\u{2006}\u{2007}\u{2008}\u{2009}\u{200A}\u{2028}\u{2029}\u{202F}\u{205F}\u{3000}\u{0085}\u{180E}";
83-
8482
private static $encodingList = ['ASCII', 'UTF-8'];
8583
private static $language = 'neutral';
8684
private static $internalEncoding = 'UTF-8';
8785

8886
public static function mb_convert_encoding($s, $toEncoding, $fromEncoding = null)
8987
{
9088
if (\is_array($s)) {
91-
if (PHP_VERSION_ID < 70200) {
89+
if (\PHP_VERSION_ID < 70200) {
9290
trigger_error('mb_convert_encoding() expects parameter 1 to be string, array given', \E_USER_WARNING);
9391

9492
return null;
@@ -987,68 +985,73 @@ private static function getEncoding($encoding)
987985

988986
public static function mb_trim(string $string, ?string $characters = null, ?string $encoding = null): string
989987
{
990-
return self::mb_internal_trim('^[%s]+|[%s]+$', $string, $characters, $encoding);
988+
return self::mb_internal_trim('{^[%s]+|[%1$s]+$}Du', $string, $characters, $encoding, __FUNCTION__);
991989
}
992990

993991
public static function mb_ltrim(string $string, ?string $characters = null, ?string $encoding = null): string
994992
{
995-
return self::mb_internal_trim('^[%s]+', $string, $characters, $encoding);
993+
return self::mb_internal_trim('{^[%s]+}Du', $string, $characters, $encoding, __FUNCTION__);
996994
}
997995

998996
public static function mb_rtrim(string $string, ?string $characters = null, ?string $encoding = null): string
999997
{
1000-
return self::mb_internal_trim('[%s]+$', $string, $characters, $encoding);
998+
return self::mb_internal_trim('{[%s]+$}Du', $string, $characters, $encoding, __FUNCTION__); // "u" is required (as in mb_trim/mb_ltrim): without it the class matches single bytes and can cut multi-byte UTF-8 characters in half
1001999
}
10021000

1003-
private static function mb_internal_trim(string $regex, string $string, ?string $characters = null, ?string $encoding = null): string
1001+
private static function mb_internal_trim(string $regex, string $string, ?string $characters, ?string $encoding, string $function): string
10041002
{
10051003
if (null === $encoding) {
1006-
$encoding = mb_internal_encoding();
1004+
$encoding = self::mb_internal_encoding();
1005+
} else {
1006+
self::assertEncoding($encoding, $function.'(): Argument #3 ($encoding) must be a valid encoding, "%s" given');
10071007
}
10081008

1009-
self::assertEncoding($encoding, debug_backtrace()[1]['function'].'(): Argument #3 ($encoding) must be a valid encoding, "%s" given.');
1010-
10111009
if ('' === $characters) {
1012-
return null === $encoding ? $string : mb_convert_encoding($string, $encoding);
1010+
return null === $encoding ? $string : self::mb_convert_encoding($string, $encoding);
10131011
}
10141012

1015-
if (null === $characters) {
1016-
$characters = self::CHARACTERS;
1017-
}
1013+
if ('UTF-8' === $encoding) {
1014+
$encoding = null;
1015+
if (!preg_match('//u', $string)) {
1016+
$string = @iconv('UTF-8', 'UTF-8//IGNORE', $string);
1017+
}
1018+
if (null !== $characters && !preg_match('//u', $characters)) {
1019+
$characters = @iconv('UTF-8', 'UTF-8//IGNORE', $characters);
1020+
}
1021+
} else {
1022+
$string = iconv($encoding, 'UTF-8//IGNORE', $string);
10181023

1019-
$regexCharacter = preg_quote($characters ?? '', '/');
1020-
$regex = sprintf($regex, $regexCharacter, $regexCharacter);
1024+
if (null !== $characters) {
1025+
$characters = iconv($encoding, 'UTF-8//IGNORE', $characters);
1026+
}
1027+
}
10211028

1022-
if ('ASCII' === mb_detect_encoding($characters) && 'ASCII' === mb_detect_encoding($string) && !empty(array_intersect(str_split(self::CHARACTERS), str_split($string)))) {
1023-
$options = 'g';
1029+
if (null === $characters) {
1030+
$characters = "\\0 \f\n\r\t\v\u{00A0}\u{1680}\u{2000}\u{2001}\u{2002}\u{2003}\u{2004}\u{2005}\u{2006}\u{2007}\u{2008}\u{2009}\u{200A}\u{2028}\u{2029}\u{202F}\u{205F}\u{3000}\u{0085}\u{180E}";
10241031
} else {
1025-
$options = '';
1032+
$characters = preg_quote($characters);
10261033
}
1027-
1028-
try {
1029-
$test = mb_ereg_replace($regex, "", $string, $options);
10301034

1031-
if (null === $test) {
1032-
throw new \Exception();
1033-
}
1035+
$string = preg_replace(sprintf($regex, $characters), '', $string);
10341036

1035-
return $test;
1036-
} catch (\Exception $e) {
1037-
return preg_replace('/'.$regex.'/', "", $string);
1037+
if (null === $encoding) {
1038+
return $string;
10381039
}
1039-
}
1040+
1041+
return iconv('UTF-8', $encoding.'//IGNORE', $string);
1042+
}
10401043

10411044
private static function assertEncoding(string $encoding, string $errorFormat): void
10421045
{
10431046
try {
10441047
$validEncoding = @self::mb_check_encoding('', $encoding);
10451048
} catch (\ValueError $e) {
1046-
throw new \ValueError(\sprintf($errorFormat, $encoding));
1049+
throw new \ValueError(sprintf($errorFormat, $encoding));
10471050
}
10481051

10491052
// BC for PHP 7.3 and lower
10501053
if (!$validEncoding) {
1051-
throw new \ValueError(\sprintf($errorFormat, $encoding));
1054+
throw new \ValueError(sprintf($errorFormat, $encoding));
10521055
}
10531056
}
10541057
}

0 commit comments

Comments
 (0)