Skip to content

Commit aad2f99

Browse files
Polyfills for mb_chr(), mb_ord() and mb_scrub()
1 parent 8a6bdec commit aad2f99

File tree

2 files changed

+47
-0
lines changed

2 files changed

+47
-0
lines changed

Mbstring.php

+42
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
* Partial mbstring implementation in PHP, iconv based, UTF-8 centric.
1616
*
1717
* Implemented:
18+
* - mb_chr - Returns a specific character from its Unicode code point
1819
* - mb_convert_encoding - Convert character encoding
1920
* - mb_convert_variables - Convert character code in variable(s)
2021
* - mb_decode_mimeheader - Decode string in MIME header field
@@ -25,7 +26,9 @@
2526
* - mb_http_output - Set/Get HTTP output character encoding
2627
* - mb_internal_encoding - Set/Get internal character encoding
2728
* - mb_list_encodings - Returns an array of all supported encodings
29+
* - mb_ord - Returns the Unicode code point of a character
2830
* - mb_output_handler - Callback function converts character encoding in output buffer
31+
* - mb_scrub - Replaces ill-formed byte sequences with substitute characters
2932
* - mb_strlen - Get string length
3033
* - mb_strpos - Find position of first occurrence of string in a string
3134
* - mb_strrpos - Find position of last occurrence of a string in a string
@@ -532,6 +535,45 @@ public static function mb_output_handler($contents, $status)
532535
return $contents;
533536
}
534537

538+
/**
 * Builds the character for a code point and returns it in the requested encoding.
 *
 * The code point is first reduced modulo 0x200000, then written out as a
 * one- to four-byte UTF-8 sequence. When the resolved encoding is not
 * 'UTF-8' the sequence is handed to mb_convert_encoding().
 *
 * @param int         $code     Code point to encode
 * @param string|null $encoding Target encoding; resolved through self::getEncoding()
 *                              (defined elsewhere in this class - presumably falls
 *                              back to the internal encoding when null; TODO confirm)
 *
 * @return string The character; for non-UTF-8 targets, whatever mb_convert_encoding() yields
 */
public static function mb_chr($code, $encoding = null)
{
    // Keep only what a four-byte sequence can carry.
    $code %= 0x200000;

    if ($code < 0x80) {
        // Single byte.
        $utf8 = chr($code);
    } elseif ($code < 0x800) {
        // Two bytes: 110xxxxx 10xxxxxx
        $utf8 = chr(0xC0 | ($code >> 6))
            .chr(0x80 | ($code & 0x3F));
    } elseif ($code < 0x10000) {
        // Three bytes: 1110xxxx 10xxxxxx 10xxxxxx
        $utf8 = chr(0xE0 | ($code >> 12))
            .chr(0x80 | (($code >> 6) & 0x3F))
            .chr(0x80 | ($code & 0x3F));
    } else {
        // Four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
        $utf8 = chr(0xF0 | ($code >> 18))
            .chr(0x80 | (($code >> 12) & 0x3F))
            .chr(0x80 | (($code >> 6) & 0x3F))
            .chr(0x80 | ($code & 0x3F));
    }

    $encoding = self::getEncoding($encoding);
    if ('UTF-8' === $encoding) {
        return $utf8;
    }

    return mb_convert_encoding($utf8, $encoding, 'UTF-8');
}
556+
557+
/**
 * Returns the code point of the first character of a string.
 *
 * Input that is not already UTF-8 (according to the resolved encoding) is
 * converted with mb_convert_encoding() first; the leading UTF-8 sequence is
 * then decoded from at most the first four bytes.
 *
 * Truncated sequences are handled without touching undefined offsets: a
 * one-byte string yields that byte's value, and continuation bytes missing
 * from a longer sequence are treated as 0x80, so they add nothing to the
 * result.
 *
 * @param string      $s        String whose first character is decoded
 * @param string|null $encoding Encoding of $s; resolved through self::getEncoding()
 *                              (defined elsewhere in this class - presumably falls
 *                              back to the internal encoding when null; TODO confirm)
 *
 * @return int The decoded code point, or 0 when there are no bytes to read
 */
public static function mb_ord($s, $encoding = null)
{
    if ('UTF-8' !== $encoding = self::getEncoding($encoding)) {
        $s = mb_convert_encoding($s, 'UTF-8', $encoding);
    }

    // A lone byte is its own value; a stray lead byte must not be decoded
    // against continuation bytes that do not exist.
    if (1 === strlen($s)) {
        return ord($s);
    }

    $bytes = unpack('C*', substr($s, 0, 4));
    if (!$bytes) {
        return 0;
    }

    // unpack() keys start at 1; reindex from 0 and pad to four entries with
    // 0x80 so every offset read below is defined.
    $bytes = array_pad(array_values($bytes), 4, 0x80);
    $lead = $bytes[0];

    if (0xF0 <= $lead) {
        return (($lead - 0xF0) << 18) + (($bytes[1] - 0x80) << 12) + (($bytes[2] - 0x80) << 6) + $bytes[3] - 0x80;
    }
    if (0xE0 <= $lead) {
        return (($lead - 0xE0) << 12) + (($bytes[1] - 0x80) << 6) + $bytes[2] - 0x80;
    }
    if (0xC0 <= $lead) {
        return (($lead - 0xC0) << 6) + $bytes[1] - 0x80;
    }

    return $lead;
}
576+
535577
private static function getSubpart($pos, $part, $haystack, $encoding)
536578
{
537579
if (false === $pos) {

bootstrap.php

+5
Original file line numberDiff line numberDiff line change
@@ -49,3 +49,8 @@ function mb_output_handler($contents, $status) { return p\Mbstring::mb_output_ha
4949
/**
 * Global-namespace shim: forwards to p\Mbstring::mb_http_input().
 *
 * @param string $type Passed through unchanged
 *
 * @return mixed Whatever p\Mbstring::mb_http_input() returns
 */
function mb_http_input($type = '')
{
    return p\Mbstring::mb_http_input($type);
}
5050
/**
 * Global-namespace shim: forwards to p\Mbstring::mb_convert_variables().
 *
 * Up to six variables are accepted by reference and passed on as-is.
 *
 * @param string $toEncoding   Passed through unchanged
 * @param string $fromEncoding Passed through unchanged
 *
 * @return mixed Whatever p\Mbstring::mb_convert_variables() returns
 */
function mb_convert_variables(
    $toEncoding,
    $fromEncoding,
    &$a = null,
    &$b = null,
    &$c = null,
    &$d = null,
    &$e = null,
    &$f = null
) {
    return p\Mbstring::mb_convert_variables($toEncoding, $fromEncoding, $a, $b, $c, $d, $e, $f);
}
5151
}
52+
// mb_ord(), mb_chr() and mb_scrub() are declared together, keyed on the
// absence of mb_chr().
if (!function_exists('mb_chr')) {
    /**
     * Forwards to p\Mbstring::mb_ord().
     */
    function mb_ord($s, $enc = null)
    {
        return p\Mbstring::mb_ord($s, $enc);
    }

    /**
     * Forwards to p\Mbstring::mb_chr().
     */
    function mb_chr($code, $enc = null)
    {
        return p\Mbstring::mb_chr($code, $enc);
    }

    /**
     * Round-trips $s through mb_convert_encoding() with the same source and
     * target encoding; a null $enc is replaced by mb_internal_encoding().
     */
    function mb_scrub($s, $enc = null)
    {
        if (null === $enc) {
            $enc = mb_internal_encoding();
        }

        return mb_convert_encoding($s, $enc, $enc);
    }
}

0 commit comments

Comments
 (0)