diff --git a/ext/mbstring/common_codepoints.txt b/ext/mbstring/common_codepoints.txt index aa775a11b2626..ff528592be007 100644 --- a/ext/mbstring/common_codepoints.txt +++ b/ext/mbstring/common_codepoints.txt @@ -7,7 +7,9 @@ 0x0118 0x0119 # Polish 0x0141 0x0144 # Polish 0x015A 0x015B # Polish +0x0160 0x0161 # Used in Slavic names 0x0179 0x017C # Polish +0x017D 0x017E # Used in Slavic names 0x0300 0x030A # Diacritical marks 0x0370 0x0377 # Greek 0x037A 0x037F # Greek diff --git a/ext/mbstring/rare_cp_bitvec.h b/ext/mbstring/rare_cp_bitvec.h index cf9a679f8d14d..10f492ab3d1b7 100644 --- a/ext/mbstring/rare_cp_bitvec.h +++ b/ext/mbstring/rare_cp_bitvec.h @@ -11,7 +11,7 @@ static uint32_t rare_codepoint_bitvec[] = { 0xffffd9ff, 0x00000000, 0x00000000, 0x80000000, 0xffffffff, 0x00002001, 0x00000000, 0x00000000, -0xfcffff0f, 0xffffffff, 0xf3ffffe1, 0xe1ffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, +0xfcffff0f, 0xffffffff, 0xf3ffffe1, 0x81fffffc, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xfffff800, 0xffffffff, 0xffffffff, 0x0300ffff, 0x0000280f, 0x00000004, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, diff --git a/ext/mbstring/tests/mb_detect_encoding.phpt b/ext/mbstring/tests/mb_detect_encoding.phpt index ce67dc87e7078..ba88be6443afb 100644 --- a/ext/mbstring/tests/mb_detect_encoding.phpt +++ b/ext/mbstring/tests/mb_detect_encoding.phpt @@ -58,6 +58,12 @@ END:VCARD '; echo mb_detect_encoding($test, ['UTF-8', 'UTF-16']), "\n"; +$test = 'Dušan'; +echo mb_detect_encoding($test, ['UTF-8', 'ISO-8859-1']), "\n"; // Should be UTF-8 + +$test = 'Živko'; +echo mb_detect_encoding($test, ['UTF-8', 'ISO-8859-1']), "\n"; // Should be UTF-8 + // We once had a problem where all kind of strings would be detected as 'UUENCODE' echo mb_detect_encoding('abc', ['UUENCODE', 'UTF-8']), "\n"; echo mb_detect_encoding('abc', ['UUENCODE', 'QPrint', 'HTML-ENTITIES', 'Base64', '7bit', '8bit', 'SJIS']), "\n"; @@ -246,6 +252,8 @@ ISO-8859-1 UTF-8 UTF-8 UTF-8 +UTF-8 +UTF-8 SJIS == DETECT ORDER == JIS: JIS