36#define NONUTF8_INVALID_BYTE0 (0x8d)
37#define NONUTF8_INVALID_BYTE1 (' ')
74 while (
len > 0 && *from)
95 if (*s < 0x20 || *s == 0x7f)
109 while (
len > 0 && *from)
111 if (*from ==
SS2 &&
len >= 2)
115 *to = (
SS2 << 8) | *from++;
118 else if (*from ==
SS3 &&
len >= 3)
121 *to = (
SS3 << 16) | (*from++ << 8);
236 while (
len > 0 && *from)
238 if (*from ==
SS2 &&
len >= 3)
241 *to = (
SS2 << 16) | (*from++ << 8);
245 else if (*from ==
SS3 &&
len >= 3)
248 *to = (
SS3 << 16) | (*from++ << 8);
303 while (
len > 0 && *from)
305 if (*from ==
SS2 &&
len >= 4)
308 *to = (((
uint32)
SS2) << 24) | (*from++ << 16);
313 else if (*from ==
SS3 &&
len >= 3)
316 *to = (
SS3 << 16) | (*from++ << 8);
381 while (
len > 0 && *from)
385 if ((
c = (*from >> 24)))
388 *to++ = (*from >> 16) & 0xff;
389 *to++ = (*from >> 8) & 0xff;
390 *to++ = *from & 0xff;
393 else if ((
c = (*from >> 16)))
396 *to++ = (*from >> 8) & 0xff;
397 *to++ = *from & 0xff;
400 else if ((
c = (*from >> 8)))
403 *to++ = *from & 0xff;
449 while (
len > 0 && *from)
451 if ((*from & 0x80) == 0)
456 else if ((*from & 0xe0) == 0xc0)
462 *to = (c1 << 6) | c2;
465 else if ((*from & 0xf0) == 0xe0)
472 *to = (c1 << 12) | (c2 << 6) | c3;
475 else if ((*from & 0xf8) == 0xf0)
483 *to = (c1 << 18) | (c2 << 12) | (c3 << 6) | c4;
511 while (
len > 0 && *from)
542 if ((*s & 0x80) == 0)
544 else if ((*s & 0xe0) == 0xc0)
546 else if ((*s & 0xf0) == 0xe0)
548 else if ((*s & 0xf8) == 0xf0)
551 else if ((*s & 0xfc) == 0xf8)
553 else if ((*s & 0xfe) == 0xfc)
586 if (ucs <
table[0].first || ucs >
table[max].last)
590 mid = (min + max) / 2;
591 if (ucs >
table[mid].last)
593 else if (ucs <
table[mid].first)
637 if (ucs < 0x20 || (ucs >= 0x7f && ucs < 0xa0) || ucs > 0x0010ffff)
678 while (
len > 0 && *from)
710 *to = (
unsigned char) *from++;
731 while (
len > 0 && *from)
735 lb = (*from >> 16) & 0xff;
739 *to++ = *from & 0xff;
745 *to++ = (*from >> 8) & 0xff;
746 *to++ = *from & 0xff;
753 *to++ = *from & 0xff;
760 *to++ = *from & 0xff;
767 *to++ = (*from >> 8) & 0xff;
768 *to++ = *from & 0xff;
775 *to++ = (*from >> 8) & 0xff;
776 *to++ = *from & 0xff;
781 *to++ = *from & 0xff;
843 while (
len > 0 && *from)
865 while (
len > 0 && *from)
895 if (*s >= 0xa1 && *s <= 0xdf)
909 if (*s >= 0xa1 && *s <= 0xdf)
1021 else if (*(s + 1) >= 0x30 && *(s + 1) <= 0x39)
1071 const unsigned char *nullpos = memchr(s, 0,
len);
1073 if (nullpos == NULL)
1079#define IS_EUC_RANGE_VALID(c) ((c) >= 0xa1 && (c) <= 0xfe)
1097 if (c2 < 0xa1 || c2 > 0xdf)
1139 const unsigned char *
start = s;
1197 const unsigned char *
start = s;
1224#define pg_euccn_verifychar pg_euckr_verifychar
1225#define pg_euccn_verifystr pg_euckr_verifystr
1243 if (c2 < 0xa1 || c2 > 0xa7)
1280 const unsigned char *
start = s;
1333 const unsigned char *
start = s;
1383 const unsigned char *
start = s;
1418 const unsigned char *nullpos = memchr(s, 0,
len);
1420 if (nullpos == NULL)
1452 const unsigned char *
start = s;
1506 const unsigned char *
start = s;
1560 const unsigned char *
start = s;
1614 const unsigned char *
start = s;
1647 else if (
len >= 4 && *(s + 1) >= 0x30 && *(s + 1) <= 0x39)
1650 if (*s >= 0x81 && *s <= 0xfe &&
1651 *(s + 2) >= 0x81 && *(s + 2) <= 0xfe &&
1652 *(s + 3) >= 0x30 && *(s + 3) <= 0x39)
1657 else if (
len >= 2 && *s >= 0x81 && *s <= 0xfe)
1660 if ((*(s + 1) >= 0x40 && *(s + 1) <= 0x7e) ||
1661 (*(s + 1) >= 0x80 && *(s + 1) <= 0xfe))
1674 const unsigned char *
start = s;
1705 if ((*s & 0x80) == 0)
1711 else if ((*s & 0xe0) == 0xc0)
1713 else if ((*s & 0xf0) == 0xe0)
1715 else if ((*s & 0xf8) == 0xf0)
1797#define ASC (END << BGN)
1799#define L2A (CS1 << BGN)
1801#define L3A (P3A << BGN)
1802#define L3B (CS2 << BGN)
1803#define L3C (P3B << BGN)
1805#define L4A (P4A << BGN)
1806#define L4B (CS3 << BGN)
1807#define L4C (P4B << BGN)
1809#define CR1 (END << CS1) | (CS1 << CS2) | (CS2 << CS3) | (CS1 << P3B) | (CS2 << P4B)
1810#define CR2 (END << CS1) | (CS1 << CS2) | (CS2 << CS3) | (CS1 << P3B) | (CS2 << P4A)
1811#define CR3 (END << CS1) | (CS1 << CS2) | (CS2 << CS3) | (CS1 << P3A) | (CS2 << P4A)
1893 const unsigned char *
start = s;
1894 const int orig_len =
len;
1902#define STRIDE_LENGTH (2 * sizeof(Vector8))
2000 if (a < 0x80 || a > 0xBF)
2005 if (a < 0x80 || a > 0xBF)
2013 if (a < 0xA0 || a > 0xBF)
2017 if (a < 0x80 || a > 0x9F)
2021 if (a < 0x90 || a > 0xBF)
2025 if (a < 0x80 || a > 0x8F)
2029 if (a < 0x80 || a > 0xBF)
2036 if (
a >= 0x80 &&
a < 0xC2)
static bool is_valid_ascii(const unsigned char *s, int len)
#define IS_HIGHBIT_SET(ch)
Assert(PointerIsAligned(start, uint64))
static pg_wchar utf8_to_unicode(const unsigned char *c)
static const struct lconv_member_info table[]
static rewind_source * source
static unsigned char * unicode_to_utf8(pg_wchar c, unsigned char *utf8string)
#define IS_LCPRV2_B_RANGE(c)
#define IS_LCPRV1_A_RANGE(c)
#define PG_VALID_ENCODING(_enc)
#define IS_LCPRV1_B_RANGE(c)
#define IS_LCPRV2_A_RANGE(c)
size_t strnlen(const char *str, size_t maxlen)
static const struct mbinterval east_asian_fw[]
static const struct mbinterval nonspacing[]
static int pg_uhc_verifystr(const unsigned char *s, int len)
static int pg_latin1_dsplen(const unsigned char *s)
int pg_encoding_mblen_bounded(int encoding, const char *mbstr)
static int pg_euctw_mblen(const unsigned char *s)
static int pg_euckr_dsplen(const unsigned char *s)
static const uint32 Utf8Transition[256]
bool pg_utf8_islegal(const unsigned char *source, int length)
static int pg_ascii_verifystr(const unsigned char *s, int len)
static int pg_latin1_verifychar(const unsigned char *s, int len)
static int pg_sjis_dsplen(const unsigned char *s)
static int pg_sjis_verifychar(const unsigned char *s, int len)
static int pg_euc2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
static int pg_eucjp_dsplen(const unsigned char *s)
static int pg_ascii2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
static int pg_uhc_verifychar(const unsigned char *s, int len)
static int pg_gbk_dsplen(const unsigned char *s)
static int pg_euckr2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
static int pg_euctw2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
#define pg_euccn_verifychar
static int pg_sjis_verifystr(const unsigned char *s, int len)
static int pg_johab_mblen(const unsigned char *s)
static int pg_johab_dsplen(const unsigned char *s)
static int pg_big5_verifystr(const unsigned char *s, int len)
static int pg_mule_verifychar(const unsigned char *s, int len)
static int pg_eucjp2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
static int pg_latin1_verifystr(const unsigned char *s, int len)
static int pg_latin1_mblen(const unsigned char *s)
static int pg_ascii_verifychar(const unsigned char *s, int len)
static int pg_ascii_mblen(const unsigned char *s)
void pg_encoding_set_invalid(int encoding, char *dst)
static int mbbisearch(pg_wchar ucs, const struct mbinterval *table, int max)
static int pg_big5_dsplen(const unsigned char *s)
#define pg_euccn_verifystr
int pg_utf_mblen(const unsigned char *s)
int pg_encoding_mblen_or_incomplete(int encoding, const char *mbstr, size_t remaining)
#define NONUTF8_INVALID_BYTE0
static int pg_eucjp_mblen(const unsigned char *s)
static int pg_gbk_verifychar(const unsigned char *s, int len)
static int pg_big5_mblen(const unsigned char *s)
static int pg_euccn_dsplen(const unsigned char *s)
static int pg_euctw_verifychar(const unsigned char *s, int len)
static int pg_euckr_verifychar(const unsigned char *s, int len)
static int pg_euctw_verifystr(const unsigned char *s, int len)
static int pg_gbk_verifystr(const unsigned char *s, int len)
static int pg_gb18030_dsplen(const unsigned char *s)
static int pg_utf2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
int pg_mule_mblen(const unsigned char *s)
static int pg_euccn_mblen(const unsigned char *s)
static int pg_gbk_mblen(const unsigned char *s)
static int pg_eucjp_verifystr(const unsigned char *s, int len)
static int pg_johab_verifystr(const unsigned char *s, int len)
static int pg_euc_dsplen(const unsigned char *s)
static int pg_gb18030_verifystr(const unsigned char *s, int len)
static int pg_euckr_verifystr(const unsigned char *s, int len)
static int pg_wchar2mule_with_len(const pg_wchar *from, unsigned char *to, int len)
static int pg_sjis_mblen(const unsigned char *s)
#define IS_EUC_RANGE_VALID(c)
static int pg_uhc_dsplen(const unsigned char *s)
static int pg_eucjp_verifychar(const unsigned char *s, int len)
static int pg_big5_verifychar(const unsigned char *s, int len)
static int pg_gb18030_verifychar(const unsigned char *s, int len)
static int pg_mule_verifystr(const unsigned char *s, int len)
static int pg_wchar2euc_with_len(const pg_wchar *from, unsigned char *to, int len)
static int pg_utf8_verifychar(const unsigned char *s, int len)
static int pg_wchar2single_with_len(const pg_wchar *from, unsigned char *to, int len)
static int pg_wchar2utf_with_len(const pg_wchar *from, unsigned char *to, int len)
static int pg_euccn2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
static int pg_gb18030_mblen(const unsigned char *s)
int pg_encoding_dsplen(int encoding, const char *mbstr)
static void utf8_advance(const unsigned char *s, uint32 *state, int len)
static int pg_euctw_dsplen(const unsigned char *s)
static int pg_latin12wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
static int pg_uhc_mblen(const unsigned char *s)
static int pg_euc_mblen(const unsigned char *s)
static int pg_mule_dsplen(const unsigned char *s)
int pg_encoding_verifymbstr(int encoding, const char *mbstr, int len)
#define NONUTF8_INVALID_BYTE1
static int pg_utf8_verifystr(const unsigned char *s, int len)
static int pg_euckr_mblen(const unsigned char *s)
const pg_wchar_tbl pg_wchar_table[]
static int pg_mule2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
int pg_encoding_max_length(int encoding)
int pg_encoding_mblen(int encoding, const char *mbstr)
static int pg_johab_verifychar(const unsigned char *s, int len)
static int pg_ascii_dsplen(const unsigned char *s)
int pg_encoding_verifymbchar(int encoding, const char *mbstr, int len)
static int ucs_wcwidth(pg_wchar ucs)
static int pg_utf_dsplen(const unsigned char *s)