@@ -99,8 +99,7 @@ pg_euc2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
99
99
* to |= * from ++ ;
100
100
len -= 2 ;
101
101
}
102
- else
103
- /* must be ASCII */
102
+ else /* must be ASCII */
104
103
{
105
104
* to = * from ++ ;
106
105
len -- ;
@@ -339,6 +338,55 @@ pg_euctw_dsplen(const unsigned char *s)
339
338
return len ;
340
339
}
341
340
341
+ /*
342
+ * Convert pg_wchar to EUC_* encoding.
343
+ * caller must allocate enough space for "to", including a trailing zero!
344
+ * len: length of from.
345
+ * "from" not necessarily null terminated.
346
+ */
347
+ static int
348
+ pg_wchar2euc_with_len (const pg_wchar * from , unsigned char * to , int len )
349
+ {
350
+ int cnt = 0 ;
351
+
352
+ while (len > 0 && * from )
353
+ {
354
+ unsigned char c ;
355
+
356
+ if ((c = (* from >> 24 )))
357
+ {
358
+ * to ++ = c ;
359
+ * to ++ = (* from >> 16 ) & 0xff ;
360
+ * to ++ = (* from >> 8 ) & 0xff ;
361
+ * to ++ = * from & 0xff ;
362
+ cnt += 4 ;
363
+ }
364
+ else if ((c = (* from >> 16 )))
365
+ {
366
+ * to ++ = c ;
367
+ * to ++ = (* from >> 8 ) & 0xff ;
368
+ * to ++ = * from & 0xff ;
369
+ cnt += 3 ;
370
+ }
371
+ else if ((c = (* from >> 8 )))
372
+ {
373
+ * to ++ = c ;
374
+ * to ++ = * from & 0xff ;
375
+ cnt += 2 ;
376
+ }
377
+ else
378
+ {
379
+ * to ++ = * from ;
380
+ cnt ++ ;
381
+ }
382
+ from ++ ;
383
+ len -- ;
384
+ }
385
+ * to = 0 ;
386
+ return cnt ;
387
+ }
388
+
389
+
342
390
/*
343
391
* JOHAB
344
392
*/
@@ -453,6 +501,31 @@ unicode_to_utf8(pg_wchar c, unsigned char *utf8string)
453
501
return utf8string ;
454
502
}
455
503
504
+ /*
505
+ * Trivial conversion from pg_wchar to UTF-8.
506
+ * caller should allocate enough space for "to"
507
+ * len: length of from.
508
+ * "from" not necessarily null terminated.
509
+ */
510
+ static int
511
+ pg_wchar2utf_with_len (const pg_wchar * from , unsigned char * to , int len )
512
+ {
513
+ int cnt = 0 ;
514
+
515
+ while (len > 0 && * from )
516
+ {
517
+ int char_len ;
518
+
519
+ unicode_to_utf8 (* from , to );
520
+ char_len = pg_utf_mblen (to );
521
+ cnt += char_len ;
522
+ to += char_len ;
523
+ from ++ ;
524
+ len -- ;
525
+ }
526
+ * to = 0 ;
527
+ return cnt ;
528
+ }
456
529
457
530
/*
458
531
* Return the byte length of a UTF8 character pointed to by s
@@ -717,6 +790,77 @@ pg_mule2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
717
790
return cnt ;
718
791
}
719
792
793
+ /*
794
+ * convert pg_wchar to mule internal code
795
+ * caller should allocate enough space for "to"
796
+ * len: length of from.
797
+ * "from" not necessarily null terminated.
798
+ */
799
+ static int
800
+ pg_wchar2mule_with_len (const pg_wchar * from , unsigned char * to , int len )
801
+ {
802
+ int cnt = 0 ;
803
+
804
+ while (len > 0 && * from )
805
+ {
806
+ unsigned char lb ;
807
+
808
+ lb = (* from >> 16 ) & 0xff ;
809
+ if (IS_LC1 (lb ))
810
+ {
811
+ * to ++ = lb ;
812
+ * to ++ = * from & 0xff ;
813
+ cnt += 2 ;
814
+ }
815
+ else if (IS_LC2 (lb ))
816
+ {
817
+ * to ++ = lb ;
818
+ * to ++ = (* from >> 8 ) & 0xff ;
819
+ * to ++ = * from & 0xff ;
820
+ cnt += 3 ;
821
+ }
822
+ else if (IS_LCPRV1_A_RANGE (lb ))
823
+ {
824
+ * to ++ = LCPRV1_A ;
825
+ * to ++ = lb ;
826
+ * to ++ = * from & 0xff ;
827
+ cnt += 3 ;
828
+ }
829
+ else if (IS_LCPRV1_B_RANGE (lb ))
830
+ {
831
+ * to ++ = LCPRV1_B ;
832
+ * to ++ = lb ;
833
+ * to ++ = * from & 0xff ;
834
+ cnt += 3 ;
835
+ }
836
+ else if (IS_LCPRV2_A_RANGE (lb ))
837
+ {
838
+ * to ++ = LCPRV2_A ;
839
+ * to ++ = lb ;
840
+ * to ++ = (* from >> 8 ) & 0xff ;
841
+ * to ++ = * from & 0xff ;
842
+ cnt += 4 ;
843
+ }
844
+ else if (IS_LCPRV2_B_RANGE (lb ))
845
+ {
846
+ * to ++ = LCPRV2_B ;
847
+ * to ++ = lb ;
848
+ * to ++ = (* from >> 8 ) & 0xff ;
849
+ * to ++ = * from & 0xff ;
850
+ cnt += 4 ;
851
+ }
852
+ else
853
+ {
854
+ * to ++ = * from & 0xff ;
855
+ cnt += 1 ;
856
+ }
857
+ from ++ ;
858
+ len -- ;
859
+ }
860
+ * to = 0 ;
861
+ return cnt ;
862
+ }
863
+
720
864
int
721
865
pg_mule_mblen (const unsigned char * s )
722
866
{
@@ -772,6 +916,28 @@ pg_latin12wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
772
916
return cnt ;
773
917
}
774
918
919
+ /*
920
+ * Trivial conversion from pg_wchar to single byte encoding. Just ignores
921
+ * high bits.
922
+ * caller should allocate enough space for "to"
923
+ * len: length of from.
924
+ * "from" not necessarily null terminated.
925
+ */
926
+ static int
927
+ pg_wchar2single_with_len (const pg_wchar * from , unsigned char * to , int len )
928
+ {
929
+ int cnt = 0 ;
930
+
931
+ while (len > 0 && * from )
932
+ {
933
+ * to ++ = * from ++ ;
934
+ len -- ;
935
+ cnt ++ ;
936
+ }
937
+ * to = 0 ;
938
+ return cnt ;
939
+ }
940
+
775
941
static int
776
942
pg_latin1_mblen (const unsigned char * s )
777
943
{
@@ -1339,48 +1505,48 @@ pg_utf8_islegal(const unsigned char *source, int length)
1339
1505
*-------------------------------------------------------------------
1340
1506
*/
1341
1507
pg_wchar_tbl pg_wchar_table [] = {
1342
- {pg_ascii2wchar_with_len , pg_ascii_mblen , pg_ascii_dsplen , pg_ascii_verifier , 1 }, /* PG_SQL_ASCII */
1343
- {pg_eucjp2wchar_with_len , pg_eucjp_mblen , pg_eucjp_dsplen , pg_eucjp_verifier , 3 }, /* PG_EUC_JP */
1344
- {pg_euccn2wchar_with_len , pg_euccn_mblen , pg_euccn_dsplen , pg_euccn_verifier , 2 }, /* PG_EUC_CN */
1345
- {pg_euckr2wchar_with_len , pg_euckr_mblen , pg_euckr_dsplen , pg_euckr_verifier , 3 }, /* PG_EUC_KR */
1346
- {pg_euctw2wchar_with_len , pg_euctw_mblen , pg_euctw_dsplen , pg_euctw_verifier , 4 }, /* PG_EUC_TW */
1347
- {pg_eucjp2wchar_with_len , pg_eucjp_mblen , pg_eucjp_dsplen , pg_eucjp_verifier , 3 }, /* PG_EUC_JIS_2004 */
1348
- {pg_utf2wchar_with_len , pg_utf_mblen , pg_utf_dsplen , pg_utf8_verifier , 4 }, /* PG_UTF8 */
1349
- {pg_mule2wchar_with_len , pg_mule_mblen , pg_mule_dsplen , pg_mule_verifier , 4 }, /* PG_MULE_INTERNAL */
1350
- {pg_latin12wchar_with_len , pg_latin1_mblen , pg_latin1_dsplen , pg_latin1_verifier , 1 }, /* PG_LATIN1 */
1351
- {pg_latin12wchar_with_len , pg_latin1_mblen , pg_latin1_dsplen , pg_latin1_verifier , 1 }, /* PG_LATIN2 */
1352
- {pg_latin12wchar_with_len , pg_latin1_mblen , pg_latin1_dsplen , pg_latin1_verifier , 1 }, /* PG_LATIN3 */
1353
- {pg_latin12wchar_with_len , pg_latin1_mblen , pg_latin1_dsplen , pg_latin1_verifier , 1 }, /* PG_LATIN4 */
1354
- {pg_latin12wchar_with_len , pg_latin1_mblen , pg_latin1_dsplen , pg_latin1_verifier , 1 }, /* PG_LATIN5 */
1355
- {pg_latin12wchar_with_len , pg_latin1_mblen , pg_latin1_dsplen , pg_latin1_verifier , 1 }, /* PG_LATIN6 */
1356
- {pg_latin12wchar_with_len , pg_latin1_mblen , pg_latin1_dsplen , pg_latin1_verifier , 1 }, /* PG_LATIN7 */
1357
- {pg_latin12wchar_with_len , pg_latin1_mblen , pg_latin1_dsplen , pg_latin1_verifier , 1 }, /* PG_LATIN8 */
1358
- {pg_latin12wchar_with_len , pg_latin1_mblen , pg_latin1_dsplen , pg_latin1_verifier , 1 }, /* PG_LATIN9 */
1359
- {pg_latin12wchar_with_len , pg_latin1_mblen , pg_latin1_dsplen , pg_latin1_verifier , 1 }, /* PG_LATIN10 */
1360
- {pg_latin12wchar_with_len , pg_latin1_mblen , pg_latin1_dsplen , pg_latin1_verifier , 1 }, /* PG_WIN1256 */
1361
- {pg_latin12wchar_with_len , pg_latin1_mblen , pg_latin1_dsplen , pg_latin1_verifier , 1 }, /* PG_WIN1258 */
1362
- {pg_latin12wchar_with_len , pg_latin1_mblen , pg_latin1_dsplen , pg_latin1_verifier , 1 }, /* PG_WIN866 */
1363
- {pg_latin12wchar_with_len , pg_latin1_mblen , pg_latin1_dsplen , pg_latin1_verifier , 1 }, /* PG_WIN874 */
1364
- {pg_latin12wchar_with_len , pg_latin1_mblen , pg_latin1_dsplen , pg_latin1_verifier , 1 }, /* PG_KOI8R */
1365
- {pg_latin12wchar_with_len , pg_latin1_mblen , pg_latin1_dsplen , pg_latin1_verifier , 1 }, /* PG_WIN1251 */
1366
- {pg_latin12wchar_with_len , pg_latin1_mblen , pg_latin1_dsplen , pg_latin1_verifier , 1 }, /* PG_WIN1252 */
1367
- {pg_latin12wchar_with_len , pg_latin1_mblen , pg_latin1_dsplen , pg_latin1_verifier , 1 }, /* ISO-8859-5 */
1368
- {pg_latin12wchar_with_len , pg_latin1_mblen , pg_latin1_dsplen , pg_latin1_verifier , 1 }, /* ISO-8859-6 */
1369
- {pg_latin12wchar_with_len , pg_latin1_mblen , pg_latin1_dsplen , pg_latin1_verifier , 1 }, /* ISO-8859-7 */
1370
- {pg_latin12wchar_with_len , pg_latin1_mblen , pg_latin1_dsplen , pg_latin1_verifier , 1 }, /* ISO-8859-8 */
1371
- {pg_latin12wchar_with_len , pg_latin1_mblen , pg_latin1_dsplen , pg_latin1_verifier , 1 }, /* PG_WIN1250 */
1372
- {pg_latin12wchar_with_len , pg_latin1_mblen , pg_latin1_dsplen , pg_latin1_verifier , 1 }, /* PG_WIN1253 */
1373
- {pg_latin12wchar_with_len , pg_latin1_mblen , pg_latin1_dsplen , pg_latin1_verifier , 1 }, /* PG_WIN1254 */
1374
- {pg_latin12wchar_with_len , pg_latin1_mblen , pg_latin1_dsplen , pg_latin1_verifier , 1 }, /* PG_WIN1255 */
1375
- {pg_latin12wchar_with_len , pg_latin1_mblen , pg_latin1_dsplen , pg_latin1_verifier , 1 }, /* PG_WIN1257 */
1376
- {pg_latin12wchar_with_len , pg_latin1_mblen , pg_latin1_dsplen , pg_latin1_verifier , 1 }, /* PG_KOI8U */
1377
- {0 , pg_sjis_mblen , pg_sjis_dsplen , pg_sjis_verifier , 2 }, /* PG_SJIS */
1378
- {0 , pg_big5_mblen , pg_big5_dsplen , pg_big5_verifier , 2 }, /* PG_BIG5 */
1379
- {0 , pg_gbk_mblen , pg_gbk_dsplen , pg_gbk_verifier , 2 }, /* PG_GBK */
1380
- {0 , pg_uhc_mblen , pg_uhc_dsplen , pg_uhc_verifier , 2 }, /* PG_UHC */
1381
- {0 , pg_gb18030_mblen , pg_gb18030_dsplen , pg_gb18030_verifier , 4 }, /* PG_GB18030 */
1382
- {0 , pg_johab_mblen , pg_johab_dsplen , pg_johab_verifier , 3 }, /* PG_JOHAB */
1383
- {0 , pg_sjis_mblen , pg_sjis_dsplen , pg_sjis_verifier , 2 } /* PG_SHIFT_JIS_2004 */
1508
+ {pg_ascii2wchar_with_len , pg_wchar2single_with_len , pg_ascii_mblen , pg_ascii_dsplen , pg_ascii_verifier , 1 }, /* PG_SQL_ASCII */
1509
+ {pg_eucjp2wchar_with_len , pg_wchar2euc_with_len , pg_eucjp_mblen , pg_eucjp_dsplen , pg_eucjp_verifier , 3 }, /* PG_EUC_JP */
1510
+ {pg_euccn2wchar_with_len , pg_wchar2euc_with_len , pg_euccn_mblen , pg_euccn_dsplen , pg_euccn_verifier , 2 }, /* PG_EUC_CN */
1511
+ {pg_euckr2wchar_with_len , pg_wchar2euc_with_len , pg_euckr_mblen , pg_euckr_dsplen , pg_euckr_verifier , 3 }, /* PG_EUC_KR */
1512
+ {pg_euctw2wchar_with_len , pg_wchar2euc_with_len , pg_euctw_mblen , pg_euctw_dsplen , pg_euctw_verifier , 4 }, /* PG_EUC_TW */
1513
+ {pg_eucjp2wchar_with_len , pg_wchar2euc_with_len , pg_eucjp_mblen , pg_eucjp_dsplen , pg_eucjp_verifier , 3 }, /* PG_EUC_JIS_2004 */
1514
+ {pg_utf2wchar_with_len , pg_wchar2utf_with_len , pg_utf_mblen , pg_utf_dsplen , pg_utf8_verifier , 4 }, /* PG_UTF8 */
1515
+ {pg_mule2wchar_with_len , pg_wchar2mule_with_len , pg_mule_mblen , pg_mule_dsplen , pg_mule_verifier , 4 }, /* PG_MULE_INTERNAL */
1516
+ {pg_latin12wchar_with_len , pg_wchar2single_with_len , pg_latin1_mblen , pg_latin1_dsplen , pg_latin1_verifier , 1 }, /* PG_LATIN1 */
1517
+ {pg_latin12wchar_with_len , pg_wchar2single_with_len , pg_latin1_mblen , pg_latin1_dsplen , pg_latin1_verifier , 1 }, /* PG_LATIN2 */
1518
+ {pg_latin12wchar_with_len , pg_wchar2single_with_len , pg_latin1_mblen , pg_latin1_dsplen , pg_latin1_verifier , 1 }, /* PG_LATIN3 */
1519
+ {pg_latin12wchar_with_len , pg_wchar2single_with_len , pg_latin1_mblen , pg_latin1_dsplen , pg_latin1_verifier , 1 }, /* PG_LATIN4 */
1520
+ {pg_latin12wchar_with_len , pg_wchar2single_with_len , pg_latin1_mblen , pg_latin1_dsplen , pg_latin1_verifier , 1 }, /* PG_LATIN5 */
1521
+ {pg_latin12wchar_with_len , pg_wchar2single_with_len , pg_latin1_mblen , pg_latin1_dsplen , pg_latin1_verifier , 1 }, /* PG_LATIN6 */
1522
+ {pg_latin12wchar_with_len , pg_wchar2single_with_len , pg_latin1_mblen , pg_latin1_dsplen , pg_latin1_verifier , 1 }, /* PG_LATIN7 */
1523
+ {pg_latin12wchar_with_len , pg_wchar2single_with_len , pg_latin1_mblen , pg_latin1_dsplen , pg_latin1_verifier , 1 }, /* PG_LATIN8 */
1524
+ {pg_latin12wchar_with_len , pg_wchar2single_with_len , pg_latin1_mblen , pg_latin1_dsplen , pg_latin1_verifier , 1 }, /* PG_LATIN9 */
1525
+ {pg_latin12wchar_with_len , pg_wchar2single_with_len , pg_latin1_mblen , pg_latin1_dsplen , pg_latin1_verifier , 1 }, /* PG_LATIN10 */
1526
+ {pg_latin12wchar_with_len , pg_wchar2single_with_len , pg_latin1_mblen , pg_latin1_dsplen , pg_latin1_verifier , 1 }, /* PG_WIN1256 */
1527
+ {pg_latin12wchar_with_len , pg_wchar2single_with_len , pg_latin1_mblen , pg_latin1_dsplen , pg_latin1_verifier , 1 }, /* PG_WIN1258 */
1528
+ {pg_latin12wchar_with_len , pg_wchar2single_with_len , pg_latin1_mblen , pg_latin1_dsplen , pg_latin1_verifier , 1 }, /* PG_WIN866 */
1529
+ {pg_latin12wchar_with_len , pg_wchar2single_with_len , pg_latin1_mblen , pg_latin1_dsplen , pg_latin1_verifier , 1 }, /* PG_WIN874 */
1530
+ {pg_latin12wchar_with_len , pg_wchar2single_with_len , pg_latin1_mblen , pg_latin1_dsplen , pg_latin1_verifier , 1 }, /* PG_KOI8R */
1531
+ {pg_latin12wchar_with_len , pg_wchar2single_with_len , pg_latin1_mblen , pg_latin1_dsplen , pg_latin1_verifier , 1 }, /* PG_WIN1251 */
1532
+ {pg_latin12wchar_with_len , pg_wchar2single_with_len , pg_latin1_mblen , pg_latin1_dsplen , pg_latin1_verifier , 1 }, /* PG_WIN1252 */
1533
+ {pg_latin12wchar_with_len , pg_wchar2single_with_len , pg_latin1_mblen , pg_latin1_dsplen , pg_latin1_verifier , 1 }, /* ISO-8859-5 */
1534
+ {pg_latin12wchar_with_len , pg_wchar2single_with_len , pg_latin1_mblen , pg_latin1_dsplen , pg_latin1_verifier , 1 }, /* ISO-8859-6 */
1535
+ {pg_latin12wchar_with_len , pg_wchar2single_with_len , pg_latin1_mblen , pg_latin1_dsplen , pg_latin1_verifier , 1 }, /* ISO-8859-7 */
1536
+ {pg_latin12wchar_with_len , pg_wchar2single_with_len , pg_latin1_mblen , pg_latin1_dsplen , pg_latin1_verifier , 1 }, /* ISO-8859-8 */
1537
+ {pg_latin12wchar_with_len , pg_wchar2single_with_len , pg_latin1_mblen , pg_latin1_dsplen , pg_latin1_verifier , 1 }, /* PG_WIN1250 */
1538
+ {pg_latin12wchar_with_len , pg_wchar2single_with_len , pg_latin1_mblen , pg_latin1_dsplen , pg_latin1_verifier , 1 }, /* PG_WIN1253 */
1539
+ {pg_latin12wchar_with_len , pg_wchar2single_with_len , pg_latin1_mblen , pg_latin1_dsplen , pg_latin1_verifier , 1 }, /* PG_WIN1254 */
1540
+ {pg_latin12wchar_with_len , pg_wchar2single_with_len , pg_latin1_mblen , pg_latin1_dsplen , pg_latin1_verifier , 1 }, /* PG_WIN1255 */
1541
+ {pg_latin12wchar_with_len , pg_wchar2single_with_len , pg_latin1_mblen , pg_latin1_dsplen , pg_latin1_verifier , 1 }, /* PG_WIN1257 */
1542
+ {pg_latin12wchar_with_len , pg_wchar2single_with_len , pg_latin1_mblen , pg_latin1_dsplen , pg_latin1_verifier , 1 }, /* PG_KOI8U */
1543
+ {0 , 0 , pg_sjis_mblen , pg_sjis_dsplen , pg_sjis_verifier , 2 }, /* PG_SJIS */
1544
+ {0 , 0 , pg_big5_mblen , pg_big5_dsplen , pg_big5_verifier , 2 }, /* PG_BIG5 */
1545
+ {0 , 0 , pg_gbk_mblen , pg_gbk_dsplen , pg_gbk_verifier , 2 }, /* PG_GBK */
1546
+ {0 , 0 , pg_uhc_mblen , pg_uhc_dsplen , pg_uhc_verifier , 2 }, /* PG_UHC */
1547
+ {0 , 0 , pg_gb18030_mblen , pg_gb18030_dsplen , pg_gb18030_verifier , 4 }, /* PG_GB18030 */
1548
+ {0 , 0 , pg_johab_mblen , pg_johab_dsplen , pg_johab_verifier , 3 }, /* PG_JOHAB */
1549
+ {0 , 0 , pg_sjis_mblen , pg_sjis_dsplen , pg_sjis_verifier , 2 } /* PG_SHIFT_JIS_2004 */
1384
1550
};
1385
1551
1386
1552
/* returns the byte length of a word for mule internal code */
0 commit comments