Skip to content

Commit 188065c

Browse files
committed
Unicode conversion fix suggested by Jan Varga...
--------------------------------------------------
Subject: Bug in unicode conversion ...
From: Jan Varga <varga@utcru.sk>
To: t-ishii@sra.co.jp
Date: Sat, 18 Nov 2000 17:41:20 +0100 (CET)

Hi, I tried this new feature in PostgreSQL and found one bug. The script UCS_to_8859.pl skips input lines where (1) code < 0x80 or (2) ucs < 0x100. I think the second condition is not a good idea, because some codes in ISO8859-2 map to a ucs value < 0x100 (e.g. code 0xE9 maps to ucs 0x00E9).
--------------------------------------------------
1 parent e3269ca commit 188065c

9 files changed

+565
-11
lines changed

src/backend/utils/mb/Unicode/UCS_to_8859.pl

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
#
33
# Copyright 2001 by PostgreSQL Global Development Group
44
#
5-
# $Id: UCS_to_8859.pl,v 1.1 2000/10/30 10:40:29 ishii Exp $
5+
# $Id: UCS_to_8859.pl,v 1.2 2000/11/26 10:40:43 ishii Exp $
66
#
77
# Generate UTF-8 <--> ISO8859 code conversion tables from
88
# map files provided by Unicode organization.
@@ -36,7 +36,7 @@
3636
( $c, $u, $rest ) = split;
3737
$ucs = hex($u);
3838
$code = hex($c);
39-
if( $code >= 0x80 && $ucs >= 0x100 ){
39+
if( $code >= 0x80){
4040
$utf = &ucs2utf($ucs);
4141
if( $array{ $utf } ne "" ){
4242
printf STDERR "Warning: duplicate unicode: %04x\n",$ucs;
@@ -80,7 +80,7 @@
8080
( $c, $u, $rest ) = split;
8181
$ucs = hex($u);
8282
$code = hex($c);
83-
if( $code >= 0x80 && $ucs >= 0x100 ){
83+
if($code >= 0x80){
8484
$utf = &ucs2utf($ucs);
8585
if( $array{ $utf } ne "" ){
8686
printf STDERR "Warning: duplicate unicode: %04x\n",$ucs;
Lines changed: 72 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,61 @@
1-
static pg_local_to_utf LUmapISO8859_2[ 57 ] = {
1+
static pg_local_to_utf LUmapISO8859_2[ 128 ] = {
2+
{0x0080, 0xc280},
3+
{0x0081, 0xc281},
4+
{0x0082, 0xc282},
5+
{0x0083, 0xc283},
6+
{0x0084, 0xc284},
7+
{0x0085, 0xc285},
8+
{0x0086, 0xc286},
9+
{0x0087, 0xc287},
10+
{0x0088, 0xc288},
11+
{0x0089, 0xc289},
12+
{0x008a, 0xc28a},
13+
{0x008b, 0xc28b},
14+
{0x008c, 0xc28c},
15+
{0x008d, 0xc28d},
16+
{0x008e, 0xc28e},
17+
{0x008f, 0xc28f},
18+
{0x0090, 0xc290},
19+
{0x0091, 0xc291},
20+
{0x0092, 0xc292},
21+
{0x0093, 0xc293},
22+
{0x0094, 0xc294},
23+
{0x0095, 0xc295},
24+
{0x0096, 0xc296},
25+
{0x0097, 0xc297},
26+
{0x0098, 0xc298},
27+
{0x0099, 0xc299},
28+
{0x009a, 0xc29a},
29+
{0x009b, 0xc29b},
30+
{0x009c, 0xc29c},
31+
{0x009d, 0xc29d},
32+
{0x009e, 0xc29e},
33+
{0x009f, 0xc29f},
34+
{0x00a0, 0xc2a0},
235
{0x00a1, 0xc484},
336
{0x00a2, 0xcb98},
437
{0x00a3, 0xc581},
38+
{0x00a4, 0xc2a4},
539
{0x00a5, 0xc4bd},
640
{0x00a6, 0xc59a},
41+
{0x00a7, 0xc2a7},
42+
{0x00a8, 0xc2a8},
743
{0x00a9, 0xc5a0},
844
{0x00aa, 0xc59e},
945
{0x00ab, 0xc5a4},
1046
{0x00ac, 0xc5b9},
47+
{0x00ad, 0xc2ad},
1148
{0x00ae, 0xc5bd},
1249
{0x00af, 0xc5bb},
50+
{0x00b0, 0xc2b0},
1351
{0x00b1, 0xc485},
1452
{0x00b2, 0xcb9b},
1553
{0x00b3, 0xc582},
54+
{0x00b4, 0xc2b4},
1655
{0x00b5, 0xc4be},
1756
{0x00b6, 0xc59b},
1857
{0x00b7, 0xcb87},
58+
{0x00b8, 0xc2b8},
1959
{0x00b9, 0xc5a1},
2060
{0x00ba, 0xc59f},
2161
{0x00bb, 0xc5a5},
@@ -24,36 +64,67 @@ static pg_local_to_utf LUmapISO8859_2[ 57 ] = {
2464
{0x00be, 0xc5be},
2565
{0x00bf, 0xc5bc},
2666
{0x00c0, 0xc594},
67+
{0x00c1, 0xc381},
68+
{0x00c2, 0xc382},
2769
{0x00c3, 0xc482},
70+
{0x00c4, 0xc384},
2871
{0x00c5, 0xc4b9},
2972
{0x00c6, 0xc486},
73+
{0x00c7, 0xc387},
3074
{0x00c8, 0xc48c},
75+
{0x00c9, 0xc389},
3176
{0x00ca, 0xc498},
77+
{0x00cb, 0xc38b},
3278
{0x00cc, 0xc49a},
79+
{0x00cd, 0xc38d},
80+
{0x00ce, 0xc38e},
3381
{0x00cf, 0xc48e},
3482
{0x00d0, 0xc490},
3583
{0x00d1, 0xc583},
3684
{0x00d2, 0xc587},
85+
{0x00d3, 0xc393},
86+
{0x00d4, 0xc394},
3787
{0x00d5, 0xc590},
88+
{0x00d6, 0xc396},
89+
{0x00d7, 0xc397},
3890
{0x00d8, 0xc598},
3991
{0x00d9, 0xc5ae},
92+
{0x00da, 0xc39a},
4093
{0x00db, 0xc5b0},
94+
{0x00dc, 0xc39c},
95+
{0x00dd, 0xc39d},
4196
{0x00de, 0xc5a2},
97+
{0x00df, 0xc39f},
4298
{0x00e0, 0xc595},
99+
{0x00e1, 0xc3a1},
100+
{0x00e2, 0xc3a2},
43101
{0x00e3, 0xc483},
102+
{0x00e4, 0xc3a4},
44103
{0x00e5, 0xc4ba},
45104
{0x00e6, 0xc487},
105+
{0x00e7, 0xc3a7},
46106
{0x00e8, 0xc48d},
107+
{0x00e9, 0xc3a9},
47108
{0x00ea, 0xc499},
109+
{0x00eb, 0xc3ab},
48110
{0x00ec, 0xc49b},
111+
{0x00ed, 0xc3ad},
112+
{0x00ee, 0xc3ae},
49113
{0x00ef, 0xc48f},
50114
{0x00f0, 0xc491},
51115
{0x00f1, 0xc584},
52116
{0x00f2, 0xc588},
117+
{0x00f3, 0xc3b3},
118+
{0x00f4, 0xc3b4},
53119
{0x00f5, 0xc591},
120+
{0x00f6, 0xc3b6},
121+
{0x00f7, 0xc3b7},
54122
{0x00f8, 0xc599},
55123
{0x00f9, 0xc5af},
124+
{0x00fa, 0xc3ba},
56125
{0x00fb, 0xc5b1},
126+
{0x00fc, 0xc3bc},
127+
{0x00fd, 0xc3bd},
57128
{0x00fe, 0xc5a3},
58129
{0x00ff, 0xcb99}
59130
};

src/backend/utils/mb/Unicode/iso8859_3_to_utf8.map

Lines changed: 94 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,29 +1,122 @@
1-
static pg_local_to_utf LUmapISO8859_3[ 28 ] = {
1+
static pg_local_to_utf LUmapISO8859_3[ 121 ] = {
2+
{0x0080, 0xc280},
3+
{0x0081, 0xc281},
4+
{0x0082, 0xc282},
5+
{0x0083, 0xc283},
6+
{0x0084, 0xc284},
7+
{0x0085, 0xc285},
8+
{0x0086, 0xc286},
9+
{0x0087, 0xc287},
10+
{0x0088, 0xc288},
11+
{0x0089, 0xc289},
12+
{0x008a, 0xc28a},
13+
{0x008b, 0xc28b},
14+
{0x008c, 0xc28c},
15+
{0x008d, 0xc28d},
16+
{0x008e, 0xc28e},
17+
{0x008f, 0xc28f},
18+
{0x0090, 0xc290},
19+
{0x0091, 0xc291},
20+
{0x0092, 0xc292},
21+
{0x0093, 0xc293},
22+
{0x0094, 0xc294},
23+
{0x0095, 0xc295},
24+
{0x0096, 0xc296},
25+
{0x0097, 0xc297},
26+
{0x0098, 0xc298},
27+
{0x0099, 0xc299},
28+
{0x009a, 0xc29a},
29+
{0x009b, 0xc29b},
30+
{0x009c, 0xc29c},
31+
{0x009d, 0xc29d},
32+
{0x009e, 0xc29e},
33+
{0x009f, 0xc29f},
34+
{0x00a0, 0xc2a0},
235
{0x00a1, 0xc4a6},
336
{0x00a2, 0xcb98},
37+
{0x00a3, 0xc2a3},
38+
{0x00a4, 0xc2a4},
439
{0x00a6, 0xc4a4},
40+
{0x00a7, 0xc2a7},
41+
{0x00a8, 0xc2a8},
542
{0x00a9, 0xc4b0},
643
{0x00aa, 0xc59e},
744
{0x00ab, 0xc49e},
845
{0x00ac, 0xc4b4},
46+
{0x00ad, 0xc2ad},
947
{0x00af, 0xc5bb},
48+
{0x00b0, 0xc2b0},
1049
{0x00b1, 0xc4a7},
50+
{0x00b2, 0xc2b2},
51+
{0x00b3, 0xc2b3},
52+
{0x00b4, 0xc2b4},
53+
{0x00b5, 0xc2b5},
1154
{0x00b6, 0xc4a5},
55+
{0x00b7, 0xc2b7},
56+
{0x00b8, 0xc2b8},
1257
{0x00b9, 0xc4b1},
1358
{0x00ba, 0xc59f},
1459
{0x00bb, 0xc49f},
1560
{0x00bc, 0xc4b5},
61+
{0x00bd, 0xc2bd},
1662
{0x00bf, 0xc5bc},
63+
{0x00c0, 0xc380},
64+
{0x00c1, 0xc381},
65+
{0x00c2, 0xc382},
66+
{0x00c4, 0xc384},
1767
{0x00c5, 0xc48a},
1868
{0x00c6, 0xc488},
69+
{0x00c7, 0xc387},
70+
{0x00c8, 0xc388},
71+
{0x00c9, 0xc389},
72+
{0x00ca, 0xc38a},
73+
{0x00cb, 0xc38b},
74+
{0x00cc, 0xc38c},
75+
{0x00cd, 0xc38d},
76+
{0x00ce, 0xc38e},
77+
{0x00cf, 0xc38f},
78+
{0x00d1, 0xc391},
79+
{0x00d2, 0xc392},
80+
{0x00d3, 0xc393},
81+
{0x00d4, 0xc394},
1982
{0x00d5, 0xc4a0},
83+
{0x00d6, 0xc396},
84+
{0x00d7, 0xc397},
2085
{0x00d8, 0xc49c},
86+
{0x00d9, 0xc399},
87+
{0x00da, 0xc39a},
88+
{0x00db, 0xc39b},
89+
{0x00dc, 0xc39c},
2190
{0x00dd, 0xc5ac},
2291
{0x00de, 0xc59c},
92+
{0x00df, 0xc39f},
93+
{0x00e0, 0xc3a0},
94+
{0x00e1, 0xc3a1},
95+
{0x00e2, 0xc3a2},
96+
{0x00e4, 0xc3a4},
2397
{0x00e5, 0xc48b},
2498
{0x00e6, 0xc489},
99+
{0x00e7, 0xc3a7},
100+
{0x00e8, 0xc3a8},
101+
{0x00e9, 0xc3a9},
102+
{0x00ea, 0xc3aa},
103+
{0x00eb, 0xc3ab},
104+
{0x00ec, 0xc3ac},
105+
{0x00ed, 0xc3ad},
106+
{0x00ee, 0xc3ae},
107+
{0x00ef, 0xc3af},
108+
{0x00f1, 0xc3b1},
109+
{0x00f2, 0xc3b2},
110+
{0x00f3, 0xc3b3},
111+
{0x00f4, 0xc3b4},
25112
{0x00f5, 0xc4a1},
113+
{0x00f6, 0xc3b6},
114+
{0x00f7, 0xc3b7},
26115
{0x00f8, 0xc49d},
116+
{0x00f9, 0xc3b9},
117+
{0x00fa, 0xc3ba},
118+
{0x00fb, 0xc3bb},
119+
{0x00fc, 0xc3bc},
27120
{0x00fd, 0xc5ad},
28121
{0x00fe, 0xc59d},
29122
{0x00ff, 0xcb99}

0 commit comments

Comments
 (0)