32
32
33
33
#include "unicode_table_cns11643.h"
34
34
35
+ static int mbfl_filt_conv_euctw_wchar_flush (mbfl_convert_filter * filter );
36
+
35
37
static const unsigned char mblen_table_euctw [] = { /* 0xA1-0xFE */
36
38
1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 ,
37
39
1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 ,
@@ -71,7 +73,7 @@ const struct mbfl_convert_vtbl vtbl_euctw_wchar = {
71
73
mbfl_filt_conv_common_ctor ,
72
74
NULL ,
73
75
mbfl_filt_conv_euctw_wchar ,
74
- mbfl_filt_conv_common_flush ,
76
+ mbfl_filt_conv_euctw_wchar_flush ,
75
77
NULL ,
76
78
};
77
79
@@ -87,117 +89,97 @@ const struct mbfl_convert_vtbl vtbl_wchar_euctw = {
87
89
88
90
/* Evaluate `statement`; if its value is negative, make the enclosing function return -1.
 * There must be no whitespace between the macro name and its parameter list,
 * otherwise the preprocessor treats CK as an object-like macro. */
#define CK(statement) do { if ((statement) < 0) return (-1); } while (0)
89
91
90
- /*
91
- * EUC-TW => wchar
92
- */
93
- int
94
- mbfl_filt_conv_euctw_wchar (int c , mbfl_convert_filter * filter )
92
+ int mbfl_filt_conv_euctw_wchar (int c , mbfl_convert_filter * filter )
95
93
{
96
- int c1 , s , w , plane ;
94
+ int c1 , s , w ;
97
95
98
96
switch (filter -> status ) {
99
97
case 0 :
100
- if (c >= 0 && c < 0x80 ) { /* latin */
98
+ if (c >= 0 && c < 0x80 ) { /* latin */
101
99
CK ((* filter -> output_function )(c , filter -> data ));
102
- } else if (c > 0xa0 && c < 0xff ) { /* dbcs first byte */
100
+ } else if ((( c >= 0xA1 && c <= 0xA6 ) || ( c >= 0xC2 && c <= 0xFD )) && c != 0xC3 ) { /* 2-byte character, first byte */
103
101
filter -> status = 1 ;
104
102
filter -> cache = c ;
105
- } else if (c == 0x8e ) { /* mbcs first byte */
103
+ } else if (c == 0x8E ) { /* 4-byte character, first byte */
106
104
filter -> status = 2 ;
107
- filter -> cache = c ;
108
105
} else {
109
- w = c & MBFL_WCSGROUP_MASK ;
110
- w |= MBFL_WCSGROUP_THROUGH ;
111
- CK ((* filter -> output_function )(w , filter -> data ));
106
+ CK ((* filter -> output_function )(c | MBFL_WCSGROUP_THROUGH , filter -> data ));
112
107
}
113
108
break ;
114
109
115
- case 1 : /* mbcs second byte */
110
+ case 1 : /* 2-byte character, second byte */
116
111
filter -> status = 0 ;
117
112
c1 = filter -> cache ;
118
- if (c > 0xa0 && c < 0xff ) {
119
- w = (c1 - 0xa1 )* 94 + (c - 0xa1 );
113
+ if (c > 0xA0 && c < 0xFF ) {
114
+ w = (c1 - 0xA1 )* 94 + (c - 0xA1 );
120
115
if (w >= 0 && w < cns11643_1_ucs_table_size ) {
121
116
w = cns11643_1_ucs_table [w ];
122
117
} else {
123
118
w = 0 ;
124
119
}
125
120
if (w <= 0 ) {
126
- w = (c1 << 8 ) | c ;
127
- w &= MBFL_WCSPLANE_MASK ;
128
- w |= MBFL_WCSPLANE_CNS11643 ;
121
+ w = (c1 << 8 ) | c | MBFL_WCSPLANE_CNS11643 ;
129
122
}
130
123
CK ((* filter -> output_function )(w , filter -> data ));
131
- } else if ((c >= 0 && c < 0x21 ) || c == 0x7f ) { /* CTLs */
132
- CK ((* filter -> output_function )(c , filter -> data ));
133
124
} else {
134
- w = (c1 << 8 ) | c ;
135
- w &= MBFL_WCSGROUP_MASK ;
136
- w |= MBFL_WCSGROUP_THROUGH ;
125
+ filter -> status = filter -> cache = 0 ;
126
+ w = (c1 << 8 ) | c | MBFL_WCSGROUP_THROUGH ;
137
127
CK ((* filter -> output_function )(w , filter -> data ));
138
128
}
139
129
break ;
140
130
141
- case 2 : /* got 0x8e, first char */
142
- c1 = filter -> cache ;
143
- if ((c >= 0 && c < 0x21 ) || c == 0x7f ) { /* CTLs */
144
- CK ((* filter -> output_function )(c , filter -> data ));
145
- filter -> status = 0 ;
146
- } else if (c > 0xa0 && c < 0xaf ) {
131
+ case 2 : /* got 0x8e, second byte */
132
+ if (c == 0xA1 || c == 0xA2 || c == 0xAE ) {
147
133
filter -> status = 3 ;
148
- filter -> cache = c - 0xa1 ;
134
+ filter -> cache = c - 0xA1 ;
149
135
} else {
150
- w = (c1 << 8 ) | c ;
151
- w &= MBFL_WCSGROUP_MASK ;
152
- w |= MBFL_WCSGROUP_THROUGH ;
136
+ filter -> status = filter -> cache = 0 ;
137
+ w = 0x8E00 | c | MBFL_WCSGROUP_THROUGH ;
153
138
CK ((* filter -> output_function )(w , filter -> data ));
154
139
}
155
140
break ;
156
141
157
- case 3 : /* got 0x8e, third char */
142
+ case 3 : /* got 0x8e, third byte */
158
143
filter -> status = 0 ;
159
144
c1 = filter -> cache ;
160
- if ((c >= 0 && c < 0x21 ) || c == 0x7f ) { /* CTLs */
161
- CK ((* filter -> output_function )(c , filter -> data ));
162
- filter -> status = 0 ;
163
- } else if (c > 0xa0 && c < 0xff ) {
145
+ if (c >= 0xA1 && ((c1 == 0 && ((c >= 0xA1 && c <= 0xA6 ) || (c >= 0xC2 && c <= 0xFD )) && c != 0xC3 ) ||
146
+ (c1 == 1 && c <= 0xF2 ) || (c1 == 13 && c <= 0xE7 ))) {
164
147
filter -> status = 4 ;
165
- filter -> cache = (c1 << 8 ) + c - 0xa1 ;
148
+ filter -> cache = (c1 << 8 ) + c - 0xA1 ;
166
149
} else {
167
- w = (c1 << 8 ) | c ;
168
- w &= MBFL_WCSGROUP_MASK ;
169
- w |= MBFL_WCSGROUP_THROUGH ;
150
+ filter -> status = filter -> cache = 0 ;
151
+ w = (c1 << 8 ) | c | MBFL_WCSGROUP_THROUGH ;
170
152
CK ((* filter -> output_function )(w , filter -> data ));
171
153
}
172
154
break ;
173
155
174
- case 4 : /* mbcs fourth char */
156
+ case 4 : /* multi-byte character, fourth byte */
175
157
filter -> status = 0 ;
176
158
c1 = filter -> cache ;
177
- if (c1 >= 0x100 && c1 <= 0xdff && c > 0xa0 && c < 0xff ) {
178
- plane = (c1 & 0xf00 ) >> 8 ;
179
- s = (c1 & 0xff )* 94 + c - 0xa1 ;
159
+ if (c1 <= 0xDFF && c > 0xA0 && c < 0xFF ) {
160
+ int plane = (c1 & 0xF00 ) >> 8 ; /* This is actually the CNS-11643 plane minus one */
161
+ s = (c1 & 0xFF )* 94 + c - 0xA1 ;
180
162
w = 0 ;
181
163
if (s >= 0 ) {
182
- if (plane == 1 && s < cns11643_2_ucs_table_size ) {
164
+ /* A later version of CNS-11643 moved all the characters in "plane 14" to "plane 3",
165
+ * and added tens of thousands more characters in planes 4, 5, 6, and 7
166
+ * We only support the older version of CNS-11643
167
+ * This is the same as iconv from glibc 2.2 */
168
+ if (plane == 0 && s < cns11643_1_ucs_table_size ) {
169
+ w = cns11643_1_ucs_table [s ];
170
+ } else if (plane == 1 && s < cns11643_2_ucs_table_size ) {
183
171
w = cns11643_2_ucs_table [s ];
184
- }
185
- if (plane == 13 && s < cns11643_14_ucs_table_size ) {
172
+ } else if (plane == 13 && s < cns11643_14_ucs_table_size ) {
186
173
w = cns11643_14_ucs_table [s ];
187
174
}
188
175
}
189
176
if (w <= 0 ) {
190
- w = ((c1 & 0x7f ) << 8 ) | (c & 0x7f );
191
- w &= MBFL_WCSPLANE_MASK ;
192
- w |= MBFL_WCSPLANE_CNS11643 ;
177
+ w = ((c1 & 0x7F ) << 8 ) | (c & 0x7F ) | MBFL_WCSPLANE_CNS11643 ;
193
178
}
194
179
CK ((* filter -> output_function )(w , filter -> data ));
195
- } else if ((c >= 0 && c < 0x21 ) || c == 0x7f ) { /* CTLs */
196
- CK ((* filter -> output_function )(c , filter -> data ));
197
180
} else {
198
- w = (c1 << 8 ) | c | 0x8e0000 ;
199
- w &= MBFL_WCSGROUP_MASK ;
200
- w |= MBFL_WCSGROUP_THROUGH ;
181
+ filter -> status = filter -> cache = 0 ;
182
+ w = (c1 << 8 ) | c | 0x8e0000 | MBFL_WCSGROUP_THROUGH ;
201
183
CK ((* filter -> output_function )(w , filter -> data ));
202
184
}
203
185
break ;
@@ -210,15 +192,10 @@ mbfl_filt_conv_euctw_wchar(int c, mbfl_convert_filter *filter)
210
192
return c ;
211
193
}
212
194
213
- /*
214
- * wchar => EUC-TW
215
- */
216
- int
217
- mbfl_filt_conv_wchar_euctw (int c , mbfl_convert_filter * filter )
195
+ int mbfl_filt_conv_wchar_euctw (int c , mbfl_convert_filter * filter )
218
196
{
219
- int c1 , s , plane ;
197
+ int s = 0 ;
220
198
221
- s = 0 ;
222
199
if (c >= ucs_a1_cns11643_table_min && c < ucs_a1_cns11643_table_max ) {
223
200
s = ucs_a1_cns11643_table [c - ucs_a1_cns11643_table_min ];
224
201
} else if (c >= ucs_a2_cns11643_table_min && c < ucs_a2_cns11643_table_max ) {
@@ -230,36 +207,48 @@ mbfl_filt_conv_wchar_euctw(int c, mbfl_convert_filter *filter)
230
207
} else if (c >= ucs_r_cns11643_table_min && c < ucs_r_cns11643_table_max ) {
231
208
s = ucs_r_cns11643_table [c - ucs_r_cns11643_table_min ];
232
209
}
210
+
233
211
if (s <= 0 ) {
234
- c1 = c & ~MBFL_WCSPLANE_MASK ;
235
- if (c1 == MBFL_WCSPLANE_CNS11643 ) {
236
- s = c & MBFL_WCSPLANE_MASK ;
237
- }
238
212
if (c == 0 ) {
239
213
s = 0 ;
240
214
} else if (s <= 0 ) {
241
215
s = -1 ;
242
216
}
243
217
}
218
+
244
219
if (s >= 0 ) {
245
- plane = (s & 0x1f0000 ) >> 16 ;
246
- if (plane <= 1 ){
247
- if (s < 0x80 ) { /* latin */
220
+ int plane = (s & 0x1F0000 ) >> 16 ;
221
+ if (plane <= 1 ) {
222
+ if (s < 0x80 ) { /* latin */
248
223
CK ((* filter -> output_function )(s , filter -> data ));
249
224
} else {
250
- s = (s & 0xffff ) | 0x8080 ;
251
- CK ((* filter -> output_function )((s >> 8 ) & 0xff , filter -> data ));
252
- CK ((* filter -> output_function )(s & 0xff , filter -> data ));
225
+ s = (s & 0xFFFF ) | 0x8080 ;
226
+ CK ((* filter -> output_function )((s >> 8 ) & 0xFF , filter -> data ));
227
+ CK ((* filter -> output_function )(s & 0xFF , filter -> data ));
253
228
}
254
229
} else {
255
- s = (0x8ea00000 + (plane << 16 )) | ((s & 0xffff ) | 0x8080 );
230
+ s = (0x8EA00000 + (plane << 16 )) | ((s & 0xFFFF ) | 0x8080 );
256
231
CK ((* filter -> output_function )(0x8e , filter -> data ));
257
- CK ((* filter -> output_function )((s >> 16 ) & 0xff , filter -> data ));
258
- CK ((* filter -> output_function )((s >> 8 ) & 0xff , filter -> data ));
259
- CK ((* filter -> output_function )(s & 0xff , filter -> data ));
232
+ CK ((* filter -> output_function )((s >> 16 ) & 0xFF , filter -> data ));
233
+ CK ((* filter -> output_function )((s >> 8 ) & 0xFF , filter -> data ));
234
+ CK ((* filter -> output_function )(s & 0xFF , filter -> data ));
260
235
}
261
236
} else {
262
237
CK (mbfl_filt_conv_illegal_output (c , filter ));
263
238
}
264
239
return c ;
265
240
}
241
+
242
+ static int mbfl_filt_conv_euctw_wchar_flush (mbfl_convert_filter * filter )
243
+ {
244
+ if (filter -> status ) {
245
+ /* 2-byte or 4-byte character was truncated */
246
+ CK ((* filter -> output_function )(filter -> cache | MBFL_WCSGROUP_THROUGH , filter -> data ));
247
+ }
248
+
249
+ if (filter -> flush_function ) {
250
+ (* filter -> flush_function )(filter -> data );
251
+ }
252
+
253
+ return 0 ;
254
+ }
0 commit comments