@@ -76,188 +76,92 @@ static void udf_build_ustr_exact(struct ustr *dest, dstring *ptr, int exactsize)
76
76
memcpy (dest -> u_name , ptr + 1 , exactsize - 1 );
77
77
}
78
78
79
- /*
80
- * udf_CS0toUTF8
81
- *
82
- * PURPOSE
83
- * Convert OSTA Compressed Unicode to the UTF-8 equivalent.
84
- *
85
- * PRE-CONDITIONS
86
- * utf Pointer to UTF-8 output buffer.
87
- * ocu Pointer to OSTA Compressed Unicode input buffer
88
- * of size UDF_NAME_LEN bytes.
89
- * both of type "struct ustr *"
90
- *
91
- * POST-CONDITIONS
92
- * <return> >= 0 on success.
93
- *
94
- * HISTORY
95
- * November 12, 1997 - Andrew E. Mileski
96
- * Written, tested, and released.
97
- */
98
- int udf_CS0toUTF8 (struct ustr * utf_o , const struct ustr * ocu_i )
79
/*
 * udf_uni2char_utf8
 *
 * PURPOSE
 *	Encode a single Unicode code point as UTF-8.
 *
 * PRE-CONDITIONS
 *	uni		Code point to encode.
 *	out		Output buffer.
 *	boundlen	Number of bytes that may be written to out.
 *
 * POST-CONDITIONS
 *	<return>	Number of bytes stored in out (1 to 4) on success,
 *			-ENAMETOOLONG if the encoded form needs more than
 *			boundlen bytes, -EINVAL if uni is not a code point
 *			(above U+10FFFF, or negative where wchar_t is
 *			signed).  Nothing is written to out on failure.
 *
 * The signature matches the conversion callback that udf_name_from_CS0()
 * takes, which stops on -ENAMETOOLONG and substitutes '?' for any other
 * negative return.
 */
static int udf_uni2char_utf8(wchar_t uni,
			     unsigned char *out,
			     int boundlen)
{
	/*
	 * Work on a fixed-width unsigned copy: the width and signedness of
	 * wchar_t differ between environments, and the range checks below
	 * must behave the same in all of them.
	 */
	uint32_t cp = (uint32_t)uni;
	int u_len = 0;

	if (boundlen <= 0)
		return -ENAMETOOLONG;

	if (cp < 0x80) {
		out[u_len++] = (unsigned char)cp;
	} else if (cp < 0x800) {
		if (boundlen < 2)
			return -ENAMETOOLONG;
		out[u_len++] = (unsigned char)(0xc0 | (cp >> 6));
		out[u_len++] = (unsigned char)(0x80 | (cp & 0x3f));
	} else if (cp < 0x10000) {
		if (boundlen < 3)
			return -ENAMETOOLONG;
		out[u_len++] = (unsigned char)(0xe0 | (cp >> 12));
		out[u_len++] = (unsigned char)(0x80 | ((cp >> 6) & 0x3f));
		out[u_len++] = (unsigned char)(0x80 | (cp & 0x3f));
	} else if (cp <= 0x10ffff) {
		/* Only reachable when wchar_t is wider than 16 bits */
		if (boundlen < 4)
			return -ENAMETOOLONG;
		out[u_len++] = (unsigned char)(0xf0 | (cp >> 18));
		out[u_len++] = (unsigned char)(0x80 | ((cp >> 12) & 0x3f));
		out[u_len++] = (unsigned char)(0x80 | ((cp >> 6) & 0x3f));
		out[u_len++] = (unsigned char)(0x80 | (cp & 0x3f));
	} else {
		/* Not a Unicode code point; let the caller substitute */
		return -EINVAL;
	}

	return u_len;
}
153
104
154
- /*
155
- *
156
- * udf_UTF8toCS0
157
- *
158
- * PURPOSE
159
- * Convert UTF-8 to the OSTA Compressed Unicode equivalent.
160
- *
161
- * DESCRIPTION
162
- * This routine is only called by udf_lookup().
163
- *
164
- * PRE-CONDITIONS
165
- * ocu Pointer to OSTA Compressed Unicode output
166
- * buffer of size UDF_NAME_LEN bytes.
167
- * utf Pointer to UTF-8 input buffer.
168
- * utf_len Length of UTF-8 input buffer in bytes.
169
- *
170
- * POST-CONDITIONS
171
- * <return> Zero on success.
172
- *
173
- * HISTORY
174
- * November 12, 1997 - Andrew E. Mileski
175
- * Written, tested, and released.
176
- */
177
- static int udf_UTF8toCS0 (dstring * ocu , struct ustr * utf , int length )
105
/*
 * udf_char2uni_utf8
 *
 * PURPOSE
 *	Decode the first UTF-8 character of a byte string.
 *
 * PRE-CONDITIONS
 *	in		UTF-8 input bytes.
 *	boundlen	Number of bytes available at in.
 *	uni		Receives the decoded code point.
 *
 * POST-CONDITIONS
 *	<return>	Number of input bytes consumed on success; 0 when
 *			boundlen <= 0 (*uni is left untouched in that case).
 *			-EINVAL, with *uni set to '?', when the input starts
 *			with an invalid lead byte, a continuation byte is
 *			malformed, the sequence is cut short by boundlen, or
 *			the decoded value is above U+10FFFF or does not fit
 *			in wchar_t.
 */
static int udf_char2uni_utf8(const unsigned char *in,
			     int boundlen,
			     wchar_t *uni)
{
	unsigned int utf_char = 0;
	unsigned char c;
	int utf_cnt = 0;	/* continuation bytes still expected; -1 = error */
	int u_len;

	for (u_len = 0; u_len < boundlen;) {
		c = in[u_len++];

		if (utf_cnt) {
			/* Every byte after the lead byte must be 10xxxxxx */
			if ((c & 0xc0) != 0x80) {
				utf_cnt = -1;
				break;
			}
			/* Complete a multi-byte UTF-8 character */
			utf_char = (utf_char << 6) | (c & 0x3f);
			if (--utf_cnt)
				continue;
		} else if (c & 0x80) {
			/* Start a multi-byte UTF-8 character */
			if ((c & 0xe0) == 0xc0) {
				utf_char = c & 0x1f;
				utf_cnt = 1;
			} else if ((c & 0xf0) == 0xe0) {
				utf_char = c & 0x0f;
				utf_cnt = 2;
			} else if ((c & 0xf8) == 0xf0) {
				utf_char = c & 0x07;
				utf_cnt = 3;
			} else if ((c & 0xfc) == 0xf8) {
				utf_char = c & 0x03;
				utf_cnt = 4;
			} else if ((c & 0xfe) == 0xfc) {
				utf_char = c & 0x01;
				utf_cnt = 5;
			} else {
				utf_cnt = -1;
				break;
			}
			continue;
		} else {
			/* Single byte UTF-8 character (most common) */
			utf_char = c;
		}

		/*
		 * Refuse values that are not code points or that would be
		 * silently truncated when stored through uni.
		 */
		if (utf_char > 0x10ffff ||
		    (unsigned int)(wchar_t)utf_char != utf_char) {
			utf_cnt = -1;
			break;
		}
		*uni = (wchar_t)utf_char;
		break;
	}

	/* Non-zero here means a bad byte or a sequence that ran out of input */
	if (utf_cnt) {
		*uni = '?';
		return -EINVAL;
	}
	return u_len;
}
258
161
259
- static int udf_CS0toNLS (struct nls_table * nls , struct ustr * utf_o ,
260
- const struct ustr * ocu_i )
162
+ static int udf_name_from_CS0 (struct ustr * utf_o ,
163
+ const struct ustr * ocu_i ,
164
+ int (* conv_f )(wchar_t , unsigned char * , int ))
261
165
{
262
166
const uint8_t * ocu ;
263
167
uint8_t cmp_id , ocu_len ;
@@ -286,11 +190,13 @@ static int udf_CS0toNLS(struct nls_table *nls, struct ustr *utf_o,
286
190
if (cmp_id == 16 )
287
191
c = (c << 8 ) | ocu [i ++ ];
288
192
289
- len = nls -> uni2char (c , & utf_o -> u_name [utf_o -> u_len ],
290
- UDF_NAME_LEN - 2 - utf_o -> u_len );
193
+ len = conv_f (c , & utf_o -> u_name [utf_o -> u_len ],
194
+ UDF_NAME_LEN - 2 - utf_o -> u_len );
291
195
/* Valid character? */
292
196
if (len >= 0 )
293
197
utf_o -> u_len += len ;
198
+ else if (len == - ENAMETOOLONG )
199
+ break ;
294
200
else
295
201
utf_o -> u_name [utf_o -> u_len ++ ] = '?' ;
296
202
}
@@ -299,26 +205,26 @@ static int udf_CS0toNLS(struct nls_table *nls, struct ustr *utf_o,
299
205
return utf_o -> u_len ;
300
206
}
301
207
302
- static int udf_NLStoCS0 ( struct nls_table * nls , dstring * ocu , struct ustr * uni ,
303
- int length )
208
+ static int udf_name_to_CS0 ( dstring * ocu , struct ustr * uni , int length ,
209
+ int ( * conv_f )( const unsigned char * , int , wchar_t * ) )
304
210
{
305
- int len ;
306
- unsigned i , max_val ;
307
- uint16_t uni_char ;
211
+ int i , len ;
212
+ unsigned int max_val ;
213
+ wchar_t uni_char ;
308
214
int u_len , u_ch ;
309
215
310
216
memset (ocu , 0 , sizeof (dstring ) * length );
311
217
ocu [0 ] = 8 ;
312
- max_val = 0xffU ;
218
+ max_val = 0xff ;
313
219
u_ch = 1 ;
314
220
315
221
try_again :
316
- u_len = 0U ;
317
- for (i = 0U ; i < uni -> u_len ; i ++ ) {
222
+ u_len = 0 ;
223
+ for (i = 0 ; i < uni -> u_len ; i ++ ) {
318
224
/* Name didn't fit? */
319
225
if (u_len + 1 + u_ch >= length )
320
226
return 0 ;
321
- len = nls -> char2uni (& uni -> u_name [i ], uni -> u_len - i , & uni_char );
227
+ len = conv_f (& uni -> u_name [i ], uni -> u_len - i , & uni_char );
322
228
if (!len )
323
229
continue ;
324
230
/* Invalid character, deal with it */
@@ -328,26 +234,32 @@ static int udf_NLStoCS0(struct nls_table *nls, dstring *ocu, struct ustr *uni,
328
234
}
329
235
330
236
if (uni_char > max_val ) {
331
- max_val = 0xffffU ;
332
- ocu [0 ] = ( uint8_t ) 0x10U ;
237
+ max_val = 0xffff ;
238
+ ocu [0 ] = 0x10 ;
333
239
u_ch = 2 ;
334
240
goto try_again ;
335
241
}
336
242
337
- if (max_val == 0xffffU )
243
+ if (max_val == 0xffff )
338
244
ocu [++ u_len ] = (uint8_t )(uni_char >> 8 );
339
- ocu [++ u_len ] = (uint8_t )(uni_char & 0xffU );
245
+ ocu [++ u_len ] = (uint8_t )(uni_char & 0xff );
340
246
i += len - 1 ;
341
247
}
342
248
343
249
ocu [length - 1 ] = (uint8_t )u_len + 1 ;
344
250
return u_len + 1 ;
345
251
}
346
252
253
+ int udf_CS0toUTF8 (struct ustr * utf_o , const struct ustr * ocu_i )
254
+ {
255
+ return udf_name_from_CS0 (utf_o , ocu_i , udf_uni2char_utf8 );
256
+ }
257
+
347
258
int udf_get_filename (struct super_block * sb , uint8_t * sname , int slen ,
348
259
uint8_t * dname , int dlen )
349
260
{
350
261
struct ustr * filename , * unifilename ;
262
+ int (* conv_f )(wchar_t , unsigned char * , int );
351
263
int ret ;
352
264
353
265
if (!slen )
@@ -365,23 +277,18 @@ int udf_get_filename(struct super_block *sb, uint8_t *sname, int slen,
365
277
366
278
udf_build_ustr_exact (unifilename , sname , slen );
367
279
if (UDF_QUERY_FLAG (sb , UDF_FLAG_UTF8 )) {
368
- ret = udf_CS0toUTF8 (filename , unifilename );
369
- if (ret < 0 ) {
370
- udf_debug ("Failed in udf_get_filename: sname = %s\n" ,
371
- sname );
372
- goto out2 ;
373
- }
280
+ conv_f = udf_uni2char_utf8 ;
374
281
} else if (UDF_QUERY_FLAG (sb , UDF_FLAG_NLS_MAP )) {
375
- ret = udf_CS0toNLS (UDF_SB (sb )-> s_nls_map , filename ,
376
- unifilename );
377
- if (ret < 0 ) {
378
- udf_debug ("Failed in udf_get_filename: sname = %s\n" ,
379
- sname );
380
- goto out2 ;
381
- }
282
+ conv_f = UDF_SB (sb )-> s_nls_map -> uni2char ;
382
283
} else
383
284
BUG ();
384
285
286
+ ret = udf_name_from_CS0 (filename , unifilename , conv_f );
287
+ if (ret < 0 ) {
288
+ udf_debug ("Failed in udf_get_filename: sname = %s\n" , sname );
289
+ goto out2 ;
290
+ }
291
+
385
292
ret = udf_translate_to_linux (dname , dlen ,
386
293
filename -> u_name , filename -> u_len ,
387
294
unifilename -> u_name , unifilename -> u_len );
@@ -399,24 +306,19 @@ int udf_put_filename(struct super_block *sb, const uint8_t *sname, int slen,
399
306
uint8_t * dname , int dlen )
400
307
{
401
308
struct ustr unifilename ;
402
- int namelen ;
309
+ int ( * conv_f )( const unsigned char * , int , wchar_t * ) ;
403
310
404
311
if (!udf_char_to_ustr (& unifilename , sname , slen ))
405
312
return 0 ;
406
313
407
314
if (UDF_QUERY_FLAG (sb , UDF_FLAG_UTF8 )) {
408
- namelen = udf_UTF8toCS0 (dname , & unifilename , dlen );
409
- if (!namelen )
410
- return 0 ;
315
+ conv_f = udf_char2uni_utf8 ;
411
316
} else if (UDF_QUERY_FLAG (sb , UDF_FLAG_NLS_MAP )) {
412
- namelen = udf_NLStoCS0 (UDF_SB (sb )-> s_nls_map , dname ,
413
- & unifilename , dlen );
414
- if (!namelen )
415
- return 0 ;
317
+ conv_f = UDF_SB (sb )-> s_nls_map -> char2uni ;
416
318
} else
417
- return 0 ;
319
+ BUG () ;
418
320
419
- return namelen ;
321
+ return udf_name_to_CS0 ( dname , & unifilename , dlen , conv_f ) ;
420
322
}
421
323
422
324
#define ILLEGAL_CHAR_MARK '_'
0 commit comments