Skip to content

Commit 3e7fc20

Browse files
agabbasov authored and jankara committed
udf: Join functions for UTF8 and NLS conversions
There is not much sense in having separate functions for UTF8 and NLS conversions, since UTF8 encoding is actually a special case of NLS. However, although UTF8 is also supported by the general NLS framework, it would be good to have separate UTF8 character conversion functions (char2uni and uni2char) locally in the UDF code, so that they can be used even if NLS support is not enabled in the kernel configuration. Signed-off-by: Andrew Gabbasov <andrew_gabbasov@mentor.com> Signed-off-by: Jan Kara <jack@suse.cz>
1 parent 525e2c5 commit 3e7fc20

File tree

1 file changed

+90
-188
lines changed

1 file changed

+90
-188
lines changed

fs/udf/unicode.c

Lines changed: 90 additions & 188 deletions
Original file line numberDiff line numberDiff line change
@@ -76,188 +76,92 @@ static void udf_build_ustr_exact(struct ustr *dest, dstring *ptr, int exactsize)
7676
memcpy(dest->u_name, ptr + 1, exactsize - 1);
7777
}
7878

79-
/*
80-
* udf_CS0toUTF8
81-
*
82-
* PURPOSE
83-
* Convert OSTA Compressed Unicode to the UTF-8 equivalent.
84-
*
85-
* PRE-CONDITIONS
86-
* utf Pointer to UTF-8 output buffer.
87-
* ocu Pointer to OSTA Compressed Unicode input buffer
88-
* of size UDF_NAME_LEN bytes.
89-
* both of type "struct ustr *"
90-
*
91-
* POST-CONDITIONS
92-
* <return> >= 0 on success.
93-
*
94-
* HISTORY
95-
* November 12, 1997 - Andrew E. Mileski
96-
* Written, tested, and released.
97-
*/
98-
int udf_CS0toUTF8(struct ustr *utf_o, const struct ustr *ocu_i)
79+
static int udf_uni2char_utf8(wchar_t uni,
80+
unsigned char *out,
81+
int boundlen)
9982
{
100-
const uint8_t *ocu;
101-
uint8_t cmp_id, ocu_len;
102-
int i;
103-
104-
ocu_len = ocu_i->u_len;
105-
if (ocu_len == 0) {
106-
memset(utf_o, 0, sizeof(struct ustr));
107-
return 0;
108-
}
109-
110-
cmp_id = ocu_i->u_cmpID;
111-
if (cmp_id != 8 && cmp_id != 16) {
112-
memset(utf_o, 0, sizeof(struct ustr));
113-
pr_err("unknown compression code (%d) stri=%s\n",
114-
cmp_id, ocu_i->u_name);
115-
return -EINVAL;
116-
}
117-
118-
ocu = ocu_i->u_name;
119-
utf_o->u_len = 0;
120-
for (i = 0; (i < ocu_len) && (utf_o->u_len <= (UDF_NAME_LEN - 3));) {
121-
122-
/* Expand OSTA compressed Unicode to Unicode */
123-
uint32_t c = ocu[i++];
124-
if (cmp_id == 16)
125-
c = (c << 8) | ocu[i++];
126-
127-
/* Compress Unicode to UTF-8 */
128-
if (c < 0x80U)
129-
utf_o->u_name[utf_o->u_len++] = (uint8_t)c;
130-
else if (c < 0x800U) {
131-
if (utf_o->u_len > (UDF_NAME_LEN - 4))
132-
break;
133-
utf_o->u_name[utf_o->u_len++] =
134-
(uint8_t)(0xc0 | (c >> 6));
135-
utf_o->u_name[utf_o->u_len++] =
136-
(uint8_t)(0x80 | (c & 0x3f));
137-
} else {
138-
if (utf_o->u_len > (UDF_NAME_LEN - 5))
139-
break;
140-
utf_o->u_name[utf_o->u_len++] =
141-
(uint8_t)(0xe0 | (c >> 12));
142-
utf_o->u_name[utf_o->u_len++] =
143-
(uint8_t)(0x80 |
144-
((c >> 6) & 0x3f));
145-
utf_o->u_name[utf_o->u_len++] =
146-
(uint8_t)(0x80 | (c & 0x3f));
147-
}
83+
int u_len = 0;
84+
85+
if (boundlen <= 0)
86+
return -ENAMETOOLONG;
87+
88+
if (uni < 0x80) {
89+
out[u_len++] = (unsigned char)uni;
90+
} else if (uni < 0x800) {
91+
if (boundlen < 2)
92+
return -ENAMETOOLONG;
93+
out[u_len++] = (unsigned char)(0xc0 | (uni >> 6));
94+
out[u_len++] = (unsigned char)(0x80 | (uni & 0x3f));
95+
} else {
96+
if (boundlen < 3)
97+
return -ENAMETOOLONG;
98+
out[u_len++] = (unsigned char)(0xe0 | (uni >> 12));
99+
out[u_len++] = (unsigned char)(0x80 | ((uni >> 6) & 0x3f));
100+
out[u_len++] = (unsigned char)(0x80 | (uni & 0x3f));
148101
}
149-
utf_o->u_cmpID = 8;
150-
151-
return utf_o->u_len;
102+
return u_len;
152103
}
153104

154-
/*
155-
*
156-
* udf_UTF8toCS0
157-
*
158-
* PURPOSE
159-
* Convert UTF-8 to the OSTA Compressed Unicode equivalent.
160-
*
161-
* DESCRIPTION
162-
* This routine is only called by udf_lookup().
163-
*
164-
* PRE-CONDITIONS
165-
* ocu Pointer to OSTA Compressed Unicode output
166-
* buffer of size UDF_NAME_LEN bytes.
167-
* utf Pointer to UTF-8 input buffer.
168-
* utf_len Length of UTF-8 input buffer in bytes.
169-
*
170-
* POST-CONDITIONS
171-
* <return> Zero on success.
172-
*
173-
* HISTORY
174-
* November 12, 1997 - Andrew E. Mileski
175-
* Written, tested, and released.
176-
*/
177-
static int udf_UTF8toCS0(dstring *ocu, struct ustr *utf, int length)
105+
static int udf_char2uni_utf8(const unsigned char *in,
106+
int boundlen,
107+
wchar_t *uni)
178108
{
179-
unsigned c, i, max_val, utf_char;
180-
int utf_cnt, u_len, u_ch;
109+
unsigned int utf_char;
110+
unsigned char c;
111+
int utf_cnt, u_len;
181112

182-
memset(ocu, 0, sizeof(dstring) * length);
183-
ocu[0] = 8;
184-
max_val = 0xffU;
185-
u_ch = 1;
186-
187-
try_again:
188-
u_len = 0U;
189-
utf_char = 0U;
190-
utf_cnt = 0U;
191-
for (i = 0U; i < utf->u_len; i++) {
192-
/* Name didn't fit? */
193-
if (u_len + 1 + u_ch >= length)
194-
return 0;
195-
196-
c = (uint8_t)utf->u_name[i];
113+
utf_char = 0;
114+
utf_cnt = 0;
115+
for (u_len = 0; u_len < boundlen;) {
116+
c = in[u_len++];
197117

198118
/* Complete a multi-byte UTF-8 character */
199119
if (utf_cnt) {
200-
utf_char = (utf_char << 6) | (c & 0x3fU);
120+
utf_char = (utf_char << 6) | (c & 0x3f);
201121
if (--utf_cnt)
202122
continue;
203123
} else {
204124
/* Check for a multi-byte UTF-8 character */
205-
if (c & 0x80U) {
125+
if (c & 0x80) {
206126
/* Start a multi-byte UTF-8 character */
207-
if ((c & 0xe0U) == 0xc0U) {
208-
utf_char = c & 0x1fU;
127+
if ((c & 0xe0) == 0xc0) {
128+
utf_char = c & 0x1f;
209129
utf_cnt = 1;
210-
} else if ((c & 0xf0U) == 0xe0U) {
211-
utf_char = c & 0x0fU;
130+
} else if ((c & 0xf0) == 0xe0) {
131+
utf_char = c & 0x0f;
212132
utf_cnt = 2;
213-
} else if ((c & 0xf8U) == 0xf0U) {
214-
utf_char = c & 0x07U;
133+
} else if ((c & 0xf8) == 0xf0) {
134+
utf_char = c & 0x07;
215135
utf_cnt = 3;
216-
} else if ((c & 0xfcU) == 0xf8U) {
217-
utf_char = c & 0x03U;
136+
} else if ((c & 0xfc) == 0xf8) {
137+
utf_char = c & 0x03;
218138
utf_cnt = 4;
219-
} else if ((c & 0xfeU) == 0xfcU) {
220-
utf_char = c & 0x01U;
139+
} else if ((c & 0xfe) == 0xfc) {
140+
utf_char = c & 0x01;
221141
utf_cnt = 5;
222142
} else {
223-
goto error_out;
143+
utf_cnt = -1;
144+
break;
224145
}
225146
continue;
226147
} else {
227148
/* Single byte UTF-8 character (most common) */
228149
utf_char = c;
229150
}
230151
}
231-
232-
/* Choose no compression if necessary */
233-
if (utf_char > max_val) {
234-
if (max_val == 0xffU) {
235-
max_val = 0xffffU;
236-
ocu[0] = (uint8_t)0x10U;
237-
u_ch = 2;
238-
goto try_again;
239-
}
240-
goto error_out;
241-
}
242-
243-
if (max_val == 0xffffU)
244-
ocu[++u_len] = (uint8_t)(utf_char >> 8);
245-
ocu[++u_len] = (uint8_t)(utf_char & 0xffU);
152+
*uni = utf_char;
153+
break;
246154
}
247-
248155
if (utf_cnt) {
249-
error_out:
250-
ocu[++u_len] = '?';
251-
printk(KERN_DEBUG pr_fmt("bad UTF-8 character\n"));
156+
*uni = '?';
157+
return -EINVAL;
252158
}
253-
254-
ocu[length - 1] = (uint8_t)u_len + 1;
255-
256-
return u_len + 1;
159+
return u_len;
257160
}
258161

259-
static int udf_CS0toNLS(struct nls_table *nls, struct ustr *utf_o,
260-
const struct ustr *ocu_i)
162+
static int udf_name_from_CS0(struct ustr *utf_o,
163+
const struct ustr *ocu_i,
164+
int (*conv_f)(wchar_t, unsigned char *, int))
261165
{
262166
const uint8_t *ocu;
263167
uint8_t cmp_id, ocu_len;
@@ -286,11 +190,13 @@ static int udf_CS0toNLS(struct nls_table *nls, struct ustr *utf_o,
286190
if (cmp_id == 16)
287191
c = (c << 8) | ocu[i++];
288192

289-
len = nls->uni2char(c, &utf_o->u_name[utf_o->u_len],
290-
UDF_NAME_LEN - 2 - utf_o->u_len);
193+
len = conv_f(c, &utf_o->u_name[utf_o->u_len],
194+
UDF_NAME_LEN - 2 - utf_o->u_len);
291195
/* Valid character? */
292196
if (len >= 0)
293197
utf_o->u_len += len;
198+
else if (len == -ENAMETOOLONG)
199+
break;
294200
else
295201
utf_o->u_name[utf_o->u_len++] = '?';
296202
}
@@ -299,26 +205,26 @@ static int udf_CS0toNLS(struct nls_table *nls, struct ustr *utf_o,
299205
return utf_o->u_len;
300206
}
301207

302-
static int udf_NLStoCS0(struct nls_table *nls, dstring *ocu, struct ustr *uni,
303-
int length)
208+
static int udf_name_to_CS0(dstring *ocu, struct ustr *uni, int length,
209+
int (*conv_f)(const unsigned char *, int, wchar_t *))
304210
{
305-
int len;
306-
unsigned i, max_val;
307-
uint16_t uni_char;
211+
int i, len;
212+
unsigned int max_val;
213+
wchar_t uni_char;
308214
int u_len, u_ch;
309215

310216
memset(ocu, 0, sizeof(dstring) * length);
311217
ocu[0] = 8;
312-
max_val = 0xffU;
218+
max_val = 0xff;
313219
u_ch = 1;
314220

315221
try_again:
316-
u_len = 0U;
317-
for (i = 0U; i < uni->u_len; i++) {
222+
u_len = 0;
223+
for (i = 0; i < uni->u_len; i++) {
318224
/* Name didn't fit? */
319225
if (u_len + 1 + u_ch >= length)
320226
return 0;
321-
len = nls->char2uni(&uni->u_name[i], uni->u_len - i, &uni_char);
227+
len = conv_f(&uni->u_name[i], uni->u_len - i, &uni_char);
322228
if (!len)
323229
continue;
324230
/* Invalid character, deal with it */
@@ -328,26 +234,32 @@ static int udf_NLStoCS0(struct nls_table *nls, dstring *ocu, struct ustr *uni,
328234
}
329235

330236
if (uni_char > max_val) {
331-
max_val = 0xffffU;
332-
ocu[0] = (uint8_t)0x10U;
237+
max_val = 0xffff;
238+
ocu[0] = 0x10;
333239
u_ch = 2;
334240
goto try_again;
335241
}
336242

337-
if (max_val == 0xffffU)
243+
if (max_val == 0xffff)
338244
ocu[++u_len] = (uint8_t)(uni_char >> 8);
339-
ocu[++u_len] = (uint8_t)(uni_char & 0xffU);
245+
ocu[++u_len] = (uint8_t)(uni_char & 0xff);
340246
i += len - 1;
341247
}
342248

343249
ocu[length - 1] = (uint8_t)u_len + 1;
344250
return u_len + 1;
345251
}
346252

253+
int udf_CS0toUTF8(struct ustr *utf_o, const struct ustr *ocu_i)
254+
{
255+
return udf_name_from_CS0(utf_o, ocu_i, udf_uni2char_utf8);
256+
}
257+
347258
int udf_get_filename(struct super_block *sb, uint8_t *sname, int slen,
348259
uint8_t *dname, int dlen)
349260
{
350261
struct ustr *filename, *unifilename;
262+
int (*conv_f)(wchar_t, unsigned char *, int);
351263
int ret;
352264

353265
if (!slen)
@@ -365,23 +277,18 @@ int udf_get_filename(struct super_block *sb, uint8_t *sname, int slen,
365277

366278
udf_build_ustr_exact(unifilename, sname, slen);
367279
if (UDF_QUERY_FLAG(sb, UDF_FLAG_UTF8)) {
368-
ret = udf_CS0toUTF8(filename, unifilename);
369-
if (ret < 0) {
370-
udf_debug("Failed in udf_get_filename: sname = %s\n",
371-
sname);
372-
goto out2;
373-
}
280+
conv_f = udf_uni2char_utf8;
374281
} else if (UDF_QUERY_FLAG(sb, UDF_FLAG_NLS_MAP)) {
375-
ret = udf_CS0toNLS(UDF_SB(sb)->s_nls_map, filename,
376-
unifilename);
377-
if (ret < 0) {
378-
udf_debug("Failed in udf_get_filename: sname = %s\n",
379-
sname);
380-
goto out2;
381-
}
282+
conv_f = UDF_SB(sb)->s_nls_map->uni2char;
382283
} else
383284
BUG();
384285

286+
ret = udf_name_from_CS0(filename, unifilename, conv_f);
287+
if (ret < 0) {
288+
udf_debug("Failed in udf_get_filename: sname = %s\n", sname);
289+
goto out2;
290+
}
291+
385292
ret = udf_translate_to_linux(dname, dlen,
386293
filename->u_name, filename->u_len,
387294
unifilename->u_name, unifilename->u_len);
@@ -399,24 +306,19 @@ int udf_put_filename(struct super_block *sb, const uint8_t *sname, int slen,
399306
uint8_t *dname, int dlen)
400307
{
401308
struct ustr unifilename;
402-
int namelen;
309+
int (*conv_f)(const unsigned char *, int, wchar_t *);
403310

404311
if (!udf_char_to_ustr(&unifilename, sname, slen))
405312
return 0;
406313

407314
if (UDF_QUERY_FLAG(sb, UDF_FLAG_UTF8)) {
408-
namelen = udf_UTF8toCS0(dname, &unifilename, dlen);
409-
if (!namelen)
410-
return 0;
315+
conv_f = udf_char2uni_utf8;
411316
} else if (UDF_QUERY_FLAG(sb, UDF_FLAG_NLS_MAP)) {
412-
namelen = udf_NLStoCS0(UDF_SB(sb)->s_nls_map, dname,
413-
&unifilename, dlen);
414-
if (!namelen)
415-
return 0;
317+
conv_f = UDF_SB(sb)->s_nls_map->char2uni;
416318
} else
417-
return 0;
319+
BUG();
418320

419-
return namelen;
321+
return udf_name_to_CS0(dname, &unifilename, dlen, conv_f);
420322
}
421323

422324
#define ILLEGAL_CHAR_MARK '_'

0 commit comments

Comments
 (0)