~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~ [ freetext search ] ~ [ file search ] ~

Linux Cross Reference
Linux/fs/udf/unicode.c

Version: ~ [ 2.4.0 ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

  1 /*
  2  * unicode.c
  3  *
  4  * PURPOSE
  5  *      Routines for converting between UTF-8 and OSTA Compressed Unicode.
  6  *      Also handles filename mangling
  7  *
  8  * DESCRIPTION
  9  *      OSTA Compressed Unicode is explained in the OSTA UDF specification.
 10  *              http://www.osta.org/
 11  *      UTF-8 is explained in IETF RFC 3629 (which obsoletes RFC 2279).
 12  *              https://www.rfc-editor.org/rfc/rfc3629
 13  *
 14  * CONTACTS
 15  *      E-mail regarding any portion of the Linux UDF file system should be
 16  *      directed to the development team's mailing list (run by majordomo):
 17  *              linux_udf@hootie.lvld.hp.com
 18  *
 19  * COPYRIGHT
 20  *      This file is distributed under the terms of the GNU General Public
 21  *      License (GPL). Copies of the GPL can be obtained from:
 22  *              ftp://prep.ai.mit.edu/pub/gnu/GPL
 23  *      Each contributing author retains all rights to their own work.
 24  */
 25 
 26 
 27 #ifdef __KERNEL__
 28 #include <linux/kernel.h>
 29 #include <linux/string.h>       /* for memset */
 30 #include <linux/udf_fs.h>
 31 #else
 32 #include <string.h>
 33 #endif
 34 
 35 #include "udfdecl.h"
 36 
 37 int udf_ustr_to_dchars(Uint8 *dest, const struct ustr *src, int strlen)
 38 {
 39         if ( (!dest) || (!src) || (!strlen) || (src->u_len > strlen) )
 40                 return 0;
 41         memcpy(dest+1, src->u_name, src->u_len);
 42         dest[0] = src->u_cmpID;
 43         return src->u_len + 1;
 44 }
 45 
 46 int udf_ustr_to_char(Uint8 *dest, const struct ustr *src, int strlen)
 47 {
 48         if ( (!dest) || (!src) || (!strlen) || (src->u_len >= strlen) )
 49                 return 0;
 50         memcpy(dest, src->u_name, src->u_len);
 51         return src->u_len;
 52 }
 53 
 54 int udf_ustr_to_dstring(dstring *dest, const struct ustr *src, int dlength)
 55 {
 56         if ( udf_ustr_to_dchars(dest, src, dlength-1) )
 57         {
 58                 dest[dlength-1] = src->u_len + 1;
 59                 return dlength;
 60         }
 61         else
 62                 return 0;
 63 }
 64 
 65 int udf_dchars_to_ustr(struct ustr *dest, const Uint8 *src, int strlen)
 66 {
 67         if ( (!dest) || (!src) || (!strlen) || (strlen > UDF_NAME_LEN) )
 68                 return 0;
 69         memset(dest, 0, sizeof(struct ustr));
 70         memcpy(dest->u_name, src+1, strlen-1);
 71         dest->u_cmpID = src[0];
 72         dest->u_len = strlen-1;
 73         return strlen-1;
 74 }
 75 
 76 int udf_char_to_ustr(struct ustr *dest, const Uint8 *src, int strlen)
 77 {
 78         if ( (!dest) || (!src) || (!strlen) || (strlen >= UDF_NAME_LEN) )
 79                 return 0;
 80         memset(dest, 0, sizeof(struct ustr));
 81         memcpy(dest->u_name, src, strlen);
 82         dest->u_cmpID = 0x08;
 83         dest->u_len = strlen;
 84         return strlen;
 85 }
 86 
 87 
 88 int udf_dstring_to_ustr(struct ustr *dest, const dstring *src, int dlength)
 89 {
 90         if ( dlength && udf_dchars_to_ustr(dest, src, src[dlength-1]) )
 91                 return dlength;
 92         else
 93                 return 0;
 94 }
 95 
 96 /*
 97  * udf_build_ustr
 98  */
 99 int udf_build_ustr(struct ustr *dest, dstring *ptr, int size)
100 {
101         int usesize;
102 
103         if ( (!dest) || (!ptr) || (!size) )
104                 return -1;
105 
106         memset(dest, 0, sizeof(struct ustr));
107         usesize= (size > UDF_NAME_LEN) ? UDF_NAME_LEN : size;
108         dest->u_cmpID=ptr[0];
109         dest->u_len=ptr[size-1];
110         memcpy(dest->u_name, ptr+1, usesize-1);
111         return 0;
112 }
113 
114 /*
115  * udf_build_ustr_exact
116  */
117 int udf_build_ustr_exact(struct ustr *dest, dstring *ptr, int exactsize)
118 {
119         if ( (!dest) || (!ptr) || (!exactsize) )
120                 return -1;
121 
122         memset(dest, 0, sizeof(struct ustr));
123         dest->u_cmpID=ptr[0];
124         dest->u_len=exactsize-1;
125         memcpy(dest->u_name, ptr+1, exactsize-1);
126         return 0;
127 }
128 
129 /*
130  * udf_ocu_to_udf8
131  *
132  * PURPOSE
133  *      Convert OSTA Compressed Unicode to the UTF-8 equivalent.
134  *
135  * DESCRIPTION
136  *      This routine is only called by udf_filldir().
137  *
138  * PRE-CONDITIONS
139  *      utf                     Pointer to UTF-8 output buffer.
140  *      ocu                     Pointer to OSTA Compressed Unicode input buffer
141  *                              of size UDF_NAME_LEN bytes.
142  *                              both of type "struct ustr *"
143  *
144  * POST-CONDITIONS
145  *      <return>                Zero on success.
146  *
147  * HISTORY
148  *      November 12, 1997 - Andrew E. Mileski
149  *      Written, tested, and released.
150  */
151 int udf_CS0toUTF8(struct ustr *utf_o, struct ustr *ocu_i)
152 {
153         Uint8 *ocu;
154         Uint32 c;
155         Uint8 cmp_id, ocu_len;
156         int i;
157 
158         ocu = ocu_i->u_name;
159 
160         ocu_len = ocu_i->u_len;
161         cmp_id = ocu_i->u_cmpID;
162         utf_o->u_len = 0;
163 
164         if (ocu_len == 0)
165         {
166                 memset(utf_o, 0, sizeof(struct ustr));
167                 utf_o->u_cmpID = 0;
168                 utf_o->u_len = 0;
169                 return 0;
170         }
171 
172         if ((cmp_id != 8) && (cmp_id != 16))
173         {
174 #ifdef __KERNEL__
175                 printk(KERN_ERR "udf: unknown compression code (%d) stri=%s\n", cmp_id, ocu_i->u_name);
176 #endif
177                 return 0;
178         }
179 
180         for (i = 0; (i < ocu_len) && (utf_o->u_len <= (UDF_NAME_LEN-3)) ;)
181         {
182 
183                 /* Expand OSTA compressed Unicode to Unicode */
184                 c = ocu[i++];
185                 if (cmp_id == 16)
186                         c = (c << 8) | ocu[i++];
187 
188                 /* Compress Unicode to UTF-8 */
189                 if (c < 0x80U)
190                         utf_o->u_name[utf_o->u_len++] = (Uint8)c;
191                 else if (c < 0x800U)
192                 {
193                         utf_o->u_name[utf_o->u_len++] = (Uint8)(0xc0 | (c >> 6));
194                         utf_o->u_name[utf_o->u_len++] = (Uint8)(0x80 | (c & 0x3f));
195                 }
196                 else
197                 {
198                         utf_o->u_name[utf_o->u_len++] = (Uint8)(0xe0 | (c >> 12));
199                         utf_o->u_name[utf_o->u_len++] = (Uint8)(0x80 | ((c >> 6) & 0x3f));
200                         utf_o->u_name[utf_o->u_len++] = (Uint8)(0x80 | (c & 0x3f));
201                 }
202         }
203         utf_o->u_cmpID=8;
204         utf_o->u_hash=0L;
205         utf_o->padding=0;
206 
207         return utf_o->u_len;
208 }
209 
210 /*
211  *
212  * udf_utf8_to_ocu
213  *
214  * PURPOSE
215  *      Convert UTF-8 to the OSTA Compressed Unicode equivalent.
216  *
217  * DESCRIPTION
218  *      This routine is only called by udf_lookup().
219  *
220  * PRE-CONDITIONS
221  *      ocu                     Pointer to OSTA Compressed Unicode output
222  *                              buffer of size UDF_NAME_LEN bytes.
223  *      utf                     Pointer to UTF-8 input buffer.
224  *      utf_len                 Length of UTF-8 input buffer in bytes.
225  *
226  * POST-CONDITIONS
227  *      <return>                Zero on success.
228  *
229  * HISTORY
230  *      November 12, 1997 - Andrew E. Mileski
231  *      Written, tested, and released.
232  */
233 int udf_UTF8toCS0(dstring *ocu, struct ustr *utf, int length)
234 {
235         unsigned c, i, max_val, utf_char;
236         int utf_cnt;
237         int u_len = 0;
238 
239         memset(ocu, 0, sizeof(dstring) * length);
240         ocu[0] = 8;
241         max_val = 0xffU;
242 
243 try_again:
244         utf_char = 0U;
245         utf_cnt = 0U;
246         for (i = 0U; i < utf->u_len; i++)
247         {
248                 c = (Uint8)utf->u_name[i];
249 
250                 /* Complete a multi-byte UTF-8 character */
251                 if (utf_cnt)
252                 {
253                         utf_char = (utf_char << 6) | (c & 0x3fU);
254                         if (--utf_cnt)
255                                 continue;
256                 }
257                 else
258                 {
259                         /* Check for a multi-byte UTF-8 character */
260                         if (c & 0x80U)
261                         {
262                                 /* Start a multi-byte UTF-8 character */
263                                 if ((c & 0xe0U) == 0xc0U)
264                                 {
265                                         utf_char = c & 0x1fU;
266                                         utf_cnt = 1;
267                                 }
268                                 else if ((c & 0xf0U) == 0xe0U)
269                                 {
270                                         utf_char = c & 0x0fU;
271                                         utf_cnt = 2;
272                                 }
273                                 else if ((c & 0xf8U) == 0xf0U)
274                                 {
275                                         utf_char = c & 0x07U;
276                                         utf_cnt = 3;
277                                 }
278                                 else if ((c & 0xfcU) == 0xf8U)
279                                 {
280                                         utf_char = c & 0x03U;
281                                         utf_cnt = 4;
282                                 }
283                                 else if ((c & 0xfeU) == 0xfcU)
284                                 {
285                                         utf_char = c & 0x01U;
286                                         utf_cnt = 5;
287                                 }
288                                 else
289                                         goto error_out;
290                                 continue;
291                         } else
292                                 /* Single byte UTF-8 character (most common) */
293                                 utf_char = c;
294                 }
295 
296                 /* Choose no compression if necessary */
297                 if (utf_char > max_val)
298                 {
299                         if ( 0xffU == max_val )
300                         {
301                                 max_val = 0xffffU;
302                                 ocu[0] = (Uint8)0x10U;
303                                 goto try_again;
304                         }
305                         goto error_out;
306                 }
307 
308                 if (max_val == 0xffffU)
309                 {
310                         ocu[++u_len] = (Uint8)(utf_char >> 8);
311                 }
312                 ocu[++u_len] = (Uint8)(utf_char & 0xffU);
313         }
314 
315 
316         if (utf_cnt)
317         {
318 error_out:
319 #ifdef __KERNEL__
320                 printk(KERN_ERR "udf: bad UTF-8 character\n");
321 #endif
322                 return 0;
323         }
324 
325         ocu[length - 1] = (Uint8)u_len + 1;
326         return u_len + 1;
327 }
328 
#ifdef __KERNEL__
/*
 * udf_get_filename
 *
 * Turn an on-disk file identifier (sname, flen bytes) into the name
 * stored in dname. The identifier is wrapped in a ustr, converted from
 * CS0 to UTF-8, and then passed through udf_translate_to_linux() together
 * with the original CS0 bytes.
 *
 * Returns the length reported by udf_translate_to_linux(), or 0 when
 * building the ustr or the CS0 conversion fails.
 */
int udf_get_filename(Uint8 *sname, Uint8 *dname, int flen)
{
	struct ustr utf8Name, cs0Name;

	if (udf_build_ustr_exact(&cs0Name, sname, flen) != 0)
		return 0;

	if (udf_CS0toUTF8(&utf8Name, &cs0Name) == 0)
	{
		udf_debug("Failed in udf_get_filename: sname = %s\n", sname);
		return 0;
	}

	return udf_translate_to_linux(dname, utf8Name.u_name, utf8Name.u_len,
		cs0Name.u_name, cs0Name.u_len);
}
#endif
354 
355 #define ILLEGAL_CHAR_MARK       '_'
356 #define EXT_MARK                        '.'
357 #define CRC_MARK                        '#'
358 #define EXT_SIZE                        5
359 
360 int udf_translate_to_linux(Uint8 *newName, Uint8 *udfName, int udfLen, Uint8 *fidName, int fidNameLen)
361 {
362         int index, newIndex = 0, needsCRC = 0;  
363         int extIndex = 0, newExtIndex = 0, hasExt = 0;
364         unsigned short valueCRC;
365         Uint8 curr;
366         const Uint8 hexChar[] = "0123456789ABCDEF";
367 
368         if (udfName[0] == '.' && (udfLen == 1 ||
369                 (udfLen == 2 && udfName[1] == '.')))
370         {
371                 needsCRC = 1;
372                 newIndex = udfLen;
373                 memcpy(newName, udfName, udfLen);
374         }
375         else
376         {       
377                 for (index = 0; index < udfLen; index++)
378                 {
379                         curr = udfName[index];
380                         if (curr == '/' || curr == 0)
381                         {
382                                 needsCRC = 1;
383                                 curr = ILLEGAL_CHAR_MARK;
384                                 while (index+1 < udfLen && (udfName[index+1] == '/' ||
385                                         udfName[index+1] == 0))
386                                         index++;
387                         }
388                         if (curr == EXT_MARK && (udfLen - index - 1) <= EXT_SIZE)
389                         {
390                                 if (udfLen == index + 1)
391                                         hasExt = 0;
392                                 else
393                                 {
394                                         hasExt = 1;
395                                         extIndex = index;
396                                         newExtIndex = newIndex;
397                                 }
398                         }
399                         if (newIndex < 256)
400                                 newName[newIndex++] = curr;
401                         else
402                                 needsCRC = 1;
403                 }
404         }
405         if (needsCRC)
406         {
407                 Uint8 ext[EXT_SIZE];
408                 int localExtIndex = 0;
409 
410                 if (hasExt)
411                 {
412                         int maxFilenameLen;
413                         for(index = 0; index<EXT_SIZE && extIndex + index +1 < udfLen;
414                                 index++ )
415                         {
416                                 curr = udfName[extIndex + index + 1];
417 
418                                 if (curr == '/' || curr == 0)
419                                 {
420                                         needsCRC = 1;
421                                         curr = ILLEGAL_CHAR_MARK;
422                                         while(extIndex + index + 2 < udfLen && (index + 1 < EXT_SIZE
423                                                 && (udfName[extIndex + index + 2] == '/' ||
424                                                         udfName[extIndex + index + 2] == 0)))
425                                                 index++;
426                                 }
427                                 ext[localExtIndex++] = curr;
428                         }
429                         maxFilenameLen = 250 - localExtIndex;
430                         if (newIndex > maxFilenameLen)
431                                 newIndex = maxFilenameLen;
432                         else
433                                 newIndex = newExtIndex;
434                 }
435                 else if (newIndex > 250)
436                         newIndex = 250;
437                 newName[newIndex++] = CRC_MARK;
438                 valueCRC = udf_crc(fidName, fidNameLen, 0);
439                 newName[newIndex++] = hexChar[(valueCRC & 0xf000) >> 12];
440                 newName[newIndex++] = hexChar[(valueCRC & 0x0f00) >> 8];
441                 newName[newIndex++] = hexChar[(valueCRC & 0x00f0) >> 4];
442                 newName[newIndex++] = hexChar[(valueCRC & 0x000f)];
443 
444                 if (hasExt)
445                 {
446                         newName[newIndex++] = EXT_MARK;
447                         for (index = 0;index < localExtIndex ;index++ )
448                                 newName[newIndex++] = ext[index];
449                 }
450         }
451         return newIndex;
452 }
453 

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~ [ freetext search ] ~ [ file search ] ~

This page was automatically generated by the LXR engine.
Visit the LXR main site for more information.