~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~ [ freetext search ] ~ [ file search ] ~

Linux Cross Reference
Linux/fs/hfs/trans.c

Version: ~ [ 2.4.0 ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

  1 /*
  2  * linux/fs/hfs/trans.c
  3  *
  4  * Copyright (C) 1995-1997  Paul H. Hargrove
  5  * This file may be distributed under the terms of the GNU Public License.
  6  *
  7  * This file contains routines for converting between the Macintosh
  8  * character set and various other encodings.  This includes dealing
  9  * with ':' vs. '/' as the path-element separator.
 10  *
 11  * Latin-1 translation based on code contributed by Holger Schemel
 12  * (aeglos@valinor.owl.de).
 13  *
 14  * The '8-bit', '7-bit ASCII' and '7-bit alphanumeric' encodings are
 15  * implementations of the three encodings recommended by Apple in the
 16  * document "AppleSingle/AppleDouble Formats: Developer's Note
 17  * (9/94)".  This document is available from Apple's Technical
 18  * Information Library from the World Wide Web server
 19  * www.info.apple.com.
 20  *
 21  * The 'CAP' encoding is an implementation of the naming scheme used
 22  * by the Columbia AppleTalk Package, available for anonymous FTP from
 23  * ????.
 24  *
 25  * "XXX" in a comment is a note to myself to consider changing something.
 26  *
 27  * In function preconditions the term "valid" applied to a pointer to
 28  * a structure means that the pointer is non-NULL and the structure it
 29  * points to has all fields initialized to consistent values.
 30  */
 31 
 32 #include "hfs.h"
 33 #include <linux/hfs_fs_sb.h>
 34 #include <linux/hfs_fs_i.h>
 35 #include <linux/hfs_fs.h>
 36 
 37 /*================ File-local variables ================*/
 38 
 39 /* int->ASCII map for a single hex digit */
 40 static char hex[16] = {'','1','2','3','4','5','6','7',
 41                        '8','9','a','b','c','d','e','f'};
 42 /*
 43  * Latin-1 to Mac character set map
 44  *
 45  * For the sake of consistency this map is generated from the Mac to
 46  * Latin-1 map the first time it is needed.  This means there is just
 47  * one map to maintain.
 48  */
 49 static unsigned char latin2mac_map[128]; /* initially all zero */
 50 
 51 /*
 52  * Mac to Latin-1 map for the upper 128 characters (both have ASCII in
 53  * the lower 128 positions)
 54  */
 55 static unsigned char mac2latin_map[128] = {
 56         0xC4, 0xC5, 0xC7, 0xC9, 0xD1, 0xD6, 0xDC, 0xE1,
 57         0xE0, 0xE2, 0xE4, 0xE3, 0xE5, 0xE7, 0xE9, 0xE8,
 58         0xEA, 0xEB, 0xED, 0xEC, 0xEE, 0xEF, 0xF1, 0xF3,
 59         0xF2, 0xF4, 0xF6, 0xF5, 0xFA, 0xF9, 0xFB, 0xFC,
 60         0x00, 0xB0, 0xA2, 0xA3, 0xA7, 0xB7, 0xB6, 0xDF,
 61         0xAE, 0xA9, 0x00, 0xB4, 0xA8, 0x00, 0xC6, 0xD8,
 62         0x00, 0xB1, 0x00, 0x00, 0xA5, 0xB5, 0xF0, 0x00, 
 63         0x00, 0x00, 0x00, 0xAA, 0xBA, 0x00, 0xE6, 0xF8,
 64         0xBF, 0xA1, 0xAC, 0x00, 0x00, 0x00, 0x00, 0xAB,
 65         0xBB, 0x00, 0xA0, 0xC0, 0xC3, 0xD5, 0x00, 0x00, 
 66         0xAD, 0x00, 0x00, 0x00, 0x00, 0x00, 0xF7, 0x00, 
 67         0xFF, 0x00, 0x00, 0xA4, 0x00, 0x00, 0x00, 0x00, 
 68         0x00, 0x00, 0xB8, 0x00, 0x00, 0xC2, 0xCA, 0xC1,
 69         0xCB, 0xC8, 0xCD, 0xCE, 0xCF, 0xCC, 0xD3, 0xD4,
 70         0x00, 0xD2, 0xDA, 0xDB, 0xD9, 0x00, 0x00, 0x00,
 71         0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
 72 };
 73 
 74 /*================ File-local functions ================*/
 75 
 76 /*
 77  * dehex()
 78  *
 79  * Given a hexadecimal digit in ASCII, return the integer representation.
 80  */
 81 static inline const unsigned char dehex(char c) {
 82         if ((c>='')&&(c<='9')) {
 83                 return c-'';
 84         }
 85         if ((c>='a')&&(c<='f')) {
 86                 return c-'a'+10;
 87         }
 88         if ((c>='A')&&(c<='F')) {
 89                 return c-'A'+10;
 90         }
 91         return 0xff;
 92 }
 93 
 94 /*================ Global functions ================*/
 95 
 96 /*
 97  * hfs_mac2nat()
 98  *
 99  * Given a 'Pascal String' (a string preceded by a length byte) in
100  * the Macintosh character set produce the corresponding filename using
101  * the Netatalk name-mangling scheme, returning the length of the
102  * mangled filename.  Note that the output string is not NULL terminated.
103  *
104  * The name-mangling works as follows:
105  * Characters 32-126 (' '-'~') except '/' and any initial '.' are passed
106  * unchanged from input to output.  The remaining characters are replaced
107  * by three characters: ':xx' where xx is the hexadecimal representation
108  * of the character, using lowercase 'a' through 'f'.
109  */
110 int hfs_mac2nat(char *out, const struct hfs_name *in) {
111         unsigned char c;
112         const unsigned char *p = in->Name;
113         int len = in->Len;
114         int count = 0;
115 
116         /* Special case for .AppleDesktop which in the
117            distant future may be a pseudodirectory. */
118         if (strncmp(".AppleDesktop", p, len) == 0) {
119                 strncpy(out, p, 13);
120                 return 13;
121         }
122 
123         while (len--) {
124                 c = *p++;
125                 if ((c<32) || (c=='/') || (c>126) || (!count && (c=='.'))) {
126                         *out++ = ':';
127                         *out++ = hex[(c>>4) & 0xf];
128                         *out++ = hex[c & 0xf];
129                         count += 3;
130                 } else {
131                         *out++ = c;
132                         count++;
133                 }
134         }
135         return count;
136 }
137 
138 /*
139  * hfs_mac2cap()
140  *
141  * Given a 'Pascal String' (a string preceded by a length byte) in
142  * the Macintosh character set produce the corresponding filename using
143  * the CAP name-mangling scheme, returning the length of the mangled
144  * filename.  Note that the output string is not NULL terminated.
145  *
146  * The name-mangling works as follows:
147  * Characters 32-126 (' '-'~') except '/' are passed unchanged from
148  * input to output.  The remaining characters are replaced by three
149  * characters: ':xx' where xx is the hexadecimal representation of the
150  * character, using lowercase 'a' through 'f'.
151  */
152 int hfs_mac2cap(char *out, const struct hfs_name *in) {
153         unsigned char c;
154         const unsigned char *p = in->Name;
155         int len = in->Len;
156         int count = 0;
157 
158         while (len--) {
159                 c = *p++;
160                 if ((c<32) || (c=='/') || (c>126)) {
161                         *out++ = ':';
162                         *out++ = hex[(c>>4) & 0xf];
163                         *out++ = hex[c & 0xf];
164                         count += 3;
165                 } else {
166                         *out++ = c;
167                         count++;
168                 }
169         }
170         return count;
171 }
172 
173 /*
174  * hfs_mac2eight()
175  *
176  * Given a 'Pascal String' (a string preceded by a length byte) in
177  * the Macintosh character set produce the corresponding filename using
178  * the '8-bit' name-mangling scheme, returning the length of the
179  * mangled filename.  Note that the output string is not NULL
180  * terminated.
181  *
182  * This is one of the three recommended naming conventions described
183  * in Apple's document "AppleSingle/AppleDouble Formats: Developer's
184  * Note (9/94)"
185  *
186  * The name-mangling works as follows:
187  * Characters 0, '%' and '/' are replaced by three characters: '%xx'
188  * where xx is the hexadecimal representation of the character, using
189  * lowercase 'a' through 'f'.  All other characters are passed
190  * unchanged from input to output.  Note that this format is mainly
191  * implemented for completeness and is rather hard to read.
192  */
193 int hfs_mac2eight(char *out, const struct hfs_name *in) {
194         unsigned char c;
195         const unsigned char *p = in->Name;
196         int len = in->Len;
197         int count = 0;
198 
199         while (len--) {
200                 c = *p++;
201                 if (!c || (c=='/') || (c=='%')) {
202                         *out++ = '%';
203                         *out++ = hex[(c>>4) & 0xf];
204                         *out++ = hex[c & 0xf];
205                         count += 3;
206                 } else {
207                         *out++ = c;
208                         count++;
209                 }
210         }
211         return count;
212 }
213 
214 /*
215  * hfs_mac2seven()
216  *
217  * Given a 'Pascal String' (a string preceded by a length byte) in
218  * the Macintosh character set produce the corresponding filename using
219  * the '7-bit ASCII' name-mangling scheme, returning the length of the
220  * mangled filename.  Note that the output string is not NULL
221  * terminated.
222  *
223  * This is one of the three recommended naming conventions described
224  * in Apple's document "AppleSingle/AppleDouble Formats: Developer's
225  * Note (9/94)"
226  *
227  * The name-mangling works as follows:
228  * Characters 0, '%', '/' and 128-255 are replaced by three
229  * characters: '%xx' where xx is the hexadecimal representation of the
230  * character, using lowercase 'a' through 'f'.  All other characters
231  * are passed unchanged from input to output.  Note that control
232  * characters (including newline) and space are unchanged make reading
233  * these filenames difficult.
234  */
235 int hfs_mac2seven(char *out, const struct hfs_name *in) {
236         unsigned char c;
237         const unsigned char *p = in->Name;
238         int len = in->Len;
239         int count = 0;
240 
241         while (len--) {
242                 c = *p++;
243                 if (!c || (c=='/') || (c=='%') || (c&0x80)) {
244                         *out++ = '%';
245                         *out++ = hex[(c>>4) & 0xf];
246                         *out++ = hex[c & 0xf];
247                         count += 3;
248                 } else {
249                         *out++ = c;
250                         count++;
251                 }
252         }
253         return count;
254 }
255 
256 /*
257  * hfs_mac2alpha()
258  *
259  * Given a 'Pascal String' (a string preceded by a length byte) in
260  * the Macintosh character set produce the corresponding filename using
261  * the '7-bit alphanumeric' name-mangling scheme, returning the length
262  * of the mangled filename.  Note that the output string is not NULL
263  * terminated.
264  *
265  * This is one of the three recommended naming conventions described
266  * in Apple's document "AppleSingle/AppleDouble Formats: Developer's
267  * Note (9/94)"
268  *
269  * The name-mangling works as follows:
270  * The characters 'a'-'z', 'A'-'Z', ''-'9', '_' and the last '.' in
271  * the filename are passed unchanged from input to output.  All
272  * remaining characters (including any '.'s other than the last) are
273  * replaced by three characters: '%xx' where xx is the hexadecimal
274  * representation of the character, using lowercase 'a' through 'f'.
275  */
276 int hfs_mac2alpha(char *out, const struct hfs_name *in) {
277         unsigned char c;
278         const unsigned char *p = in->Name;
279         int len = in->Len;
280         int count = 0;
281         const unsigned char *lp;        /* last period */
282 
283         /* strrchr() would be good here, but 'in' is not null-terminated */
284         for (lp=p+len-1; (lp>=p)&&(*lp!='.'); --lp) {}
285         ++lp;
286 
287         while (len--) {
288                 c = *p++;
289                 if ((p==lp) || ((c>='')&&(c<='9')) || ((c>='A')&&(c<='Z')) ||
290                                 ((c>='a')&&(c<='z')) || (c=='_')) {
291                         *out++ = c;
292                         count++;
293                 } else {
294                         *out++ = '%';
295                         *out++ = hex[(c>>4) & 0xf];
296                         *out++ = hex[c & 0xf];
297                         count += 3;
298                 }
299         }
300         return count;
301 }
302 
303 /*
304  * hfs_mac2triv()
305  *
306  * Given a 'Pascal String' (a string preceded by a length byte) in
307  * the Macintosh character set produce the corresponding filename using
308  * the 'trivial' name-mangling scheme, returning the length of the
309  * mangled filename.  Note that the output string is not NULL
310  * terminated.
311  *
312  * The name-mangling works as follows:
313  * The character '/', which is illegal in Linux filenames is replaced
314  * by ':' which never appears in HFS filenames.  All other characters
315  * are passed unchanged from input to output.
316  */
317 int hfs_mac2triv(char *out, const struct hfs_name *in) {
318         unsigned char c;
319         const unsigned char *p = in->Name;
320         int len = in->Len;
321         int count = 0;
322 
323         while (len--) {
324                 c = *p++;
325                 if (c=='/') {
326                         *out++ = ':';
327                 } else {
328                         *out++ = c;
329                 }
330                 count++;
331         }
332         return count;
333 }
334 
335 /*
336  * hfs_mac2latin()
337  *
338  * Given a 'Pascal String' (a string preceded by a length byte) in
339  * the Macintosh character set produce the corresponding filename using
340  * the 'Latin-1' name-mangling scheme, returning the length of the
341  * mangled filename.  Note that the output string is not NULL
342  * terminated.
343  *
344  * The Macintosh character set and Latin-1 are both extensions of the
345  * ASCII character set.  Some, but certainly not all, of the characters
346  * in the Macintosh character set are also in Latin-1 but not with the
347  * same encoding.  This name-mangling scheme replaces the characters in
348  * the Macintosh character set that have Latin-1 equivalents by those
349  * equivalents; the characters 32-126, excluding '/' and '%', are
350  * passed unchanged from input to output.  The remaining characters
351  * are replaced by three characters: '%xx' where xx is the hexadecimal
352  * representation of the character, using lowercase 'a' through 'f'.
353  *
354  * The array mac2latin_map[] indicates the correspondence between the
355  * two character sets.  The byte in element x-128 gives the Latin-1
356  * encoding of the character with encoding x in the Macintosh
357  * character set.  A value of zero indicates Latin-1 has no
358  * corresponding character.
359  */
360 int hfs_mac2latin(char *out, const struct hfs_name *in) {
361         unsigned char c;
362         const unsigned char *p = in->Name;
363         int len = in->Len;
364         int count = 0;
365 
366         while (len--) {
367                 c = *p++;
368 
369                 if ((c & 0x80) && mac2latin_map[c & 0x7f]) {
370                         *out++ = mac2latin_map[c & 0x7f];
371                         count++;
372                 } else if ((c>=32) && (c<=126) && (c!='/') && (c!='%')) {
373                         *out++ =  c;
374                         count++;
375                 } else {
376                         *out++ = '%';
377                         *out++ = hex[(c>>4) & 0xf];
378                         *out++ = hex[c & 0xf];
379                         count += 3;
380                 }
381         }
382         return count;
383 }
384 
385 /*
386  * hfs_colon2mac()
387  *
388  * Given an ASCII string (not null-terminated) and its length,
389  * generate the corresponding filename in the Macintosh character set
390  * using the 'CAP' name-mangling scheme, returning the length of the
391  * mangled filename.  Note that the output string is not NULL
392  * terminated.
393  *
394  * This routine is a inverse to hfs_mac2cap() and hfs_mac2nat().
395  * A ':' not followed by a 2-digit hexadecimal number (or followed
396  * by the codes for NULL or ':') is replaced by a '|'.
397  */
398 void hfs_colon2mac(struct hfs_name *out, const char *in, int len) {
399         int hi, lo;
400         unsigned char code, c, *count;
401         unsigned char *p = out->Name;
402 
403         out->Len = 0;
404         count = &out->Len;
405         while (len-- && (*count < HFS_NAMELEN)) {
406                 c = *in++;
407                 (*count)++;
408                 if (c!=':') {
409                         *p++ = c;
410                 } else if ((len<2) ||
411                            ((hi=dehex(in[0])) & 0xf0) ||
412                            ((lo=dehex(in[1])) & 0xf0) ||
413                            !(code = (hi << 4) | lo) ||
414                            (code == ':')) {
415                         *p++ = '|';
416                 } else {
417                         *p++ = code;
418                         len -= 2;
419                         in += 2;
420                 }
421         }
422 }
423 
424 /*
425  * hfs_prcnt2mac()
426  *
427  * Given an ASCII string (not null-terminated) and its length,
428  * generate the corresponding filename in the Macintosh character set
429  * using Apple's three recommended name-mangling schemes, returning
430  * the length of the mangled filename.  Note that the output string is
431  * not NULL terminated.
432  *
433  * This routine is a inverse to hfs_mac2alpha(), hfs_mac2seven() and
434  * hfs_mac2eight().
435  * A '%' not followed by a 2-digit hexadecimal number (or followed
436  * by the code for NULL or ':') is unchanged.
437  * A ':' is replaced by a '|'.
438  */
439 void hfs_prcnt2mac(struct hfs_name *out, const char *in, int len) {
440         int hi, lo;
441         unsigned char code, c, *count;
442         unsigned char *p = out->Name;
443 
444         out->Len = 0;
445         count = &out->Len;
446         while (len-- && (*count < HFS_NAMELEN)) {
447                 c = *in++;
448                 (*count)++;
449                 if (c==':') {
450                         *p++ = '|';
451                 } else if (c!='%') {
452                         *p++ = c;
453                 } else if ((len<2) ||
454                            ((hi=dehex(in[0])) & 0xf0) ||
455                            ((lo=dehex(in[1])) & 0xf0) ||
456                            !(code = (hi << 4) | lo) ||
457                            (code == ':')) {
458                         *p++ = '%';
459                 } else {
460                         *p++ = code;
461                         len -= 2;
462                         in += 2;
463                 }
464         }
465 }
466 
467 /*
468  * hfs_triv2mac()
469  *
470  * Given an ASCII string (not null-terminated) and its length,
471  * generate the corresponding filename in the Macintosh character set
472  * using the 'trivial' name-mangling scheme, returning the length of
473  * the mangled filename.  Note that the output string is not NULL
474  * terminated.
475  *
476  * This routine is a inverse to hfs_mac2triv().
477  * A ':' is replaced by a '/'.
478  */
479 void hfs_triv2mac(struct hfs_name *out, const char *in, int len) {
480         unsigned char c, *count;
481         unsigned char *p = out->Name;
482 
483         out->Len = 0;
484         count = &out->Len;
485         while (len-- && (*count < HFS_NAMELEN)) {
486                 c = *in++;
487                 (*count)++;
488                 if (c==':') {
489                         *p++ = '/';
490                 } else {
491                         *p++ = c;
492                 }
493         }
494 }
495 
496 /*
497  * hfs_latin2mac()
498  *
499  * Given an Latin-1 string (not null-terminated) and its length,
500  * generate the corresponding filename in the Macintosh character set
501  * using the 'Latin-1' name-mangling scheme, returning the length of
502  * the mangled filename.  Note that the output string is not NULL
503  * terminated.
504  *
505  * This routine is a inverse to hfs_latin2cap().
506  * A '%' not followed by a 2-digit hexadecimal number (or followed
507  * by the code for NULL or ':') is unchanged.
508  * A ':' is replaced by a '|'.
509  *
510  * Note that the character map is built the first time it is needed.
511  */
512 void hfs_latin2mac(struct hfs_name *out, const char *in, int len)
513 {
514         int hi, lo;
515         unsigned char code, c, *count;
516         unsigned char *p = out->Name;
517         static int map_initialized;
518 
519         if (!map_initialized) {
520                 int i;
521 
522                 /* build the inverse mapping at run time */
523                 for (i = 0; i < 128; i++) {
524                         if ((c = mac2latin_map[i])) {
525                                 latin2mac_map[(int)c - 128] = i + 128;
526                         }
527                 }
528                 map_initialized = 1;
529         }
530 
531         out->Len = 0;
532         count = &out->Len;
533         while (len-- && (*count < HFS_NAMELEN)) {
534                 c = *in++;
535                 (*count)++;
536 
537                 if (c==':') {
538                         *p++ = '|';
539                 } else if (c!='%') {
540                         if (c<128 || !(*p = latin2mac_map[c-128])) {
541                                 *p = c;
542                         }
543                         p++;
544                 } else if ((len<2) ||
545                            ((hi=dehex(in[0])) & 0xf0) ||
546                            ((lo=dehex(in[1])) & 0xf0) ||
547                            !(code = (hi << 4) | lo) ||
548                            (code == ':')) {
549                         *p++ = '%';
550                 } else {
551                         *p++ = code;
552                         len -= 2;
553                         in += 2;
554                 }
555         }
556 }
557 

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~ [ freetext search ] ~ [ file search ] ~

This page was automatically generated by the LXR engine.
Visit the LXR main site for more information.