~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~ [ freetext search ] ~ [ file search ] ~

Linux Cross Reference
Linux/scripts/cramfs/mkcramfs.c

Version: ~ [ 2.4.0 ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

  1 #include <sys/types.h>
  2 #include <stdio.h>
  3 #include <sys/stat.h>
  4 #include <unistd.h>
  5 #include <sys/mman.h>
  6 #include <sys/fcntl.h>
  7 #include <dirent.h>
  8 #include <stdlib.h>
  9 #include <errno.h>
 10 #include <string.h>
 11 #include <assert.h>
 12 
 13 /* zlib required.. */
 14 #include <zlib.h>
 15 
 16 typedef unsigned char u8;
 17 typedef unsigned short u16;
 18 typedef unsigned int u32;
 19 
 20 #include "cramfs.h"
 21 
 22 static const char *progname = "mkcramfs";
 23 
 24 /* N.B. If you change the disk format of cramfs, please update fs/cramfs/README. */
 25 
 26 static void usage(void)
 27 {
 28         fprintf(stderr, "Usage: '%s dirname outfile'\n"
 29                 " where <dirname> is the root of the\n"
 30                 " filesystem to be compressed.\n", progname);
 31         exit(1);
 32 }
 33 
 34 /*
 35  * If DO_HOLES is defined, then mkcramfs can create explicit holes in the
 36  * data, which saves 26 bytes per hole (which is a lot smaller a saving than
 37  * most filesystems).
 38  *
 39  * Note that kernels up to at least 2.3.39 don't support cramfs holes, which
 40  * is why this defaults to undefined at the moment.
 41  */
 42 /* #define DO_HOLES 1 */
 43 
 44 #define PAGE_CACHE_SIZE (4096)
 45 /* The kernel assumes PAGE_CACHE_SIZE as block size. */
 46 static unsigned int blksize = PAGE_CACHE_SIZE;
 47 
 48 static int warn_dev, warn_gid, warn_namelen, warn_size, warn_uid;
 49 
 50 #ifndef MIN
 51 # define MIN(_a,_b) ((_a) < (_b) ? (_a) : (_b))
 52 #endif
 53 
 54 /* In-core version of inode / directory entry. */
 55 struct entry {
 56         /* stats */
 57         char *name;
 58         unsigned int mode, size, uid, gid;
 59 
 60         /* FS data */
 61         void *uncompressed;
 62         /* points to other identical file */
 63         struct entry *same;
 64         unsigned int offset;            /* pointer to compressed data in archive */
 65         unsigned int dir_offset;        /* Where in the archive is the directory entry? */
 66 
 67         /* organization */
 68         struct entry *child; /* null for non-directories and empty directories */
 69         struct entry *next;
 70 };
 71 
 72 /*
 73  * Width of various bitfields in struct cramfs_inode.
 74  * Used only to generate warnings.
 75  */
 76 #define SIZE_WIDTH 24
 77 #define UID_WIDTH 16
 78 #define GID_WIDTH 8
 79 #define OFFSET_WIDTH 26
 80 
 81 /*
 82  * The longest file name component to allow for in the input directory tree.
 83  * Ext2fs (and many others) allow up to 255 bytes.  A couple of filesystems
 84  * allow longer (e.g. smbfs 1024), but there isn't much use in supporting
 85  * >255-byte names in the input directory tree given that such names get
 86  * truncated to 255 bytes when written to cramfs.
 87  */
 88 #define MAX_INPUT_NAMELEN 255
 89 
 90 static int find_identical_file(struct entry *orig,struct entry *newfile)
 91 {
 92         if(orig==newfile) return 1;
 93         if(!orig) return 0;
 94         if(orig->size==newfile->size && orig->uncompressed && !memcmp(orig->uncompressed,newfile->uncompressed,orig->size)) {
 95                 newfile->same=orig;
 96                 return 1;
 97         }
 98         return find_identical_file(orig->child,newfile) ||
 99                    find_identical_file(orig->next,newfile);
100 }
101 
102 static void eliminate_doubles(struct entry *root,struct entry *orig) {
103         if(orig) {
104                 if(orig->size && orig->uncompressed) 
105                         find_identical_file(root,orig);
106                 eliminate_doubles(root,orig->child);
107                 eliminate_doubles(root,orig->next);
108         }
109 }
110 
111 static unsigned int parse_directory(struct entry *root_entry, const char *name, struct entry **prev, loff_t *fslen_ub)
112 {
113         DIR *dir;
114         int count = 0, totalsize = 0;
115         struct dirent *dirent;
116         char *path, *endpath;
117         size_t len = strlen(name);
118 
119         dir = opendir(name);
120         if (!dir) {
121                 perror(name);
122                 exit(2);
123         }
124 
125         /* Set up the path. */
126         /* TODO: Reuse the parent's buffer to save memcpy'ing and duplication. */
127         path = malloc(len + 1 + MAX_INPUT_NAMELEN + 1);
128         if (!path) {
129                 perror(NULL);
130                 exit(1);
131         }
132         memcpy(path, name, len);
133         endpath = path + len;
134         *endpath = '/';
135         endpath++;
136 
137         while ((dirent = readdir(dir)) != NULL) {
138                 struct entry *entry;
139                 struct stat st;
140                 int size;
141                 size_t namelen;
142 
143                 /* Ignore "." and ".." - we won't be adding them to the archive */
144                 if (dirent->d_name[0] == '.') {
145                         if (dirent->d_name[1] == '\0')
146                                 continue;
147                         if (dirent->d_name[1] == '.') {
148                                 if (dirent->d_name[2] == '\0')
149                                         continue;
150                         }
151                 }
152                 namelen = strlen(dirent->d_name);
153                 if (namelen > MAX_INPUT_NAMELEN) {
154                         fprintf(stderr,
155                                 "Very long (%u bytes) filename `%s' found.\n"
156                                 " Please increase MAX_INPUT_NAMELEN in mkcramfs.c and recompile.  Exiting.\n",
157                                 namelen, dirent->d_name);
158                         exit(1);
159                 }
160                 memcpy(endpath, dirent->d_name, namelen + 1);
161 
162                 if (lstat(path, &st) < 0) {
163                         perror(endpath);
164                         continue;
165                 }
166                 entry = calloc(1, sizeof(struct entry));
167                 if (!entry) {
168                         perror(NULL);
169                         exit(5);
170                 }
171                 entry->name = strdup(dirent->d_name);
172                 if (!entry->name) {
173                         perror(NULL);
174                         exit(1);
175                 }
176                 if (namelen > 255) {
177                         /* Can't happen when reading from ext2fs. */
178 
179                         /* TODO: we ought to avoid chopping in half
180                            multi-byte UTF8 characters. */
181                         entry->name[namelen = 255] = '\0';
182                         warn_namelen = 1;
183                 }
184                 entry->mode = st.st_mode;
185                 entry->size = st.st_size;
186                 entry->uid = st.st_uid;
187                 if (entry->uid >= 1 << UID_WIDTH)
188                         warn_uid = 1;
189                 entry->gid = st.st_gid;
190                 if (entry->gid >= 1 << GID_WIDTH)
191                         /* TODO: We ought to replace with a default
192                            gid instead of truncating; otherwise there
193                            are security problems.  Maybe mode should
194                            be &= ~070.  Same goes for uid once Linux
195                            supports >16-bit uids. */
196                         warn_gid = 1;
197                 size = sizeof(struct cramfs_inode) + ((namelen + 3) & ~3);
198                 *fslen_ub += size;
199                 if (S_ISDIR(st.st_mode)) {
200                         entry->size = parse_directory(root_entry, path, &entry->child, fslen_ub);
201                 } else if (S_ISREG(st.st_mode)) {
202                         /* TODO: We ought to open files in do_compress, one
203                            at a time, instead of amassing all these memory
204                            maps during parse_directory (which don't get used
205                            until do_compress anyway).  As it is, we tend to
206                            get EMFILE errors (especially if mkcramfs is run
207                            by non-root).
208 
209                            While we're at it, do analagously for symlinks
210                            (which would just save a little memory). */
211                         int fd = open(path, O_RDONLY);
212                         if (fd < 0) {
213                                 perror(path);
214                                 continue;
215                         }
216                         if (entry->size) {
217                                 if ((entry->size >= 1 << SIZE_WIDTH)) {
218                                         warn_size = 1;
219                                         entry->size = (1 << SIZE_WIDTH) - 1;
220                                 }
221 
222                                 entry->uncompressed = mmap(NULL, entry->size, PROT_READ, MAP_PRIVATE, fd, 0);
223                                 if (-1 == (int) (long) entry->uncompressed) {
224                                         perror("mmap");
225                                         exit(5);
226                                 }
227                         }
228                         close(fd);
229                 } else if (S_ISLNK(st.st_mode)) {
230                         entry->uncompressed = malloc(entry->size);
231                         if (!entry->uncompressed) {
232                                 perror(NULL);
233                                 exit(5);
234                         }
235                         if (readlink(path, entry->uncompressed, entry->size) < 0) {
236                                 perror(path);
237                                 continue;
238                         }
239                 } else {
240                         entry->size = st.st_rdev;
241                         if (entry->size & -(1<<SIZE_WIDTH))
242                                 warn_dev = 1;
243                 }
244 
245                 if (S_ISREG(st.st_mode) || S_ISLNK(st.st_mode)) {
246                         /* block pointers & data expansion allowance + data */
247                         if(entry->size) 
248                                 *fslen_ub += ((4+26)*((entry->size - 1) / blksize + 1)
249                                               + MIN(entry->size + 3, st.st_blocks << 9));
250                         else 
251                                 *fslen_ub += MIN(entry->size + 3, st.st_blocks << 9);
252                 }
253 
254                 /* Link it into the list */
255                 *prev = entry;
256                 prev = &entry->next;
257                 count++;
258                 totalsize += size;
259         }
260         closedir(dir);
261         free(path);
262         return totalsize;
263 }
264 
/*
 * Fill SIZE bytes at AREA with data read from /dev/random.
 *
 * If the device cannot be opened, or a single read() does not deliver
 * all SIZE bytes, the area is filled with zero bytes instead.  The
 * descriptor is always closed again before returning.
 */
static void set_random(void *area, size_t size)
{
        int fd = open("/dev/random", O_RDONLY);

        if (fd >= 0) {
                ssize_t got = read(fd, area, size);

                close(fd);
                if (got >= 0 && (size_t) got == size)
                        return;
        }
        memset(area, 0x00, size);
}
275 
276 /* Returns sizeof(struct cramfs_super), which includes the root inode. */
277 static unsigned int write_superblock(struct entry *root, char *base)
278 {
279         struct cramfs_super *super = (struct cramfs_super *) base;
280         unsigned int offset = sizeof(struct cramfs_super);
281 
282         super->magic = CRAMFS_MAGIC;
283         super->flags = 0;
284         /* Note: 0x10000 is meaningless, which is a bug; but
285            super->size is never used anyway. */
286         super->size = 0x10000;
287         memcpy(super->signature, CRAMFS_SIGNATURE, sizeof(super->signature));
288         set_random(super->fsid, sizeof(super->fsid));
289         strncpy(super->name, "Compressed", sizeof(super->name));
290 
291         super->root.mode = root->mode;
292         super->root.uid = root->uid;
293         super->root.gid = root->gid;
294         super->root.size = root->size;
295         super->root.offset = offset >> 2;
296 
297         return offset;
298 }
299 
300 static void set_data_offset(struct entry *entry, char *base, unsigned long offset)
301 {
302         struct cramfs_inode *inode = (struct cramfs_inode *) (base + entry->dir_offset);
303         assert ((offset & 3) == 0);
304         if (offset >= (1 << (2 + OFFSET_WIDTH))) {
305                 fprintf(stderr, "filesystem too big.  Exiting.\n");
306                 exit(1);
307         }
308         inode->offset = (offset >> 2);
309 }
310 
311 
312 /*
313  * We do a width-first printout of the directory
314  * entries, using a stack to remember the directories
315  * we've seen.
316  */
317 #define MAXENTRIES (100)
318 static unsigned int write_directory_structure(struct entry *entry, char *base, unsigned int offset)
319 {
320         int stack_entries = 0;
321         struct entry *entry_stack[MAXENTRIES];
322 
323         for (;;) {
324                 int dir_start = stack_entries;
325                 while (entry) {
326                         struct cramfs_inode *inode = (struct cramfs_inode *) (base + offset);
327                         size_t len = strlen(entry->name);
328 
329                         entry->dir_offset = offset;
330 
331                         inode->mode = entry->mode;
332                         inode->uid = entry->uid;
333                         inode->gid = entry->gid;
334                         inode->size = entry->size;
335                         inode->offset = 0;
336                         /* Non-empty directories, regfiles and symlinks will
337                            write over inode->offset later. */
338 
339                         offset += sizeof(struct cramfs_inode);
340                         memcpy(base + offset, entry->name, len);
341                         /* Pad up the name to a 4-byte boundary */
342                         while (len & 3) {
343                                 *(base + offset + len) = '\0';
344                                 len++;
345                         }
346                         inode->namelen = len >> 2;
347                         offset += len;
348 
349                         /* TODO: this may get it wrong for chars >= 0x80.
350                            Most filesystems use UTF8 encoding for filenames,
351                            whereas the console is a single-byte character
352                            set like iso-latin-1. */
353                         printf("  %s\n", entry->name);
354                         if (entry->child) {
355                                 if (stack_entries >= MAXENTRIES) {
356                                         fprintf(stderr, "Exceeded MAXENTRIES.  Raise this value in mkcramfs.c and recompile.  Exiting.\n");
357                                         exit(1);
358                                 }
359                                 entry_stack[stack_entries] = entry;
360                                 stack_entries++;
361                         }
362                         entry = entry->next;
363                 }
364 
365                 /*
366                  * Reverse the order the stack entries pushed during
367                  * this directory, for a small optimization of disk
368                  * access in the created fs.  This change makes things
369                  * `ls -UR' order.
370                  */
371                 {
372                         struct entry **lo = entry_stack + dir_start;
373                         struct entry **hi = entry_stack + stack_entries;
374                         struct entry *tmp;
375 
376                         while (lo < --hi) {
377                                 tmp = *lo;
378                                 *lo++ = *hi;
379                                 *hi = tmp;
380                         }
381                 }
382 
383                 /* Pop a subdirectory entry from the stack, and recurse. */
384                 if (!stack_entries)
385                         break;
386                 stack_entries--;
387                 entry = entry_stack[stack_entries];
388 
389                 set_data_offset(entry, base, offset);
390                 printf("'%s':\n", entry->name);
391                 entry = entry->child;
392         }
393         return offset;
394 }
395 
#ifdef DO_HOLES
/*
 * Returns non-zero iff the first LEN bytes from BEGIN are all NULs.
 *
 * Up to four leading bytes are tested one at a time; the rest is
 * covered by comparing the buffer with itself shifted by four bytes,
 * which can only succeed if every later byte repeats one of those
 * (zero) leading bytes.
 */
static int
is_zero(char const *begin, unsigned len)
{
        unsigned i;

        for (i = 0; i < 4; i++) {
                if (i == len)
                        return 1;
                if (begin[i] != '\0')
                        return 0;
        }
        return memcmp(begin, begin + 4, len - 4) == 0;
}
#else /* !DO_HOLES */
# define is_zero(_begin,_len) (0)  /* Never create holes. */
#endif /* !DO_HOLES */
416 
417 /*
418  * One 4-byte pointer per block and then the actual blocked
419  * output. The first block does not need an offset pointer,
420  * as it will start immediately after the pointer block;
421  * so the i'th pointer points to the end of the i'th block
422  * (i.e. the start of the (i+1)'th block or past EOF).
423  *
424  * Note that size > 0, as a zero-sized file wouldn't ever
425  * have gotten here in the first place.
426  */
427 static unsigned int do_compress(char *base, unsigned int offset, char const *name, char *uncompressed, unsigned int size)
428 {
429         unsigned long original_size = size;
430         unsigned long original_offset = offset;
431         unsigned long new_size;
432         unsigned long blocks = (size - 1) / blksize + 1;
433         unsigned long curr = offset + 4 * blocks;
434         int change;
435 
436         do {
437                 unsigned long len = 2 * blksize;
438                 unsigned int input = size;
439                 if (input > blksize)
440                         input = blksize;
441                 size -= input;
442                 if (!is_zero (uncompressed, input)) {
443                         compress(base + curr, &len, uncompressed, input);
444                         curr += len;
445                 }
446                 uncompressed += input;
447 
448                 if (len > blksize*2) {
449                         /* (I don't think this can happen with zlib.) */
450                         printf("AIEEE: block \"compressed\" to > 2*blocklength (%ld)\n", len);
451                         exit(1);
452                 }
453 
454                 *(u32 *) (base + offset) = curr;
455                 offset += 4;
456         } while (size);
457 
458         curr = (curr + 3) & ~3;
459         new_size = curr - original_offset;
460         /* TODO: Arguably, original_size in these 2 lines should be
461            st_blocks * 512.  But if you say that then perhaps
462            administrative data should also be included in both. */
463         change = new_size - original_size;
464         printf("%6.2f%% (%+d bytes)\t%s\n",
465                (change * 100) / (double) original_size, change, name);
466 
467         return curr;
468 }
469 
470 
471 /*
472  * Traverse the entry tree, writing data for every item that has
473  * non-null entry->compressed (i.e. every symlink and non-empty
474  * regfile).
475  */
476 static unsigned int write_data(struct entry *entry, char *base, unsigned int offset)
477 {
478         do {
479                 if (entry->uncompressed) {
480                         if(entry->same) {
481                                 set_data_offset(entry, base, entry->same->offset);
482                                 entry->offset=entry->same->offset;
483                         } else {
484                                 set_data_offset(entry, base, offset);
485                                 entry->offset=offset;
486                                 offset = do_compress(base, offset, entry->name, entry->uncompressed, entry->size);
487                         }
488                 }
489                 else if (entry->child)
490                         offset = write_data(entry->child, base, offset);
491                 entry=entry->next;
492         } while (entry);
493         return offset;
494 }
495 
496 
497 /*
498  * Maximum size fs you can create is roughly 256MB.  (The last file's
499  * data must begin within 256MB boundary but can extend beyond that.)
500  *
501  * Note that if you want it to fit in a ROM then you're limited to what the
502  * hardware and kernel can support (64MB?).
503  */
504 #define MAXFSLEN ((((1 << OFFSET_WIDTH) - 1) << 2) /* offset */ \
505                   + (1 << SIZE_WIDTH) - 1 /* filesize */ \
506                   + (1 << SIZE_WIDTH) * 4 / PAGE_CACHE_SIZE /* block pointers */ )
507 
508 
509 /*
510  * Usage:
511  *
512  *      mkcramfs directory-name outfile
513  *
514  * where "directory-name" is simply the root of the directory
515  * tree that we want to generate a compressed filesystem out
516  * of.
517  */
518 int main(int argc, char **argv)
519 {
520         struct stat st;
521         struct entry *root_entry;
522         char *rom_image;
523         unsigned int offset;
524         ssize_t written;
525         int fd;
526         loff_t fslen_ub = 0; /* initial guess (upper-bound) of
527                                 required filesystem size */
528         char const *dirname;
529 
530         if (argc)
531                 progname = argv[0];
532         if (argc != 3)
533                 usage();
534 
535         if (stat(dirname = argv[1], &st) < 0) {
536                 perror(argv[1]);
537                 exit(1);
538         }
539         fd = open(argv[2], O_WRONLY | O_CREAT | O_TRUNC, 0666);
540 
541         root_entry = calloc(1, sizeof(struct entry));
542         if (!root_entry) {
543                 perror(NULL);
544                 exit(5);
545         }
546         root_entry->mode = st.st_mode;
547         root_entry->uid = st.st_uid;
548         root_entry->gid = st.st_gid;
549 
550         root_entry->size = parse_directory(root_entry, argv[1], &root_entry->child, &fslen_ub);
551         if (fslen_ub > MAXFSLEN) {
552                 fprintf(stderr,
553                         "warning: guestimate of required size (upper bound) is %luMB, but maximum image size is %uMB.  We might die prematurely.\n",
554                         (unsigned long) (fslen_ub >> 20),
555                         MAXFSLEN >> 20);
556                 fslen_ub = MAXFSLEN;
557         }
558 
559         /* find duplicate files. TODO: uses the most inefficient algorithm
560            possible. */
561         eliminate_doubles(root_entry,root_entry);
562 
563 
564         /* TODO: Why do we use a private/anonymous mapping here
565            followed by a write below, instead of just a shared mapping
566            and a couple of ftruncate calls?  Is it just to save us
567            having to deal with removing the file afterwards?  If we
568            really need this huge anonymous mapping, we ought to mmap
569            in smaller chunks, so that the user doesn't need nn MB of
570            RAM free.  If the reason is to be able to write to
571            un-mmappable block devices, then we could try shared mmap
572            and revert to anonymous mmap if the shared mmap fails. */
573         rom_image = mmap(NULL, fslen_ub, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
574         if (-1 == (int) (long) rom_image) {
575                 perror("ROM image map");
576                 exit(1);
577         }
578         offset = write_superblock(root_entry, rom_image);
579         printf("Super block: %d bytes\n", offset);
580 
581         offset = write_directory_structure(root_entry->child, rom_image, offset);
582         printf("Directory data: %d bytes\n", offset);
583 
584         offset = write_data(root_entry, rom_image, offset);
585 
586         /* We always write a multiple of blksize bytes, so that
587            losetup works. */
588         offset = ((offset - 1) | (blksize - 1)) + 1;
589         printf("Everything: %d kilobytes\n", offset >> 10);
590 
591         written = write(fd, rom_image, offset);
592         if (written < 0) {
593                 perror("rom image");
594                 exit(1);
595         }
596         if (offset != written) {
597                 fprintf(stderr, "ROM image write failed (%d %d)\n", written, offset);
598                 exit(1);
599         }
600 
601         /* (These warnings used to come at the start, but they scroll off the
602            screen too quickly.) */
603         if (warn_namelen) /* (can't happen when reading from ext2fs) */
604                 fprintf(stderr, /* bytes, not chars: think UTF8. */
605                         "warning: filenames truncated to 255 bytes.\n");
606         if (warn_size)
607                 fprintf(stderr,
608                         "warning: file sizes truncated to %luMB (minus 1 byte).\n",
609                         1L << (SIZE_WIDTH - 20));
610         if (warn_uid) /* (not possible with current Linux versions) */
611                 fprintf(stderr,
612                         "warning: uids truncated to %u bits.  (This may be a security concern.)\n",
613                         UID_WIDTH);
614         if (warn_gid)
615                 fprintf(stderr,
616                         "warning: gids truncated to %u bits.  (This may be a security concern.)\n",
617                         GID_WIDTH);
618         if (warn_dev)
619                 fprintf(stderr,
620                         "WARNING: device numbers truncated to %u bits.  This almost certainly means\n"
621                         "that some device files will be wrong.\n",
622                         OFFSET_WIDTH);
623         return 0;
624 }
625 

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~ [ freetext search ] ~ [ file search ] ~

This page was automatically generated by the LXR engine.
Visit the LXR main site for more information.