1 #include <sys/types.h>
2 #include <stdio.h>
3 #include <sys/stat.h>
4 #include <unistd.h>
5 #include <sys/mman.h>
6 #include <sys/fcntl.h>
7 #include <dirent.h>
8 #include <stdlib.h>
9 #include <errno.h>
10 #include <string.h>
11 #include <assert.h>
12
13 /* zlib required.. */
14 #include <zlib.h>
15
/*
 * Short aliases for the basic unsigned integer types.  (Presumably
 * cramfs.h, included just below, relies on them too -- confirm.)
 */
typedef unsigned char u8;
typedef unsigned short int u16;
typedef unsigned u32;
19
20 #include "cramfs.h"
21
/* Program name used in the usage message; main() replaces it with argv[0]. */
static const char *progname = "mkcramfs";

/* N.B. If you change the disk format of cramfs, please update fs/cramfs/README. */

/*
 * Print the command-line synopsis on stderr and terminate the program
 * with exit status 1.  Does not return.
 */
static void usage(void)
{
	fprintf(stderr, "Usage: '%s dirname outfile'\n", progname);
	fputs(" where <dirname> is the root of the\n"
	      " filesystem to be compressed.\n", stderr);
	exit(1);
}
33
34 /*
35 * If DO_HOLES is defined, then mkcramfs can create explicit holes in the
36 * data, which saves 26 bytes per hole (which is a lot smaller a saving than
37 * most filesystems).
38 *
39 * Note that kernels up to at least 2.3.39 don't support cramfs holes, which
40 * is why this defaults to undefined at the moment.
41 */
42 /* #define DO_HOLES 1 */
43
/* The kernel assumes PAGE_CACHE_SIZE as block size. */
#define PAGE_CACHE_SIZE (4096)
static unsigned int blksize = PAGE_CACHE_SIZE;

/*
 * Warning flags.  They are raised while the input tree is scanned and
 * reported by main() once the image has been written.
 */
static int warn_dev;
static int warn_gid;
static int warn_namelen;
static int warn_size;
static int warn_uid;

/* Smaller of two values.  Each argument may be evaluated twice. */
#ifndef MIN
# define MIN(x, y) ((x) < (y) ? (x) : (y))
#endif
53
/*
 * In-core version of inode / directory entry.  One of these is
 * allocated (zero-filled) for every object found in the input tree.
 */
struct entry {
	/* stats */
	char *name;
	unsigned int mode;
	unsigned int size;
	unsigned int uid;
	unsigned int gid;

	/* FS data */
	void *uncompressed;		/* file contents or symlink target, else NULL */
	struct entry *same;		/* points to other identical file */
	unsigned int offset;		/* pointer to compressed data in archive */
	unsigned int dir_offset;	/* Where in the archive is the directory entry? */

	/* organization */
	struct entry *child;		/* null for non-directories and empty directories */
	struct entry *next;		/* next entry of the same directory */
};
71
/*
 * Width, in bits, of various bitfields in struct cramfs_inode.
 * Used only to generate warnings.
 */
#define SIZE_WIDTH	24
#define UID_WIDTH	16
#define GID_WIDTH	8
#define OFFSET_WIDTH	26

/*
 * Longest file name component accepted from the input directory tree.
 * Ext2fs (and many others) allow up to 255 bytes.  A couple of
 * filesystems allow longer names (e.g. smbfs 1024), but supporting
 * more than 255 bytes here has little use, because such names get
 * truncated to 255 bytes when written to cramfs.
 */
#define MAX_INPUT_NAMELEN	255
89
90 static int find_identical_file(struct entry *orig,struct entry *newfile)
91 {
92 if(orig==newfile) return 1;
93 if(!orig) return 0;
94 if(orig->size==newfile->size && orig->uncompressed && !memcmp(orig->uncompressed,newfile->uncompressed,orig->size)) {
95 newfile->same=orig;
96 return 1;
97 }
98 return find_identical_file(orig->child,newfile) ||
99 find_identical_file(orig->next,newfile);
100 }
101
102 static void eliminate_doubles(struct entry *root,struct entry *orig) {
103 if(orig) {
104 if(orig->size && orig->uncompressed)
105 find_identical_file(root,orig);
106 eliminate_doubles(root,orig->child);
107 eliminate_doubles(root,orig->next);
108 }
109 }
110
111 static unsigned int parse_directory(struct entry *root_entry, const char *name, struct entry **prev, loff_t *fslen_ub)
112 {
113 DIR *dir;
114 int count = 0, totalsize = 0;
115 struct dirent *dirent;
116 char *path, *endpath;
117 size_t len = strlen(name);
118
119 dir = opendir(name);
120 if (!dir) {
121 perror(name);
122 exit(2);
123 }
124
125 /* Set up the path. */
126 /* TODO: Reuse the parent's buffer to save memcpy'ing and duplication. */
127 path = malloc(len + 1 + MAX_INPUT_NAMELEN + 1);
128 if (!path) {
129 perror(NULL);
130 exit(1);
131 }
132 memcpy(path, name, len);
133 endpath = path + len;
134 *endpath = '/';
135 endpath++;
136
137 while ((dirent = readdir(dir)) != NULL) {
138 struct entry *entry;
139 struct stat st;
140 int size;
141 size_t namelen;
142
143 /* Ignore "." and ".." - we won't be adding them to the archive */
144 if (dirent->d_name[0] == '.') {
145 if (dirent->d_name[1] == '\0')
146 continue;
147 if (dirent->d_name[1] == '.') {
148 if (dirent->d_name[2] == '\0')
149 continue;
150 }
151 }
152 namelen = strlen(dirent->d_name);
153 if (namelen > MAX_INPUT_NAMELEN) {
154 fprintf(stderr,
155 "Very long (%u bytes) filename `%s' found.\n"
156 " Please increase MAX_INPUT_NAMELEN in mkcramfs.c and recompile. Exiting.\n",
157 namelen, dirent->d_name);
158 exit(1);
159 }
160 memcpy(endpath, dirent->d_name, namelen + 1);
161
162 if (lstat(path, &st) < 0) {
163 perror(endpath);
164 continue;
165 }
166 entry = calloc(1, sizeof(struct entry));
167 if (!entry) {
168 perror(NULL);
169 exit(5);
170 }
171 entry->name = strdup(dirent->d_name);
172 if (!entry->name) {
173 perror(NULL);
174 exit(1);
175 }
176 if (namelen > 255) {
177 /* Can't happen when reading from ext2fs. */
178
179 /* TODO: we ought to avoid chopping in half
180 multi-byte UTF8 characters. */
181 entry->name[namelen = 255] = '\0';
182 warn_namelen = 1;
183 }
184 entry->mode = st.st_mode;
185 entry->size = st.st_size;
186 entry->uid = st.st_uid;
187 if (entry->uid >= 1 << UID_WIDTH)
188 warn_uid = 1;
189 entry->gid = st.st_gid;
190 if (entry->gid >= 1 << GID_WIDTH)
191 /* TODO: We ought to replace with a default
192 gid instead of truncating; otherwise there
193 are security problems. Maybe mode should
194 be &= ~070. Same goes for uid once Linux
195 supports >16-bit uids. */
196 warn_gid = 1;
197 size = sizeof(struct cramfs_inode) + ((namelen + 3) & ~3);
198 *fslen_ub += size;
199 if (S_ISDIR(st.st_mode)) {
200 entry->size = parse_directory(root_entry, path, &entry->child, fslen_ub);
201 } else if (S_ISREG(st.st_mode)) {
202 /* TODO: We ought to open files in do_compress, one
203 at a time, instead of amassing all these memory
204 maps during parse_directory (which don't get used
205 until do_compress anyway). As it is, we tend to
206 get EMFILE errors (especially if mkcramfs is run
207 by non-root).
208
209 While we're at it, do analagously for symlinks
210 (which would just save a little memory). */
211 int fd = open(path, O_RDONLY);
212 if (fd < 0) {
213 perror(path);
214 continue;
215 }
216 if (entry->size) {
217 if ((entry->size >= 1 << SIZE_WIDTH)) {
218 warn_size = 1;
219 entry->size = (1 << SIZE_WIDTH) - 1;
220 }
221
222 entry->uncompressed = mmap(NULL, entry->size, PROT_READ, MAP_PRIVATE, fd, 0);
223 if (-1 == (int) (long) entry->uncompressed) {
224 perror("mmap");
225 exit(5);
226 }
227 }
228 close(fd);
229 } else if (S_ISLNK(st.st_mode)) {
230 entry->uncompressed = malloc(entry->size);
231 if (!entry->uncompressed) {
232 perror(NULL);
233 exit(5);
234 }
235 if (readlink(path, entry->uncompressed, entry->size) < 0) {
236 perror(path);
237 continue;
238 }
239 } else {
240 entry->size = st.st_rdev;
241 if (entry->size & -(1<<SIZE_WIDTH))
242 warn_dev = 1;
243 }
244
245 if (S_ISREG(st.st_mode) || S_ISLNK(st.st_mode)) {
246 /* block pointers & data expansion allowance + data */
247 if(entry->size)
248 *fslen_ub += ((4+26)*((entry->size - 1) / blksize + 1)
249 + MIN(entry->size + 3, st.st_blocks << 9));
250 else
251 *fslen_ub += MIN(entry->size + 3, st.st_blocks << 9);
252 }
253
254 /* Link it into the list */
255 *prev = entry;
256 prev = &entry->next;
257 count++;
258 totalsize += size;
259 }
260 closedir(dir);
261 free(path);
262 return totalsize;
263 }
264
/*
 * Fill AREA with SIZE bytes read from /dev/random.
 *
 * If the device cannot be opened, or a single read() does not deliver
 * exactly SIZE bytes, the whole area is zero-filled instead.  The
 * descriptor is always closed before returning.
 */
static void set_random(void *area, size_t size)
{
	int fd = open("/dev/random", O_RDONLY);

	if (fd >= 0) {
		ssize_t got = read(fd, area, size);

		close(fd);
		/* Test for failure first so that -1 is never converted to size_t. */
		if (got >= 0 && (size_t) got == size)
			return;
	}
	memset(area, 0x00, size);
}
275
276 /* Returns sizeof(struct cramfs_super), which includes the root inode. */
277 static unsigned int write_superblock(struct entry *root, char *base)
278 {
279 struct cramfs_super *super = (struct cramfs_super *) base;
280 unsigned int offset = sizeof(struct cramfs_super);
281
282 super->magic = CRAMFS_MAGIC;
283 super->flags = 0;
284 /* Note: 0x10000 is meaningless, which is a bug; but
285 super->size is never used anyway. */
286 super->size = 0x10000;
287 memcpy(super->signature, CRAMFS_SIGNATURE, sizeof(super->signature));
288 set_random(super->fsid, sizeof(super->fsid));
289 strncpy(super->name, "Compressed", sizeof(super->name));
290
291 super->root.mode = root->mode;
292 super->root.uid = root->uid;
293 super->root.gid = root->gid;
294 super->root.size = root->size;
295 super->root.offset = offset >> 2;
296
297 return offset;
298 }
299
300 static void set_data_offset(struct entry *entry, char *base, unsigned long offset)
301 {
302 struct cramfs_inode *inode = (struct cramfs_inode *) (base + entry->dir_offset);
303 assert ((offset & 3) == 0);
304 if (offset >= (1 << (2 + OFFSET_WIDTH))) {
305 fprintf(stderr, "filesystem too big. Exiting.\n");
306 exit(1);
307 }
308 inode->offset = (offset >> 2);
309 }
310
311
312 /*
313 * We do a width-first printout of the directory
314 * entries, using a stack to remember the directories
315 * we've seen.
316 */
317 #define MAXENTRIES (100)
318 static unsigned int write_directory_structure(struct entry *entry, char *base, unsigned int offset)
319 {
320 int stack_entries = 0;
321 struct entry *entry_stack[MAXENTRIES];
322
323 for (;;) {
324 int dir_start = stack_entries;
325 while (entry) {
326 struct cramfs_inode *inode = (struct cramfs_inode *) (base + offset);
327 size_t len = strlen(entry->name);
328
329 entry->dir_offset = offset;
330
331 inode->mode = entry->mode;
332 inode->uid = entry->uid;
333 inode->gid = entry->gid;
334 inode->size = entry->size;
335 inode->offset = 0;
336 /* Non-empty directories, regfiles and symlinks will
337 write over inode->offset later. */
338
339 offset += sizeof(struct cramfs_inode);
340 memcpy(base + offset, entry->name, len);
341 /* Pad up the name to a 4-byte boundary */
342 while (len & 3) {
343 *(base + offset + len) = '\0';
344 len++;
345 }
346 inode->namelen = len >> 2;
347 offset += len;
348
349 /* TODO: this may get it wrong for chars >= 0x80.
350 Most filesystems use UTF8 encoding for filenames,
351 whereas the console is a single-byte character
352 set like iso-latin-1. */
353 printf(" %s\n", entry->name);
354 if (entry->child) {
355 if (stack_entries >= MAXENTRIES) {
356 fprintf(stderr, "Exceeded MAXENTRIES. Raise this value in mkcramfs.c and recompile. Exiting.\n");
357 exit(1);
358 }
359 entry_stack[stack_entries] = entry;
360 stack_entries++;
361 }
362 entry = entry->next;
363 }
364
365 /*
366 * Reverse the order the stack entries pushed during
367 * this directory, for a small optimization of disk
368 * access in the created fs. This change makes things
369 * `ls -UR' order.
370 */
371 {
372 struct entry **lo = entry_stack + dir_start;
373 struct entry **hi = entry_stack + stack_entries;
374 struct entry *tmp;
375
376 while (lo < --hi) {
377 tmp = *lo;
378 *lo++ = *hi;
379 *hi = tmp;
380 }
381 }
382
383 /* Pop a subdirectory entry from the stack, and recurse. */
384 if (!stack_entries)
385 break;
386 stack_entries--;
387 entry = entry_stack[stack_entries];
388
389 set_data_offset(entry, base, offset);
390 printf("'%s':\n", entry->name);
391 entry = entry->child;
392 }
393 return offset;
394 }
395
#ifdef DO_HOLES
/*
 * Returns non-zero iff the first LEN bytes from BEGIN are all NULs.
 *
 * The first (up to) four bytes are tested one by one.  Comparing the
 * buffer with itself shifted by four bytes then shows that every
 * remaining byte equals the one four positions earlier, i.e. is NUL.
 */
static int
is_zero(char const *begin, unsigned len)
{
	unsigned i;

	for (i = 0; i < 4; i++) {
		if (i == len)
			return 1;
		if (begin[i] != '\0')
			return 0;
	}
	return memcmp(begin, begin + 4, len - 4) == 0;
}
#else /* !DO_HOLES */
# define is_zero(_begin,_len) (0) /* Never create holes. */
#endif /* !DO_HOLES */
416
417 /*
418 * One 4-byte pointer per block and then the actual blocked
419 * output. The first block does not need an offset pointer,
420 * as it will start immediately after the pointer block;
421 * so the i'th pointer points to the end of the i'th block
422 * (i.e. the start of the (i+1)'th block or past EOF).
423 *
424 * Note that size > 0, as a zero-sized file wouldn't ever
425 * have gotten here in the first place.
426 */
427 static unsigned int do_compress(char *base, unsigned int offset, char const *name, char *uncompressed, unsigned int size)
428 {
429 unsigned long original_size = size;
430 unsigned long original_offset = offset;
431 unsigned long new_size;
432 unsigned long blocks = (size - 1) / blksize + 1;
433 unsigned long curr = offset + 4 * blocks;
434 int change;
435
436 do {
437 unsigned long len = 2 * blksize;
438 unsigned int input = size;
439 if (input > blksize)
440 input = blksize;
441 size -= input;
442 if (!is_zero (uncompressed, input)) {
443 compress(base + curr, &len, uncompressed, input);
444 curr += len;
445 }
446 uncompressed += input;
447
448 if (len > blksize*2) {
449 /* (I don't think this can happen with zlib.) */
450 printf("AIEEE: block \"compressed\" to > 2*blocklength (%ld)\n", len);
451 exit(1);
452 }
453
454 *(u32 *) (base + offset) = curr;
455 offset += 4;
456 } while (size);
457
458 curr = (curr + 3) & ~3;
459 new_size = curr - original_offset;
460 /* TODO: Arguably, original_size in these 2 lines should be
461 st_blocks * 512. But if you say that then perhaps
462 administrative data should also be included in both. */
463 change = new_size - original_size;
464 printf("%6.2f%% (%+d bytes)\t%s\n",
465 (change * 100) / (double) original_size, change, name);
466
467 return curr;
468 }
469
470
471 /*
472 * Traverse the entry tree, writing data for every item that has
473 * non-null entry->compressed (i.e. every symlink and non-empty
474 * regfile).
475 */
476 static unsigned int write_data(struct entry *entry, char *base, unsigned int offset)
477 {
478 do {
479 if (entry->uncompressed) {
480 if(entry->same) {
481 set_data_offset(entry, base, entry->same->offset);
482 entry->offset=entry->same->offset;
483 } else {
484 set_data_offset(entry, base, offset);
485 entry->offset=offset;
486 offset = do_compress(base, offset, entry->name, entry->uncompressed, entry->size);
487 }
488 }
489 else if (entry->child)
490 offset = write_data(entry->child, base, offset);
491 entry=entry->next;
492 } while (entry);
493 return offset;
494 }
495
496
/*
 * The largest filesystem you can create is roughly 256MB: the data of
 * the last file must begin within the 256MB boundary, but may extend
 * beyond it.
 *
 * Note that if you want it to fit in a ROM then you're limited to what the
 * hardware and kernel can support (64MB?).
 */
#define MAXFSLEN ( \
	(((1 << OFFSET_WIDTH) - 1) << 2)		/* offset */ \
	+ ((1 << SIZE_WIDTH) - 1)			/* filesize */ \
	+ ((1 << SIZE_WIDTH) * 4 / PAGE_CACHE_SIZE)	/* block pointers */ )
507
508
509 /*
510 * Usage:
511 *
512 * mkcramfs directory-name outfile
513 *
514 * where "directory-name" is simply the root of the directory
515 * tree that we want to generate a compressed filesystem out
516 * of.
517 */
518 int main(int argc, char **argv)
519 {
520 struct stat st;
521 struct entry *root_entry;
522 char *rom_image;
523 unsigned int offset;
524 ssize_t written;
525 int fd;
526 loff_t fslen_ub = 0; /* initial guess (upper-bound) of
527 required filesystem size */
528 char const *dirname;
529
530 if (argc)
531 progname = argv[0];
532 if (argc != 3)
533 usage();
534
535 if (stat(dirname = argv[1], &st) < 0) {
536 perror(argv[1]);
537 exit(1);
538 }
539 fd = open(argv[2], O_WRONLY | O_CREAT | O_TRUNC, 0666);
540
541 root_entry = calloc(1, sizeof(struct entry));
542 if (!root_entry) {
543 perror(NULL);
544 exit(5);
545 }
546 root_entry->mode = st.st_mode;
547 root_entry->uid = st.st_uid;
548 root_entry->gid = st.st_gid;
549
550 root_entry->size = parse_directory(root_entry, argv[1], &root_entry->child, &fslen_ub);
551 if (fslen_ub > MAXFSLEN) {
552 fprintf(stderr,
553 "warning: guestimate of required size (upper bound) is %luMB, but maximum image size is %uMB. We might die prematurely.\n",
554 (unsigned long) (fslen_ub >> 20),
555 MAXFSLEN >> 20);
556 fslen_ub = MAXFSLEN;
557 }
558
559 /* find duplicate files. TODO: uses the most inefficient algorithm
560 possible. */
561 eliminate_doubles(root_entry,root_entry);
562
563
564 /* TODO: Why do we use a private/anonymous mapping here
565 followed by a write below, instead of just a shared mapping
566 and a couple of ftruncate calls? Is it just to save us
567 having to deal with removing the file afterwards? If we
568 really need this huge anonymous mapping, we ought to mmap
569 in smaller chunks, so that the user doesn't need nn MB of
570 RAM free. If the reason is to be able to write to
571 un-mmappable block devices, then we could try shared mmap
572 and revert to anonymous mmap if the shared mmap fails. */
573 rom_image = mmap(NULL, fslen_ub, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
574 if (-1 == (int) (long) rom_image) {
575 perror("ROM image map");
576 exit(1);
577 }
578 offset = write_superblock(root_entry, rom_image);
579 printf("Super block: %d bytes\n", offset);
580
581 offset = write_directory_structure(root_entry->child, rom_image, offset);
582 printf("Directory data: %d bytes\n", offset);
583
584 offset = write_data(root_entry, rom_image, offset);
585
586 /* We always write a multiple of blksize bytes, so that
587 losetup works. */
588 offset = ((offset - 1) | (blksize - 1)) + 1;
589 printf("Everything: %d kilobytes\n", offset >> 10);
590
591 written = write(fd, rom_image, offset);
592 if (written < 0) {
593 perror("rom image");
594 exit(1);
595 }
596 if (offset != written) {
597 fprintf(stderr, "ROM image write failed (%d %d)\n", written, offset);
598 exit(1);
599 }
600
601 /* (These warnings used to come at the start, but they scroll off the
602 screen too quickly.) */
603 if (warn_namelen) /* (can't happen when reading from ext2fs) */
604 fprintf(stderr, /* bytes, not chars: think UTF8. */
605 "warning: filenames truncated to 255 bytes.\n");
606 if (warn_size)
607 fprintf(stderr,
608 "warning: file sizes truncated to %luMB (minus 1 byte).\n",
609 1L << (SIZE_WIDTH - 20));
610 if (warn_uid) /* (not possible with current Linux versions) */
611 fprintf(stderr,
612 "warning: uids truncated to %u bits. (This may be a security concern.)\n",
613 UID_WIDTH);
614 if (warn_gid)
615 fprintf(stderr,
616 "warning: gids truncated to %u bits. (This may be a security concern.)\n",
617 GID_WIDTH);
618 if (warn_dev)
619 fprintf(stderr,
620 "WARNING: device numbers truncated to %u bits. This almost certainly means\n"
621 "that some device files will be wrong.\n",
622 OFFSET_WIDTH);
623 return 0;
624 }
625
/*
 * This page was automatically generated by the LXR engine.
 * Visit the LXR main site for more information.
 */