~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~ [ freetext search ] ~ [ file search ] ~

Linux Cross Reference
Linux/fs/super.c

Version: ~ [ 2.4.0 ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

  1 /*
  2  *  linux/fs/super.c
  3  *
  4  *  Copyright (C) 1991, 1992  Linus Torvalds
  5  *
  6  *  super.c contains code to handle: - mount structures
  7  *                                   - super-block tables
  8  *                                   - filesystem drivers list
  9  *                                   - mount system call
 10  *                                   - umount system call
 11  *                                   - ustat system call
 12  *
 13  * GK 2/5/95  -  Changed to support mounting the root fs via NFS
 14  *
 15  *  Added kerneld support: Jacques Gelinas and Bjorn Ekwall
 16  *  Added change_root: Werner Almesberger & Hans Lermen, Feb '96
 17  *  Added options to /proc/mounts:
 18  *    Torbjörn Lindh (torbjorn.lindh@gopta.se), April 14, 1996.
 19  *  Added devfs support: Richard Gooch <rgooch@atnf.csiro.au>, 13-JAN-1998
 20  *  Heavily rewritten for 'one fs - one tree' dcache architecture. AV, Mar 2000
 21  */
 22 
 23 #include <linux/config.h>
 24 #include <linux/string.h>
 25 #include <linux/malloc.h>
 26 #include <linux/locks.h>
 27 #include <linux/smp_lock.h>
 28 #include <linux/devfs_fs_kernel.h>
 29 #include <linux/fd.h>
 30 #include <linux/init.h>
 31 #include <linux/quotaops.h>
 32 #include <linux/acct.h>
 33 
 34 #include <asm/uaccess.h>
 35 
 36 #include <linux/nfs_fs.h>
 37 #include <linux/nfs_fs_sb.h>
 38 #include <linux/nfs_mount.h>
 39 
 40 #include <linux/kmod.h>
 41 #define __NO_VERSION__
 42 #include <linux/module.h>
 43 
 44 /*
 45  * We use a semaphore to synchronize all mount/umount
 46  * activity - imagine the mess if we have a race between
 47  * unmounting a filesystem and re-mounting it (or something
 48  * else).
 49  */
 50 static DECLARE_MUTEX(mount_sem);
 51 
    /* Declared here only; the definition is not in this part of the file. */
 52 extern void wait_for_keypress(void);
 53 
    /* Declared here only; the definition is not in this part of the file. */
 54 extern int root_mountflags;
 55 
    /* Forward declaration: get_sb_single() calls this before its definition. */
 56 static int do_remount_sb(struct super_block *sb, int flags, char * data);
 57 
 58 /* this is initialized in init/main.c */
 59 kdev_t ROOT_DEV;
 60 
    /*
     * nr_super_blocks counts the superblocks allocated by get_empty_super();
     * max_super_blocks is the ceiling that function enforces.
     */
 61 int nr_super_blocks;
 62 int max_super_blocks = NR_SUPER;
    /* List of every allocated struct super_block, linked through ->s_list. */
 63 LIST_HEAD(super_blocks);
 64 
 65 /*
 66  * Handling of filesystem drivers list.
 67  * Rules:
 68  *      Inclusion to/removals from/scanning of list are protected by spinlock.
 69  *      During the unload module must call unregister_filesystem().
 70  *      We can access the fields of list element if:
 71  *              1) spinlock is held or
 72  *              2) we hold the reference to the module.
 73  *      The latter can be guaranteed by call of try_inc_mod_count(); if it
 74  *      returned 0 we must skip the element, otherwise we got the reference.
 75  *      Once the reference is obtained we can drop the spinlock.
 76  */
 77 
    /* Head of the singly linked list of registered types, chained via ->next. */
 78 static struct file_system_type *file_systems;
    /*
     * The "spinlock" the rules above refer to is this reader/writer lock:
     * scanners take it for reading, register/unregister take it for writing.
     */
 79 static rwlock_t file_systems_lock = RW_LOCK_UNLOCKED;
 80 
 81 /* WARNING: This can be used only if we _already_ own a reference */
    /* Bumps the use count of fs->owner; does nothing when fs->owner is NULL. */
 82 static void get_filesystem(struct file_system_type *fs)
 83 {
 84         if (fs->owner)
 85                 __MOD_INC_USE_COUNT(fs->owner);
 86 }
 87 
    /*
     * Drop one use-count reference on fs->owner.
     * Does nothing when fs->owner is NULL.
     */
 88 static void put_filesystem(struct file_system_type *fs)
 89 {
 90         if (fs->owner)
 91                 __MOD_DEC_USE_COUNT(fs->owner);
 92 }
 93 
    /*
     * Walk the file_systems list looking for an entry whose ->name equals
     * @name.  Returns the address of the link that points at the match, or
     * the address of the terminating NULL link when nothing matches, so a
     * caller can either test *p or append by storing through p.
     *
     * Takes no lock itself; the callers in this file hold file_systems_lock.
     */
 94 static struct file_system_type **find_filesystem(const char *name)
 95 {
 96         struct file_system_type **p;
 97         for (p=&file_systems; *p; p=&(*p)->next)
 98                 if (strcmp((*p)->name,name) == 0)
 99                         break;
100         return p;
101 }
102 
103 /**
104  *      register_filesystem - register a new filesystem
105  *      @fs: the file system structure
106  *
107  *      Adds the file system passed to the list of file systems the kernel
108  *      is aware of for mount and other syscalls. Returns 0 on success,
109  *      or a negative errno code on an error.
110  *
111  *      The &struct file_system_type that is passed is linked into the kernel 
112  *      structures and must not be freed until the file system has been
113  *      unregistered.
114  */
115  
    /*
     * Error returns visible below: -EINVAL for a NULL @fs, -EBUSY when
     * fs->next is already set or a type with the same name is on the list.
     */
116 int register_filesystem(struct file_system_type * fs)
117 {
118         int res = 0;
119         struct file_system_type ** p;
120 
121         if (!fs)
122                 return -EINVAL;
            /* A non-NULL ->next is treated as "already linked somewhere". */
123         if (fs->next)
124                 return -EBUSY;
125         write_lock(&file_systems_lock);
126         p = find_filesystem(fs->name);
127         if (*p)
128                 res = -EBUSY;
129         else
                    /* p is the terminating NULL link: append at the tail. */
130                 *p = fs;
131         write_unlock(&file_systems_lock);
132         return res;
133 }
134 
135 /**
136  *      unregister_filesystem - unregister a file system
137  *      @fs: filesystem to unregister
138  *
139  *      Remove a file system that was previously successfully registered
140  *      with the kernel. An error is returned if the file system is not found.
141  *      Zero is returned on a success.
142  *      
143  *      Once this function has returned the &struct file_system_type structure
144  *      may be freed or reused.
145  */
146  
    /* The match is by pointer identity, not by name; not found is -EINVAL. */
147 int unregister_filesystem(struct file_system_type * fs)
148 {
149         struct file_system_type ** tmp;
150 
151         write_lock(&file_systems_lock);
152         tmp = &file_systems;
153         while (*tmp) {
154                 if (fs == *tmp) {
                            /* Unlink, and clear ->next so the entry can be registered again. */
155                         *tmp = fs->next;
156                         fs->next = NULL;
157                         write_unlock(&file_systems_lock);
158                         return 0;
159                 }
160                 tmp = &(*tmp)->next;
161         }
162         write_unlock(&file_systems_lock);
163         return -EINVAL;
164 }
165 
    /*
     * sysfs option 1: translate a filesystem type name into its zero-based
     * position in the file_systems list.
     *
     * @__name is fetched with getname(); if that fails its error code is
     * returned.  Returns the index on a match, -EINVAL when no registered
     * type has that name.
     */
166 static int fs_index(const char * __name)
167 {
168         struct file_system_type * tmp;
169         char * name;
170         int err, index;
171 
172         name = getname(__name);
173         err = PTR_ERR(name);
174         if (IS_ERR(name))
175                 return err;
176 
177         err = -EINVAL;
178         read_lock(&file_systems_lock);
179         for (tmp=file_systems, index=0 ; tmp ; tmp=tmp->next, index++) {
180                 if (strcmp(tmp->name,name) == 0) {
181                         err = index;
182                         break;
183                 }
184         }
185         read_unlock(&file_systems_lock);
            /* Release the buffer obtained from getname(). */
186         putname(name);
187         return err;
188 }
189 
    /*
     * sysfs option 2: copy the NUL-terminated name of the @index'th
     * registered filesystem type to the user buffer @buf.
     *
     * Returns 0 on success, -EINVAL when no entry could be selected and
     * -EFAULT when copy_to_user() reports a failure.
     */
190 static int fs_name(unsigned int index, char * buf)
191 {
192         struct file_system_type * tmp;
193         int len, res;
194 
195         read_lock(&file_systems_lock);
            /*
             * index is unsigned, so "index <= 0" is effectively "index == 0".
             * The entry is only accepted if try_inc_mod_count() succeeds on
             * its owner, as the list rules require.
             */
196         for (tmp = file_systems; tmp; tmp = tmp->next, index--)
197                 if (index <= 0 && try_inc_mod_count(tmp->owner))
198                                 break;
199         read_unlock(&file_systems_lock);
200         if (!tmp)
201                 return -EINVAL;
202 
203         /* OK, we got the reference, so we can safely block */
204         len = strlen(tmp->name) + 1;
205         res = copy_to_user(buf, tmp->name, len) ? -EFAULT : 0;
            /* Give back the reference taken by try_inc_mod_count() above. */
206         put_filesystem(tmp);
207         return res;
208 }
209 
210 static int fs_maxindex(void)
211 {
212         struct file_system_type * tmp;
213         int index;
214 
215         read_lock(&file_systems_lock);
216         for (tmp = file_systems, index = 0 ; tmp ; tmp = tmp->next, index++)
217                 ;
218         read_unlock(&file_systems_lock);
219         return index;
220 }
221 
222 /*
223  * Whee.. Weird sysv syscall. 
224  */
225 asmlinkage long sys_sysfs(int option, unsigned long arg1, unsigned long arg2)
226 {
227         int retval = -EINVAL;
228 
229         switch (option) {
230                 case 1:
231                         retval = fs_index((const char *) arg1);
232                         break;
233 
234                 case 2:
235                         retval = fs_name(arg1, (char *) arg2);
236                         break;
237 
238                 case 3:
239                         retval = fs_maxindex();
240                         break;
241         }
242         return retval;
243 }
244 
    /*
     * Format the list of registered filesystem types into @buf, one
     * "<nodev-or-empty>\t<name>\n" line per type, and return the number of
     * bytes written.  "nodev" is printed for types without FS_REQUIRES_DEV.
     */
245 int get_filesystem_list(char * buf)
246 {
247         int len = 0;
248         struct file_system_type * tmp;
249 
250         read_lock(&file_systems_lock);
251         tmp = file_systems;
            /*
             * Stop adding entries once fewer than 80 bytes of a PAGE_SIZE
             * area would remain -- presumably @buf is one page; confirm
             * against the caller.
             */
252         while (tmp && len < PAGE_SIZE - 80) {
253                 len += sprintf(buf+len, "%s\t%s\n",
254                         (tmp->fs_flags & FS_REQUIRES_DEV) ? "" : "nodev",
255                         tmp->name);
256                 tmp = tmp->next;
257         }
258         read_unlock(&file_systems_lock);
259         return len;
260 }
261 
    /*
     * Look up a registered filesystem type by @name and take a reference on
     * its owner via try_inc_mod_count().  If the first lookup yields nothing
     * and request_module(@name) returns 0, the lookup is retried once.
     *
     * Returns the type with the reference held, or NULL.
     */
262 struct file_system_type *get_fs_type(const char *name)
263 {
264         struct file_system_type *fs;
265         
266         read_lock(&file_systems_lock);
267         fs = *(find_filesystem(name));
            /* Found but the reference could not be taken: treat as not found. */
268         if (fs && !try_inc_mod_count(fs->owner))
269                 fs = NULL;
270         read_unlock(&file_systems_lock);
            /* request_module() is called with the lock dropped. */
271         if (!fs && (request_module(name) == 0)) {
272                 read_lock(&file_systems_lock);
273                 fs = *(find_filesystem(name));
274                 if (fs && !try_inc_mod_count(fs->owner))
275                         fs = NULL;
276                 read_unlock(&file_systems_lock);
277         }
278         return fs;
279 }
280 
    /* All vfsmounts, linked through ->mnt_list; add_vfsmnt() appends at the tail. */
281 static LIST_HEAD(vfsmntlist);
282 
283 /**
284  *      add_vfsmnt - add a new mount node
285  *      @nd: location of mountpoint or %NULL if we want a root node
286  *      @root: root of (sub)tree to be mounted
287  *      @dev_name: device name to show in /proc/mounts or %NULL (for "none").
288  *
289  *      This is VFS idea of mount. New node is allocated, bound to a tree
290  *      we are mounting and optionally (OK, usually) registered as mounted
291  *      on a given mountpoint. Returns a pointer to new node or %NULL in
292  *      case of failure.
293  *
294  *      Potential reason for failure (aside of trivial lack of memory) is a
295  *      deleted mountpoint. Caller must hold ->i_zombie on mountpoint
296  *      dentry (if any).
297  *
298  *      Node is marked as MNT_VISIBLE (visible in /proc/mounts) unless both
299  *      @nd and @dev_name are %NULL. It works since we pass non-%NULL @dev_name
300  *      when we are mounting root and kern_mount() filesystems are deviceless.
301  *      If we will get a kern_mount() filesystem with nontrivial @dev_name we
302  *      will have to pass the visibility flag explicitly, so if we will add
303  *      support for such beasts we'll have to change prototype.
304  */
305 
306 static struct vfsmount *add_vfsmnt(struct nameidata *nd,
307                                 struct dentry *root,
308                                 const char *dev_name)
309 {
310         struct vfsmount *mnt;
311         struct super_block *sb = root->d_inode->i_sb;
312         char *name;
313 
314         mnt = kmalloc(sizeof(struct vfsmount), GFP_KERNEL);
315         if (!mnt)
316                 goto out;
317         memset(mnt, 0, sizeof(struct vfsmount));
318 
319         if (nd || dev_name)
320                 mnt->mnt_flags = MNT_VISIBLE;
321 
322         /* It may be NULL, but who cares? */
            /* i.e. a failed copy of dev_name just leaves mnt_devname NULL. */
323         if (dev_name) {
324                 name = kmalloc(strlen(dev_name)+1, GFP_KERNEL);
325                 if (name) {
326                         strcpy(name, dev_name);
327                         mnt->mnt_devname = name;
328                 }
329         }
330         mnt->mnt_owner = current->uid;
331         atomic_set(&mnt->mnt_count,1);
332         mnt->mnt_sb = sb;
333 
334         spin_lock(&dcache_lock);
            /* Refuse a mountpoint that is unhashed and is not a root dentry. */
335         if (nd && !IS_ROOT(nd->dentry) && d_unhashed(nd->dentry))
336                 goto fail;
337         mnt->mnt_root = dget(root);
            /* A root node (no @nd) is its own mountpoint and its own parent. */
338         mnt->mnt_mountpoint = nd ? dget(nd->dentry) : dget(root);
339         mnt->mnt_parent = nd ? mntget(nd->mnt) : mnt;
340 
341         if (nd) {
342                 list_add(&mnt->mnt_child, &nd->mnt->mnt_mounts);
343                 list_add(&mnt->mnt_clash, &nd->dentry->d_vfsmnt);
344         } else {
345                 INIT_LIST_HEAD(&mnt->mnt_child);
346                 INIT_LIST_HEAD(&mnt->mnt_clash);
347         }
348         INIT_LIST_HEAD(&mnt->mnt_mounts);
349         list_add(&mnt->mnt_instances, &sb->s_mounts);
            /* Inserting after ->prev of the head appends at the list tail. */
350         list_add(&mnt->mnt_list, vfsmntlist.prev);
351         spin_unlock(&dcache_lock);
352 out:
353         return mnt;
354 fail:
355         spin_unlock(&dcache_lock);
356         if (mnt->mnt_devname)
357                 kfree(mnt->mnt_devname);
358         kfree(mnt);
359         return NULL;
360 }
361 
    /*
     * Re-attach an existing mount node @mnt at @mountpoint under @parent
     * (or make it a root node when @parent is NULL), optionally replacing
     * the device name shown for it.
     *
     * If @dev_name is non-NULL but copying it fails, the old name is kept.
     * The references on the previous mountpoint and parent are dropped
     * after dcache_lock has been released.
     */
362 static void move_vfsmnt(struct vfsmount *mnt,
363                         struct dentry *mountpoint,
364                         struct vfsmount *parent,
365                         const char *dev_name)
366 {
367         struct dentry *old_mountpoint;
368         struct vfsmount *old_parent;
369         char *new_devname = NULL;
370 
            /*
             * The copy is made before taking dcache_lock -- presumably
             * because a GFP_KERNEL allocation may sleep; confirm.
             */
371         if (dev_name) {
372                 new_devname = kmalloc(strlen(dev_name)+1, GFP_KERNEL);
373                 if (new_devname)
374                         strcpy(new_devname, dev_name);
375         }
376 
377         spin_lock(&dcache_lock);
378         old_mountpoint = mnt->mnt_mountpoint;
379         old_parent = mnt->mnt_parent;
380 
381         /* flip names */
382         if (new_devname) {
383                 if (mnt->mnt_devname)
384                         kfree(mnt->mnt_devname);
385                 mnt->mnt_devname = new_devname;
386         }
387 
388         /* flip the linkage */
389         mnt->mnt_mountpoint = dget(mountpoint);
390         mnt->mnt_parent = parent ? mntget(parent) : mnt;
391         list_del(&mnt->mnt_clash);
392         list_del(&mnt->mnt_child);
393         if (parent) {
394                 list_add(&mnt->mnt_child, &parent->mnt_mounts);
395                 list_add(&mnt->mnt_clash, &mountpoint->d_vfsmnt);
396         } else {
397                 INIT_LIST_HEAD(&mnt->mnt_child);
398                 INIT_LIST_HEAD(&mnt->mnt_clash);
399         }
400         spin_unlock(&dcache_lock);
401 
402         /* put the old stuff */
403         dput(old_mountpoint);
            /* A root node is its own parent and holds no reference on itself. */
404         if (old_parent != mnt)
405                 mntput(old_parent);
406 }
407 
408 /*
409  * Called with spinlock held, releases it.
410  */
    /*
     * The spinlock in question is dcache_lock.  Unlinks @mnt from all four
     * of its lists, drops the lock, then releases the references it holds
     * (parent, mountpoint, root) and frees the device name and the node.
     */
411 static void remove_vfsmnt(struct vfsmount *mnt)
412 {
413         /* First of all, remove it from all lists */
414         list_del(&mnt->mnt_instances);
415         list_del(&mnt->mnt_clash);
416         list_del(&mnt->mnt_list);
417         list_del(&mnt->mnt_child);
418         spin_unlock(&dcache_lock);
419         /* Now we can work safely */
            /* A root node is its own parent and holds no reference on itself. */
420         if (mnt->mnt_parent != mnt)
421                 mntput(mnt->mnt_parent);
422 
423         dput(mnt->mnt_mountpoint);
424         dput(mnt->mnt_root);
425         if (mnt->mnt_devname)
426                 kfree(mnt->mnt_devname);
427         kfree(mnt);
428 }
429 
430 
/* Use octal escapes, like mount does, for embedded spaces etc. */
static unsigned char need_escaping[] = { ' ', '\t', '\n', '\\' };

/*
 * mangle - copy a string, octal-escaping whitespace and backslashes
 * @s:   NUL-terminated source string
 * @buf: destination buffer
 * @len: room available in @buf
 *
 * Every byte listed in need_escaping[] is written as a backslash followed
 * by three octal digits (a space becomes "\040"); all other bytes are
 * copied unchanged.  Copying stops at the end of @s or as soon as fewer
 * than four bytes of room remain, so one full escape always fits.
 *
 * Returns the number of bytes stored in @buf.  No trailing NUL is written.
 */
static int
mangle(const unsigned char *s, char *buf, int len)
{
        char *sp = buf;

        while (*s && sp - buf < len - 3) {
                unsigned int n;
                int escape = 0;

                for (n = 0; n < sizeof(need_escaping); n++) {
                        if (*s == need_escaping[n]) {
                                escape = 1;
                                break;
                        }
                }

                if (escape) {
                        /* Digits are offsets from the character '0'. */
                        *sp++ = '\\';
                        *sp++ = '0' + ((*s & 0300) >> 6);
                        *sp++ = '0' + ((*s & 070) >> 3);
                        *sp++ = '0' + (*s & 07);
                } else {
                        *sp++ = *s;
                }
                s++;
        }
        return sp - buf;        /* no trailing NUL */
}
456 
    /*
     * Superblock flag -> option text table used by get_filesystem_info().
     * The text is appended when the flag is set in ->s_flags.  The table is
     * terminated by an entry whose flag is 0.
     */
457 static struct proc_fs_info {
458         int flag;
459         char *str;
460 } fs_info[] = {
461         { MS_NOEXEC, ",noexec" },
462         { MS_NOSUID, ",nosuid" },
463         { MS_NODEV, ",nodev" },
464         { MS_SYNCHRONOUS, ",sync" },
465         { MS_MANDLOCK, ",mand" },
466         { MS_NOATIME, ",noatime" },
467         { MS_NODIRATIME, ",nodiratime" },
468 #ifdef MS_NOSUB                 /* Can't find this except in mount.c */
469         { MS_NOSUB, ",nosub" },
470 #endif
471         { 0, NULL }
472 };
473 
    /*
     * NFS mount flag -> option text table used by get_filesystem_info().
     * @str is appended when the flag is set in the server flags, @nostr
     * (possibly empty) when it is clear.  Terminated by a flag of 0.
     */
474 static struct proc_nfs_info {
475         int flag;
476         char *str;
477         char *nostr;
478 } nfs_info[] = {
479         { NFS_MOUNT_SOFT, ",soft", ",hard" },
480         { NFS_MOUNT_INTR, ",intr", "" },
481         { NFS_MOUNT_POSIX, ",posix", "" },
482         { NFS_MOUNT_TCP, ",tcp", ",udp" },
483         { NFS_MOUNT_NOCTO, ",nocto", "" },
484         { NFS_MOUNT_NOAC, ",noac", "" },
485         { NFS_MOUNT_NONLM, ",nolock", ",lock" },
486         { NFS_MOUNT_BROKEN_SUID, ",broken_suid", "" },
487         { 0, NULL, NULL }
488 };
489 
    /*
     * Write one line per visible mount into @buf:
     *      <devname> <path> <fstype> <ro|rw><options> 0 0
     * and return the number of bytes written.  Returns 0 if the scratch
     * page used for d_path() cannot be allocated.
     *
     * Device name, path, type name and option strings go through mangle(),
     * so whitespace and backslashes appear as octal escapes.  For "nfs"
     * mounts extra options are taken from the NFS server structure.
     *
     * NOTE(review): vfsmntlist is walked here without taking dcache_lock,
     * and the sprintf() calls are not bounded by FREEROOM; only the mangled
     * fields are.  Presumably @buf is one page -- confirm against the caller.
     */
490 int get_filesystem_info( char *buf )
491 {
492         struct list_head *p;
493         struct proc_fs_info *fs_infop;
494         struct proc_nfs_info *nfs_infop;
495         struct nfs_server *nfss;
496         int len, prevlen;
497         char *path, *buffer = (char *) __get_free_page(GFP_KERNEL);
498 
499         if (!buffer) return 0;
500         len = prevlen = 0;
501 
    /* FREEROOM: room left in a PAGE_SIZE output area, minus a 200-byte reserve. */
502 #define FREEROOM        ((int)PAGE_SIZE-200-len)
    /* MANGLE: append an escaped copy of s at buf+len and advance len. */
503 #define MANGLE(s)       len += mangle((s), buf+len, FREEROOM);
504 
505         for (p = vfsmntlist.next; p != &vfsmntlist; p = p->next) {
506                 struct vfsmount *tmp = list_entry(p, struct vfsmount, mnt_list);
507                 if (!(tmp->mnt_flags & MNT_VISIBLE))
508                         continue;
509                 path = d_path(tmp->mnt_root, tmp, buffer, PAGE_SIZE);
510                 if (!path)
511                         continue;
512                 MANGLE(tmp->mnt_devname ? tmp->mnt_devname : "none");
513                 buf[len++] = ' ';
514                 MANGLE(path);
515                 buf[len++] = ' ';
516                 MANGLE(tmp->mnt_sb->s_type->name);
517                 len += sprintf(buf+len, " %s",
518                                tmp->mnt_sb->s_flags & MS_RDONLY ? "ro" : "rw");
519                 for (fs_infop = fs_info; fs_infop->flag; fs_infop++) {
520                         if (tmp->mnt_sb->s_flags & fs_infop->flag)
521                                 MANGLE(fs_infop->str);
522                 }
523                 if (!strcmp("nfs", tmp->mnt_sb->s_type->name)) {
524                         nfss = &tmp->mnt_sb->u.nfs_sb.s_server;
525                         len += sprintf(buf+len, ",v%d", nfss->rpc_ops->version);
526 
527                         len += sprintf(buf+len, ",rsize=%d", nfss->rsize);
528 
529                         len += sprintf(buf+len, ",wsize=%d", nfss->wsize);
530 #if 0
531                         if (nfss->timeo != 7*HZ/10) {
532                                 len += sprintf(buf+len, ",timeo=%d",
533                                                nfss->timeo*10/HZ);
534                         }
535                         if (nfss->retrans != 3) {
536                                 len += sprintf(buf+len, ",retrans=%d",
537                                                nfss->retrans);
538                         }
539 #endif
                            /*
                             * Each attribute-cache value is printed only when
                             * it differs from the constant it is compared to.
                             */
540                         if (nfss->acregmin != 3*HZ) {
541                                 len += sprintf(buf+len, ",acregmin=%d",
542                                                nfss->acregmin/HZ);
543                         }
544                         if (nfss->acregmax != 60*HZ) {
545                                 len += sprintf(buf+len, ",acregmax=%d",
546                                                nfss->acregmax/HZ);
547                         }
548                         if (nfss->acdirmin != 30*HZ) {
549                                 len += sprintf(buf+len, ",acdirmin=%d",
550                                                nfss->acdirmin/HZ);
551                         }
552                         if (nfss->acdirmax != 60*HZ) {
553                                 len += sprintf(buf+len, ",acdirmax=%d",
554                                                nfss->acdirmax/HZ);
555                         }
556                         for (nfs_infop = nfs_info; nfs_infop->flag; nfs_infop++) {
557                                 char *str;
558                                 if (nfss->flags & nfs_infop->flag)
559                                         str = nfs_infop->str;
560                                 else
561                                         str = nfs_infop->nostr;
562                                 MANGLE(str);
563                         }
564                         len += sprintf(buf+len, ",addr=");
565                         MANGLE(nfss->hostname);
566                 }
567                 len += sprintf(buf + len, " 0 0\n");
                    /*
                     * Out of room: discard the entry just written by rewinding
                     * to the end of the previous one, and mark the truncation.
                     */
568                 if (FREEROOM <= 3) {
569                         len = prevlen;
570                         len += sprintf(buf+len, "# truncated\n");
571                         break;
572                 }
573                 prevlen = len;
574         }
575 
576         free_page((unsigned long) buffer);
577         return len;
578 #undef MANGLE
579 #undef FREEROOM
580 }
581 
582 /**
583  *      __wait_on_super - wait on a superblock
584  *      @sb: superblock to wait on
585  *
586  *      Waits for a superblock to become unlocked and then returns. It does
587  *      not take the lock. This is an internal function. See wait_on_super().
588  */
589  
590 void __wait_on_super(struct super_block * sb)
591 {
592         DECLARE_WAITQUEUE(wait, current);
593 
594         add_wait_queue(&sb->s_wait, &wait);
595 repeat:
            /*
             * The task state is set before s_lock is tested, and both are
             * redone after every return from schedule().
             */
596         set_current_state(TASK_UNINTERRUPTIBLE);
597         if (sb->s_lock) {
598                 schedule();
599                 goto repeat;
600         }
601         remove_wait_queue(&sb->s_wait, &wait);
602         current->state = TASK_RUNNING;
603 }
604 
605 /*
606  * Note: check the dirty flag before waiting, so we don't
607  * hold up the sync while mounting a device. (The newly
608  * mounted device won't need syncing.)
609  */
    /*
     * Calls ->write_super() for every in-use, dirty superblock; when @dev is
     * non-zero only the superblock on that device is considered.
     */
610 void sync_supers(kdev_t dev)
611 {
612         struct super_block * sb;
613 
614         for (sb = sb_entry(super_blocks.next);
615              sb != sb_entry(&super_blocks); 
616              sb = sb_entry(sb->s_list.next)) {
617                 if (!sb->s_dev)
618                         continue;
619                 if (dev && sb->s_dev != dev)
620                         continue;
621                 if (!sb->s_dirt)
622                         continue;
623                 lock_super(sb);
                    /* Same three conditions as above, re-tested under the lock. */
624                 if (sb->s_dev && sb->s_dirt && (!dev || dev == sb->s_dev))
625                         if (sb->s_op && sb->s_op->write_super)
626                                 sb->s_op->write_super(sb);
627                 unlock_super(sb);
628         }
629 }
630 
631 /**
632  *      get_super       -       get the superblock of a device
633  *      @dev: device to get the superblock for
634  *      
635  *      Scans the superblock list and finds the superblock of the file system
636  *      mounted on the device given. %NULL is returned if no match is found.
637  */
638  
639 struct super_block * get_super(kdev_t dev)
640 {
641         struct super_block * s;
642 
            /* A zero @dev is rejected up front. */
643         if (!dev)
644                 return NULL;
645 restart:
646         s = sb_entry(super_blocks.next);
647         while (s != sb_entry(&super_blocks))
648                 if (s->s_dev == dev) {
649                         wait_on_super(s);
                            /*
                             * s_dev is re-tested after the wait; if it no
                             * longer matches, the scan restarts from the head.
                             */
650                         if (s->s_dev == dev)
651                                 return s;
652                         goto restart;
653                 } else
654                         s = sb_entry(s->s_list.next);
655         return NULL;
656 }
657 
    /*
     * ustat system call: report free blocks and free inodes of the
     * filesystem mounted on @dev.
     *
     * Returns 0 on success, -EINVAL when no superblock matches @dev, the
     * error from vfs_statfs() if it fails, or -EFAULT when the result
     * cannot be copied to @ubuf.
     */
658 asmlinkage long sys_ustat(dev_t dev, struct ustat * ubuf)
659 {
660         struct super_block *s;
661         struct ustat tmp;
662         struct statfs sbuf;
663         int err = -EINVAL;
664 
            /* The superblock lookup is done under the big kernel lock. */
665         lock_kernel();
666         s = get_super(to_kdev_t(dev));
667         unlock_kernel();
668         if (s == NULL)
669                 goto out;
670         err = vfs_statfs(s, &sbuf);
671         if (err)
672                 goto out;
673 
            /* Only f_tfree and f_tinode are filled in; every other field stays 0. */
674         memset(&tmp,0,sizeof(struct ustat));
675         tmp.f_tfree = sbuf.f_bfree;
676         tmp.f_tinode = sbuf.f_ffree;
677 
678         err = copy_to_user(ubuf,&tmp,sizeof(struct ustat)) ? -EFAULT : 0;
679 out:
680         return err;
681 }
682 
683 /**
684  *      get_empty_super -       find empty superblocks
685  *
686  *      Find a superblock with no device assigned. A free superblock is 
687  *      found and returned. If necessary new superblocks are allocated.
688  *      %NULL is returned if there are insufficient resources to complete
689  *      the request.
690  */
691  
692 struct super_block *get_empty_super(void)
693 {
694         struct super_block *s;
695 
            /* First try to recycle an existing entry: no device and not locked. */
696         for (s  = sb_entry(super_blocks.next);
697              s != sb_entry(&super_blocks); 
698              s  = sb_entry(s->s_list.next)) {
699                 if (s->s_dev)
700                         continue;
701                 if (!s->s_lock)
702                         return s;
703                 printk("VFS: empty superblock %p locked!\n", s);
704         }
705         /* Need a new one... */
706         if (nr_super_blocks >= max_super_blocks)
707                 return NULL;
708         s = kmalloc(sizeof(struct super_block),  GFP_USER);
709         if (s) {
710                 nr_super_blocks++;
711                 memset(s, 0, sizeof(struct super_block));
712                 INIT_LIST_HEAD(&s->s_dirty);
                    /* Inserting after ->prev of the head appends at the list tail. */
713                 list_add (&s->s_list, super_blocks.prev);
714                 init_waitqueue_head(&s->s_wait);
715                 INIT_LIST_HEAD(&s->s_files);
716                 INIT_LIST_HEAD(&s->s_mounts);
717         }
718         return s;
719 }
720 
    /*
     * Obtain a free superblock, initialise it for (@dev, @bdev, @type,
     * @flags) and let the filesystem fill it in via type->read_super(),
     * which is called with the superblock locked.
     *
     * Returns the superblock, or NULL when none is available or the
     * filesystem's read_super() returns NULL.  On success with a non-NULL
     * @bdev, bdev->bd_count is incremented.  On failure the device, bdev
     * and type fields are cleared again, so get_empty_super() can hand the
     * entry out once more.
     */
721 static struct super_block * read_super(kdev_t dev, struct block_device *bdev,
722                                        struct file_system_type *type, int flags,
723                                        void *data, int silent)
724 {
725         struct super_block * s;
726         s = get_empty_super();
727         if (!s)
728                 goto out;
729         s->s_dev = dev;
730         s->s_bdev = bdev;
731         s->s_flags = flags;
732         s->s_dirt = 0;
733         sema_init(&s->s_vfs_rename_sem,1);
734         sema_init(&s->s_nfsd_free_path_sem,1);
735         s->s_type = type;
736         sema_init(&s->s_dquot.dqio_sem, 1);
737         sema_init(&s->s_dquot.dqoff_sem, 1);
738         s->s_dquot.flags = 0;
739         lock_super(s);
740         if (!type->read_super(s, data, silent))
741                 goto out_fail;
742         unlock_super(s);
743         /* tell bdcache that we are going to keep this one */
744         if (bdev)
745                 atomic_inc(&bdev->bd_count);
746 out:
747         return s;
748 
749 out_fail:
750         s->s_dev = 0;
751         s->s_bdev = 0;
752         s->s_type = NULL;
753         unlock_super(s);
754         return NULL;
755 }
756 
757 /*
758  * Unnamed block devices are dummy devices used by virtual
759  * filesystems which don't use real block-devices.  -- jrs
760  */
761 
762 static unsigned int unnamed_dev_in_use[256/(8*sizeof(unsigned int))];
763 
764 kdev_t get_unnamed_dev(void)
765 {
766         int i;
767 
768         for (i = 1; i < 256; i++) {
769                 if (!test_and_set_bit(i,unnamed_dev_in_use))
770                         return MKDEV(UNNAMED_MAJOR, i);
771         }
772         return 0;
773 }
774 
775 void put_unnamed_dev(kdev_t dev)
776 {
777         if (!dev || MAJOR(dev) != UNNAMED_MAJOR)
778                 return;
779         if (test_and_clear_bit(MINOR(dev), unnamed_dev_in_use))
780                 return;
781         printk("VFS: put_unnamed_dev: freeing unused device %s\n",
782                         kdevname(dev));
783 }
784 
785 static struct super_block *get_sb_bdev(struct file_system_type *fs_type,
786         char *dev_name, int flags, void * data)
787 {
788         struct inode *inode;
789         struct block_device *bdev;
790         struct block_device_operations *bdops;
791         struct super_block * sb;
792         struct nameidata nd;
793         kdev_t dev;
794         int error = 0;
795         /* What device it is? */
796         if (!dev_name || !*dev_name)
797                 return ERR_PTR(-EINVAL);
798         if (path_init(dev_name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, &nd))
799                 error = path_walk(dev_name, &nd);
800         if (error)
801                 return ERR_PTR(error);
802         inode = nd.dentry->d_inode;
803         error = -ENOTBLK;
804         if (!S_ISBLK(inode->i_mode))
805                 goto out;
806         error = -EACCES;
807         if (IS_NODEV(inode))
808                 goto out;
809         bdev = inode->i_bdev;
810         bdops = devfs_get_ops ( devfs_get_handle_from_inode (inode) );
811         if (bdops) bdev->bd_op = bdops;
812         /* Done with lookups, semaphore down */
813         down(&mount_sem);
814         dev = to_kdev_t(bdev->bd_dev);
815         sb = get_super(dev);
816         if (sb) {
817                 if (fs_type == sb->s_type &&
818                     ((flags ^ sb->s_flags) & MS_RDONLY) == 0) {
819                         path_release(&nd);
820                         return sb;
821                 }
822         } else {
823                 mode_t mode = FMODE_READ; /* we always need it ;-) */
824                 if (!(flags & MS_RDONLY))
825                         mode |= FMODE_WRITE;
826                 error = blkdev_get(bdev, mode, 0, BDEV_FS);
827                 if (error)
828                         goto out;
829                 check_disk_change(dev);
830                 error = -EACCES;
831                 if (!(flags & MS_RDONLY) && is_read_only(dev))
832                         goto out1;
833                 error = -EINVAL;
834                 sb = read_super(dev, bdev, fs_type, flags, data, 0);
835                 if (sb) {
836                         get_filesystem(fs_type);
837                         path_release(&nd);
838                         return sb;
839                 }
840 out1:
841                 blkdev_put(bdev, BDEV_FS);
842         }
843 out:
844         path_release(&nd);
845         up(&mount_sem);
846         return ERR_PTR(error);
847 }
848 
    /*
     * Get a superblock for a filesystem that needs no real block device:
     * an unnamed device number is allocated and read_super() is called
     * with a NULL bdev.
     *
     * On success the superblock is returned with mount_sem still held (no
     * up() on that path) and an extra reference on @fs_type.  On failure
     * the unnamed device is given back, mount_sem is released and an
     * ERR_PTR() is returned: -EMFILE when no unnamed device is free,
     * -EINVAL when read_super() fails.
     */
849 static struct super_block *get_sb_nodev(struct file_system_type *fs_type,
850         int flags, void * data)
851 {
852         kdev_t dev;
853         int error = -EMFILE;
854         down(&mount_sem);
855         dev = get_unnamed_dev();
856         if (dev) {
857                 struct super_block * sb;
858                 error = -EINVAL;
859                 sb = read_super(dev, NULL, fs_type, flags, data, 0);
860                 if (sb) {
861                         get_filesystem(fs_type);
862                         return sb;
863                 }
864                 put_unnamed_dev(dev);
865         }
866         up(&mount_sem);
867         return ERR_PTR(error);
868 }
869 
    /*
     * Return the superblock of the kernel-wide instance of @fs_type, i.e.
     * the one reached through fs_type->kern_mnt, after applying @flags and
     * @data to it with do_remount_sb().
     *
     * Returns with mount_sem held (there is no up() here) and with an extra
     * reference on @fs_type.  The return value of do_remount_sb() is
     * ignored.  A missing superblock is a BUG().
     */
870 static struct super_block *get_sb_single(struct file_system_type *fs_type,
871         int flags, void *data)
872 {
873         struct super_block * sb;
874         /*
875          * Get the superblock of kernel-wide instance, but
876          * keep the reference to fs_type.
877          */
878         down(&mount_sem);
879         sb = fs_type->kern_mnt->mnt_sb;
880         if (!sb)
881                 BUG();
882         get_filesystem(fs_type);
883         do_remount_sb(sb, flags, data);
884         return sb;
885 }
886 
887 static void kill_super(struct super_block *sb, int umount_root)
888 {
889         struct block_device *bdev;
890         kdev_t dev;
891         struct dentry *root = sb->s_root;
892         struct file_system_type *fs = sb->s_type;
893         struct super_operations *sop = sb->s_op;
894 
895         sb->s_root = NULL;
896         /* Need to clean after the sucker */
897         if (fs->fs_flags & FS_LITTER)
898                 d_genocide(root);
899         if (fs->fs_flags & (FS_SINGLE|FS_LITTER))
900                 shrink_dcache_parent(root);
901         dput(root);
902         lock_super(sb);
903         if (sop) {
904                 if (sop->write_super && sb->s_dirt)
905                         sop->write_super(sb);
906                 if (sop->put_super)
907                         sop->put_super(sb);
908         }
909 
910         /* Forget any remaining inodes */
911         if (invalidate_inodes(sb)) {
912                 printk("VFS: Busy inodes after unmount. "
913                         "Self-destruct in 5 seconds.  Have a nice day...\n");
914         }
915 
916         dev = sb->s_dev;
917         sb->s_dev = 0;          /* Free the superblock */
918         bdev = sb->s_bdev;
919         sb->s_bdev = NULL;
920         put_filesystem(fs);
921         sb->s_type = NULL;
922         unlock_super(sb);
923         if (umount_root) {
924                 /* special: the old device driver is going to be
925                    a ramdisk and the point of this call is to free its
926                    protected memory (even if dirty). */
927                 destroy_buffers(dev);
928         }
929         if (bdev) {
930                 blkdev_put(bdev, BDEV_FS);
931                 bdput(bdev);
932         } else
933                 put_unnamed_dev(dev);
934 }
935 
936 /*
937  * Alters the mount flags of a mounted file system. Only the mount point
938  * is used as a reference - file system type and the device are ignored.
939  */
940 
941 static int do_remount_sb(struct super_block *sb, int flags, char *data)
942 {
943         int retval;
944         
945         if (!(flags & MS_RDONLY) && sb->s_dev && is_read_only(sb->s_dev))
946                 return -EACCES;
947                 /*flags |= MS_RDONLY;*/
948         /* If we are remounting RDONLY, make sure there are no rw files open */
949         if ((flags & MS_RDONLY) && !(sb->s_flags & MS_RDONLY))
950                 if (!fs_may_remount_ro(sb))
951                         return -EBUSY;
952         if (sb->s_op && sb->s_op->remount_fs) {
953                 lock_super(sb);
954                 retval = sb->s_op->remount_fs(sb, &flags, data);
955                 unlock_super(sb);
956                 if (retval)
957                         return retval;
958         }
959         sb->s_flags = (sb->s_flags & ~MS_RMT_MASK) | (flags & MS_RMT_MASK);
960 
961         /*
962          * We can't invalidate inodes as we can loose data when remounting
963          * (someone might manage to alter data while we are waiting in lock_super()
964          * or in foo_remount_fs()))
965          */
966 
967         return 0;
968 }
969 
970 struct vfsmount *kern_mount(struct file_system_type *type)
971 {
972         kdev_t dev = get_unnamed_dev();
973         struct super_block *sb;
974         struct vfsmount *mnt;
975         if (!dev)
976                 return ERR_PTR(-EMFILE);
977         sb = read_super(dev, NULL, type, 0, NULL, 0);
978         if (!sb) {
979                 put_unnamed_dev(dev);
980                 return ERR_PTR(-EINVAL);
981         }
982         mnt = add_vfsmnt(NULL, sb->s_root, NULL);
983         if (!mnt) {
984                 kill_super(sb, 0);
985                 return ERR_PTR(-ENOMEM);
986         }
987         type->kern_mnt = mnt;
988         return mnt;
989 }
990 
991 /* Call only after unregister_filesystem() - it's a final cleanup */
992 
993 void kern_umount(struct vfsmount *mnt)
994 {
995         struct super_block *sb = mnt->mnt_sb;
996         spin_lock(&dcache_lock);
997         remove_vfsmnt(mnt);
998         kill_super(sb, 0);
999 }
1000 
1001 /*
1002  * Doesn't take quota and stuff into account. IOW, in some cases it will
1003  * give false negatives. The main reason why it's here is that we need
1004  * a non-destructive way to look for easily umountable filesystems.
1005  */
1006 int may_umount(struct vfsmount *mnt)
1007 {
1008         if (atomic_read(&mnt->mnt_count) > 2)
1009                 return -EBUSY;
1010         return 0;
1011 }
1012 
1013 static int do_umount(struct vfsmount *mnt, int umount_root, int flags)
1014 {
1015         struct super_block * sb = mnt->mnt_sb;
1016 
1017         /*
1018          * No sense to grab the lock for this test, but test itself looks
1019          * somewhat bogus. Suggestions for better replacement?
1020          * Ho-hum... In principle, we might treat that as umount + switch
1021          * to rootfs. GC would eventually take care of the old vfsmount.
1022          * The problem being: we have to implement rootfs and GC for that ;-)
1023          * Actually it makes sense, especially if rootfs would contain a
1024          * /reboot - static binary that would close all descriptors and
1025          * call reboot(9). Then init(8) could umount root and exec /reboot.
1026          */
1027         if (mnt == current->fs->rootmnt && !umount_root) {
1028                 int retval = 0;
1029                 /*
1030                  * Special case for "unmounting" root ...
1031                  * we just try to remount it readonly.
1032                  */
1033                 mntput(mnt);
1034                 if (!(sb->s_flags & MS_RDONLY))
1035                         retval = do_remount_sb(sb, MS_RDONLY, 0);
1036                 return retval;
1037         }
1038 
1039         spin_lock(&dcache_lock);
1040 
1041         if (mnt->mnt_instances.next != mnt->mnt_instances.prev) {
1042                 if (atomic_read(&mnt->mnt_count) > 2) {
1043                         spin_unlock(&dcache_lock);
1044                         mntput(mnt);
1045                         return -EBUSY;
1046                 }
1047                 if (sb->s_type->fs_flags & FS_SINGLE)
1048                         put_filesystem(sb->s_type);
1049                 /* We hold two references, so mntput() is safe */
1050                 mntput(mnt);
1051                 remove_vfsmnt(mnt);
1052                 return 0;
1053         }
1054         spin_unlock(&dcache_lock);
1055 
1056         /*
1057          * Before checking whether the filesystem is still busy,
1058          * make sure the kernel doesn't hold any quota files open
1059          * on the device. If the umount fails, too bad -- there
1060          * are no quotas running any more. Just turn them on again.
1061          */
1062         DQUOT_OFF(sb);
1063         acct_auto_close(sb->s_dev);
1064 
1065         /*
1066          * If we may have to abort operations to get out of this
1067          * mount, and they will themselves hold resources we must
1068          * allow the fs to do things. In the Unix tradition of
1069          * 'Gee thats tricky lets do it in userspace' the umount_begin
1070          * might fail to complete on the first run through as other tasks
1071          * must return, and the like. Thats for the mount program to worry
1072          * about for the moment.
1073          */
1074 
1075         if( (flags&MNT_FORCE) && sb->s_op->umount_begin)
1076                 sb->s_op->umount_begin(sb);
1077 
1078         /*
1079          * Shrink dcache, then fsync. This guarantees that if the
1080          * filesystem is quiescent at this point, then (a) only the
1081          * root entry should be in use and (b) that root entry is
1082          * clean.
1083          */
1084         shrink_dcache_sb(sb);
1085         fsync_dev(sb->s_dev);
1086 
1087         if (sb->s_root->d_inode->i_state) {
1088                 mntput(mnt);
1089                 return -EBUSY;
1090         }
1091 
1092         /* Something might grab it again - redo checks */
1093 
1094         spin_lock(&dcache_lock);
1095         if (atomic_read(&mnt->mnt_count) > 2) {
1096                 spin_unlock(&dcache_lock);
1097                 mntput(mnt);
1098                 return -EBUSY;
1099         }
1100 
1101         /* OK, that's the point of no return */
1102         mntput(mnt);
1103         remove_vfsmnt(mnt);
1104 
1105         kill_super(sb, umount_root);
1106         return 0;
1107 }
1108 
1109 /*
1110  * Now umount can handle mount points as well as block devices.
1111  * This is important for filesystems which use unnamed block devices.
1112  *
1113  * We now support a flag for forced unmount like the other 'big iron'
1114  * unixes. Our API is identical to OSF/1 to avoid making a mess of AMD
1115  */
1116 
1117 asmlinkage long sys_umount(char * name, int flags)
1118 {
1119         struct nameidata nd;
1120         char *kname;
1121         int retval;
1122 
1123         lock_kernel();
1124         kname = getname(name);
1125         retval = PTR_ERR(kname);
1126         if (IS_ERR(kname))
1127                 goto out;
1128         retval = 0;
1129         if (path_init(kname, LOOKUP_POSITIVE|LOOKUP_FOLLOW, &nd))
1130                 retval = path_walk(kname, &nd);
1131         putname(kname);
1132         if (retval)
1133                 goto out;
1134         retval = -EINVAL;
1135         if (nd.dentry != nd.mnt->mnt_root)
1136                 goto dput_and_out;
1137 
1138         retval = -EPERM;
1139         if (!capable(CAP_SYS_ADMIN) && current->uid!=nd.mnt->mnt_owner)
1140                 goto dput_and_out;
1141 
1142         dput(nd.dentry);
1143         /* puts nd.mnt */
1144         down(&mount_sem);
1145         retval = do_umount(nd.mnt, 0, flags);
1146         up(&mount_sem);
1147         goto out;
1148 dput_and_out:
1149         path_release(&nd);
1150 out:
1151         unlock_kernel();
1152         return retval;
1153 }
1154 
1155 /*
1156  *      The 2.0 compatible umount. No flags. 
1157  */
1158  
1159 asmlinkage long sys_oldumount(char * name)
1160 {
1161         return sys_umount(name,0);
1162 }
1163 
1164 static int mount_is_safe(struct nameidata *nd)
1165 {
1166         if (capable(CAP_SYS_ADMIN))
1167                 return 0;
1168         return -EPERM;
1169 #ifdef notyet
1170         if (S_ISLNK(nd->dentry->d_inode->i_mode))
1171                 return -EPERM;
1172         if (nd->dentry->d_inode->i_mode & S_ISVTX) {
1173                 if (current->uid != nd->dentry->d_inode->i_uid)
1174                         return -EPERM;
1175         }
1176         if (permission(nd->dentry->d_inode, MAY_WRITE))
1177                 return -EPERM;
1178         return 0;
1179 #endif
1180 }
1181 
1182 /*
1183  * do loopback mount.
1184  */
1185 static int do_loopback(char *old_name, char *new_name)
1186 {
1187         struct nameidata old_nd, new_nd;
1188         int err = 0;
1189         if (!old_name || !*old_name)
1190                 return -EINVAL;
1191         if (path_init(old_name, LOOKUP_POSITIVE, &old_nd))
1192                 err = path_walk(old_name, &old_nd);
1193         if (err)
1194                 goto out;
1195         if (path_init(new_name, LOOKUP_POSITIVE, &new_nd))
1196                 err = path_walk(new_name, &new_nd);
1197         if (err)
1198                 goto out1;
1199         err = mount_is_safe(&new_nd);
1200         if (err)
1201                 goto out2;
1202         err = -EINVAL;
1203         if (S_ISDIR(new_nd.dentry->d_inode->i_mode) !=
1204               S_ISDIR(old_nd.dentry->d_inode->i_mode))
1205                 goto out2;
1206 
1207         err = -ENOMEM;
1208         if (old_nd.mnt->mnt_sb->s_type->fs_flags & FS_SINGLE)
1209                 get_filesystem(old_nd.mnt->mnt_sb->s_type);
1210                 
1211         down(&mount_sem);
1212         /* there we go */
1213         down(&new_nd.dentry->d_inode->i_zombie);
1214         if (IS_DEADDIR(new_nd.dentry->d_inode))
1215                 err = -ENOENT;
1216         else if (add_vfsmnt(&new_nd, old_nd.dentry, old_nd.mnt->mnt_devname))
1217                 err = 0;
1218         up(&new_nd.dentry->d_inode->i_zombie);
1219         up(&mount_sem);
1220         if (err && old_nd.mnt->mnt_sb->s_type->fs_flags & FS_SINGLE)
1221                 put_filesystem(old_nd.mnt->mnt_sb->s_type);
1222 out2:
1223         path_release(&new_nd);
1224 out1:
1225         path_release(&old_nd);
1226 out:
1227         return err;
1228 }
1229 
1230 /*
1231  * change filesystem flags. dir should be a physical root of filesystem.
1232  * If you've mounted a non-root directory somewhere and want to do remount
1233  * on it - tough luck.
1234  */
1235 
1236 static int do_remount(const char *dir,int flags,char *data)
1237 {
1238         struct nameidata nd;
1239         int retval = 0;
1240 
1241         if (!capable(CAP_SYS_ADMIN))
1242                 return -EPERM;
1243 
1244         if (path_init(dir, LOOKUP_FOLLOW|LOOKUP_POSITIVE, &nd))
1245                 retval = path_walk(dir, &nd);
1246         if (!retval) {
1247                 struct super_block * sb = nd.dentry->d_inode->i_sb;
1248                 retval = -ENODEV;
1249                 if (sb) {
1250                         retval = -EINVAL;
1251                         if (nd.dentry == sb->s_root) {
1252                                 /*
1253                                  * Shrink the dcache and sync the device.
1254                                  */
1255                                 shrink_dcache_sb(sb);
1256                                 fsync_dev(sb->s_dev);
1257                                 if (flags & MS_RDONLY)
1258                                         acct_auto_close(sb->s_dev);
1259                                 retval = do_remount_sb(sb, flags, data);
1260                         }
1261                 }
1262                 path_release(&nd);
1263         }
1264         return retval;
1265 }
1266 
1267 static int copy_mount_options (const void *data, unsigned long *where)
1268 {
1269         int i;
1270         unsigned long page;
1271         unsigned long size;
1272         
1273         *where = 0;
1274         if (!data)
1275                 return 0;
1276 
1277         if (!(page = __get_free_page(GFP_KERNEL)))
1278                 return -ENOMEM;
1279 
1280         /* We only care that *some* data at the address the user
1281          * gave us is valid.  Just in case, we'll zero
1282          * the remainder of the page.
1283          */
1284         /* copy_from_user cannot cross TASK_SIZE ! */
1285         size = TASK_SIZE - (unsigned long)data;
1286         if (size > PAGE_SIZE)
1287                 size = PAGE_SIZE;
1288 
1289         i = size - copy_from_user((void *)page, data, size);
1290         if (!i) {
1291                 free_page(page); 
1292                 return -EFAULT;
1293         }
1294         if (i != PAGE_SIZE)
1295                 memset((char *)page + i, 0, PAGE_SIZE - i);
1296         *where = page;
1297         return 0;
1298 }
1299 
1300 /*
1301  * Flags is a 16-bit value that allows up to 16 non-fs dependent flags to
1302  * be given to the mount() call (ie: read-only, no-dev, no-suid etc).
1303  *
1304  * data is a (void *) that can point to any structure up to
1305  * PAGE_SIZE-1 bytes, which can contain arbitrary fs-dependent
1306  * information (or be NULL).
1307  *
1308  * NOTE! As pre-0.97 versions of mount() didn't use this setup, the
1309  * flags used to have a special 16-bit magic number in the high word:
1310  * 0xC0ED. If this magic number is present, the high word is discarded.
1311  */
1312 long do_mount(char * dev_name, char * dir_name, char *type_page,
1313                   unsigned long flags, void *data_page)
1314 {
1315         struct file_system_type * fstype;
1316         struct nameidata nd;
1317         struct vfsmount *mnt = NULL;
1318         struct super_block *sb;
1319         int retval = 0;
1320 
1321         /* Discard magic */
1322         if ((flags & MS_MGC_MSK) == MS_MGC_VAL)
1323                 flags &= ~MS_MGC_MSK;
1324  
1325         /* Basic sanity checks */
1326 
1327         if (!dir_name || !*dir_name || !memchr(dir_name, 0, PAGE_SIZE))
1328                 return -EINVAL;
1329         if (dev_name && !memchr(dev_name, 0, PAGE_SIZE))
1330                 return -EINVAL;
1331 
1332         /* OK, looks good, now let's see what do they want */
1333 
1334         /* just change the flags? - capabilities are checked in do_remount() */
1335         if (flags & MS_REMOUNT)
1336                 return do_remount(dir_name, flags & ~MS_REMOUNT,
1337                                   (char *) data_page);
1338 
1339         /* "mount --bind"? Equivalent to older "mount -t bind" */
1340         /* No capabilities? What if users do thousands of these? */
1341         if (flags & MS_BIND)
1342                 return do_loopback(dev_name, dir_name);
1343 
1344         /* For the rest we need the type */
1345 
1346         if (!type_page || !memchr(type_page, 0, PAGE_SIZE))
1347                 return -EINVAL;
1348 
1349 #if 0   /* Can be deleted again. Introduced in patch-2.3.99-pre6 */
1350         /* loopback mount? This is special - requires fewer capabilities */
1351         if (strcmp(type_page, "bind")==0)
1352                 return do_loopback(dev_name, dir_name);
1353 #endif
1354 
1355         /* for the rest we _really_ need capabilities... */
1356         if (!capable(CAP_SYS_ADMIN))
1357                 return -EPERM;
1358 
1359         /* ... filesystem driver... */
1360         fstype = get_fs_type(type_page);
1361         if (!fstype)            
1362                 return -ENODEV;
1363 
1364         /* ... and mountpoint. Do the lookup first to force automounting. */
1365         if (path_init(dir_name,
1366                       LOOKUP_FOLLOW|LOOKUP_POSITIVE|LOOKUP_DIRECTORY, &nd))
1367                 retval = path_walk(dir_name, &nd);
1368         if (retval)
1369                 goto fs_out;
1370 
1371         /* get superblock, locks mount_sem on success */
1372         if (fstype->fs_flags & FS_NOMOUNT)
1373                 sb = ERR_PTR(-EINVAL);
1374         else if (fstype->fs_flags & FS_REQUIRES_DEV)
1375                 sb = get_sb_bdev(fstype, dev_name, flags, data_page);
1376         else if (fstype->fs_flags & FS_SINGLE)
1377                 sb = get_sb_single(fstype, flags, data_page);
1378         else
1379                 sb = get_sb_nodev(fstype, flags, data_page);
1380 
1381         retval = PTR_ERR(sb);
1382         if (IS_ERR(sb))
1383                 goto dput_out;
1384 
1385         /* Something was mounted here while we slept */
1386         while(d_mountpoint(nd.dentry) && follow_down(&nd.mnt, &nd.dentry))
1387                 ;
1388 
1389         /* Refuse the same filesystem on the same mount point */
1390         retval = -EBUSY;
1391         if (nd.mnt && nd.mnt->mnt_sb == sb
1392                    && nd.mnt->mnt_root == nd.dentry)
1393                 goto fail;
1394 
1395         retval = -ENOENT;
1396         if (!nd.dentry->d_inode)
1397                 goto fail;
1398         down(&nd.dentry->d_inode->i_zombie);
1399         if (!IS_DEADDIR(nd.dentry->d_inode)) {
1400                 retval = -ENOMEM;
1401                 mnt = add_vfsmnt(&nd, sb->s_root, dev_name);
1402         }
1403         up(&nd.dentry->d_inode->i_zombie);
1404         if (!mnt)
1405                 goto fail;
1406         retval = 0;
1407 unlock_out:
1408         up(&mount_sem);
1409 dput_out:
1410         path_release(&nd);
1411 fs_out:
1412         put_filesystem(fstype);
1413         return retval;
1414 
1415 fail:
1416         if (list_empty(&sb->s_mounts))
1417                 kill_super(sb, 0);
1418         goto unlock_out;
1419 }
1420 
1421 asmlinkage long sys_mount(char * dev_name, char * dir_name, char * type,
1422                           unsigned long flags, void * data)
1423 {
1424         int retval;
1425         unsigned long data_page;
1426         unsigned long type_page;
1427         unsigned long dev_page;
1428         char *dir_page;
1429 
1430         retval = copy_mount_options (type, &type_page);
1431         if (retval < 0)
1432                 return retval;
1433 
1434         dir_page = getname(dir_name);
1435         retval = PTR_ERR(dir_page);
1436         if (IS_ERR(dir_page))
1437                 goto out1;
1438 
1439         retval = copy_mount_options (dev_name, &dev_page);
1440         if (retval < 0)
1441                 goto out2;
1442 
1443         retval = copy_mount_options (data, &data_page);
1444         if (retval < 0)
1445                 goto out3;
1446 
1447         lock_kernel();
1448         retval = do_mount((char*)dev_page, dir_page, (char*)type_page,
1449                           flags, (void*)data_page);
1450         unlock_kernel();
1451         free_page(data_page);
1452 
1453 out3:
1454         free_page(dev_page);
1455 out2:
1456         putname(dir_page);
1457 out1:
1458         free_page(type_page);
1459         return retval;
1460 }
1461 
1462 void __init mount_root(void)
1463 {
1464         struct file_system_type * fs_type;
1465         struct super_block * sb;
1466         struct vfsmount *vfsmnt;
1467         struct block_device *bdev = NULL;
1468         mode_t mode;
1469         int retval;
1470         void *handle;
1471         char path[64];
1472         int path_start = -1;
1473 
1474 #ifdef CONFIG_ROOT_NFS
1475         void *data;
1476         if (MAJOR(ROOT_DEV) != UNNAMED_MAJOR)
1477                 goto skip_nfs;
1478         fs_type = get_fs_type("nfs");
1479         if (!fs_type)
1480                 goto no_nfs;
1481         ROOT_DEV = get_unnamed_dev();
1482         if (!ROOT_DEV)
1483                 /*
1484                  * Your /linuxrc sucks worse than MSExchange - that's the
1485                  * only way you could run out of anon devices at that point.
1486                  */
1487                 goto no_anon;
1488         data = nfs_root_data();
1489         if (!data)
1490                 goto no_server;
1491         sb = read_super(ROOT_DEV, NULL, fs_type, root_mountflags, data, 1);
1492         if (sb)
1493                 /*
1494                  * We _can_ fail there, but if that will happen we have no
1495                  * chance anyway (no memory for vfsmnt and we _will_ need it,
1496                  * no matter which fs we try to mount).
1497                  */
1498                 goto mount_it;
1499 no_server:
1500         put_unnamed_dev(ROOT_DEV);
1501 no_anon:
1502         put_filesystem(fs_type);
1503 no_nfs:
1504         printk(KERN_ERR "VFS: Unable to mount root fs via NFS, trying floppy.\n");
1505         ROOT_DEV = MKDEV(FLOPPY_MAJOR, 0);
1506 skip_nfs:
1507 #endif
1508 
1509 #ifdef CONFIG_BLK_DEV_FD
1510         if (MAJOR(ROOT_DEV) == FLOPPY_MAJOR) {
1511 #ifdef CONFIG_BLK_DEV_RAM
1512                 extern int rd_doload;
1513                 extern void rd_load_secondary(void);
1514 #endif
1515                 floppy_eject();
1516 #ifndef CONFIG_BLK_DEV_RAM
1517                 printk(KERN_NOTICE "(Warning, this kernel has no ramdisk support)\n");
1518 #else
1519                 /* rd_doload is 2 for a dual initrd/ramload setup */
1520                 if(rd_doload==2)
1521                         rd_load_secondary();
1522                 else
1523 #endif
1524                 {
1525                         printk(KERN_NOTICE "VFS: Insert root floppy and press ENTER\n");
1526                         wait_for_keypress();
1527                 }
1528         }
1529 #endif
1530 
1531         devfs_make_root (root_device_name);
1532         handle = devfs_find_handle (NULL, ROOT_DEVICE_NAME,
1533                                     MAJOR (ROOT_DEV), MINOR (ROOT_DEV),
1534                                     DEVFS_SPECIAL_BLK, 1);
1535         if (handle)  /*  Sigh: bd*() functions only paper over the cracks  */
1536         {
1537             unsigned major, minor;
1538 
1539             devfs_get_maj_min (handle, &major, &minor);
1540             ROOT_DEV = MKDEV (major, minor);
1541         }
1542 
1543         /*
1544          * Probably pure paranoia, but I'm less than happy about delving into
1545          * devfs crap and checking it right now. Later.
1546          */
1547         if (!ROOT_DEV)
1548                 panic("I have no root and I want to scream");
1549 
1550         bdev = bdget(kdev_t_to_nr(ROOT_DEV));
1551         if (!bdev)
1552                 panic(__FUNCTION__ ": unable to allocate root device");
1553         bdev->bd_op = devfs_get_ops (handle);
1554         path_start = devfs_generate_path (handle, path + 5, sizeof (path) - 5);
1555         mode = FMODE_READ;
1556         if (!(root_mountflags & MS_RDONLY))
1557                 mode |= FMODE_WRITE;
1558         retval = blkdev_get(bdev, mode, 0, BDEV_FS);
1559         if (retval == -EROFS) {
1560                 root_mountflags |= MS_RDONLY;
1561                 retval = blkdev_get(bdev, FMODE_READ, 0, BDEV_FS);
1562         }
1563         if (retval) {
1564                 /*
1565                  * Allow the user to distinguish between failed open
1566                  * and bad superblock on root device.
1567                  */
1568                 printk ("VFS: Cannot open root device \"%s\" or %s\n",
1569                         root_device_name, kdevname (ROOT_DEV));
1570                 printk ("Please append a correct \"root=\" boot option\n");
1571                 panic("VFS: Unable to mount root fs on %s",
1572                         kdevname(ROOT_DEV));
1573         }
1574 
1575         check_disk_change(ROOT_DEV);
1576         sb = get_super(ROOT_DEV);
1577         if (sb) {
1578                 fs_type = sb->s_type;
1579                 goto mount_it;
1580         }
1581 
1582         read_lock(&file_systems_lock);
1583         for (fs_type = file_systems ; fs_type ; fs_type = fs_type->next) {
1584                 if (!(fs_type->fs_flags & FS_REQUIRES_DEV))
1585                         continue;
1586                 if (!try_inc_mod_count(fs_type->owner))
1587                         continue;
1588                 read_unlock(&file_systems_lock);
1589                 sb = read_super(ROOT_DEV,bdev,fs_type,root_mountflags,NULL,1);
1590                 if (sb) 
1591                         goto mount_it;
1592                 read_lock(&file_systems_lock);
1593                 put_filesystem(fs_type);
1594         }
1595         read_unlock(&file_systems_lock);
1596         panic("VFS: Unable to mount root fs on %s", kdevname(ROOT_DEV));
1597 
1598 mount_it:
1599         printk ("VFS: Mounted root (%s filesystem)%s.\n",
1600                 fs_type->name,
1601                 (sb->s_flags & MS_RDONLY) ? " readonly" : "");
1602         if (path_start >= 0) {
1603                 devfs_mk_symlink (NULL, "root", DEVFS_FL_DEFAULT,
1604                                   path + 5 + path_start, NULL, NULL);
1605                 memcpy (path + path_start, "/dev/", 5);
1606                 vfsmnt = add_vfsmnt(NULL, sb->s_root, path + path_start);
1607         }
1608         else
1609                 vfsmnt = add_vfsmnt(NULL, sb->s_root, "/dev/root");
1610         /* FIXME: if something will try to umount us right now... */
1611         if (vfsmnt) {
1612                 set_fs_root(current->fs, vfsmnt, sb->s_root);
1613                 set_fs_pwd(current->fs, vfsmnt, sb->s_root);
1614                 if (bdev)
1615                         bdput(bdev); /* sb holds a reference */
1616                 return;
1617         }
1618         panic("VFS: add_vfsmnt failed for root fs");
1619 }
1620 
1621 
1622 static void chroot_fs_refs(struct dentry *old_root,
1623                            struct vfsmount *old_rootmnt,
1624                            struct dentry *new_root,
1625                            struct vfsmount *new_rootmnt)
1626 {
1627         struct task_struct *p;
1628         struct fs_struct *fs;
1629 
1630         read_lock(&tasklist_lock);
1631         for_each_task(p) {
1632                 task_lock(p);
1633                 fs = p->fs;
1634                 if (fs) {
1635                         atomic_inc(&fs->count);
1636                         task_unlock(p);
1637                         if (fs->root==old_root && fs->rootmnt==old_rootmnt)
1638                                 set_fs_root(fs, new_rootmnt, new_root);
1639                         if (fs->pwd==old_root && fs->pwdmnt==old_rootmnt)
1640                                 set_fs_pwd(fs, new_rootmnt, new_root);
1641                         put_fs_struct(fs);
1642                 } else
1643                         task_unlock(p);
1644         }
1645         read_unlock(&tasklist_lock);
1646 }
1647 
1648 /*
1649  * Moves the current root to put_root, and sets root/cwd of all processes
1650  * which had them on the old root to new_root.
1651  *
1652  * Note:
1653  *  - we don't move root/cwd if they are not at the root (reason: if something
1654  *    cared enough to change them, it's probably wrong to force them elsewhere)
1655  *  - it's okay to pick a root that isn't the root of a file system, e.g.
1656  *    /nfs/my_root where /nfs is the mount point. Better avoid creating
1657  *    unreachable mount points this way, though.
1658  */
1659 
1660 asmlinkage long sys_pivot_root(const char *new_root, const char *put_old)
1661 {
1662         struct dentry *root;
1663         struct vfsmount *root_mnt;
1664         struct vfsmount *tmp;
1665         struct nameidata new_nd, old_nd;
1666         char *name;
1667         int error;
1668 
1669         if (!capable(CAP_SYS_ADMIN))
1670                 return -EPERM;
1671 
1672         lock_kernel();
1673 
1674         name = getname(new_root);
1675         error = PTR_ERR(name);
1676         if (IS_ERR(name))
1677                 goto out0;
1678         error = 0;
1679         if (path_init(name, LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &new_nd))
1680                 error = path_walk(name, &new_nd);
1681         putname(name);
1682         if (error)
1683                 goto out0;
1684 
1685         name = getname(put_old);
1686         error = PTR_ERR(name);
1687         if (IS_ERR(name))
1688                 goto out0;
1689         error = 0;
1690         if (path_init(name, LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &old_nd))
1691                 error = path_walk(name, &old_nd);
1692         putname(name);
1693         if (error)
1694                 goto out1;
1695 
1696         read_lock(&current->fs->lock);
1697         root_mnt = mntget(current->fs->rootmnt);
1698         root = dget(current->fs->root);
1699         read_unlock(&current->fs->lock);
1700         down(&mount_sem);
1701         down(&old_nd.dentry->d_inode->i_zombie);
1702         error = -ENOENT;
1703         if (IS_DEADDIR(new_nd.dentry->d_inode))
1704                 goto out2;
1705         if (d_unhashed(new_nd.dentry) && !IS_ROOT(new_nd.dentry))
1706                 goto out2;
1707         if (d_unhashed(old_nd.dentry) && !IS_ROOT(old_nd.dentry))
1708                 goto out2;
1709         error = -EBUSY;
1710         if (new_nd.mnt == root_mnt || old_nd.mnt == root_mnt)
1711                 goto out2; /* loop */
1712         error = -EINVAL;
1713         tmp = old_nd.mnt; /* make sure we can reach put_old from new_root */
1714         spin_lock(&dcache_lock);
1715         if (tmp != new_nd.mnt) {
1716                 for (;;) {
1717                         if (tmp->mnt_parent == tmp)
1718                                 goto out3;
1719                         if (tmp->mnt_parent == new_nd.mnt)
1720                                 break;
1721                         tmp = tmp->mnt_parent;
1722                 }
1723                 if (!is_subdir(tmp->mnt_mountpoint, new_nd.dentry))
1724                         goto out3;
1725         } else if (!is_subdir(old_nd.dentry, new_nd.dentry))
1726                 goto out3;
1727         spin_unlock(&dcache_lock);
1728 
1729         move_vfsmnt(new_nd.mnt, new_nd.dentry, NULL, NULL);
1730         move_vfsmnt(root_mnt, old_nd.dentry, old_nd.mnt, NULL);
1731         chroot_fs_refs(root,root_mnt,new_nd.dentry,new_nd.mnt);
1732         error = 0;
1733 out2:
1734         up(&old_nd.dentry->d_inode->i_zombie);
1735         up(&mount_sem);
1736         dput(root);
1737         mntput(root_mnt);
1738         path_release(&old_nd);
1739 out1:
1740         path_release(&new_nd);
1741 out0:
1742         unlock_kernel();
1743         return error;
1744 out3:
1745         spin_unlock(&dcache_lock);
1746         goto out2;
1747 }
1748 
1749 
1750 #ifdef CONFIG_BLK_DEV_INITRD
1751 
1752 int __init change_root(kdev_t new_root_dev,const char *put_old)
1753 {
1754         struct vfsmount *old_rootmnt;
1755         struct nameidata devfs_nd, nd;
1756         int error = 0;
1757 
1758         read_lock(&current->fs->lock);
1759         old_rootmnt = mntget(current->fs->rootmnt);
1760         read_unlock(&current->fs->lock);
1761         /*  First unmount devfs if mounted  */
1762         if (path_init("/dev", LOOKUP_FOLLOW|LOOKUP_POSITIVE, &devfs_nd))
1763                 error = path_walk("/dev", &devfs_nd);
1764         if (!error) {
1765                 if (devfs_nd.mnt->mnt_sb->s_magic == DEVFS_SUPER_MAGIC &&
1766                     devfs_nd.dentry == devfs_nd.mnt->mnt_root) {
1767                         dput(devfs_nd.dentry);
1768                         down(&mount_sem);
1769                         /* puts devfs_nd.mnt */
1770                         do_umount(devfs_nd.mnt, 0, 0);
1771                         up(&mount_sem);
1772                 } else 
1773                         path_release(&devfs_nd);
1774         }
1775         ROOT_DEV = new_root_dev;
1776         mount_root();
1777 #if 1
1778         shrink_dcache();
1779         printk("change_root: old root has d_count=%d\n", 
1780                atomic_read(&old_rootmnt->mnt_root->d_count));
1781 #endif
1782         mount_devfs_fs ();
1783         /*
1784          * Get the new mount directory
1785          */
1786         error = 0;
1787         if (path_init(put_old, LOOKUP_FOLLOW|LOOKUP_POSITIVE|LOOKUP_DIRECTORY, &nd))
1788                 error = path_walk(put_old, &nd);
1789         if (error) {
1790                 int blivet;
1791 
1792                 printk(KERN_NOTICE "Trying to unmount old root ... ");
1793                 blivet = do_umount(old_rootmnt, 1, 0);
1794                 if (!blivet) {
1795                         printk("okay\n");
1796                         return 0;
1797                 }
1798                 printk(KERN_ERR "error %d\n", blivet);
1799                 return error;
1800         }
1801         /* FIXME: we should hold i_zombie on nd.dentry */
1802         move_vfsmnt(old_rootmnt, nd.dentry, nd.mnt, "/dev/root.old");
1803         mntput(old_rootmnt);
1804         path_release(&nd);
1805         return 0;
1806 }
1807 
1808 #endif
1809 

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~ [ freetext search ] ~ [ file search ] ~

This page was automatically generated by the LXR engine.
Visit the LXR main site for more information.