~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~ [ freetext search ] ~ [ file search ] ~

Linux Cross Reference
Linux/fs/namei.c

Version: ~ [ 2.4.0 ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

  1 /*
  2  *  linux/fs/namei.c
  3  *
  4  *  Copyright (C) 1991, 1992  Linus Torvalds
  5  */
  6 
  7 /*
  8  * Some corrections by tytso.
  9  */
 10 
 11 /* [Feb 1997 T. Schoebel-Theuer] Complete rewrite of the pathname
 12  * lookup logic.
 13  */
 14 /* [Feb-Apr 2000, AV] Rewrite to the new namespace architecture.
 15  */
 16 
 17 #include <linux/init.h>
 18 #include <linux/mm.h>
 19 #include <linux/proc_fs.h>
 20 #include <linux/smp_lock.h>
 21 #include <linux/quotaops.h>
 22 #include <linux/pagemap.h>
 23 #include <linux/dcache.h>
 24 #include <linux/dnotify.h>
 25 
 26 #include <asm/uaccess.h>
 27 #include <asm/unaligned.h>
 28 #include <asm/semaphore.h>
 29 #include <asm/page.h>
 30 #include <asm/pgtable.h>
 31 
 32 #include <asm/namei.h>
 33 
 34 #define ACC_MODE(x) ("\000\004\002\006"[(x)&O_ACCMODE]) /* table lookup: (x)&O_ACCMODE of 0,1,2,3 yields 0,4,2,6 */
 35 
 36 /* [Feb-1997 T. Schoebel-Theuer]
 37  * Fundamental changes in the pathname lookup mechanisms (namei)
 38  * were necessary because of omirr.  The reason is that omirr needs
 39  * to know the _real_ pathname, not the user-supplied one, in case
 40  * of symlinks (and also when transname replacements occur).
 41  *
 42  * The new code replaces the old recursive symlink resolution with
 43  * an iterative one (in case of non-nested symlink chains).  It does
 44  * this with calls to <fs>_follow_link().
 45  * As a side effect, dir_namei(), _namei() and follow_link() are now 
 46  * replaced with a single function lookup_dentry() that can handle all 
 47  * the special cases of the former code.
 48  *
 49  * With the new dcache, the pathname is stored at each inode, at least as
 50  * long as the refcount of the inode is positive.  As a side effect, the
 51  * size of the dcache depends on the inode cache and thus is dynamic.
 52  *
 53  * [29-Apr-1998 C. Scott Ananian] Updated above description of symlink
 54  * resolution to correspond with current state of the code.
 55  *
 56  * Note that the symlink resolution is not *completely* iterative.
 57  * There is still a significant amount of tail- and mid- recursion in
 58  * the algorithm.  Also, note that <fs>_readlink() is not used in
 59  * lookup_dentry(): lookup_dentry() on the result of <fs>_readlink()
 60  * may return different results than <fs>_follow_link().  Many virtual
 61  * filesystems (including /proc) exhibit this behavior.
 62  */
 63 
 64 /* [24-Feb-97 T. Schoebel-Theuer] Side effects caused by new implementation:
 65  * New symlink semantics: when open() is called with flags O_CREAT | O_EXCL
 66  * and the name already exists in form of a symlink, try to create the new
 67  * name indicated by the symlink. The old code always complained that the
 68  * name already exists, due to not following the symlink even if its target
 69  * is nonexistent.  The new semantics affects also mknod() and link() when
 70  * the name is a symlink pointing to a non-existent name.
 71  *
 72  * I don't know which semantics is the right one, since I have no access
 73  * to standards. But I found by trial that HP-UX 9.0 has the full "new"
 74  * semantics implemented, while SunOS 4.1.1 and Solaris (SunOS 5.4) have the
 75  * "old" one. Personally, I think the new semantics is much more logical.
 76  * Note that "ln old new" where "new" is a symlink pointing to a non-existing
 77  * file does succeed in both HP-UX and SunOS, but not in Solaris
 78  * and in the old Linux semantics.
 79  */
 80 
 81 /* [16-Dec-97 Kevin Buhr] For security reasons, we change some symlink
 82  * semantics.  See the comments in "open_namei" and "do_link" below.
 83  *
 84  * [10-Sep-98 Alan Modra] Another symlink change.
 85  */
 86 
 87 /* [Feb-Apr 2000 AV] Complete rewrite. Rules for symlinks:
 88  *      inside the path - always follow.
 89  *      in the last component in creation/removal/renaming - never follow.
 90  *      if LOOKUP_FOLLOW passed - follow.
 91  *      if the pathname has trailing slashes - follow.
 92  *      otherwise - don't follow.
 93  * (applied in that order).
 94  *
 95  * [Jun 2000 AV] Inconsistent behaviour of open() in case if flags==O_CREAT
 96  * restored for 2.4. This is the last surviving part of old 4.2BSD bug.
 97  * During the 2.4 we need to fix the userland stuff depending on it -
 98  * hopefully we will be able to get rid of that wart in 2.5. So far only
 99  * XEmacs seems to be relying on it...
100  */
101 
102 /* In order to reduce some races, while at the same time doing additional
103  * checking and hopefully speeding things up, we copy filenames to the
104  * kernel data space before using them..
105  *
106  * POSIX.1 2.4: an empty pathname is invalid (ENOENT).
107  */
108 static inline int do_getname(const char *filename, char *page)
109 {
110         int retval;
111         unsigned long len = PATH_MAX + 1;
112 
113         if ((unsigned long) filename >= TASK_SIZE) {
114                 if (!segment_eq(get_fs(), KERNEL_DS))
115                         return -EFAULT;
116         } else if (TASK_SIZE - (unsigned long) filename < PAGE_SIZE)
117                 len = TASK_SIZE - (unsigned long) filename;
118 
119         retval = strncpy_from_user((char *)page, filename, len);
120         if (retval > 0) {
121                 if (retval < len)
122                         return 0;
123                 return -ENAMETOOLONG;
124         } else if (!retval)
125                 retval = -ENOENT;
126         return retval;
127 }
128 
129 char * getname(const char * filename)
130 {
131         char *tmp, *result;
132 
133         result = ERR_PTR(-ENOMEM);
134         tmp = __getname();
135         if (tmp)  {
136                 int retval = do_getname(filename, tmp);
137 
138                 result = tmp;
139                 if (retval < 0) {
140                         putname(tmp);
141                         result = ERR_PTR(retval);
142                 }
143         }
144         return result;
145 }
146 
147 /*
148  *      permission()
149  *
150  * is used to check for read/write/execute permissions on a file.
151  * We use "fsuid" for this, letting us set arbitrary permissions
152  * for filesystem access without changing the "normal" uids which
153  * are used for other things..
154  */
155 int vfs_permission(struct inode * inode,int mask)
156 {
157         int mode = inode->i_mode;
158 
159         if ((mask & S_IWOTH) && IS_RDONLY(inode) &&
160                  (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)))
161                 return -EROFS; /* Nobody gets write access to a read-only fs */
162 
163         if ((mask & S_IWOTH) && IS_IMMUTABLE(inode))
164                 return -EACCES; /* Nobody gets write access to an immutable file */
165 
166         if (current->fsuid == inode->i_uid)
167                 mode >>= 6;
168         else if (in_group_p(inode->i_gid))
169                 mode >>= 3;
170 
171         if (((mode & mask & S_IRWXO) == mask) || capable(CAP_DAC_OVERRIDE))
172                 return 0;
173 
174         /* read and search access */
175         if ((mask == S_IROTH) ||
176             (S_ISDIR(inode->i_mode)  && !(mask & ~(S_IROTH | S_IXOTH))))
177                 if (capable(CAP_DAC_READ_SEARCH))
178                         return 0;
179 
180         return -EACCES;
181 }
182 
183 int permission(struct inode * inode,int mask)
184 {
185         if (inode->i_op && inode->i_op->permission) {
186                 int retval;
187                 lock_kernel();
188                 retval = inode->i_op->permission(inode, mask);
189                 unlock_kernel();
190                 return retval;
191         }
192         return vfs_permission(inode, mask);
193 }
194 
195 /*
196  * get_write_access() gets write permission for a file.
197  * put_write_access() releases this write permission.
198  * This is used for regular files.
199  * We cannot support write (and maybe mmap read-write shared) accesses and
200  * MAP_DENYWRITE mmappings simultaneously. The i_writecount field of an inode
201  * can have the following values:
202  * 0: no writers, no VM_DENYWRITE mappings
203  * < 0: (-i_writecount) vm_area_structs with VM_DENYWRITE set exist
204  * > 0: (i_writecount) users are writing to the file.
205  *
206  * Normally we operate on that counter with atomic_{inc,dec} and it's safe
207  * except for the cases where we don't hold i_writecount yet. Then we need to
208  * use {get,deny}_write_access() - these functions check the sign and refuse
209  * to do the change if sign is wrong. Exclusion between them is provided by
210  * spinlock (arbitration_lock) and I'll rip the second arsehole to the first
211  * who will try to move it in struct inode - just leave it here.
212  */
213 static spinlock_t arbitration_lock = SPIN_LOCK_UNLOCKED;
214 int get_write_access(struct inode * inode)
215 {
216         spin_lock(&arbitration_lock);
217         if (atomic_read(&inode->i_writecount) < 0) {
218                 spin_unlock(&arbitration_lock);
219                 return -ETXTBSY;
220         }
221         atomic_inc(&inode->i_writecount);
222         spin_unlock(&arbitration_lock);
223         return 0;
224 }
225 int deny_write_access(struct file * file)
226 {
227         spin_lock(&arbitration_lock);
228         if (atomic_read(&file->f_dentry->d_inode->i_writecount) > 0) {
229                 spin_unlock(&arbitration_lock);
230                 return -ETXTBSY;
231         }
232         atomic_dec(&file->f_dentry->d_inode->i_writecount);
233         spin_unlock(&arbitration_lock);
234         return 0;
235 }
236 
237 void path_release(struct nameidata *nd)
238 {
239         dput(nd->dentry);
240         mntput(nd->mnt);
241 }
242 
243 /*
244  * Internal lookup() using the new generic dcache.
245  * SMP-safe
246  */
247 static struct dentry * cached_lookup(struct dentry * parent, struct qstr * name, int flags)
248 {
249         struct dentry * dentry = d_lookup(parent, name);
250 
251         if (dentry && dentry->d_op && dentry->d_op->d_revalidate) {
252                 if (!dentry->d_op->d_revalidate(dentry, flags) && !d_invalidate(dentry)) {
253                         dput(dentry);
254                         dentry = NULL;
255                 }
256         }
257         return dentry;
258 }
259 
260 /*
261  * This is called when everything else fails, and we actually have
262  * to go to the low-level filesystem to find out what we should do..
263  *
264  * We get the directory semaphore, and after getting that we also
265  * make sure that nobody added the entry to the dcache in the meantime..
266  * SMP-safe
267  */
268 static struct dentry * real_lookup(struct dentry * parent, struct qstr * name, int flags)
269 {
270         struct dentry * result;
271         struct inode *dir = parent->d_inode;
272 
273         down(&dir->i_sem);
274         /*
275          * First re-do the cached lookup just in case it was created
276          * while we waited for the directory semaphore..
277          *
278          * FIXME! This could use version numbering or similar to
279          * avoid unnecessary cache lookups.
280          */
281         result = d_lookup(parent, name);
282         if (!result) {
283                 struct dentry * dentry = d_alloc(parent, name);
284                 result = ERR_PTR(-ENOMEM);
285                 if (dentry) {
286                         lock_kernel();
287                         result = dir->i_op->lookup(dir, dentry);
288                         unlock_kernel();
289                         if (result)
290                                 dput(dentry);
291                         else
292                                 result = dentry;
293                 }
294                 up(&dir->i_sem);
295                 return result;
296         }
297 
298         /*
299          * Uhhuh! Nasty case: the cache was re-populated while
300          * we waited on the semaphore. Need to revalidate.
301          */
302         up(&dir->i_sem);
303         if (result->d_op && result->d_op->d_revalidate) {
304                 if (!result->d_op->d_revalidate(result, flags) && !d_invalidate(result)) {
305                         dput(result);
306                         result = ERR_PTR(-ENOENT);
307                 }
308         }
309         return result;
310 }
311 
312 static inline int do_follow_link(struct dentry *dentry, struct nameidata *nd)
313 {
314         int err;
315         if (current->link_count >= 8)
316                 goto loop;
317         current->link_count++;
318         UPDATE_ATIME(dentry->d_inode);
319         err = dentry->d_inode->i_op->follow_link(dentry, nd);
320         current->link_count--;
321         return err;
322 loop:
323         path_release(nd);
324         return -ELOOP;
325 }
326 
327 static inline int __follow_up(struct vfsmount **mnt, struct dentry **base)
328 {
329         struct vfsmount *parent;
330         struct dentry *dentry;
331         spin_lock(&dcache_lock);
332         parent=(*mnt)->mnt_parent;
333         if (parent == *mnt) {
334                 spin_unlock(&dcache_lock);
335                 return 0;
336         }
337         mntget(parent);
338         dentry=dget((*mnt)->mnt_mountpoint);
339         spin_unlock(&dcache_lock);
340         dput(*base);
341         *base = dentry;
342         mntput(*mnt);
343         *mnt = parent;
344         return 1;
345 }
346 
/* Non-inline entry point for __follow_up(); returns its result unchanged. */
int follow_up(struct vfsmount **mnt, struct dentry **dentry)
{
        int moved = __follow_up(mnt, dentry);

        return moved;
}
351 
352 static inline int __follow_down(struct vfsmount **mnt, struct dentry **dentry)
353 {
354         struct list_head *p;
355         spin_lock(&dcache_lock);
356         p = (*dentry)->d_vfsmnt.next;
357         while (p != &(*dentry)->d_vfsmnt) {
358                 struct vfsmount *tmp;
359                 tmp = list_entry(p, struct vfsmount, mnt_clash);
360                 if (tmp->mnt_parent == *mnt) {
361                         *mnt = mntget(tmp);
362                         spin_unlock(&dcache_lock);
363                         mntput(tmp->mnt_parent);
364                         /* tmp holds the mountpoint, so... */
365                         dput(*dentry);
366                         *dentry = dget(tmp->mnt_root);
367                         return 1;
368                 }
369                 p = p->next;
370         }
371         spin_unlock(&dcache_lock);
372         return 0;
373 }
374 
/* Non-inline entry point for __follow_down(); returns its result unchanged. */
int follow_down(struct vfsmount **mnt, struct dentry **dentry)
{
        int moved = __follow_down(mnt, dentry);

        return moved;
}
379  
380 static inline void follow_dotdot(struct nameidata *nd)
381 {
382         while(1) {
383                 struct vfsmount *parent;
384                 struct dentry *dentry;
385                 read_lock(&current->fs->lock);
386                 if (nd->dentry == current->fs->root &&
387                     nd->mnt == current->fs->rootmnt)  {
388                         read_unlock(&current->fs->lock);
389                         break;
390                 }
391                 read_unlock(&current->fs->lock);
392                 spin_lock(&dcache_lock);
393                 if (nd->dentry != nd->mnt->mnt_root) {
394                         dentry = dget(nd->dentry->d_parent);
395                         spin_unlock(&dcache_lock);
396                         dput(nd->dentry);
397                         nd->dentry = dentry;
398                         break;
399                 }
400                 parent=nd->mnt->mnt_parent;
401                 if (parent == nd->mnt) {
402                         spin_unlock(&dcache_lock);
403                         break;
404                 }
405                 mntget(parent);
406                 dentry=dget(nd->mnt->mnt_mountpoint);
407                 spin_unlock(&dcache_lock);
408                 dput(nd->dentry);
409                 nd->dentry = dentry;
410                 mntput(nd->mnt);
411                 nd->mnt = parent;
412         }
413 }
414 /*
415  * Name resolution.
416  *
417  * This is the basic name resolution function, turning a pathname
418  * into the final dentry.
419  *
420  * We expect 'base' to be positive and a directory.
     *
     * The walk starts at nd->dentry / nd->mnt and is steered by nd->flags.
     * Returns 0 on success with the result left in nd->dentry / nd->mnt.
     * With LOOKUP_PARENT the final component is not looked up; it is
     * recorded in nd->last and nd->last_type instead.
     *
     * On the error paths that leave the loop via 'break' or 'out_dput',
     * path_release(nd) is called before the error is returned.  The
     * 'return_err' path (a failed do_follow_link()) returns without
     * releasing nd here.
     * NOTE(review): do_follow_link()'s -ELOOP path releases nd itself;
     * presumably a failing ->follow_link does too -- confirm.
421  */
422 int path_walk(const char * name, struct nameidata *nd)
423 {
424         struct dentry *dentry;
425         struct inode *inode;
426         int err;
427         unsigned int lookup_flags = nd->flags;
428 
            /* Skip leading slashes; a name with nothing else resolves to the base. */
429         while (*name=='/')
430                 name++;
431         if (!*name)
432                 goto return_base;
433 
434         inode = nd->dentry->d_inode;
            /*
             * A non-zero link_count means we are inside do_follow_link():
             * the caller's flags are replaced by plain LOOKUP_FOLLOW.
             */
435         if (current->link_count)
436                 lookup_flags = LOOKUP_FOLLOW;
437 
438         /* At this point we know we have a real path component. */
439         for(;;) {
440                 unsigned long hash;
441                 struct qstr this;
442                 unsigned int c;
443 
                    /* The directory being searched must grant MAY_EXEC. */
444                 err = permission(inode, MAY_EXEC);
445                 dentry = ERR_PTR(err);
446                 if (err)
447                         break;
448 
449                 this.name = name;
450                 c = *(const unsigned char *)name;
451 
                    /* Hash the component up to the next '/' or the terminating NUL. */
452                 hash = init_name_hash();
453                 do {
454                         name++;
455                         hash = partial_name_hash(c, hash);
456                         c = *(const unsigned char *)name;
457                 } while (c && (c != '/'));
458                 this.len = name - (const char *) this.name;
459                 this.hash = end_name_hash(hash);
460 
461                 /* remove trailing slashes? */
462                 if (!c)
463                         goto last_component;
                    /* Skip the run of slashes; if nothing follows, this was the last component. */
464                 while (*++name == '/');
465                 if (!*name)
466                         goto last_with_slashes;
467 
468                 /*
469                  * "." and ".." are special - ".." especially so because it has
470                  * to be able to know about the current root directory and
471                  * parent relationships.
472                  */
473                 if (this.name[0] == '.') switch (this.len) {
474                         default:
475                                 break;
476                         case 2: 
477                                 if (this.name[1] != '.')
478                                         break;
479                                 follow_dotdot(nd);
480                                 inode = nd->dentry->d_inode;
481                                 /* fallthrough */
482                         case 1:
483                                 continue;
484                 }
485                 /*
486                  * See if the low-level filesystem might want
487                  * to use its own hash..
488                  */
489                 if (nd->dentry->d_op && nd->dentry->d_op->d_hash) {
490                         err = nd->dentry->d_op->d_hash(nd->dentry, &this);
491                         if (err < 0)
492                                 break;
493                 }
494                 /* This does the actual lookups.. */
495                 dentry = cached_lookup(nd->dentry, &this, LOOKUP_CONTINUE);
496                 if (!dentry) {
497                         dentry = real_lookup(nd->dentry, &this, LOOKUP_CONTINUE);
498                         err = PTR_ERR(dentry);
499                         if (IS_ERR(dentry))
500                                 break;
501                 }
502                 /* Check mountpoints.. */
503                 while (d_mountpoint(dentry) && __follow_down(&nd->mnt, &dentry))
504                         ;
505 
                    /* An intermediate component must exist and have inode operations. */
506                 err = -ENOENT;
507                 inode = dentry->d_inode;
508                 if (!inode)
509                         goto out_dput;
510                 err = -ENOTDIR; 
511                 if (!inode->i_op)
512                         goto out_dput;
513 
                    /*
                     * Inside the path, anything with ->follow_link is always
                     * followed; our own reference to 'dentry' is dropped and
                     * the walk continues from nd->dentry afterwards.
                     */
514                 if (inode->i_op->follow_link) {
515                         err = do_follow_link(dentry, nd);
516                         dput(dentry);
517                         if (err)
518                                 goto return_err;
519                         err = -ENOENT;
520                         inode = nd->dentry->d_inode;
521                         if (!inode)
522                                 break;
523                         err = -ENOTDIR; 
524                         if (!inode->i_op)
525                                 break;
526                 } else {
                            /* Plain step: nd now refers to the component just found. */
527                         dput(nd->dentry);
528                         nd->dentry = dentry;
529                 }
530                 err = -ENOTDIR; 
531                 if (!inode->i_op->lookup)
532                         break;
533                 continue;
534                 /* here ends the main loop */
535 
536 last_with_slashes:
                    /* Trailing slashes force following links and require a directory. */
537                 lookup_flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
538 last_component:
539                 if (lookup_flags & LOOKUP_PARENT)
540                         goto lookup_parent;
541                 if (this.name[0] == '.') switch (this.len) {
542                         default:
543                                 break;
544                         case 2: 
545                                 if (this.name[1] != '.')
546                                         break;
547                                 follow_dotdot(nd);
548                                 inode = nd->dentry->d_inode;
549                                 /* fallthrough */
550                         case 1:
551                                 goto return_base;
552                 }
553                 if (nd->dentry->d_op && nd->dentry->d_op->d_hash) {
554                         err = nd->dentry->d_op->d_hash(nd->dentry, &this);
555                         if (err < 0)
556                                 break;
557                 }
558                 dentry = cached_lookup(nd->dentry, &this, 0);
559                 if (!dentry) {
560                         dentry = real_lookup(nd->dentry, &this, 0);
561                         err = PTR_ERR(dentry);
562                         if (IS_ERR(dentry))
563                                 break;
564                 }
565                 while (d_mountpoint(dentry) && __follow_down(&nd->mnt, &dentry))
566                         ;
567                 inode = dentry->d_inode;
                    /* The last component is followed only when LOOKUP_FOLLOW is set. */
568                 if ((lookup_flags & LOOKUP_FOLLOW)
569                     && inode && inode->i_op && inode->i_op->follow_link) {
570                         err = do_follow_link(dentry, nd);
571                         dput(dentry);
572                         if (err)
573                                 goto return_err;
574                         inode = nd->dentry->d_inode;
575                 } else {
576                         dput(nd->dentry);
577                         nd->dentry = dentry;
578                 }
579                 err = -ENOENT;
580                 if (!inode)
581                         goto no_inode;
582                 if (lookup_flags & LOOKUP_DIRECTORY) {
583                         err = -ENOTDIR; 
584                         if (!inode->i_op || !inode->i_op->lookup)
585                                 break;
586                 }
587                 goto return_base;
588 no_inode:
                    /*
                     * Negative dentry: an error only if the caller asked for
                     * LOOKUP_POSITIVE or LOOKUP_DIRECTORY, otherwise it is
                     * returned as a successful result.
                     */
589                 err = -ENOENT;
590                 if (lookup_flags & (LOOKUP_POSITIVE|LOOKUP_DIRECTORY))
591                         break;
592                 goto return_base;
593 lookup_parent:
                    /* Record the final component and classify it; nd stays at the parent. */
594                 nd->last = this;
595                 nd->last_type = LAST_NORM;
596                 if (this.name[0] != '.')
597                         goto return_base;
598                 if (this.len == 1)
599                         nd->last_type = LAST_DOT;
600                 else if (this.len == 2 && this.name[1] == '.')
601                         nd->last_type = LAST_DOTDOT;
602 return_base:
603                 return 0;
604 out_dput:
605                 dput(dentry);
606                 break;
607         }
            /* Every 'break' out of the loop lands here: drop nd's references. */
608         path_release(nd);
609 return_err:
610         return err;
611 }
612 
613 /* SMP-safe */
614 /* returns 1 if everything is done */
615 static int __emul_lookup_dentry(const char *name, struct nameidata *nd)
616 {
617         if (path_walk(name, nd))
618                 return 0;
619 
620         if (!nd->dentry->d_inode) {
621                 struct nameidata nd_root;
622                 nd_root.last_type = LAST_ROOT;
623                 nd_root.flags = nd->flags;
624                 read_lock(&current->fs->lock);
625                 nd_root.mnt = mntget(current->fs->rootmnt);
626                 nd_root.dentry = dget(current->fs->root);
627                 read_unlock(&current->fs->lock);
628                 if (path_walk(name, &nd_root))
629                         return 1;
630                 if (nd_root.dentry->d_inode) {
631                         path_release(nd);
632                         nd->dentry = nd_root.dentry;
633                         nd->mnt = nd_root.mnt;
634                         nd->last = nd_root.last;
635                         return 1;
636                 }
637                 path_release(&nd_root);
638         }
639         return 1;
640 }
641 
642 void set_fs_altroot(void)
643 {
644         char *emul = __emul_prefix();
645         struct nameidata nd;
646         struct vfsmount *mnt = NULL, *oldmnt;
647         struct dentry *dentry = NULL, *olddentry;
648         if (emul) {
649                 read_lock(&current->fs->lock);
650                 nd.mnt = mntget(current->fs->rootmnt);
651                 nd.dentry = dget(current->fs->root);
652                 read_unlock(&current->fs->lock);
653                 nd.flags = LOOKUP_FOLLOW|LOOKUP_DIRECTORY|LOOKUP_POSITIVE;
654                 if (path_walk(emul,&nd) == 0) {
655                         mnt = nd.mnt;
656                         dentry = nd.dentry;
657                 }
658         }
659         write_lock(&current->fs->lock);
660         oldmnt = current->fs->altrootmnt;
661         olddentry = current->fs->altroot;
662         current->fs->altrootmnt = mnt;
663         current->fs->altroot = dentry;
664         write_unlock(&current->fs->lock);
665         if (olddentry) {
666                 dput(olddentry);
667                 mntput(oldmnt);
668         }
669 }
670 
671 /* SMP-safe */
672 static inline int
673 walk_init_root(const char *name, struct nameidata *nd)
674 {
675         read_lock(&current->fs->lock);
676         if (current->fs->altroot && !(nd->flags & LOOKUP_NOALT)) {
677                 nd->mnt = mntget(current->fs->altrootmnt);
678                 nd->dentry = dget(current->fs->altroot);
679                 read_unlock(&current->fs->lock);
680                 if (__emul_lookup_dentry(name,nd))
681                         return 0;
682                 read_lock(&current->fs->lock);
683         }
684         nd->mnt = mntget(current->fs->rootmnt);
685         nd->dentry = dget(current->fs->root);
686         read_unlock(&current->fs->lock);
687         return 1;
688 }
689 
690 /* SMP-safe */
691 int path_init(const char *name, unsigned int flags, struct nameidata *nd)
692 {
693         nd->last_type = LAST_ROOT; /* if there are only slashes... */
694         nd->flags = flags;
695         if (*name=='/')
696                 return walk_init_root(name,nd);
697         read_lock(&current->fs->lock);
698         nd->mnt = mntget(current->fs->pwdmnt);
699         nd->dentry = dget(current->fs->pwd);
700         read_unlock(&current->fs->lock);
701         return 1;
702 }
703 
704 /*
705  * Restricted form of lookup. Doesn't follow links, single-component only,
706  * needs parent already locked. Doesn't follow mounts.
707  * SMP-safe.
708  */
709 struct dentry * lookup_hash(struct qstr *name, struct dentry * base)
710 {
711         struct dentry * dentry;
712         struct inode *inode;
713         int err;
714 
715         inode = base->d_inode;
716         err = permission(inode, MAY_EXEC);
717         dentry = ERR_PTR(err);
718         if (err)
719                 goto out;
720 
721         /*
722          * See if the low-level filesystem might want
723          * to use its own hash..
724          */
725         if (base->d_op && base->d_op->d_hash) {
726                 err = base->d_op->d_hash(base, name);
727                 dentry = ERR_PTR(err);
728                 if (err < 0)
729                         goto out;
730         }
731 
732         dentry = cached_lookup(base, name, 0);
733         if (!dentry) {
734                 struct dentry *new = d_alloc(base, name);
735                 dentry = ERR_PTR(-ENOMEM);
736                 if (!new)
737                         goto out;
738                 lock_kernel();
739                 dentry = inode->i_op->lookup(inode, new);
740                 unlock_kernel();
741                 if (!dentry)
742                         dentry = new;
743                 else
744                         dput(new);
745         }
746 out:
747         return dentry;
748 }
749 
750 /* SMP-safe */
751 struct dentry * lookup_one(const char * name, struct dentry * base)
752 {
753         unsigned long hash;
754         struct qstr this;
755         unsigned int c;
756 
757         this.name = name;
758         c = *(const unsigned char *)name;
759         if (!c)
760                 goto access;
761 
762         hash = init_name_hash();
763         do {
764                 name++;
765                 if (c == '/')
766                         goto access;
767                 hash = partial_name_hash(c, hash);
768                 c = *(const unsigned char *)name;
769         } while (c);
770         this.len = name - (const char *) this.name;
771         this.hash = end_name_hash(hash);
772 
773         return lookup_hash(&this, base);
774 access:
775         return ERR_PTR(-EACCES);
776 }
777 
778 /*
779  *      namei()
780  *
781  * is used by most simple commands to get the inode of a specified name.
782  * Open, link etc use their own routines, but this is enough for things
783  * like 'chmod' etc.
784  *
785  * namei exists in two versions: namei/lnamei. The only difference is
786  * that namei follows links, while lnamei does not.
787  * SMP-safe
788  */
/*
 * Copy 'name' in with getname(), then run path_init() and, if it asks
 * for it, path_walk().  Returns 0 on success or a negative error; the
 * name buffer is released before returning.
 */
int __user_walk(const char *name, unsigned flags, struct nameidata *nd)
{
        char *kname = getname(name);
        int err = 0;

        if (IS_ERR(kname))
                return PTR_ERR(kname);

        if (path_init(kname, flags, nd))
                err = path_walk(kname, nd);
        putname(kname);
        return err;
}
804 
805 /*
806  * It's inline, so penalty for filesystems that don't use sticky bit is
807  * minimal.
808  */
809 static inline int check_sticky(struct inode *dir, struct inode *inode)
810 {
811         if (!(dir->i_mode & S_ISVTX))
812                 return 0;
813         if (inode->i_uid == current->fsuid)
814                 return 0;
815         if (dir->i_uid == current->fsuid)
816                 return 0;
817         return !capable(CAP_FOWNER);
818 }
819 
820 /*
821  *      Check whether we can remove a link victim from directory dir, check
822  *  whether the type of victim is right.
823  *  1. We can't do it if dir is read-only (done in permission())
824  *  2. We should have write and exec permissions on dir
825  *  3. We can't remove anything from append-only dir
826  *  4. We can't do anything with immutable dir (done in permission())
827  *  5. If the sticky bit on dir is set we should either
828  *      a. be owner of dir, or
829  *      b. be owner of victim, or
830  *      c. have CAP_FOWNER capability
831  *  6. If the victim is append-only or immutable we can't do anything with
832  *     links pointing to it.
833  *  7. If we were asked to remove a directory and victim isn't one - ENOTDIR.
834  *  8. If we were asked to remove a non-directory and victim isn't one - EISDIR.
835  *  9. We can't remove a root or mountpoint.
836  */
837 static inline int may_delete(struct inode *dir,struct dentry *victim, int isdir)
838 {
839         int error;
840         if (!victim->d_inode || victim->d_parent->d_inode != dir)
841                 return -ENOENT;
842         error = permission(dir,MAY_WRITE | MAY_EXEC);
843         if (error)
844                 return error;
845         if (IS_APPEND(dir))
846                 return -EPERM;
847         if (check_sticky(dir, victim->d_inode)||IS_APPEND(victim->d_inode)||
848             IS_IMMUTABLE(victim->d_inode))
849                 return -EPERM;
850         if (isdir) {
851                 if (!S_ISDIR(victim->d_inode->i_mode))
852                         return -ENOTDIR;
853                 if (IS_ROOT(victim))
854                         return -EBUSY;
855         } else if (S_ISDIR(victim->d_inode->i_mode))
856                 return -EISDIR;
857         return 0;
858 }
859 
860 /*      Check whether we can create an object with dentry child in directory
861  *  dir.
862  *  1. We can't do it if child already exists (open has special treatment for
863  *     this case, but since we are inlined it's OK)
864  *  2. We can't do it if dir is read-only (done in permission())
865  *  3. We should have write and exec permissions on dir
866  *  4. We can't do it if dir is immutable (done in permission())
867  */
868 static inline int may_create(struct inode *dir, struct dentry *child) {
869         if (child->d_inode)
870                 return -EEXIST;
871         if (IS_DEADDIR(dir))
872                 return -ENOENT;
873         return permission(dir,MAY_WRITE | MAY_EXEC);
874 }
875 
876 /* 
877  * Special case: O_CREAT|O_EXCL implies O_NOFOLLOW for security
878  * reasons.
879  *
880  * O_DIRECTORY translates into forcing a directory lookup.
881  */
882 static inline int lookup_flags(unsigned int f)
883 {
884         unsigned long retval = LOOKUP_FOLLOW;
885 
886         if (f & O_NOFOLLOW)
887                 retval &= ~LOOKUP_FOLLOW;
888         
889         if ((f & (O_CREAT|O_EXCL)) == (O_CREAT|O_EXCL))
890                 retval &= ~LOOKUP_FOLLOW;
891         
892         if (f & O_DIRECTORY)
893                 retval |= LOOKUP_DIRECTORY;
894 
895         return retval;
896 }
897 
898 int vfs_create(struct inode *dir, struct dentry *dentry, int mode)
899 {
900         int error;
901 
902         mode &= S_IALLUGO & ~current->fs->umask;
903         mode |= S_IFREG;
904 
905         down(&dir->i_zombie);
906         error = may_create(dir, dentry);
907         if (error)
908                 goto exit_lock;
909 
910         error = -EACCES;        /* shouldn't it be ENOSYS? */
911         if (!dir->i_op || !dir->i_op->create)
912                 goto exit_lock;
913 
914         DQUOT_INIT(dir);
915         lock_kernel();
916         error = dir->i_op->create(dir, dentry, mode);
917         unlock_kernel();
918 exit_lock:
919         up(&dir->i_zombie);
920         if (!error)
921                 inode_dir_notify(dir, DN_CREATE);
922         return error;
923 }
924 
925 /*
926  *      open_namei()
927  *
928  * namei for open - this is in fact almost the whole open-routine.
929  *
930  * Note that the low bits of "flag" aren't the same as in the open
931  * system call - they are 00 - no permissions needed
932  *                        01 - read permission needed
933  *                        10 - write permission needed
934  *                        11 - read/write permissions needed
935  * which is a lot more logical, and also allows the "no perm" needed
936  * for symlinks (where the permissions are checked later).
937  * SMP-safe
938  */
939 int open_namei(const char * pathname, int flag, int mode, struct nameidata *nd)
940 {
941         int acc_mode, error = 0;
942         struct inode *inode;
943         struct dentry *dentry;
944         struct dentry *dir;
945         int count = 0;
946 
947         acc_mode = ACC_MODE(flag);
948 
949         /*
950          * The simplest case - just a plain lookup.
951          */
952         if (!(flag & O_CREAT)) {
953                 if (path_init(pathname, lookup_flags(flag), nd))
954                         error = path_walk(pathname, nd);
955                 if (error)
956                         return error;
957                 dentry = nd->dentry;
958                 goto ok;
959         }
960 
961         /*
962          * Create - we need to know the parent.
963          */
964         if (path_init(pathname, LOOKUP_PARENT, nd))
965                 error = path_walk(pathname, nd);
966         if (error)
967                 return error;
968 
969         /*
970          * We have the parent and last component. First of all, check
971          * that we are not asked to creat(2) an obvious directory - that
972          * will not do.
973          */
974         error = -EISDIR;
975         if (nd->last_type != LAST_NORM || nd->last.name[nd->last.len])
976                 goto exit;
977 
978         dir = nd->dentry;
979         down(&dir->d_inode->i_sem);
980         dentry = lookup_hash(&nd->last, nd->dentry);
981 
982 do_last:
983         error = PTR_ERR(dentry);
984         if (IS_ERR(dentry)) {
985                 up(&dir->d_inode->i_sem);
986                 goto exit;
987         }
988 
989         /* Negative dentry, just create the file */
990         if (!dentry->d_inode) {
991                 error = vfs_create(dir->d_inode, dentry, mode);
992                 up(&dir->d_inode->i_sem);
993                 dput(nd->dentry);
994                 nd->dentry = dentry;
995                 if (error)
996                         goto exit;
997                 /* Don't check for write permission, don't truncate */
998                 acc_mode = 0;
999                 flag &= ~O_TRUNC;
1000                 goto ok;
1001         }
1002 
1003         /*
1004          * It already exists.
1005          */
1006         up(&dir->d_inode->i_sem);
1007 
1008         error = -EEXIST;
1009         if (flag & O_EXCL)
1010                 goto exit_dput;
1011 
1012         if (d_mountpoint(dentry)) {
1013                 error = -ELOOP;
1014                 if (flag & O_NOFOLLOW)
1015                         goto exit_dput;
1016                 do __follow_down(&nd->mnt,&dentry); while(d_mountpoint(dentry));
1017         }
1018         error = -ENOENT;
1019         if (!dentry->d_inode)
1020                 goto exit_dput;
1021         if (dentry->d_inode->i_op && dentry->d_inode->i_op->follow_link)
1022                 goto do_link;
1023 
1024         dput(nd->dentry);
1025         nd->dentry = dentry;
1026         error = -EISDIR;
1027         if (dentry->d_inode && S_ISDIR(dentry->d_inode->i_mode))
1028                 goto exit;
1029 ok:
1030         error = -ENOENT;
1031         inode = dentry->d_inode;
1032         if (!inode)
1033                 goto exit;
1034 
1035         error = -ELOOP;
1036         if (S_ISLNK(inode->i_mode))
1037                 goto exit;
1038         
1039         error = -EISDIR;
1040         if (S_ISDIR(inode->i_mode) && (flag & FMODE_WRITE))
1041                 goto exit;
1042 
1043         error = permission(inode,acc_mode);
1044         if (error)
1045                 goto exit;
1046 
1047         /*
1048          * FIFO's, sockets and device files are special: they don't
1049          * actually live on the filesystem itself, and as such you
1050          * can write to them even if the filesystem is read-only.
1051          */
1052         if (S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) {
1053                 flag &= ~O_TRUNC;
1054         } else if (S_ISBLK(inode->i_mode) || S_ISCHR(inode->i_mode)) {
1055                 error = -EACCES;
1056                 if (IS_NODEV(inode))
1057                         goto exit;
1058 
1059                 flag &= ~O_TRUNC;
1060         } else {
1061                 error = -EROFS;
1062                 if (IS_RDONLY(inode) && (flag & 2))
1063                         goto exit;
1064         }
1065         /*
1066          * An append-only file must be opened in append mode for writing.
1067          */
1068         error = -EPERM;
1069         if (IS_APPEND(inode)) {
1070                 if  ((flag & FMODE_WRITE) && !(flag & O_APPEND))
1071                         goto exit;
1072                 if (flag & O_TRUNC)
1073                         goto exit;
1074         }
1075 
1076         /*
1077          * Ensure there are no outstanding leases on the file.
1078          */
1079         error = get_lease(inode, flag);
1080         if (error)
1081                 goto exit;
1082 
1083         if (flag & O_TRUNC) {
1084                 error = get_write_access(inode);
1085                 if (error)
1086                         goto exit;
1087 
1088                 /*
1089                  * Refuse to truncate files with mandatory locks held on them.
1090                  */
1091                 error = locks_verify_locked(inode);
1092                 if (!error) {
1093                         DQUOT_INIT(inode);
1094                         
1095                         error = do_truncate(dentry, 0);
1096                 }
1097                 put_write_access(inode);
1098                 if (error)
1099                         goto exit;
1100         } else
1101                 if (flag & FMODE_WRITE)
1102                         DQUOT_INIT(inode);
1103 
1104         return 0;
1105 
1106 exit_dput:
1107         dput(dentry);
1108 exit:
1109         path_release(nd);
1110         return error;
1111 
1112 do_link:
1113         error = -ELOOP;
1114         if (flag & O_NOFOLLOW)
1115                 goto exit_dput;
1116         /*
1117          * This is subtle. Instead of calling do_follow_link() we do the
1118          * thing by hands. The reason is that this way we have zero link_count
1119          * and path_walk() (called from ->follow_link) honoring LOOKUP_PARENT.
1120          * After that we have the parent and last component, i.e.
1121          * we are in the same situation as after the first path_walk().
1122          * Well, almost - if the last component is normal we get its copy
1123          * stored in nd->last.name and we will have to putname() it when we
1124          * are done. Procfs-like symlinks just set LAST_BIND.
1125          */
1126         UPDATE_ATIME(dentry->d_inode);
1127         error = dentry->d_inode->i_op->follow_link(dentry, nd);
1128         dput(dentry);
1129         if (error)
1130                 return error;
1131         if (nd->last_type == LAST_BIND) {
1132                 dentry = nd->dentry;
1133                 goto ok;
1134         }
1135         error = -EISDIR;
1136         if (nd->last_type != LAST_NORM)
1137                 goto exit;
1138         if (nd->last.name[nd->last.len]) {
1139                 putname(nd->last.name);
1140                 goto exit;
1141         }
1142         if (count++==32) {
1143                 dentry = nd->dentry;
1144                 putname(nd->last.name);
1145                 goto ok;
1146         }
1147         dir = nd->dentry;
1148         down(&dir->d_inode->i_sem);
1149         dentry = lookup_hash(&nd->last, nd->dentry);
1150         putname(nd->last.name);
1151         goto do_last;
1152 }
1153 
1154 /* SMP-safe */
1155 static struct dentry *lookup_create(struct nameidata *nd, int is_dir)
1156 {
1157         struct dentry *dentry;
1158 
1159         down(&nd->dentry->d_inode->i_sem);
1160         dentry = ERR_PTR(-EEXIST);
1161         if (nd->last_type != LAST_NORM)
1162                 goto fail;
1163         dentry = lookup_hash(&nd->last, nd->dentry);
1164         if (IS_ERR(dentry))
1165                 goto fail;
1166         if (!is_dir && nd->last.name[nd->last.len] && !dentry->d_inode)
1167                 goto enoent;
1168         return dentry;
1169 enoent:
1170         dput(dentry);
1171         dentry = ERR_PTR(-ENOENT);
1172 fail:
1173         return dentry;
1174 }
1175 
1176 int vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
1177 {
1178         int error = -EPERM;
1179 
1180         mode &= ~current->fs->umask;
1181 
1182         down(&dir->i_zombie);
1183         if ((S_ISCHR(mode) || S_ISBLK(mode)) && !capable(CAP_MKNOD))
1184                 goto exit_lock;
1185 
1186         error = may_create(dir, dentry);
1187         if (error)
1188                 goto exit_lock;
1189 
1190         error = -EPERM;
1191         if (!dir->i_op || !dir->i_op->mknod)
1192                 goto exit_lock;
1193 
1194         DQUOT_INIT(dir);
1195         lock_kernel();
1196         error = dir->i_op->mknod(dir, dentry, mode, dev);
1197         unlock_kernel();
1198 exit_lock:
1199         up(&dir->i_zombie);
1200         if (!error)
1201                 inode_dir_notify(dir, DN_CREATE);
1202         return error;
1203 }
1204 
1205 asmlinkage long sys_mknod(const char * filename, int mode, dev_t dev)
1206 {
1207         int error = 0;
1208         char * tmp;
1209         struct dentry * dentry;
1210         struct nameidata nd;
1211 
1212         if (S_ISDIR(mode))
1213                 return -EPERM;
1214         tmp = getname(filename);
1215         if (IS_ERR(tmp))
1216                 return PTR_ERR(tmp);
1217 
1218         if (path_init(tmp, LOOKUP_PARENT, &nd))
1219                 error = path_walk(tmp, &nd);
1220         if (error)
1221                 goto out;
1222         dentry = lookup_create(&nd, 0);
1223         error = PTR_ERR(dentry);
1224         if (!IS_ERR(dentry)) {
1225                 switch (mode & S_IFMT) {
1226                 case 0: case S_IFREG:
1227                         error = vfs_create(nd.dentry->d_inode,dentry,mode);
1228                         break;
1229                 case S_IFCHR: case S_IFBLK: case S_IFIFO: case S_IFSOCK:
1230                         error = vfs_mknod(nd.dentry->d_inode,dentry,mode,dev);
1231                         break;
1232                 case S_IFDIR:
1233                         error = -EPERM;
1234                         break;
1235                 default:
1236                         error = -EINVAL;
1237                 }
1238                 dput(dentry);
1239         }
1240         up(&nd.dentry->d_inode->i_sem);
1241         path_release(&nd);
1242 out:
1243         putname(tmp);
1244 
1245         return error;
1246 }
1247 
1248 int vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
1249 {
1250         int error;
1251 
1252         down(&dir->i_zombie);
1253         error = may_create(dir, dentry);
1254         if (error)
1255                 goto exit_lock;
1256 
1257         error = -EPERM;
1258         if (!dir->i_op || !dir->i_op->mkdir)
1259                 goto exit_lock;
1260 
1261         DQUOT_INIT(dir);
1262         mode &= (S_IRWXUGO|S_ISVTX) & ~current->fs->umask;
1263         lock_kernel();
1264         error = dir->i_op->mkdir(dir, dentry, mode);
1265         unlock_kernel();
1266 
1267 exit_lock:
1268         up(&dir->i_zombie);
1269         if (!error)
1270                 inode_dir_notify(dir, DN_CREATE);
1271         return error;
1272 }
1273 
1274 asmlinkage long sys_mkdir(const char * pathname, int mode)
1275 {
1276         int error = 0;
1277         char * tmp;
1278 
1279         tmp = getname(pathname);
1280         error = PTR_ERR(tmp);
1281         if (!IS_ERR(tmp)) {
1282                 struct dentry *dentry;
1283                 struct nameidata nd;
1284 
1285                 if (path_init(tmp, LOOKUP_PARENT, &nd))
1286                         error = path_walk(tmp, &nd);
1287                 if (error)
1288                         goto out;
1289                 dentry = lookup_create(&nd, 1);
1290                 error = PTR_ERR(dentry);
1291                 if (!IS_ERR(dentry)) {
1292                         error = vfs_mkdir(nd.dentry->d_inode, dentry, mode);
1293                         dput(dentry);
1294                 }
1295                 up(&nd.dentry->d_inode->i_sem);
1296                 path_release(&nd);
1297 out:
1298                 putname(tmp);
1299         }
1300 
1301         return error;
1302 }
1303 
1304 /*
1305  * We try to drop the dentry early: we should have
1306  * a usage count of 2 if we're the only user of this
1307  * dentry, and if that is true (possibly after pruning
1308  * the dcache), then we drop the dentry now.
1309  *
1310  * A low-level filesystem can, if it choses, legally
1311  * do a
1312  *
1313  *      if (!d_unhashed(dentry))
1314  *              return -EBUSY;
1315  *
1316  * if it cannot handle the case of removing a directory
1317  * that is still in use by something else..
1318  */
1319 static void d_unhash(struct dentry *dentry)
1320 {
1321         dget(dentry);
1322         switch (atomic_read(&dentry->d_count)) {
1323         default:
1324                 shrink_dcache_parent(dentry);
1325                 if (atomic_read(&dentry->d_count) != 2)
1326                         break;
1327         case 2:
1328                 d_drop(dentry);
1329         }
1330 }
1331 
1332 int vfs_rmdir(struct inode *dir, struct dentry *dentry)
1333 {
1334         int error;
1335 
1336         error = may_delete(dir, dentry, 1);
1337         if (error)
1338                 return error;
1339 
1340         if (!dir->i_op || !dir->i_op->rmdir)
1341                 return -EPERM;
1342 
1343         DQUOT_INIT(dir);
1344 
1345         double_down(&dir->i_zombie, &dentry->d_inode->i_zombie);
1346         d_unhash(dentry);
1347         if (IS_DEADDIR(dir))
1348                 error = -ENOENT;
1349         else if (d_mountpoint(dentry))
1350                 error = -EBUSY;
1351         else {
1352                 lock_kernel();
1353                 error = dir->i_op->rmdir(dir, dentry);
1354                 unlock_kernel();
1355                 if (!error)
1356                         dentry->d_inode->i_flags |= S_DEAD;
1357         }
1358         double_up(&dir->i_zombie, &dentry->d_inode->i_zombie);
1359         if (!error) {
1360                 inode_dir_notify(dir, DN_DELETE);
1361                 d_delete(dentry);
1362         }
1363         dput(dentry);
1364 
1365         return error;
1366 }
1367 
1368 asmlinkage long sys_rmdir(const char * pathname)
1369 {
1370         int error = 0;
1371         char * name;
1372         struct dentry *dentry;
1373         struct nameidata nd;
1374 
1375         name = getname(pathname);
1376         if(IS_ERR(name))
1377                 return PTR_ERR(name);
1378 
1379         if (path_init(name, LOOKUP_PARENT, &nd))
1380                 error = path_walk(name, &nd);
1381         if (error)
1382                 goto exit;
1383 
1384         switch(nd.last_type) {
1385                 case LAST_DOTDOT:
1386                         error = -ENOTEMPTY;
1387                         goto exit1;
1388                 case LAST_DOT:
1389                         error = -EINVAL;
1390                         goto exit1;
1391                 case LAST_ROOT:
1392                         error = -EBUSY;
1393                         goto exit1;
1394         }
1395         down(&nd.dentry->d_inode->i_sem);
1396         dentry = lookup_hash(&nd.last, nd.dentry);
1397         error = PTR_ERR(dentry);
1398         if (!IS_ERR(dentry)) {
1399                 error = vfs_rmdir(nd.dentry->d_inode, dentry);
1400                 dput(dentry);
1401         }
1402         up(&nd.dentry->d_inode->i_sem);
1403 exit1:
1404         path_release(&nd);
1405 exit:
1406         putname(name);
1407         return error;
1408 }
1409 
1410 int vfs_unlink(struct inode *dir, struct dentry *dentry)
1411 {
1412         int error;
1413 
1414         down(&dir->i_zombie);
1415         error = may_delete(dir, dentry, 0);
1416         if (!error) {
1417                 error = -EPERM;
1418                 if (dir->i_op && dir->i_op->unlink) {
1419                         DQUOT_INIT(dir);
1420                         if (d_mountpoint(dentry))
1421                                 error = -EBUSY;
1422                         else {
1423                                 lock_kernel();
1424                                 error = dir->i_op->unlink(dir, dentry);
1425                                 unlock_kernel();
1426                                 if (!error)
1427                                         d_delete(dentry);
1428                         }
1429                 }
1430         }
1431         up(&dir->i_zombie);
1432         if (!error)
1433                 inode_dir_notify(dir, DN_DELETE);
1434         return error;
1435 }
1436 
1437 asmlinkage long sys_unlink(const char * pathname)
1438 {
1439         int error = 0;
1440         char * name;
1441         struct dentry *dentry;
1442         struct nameidata nd;
1443 
1444         name = getname(pathname);
1445         if(IS_ERR(name))
1446                 return PTR_ERR(name);
1447 
1448         if (path_init(name, LOOKUP_PARENT, &nd))
1449                 error = path_walk(name, &nd);
1450         if (error)
1451                 goto exit;
1452         error = -EISDIR;
1453         if (nd.last_type != LAST_NORM)
1454                 goto exit1;
1455         down(&nd.dentry->d_inode->i_sem);
1456         dentry = lookup_hash(&nd.last, nd.dentry);
1457         error = PTR_ERR(dentry);
1458         if (!IS_ERR(dentry)) {
1459                 /* Why not before? Because we want correct error value */
1460                 if (nd.last.name[nd.last.len])
1461                         goto slashes;
1462                 error = vfs_unlink(nd.dentry->d_inode, dentry);
1463         exit2:
1464                 dput(dentry);
1465         }
1466         up(&nd.dentry->d_inode->i_sem);
1467 exit1:
1468         path_release(&nd);
1469 exit:
1470         putname(name);
1471 
1472         return error;
1473 
1474 slashes:
1475         error = !dentry->d_inode ? -ENOENT :
1476                 S_ISDIR(dentry->d_inode->i_mode) ? -EISDIR : -ENOTDIR;
1477         goto exit2;
1478 }
1479 
1480 int vfs_symlink(struct inode *dir, struct dentry *dentry, const char *oldname)
1481 {
1482         int error;
1483 
1484         down(&dir->i_zombie);
1485         error = may_create(dir, dentry);
1486         if (error)
1487                 goto exit_lock;
1488 
1489         error = -EPERM;
1490         if (!dir->i_op || !dir->i_op->symlink)
1491                 goto exit_lock;
1492 
1493         DQUOT_INIT(dir);
1494         lock_kernel();
1495         error = dir->i_op->symlink(dir, dentry, oldname);
1496         unlock_kernel();
1497 
1498 exit_lock:
1499         up(&dir->i_zombie);
1500         if (!error)
1501                 inode_dir_notify(dir, DN_CREATE);
1502         return error;
1503 }
1504 
1505 asmlinkage long sys_symlink(const char * oldname, const char * newname)
1506 {
1507         int error = 0;
1508         char * from;
1509         char * to;
1510 
1511         from = getname(oldname);
1512         if(IS_ERR(from))
1513                 return PTR_ERR(from);
1514         to = getname(newname);
1515         error = PTR_ERR(to);
1516         if (!IS_ERR(to)) {
1517                 struct dentry *dentry;
1518                 struct nameidata nd;
1519 
1520                 if (path_init(to, LOOKUP_PARENT, &nd))
1521                         error = path_walk(to, &nd);
1522                 if (error)
1523                         goto out;
1524                 dentry = lookup_create(&nd, 0);
1525                 error = PTR_ERR(dentry);
1526                 if (!IS_ERR(dentry)) {
1527                         error = vfs_symlink(nd.dentry->d_inode, dentry, from);
1528                         dput(dentry);
1529                 }
1530                 up(&nd.dentry->d_inode->i_sem);
1531                 path_release(&nd);
1532 out:
1533                 putname(to);
1534         }
1535         putname(from);
1536         return error;
1537 }
1538 
1539 int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_dentry)
1540 {
1541         struct inode *inode;
1542         int error;
1543 
1544         down(&dir->i_zombie);
1545         error = -ENOENT;
1546         inode = old_dentry->d_inode;
1547         if (!inode)
1548                 goto exit_lock;
1549 
1550         error = may_create(dir, new_dentry);
1551         if (error)
1552                 goto exit_lock;
1553 
1554         error = -EXDEV;
1555         if (dir->i_dev != inode->i_dev)
1556                 goto exit_lock;
1557 
1558         /*
1559          * A link to an append-only or immutable file cannot be created.
1560          */
1561         error = -EPERM;
1562         if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
1563                 goto exit_lock;
1564         if (!dir->i_op || !dir->i_op->link)
1565                 goto exit_lock;
1566 
1567         DQUOT_INIT(dir);
1568         lock_kernel();
1569         error = dir->i_op->link(old_dentry, dir, new_dentry);
1570         unlock_kernel();
1571 
1572 exit_lock:
1573         up(&dir->i_zombie);
1574         if (!error)
1575                 inode_dir_notify(dir, DN_CREATE);
1576         return error;
1577 }
1578 
1579 /*
1580  * Hardlinks are often used in delicate situations.  We avoid
1581  * security-related surprises by not following symlinks on the
1582  * newname.  --KAB
1583  *
1584  * We don't follow them on the oldname either to be compatible
1585  * with linux 2.0, and to avoid hard-linking to directories
1586  * and other special files.  --ADM
1587  */
1588 asmlinkage long sys_link(const char * oldname, const char * newname)
1589 {
1590         int error;
1591         char * from;
1592         char * to;
1593 
1594         from = getname(oldname);
1595         if(IS_ERR(from))
1596                 return PTR_ERR(from);
1597         to = getname(newname);
1598         error = PTR_ERR(to);
1599         if (!IS_ERR(to)) {
1600                 struct dentry *new_dentry;
1601                 struct nameidata nd, old_nd;
1602 
1603                 error = 0;
1604                 if (path_init(from, LOOKUP_POSITIVE, &old_nd))
1605                         error = path_walk(from, &old_nd);
1606                 if (error)
1607                         goto exit;
1608                 if (path_init(to, LOOKUP_PARENT, &nd))
1609                         error = path_walk(to, &nd);
1610                 if (error)
1611                         goto out;
1612                 error = -EXDEV;
1613                 if (old_nd.mnt != nd.mnt)
1614                         goto out_release;
1615                 new_dentry = lookup_create(&nd, 0);
1616                 error = PTR_ERR(new_dentry);
1617                 if (!IS_ERR(new_dentry)) {
1618                         error = vfs_link(old_nd.dentry, nd.dentry->d_inode, new_dentry);
1619                         dput(new_dentry);
1620                 }
1621                 up(&nd.dentry->d_inode->i_sem);
1622 out_release:
1623                 path_release(&nd);
1624 out:
1625                 path_release(&old_nd);
1626 exit:
1627                 putname(to);
1628         }
1629         putname(from);
1630 
1631         return error;
1632 }
1633 
1634 /*
1635  * The worst of all namespace operations - renaming directory. "Perverted"
1636  * doesn't even start to describe it. Somebody in UCB had a heck of a trip...
1637  * Problems:
1638  *      a) we can get into loop creation. Check is done in is_subdir().
1639  *      b) race potential - two innocent renames can create a loop together.
1640  *         That's where 4.4 screws up. Current fix: serialization on
1641  *         sb->s_vfs_rename_sem. We might be more accurate, but that's another
1642  *         story.
1643  *      c) we have to lock _three_ objects - parents and victim (if it exists).
1644  *         And that - after we got ->i_sem on parents (until then we don't know
1645  *         whether the target exists at all, let alone whether it is a directory
1646  *         or not). Solution: ->i_zombie. Taken only after ->i_sem. Always taken
1647  *         on link creation/removal of any kind. And taken (without ->i_sem) on
1648  *         directory that will be removed (both in rmdir() and here).
1649  *      d) some filesystems don't support opened-but-unlinked directories,
1650  *         either because of layout or because they are not ready to deal with
1651  *         all cases correctly. The latter will be fixed (taking this sort of
1652  *         stuff into VFS), but the former is not going away. Solution: the same
1653  *         trick as in rmdir().
1654  *      e) conversion from fhandle to dentry may come in the wrong moment - when
1655  *         we are removing the target. Solution: we will have to grab ->i_zombie
1656  *         in the fhandle_to_dentry code. [FIXME - current nfsfh.c relies on
1657  *         ->i_sem on parents, which works but leads to some truely excessive
1658  *         locking].
1659  */
1660 int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry,
1661                struct inode *new_dir, struct dentry *new_dentry)
1662 {
1663         int error;
1664         struct inode *target;
1665 
1666         if (old_dentry->d_inode == new_dentry->d_inode)
1667                 return 0;
1668 
1669         error = may_delete(old_dir, old_dentry, 1);
1670         if (error)
1671                 return error;
1672 
1673         if (new_dir->i_dev != old_dir->i_dev)
1674                 return -EXDEV;
1675 
1676         if (!new_dentry->d_inode)
1677                 error = may_create(new_dir, new_dentry);
1678         else
1679                 error = may_delete(new_dir, new_dentry, 1);
1680         if (error)
1681                 return error;
1682 
1683         if (!old_dir->i_op || !old_dir->i_op->rename)
1684                 return -EPERM;
1685 
1686         /*
1687          * If we are going to change the parent - check write permissions,
1688          * we'll need to flip '..'.
1689          */
1690         if (new_dir != old_dir) {
1691                 error = permission(old_dentry->d_inode, MAY_WRITE);
1692         }
1693         if (error)
1694                 return error;
1695 
1696         DQUOT_INIT(old_dir);
1697         DQUOT_INIT(new_dir);
1698         down(&old_dir->i_sb->s_vfs_rename_sem);
1699         error = -EINVAL;
1700         if (is_subdir(new_dentry, old_dentry))
1701                 goto out_unlock;
1702         target = new_dentry->d_inode;
1703         if (target) { /* Hastur! Hastur! Hastur! */
1704                 triple_down(&old_dir->i_zombie,
1705                             &new_dir->i_zombie,
1706                             &target->i_zombie);
1707                 d_unhash(new_dentry);
1708         } else
1709                 double_down(&old_dir->i_zombie,
1710                             &new_dir->i_zombie);
1711         if (IS_DEADDIR(old_dir)||IS_DEADDIR(new_dir))
1712                 error = -ENOENT;
1713         else if (d_mountpoint(old_dentry)||d_mountpoint(new_dentry))
1714                 error = -EBUSY;
1715         else 
1716                 error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry);
1717         if (target) {
1718                 if (!error)
1719                         target->i_flags |= S_DEAD;
1720                 triple_up(&old_dir->i_zombie,
1721                           &new_dir->i_zombie,
1722                           &target->i_zombie);
1723                 if (d_unhashed(new_dentry))
1724                         d_rehash(new_dentry);
1725                 dput(new_dentry);
1726         } else
1727                 double_up(&old_dir->i_zombie,
1728                           &new_dir->i_zombie);
1729                 
1730         if (!error)
1731                 d_move(old_dentry,new_dentry);
1732 out_unlock:
1733         up(&old_dir->i_sb->s_vfs_rename_sem);
1734         return error;
1735 }
1736 
1737 int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry,
1738                struct inode *new_dir, struct dentry *new_dentry)
1739 {
1740         int error;
1741 
1742         if (old_dentry->d_inode == new_dentry->d_inode)
1743                 return 0;
1744 
1745         error = may_delete(old_dir, old_dentry, 0);
1746         if (error)
1747                 return error;
1748 
1749         if (new_dir->i_dev != old_dir->i_dev)
1750                 return -EXDEV;
1751 
1752         if (!new_dentry->d_inode)
1753                 error = may_create(new_dir, new_dentry);
1754         else
1755                 error = may_delete(new_dir, new_dentry, 0);
1756         if (error)
1757                 return error;
1758 
1759         if (!old_dir->i_op || !old_dir->i_op->rename)
1760                 return -EPERM;
1761 
1762         DQUOT_INIT(old_dir);
1763         DQUOT_INIT(new_dir);
1764         double_down(&old_dir->i_zombie, &new_dir->i_zombie);
1765         if (d_mountpoint(old_dentry)||d_mountpoint(new_dentry))
1766                 error = -EBUSY;
1767         else
1768                 error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry);
1769         double_up(&old_dir->i_zombie, &new_dir->i_zombie);
1770         if (error)
1771                 return error;
1772         /* The following d_move() should become unconditional */
1773         if (!(old_dir->i_sb->s_type->fs_flags & FS_ODD_RENAME)) {
1774                 d_move(old_dentry, new_dentry);
1775         }
1776         return 0;
1777 }
1778 
1779 int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
1780                struct inode *new_dir, struct dentry *new_dentry)
1781 {
1782         int error;
1783         if (S_ISDIR(old_dentry->d_inode->i_mode))
1784                 error = vfs_rename_dir(old_dir,old_dentry,new_dir,new_dentry);
1785         else
1786                 error = vfs_rename_other(old_dir,old_dentry,new_dir,new_dentry);
1787         if (!error) {
1788                 if (old_dir == new_dir)
1789                         inode_dir_notify(old_dir, DN_RENAME);
1790                 else {
1791                         inode_dir_notify(old_dir, DN_DELETE);
1792                         inode_dir_notify(new_dir, DN_CREATE);
1793                 }
1794         }
1795         return error;
1796 }
1797 
1798 static inline int do_rename(const char * oldname, const char * newname)
1799 {
1800         int error = 0;
1801         struct dentry * old_dir, * new_dir;
1802         struct dentry * old_dentry, *new_dentry;
1803         struct nameidata oldnd, newnd;
1804 
1805         if (path_init(oldname, LOOKUP_PARENT, &oldnd))
1806                 error = path_walk(oldname, &oldnd);
1807 
1808         if (error)
1809                 goto exit;
1810 
1811         if (path_init(newname, LOOKUP_PARENT, &newnd))
1812                 error = path_walk(newname, &newnd);
1813         if (error)
1814                 goto exit1;
1815 
1816         error = -EXDEV;
1817         if (oldnd.mnt != newnd.mnt)
1818                 goto exit2;
1819 
1820         old_dir = oldnd.dentry;
1821         error = -EBUSY;
1822         if (oldnd.last_type != LAST_NORM)
1823                 goto exit2;
1824 
1825         new_dir = newnd.dentry;
1826         if (newnd.last_type != LAST_NORM)
1827                 goto exit2;
1828 
1829         double_lock(new_dir, old_dir);
1830 
1831         old_dentry = lookup_hash(&oldnd.last, old_dir);
1832         error = PTR_ERR(old_dentry);
1833         if (IS_ERR(old_dentry))
1834                 goto exit3;
1835         /* source must exist */
1836         error = -ENOENT;
1837         if (!old_dentry->d_inode)
1838                 goto exit4;
1839         /* unless the source is a directory trailing slashes give -ENOTDIR */
1840         if (!S_ISDIR(old_dentry->d_inode->i_mode)) {
1841                 error = -ENOTDIR;
1842                 if (oldnd.last.name[oldnd.last.len])
1843                         goto exit4;
1844                 if (newnd.last.name[newnd.last.len])
1845                         goto exit4;
1846         }
1847         new_dentry = lookup_hash(&newnd.last, new_dir);
1848         error = PTR_ERR(new_dentry);
1849         if (IS_ERR(new_dentry))
1850                 goto exit4;
1851 
1852         lock_kernel();
1853         error = vfs_rename(old_dir->d_inode, old_dentry,
1854                                    new_dir->d_inode, new_dentry);
1855         unlock_kernel();
1856 
1857         dput(new_dentry);
1858 exit4:
1859         dput(old_dentry);
1860 exit3:
1861         double_up(&new_dir->d_inode->i_sem, &old_dir->d_inode->i_sem);
1862 exit2:
1863         path_release(&newnd);
1864 exit1:
1865         path_release(&oldnd);
1866 exit:
1867         return error;
1868 }
1869 
1870 asmlinkage long sys_rename(const char * oldname, const char * newname)
1871 {
1872         int error;
1873         char * from;
1874         char * to;
1875 
1876         from = getname(oldname);
1877         if(IS_ERR(from))
1878                 return PTR_ERR(from);
1879         to = getname(newname);
1880         error = PTR_ERR(to);
1881         if (!IS_ERR(to)) {
1882                 error = do_rename(from,to);
1883                 putname(to);
1884         }
1885         putname(from);
1886         return error;
1887 }
1888 
1889 int vfs_readlink(struct dentry *dentry, char *buffer, int buflen, const char *link)
1890 {
1891         int len;
1892 
1893         len = PTR_ERR(link);
1894         if (IS_ERR(link))
1895                 goto out;
1896 
1897         len = strlen(link);
1898         if (len > (unsigned) buflen)
1899                 len = buflen;
1900         if (copy_to_user(buffer, link, len))
1901                 len = -EFAULT;
1902 out:
1903         return len;
1904 }
1905 
1906 static inline int
1907 __vfs_follow_link(struct nameidata *nd, const char *link)
1908 {
1909         int res = 0;
1910         char *name;
1911         if (IS_ERR(link))
1912                 goto fail;
1913 
1914         if (*link == '/') {
1915                 path_release(nd);
1916                 if (!walk_init_root(link, nd))
1917                         /* weird __emul_prefix() stuff did it */
1918                         goto out;
1919         }
1920         res = path_walk(link, nd);
1921 out:
1922         if (current->link_count || res || nd->last_type!=LAST_NORM)
1923                 return res;
1924         /*
1925          * If it is an iterative symlinks resolution in open_namei() we
1926          * have to copy the last component. And all that crap because of
1927          * bloody create() on broken symlinks. Furrfu...
1928          */
1929         name = __getname();
1930         if (IS_ERR(name))
1931                 goto fail_name;
1932         strcpy(name, nd->last.name);
1933         nd->last.name = name;
1934         return 0;
1935 fail_name:
1936         link = name;
1937 fail:
1938         path_release(nd);
1939         return PTR_ERR(link);
1940 }
1941 
/*
 * Out-of-line wrapper around the inline __vfs_follow_link(); passes
 * both arguments through and returns its result.
 */
int vfs_follow_link(struct nameidata *nd, const char *link)
{
	int res = __vfs_follow_link(nd, link);

	return res;
}
1946 
1947 /* get the link contents into pagecache */
1948 static char *page_getlink(struct dentry * dentry, struct page **ppage)
1949 {
1950         struct page * page;
1951         struct address_space *mapping = dentry->d_inode->i_mapping;
1952         page = read_cache_page(mapping, 0, (filler_t *)mapping->a_ops->readpage,
1953                                 NULL);
1954         if (IS_ERR(page))
1955                 goto sync_fail;
1956         wait_on_page(page);
1957         if (!Page_Uptodate(page))
1958                 goto async_fail;
1959         *ppage = page;
1960         return kmap(page);
1961 
1962 async_fail:
1963         page_cache_release(page);
1964         return ERR_PTR(-EIO);
1965 
1966 sync_fail:
1967         return (char*)page;
1968 }
1969 
1970 int page_readlink(struct dentry *dentry, char *buffer, int buflen)
1971 {
1972         struct page *page = NULL;
1973         char *s = page_getlink(dentry, &page);
1974         int res = vfs_readlink(dentry,buffer,buflen,s);
1975         if (page) {
1976                 kunmap(page);
1977                 page_cache_release(page);
1978         }
1979         return res;
1980 }
1981 
1982 int page_follow_link(struct dentry *dentry, struct nameidata *nd)
1983 {
1984         struct page *page = NULL;
1985         char *s = page_getlink(dentry, &page);
1986         int res = __vfs_follow_link(nd, s);
1987         if (page) {
1988                 kunmap(page);
1989                 page_cache_release(page);
1990         }
1991         return res;
1992 }
1993 
1994 struct inode_operations page_symlink_inode_operations = {
1995         readlink:       page_readlink,
1996         follow_link:    page_follow_link,
1997 };
1998 

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~ [ freetext search ] ~ [ file search ] ~

This page was automatically generated by the LXR engine.
Visit the LXR main site for more information.