1 /*
2 * linux/fs/namei.c
3 *
4 * Copyright (C) 1991, 1992 Linus Torvalds
5 */
6
7 /*
8 * Some corrections by tytso.
9 */
10
11 /* [Feb 1997 T. Schoebel-Theuer] Complete rewrite of the pathname
12 * lookup logic.
13 */
14 /* [Feb-Apr 2000, AV] Rewrite to the new namespace architecture.
15 */
16
17 #include <linux/init.h>
18 #include <linux/mm.h>
19 #include <linux/proc_fs.h>
20 #include <linux/smp_lock.h>
21 #include <linux/quotaops.h>
22 #include <linux/pagemap.h>
23 #include <linux/dcache.h>
24 #include <linux/dnotify.h>
25
26 #include <asm/uaccess.h>
27 #include <asm/unaligned.h>
28 #include <asm/semaphore.h>
29 #include <asm/page.h>
30 #include <asm/pgtable.h>
31
32 #include <asm/namei.h>
33
/*
 * Translate the access-mode bits of open flags (x & O_ACCMODE) into the
 * byte stored at that index of the string literal:
 *   0 -> 0, 1 -> 04, 2 -> 02, 3 -> 06.
 * open_namei() hands the result to permission() as the access mask.
 */
#define ACC_MODE(x) ("\000\004\002\006"[(x)&O_ACCMODE])
35
36 /* [Feb-1997 T. Schoebel-Theuer]
37 * Fundamental changes in the pathname lookup mechanisms (namei)
38 * were necessary because of omirr. The reason is that omirr needs
39 * to know the _real_ pathname, not the user-supplied one, in case
40 * of symlinks (and also when transname replacements occur).
41 *
42 * The new code replaces the old recursive symlink resolution with
43 * an iterative one (in case of non-nested symlink chains). It does
44 * this with calls to <fs>_follow_link().
45 * As a side effect, dir_namei(), _namei() and follow_link() are now
46 * replaced with a single function lookup_dentry() that can handle all
47 * the special cases of the former code.
48 *
49 * With the new dcache, the pathname is stored at each inode, at least as
50 * long as the refcount of the inode is positive. As a side effect, the
51 * size of the dcache depends on the inode cache and thus is dynamic.
52 *
53 * [29-Apr-1998 C. Scott Ananian] Updated above description of symlink
54 * resolution to correspond with current state of the code.
55 *
56 * Note that the symlink resolution is not *completely* iterative.
57 * There is still a significant amount of tail- and mid- recursion in
58 * the algorithm. Also, note that <fs>_readlink() is not used in
59 * lookup_dentry(): lookup_dentry() on the result of <fs>_readlink()
60 * may return different results than <fs>_follow_link(). Many virtual
61 * filesystems (including /proc) exhibit this behavior.
62 */
63
64 /* [24-Feb-97 T. Schoebel-Theuer] Side effects caused by new implementation:
65 * New symlink semantics: when open() is called with flags O_CREAT | O_EXCL
66 * and the name already exists in form of a symlink, try to create the new
67 * name indicated by the symlink. The old code always complained that the
68 * name already exists, due to not following the symlink even if its target
69 * is nonexistent. The new semantics affects also mknod() and link() when
 * the name is a symlink pointing to a non-existent name.
71 *
72 * I don't know which semantics is the right one, since I have no access
73 * to standards. But I found by trial that HP-UX 9.0 has the full "new"
74 * semantics implemented, while SunOS 4.1.1 and Solaris (SunOS 5.4) have the
75 * "old" one. Personally, I think the new semantics is much more logical.
76 * Note that "ln old new" where "new" is a symlink pointing to a non-existing
 * file does succeed in both HP-UX and SunOS, but not in Solaris
78 * and in the old Linux semantics.
79 */
80
81 /* [16-Dec-97 Kevin Buhr] For security reasons, we change some symlink
82 * semantics. See the comments in "open_namei" and "do_link" below.
83 *
84 * [10-Sep-98 Alan Modra] Another symlink change.
85 */
86
87 /* [Feb-Apr 2000 AV] Complete rewrite. Rules for symlinks:
88 * inside the path - always follow.
89 * in the last component in creation/removal/renaming - never follow.
90 * if LOOKUP_FOLLOW passed - follow.
91 * if the pathname has trailing slashes - follow.
92 * otherwise - don't follow.
93 * (applied in that order).
94 *
95 * [Jun 2000 AV] Inconsistent behaviour of open() in case if flags==O_CREAT
96 * restored for 2.4. This is the last surviving part of old 4.2BSD bug.
97 * During the 2.4 we need to fix the userland stuff depending on it -
98 * hopefully we will be able to get rid of that wart in 2.5. So far only
99 * XEmacs seems to be relying on it...
100 */
101
102 /* In order to reduce some races, while at the same time doing additional
103 * checking and hopefully speeding things up, we copy filenames to the
104 * kernel data space before using them..
105 *
106 * POSIX.1 2.4: an empty pathname is invalid (ENOENT).
107 */
108 static inline int do_getname(const char *filename, char *page)
109 {
110 int retval;
111 unsigned long len = PATH_MAX + 1;
112
113 if ((unsigned long) filename >= TASK_SIZE) {
114 if (!segment_eq(get_fs(), KERNEL_DS))
115 return -EFAULT;
116 } else if (TASK_SIZE - (unsigned long) filename < PAGE_SIZE)
117 len = TASK_SIZE - (unsigned long) filename;
118
119 retval = strncpy_from_user((char *)page, filename, len);
120 if (retval > 0) {
121 if (retval < len)
122 return 0;
123 return -ENAMETOOLONG;
124 } else if (!retval)
125 retval = -ENOENT;
126 return retval;
127 }
128
129 char * getname(const char * filename)
130 {
131 char *tmp, *result;
132
133 result = ERR_PTR(-ENOMEM);
134 tmp = __getname();
135 if (tmp) {
136 int retval = do_getname(filename, tmp);
137
138 result = tmp;
139 if (retval < 0) {
140 putname(tmp);
141 result = ERR_PTR(retval);
142 }
143 }
144 return result;
145 }
146
147 /*
148 * permission()
149 *
150 * is used to check for read/write/execute permissions on a file.
151 * We use "fsuid" for this, letting us set arbitrary permissions
152 * for filesystem access without changing the "normal" uids which
153 * are used for other things..
154 */
155 int vfs_permission(struct inode * inode,int mask)
156 {
157 int mode = inode->i_mode;
158
159 if ((mask & S_IWOTH) && IS_RDONLY(inode) &&
160 (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)))
161 return -EROFS; /* Nobody gets write access to a read-only fs */
162
163 if ((mask & S_IWOTH) && IS_IMMUTABLE(inode))
164 return -EACCES; /* Nobody gets write access to an immutable file */
165
166 if (current->fsuid == inode->i_uid)
167 mode >>= 6;
168 else if (in_group_p(inode->i_gid))
169 mode >>= 3;
170
171 if (((mode & mask & S_IRWXO) == mask) || capable(CAP_DAC_OVERRIDE))
172 return 0;
173
174 /* read and search access */
175 if ((mask == S_IROTH) ||
176 (S_ISDIR(inode->i_mode) && !(mask & ~(S_IROTH | S_IXOTH))))
177 if (capable(CAP_DAC_READ_SEARCH))
178 return 0;
179
180 return -EACCES;
181 }
182
183 int permission(struct inode * inode,int mask)
184 {
185 if (inode->i_op && inode->i_op->permission) {
186 int retval;
187 lock_kernel();
188 retval = inode->i_op->permission(inode, mask);
189 unlock_kernel();
190 return retval;
191 }
192 return vfs_permission(inode, mask);
193 }
194
195 /*
196 * get_write_access() gets write permission for a file.
197 * put_write_access() releases this write permission.
198 * This is used for regular files.
199 * We cannot support write (and maybe mmap read-write shared) accesses and
200 * MAP_DENYWRITE mmappings simultaneously. The i_writecount field of an inode
201 * can have the following values:
202 * 0: no writers, no VM_DENYWRITE mappings
203 * < 0: (-i_writecount) vm_area_structs with VM_DENYWRITE set exist
204 * > 0: (i_writecount) users are writing to the file.
205 *
206 * Normally we operate on that counter with atomic_{inc,dec} and it's safe
207 * except for the cases where we don't hold i_writecount yet. Then we need to
208 * use {get,deny}_write_access() - these functions check the sign and refuse
209 * to do the change if sign is wrong. Exclusion between them is provided by
210 * spinlock (arbitration_lock) and I'll rip the second arsehole to the first
211 * who will try to move it in struct inode - just leave it here.
212 */
213 static spinlock_t arbitration_lock = SPIN_LOCK_UNLOCKED;
214 int get_write_access(struct inode * inode)
215 {
216 spin_lock(&arbitration_lock);
217 if (atomic_read(&inode->i_writecount) < 0) {
218 spin_unlock(&arbitration_lock);
219 return -ETXTBSY;
220 }
221 atomic_inc(&inode->i_writecount);
222 spin_unlock(&arbitration_lock);
223 return 0;
224 }
225 int deny_write_access(struct file * file)
226 {
227 spin_lock(&arbitration_lock);
228 if (atomic_read(&file->f_dentry->d_inode->i_writecount) > 0) {
229 spin_unlock(&arbitration_lock);
230 return -ETXTBSY;
231 }
232 atomic_dec(&file->f_dentry->d_inode->i_writecount);
233 spin_unlock(&arbitration_lock);
234 return 0;
235 }
236
237 void path_release(struct nameidata *nd)
238 {
239 dput(nd->dentry);
240 mntput(nd->mnt);
241 }
242
243 /*
244 * Internal lookup() using the new generic dcache.
245 * SMP-safe
246 */
247 static struct dentry * cached_lookup(struct dentry * parent, struct qstr * name, int flags)
248 {
249 struct dentry * dentry = d_lookup(parent, name);
250
251 if (dentry && dentry->d_op && dentry->d_op->d_revalidate) {
252 if (!dentry->d_op->d_revalidate(dentry, flags) && !d_invalidate(dentry)) {
253 dput(dentry);
254 dentry = NULL;
255 }
256 }
257 return dentry;
258 }
259
260 /*
261 * This is called when everything else fails, and we actually have
262 * to go to the low-level filesystem to find out what we should do..
263 *
264 * We get the directory semaphore, and after getting that we also
265 * make sure that nobody added the entry to the dcache in the meantime..
266 * SMP-safe
267 */
268 static struct dentry * real_lookup(struct dentry * parent, struct qstr * name, int flags)
269 {
270 struct dentry * result;
271 struct inode *dir = parent->d_inode;
272
273 down(&dir->i_sem);
274 /*
275 * First re-do the cached lookup just in case it was created
276 * while we waited for the directory semaphore..
277 *
278 * FIXME! This could use version numbering or similar to
279 * avoid unnecessary cache lookups.
280 */
281 result = d_lookup(parent, name);
282 if (!result) {
283 struct dentry * dentry = d_alloc(parent, name);
284 result = ERR_PTR(-ENOMEM);
285 if (dentry) {
286 lock_kernel();
287 result = dir->i_op->lookup(dir, dentry);
288 unlock_kernel();
289 if (result)
290 dput(dentry);
291 else
292 result = dentry;
293 }
294 up(&dir->i_sem);
295 return result;
296 }
297
298 /*
299 * Uhhuh! Nasty case: the cache was re-populated while
300 * we waited on the semaphore. Need to revalidate.
301 */
302 up(&dir->i_sem);
303 if (result->d_op && result->d_op->d_revalidate) {
304 if (!result->d_op->d_revalidate(result, flags) && !d_invalidate(result)) {
305 dput(result);
306 result = ERR_PTR(-ENOENT);
307 }
308 }
309 return result;
310 }
311
312 static inline int do_follow_link(struct dentry *dentry, struct nameidata *nd)
313 {
314 int err;
315 if (current->link_count >= 8)
316 goto loop;
317 current->link_count++;
318 UPDATE_ATIME(dentry->d_inode);
319 err = dentry->d_inode->i_op->follow_link(dentry, nd);
320 current->link_count--;
321 return err;
322 loop:
323 path_release(nd);
324 return -ELOOP;
325 }
326
327 static inline int __follow_up(struct vfsmount **mnt, struct dentry **base)
328 {
329 struct vfsmount *parent;
330 struct dentry *dentry;
331 spin_lock(&dcache_lock);
332 parent=(*mnt)->mnt_parent;
333 if (parent == *mnt) {
334 spin_unlock(&dcache_lock);
335 return 0;
336 }
337 mntget(parent);
338 dentry=dget((*mnt)->mnt_mountpoint);
339 spin_unlock(&dcache_lock);
340 dput(*base);
341 *base = dentry;
342 mntput(*mnt);
343 *mnt = parent;
344 return 1;
345 }
346
/* Non-inline entry point for __follow_up(); same arguments and result. */
int follow_up(struct vfsmount **mnt, struct dentry **dentry)
{
	int moved = __follow_up(mnt, dentry);

	return moved;
}
351
352 static inline int __follow_down(struct vfsmount **mnt, struct dentry **dentry)
353 {
354 struct list_head *p;
355 spin_lock(&dcache_lock);
356 p = (*dentry)->d_vfsmnt.next;
357 while (p != &(*dentry)->d_vfsmnt) {
358 struct vfsmount *tmp;
359 tmp = list_entry(p, struct vfsmount, mnt_clash);
360 if (tmp->mnt_parent == *mnt) {
361 *mnt = mntget(tmp);
362 spin_unlock(&dcache_lock);
363 mntput(tmp->mnt_parent);
364 /* tmp holds the mountpoint, so... */
365 dput(*dentry);
366 *dentry = dget(tmp->mnt_root);
367 return 1;
368 }
369 p = p->next;
370 }
371 spin_unlock(&dcache_lock);
372 return 0;
373 }
374
/* Non-inline entry point for __follow_down(); same arguments and result. */
int follow_down(struct vfsmount **mnt, struct dentry **dentry)
{
	int moved = __follow_down(mnt, dentry);

	return moved;
}
379
380 static inline void follow_dotdot(struct nameidata *nd)
381 {
382 while(1) {
383 struct vfsmount *parent;
384 struct dentry *dentry;
385 read_lock(¤t->fs->lock);
386 if (nd->dentry == current->fs->root &&
387 nd->mnt == current->fs->rootmnt) {
388 read_unlock(¤t->fs->lock);
389 break;
390 }
391 read_unlock(¤t->fs->lock);
392 spin_lock(&dcache_lock);
393 if (nd->dentry != nd->mnt->mnt_root) {
394 dentry = dget(nd->dentry->d_parent);
395 spin_unlock(&dcache_lock);
396 dput(nd->dentry);
397 nd->dentry = dentry;
398 break;
399 }
400 parent=nd->mnt->mnt_parent;
401 if (parent == nd->mnt) {
402 spin_unlock(&dcache_lock);
403 break;
404 }
405 mntget(parent);
406 dentry=dget(nd->mnt->mnt_mountpoint);
407 spin_unlock(&dcache_lock);
408 dput(nd->dentry);
409 nd->dentry = dentry;
410 mntput(nd->mnt);
411 nd->mnt = parent;
412 }
413 }
/*
 * Name resolution.
 *
 * This is the basic name resolution function, turning a pathname
 * into the final dentry.
 *
 * We expect 'base' to be positive and a directory.
 *
 * The walk starts at nd->dentry / nd->mnt and updates both as it goes.
 * Each component is hashed, looked up (dcache first, then the filesystem
 * via real_lookup()), moved across mountpoints and, where applicable, has
 * its symlink followed.
 *
 * With LOOKUP_PARENT set, the last component is not looked up: it is
 * stored in nd->last and classified in nd->last_type instead.
 *
 * Returns 0 on success.  On failure a negative errno is returned and nd
 * is released with path_release(), except on the return_err path taken
 * after do_follow_link() failed, where nd is not released here
 * (NOTE(review): presumably ->follow_link() has already dropped it -
 * confirm against the filesystems' follow_link implementations).
 */
int path_walk(const char * name, struct nameidata *nd)
{
	struct dentry *dentry;
	struct inode *inode;
	int err;
	unsigned int lookup_flags = nd->flags;

	/* Skip leading slashes; a name made only of slashes is the base. */
	while (*name=='/')
		name++;
	if (!*name)
		goto return_base;

	inode = nd->dentry->d_inode;
	/*
	 * link_count is nonzero while do_follow_link() is active, i.e. we
	 * are resolving a symlink body: the caller's flags are replaced by
	 * plain LOOKUP_FOLLOW.
	 */
	if (current->link_count)
		lookup_flags = LOOKUP_FOLLOW;

	/* At this point we know we have a real path component. */
	for(;;) {
		unsigned long hash;
		struct qstr this;
		unsigned int c;

		/* Search permission is needed on every directory we step through. */
		err = permission(inode, MAY_EXEC);
		dentry = ERR_PTR(err);
		if (err)
			break;

		this.name = name;
		c = *(const unsigned char *)name;

		/* Hash the component while scanning to its end ('/' or NUL). */
		hash = init_name_hash();
		do {
			name++;
			hash = partial_name_hash(c, hash);
			c = *(const unsigned char *)name;
		} while (c && (c != '/'));
		this.len = name - (const char *) this.name;
		this.hash = end_name_hash(hash);

		/* remove trailing slashes? */
		if (!c)
			goto last_component;
		while (*++name == '/');
		if (!*name)
			goto last_with_slashes;

		/*
		 * "." and ".." are special - ".." especially so because it has
		 * to be able to know about the current root directory and
		 * parent relationships.
		 */
		if (this.name[0] == '.') switch (this.len) {
			default:
				break;
			case 2:
				if (this.name[1] != '.')
					break;
				follow_dotdot(nd);
				inode = nd->dentry->d_inode;
				/* fallthrough */
			case 1:
				continue;
		}
		/*
		 * See if the low-level filesystem might want
		 * to use its own hash..
		 */
		if (nd->dentry->d_op && nd->dentry->d_op->d_hash) {
			err = nd->dentry->d_op->d_hash(nd->dentry, &this);
			if (err < 0)
				break;
		}
		/* This does the actual lookups.. */
		dentry = cached_lookup(nd->dentry, &this, LOOKUP_CONTINUE);
		if (!dentry) {
			dentry = real_lookup(nd->dentry, &this, LOOKUP_CONTINUE);
			err = PTR_ERR(dentry);
			if (IS_ERR(dentry))
				break;
		}
		/* Check mountpoints.. */
		while (d_mountpoint(dentry) && __follow_down(&nd->mnt, &dentry))
			;

		/* An intermediate component must exist and have inode operations. */
		err = -ENOENT;
		inode = dentry->d_inode;
		if (!inode)
			goto out_dput;
		err = -ENOTDIR;
		if (!inode->i_op)
			goto out_dput;

		if (inode->i_op->follow_link) {
			/* Symlinks inside the path are always followed. */
			err = do_follow_link(dentry, nd);
			dput(dentry);
			if (err)
				goto return_err;
			/* nd now describes the link target; re-check it. */
			err = -ENOENT;
			inode = nd->dentry->d_inode;
			if (!inode)
				break;
			err = -ENOTDIR;
			if (!inode->i_op)
				break;
		} else {
			/* Plain step: nd takes over the reference on dentry. */
			dput(nd->dentry);
			nd->dentry = dentry;
		}
		/* More components follow, so this one must support ->lookup(). */
		err = -ENOTDIR;
		if (!inode->i_op->lookup)
			break;
		continue;
		/* here ends the main loop */

last_with_slashes:
		/* Trailing slashes: follow a final symlink and require a directory. */
		lookup_flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
last_component:
		if (lookup_flags & LOOKUP_PARENT)
			goto lookup_parent;
		if (this.name[0] == '.') switch (this.len) {
			default:
				break;
			case 2:
				if (this.name[1] != '.')
					break;
				follow_dotdot(nd);
				inode = nd->dentry->d_inode;
				/* fallthrough */
			case 1:
				goto return_base;
		}
		if (nd->dentry->d_op && nd->dentry->d_op->d_hash) {
			err = nd->dentry->d_op->d_hash(nd->dentry, &this);
			if (err < 0)
				break;
		}
		/* Final lookup: flags 0 rather than LOOKUP_CONTINUE. */
		dentry = cached_lookup(nd->dentry, &this, 0);
		if (!dentry) {
			dentry = real_lookup(nd->dentry, &this, 0);
			err = PTR_ERR(dentry);
			if (IS_ERR(dentry))
				break;
		}
		while (d_mountpoint(dentry) && __follow_down(&nd->mnt, &dentry))
			;
		inode = dentry->d_inode;
		/* A final symlink is followed only when LOOKUP_FOLLOW is set. */
		if ((lookup_flags & LOOKUP_FOLLOW)
		    && inode && inode->i_op && inode->i_op->follow_link) {
			err = do_follow_link(dentry, nd);
			dput(dentry);
			if (err)
				goto return_err;
			inode = nd->dentry->d_inode;
		} else {
			dput(nd->dentry);
			nd->dentry = dentry;
		}
		err = -ENOENT;
		if (!inode)
			goto no_inode;
		if (lookup_flags & LOOKUP_DIRECTORY) {
			err = -ENOTDIR;
			if (!inode->i_op || !inode->i_op->lookup)
				break;
		}
		goto return_base;
no_inode:
		/*
		 * Negative result: an error only if the caller asked for a
		 * positive dentry or a directory; otherwise it is returned.
		 */
		err = -ENOENT;
		if (lookup_flags & (LOOKUP_POSITIVE|LOOKUP_DIRECTORY))
			break;
		goto return_base;
lookup_parent:
		/* Record the last component and classify it for the caller. */
		nd->last = this;
		nd->last_type = LAST_NORM;
		if (this.name[0] != '.')
			goto return_base;
		if (this.len == 1)
			nd->last_type = LAST_DOT;
		else if (this.len == 2 && this.name[1] == '.')
			nd->last_type = LAST_DOTDOT;
return_base:
		return 0;
out_dput:
		dput(dentry);
		break;
	}
	/* Every 'break' above lands here: drop nd and report err. */
	path_release(nd);
return_err:
	return err;
}
612
613 /* SMP-safe */
614 /* returns 1 if everything is done */
615 static int __emul_lookup_dentry(const char *name, struct nameidata *nd)
616 {
617 if (path_walk(name, nd))
618 return 0;
619
620 if (!nd->dentry->d_inode) {
621 struct nameidata nd_root;
622 nd_root.last_type = LAST_ROOT;
623 nd_root.flags = nd->flags;
624 read_lock(¤t->fs->lock);
625 nd_root.mnt = mntget(current->fs->rootmnt);
626 nd_root.dentry = dget(current->fs->root);
627 read_unlock(¤t->fs->lock);
628 if (path_walk(name, &nd_root))
629 return 1;
630 if (nd_root.dentry->d_inode) {
631 path_release(nd);
632 nd->dentry = nd_root.dentry;
633 nd->mnt = nd_root.mnt;
634 nd->last = nd_root.last;
635 return 1;
636 }
637 path_release(&nd_root);
638 }
639 return 1;
640 }
641
642 void set_fs_altroot(void)
643 {
644 char *emul = __emul_prefix();
645 struct nameidata nd;
646 struct vfsmount *mnt = NULL, *oldmnt;
647 struct dentry *dentry = NULL, *olddentry;
648 if (emul) {
649 read_lock(¤t->fs->lock);
650 nd.mnt = mntget(current->fs->rootmnt);
651 nd.dentry = dget(current->fs->root);
652 read_unlock(¤t->fs->lock);
653 nd.flags = LOOKUP_FOLLOW|LOOKUP_DIRECTORY|LOOKUP_POSITIVE;
654 if (path_walk(emul,&nd) == 0) {
655 mnt = nd.mnt;
656 dentry = nd.dentry;
657 }
658 }
659 write_lock(¤t->fs->lock);
660 oldmnt = current->fs->altrootmnt;
661 olddentry = current->fs->altroot;
662 current->fs->altrootmnt = mnt;
663 current->fs->altroot = dentry;
664 write_unlock(¤t->fs->lock);
665 if (olddentry) {
666 dput(olddentry);
667 mntput(oldmnt);
668 }
669 }
670
671 /* SMP-safe */
672 static inline int
673 walk_init_root(const char *name, struct nameidata *nd)
674 {
675 read_lock(¤t->fs->lock);
676 if (current->fs->altroot && !(nd->flags & LOOKUP_NOALT)) {
677 nd->mnt = mntget(current->fs->altrootmnt);
678 nd->dentry = dget(current->fs->altroot);
679 read_unlock(¤t->fs->lock);
680 if (__emul_lookup_dentry(name,nd))
681 return 0;
682 read_lock(¤t->fs->lock);
683 }
684 nd->mnt = mntget(current->fs->rootmnt);
685 nd->dentry = dget(current->fs->root);
686 read_unlock(¤t->fs->lock);
687 return 1;
688 }
689
690 /* SMP-safe */
691 int path_init(const char *name, unsigned int flags, struct nameidata *nd)
692 {
693 nd->last_type = LAST_ROOT; /* if there are only slashes... */
694 nd->flags = flags;
695 if (*name=='/')
696 return walk_init_root(name,nd);
697 read_lock(¤t->fs->lock);
698 nd->mnt = mntget(current->fs->pwdmnt);
699 nd->dentry = dget(current->fs->pwd);
700 read_unlock(¤t->fs->lock);
701 return 1;
702 }
703
704 /*
705 * Restricted form of lookup. Doesn't follow links, single-component only,
706 * needs parent already locked. Doesn't follow mounts.
707 * SMP-safe.
708 */
709 struct dentry * lookup_hash(struct qstr *name, struct dentry * base)
710 {
711 struct dentry * dentry;
712 struct inode *inode;
713 int err;
714
715 inode = base->d_inode;
716 err = permission(inode, MAY_EXEC);
717 dentry = ERR_PTR(err);
718 if (err)
719 goto out;
720
721 /*
722 * See if the low-level filesystem might want
723 * to use its own hash..
724 */
725 if (base->d_op && base->d_op->d_hash) {
726 err = base->d_op->d_hash(base, name);
727 dentry = ERR_PTR(err);
728 if (err < 0)
729 goto out;
730 }
731
732 dentry = cached_lookup(base, name, 0);
733 if (!dentry) {
734 struct dentry *new = d_alloc(base, name);
735 dentry = ERR_PTR(-ENOMEM);
736 if (!new)
737 goto out;
738 lock_kernel();
739 dentry = inode->i_op->lookup(inode, new);
740 unlock_kernel();
741 if (!dentry)
742 dentry = new;
743 else
744 dput(new);
745 }
746 out:
747 return dentry;
748 }
749
750 /* SMP-safe */
751 struct dentry * lookup_one(const char * name, struct dentry * base)
752 {
753 unsigned long hash;
754 struct qstr this;
755 unsigned int c;
756
757 this.name = name;
758 c = *(const unsigned char *)name;
759 if (!c)
760 goto access;
761
762 hash = init_name_hash();
763 do {
764 name++;
765 if (c == '/')
766 goto access;
767 hash = partial_name_hash(c, hash);
768 c = *(const unsigned char *)name;
769 } while (c);
770 this.len = name - (const char *) this.name;
771 this.hash = end_name_hash(hash);
772
773 return lookup_hash(&this, base);
774 access:
775 return ERR_PTR(-EACCES);
776 }
777
778 /*
779 * namei()
780 *
781 * is used by most simple commands to get the inode of a specified name.
782 * Open, link etc use their own routines, but this is enough for things
783 * like 'chmod' etc.
784 *
785 * namei exists in two versions: namei/lnamei. The only difference is
786 * that namei follows links, while lnamei does not.
787 * SMP-safe
788 */
/*
 * Resolve the user-space pathname 'name' into nd using 'flags'.
 *
 * The name is copied in with getname(), walked via path_init() and
 * path_walk(), and the copy is released again.  Returns 0 on success or
 * the negative errno from getname() or path_walk().
 */
int __user_walk(const char *name, unsigned flags, struct nameidata *nd)
{
	char *kname = getname(name);
	int err = 0;

	if (IS_ERR(kname))
		return PTR_ERR(kname);

	if (path_init(kname, flags, nd))
		err = path_walk(kname, nd);
	putname(kname);
	return err;
}
804
805 /*
806 * It's inline, so penalty for filesystems that don't use sticky bit is
807 * minimal.
808 */
809 static inline int check_sticky(struct inode *dir, struct inode *inode)
810 {
811 if (!(dir->i_mode & S_ISVTX))
812 return 0;
813 if (inode->i_uid == current->fsuid)
814 return 0;
815 if (dir->i_uid == current->fsuid)
816 return 0;
817 return !capable(CAP_FOWNER);
818 }
819
820 /*
821 * Check whether we can remove a link victim from directory dir, check
822 * whether the type of victim is right.
823 * 1. We can't do it if dir is read-only (done in permission())
824 * 2. We should have write and exec permissions on dir
825 * 3. We can't remove anything from append-only dir
826 * 4. We can't do anything with immutable dir (done in permission())
827 * 5. If the sticky bit on dir is set we should either
828 * a. be owner of dir, or
829 * b. be owner of victim, or
830 * c. have CAP_FOWNER capability
 * 6. If the victim is append-only or immutable we can't do anything with
832 * links pointing to it.
833 * 7. If we were asked to remove a directory and victim isn't one - ENOTDIR.
834 * 8. If we were asked to remove a non-directory and victim isn't one - EISDIR.
835 * 9. We can't remove a root or mountpoint.
836 */
837 static inline int may_delete(struct inode *dir,struct dentry *victim, int isdir)
838 {
839 int error;
840 if (!victim->d_inode || victim->d_parent->d_inode != dir)
841 return -ENOENT;
842 error = permission(dir,MAY_WRITE | MAY_EXEC);
843 if (error)
844 return error;
845 if (IS_APPEND(dir))
846 return -EPERM;
847 if (check_sticky(dir, victim->d_inode)||IS_APPEND(victim->d_inode)||
848 IS_IMMUTABLE(victim->d_inode))
849 return -EPERM;
850 if (isdir) {
851 if (!S_ISDIR(victim->d_inode->i_mode))
852 return -ENOTDIR;
853 if (IS_ROOT(victim))
854 return -EBUSY;
855 } else if (S_ISDIR(victim->d_inode->i_mode))
856 return -EISDIR;
857 return 0;
858 }
859
860 /* Check whether we can create an object with dentry child in directory
861 * dir.
862 * 1. We can't do it if child already exists (open has special treatment for
863 * this case, but since we are inlined it's OK)
864 * 2. We can't do it if dir is read-only (done in permission())
865 * 3. We should have write and exec permissions on dir
866 * 4. We can't do it if dir is immutable (done in permission())
867 */
868 static inline int may_create(struct inode *dir, struct dentry *child) {
869 if (child->d_inode)
870 return -EEXIST;
871 if (IS_DEADDIR(dir))
872 return -ENOENT;
873 return permission(dir,MAY_WRITE | MAY_EXEC);
874 }
875
876 /*
877 * Special case: O_CREAT|O_EXCL implies O_NOFOLLOW for security
878 * reasons.
879 *
880 * O_DIRECTORY translates into forcing a directory lookup.
881 */
882 static inline int lookup_flags(unsigned int f)
883 {
884 unsigned long retval = LOOKUP_FOLLOW;
885
886 if (f & O_NOFOLLOW)
887 retval &= ~LOOKUP_FOLLOW;
888
889 if ((f & (O_CREAT|O_EXCL)) == (O_CREAT|O_EXCL))
890 retval &= ~LOOKUP_FOLLOW;
891
892 if (f & O_DIRECTORY)
893 retval |= LOOKUP_DIRECTORY;
894
895 return retval;
896 }
897
898 int vfs_create(struct inode *dir, struct dentry *dentry, int mode)
899 {
900 int error;
901
902 mode &= S_IALLUGO & ~current->fs->umask;
903 mode |= S_IFREG;
904
905 down(&dir->i_zombie);
906 error = may_create(dir, dentry);
907 if (error)
908 goto exit_lock;
909
910 error = -EACCES; /* shouldn't it be ENOSYS? */
911 if (!dir->i_op || !dir->i_op->create)
912 goto exit_lock;
913
914 DQUOT_INIT(dir);
915 lock_kernel();
916 error = dir->i_op->create(dir, dentry, mode);
917 unlock_kernel();
918 exit_lock:
919 up(&dir->i_zombie);
920 if (!error)
921 inode_dir_notify(dir, DN_CREATE);
922 return error;
923 }
924
/*
 * open_namei()
 *
 * namei for open - this is in fact almost the whole open-routine.
 *
 * Note that the low bits of "flag" aren't the same as in the open
 * system call - they are 00 - no permissions needed
 *			  01 - read permission needed
 *			  10 - write permission needed
 *			  11 - read/write permissions needed
 * which is a lot more logical, and also allows the "no perm" needed
 * for symlinks (where the permissions are checked later).
 * SMP-safe
 *
 * pathname: kernel-space copy of the name to open.
 * flag:     open flags, low bits encoded as described above.
 * mode:     permission bits handed to vfs_create() if a file is created.
 * nd:       filled in by the lookup; holds the result on success.
 *
 * Returns 0 on success or a negative errno.  Failures that leave through
 * the 'exit' / 'exit_dput' labels release nd via path_release(); the
 * direct 'return error' statements after path_walk() and ->follow_link()
 * do not (NOTE(review): presumably those callees have already dropped nd
 * when they fail - confirm).
 */
int open_namei(const char * pathname, int flag, int mode, struct nameidata *nd)
{
	int acc_mode, error = 0;
	struct inode *inode;
	struct dentry *dentry;
	struct dentry *dir;
	int count = 0;	/* number of trips through do_link so far */

	acc_mode = ACC_MODE(flag);

	/*
	 * The simplest case - just a plain lookup.
	 */
	if (!(flag & O_CREAT)) {
		if (path_init(pathname, lookup_flags(flag), nd))
			error = path_walk(pathname, nd);
		if (error)
			return error;
		dentry = nd->dentry;
		goto ok;
	}

	/*
	 * Create - we need to know the parent.
	 */
	if (path_init(pathname, LOOKUP_PARENT, nd))
		error = path_walk(pathname, nd);
	if (error)
		return error;

	/*
	 * We have the parent and last component. First of all, check
	 * that we are not asked to creat(2) an obvious directory - that
	 * will not do.
	 */
	error = -EISDIR;
	if (nd->last_type != LAST_NORM || nd->last.name[nd->last.len])
		goto exit;

	/* Look the last component up with the parent's i_sem held. */
	dir = nd->dentry;
	down(&dir->d_inode->i_sem);
	dentry = lookup_hash(&nd->last, nd->dentry);

do_last:
	/* Entered with dir->d_inode->i_sem held and 'dentry' freshly looked up. */
	error = PTR_ERR(dentry);
	if (IS_ERR(dentry)) {
		up(&dir->d_inode->i_sem);
		goto exit;
	}

	/* Negative dentry, just create the file */
	if (!dentry->d_inode) {
		error = vfs_create(dir->d_inode, dentry, mode);
		up(&dir->d_inode->i_sem);
		dput(nd->dentry);
		nd->dentry = dentry;
		if (error)
			goto exit;
		/* Don't check for write permission, don't truncate */
		acc_mode = 0;
		flag &= ~O_TRUNC;
		goto ok;
	}

	/*
	 * It already exists.
	 */
	up(&dir->d_inode->i_sem);

	error = -EEXIST;
	if (flag & O_EXCL)
		goto exit_dput;

	/* Step across anything mounted on the existing entry. */
	if (d_mountpoint(dentry)) {
		error = -ELOOP;
		if (flag & O_NOFOLLOW)
			goto exit_dput;
		do __follow_down(&nd->mnt,&dentry); while(d_mountpoint(dentry));
	}
	error = -ENOENT;
	if (!dentry->d_inode)
		goto exit_dput;
	if (dentry->d_inode->i_op && dentry->d_inode->i_op->follow_link)
		goto do_link;

	/* nd takes over the reference on the final dentry. */
	dput(nd->dentry);
	nd->dentry = dentry;
	error = -EISDIR;
	if (dentry->d_inode && S_ISDIR(dentry->d_inode->i_mode))
		goto exit;
ok:
	/* Common tail: 'dentry' is nd->dentry; validate the object itself. */
	error = -ENOENT;
	inode = dentry->d_inode;
	if (!inode)
		goto exit;

	error = -ELOOP;
	if (S_ISLNK(inode->i_mode))
		goto exit;

	error = -EISDIR;
	if (S_ISDIR(inode->i_mode) && (flag & FMODE_WRITE))
		goto exit;

	error = permission(inode,acc_mode);
	if (error)
		goto exit;

	/*
	 * FIFO's, sockets and device files are special: they don't
	 * actually live on the filesystem itself, and as such you
	 * can write to them even if the filesystem is read-only.
	 */
	if (S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) {
		flag &= ~O_TRUNC;
	} else if (S_ISBLK(inode->i_mode) || S_ISCHR(inode->i_mode)) {
		error = -EACCES;
		if (IS_NODEV(inode))
			goto exit;

		flag &= ~O_TRUNC;
	} else {
		/* Bit value 2 is "write permission needed" in this encoding. */
		error = -EROFS;
		if (IS_RDONLY(inode) && (flag & 2))
			goto exit;
	}
	/*
	 * An append-only file must be opened in append mode for writing.
	 */
	error = -EPERM;
	if (IS_APPEND(inode)) {
		if ((flag & FMODE_WRITE) && !(flag & O_APPEND))
			goto exit;
		if (flag & O_TRUNC)
			goto exit;
	}

	/*
	 * Ensure there are no outstanding leases on the file.
	 */
	error = get_lease(inode, flag);
	if (error)
		goto exit;

	if (flag & O_TRUNC) {
		/* Hold write access for the duration of the truncate. */
		error = get_write_access(inode);
		if (error)
			goto exit;

		/*
		 * Refuse to truncate files with mandatory locks held on them.
		 */
		error = locks_verify_locked(inode);
		if (!error) {
			DQUOT_INIT(inode);

			error = do_truncate(dentry, 0);
		}
		put_write_access(inode);
		if (error)
			goto exit;
	} else
		if (flag & FMODE_WRITE)
			DQUOT_INIT(inode);

	return 0;

exit_dput:
	dput(dentry);
exit:
	path_release(nd);
	return error;

do_link:
	error = -ELOOP;
	if (flag & O_NOFOLLOW)
		goto exit_dput;
	/*
	 * This is subtle. Instead of calling do_follow_link() we do the
	 * thing by hands. The reason is that this way we have zero link_count
	 * and path_walk() (called from ->follow_link) honoring LOOKUP_PARENT.
	 * After that we have the parent and last component, i.e.
	 * we are in the same situation as after the first path_walk().
	 * Well, almost - if the last component is normal we get its copy
	 * stored in nd->last.name and we will have to putname() it when we
	 * are done. Procfs-like symlinks just set LAST_BIND.
	 */
	UPDATE_ATIME(dentry->d_inode);
	error = dentry->d_inode->i_op->follow_link(dentry, nd);
	dput(dentry);
	if (error)
		return error;
	if (nd->last_type == LAST_BIND) {
		dentry = nd->dentry;
		goto ok;
	}
	error = -EISDIR;
	if (nd->last_type != LAST_NORM)
		goto exit;
	/* A trailing character after the last component: refuse with -EISDIR. */
	if (nd->last.name[nd->last.len]) {
		putname(nd->last.name);
		goto exit;
	}
	/*
	 * Once 'count' has reached 32 we stop chasing links and go on to
	 * 'ok' with the current nd->dentry.
	 */
	if (count++==32) {
		dentry = nd->dentry;
		putname(nd->last.name);
		goto ok;
	}
	/* Same situation as after the first walk: redo the last-component step. */
	dir = nd->dentry;
	down(&dir->d_inode->i_sem);
	dentry = lookup_hash(&nd->last, nd->dentry);
	putname(nd->last.name);
	goto do_last;
}
1153
1154 /* SMP-safe */
1155 static struct dentry *lookup_create(struct nameidata *nd, int is_dir)
1156 {
1157 struct dentry *dentry;
1158
1159 down(&nd->dentry->d_inode->i_sem);
1160 dentry = ERR_PTR(-EEXIST);
1161 if (nd->last_type != LAST_NORM)
1162 goto fail;
1163 dentry = lookup_hash(&nd->last, nd->dentry);
1164 if (IS_ERR(dentry))
1165 goto fail;
1166 if (!is_dir && nd->last.name[nd->last.len] && !dentry->d_inode)
1167 goto enoent;
1168 return dentry;
1169 enoent:
1170 dput(dentry);
1171 dentry = ERR_PTR(-ENOENT);
1172 fail:
1173 return dentry;
1174 }
1175
1176 int vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
1177 {
1178 int error = -EPERM;
1179
1180 mode &= ~current->fs->umask;
1181
1182 down(&dir->i_zombie);
1183 if ((S_ISCHR(mode) || S_ISBLK(mode)) && !capable(CAP_MKNOD))
1184 goto exit_lock;
1185
1186 error = may_create(dir, dentry);
1187 if (error)
1188 goto exit_lock;
1189
1190 error = -EPERM;
1191 if (!dir->i_op || !dir->i_op->mknod)
1192 goto exit_lock;
1193
1194 DQUOT_INIT(dir);
1195 lock_kernel();
1196 error = dir->i_op->mknod(dir, dentry, mode, dev);
1197 unlock_kernel();
1198 exit_lock:
1199 up(&dir->i_zombie);
1200 if (!error)
1201 inode_dir_notify(dir, DN_CREATE);
1202 return error;
1203 }
1204
1205 asmlinkage long sys_mknod(const char * filename, int mode, dev_t dev)
1206 {
1207 int error = 0;
1208 char * tmp;
1209 struct dentry * dentry;
1210 struct nameidata nd;
1211
1212 if (S_ISDIR(mode))
1213 return -EPERM;
1214 tmp = getname(filename);
1215 if (IS_ERR(tmp))
1216 return PTR_ERR(tmp);
1217
1218 if (path_init(tmp, LOOKUP_PARENT, &nd))
1219 error = path_walk(tmp, &nd);
1220 if (error)
1221 goto out;
1222 dentry = lookup_create(&nd, 0);
1223 error = PTR_ERR(dentry);
1224 if (!IS_ERR(dentry)) {
1225 switch (mode & S_IFMT) {
1226 case 0: case S_IFREG:
1227 error = vfs_create(nd.dentry->d_inode,dentry,mode);
1228 break;
1229 case S_IFCHR: case S_IFBLK: case S_IFIFO: case S_IFSOCK:
1230 error = vfs_mknod(nd.dentry->d_inode,dentry,mode,dev);
1231 break;
1232 case S_IFDIR:
1233 error = -EPERM;
1234 break;
1235 default:
1236 error = -EINVAL;
1237 }
1238 dput(dentry);
1239 }
1240 up(&nd.dentry->d_inode->i_sem);
1241 path_release(&nd);
1242 out:
1243 putname(tmp);
1244
1245 return error;
1246 }
1247
1248 int vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
1249 {
1250 int error;
1251
1252 down(&dir->i_zombie);
1253 error = may_create(dir, dentry);
1254 if (error)
1255 goto exit_lock;
1256
1257 error = -EPERM;
1258 if (!dir->i_op || !dir->i_op->mkdir)
1259 goto exit_lock;
1260
1261 DQUOT_INIT(dir);
1262 mode &= (S_IRWXUGO|S_ISVTX) & ~current->fs->umask;
1263 lock_kernel();
1264 error = dir->i_op->mkdir(dir, dentry, mode);
1265 unlock_kernel();
1266
1267 exit_lock:
1268 up(&dir->i_zombie);
1269 if (!error)
1270 inode_dir_notify(dir, DN_CREATE);
1271 return error;
1272 }
1273
1274 asmlinkage long sys_mkdir(const char * pathname, int mode)
1275 {
1276 int error = 0;
1277 char * tmp;
1278
1279 tmp = getname(pathname);
1280 error = PTR_ERR(tmp);
1281 if (!IS_ERR(tmp)) {
1282 struct dentry *dentry;
1283 struct nameidata nd;
1284
1285 if (path_init(tmp, LOOKUP_PARENT, &nd))
1286 error = path_walk(tmp, &nd);
1287 if (error)
1288 goto out;
1289 dentry = lookup_create(&nd, 1);
1290 error = PTR_ERR(dentry);
1291 if (!IS_ERR(dentry)) {
1292 error = vfs_mkdir(nd.dentry->d_inode, dentry, mode);
1293 dput(dentry);
1294 }
1295 up(&nd.dentry->d_inode->i_sem);
1296 path_release(&nd);
1297 out:
1298 putname(tmp);
1299 }
1300
1301 return error;
1302 }
1303
1304 /*
1305 * We try to drop the dentry early: we should have
1306 * a usage count of 2 if we're the only user of this
1307 * dentry, and if that is true (possibly after pruning
1308 * the dcache), then we drop the dentry now.
1309 *
1310 * A low-level filesystem can, if it chooses, legally
1311 * do a
1312 *
1313 * if (!d_unhashed(dentry))
1314 * return -EBUSY;
1315 *
1316 * if it cannot handle the case of removing a directory
1317 * that is still in use by something else..
1318 */
1319 static void d_unhash(struct dentry *dentry)
1320 {
1321 dget(dentry);
1322 switch (atomic_read(&dentry->d_count)) {
1323 default:
1324 shrink_dcache_parent(dentry);
1325 if (atomic_read(&dentry->d_count) != 2)
1326 break;
1327 case 2:
1328 d_drop(dentry);
1329 }
1330 }
1331
1332 int vfs_rmdir(struct inode *dir, struct dentry *dentry)
1333 {
1334 int error;
1335
1336 error = may_delete(dir, dentry, 1);
1337 if (error)
1338 return error;
1339
1340 if (!dir->i_op || !dir->i_op->rmdir)
1341 return -EPERM;
1342
1343 DQUOT_INIT(dir);
1344
1345 double_down(&dir->i_zombie, &dentry->d_inode->i_zombie);
1346 d_unhash(dentry);
1347 if (IS_DEADDIR(dir))
1348 error = -ENOENT;
1349 else if (d_mountpoint(dentry))
1350 error = -EBUSY;
1351 else {
1352 lock_kernel();
1353 error = dir->i_op->rmdir(dir, dentry);
1354 unlock_kernel();
1355 if (!error)
1356 dentry->d_inode->i_flags |= S_DEAD;
1357 }
1358 double_up(&dir->i_zombie, &dentry->d_inode->i_zombie);
1359 if (!error) {
1360 inode_dir_notify(dir, DN_DELETE);
1361 d_delete(dentry);
1362 }
1363 dput(dentry);
1364
1365 return error;
1366 }
1367
1368 asmlinkage long sys_rmdir(const char * pathname)
1369 {
1370 int error = 0;
1371 char * name;
1372 struct dentry *dentry;
1373 struct nameidata nd;
1374
1375 name = getname(pathname);
1376 if(IS_ERR(name))
1377 return PTR_ERR(name);
1378
1379 if (path_init(name, LOOKUP_PARENT, &nd))
1380 error = path_walk(name, &nd);
1381 if (error)
1382 goto exit;
1383
1384 switch(nd.last_type) {
1385 case LAST_DOTDOT:
1386 error = -ENOTEMPTY;
1387 goto exit1;
1388 case LAST_DOT:
1389 error = -EINVAL;
1390 goto exit1;
1391 case LAST_ROOT:
1392 error = -EBUSY;
1393 goto exit1;
1394 }
1395 down(&nd.dentry->d_inode->i_sem);
1396 dentry = lookup_hash(&nd.last, nd.dentry);
1397 error = PTR_ERR(dentry);
1398 if (!IS_ERR(dentry)) {
1399 error = vfs_rmdir(nd.dentry->d_inode, dentry);
1400 dput(dentry);
1401 }
1402 up(&nd.dentry->d_inode->i_sem);
1403 exit1:
1404 path_release(&nd);
1405 exit:
1406 putname(name);
1407 return error;
1408 }
1409
1410 int vfs_unlink(struct inode *dir, struct dentry *dentry)
1411 {
1412 int error;
1413
1414 down(&dir->i_zombie);
1415 error = may_delete(dir, dentry, 0);
1416 if (!error) {
1417 error = -EPERM;
1418 if (dir->i_op && dir->i_op->unlink) {
1419 DQUOT_INIT(dir);
1420 if (d_mountpoint(dentry))
1421 error = -EBUSY;
1422 else {
1423 lock_kernel();
1424 error = dir->i_op->unlink(dir, dentry);
1425 unlock_kernel();
1426 if (!error)
1427 d_delete(dentry);
1428 }
1429 }
1430 }
1431 up(&dir->i_zombie);
1432 if (!error)
1433 inode_dir_notify(dir, DN_DELETE);
1434 return error;
1435 }
1436
1437 asmlinkage long sys_unlink(const char * pathname)
1438 {
1439 int error = 0;
1440 char * name;
1441 struct dentry *dentry;
1442 struct nameidata nd;
1443
1444 name = getname(pathname);
1445 if(IS_ERR(name))
1446 return PTR_ERR(name);
1447
1448 if (path_init(name, LOOKUP_PARENT, &nd))
1449 error = path_walk(name, &nd);
1450 if (error)
1451 goto exit;
1452 error = -EISDIR;
1453 if (nd.last_type != LAST_NORM)
1454 goto exit1;
1455 down(&nd.dentry->d_inode->i_sem);
1456 dentry = lookup_hash(&nd.last, nd.dentry);
1457 error = PTR_ERR(dentry);
1458 if (!IS_ERR(dentry)) {
1459 /* Why not before? Because we want correct error value */
1460 if (nd.last.name[nd.last.len])
1461 goto slashes;
1462 error = vfs_unlink(nd.dentry->d_inode, dentry);
1463 exit2:
1464 dput(dentry);
1465 }
1466 up(&nd.dentry->d_inode->i_sem);
1467 exit1:
1468 path_release(&nd);
1469 exit:
1470 putname(name);
1471
1472 return error;
1473
1474 slashes:
1475 error = !dentry->d_inode ? -ENOENT :
1476 S_ISDIR(dentry->d_inode->i_mode) ? -EISDIR : -ENOTDIR;
1477 goto exit2;
1478 }
1479
1480 int vfs_symlink(struct inode *dir, struct dentry *dentry, const char *oldname)
1481 {
1482 int error;
1483
1484 down(&dir->i_zombie);
1485 error = may_create(dir, dentry);
1486 if (error)
1487 goto exit_lock;
1488
1489 error = -EPERM;
1490 if (!dir->i_op || !dir->i_op->symlink)
1491 goto exit_lock;
1492
1493 DQUOT_INIT(dir);
1494 lock_kernel();
1495 error = dir->i_op->symlink(dir, dentry, oldname);
1496 unlock_kernel();
1497
1498 exit_lock:
1499 up(&dir->i_zombie);
1500 if (!error)
1501 inode_dir_notify(dir, DN_CREATE);
1502 return error;
1503 }
1504
1505 asmlinkage long sys_symlink(const char * oldname, const char * newname)
1506 {
1507 int error = 0;
1508 char * from;
1509 char * to;
1510
1511 from = getname(oldname);
1512 if(IS_ERR(from))
1513 return PTR_ERR(from);
1514 to = getname(newname);
1515 error = PTR_ERR(to);
1516 if (!IS_ERR(to)) {
1517 struct dentry *dentry;
1518 struct nameidata nd;
1519
1520 if (path_init(to, LOOKUP_PARENT, &nd))
1521 error = path_walk(to, &nd);
1522 if (error)
1523 goto out;
1524 dentry = lookup_create(&nd, 0);
1525 error = PTR_ERR(dentry);
1526 if (!IS_ERR(dentry)) {
1527 error = vfs_symlink(nd.dentry->d_inode, dentry, from);
1528 dput(dentry);
1529 }
1530 up(&nd.dentry->d_inode->i_sem);
1531 path_release(&nd);
1532 out:
1533 putname(to);
1534 }
1535 putname(from);
1536 return error;
1537 }
1538
1539 int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_dentry)
1540 {
1541 struct inode *inode;
1542 int error;
1543
1544 down(&dir->i_zombie);
1545 error = -ENOENT;
1546 inode = old_dentry->d_inode;
1547 if (!inode)
1548 goto exit_lock;
1549
1550 error = may_create(dir, new_dentry);
1551 if (error)
1552 goto exit_lock;
1553
1554 error = -EXDEV;
1555 if (dir->i_dev != inode->i_dev)
1556 goto exit_lock;
1557
1558 /*
1559 * A link to an append-only or immutable file cannot be created.
1560 */
1561 error = -EPERM;
1562 if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
1563 goto exit_lock;
1564 if (!dir->i_op || !dir->i_op->link)
1565 goto exit_lock;
1566
1567 DQUOT_INIT(dir);
1568 lock_kernel();
1569 error = dir->i_op->link(old_dentry, dir, new_dentry);
1570 unlock_kernel();
1571
1572 exit_lock:
1573 up(&dir->i_zombie);
1574 if (!error)
1575 inode_dir_notify(dir, DN_CREATE);
1576 return error;
1577 }
1578
1579 /*
1580 * Hardlinks are often used in delicate situations. We avoid
1581 * security-related surprises by not following symlinks on the
1582 * newname. --KAB
1583 *
1584 * We don't follow them on the oldname either to be compatible
1585 * with linux 2.0, and to avoid hard-linking to directories
1586 * and other special files. --ADM
1587 */
1588 asmlinkage long sys_link(const char * oldname, const char * newname)
1589 {
1590 int error;
1591 char * from;
1592 char * to;
1593
1594 from = getname(oldname);
1595 if(IS_ERR(from))
1596 return PTR_ERR(from);
1597 to = getname(newname);
1598 error = PTR_ERR(to);
1599 if (!IS_ERR(to)) {
1600 struct dentry *new_dentry;
1601 struct nameidata nd, old_nd;
1602
1603 error = 0;
1604 if (path_init(from, LOOKUP_POSITIVE, &old_nd))
1605 error = path_walk(from, &old_nd);
1606 if (error)
1607 goto exit;
1608 if (path_init(to, LOOKUP_PARENT, &nd))
1609 error = path_walk(to, &nd);
1610 if (error)
1611 goto out;
1612 error = -EXDEV;
1613 if (old_nd.mnt != nd.mnt)
1614 goto out_release;
1615 new_dentry = lookup_create(&nd, 0);
1616 error = PTR_ERR(new_dentry);
1617 if (!IS_ERR(new_dentry)) {
1618 error = vfs_link(old_nd.dentry, nd.dentry->d_inode, new_dentry);
1619 dput(new_dentry);
1620 }
1621 up(&nd.dentry->d_inode->i_sem);
1622 out_release:
1623 path_release(&nd);
1624 out:
1625 path_release(&old_nd);
1626 exit:
1627 putname(to);
1628 }
1629 putname(from);
1630
1631 return error;
1632 }
1633
1634 /*
1635 * The worst of all namespace operations - renaming directory. "Perverted"
1636 * doesn't even start to describe it. Somebody in UCB had a heck of a trip...
1637 * Problems:
1638 * a) we can get into loop creation. Check is done in is_subdir().
1639 * b) race potential - two innocent renames can create a loop together.
1640 * That's where 4.4 screws up. Current fix: serialization on
1641 * sb->s_vfs_rename_sem. We might be more accurate, but that's another
1642 * story.
1643 * c) we have to lock _three_ objects - parents and victim (if it exists).
1644 * And that - after we got ->i_sem on parents (until then we don't know
1645 * whether the target exists at all, let alone whether it is a directory
1646 * or not). Solution: ->i_zombie. Taken only after ->i_sem. Always taken
1647 * on link creation/removal of any kind. And taken (without ->i_sem) on
1648 * directory that will be removed (both in rmdir() and here).
1649 * d) some filesystems don't support opened-but-unlinked directories,
1650 * either because of layout or because they are not ready to deal with
1651 * all cases correctly. The latter will be fixed (taking this sort of
1652 * stuff into VFS), but the former is not going away. Solution: the same
1653 * trick as in rmdir().
1654 * e) conversion from fhandle to dentry may come in the wrong moment - when
1655 * we are removing the target. Solution: we will have to grab ->i_zombie
1656 * in the fhandle_to_dentry code. [FIXME - current nfsfh.c relies on
1657 * ->i_sem on parents, which works but leads to some truly excessive
1658 * locking].
1659 */
1660 int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry,
1661 struct inode *new_dir, struct dentry *new_dentry)
1662 {
1663 int error;
1664 struct inode *target;
1665
1666 if (old_dentry->d_inode == new_dentry->d_inode)
1667 return 0;
1668
1669 error = may_delete(old_dir, old_dentry, 1);
1670 if (error)
1671 return error;
1672
1673 if (new_dir->i_dev != old_dir->i_dev)
1674 return -EXDEV;
1675
1676 if (!new_dentry->d_inode)
1677 error = may_create(new_dir, new_dentry);
1678 else
1679 error = may_delete(new_dir, new_dentry, 1);
1680 if (error)
1681 return error;
1682
1683 if (!old_dir->i_op || !old_dir->i_op->rename)
1684 return -EPERM;
1685
1686 /*
1687 * If we are going to change the parent - check write permissions,
1688 * we'll need to flip '..'.
1689 */
1690 if (new_dir != old_dir) {
1691 error = permission(old_dentry->d_inode, MAY_WRITE);
1692 }
1693 if (error)
1694 return error;
1695
1696 DQUOT_INIT(old_dir);
1697 DQUOT_INIT(new_dir);
1698 down(&old_dir->i_sb->s_vfs_rename_sem);
1699 error = -EINVAL;
1700 if (is_subdir(new_dentry, old_dentry))
1701 goto out_unlock;
1702 target = new_dentry->d_inode;
1703 if (target) { /* Hastur! Hastur! Hastur! */
1704 triple_down(&old_dir->i_zombie,
1705 &new_dir->i_zombie,
1706 &target->i_zombie);
1707 d_unhash(new_dentry);
1708 } else
1709 double_down(&old_dir->i_zombie,
1710 &new_dir->i_zombie);
1711 if (IS_DEADDIR(old_dir)||IS_DEADDIR(new_dir))
1712 error = -ENOENT;
1713 else if (d_mountpoint(old_dentry)||d_mountpoint(new_dentry))
1714 error = -EBUSY;
1715 else
1716 error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry);
1717 if (target) {
1718 if (!error)
1719 target->i_flags |= S_DEAD;
1720 triple_up(&old_dir->i_zombie,
1721 &new_dir->i_zombie,
1722 &target->i_zombie);
1723 if (d_unhashed(new_dentry))
1724 d_rehash(new_dentry);
1725 dput(new_dentry);
1726 } else
1727 double_up(&old_dir->i_zombie,
1728 &new_dir->i_zombie);
1729
1730 if (!error)
1731 d_move(old_dentry,new_dentry);
1732 out_unlock:
1733 up(&old_dir->i_sb->s_vfs_rename_sem);
1734 return error;
1735 }
1736
1737 int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry,
1738 struct inode *new_dir, struct dentry *new_dentry)
1739 {
1740 int error;
1741
1742 if (old_dentry->d_inode == new_dentry->d_inode)
1743 return 0;
1744
1745 error = may_delete(old_dir, old_dentry, 0);
1746 if (error)
1747 return error;
1748
1749 if (new_dir->i_dev != old_dir->i_dev)
1750 return -EXDEV;
1751
1752 if (!new_dentry->d_inode)
1753 error = may_create(new_dir, new_dentry);
1754 else
1755 error = may_delete(new_dir, new_dentry, 0);
1756 if (error)
1757 return error;
1758
1759 if (!old_dir->i_op || !old_dir->i_op->rename)
1760 return -EPERM;
1761
1762 DQUOT_INIT(old_dir);
1763 DQUOT_INIT(new_dir);
1764 double_down(&old_dir->i_zombie, &new_dir->i_zombie);
1765 if (d_mountpoint(old_dentry)||d_mountpoint(new_dentry))
1766 error = -EBUSY;
1767 else
1768 error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry);
1769 double_up(&old_dir->i_zombie, &new_dir->i_zombie);
1770 if (error)
1771 return error;
1772 /* The following d_move() should become unconditional */
1773 if (!(old_dir->i_sb->s_type->fs_flags & FS_ODD_RENAME)) {
1774 d_move(old_dentry, new_dentry);
1775 }
1776 return 0;
1777 }
1778
1779 int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
1780 struct inode *new_dir, struct dentry *new_dentry)
1781 {
1782 int error;
1783 if (S_ISDIR(old_dentry->d_inode->i_mode))
1784 error = vfs_rename_dir(old_dir,old_dentry,new_dir,new_dentry);
1785 else
1786 error = vfs_rename_other(old_dir,old_dentry,new_dir,new_dentry);
1787 if (!error) {
1788 if (old_dir == new_dir)
1789 inode_dir_notify(old_dir, DN_RENAME);
1790 else {
1791 inode_dir_notify(old_dir, DN_DELETE);
1792 inode_dir_notify(new_dir, DN_CREATE);
1793 }
1794 }
1795 return error;
1796 }
1797
1798 static inline int do_rename(const char * oldname, const char * newname)
1799 {
1800 int error = 0;
1801 struct dentry * old_dir, * new_dir;
1802 struct dentry * old_dentry, *new_dentry;
1803 struct nameidata oldnd, newnd;
1804
1805 if (path_init(oldname, LOOKUP_PARENT, &oldnd))
1806 error = path_walk(oldname, &oldnd);
1807
1808 if (error)
1809 goto exit;
1810
1811 if (path_init(newname, LOOKUP_PARENT, &newnd))
1812 error = path_walk(newname, &newnd);
1813 if (error)
1814 goto exit1;
1815
1816 error = -EXDEV;
1817 if (oldnd.mnt != newnd.mnt)
1818 goto exit2;
1819
1820 old_dir = oldnd.dentry;
1821 error = -EBUSY;
1822 if (oldnd.last_type != LAST_NORM)
1823 goto exit2;
1824
1825 new_dir = newnd.dentry;
1826 if (newnd.last_type != LAST_NORM)
1827 goto exit2;
1828
1829 double_lock(new_dir, old_dir);
1830
1831 old_dentry = lookup_hash(&oldnd.last, old_dir);
1832 error = PTR_ERR(old_dentry);
1833 if (IS_ERR(old_dentry))
1834 goto exit3;
1835 /* source must exist */
1836 error = -ENOENT;
1837 if (!old_dentry->d_inode)
1838 goto exit4;
1839 /* unless the source is a directory trailing slashes give -ENOTDIR */
1840 if (!S_ISDIR(old_dentry->d_inode->i_mode)) {
1841 error = -ENOTDIR;
1842 if (oldnd.last.name[oldnd.last.len])
1843 goto exit4;
1844 if (newnd.last.name[newnd.last.len])
1845 goto exit4;
1846 }
1847 new_dentry = lookup_hash(&newnd.last, new_dir);
1848 error = PTR_ERR(new_dentry);
1849 if (IS_ERR(new_dentry))
1850 goto exit4;
1851
1852 lock_kernel();
1853 error = vfs_rename(old_dir->d_inode, old_dentry,
1854 new_dir->d_inode, new_dentry);
1855 unlock_kernel();
1856
1857 dput(new_dentry);
1858 exit4:
1859 dput(old_dentry);
1860 exit3:
1861 double_up(&new_dir->d_inode->i_sem, &old_dir->d_inode->i_sem);
1862 exit2:
1863 path_release(&newnd);
1864 exit1:
1865 path_release(&oldnd);
1866 exit:
1867 return error;
1868 }
1869
1870 asmlinkage long sys_rename(const char * oldname, const char * newname)
1871 {
1872 int error;
1873 char * from;
1874 char * to;
1875
1876 from = getname(oldname);
1877 if(IS_ERR(from))
1878 return PTR_ERR(from);
1879 to = getname(newname);
1880 error = PTR_ERR(to);
1881 if (!IS_ERR(to)) {
1882 error = do_rename(from,to);
1883 putname(to);
1884 }
1885 putname(from);
1886 return error;
1887 }
1888
1889 int vfs_readlink(struct dentry *dentry, char *buffer, int buflen, const char *link)
1890 {
1891 int len;
1892
1893 len = PTR_ERR(link);
1894 if (IS_ERR(link))
1895 goto out;
1896
1897 len = strlen(link);
1898 if (len > (unsigned) buflen)
1899 len = buflen;
1900 if (copy_to_user(buffer, link, len))
1901 len = -EFAULT;
1902 out:
1903 return len;
1904 }
1905
1906 static inline int
1907 __vfs_follow_link(struct nameidata *nd, const char *link)
1908 {
1909 int res = 0;
1910 char *name;
1911 if (IS_ERR(link))
1912 goto fail;
1913
1914 if (*link == '/') {
1915 path_release(nd);
1916 if (!walk_init_root(link, nd))
1917 /* weird __emul_prefix() stuff did it */
1918 goto out;
1919 }
1920 res = path_walk(link, nd);
1921 out:
1922 if (current->link_count || res || nd->last_type!=LAST_NORM)
1923 return res;
1924 /*
1925 * If it is an iterative symlinks resolution in open_namei() we
1926 * have to copy the last component. And all that crap because of
1927 * bloody create() on broken symlinks. Furrfu...
1928 */
1929 name = __getname();
1930 if (IS_ERR(name))
1931 goto fail_name;
1932 strcpy(name, nd->last.name);
1933 nd->last.name = name;
1934 return 0;
1935 fail_name:
1936 link = name;
1937 fail:
1938 path_release(nd);
1939 return PTR_ERR(link);
1940 }
1941
/*
 * vfs_follow_link - out-of-line entry point for __vfs_follow_link().
 *
 * @nd:   walk state to continue
 * @link: link text, or an ERR_PTR()
 *
 * Returns whatever __vfs_follow_link() returns.
 */
int vfs_follow_link(struct nameidata *nd, const char *link)
{
	int rc = __vfs_follow_link(nd, link);

	return rc;
}
1946
1947 /* get the link contents into pagecache */
1948 static char *page_getlink(struct dentry * dentry, struct page **ppage)
1949 {
1950 struct page * page;
1951 struct address_space *mapping = dentry->d_inode->i_mapping;
1952 page = read_cache_page(mapping, 0, (filler_t *)mapping->a_ops->readpage,
1953 NULL);
1954 if (IS_ERR(page))
1955 goto sync_fail;
1956 wait_on_page(page);
1957 if (!Page_Uptodate(page))
1958 goto async_fail;
1959 *ppage = page;
1960 return kmap(page);
1961
1962 async_fail:
1963 page_cache_release(page);
1964 return ERR_PTR(-EIO);
1965
1966 sync_fail:
1967 return (char*)page;
1968 }
1969
1970 int page_readlink(struct dentry *dentry, char *buffer, int buflen)
1971 {
1972 struct page *page = NULL;
1973 char *s = page_getlink(dentry, &page);
1974 int res = vfs_readlink(dentry,buffer,buflen,s);
1975 if (page) {
1976 kunmap(page);
1977 page_cache_release(page);
1978 }
1979 return res;
1980 }
1981
1982 int page_follow_link(struct dentry *dentry, struct nameidata *nd)
1983 {
1984 struct page *page = NULL;
1985 char *s = page_getlink(dentry, &page);
1986 int res = __vfs_follow_link(nd, s);
1987 if (page) {
1988 kunmap(page);
1989 page_cache_release(page);
1990 }
1991 return res;
1992 }
1993
/*
 * Inode operations for symlinks whose target text is kept in pagecache:
 * both methods fetch the text through page_getlink().
 */
struct inode_operations page_symlink_inode_operations = {
	readlink:	page_readlink,		/* copy the text out to user space */
	follow_link:	page_follow_link,	/* continue a path walk through it */
};
1998
This page was automatically generated by the
LXR engine.
Visit the LXR main site for more
information.