~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~ [ freetext search ] ~ [ file search ] ~

Linux Cross Reference
Linux/mm/mmap.c

Version: ~ [ 2.4.0 ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

  1 /*
  2  *      linux/mm/mmap.c
  3  *
  4  * Written by obz.
  5  */
  6 #include <linux/slab.h>
  7 #include <linux/shm.h>
  8 #include <linux/mman.h>
  9 #include <linux/pagemap.h>
 10 #include <linux/swap.h>
 11 #include <linux/swapctl.h>
 12 #include <linux/smp_lock.h>
 13 #include <linux/init.h>
 14 #include <linux/file.h>
 15 
 16 #include <asm/uaccess.h>
 17 #include <asm/pgalloc.h>
 18 
 19 /* description of effects of mapping type and prot in current implementation.
 20  * this is due to the limited x86 page protection hardware.  The expected
 21  * behavior is in parens:
 22  *
 23  * map_type     prot
 24  *              PROT_NONE       PROT_READ       PROT_WRITE      PROT_EXEC
 25  * MAP_SHARED   r: (no) no      r: (yes) yes    r: (no) yes     r: (no) yes
 26  *              w: (no) no      w: (no) no      w: (yes) yes    w: (no) no
 27  *              x: (no) no      x: (no) yes     x: (no) yes     x: (yes) yes
 28  *              
 29  * MAP_PRIVATE  r: (no) no      r: (yes) yes    r: (no) yes     r: (no) yes
 30  *              w: (no) no      w: (no) no      w: (copy) copy  w: (no) no
 31  *              x: (no) no      x: (no) yes     x: (no) yes     x: (yes) yes
 32  *
 33  */
 34 pgprot_t protection_map[16] = {
 35         __P000, __P001, __P010, __P011, __P100, __P101, __P110, __P111,
 36         __S000, __S001, __S010, __S011, __S100, __S101, __S110, __S111
 37 };
 38 
 39 int sysctl_overcommit_memory;
 40 
 41 /* Check that a process has enough memory to allocate a
 42  * new virtual mapping.
 43  */
 44 int vm_enough_memory(long pages)
 45 {
 46         /* Stupid algorithm to decide if we have enough memory: while
 47          * simple, it hopefully works in most obvious cases.. Easy to
 48          * fool it, but this should catch most mistakes.
 49          */
 50         /* 23/11/98 NJC: Somewhat less stupid version of algorithm,
 51          * which tries to do "TheRightThing".  Instead of using half of
 52          * (buffers+cache), use the minimum values.  Allow an extra 2%
 53          * of num_physpages for safety margin.
 54          */
 55 
 56         long free;
 57         
 58         /* Sometimes we want to use more memory than we have. */
 59         if (sysctl_overcommit_memory)
 60             return 1;
 61 
 62         free = atomic_read(&buffermem_pages);
 63         free += atomic_read(&page_cache_size);
 64         free += nr_free_pages();
 65         free += nr_swap_pages;
 66         return free > pages;
 67 }
 68 
 69 /* Remove one vm structure from the inode's i_mapping address space. */
 70 static inline void __remove_shared_vm_struct(struct vm_area_struct *vma)
 71 {
 72         struct file * file = vma->vm_file;
 73 
 74         if (file) {
 75                 struct inode *inode = file->f_dentry->d_inode;
 76                 if (vma->vm_flags & VM_DENYWRITE)
 77                         atomic_inc(&inode->i_writecount);
 78                 if(vma->vm_next_share)
 79                         vma->vm_next_share->vm_pprev_share = vma->vm_pprev_share;
 80                 *vma->vm_pprev_share = vma->vm_next_share;
 81         }
 82 }
 83 
 84 static inline void remove_shared_vm_struct(struct vm_area_struct *vma)
 85 {
 86         lock_vma_mappings(vma);
 87         __remove_shared_vm_struct(vma);
 88         unlock_vma_mappings(vma);
 89 }
 90 
 91 void lock_vma_mappings(struct vm_area_struct *vma)
 92 {
 93         struct address_space *mapping;
 94 
 95         mapping = NULL;
 96         if (vma->vm_file)
 97                 mapping = vma->vm_file->f_dentry->d_inode->i_mapping;
 98         if (mapping)
 99                 spin_lock(&mapping->i_shared_lock);
100 }
101 
102 void unlock_vma_mappings(struct vm_area_struct *vma)
103 {
104         struct address_space *mapping;
105 
106         mapping = NULL;
107         if (vma->vm_file)
108                 mapping = vma->vm_file->f_dentry->d_inode->i_mapping;
109         if (mapping)
110                 spin_unlock(&mapping->i_shared_lock);
111 }
112 
113 /*
114  *  sys_brk() for the most part doesn't need the global kernel
115  *  lock, except when an application is doing something nasty
116  *  like trying to un-brk an area that has already been mapped
117  *  to a regular file.  in this case, the unmapping will need
118  *  to invoke file system routines that need the global lock.
119  */
120 asmlinkage unsigned long sys_brk(unsigned long brk)
121 {
122         unsigned long rlim, retval;
123         unsigned long newbrk, oldbrk;
124         struct mm_struct *mm = current->mm;
125 
126         down(&mm->mmap_sem);
127 
128         if (brk < mm->end_code)
129                 goto out;
130         newbrk = PAGE_ALIGN(brk);
131         oldbrk = PAGE_ALIGN(mm->brk);
132         if (oldbrk == newbrk)
133                 goto set_brk;
134 
135         /* Always allow shrinking brk. */
136         if (brk <= mm->brk) {
137                 if (!do_munmap(mm, newbrk, oldbrk-newbrk))
138                         goto set_brk;
139                 goto out;
140         }
141 
142         /* Check against rlimit.. */
143         rlim = current->rlim[RLIMIT_DATA].rlim_cur;
144         if (rlim < RLIM_INFINITY && brk - mm->start_data > rlim)
145                 goto out;
146 
147         /* Check against existing mmap mappings. */
148         if (find_vma_intersection(mm, oldbrk, newbrk+PAGE_SIZE))
149                 goto out;
150 
151         /* Check if we have enough memory.. */
152         if (!vm_enough_memory((newbrk-oldbrk) >> PAGE_SHIFT))
153                 goto out;
154 
155         /* Ok, looks good - let it rip. */
156         if (do_brk(oldbrk, newbrk-oldbrk) != oldbrk)
157                 goto out;
158 set_brk:
159         mm->brk = brk;
160 out:
161         retval = mm->brk;
162         up(&mm->mmap_sem);
163         return retval;
164 }
165 
166 /* Combine the mmap "prot" and "flags" argument into one "vm_flags" used
167  * internally. Essentially, translate the "PROT_xxx" and "MAP_xxx" bits
168  * into "VM_xxx".
169  */
170 static inline unsigned long vm_flags(unsigned long prot, unsigned long flags)
171 {
172 #define _trans(x,bit1,bit2) \
173 ((bit1==bit2)?(x&bit1):(x&bit1)?bit2:0)
174 
175         unsigned long prot_bits, flag_bits;
176         prot_bits =
177                 _trans(prot, PROT_READ, VM_READ) |
178                 _trans(prot, PROT_WRITE, VM_WRITE) |
179                 _trans(prot, PROT_EXEC, VM_EXEC);
180         flag_bits =
181                 _trans(flags, MAP_GROWSDOWN, VM_GROWSDOWN) |
182                 _trans(flags, MAP_DENYWRITE, VM_DENYWRITE) |
183                 _trans(flags, MAP_EXECUTABLE, VM_EXECUTABLE);
184         return prot_bits | flag_bits;
185 #undef _trans
186 }
187 
188 unsigned long do_mmap_pgoff(struct file * file, unsigned long addr, unsigned long len,
189         unsigned long prot, unsigned long flags, unsigned long pgoff)
190 {
191         struct mm_struct * mm = current->mm;
192         struct vm_area_struct * vma;
193         int correct_wcount = 0;
194         int error;
195 
196         if (file && (!file->f_op || !file->f_op->mmap))
197                 return -ENODEV;
198 
199         if ((len = PAGE_ALIGN(len)) == 0)
200                 return addr;
201 
202         if (len > TASK_SIZE || addr > TASK_SIZE-len)
203                 return -EINVAL;
204 
205         /* offset overflow? */
206         if ((pgoff + (len >> PAGE_SHIFT)) < pgoff)
207                 return -EINVAL;
208 
209         /* Too many mappings? */
210         if (mm->map_count > MAX_MAP_COUNT)
211                 return -ENOMEM;
212 
213         /* mlock MCL_FUTURE? */
214         if (mm->def_flags & VM_LOCKED) {
215                 unsigned long locked = mm->locked_vm << PAGE_SHIFT;
216                 locked += len;
217                 if (locked > current->rlim[RLIMIT_MEMLOCK].rlim_cur)
218                         return -EAGAIN;
219         }
220 
221         /* Do simple checking here so the lower-level routines won't have
222          * to. we assume access permissions have been handled by the open
223          * of the memory object, so we don't do any here.
224          */
225         if (file != NULL) {
226                 switch (flags & MAP_TYPE) {
227                 case MAP_SHARED:
228                         if ((prot & PROT_WRITE) && !(file->f_mode & FMODE_WRITE))
229                                 return -EACCES;
230 
231                         /* Make sure we don't allow writing to an append-only file.. */
232                         if (IS_APPEND(file->f_dentry->d_inode) && (file->f_mode & FMODE_WRITE))
233                                 return -EACCES;
234 
235                         /* make sure there are no mandatory locks on the file. */
236                         if (locks_verify_locked(file->f_dentry->d_inode))
237                                 return -EAGAIN;
238 
239                         /* fall through */
240                 case MAP_PRIVATE:
241                         if (!(file->f_mode & FMODE_READ))
242                                 return -EACCES;
243                         break;
244 
245                 default:
246                         return -EINVAL;
247                 }
248         }
249 
250         /* Obtain the address to map to. we verify (or select) it and ensure
251          * that it represents a valid section of the address space.
252          */
253         if (flags & MAP_FIXED) {
254                 if (addr & ~PAGE_MASK)
255                         return -EINVAL;
256         } else {
257                 addr = get_unmapped_area(addr, len);
258                 if (!addr)
259                         return -ENOMEM;
260         }
261 
262         /* Determine the object being mapped and call the appropriate
263          * specific mapper. the address has already been validated, but
264          * not unmapped, but the maps are removed from the list.
265          */
266         vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
267         if (!vma)
268                 return -ENOMEM;
269 
270         vma->vm_mm = mm;
271         vma->vm_start = addr;
272         vma->vm_end = addr + len;
273         vma->vm_flags = vm_flags(prot,flags) | mm->def_flags;
274 
275         if (file) {
276                 VM_ClearReadHint(vma);
277                 vma->vm_raend = 0;
278 
279                 if (file->f_mode & FMODE_READ)
280                         vma->vm_flags |= VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;
281                 if (flags & MAP_SHARED) {
282                         vma->vm_flags |= VM_SHARED | VM_MAYSHARE;
283 
284                         /* This looks strange, but when we don't have the file open
285                          * for writing, we can demote the shared mapping to a simpler
286                          * private mapping. That also takes care of a security hole
287                          * with ptrace() writing to a shared mapping without write
288                          * permissions.
289                          *
290                          * We leave the VM_MAYSHARE bit on, just to get correct output
291                          * from /proc/xxx/maps..
292                          */
293                         if (!(file->f_mode & FMODE_WRITE))
294                                 vma->vm_flags &= ~(VM_MAYWRITE | VM_SHARED);
295                 }
296         } else {
297                 vma->vm_flags |= VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;
298                 if (flags & MAP_SHARED)
299                         vma->vm_flags |= VM_SHARED | VM_MAYSHARE;
300         }
301         vma->vm_page_prot = protection_map[vma->vm_flags & 0x0f];
302         vma->vm_ops = NULL;
303         vma->vm_pgoff = pgoff;
304         vma->vm_file = NULL;
305         vma->vm_private_data = NULL;
306 
307         /* Clear old maps */
308         error = -ENOMEM;
309         if (do_munmap(mm, addr, len))
310                 goto free_vma;
311 
312         /* Check against address space limit. */
313         if ((mm->total_vm << PAGE_SHIFT) + len
314             > current->rlim[RLIMIT_AS].rlim_cur)
315                 goto free_vma;
316 
317         /* Private writable mapping? Check memory availability.. */
318         if ((vma->vm_flags & (VM_SHARED | VM_WRITE)) == VM_WRITE &&
319             !(flags & MAP_NORESERVE)                             &&
320             !vm_enough_memory(len >> PAGE_SHIFT))
321                 goto free_vma;
322 
323         if (file) {
324                 if (vma->vm_flags & VM_DENYWRITE) {
325                         error = deny_write_access(file);
326                         if (error)
327                                 goto free_vma;
328                         correct_wcount = 1;
329                 }
330                 vma->vm_file = file;
331                 get_file(file);
332                 error = file->f_op->mmap(file, vma);
333                 if (error)
334                         goto unmap_and_free_vma;
335         } else if (flags & MAP_SHARED) {
336                 error = shmem_zero_setup(vma);
337                 if (error)
338                         goto free_vma;
339         }
340 
341         /* Can addr have changed??
342          *
343          * Answer: Yes, several device drivers can do it in their
344          *         f_op->mmap method. -DaveM
345          */
346         flags = vma->vm_flags;
347         addr = vma->vm_start;
348 
349         insert_vm_struct(mm, vma);
350         if (correct_wcount)
351                 atomic_inc(&file->f_dentry->d_inode->i_writecount);
352         
353         mm->total_vm += len >> PAGE_SHIFT;
354         if (flags & VM_LOCKED) {
355                 mm->locked_vm += len >> PAGE_SHIFT;
356                 make_pages_present(addr, addr + len);
357         }
358         return addr;
359 
360 unmap_and_free_vma:
361         if (correct_wcount)
362                 atomic_inc(&file->f_dentry->d_inode->i_writecount);
363         vma->vm_file = NULL;
364         fput(file);
365         /* Undo any partial mapping done by a device driver. */
366         flush_cache_range(mm, vma->vm_start, vma->vm_end);
367         zap_page_range(mm, vma->vm_start, vma->vm_end - vma->vm_start);
368         flush_tlb_range(mm, vma->vm_start, vma->vm_end);
369 free_vma:
370         kmem_cache_free(vm_area_cachep, vma);
371         return error;
372 }
373 
374 /* Get an address range which is currently unmapped.
375  * For mmap() without MAP_FIXED and shmat() with addr=0.
376  * Return value 0 means ENOMEM.
377  */
378 #ifndef HAVE_ARCH_UNMAPPED_AREA
379 unsigned long get_unmapped_area(unsigned long addr, unsigned long len)
380 {
381         struct vm_area_struct * vmm;
382 
383         if (len > TASK_SIZE)
384                 return 0;
385         if (!addr)
386                 addr = TASK_UNMAPPED_BASE;
387         addr = PAGE_ALIGN(addr);
388 
389         for (vmm = find_vma(current->mm, addr); ; vmm = vmm->vm_next) {
390                 /* At this point:  (!vmm || addr < vmm->vm_end). */
391                 if (TASK_SIZE - len < addr)
392                         return 0;
393                 if (!vmm || addr + len <= vmm->vm_start)
394                         return addr;
395                 addr = vmm->vm_end;
396         }
397 }
398 #endif
399 
/* Marker for an empty AVL subtree.  The expansion is fully parenthesised
 * so it behaves as a single operand wherever it is used.
 */
#define vm_avl_empty    ((struct vm_area_struct *) NULL)
401 
402 #include "mmap_avl.c"
403 
404 /* Look up the first VMA which satisfies  addr < vm_end,  NULL if none. */
405 struct vm_area_struct * find_vma(struct mm_struct * mm, unsigned long addr)
406 {
407         struct vm_area_struct *vma = NULL;
408 
409         if (mm) {
410                 /* Check the cache first. */
411                 /* (Cache hit rate is typically around 35%.) */
412                 vma = mm->mmap_cache;
413                 if (!(vma && vma->vm_end > addr && vma->vm_start <= addr)) {
414                         if (!mm->mmap_avl) {
415                                 /* Go through the linear list. */
416                                 vma = mm->mmap;
417                                 while (vma && vma->vm_end <= addr)
418                                         vma = vma->vm_next;
419                         } else {
420                                 /* Then go through the AVL tree quickly. */
421                                 struct vm_area_struct * tree = mm->mmap_avl;
422                                 vma = NULL;
423                                 for (;;) {
424                                         if (tree == vm_avl_empty)
425                                                 break;
426                                         if (tree->vm_end > addr) {
427                                                 vma = tree;
428                                                 if (tree->vm_start <= addr)
429                                                         break;
430                                                 tree = tree->vm_avl_left;
431                                         } else
432                                                 tree = tree->vm_avl_right;
433                                 }
434                         }
435                         if (vma)
436                                 mm->mmap_cache = vma;
437                 }
438         }
439         return vma;
440 }
441 
442 /* Same as find_vma, but also return a pointer to the previous VMA in *pprev. */
443 struct vm_area_struct * find_vma_prev(struct mm_struct * mm, unsigned long addr,
444                                       struct vm_area_struct **pprev)
445 {
446         if (mm) {
447                 if (!mm->mmap_avl) {
448                         /* Go through the linear list. */
449                         struct vm_area_struct * prev = NULL;
450                         struct vm_area_struct * vma = mm->mmap;
451                         while (vma && vma->vm_end <= addr) {
452                                 prev = vma;
453                                 vma = vma->vm_next;
454                         }
455                         *pprev = prev;
456                         return vma;
457                 } else {
458                         /* Go through the AVL tree quickly. */
459                         struct vm_area_struct * vma = NULL;
460                         struct vm_area_struct * last_turn_right = NULL;
461                         struct vm_area_struct * prev = NULL;
462                         struct vm_area_struct * tree = mm->mmap_avl;
463                         for (;;) {
464                                 if (tree == vm_avl_empty)
465                                         break;
466                                 if (tree->vm_end > addr) {
467                                         vma = tree;
468                                         prev = last_turn_right;
469                                         if (tree->vm_start <= addr)
470                                                 break;
471                                         tree = tree->vm_avl_left;
472                                 } else {
473                                         last_turn_right = tree;
474                                         tree = tree->vm_avl_right;
475                                 }
476                         }
477                         if (vma) {
478                                 if (vma->vm_avl_left != vm_avl_empty) {
479                                         prev = vma->vm_avl_left;
480                                         while (prev->vm_avl_right != vm_avl_empty)
481                                                 prev = prev->vm_avl_right;
482                                 }
483                                 if ((prev ? prev->vm_next : mm->mmap) != vma)
484                                         printk("find_vma_prev: tree inconsistent with list\n");
485                                 *pprev = prev;
486                                 return vma;
487                         }
488                 }
489         }
490         *pprev = NULL;
491         return NULL;
492 }
493 
494 struct vm_area_struct * find_extend_vma(struct mm_struct * mm, unsigned long addr)
495 {
496         struct vm_area_struct * vma;
497         unsigned long start;
498 
499         addr &= PAGE_MASK;
500         vma = find_vma(mm,addr);
501         if (!vma)
502                 return NULL;
503         if (vma->vm_start <= addr)
504                 return vma;
505         if (!(vma->vm_flags & VM_GROWSDOWN))
506                 return NULL;
507         start = vma->vm_start;
508         if (expand_stack(vma, addr))
509                 return NULL;
510         if (vma->vm_flags & VM_LOCKED) {
511                 make_pages_present(addr, start);
512         }
513         return vma;
514 }
515 
516 /* Normal function to fix up a mapping
517  * This function is the default for when an area has no specific
518  * function.  This may be used as part of a more specific routine.
519  * This function works out what part of an area is affected and
520  * adjusts the mapping information.  Since the actual page
521  * manipulation is done in do_mmap(), none need be done here,
522  * though it would probably be more appropriate.
523  *
524  * By the time this function is called, the area struct has been
525  * removed from the process mapping list, so it needs to be
526  * reinserted if necessary.
527  *
528  * The 4 main cases are:
529  *    Unmapping the whole area
530  *    Unmapping from the start of the segment to a point in it
531  *    Unmapping from an intermediate point to the end
532  *    Unmapping between to intermediate points, making a hole.
533  *
534  * Case 4 involves the creation of 2 new areas, for each side of
535  * the hole.  If possible, we reuse the existing area rather than
536  * allocate a new one, and the return indicates whether the old
537  * area was reused.
538  */
539 static struct vm_area_struct * unmap_fixup(struct mm_struct *mm, 
540         struct vm_area_struct *area, unsigned long addr, size_t len, 
541         struct vm_area_struct *extra)
542 {
543         struct vm_area_struct *mpnt;
544         unsigned long end = addr + len;
545 
546         area->vm_mm->total_vm -= len >> PAGE_SHIFT;
547         if (area->vm_flags & VM_LOCKED)
548                 area->vm_mm->locked_vm -= len >> PAGE_SHIFT;
549 
550         /* Unmapping the whole area. */
551         if (addr == area->vm_start && end == area->vm_end) {
552                 if (area->vm_ops && area->vm_ops->close)
553                         area->vm_ops->close(area);
554                 if (area->vm_file)
555                         fput(area->vm_file);
556                 kmem_cache_free(vm_area_cachep, area);
557                 return extra;
558         }
559 
560         /* Work out to one of the ends. */
561         if (end == area->vm_end) {
562                 area->vm_end = addr;
563                 lock_vma_mappings(area);
564                 spin_lock(&mm->page_table_lock);
565         } else if (addr == area->vm_start) {
566                 area->vm_pgoff += (end - area->vm_start) >> PAGE_SHIFT;
567                 area->vm_start = end;
568                 lock_vma_mappings(area);
569                 spin_lock(&mm->page_table_lock);
570         } else {
571         /* Unmapping a hole: area->vm_start < addr <= end < area->vm_end */
572                 /* Add end mapping -- leave beginning for below */
573                 mpnt = extra;
574                 extra = NULL;
575 
576                 mpnt->vm_mm = area->vm_mm;
577                 mpnt->vm_start = end;
578                 mpnt->vm_end = area->vm_end;
579                 mpnt->vm_page_prot = area->vm_page_prot;
580                 mpnt->vm_flags = area->vm_flags;
581                 mpnt->vm_raend = 0;
582                 mpnt->vm_ops = area->vm_ops;
583                 mpnt->vm_pgoff = area->vm_pgoff + ((end - area->vm_start) >> PAGE_SHIFT);
584                 mpnt->vm_file = area->vm_file;
585                 mpnt->vm_private_data = area->vm_private_data;
586                 if (mpnt->vm_file)
587                         get_file(mpnt->vm_file);
588                 if (mpnt->vm_ops && mpnt->vm_ops->open)
589                         mpnt->vm_ops->open(mpnt);
590                 area->vm_end = addr;    /* Truncate area */
591 
592                 /* Because mpnt->vm_file == area->vm_file this locks
593                  * things correctly.
594                  */
595                 lock_vma_mappings(area);
596                 spin_lock(&mm->page_table_lock);
597                 __insert_vm_struct(mm, mpnt);
598         }
599 
600         __insert_vm_struct(mm, area);
601         spin_unlock(&mm->page_table_lock);
602         unlock_vma_mappings(area);
603         return extra;
604 }
605 
606 /*
607  * Try to free as many page directory entries as we can,
608  * without having to work very hard at actually scanning
609  * the page tables themselves.
610  *
611  * Right now we try to free page tables if we have a nice
612  * PGDIR-aligned area that got free'd up. We could be more
613  * granular if we want to, but this is fast and simple,
614  * and covers the bad cases.
615  *
616  * "prev", if it exists, points to a vma before the one
617  * we just free'd - but there's no telling how much before.
618  */
619 static void free_pgtables(struct mm_struct * mm, struct vm_area_struct *prev,
620         unsigned long start, unsigned long end)
621 {
622         unsigned long first = start & PGDIR_MASK;
623         unsigned long last = end + PGDIR_SIZE - 1;
624         unsigned long start_index, end_index;
625 
626         if (!prev) {
627                 prev = mm->mmap;
628                 if (!prev)
629                         goto no_mmaps;
630                 if (prev->vm_end > start) {
631                         if (last > prev->vm_start)
632                                 last = prev->vm_start;
633                         goto no_mmaps;
634                 }
635         }
636         for (;;) {
637                 struct vm_area_struct *next = prev->vm_next;
638 
639                 if (next) {
640                         if (next->vm_start < start) {
641                                 prev = next;
642                                 continue;
643                         }
644                         if (last > next->vm_start)
645                                 last = next->vm_start;
646                 }
647                 if (prev->vm_end > first)
648                         first = prev->vm_end + PGDIR_SIZE - 1;
649                 break;
650         }
651 no_mmaps:
652         /*
653          * If the PGD bits are not consecutive in the virtual address, the
654          * old method of shifting the VA >> by PGDIR_SHIFT doesn't work.
655          */
656         start_index = pgd_index(first);
657         end_index = pgd_index(last);
658         if (end_index > start_index) {
659                 clear_page_tables(mm, start_index, end_index - start_index);
660                 flush_tlb_pgtables(mm, first & PGDIR_MASK, last & PGDIR_MASK);
661         }
662 }
663 
664 /* Munmap is split into 2 main parts -- this part which finds
665  * what needs doing, and the areas themselves, which do the
666  * work.  This now handles partial unmappings.
667  * Jeremy Fitzhardine <jeremy@sw.oz.au>
668  */
669 int do_munmap(struct mm_struct *mm, unsigned long addr, size_t len)
670 {
671         struct vm_area_struct *mpnt, *prev, **npp, *free, *extra;
672 
673         if ((addr & ~PAGE_MASK) || addr > TASK_SIZE || len > TASK_SIZE-addr)
674                 return -EINVAL;
675 
676         if ((len = PAGE_ALIGN(len)) == 0)
677                 return -EINVAL;
678 
679         /* Check if this memory area is ok - put it on the temporary
680          * list if so..  The checks here are pretty simple --
681          * every area affected in some way (by any overlap) is put
682          * on the list.  If nothing is put on, nothing is affected.
683          */
684         mpnt = find_vma_prev(mm, addr, &prev);
685         if (!mpnt)
686                 return 0;
687         /* we have  addr < mpnt->vm_end  */
688 
689         if (mpnt->vm_start >= addr+len)
690                 return 0;
691 
692         /* If we'll make "hole", check the vm areas limit */
693         if ((mpnt->vm_start < addr && mpnt->vm_end > addr+len)
694             && mm->map_count >= MAX_MAP_COUNT)
695                 return -ENOMEM;
696 
697         /*
698          * We may need one additional vma to fix up the mappings ... 
699          * and this is the last chance for an easy error exit.
700          */
701         extra = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
702         if (!extra)
703                 return -ENOMEM;
704 
705         npp = (prev ? &prev->vm_next : &mm->mmap);
706         free = NULL;
707         spin_lock(&mm->page_table_lock);
708         for ( ; mpnt && mpnt->vm_start < addr+len; mpnt = *npp) {
709                 *npp = mpnt->vm_next;
710                 mpnt->vm_next = free;
711                 free = mpnt;
712                 if (mm->mmap_avl)
713                         avl_remove(mpnt, &mm->mmap_avl);
714         }
715         mm->mmap_cache = NULL;  /* Kill the cache. */
716         spin_unlock(&mm->page_table_lock);
717 
718         /* Ok - we have the memory areas we should free on the 'free' list,
719          * so release them, and unmap the page range..
720          * If the one of the segments is only being partially unmapped,
721          * it will put new vm_area_struct(s) into the address space.
722          * In that case we have to be careful with VM_DENYWRITE.
723          */
724         while ((mpnt = free) != NULL) {
725                 unsigned long st, end, size;
726                 struct file *file = NULL;
727 
728                 free = free->vm_next;
729 
730                 st = addr < mpnt->vm_start ? mpnt->vm_start : addr;
731                 end = addr+len;
732                 end = end > mpnt->vm_end ? mpnt->vm_end : end;
733                 size = end - st;
734 
735                 if (mpnt->vm_flags & VM_DENYWRITE &&
736                     (st != mpnt->vm_start || end != mpnt->vm_end) &&
737                     (file = mpnt->vm_file) != NULL) {
738                         atomic_dec(&file->f_dentry->d_inode->i_writecount);
739                 }
740                 remove_shared_vm_struct(mpnt);
741                 mm->map_count--;
742 
743                 flush_cache_range(mm, st, end);
744                 zap_page_range(mm, st, size);
745                 flush_tlb_range(mm, st, end);
746 
747                 /*
748                  * Fix the mapping, and free the old area if it wasn't reused.
749                  */
750                 extra = unmap_fixup(mm, mpnt, st, size, extra);
751                 if (file)
752                         atomic_inc(&file->f_dentry->d_inode->i_writecount);
753         }
754 
755         /* Release the extra vma struct if it wasn't used */
756         if (extra)
757                 kmem_cache_free(vm_area_cachep, extra);
758 
759         free_pgtables(mm, prev, addr, addr+len);
760 
761         return 0;
762 }
763 
764 asmlinkage long sys_munmap(unsigned long addr, size_t len)
765 {
766         int ret;
767         struct mm_struct *mm = current->mm;
768 
769         down(&mm->mmap_sem);
770         ret = do_munmap(mm, addr, len);
771         up(&mm->mmap_sem);
772         return ret;
773 }
774 
775 /*
776  *  this is really a simplified "do_mmap".  it only handles
777  *  anonymous maps.  eventually we may be able to do some
778  *  brk-specific accounting here.
779  */
780 unsigned long do_brk(unsigned long addr, unsigned long len)
781 {
782         struct mm_struct * mm = current->mm;
783         struct vm_area_struct * vma;
784         unsigned long flags, retval;
785 
786         len = PAGE_ALIGN(len);
787         if (!len)
788                 return addr;
789 
790         /*
791          * mlock MCL_FUTURE?
792          */
793         if (mm->def_flags & VM_LOCKED) {
794                 unsigned long locked = mm->locked_vm << PAGE_SHIFT;
795                 locked += len;
796                 if (locked > current->rlim[RLIMIT_MEMLOCK].rlim_cur)
797                         return -EAGAIN;
798         }
799 
800         /*
801          * Clear old maps.  this also does some error checking for us
802          */
803         retval = do_munmap(mm, addr, len);
804         if (retval != 0)
805                 return retval;
806 
807         /* Check against address space limits *after* clearing old maps... */
808         if ((mm->total_vm << PAGE_SHIFT) + len
809             > current->rlim[RLIMIT_AS].rlim_cur)
810                 return -ENOMEM;
811 
812         if (mm->map_count > MAX_MAP_COUNT)
813                 return -ENOMEM;
814 
815         if (!vm_enough_memory(len >> PAGE_SHIFT))
816                 return -ENOMEM;
817 
818         flags = vm_flags(PROT_READ|PROT_WRITE|PROT_EXEC,
819                                 MAP_FIXED|MAP_PRIVATE) | mm->def_flags;
820 
821         flags |= VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;
822         
823 
824         /* Can we just expand an old anonymous mapping? */
825         if (addr) {
826                 struct vm_area_struct * vma = find_vma(mm, addr-1);
827                 if (vma && vma->vm_end == addr && !vma->vm_file && 
828                     vma->vm_flags == flags) {
829                         vma->vm_end = addr + len;
830                         goto out;
831                 }
832         }       
833 
834 
835         /*
836          * create a vma struct for an anonymous mapping
837          */
838         vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
839         if (!vma)
840                 return -ENOMEM;
841 
842         vma->vm_mm = mm;
843         vma->vm_start = addr;
844         vma->vm_end = addr + len;
845         vma->vm_flags = flags;
846         vma->vm_page_prot = protection_map[flags & 0x0f];
847         vma->vm_ops = NULL;
848         vma->vm_pgoff = 0;
849         vma->vm_file = NULL;
850         vma->vm_private_data = NULL;
851 
852         insert_vm_struct(mm, vma);
853 
854 out:
855         mm->total_vm += len >> PAGE_SHIFT;
856         if (flags & VM_LOCKED) {
857                 mm->locked_vm += len >> PAGE_SHIFT;
858                 make_pages_present(addr, addr + len);
859         }
860         return addr;
861 }
862 
863 /* Build the AVL tree corresponding to the VMA list. */
864 void build_mmap_avl(struct mm_struct * mm)
865 {
866         struct vm_area_struct * vma;
867 
868         mm->mmap_avl = NULL;
869         for (vma = mm->mmap; vma; vma = vma->vm_next)
870                 avl_insert(vma, &mm->mmap_avl);
871 }
872 
873 /* Release all mmaps. */
874 void exit_mmap(struct mm_struct * mm)
875 {
876         struct vm_area_struct * mpnt;
877 
878         release_segments(mm);
879         spin_lock(&mm->page_table_lock);
880         mpnt = mm->mmap;
881         mm->mmap = mm->mmap_avl = mm->mmap_cache = NULL;
882         spin_unlock(&mm->page_table_lock);
883         mm->rss = 0;
884         mm->total_vm = 0;
885         mm->locked_vm = 0;
886         while (mpnt) {
887                 struct vm_area_struct * next = mpnt->vm_next;
888                 unsigned long start = mpnt->vm_start;
889                 unsigned long end = mpnt->vm_end;
890                 unsigned long size = end - start;
891 
892                 if (mpnt->vm_ops) {
893                         if (mpnt->vm_ops->close)
894                                 mpnt->vm_ops->close(mpnt);
895                 }
896                 mm->map_count--;
897                 remove_shared_vm_struct(mpnt);
898                 flush_cache_range(mm, start, end);
899                 zap_page_range(mm, start, size);
900                 if (mpnt->vm_file)
901                         fput(mpnt->vm_file);
902                 kmem_cache_free(vm_area_cachep, mpnt);
903                 mpnt = next;
904         }
905 
906         /* This is just debugging */
907         if (mm->map_count)
908                 printk("exit_mmap: map count is %d\n", mm->map_count);
909 
910         clear_page_tables(mm, FIRST_USER_PGD_NR, USER_PTRS_PER_PGD);
911 }
912 
913 /* Insert vm structure into process list sorted by address
914  * and into the inode's i_mmap ring.  If vm_file is non-NULL
915  * then the i_shared_lock must be held here.
916  */
917 void __insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vmp)
918 {
919         struct vm_area_struct **pprev;
920         struct file * file;
921 
922         if (!mm->mmap_avl) {
923                 pprev = &mm->mmap;
924                 while (*pprev && (*pprev)->vm_start <= vmp->vm_start)
925                         pprev = &(*pprev)->vm_next;
926         } else {
927                 struct vm_area_struct *prev, *next;
928                 avl_insert_neighbours(vmp, &mm->mmap_avl, &prev, &next);
929                 pprev = (prev ? &prev->vm_next : &mm->mmap);
930                 if (*pprev != next)
931                         printk("insert_vm_struct: tree inconsistent with list\n");
932         }
933         vmp->vm_next = *pprev;
934         *pprev = vmp;
935 
936         mm->map_count++;
937         if (mm->map_count >= AVL_MIN_MAP_COUNT && !mm->mmap_avl)
938                 build_mmap_avl(mm);
939 
940         file = vmp->vm_file;
941         if (file) {
942                 struct inode * inode = file->f_dentry->d_inode;
943                 struct address_space *mapping = inode->i_mapping;
944                 struct vm_area_struct **head;
945 
946                 if (vmp->vm_flags & VM_DENYWRITE)
947                         atomic_dec(&inode->i_writecount);
948 
949                 head = &mapping->i_mmap;
950                 if (vmp->vm_flags & VM_SHARED)
951                         head = &mapping->i_mmap_shared;
952       
953                 /* insert vmp into inode's share list */
954                 if((vmp->vm_next_share = *head) != NULL)
955                         (*head)->vm_pprev_share = &vmp->vm_next_share;
956                 *head = vmp;
957                 vmp->vm_pprev_share = head;
958         }
959 }
960 
961 void insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vmp)
962 {
963         lock_vma_mappings(vmp);
964         spin_lock(&current->mm->page_table_lock);
965         __insert_vm_struct(mm, vmp);
966         spin_unlock(&current->mm->page_table_lock);
967         unlock_vma_mappings(vmp);
968 }
969 

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~ [ freetext search ] ~ [ file search ] ~

This page was automatically generated by the LXR engine.
Visit the LXR main site for more information.