1 /*
2 * linux/mm/page_alloc.c
3 *
4 * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
5 * Swap reorganised 29.12.95, Stephen Tweedie
6 * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
7 * Reshaped it to be a zoned allocator, Ingo Molnar, Red Hat, 1999
8 * Discontiguous memory support, Kanoj Sarcar, SGI, Nov 1999
9 * Zone balancing, Kanoj Sarcar, SGI, Jan 2000
10 */
11
12 #include <linux/config.h>
13 #include <linux/mm.h>
14 #include <linux/swap.h>
15 #include <linux/swapctl.h>
16 #include <linux/interrupt.h>
17 #include <linux/pagemap.h>
18 #include <linux/bootmem.h>
19
/*
 * Global page accounting. nr_active_pages and nr_inactive_dirty_pages are
 * read by nr_free_buffer_pages() and show_free_areas_core() below;
 * nr_swap_pages is only defined here and not otherwise used in this file.
 */
int nr_swap_pages;
int nr_active_pages;
int nr_inactive_dirty_pages;
/* Head of the node list; the nr_*_pages() helpers walk it via ->node_next. */
pg_data_t *pgdat_list;

/* Printable zone names, indexed by zone number (see free_area_init_core()). */
static char *zone_names[MAX_NR_ZONES] = { "DMA", "Normal", "HighMem" };
/*
 * Per-zone watermark tuning used by free_area_init_core():
 * pages_min = realsize / zone_balance_ratio[], clamped to the range
 * [zone_balance_min[], zone_balance_max[]]. The ratio can be overridden
 * with the "memfrac=" boot option (setup_mem_frac()).
 */
static int zone_balance_ratio[MAX_NR_ZONES] = { 32, 128, 128, };
static int zone_balance_min[MAX_NR_ZONES] = { 10 , 10, 10, };
static int zone_balance_max[MAX_NR_ZONES] = { 255 , 255, 255, };

/* Global page lists; both are initialised empty in free_area_init_core(). */
struct list_head active_list;
struct list_head inactive_dirty_list;
32 /*
33 * Free_page() adds the page to the free lists. This is optimized for
34 * fast normal cases (no error jumps taken normally).
35 *
36 * The way to optimize jumps for gcc-2.2.2 is to:
37 * - select the "normal" case and put it inside the if () { XXX }
38 * - no else-statements if you can avoid them
39 *
40 * With the above two rules, you get a straight-line execution path
41 * for the normal case, giving better asm-code.
42 */
43
/*
 * The free lists are plain struct list_head lists; the memlist_* names
 * below are direct aliases for the generic list primitives.
 */
#define memlist_init(x) INIT_LIST_HEAD(x)
#define memlist_add_head list_add
#define memlist_add_tail list_add_tail
#define memlist_del list_del
#define memlist_entry list_entry
#define memlist_next(x) ((x)->next)
#define memlist_prev(x) ((x)->prev)
51
/*
 * Temporary debugging check.
 *
 * Evaluates to non-zero when page 'x' does not belong to 'zone': either
 * its ->zone back-pointer differs, or its mem_map index lies outside
 * [zone->offset, zone->offset + zone->size). Note that 'zone' and 'x'
 * are evaluated several times.
 */
#define BAD_RANGE(zone,x) (((zone) != (x)->zone) || (((x)-mem_map) < (zone)->offset) || (((x)-mem_map) >= (zone)->offset+(zone)->size))
56
/*
 * Buddy system. Hairy. You really aren't expected to understand this
 *
 * Hint: -mask = 1+~mask
 *
 * __free_pages_ok() puts the block of 2^order pages starting at 'page'
 * back on the free lists of page->zone, merging it with its buddy for as
 * long as the buddy is free too. The caller must pass a page that is no
 * longer in use: any of the sanity checks below failing is a BUG().
 */

static void FASTCALL(__free_pages_ok (struct page *page, unsigned long order));
static void __free_pages_ok (struct page *page, unsigned long order)
{
	unsigned long index, page_idx, mask, flags;
	free_area_t *area;
	struct page *base;
	zone_t *zone;

	/* The page must be completely detached before it can be freed. */
	if (page->buffers)
		BUG();
	if (page->mapping)
		BUG();
	if (!VALID_PAGE(page))
		BUG();
	if (PageSwapCache(page))
		BUG();
	if (PageLocked(page))
		BUG();
	if (PageDecrAfter(page))
		BUG();
	if (PageActive(page))
		BUG();
	if (PageInactiveDirty(page))
		BUG();
	if (PageInactiveClean(page))
		BUG();

	/* Reset per-use state so the next owner starts from a clean page. */
	page->flags &= ~((1<<PG_referenced) | (1<<PG_dirty));
	page->age = PAGE_AGE_START;

	zone = page->zone;

	/* mask has the low 'order' bits clear, so -mask == 1 << order. */
	mask = (~0UL) << order;
	base = mem_map + zone->offset;
	page_idx = page - base;
	/* The block must be aligned to its own size within the zone. */
	if (page_idx & ~mask)
		BUG();
	/* One bitmap bit per buddy pair, hence the extra shift by one. */
	index = page_idx >> (1 + order);

	area = zone->free_area + order;

	spin_lock_irqsave(&zone->lock, flags);

	/* Subtracting the negative mask adds 1 << order free pages. */
	zone->free_pages -= mask;

	/*
	 * The sum is zero once mask == -(1 << (MAX_ORDER-1)), i.e. once the
	 * block has reached the largest order and cannot be merged further.
	 */
	while (mask + (1 << (MAX_ORDER-1))) {
		struct page *buddy1, *buddy2;

		if (area >= zone->free_area + MAX_ORDER)
			BUG();
		/* Toggle the pair's bit; a previous value of 0 ends the merge. */
		if (!test_and_change_bit(index, area->map))
			/*
			 * the buddy page is still allocated.
			 */
			break;
		/*
		 * Move the buddy up one level.
		 */
		/* XOR with the block size (-mask) flips to the other half of the pair. */
		buddy1 = base + (page_idx ^ -mask);
		buddy2 = base + page_idx;
		if (BAD_RANGE(zone,buddy1))
			BUG();
		if (BAD_RANGE(zone,buddy2))
			BUG();

		/* Take the free buddy off this order's list and go up one order. */
		memlist_del(&buddy1->list);
		mask <<= 1;
		area++;
		index >>= 1;
		/* Round down to the start of the merged, twice-as-large block. */
		page_idx &= mask;
	}
	memlist_add_head(&(base + page_idx)->list, &area->free_list);

	spin_unlock_irqrestore(&zone->lock, flags);

	/*
	 * We don't want to protect this variable from race conditions
	 * since it's nothing important, but we do want to make sure
	 * it never gets negative.
	 */
	if (memory_pressure > NR_CPUS)
		memory_pressure--;
}
146
/*
 * Toggle the buddy-pair bit covering block 'index' of the given order in
 * area->map. Each bit describes a pair of buddies, hence the shift by
 * 1+order rather than order.
 */
#define MARK_USED(index, order, area) \
	change_bit((index) >> (1+(order)), (area)->map)
149
150 static inline struct page * expand (zone_t *zone, struct page *page,
151 unsigned long index, int low, int high, free_area_t * area)
152 {
153 unsigned long size = 1 << high;
154
155 while (high > low) {
156 if (BAD_RANGE(zone,page))
157 BUG();
158 area--;
159 high--;
160 size >>= 1;
161 memlist_add_head(&(page)->list, &(area)->free_list);
162 MARK_USED(index, high, area);
163 index += size;
164 page += size;
165 }
166 if (BAD_RANGE(zone,page))
167 BUG();
168 return page;
169 }
170
171 static FASTCALL(struct page * rmqueue(zone_t *zone, unsigned long order));
172 static struct page * rmqueue(zone_t *zone, unsigned long order)
173 {
174 free_area_t * area = zone->free_area + order;
175 unsigned long curr_order = order;
176 struct list_head *head, *curr;
177 unsigned long flags;
178 struct page *page;
179
180 spin_lock_irqsave(&zone->lock, flags);
181 do {
182 head = &area->free_list;
183 curr = memlist_next(head);
184
185 if (curr != head) {
186 unsigned int index;
187
188 page = memlist_entry(curr, struct page, list);
189 if (BAD_RANGE(zone,page))
190 BUG();
191 memlist_del(curr);
192 index = (page - mem_map) - zone->offset;
193 MARK_USED(index, curr_order, area);
194 zone->free_pages -= 1 << order;
195
196 page = expand(zone, page, index, order, curr_order, area);
197 spin_unlock_irqrestore(&zone->lock, flags);
198
199 set_page_count(page, 1);
200 if (BAD_RANGE(zone,page))
201 BUG();
202 DEBUG_ADD_PAGE
203 return page;
204 }
205 curr_order++;
206 area++;
207 } while (curr_order < MAX_ORDER);
208 spin_unlock_irqrestore(&zone->lock, flags);
209
210 return NULL;
211 }
212
/*
 * Watermark selectors for the 'limit' argument of __alloc_pages_limit():
 * they pick zone->pages_min, ->pages_low or ->pages_high respectively.
 */
#define PAGES_MIN 0
#define PAGES_LOW 1
#define PAGES_HIGH 2
216
217 /*
218 * This function does the dirty work for __alloc_pages
219 * and is separated out to keep the code size smaller.
220 * (suggested by Davem at 1:30 AM, typed by Rik at 6 AM)
221 */
222 static struct page * __alloc_pages_limit(zonelist_t *zonelist,
223 unsigned long order, int limit, int direct_reclaim)
224 {
225 zone_t **zone = zonelist->zones;
226
227 for (;;) {
228 zone_t *z = *(zone++);
229 unsigned long water_mark;
230
231 if (!z)
232 break;
233 if (!z->size)
234 BUG();
235
236 /*
237 * We allocate if the number of free + inactive_clean
238 * pages is above the watermark.
239 */
240 switch (limit) {
241 default:
242 case PAGES_MIN:
243 water_mark = z->pages_min;
244 break;
245 case PAGES_LOW:
246 water_mark = z->pages_low;
247 break;
248 case PAGES_HIGH:
249 water_mark = z->pages_high;
250 }
251
252 if (z->free_pages + z->inactive_clean_pages > water_mark) {
253 struct page *page = NULL;
254 /* If possible, reclaim a page directly. */
255 if (direct_reclaim && z->free_pages < z->pages_min + 8)
256 page = reclaim_page(z);
257 /* If that fails, fall back to rmqueue. */
258 if (!page)
259 page = rmqueue(z, order);
260 if (page)
261 return page;
262 }
263 }
264
265 /* Found nothing. */
266 return NULL;
267 }
268
269
/*
 * This is the 'heart' of the zoned buddy allocator:
 *
 * Allocate a block of 2^order pages from the zones listed in 'zonelist'
 * (tried in array order). The allocation flags come from
 * zonelist->gfp_mask. The function goes through increasingly desperate
 * phases:
 *
 *  1. zones whose free_pages are at or above pages_low: plain rmqueue();
 *  2. __alloc_pages_limit() with PAGES_HIGH, then PAGES_LOW;
 *  3. wake kswapd, yield the CPU if __GFP_WAIT, then try PAGES_MIN;
 *  4. unless the task is PF_MEMALLOC: for order > 0 with __GFP_WAIT,
 *     launder pages and move inactive_clean pages to the free lists one
 *     at a time; then wake kswapd / free pages ourselves and, for
 *     order-0 requests only, restart from phase 1;
 *  5. finally take anything rmqueue() will give, keeping pages_min/4 in
 *     reserve for PF_MEMALLOC tasks.
 *
 * Returns the first page of the block, or NULL (after logging an error)
 * if every phase failed.
 */
struct page * __alloc_pages(zonelist_t *zonelist, unsigned long order)
{
	zone_t **zone;
	int direct_reclaim = 0;
	unsigned int gfp_mask = zonelist->gfp_mask;
	struct page * page;

	/*
	 * Allocations put pressure on the VM subsystem.
	 */
	memory_pressure++;

	/*
	 * (If anyone calls gfp from interrupts nonatomically then it
	 * will sooner or later be tripped up by a schedule().)
	 *
	 * We are falling back to lower-level zones if allocation
	 * in a higher zone fails.
	 */

	/*
	 * Can we take pages directly from the inactive_clean
	 * list?
	 */
	if (order == 0 && (gfp_mask & __GFP_WAIT) &&
			!(current->flags & PF_MEMALLOC))
		direct_reclaim = 1;

	/*
	 * If we are about to get low on free pages and we also have
	 * an inactive page shortage, wake up kswapd.
	 */
	if (inactive_shortage() > inactive_target / 2 && free_shortage())
		wakeup_kswapd(0);
	/*
	 * If we are about to get low on free pages and cleaning
	 * the inactive_dirty pages would fix the situation,
	 * wake up bdflush.
	 */
	else if (free_shortage() && nr_inactive_dirty_pages > free_shortage()
			&& nr_inactive_dirty_pages >= freepages.high)
		wakeup_bdflush(0);

try_again:
	/*
	 * First, see if we have any zones with lots of free memory.
	 *
	 * We allocate free memory first because it doesn't contain
	 * any data ... DUH!
	 */
	zone = zonelist->zones;
	for (;;) {
		zone_t *z = *(zone++);
		if (!z)
			break;
		if (!z->size)
			BUG();

		if (z->free_pages >= z->pages_low) {
			page = rmqueue(z, order);
			if (page)
				return page;
		} else if (z->free_pages < z->pages_min &&
					waitqueue_active(&kreclaimd_wait)) {
			/* Zone is below its minimum: kick kreclaimd if it is waiting. */
			wake_up_interruptible(&kreclaimd_wait);
		}
	}

	/*
	 * Try to allocate a page from a zone with a HIGH
	 * amount of free + inactive_clean pages.
	 *
	 * If there is a lot of activity, inactive_target
	 * will be high and we'll have a good chance of
	 * finding a page using the HIGH limit.
	 */
	page = __alloc_pages_limit(zonelist, order, PAGES_HIGH, direct_reclaim);
	if (page)
		return page;

	/*
	 * Then try to allocate a page from a zone with more
	 * than zone->pages_low free + inactive_clean pages.
	 *
	 * When the working set is very large and VM activity
	 * is low, we're most likely to have our allocation
	 * succeed here.
	 */
	page = __alloc_pages_limit(zonelist, order, PAGES_LOW, direct_reclaim);
	if (page)
		return page;

	/*
	 * OK, none of the zones on our zonelist has lots
	 * of pages free.
	 *
	 * We wake up kswapd, in the hope that kswapd will
	 * resolve this situation before memory gets tight.
	 *
	 * We also yield the CPU, because that:
	 * - gives kswapd a chance to do something
	 * - slows down allocations, in particular the
	 *   allocations from the fast allocator that's
	 *   causing the problems ...
	 * - ... which minimises the impact the "bad guys"
	 *   have on the rest of the system
	 * - if we don't have __GFP_IO set, kswapd may be
	 *   able to free some memory we can't free ourselves
	 */
	wakeup_kswapd(0);
	if (gfp_mask & __GFP_WAIT) {
		__set_current_state(TASK_RUNNING);
		current->policy |= SCHED_YIELD;
		schedule();
	}

	/*
	 * After waking up kswapd, we try to allocate a page
	 * from any zone which isn't critical yet.
	 *
	 * Kswapd should, in most situations, bring the situation
	 * back to normal in no time.
	 */
	page = __alloc_pages_limit(zonelist, order, PAGES_MIN, direct_reclaim);
	if (page)
		return page;

	/*
	 * Damn, we didn't succeed.
	 *
	 * This can be due to 2 reasons:
	 * - we're doing a higher-order allocation
	 * 	--> move pages to the free list until we succeed
	 * - we're /really/ tight on memory
	 * 	--> wait on the kswapd waitqueue until memory is freed
	 */
	if (!(current->flags & PF_MEMALLOC)) {
		/*
		 * Are we dealing with a higher order allocation?
		 *
		 * Move pages from the inactive_clean to the free list
		 * in the hope of creating a large, physically contiguous
		 * piece of free memory.
		 */
		if (order > 0 && (gfp_mask & __GFP_WAIT)) {
			zone = zonelist->zones;
			/* First, clean some dirty pages. */
			/* PF_MEMALLOC is set only for the duration of page_launder(). */
			current->flags |= PF_MEMALLOC;
			page_launder(gfp_mask, 1);
			current->flags &= ~PF_MEMALLOC;
			for (;;) {
				zone_t *z = *(zone++);
				if (!z)
					break;
				/* Unlike the other zone loops, an empty zone is skipped here, not a BUG(). */
				if (!z->size)
					continue;
				while (z->inactive_clean_pages) {
					/* Note: this 'page' shadows the function-level variable. */
					struct page * page;
					/* Move one page to the free list. */
					page = reclaim_page(z);
					if (!page)
						break;
					__free_page(page);
					/* Try if the allocation succeeds. */
					page = rmqueue(z, order);
					if (page)
						return page;
				}
			}
		}
		/*
		 * When we arrive here, we are really tight on memory.
		 *
		 * We wake up kswapd and sleep until kswapd wakes us
		 * up again. After that we loop back to the start.
		 *
		 * We have to do this because something else might eat
		 * the memory kswapd frees for us and we need to be
		 * reliable. Note that we don't loop back for higher
		 * order allocations since it is possible that kswapd
		 * simply cannot free a large enough contiguous area
		 * of memory *ever*.
		 */
		if ((gfp_mask & (__GFP_WAIT|__GFP_IO)) == (__GFP_WAIT|__GFP_IO)) {
			wakeup_kswapd(1);
			memory_pressure++;
			if (!order)
				goto try_again;
		/*
		 * If __GFP_IO isn't set, we can't wait on kswapd because
		 * kswapd just might need some IO locks /we/ are holding ...
		 *
		 * SUBTLE: The scheduling point above makes sure that
		 * kswapd does get the chance to free memory we can't
		 * free ourselves...
		 */
		} else if (gfp_mask & __GFP_WAIT) {
			try_to_free_pages(gfp_mask);
			memory_pressure++;
			if (!order)
				goto try_again;
		}

	}

	/*
	 * Final phase: allocate anything we can!
	 *
	 * Higher order allocations, GFP_ATOMIC allocations and
	 * recursive allocations (PF_MEMALLOC) end up here.
	 *
	 * Only recursive allocations can use the very last pages
	 * in the system, otherwise it would be just too easy to
	 * deadlock the system...
	 */
	zone = zonelist->zones;
	for (;;) {
		zone_t *z = *(zone++);
		/* Note: this 'page' shadows the function-level variable. */
		struct page * page = NULL;
		if (!z)
			break;
		if (!z->size)
			BUG();

		/*
		 * SUBTLE: direct_reclaim is only possible if the task
		 * becomes PF_MEMALLOC while looping above. This will
		 * happen when the OOM killer selects this task for
		 * instant execution...
		 */
		if (direct_reclaim) {
			page = reclaim_page(z);
			if (page)
				return page;
		}

		/* XXX: is pages_min/4 a good amount to reserve for this? */
		if (z->free_pages < z->pages_min / 4 &&
				!(current->flags & PF_MEMALLOC))
			continue;
		page = rmqueue(z, order);
		if (page)
			return page;
	}

	/* No luck.. */
	printk(KERN_ERR "__alloc_pages: %lu-order allocation failed.\n", order);
	return NULL;
}
522
/*
 * Common helper functions.
 */

/*
 * Allocate 2^order pages and hand back the address page_address() reports
 * for the first one, or 0 if the allocation failed.
 */
unsigned long __get_free_pages(int gfp_mask, unsigned long order)
{
	struct page *first = alloc_pages(gfp_mask, order);

	return first ? (unsigned long) page_address(first) : 0;
}
535
/*
 * Allocate a single page and clear it with clear_page().
 * Returns the page's address, or 0 if no page could be allocated.
 */
unsigned long get_zeroed_page(int gfp_mask)
{
	struct page *pg = alloc_pages(gfp_mask, 0);
	void *kaddr;

	if (!pg)
		return 0;

	kaddr = page_address(pg);
	clear_page(kaddr);
	return (unsigned long) kaddr;
}
548
/*
 * Drop one reference on a 2^order block starting at 'page' and free the
 * block once the count reaches zero. Reserved pages are left untouched
 * (their count is not even decremented).
 */
void __free_pages(struct page *page, unsigned long order)
{
	if (PageReserved(page))
		return;
	if (!put_page_testzero(page))
		return;

	__free_pages_ok(page, order);
}
554
/*
 * Free the 2^order block whose first page has kernel virtual address
 * 'addr' (the counterpart of __get_free_pages()).
 *
 * An 'addr' of 0 is ignored on every configuration. __get_free_pages()
 * returns 0 on failure, so 0 never names an allocated block. Previously
 * only CONFIG_DISCONTIGMEM builds checked for it; other builds relied on
 * VALID_PAGE() rejecting the pointer computed from address 0.
 * Addresses that do not map to a valid page are silently ignored as well.
 */
void free_pages(unsigned long addr, unsigned long order)
{
	struct page *fpage;

	if (addr == 0)
		return;

	fpage = virt_to_page(addr);
	if (VALID_PAGE(fpage))
		__free_pages(fpage, order);
}
566
567 /*
568 * Total amount of free (allocatable) RAM:
569 */
570 unsigned int nr_free_pages (void)
571 {
572 unsigned int sum;
573 zone_t *zone;
574 pg_data_t *pgdat = pgdat_list;
575
576 sum = 0;
577 while (pgdat) {
578 for (zone = pgdat->node_zones; zone < pgdat->node_zones + MAX_NR_ZONES; zone++)
579 sum += zone->free_pages;
580 pgdat = pgdat->node_next;
581 }
582 return sum;
583 }
584
585 /*
586 * Total amount of inactive_clean (allocatable) RAM:
587 */
588 unsigned int nr_inactive_clean_pages (void)
589 {
590 unsigned int sum;
591 zone_t *zone;
592 pg_data_t *pgdat = pgdat_list;
593
594 sum = 0;
595 while (pgdat) {
596 for (zone = pgdat->node_zones; zone < pgdat->node_zones + MAX_NR_ZONES; zone++)
597 sum += zone->inactive_clean_pages;
598 pgdat = pgdat->node_next;
599 }
600 return sum;
601 }
602
603 /*
604 * Amount of free RAM allocatable as buffer memory:
605 */
606 unsigned int nr_free_buffer_pages (void)
607 {
608 unsigned int sum;
609
610 sum = nr_free_pages();
611 sum += nr_inactive_clean_pages();
612 sum += nr_inactive_dirty_pages;
613
614 /*
615 * Keep our write behind queue filled, even if
616 * kswapd lags a bit right now.
617 */
618 if (sum < freepages.high + inactive_target)
619 sum = freepages.high + inactive_target;
620 /*
621 * We don't want dirty page writebehind to put too
622 * much pressure on the working set, but we want it
623 * to be possible to have some dirty pages in the
624 * working set without upsetting the writebehind logic.
625 */
626 sum += nr_active_pages >> 4;
627
628 return sum;
629 }
630
#if CONFIG_HIGHMEM
/*
 * Total number of free pages in the ZONE_HIGHMEM zone of every node.
 */
unsigned int nr_free_highpages (void)
{
	unsigned int total = 0;
	pg_data_t *node;

	for (node = pgdat_list; node; node = node->node_next)
		total += node->node_zones[ZONE_HIGHMEM].free_pages;

	return total;
}
#endif
644
645 /*
646 * Show free area list (used inside shift_scroll-lock stuff)
647 * We also calculate the percentage fragmentation. We do this by counting the
648 * memory on each free list with the exception of the first item on the list.
649 */
650 void show_free_areas_core(pg_data_t *pgdat)
651 {
652 unsigned long order;
653 unsigned type;
654
655 printk("Free pages: %6dkB (%6dkB HighMem)\n",
656 nr_free_pages() << (PAGE_SHIFT-10),
657 nr_free_highpages() << (PAGE_SHIFT-10));
658
659 printk("( Active: %d, inactive_dirty: %d, inactive_clean: %d, free: %d (%d %d %d) )\n",
660 nr_active_pages,
661 nr_inactive_dirty_pages,
662 nr_inactive_clean_pages(),
663 nr_free_pages(),
664 freepages.min,
665 freepages.low,
666 freepages.high);
667
668 for (type = 0; type < MAX_NR_ZONES; type++) {
669 struct list_head *head, *curr;
670 zone_t *zone = pgdat->node_zones + type;
671 unsigned long nr, total, flags;
672
673 total = 0;
674 if (zone->size) {
675 spin_lock_irqsave(&zone->lock, flags);
676 for (order = 0; order < MAX_ORDER; order++) {
677 head = &(zone->free_area + order)->free_list;
678 curr = head;
679 nr = 0;
680 for (;;) {
681 curr = memlist_next(curr);
682 if (curr == head)
683 break;
684 nr++;
685 }
686 total += nr * (1 << order);
687 printk("%lu*%lukB ", nr,
688 (PAGE_SIZE>>10) << order);
689 }
690 spin_unlock_irqrestore(&zone->lock, flags);
691 }
692 printk("= %lukB)\n", total * (PAGE_SIZE>>10));
693 }
694
695 #ifdef SWAP_CACHE_INFO
696 show_swap_cache_info();
697 #endif
698 }
699
700 void show_free_areas(void)
701 {
702 show_free_areas_core(pgdat_list);
703 }
704
/*
 * Builds allocation fallback zone lists.
 *
 * For every possible gfp index i, fills pgdat->node_zonelists[i] with a
 * NULL-terminated array of this node's non-empty zones. The list starts
 * at the zone selected by the __GFP_HIGHMEM / __GFP_DMA bits of i
 * (__GFP_DMA wins if both are set, ZONE_NORMAL if neither) and then falls
 * back HIGHMEM -> NORMAL -> DMA through the deliberate switch fallthrough.
 */
static inline void build_zonelists(pg_data_t *pgdat)
{
	int i, j, k;

	for (i = 0; i < NR_GFPINDEX; i++) {
		zonelist_t *zonelist;
		zone_t *zone;

		zonelist = pgdat->node_zonelists + i;
		memset(zonelist, 0, sizeof(*zonelist));

		/* The list index doubles as the gfp mask stored in the list. */
		zonelist->gfp_mask = i;
		j = 0;
		k = ZONE_NORMAL;
		if (i & __GFP_HIGHMEM)
			k = ZONE_HIGHMEM;
		if (i & __GFP_DMA)
			k = ZONE_DMA;

		switch (k) {
			default:
				BUG();
			/*
			 * fallthrough:
			 */
			case ZONE_HIGHMEM:
				zone = pgdat->node_zones + ZONE_HIGHMEM;
				if (zone->size) {
#ifndef CONFIG_HIGHMEM
					/* A populated highmem zone without highmem support is a bug. */
					BUG();
#endif
					zonelist->zones[j++] = zone;
				}
				/* fallthrough: highmem requests may fall back to normal memory */
			case ZONE_NORMAL:
				zone = pgdat->node_zones + ZONE_NORMAL;
				if (zone->size)
					zonelist->zones[j++] = zone;
				/* fallthrough: normal requests may fall back to DMA memory */
			case ZONE_DMA:
				zone = pgdat->node_zones + ZONE_DMA;
				if (zone->size)
					zonelist->zones[j++] = zone;
		}
		/* Terminator that the zone walkers in __alloc_pages() stop on. */
		zonelist->zones[j++] = NULL;
	}
}
753
/* Round x up to the next multiple of sizeof(long). */
#define LONG_ALIGN(x) (((x)+(sizeof(long))-1)&~((sizeof(long))-1))
755
/*
 * Set up the zone data structures:
 *   - mark all pages reserved
 *   - mark all memory queues empty
 *   - clear the memory bitmaps
 *
 * nid:              node number, used only in the boot message.
 * pgdat:            node descriptor to initialise.
 * gmap:             receives the address of the node's page array.
 * zones_size:       MAX_NR_ZONES entries, size of each zone in pages.
 * zone_start_paddr: physical address at which the node's memory starts.
 * zholes_size:      optional (may be NULL) per-zone hole sizes in pages;
 *                   subtracted when sizing the watermarks.
 * lmem_map:         page array to use, or NULL to allocate one from
 *                   bootmem.
 */
void __init free_area_init_core(int nid, pg_data_t *pgdat, struct page **gmap,
	unsigned long *zones_size, unsigned long zone_start_paddr,
	unsigned long *zholes_size, struct page *lmem_map)
{
	struct page *p;
	unsigned long i, j;
	unsigned long map_size;
	unsigned long totalpages, offset, realtotalpages;
	/* NOTE(review): 'cumulative' is only ever added to, never read here. */
	unsigned int cumulative = 0;

	/* totalpages spans holes; realtotalpages excludes them. */
	totalpages = 0;
	for (i = 0; i < MAX_NR_ZONES; i++) {
		unsigned long size = zones_size[i];
		totalpages += size;
	}
	realtotalpages = totalpages;
	if (zholes_size)
		for (i = 0; i < MAX_NR_ZONES; i++)
			realtotalpages -= zholes_size[i];

	printk("On node %d totalpages: %lu\n", nid, realtotalpages);

	/* The global lists are (re)initialised on every call. */
	memlist_init(&active_list);
	memlist_init(&inactive_dirty_list);

	/*
	 * Some architectures (with lots of mem and discontinous memory
	 * maps) have to search for a good mem_map area:
	 * For discontigmem, the conceptual mem map array starts from
	 * PAGE_OFFSET, we need to align the actual array onto a mem map
	 * boundary, so that MAP_NR works.
	 */
	/* One extra entry; presumably slack for the MAP_ALIGN adjustment below -- TODO confirm. */
	map_size = (totalpages + 1)*sizeof(struct page);
	if (lmem_map == (struct page *)0) {
		lmem_map = (struct page *) alloc_bootmem_node(pgdat, map_size);
		lmem_map = (struct page *)(PAGE_OFFSET +
			MAP_ALIGN((unsigned long)lmem_map - PAGE_OFFSET));
	}
	*gmap = pgdat->node_mem_map = lmem_map;
	pgdat->node_size = totalpages;
	pgdat->node_start_paddr = zone_start_paddr;
	pgdat->node_start_mapnr = (lmem_map - mem_map);

	/*
	 * Initially all pages are reserved - free ones are freed
	 * up by free_all_bootmem() once the early boot process is
	 * done.
	 */
	for (p = lmem_map; p < lmem_map + totalpages; p++) {
		set_page_count(p, 0);
		SetPageReserved(p);
		init_waitqueue_head(&p->wait);
		memlist_init(&p->list);
	}

	/* 'offset' is the mem_map index of the first page of the current zone. */
	offset = lmem_map - mem_map;
	for (j = 0; j < MAX_NR_ZONES; j++) {
		zone_t *zone = pgdat->node_zones + j;
		unsigned long mask;
		unsigned long size, realsize;

		realsize = size = zones_size[j];
		if (zholes_size)
			realsize -= zholes_size[j];

		printk("zone(%lu): %lu pages.\n", j, size);
		zone->size = size;
		zone->name = zone_names[j];
		zone->lock = SPIN_LOCK_UNLOCKED;
		zone->zone_pgdat = pgdat;
		zone->free_pages = 0;
		zone->inactive_clean_pages = 0;
		zone->inactive_dirty_pages = 0;
		memlist_init(&zone->inactive_clean_list);
		/* Empty zones get the basic fields above and nothing else. */
		if (!size)
			continue;

		zone->offset = offset;
		cumulative += size;
		/* Watermark unit: realsize / ratio, clamped to [min, max]. */
		mask = (realsize / zone_balance_ratio[j]);
		if (mask < zone_balance_min[j])
			mask = zone_balance_min[j];
		else if (mask > zone_balance_max[j])
			mask = zone_balance_max[j];
		zone->pages_min = mask;
		zone->pages_low = mask*2;
		zone->pages_high = mask*3;
		/*
		 * Add these free targets to the global free target;
		 * we have to be SURE that freepages.high is higher
		 * than SUM [zone->pages_min] for all zones, otherwise
		 * we may have bad bad problems.
		 *
		 * This means we cannot make the freepages array writable
		 * in /proc, but have to add a separate extra_free_target
		 * for people who require it to catch load spikes in eg.
		 * gigabit ethernet routing...
		 */
		freepages.min += mask;
		freepages.low += mask*2;
		freepages.high += mask*3;
		zone->zone_mem_map = mem_map + offset;
		zone->zone_start_mapnr = offset;
		zone->zone_start_paddr = zone_start_paddr;

		/*
		 * Tie every page to its zone. Outside ZONE_HIGHMEM each page
		 * also gets its kernel virtual address, and zone_start_paddr
		 * advances by one page per page initialised.
		 */
		for (i = 0; i < size; i++) {
			struct page *page = mem_map + offset + i;
			page->zone = zone;
			if (j != ZONE_HIGHMEM) {
				page->virtual = __va(zone_start_paddr);
				zone_start_paddr += PAGE_SIZE;
			}
		}

		offset += size;
		/*
		 * mask becomes -2, -4, -8, ... so on iteration i 'size' is
		 * rounded up to a multiple of 2^(i+1) pages before the bitmap
		 * for order i is sized (size >> i bits, rounded up to whole
		 * bytes and then to a multiple of sizeof(long)).
		 */
		mask = -1;
		for (i = 0; i < MAX_ORDER; i++) {
			unsigned long bitmap_size;

			memlist_init(&zone->free_area[i].free_list);
			mask += mask;
			size = (size + ~mask) & mask;
			bitmap_size = size >> i;
			bitmap_size = (bitmap_size + 7) >> 3;
			bitmap_size = LONG_ALIGN(bitmap_size);
			zone->free_area[i].map =
			  (unsigned int *) alloc_bootmem_node(pgdat, bitmap_size);
		}
	}
	build_zonelists(pgdat);
}
893
894 void __init free_area_init(unsigned long *zones_size)
895 {
896 free_area_init_core(0, &contig_page_data, &mem_map, zones_size, 0, 0, 0);
897 }
898
899 static int __init setup_mem_frac(char *str)
900 {
901 int j = 0;
902
903 while (get_option(&str, &zone_balance_ratio[j++]) == 2);
904 printk("setup_mem_frac: ");
905 for (j = 0; j < MAX_NR_ZONES; j++) printk("%d ", zone_balance_ratio[j]);
906 printk("\n");
907 return 1;
908 }
909
910 __setup("memfrac=", setup_mem_frac);
911
This page was automatically generated by the
LXR engine.
Visit the LXR main site for more
information.