1 /*
2 * linux/mm/slab.c
3 * Written by Mark Hemment, 1996/97.
4 * (markhe@nextd.demon.co.uk)
5 *
6 * kmem_cache_destroy() + some cleanup - 1999 Andrea Arcangeli
7 *
8 * Major cleanup, different bufctl logic, per-cpu arrays
9 * (c) 2000 Manfred Spraul
10 *
11 * An implementation of the Slab Allocator as described in outline in;
12 * UNIX Internals: The New Frontiers by Uresh Vahalia
13 * Pub: Prentice Hall ISBN 0-13-101908-2
14 * or with a little more detail in;
15 * The Slab Allocator: An Object-Caching Kernel Memory Allocator
16 * Jeff Bonwick (Sun Microsystems).
17 * Presented at: USENIX Summer 1994 Technical Conference
18 *
19 *
20 * The memory is organized in caches, one cache for each object type.
21 * (e.g. inode_cache, dentry_cache, buffer_head, vm_area_struct)
22 * Each cache consists out of many slabs (they are small (usually one
23 * page long) and always contiguous), and each slab contains multiple
24 * initialized objects.
25 *
26 * Each cache can only support one memory type (GFP_DMA, GFP_HIGHMEM,
27 * normal). If you need a special memory type, then you must create a
28 * new cache for that memory type.
29 *
30 * In order to reduce fragmentation, the slabs are sorted in 3 groups:
31 * full slabs with 0 free objects
32 * partial slabs
33 * empty slabs with no allocated objects
34 *
35 * If partial slabs exist, then new allocations come from these slabs,
36 * otherwise from empty slabs or new slabs are allocated.
37 *
38 * kmem_cache_destroy() CAN CRASH if you try to allocate from the cache
39 * during kmem_cache_destroy(). The caller must prevent concurrent allocs.
40 *
41 * On SMP systems, each cache has a short per-cpu head array, most allocs
42 * and frees go into that array, and if that array overflows, then 1/2
43 * of the entries in the array are given back into the global cache.
44 * This reduces the number of spinlock operations.
45 *
46 * The c_cpuarray may not be read with enabled local interrupts.
47 *
48 * SMP synchronization:
49 * constructors and destructors are called without any locking.
50 * Several members in kmem_cache_t and slab_t never change, they
51 * are accessed without any locking.
52 * The per-cpu arrays are never accessed from the wrong cpu, no locking.
53 * The non-constant members are protected with a per-cache irq spinlock.
54 *
55 * Further notes from the original documentation:
56 *
57 * 11 April '97. Started multi-threading - markhe
58 * The global cache-chain is protected by the semaphore 'cache_chain_sem'.
59 * The sem is only needed when accessing/extending the cache-chain, which
60 * can never happen inside an interrupt (kmem_cache_create(),
61 * kmem_cache_shrink() and kmem_cache_reap()).
62 *
63 * To prevent kmem_cache_shrink() trying to shrink a 'growing' cache (which
64 * may be sleeping and therefore not holding the semaphore/lock), the
65 * growing field is used. This also prevents reaping from a cache.
66 *
67 * At present, each engine can be growing a cache. This should be blocked.
68 *
69 */
70
71 #include <linux/config.h>
72 #include <linux/slab.h>
73 #include <linux/interrupt.h>
74 #include <linux/init.h>
75 #include <asm/uaccess.h>
76
/*
 * Compile-time switches for this file.
 *
 * DEBUG	- 1 for kmem_cache_create() to honour SLAB_DEBUG_INITIAL,
 *		  SLAB_RED_ZONE & SLAB_POISON.
 *		  0 for faster, smaller code (especially in the critical paths).
 *
 * STATS	- 1 to collect stats for /proc/slabinfo.
 *		  0 for faster, smaller code (especially in the critical paths).
 *
 * FORCED_DEBUG	- 1 enables SLAB_RED_ZONE and SLAB_POISON (if possible)
 */
#define	DEBUG		0
#define	STATS		0
#define	FORCED_DEBUG	0

/* Parameters for kmem_cache_reap(). */
#define	REAP_SCANLEN	10
#define	REAP_PERFECT	10

/* Word size used for alignment and red zones: the size of a pointer. */
#define	BYTES_PER_WORD	sizeof(void *)

/*
 * Flags a caller may legally hand to kmem_cache_create().
 * The debugging flags are only accepted when DEBUG is compiled in.
 */
#if DEBUG
# define CREATE_MASK	(SLAB_DEBUG_INITIAL | SLAB_RED_ZONE | \
			 SLAB_POISON | SLAB_HWCACHE_ALIGN | \
			 SLAB_NO_REAP | SLAB_CACHE_DMA)
#else
# define CREATE_MASK	(SLAB_HWCACHE_ALIGN | SLAB_NO_REAP | SLAB_CACHE_DMA)
#endif
109
/*
 * kmem_bufctl_t:
 *
 * Bufctls link the objects within a slab together; the links are
 * stored as object indices rather than pointers.
 *
 * This implementation relies on "struct page" for locating the cache &
 * slab an object belongs to.
 * This allows the bufctl structure to be small (one int), but limits
 * the number of objects a slab (not a cache) can contain when off-slab
 * bufctls are used. The limit is the size of the largest general cache
 * that does not use off-slab slabs.
 * For 32bit archs with 4 kB pages, this is 56.
 * This is not serious, as it is only for large objects, when it is unwise
 * to have too many per slab.
 * Note: This limit can be raised by introducing a general cache whose size
 * is less than 512 (PAGE_SIZE>>3), but greater than 256.
 */
typedef unsigned int kmem_bufctl_t;

/* Terminator of a slab's free chain, and the largest usable object count. */
#define BUFCTL_END	0xffffFFFF
#define	SLAB_LIMIT	0xffffFFFE

/*
 * Max number of objs-per-slab for caches which use off-slab slabs.
 * Needed to avoid a possible looping condition in kmem_cache_grow().
 */
static unsigned long offslab_limit;
137
138 /*
139 * slab_t
140 *
141 * Manages the objs in a slab. Placed either at the beginning of mem allocated
142 * for a slab, or allocated from an general cache.
143 * Slabs are chained into one ordered list: fully used, partial, then fully
144 * free slabs.
145 */
146 typedef struct slab_s {
147 struct list_head list;
148 unsigned long colouroff;
149 void *s_mem; /* including colour offset */
150 unsigned int inuse; /* num of objs active in slab */
151 kmem_bufctl_t free;
152 } slab_t;
153
154 #define slab_bufctl(slabp) \
155 ((kmem_bufctl_t *)(((slab_t*)slabp)+1))
156
/*
 * cpucache_t
 *
 * Per cpu structures
 * The limit is stored in the per-cpu structure to reduce the data cache
 * footprint.
 */
typedef struct cpucache_s {
	unsigned int avail;	/* number of entries currently held */
	unsigned int limit;
} cpucache_t;

/*
 * The array of cached object pointers starts right behind the header.
 * The argument is parenthesised so that any pointer expression may be
 * passed, not just a plain identifier.
 */
#define cc_entry(cpucache) \
	((void **)(((cpucache_t*)(cpucache))+1))

/* Slot of the executing cpu in a cache's per-cpu pointer table. */
#define cc_data(cachep) \
	((cachep)->cpudata[smp_processor_id()])
173 /*
174 * kmem_cache_t
175 *
176 * manages a cache.
177 */
178
179 #define CACHE_NAMELEN 20 /* max name length for a slab cache */
180
181 struct kmem_cache_s {
182 /* 1) each alloc & free */
183 /* full, partial first, then free */
184 struct list_head slabs;
185 struct list_head *firstnotfull;
186 unsigned int objsize;
187 unsigned int flags; /* constant flags */
188 unsigned int num; /* # of objs per slab */
189 spinlock_t spinlock;
190 #ifdef CONFIG_SMP
191 unsigned int batchcount;
192 #endif
193
194 /* 2) slab additions /removals */
195 /* order of pgs per slab (2^n) */
196 unsigned int gfporder;
197
198 /* force GFP flags, e.g. GFP_DMA */
199 unsigned int gfpflags;
200
201 size_t colour; /* cache colouring range */
202 unsigned int colour_off; /* colour offset */
203 unsigned int colour_next; /* cache colouring */
204 kmem_cache_t *slabp_cache;
205 unsigned int growing;
206 unsigned int dflags; /* dynamic flags */
207
208 /* constructor func */
209 void (*ctor)(void *, kmem_cache_t *, unsigned long);
210
211 /* de-constructor func */
212 void (*dtor)(void *, kmem_cache_t *, unsigned long);
213
214 unsigned long failures;
215
216 /* 3) cache creation/removal */
217 char name[CACHE_NAMELEN];
218 struct list_head next;
219 #ifdef CONFIG_SMP
220 /* 4) per-cpu data */
221 cpucache_t *cpudata[NR_CPUS];
222 #endif
223 #if STATS
224 unsigned long num_active;
225 unsigned long num_allocations;
226 unsigned long high_mark;
227 unsigned long grown;
228 unsigned long reaped;
229 unsigned long errors;
230 #ifdef CONFIG_SMP
231 atomic_t allochit;
232 atomic_t allocmiss;
233 atomic_t freehit;
234 atomic_t freemiss;
235 #endif
236 #endif
237 };
238
/* internal c_flags */
#define	CFLGS_OFF_SLAB	0x010000UL	/* slab management in own cache */
#define	CFLGS_OPTIMIZE	0x020000UL	/* optimized slab lookup */

/* c_dflags (dynamic flags). Need to hold the spinlock to access this member */
#define	DFLGS_GROWN	0x000001UL	/* don't reap a recently grown */

/*
 * Flag tests on a cache pointer.  OFF_SLAB() and OPTIMIZE() look at the
 * constant 'flags' member, GROWN() at the dynamic 'dflags' member.
 */
#define	OFF_SLAB(x)	((x)->flags & CFLGS_OFF_SLAB)
#define	OPTIMIZE(x)	((x)->flags & CFLGS_OPTIMIZE)
#define	GROWN(x)	((x)->dflags & DFLGS_GROWN)
249
/*
 * Statistics hooks.  When STATS is enabled they update the counters in
 * the cache descriptor; otherwise each one is an empty statement.
 */
#if STATS
#define	STATS_INC_ACTIVE(x)	((x)->num_active++)
#define	STATS_DEC_ACTIVE(x)	((x)->num_active--)
#define	STATS_INC_ALLOCED(x)	((x)->num_allocations++)
#define	STATS_INC_GROWN(x)	((x)->grown++)
#define	STATS_INC_REAPED(x)	((x)->reaped++)
/* Record a new high-water mark of active objects. */
#define	STATS_SET_HIGH(x)	do { if ((x)->num_active > (x)->high_mark) \
					(x)->high_mark = (x)->num_active; \
				} while (0)
#define	STATS_INC_ERR(x)	((x)->errors++)
#else
#define	STATS_INC_ACTIVE(x)	do { } while (0)
#define	STATS_DEC_ACTIVE(x)	do { } while (0)
#define	STATS_INC_ALLOCED(x)	do { } while (0)
#define	STATS_INC_GROWN(x)	do { } while (0)
#define	STATS_INC_REAPED(x)	do { } while (0)
#define	STATS_SET_HIGH(x)	do { } while (0)
#define	STATS_INC_ERR(x)	do { } while (0)
#endif

/* Hit/miss counters; only kept on SMP builds with STATS. */
#if STATS && defined(CONFIG_SMP)
#define STATS_INC_ALLOCHIT(x)	atomic_inc(&(x)->allochit)
#define STATS_INC_ALLOCMISS(x)	atomic_inc(&(x)->allocmiss)
#define STATS_INC_FREEHIT(x)	atomic_inc(&(x)->freehit)
#define STATS_INC_FREEMISS(x)	atomic_inc(&(x)->freemiss)
#else
#define STATS_INC_ALLOCHIT(x)	do { } while (0)
#define STATS_INC_ALLOCMISS(x)	do { } while (0)
#define STATS_INC_FREEHIT(x)	do { } while (0)
#define STATS_INC_FREEMISS(x)	do { } while (0)
#endif
281
#if DEBUG
/*
 * Magic nums for obj red zoning.
 * Placed in the first word before and the first word after an obj.
 */
#define	RED_MAGIC1	0x5A2CF071UL	/* when obj is active */
#define	RED_MAGIC2	0x170FC2A5UL	/* when obj is inactive */

/* ...and for poisoning */
#define	POISON_BYTE	0x5a		/* byte value for poisoning */
#define	POISON_END	0xa5		/* end-byte of poisoning */

#endif

/* maximum size of an obj (in 2^order pages) */
#define	MAX_OBJ_ORDER	5	/* 32 pages */

/*
 * Do not go above this order unless 0 objects fit into the slab.
 * The LO value is the compiled-in default of slab_break_gfp_order.
 */
#define	BREAK_GFP_ORDER_HI	2
#define	BREAK_GFP_ORDER_LO	1
static int slab_break_gfp_order = BREAK_GFP_ORDER_LO;

/*
 * Absolute limit for the gfp order
 */
#define	MAX_GFP_ORDER	5	/* 32 pages */
309
310
/*
 * Macros for storing/retrieving the cachep and or slab from the
 * global 'mem_map'. These are used to find the slab an obj belongs to.
 * With kfree(), these are used to find the cache which an obj belongs to.
 *
 * The cache pointer is kept in the page's list.next and the slab
 * pointer in its list.prev.
 */
#define	SET_PAGE_CACHE(pg,x)  ((pg)->list.next = (struct list_head *)(x))
#define	GET_PAGE_CACHE(pg)    ((kmem_cache_t *)(pg)->list.next)
#define	SET_PAGE_SLAB(pg,x)   ((pg)->list.prev = (struct list_head *)(x))
#define	GET_PAGE_SLAB(pg)     ((slab_t *)(pg)->list.prev)
319
320 /* Size description struct for general caches. */
321 typedef struct cache_sizes {
322 size_t cs_size;
323 kmem_cache_t *cs_cachep;
324 kmem_cache_t *cs_dmacachep;
325 } cache_sizes_t;
326
327 static cache_sizes_t cache_sizes[] = {
328 #if PAGE_SIZE == 4096
329 { 32, NULL, NULL},
330 #endif
331 { 64, NULL, NULL},
332 { 128, NULL, NULL},
333 { 256, NULL, NULL},
334 { 512, NULL, NULL},
335 { 1024, NULL, NULL},
336 { 2048, NULL, NULL},
337 { 4096, NULL, NULL},
338 { 8192, NULL, NULL},
339 { 16384, NULL, NULL},
340 { 32768, NULL, NULL},
341 { 65536, NULL, NULL},
342 {131072, NULL, NULL},
343 { 0, NULL, NULL}
344 };
345
346 /* internal cache of cache description objs */
347 static kmem_cache_t cache_cache = {
348 slabs: LIST_HEAD_INIT(cache_cache.slabs),
349 firstnotfull: &cache_cache.slabs,
350 objsize: sizeof(kmem_cache_t),
351 flags: SLAB_NO_REAP,
352 spinlock: SPIN_LOCK_UNLOCKED,
353 colour_off: L1_CACHE_BYTES,
354 name: "kmem_cache",
355 };
356
357 /* Guard access to the cache-chain. */
358 static struct semaphore cache_chain_sem;
359
360 /* Place maintainer for reaping. */
361 static kmem_cache_t *clock_searchp = &cache_cache;
362
363 #define cache_chain (cache_cache.next)
364
#ifdef CONFIG_SMP
/*
 * chicken and egg problem: delay the per-cpu array allocation
 * until the general caches are up.
 *
 * g_cpucache_up is set by kmem_cpucache_init(); kmem_cache_create()
 * only calls enable_cpucache() once it is non-zero.
 */
static int g_cpucache_up;

static void enable_cpucache (kmem_cache_t *cachep);
static void enable_all_cpucaches (void);
#endif
375
376 /* Cal the num objs, wastage, and bytes left over for a given slab size. */
377 static void kmem_cache_estimate (unsigned long gfporder, size_t size,
378 int flags, size_t *left_over, unsigned int *num)
379 {
380 int i;
381 size_t wastage = PAGE_SIZE<<gfporder;
382 size_t extra = 0;
383 size_t base = 0;
384
385 if (!(flags & CFLGS_OFF_SLAB)) {
386 base = sizeof(slab_t);
387 extra = sizeof(kmem_bufctl_t);
388 }
389 i = 0;
390 while (i*size + L1_CACHE_ALIGN(base+i*extra) <= wastage)
391 i++;
392 if (i > 0)
393 i--;
394
395 if (i > SLAB_LIMIT)
396 i = SLAB_LIMIT;
397
398 *num = i;
399 wastage -= i*size;
400 wastage -= L1_CACHE_ALIGN(base+i*extra);
401 *left_over = wastage;
402 }
403
404 /* Initialisation - setup the `cache' cache. */
405 void __init kmem_cache_init(void)
406 {
407 size_t left_over;
408
409 init_MUTEX(&cache_chain_sem);
410 INIT_LIST_HEAD(&cache_chain);
411
412 kmem_cache_estimate(0, cache_cache.objsize, 0,
413 &left_over, &cache_cache.num);
414 if (!cache_cache.num)
415 BUG();
416
417 cache_cache.colour = left_over/cache_cache.colour_off;
418 cache_cache.colour_next = 0;
419 }
420
421
422 /* Initialisation - setup remaining internal and general caches.
423 * Called after the gfp() functions have been enabled, and before smp_init().
424 */
425 void __init kmem_cache_sizes_init(void)
426 {
427 cache_sizes_t *sizes = cache_sizes;
428 char name[20];
429 /*
430 * Fragmentation resistance on low memory - only use bigger
431 * page orders on machines with more than 32MB of memory.
432 */
433 if (num_physpages > (32 << 20) >> PAGE_SHIFT)
434 slab_break_gfp_order = BREAK_GFP_ORDER_HI;
435 do {
436 /* For performance, all the general caches are L1 aligned.
437 * This should be particularly beneficial on SMP boxes, as it
438 * eliminates "false sharing".
439 * Note for systems short on memory removing the alignment will
440 * allow tighter packing of the smaller caches. */
441 sprintf(name,"size-%Zd",sizes->cs_size);
442 if (!(sizes->cs_cachep =
443 kmem_cache_create(name, sizes->cs_size,
444 0, SLAB_HWCACHE_ALIGN, NULL, NULL))) {
445 BUG();
446 }
447
448 /* Inc off-slab bufctl limit until the ceiling is hit. */
449 if (!(OFF_SLAB(sizes->cs_cachep))) {
450 offslab_limit = sizes->cs_size-sizeof(slab_t);
451 offslab_limit /= 2;
452 }
453 sprintf(name, "size-%Zd(DMA)",sizes->cs_size);
454 sizes->cs_dmacachep = kmem_cache_create(name, sizes->cs_size, 0,
455 SLAB_CACHE_DMA|SLAB_HWCACHE_ALIGN, NULL, NULL);
456 if (!sizes->cs_dmacachep)
457 BUG();
458 sizes++;
459 } while (sizes->cs_size);
460 }
461
462 int __init kmem_cpucache_init(void)
463 {
464 #ifdef CONFIG_SMP
465 g_cpucache_up = 1;
466 enable_all_cpucaches();
467 #endif
468 return 0;
469 }
470
471 __initcall(kmem_cpucache_init);
472
473 /* Interface to system's page allocator. No need to hold the cache-lock.
474 */
475 static inline void * kmem_getpages (kmem_cache_t *cachep, unsigned long flags)
476 {
477 void *addr;
478
479 /*
480 * If we requested dmaable memory, we will get it. Even if we
481 * did not request dmaable memory, we might get it, but that
482 * would be relatively rare and ignorable.
483 */
484 flags |= cachep->gfpflags;
485 addr = (void*) __get_free_pages(flags, cachep->gfporder);
486 /* Assume that now we have the pages no one else can legally
487 * messes with the 'struct page's.
488 * However vm_scan() might try to test the structure to see if
489 * it is a named-page or buffer-page. The members it tests are
490 * of no interest here.....
491 */
492 return addr;
493 }
494
495 /* Interface to system's page release. */
496 static inline void kmem_freepages (kmem_cache_t *cachep, void *addr)
497 {
498 unsigned long i = (1<<cachep->gfporder);
499 struct page *page = virt_to_page(addr);
500
501 /* free_pages() does not clear the type bit - we do that.
502 * The pages have been unlinked from their cache-slab,
503 * but their 'struct page's might be accessed in
504 * vm_scan(). Shouldn't be a worry.
505 */
506 while (i--) {
507 PageClearSlab(page);
508 page++;
509 }
510 free_pages((unsigned long)addr, cachep->gfporder);
511 }
512
#if DEBUG
/*
 * Fill an object with POISON_BYTE and mark its last byte with POISON_END.
 * With red zoning the first and last word of the object are left alone.
 */
static inline void kmem_poison_obj (kmem_cache_t *cachep, void *addr)
{
	int len = cachep->objsize;

	if (cachep->flags & SLAB_RED_ZONE) {
		/* step over the leading zone, drop both zones from the length */
		len -= 2*BYTES_PER_WORD;
		addr += BYTES_PER_WORD;
	}
	memset(addr, POISON_BYTE, len);
	*(unsigned char *)(addr+len-1) = POISON_END;
}

/*
 * Check the poisoning of an object.
 * Returns 0 when the first POISON_END byte is the object's last byte,
 * 1 otherwise (including when no POISON_END byte is found).
 */
static inline int kmem_check_poison_obj (kmem_cache_t *cachep, void *addr)
{
	int len = cachep->objsize;
	void *first_end;

	if (cachep->flags & SLAB_RED_ZONE) {
		len -= 2*BYTES_PER_WORD;
		addr += BYTES_PER_WORD;
	}
	first_end = memchr(addr, POISON_END, len);
	return first_end != (addr+len-1);
}
#endif
539
540 /* Destroy all the objs in a slab, and release the mem back to the system.
541 * Before calling the slab must have been unlinked from the cache.
542 * The cache-lock is not held/needed.
543 */
544 static void kmem_slab_destroy (kmem_cache_t *cachep, slab_t *slabp)
545 {
546 if (cachep->dtor
547 #if DEBUG
548 || cachep->flags & (SLAB_POISON | SLAB_RED_ZONE)
549 #endif
550 ) {
551 int i;
552 for (i = 0; i < cachep->num; i++) {
553 void* objp = slabp->s_mem+cachep->objsize*i;
554 #if DEBUG
555 if (cachep->flags & SLAB_RED_ZONE) {
556 if (*((unsigned long*)(objp)) != RED_MAGIC1)
557 BUG();
558 if (*((unsigned long*)(objp + cachep->objsize
559 -BYTES_PER_WORD)) != RED_MAGIC1)
560 BUG();
561 objp += BYTES_PER_WORD;
562 }
563 #endif
564 if (cachep->dtor)
565 (cachep->dtor)(objp, cachep, 0);
566 #if DEBUG
567 if (cachep->flags & SLAB_RED_ZONE) {
568 objp -= BYTES_PER_WORD;
569 }
570 if ((cachep->flags & SLAB_POISON) &&
571 kmem_check_poison_obj(cachep, objp))
572 BUG();
573 #endif
574 }
575 }
576
577 kmem_freepages(cachep, slabp->s_mem-slabp->colouroff);
578 if (OFF_SLAB(cachep))
579 kmem_cache_free(cachep->slabp_cache, slabp);
580 }
581
582 /**
583 * kmem_cache_create - Create a cache.
584 * @name: A string which is used in /proc/slabinfo to identify this cache.
585 * @size: The size of objects to be created in this cache.
586 * @offset: The offset to use within the page.
587 * @flags: SLAB flags
588 * @ctor: A constructor for the objects.
589 * @dtor: A destructor for the objects.
590 *
591 * Returns a ptr to the cache on success, NULL on failure.
592 * Cannot be called within a int, but can be interrupted.
593 * The @ctor is run when new pages are allocated by the cache
594 * and the @dtor is run before the pages are handed back.
595 * The flags are
596 *
597 * %SLAB_POISON - Poison the slab with a known test pattern (a5a5a5a5)
598 * to catch references to uninitialised memory.
599 *
600 * %SLAB_RED_ZONE - Insert `Red' zones around the allocated memory to check
601 * for buffer overruns.
602 *
603 * %SLAB_NO_REAP - Don't automatically reap this cache when we're under
604 * memory pressure.
605 *
606 * %SLAB_HWCACHE_ALIGN - Align the objects in this cache to a hardware
607 * cacheline. This can be beneficial if you're counting cycles as closely
608 * as davem.
609 */
610 kmem_cache_t *
611 kmem_cache_create (const char *name, size_t size, size_t offset,
612 unsigned long flags, void (*ctor)(void*, kmem_cache_t *, unsigned long),
613 void (*dtor)(void*, kmem_cache_t *, unsigned long))
614 {
615 const char *func_nm = KERN_ERR "kmem_create: ";
616 size_t left_over, align, slab_size;
617 kmem_cache_t *cachep = NULL;
618
619 /*
620 * Sanity checks... these are all serious usage bugs.
621 */
622 if ((!name) ||
623 ((strlen(name) >= CACHE_NAMELEN - 1)) ||
624 in_interrupt() ||
625 (size < BYTES_PER_WORD) ||
626 (size > (1<<MAX_OBJ_ORDER)*PAGE_SIZE) ||
627 (dtor && !ctor) ||
628 (offset < 0 || offset > size))
629 BUG();
630
631 #if DEBUG
632 if ((flags & SLAB_DEBUG_INITIAL) && !ctor) {
633 /* No constructor, but inital state check requested */
634 printk("%sNo con, but init state check requested - %s\n", func_nm, name);
635 flags &= ~SLAB_DEBUG_INITIAL;
636 }
637
638 if ((flags & SLAB_POISON) && ctor) {
639 /* request for poisoning, but we can't do that with a constructor */
640 printk("%sPoisoning requested, but con given - %s\n", func_nm, name);
641 flags &= ~SLAB_POISON;
642 }
643 #if FORCED_DEBUG
644 if (size < (PAGE_SIZE>>3))
645 /*
646 * do not red zone large object, causes severe
647 * fragmentation.
648 */
649 flags |= SLAB_RED_ZONE;
650 if (!ctor)
651 flags |= SLAB_POISON;
652 #endif
653 #endif
654
655 /*
656 * Always checks flags, a caller might be expecting debug
657 * support which isn't available.
658 */
659 if (flags & ~CREATE_MASK)
660 BUG();
661
662 /* Get cache's description obj. */
663 cachep = (kmem_cache_t *) kmem_cache_alloc(&cache_cache, SLAB_KERNEL);
664 if (!cachep)
665 goto opps;
666 memset(cachep, 0, sizeof(kmem_cache_t));
667
668 /* Check that size is in terms of words. This is needed to avoid
669 * unaligned accesses for some archs when redzoning is used, and makes
670 * sure any on-slab bufctl's are also correctly aligned.
671 */
672 if (size & (BYTES_PER_WORD-1)) {
673 size += (BYTES_PER_WORD-1);
674 size &= ~(BYTES_PER_WORD-1);
675 printk("%sForcing size word alignment - %s\n", func_nm, name);
676 }
677
678 #if DEBUG
679 if (flags & SLAB_RED_ZONE) {
680 /*
681 * There is no point trying to honour cache alignment
682 * when redzoning.
683 */
684 flags &= ~SLAB_HWCACHE_ALIGN;
685 size += 2*BYTES_PER_WORD; /* words for redzone */
686 }
687 #endif
688 align = BYTES_PER_WORD;
689 if (flags & SLAB_HWCACHE_ALIGN)
690 align = L1_CACHE_BYTES;
691
692 /* Determine if the slab management is 'on' or 'off' slab. */
693 if (size >= (PAGE_SIZE>>3))
694 /*
695 * Size is large, assume best to place the slab management obj
696 * off-slab (should allow better packing of objs).
697 */
698 flags |= CFLGS_OFF_SLAB;
699
700 if (flags & SLAB_HWCACHE_ALIGN) {
701 /* Need to adjust size so that objs are cache aligned. */
702 /* Small obj size, can get at least two per cache line. */
703 /* FIXME: only power of 2 supported, was better */
704 while (size < align/2)
705 align /= 2;
706 size = (size+align-1)&(~(align-1));
707 }
708
709 /* Cal size (in pages) of slabs, and the num of objs per slab.
710 * This could be made much more intelligent. For now, try to avoid
711 * using high page-orders for slabs. When the gfp() funcs are more
712 * friendly towards high-order requests, this should be changed.
713 */
714 do {
715 unsigned int break_flag = 0;
716 cal_wastage:
717 kmem_cache_estimate(cachep->gfporder, size, flags,
718 &left_over, &cachep->num);
719 if (break_flag)
720 break;
721 if (cachep->gfporder >= MAX_GFP_ORDER)
722 break;
723 if (!cachep->num)
724 goto next;
725 if (flags & CFLGS_OFF_SLAB && cachep->num > offslab_limit) {
726 /* Oops, this num of objs will cause problems. */
727 cachep->gfporder--;
728 break_flag++;
729 goto cal_wastage;
730 }
731
732 /*
733 * Large num of objs is good, but v. large slabs are currently
734 * bad for the gfp()s.
735 */
736 if (cachep->gfporder >= slab_break_gfp_order)
737 break;
738
739 if ((left_over*8) <= (PAGE_SIZE<<cachep->gfporder))
740 break; /* Acceptable internal fragmentation. */
741 next:
742 cachep->gfporder++;
743 } while (1);
744
745 if (!cachep->num) {
746 printk("kmem_cache_create: couldn't create cache %s.\n", name);
747 kmem_cache_free(&cache_cache, cachep);
748 cachep = NULL;
749 goto opps;
750 }
751 slab_size = L1_CACHE_ALIGN(cachep->num*sizeof(kmem_bufctl_t)+sizeof(slab_t));
752
753 /*
754 * If the slab has been placed off-slab, and we have enough space then
755 * move it on-slab. This is at the expense of any extra colouring.
756 */
757 if (flags & CFLGS_OFF_SLAB && left_over >= slab_size) {
758 flags &= ~CFLGS_OFF_SLAB;
759 left_over -= slab_size;
760 }
761
762 /* Offset must be a multiple of the alignment. */
763 offset += (align-1);
764 offset &= ~(align-1);
765 if (!offset)
766 offset = L1_CACHE_BYTES;
767 cachep->colour_off = offset;
768 cachep->colour = left_over/offset;
769
770 /* init remaining fields */
771 if (!cachep->gfporder && !(flags & CFLGS_OFF_SLAB))
772 flags |= CFLGS_OPTIMIZE;
773
774 cachep->flags = flags;
775 cachep->gfpflags = 0;
776 if (flags & SLAB_CACHE_DMA)
777 cachep->gfpflags |= GFP_DMA;
778 spin_lock_init(&cachep->spinlock);
779 cachep->objsize = size;
780 INIT_LIST_HEAD(&cachep->slabs);
781 cachep->firstnotfull = &cachep->slabs;
782
783 if (flags & CFLGS_OFF_SLAB)
784 cachep->slabp_cache = kmem_find_general_cachep(slab_size,0);
785 cachep->ctor = ctor;
786 cachep->dtor = dtor;
787 /* Copy name over so we don't have problems with unloaded modules */
788 strcpy(cachep->name, name);
789
790 #ifdef CONFIG_SMP
791 if (g_cpucache_up)
792 enable_cpucache(cachep);
793 #endif
794 /* Need the semaphore to access the chain. */
795 down(&cache_chain_sem);
796 {
797 struct list_head *p;
798
799 list_for_each(p, &cache_chain) {
800 kmem_cache_t *pc = list_entry(p, kmem_cache_t, next);
801
802 /* The name field is constant - no lock needed. */
803 if (!strcmp(pc->name, name))
804 BUG();
805 }
806 }
807
808 /* There is no reason to lock our new cache before we
809 * link it in - no one knows about it yet...
810 */
811 list_add(&cachep->next, &cache_chain);
812 up(&cache_chain_sem);
813 opps:
814 return cachep;
815 }
816
817 /*
818 * This check if the kmem_cache_t pointer is chained in the cache_cache
819 * list. -arca
820 */
821 static int is_chained_kmem_cache(kmem_cache_t * cachep)
822 {
823 struct list_head *p;
824 int ret = 0;
825
826 /* Find the cache in the chain of caches. */
827 down(&cache_chain_sem);
828 list_for_each(p, &cache_chain) {
829 if (p == &cachep->next) {
830 ret = 1;
831 break;
832 }
833 }
834 up(&cache_chain_sem);
835
836 return ret;
837 }
838
839 #ifdef CONFIG_SMP
840 /*
841 * Waits for all CPUs to execute func().
842 */
843 static void smp_call_function_all_cpus(void (*func) (void *arg), void *arg)
844 {
845 local_irq_disable();
846 func(arg);
847 local_irq_enable();
848
849 if (smp_call_function(func, arg, 1, 1))
850 BUG();
851 }
852 typedef struct ccupdate_struct_s
853 {
854 kmem_cache_t *cachep;
855 cpucache_t *new[NR_CPUS];
856 } ccupdate_struct_t;
857
858 static void do_ccupdate_local(void *info)
859 {
860 ccupdate_struct_t *new = (ccupdate_struct_t *)info;
861 cpucache_t *old = cc_data(new->cachep);
862
863 cc_data(new->cachep) = new->new[smp_processor_id()];
864 new->new[smp_processor_id()] = old;
865 }
866
867 static void free_block (kmem_cache_t* cachep, void** objpp, int len);
868
869 static void drain_cpu_caches(kmem_cache_t *cachep)
870 {
871 ccupdate_struct_t new;
872 int i;
873
874 memset(&new.new,0,sizeof(new.new));
875
876 new.cachep = cachep;
877
878 down(&cache_chain_sem);
879 smp_call_function_all_cpus(do_ccupdate_local, (void *)&new);
880
881 for (i = 0; i < smp_num_cpus; i++) {
882 cpucache_t* ccold = new.new[cpu_logical_map(i)];
883 if (!ccold || (ccold->avail == 0))
884 continue;
885 local_irq_disable();
886 free_block(cachep, cc_entry(ccold), ccold->avail);
887 local_irq_enable();
888 ccold->avail = 0;
889 }
890 smp_call_function_all_cpus(do_ccupdate_local, (void *)&new);
891 up(&cache_chain_sem);
892 }
893
894 #else
895 #define drain_cpu_caches(cachep) do { } while (0)
896 #endif
897
898 static int __kmem_cache_shrink(kmem_cache_t *cachep)
899 {
900 slab_t *slabp;
901 int ret;
902
903 drain_cpu_caches(cachep);
904
905 spin_lock_irq(&cachep->spinlock);
906
907 /* If the cache is growing, stop shrinking. */
908 while (!cachep->growing) {
909 struct list_head *p;
910
911 p = cachep->slabs.prev;
912 if (p == &cachep->slabs)
913 break;
914
915 slabp = list_entry(cachep->slabs.prev, slab_t, list);
916 if (slabp->inuse)
917 break;
918
919 list_del(&slabp->list);
920 if (cachep->firstnotfull == &slabp->list)
921 cachep->firstnotfull = &cachep->slabs;
922
923 spin_unlock_irq(&cachep->spinlock);
924 kmem_slab_destroy(cachep, slabp);
925 spin_lock_irq(&cachep->spinlock);
926 }
927 ret = !list_empty(&cachep->slabs);
928 spin_unlock_irq(&cachep->spinlock);
929 return ret;
930 }
931
932 /**
933 * kmem_cache_shrink - Shrink a cache.
934 * @cachep: The cache to shrink.
935 *
936 * Releases as many slabs as possible for a cache.
937 * To help debugging, a zero exit status indicates all slabs were released.
938 */
939 int kmem_cache_shrink(kmem_cache_t *cachep)
940 {
941 if (!cachep || in_interrupt() || !is_chained_kmem_cache(cachep))
942 BUG();
943
944 return __kmem_cache_shrink(cachep);
945 }
946
947 /**
948 * kmem_cache_destroy - delete a cache
949 * @cachep: the cache to destroy
950 *
951 * Remove a kmem_cache_t object from the slab cache.
952 * Returns 0 on success.
953 *
954 * It is expected this function will be called by a module when it is
955 * unloaded. This will remove the cache completely, and avoid a duplicate
956 * cache being allocated each time a module is loaded and unloaded, if the
957 * module doesn't have persistent in-kernel storage across loads and unloads.
958 *
959 * The caller must guarantee that noone will allocate memory from the cache
960 * during the kmem_cache_destroy().
961 */
962 int kmem_cache_destroy (kmem_cache_t * cachep)
963 {
964 if (!cachep || in_interrupt() || cachep->growing)
965 BUG();
966
967 /* Find the cache in the chain of caches. */
968 down(&cache_chain_sem);
969 /* the chain is never empty, cache_cache is never destroyed */
970 if (clock_searchp == cachep)
971 clock_searchp = list_entry(cachep->next.next,
972 kmem_cache_t, next);
973 list_del(&cachep->next);
974 up(&cache_chain_sem);
975
976 if (__kmem_cache_shrink(cachep)) {
977 printk(KERN_ERR "kmem_cache_destroy: Can't free all objects %p\n",
978 cachep);
979 down(&cache_chain_sem);
980 list_add(&cachep->next,&cache_chain);
981 up(&cache_chain_sem);
982 return 1;
983 }
984 #ifdef CONFIG_SMP
985 {
986 int i;
987 for (i = 0; i < NR_CPUS; i++)
988 kfree(cachep->cpudata[i]);
989 }
990 #endif
991 kmem_cache_free(&cache_cache, cachep);
992
993 return 0;
994 }
995
996 /* Get the memory for a slab management obj. */
997 static inline slab_t * kmem_cache_slabmgmt (kmem_cache_t *cachep,
998 void *objp, int colour_off, int local_flags)
999 {
1000 slab_t *slabp;
1001
1002 if (OFF_SLAB(cachep)) {
1003 /* Slab management obj is off-slab. */
1004 slabp = kmem_cache_alloc(cachep->slabp_cache, local_flags);
1005 if (!slabp)
1006 return NULL;
1007 } else {
1008 /* FIXME: change to
1009 slabp = objp
1010 * if you enable OPTIMIZE
1011 */
1012 slabp = objp+colour_off;
1013 colour_off += L1_CACHE_ALIGN(cachep->num *
1014 sizeof(kmem_bufctl_t) + sizeof(slab_t));
1015 }
1016 slabp->inuse = 0;
1017 slabp->colouroff = colour_off;
1018 slabp->s_mem = objp+colour_off;
1019
1020 return slabp;
1021 }
1022
1023 static inline void kmem_cache_init_objs (kmem_cache_t * cachep,
1024 slab_t * slabp, unsigned long ctor_flags)
1025 {
1026 int i;
1027
1028 for (i = 0; i < cachep->num; i++) {
1029 void* objp = slabp->s_mem+cachep->objsize*i;
1030 #if DEBUG
1031 if (cachep->flags & SLAB_RED_ZONE) {
1032 *((unsigned long*)(objp)) = RED_MAGIC1;
1033 *((unsigned long*)(objp + cachep->objsize -
1034 BYTES_PER_WORD)) = RED_MAGIC1;
1035 objp += BYTES_PER_WORD;
1036 }
1037 #endif
1038
1039 /*
1040 * Constructors are not allowed to allocate memory from
1041 * the same cache which they are a constructor for.
1042 * Otherwise, deadlock. They must also be threaded.
1043 */
1044 if (cachep->ctor)
1045 cachep->ctor(objp, cachep, ctor_flags);
1046 #if DEBUG
1047 if (cachep->flags & SLAB_RED_ZONE)
1048 objp -= BYTES_PER_WORD;
1049 if (cachep->flags & SLAB_POISON)
1050 /* need to poison the objs */
1051 kmem_poison_obj(cachep, objp);
1052 if (cachep->flags & SLAB_RED_ZONE) {
1053 if (*((unsigned long*)(objp)) != RED_MAGIC1)
1054 BUG();
1055 if (*((unsigned long*)(objp + cachep->objsize -
1056 BYTES_PER_WORD)) != RED_MAGIC1)
1057 BUG();
1058 }
1059 #endif
1060 slab_bufctl(slabp)[i] = i+1;
1061 }
1062 slab_bufctl(slabp)[i-1] = BUFCTL_END;
1063 slabp->free = 0;
1064 }
1065
1066 /*
1067 * Grow (by 1) the number of slabs within a cache. This is called by
1068 * kmem_cache_alloc() when there are no active objs left in a cache.
1069 */
1070 static int kmem_cache_grow (kmem_cache_t * cachep, int flags)
1071 {
1072 slab_t *slabp;
1073 struct page *page;
1074 void *objp;
1075 size_t offset;
1076 unsigned int i, local_flags;
1077 unsigned long ctor_flags;
1078 unsigned long save_flags;
1079
1080 /* Be lazy and only check for valid flags here,
1081 * keeping it out of the critical path in kmem_cache_alloc().
1082 */
1083 if (flags & ~(SLAB_DMA|SLAB_LEVEL_MASK|SLAB_NO_GROW))
1084 BUG();
1085 if (flags & SLAB_NO_GROW)
1086 return 0;
1087
1088 /*
1089 * The test for missing atomic flag is performed here, rather than
1090 * the more obvious place, simply to reduce the critical path length
1091 * in kmem_cache_alloc(). If a caller is seriously mis-behaving they
1092 * will eventually be caught here (where it matters).
1093 */
1094 if (in_interrupt() && (flags & SLAB_LEVEL_MASK) != SLAB_ATOMIC)
1095 BUG();
1096
1097 ctor_flags = SLAB_CTOR_CONSTRUCTOR;
1098 local_flags = (flags & SLAB_LEVEL_MASK);
1099 if (local_flags == SLAB_ATOMIC)
1100 /*
1101 * Not allowed to sleep. Need to tell a constructor about
1102 * this - it might need to know...
1103 */
1104 ctor_flags |= SLAB_CTOR_ATOMIC;
1105
1106 /* About to mess with non-constant members - lock. */
1107 spin_lock_irqsave(&cachep->spinlock, save_flags);
1108
1109 /* Get colour for the slab, and cal the next value. */
1110 offset = cachep->colour_next;
1111 cachep->colour_next++;
1112 if (cachep->colour_next >= cachep->colour)
1113 cachep->colour_next = 0;
1114 offset *= cachep->colour_off;
1115 cachep->dflags |= DFLGS_GROWN;
1116
1117 cachep->growing++;
1118 spin_unlock_irqrestore(&cachep->spinlock, save_flags);
1119
1120 /* A series of memory allocations for a new slab.
1121 * Neither the cache-chain semaphore, or cache-lock, are
1122 * held, but the incrementing c_growing prevents this
1123 * cache from being reaped or shrunk.
1124 * Note: The cache could be selected in for reaping in
1125 * kmem_cache_reap(), but when the final test is made the
1126 * growing value will be seen.
1127 */
1128
1129 /* Get mem for the objs. */
1130 if (!(objp = kmem_getpages(cachep, flags)))
1131 goto failed;
1132
1133 /* Get slab management. */
1134 if (!(slabp = kmem_cache_slabmgmt(cachep, objp, offset, local_flags)))
1135 goto opps1;
1136
1137 /* Nasty!!!!!! I hope this is OK. */
1138 i = 1 << cachep->gfporder;
1139 page = virt_to_page(objp);
1140 do {
1141 SET_PAGE_CACHE(page, cachep);
1142 SET_PAGE_SLAB(page, slabp);
1143 PageSetSlab(page);
1144 page++;
1145 } while (--i);
1146
1147 kmem_cache_init_objs(cachep, slabp, ctor_flags);
1148
1149 spin_lock_irqsave(&cachep->spinlock, save_flags);
1150 cachep->growing--;
1151
1152 /* Make slab active. */
1153 list_add_tail(&slabp->list,&cachep->slabs);
1154 if (cachep->firstnotfull == &cachep->slabs)
1155 cachep->firstnotfull = &slabp->list;
1156 STATS_INC_GROWN(cachep);
1157 cachep->failures = 0;
1158
1159 spin_unlock_irqrestore(&cachep->spinlock, save_flags);
1160 return 1;
1161 opps1:
1162 kmem_freepages(cachep, objp);
1163 failed:
1164 spin_lock_irqsave(&cachep->spinlock, save_flags);
1165 cachep->growing--;
1166 spin_unlock_irqrestore(&cachep->spinlock, save_flags);
1167 return 0;
1168 }
1169
/*
 * Extra sanity checks on free (DEBUG builds only):
 *  - the pointer must be the exact start of an object of this slab,
 *  - the object must not already sit on the slab's free list
 *    (double free).
 * Any violation ends in BUG(); otherwise 0 is returned.
 * Called with the cache-lock held.
 */

#if DEBUG
static int kmem_extra_free_checks (kmem_cache_t * cachep,
			slab_t *slabp, void * objp)
{
	unsigned int objnr = (objp-slabp->s_mem)/cachep->objsize;
	int cur;

	/* out of range, or not pointing at an object boundary */
	if (objnr >= cachep->num ||
	    objp != slabp->s_mem + objnr*cachep->objsize)
		BUG();

	/* Walk the slab's freelist to see if this obj is there. */
	cur = slabp->free;
	while (cur != BUFCTL_END) {
		if (cur == objnr)
			BUG();
		cur = slab_bufctl(slabp)[cur];
	}
	return 0;
}
#endif
1197
1198 static inline void kmem_cache_alloc_head(kmem_cache_t *cachep, int flags)
1199 {
1200 #if DEBUG
1201 if (flags & SLAB_DMA) {
1202 if (!(cachep->gfpflags & GFP_DMA))
1203 BUG();
1204 } else {
1205 if (cachep->gfpflags & GFP_DMA)
1206 BUG();
1207 }
1208 #endif
1209 }
1210
1211 static inline void * kmem_cache_alloc_one_tail (kmem_cache_t *cachep,
1212 slab_t *slabp)
1213 {
1214 void *objp;
1215
1216 STATS_INC_ALLOCED(cachep);
1217 STATS_INC_ACTIVE(cachep);
1218 STATS_SET_HIGH(cachep);
1219
1220 /* get obj pointer */
1221 slabp->inuse++;
1222 objp = slabp->s_mem + slabp->free*cachep->objsize;
1223 slabp->free=slab_bufctl(slabp)[slabp->free];
1224
1225 if (slabp->free == BUFCTL_END)
1226 /* slab now full: move to next slab for next alloc */
1227 cachep->firstnotfull = slabp->list.next;
1228 #if DEBUG
1229 if (cachep->flags & SLAB_POISON)
1230 if (kmem_check_poison_obj(cachep, objp))
1231 BUG();
1232 if (cachep->flags & SLAB_RED_ZONE) {
1233 /* Set alloc red-zone, and check old one. */
1234 if (xchg((unsigned long *)objp, RED_MAGIC2) !=
1235 RED_MAGIC1)
1236 BUG();
1237 if (xchg((unsigned long *)(objp+cachep->objsize -
1238 BYTES_PER_WORD), RED_MAGIC2) != RED_MAGIC1)
1239 BUG();
1240 objp += BYTES_PER_WORD;
1241 }
1242 #endif
1243 return objp;
1244 }
1245
/*
 * Returns a ptr to an obj in the given cache.
 * caller must guarantee synchronization
 * #define for the goto optimization 8-)
 *
 * If the cache has no slab with a free object (firstnotfull points at
 * the list head) control jumps to the label `alloc_new_slab', which the
 * using function must provide.  The macro argument is parenthesized at
 * every use so that any pointer expression can be passed safely.
 */
#define kmem_cache_alloc_one(cachep)				\
({								\
	slab_t	*slabp;						\
								\
	/* Get slab alloc is to come from. */			\
	{							\
		struct list_head* p = (cachep)->firstnotfull;	\
		if (p == &(cachep)->slabs)			\
			goto alloc_new_slab;			\
		slabp = list_entry(p,slab_t, list);		\
	}							\
	kmem_cache_alloc_one_tail((cachep), slabp);		\
})
1264
#ifdef CONFIG_SMP
/*
 * Refill the current CPU's object array with up to cachep->batchcount
 * objects taken from the cache's slabs (under the cache spinlock), then
 * hand out one of them.  Returns NULL when the array is still empty
 * afterwards.  `flags' is not used here.
 */
void* kmem_cache_alloc_batch(kmem_cache_t* cachep, int flags)
{
	cpucache_t *cc = cc_data(cachep);
	int todo = cachep->batchcount;

	spin_lock(&cachep->spinlock);
	for (; todo; todo--) {
		/* Get slab alloc is to come from. */
		struct list_head *first = cachep->firstnotfull;
		void *obj;

		/* no slab with free objects left */
		if (first == &cachep->slabs)
			break;
		obj = kmem_cache_alloc_one_tail(cachep,
					list_entry(first, slab_t, list));
		cc_entry(cc)[cc->avail++] = obj;
	}
	spin_unlock(&cachep->spinlock);

	if (!cc->avail)
		return NULL;
	return cc_entry(cc)[--cc->avail];
}
#endif
1290
1291 static inline void * __kmem_cache_alloc (kmem_cache_t *cachep, int flags)
1292 {
1293 unsigned long save_flags;
1294 void* objp;
1295
1296 kmem_cache_alloc_head(cachep, flags);
1297 try_again:
1298 local_irq_save(save_flags);
1299 #ifdef CONFIG_SMP
1300 {
1301 cpucache_t *cc = cc_data(cachep);
1302
1303 if (cc) {
1304 if (cc->avail) {
1305 STATS_INC_ALLOCHIT(cachep);
1306 objp = cc_entry(cc)[--cc->avail];
1307 } else {
1308 STATS_INC_ALLOCMISS(cachep);
1309 objp = kmem_cache_alloc_batch(cachep,flags);
1310 if (!objp)
1311 goto alloc_new_slab_nolock;
1312 }
1313 } else {
1314 spin_lock(&cachep->spinlock);
1315 objp = kmem_cache_alloc_one(cachep);
1316 spin_unlock(&cachep->spinlock);
1317 }
1318 }
1319 #else
1320 objp = kmem_cache_alloc_one(cachep);
1321 #endif
1322 local_irq_restore(save_flags);
1323 return objp;
1324 alloc_new_slab:
1325 #ifdef CONFIG_SMP
1326 spin_unlock(&cachep->spinlock);
1327 alloc_new_slab_nolock:
1328 #endif
1329 local_irq_restore(save_flags);
1330 if (kmem_cache_grow(cachep, flags))
1331 /* Someone may have stolen our objs. Doesn't matter, we'll
1332 * just come back here again.
1333 */
1334 goto try_again;
1335 return NULL;
1336 }
1337
1338 /*
1339 * Release an obj back to its cache. If the obj has a constructed
1340 * state, it should be in this state _before_ it is released.
1341 * - caller is responsible for the synchronization
1342 */
1343
/*
 * Pointer sanity checks used on the free paths.  Both macros report
 * through a variable named `objp' that must exist in the scope where
 * they are expanded.  Without DEBUG, CHECK_PAGE() expands to nothing.
 */
#if DEBUG
/* BUG() unless the page passes VALID_PAGE(). */
# define CHECK_NR(pg)						\
	do {							\
		if (VALID_PAGE(pg))				\
			break;					\
		printk(KERN_ERR "kfree: out of range ptr %lxh.\n",	\
			(unsigned long)objp);			\
		BUG();						\
	} while (0)
/* CHECK_NR() plus: BUG() unless the page is marked as a slab page. */
# define CHECK_PAGE(page)					\
	do {							\
		CHECK_NR(page);					\
		if (PageSlab(page))				\
			break;					\
		printk(KERN_ERR "kfree: bad ptr %lxh.\n",	\
			(unsigned long)objp);			\
		BUG();						\
	} while (0)

#else
# define CHECK_PAGE(pg)	do { } while (0)
#endif
1366
1367 static inline void kmem_cache_free_one(kmem_cache_t *cachep, void *objp)
1368 {
1369 slab_t* slabp;
1370
1371 CHECK_PAGE(virt_to_page(objp));
1372 /* reduces memory footprint
1373 *
1374 if (OPTIMIZE(cachep))
1375 slabp = (void*)((unsigned long)objp&(~(PAGE_SIZE-1)));
1376 else
1377 */
1378 slabp = GET_PAGE_SLAB(virt_to_page(objp));
1379
1380 #if DEBUG
1381 if (cachep->flags & SLAB_DEBUG_INITIAL)
1382 /* Need to call the slab's constructor so the
1383 * caller can perform a verify of its state (debugging).
1384 * Called without the cache-lock held.
1385 */
1386 cachep->ctor(objp, cachep, SLAB_CTOR_CONSTRUCTOR|SLAB_CTOR_VERIFY);
1387
1388 if (cachep->flags & SLAB_RED_ZONE) {
1389 objp -= BYTES_PER_WORD;
1390 if (xchg((unsigned long *)objp, RED_MAGIC1) != RED_MAGIC2)
1391 /* Either write before start, or a double free. */
1392 BUG();
1393 if (xchg((unsigned long *)(objp+cachep->objsize -
1394 BYTES_PER_WORD), RED_MAGIC1) != RED_MAGIC2)
1395 /* Either write past end, or a double free. */
1396 BUG();
1397 }
1398 if (cachep->flags & SLAB_POISON)
1399 kmem_poison_obj(cachep, objp);
1400 if (kmem_extra_free_checks(cachep, slabp, objp))
1401 return;
1402 #endif
1403 {
1404 unsigned int objnr = (objp-slabp->s_mem)/cachep->objsize;
1405
1406 slab_bufctl(slabp)[objnr] = slabp->free;
1407 slabp->free = objnr;
1408 }
1409 STATS_DEC_ACTIVE(cachep);
1410
1411 /* fixup slab chain */
1412 if (slabp->inuse-- == cachep->num)
1413 goto moveslab_partial;
1414 if (!slabp->inuse)
1415 goto moveslab_free;
1416 return;
1417
1418 moveslab_partial:
1419 /* was full.
1420 * Even if the page is now empty, we can set c_firstnotfull to
1421 * slabp: there are no partial slabs in this case
1422 */
1423 {
1424 struct list_head *t = cachep->firstnotfull;
1425
1426 cachep->firstnotfull = &slabp->list;
1427 if (slabp->list.next == t)
1428 return;
1429 list_del(&slabp->list);
1430 list_add_tail(&slabp->list, t);
1431 return;
1432 }
1433 moveslab_free:
1434 /*
1435 * was partial, now empty.
1436 * c_firstnotfull might point to slabp
1437 * FIXME: optimize
1438 */
1439 {
1440 struct list_head *t = cachep->firstnotfull->prev;
1441
1442 list_del(&slabp->list);
1443 list_add_tail(&slabp->list, &cachep->slabs);
1444 if (cachep->firstnotfull == &slabp->list)
1445 cachep->firstnotfull = t->next;
1446 return;
1447 }
1448 }
1449
#ifdef CONFIG_SMP
/*
 * Return `len' objects from the array objpp to their slabs.
 * Does no locking itself.
 */
static inline void __free_block (kmem_cache_t* cachep,
							void** objpp, int len)
{
	int i;

	for (i = 0; i < len; i++)
		kmem_cache_free_one(cachep, objpp[i]);
}

/* Same as __free_block(), but takes the cache spinlock around it. */
static void free_block (kmem_cache_t* cachep, void** objpp, int len)
{
	spin_lock(&cachep->spinlock);
	__free_block(cachep, objpp, len);
	spin_unlock(&cachep->spinlock);
}
#endif
1465
1466 /*
1467 * __kmem_cache_free
1468 * called with disabled ints
1469 */
1470 static inline void __kmem_cache_free (kmem_cache_t *cachep, void* objp)
1471 {
1472 #ifdef CONFIG_SMP
1473 cpucache_t *cc = cc_data(cachep);
1474
1475 CHECK_PAGE(virt_to_page(objp));
1476 if (cc) {
1477 int batchcount;
1478 if (cc->avail < cc->limit) {
1479 STATS_INC_FREEHIT(cachep);
1480 cc_entry(cc)[cc->avail++] = objp;
1481 return;
1482 }
1483 STATS_INC_FREEMISS(cachep);
1484 batchcount = cachep->batchcount;
1485 cc->avail -= batchcount;
1486 free_block(cachep,
1487 &cc_entry(cc)[cc->avail],batchcount);
1488 cc_entry(cc)[cc->avail++] = objp;
1489 return;
1490 } else {
1491 free_block(cachep, &objp, 1);
1492 }
1493 #else
1494 kmem_cache_free_one(cachep, objp);
1495 #endif
1496 }
1497
1498 /**
1499 * kmem_cache_alloc - Allocate an object
1500 * @cachep: The cache to allocate from.
1501 * @flags: See kmalloc().
1502 *
1503 * Allocate an object from this cache. The flags are only relevant
1504 * if the cache has no available objects.
1505 */
1506 void * kmem_cache_alloc (kmem_cache_t *cachep, int flags)
1507 {
1508 return __kmem_cache_alloc(cachep, flags);
1509 }
1510
1511 /**
1512 * kmalloc - allocate memory
1513 * @size: how many bytes of memory are required.
1514 * @flags: the type of memory to allocate.
1515 *
1516 * kmalloc is the normal method of allocating memory
1517 * in the kernel. The @flags argument may be one of:
1518 *
1519 * %GFP_BUFFER - XXX
1520 *
1521 * %GFP_ATOMIC - allocation will not sleep. Use inside interrupt handlers.
1522 *
1523 * %GFP_USER - allocate memory on behalf of user. May sleep.
1524 *
1525 * %GFP_KERNEL - allocate normal kernel ram. May sleep.
1526 *
1527 * %GFP_NFS - has a slightly lower probability of sleeping than %GFP_KERNEL.
1528 * Don't use unless you're in the NFS code.
1529 *
1530 * %GFP_KSWAPD - Don't use unless you're modifying kswapd.
1531 */
1532 void * kmalloc (size_t size, int flags)
1533 {
1534 cache_sizes_t *csizep = cache_sizes;
1535
1536 for (; csizep->cs_size; csizep++) {
1537 if (size > csizep->cs_size)
1538 continue;
1539 return __kmem_cache_alloc(flags & GFP_DMA ?
1540 csizep->cs_dmacachep : csizep->cs_cachep, flags);
1541 }
1542 BUG(); // too big size
1543 return NULL;
1544 }
1545
1546 /**
1547 * kmem_cache_free - Deallocate an object
1548 * @cachep: The cache the allocation was from.
1549 * @objp: The previously allocated object.
1550 *
1551 * Free an object which was previously allocated from this
1552 * cache.
1553 */
1554 void kmem_cache_free (kmem_cache_t *cachep, void *objp)
1555 {
1556 unsigned long flags;
1557 #if DEBUG
1558 CHECK_PAGE(virt_to_page(objp));
1559 if (cachep != GET_PAGE_CACHE(virt_to_page(objp)))
1560 BUG();
1561 #endif
1562
1563 local_irq_save(flags);
1564 __kmem_cache_free(cachep, objp);
1565 local_irq_restore(flags);
1566 }
1567
1568 /**
1569 * kfree - free previously allocated memory
1570 * @objp: pointer returned by kmalloc.
1571 *
1572 * Don't free memory not originally allocated by kmalloc()
1573 * or you will run into trouble.
1574 */
1575 void kfree (const void *objp)
1576 {
1577 kmem_cache_t *c;
1578 unsigned long flags;
1579
1580 if (!objp)
1581 return;
1582 local_irq_save(flags);
1583 CHECK_PAGE(virt_to_page(objp));
1584 c = GET_PAGE_CACHE(virt_to_page(objp));
1585 __kmem_cache_free(c, (void*)objp);
1586 local_irq_restore(flags);
1587 }
1588
1589 kmem_cache_t * kmem_find_general_cachep (size_t size, int gfpflags)
1590 {
1591 cache_sizes_t *csizep = cache_sizes;
1592
1593 /* This function could be moved to the header file, and
1594 * made inline so consumers can quickly determine what
1595 * cache pointer they require.
1596 */
1597 for ( ; csizep->cs_size; csizep++) {
1598 if (size > csizep->cs_size)
1599 continue;
1600 break;
1601 }
1602 return (gfpflags & GFP_DMA) ? csizep->cs_dmacachep : csizep->cs_cachep;
1603 }
1604
1605 #ifdef CONFIG_SMP
1606
1607 /* called with cache_chain_sem acquired. */
1608 static int kmem_tune_cpucache (kmem_cache_t* cachep, int limit, int batchcount)
1609 {
1610 ccupdate_struct_t new;
1611 int i;
1612
1613 /*
1614 * These are admin-provided, so we are more graceful.
1615 */
1616 if (limit < 0)
1617 return -EINVAL;
1618 if (batchcount < 0)
1619 return -EINVAL;
1620 if (batchcount > limit)
1621 return -EINVAL;
1622 if (limit != 0 && !batchcount)
1623 return -EINVAL;
1624
1625 memset(&new.new,0,sizeof(new.new));
1626 if (limit) {
1627 for (i = 0; i< smp_num_cpus; i++) {
1628 cpucache_t* ccnew;
1629
1630 ccnew = kmalloc(sizeof(void*)*limit+
1631 sizeof(cpucache_t), GFP_KERNEL);
1632 if (!ccnew)
1633 goto oom;
1634 ccnew->limit = limit;
1635 ccnew->avail = 0;
1636 new.new[cpu_logical_map(i)] = ccnew;
1637 }
1638 }
1639 new.cachep = cachep;
1640 spin_lock_irq(&cachep->spinlock);
1641 cachep->batchcount = batchcount;
1642 spin_unlock_irq(&cachep->spinlock);
1643
1644 smp_call_function_all_cpus(do_ccupdate_local, (void *)&new);
1645
1646 for (i = 0; i < smp_num_cpus; i++) {
1647 cpucache_t* ccold = new.new[cpu_logical_map(i)];
1648 if (!ccold)
1649 continue;
1650 local_irq_disable();
1651 free_block(cachep, cc_entry(ccold), ccold->avail);
1652 local_irq_enable();
1653 kfree(ccold);
1654 }
1655 return 0;
1656 oom:
1657 for (i--; i >= 0; i--)
1658 kfree(new.new[cpu_logical_map(i)]);
1659 return -ENOMEM;
1660 }
1661
1662 static void enable_cpucache (kmem_cache_t *cachep)
1663 {
1664 int err;
1665 int limit;
1666
1667 /* FIXME: optimize */
1668 if (cachep->objsize > PAGE_SIZE)
1669 return;
1670 if (cachep->objsize > 1024)
1671 limit = 60;
1672 else if (cachep->objsize > 256)
1673 limit = 124;
1674 else
1675 limit = 252;
1676
1677 err = kmem_tune_cpucache(cachep, limit, limit/2);
1678 if (err)
1679 printk(KERN_ERR "enable_cpucache failed for %s, error %d.\n",
1680 cachep->name, -err);
1681 }
1682
1683 static void enable_all_cpucaches (void)
1684 {
1685 struct list_head* p;
1686
1687 down(&cache_chain_sem);
1688
1689 p = &cache_cache.next;
1690 do {
1691 kmem_cache_t* cachep = list_entry(p, kmem_cache_t, next);
1692
1693 enable_cpucache(cachep);
1694 p = cachep->next.next;
1695 } while (p != &cache_cache.next);
1696
1697 up(&cache_chain_sem);
1698 }
1699 #endif
1700
1701 /**
1702 * kmem_cache_reap - Reclaim memory from caches.
1703 * @gfp_mask: the type of memory required.
1704 *
1705 * Called from try_to_free_page().
1706 */
1707 void kmem_cache_reap (int gfp_mask)
1708 {
1709 slab_t *slabp;
1710 kmem_cache_t *searchp;
1711 kmem_cache_t *best_cachep;
1712 unsigned int best_pages;
1713 unsigned int best_len;
1714 unsigned int scan;
1715
1716 if (gfp_mask & __GFP_WAIT)
1717 down(&cache_chain_sem);
1718 else
1719 if (down_trylock(&cache_chain_sem))
1720 return;
1721
1722 scan = REAP_SCANLEN;
1723 best_len = 0;
1724 best_pages = 0;
1725 best_cachep = NULL;
1726 searchp = clock_searchp;
1727 do {
1728 unsigned int pages;
1729 struct list_head* p;
1730 unsigned int full_free;
1731
1732 /* It's safe to test this without holding the cache-lock. */
1733 if (searchp->flags & SLAB_NO_REAP)
1734 goto next;
1735 spin_lock_irq(&searchp->spinlock);
1736 if (searchp->growing)
1737 goto next_unlock;
1738 if (searchp->dflags & DFLGS_GROWN) {
1739 searchp->dflags &= ~DFLGS_GROWN;
1740 goto next_unlock;
1741 }
1742 #ifdef CONFIG_SMP
1743 {
1744 cpucache_t *cc = cc_data(searchp);
1745 if (cc && cc->avail) {
1746 __free_block(searchp, cc_entry(cc), cc->avail);
1747 cc->avail = 0;
1748 }
1749 }
1750 #endif
1751
1752 full_free = 0;
1753 p = searchp->slabs.prev;
1754 while (p != &searchp->slabs) {
1755 slabp = list_entry(p, slab_t, list);
1756 if (slabp->inuse)
1757 break;
1758 full_free++;
1759 p = p->prev;
1760 }
1761
1762 /*
1763 * Try to avoid slabs with constructors and/or
1764 * more than one page per slab (as it can be difficult
1765 * to get high orders from gfp()).
1766 */
1767 pages = full_free * (1<<searchp->gfporder);
1768 if (searchp->ctor)
1769 pages = (pages*4+1)/5;
1770 if (searchp->gfporder)
1771 pages = (pages*4+1)/5;
1772 if (pages > best_pages) {
1773 best_cachep = searchp;
1774 best_len = full_free;
1775 best_pages = pages;
1776 if (full_free >= REAP_PERFECT) {
1777 clock_searchp = list_entry(searchp->next.next,
1778 kmem_cache_t,next);
1779 goto perfect;
1780 }
1781 }
1782 next_unlock:
1783 spin_unlock_irq(&searchp->spinlock);
1784 next:
1785 searchp = list_entry(searchp->next.next,kmem_cache_t,next);
1786 } while (--scan && searchp != clock_searchp);
1787
1788 clock_searchp = searchp;
1789
1790 if (!best_cachep)
1791 /* couldn't find anything to reap */
1792 goto out;
1793
1794 spin_lock_irq(&best_cachep->spinlock);
1795 perfect:
1796 /* free only 80% of the free slabs */
1797 best_len = (best_len*4 + 1)/5;
1798 for (scan = 0; scan < best_len; scan++) {
1799 struct list_head *p;
1800
1801 if (best_cachep->growing)
1802 break;
1803 p = best_cachep->slabs.prev;
1804 if (p == &best_cachep->slabs)
1805 break;
1806 slabp = list_entry(p,slab_t,list);
1807 if (slabp->inuse)
1808 break;
1809 list_del(&slabp->list);
1810 if (best_cachep->firstnotfull == &slabp->list)
1811 best_cachep->firstnotfull = &best_cachep->slabs;
1812 STATS_INC_REAPED(best_cachep);
1813
1814 /* Safe to drop the lock. The slab is no longer linked to the
1815 * cache.
1816 */
1817 spin_unlock_irq(&best_cachep->spinlock);
1818 kmem_slab_destroy(best_cachep, slabp);
1819 spin_lock_irq(&best_cachep->spinlock);
1820 }
1821 spin_unlock_irq(&best_cachep->spinlock);
1822 out:
1823 up(&cache_chain_sem);
1824 return;
1825 }
1826
1827 #ifdef CONFIG_PROC_FS
/* /proc/slabinfo
 *	cache-name num-active-objs total-objs
 *	obj-size num-active-slabs total-slabs
 *	num-pages-per-slab
 *
 * FIXUP(t) operates on the variables `len', `off' and `count' of the
 * expanding function: while the generated text still lies entirely in
 * front of the requested offset it is dropped (off is reduced, len is
 * reset); once more than `count' bytes past the offset exist, control
 * jumps to label `t'.
 */
#define FIXUP(t)				\
	do {					\
		if (len > off) {		\
			if (len-off > count)	\
				goto t;		\
		} else {			\
			off -= len;		\
			len = 0;		\
		}				\
	} while (0)
1843
1844 static int proc_getdata (char*page, char**start, off_t off, int count)
1845 {
1846 struct list_head *p;
1847 int len = 0;
1848
1849 /* Output format version, so at least we can change it without _too_
1850 * many complaints.
1851 */
1852 len += sprintf(page+len, "slabinfo - version: 1.1"
1853 #if STATS
1854 " (statistics)"
1855 #endif
1856 #ifdef CONFIG_SMP
1857 " (SMP)"
1858 #endif
1859 "\n");
1860 FIXUP(got_data);
1861
1862 down(&cache_chain_sem);
1863 p = &cache_cache.next;
1864 do {
1865 kmem_cache_t *cachep;
1866 struct list_head *q;
1867 slab_t *slabp;
1868 unsigned long active_objs;
1869 unsigned long num_objs;
1870 unsigned long active_slabs = 0;
1871 unsigned long num_slabs;
1872 cachep = list_entry(p, kmem_cache_t, next);
1873
1874 spin_lock_irq(&cachep->spinlock);
1875 active_objs = 0;
1876 num_slabs = 0;
1877 list_for_each(q,&cachep->slabs) {
1878 slabp = list_entry(q, slab_t, list);
1879 active_objs += slabp->inuse;
1880 num_objs += cachep->num;
1881 if (slabp->inuse)
1882 active_slabs++;
1883 else
1884 num_slabs++;
1885 }
1886 num_slabs+=active_slabs;
1887 num_objs = num_slabs*cachep->num;
1888
1889 len += sprintf(page+len, "%-17s %6lu %6lu %6u %4lu %4lu %4u",
1890 cachep->name, active_objs, num_objs, cachep->objsize,
1891 active_slabs, num_slabs, (1<<cachep->gfporder));
1892
1893 #if STATS
1894 {
1895 unsigned long errors = cachep->errors;
1896 unsigned long high = cachep->high_mark;
1897 unsigned long grown = cachep->grown;
1898 unsigned long reaped = cachep->reaped;
1899 unsigned long allocs = cachep->num_allocations;
1900
1901 len += sprintf(page+len, " : %6lu %7lu %5lu %4lu %4lu",
1902 high, allocs, grown, reaped, errors);
1903 }
1904 #endif
1905 #ifdef CONFIG_SMP
1906 {
1907 unsigned int batchcount = cachep->batchcount;
1908 unsigned int limit;
1909
1910 if (cc_data(cachep))
1911 limit = cc_data(cachep)->limit;
1912 else
1913 limit = 0;
1914 len += sprintf(page+len, " : %4u %4u",
1915 limit, batchcount);
1916 }
1917 #endif
1918 #if STATS && defined(CONFIG_SMP)
1919 {
1920 unsigned long allochit = atomic_read(&cachep->allochit);
1921 unsigned long allocmiss = atomic_read(&cachep->allocmiss);
1922 unsigned long freehit = atomic_read(&cachep->freehit);
1923 unsigned long freemiss = atomic_read(&cachep->freemiss);
1924 len += sprintf(page+len, " : %6lu %6lu %6lu %6lu",
1925 allochit, allocmiss, freehit, freemiss);
1926 }
1927 #endif
1928 len += sprintf(page+len,"\n");
1929 spin_unlock_irq(&cachep->spinlock);
1930 FIXUP(got_data_up);
1931 p = cachep->next.next;
1932 } while (p != &cache_cache.next);
1933 got_data_up:
1934 up(&cache_chain_sem);
1935
1936 got_data:
1937 *start = page+off;
1938 return len;
1939 }
1940
1941 /**
1942 * slabinfo_read_proc - generates /proc/slabinfo
1943 * @page: scratch area, one page long
1944 * @start: pointer to the pointer to the output buffer
1945 * @off: offset within /proc/slabinfo the caller is interested in
1946 * @count: requested len in bytes
1947 * @eof: eof marker
1948 * @data: unused
1949 *
1950 * The contents of the buffer are
1951 * cache-name
1952 * num-active-objs
1953 * total-objs
1954 * object size
1955 * num-active-slabs
1956 * total-slabs
1957 * num-pages-per-slab
1958 * + further values on SMP and with statistics enabled
1959 */
1960 int slabinfo_read_proc (char *page, char **start, off_t off,
1961 int count, int *eof, void *data)
1962 {
1963 int len = proc_getdata(page, start, off, count);
1964 len -= (*start-page);
1965 if (len <= count)
1966 *eof = 1;
1967 if (len>count) len = count;
1968 if (len<0) len = 0;
1969 return len;
1970 }
1971
1972 #define MAX_SLABINFO_WRITE 128
1973 /**
1974 * slabinfo_write_proc - SMP tuning for the slab allocator
1975 * @file: unused
1976 * @buffer: user buffer
1977 * @count: data len
1978 * @data: unused
1979 */
1980 int slabinfo_write_proc (struct file *file, const char *buffer,
1981 unsigned long count, void *data)
1982 {
1983 #ifdef CONFIG_SMP
1984 char kbuf[MAX_SLABINFO_WRITE], *tmp;
1985 int limit, batchcount, res;
1986 struct list_head *p;
1987
1988 if (count > MAX_SLABINFO_WRITE)
1989 return -EINVAL;
1990 if (copy_from_user(&kbuf, buffer, count))
1991 return -EFAULT;
1992
1993 tmp = strchr(kbuf, ' ');
1994 if (!tmp)
1995 return -EINVAL;
1996 *tmp = '\0';
1997 tmp++;
1998 limit = simple_strtol(tmp, &tmp, 10);
1999 while (*tmp == ' ')
2000 tmp++;
2001 batchcount = simple_strtol(tmp, &tmp, 10);
2002
2003 /* Find the cache in the chain of caches. */
2004 down(&cache_chain_sem);
2005 res = -EINVAL;
2006 list_for_each(p,&cache_chain) {
2007 kmem_cache_t *cachep = list_entry(p, kmem_cache_t, next);
2008
2009 if (!strcmp(cachep->name, kbuf)) {
2010 res = kmem_tune_cpucache(cachep, limit, batchcount);
2011 break;
2012 }
2013 }
2014 up(&cache_chain_sem);
2015 if (res >= 0)
2016 res = count;
2017 return res;
2018 #else
2019 return -EINVAL;
2020 #endif
2021 }
2022 #endif
2023
/*
 * This page was automatically generated by the LXR engine.
 * Visit the LXR main site for more information.
 */