/*
 * Slab allocator functions that are independent of the allocator strategy
 *
 * (C) 2012 Christoph Lameter <cl@linux.com>
 */
#include <linux/slab.h>

#include <linux/mm.h>
#include <linux/poison.h>
#include <linux/interrupt.h>
#include <linux/memory.h>
#include <linux/compiler.h>
#include <linux/module.h>
#include <linux/cpu.h>
#include <linux/uaccess.h>
#include <linux/seq_file.h>
#include <linux/proc_fs.h>
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>
#include <asm/page.h>
#include <linux/memcontrol.h>
#include <trace/events/kmem.h>

#include "slab.h"

enum slab_state slab_state;
LIST_HEAD(slab_caches);
DEFINE_MUTEX(slab_mutex);
struct kmem_cache *kmem_cache;

#ifdef CONFIG_DEBUG_VM
static int kmem_cache_sanity_check(struct mem_cgroup *memcg, const char *name,
				   size_t size)
{
	struct kmem_cache *s = NULL;

	if (!name || in_interrupt() || size < sizeof(void *) ||
	    size > KMALLOC_MAX_SIZE) {
		pr_err("kmem_cache_create(%s) integrity check failed\n", name);
		return -EINVAL;
	}

	list_for_each_entry(s, &slab_caches, list) {
		char tmp;
		int res;

		/*
		 * This happens when the module gets unloaded and doesn't
		 * destroy its slab cache and no-one else reuses the vmalloc
		 * area of the module.  Print a warning.
		 */
		res = probe_kernel_address(s->name, tmp);
		if (res) {
			pr_err("Slab cache with size %d has lost its name\n",
			       s->object_size);
			continue;
		}

#if !defined(CONFIG_SLUB) || !defined(CONFIG_SLUB_DEBUG_ON)
		/*
		 * For simplicity, we won't check this in the list of memcg
		 * caches. We have control over memcg naming, and if there
		 * aren't duplicates in the global list, there won't be any
		 * duplicates in the memcg lists as well.
		 */
		if (!memcg && !strcmp(s->name, name)) {
			pr_err("%s (%s): Cache name already exists.\n",
			       __func__, name);
			dump_stack();
			s = NULL;
			return -EINVAL;
		}
#endif
	}

	WARN_ON(strchr(name, ' '));	/* It confuses parsers */
	return 0;
}
#else
static inline int kmem_cache_sanity_check(struct mem_cgroup *memcg,
					  const char *name, size_t size)
{
	return 0;
}
#endif

#ifdef CONFIG_MEMCG_KMEM
int memcg_update_all_caches(int num_memcgs)
{
	struct kmem_cache *s;
	int ret = 0;
	mutex_lock(&slab_mutex);

	list_for_each_entry(s, &slab_caches, list) {
		if (!is_root_cache(s))
			continue;

		ret = memcg_update_cache_size(s, num_memcgs);
		/*
		 * See comment in memcontrol.c, memcg_update_cache_size:
		 * Instead of freeing the memory, we'll just leave the caches
		 * up to this point in an updated state.
		 */
		if (ret)
			goto out;
	}

	memcg_update_array_size(num_memcgs);
out:
	mutex_unlock(&slab_mutex);
	return ret;
}
#endif

/*
 * Figure out what the alignment of the objects will be given a set of
 * flags, a user-specified alignment and the size of the objects.
 */
unsigned long calculate_alignment(unsigned long flags,
		unsigned long align, unsigned long size)
{
	/*
	 * If the user wants hardware cache aligned objects then follow that
	 * suggestion if the object is sufficiently large.
	 *
	 * The hardware cache alignment cannot override the specified
	 * alignment though. If that is greater, then use it.
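	 *
	 * A worked example (illustrative, assuming a 64-byte cache line):
	 * with SLAB_HWCACHE_ALIGN, a 20-byte object halves ralign down to
	 * 32, so it ends up 32-byte aligned instead of being padded out to
	 * a full cache line.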
	 */
	if (flags & SLAB_HWCACHE_ALIGN) {
		unsigned long ralign = cache_line_size();
		while (size <= ralign / 2)
			ralign /= 2;
		align = max(align, ralign);
	}

	if (align < ARCH_SLAB_MINALIGN)
		align = ARCH_SLAB_MINALIGN;

	return ALIGN(align, sizeof(void *));
}


/*
 * kmem_cache_create - Create a cache.
 * @name: A string which is used in /proc/slabinfo to identify this cache.
 * @size: The size of objects to be created in this cache.
 * @align: The required alignment for the objects.
 * @flags: SLAB flags
 * @ctor: A constructor for the objects.
 *
 * Returns a ptr to the cache on success, NULL on failure.
 * Cannot be called within an interrupt, but can be interrupted.
 * The @ctor is run when new pages are allocated by the cache.
 *
 * The flags are
 *
 * %SLAB_POISON - Poison the slab with a known test pattern (a5a5a5a5)
 * to catch references to uninitialised memory.
 *
 * %SLAB_RED_ZONE - Insert `Red' zones around the allocated memory to check
 * for buffer overruns.
 *
 * %SLAB_HWCACHE_ALIGN - Align the objects in this cache to a hardware
 * cacheline.  This can be beneficial if you're counting cycles as closely
 * as davem.
 */

struct kmem_cache *
kmem_cache_create_memcg(struct mem_cgroup *memcg, const char *name, size_t size,
			size_t align, unsigned long flags, void (*ctor)(void *),
			struct kmem_cache *parent_cache)
{
	struct kmem_cache *s = NULL;
	int err;

	get_online_cpus();
	mutex_lock(&slab_mutex);

	err = kmem_cache_sanity_check(memcg, name, size);
	if (err)
		goto out_unlock;

	if (memcg) {
		/*
		 * Since per-memcg caches are created asynchronously on first
		 * allocation (see memcg_kmem_get_cache()), several threads can
		 * try to create the same cache, but only one of them may
		 * succeed. Therefore if we get here and see the cache has
		 * already been created, we silently return NULL.
		 */
		if (cache_from_memcg_idx(parent_cache, memcg_cache_id(memcg)))
			goto out_unlock;
	}

	/*
	 * Some allocators will constrain the set of valid flags to a subset
	 * of all flags. We expect them to define CACHE_CREATE_MASK in this
	 * case, and we'll just provide them with a sanitized version of the
	 * passed flags.
	 */
	flags &= CACHE_CREATE_MASK;

	s = __kmem_cache_alias(memcg, name, size, align, flags, ctor);
	if (s)
		goto out_unlock;

	err = -ENOMEM;
	s = kmem_cache_zalloc(kmem_cache, GFP_KERNEL);
	if (!s)
		goto out_unlock;

	s->object_size = s->size = size;
	s->align = calculate_alignment(flags, align, size);
	s->ctor = ctor;

	s->name = kstrdup(name, GFP_KERNEL);
	if (!s->name)
		goto out_free_cache;

	err = memcg_alloc_cache_params(memcg, s, parent_cache);
	if (err)
		goto out_free_cache;

	err = __kmem_cache_create(s, flags);
	if (err)
		goto out_free_cache;

	s->refcount = 1;
	list_add(&s->list, &slab_caches);
	memcg_register_cache(s);

out_unlock:
	mutex_unlock(&slab_mutex);
	put_online_cpus();

	if (err) {
		/*
		 * There is no point in flooding logs with warnings or
		 * especially crashing the system if we fail to create a cache
		 * for a memcg. In this case we will be accounting the memcg
		 * allocation to the root cgroup until we succeed to create its
		 * own cache, but it isn't that critical.
		 */
		if (memcg)
			return NULL;

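		/*
		 * For a root cache, failure is loud: honour SLAB_PANIC when
		 * it was requested, otherwise warn and dump a stack trace.
		 */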
		if (flags & SLAB_PANIC)
			panic("kmem_cache_create: Failed to create slab '%s'. Error %d\n",
				name, err);
		else {
			printk(KERN_WARNING "kmem_cache_create(%s) failed with error %d",
				name, err);
			dump_stack();
		}
		return NULL;
	}
	return s;

out_free_cache:
	memcg_free_cache_params(s);
	kfree(s->name);
	kmem_cache_free(kmem_cache, s);
	goto out_unlock;
}

struct kmem_cache *
kmem_cache_create(const char *name, size_t size, size_t align,
		  unsigned long flags, void (*ctor)(void *))
{
	return kmem_cache_create_memcg(NULL, name, size, align, flags, ctor, NULL);
}
EXPORT_SYMBOL(kmem_cache_create);

void kmem_cache_destroy(struct kmem_cache *s)
{
	/* Destroy all the child caches if we aren't a memcg cache */
	kmem_cache_destroy_memcg_children(s);

	get_online_cpus();
	mutex_lock(&slab_mutex);
	s->refcount--;
	if (!s->refcount) {
		list_del(&s->list);

		if (!__kmem_cache_shutdown(s)) {
			memcg_unregister_cache(s);
			mutex_unlock(&slab_mutex);
			if (s->flags & SLAB_DESTROY_BY_RCU)
				rcu_barrier();

			memcg_free_cache_params(s);
			kfree(s->name);
			kmem_cache_free(kmem_cache, s);
		} else {
			list_add(&s->list, &slab_caches);
			mutex_unlock(&slab_mutex);
			printk(KERN_ERR "kmem_cache_destroy %s: Slab cache still has objects\n",
			       s->name);
			dump_stack();
		}
	} else {
		mutex_unlock(&slab_mutex);
	}
	put_online_cpus();
}
EXPORT_SYMBOL(kmem_cache_destroy);

int slab_is_available(void)
{
	return slab_state >= UP;
}

#ifndef CONFIG_SLOB
/* Create a cache during boot when no slab services are available yet */
void __init create_boot_cache(struct kmem_cache *s, const char *name, size_t size,
		unsigned long flags)
{
	int err;

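	/*
	 * Boot caches are set up before kmalloc() is fully usable, so the
	 * caller-supplied name is referenced directly rather than being
	 * duplicated with kstrdup().
	 */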
	s->name = name;
	s->size = s->object_size = size;
	s->align = calculate_alignment(flags, ARCH_KMALLOC_MINALIGN, size);
	err = __kmem_cache_create(s, flags);

	if (err)
		panic("Creation of kmalloc slab %s size=%zu failed. Reason %d\n",
					name, size, err);

	s->refcount = -1;	/* Exempt from merging for now */
}

struct kmem_cache *__init create_kmalloc_cache(const char *name, size_t size,
				unsigned long flags)
{
	struct kmem_cache *s = kmem_cache_zalloc(kmem_cache, GFP_NOWAIT);

	if (!s)
		panic("Out of memory when creating slab %s\n", name);

	create_boot_cache(s, name, size, flags);
	list_add(&s->list, &slab_caches);
	s->refcount = 1;
	return s;
}

struct kmem_cache *kmalloc_caches[KMALLOC_SHIFT_HIGH + 1];
EXPORT_SYMBOL(kmalloc_caches);

#ifdef CONFIG_ZONE_DMA
struct kmem_cache *kmalloc_dma_caches[KMALLOC_SHIFT_HIGH + 1];
EXPORT_SYMBOL(kmalloc_dma_caches);
#endif

/*
 * Conversion table for small slab sizes / 8 to the index in the
 * kmalloc array. This is necessary for slabs < 192 since we have non power
 * of two cache sizes there. The size of larger slabs can be determined using
 * fls.
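 *
 * For example, kmalloc(56) looks up size_index[(56 - 1) / 8] = size_index[6],
 * which is 6, i.e. the 64-byte cache, while a 600-byte request takes the
 * fls() path and is served from the 1024-byte cache.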
 */
static s8 size_index[24] = {
	3,	/* 8 */
	4,	/* 16 */
	5,	/* 24 */
	5,	/* 32 */
	6,	/* 40 */
	6,	/* 48 */
	6,	/* 56 */
	6,	/* 64 */
	1,	/* 72 */
	1,	/* 80 */
	1,	/* 88 */
	1,	/* 96 */
	7,	/* 104 */
	7,	/* 112 */
	7,	/* 120 */
	7,	/* 128 */
	2,	/* 136 */
	2,	/* 144 */
	2,	/* 152 */
	2,	/* 160 */
	2,	/* 168 */
	2,	/* 176 */
	2,	/* 184 */
	2	/* 192 */
};

static inline int size_index_elem(size_t bytes)
{
	return (bytes - 1) / 8;
}

/*
 * Find the kmem_cache structure that serves a given size of
 * allocation
 */
struct kmem_cache *kmalloc_slab(size_t size, gfp_t flags)
{
	int index;

	if (unlikely(size > KMALLOC_MAX_SIZE)) {
		WARN_ON_ONCE(!(flags & __GFP_NOWARN));
		return NULL;
	}

	if (size <= 192) {
		if (!size)
			return ZERO_SIZE_PTR;

		index = size_index[size_index_elem(size)];
	} else
		index = fls(size - 1);

#ifdef CONFIG_ZONE_DMA
	if (unlikely((flags & GFP_DMA)))
		return kmalloc_dma_caches[index];

#endif
	return kmalloc_caches[index];
}

/*
 * Create the kmalloc array. Some of the regular kmalloc arrays
 * may already have been created because they were needed to
 * enable allocations for slab creation.
 */
void __init create_kmalloc_caches(unsigned long flags)
{
	int i;

	/*
	 * Patch up the size_index table if we have strange large alignment
	 * requirements for the kmalloc array. This is only the case for
	 * MIPS it seems.
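	 * (If KMALLOC_MIN_SIZE is 128, for instance, the loop below points
	 * every smaller size class at the 128-byte cache.)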
	 * The standard arches will not generate any code here.
	 *
	 * Largest permitted alignment is 256 bytes due to the way we
	 * handle the index determination for the smaller caches.
	 *
	 * Make sure that nothing crazy happens if someone starts tinkering
	 * around with ARCH_KMALLOC_MINALIGN.
	 */
	BUILD_BUG_ON(KMALLOC_MIN_SIZE > 256 ||
		(KMALLOC_MIN_SIZE & (KMALLOC_MIN_SIZE - 1)));

	for (i = 8; i < KMALLOC_MIN_SIZE; i += 8) {
		int elem = size_index_elem(i);

		if (elem >= ARRAY_SIZE(size_index))
			break;
		size_index[elem] = KMALLOC_SHIFT_LOW;
	}

	if (KMALLOC_MIN_SIZE >= 64) {
		/*
		 * The 96 byte size cache is not used if the alignment
		 * is 64 bytes.
		 */
		for (i = 64 + 8; i <= 96; i += 8)
			size_index[size_index_elem(i)] = 7;

	}

	if (KMALLOC_MIN_SIZE >= 128) {
		/*
		 * The 192 byte sized cache is not used if the alignment
		 * is 128 bytes. Redirect kmalloc to use the 256 byte cache
		 * instead.
		 */
		for (i = 128 + 8; i <= 192; i += 8)
			size_index[size_index_elem(i)] = 8;
	}
	for (i = KMALLOC_SHIFT_LOW; i <= KMALLOC_SHIFT_HIGH; i++) {
		if (!kmalloc_caches[i]) {
			kmalloc_caches[i] = create_kmalloc_cache(NULL,
							1 << i, flags);
		}

		/*
		 * Caches that are not of the two-to-the-power-of size.
		 * These have to be created immediately after the
		 * earlier power of two caches
		 */
		if (KMALLOC_MIN_SIZE <= 32 && !kmalloc_caches[1] && i == 6)
			kmalloc_caches[1] = create_kmalloc_cache(NULL, 96, flags);

		if (KMALLOC_MIN_SIZE <= 64 && !kmalloc_caches[2] && i == 7)
			kmalloc_caches[2] = create_kmalloc_cache(NULL, 192, flags);
	}

	/* Kmalloc array is now usable */
	slab_state = UP;

	for (i = 0; i <= KMALLOC_SHIFT_HIGH; i++) {
		struct kmem_cache *s = kmalloc_caches[i];
		char *n;

		if (s) {
			n = kasprintf(GFP_NOWAIT, "kmalloc-%d", kmalloc_size(i));

			BUG_ON(!n);
			s->name = n;
		}
	}

#ifdef CONFIG_ZONE_DMA
	for (i = 0; i <= KMALLOC_SHIFT_HIGH; i++) {
		struct kmem_cache *s = kmalloc_caches[i];

		if (s) {
			int size = kmalloc_size(i);
			char *n = kasprintf(GFP_NOWAIT,
				 "dma-kmalloc-%d", size);

			BUG_ON(!n);
			kmalloc_dma_caches[i] = create_kmalloc_cache(n,
				size, SLAB_CACHE_DMA | flags);
		}
	}
#endif
}
#endif /* !CONFIG_SLOB */

#ifdef CONFIG_TRACING
void *kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order)
{
	void *ret = kmalloc_order(size, flags, order);
	trace_kmalloc(_RET_IP_, ret, size, PAGE_SIZE << order, flags);
	return ret;
}
EXPORT_SYMBOL(kmalloc_order_trace);
#endif

#ifdef CONFIG_SLABINFO

#ifdef CONFIG_SLAB
#define SLABINFO_RIGHTS (S_IWUSR | S_IRUSR)
#else
#define SLABINFO_RIGHTS S_IRUSR
#endif

void print_slabinfo_header(struct seq_file *m)
{
	/*
	 * Output format version, so at least we can change it
	 * without _too_ many complaints.
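	 * Userspace parsers such as slabtop key off this version line.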
	 */
#ifdef CONFIG_DEBUG_SLAB
	seq_puts(m, "slabinfo - version: 2.1 (statistics)\n");
#else
	seq_puts(m, "slabinfo - version: 2.1\n");
#endif
	seq_puts(m, "# name            <active_objs> <num_objs> <objsize> "
		 "<objperslab> <pagesperslab>");
	seq_puts(m, " : tunables <limit> <batchcount> <sharedfactor>");
	seq_puts(m, " : slabdata <active_slabs> <num_slabs> <sharedavail>");
#ifdef CONFIG_DEBUG_SLAB
	seq_puts(m, " : globalstat <listallocs> <maxobjs> <grown> <reaped> "
		 "<error> <maxfreeable> <nodeallocs> <remotefrees> <alienoverflow>");
	seq_puts(m, " : cpustat <allochit> <allocmiss> <freehit> <freemiss>");
#endif
	seq_putc(m, '\n');
}

static void *s_start(struct seq_file *m, loff_t *pos)
{
	loff_t n = *pos;

	mutex_lock(&slab_mutex);
	if (!n)
		print_slabinfo_header(m);

	return seq_list_start(&slab_caches, *pos);
}

void *slab_next(struct seq_file *m, void *p, loff_t *pos)
{
	return seq_list_next(p, &slab_caches, pos);
}

void slab_stop(struct seq_file *m, void *p)
{
	mutex_unlock(&slab_mutex);
}

static void
memcg_accumulate_slabinfo(struct kmem_cache *s, struct slabinfo *info)
{
	struct kmem_cache *c;
	struct slabinfo sinfo;
	int i;

	if (!is_root_cache(s))
		return;

	for_each_memcg_cache_index(i) {
		c = cache_from_memcg_idx(s, i);
		if (!c)
			continue;

		memset(&sinfo, 0, sizeof(sinfo));
		get_slabinfo(c, &sinfo);

		info->active_slabs += sinfo.active_slabs;
		info->num_slabs += sinfo.num_slabs;
		info->shared_avail += sinfo.shared_avail;
		info->active_objs += sinfo.active_objs;
		info->num_objs += sinfo.num_objs;
	}
}

int cache_show(struct kmem_cache *s, struct seq_file *m)
{
	struct slabinfo sinfo;

	memset(&sinfo, 0, sizeof(sinfo));
	get_slabinfo(s, &sinfo);

	memcg_accumulate_slabinfo(s, &sinfo);
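
	/*
	 * Keep the fields below in sync with the column headers emitted by
	 * print_slabinfo_header() above.
	 */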
	seq_printf(m, "%-17s %6lu %6lu %6u %4u %4d",
		   cache_name(s), sinfo.active_objs, sinfo.num_objs, s->size,
		   sinfo.objects_per_slab, (1 << sinfo.cache_order));

	seq_printf(m, " : tunables %4u %4u %4u",
		   sinfo.limit, sinfo.batchcount, sinfo.shared);
	seq_printf(m, " : slabdata %6lu %6lu %6lu",
		   sinfo.active_slabs, sinfo.num_slabs, sinfo.shared_avail);
	slabinfo_show_stats(m, s);
	seq_putc(m, '\n');
	return 0;
}

static int s_show(struct seq_file *m, void *p)
{
	struct kmem_cache *s = list_entry(p, struct kmem_cache, list);

	if (!is_root_cache(s))
		return 0;
	return cache_show(s, m);
}

/*
 * slabinfo_op - iterator that generates /proc/slabinfo
 *
 * Output layout:
 * cache-name
 * num-active-objs
 * total-objs
 * object size
 * num-active-slabs
 * total-slabs
 * num-pages-per-slab
 * + further values on SMP and with statistics enabled
 */
static const struct seq_operations slabinfo_op = {
	.start = s_start,
	.next = slab_next,
	.stop = slab_stop,
	.show = s_show,
};

static int slabinfo_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &slabinfo_op);
}

static const struct file_operations proc_slabinfo_operations = {
	.open = slabinfo_open,
	.read = seq_read,
	.write = slabinfo_write,
	.llseek = seq_lseek,
	.release = seq_release,
};

static int __init slab_proc_init(void)
{
	proc_create("slabinfo", SLABINFO_RIGHTS, NULL,
		    &proc_slabinfo_operations);
	return 0;
}
module_init(slab_proc_init);
#endif /* CONFIG_SLABINFO */