// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Contiguous Memory Allocator
 *
 * Copyright (c) 2010-2011 by Samsung Electronics.
 * Copyright IBM Corporation, 2013
 * Copyright LG Electronics Inc., 2014
 * Written by:
 *	Marek Szyprowski <m.szyprowski@samsung.com>
 *	Michal Nazarewicz <mina86@mina86.com>
 *	Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
 *	Joonsoo Kim <iamjoonsoo.kim@lge.com>
 */

#define pr_fmt(fmt) "cma: " fmt

#define CREATE_TRACE_POINTS

#include <linux/memblock.h>
#include <linux/err.h>
#include <linux/list.h>
#include <linux/mm.h>
#include <linux/sizes.h>
#include <linux/slab.h>
#include <linux/log2.h>
#include <linux/cma.h>
#include <linux/highmem.h>
#include <linux/io.h>
#include <linux/kmemleak.h>
#include <trace/events/cma.h>

#include "internal.h"
#include "cma.h"

struct cma cma_areas[MAX_CMA_AREAS];
unsigned int cma_area_count;

static int __init __cma_declare_contiguous_nid(phys_addr_t *basep,
			phys_addr_t size, phys_addr_t limit,
			phys_addr_t alignment, unsigned int order_per_bit,
			bool fixed, const char *name, struct cma **res_cma,
			int nid);

phys_addr_t cma_get_base(const struct cma *cma)
{
	WARN_ON_ONCE(cma->nranges != 1);
	return PFN_PHYS(cma->ranges[0].base_pfn);
}

unsigned long cma_get_size(const struct cma *cma)
{
	return cma->count << PAGE_SHIFT;
}

const char *cma_get_name(const struct cma *cma)
{
	return cma->name;
}

static unsigned long cma_bitmap_aligned_mask(const struct cma *cma,
					     unsigned int align_order)
{
	if (align_order <= cma->order_per_bit)
		return 0;
	return (1UL << (align_order - cma->order_per_bit)) - 1;
}

/*
 * Find the offset of the base PFN from the specified align_order.
 * The value returned is represented in order_per_bits.
 */
static unsigned long cma_bitmap_aligned_offset(const struct cma *cma,
					       const struct cma_memrange *cmr,
					       unsigned int align_order)
{
	return (cmr->base_pfn & ((1UL << align_order) - 1))
		>> cma->order_per_bit;
}

static unsigned long cma_bitmap_pages_to_bits(const struct cma *cma,
					      unsigned long pages)
{
	return ALIGN(pages, 1UL << cma->order_per_bit) >> cma->order_per_bit;
}
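
/*
 * A short worked example of the bitmap bookkeeping above (sketch, assuming
 * order_per_bit == 2, i.e. one bitmap bit tracks four pages):
 *
 *	cma_bitmap_pages_to_bits(cma, 10) == 3	(10 pages round up to 12,
 *						 and 12 >> 2 == 3 bits)
 *	cma_bitmap_aligned_mask(cma, 4) == 0x3	(order-4 requests must start
 *						 on a 4-bit, 16-page boundary)
 *	cma_bitmap_aligned_mask(cma, 1) == 0	(order 1 is already finer
 *						 than one bit)
 */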

static void cma_clear_bitmap(struct cma *cma, const struct cma_memrange *cmr,
			     unsigned long pfn, unsigned long count)
{
	unsigned long bitmap_no, bitmap_count;
	unsigned long flags;

	bitmap_no = (pfn - cmr->base_pfn) >> cma->order_per_bit;
	bitmap_count = cma_bitmap_pages_to_bits(cma, count);

	spin_lock_irqsave(&cma->lock, flags);
	bitmap_clear(cmr->bitmap, bitmap_no, bitmap_count);
	cma->available_count += count;
	spin_unlock_irqrestore(&cma->lock, flags);
}

/*
 * Check if a CMA area contains no ranges that intersect with
 * multiple zones. Store the result in the flags in case
 * this gets called more than once.
 */
bool cma_validate_zones(struct cma *cma)
{
	int r;
	unsigned long base_pfn;
	struct cma_memrange *cmr;
	bool valid_bit_set;

	/*
	 * If already validated, return result of previous check.
	 * Either the valid or invalid bit will be set if this
	 * check has already been done. If neither is set, the
	 * check has not been performed yet.
	 */
	valid_bit_set = test_bit(CMA_ZONES_VALID, &cma->flags);
	if (valid_bit_set || test_bit(CMA_ZONES_INVALID, &cma->flags))
		return valid_bit_set;

	for (r = 0; r < cma->nranges; r++) {
		cmr = &cma->ranges[r];
		base_pfn = cmr->base_pfn;

		/*
		 * alloc_contig_range() requires the pfn range specified
		 * to be in the same zone. Simplify by forcing the entire
		 * CMA resv range to be in the same zone.
		 */
		WARN_ON_ONCE(!pfn_valid(base_pfn));
		if (pfn_range_intersects_zones(cma->nid, base_pfn, cmr->count)) {
			set_bit(CMA_ZONES_INVALID, &cma->flags);
			return false;
		}
	}

	set_bit(CMA_ZONES_VALID, &cma->flags);

	return true;
}

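/*
 * Activate a CMA area: allocate the per-range bitmaps, check that no range
 * straddles a zone boundary, account any pages already handed out by
 * cma_reserve_early(), and release the remaining reserved pages to the buddy
 * allocator as MIGRATE_CMA pageblocks. On failure the area is deactivated
 * and, unless CMA_RESERVE_PAGES_ON_ERROR is set, its pages (apart from any
 * handed out by cma_reserve_early()) are returned to the buddy allocator as
 * ordinary free pages.
 */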
static void __init cma_activate_area(struct cma *cma)
{
	unsigned long pfn, end_pfn;
	int allocrange, r;
	struct cma_memrange *cmr;
	unsigned long bitmap_count, count;

	for (allocrange = 0; allocrange < cma->nranges; allocrange++) {
		cmr = &cma->ranges[allocrange];
		cmr->bitmap = bitmap_zalloc(cma_bitmap_maxno(cma, cmr),
					    GFP_KERNEL);
		if (!cmr->bitmap)
			goto cleanup;
	}

	if (!cma_validate_zones(cma))
		goto cleanup;

	for (r = 0; r < cma->nranges; r++) {
		cmr = &cma->ranges[r];
		if (cmr->early_pfn != cmr->base_pfn) {
			count = cmr->early_pfn - cmr->base_pfn;
			bitmap_count = cma_bitmap_pages_to_bits(cma, count);
			bitmap_set(cmr->bitmap, 0, bitmap_count);
		}

		for (pfn = cmr->early_pfn; pfn < cmr->base_pfn + cmr->count;
		     pfn += pageblock_nr_pages)
			init_cma_reserved_pageblock(pfn_to_page(pfn));
	}

	spin_lock_init(&cma->lock);

	mutex_init(&cma->alloc_mutex);

#ifdef CONFIG_CMA_DEBUGFS
	INIT_HLIST_HEAD(&cma->mem_head);
	spin_lock_init(&cma->mem_head_lock);
#endif
	set_bit(CMA_ACTIVATED, &cma->flags);

	return;

cleanup:
	for (r = 0; r < allocrange; r++)
		bitmap_free(cma->ranges[r].bitmap);

	/* Expose all pages to the buddy, they are useless for CMA. */
	if (!test_bit(CMA_RESERVE_PAGES_ON_ERROR, &cma->flags)) {
		for (r = 0; r < allocrange; r++) {
			cmr = &cma->ranges[r];
			end_pfn = cmr->base_pfn + cmr->count;
			for (pfn = cmr->early_pfn; pfn < end_pfn; pfn++)
				free_reserved_page(pfn_to_page(pfn));
		}
	}
	totalcma_pages -= cma->count;
	cma->available_count = cma->count = 0;
	pr_err("CMA area %s could not be activated\n", cma->name);
}

static int __init cma_init_reserved_areas(void)
{
	int i;

	for (i = 0; i < cma_area_count; i++)
		cma_activate_area(&cma_areas[i]);

	return 0;
}
core_initcall(cma_init_reserved_areas);

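/*
 * Tell CMA not to return the underlying pages to the buddy allocator if
 * activation of @cma fails later on (see the cleanup path in
 * cma_activate_area()); the memory then simply stays reserved.
 */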
void __init cma_reserve_pages_on_error(struct cma *cma)
{
	set_bit(CMA_RESERVE_PAGES_ON_ERROR, &cma->flags);
}

static int __init cma_new_area(const char *name, phys_addr_t size,
			       unsigned int order_per_bit,
			       struct cma **res_cma)
{
	struct cma *cma;

	if (cma_area_count == ARRAY_SIZE(cma_areas)) {
		pr_err("Not enough slots for CMA reserved regions!\n");
		return -ENOSPC;
	}

	/*
	 * Each reserved area must be initialised later, when more kernel
	 * subsystems (like slab allocator) are available.
	 */
	cma = &cma_areas[cma_area_count];
	cma_area_count++;

	if (name)
		snprintf(cma->name, CMA_MAX_NAME, "%s", name);
	else
		snprintf(cma->name, CMA_MAX_NAME, "cma%d", cma_area_count);

	cma->available_count = cma->count = size >> PAGE_SHIFT;
	cma->order_per_bit = order_per_bit;
	*res_cma = cma;
	totalcma_pages += cma->count;

	return 0;
}

static void __init cma_drop_area(struct cma *cma)
{
	totalcma_pages -= cma->count;
	cma_area_count--;
}

/**
 * cma_init_reserved_mem() - create custom contiguous area from reserved memory
 * @base: Base address of the reserved area
 * @size: Size of the reserved area (in bytes).
 * @order_per_bit: Order of pages represented by one bit on bitmap.
 * @name: The name of the area. If this parameter is NULL, the name of
 *        the area will be set to "cmaN", where N is a running counter of
 *        used areas.
 * @res_cma: Pointer to store the created cma region.
 *
 * This function creates a custom contiguous area from already reserved memory.
 */
int __init cma_init_reserved_mem(phys_addr_t base, phys_addr_t size,
				 unsigned int order_per_bit,
				 const char *name,
				 struct cma **res_cma)
{
	struct cma *cma;
	int ret;

	/* Sanity checks */
	if (!size || !memblock_is_region_reserved(base, size))
		return -EINVAL;

	/*
	 * CMA uses CMA_MIN_ALIGNMENT_BYTES as alignment requirement which
	 * needs pageblock_order to be initialized. Let's enforce it.
	 */
	if (!pageblock_order) {
		pr_err("pageblock_order not yet initialized. Called during early boot?\n");
		return -EINVAL;
	}

	/* ensure minimal alignment required by mm core */
	if (!IS_ALIGNED(base | size, CMA_MIN_ALIGNMENT_BYTES))
		return -EINVAL;

	ret = cma_new_area(name, size, order_per_bit, &cma);
	if (ret != 0)
		return ret;

	cma->ranges[0].base_pfn = PFN_DOWN(base);
	cma->ranges[0].early_pfn = PFN_DOWN(base);
	cma->ranges[0].count = cma->count;
	cma->nranges = 1;
	cma->nid = NUMA_NO_NODE;

	*res_cma = cma;

	return 0;
}
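
/*
 * Example (minimal sketch, with hypothetical names): early setup code that
 * has already reserved a suitably aligned region through memblock could turn
 * it into a CMA area like this:
 *
 *	static struct cma *foo_cma;
 *
 *	static int __init foo_cma_setup(phys_addr_t base, phys_addr_t size)
 *	{
 *		if (memblock_reserve(base, size))
 *			return -ENOMEM;
 *		return cma_init_reserved_mem(base, size, 0, "foo", &foo_cma);
 *	}
 */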

/*
 * Structure used while walking physical memory ranges and finding out
 * which one(s) to use for a CMA area.
 */
struct cma_init_memrange {
	phys_addr_t base;
	phys_addr_t size;
	struct list_head list;
};

/*
 * Work array used during CMA initialization.
 */
static struct cma_init_memrange memranges[CMA_MAX_RANGES] __initdata;

static bool __init revsizecmp(struct cma_init_memrange *mlp,
			      struct cma_init_memrange *mrp)
{
	return mlp->size > mrp->size;
}

static bool __init basecmp(struct cma_init_memrange *mlp,
			   struct cma_init_memrange *mrp)
{
	return mlp->base < mrp->base;
}

/*
 * Helper function to create sorted lists.
 */
static void __init list_insert_sorted(
	struct list_head *ranges,
	struct cma_init_memrange *mrp,
	bool (*cmp)(struct cma_init_memrange *lh, struct cma_init_memrange *rh))
{
	struct list_head *mp;
	struct cma_init_memrange *mlp;

	if (list_empty(ranges))
		list_add(&mrp->list, ranges);
	else {
		list_for_each(mp, ranges) {
			mlp = list_entry(mp, struct cma_init_memrange, list);
			if (cmp(mlp, mrp))
				break;
		}
		__list_add(&mrp->list, mlp->list.prev, &mlp->list);
	}
}

/*
 * Create CMA areas with a total size of @total_size. A normal allocation
 * for one area is tried first. If that fails, the biggest memblock
 * ranges above 4G are selected, and allocated bottom up.
 *
 * The complexity here is not great, but this function will only be
 * called during boot, and the lists operated on have fewer than
 * CMA_MAX_RANGES elements (default value: 8).
 */
int __init cma_declare_contiguous_multi(phys_addr_t total_size,
			phys_addr_t align, unsigned int order_per_bit,
			const char *name, struct cma **res_cma, int nid)
{
	phys_addr_t start = 0, end;
	phys_addr_t size, sizesum, sizeleft;
	struct cma_init_memrange *mrp, *mlp, *failed;
	struct cma_memrange *cmrp;
	LIST_HEAD(ranges);
	LIST_HEAD(final_ranges);
	struct list_head *mp, *next;
	int ret, nr = 1;
	u64 i;
	struct cma *cma;

	/*
	 * First, try it the normal way, producing just one range.
	 */
	ret = __cma_declare_contiguous_nid(&start, total_size, 0, align,
			order_per_bit, false, name, res_cma, nid);
	if (ret != -ENOMEM)
		goto out;

	/*
	 * Couldn't find one range that fits our needs, so try multiple
	 * ranges.
	 *
	 * No need to do the alignment checks here, the call to
	 * __cma_declare_contiguous_nid() above would have caught
	 * any issues. With the checks, we know that:
	 *
	 * - @align is a power of 2
	 * - @align is >= pageblock alignment
	 * - @size is aligned to @align and to @order_per_bit
	 *
	 * So, as long as we create ranges that have a base
	 * aligned to @align, and a size that is aligned to
	 * both @align and @order_per_bit, things will work out.
	 */
	nr = 0;
	sizesum = 0;
	failed = NULL;

	ret = cma_new_area(name, total_size, order_per_bit, &cma);
	if (ret != 0)
		goto out;

	align = max_t(phys_addr_t, align, CMA_MIN_ALIGNMENT_BYTES);
	/*
	 * Create a list of ranges above 4G, largest range first.
	 */
	for_each_free_mem_range(i, nid, MEMBLOCK_NONE, &start, &end, NULL) {
		if (upper_32_bits(start) == 0)
			continue;

		start = ALIGN(start, align);
		if (start >= end)
			continue;

		end = ALIGN_DOWN(end, align);
		if (end <= start)
			continue;

		size = end - start;
		size = ALIGN_DOWN(size, (PAGE_SIZE << order_per_bit));
		if (!size)
			continue;
		sizesum += size;

		pr_debug("consider %016llx - %016llx\n", (u64)start, (u64)end);

		/*
		 * If we have not yet used the maximum number of
		 * areas, grab a new one.
		 *
		 * If all slots are in use, check whether this range
		 * is at least as large as the smallest one recorded
		 * so far; if it is, drop that smallest entry and
		 * re-use its slot.
		 */
		if (nr < CMA_MAX_RANGES)
			mrp = &memranges[nr++];
		else {
			mrp = list_last_entry(&ranges,
					      struct cma_init_memrange, list);
			if (size < mrp->size)
				continue;
			list_del(&mrp->list);
			sizesum -= mrp->size;
			pr_debug("deleted %016llx - %016llx from the list\n",
				(u64)mrp->base, (u64)mrp->base + mrp->size);
		}
		mrp->base = start;
		mrp->size = size;

		/*
		 * Now do a sorted insert.
		 */
		list_insert_sorted(&ranges, mrp, revsizecmp);
		pr_debug("added %016llx - %016llx to the list\n",
		    (u64)mrp->base, (u64)mrp->base + size);
		pr_debug("total size now %llu\n", (u64)sizesum);
	}

	/*
	 * If there is not enough room in the CMA_MAX_RANGES largest
	 * ranges, bail out.
	 */
	if (sizesum < total_size) {
		cma_drop_area(cma);
		ret = -ENOMEM;
		goto out;
	}

	/*
	 * Found ranges that provide enough combined space.
	 * Now sort them by address, smallest first, because we
	 * want to mimic a bottom-up memblock allocation.
	 */
	sizesum = 0;
	list_for_each_safe(mp, next, &ranges) {
		mlp = list_entry(mp, struct cma_init_memrange, list);
		list_del(mp);
		list_insert_sorted(&final_ranges, mlp, basecmp);
		sizesum += mlp->size;
		if (sizesum >= total_size)
			break;
	}

	/*
	 * Walk the final list, and add a CMA range for
	 * each range, possibly not using the last one fully.
	 */
	nr = 0;
	sizeleft = total_size;
	list_for_each(mp, &final_ranges) {
		mlp = list_entry(mp, struct cma_init_memrange, list);
		size = min(sizeleft, mlp->size);
		if (memblock_reserve(mlp->base, size)) {
			/*
			 * Unexpected error. Could go on to
			 * the next one, but just abort to
			 * be safe.
			 */
			failed = mlp;
			break;
		}

		pr_debug("created region %d: %016llx - %016llx\n",
		    nr, (u64)mlp->base, (u64)mlp->base + size);
		cmrp = &cma->ranges[nr++];
		cmrp->base_pfn = PHYS_PFN(mlp->base);
		cmrp->early_pfn = cmrp->base_pfn;
		cmrp->count = size >> PAGE_SHIFT;

		sizeleft -= size;
		if (sizeleft == 0)
			break;
	}

	if (failed) {
		list_for_each(mp, &final_ranges) {
			mlp = list_entry(mp, struct cma_init_memrange, list);
			if (mlp == failed)
				break;
			memblock_phys_free(mlp->base, mlp->size);
		}
		cma_drop_area(cma);
		ret = -ENOMEM;
		goto out;
	}

	cma->nranges = nr;
	cma->nid = nid;
	*res_cma = cma;

out:
	if (ret != 0)
		pr_err("Failed to reserve %lu MiB\n",
			(unsigned long)total_size / SZ_1M);
	else
		pr_info("Reserved %lu MiB in %d range%s\n",
			(unsigned long)total_size / SZ_1M, nr,
			nr > 1 ? "s" : "");
	return ret;
}

/**
 * cma_declare_contiguous_nid() - reserve custom contiguous area
 * @base: Base address of the reserved area (optional, use 0 for any).
 * @size: Size of the reserved area (in bytes).
 * @limit: End address of the reserved memory (optional, 0 for any).
 * @alignment: Alignment for the CMA area, should be power of 2 or zero
 * @order_per_bit: Order of pages represented by one bit on bitmap.
 * @fixed: hint about where to place the reserved area
 * @name: The name of the area. See function cma_init_reserved_mem()
 * @res_cma: Pointer to store the created cma region.
 * @nid: nid of the free area to find, %NUMA_NO_NODE for any node
 *
 * This function reserves memory from early allocator. It should be
 * called by arch specific code once the early allocator (memblock or bootmem)
 * has been activated and all other subsystems have already allocated/reserved
 * memory. This function allows creation of custom reserved areas.
 *
 * If @fixed is true, reserve contiguous area at exactly @base.  If false,
 * reserve in range from @base to @limit.
 */
int __init cma_declare_contiguous_nid(phys_addr_t base,
			phys_addr_t size, phys_addr_t limit,
			phys_addr_t alignment, unsigned int order_per_bit,
			bool fixed, const char *name, struct cma **res_cma,
			int nid)
{
	int ret;

	ret = __cma_declare_contiguous_nid(&base, size, limit, alignment,
			order_per_bit, fixed, name, res_cma, nid);
	if (ret != 0)
		pr_err("Failed to reserve %ld MiB\n",
				(unsigned long)size / SZ_1M);
	else
		pr_info("Reserved %ld MiB at %pa\n",
				(unsigned long)size / SZ_1M, &base);

	return ret;
}
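
/*
 * Example (minimal sketch, with hypothetical names): arch or driver early
 * init code typically declares the area once and only allocates from it
 * much later, after activation:
 *
 *	static struct cma *foo_cma;
 *
 *	void __init foo_reserve(void)
 *	{
 *		if (cma_declare_contiguous_nid(0, SZ_128M, 0, 0, 0, false,
 *					       "foo", &foo_cma, NUMA_NO_NODE))
 *			pr_warn("foo: CMA reservation failed\n");
 *	}
 */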

static int __init __cma_declare_contiguous_nid(phys_addr_t *basep,
			phys_addr_t size, phys_addr_t limit,
			phys_addr_t alignment, unsigned int order_per_bit,
			bool fixed, const char *name, struct cma **res_cma,
			int nid)
{
	phys_addr_t memblock_end = memblock_end_of_DRAM();
	phys_addr_t highmem_start, base = *basep;
	int ret;

	/*
	 * We can't use __pa(high_memory) directly, since high_memory
	 * isn't a valid direct map VA, and DEBUG_VIRTUAL will (validly)
	 * complain. Find the boundary by adding one to the last valid
	 * address.
	 */
	if (IS_ENABLED(CONFIG_HIGHMEM))
		highmem_start = __pa(high_memory - 1) + 1;
	else
		highmem_start = memblock_end_of_DRAM();
	pr_debug("%s(size %pa, base %pa, limit %pa alignment %pa)\n",
		__func__, &size, &base, &limit, &alignment);

	if (cma_area_count == ARRAY_SIZE(cma_areas)) {
		pr_err("Not enough slots for CMA reserved regions!\n");
		return -ENOSPC;
	}

	if (!size)
		return -EINVAL;

	if (alignment && !is_power_of_2(alignment))
		return -EINVAL;

	if (!IS_ENABLED(CONFIG_NUMA))
		nid = NUMA_NO_NODE;

	/* Sanitise input arguments. */
	alignment = max_t(phys_addr_t, alignment, CMA_MIN_ALIGNMENT_BYTES);
	if (fixed && base & (alignment - 1)) {
		pr_err("Region at %pa must be aligned to %pa bytes\n",
			&base, &alignment);
		return -EINVAL;
	}
	base = ALIGN(base, alignment);
	size = ALIGN(size, alignment);
	limit &= ~(alignment - 1);

	if (!base)
		fixed = false;

	/* size should be aligned with order_per_bit */
	if (!IS_ALIGNED(size >> PAGE_SHIFT, 1 << order_per_bit))
		return -EINVAL;

	/*
	 * If allocating at a fixed base, the requested region must not cross
	 * the low/high memory boundary.
	 */
	if (fixed && base < highmem_start && base + size > highmem_start) {
		pr_err("Region at %pa defined on low/high memory boundary (%pa)\n",
			&base, &highmem_start);
		return -EINVAL;
	}

	/*
	 * If the limit is unspecified or above the memblock end, its effective
	 * value will be the memblock end. Set it explicitly to simplify further
	 * checks.
	 */
	if (limit == 0 || limit > memblock_end)
		limit = memblock_end;

	if (base + size > limit) {
		pr_err("Size (%pa) of region at %pa exceeds limit (%pa)\n",
			&size, &base, &limit);
		return -EINVAL;
	}

	/* Reserve memory */
	if (fixed) {
		if (memblock_is_region_reserved(base, size) ||
		    memblock_reserve(base, size) < 0) {
			return -EBUSY;
		}
	} else {
		phys_addr_t addr = 0;

		/*
		 * If there is enough memory, try a bottom-up allocation first.
		 * It will place the new cma area close to the start of the node
		 * and guarantee that the compaction is moving pages out of the
		 * cma area and not into it.
		 * Avoid using first 4GB to not interfere with constrained zones
		 * like DMA/DMA32.
		 */
#ifdef CONFIG_PHYS_ADDR_T_64BIT
		if (!memblock_bottom_up() && memblock_end >= SZ_4G + size) {
			memblock_set_bottom_up(true);
			addr = memblock_alloc_range_nid(size, alignment, SZ_4G,
							limit, nid, true);
			memblock_set_bottom_up(false);
		}
#endif

		/*
		 * All pages in the reserved area must come from the same zone.
		 * If the requested region crosses the low/high memory boundary,
		 * try allocating from high memory first and fall back to low
		 * memory in case of failure.
		 */
		if (!addr && base < highmem_start && limit > highmem_start) {
			addr = memblock_alloc_range_nid(size, alignment,
					highmem_start, limit, nid, true);
			limit = highmem_start;
		}

		if (!addr) {
			addr = memblock_alloc_range_nid(size, alignment, base,
					limit, nid, true);
			if (!addr)
				return -ENOMEM;
		}

		/*
		 * kmemleak scans/reads tracked objects for pointers to other
		 * objects but this address isn't mapped and accessible
		 */
		kmemleak_ignore_phys(addr);
		base = addr;
	}

	ret = cma_init_reserved_mem(base, size, order_per_bit, name, res_cma);
	if (ret) {
		memblock_phys_free(base, size);
		return ret;
	}

	(*res_cma)->nid = nid;
	*basep = base;

	return 0;
}

static void cma_debug_show_areas(struct cma *cma)
{
	unsigned long next_zero_bit, next_set_bit, nr_zero;
	unsigned long start;
	unsigned long nr_part;
	unsigned long nbits;
	int r;
	struct cma_memrange *cmr;

	spin_lock_irq(&cma->lock);
	pr_info("number of available pages: ");
	for (r = 0; r < cma->nranges; r++) {
		cmr = &cma->ranges[r];

		start = 0;
		nbits = cma_bitmap_maxno(cma, cmr);

		pr_info("range %d: ", r);
		for (;;) {
			next_zero_bit = find_next_zero_bit(cmr->bitmap,
							   nbits, start);
			if (next_zero_bit >= nbits)
				break;
			next_set_bit = find_next_bit(cmr->bitmap, nbits,
						     next_zero_bit);
			nr_zero = next_set_bit - next_zero_bit;
			nr_part = nr_zero << cma->order_per_bit;
			pr_cont("%s%lu@%lu", start ? "+" : "", nr_part,
				next_zero_bit);
			start = next_zero_bit + nr_zero;
		}
		pr_info("\n");
	}
	pr_cont("=> %lu free of %lu total pages\n", cma->available_count,
			cma->count);
	spin_unlock_irq(&cma->lock);
}

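/*
 * Try to satisfy an allocation from a single memory range of @cma: find a
 * free, suitably aligned run of bitmap bits, tentatively mark it as used and
 * hand the corresponding PFN range to alloc_contig_range(). On -EBUSY the
 * bitmap is cleared again and the search resumes just past the failed spot;
 * any other error gives up on this range.
 */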
static int cma_range_alloc(struct cma *cma, struct cma_memrange *cmr,
				unsigned long count, unsigned int align,
				struct page **pagep, gfp_t gfp)
{
	unsigned long mask, offset;
	unsigned long pfn = -1;
	unsigned long start = 0;
	unsigned long bitmap_maxno, bitmap_no, bitmap_count;
	int ret = -EBUSY;
	struct page *page = NULL;

	mask = cma_bitmap_aligned_mask(cma, align);
	offset = cma_bitmap_aligned_offset(cma, cmr, align);
	bitmap_maxno = cma_bitmap_maxno(cma, cmr);
	bitmap_count = cma_bitmap_pages_to_bits(cma, count);

	if (bitmap_count > bitmap_maxno)
		goto out;

	for (;;) {
		spin_lock_irq(&cma->lock);
		/*
		 * If the request is larger than the available number
		 * of pages, stop right away.
		 */
		if (count > cma->available_count) {
			spin_unlock_irq(&cma->lock);
			break;
		}
		bitmap_no = bitmap_find_next_zero_area_off(cmr->bitmap,
				bitmap_maxno, start, bitmap_count, mask,
				offset);
		if (bitmap_no >= bitmap_maxno) {
			spin_unlock_irq(&cma->lock);
			break;
		}
		bitmap_set(cmr->bitmap, bitmap_no, bitmap_count);
		cma->available_count -= count;
		/*
		 * It's safe to drop the lock here. We've marked this region for
		 * our exclusive use. If the migration fails we will take the
		 * lock again and unmark it.
		 */
		spin_unlock_irq(&cma->lock);

		pfn = cmr->base_pfn + (bitmap_no << cma->order_per_bit);
		mutex_lock(&cma->alloc_mutex);
		ret = alloc_contig_range(pfn, pfn + count, MIGRATE_CMA, gfp);
		mutex_unlock(&cma->alloc_mutex);
		if (ret == 0) {
			page = pfn_to_page(pfn);
			break;
		}

		cma_clear_bitmap(cma, cmr, pfn, count);
		if (ret != -EBUSY)
			break;

		pr_debug("%s(): memory range at pfn 0x%lx %p is busy, retrying\n",
			 __func__, pfn, pfn_to_page(pfn));

		trace_cma_alloc_busy_retry(cma->name, pfn, pfn_to_page(pfn),
					   count, align);
		/* try again with a bit different memory target */
		start = bitmap_no + mask + 1;
	}
out:
	*pagep = page;
	return ret;
}

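/*
 * Core allocator shared by cma_alloc() and cma_alloc_folio(): walk the
 * area's memory ranges in order and take the first one that can satisfy the
 * request, then reset the KASAN tags of the allocated pages and update the
 * trace points, vmstat counters and sysfs statistics.
 */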
static struct page *__cma_alloc(struct cma *cma, unsigned long count,
		       unsigned int align, gfp_t gfp)
{
	struct page *page = NULL;
	int ret = -ENOMEM, r;
	unsigned long i;
	const char *name = cma ? cma->name : NULL;

	trace_cma_alloc_start(name, count, align);

	if (!cma || !cma->count)
		return page;

	pr_debug("%s(cma %p, name: %s, count %lu, align %d)\n", __func__,
		(void *)cma, cma->name, count, align);

	if (!count)
		return page;

	for (r = 0; r < cma->nranges; r++) {
		page = NULL;

		ret = cma_range_alloc(cma, &cma->ranges[r], count, align,
				       &page, gfp);
		if (ret != -EBUSY || page)
			break;
	}

	/*
	 * CMA can allocate multiple page blocks, which results in different
	 * blocks being marked with different tags. Reset the tags to ignore
	 * those page blocks.
	 */
	if (page) {
		for (i = 0; i < count; i++)
			page_kasan_tag_reset(nth_page(page, i));
	}

	if (ret && !(gfp & __GFP_NOWARN)) {
		pr_err_ratelimited("%s: %s: alloc failed, req-size: %lu pages, ret: %d\n",
				   __func__, cma->name, count, ret);
		cma_debug_show_areas(cma);
	}

	pr_debug("%s(): returned %p\n", __func__, page);
	trace_cma_alloc_finish(name, page ? page_to_pfn(page) : 0,
			       page, count, align, ret);
	if (page) {
		count_vm_event(CMA_ALLOC_SUCCESS);
		cma_sysfs_account_success_pages(cma, count);
	} else {
		count_vm_event(CMA_ALLOC_FAIL);
		cma_sysfs_account_fail_pages(cma, count);
	}

	return page;
}

/**
 * cma_alloc() - allocate pages from contiguous area
 * @cma:   Contiguous memory region for which the allocation is performed.
 * @count: Requested number of pages.
 * @align: Requested alignment of pages (in PAGE_SIZE order).
 * @no_warn: Avoid printing message about failed allocation
 *
 * This function allocates @count contiguous pages from the given
 * contiguous memory area.
 */
struct page *cma_alloc(struct cma *cma, unsigned long count,
		       unsigned int align, bool no_warn)
{
	return __cma_alloc(cma, count, align, GFP_KERNEL | (no_warn ? __GFP_NOWARN : 0));
}
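
/*
 * Example (minimal sketch, with a hypothetical "foo_cma" area declared at
 * boot): allocating and freeing a physically contiguous buffer at runtime:
 *
 *	struct page *pages;
 *
 *	pages = cma_alloc(foo_cma, 16, 0, false);
 *	if (!pages)
 *		return -ENOMEM;
 *	// use the 16 contiguous pages starting at page_to_phys(pages)
 *	cma_release(foo_cma, pages, 16);
 */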

struct folio *cma_alloc_folio(struct cma *cma, int order, gfp_t gfp)
{
	struct page *page;

	if (WARN_ON(!order || !(gfp & __GFP_COMP)))
		return NULL;

	page = __cma_alloc(cma, 1 << order, order, gfp);

	return page ? page_folio(page) : NULL;
}

bool cma_pages_valid(struct cma *cma, const struct page *pages,
		     unsigned long count)
{
	unsigned long pfn, end;
	int r;
	struct cma_memrange *cmr;
	bool ret;

	if (!cma || !pages || count > cma->count)
		return false;

	pfn = page_to_pfn(pages);
	ret = false;

	for (r = 0; r < cma->nranges; r++) {
		cmr = &cma->ranges[r];
		end = cmr->base_pfn + cmr->count;
		if (pfn >= cmr->base_pfn && pfn < end) {
			ret = pfn + count <= end;
			break;
		}
	}

	if (!ret)
		pr_debug("%s(page %p, count %lu)\n",
				__func__, (void *)pages, count);

	return ret;
}

/**
 * cma_release() - release allocated pages
 * @cma:   Contiguous memory region for which the allocation is performed.
 * @pages: Allocated pages.
 * @count: Number of allocated pages.
 *
 * This function releases memory allocated by cma_alloc().
 * It returns false when the provided pages do not belong to the
 * contiguous area, and true otherwise.
 */
bool cma_release(struct cma *cma, const struct page *pages,
		 unsigned long count)
{
	struct cma_memrange *cmr;
	unsigned long pfn, end_pfn;
	int r;

	pr_debug("%s(page %p, count %lu)\n", __func__, (void *)pages, count);

	if (!cma_pages_valid(cma, pages, count))
		return false;

	pfn = page_to_pfn(pages);
	end_pfn = pfn + count;

	for (r = 0; r < cma->nranges; r++) {
		cmr = &cma->ranges[r];
		if (pfn >= cmr->base_pfn &&
		    pfn < (cmr->base_pfn + cmr->count)) {
			VM_BUG_ON(end_pfn > cmr->base_pfn + cmr->count);
			break;
		}
	}

	if (r == cma->nranges)
		return false;

	free_contig_range(pfn, count);
	cma_clear_bitmap(cma, cmr, pfn, count);
	cma_sysfs_account_release_pages(cma, count);
	trace_cma_release(cma->name, pfn, pages, count);

	return true;
}

bool cma_free_folio(struct cma *cma, const struct folio *folio)
{
	if (WARN_ON(!folio_test_large(folio)))
		return false;

	return cma_release(cma, &folio->page, folio_nr_pages(folio));
}
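
/*
 * Example (minimal sketch, with a hypothetical "foo_cma" area): the folio
 * interface requires a compound allocation, so __GFP_COMP must be set and
 * the order must be non-zero:
 *
 *	struct folio *folio;
 *
 *	folio = cma_alloc_folio(foo_cma, 4, GFP_KERNEL | __GFP_COMP);
 *	if (folio)
 *		cma_free_folio(foo_cma, folio);
 */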

int cma_for_each_area(int (*it)(struct cma *cma, void *data), void *data)
{
	int i;

	for (i = 0; i < cma_area_count; i++) {
		int ret = it(&cma_areas[i], data);

		if (ret)
			return ret;
	}

	return 0;
}

bool cma_intersects(struct cma *cma, unsigned long start, unsigned long end)
{
	int r;
	struct cma_memrange *cmr;
	unsigned long rstart, rend;

	for (r = 0; r < cma->nranges; r++) {
		cmr = &cma->ranges[r];

		rstart = PFN_PHYS(cmr->base_pfn);
		rend = PFN_PHYS(cmr->base_pfn + cmr->count);
		if (end < rstart)
			continue;
		if (start >= rend)
			continue;
		return true;
	}

	return false;
}

/*
 * Very basic function to reserve memory from a CMA area that has not
 * yet been activated. This is expected to be called early, when the
 * system is single-threaded, so there is no locking. The alignment
 * checking is restrictive - only pageblock-aligned areas
 * (CMA_MIN_ALIGNMENT_BYTES) may be reserved through this function.
 * This keeps things simple, and is enough for the current use case.
 *
 * The CMA bitmaps have not yet been allocated, so just start
 * reserving from the bottom up, using a PFN to keep track
 * of what has been reserved. Unreserving is not possible.
 *
 * The caller is responsible for initializing the page structures
 * in the area properly, since this just points to memblock-allocated
 * memory. The caller should subsequently use init_cma_pageblock to
 * set the migrate type and CMA stats for the pageblocks that were
 * reserved.
 *
 * If the CMA area fails to activate later, memory obtained through
 * this interface is not handed to the page allocator; this is
 * the responsibility of the caller (e.g. like normal memblock-allocated
 * memory).
 */
void __init *cma_reserve_early(struct cma *cma, unsigned long size)
{
	int r;
	struct cma_memrange *cmr;
	unsigned long available;
	void *ret = NULL;

	if (!cma || !cma->count)
		return NULL;
	/*
	 * Can only be called early in init.
	 */
	if (test_bit(CMA_ACTIVATED, &cma->flags))
		return NULL;

	if (!IS_ALIGNED(size, CMA_MIN_ALIGNMENT_BYTES))
		return NULL;

	if (!IS_ALIGNED(size, (PAGE_SIZE << cma->order_per_bit)))
		return NULL;

	size >>= PAGE_SHIFT;

	if (size > cma->available_count)
		return NULL;

	for (r = 0; r < cma->nranges; r++) {
		cmr = &cma->ranges[r];
		available = cmr->count - (cmr->early_pfn - cmr->base_pfn);
		if (size <= available) {
			ret = phys_to_virt(PFN_PHYS(cmr->early_pfn));
			cmr->early_pfn += size;
			cma->available_count -= size;
			return ret;
		}
	}

	return ret;
}