xref: /kvm-unit-tests/lib/alloc_page.c (revision 550b4683d05f70eee52230ef77a8f97f48642181)
/*
 * This work is licensed under the terms of the GNU LGPL, version 2.
 *
 * This is a simple allocator that provides contiguous physical addresses
 * with page granularity.
 */
#include "libcflat.h"
#include "alloc.h"
#include "alloc_phys.h"
#include "alloc_page.h"
#include "bitops.h"
#include "list.h"
#include <asm/page.h>
#include <asm/io.h>
#include <asm/spinlock.h>
#include <asm/memory_areas.h>

#define IS_ALIGNED_ORDER(x,order) IS_ALIGNED((x),BIT_ULL(order))
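/*
 * One freelist is needed for each possible block order; the number of
 * possible orders is bounded by the number of page frame number bits
 * (BITS_PER_LONG - PAGE_SHIFT).
 */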
#define NLISTS ((BITS_PER_LONG) - (PAGE_SHIFT))

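/*
 * Each per-page metadata byte encodes the order of the block the page
 * belongs to in its low 6 bits and the page status in its top 2 bits;
 * e.g. 0x82 means the page belongs to an allocated block of order 2
 * (4 pages).
 */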
#define ORDER_MASK		0x3f
#define STATUS_MASK		0xc0

#define STATUS_FRESH		0x00
#define STATUS_FREE		0x40
#define STATUS_ALLOCATED	0x80
#define STATUS_SPECIAL		0xc0

#define IS_FRESH(x)	(((x) & STATUS_MASK) == STATUS_FRESH)
#define IS_FREE(x)	(((x) & STATUS_MASK) == STATUS_FREE)
#define IS_ALLOCATED(x)	(((x) & STATUS_MASK) == STATUS_ALLOCATED)
#define IS_SPECIAL(x)	(((x) & STATUS_MASK) == STATUS_SPECIAL)

#define IS_USABLE(x)	(IS_FREE(x) || IS_FRESH(x))

typedef phys_addr_t pfn_t;

struct mem_area {
	/* Physical frame number of the first usable frame in the area */
	pfn_t base;
	/* Physical frame number of the first frame outside the area */
	pfn_t top;
	/* Per page metadata, each entry a combination of STATUS_* and order */
	u8 *page_states;
	/* Highest block order available in this area */
	u8 max_order;
	/* One freelist for each possible block size, up to NLISTS */
	struct linked_list freelists[NLISTS];
};
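/*
 * Illustrative layout of a memory area (see _page_alloc_init_area): the
 * metadata table occupies the first pages of the area, the allocatable
 * pages follow immediately afterwards:
 *
 *   start_pfn                base                                 top
 *   | page_states[] metadata | usable pages, one metadata byte each |
 */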

/* Descriptors for each possible area */
static struct mem_area areas[MAX_AREAS];
/* Mask of initialized areas */
static unsigned int areas_mask;
/* Protects areas and areas mask */
static struct spinlock lock;

bool page_alloc_initialized(void)
{
	return areas_mask != 0;
}

/*
 * Each memory area contains an array of metadata entries at the very
 * beginning. The usable memory follows immediately afterwards.
 * This function returns true if the given pfn falls anywhere within the
 * memory area, including the metadata area.
 */
static inline bool area_contains_pfn(struct mem_area *a, pfn_t pfn)
{
	return (pfn >= virt_to_pfn(a->page_states)) && (pfn < a->top);
}

/*
 * Each memory area contains an array of metadata entries at the very
 * beginning. The usable memory follows immediately afterwards.
 * This function returns true if the given pfn falls in the usable range of
 * the given memory area.
 */
static inline bool usable_area_contains_pfn(struct mem_area *a, pfn_t pfn)
{
	return (pfn >= a->base) && (pfn < a->top);
}

/*
 * Splits the free block starting at addr into 2 blocks of half the size.
 *
 * The function depends on the following assumptions:
 * - the allocator must have been initialized
 * - the block must be within the memory area
 * - all pages in the block must be free and not special
 * - the pointer must point to the start of the block
 * - all pages in the block must have the same block size
 * - the block size must be greater than 0
 * - the block size must be smaller than the maximum allowed
 * - the block must be in a free list
 * - the function is called with the lock held
 */
static void split(struct mem_area *a, void *addr)
{
	pfn_t i, idx, pfn = virt_to_pfn(addr);
	u8 metadata, order;

	assert(a && usable_area_contains_pfn(a, pfn));
	idx = pfn - a->base;
	metadata = a->page_states[idx];
	order = metadata & ORDER_MASK;
	assert(IS_USABLE(metadata) && order && (order < NLISTS));
	assert(IS_ALIGNED_ORDER(pfn, order));
	assert(usable_area_contains_pfn(a, pfn + BIT(order) - 1));

	/* Remove the block from its free list */
	list_remove(addr);

	/* update the block size for each page in the block */
	for (i = 0; i < BIT(order); i++) {
		assert(a->page_states[idx + i] == metadata);
		a->page_states[idx + i] = metadata - 1;
	}
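	/*
	 * If we just removed the only block of the largest order, the
	 * maximum available order in this area shrinks by one.
	 */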
	if ((order == a->max_order) && (is_list_empty(a->freelists + order)))
		a->max_order--;
	order--;
	/* add the first half block to the appropriate free list */
	list_add(a->freelists + order, addr);
	/* add the second half block to the appropriate free list */
	list_add(a->freelists + order, pfn_to_virt(pfn + BIT(order)));
}
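/*
 * Illustrative example (page frame numbers are arbitrary): splitting a free
 * order 3 block starting at pfn 0x80 removes it from freelists[3] and adds
 * two order 2 blocks, starting at pfn 0x80 and pfn 0x84, to freelists[2];
 * the metadata byte of all 8 pages goes from (status | 3) to (status | 2).
 */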

/*
 * Returns a block aligned to at least 2^al pages and at least 2^sz pages in
 * size. If there is not enough free memory, NULL is returned.
 *
 * Neither parameter may be larger than the largest allowed order.
 */
static void *page_memalign_order(struct mem_area *a, u8 al, u8 sz)
{
	struct linked_list *p;
	u8 order;

	assert((al < NLISTS) && (sz < NLISTS));
	/* we need the bigger of the two as starting point */
	order = sz > al ? sz : al;

	/* search all free lists for some memory */
	for ( ; order <= a->max_order; order++) {
		p = a->freelists[order].next;
		if (!is_list_empty(p))
			break;
	}
	/* out of memory */
	if (order > a->max_order)
		return NULL;

	/*
	 * the block is bigger than what we need because either there were
	 * no smaller blocks, or the smaller blocks were not aligned to our
	 * needs; therefore we split the block until we reach the needed size
	 */
	for (; order > sz; order--)
		split(a, p);

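	/*
	 * Take the block off its free list and mark every one of its pages
	 * as allocated, recording the final order in the metadata.
	 */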
	list_remove(p);
	memset(a->page_states + (virt_to_pfn(p) - a->base), STATUS_ALLOCATED | order, BIT(order));
	return p;
}

static struct mem_area *get_area(pfn_t pfn)
{
	uintptr_t i;

	for (i = 0; i < MAX_AREAS; i++)
		if ((areas_mask & BIT(i)) && usable_area_contains_pfn(areas + i, pfn))
			return areas + i;
	return NULL;
}

/*
 * Try to merge two blocks into a bigger one.
 * Returns true in case of a successful merge.
 * Merging will succeed only if both blocks have the same block size and are
 * both free.
 *
 * The function depends on the following assumptions:
 * - the first parameter is strictly smaller than the second
 * - each parameter must point to the start of its block
 * - the two parameters point to adjacent blocks
 * - the two blocks are both in a free list
 * - all of the pages of the two blocks must be free
 * - all of the pages of the two blocks must have the same block size
 * - the function is called with the lock held
 */
static bool coalesce(struct mem_area *a, u8 order, pfn_t pfn, pfn_t pfn2)
{
	pfn_t first, second, i;

	assert(IS_ALIGNED_ORDER(pfn, order) && IS_ALIGNED_ORDER(pfn2, order));
	assert(pfn2 == pfn + BIT(order));
	assert(a);

	/* attempting to coalesce two blocks that belong to different areas */
	if (!usable_area_contains_pfn(a, pfn) || !usable_area_contains_pfn(a, pfn2 + BIT(order) - 1))
		return false;
	first = pfn - a->base;
	second = pfn2 - a->base;
	/* the two blocks have different sizes, cannot coalesce */
	if ((a->page_states[first] != order) || (a->page_states[second] != order))
		return false;

	/* we can coalesce, remove both blocks from their freelists */
	list_remove(pfn_to_virt(pfn2));
	list_remove(pfn_to_virt(pfn));
	/* check the metadata entries of both halves and update them with the new size */
	for (i = 0; i < (2ull << order); i++) {
		assert(a->page_states[first + i] == order);
		a->page_states[first + i] = order + 1;
	}
	/* finally add the newly coalesced block to the appropriate freelist */
	list_add(a->freelists + order + 1, pfn_to_virt(pfn));
	if (order + 1 > a->max_order)
		a->max_order = order + 1;
	return true;
}

/*
 * Free a block of memory.
 * The parameter can be NULL, in which case nothing happens.
 *
 * The function depends on the following assumptions:
 * - the parameter is page aligned
 * - the parameter belongs to an existing memory area
 * - the parameter points to the beginning of the block
 * - the size of the block is less than the maximum allowed
 * - the block is completely contained in its memory area
 * - all pages in the block have the same block size
 * - no pages in the memory block were already free
 * - no pages in the memory block are special
 */
static void _free_pages(void *mem)
{
	pfn_t pfn2, pfn = virt_to_pfn(mem);
	struct mem_area *a = NULL;
	uintptr_t i, p;
	u8 order;

	if (!mem)
		return;
	assert(IS_ALIGNED((uintptr_t)mem, PAGE_SIZE));

	/* find which area this pointer belongs to */
	a = get_area(pfn);
	assert_msg(a, "memory does not belong to any area: %p", mem);

	p = pfn - a->base;
	order = a->page_states[p] & ORDER_MASK;

	/* ensure that the first page is allocated and not special */
	assert(IS_ALLOCATED(a->page_states[p]));
	/* ensure that the order has a sane value */
	assert(order < NLISTS);
	/* ensure that the block is aligned properly for its size */
	assert(IS_ALIGNED_ORDER(pfn, order));
	/* ensure that the area can contain the whole block */
	assert(usable_area_contains_pfn(a, pfn + BIT(order) - 1));

	for (i = 0; i < BIT(order); i++) {
		/* check that all pages of the block have consistent metadata */
		assert(a->page_states[p + i] == (STATUS_ALLOCATED | order));
		/* set the page as free */
		a->page_states[p + i] = STATUS_FREE | order;
	}
	/* provisionally add the block to the appropriate free list */
	list_add(a->freelists + order, mem);
	/* try to coalesce the block with neighbouring blocks if possible */
	do {
		/*
		 * get the order again since it might have changed after
		 * coalescing in a previous iteration
		 */
		order = a->page_states[p] & ORDER_MASK;
		/*
		 * let's consider this block and the next one if this block
		 * is aligned to the next size, otherwise let's consider the
		 * previous block and this one
		 */
		if (!IS_ALIGNED_ORDER(pfn, order + 1))
			pfn = pfn - BIT(order);
		pfn2 = pfn + BIT(order);
		/* repeat as long as we manage to coalesce something */
	} while (coalesce(a, order, pfn, pfn2));
}

void free_pages(void *mem)
{
	spin_lock(&lock);
	_free_pages(mem);
	spin_unlock(&lock);
}

static int _reserve_one_page(pfn_t pfn)
{
	struct mem_area *a;
	pfn_t mask, i;

	a = get_area(pfn);
	if (!a)
		return -1;
	i = pfn - a->base;
	if (!IS_USABLE(a->page_states[i]))
		return -1;
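	/*
	 * Split the enclosing block until the page at pfn is a block of its
	 * own (order 0). The metadata byte of a fresh page is just its
	 * order, so GENMASK yields the alignment mask of the enclosing
	 * block, i.e. the start of the block that has to be split.
	 */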
	while (a->page_states[i]) {
		mask = GENMASK_ULL(63, a->page_states[i]);
		split(a, pfn_to_virt(pfn & mask));
	}
	a->page_states[i] = STATUS_SPECIAL;
	return 0;
}

static void _unreserve_one_page(pfn_t pfn)
{
	struct mem_area *a;
	pfn_t i;

	a = get_area(pfn);
	assert(a);
	i = pfn - a->base;
	assert(a->page_states[i] == STATUS_SPECIAL);
	a->page_states[i] = STATUS_ALLOCATED;
	_free_pages(pfn_to_virt(pfn));
}

int reserve_pages(phys_addr_t addr, size_t n)
{
	pfn_t pfn;
	size_t i;

	assert(IS_ALIGNED(addr, PAGE_SIZE));
	pfn = addr >> PAGE_SHIFT;
	spin_lock(&lock);
	for (i = 0; i < n; i++)
		if (_reserve_one_page(pfn + i))
			break;
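	/*
	 * If not all pages could be reserved, roll back the ones that were,
	 * and zero n so that the failure is reported to the caller below.
	 */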
	if (i < n) {
		for (n = 0; n < i; n++)
			_unreserve_one_page(pfn + n);
		n = 0;
	}
	spin_unlock(&lock);
	return -!n;
}

void unreserve_pages(phys_addr_t addr, size_t n)
{
	pfn_t pfn;
	size_t i;

	assert(IS_ALIGNED(addr, PAGE_SIZE));
	pfn = addr >> PAGE_SHIFT;
	spin_lock(&lock);
	for (i = 0; i < n; i++)
		_unreserve_one_page(pfn + i);
	spin_unlock(&lock);
}
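/*
 * Illustrative example (addresses are arbitrary): a test that needs the two
 * pages at physical address 0x9000 to stay out of the allocator's hands can
 * call reserve_pages(0x9000, 2); the pages are marked STATUS_SPECIAL and are
 * not handed out by the allocation functions until unreserve_pages(0x9000, 2)
 * returns them to the free pool.
 */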

static void *page_memalign_order_flags(u8 al, u8 ord, u32 flags)
{
	void *res = NULL;
	int i, area;

	spin_lock(&lock);
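	/*
	 * If the caller requested one or more specific areas in the low bits
	 * of flags, only search those (and only if they are initialized),
	 * otherwise search all initialized areas.
	 */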
	area = (flags & AREA_MASK) ? flags & areas_mask : areas_mask;
	for (i = 0; !res && (i < MAX_AREAS); i++)
		if (area & BIT(i))
			res = page_memalign_order(areas + i, al, ord);
	spin_unlock(&lock);
	return res;
}

/*
 * Allocates (1 << order) physically contiguous and naturally aligned pages.
 * Returns NULL if the allocation was not possible.
 */
void *alloc_pages_flags(unsigned int order, unsigned int flags)
{
	return page_memalign_order_flags(order, order, flags);
}
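/*
 * Illustrative example (no specific area requested): alloc_pages_flags(2, 0)
 * returns 4 physically contiguous pages, aligned to 4 pages, taken from any
 * initialized area, or NULL if no area has a large enough free block.
 */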

/*
 * Allocates physically contiguous memory of at least the requested size and
 * with at least the requested alignment, both rounded up to a power of two
 * number of pages. Returns NULL if the allocation was not possible.
 */
void *memalign_pages_flags(size_t alignment, size_t size, unsigned int flags)
{
	assert(is_power_of_2(alignment));
	alignment = get_order(PAGE_ALIGN(alignment) >> PAGE_SHIFT);
	size = get_order(PAGE_ALIGN(size) >> PAGE_SHIFT);
	assert(alignment < NLISTS);
	assert(size < NLISTS);
	return page_memalign_order_flags(alignment, size, flags);
}


static struct alloc_ops page_alloc_ops = {
	.memalign = memalign_pages,
	.free = free_pages,
};

/*
 * Enables the page allocator.
 *
 * Prerequisites:
 * - at least one memory area has been initialized
 */
void page_alloc_ops_enable(void)
{
	spin_lock(&lock);
	assert(page_alloc_initialized());
	alloc_ops = &page_alloc_ops;
	spin_unlock(&lock);
}

/*
 * Adds a new memory area to the pool of available memory.
 *
 * Prerequisites:
 * - the lock is held
 * - start and top are page frame numbers
 * - start is smaller than top
 * - top does not fall outside of addressable memory
 * - there is at least one more slot free for memory areas
 * - if a specific memory area number has been indicated, it needs to be free
 * - the memory area to add does not overlap with existing areas
 * - the memory area to add has at least 5 pages available
 */
static void _page_alloc_init_area(u8 n, pfn_t start_pfn, pfn_t top_pfn)
{
	size_t table_size, npages, i;
	struct mem_area *a;
	u8 order = 0;

	/* the number must be within the allowed range */
	assert(n < MAX_AREAS);
	/* the new area number must be unused */
	assert(!(areas_mask & BIT(n)));

	/* other basic sanity checks */
	assert(top_pfn > start_pfn);
	assert(top_pfn - start_pfn > 4);
	assert(top_pfn < BIT_ULL(sizeof(void *) * 8 - PAGE_SHIFT));

	/* calculate the size of the metadata table in pages */
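	/*
	 * Each usable page needs one byte of metadata, and the metadata
	 * table itself consumes pages from the start of the range, so the
	 * smallest table_size satisfying
	 * table_size * PAGE_SIZE >= (top_pfn - start_pfn) - table_size
	 * is used.
	 */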
	table_size = (top_pfn - start_pfn + PAGE_SIZE) / (PAGE_SIZE + 1);

	/* fill in the values of the new area */
	a = areas + n;
	a->page_states = pfn_to_virt(start_pfn);
	a->base = start_pfn + table_size;
	a->top = top_pfn;
	a->max_order = 0;
	npages = top_pfn - a->base;
	assert((a->base - start_pfn) * PAGE_SIZE >= npages);

	/* check that the new area does not overlap with any existing areas */
	for (i = 0; i < MAX_AREAS; i++) {
		if (!(areas_mask & BIT(i)))
			continue;
		assert(!area_contains_pfn(areas + i, start_pfn));
		assert(!area_contains_pfn(areas + i, top_pfn - 1));
		assert(!area_contains_pfn(a, virt_to_pfn(areas[i].page_states)));
		assert(!area_contains_pfn(a, areas[i].top - 1));
	}
	/* initialize all freelists for the new area */
	for (i = 0; i < NLISTS; i++)
		a->freelists[i].prev = a->freelists[i].next = a->freelists + i;

	/* initialize the metadata for the available memory */
	for (i = a->base; i < a->top; i += 1ull << order) {
		/* search which order to start from */
		while (i + BIT(order) > a->top) {
			assert(order);
			order--;
		}
		/*
		 * Both loops are needed: the first shrinks the block if it
		 * would overrun the top of the area, the second grows it as
		 * long as it stays naturally aligned and still fits in the
		 * area.
		 */
		while (IS_ALIGNED_ORDER(i, order + 1) && (i + BIT(order + 1) <= a->top))
			order++;
		assert(order < NLISTS);
		/* initialize the metadata and add to the freelist */
		memset(a->page_states + (i - a->base), STATUS_FRESH | order, BIT(order));
		list_add(a->freelists + order, pfn_to_virt(i));
		if (order > a->max_order)
			a->max_order = order;
	}
	/* finally mark the area as present */
	areas_mask |= BIT(n);
}

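/*
 * Helper for page_alloc_init_area: initializes area n with the part of
 * [base_pfn, *top_pfn) that lies at or above cutoff, if there is one.
 * On return, *top_pfn holds the upper bound of the still uninitialized
 * remainder below cutoff (or 0 if the whole range was consumed), ready to
 * be passed on for the next, lower area.
 */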
static void __page_alloc_init_area(u8 n, pfn_t cutoff, pfn_t base_pfn, pfn_t *top_pfn)
{
	if (*top_pfn > cutoff) {
		spin_lock(&lock);
		if (base_pfn >= cutoff) {
			_page_alloc_init_area(n, base_pfn, *top_pfn);
			*top_pfn = 0;
		} else {
			_page_alloc_init_area(n, cutoff, *top_pfn);
			*top_pfn = cutoff;
		}
		spin_unlock(&lock);
	}
}

/*
 * Adds a new memory area to the pool of available memory.
 * If n is AREA_ANY_NUMBER, the range is split among the architecture-defined
 * areas, from the highest to the lowest; otherwise the whole range goes to
 * area n.
 *
 * Prerequisites:
 * see _page_alloc_init_area
 */
void page_alloc_init_area(u8 n, phys_addr_t base_pfn, phys_addr_t top_pfn)
{
	if (n != AREA_ANY_NUMBER) {
		__page_alloc_init_area(n, 0, base_pfn, &top_pfn);
		return;
	}
#ifdef AREA_HIGH_PFN
	__page_alloc_init_area(AREA_HIGH_NUMBER, AREA_HIGH_PFN, base_pfn, &top_pfn);
#endif
	__page_alloc_init_area(AREA_NORMAL_NUMBER, AREA_NORMAL_PFN, base_pfn, &top_pfn);
#ifdef AREA_LOW_PFN
	__page_alloc_init_area(AREA_LOW_NUMBER, AREA_LOW_PFN, base_pfn, &top_pfn);
#endif
#ifdef AREA_LOWEST_PFN
	__page_alloc_init_area(AREA_LOWEST_NUMBER, AREA_LOWEST_PFN, base_pfn, &top_pfn);
#endif
}
538