xref: /kvm-unit-tests/lib/alloc_page.c (revision 4a54e8a3a88be171814e31dd6ab9b7a766644e32)
/*
 * This work is licensed under the terms of the GNU LGPL, version 2.
 *
 * This is a simple allocator that provides contiguous physical addresses
 * with page granularity.
 */
#include "libcflat.h"
#include "alloc.h"
#include "alloc_phys.h"
#include "alloc_page.h"
#include "bitops.h"
#include "list.h"
#include <asm/page.h>
#include <asm/io.h>
#include <asm/spinlock.h>
#include <asm/memory_areas.h>

#define IS_ALIGNED_ORDER(x,order) IS_ALIGNED((x),BIT_ULL(order))
#define NLISTS ((BITS_PER_LONG) - (PAGE_SHIFT))

#define ORDER_MASK		0x3f
#define STATUS_MASK		0xc0

#define STATUS_FRESH		0x00
#define STATUS_FREE		0x40
#define STATUS_ALLOCATED	0x80
#define STATUS_SPECIAL		0xc0

#define IS_FRESH(x)	(((x) & STATUS_MASK) == STATUS_FRESH)
#define IS_FREE(x)	(((x) & STATUS_MASK) == STATUS_FREE)
#define IS_ALLOCATED(x)	(((x) & STATUS_MASK) == STATUS_ALLOCATED)
#define IS_SPECIAL(x)	(((x) & STATUS_MASK) == STATUS_SPECIAL)

#define IS_USABLE(x)	(IS_FREE(x) || IS_FRESH(x))

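/*
 * Example (worked, for illustration only): the head page of a free,
 * previously used ("dirty") block of 2^3 pages carries the metadata byte
 * STATUS_FREE | 3 = 0x43; masking with ORDER_MASK recovers the order 3 and
 * IS_FREE(0x43) holds.  A never-touched block of the same order would carry
 * STATUS_FRESH | 3 = 0x03, for which both IS_FRESH() and IS_USABLE() hold.
 */
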
typedef phys_addr_t pfn_t;

struct mem_area {
	/* Physical frame number of the first usable frame in the area */
	pfn_t base;
	/* Physical frame number of the first frame outside the area */
	pfn_t top;
	/* Per-page metadata; each entry combines a STATUS_* value and the block order */
	u8 *page_states;
	/* Highest block order available in this area */
	u8 max_order;
	/* One freelist for each possible block size, up to NLISTS */
	struct linked_list freelists[NLISTS];
};

/* Descriptors for each possible area */
static struct mem_area areas[MAX_AREAS];
/* Mask of initialized areas */
static unsigned int areas_mask;
/* Protects the areas array and areas_mask */
static struct spinlock lock;

bool page_alloc_initialized(void)
{
	return areas_mask != 0;
}

/*
 * Each memory area contains an array of metadata entries at the very
 * beginning. The usable memory follows immediately afterwards.
 * This function returns true if the given pfn falls anywhere within the
 * memory area, including the metadata area.
 */
static inline bool area_contains_pfn(struct mem_area *a, pfn_t pfn)
{
	return (pfn >= virt_to_pfn(a->page_states)) && (pfn < a->top);
}

/*
 * Each memory area contains an array of metadata entries at the very
 * beginning. The usable memory follows immediately afterwards.
 * This function returns true if the given pfn falls in the usable range of
 * the given memory area.
 */
static inline bool usable_area_contains_pfn(struct mem_area *a, pfn_t pfn)
{
	return (pfn >= a->base) && (pfn < a->top);
}

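/*
 * Worked example (illustrative numbers only): suppose an area is created
 * from start_pfn = 0x1000 to top_pfn = 0x2000.  The first frames of that
 * range hold the page_states array (one byte per usable frame), so base
 * ends up slightly above 0x1000 while top stays at 0x2000.  A pfn just
 * above 0x1000 then satisfies area_contains_pfn() (it may be metadata),
 * but only pfns at or above base satisfy usable_area_contains_pfn().
 */
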
/*
 * Splits the free block starting at addr into 2 blocks of half the size.
 *
 * The function depends on the following assumptions:
 * - The allocator must have been initialized
 * - the block must be within the memory area
 * - all pages in the block must be free and not special
 * - the pointer must point to the start of the block
 * - all pages in the block must have the same block size.
 * - the block size must be greater than 0
 * - the block size must be smaller than the maximum allowed
 * - the block must be in a free list
 * - the function is called with the lock held
 */
static void split(struct mem_area *a, void *addr)
{
	pfn_t i, idx, pfn = virt_to_pfn(addr);
	u8 metadata, order;

	assert(a && usable_area_contains_pfn(a, pfn));
	idx = pfn - a->base;
	metadata = a->page_states[idx];
	order = metadata & ORDER_MASK;
	assert(IS_USABLE(metadata) && order && (order < NLISTS));
	assert(IS_ALIGNED_ORDER(pfn, order));
	assert(usable_area_contains_pfn(a, pfn + BIT(order) - 1));

	/* Remove the block from its free list */
	list_remove(addr);

	/* update the block size for each page in the block */
	for (i = 0; i < BIT(order); i++) {
		assert(a->page_states[idx + i] == metadata);
		a->page_states[idx + i] = metadata - 1;
	}
	if ((order == a->max_order) && (is_list_empty(a->freelists + order)))
		a->max_order--;
	order--;

	/* add the two half blocks to the appropriate free list */
	if (IS_FRESH(metadata)) {
		/* add to the tail if the blocks are fresh */
		list_add_tail(a->freelists + order, addr);
		list_add_tail(a->freelists + order, pfn_to_virt(pfn + BIT(order)));
	} else {
		/* add to the front if the blocks are dirty */
		list_add(a->freelists + order, addr);
		list_add(a->freelists + order, pfn_to_virt(pfn + BIT(order)));
	}
}

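/*
 * Example (illustrative numbers): splitting a fresh order-3 block starting
 * at pfn 0x40 removes it from freelists[3], rewrites the metadata of all 8
 * pages from order 3 to order 2, and queues two order-2 blocks, one at pfn
 * 0x40 and one at pfn 0x44, at the tail of freelists[2].
 */
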
/*
 * Returns a block whose alignment and size are at least the parameter values.
 * If there is not enough free memory, NULL is returned.
 *
 * Neither parameter may be larger than the largest allowed order.
 */
static void *page_memalign_order(struct mem_area *a, u8 al, u8 sz, bool fresh)
{
	struct linked_list *p;
	pfn_t idx;
	u8 order;

	assert((al < NLISTS) && (sz < NLISTS));
	/* we need the bigger of the two as a starting point */
	order = sz > al ? sz : al;
	/*
	 * we need to go one order up if we want a completely fresh block,
	 * since the first page contains the freelist pointers, and
	 * therefore it is always dirty
	 */
	order += fresh;

	/* search all free lists for some memory */
	for ( ; order <= a->max_order; order++) {
		p = fresh ? a->freelists[order].prev : a->freelists[order].next;
		if (is_list_empty(p))
			continue;
		idx = virt_to_pfn(p) - a->base;
		if (fresh && !IS_FRESH(a->page_states[idx]))
			continue;
		break;
	}

	/* out of memory */
	if (order > a->max_order)
		return NULL;

	/*
	 * the block is bigger than what we need because either there were
	 * no smaller blocks, or the smaller blocks were not aligned to our
	 * needs; therefore we split the block until we reach the needed size
	 */
	for (; order > sz; order--)
		split(a, p);

	list_remove(p);
	/* We now have a block twice the size, but the first page is dirty. */
	if (fresh) {
		order--;
		/* Put back the first (partially dirty) half of the block */
		memset(a->page_states + idx, STATUS_FRESH | order, BIT(order));
		list_add_tail(a->freelists + order, p);
		idx += BIT(order);
		p = pfn_to_virt(a->base + idx);
	}
	memset(a->page_states + idx, STATUS_ALLOCATED | order, BIT(order));
	return p;
}

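/*
 * Walkthrough (illustrative): with al = 0 and sz = 2, if the smallest
 * suitable block on the freelists has order 4, it is split twice; p keeps
 * pointing at the start of the original block, so the returned order-2
 * block keeps the original, stronger alignment.  For fresh requests the
 * search starts one order higher and the first, dirty half of the final
 * block (the part that held list pointers) is handed back to the freelist
 * before the remaining half is marked allocated and returned.
 */
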
static struct mem_area *get_area(pfn_t pfn)
{
	uintptr_t i;

	for (i = 0; i < MAX_AREAS; i++)
		if ((areas_mask & BIT(i)) && usable_area_contains_pfn(areas + i, pfn))
			return areas + i;
	return NULL;
}

/*
 * Try to merge two blocks into a bigger one.
 * Returns true in case of a successful merge.
 * Merging will succeed only if both blocks have the same block size and are
 * both free.
 *
 * The function depends on the following assumptions:
 * - the first parameter is strictly smaller than the second
 * - each parameter must point to the start of its block
 * - the two parameters point to adjacent blocks
 * - the two blocks are both in a free list
 * - all of the pages of the two blocks must be free
 * - all of the pages of the two blocks must have the same block size
 * - the function is called with the lock held
 */
static bool coalesce(struct mem_area *a, u8 order, pfn_t pfn, pfn_t pfn2)
{
	pfn_t first, second, i;

	assert(IS_ALIGNED_ORDER(pfn, order) && IS_ALIGNED_ORDER(pfn2, order));
	assert(pfn2 == pfn + BIT(order));
	assert(a);

	/* attempting to coalesce two blocks that belong to different areas */
	if (!usable_area_contains_pfn(a, pfn) || !usable_area_contains_pfn(a, pfn2 + BIT(order) - 1))
		return false;
	first = pfn - a->base;
	second = pfn2 - a->base;
	/* the two blocks have different sizes, cannot coalesce */
	if ((a->page_states[first] != order) || (a->page_states[second] != order))
		return false;

	/* we can coalesce, remove both blocks from their freelists */
	list_remove(pfn_to_virt(pfn2));
	list_remove(pfn_to_virt(pfn));
	/* check the metadata entries and update with the new size */
	for (i = 0; i < (2ull << order); i++) {
		assert(a->page_states[first + i] == order);
		a->page_states[first + i] = order + 1;
	}
	/* finally add the newly coalesced block to the appropriate freelist */
	list_add(a->freelists + order + 1, pfn_to_virt(pfn));
	if (order + 1 > a->max_order)
		a->max_order = order + 1;
	return true;
}

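/*
 * Example (illustrative numbers): when the order-2 block at pfn 0x44 and
 * its buddy at pfn 0x40 are both free with matching metadata, coalesce()
 * removes both from freelists[2], relabels all 8 pages as order 3 and
 * queues a single order-3 block starting at pfn 0x40 on freelists[3].
 */
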
/*
 * Free a block of memory.
 * The parameter can be NULL, in which case nothing happens.
 *
 * The function depends on the following assumptions:
 * - the parameter is page aligned
 * - the parameter belongs to an existing memory area
 * - the parameter points to the beginning of the block
 * - the size of the block is less than the maximum allowed
 * - the block is completely contained in its memory area
 * - all pages in the block have the same block size
 * - no pages in the memory block were already free
 * - no pages in the memory block are special
 */
static void _free_pages(void *mem)
{
	pfn_t pfn2, pfn = virt_to_pfn(mem);
	struct mem_area *a = NULL;
	uintptr_t i, p;
	u8 order;

	if (!mem)
		return;
	assert(IS_ALIGNED((uintptr_t)mem, PAGE_SIZE));

	/* find which area this pointer belongs to */
	a = get_area(pfn);
	assert_msg(a, "memory does not belong to any area: %p", mem);

	p = pfn - a->base;
	order = a->page_states[p] & ORDER_MASK;

	/* ensure that the first page is allocated and not special */
	assert(IS_ALLOCATED(a->page_states[p]));
	/* ensure that the order has a sane value */
	assert(order < NLISTS);
	/* ensure that the block is aligned properly for its size */
	assert(IS_ALIGNED_ORDER(pfn, order));
	/* ensure that the area can contain the whole block */
	assert(usable_area_contains_pfn(a, pfn + BIT(order) - 1));

	for (i = 0; i < BIT(order); i++) {
		/* check that all pages of the block have consistent metadata */
		assert(a->page_states[p + i] == (STATUS_ALLOCATED | order));
		/* set the page as free */
		a->page_states[p + i] = STATUS_FREE | order;
	}
	/* provisionally add the block to the appropriate free list */
	list_add(a->freelists + order, mem);
	/* try to coalesce the block with neighbouring blocks if possible */
	do {
		/*
		 * get the order again since it might have changed after
		 * coalescing in a previous iteration
		 */
		order = a->page_states[p] & ORDER_MASK;
		/*
		 * let's consider this block and the next one if this block
		 * is aligned to the next size, otherwise let's consider the
		 * previous block and this one
		 */
		if (!IS_ALIGNED_ORDER(pfn, order + 1))
			pfn = pfn - BIT(order);
		pfn2 = pfn + BIT(order);
		/* repeat as long as we manage to coalesce something */
	} while (coalesce(a, order, pfn, pfn2));
}

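/*
 * Example of the buddy selection above (illustrative numbers): freeing an
 * order-1 block at pfn 0x42 finds that 0x42 is not aligned to order 2, so
 * the candidate pair becomes (0x40, 0x42); freeing the order-1 block at
 * pfn 0x40 keeps pfn as is and tries the same pair (0x40, 0x42).
 */
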
void free_pages(void *mem)
{
	spin_lock(&lock);
	_free_pages(mem);
	spin_unlock(&lock);
}

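/*
 * Reserve one specific page, marking it as special.
 * The page must be usable (free or fresh); the free block containing it is
 * split repeatedly until the page stands alone, and the page is then
 * marked STATUS_SPECIAL.
 * Returns 0 on success, -1 if the page cannot be reserved.
 * Must be called with the lock held.
 */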
static int _reserve_one_page(pfn_t pfn)
{
	struct mem_area *a;
	pfn_t mask, i;

	a = get_area(pfn);
	if (!a)
		return -1;
	i = pfn - a->base;
	if (!IS_USABLE(a->page_states[i]))
		return -1;
	while (a->page_states[i]) {
		mask = GENMASK_ULL(63, a->page_states[i]);
		split(a, pfn_to_virt(pfn & mask));
	}
	a->page_states[i] = STATUS_SPECIAL;
	return 0;
}

static void _unreserve_one_page(pfn_t pfn)
{
	struct mem_area *a;
	pfn_t i;

	a = get_area(pfn);
	assert(a);
	i = pfn - a->base;
	assert(a->page_states[i] == STATUS_SPECIAL);
	a->page_states[i] = STATUS_ALLOCATED;
	_free_pages(pfn_to_virt(pfn));
}

int reserve_pages(phys_addr_t addr, size_t n)
{
	pfn_t pfn;
	size_t i;

	assert(IS_ALIGNED(addr, PAGE_SIZE));
	pfn = addr >> PAGE_SHIFT;
	spin_lock(&lock);
	for (i = 0; i < n; i++)
		if (_reserve_one_page(pfn + i))
			break;
	if (i < n) {
		for (n = 0; n < i; n++)
			_unreserve_one_page(pfn + n);
		n = 0;
	}
	spin_unlock(&lock);
	return -!n;
}

void unreserve_pages(phys_addr_t addr, size_t n)
{
	pfn_t pfn;
	size_t i;

	assert(IS_ALIGNED(addr, PAGE_SIZE));
	pfn = addr >> PAGE_SHIFT;
	spin_lock(&lock);
	for (i = 0; i < n; i++)
		_unreserve_one_page(pfn + i);
	spin_unlock(&lock);
}

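/*
 * Example usage (hypothetical test code, for illustration only): carving a
 * specific page out of the allocator so it can be used directly without the
 * allocator ever handing it out.
 *
 *	static void example_claim_page(phys_addr_t addr)
 *	{
 *		// addr must be page aligned and inside an initialized area
 *		if (reserve_pages(addr, 1))
 *			return;
 *		// ... use the page directly ...
 *		unreserve_pages(addr, 1);
 *	}
 */
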
static void *page_memalign_order_flags(u8 al, u8 ord, u32 flags)
{
	void *res = NULL;
	int i, area, fresh;

	fresh = !!(flags & FLAG_FRESH);
	spin_lock(&lock);
	area = (flags & AREA_MASK) ? flags & areas_mask : areas_mask;
	for (i = 0; !res && (i < MAX_AREAS); i++)
		if (area & BIT(i))
			res = page_memalign_order(areas + i, al, ord, fresh);
	spin_unlock(&lock);
	if (res && !(flags & FLAG_DONTZERO))
		memset(res, 0, BIT(ord) * PAGE_SIZE);
	return res;
}

/*
 * Allocates (1 << order) physically contiguous and naturally aligned pages.
 * Returns NULL if the allocation was not possible.
 */
void *alloc_pages_flags(unsigned int order, unsigned int flags)
{
	return page_memalign_order_flags(order, order, flags);
}

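/*
 * Example (illustrative; FLAG_* and the area selectors are expected to come
 * from the included alloc_page.h / memory_areas.h headers): passing 0 as
 * flags allocates zeroed memory from any initialized area, FLAG_DONTZERO
 * skips the zeroing of the result, and FLAG_FRESH asks for a block whose
 * contents have not been touched since the area was initialized.
 *
 *	void *p = alloc_pages_flags(1, FLAG_DONTZERO);	// 2 pages, not zeroed
 *	void *q = alloc_pages_flags(0, FLAG_FRESH);	// 1 never-touched page
 *	free_pages(q);
 *	free_pages(p);
 */
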
/*
 * Allocates at least the requested size of physically contiguous memory,
 * aligned to at least the requested alignment; both values are in bytes
 * and are rounded up to a power-of-two number of pages.
 * Returns NULL if the allocation was not possible.
 */
void *memalign_pages_flags(size_t alignment, size_t size, unsigned int flags)
{
	assert(is_power_of_2(alignment));
	alignment = get_order(PAGE_ALIGN(alignment) >> PAGE_SHIFT);
	size = get_order(PAGE_ALIGN(size) >> PAGE_SHIFT);
	assert(alignment < NLISTS);
	assert(size < NLISTS);
	return page_memalign_order_flags(alignment, size, flags);
}

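/*
 * Worked example (assuming 4k pages): memalign_pages_flags(8192, 20480, 0)
 * converts the byte values to orders with get_order(): the 8k alignment
 * becomes order 1 and the 20k size, rounded up to 5 pages, becomes order 3,
 * so the call returns an 8-page block aligned to at least 8k.
 */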

static struct alloc_ops page_alloc_ops = {
	.memalign = memalign_pages,
	.free = free_pages,
};

/*
 * Enables the page allocator.
 *
 * Prerequisites:
 * - at least one memory area has been initialized
 */
void page_alloc_ops_enable(void)
{
	spin_lock(&lock);
	assert(page_alloc_initialized());
	alloc_ops = &page_alloc_ops;
	spin_unlock(&lock);
}

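/*
 * Typical setup sequence (illustrative sketch; the pfn values are made up
 * and the exact boot-time wiring is architecture specific): once an area
 * has been registered and the ops are enabled, the generic alloc.h
 * interfaces are backed by this allocator.
 *
 *	page_alloc_init_area(AREA_ANY_NUMBER, 0x100, 0x4000);
 *	page_alloc_ops_enable();
 *	// from here on, memalign_pages()/free_pages() and anything going
 *	// through alloc_ops hand out page-granular memory from that area
 */
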
/*
 * Adds a new memory area to the pool of available memory.
 *
 * Prerequisites:
 * - the lock is held
 * - start and top are page frame numbers
 * - start is smaller than top
 * - top does not fall outside of addressable memory
 * - there is at least one more slot free for memory areas
 * - if a specific memory area number has been indicated, it needs to be free
 * - the memory area to add does not overlap with existing areas
 * - the memory area to add has at least 5 pages available
 */
static void _page_alloc_init_area(u8 n, pfn_t start_pfn, pfn_t top_pfn)
{
	size_t table_size, npages, i;
	struct mem_area *a;
	u8 order = 0;

	/* the number must be within the allowed range */
	assert(n < MAX_AREAS);
	/* the new area number must be unused */
	assert(!(areas_mask & BIT(n)));

	/* other basic sanity checks */
	assert(top_pfn > start_pfn);
	assert(top_pfn - start_pfn > 4);
	assert(top_pfn < BIT_ULL(sizeof(void *) * 8 - PAGE_SHIFT));

	/* calculate the size of the metadata table in pages */
	table_size = (top_pfn - start_pfn + PAGE_SIZE) / (PAGE_SIZE + 1);

	/* fill in the values of the new area */
	a = areas + n;
	a->page_states = pfn_to_virt(start_pfn);
	a->base = start_pfn + table_size;
	a->top = top_pfn;
	a->max_order = 0;
	npages = top_pfn - a->base;
	assert((a->base - start_pfn) * PAGE_SIZE >= npages);

	/* check that the new area does not overlap with any existing areas */
	for (i = 0; i < MAX_AREAS; i++) {
		if (!(areas_mask & BIT(i)))
			continue;
		assert(!area_contains_pfn(areas + i, start_pfn));
		assert(!area_contains_pfn(areas + i, top_pfn - 1));
		assert(!area_contains_pfn(a, virt_to_pfn(areas[i].page_states)));
		assert(!area_contains_pfn(a, areas[i].top - 1));
	}
	/* initialize all freelists for the new area */
	for (i = 0; i < NLISTS; i++)
		a->freelists[i].prev = a->freelists[i].next = a->freelists + i;

	/* initialize the metadata for the available memory */
	for (i = a->base; i < a->top; i += 1ull << order) {
		/* search which order to start from */
		while (i + BIT(order) > a->top) {
			assert(order);
			order--;
		}
		/*
		 * we need both loops, one for the start and the other for
		 * the end of the block, in case it spans a power of two
		 * boundary
		 */
		while (IS_ALIGNED_ORDER(i, order + 1) && (i + BIT(order + 1) <= a->top))
			order++;
		assert(order < NLISTS);
		/* initialize the metadata and add to the freelist */
		memset(a->page_states + (i - a->base), STATUS_FRESH | order, BIT(order));
		list_add(a->freelists + order, pfn_to_virt(i));
		if (order > a->max_order)
			a->max_order = order;
	}
	/* finally mark the area as present */
	areas_mask |= BIT(n);
}

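/*
 * Worked example for the metadata sizing above (assuming 4k pages): for a
 * range of 0x2000 page frames, table_size = (0x2000 + 0x1000) / 0x1001 = 2,
 * so the first two frames hold the one-byte-per-page state table and the
 * remaining 0x1ffe frames are usable; the assert on npages checks that the
 * table really has room for one byte per usable page.
 */

/*
 * Helper for page_alloc_init_area(): registers the part of
 * [base_pfn, *top_pfn) that lies above the given cutoff as area n (if any),
 * and lowers *top_pfn so that the caller can pass the remainder on to the
 * next, lower area.
 */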
static void __page_alloc_init_area(u8 n, pfn_t cutoff, pfn_t base_pfn, pfn_t *top_pfn)
{
	if (*top_pfn > cutoff) {
		spin_lock(&lock);
		if (base_pfn >= cutoff) {
			_page_alloc_init_area(n, base_pfn, *top_pfn);
			*top_pfn = 0;
		} else {
			_page_alloc_init_area(n, cutoff, *top_pfn);
			*top_pfn = cutoff;
		}
		spin_unlock(&lock);
	}
}

/*
 * Adds a new memory area to the pool of available memory.
 *
 * Prerequisites:
 * see _page_alloc_init_area
 */
void page_alloc_init_area(u8 n, phys_addr_t base_pfn, phys_addr_t top_pfn)
{
	if (n != AREA_ANY_NUMBER) {
		__page_alloc_init_area(n, 0, base_pfn, &top_pfn);
		return;
	}
#ifdef AREA_HIGH_PFN
	__page_alloc_init_area(AREA_HIGH_NUMBER, AREA_HIGH_PFN, base_pfn, &top_pfn);
#endif
	__page_alloc_init_area(AREA_NORMAL_NUMBER, AREA_NORMAL_PFN, base_pfn, &top_pfn);
#ifdef AREA_LOW_PFN
	__page_alloc_init_area(AREA_LOW_NUMBER, AREA_LOW_PFN, base_pfn, &top_pfn);
#endif
#ifdef AREA_LOWEST_PFN
	__page_alloc_init_area(AREA_LOWEST_NUMBER, AREA_LOWEST_PFN, base_pfn, &top_pfn);
#endif
}
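
/*
 * Example (illustrative; the AREA_*_PFN cutoffs come from the architecture's
 * memory_areas.h): page_alloc_init_area(AREA_ANY_NUMBER, base, top) walks
 * the cutoffs from highest to lowest, so a range straddling AREA_NORMAL_PFN
 * is split into a NORMAL area above the cutoff and a lower area below it,
 * each with its own metadata table.
 */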
569