xref: /kvm-unit-tests/lib/alloc_page.c (revision ea71612b8b179d3c38e6878faa5f010306168dc3)
1 /*
2  * This work is licensed under the terms of the GNU LGPL, version 2.
3  *
4  * This is a simple allocator that provides contiguous physical addresses
5  * with page granularity.
6  */
7 #include "libcflat.h"
8 #include "alloc.h"
9 #include "alloc_phys.h"
10 #include "alloc_page.h"
11 #include "bitops.h"
12 #include "list.h"
13 #include <asm/page.h>
14 #include <asm/io.h>
15 #include <asm/spinlock.h>
16 #include <asm/memory_areas.h>
17 
18 #define IS_ALIGNED_ORDER(x,order) IS_ALIGNED((x),BIT_ULL(order))
19 #define NLISTS ((BITS_PER_LONG) - (PAGE_SHIFT))
20 #define PFN(x) ((uintptr_t)(x) >> PAGE_SHIFT)
21 
22 #define MAX_AREAS	6
23 
24 #define ORDER_MASK	0x3f
25 #define ALLOC_MASK	0x40
26 #define SPECIAL_MASK	0x80
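
/*
 * Illustration of the metadata encoding defined above: each page is tracked
 * by one byte in page_states.  The low bits (ORDER_MASK) hold the order of
 * the block the page belongs to, ALLOC_MASK marks the block as allocated,
 * and SPECIAL_MASK marks a page reserved via alloc_pages_special().  For
 * example, every page of a free order-3 block holds 0x03, every page of an
 * allocated order-3 block holds 0x43 (ALLOC_MASK | 3), and a special page
 * holds exactly 0x80.
 */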
27 
28 struct mem_area {
29 	/* Physical frame number of the first usable frame in the area */
30 	uintptr_t base;
31 	/* Physical frame number of the first frame outside the area */
32 	uintptr_t top;
33 	/* Combination of SPECIAL_MASK, ALLOC_MASK, and order */
34 	u8 *page_states;
35 	/* One freelist per block order, from 0 up to NLISTS - 1 */
36 	struct linked_list freelists[NLISTS];
37 };
38 
39 static struct mem_area areas[MAX_AREAS];
40 static unsigned int areas_mask;
41 static struct spinlock lock;
42 
43 bool page_alloc_initialized(void)
44 {
45 	return areas_mask != 0;
46 }
47 
48 static inline bool area_or_metadata_contains(struct mem_area *a, uintptr_t pfn)
49 {
50 	return (pfn >= PFN(a->page_states)) && (pfn < a->top);
51 }
52 
53 static inline bool area_contains(struct mem_area *a, uintptr_t pfn)
54 {
55 	return (pfn >= a->base) && (pfn < a->top);
56 }
57 
58 /*
59  * Splits the free block starting at addr into 2 blocks of half the size.
60  *
61  * The function depends on the following assumptions:
62  * - the allocator must have been initialized
63  * - the block must be within the memory area
64  * - all pages in the block must be free and not special
65  * - the pointer must point to the start of the block
66  * - all pages in the block must have the same block size.
67  * - the block size must be greater than 0
68  * - the block size must be smaller than the maximum allowed
69  * - the block must be in a free list
70  * - the function is called with the lock held
71  */
72 static void split(struct mem_area *a, void *addr)
73 {
74 	uintptr_t pfn = PFN(addr);
75 	struct linked_list *p;
76 	uintptr_t i, idx;
77 	u8 order;
78 
79 	assert(a && area_contains(a, pfn));
80 	idx = pfn - a->base;
81 	order = a->page_states[idx];
82 	assert(!(order & ~ORDER_MASK) && order && (order < NLISTS));
83 	assert(IS_ALIGNED_ORDER(pfn, order));
84 	assert(area_contains(a, pfn + BIT(order) - 1));
85 
86 	/* Remove the block from its free list */
87 	p = list_remove(addr);
88 	assert(p);
89 
90 	/* update the block size for each page in the block */
91 	for (i = 0; i < BIT(order); i++) {
92 		assert(a->page_states[idx + i] == order);
93 		a->page_states[idx + i] = order - 1;
94 	}
95 	order--;
96 	/* add the first half block to the appropriate free list */
97 	list_add(a->freelists + order, p);
98 	/* add the second half block to the appropriate free list */
99 	list_add(a->freelists + order, (void *)((pfn + BIT(order)) * PAGE_SIZE));
100 }
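
/*
 * Worked example for split() (illustrative values): a free order-3 block
 * starts at pfn 0x208, which is aligned to 8 pages.  split() removes the
 * block from freelists[3], rewrites page_states of all 8 pages from 3 to 2,
 * and adds two order-2 blocks to freelists[2], one starting at pfn 0x208 and
 * one at pfn 0x20c.
 */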
101 
102 /*
103  * Returns a block of at least BIT(sz) pages, aligned to a BIT(al)-page
104  * boundary.  If there is not enough free memory, NULL is returned.
105  *
106  * Neither parameter may be larger than the largest allowed order.
107  */
108 static void *page_memalign_order(struct mem_area *a, u8 al, u8 sz)
109 {
110 	struct linked_list *p, *res = NULL;
111 	u8 order;
112 
113 	assert((al < NLISTS) && (sz < NLISTS));
114 	/* we need the bigger of the two as starting point */
115 	order = sz > al ? sz : al;
116 
117 	/* search all free lists for some memory */
118 	for ( ; order < NLISTS; order++) {
119 		p = a->freelists[order].next;
120 		if (!is_list_empty(p))
121 			break;
122 	}
123 	/* out of memory */
124 	if (order >= NLISTS)
125 		return NULL;
126 
127 	/*
128 	 * the block is bigger than what we need because either there were
129 	 * no smaller blocks, or the smaller blocks were not aligned to our
130 	 * needs; therefore we split the block until we reach the needed size
131 	 */
132 	for (; order > sz; order--)
133 		split(a, p);
134 
135 	res = list_remove(p);
136 	memset(a->page_states + (PFN(res) - a->base), ALLOC_MASK | order, BIT(order));
137 	return res;
138 }
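
/*
 * Worked example for page_memalign_order() (illustrative values): a request
 * with al = 0 and sz = 1 asks for a 2-page, naturally aligned block.  The
 * search starts at order 1; if freelists[1] and freelists[2] are empty but
 * freelists[3] is not, the order-3 block found there is split twice.  This
 * leaves one order-2 and one order-1 block free, while the remaining order-1
 * block is taken off its list, marked ALLOC_MASK | 1 in page_states, and
 * returned.
 */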
139 
140 static struct mem_area *get_area(uintptr_t pfn)
141 {
142 	uintptr_t i;
143 
144 	for (i = 0; i < MAX_AREAS; i++)
145 		if ((areas_mask & BIT(i)) && area_contains(areas + i, pfn))
146 			return areas + i;
147 	return NULL;
148 }
149 
150 /*
151  * Try to merge two blocks into a bigger one.
152  * Returns true in case of a successful merge.
153  * Merging will succeed only if both blocks have the same block size and are
154  * both free.
155  *
156  * The function depends on the following assumptions:
157  * - the first parameter is strictly smaller than the second
158  * - the parameters must point each to the start of their block
159  * - the two parameters point to adjacent blocks
160  * - the two blocks are both in a free list
161  * - all of the pages of the two blocks must be free
162  * - all of the pages of the two blocks must have the same block size
163  * - the function is called with the lock held
164  */
165 static bool coalesce(struct mem_area *a, u8 order, uintptr_t pfn, uintptr_t pfn2)
166 {
167 	uintptr_t first, second, i;
168 	struct linked_list *li;
169 
170 	assert(IS_ALIGNED_ORDER(pfn, order) && IS_ALIGNED_ORDER(pfn2, order));
171 	assert(pfn2 == pfn + BIT(order));
172 	assert(a);
173 
174 	/* attempting to coalesce two blocks that belong to different areas */
175 	if (!area_contains(a, pfn) || !area_contains(a, pfn2 + BIT(order) - 1))
176 		return false;
177 	first = pfn - a->base;
178 	second = pfn2 - a->base;
179 	/* the two blocks have different sizes, cannot coalesce */
180 	if ((a->page_states[first] != order) || (a->page_states[second] != order))
181 		return false;
182 
183 	/* we can coalesce, remove both blocks from their freelists */
184 	li = list_remove((void *)(pfn2 << PAGE_SHIFT));
185 	assert(li);
186 	li = list_remove((void *)(pfn << PAGE_SHIFT));
187 	assert(li);
188 	/* check the metadata entries and update with the new size */
189 	for (i = 0; i < (2ull << order); i++) {
190 		assert(a->page_states[first + i] == order);
191 		a->page_states[first + i] = order + 1;
192 	}
193 	/* finally add the newly coalesced block to the appropriate freelist */
194 	list_add(a->freelists + order + 1, li);
195 	return true;
196 }
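
/*
 * Worked example for coalesce() (illustrative values): pfn = 0x100 and
 * pfn2 = 0x104 are adjacent free order-2 blocks, and 0x100 is aligned to
 * order 3.  Both blocks are unlinked from freelists[2], page_states of all
 * 8 pages is rewritten from 2 to 3, and the merged block at pfn 0x100 is
 * added to freelists[3].
 */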
197 
198 /*
199  * Free a block of memory.
200  * The parameter can be NULL, in which case nothing happens.
201  *
202  * The function depends on the following assumptions:
203  * - the parameter is page aligned
204  * - the parameter belongs to an existing memory area
205  * - the parameter points to the beginning of the block
206  * - the size of the block is less than the maximum allowed
207  * - the block is completely contained in its memory area
208  * - all pages in the block have the same block size
209  * - no pages in the memory block were already free
210  * - no pages in the memory block are special
211  */
212 static void _free_pages(void *mem)
213 {
214 	uintptr_t pfn2, pfn = PFN(mem);
215 	struct mem_area *a = NULL;
216 	uintptr_t i, p;
217 	u8 order;
218 
219 	if (!mem)
220 		return;
221 	assert(IS_ALIGNED((uintptr_t)mem, PAGE_SIZE));
222 
223 	/* find which area this pointer belongs to */
224 	a = get_area(pfn);
225 	assert_msg(a, "memory does not belong to any area: %p", mem);
226 
227 	p = pfn - a->base;
228 	order = a->page_states[p] & ORDER_MASK;
229 
230 	/* ensure that the first page is allocated and not special */
231 	assert(a->page_states[p] == (order | ALLOC_MASK));
232 	/* ensure that the order has a sane value */
233 	assert(order < NLISTS);
234 	/* ensure that the block is aligned properly for its size */
235 	assert(IS_ALIGNED_ORDER(pfn, order));
236 	/* ensure that the area can contain the whole block */
237 	assert(area_contains(a, pfn + BIT(order) - 1));
238 
239 	for (i = 0; i < BIT(order); i++) {
240 		/* check that all pages of the block have consistent metadata */
241 		assert(a->page_states[p + i] == (ALLOC_MASK | order));
242 		/* set the page as free */
243 		a->page_states[p + i] &= ~ALLOC_MASK;
244 	}
245 	/* provisionally add the block to the appropriate free list */
246 	list_add(a->freelists + order, mem);
247 	/* try to coalesce the block with neighbouring blocks if possible */
248 	do {
249 		/*
250 		 * get the order again since it might have changed after
251 		 * coalescing in a previous iteration
252 		 */
253 		order = a->page_states[p] & ORDER_MASK;
254 		/*
255 		 * let's consider this block and the next one if this block
256 		 * is aligned to the next size, otherwise let's consider the
257 		 * previous block and this one
258 		 */
259 		if (!IS_ALIGNED_ORDER(pfn, order + 1))
260 			pfn = pfn - BIT(order);
261 		pfn2 = pfn + BIT(order);
262 		/* repeat as long as we manage to coalesce something */
263 	} while (coalesce(a, order, pfn, pfn2));
264 }
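
/*
 * Worked example for the coalescing loop above (illustrative values):
 * freeing a single page at pfn 0x101 first puts it on freelists[0].  Since
 * 0x101 is not aligned to order 1, the candidate buddy is the block below,
 * so pfn becomes 0x100 and pfn2 becomes 0x101.  If pfn 0x100 is also a free
 * order-0 block, the two are merged into an order-1 block at 0x100, and the
 * loop retries at the next order until a merge fails.
 */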
265 
266 void free_pages(void *mem)
267 {
268 	spin_lock(&lock);
269 	_free_pages(mem);
270 	spin_unlock(&lock);
271 }
272 
273 static void *_alloc_page_special(uintptr_t addr)
274 {
275 	struct mem_area *a;
276 	uintptr_t mask, i;
277 
278 	a = get_area(PFN(addr));
279 	assert(a);
280 	i = PFN(addr) - a->base;
281 	if (a->page_states[i] & (ALLOC_MASK | SPECIAL_MASK))
282 		return NULL;
283 	while (a->page_states[i]) {
284 		mask = GENMASK_ULL(63, PAGE_SHIFT + a->page_states[i]);
285 		split(a, (void *)(addr & mask));
286 	}
	/* the page is now a free order-0 block; take it off its free list */
	list_remove((void *)addr);
287 	a->page_states[i] = SPECIAL_MASK;
288 	return (void *)addr;
289 }
290 
291 static void _free_page_special(uintptr_t addr)
292 {
293 	struct mem_area *a;
294 	uintptr_t i;
295 
296 	a = get_area(PFN(addr));
297 	assert(a);
298 	i = PFN(addr) - a->base;
299 	assert(a->page_states[i] == SPECIAL_MASK);
300 	a->page_states[i] = ALLOC_MASK;
301 	_free_pages((void *)addr);
302 }
303 
304 void *alloc_pages_special(uintptr_t addr, size_t n)
305 {
306 	uintptr_t i;
307 
308 	assert(IS_ALIGNED(addr, PAGE_SIZE));
309 	spin_lock(&lock);
310 	for (i = 0; i < n; i++)
311 		if (!_alloc_page_special(addr + i * PAGE_SIZE))
312 			break;
313 	if (i < n) {
314 		for (n = 0; n < i; n++)
315 			_free_page_special(addr + n * PAGE_SIZE);
316 		addr = 0;
317 	}
318 	spin_unlock(&lock);
319 	return (void *)addr;
320 }
321 
322 void free_pages_special(uintptr_t addr, size_t n)
323 {
324 	uintptr_t i;
325 
326 	assert(IS_ALIGNED(addr, PAGE_SIZE));
327 	spin_lock(&lock);
328 	for (i = 0; i < n; i++)
329 		_free_page_special(addr + i * PAGE_SIZE);
330 	spin_unlock(&lock);
331 }
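
/*
 * Usage sketch for the special-page API above; the helper name and the
 * physical address are made up for illustration, and the address must fall
 * inside an initialized memory area.
 */
static inline void example_reserve_fixed_range(void)
{
	/* reserve two specific physical pages, e.g. for a fixed DMA buffer */
	void *p = alloc_pages_special(0x200000, 2);

	if (p) {
		/* ... use the two pages at 0x200000 ... */
		free_pages_special(0x200000, 2);
	}
}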
332 
333 static void *page_memalign_order_area(unsigned area, u8 ord, u8 al)
334 {
335 	void *res = NULL;
336 	int i;
337 
338 	spin_lock(&lock);
339 	area &= areas_mask;
340 	for (i = 0; !res && (i < MAX_AREAS); i++)
341 		if (area & BIT(i))
342 			res = page_memalign_order(areas + i, al, ord);
343 	spin_unlock(&lock);
344 	return res;
345 }
346 
347 /*
348  * Allocates (1 << order) physically contiguous and naturally aligned pages.
349  * Returns NULL if the allocation was not possible.
350  */
351 void *alloc_pages_area(unsigned int area, unsigned int order)
352 {
353 	return page_memalign_order_area(area, order, order);
354 }
355 
356 void *alloc_pages(unsigned int order)
357 {
358 	return alloc_pages_area(AREA_ANY, order);
359 }
360 
361 /*
362  * Allocates physically contiguous pages with at least the given size and
363  * alignment (both in bytes). Returns NULL if the allocation was not possible.
364  */
365 void *memalign_pages_area(unsigned int area, size_t alignment, size_t size)
366 {
367 	assert(is_power_of_2(alignment));
368 	alignment = get_order(PAGE_ALIGN(alignment) >> PAGE_SHIFT);
369 	size = get_order(PAGE_ALIGN(size) >> PAGE_SHIFT);
370 	assert(alignment < NLISTS);
371 	assert(size < NLISTS);
372 	return page_memalign_order_area(area, size, alignment);
373 }
374 
375 void *memalign_pages(size_t alignment, size_t size)
376 {
377 	return memalign_pages_area(AREA_ANY, alignment, size);
378 }
379 
380 /*
381  * Allocates one page
382  */
383 void *alloc_page(void)
384 {
385 	return alloc_pages(0);
386 }
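
/*
 * Usage sketch for the allocation entry points above; the helper name and
 * the sizes are made up for illustration.
 */
static inline void example_page_alloc_usage(void)
{
	void *one = alloc_page();		/* a single page */
	void *four = alloc_pages(2);		/* 4 contiguous, naturally aligned pages */
	void *buf = memalign_pages(65536, 8192);	/* at least 8 KiB, aligned to 64 KiB */

	/* free_pages() accepts NULL, so failed allocations need no special casing */
	free_pages(buf);
	free_pages(four);
	free_pages(one);
}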
387 
388 static struct alloc_ops page_alloc_ops = {
389 	.memalign = memalign_pages,
390 	.free = free_pages,
391 	.align_min = PAGE_SIZE,
392 };
393 
394 /*
395  * Enables the page allocator.
396  *
397  * Prerequisites:
398  * - at least one memory area has been initialized
399  */
400 void page_alloc_ops_enable(void)
401 {
402 	spin_lock(&lock);
403 	assert(page_alloc_initialized());
404 	alloc_ops = &page_alloc_ops;
405 	spin_unlock(&lock);
406 }
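
/*
 * Usage sketch: after page_alloc_ops_enable(), the generic malloc()/free()
 * wrappers from lib/alloc.c dispatch to this allocator, so every dynamic
 * allocation is rounded up to whole pages.  The helper name is made up.
 */
static inline void example_after_ops_enable(void)
{
	char *p = malloc(123);	/* ends up in memalign_pages(), one full page */

	free(p);
}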
407 
408 /*
409  * Adds a new memory area to the pool of available memory.
410  *
411  * Prerequisites:
412  * - the lock is held
413  * - start and top are page frame numbers
414  * - start is smaller than top
415  * - top does not fall outside of addressable memory
416  * - there is at least one more slot free for memory areas
417  * - if a specific memory area number has been indicated, it needs to be free
418  * - the memory area to add does not overlap with existing areas
419  * - the memory area to add has at least 5 pages available
420  */
421 static void _page_alloc_init_area(u8 n, uintptr_t start_pfn, uintptr_t top_pfn)
422 {
423 	size_t table_size, npages, i;
424 	struct mem_area *a;
425 	u8 order = 0;
426 
427 	/* the number must be within the allowed range */
428 	assert(n < MAX_AREAS);
429 	/* the new area number must be unused */
430 	assert(!(areas_mask & BIT(n)));
431 
432 	/* other basic sanity checks */
433 	assert(top_pfn > start_pfn);
434 	assert(top_pfn - start_pfn > 4);
435 	assert(top_pfn < BIT_ULL(sizeof(void *) * 8 - PAGE_SHIFT));
436 
437 	/* calculate the size of the metadata table in pages */
438 	table_size = (top_pfn - start_pfn + PAGE_SIZE) / (PAGE_SIZE + 1);
439 
440 	/* fill in the values of the new area */
441 	a = areas + n;
442 	a->page_states = (void *)(start_pfn << PAGE_SHIFT);
443 	a->base = start_pfn + table_size;
444 	a->top = top_pfn;
445 	npages = top_pfn - a->base;
446 	assert((a->base - start_pfn) * PAGE_SIZE >= npages);
447 
448 	/* check that the new area does not overlap with any existing areas */
449 	for (i = 0; i < MAX_AREAS; i++) {
450 		if (!(areas_mask & BIT(i)))
451 			continue;
452 		assert(!area_or_metadata_contains(areas + i, start_pfn));
453 		assert(!area_or_metadata_contains(areas + i, top_pfn - 1));
454 		assert(!area_or_metadata_contains(a, PFN(areas[i].page_states)));
455 		assert(!area_or_metadata_contains(a, areas[i].top - 1));
456 	}
457 	/* initialize all freelists for the new area */
458 	for (i = 0; i < NLISTS; i++)
459 		a->freelists[i].next = a->freelists[i].prev = a->freelists + i;
460 
461 	/* initialize the metadata for the available memory */
462 	for (i = a->base; i < a->top; i += 1ull << order) {
463 		/* search which order to start from */
464 		while (i + BIT(order) > a->top) {
465 			assert(order);
466 			order--;
467 		}
468 		/*
469 		 * we need both loops, one for the start and the other for
470 		 * the end of the block, in case it spans a power of two
471 		 * boundary
472 		 */
473 		while (IS_ALIGNED_ORDER(i, order + 1) && (i + BIT(order + 1) <= a->top))
474 			order++;
475 		assert(order < NLISTS);
476 		/* initialize the metadata and add to the freelist */
477 		memset(a->page_states + (i - a->base), order, BIT(order));
478 		list_add(a->freelists + order, (void *)(i << PAGE_SHIFT));
479 	}
480 	/* finally mark the area as present */
481 	areas_mask |= BIT(n);
482 }
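
/*
 * Worked example for the metadata table sizing above (illustrative values):
 * one byte of page_states is needed per usable page, so table_size is
 * ceil((top_pfn - start_pfn) / (PAGE_SIZE + 1)).  With 4 KiB pages and an
 * area of 8194 frames, table_size = (8194 + 4096) / 4097 = 2: the first two
 * frames hold page_states, base is start_pfn + 2, and the remaining 8192
 * usable pages need exactly the 2 * 4096 = 8192 bytes of metadata available.
 */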
483 
484 static void __page_alloc_init_area(u8 n, uintptr_t cutoff, uintptr_t base_pfn, uintptr_t *top_pfn)
485 {
486 	if (*top_pfn > cutoff) {
487 		spin_lock(&lock);
488 		if (base_pfn >= cutoff) {
489 			_page_alloc_init_area(n, base_pfn, *top_pfn);
490 			*top_pfn = 0;
491 		} else {
492 			_page_alloc_init_area(n, cutoff, *top_pfn);
493 			*top_pfn = cutoff;
494 		}
495 		spin_unlock(&lock);
496 	}
497 }
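
/*
 * Worked example for the cutoff handling above (illustrative pfn values):
 * assume a cutoff of 0x10000, base_pfn = 0x5000 and *top_pfn = 0x20000.
 * Since base_pfn is below the cutoff, only [0x10000, 0x20000) is turned into
 * area n and *top_pfn is lowered to 0x10000, leaving [0x5000, 0x10000) for
 * the next, lower-cutoff call from page_alloc_init_area() below.
 */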
498 
499 /*
500  * Adds a new memory area to the pool of available memory.
501  *
502  * Prerequisites:
503  * see _page_alloc_init_area
504  */
505 void page_alloc_init_area(u8 n, uintptr_t base_pfn, uintptr_t top_pfn)
506 {
507 	if (n != AREA_ANY_NUMBER) {
508 		__page_alloc_init_area(n, 0, base_pfn, &top_pfn);
509 		return;
510 	}
511 #ifdef AREA_HIGH_PFN
512 	__page_alloc_init_area(AREA_HIGH_NUMBER, AREA_HIGH_PFN, base_pfn, &top_pfn);
513 #endif
514 	__page_alloc_init_area(AREA_NORMAL_NUMBER, AREA_NORMAL_PFN, base_pfn, &top_pfn);
515 #ifdef AREA_LOW_PFN
516 	__page_alloc_init_area(AREA_LOW_NUMBER, AREA_LOW_PFN, base_pfn, &top_pfn);
517 #endif
518 #ifdef AREA_LOWEST_PFN
519 	__page_alloc_init_area(AREA_LOWEST_NUMBER, AREA_LOWEST_PFN, base_pfn, &top_pfn);
520 #endif
521 }
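
/*
 * Usage sketch for bringing up the allocator; the helper name and the pfn
 * range are made up, and the range must not overlap any existing area.
 */
static inline void example_page_alloc_setup(void)
{
	/* hand frames [0x100, 0x4000) to the allocator, letting it pick areas */
	page_alloc_init_area(AREA_ANY_NUMBER, 0x100, 0x4000);
	/* route the generic malloc()/free() wrappers to the page allocator */
	page_alloc_ops_enable();
}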
522