xref: /kvm-unit-tests/lib/x86/vm.c (revision fd6aada0dac74cf00e2d0b701362e1f89d2c28e3)
1 #include "fwcfg.h"
2 #include "vm.h"
3 #include "libcflat.h"
4 
/*
 * Head of a singly linked list of free pages: the first word of each free
 * page stores a pointer to the next free page (NULL terminates the list).
 * NOTE: the name shadows libc free(), which this freestanding environment
 * does not use.
 */
static void *free = 0;
/* Top of the downward-growing virtual area carved up by vmalloc()/vmap(). */
static void *vfree_top = 0;
7 
8 static void free_memory(void *mem, unsigned long size)
9 {
10 	void *end;
11 
12 	assert_msg((unsigned long) mem % PAGE_SIZE == 0,
13 		   "mem not page aligned: %p", mem);
14 
15 	assert_msg(size % PAGE_SIZE == 0, "size not page aligned: %#lx", size);
16 
17 	assert_msg(size == 0 || mem + size > mem,
18 		   "mem + size overflow: %p + %#lx", mem, size);
19 
20 	if (size == 0) {
21 		free = NULL;
22 		return;
23 	}
24 
25 	free = mem;
26 	end = mem + size;
27 	while (mem + PAGE_SIZE != end) {
28 		*(void **)mem = (mem + PAGE_SIZE);
29 		mem += PAGE_SIZE;
30 	}
31 
32 	*(void **)mem = NULL;
33 }
34 
35 void *alloc_page()
36 {
37     void *p;
38 
39     if (!free)
40 	return 0;
41 
42     p = free;
43     free = *(void **)free;
44 
45     return p;
46 }
47 
/*
 * Allocates (1 << order) physically contiguous and naturally aligned pages.
 * Returns NULL if there's no memory left.
 */
void *alloc_pages(unsigned long order)
{
	/* Generic list traversal. */
	void *prev;
	void *curr = NULL;
	void *next = free;

	/* Looking for a run of length (1 << order). */
	unsigned long run = 0;
	const unsigned long n = 1ul << order;
	const unsigned long align_mask = (n << PAGE_SHIFT) - 1;
	void *run_start = NULL;
	void *run_prev = NULL;
	unsigned long run_next_pa = 0;
	unsigned long pa;

	/* A larger order would make the shift above undefined behavior. */
	assert(order < sizeof(unsigned long) * 8);

	for (;;) {
		prev = curr;
		curr = next;
		next = curr ? *((void **) curr) : NULL;

		/* Free list exhausted without finding a suitable run. */
		if (!curr)
			return 0;

		pa = virt_to_phys(curr);

		if (run == 0) {
			/* A run may only start on a naturally aligned page. */
			if (!(pa & align_mask)) {
				run_start = curr;
				run_prev = prev;
				run_next_pa = pa + PAGE_SIZE;
				run = 1;
			}
		} else if (pa == run_next_pa) {
			/* Physically adjacent to the previous page: extend. */
			run_next_pa += PAGE_SIZE;
			run += 1;
		} else {
			/* Contiguity broken; start searching again. */
			run = 0;
		}

		if (run == n) {
			/*
			 * The n pages of the run are consecutive nodes in the
			 * free list, so splicing run_prev (or the list head)
			 * directly to next unlinks all of them at once.
			 */
			if (run_prev)
				*((void **) run_prev) = next;
			else
				free = next;
			return run_start;
		}
	}
}
103 
104 
105 void free_page(void *page)
106 {
107     *(void **)page = free;
108     free = page;
109 }
110 
/* End of the test image's data; free memory starts here (see setup_vm()). */
extern char edata;
/* Guest RAM size from fw_cfg; stays 0 until setup_vm() has run. */
static unsigned long end_of_memory;
113 
/*
 * Installs @pte for @virt at level @pte_level, walking the page tables down
 * from @cr3 and creating any missing intermediate tables on the way.
 *
 * Intermediate tables are taken from @pt_page if non-NULL (consumed at most
 * once), otherwise from alloc_page(); they are zeroed and linked in as
 * present, writable and user-accessible.  Returns a pointer to the slot the
 * PTE was written into.
 */
unsigned long *install_pte(unsigned long *cr3,
			   int pte_level,
			   void *virt,
			   unsigned long pte,
			   unsigned long *pt_page)
{
    int level;
    unsigned long *pt = cr3;
    unsigned offset;

    for (level = PAGE_LEVEL; level > pte_level; --level) {
	offset = PGDIR_OFFSET((unsigned long)virt, level);
	if (!(pt[offset] & PT_PRESENT_MASK)) {
	    /* Missing intermediate table: allocate, zero and link it. */
	    unsigned long *new_pt = pt_page;
            if (!new_pt)
                new_pt = alloc_page();
            else
                pt_page = 0;
	    memset(new_pt, 0, PAGE_SIZE);
	    pt[offset] = virt_to_phys(new_pt) | PT_PRESENT_MASK | PT_WRITABLE_MASK | PT_USER_MASK;
	}
	/* Descend to the next-lower table. */
	pt = phys_to_virt(pt[offset] & PT_ADDR_MASK);
    }
    /* level == pte_level here: write the requested entry. */
    offset = PGDIR_OFFSET((unsigned long)virt, level);
    pt[offset] = pte;
    return &pt[offset];
}
141 
142 /*
143  * Finds last PTE in the mapping of @virt that's at or above @lowest_level. The
144  * returned PTE isn't necessarily present, but its parent is.
145  */
146 struct pte_search find_pte_level(unsigned long *cr3, void *virt,
147 				 int lowest_level)
148 {
149 	unsigned long *pt = cr3, pte;
150 	unsigned offset;
151 	unsigned long shift;
152 	struct pte_search r;
153 
154 	assert(lowest_level >= 1 && lowest_level <= PAGE_LEVEL);
155 
156 	for (r.level = PAGE_LEVEL;; --r.level) {
157 		shift = (r.level - 1) * PGDIR_WIDTH + 12;
158 		offset = ((unsigned long)virt >> shift) & PGDIR_MASK;
159 		r.pte = &pt[offset];
160 		pte = *r.pte;
161 
162 		if (!(pte & PT_PRESENT_MASK))
163 			return r;
164 
165 		if ((r.level == 2 || r.level == 3) && (pte & PT_PAGE_SIZE_MASK))
166 			return r;
167 
168 		if (r.level == lowest_level)
169 			return r;
170 
171 		pt = phys_to_virt(pte & 0xffffffffff000ull);
172 	}
173 }
174 
175 /*
176  * Returns the leaf PTE in the mapping of @virt (i.e., 4K PTE or a present huge
177  * PTE). Returns NULL if no leaf PTE exists.
178  */
179 unsigned long *get_pte(unsigned long *cr3, void *virt)
180 {
181 	struct pte_search search;
182 
183 	search = find_pte_level(cr3, virt, 1);
184 	return found_leaf_pte(search) ? search.pte : NULL;
185 }
186 
187 /*
188  * Returns the PTE in the mapping of @virt at the given level @pte_level.
189  * Returns NULL if the PT at @pte_level isn't present (i.e., the mapping at
190  * @pte_level - 1 isn't present).
191  */
192 unsigned long *get_pte_level(unsigned long *cr3, void *virt, int pte_level)
193 {
194 	struct pte_search search;
195 
196 	search = find_pte_level(cr3, virt, pte_level);
197 	return search.level == pte_level ? search.pte : NULL;
198 }
199 
200 unsigned long *install_large_page(unsigned long *cr3,
201 				  unsigned long phys,
202 				  void *virt)
203 {
204     return install_pte(cr3, 2, virt,
205 		       phys | PT_PRESENT_MASK | PT_WRITABLE_MASK | PT_USER_MASK | PT_PAGE_SIZE_MASK, 0);
206 }
207 
208 unsigned long *install_page(unsigned long *cr3,
209 			    unsigned long phys,
210 			    void *virt)
211 {
212     return install_pte(cr3, 1, virt, phys | PT_PRESENT_MASK | PT_WRITABLE_MASK | PT_USER_MASK, 0);
213 }
214 
215 void install_pages(unsigned long *cr3, unsigned long phys, unsigned long len,
216 		   void *virt)
217 {
218 	unsigned long max = (u64)len + (u64)phys;
219 	assert(phys % PAGE_SIZE == 0);
220 	assert((unsigned long) virt % PAGE_SIZE == 0);
221 	assert(len % PAGE_SIZE == 0);
222 
223 	while (phys + PAGE_SIZE <= max) {
224 		install_page(cr3, phys, virt);
225 		phys += PAGE_SIZE;
226 		virt = (char *) virt + PAGE_SIZE;
227 	}
228 }
229 
230 bool any_present_pages(unsigned long *cr3, void *virt, unsigned long len)
231 {
232 	unsigned long max = (unsigned long) virt + len;
233 	unsigned long curr;
234 
235 	for (curr = (unsigned long) virt; curr < max; curr += PAGE_SIZE) {
236 		unsigned long *ptep = get_pte(cr3, (void *) curr);
237 		if (ptep && (*ptep & PT_PRESENT_MASK))
238 			return true;
239 	}
240 	return false;
241 }
242 
243 static void setup_mmu_range(unsigned long *cr3, unsigned long start,
244 			    unsigned long len)
245 {
246 	u64 max = (u64)len + (u64)start;
247 	u64 phys = start;
248 
249 	while (phys + LARGE_PAGE_SIZE <= max) {
250 		install_large_page(cr3, phys, (void *)(ulong)phys);
251 		phys += LARGE_PAGE_SIZE;
252 	}
253 	install_pages(cr3, phys, max - phys, (void *)(ulong)phys);
254 }
255 
/*
 * Builds identity-mapped page tables covering guest RAM (and MMIO), loads
 * them into CR3, and enables paging.  On 32-bit, RAM is capped at 2G and a
 * separate 3G-4G window is mapped for MMIO, with 2G-3G reserved as the
 * vmalloc area.
 */
static void setup_mmu(unsigned long len)
{
    unsigned long *cr3 = alloc_page();

    memset(cr3, 0, PAGE_SIZE);

#ifdef __x86_64__
    if (len < (1ul << 32))
        len = (1ul << 32);  /* map mmio 1:1 */

    setup_mmu_range(cr3, 0, len);
#else
    if (len > (1ul << 31))
	    len = (1ul << 31);

    /* 0 - 2G memory, 2G-3G valloc area, 3G-4G mmio */
    setup_mmu_range(cr3, 0, len);
    setup_mmu_range(cr3, 3ul << 30, (1ul << 30));
    vfree_top = (void*)(3ul << 30);
#endif

    write_cr3(virt_to_phys(cr3));
#ifndef __x86_64__
    /* PSE enables the large pages installed by setup_mmu_range() above. */
    write_cr4(X86_CR4_PSE);
#endif
    write_cr0(X86_CR0_PG |X86_CR0_PE | X86_CR0_WP);

    printf("paging enabled\n");
    printf("cr0 = %lx\n", read_cr0());
    printf("cr3 = %lx\n", read_cr3());
    printf("cr4 = %lx\n", read_cr4());
}
288 
289 void setup_vm()
290 {
291     assert(!end_of_memory);
292     end_of_memory = fwcfg_get_u64(FW_CFG_RAM_SIZE);
293     free_memory(&edata, end_of_memory - (unsigned long)&edata);
294     setup_mmu(end_of_memory);
295 }
296 
297 void *vmalloc(unsigned long size)
298 {
299     void *mem, *p;
300     unsigned pages;
301 
302     size += sizeof(unsigned long);
303 
304     size = (size + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1);
305     vfree_top -= size;
306     mem = p = vfree_top;
307     pages = size / PAGE_SIZE;
308     while (pages--) {
309 	install_page(phys_to_virt(read_cr3()), virt_to_phys(alloc_page()), p);
310 	p += PAGE_SIZE;
311     }
312     *(unsigned long *)mem = size;
313     mem += sizeof(unsigned long);
314     return mem;
315 }
316 
317 uint64_t virt_to_phys_cr3(void *mem)
318 {
319     return (*get_pte(phys_to_virt(read_cr3()), mem) & PT_ADDR_MASK) + ((ulong)mem & (PAGE_SIZE - 1));
320 }
321 
322 void vfree(void *mem)
323 {
324     unsigned long size = ((unsigned long *)mem)[-1];
325 
326     while (size) {
327 	free_page(phys_to_virt(*get_pte(phys_to_virt(read_cr3()), mem) & PT_ADDR_MASK));
328 	mem += PAGE_SIZE;
329 	size -= PAGE_SIZE;
330     }
331 }
332 
333 void *vmap(unsigned long long phys, unsigned long size)
334 {
335     void *mem, *p;
336     unsigned pages;
337 
338     size = (size + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1);
339     vfree_top -= size;
340     phys &= ~(unsigned long long)(PAGE_SIZE - 1);
341 
342     mem = p = vfree_top;
343     pages = size / PAGE_SIZE;
344     while (pages--) {
345 	install_page(phys_to_virt(read_cr3()), phys, p);
346 	phys += PAGE_SIZE;
347 	p += PAGE_SIZE;
348     }
349     return mem;
350 }
351 
352 void *alloc_vpages(ulong nr)
353 {
354 	vfree_top -= PAGE_SIZE * nr;
355 	return vfree_top;
356 }
357 
/* Single-page convenience wrapper around alloc_vpages(). */
void *alloc_vpage(void)
{
    return alloc_vpages(1);
}
362