// SPDX-License-Identifier: GPL-2.0-only
/*
 * tools/testing/selftests/kvm/lib/kvm_util.c
 *
 * Copyright (C) 2018, Google LLC.
 */
#include "test_util.h"
#include "kvm_util.h"
#include "processor.h"
#include "ucall_common.h"

#include <assert.h>
#include <sched.h>
#include <sys/mman.h>
#include <sys/resource.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <linux/kernel.h>

#define KVM_UTIL_MIN_PFN	2

uint32_t guest_random_seed;
struct guest_random_state guest_rng;
static uint32_t last_guest_seed;

static size_t vcpu_mmap_sz(void);

int __open_path_or_exit(const char *path, int flags, const char *enoent_help)
{
	int fd;

	fd = open(path, flags);
	if (fd < 0)
		goto error;

	return fd;

error:
	if (errno == EACCES || errno == ENOENT)
		ksft_exit_skip("- Cannot open '%s': %s. %s\n",
			       path, strerror(errno),
			       errno == EACCES ? "Root required?" : enoent_help);
	TEST_FAIL("Failed to open '%s'", path);
}

int open_path_or_exit(const char *path, int flags)
{
	return __open_path_or_exit(path, flags, "");
}

/*
 * Open KVM_DEV_PATH if available, otherwise exit the entire program.
 *
 * Input Args:
 *   flags - The flags to pass when opening KVM_DEV_PATH.
 *
 * Return:
 *   The opened file descriptor of /dev/kvm.
 */
static int _open_kvm_dev_path_or_exit(int flags)
{
	return __open_path_or_exit(KVM_DEV_PATH, flags, "Is KVM loaded and enabled?");
}

int open_kvm_dev_path_or_exit(void)
{
	return _open_kvm_dev_path_or_exit(O_RDONLY);
}

static ssize_t get_module_param(const char *module_name, const char *param,
				void *buffer, size_t buffer_size)
{
	const int path_size = 128;
	char path[path_size];
	ssize_t bytes_read;
	int fd, r;

	/* Verify KVM is loaded, to provide a more helpful SKIP message. */
	close(open_kvm_dev_path_or_exit());

	r = snprintf(path, path_size, "/sys/module/%s/parameters/%s",
		     module_name, param);
	TEST_ASSERT(r < path_size,
		    "Failed to construct sysfs path in %d bytes.", path_size);

	fd = open_path_or_exit(path, O_RDONLY);

	bytes_read = read(fd, buffer, buffer_size);
	TEST_ASSERT(bytes_read > 0, "read(%s) returned %ld, wanted %ld bytes",
		    path, bytes_read, buffer_size);

	r = close(fd);
	TEST_ASSERT(!r, "close(%s) failed", path);
	return bytes_read;
}

int kvm_get_module_param_integer(const char *module_name, const char *param)
{
	/*
	 * 16 bytes to hold a 64-bit value (1 byte per char), 1 byte for the
	 * NUL char, and 1 byte because the kernel sucks and inserts a newline
	 * at the end.
	 */
	char value[16 + 1 + 1];
	ssize_t r;

	memset(value, '\0', sizeof(value));

	r = get_module_param(module_name, param, value, sizeof(value));
	TEST_ASSERT(value[r - 1] == '\n',
		    "Expected trailing newline, got char '%c'", value[r - 1]);

	/*
	 * Squash the newline, otherwise atoi_paranoid() will complain about
	 * trailing non-NUL characters in the string.
	 */
	value[r - 1] = '\0';
	return atoi_paranoid(value);
}

bool kvm_get_module_param_bool(const char *module_name, const char *param)
{
	char value;
	ssize_t r;

	r = get_module_param(module_name, param, &value, sizeof(value));
	TEST_ASSERT_EQ(r, 1);

	if (value == 'Y')
		return true;
	else if (value == 'N')
		return false;

	TEST_FAIL("Unrecognized value '%c' for boolean module param", value);
}
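
/*
 * Illustrative usage of the module param helpers above (the parameter name
 * below is a hypothetical example, not something this library mandates):
 *
 *	if (!kvm_get_module_param_bool("kvm", "enable_vmware_backdoor"))
 *		ksft_exit_skip("- VMware backdoor not enabled\n");
 */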

/*
 * Capability
 *
 * Input Args:
 *   cap - Capability
 *
 * Output Args: None
 *
 * Return:
 *   On success, the value corresponding to the capability (KVM_CAP_*)
 *   specified by the value of cap.  On failure a TEST_ASSERT failure
 *   is produced.
 *
 * Looks up and returns the value corresponding to the capability
 * (KVM_CAP_*) given by cap.
 */
unsigned int kvm_check_cap(long cap)
{
	int ret;
	int kvm_fd;

	kvm_fd = open_kvm_dev_path_or_exit();
	ret = __kvm_ioctl(kvm_fd, KVM_CHECK_EXTENSION, (void *)cap);
	TEST_ASSERT(ret >= 0, KVM_IOCTL_ERROR(KVM_CHECK_EXTENSION, ret));

	close(kvm_fd);

	return (unsigned int)ret;
}
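
/*
 * Example (illustrative): KVM_CHECK_EXTENSION often returns a count or size
 * rather than a pure boolean, e.g. the host's vCPU limit:
 *
 *	uint32_t max_vcpus = kvm_check_cap(KVM_CAP_MAX_VCPUS);
 */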

void vm_enable_dirty_ring(struct kvm_vm *vm, uint32_t ring_size)
{
	if (vm_check_cap(vm, KVM_CAP_DIRTY_LOG_RING_ACQ_REL))
		vm_enable_cap(vm, KVM_CAP_DIRTY_LOG_RING_ACQ_REL, ring_size);
	else
		vm_enable_cap(vm, KVM_CAP_DIRTY_LOG_RING, ring_size);
	vm->dirty_ring_size = ring_size;
}

static void vm_open(struct kvm_vm *vm)
{
	vm->kvm_fd = _open_kvm_dev_path_or_exit(O_RDWR);

	TEST_REQUIRE(kvm_has_cap(KVM_CAP_IMMEDIATE_EXIT));

	vm->fd = __kvm_ioctl(vm->kvm_fd, KVM_CREATE_VM, (void *)vm->type);
	TEST_ASSERT(vm->fd >= 0, KVM_IOCTL_ERROR(KVM_CREATE_VM, vm->fd));

	if (kvm_has_cap(KVM_CAP_BINARY_STATS_FD))
		vm->stats.fd = vm_get_stats_fd(vm);
	else
		vm->stats.fd = -1;
}

const char *vm_guest_mode_string(uint32_t i)
{
	static const char * const strings[] = {
		[VM_MODE_P52V48_4K]	= "PA-bits:52, VA-bits:48, 4K pages",
		[VM_MODE_P52V48_16K]	= "PA-bits:52, VA-bits:48, 16K pages",
		[VM_MODE_P52V48_64K]	= "PA-bits:52, VA-bits:48, 64K pages",
		[VM_MODE_P48V48_4K]	= "PA-bits:48, VA-bits:48, 4K pages",
		[VM_MODE_P48V48_16K]	= "PA-bits:48, VA-bits:48, 16K pages",
		[VM_MODE_P48V48_64K]	= "PA-bits:48, VA-bits:48, 64K pages",
		[VM_MODE_P40V48_4K]	= "PA-bits:40, VA-bits:48, 4K pages",
		[VM_MODE_P40V48_16K]	= "PA-bits:40, VA-bits:48, 16K pages",
		[VM_MODE_P40V48_64K]	= "PA-bits:40, VA-bits:48, 64K pages",
		[VM_MODE_PXXVYY_4K]	= "PA-bits:ANY, VA-bits:48 or 57, 4K pages",
		[VM_MODE_P47V64_4K]	= "PA-bits:47, VA-bits:64, 4K pages",
		[VM_MODE_P44V64_4K]	= "PA-bits:44, VA-bits:64, 4K pages",
		[VM_MODE_P36V48_4K]	= "PA-bits:36, VA-bits:48, 4K pages",
		[VM_MODE_P36V48_16K]	= "PA-bits:36, VA-bits:48, 16K pages",
		[VM_MODE_P36V48_64K]	= "PA-bits:36, VA-bits:48, 64K pages",
		[VM_MODE_P47V47_16K]	= "PA-bits:47, VA-bits:47, 16K pages",
		[VM_MODE_P36V47_16K]	= "PA-bits:36, VA-bits:47, 16K pages",
		[VM_MODE_P56V57_4K]	= "PA-bits:56, VA-bits:57, 4K pages",
		[VM_MODE_P56V48_4K]	= "PA-bits:56, VA-bits:48, 4K pages",
		[VM_MODE_P56V39_4K]	= "PA-bits:56, VA-bits:39, 4K pages",
		[VM_MODE_P50V57_4K]	= "PA-bits:50, VA-bits:57, 4K pages",
		[VM_MODE_P50V48_4K]	= "PA-bits:50, VA-bits:48, 4K pages",
		[VM_MODE_P50V39_4K]	= "PA-bits:50, VA-bits:39, 4K pages",
		[VM_MODE_P41V57_4K]	= "PA-bits:41, VA-bits:57, 4K pages",
		[VM_MODE_P41V48_4K]	= "PA-bits:41, VA-bits:48, 4K pages",
		[VM_MODE_P41V39_4K]	= "PA-bits:41, VA-bits:39, 4K pages",
	};
	_Static_assert(sizeof(strings)/sizeof(char *) == NUM_VM_MODES,
		       "Missing new mode strings?");

	TEST_ASSERT(i < NUM_VM_MODES, "Guest mode ID %d too big", i);

	return strings[i];
}

const struct vm_guest_mode_params vm_guest_mode_params[] = {
	[VM_MODE_P52V48_4K]	= { 52, 48, 0x1000, 12 },
	[VM_MODE_P52V48_16K]	= { 52, 48, 0x4000, 14 },
	[VM_MODE_P52V48_64K]	= { 52, 48, 0x10000, 16 },
	[VM_MODE_P48V48_4K]	= { 48, 48, 0x1000, 12 },
	[VM_MODE_P48V48_16K]	= { 48, 48, 0x4000, 14 },
	[VM_MODE_P48V48_64K]	= { 48, 48, 0x10000, 16 },
	[VM_MODE_P40V48_4K]	= { 40, 48, 0x1000, 12 },
	[VM_MODE_P40V48_16K]	= { 40, 48, 0x4000, 14 },
	[VM_MODE_P40V48_64K]	= { 40, 48, 0x10000, 16 },
	[VM_MODE_PXXVYY_4K]	= { 0, 0, 0x1000, 12 },
	[VM_MODE_P47V64_4K]	= { 47, 64, 0x1000, 12 },
	[VM_MODE_P44V64_4K]	= { 44, 64, 0x1000, 12 },
	[VM_MODE_P36V48_4K]	= { 36, 48, 0x1000, 12 },
	[VM_MODE_P36V48_16K]	= { 36, 48, 0x4000, 14 },
	[VM_MODE_P36V48_64K]	= { 36, 48, 0x10000, 16 },
	[VM_MODE_P47V47_16K]	= { 47, 47, 0x4000, 14 },
	[VM_MODE_P36V47_16K]	= { 36, 47, 0x4000, 14 },
	[VM_MODE_P56V57_4K]	= { 56, 57, 0x1000, 12 },
	[VM_MODE_P56V48_4K]	= { 56, 48, 0x1000, 12 },
	[VM_MODE_P56V39_4K]	= { 56, 39, 0x1000, 12 },
	[VM_MODE_P50V57_4K]	= { 50, 57, 0x1000, 12 },
	[VM_MODE_P50V48_4K]	= { 50, 48, 0x1000, 12 },
	[VM_MODE_P50V39_4K]	= { 50, 39, 0x1000, 12 },
	[VM_MODE_P41V57_4K]	= { 41, 57, 0x1000, 12 },
	[VM_MODE_P41V48_4K]	= { 41, 48, 0x1000, 12 },
	[VM_MODE_P41V39_4K]	= { 41, 39, 0x1000, 12 },
};
_Static_assert(sizeof(vm_guest_mode_params)/sizeof(struct vm_guest_mode_params) == NUM_VM_MODES,
	       "Missing new mode params?");

/*
 * Initializes vm->vpages_valid to match the canonical VA space of the
 * architecture.
 *
 * The default implementation is valid for architectures which split the
 * range addressed by a single page table into a low and high region
 * based on the MSB of the VA.  On architectures with this behavior
 * the VA region spans [0, 2^(va_bits - 1)), [-(2^(va_bits - 1)), -1].
 */
__weak void vm_vaddr_populate_bitmap(struct kvm_vm *vm)
{
	sparsebit_set_num(vm->vpages_valid,
		0, (1ULL << (vm->va_bits - 1)) >> vm->page_shift);
	sparsebit_set_num(vm->vpages_valid,
		(~((1ULL << (vm->va_bits - 1)) - 1)) >> vm->page_shift,
		(1ULL << (vm->va_bits - 1)) >> vm->page_shift);
}
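
/*
 * Worked example for the default bitmap above (illustrative numbers): with
 * va_bits = 48 and 4K pages, the low half covers virtual pages
 * [0, 1ULL << (47 - 12)) and the high half covers the same number of pages
 * starting at page index 0xffff800000000000ULL >> 12.
 */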

struct kvm_vm *____vm_create(struct vm_shape shape)
{
	struct kvm_vm *vm;

	vm = calloc(1, sizeof(*vm));
	TEST_ASSERT(vm != NULL, "Insufficient Memory");

	INIT_LIST_HEAD(&vm->vcpus);
	vm->regions.gpa_tree = RB_ROOT;
	vm->regions.hva_tree = RB_ROOT;
	hash_init(vm->regions.slot_hash);

	vm->mode = shape.mode;
	vm->type = shape.type;

	vm->pa_bits = vm_guest_mode_params[vm->mode].pa_bits;
	vm->va_bits = vm_guest_mode_params[vm->mode].va_bits;
	vm->page_size = vm_guest_mode_params[vm->mode].page_size;
	vm->page_shift = vm_guest_mode_params[vm->mode].page_shift;

	/* Setup mode specific traits. */
	switch (vm->mode) {
	case VM_MODE_P52V48_4K:
		vm->mmu.pgtable_levels = 4;
		break;
	case VM_MODE_P52V48_64K:
		vm->mmu.pgtable_levels = 3;
		break;
	case VM_MODE_P48V48_4K:
		vm->mmu.pgtable_levels = 4;
		break;
	case VM_MODE_P48V48_64K:
		vm->mmu.pgtable_levels = 3;
		break;
	case VM_MODE_P40V48_4K:
	case VM_MODE_P36V48_4K:
		vm->mmu.pgtable_levels = 4;
		break;
	case VM_MODE_P40V48_64K:
	case VM_MODE_P36V48_64K:
		vm->mmu.pgtable_levels = 3;
		break;
	case VM_MODE_P52V48_16K:
	case VM_MODE_P48V48_16K:
	case VM_MODE_P40V48_16K:
	case VM_MODE_P36V48_16K:
		vm->mmu.pgtable_levels = 4;
		break;
	case VM_MODE_P47V47_16K:
	case VM_MODE_P36V47_16K:
		vm->mmu.pgtable_levels = 3;
		break;
	case VM_MODE_PXXVYY_4K:
#ifdef __x86_64__
		kvm_get_cpu_address_width(&vm->pa_bits, &vm->va_bits);
		kvm_init_vm_address_properties(vm);

		pr_debug("Guest physical address width detected: %d\n",
			 vm->pa_bits);
		pr_debug("Guest virtual address width detected: %d\n",
			 vm->va_bits);

		if (vm->va_bits == 57) {
			vm->mmu.pgtable_levels = 5;
		} else {
			TEST_ASSERT(vm->va_bits == 48,
				    "Unexpected guest virtual address width: %d",
				    vm->va_bits);
			vm->mmu.pgtable_levels = 4;
		}
#else
		TEST_FAIL("VM_MODE_PXXVYY_4K not supported on non-x86 platforms");
#endif
		break;
	case VM_MODE_P47V64_4K:
		vm->mmu.pgtable_levels = 5;
		break;
	case VM_MODE_P44V64_4K:
		vm->mmu.pgtable_levels = 5;
		break;
	case VM_MODE_P56V57_4K:
	case VM_MODE_P50V57_4K:
	case VM_MODE_P41V57_4K:
		vm->mmu.pgtable_levels = 5;
		break;
	case VM_MODE_P56V48_4K:
	case VM_MODE_P50V48_4K:
	case VM_MODE_P41V48_4K:
		vm->mmu.pgtable_levels = 4;
		break;
	case VM_MODE_P56V39_4K:
	case VM_MODE_P50V39_4K:
	case VM_MODE_P41V39_4K:
		vm->mmu.pgtable_levels = 3;
		break;
	default:
		TEST_FAIL("Unknown guest mode: 0x%x", vm->mode);
	}

#ifdef __aarch64__
	TEST_ASSERT(!vm->type, "ARM doesn't support test-provided types");
	if (vm->pa_bits != 40)
		vm->type = KVM_VM_TYPE_ARM_IPA_SIZE(vm->pa_bits);
#endif

	vm_open(vm);

	/* Limit to VA-bit canonical virtual addresses. */
	vm->vpages_valid = sparsebit_alloc();
	vm_vaddr_populate_bitmap(vm);

	/* Limit physical addresses to PA-bits. */
	vm->max_gfn = vm_compute_max_gfn(vm);

	/* Allocate and setup memory for guest. */
	vm->vpages_mapped = sparsebit_alloc();

	return vm;
}

static uint64_t vm_nr_pages_required(enum vm_guest_mode mode,
				     uint32_t nr_runnable_vcpus,
				     uint64_t extra_mem_pages)
{
	uint64_t page_size = vm_guest_mode_params[mode].page_size;
	uint64_t nr_pages;

	TEST_ASSERT(nr_runnable_vcpus,
		    "Use vm_create_barebones() for VMs that _never_ have vCPUs");

	TEST_ASSERT(nr_runnable_vcpus <= kvm_check_cap(KVM_CAP_MAX_VCPUS),
		    "nr_vcpus = %d too large for host, max-vcpus = %d",
		    nr_runnable_vcpus, kvm_check_cap(KVM_CAP_MAX_VCPUS));

	/*
	 * Arbitrarily allocate 512 pages (2MiB when the page size is 4KiB)
	 * for the test code and other per-VM assets that will be loaded into
	 * memslot0.
	 */
	nr_pages = 512;

	/* Account for the per-vCPU stacks on behalf of the test. */
	nr_pages += nr_runnable_vcpus * DEFAULT_STACK_PGS;

	/*
	 * Account for the number of pages needed for the page tables.  The
	 * maximum page table size for a memory region will be when the
	 * smallest page size is used.  Considering each page contains x page
	 * table descriptors, the total extra size for page tables (for extra
	 * N pages) will be: N/x+N/x^2+N/x^3+... which is definitely smaller
	 * than N/x*2.
	 */
	nr_pages += (nr_pages + extra_mem_pages) / PTES_PER_MIN_PAGE * 2;
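
	/*
	 * Worked example for the estimate above (illustrative, assuming 4K
	 * pages holding 512 8-byte descriptors each, i.e. x = 512): mapping
	 * 1GiB of extra memory (262144 pages) adds at most
	 * 262144 / 512 * 2 = 1024 pages, i.e. 4MiB of page tables.
	 */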

	/* Account for the number of pages needed by ucall. */
	nr_pages += ucall_nr_pages_required(page_size);

	return vm_adjust_num_guest_pages(mode, nr_pages);
}

void kvm_set_files_rlimit(uint32_t nr_vcpus)
{
	/*
	 * Each vCPU will open two file descriptors: the vCPU itself and the
	 * vCPU's binary stats file descriptor.  Add an arbitrary amount of
	 * buffer for all other files a test may open.
	 */
	int nr_fds_wanted = nr_vcpus * 2 + 100;
	struct rlimit rl;

	/*
	 * Check that we're allowed to open nr_fds_wanted file descriptors and
	 * try raising the limits if needed.
	 */
	TEST_ASSERT(!getrlimit(RLIMIT_NOFILE, &rl), "getrlimit() failed!");

	if (rl.rlim_cur < nr_fds_wanted) {
		rl.rlim_cur = nr_fds_wanted;
		if (rl.rlim_max < nr_fds_wanted) {
			int old_rlim_max = rl.rlim_max;

			rl.rlim_max = nr_fds_wanted;
			__TEST_REQUIRE(setrlimit(RLIMIT_NOFILE, &rl) >= 0,
				       "RLIMIT_NOFILE hard limit is too low (%d, wanted %d)",
				       old_rlim_max, nr_fds_wanted);
		} else {
			TEST_ASSERT(!setrlimit(RLIMIT_NOFILE, &rl), "setrlimit() failed!");
		}
	}
}

static bool is_guest_memfd_required(struct vm_shape shape)
{
#ifdef __x86_64__
	return shape.type == KVM_X86_SNP_VM;
#else
	return false;
#endif
}

struct kvm_vm *__vm_create(struct vm_shape shape, uint32_t nr_runnable_vcpus,
			   uint64_t nr_extra_pages)
{
	uint64_t nr_pages = vm_nr_pages_required(shape.mode, nr_runnable_vcpus,
						 nr_extra_pages);
	struct userspace_mem_region *slot0;
	struct kvm_vm *vm;
	int i, flags;

	kvm_set_files_rlimit(nr_runnable_vcpus);

	pr_debug("%s: mode='%s' type='%d', pages='%ld'\n", __func__,
		 vm_guest_mode_string(shape.mode), shape.type, nr_pages);

	vm = ____vm_create(shape);

	/*
	 * Force GUEST_MEMFD for the primary memory region if necessary, e.g.
	 * for CoCo VMs that require GUEST_MEMFD backed private memory.
	 */
	flags = 0;
	if (is_guest_memfd_required(shape))
		flags |= KVM_MEM_GUEST_MEMFD;

	vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, 0, 0, nr_pages, flags);
	for (i = 0; i < NR_MEM_REGIONS; i++)
		vm->memslots[i] = 0;

	kvm_vm_elf_load(vm, program_invocation_name);

	/*
	 * TODO: Add proper defines to protect the library's memslots, and then
	 * carve out memslot1 for the ucall MMIO address.  KVM treats writes to
	 * read-only memslots as MMIO, and creating a read-only memslot for the
	 * MMIO region would prevent silently clobbering the MMIO region.
	 */
	slot0 = memslot2region(vm, 0);
	ucall_init(vm, slot0->region.guest_phys_addr + slot0->region.memory_size);

	if (guest_random_seed != last_guest_seed) {
		pr_info("Random seed: 0x%x\n", guest_random_seed);
		last_guest_seed = guest_random_seed;
	}
	guest_rng = new_guest_random_state(guest_random_seed);
	sync_global_to_guest(vm, guest_rng);

	kvm_arch_vm_post_create(vm, nr_runnable_vcpus);

	return vm;
}

/*
 * VM Create with customized parameters
 *
 * Input Args:
 *   shape - VM shape (mode and type, e.g. VM_MODE_P52V48_4K)
 *   nr_vcpus - vCPU count
 *   extra_mem_pages - Non-slot0 physical memory total size
 *   guest_code - Guest entry point
 *   vcpus - Array the created vCPUs are written to
 *
 * Output Args: None
 *
 * Return:
 *   Pointer to opaque structure that describes the created VM.
 *
 * Creates a VM with the mode specified by shape.mode (e.g. VM_MODE_P52V48_4K).
 * extra_mem_pages is only used to calculate the maximum page table size;
 * no real memory allocation is done for non-slot0 memory in this function.
 */
struct kvm_vm *__vm_create_with_vcpus(struct vm_shape shape, uint32_t nr_vcpus,
				      uint64_t extra_mem_pages,
				      void *guest_code, struct kvm_vcpu *vcpus[])
{
	struct kvm_vm *vm;
	int i;

	TEST_ASSERT(!nr_vcpus || vcpus, "Must provide vCPU array");

	vm = __vm_create(shape, nr_vcpus, extra_mem_pages);

	for (i = 0; i < nr_vcpus; ++i)
		vcpus[i] = vm_vcpu_add(vm, i, guest_code);

	kvm_arch_vm_finalize_vcpus(vm);
	return vm;
}

struct kvm_vm *__vm_create_shape_with_one_vcpu(struct vm_shape shape,
					       struct kvm_vcpu **vcpu,
					       uint64_t extra_mem_pages,
					       void *guest_code)
{
	struct kvm_vcpu *vcpus[1];
	struct kvm_vm *vm;

	vm = __vm_create_with_vcpus(shape, 1, extra_mem_pages, guest_code, vcpus);

	*vcpu = vcpus[0];
	return vm;
}

/*
 * VM Restart
 *
 * Input Args:
 *   vm - VM that has been released before
 *
 * Output Args: None
 *
 * Reopens the file descriptors associated with the VM and reinstates the
 * global state, such as the irqchip and the memory regions that are mapped
 * into the guest.
 */
void kvm_vm_restart(struct kvm_vm *vmp)
{
	int ctr;
	struct userspace_mem_region *region;

	vm_open(vmp);
	if (vmp->has_irqchip)
		vm_create_irqchip(vmp);

	hash_for_each(vmp->regions.slot_hash, ctr, region, slot_node) {
		int ret = ioctl(vmp->fd, KVM_SET_USER_MEMORY_REGION2, &region->region);

		TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION2 IOCTL failed,\n"
			    "  rc: %i errno: %i\n"
			    "  slot: %u flags: 0x%x\n"
			    "  guest_phys_addr: 0x%llx size: 0x%llx",
			    ret, errno, region->region.slot,
			    region->region.flags,
			    region->region.guest_phys_addr,
			    region->region.memory_size);
	}
}

__weak struct kvm_vcpu *vm_arch_vcpu_recreate(struct kvm_vm *vm,
					      uint32_t vcpu_id)
{
	return __vm_vcpu_add(vm, vcpu_id);
}

struct kvm_vcpu *vm_recreate_with_one_vcpu(struct kvm_vm *vm)
{
	kvm_vm_restart(vm);

	return vm_vcpu_recreate(vm, 0);
}

int __pin_task_to_cpu(pthread_t task, int cpu)
{
	cpu_set_t cpuset;

	CPU_ZERO(&cpuset);
	CPU_SET(cpu, &cpuset);

	return pthread_setaffinity_np(task, sizeof(cpuset), &cpuset);
}

static uint32_t parse_pcpu(const char *cpu_str, const cpu_set_t *allowed_mask)
{
	uint32_t pcpu = atoi_non_negative("CPU number", cpu_str);

	TEST_ASSERT(CPU_ISSET(pcpu, allowed_mask),
		    "Not allowed to run on pCPU '%d', check cgroups?", pcpu);
	return pcpu;
}

void kvm_print_vcpu_pinning_help(void)
{
	const char *name = program_invocation_name;

	printf(" -c: Pin tasks to physical CPUs.  Takes a list of comma separated\n"
	       "     values (target pCPU), one for each vCPU, plus an optional\n"
	       "     entry for the main application task (specified via entry\n"
	       "     <nr_vcpus + 1>).  If used, entries must be provided for all\n"
	       "     vCPUs, i.e. pinning vCPUs is all or nothing.\n\n"
	       "     E.g. to create 3 vCPUs, pin vCPU0=>pCPU22, vCPU1=>pCPU23,\n"
	       "     vCPU2=>pCPU24, and pin the application task to pCPU50:\n\n"
	       "         %s -v 3 -c 22,23,24,50\n\n"
	       "     To leave the application task unpinned, drop the final entry:\n\n"
	       "         %s -v 3 -c 22,23,24\n\n"
	       "     (default: no pinning)\n", name, name);
}

void kvm_parse_vcpu_pinning(const char *pcpus_string, uint32_t vcpu_to_pcpu[],
			    int nr_vcpus)
{
	cpu_set_t allowed_mask;
	char *cpu, *cpu_list;
	char delim[2] = ",";
	int i, r;

	cpu_list = strdup(pcpus_string);
	TEST_ASSERT(cpu_list, "strdup() allocation failed.");

	r = sched_getaffinity(0, sizeof(allowed_mask), &allowed_mask);
	TEST_ASSERT(!r, "sched_getaffinity() failed");

	cpu = strtok(cpu_list, delim);

	/* 1. Get all pcpus for vcpus. */
	for (i = 0; i < nr_vcpus; i++) {
		TEST_ASSERT(cpu, "pCPU not provided for vCPU '%d'", i);
		vcpu_to_pcpu[i] = parse_pcpu(cpu, &allowed_mask);
		cpu = strtok(NULL, delim);
	}

	/* 2. Check if the main worker needs to be pinned. */
	if (cpu) {
		pin_self_to_cpu(parse_pcpu(cpu, &allowed_mask));
		cpu = strtok(NULL, delim);
	}

	TEST_ASSERT(!cpu, "pCPU list contains trailing garbage characters '%s'", cpu);
	free(cpu_list);
}

/*
 * Userspace Memory Region Find
 *
 * Input Args:
 *   vm - Virtual Machine
 *   start - Starting VM physical address
 *   end - Ending VM physical address, inclusive.
 *
 * Output Args: None
 *
 * Return:
 *   Pointer to overlapping region, NULL if no such region.
 *
 * Searches for a region with any physical memory that overlaps with
 * any portion of the guest physical addresses from start to end
 * inclusive.  If multiple overlapping regions exist, a pointer to any
 * of the regions is returned.  NULL is returned only when no overlapping
 * region exists.
 */
static struct userspace_mem_region *
userspace_mem_region_find(struct kvm_vm *vm, uint64_t start, uint64_t end)
{
	struct rb_node *node;

	for (node = vm->regions.gpa_tree.rb_node; node; ) {
		struct userspace_mem_region *region =
			container_of(node, struct userspace_mem_region, gpa_node);
		uint64_t existing_start = region->region.guest_phys_addr;
		uint64_t existing_end = region->region.guest_phys_addr
			+ region->region.memory_size - 1;
		if (start <= existing_end && end >= existing_start)
			return region;

		if (start < existing_start)
			node = node->rb_left;
		else
			node = node->rb_right;
	}

	return NULL;
}
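
/*
 * Note on the overlap test above: [start, end] overlaps
 * [existing_start, existing_end] iff start <= existing_end &&
 * end >= existing_start.  E.g. (illustrative) a region spanning GPAs
 * [0x10000, 0x1ffff] is found by any query range touching it, such as
 * userspace_mem_region_find(vm, 0x1f000, 0x2ffff).
 */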

static void kvm_stats_release(struct kvm_binary_stats *stats)
{
	if (stats->fd < 0)
		return;

	if (stats->desc) {
		free(stats->desc);
		stats->desc = NULL;
	}

	kvm_close(stats->fd);
	stats->fd = -1;
}

__weak void vcpu_arch_free(struct kvm_vcpu *vcpu)
{

}

/*
 * VM VCPU Remove
 *
 * Input Args:
 *   vcpu - VCPU to remove
 *
 * Output Args: None
 *
 * Return: None, TEST_ASSERT failures for all error conditions
 *
 * Removes a vCPU from a VM and frees its resources.
 */
static void vm_vcpu_rm(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
{
	if (vcpu->dirty_gfns) {
		kvm_munmap(vcpu->dirty_gfns, vm->dirty_ring_size);
		vcpu->dirty_gfns = NULL;
	}

	kvm_munmap(vcpu->run, vcpu_mmap_sz());

	kvm_close(vcpu->fd);
	kvm_stats_release(&vcpu->stats);

	list_del(&vcpu->list);

	vcpu_arch_free(vcpu);
	free(vcpu);
}

void kvm_vm_release(struct kvm_vm *vmp)
{
	struct kvm_vcpu *vcpu, *tmp;

	list_for_each_entry_safe(vcpu, tmp, &vmp->vcpus, list)
		vm_vcpu_rm(vmp, vcpu);

	kvm_close(vmp->fd);
	kvm_close(vmp->kvm_fd);

	/* Free cached stats metadata and close FD */
	kvm_stats_release(&vmp->stats);

	kvm_arch_vm_release(vmp);
}

static void __vm_mem_region_delete(struct kvm_vm *vm,
				   struct userspace_mem_region *region)
{
	rb_erase(&region->gpa_node, &vm->regions.gpa_tree);
	rb_erase(&region->hva_node, &vm->regions.hva_tree);
	hash_del(&region->slot_node);

	sparsebit_free(&region->unused_phy_pages);
	sparsebit_free(&region->protected_phy_pages);
	kvm_munmap(region->mmap_start, region->mmap_size);
	if (region->fd >= 0) {
		/* There's an extra map when using shared memory. */
		kvm_munmap(region->mmap_alias, region->mmap_size);
		close(region->fd);
	}
	if (region->region.guest_memfd >= 0)
		close(region->region.guest_memfd);

	free(region);
}

/*
 * Destroys and frees the VM pointed to by vmp.
 */
void kvm_vm_free(struct kvm_vm *vmp)
{
	int ctr;
	struct hlist_node *node;
	struct userspace_mem_region *region;

	if (vmp == NULL)
		return;

	/* Free userspace_mem_regions. */
	hash_for_each_safe(vmp->regions.slot_hash, ctr, node, region, slot_node)
		__vm_mem_region_delete(vmp, region);

	/* Free sparsebit arrays. */
	sparsebit_free(&vmp->vpages_valid);
	sparsebit_free(&vmp->vpages_mapped);

	kvm_vm_release(vmp);

	/* Free the structure describing the VM. */
	free(vmp);
}

int kvm_memfd_alloc(size_t size, bool hugepages)
{
	int memfd_flags = MFD_CLOEXEC;
	int fd;

	if (hugepages)
		memfd_flags |= MFD_HUGETLB;

	fd = memfd_create("kvm_selftest", memfd_flags);
	TEST_ASSERT(fd != -1, __KVM_SYSCALL_ERROR("memfd_create()", fd));

	kvm_ftruncate(fd, size);
	kvm_fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, 0, size);

	return fd;
}

static void vm_userspace_mem_region_gpa_insert(struct rb_root *gpa_tree,
					       struct userspace_mem_region *region)
{
	struct rb_node **cur, *parent;

	for (cur = &gpa_tree->rb_node, parent = NULL; *cur; ) {
		struct userspace_mem_region *cregion;

		cregion = container_of(*cur, typeof(*cregion), gpa_node);
		parent = *cur;
		if (region->region.guest_phys_addr <
		    cregion->region.guest_phys_addr)
			cur = &(*cur)->rb_left;
		else {
			TEST_ASSERT(region->region.guest_phys_addr !=
				    cregion->region.guest_phys_addr,
				    "Duplicate GPA in region tree");

			cur = &(*cur)->rb_right;
		}
	}

	rb_link_node(&region->gpa_node, parent, cur);
	rb_insert_color(&region->gpa_node, gpa_tree);
}

static void vm_userspace_mem_region_hva_insert(struct rb_root *hva_tree,
					       struct userspace_mem_region *region)
{
	struct rb_node **cur, *parent;

	for (cur = &hva_tree->rb_node, parent = NULL; *cur; ) {
		struct userspace_mem_region *cregion;

		cregion = container_of(*cur, typeof(*cregion), hva_node);
		parent = *cur;
		if (region->host_mem < cregion->host_mem)
			cur = &(*cur)->rb_left;
		else {
			TEST_ASSERT(region->host_mem !=
				    cregion->host_mem,
				    "Duplicate HVA in region tree");

			cur = &(*cur)->rb_right;
		}
	}

	rb_link_node(&region->hva_node, parent, cur);
	rb_insert_color(&region->hva_node, hva_tree);
}

int __vm_set_user_memory_region(struct kvm_vm *vm, uint32_t slot, uint32_t flags,
				uint64_t gpa, uint64_t size, void *hva)
{
	struct kvm_userspace_memory_region region = {
		.slot = slot,
		.flags = flags,
		.guest_phys_addr = gpa,
		.memory_size = size,
		.userspace_addr = (uintptr_t)hva,
	};

	return ioctl(vm->fd, KVM_SET_USER_MEMORY_REGION, &region);
}

void vm_set_user_memory_region(struct kvm_vm *vm, uint32_t slot, uint32_t flags,
			       uint64_t gpa, uint64_t size, void *hva)
{
	int ret = __vm_set_user_memory_region(vm, slot, flags, gpa, size, hva);

	TEST_ASSERT(!ret, "KVM_SET_USER_MEMORY_REGION failed, errno = %d (%s)",
		    errno, strerror(errno));
}

#define TEST_REQUIRE_SET_USER_MEMORY_REGION2()			\
	__TEST_REQUIRE(kvm_has_cap(KVM_CAP_USER_MEMORY2),	\
		       "KVM selftests now require KVM_SET_USER_MEMORY_REGION2 (introduced in v6.8)")

int __vm_set_user_memory_region2(struct kvm_vm *vm, uint32_t slot, uint32_t flags,
				 uint64_t gpa, uint64_t size, void *hva,
				 uint32_t guest_memfd, uint64_t guest_memfd_offset)
{
	struct kvm_userspace_memory_region2 region = {
		.slot = slot,
		.flags = flags,
		.guest_phys_addr = gpa,
		.memory_size = size,
		.userspace_addr = (uintptr_t)hva,
		.guest_memfd = guest_memfd,
		.guest_memfd_offset = guest_memfd_offset,
	};

	TEST_REQUIRE_SET_USER_MEMORY_REGION2();

	return ioctl(vm->fd, KVM_SET_USER_MEMORY_REGION2, &region);
}

void vm_set_user_memory_region2(struct kvm_vm *vm, uint32_t slot, uint32_t flags,
				uint64_t gpa, uint64_t size, void *hva,
				uint32_t guest_memfd, uint64_t guest_memfd_offset)
{
	int ret = __vm_set_user_memory_region2(vm, slot, flags, gpa, size, hva,
					       guest_memfd, guest_memfd_offset);

	TEST_ASSERT(!ret, "KVM_SET_USER_MEMORY_REGION2 failed, errno = %d (%s)",
		    errno, strerror(errno));
}
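
/*
 * Usage sketch with illustrative values: map 2MiB of host memory at GPA
 * 0x10000000 in slot 1, with no special flags and no guest_memfd (the fd
 * and offset are ignored by KVM unless KVM_MEM_GUEST_MEMFD is set):
 *
 *	vm_set_user_memory_region2(vm, 1, 0, 0x10000000, 0x200000, hva, 0, 0);
 */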

/* FIXME: This thing needs to be ripped apart and rewritten. */
void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type,
		uint64_t gpa, uint32_t slot, uint64_t npages, uint32_t flags,
		int guest_memfd, uint64_t guest_memfd_offset)
{
	int ret;
	struct userspace_mem_region *region;
	size_t backing_src_pagesz = get_backing_src_pagesz(src_type);
	size_t mem_size = npages * vm->page_size;
	size_t alignment;

	TEST_REQUIRE_SET_USER_MEMORY_REGION2();

	TEST_ASSERT(vm_adjust_num_guest_pages(vm->mode, npages) == npages,
		    "Number of guest pages is not compatible with the host.  "
		    "Try npages=%d", vm_adjust_num_guest_pages(vm->mode, npages));

	TEST_ASSERT((gpa % vm->page_size) == 0, "Guest physical "
		"address not on a page boundary.\n"
		"  gpa: 0x%lx vm->page_size: 0x%x",
		gpa, vm->page_size);
	TEST_ASSERT((((gpa >> vm->page_shift) + npages) - 1)
		<= vm->max_gfn, "Physical range beyond maximum "
		"supported physical address,\n"
		"  gpa: 0x%lx npages: 0x%lx\n"
		"  vm->max_gfn: 0x%lx vm->page_size: 0x%x",
		gpa, npages, vm->max_gfn, vm->page_size);

	/*
	 * Confirm a mem region with an overlapping address doesn't
	 * already exist.
	 */
	region = (struct userspace_mem_region *) userspace_mem_region_find(
		vm, gpa, (gpa + npages * vm->page_size) - 1);
	if (region != NULL)
		TEST_FAIL("overlapping userspace_mem_region already "
			"exists\n"
			"  requested gpa: 0x%lx npages: 0x%lx page_size: 0x%x\n"
			"  existing gpa: 0x%lx size: 0x%lx",
			gpa, npages, vm->page_size,
			(uint64_t) region->region.guest_phys_addr,
			(uint64_t) region->region.memory_size);

	/* Confirm no region with the requested slot already exists. */
	hash_for_each_possible(vm->regions.slot_hash, region, slot_node,
			       slot) {
		if (region->region.slot != slot)
			continue;

		TEST_FAIL("A mem region with the requested slot "
			"already exists.\n"
			"  requested slot: %u paddr: 0x%lx npages: 0x%lx\n"
			"  existing slot: %u paddr: 0x%lx size: 0x%lx",
			slot, gpa, npages, region->region.slot,
			(uint64_t) region->region.guest_phys_addr,
			(uint64_t) region->region.memory_size);
	}

	/* Allocate and initialize new mem region structure. */
	region = calloc(1, sizeof(*region));
	TEST_ASSERT(region != NULL, "Insufficient Memory");
	region->mmap_size = mem_size;

#ifdef __s390x__
	/* On s390x, the host address must be aligned to 1M (due to PGSTEs) */
	alignment = 0x100000;
#else
	alignment = 1;
#endif

	/*
	 * When using THP, mmap is not guaranteed to return a hugepage-aligned
	 * address, so we have to pad the mmap.  Padding is not needed for
	 * HugeTLB because mmap will always return an address aligned to the
	 * HugeTLB page size.
	 */
	if (src_type == VM_MEM_SRC_ANONYMOUS_THP)
		alignment = max(backing_src_pagesz, alignment);

	TEST_ASSERT_EQ(gpa, align_up(gpa, backing_src_pagesz));

	/* Add enough memory to align up if necessary */
	if (alignment > 1)
		region->mmap_size += alignment;

	region->fd = -1;
	if (backing_src_is_shared(src_type))
		region->fd = kvm_memfd_alloc(region->mmap_size,
					     src_type == VM_MEM_SRC_SHARED_HUGETLB);

	region->mmap_start = kvm_mmap(region->mmap_size, PROT_READ | PROT_WRITE,
				      vm_mem_backing_src_alias(src_type)->flag,
				      region->fd);

	TEST_ASSERT(!is_backing_src_hugetlb(src_type) ||
		    region->mmap_start == align_ptr_up(region->mmap_start, backing_src_pagesz),
		    "mmap_start %p is not aligned to HugeTLB page size 0x%lx",
		    region->mmap_start, backing_src_pagesz);

	/* Align host address */
	region->host_mem = align_ptr_up(region->mmap_start, alignment);

	/* As needed perform madvise */
	if ((src_type == VM_MEM_SRC_ANONYMOUS ||
	     src_type == VM_MEM_SRC_ANONYMOUS_THP) && thp_configured()) {
		ret = madvise(region->host_mem, mem_size,
			      src_type == VM_MEM_SRC_ANONYMOUS ? MADV_NOHUGEPAGE : MADV_HUGEPAGE);
		TEST_ASSERT(ret == 0, "madvise failed, addr: %p length: 0x%lx src_type: %s",
			    region->host_mem, mem_size,
			    vm_mem_backing_src_alias(src_type)->name);
	}

	region->backing_src_type = src_type;

	if (flags & KVM_MEM_GUEST_MEMFD) {
		if (guest_memfd < 0) {
			uint32_t guest_memfd_flags = 0;

			TEST_ASSERT(!guest_memfd_offset,
				    "Offset must be zero when creating new guest_memfd");
			guest_memfd = vm_create_guest_memfd(vm, mem_size, guest_memfd_flags);
		} else {
			/*
			 * Install a unique fd for each memslot so that the fd
			 * can be closed when the region is deleted without
			 * needing to track if the fd is owned by the framework
			 * or by the caller.
			 */
			guest_memfd = kvm_dup(guest_memfd);
		}

		region->region.guest_memfd = guest_memfd;
		region->region.guest_memfd_offset = guest_memfd_offset;
	} else {
		region->region.guest_memfd = -1;
	}

	region->unused_phy_pages = sparsebit_alloc();
	if (vm_arch_has_protected_memory(vm))
		region->protected_phy_pages = sparsebit_alloc();
	sparsebit_set_num(region->unused_phy_pages, gpa >> vm->page_shift, npages);
	region->region.slot = slot;
	region->region.flags = flags;
	region->region.guest_phys_addr = gpa;
	region->region.memory_size = npages * vm->page_size;
	region->region.userspace_addr = (uintptr_t) region->host_mem;
	ret = __vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION2, &region->region);
	TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION2 IOCTL failed,\n"
		"  rc: %i errno: %i\n"
		"  slot: %u flags: 0x%x\n"
		"  guest_phys_addr: 0x%lx size: 0x%llx guest_memfd: %d",
		ret, errno, slot, flags, gpa, region->region.memory_size,
		region->region.guest_memfd);

	/* Add to quick lookup data structures */
	vm_userspace_mem_region_gpa_insert(&vm->regions.gpa_tree, region);
	vm_userspace_mem_region_hva_insert(&vm->regions.hva_tree, region);
	hash_add(vm->regions.slot_hash, &region->slot_node, slot);

	/* If shared memory, create an alias. */
	if (region->fd >= 0) {
		region->mmap_alias = kvm_mmap(region->mmap_size,
					      PROT_READ | PROT_WRITE,
					      vm_mem_backing_src_alias(src_type)->flag,
					      region->fd);

		/* Align host alias address */
		region->host_alias = align_ptr_up(region->mmap_alias, alignment);
	}
}

void vm_userspace_mem_region_add(struct kvm_vm *vm,
				 enum vm_mem_backing_src_type src_type,
				 uint64_t gpa, uint32_t slot, uint64_t npages,
				 uint32_t flags)
{
	vm_mem_add(vm, src_type, gpa, slot, npages, flags, -1, 0);
}

/*
 * Memslot to region
 *
 * Input Args:
 *   vm - Virtual Machine
 *   memslot - KVM memory slot ID
 *
 * Output Args: None
 *
 * Return:
 *   Pointer to memory region structure that describes the memory region
 *   using kvm memory slot ID given by memslot.  TEST_ASSERT failure
 *   on error (e.g. currently no memory region using memslot as a KVM
 *   memory slot ID).
 */
struct userspace_mem_region *
memslot2region(struct kvm_vm *vm, uint32_t memslot)
{
	struct userspace_mem_region *region;

	hash_for_each_possible(vm->regions.slot_hash, region, slot_node,
			       memslot)
		if (region->region.slot == memslot)
			return region;

	fprintf(stderr, "No mem region with the requested slot found,\n"
		"  requested slot: %u\n", memslot);
	fputs("---- vm dump ----\n", stderr);
	vm_dump(stderr, vm, 2);
	TEST_FAIL("Mem region not found");
	return NULL;
}

/*
 * VM Memory Region Flags Set
 *
 * Input Args:
 *   vm - Virtual Machine
 *   slot - Slot of the memory region to modify
 *   flags - Flags to be set for the memory region
 *
 * Output Args: None
 *
 * Return: None
 *
 * Sets the flags of the memory region specified by the value of slot,
 * to the values given by flags.
 */
void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags)
{
	int ret;
	struct userspace_mem_region *region;

	region = memslot2region(vm, slot);

	region->region.flags = flags;

	ret = __vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION2, &region->region);

	TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION2 IOCTL failed,\n"
		"  rc: %i errno: %i slot: %u flags: 0x%x",
		ret, errno, slot, flags);
}

void vm_mem_region_reload(struct kvm_vm *vm, uint32_t slot)
{
	struct userspace_mem_region *region = memslot2region(vm, slot);
	struct kvm_userspace_memory_region2 tmp = region->region;

	tmp.memory_size = 0;
	vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION2, &tmp);
	vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION2, &region->region);
}

/*
 * VM Memory Region Move
 *
 * Input Args:
 *   vm - Virtual Machine
 *   slot - Slot of the memory region to move
 *   new_gpa - Starting guest physical address
 *
 * Output Args: None
 *
 * Return: None
 *
 * Change the gpa of a memory region.
 */
void vm_mem_region_move(struct kvm_vm *vm, uint32_t slot, uint64_t new_gpa)
{
	struct userspace_mem_region *region;
	int ret;

	region = memslot2region(vm, slot);

	region->region.guest_phys_addr = new_gpa;

	ret = __vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION2, &region->region);

	TEST_ASSERT(!ret, "KVM_SET_USER_MEMORY_REGION2 failed\n"
		    "ret: %i errno: %i slot: %u new_gpa: 0x%lx",
		    ret, errno, slot, new_gpa);
}

/*
 * VM Memory Region Delete
 *
 * Input Args:
 *   vm - Virtual Machine
 *   slot - Slot of the memory region to delete
 *
 * Output Args: None
 *
 * Return: None
 *
 * Delete a memory region.
 */
void vm_mem_region_delete(struct kvm_vm *vm, uint32_t slot)
{
	struct userspace_mem_region *region = memslot2region(vm, slot);

	region->region.memory_size = 0;
	vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION2, &region->region);

	__vm_mem_region_delete(vm, region);
}

void vm_guest_mem_fallocate(struct kvm_vm *vm, uint64_t base, uint64_t size,
			    bool punch_hole)
{
	const int mode = FALLOC_FL_KEEP_SIZE | (punch_hole ? FALLOC_FL_PUNCH_HOLE : 0);
	struct userspace_mem_region *region;
	uint64_t end = base + size;
	uint64_t gpa, len;
	off_t fd_offset;
	int ret;

	for (gpa = base; gpa < end; gpa += len) {
		uint64_t offset;

		region = userspace_mem_region_find(vm, gpa, gpa);
		TEST_ASSERT(region && region->region.flags & KVM_MEM_GUEST_MEMFD,
			    "Private memory region not found for GPA 0x%lx", gpa);

		offset = gpa - region->region.guest_phys_addr;
		fd_offset = region->region.guest_memfd_offset + offset;
		len = min_t(uint64_t, end - gpa, region->region.memory_size - offset);

		ret = fallocate(region->region.guest_memfd, mode, fd_offset, len);
		TEST_ASSERT(!ret, "fallocate() failed to %s at %lx (len = %lu), fd = %d, mode = %x, offset = %lx",
			    punch_hole ? "punch hole" : "allocate", gpa, len,
			    region->region.guest_memfd, mode, fd_offset);
	}
}
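
/*
 * E.g. (illustrative values) punch a hole in the guest_memfd backing of a
 * 2MiB chunk at GPA 0x80000000, deallocating its private memory:
 *
 *	vm_guest_mem_fallocate(vm, 0x80000000, 0x200000, true);
 */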

/* Returns the size of a vCPU's kvm_run structure. */
static size_t vcpu_mmap_sz(void)
{
	int dev_fd, ret;

	dev_fd = open_kvm_dev_path_or_exit();

	ret = ioctl(dev_fd, KVM_GET_VCPU_MMAP_SIZE, NULL);
	TEST_ASSERT(ret >= 0 && ret >= sizeof(struct kvm_run),
		    KVM_IOCTL_ERROR(KVM_GET_VCPU_MMAP_SIZE, ret));

	close(dev_fd);

	return ret;
}

static bool vcpu_exists(struct kvm_vm *vm, uint32_t vcpu_id)
{
	struct kvm_vcpu *vcpu;

	list_for_each_entry(vcpu, &vm->vcpus, list) {
		if (vcpu->id == vcpu_id)
			return true;
	}

	return false;
}

/*
 * Adds a virtual CPU to the VM specified by vm with the ID given by vcpu_id.
 * No additional vCPU setup is done.  Returns the vCPU.
 */
struct kvm_vcpu *__vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id)
{
	struct kvm_vcpu *vcpu;

	/* Confirm a vcpu with the specified id doesn't already exist. */
	TEST_ASSERT(!vcpu_exists(vm, vcpu_id), "vCPU%d already exists", vcpu_id);

	/* Allocate and initialize new vcpu structure. */
	vcpu = calloc(1, sizeof(*vcpu));
	TEST_ASSERT(vcpu != NULL, "Insufficient Memory");

	vcpu->vm = vm;
	vcpu->id = vcpu_id;
	vcpu->fd = __vm_ioctl(vm, KVM_CREATE_VCPU, (void *)(unsigned long)vcpu_id);
	TEST_ASSERT_VM_VCPU_IOCTL(vcpu->fd >= 0, KVM_CREATE_VCPU, vcpu->fd, vm);

	TEST_ASSERT(vcpu_mmap_sz() >= sizeof(*vcpu->run), "vcpu mmap size "
		    "smaller than expected, vcpu_mmap_sz: %zi expected_min: %zi",
		    vcpu_mmap_sz(), sizeof(*vcpu->run));
	vcpu->run = kvm_mmap(vcpu_mmap_sz(), PROT_READ | PROT_WRITE,
			     MAP_SHARED, vcpu->fd);

	if (kvm_has_cap(KVM_CAP_BINARY_STATS_FD))
		vcpu->stats.fd = vcpu_get_stats_fd(vcpu);
	else
		vcpu->stats.fd = -1;

	/* Add to linked-list of VCPUs. */
	list_add(&vcpu->list, &vm->vcpus);

	return vcpu;
}

/*
 * VM Virtual Address Unused Gap
 *
 * Input Args:
 *   vm - Virtual Machine
 *   sz - Size (bytes)
 *   vaddr_min - Minimum Virtual Address
 *
 * Output Args: None
 *
 * Return:
 *   Lowest virtual address at or above vaddr_min, with at least
 *   sz unused bytes.  TEST_ASSERT failure if no area of at least
 *   size sz is available.
 *
 * Within the VM specified by vm, locates the lowest starting virtual
 * address >= vaddr_min, that has at least sz unallocated bytes.  A
 * TEST_ASSERT failure occurs for invalid input or no area of at least
 * sz unallocated bytes >= vaddr_min is available.
 */
vm_vaddr_t vm_vaddr_unused_gap(struct kvm_vm *vm, size_t sz,
			       vm_vaddr_t vaddr_min)
{
	uint64_t pages = (sz + vm->page_size - 1) >> vm->page_shift;

	/* Determine lowest permitted virtual page index. */
	uint64_t pgidx_start = (vaddr_min + vm->page_size - 1) >> vm->page_shift;
	if ((pgidx_start * vm->page_size) < vaddr_min)
		goto no_va_found;

	/* Loop over section with enough valid virtual page indexes. */
	if (!sparsebit_is_set_num(vm->vpages_valid,
				  pgidx_start, pages))
		pgidx_start = sparsebit_next_set_num(vm->vpages_valid,
						     pgidx_start, pages);
	do {
		/*
		 * Are there enough unused virtual pages available at
		 * the currently proposed starting virtual page index.
		 * If not, adjust proposed starting index to next
		 * possible.
		 */
		if (sparsebit_is_clear_num(vm->vpages_mapped,
					   pgidx_start, pages))
			goto va_found;
		pgidx_start = sparsebit_next_clear_num(vm->vpages_mapped,
						       pgidx_start, pages);
		if (pgidx_start == 0)
			goto no_va_found;

		/*
		 * If needed, adjust proposed starting virtual address,
		 * to next range of valid virtual addresses.
		 */
		if (!sparsebit_is_set_num(vm->vpages_valid,
					  pgidx_start, pages)) {
			pgidx_start = sparsebit_next_set_num(
				vm->vpages_valid, pgidx_start, pages);
			if (pgidx_start == 0)
				goto no_va_found;
		}
	} while (pgidx_start != 0);

no_va_found:
	TEST_FAIL("No vaddr of specified pages available, pages: 0x%lx", pages);

	/* NOT REACHED */
	return -1;

va_found:
	TEST_ASSERT(sparsebit_is_set_num(vm->vpages_valid,
					 pgidx_start, pages),
		    "Unexpected, invalid virtual page index range,\n"
		    "  pgidx_start: 0x%lx\n"
		    "  pages: 0x%lx",
		    pgidx_start, pages);
	TEST_ASSERT(sparsebit_is_clear_num(vm->vpages_mapped,
					   pgidx_start, pages),
		    "Unexpected, pages already mapped,\n"
		    "  pgidx_start: 0x%lx\n"
		    "  pages: 0x%lx",
		    pgidx_start, pages);

	return pgidx_start * vm->page_size;
}

static vm_vaddr_t ____vm_vaddr_alloc(struct kvm_vm *vm, size_t sz,
				     vm_vaddr_t vaddr_min,
				     enum kvm_mem_region_type type,
				     bool protected)
{
	uint64_t pages = (sz >> vm->page_shift) + ((sz % vm->page_size) != 0);

	virt_pgd_alloc(vm);
	vm_paddr_t paddr = __vm_phy_pages_alloc(vm, pages,
						KVM_UTIL_MIN_PFN * vm->page_size,
						vm->memslots[type], protected);

	/*
	 * Find an unused range of virtual page addresses of at least
	 * pages in length.
	 */
	vm_vaddr_t vaddr_start = vm_vaddr_unused_gap(vm, sz, vaddr_min);

	/* Map the virtual pages. */
	for (vm_vaddr_t vaddr = vaddr_start; pages > 0;
	     pages--, vaddr += vm->page_size, paddr += vm->page_size) {

		virt_pg_map(vm, vaddr, paddr);
	}

	return vaddr_start;
}

vm_vaddr_t __vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min,
			    enum kvm_mem_region_type type)
{
	return ____vm_vaddr_alloc(vm, sz, vaddr_min, type,
				  vm_arch_has_protected_memory(vm));
}

vm_vaddr_t vm_vaddr_alloc_shared(struct kvm_vm *vm, size_t sz,
				 vm_vaddr_t vaddr_min,
				 enum kvm_mem_region_type type)
{
	return ____vm_vaddr_alloc(vm, sz, vaddr_min, type, false);
}

/*
 * VM Virtual Address Allocate
 *
 * Input Args:
 *   vm - Virtual Machine
 *   sz - Size in bytes
 *   vaddr_min - Minimum starting virtual address
 *
 * Output Args: None
 *
 * Return:
 *   Starting guest virtual address
 *
 * Allocates at least sz bytes within the virtual address space of the vm
 * given by vm.  The allocated bytes are mapped to a virtual address >=
 * the address given by vaddr_min.  Note that each allocation uses a
 * unique set of pages, with the minimum real allocation being at least
 * a page.  The allocated physical space comes from the TEST_DATA memory region.
 */
vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min)
{
	return __vm_vaddr_alloc(vm, sz, vaddr_min, MEM_REGION_TEST_DATA);
}

/*
 * VM Virtual Address Allocate Pages
 *
 * Input Args:
 *   vm - Virtual Machine
 *   nr_pages - Number of system pages to allocate
 *
 * Output Args: None
 *
 * Return:
 *   Starting guest virtual address
 *
 * Allocates at least N system pages worth of bytes within the virtual address
 * space of the vm.
 */
vm_vaddr_t vm_vaddr_alloc_pages(struct kvm_vm *vm, int nr_pages)
{
	return vm_vaddr_alloc(vm, nr_pages * getpagesize(), KVM_UTIL_MIN_VADDR);
}

vm_vaddr_t __vm_vaddr_alloc_page(struct kvm_vm *vm, enum kvm_mem_region_type type)
{
	return __vm_vaddr_alloc(vm, getpagesize(), KVM_UTIL_MIN_VADDR, type);
}

/*
 * VM Virtual Address Allocate Page
 *
 * Input Args:
 *   vm - Virtual Machine
 *
 * Output Args: None
 *
 * Return:
 *   Starting guest virtual address
 *
 * Allocates at least one system page worth of bytes within the virtual address
 * space of the vm.
 */
vm_vaddr_t vm_vaddr_alloc_page(struct kvm_vm *vm)
{
	return vm_vaddr_alloc_pages(vm, 1);
}

/*
 * Map a range of VM virtual address to the VM's physical address
 *
 * Input Args:
 *   vm - Virtual Machine
 *   vaddr - Virtual address to map
 *   paddr - VM Physical Address
 *   npages - The number of pages to map
 *
 * Output Args: None
 *
 * Return: None
 *
 * Within the VM given by @vm, creates a virtual translation for
 * @npages starting at @vaddr to the page range starting at @paddr.
 */
void virt_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
	      unsigned int npages)
{
	size_t page_size = vm->page_size;
	size_t size = npages * page_size;

	TEST_ASSERT(vaddr + size > vaddr, "Vaddr overflow");
	TEST_ASSERT(paddr + size > paddr, "Paddr overflow");

	while (npages--) {
		virt_pg_map(vm, vaddr, paddr);

		vaddr += page_size;
		paddr += page_size;
	}
}
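
/*
 * E.g. (illustrative addresses) identity-map 16 guest pages starting at 1MiB:
 *
 *	virt_map(vm, 0x100000, 0x100000, 16);
 */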

/*
 * Address VM Physical to Host Virtual
 *
 * Input Args:
 *   vm - Virtual Machine
 *   gpa - VM physical address
 *
 * Output Args: None
 *
 * Return:
 *   Equivalent host virtual address
 *
 * Locates the memory region containing the VM physical address given
 * by gpa, within the VM given by vm.  When found, the host virtual
 * address providing the memory to the vm physical address is returned.
 * A TEST_ASSERT failure occurs if no region containing gpa exists.
 */
void *addr_gpa2hva(struct kvm_vm *vm, vm_paddr_t gpa)
{
	struct userspace_mem_region *region;

	gpa = vm_untag_gpa(vm, gpa);

	region = userspace_mem_region_find(vm, gpa, gpa);
	if (!region) {
		TEST_FAIL("No vm physical memory at 0x%lx", gpa);
		return NULL;
	}

	return (void *)((uintptr_t)region->host_mem
		+ (gpa - region->region.guest_phys_addr));
}
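
/*
 * E.g. (illustrative) poke guest physical memory from the host side:
 *
 *	*(uint64_t *)addr_gpa2hva(vm, gpa) = 0xdeadbeefull;
 */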
1636
1637 /*
1638 * Address Host Virtual to VM Physical
1639 *
1640 * Input Args:
1641 * vm - Virtual Machine
1642 * hva - Host virtual address
1643 *
1644 * Output Args: None
1645 *
1646 * Return:
1647 * Equivalent VM physical address
1648 *
1649 * Locates the memory region containing the host virtual address given
1650 * by hva, within the VM given by vm. When found, the equivalent
1651 * VM physical address is returned. A TEST_ASSERT failure occurs if no
1652 * region containing hva exists.
1653 */
addr_hva2gpa(struct kvm_vm * vm,void * hva)1654 vm_paddr_t addr_hva2gpa(struct kvm_vm *vm, void *hva)
1655 {
1656 struct rb_node *node;
1657
1658 for (node = vm->regions.hva_tree.rb_node; node; ) {
1659 struct userspace_mem_region *region =
1660 container_of(node, struct userspace_mem_region, hva_node);
1661
1662 if (hva >= region->host_mem) {
1663 if (hva <= (region->host_mem
1664 + region->region.memory_size - 1))
1665 return (vm_paddr_t)((uintptr_t)
1666 region->region.guest_phys_addr
1667 + (hva - (uintptr_t)region->host_mem));
1668
1669 node = node->rb_right;
1670 } else
1671 node = node->rb_left;
1672 }
1673
1674 TEST_FAIL("No mapping to a guest physical address, hva: %p", hva);
1675 return -1;
1676 }
1677
/*
 * Address VM physical to Host Virtual *alias*.
 *
 * Input Args:
 *   vm - Virtual Machine
 *   gpa - VM physical address
 *
 * Output Args: None
 *
 * Return:
 *   Equivalent address within the host virtual *alias* area, or NULL
 *   (without failing the test) if the guest memory is not shared (so
 *   no alias exists).
 *
 * Create a writable, shared virtual=>physical alias for the specific GPA.
 * The primary use case is to allow the host selftest to manipulate guest
 * memory without mapping said memory in the guest's address space. And, for
 * userfaultfd-based demand paging, to do so without triggering userfaults.
 */
void *addr_gpa2alias(struct kvm_vm *vm, vm_paddr_t gpa)
{
	struct userspace_mem_region *region;
	uintptr_t offset;

	region = userspace_mem_region_find(vm, gpa, gpa);
	if (!region)
		return NULL;

	if (!region->host_alias)
		return NULL;

	offset = gpa - region->region.guest_phys_addr;
	return (void *) ((uintptr_t) region->host_alias + offset);
}

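/*
 * Illustrative usage sketch (comment only, not compiled): dirty a byte of
 * guest memory through the alias so that, e.g., a userfaultfd registered on
 * the primary mapping is not triggered, degrading gracefully when the
 * memory isn't shared and no alias exists.
 *
 *	uint8_t *alias = addr_gpa2alias(vm, gpa);
 *
 *	if (alias)
 *		*alias = 1;
 */
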
/* Create an interrupt controller chip for the specified VM. */
void vm_create_irqchip(struct kvm_vm *vm)
{
	int r;

	/*
	 * Allocate a fully in-kernel IRQ chip by default, but fall back to a
	 * split model (x86 only) if that fails (KVM x86 allows compiling out
	 * support for KVM_CREATE_IRQCHIP).
	 */
	r = __vm_ioctl(vm, KVM_CREATE_IRQCHIP, NULL);
	if (r && errno == ENOTTY && kvm_has_cap(KVM_CAP_SPLIT_IRQCHIP))
		vm_enable_cap(vm, KVM_CAP_SPLIT_IRQCHIP, 24);
	else
		TEST_ASSERT_VM_VCPU_IOCTL(!r, KVM_CREATE_IRQCHIP, r, vm);

	vm->has_irqchip = true;
}

int _vcpu_run(struct kvm_vcpu *vcpu)
{
	int rc;

	do {
		rc = __vcpu_run(vcpu);
	} while (rc == -1 && errno == EINTR);

	if (!rc)
		assert_on_unhandled_exception(vcpu);

	return rc;
}

/*
 * Invoke KVM_RUN on a vCPU until KVM returns something other than -EINTR.
 * Assert if KVM returns an error other than -EINTR.
 */
void vcpu_run(struct kvm_vcpu *vcpu)
{
	int ret = _vcpu_run(vcpu);

	TEST_ASSERT(!ret, KVM_IOCTL_ERROR(KVM_RUN, ret));
}

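/*
 * Illustrative usage sketch (comment only, not compiled): a typical test
 * main loop that runs the guest until it signals completion via ucall,
 * assuming the guest side uses the GUEST_SYNC()/GUEST_DONE() protocol.
 *
 *	struct ucall uc;
 *
 *	for (;;) {
 *		vcpu_run(vcpu);
 *		if (get_ucall(vcpu, &uc) == UCALL_DONE)
 *			break;
 *	}
 */
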
void vcpu_run_complete_io(struct kvm_vcpu *vcpu)
{
	int ret;

	vcpu->run->immediate_exit = 1;
	ret = __vcpu_run(vcpu);
	vcpu->run->immediate_exit = 0;

	TEST_ASSERT(ret == -1 && errno == EINTR,
		    "KVM_RUN IOCTL didn't exit immediately, rc: %i, errno: %i",
		    ret, errno);
}

/*
 * Get the list of guest registers which are supported for
 * KVM_GET_ONE_REG/KVM_SET_ONE_REG ioctls. Returns a kvm_reg_list pointer;
 * the caller is responsible for freeing the list.
 */
struct kvm_reg_list *vcpu_get_reg_list(struct kvm_vcpu *vcpu)
{
	struct kvm_reg_list reg_list_n = { .n = 0 }, *reg_list;
	int ret;

	ret = __vcpu_ioctl(vcpu, KVM_GET_REG_LIST, &reg_list_n);
	TEST_ASSERT(ret == -1 && errno == E2BIG, "KVM_GET_REG_LIST n=0");

	reg_list = calloc(1, sizeof(*reg_list) + reg_list_n.n * sizeof(__u64));
	TEST_ASSERT(reg_list, "Failed to allocate register list");
	reg_list->n = reg_list_n.n;
	vcpu_ioctl(vcpu, KVM_GET_REG_LIST, reg_list);
	return reg_list;
}

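/*
 * Illustrative usage sketch (comment only, not compiled): enumerate every
 * register KVM exposes for a vCPU and print its encoding.
 *
 *	struct kvm_reg_list *list = vcpu_get_reg_list(vcpu);
 *	__u64 i;
 *
 *	for (i = 0; i < list->n; i++)
 *		pr_info("reg[%llu] = 0x%llx\n", i, list->reg[i]);
 *	free(list);
 */
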
void *vcpu_map_dirty_ring(struct kvm_vcpu *vcpu)
{
	uint32_t page_size = getpagesize();
	uint32_t size = vcpu->vm->dirty_ring_size;

	TEST_ASSERT(size > 0, "Should enable dirty ring first");

	if (!vcpu->dirty_gfns) {
		void *addr;

		/* Mapping the ring private or executable must fail. */
		addr = mmap(NULL, size, PROT_READ, MAP_PRIVATE, vcpu->fd,
			    page_size * KVM_DIRTY_LOG_PAGE_OFFSET);
		TEST_ASSERT(addr == MAP_FAILED, "Dirty ring mapped private");

		addr = mmap(NULL, size, PROT_READ | PROT_EXEC, MAP_PRIVATE, vcpu->fd,
			    page_size * KVM_DIRTY_LOG_PAGE_OFFSET);
		TEST_ASSERT(addr == MAP_FAILED, "Dirty ring mapped exec");

		addr = __kvm_mmap(size, PROT_READ | PROT_WRITE, MAP_SHARED, vcpu->fd,
				  page_size * KVM_DIRTY_LOG_PAGE_OFFSET);

		vcpu->dirty_gfns = addr;
		vcpu->dirty_gfns_count = size / sizeof(struct kvm_dirty_gfn);
	}

	return vcpu->dirty_gfns;
}

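/*
 * Illustrative harvesting sketch (comment only, not compiled): walk the
 * mapped ring and report entries marked dirty. The full handshake
 * (resetting harvested entries, KVM_RESET_DIRTY_RINGS) is omitted; see
 * dirty_log_test.c for the complete protocol.
 *
 *	struct kvm_dirty_gfn *gfns = vcpu_map_dirty_ring(vcpu);
 *	uint32_t i;
 *
 *	for (i = 0; i < vcpu->dirty_gfns_count; i++)
 *		if (gfns[i].flags & KVM_DIRTY_GFN_F_DIRTY)
 *			pr_debug("slot %u gfn 0x%llx\n",
 *				 gfns[i].slot, gfns[i].offset);
 */
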
/*
 * Device Ioctl
 */

int __kvm_has_device_attr(int dev_fd, uint32_t group, uint64_t attr)
{
	struct kvm_device_attr attribute = {
		.group = group,
		.attr = attr,
		.flags = 0,
	};

	return ioctl(dev_fd, KVM_HAS_DEVICE_ATTR, &attribute);
}

int __kvm_test_create_device(struct kvm_vm *vm, uint64_t type)
{
	struct kvm_create_device create_dev = {
		.type = type,
		.flags = KVM_CREATE_DEVICE_TEST,
	};

	return __vm_ioctl(vm, KVM_CREATE_DEVICE, &create_dev);
}

int __kvm_create_device(struct kvm_vm *vm, uint64_t type)
{
	struct kvm_create_device create_dev = {
		.type = type,
		.fd = -1,
		.flags = 0,
	};
	int err;

	err = __vm_ioctl(vm, KVM_CREATE_DEVICE, &create_dev);
	TEST_ASSERT(err <= 0, "KVM_CREATE_DEVICE shouldn't return a positive value");
	return err ? : create_dev.fd;
}

int __kvm_device_attr_get(int dev_fd, uint32_t group, uint64_t attr, void *val)
{
	struct kvm_device_attr kvmattr = {
		.group = group,
		.attr = attr,
		.flags = 0,
		.addr = (uintptr_t)val,
	};

	return __kvm_ioctl(dev_fd, KVM_GET_DEVICE_ATTR, &kvmattr);
}

int __kvm_device_attr_set(int dev_fd, uint32_t group, uint64_t attr, void *val)
{
	struct kvm_device_attr kvmattr = {
		.group = group,
		.attr = attr,
		.flags = 0,
		.addr = (uintptr_t)val,
	};

	return __kvm_ioctl(dev_fd, KVM_SET_DEVICE_ATTR, &kvmattr);
}

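/*
 * Illustrative usage sketch (comment only, not compiled): probe for a device
 * attribute before reading it via the asserting kvm_device_attr_get()
 * wrapper. "group" and "attr" are device-specific placeholders.
 *
 *	uint64_t val;
 *
 *	if (!__kvm_has_device_attr(dev_fd, group, attr))
 *		kvm_device_attr_get(dev_fd, group, attr, &val);
 */
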
/*
 * IRQ related functions.
 */

int _kvm_irq_line(struct kvm_vm *vm, uint32_t irq, int level)
{
	struct kvm_irq_level irq_level = {
		.irq = irq,
		.level = level,
	};

	return __vm_ioctl(vm, KVM_IRQ_LINE, &irq_level);
}

void kvm_irq_line(struct kvm_vm *vm, uint32_t irq, int level)
{
	int ret = _kvm_irq_line(vm, irq, level);

	TEST_ASSERT(ret >= 0, KVM_IOCTL_ERROR(KVM_IRQ_LINE, ret));
}

struct kvm_irq_routing *kvm_gsi_routing_create(void)
{
	struct kvm_irq_routing *routing;
	size_t size;

	size = sizeof(struct kvm_irq_routing);
	/* Allocate space for the max number of entries: this wastes 196 KBs. */
	size += KVM_MAX_IRQ_ROUTES * sizeof(struct kvm_irq_routing_entry);
	routing = calloc(1, size);
	assert(routing);

	return routing;
}

void kvm_gsi_routing_irqchip_add(struct kvm_irq_routing *routing,
				 uint32_t gsi, uint32_t pin)
{
	int i;

	assert(routing);
	assert(routing->nr < KVM_MAX_IRQ_ROUTES);

	i = routing->nr;
	routing->entries[i].gsi = gsi;
	routing->entries[i].type = KVM_IRQ_ROUTING_IRQCHIP;
	routing->entries[i].flags = 0;
	routing->entries[i].u.irqchip.irqchip = 0;
	routing->entries[i].u.irqchip.pin = pin;
	routing->nr++;
}

int _kvm_gsi_routing_write(struct kvm_vm *vm, struct kvm_irq_routing *routing)
{
	int ret;

	assert(routing);
	ret = __vm_ioctl(vm, KVM_SET_GSI_ROUTING, routing);
	/* The routing table is consumed (freed) even if the ioctl fails. */
	free(routing);

	return ret;
}

void kvm_gsi_routing_write(struct kvm_vm *vm, struct kvm_irq_routing *routing)
{
	int ret;

	ret = _kvm_gsi_routing_write(vm, routing);
	TEST_ASSERT(!ret, KVM_IOCTL_ERROR(KVM_SET_GSI_ROUTING, ret));
}

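/*
 * Illustrative usage sketch (comment only, not compiled): route GSIs 0..7
 * 1:1 onto the default irqchip's pins and commit the table. The routing
 * object is consumed by kvm_gsi_routing_write().
 *
 *	struct kvm_irq_routing *routing = kvm_gsi_routing_create();
 *	uint32_t i;
 *
 *	for (i = 0; i < 8; i++)
 *		kvm_gsi_routing_irqchip_add(routing, i, i);
 *	kvm_gsi_routing_write(vm, routing);
 */
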
/*
 * VM Dump
 *
 * Input Args:
 *   vm - Virtual Machine
 *   indent - Left margin indent amount
 *
 * Output Args:
 *   stream - Output FILE stream
 *
 * Return: None
 *
 * Dumps the current state of the VM given by vm, to the FILE stream
 * given by stream.
 */
void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
{
	int ctr;
	struct userspace_mem_region *region;
	struct kvm_vcpu *vcpu;

	fprintf(stream, "%*smode: 0x%x\n", indent, "", vm->mode);
	fprintf(stream, "%*sfd: %i\n", indent, "", vm->fd);
	fprintf(stream, "%*spage_size: 0x%x\n", indent, "", vm->page_size);
	fprintf(stream, "%*sMem Regions:\n", indent, "");
	hash_for_each(vm->regions.slot_hash, ctr, region, slot_node) {
		fprintf(stream, "%*sguest_phys: 0x%lx size: 0x%lx "
			"host_virt: %p\n", indent + 2, "",
			(uint64_t) region->region.guest_phys_addr,
			(uint64_t) region->region.memory_size,
			region->host_mem);
		fprintf(stream, "%*sunused_phy_pages: ", indent + 2, "");
		sparsebit_dump(stream, region->unused_phy_pages, 0);
		if (region->protected_phy_pages) {
			fprintf(stream, "%*sprotected_phy_pages: ", indent + 2, "");
			sparsebit_dump(stream, region->protected_phy_pages, 0);
		}
	}
	fprintf(stream, "%*sMapped Virtual Pages:\n", indent, "");
	sparsebit_dump(stream, vm->vpages_mapped, indent + 2);
	fprintf(stream, "%*spgd_created: %u\n", indent, "",
		vm->mmu.pgd_created);
	if (vm->mmu.pgd_created) {
		fprintf(stream, "%*sVirtual Translation Tables:\n",
			indent + 2, "");
		virt_dump(stream, vm, indent + 4);
	}
	fprintf(stream, "%*sVCPUs:\n", indent, "");

	list_for_each_entry(vcpu, &vm->vcpus, list)
		vcpu_dump(stream, vcpu, indent + 2);
}

#define KVM_EXIT_STRING(x) {KVM_EXIT_##x, #x}

/* Known KVM exit reasons */
static struct exit_reason {
	unsigned int reason;
	const char *name;
} exit_reasons_known[] = {
	KVM_EXIT_STRING(UNKNOWN),
	KVM_EXIT_STRING(EXCEPTION),
	KVM_EXIT_STRING(IO),
	KVM_EXIT_STRING(HYPERCALL),
	KVM_EXIT_STRING(DEBUG),
	KVM_EXIT_STRING(HLT),
	KVM_EXIT_STRING(MMIO),
	KVM_EXIT_STRING(IRQ_WINDOW_OPEN),
	KVM_EXIT_STRING(SHUTDOWN),
	KVM_EXIT_STRING(FAIL_ENTRY),
	KVM_EXIT_STRING(INTR),
	KVM_EXIT_STRING(SET_TPR),
	KVM_EXIT_STRING(TPR_ACCESS),
	KVM_EXIT_STRING(S390_SIEIC),
	KVM_EXIT_STRING(S390_RESET),
	KVM_EXIT_STRING(DCR),
	KVM_EXIT_STRING(NMI),
	KVM_EXIT_STRING(INTERNAL_ERROR),
	KVM_EXIT_STRING(OSI),
	KVM_EXIT_STRING(PAPR_HCALL),
	KVM_EXIT_STRING(S390_UCONTROL),
	KVM_EXIT_STRING(WATCHDOG),
	KVM_EXIT_STRING(S390_TSCH),
	KVM_EXIT_STRING(EPR),
	KVM_EXIT_STRING(SYSTEM_EVENT),
	KVM_EXIT_STRING(S390_STSI),
	KVM_EXIT_STRING(IOAPIC_EOI),
	KVM_EXIT_STRING(HYPERV),
	KVM_EXIT_STRING(ARM_NISV),
	KVM_EXIT_STRING(X86_RDMSR),
	KVM_EXIT_STRING(X86_WRMSR),
	KVM_EXIT_STRING(DIRTY_RING_FULL),
	KVM_EXIT_STRING(AP_RESET_HOLD),
	KVM_EXIT_STRING(X86_BUS_LOCK),
	KVM_EXIT_STRING(XEN),
	KVM_EXIT_STRING(RISCV_SBI),
	KVM_EXIT_STRING(RISCV_CSR),
	KVM_EXIT_STRING(NOTIFY),
	KVM_EXIT_STRING(LOONGARCH_IOCSR),
	KVM_EXIT_STRING(MEMORY_FAULT),
	KVM_EXIT_STRING(ARM_SEA),
};

/*
 * Exit Reason String
 *
 * Input Args:
 *   exit_reason - Exit reason
 *
 * Output Args: None
 *
 * Return:
 *   Constant string pointer describing the exit reason.
 *
 * Locates and returns a constant string that describes the KVM exit
 * reason given by exit_reason. If no such string is found, a constant
 * string of "Unknown" is returned.
 */
const char *exit_reason_str(unsigned int exit_reason)
{
	unsigned int n1;

	for (n1 = 0; n1 < ARRAY_SIZE(exit_reasons_known); n1++) {
		if (exit_reason == exit_reasons_known[n1].reason)
			return exit_reasons_known[n1].name;
	}

	return "Unknown";
}

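/*
 * Illustrative usage sketch (comment only, not compiled): produce a readable
 * failure message when a vCPU exits for an unexpected reason.
 *
 *	TEST_ASSERT(vcpu->run->exit_reason == KVM_EXIT_IO,
 *		    "Unexpected exit reason: %u (%s)",
 *		    vcpu->run->exit_reason,
 *		    exit_reason_str(vcpu->run->exit_reason));
 */
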
/*
 * Physical Contiguous Page Allocator
 *
 * Input Args:
 *   vm - Virtual Machine
 *   num - number of pages
 *   paddr_min - Physical address minimum
 *   memslot - Memory region to allocate page from
 *   protected - True if the pages will be used as protected/private memory
 *
 * Output Args: None
 *
 * Return:
 *   Starting physical address
 *
 * Within the VM specified by vm, locates a range of available physical
 * pages at or above paddr_min. If found, the pages are marked as in use
 * and their base address is returned. A TEST_ASSERT failure occurs if
 * not enough pages are available at or above paddr_min.
 */
vm_paddr_t __vm_phy_pages_alloc(struct kvm_vm *vm, size_t num,
				vm_paddr_t paddr_min, uint32_t memslot,
				bool protected)
{
	struct userspace_mem_region *region;
	sparsebit_idx_t pg, base;

	TEST_ASSERT(num > 0, "Must allocate at least one page");

	TEST_ASSERT((paddr_min % vm->page_size) == 0, "Min physical address "
		    "not divisible by page size.\n"
		    "  paddr_min: 0x%lx page_size: 0x%x",
		    paddr_min, vm->page_size);

	region = memslot2region(vm, memslot);
	TEST_ASSERT(!protected || region->protected_phy_pages,
		    "Region doesn't support protected memory");

	/*
	 * Scan for "num" contiguous free pages; when an in-use page is hit,
	 * restart the scan from the next free page.
	 */
	base = pg = paddr_min >> vm->page_shift;
	do {
		for (; pg < base + num; ++pg) {
			if (!sparsebit_is_set(region->unused_phy_pages, pg)) {
				base = pg = sparsebit_next_set(region->unused_phy_pages, pg);
				break;
			}
		}
	} while (pg && pg != base + num);

	if (pg == 0) {
		fprintf(stderr, "No guest physical page available, "
			"paddr_min: 0x%lx page_size: 0x%x memslot: %u\n",
			paddr_min, vm->page_size, memslot);
		fputs("---- vm dump ----\n", stderr);
		vm_dump(stderr, vm, 2);
		abort();
	}

	for (pg = base; pg < base + num; ++pg) {
		sparsebit_clear(region->unused_phy_pages, pg);
		if (protected)
			sparsebit_set(region->protected_phy_pages, pg);
	}

	return base * vm->page_size;
}

vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm, vm_paddr_t paddr_min,
			     uint32_t memslot)
{
	return vm_phy_pages_alloc(vm, 1, paddr_min, memslot);
}

vm_paddr_t vm_alloc_page_table(struct kvm_vm *vm)
{
	return vm_phy_page_alloc(vm, KVM_GUEST_PAGE_TABLE_MIN_PADDR,
				 vm->memslots[MEM_REGION_PT]);
}

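/*
 * Illustrative usage sketch (comment only, not compiled): grab one page for
 * guest data and one for a page table, then verify the two allocations
 * landed in distinct frames. Memslot 0 and the 0x10000 floor are arbitrary
 * example values.
 *
 *	vm_paddr_t data_pa = vm_phy_page_alloc(vm, 0x10000, 0);
 *	vm_paddr_t pt_pa = vm_alloc_page_table(vm);
 *
 *	TEST_ASSERT(data_pa != pt_pa, "Allocations must not overlap");
 */
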
/*
 * Address Guest Virtual to Host Virtual
 *
 * Input Args:
 *   vm - Virtual Machine
 *   gva - VM virtual address
 *
 * Output Args: None
 *
 * Return:
 *   Equivalent host virtual address
 */
void *addr_gva2hva(struct kvm_vm *vm, vm_vaddr_t gva)
{
	return addr_gpa2hva(vm, addr_gva2gpa(vm, gva));
}

unsigned long __weak vm_compute_max_gfn(struct kvm_vm *vm)
{
	return ((1ULL << vm->pa_bits) >> vm->page_shift) - 1;
}

static unsigned int vm_calc_num_pages(unsigned int num_pages,
				      unsigned int page_shift,
				      unsigned int new_page_shift,
				      bool ceil)
{
	unsigned int n;

	if (page_shift >= new_page_shift)
		return num_pages * (1 << (page_shift - new_page_shift));

	/* Compute the ratio only when the shift delta is non-negative. */
	n = 1 << (new_page_shift - page_shift);
	return num_pages / n + !!(ceil && num_pages % n);
}

static inline int getpageshift(void)
{
	return __builtin_ffs(getpagesize()) - 1;
}

unsigned int
vm_num_host_pages(enum vm_guest_mode mode, unsigned int num_guest_pages)
{
	return vm_calc_num_pages(num_guest_pages,
				 vm_guest_mode_params[mode].page_shift,
				 getpageshift(), true);
}

unsigned int
vm_num_guest_pages(enum vm_guest_mode mode, unsigned int num_host_pages)
{
	return vm_calc_num_pages(num_host_pages, getpageshift(),
				 vm_guest_mode_params[mode].page_shift, false);
}

unsigned int vm_calc_num_guest_pages(enum vm_guest_mode mode, size_t size)
{
	unsigned int n;

	n = DIV_ROUND_UP(size, vm_guest_mode_params[mode].page_size);
	return vm_adjust_num_guest_pages(mode, n);
}

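/*
 * Worked example (assuming a guest mode with 64KiB pages on a 4KiB host):
 * vm_num_host_pages() scales up, so 3 guest pages -> 48 host pages, while
 * vm_num_guest_pages() scales down and truncates, so 49 host pages -> 3
 * guest pages, since a partial guest page cannot back guest memory.
 */
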
/*
 * Read binary stats descriptors
 *
 * Input Args:
 *   stats_fd - the file descriptor for the binary stats file from which to read
 *   header - the binary stats metadata header corresponding to the given FD
 *
 * Output Args: None
 *
 * Return:
 *   A pointer to a newly allocated series of stat descriptors.
 *   Caller is responsible for freeing the returned kvm_stats_desc.
 *
 * Read the stats descriptors from the binary stats interface.
 */
struct kvm_stats_desc *read_stats_descriptors(int stats_fd,
					      struct kvm_stats_header *header)
{
	struct kvm_stats_desc *stats_desc;
	ssize_t desc_size, total_size, ret;

	desc_size = get_stats_descriptor_size(header);
	total_size = header->num_desc * desc_size;

	stats_desc = calloc(header->num_desc, desc_size);
	TEST_ASSERT(stats_desc, "Allocate memory for stats descriptors");

	ret = pread(stats_fd, stats_desc, total_size, header->desc_offset);
	TEST_ASSERT(ret == total_size, "Read KVM stats descriptors");

	return stats_desc;
}

/*
 * Read stat data for a particular stat
 *
 * Input Args:
 *   stats_fd - the file descriptor for the binary stats file from which to read
 *   header - the binary stats metadata header corresponding to the given FD
 *   desc - the binary stat metadata for the particular stat to be read
 *   max_elements - the maximum number of 8-byte values to read into data
 *
 * Output Args:
 *   data - the buffer into which stat data should be read
 *
 * Read the data values of a specified stat from the binary stats interface.
 */
void read_stat_data(int stats_fd, struct kvm_stats_header *header,
		    struct kvm_stats_desc *desc, uint64_t *data,
		    size_t max_elements)
{
	size_t nr_elements = min_t(ssize_t, desc->size, max_elements);
	size_t size = nr_elements * sizeof(*data);
	ssize_t ret;

	TEST_ASSERT(desc->size, "No elements in stat '%s'", desc->name);
	TEST_ASSERT(max_elements, "Zero elements requested for stat '%s'", desc->name);

	ret = pread(stats_fd, data, size,
		    header->data_offset + desc->offset);

	TEST_ASSERT(ret >= 0, "pread() failed on stat '%s', errno: %i (%s)",
		    desc->name, errno, strerror(errno));
	TEST_ASSERT(ret == size,
		    "pread() on stat '%s' read %ld bytes, wanted %lu bytes",
		    desc->name, ret, size);
}

void kvm_get_stat(struct kvm_binary_stats *stats, const char *name,
		  uint64_t *data, size_t max_elements)
{
	struct kvm_stats_desc *desc;
	size_t size_desc;
	int i;

	/* Lazily read and cache the header and descriptors on first use. */
	if (!stats->desc) {
		read_stats_header(stats->fd, &stats->header);
		stats->desc = read_stats_descriptors(stats->fd, &stats->header);
	}

	size_desc = get_stats_descriptor_size(&stats->header);

	for (i = 0; i < stats->header.num_desc; ++i) {
		desc = (void *)stats->desc + (i * size_desc);

		if (strcmp(desc->name, name))
			continue;

		read_stat_data(stats->fd, &stats->header, desc, data, max_elements);
		return;
	}

	TEST_FAIL("Unable to find stat '%s'", name);
}

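/*
 * Illustrative usage sketch (comment only, not compiled): read a single
 * VM-scoped stat via a KVM_GET_STATS_FD file descriptor. "remote_tlb_flush"
 * is an example stat name; availability varies by architecture.
 *
 *	struct kvm_binary_stats stats = {};
 *	uint64_t val;
 *
 *	stats.fd = __vm_ioctl(vm, KVM_GET_STATS_FD, NULL);
 *	kvm_get_stat(&stats, "remote_tlb_flush", &val, 1);
 */
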
__weak void kvm_arch_vm_post_create(struct kvm_vm *vm, unsigned int nr_vcpus)
{
}

__weak void kvm_arch_vm_finalize_vcpus(struct kvm_vm *vm)
{
}

__weak void kvm_arch_vm_release(struct kvm_vm *vm)
{
}

__weak void kvm_selftest_arch_init(void)
{
}

static void report_unexpected_signal(int signum)
{
#define KVM_CASE_SIGNUM(sig) \
	case sig: TEST_FAIL("Unexpected " #sig " (%d)\n", signum)

	switch (signum) {
	KVM_CASE_SIGNUM(SIGBUS);
	KVM_CASE_SIGNUM(SIGSEGV);
	KVM_CASE_SIGNUM(SIGILL);
	KVM_CASE_SIGNUM(SIGFPE);
	default:
		TEST_FAIL("Unexpected signal %d\n", signum);
	}
}

void __attribute((constructor)) kvm_selftest_init(void)
{
	struct sigaction sig_sa = {
		.sa_handler = report_unexpected_signal,
	};

	/* Tell stdout not to buffer its content. */
	setbuf(stdout, NULL);

	sigaction(SIGBUS, &sig_sa, NULL);
	sigaction(SIGSEGV, &sig_sa, NULL);
	sigaction(SIGILL, &sig_sa, NULL);
	sigaction(SIGFPE, &sig_sa, NULL);

	guest_random_seed = last_guest_seed = random();
	pr_info("Random seed: 0x%x\n", guest_random_seed);

	kvm_selftest_arch_init();
}

bool vm_is_gpa_protected(struct kvm_vm *vm, vm_paddr_t paddr)
{
	sparsebit_idx_t pg = 0;
	struct userspace_mem_region *region;

	if (!vm_arch_has_protected_memory(vm))
		return false;

	region = userspace_mem_region_find(vm, paddr, paddr);
	TEST_ASSERT(region, "No vm physical memory at 0x%lx", paddr);

	pg = paddr >> vm->page_shift;
	return sparsebit_is_set(region->protected_phy_pages, pg);
}

__weak bool kvm_arch_has_default_irqchip(void)
{
	return false;
}