// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2022, Google LLC.
 */
#define _GNU_SOURCE /* for program_invocation_short_name */
#include <fcntl.h>
#include <limits.h>
#include <pthread.h>
#include <sched.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>

#include <linux/compiler.h>
#include <linux/kernel.h>
#include <linux/kvm_para.h>
#include <linux/memfd.h>
#include <linux/sizes.h>

#include <test_util.h>
#include <kvm_util.h>
#include <processor.h>

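/*
 * Each vCPU operates on its own chunk of guest memory starting at
 * BASE_DATA_GPA (4GiB).  The chunk size, 2MiB plus one page, covers the
 * largest offset + size combination in test_ranges[] below.
 */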
#define BASE_DATA_SLOT		10
#define BASE_DATA_GPA		((uint64_t)(1ull << 32))
#define PER_CPU_DATA_SIZE	((uint64_t)(SZ_2M + PAGE_SIZE))

/* Horrific macro so that the line info is captured accurately :-( */
#define memcmp_g(gpa, pattern, size)							\
do {											\
	uint8_t *mem = (uint8_t *)gpa;							\
	size_t i;									\
											\
	for (i = 0; i < size; i++)							\
		__GUEST_ASSERT(mem[i] == pattern,					\
			       "Guest expected 0x%x at offset %lu (gpa 0x%lx), got 0x%x", \
			       pattern, i, gpa + i, mem[i]);				\
} while (0)

static void memcmp_h(uint8_t *mem, uint64_t gpa, uint8_t pattern, size_t size)
{
	size_t i;

	for (i = 0; i < size; i++)
		TEST_ASSERT(mem[i] == pattern,
			    "Host expected 0x%x at gpa 0x%lx, got 0x%x",
			    pattern, gpa + i, mem[i]);
}

/*
 * Run memory conversion tests with explicit conversion:
 * Execute the KVM hypercall to map/unmap a gpa range, which causes an exit to
 * userspace to back/unback private memory.  Subsequent guest accesses to the
 * gpa range do not exit to userspace.
 *
 * Test memory conversion scenarios with the following steps:
 * 1) Access private memory using private accesses and verify that the memory
 *    contents are not visible to userspace.
 * 2) Convert memory to shared using explicit conversions and ensure that
 *    userspace is able to access the shared regions.
 * 3) Convert memory back to private using explicit conversions and ensure that
 *    userspace is again not able to access the converted private regions.
 */

#define GUEST_STAGE(o, s) { .offset = o, .size = s }

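/*
 * Sync commands sent from the guest to the host via ucall.  SYNC_SHARED asks
 * the host to verify the current pattern in shared memory and then write a
 * new pattern; SYNC_PRIVATE asks the host to verify that its shared view of
 * the range still holds the given pattern, i.e. that the guest's private
 * writes are not visible to userspace.
 */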
enum ucall_syncs {
	SYNC_SHARED,
	SYNC_PRIVATE,
};

static void guest_sync_shared(uint64_t gpa, uint64_t size,
			      uint8_t current_pattern, uint8_t new_pattern)
{
	GUEST_SYNC5(SYNC_SHARED, gpa, size, current_pattern, new_pattern);
}

static void guest_sync_private(uint64_t gpa, uint64_t size, uint8_t pattern)
{
	GUEST_SYNC4(SYNC_PRIVATE, gpa, size, pattern);
}

/* Arbitrary values, KVM doesn't care about the attribute flags. */
#define MAP_GPA_SET_ATTRIBUTES	BIT(0)
#define MAP_GPA_SHARED		BIT(1)
#define MAP_GPA_DO_FALLOCATE	BIT(2)

static void guest_map_mem(uint64_t gpa, uint64_t size, bool map_shared,
			  bool do_fallocate)
{
	uint64_t flags = MAP_GPA_SET_ATTRIBUTES;

	if (map_shared)
		flags |= MAP_GPA_SHARED;
	if (do_fallocate)
		flags |= MAP_GPA_DO_FALLOCATE;
	kvm_hypercall_map_gpa_range(gpa, size, flags);
}

static void guest_map_shared(uint64_t gpa, uint64_t size, bool do_fallocate)
{
	guest_map_mem(gpa, size, true, do_fallocate);
}

static void guest_map_private(uint64_t gpa, uint64_t size, bool do_fallocate)
{
	guest_map_mem(gpa, size, false, do_fallocate);
}

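/*
 * Offset/size pairs exercised within each vCPU's chunk: 4KiB and 2MiB sized
 * ranges starting at offsets that are 2MiB-aligned (0, SZ_2M) and not
 * (PAGE_SIZE).
 */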
struct {
	uint64_t offset;
	uint64_t size;
} static const test_ranges[] = {
	GUEST_STAGE(0, PAGE_SIZE),
	GUEST_STAGE(0, SZ_2M),
	GUEST_STAGE(PAGE_SIZE, PAGE_SIZE),
	GUEST_STAGE(PAGE_SIZE, SZ_2M),
	GUEST_STAGE(SZ_2M, PAGE_SIZE),
};

static void guest_test_explicit_conversion(uint64_t base_gpa, bool do_fallocate)
{
	const uint8_t def_p = 0xaa;
	const uint8_t init_p = 0xcc;
	uint64_t j;
	int i;

	/* Memory should be shared by default. */
	memset((void *)base_gpa, def_p, PER_CPU_DATA_SIZE);
	memcmp_g(base_gpa, def_p, PER_CPU_DATA_SIZE);
	guest_sync_shared(base_gpa, PER_CPU_DATA_SIZE, def_p, init_p);

	memcmp_g(base_gpa, init_p, PER_CPU_DATA_SIZE);

	for (i = 0; i < ARRAY_SIZE(test_ranges); i++) {
		uint64_t gpa = base_gpa + test_ranges[i].offset;
		uint64_t size = test_ranges[i].size;
		uint8_t p1 = 0x11;
		uint8_t p2 = 0x22;
		uint8_t p3 = 0x33;
		uint8_t p4 = 0x44;

		/*
		 * Set the test region to pattern one to differentiate it from
		 * the data range as a whole (contains the initial pattern).
		 */
		memset((void *)gpa, p1, size);

		/*
		 * Convert to private, set and verify the private data, and
		 * then verify that the rest of the data (map shared) still
		 * holds the initial pattern, and that the host always sees the
		 * shared memory (initial pattern).  Unlike shared memory,
		 * punching a hole in private memory is destructive, i.e.
		 * previous values aren't guaranteed to be preserved.
		 */
		guest_map_private(gpa, size, do_fallocate);

		if (size > PAGE_SIZE) {
			memset((void *)gpa, p2, PAGE_SIZE);
			goto skip;
		}

		memset((void *)gpa, p2, size);
		guest_sync_private(gpa, size, p1);

		/*
		 * Verify that the private memory was set to pattern two, and
		 * that shared memory still holds the initial pattern.
		 */
		memcmp_g(gpa, p2, size);
		if (gpa > base_gpa)
			memcmp_g(base_gpa, init_p, gpa - base_gpa);
		if (gpa + size < base_gpa + PER_CPU_DATA_SIZE)
			memcmp_g(gpa + size, init_p,
				 (base_gpa + PER_CPU_DATA_SIZE) - (gpa + size));

		/*
		 * Convert odd-number page frames back to shared to verify KVM
		 * also correctly handles holes in private ranges.
		 */
		for (j = 0; j < size; j += PAGE_SIZE) {
			if ((j >> PAGE_SHIFT) & 1) {
				guest_map_shared(gpa + j, PAGE_SIZE, do_fallocate);
				guest_sync_shared(gpa + j, PAGE_SIZE, p1, p3);

				memcmp_g(gpa + j, p3, PAGE_SIZE);
			} else {
				guest_sync_private(gpa + j, PAGE_SIZE, p1);
			}
		}

skip:
		/*
		 * Convert the entire region back to shared, explicitly write
		 * pattern three to fill in the even-number frames before
		 * asking the host to verify (and write pattern four).
		 */
		guest_map_shared(gpa, size, do_fallocate);
		memset((void *)gpa, p3, size);
		guest_sync_shared(gpa, size, p3, p4);
		memcmp_g(gpa, p4, size);

		/* Reset the shared memory back to the initial pattern. */
		memset((void *)gpa, init_p, size);

		/*
		 * Free (via PUNCH_HOLE) *all* private memory so that the next
		 * iteration starts from a clean slate, e.g. with respect to
		 * whether or not there are pages/folios in guest_mem.
		 */
		guest_map_shared(base_gpa, PER_CPU_DATA_SIZE, true);
	}
}

static void guest_punch_hole(uint64_t gpa, uint64_t size)
{
	/* "Mapping" memory shared via fallocate() is done via PUNCH_HOLE. */
	uint64_t flags = MAP_GPA_SHARED | MAP_GPA_DO_FALLOCATE;

	kvm_hypercall_map_gpa_range(gpa, size, flags);
}

/*
 * Test that PUNCH_HOLE actually frees memory by punching holes without doing a
 * proper conversion.  Freeing (PUNCH_HOLE) should zap SPTEs, and reallocating
 * (subsequent fault) should zero memory.
 */
static void guest_test_punch_hole(uint64_t base_gpa, bool precise)
{
	const uint8_t init_p = 0xcc;
	int i;

	/*
	 * Convert the entire range to private, this testcase is all about
	 * punching holes in guest_memfd, i.e. shared mappings aren't needed.
	 */
	guest_map_private(base_gpa, PER_CPU_DATA_SIZE, false);

	for (i = 0; i < ARRAY_SIZE(test_ranges); i++) {
		uint64_t gpa = base_gpa + test_ranges[i].offset;
		uint64_t size = test_ranges[i].size;

		/*
		 * Free all memory before each iteration, even for the !precise
		 * case where the memory will be faulted back in.  Freeing and
		 * reallocating should obviously work, and freeing all memory
		 * minimizes the probability of cross-testcase influence.
		 */
		guest_punch_hole(base_gpa, PER_CPU_DATA_SIZE);

		/* Fault-in and initialize memory, and verify the pattern. */
		if (precise) {
			memset((void *)gpa, init_p, size);
			memcmp_g(gpa, init_p, size);
		} else {
			memset((void *)base_gpa, init_p, PER_CPU_DATA_SIZE);
			memcmp_g(base_gpa, init_p, PER_CPU_DATA_SIZE);
		}

		/*
		 * Punch a hole at the target range and verify that reads from
		 * the guest succeed and return zeroes.
		 */
		guest_punch_hole(gpa, size);
		memcmp_g(gpa, 0, size);
	}
}

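/* Guest entry point: run all testcases against this vCPU's chunk of memory. */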
static void guest_code(uint64_t base_gpa)
{
	/*
	 * Run the conversion test twice, with and without doing fallocate() on
	 * the guest_memfd backing when converting between shared and private.
	 */
	guest_test_explicit_conversion(base_gpa, false);
	guest_test_explicit_conversion(base_gpa, true);

	/*
	 * Run the PUNCH_HOLE test twice too, once with the entire guest_memfd
	 * faulted in, once with only the target range faulted in.
	 */
	guest_test_punch_hole(base_gpa, false);
	guest_test_punch_hole(base_gpa, true);
	GUEST_DONE();
}

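/*
 * Emulate userspace's side of KVM_HC_MAP_GPA_RANGE: depending on the guest's
 * flags, fallocate() or PUNCH_HOLE the backing guest_memfd and/or flip the
 * range's KVM_MEMORY_ATTRIBUTE_PRIVATE attribute.
 */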
static void handle_exit_hypercall(struct kvm_vcpu *vcpu)
{
	struct kvm_run *run = vcpu->run;
	uint64_t gpa = run->hypercall.args[0];
	uint64_t size = run->hypercall.args[1] * PAGE_SIZE;
	bool set_attributes = run->hypercall.args[2] & MAP_GPA_SET_ATTRIBUTES;
	bool map_shared = run->hypercall.args[2] & MAP_GPA_SHARED;
	bool do_fallocate = run->hypercall.args[2] & MAP_GPA_DO_FALLOCATE;
	struct kvm_vm *vm = vcpu->vm;

	TEST_ASSERT(run->hypercall.nr == KVM_HC_MAP_GPA_RANGE,
		    "Wanted MAP_GPA_RANGE (%u), got '%llu'",
		    KVM_HC_MAP_GPA_RANGE, run->hypercall.nr);

	if (do_fallocate)
		vm_guest_mem_fallocate(vm, gpa, size, map_shared);

	if (set_attributes)
		vm_set_memory_attributes(vm, gpa, size,
					 map_shared ? 0 : KVM_MEMORY_ATTRIBUTE_PRIVATE);
	run->hypercall.ret = 0;
}

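/* Set by the main thread to release all vCPU worker threads at once. */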
static bool run_vcpus;

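/*
 * vCPU worker: run the guest until it signals completion, forwarding
 * MAP_GPA_RANGE hypercalls to handle_exit_hypercall() and servicing
 * SYNC_SHARED/SYNC_PRIVATE ucalls by verifying (and, for shared syncs,
 * rewriting) guest memory through its host mapping.
 */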
static void *__test_mem_conversions(void *__vcpu)
{
	struct kvm_vcpu *vcpu = __vcpu;
	struct kvm_run *run = vcpu->run;
	struct kvm_vm *vm = vcpu->vm;
	struct ucall uc;

	while (!READ_ONCE(run_vcpus))
		;

	for ( ;; ) {
		vcpu_run(vcpu);

		if (run->exit_reason == KVM_EXIT_HYPERCALL) {
			handle_exit_hypercall(vcpu);
			continue;
		}

		TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
			    "Wanted KVM_EXIT_IO, got exit reason: %u (%s)",
			    run->exit_reason, exit_reason_str(run->exit_reason));

		switch (get_ucall(vcpu, &uc)) {
		case UCALL_ABORT:
			REPORT_GUEST_ASSERT(uc);
		case UCALL_SYNC: {
			uint64_t gpa = uc.args[1];
			size_t size = uc.args[2];
			size_t i;

			TEST_ASSERT(uc.args[0] == SYNC_SHARED ||
				    uc.args[0] == SYNC_PRIVATE,
				    "Unknown sync command '%ld'", uc.args[0]);

			for (i = 0; i < size; i += vm->page_size) {
				size_t nr_bytes = min_t(size_t, vm->page_size, size - i);
				uint8_t *hva = addr_gpa2hva(vm, gpa + i);

				/* In all cases, the host should observe the shared data. */
				memcmp_h(hva, gpa + i, uc.args[3], nr_bytes);

				/* For shared, write the new pattern to guest memory. */
				if (uc.args[0] == SYNC_SHARED)
					memset(hva, uc.args[4], nr_bytes);
			}
			break;
		}
		case UCALL_DONE:
			return NULL;
		default:
			TEST_FAIL("Unknown ucall 0x%lx.", uc.cmd);
		}
	}
}

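/*
 * Create a KVM_X86_SW_PROTECTED_VM backed by a single guest_memfd split
 * across nr_memslots memslots, give each vCPU its own chunk of that memory,
 * and run the guest testcases on one worker thread per vCPU.
 */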
static void test_mem_conversions(enum vm_mem_backing_src_type src_type, uint32_t nr_vcpus,
				 uint32_t nr_memslots)
{
	/*
	 * Allocate enough memory so that each vCPU's chunk of memory can be
	 * naturally aligned with respect to the size of the backing store.
	 */
	const size_t alignment = max_t(size_t, SZ_2M, get_backing_src_pagesz(src_type));
	const size_t per_cpu_size = align_up(PER_CPU_DATA_SIZE, alignment);
	const size_t memfd_size = per_cpu_size * nr_vcpus;
	const size_t slot_size = memfd_size / nr_memslots;
	struct kvm_vcpu *vcpus[KVM_MAX_VCPUS];
	pthread_t threads[KVM_MAX_VCPUS];
	struct kvm_vm *vm;
	int memfd, i, r;

	const struct vm_shape shape = {
		.mode = VM_MODE_DEFAULT,
		.type = KVM_X86_SW_PROTECTED_VM,
	};

	TEST_ASSERT(slot_size * nr_memslots == memfd_size,
		    "The memfd size (0x%lx) needs to be cleanly divisible by the number of memslots (%u)",
		    memfd_size, nr_memslots);
	vm = __vm_create_with_vcpus(shape, nr_vcpus, 0, guest_code, vcpus);

	vm_enable_cap(vm, KVM_CAP_EXIT_HYPERCALL, (1 << KVM_HC_MAP_GPA_RANGE));

	memfd = vm_create_guest_memfd(vm, memfd_size, 0);

	for (i = 0; i < nr_memslots; i++)
		vm_mem_add(vm, src_type, BASE_DATA_GPA + slot_size * i,
			   BASE_DATA_SLOT + i, slot_size / vm->page_size,
			   KVM_MEM_GUEST_MEMFD, memfd, slot_size * i);

	for (i = 0; i < nr_vcpus; i++) {
		uint64_t gpa = BASE_DATA_GPA + i * per_cpu_size;

		vcpu_args_set(vcpus[i], 1, gpa);

		/*
		 * Map only what is needed so that an out-of-bounds access
		 * results in a #PF => SHUTDOWN instead of data corruption.
		 */
		virt_map(vm, gpa, gpa, PER_CPU_DATA_SIZE / vm->page_size);

		pthread_create(&threads[i], NULL, __test_mem_conversions, vcpus[i]);
	}

	WRITE_ONCE(run_vcpus, true);

	for (i = 0; i < nr_vcpus; i++)
		pthread_join(threads[i], NULL);

	kvm_vm_free(vm);

	/*
	 * Allocate and free memory from the guest_memfd after closing the VM
	 * fd.  The guest_memfd is gifted a reference to its owning VM, i.e.
	 * should prevent the VM from being fully destroyed until the last
	 * reference to the guest_memfd is also put.
	 */
	r = fallocate(memfd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE, 0, memfd_size);
	TEST_ASSERT(!r, __KVM_SYSCALL_ERROR("fallocate()", r));

	r = fallocate(memfd, FALLOC_FL_KEEP_SIZE, 0, memfd_size);
	TEST_ASSERT(!r, __KVM_SYSCALL_ERROR("fallocate()", r));
}

static void usage(const char *cmd)
{
	puts("");
	printf("usage: %s [-h] [-m nr_memslots] [-s mem_type] [-n nr_vcpus]\n", cmd);
	puts("");
	backing_src_help("-s");
	puts("");
	puts(" -n: specify the number of vcpus (default: 1)");
	puts("");
	puts(" -m: specify the number of memslots (default: 1)");
	puts("");
}

int main(int argc, char *argv[])
{
	enum vm_mem_backing_src_type src_type = DEFAULT_VM_MEM_SRC;
	uint32_t nr_memslots = 1;
	uint32_t nr_vcpus = 1;
	int opt;

	TEST_REQUIRE(kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SW_PROTECTED_VM));

	while ((opt = getopt(argc, argv, "hm:s:n:")) != -1) {
		switch (opt) {
		case 's':
			src_type = parse_backing_src_type(optarg);
			break;
		case 'n':
			nr_vcpus = atoi_positive("nr_vcpus", optarg);
			break;
		case 'm':
			nr_memslots = atoi_positive("nr_memslots", optarg);
			break;
		case 'h':
		default:
			usage(argv[0]);
			exit(0);
		}
	}

	test_mem_conversions(src_type, nr_vcpus, nr_memslots);

	return 0;
}