1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3 * Userfaultfd unit tests.
4 *
5 * Copyright (C) 2015-2023 Red Hat, Inc.
6 */
7
8 #include "uffd-common.h"
9
10 #include "../../../../mm/gup_test.h"
11
12 #ifdef __NR_userfaultfd
13
/*
 * Size of the memory used by each unit test.  The unit tests don't need a
 * large or random size, so use 32MB for now.
 */
#define  UFFD_TEST_MEM_SIZE               (32UL << 20)

/* One bit per memory type a test can be run against */
#define  MEM_ANON                         BIT_ULL(0)
#define  MEM_SHMEM                        BIT_ULL(1)
#define  MEM_SHMEM_PRIVATE                BIT_ULL(2)
#define  MEM_HUGETLB                      BIT_ULL(3)
#define  MEM_HUGETLB_PRIVATE              BIT_ULL(4)

/* Mask with every memory type bit above set */
#define  MEM_ALL  (MEM_ANON | MEM_SHMEM | MEM_SHMEM_PRIVATE |	\
		   MEM_HUGETLB | MEM_HUGETLB_PRIVATE)

/*
 * Round x up to the next multiple of align_to and cast the result back to
 * the type of x.  The mask arithmetic is only correct when align_to is a
 * power of two.  Note that align_to is evaluated twice.
 */
#define ALIGN_UP(x, align_to) \
	((__typeof__(x))((((unsigned long)(x)) + ((align_to)-1)) & ~((align_to)-1)))

/* Larger of a and b.  The selected argument is evaluated twice. */
#define MAX(a, b) (((a) > (b)) ? (a) : (b))
30
/* Describes one kind of memory the unit tests can be run against */
struct mem_type {
	/* Human readable name of the memory type */
	const char *name;
	/* The MEM_* bit that identifies this memory type */
	unsigned int mem_flag;
	/* Operations table installed as uffd_test_ops for this memory type */
	uffd_test_ops_t *mem_ops;
	/* Value installed into the global map_shared for this memory type */
	bool shared;
};
typedef struct mem_type mem_type_t;
38
/*
 * Table of all known memory types.  The private and shared flavours of
 * shmem and hugetlb use the same ops table and differ only in .shared
 * and in their MEM_* flag.
 */
mem_type_t mem_types[] = {
	{
		.name = "anon",
		.mem_flag = MEM_ANON,
		.mem_ops = &anon_uffd_test_ops,
		.shared = false,
	},
	{
		.name = "shmem",
		.mem_flag = MEM_SHMEM,
		.mem_ops = &shmem_uffd_test_ops,
		.shared = true,
	},
	{
		.name = "shmem-private",
		.mem_flag = MEM_SHMEM_PRIVATE,
		.mem_ops = &shmem_uffd_test_ops,
		.shared = false,
	},
	{
		.name = "hugetlb",
		.mem_flag = MEM_HUGETLB,
		.mem_ops = &hugetlb_uffd_test_ops,
		.shared = true,
	},
	{
		.name = "hugetlb-private",
		.mem_flag = MEM_HUGETLB_PRIVATE,
		.mem_ops = &hugetlb_uffd_test_ops,
		.shared = false,
	},
};
71
/* Arguments to be passed over to each uffd unit test */
struct uffd_test_args {
	/* Memory type the current test runs against */
	mem_type_t *mem_type;
};
typedef struct uffd_test_args uffd_test_args_t;
77
/*
 * Entry point of one unit test.  It returns nothing; the tests in this
 * file report their outcome through uffd_test_pass(), uffd_test_fail()
 * or uffd_test_skip().
 */
typedef void (*uffd_test_fn)(uffd_test_args_t *);

/* Static description of one unit test case */
typedef struct {
	/* Name of the test case */
	const char *name;
	/* Function implementing the test */
	uffd_test_fn uffd_fn;
	/* Memory types to run against; presumably a mask of MEM_* bits */
	unsigned int mem_targets;
	/* UFFD feature bits that must all be available to run the test */
	uint64_t uffd_feature_required;
	/* Installed as the global uffd_test_case_ops while the test runs */
	uffd_test_case_ops_t *test_case_ops;
} uffd_test_case_t;
88
/* Print a one-line summary of the kselftest pass/skip/fail counters */
static void uffd_test_report(void)
{
	unsigned int passed = ksft_get_pass_cnt();
	unsigned int skipped = ksft_get_xskip_cnt();
	unsigned int failed = ksft_get_fail_cnt();
	unsigned int total = ksft_test_num();

	printf("Userfaults unit tests: pass=%u, skip=%u, fail=%u (total=%u)\n",
	       passed, skipped, failed, total);
}
97
/* Finish the current "Testing ..." line as passed and count the pass */
static void uffd_test_pass(void)
{
	fputs("done\n", stdout);
	ksft_inc_pass_cnt();
}
103
/*
 * Announce a test: prints "Testing <printf-style description>... " without
 * a newline and flushes stdout so the text is visible before the test runs.
 */
#define  uffd_test_start(...)  do {		\
		printf("Testing ");		\
		printf(__VA_ARGS__);		\
		printf("... ");			\
		fflush(stdout);			\
	} while (0)

/*
 * Finish the current test line as failed, with a printf-style reason, and
 * bump the kselftest failure counter.
 */
#define  uffd_test_fail(...)  do {		\
		printf("failed [reason: ");	\
		printf(__VA_ARGS__);		\
		printf("]\n");			\
		ksft_inc_fail_cnt();		\
	} while (0)
117
/*
 * Finish the current test line as skipped and count the skip.
 *
 * @message: plain string explaining why the test was skipped.
 */
static void uffd_test_skip(const char *message)
{
	printf("skipped [reason: %s]\n",
	       message);
	ksft_inc_xskip_cnt();
}
123
124 /*
125 * Returns 1 if specific userfaultfd supported, 0 otherwise. Note, we'll
126 * return 1 even if some test failed as long as uffd supported, because in
127 * that case we still want to proceed with the rest uffd unit tests.
128 */
test_uffd_api(bool use_dev)129 static int test_uffd_api(bool use_dev)
130 {
131 struct uffdio_api uffdio_api;
132 int uffd;
133
134 uffd_test_start("UFFDIO_API (with %s)",
135 use_dev ? "/dev/userfaultfd" : "syscall");
136
137 if (use_dev)
138 uffd = uffd_open_dev(UFFD_FLAGS);
139 else
140 uffd = uffd_open_sys(UFFD_FLAGS);
141 if (uffd < 0) {
142 uffd_test_skip("cannot open userfaultfd handle");
143 return 0;
144 }
145
146 /* Test wrong UFFD_API */
147 uffdio_api.api = 0xab;
148 uffdio_api.features = 0;
149 if (ioctl(uffd, UFFDIO_API, &uffdio_api) == 0) {
150 uffd_test_fail("UFFDIO_API should fail with wrong api but didn't");
151 goto out;
152 }
153
154 /* Test wrong feature bit */
155 uffdio_api.api = UFFD_API;
156 uffdio_api.features = BIT_ULL(63);
157 if (ioctl(uffd, UFFDIO_API, &uffdio_api) == 0) {
158 uffd_test_fail("UFFDIO_API should fail with wrong feature but didn't");
159 goto out;
160 }
161
162 /* Test normal UFFDIO_API */
163 uffdio_api.api = UFFD_API;
164 uffdio_api.features = 0;
165 if (ioctl(uffd, UFFDIO_API, &uffdio_api)) {
166 uffd_test_fail("UFFDIO_API should succeed but failed");
167 goto out;
168 }
169
170 /* Test double requests of UFFDIO_API with a random feature set */
171 uffdio_api.features = BIT_ULL(0);
172 if (ioctl(uffd, UFFDIO_API, &uffdio_api) == 0) {
173 uffd_test_fail("UFFDIO_API should reject initialized uffd");
174 goto out;
175 }
176
177 uffd_test_pass();
178 out:
179 close(uffd);
180 /* We have a valid uffd handle */
181 return 1;
182 }
183
184 /*
185 * This function initializes the global variables. TODO: remove global
186 * vars and then remove this.
187 */
188 static int
uffd_setup_environment(uffd_test_args_t * args,uffd_test_case_t * test,mem_type_t * mem_type,const char ** errmsg)189 uffd_setup_environment(uffd_test_args_t *args, uffd_test_case_t *test,
190 mem_type_t *mem_type, const char **errmsg)
191 {
192 map_shared = mem_type->shared;
193 uffd_test_ops = mem_type->mem_ops;
194 uffd_test_case_ops = test->test_case_ops;
195
196 if (mem_type->mem_flag & (MEM_HUGETLB_PRIVATE | MEM_HUGETLB))
197 page_size = default_huge_page_size();
198 else
199 page_size = psize();
200
201 /* Ensure we have at least 2 pages */
202 nr_pages = MAX(UFFD_TEST_MEM_SIZE, page_size * 2) / page_size;
203 /* TODO: remove this global var.. it's so ugly */
204 nr_parallel = 1;
205
206 /* Initialize test arguments */
207 args->mem_type = mem_type;
208
209 return uffd_test_ctx_init(test->uffd_feature_required, errmsg);
210 }
211
/*
 * Returns true only if the feature set reported by uffd_get_features()
 * contains every bit of test->uffd_feature_required.  A failure to query
 * the features counts as "not supported".
 */
static bool uffd_feature_supported(uffd_test_case_t *test)
{
	uint64_t available;

	if (uffd_get_features(&available))
		return false;

	/* Supported iff no required bit is missing from the available set */
	return !(test->uffd_feature_required & ~available);
}
222
/*
 * Open /proc/self/pagemap read-only and return the file descriptor.
 * A failed open is reported through err().
 */
static int pagemap_open(void)
{
	int pagemap_fd;

	pagemap_fd = open("/proc/self/pagemap", O_RDONLY);
	if (pagemap_fd < 0)
		err("open pagemap");

	return pagemap_fd;
}
232
/*
 * Verify that the PM_UFFD_WP bit of a pagemap entry matches expectation.
 *
 * @value: pagemap entry (printed with PRIx64 on mismatch).
 * @wp:    true if the uffd-wp bit is expected to be set, false otherwise.
 *
 * This is a macro rather than a function so that the __LINE__ reported by
 * err() points at the caller.  Both arguments are parenthesized so that
 * arbitrary expressions can be passed safely.
 */
#define  pagemap_check_wp(value, wp) do {				\
		if (!!((value) & PM_UFFD_WP) != (wp))			\
			err("pagemap uffd-wp bit error: 0x%"PRIx64, (value)); \
	} while (0)
238
/* Data shared between pagemap_test_fork() and fork_event_consumer() */
typedef struct {
	/*
	 * parent_uffd: handle the consumer thread reads the fork event from.
	 * child_uffd:  filled in by the consumer with the uffd carried by the
	 *              UFFD_EVENT_FORK message.
	 */
	int parent_uffd, child_uffd;
} fork_event_args;
242
fork_event_consumer(void * data)243 static void *fork_event_consumer(void *data)
244 {
245 fork_event_args *args = data;
246 struct uffd_msg msg = { 0 };
247
248 ready_for_fork = true;
249
250 /* Read until a full msg received */
251 while (uffd_read_msg(args->parent_uffd, &msg));
252
253 if (msg.event != UFFD_EVENT_FORK)
254 err("wrong message: %u\n", msg.event);
255
256 /* Just to be properly freed later */
257 args->child_uffd = msg.arg.fork.ufd;
258 return NULL;
259 }
260
/* State carried between pin_pages() and unpin_pages() */
typedef struct {
	/* File descriptor of /sys/kernel/debug/gup_test, valid while pinned */
	int gup_fd;
	/* True between a successful pin_pages() and the next unpin_pages() */
	bool pinned;
} pin_args;
265
266 /*
267 * Returns 0 if succeed, <0 for errors. pin_pages() needs to be paired
268 * with unpin_pages(). Currently it needs to be RO longterm pin to satisfy
269 * all needs of the test cases (e.g., trigger unshare, trigger fork() early
270 * CoW, etc.).
271 */
pin_pages(pin_args * args,void * buffer,size_t size)272 static int pin_pages(pin_args *args, void *buffer, size_t size)
273 {
274 struct pin_longterm_test test = {
275 .addr = (uintptr_t)buffer,
276 .size = size,
277 /* Read-only pins */
278 .flags = 0,
279 };
280
281 if (args->pinned)
282 err("already pinned");
283
284 args->gup_fd = open("/sys/kernel/debug/gup_test", O_RDWR);
285 if (args->gup_fd < 0)
286 return -errno;
287
288 if (ioctl(args->gup_fd, PIN_LONGTERM_TEST_START, &test)) {
289 /* Even if gup_test existed, can be an old gup_test / kernel */
290 close(args->gup_fd);
291 return -errno;
292 }
293 args->pinned = true;
294 return 0;
295 }
296
/*
 * Undo a successful pin_pages(): stop the longterm pin test, close the
 * gup_test fd and mark @args as no longer pinned.  Calling this without a
 * prior successful pin is reported through err().
 */
static void unpin_pages(pin_args *args)
{
	if (args->pinned == false)
		err("unpin without pin first");

	if (ioctl(args->gup_fd, PIN_LONGTERM_TEST_STOP) != 0)
		err("PIN_LONGTERM_TEST_STOP");

	close(args->gup_fd);
	args->pinned = false;
}
306
pagemap_test_fork(int uffd,bool with_event,bool test_pin)307 static int pagemap_test_fork(int uffd, bool with_event, bool test_pin)
308 {
309 fork_event_args args = { .parent_uffd = uffd, .child_uffd = -1 };
310 pthread_t thread;
311 pid_t child;
312 uint64_t value;
313 int fd, result;
314
315 /* Prepare a thread to resolve EVENT_FORK */
316 if (with_event) {
317 ready_for_fork = false;
318 if (pthread_create(&thread, NULL, fork_event_consumer, &args))
319 err("pthread_create()");
320 while (!ready_for_fork)
321 ; /* Wait for the poll_thread to start executing before forking */
322 }
323
324 child = fork();
325 if (!child) {
326 /* Open the pagemap fd of the child itself */
327 pin_args args = {};
328
329 fd = pagemap_open();
330
331 if (test_pin && pin_pages(&args, area_dst, page_size))
332 /*
333 * Normally when reach here we have pinned in
334 * previous tests, so shouldn't fail anymore
335 */
336 err("pin page failed in child");
337
338 value = pagemap_get_entry(fd, area_dst);
339 /*
340 * After fork(), we should handle uffd-wp bit differently:
341 *
342 * (1) when with EVENT_FORK, it should persist
343 * (2) when without EVENT_FORK, it should be dropped
344 */
345 pagemap_check_wp(value, with_event);
346 if (test_pin)
347 unpin_pages(&args);
348 /* Succeed */
349 exit(0);
350 }
351 waitpid(child, &result, 0);
352
353 if (with_event) {
354 if (pthread_join(thread, NULL))
355 err("pthread_join()");
356 if (args.child_uffd < 0)
357 err("Didn't receive child uffd");
358 close(args.child_uffd);
359 }
360
361 return result;
362 }
363
/*
 * Exercise uffd-wp on the first, not yet populated page of area_dst:
 * protecting, unprotecting and zapping it, and faulting it in afterwards.
 * The uffd-wp state is observed through /proc/self/pagemap.
 *
 * @args: unused.
 */
static void uffd_wp_unpopulated_test(uffd_test_args_t *args)
{
	uint64_t value;
	int pagemap_fd;

	if (uffd_register(uffd, area_dst, nr_pages * page_size,
			  false, true, false))
		err("register failed");

	pagemap_fd = pagemap_open();

	/* Test applying pte marker to anon unpopulated */
	wp_range(uffd, (uint64_t)area_dst, page_size, true);
	value = pagemap_get_entry(pagemap_fd, area_dst);
	pagemap_check_wp(value, true);

	/* Test unprotect on anon pte marker */
	wp_range(uffd, (uint64_t)area_dst, page_size, false);
	value = pagemap_get_entry(pagemap_fd, area_dst);
	pagemap_check_wp(value, false);

	/* Test zap on anon marker */
	wp_range(uffd, (uint64_t)area_dst, page_size, true);
	if (madvise(area_dst, page_size, MADV_DONTNEED))
		err("madvise(MADV_DONTNEED) failed");
	value = pagemap_get_entry(pagemap_fd, area_dst);
	pagemap_check_wp(value, false);

	/* Test fault in after marker removed */
	*area_dst = 1;
	value = pagemap_get_entry(pagemap_fd, area_dst);
	pagemap_check_wp(value, false);
	/* Drop it to make pte none again */
	if (madvise(area_dst, page_size, MADV_DONTNEED))
		err("madvise(MADV_DONTNEED) failed");

	/* Test read-zero-page upon pte marker */
	wp_range(uffd, (uint64_t)area_dst, page_size, true);
	*(volatile char *)area_dst;
	/* Drop it to make pte none again */
	if (madvise(area_dst, page_size, MADV_DONTNEED))
		err("madvise(MADV_DONTNEED) failed");

	/* Do not leak the pagemap fd into the tests that run after this one */
	close(pagemap_fd);

	uffd_test_pass();
}
409
/*
 * Check how the uffd-wp bit of the first page of area_dst is seen by a
 * forked child, first while the page is present and then after an attempt
 * to zap or page out its pte.
 *
 * @args:       supplies the memory type (shared vs. private).
 * @with_event: passed to pagemap_test_fork(); selects whether a
 *              UFFD_EVENT_FORK is expected and therefore whether the child
 *              should still see the uffd-wp bit.
 */
static void uffd_wp_fork_test_common(uffd_test_args_t *args,
				     bool with_event)
{
	int pagemap_fd;
	uint64_t value;

	if (uffd_register(uffd, area_dst, nr_pages * page_size,
			  false, true, false))
		err("register failed");

	pagemap_fd = pagemap_open();

	/* Touch the page */
	*area_dst = 1;
	wp_range(uffd, (uint64_t)area_dst, page_size, true);
	value = pagemap_get_entry(pagemap_fd, area_dst);
	pagemap_check_wp(value, true);
	/* A non-zero return means the check in the child did not succeed */
	if (pagemap_test_fork(uffd, with_event, false)) {
		uffd_test_fail("Detected %s uffd-wp bit in child in present pte",
			       with_event ? "missing" : "stall");
		goto out;
	}

	/*
	 * This is an attempt for zapping the pgtable so as to test the
	 * markers.
	 *
	 * For private mappings, PAGEOUT will only work on exclusive ptes
	 * (PM_MMAP_EXCLUSIVE) which we should satisfy.
	 *
	 * For shared, PAGEOUT may not work.  Use DONTNEED instead which
	 * plays a similar role of zapping (rather than freeing the page)
	 * to expose pte markers.
	 */
	if (args->mem_type->shared) {
		if (madvise(area_dst, page_size, MADV_DONTNEED))
			err("MADV_DONTNEED");
	} else {
		/*
		 * NOTE: ignore retval because private-hugetlb doesn't yet
		 * support swapping, so it could fail.
		 */
		madvise(area_dst, page_size, MADV_PAGEOUT);
	}

	/* Uffd-wp should persist even swapped out */
	value = pagemap_get_entry(pagemap_fd, area_dst);
	pagemap_check_wp(value, true);
	if (pagemap_test_fork(uffd, with_event, false)) {
		uffd_test_fail("Detected %s uffd-wp bit in child in zapped pte",
			       with_event ? "missing" : "stall");
		goto out;
	}

	/* Unprotect; this tests swap pte modifications */
	wp_range(uffd, (uint64_t)area_dst, page_size, false);
	value = pagemap_get_entry(pagemap_fd, area_dst);
	pagemap_check_wp(value, false);

	/* Fault in the page from disk */
	*area_dst = 2;
	value = pagemap_get_entry(pagemap_fd, area_dst);
	pagemap_check_wp(value, false);
	uffd_test_pass();
out:
	/* Common cleanup for both the pass and the fail paths */
	if (uffd_unregister(uffd, area_dst, nr_pages * page_size))
		err("unregister failed");
	close(pagemap_fd);
}
479
/* uffd-wp fork test without a fork event (with_event == false) */
static void uffd_wp_fork_test(uffd_test_args_t *args)
{
	uffd_wp_fork_test_common(args, false);
}
484
/* uffd-wp fork test with a fork event expected (with_event == true) */
static void uffd_wp_fork_with_event_test(uffd_test_args_t *args)
{
	uffd_wp_fork_test_common(args, true);
}
489
/*
 * Check the uffd-wp bit seen by a forked child when the first page of
 * area_dst is pinned, either before the fork (by the parent) or after it
 * (by the child).  The test is skipped when the page cannot be pinned.
 *
 * @args:       unused.
 * @with_event: passed to pagemap_test_fork(); selects whether a
 *              UFFD_EVENT_FORK is expected.
 */
static void uffd_wp_fork_pin_test_common(uffd_test_args_t *args,
					 bool with_event)
{
	int pagemap_fd;
	pin_args pin = {};

	if (uffd_register(uffd, area_dst, page_size, false, true, false))
		err("register failed");

	pagemap_fd = pagemap_open();

	/* Touch the page */
	*area_dst = 1;
	wp_range(uffd, (uint64_t)area_dst, page_size, true);

	/*
	 * 1. First pin, then fork().  This tests fork() special path when
	 * doing early CoW if the page is private.
	 */
	if (pin_pages(&pin, area_dst, page_size)) {
		uffd_test_skip("Possibly CONFIG_GUP_TEST missing "
			       "or unprivileged");
		close(pagemap_fd);
		/* Best effort: the test is being skipped anyway */
		uffd_unregister(uffd, area_dst, page_size);
		return;
	}

	if (pagemap_test_fork(uffd, with_event, false)) {
		uffd_test_fail("Detected %s uffd-wp bit in early CoW of fork()",
			       with_event ? "missing" : "stall");
		unpin_pages(&pin);
		goto out;
	}

	unpin_pages(&pin);

	/*
	 * 2. First fork(), then pin (in the child, where test_pin==true).
	 * This tests COR, aka, page unsharing on private memories.
	 */
	if (pagemap_test_fork(uffd, with_event, true)) {
		uffd_test_fail("Detected %s uffd-wp bit when RO pin",
			       with_event ? "missing" : "stall");
		goto out;
	}
	uffd_test_pass();
out:
	/* Report the operation that actually failed: this is the unregister */
	if (uffd_unregister(uffd, area_dst, page_size))
		err("unregister failed");
	close(pagemap_fd);
}
541
/* uffd-wp fork+pin test without a fork event (with_event == false) */
static void uffd_wp_fork_pin_test(uffd_test_args_t *args)
{
	uffd_wp_fork_pin_test_common(args, false);
}
546
/* uffd-wp fork+pin test with a fork event expected (with_event == true) */
static void uffd_wp_fork_pin_with_event_test(uffd_test_args_t *args)
{
	uffd_wp_fork_pin_test_common(args, true);
}
551
check_memory_contents(char * p)552 static void check_memory_contents(char *p)
553 {
554 unsigned long i, j;
555 uint8_t expected_byte;
556
557 for (i = 0; i < nr_pages; ++i) {
558 expected_byte = ~((uint8_t)(i % ((uint8_t)-1)));
559 for (j = 0; j < page_size; j++) {
560 uint8_t v = *(uint8_t *)(p + (i * page_size) + j);
561 if (v != expected_byte)
562 err("unexpected page contents");
563 }
564 }
565 }
566
/*
 * Common body of the minor fault tests.
 *
 * @test_collapse: after all faults are resolved, additionally try
 *                 MADV_COLLAPSE on the registered alias and re-verify the
 *                 contents.  The test is skipped if the madvise fails.
 * @test_wp:       also register the range in wp mode and let the poll
 *                 thread apply write protection (args.apply_wp).
 *
 * The two options must not be combined.  The test passes when no missing
 * fault and exactly nr_pages minor faults were counted.
 */
static void uffd_minor_test_common(bool test_collapse, bool test_wp)
{
	unsigned long p;
	pthread_t uffd_mon;
	char c;
	struct uffd_args args = { 0 };

	/*
	 * NOTE: MADV_COLLAPSE is not yet compatible with WP, so testing
	 * both do not make much sense.
	 */
	assert(!(test_collapse && test_wp));

	if (uffd_register(uffd, area_dst_alias, nr_pages * page_size,
			  /* NOTE! MADV_COLLAPSE may not work with uffd-wp */
			  false, test_wp, true))
		err("register failure");

	/*
	 * After registering with UFFD, populate the non-UFFD-registered side of
	 * the shared mapping. This should *not* trigger any UFFD minor faults.
	 */
	for (p = 0; p < nr_pages; ++p)
		memset(area_dst + (p * page_size), p % ((uint8_t)-1),
		       page_size);

	args.apply_wp = test_wp;
	if (pthread_create(&uffd_mon, NULL, uffd_poll_thread, &args))
		err("uffd_poll_thread create");

	/*
	 * Read each of the pages back using the UFFD-registered mapping. We
	 * expect that the first time we touch a page, it will result in a minor
	 * fault. uffd_poll_thread will resolve the fault by bit-flipping the
	 * page's contents, and then issuing a CONTINUE ioctl.
	 */
	check_memory_contents(area_dst_alias);

	/*
	 * Write one byte to the pipe, then join the poll thread (presumably
	 * the pipe write is what tells it to exit).  The value of the byte
	 * is never set here.
	 */
	if (write(pipefd[1], &c, sizeof(c)) != sizeof(c))
		err("pipe write");
	if (pthread_join(uffd_mon, NULL))
		err("join() failed");

	if (test_collapse) {
		if (madvise(area_dst_alias, nr_pages * page_size,
			    MADV_COLLAPSE)) {
			/* It's fine to fail for this one... */
			uffd_test_skip("MADV_COLLAPSE failed");
			return;
		}

		uffd_test_ops->check_pmd_mapping(area_dst,
						 nr_pages * page_size /
						 read_pmd_pagesize());
		/*
		 * This won't cause uffd-fault - it purely just makes sure there
		 * was no corruption.
		 */
		check_memory_contents(area_dst_alias);
	}

	/* Every page must have produced exactly one minor fault */
	if (args.missing_faults != 0 || args.minor_faults != nr_pages)
		uffd_test_fail("stats check error");
	else
		uffd_test_pass();
}
633
/* Minor fault test: no collapse, no write protection.  @args is unused. */
void uffd_minor_test(uffd_test_args_t *args)
{
	uffd_minor_test_common(false, false);
}
638
/* Minor fault test with write protection enabled.  @args is unused. */
void uffd_minor_wp_test(uffd_test_args_t *args)
{
	uffd_minor_test_common(false, true);
}
643
/* Minor fault test followed by MADV_COLLAPSE.  @args is unused. */
void uffd_minor_collapse_test(uffd_test_args_t *args)
{
	uffd_minor_test_common(true, false);
}
648
649 static sigjmp_buf jbuf, *sigbuf;
650
sighndl(int sig,siginfo_t * siginfo,void * ptr)651 static void sighndl(int sig, siginfo_t *siginfo, void *ptr)
652 {
653 if (sig == SIGBUS) {
654 if (sigbuf)
655 siglongjmp(*sigbuf, 1);
656 abort();
657 }
658 }
659
/*
 * For non-cooperative userfaultfd test we fork() a process that will
 * generate pagefaults, will mremap the area monitored by the
 * userfaultfd and at last this process will release the monitored
 * area.
 * For the anonymous and shared memory the area is divided into two
 * parts, the first part is accessed before mremap, and the second
 * part is accessed after mremap. Since hugetlbfs does not support
 * mremap, the entire monitored area is accessed in a single pass for
 * HUGETLB_TEST.
 * The release of the pages currently generates event for shmem and
 * anonymous memory (UFFD_EVENT_REMOVE), hence it is not checked
 * for hugetlb.
 * For signal test(UFFD_FEATURE_SIGBUS), signal_test = 1, we register
 * monitored area, generate pagefaults and test that signal is delivered.
 * Use UFFDIO_COPY to allocate missing page and retry. For signal_test = 2
 * test robustness use case - we release monitored area, fork a process
 * that will generate pagefaults and verify signal is generated.
 * This also tests UFFD_FEATURE_EVENT_FORK event along with the signal
 * feature. Using monitor thread, verify no userfault events are generated.
 *
 * @signal_test: 0 for the events test, 1 or 2 for the two signal tests
 *               described above.
 * @wp:          passed on to copy_page() when a missing page is resolved
 *               from the SIGBUS path.
 *
 * Returns 0 on success.  In the signal tests the return value is non-zero
 * when the number of counted signals differs from the number of pages in
 * the first half of the area.
 */
static int faulting_process(int signal_test, bool wp)
{
	unsigned long nr, i;
	unsigned long long count;
	unsigned long split_nr_pages;
	unsigned long lastnr;
	struct sigaction act;
	volatile unsigned long signalled = 0;

	/* Number of pages in the first part, rounded up */
	split_nr_pages = (nr_pages + 1) / 2;

	if (signal_test) {
		/* Install the SIGBUS handler and the jump buffer it uses */
		sigbuf = &jbuf;
		memset(&act, 0, sizeof(act));
		act.sa_sigaction = sighndl;
		act.sa_flags = SA_SIGINFO;
		if (sigaction(SIGBUS, &act, 0))
			err("sigaction");
		/* No page has raised a signal yet */
		lastnr = (unsigned long)-1;
	}

	for (nr = 0; nr < split_nr_pages; nr++) {
		/* Counts the signals handled for this page (1 = none yet) */
		volatile int steps = 1;
		unsigned long offset = nr * page_size;

		if (signal_test) {
			/* Non-zero: we got here via siglongjmp() from sighndl() */
			if (sigsetjmp(*sigbuf, 1) != 0) {
				if (steps == 1 && nr == lastnr)
					err("Signal repeated");

				lastnr = nr;
				if (signal_test == 1) {
					if (steps == 1) {
						/* This is a MISSING request */
						steps++;
						if (copy_page(uffd, offset, wp))
							signalled++;
					} else {
						/* This is a WP request */
						assert(steps == 2);
						wp_range(uffd,
							 (__u64)area_dst +
							 offset,
							 page_size, false);
					}
				} else {
					/* signal_test == 2: count it, skip the page */
					signalled++;
					continue;
				}
			}
		}

		count = *area_count(area_dst, nr);
		if (count != count_verify[nr])
			err("nr %lu memory corruption %llu %llu\n",
			    nr, count, count_verify[nr]);
		/*
		 * Trigger write protection if there is by writing
		 * the same value back.
		 */
		*area_count(area_dst, nr) = count;
	}

	/* The signal tests only cover the first part and stop here */
	if (signal_test)
		return signalled != split_nr_pages;

	area_dst = mremap(area_dst, nr_pages * page_size,  nr_pages * page_size,
			  MREMAP_MAYMOVE | MREMAP_FIXED, area_src);
	if (area_dst == MAP_FAILED)
		err("mremap");
	/* Reset area_src since we just clobbered it */
	area_src = NULL;

	/* Second part: nr continues where the first loop stopped */
	for (; nr < nr_pages; nr++) {
		count = *area_count(area_dst, nr);
		if (count != count_verify[nr]) {
			err("nr %lu memory corruption %llu %llu\n",
			    nr, count, count_verify[nr]);
		}
		/*
		 * Trigger write protection if there is by writing
		 * the same value back.
		 */
		*area_count(area_dst, nr) = count;
	}

	uffd_test_ops->release_pages(area_dst);

	/* After the release every byte of the area must read back as zero */
	for (nr = 0; nr < nr_pages; nr++)
		for (i = 0; i < page_size; i++)
			if (*(area_dst + nr * page_size + i) != 0)
				err("page %lu offset %lu is not zero", nr, i);

	return 0;
}
776
uffd_sigbus_test_common(bool wp)777 static void uffd_sigbus_test_common(bool wp)
778 {
779 unsigned long userfaults;
780 pthread_t uffd_mon;
781 pid_t pid;
782 int err;
783 char c;
784 struct uffd_args args = { 0 };
785
786 ready_for_fork = false;
787
788 fcntl(uffd, F_SETFL, uffd_flags | O_NONBLOCK);
789
790 if (uffd_register(uffd, area_dst, nr_pages * page_size,
791 true, wp, false))
792 err("register failure");
793
794 if (faulting_process(1, wp))
795 err("faulting process failed");
796
797 uffd_test_ops->release_pages(area_dst);
798
799 args.apply_wp = wp;
800 if (pthread_create(&uffd_mon, NULL, uffd_poll_thread, &args))
801 err("uffd_poll_thread create");
802
803 while (!ready_for_fork)
804 ; /* Wait for the poll_thread to start executing before forking */
805
806 pid = fork();
807 if (pid < 0)
808 err("fork");
809
810 if (!pid)
811 exit(faulting_process(2, wp));
812
813 waitpid(pid, &err, 0);
814 if (err)
815 err("faulting process failed");
816 if (write(pipefd[1], &c, sizeof(c)) != sizeof(c))
817 err("pipe write");
818 if (pthread_join(uffd_mon, (void **)&userfaults))
819 err("pthread_join()");
820
821 if (userfaults)
822 uffd_test_fail("Signal test failed, userfaults: %ld", userfaults);
823 else
824 uffd_test_pass();
825 }
826
/* SIGBUS test without write protection.  @args is unused. */
static void uffd_sigbus_test(uffd_test_args_t *args)
{
	uffd_sigbus_test_common(false);
}
831
/* SIGBUS test with write protection.  @args is unused. */
static void uffd_sigbus_wp_test(uffd_test_args_t *args)
{
	uffd_sigbus_test_common(true);
}
836
/*
 * Common body of the events tests: register area_dst, start a poll
 * thread, fork a child that runs faulting_process(0, wp) and, once the
 * child has exited successfully, check that the poll thread counted
 * exactly nr_pages missing faults.
 *
 * @wp: also register the range in wp mode and let the poll thread apply
 *      write protection.
 */
static void uffd_events_test_common(bool wp)
{
	pthread_t uffd_mon;
	pid_t pid;
	int err;
	char c;
	struct uffd_args args = { 0 };

	ready_for_fork = false;

	/* Switch the uffd to non-blocking mode; the result is not checked */
	fcntl(uffd, F_SETFL, uffd_flags | O_NONBLOCK);
	if (uffd_register(uffd, area_dst, nr_pages * page_size,
			  true, wp, false))
		err("register failure");

	args.apply_wp = wp;
	if (pthread_create(&uffd_mon, NULL, uffd_poll_thread, &args))
		err("uffd_poll_thread create");

	while (!ready_for_fork)
		; /* Wait for the poll_thread to start executing before forking */

	pid = fork();
	if (pid < 0)
		err("fork");

	/* Child: generate the faults and exit with the result */
	if (!pid)
		exit(faulting_process(0, wp));

	/* "err" here is the local wait status, not the err() macro */
	waitpid(pid, &err, 0);
	if (err)
		err("faulting process failed");
	/*
	 * Write one byte to the pipe, then join the poll thread (presumably
	 * the pipe write is what tells it to exit).
	 */
	if (write(pipefd[1], &c, sizeof(c)) != sizeof(c))
		err("pipe write");
	if (pthread_join(uffd_mon, NULL))
		err("pthread_join()");

	if (args.missing_faults != nr_pages)
		uffd_test_fail("Fault counts wrong");
	else
		uffd_test_pass();
}
879
/* Events test without write protection.  @args is unused. */
static void uffd_events_test(uffd_test_args_t *args)
{
	uffd_events_test_common(false);
}
884
/* Events test with write protection.  @args is unused. */
static void uffd_events_wp_test(uffd_test_args_t *args)
{
	uffd_events_test_common(true);
}
889
retry_uffdio_zeropage(int ufd,struct uffdio_zeropage * uffdio_zeropage)890 static void retry_uffdio_zeropage(int ufd,
891 struct uffdio_zeropage *uffdio_zeropage)
892 {
893 uffd_test_ops->alias_mapping(&uffdio_zeropage->range.start,
894 uffdio_zeropage->range.len,
895 0);
896 if (ioctl(ufd, UFFDIO_ZEROPAGE, uffdio_zeropage)) {
897 if (uffdio_zeropage->zeropage != -EEXIST)
898 err("UFFDIO_ZEROPAGE error: %"PRId64,
899 (int64_t)uffdio_zeropage->zeropage);
900 } else {
901 err("UFFDIO_ZEROPAGE error: %"PRId64,
902 (int64_t)uffdio_zeropage->zeropage);
903 }
904 }
905
do_uffdio_zeropage(int ufd,bool has_zeropage)906 static bool do_uffdio_zeropage(int ufd, bool has_zeropage)
907 {
908 struct uffdio_zeropage uffdio_zeropage = { 0 };
909 int ret;
910 __s64 res;
911
912 uffdio_zeropage.range.start = (unsigned long) area_dst;
913 uffdio_zeropage.range.len = page_size;
914 uffdio_zeropage.mode = 0;
915 ret = ioctl(ufd, UFFDIO_ZEROPAGE, &uffdio_zeropage);
916 res = uffdio_zeropage.zeropage;
917 if (ret) {
918 /* real retval in ufdio_zeropage.zeropage */
919 if (has_zeropage)
920 err("UFFDIO_ZEROPAGE error: %"PRId64, (int64_t)res);
921 else if (res != -EINVAL)
922 err("UFFDIO_ZEROPAGE not -EINVAL");
923 } else if (has_zeropage) {
924 if (res != page_size)
925 err("UFFDIO_ZEROPAGE unexpected size");
926 else
927 retry_uffdio_zeropage(ufd, &uffdio_zeropage);
928 return true;
929 } else
930 err("UFFDIO_ZEROPAGE succeeded");
931
932 return false;
933 }
934
935 /*
936 * Registers a range with MISSING mode only for zeropage test. Return true
937 * if UFFDIO_ZEROPAGE supported, false otherwise. Can't use uffd_register()
938 * because we want to detect .ioctls along the way.
939 */
940 static bool
uffd_register_detect_zeropage(int uffd,void * addr,uint64_t len)941 uffd_register_detect_zeropage(int uffd, void *addr, uint64_t len)
942 {
943 uint64_t ioctls = 0;
944
945 if (uffd_register_with_ioctls(uffd, addr, len, true,
946 false, false, &ioctls))
947 err("zeropage register fail");
948
949 return ioctls & (1 << _UFFDIO_ZEROPAGE);
950 }
951
952 /* exercise UFFDIO_ZEROPAGE */
uffd_zeropage_test(uffd_test_args_t * args)953 static void uffd_zeropage_test(uffd_test_args_t *args)
954 {
955 bool has_zeropage;
956 int i;
957
958 has_zeropage = uffd_register_detect_zeropage(uffd, area_dst, page_size);
959 if (area_dst_alias)
960 /* Ignore the retval; we already have it */
961 uffd_register_detect_zeropage(uffd, area_dst_alias, page_size);
962
963 if (do_uffdio_zeropage(uffd, has_zeropage))
964 for (i = 0; i < page_size; i++)
965 if (area_dst[i] != 0)
966 err("data non-zero at offset %d\n", i);
967
968 if (uffd_unregister(uffd, area_dst, page_size))
969 err("unregister");
970
971 if (area_dst_alias && uffd_unregister(uffd, area_dst_alias, page_size))
972 err("unregister");
973
974 uffd_test_pass();
975 }
976
uffd_register_poison(int uffd,void * addr,uint64_t len)977 static void uffd_register_poison(int uffd, void *addr, uint64_t len)
978 {
979 uint64_t ioctls = 0;
980 uint64_t expected = (1 << _UFFDIO_COPY) | (1 << _UFFDIO_POISON);
981
982 if (uffd_register_with_ioctls(uffd, addr, len, true,
983 false, false, &ioctls))
984 err("poison register fail");
985
986 if ((ioctls & expected) != expected)
987 err("registered area doesn't support COPY and POISON ioctls");
988 }
989
do_uffdio_poison(int uffd,unsigned long offset)990 static void do_uffdio_poison(int uffd, unsigned long offset)
991 {
992 struct uffdio_poison uffdio_poison = { 0 };
993 int ret;
994 __s64 res;
995
996 uffdio_poison.range.start = (unsigned long) area_dst + offset;
997 uffdio_poison.range.len = page_size;
998 uffdio_poison.mode = 0;
999 ret = ioctl(uffd, UFFDIO_POISON, &uffdio_poison);
1000 res = uffdio_poison.updated;
1001
1002 if (ret)
1003 err("UFFDIO_POISON error: %"PRId64, (int64_t)res);
1004 else if (res != page_size)
1005 err("UFFDIO_POISON unexpected size: %"PRId64, (int64_t)res);
1006 }
1007
uffd_poison_handle_fault(struct uffd_msg * msg,struct uffd_args * args)1008 static void uffd_poison_handle_fault(
1009 struct uffd_msg *msg, struct uffd_args *args)
1010 {
1011 unsigned long offset;
1012
1013 if (msg->event != UFFD_EVENT_PAGEFAULT)
1014 err("unexpected msg event %u", msg->event);
1015
1016 if (msg->arg.pagefault.flags &
1017 (UFFD_PAGEFAULT_FLAG_WP | UFFD_PAGEFAULT_FLAG_MINOR))
1018 err("unexpected fault type %llu", msg->arg.pagefault.flags);
1019
1020 offset = (char *)(unsigned long)msg->arg.pagefault.address - area_dst;
1021 offset &= ~(page_size-1);
1022
1023 /* Odd pages -> copy zeroed page; even pages -> poison. */
1024 if (offset & page_size)
1025 copy_page(uffd, offset, false);
1026 else
1027 do_uffdio_poison(uffd, offset);
1028 }
1029
/* Make sure to cover odd/even, and minimum duplications */
#define  UFFD_POISON_TEST_NPAGES  4

/*
 * Exercise UFFDIO_POISON: read UFFD_POISON_TEST_NPAGES pages of area_dst
 * while uffd_poison_handle_fault() resolves the faults, and expect a
 * SIGBUS for exactly half of the pages.  The pages that do not raise
 * SIGBUS must read back as all zeroes.  The test is skipped when fewer
 * pages than that are available.
 *
 * @targs: unused.
 */
static void uffd_poison_test(uffd_test_args_t *targs)
{
	pthread_t uffd_mon;
	char c;
	struct uffd_args args = { 0 };
	struct sigaction act = { 0 };
	unsigned long nr_sigbus = 0;
	unsigned long nr, poison_pages = UFFD_POISON_TEST_NPAGES;

	if (nr_pages < poison_pages) {
		uffd_test_skip("Too few pages for POISON test");
		return;
	}

	/* Switch the uffd to non-blocking mode; the result is not checked */
	fcntl(uffd, F_SETFL, uffd_flags | O_NONBLOCK);

	uffd_register_poison(uffd, area_dst, poison_pages * page_size);
	/* Zero the same number of pages of area_src */
	memset(area_src, 0, poison_pages * page_size);

	args.handle_fault = uffd_poison_handle_fault;
	if (pthread_create(&uffd_mon, NULL, uffd_poll_thread, &args))
		err("uffd_poll_thread create");

	/* Install the SIGBUS handler and the jump buffer it uses */
	sigbuf = &jbuf;
	act.sa_sigaction = sighndl;
	act.sa_flags = SA_SIGINFO;
	if (sigaction(SIGBUS, &act, 0))
		err("sigaction");

	for (nr = 0; nr < poison_pages; ++nr) {
		unsigned long offset = nr * page_size;
		const char *bytes = (const char *) area_dst + offset;
		const char *i;

		if (sigsetjmp(*sigbuf, 1)) {
			/*
			 * Access below triggered a SIGBUS, which was caught by
			 * sighndl, which then jumped here. Count this SIGBUS,
			 * and move on to next page.
			 */
			++nr_sigbus;
			continue;
		}

		/* Reading the page faults it in; it must then be all zeroes */
		for (i = bytes; i < bytes + page_size; ++i) {
			if (*i)
				err("nonzero byte in area_dst (%p) at %p: %u",
				    area_dst, i, *i);
		}
	}

	/*
	 * Write one byte to the pipe, then join the poll thread (presumably
	 * the pipe write is what tells it to exit).
	 */
	if (write(pipefd[1], &c, sizeof(c)) != sizeof(c))
		err("pipe write");
	if (pthread_join(uffd_mon, NULL))
		err("pthread_join()");

	/* Half of the pages are poisoned by the fault handler */
	if (nr_sigbus != poison_pages / 2)
		err("expected to receive %lu SIGBUS, actually received %lu",
		    poison_pages / 2, nr_sigbus);

	uffd_test_pass();
}
1095
1096 static void
uffd_move_handle_fault_common(struct uffd_msg * msg,struct uffd_args * args,unsigned long len)1097 uffd_move_handle_fault_common(struct uffd_msg *msg, struct uffd_args *args,
1098 unsigned long len)
1099 {
1100 unsigned long offset;
1101
1102 if (msg->event != UFFD_EVENT_PAGEFAULT)
1103 err("unexpected msg event %u", msg->event);
1104
1105 if (msg->arg.pagefault.flags &
1106 (UFFD_PAGEFAULT_FLAG_WP | UFFD_PAGEFAULT_FLAG_MINOR | UFFD_PAGEFAULT_FLAG_WRITE))
1107 err("unexpected fault type %llu", msg->arg.pagefault.flags);
1108
1109 offset = (char *)(unsigned long)msg->arg.pagefault.address - area_dst;
1110 offset &= ~(len-1);
1111
1112 if (move_page(uffd, offset, len))
1113 args->missing_faults++;
1114 }
1115
uffd_move_handle_fault(struct uffd_msg * msg,struct uffd_args * args)1116 static void uffd_move_handle_fault(struct uffd_msg *msg,
1117 struct uffd_args *args)
1118 {
1119 uffd_move_handle_fault_common(msg, args, page_size);
1120 }
1121
/*
 * Fault handler for the PMD-sized move tests: move one chunk of
 * read_pmd_pagesize() bytes per fault.
 */
static void uffd_move_pmd_handle_fault(struct uffd_msg *msg,
				       struct uffd_args *args)
{
	unsigned long chunk = read_pmd_pagesize();

	uffd_move_handle_fault_common(msg, args, chunk);
}
1127
1128 static void
uffd_move_test_common(uffd_test_args_t * targs,unsigned long chunk_size,void (* handle_fault)(struct uffd_msg * msg,struct uffd_args * args))1129 uffd_move_test_common(uffd_test_args_t *targs, unsigned long chunk_size,
1130 void (*handle_fault)(struct uffd_msg *msg, struct uffd_args *args))
1131 {
1132 unsigned long nr;
1133 pthread_t uffd_mon;
1134 char c;
1135 unsigned long long count;
1136 struct uffd_args args = { 0 };
1137 char *orig_area_src = NULL, *orig_area_dst = NULL;
1138 unsigned long step_size, step_count;
1139 unsigned long src_offs = 0;
1140 unsigned long dst_offs = 0;
1141
1142 /* Prevent source pages from being mapped more than once */
1143 if (madvise(area_src, nr_pages * page_size, MADV_DONTFORK))
1144 err("madvise(MADV_DONTFORK) failure");
1145
1146 if (uffd_register(uffd, area_dst, nr_pages * page_size,
1147 true, false, false))
1148 err("register failure");
1149
1150 args.handle_fault = handle_fault;
1151 if (pthread_create(&uffd_mon, NULL, uffd_poll_thread, &args))
1152 err("uffd_poll_thread create");
1153
1154 step_size = chunk_size / page_size;
1155 step_count = nr_pages / step_size;
1156
1157 if (chunk_size > page_size) {
1158 char *aligned_src = ALIGN_UP(area_src, chunk_size);
1159 char *aligned_dst = ALIGN_UP(area_dst, chunk_size);
1160
1161 if (aligned_src != area_src || aligned_dst != area_dst) {
1162 src_offs = (aligned_src - area_src) / page_size;
1163 dst_offs = (aligned_dst - area_dst) / page_size;
1164 step_count--;
1165 }
1166 orig_area_src = area_src;
1167 orig_area_dst = area_dst;
1168 area_src = aligned_src;
1169 area_dst = aligned_dst;
1170 }
1171
1172 /*
1173 * Read each of the pages back using the UFFD-registered mapping. We
1174 * expect that the first time we touch a page, it will result in a missing
1175 * fault. uffd_poll_thread will resolve the fault by moving source
1176 * page to destination.
1177 */
1178 for (nr = 0; nr < step_count * step_size; nr += step_size) {
1179 unsigned long i;
1180
1181 /* Check area_src content */
1182 for (i = 0; i < step_size; i++) {
1183 count = *area_count(area_src, nr + i);
1184 if (count != count_verify[src_offs + nr + i])
1185 err("nr %lu source memory invalid %llu %llu\n",
1186 nr + i, count, count_verify[src_offs + nr + i]);
1187 }
1188
1189 /* Faulting into area_dst should move the page or the huge page */
1190 for (i = 0; i < step_size; i++) {
1191 count = *area_count(area_dst, nr + i);
1192 if (count != count_verify[dst_offs + nr + i])
1193 err("nr %lu memory corruption %llu %llu\n",
1194 nr, count, count_verify[dst_offs + nr + i]);
1195 }
1196
1197 /* Re-check area_src content which should be empty */
1198 for (i = 0; i < step_size; i++) {
1199 count = *area_count(area_src, nr + i);
1200 if (count != 0)
1201 err("nr %lu move failed %llu %llu\n",
1202 nr, count, count_verify[src_offs + nr + i]);
1203 }
1204 }
1205 if (chunk_size > page_size) {
1206 area_src = orig_area_src;
1207 area_dst = orig_area_dst;
1208 }
1209
1210 if (write(pipefd[1], &c, sizeof(c)) != sizeof(c))
1211 err("pipe write");
1212 if (pthread_join(uffd_mon, NULL))
1213 err("join() failed");
1214
1215 if (args.missing_faults != step_count || args.minor_faults != 0)
1216 uffd_test_fail("stats check error");
1217 else
1218 uffd_test_pass();
1219 }
1220
/* UFFDIO_MOVE test at base page granularity. */
static void uffd_move_test(uffd_test_args_t *targs)
{
	unsigned long chunk = page_size;

	uffd_move_test_common(targs, chunk, uffd_move_handle_fault);
}
1225
/*
 * UFFDIO_MOVE test at PMD granularity: area_dst is marked MADV_HUGEPAGE
 * before the common move test runs with read_pmd_pagesize() sized chunks.
 */
static void uffd_move_pmd_test(uffd_test_args_t *targs)
{
	int rc = madvise(area_dst, nr_pages * page_size, MADV_HUGEPAGE);

	if (rc)
		err("madvise(MADV_HUGEPAGE) failure");

	uffd_move_test_common(targs, read_pmd_pagesize(),
			      uffd_move_pmd_handle_fault);
}
1233
/*
 * Same as the PMD move test, except that area_dst is marked
 * MADV_NOHUGEPAGE first while chunks are still read_pmd_pagesize() bytes.
 * NOTE(review): presumably this forces huge source pages to be split on
 * move; confirm against the UFFDIO_MOVE implementation.
 */
static void uffd_move_pmd_split_test(uffd_test_args_t *targs)
{
	int rc = madvise(area_dst, nr_pages * page_size, MADV_NOHUGEPAGE);

	if (rc)
		err("madvise(MADV_NOHUGEPAGE) failure");

	uffd_move_test_common(targs, read_pmd_pagesize(),
			      uffd_move_pmd_handle_fault);
}
1241
/*
 * Check the outcome of an ioctl() that was expected to be refused with
 * EAGAIN.
 *
 * The call must have returned -1 with errno == EAGAIN, and the result
 * field inside the ioctl() argument (e.g. copy.copy for UFFDIO_COPY) must
 * have been set to -EAGAIN as well. The first condition that does not hold
 * is reported through uffd_test_fail().
 *
 * @name:   ioctl name, used in the failure message only.
 * @ret:    return value of ioctl().
 * @error:  errno observed after the call.
 * @result: value of the result field after the call.
 *
 * Returns true when all three conditions hold, false otherwise.
 */
static bool
uffdio_verify_results(const char *name, int ret, int error, long result)
{
	if (ret == -1 && error == EAGAIN && result == -EAGAIN)
		return true;

	if (ret != -1) {
		uffd_test_fail("%s should have returned -1", name);
	} else if (error != EAGAIN) {
		uffd_test_fail("%s should have errno==EAGAIN", name);
	} else {
		uffd_test_fail("%s should have been updated for -EAGAIN",
			       name);
	}

	return false;
}
1268
1269 /*
1270 * This defines a function to test one ioctl. Note that here "field" can
1271 * be 1 or anything not -EAGAIN. With that initial value set, we can
1272 * verify later that it should be updated by kernel (when -EAGAIN
1273 * returned), by checking whether it is also updated to -EAGAIN.
1274 */
1275 #define DEFINE_MMAP_CHANGING_TEST(name, ioctl_name, field) \
1276 static bool uffdio_mmap_changing_test_##name(int fd) \
1277 { \
1278 int ret; \
1279 struct uffdio_##name args = { \
1280 .field = 1, \
1281 }; \
1282 ret = ioctl(fd, ioctl_name, &args); \
1283 return uffdio_verify_results(#ioctl_name, ret, errno, args.field); \
1284 }
1285
1286 DEFINE_MMAP_CHANGING_TEST(zeropage, UFFDIO_ZEROPAGE, zeropage)
1287 DEFINE_MMAP_CHANGING_TEST(copy, UFFDIO_COPY, copy)
1288 DEFINE_MMAP_CHANGING_TEST(move, UFFDIO_MOVE, move)
1289 DEFINE_MMAP_CHANGING_TEST(poison, UFFDIO_POISON, updated)
1290 DEFINE_MMAP_CHANGING_TEST(continue, UFFDIO_CONTINUE, mapped)
1291
/*
 * Coarse classification of a thread's state as reported on the "State:"
 * line of /proc/<tid>/status.
 */
typedef enum {
	/* We actually do not care about any state except UNINTERRUPTIBLE.. */
	THR_STATE_UNKNOWN = 0,
	/* The state letter is 'D' */
	THR_STATE_UNINTERRUPTIBLE,
} thread_state;
1297
/* Polling interval helper: sleep for 1000 microseconds. */
static void sleep_short(void)
{
	enum { SHORT_SLEEP_USEC = 1000 };

	usleep(SHORT_SLEEP_USEC);
}
1302
thread_state_get(pid_t tid)1303 static thread_state thread_state_get(pid_t tid)
1304 {
1305 const char *header = "State:\t";
1306 char tmp[256], *p, c;
1307 FILE *fp;
1308
1309 snprintf(tmp, sizeof(tmp), "/proc/%d/status", tid);
1310 fp = fopen(tmp, "r");
1311
1312 if (!fp)
1313 return THR_STATE_UNKNOWN;
1314
1315 while (fgets(tmp, sizeof(tmp), fp)) {
1316 p = strstr(tmp, header);
1317 if (p) {
1318 /* For example, "State:\tD (disk sleep)" */
1319 c = *(p + sizeof(header) - 1);
1320 return c == 'D' ?
1321 THR_STATE_UNINTERRUPTIBLE : THR_STATE_UNKNOWN;
1322 }
1323 }
1324
1325 return THR_STATE_UNKNOWN;
1326 }
1327
/*
 * Poll thread @tid until thread_state_get() reports @state. A short sleep
 * follows every sample, including the one that finally matches.
 */
static void thread_state_until(pid_t tid, thread_state state)
{
	for (;;) {
		thread_state cur = thread_state_get(tid);

		sleep_short();
		if (cur == state)
			break;
	}
}
1337
uffd_mmap_changing_thread(void * opaque)1338 static void *uffd_mmap_changing_thread(void *opaque)
1339 {
1340 volatile pid_t *pid = opaque;
1341 int ret;
1342
1343 /* Unfortunately, it's only fetch-able from the thread itself.. */
1344 assert(*pid == 0);
1345 *pid = syscall(SYS_gettid);
1346
1347 /* Inject an event, this will hang solid until the event read */
1348 ret = madvise(area_dst, page_size, MADV_REMOVE);
1349 if (ret)
1350 err("madvise(MADV_REMOVE) failed");
1351
1352 return NULL;
1353 }
1354
uffd_consume_message(int fd)1355 static void uffd_consume_message(int fd)
1356 {
1357 struct uffd_msg msg = { 0 };
1358
1359 while (uffd_read_msg(fd, &msg));
1360 }
1361
/*
 * Check that the uffd ioctls are refused with EAGAIN while an event is
 * pending.
 *
 * A helper thread issues MADV_REMOVE on the registered area and blocks
 * until the resulting event is read. While it is blocked, COPY, ZEROPAGE,
 * MOVE, POISON and CONTINUE are each tried once; checking stops at the
 * first one that does not fail as expected (the failure is reported by
 * uffdio_verify_results()).
 *
 * Whatever the outcome, the pending event is then consumed and the helper
 * thread joined, so that no blocked thread is left behind for the caller's
 * teardown to race with.
 *
 * @targs: per-test arguments (not used by this test).
 */
static void uffd_mmap_changing_test(uffd_test_args_t *targs)
{
	/*
	 * This stores the real PID (which can be different from how tid is
	 * defined..) for the child thread, 0 means not initialized.
	 */
	pid_t pid = 0;
	pthread_t tid;
	bool ok;
	int ret;

	if (uffd_register(uffd, area_dst, nr_pages * page_size,
			  true, false, false))
		err("uffd_register() failed");

	/* Create a thread to generate the racy event */
	ret = pthread_create(&tid, NULL, uffd_mmap_changing_thread, &pid);
	if (ret)
		err("pthread_create() failed");

	/*
	 * Wait until the thread setup the pid. Read it through a
	 * volatile-qualified lvalue so that every iteration reloads it from
	 * memory; casting the value itself to volatile would have no effect.
	 */
	while (!*(volatile pid_t *)&pid)
		sleep_short();

	/* Wait until the thread hangs at REMOVE event */
	thread_state_until(pid, THR_STATE_UNINTERRUPTIBLE);

	/* Short-circuits: later ioctls are not tried once one has failed */
	ok = uffdio_mmap_changing_test_copy(uffd) &&
	     uffdio_mmap_changing_test_zeropage(uffd) &&
	     uffdio_mmap_changing_test_move(uffd) &&
	     uffdio_mmap_changing_test_poison(uffd) &&
	     uffdio_mmap_changing_test_continue(uffd);

	/*
	 * Recycle everything, pass or fail. Start by reading the event so
	 * as to kick the thread roll again..
	 */
	uffd_consume_message(uffd);

	ret = pthread_join(tid, NULL);
	assert(ret == 0);

	if (ok)
		uffd_test_pass();
}
1417
prevent_hugepages(const char ** errmsg)1418 static int prevent_hugepages(const char **errmsg)
1419 {
1420 /* This should be done before source area is populated */
1421 if (madvise(area_src, nr_pages * page_size, MADV_NOHUGEPAGE)) {
1422 /* Ignore only if CONFIG_TRANSPARENT_HUGEPAGE=n */
1423 if (errno != EINVAL) {
1424 if (errmsg)
1425 *errmsg = "madvise(MADV_NOHUGEPAGE) failed";
1426 return -errno;
1427 }
1428 }
1429 return 0;
1430 }
1431
request_hugepages(const char ** errmsg)1432 static int request_hugepages(const char **errmsg)
1433 {
1434 /* This should be done before source area is populated */
1435 if (madvise(area_src, nr_pages * page_size, MADV_HUGEPAGE)) {
1436 if (errmsg) {
1437 *errmsg = (errno == EINVAL) ?
1438 "CONFIG_TRANSPARENT_HUGEPAGE is not set" :
1439 "madvise(MADV_HUGEPAGE) failed";
1440 }
1441 return -errno;
1442 }
1443 return 0;
1444 }
1445
/*
 * Hooks for the "move" test: the post_alloc hook marks the source area
 * MADV_NOHUGEPAGE.
 */
struct uffd_test_case_ops uffd_move_test_case_ops = {
	.post_alloc = prevent_hugepages,
};
1449
/*
 * Hooks for the "move-pmd" and "move-pmd-split" tests: the post_alloc hook
 * marks the source area MADV_HUGEPAGE.
 */
struct uffd_test_case_ops uffd_move_test_pmd_case_ops = {
	.post_alloc = request_hugepages,
};
1453
1454 /*
1455 * Test the returned uffdio_register.ioctls with different register modes.
1456 * Note that _UFFDIO_ZEROPAGE is tested separately in the zeropage test.
1457 */
1458 static void
do_register_ioctls_test(uffd_test_args_t * args,bool miss,bool wp,bool minor)1459 do_register_ioctls_test(uffd_test_args_t *args, bool miss, bool wp, bool minor)
1460 {
1461 uint64_t ioctls = 0, expected = BIT_ULL(_UFFDIO_WAKE);
1462 mem_type_t *mem_type = args->mem_type;
1463 int ret;
1464
1465 ret = uffd_register_with_ioctls(uffd, area_dst, page_size,
1466 miss, wp, minor, &ioctls);
1467
1468 /*
1469 * Handle special cases of UFFDIO_REGISTER here where it should
1470 * just fail with -EINVAL first..
1471 *
1472 * Case 1: register MINOR on anon
1473 * Case 2: register with no mode selected
1474 */
1475 if ((minor && (mem_type->mem_flag == MEM_ANON)) ||
1476 (!miss && !wp && !minor)) {
1477 if (ret != -EINVAL)
1478 err("register (miss=%d, wp=%d, minor=%d) failed "
1479 "with wrong errno=%d", miss, wp, minor, ret);
1480 return;
1481 }
1482
1483 /* UFFDIO_REGISTER should succeed, then check ioctls returned */
1484 if (miss)
1485 expected |= BIT_ULL(_UFFDIO_COPY);
1486 if (wp)
1487 expected |= BIT_ULL(_UFFDIO_WRITEPROTECT);
1488 if (minor)
1489 expected |= BIT_ULL(_UFFDIO_CONTINUE);
1490
1491 if ((ioctls & expected) != expected)
1492 err("unexpected uffdio_register.ioctls "
1493 "(miss=%d, wp=%d, minor=%d): expected=0x%"PRIx64", "
1494 "returned=0x%"PRIx64, miss, wp, minor, expected, ioctls);
1495
1496 if (uffd_unregister(uffd, area_dst, page_size))
1497 err("unregister");
1498 }
1499
/*
 * Run do_register_ioctls_test() for all eight miss/wp/minor combinations,
 * with minor varying fastest and miss slowest, then mark the test passed.
 */
static void uffd_register_ioctls_test(uffd_test_args_t *args)
{
	unsigned int modes;

	/* bit 2: miss, bit 1: wp, bit 0: minor */
	for (modes = 0; modes < 8; modes++)
		do_register_ioctls_test(args, !!(modes & 4), !!(modes & 2),
					!!(modes & 1));

	uffd_test_pass();
}
1511
/*
 * Table of all unit tests. main() walks it in order and, for every entry
 * matching the optional name filter, runs uffd_fn once per memory type
 * selected in mem_targets. A run is skipped when uffd_feature_supported()
 * rejects the entry or when environment setup fails.
 */
uffd_test_case_t uffd_tests[] = {
	{
		/* Test returned uffdio_register.ioctls. */
		.name = "register-ioctls",
		.uffd_fn = uffd_register_ioctls_test,
		.mem_targets = MEM_ALL,
		.uffd_feature_required = UFFD_FEATURE_MISSING_HUGETLBFS |
		UFFD_FEATURE_MISSING_SHMEM |
		UFFD_FEATURE_PAGEFAULT_FLAG_WP |
		UFFD_FEATURE_WP_HUGETLBFS_SHMEM |
		UFFD_FEATURE_MINOR_HUGETLBFS |
		UFFD_FEATURE_MINOR_SHMEM,
	},
	{
		.name = "zeropage",
		.uffd_fn = uffd_zeropage_test,
		.mem_targets = MEM_ALL,
		.uffd_feature_required = 0,
	},
	{
		/* Move one base page per fault; source area opts out of THP */
		.name = "move",
		.uffd_fn = uffd_move_test,
		.mem_targets = MEM_ANON,
		.uffd_feature_required = UFFD_FEATURE_MOVE,
		.test_case_ops = &uffd_move_test_case_ops,
	},
	{
		/* Move PMD-sized chunks; destination is MADV_HUGEPAGE */
		.name = "move-pmd",
		.uffd_fn = uffd_move_pmd_test,
		.mem_targets = MEM_ANON,
		.uffd_feature_required = UFFD_FEATURE_MOVE,
		.test_case_ops = &uffd_move_test_pmd_case_ops,
	},
	{
		/* Move PMD-sized chunks; destination is MADV_NOHUGEPAGE */
		.name = "move-pmd-split",
		.uffd_fn = uffd_move_pmd_split_test,
		.mem_targets = MEM_ANON,
		.uffd_feature_required = UFFD_FEATURE_MOVE,
		.test_case_ops = &uffd_move_test_pmd_case_ops,
	},
	{
		.name = "wp-fork",
		.uffd_fn = uffd_wp_fork_test,
		.mem_targets = MEM_ALL,
		.uffd_feature_required = UFFD_FEATURE_PAGEFAULT_FLAG_WP |
		UFFD_FEATURE_WP_HUGETLBFS_SHMEM,
	},
	{
		.name = "wp-fork-with-event",
		.uffd_fn = uffd_wp_fork_with_event_test,
		.mem_targets = MEM_ALL,
		.uffd_feature_required = UFFD_FEATURE_PAGEFAULT_FLAG_WP |
		UFFD_FEATURE_WP_HUGETLBFS_SHMEM |
		/* when set, child process should inherit uffd-wp bits */
		UFFD_FEATURE_EVENT_FORK,
	},
	{
		.name = "wp-fork-pin",
		.uffd_fn = uffd_wp_fork_pin_test,
		.mem_targets = MEM_ALL,
		.uffd_feature_required = UFFD_FEATURE_PAGEFAULT_FLAG_WP |
		UFFD_FEATURE_WP_HUGETLBFS_SHMEM,
	},
	{
		.name = "wp-fork-pin-with-event",
		.uffd_fn = uffd_wp_fork_pin_with_event_test,
		.mem_targets = MEM_ALL,
		.uffd_feature_required = UFFD_FEATURE_PAGEFAULT_FLAG_WP |
		UFFD_FEATURE_WP_HUGETLBFS_SHMEM |
		/* when set, child process should inherit uffd-wp bits */
		UFFD_FEATURE_EVENT_FORK,
	},
	{
		.name = "wp-unpopulated",
		.uffd_fn = uffd_wp_unpopulated_test,
		.mem_targets = MEM_ANON,
		.uffd_feature_required =
		UFFD_FEATURE_PAGEFAULT_FLAG_WP | UFFD_FEATURE_WP_UNPOPULATED,
	},
	{
		.name = "minor",
		.uffd_fn = uffd_minor_test,
		.mem_targets = MEM_SHMEM | MEM_HUGETLB,
		.uffd_feature_required =
		UFFD_FEATURE_MINOR_HUGETLBFS | UFFD_FEATURE_MINOR_SHMEM,
	},
	{
		.name = "minor-wp",
		.uffd_fn = uffd_minor_wp_test,
		.mem_targets = MEM_SHMEM | MEM_HUGETLB,
		.uffd_feature_required =
		UFFD_FEATURE_MINOR_HUGETLBFS | UFFD_FEATURE_MINOR_SHMEM |
		UFFD_FEATURE_PAGEFAULT_FLAG_WP |
		/*
		 * HACK: here we leveraged WP_UNPOPULATED to detect whether
		 * minor mode supports wr-protect. There's no feature flag
		 * for it so this is the best we can test against.
		 */
		UFFD_FEATURE_WP_UNPOPULATED,
	},
	{
		.name = "minor-collapse",
		.uffd_fn = uffd_minor_collapse_test,
		/* MADV_COLLAPSE only works with shmem */
		.mem_targets = MEM_SHMEM,
		/* We can't test MADV_COLLAPSE, so try our luck */
		.uffd_feature_required = UFFD_FEATURE_MINOR_SHMEM,
	},
	{
		.name = "sigbus",
		.uffd_fn = uffd_sigbus_test,
		.mem_targets = MEM_ALL,
		.uffd_feature_required = UFFD_FEATURE_SIGBUS |
		UFFD_FEATURE_EVENT_FORK,
	},
	{
		.name = "sigbus-wp",
		.uffd_fn = uffd_sigbus_wp_test,
		.mem_targets = MEM_ALL,
		.uffd_feature_required = UFFD_FEATURE_SIGBUS |
		UFFD_FEATURE_EVENT_FORK | UFFD_FEATURE_PAGEFAULT_FLAG_WP |
		UFFD_FEATURE_WP_HUGETLBFS_SHMEM,
	},
	{
		.name = "events",
		.uffd_fn = uffd_events_test,
		.mem_targets = MEM_ALL,
		.uffd_feature_required = UFFD_FEATURE_EVENT_FORK |
		UFFD_FEATURE_EVENT_REMAP | UFFD_FEATURE_EVENT_REMOVE,
	},
	{
		.name = "events-wp",
		.uffd_fn = uffd_events_wp_test,
		.mem_targets = MEM_ALL,
		.uffd_feature_required = UFFD_FEATURE_EVENT_FORK |
		UFFD_FEATURE_EVENT_REMAP | UFFD_FEATURE_EVENT_REMOVE |
		UFFD_FEATURE_PAGEFAULT_FLAG_WP |
		UFFD_FEATURE_WP_HUGETLBFS_SHMEM,
	},
	{
		.name = "poison",
		.uffd_fn = uffd_poison_test,
		.mem_targets = MEM_ALL,
		.uffd_feature_required = UFFD_FEATURE_POISON,
	},
	{
		.name = "mmap-changing",
		.uffd_fn = uffd_mmap_changing_test,
		/*
		 * There's no point running this test over all mem types as
		 * they share the same code paths.
		 *
		 * Choose shmem for simplicity, because (1) shmem supports
		 * MINOR mode to cover UFFDIO_CONTINUE, and (2) shmem is
		 * almost always available (unlike hugetlb). Here we
		 * abused SHMEM for UFFDIO_MOVE, but the test we want to
		 * cover doesn't yet need the correct memory type..
		 */
		.mem_targets = MEM_SHMEM,
		/*
		 * Any UFFD_FEATURE_EVENT_* should work to trigger the
		 * race logically, but choose the simplest (REMOVE).
		 *
		 * Meanwhile, since we'll cover quite a few new ioctl()s
		 * (CONTINUE, POISON, MOVE), skip this test for old kernels
		 * by choosing all of them.
		 */
		.uffd_feature_required = UFFD_FEATURE_EVENT_REMOVE |
		UFFD_FEATURE_MOVE | UFFD_FEATURE_POISON |
		UFFD_FEATURE_MINOR_SHMEM,
	},
};
1684
usage(const char * prog)1685 static void usage(const char *prog)
1686 {
1687 printf("usage: %s [-f TESTNAME]\n", prog);
1688 puts("");
1689 puts(" -f: test name to filter (e.g., event)");
1690 puts(" -h: show the help msg");
1691 puts(" -l: list tests only");
1692 puts("");
1693 exit(KSFT_FAIL);
1694 }
1695
main(int argc,char * argv[])1696 int main(int argc, char *argv[])
1697 {
1698 int n_tests = sizeof(uffd_tests) / sizeof(uffd_test_case_t);
1699 int n_mems = sizeof(mem_types) / sizeof(mem_type_t);
1700 const char *test_filter = NULL;
1701 bool list_only = false;
1702 uffd_test_case_t *test;
1703 mem_type_t *mem_type;
1704 uffd_test_args_t args;
1705 const char *errmsg;
1706 int has_uffd, opt;
1707 int i, j;
1708
1709 while ((opt = getopt(argc, argv, "f:hl")) != -1) {
1710 switch (opt) {
1711 case 'f':
1712 test_filter = optarg;
1713 break;
1714 case 'l':
1715 list_only = true;
1716 break;
1717 case 'h':
1718 default:
1719 /* Unknown */
1720 usage(argv[0]);
1721 break;
1722 }
1723 }
1724
1725 if (!test_filter && !list_only) {
1726 has_uffd = test_uffd_api(false);
1727 has_uffd |= test_uffd_api(true);
1728
1729 if (!has_uffd) {
1730 printf("Userfaultfd not supported or unprivileged, skip all tests\n");
1731 exit(KSFT_SKIP);
1732 }
1733 }
1734
1735 for (i = 0; i < n_tests; i++) {
1736 test = &uffd_tests[i];
1737 if (test_filter && !strstr(test->name, test_filter))
1738 continue;
1739 if (list_only) {
1740 printf("%s\n", test->name);
1741 continue;
1742 }
1743 for (j = 0; j < n_mems; j++) {
1744 mem_type = &mem_types[j];
1745 if (!(test->mem_targets & mem_type->mem_flag))
1746 continue;
1747
1748 uffd_test_start("%s on %s", test->name, mem_type->name);
1749 if ((mem_type->mem_flag == MEM_HUGETLB ||
1750 mem_type->mem_flag == MEM_HUGETLB_PRIVATE) &&
1751 (default_huge_page_size() == 0)) {
1752 uffd_test_skip("huge page size is 0, feature missing?");
1753 continue;
1754 }
1755 if (!uffd_feature_supported(test)) {
1756 uffd_test_skip("feature missing");
1757 continue;
1758 }
1759 if (uffd_setup_environment(&args, test, mem_type,
1760 &errmsg)) {
1761 uffd_test_skip(errmsg);
1762 continue;
1763 }
1764 test->uffd_fn(&args);
1765 uffd_test_ctx_clear();
1766 }
1767 }
1768
1769 if (!list_only)
1770 uffd_test_report();
1771
1772 return ksft_get_fail_cnt() ? KSFT_FAIL : KSFT_PASS;
1773 }
1774
1775 #else /* __NR_userfaultfd */
1776
1777 #warning "missing __NR_userfaultfd definition"
1778
main(void)1779 int main(void)
1780 {
1781 printf("Skipping %s (missing __NR_userfaultfd)\n", __file__);
1782 return KSFT_SKIP;
1783 }
1784
1785 #endif /* __NR_userfaultfd */
1786