1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * COW (Copy On Write) tests. 4 * 5 * Copyright 2022, Red Hat, Inc. 6 * 7 * Author(s): David Hildenbrand <david@redhat.com> 8 */ 9 #define _GNU_SOURCE 10 #include <stdlib.h> 11 #include <string.h> 12 #include <stdbool.h> 13 #include <stdint.h> 14 #include <unistd.h> 15 #include <errno.h> 16 #include <fcntl.h> 17 #include <assert.h> 18 #include <linux/mman.h> 19 #include <sys/mman.h> 20 #include <sys/ioctl.h> 21 #include <sys/wait.h> 22 #include <linux/memfd.h> 23 24 #include "local_config.h" 25 #ifdef LOCAL_CONFIG_HAVE_LIBURING 26 #include <liburing.h> 27 #endif /* LOCAL_CONFIG_HAVE_LIBURING */ 28 29 #include "../../../../mm/gup_test.h" 30 #include "../kselftest.h" 31 #include "vm_util.h" 32 #include "thp_settings.h" 33 34 static size_t pagesize; 35 static int pagemap_fd; 36 static size_t pmdsize; 37 static int nr_thpsizes; 38 static size_t thpsizes[20]; 39 static int nr_hugetlbsizes; 40 static size_t hugetlbsizes[10]; 41 static int gup_fd; 42 static bool has_huge_zeropage; 43 44 static int sz2ord(size_t size) 45 { 46 return __builtin_ctzll(size / pagesize); 47 } 48 49 static int detect_thp_sizes(size_t sizes[], int max) 50 { 51 int count = 0; 52 unsigned long orders; 53 size_t kb; 54 int i; 55 56 /* thp not supported at all. */ 57 if (!pmdsize) 58 return 0; 59 60 orders = 1UL << sz2ord(pmdsize); 61 orders |= thp_supported_orders(); 62 63 for (i = 0; orders && count < max; i++) { 64 if (!(orders & (1UL << i))) 65 continue; 66 orders &= ~(1UL << i); 67 kb = (pagesize >> 10) << i; 68 sizes[count++] = kb * 1024; 69 ksft_print_msg("[INFO] detected THP size: %zu KiB\n", kb); 70 } 71 72 return count; 73 } 74 75 static void detect_huge_zeropage(void) 76 { 77 int fd = open("/sys/kernel/mm/transparent_hugepage/use_zero_page", 78 O_RDONLY); 79 size_t enabled = 0; 80 char buf[15]; 81 int ret; 82 83 if (fd < 0) 84 return; 85 86 ret = pread(fd, buf, sizeof(buf), 0); 87 if (ret > 0 && ret < sizeof(buf)) { 88 buf[ret] = 0; 89 90 enabled = strtoul(buf, NULL, 10); 91 if (enabled == 1) { 92 has_huge_zeropage = true; 93 ksft_print_msg("[INFO] huge zeropage is enabled\n"); 94 } 95 } 96 97 close(fd); 98 } 99 100 static bool range_is_swapped(void *addr, size_t size) 101 { 102 for (; size; addr += pagesize, size -= pagesize) 103 if (!pagemap_is_swapped(pagemap_fd, addr)) 104 return false; 105 return true; 106 } 107 108 struct comm_pipes { 109 int child_ready[2]; 110 int parent_ready[2]; 111 }; 112 113 static int setup_comm_pipes(struct comm_pipes *comm_pipes) 114 { 115 if (pipe(comm_pipes->child_ready) < 0) { 116 ksft_perror("pipe()"); 117 return -errno; 118 } 119 if (pipe(comm_pipes->parent_ready) < 0) { 120 ksft_perror("pipe()"); 121 close(comm_pipes->child_ready[0]); 122 close(comm_pipes->child_ready[1]); 123 return -errno; 124 } 125 126 return 0; 127 } 128 129 static void close_comm_pipes(struct comm_pipes *comm_pipes) 130 { 131 close(comm_pipes->child_ready[0]); 132 close(comm_pipes->child_ready[1]); 133 close(comm_pipes->parent_ready[0]); 134 close(comm_pipes->parent_ready[1]); 135 } 136 137 static int child_memcmp_fn(char *mem, size_t size, 138 struct comm_pipes *comm_pipes) 139 { 140 char *old = malloc(size); 141 char buf; 142 143 /* Backup the original content. */ 144 memcpy(old, mem, size); 145 146 /* Wait until the parent modified the page. */ 147 write(comm_pipes->child_ready[1], "0", 1); 148 while (read(comm_pipes->parent_ready[0], &buf, 1) != 1) 149 ; 150 151 /* See if we still read the old values. */ 152 return memcmp(old, mem, size); 153 } 154 155 static int child_vmsplice_memcmp_fn(char *mem, size_t size, 156 struct comm_pipes *comm_pipes) 157 { 158 struct iovec iov = { 159 .iov_base = mem, 160 .iov_len = size, 161 }; 162 ssize_t cur, total, transferred; 163 char *old, *new; 164 int fds[2]; 165 char buf; 166 167 old = malloc(size); 168 new = malloc(size); 169 170 /* Backup the original content. */ 171 memcpy(old, mem, size); 172 173 if (pipe(fds) < 0) 174 return -errno; 175 176 /* Trigger a read-only pin. */ 177 transferred = vmsplice(fds[1], &iov, 1, 0); 178 if (transferred < 0) 179 return -errno; 180 if (transferred == 0) 181 return -EINVAL; 182 183 /* Unmap it from our page tables. */ 184 if (munmap(mem, size) < 0) 185 return -errno; 186 187 /* Wait until the parent modified it. */ 188 write(comm_pipes->child_ready[1], "0", 1); 189 while (read(comm_pipes->parent_ready[0], &buf, 1) != 1) 190 ; 191 192 /* See if we still read the old values via the pipe. */ 193 for (total = 0; total < transferred; total += cur) { 194 cur = read(fds[0], new + total, transferred - total); 195 if (cur < 0) 196 return -errno; 197 } 198 199 return memcmp(old, new, transferred); 200 } 201 202 typedef int (*child_fn)(char *mem, size_t size, struct comm_pipes *comm_pipes); 203 204 static void do_test_cow_in_parent(char *mem, size_t size, bool do_mprotect, 205 child_fn fn, bool xfail) 206 { 207 struct comm_pipes comm_pipes; 208 char buf; 209 int ret; 210 211 ret = setup_comm_pipes(&comm_pipes); 212 if (ret) { 213 log_test_result(KSFT_FAIL); 214 return; 215 } 216 217 ret = fork(); 218 if (ret < 0) { 219 ksft_perror("fork() failed"); 220 log_test_result(KSFT_FAIL); 221 goto close_comm_pipes; 222 } else if (!ret) { 223 exit(fn(mem, size, &comm_pipes)); 224 } 225 226 while (read(comm_pipes.child_ready[0], &buf, 1) != 1) 227 ; 228 229 if (do_mprotect) { 230 /* 231 * mprotect() optimizations might try avoiding 232 * write-faults by directly mapping pages writable. 233 */ 234 ret = mprotect(mem, size, PROT_READ); 235 if (ret) { 236 ksft_perror("mprotect() failed"); 237 log_test_result(KSFT_FAIL); 238 write(comm_pipes.parent_ready[1], "0", 1); 239 wait(&ret); 240 goto close_comm_pipes; 241 } 242 243 ret = mprotect(mem, size, PROT_READ|PROT_WRITE); 244 if (ret) { 245 ksft_perror("mprotect() failed"); 246 log_test_result(KSFT_FAIL); 247 write(comm_pipes.parent_ready[1], "0", 1); 248 wait(&ret); 249 goto close_comm_pipes; 250 } 251 } 252 253 /* Modify the page. */ 254 memset(mem, 0xff, size); 255 write(comm_pipes.parent_ready[1], "0", 1); 256 257 wait(&ret); 258 if (WIFEXITED(ret)) 259 ret = WEXITSTATUS(ret); 260 else 261 ret = -EINVAL; 262 263 if (!ret) { 264 log_test_result(KSFT_PASS); 265 } else if (xfail) { 266 /* 267 * With hugetlb, some vmsplice() tests are currently expected to 268 * fail because (a) harder to fix and (b) nobody really cares. 269 * Flag them as expected failure for now. 270 */ 271 log_test_result(KSFT_XFAIL); 272 } else { 273 log_test_result(KSFT_FAIL); 274 } 275 close_comm_pipes: 276 close_comm_pipes(&comm_pipes); 277 } 278 279 static void test_cow_in_parent(char *mem, size_t size, bool is_hugetlb) 280 { 281 do_test_cow_in_parent(mem, size, false, child_memcmp_fn, false); 282 } 283 284 static void test_cow_in_parent_mprotect(char *mem, size_t size, bool is_hugetlb) 285 { 286 do_test_cow_in_parent(mem, size, true, child_memcmp_fn, false); 287 } 288 289 static void test_vmsplice_in_child(char *mem, size_t size, bool is_hugetlb) 290 { 291 do_test_cow_in_parent(mem, size, false, child_vmsplice_memcmp_fn, 292 is_hugetlb); 293 } 294 295 static void test_vmsplice_in_child_mprotect(char *mem, size_t size, 296 bool is_hugetlb) 297 { 298 do_test_cow_in_parent(mem, size, true, child_vmsplice_memcmp_fn, 299 is_hugetlb); 300 } 301 302 static void do_test_vmsplice_in_parent(char *mem, size_t size, 303 bool before_fork, bool xfail) 304 { 305 struct iovec iov = { 306 .iov_base = mem, 307 .iov_len = size, 308 }; 309 ssize_t cur, total, transferred = 0; 310 struct comm_pipes comm_pipes; 311 char *old, *new; 312 int ret, fds[2]; 313 char buf; 314 315 old = malloc(size); 316 new = malloc(size); 317 318 memcpy(old, mem, size); 319 320 ret = setup_comm_pipes(&comm_pipes); 321 if (ret) { 322 log_test_result(KSFT_FAIL); 323 goto free; 324 } 325 326 if (pipe(fds) < 0) { 327 ksft_perror("pipe() failed"); 328 log_test_result(KSFT_FAIL); 329 goto close_comm_pipes; 330 } 331 332 if (before_fork) { 333 transferred = vmsplice(fds[1], &iov, 1, 0); 334 if (transferred <= 0) { 335 ksft_print_msg("vmsplice() failed\n"); 336 log_test_result(KSFT_FAIL); 337 goto close_pipe; 338 } 339 } 340 341 ret = fork(); 342 if (ret < 0) { 343 ksft_perror("fork() failed\n"); 344 log_test_result(KSFT_FAIL); 345 goto close_pipe; 346 } else if (!ret) { 347 write(comm_pipes.child_ready[1], "0", 1); 348 while (read(comm_pipes.parent_ready[0], &buf, 1) != 1) 349 ; 350 /* Modify page content in the child. */ 351 memset(mem, 0xff, size); 352 exit(0); 353 } 354 355 if (!before_fork) { 356 transferred = vmsplice(fds[1], &iov, 1, 0); 357 if (transferred <= 0) { 358 ksft_perror("vmsplice() failed"); 359 log_test_result(KSFT_FAIL); 360 wait(&ret); 361 goto close_pipe; 362 } 363 } 364 365 while (read(comm_pipes.child_ready[0], &buf, 1) != 1) 366 ; 367 if (munmap(mem, size) < 0) { 368 ksft_perror("munmap() failed"); 369 log_test_result(KSFT_FAIL); 370 goto close_pipe; 371 } 372 write(comm_pipes.parent_ready[1], "0", 1); 373 374 /* Wait until the child is done writing. */ 375 wait(&ret); 376 if (!WIFEXITED(ret)) { 377 ksft_perror("wait() failed"); 378 log_test_result(KSFT_FAIL); 379 goto close_pipe; 380 } 381 382 /* See if we still read the old values. */ 383 for (total = 0; total < transferred; total += cur) { 384 cur = read(fds[0], new + total, transferred - total); 385 if (cur < 0) { 386 ksft_perror("read() failed"); 387 log_test_result(KSFT_FAIL); 388 goto close_pipe; 389 } 390 } 391 392 if (!memcmp(old, new, transferred)) { 393 log_test_result(KSFT_PASS); 394 } else if (xfail) { 395 /* 396 * With hugetlb, some vmsplice() tests are currently expected to 397 * fail because (a) harder to fix and (b) nobody really cares. 398 * Flag them as expected failure for now. 399 */ 400 log_test_result(KSFT_XFAIL); 401 } else { 402 log_test_result(KSFT_FAIL); 403 } 404 close_pipe: 405 close(fds[0]); 406 close(fds[1]); 407 close_comm_pipes: 408 close_comm_pipes(&comm_pipes); 409 free: 410 free(old); 411 free(new); 412 } 413 414 static void test_vmsplice_before_fork(char *mem, size_t size, bool is_hugetlb) 415 { 416 do_test_vmsplice_in_parent(mem, size, true, is_hugetlb); 417 } 418 419 static void test_vmsplice_after_fork(char *mem, size_t size, bool is_hugetlb) 420 { 421 do_test_vmsplice_in_parent(mem, size, false, is_hugetlb); 422 } 423 424 #ifdef LOCAL_CONFIG_HAVE_LIBURING 425 static void do_test_iouring(char *mem, size_t size, bool use_fork) 426 { 427 struct comm_pipes comm_pipes; 428 struct io_uring_cqe *cqe; 429 struct io_uring_sqe *sqe; 430 struct io_uring ring; 431 ssize_t cur, total; 432 struct iovec iov; 433 char *buf, *tmp; 434 int ret, fd; 435 FILE *file; 436 437 ret = setup_comm_pipes(&comm_pipes); 438 if (ret) { 439 log_test_result(KSFT_FAIL); 440 return; 441 } 442 443 file = tmpfile(); 444 if (!file) { 445 ksft_perror("tmpfile() failed"); 446 log_test_result(KSFT_FAIL); 447 goto close_comm_pipes; 448 } 449 fd = fileno(file); 450 assert(fd); 451 452 tmp = malloc(size); 453 if (!tmp) { 454 ksft_print_msg("malloc() failed\n"); 455 log_test_result(KSFT_FAIL); 456 goto close_file; 457 } 458 459 /* Skip on errors, as we might just lack kernel support. */ 460 ret = io_uring_queue_init(1, &ring, 0); 461 if (ret < 0) { 462 ksft_print_msg("io_uring_queue_init() failed\n"); 463 log_test_result(KSFT_SKIP); 464 goto free_tmp; 465 } 466 467 /* 468 * Register the range as a fixed buffer. This will FOLL_WRITE | FOLL_PIN 469 * | FOLL_LONGTERM the range. 470 * 471 * Skip on errors, as we might just lack kernel support or might not 472 * have sufficient MEMLOCK permissions. 473 */ 474 iov.iov_base = mem; 475 iov.iov_len = size; 476 ret = io_uring_register_buffers(&ring, &iov, 1); 477 if (ret) { 478 ksft_print_msg("io_uring_register_buffers() failed\n"); 479 log_test_result(KSFT_SKIP); 480 goto queue_exit; 481 } 482 483 if (use_fork) { 484 /* 485 * fork() and keep the child alive until we're done. Note that 486 * we expect the pinned page to not get shared with the child. 487 */ 488 ret = fork(); 489 if (ret < 0) { 490 ksft_perror("fork() failed"); 491 log_test_result(KSFT_FAIL); 492 goto unregister_buffers; 493 } else if (!ret) { 494 write(comm_pipes.child_ready[1], "0", 1); 495 while (read(comm_pipes.parent_ready[0], &buf, 1) != 1) 496 ; 497 exit(0); 498 } 499 500 while (read(comm_pipes.child_ready[0], &buf, 1) != 1) 501 ; 502 } else { 503 /* 504 * Map the page R/O into the page table. Enable softdirty 505 * tracking to stop the page from getting mapped R/W immediately 506 * again by mprotect() optimizations. Note that we don't have an 507 * easy way to test if that worked (the pagemap does not export 508 * if the page is mapped R/O vs. R/W). 509 */ 510 ret = mprotect(mem, size, PROT_READ); 511 if (ret) { 512 ksft_perror("mprotect() failed"); 513 log_test_result(KSFT_FAIL); 514 goto unregister_buffers; 515 } 516 517 clear_softdirty(); 518 ret = mprotect(mem, size, PROT_READ | PROT_WRITE); 519 if (ret) { 520 ksft_perror("mprotect() failed"); 521 log_test_result(KSFT_FAIL); 522 goto unregister_buffers; 523 } 524 } 525 526 /* 527 * Modify the page and write page content as observed by the fixed 528 * buffer pin to the file so we can verify it. 529 */ 530 memset(mem, 0xff, size); 531 sqe = io_uring_get_sqe(&ring); 532 if (!sqe) { 533 ksft_print_msg("io_uring_get_sqe() failed\n"); 534 log_test_result(KSFT_FAIL); 535 goto quit_child; 536 } 537 io_uring_prep_write_fixed(sqe, fd, mem, size, 0, 0); 538 539 ret = io_uring_submit(&ring); 540 if (ret < 0) { 541 ksft_print_msg("io_uring_submit() failed\n"); 542 log_test_result(KSFT_FAIL); 543 goto quit_child; 544 } 545 546 ret = io_uring_wait_cqe(&ring, &cqe); 547 if (ret < 0) { 548 ksft_print_msg("io_uring_wait_cqe() failed\n"); 549 log_test_result(KSFT_FAIL); 550 goto quit_child; 551 } 552 553 if (cqe->res != size) { 554 ksft_print_msg("write_fixed failed\n"); 555 log_test_result(KSFT_FAIL); 556 goto quit_child; 557 } 558 io_uring_cqe_seen(&ring, cqe); 559 560 /* Read back the file content to the temporary buffer. */ 561 total = 0; 562 while (total < size) { 563 cur = pread(fd, tmp + total, size - total, total); 564 if (cur < 0) { 565 ksft_print_msg("pread() failed\n"); 566 log_test_result(KSFT_FAIL); 567 goto quit_child; 568 } 569 total += cur; 570 } 571 572 /* Finally, check if we read what we expected. */ 573 if (!memcmp(mem, tmp, size)) 574 log_test_result(KSFT_PASS); 575 else 576 log_test_result(KSFT_FAIL); 577 578 quit_child: 579 if (use_fork) { 580 write(comm_pipes.parent_ready[1], "0", 1); 581 wait(&ret); 582 } 583 unregister_buffers: 584 io_uring_unregister_buffers(&ring); 585 queue_exit: 586 io_uring_queue_exit(&ring); 587 free_tmp: 588 free(tmp); 589 close_file: 590 fclose(file); 591 close_comm_pipes: 592 close_comm_pipes(&comm_pipes); 593 } 594 595 static void test_iouring_ro(char *mem, size_t size, bool is_hugetlb) 596 { 597 do_test_iouring(mem, size, false); 598 } 599 600 static void test_iouring_fork(char *mem, size_t size, bool is_hugetlb) 601 { 602 do_test_iouring(mem, size, true); 603 } 604 605 #endif /* LOCAL_CONFIG_HAVE_LIBURING */ 606 607 enum ro_pin_test { 608 RO_PIN_TEST, 609 RO_PIN_TEST_SHARED, 610 RO_PIN_TEST_PREVIOUSLY_SHARED, 611 RO_PIN_TEST_RO_EXCLUSIVE, 612 }; 613 614 static void do_test_ro_pin(char *mem, size_t size, enum ro_pin_test test, 615 bool fast) 616 { 617 struct pin_longterm_test args; 618 struct comm_pipes comm_pipes; 619 char *tmp, buf; 620 __u64 tmp_val; 621 int ret; 622 623 if (gup_fd < 0) { 624 ksft_print_msg("gup_test not available\n"); 625 log_test_result(KSFT_SKIP); 626 return; 627 } 628 629 tmp = malloc(size); 630 if (!tmp) { 631 ksft_print_msg("malloc() failed\n"); 632 log_test_result(KSFT_FAIL); 633 return; 634 } 635 636 ret = setup_comm_pipes(&comm_pipes); 637 if (ret) { 638 log_test_result(KSFT_FAIL); 639 goto free_tmp; 640 } 641 642 switch (test) { 643 case RO_PIN_TEST: 644 break; 645 case RO_PIN_TEST_SHARED: 646 case RO_PIN_TEST_PREVIOUSLY_SHARED: 647 /* 648 * Share the pages with our child. As the pages are not pinned, 649 * this should just work. 650 */ 651 ret = fork(); 652 if (ret < 0) { 653 ksft_perror("fork() failed"); 654 log_test_result(KSFT_FAIL); 655 goto close_comm_pipes; 656 } else if (!ret) { 657 write(comm_pipes.child_ready[1], "0", 1); 658 while (read(comm_pipes.parent_ready[0], &buf, 1) != 1) 659 ; 660 exit(0); 661 } 662 663 /* Wait until our child is ready. */ 664 while (read(comm_pipes.child_ready[0], &buf, 1) != 1) 665 ; 666 667 if (test == RO_PIN_TEST_PREVIOUSLY_SHARED) { 668 /* 669 * Tell the child to quit now and wait until it quit. 670 * The pages should now be mapped R/O into our page 671 * tables, but they are no longer shared. 672 */ 673 write(comm_pipes.parent_ready[1], "0", 1); 674 wait(&ret); 675 if (!WIFEXITED(ret)) 676 ksft_print_msg("[INFO] wait() failed\n"); 677 } 678 break; 679 case RO_PIN_TEST_RO_EXCLUSIVE: 680 /* 681 * Map the page R/O into the page table. Enable softdirty 682 * tracking to stop the page from getting mapped R/W immediately 683 * again by mprotect() optimizations. Note that we don't have an 684 * easy way to test if that worked (the pagemap does not export 685 * if the page is mapped R/O vs. R/W). 686 */ 687 ret = mprotect(mem, size, PROT_READ); 688 clear_softdirty(); 689 ret |= mprotect(mem, size, PROT_READ | PROT_WRITE); 690 if (ret) { 691 ksft_perror("mprotect() failed"); 692 log_test_result(KSFT_FAIL); 693 goto close_comm_pipes; 694 } 695 break; 696 default: 697 assert(false); 698 } 699 700 /* Take a R/O pin. This should trigger unsharing. */ 701 args.addr = (__u64)(uintptr_t)mem; 702 args.size = size; 703 args.flags = fast ? PIN_LONGTERM_TEST_FLAG_USE_FAST : 0; 704 ret = ioctl(gup_fd, PIN_LONGTERM_TEST_START, &args); 705 if (ret) { 706 if (errno == EINVAL) 707 ret = KSFT_SKIP; 708 else 709 ret = KSFT_FAIL; 710 ksft_perror("PIN_LONGTERM_TEST_START failed"); 711 log_test_result(ret); 712 goto wait; 713 } 714 715 /* Modify the page. */ 716 memset(mem, 0xff, size); 717 718 /* 719 * Read back the content via the pin to the temporary buffer and 720 * test if we observed the modification. 721 */ 722 tmp_val = (__u64)(uintptr_t)tmp; 723 ret = ioctl(gup_fd, PIN_LONGTERM_TEST_READ, &tmp_val); 724 if (ret) { 725 ksft_perror("PIN_LONGTERM_TEST_READ failed"); 726 log_test_result(KSFT_FAIL); 727 } else { 728 if (!memcmp(mem, tmp, size)) 729 log_test_result(KSFT_PASS); 730 else 731 log_test_result(KSFT_FAIL); 732 } 733 734 ret = ioctl(gup_fd, PIN_LONGTERM_TEST_STOP); 735 if (ret) 736 ksft_perror("PIN_LONGTERM_TEST_STOP failed"); 737 wait: 738 switch (test) { 739 case RO_PIN_TEST_SHARED: 740 write(comm_pipes.parent_ready[1], "0", 1); 741 wait(&ret); 742 if (!WIFEXITED(ret)) 743 ksft_perror("wait() failed"); 744 break; 745 default: 746 break; 747 } 748 close_comm_pipes: 749 close_comm_pipes(&comm_pipes); 750 free_tmp: 751 free(tmp); 752 } 753 754 static void test_ro_pin_on_shared(char *mem, size_t size, bool is_hugetlb) 755 { 756 do_test_ro_pin(mem, size, RO_PIN_TEST_SHARED, false); 757 } 758 759 static void test_ro_fast_pin_on_shared(char *mem, size_t size, bool is_hugetlb) 760 { 761 do_test_ro_pin(mem, size, RO_PIN_TEST_SHARED, true); 762 } 763 764 static void test_ro_pin_on_ro_previously_shared(char *mem, size_t size, 765 bool is_hugetlb) 766 { 767 do_test_ro_pin(mem, size, RO_PIN_TEST_PREVIOUSLY_SHARED, false); 768 } 769 770 static void test_ro_fast_pin_on_ro_previously_shared(char *mem, size_t size, 771 bool is_hugetlb) 772 { 773 do_test_ro_pin(mem, size, RO_PIN_TEST_PREVIOUSLY_SHARED, true); 774 } 775 776 static void test_ro_pin_on_ro_exclusive(char *mem, size_t size, 777 bool is_hugetlb) 778 { 779 do_test_ro_pin(mem, size, RO_PIN_TEST_RO_EXCLUSIVE, false); 780 } 781 782 static void test_ro_fast_pin_on_ro_exclusive(char *mem, size_t size, 783 bool is_hugetlb) 784 { 785 do_test_ro_pin(mem, size, RO_PIN_TEST_RO_EXCLUSIVE, true); 786 } 787 788 typedef void (*test_fn)(char *mem, size_t size, bool hugetlb); 789 790 static void do_run_with_base_page(test_fn fn, bool swapout) 791 { 792 char *mem; 793 int ret; 794 795 mem = mmap(NULL, pagesize, PROT_READ | PROT_WRITE, 796 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); 797 if (mem == MAP_FAILED) { 798 ksft_perror("mmap() failed"); 799 log_test_result(KSFT_FAIL); 800 return; 801 } 802 803 ret = madvise(mem, pagesize, MADV_NOHUGEPAGE); 804 /* Ignore if not around on a kernel. */ 805 if (ret && errno != EINVAL) { 806 ksft_perror("MADV_NOHUGEPAGE failed"); 807 log_test_result(KSFT_FAIL); 808 goto munmap; 809 } 810 811 /* Populate a base page. */ 812 memset(mem, 1, pagesize); 813 814 if (swapout) { 815 madvise(mem, pagesize, MADV_PAGEOUT); 816 if (!pagemap_is_swapped(pagemap_fd, mem)) { 817 ksft_print_msg("MADV_PAGEOUT did not work, is swap enabled?\n"); 818 log_test_result(KSFT_SKIP); 819 goto munmap; 820 } 821 } 822 823 fn(mem, pagesize, false); 824 munmap: 825 munmap(mem, pagesize); 826 } 827 828 static void run_with_base_page(test_fn fn, const char *desc) 829 { 830 log_test_start("%s ... with base page", desc); 831 do_run_with_base_page(fn, false); 832 } 833 834 static void run_with_base_page_swap(test_fn fn, const char *desc) 835 { 836 log_test_start("%s ... with swapped out base page", desc); 837 do_run_with_base_page(fn, true); 838 } 839 840 enum thp_run { 841 THP_RUN_PMD, 842 THP_RUN_PMD_SWAPOUT, 843 THP_RUN_PTE, 844 THP_RUN_PTE_SWAPOUT, 845 THP_RUN_SINGLE_PTE, 846 THP_RUN_SINGLE_PTE_SWAPOUT, 847 THP_RUN_PARTIAL_MREMAP, 848 THP_RUN_PARTIAL_SHARED, 849 }; 850 851 static void do_run_with_thp(test_fn fn, enum thp_run thp_run, size_t thpsize) 852 { 853 char *mem, *mmap_mem, *tmp, *mremap_mem = MAP_FAILED; 854 size_t size, mmap_size, mremap_size; 855 int ret; 856 857 /* For alignment purposes, we need twice the thp size. */ 858 mmap_size = 2 * thpsize; 859 mmap_mem = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, 860 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); 861 if (mmap_mem == MAP_FAILED) { 862 ksft_perror("mmap() failed"); 863 log_test_result(KSFT_FAIL); 864 return; 865 } 866 867 /* We need a THP-aligned memory area. */ 868 mem = (char *)(((uintptr_t)mmap_mem + thpsize) & ~(thpsize - 1)); 869 870 ret = madvise(mem, thpsize, MADV_HUGEPAGE); 871 if (ret) { 872 ksft_perror("MADV_HUGEPAGE failed"); 873 log_test_result(KSFT_FAIL); 874 goto munmap; 875 } 876 877 /* 878 * Try to populate a THP. Touch the first sub-page and test if 879 * we get the last sub-page populated automatically. 880 */ 881 mem[0] = 1; 882 if (!pagemap_is_populated(pagemap_fd, mem + thpsize - pagesize)) { 883 ksft_print_msg("Did not get a THP populated\n"); 884 log_test_result(KSFT_SKIP); 885 goto munmap; 886 } 887 memset(mem, 1, thpsize); 888 889 size = thpsize; 890 switch (thp_run) { 891 case THP_RUN_PMD: 892 case THP_RUN_PMD_SWAPOUT: 893 assert(thpsize == pmdsize); 894 break; 895 case THP_RUN_PTE: 896 case THP_RUN_PTE_SWAPOUT: 897 /* 898 * Trigger PTE-mapping the THP by temporarily mapping a single 899 * subpage R/O. This is a noop if the THP is not pmdsize (and 900 * therefore already PTE-mapped). 901 */ 902 ret = mprotect(mem + pagesize, pagesize, PROT_READ); 903 if (ret) { 904 ksft_perror("mprotect() failed"); 905 log_test_result(KSFT_FAIL); 906 goto munmap; 907 } 908 ret = mprotect(mem + pagesize, pagesize, PROT_READ | PROT_WRITE); 909 if (ret) { 910 ksft_perror("mprotect() failed"); 911 log_test_result(KSFT_FAIL); 912 goto munmap; 913 } 914 break; 915 case THP_RUN_SINGLE_PTE: 916 case THP_RUN_SINGLE_PTE_SWAPOUT: 917 /* 918 * Discard all but a single subpage of that PTE-mapped THP. What 919 * remains is a single PTE mapping a single subpage. 920 */ 921 ret = madvise(mem + pagesize, thpsize - pagesize, MADV_DONTNEED); 922 if (ret) { 923 ksft_perror("MADV_DONTNEED failed"); 924 log_test_result(KSFT_FAIL); 925 goto munmap; 926 } 927 size = pagesize; 928 break; 929 case THP_RUN_PARTIAL_MREMAP: 930 /* 931 * Remap half of the THP. We need some new memory location 932 * for that. 933 */ 934 mremap_size = thpsize / 2; 935 mremap_mem = mmap(NULL, mremap_size, PROT_NONE, 936 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); 937 if (mremap_mem == MAP_FAILED) { 938 ksft_perror("mmap() failed"); 939 log_test_result(KSFT_FAIL); 940 goto munmap; 941 } 942 tmp = mremap(mem + mremap_size, mremap_size, mremap_size, 943 MREMAP_MAYMOVE | MREMAP_FIXED, mremap_mem); 944 if (tmp != mremap_mem) { 945 ksft_perror("mremap() failed"); 946 log_test_result(KSFT_FAIL); 947 goto munmap; 948 } 949 size = mremap_size; 950 break; 951 case THP_RUN_PARTIAL_SHARED: 952 /* 953 * Share the first page of the THP with a child and quit the 954 * child. This will result in some parts of the THP never 955 * have been shared. 956 */ 957 ret = madvise(mem + pagesize, thpsize - pagesize, MADV_DONTFORK); 958 if (ret) { 959 ksft_perror("MADV_DONTFORK failed"); 960 log_test_result(KSFT_FAIL); 961 goto munmap; 962 } 963 ret = fork(); 964 if (ret < 0) { 965 ksft_perror("fork() failed"); 966 log_test_result(KSFT_FAIL); 967 goto munmap; 968 } else if (!ret) { 969 exit(0); 970 } 971 wait(&ret); 972 /* Allow for sharing all pages again. */ 973 ret = madvise(mem + pagesize, thpsize - pagesize, MADV_DOFORK); 974 if (ret) { 975 ksft_perror("MADV_DOFORK failed"); 976 log_test_result(KSFT_FAIL); 977 goto munmap; 978 } 979 break; 980 default: 981 assert(false); 982 } 983 984 switch (thp_run) { 985 case THP_RUN_PMD_SWAPOUT: 986 case THP_RUN_PTE_SWAPOUT: 987 case THP_RUN_SINGLE_PTE_SWAPOUT: 988 madvise(mem, size, MADV_PAGEOUT); 989 if (!range_is_swapped(mem, size)) { 990 ksft_print_msg("MADV_PAGEOUT did not work, is swap enabled?\n"); 991 log_test_result(KSFT_SKIP); 992 goto munmap; 993 } 994 break; 995 default: 996 break; 997 } 998 999 fn(mem, size, false); 1000 munmap: 1001 munmap(mmap_mem, mmap_size); 1002 if (mremap_mem != MAP_FAILED) 1003 munmap(mremap_mem, mremap_size); 1004 } 1005 1006 static void run_with_thp(test_fn fn, const char *desc, size_t size) 1007 { 1008 log_test_start("%s ... with THP (%zu kB)", 1009 desc, size / 1024); 1010 do_run_with_thp(fn, THP_RUN_PMD, size); 1011 } 1012 1013 static void run_with_thp_swap(test_fn fn, const char *desc, size_t size) 1014 { 1015 log_test_start("%s ... with swapped-out THP (%zu kB)", 1016 desc, size / 1024); 1017 do_run_with_thp(fn, THP_RUN_PMD_SWAPOUT, size); 1018 } 1019 1020 static void run_with_pte_mapped_thp(test_fn fn, const char *desc, size_t size) 1021 { 1022 log_test_start("%s ... with PTE-mapped THP (%zu kB)", 1023 desc, size / 1024); 1024 do_run_with_thp(fn, THP_RUN_PTE, size); 1025 } 1026 1027 static void run_with_pte_mapped_thp_swap(test_fn fn, const char *desc, size_t size) 1028 { 1029 log_test_start("%s ... with swapped-out, PTE-mapped THP (%zu kB)", 1030 desc, size / 1024); 1031 do_run_with_thp(fn, THP_RUN_PTE_SWAPOUT, size); 1032 } 1033 1034 static void run_with_single_pte_of_thp(test_fn fn, const char *desc, size_t size) 1035 { 1036 log_test_start("%s ... with single PTE of THP (%zu kB)", 1037 desc, size / 1024); 1038 do_run_with_thp(fn, THP_RUN_SINGLE_PTE, size); 1039 } 1040 1041 static void run_with_single_pte_of_thp_swap(test_fn fn, const char *desc, size_t size) 1042 { 1043 log_test_start("%s ... with single PTE of swapped-out THP (%zu kB)", 1044 desc, size / 1024); 1045 do_run_with_thp(fn, THP_RUN_SINGLE_PTE_SWAPOUT, size); 1046 } 1047 1048 static void run_with_partial_mremap_thp(test_fn fn, const char *desc, size_t size) 1049 { 1050 log_test_start("%s ... with partially mremap()'ed THP (%zu kB)", 1051 desc, size / 1024); 1052 do_run_with_thp(fn, THP_RUN_PARTIAL_MREMAP, size); 1053 } 1054 1055 static void run_with_partial_shared_thp(test_fn fn, const char *desc, size_t size) 1056 { 1057 log_test_start("%s ... with partially shared THP (%zu kB)", 1058 desc, size / 1024); 1059 do_run_with_thp(fn, THP_RUN_PARTIAL_SHARED, size); 1060 } 1061 1062 static void run_with_hugetlb(test_fn fn, const char *desc, size_t hugetlbsize) 1063 { 1064 int flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB; 1065 char *mem, *dummy; 1066 1067 log_test_start("%s ... with hugetlb (%zu kB)", desc, 1068 hugetlbsize / 1024); 1069 1070 flags |= __builtin_ctzll(hugetlbsize) << MAP_HUGE_SHIFT; 1071 1072 mem = mmap(NULL, hugetlbsize, PROT_READ | PROT_WRITE, flags, -1, 0); 1073 if (mem == MAP_FAILED) { 1074 ksft_perror("need more free huge pages"); 1075 log_test_result(KSFT_SKIP); 1076 return; 1077 } 1078 1079 /* Populate an huge page. */ 1080 memset(mem, 1, hugetlbsize); 1081 1082 /* 1083 * We need a total of two hugetlb pages to handle COW/unsharing 1084 * properly, otherwise we might get zapped by a SIGBUS. 1085 */ 1086 dummy = mmap(NULL, hugetlbsize, PROT_READ | PROT_WRITE, flags, -1, 0); 1087 if (dummy == MAP_FAILED) { 1088 ksft_perror("need more free huge pages"); 1089 log_test_result(KSFT_SKIP); 1090 goto munmap; 1091 } 1092 munmap(dummy, hugetlbsize); 1093 1094 fn(mem, hugetlbsize, true); 1095 munmap: 1096 munmap(mem, hugetlbsize); 1097 } 1098 1099 struct test_case { 1100 const char *desc; 1101 test_fn fn; 1102 }; 1103 1104 /* 1105 * Test cases that are specific to anonymous pages: pages in private mappings 1106 * that may get shared via COW during fork(). 1107 */ 1108 static const struct test_case anon_test_cases[] = { 1109 /* 1110 * Basic COW tests for fork() without any GUP. If we miss to break COW, 1111 * either the child can observe modifications by the parent or the 1112 * other way around. 1113 */ 1114 { 1115 "Basic COW after fork()", 1116 test_cow_in_parent, 1117 }, 1118 /* 1119 * Basic test, but do an additional mprotect(PROT_READ)+ 1120 * mprotect(PROT_READ|PROT_WRITE) in the parent before write access. 1121 */ 1122 { 1123 "Basic COW after fork() with mprotect() optimization", 1124 test_cow_in_parent_mprotect, 1125 }, 1126 /* 1127 * vmsplice() [R/O GUP] + unmap in the child; modify in the parent. If 1128 * we miss to break COW, the child observes modifications by the parent. 1129 * This is CVE-2020-29374 reported by Jann Horn. 1130 */ 1131 { 1132 "vmsplice() + unmap in child", 1133 test_vmsplice_in_child, 1134 }, 1135 /* 1136 * vmsplice() test, but do an additional mprotect(PROT_READ)+ 1137 * mprotect(PROT_READ|PROT_WRITE) in the parent before write access. 1138 */ 1139 { 1140 "vmsplice() + unmap in child with mprotect() optimization", 1141 test_vmsplice_in_child_mprotect, 1142 }, 1143 /* 1144 * vmsplice() [R/O GUP] in parent before fork(), unmap in parent after 1145 * fork(); modify in the child. If we miss to break COW, the parent 1146 * observes modifications by the child. 1147 */ 1148 { 1149 "vmsplice() before fork(), unmap in parent after fork()", 1150 test_vmsplice_before_fork, 1151 }, 1152 /* 1153 * vmsplice() [R/O GUP] + unmap in parent after fork(); modify in the 1154 * child. If we miss to break COW, the parent observes modifications by 1155 * the child. 1156 */ 1157 { 1158 "vmsplice() + unmap in parent after fork()", 1159 test_vmsplice_after_fork, 1160 }, 1161 #ifdef LOCAL_CONFIG_HAVE_LIBURING 1162 /* 1163 * Take a R/W longterm pin and then map the page R/O into the page 1164 * table to trigger a write fault on next access. When modifying the 1165 * page, the page content must be visible via the pin. 1166 */ 1167 { 1168 "R/O-mapping a page registered as iouring fixed buffer", 1169 test_iouring_ro, 1170 }, 1171 /* 1172 * Take a R/W longterm pin and then fork() a child. When modifying the 1173 * page, the page content must be visible via the pin. We expect the 1174 * pinned page to not get shared with the child. 1175 */ 1176 { 1177 "fork() with an iouring fixed buffer", 1178 test_iouring_fork, 1179 }, 1180 1181 #endif /* LOCAL_CONFIG_HAVE_LIBURING */ 1182 /* 1183 * Take a R/O longterm pin on a R/O-mapped shared anonymous page. 1184 * When modifying the page via the page table, the page content change 1185 * must be visible via the pin. 1186 */ 1187 { 1188 "R/O GUP pin on R/O-mapped shared page", 1189 test_ro_pin_on_shared, 1190 }, 1191 /* Same as above, but using GUP-fast. */ 1192 { 1193 "R/O GUP-fast pin on R/O-mapped shared page", 1194 test_ro_fast_pin_on_shared, 1195 }, 1196 /* 1197 * Take a R/O longterm pin on a R/O-mapped exclusive anonymous page that 1198 * was previously shared. When modifying the page via the page table, 1199 * the page content change must be visible via the pin. 1200 */ 1201 { 1202 "R/O GUP pin on R/O-mapped previously-shared page", 1203 test_ro_pin_on_ro_previously_shared, 1204 }, 1205 /* Same as above, but using GUP-fast. */ 1206 { 1207 "R/O GUP-fast pin on R/O-mapped previously-shared page", 1208 test_ro_fast_pin_on_ro_previously_shared, 1209 }, 1210 /* 1211 * Take a R/O longterm pin on a R/O-mapped exclusive anonymous page. 1212 * When modifying the page via the page table, the page content change 1213 * must be visible via the pin. 1214 */ 1215 { 1216 "R/O GUP pin on R/O-mapped exclusive page", 1217 test_ro_pin_on_ro_exclusive, 1218 }, 1219 /* Same as above, but using GUP-fast. */ 1220 { 1221 "R/O GUP-fast pin on R/O-mapped exclusive page", 1222 test_ro_fast_pin_on_ro_exclusive, 1223 }, 1224 }; 1225 1226 static void run_anon_test_case(struct test_case const *test_case) 1227 { 1228 int i; 1229 1230 run_with_base_page(test_case->fn, test_case->desc); 1231 run_with_base_page_swap(test_case->fn, test_case->desc); 1232 for (i = 0; i < nr_thpsizes; i++) { 1233 size_t size = thpsizes[i]; 1234 struct thp_settings settings = *thp_current_settings(); 1235 1236 settings.hugepages[sz2ord(pmdsize)].enabled = THP_NEVER; 1237 settings.hugepages[sz2ord(size)].enabled = THP_ALWAYS; 1238 thp_push_settings(&settings); 1239 1240 if (size == pmdsize) { 1241 run_with_thp(test_case->fn, test_case->desc, size); 1242 run_with_thp_swap(test_case->fn, test_case->desc, size); 1243 } 1244 1245 run_with_pte_mapped_thp(test_case->fn, test_case->desc, size); 1246 run_with_pte_mapped_thp_swap(test_case->fn, test_case->desc, size); 1247 run_with_single_pte_of_thp(test_case->fn, test_case->desc, size); 1248 run_with_single_pte_of_thp_swap(test_case->fn, test_case->desc, size); 1249 run_with_partial_mremap_thp(test_case->fn, test_case->desc, size); 1250 run_with_partial_shared_thp(test_case->fn, test_case->desc, size); 1251 1252 thp_pop_settings(); 1253 } 1254 for (i = 0; i < nr_hugetlbsizes; i++) 1255 run_with_hugetlb(test_case->fn, test_case->desc, 1256 hugetlbsizes[i]); 1257 } 1258 1259 static void run_anon_test_cases(void) 1260 { 1261 int i; 1262 1263 ksft_print_msg("[INFO] Anonymous memory tests in private mappings\n"); 1264 1265 for (i = 0; i < ARRAY_SIZE(anon_test_cases); i++) 1266 run_anon_test_case(&anon_test_cases[i]); 1267 } 1268 1269 static int tests_per_anon_test_case(void) 1270 { 1271 int tests = 2 + nr_hugetlbsizes; 1272 1273 tests += 6 * nr_thpsizes; 1274 if (pmdsize) 1275 tests += 2; 1276 return tests; 1277 } 1278 1279 enum anon_thp_collapse_test { 1280 ANON_THP_COLLAPSE_UNSHARED, 1281 ANON_THP_COLLAPSE_FULLY_SHARED, 1282 ANON_THP_COLLAPSE_LOWER_SHARED, 1283 ANON_THP_COLLAPSE_UPPER_SHARED, 1284 }; 1285 1286 static void do_test_anon_thp_collapse(char *mem, size_t size, 1287 enum anon_thp_collapse_test test) 1288 { 1289 struct comm_pipes comm_pipes; 1290 char buf; 1291 int ret; 1292 1293 ret = setup_comm_pipes(&comm_pipes); 1294 if (ret) { 1295 log_test_result(KSFT_FAIL); 1296 return; 1297 } 1298 1299 /* 1300 * Trigger PTE-mapping the THP by temporarily mapping a single subpage 1301 * R/O, such that we can try collapsing it later. 1302 */ 1303 ret = mprotect(mem + pagesize, pagesize, PROT_READ); 1304 if (ret) { 1305 ksft_perror("mprotect() failed"); 1306 log_test_result(KSFT_FAIL); 1307 goto close_comm_pipes; 1308 } 1309 ret = mprotect(mem + pagesize, pagesize, PROT_READ | PROT_WRITE); 1310 if (ret) { 1311 ksft_perror("mprotect() failed"); 1312 log_test_result(KSFT_FAIL); 1313 goto close_comm_pipes; 1314 } 1315 1316 switch (test) { 1317 case ANON_THP_COLLAPSE_UNSHARED: 1318 /* Collapse before actually COW-sharing the page. */ 1319 ret = madvise(mem, size, MADV_COLLAPSE); 1320 if (ret) { 1321 ksft_perror("MADV_COLLAPSE failed"); 1322 log_test_result(KSFT_SKIP); 1323 goto close_comm_pipes; 1324 } 1325 break; 1326 case ANON_THP_COLLAPSE_FULLY_SHARED: 1327 /* COW-share the full PTE-mapped THP. */ 1328 break; 1329 case ANON_THP_COLLAPSE_LOWER_SHARED: 1330 /* Don't COW-share the upper part of the THP. */ 1331 ret = madvise(mem + size / 2, size / 2, MADV_DONTFORK); 1332 if (ret) { 1333 ksft_perror("MADV_DONTFORK failed"); 1334 log_test_result(KSFT_FAIL); 1335 goto close_comm_pipes; 1336 } 1337 break; 1338 case ANON_THP_COLLAPSE_UPPER_SHARED: 1339 /* Don't COW-share the lower part of the THP. */ 1340 ret = madvise(mem, size / 2, MADV_DONTFORK); 1341 if (ret) { 1342 ksft_perror("MADV_DONTFORK failed"); 1343 log_test_result(KSFT_FAIL); 1344 goto close_comm_pipes; 1345 } 1346 break; 1347 default: 1348 assert(false); 1349 } 1350 1351 ret = fork(); 1352 if (ret < 0) { 1353 ksft_perror("fork() failed"); 1354 log_test_result(KSFT_FAIL); 1355 goto close_comm_pipes; 1356 } else if (!ret) { 1357 switch (test) { 1358 case ANON_THP_COLLAPSE_UNSHARED: 1359 case ANON_THP_COLLAPSE_FULLY_SHARED: 1360 exit(child_memcmp_fn(mem, size, &comm_pipes)); 1361 break; 1362 case ANON_THP_COLLAPSE_LOWER_SHARED: 1363 exit(child_memcmp_fn(mem, size / 2, &comm_pipes)); 1364 break; 1365 case ANON_THP_COLLAPSE_UPPER_SHARED: 1366 exit(child_memcmp_fn(mem + size / 2, size / 2, 1367 &comm_pipes)); 1368 break; 1369 default: 1370 assert(false); 1371 } 1372 } 1373 1374 while (read(comm_pipes.child_ready[0], &buf, 1) != 1) 1375 ; 1376 1377 switch (test) { 1378 case ANON_THP_COLLAPSE_UNSHARED: 1379 break; 1380 case ANON_THP_COLLAPSE_UPPER_SHARED: 1381 case ANON_THP_COLLAPSE_LOWER_SHARED: 1382 /* 1383 * Revert MADV_DONTFORK such that we merge the VMAs and are 1384 * able to actually collapse. 1385 */ 1386 ret = madvise(mem, size, MADV_DOFORK); 1387 if (ret) { 1388 ksft_perror("MADV_DOFORK failed"); 1389 log_test_result(KSFT_FAIL); 1390 write(comm_pipes.parent_ready[1], "0", 1); 1391 wait(&ret); 1392 goto close_comm_pipes; 1393 } 1394 /* FALLTHROUGH */ 1395 case ANON_THP_COLLAPSE_FULLY_SHARED: 1396 /* Collapse before anyone modified the COW-shared page. */ 1397 ret = madvise(mem, size, MADV_COLLAPSE); 1398 if (ret) { 1399 ksft_perror("MADV_COLLAPSE failed"); 1400 log_test_result(KSFT_SKIP); 1401 write(comm_pipes.parent_ready[1], "0", 1); 1402 wait(&ret); 1403 goto close_comm_pipes; 1404 } 1405 break; 1406 default: 1407 assert(false); 1408 } 1409 1410 /* Modify the page. */ 1411 memset(mem, 0xff, size); 1412 write(comm_pipes.parent_ready[1], "0", 1); 1413 1414 wait(&ret); 1415 if (WIFEXITED(ret)) 1416 ret = WEXITSTATUS(ret); 1417 else 1418 ret = -EINVAL; 1419 1420 if (!ret) 1421 log_test_result(KSFT_PASS); 1422 else 1423 log_test_result(KSFT_FAIL); 1424 close_comm_pipes: 1425 close_comm_pipes(&comm_pipes); 1426 } 1427 1428 static void test_anon_thp_collapse_unshared(char *mem, size_t size, 1429 bool is_hugetlb) 1430 { 1431 assert(!is_hugetlb); 1432 do_test_anon_thp_collapse(mem, size, ANON_THP_COLLAPSE_UNSHARED); 1433 } 1434 1435 static void test_anon_thp_collapse_fully_shared(char *mem, size_t size, 1436 bool is_hugetlb) 1437 { 1438 assert(!is_hugetlb); 1439 do_test_anon_thp_collapse(mem, size, ANON_THP_COLLAPSE_FULLY_SHARED); 1440 } 1441 1442 static void test_anon_thp_collapse_lower_shared(char *mem, size_t size, 1443 bool is_hugetlb) 1444 { 1445 assert(!is_hugetlb); 1446 do_test_anon_thp_collapse(mem, size, ANON_THP_COLLAPSE_LOWER_SHARED); 1447 } 1448 1449 static void test_anon_thp_collapse_upper_shared(char *mem, size_t size, 1450 bool is_hugetlb) 1451 { 1452 assert(!is_hugetlb); 1453 do_test_anon_thp_collapse(mem, size, ANON_THP_COLLAPSE_UPPER_SHARED); 1454 } 1455 1456 /* 1457 * Test cases that are specific to anonymous THP: pages in private mappings 1458 * that may get shared via COW during fork(). 1459 */ 1460 static const struct test_case anon_thp_test_cases[] = { 1461 /* 1462 * Basic COW test for fork() without any GUP when collapsing a THP 1463 * before fork(). 1464 * 1465 * Re-mapping a PTE-mapped anon THP using a single PMD ("in-place 1466 * collapse") might easily get COW handling wrong when not collapsing 1467 * exclusivity information properly. 1468 */ 1469 { 1470 "Basic COW after fork() when collapsing before fork()", 1471 test_anon_thp_collapse_unshared, 1472 }, 1473 /* Basic COW test, but collapse after COW-sharing a full THP. */ 1474 { 1475 "Basic COW after fork() when collapsing after fork() (fully shared)", 1476 test_anon_thp_collapse_fully_shared, 1477 }, 1478 /* 1479 * Basic COW test, but collapse after COW-sharing the lower half of a 1480 * THP. 1481 */ 1482 { 1483 "Basic COW after fork() when collapsing after fork() (lower shared)", 1484 test_anon_thp_collapse_lower_shared, 1485 }, 1486 /* 1487 * Basic COW test, but collapse after COW-sharing the upper half of a 1488 * THP. 1489 */ 1490 { 1491 "Basic COW after fork() when collapsing after fork() (upper shared)", 1492 test_anon_thp_collapse_upper_shared, 1493 }, 1494 }; 1495 1496 static void run_anon_thp_test_cases(void) 1497 { 1498 int i; 1499 1500 if (!pmdsize) 1501 return; 1502 1503 ksft_print_msg("[INFO] Anonymous THP tests\n"); 1504 1505 for (i = 0; i < ARRAY_SIZE(anon_thp_test_cases); i++) { 1506 struct test_case const *test_case = &anon_thp_test_cases[i]; 1507 1508 log_test_start("%s", test_case->desc); 1509 do_run_with_thp(test_case->fn, THP_RUN_PMD, pmdsize); 1510 } 1511 } 1512 1513 static int tests_per_anon_thp_test_case(void) 1514 { 1515 return pmdsize ? 1 : 0; 1516 } 1517 1518 typedef void (*non_anon_test_fn)(char *mem, const char *smem, size_t size); 1519 1520 static void test_cow(char *mem, const char *smem, size_t size) 1521 { 1522 char *old = malloc(size); 1523 1524 /* Backup the original content. */ 1525 memcpy(old, smem, size); 1526 1527 /* Modify the page. */ 1528 memset(mem, 0xff, size); 1529 1530 /* See if we still read the old values via the other mapping. */ 1531 if (!memcmp(smem, old, size)) 1532 log_test_result(KSFT_PASS); 1533 else 1534 log_test_result(KSFT_FAIL); 1535 free(old); 1536 } 1537 1538 static void test_ro_pin(char *mem, const char *smem, size_t size) 1539 { 1540 do_test_ro_pin(mem, size, RO_PIN_TEST, false); 1541 } 1542 1543 static void test_ro_fast_pin(char *mem, const char *smem, size_t size) 1544 { 1545 do_test_ro_pin(mem, size, RO_PIN_TEST, true); 1546 } 1547 1548 static void run_with_zeropage(non_anon_test_fn fn, const char *desc) 1549 { 1550 char *mem, *smem, tmp; 1551 1552 log_test_start("%s ... with shared zeropage", desc); 1553 1554 mem = mmap(NULL, pagesize, PROT_READ | PROT_WRITE, 1555 MAP_PRIVATE | MAP_ANON, -1, 0); 1556 if (mem == MAP_FAILED) { 1557 ksft_perror("mmap() failed"); 1558 log_test_result(KSFT_FAIL); 1559 return; 1560 } 1561 1562 smem = mmap(NULL, pagesize, PROT_READ, MAP_PRIVATE | MAP_ANON, -1, 0); 1563 if (smem == MAP_FAILED) { 1564 ksft_perror("mmap() failed"); 1565 log_test_result(KSFT_FAIL); 1566 goto munmap; 1567 } 1568 1569 /* Read from the page to populate the shared zeropage. */ 1570 tmp = *mem + *smem; 1571 asm volatile("" : "+r" (tmp)); 1572 1573 fn(mem, smem, pagesize); 1574 munmap: 1575 munmap(mem, pagesize); 1576 if (smem != MAP_FAILED) 1577 munmap(smem, pagesize); 1578 } 1579 1580 static void run_with_huge_zeropage(non_anon_test_fn fn, const char *desc) 1581 { 1582 char *mem, *smem, *mmap_mem, *mmap_smem, tmp; 1583 size_t mmap_size; 1584 int ret; 1585 1586 log_test_start("%s ... with huge zeropage", desc); 1587 1588 if (!has_huge_zeropage) { 1589 ksft_print_msg("Huge zeropage not enabled\n"); 1590 log_test_result(KSFT_SKIP); 1591 return; 1592 } 1593 1594 /* For alignment purposes, we need twice the thp size. */ 1595 mmap_size = 2 * pmdsize; 1596 mmap_mem = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, 1597 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); 1598 if (mmap_mem == MAP_FAILED) { 1599 ksft_perror("mmap() failed"); 1600 log_test_result(KSFT_FAIL); 1601 return; 1602 } 1603 mmap_smem = mmap(NULL, mmap_size, PROT_READ, 1604 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); 1605 if (mmap_smem == MAP_FAILED) { 1606 ksft_perror("mmap() failed"); 1607 log_test_result(KSFT_FAIL); 1608 goto munmap; 1609 } 1610 1611 /* We need a THP-aligned memory area. */ 1612 mem = (char *)(((uintptr_t)mmap_mem + pmdsize) & ~(pmdsize - 1)); 1613 smem = (char *)(((uintptr_t)mmap_smem + pmdsize) & ~(pmdsize - 1)); 1614 1615 ret = madvise(mem, pmdsize, MADV_HUGEPAGE); 1616 if (ret != 0) { 1617 ksft_perror("madvise()"); 1618 log_test_result(KSFT_FAIL); 1619 goto munmap; 1620 } 1621 ret |= madvise(smem, pmdsize, MADV_HUGEPAGE); 1622 if (ret != 0) { 1623 ksft_perror("madvise()"); 1624 log_test_result(KSFT_FAIL); 1625 goto munmap; 1626 } 1627 1628 /* 1629 * Read from the memory to populate the huge shared zeropage. Read from 1630 * the first sub-page and test if we get another sub-page populated 1631 * automatically. 1632 */ 1633 tmp = *mem + *smem; 1634 asm volatile("" : "+r" (tmp)); 1635 if (!pagemap_is_populated(pagemap_fd, mem + pagesize) || 1636 !pagemap_is_populated(pagemap_fd, smem + pagesize)) { 1637 ksft_test_result_skip("Did not get THPs populated\n"); 1638 goto munmap; 1639 } 1640 1641 fn(mem, smem, pmdsize); 1642 munmap: 1643 munmap(mmap_mem, mmap_size); 1644 if (mmap_smem != MAP_FAILED) 1645 munmap(mmap_smem, mmap_size); 1646 } 1647 1648 static void run_with_memfd(non_anon_test_fn fn, const char *desc) 1649 { 1650 char *mem, *smem, tmp; 1651 int fd; 1652 1653 log_test_start("%s ... with memfd", desc); 1654 1655 fd = memfd_create("test", 0); 1656 if (fd < 0) { 1657 ksft_perror("memfd_create() failed"); 1658 log_test_result(KSFT_FAIL); 1659 return; 1660 } 1661 1662 /* File consists of a single page filled with zeroes. */ 1663 if (fallocate(fd, 0, 0, pagesize)) { 1664 ksft_perror("fallocate() failed"); 1665 log_test_result(KSFT_FAIL); 1666 goto close; 1667 } 1668 1669 /* Create a private mapping of the memfd. */ 1670 mem = mmap(NULL, pagesize, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0); 1671 if (mem == MAP_FAILED) { 1672 ksft_perror("mmap() failed"); 1673 log_test_result(KSFT_FAIL); 1674 goto close; 1675 } 1676 smem = mmap(NULL, pagesize, PROT_READ, MAP_SHARED, fd, 0); 1677 if (smem == MAP_FAILED) { 1678 ksft_perror("mmap() failed"); 1679 log_test_result(KSFT_FAIL); 1680 goto munmap; 1681 } 1682 1683 /* Fault the page in. */ 1684 tmp = *mem + *smem; 1685 asm volatile("" : "+r" (tmp)); 1686 1687 fn(mem, smem, pagesize); 1688 munmap: 1689 munmap(mem, pagesize); 1690 if (smem != MAP_FAILED) 1691 munmap(smem, pagesize); 1692 close: 1693 close(fd); 1694 } 1695 1696 static void run_with_tmpfile(non_anon_test_fn fn, const char *desc) 1697 { 1698 char *mem, *smem, tmp; 1699 FILE *file; 1700 int fd; 1701 1702 log_test_start("%s ... with tmpfile", desc); 1703 1704 file = tmpfile(); 1705 if (!file) { 1706 ksft_perror("tmpfile() failed"); 1707 log_test_result(KSFT_FAIL); 1708 return; 1709 } 1710 1711 fd = fileno(file); 1712 if (fd < 0) { 1713 ksft_perror("fileno() failed"); 1714 log_test_result(KSFT_SKIP); 1715 return; 1716 } 1717 1718 /* File consists of a single page filled with zeroes. */ 1719 if (fallocate(fd, 0, 0, pagesize)) { 1720 ksft_perror("fallocate() failed"); 1721 log_test_result(KSFT_FAIL); 1722 goto close; 1723 } 1724 1725 /* Create a private mapping of the memfd. */ 1726 mem = mmap(NULL, pagesize, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0); 1727 if (mem == MAP_FAILED) { 1728 ksft_perror("mmap() failed"); 1729 log_test_result(KSFT_FAIL); 1730 goto close; 1731 } 1732 smem = mmap(NULL, pagesize, PROT_READ, MAP_SHARED, fd, 0); 1733 if (smem == MAP_FAILED) { 1734 ksft_perror("mmap() failed"); 1735 log_test_result(KSFT_FAIL); 1736 goto munmap; 1737 } 1738 1739 /* Fault the page in. */ 1740 tmp = *mem + *smem; 1741 asm volatile("" : "+r" (tmp)); 1742 1743 fn(mem, smem, pagesize); 1744 munmap: 1745 munmap(mem, pagesize); 1746 if (smem != MAP_FAILED) 1747 munmap(smem, pagesize); 1748 close: 1749 fclose(file); 1750 } 1751 1752 static void run_with_memfd_hugetlb(non_anon_test_fn fn, const char *desc, 1753 size_t hugetlbsize) 1754 { 1755 int flags = MFD_HUGETLB; 1756 char *mem, *smem, tmp; 1757 int fd; 1758 1759 log_test_start("%s ... with memfd hugetlb (%zu kB)", desc, 1760 hugetlbsize / 1024); 1761 1762 flags |= __builtin_ctzll(hugetlbsize) << MFD_HUGE_SHIFT; 1763 1764 fd = memfd_create("test", flags); 1765 if (fd < 0) { 1766 ksft_perror("memfd_create() failed"); 1767 log_test_result(KSFT_SKIP); 1768 return; 1769 } 1770 1771 /* File consists of a single page filled with zeroes. */ 1772 if (fallocate(fd, 0, 0, hugetlbsize)) { 1773 ksft_perror("need more free huge pages"); 1774 log_test_result(KSFT_SKIP); 1775 goto close; 1776 } 1777 1778 /* Create a private mapping of the memfd. */ 1779 mem = mmap(NULL, hugetlbsize, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 1780 0); 1781 if (mem == MAP_FAILED) { 1782 ksft_perror("need more free huge pages"); 1783 log_test_result(KSFT_SKIP); 1784 goto close; 1785 } 1786 smem = mmap(NULL, hugetlbsize, PROT_READ, MAP_SHARED, fd, 0); 1787 if (smem == MAP_FAILED) { 1788 ksft_perror("mmap() failed"); 1789 log_test_result(KSFT_FAIL); 1790 goto munmap; 1791 } 1792 1793 /* Fault the page in. */ 1794 tmp = *mem + *smem; 1795 asm volatile("" : "+r" (tmp)); 1796 1797 fn(mem, smem, hugetlbsize); 1798 munmap: 1799 munmap(mem, hugetlbsize); 1800 if (smem != MAP_FAILED) 1801 munmap(smem, hugetlbsize); 1802 close: 1803 close(fd); 1804 } 1805 1806 struct non_anon_test_case { 1807 const char *desc; 1808 non_anon_test_fn fn; 1809 }; 1810 1811 /* 1812 * Test cases that target any pages in private mappings that are not anonymous: 1813 * pages that may get shared via COW ndependent of fork(). This includes 1814 * the shared zeropage(s), pagecache pages, ... 1815 */ 1816 static const struct non_anon_test_case non_anon_test_cases[] = { 1817 /* 1818 * Basic COW test without any GUP. If we miss to break COW, changes are 1819 * visible via other private/shared mappings. 1820 */ 1821 { 1822 "Basic COW", 1823 test_cow, 1824 }, 1825 /* 1826 * Take a R/O longterm pin. When modifying the page via the page table, 1827 * the page content change must be visible via the pin. 1828 */ 1829 { 1830 "R/O longterm GUP pin", 1831 test_ro_pin, 1832 }, 1833 /* Same as above, but using GUP-fast. */ 1834 { 1835 "R/O longterm GUP-fast pin", 1836 test_ro_fast_pin, 1837 }, 1838 }; 1839 1840 static void run_non_anon_test_case(struct non_anon_test_case const *test_case) 1841 { 1842 int i; 1843 1844 run_with_zeropage(test_case->fn, test_case->desc); 1845 run_with_memfd(test_case->fn, test_case->desc); 1846 run_with_tmpfile(test_case->fn, test_case->desc); 1847 if (pmdsize) 1848 run_with_huge_zeropage(test_case->fn, test_case->desc); 1849 for (i = 0; i < nr_hugetlbsizes; i++) 1850 run_with_memfd_hugetlb(test_case->fn, test_case->desc, 1851 hugetlbsizes[i]); 1852 } 1853 1854 static void run_non_anon_test_cases(void) 1855 { 1856 int i; 1857 1858 ksft_print_msg("[RUN] Non-anonymous memory tests in private mappings\n"); 1859 1860 for (i = 0; i < ARRAY_SIZE(non_anon_test_cases); i++) 1861 run_non_anon_test_case(&non_anon_test_cases[i]); 1862 } 1863 1864 static int tests_per_non_anon_test_case(void) 1865 { 1866 int tests = 3 + nr_hugetlbsizes; 1867 1868 if (pmdsize) 1869 tests += 1; 1870 return tests; 1871 } 1872 1873 int main(int argc, char **argv) 1874 { 1875 struct thp_settings default_settings; 1876 1877 ksft_print_header(); 1878 1879 pagesize = getpagesize(); 1880 pmdsize = read_pmd_pagesize(); 1881 if (pmdsize) { 1882 /* Only if THP is supported. */ 1883 thp_read_settings(&default_settings); 1884 default_settings.hugepages[sz2ord(pmdsize)].enabled = THP_INHERIT; 1885 thp_save_settings(); 1886 thp_push_settings(&default_settings); 1887 1888 ksft_print_msg("[INFO] detected PMD size: %zu KiB\n", 1889 pmdsize / 1024); 1890 nr_thpsizes = detect_thp_sizes(thpsizes, ARRAY_SIZE(thpsizes)); 1891 } 1892 nr_hugetlbsizes = detect_hugetlb_page_sizes(hugetlbsizes, 1893 ARRAY_SIZE(hugetlbsizes)); 1894 detect_huge_zeropage(); 1895 1896 ksft_set_plan(ARRAY_SIZE(anon_test_cases) * tests_per_anon_test_case() + 1897 ARRAY_SIZE(anon_thp_test_cases) * tests_per_anon_thp_test_case() + 1898 ARRAY_SIZE(non_anon_test_cases) * tests_per_non_anon_test_case()); 1899 1900 gup_fd = open("/sys/kernel/debug/gup_test", O_RDWR); 1901 pagemap_fd = open("/proc/self/pagemap", O_RDONLY); 1902 if (pagemap_fd < 0) 1903 ksft_exit_fail_msg("opening pagemap failed\n"); 1904 1905 run_anon_test_cases(); 1906 run_anon_thp_test_cases(); 1907 run_non_anon_test_cases(); 1908 1909 if (pmdsize) { 1910 /* Only if THP is supported. */ 1911 thp_restore_settings(); 1912 } 1913 1914 ksft_finished(); 1915 } 1916