1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * KVM guest address space mapping code 4 * 5 * Copyright IBM Corp. 2007, 2020 6 * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> 7 * David Hildenbrand <david@redhat.com> 8 * Janosch Frank <frankja@linux.vnet.ibm.com> 9 */ 10 11 #include <linux/cpufeature.h> 12 #include <linux/kernel.h> 13 #include <linux/pagewalk.h> 14 #include <linux/swap.h> 15 #include <linux/smp.h> 16 #include <linux/spinlock.h> 17 #include <linux/slab.h> 18 #include <linux/swapops.h> 19 #include <linux/ksm.h> 20 #include <linux/mman.h> 21 #include <linux/pgtable.h> 22 #include <asm/page-states.h> 23 #include <asm/pgalloc.h> 24 #include <asm/machine.h> 25 #include <asm/gmap_helpers.h> 26 #include <asm/gmap.h> 27 #include <asm/page.h> 28 29 /* 30 * The address is saved in a radix tree directly; NULL would be ambiguous, 31 * since 0 is a valid address, and NULL is returned when nothing was found. 32 * The lower bits are ignored by all users of the macro, so it can be used 33 * to distinguish a valid address 0 from a NULL. 34 */ 35 #define VALID_GADDR_FLAG 1 36 #define IS_GADDR_VALID(gaddr) ((gaddr) & VALID_GADDR_FLAG) 37 #define MAKE_VALID_GADDR(gaddr) (((gaddr) & HPAGE_MASK) | VALID_GADDR_FLAG) 38 39 #define GMAP_SHADOW_FAKE_TABLE 1ULL 40 41 static struct page *gmap_alloc_crst(void) 42 { 43 struct page *page; 44 45 page = alloc_pages(GFP_KERNEL_ACCOUNT, CRST_ALLOC_ORDER); 46 if (!page) 47 return NULL; 48 __arch_set_page_dat(page_to_virt(page), 1UL << CRST_ALLOC_ORDER); 49 return page; 50 } 51 52 /** 53 * gmap_alloc - allocate and initialize a guest address space 54 * @limit: maximum address of the gmap address space 55 * 56 * Returns a guest address space structure. 57 */ 58 struct gmap *gmap_alloc(unsigned long limit) 59 { 60 struct gmap *gmap; 61 struct page *page; 62 unsigned long *table; 63 unsigned long etype, atype; 64 65 if (limit < _REGION3_SIZE) { 66 limit = _REGION3_SIZE - 1; 67 atype = _ASCE_TYPE_SEGMENT; 68 etype = _SEGMENT_ENTRY_EMPTY; 69 } else if (limit < _REGION2_SIZE) { 70 limit = _REGION2_SIZE - 1; 71 atype = _ASCE_TYPE_REGION3; 72 etype = _REGION3_ENTRY_EMPTY; 73 } else if (limit < _REGION1_SIZE) { 74 limit = _REGION1_SIZE - 1; 75 atype = _ASCE_TYPE_REGION2; 76 etype = _REGION2_ENTRY_EMPTY; 77 } else { 78 limit = -1UL; 79 atype = _ASCE_TYPE_REGION1; 80 etype = _REGION1_ENTRY_EMPTY; 81 } 82 gmap = kzalloc(sizeof(struct gmap), GFP_KERNEL_ACCOUNT); 83 if (!gmap) 84 goto out; 85 INIT_LIST_HEAD(&gmap->children); 86 INIT_RADIX_TREE(&gmap->guest_to_host, GFP_KERNEL_ACCOUNT); 87 INIT_RADIX_TREE(&gmap->host_to_guest, GFP_ATOMIC | __GFP_ACCOUNT); 88 INIT_RADIX_TREE(&gmap->host_to_rmap, GFP_ATOMIC | __GFP_ACCOUNT); 89 spin_lock_init(&gmap->guest_table_lock); 90 spin_lock_init(&gmap->shadow_lock); 91 refcount_set(&gmap->ref_count, 1); 92 page = gmap_alloc_crst(); 93 if (!page) 94 goto out_free; 95 table = page_to_virt(page); 96 crst_table_init(table, etype); 97 gmap->table = table; 98 gmap->asce = atype | _ASCE_TABLE_LENGTH | 99 _ASCE_USER_BITS | __pa(table); 100 gmap->asce_end = limit; 101 return gmap; 102 103 out_free: 104 kfree(gmap); 105 out: 106 return NULL; 107 } 108 EXPORT_SYMBOL_GPL(gmap_alloc); 109 110 /** 111 * gmap_create - create a guest address space 112 * @mm: pointer to the parent mm_struct 113 * @limit: maximum size of the gmap address space 114 * 115 * Returns a guest address space structure. 
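 *
 * A minimal lifecycle sketch (hypothetical caller, error handling trimmed;
 * the limit value is only an example):
 *
 *	gmap = gmap_create(current->mm, 1UL << 53);
 *	if (!gmap)
 *		return -ENOMEM;
 *	... populate the gmap and run the guest ...
 *	gmap_remove(gmap);
 *
 * gmap_remove() drops the initial reference taken by gmap_create(), so no
 * extra gmap_put() is needed unless the caller took its own reference.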
116 */ 117 struct gmap *gmap_create(struct mm_struct *mm, unsigned long limit) 118 { 119 struct gmap *gmap; 120 unsigned long gmap_asce; 121 122 gmap = gmap_alloc(limit); 123 if (!gmap) 124 return NULL; 125 gmap->mm = mm; 126 spin_lock(&mm->context.lock); 127 list_add_rcu(&gmap->list, &mm->context.gmap_list); 128 if (list_is_singular(&mm->context.gmap_list)) 129 gmap_asce = gmap->asce; 130 else 131 gmap_asce = -1UL; 132 WRITE_ONCE(mm->context.gmap_asce, gmap_asce); 133 spin_unlock(&mm->context.lock); 134 return gmap; 135 } 136 EXPORT_SYMBOL_GPL(gmap_create); 137 138 static void gmap_flush_tlb(struct gmap *gmap) 139 { 140 if (cpu_has_idte()) 141 __tlb_flush_idte(gmap->asce); 142 else 143 __tlb_flush_global(); 144 } 145 146 static void gmap_radix_tree_free(struct radix_tree_root *root) 147 { 148 struct radix_tree_iter iter; 149 unsigned long indices[16]; 150 unsigned long index; 151 void __rcu **slot; 152 int i, nr; 153 154 /* A radix tree is freed by deleting all of its entries */ 155 index = 0; 156 do { 157 nr = 0; 158 radix_tree_for_each_slot(slot, root, &iter, index) { 159 indices[nr] = iter.index; 160 if (++nr == 16) 161 break; 162 } 163 for (i = 0; i < nr; i++) { 164 index = indices[i]; 165 radix_tree_delete(root, index); 166 } 167 } while (nr > 0); 168 } 169 170 static void gmap_rmap_radix_tree_free(struct radix_tree_root *root) 171 { 172 struct gmap_rmap *rmap, *rnext, *head; 173 struct radix_tree_iter iter; 174 unsigned long indices[16]; 175 unsigned long index; 176 void __rcu **slot; 177 int i, nr; 178 179 /* A radix tree is freed by deleting all of its entries */ 180 index = 0; 181 do { 182 nr = 0; 183 radix_tree_for_each_slot(slot, root, &iter, index) { 184 indices[nr] = iter.index; 185 if (++nr == 16) 186 break; 187 } 188 for (i = 0; i < nr; i++) { 189 index = indices[i]; 190 head = radix_tree_delete(root, index); 191 gmap_for_each_rmap_safe(rmap, rnext, head) 192 kfree(rmap); 193 } 194 } while (nr > 0); 195 } 196 197 static void gmap_free_crst(unsigned long *table, bool free_ptes) 198 { 199 bool is_segment = (table[0] & _SEGMENT_ENTRY_TYPE_MASK) == 0; 200 int i; 201 202 if (is_segment) { 203 if (!free_ptes) 204 goto out; 205 for (i = 0; i < _CRST_ENTRIES; i++) 206 if (!(table[i] & _SEGMENT_ENTRY_INVALID)) 207 page_table_free_pgste(page_ptdesc(phys_to_page(table[i]))); 208 } else { 209 for (i = 0; i < _CRST_ENTRIES; i++) 210 if (!(table[i] & _REGION_ENTRY_INVALID)) 211 gmap_free_crst(__va(table[i] & PAGE_MASK), free_ptes); 212 } 213 214 out: 215 free_pages((unsigned long)table, CRST_ALLOC_ORDER); 216 } 217 218 /** 219 * gmap_free - free a guest address space 220 * @gmap: pointer to the guest address space structure 221 * 222 * No locks required. There are no references to this gmap anymore. 223 */ 224 void gmap_free(struct gmap *gmap) 225 { 226 /* Flush tlb of all gmaps (if not already done for shadows) */ 227 if (!(gmap_is_shadow(gmap) && gmap->removed)) 228 gmap_flush_tlb(gmap); 229 /* Free all segment & region tables. 
 */
	gmap_free_crst(gmap->table, gmap_is_shadow(gmap));

	gmap_radix_tree_free(&gmap->guest_to_host);
	gmap_radix_tree_free(&gmap->host_to_guest);

	/* Free additional data for a shadow gmap */
	if (gmap_is_shadow(gmap)) {
		gmap_rmap_radix_tree_free(&gmap->host_to_rmap);
		/* Release reference to the parent */
		gmap_put(gmap->parent);
	}

	kfree(gmap);
}
EXPORT_SYMBOL_GPL(gmap_free);

/**
 * gmap_get - increase reference counter for guest address space
 * @gmap: pointer to the guest address space structure
 *
 * Returns the gmap pointer
 */
struct gmap *gmap_get(struct gmap *gmap)
{
	refcount_inc(&gmap->ref_count);
	return gmap;
}
EXPORT_SYMBOL_GPL(gmap_get);

/**
 * gmap_put - decrease reference counter for guest address space
 * @gmap: pointer to the guest address space structure
 *
 * If the reference counter reaches zero the guest address space is freed.
 */
void gmap_put(struct gmap *gmap)
{
	if (refcount_dec_and_test(&gmap->ref_count))
		gmap_free(gmap);
}
EXPORT_SYMBOL_GPL(gmap_put);

/**
 * gmap_remove - remove a guest address space but do not free it yet
 * @gmap: pointer to the guest address space structure
 */
void gmap_remove(struct gmap *gmap)
{
	struct gmap *sg, *next;
	unsigned long gmap_asce;

	/* Remove all shadow gmaps linked to this gmap */
	if (!list_empty(&gmap->children)) {
		spin_lock(&gmap->shadow_lock);
		list_for_each_entry_safe(sg, next, &gmap->children, list) {
			list_del(&sg->list);
			gmap_put(sg);
		}
		spin_unlock(&gmap->shadow_lock);
	}
	/* Remove gmap from the per-mm list */
	spin_lock(&gmap->mm->context.lock);
	list_del_rcu(&gmap->list);
	if (list_empty(&gmap->mm->context.gmap_list))
		gmap_asce = 0;
	else if (list_is_singular(&gmap->mm->context.gmap_list))
		gmap_asce = list_first_entry(&gmap->mm->context.gmap_list,
					     struct gmap, list)->asce;
	else
		gmap_asce = -1UL;
	WRITE_ONCE(gmap->mm->context.gmap_asce, gmap_asce);
	spin_unlock(&gmap->mm->context.lock);
	synchronize_rcu();
	/* Put reference */
	gmap_put(gmap);
}
EXPORT_SYMBOL_GPL(gmap_remove);

/*
 * gmap_alloc_table is assumed to be called with mmap_lock held
 */
static int gmap_alloc_table(struct gmap *gmap, unsigned long *table,
			    unsigned long init, unsigned long gaddr)
{
	struct page *page;
	unsigned long *new;

	/* since we don't free the gmap table until gmap_free we can unlock */
	page = gmap_alloc_crst();
	if (!page)
		return -ENOMEM;
	new = page_to_virt(page);
	crst_table_init(new, init);
	spin_lock(&gmap->guest_table_lock);
	if (*table & _REGION_ENTRY_INVALID) {
		*table = __pa(new) | _REGION_ENTRY_LENGTH |
			(*table & _REGION_ENTRY_TYPE_MASK);
		page = NULL;
	}
	spin_unlock(&gmap->guest_table_lock);
	if (page)
		__free_pages(page, CRST_ALLOC_ORDER);
	return 0;
}

static unsigned long host_to_guest_lookup(struct gmap *gmap, unsigned long vmaddr)
{
	return (unsigned long)radix_tree_lookup(&gmap->host_to_guest, vmaddr >> PMD_SHIFT);
}

static unsigned long host_to_guest_delete(struct gmap *gmap, unsigned long vmaddr)
{
	return (unsigned long)radix_tree_delete(&gmap->host_to_guest, vmaddr >> PMD_SHIFT);
}

static pmd_t *host_to_guest_pmd_delete(struct gmap *gmap, unsigned long vmaddr,
				       unsigned long *gaddr)
{
	*gaddr =
host_to_guest_delete(gmap, vmaddr); 349 if (IS_GADDR_VALID(*gaddr)) 350 return (pmd_t *)gmap_table_walk(gmap, *gaddr, 1); 351 return NULL; 352 } 353 354 /** 355 * __gmap_unlink_by_vmaddr - unlink a single segment via a host address 356 * @gmap: pointer to the guest address space structure 357 * @vmaddr: address in the host process address space 358 * 359 * Returns 1 if a TLB flush is required 360 */ 361 static int __gmap_unlink_by_vmaddr(struct gmap *gmap, unsigned long vmaddr) 362 { 363 unsigned long gaddr; 364 int flush = 0; 365 pmd_t *pmdp; 366 367 BUG_ON(gmap_is_shadow(gmap)); 368 spin_lock(&gmap->guest_table_lock); 369 370 pmdp = host_to_guest_pmd_delete(gmap, vmaddr, &gaddr); 371 if (pmdp) { 372 flush = (pmd_val(*pmdp) != _SEGMENT_ENTRY_EMPTY); 373 *pmdp = __pmd(_SEGMENT_ENTRY_EMPTY); 374 } 375 376 spin_unlock(&gmap->guest_table_lock); 377 return flush; 378 } 379 380 /** 381 * __gmap_unmap_by_gaddr - unmap a single segment via a guest address 382 * @gmap: pointer to the guest address space structure 383 * @gaddr: address in the guest address space 384 * 385 * Returns 1 if a TLB flush is required 386 */ 387 static int __gmap_unmap_by_gaddr(struct gmap *gmap, unsigned long gaddr) 388 { 389 unsigned long vmaddr; 390 391 vmaddr = (unsigned long) radix_tree_delete(&gmap->guest_to_host, 392 gaddr >> PMD_SHIFT); 393 return vmaddr ? __gmap_unlink_by_vmaddr(gmap, vmaddr) : 0; 394 } 395 396 /** 397 * gmap_unmap_segment - unmap segment from the guest address space 398 * @gmap: pointer to the guest address space structure 399 * @to: address in the guest address space 400 * @len: length of the memory area to unmap 401 * 402 * Returns 0 if the unmap succeeded, -EINVAL if not. 403 */ 404 int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len) 405 { 406 unsigned long off; 407 int flush; 408 409 BUG_ON(gmap_is_shadow(gmap)); 410 if ((to | len) & (PMD_SIZE - 1)) 411 return -EINVAL; 412 if (len == 0 || to + len < to) 413 return -EINVAL; 414 415 flush = 0; 416 mmap_write_lock(gmap->mm); 417 for (off = 0; off < len; off += PMD_SIZE) 418 flush |= __gmap_unmap_by_gaddr(gmap, to + off); 419 mmap_write_unlock(gmap->mm); 420 if (flush) 421 gmap_flush_tlb(gmap); 422 return 0; 423 } 424 EXPORT_SYMBOL_GPL(gmap_unmap_segment); 425 426 /** 427 * gmap_map_segment - map a segment to the guest address space 428 * @gmap: pointer to the guest address space structure 429 * @from: source address in the parent address space 430 * @to: target address in the guest address space 431 * @len: length of the memory area to map 432 * 433 * Returns 0 if the mmap succeeded, -EINVAL or -ENOMEM if not. 
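 *
 * A minimal usage sketch (hypothetical addresses, everything PMD/segment
 * aligned, error handling trimmed):
 *
 *	rc = gmap_map_segment(gmap, userspace_base, 0, 16 * PMD_SIZE);
 *
 * maps 16 segments of the parent address space starting at userspace_base
 * to guest addresses 0 .. 16 * PMD_SIZE - 1; gmap_unmap_segment() with the
 * same @to and @len undoes the mapping again.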
434 */ 435 int gmap_map_segment(struct gmap *gmap, unsigned long from, 436 unsigned long to, unsigned long len) 437 { 438 unsigned long off; 439 int flush; 440 441 BUG_ON(gmap_is_shadow(gmap)); 442 if ((from | to | len) & (PMD_SIZE - 1)) 443 return -EINVAL; 444 if (len == 0 || from + len < from || to + len < to || 445 from + len - 1 > TASK_SIZE_MAX || to + len - 1 > gmap->asce_end) 446 return -EINVAL; 447 448 flush = 0; 449 mmap_write_lock(gmap->mm); 450 for (off = 0; off < len; off += PMD_SIZE) { 451 /* Remove old translation */ 452 flush |= __gmap_unmap_by_gaddr(gmap, to + off); 453 /* Store new translation */ 454 if (radix_tree_insert(&gmap->guest_to_host, 455 (to + off) >> PMD_SHIFT, 456 (void *) from + off)) 457 break; 458 } 459 mmap_write_unlock(gmap->mm); 460 if (flush) 461 gmap_flush_tlb(gmap); 462 if (off >= len) 463 return 0; 464 gmap_unmap_segment(gmap, to, len); 465 return -ENOMEM; 466 } 467 EXPORT_SYMBOL_GPL(gmap_map_segment); 468 469 /** 470 * __gmap_translate - translate a guest address to a user space address 471 * @gmap: pointer to guest mapping meta data structure 472 * @gaddr: guest address 473 * 474 * Returns user space address which corresponds to the guest address or 475 * -EFAULT if no such mapping exists. 476 * This function does not establish potentially missing page table entries. 477 * The mmap_lock of the mm that belongs to the address space must be held 478 * when this function gets called. 479 * 480 * Note: Can also be called for shadow gmaps. 481 */ 482 unsigned long __gmap_translate(struct gmap *gmap, unsigned long gaddr) 483 { 484 unsigned long vmaddr; 485 486 vmaddr = (unsigned long) 487 radix_tree_lookup(&gmap->guest_to_host, gaddr >> PMD_SHIFT); 488 /* Note: guest_to_host is empty for a shadow gmap */ 489 return vmaddr ? (vmaddr | (gaddr & ~PMD_MASK)) : -EFAULT; 490 } 491 EXPORT_SYMBOL_GPL(__gmap_translate); 492 493 /** 494 * gmap_unlink - disconnect a page table from the gmap shadow tables 495 * @mm: pointer to the parent mm_struct 496 * @table: pointer to the host page table 497 * @vmaddr: vm address associated with the host page table 498 */ 499 void gmap_unlink(struct mm_struct *mm, unsigned long *table, 500 unsigned long vmaddr) 501 { 502 struct gmap *gmap; 503 int flush; 504 505 rcu_read_lock(); 506 list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) { 507 flush = __gmap_unlink_by_vmaddr(gmap, vmaddr); 508 if (flush) 509 gmap_flush_tlb(gmap); 510 } 511 rcu_read_unlock(); 512 } 513 514 static void gmap_pmdp_xchg(struct gmap *gmap, pmd_t *old, pmd_t new, 515 unsigned long gaddr); 516 517 /** 518 * __gmap_link - set up shadow page tables to connect a host to a guest address 519 * @gmap: pointer to guest mapping meta data structure 520 * @gaddr: guest address 521 * @vmaddr: vm address 522 * 523 * Returns 0 on success, -ENOMEM for out of memory conditions, and -EFAULT 524 * if the vm address is already mapped to a different guest segment. 525 * The mmap_lock of the mm that belongs to the address space must be held 526 * when this function gets called. 
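 *
 * A rough caller sketch (this is essentially what gmap_pte_op_fixup() below
 * does after faulting in the host page; locking details omitted):
 *
 *	vmaddr = __gmap_translate(gmap, gaddr);
 *	if (IS_ERR_VALUE(vmaddr))
 *		return vmaddr;
 *	rc = __gmap_link(gmap, gaddr, vmaddr);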
527 */ 528 int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr) 529 { 530 struct mm_struct *mm; 531 unsigned long *table; 532 spinlock_t *ptl; 533 pgd_t *pgd; 534 p4d_t *p4d; 535 pud_t *pud; 536 pmd_t *pmd; 537 u64 unprot; 538 int rc; 539 540 BUG_ON(gmap_is_shadow(gmap)); 541 /* Create higher level tables in the gmap page table */ 542 table = gmap->table; 543 if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION1) { 544 table += (gaddr & _REGION1_INDEX) >> _REGION1_SHIFT; 545 if ((*table & _REGION_ENTRY_INVALID) && 546 gmap_alloc_table(gmap, table, _REGION2_ENTRY_EMPTY, 547 gaddr & _REGION1_MASK)) 548 return -ENOMEM; 549 table = __va(*table & _REGION_ENTRY_ORIGIN); 550 } 551 if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION2) { 552 table += (gaddr & _REGION2_INDEX) >> _REGION2_SHIFT; 553 if ((*table & _REGION_ENTRY_INVALID) && 554 gmap_alloc_table(gmap, table, _REGION3_ENTRY_EMPTY, 555 gaddr & _REGION2_MASK)) 556 return -ENOMEM; 557 table = __va(*table & _REGION_ENTRY_ORIGIN); 558 } 559 if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION3) { 560 table += (gaddr & _REGION3_INDEX) >> _REGION3_SHIFT; 561 if ((*table & _REGION_ENTRY_INVALID) && 562 gmap_alloc_table(gmap, table, _SEGMENT_ENTRY_EMPTY, 563 gaddr & _REGION3_MASK)) 564 return -ENOMEM; 565 table = __va(*table & _REGION_ENTRY_ORIGIN); 566 } 567 table += (gaddr & _SEGMENT_INDEX) >> _SEGMENT_SHIFT; 568 /* Walk the parent mm page table */ 569 mm = gmap->mm; 570 pgd = pgd_offset(mm, vmaddr); 571 VM_BUG_ON(pgd_none(*pgd)); 572 p4d = p4d_offset(pgd, vmaddr); 573 VM_BUG_ON(p4d_none(*p4d)); 574 pud = pud_offset(p4d, vmaddr); 575 VM_BUG_ON(pud_none(*pud)); 576 /* large puds cannot yet be handled */ 577 if (pud_leaf(*pud)) 578 return -EFAULT; 579 pmd = pmd_offset(pud, vmaddr); 580 VM_BUG_ON(pmd_none(*pmd)); 581 /* Are we allowed to use huge pages? */ 582 if (pmd_leaf(*pmd) && !gmap->mm->context.allow_gmap_hpage_1m) 583 return -EFAULT; 584 /* Link gmap segment table entry location to page table. 
 */
	rc = radix_tree_preload(GFP_KERNEL_ACCOUNT);
	if (rc)
		return rc;
	ptl = pmd_lock(mm, pmd);
	spin_lock(&gmap->guest_table_lock);
	if (*table == _SEGMENT_ENTRY_EMPTY) {
		rc = radix_tree_insert(&gmap->host_to_guest,
				       vmaddr >> PMD_SHIFT,
				       (void *)MAKE_VALID_GADDR(gaddr));
		if (!rc) {
			if (pmd_leaf(*pmd)) {
				*table = (pmd_val(*pmd) &
					  _SEGMENT_ENTRY_HARDWARE_BITS_LARGE)
					| _SEGMENT_ENTRY_GMAP_UC
					| _SEGMENT_ENTRY;
			} else
				*table = pmd_val(*pmd) &
					 _SEGMENT_ENTRY_HARDWARE_BITS;
		}
	} else if (*table & _SEGMENT_ENTRY_PROTECT &&
		   !(pmd_val(*pmd) & _SEGMENT_ENTRY_PROTECT)) {
		unprot = (u64)*table;
		unprot &= ~_SEGMENT_ENTRY_PROTECT;
		unprot |= _SEGMENT_ENTRY_GMAP_UC;
		gmap_pmdp_xchg(gmap, (pmd_t *)table, __pmd(unprot), gaddr);
	}
	spin_unlock(&gmap->guest_table_lock);
	spin_unlock(ptl);
	radix_tree_preload_end();
	return rc;
}
EXPORT_SYMBOL(__gmap_link);

/*
 * This function is assumed to be called with mmap_lock held.
 */
void __gmap_zap(struct gmap *gmap, unsigned long gaddr)
{
	unsigned long vmaddr;

	mmap_assert_locked(gmap->mm);

	/* Find the vm address for the guest address */
	vmaddr = (unsigned long) radix_tree_lookup(&gmap->guest_to_host,
						   gaddr >> PMD_SHIFT);
	if (vmaddr) {
		vmaddr |= gaddr & ~PMD_MASK;
		gmap_helper_zap_one_page(gmap->mm, vmaddr);
	}
}
EXPORT_SYMBOL_GPL(__gmap_zap);

static LIST_HEAD(gmap_notifier_list);
static DEFINE_SPINLOCK(gmap_notifier_lock);

/**
 * gmap_register_pte_notifier - register a pte invalidation callback
 * @nb: pointer to the gmap notifier block
 */
void gmap_register_pte_notifier(struct gmap_notifier *nb)
{
	spin_lock(&gmap_notifier_lock);
	list_add_rcu(&nb->list, &gmap_notifier_list);
	spin_unlock(&gmap_notifier_lock);
}
EXPORT_SYMBOL_GPL(gmap_register_pte_notifier);

/**
 * gmap_unregister_pte_notifier - remove a pte invalidation callback
 * @nb: pointer to the gmap notifier block
 */
void gmap_unregister_pte_notifier(struct gmap_notifier *nb)
{
	spin_lock(&gmap_notifier_lock);
	list_del_rcu(&nb->list);
	spin_unlock(&gmap_notifier_lock);
	synchronize_rcu();
}
EXPORT_SYMBOL_GPL(gmap_unregister_pte_notifier);

/**
 * gmap_call_notifier - call all registered invalidation callbacks
 * @gmap: pointer to guest mapping meta data structure
 * @start: start virtual address in the guest address space
 * @end: end virtual address in the guest address space
 */
static void gmap_call_notifier(struct gmap *gmap, unsigned long start,
			       unsigned long end)
{
	struct gmap_notifier *nb;

	list_for_each_entry(nb, &gmap_notifier_list, list)
		nb->notifier_call(gmap, start, end);
}

/**
 * gmap_table_walk - walk the gmap page tables
 * @gmap: pointer to guest mapping meta data structure
 * @gaddr: virtual address in the guest address space
 * @level: page table level to stop at
 *
 * Returns a table entry pointer for the given guest address and @level:
 * @level=0 : returns a pointer to a page table entry (or NULL)
 * @level=1 : returns a pointer to a segment table entry (or NULL)
 * @level=2 : returns a pointer to a region-3 table entry (or NULL)
 * @level=3 : returns a pointer to a region-2 table entry (or NULL)
 * @level=4 : returns a pointer to a region-1 table entry (or NULL)
 *
 * Returns NULL if the gmap page
tables could not be walked to the 694 * requested level. 695 * 696 * Note: Can also be called for shadow gmaps. 697 */ 698 unsigned long *gmap_table_walk(struct gmap *gmap, unsigned long gaddr, int level) 699 { 700 const int asce_type = gmap->asce & _ASCE_TYPE_MASK; 701 unsigned long *table = gmap->table; 702 703 if (gmap_is_shadow(gmap) && gmap->removed) 704 return NULL; 705 706 if (WARN_ON_ONCE(level > (asce_type >> 2) + 1)) 707 return NULL; 708 709 if (asce_type != _ASCE_TYPE_REGION1 && 710 gaddr & (-1UL << (31 + (asce_type >> 2) * 11))) 711 return NULL; 712 713 switch (asce_type) { 714 case _ASCE_TYPE_REGION1: 715 table += (gaddr & _REGION1_INDEX) >> _REGION1_SHIFT; 716 if (level == 4) 717 break; 718 if (*table & _REGION_ENTRY_INVALID) 719 return NULL; 720 table = __va(*table & _REGION_ENTRY_ORIGIN); 721 fallthrough; 722 case _ASCE_TYPE_REGION2: 723 table += (gaddr & _REGION2_INDEX) >> _REGION2_SHIFT; 724 if (level == 3) 725 break; 726 if (*table & _REGION_ENTRY_INVALID) 727 return NULL; 728 table = __va(*table & _REGION_ENTRY_ORIGIN); 729 fallthrough; 730 case _ASCE_TYPE_REGION3: 731 table += (gaddr & _REGION3_INDEX) >> _REGION3_SHIFT; 732 if (level == 2) 733 break; 734 if (*table & _REGION_ENTRY_INVALID) 735 return NULL; 736 table = __va(*table & _REGION_ENTRY_ORIGIN); 737 fallthrough; 738 case _ASCE_TYPE_SEGMENT: 739 table += (gaddr & _SEGMENT_INDEX) >> _SEGMENT_SHIFT; 740 if (level == 1) 741 break; 742 if (*table & _REGION_ENTRY_INVALID) 743 return NULL; 744 table = __va(*table & _SEGMENT_ENTRY_ORIGIN); 745 table += (gaddr & _PAGE_INDEX) >> PAGE_SHIFT; 746 } 747 return table; 748 } 749 EXPORT_SYMBOL(gmap_table_walk); 750 751 /** 752 * gmap_pte_op_walk - walk the gmap page table, get the page table lock 753 * and return the pte pointer 754 * @gmap: pointer to guest mapping meta data structure 755 * @gaddr: virtual address in the guest address space 756 * @ptl: pointer to the spinlock pointer 757 * 758 * Returns a pointer to the locked pte for a guest address, or NULL 759 */ 760 static pte_t *gmap_pte_op_walk(struct gmap *gmap, unsigned long gaddr, 761 spinlock_t **ptl) 762 { 763 unsigned long *table; 764 765 BUG_ON(gmap_is_shadow(gmap)); 766 /* Walk the gmap page table, lock and get pte pointer */ 767 table = gmap_table_walk(gmap, gaddr, 1); /* get segment pointer */ 768 if (!table || *table & _SEGMENT_ENTRY_INVALID) 769 return NULL; 770 return pte_alloc_map_lock(gmap->mm, (pmd_t *) table, gaddr, ptl); 771 } 772 773 /** 774 * gmap_pte_op_fixup - force a page in and connect the gmap page table 775 * @gmap: pointer to guest mapping meta data structure 776 * @gaddr: virtual address in the guest address space 777 * @vmaddr: address in the host process address space 778 * @prot: indicates access rights: PROT_NONE, PROT_READ or PROT_WRITE 779 * 780 * Returns 0 if the caller can retry __gmap_translate (might fail again), 781 * -ENOMEM if out of memory and -EFAULT if anything goes wrong while fixing 782 * up or connecting the gmap page table. 783 */ 784 static int gmap_pte_op_fixup(struct gmap *gmap, unsigned long gaddr, 785 unsigned long vmaddr, int prot) 786 { 787 struct mm_struct *mm = gmap->mm; 788 unsigned int fault_flags; 789 bool unlocked = false; 790 791 BUG_ON(gmap_is_shadow(gmap)); 792 fault_flags = (prot == PROT_WRITE) ? 
FAULT_FLAG_WRITE : 0;
	if (fixup_user_fault(mm, vmaddr, fault_flags, &unlocked))
		return -EFAULT;
	if (unlocked)
		/* lost mmap_lock, caller has to retry __gmap_translate */
		return 0;
	/* Connect the page tables */
	return __gmap_link(gmap, gaddr, vmaddr);
}

/**
 * gmap_pte_op_end - release the page table lock
 * @ptep: pointer to the locked pte
 * @ptl: pointer to the page table spinlock
 */
static void gmap_pte_op_end(pte_t *ptep, spinlock_t *ptl)
{
	pte_unmap_unlock(ptep, ptl);
}

/**
 * gmap_pmd_op_walk - walk the gmap tables, get the guest table lock
 * and return the pmd pointer
 * @gmap: pointer to guest mapping meta data structure
 * @gaddr: virtual address in the guest address space
 *
 * Returns a pointer to the pmd for a guest address, or NULL
 */
static inline pmd_t *gmap_pmd_op_walk(struct gmap *gmap, unsigned long gaddr)
{
	pmd_t *pmdp;

	BUG_ON(gmap_is_shadow(gmap));
	pmdp = (pmd_t *) gmap_table_walk(gmap, gaddr, 1);
	if (!pmdp)
		return NULL;

	/* without huge pages, there is no need to take the table lock */
	if (!gmap->mm->context.allow_gmap_hpage_1m)
		return pmd_none(*pmdp) ? NULL : pmdp;

	spin_lock(&gmap->guest_table_lock);
	if (pmd_none(*pmdp)) {
		spin_unlock(&gmap->guest_table_lock);
		return NULL;
	}

	/* 4k page table entries are locked via the pte (pte_alloc_map_lock). */
	if (!pmd_leaf(*pmdp))
		spin_unlock(&gmap->guest_table_lock);
	return pmdp;
}

/**
 * gmap_pmd_op_end - release the guest_table_lock if needed
 * @gmap: pointer to the guest mapping meta data structure
 * @pmdp: pointer to the pmd
 */
static inline void gmap_pmd_op_end(struct gmap *gmap, pmd_t *pmdp)
{
	if (pmd_leaf(*pmdp))
		spin_unlock(&gmap->guest_table_lock);
}

/*
 * gmap_protect_pmd - remove access rights to memory and set pmd notification bits
 * @gmap: pointer to the guest address space structure
 * @gaddr: virtual address in the guest address space
 * @pmdp: pointer to the pmd to be protected
 * @prot: indicates access rights: PROT_NONE, PROT_READ or PROT_WRITE
 * @bits: notification bits to set
 *
 * Returns:
 * 0 if successfully protected
 * -EAGAIN if a fixup is needed
 * -EINVAL if unsupported notifier bits have been specified
 *
 * Expected to be called with gmap->mm->mmap_lock in read and
 * guest_table_lock held.
 */
static int gmap_protect_pmd(struct gmap *gmap, unsigned long gaddr,
			    pmd_t *pmdp, int prot, unsigned long bits)
{
	int pmd_i = pmd_val(*pmdp) & _SEGMENT_ENTRY_INVALID;
	int pmd_p = pmd_val(*pmdp) & _SEGMENT_ENTRY_PROTECT;
	pmd_t new = *pmdp;

	/* Fixup needed */
	if ((pmd_i && (prot != PROT_NONE)) || (pmd_p && (prot == PROT_WRITE)))
		return -EAGAIN;

	if (prot == PROT_NONE && !pmd_i) {
		new = set_pmd_bit(new, __pgprot(_SEGMENT_ENTRY_INVALID));
		gmap_pmdp_xchg(gmap, pmdp, new, gaddr);
	}

	if (prot == PROT_READ && !pmd_p) {
		new = clear_pmd_bit(new, __pgprot(_SEGMENT_ENTRY_INVALID));
		new = set_pmd_bit(new, __pgprot(_SEGMENT_ENTRY_PROTECT));
		gmap_pmdp_xchg(gmap, pmdp, new, gaddr);
	}

	if (bits & GMAP_NOTIFY_MPROT)
		set_pmd(pmdp, set_pmd_bit(*pmdp, __pgprot(_SEGMENT_ENTRY_GMAP_IN)));

	/* Shadow GMAP protection needs split PMDs */
	if (bits & GMAP_NOTIFY_SHADOW)
		return -EINVAL;

	return 0;
}

/*
 * gmap_protect_pte - remove access rights to memory and set pgste bits
 * @gmap: pointer to guest mapping meta data structure
 * @gaddr: virtual address in the guest address space
 * @pmdp: pointer to the pmd associated with the pte
 * @prot: indicates access rights: PROT_NONE, PROT_READ or PROT_WRITE
 * @bits: notification bits to set
 *
 * Returns 0 if successfully protected, -ENOMEM if out of memory and
 * -EAGAIN if a fixup is needed.
 *
 * Expected to be called with gmap->mm->mmap_lock in read.
 */
static int gmap_protect_pte(struct gmap *gmap, unsigned long gaddr,
			    pmd_t *pmdp, int prot, unsigned long bits)
{
	int rc;
	pte_t *ptep;
	spinlock_t *ptl;
	unsigned long pbits = 0;

	if (pmd_val(*pmdp) & _SEGMENT_ENTRY_INVALID)
		return -EAGAIN;

	ptep = pte_alloc_map_lock(gmap->mm, pmdp, gaddr, &ptl);
	if (!ptep)
		return -ENOMEM;

	pbits |= (bits & GMAP_NOTIFY_MPROT) ? PGSTE_IN_BIT : 0;
	pbits |= (bits & GMAP_NOTIFY_SHADOW) ? PGSTE_VSIE_BIT : 0;
	/* Protect and unlock. */
	rc = ptep_force_prot(gmap->mm, gaddr, ptep, prot, pbits);
	gmap_pte_op_end(ptep, ptl);
	return rc;
}

/*
 * gmap_protect_one - remove access rights to a single guest page and set
 * the pgste/pmd notification bits
 * @gmap: pointer to guest mapping meta data structure
 * @gaddr: virtual address in the guest address space
 * @prot: indicates access rights: PROT_NONE, PROT_READ or PROT_WRITE
 * @bits: pgste notification bits to set
 *
 * Returns:
 * PAGE_SIZE if a small page was successfully protected;
 * HPAGE_SIZE if a large page was successfully protected;
 * -ENOMEM if out of memory;
 * -EINVAL if unsupported notifier bits have been specified for a large page;
 * -EAGAIN if the guest mapping is missing and should be fixed by the caller.
 *
 * Context: Called with gmap->mm->mmap_lock in read.
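 *
 * A hypothetical caller sketch (not taken from an in-tree user) that
 * protects a larger range, retrying whenever the host mapping has to be
 * fixed up first (mmap_lock held in read; helpers are the ones defined
 * above in this file):
 *
 *	while (len) {
 *		rc = gmap_protect_one(gmap, gaddr, prot, bits);
 *		if (rc == -EAGAIN) {
 *			vmaddr = __gmap_translate(gmap, gaddr);
 *			if (IS_ERR_VALUE(vmaddr))
 *				return vmaddr;
 *			rc = gmap_pte_op_fixup(gmap, gaddr, vmaddr, prot);
 *			if (rc)
 *				return rc;
 *			continue;
 *		}
 *		if (rc < 0)
 *			return rc;
 *		len -= min(len, (unsigned long)rc);
 *		gaddr += rc;
 *	}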
954 */ 955 int gmap_protect_one(struct gmap *gmap, unsigned long gaddr, int prot, unsigned long bits) 956 { 957 pmd_t *pmdp; 958 int rc = 0; 959 960 BUG_ON(gmap_is_shadow(gmap)); 961 962 pmdp = gmap_pmd_op_walk(gmap, gaddr); 963 if (!pmdp) 964 return -EAGAIN; 965 966 if (!pmd_leaf(*pmdp)) { 967 rc = gmap_protect_pte(gmap, gaddr, pmdp, prot, bits); 968 if (!rc) 969 rc = PAGE_SIZE; 970 } else { 971 rc = gmap_protect_pmd(gmap, gaddr, pmdp, prot, bits); 972 if (!rc) 973 rc = HPAGE_SIZE; 974 } 975 gmap_pmd_op_end(gmap, pmdp); 976 977 return rc; 978 } 979 EXPORT_SYMBOL_GPL(gmap_protect_one); 980 981 /** 982 * gmap_read_table - get an unsigned long value from a guest page table using 983 * absolute addressing, without marking the page referenced. 984 * @gmap: pointer to guest mapping meta data structure 985 * @gaddr: virtual address in the guest address space 986 * @val: pointer to the unsigned long value to return 987 * 988 * Returns 0 if the value was read, -ENOMEM if out of memory and -EFAULT 989 * if reading using the virtual address failed. -EINVAL if called on a gmap 990 * shadow. 991 * 992 * Called with gmap->mm->mmap_lock in read. 993 */ 994 int gmap_read_table(struct gmap *gmap, unsigned long gaddr, unsigned long *val) 995 { 996 unsigned long address, vmaddr; 997 spinlock_t *ptl; 998 pte_t *ptep, pte; 999 int rc; 1000 1001 if (gmap_is_shadow(gmap)) 1002 return -EINVAL; 1003 1004 while (1) { 1005 rc = -EAGAIN; 1006 ptep = gmap_pte_op_walk(gmap, gaddr, &ptl); 1007 if (ptep) { 1008 pte = *ptep; 1009 if (pte_present(pte) && (pte_val(pte) & _PAGE_READ)) { 1010 address = pte_val(pte) & PAGE_MASK; 1011 address += gaddr & ~PAGE_MASK; 1012 *val = *(unsigned long *)__va(address); 1013 set_pte(ptep, set_pte_bit(*ptep, __pgprot(_PAGE_YOUNG))); 1014 /* Do *NOT* clear the _PAGE_INVALID bit! 
*/ 1015 rc = 0; 1016 } 1017 gmap_pte_op_end(ptep, ptl); 1018 } 1019 if (!rc) 1020 break; 1021 vmaddr = __gmap_translate(gmap, gaddr); 1022 if (IS_ERR_VALUE(vmaddr)) { 1023 rc = vmaddr; 1024 break; 1025 } 1026 rc = gmap_pte_op_fixup(gmap, gaddr, vmaddr, PROT_READ); 1027 if (rc) 1028 break; 1029 } 1030 return rc; 1031 } 1032 EXPORT_SYMBOL_GPL(gmap_read_table); 1033 1034 /** 1035 * gmap_insert_rmap - add a rmap to the host_to_rmap radix tree 1036 * @sg: pointer to the shadow guest address space structure 1037 * @vmaddr: vm address associated with the rmap 1038 * @rmap: pointer to the rmap structure 1039 * 1040 * Called with the sg->guest_table_lock 1041 */ 1042 static inline void gmap_insert_rmap(struct gmap *sg, unsigned long vmaddr, 1043 struct gmap_rmap *rmap) 1044 { 1045 struct gmap_rmap *temp; 1046 void __rcu **slot; 1047 1048 BUG_ON(!gmap_is_shadow(sg)); 1049 slot = radix_tree_lookup_slot(&sg->host_to_rmap, vmaddr >> PAGE_SHIFT); 1050 if (slot) { 1051 rmap->next = radix_tree_deref_slot_protected(slot, 1052 &sg->guest_table_lock); 1053 for (temp = rmap->next; temp; temp = temp->next) { 1054 if (temp->raddr == rmap->raddr) { 1055 kfree(rmap); 1056 return; 1057 } 1058 } 1059 radix_tree_replace_slot(&sg->host_to_rmap, slot, rmap); 1060 } else { 1061 rmap->next = NULL; 1062 radix_tree_insert(&sg->host_to_rmap, vmaddr >> PAGE_SHIFT, 1063 rmap); 1064 } 1065 } 1066 1067 /** 1068 * gmap_protect_rmap - restrict access rights to memory (RO) and create an rmap 1069 * @sg: pointer to the shadow guest address space structure 1070 * @raddr: rmap address in the shadow gmap 1071 * @paddr: address in the parent guest address space 1072 * @len: length of the memory area to protect 1073 * 1074 * Returns 0 if successfully protected and the rmap was created, -ENOMEM 1075 * if out of memory and -EFAULT if paddr is invalid. 
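 *
 * The low bits of @raddr encode which shadow table level the rmap refers
 * to (see the _SHADOW_RMAP_* defines below). A typical call from the
 * shadow table code looks like:
 *
 *	raddr = (saddr & _REGION1_MASK) | _SHADOW_RMAP_REGION1;
 *	rc = gmap_protect_rmap(sg, raddr, origin + offset, len);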
1076 */ 1077 static int gmap_protect_rmap(struct gmap *sg, unsigned long raddr, 1078 unsigned long paddr, unsigned long len) 1079 { 1080 struct gmap *parent; 1081 struct gmap_rmap *rmap; 1082 unsigned long vmaddr; 1083 spinlock_t *ptl; 1084 pte_t *ptep; 1085 int rc; 1086 1087 BUG_ON(!gmap_is_shadow(sg)); 1088 parent = sg->parent; 1089 while (len) { 1090 vmaddr = __gmap_translate(parent, paddr); 1091 if (IS_ERR_VALUE(vmaddr)) 1092 return vmaddr; 1093 rmap = kzalloc(sizeof(*rmap), GFP_KERNEL_ACCOUNT); 1094 if (!rmap) 1095 return -ENOMEM; 1096 rmap->raddr = raddr; 1097 rc = radix_tree_preload(GFP_KERNEL_ACCOUNT); 1098 if (rc) { 1099 kfree(rmap); 1100 return rc; 1101 } 1102 rc = -EAGAIN; 1103 ptep = gmap_pte_op_walk(parent, paddr, &ptl); 1104 if (ptep) { 1105 spin_lock(&sg->guest_table_lock); 1106 rc = ptep_force_prot(parent->mm, paddr, ptep, PROT_READ, 1107 PGSTE_VSIE_BIT); 1108 if (!rc) 1109 gmap_insert_rmap(sg, vmaddr, rmap); 1110 spin_unlock(&sg->guest_table_lock); 1111 gmap_pte_op_end(ptep, ptl); 1112 } 1113 radix_tree_preload_end(); 1114 if (rc) { 1115 kfree(rmap); 1116 rc = gmap_pte_op_fixup(parent, paddr, vmaddr, PROT_READ); 1117 if (rc) 1118 return rc; 1119 continue; 1120 } 1121 paddr += PAGE_SIZE; 1122 len -= PAGE_SIZE; 1123 } 1124 return 0; 1125 } 1126 1127 #define _SHADOW_RMAP_MASK 0x7 1128 #define _SHADOW_RMAP_REGION1 0x5 1129 #define _SHADOW_RMAP_REGION2 0x4 1130 #define _SHADOW_RMAP_REGION3 0x3 1131 #define _SHADOW_RMAP_SEGMENT 0x2 1132 #define _SHADOW_RMAP_PGTABLE 0x1 1133 1134 /** 1135 * gmap_idte_one - invalidate a single region or segment table entry 1136 * @asce: region or segment table *origin* + table-type bits 1137 * @vaddr: virtual address to identify the table entry to flush 1138 * 1139 * The invalid bit of a single region or segment table entry is set 1140 * and the associated TLB entries depending on the entry are flushed. 1141 * The table-type of the @asce identifies the portion of the @vaddr 1142 * that is used as the invalidation index. 
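 *
 * For example, the unshadow helpers below invalidate a single segment
 * table entry roughly like this (sto being the segment table origin):
 *
 *	sto = __pa(ste - ((raddr & _SEGMENT_INDEX) >> _SEGMENT_SHIFT));
 *	gmap_idte_one(sto | _ASCE_TYPE_SEGMENT, raddr);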
1143 */ 1144 static inline void gmap_idte_one(unsigned long asce, unsigned long vaddr) 1145 { 1146 asm volatile( 1147 " idte %0,0,%1" 1148 : : "a" (asce), "a" (vaddr) : "cc", "memory"); 1149 } 1150 1151 /** 1152 * gmap_unshadow_page - remove a page from a shadow page table 1153 * @sg: pointer to the shadow guest address space structure 1154 * @raddr: rmap address in the shadow guest address space 1155 * 1156 * Called with the sg->guest_table_lock 1157 */ 1158 static void gmap_unshadow_page(struct gmap *sg, unsigned long raddr) 1159 { 1160 unsigned long *table; 1161 1162 BUG_ON(!gmap_is_shadow(sg)); 1163 table = gmap_table_walk(sg, raddr, 0); /* get page table pointer */ 1164 if (!table || *table & _PAGE_INVALID) 1165 return; 1166 gmap_call_notifier(sg, raddr, raddr + PAGE_SIZE - 1); 1167 ptep_unshadow_pte(sg->mm, raddr, (pte_t *) table); 1168 } 1169 1170 /** 1171 * __gmap_unshadow_pgt - remove all entries from a shadow page table 1172 * @sg: pointer to the shadow guest address space structure 1173 * @raddr: rmap address in the shadow guest address space 1174 * @pgt: pointer to the start of a shadow page table 1175 * 1176 * Called with the sg->guest_table_lock 1177 */ 1178 static void __gmap_unshadow_pgt(struct gmap *sg, unsigned long raddr, 1179 unsigned long *pgt) 1180 { 1181 int i; 1182 1183 BUG_ON(!gmap_is_shadow(sg)); 1184 for (i = 0; i < _PAGE_ENTRIES; i++, raddr += PAGE_SIZE) 1185 pgt[i] = _PAGE_INVALID; 1186 } 1187 1188 /** 1189 * gmap_unshadow_pgt - remove a shadow page table from a segment entry 1190 * @sg: pointer to the shadow guest address space structure 1191 * @raddr: address in the shadow guest address space 1192 * 1193 * Called with the sg->guest_table_lock 1194 */ 1195 static void gmap_unshadow_pgt(struct gmap *sg, unsigned long raddr) 1196 { 1197 unsigned long *ste; 1198 phys_addr_t sto, pgt; 1199 struct ptdesc *ptdesc; 1200 1201 BUG_ON(!gmap_is_shadow(sg)); 1202 ste = gmap_table_walk(sg, raddr, 1); /* get segment pointer */ 1203 if (!ste || !(*ste & _SEGMENT_ENTRY_ORIGIN)) 1204 return; 1205 gmap_call_notifier(sg, raddr, raddr + _SEGMENT_SIZE - 1); 1206 sto = __pa(ste - ((raddr & _SEGMENT_INDEX) >> _SEGMENT_SHIFT)); 1207 gmap_idte_one(sto | _ASCE_TYPE_SEGMENT, raddr); 1208 pgt = *ste & _SEGMENT_ENTRY_ORIGIN; 1209 *ste = _SEGMENT_ENTRY_EMPTY; 1210 __gmap_unshadow_pgt(sg, raddr, __va(pgt)); 1211 /* Free page table */ 1212 ptdesc = page_ptdesc(phys_to_page(pgt)); 1213 page_table_free_pgste(ptdesc); 1214 } 1215 1216 /** 1217 * __gmap_unshadow_sgt - remove all entries from a shadow segment table 1218 * @sg: pointer to the shadow guest address space structure 1219 * @raddr: rmap address in the shadow guest address space 1220 * @sgt: pointer to the start of a shadow segment table 1221 * 1222 * Called with the sg->guest_table_lock 1223 */ 1224 static void __gmap_unshadow_sgt(struct gmap *sg, unsigned long raddr, 1225 unsigned long *sgt) 1226 { 1227 struct ptdesc *ptdesc; 1228 phys_addr_t pgt; 1229 int i; 1230 1231 BUG_ON(!gmap_is_shadow(sg)); 1232 for (i = 0; i < _CRST_ENTRIES; i++, raddr += _SEGMENT_SIZE) { 1233 if (!(sgt[i] & _SEGMENT_ENTRY_ORIGIN)) 1234 continue; 1235 pgt = sgt[i] & _REGION_ENTRY_ORIGIN; 1236 sgt[i] = _SEGMENT_ENTRY_EMPTY; 1237 __gmap_unshadow_pgt(sg, raddr, __va(pgt)); 1238 /* Free page table */ 1239 ptdesc = page_ptdesc(phys_to_page(pgt)); 1240 page_table_free_pgste(ptdesc); 1241 } 1242 } 1243 1244 /** 1245 * gmap_unshadow_sgt - remove a shadow segment table from a region-3 entry 1246 * @sg: pointer to the shadow guest address space structure 1247 * @raddr: 
rmap address in the shadow guest address space 1248 * 1249 * Called with the shadow->guest_table_lock 1250 */ 1251 static void gmap_unshadow_sgt(struct gmap *sg, unsigned long raddr) 1252 { 1253 unsigned long r3o, *r3e; 1254 phys_addr_t sgt; 1255 struct page *page; 1256 1257 BUG_ON(!gmap_is_shadow(sg)); 1258 r3e = gmap_table_walk(sg, raddr, 2); /* get region-3 pointer */ 1259 if (!r3e || !(*r3e & _REGION_ENTRY_ORIGIN)) 1260 return; 1261 gmap_call_notifier(sg, raddr, raddr + _REGION3_SIZE - 1); 1262 r3o = (unsigned long) (r3e - ((raddr & _REGION3_INDEX) >> _REGION3_SHIFT)); 1263 gmap_idte_one(__pa(r3o) | _ASCE_TYPE_REGION3, raddr); 1264 sgt = *r3e & _REGION_ENTRY_ORIGIN; 1265 *r3e = _REGION3_ENTRY_EMPTY; 1266 __gmap_unshadow_sgt(sg, raddr, __va(sgt)); 1267 /* Free segment table */ 1268 page = phys_to_page(sgt); 1269 __free_pages(page, CRST_ALLOC_ORDER); 1270 } 1271 1272 /** 1273 * __gmap_unshadow_r3t - remove all entries from a shadow region-3 table 1274 * @sg: pointer to the shadow guest address space structure 1275 * @raddr: address in the shadow guest address space 1276 * @r3t: pointer to the start of a shadow region-3 table 1277 * 1278 * Called with the sg->guest_table_lock 1279 */ 1280 static void __gmap_unshadow_r3t(struct gmap *sg, unsigned long raddr, 1281 unsigned long *r3t) 1282 { 1283 struct page *page; 1284 phys_addr_t sgt; 1285 int i; 1286 1287 BUG_ON(!gmap_is_shadow(sg)); 1288 for (i = 0; i < _CRST_ENTRIES; i++, raddr += _REGION3_SIZE) { 1289 if (!(r3t[i] & _REGION_ENTRY_ORIGIN)) 1290 continue; 1291 sgt = r3t[i] & _REGION_ENTRY_ORIGIN; 1292 r3t[i] = _REGION3_ENTRY_EMPTY; 1293 __gmap_unshadow_sgt(sg, raddr, __va(sgt)); 1294 /* Free segment table */ 1295 page = phys_to_page(sgt); 1296 __free_pages(page, CRST_ALLOC_ORDER); 1297 } 1298 } 1299 1300 /** 1301 * gmap_unshadow_r3t - remove a shadow region-3 table from a region-2 entry 1302 * @sg: pointer to the shadow guest address space structure 1303 * @raddr: rmap address in the shadow guest address space 1304 * 1305 * Called with the sg->guest_table_lock 1306 */ 1307 static void gmap_unshadow_r3t(struct gmap *sg, unsigned long raddr) 1308 { 1309 unsigned long r2o, *r2e; 1310 phys_addr_t r3t; 1311 struct page *page; 1312 1313 BUG_ON(!gmap_is_shadow(sg)); 1314 r2e = gmap_table_walk(sg, raddr, 3); /* get region-2 pointer */ 1315 if (!r2e || !(*r2e & _REGION_ENTRY_ORIGIN)) 1316 return; 1317 gmap_call_notifier(sg, raddr, raddr + _REGION2_SIZE - 1); 1318 r2o = (unsigned long) (r2e - ((raddr & _REGION2_INDEX) >> _REGION2_SHIFT)); 1319 gmap_idte_one(__pa(r2o) | _ASCE_TYPE_REGION2, raddr); 1320 r3t = *r2e & _REGION_ENTRY_ORIGIN; 1321 *r2e = _REGION2_ENTRY_EMPTY; 1322 __gmap_unshadow_r3t(sg, raddr, __va(r3t)); 1323 /* Free region 3 table */ 1324 page = phys_to_page(r3t); 1325 __free_pages(page, CRST_ALLOC_ORDER); 1326 } 1327 1328 /** 1329 * __gmap_unshadow_r2t - remove all entries from a shadow region-2 table 1330 * @sg: pointer to the shadow guest address space structure 1331 * @raddr: rmap address in the shadow guest address space 1332 * @r2t: pointer to the start of a shadow region-2 table 1333 * 1334 * Called with the sg->guest_table_lock 1335 */ 1336 static void __gmap_unshadow_r2t(struct gmap *sg, unsigned long raddr, 1337 unsigned long *r2t) 1338 { 1339 phys_addr_t r3t; 1340 struct page *page; 1341 int i; 1342 1343 BUG_ON(!gmap_is_shadow(sg)); 1344 for (i = 0; i < _CRST_ENTRIES; i++, raddr += _REGION2_SIZE) { 1345 if (!(r2t[i] & _REGION_ENTRY_ORIGIN)) 1346 continue; 1347 r3t = r2t[i] & _REGION_ENTRY_ORIGIN; 1348 r2t[i] = 
_REGION2_ENTRY_EMPTY; 1349 __gmap_unshadow_r3t(sg, raddr, __va(r3t)); 1350 /* Free region 3 table */ 1351 page = phys_to_page(r3t); 1352 __free_pages(page, CRST_ALLOC_ORDER); 1353 } 1354 } 1355 1356 /** 1357 * gmap_unshadow_r2t - remove a shadow region-2 table from a region-1 entry 1358 * @sg: pointer to the shadow guest address space structure 1359 * @raddr: rmap address in the shadow guest address space 1360 * 1361 * Called with the sg->guest_table_lock 1362 */ 1363 static void gmap_unshadow_r2t(struct gmap *sg, unsigned long raddr) 1364 { 1365 unsigned long r1o, *r1e; 1366 struct page *page; 1367 phys_addr_t r2t; 1368 1369 BUG_ON(!gmap_is_shadow(sg)); 1370 r1e = gmap_table_walk(sg, raddr, 4); /* get region-1 pointer */ 1371 if (!r1e || !(*r1e & _REGION_ENTRY_ORIGIN)) 1372 return; 1373 gmap_call_notifier(sg, raddr, raddr + _REGION1_SIZE - 1); 1374 r1o = (unsigned long) (r1e - ((raddr & _REGION1_INDEX) >> _REGION1_SHIFT)); 1375 gmap_idte_one(__pa(r1o) | _ASCE_TYPE_REGION1, raddr); 1376 r2t = *r1e & _REGION_ENTRY_ORIGIN; 1377 *r1e = _REGION1_ENTRY_EMPTY; 1378 __gmap_unshadow_r2t(sg, raddr, __va(r2t)); 1379 /* Free region 2 table */ 1380 page = phys_to_page(r2t); 1381 __free_pages(page, CRST_ALLOC_ORDER); 1382 } 1383 1384 /** 1385 * __gmap_unshadow_r1t - remove all entries from a shadow region-1 table 1386 * @sg: pointer to the shadow guest address space structure 1387 * @raddr: rmap address in the shadow guest address space 1388 * @r1t: pointer to the start of a shadow region-1 table 1389 * 1390 * Called with the shadow->guest_table_lock 1391 */ 1392 static void __gmap_unshadow_r1t(struct gmap *sg, unsigned long raddr, 1393 unsigned long *r1t) 1394 { 1395 unsigned long asce; 1396 struct page *page; 1397 phys_addr_t r2t; 1398 int i; 1399 1400 BUG_ON(!gmap_is_shadow(sg)); 1401 asce = __pa(r1t) | _ASCE_TYPE_REGION1; 1402 for (i = 0; i < _CRST_ENTRIES; i++, raddr += _REGION1_SIZE) { 1403 if (!(r1t[i] & _REGION_ENTRY_ORIGIN)) 1404 continue; 1405 r2t = r1t[i] & _REGION_ENTRY_ORIGIN; 1406 __gmap_unshadow_r2t(sg, raddr, __va(r2t)); 1407 /* Clear entry and flush translation r1t -> r2t */ 1408 gmap_idte_one(asce, raddr); 1409 r1t[i] = _REGION1_ENTRY_EMPTY; 1410 /* Free region 2 table */ 1411 page = phys_to_page(r2t); 1412 __free_pages(page, CRST_ALLOC_ORDER); 1413 } 1414 } 1415 1416 /** 1417 * gmap_unshadow - remove a shadow page table completely 1418 * @sg: pointer to the shadow guest address space structure 1419 * 1420 * Called with sg->guest_table_lock 1421 */ 1422 void gmap_unshadow(struct gmap *sg) 1423 { 1424 unsigned long *table; 1425 1426 BUG_ON(!gmap_is_shadow(sg)); 1427 if (sg->removed) 1428 return; 1429 sg->removed = 1; 1430 gmap_call_notifier(sg, 0, -1UL); 1431 gmap_flush_tlb(sg); 1432 table = __va(sg->asce & _ASCE_ORIGIN); 1433 switch (sg->asce & _ASCE_TYPE_MASK) { 1434 case _ASCE_TYPE_REGION1: 1435 __gmap_unshadow_r1t(sg, 0, table); 1436 break; 1437 case _ASCE_TYPE_REGION2: 1438 __gmap_unshadow_r2t(sg, 0, table); 1439 break; 1440 case _ASCE_TYPE_REGION3: 1441 __gmap_unshadow_r3t(sg, 0, table); 1442 break; 1443 case _ASCE_TYPE_SEGMENT: 1444 __gmap_unshadow_sgt(sg, 0, table); 1445 break; 1446 } 1447 } 1448 EXPORT_SYMBOL(gmap_unshadow); 1449 1450 /** 1451 * gmap_shadow_r2t - create an empty shadow region 2 table 1452 * @sg: pointer to the shadow guest address space structure 1453 * @saddr: faulting address in the shadow gmap 1454 * @r2t: parent gmap address of the region 2 table to get shadowed 1455 * @fake: r2t references contiguous guest memory block, not a r2t 1456 * 1457 * The r2t 
parameter specifies the address of the source table. The 1458 * four pages of the source table are made read-only in the parent gmap 1459 * address space. A write to the source table area @r2t will automatically 1460 * remove the shadow r2 table and all of its descendants. 1461 * 1462 * Returns 0 if successfully shadowed or already shadowed, -EAGAIN if the 1463 * shadow table structure is incomplete, -ENOMEM if out of memory and 1464 * -EFAULT if an address in the parent gmap could not be resolved. 1465 * 1466 * Called with sg->mm->mmap_lock in read. 1467 */ 1468 int gmap_shadow_r2t(struct gmap *sg, unsigned long saddr, unsigned long r2t, 1469 int fake) 1470 { 1471 unsigned long raddr, origin, offset, len; 1472 unsigned long *table; 1473 phys_addr_t s_r2t; 1474 struct page *page; 1475 int rc; 1476 1477 BUG_ON(!gmap_is_shadow(sg)); 1478 /* Allocate a shadow region second table */ 1479 page = gmap_alloc_crst(); 1480 if (!page) 1481 return -ENOMEM; 1482 s_r2t = page_to_phys(page); 1483 /* Install shadow region second table */ 1484 spin_lock(&sg->guest_table_lock); 1485 table = gmap_table_walk(sg, saddr, 4); /* get region-1 pointer */ 1486 if (!table) { 1487 rc = -EAGAIN; /* Race with unshadow */ 1488 goto out_free; 1489 } 1490 if (!(*table & _REGION_ENTRY_INVALID)) { 1491 rc = 0; /* Already established */ 1492 goto out_free; 1493 } else if (*table & _REGION_ENTRY_ORIGIN) { 1494 rc = -EAGAIN; /* Race with shadow */ 1495 goto out_free; 1496 } 1497 crst_table_init(__va(s_r2t), _REGION2_ENTRY_EMPTY); 1498 /* mark as invalid as long as the parent table is not protected */ 1499 *table = s_r2t | _REGION_ENTRY_LENGTH | 1500 _REGION_ENTRY_TYPE_R1 | _REGION_ENTRY_INVALID; 1501 if (sg->edat_level >= 1) 1502 *table |= (r2t & _REGION_ENTRY_PROTECT); 1503 if (fake) { 1504 /* nothing to protect for fake tables */ 1505 *table &= ~_REGION_ENTRY_INVALID; 1506 spin_unlock(&sg->guest_table_lock); 1507 return 0; 1508 } 1509 spin_unlock(&sg->guest_table_lock); 1510 /* Make r2t read-only in parent gmap page table */ 1511 raddr = (saddr & _REGION1_MASK) | _SHADOW_RMAP_REGION1; 1512 origin = r2t & _REGION_ENTRY_ORIGIN; 1513 offset = ((r2t & _REGION_ENTRY_OFFSET) >> 6) * PAGE_SIZE; 1514 len = ((r2t & _REGION_ENTRY_LENGTH) + 1) * PAGE_SIZE - offset; 1515 rc = gmap_protect_rmap(sg, raddr, origin + offset, len); 1516 spin_lock(&sg->guest_table_lock); 1517 if (!rc) { 1518 table = gmap_table_walk(sg, saddr, 4); 1519 if (!table || (*table & _REGION_ENTRY_ORIGIN) != s_r2t) 1520 rc = -EAGAIN; /* Race with unshadow */ 1521 else 1522 *table &= ~_REGION_ENTRY_INVALID; 1523 } else { 1524 gmap_unshadow_r2t(sg, raddr); 1525 } 1526 spin_unlock(&sg->guest_table_lock); 1527 return rc; 1528 out_free: 1529 spin_unlock(&sg->guest_table_lock); 1530 __free_pages(page, CRST_ALLOC_ORDER); 1531 return rc; 1532 } 1533 EXPORT_SYMBOL_GPL(gmap_shadow_r2t); 1534 1535 /** 1536 * gmap_shadow_r3t - create a shadow region 3 table 1537 * @sg: pointer to the shadow guest address space structure 1538 * @saddr: faulting address in the shadow gmap 1539 * @r3t: parent gmap address of the region 3 table to get shadowed 1540 * @fake: r3t references contiguous guest memory block, not a r3t 1541 * 1542 * Returns 0 if successfully shadowed or already shadowed, -EAGAIN if the 1543 * shadow table structure is incomplete, -ENOMEM if out of memory and 1544 * -EFAULT if an address in the parent gmap could not be resolved. 1545 * 1546 * Called with sg->mm->mmap_lock in read. 
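 *
 * The shadow hierarchy is built top-down on demand. A VSIE fault handler
 * would, roughly (hypothetical sketch; the table origins are guest
 * addresses read from the parent tables, and levels above the guest ASCE
 * type are skipped):
 *
 *	rc = gmap_shadow_r2t(sg, saddr, r2t_origin, fake);
 *	rc = gmap_shadow_r3t(sg, saddr, r3t_origin, fake);
 *	rc = gmap_shadow_sgt(sg, saddr, sgt_origin, fake);
 *	rc = gmap_shadow_pgt(sg, saddr, pgt_origin, fake);
 *	rc = gmap_shadow_page(sg, saddr, pte);
 *
 * with -EAGAIN at any level meaning the walk raced with unshadowing and
 * has to be retried.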
 */
int gmap_shadow_r3t(struct gmap *sg, unsigned long saddr, unsigned long r3t,
		    int fake)
{
	unsigned long raddr, origin, offset, len;
	unsigned long *table;
	phys_addr_t s_r3t;
	struct page *page;
	int rc;

	BUG_ON(!gmap_is_shadow(sg));
	/* Allocate a shadow region third table */
	page = gmap_alloc_crst();
	if (!page)
		return -ENOMEM;
	s_r3t = page_to_phys(page);
	/* Install shadow region third table */
	spin_lock(&sg->guest_table_lock);
	table = gmap_table_walk(sg, saddr, 3); /* get region-2 pointer */
	if (!table) {
		rc = -EAGAIN;		/* Race with unshadow */
		goto out_free;
	}
	if (!(*table & _REGION_ENTRY_INVALID)) {
		rc = 0;			/* Already established */
		goto out_free;
	} else if (*table & _REGION_ENTRY_ORIGIN) {
		rc = -EAGAIN;		/* Race with shadow */
		goto out_free;
	}
	crst_table_init(__va(s_r3t), _REGION3_ENTRY_EMPTY);
	/* mark as invalid as long as the parent table is not protected */
	*table = s_r3t | _REGION_ENTRY_LENGTH |
		 _REGION_ENTRY_TYPE_R2 | _REGION_ENTRY_INVALID;
	if (sg->edat_level >= 1)
		*table |= (r3t & _REGION_ENTRY_PROTECT);
	if (fake) {
		/* nothing to protect for fake tables */
		*table &= ~_REGION_ENTRY_INVALID;
		spin_unlock(&sg->guest_table_lock);
		return 0;
	}
	spin_unlock(&sg->guest_table_lock);
	/* Make r3t read-only in parent gmap page table */
	raddr = (saddr & _REGION2_MASK) | _SHADOW_RMAP_REGION2;
	origin = r3t & _REGION_ENTRY_ORIGIN;
	offset = ((r3t & _REGION_ENTRY_OFFSET) >> 6) * PAGE_SIZE;
	len = ((r3t & _REGION_ENTRY_LENGTH) + 1) * PAGE_SIZE - offset;
	rc = gmap_protect_rmap(sg, raddr, origin + offset, len);
	spin_lock(&sg->guest_table_lock);
	if (!rc) {
		table = gmap_table_walk(sg, saddr, 3);
		if (!table || (*table & _REGION_ENTRY_ORIGIN) != s_r3t)
			rc = -EAGAIN;		/* Race with unshadow */
		else
			*table &= ~_REGION_ENTRY_INVALID;
	} else {
		gmap_unshadow_r3t(sg, raddr);
	}
	spin_unlock(&sg->guest_table_lock);
	return rc;
out_free:
	spin_unlock(&sg->guest_table_lock);
	__free_pages(page, CRST_ALLOC_ORDER);
	return rc;
}
EXPORT_SYMBOL_GPL(gmap_shadow_r3t);

/**
 * gmap_shadow_sgt - create a shadow segment table
 * @sg: pointer to the shadow guest address space structure
 * @saddr: faulting address in the shadow gmap
 * @sgt: parent gmap address of the segment table to get shadowed
 * @fake: sgt references contiguous guest memory block, not a sgt
 *
 * Returns: 0 if successfully shadowed or already shadowed, -EAGAIN if the
 * shadow table structure is incomplete, -ENOMEM if out of memory and
 * -EFAULT if an address in the parent gmap could not be resolved.
 *
 * Called with sg->mm->mmap_lock in read.
 */
int gmap_shadow_sgt(struct gmap *sg, unsigned long saddr, unsigned long sgt,
		    int fake)
{
	unsigned long raddr, origin, offset, len;
	unsigned long *table;
	phys_addr_t s_sgt;
	struct page *page;
	int rc;

	BUG_ON(!gmap_is_shadow(sg) || (sgt & _REGION3_ENTRY_LARGE));
	/* Allocate a shadow segment table */
	page = gmap_alloc_crst();
	if (!page)
		return -ENOMEM;
	s_sgt = page_to_phys(page);
	/* Install shadow segment table */
	spin_lock(&sg->guest_table_lock);
	table = gmap_table_walk(sg, saddr, 2); /* get region-3 pointer */
	if (!table) {
		rc = -EAGAIN;		/* Race with unshadow */
		goto out_free;
	}
	if (!(*table & _REGION_ENTRY_INVALID)) {
		rc = 0;			/* Already established */
		goto out_free;
	} else if (*table & _REGION_ENTRY_ORIGIN) {
		rc = -EAGAIN;		/* Race with shadow */
		goto out_free;
	}
	crst_table_init(__va(s_sgt), _SEGMENT_ENTRY_EMPTY);
	/* mark as invalid as long as the parent table is not protected */
	*table = s_sgt | _REGION_ENTRY_LENGTH |
		 _REGION_ENTRY_TYPE_R3 | _REGION_ENTRY_INVALID;
	if (sg->edat_level >= 1)
		*table |= sgt & _REGION_ENTRY_PROTECT;
	if (fake) {
		/* nothing to protect for fake tables */
		*table &= ~_REGION_ENTRY_INVALID;
		spin_unlock(&sg->guest_table_lock);
		return 0;
	}
	spin_unlock(&sg->guest_table_lock);
	/* Make sgt read-only in parent gmap page table */
	raddr = (saddr & _REGION3_MASK) | _SHADOW_RMAP_REGION3;
	origin = sgt & _REGION_ENTRY_ORIGIN;
	offset = ((sgt & _REGION_ENTRY_OFFSET) >> 6) * PAGE_SIZE;
	len = ((sgt & _REGION_ENTRY_LENGTH) + 1) * PAGE_SIZE - offset;
	rc = gmap_protect_rmap(sg, raddr, origin + offset, len);
	spin_lock(&sg->guest_table_lock);
	if (!rc) {
		table = gmap_table_walk(sg, saddr, 2);
		if (!table || (*table & _REGION_ENTRY_ORIGIN) != s_sgt)
			rc = -EAGAIN;		/* Race with unshadow */
		else
			*table &= ~_REGION_ENTRY_INVALID;
	} else {
		gmap_unshadow_sgt(sg, raddr);
	}
	spin_unlock(&sg->guest_table_lock);
	return rc;
out_free:
	spin_unlock(&sg->guest_table_lock);
	__free_pages(page, CRST_ALLOC_ORDER);
	return rc;
}
EXPORT_SYMBOL_GPL(gmap_shadow_sgt);

static void gmap_pgste_set_pgt_addr(struct ptdesc *ptdesc, unsigned long pgt_addr)
{
	unsigned long *pgstes = page_to_virt(ptdesc_page(ptdesc));

	pgstes += _PAGE_ENTRIES;

	pgstes[0] &= ~PGSTE_ST2_MASK;
	pgstes[1] &= ~PGSTE_ST2_MASK;
	pgstes[2] &= ~PGSTE_ST2_MASK;
	pgstes[3] &= ~PGSTE_ST2_MASK;

	pgstes[0] |= (pgt_addr >> 16) & PGSTE_ST2_MASK;
	pgstes[1] |= pgt_addr & PGSTE_ST2_MASK;
	pgstes[2] |= (pgt_addr << 16) & PGSTE_ST2_MASK;
	pgstes[3] |= (pgt_addr << 32) & PGSTE_ST2_MASK;
}

/**
 * gmap_shadow_pgt - instantiate a shadow page table
 * @sg: pointer to the shadow guest address space structure
 * @saddr: faulting address in the shadow gmap
 * @pgt: parent gmap address of the page table to get shadowed
 * @fake: pgt references contiguous guest memory block, not a pgtable
 *
 * Returns 0 if successfully shadowed or already shadowed, -EAGAIN if the
 * shadow table structure is incomplete, -ENOMEM if out of memory and
 * -EFAULT if an address in the parent gmap could not be resolved.
 *
 * Called with sg->mm->mmap_lock in read.
 */
int gmap_shadow_pgt(struct gmap *sg, unsigned long
saddr, unsigned long pgt, 1726 int fake) 1727 { 1728 unsigned long raddr, origin; 1729 unsigned long *table; 1730 struct ptdesc *ptdesc; 1731 phys_addr_t s_pgt; 1732 int rc; 1733 1734 BUG_ON(!gmap_is_shadow(sg) || (pgt & _SEGMENT_ENTRY_LARGE)); 1735 /* Allocate a shadow page table */ 1736 ptdesc = page_table_alloc_pgste(sg->mm); 1737 if (!ptdesc) 1738 return -ENOMEM; 1739 origin = pgt & _SEGMENT_ENTRY_ORIGIN; 1740 if (fake) 1741 origin |= GMAP_SHADOW_FAKE_TABLE; 1742 gmap_pgste_set_pgt_addr(ptdesc, origin); 1743 s_pgt = page_to_phys(ptdesc_page(ptdesc)); 1744 /* Install shadow page table */ 1745 spin_lock(&sg->guest_table_lock); 1746 table = gmap_table_walk(sg, saddr, 1); /* get segment pointer */ 1747 if (!table) { 1748 rc = -EAGAIN; /* Race with unshadow */ 1749 goto out_free; 1750 } 1751 if (!(*table & _SEGMENT_ENTRY_INVALID)) { 1752 rc = 0; /* Already established */ 1753 goto out_free; 1754 } else if (*table & _SEGMENT_ENTRY_ORIGIN) { 1755 rc = -EAGAIN; /* Race with shadow */ 1756 goto out_free; 1757 } 1758 /* mark as invalid as long as the parent table is not protected */ 1759 *table = (unsigned long) s_pgt | _SEGMENT_ENTRY | 1760 (pgt & _SEGMENT_ENTRY_PROTECT) | _SEGMENT_ENTRY_INVALID; 1761 if (fake) { 1762 /* nothing to protect for fake tables */ 1763 *table &= ~_SEGMENT_ENTRY_INVALID; 1764 spin_unlock(&sg->guest_table_lock); 1765 return 0; 1766 } 1767 spin_unlock(&sg->guest_table_lock); 1768 /* Make pgt read-only in parent gmap page table (not the pgste) */ 1769 raddr = (saddr & _SEGMENT_MASK) | _SHADOW_RMAP_SEGMENT; 1770 origin = pgt & _SEGMENT_ENTRY_ORIGIN & PAGE_MASK; 1771 rc = gmap_protect_rmap(sg, raddr, origin, PAGE_SIZE); 1772 spin_lock(&sg->guest_table_lock); 1773 if (!rc) { 1774 table = gmap_table_walk(sg, saddr, 1); 1775 if (!table || (*table & _SEGMENT_ENTRY_ORIGIN) != s_pgt) 1776 rc = -EAGAIN; /* Race with unshadow */ 1777 else 1778 *table &= ~_SEGMENT_ENTRY_INVALID; 1779 } else { 1780 gmap_unshadow_pgt(sg, raddr); 1781 } 1782 spin_unlock(&sg->guest_table_lock); 1783 return rc; 1784 out_free: 1785 spin_unlock(&sg->guest_table_lock); 1786 page_table_free_pgste(ptdesc); 1787 return rc; 1788 1789 } 1790 EXPORT_SYMBOL_GPL(gmap_shadow_pgt); 1791 1792 /** 1793 * gmap_shadow_page - create a shadow page mapping 1794 * @sg: pointer to the shadow guest address space structure 1795 * @saddr: faulting address in the shadow gmap 1796 * @pte: pte in parent gmap address space to get shadowed 1797 * 1798 * Returns 0 if successfully shadowed or already shadowed, -EAGAIN if the 1799 * shadow table structure is incomplete, -ENOMEM if out of memory and 1800 * -EFAULT if an address in the parent gmap could not be resolved. 1801 * 1802 * Called with sg->mm->mmap_lock in read. 1803 */ 1804 int gmap_shadow_page(struct gmap *sg, unsigned long saddr, pte_t pte) 1805 { 1806 struct gmap *parent; 1807 struct gmap_rmap *rmap; 1808 unsigned long vmaddr, paddr; 1809 spinlock_t *ptl; 1810 pte_t *sptep, *tptep; 1811 int prot; 1812 int rc; 1813 1814 BUG_ON(!gmap_is_shadow(sg)); 1815 parent = sg->parent; 1816 prot = (pte_val(pte) & _PAGE_PROTECT) ? 
/**
 * gmap_shadow_page - create a shadow page mapping
 * @sg: pointer to the shadow guest address space structure
 * @saddr: faulting address in the shadow gmap
 * @pte: pte in parent gmap address space to get shadowed
 *
 * Returns 0 if successfully shadowed or already shadowed, -EAGAIN if the
 * shadow table structure is incomplete, -ENOMEM if out of memory and
 * -EFAULT if an address in the parent gmap could not be resolved.
 *
 * Called with sg->mm->mmap_lock in read.
 */
int gmap_shadow_page(struct gmap *sg, unsigned long saddr, pte_t pte)
{
	struct gmap *parent;
	struct gmap_rmap *rmap;
	unsigned long vmaddr, paddr;
	spinlock_t *ptl;
	pte_t *sptep, *tptep;
	int prot;
	int rc;

	BUG_ON(!gmap_is_shadow(sg));
	parent = sg->parent;
	prot = (pte_val(pte) & _PAGE_PROTECT) ? PROT_READ : PROT_WRITE;

	rmap = kzalloc(sizeof(*rmap), GFP_KERNEL_ACCOUNT);
	if (!rmap)
		return -ENOMEM;
	rmap->raddr = (saddr & PAGE_MASK) | _SHADOW_RMAP_PGTABLE;

	while (1) {
		paddr = pte_val(pte) & PAGE_MASK;
		vmaddr = __gmap_translate(parent, paddr);
		if (IS_ERR_VALUE(vmaddr)) {
			rc = vmaddr;
			break;
		}
		rc = radix_tree_preload(GFP_KERNEL_ACCOUNT);
		if (rc)
			break;
		rc = -EAGAIN;
		sptep = gmap_pte_op_walk(parent, paddr, &ptl);
		if (sptep) {
			spin_lock(&sg->guest_table_lock);
			/* Get page table pointer */
			tptep = (pte_t *) gmap_table_walk(sg, saddr, 0);
			if (!tptep) {
				spin_unlock(&sg->guest_table_lock);
				gmap_pte_op_end(sptep, ptl);
				radix_tree_preload_end();
				break;
			}
			rc = ptep_shadow_pte(sg->mm, saddr, sptep, tptep, pte);
			if (rc > 0) {
				/* Success and a new mapping */
				gmap_insert_rmap(sg, vmaddr, rmap);
				rmap = NULL;
				rc = 0;
			}
			gmap_pte_op_end(sptep, ptl);
			spin_unlock(&sg->guest_table_lock);
		}
		radix_tree_preload_end();
		if (!rc)
			break;
		rc = gmap_pte_op_fixup(parent, paddr, vmaddr, prot);
		if (rc)
			break;
	}
	kfree(rmap);
	return rc;
}
EXPORT_SYMBOL_GPL(gmap_shadow_page);

/*
 * gmap_shadow_notify - handle notifications for shadow gmap
 *
 * Called with sg->parent->shadow_lock.
 */
static void gmap_shadow_notify(struct gmap *sg, unsigned long vmaddr,
			       unsigned long gaddr)
{
	struct gmap_rmap *rmap, *rnext, *head;
	unsigned long start, end, bits, raddr;

	BUG_ON(!gmap_is_shadow(sg));

	spin_lock(&sg->guest_table_lock);
	if (sg->removed) {
		spin_unlock(&sg->guest_table_lock);
		return;
	}
	/* Check for top level table */
	start = sg->orig_asce & _ASCE_ORIGIN;
	end = start + ((sg->orig_asce & _ASCE_TABLE_LENGTH) + 1) * PAGE_SIZE;
	if (!(sg->orig_asce & _ASCE_REAL_SPACE) && gaddr >= start &&
	    gaddr < end) {
		/* The complete shadow table has to go */
		gmap_unshadow(sg);
		spin_unlock(&sg->guest_table_lock);
		list_del(&sg->list);
		gmap_put(sg);
		return;
	}
	/* Remove the page table tree for one specific entry */
	head = radix_tree_delete(&sg->host_to_rmap, vmaddr >> PAGE_SHIFT);
	gmap_for_each_rmap_safe(rmap, rnext, head) {
		bits = rmap->raddr & _SHADOW_RMAP_MASK;
		raddr = rmap->raddr ^ bits;
		switch (bits) {
		case _SHADOW_RMAP_REGION1:
			gmap_unshadow_r2t(sg, raddr);
			break;
		case _SHADOW_RMAP_REGION2:
			gmap_unshadow_r3t(sg, raddr);
			break;
		case _SHADOW_RMAP_REGION3:
			gmap_unshadow_sgt(sg, raddr);
			break;
		case _SHADOW_RMAP_SEGMENT:
			gmap_unshadow_pgt(sg, raddr);
			break;
		case _SHADOW_RMAP_PGTABLE:
			gmap_unshadow_page(sg, raddr);
			break;
		}
		kfree(rmap);
	}
	spin_unlock(&sg->guest_table_lock);
}

/**
 * ptep_notify - call all invalidation callbacks for a specific pte.
 * @mm: pointer to the process mm_struct
 * @vmaddr: virtual address in the process address space
 * @pte: pointer to the page table entry
 * @bits: bits from the pgste that caused the notify call
 *
 * This function is assumed to be called with the page table lock held
 * for the pte to notify.
 */
void ptep_notify(struct mm_struct *mm, unsigned long vmaddr,
		 pte_t *pte, unsigned long bits)
{
	unsigned long offset, gaddr = 0;
	struct gmap *gmap, *sg, *next;

	offset = ((unsigned long) pte) & (255 * sizeof(pte_t));
	offset = offset * (PAGE_SIZE / sizeof(pte_t));
	rcu_read_lock();
	list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) {
		spin_lock(&gmap->guest_table_lock);
		gaddr = host_to_guest_lookup(gmap, vmaddr) + offset;
		spin_unlock(&gmap->guest_table_lock);
		if (!IS_GADDR_VALID(gaddr))
			continue;

		if (!list_empty(&gmap->children) && (bits & PGSTE_VSIE_BIT)) {
			spin_lock(&gmap->shadow_lock);
			list_for_each_entry_safe(sg, next,
						 &gmap->children, list)
				gmap_shadow_notify(sg, vmaddr, gaddr);
			spin_unlock(&gmap->shadow_lock);
		}
		if (bits & PGSTE_IN_BIT)
			gmap_call_notifier(gmap, gaddr, gaddr + PAGE_SIZE - 1);
	}
	rcu_read_unlock();
}
EXPORT_SYMBOL_GPL(ptep_notify);
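
/*
 * Worked example (comment added for clarity, not in the original): the two
 * offset lines in ptep_notify() turn the pte's byte offset within its 2 KB
 * page table into the corresponding byte offset inside the guest segment.
 * With 8-byte ptes, a pte pointer at byte offset 0x88 is entry 17, so
 * 0x88 * (PAGE_SIZE / 8) = 17 * PAGE_SIZE = 0x11000, which is the distance
 * of that page from the start of the 1 MB segment:
 *
 *	offset = 0x88 & (255 * sizeof(pte_t));		// 0x88
 *	offset = 0x88 * (PAGE_SIZE / sizeof(pte_t));	// 0x11000
 */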
static void pmdp_notify_gmap(struct gmap *gmap, pmd_t *pmdp,
			     unsigned long gaddr)
{
	set_pmd(pmdp, clear_pmd_bit(*pmdp, __pgprot(_SEGMENT_ENTRY_GMAP_IN)));
	gmap_call_notifier(gmap, gaddr, gaddr + HPAGE_SIZE - 1);
}

/**
 * gmap_pmdp_xchg - exchange a gmap pmd with another
 * @gmap: pointer to the guest address space structure
 * @pmdp: pointer to the pmd entry
 * @new: replacement entry
 * @gaddr: the affected guest address
 *
 * This function is assumed to be called with the guest_table_lock
 * held.
 */
static void gmap_pmdp_xchg(struct gmap *gmap, pmd_t *pmdp, pmd_t new,
			   unsigned long gaddr)
{
	gaddr &= HPAGE_MASK;
	pmdp_notify_gmap(gmap, pmdp, gaddr);
	new = clear_pmd_bit(new, __pgprot(_SEGMENT_ENTRY_GMAP_IN));
	if (machine_has_tlb_guest())
		__pmdp_idte(gaddr, (pmd_t *)pmdp, IDTE_GUEST_ASCE, gmap->asce,
			    IDTE_GLOBAL);
	else if (cpu_has_idte())
		__pmdp_idte(gaddr, (pmd_t *)pmdp, 0, 0, IDTE_GLOBAL);
	else
		__pmdp_csp(pmdp);
	set_pmd(pmdp, new);
}

static void gmap_pmdp_clear(struct mm_struct *mm, unsigned long vmaddr,
			    int purge)
{
	pmd_t *pmdp;
	struct gmap *gmap;
	unsigned long gaddr;

	rcu_read_lock();
	list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) {
		spin_lock(&gmap->guest_table_lock);
		pmdp = host_to_guest_pmd_delete(gmap, vmaddr, &gaddr);
		if (pmdp) {
			pmdp_notify_gmap(gmap, pmdp, gaddr);
			WARN_ON(pmd_val(*pmdp) & ~(_SEGMENT_ENTRY_HARDWARE_BITS_LARGE |
						   _SEGMENT_ENTRY_GMAP_UC |
						   _SEGMENT_ENTRY));
			if (purge)
				__pmdp_csp(pmdp);
			set_pmd(pmdp, __pmd(_SEGMENT_ENTRY_EMPTY));
		}
		spin_unlock(&gmap->guest_table_lock);
	}
	rcu_read_unlock();
}

/**
 * gmap_pmdp_invalidate - invalidate all affected guest pmd entries without
 *                        flushing
 * @mm: pointer to the process mm_struct
 * @vmaddr: virtual address in the process address space
 */
void gmap_pmdp_invalidate(struct mm_struct *mm, unsigned long vmaddr)
{
	gmap_pmdp_clear(mm, vmaddr, 0);
}
EXPORT_SYMBOL_GPL(gmap_pmdp_invalidate);

/**
 * gmap_pmdp_csp - csp all affected guest pmd entries
 * @mm: pointer to the process mm_struct
 * @vmaddr: virtual address in the process address space
 */
void gmap_pmdp_csp(struct mm_struct *mm, unsigned long vmaddr)
{
	gmap_pmdp_clear(mm, vmaddr, 1);
}
EXPORT_SYMBOL_GPL(gmap_pmdp_csp);
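
/*
 * Sketch only (hypothetical caller, not part of this file): the host mm code
 * is expected to fan a pmd invalidation out to every gmap attached to the mm
 * before it replaces the host entry, roughly:
 *
 *	if (mm_has_pgste(mm))
 *		gmap_pmdp_invalidate(mm, addr);
 *	// ...then exchange/flush the host pmd itself...
 */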
/**
 * gmap_pmdp_idte_local - invalidate and clear a guest pmd entry (local TLB flush)
 * @mm: pointer to the process mm_struct
 * @vmaddr: virtual address in the process address space
 */
void gmap_pmdp_idte_local(struct mm_struct *mm, unsigned long vmaddr)
{
	unsigned long gaddr;
	struct gmap *gmap;
	pmd_t *pmdp;

	rcu_read_lock();
	list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) {
		spin_lock(&gmap->guest_table_lock);
		pmdp = host_to_guest_pmd_delete(gmap, vmaddr, &gaddr);
		if (pmdp) {
			pmdp_notify_gmap(gmap, pmdp, gaddr);
			WARN_ON(pmd_val(*pmdp) & ~(_SEGMENT_ENTRY_HARDWARE_BITS_LARGE |
						   _SEGMENT_ENTRY_GMAP_UC |
						   _SEGMENT_ENTRY));
			if (machine_has_tlb_guest())
				__pmdp_idte(gaddr, pmdp, IDTE_GUEST_ASCE,
					    gmap->asce, IDTE_LOCAL);
			else if (cpu_has_idte())
				__pmdp_idte(gaddr, pmdp, 0, 0, IDTE_LOCAL);
			*pmdp = __pmd(_SEGMENT_ENTRY_EMPTY);
		}
		spin_unlock(&gmap->guest_table_lock);
	}
	rcu_read_unlock();
}
EXPORT_SYMBOL_GPL(gmap_pmdp_idte_local);

/**
 * gmap_pmdp_idte_global - invalidate and clear a guest pmd entry (global TLB flush)
 * @mm: pointer to the process mm_struct
 * @vmaddr: virtual address in the process address space
 */
void gmap_pmdp_idte_global(struct mm_struct *mm, unsigned long vmaddr)
{
	unsigned long gaddr;
	struct gmap *gmap;
	pmd_t *pmdp;

	rcu_read_lock();
	list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) {
		spin_lock(&gmap->guest_table_lock);
		pmdp = host_to_guest_pmd_delete(gmap, vmaddr, &gaddr);
		if (pmdp) {
			pmdp_notify_gmap(gmap, pmdp, gaddr);
			WARN_ON(pmd_val(*pmdp) & ~(_SEGMENT_ENTRY_HARDWARE_BITS_LARGE |
						   _SEGMENT_ENTRY_GMAP_UC |
						   _SEGMENT_ENTRY));
			if (machine_has_tlb_guest())
				__pmdp_idte(gaddr, pmdp, IDTE_GUEST_ASCE,
					    gmap->asce, IDTE_GLOBAL);
			else if (cpu_has_idte())
				__pmdp_idte(gaddr, pmdp, 0, 0, IDTE_GLOBAL);
			else
				__pmdp_csp(pmdp);
			*pmdp = __pmd(_SEGMENT_ENTRY_EMPTY);
		}
		spin_unlock(&gmap->guest_table_lock);
	}
	rcu_read_unlock();
}
EXPORT_SYMBOL_GPL(gmap_pmdp_idte_global);
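
/*
 * Sketch only (hypothetical caller, not part of this file): which of the two
 * variants is used is expected to mirror the scope of the host TLB flush,
 * e.g.:
 *
 *	if (cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
 *		gmap_pmdp_idte_local(mm, addr);
 *	else
 *		gmap_pmdp_idte_global(mm, addr);
 */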
/**
 * gmap_test_and_clear_dirty_pmd - test and reset segment dirty status
 * @gmap: pointer to guest address space
 * @pmdp: pointer to the pmd to be tested
 * @gaddr: virtual address in the guest address space
 *
 * This function is assumed to be called with the guest_table_lock
 * held.
 */
static bool gmap_test_and_clear_dirty_pmd(struct gmap *gmap, pmd_t *pmdp,
					  unsigned long gaddr)
{
	if (pmd_val(*pmdp) & _SEGMENT_ENTRY_INVALID)
		return false;

	/* Already protected memory, which did not change, is clean */
	if (pmd_val(*pmdp) & _SEGMENT_ENTRY_PROTECT &&
	    !(pmd_val(*pmdp) & _SEGMENT_ENTRY_GMAP_UC))
		return false;

	/* Clear UC indication and reset protection */
	set_pmd(pmdp, clear_pmd_bit(*pmdp, __pgprot(_SEGMENT_ENTRY_GMAP_UC)));
	gmap_protect_pmd(gmap, gaddr, pmdp, PROT_READ, 0);
	return true;
}

/**
 * gmap_sync_dirty_log_pmd - set bitmap based on dirty status of segment
 * @gmap: pointer to guest address space
 * @bitmap: dirty bitmap for this pmd
 * @gaddr: virtual address in the guest address space
 * @vmaddr: virtual address in the host address space
 *
 * This function is assumed to be called with the guest_table_lock
 * held.
 */
void gmap_sync_dirty_log_pmd(struct gmap *gmap, unsigned long bitmap[4],
			     unsigned long gaddr, unsigned long vmaddr)
{
	int i;
	pmd_t *pmdp;
	pte_t *ptep;
	spinlock_t *ptl;

	pmdp = gmap_pmd_op_walk(gmap, gaddr);
	if (!pmdp)
		return;

	if (pmd_leaf(*pmdp)) {
		if (gmap_test_and_clear_dirty_pmd(gmap, pmdp, gaddr))
			bitmap_fill(bitmap, _PAGE_ENTRIES);
	} else {
		for (i = 0; i < _PAGE_ENTRIES; i++, vmaddr += PAGE_SIZE) {
			ptep = pte_alloc_map_lock(gmap->mm, pmdp, vmaddr, &ptl);
			if (!ptep)
				continue;
			if (ptep_test_and_clear_uc(gmap->mm, vmaddr, ptep))
				set_bit(i, bitmap);
			pte_unmap_unlock(ptep, ptl);
		}
	}
	gmap_pmd_op_end(gmap, pmdp);
}
EXPORT_SYMBOL_GPL(gmap_sync_dirty_log_pmd);
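
/*
 * Sketch only (hypothetical caller, not part of this file): dirty logging is
 * expected to walk the guest address space one segment at a time and to
 * transfer the per-page bits into its own dirty bitmap, roughly:
 *
 *	unsigned long bitmap[4] = { 0 };
 *	int i;
 *
 *	gmap_sync_dirty_log_pmd(gmap, bitmap, gaddr, vmaddr);
 *	for (i = 0; i < _PAGE_ENTRIES; i++)
 *		if (test_bit(i, bitmap))
 *			mark_page_dirty(kvm, gfn + i);
 *
 * kvm and gfn are placeholders for the caller's VM handle and the guest
 * frame number corresponding to gaddr.
 */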
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
static int thp_split_walk_pmd_entry(pmd_t *pmd, unsigned long addr,
				    unsigned long end, struct mm_walk *walk)
{
	struct vm_area_struct *vma = walk->vma;

	split_huge_pmd(vma, pmd, addr);
	return 0;
}

static const struct mm_walk_ops thp_split_walk_ops = {
	.pmd_entry = thp_split_walk_pmd_entry,
	.walk_lock = PGWALK_WRLOCK_VERIFY,
};

static inline void thp_split_mm(struct mm_struct *mm)
{
	struct vm_area_struct *vma;
	VMA_ITERATOR(vmi, mm, 0);

	for_each_vma(vmi, vma) {
		vm_flags_mod(vma, VM_NOHUGEPAGE, VM_HUGEPAGE);
		walk_page_vma(vma, &thp_split_walk_ops, NULL);
	}
	mm->def_flags |= VM_NOHUGEPAGE;
}
#else
static inline void thp_split_mm(struct mm_struct *mm)
{
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */

/*
 * Switch on pgstes for the current userspace process (for kvm).
 */
int s390_enable_sie(void)
{
	struct mm_struct *mm = current->mm;

	/* Do we have pgstes? If yes, we are done */
	if (mm_has_pgste(mm))
		return 0;
	mmap_write_lock(mm);
	mm->context.has_pgste = 1;
	/* split thp mappings and disable thp for future mappings */
	thp_split_mm(mm);
	mmap_write_unlock(mm);
	return 0;
}
EXPORT_SYMBOL_GPL(s390_enable_sie);

/*
 * Enable storage key handling from now on and initialize the storage
 * keys with the default key.
 */
static int __s390_enable_skey_pte(pte_t *pte, unsigned long addr,
				  unsigned long next, struct mm_walk *walk)
{
	/* Clear storage key */
	ptep_zap_key(walk->mm, addr, pte);
	return 0;
}

/*
 * Give a chance to schedule after setting storage keys for 256 pages.
 * We only hold the mmap lock, which is an rwsem, and the kvm srcu.
 * Both can sleep.
 */
static int __s390_enable_skey_pmd(pmd_t *pmd, unsigned long addr,
				  unsigned long next, struct mm_walk *walk)
{
	cond_resched();
	return 0;
}

static int __s390_enable_skey_hugetlb(pte_t *pte, unsigned long addr,
				      unsigned long hmask, unsigned long next,
				      struct mm_walk *walk)
{
	pmd_t *pmd = (pmd_t *)pte;
	unsigned long start, end;
	struct folio *folio = page_folio(pmd_page(*pmd));

	/*
	 * The write check makes sure we do not set a key on shared
	 * memory. This is needed as the walker does not differentiate
	 * between actual guest memory and the process executable or
	 * shared libraries.
	 */
	if (pmd_val(*pmd) & _SEGMENT_ENTRY_INVALID ||
	    !(pmd_val(*pmd) & _SEGMENT_ENTRY_WRITE))
		return 0;

	start = pmd_val(*pmd) & HPAGE_MASK;
	end = start + HPAGE_SIZE;
	__storage_key_init_range(start, end);
	set_bit(PG_arch_1, &folio->flags);
	cond_resched();
	return 0;
}

static const struct mm_walk_ops enable_skey_walk_ops = {
	.hugetlb_entry = __s390_enable_skey_hugetlb,
	.pte_entry = __s390_enable_skey_pte,
	.pmd_entry = __s390_enable_skey_pmd,
	.walk_lock = PGWALK_WRLOCK,
};

int s390_enable_skey(void)
{
	struct mm_struct *mm = current->mm;
	int rc = 0;

	mmap_write_lock(mm);
	if (mm_uses_skeys(mm))
		goto out_up;

	mm->context.uses_skeys = 1;
	rc = gmap_helper_disable_cow_sharing();
	if (rc) {
		mm->context.uses_skeys = 0;
		goto out_up;
	}
	walk_page_range(mm, 0, TASK_SIZE, &enable_skey_walk_ops, NULL);

out_up:
	mmap_write_unlock(mm);
	return rc;
}
EXPORT_SYMBOL_GPL(s390_enable_skey);
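
/*
 * Sketch only (hypothetical call order, not part of this file): a hypervisor
 * is expected to switch the mm to pgste mode once, before backing it with a
 * gmap, and to enable storage keys lazily on first guest use, e.g.:
 *
 *	rc = s390_enable_sie();
 *	if (!rc)
 *		gmap = gmap_create(current->mm, limit);
 *	// ...later, on the guest's first storage-key operation:
 *	rc = s390_enable_skey();
 */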
/*
 * Reset CMMA state, make all pages stable again.
 */
static int __s390_reset_cmma(pte_t *pte, unsigned long addr,
			     unsigned long next, struct mm_walk *walk)
{
	ptep_zap_unused(walk->mm, addr, pte, 1);
	return 0;
}

static const struct mm_walk_ops reset_cmma_walk_ops = {
	.pte_entry = __s390_reset_cmma,
	.walk_lock = PGWALK_WRLOCK,
};

void s390_reset_cmma(struct mm_struct *mm)
{
	mmap_write_lock(mm);
	walk_page_range(mm, 0, TASK_SIZE, &reset_cmma_walk_ops, NULL);
	mmap_write_unlock(mm);
}
EXPORT_SYMBOL_GPL(s390_reset_cmma);

#define GATHER_GET_PAGES 32

struct reset_walk_state {
	unsigned long next;
	unsigned long count;
	unsigned long pfns[GATHER_GET_PAGES];
};

static int s390_gather_pages(pte_t *ptep, unsigned long addr,
			     unsigned long next, struct mm_walk *walk)
{
	struct reset_walk_state *p = walk->private;
	pte_t pte = READ_ONCE(*ptep);

	if (pte_present(pte)) {
		/* we have a reference from the mapping, take an extra one */
		get_page(phys_to_page(pte_val(pte)));
		p->pfns[p->count] = phys_to_pfn(pte_val(pte));
		p->next = next;
		p->count++;
	}
	return p->count >= GATHER_GET_PAGES;
}

static const struct mm_walk_ops gather_pages_ops = {
	.pte_entry = s390_gather_pages,
	.walk_lock = PGWALK_RDLOCK,
};

/*
 * Call the Destroy secure page UVC on each page in the given array of PFNs.
 * Each page needs to have an extra reference, which will be released here.
 */
void s390_uv_destroy_pfns(unsigned long count, unsigned long *pfns)
{
	struct folio *folio;
	unsigned long i;

	for (i = 0; i < count; i++) {
		folio = pfn_folio(pfns[i]);
		/* we always have an extra reference */
		uv_destroy_folio(folio);
		/* get rid of the extra reference */
		folio_put(folio);
		cond_resched();
	}
}
EXPORT_SYMBOL_GPL(s390_uv_destroy_pfns);
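
/*
 * Note added for clarity (not in the original): a non-zero return value from
 * a .pte_entry callback stops walk_page_range() and is passed back to its
 * caller, which is how s390_gather_pages() limits itself to GATHER_GET_PAGES
 * pages per batch. A sketch of the resulting contract:
 *
 *	r = walk_page_range(mm, state.next, end, &gather_pages_ops, &state);
 *	// r == 1: the batch is full, resume later from state.next
 *	// r == 0: the whole range has been walked
 */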
/**
 * __s390_uv_destroy_range - Call the destroy secure page UVC on each page
 * in the given range of the given address space.
 * @mm: the mm to operate on
 * @start: the start of the range
 * @end: the end of the range
 * @interruptible: if not 0, stop when a fatal signal is received
 *
 * Walk the given range of the given address space and call the destroy
 * secure page UVC on each page. Optionally exit early if a fatal signal is
 * pending.
 *
 * Return: 0 on success, -EINTR if the function stopped before completing
 */
int __s390_uv_destroy_range(struct mm_struct *mm, unsigned long start,
			    unsigned long end, bool interruptible)
{
	struct reset_walk_state state = { .next = start };
	int r = 1;

	while (r > 0) {
		state.count = 0;
		mmap_read_lock(mm);
		r = walk_page_range(mm, state.next, end, &gather_pages_ops, &state);
		mmap_read_unlock(mm);
		cond_resched();
		s390_uv_destroy_pfns(state.count, state.pfns);
		if (interruptible && fatal_signal_pending(current))
			return -EINTR;
	}
	return 0;
}
EXPORT_SYMBOL_GPL(__s390_uv_destroy_range);

/**
 * s390_replace_asce - Try to replace the current ASCE of a gmap with a copy
 * @gmap: the gmap whose ASCE needs to be replaced
 *
 * If the ASCE is of SEGMENT type, this function returns -EINVAL: replacing
 * such an ASCE would leave the pointers in the host_to_guest radix tree
 * pointing to the wrong pages, causing use-after-free and memory corruption.
 * If the allocation of the new top level page table fails, the ASCE is not
 * replaced.
 * In any case, the old ASCE is always removed from the gmap CRST list.
 * Therefore the caller has to make sure to save a pointer to it
 * beforehand, unless a leak is actually intended.
 */
int s390_replace_asce(struct gmap *gmap)
{
	unsigned long asce;
	struct page *page;
	void *table;

	/* Replacing segment type ASCEs would cause serious issues */
	if ((gmap->asce & _ASCE_TYPE_MASK) == _ASCE_TYPE_SEGMENT)
		return -EINVAL;

	page = gmap_alloc_crst();
	if (!page)
		return -ENOMEM;
	table = page_to_virt(page);
	memcpy(table, gmap->table, 1UL << (CRST_ALLOC_ORDER + PAGE_SHIFT));

	/* Set new table origin while preserving existing ASCE control bits */
	asce = (gmap->asce & ~_ASCE_ORIGIN) | __pa(table);
	WRITE_ONCE(gmap->asce, asce);
	WRITE_ONCE(gmap->mm->context.gmap_asce, asce);
	WRITE_ONCE(gmap->table, table);

	return 0;
}
EXPORT_SYMBOL_GPL(s390_replace_asce);
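
/*
 * Sketch only (hypothetical caller, not part of this file): a teardown path
 * that wants to dispose of the old top-level table later must save it before
 * the replacement, since s390_replace_asce() does not keep a reference to it:
 *
 *	old_table = gmap->table;
 *	rc = s390_replace_asce(gmap);
 *	if (!rc) {
 *		// old_table can now be torn down asynchronously
 *	}
 */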