// SPDX-License-Identifier: GPL-2.0
/*
 * guest access functions
 *
 * Copyright IBM Corp. 2014
 *
 */

#include <linux/vmalloc.h>
#include <linux/mm_types.h>
#include <linux/err.h>
#include <linux/pgtable.h>
#include <linux/bitfield.h>
#include <asm/access-regs.h>
#include <asm/fault.h>
#include <asm/gmap.h>
#include <asm/dat-bits.h>
#include "kvm-s390.h"
#include "gaccess.h"

#define GMAP_SHADOW_FAKE_TABLE 1ULL

/*
 * vaddress union in order to easily decode a virtual address into its
 * region first index, region second index etc. parts.
 */
union vaddress {
	unsigned long addr;
	struct {
		unsigned long rfx : 11;
		unsigned long rsx : 11;
		unsigned long rtx : 11;
		unsigned long sx  : 11;
		unsigned long px  : 8;
		unsigned long bx  : 12;
	};
	struct {
		unsigned long rfx01 : 2;
		unsigned long	    : 9;
		unsigned long rsx01 : 2;
		unsigned long	    : 9;
		unsigned long rtx01 : 2;
		unsigned long	    : 9;
		unsigned long sx01  : 2;
		unsigned long	    : 29;
	};
};

/*
 * raddress union which will contain the result (real or absolute address)
 * after a page table walk. The rfaa, sfaa and pfra members are used to
 * simply assign them the value of a region, segment or page table entry.
 */
union raddress {
	unsigned long addr;
	unsigned long rfaa : 33; /* Region-Frame Absolute Address */
	unsigned long sfaa : 44; /* Segment-Frame Absolute Address */
	unsigned long pfra : 52; /* Page-Frame Real Address */
};

union alet {
	u32 val;
	struct {
		u32 reserved : 7;
		u32 p        : 1;
		u32 alesn    : 8;
		u32 alen     : 16;
	};
};

union ald {
	u32 val;
	struct {
		u32     : 1;
		u32 alo : 24;
		u32 all : 7;
	};
};

struct ale {
	unsigned long i      : 1;  /* ALEN-Invalid Bit */
	unsigned long        : 5;
	unsigned long fo     : 1;  /* Fetch-Only Bit */
	unsigned long p      : 1;  /* Private Bit */
	unsigned long alesn  : 8;  /* Access-List-Entry Sequence Number */
	unsigned long aleax  : 16; /* Access-List-Entry Authorization Index */
	unsigned long        : 32;
	unsigned long        : 1;
	unsigned long asteo  : 25; /* ASN-Second-Table-Entry Origin */
	unsigned long        : 6;
	unsigned long astesn : 32; /* ASTE Sequence Number */
};

struct aste {
	unsigned long i      : 1;  /* ASX-Invalid Bit */
	unsigned long ato    : 29; /* Authority-Table Origin */
	unsigned long        : 1;
	unsigned long b      : 1;  /* Base-Space Bit */
	unsigned long ax     : 16; /* Authorization Index */
	unsigned long atl    : 12; /* Authority-Table Length */
	unsigned long        : 2;
	unsigned long ca     : 1;  /* Controlled-ASN Bit */
	unsigned long ra     : 1;  /* Reusable-ASN Bit */
	unsigned long asce   : 64; /* Address-Space-Control Element */
	unsigned long ald    : 32;
	unsigned long astesn : 32;
	/* .. more fields there */
};

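/*
 * Example: decoding a virtual address with the vaddress union above.
 * Bit 0 is the leftmost address bit, so the first bit-field struct yields
 * rfx = bits 0-10, rsx = bits 11-21, rtx = bits 22-32, sx = bits 33-43,
 * px = bits 44-51 and bx = bits 52-63, while rfx01/rsx01/rtx01/sx01 pick
 * up only the two most significant bits of each index. A minimal sketch:
 *
 *	union vaddress vaddr = { .addr = 0x0000000000101000UL };
 *
 *	vaddr.rfx == 0, vaddr.rsx == 0, vaddr.rtx == 0,
 *	vaddr.sx  == 1 (second segment-table entry),
 *	vaddr.px  == 1 (second page-table entry),
 *	vaddr.bx  == 0 (first byte within the page)
 */
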
int ipte_lock_held(struct kvm *kvm)
{
	if (sclp.has_siif) {
		int rc;

		read_lock(&kvm->arch.sca_lock);
		rc = kvm_s390_get_ipte_control(kvm)->kh != 0;
		read_unlock(&kvm->arch.sca_lock);
		return rc;
	}
	return kvm->arch.ipte_lock_count != 0;
}

static void ipte_lock_simple(struct kvm *kvm)
{
	union ipte_control old, new, *ic;

	mutex_lock(&kvm->arch.ipte_mutex);
	kvm->arch.ipte_lock_count++;
	if (kvm->arch.ipte_lock_count > 1)
		goto out;
retry:
	read_lock(&kvm->arch.sca_lock);
	ic = kvm_s390_get_ipte_control(kvm);
	old = READ_ONCE(*ic);
	do {
		if (old.k) {
			read_unlock(&kvm->arch.sca_lock);
			cond_resched();
			goto retry;
		}
		new = old;
		new.k = 1;
	} while (!try_cmpxchg(&ic->val, &old.val, new.val));
	read_unlock(&kvm->arch.sca_lock);
out:
	mutex_unlock(&kvm->arch.ipte_mutex);
}

static void ipte_unlock_simple(struct kvm *kvm)
{
	union ipte_control old, new, *ic;

	mutex_lock(&kvm->arch.ipte_mutex);
	kvm->arch.ipte_lock_count--;
	if (kvm->arch.ipte_lock_count)
		goto out;
	read_lock(&kvm->arch.sca_lock);
	ic = kvm_s390_get_ipte_control(kvm);
	old = READ_ONCE(*ic);
	do {
		new = old;
		new.k = 0;
	} while (!try_cmpxchg(&ic->val, &old.val, new.val));
	read_unlock(&kvm->arch.sca_lock);
	wake_up(&kvm->arch.ipte_wq);
out:
	mutex_unlock(&kvm->arch.ipte_mutex);
}

static void ipte_lock_siif(struct kvm *kvm)
{
	union ipte_control old, new, *ic;

retry:
	read_lock(&kvm->arch.sca_lock);
	ic = kvm_s390_get_ipte_control(kvm);
	old = READ_ONCE(*ic);
	do {
		if (old.kg) {
			read_unlock(&kvm->arch.sca_lock);
			cond_resched();
			goto retry;
		}
		new = old;
		new.k = 1;
		new.kh++;
	} while (!try_cmpxchg(&ic->val, &old.val, new.val));
	read_unlock(&kvm->arch.sca_lock);
}

static void ipte_unlock_siif(struct kvm *kvm)
{
	union ipte_control old, new, *ic;

	read_lock(&kvm->arch.sca_lock);
	ic = kvm_s390_get_ipte_control(kvm);
	old = READ_ONCE(*ic);
	do {
		new = old;
		new.kh--;
		if (!new.kh)
			new.k = 0;
	} while (!try_cmpxchg(&ic->val, &old.val, new.val));
	read_unlock(&kvm->arch.sca_lock);
	if (!new.kh)
		wake_up(&kvm->arch.ipte_wq);
}

void ipte_lock(struct kvm *kvm)
{
	if (sclp.has_siif)
		ipte_lock_siif(kvm);
	else
		ipte_lock_simple(kvm);
}

void ipte_unlock(struct kvm *kvm)
{
	if (sclp.has_siif)
		ipte_unlock_siif(kvm);
	else
		ipte_unlock_simple(kvm);
}

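/*
 * Example: callers that walk guest DAT tables bracket the walk with the
 * IPTE lock, see check_gva_range() and access_guest_with_key() further
 * down in this file. A minimal sketch of the pattern:
 *
 *	ipte_lock(vcpu->kvm);
 *	rc = guest_range_to_gpas(vcpu, gva, ar, NULL, length, asce, mode,
 *				 access_key);
 *	ipte_unlock(vcpu->kvm);
 */
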
static int ar_translation(struct kvm_vcpu *vcpu, union asce *asce, u8 ar,
			  enum gacc_mode mode)
{
	union alet alet;
	struct ale ale;
	struct aste aste;
	unsigned long ald_addr, authority_table_addr;
	union ald ald;
	int eax, rc;
	u8 authority_table;

	if (ar >= NUM_ACRS)
		return -EINVAL;

	if (vcpu->arch.acrs_loaded)
		save_access_regs(vcpu->run->s.regs.acrs);
	alet.val = vcpu->run->s.regs.acrs[ar];

	if (ar == 0 || alet.val == 0) {
		asce->val = vcpu->arch.sie_block->gcr[1];
		return 0;
	} else if (alet.val == 1) {
		asce->val = vcpu->arch.sie_block->gcr[7];
		return 0;
	}

	if (alet.reserved)
		return PGM_ALET_SPECIFICATION;

	if (alet.p)
		ald_addr = vcpu->arch.sie_block->gcr[5];
	else
		ald_addr = vcpu->arch.sie_block->gcr[2];
	ald_addr &= 0x7fffffc0;

	rc = read_guest_real(vcpu, ald_addr + 16, &ald.val, sizeof(union ald));
	if (rc)
		return rc;

	if (alet.alen / 8 > ald.all)
		return PGM_ALEN_TRANSLATION;

	if (0x7fffffff - ald.alo * 128 < alet.alen * 16)
		return PGM_ADDRESSING;

	rc = read_guest_real(vcpu, ald.alo * 128 + alet.alen * 16, &ale,
			     sizeof(struct ale));
	if (rc)
		return rc;

	if (ale.i == 1)
		return PGM_ALEN_TRANSLATION;
	if (ale.alesn != alet.alesn)
		return PGM_ALE_SEQUENCE;

	rc = read_guest_real(vcpu, ale.asteo * 64, &aste, sizeof(struct aste));
	if (rc)
		return rc;

	if (aste.i)
		return PGM_ASTE_VALIDITY;
	if (aste.astesn != ale.astesn)
		return PGM_ASTE_SEQUENCE;

	if (ale.p == 1) {
		eax = (vcpu->arch.sie_block->gcr[8] >> 16) & 0xffff;
		if (ale.aleax != eax) {
			if (eax / 16 > aste.atl)
				return PGM_EXTENDED_AUTHORITY;

			authority_table_addr = aste.ato * 4 + eax / 4;

			rc = read_guest_real(vcpu, authority_table_addr,
					     &authority_table,
					     sizeof(u8));
			if (rc)
				return rc;

			if ((authority_table & (0x40 >> ((eax & 3) * 2))) == 0)
				return PGM_EXTENDED_AUTHORITY;
		}
	}

	if (ale.fo == 1 && mode == GACC_STORE)
		return PGM_PROTECTION;

	asce->val = aste.asce;
	return 0;
}

enum prot_type {
	PROT_TYPE_LA   = 0,
	PROT_TYPE_KEYC = 1,
	PROT_TYPE_ALC  = 2,
	PROT_TYPE_DAT  = 3,
	PROT_TYPE_IEP  = 4,
	/* Dummy value for passing an initialized value when code != PGM_PROTECTION */
	PROT_TYPE_DUMMY,
};

static int trans_exc_ending(struct kvm_vcpu *vcpu, int code, unsigned long gva, u8 ar,
			    enum gacc_mode mode, enum prot_type prot, bool terminate)
{
	struct kvm_s390_pgm_info *pgm = &vcpu->arch.pgm;
	union teid *teid;

	memset(pgm, 0, sizeof(*pgm));
	pgm->code = code;
	teid = (union teid *)&pgm->trans_exc_code;

	switch (code) {
	case PGM_PROTECTION:
		switch (prot) {
		case PROT_TYPE_DUMMY:
			/* We should never get here, acts like termination */
			WARN_ON_ONCE(1);
			break;
		case PROT_TYPE_IEP:
			teid->b61 = 1;
			fallthrough;
		case PROT_TYPE_LA:
			teid->b56 = 1;
			break;
		case PROT_TYPE_KEYC:
			teid->b60 = 1;
			break;
		case PROT_TYPE_ALC:
			teid->b60 = 1;
			fallthrough;
		case PROT_TYPE_DAT:
			teid->b61 = 1;
			break;
		}
		if (terminate) {
			teid->b56 = 0;
			teid->b60 = 0;
			teid->b61 = 0;
		}
		fallthrough;
	case PGM_ASCE_TYPE:
	case PGM_PAGE_TRANSLATION:
	case PGM_REGION_FIRST_TRANS:
	case PGM_REGION_SECOND_TRANS:
	case PGM_REGION_THIRD_TRANS:
	case PGM_SEGMENT_TRANSLATION:
		/*
		 * op_access_id only applies to MOVE_PAGE -> set bit 61
		 * exc_access_id has to be set to 0 for some instructions. Both
		 * cases have to be handled by the caller.
		 */
		teid->addr = gva >> PAGE_SHIFT;
		teid->fsi = mode == GACC_STORE ? TEID_FSI_STORE : TEID_FSI_FETCH;
		teid->as = psw_bits(vcpu->arch.sie_block->gpsw).as;
		fallthrough;
	case PGM_ALEN_TRANSLATION:
	case PGM_ALE_SEQUENCE:
	case PGM_ASTE_VALIDITY:
	case PGM_ASTE_SEQUENCE:
	case PGM_EXTENDED_AUTHORITY:
		/*
		 * We can always store exc_access_id, as it is
		 * undefined for non-ar cases. It is undefined for
		 * most DAT protection exceptions.
		 */
		pgm->exc_access_id = ar;
		break;
	}
	return code;
}

static int trans_exc(struct kvm_vcpu *vcpu, int code, unsigned long gva, u8 ar,
		     enum gacc_mode mode, enum prot_type prot)
{
	return trans_exc_ending(vcpu, code, gva, ar, mode, prot, false);
}

static int get_vcpu_asce(struct kvm_vcpu *vcpu, union asce *asce,
			 unsigned long ga, u8 ar, enum gacc_mode mode)
{
	int rc;
	struct psw_bits psw = psw_bits(vcpu->arch.sie_block->gpsw);

	if (!psw.dat) {
		asce->val = 0;
		asce->r = 1;
		return 0;
	}

	if ((mode == GACC_IFETCH) && (psw.as != PSW_BITS_AS_HOME))
		psw.as = PSW_BITS_AS_PRIMARY;

	switch (psw.as) {
	case PSW_BITS_AS_PRIMARY:
		asce->val = vcpu->arch.sie_block->gcr[1];
		return 0;
	case PSW_BITS_AS_SECONDARY:
		asce->val = vcpu->arch.sie_block->gcr[7];
		return 0;
	case PSW_BITS_AS_HOME:
		asce->val = vcpu->arch.sie_block->gcr[13];
		return 0;
	case PSW_BITS_AS_ACCREG:
		rc = ar_translation(vcpu, asce, ar, mode);
		if (rc > 0)
			return trans_exc(vcpu, rc, ga, ar, mode, PROT_TYPE_ALC);
		return rc;
	}
	return 0;
}

static int deref_table(struct kvm *kvm, unsigned long gpa, unsigned long *val)
{
	return kvm_read_guest(kvm, gpa, val, sizeof(*val));
}

/**
 * guest_translate - translate a guest virtual into a guest absolute address
 * @vcpu: virtual cpu
 * @gva: guest virtual address
 * @gpa: points to where guest physical (absolute) address should be stored
 * @asce: effective asce
 * @mode: indicates the access mode to be used
 * @prot: returns the type for protection exceptions
 *
 * Translate a guest virtual address into a guest absolute address by means
 * of dynamic address translation as specified by the architecture.
 * If the resulting absolute address is not available in the configuration
 * an addressing exception is indicated and @gpa will not be changed.
 *
 * Returns: - zero on success; @gpa contains the resulting absolute address
 *	    - a negative value if guest access failed due to e.g. broken
 *	      guest mapping
 *	    - a positive value if an access exception happened. In this case
 *	      the returned value is the program interruption code as defined
 *	      by the architecture
 */
static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva,
				     unsigned long *gpa, const union asce asce,
				     enum gacc_mode mode, enum prot_type *prot)
{
	union vaddress vaddr = {.addr = gva};
	union raddress raddr = {.addr = gva};
	union page_table_entry pte;
	int dat_protection = 0;
	int iep_protection = 0;
	union ctlreg0 ctlreg0;
	unsigned long ptr;
	int edat1, edat2, iep;

	ctlreg0.val = vcpu->arch.sie_block->gcr[0];
	edat1 = ctlreg0.edat && test_kvm_facility(vcpu->kvm, 8);
	edat2 = edat1 && test_kvm_facility(vcpu->kvm, 78);
	iep = ctlreg0.iep && test_kvm_facility(vcpu->kvm, 130);
	if (asce.r)
		goto real_address;
	ptr = asce.rsto * PAGE_SIZE;
	switch (asce.dt) {
	case ASCE_TYPE_REGION1:
		if (vaddr.rfx01 > asce.tl)
			return PGM_REGION_FIRST_TRANS;
		ptr += vaddr.rfx * 8;
		break;
	case ASCE_TYPE_REGION2:
		if (vaddr.rfx)
			return PGM_ASCE_TYPE;
		if (vaddr.rsx01 > asce.tl)
			return PGM_REGION_SECOND_TRANS;
		ptr += vaddr.rsx * 8;
		break;
	case ASCE_TYPE_REGION3:
		if (vaddr.rfx || vaddr.rsx)
			return PGM_ASCE_TYPE;
		if (vaddr.rtx01 > asce.tl)
			return PGM_REGION_THIRD_TRANS;
		ptr += vaddr.rtx * 8;
		break;
	case ASCE_TYPE_SEGMENT:
		if (vaddr.rfx || vaddr.rsx || vaddr.rtx)
			return PGM_ASCE_TYPE;
		if (vaddr.sx01 > asce.tl)
			return PGM_SEGMENT_TRANSLATION;
		ptr += vaddr.sx * 8;
		break;
	}
	switch (asce.dt) {
	case ASCE_TYPE_REGION1: {
		union region1_table_entry rfte;

		if (!kvm_is_gpa_in_memslot(vcpu->kvm, ptr))
			return PGM_ADDRESSING;
		if (deref_table(vcpu->kvm, ptr, &rfte.val))
			return -EFAULT;
		if (rfte.i)
			return PGM_REGION_FIRST_TRANS;
		if (rfte.tt != TABLE_TYPE_REGION1)
			return PGM_TRANSLATION_SPEC;
		if (vaddr.rsx01 < rfte.tf || vaddr.rsx01 > rfte.tl)
			return PGM_REGION_SECOND_TRANS;
		if (edat1)
			dat_protection |= rfte.p;
		ptr = rfte.rto * PAGE_SIZE + vaddr.rsx * 8;
	}
		fallthrough;
	case ASCE_TYPE_REGION2: {
		union region2_table_entry rste;

		if (!kvm_is_gpa_in_memslot(vcpu->kvm, ptr))
			return PGM_ADDRESSING;
		if (deref_table(vcpu->kvm, ptr, &rste.val))
			return -EFAULT;
		if (rste.i)
			return PGM_REGION_SECOND_TRANS;
		if (rste.tt != TABLE_TYPE_REGION2)
			return PGM_TRANSLATION_SPEC;
		if (vaddr.rtx01 < rste.tf || vaddr.rtx01 > rste.tl)
			return PGM_REGION_THIRD_TRANS;
		if (edat1)
			dat_protection |= rste.p;
		ptr = rste.rto * PAGE_SIZE + vaddr.rtx * 8;
	}
		fallthrough;
	case ASCE_TYPE_REGION3: {
		union region3_table_entry rtte;

		if (!kvm_is_gpa_in_memslot(vcpu->kvm, ptr))
			return PGM_ADDRESSING;
		if (deref_table(vcpu->kvm, ptr, &rtte.val))
			return -EFAULT;
		if (rtte.i)
			return PGM_REGION_THIRD_TRANS;
		if (rtte.tt != TABLE_TYPE_REGION3)
			return PGM_TRANSLATION_SPEC;
		if (rtte.cr && asce.p && edat2)
			return PGM_TRANSLATION_SPEC;
		if (rtte.fc && edat2) {
			dat_protection |= rtte.fc1.p;
			iep_protection = rtte.fc1.iep;
			raddr.rfaa = rtte.fc1.rfaa;
			goto absolute_address;
		}
		if (vaddr.sx01 < rtte.fc0.tf)
			return PGM_SEGMENT_TRANSLATION;
		if (vaddr.sx01 > rtte.fc0.tl)
			return PGM_SEGMENT_TRANSLATION;
		if (edat1)
			dat_protection |= rtte.fc0.p;
		ptr = rtte.fc0.sto * PAGE_SIZE + vaddr.sx * 8;
	}
		fallthrough;
	case ASCE_TYPE_SEGMENT: {
		union segment_table_entry ste;

		if (!kvm_is_gpa_in_memslot(vcpu->kvm, ptr))
			return PGM_ADDRESSING;
		if (deref_table(vcpu->kvm, ptr, &ste.val))
			return -EFAULT;
		if (ste.i)
			return PGM_SEGMENT_TRANSLATION;
		if (ste.tt != TABLE_TYPE_SEGMENT)
			return PGM_TRANSLATION_SPEC;
		if (ste.cs && asce.p)
			return PGM_TRANSLATION_SPEC;
		if (ste.fc && edat1) {
			dat_protection |= ste.fc1.p;
			iep_protection = ste.fc1.iep;
			raddr.sfaa = ste.fc1.sfaa;
			goto absolute_address;
		}
		dat_protection |= ste.fc0.p;
		ptr = ste.fc0.pto * (PAGE_SIZE / 2) + vaddr.px * 8;
	}
	}
	if (!kvm_is_gpa_in_memslot(vcpu->kvm, ptr))
		return PGM_ADDRESSING;
	if (deref_table(vcpu->kvm, ptr, &pte.val))
		return -EFAULT;
	if (pte.i)
		return PGM_PAGE_TRANSLATION;
	if (pte.z)
		return PGM_TRANSLATION_SPEC;
	dat_protection |= pte.p;
	iep_protection = pte.iep;
	raddr.pfra = pte.pfra;
real_address:
	raddr.addr = kvm_s390_real_to_abs(vcpu, raddr.addr);
absolute_address:
	if (mode == GACC_STORE && dat_protection) {
		*prot = PROT_TYPE_DAT;
		return PGM_PROTECTION;
	}
	if (mode == GACC_IFETCH && iep_protection && iep) {
		*prot = PROT_TYPE_IEP;
		return PGM_PROTECTION;
	}
	if (!kvm_is_gpa_in_memslot(vcpu->kvm, raddr.addr))
		return PGM_ADDRESSING;
	*gpa = raddr.addr;
	return 0;
}

static inline int is_low_address(unsigned long ga)
{
	/* Check for address ranges 0..511 and 4096..4607 */
	return (ga & ~0x11fful) == 0;
}

static int low_address_protection_enabled(struct kvm_vcpu *vcpu,
					  const union asce asce)
{
	union ctlreg0 ctlreg0 = {.val = vcpu->arch.sie_block->gcr[0]};
	psw_t *psw = &vcpu->arch.sie_block->gpsw;

	if (!ctlreg0.lap)
		return 0;
	if (psw_bits(*psw).dat && asce.p)
		return 0;
	return 1;
}

static int vm_check_access_key(struct kvm *kvm, u8 access_key,
			       enum gacc_mode mode, gpa_t gpa)
{
	u8 storage_key, access_control;
	bool fetch_protected;
	unsigned long hva;
	int r;

	if (access_key == 0)
		return 0;

	hva = gfn_to_hva(kvm, gpa_to_gfn(gpa));
	if (kvm_is_error_hva(hva))
		return PGM_ADDRESSING;

	mmap_read_lock(current->mm);
	r = get_guest_storage_key(current->mm, hva, &storage_key);
	mmap_read_unlock(current->mm);
	if (r)
		return r;
	access_control = FIELD_GET(_PAGE_ACC_BITS, storage_key);
	if (access_control == access_key)
		return 0;
	fetch_protected = storage_key & _PAGE_FP_BIT;
	if ((mode == GACC_FETCH || mode == GACC_IFETCH) && !fetch_protected)
		return 0;
	return PGM_PROTECTION;
}

static bool fetch_prot_override_applicable(struct kvm_vcpu *vcpu, enum gacc_mode mode,
					   union asce asce)
{
	psw_t *psw = &vcpu->arch.sie_block->gpsw;
	unsigned long override;

	if (mode == GACC_FETCH || mode == GACC_IFETCH) {
		/* check if fetch protection override enabled */
		override = vcpu->arch.sie_block->gcr[0];
		override &= CR0_FETCH_PROTECTION_OVERRIDE;
		/* not applicable if subject to DAT && private space */
		override = override && !(psw_bits(*psw).dat && asce.p);
		return override;
	}
	return false;
}

static bool fetch_prot_override_applies(unsigned long ga, unsigned int len)
{
	return ga < 2048 && ga + len <= 2048;
}

static bool storage_prot_override_applicable(struct kvm_vcpu *vcpu)
{
	/* check if storage protection override enabled */
	return vcpu->arch.sie_block->gcr[0] & CR0_STORAGE_PROTECTION_OVERRIDE;
}

static bool storage_prot_override_applies(u8 access_control)
{
	/* matches special storage protection override key (9) -> allow */
	return access_control == PAGE_SPO_ACC;
}

static int vcpu_check_access_key(struct kvm_vcpu *vcpu, u8 access_key,
				 enum gacc_mode mode, union asce asce, gpa_t gpa,
				 unsigned long ga, unsigned int len)
{
	u8 storage_key, access_control;
	unsigned long hva;
	int r;

	/* access key 0 matches any storage key -> allow */
	if (access_key == 0)
		return 0;
	/*
	 * caller needs to ensure that gfn is accessible, so we can
	 * assume that this cannot fail
	 */
	hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(gpa));
	mmap_read_lock(current->mm);
	r = get_guest_storage_key(current->mm, hva, &storage_key);
	mmap_read_unlock(current->mm);
	if (r)
		return r;
	access_control = FIELD_GET(_PAGE_ACC_BITS, storage_key);
	/* access key matches storage key -> allow */
	if (access_control == access_key)
		return 0;
	if (mode == GACC_FETCH || mode == GACC_IFETCH) {
		/* it is a fetch and fetch protection is off -> allow */
		if (!(storage_key & _PAGE_FP_BIT))
			return 0;
		if (fetch_prot_override_applicable(vcpu, mode, asce) &&
		    fetch_prot_override_applies(ga, len))
			return 0;
	}
	if (storage_prot_override_applicable(vcpu) &&
	    storage_prot_override_applies(access_control))
		return 0;
	return PGM_PROTECTION;
}

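/*
 * Example: how vcpu_check_access_key() decides for a few combinations,
 * assuming neither fetch protection override nor storage protection
 * override applies:
 *
 *	access key 0, any storage key, any mode	-> allow
 *	access key 2, storage key ACC=2, store		-> allow (keys match)
 *	access key 2, storage key ACC=5, F=0, fetch	-> allow (no fetch prot.)
 *	access key 2, storage key ACC=5, F=1, fetch	-> PGM_PROTECTION
 *	access key 2, storage key ACC=5, store		-> PGM_PROTECTION
 *
 * With storage protection override enabled, accesses to pages whose
 * storage key ACC equals PAGE_SPO_ACC (9) are allowed despite a mismatch.
 */
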
/**
 * guest_range_to_gpas() - Calculate guest physical addresses of page fragments
 * covering a logical range
 * @vcpu: virtual cpu
 * @ga: guest address, start of range
 * @ar: access register
 * @gpas: output argument, may be NULL
 * @len: length of range in bytes
 * @asce: address-space-control element to use for translation
 * @mode: access mode
 * @access_key: access key to match the range's storage keys against
 *
 * Translate a logical range to a series of guest absolute addresses,
 * such that the concatenation of page fragments starting at each gpa makes up
 * the whole range.
 * The translation is performed as if done by the cpu for the given @asce, @ar,
 * @mode and state of the @vcpu.
 * If the translation causes an exception, its program interruption code is
 * returned and the &struct kvm_s390_pgm_info pgm member of @vcpu is modified
 * such that a subsequent call to kvm_s390_inject_prog_vcpu() will inject
 * a correct exception into the guest.
 * The resulting gpas are stored into @gpas, unless it is NULL.
 *
 * Note: All fragments except the first one start at the beginning of a page.
 * When deriving the boundaries of a fragment from a gpa, all but the last
 * fragment end at the end of the page.
 *
 * Return:
 * * 0		- success
 * * <0		- translation could not be performed, for example if guest
 *		  memory could not be accessed
 * * >0		- an access exception occurred. In this case the returned value
 *		  is the program interruption code and the contents of pgm may
 *		  be used to inject an exception into the guest.
 */
static int guest_range_to_gpas(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar,
			       unsigned long *gpas, unsigned long len,
			       const union asce asce, enum gacc_mode mode,
			       u8 access_key)
{
	psw_t *psw = &vcpu->arch.sie_block->gpsw;
	unsigned int offset = offset_in_page(ga);
	unsigned int fragment_len;
	int lap_enabled, rc = 0;
	enum prot_type prot;
	unsigned long gpa;

	lap_enabled = low_address_protection_enabled(vcpu, asce);
	while (min(PAGE_SIZE - offset, len) > 0) {
		fragment_len = min(PAGE_SIZE - offset, len);
		ga = kvm_s390_logical_to_effective(vcpu, ga);
		if (mode == GACC_STORE && lap_enabled && is_low_address(ga))
			return trans_exc(vcpu, PGM_PROTECTION, ga, ar, mode,
					 PROT_TYPE_LA);
		if (psw_bits(*psw).dat) {
			rc = guest_translate(vcpu, ga, &gpa, asce, mode, &prot);
			if (rc < 0)
				return rc;
		} else {
			gpa = kvm_s390_real_to_abs(vcpu, ga);
			if (!kvm_is_gpa_in_memslot(vcpu->kvm, gpa)) {
				rc = PGM_ADDRESSING;
				prot = PROT_TYPE_DUMMY;
			}
		}
		if (rc)
			return trans_exc(vcpu, rc, ga, ar, mode, prot);
		rc = vcpu_check_access_key(vcpu, access_key, mode, asce, gpa, ga,
					   fragment_len);
		if (rc)
			return trans_exc(vcpu, rc, ga, ar, mode, PROT_TYPE_KEYC);
		if (gpas)
			*gpas++ = gpa;
		offset = 0;
		ga += fragment_len;
		len -= fragment_len;
	}
	return 0;
}

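/*
 * Example: the two typical ways this helper is used in this file. With
 * @gpas == NULL it only checks that the whole range is accessible, as in
 * check_gva_range(); with an array it produces one gpa per page fragment
 * for a subsequent copy, as in access_guest_with_key(). A minimal sketch
 * for a range that crosses at most one page boundary:
 *
 *	unsigned long gpas[2];
 *
 *	rc = guest_range_to_gpas(vcpu, ga, ar, gpas, len, asce,
 *				 GACC_FETCH, access_key);
 *	if (rc)
 *		return rc;	(<0: host error, >0: pgm info already set up)
 */
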
static int access_guest_page(struct kvm *kvm, enum gacc_mode mode, gpa_t gpa,
			     void *data, unsigned int len)
{
	const unsigned int offset = offset_in_page(gpa);
	const gfn_t gfn = gpa_to_gfn(gpa);
	int rc;

	if (!gfn_to_memslot(kvm, gfn))
		return PGM_ADDRESSING;
	if (mode == GACC_STORE)
		rc = kvm_write_guest_page(kvm, gfn, data, offset, len);
	else
		rc = kvm_read_guest_page(kvm, gfn, data, offset, len);
	return rc;
}

static int
access_guest_page_with_key(struct kvm *kvm, enum gacc_mode mode, gpa_t gpa,
			   void *data, unsigned int len, u8 access_key)
{
	struct kvm_memory_slot *slot;
	bool writable;
	gfn_t gfn;
	hva_t hva;
	int rc;

	gfn = gpa >> PAGE_SHIFT;
	slot = gfn_to_memslot(kvm, gfn);
	hva = gfn_to_hva_memslot_prot(slot, gfn, &writable);

	if (kvm_is_error_hva(hva))
		return PGM_ADDRESSING;
	/*
	 * Check if it's a read-only memslot, even though that cannot occur
	 * since those are unsupported.
	 * Don't try to actually handle that case.
	 */
	if (!writable && mode == GACC_STORE)
		return -EOPNOTSUPP;
	hva += offset_in_page(gpa);
	if (mode == GACC_STORE)
		rc = copy_to_user_key((void __user *)hva, data, len, access_key);
	else
		rc = copy_from_user_key(data, (void __user *)hva, len, access_key);
	if (rc)
		return PGM_PROTECTION;
	if (mode == GACC_STORE)
		mark_page_dirty_in_slot(kvm, slot, gfn);
	return 0;
}

int access_guest_abs_with_key(struct kvm *kvm, gpa_t gpa, void *data,
			      unsigned long len, enum gacc_mode mode, u8 access_key)
{
	int offset = offset_in_page(gpa);
	int fragment_len;
	int rc;

	while (min(PAGE_SIZE - offset, len) > 0) {
		fragment_len = min(PAGE_SIZE - offset, len);
		rc = access_guest_page_with_key(kvm, mode, gpa, data, fragment_len, access_key);
		if (rc)
			return rc;
		offset = 0;
		len -= fragment_len;
		data += fragment_len;
		gpa += fragment_len;
	}
	return 0;
}

int access_guest_with_key(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar,
			  void *data, unsigned long len, enum gacc_mode mode,
			  u8 access_key)
{
	psw_t *psw = &vcpu->arch.sie_block->gpsw;
	unsigned long nr_pages, idx;
	unsigned long gpa_array[2];
	unsigned int fragment_len;
	unsigned long *gpas;
	enum prot_type prot;
	int need_ipte_lock;
	union asce asce;
	bool try_storage_prot_override;
	bool try_fetch_prot_override;
	int rc;

	if (!len)
		return 0;
	ga = kvm_s390_logical_to_effective(vcpu, ga);
	rc = get_vcpu_asce(vcpu, &asce, ga, ar, mode);
	if (rc)
		return rc;
	nr_pages = (((ga & ~PAGE_MASK) + len - 1) >> PAGE_SHIFT) + 1;
	gpas = gpa_array;
	if (nr_pages > ARRAY_SIZE(gpa_array))
		gpas = vmalloc(array_size(nr_pages, sizeof(unsigned long)));
	if (!gpas)
		return -ENOMEM;
	try_fetch_prot_override = fetch_prot_override_applicable(vcpu, mode, asce);
	try_storage_prot_override = storage_prot_override_applicable(vcpu);
	need_ipte_lock = psw_bits(*psw).dat && !asce.r;
	if (need_ipte_lock)
		ipte_lock(vcpu->kvm);
	/*
	 * Since we do the access further down ultimately via a move instruction
	 * that does key checking and returns an error in case of a protection
	 * violation, we don't need to do the check during address translation.
	 * Skip it by passing access key 0, which matches any storage key,
	 * obviating the need for any further checks. As a result the check is
	 * handled entirely in hardware on access, we only need to take care to
	 * forego key protection checking if fetch protection override applies or
	 * retry with the special key 9 in case of storage protection override.
	 */
	rc = guest_range_to_gpas(vcpu, ga, ar, gpas, len, asce, mode, 0);
	if (rc)
		goto out_unlock;
	for (idx = 0; idx < nr_pages; idx++) {
		fragment_len = min(PAGE_SIZE - offset_in_page(gpas[idx]), len);
		if (try_fetch_prot_override && fetch_prot_override_applies(ga, fragment_len)) {
			rc = access_guest_page(vcpu->kvm, mode, gpas[idx],
					       data, fragment_len);
		} else {
			rc = access_guest_page_with_key(vcpu->kvm, mode, gpas[idx],
							data, fragment_len, access_key);
		}
		if (rc == PGM_PROTECTION && try_storage_prot_override)
			rc = access_guest_page_with_key(vcpu->kvm, mode, gpas[idx],
							data, fragment_len, PAGE_SPO_ACC);
		if (rc)
			break;
		len -= fragment_len;
		data += fragment_len;
		ga = kvm_s390_logical_to_effective(vcpu, ga + fragment_len);
	}
	if (rc > 0) {
		bool terminate = (mode == GACC_STORE) && (idx > 0);

		if (rc == PGM_PROTECTION)
			prot = PROT_TYPE_KEYC;
		else
			prot = PROT_TYPE_DUMMY;
		rc = trans_exc_ending(vcpu, rc, ga, ar, mode, prot, terminate);
	}
out_unlock:
	if (need_ipte_lock)
		ipte_unlock(vcpu->kvm);
	if (nr_pages > ARRAY_SIZE(gpa_array))
		vfree(gpas);
	return rc;
}

int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra,
		      void *data, unsigned long len, enum gacc_mode mode)
{
	unsigned int fragment_len;
	unsigned long gpa;
	int rc = 0;

	while (len && !rc) {
		gpa = kvm_s390_real_to_abs(vcpu, gra);
		fragment_len = min(PAGE_SIZE - offset_in_page(gpa), len);
		rc = access_guest_page(vcpu->kvm, mode, gpa, data, fragment_len);
		len -= fragment_len;
		gra += fragment_len;
		data += fragment_len;
	}
	if (rc > 0)
		vcpu->arch.pgm.code = rc;
	return rc;
}

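/*
 * Example: a minimal sketch of how an instruction handler might read a
 * guest operand through access_guest_with_key(); "ga" and "ar" are assumed
 * to have been decoded from the instruction already, and the access key is
 * taken from the guest PSW key here purely for illustration:
 *
 *	u8 key = psw_bits(vcpu->arch.sie_block->gpsw).key;
 *	u64 operand;
 *	int rc;
 *
 *	rc = access_guest_with_key(vcpu, ga, ar, &operand, sizeof(operand),
 *				   GACC_FETCH, key);
 *	if (rc)
 *		return kvm_s390_inject_prog_cond(vcpu, rc);
 */
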
/**
 * cmpxchg_guest_abs_with_key() - Perform cmpxchg on guest absolute address.
 * @kvm: Virtual machine instance.
 * @gpa: Absolute guest address of the location to be changed.
 * @len: Operand length of the cmpxchg, required: 1 <= len <= 16. Providing a
 *	 non power of two will result in failure.
 * @old_addr: Pointer to old value. If the location at @gpa contains this value,
 *	      the exchange will succeed. After calling cmpxchg_guest_abs_with_key()
 *	      *@old_addr contains the value at @gpa before the attempt to
 *	      exchange the value.
 * @new: The value to place at @gpa.
 * @access_key: The access key to use for the guest access.
 * @success: output value indicating if an exchange occurred.
 *
 * Atomically exchange the value at @gpa by @new, if it contains *@old_addr.
 * Honors storage keys.
 *
 * Return: * 0: successful exchange
 *	   * >0: a program interruption code indicating the reason cmpxchg could
 *		 not be attempted
 *	   * -EINVAL: address misaligned or len not power of two
 *	   * -EAGAIN: transient failure (len 1 or 2)
 *	   * -EOPNOTSUPP: read-only memslot (should never occur)
 */
int cmpxchg_guest_abs_with_key(struct kvm *kvm, gpa_t gpa, int len,
			       __uint128_t *old_addr, __uint128_t new,
			       u8 access_key, bool *success)
{
	gfn_t gfn = gpa_to_gfn(gpa);
	struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn);
	bool writable;
	hva_t hva;
	int ret;

	if (!IS_ALIGNED(gpa, len))
		return -EINVAL;

	hva = gfn_to_hva_memslot_prot(slot, gfn, &writable);
	if (kvm_is_error_hva(hva))
		return PGM_ADDRESSING;
	/*
	 * Check if it's a read-only memslot, even though that cannot occur
	 * since those are unsupported.
	 * Don't try to actually handle that case.
	 */
	if (!writable)
		return -EOPNOTSUPP;

	hva += offset_in_page(gpa);
	/*
	 * The cmpxchg_user_key macro depends on the type of "old", so we need
	 * a case for each valid length and get some code duplication as long
	 * as we don't introduce a new macro.
	 */
	switch (len) {
	case 1: {
		u8 old;

		ret = cmpxchg_user_key((u8 __user *)hva, &old, *old_addr, new, access_key);
		*success = !ret && old == *old_addr;
		*old_addr = old;
		break;
	}
	case 2: {
		u16 old;

		ret = cmpxchg_user_key((u16 __user *)hva, &old, *old_addr, new, access_key);
		*success = !ret && old == *old_addr;
		*old_addr = old;
		break;
	}
	case 4: {
		u32 old;

		ret = cmpxchg_user_key((u32 __user *)hva, &old, *old_addr, new, access_key);
		*success = !ret && old == *old_addr;
		*old_addr = old;
		break;
	}
	case 8: {
		u64 old;

		ret = cmpxchg_user_key((u64 __user *)hva, &old, *old_addr, new, access_key);
		*success = !ret && old == *old_addr;
		*old_addr = old;
		break;
	}
	case 16: {
		__uint128_t old;

		ret = cmpxchg_user_key((__uint128_t __user *)hva, &old, *old_addr, new, access_key);
		*success = !ret && old == *old_addr;
		*old_addr = old;
		break;
	}
	default:
		return -EINVAL;
	}
	if (*success)
		mark_page_dirty_in_slot(kvm, slot, gfn);
	/*
	 * Assume that the fault is caused by protection, either key protection
	 * or user page write protection.
	 */
	if (ret == -EFAULT)
		ret = PGM_PROTECTION;
	return ret;
}

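/*
 * Example: a minimal sketch of result handling for a caller such as the
 * KVM_S390_MEM_OP ioctl path; old, new, gpa, len and access_key are
 * assumed to have been validated already:
 *
 *	bool success;
 *	int rc;
 *
 *	rc = cmpxchg_guest_abs_with_key(kvm, gpa, len, &old, new,
 *					access_key, &success);
 *	if (rc < 0)
 *		return rc;		(-EINVAL, -EAGAIN, -EOPNOTSUPP)
 *	if (rc > 0)
 *		report rc as the program interruption code to userspace;
 *	if (!success)
 *		old now holds the value found at gpa; retry or give up;
 */
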
/**
 * guest_translate_address_with_key - translate guest logical into guest absolute address
 * @vcpu: virtual cpu
 * @gva: Guest virtual address
 * @ar: Access register
 * @gpa: Guest physical address
 * @mode: Translation access mode
 * @access_key: access key to match the storage key with
 *
 * Parameter semantics are the same as the ones from guest_translate.
 * The memory contents at the guest address are not changed.
 *
 * Note: The IPTE lock is not taken during this function, so the caller
 * has to take care of this.
 */
int guest_translate_address_with_key(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar,
				     unsigned long *gpa, enum gacc_mode mode,
				     u8 access_key)
{
	union asce asce;
	int rc;

	gva = kvm_s390_logical_to_effective(vcpu, gva);
	rc = get_vcpu_asce(vcpu, &asce, gva, ar, mode);
	if (rc)
		return rc;
	return guest_range_to_gpas(vcpu, gva, ar, gpa, 1, asce, mode,
				   access_key);
}

/**
 * check_gva_range - test a range of guest virtual addresses for accessibility
 * @vcpu: virtual cpu
 * @gva: Guest virtual address
 * @ar: Access register
 * @length: Length of test range
 * @mode: Translation access mode
 * @access_key: access key to match the storage keys with
 */
int check_gva_range(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar,
		    unsigned long length, enum gacc_mode mode, u8 access_key)
{
	union asce asce;
	int rc = 0;

	rc = get_vcpu_asce(vcpu, &asce, gva, ar, mode);
	if (rc)
		return rc;
	ipte_lock(vcpu->kvm);
	rc = guest_range_to_gpas(vcpu, gva, ar, NULL, length, asce, mode,
				 access_key);
	ipte_unlock(vcpu->kvm);

	return rc;
}

/**
 * check_gpa_range - test a range of guest physical addresses for accessibility
 * @kvm: virtual machine instance
 * @gpa: guest physical address
 * @length: length of test range
 * @mode: access mode to test, relevant for storage keys
 * @access_key: access key to match the storage keys with
 */
int check_gpa_range(struct kvm *kvm, unsigned long gpa, unsigned long length,
		    enum gacc_mode mode, u8 access_key)
{
	unsigned int fragment_len;
	int rc = 0;

	while (length && !rc) {
		fragment_len = min(PAGE_SIZE - offset_in_page(gpa), length);
		rc = vm_check_access_key(kvm, access_key, mode, gpa);
		length -= fragment_len;
		gpa += fragment_len;
	}
	return rc;
}

/**
 * kvm_s390_check_low_addr_prot_real - check for low-address protection
 * @vcpu: virtual cpu
 * @gra: Guest real address
 *
 * Checks whether an address is subject to low-address protection and sets
 * up vcpu->arch.pgm accordingly if necessary.
 *
 * Return: 0 if no protection exception, or PGM_PROTECTION if protected.
 */
int kvm_s390_check_low_addr_prot_real(struct kvm_vcpu *vcpu, unsigned long gra)
{
	union ctlreg0 ctlreg0 = {.val = vcpu->arch.sie_block->gcr[0]};

	if (!ctlreg0.lap || !is_low_address(gra))
		return 0;
	return trans_exc(vcpu, PGM_PROTECTION, gra, 0, GACC_STORE, PROT_TYPE_LA);
}

/**
 * kvm_s390_shadow_tables - walk the guest page table and create shadow tables
 * @sg: pointer to the shadow guest address space structure
 * @saddr: faulting address in the shadow gmap
 * @pgt: pointer to the beginning of the page table for the given address if
 *	 successful (return value 0), or to the first invalid DAT entry in
 *	 case of exceptions (return value > 0)
 * @dat_protection: referenced memory is write protected
 * @fake: pgt references contiguous guest memory block, not a pgtable
 */
static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
				  unsigned long *pgt, int *dat_protection,
				  int *fake)
{
	struct kvm *kvm;
	struct gmap *parent;
	union asce asce;
	union vaddress vaddr;
	unsigned long ptr;
	int rc;

	*fake = 0;
	*dat_protection = 0;
	kvm = sg->private;
	parent = sg->parent;
	vaddr.addr = saddr;
	asce.val = sg->orig_asce;
	ptr = asce.rsto * PAGE_SIZE;
	if (asce.r) {
		*fake = 1;
		ptr = 0;
		asce.dt = ASCE_TYPE_REGION1;
	}
	switch (asce.dt) {
	case ASCE_TYPE_REGION1:
		if (vaddr.rfx01 > asce.tl && !*fake)
			return PGM_REGION_FIRST_TRANS;
		break;
	case ASCE_TYPE_REGION2:
		if (vaddr.rfx)
			return PGM_ASCE_TYPE;
		if (vaddr.rsx01 > asce.tl)
			return PGM_REGION_SECOND_TRANS;
		break;
	case ASCE_TYPE_REGION3:
		if (vaddr.rfx || vaddr.rsx)
			return PGM_ASCE_TYPE;
		if (vaddr.rtx01 > asce.tl)
			return PGM_REGION_THIRD_TRANS;
		break;
	case ASCE_TYPE_SEGMENT:
		if (vaddr.rfx || vaddr.rsx || vaddr.rtx)
			return PGM_ASCE_TYPE;
		if (vaddr.sx01 > asce.tl)
			return PGM_SEGMENT_TRANSLATION;
		break;
	}

	switch (asce.dt) {
	case ASCE_TYPE_REGION1: {
		union region1_table_entry rfte;

		if (*fake) {
			ptr += vaddr.rfx * _REGION1_SIZE;
			rfte.val = ptr;
			goto shadow_r2t;
		}
		*pgt = ptr + vaddr.rfx * 8;
		rc = gmap_read_table(parent, ptr + vaddr.rfx * 8, &rfte.val);
		if (rc)
			return rc;
		if (rfte.i)
			return PGM_REGION_FIRST_TRANS;
		if (rfte.tt != TABLE_TYPE_REGION1)
			return PGM_TRANSLATION_SPEC;
		if (vaddr.rsx01 < rfte.tf || vaddr.rsx01 > rfte.tl)
			return PGM_REGION_SECOND_TRANS;
		if (sg->edat_level >= 1)
			*dat_protection |= rfte.p;
		ptr = rfte.rto * PAGE_SIZE;
shadow_r2t:
		rc = gmap_shadow_r2t(sg, saddr, rfte.val, *fake);
		if (rc)
			return rc;
		kvm->stat.gmap_shadow_r1_entry++;
	}
		fallthrough;
	case ASCE_TYPE_REGION2: {
		union region2_table_entry rste;

		if (*fake) {
			ptr += vaddr.rsx * _REGION2_SIZE;
			rste.val = ptr;
			goto shadow_r3t;
		}
		*pgt = ptr + vaddr.rsx * 8;
		rc = gmap_read_table(parent, ptr + vaddr.rsx * 8, &rste.val);
		if (rc)
			return rc;
		if (rste.i)
			return PGM_REGION_SECOND_TRANS;
		if (rste.tt != TABLE_TYPE_REGION2)
			return PGM_TRANSLATION_SPEC;
		if (vaddr.rtx01 < rste.tf || vaddr.rtx01 > rste.tl)
			return PGM_REGION_THIRD_TRANS;
		if (sg->edat_level >= 1)
			*dat_protection |= rste.p;
		ptr = rste.rto * PAGE_SIZE;
shadow_r3t:
		rste.p |= *dat_protection;
		rc = gmap_shadow_r3t(sg, saddr, rste.val, *fake);
		if (rc)
			return rc;
		kvm->stat.gmap_shadow_r2_entry++;
	}
		fallthrough;
	case ASCE_TYPE_REGION3: {
		union region3_table_entry rtte;

		if (*fake) {
			ptr += vaddr.rtx * _REGION3_SIZE;
			rtte.val = ptr;
			goto shadow_sgt;
		}
		*pgt = ptr + vaddr.rtx * 8;
		rc = gmap_read_table(parent, ptr + vaddr.rtx * 8, &rtte.val);
		if (rc)
			return rc;
		if (rtte.i)
			return PGM_REGION_THIRD_TRANS;
		if (rtte.tt != TABLE_TYPE_REGION3)
			return PGM_TRANSLATION_SPEC;
		if (rtte.cr && asce.p && sg->edat_level >= 2)
			return PGM_TRANSLATION_SPEC;
		if (rtte.fc && sg->edat_level >= 2) {
			*dat_protection |= rtte.fc0.p;
			*fake = 1;
			ptr = rtte.fc1.rfaa * _REGION3_SIZE;
			rtte.val = ptr;
			goto shadow_sgt;
		}
		if (vaddr.sx01 < rtte.fc0.tf || vaddr.sx01 > rtte.fc0.tl)
			return PGM_SEGMENT_TRANSLATION;
		if (sg->edat_level >= 1)
			*dat_protection |= rtte.fc0.p;
		ptr = rtte.fc0.sto * PAGE_SIZE;
shadow_sgt:
		rtte.fc0.p |= *dat_protection;
		rc = gmap_shadow_sgt(sg, saddr, rtte.val, *fake);
		if (rc)
			return rc;
		kvm->stat.gmap_shadow_r3_entry++;
	}
		fallthrough;
	case ASCE_TYPE_SEGMENT: {
		union segment_table_entry ste;

		if (*fake) {
			ptr += vaddr.sx * _SEGMENT_SIZE;
			ste.val = ptr;
			goto shadow_pgt;
		}
		*pgt = ptr + vaddr.sx * 8;
		rc = gmap_read_table(parent, ptr + vaddr.sx * 8, &ste.val);
		if (rc)
			return rc;
		if (ste.i)
			return PGM_SEGMENT_TRANSLATION;
		if (ste.tt != TABLE_TYPE_SEGMENT)
			return PGM_TRANSLATION_SPEC;
		if (ste.cs && asce.p)
			return PGM_TRANSLATION_SPEC;
		*dat_protection |= ste.fc0.p;
		if (ste.fc && sg->edat_level >= 1) {
			*fake = 1;
			ptr = ste.fc1.sfaa * _SEGMENT_SIZE;
			ste.val = ptr;
			goto shadow_pgt;
		}
		ptr = ste.fc0.pto * (PAGE_SIZE / 2);
shadow_pgt:
		ste.fc0.p |= *dat_protection;
		rc = gmap_shadow_pgt(sg, saddr, ste.val, *fake);
		if (rc)
			return rc;
		kvm->stat.gmap_shadow_sg_entry++;
	}
	}
	/* Return the parent address of the page table */
	*pgt = ptr;
	return 0;
}

/**
 * shadow_pgt_lookup() - find a shadow page table
 * @sg: pointer to the shadow guest address space structure
 * @saddr: the address in the shadow guest address space
 * @pgt: parent gmap address of the page table to get shadowed
 * @dat_protection: if the pgtable is marked as protected by dat
 * @fake: pgt references contiguous guest memory block, not a pgtable
 *
 * Returns 0 if the shadow page table was found and -EAGAIN if the page
 * table was not found.
 *
 * Called with sg->mm->mmap_lock in read.
 */
static int shadow_pgt_lookup(struct gmap *sg, unsigned long saddr, unsigned long *pgt,
			     int *dat_protection, int *fake)
{
	unsigned long pt_index;
	unsigned long *table;
	struct page *page;
	int rc;

	spin_lock(&sg->guest_table_lock);
	table = gmap_table_walk(sg, saddr, 1); /* get segment pointer */
	if (table && !(*table & _SEGMENT_ENTRY_INVALID)) {
		/* Shadow page tables are full pages (pte+pgste) */
		page = pfn_to_page(*table >> PAGE_SHIFT);
		pt_index = gmap_pgste_get_pgt_addr(page_to_virt(page));
		*pgt = pt_index & ~GMAP_SHADOW_FAKE_TABLE;
		*dat_protection = !!(*table & _SEGMENT_ENTRY_PROTECT);
		*fake = !!(pt_index & GMAP_SHADOW_FAKE_TABLE);
		rc = 0;
	} else {
		rc = -EAGAIN;
	}
	spin_unlock(&sg->guest_table_lock);
	return rc;
}

/**
 * kvm_s390_shadow_fault - handle fault on a shadow page table
 * @vcpu: virtual cpu
 * @sg: pointer to the shadow guest address space structure
 * @saddr: faulting address in the shadow gmap
 * @datptr: will contain the address of the faulting DAT table entry, or of
 *	    the valid leaf, plus some flags
 *
 * Returns: - 0 if the shadow fault was successfully resolved
 *	    - > 0 (pgm exception code) on exceptions while faulting
 *	    - -EAGAIN if the caller can retry immediately
 *	    - -EFAULT when accessing invalid guest addresses
 *	    - -ENOMEM if out of memory
 */
int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *sg,
			  unsigned long saddr, unsigned long *datptr)
{
	union vaddress vaddr;
	union page_table_entry pte;
	unsigned long pgt = 0;
	int dat_protection, fake;
	int rc;

	if (KVM_BUG_ON(!gmap_is_shadow(sg), vcpu->kvm))
		return -EFAULT;

	mmap_read_lock(sg->mm);
	/*
	 * We don't want any guest-2 tables to change - so the parent
	 * tables/pointers we read stay valid - unshadowing is however
	 * always possible - only guest_table_lock protects us.
	 */
	ipte_lock(vcpu->kvm);

	rc = shadow_pgt_lookup(sg, saddr, &pgt, &dat_protection, &fake);
	if (rc)
		rc = kvm_s390_shadow_tables(sg, saddr, &pgt, &dat_protection,
					    &fake);

	vaddr.addr = saddr;
	if (fake) {
		pte.val = pgt + vaddr.px * PAGE_SIZE;
		goto shadow_page;
	}

	switch (rc) {
	case PGM_SEGMENT_TRANSLATION:
	case PGM_REGION_THIRD_TRANS:
	case PGM_REGION_SECOND_TRANS:
	case PGM_REGION_FIRST_TRANS:
		pgt |= PEI_NOT_PTE;
		break;
	case 0:
		pgt += vaddr.px * 8;
		rc = gmap_read_table(sg->parent, pgt, &pte.val);
	}
	if (datptr)
		*datptr = pgt | dat_protection * PEI_DAT_PROT;
	if (!rc && pte.i)
		rc = PGM_PAGE_TRANSLATION;
	if (!rc && pte.z)
		rc = PGM_TRANSLATION_SPEC;
shadow_page:
	pte.p |= dat_protection;
	if (!rc)
		rc = gmap_shadow_page(sg, saddr, __pte(pte.val));
	vcpu->kvm->stat.gmap_shadow_pg_entry++;
	ipte_unlock(vcpu->kvm);
	mmap_read_unlock(sg->mm);
	return rc;
}
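
/*
 * Example: a minimal sketch of how a caller such as the vSIE fault handling
 * maps the return value of kvm_s390_shadow_fault(); the surrounding names
 * are placeholders:
 *
 *	rc = kvm_s390_shadow_fault(vcpu, shadow_gmap, saddr, &datptr);
 *	if (rc == -EAGAIN)
 *		retry the fault;
 *	else if (rc > 0)
 *		forward rc as a program interruption to the guest;
 *	else if (rc < 0)
 *		propagate the host error (-EFAULT, -ENOMEM);
 */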