// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2017 - Linaro Ltd
 * Author: Jintack Lim <jintack.lim@linaro.org>
 */

#include <linux/kvm_host.h>

#include <asm/esr.h>
#include <asm/kvm_hyp.h>
#include <asm/kvm_mmu.h>

static void fail_s1_walk(struct s1_walk_result *wr, u8 fst, bool s1ptw)
{
	wr->fst		= fst;
	wr->ptw		= s1ptw;
	wr->s2		= s1ptw;
	wr->failed	= true;
}

#define S1_MMU_DISABLED		(-127)

static int get_ia_size(struct s1_walk_info *wi)
{
	return 64 - wi->txsz;
}

/* Return true if the IPA is out of the OA range */
static bool check_output_size(u64 ipa, struct s1_walk_info *wi)
{
	return wi->max_oa_bits < 48 && (ipa & GENMASK_ULL(47, wi->max_oa_bits));
}

/* Return the translation regime that applies to an AT instruction */
static enum trans_regime compute_translation_regime(struct kvm_vcpu *vcpu, u32 op)
{
	/*
	 * We only get here from guest EL2, so the translation
	 * regime AT applies to is solely defined by {E2H,TGE}.
	 */
	switch (op) {
	case OP_AT_S1E2R:
	case OP_AT_S1E2W:
	case OP_AT_S1E2A:
		return vcpu_el2_e2h_is_set(vcpu) ? TR_EL20 : TR_EL2;
	default:
		return (vcpu_el2_e2h_is_set(vcpu) &&
			vcpu_el2_tge_is_set(vcpu)) ? TR_EL20 : TR_EL10;
	}
}

static bool s1pie_enabled(struct kvm_vcpu *vcpu, enum trans_regime regime)
{
	if (!kvm_has_s1pie(vcpu->kvm))
		return false;

	switch (regime) {
	case TR_EL2:
	case TR_EL20:
		return vcpu_read_sys_reg(vcpu, TCR2_EL2) & TCR2_EL2_PIE;
	case TR_EL10:
		return (__vcpu_sys_reg(vcpu, HCRX_EL2) & HCRX_EL2_TCR2En) &&
			(__vcpu_sys_reg(vcpu, TCR2_EL1) & TCR2_EL1_PIE);
	default:
		BUG();
	}
}

static void compute_s1poe(struct kvm_vcpu *vcpu, struct s1_walk_info *wi)
{
	u64 val;

	if (!kvm_has_s1poe(vcpu->kvm)) {
		wi->poe = wi->e0poe = false;
		return;
	}

	switch (wi->regime) {
	case TR_EL2:
	case TR_EL20:
		val = vcpu_read_sys_reg(vcpu, TCR2_EL2);
		wi->poe = val & TCR2_EL2_POE;
		wi->e0poe = (wi->regime == TR_EL20) && (val & TCR2_EL2_E0POE);
		break;
	case TR_EL10:
		/* POE only takes effect if EL1 has access to TCR2_EL1 */
		if (!(__vcpu_sys_reg(vcpu, HCRX_EL2) & HCRX_EL2_TCR2En)) {
			wi->poe = wi->e0poe = false;
			return;
		}

		val = __vcpu_sys_reg(vcpu, TCR2_EL1);
		wi->poe = val & TCR2_EL1_POE;
		wi->e0poe = val & TCR2_EL1_E0POE;
	}
}

static int setup_s1_walk(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
			 struct s1_walk_result *wr, u64 va)
{
	u64 hcr, sctlr, tcr, tg, ps, ia_bits, ttbr;
	unsigned int stride, x;
	bool va55, tbi, lva;

	hcr = __vcpu_sys_reg(vcpu, HCR_EL2);

	va55 = va & BIT(55);

	if (wi->regime == TR_EL2 && va55)
		goto addrsz;

	wi->s2 = wi->regime == TR_EL10 && (hcr & (HCR_VM | HCR_DC));

	switch (wi->regime) {
	case TR_EL10:
		sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL1);
		tcr = vcpu_read_sys_reg(vcpu, TCR_EL1);
		ttbr = (va55 ?
			vcpu_read_sys_reg(vcpu, TTBR1_EL1) :
			vcpu_read_sys_reg(vcpu, TTBR0_EL1));
		break;
	case TR_EL2:
	case TR_EL20:
		sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL2);
		tcr = vcpu_read_sys_reg(vcpu, TCR_EL2);
		ttbr = (va55 ?
			vcpu_read_sys_reg(vcpu, TTBR1_EL2) :
			vcpu_read_sys_reg(vcpu, TTBR0_EL2));
		break;
	default:
		BUG();
	}

	tbi = (wi->regime == TR_EL2 ?
	       FIELD_GET(TCR_EL2_TBI, tcr) :
	       (va55 ?
		FIELD_GET(TCR_TBI1, tcr) :
		FIELD_GET(TCR_TBI0, tcr)));

	if (!tbi && (u64)sign_extend64(va, 55) != va)
		goto addrsz;

	va = (u64)sign_extend64(va, 55);

	/* Let's put the MMU disabled case aside immediately */
	switch (wi->regime) {
	case TR_EL10:
		/*
		 * If dealing with the EL1&0 translation regime, 3 things
		 * can disable the S1 translation:
		 *
		 * - HCR_EL2.DC = 1
		 * - HCR_EL2.{E2H,TGE} = {0,1}
		 * - SCTLR_EL1.M = 0
		 *
		 * The TGE part is interesting. If we have decided that this
		 * is EL1&0, then it means that either {E2H,TGE} == {1,0} or
		 * {0,x}, and we only need to test for TGE == 1.
		 */
		if (hcr & (HCR_DC | HCR_TGE)) {
			wr->level = S1_MMU_DISABLED;
			break;
		}
		fallthrough;
	case TR_EL2:
	case TR_EL20:
		if (!(sctlr & SCTLR_ELx_M))
			wr->level = S1_MMU_DISABLED;
		break;
	}

	if (wr->level == S1_MMU_DISABLED) {
		if (va >= BIT(kvm_get_pa_bits(vcpu->kvm)))
			goto addrsz;

		wr->pa = va;
		return 0;
	}

	wi->be = sctlr & SCTLR_ELx_EE;

	wi->hpd = kvm_has_feat(vcpu->kvm, ID_AA64MMFR1_EL1, HPDS, IMP);
	wi->hpd &= (wi->regime == TR_EL2 ?
		    FIELD_GET(TCR_EL2_HPD, tcr) :
		    (va55 ?
		     FIELD_GET(TCR_HPD1, tcr) :
		     FIELD_GET(TCR_HPD0, tcr)));
	/* R_JHSVW */
	wi->hpd |= s1pie_enabled(vcpu, wi->regime);

	/* Do we have POE? */
	compute_s1poe(vcpu, wi);

	/* R_BVXDG */
	wi->hpd |= (wi->poe || wi->e0poe);

	/* Someone was silly enough to encode TG0/TG1 differently */
	if (va55) {
		wi->txsz = FIELD_GET(TCR_T1SZ_MASK, tcr);
		tg = FIELD_GET(TCR_TG1_MASK, tcr);

		switch (tg << TCR_TG1_SHIFT) {
		case TCR_TG1_4K:
			wi->pgshift = 12; break;
		case TCR_TG1_16K:
			wi->pgshift = 14; break;
		case TCR_TG1_64K:
		default:	/* IMPDEF: treat any other value as 64k */
			wi->pgshift = 16; break;
		}
	} else {
		wi->txsz = FIELD_GET(TCR_T0SZ_MASK, tcr);
		tg = FIELD_GET(TCR_TG0_MASK, tcr);

		switch (tg << TCR_TG0_SHIFT) {
		case TCR_TG0_4K:
			wi->pgshift = 12; break;
		case TCR_TG0_16K:
			wi->pgshift = 14; break;
		case TCR_TG0_64K:
		default:	/* IMPDEF: treat any other value as 64k */
			wi->pgshift = 16; break;
		}
	}

	/* R_PLCGL, R_YXNYW */
	if (!kvm_has_feat_enum(vcpu->kvm, ID_AA64MMFR2_EL1, ST, 48_47)) {
		if (wi->txsz > 39)
			goto transfault_l0;
	} else {
		if (wi->txsz > 48 || (BIT(wi->pgshift) == SZ_64K && wi->txsz > 47))
			goto transfault_l0;
	}

	/* R_GTJBY, R_SXWGM */
	switch (BIT(wi->pgshift)) {
	case SZ_4K:
		lva = kvm_has_feat(vcpu->kvm, ID_AA64MMFR0_EL1, TGRAN4, 52_BIT);
		lva &= tcr & (wi->regime == TR_EL2 ? TCR_EL2_DS : TCR_DS);
		break;
	case SZ_16K:
		lva = kvm_has_feat(vcpu->kvm, ID_AA64MMFR0_EL1, TGRAN16, 52_BIT);
		lva &= tcr & (wi->regime == TR_EL2 ? TCR_EL2_DS : TCR_DS);
		break;
	case SZ_64K:
		lva = kvm_has_feat(vcpu->kvm, ID_AA64MMFR2_EL1, VARange, 52);
		break;
	}

	if ((lva && wi->txsz < 12) || (!lva && wi->txsz < 16))
		goto transfault_l0;

	ia_bits = get_ia_size(wi);

	/* R_YYVYV, I_THCZK */
	if ((!va55 && va > GENMASK(ia_bits - 1, 0)) ||
	    (va55 && va < GENMASK(63, ia_bits)))
		goto transfault_l0;

	/* I_ZFSYQ */
	if (wi->regime != TR_EL2 &&
	    (tcr & (va55 ? TCR_EPD1_MASK : TCR_EPD0_MASK)))
		goto transfault_l0;

	/* R_BNDVG and following statements */
	if (kvm_has_feat(vcpu->kvm, ID_AA64MMFR2_EL1, E0PD, IMP) &&
	    wi->as_el0 && (tcr & (va55 ?
				  TCR_E0PD1 : TCR_E0PD0)))
		goto transfault_l0;

	/* AArch64.S1StartLevel() */
	stride = wi->pgshift - 3;
	wi->sl = 3 - (((ia_bits - 1) - wi->pgshift) / stride);

	ps = (wi->regime == TR_EL2 ?
	      FIELD_GET(TCR_EL2_PS_MASK, tcr) : FIELD_GET(TCR_IPS_MASK, tcr));

	wi->max_oa_bits = min(get_kvm_ipa_limit(), ps_to_output_size(ps));

	/* Compute minimal alignment */
	x = 3 + ia_bits - ((3 - wi->sl) * stride + wi->pgshift);

	wi->baddr = ttbr & TTBRx_EL1_BADDR;

	/* R_VPBBF */
	if (check_output_size(wi->baddr, wi))
		goto addrsz;

	wi->baddr &= GENMASK_ULL(wi->max_oa_bits - 1, x);

	return 0;

addrsz:			/* Address Size Fault level 0 */
	fail_s1_walk(wr, ESR_ELx_FSC_ADDRSZ_L(0), false);
	return -EFAULT;

transfault_l0:		/* Translation Fault level 0 */
	fail_s1_walk(wr, ESR_ELx_FSC_FAULT_L(0), false);
	return -EFAULT;
}

static int walk_s1(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
		   struct s1_walk_result *wr, u64 va)
{
	u64 va_top, va_bottom, baddr, desc;
	int level, stride, ret;

	level = wi->sl;
	stride = wi->pgshift - 3;
	baddr = wi->baddr;

	va_top = get_ia_size(wi) - 1;

	while (1) {
		u64 index, ipa;

		va_bottom = (3 - level) * stride + wi->pgshift;
		index = (va & GENMASK_ULL(va_top, va_bottom)) >> (va_bottom - 3);

		ipa = baddr | index;

		if (wi->s2) {
			struct kvm_s2_trans s2_trans = {};

			ret = kvm_walk_nested_s2(vcpu, ipa, &s2_trans);
			if (ret) {
				fail_s1_walk(wr,
					     (s2_trans.esr & ~ESR_ELx_FSC_LEVEL) | level,
					     true);
				return ret;
			}

			if (!kvm_s2_trans_readable(&s2_trans)) {
				fail_s1_walk(wr, ESR_ELx_FSC_PERM_L(level),
					     true);

				return -EPERM;
			}

			ipa = kvm_s2_trans_output(&s2_trans);
		}

		ret = kvm_read_guest(vcpu->kvm, ipa, &desc, sizeof(desc));
		if (ret) {
			fail_s1_walk(wr, ESR_ELx_FSC_SEA_TTW(level), false);
			return ret;
		}

		if (wi->be)
			desc = be64_to_cpu((__force __be64)desc);
		else
			desc = le64_to_cpu((__force __le64)desc);

		/* Invalid descriptor */
		if (!(desc & BIT(0)))
			goto transfault;

		/* Block mapping, check validity down the line */
		if (!(desc & BIT(1)))
			break;

		/* Page mapping */
		if (level == 3)
			break;

		/* Table handling */
		if (!wi->hpd) {
			wr->APTable  |= FIELD_GET(S1_TABLE_AP, desc);
			wr->UXNTable |= FIELD_GET(PMD_TABLE_UXN, desc);
			wr->PXNTable |= FIELD_GET(PMD_TABLE_PXN, desc);
		}

		baddr = desc & GENMASK_ULL(47, wi->pgshift);

		/* Check for out-of-range OA */
		if (check_output_size(baddr, wi))
			goto addrsz;

		/* Prepare for next round */
		va_top = va_bottom - 1;
		level++;
	}

	/* Block mapping, check the validity of the level */
	if (!(desc & BIT(1))) {
		bool valid_block = false;

		switch (BIT(wi->pgshift)) {
		case SZ_4K:
			valid_block = level == 1 || level == 2;
			break;
		case SZ_16K:
		case SZ_64K:
			valid_block = level == 2;
			break;
		}

		if (!valid_block)
			goto transfault;
	}

	if (check_output_size(desc & GENMASK(47, va_bottom), wi))
		goto addrsz;

	if (!(desc & PTE_AF)) {
		fail_s1_walk(wr, ESR_ELx_FSC_ACCESS_L(level), false);
		return -EACCES;
	}

	va_bottom += contiguous_bit_shift(desc, wi, level);

	wr->failed = false;
	wr->level = level;
	wr->desc = desc;
	wr->pa = desc & GENMASK(47, va_bottom);
	wr->pa |= va &
		  GENMASK_ULL(va_bottom - 1, 0);

	wr->nG = (wi->regime != TR_EL2) && (desc & PTE_NG);
	if (wr->nG) {
		u64 asid_ttbr, tcr;

		switch (wi->regime) {
		case TR_EL10:
			tcr = vcpu_read_sys_reg(vcpu, TCR_EL1);
			asid_ttbr = ((tcr & TCR_A1) ?
				     vcpu_read_sys_reg(vcpu, TTBR1_EL1) :
				     vcpu_read_sys_reg(vcpu, TTBR0_EL1));
			break;
		case TR_EL20:
			tcr = vcpu_read_sys_reg(vcpu, TCR_EL2);
			asid_ttbr = ((tcr & TCR_A1) ?
				     vcpu_read_sys_reg(vcpu, TTBR1_EL2) :
				     vcpu_read_sys_reg(vcpu, TTBR0_EL2));
			break;
		default:
			BUG();
		}

		wr->asid = FIELD_GET(TTBR_ASID_MASK, asid_ttbr);
		if (!kvm_has_feat_enum(vcpu->kvm, ID_AA64MMFR0_EL1, ASIDBITS, 16) ||
		    !(tcr & TCR_ASID16))
			wr->asid &= GENMASK(7, 0);
	}

	return 0;

addrsz:
	fail_s1_walk(wr, ESR_ELx_FSC_ADDRSZ_L(level), false);
	return -EINVAL;
transfault:
	fail_s1_walk(wr, ESR_ELx_FSC_FAULT_L(level), false);
	return -ENOENT;
}

struct mmu_config {
	u64	ttbr0;
	u64	ttbr1;
	u64	tcr;
	u64	mair;
	u64	tcr2;
	u64	pir;
	u64	pire0;
	u64	por_el0;
	u64	por_el1;
	u64	sctlr;
	u64	vttbr;
	u64	vtcr;
};

static void __mmu_config_save(struct mmu_config *config)
{
	config->ttbr0	= read_sysreg_el1(SYS_TTBR0);
	config->ttbr1	= read_sysreg_el1(SYS_TTBR1);
	config->tcr	= read_sysreg_el1(SYS_TCR);
	config->mair	= read_sysreg_el1(SYS_MAIR);
	if (cpus_have_final_cap(ARM64_HAS_TCR2)) {
		config->tcr2	= read_sysreg_el1(SYS_TCR2);
		if (cpus_have_final_cap(ARM64_HAS_S1PIE)) {
			config->pir	= read_sysreg_el1(SYS_PIR);
			config->pire0	= read_sysreg_el1(SYS_PIRE0);
		}
		if (system_supports_poe()) {
			config->por_el1	= read_sysreg_el1(SYS_POR);
			config->por_el0	= read_sysreg_s(SYS_POR_EL0);
		}
	}
	config->sctlr	= read_sysreg_el1(SYS_SCTLR);
	config->vttbr	= read_sysreg(vttbr_el2);
	config->vtcr	= read_sysreg(vtcr_el2);
}

static void __mmu_config_restore(struct mmu_config *config)
{
	/*
	 * ARM errata 1165522 and 1530923 require TGE to be 1 before
	 * we update the guest state.
	 */
	asm(ALTERNATIVE("nop", "isb", ARM64_WORKAROUND_SPECULATIVE_AT));

	write_sysreg_el1(config->ttbr0,	SYS_TTBR0);
	write_sysreg_el1(config->ttbr1,	SYS_TTBR1);
	write_sysreg_el1(config->tcr,	SYS_TCR);
	write_sysreg_el1(config->mair,	SYS_MAIR);
	if (cpus_have_final_cap(ARM64_HAS_TCR2)) {
		write_sysreg_el1(config->tcr2, SYS_TCR2);
		if (cpus_have_final_cap(ARM64_HAS_S1PIE)) {
			write_sysreg_el1(config->pir, SYS_PIR);
			write_sysreg_el1(config->pire0, SYS_PIRE0);
		}
		if (system_supports_poe()) {
			write_sysreg_el1(config->por_el1, SYS_POR);
			write_sysreg_s(config->por_el0, SYS_POR_EL0);
		}
	}
	write_sysreg_el1(config->sctlr,	SYS_SCTLR);
	write_sysreg(config->vttbr,	vttbr_el2);
	write_sysreg(config->vtcr,	vtcr_el2);
}

static bool at_s1e1p_fast(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
{
	u64 host_pan;
	bool fail;

	host_pan = read_sysreg_s(SYS_PSTATE_PAN);
	write_sysreg_s(*vcpu_cpsr(vcpu) & PSTATE_PAN, SYS_PSTATE_PAN);

	switch (op) {
	case OP_AT_S1E1RP:
		fail = __kvm_at(OP_AT_S1E1RP, vaddr);
		break;
	case OP_AT_S1E1WP:
		fail = __kvm_at(OP_AT_S1E1WP, vaddr);
		break;
	}

	write_sysreg_s(host_pan, SYS_PSTATE_PAN);

	return fail;
}

#define MEMATTR(ic, oc)		(MEMATTR_##oc << 4 | MEMATTR_##ic)
#define MEMATTR_NC		0b0100
#define MEMATTR_Wt		0b1000
#define MEMATTR_Wb		0b1100
#define MEMATTR_WbRaWa		0b1111

#define MEMATTR_IS_DEVICE(m)	(((m) & GENMASK(7, 4)) == 0)

static u8 s2_memattr_to_attr(u8 memattr)
{
	memattr &= 0b1111;

	switch (memattr) {
	case 0b0000:
	case 0b0001:
	case 0b0010:
	case 0b0011:
		return memattr << 2;
	case 0b0100:
		return MEMATTR(Wb, Wb);
	case 0b0101:
		return MEMATTR(NC, NC);
	case 0b0110:
		return MEMATTR(Wt, NC);
	case 0b0111:
		return MEMATTR(Wb, NC);
	case 0b1000:
		/* Reserved, assume NC */
		return MEMATTR(NC, NC);
	case 0b1001:
		return MEMATTR(NC, Wt);
	case 0b1010:
		return MEMATTR(Wt, Wt);
	case 0b1011:
		return MEMATTR(Wb, Wt);
	case 0b1100:
		/* Reserved, assume NC */
		return MEMATTR(NC, NC);
	case 0b1101:
		return MEMATTR(NC, Wb);
	case 0b1110:
		return MEMATTR(Wt, Wb);
	case 0b1111:
		return MEMATTR(Wb, Wb);
	default:
		unreachable();
	}
}

static u8 combine_s1_s2_attr(u8 s1, u8 s2)
{
	bool transient;
	u8 final = 0;

	/* Upgrade transient s1 to non-transient to simplify things */
	switch (s1) {
	case 0b0001 ... 0b0011:	/* Normal, Write-Through Transient */
		transient = true;
		s1 = MEMATTR_Wt | (s1 & GENMASK(1, 0));
		break;
	case 0b0101 ... 0b0111:	/* Normal, Write-Back Transient */
		transient = true;
		s1 = MEMATTR_Wb | (s1 & GENMASK(1, 0));
		break;
	default:
		transient = false;
	}

	/* S2CombineS1AttrHints() */
	if ((s1 & GENMASK(3, 2)) == MEMATTR_NC ||
	    (s2 & GENMASK(3, 2)) == MEMATTR_NC)
		final = MEMATTR_NC;
	else if ((s1 & GENMASK(3, 2)) == MEMATTR_Wt ||
		 (s2 & GENMASK(3, 2)) == MEMATTR_Wt)
		final = MEMATTR_Wt;
	else
		final = MEMATTR_Wb;

	if (final != MEMATTR_NC) {
		/* Inherit RaWa hints from S1 */
		if (transient) {
			switch (s1 & GENMASK(3, 2)) {
			case MEMATTR_Wt:
				final = 0;
				break;
			case MEMATTR_Wb:
				final = MEMATTR_NC;
				break;
			}
		}

		final |= s1 & GENMASK(1, 0);
	}

	return final;
}

#define ATTR_NSH	0b00
#define ATTR_RSV	0b01
#define ATTR_OSH	0b10
#define ATTR_ISH	0b11

static u8 compute_sh(u8 attr, u64 desc)
{
	u8 sh;

	/* Any form of device, as well as NC has SH[1:0]=0b10 */
	if (MEMATTR_IS_DEVICE(attr) || attr == MEMATTR(NC, NC))
		return ATTR_OSH;

	sh = FIELD_GET(PTE_SHARED, desc);
	if (sh == ATTR_RSV)	/* Reserved, mapped to NSH */
		sh = ATTR_NSH;

	return sh;
}

static u8 combine_sh(u8 s1_sh, u8 s2_sh)
{
	if (s1_sh == ATTR_OSH || s2_sh == ATTR_OSH)
		return ATTR_OSH;
	if (s1_sh == ATTR_ISH || s2_sh == ATTR_ISH)
		return ATTR_ISH;

	return ATTR_NSH;
}

static u64 compute_par_s12(struct kvm_vcpu *vcpu, u64 s1_par,
			   struct kvm_s2_trans *tr)
{
	u8 s1_parattr, s2_memattr, final_attr;
	u64 par;

	/* If S2 has failed to translate, report the damage */
	if (tr->esr) {
		par = SYS_PAR_EL1_RES1;
		par |= SYS_PAR_EL1_F;
		par |= SYS_PAR_EL1_S;
		par |= FIELD_PREP(SYS_PAR_EL1_FST, tr->esr);
		return par;
	}

	s1_parattr = FIELD_GET(SYS_PAR_EL1_ATTR, s1_par);
	s2_memattr = FIELD_GET(GENMASK(5, 2), tr->desc);

	if (__vcpu_sys_reg(vcpu, HCR_EL2) & HCR_FWB) {
		if (!kvm_has_feat(vcpu->kvm, ID_AA64PFR2_EL1, MTEPERM, IMP))
			s2_memattr &= ~BIT(3);

		/* Combination of R_VRJSW and R_RHWZM */
		switch (s2_memattr) {
		case 0b0101:
			if (MEMATTR_IS_DEVICE(s1_parattr))
				final_attr = s1_parattr;
			else
				final_attr = MEMATTR(NC, NC);
			break;
		case 0b0110:
		case 0b1110:
			final_attr = MEMATTR(WbRaWa, WbRaWa);
			break;
		case 0b0111:
		case 0b1111:
			/* Preserve S1 attribute */
			final_attr = s1_parattr;
			break;
		case 0b0100:
		case 0b1100:
		case 0b1101:
			/* Reserved, do something non-silly */
			final_attr = s1_parattr;
			break;
		default:
			/*
			 * MemAttr[2]=0, Device from S2.
			 *
			 * FWB does not influence the way that stage 1
			 * memory types and attributes are combined
			 * with stage 2 Device type and attributes.
			 */
			final_attr = min(s2_memattr_to_attr(s2_memattr),
					 s1_parattr);
		}
	} else {
		/* Combination of R_HMNDG, R_TNHFM and R_GQFSF */
		u8 s2_parattr = s2_memattr_to_attr(s2_memattr);

		if (MEMATTR_IS_DEVICE(s1_parattr) ||
		    MEMATTR_IS_DEVICE(s2_parattr)) {
			final_attr = min(s1_parattr, s2_parattr);
		} else {
			/* At this stage, this is memory vs memory */
			final_attr  = combine_s1_s2_attr(s1_parattr & 0xf,
							 s2_parattr & 0xf);
			final_attr |= combine_s1_s2_attr(s1_parattr >> 4,
							 s2_parattr >> 4) << 4;
		}
	}

	if ((__vcpu_sys_reg(vcpu, HCR_EL2) & HCR_CD) &&
	    !MEMATTR_IS_DEVICE(final_attr))
		final_attr = MEMATTR(NC, NC);

	par  = FIELD_PREP(SYS_PAR_EL1_ATTR, final_attr);
	par |= tr->output & GENMASK(47, 12);
	par |= FIELD_PREP(SYS_PAR_EL1_SH,
			  combine_sh(FIELD_GET(SYS_PAR_EL1_SH, s1_par),
				     compute_sh(final_attr, tr->desc)));

	return par;
}

static u64 compute_par_s1(struct kvm_vcpu *vcpu, struct s1_walk_result *wr,
			  enum trans_regime regime)
{
	u64 par;

	if (wr->failed) {
		par = SYS_PAR_EL1_RES1;
		par |= SYS_PAR_EL1_F;
		par |= FIELD_PREP(SYS_PAR_EL1_FST, wr->fst);
		par |= wr->ptw ? SYS_PAR_EL1_PTW : 0;
		par |= wr->s2 ? SYS_PAR_EL1_S : 0;
	} else if (wr->level == S1_MMU_DISABLED) {
		/* MMU off or HCR_EL2.DC == 1 */
		par  = SYS_PAR_EL1_NSE;
		par |= wr->pa & GENMASK_ULL(47, 12);

		if (regime == TR_EL10 &&
		    (__vcpu_sys_reg(vcpu, HCR_EL2) & HCR_DC)) {
			par |= FIELD_PREP(SYS_PAR_EL1_ATTR,
					  MEMATTR(WbRaWa, WbRaWa));
			par |= FIELD_PREP(SYS_PAR_EL1_SH, ATTR_NSH);
		} else {
			par |= FIELD_PREP(SYS_PAR_EL1_ATTR, 0); /* nGnRnE */
			par |= FIELD_PREP(SYS_PAR_EL1_SH, ATTR_OSH);
		}
	} else {
		u64 mair, sctlr;
		u8 sh;

		par = SYS_PAR_EL1_NSE;

		mair = (regime == TR_EL10 ?
			vcpu_read_sys_reg(vcpu, MAIR_EL1) :
			vcpu_read_sys_reg(vcpu, MAIR_EL2));

		mair >>= FIELD_GET(PTE_ATTRINDX_MASK, wr->desc) * 8;
		mair &= 0xff;

		sctlr = (regime == TR_EL10 ?
			 vcpu_read_sys_reg(vcpu, SCTLR_EL1) :
			 vcpu_read_sys_reg(vcpu, SCTLR_EL2));

		/* Force NC for memory if SCTLR_ELx.C is clear */
		if (!(sctlr & SCTLR_EL1_C) && !MEMATTR_IS_DEVICE(mair))
			mair = MEMATTR(NC, NC);

		par |= FIELD_PREP(SYS_PAR_EL1_ATTR, mair);
		par |= wr->pa & GENMASK_ULL(47, 12);

		sh = compute_sh(mair, wr->desc);
		par |= FIELD_PREP(SYS_PAR_EL1_SH, sh);
	}

	return par;
}

static bool pan3_enabled(struct kvm_vcpu *vcpu, enum trans_regime regime)
{
	u64 sctlr;

	if (!kvm_has_feat(vcpu->kvm, ID_AA64MMFR1_EL1, PAN, PAN3))
		return false;

	if (s1pie_enabled(vcpu, regime))
		return true;

	if (regime == TR_EL10)
		sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL1);
	else
		sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL2);

	return sctlr & SCTLR_EL1_EPAN;
}

static void compute_s1_direct_permissions(struct kvm_vcpu *vcpu,
					  struct s1_walk_info *wi,
					  struct s1_walk_result *wr)
{
	bool wxn;

	/* Non-hierarchical part of AArch64.S1DirectBasePermissions() */
	if (wi->regime != TR_EL2) {
		switch (FIELD_GET(PTE_USER | PTE_RDONLY, wr->desc)) {
		case 0b00:
			wr->pr = wr->pw = true;
			wr->ur = wr->uw = false;
			break;
		case 0b01:
			wr->pr = wr->pw = wr->ur = wr->uw = true;
			break;
		case 0b10:
			wr->pr = true;
			wr->pw = wr->ur = wr->uw = false;
			break;
		case 0b11:
			wr->pr = wr->ur = true;
			wr->pw = wr->uw = false;
			break;
		}

		/* We don't use px for anything yet, but hey... */
		wr->px = !((wr->desc & PTE_PXN) || wr->uw);
		wr->ux = !(wr->desc & PTE_UXN);
	} else {
		wr->ur = wr->uw = wr->ux = false;

		if (!(wr->desc & PTE_RDONLY)) {
			wr->pr = wr->pw = true;
		} else {
			wr->pr = true;
			wr->pw = false;
		}

		/* XN maps to UXN */
		wr->px = !(wr->desc & PTE_UXN);
	}

	switch (wi->regime) {
	case TR_EL2:
	case TR_EL20:
		wxn = (vcpu_read_sys_reg(vcpu, SCTLR_EL2) & SCTLR_ELx_WXN);
		break;
	case TR_EL10:
		wxn = (__vcpu_sys_reg(vcpu, SCTLR_EL1) & SCTLR_ELx_WXN);
		break;
	}

	wr->pwxn = wr->uwxn = wxn;
	wr->pov = wi->poe;
	wr->uov = wi->e0poe;
}

static void compute_s1_hierarchical_permissions(struct kvm_vcpu *vcpu,
						struct s1_walk_info *wi,
						struct s1_walk_result *wr)
{
	/* Hierarchical part of AArch64.S1DirectBasePermissions() */
	if (wi->regime != TR_EL2) {
		switch (wr->APTable) {
		case 0b00:
			break;
		case 0b01:
			wr->ur = wr->uw = false;
			break;
		case 0b10:
			wr->pw = wr->uw = false;
			break;
		case 0b11:
			wr->pw = wr->ur = wr->uw = false;
			break;
		}

		wr->px &= !wr->PXNTable;
		wr->ux &= !wr->UXNTable;
	} else {
		if (wr->APTable & BIT(1))
			wr->pw = false;

		/* XN maps to UXN */
		wr->px &= !wr->UXNTable;
	}
}

#define perm_idx(v, r, i)	((vcpu_read_sys_reg((v), (r)) >> ((i) * 4)) & 0xf)

#define set_priv_perms(wr, r, w, x)	\
	do {				\
		(wr)->pr = (r);		\
		(wr)->pw = (w);		\
		(wr)->px = (x);		\
	} while (0)

#define set_unpriv_perms(wr, r, w, x)	\
	do {				\
		(wr)->ur = (r);		\
		(wr)->uw = (w);		\
		(wr)->ux = (x);		\
	} while (0)

#define set_priv_wxn(wr, v)		\
	do {				\
		(wr)->pwxn = (v);	\
	} while (0)

#define set_unpriv_wxn(wr, v)		\
	do {				\
		(wr)->uwxn = (v);	\
	} while (0)

/* Similar to AArch64.S1IndirectBasePermissions(), without GCS */
#define set_perms(w, wr, ip)						\
	do {								\
		/* R_LLZDZ */						\
		switch ((ip)) {						\
		case 0b0000:						\
			set_ ## w ## _perms((wr), false, false, false);	\
			break;						\
		case 0b0001:						\
			set_ ## w ## _perms((wr), true , false, false);	\
			break;						\
		case 0b0010:						\
			set_ ## w ## _perms((wr), false, false, true );	\
			break;						\
		case 0b0011:						\
			set_ ## w ## _perms((wr), true , false, true );	\
			break;						\
		case 0b0100:						\
			set_ ## w ## _perms((wr), false, false, false);	\
			break;						\
		case 0b0101:						\
			set_ ## w ## _perms((wr), true , true , false);	\
			break;						\
		case 0b0110:						\
			set_ ## w ## _perms((wr), true , true , true );	\
			break;						\
		case 0b0111:						\
			set_ ## w ## _perms((wr), true , true , true );	\
			break;						\
		case 0b1000:						\
			set_ ## w ## _perms((wr), true , false, false);	\
			break;						\
		case 0b1001:						\
			set_ ## w ## _perms((wr), true , false, false);	\
			break;						\
		case 0b1010:						\
			set_ ## w ## _perms((wr), true , false, true );	\
			break;						\
		case 0b1011:						\
			set_ ## w ## _perms((wr), false, false, false);	\
			break;						\
		case 0b1100:						\
			set_ ## w ## _perms((wr), true , true , false);	\
			break;						\
		case 0b1101:						\
			set_ ## w ## _perms((wr), false, false, false);	\
			break;						\
		case 0b1110:						\
			set_ ## w ## _perms((wr), true , true , true );	\
			break;						\
		case 0b1111:						\
			set_ ## w ## _perms((wr), false, false, false);	\
			break;						\
		}							\
									\
		/* R_HJYGR */						\
		set_ ## w ## _wxn((wr), ((ip) == 0b0110));		\
									\
	} while (0)

static void compute_s1_indirect_permissions(struct kvm_vcpu *vcpu,
					    struct s1_walk_info *wi,
					    struct s1_walk_result *wr)
{
	u8 up, pp, idx;

	idx = pte_pi_index(wr->desc);

	switch (wi->regime) {
	case TR_EL10:
		pp = perm_idx(vcpu, PIR_EL1, idx);
		up = perm_idx(vcpu, PIRE0_EL1, idx);
		break;
	case TR_EL20:
		pp = perm_idx(vcpu, PIR_EL2, idx);
		up = perm_idx(vcpu, PIRE0_EL2, idx);
		break;
	case TR_EL2:
		pp = perm_idx(vcpu, PIR_EL2, idx);
		up = 0;
		break;
	}

	set_perms(priv, wr, pp);

	if (wi->regime != TR_EL2)
		set_perms(unpriv, wr, up);
	else
		set_unpriv_perms(wr, false, false, false);

	wr->pov = wi->poe && !(pp & BIT(3));
	wr->uov = wi->e0poe && !(up & BIT(3));

	/* R_VFPJF */
	if (wr->px && wr->uw) {
		set_priv_perms(wr, false, false, false);
		set_unpriv_perms(wr, false, false, false);
	}
}

static void compute_s1_overlay_permissions(struct kvm_vcpu *vcpu,
					   struct s1_walk_info *wi,
					   struct s1_walk_result *wr)
{
	u8 idx, pov_perms, uov_perms;

	idx = FIELD_GET(PTE_PO_IDX_MASK, wr->desc);

	switch (wi->regime) {
	case TR_EL10:
		pov_perms = perm_idx(vcpu, POR_EL1, idx);
		uov_perms = perm_idx(vcpu, POR_EL0, idx);
		break;
	case TR_EL20:
		pov_perms = perm_idx(vcpu, POR_EL2, idx);
		uov_perms = perm_idx(vcpu, POR_EL0, idx);
		break;
	case TR_EL2:
		pov_perms = perm_idx(vcpu, POR_EL2, idx);
		uov_perms = 0;
		break;
	}

	if (pov_perms & ~POE_RWX)
		pov_perms = POE_NONE;

	if (wi->poe && wr->pov) {
		wr->pr &= pov_perms & POE_R;
		wr->pw &= pov_perms & POE_W;
		wr->px &= pov_perms & POE_X;
	}

	if (uov_perms & ~POE_RWX)
		uov_perms = POE_NONE;

	if (wi->e0poe && wr->uov) {
		wr->ur &= uov_perms & POE_R;
		wr->uw &= uov_perms & POE_W;
		wr->ux &= uov_perms & POE_X;
	}
}

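/*
 * Tie the helpers above together: base permissions come either from the
 * direct AP/PXN/UXN bits or, when S1PIE is enabled, from the PIR{E0}_ELx
 * indirect encoding; the hierarchical APTable/{P,U}XNTable restrictions
 * only apply when HPD is disabled; the POE overlay is applied last,
 * before the WXN and PAN adjustments performed below.
 */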
static void compute_s1_permissions(struct kvm_vcpu *vcpu,
				   struct s1_walk_info *wi,
				   struct s1_walk_result *wr)
{
	bool pan;

	if (!s1pie_enabled(vcpu, wi->regime))
		compute_s1_direct_permissions(vcpu, wi, wr);
	else
		compute_s1_indirect_permissions(vcpu, wi, wr);

	if (!wi->hpd)
		compute_s1_hierarchical_permissions(vcpu, wi, wr);

	if (wi->poe || wi->e0poe)
		compute_s1_overlay_permissions(vcpu, wi, wr);

	/* R_QXXPC */
	if (wr->pwxn) {
		if (!wr->pov && wr->pw)
			wr->px = false;
		if (wr->pov && wr->px)
			wr->pw = false;
	}

	/* R_NPBXC */
	if (wr->uwxn) {
		if (!wr->uov && wr->uw)
			wr->ux = false;
		if (wr->uov && wr->ux)
			wr->uw = false;
	}

	pan = wi->pan && (wr->ur || wr->uw ||
			  (pan3_enabled(vcpu, wi->regime) && wr->ux));
	wr->pw &= !pan;
	wr->pr &= !pan;
}

static u64 handle_at_slow(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
{
	struct s1_walk_result wr = {};
	struct s1_walk_info wi = {};
	bool perm_fail = false;
	int ret, idx;

	wi.regime = compute_translation_regime(vcpu, op);
	wi.as_el0 = (op == OP_AT_S1E0R || op == OP_AT_S1E0W);
	wi.pan = (op == OP_AT_S1E1RP || op == OP_AT_S1E1WP) &&
		 (*vcpu_cpsr(vcpu) & PSR_PAN_BIT);

	ret = setup_s1_walk(vcpu, &wi, &wr, vaddr);
	if (ret)
		goto compute_par;

	if (wr.level == S1_MMU_DISABLED)
		goto compute_par;

	idx = srcu_read_lock(&vcpu->kvm->srcu);

	ret = walk_s1(vcpu, &wi, &wr, vaddr);

	srcu_read_unlock(&vcpu->kvm->srcu, idx);

	if (ret)
		goto compute_par;

	compute_s1_permissions(vcpu, &wi, &wr);

	switch (op) {
	case OP_AT_S1E1RP:
	case OP_AT_S1E1R:
	case OP_AT_S1E2R:
		perm_fail = !wr.pr;
		break;
	case OP_AT_S1E1WP:
	case OP_AT_S1E1W:
	case OP_AT_S1E2W:
		perm_fail = !wr.pw;
		break;
	case OP_AT_S1E0R:
		perm_fail = !wr.ur;
		break;
	case OP_AT_S1E0W:
		perm_fail = !wr.uw;
		break;
	case OP_AT_S1E1A:
	case OP_AT_S1E2A:
		break;
	default:
		BUG();
	}

	if (perm_fail)
		fail_s1_walk(&wr, ESR_ELx_FSC_PERM_L(wr.level), false);

compute_par:
	return compute_par_s1(vcpu, &wr, wi.regime);
}

/*
 * Return the PAR_EL1 value as the result of a valid translation.
 *
 * If the translation is unsuccessful, the value may only contain
 * PAR_EL1.F, and cannot be taken at face value. It isn't an
 * indication of the translation having failed, only that the fast
 * path did not succeed, *unless* it indicates a S1 permission or
 * access fault.
 */
static u64 __kvm_at_s1e01_fast(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
{
	struct mmu_config config;
	struct kvm_s2_mmu *mmu;
	bool fail;
	u64 par;

	par = SYS_PAR_EL1_F;

	/*
	 * We've trapped, so everything is live on the CPU. As we will
	 * be switching contexts behind everybody's back, disable
	 * interrupts while holding the mmu lock.
	 */
	guard(write_lock_irqsave)(&vcpu->kvm->mmu_lock);

	/*
	 * If HCR_EL2.{E2H,TGE} == {1,1}, the MMU context is already
	 * the right one (as we trapped from vEL2). If not, save the
	 * full MMU context.
	 */
	if (vcpu_el2_e2h_is_set(vcpu) && vcpu_el2_tge_is_set(vcpu))
		goto skip_mmu_switch;

	/*
	 * Obtaining the S2 MMU for a L2 is horribly racy, and we may not
	 * find it (recycled by another vcpu, for example). When this
	 * happens, admit defeat immediately and use the SW (slow) path.
	 */
	mmu = lookup_s2_mmu(vcpu);
	if (!mmu)
		return par;

	__mmu_config_save(&config);

	write_sysreg_el1(vcpu_read_sys_reg(vcpu, TTBR0_EL1),	SYS_TTBR0);
	write_sysreg_el1(vcpu_read_sys_reg(vcpu, TTBR1_EL1),	SYS_TTBR1);
	write_sysreg_el1(vcpu_read_sys_reg(vcpu, TCR_EL1),	SYS_TCR);
	write_sysreg_el1(vcpu_read_sys_reg(vcpu, MAIR_EL1),	SYS_MAIR);
	if (kvm_has_tcr2(vcpu->kvm)) {
		write_sysreg_el1(vcpu_read_sys_reg(vcpu, TCR2_EL1), SYS_TCR2);
		if (kvm_has_s1pie(vcpu->kvm)) {
			write_sysreg_el1(vcpu_read_sys_reg(vcpu, PIR_EL1), SYS_PIR);
			write_sysreg_el1(vcpu_read_sys_reg(vcpu, PIRE0_EL1), SYS_PIRE0);
		}
		if (kvm_has_s1poe(vcpu->kvm)) {
			write_sysreg_el1(vcpu_read_sys_reg(vcpu, POR_EL1), SYS_POR);
			write_sysreg_s(vcpu_read_sys_reg(vcpu, POR_EL0), SYS_POR_EL0);
		}
	}
	write_sysreg_el1(vcpu_read_sys_reg(vcpu, SCTLR_EL1),	SYS_SCTLR);
	__load_stage2(mmu, mmu->arch);

skip_mmu_switch:
	/* Temporarily switch back to guest context */
	write_sysreg_hcr(vcpu->arch.hcr_el2);
	isb();

	switch (op) {
	case OP_AT_S1E1RP:
	case OP_AT_S1E1WP:
		fail = at_s1e1p_fast(vcpu, op, vaddr);
		break;
	case OP_AT_S1E1R:
		fail = __kvm_at(OP_AT_S1E1R, vaddr);
		break;
	case OP_AT_S1E1W:
		fail = __kvm_at(OP_AT_S1E1W, vaddr);
		break;
	case OP_AT_S1E0R:
		fail = __kvm_at(OP_AT_S1E0R, vaddr);
		break;
	case OP_AT_S1E0W:
		fail = __kvm_at(OP_AT_S1E0W, vaddr);
		break;
	case OP_AT_S1E1A:
		fail = __kvm_at(OP_AT_S1E1A, vaddr);
		break;
	default:
		WARN_ON_ONCE(1);
		fail = true;
		break;
	}

	if (!fail)
		par = read_sysreg_par();

	write_sysreg_hcr(HCR_HOST_VHE_FLAGS);

	if (!(vcpu_el2_e2h_is_set(vcpu) && vcpu_el2_tge_is_set(vcpu)))
		__mmu_config_restore(&config);

	return par;
}

static bool par_check_s1_perm_fault(u64 par)
{
	u8 fst = FIELD_GET(SYS_PAR_EL1_FST, par);

	return ((fst & ESR_ELx_FSC_TYPE) == ESR_ELx_FSC_PERM &&
		!(par & SYS_PAR_EL1_S));
}

static bool par_check_s1_access_fault(u64 par)
{
	u8 fst = FIELD_GET(SYS_PAR_EL1_FST, par);

	return ((fst & ESR_ELx_FSC_TYPE) == ESR_ELx_FSC_ACCESS &&
		!(par & SYS_PAR_EL1_S));
}

void __kvm_at_s1e01(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
{
	u64 par = __kvm_at_s1e01_fast(vcpu, op, vaddr);

	/*
	 * If PAR_EL1 reports that AT failed on a S1 permission or access
	 * fault, we know for sure that the PTW was able to walk the S1
	 * tables and there's nothing else to do.
	 *
	 * If AT failed for any other reason, then we must walk the guest S1
	 * to emulate the instruction.
	 */
	if ((par & SYS_PAR_EL1_F) &&
	    !par_check_s1_perm_fault(par) &&
	    !par_check_s1_access_fault(par))
		par = handle_at_slow(vcpu, op, vaddr);

	vcpu_write_sys_reg(vcpu, par, PAR_EL1);
}

void __kvm_at_s1e2(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
{
	u64 par;

	/*
	 * We've trapped, so everything is live on the CPU. As we will be
	 * switching context behind everybody's back, disable interrupts...
	 */
	scoped_guard(write_lock_irqsave, &vcpu->kvm->mmu_lock) {
		u64 val, hcr;
		bool fail;

		val = hcr = read_sysreg(hcr_el2);
		val &= ~HCR_TGE;
		val |= HCR_VM;

		if (!vcpu_el2_e2h_is_set(vcpu))
			val |= HCR_NV | HCR_NV1;

		write_sysreg_hcr(val);
		isb();

		par = SYS_PAR_EL1_F;

		switch (op) {
		case OP_AT_S1E2R:
			fail = __kvm_at(OP_AT_S1E1R, vaddr);
			break;
		case OP_AT_S1E2W:
			fail = __kvm_at(OP_AT_S1E1W, vaddr);
			break;
		case OP_AT_S1E2A:
			fail = __kvm_at(OP_AT_S1E1A, vaddr);
			break;
		default:
			WARN_ON_ONCE(1);
			fail = true;
		}

		isb();

		if (!fail)
			par = read_sysreg_par();

		write_sysreg_hcr(hcr);
		isb();
	}

	/* We failed the translation, let's replay it in slow motion */
	if ((par & SYS_PAR_EL1_F) && !par_check_s1_perm_fault(par))
		par = handle_at_slow(vcpu, op, vaddr);

	vcpu_write_sys_reg(vcpu, par, PAR_EL1);
}

void __kvm_at_s12(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
{
	struct kvm_s2_trans out = {};
	u64 ipa, par;
	bool write;
	int ret;

	/* Do the stage-1 translation */
	switch (op) {
	case OP_AT_S12E1R:
		op = OP_AT_S1E1R;
		write = false;
		break;
	case OP_AT_S12E1W:
		op = OP_AT_S1E1W;
		write = true;
		break;
	case OP_AT_S12E0R:
		op = OP_AT_S1E0R;
		write = false;
		break;
	case OP_AT_S12E0W:
		op = OP_AT_S1E0W;
		write = true;
		break;
	default:
		WARN_ON_ONCE(1);
		return;
	}

	__kvm_at_s1e01(vcpu, op, vaddr);
	par = vcpu_read_sys_reg(vcpu, PAR_EL1);
	if (par & SYS_PAR_EL1_F)
		return;

	/*
	 * If we only have a single stage of translation (E2H=0 or
	 * TGE=1), exit early. Same thing if {VM,DC}=={0,0}.
	 */
	if (!vcpu_el2_e2h_is_set(vcpu) || vcpu_el2_tge_is_set(vcpu) ||
	    !(vcpu_read_sys_reg(vcpu, HCR_EL2) & (HCR_VM | HCR_DC)))
		return;

	/* Do the stage-2 translation */
	ipa = (par & GENMASK_ULL(47, 12)) | (vaddr & GENMASK_ULL(11, 0));
	out.esr = 0;
	ret = kvm_walk_nested_s2(vcpu, ipa, &out);
	if (ret < 0)
		return;

	/* Check the access permission */
	if (!out.esr &&
	    ((!write && !out.readable) || (write && !out.writable)))
		out.esr = ESR_ELx_FSC_PERM_L(out.level & 0x3);

	par = compute_par_s12(vcpu, par, &out);
	vcpu_write_sys_reg(vcpu, par, PAR_EL1);
}

/*
 * Translate a VA for a given EL in a given translation regime, with
 * or without PAN. This requires wi->{regime, as_el0, pan} to be
 * set. The rest of the wi and wr should be 0-initialised.
 */
int __kvm_translate_va(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
		       struct s1_walk_result *wr, u64 va)
{
	int ret;

	ret = setup_s1_walk(vcpu, wi, wr, va);
	if (ret)
		return ret;

	if (wr->level == S1_MMU_DISABLED) {
		wr->ur = wr->uw = wr->ux = true;
		wr->pr = wr->pw = wr->px = true;
	} else {
		ret = walk_s1(vcpu, wi, wr, va);
		if (ret)
			return ret;

		compute_s1_permissions(vcpu, wi, wr);
	}

	return 0;
}
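/*
 * Illustrative usage sketch only (the register values and the way the
 * result is consumed are up to the caller): a user of __kvm_translate_va()
 * pre-selects the regime and zero-initialises everything else, e.g.:
 *
 *	struct s1_walk_info wi = { .regime = TR_EL10, .as_el0 = false, .pan = false };
 *	struct s1_walk_result wr = {};
 *
 *	if (!__kvm_translate_va(vcpu, &wi, &wr, va))
 *		// wr.pa, wr.level and the wr.{p,u}{r,w,x} bits are now valid
 */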