xref: /linux/arch/arm64/kvm/at.c (revision c43267e6794a36013fd495a4d81bf7f748fe4615)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (C) 2017 - Linaro Ltd
4  * Author: Jintack Lim <jintack.lim@linaro.org>
5  */
6 
7 #include <linux/kvm_host.h>
8 
9 #include <asm/esr.h>
10 #include <asm/kvm_hyp.h>
11 #include <asm/kvm_mmu.h>
12 #include <asm/lsui.h>
13 
14 static void fail_s1_walk(struct s1_walk_result *wr, u8 fst, bool s1ptw)
15 {
16 	wr->fst		= fst;
17 	wr->ptw		= s1ptw;
18 	wr->s2		= s1ptw;
19 	wr->failed	= true;
20 }
21 
22 #define S1_MMU_DISABLED		(-127)
23 
24 static int get_ia_size(struct s1_walk_info *wi)
25 {
26 	return 64 - wi->txsz;
27 }
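/*
 * Illustrative example (not in the original source): with TxSZ = 16,
 * the input address range spans 64 - 16 = 48 bits, i.e. VAs
 * 0x0 - 0xffff_ffff_ffff for TTBR0, or the matching sign-extended
 * range at the top of the address space for TTBR1.
 */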
28 
29 /* Return true if the IPA is out of the OA range */
30 static bool check_output_size(u64 ipa, struct s1_walk_info *wi)
31 {
32 	if (wi->pa52bit)
33 		return wi->max_oa_bits < 52 && (ipa & GENMASK_ULL(51, wi->max_oa_bits));
34 	return wi->max_oa_bits < 48 && (ipa & GENMASK_ULL(47, wi->max_oa_bits));
35 }
36 
37 static bool has_52bit_pa(struct kvm_vcpu *vcpu, struct s1_walk_info *wi, u64 tcr)
38 {
39 	switch (BIT(wi->pgshift)) {
40 	case SZ_64K:
41 	default:		/* IMPDEF: treat any other value as 64k */
42 		if (!kvm_has_feat_enum(vcpu->kvm, ID_AA64MMFR0_EL1, PARANGE, 52))
43 			return false;
44 		return ((wi->regime == TR_EL2 ?
45 			 FIELD_GET(TCR_EL2_PS_MASK, tcr) :
46 			 FIELD_GET(TCR_IPS_MASK, tcr)) == 0b0110);
47 	case SZ_16K:
48 		if (!kvm_has_feat(vcpu->kvm, ID_AA64MMFR0_EL1, TGRAN16, 52_BIT))
49 			return false;
50 		break;
51 	case SZ_4K:
52 		if (!kvm_has_feat(vcpu->kvm, ID_AA64MMFR0_EL1, TGRAN4, 52_BIT))
53 			return false;
54 		break;
55 	}
56 
57 	return (tcr & (wi->regime == TR_EL2 ? TCR_EL2_DS : TCR_DS));
58 }
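/*
 * Illustrative note: 0b0110 above is the TCR_ELx.{I}PS/PARange encoding
 * for a 52bit PA. 64k pages gain 52bit PAs through FEAT_LPA
 * (PARange == 52) alone, while 4k/16k pages additionally require
 * FEAT_LPA2, advertised by the TCR_ELx.DS bit tested on the return
 * path.
 */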
59 
60 static u64 desc_to_oa(struct s1_walk_info *wi, u64 desc)
61 {
62 	u64 addr;
63 
64 	if (!wi->pa52bit)
65 		return desc & GENMASK_ULL(47, wi->pgshift);
66 
67 	switch (BIT(wi->pgshift)) {
68 	case SZ_4K:
69 	case SZ_16K:
70 		addr = desc & GENMASK_ULL(49, wi->pgshift);
71 		addr |= FIELD_GET(KVM_PTE_ADDR_51_50_LPA2, desc) << 50;
72 		break;
73 	case SZ_64K:
74 	default:	    /* IMPDEF: treat any other value as 64k */
75 		addr = desc & GENMASK_ULL(47, wi->pgshift);
76 		addr |= FIELD_GET(KVM_PTE_ADDR_51_48, desc) << 48;
77 		break;
78 	}
79 
80 	return addr;
81 }
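/*
 * Illustrative note: with 52bit PAs the descriptor cannot carry the
 * whole OA in bits [47:pgshift]. For 64k pages, OA[51:48] sit in
 * descriptor bits [15:12] (KVM_PTE_ADDR_51_48); for 4k/16k pages with
 * FEAT_LPA2, OA[51:50] sit in bits [9:8] (KVM_PTE_ADDR_51_50_LPA2),
 * with [49:pgshift] in the usual place.
 */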
82 
83 /* Return the translation regime that applies to an AT instruction */
84 static enum trans_regime compute_translation_regime(struct kvm_vcpu *vcpu, u32 op)
85 {
86 	/*
87 	 * We only get here from guest EL2, so the translation
88 	 * regime AT applies to is solely defined by {E2H,TGE}.
89 	 */
90 	switch (op) {
91 	case OP_AT_S1E2R:
92 	case OP_AT_S1E2W:
93 	case OP_AT_S1E2A:
94 		return vcpu_el2_e2h_is_set(vcpu) ? TR_EL20 : TR_EL2;
95 	default:
96 		return (vcpu_el2_e2h_is_set(vcpu) &&
97 			vcpu_el2_tge_is_set(vcpu)) ? TR_EL20 : TR_EL10;
98 	}
99 }
100 
101 static u64 effective_tcr2(struct kvm_vcpu *vcpu, enum trans_regime regime)
102 {
103 	if (regime == TR_EL10) {
104 		if (vcpu_has_nv(vcpu) &&
105 		    !(__vcpu_sys_reg(vcpu, HCRX_EL2) & HCRX_EL2_TCR2En))
106 			return 0;
107 
108 		return vcpu_read_sys_reg(vcpu, TCR2_EL1);
109 	}
110 
111 	return vcpu_read_sys_reg(vcpu, TCR2_EL2);
112 }
113 
114 static bool s1pie_enabled(struct kvm_vcpu *vcpu, enum trans_regime regime)
115 {
116 	if (!kvm_has_s1pie(vcpu->kvm))
117 		return false;
118 
119 	/* Abuse TCR2_EL1_PIE and use it for EL2 as well */
120 	return effective_tcr2(vcpu, regime) & TCR2_EL1_PIE;
121 }
122 
123 static void compute_s1poe(struct kvm_vcpu *vcpu, struct s1_walk_info *wi)
124 {
125 	u64 val;
126 
127 	if (!kvm_has_s1poe(vcpu->kvm)) {
128 		wi->poe = wi->e0poe = false;
129 		return;
130 	}
131 
132 	val = effective_tcr2(vcpu, wi->regime);
133 
134 	/* Abuse TCR2_EL1_* for EL2 */
135 	wi->poe = val & TCR2_EL1_POE;
136 	wi->e0poe = (wi->regime != TR_EL2) && (val & TCR2_EL1_E0POE);
137 }
138 
139 static int setup_s1_walk(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
140 			 struct s1_walk_result *wr, u64 va)
141 {
142 	u64 hcr, sctlr, tcr, tg, ps, ia_bits, ttbr;
143 	unsigned int stride, x;
144 	bool va55, tbi, lva;
145 
146 	va55 = va & BIT(55);
147 
148 	if (vcpu_has_nv(vcpu)) {
149 		hcr = __vcpu_sys_reg(vcpu, HCR_EL2);
150 		wi->s2 = wi->regime == TR_EL10 && (hcr & (HCR_VM | HCR_DC));
151 	} else {
152 		WARN_ON_ONCE(wi->regime != TR_EL10);
153 		wi->s2 = false;
154 		hcr = 0;
155 	}
156 
157 	switch (wi->regime) {
158 	case TR_EL10:
159 		sctlr	= vcpu_read_sys_reg(vcpu, SCTLR_EL1);
160 		tcr	= vcpu_read_sys_reg(vcpu, TCR_EL1);
161 		ttbr	= (va55 ?
162 			   vcpu_read_sys_reg(vcpu, TTBR1_EL1) :
163 			   vcpu_read_sys_reg(vcpu, TTBR0_EL1));
164 		break;
165 	case TR_EL2:
166 	case TR_EL20:
167 		sctlr	= vcpu_read_sys_reg(vcpu, SCTLR_EL2);
168 		tcr	= vcpu_read_sys_reg(vcpu, TCR_EL2);
169 		ttbr	= (va55 ?
170 			   vcpu_read_sys_reg(vcpu, TTBR1_EL2) :
171 			   vcpu_read_sys_reg(vcpu, TTBR0_EL2));
172 		break;
173 	default:
174 		BUG();
175 	}
176 
177 	/* Someone was silly enough to encode TG0/TG1 differently */
178 	if (va55 && wi->regime != TR_EL2) {
179 		wi->txsz = FIELD_GET(TCR_T1SZ_MASK, tcr);
180 		tg = FIELD_GET(TCR_TG1_MASK, tcr);
181 
182 		switch (tg << TCR_TG1_SHIFT) {
183 		case TCR_TG1_4K:
184 			wi->pgshift = 12;	 break;
185 		case TCR_TG1_16K:
186 			wi->pgshift = 14;	 break;
187 		case TCR_TG1_64K:
188 		default:	    /* IMPDEF: treat any other value as 64k */
189 			wi->pgshift = 16;	 break;
190 		}
191 	} else {
192 		wi->txsz = FIELD_GET(TCR_T0SZ_MASK, tcr);
193 		tg = FIELD_GET(TCR_TG0_MASK, tcr);
194 
195 		switch (tg << TCR_TG0_SHIFT) {
196 		case TCR_TG0_4K:
197 			wi->pgshift = 12;	 break;
198 		case TCR_TG0_16K:
199 			wi->pgshift = 14;	 break;
200 		case TCR_TG0_64K:
201 		default:	    /* IMPDEF: treat any other value as 64k */
202 			wi->pgshift = 16;	 break;
203 		}
204 	}
205 
206 	wi->pa52bit = has_52bit_pa(vcpu, wi, tcr);
207 
208 	ia_bits = get_ia_size(wi);
209 
210 	/* AArch64.S1StartLevel() */
211 	stride = wi->pgshift - 3;
212 	wi->sl = 3 - (((ia_bits - 1) - wi->pgshift) / stride);
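	/*
	 * Worked example (illustrative): a 4k granule (pgshift = 12,
	 * stride = 9) with a 48bit IA gives
	 * sl = 3 - ((48 - 1 - 12) / 9) = 0, i.e. a level 0 start.
	 */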
213 
214 	if (wi->regime == TR_EL2 && va55)
215 		goto addrsz;
216 
217 	tbi = (wi->regime == TR_EL2 ?
218 	       FIELD_GET(TCR_EL2_TBI, tcr) :
219 	       (va55 ?
220 		FIELD_GET(TCR_TBI1, tcr) :
221 		FIELD_GET(TCR_TBI0, tcr)));
222 
223 	if (!tbi && (u64)sign_extend64(va, 55) != va)
224 		goto addrsz;
225 
226 	wi->sh = (wi->regime == TR_EL2 ?
227 		  FIELD_GET(TCR_EL2_SH0_MASK, tcr) :
228 		  (va55 ?
229 		   FIELD_GET(TCR_SH1_MASK, tcr) :
230 		   FIELD_GET(TCR_SH0_MASK, tcr)));
231 
232 	va = (u64)sign_extend64(va, 55);
233 
234 	/* Let's put the MMU disabled case aside immediately */
235 	switch (wi->regime) {
236 	case TR_EL10:
237 		/*
238 		 * If dealing with the EL1&0 translation regime, 3 things
239 		 * can disable the S1 translation:
240 		 *
241 		 * - HCR_EL2.DC = 1
242 		 * - HCR_EL2.{E2H,TGE} = {0,1}
243 		 * - SCTLR_EL1.M = 0
244 		 *
245 		 * The TGE part is interesting. If we have decided that this
246 		 * is EL1&0, then it means that either {E2H,TGE} == {1,0} or
247 		 * {0,x}, and we only need to test for TGE == 1.
248 		 */
249 		if (hcr & (HCR_DC | HCR_TGE)) {
250 			wr->level = S1_MMU_DISABLED;
251 			break;
252 		}
253 		fallthrough;
254 	case TR_EL2:
255 	case TR_EL20:
256 		if (!(sctlr & SCTLR_ELx_M))
257 			wr->level = S1_MMU_DISABLED;
258 		break;
259 	}
260 
261 	if (wr->level == S1_MMU_DISABLED) {
262 		if (va >= BIT(kvm_get_pa_bits(vcpu->kvm)))
263 			goto addrsz;
264 
265 		wr->pa = va;
266 		return 0;
267 	}
268 
269 	wi->be = sctlr & SCTLR_ELx_EE;
270 
271 	wi->hpd  = kvm_has_feat(vcpu->kvm, ID_AA64MMFR1_EL1, HPDS, IMP);
272 	wi->hpd &= (wi->regime == TR_EL2 ?
273 		    FIELD_GET(TCR_EL2_HPD, tcr) :
274 		    (va55 ?
275 		     FIELD_GET(TCR_HPD1, tcr) :
276 		     FIELD_GET(TCR_HPD0, tcr)));
277 	/* R_JHSVW */
278 	wi->hpd |= s1pie_enabled(vcpu, wi->regime);
279 
280 	/* Do we have POE? */
281 	compute_s1poe(vcpu, wi);
282 
283 	/* R_BVXDG */
284 	wi->hpd |= (wi->poe || wi->e0poe);
285 
286 	/* R_PLCGL, R_YXNYW */
287 	if (!kvm_has_feat_enum(vcpu->kvm, ID_AA64MMFR2_EL1, ST, 48_47)) {
288 		if (wi->txsz > 39)
289 			goto transfault;
290 	} else {
291 		if (wi->txsz > 48 || (BIT(wi->pgshift) == SZ_64K && wi->txsz > 47))
292 			goto transfault;
293 	}
294 
295 	/* R_GTJBY, R_SXWGM */
296 	switch (BIT(wi->pgshift)) {
297 	case SZ_4K:
298 	case SZ_16K:
299 		lva = wi->pa52bit;
300 		break;
301 	case SZ_64K:
302 		lva = kvm_has_feat(vcpu->kvm, ID_AA64MMFR2_EL1, VARange, 52);
303 		break;
304 	}
305 
306 	if ((lva && wi->txsz < 12) || (!lva && wi->txsz < 16))
307 		goto transfault;
308 
309 	/* R_YYVYV, I_THCZK */
310 	if ((!va55 && va > GENMASK(ia_bits - 1, 0)) ||
311 	    (va55 && va < GENMASK(63, ia_bits)))
312 		goto transfault;
313 
314 	/* I_ZFSYQ */
315 	if (wi->regime != TR_EL2 &&
316 	    (tcr & (va55 ? TCR_EPD1_MASK : TCR_EPD0_MASK)))
317 		goto transfault;
318 
319 	/* R_BNDVG and following statements */
320 	if (kvm_has_feat(vcpu->kvm, ID_AA64MMFR2_EL1, E0PD, IMP) &&
321 	    wi->as_el0 && (tcr & (va55 ? TCR_E0PD1 : TCR_E0PD0)))
322 		goto transfault;
323 
324 	ps = (wi->regime == TR_EL2 ?
325 	      FIELD_GET(TCR_EL2_PS_MASK, tcr) : FIELD_GET(TCR_IPS_MASK, tcr));
326 
327 	wi->max_oa_bits = min(get_kvm_ipa_limit(), ps_to_output_size(ps, wi->pa52bit));
328 
329 	/* Compute minimal alignment */
330 	x = 3 + ia_bits - ((3 - wi->sl) * stride + wi->pgshift);
331 
332 	wi->baddr = ttbr & TTBRx_EL1_BADDR;
333 	if (wi->pa52bit) {
334 		/*
335 		 * Force the alignment on 64 bytes for top-level tables
336 		 * smaller than 8 entries, since TTBR.BADDR[5:2] are used to
337 		 * store bits [51:48] of the first level of lookup.
338 		 */
339 		x = max(x, 6);
340 
341 		wi->baddr |= FIELD_GET(GENMASK_ULL(5, 2), ttbr) << 48;
342 	}
343 
344 	/* R_VPBBF */
345 	if (check_output_size(wi->baddr, wi))
346 		goto addrsz;
347 
348 	wi->baddr &= GENMASK_ULL(wi->max_oa_bits - 1, x);
349 
350 	wi->ha  = kvm_has_feat(vcpu->kvm, ID_AA64MMFR1_EL1, HAFDBS, AF);
351 	wi->ha &= (wi->regime == TR_EL2 ?
352 		  FIELD_GET(TCR_EL2_HA, tcr) :
353 		  FIELD_GET(TCR_HA, tcr));
354 
355 	return 0;
356 
357 addrsz:
358 	/*
359 	 * Address Size Fault level 0 to indicate it comes from TTBR.
360 	 * Yes, this is an oddity.
361 	 */
362 	fail_s1_walk(wr, ESR_ELx_FSC_ADDRSZ_L(0), false);
363 	return -EFAULT;
364 
365 transfault:
366 	/* Translation Fault on start level */
367 	fail_s1_walk(wr, ESR_ELx_FSC_FAULT_L(wi->sl), false);
368 	return -EFAULT;
369 }
370 
371 static int kvm_read_s1_desc(struct kvm_vcpu *vcpu, u64 pa, u64 *desc,
372 			    struct s1_walk_info *wi)
373 {
374 	u64 val;
375 	int r;
376 
377 	r = kvm_read_guest(vcpu->kvm, pa, &val, sizeof(val));
378 	if (r)
379 		return r;
380 
381 	if (wi->be)
382 		*desc = be64_to_cpu((__force __be64)val);
383 	else
384 		*desc = le64_to_cpu((__force __le64)val);
385 
386 	return 0;
387 }
388 
389 static int kvm_swap_s1_desc(struct kvm_vcpu *vcpu, u64 pa, u64 old, u64 new,
390 			    struct s1_walk_info *wi)
391 {
392 	if (wi->be) {
393 		old = (__force u64)cpu_to_be64(old);
394 		new = (__force u64)cpu_to_be64(new);
395 	} else {
396 		old = (__force u64)cpu_to_le64(old);
397 		new = (__force u64)cpu_to_le64(new);
398 	}
399 
400 	return __kvm_at_swap_desc(vcpu->kvm, pa, old, new);
401 }
402 
403 static int walk_s1(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
404 		   struct s1_walk_result *wr, u64 va)
405 {
406 	u64 va_top, va_bottom, baddr, desc, new_desc, ipa;
407 	struct kvm_s2_trans s2_trans = {};
408 	int level, stride, ret;
409 
410 	level = wi->sl;
411 	stride = wi->pgshift - 3;
412 	baddr = wi->baddr;
413 
414 	va_top = get_ia_size(wi) - 1;
415 
416 	while (1) {
417 		u64 index;
418 
419 		va_bottom = (3 - level) * stride + wi->pgshift;
420 		index = (va & GENMASK_ULL(va_top, va_bottom)) >> (va_bottom - 3);
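		/*
		 * Worked example (illustrative): at level 0 with a 4k
		 * granule and a 48bit IA, va_bottom = 3 * 9 + 12 = 39
		 * and va_top = 47, so index = VA[47:39] * 8, i.e. the
		 * byte offset of the 64bit descriptor in the table.
		 */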
421 
422 		ipa = baddr | index;
423 
424 		if (wi->s2) {
425 			ret = kvm_walk_nested_s2(vcpu, ipa, &s2_trans);
426 			if (ret) {
427 				fail_s1_walk(wr,
428 					     (s2_trans.esr & ~ESR_ELx_FSC_LEVEL) | level,
429 					     true);
430 				return ret;
431 			}
432 
433 			if (!kvm_s2_trans_readable(&s2_trans)) {
434 				fail_s1_walk(wr, ESR_ELx_FSC_PERM_L(level),
435 					     true);
436 
437 				return -EPERM;
438 			}
439 
440 			ipa = kvm_s2_trans_output(&s2_trans);
441 		}
442 
443 		if (wi->filter) {
444 			ret = wi->filter->fn(&(struct s1_walk_context)
445 					     {
446 						     .wi	= wi,
447 						     .table_ipa	= baddr,
448 						     .level	= level,
449 					     }, wi->filter->priv);
450 			if (ret)
451 				return ret;
452 		}
453 
454 		ret = kvm_read_s1_desc(vcpu, ipa, &desc, wi);
455 		if (ret) {
456 			fail_s1_walk(wr, ESR_ELx_FSC_SEA_TTW(level), false);
457 			return ret;
458 		}
459 
460 		new_desc = desc;
461 
462 		/* Invalid descriptor */
463 		if (!(desc & BIT(0)))
464 			goto transfault;
465 
466 		/* Block mapping, check validity down the line */
467 		if (!(desc & BIT(1)))
468 			break;
469 
470 		/* Page mapping */
471 		if (level == 3)
472 			break;
473 
474 		/* Table handling */
475 		if (!wi->hpd) {
476 			wr->APTable  |= FIELD_GET(S1_TABLE_AP, desc);
477 			wr->UXNTable |= FIELD_GET(PMD_TABLE_UXN, desc);
478 			wr->PXNTable |= FIELD_GET(PMD_TABLE_PXN, desc);
479 		}
480 
481 		baddr = desc_to_oa(wi, desc);
482 
483 		/* Check for out-of-range OA */
484 		if (check_output_size(baddr, wi))
485 			goto addrsz;
486 
487 		/* Prepare for next round */
488 		va_top = va_bottom - 1;
489 		level++;
490 	}
491 
492 	/* Block mapping, check the validity of the level */
493 	if (!(desc & BIT(1))) {
494 		bool valid_block = false;
495 
496 		switch (BIT(wi->pgshift)) {
497 		case SZ_4K:
498 			valid_block = level == 1 || level == 2 || (wi->pa52bit && level == 0);
499 			break;
500 		case SZ_16K:
501 		case SZ_64K:
502 			valid_block = level == 2 || (wi->pa52bit && level == 1);
503 			break;
504 		}
505 
506 		if (!valid_block)
507 			goto transfault;
508 	}
509 
510 	baddr = desc_to_oa(wi, desc);
511 	if (check_output_size(baddr & GENMASK(52, va_bottom), wi))
512 		goto addrsz;
513 
514 	if (wi->ha)
515 		new_desc |= PTE_AF;
516 
517 	if (new_desc != desc) {
518 		if (wi->s2 && !kvm_s2_trans_writable(&s2_trans)) {
519 			fail_s1_walk(wr, ESR_ELx_FSC_PERM_L(level), true);
520 			return -EPERM;
521 		}
522 
523 		ret = kvm_swap_s1_desc(vcpu, ipa, desc, new_desc, wi);
524 		if (ret)
525 			return ret;
526 
527 		desc = new_desc;
528 	}
529 
530 	if (!(desc & PTE_AF)) {
531 		fail_s1_walk(wr, ESR_ELx_FSC_ACCESS_L(level), false);
532 		return -EACCES;
533 	}
534 
535 	va_bottom += contiguous_bit_shift(desc, wi, level);
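	/*
	 * Illustrative note: with the contiguous bit set, the mapping
	 * spans a larger naturally-aligned region (e.g. 16 adjacent
	 * level 3 entries for a 4k granule, bumping va_bottom from 12
	 * to 16), so the PA bits below that boundary come from the VA.
	 */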
536 
537 	wr->failed = false;
538 	wr->level = level;
539 	wr->desc = desc;
540 	wr->pa = baddr & GENMASK(52, va_bottom);
541 	wr->pa |= va & GENMASK_ULL(va_bottom - 1, 0);
542 
543 	wr->nG = (wi->regime != TR_EL2) && (desc & PTE_NG);
544 	if (wr->nG)
545 		wr->asid = get_asid_by_regime(vcpu, wi->regime);
546 
547 	return 0;
548 
549 addrsz:
550 	fail_s1_walk(wr, ESR_ELx_FSC_ADDRSZ_L(level), false);
551 	return -EINVAL;
552 transfault:
553 	fail_s1_walk(wr, ESR_ELx_FSC_FAULT_L(level), false);
554 	return -ENOENT;
555 }
556 
557 struct mmu_config {
558 	u64	ttbr0;
559 	u64	ttbr1;
560 	u64	tcr;
561 	u64	mair;
562 	u64	tcr2;
563 	u64	pir;
564 	u64	pire0;
565 	u64	por_el0;
566 	u64	por_el1;
567 	u64	sctlr;
568 	u64	vttbr;
569 	u64	vtcr;
570 };
571 
572 static void __mmu_config_save(struct mmu_config *config)
573 {
574 	config->ttbr0	= read_sysreg_el1(SYS_TTBR0);
575 	config->ttbr1	= read_sysreg_el1(SYS_TTBR1);
576 	config->tcr	= read_sysreg_el1(SYS_TCR);
577 	config->mair	= read_sysreg_el1(SYS_MAIR);
578 	if (cpus_have_final_cap(ARM64_HAS_TCR2)) {
579 		config->tcr2	= read_sysreg_el1(SYS_TCR2);
580 		if (cpus_have_final_cap(ARM64_HAS_S1PIE)) {
581 			config->pir	= read_sysreg_el1(SYS_PIR);
582 			config->pire0	= read_sysreg_el1(SYS_PIRE0);
583 		}
584 		if (system_supports_poe()) {
585 			config->por_el1	= read_sysreg_el1(SYS_POR);
586 			config->por_el0	= read_sysreg_s(SYS_POR_EL0);
587 		}
588 	}
589 	config->sctlr	= read_sysreg_el1(SYS_SCTLR);
590 	config->vttbr	= read_sysreg(vttbr_el2);
591 	config->vtcr	= read_sysreg(vtcr_el2);
592 }
593 
594 static void __mmu_config_restore(struct mmu_config *config)
595 {
596 	/*
597 	 * ARM errata 1165522 and 1530923 require TGE to be 1 before
598 	 * we update the guest state.
599 	 */
600 	asm(ALTERNATIVE("nop", "isb", ARM64_WORKAROUND_SPECULATIVE_AT));
601 
602 	write_sysreg_el1(config->ttbr0,	SYS_TTBR0);
603 	write_sysreg_el1(config->ttbr1,	SYS_TTBR1);
604 	write_sysreg_el1(config->tcr,	SYS_TCR);
605 	write_sysreg_el1(config->mair,	SYS_MAIR);
606 	if (cpus_have_final_cap(ARM64_HAS_TCR2)) {
607 		write_sysreg_el1(config->tcr2, SYS_TCR2);
608 		if (cpus_have_final_cap(ARM64_HAS_S1PIE)) {
609 			write_sysreg_el1(config->pir, SYS_PIR);
610 			write_sysreg_el1(config->pire0, SYS_PIRE0);
611 		}
612 		if (system_supports_poe()) {
613 			write_sysreg_el1(config->por_el1, SYS_POR);
614 			write_sysreg_s(config->por_el0, SYS_POR_EL0);
615 		}
616 	}
617 	write_sysreg_el1(config->sctlr,	SYS_SCTLR);
618 	write_sysreg(config->vttbr,	vttbr_el2);
619 	write_sysreg(config->vtcr,	vtcr_el2);
620 }
621 
622 static bool at_s1e1p_fast(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
623 {
624 	u64 host_pan;
625 	bool fail;
626 
627 	host_pan = read_sysreg_s(SYS_PSTATE_PAN);
628 	write_sysreg_s(*vcpu_cpsr(vcpu) & PSTATE_PAN, SYS_PSTATE_PAN);
629 
630 	switch (op) {
631 	case OP_AT_S1E1RP:
632 		fail = __kvm_at(OP_AT_S1E1RP, vaddr);
633 		break;
634 	case OP_AT_S1E1WP:
635 		fail = __kvm_at(OP_AT_S1E1WP, vaddr);
636 		break;
637 	}
638 
639 	write_sysreg_s(host_pan, SYS_PSTATE_PAN);
640 
641 	return fail;
642 }
643 
644 #define MEMATTR(ic, oc)		(MEMATTR_##oc << 4 | MEMATTR_##ic)
645 #define MEMATTR_NC		0b0100
646 #define MEMATTR_Wt		0b1000
647 #define MEMATTR_Wb		0b1100
648 #define MEMATTR_WbRaWa		0b1111
649 
650 #define MEMATTR_IS_DEVICE(m)	(((m) & GENMASK(7, 4)) == 0)
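/*
 * Illustrative example: MEMATTR(NC, Wb) expands to 0b1100_0100, i.e.
 * outer Write-Back in bits [7:4] and inner Non-Cacheable in bits
 * [3:0], mirroring the MAIR_ELx encoding for Normal memory; anything
 * with bits [7:4] == 0b0000 is Device, hence MEMATTR_IS_DEVICE().
 */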
651 
652 static u8 s2_memattr_to_attr(u8 memattr)
653 {
654 	memattr &= 0b1111;
655 
656 	switch (memattr) {
657 	case 0b0000:
658 	case 0b0001:
659 	case 0b0010:
660 	case 0b0011:
661 		return memattr << 2;
662 	case 0b0100:
663 		return MEMATTR(Wb, Wb);
664 	case 0b0101:
665 		return MEMATTR(NC, NC);
666 	case 0b0110:
667 		return MEMATTR(Wt, NC);
668 	case 0b0111:
669 		return MEMATTR(Wb, NC);
670 	case 0b1000:
671 		/* Reserved, assume NC */
672 		return MEMATTR(NC, NC);
673 	case 0b1001:
674 		return MEMATTR(NC, Wt);
675 	case 0b1010:
676 		return MEMATTR(Wt, Wt);
677 	case 0b1011:
678 		return MEMATTR(Wb, Wt);
679 	case 0b1100:
680 		/* Reserved, assume NC */
681 		return MEMATTR(NC, NC);
682 	case 0b1101:
683 		return MEMATTR(NC, Wb);
684 	case 0b1110:
685 		return MEMATTR(Wt, Wb);
686 	case 0b1111:
687 		return MEMATTR(Wb, Wb);
688 	default:
689 		unreachable();
690 	}
691 }
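/*
 * Illustrative note: the 0b00xx cases above are the S2 Device
 * encodings; shifting them left by two yields the matching MAIR-style
 * Device attribute (e.g. 0b0001, Device-nGnRE, becomes 0b0000_0100),
 * which keeps MEMATTR_IS_DEVICE() true for the result.
 */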
692 
693 static u8 combine_s1_s2_attr(u8 s1, u8 s2)
694 {
695 	bool transient;
696 	u8 final = 0;
697 
698 	/* Upgrade transient s1 to non-transient to simplify things */
699 	switch (s1) {
700 	case 0b0001 ... 0b0011:	/* Normal, Write-Through Transient */
701 		transient = true;
702 		s1 = MEMATTR_Wt | (s1 & GENMASK(1,0));
703 		break;
704 	case 0b0101 ... 0b0111:	/* Normal, Write-Back Transient */
705 		transient = true;
706 		s1 = MEMATTR_Wb | (s1 & GENMASK(1,0));
707 		break;
708 	default:
709 		transient = false;
710 	}
711 
712 	/* S2CombineS1AttrHints() */
713 	if ((s1 & GENMASK(3, 2)) == MEMATTR_NC ||
714 	    (s2 & GENMASK(3, 2)) == MEMATTR_NC)
715 		final = MEMATTR_NC;
716 	else if ((s1 & GENMASK(3, 2)) == MEMATTR_Wt ||
717 		 (s2 & GENMASK(3, 2)) == MEMATTR_Wt)
718 		final = MEMATTR_Wt;
719 	else
720 		final = MEMATTR_Wb;
721 
722 	if (final != MEMATTR_NC) {
723 		/* Inherit RaWa hints from S1 */
724 		if (transient) {
725 			switch (s1 & GENMASK(3, 2)) {
726 			case MEMATTR_Wt:
727 				final = 0;
728 				break;
729 			case MEMATTR_Wb:
730 				final = MEMATTR_NC;
731 				break;
732 			}
733 		}
734 
735 		final |= s1 & GENMASK(1, 0);
736 	}
737 
738 	return final;
739 }
740 
741 #define ATTR_NSH	0b00
742 #define ATTR_RSV	0b01
743 #define ATTR_OSH	0b10
744 #define ATTR_ISH	0b11
745 
746 static u8 compute_final_sh(u8 attr, u8 sh)
747 {
748 	/* Any form of device, as well as NC, has SH[1:0]=0b10 */
749 	if (MEMATTR_IS_DEVICE(attr) || attr == MEMATTR(NC, NC))
750 		return ATTR_OSH;
751 
752 	if (sh == ATTR_RSV)		/* Reserved, mapped to NSH */
753 		sh = ATTR_NSH;
754 
755 	return sh;
756 }
757 
758 static u8 compute_s1_sh(struct s1_walk_info *wi, struct s1_walk_result *wr,
759 			u8 attr)
760 {
761 	u8 sh;
762 
763 	/*
764 	 * non-52bit and LPA have their basic shareability described in the
765 	 * descriptor. LPA2 gets it from the corresponding field in TCR,
766 	 * conveniently recorded in the walk info.
767 	 */
768 	if (!wi->pa52bit || BIT(wi->pgshift) == SZ_64K)
769 		sh = FIELD_GET(KVM_PTE_LEAF_ATTR_LO_S1_SH, wr->desc);
770 	else
771 		sh = wi->sh;
772 
773 	return compute_final_sh(attr, sh);
774 }
775 
776 static u8 combine_sh(u8 s1_sh, u8 s2_sh)
777 {
778 	if (s1_sh == ATTR_OSH || s2_sh == ATTR_OSH)
779 		return ATTR_OSH;
780 	if (s1_sh == ATTR_ISH || s2_sh == ATTR_ISH)
781 		return ATTR_ISH;
782 
783 	return ATTR_NSH;
784 }
785 
786 static u64 compute_par_s12(struct kvm_vcpu *vcpu, u64 s1_par,
787 			   struct kvm_s2_trans *tr)
788 {
789 	u8 s1_parattr, s2_memattr, final_attr, s2_sh;
790 	u64 par;
791 
792 	/* If S2 has failed to translate, report the damage */
793 	if (tr->esr) {
794 		par = SYS_PAR_EL1_RES1;
795 		par |= SYS_PAR_EL1_F;
796 		par |= SYS_PAR_EL1_S;
797 		par |= FIELD_PREP(SYS_PAR_EL1_FST, tr->esr);
798 		return par;
799 	}
800 
801 	s1_parattr = FIELD_GET(SYS_PAR_EL1_ATTR, s1_par);
802 	s2_memattr = FIELD_GET(GENMASK(5, 2), tr->desc);
803 
804 	if (__vcpu_sys_reg(vcpu, HCR_EL2) & HCR_FWB) {
805 		if (!kvm_has_feat(vcpu->kvm, ID_AA64PFR2_EL1, MTEPERM, IMP))
806 			s2_memattr &= ~BIT(3);
807 
808 		/* Combination of R_VRJSW and R_RHWZM */
809 		switch (s2_memattr) {
810 		case 0b0101:
811 			if (MEMATTR_IS_DEVICE(s1_parattr))
812 				final_attr = s1_parattr;
813 			else
814 				final_attr = MEMATTR(NC, NC);
815 			break;
816 		case 0b0110:
817 		case 0b1110:
818 			final_attr = MEMATTR(WbRaWa, WbRaWa);
819 			break;
820 		case 0b0111:
821 		case 0b1111:
822 			/* Preserve S1 attribute */
823 			final_attr = s1_parattr;
824 			break;
825 		case 0b0100:
826 		case 0b1100:
827 		case 0b1101:
828 			/* Reserved, do something non-silly */
829 			final_attr = s1_parattr;
830 			break;
831 		default:
832 			/*
833 			 * MemAttr[2]=0, Device from S2.
834 			 *
835 			 * FWB does not influence the way that stage 1
836 			 * memory types and attributes are combined
837 			 * with stage 2 Device type and attributes.
838 			 */
839 			final_attr = min(s2_memattr_to_attr(s2_memattr),
840 					 s1_parattr);
841 		}
842 	} else {
843 		/* Combination of R_HMNDG, R_TNHFM and R_GQFSF */
844 		u8 s2_parattr = s2_memattr_to_attr(s2_memattr);
845 
846 		if (MEMATTR_IS_DEVICE(s1_parattr) ||
847 		    MEMATTR_IS_DEVICE(s2_parattr)) {
848 			final_attr = min(s1_parattr, s2_parattr);
849 		} else {
850 			/* At this stage, this is memory vs memory */
851 			final_attr  = combine_s1_s2_attr(s1_parattr & 0xf,
852 							 s2_parattr & 0xf);
853 			final_attr |= combine_s1_s2_attr(s1_parattr >> 4,
854 							 s2_parattr >> 4) << 4;
855 		}
856 	}
857 
858 	if ((__vcpu_sys_reg(vcpu, HCR_EL2) & HCR_CD) &&
859 	    !MEMATTR_IS_DEVICE(final_attr))
860 		final_attr = MEMATTR(NC, NC);
861 
862 	s2_sh = FIELD_GET(KVM_PTE_LEAF_ATTR_LO_S2_SH, tr->desc);
863 
864 	par  = FIELD_PREP(SYS_PAR_EL1_ATTR, final_attr);
865 	par |= tr->output & GENMASK(47, 12);
866 	par |= FIELD_PREP(SYS_PAR_EL1_SH,
867 			  combine_sh(FIELD_GET(SYS_PAR_EL1_SH, s1_par),
868 				     compute_final_sh(final_attr, s2_sh)));
869 
870 	return par;
871 }
872 
873 static u64 compute_par_s1(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
874 			  struct s1_walk_result *wr)
875 {
876 	u64 par;
877 
878 	if (wr->failed) {
879 		par = SYS_PAR_EL1_RES1;
880 		par |= SYS_PAR_EL1_F;
881 		par |= FIELD_PREP(SYS_PAR_EL1_FST, wr->fst);
882 		par |= wr->ptw ? SYS_PAR_EL1_PTW : 0;
883 		par |= wr->s2 ? SYS_PAR_EL1_S : 0;
884 	} else if (wr->level == S1_MMU_DISABLED) {
885 		/* MMU off or HCR_EL2.DC == 1 */
886 		par  = SYS_PAR_EL1_NSE;
887 		par |= wr->pa & SYS_PAR_EL1_PA;
888 
889 		if (wi->regime == TR_EL10 && vcpu_has_nv(vcpu) &&
890 		    (__vcpu_sys_reg(vcpu, HCR_EL2) & HCR_DC)) {
891 			par |= FIELD_PREP(SYS_PAR_EL1_ATTR,
892 					  MEMATTR(WbRaWa, WbRaWa));
893 			par |= FIELD_PREP(SYS_PAR_EL1_SH, ATTR_NSH);
894 		} else {
895 			par |= FIELD_PREP(SYS_PAR_EL1_ATTR, 0); /* nGnRnE */
896 			par |= FIELD_PREP(SYS_PAR_EL1_SH, ATTR_OSH);
897 		}
898 	} else {
899 		u64 mair, sctlr;
900 		u8 sh;
901 
902 		par  = SYS_PAR_EL1_NSE;
903 
904 		mair = (wi->regime == TR_EL10 ?
905 			vcpu_read_sys_reg(vcpu, MAIR_EL1) :
906 			vcpu_read_sys_reg(vcpu, MAIR_EL2));
907 
908 		mair >>= FIELD_GET(PTE_ATTRINDX_MASK, wr->desc) * 8;
909 		mair &= 0xff;
910 
911 		sctlr = (wi->regime == TR_EL10 ?
912 			 vcpu_read_sys_reg(vcpu, SCTLR_EL1) :
913 			 vcpu_read_sys_reg(vcpu, SCTLR_EL2));
914 
915 		/* Force NC for memory if SCTLR_ELx.C is clear */
916 		if (!(sctlr & SCTLR_EL1_C) && !MEMATTR_IS_DEVICE(mair))
917 			mair = MEMATTR(NC, NC);
918 
919 		par |= FIELD_PREP(SYS_PAR_EL1_ATTR, mair);
920 		par |= wr->pa & SYS_PAR_EL1_PA;
921 
922 		sh = compute_s1_sh(wi, wr, mair);
923 		par |= FIELD_PREP(SYS_PAR_EL1_SH, sh);
924 	}
925 
926 	return par;
927 }
928 
929 static bool pan3_enabled(struct kvm_vcpu *vcpu, enum trans_regime regime)
930 {
931 	u64 sctlr;
932 
933 	if (!kvm_has_feat(vcpu->kvm, ID_AA64MMFR1_EL1, PAN, PAN3))
934 		return false;
935 
936 	if (s1pie_enabled(vcpu, regime))
937 		return true;
938 
939 	if (regime == TR_EL10)
940 		sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL1);
941 	else
942 		sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL2);
943 
944 	return sctlr & SCTLR_EL1_EPAN;
945 }
946 
947 static void compute_s1_direct_permissions(struct kvm_vcpu *vcpu,
948 					  struct s1_walk_info *wi,
949 					  struct s1_walk_result *wr)
950 {
951 	bool wxn;
952 
953 	/* Non-hierarchical part of AArch64.S1DirectBasePermissions() */
954 	if (wi->regime != TR_EL2) {
955 		switch (FIELD_GET(PTE_USER | PTE_RDONLY, wr->desc)) {
956 		case 0b00:
957 			wr->pr = wr->pw = true;
958 			wr->ur = wr->uw = false;
959 			break;
960 		case 0b01:
961 			wr->pr = wr->pw = wr->ur = wr->uw = true;
962 			break;
963 		case 0b10:
964 			wr->pr = true;
965 			wr->pw = wr->ur = wr->uw = false;
966 			break;
967 		case 0b11:
968 			wr->pr = wr->ur = true;
969 			wr->pw = wr->uw = false;
970 			break;
971 		}
972 
973 		/* We don't use px for anything yet, but hey... */
974 		wr->px = !((wr->desc & PTE_PXN) || wr->uw);
975 		wr->ux = !(wr->desc & PTE_UXN);
976 	} else {
977 		wr->ur = wr->uw = wr->ux = false;
978 
979 		if (!(wr->desc & PTE_RDONLY)) {
980 			wr->pr = wr->pw = true;
981 		} else {
982 			wr->pr = true;
983 			wr->pw = false;
984 		}
985 
986 		/* XN maps to UXN */
987 		wr->px = !(wr->desc & PTE_UXN);
988 	}
989 
990 	switch (wi->regime) {
991 	case TR_EL2:
992 	case TR_EL20:
993 		wxn = (vcpu_read_sys_reg(vcpu, SCTLR_EL2) & SCTLR_ELx_WXN);
994 		break;
995 	case TR_EL10:
996 		wxn = (vcpu_read_sys_reg(vcpu, SCTLR_EL1) & SCTLR_ELx_WXN);
997 		break;
998 	}
999 
1000 	wr->pwxn = wr->uwxn = wxn;
1001 	wr->pov = wi->poe;
1002 	wr->uov = wi->e0poe;
1003 }
1004 
1005 static void compute_s1_hierarchical_permissions(struct kvm_vcpu *vcpu,
1006 						struct s1_walk_info *wi,
1007 						struct s1_walk_result *wr)
1008 {
1009 	/* Hierarchical part of AArch64.S1DirectBasePermissions() */
1010 	if (wi->regime != TR_EL2) {
1011 		switch (wr->APTable) {
1012 		case 0b00:
1013 			break;
1014 		case 0b01:
1015 			wr->ur = wr->uw = false;
1016 			break;
1017 		case 0b10:
1018 			wr->pw = wr->uw = false;
1019 			break;
1020 		case 0b11:
1021 			wr->pw = wr->ur = wr->uw = false;
1022 			break;
1023 		}
1024 
1025 		wr->px &= !wr->PXNTable;
1026 		wr->ux &= !wr->UXNTable;
1027 	} else {
1028 		if (wr->APTable & BIT(1))
1029 			wr->pw = false;
1030 
1031 		/* XN maps to UXN */
1032 		wr->px &= !wr->UXNTable;
1033 	}
1034 }
1035 
1036 #define perm_idx(v, r, i)	((vcpu_read_sys_reg((v), (r)) >> ((i) * 4)) & 0xf)
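/*
 * Illustrative example: PIR_ELx/PIRE0_ELx/POR_ELx hold 16 4bit
 * permission fields, so perm_idx(vcpu, PIR_EL1, 5) extracts bits
 * [23:20] of the guest's PIR_EL1, i.e. the encoding for permission
 * index 5.
 */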
1037 
1038 #define set_priv_perms(wr, r, w, x)	\
1039 	do {				\
1040 		(wr)->pr = (r);		\
1041 		(wr)->pw = (w);		\
1042 		(wr)->px = (x);		\
1043 	} while (0)
1044 
1045 #define set_unpriv_perms(wr, r, w, x)	\
1046 	do {				\
1047 		(wr)->ur = (r);		\
1048 		(wr)->uw = (w);		\
1049 		(wr)->ux = (x);		\
1050 	} while (0)
1051 
1052 #define set_priv_wxn(wr, v)		\
1053 	do {				\
1054 		(wr)->pwxn = (v);	\
1055 	} while (0)
1056 
1057 #define set_unpriv_wxn(wr, v)		\
1058 	do {				\
1059 		(wr)->uwxn = (v);	\
1060 	} while (0)
1061 
1062 /* Similar to AArch64.S1IndirectBasePermissions(), without GCS  */
1063 #define set_perms(w, wr, ip)						\
1064 	do {								\
1065 		/* R_LLZDZ */						\
1066 		switch ((ip)) {						\
1067 		case 0b0000:						\
1068 			set_ ## w ## _perms((wr), false, false, false);	\
1069 			break;						\
1070 		case 0b0001:						\
1071 			set_ ## w ## _perms((wr), true , false, false);	\
1072 			break;						\
1073 		case 0b0010:						\
1074 			set_ ## w ## _perms((wr), false, false, true );	\
1075 			break;						\
1076 		case 0b0011:						\
1077 			set_ ## w ## _perms((wr), true , false, true );	\
1078 			break;						\
1079 		case 0b0100:						\
1080 			set_ ## w ## _perms((wr), false, false, false);	\
1081 			break;						\
1082 		case 0b0101:						\
1083 			set_ ## w ## _perms((wr), true , true , false);	\
1084 			break;						\
1085 		case 0b0110:						\
1086 			set_ ## w ## _perms((wr), true , true , true );	\
1087 			break;						\
1088 		case 0b0111:						\
1089 			set_ ## w ## _perms((wr), true , true , true );	\
1090 			break;						\
1091 		case 0b1000:						\
1092 			set_ ## w ## _perms((wr), true , false, false);	\
1093 			break;						\
1094 		case 0b1001:						\
1095 			set_ ## w ## _perms((wr), true , false, false);	\
1096 			break;						\
1097 		case 0b1010:						\
1098 			set_ ## w ## _perms((wr), true , false, true );	\
1099 			break;						\
1100 		case 0b1011:						\
1101 			set_ ## w ## _perms((wr), false, false, false);	\
1102 			break;						\
1103 		case 0b1100:						\
1104 			set_ ## w ## _perms((wr), true , true , false);	\
1105 			break;						\
1106 		case 0b1101:						\
1107 			set_ ## w ## _perms((wr), false, false, false);	\
1108 			break;						\
1109 		case 0b1110:						\
1110 			set_ ## w ## _perms((wr), true , true , true );	\
1111 			break;						\
1112 		case 0b1111:						\
1113 			set_ ## w ## _perms((wr), false, false, false);	\
1114 			break;						\
1115 		}							\
1116 									\
1117 		/* R_HJYGR */						\
1118 		set_ ## w ## _wxn((wr), ((ip) == 0b0110));		\
1119 									\
1120 	} while (0)
1121 
1122 static void compute_s1_indirect_permissions(struct kvm_vcpu *vcpu,
1123 					    struct s1_walk_info *wi,
1124 					    struct s1_walk_result *wr)
1125 {
1126 	u8 up, pp, idx;
1127 
1128 	idx = pte_pi_index(wr->desc);
1129 
1130 	switch (wi->regime) {
1131 	case TR_EL10:
1132 		pp = perm_idx(vcpu, PIR_EL1, idx);
1133 		up = perm_idx(vcpu, PIRE0_EL1, idx);
1134 		break;
1135 	case TR_EL20:
1136 		pp = perm_idx(vcpu, PIR_EL2, idx);
1137 		up = perm_idx(vcpu, PIRE0_EL2, idx);
1138 		break;
1139 	case TR_EL2:
1140 		pp = perm_idx(vcpu, PIR_EL2, idx);
1141 		up = 0;
1142 		break;
1143 	}
1144 
1145 	set_perms(priv, wr, pp);
1146 
1147 	if (wi->regime != TR_EL2)
1148 		set_perms(unpriv, wr, up);
1149 	else
1150 		set_unpriv_perms(wr, false, false, false);
1151 
1152 	wr->pov = wi->poe && !(pp & BIT(3));
1153 	wr->uov = wi->e0poe && !(up & BIT(3));
1154 
1155 	/* R_VFPJF */
1156 	if (wr->px && wr->uw) {
1157 		set_priv_perms(wr, false, false, false);
1158 		set_unpriv_perms(wr, false, false, false);
1159 	}
1160 }
1161 
1162 static void compute_s1_overlay_permissions(struct kvm_vcpu *vcpu,
1163 					   struct s1_walk_info *wi,
1164 					   struct s1_walk_result *wr)
1165 {
1166 	u8 idx, pov_perms, uov_perms;
1167 
1168 	idx = FIELD_GET(PTE_PO_IDX_MASK, wr->desc);
1169 
1170 	if (wr->pov) {
1171 		switch (wi->regime) {
1172 		case TR_EL10:
1173 			pov_perms = perm_idx(vcpu, POR_EL1, idx);
1174 			break;
1175 		case TR_EL20:
1176 			pov_perms = perm_idx(vcpu, POR_EL2, idx);
1177 			break;
1178 		case TR_EL2:
1179 			pov_perms = perm_idx(vcpu, POR_EL2, idx);
1180 			break;
1181 		}
1182 
1183 		if (pov_perms & ~POE_RWX)
1184 			pov_perms = POE_NONE;
1185 
1186 		/* R_QXXPC, S1PrivOverlay enabled */
1187 		if (wr->pwxn && (pov_perms & POE_X))
1188 			pov_perms &= ~POE_W;
1189 
1190 		wr->pr &= pov_perms & POE_R;
1191 		wr->pw &= pov_perms & POE_W;
1192 		wr->px &= pov_perms & POE_X;
1193 	}
1194 
1195 	if (wr->uov) {
1196 		switch (wi->regime) {
1197 		case TR_EL10:
1198 			uov_perms = perm_idx(vcpu, POR_EL0, idx);
1199 			break;
1200 		case TR_EL20:
1201 			uov_perms = perm_idx(vcpu, POR_EL0, idx);
1202 			break;
1203 		case TR_EL2:
1204 			uov_perms = 0;
1205 			break;
1206 		}
1207 
1208 		if (uov_perms & ~POE_RWX)
1209 			uov_perms = POE_NONE;
1210 
1211 		/* R_NPBXC, S1UnprivOverlay enabled */
1212 		if (wr->uwxn && (uov_perms & POE_X))
1213 			uov_perms &= ~POE_W;
1214 
1215 		wr->ur &= uov_perms & POE_R;
1216 		wr->uw &= uov_perms & POE_W;
1217 		wr->ux &= uov_perms & POE_X;
1218 	}
1219 }
1220 
1221 static void compute_s1_permissions(struct kvm_vcpu *vcpu,
1222 				   struct s1_walk_info *wi,
1223 				   struct s1_walk_result *wr)
1224 {
1225 	bool pan;
1226 
1227 	if (!s1pie_enabled(vcpu, wi->regime))
1228 		compute_s1_direct_permissions(vcpu, wi, wr);
1229 	else
1230 		compute_s1_indirect_permissions(vcpu, wi, wr);
1231 
1232 	if (!wi->hpd)
1233 		compute_s1_hierarchical_permissions(vcpu, wi, wr);
1234 
1235 	compute_s1_overlay_permissions(vcpu, wi, wr);
1236 
1237 	/* R_QXXPC, S1PrivOverlay disabled */
1238 	if (!wr->pov)
1239 		wr->px &= !(wr->pwxn && wr->pw);
1240 
1241 	/* R_NPBXC, S1UnprivOverlay disabled */
1242 	if (!wr->uov)
1243 		wr->ux &= !(wr->uwxn && wr->uw);
1244 
1245 	pan = wi->pan && (wr->ur || wr->uw ||
1246 			  (pan3_enabled(vcpu, wi->regime) && wr->ux));
1247 	wr->pw &= !pan;
1248 	wr->pr &= !pan;
1249 }
1250 
1251 static int handle_at_slow(struct kvm_vcpu *vcpu, u32 op, u64 vaddr, u64 *par)
1252 {
1253 	struct s1_walk_result wr = {};
1254 	struct s1_walk_info wi = {};
1255 	bool perm_fail = false;
1256 	int ret, idx;
1257 
1258 	wi.regime = compute_translation_regime(vcpu, op);
1259 	wi.as_el0 = (op == OP_AT_S1E0R || op == OP_AT_S1E0W);
1260 	wi.pan = (op == OP_AT_S1E1RP || op == OP_AT_S1E1WP) &&
1261 		 (*vcpu_cpsr(vcpu) & PSR_PAN_BIT);
1262 
1263 	ret = setup_s1_walk(vcpu, &wi, &wr, vaddr);
1264 	if (ret)
1265 		goto compute_par;
1266 
1267 	if (wr.level == S1_MMU_DISABLED)
1268 		goto compute_par;
1269 
1270 	idx = srcu_read_lock(&vcpu->kvm->srcu);
1271 
1272 	ret = walk_s1(vcpu, &wi, &wr, vaddr);
1273 
1274 	srcu_read_unlock(&vcpu->kvm->srcu, idx);
1275 
1276 	/*
1277 	 * Race to update a descriptor -- restart the walk.
1278 	 */
1279 	if (ret == -EAGAIN)
1280 		return ret;
1281 	if (ret)
1282 		goto compute_par;
1283 
1284 	compute_s1_permissions(vcpu, &wi, &wr);
1285 
1286 	switch (op) {
1287 	case OP_AT_S1E1RP:
1288 	case OP_AT_S1E1R:
1289 	case OP_AT_S1E2R:
1290 		perm_fail = !wr.pr;
1291 		break;
1292 	case OP_AT_S1E1WP:
1293 	case OP_AT_S1E1W:
1294 	case OP_AT_S1E2W:
1295 		perm_fail = !wr.pw;
1296 		break;
1297 	case OP_AT_S1E0R:
1298 		perm_fail = !wr.ur;
1299 		break;
1300 	case OP_AT_S1E0W:
1301 		perm_fail = !wr.uw;
1302 		break;
1303 	case OP_AT_S1E1A:
1304 	case OP_AT_S1E2A:
1305 		break;
1306 	default:
1307 		BUG();
1308 	}
1309 
1310 	if (perm_fail)
1311 		fail_s1_walk(&wr, ESR_ELx_FSC_PERM_L(wr.level), false);
1312 
1313 compute_par:
1314 	*par = compute_par_s1(vcpu, &wi, &wr);
1315 	return 0;
1316 }
1317 
1318 /*
1319  * Return the PAR_EL1 value as the result of a valid translation.
1320  *
1321  * If the translation is unsuccessful, the value may only contain
1322  * PAR_EL1.F, and cannot be taken at face value. It isn't an
1323  * indication of the translation having failed, only that the fast
1324  * path did not succeed, *unless* it indicates an S1 permission or
1325  * access fault.
1326  */
1327 static u64 __kvm_at_s1e01_fast(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
1328 {
1329 	struct mmu_config config;
1330 	struct kvm_s2_mmu *mmu;
1331 	bool fail, mmu_cs;
1332 	u64 par;
1333 
1334 	par = SYS_PAR_EL1_F;
1335 
1336 	/*
1337 	 * We've trapped, so everything is live on the CPU. As we will
1338 	 * be switching contexts behind everybody's back, disable
1339 	 * interrupts while holding the mmu lock.
1340 	 */
1341 	guard(write_lock_irqsave)(&vcpu->kvm->mmu_lock);
1342 
1343 	/*
1344 	 * If HCR_EL2.{E2H,TGE} == {1,1}, the MMU context is already
1345 	 * the right one (as we trapped from vEL2). If not, save the
1346 	 * full MMU context.
1347 	 *
1348 	 * We are also guaranteed to be in the correct context if
1349 	 * we're not in a nested VM.
1350 	 */
1351 	mmu_cs = (vcpu_has_nv(vcpu) &&
1352 		  !(vcpu_el2_e2h_is_set(vcpu) && vcpu_el2_tge_is_set(vcpu)));
1353 	if (!mmu_cs)
1354 		goto skip_mmu_switch;
1355 
1356 	/*
1357 	 * Obtaining the S2 MMU for an L2 is horribly racy, and we may not
1358 	 * find it (recycled by another vcpu, for example). When this
1359 	 * happens, admit defeat immediately and use the SW (slow) path.
1360 	 */
1361 	mmu = lookup_s2_mmu(vcpu);
1362 	if (!mmu)
1363 		return par;
1364 
1365 	__mmu_config_save(&config);
1366 
1367 	write_sysreg_el1(vcpu_read_sys_reg(vcpu, TTBR0_EL1),	SYS_TTBR0);
1368 	write_sysreg_el1(vcpu_read_sys_reg(vcpu, TTBR1_EL1),	SYS_TTBR1);
1369 	write_sysreg_el1(vcpu_read_sys_reg(vcpu, TCR_EL1),	SYS_TCR);
1370 	write_sysreg_el1(vcpu_read_sys_reg(vcpu, MAIR_EL1),	SYS_MAIR);
1371 	if (kvm_has_tcr2(vcpu->kvm)) {
1372 		write_sysreg_el1(vcpu_read_sys_reg(vcpu, TCR2_EL1), SYS_TCR2);
1373 		if (kvm_has_s1pie(vcpu->kvm)) {
1374 			write_sysreg_el1(vcpu_read_sys_reg(vcpu, PIR_EL1), SYS_PIR);
1375 			write_sysreg_el1(vcpu_read_sys_reg(vcpu, PIRE0_EL1), SYS_PIRE0);
1376 		}
1377 		if (kvm_has_s1poe(vcpu->kvm)) {
1378 			write_sysreg_el1(vcpu_read_sys_reg(vcpu, POR_EL1), SYS_POR);
1379 			write_sysreg_s(vcpu_read_sys_reg(vcpu, POR_EL0), SYS_POR_EL0);
1380 		}
1381 	}
1382 	write_sysreg_el1(vcpu_read_sys_reg(vcpu, SCTLR_EL1),	SYS_SCTLR);
1383 	__load_stage2(mmu, mmu->arch);
1384 
1385 skip_mmu_switch:
1386 	/* Temporarily switch back to guest context */
1387 	write_sysreg_hcr(vcpu->arch.hcr_el2);
1388 	isb();
1389 
1390 	switch (op) {
1391 	case OP_AT_S1E1RP:
1392 	case OP_AT_S1E1WP:
1393 		fail = at_s1e1p_fast(vcpu, op, vaddr);
1394 		break;
1395 	case OP_AT_S1E1R:
1396 		fail = __kvm_at(OP_AT_S1E1R, vaddr);
1397 		break;
1398 	case OP_AT_S1E1W:
1399 		fail = __kvm_at(OP_AT_S1E1W, vaddr);
1400 		break;
1401 	case OP_AT_S1E0R:
1402 		fail = __kvm_at(OP_AT_S1E0R, vaddr);
1403 		break;
1404 	case OP_AT_S1E0W:
1405 		fail = __kvm_at(OP_AT_S1E0W, vaddr);
1406 		break;
1407 	case OP_AT_S1E1A:
1408 		fail = __kvm_at(OP_AT_S1E1A, vaddr);
1409 		break;
1410 	default:
1411 		WARN_ON_ONCE(1);
1412 		fail = true;
1413 		break;
1414 	}
1415 
1416 	if (!fail)
1417 		par = read_sysreg_par();
1418 
1419 	write_sysreg_hcr(HCR_HOST_VHE_FLAGS);
1420 
1421 	if (mmu_cs)
1422 		__mmu_config_restore(&config);
1423 
1424 	return par;
1425 }
1426 
1427 static bool par_check_s1_perm_fault(u64 par)
1428 {
1429 	u8 fst = FIELD_GET(SYS_PAR_EL1_FST, par);
1430 
1431 	return  ((fst & ESR_ELx_FSC_TYPE) == ESR_ELx_FSC_PERM &&
1432 		 !(par & SYS_PAR_EL1_S));
1433 }
1434 
1435 static bool par_check_s1_access_fault(u64 par)
1436 {
1437 	u8 fst = FIELD_GET(SYS_PAR_EL1_FST, par);
1438 
1439 	return  ((fst & ESR_ELx_FSC_TYPE) == ESR_ELx_FSC_ACCESS &&
1440 		 !(par & SYS_PAR_EL1_S));
1441 }
1442 
1443 int __kvm_at_s1e01(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
1444 {
1445 	u64 par = __kvm_at_s1e01_fast(vcpu, op, vaddr);
1446 	int ret;
1447 
1448 	/*
1449 	 * If PAR_EL1 reports that AT failed on an S1 permission or access
1450 	 * fault, we know for sure that the PTW was able to walk the S1
1451 	 * tables and there's nothing else to do.
1452 	 *
1453 	 * If AT failed for any other reason, then we must walk the guest S1
1454 	 * to emulate the instruction.
1455 	 */
1456 	if ((par & SYS_PAR_EL1_F) &&
1457 	    !par_check_s1_perm_fault(par) &&
1458 	    !par_check_s1_access_fault(par)) {
1459 		ret = handle_at_slow(vcpu, op, vaddr, &par);
1460 		if (ret)
1461 			return ret;
1462 	}
1463 
1464 	vcpu_write_sys_reg(vcpu, par, PAR_EL1);
1465 	return 0;
1466 }
1467 
1468 int __kvm_at_s1e2(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
1469 {
1470 	u64 par;
1471 	int ret;
1472 
1473 	/*
1474 	 * We've trapped, so everything is live on the CPU. As we will be
1475 	 * switching context behind everybody's back, disable interrupts...
1476 	 */
1477 	scoped_guard(write_lock_irqsave, &vcpu->kvm->mmu_lock) {
1478 		u64 val, hcr;
1479 		bool fail;
1480 
1481 		val = hcr = read_sysreg(hcr_el2);
1482 		val &= ~HCR_TGE;
1483 		val |= HCR_VM;
1484 
1485 		if (!vcpu_el2_e2h_is_set(vcpu))
1486 			val |= HCR_NV | HCR_NV1;
1487 
1488 		write_sysreg_hcr(val);
1489 		isb();
1490 
1491 		par = SYS_PAR_EL1_F;
1492 
1493 		switch (op) {
1494 		case OP_AT_S1E2R:
1495 			fail = __kvm_at(OP_AT_S1E1R, vaddr);
1496 			break;
1497 		case OP_AT_S1E2W:
1498 			fail = __kvm_at(OP_AT_S1E1W, vaddr);
1499 			break;
1500 		case OP_AT_S1E2A:
1501 			fail = __kvm_at(OP_AT_S1E1A, vaddr);
1502 			break;
1503 		default:
1504 			WARN_ON_ONCE(1);
1505 			fail = true;
1506 		}
1507 
1508 		if (!fail)
1509 			par = read_sysreg_par();
1510 
1511 		write_sysreg_hcr(hcr);
1512 		isb();
1513 	}
1514 
1515 	/* We failed the translation, let's replay it in slow motion */
1516 	if ((par & SYS_PAR_EL1_F) && !par_check_s1_perm_fault(par)) {
1517 		ret = handle_at_slow(vcpu, op, vaddr, &par);
1518 		if (ret)
1519 			return ret;
1520 	}
1521 
1522 	vcpu_write_sys_reg(vcpu, par, PAR_EL1);
1523 	return 0;
1524 }
1525 
1526 int __kvm_at_s12(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
1527 {
1528 	struct kvm_s2_trans out = {};
1529 	u64 ipa, par;
1530 	bool write;
1531 	int ret;
1532 
1533 	/* Do the stage-1 translation */
1534 	switch (op) {
1535 	case OP_AT_S12E1R:
1536 		op = OP_AT_S1E1R;
1537 		write = false;
1538 		break;
1539 	case OP_AT_S12E1W:
1540 		op = OP_AT_S1E1W;
1541 		write = true;
1542 		break;
1543 	case OP_AT_S12E0R:
1544 		op = OP_AT_S1E0R;
1545 		write = false;
1546 		break;
1547 	case OP_AT_S12E0W:
1548 		op = OP_AT_S1E0W;
1549 		write = true;
1550 		break;
1551 	default:
1552 		WARN_ON_ONCE(1);
1553 		return 0;
1554 	}
1555 
1556 	ret = __kvm_at_s1e01(vcpu, op, vaddr);
	if (ret)
		return ret;

1557 	par = vcpu_read_sys_reg(vcpu, PAR_EL1);
1558 	if (par & SYS_PAR_EL1_F)
1559 		return 0;
1560 
1561 	/*
1562 	 * If we only have a single stage of translation (EL2&0), exit
1563 	 * early. Same thing if {VM,DC}=={0,0}.
1564 	 */
1565 	if (compute_translation_regime(vcpu, op) == TR_EL20 ||
1566 	    !(vcpu_read_sys_reg(vcpu, HCR_EL2) & (HCR_VM | HCR_DC)))
1567 		return 0;
1568 
1569 	/* Do the stage-2 translation */
1570 	ipa = (par & GENMASK_ULL(47, 12)) | (vaddr & GENMASK_ULL(11, 0));
1571 	out.esr = 0;
1572 	ret = kvm_walk_nested_s2(vcpu, ipa, &out);
1573 	if (ret < 0)
1574 		return ret;
1575 
1576 	/* Check the access permission */
1577 	if (!out.esr &&
1578 	    ((!write && !out.readable) || (write && !out.writable)))
1579 		out.esr = ESR_ELx_FSC_PERM_L(out.level & 0x3);
1580 
1581 	par = compute_par_s12(vcpu, par, &out);
1582 	vcpu_write_sys_reg(vcpu, par, PAR_EL1);
1583 	return 0;
1584 }
1585 
1586 /*
1587  * Translate a VA for a given EL in a given translation regime, with
1588  * or without PAN. This requires wi->{regime, as_el0, pan} to be
1589  * set. The rest of the wi and wr should be 0-initialised.
1590  */
1591 int __kvm_translate_va(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
1592 		       struct s1_walk_result *wr, u64 va)
1593 {
1594 	int ret;
1595 
1596 	ret = setup_s1_walk(vcpu, wi, wr, va);
1597 	if (ret)
1598 		return ret;
1599 
1600 	if (wr->level == S1_MMU_DISABLED) {
1601 		wr->ur = wr->uw = wr->ux = true;
1602 		wr->pr = wr->pw = wr->px = true;
1603 	} else {
1604 		ret = walk_s1(vcpu, wi, wr, va);
1605 		if (ret)
1606 			return ret;
1607 
1608 		compute_s1_permissions(vcpu, wi, wr);
1609 	}
1610 
1611 	return 0;
1612 }
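/*
 * Illustrative caller sketch (hypothetical, 'use' is a stand-in):
 * translate a VA in the EL1&0 regime without PAN:
 *
 *	struct s1_walk_info wi = { .regime = TR_EL10 };
 *	struct s1_walk_result wr = {};
 *
 *	if (!__kvm_translate_va(vcpu, &wi, &wr, va))
 *		use(wr.pa, wr.pr, wr.pw);
 */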
1613 
1614 struct desc_match {
1615 	u64	ipa;
1616 	int	level;
1617 };
1618 
1619 static int match_s1_desc(struct s1_walk_context *ctxt, void *priv)
1620 {
1621 	struct desc_match *dm = priv;
1622 	u64 ipa = dm->ipa;
1623 
1624 	/* Use S1 granule alignment */
1625 	ipa &= GENMASK(51, ctxt->wi->pgshift);
1626 
1627 	/* Not the IPA we're looking for? Continue. */
1628 	if (ipa != ctxt->table_ipa)
1629 		return 0;
1630 
1631 	/* Note the level and interrupt the walk */
1632 	dm->level = ctxt->level;
1633 	return -EINTR;
1634 }
1635 
1636 int __kvm_find_s1_desc_level(struct kvm_vcpu *vcpu, u64 va, u64 ipa, int *level)
1637 {
1638 	struct desc_match dm = {
1639 		.ipa	= ipa,
1640 	};
1641 	struct s1_walk_info wi = {
1642 		.filter	= &(struct s1_walk_filter){
1643 			.fn	= match_s1_desc,
1644 			.priv	= &dm,
1645 		},
1646 		.as_el0	= false,
1647 		.pan	= false,
1648 	};
1649 	struct s1_walk_result wr = {};
1650 	int ret;
1651 
1652 	if (is_hyp_ctxt(vcpu))
1653 		wi.regime = vcpu_el2_e2h_is_set(vcpu) ? TR_EL20 : TR_EL2;
1654 	else
1655 		wi.regime = TR_EL10;
1656 
1657 	ret = setup_s1_walk(vcpu, &wi, &wr, va);
1658 	if (ret)
1659 		return ret;
1660 
1661 	/* We really expect the S1 MMU to be on here... */
1662 	if (WARN_ON_ONCE(wr.level == S1_MMU_DISABLED)) {
1663 		*level = 0;
1664 		return 0;
1665 	}
1666 
1667 	/* Walk the guest's PT, looking for a match along the way */
1668 	ret = walk_s1(vcpu, &wi, &wr, va);
1669 	switch (ret) {
1670 	case -EINTR:
1671 		/* We interrupted the walk on a match, return the level */
1672 		*level = dm.level;
1673 		return 0;
1674 	case 0:
1675 		/* The walk completed, we failed to find the entry */
1676 		return -ENOENT;
1677 	default:
1678 		/* Any other error... */
1679 		return ret;
1680 	}
1681 }
1682 
1683 static int __lsui_swap_desc(u64 __user *ptep, u64 old, u64 new)
1684 {
1685 	u64 tmp = old;
1686 	int ret = 0;
1687 
1688 	/*
1689 	 * Wrap LSUI instructions with uaccess_ttbr0_enable()/disable(),
1690 	 * as PAN toggling is not required.
1691 	 */
1692 	uaccess_ttbr0_enable();
1693 
1694 	asm volatile(__LSUI_PREAMBLE
1695 		     "1: cast	%[old], %[new], %[addr]\n"
1696 		     "2:\n"
1697 		     _ASM_EXTABLE_UACCESS_ERR(1b, 2b, %w[ret])
1698 		     : [old] "+r" (old), [addr] "+Q" (*ptep), [ret] "+r" (ret)
1699 		     : [new] "r" (new)
1700 		     : "memory");
1701 
1702 	uaccess_ttbr0_disable();
1703 
1704 	if (ret)
1705 		return ret;
1706 	if (tmp != old)
1707 		return -EAGAIN;
1708 
1709 	return ret;
1710 }
1711 
1712 static int __lse_swap_desc(u64 __user *ptep, u64 old, u64 new)
1713 {
1714 	u64 tmp = old;
1715 	int ret = 0;
1716 
1717 	uaccess_enable_privileged();
1718 
1719 	asm volatile(__LSE_PREAMBLE
1720 		     "1: cas	%[old], %[new], %[addr]\n"
1721 		     "2:\n"
1722 		     _ASM_EXTABLE_UACCESS_ERR(1b, 2b, %w[ret])
1723 		     : [old] "+r" (old), [addr] "+Q" (*ptep), [ret] "+r" (ret)
1724 		     : [new] "r" (new)
1725 		     : "memory");
1726 
1727 	uaccess_disable_privileged();
1728 
1729 	if (ret)
1730 		return ret;
1731 	if (tmp != old)
1732 		return -EAGAIN;
1733 
1734 	return ret;
1735 }
1736 
1737 static int __llsc_swap_desc(u64 __user *ptep, u64 old, u64 new)
1738 {
1739 	int ret = 1;
1740 	u64 tmp;
1741 
1742 	uaccess_enable_privileged();
1743 
1744 	asm volatile("prfm	pstl1strm, %[addr]\n"
1745 		     "1: ldxr	%[tmp], %[addr]\n"
1746 		     "sub	%[tmp], %[tmp], %[old]\n"
1747 		     "cbnz	%[tmp], 3f\n"
1748 		     "2: stlxr	%w[ret], %[new], %[addr]\n"
1749 		     "3:\n"
1750 		     _ASM_EXTABLE_UACCESS_ERR(1b, 3b, %w[ret])
1751 		     _ASM_EXTABLE_UACCESS_ERR(2b, 3b, %w[ret])
1752 		     : [ret] "+r" (ret), [addr] "+Q" (*ptep), [tmp] "=&r" (tmp)
1753 		     : [old] "r" (old), [new] "r" (new)
1754 		     : "memory");
1755 
1756 	uaccess_disable_privileged();
1757 
1758 	/* STLXR didn't update the descriptor, or the compare failed */
1759 	if (ret == 1)
1760 		return -EAGAIN;
1761 
1762 	return ret;
1763 }
1764 
1765 int __kvm_at_swap_desc(struct kvm *kvm, gpa_t ipa, u64 old, u64 new)
1766 {
1767 	struct kvm_memory_slot *slot;
1768 	unsigned long hva;
1769 	u64 __user *ptep;
1770 	bool writable;
1771 	int offset;
1772 	gfn_t gfn;
1773 	int r;
1774 
1775 	lockdep_assert(srcu_read_lock_held(&kvm->srcu));
1776 
1777 	gfn = ipa >> PAGE_SHIFT;
1778 	offset = offset_in_page(ipa);
1779 	slot = gfn_to_memslot(kvm, gfn);
1780 	hva = gfn_to_hva_memslot_prot(slot, gfn, &writable);
1781 	if (kvm_is_error_hva(hva))
1782 		return -EINVAL;
1783 	if (!writable)
1784 		return -EPERM;
1785 
1786 	ptep = (void __user *)hva + offset;
1787 	if (cpus_have_final_cap(ARM64_HAS_LSUI))
1788 		r = __lsui_swap_desc(ptep, old, new);
1789 	else if (cpus_have_final_cap(ARM64_HAS_LSE_ATOMICS))
1790 		r = __lse_swap_desc(ptep, old, new);
1791 	else
1792 		r = __llsc_swap_desc(ptep, old, new);
1793 
1794 	if (r < 0)
1795 		return r;
1796 
1797 	mark_page_dirty_in_slot(kvm, slot, gfn);
1798 	return 0;
1799 }
1800