// SPDX-License-Identifier: GPL-2.0
/*
 *    Copyright IBM Corp. 2007, 2011
 *    Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
 */

#include <linux/cpufeature.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/gfp.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/smp.h>
#include <linux/spinlock.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>
#include <linux/swapops.h>
#include <linux/sysctl.h>
#include <linux/ksm.h>
#include <linux/mman.h>

#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
#include <asm/page-states.h>
#include <asm/machine.h>

pgprot_t pgprot_writecombine(pgprot_t prot)
{
	/*
	 * mio_wb_bit_mask may be set on a different CPU, but it is only set
	 * once at init and only read afterwards.
	 */
	return __pgprot(pgprot_val(prot) | mio_wb_bit_mask);
}
EXPORT_SYMBOL_GPL(pgprot_writecombine);

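/*
 * Invalidate a PTE and flush the TLB with IPTE. With TLB guest support the
 * NODAT and guest-ASCE options are derived from mm->context.gmap_asce,
 * otherwise a plain IPTE is issued. The _local variant only flushes the
 * TLB of the issuing CPU, the _global variant flushes all CPUs.
 */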
static inline void ptep_ipte_local(struct mm_struct *mm, unsigned long addr,
				   pte_t *ptep, int nodat)
{
	unsigned long opt, asce;

	if (machine_has_tlb_guest()) {
		opt = 0;
		asce = READ_ONCE(mm->context.gmap_asce);
		if (asce == 0UL || nodat)
			opt |= IPTE_NODAT;
		if (asce != -1UL) {
			asce = asce ? : mm->context.asce;
			opt |= IPTE_GUEST_ASCE;
		}
		__ptep_ipte(addr, ptep, opt, asce, IPTE_LOCAL);
	} else {
		__ptep_ipte(addr, ptep, 0, 0, IPTE_LOCAL);
	}
}

static inline void ptep_ipte_global(struct mm_struct *mm, unsigned long addr,
				    pte_t *ptep, int nodat)
{
	unsigned long opt, asce;

	if (machine_has_tlb_guest()) {
		opt = 0;
		asce = READ_ONCE(mm->context.gmap_asce);
		if (asce == 0UL || nodat)
			opt |= IPTE_NODAT;
		if (asce != -1UL) {
			asce = asce ? : mm->context.asce;
			opt |= IPTE_GUEST_ASCE;
		}
		__ptep_ipte(addr, ptep, opt, asce, IPTE_GLOBAL);
	} else {
		__ptep_ipte(addr, ptep, 0, 0, IPTE_GLOBAL);
	}
}

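/*
 * Invalidate and flush a PTE immediately. A PTE that is already invalid is
 * left alone; otherwise a local IPTE is used if the mm is attached to the
 * current CPU only, else a global IPTE.
 */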
static inline pte_t ptep_flush_direct(struct mm_struct *mm,
				      unsigned long addr, pte_t *ptep,
				      int nodat)
{
	pte_t old;

	old = *ptep;
	if (unlikely(pte_val(old) & _PAGE_INVALID))
		return old;
	atomic_inc(&mm->context.flush_count);
	if (cpu_has_tlb_lc() &&
	    cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
		ptep_ipte_local(mm, addr, ptep, nodat);
	else
		ptep_ipte_global(mm, addr, ptep, nodat);
	atomic_dec(&mm->context.flush_count);
	return old;
}

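/*
 * Lazy variant of ptep_flush_direct(): if the mm is attached to the current
 * CPU only, the PTE is just marked invalid and the TLB flush is deferred by
 * setting mm->context.flush_mm, otherwise a global IPTE is required.
 */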
static inline pte_t ptep_flush_lazy(struct mm_struct *mm,
				    unsigned long addr, pte_t *ptep,
				    int nodat)
{
	pte_t old;

	old = *ptep;
	if (unlikely(pte_val(old) & _PAGE_INVALID))
		return old;
	atomic_inc(&mm->context.flush_count);
	if (cpumask_equal(&mm->context.cpu_attach_mask,
			  cpumask_of(smp_processor_id()))) {
		set_pte(ptep, set_pte_bit(*ptep, __pgprot(_PAGE_INVALID)));
		mm->context.flush_mm = 1;
	} else
		ptep_ipte_global(mm, addr, ptep, nodat);
	atomic_dec(&mm->context.flush_count);
	return old;
}

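/*
 * The PGSTE of a PTE is located PTRS_PER_PTE entries behind the PTE in the
 * same page table. pgste_get_lock()/pgste_set_unlock() serialize updates by
 * spinning on the PGSTE_PCL_BIT lock bit, pgste_get()/pgste_set() access the
 * PGSTE without taking the lock.
 */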
static inline pgste_t pgste_get_lock(pte_t *ptep)
{
	unsigned long value = 0;
#ifdef CONFIG_PGSTE
	unsigned long *ptr = (unsigned long *)(ptep + PTRS_PER_PTE);

	do {
		value = __atomic64_or_barrier(PGSTE_PCL_BIT, ptr);
	} while (value & PGSTE_PCL_BIT);
	value |= PGSTE_PCL_BIT;
#endif
	return __pgste(value);
}

static inline void pgste_set_unlock(pte_t *ptep, pgste_t pgste)
{
#ifdef CONFIG_PGSTE
	barrier();
	WRITE_ONCE(*(unsigned long *)(ptep + PTRS_PER_PTE), pgste_val(pgste) & ~PGSTE_PCL_BIT);
#endif
}

static inline pgste_t pgste_get(pte_t *ptep)
{
	unsigned long pgste = 0;
#ifdef CONFIG_PGSTE
	pgste = *(unsigned long *)(ptep + PTRS_PER_PTE);
#endif
	return __pgste(pgste);
}

static inline void pgste_set(pte_t *ptep, pgste_t pgste)
{
#ifdef CONFIG_PGSTE
	*(pgste_t *)(ptep + PTRS_PER_PTE) = pgste;
#endif
}

static inline pgste_t pgste_update_all(pte_t pte, pgste_t pgste,
				       struct mm_struct *mm)
{
#ifdef CONFIG_PGSTE
	unsigned long address, bits, skey;

	if (!mm_uses_skeys(mm) || pte_val(pte) & _PAGE_INVALID)
		return pgste;
	address = pte_val(pte) & PAGE_MASK;
	skey = (unsigned long) page_get_storage_key(address);
	bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED);
	/* Transfer page changed & referenced bit to guest bits in pgste */
	pgste = set_pgste_bit(pgste, bits << 48); /* GR bit & GC bit */
	/* Copy page access key and fetch protection bit to pgste */
	pgste = clear_pgste_bit(pgste, PGSTE_ACC_BITS | PGSTE_FP_BIT);
	pgste = set_pgste_bit(pgste, (skey & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56);
#endif
	return pgste;
}

static inline void pgste_set_key(pte_t *ptep, pgste_t pgste, pte_t entry,
				 struct mm_struct *mm)
{
#ifdef CONFIG_PGSTE
	unsigned long address;
	unsigned long nkey;

	if (!mm_uses_skeys(mm) || pte_val(entry) & _PAGE_INVALID)
		return;
	VM_BUG_ON(!(pte_val(*ptep) & _PAGE_INVALID));
	address = pte_val(entry) & PAGE_MASK;
	/*
	 * Set page access key and fetch protection bit from pgste.
	 * The guest C/R information is still in the PGSTE, set real
	 * key C/R to 0.
	 */
	nkey = (pgste_val(pgste) & (PGSTE_ACC_BITS | PGSTE_FP_BIT)) >> 56;
	nkey |= (pgste_val(pgste) & (PGSTE_GR_BIT | PGSTE_GC_BIT)) >> 48;
	page_set_storage_key(address, nkey, 0);
#endif
}

static inline pgste_t pgste_set_pte(pte_t *ptep, pgste_t pgste, pte_t entry)
{
#ifdef CONFIG_PGSTE
	if ((pte_val(entry) & _PAGE_PRESENT) &&
	    (pte_val(entry) & _PAGE_WRITE) &&
	    !(pte_val(entry) & _PAGE_INVALID)) {
		if (!machine_has_esop()) {
			/*
			 * Without enhanced suppression-on-protection force
			 * the dirty bit on for all writable ptes.
			 */
			entry = set_pte_bit(entry, __pgprot(_PAGE_DIRTY));
			entry = clear_pte_bit(entry, __pgprot(_PAGE_PROTECT));
		}
		if (!(pte_val(entry) & _PAGE_PROTECT))
			/* This pte allows write access, set user-dirty */
			pgste = set_pgste_bit(pgste, PGSTE_UC_BIT);
	}
#endif
	set_pte(ptep, entry);
	return pgste;
}

static inline pgste_t pgste_pte_notify(struct mm_struct *mm,
				       unsigned long addr,
				       pte_t *ptep, pgste_t pgste)
{
#ifdef CONFIG_PGSTE
	unsigned long bits;

	bits = pgste_val(pgste) & (PGSTE_IN_BIT | PGSTE_VSIE_BIT);
	if (bits) {
		pgste = __pgste(pgste_val(pgste) ^ bits);
		ptep_notify(mm, addr, ptep, bits);
	}
#endif
	return pgste;
}

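/*
 * ptep_xchg_start()/ptep_xchg_commit() bracket a PTE exchange for mms with
 * PGSTEs: start takes the PGSTE lock and delivers pending invalidation
 * notifications, commit transfers storage key and usage state between PTE
 * and PGSTE, writes the new PTE and drops the lock. Without PGSTEs only the
 * new PTE is written.
 */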
static inline pgste_t ptep_xchg_start(struct mm_struct *mm,
				      unsigned long addr, pte_t *ptep)
{
	pgste_t pgste = __pgste(0);

	if (mm_has_pgste(mm)) {
		pgste = pgste_get_lock(ptep);
		pgste = pgste_pte_notify(mm, addr, ptep, pgste);
	}
	return pgste;
}

static inline pte_t ptep_xchg_commit(struct mm_struct *mm,
				    unsigned long addr, pte_t *ptep,
				    pgste_t pgste, pte_t old, pte_t new)
{
	if (mm_has_pgste(mm)) {
		if (pte_val(old) & _PAGE_INVALID)
			pgste_set_key(ptep, pgste, new, mm);
		if (pte_val(new) & _PAGE_INVALID) {
			pgste = pgste_update_all(old, pgste, mm);
			if ((pgste_val(pgste) & _PGSTE_GPS_USAGE_MASK) ==
			    _PGSTE_GPS_USAGE_UNUSED)
				old = set_pte_bit(old, __pgprot(_PAGE_UNUSED));
		}
		pgste = pgste_set_pte(ptep, pgste, new);
		pgste_set_unlock(ptep, pgste);
	} else {
		set_pte(ptep, new);
	}
	return old;
}

pte_t ptep_xchg_direct(struct mm_struct *mm, unsigned long addr,
		       pte_t *ptep, pte_t new)
{
	pgste_t pgste;
	pte_t old;
	int nodat;

	preempt_disable();
	pgste = ptep_xchg_start(mm, addr, ptep);
	nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT);
	old = ptep_flush_direct(mm, addr, ptep, nodat);
	old = ptep_xchg_commit(mm, addr, ptep, pgste, old, new);
	preempt_enable();
	return old;
}
EXPORT_SYMBOL(ptep_xchg_direct);

/*
 * Caller must check that new PTE only differs in _PAGE_PROTECT HW bit, so that
 * RDP can be used instead of IPTE. See also comments at pte_allow_rdp().
 */
void ptep_reset_dat_prot(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
			 pte_t new)
{
	preempt_disable();
	atomic_inc(&mm->context.flush_count);
	if (cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
		__ptep_rdp(addr, ptep, 0, 0, 1);
	else
		__ptep_rdp(addr, ptep, 0, 0, 0);
	/*
	 * PTE is not invalidated by RDP, only _PAGE_PROTECT is cleared. That
	 * means it is still valid and active, and must not be changed according
	 * to the architecture. But writing a new value that only differs in SW
	 * bits is allowed.
	 */
	set_pte(ptep, new);
	atomic_dec(&mm->context.flush_count);
	preempt_enable();
}
EXPORT_SYMBOL(ptep_reset_dat_prot);

pte_t ptep_xchg_lazy(struct mm_struct *mm, unsigned long addr,
		     pte_t *ptep, pte_t new)
{
	pgste_t pgste;
	pte_t old;
	int nodat;

	preempt_disable();
	pgste = ptep_xchg_start(mm, addr, ptep);
	nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT);
	old = ptep_flush_lazy(mm, addr, ptep, nodat);
	old = ptep_xchg_commit(mm, addr, ptep, pgste, old, new);
	preempt_enable();
	return old;
}
EXPORT_SYMBOL(ptep_xchg_lazy);

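/*
 * ptep_modify_prot_start()/_commit() implement the generic interface for
 * transparent PTE protection updates. Preemption is disabled in _start() and
 * only re-enabled in _commit(); for mms with PGSTEs the PGSTE lock is also
 * held across both calls.
 */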
pte_t ptep_modify_prot_start(struct vm_area_struct *vma, unsigned long addr,
			     pte_t *ptep)
{
	pgste_t pgste;
	pte_t old;
	int nodat;
	struct mm_struct *mm = vma->vm_mm;

	preempt_disable();
	pgste = ptep_xchg_start(mm, addr, ptep);
	nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT);
	old = ptep_flush_lazy(mm, addr, ptep, nodat);
	if (mm_has_pgste(mm)) {
		pgste = pgste_update_all(old, pgste, mm);
		pgste_set(ptep, pgste);
	}
	return old;
}

void ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr,
			     pte_t *ptep, pte_t old_pte, pte_t pte)
{
	pgste_t pgste;
	struct mm_struct *mm = vma->vm_mm;

	if (mm_has_pgste(mm)) {
		pgste = pgste_get(ptep);
		pgste_set_key(ptep, pgste, pte, mm);
		pgste = pgste_set_pte(ptep, pgste, pte);
		pgste_set_unlock(ptep, pgste);
	} else {
		set_pte(ptep, pte);
	}
	preempt_enable();
}

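/*
 * Invalidate a segment table entry and flush the TLB with IDTE; the global
 * variant falls back to CSP if IDTE is not available. For mms with PGSTEs
 * and 1M huge pages allowed in the gmap, the corresponding gmap segment
 * table entries are invalidated as well.
 */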
static inline void pmdp_idte_local(struct mm_struct *mm,
				   unsigned long addr, pmd_t *pmdp)
{
	if (machine_has_tlb_guest())
		__pmdp_idte(addr, pmdp, IDTE_NODAT | IDTE_GUEST_ASCE,
			    mm->context.asce, IDTE_LOCAL);
	else
		__pmdp_idte(addr, pmdp, 0, 0, IDTE_LOCAL);
	if (mm_has_pgste(mm) && mm->context.allow_gmap_hpage_1m)
		gmap_pmdp_idte_local(mm, addr);
}

static inline void pmdp_idte_global(struct mm_struct *mm,
				    unsigned long addr, pmd_t *pmdp)
{
	if (machine_has_tlb_guest()) {
		__pmdp_idte(addr, pmdp, IDTE_NODAT | IDTE_GUEST_ASCE,
			    mm->context.asce, IDTE_GLOBAL);
		if (mm_has_pgste(mm) && mm->context.allow_gmap_hpage_1m)
			gmap_pmdp_idte_global(mm, addr);
	} else if (cpu_has_idte()) {
		__pmdp_idte(addr, pmdp, 0, 0, IDTE_GLOBAL);
		if (mm_has_pgste(mm) && mm->context.allow_gmap_hpage_1m)
			gmap_pmdp_idte_global(mm, addr);
	} else {
		__pmdp_csp(pmdp);
		if (mm_has_pgste(mm) && mm->context.allow_gmap_hpage_1m)
			gmap_pmdp_csp(mm, addr);
	}
}

static inline pmd_t pmdp_flush_direct(struct mm_struct *mm,
				      unsigned long addr, pmd_t *pmdp)
{
	pmd_t old;

	old = *pmdp;
	if (pmd_val(old) & _SEGMENT_ENTRY_INVALID)
		return old;
	atomic_inc(&mm->context.flush_count);
	if (cpu_has_tlb_lc() &&
	    cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
		pmdp_idte_local(mm, addr, pmdp);
	else
		pmdp_idte_global(mm, addr, pmdp);
	atomic_dec(&mm->context.flush_count);
	return old;
}

static inline pmd_t pmdp_flush_lazy(struct mm_struct *mm,
				    unsigned long addr, pmd_t *pmdp)
{
	pmd_t old;

	old = *pmdp;
	if (pmd_val(old) & _SEGMENT_ENTRY_INVALID)
		return old;
	atomic_inc(&mm->context.flush_count);
	if (cpumask_equal(&mm->context.cpu_attach_mask,
			  cpumask_of(smp_processor_id()))) {
		set_pmd(pmdp, set_pmd_bit(*pmdp, __pgprot(_SEGMENT_ENTRY_INVALID)));
		mm->context.flush_mm = 1;
		if (mm_has_pgste(mm))
			gmap_pmdp_invalidate(mm, addr);
	} else {
		pmdp_idte_global(mm, addr, pmdp);
	}
	atomic_dec(&mm->context.flush_count);
	return old;
}

#ifdef CONFIG_PGSTE
static int pmd_lookup(struct mm_struct *mm, unsigned long addr, pmd_t **pmdp)
{
	struct vm_area_struct *vma;
	pgd_t *pgd;
	p4d_t *p4d;
	pud_t *pud;

	/* We need a valid VMA, otherwise this is clearly a fault. */
	vma = vma_lookup(mm, addr);
	if (!vma)
		return -EFAULT;

	pgd = pgd_offset(mm, addr);
	if (!pgd_present(*pgd))
		return -ENOENT;

	p4d = p4d_offset(pgd, addr);
	if (!p4d_present(*p4d))
		return -ENOENT;

	pud = pud_offset(p4d, addr);
	if (!pud_present(*pud))
		return -ENOENT;

	/* Large PUDs are not supported yet. */
	if (pud_leaf(*pud))
		return -EFAULT;

	*pmdp = pmd_offset(pud, addr);
	return 0;
}
#endif

pmd_t pmdp_xchg_direct(struct mm_struct *mm, unsigned long addr,
		       pmd_t *pmdp, pmd_t new)
{
	pmd_t old;

	preempt_disable();
	old = pmdp_flush_direct(mm, addr, pmdp);
	set_pmd(pmdp, new);
	preempt_enable();
	return old;
}
EXPORT_SYMBOL(pmdp_xchg_direct);

pmd_t pmdp_xchg_lazy(struct mm_struct *mm, unsigned long addr,
		     pmd_t *pmdp, pmd_t new)
{
	pmd_t old;

	preempt_disable();
	old = pmdp_flush_lazy(mm, addr, pmdp);
	set_pmd(pmdp, new);
	preempt_enable();
	return old;
}
EXPORT_SYMBOL(pmdp_xchg_lazy);

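/*
 * PUD level counterparts of the pmdp_idte helpers; there is no gmap
 * handling at this level.
 */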
static inline void pudp_idte_local(struct mm_struct *mm,
				   unsigned long addr, pud_t *pudp)
{
	if (machine_has_tlb_guest())
		__pudp_idte(addr, pudp, IDTE_NODAT | IDTE_GUEST_ASCE,
			    mm->context.asce, IDTE_LOCAL);
	else
		__pudp_idte(addr, pudp, 0, 0, IDTE_LOCAL);
}

static inline void pudp_idte_global(struct mm_struct *mm,
				    unsigned long addr, pud_t *pudp)
{
	if (machine_has_tlb_guest())
		__pudp_idte(addr, pudp, IDTE_NODAT | IDTE_GUEST_ASCE,
			    mm->context.asce, IDTE_GLOBAL);
	else if (cpu_has_idte())
		__pudp_idte(addr, pudp, 0, 0, IDTE_GLOBAL);
	else
		/*
		 * Invalid bit position is the same for pmd and pud, so we can
		 * reuse __pmdp_csp() here.
		 */
		__pmdp_csp((pmd_t *) pudp);
}

static inline pud_t pudp_flush_direct(struct mm_struct *mm,
				      unsigned long addr, pud_t *pudp)
{
	pud_t old;

	old = *pudp;
	if (pud_val(old) & _REGION_ENTRY_INVALID)
		return old;
	atomic_inc(&mm->context.flush_count);
	if (cpu_has_tlb_lc() &&
	    cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
		pudp_idte_local(mm, addr, pudp);
	else
		pudp_idte_global(mm, addr, pudp);
	atomic_dec(&mm->context.flush_count);
	return old;
}

pud_t pudp_xchg_direct(struct mm_struct *mm, unsigned long addr,
		       pud_t *pudp, pud_t new)
{
	pud_t old;

	preempt_disable();
	old = pudp_flush_direct(mm, addr, pudp);
	set_pud(pudp, new);
	preempt_enable();
	return old;
}
EXPORT_SYMBOL(pudp_xchg_direct);

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
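/*
 * Deposited page tables are kept on a list headed by pmd_huge_pte(). The
 * first two PTE slots of a deposited page table are reused as the struct
 * list_head and are therefore set back to _PAGE_INVALID on withdraw.
 */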
void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
				pgtable_t pgtable)
{
	struct list_head *lh = (struct list_head *) pgtable;

	assert_spin_locked(pmd_lockptr(mm, pmdp));

	/* FIFO */
	if (!pmd_huge_pte(mm, pmdp))
		INIT_LIST_HEAD(lh);
	else
		list_add(lh, (struct list_head *) pmd_huge_pte(mm, pmdp));
	pmd_huge_pte(mm, pmdp) = pgtable;
}

pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp)
{
	struct list_head *lh;
	pgtable_t pgtable;
	pte_t *ptep;

	assert_spin_locked(pmd_lockptr(mm, pmdp));

	/* FIFO */
	pgtable = pmd_huge_pte(mm, pmdp);
	lh = (struct list_head *) pgtable;
	if (list_empty(lh))
		pmd_huge_pte(mm, pmdp) = NULL;
	else {
		pmd_huge_pte(mm, pmdp) = (pgtable_t) lh->next;
		list_del(lh);
	}
	ptep = (pte_t *) pgtable;
	set_pte(ptep, __pte(_PAGE_INVALID));
	ptep++;
	set_pte(ptep, __pte(_PAGE_INVALID));
	return pgtable;
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */

#ifdef CONFIG_PGSTE
void ptep_set_pte_at(struct mm_struct *mm, unsigned long addr,
		     pte_t *ptep, pte_t entry)
{
	pgste_t pgste;

	/* the mm_has_pgste() check is done in set_pte_at() */
	preempt_disable();
	pgste = pgste_get_lock(ptep);
	pgste = clear_pgste_bit(pgste, _PGSTE_GPS_ZERO);
	pgste_set_key(ptep, pgste, entry, mm);
	pgste = pgste_set_pte(ptep, pgste, entry);
	pgste_set_unlock(ptep, pgste);
	preempt_enable();
}

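/*
 * Mark a PTE for invalidation notification: the next invalidation of this
 * PTE will deliver a notification via pgste_pte_notify() -> ptep_notify().
 */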
void ptep_set_notify(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
{
	pgste_t pgste;

	preempt_disable();
	pgste = pgste_get_lock(ptep);
	pgste = set_pgste_bit(pgste, PGSTE_IN_BIT);
	pgste_set_unlock(ptep, pgste);
	preempt_enable();
}

/**
 * ptep_force_prot - change access rights of a locked pte
 * @mm: pointer to the process mm_struct
 * @addr: virtual address in the guest address space
 * @ptep: pointer to the page table entry
 * @prot: indicates guest access rights: PROT_NONE, PROT_READ or PROT_WRITE
 * @bit: pgste bit to set (e.g. for notification)
 *
 * Returns 0 if the access rights were changed and -EAGAIN if the current
 * and requested access rights are incompatible.
 */
int ptep_force_prot(struct mm_struct *mm, unsigned long addr,
		    pte_t *ptep, int prot, unsigned long bit)
{
	pte_t entry;
	pgste_t pgste;
	int pte_i, pte_p, nodat;

	pgste = pgste_get_lock(ptep);
	entry = *ptep;
	/* Check pte entry after all locks have been acquired */
	pte_i = pte_val(entry) & _PAGE_INVALID;
	pte_p = pte_val(entry) & _PAGE_PROTECT;
	if ((pte_i && (prot != PROT_NONE)) ||
	    (pte_p && (prot & PROT_WRITE))) {
		pgste_set_unlock(ptep, pgste);
		return -EAGAIN;
	}
	/* Change access rights and set pgste bit */
	nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT);
	if (prot == PROT_NONE && !pte_i) {
		ptep_flush_direct(mm, addr, ptep, nodat);
		pgste = pgste_update_all(entry, pgste, mm);
		entry = set_pte_bit(entry, __pgprot(_PAGE_INVALID));
	}
	if (prot == PROT_READ && !pte_p) {
		ptep_flush_direct(mm, addr, ptep, nodat);
		entry = clear_pte_bit(entry, __pgprot(_PAGE_INVALID));
		entry = set_pte_bit(entry, __pgprot(_PAGE_PROTECT));
	}
	pgste = set_pgste_bit(pgste, bit);
	pgste = pgste_set_pte(ptep, pgste, entry);
	pgste_set_unlock(ptep, pgste);
	return 0;
}

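/*
 * Create a shadow (VSIE) copy of a source PTE: the target PTE receives the
 * page frame of the source and the protection bit of the requested mapping,
 * and the source PGSTE is tagged with PGSTE_VSIE_BIT so that invalidation of
 * the source is notified. Returns 1 if a shadow PTE was created, 0 if the
 * target is already valid, and -EAGAIN if the source PTE is invalid or does
 * not allow the requested access.
 */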
int ptep_shadow_pte(struct mm_struct *mm, unsigned long saddr,
		    pte_t *sptep, pte_t *tptep, pte_t pte)
{
	pgste_t spgste, tpgste;
	pte_t spte, tpte;
	int rc = -EAGAIN;

	if (!(pte_val(*tptep) & _PAGE_INVALID))
		return 0;	/* already shadowed */
	spgste = pgste_get_lock(sptep);
	spte = *sptep;
	if (!(pte_val(spte) & _PAGE_INVALID) &&
	    !((pte_val(spte) & _PAGE_PROTECT) &&
	      !(pte_val(pte) & _PAGE_PROTECT))) {
		spgste = set_pgste_bit(spgste, PGSTE_VSIE_BIT);
		tpgste = pgste_get_lock(tptep);
		tpte = __pte((pte_val(spte) & PAGE_MASK) |
			     (pte_val(pte) & _PAGE_PROTECT));
		/* don't touch the storage key - it belongs to parent pgste */
		tpgste = pgste_set_pte(tptep, tpgste, tpte);
		pgste_set_unlock(tptep, tpgste);
		rc = 1;
	}
	pgste_set_unlock(sptep, spgste);
	return rc;
}

void ptep_unshadow_pte(struct mm_struct *mm, unsigned long saddr, pte_t *ptep)
{
	pgste_t pgste;
	int nodat;

	pgste = pgste_get_lock(ptep);
	/* notifier is called by the caller */
	nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT);
	ptep_flush_direct(mm, saddr, ptep, nodat);
	/* don't touch the storage key - it belongs to parent pgste */
	pgste = pgste_set_pte(ptep, pgste, __pte(_PAGE_INVALID));
	pgste_set_unlock(ptep, pgste);
}

static void ptep_zap_swap_entry(struct mm_struct *mm, swp_entry_t entry)
{
	if (!non_swap_entry(entry))
		dec_mm_counter(mm, MM_SWAPENTS);
	else if (is_migration_entry(entry)) {
		struct folio *folio = pfn_swap_entry_folio(entry);

		dec_mm_counter(mm, mm_counter(folio));
	}
	free_swap_and_cache(entry);
}

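/*
 * Zap a swapped PTE whose guest usage state marks the page as unused or
 * logically zero, or, if @reset is set, clear the guest usage and NODAT
 * state in the PGSTE.
 */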
void ptep_zap_unused(struct mm_struct *mm, unsigned long addr,
		     pte_t *ptep, int reset)
{
	unsigned long pgstev;
	pgste_t pgste;
	pte_t pte;

	/* Zap unused and logically-zero pages */
	preempt_disable();
	pgste = pgste_get_lock(ptep);
	pgstev = pgste_val(pgste);
	pte = *ptep;
	if (!reset && pte_swap(pte) &&
	    ((pgstev & _PGSTE_GPS_USAGE_MASK) == _PGSTE_GPS_USAGE_UNUSED ||
	     (pgstev & _PGSTE_GPS_ZERO))) {
		ptep_zap_swap_entry(mm, pte_to_swp_entry(pte));
		pte_clear(mm, addr, ptep);
	}
	if (reset)
		pgste = clear_pgste_bit(pgste, _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT);
	pgste_set_unlock(ptep, pgste);
	preempt_enable();
}

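/*
 * Drop the storage key state of a page: ACC and F are cleared in the PGSTE
 * while R and C are set, and the real key of a mapped, writable page is
 * reset to PAGE_DEFAULT_KEY.
 */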
void ptep_zap_key(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
{
	unsigned long ptev;
	pgste_t pgste;

	/* Clear storage key ACC and F, but set R/C */
	preempt_disable();
	pgste = pgste_get_lock(ptep);
	pgste = clear_pgste_bit(pgste, PGSTE_ACC_BITS | PGSTE_FP_BIT);
	pgste = set_pgste_bit(pgste, PGSTE_GR_BIT | PGSTE_GC_BIT);
	ptev = pte_val(*ptep);
	if (!(ptev & _PAGE_INVALID) && (ptev & _PAGE_WRITE))
		page_set_storage_key(ptev & PAGE_MASK, PAGE_DEFAULT_KEY, 0);
	pgste_set_unlock(ptep, pgste);
	preempt_enable();
}

/*
 * Test and reset if a guest page is dirty
 */
bool ptep_test_and_clear_uc(struct mm_struct *mm, unsigned long addr,
		       pte_t *ptep)
{
	pgste_t pgste;
	pte_t pte;
	bool dirty;
	int nodat;

	pgste = pgste_get_lock(ptep);
	dirty = !!(pgste_val(pgste) & PGSTE_UC_BIT);
	pgste = clear_pgste_bit(pgste, PGSTE_UC_BIT);
	pte = *ptep;
	if (dirty && (pte_val(pte) & _PAGE_PRESENT)) {
		pgste = pgste_pte_notify(mm, addr, ptep, pgste);
		nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT);
		ptep_ipte_global(mm, addr, ptep, nodat);
		if (machine_has_esop() || !(pte_val(pte) & _PAGE_WRITE))
			pte = set_pte_bit(pte, __pgprot(_PAGE_PROTECT));
		else
			pte = set_pte_bit(pte, __pgprot(_PAGE_INVALID));
		set_pte(ptep, pte);
	}
	pgste_set_unlock(ptep, pgste);
	return dirty;
}
EXPORT_SYMBOL_GPL(ptep_test_and_clear_uc);

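/*
 * Set the storage key for a guest page. For a huge pmd the key is set
 * directly on the mapped page; for a normal mapping the ACC/F/R/C state is
 * kept in the PGSTE and the real storage key is only updated while the page
 * is mapped.
 */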
int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
			  unsigned char key, bool nq)
{
	unsigned long keyul, paddr;
	spinlock_t *ptl;
	pgste_t old, new;
	pmd_t *pmdp;
	pte_t *ptep;

	/*
	 * If we don't have a PTE table and if there is no huge page mapped,
	 * we can ignore attempts to set the key to 0, because it already is 0.
	 */
	switch (pmd_lookup(mm, addr, &pmdp)) {
	case -ENOENT:
		return key ? -EFAULT : 0;
	case 0:
		break;
	default:
		return -EFAULT;
	}
again:
	ptl = pmd_lock(mm, pmdp);
	if (!pmd_present(*pmdp)) {
		spin_unlock(ptl);
		return key ? -EFAULT : 0;
	}

	if (pmd_leaf(*pmdp)) {
		paddr = pmd_val(*pmdp) & HPAGE_MASK;
		paddr |= addr & ~HPAGE_MASK;
		/*
		 * Huge pmds need quiescing operations, they are
		 * always mapped.
		 */
		page_set_storage_key(paddr, key, 1);
		spin_unlock(ptl);
		return 0;
	}
	spin_unlock(ptl);

	ptep = pte_offset_map_lock(mm, pmdp, addr, &ptl);
	if (!ptep)
		goto again;
	new = old = pgste_get_lock(ptep);
	new = clear_pgste_bit(new, PGSTE_GR_BIT | PGSTE_GC_BIT |
				   PGSTE_ACC_BITS | PGSTE_FP_BIT);
	keyul = (unsigned long) key;
	new = set_pgste_bit(new, (keyul & (_PAGE_CHANGED | _PAGE_REFERENCED)) << 48);
	new = set_pgste_bit(new, (keyul & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56);
	if (!(pte_val(*ptep) & _PAGE_INVALID)) {
		unsigned long bits, skey;

		paddr = pte_val(*ptep) & PAGE_MASK;
		skey = (unsigned long) page_get_storage_key(paddr);
		bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED);
		skey = key & (_PAGE_ACC_BITS | _PAGE_FP_BIT);
		/* Set storage key ACC and FP */
		page_set_storage_key(paddr, skey, !nq);
		/* Merge host changed & referenced into pgste */
		new = set_pgste_bit(new, bits << 52);
	}
	/* changing the guest storage key is considered a change of the page */
	if ((pgste_val(new) ^ pgste_val(old)) &
	    (PGSTE_ACC_BITS | PGSTE_FP_BIT | PGSTE_GR_BIT | PGSTE_GC_BIT))
		new = set_pgste_bit(new, PGSTE_UC_BIT);

	pgste_set_unlock(ptep, new);
	pte_unmap_unlock(ptep, ptl);
	return 0;
}
EXPORT_SYMBOL(set_guest_storage_key);

/*
 * Conditionally set a guest storage key (handling csske).
 * oldkey will be updated when either mr or mc is set and a pointer is given.
 *
 * Returns 0 if a guest's storage key update wasn't necessary, 1 if the guest
 * storage key was updated and -EFAULT on access errors.
 */
int cond_set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
			       unsigned char key, unsigned char *oldkey,
			       bool nq, bool mr, bool mc)
{
	unsigned char tmp, mask = _PAGE_ACC_BITS | _PAGE_FP_BIT;
	int rc;

	/* we can drop the pgste lock between getting and setting the key */
	if (mr | mc) {
		rc = get_guest_storage_key(current->mm, addr, &tmp);
		if (rc)
			return rc;
		if (oldkey)
			*oldkey = tmp;
		if (!mr)
			mask |= _PAGE_REFERENCED;
		if (!mc)
			mask |= _PAGE_CHANGED;
		if (!((tmp ^ key) & mask))
			return 0;
	}
	rc = set_guest_storage_key(current->mm, addr, key, nq);
	return rc < 0 ? rc : 1;
}
EXPORT_SYMBOL(cond_set_guest_storage_key);

/*
 * Reset a guest reference bit (rrbe), returning the reference and changed bit.
 *
 * Returns < 0 in case of error, otherwise the cc to be reported to the guest.
 */
int reset_guest_reference_bit(struct mm_struct *mm, unsigned long addr)
{
	spinlock_t *ptl;
	unsigned long paddr;
	pgste_t old, new;
	pmd_t *pmdp;
	pte_t *ptep;
	int cc = 0;

	/*
	 * If we don't have a PTE table and if there is no huge page mapped,
	 * the storage key is 0 and there is nothing for us to do.
	 */
	switch (pmd_lookup(mm, addr, &pmdp)) {
	case -ENOENT:
		return 0;
	case 0:
		break;
	default:
		return -EFAULT;
	}
again:
	ptl = pmd_lock(mm, pmdp);
	if (!pmd_present(*pmdp)) {
		spin_unlock(ptl);
		return 0;
	}

	if (pmd_leaf(*pmdp)) {
		paddr = pmd_val(*pmdp) & HPAGE_MASK;
		paddr |= addr & ~HPAGE_MASK;
		cc = page_reset_referenced(paddr);
		spin_unlock(ptl);
		return cc;
	}
	spin_unlock(ptl);

	ptep = pte_offset_map_lock(mm, pmdp, addr, &ptl);
	if (!ptep)
		goto again;
	new = old = pgste_get_lock(ptep);
	/* Reset guest reference bit only */
	new = clear_pgste_bit(new, PGSTE_GR_BIT);

	if (!(pte_val(*ptep) & _PAGE_INVALID)) {
		paddr = pte_val(*ptep) & PAGE_MASK;
		cc = page_reset_referenced(paddr);
		/* Merge real referenced bit into host-set */
		new = set_pgste_bit(new, ((unsigned long)cc << 53) & PGSTE_HR_BIT);
	}
	/* Reflect guest's logical view, not physical */
	cc |= (pgste_val(old) & (PGSTE_GR_BIT | PGSTE_GC_BIT)) >> 49;
	/* Changing the guest storage key is considered a change of the page */
	if ((pgste_val(new) ^ pgste_val(old)) & PGSTE_GR_BIT)
		new = set_pgste_bit(new, PGSTE_UC_BIT);

	pgste_set_unlock(ptep, new);
	pte_unmap_unlock(ptep, ptl);
	return cc;
}
EXPORT_SYMBOL(reset_guest_reference_bit);

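/*
 * Read the storage key of a guest page. For a normal mapping the real key is
 * read if the page is mapped, otherwise the ACC/F bits saved in the PGSTE
 * are used, and the guest view of R and C is merged in from the PGSTE. For a
 * huge pmd the real key of the mapped page is returned.
 */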
int get_guest_storage_key(struct mm_struct *mm, unsigned long addr,
			  unsigned char *key)
{
	unsigned long paddr;
	spinlock_t *ptl;
	pgste_t pgste;
	pmd_t *pmdp;
	pte_t *ptep;

	/*
	 * If we don't have a PTE table and if there is no huge page mapped,
	 * the storage key is 0.
	 */
	*key = 0;

	switch (pmd_lookup(mm, addr, &pmdp)) {
	case -ENOENT:
		return 0;
	case 0:
		break;
	default:
		return -EFAULT;
	}
again:
	ptl = pmd_lock(mm, pmdp);
	if (!pmd_present(*pmdp)) {
		spin_unlock(ptl);
		return 0;
	}

	if (pmd_leaf(*pmdp)) {
		paddr = pmd_val(*pmdp) & HPAGE_MASK;
		paddr |= addr & ~HPAGE_MASK;
		*key = page_get_storage_key(paddr);
		spin_unlock(ptl);
		return 0;
	}
	spin_unlock(ptl);

	ptep = pte_offset_map_lock(mm, pmdp, addr, &ptl);
	if (!ptep)
		goto again;
	pgste = pgste_get_lock(ptep);
	*key = (pgste_val(pgste) & (PGSTE_ACC_BITS | PGSTE_FP_BIT)) >> 56;
	paddr = pte_val(*ptep) & PAGE_MASK;
	if (!(pte_val(*ptep) & _PAGE_INVALID))
		*key = page_get_storage_key(paddr);
	/* Reflect guest's logical view, not physical */
	*key |= (pgste_val(pgste) & (PGSTE_GR_BIT | PGSTE_GC_BIT)) >> 48;
	pgste_set_unlock(ptep, pgste);
	pte_unmap_unlock(ptep, ptl);
	return 0;
}
EXPORT_SYMBOL(get_guest_storage_key);

/**
 * pgste_perform_essa - perform ESSA actions on the PGSTE.
 * @mm: the memory context. It must have PGSTEs, no check is performed here!
 * @hva: the host virtual address of the page whose PGSTE is to be processed
 * @orc: the specific action to perform, see the ESSA_SET_* macros.
 * @oldpte: the PTE will be saved there if the pointer is not NULL.
 * @oldpgste: the old PGSTE will be saved there if the pointer is not NULL.
 *
 * Return: 1 if the page is to be added to the CBRL, otherwise 0,
 *	   or < 0 in case of error. -EINVAL is returned for invalid values
 *	   of orc, -EFAULT for invalid addresses.
 */
int pgste_perform_essa(struct mm_struct *mm, unsigned long hva, int orc,
			unsigned long *oldpte, unsigned long *oldpgste)
{
	struct vm_area_struct *vma;
	unsigned long pgstev;
	spinlock_t *ptl;
	pgste_t pgste;
	pte_t *ptep;
	int res = 0;

	WARN_ON_ONCE(orc > ESSA_MAX);
	if (unlikely(orc > ESSA_MAX))
		return -EINVAL;

	vma = vma_lookup(mm, hva);
	if (!vma || is_vm_hugetlb_page(vma))
		return -EFAULT;
	ptep = get_locked_pte(mm, hva, &ptl);
	if (unlikely(!ptep))
		return -EFAULT;
	pgste = pgste_get_lock(ptep);
	pgstev = pgste_val(pgste);
	if (oldpte)
		*oldpte = pte_val(*ptep);
	if (oldpgste)
		*oldpgste = pgstev;

	switch (orc) {
	case ESSA_GET_STATE:
		break;
	case ESSA_SET_STABLE:
		pgstev &= ~(_PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT);
		pgstev |= _PGSTE_GPS_USAGE_STABLE;
		break;
	case ESSA_SET_UNUSED:
		pgstev &= ~_PGSTE_GPS_USAGE_MASK;
		pgstev |= _PGSTE_GPS_USAGE_UNUSED;
		if (pte_val(*ptep) & _PAGE_INVALID)
			res = 1;
		break;
	case ESSA_SET_VOLATILE:
		pgstev &= ~_PGSTE_GPS_USAGE_MASK;
		pgstev |= _PGSTE_GPS_USAGE_VOLATILE;
		if (pte_val(*ptep) & _PAGE_INVALID)
			res = 1;
		break;
	case ESSA_SET_POT_VOLATILE:
		pgstev &= ~_PGSTE_GPS_USAGE_MASK;
		if (!(pte_val(*ptep) & _PAGE_INVALID)) {
			pgstev |= _PGSTE_GPS_USAGE_POT_VOLATILE;
			break;
		}
		if (pgstev & _PGSTE_GPS_ZERO) {
			pgstev |= _PGSTE_GPS_USAGE_VOLATILE;
			break;
		}
		if (!(pgstev & PGSTE_GC_BIT)) {
			pgstev |= _PGSTE_GPS_USAGE_VOLATILE;
			res = 1;
			break;
		}
		break;
	case ESSA_SET_STABLE_RESIDENT:
		pgstev &= ~_PGSTE_GPS_USAGE_MASK;
		pgstev |= _PGSTE_GPS_USAGE_STABLE;
		/*
		 * Since the resident state can go away any time after this
		 * call, we will not make this page resident. We can revisit
		 * this decision if a guest will ever start using this.
		 */
		break;
	case ESSA_SET_STABLE_IF_RESIDENT:
		if (!(pte_val(*ptep) & _PAGE_INVALID)) {
			pgstev &= ~_PGSTE_GPS_USAGE_MASK;
			pgstev |= _PGSTE_GPS_USAGE_STABLE;
		}
		break;
	case ESSA_SET_STABLE_NODAT:
		pgstev &= ~_PGSTE_GPS_USAGE_MASK;
		pgstev |= _PGSTE_GPS_USAGE_STABLE | _PGSTE_GPS_NODAT;
		break;
	default:
		/* we should never get here! */
		break;
	}
	/* If we are discarding a page, set it to logical zero */
	if (res)
		pgstev |= _PGSTE_GPS_ZERO;

	pgste = __pgste(pgstev);
	pgste_set_unlock(ptep, pgste);
	pte_unmap_unlock(ptep, ptl);
	return res;
}
EXPORT_SYMBOL(pgste_perform_essa);

/**
 * set_pgste_bits - set specific PGSTE bits.
 * @mm: the memory context. It must have PGSTEs, no check is performed here!
 * @hva: the host virtual address of the page whose PGSTE is to be processed
 * @bits: a bitmask representing the bits that will be touched
 * @value: the values of the bits to be written. Only the bits in the mask
 *	   will be written.
 *
 * Return: 0 on success, < 0 in case of error.
 */
int set_pgste_bits(struct mm_struct *mm, unsigned long hva,
			unsigned long bits, unsigned long value)
{
	struct vm_area_struct *vma;
	spinlock_t *ptl;
	pgste_t new;
	pte_t *ptep;

	vma = vma_lookup(mm, hva);
	if (!vma || is_vm_hugetlb_page(vma))
		return -EFAULT;
	ptep = get_locked_pte(mm, hva, &ptl);
	if (unlikely(!ptep))
		return -EFAULT;
	new = pgste_get_lock(ptep);

	new = clear_pgste_bit(new, bits);
	new = set_pgste_bit(new, value & bits);

	pgste_set_unlock(ptep, new);
	pte_unmap_unlock(ptep, ptl);
	return 0;
}
EXPORT_SYMBOL(set_pgste_bits);

/**
 * get_pgste - get the current PGSTE for the given address.
 * @mm: the memory context. It must have PGSTEs, no check is performed here!
 * @hva: the host virtual address of the page whose PGSTE is to be processed
 * @pgstep: will be written with the current PGSTE for the given address.
 *
 * Return: 0 on success, < 0 in case of error.
 */
int get_pgste(struct mm_struct *mm, unsigned long hva, unsigned long *pgstep)
{
	struct vm_area_struct *vma;
	spinlock_t *ptl;
	pte_t *ptep;

	vma = vma_lookup(mm, hva);
	if (!vma || is_vm_hugetlb_page(vma))
		return -EFAULT;
	ptep = get_locked_pte(mm, hva, &ptl);
	if (unlikely(!ptep))
		return -EFAULT;
	*pgstep = pgste_val(pgste_get(ptep));
	pte_unmap_unlock(ptep, ptl);
	return 0;
}
EXPORT_SYMBOL(get_pgste);
#endif