// SPDX-License-Identifier: GPL-2.0
/*
 * Helper functions for KVM guest address space mapping code
 *
 * Copyright IBM Corp. 2007, 2025
 */

#include <linux/export.h>
#include <linux/mm_types.h>
#include <linux/mmap_lock.h>
#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/swap.h>
#include <linux/swapops.h>
#include <linux/pagewalk.h>
#include <linux/ksm.h>
#include <asm/gmap_helpers.h>

/**
 * ptep_zap_swap_entry() - discard a swap entry.
 * @mm: the mm
 * @entry: the swap entry that needs to be zapped
 *
 * Discards the given swap entry. If the swap entry was an actual swap
 * entry (and not a migration entry, for example), the actual swapped
 * page is also discarded from swap.
 */
static void ptep_zap_swap_entry(struct mm_struct *mm, swp_entry_t entry)
{
	if (!non_swap_entry(entry))
		dec_mm_counter(mm, MM_SWAPENTS);
	else if (is_migration_entry(entry))
		dec_mm_counter(mm, mm_counter(pfn_swap_entry_folio(entry)));
	free_swap_and_cache(entry);
}

/**
 * gmap_helper_zap_one_page() - discard a page if it was swapped.
 * @mm: the mm
 * @vmaddr: the userspace virtual address that needs to be discarded
 *
 * If the given address maps to a swap entry, discard it.
 *
 * Context: needs to be called while holding the mmap lock.
 */
void gmap_helper_zap_one_page(struct mm_struct *mm, unsigned long vmaddr)
{
	struct vm_area_struct *vma;
	spinlock_t *ptl;
	pte_t *ptep;

	mmap_assert_locked(mm);

	/* Find the vm address for the guest address */
	vma = vma_lookup(mm, vmaddr);
	if (!vma || is_vm_hugetlb_page(vma))
		return;

	/* Get pointer to the page table entry */
	ptep = get_locked_pte(mm, vmaddr, &ptl);
	if (unlikely(!ptep))
		return;
	if (pte_swap(*ptep))
		ptep_zap_swap_entry(mm, pte_to_swp_entry(*ptep));
	pte_unmap_unlock(ptep, ptl);
}
EXPORT_SYMBOL_GPL(gmap_helper_zap_one_page);
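
/*
 * Illustrative usage sketch (not part of the original file): a minimal
 * example of how a caller might discard a single swapped guest page with
 * gmap_helper_zap_one_page(). The example function name and the assumption
 * that @vmaddr has already been translated from a guest address are
 * hypothetical; the helper only asserts that the mmap lock is held, so
 * taking the read lock satisfies that requirement here.
 */
static void __maybe_unused gmap_helper_zap_one_page_example(struct mm_struct *mm,
							     unsigned long vmaddr)
{
	mmap_read_lock(mm);
	gmap_helper_zap_one_page(mm, vmaddr);
	mmap_read_unlock(mm);
}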

/**
 * gmap_helper_discard() - discard user pages in the given range
 * @mm: the mm
 * @vmaddr: starting userspace address
 * @end: end address (first address outside the range)
 *
 * All userspace pages in the range [@vmaddr, @end) are discarded and unmapped.
 *
 * Context: needs to be called while holding the mmap lock.
 */
void gmap_helper_discard(struct mm_struct *mm, unsigned long vmaddr, unsigned long end)
{
	struct vm_area_struct *vma;

	mmap_assert_locked(mm);

	while (vmaddr < end) {
		vma = find_vma_intersection(mm, vmaddr, end);
		if (!vma)
			return;
		if (!is_vm_hugetlb_page(vma))
			zap_page_range_single(vma, vmaddr, min(end, vma->vm_end) - vmaddr, NULL);
		vmaddr = vma->vm_end;
	}
}
EXPORT_SYMBOL_GPL(gmap_helper_discard);
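
/*
 * Illustrative usage sketch (not part of the original file): discarding a
 * whole range of userspace memory that backs guest storage. The example
 * function name and the [start, start + size) range handling are
 * assumptions made for illustration only.
 */
static void __maybe_unused gmap_helper_discard_example(struct mm_struct *mm,
							unsigned long start,
							unsigned long size)
{
	mmap_read_lock(mm);
	/* The end address is exclusive: the first address outside the range. */
	gmap_helper_discard(mm, start, start + size);
	mmap_read_unlock(mm);
}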

static int find_zeropage_pte_entry(pte_t *pte, unsigned long addr,
				   unsigned long end, struct mm_walk *walk)
{
	unsigned long *found_addr = walk->private;

	/* Return 1 if the page is a zeropage. */
	if (is_zero_pfn(pte_pfn(*pte))) {
		/*
		 * Shared zeropage in e.g., a FS DAX mapping? We cannot do the
		 * right thing and likely don't care: FAULT_FLAG_UNSHARE
		 * currently only works in COW mappings, which is also where
		 * mm_forbids_zeropage() is checked.
		 */
		if (!is_cow_mapping(walk->vma->vm_flags))
			return -EFAULT;

		*found_addr = addr;
		return 1;
	}
	return 0;
}

static const struct mm_walk_ops find_zeropage_ops = {
	.pte_entry = find_zeropage_pte_entry,
	.walk_lock = PGWALK_WRLOCK,
};

/**
 * __gmap_helper_unshare_zeropages() - unshare all shared zeropages
 * @mm: the mm whose zero pages are to be unshared
 *
 * Unshare all shared zeropages, replacing them by anonymous pages. Note that
 * we cannot simply zap all shared zeropages, because this could later
 * trigger unexpected userfaultfd missing events.
 *
 * This must be called after mm->context.allow_cow_sharing was
 * set to 0, to avoid future mappings of shared zeropages.
 *
 * mm contracts with s390 that, even if mm were to remove a page table
 * (so that a racing walk_page_range_vma() calling pte_offset_map_lock()
 * would fail), it will never insert a page table containing empty zero
 * pages once mm_forbids_zeropage(mm), i.e.
 * mm->context.allow_cow_sharing, is set to 0.
 */
static int __gmap_helper_unshare_zeropages(struct mm_struct *mm)
{
	struct vm_area_struct *vma;
	VMA_ITERATOR(vmi, mm, 0);
	unsigned long addr;
	vm_fault_t fault;
	int rc;

	for_each_vma(vmi, vma) {
		/*
		 * We could only look at COW mappings, but it's more future
		 * proof to catch unexpected zeropages in other mappings and
		 * fail.
		 */
		if ((vma->vm_flags & VM_PFNMAP) || is_vm_hugetlb_page(vma))
			continue;
		addr = vma->vm_start;

retry:
		rc = walk_page_range_vma(vma, addr, vma->vm_end,
					 &find_zeropage_ops, &addr);
		if (rc < 0)
			return rc;
		else if (!rc)
			continue;

		/* addr was updated by find_zeropage_pte_entry() */
		fault = handle_mm_fault(vma, addr,
					FAULT_FLAG_UNSHARE | FAULT_FLAG_REMOTE,
					NULL);
		if (fault & VM_FAULT_OOM)
			return -ENOMEM;
		/*
		 * See break_ksm(): even after handle_mm_fault() returned 0, we
		 * must start the lookup from the current address, because
		 * handle_mm_fault() may back out if there's any difficulty.
		 *
		 * VM_FAULT_SIGBUS and VM_FAULT_SIGSEGV are unexpected but
		 * maybe they could trigger in the future on concurrent
		 * truncation. In that case, the shared zeropage would be gone
		 * and we can simply retry and make progress.
		 */
		cond_resched();
		goto retry;
	}

	return 0;
}

/**
 * gmap_helper_disable_cow_sharing() - disable all COW sharing
 *
 * Disable most COW-sharing of memory pages for the whole process:
 * (1) Disable KSM and unmerge/unshare any KSM pages.
 * (2) Disallow shared zeropages and unshare any zeropages that are mapped.
 *
 * Note that we currently don't bother with COW-shared pages that are shared
 * with parent/child processes due to fork().
 *
 * Return: 0 on success, a negative error code otherwise.
 */
int gmap_helper_disable_cow_sharing(void)
{
	struct mm_struct *mm = current->mm;
	int rc;

	mmap_assert_write_locked(mm);

	if (!mm->context.allow_cow_sharing)
		return 0;

	mm->context.allow_cow_sharing = 0;

	/* Replace all shared zeropages by anonymous pages. */
	rc = __gmap_helper_unshare_zeropages(mm);
	/*
	 * Make sure to disable KSM (if enabled for the whole process or
	 * individual VMAs). Note that nothing currently hinders user space
	 * from re-enabling it.
	 */
	if (!rc)
		rc = ksm_disable(mm);
	if (rc)
		mm->context.allow_cow_sharing = 1;
	return rc;
}
EXPORT_SYMBOL_GPL(gmap_helper_disable_cow_sharing);
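
/*
 * Illustrative usage sketch (not part of the original file): disabling COW
 * sharing for the current process before enabling guest features that
 * cannot tolerate shared pages. The example function name is hypothetical;
 * the helper itself operates on current->mm and requires the mmap lock to
 * be held for writing, as asserted above.
 */
static int __maybe_unused gmap_helper_disable_cow_sharing_example(void)
{
	int rc;

	mmap_write_lock(current->mm);
	rc = gmap_helper_disable_cow_sharing();
	mmap_write_unlock(current->mm);
	return rc;
}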