1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Support KVM gust page tracking 4 * 5 * This feature allows us to track page access in guest. Currently, only 6 * write access is tracked. 7 * 8 * Copyright(C) 2015 Intel Corporation. 9 * 10 * Author: 11 * Xiao Guangrong <guangrong.xiao@linux.intel.com> 12 */ 13 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 14 15 #include <linux/lockdep.h> 16 #include <linux/kvm_host.h> 17 #include <linux/rculist.h> 18 19 #include "mmu.h" 20 #include "mmu_internal.h" 21 #include "page_track.h" 22 23 static bool kvm_external_write_tracking_enabled(struct kvm *kvm) 24 { 25 #ifdef CONFIG_KVM_EXTERNAL_WRITE_TRACKING 26 /* 27 * Read external_write_tracking_enabled before related pointers. Pairs 28 * with the smp_store_release in kvm_page_track_write_tracking_enable(). 29 */ 30 return smp_load_acquire(&kvm->arch.external_write_tracking_enabled); 31 #else 32 return false; 33 #endif 34 } 35 36 bool kvm_page_track_write_tracking_enabled(struct kvm *kvm) 37 { 38 return kvm_external_write_tracking_enabled(kvm) || 39 kvm_shadow_root_allocated(kvm) || !tdp_enabled; 40 } 41 42 void kvm_page_track_free_memslot(struct kvm_memory_slot *slot) 43 { 44 vfree(slot->arch.gfn_write_track); 45 slot->arch.gfn_write_track = NULL; 46 } 47 48 static int __kvm_page_track_write_tracking_alloc(struct kvm_memory_slot *slot, 49 unsigned long npages) 50 { 51 const size_t size = sizeof(*slot->arch.gfn_write_track); 52 53 if (!slot->arch.gfn_write_track) 54 slot->arch.gfn_write_track = __vcalloc(npages, size, 55 GFP_KERNEL_ACCOUNT); 56 57 return slot->arch.gfn_write_track ? 0 : -ENOMEM; 58 } 59 60 int kvm_page_track_create_memslot(struct kvm *kvm, 61 struct kvm_memory_slot *slot, 62 unsigned long npages) 63 { 64 if (!kvm_page_track_write_tracking_enabled(kvm)) 65 return 0; 66 67 return __kvm_page_track_write_tracking_alloc(slot, npages); 68 } 69 70 int kvm_page_track_write_tracking_alloc(struct kvm_memory_slot *slot) 71 { 72 return __kvm_page_track_write_tracking_alloc(slot, slot->npages); 73 } 74 75 static void update_gfn_write_track(struct kvm_memory_slot *slot, gfn_t gfn, 76 short count) 77 { 78 int index, val; 79 80 index = gfn_to_index(gfn, slot->base_gfn, PG_LEVEL_4K); 81 82 val = slot->arch.gfn_write_track[index]; 83 84 if (WARN_ON_ONCE(val + count < 0 || val + count > USHRT_MAX)) 85 return; 86 87 slot->arch.gfn_write_track[index] += count; 88 } 89 90 void __kvm_write_track_add_gfn(struct kvm *kvm, struct kvm_memory_slot *slot, 91 gfn_t gfn) 92 { 93 lockdep_assert_held_write(&kvm->mmu_lock); 94 95 lockdep_assert_once(lockdep_is_held(&kvm->slots_lock) || 96 srcu_read_lock_held(&kvm->srcu)); 97 98 if (KVM_BUG_ON(!kvm_page_track_write_tracking_enabled(kvm), kvm)) 99 return; 100 101 update_gfn_write_track(slot, gfn, 1); 102 103 /* 104 * new track stops large page mapping for the 105 * tracked page. 106 */ 107 kvm_mmu_gfn_disallow_lpage(slot, gfn); 108 109 if (kvm_mmu_slot_gfn_write_protect(kvm, slot, gfn, PG_LEVEL_4K)) 110 kvm_flush_remote_tlbs(kvm); 111 } 112 113 void __kvm_write_track_remove_gfn(struct kvm *kvm, 114 struct kvm_memory_slot *slot, gfn_t gfn) 115 { 116 lockdep_assert_held_write(&kvm->mmu_lock); 117 118 lockdep_assert_once(lockdep_is_held(&kvm->slots_lock) || 119 srcu_read_lock_held(&kvm->srcu)); 120 121 if (KVM_BUG_ON(!kvm_page_track_write_tracking_enabled(kvm), kvm)) 122 return; 123 124 update_gfn_write_track(slot, gfn, -1); 125 126 /* 127 * allow large page mapping for the tracked page 128 * after the tracker is gone. 129 */ 130 kvm_mmu_gfn_allow_lpage(slot, gfn); 131 } 132 133 /* 134 * check if the corresponding access on the specified guest page is tracked. 135 */ 136 bool kvm_gfn_is_write_tracked(struct kvm *kvm, 137 const struct kvm_memory_slot *slot, gfn_t gfn) 138 { 139 int index; 140 141 if (!slot) 142 return false; 143 144 if (!kvm_page_track_write_tracking_enabled(kvm)) 145 return false; 146 147 index = gfn_to_index(gfn, slot->base_gfn, PG_LEVEL_4K); 148 return !!READ_ONCE(slot->arch.gfn_write_track[index]); 149 } 150 151 #ifdef CONFIG_KVM_EXTERNAL_WRITE_TRACKING 152 void kvm_page_track_cleanup(struct kvm *kvm) 153 { 154 struct kvm_page_track_notifier_head *head; 155 156 head = &kvm->arch.track_notifier_head; 157 cleanup_srcu_struct(&head->track_srcu); 158 } 159 160 int kvm_page_track_init(struct kvm *kvm) 161 { 162 struct kvm_page_track_notifier_head *head; 163 164 head = &kvm->arch.track_notifier_head; 165 INIT_HLIST_HEAD(&head->track_notifier_list); 166 return init_srcu_struct(&head->track_srcu); 167 } 168 169 static int kvm_enable_external_write_tracking(struct kvm *kvm) 170 { 171 struct kvm_memslots *slots; 172 struct kvm_memory_slot *slot; 173 int r = 0, i, bkt; 174 175 if (kvm->arch.vm_type == KVM_X86_TDX_VM) 176 return -EOPNOTSUPP; 177 178 mutex_lock(&kvm->slots_arch_lock); 179 180 /* 181 * Check for *any* write tracking user (not just external users) under 182 * lock. This avoids unnecessary work, e.g. if KVM itself is using 183 * write tracking, or if two external users raced when registering. 184 */ 185 if (kvm_page_track_write_tracking_enabled(kvm)) 186 goto out_success; 187 188 for (i = 0; i < kvm_arch_nr_memslot_as_ids(kvm); i++) { 189 slots = __kvm_memslots(kvm, i); 190 kvm_for_each_memslot(slot, bkt, slots) { 191 /* 192 * Intentionally do NOT free allocations on failure to 193 * avoid having to track which allocations were made 194 * now versus when the memslot was created. The 195 * metadata is guaranteed to be freed when the slot is 196 * freed, and will be kept/used if userspace retries 197 * the failed ioctl() instead of killing the VM. 198 */ 199 r = kvm_page_track_write_tracking_alloc(slot); 200 if (r) 201 goto out_unlock; 202 } 203 } 204 205 out_success: 206 /* 207 * Ensure that external_write_tracking_enabled becomes true strictly 208 * after all the related pointers are set. 209 */ 210 smp_store_release(&kvm->arch.external_write_tracking_enabled, true); 211 out_unlock: 212 mutex_unlock(&kvm->slots_arch_lock); 213 return r; 214 } 215 216 /* 217 * register the notifier so that event interception for the tracked guest 218 * pages can be received. 219 */ 220 int kvm_page_track_register_notifier(struct kvm *kvm, 221 struct kvm_page_track_notifier_node *n) 222 { 223 struct kvm_page_track_notifier_head *head; 224 int r; 225 226 if (!kvm || kvm->mm != current->mm) 227 return -ESRCH; 228 229 if (!kvm_external_write_tracking_enabled(kvm)) { 230 r = kvm_enable_external_write_tracking(kvm); 231 if (r) 232 return r; 233 } 234 235 kvm_get_kvm(kvm); 236 237 head = &kvm->arch.track_notifier_head; 238 239 write_lock(&kvm->mmu_lock); 240 hlist_add_head_rcu(&n->node, &head->track_notifier_list); 241 write_unlock(&kvm->mmu_lock); 242 return 0; 243 } 244 EXPORT_SYMBOL_GPL(kvm_page_track_register_notifier); 245 246 /* 247 * stop receiving the event interception. It is the opposed operation of 248 * kvm_page_track_register_notifier(). 249 */ 250 void kvm_page_track_unregister_notifier(struct kvm *kvm, 251 struct kvm_page_track_notifier_node *n) 252 { 253 struct kvm_page_track_notifier_head *head; 254 255 head = &kvm->arch.track_notifier_head; 256 257 write_lock(&kvm->mmu_lock); 258 hlist_del_rcu(&n->node); 259 write_unlock(&kvm->mmu_lock); 260 synchronize_srcu(&head->track_srcu); 261 262 kvm_put_kvm(kvm); 263 } 264 EXPORT_SYMBOL_GPL(kvm_page_track_unregister_notifier); 265 266 /* 267 * Notify the node that write access is intercepted and write emulation is 268 * finished at this time. 269 * 270 * The node should figure out if the written page is the one that node is 271 * interested in by itself. 272 */ 273 void __kvm_page_track_write(struct kvm *kvm, gpa_t gpa, const u8 *new, int bytes) 274 { 275 struct kvm_page_track_notifier_head *head; 276 struct kvm_page_track_notifier_node *n; 277 int idx; 278 279 head = &kvm->arch.track_notifier_head; 280 281 if (hlist_empty(&head->track_notifier_list)) 282 return; 283 284 idx = srcu_read_lock(&head->track_srcu); 285 hlist_for_each_entry_srcu(n, &head->track_notifier_list, node, 286 srcu_read_lock_held(&head->track_srcu)) 287 if (n->track_write) 288 n->track_write(gpa, new, bytes, n); 289 srcu_read_unlock(&head->track_srcu, idx); 290 } 291 292 /* 293 * Notify external page track nodes that a memory region is being removed from 294 * the VM, e.g. so that users can free any associated metadata. 295 */ 296 void kvm_page_track_delete_slot(struct kvm *kvm, struct kvm_memory_slot *slot) 297 { 298 struct kvm_page_track_notifier_head *head; 299 struct kvm_page_track_notifier_node *n; 300 int idx; 301 302 head = &kvm->arch.track_notifier_head; 303 304 if (hlist_empty(&head->track_notifier_list)) 305 return; 306 307 idx = srcu_read_lock(&head->track_srcu); 308 hlist_for_each_entry_srcu(n, &head->track_notifier_list, node, 309 srcu_read_lock_held(&head->track_srcu)) 310 if (n->track_remove_region) 311 n->track_remove_region(slot->base_gfn, slot->npages, n); 312 srcu_read_unlock(&head->track_srcu, idx); 313 } 314 315 /* 316 * add guest page to the tracking pool so that corresponding access on that 317 * page will be intercepted. 318 * 319 * @kvm: the guest instance we are interested in. 320 * @gfn: the guest page. 321 */ 322 int kvm_write_track_add_gfn(struct kvm *kvm, gfn_t gfn) 323 { 324 struct kvm_memory_slot *slot; 325 int idx; 326 327 idx = srcu_read_lock(&kvm->srcu); 328 329 slot = gfn_to_memslot(kvm, gfn); 330 if (!slot) { 331 srcu_read_unlock(&kvm->srcu, idx); 332 return -EINVAL; 333 } 334 335 write_lock(&kvm->mmu_lock); 336 __kvm_write_track_add_gfn(kvm, slot, gfn); 337 write_unlock(&kvm->mmu_lock); 338 339 srcu_read_unlock(&kvm->srcu, idx); 340 341 return 0; 342 } 343 EXPORT_SYMBOL_GPL(kvm_write_track_add_gfn); 344 345 /* 346 * remove the guest page from the tracking pool which stops the interception 347 * of corresponding access on that page. 348 * 349 * @kvm: the guest instance we are interested in. 350 * @gfn: the guest page. 351 */ 352 int kvm_write_track_remove_gfn(struct kvm *kvm, gfn_t gfn) 353 { 354 struct kvm_memory_slot *slot; 355 int idx; 356 357 idx = srcu_read_lock(&kvm->srcu); 358 359 slot = gfn_to_memslot(kvm, gfn); 360 if (!slot) { 361 srcu_read_unlock(&kvm->srcu, idx); 362 return -EINVAL; 363 } 364 365 write_lock(&kvm->mmu_lock); 366 __kvm_write_track_remove_gfn(kvm, slot, gfn); 367 write_unlock(&kvm->mmu_lock); 368 369 srcu_read_unlock(&kvm->srcu, idx); 370 371 return 0; 372 } 373 EXPORT_SYMBOL_GPL(kvm_write_track_remove_gfn); 374 #endif 375