Lines Matching +full:page +full:- +full:level
1 // SPDX-License-Identifier: GPL-2.0
17 INIT_LIST_HEAD(&kvm->arch.tdp_mmu_roots); in kvm_mmu_init_tdp_mmu()
18 spin_lock_init(&kvm->arch.tdp_mmu_pages_lock); in kvm_mmu_init_tdp_mmu()
26 lockdep_assert_held_read(&kvm->mmu_lock); in kvm_lockdep_assert_mmu_lock_held()
28 lockdep_assert_held_write(&kvm->mmu_lock); in kvm_lockdep_assert_mmu_lock_held()
44 KVM_MMU_WARN_ON(atomic64_read(&kvm->arch.tdp_mmu_pages)); in kvm_mmu_uninit_tdp_mmu()
46 WARN_ON(!list_empty(&kvm->arch.tdp_mmu_roots)); in kvm_mmu_uninit_tdp_mmu()
58 free_page((unsigned long)sp->external_spt); in tdp_mmu_free_sp()
59 free_page((unsigned long)sp->spt); in tdp_mmu_free_sp()
64 * This is called through call_rcu in order to free TDP page table memory
67 * By only accessing TDP MMU page table memory in an RCU read critical
81 if (!refcount_dec_and_test(&root->tdp_mmu_root_count)) in kvm_tdp_mmu_put_root()
89 KVM_BUG_ON(!is_tdp_mmu_page(root) || !root->role.invalid, kvm); in kvm_tdp_mmu_put_root()
91 spin_lock(&kvm->arch.tdp_mmu_pages_lock); in kvm_tdp_mmu_put_root()
92 list_del_rcu(&root->link); in kvm_tdp_mmu_put_root()
93 spin_unlock(&kvm->arch.tdp_mmu_pages_lock); in kvm_tdp_mmu_put_root()
94 call_rcu(&root->rcu_head, tdp_mmu_free_sp_rcu_callback); in kvm_tdp_mmu_put_root()
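
kvm_tdp_mmu_put_root() above is the classic last-reference pattern: drop the refcount, and only the caller that takes it to zero unlinks the root (under tdp_mmu_pages_lock) and schedules the free via call_rcu(). A minimal standalone sketch of that shape, using C11 atomics and an immediate free in place of the RCU-deferred one; the struct and helper names here are illustrative, not the kernel's:

#include <stdatomic.h>
#include <stdlib.h>

struct root {
	atomic_int refcount;
	/* ... page table pages, list linkage, role, etc. ... */
};

/* Drop one reference; the caller that drops the last one tears the root down. */
static void root_put(struct root *root)
{
	/* atomic_fetch_sub() returns the old value, so 1 means we were last. */
	if (atomic_fetch_sub_explicit(&root->refcount, 1,
				      memory_order_acq_rel) != 1)
		return;

	/*
	 * Last reference: the kernel unlinks the root from the RCU-protected
	 * list under tdp_mmu_pages_lock and defers the free with call_rcu();
	 * this sketch simply frees it right away.
	 */
	free(root);
}
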
103 if (root->role.invalid && !(types & KVM_INVALID_ROOTS)) in tdp_mmu_root_match()
115 * must hold a reference to @prev_root if it's non-NULL).
128 * While the roots themselves are RCU-protected, fields such as in tdp_mmu_next_root()
131 lockdep_assert_held(&kvm->mmu_lock); in tdp_mmu_next_root()
136 next_root = list_next_or_null_rcu(&kvm->arch.tdp_mmu_roots, in tdp_mmu_next_root()
137 &prev_root->link, in tdp_mmu_next_root()
140 next_root = list_first_or_null_rcu(&kvm->arch.tdp_mmu_roots, in tdp_mmu_next_root()
148 next_root = list_next_or_null_rcu(&kvm->arch.tdp_mmu_roots, in tdp_mmu_next_root()
149 &next_root->link, typeof(*next_root), link); in tdp_mmu_next_root()
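
tdp_mmu_next_root() keeps the walk safe by taking a reference on the candidate next root before the reference on @prev_root is dropped, and by skipping roots whose refcount has already hit zero. A simplified, single-threaded sketch of that "get next, then put prev" ordering over a plain linked list (root_tryget()/root_put() and the list layout are invented stand-ins for the kernel's refcount and RCU list):

#include <stdatomic.h>
#include <stdbool.h>
#include <stddef.h>

struct root {
	atomic_int refcount;
	struct root *next;
};

/* Take a reference only if the root is still live (refcount > 0). */
static bool root_tryget(struct root *root)
{
	int old = atomic_load(&root->refcount);

	while (old > 0) {
		/* On failure, old is refreshed with the current count. */
		if (atomic_compare_exchange_weak(&root->refcount, &old, old + 1))
			return true;
	}
	return false;
}

static void root_put(struct root *root)
{
	atomic_fetch_sub(&root->refcount, 1);	/* freeing omitted for brevity */
}

/*
 * Return the next root the caller may use, with a reference held on it, and
 * only then drop the reference the caller held on @prev.  Roots that are
 * already being torn down (refcount == 0) are skipped.
 */
static struct root *next_root(struct root *head, struct root *prev)
{
	struct root *next = prev ? prev->next : head;

	while (next && !root_tryget(next))
		next = next->next;

	if (prev)
		root_put(prev);
	return next;
}
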
171 ({ lockdep_assert_held(&(_kvm)->mmu_lock); }), _root; \
181 ({ lockdep_assert_held(&(_kvm)->mmu_lock); }), _root; \
187 * inherently yield-friendly and should use the yield-safe variant above.
192 list_for_each_entry(_root, &_kvm->arch.tdp_mmu_roots, link) \
199 * Iterate over all TDP MMU roots in an RCU read-side critical section.
206 list_for_each_entry_rcu(_root, &_kvm->arch.tdp_mmu_roots, link) \
218 sp = kvm_mmu_memory_cache_alloc(&vcpu->arch.mmu_page_header_cache); in tdp_mmu_alloc_sp()
219 sp->spt = kvm_mmu_memory_cache_alloc(&vcpu->arch.mmu_shadow_page_cache); in tdp_mmu_alloc_sp()
227 INIT_LIST_HEAD(&sp->possible_nx_huge_page_link); in tdp_mmu_init_sp()
229 set_page_private(virt_to_page(sp->spt), (unsigned long)sp); in tdp_mmu_init_sp()
231 sp->role = role; in tdp_mmu_init_sp()
232 sp->gfn = gfn; in tdp_mmu_init_sp()
233 sp->ptep = sptep; in tdp_mmu_init_sp()
234 sp->tdp_mmu_page = true; in tdp_mmu_init_sp()
245 parent_sp = sptep_to_sp(rcu_dereference(iter->sptep)); in tdp_mmu_init_child_sp()
247 role = parent_sp->role; in tdp_mmu_init_child_sp()
248 role.level--; in tdp_mmu_init_child_sp()
250 tdp_mmu_init_sp(child_sp, iter->sptep, iter->gfn, role); in tdp_mmu_init_child_sp()
255 struct kvm_mmu *mmu = vcpu->arch.mmu; in kvm_tdp_mmu_alloc_root()
256 union kvm_mmu_page_role role = mmu->root_role; in kvm_tdp_mmu_alloc_root()
258 struct kvm *kvm = vcpu->kvm; in kvm_tdp_mmu_alloc_root()
270 read_lock(&kvm->mmu_lock); in kvm_tdp_mmu_alloc_root()
273 if (root->role.word == role.word) in kvm_tdp_mmu_alloc_root()
277 spin_lock(&kvm->arch.tdp_mmu_pages_lock); in kvm_tdp_mmu_alloc_root()
287 list_for_each_entry(root, &kvm->arch.tdp_mmu_roots, link) { in kvm_tdp_mmu_alloc_root()
288 if (root->role.word == role.word && in kvm_tdp_mmu_alloc_root()
303 refcount_set(&root->tdp_mmu_root_count, 2); in kvm_tdp_mmu_alloc_root()
304 list_add_rcu(&root->link, &kvm->arch.tdp_mmu_roots); in kvm_tdp_mmu_alloc_root()
307 spin_unlock(&kvm->arch.tdp_mmu_pages_lock); in kvm_tdp_mmu_alloc_root()
309 read_unlock(&kvm->mmu_lock); in kvm_tdp_mmu_alloc_root()
316 mmu->mirror_root_hpa = __pa(root->spt); in kvm_tdp_mmu_alloc_root()
318 mmu->root.hpa = __pa(root->spt); in kvm_tdp_mmu_alloc_root()
319 mmu->root.pgd = 0; in kvm_tdp_mmu_alloc_root()
324 u64 old_spte, u64 new_spte, int level,
329 kvm_account_pgtable_pages((void *)sp->spt, +1); in tdp_account_mmu_page()
331 atomic64_inc(&kvm->arch.tdp_mmu_pages); in tdp_account_mmu_page()
337 kvm_account_pgtable_pages((void *)sp->spt, -1); in tdp_unaccount_mmu_page()
339 atomic64_dec(&kvm->arch.tdp_mmu_pages); in tdp_unaccount_mmu_page()
344 * tdp_mmu_unlink_sp() - Remove a shadow page from the list of used pages
347 * @sp: the page to be removed
353 if (!sp->nx_huge_page_disallowed) in tdp_mmu_unlink_sp()
356 spin_lock(&kvm->arch.tdp_mmu_pages_lock); in tdp_mmu_unlink_sp()
357 sp->nx_huge_page_disallowed = false; in tdp_mmu_unlink_sp()
359 spin_unlock(&kvm->arch.tdp_mmu_pages_lock); in tdp_mmu_unlink_sp()
363 int level) in remove_external_spte() argument
371 * But remove_external_spte() will be called on non-leaf PTEs via in remove_external_spte()
375 if (!is_last_spte(old_spte, level)) in remove_external_spte()
379 lockdep_assert_held_write(&kvm->mmu_lock); in remove_external_spte()
381 ret = static_call(kvm_x86_remove_external_spte)(kvm, gfn, level, old_pfn); in remove_external_spte()
386 * handle_removed_pt() - handle a page table removed from the TDP structure
389 * @pt: the page removed from the paging structure
394 * Given a page table that has been removed from the TDP paging structure,
395 * iterates through the page table to clear SPTEs and free child page tables.
399 * this thread will be responsible for ensuring the page is freed. Hence the
405 int level = sp->role.level; in handle_removed_pt() local
406 gfn_t base_gfn = sp->gfn; in handle_removed_pt()
415 gfn_t gfn = base_gfn + i * KVM_PAGES_PER_HPAGE(level); in handle_removed_pt()
423 * handling a page fault could overwrite it, so in handle_removed_pt()
435 * If the SPTE is not MMU-present, there is no backing in handle_removed_pt()
436 * page associated with the SPTE and so no side effects in handle_removed_pt()
453 * document that A/D assists can use upper-level PxE in handle_removed_pt()
455 * still access the page and mark it dirty. in handle_removed_pt()
467 * scenario where a non-atomic update could result in a in handle_removed_pt()
469 * sufficient as a fast page fault could read the upper in handle_removed_pt()
470 * level SPTE before it is zapped, and then make this in handle_removed_pt()
476 FROZEN_SPTE, level); in handle_removed_pt()
479 old_spte, FROZEN_SPTE, level, shared); in handle_removed_pt()
483 remove_external_spte(kvm, gfn, old_spte, level); in handle_removed_pt()
488 WARN_ON(static_call(kvm_x86_free_external_spt)(kvm, base_gfn, sp->role.level, in handle_removed_pt()
489 sp->external_spt))) { in handle_removed_pt()
491 * Failed to free page table page in mirror page table and in handle_removed_pt()
493 * Intentionally leak the page to prevent the kernel from in handle_removed_pt()
494 * accessing the encrypted page. in handle_removed_pt()
496 sp->external_spt = NULL; in handle_removed_pt()
499 call_rcu(&sp->rcu_head, tdp_mmu_free_sp_rcu_callback); in handle_removed_pt()
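
When run with mmu_lock held for read, handle_removed_pt() must assume other threads can still touch the detached table, so it atomically exchanges each of the 512 entries with FROZEN_SPTE, retrying until the sentinel sticks, and only then performs the per-entry bookkeeping. A compact userspace analogue of that freeze loop (the sentinel value and the bookkeeping hook are placeholders):

#include <stdatomic.h>
#include <stdint.h>

#define ENTRIES_PER_TABLE	512
#define FROZEN_ENTRY		((uint64_t)1 << 59)	/* placeholder sentinel */

/* Kernel: handle_changed_spte() + remove_external_spte(); a no-op here. */
static void handle_changed_entry(int index, uint64_t old_entry)
{
	(void)index;
	(void)old_entry;
}

/* Freeze every entry of a page table that has been unlinked from the tree. */
static void freeze_removed_table(_Atomic uint64_t *pt)
{
	for (int i = 0; i < ENTRIES_PER_TABLE; i++) {
		uint64_t old_entry = atomic_load(&pt[i]);

		/*
		 * A concurrent page-fault handler may still rewrite this
		 * entry, so keep retrying until the sentinel sticks; each
		 * failed exchange refreshes old_entry with the latest value.
		 */
		while (!atomic_compare_exchange_weak(&pt[i], &old_entry,
						     FROZEN_ENTRY))
			;

		handle_changed_entry(i, old_entry);
	}
}
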
502 static void *get_external_spt(gfn_t gfn, u64 new_spte, int level) in get_external_spt() argument
504 if (is_shadow_present_pte(new_spte) && !is_last_spte(new_spte, level)) { in get_external_spt()
507 WARN_ON_ONCE(sp->role.level + 1 != level); in get_external_spt()
508 WARN_ON_ONCE(sp->gfn != gfn); in get_external_spt()
509 return sp->external_spt; in get_external_spt()
517 u64 new_spte, int level) in set_external_spte_present() argument
521 bool is_leaf = is_present && is_last_spte(new_spte, level); in set_external_spte_present()
527 lockdep_assert_held(&kvm->mmu_lock); in set_external_spte_present()
530 * page table has been modified. Use FROZEN_SPTE similar to in set_external_spte_present()
534 return -EBUSY; in set_external_spte_present()
537 * Use a different call to set up either a middle-level in set_external_spte_present()
538 * external page table or a leaf. in set_external_spte_present()
541 ret = static_call(kvm_x86_set_external_spte)(kvm, gfn, level, new_pfn); in set_external_spte_present()
543 void *external_spt = get_external_spt(gfn, new_spte, level); in set_external_spte_present()
546 ret = static_call(kvm_x86_link_external_spt)(kvm, gfn, level, external_spt); in set_external_spte_present()
556 * handle_changed_spte - handle bookkeeping associated with an SPTE change
562 * @level: the level of the PT the SPTE is part of in the paging structure
572 u64 old_spte, u64 new_spte, int level, in handle_changed_spte() argument
577 bool was_leaf = was_present && is_last_spte(old_spte, level); in handle_changed_spte()
578 bool is_leaf = is_present && is_last_spte(new_spte, level); in handle_changed_spte()
581 WARN_ON_ONCE(level > PT64_ROOT_MAX_LEVEL); in handle_changed_spte()
582 WARN_ON_ONCE(level < PG_LEVEL_4K); in handle_changed_spte()
583 WARN_ON_ONCE(gfn & (KVM_PAGES_PER_HPAGE(level) - 1)); in handle_changed_spte()
590 * should be zapping the SPTE before the main MM's page table is in handle_changed_spte()
598 "as_id: %d gfn: %llx old_spte: %llx new_spte: %llx level: %d", in handle_changed_spte()
599 as_id, gfn, old_spte, new_spte, level); in handle_changed_spte()
611 trace_kvm_tdp_mmu_spte_changed(as_id, gfn, level, old_spte, new_spte); in handle_changed_spte()
617 * The only times a SPTE should be changed from a non-present to in handle_changed_spte()
618 * non-present state is when an MMIO entry is installed/modified/ in handle_changed_spte()
636 "as_id: %d gfn: %llx old_spte: %llx new_spte: %llx level: %d", in handle_changed_spte()
637 as_id, gfn, old_spte, new_spte, level); in handle_changed_spte()
642 kvm_update_page_stats(kvm, level, is_leaf ? 1 : -1); in handle_changed_spte()
652 handle_removed_pt(kvm, spte_to_child_pt(old_spte, level), shared); in handle_changed_spte()
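
handle_changed_spte() derives all of its bookkeeping from the old value, the new value, and the level: whether each value is present, whether each is a leaf at that level, and therefore whether a mapped page appeared or disappeared (hence kvm_update_page_stats(kvm, level, is_leaf ? 1 : -1)). A stripped-down sketch of that diffing logic; the present/leaf predicates below are invented, since the real bit layout lives in spte.h:

#include <stdbool.h>
#include <stdint.h>

/* Invented predicates; the kernel's live in arch/x86/kvm/mmu/spte.h. */
static bool pte_present(uint64_t pte)
{
	return pte & 1;
}

static bool pte_is_leaf(uint64_t pte, int level)
{
	/* Level-1 (4K) entries are always leaves; higher levels need a huge bit. */
	return pte_present(pte) && (level == 1 || (pte & (1ull << 7)));
}

static long nr_mapped_pages;	/* stand-in for kvm_update_page_stats() */

/* Account for a PTE at @level changing from @old_pte to @new_pte. */
static void handle_changed_pte(uint64_t old_pte, uint64_t new_pte, int level)
{
	bool was_leaf = pte_is_leaf(old_pte, level);
	bool is_leaf = pte_is_leaf(new_pte, level);

	if (is_leaf == was_leaf)
		return;		/* leaf-ness unchanged, nothing to account */

	/* A mapping appeared (+1) or disappeared (-1) at this level. */
	nr_mapped_pages += is_leaf ? 1 : -1;
}
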
662 * and pre-checking before inserting a new SPTE is advantageous as it in __tdp_mmu_set_spte_atomic()
665 WARN_ON_ONCE(iter->yielded || is_frozen_spte(iter->old_spte)); in __tdp_mmu_set_spte_atomic()
667 if (is_mirror_sptep(iter->sptep) && !is_frozen_spte(new_spte)) { in __tdp_mmu_set_spte_atomic()
675 return -EBUSY; in __tdp_mmu_set_spte_atomic()
677 ret = set_external_spte_present(kvm, iter->sptep, iter->gfn, in __tdp_mmu_set_spte_atomic()
678 iter->old_spte, new_spte, iter->level); in __tdp_mmu_set_spte_atomic()
682 u64 *sptep = rcu_dereference(iter->sptep); in __tdp_mmu_set_spte_atomic()
688 * updates iter->old_spte with the current value, so the caller in __tdp_mmu_set_spte_atomic()
692 if (!try_cmpxchg64(sptep, &iter->old_spte, new_spte)) in __tdp_mmu_set_spte_atomic()
693 return -EBUSY; in __tdp_mmu_set_spte_atomic()
700 * tdp_mmu_set_spte_atomic - Set a TDP MMU SPTE atomically
701 * and handle the associated bookkeeping. Do not mark the page dirty
704 * If setting the SPTE fails because it has changed, iter->old_spte will be
711 * * 0 - If the SPTE was set.
712 * * -EBUSY - If the SPTE cannot be set. In this case this function will have
713 * no side-effects other than setting iter->old_spte to the last
722 lockdep_assert_held_read(&kvm->mmu_lock); in tdp_mmu_set_spte_atomic()
728 handle_changed_spte(kvm, iter->as_id, iter->gfn, iter->old_spte, in tdp_mmu_set_spte_atomic()
729 new_spte, iter->level, true); in tdp_mmu_set_spte_atomic()
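
The contract spelled out above (0 on success, -EBUSY if the SPTE changed underfoot, with iter->old_spte refreshed to the value that won) maps almost one-to-one onto C11's atomic_compare_exchange_strong(), which likewise rewrites the expected value on failure. A hedged sketch of just that core, with everything else (freezing, external SPTEs, bookkeeping) omitted:

#include <errno.h>
#include <stdatomic.h>
#include <stdint.h>

/*
 * Install @new_spte iff *sptep still holds *old_spte.  On a lost race, return
 * -EBUSY with *old_spte updated to the value that won, so the caller can
 * re-evaluate without re-reading the SPTE.
 */
static int set_spte_atomic(_Atomic uint64_t *sptep, uint64_t *old_spte,
			   uint64_t new_spte)
{
	if (!atomic_compare_exchange_strong(sptep, old_spte, new_spte))
		return -EBUSY;	/* *old_spte now holds the current value */

	/* Kernel: handle_changed_spte() bookkeeping would run here. */
	return 0;
}
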
735 * tdp_mmu_set_spte - Set a TDP MMU SPTE and handle the associated bookkeeping
742 * @level: The level _containing_ the SPTE (its parent PT's level)
748 u64 old_spte, u64 new_spte, gfn_t gfn, int level) in tdp_mmu_set_spte() argument
750 lockdep_assert_held_write(&kvm->mmu_lock); in tdp_mmu_set_spte()
761 old_spte = kvm_tdp_mmu_write_spte(sptep, old_spte, new_spte, level); in tdp_mmu_set_spte()
763 handle_changed_spte(kvm, as_id, gfn, old_spte, new_spte, level, false); in tdp_mmu_set_spte()
766 * Users that do non-atomic setting of PTEs don't operate on mirror in tdp_mmu_set_spte()
771 remove_external_spte(kvm, gfn, old_spte, level); in tdp_mmu_set_spte()
780 WARN_ON_ONCE(iter->yielded); in tdp_mmu_iter_set_spte()
781 iter->old_spte = tdp_mmu_set_spte(kvm, iter->as_id, iter->sptep, in tdp_mmu_iter_set_spte()
782 iter->old_spte, new_spte, in tdp_mmu_iter_set_spte()
783 iter->gfn, iter->level); in tdp_mmu_iter_set_spte()
792 !is_last_spte(_iter.old_spte, _iter.level)) \
799 if (!need_resched() && !rwlock_needbreak(&kvm->mmu_lock)) in tdp_mmu_iter_need_resched()
803 return iter->next_last_level_gfn != iter->yielded_gfn; in tdp_mmu_iter_need_resched()
813 * If this function yields, iter->yielded is set and the caller must skip to
824 KVM_MMU_WARN_ON(iter->yielded); in tdp_mmu_iter_cond_resched()
835 cond_resched_rwlock_read(&kvm->mmu_lock); in tdp_mmu_iter_cond_resched()
837 cond_resched_rwlock_write(&kvm->mmu_lock); in tdp_mmu_iter_cond_resched()
841 WARN_ON_ONCE(iter->gfn > iter->next_last_level_gfn); in tdp_mmu_iter_cond_resched()
843 iter->yielded = true; in tdp_mmu_iter_cond_resched()
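
tdp_mmu_iter_need_resched() refuses to yield unless the iterator has advanced past the GFN at which it last yielded (next_last_level_gfn != yielded_gfn); otherwise a permanently contended mmu_lock could make the walk restart at the same GFN forever. A schematic loop showing only that forward-progress guard, with trivial stand-ins for the yield and work hooks:

#include <stdbool.h>
#include <stdint.h>

typedef uint64_t gfn_t;

static bool should_yield(void)        { return false; }	/* e.g. need_resched() */
static void yield_and_reacquire(void) { }		/* e.g. cond_resched_rwlock_*() */
static void process_gfn(gfn_t gfn)    { (void)gfn; }

/* Walk [start, end), yielding only once forward progress has been made. */
static void walk_range(gfn_t start, gfn_t end)
{
	gfn_t yielded_gfn = start;	/* GFN the walk (re)started from */

	for (gfn_t gfn = start; gfn < end; gfn++) {
		/*
		 * Never yield at the restart point itself: at least one GFN
		 * must be retired between yields to guarantee progress.
		 */
		if (gfn != yielded_gfn && should_yield()) {
			yield_and_reacquire();
			yielded_gfn = gfn;	/* the walk resumes here */
		}
		process_gfn(gfn);
	}
}
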
871 if (iter.level > zap_level) in __tdp_mmu_zap_root()
892 * and lead to use-after-free as zapping a SPTE triggers "writeback" of in tdp_mmu_zap_root()
893 * dirty accessed bits to the SPTE's associated struct page. in tdp_mmu_zap_root()
895 WARN_ON_ONCE(!refcount_read(&root->tdp_mmu_root_count)); in tdp_mmu_zap_root()
904 * preempt models) or mmu_lock contention (full or real-time models). in tdp_mmu_zap_root()
909 * in order to mimic the page fault path, which can replace a 1GiB page in tdp_mmu_zap_root()
923 __tdp_mmu_zap_root(kvm, root, shared, root->role.level); in tdp_mmu_zap_root()
933 * This helper intentionally doesn't allow zapping a root shadow page, in kvm_tdp_mmu_zap_sp()
934 * which doesn't have a parent page table and thus no associated entry. in kvm_tdp_mmu_zap_sp()
936 if (WARN_ON_ONCE(!sp->ptep)) in kvm_tdp_mmu_zap_sp()
939 old_spte = kvm_tdp_mmu_read_spte(sp->ptep); in kvm_tdp_mmu_zap_sp()
943 tdp_mmu_set_spte(kvm, kvm_mmu_page_as_id(sp), sp->ptep, old_spte, in kvm_tdp_mmu_zap_sp()
944 SHADOW_NONPRESENT_VALUE, sp->gfn, sp->role.level + 1); in kvm_tdp_mmu_zap_sp()
963 lockdep_assert_held_write(&kvm->mmu_lock); in tdp_mmu_zap_leafs()
975 !is_last_spte(iter.old_spte, iter.level)) in tdp_mmu_zap_leafs()
984 if (!root->role.invalid) in tdp_mmu_zap_leafs()
1006 lockdep_assert_held_write(&kvm->mmu_lock); in kvm_tdp_mmu_zap_leafs()
1007 for_each_valid_tdp_mmu_root_yield_safe(kvm, root, -1) in kvm_tdp_mmu_zap_leafs()
1023 * "fast zap". Walking zapped top-level SPTEs isn't all that expensive in kvm_tdp_mmu_zap_all()
1030 lockdep_assert_held_write(&kvm->mmu_lock); in kvm_tdp_mmu_zap_all()
1031 __for_each_tdp_mmu_root_yield_safe(kvm, root, -1, in kvm_tdp_mmu_zap_all()
1045 read_lock(&kvm->mmu_lock); in kvm_tdp_mmu_zap_invalidated_roots()
1047 write_lock(&kvm->mmu_lock); in kvm_tdp_mmu_zap_invalidated_roots()
1050 if (!root->tdp_mmu_scheduled_root_to_zap) in kvm_tdp_mmu_zap_invalidated_roots()
1053 root->tdp_mmu_scheduled_root_to_zap = false; in kvm_tdp_mmu_zap_invalidated_roots()
1054 KVM_BUG_ON(!root->role.invalid, kvm); in kvm_tdp_mmu_zap_invalidated_roots()
1060 * TLB flush on reuse also invalidates paging-structure-cache in kvm_tdp_mmu_zap_invalidated_roots()
1076 read_unlock(&kvm->mmu_lock); in kvm_tdp_mmu_zap_invalidated_roots()
1078 write_unlock(&kvm->mmu_lock); in kvm_tdp_mmu_zap_invalidated_roots()
1115 refcount_read(&kvm->users_count) && kvm->created_vcpus) in kvm_tdp_mmu_invalidate_roots()
1116 lockdep_assert_held_write(&kvm->mmu_lock); in kvm_tdp_mmu_invalidate_roots()
1123 list_for_each_entry(root, &kvm->arch.tdp_mmu_roots, link) { in kvm_tdp_mmu_invalidate_roots()
1133 if (!root->role.invalid) { in kvm_tdp_mmu_invalidate_roots()
1134 root->tdp_mmu_scheduled_root_to_zap = true; in kvm_tdp_mmu_invalidate_roots()
1135 root->role.invalid = true; in kvm_tdp_mmu_invalidate_roots()
1141 * Installs a last-level SPTE to handle a TDP page fault.
1148 struct kvm_mmu_page *sp = sptep_to_sp(rcu_dereference(iter->sptep)); in tdp_mmu_map_handle_target_level()
1153 if (WARN_ON_ONCE(sp->role.level != fault->goal_level)) in tdp_mmu_map_handle_target_level()
1156 if (fault->prefetch && is_shadow_present_pte(iter->old_spte)) in tdp_mmu_map_handle_target_level()
1159 if (is_shadow_present_pte(iter->old_spte) && in tdp_mmu_map_handle_target_level()
1160 is_access_allowed(fault, iter->old_spte) && in tdp_mmu_map_handle_target_level()
1161 is_last_spte(iter->old_spte, iter->level)) in tdp_mmu_map_handle_target_level()
1164 if (unlikely(!fault->slot)) in tdp_mmu_map_handle_target_level()
1165 new_spte = make_mmio_spte(vcpu, iter->gfn, ACC_ALL); in tdp_mmu_map_handle_target_level()
1167 wrprot = make_spte(vcpu, sp, fault->slot, ACC_ALL, iter->gfn, in tdp_mmu_map_handle_target_level()
1168 fault->pfn, iter->old_spte, fault->prefetch, in tdp_mmu_map_handle_target_level()
1169 false, fault->map_writable, &new_spte); in tdp_mmu_map_handle_target_level()
1171 if (new_spte == iter->old_spte) in tdp_mmu_map_handle_target_level()
1173 else if (tdp_mmu_set_spte_atomic(vcpu->kvm, iter, new_spte)) in tdp_mmu_map_handle_target_level()
1175 else if (is_shadow_present_pte(iter->old_spte) && in tdp_mmu_map_handle_target_level()
1176 (!is_last_spte(iter->old_spte, iter->level) || in tdp_mmu_map_handle_target_level()
1177 WARN_ON_ONCE(leaf_spte_change_needs_tlb_flush(iter->old_spte, new_spte)))) in tdp_mmu_map_handle_target_level()
1178 kvm_flush_remote_tlbs_gfn(vcpu->kvm, iter->gfn, iter->level); in tdp_mmu_map_handle_target_level()
1181 * If the page fault was caused by a write but the page is write in tdp_mmu_map_handle_target_level()
1185 if (wrprot && fault->write) in tdp_mmu_map_handle_target_level()
1189 if (unlikely(is_mmio_spte(vcpu->kvm, new_spte))) { in tdp_mmu_map_handle_target_level()
1190 vcpu->stat.pf_mmio_spte_created++; in tdp_mmu_map_handle_target_level()
1191 trace_mark_mmio_spte(rcu_dereference(iter->sptep), iter->gfn, in tdp_mmu_map_handle_target_level()
1195 trace_kvm_mmu_set_spte(iter->level, iter->gfn, in tdp_mmu_map_handle_target_level()
1196 rcu_dereference(iter->sptep)); in tdp_mmu_map_handle_target_level()
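
The spurious-fault fast path above (is_shadow_present_pte() && is_access_allowed() && is_last_spte()) bails out when a racing vCPU has already installed a leaf SPTE that grants the faulting access. A small illustration of that three-part check using an invented permission layout; the real test is is_access_allowed() against KVM's SPTE format:

#include <stdbool.h>
#include <stdint.h>

/* Invented PTE bits, for illustration only. */
#define PTE_PRESENT	(1ull << 0)
#define PTE_WRITABLE	(1ull << 1)
#define PTE_EXEC	(1ull << 2)
#define PTE_HUGE	(1ull << 7)

struct fault {
	bool write;
	bool exec;
};

/* True if the PTE already installed by a racing vCPU satisfies this fault. */
static bool fault_is_spurious(uint64_t old_pte, int level,
			      const struct fault *fault)
{
	if (!(old_pte & PTE_PRESENT))
		return false;
	/* Must be a leaf at this level: 4K, or a huge mapping. */
	if (level != 1 && !(old_pte & PTE_HUGE))
		return false;
	if (fault->write && !(old_pte & PTE_WRITABLE))
		return false;
	if (fault->exec && !(old_pte & PTE_EXEC))
		return false;
	return true;
}
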
1203 * tdp_mmu_link_sp - Replace the given spte with an spte pointing to the
1204 * provided page table.
1208 * @sp: The new TDP page table to install.
1211 * Returns: 0 if the new page table was installed. Non-0 if the page table
1212 * could not be installed (e.g. the atomic compare-exchange failed).
1217 u64 spte = make_nonleaf_spte(sp->spt, !kvm_ad_enabled); in tdp_mmu_link_sp()
1237 * Handle a TDP page fault (NPT/EPT violation/misconfiguration) by installing
1238 * page tables and SPTEs to translate the faulting guest physical address.
1243 struct kvm *kvm = vcpu->kvm; in kvm_tdp_mmu_map()
1254 for_each_tdp_pte(iter, kvm, root, fault->gfn, fault->gfn + 1) { in kvm_tdp_mmu_map()
1257 if (fault->nx_huge_page_workaround_enabled) in kvm_tdp_mmu_map()
1258 disallowed_hugepage_adjust(fault, iter.old_spte, iter.level); in kvm_tdp_mmu_map()
1262 * retry, avoiding unnecessary page table allocation and free. in kvm_tdp_mmu_map()
1267 if (iter.level == fault->goal_level) in kvm_tdp_mmu_map()
1270 /* Step down into the lower level page table if it exists. */ in kvm_tdp_mmu_map()
1276 * The SPTE is either non-present or points to a huge page that in kvm_tdp_mmu_map()
1284 sp->nx_huge_page_disallowed = fault->huge_page_disallowed; in kvm_tdp_mmu_map()
1287 /* Large pages are not supported for mirrored roots (TDX). */ in kvm_tdp_mmu_map()
1288 KVM_BUG_ON(is_mirror_sptep(iter.sptep), vcpu->kvm); in kvm_tdp_mmu_map()
1295 * Force the guest to retry if installing an upper level SPTE in kvm_tdp_mmu_map()
1303 if (fault->huge_page_disallowed && in kvm_tdp_mmu_map()
1304 fault->req_level >= iter.level) { in kvm_tdp_mmu_map()
1305 spin_lock(&kvm->arch.tdp_mmu_pages_lock); in kvm_tdp_mmu_map()
1306 if (sp->nx_huge_page_disallowed) in kvm_tdp_mmu_map()
1308 spin_unlock(&kvm->arch.tdp_mmu_pages_lock); in kvm_tdp_mmu_map()
1313 * The walk aborted before reaching the target level, e.g. because the in kvm_tdp_mmu_map()
1314 * iterator detected an upper level SPTE was frozen during traversal. in kvm_tdp_mmu_map()
1316 WARN_ON_ONCE(iter.level == fault->goal_level); in kvm_tdp_mmu_map()
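
kvm_tdp_mmu_map() walks from the root toward fault->goal_level, stepping through present non-leaf SPTEs and allocating plus linking a child table wherever the walk meets a non-present entry above the goal level (the child's role being the parent's with role.level decremented, per tdp_mmu_init_child_sp()). A single-threaded sketch of that walk over a 4-level, 512-entry software page table; the entry encoding and the allocator are invented, and all of the kernel's atomicity, freezing, and huge-page handling is left out:

#include <stdint.h>
#include <stdlib.h>
#include <string.h>

#define ENTRIES		512
#define PTE_PRESENT	(1ull << 0)

typedef uint64_t gfn_t;

/* One 4KiB page table: 512 64-bit entries, matching the hardware layout. */
struct table {
	uint64_t entry[ENTRIES];
};

/* Nine GFN bits select the entry at each level; level 4 is the root. */
static int gfn_index(gfn_t gfn, int level)
{
	return (gfn >> ((level - 1) * 9)) & (ENTRIES - 1);
}

static struct table *entry_to_table(uint64_t entry)
{
	return (struct table *)(uintptr_t)(entry & ~0xfffull);
}

static struct table *alloc_table(void)
{
	struct table *pt = aligned_alloc(4096, sizeof(*pt));	/* NULL check omitted */

	memset(pt, 0, sizeof(*pt));
	return pt;
}

/*
 * Walk from the root toward @goal_level, installing a fresh child table at
 * every non-present entry met on the way, and return the entry at @goal_level
 * so the caller can install the final mapping.
 */
static uint64_t *walk_to_level(struct table *root, gfn_t gfn, int goal_level)
{
	struct table *pt = root;

	for (int level = 4; level > goal_level; level--) {
		uint64_t *entry = &pt->entry[gfn_index(gfn, level)];

		/* Kernel: tdp_mmu_alloc_sp() + tdp_mmu_link_sp(), done atomically. */
		if (!(*entry & PTE_PRESENT))
			*entry = (uintptr_t)alloc_table() | PTE_PRESENT;

		pt = entry_to_table(*entry);
	}
	return &pt->entry[gfn_index(gfn, goal_level)];
}
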
1334 types = kvm_gfn_range_filter_to_root_types(kvm, range->attr_filter) | KVM_INVALID_ROOTS; in kvm_tdp_mmu_unmap_gfn_range()
1336 __for_each_tdp_mmu_root_yield_safe(kvm, root, range->slot->as_id, types) in kvm_tdp_mmu_unmap_gfn_range()
1337 flush = tdp_mmu_zap_leafs(kvm, root, range->start, range->end, in kvm_tdp_mmu_unmap_gfn_range()
1338 range->may_block, flush); in kvm_tdp_mmu_unmap_gfn_range()
1344 * Mark the SPTEs for the range of GFNs [start, end) unaccessed and return non-zero
1349 * return value to determine if the page has been accessed.
1355 if (spte_ad_enabled(iter->old_spte)) { in kvm_tdp_mmu_age_spte()
1356 iter->old_spte = tdp_mmu_clear_spte_bits_atomic(iter->sptep, in kvm_tdp_mmu_age_spte()
1358 new_spte = iter->old_spte & ~shadow_accessed_mask; in kvm_tdp_mmu_age_spte()
1360 new_spte = mark_spte_for_access_track(iter->old_spte); in kvm_tdp_mmu_age_spte()
1369 trace_kvm_tdp_mmu_spte_changed(iter->as_id, iter->gfn, iter->level, in kvm_tdp_mmu_age_spte()
1370 iter->old_spte, new_spte); in kvm_tdp_mmu_age_spte()
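
kvm_tdp_mmu_age_spte() has two flavours: with hardware Accessed/Dirty bits it only needs to clear the Accessed bit, which can be done with a plain atomic AND since no other bits change; without them it rewrites the SPTE into access-tracked form via mark_spte_for_access_track(). A sketch of the first, simpler case only (the bit position is a placeholder for shadow_accessed_mask):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

#define PTE_ACCESSED	(1ull << 5)	/* placeholder for shadow_accessed_mask */

/*
 * Clear the Accessed bit and report whether the page was touched since the
 * last aging pass; atomic_fetch_and() returns the pre-clear value.
 */
static bool age_pte(_Atomic uint64_t *ptep)
{
	uint64_t old_pte = atomic_fetch_and(ptep, ~PTE_ACCESSED);

	return old_pte & PTE_ACCESSED;
}
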
1382 types = kvm_gfn_range_filter_to_root_types(kvm, range->attr_filter); in __kvm_tdp_mmu_age_gfn_range()
1393 for_each_tdp_mmu_root_rcu(kvm, root, range->slot->as_id, types) { in __kvm_tdp_mmu_age_gfn_range()
1394 tdp_root_for_each_leaf_pte(iter, kvm, root, range->start, range->end) { in __kvm_tdp_mmu_age_gfn_range()
1441 !is_last_spte(iter.old_spte, iter.level) || in wrprot_gfn_range()
1468 lockdep_assert_held_read(&kvm->mmu_lock); in kvm_tdp_mmu_wrprot_slot()
1470 for_each_valid_tdp_mmu_root_yield_safe(kvm, root, slot->as_id) in kvm_tdp_mmu_wrprot_slot()
1471 spte_set |= wrprot_gfn_range(kvm, root, slot->base_gfn, in kvm_tdp_mmu_wrprot_slot()
1472 slot->base_gfn + slot->npages, min_level); in kvm_tdp_mmu_wrprot_slot()
1485 sp->spt = (void *)get_zeroed_page(GFP_KERNEL_ACCOUNT); in tdp_mmu_alloc_sp_for_split()
1486 if (!sp->spt) { in tdp_mmu_alloc_sp_for_split()
1498 const u64 huge_spte = iter->old_spte; in tdp_mmu_split_huge_page()
1499 const int level = iter->level; in tdp_mmu_split_huge_page() local
1503 * No need for atomics when writing to sp->spt since the page table has in tdp_mmu_split_huge_page()
1507 sp->spt[i] = make_small_spte(kvm, huge_spte, sp->role, i); in tdp_mmu_split_huge_page()
1510 * Replace the huge spte with a pointer to the populated lower level in tdp_mmu_split_huge_page()
1511 * page table. Since we are making this change without a TLB flush, vCPUs in tdp_mmu_split_huge_page()
1522 * tdp_mmu_link_sp_atomic() will handle subtracting the huge page we in tdp_mmu_split_huge_page()
1523 * are overwriting from the page stats. But we have to manually update in tdp_mmu_split_huge_page()
1524 * the page stats with the new present child pages. in tdp_mmu_split_huge_page()
1526 kvm_update_page_stats(kvm, level - 1, SPTE_ENT_PER_PAGE); in tdp_mmu_split_huge_page()
1529 trace_kvm_mmu_split_huge_page(iter->gfn, huge_spte, level, ret); in tdp_mmu_split_huge_page()
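
tdp_mmu_split_huge_page() fills in all 512 entries of the detached child table first, with no atomics, because nothing can observe the table until it is linked, and each child entry maps the i-th small chunk of the huge range while inheriting the huge SPTE's attributes; only then is the huge SPTE replaced and the page stats bumped by SPTE_ENT_PER_PAGE. A sketch of the child-entry construction with an invented PFN/flag layout standing in for make_small_spte():

#include <stdint.h>

#define ENTRIES		512
#define PTE_PFN_SHIFT	12
#define PTE_HUGE	(1ull << 7)	/* invented "leaf at level > 1" bit */
#define PTE_FLAGS_MASK	0xfffull

/*
 * Populate a fresh child table from one huge PTE: child i keeps the huge
 * PTE's low flag bits but maps the i-th child-sized chunk of the range.
 * @pages_per_child is 1 for a 2M->4K split and 512 for a 1G->2M split.
 */
static void split_huge_pte(uint64_t huge_pte, uint64_t child[ENTRIES],
			   uint64_t pages_per_child)
{
	uint64_t flags = huge_pte & PTE_FLAGS_MASK;
	uint64_t pfn = huge_pte >> PTE_PFN_SHIFT;

	/* A 2M->4K split produces 4K leaves, which don't carry the huge bit. */
	if (pages_per_child == 1)
		flags &= ~PTE_HUGE;

	for (uint64_t i = 0; i < ENTRIES; i++)
		child[i] = ((pfn + i * pages_per_child) << PTE_PFN_SHIFT) | flags;
}
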
1544 * Traverse the page table splitting all huge pages above the target in tdp_mmu_split_huge_pages_root()
1545 * level into one lower level. For example, if we encounter a 1GB page in tdp_mmu_split_huge_pages_root()
1548 * Since the TDP iterator uses a pre-order traversal, we are guaranteed in tdp_mmu_split_huge_pages_root()
1551 * level above the target level (e.g. splitting a 1GB to 512 2MB pages, in tdp_mmu_split_huge_pages_root()
1566 read_unlock(&kvm->mmu_lock); in tdp_mmu_split_huge_pages_root()
1568 write_unlock(&kvm->mmu_lock); in tdp_mmu_split_huge_pages_root()
1573 read_lock(&kvm->mmu_lock); in tdp_mmu_split_huge_pages_root()
1575 write_lock(&kvm->mmu_lock); in tdp_mmu_split_huge_pages_root()
1580 iter.level, -ENOMEM); in tdp_mmu_split_huge_pages_root()
1581 return -ENOMEM; in tdp_mmu_split_huge_pages_root()
1613 * Try to split all huge pages mapped by the TDP MMU down to the target level.
1624 for_each_valid_tdp_mmu_root_yield_safe(kvm, root, slot->as_id) { in kvm_tdp_mmu_try_split_huge_pages()
1637 * from level, so it is valid to key off any shadow page to determine if in tdp_mmu_need_write_protect()
1655 !is_last_spte(iter.old_spte, iter.level)) in clear_dirty_gfn_range()
1675 * Clear the dirty status (D-bit or W-bit) of all the SPTEs mapping GFNs in the
1683 lockdep_assert_held_read(&kvm->mmu_lock); in kvm_tdp_mmu_clear_dirty_slot()
1684 for_each_valid_tdp_mmu_root_yield_safe(kvm, root, slot->as_id) in kvm_tdp_mmu_clear_dirty_slot()
1685 clear_dirty_gfn_range(kvm, root, slot->base_gfn, in kvm_tdp_mmu_clear_dirty_slot()
1686 slot->base_gfn + slot->npages); in kvm_tdp_mmu_clear_dirty_slot()
1696 lockdep_assert_held_write(&kvm->mmu_lock); in clear_dirty_pt_masked()
1708 if (iter.level > PG_LEVEL_4K || in clear_dirty_pt_masked()
1709 !(mask & (1UL << (iter.gfn - gfn)))) in clear_dirty_pt_masked()
1712 mask &= ~(1UL << (iter.gfn - gfn)); in clear_dirty_pt_masked()
1719 iter.level); in clear_dirty_pt_masked()
1721 trace_kvm_tdp_mmu_spte_changed(iter.as_id, iter.gfn, iter.level, in clear_dirty_pt_masked()
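
clear_dirty_pt_masked() is driven by a 64-bit bitmap in which bit i stands for gfn + i: non-4K entries and unmasked GFNs are skipped, each handled bit is cleared from the local mask copy, and the walk stops as soon as the mask is empty. A sketch of that mask bookkeeping over a flat array of 4K PTEs (the dirty-bit position is invented and stands in for either the D or the W bit):

#include <stdint.h>

#define PTE_DIRTY	(1ull << 6)	/* placeholder for the D bit (or W bit) */

typedef uint64_t gfn_t;

/*
 * Clear the dirty bit of the 4K PTE for every GFN whose bit is set in @mask,
 * where bit i of @mask corresponds to @base_gfn + i.  The loop ends as soon
 * as every set bit has been consumed.
 */
static void clear_dirty_masked(uint64_t *pte_for_gfn, gfn_t base_gfn,
			       uint64_t mask)
{
	for (gfn_t gfn = base_gfn; mask; gfn++) {
		uint64_t bit = 1ull << (gfn - base_gfn);

		if (!(mask & bit))
			continue;

		mask &= ~bit;			/* this GFN is handled */
		pte_for_gfn[gfn - base_gfn] &= ~PTE_DIRTY;
	}
}
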
1730 * Clear the dirty status (D-bit or W-bit) of all the 4k SPTEs mapping GFNs for
1741 for_each_valid_tdp_mmu_root(kvm, root, slot->as_id) in kvm_tdp_mmu_clear_dirty_pt_masked()
1749 struct kvm_mmu_page *root = spte_to_child_sp(parent->old_spte); in tdp_mmu_make_huge_spte()
1750 gfn_t start = parent->gfn; in tdp_mmu_make_huge_spte()
1751 gfn_t end = start + KVM_PAGES_PER_HPAGE(parent->level); in tdp_mmu_make_huge_spte()
1758 * returning -EAGAIN here and then failing the forward progress in tdp_mmu_make_huge_spte()
1762 return -EAGAIN; in tdp_mmu_make_huge_spte()
1764 *huge_spte = make_huge_spte(kvm, iter.old_spte, parent->level); in tdp_mmu_make_huge_spte()
1768 return -ENOENT; in tdp_mmu_make_huge_spte()
1775 gfn_t start = slot->base_gfn; in recover_huge_pages_range()
1776 gfn_t end = start + slot->npages; in recover_huge_pages_range()
1795 if (iter.level > KVM_MAX_HUGEPAGE_LEVEL || in recover_huge_pages_range()
1801 * a large page size, then its parent would have been zapped in recover_huge_pages_range()
1804 if (is_last_spte(iter.old_spte, iter.level)) in recover_huge_pages_range()
1808 * If iter.gfn resides outside of the slot, i.e. the page for in recover_huge_pages_range()
1809 * the current level overlaps but is not contained by the slot, in recover_huge_pages_range()
1811 * to query that info from slot->arch.lpage_info will cause an in recover_huge_pages_range()
1812 * out-of-bounds access. in recover_huge_pages_range()
1818 if (max_mapping_level < iter.level) in recover_huge_pages_range()
1822 if (r == -EAGAIN) in recover_huge_pages_range()
1840 * Recover huge page mappings within the slot by replacing non-leaf SPTEs with
1848 lockdep_assert_held_read(&kvm->mmu_lock); in kvm_tdp_mmu_recover_huge_pages()
1849 for_each_valid_tdp_mmu_root_yield_safe(kvm, root, slot->as_id) in kvm_tdp_mmu_recover_huge_pages()
1854 * Removes write access on the last level SPTE mapping this GFN and unsets the
1855 * MMU-writable bit to ensure future writes continue to be intercepted.
1871 !is_last_spte(iter.old_spte, iter.level)) in write_protect_gfn()
1890 * Removes write access on the last level SPTE mapping this GFN and unsets the
1891 * MMU-writable bit to ensure future writes continue to be intercepted.
1901 lockdep_assert_held_write(&kvm->mmu_lock); in kvm_tdp_mmu_write_protect_gfn()
1902 for_each_valid_tdp_mmu_root(kvm, root, slot->as_id) in kvm_tdp_mmu_write_protect_gfn()
1909 * Return the level of the lowest level SPTE added to sptes.
1910 * That SPTE may be non-present.
1917 struct kvm_mmu_page *root = root_to_sp(vcpu->arch.mmu->root.hpa); in kvm_tdp_mmu_get_walk()
1920 int leaf = -1; in kvm_tdp_mmu_get_walk()
1922 *root_level = vcpu->arch.mmu->root_role.level; in kvm_tdp_mmu_get_walk()
1924 for_each_tdp_pte(iter, vcpu->kvm, root, gfn, gfn + 1) { in kvm_tdp_mmu_get_walk()
1925 leaf = iter.level; in kvm_tdp_mmu_get_walk()
1933 * Returns the last level spte pointer of the shadow page walk for the given
1934 * gpa, and sets *spte to the spte value. This spte may be non-present. If no
1938 * - Must be called between kvm_tdp_mmu_walk_lockless_{begin,end}.
1939 * - The returned sptep must not be used after kvm_tdp_mmu_walk_lockless_end.
1951 for_each_tdp_pte(iter, vcpu->kvm, root, gfn, gfn + 1) { in kvm_tdp_mmu_fast_pf_get_last_sptep()
1959 * legacy MMU and thus does not retain the TDP MMU-specific __rcu in kvm_tdp_mmu_fast_pf_get_last_sptep()