Lines Matching +full:ext +full:- +full:gen

1 // SPDX-License-Identifier: GPL-2.0-only
3 * Kernel-based Virtual Machine driver for Linux
5 * This module enables machines with Intel VT-x extensions to run virtual
57 static int __read_mostly nx_huge_pages = -1;
88 * Setting this variable to true enables Two-Dimensional-Paging
90 * 1. the guest-virtual to guest-physical
91 * 2. while doing 1. it walks guest-physical to host-physical
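
To make the two-dimensional walk above concrete, here is a minimal user-space sketch (not the kernel code; every name and the single-level table layout are invented for illustration): a toy guest page table whose entries hold guest-physical addresses, where both the table access and the final data address go through a second guest-physical-to-host-physical mapping.

#include <stdint.h>
#include <stdio.h>

#define ENTRIES 16

/* Toy "host memory": the index plays the role of a host-physical address. */
static uint64_t host_mem[2 * ENTRIES];

/* Dimension 2: guest-physical -> host-physical (what EPT/NPT hardware provides). */
static uint64_t gpa_to_hpa(uint64_t gpa)
{
    return gpa + ENTRIES;   /* pretend guest RAM sits at host offset ENTRIES */
}

/*
 * Dimension 1: a single-level guest page table located at guest-physical
 * addresses 0..ENTRIES-1.  Even reading the guest table entry requires a
 * dimension-2 translation first.
 */
static uint64_t gva_to_hpa(uint64_t gva)
{
    uint64_t pte_gpa  = gva % ENTRIES;          /* where the guest PTE lives (GPA) */
    uint64_t pte_hpa  = gpa_to_hpa(pte_gpa);    /* 2. translate the table access   */
    uint64_t data_gpa = host_mem[pte_hpa];      /* 1. read the guest PTE -> GPA    */
    return gpa_to_hpa(data_gpa);                /* 2. translate the final GPA      */
}

int main(void)
{
    host_mem[gpa_to_hpa(3)] = 7;    /* guest PTE for gva 3 points at gpa 7 */
    printf("gva 3 -> hpa %llu\n", (unsigned long long)gva_to_hpa(3));
    return 0;
}
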
118 (PAGE_SHIFT + (level - 1) * PT32_LEVEL_BITS)
121 (PT32_BASE_ADDR_MASK & ((1ULL << (PAGE_SHIFT + (((level) - 1) \
122 * PT32_LEVEL_BITS))) - 1))
125 (((address) >> PT32_LEVEL_SHIFT(level)) & ((1 << PT32_LEVEL_BITS) - 1))
130 (PAGE_MASK & ~((1ULL << (PAGE_SHIFT + PT32_LEVEL_BITS)) - 1))
132 (PAGE_MASK & ~((1ULL << (PAGE_SHIFT + (((level) - 1) \
133 * PT32_LEVEL_BITS))) - 1))
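
A quick worked example of the 32-bit paging index math encoded in the PT32_* macros above, as a standalone program; PAGE_SHIFT (12) and PT32_LEVEL_BITS (10) are restated here, and the sample address is arbitrary.

#include <stdio.h>

#define PAGE_SHIFT      12
#define PT32_LEVEL_BITS 10

#define PT32_LEVEL_SHIFT(level) \
    (PAGE_SHIFT + ((level) - 1) * PT32_LEVEL_BITS)

#define PT32_INDEX(address, level) \
    (((address) >> PT32_LEVEL_SHIFT(level)) & ((1 << PT32_LEVEL_BITS) - 1))

int main(void)
{
    unsigned long address = 0x12345678;

    /* level 2: page-directory index = bits 31:22, level 1: page-table index = bits 21:12 */
    printf("PDE index = %lu\n", PT32_INDEX(address, 2));  /* 0x12345678 >> 22 = 72  */
    printf("PTE index = %lu\n", PT32_INDEX(address, 1));  /* (0x12345678 >> 12) & 0x3ff = 837 */
    return 0;
}
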
190 int ret = -ENOTSUPP; in kvm_flush_remote_tlbs_with_range()
252 u64 kvm_gen, spte_gen, gen; in check_mmio_spte() local
254 gen = kvm_vcpu_memslots(vcpu)->generation; in check_mmio_spte()
255 if (unlikely(gen & KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS)) in check_mmio_spte()
258 kvm_gen = gen & MMIO_SPTE_GEN_MASK; in check_mmio_spte()
270 exception->error_code |= PFERR_RSVD_MASK; in translate_gpa()
284 return vcpu->arch.efer & EFER_NX; in is_nx()
289 int shift = 32 - PT32_DIR_PSE36_SHIFT - PAGE_SHIFT; in pse36_gfn_delta()
332 sp->clear_spte_count++; in count_spte_clear()
342 ssptep->spte_high = sspte.spte_high; in __set_spte()
351 WRITE_ONCE(ssptep->spte_low, sspte.spte_low); in __set_spte()
361 WRITE_ONCE(ssptep->spte_low, sspte.spte_low); in __update_clear_spte_fast()
369 ssptep->spte_high = sspte.spte_high; in __update_clear_spte_fast()
381 orig.spte_low = xchg(&ssptep->spte_low, sspte.spte_low); in __update_clear_spte_slow()
382 orig.spte_high = ssptep->spte_high; in __update_clear_spte_slow()
383 ssptep->spte_high = sspte.spte_high; in __update_clear_spte_slow()
395 * we need to protect against in-progress updates of the spte.
398 * for the high part of the spte. The race is fine for a present->non-present
399 * change (because the high part of the spte is ignored for non-present spte),
400 * but for a present->present change we must reread the spte.
402 * All such changes are done in two steps (present->non-present and
403 * non-present->present), hence it is enough to count the number of
404 * present->non-present updates: if it changed while reading the spte,
414 count = sp->clear_spte_count; in __get_spte_lockless()
417 spte.spte_low = orig->spte_low; in __get_spte_lockless()
420 spte.spte_high = orig->spte_high; in __get_spte_lockless()
423 if (unlikely(spte.spte_low != orig->spte_low || in __get_spte_lockless()
424 count != sp->clear_spte_count)) in __get_spte_lockless()
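
The __get_spte_lockless() fragments above implement the retry scheme the preceding comment describes. Below is a condensed user-space sketch of the same pattern, assuming a per-page version counter bumped by the writer on every present->non-present update; the names and the use of __atomic_thread_fence() as a stand-in for smp_rmb() are illustrative, not the kernel's primitives.

#include <stdint.h>

union split_spte {
    struct {
        uint32_t spte_low;
        uint32_t spte_high;
    };
    uint64_t spte;
};

/* The writer bumps this around every present->non-present transition. */
static volatile unsigned int clear_spte_count;

#define rmb()   __atomic_thread_fence(__ATOMIC_ACQUIRE)

static uint64_t get_spte_lockless(volatile union split_spte *orig)
{
    union split_spte spte;
    unsigned int count;

retry:
    count = clear_spte_count;
    rmb();

    spte.spte_low = orig->spte_low;
    rmb();

    spte.spte_high = orig->spte_high;
    rmb();

    /* A racing clear may have replaced the halves under us: try again. */
    if (spte.spte_low != orig->spte_low || count != clear_spte_count)
        goto retry;

    return spte.spte;
}

int main(void)
{
    static union split_spte s = { .spte = 0x100000001ULL };
    return get_spte_lockless(&s) == 0x100000001ULL ? 0 : 1;
}
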
438 * out of mmu-lock, it can ensure dirty bit is not lost, in spte_has_volatile_bits()
495 * Whenever we overwrite a writable spte with a read-only one we
497 * will find a read-only spte, even though the writable spte
512 * Updating the spte out of mmu-lock is safe, since in mmu_spte_update()
542 * Returns non-zero if the PTE was previously valid.
590 /* Restore an acc-track PTE back to a regular PTE */
617 clear_bit((ffs(shadow_accessed_mask) - 1), in mmu_spte_age()
637 * Prevent page table teardown by making any free-er wait during in walk_shadow_page_lockless_begin()
644 * to vcpu->mode. in walk_shadow_page_lockless_begin()
646 smp_store_mb(vcpu->mode, READING_SHADOW_PAGE_TABLES); in walk_shadow_page_lockless_begin()
652 * Make sure the write to vcpu->mode is not reordered in front of in walk_shadow_page_lockless_end()
656 smp_store_release(&vcpu->mode, OUTSIDE_GUEST_MODE); in walk_shadow_page_lockless_end()
665 r = kvm_mmu_topup_memory_cache(&vcpu->arch.mmu_pte_list_desc_cache, in mmu_topup_memory_caches()
669 r = kvm_mmu_topup_memory_cache(&vcpu->arch.mmu_shadow_page_cache, in mmu_topup_memory_caches()
674 r = kvm_mmu_topup_memory_cache(&vcpu->arch.mmu_gfn_array_cache, in mmu_topup_memory_caches()
679 return kvm_mmu_topup_memory_cache(&vcpu->arch.mmu_page_header_cache, in mmu_topup_memory_caches()
685 kvm_mmu_free_memory_cache(&vcpu->arch.mmu_pte_list_desc_cache); in mmu_free_memory_caches()
686 kvm_mmu_free_memory_cache(&vcpu->arch.mmu_shadow_page_cache); in mmu_free_memory_caches()
687 kvm_mmu_free_memory_cache(&vcpu->arch.mmu_gfn_array_cache); in mmu_free_memory_caches()
688 kvm_mmu_free_memory_cache(&vcpu->arch.mmu_page_header_cache); in mmu_free_memory_caches()
693 return kvm_mmu_memory_cache_alloc(&vcpu->arch.mmu_pte_list_desc_cache); in mmu_alloc_pte_list_desc()
703 if (!sp->role.direct) in kvm_mmu_page_get_gfn()
704 return sp->gfns[index]; in kvm_mmu_page_get_gfn()
706 return sp->gfn + (index << ((sp->role.level - 1) * PT64_LEVEL_BITS)); in kvm_mmu_page_get_gfn()
711 if (!sp->role.direct) { in kvm_mmu_page_set_gfn()
712 sp->gfns[index] = gfn; in kvm_mmu_page_set_gfn()
719 sp->gfn, in kvm_mmu_page_set_gfn()
733 idx = gfn_to_index(gfn, slot->base_gfn, level); in lpage_info_slot()
734 return &slot->arch.lpage_info[level - 2][idx]; in lpage_info_slot()
745 linfo->disallow_lpage += count; in update_gfn_disallow_lpage_count()
746 WARN_ON(linfo->disallow_lpage < 0); in update_gfn_disallow_lpage_count()
757 update_gfn_disallow_lpage_count(slot, gfn, -1); in kvm_mmu_gfn_allow_lpage()
766 kvm->arch.indirect_shadow_pages++; in account_shadowed()
767 gfn = sp->gfn; in account_shadowed()
768 slots = kvm_memslots_for_spte_role(kvm, sp->role); in account_shadowed()
771 /* non-leaf shadow pages are kept read-only. */ in account_shadowed()
772 if (sp->role.level > PG_LEVEL_4K) in account_shadowed()
781 if (sp->lpage_disallowed) in account_huge_nx_page()
784 ++kvm->stat.nx_lpage_splits; in account_huge_nx_page()
785 list_add_tail(&sp->lpage_disallowed_link, in account_huge_nx_page()
786 &kvm->arch.lpage_disallowed_mmu_pages); in account_huge_nx_page()
787 sp->lpage_disallowed = true; in account_huge_nx_page()
796 kvm->arch.indirect_shadow_pages--; in unaccount_shadowed()
797 gfn = sp->gfn; in unaccount_shadowed()
798 slots = kvm_memslots_for_spte_role(kvm, sp->role); in unaccount_shadowed()
800 if (sp->role.level > PG_LEVEL_4K) in unaccount_shadowed()
809 --kvm->stat.nx_lpage_splits; in unaccount_huge_nx_page()
810 sp->lpage_disallowed = false; in unaccount_huge_nx_page()
811 list_del(&sp->lpage_disallowed_link); in unaccount_huge_nx_page()
821 if (!slot || slot->flags & KVM_MEMSLOT_INVALID) in gfn_to_memslot_dirty_bitmap()
823 if (no_dirty_log && slot->dirty_bitmap) in gfn_to_memslot_dirty_bitmap()
832 * If the bit zero of rmap_head->val is clear, then it points to the only spte
833 * in this rmap chain. Otherwise, (rmap_head->val & ~1) points to a struct
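
The encoding described above, where bit zero of rmap_head->val distinguishes "the value is the lone spte pointer" from "the value points to a descriptor chain", can be shown with a small stand-alone sketch. The descriptor size of three and the helper names are chosen here for brevity and are not taken from the listing.

#include <stdint.h>
#include <stdio.h>

typedef uint64_t u64;

struct pte_list_desc {              /* small fixed size, purely illustrative */
    u64 *sptes[3];
    struct pte_list_desc *more;
};

struct kvm_rmap_head {
    unsigned long val;
};

static int rmap_is_desc(struct kvm_rmap_head *head)
{
    return head->val & 1;           /* bit 0 set: val points to a pte_list_desc */
}

static u64 *rmap_single_spte(struct kvm_rmap_head *head)
{
    return (u64 *)head->val;        /* bit 0 clear: val *is* the lone spte pointer */
}

static struct pte_list_desc *rmap_desc(struct kvm_rmap_head *head)
{
    return (struct pte_list_desc *)(head->val & ~1ul);
}

int main(void)
{
    static u64 spte_a, spte_b;
    static struct pte_list_desc desc;
    struct kvm_rmap_head head = { .val = (unsigned long)&spte_a };

    printf("single? %d (spte %p)\n", !rmap_is_desc(&head),
           (void *)rmap_single_spte(&head));

    desc.sptes[0] = &spte_a;
    desc.sptes[1] = &spte_b;
    head.val = (unsigned long)&desc | 1;        /* promote to a descriptor chain */
    printf("desc? %d, first spte %p\n", rmap_is_desc(&head),
           (void *)rmap_desc(&head)->sptes[0]);
    return 0;
}
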
846 if (!rmap_head->val) { in pte_list_add()
847 rmap_printk("pte_list_add: %p %llx 0->1\n", spte, *spte); in pte_list_add()
848 rmap_head->val = (unsigned long)spte; in pte_list_add()
849 } else if (!(rmap_head->val & 1)) { in pte_list_add()
850 rmap_printk("pte_list_add: %p %llx 1->many\n", spte, *spte); in pte_list_add()
852 desc->sptes[0] = (u64 *)rmap_head->val; in pte_list_add()
853 desc->sptes[1] = spte; in pte_list_add()
854 rmap_head->val = (unsigned long)desc | 1; in pte_list_add()
857 rmap_printk("pte_list_add: %p %llx many->many\n", spte, *spte); in pte_list_add()
858 desc = (struct pte_list_desc *)(rmap_head->val & ~1ul); in pte_list_add()
859 while (desc->sptes[PTE_LIST_EXT-1]) { in pte_list_add()
862 if (!desc->more) { in pte_list_add()
863 desc->more = mmu_alloc_pte_list_desc(vcpu); in pte_list_add()
864 desc = desc->more; in pte_list_add()
867 desc = desc->more; in pte_list_add()
869 for (i = 0; desc->sptes[i]; ++i) in pte_list_add()
871 desc->sptes[i] = spte; in pte_list_add()
883 for (j = PTE_LIST_EXT - 1; !desc->sptes[j] && j > i; --j) in pte_list_desc_remove_entry()
885 desc->sptes[i] = desc->sptes[j]; in pte_list_desc_remove_entry()
886 desc->sptes[j] = NULL; in pte_list_desc_remove_entry()
889 if (!prev_desc && !desc->more) in pte_list_desc_remove_entry()
890 rmap_head->val = 0; in pte_list_desc_remove_entry()
893 prev_desc->more = desc->more; in pte_list_desc_remove_entry()
895 rmap_head->val = (unsigned long)desc->more | 1; in pte_list_desc_remove_entry()
905 if (!rmap_head->val) { in __pte_list_remove()
906 pr_err("%s: %p 0->BUG\n", __func__, spte); in __pte_list_remove()
908 } else if (!(rmap_head->val & 1)) { in __pte_list_remove()
909 rmap_printk("%s: %p 1->0\n", __func__, spte); in __pte_list_remove()
910 if ((u64 *)rmap_head->val != spte) { in __pte_list_remove()
911 pr_err("%s: %p 1->BUG\n", __func__, spte); in __pte_list_remove()
914 rmap_head->val = 0; in __pte_list_remove()
916 rmap_printk("%s: %p many->many\n", __func__, spte); in __pte_list_remove()
917 desc = (struct pte_list_desc *)(rmap_head->val & ~1ul); in __pte_list_remove()
920 for (i = 0; i < PTE_LIST_EXT && desc->sptes[i]; ++i) { in __pte_list_remove()
921 if (desc->sptes[i] == spte) { in __pte_list_remove()
928 desc = desc->more; in __pte_list_remove()
930 pr_err("%s: %p many->many\n", __func__, spte); in __pte_list_remove()
946 idx = gfn_to_index(gfn, slot->base_gfn, level); in __gfn_to_rmap()
947 return &slot->arch.rmap[level - PG_LEVEL_4K][idx]; in __gfn_to_rmap()
956 slots = kvm_memslots_for_spte_role(kvm, sp->role); in gfn_to_rmap()
958 return __gfn_to_rmap(gfn, sp->role.level, slot); in gfn_to_rmap()
965 mc = &vcpu->arch.mmu_pte_list_desc_cache; in rmap_can_add()
975 kvm_mmu_page_set_gfn(sp, spte - sp->spt, gfn); in rmap_add()
976 rmap_head = gfn_to_rmap(vcpu->kvm, gfn, sp); in rmap_add()
987 gfn = kvm_mmu_page_get_gfn(sp, spte - sp->spt); in rmap_remove()
1014 if (!rmap_head->val) in rmap_get_first()
1017 if (!(rmap_head->val & 1)) { in rmap_get_first()
1018 iter->desc = NULL; in rmap_get_first()
1019 sptep = (u64 *)rmap_head->val; in rmap_get_first()
1023 iter->desc = (struct pte_list_desc *)(rmap_head->val & ~1ul); in rmap_get_first()
1024 iter->pos = 0; in rmap_get_first()
1025 sptep = iter->desc->sptes[iter->pos]; in rmap_get_first()
1040 if (iter->desc) { in rmap_get_next()
1041 if (iter->pos < PTE_LIST_EXT - 1) { in rmap_get_next()
1042 ++iter->pos; in rmap_get_next()
1043 sptep = iter->desc->sptes[iter->pos]; in rmap_get_next()
1048 iter->desc = iter->desc->more; in rmap_get_next()
1050 if (iter->desc) { in rmap_get_next()
1051 iter->pos = 0; in rmap_get_next()
1052 /* desc->sptes[0] cannot be NULL */ in rmap_get_next()
1053 sptep = iter->desc->sptes[iter->pos]; in rmap_get_next()
1078 WARN_ON(sptep_to_sp(sptep)->role.level == PG_LEVEL_4K); in __drop_large_spte()
1080 --kvm->stat.lpages; in __drop_large_spte()
1089 if (__drop_large_spte(vcpu->kvm, sptep)) { in drop_large_spte()
1092 kvm_flush_remote_tlbs_with_address(vcpu->kvm, sp->gfn, in drop_large_spte()
1093 KVM_PAGES_PER_HPAGE(sp->role.level)); in drop_large_spte()
1098 * Write-protect the specified @sptep; @pt_protect indicates whether the
1099 * spte write-protection is caused by protecting the shadow page table.
1103 * - for dirty logging, the spte can be set to writable at anytime if
1105 * - for spte protection, the spte can be writable only after unsync-ing
1164 * - D bit on ad-enabled SPTEs, and
1165 * - W bit on ad-disabled SPTEs.
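
As the two bullets above say, dirty tracking clears either the D bit (SPTEs with hardware A/D bits enabled) or the W bit (SPTEs without, so the next write faults and can be logged). A minimal sketch of that decision follows; the bit masks are illustrative placeholders, not the real shadow_dirty_mask/PT_WRITABLE_MASK values.

#include <stdint.h>
#include <stdbool.h>

/* Illustrative masks only; the real values depend on EPT vs. legacy sptes. */
#define SPTE_WRITABLE   (1ULL << 1)
#define SPTE_DIRTY      (1ULL << 6)

/*
 * Returns true if the spte changed, i.e. a TLB flush is needed before the
 * new dirty state can be trusted.
 */
bool clear_dirty_tracking(uint64_t *spte, bool ad_enabled)
{
    uint64_t old = *spte;

    if (ad_enabled)
        *spte &= ~SPTE_DIRTY;     /* hardware sets D again on the next write */
    else
        *spte &= ~SPTE_WRITABLE;  /* next write faults; the fault handler logs it */

    return *spte != old;
}

int main(void)
{
    uint64_t spte = SPTE_WRITABLE | SPTE_DIRTY;

    return clear_dirty_tracking(&spte, true) && !(spte & SPTE_DIRTY) ? 0 : 1;
}
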
1213 * kvm_mmu_write_protect_pt_masked - write protect selected PT level pages
1228 if (kvm->arch.tdp_mmu_enabled) in kvm_mmu_write_protect_pt_masked()
1230 slot->base_gfn + gfn_offset, mask, true); in kvm_mmu_write_protect_pt_masked()
1232 rmap_head = __gfn_to_rmap(slot->base_gfn + gfn_offset + __ffs(mask), in kvm_mmu_write_protect_pt_masked()
1237 mask &= mask - 1; in kvm_mmu_write_protect_pt_masked()
1242 * kvm_mmu_clear_dirty_pt_masked - clear MMU D-bit for PT level pages, or write
1243 * protect the page if the D-bit isn't supported.
1245 * @slot: slot to clear D-bit
1247 * @mask: indicates which pages we should clear D-bit
1249 * Used for PML to re-log the dirty GPAs after userspace querying dirty_bitmap.
1257 if (kvm->arch.tdp_mmu_enabled) in kvm_mmu_clear_dirty_pt_masked()
1259 slot->base_gfn + gfn_offset, mask, false); in kvm_mmu_clear_dirty_pt_masked()
1261 rmap_head = __gfn_to_rmap(slot->base_gfn + gfn_offset + __ffs(mask), in kvm_mmu_clear_dirty_pt_masked()
1266 mask &= mask - 1; in kvm_mmu_clear_dirty_pt_masked()
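
Both masked-walk functions above visit exactly the 4K pages whose bits are set in @mask, using __ffs() to pick the lowest set bit and "mask &= mask - 1" to clear it. The same loop in stand-alone form, doing only the gfn arithmetic (no rmap work):

#include <stdint.h>
#include <stdio.h>

typedef uint64_t gfn_t;

static void for_each_set_gfn(gfn_t base_gfn, gfn_t gfn_offset, unsigned long mask)
{
    while (mask) {
        /* the lowest set bit selects the page within the 64-page block */
        gfn_t gfn = base_gfn + gfn_offset + __builtin_ctzl(mask);

        printf("would write-protect / clear D bit for gfn 0x%llx\n",
               (unsigned long long)gfn);

        mask &= mask - 1;       /* clear the lowest set bit */
    }
}

int main(void)
{
    for_each_set_gfn(0x1000, 64, 0x16);     /* bits 1, 2 and 4 of the block */
    return 0;
}
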
1272 * kvm_arch_mmu_enable_log_dirty_pt_masked - enable dirty logging for selected
1304 if (kvm->arch.tdp_mmu_enabled) in kvm_mmu_slot_gfn_write_protect()
1316 return kvm_mmu_slot_gfn_write_protect(vcpu->kvm, slot, gfn); in rmap_write_protect()
1403 iterator->level = level; in rmap_walk_init_level()
1404 iterator->gfn = iterator->start_gfn; in rmap_walk_init_level()
1405 iterator->rmap = __gfn_to_rmap(iterator->gfn, level, iterator->slot); in rmap_walk_init_level()
1406 iterator->end_rmap = __gfn_to_rmap(iterator->end_gfn, level, in rmap_walk_init_level()
1407 iterator->slot); in rmap_walk_init_level()
1415 iterator->slot = slot; in slot_rmap_walk_init()
1416 iterator->start_level = start_level; in slot_rmap_walk_init()
1417 iterator->end_level = end_level; in slot_rmap_walk_init()
1418 iterator->start_gfn = start_gfn; in slot_rmap_walk_init()
1419 iterator->end_gfn = end_gfn; in slot_rmap_walk_init()
1421 rmap_walk_init_level(iterator, iterator->start_level); in slot_rmap_walk_init()
1426 return !!iterator->rmap; in slot_rmap_walk_okay()
1431 if (++iterator->rmap <= iterator->end_rmap) { in slot_rmap_walk_next()
1432 iterator->gfn += (1UL << KVM_HPAGE_GFN_SHIFT(iterator->level)); in slot_rmap_walk_next()
1436 if (++iterator->level > iterator->end_level) { in slot_rmap_walk_next()
1437 iterator->rmap = NULL; in slot_rmap_walk_next()
1441 rmap_walk_init_level(iterator, iterator->level); in slot_rmap_walk_next()
1474 hva_start = max(start, memslot->userspace_addr); in kvm_handle_hva_range()
1475 hva_end = min(end, memslot->userspace_addr + in kvm_handle_hva_range()
1476 (memslot->npages << PAGE_SHIFT)); in kvm_handle_hva_range()
1481 * {gfn_start, gfn_start+1, ..., gfn_end-1}. in kvm_handle_hva_range()
1484 gfn_end = hva_to_gfn_memslot(hva_end + PAGE_SIZE - 1, memslot); in kvm_handle_hva_range()
1488 gfn_start, gfn_end - 1, in kvm_handle_hva_range()
1516 if (kvm->arch.tdp_mmu_enabled) in kvm_unmap_hva_range()
1528 if (kvm->arch.tdp_mmu_enabled) in kvm_set_spte_hva()
1571 rmap_head = gfn_to_rmap(vcpu->kvm, gfn, sp); in rmap_recycle()
1573 kvm_unmap_rmapp(vcpu->kvm, rmap_head, NULL, gfn, sp->role.level, 0); in rmap_recycle()
1574 kvm_flush_remote_tlbs_with_address(vcpu->kvm, sp->gfn, in rmap_recycle()
1575 KVM_PAGES_PER_HPAGE(sp->role.level)); in rmap_recycle()
1583 if (kvm->arch.tdp_mmu_enabled) in kvm_age_hva()
1594 if (kvm->arch.tdp_mmu_enabled) in kvm_test_age_hva()
1618 * kvm->arch.n_used_mmu_pages values. We need a global,
1624 kvm->arch.n_used_mmu_pages += nr; in kvm_mod_used_mmu_pages()
1630 MMU_WARN_ON(!is_empty_shadow_page(sp->spt)); in kvm_mmu_free_page()
1631 hlist_del(&sp->hash_link); in kvm_mmu_free_page()
1632 list_del(&sp->link); in kvm_mmu_free_page()
1633 free_page((unsigned long)sp->spt); in kvm_mmu_free_page()
1634 if (!sp->role.direct) in kvm_mmu_free_page()
1635 free_page((unsigned long)sp->gfns); in kvm_mmu_free_page()
1650 pte_list_add(vcpu, parent_pte, &sp->parent_ptes); in mmu_page_add_parent_pte()
1656 __pte_list_remove(parent_pte, &sp->parent_ptes); in mmu_page_remove_parent_pte()
1670 sp = kvm_mmu_memory_cache_alloc(&vcpu->arch.mmu_page_header_cache); in kvm_mmu_alloc_page()
1671 sp->spt = kvm_mmu_memory_cache_alloc(&vcpu->arch.mmu_shadow_page_cache); in kvm_mmu_alloc_page()
1673 sp->gfns = kvm_mmu_memory_cache_alloc(&vcpu->arch.mmu_gfn_array_cache); in kvm_mmu_alloc_page()
1674 set_page_private(virt_to_page(sp->spt), (unsigned long)sp); in kvm_mmu_alloc_page()
1681 sp->mmu_valid_gen = vcpu->kvm->arch.mmu_valid_gen; in kvm_mmu_alloc_page()
1682 list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages); in kvm_mmu_alloc_page()
1683 kvm_mod_used_mmu_pages(vcpu->kvm, +1); in kvm_mmu_alloc_page()
1693 for_each_rmap_spte(&sp->parent_ptes, &iter, sptep) { in kvm_mmu_mark_parents_unsync()
1704 index = spte - sp->spt; in mark_unsync()
1705 if (__test_and_set_bit(index, sp->unsync_child_bitmap)) in mark_unsync()
1707 if (sp->unsync_children++) in mark_unsync()
1740 if (sp->unsync) in mmu_pages_add()
1741 for (i = 0; i < pvec->nr; i++) in mmu_pages_add()
1742 if (pvec->page[i].sp == sp) in mmu_pages_add()
1745 pvec->page[pvec->nr].sp = sp; in mmu_pages_add()
1746 pvec->page[pvec->nr].idx = idx; in mmu_pages_add()
1747 pvec->nr++; in mmu_pages_add()
1748 return (pvec->nr == KVM_PAGE_ARRAY_NR); in mmu_pages_add()
1753 --sp->unsync_children; in clear_unsync_child_bit()
1754 WARN_ON((int)sp->unsync_children < 0); in clear_unsync_child_bit()
1755 __clear_bit(idx, sp->unsync_child_bitmap); in clear_unsync_child_bit()
1763 for_each_set_bit(i, sp->unsync_child_bitmap, 512) { in __mmu_unsync_walk()
1765 u64 ent = sp->spt[i]; in __mmu_unsync_walk()
1774 if (child->unsync_children) { in __mmu_unsync_walk()
1776 return -ENOSPC; in __mmu_unsync_walk()
1786 } else if (child->unsync) { in __mmu_unsync_walk()
1789 return -ENOSPC; in __mmu_unsync_walk()
1797 #define INVALID_INDEX (-1)
1802 pvec->nr = 0; in mmu_unsync_walk()
1803 if (!sp->unsync_children) in mmu_unsync_walk()
1812 WARN_ON(!sp->unsync); in kvm_unlink_unsync_page()
1814 sp->unsync = 0; in kvm_unlink_unsync_page()
1815 --kvm->stat.mmu_unsync; in kvm_unlink_unsync_page()
1830 &(_kvm)->arch.mmu_page_hash[kvm_page_table_hashfn(_gfn)]) \
1831 if ((_sp)->gfn != (_gfn) || (_sp)->role.direct) {} else
1835 return sp->role.cr0_wp && sp->role.smap_andnot_wp; in is_ept_sp()
1838 /* @sp->gfn should be write-protected at the call site */
1842 if ((!is_ept_sp(sp) && sp->role.gpte_is_8_bytes != !!is_pae(vcpu)) || in __kvm_sync_page()
1843 vcpu->arch.mmu->sync_page(vcpu, sp) == 0) { in __kvm_sync_page()
1844 kvm_mmu_prepare_zap_page(vcpu->kvm, sp, invalid_list); in __kvm_sync_page()
1869 if (kvm_mmu_remote_flush_or_zap(vcpu->kvm, invalid_list, remote_flush)) in kvm_mmu_flush_or_zap()
1885 return sp->role.invalid || in is_obsolete_sp()
1886 unlikely(sp->mmu_valid_gen != kvm->arch.mmu_valid_gen); in is_obsolete_sp()
1892 kvm_unlink_unsync_page(vcpu->kvm, sp); in kvm_sync_page()
1896 /* @gfn should be write-protected at the call site */
1903 for_each_gfn_indirect_valid_sp(vcpu->kvm, s, gfn) { in kvm_sync_pages()
1904 if (!s->unsync) in kvm_sync_pages()
1907 WARN_ON(s->role.level != PG_LEVEL_4K); in kvm_sync_pages()
1930 for (n = i+1; n < pvec->nr; n++) { in mmu_pages_next()
1931 struct kvm_mmu_page *sp = pvec->page[n].sp; in mmu_pages_next()
1932 unsigned idx = pvec->page[n].idx; in mmu_pages_next()
1933 int level = sp->role.level; in mmu_pages_next()
1935 parents->idx[level-1] = idx; in mmu_pages_next()
1939 parents->parent[level-2] = sp; in mmu_pages_next()
1951 if (pvec->nr == 0) in mmu_pages_first()
1954 WARN_ON(pvec->page[0].idx != INVALID_INDEX); in mmu_pages_first()
1956 sp = pvec->page[0].sp; in mmu_pages_first()
1957 level = sp->role.level; in mmu_pages_first()
1960 parents->parent[level-2] = sp; in mmu_pages_first()
1965 parents->parent[level-1] = NULL; in mmu_pages_first()
1975 unsigned int idx = parents->idx[level]; in mmu_pages_clear_parents()
1976 sp = parents->parent[level]; in mmu_pages_clear_parents()
1983 } while (!sp->unsync_children); in mmu_pages_clear_parents()
2000 protected |= rmap_write_protect(vcpu, sp->gfn); in mmu_sync_children()
2003 kvm_flush_remote_tlbs(vcpu->kvm); in mmu_sync_children()
2011 if (need_resched() || spin_needbreak(&vcpu->kvm->mmu_lock)) { in mmu_sync_children()
2013 cond_resched_lock(&vcpu->kvm->mmu_lock); in mmu_sync_children()
2023 atomic_set(&sp->write_flooding_count, 0); in __clear_sp_write_flooding_count()
2038 bool direct_mmu = vcpu->arch.mmu->direct_map; in kvm_mmu_get_page()
2048 role = vcpu->arch.mmu->mmu_role.base; in kvm_mmu_get_page()
2054 if (!direct_mmu && vcpu->arch.mmu->root_level <= PT32_ROOT_LEVEL) { in kvm_mmu_get_page()
2056 quadrant &= (1 << ((PT32_PT_BITS - PT64_PT_BITS) * level)) - 1; in kvm_mmu_get_page()
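
The two quadrant lines above split one 1024-entry 32-bit guest page table across several 512-entry shadow page tables. A worked version with the usual constants, which are assumptions restated here (PAGE_SHIFT 12, PT64_PT_BITS 9, PT32_PT_BITS 10) rather than read from the listing:

#include <stdio.h>

#define PAGE_SHIFT      12
#define PT64_PT_BITS    9       /* 512 entries per shadow page table */
#define PT32_PT_BITS    10      /* 1024 entries per 32-bit guest page table */

static unsigned int quadrant(unsigned long gaddr, int level)
{
    unsigned int q = gaddr >> (PAGE_SHIFT + PT64_PT_BITS * level);

    return q & ((1 << ((PT32_PT_BITS - PT64_PT_BITS) * level)) - 1);
}

int main(void)
{
    /*
     * level 1: a guest page table covers 4MB but a shadow page table only
     * 2MB, so two quadrants (0 or 1); level 2: the single guest page
     * directory covers 4GB vs. 1GB per shadow page, so four quadrants.
     */
    printf("level 1, gaddr 0x00300000 -> quadrant %u\n", quadrant(0x00300000, 1));
    printf("level 2, gaddr 0xc0000000 -> quadrant %u\n", quadrant(0xc0000000, 2));
    return 0;
}
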
2060 sp_list = &vcpu->kvm->arch.mmu_page_hash[kvm_page_table_hashfn(gfn)]; in kvm_mmu_get_page()
2061 for_each_valid_sp(vcpu->kvm, sp, sp_list) { in kvm_mmu_get_page()
2062 if (sp->gfn != gfn) { in kvm_mmu_get_page()
2067 if (!need_sync && sp->unsync) in kvm_mmu_get_page()
2070 if (sp->role.word != role.word) in kvm_mmu_get_page()
2076 if (sp->unsync) { in kvm_mmu_get_page()
2087 if (sp->unsync_children) in kvm_mmu_get_page()
2097 ++vcpu->kvm->stat.mmu_cache_miss; in kvm_mmu_get_page()
2101 sp->gfn = gfn; in kvm_mmu_get_page()
2102 sp->role = role; in kvm_mmu_get_page()
2103 hlist_add_head(&sp->hash_link, sp_list); in kvm_mmu_get_page()
2110 account_shadowed(vcpu->kvm, sp); in kvm_mmu_get_page()
2112 kvm_flush_remote_tlbs_with_address(vcpu->kvm, gfn, 1); in kvm_mmu_get_page()
2121 if (collisions > vcpu->kvm->stat.max_mmu_page_hash_collisions) in kvm_mmu_get_page()
2122 vcpu->kvm->stat.max_mmu_page_hash_collisions = collisions; in kvm_mmu_get_page()
2130 iterator->addr = addr; in shadow_walk_init_using_root()
2131 iterator->shadow_addr = root; in shadow_walk_init_using_root()
2132 iterator->level = vcpu->arch.mmu->shadow_root_level; in shadow_walk_init_using_root()
2134 if (iterator->level == PT64_ROOT_4LEVEL && in shadow_walk_init_using_root()
2135 vcpu->arch.mmu->root_level < PT64_ROOT_4LEVEL && in shadow_walk_init_using_root()
2136 !vcpu->arch.mmu->direct_map) in shadow_walk_init_using_root()
2137 --iterator->level; in shadow_walk_init_using_root()
2139 if (iterator->level == PT32E_ROOT_LEVEL) { in shadow_walk_init_using_root()
2141 * prev_root is currently only used for 64-bit hosts. So only in shadow_walk_init_using_root()
2144 BUG_ON(root != vcpu->arch.mmu->root_hpa); in shadow_walk_init_using_root()
2146 iterator->shadow_addr in shadow_walk_init_using_root()
2147 = vcpu->arch.mmu->pae_root[(addr >> 30) & 3]; in shadow_walk_init_using_root()
2148 iterator->shadow_addr &= PT64_BASE_ADDR_MASK; in shadow_walk_init_using_root()
2149 --iterator->level; in shadow_walk_init_using_root()
2150 if (!iterator->shadow_addr) in shadow_walk_init_using_root()
2151 iterator->level = 0; in shadow_walk_init_using_root()
2158 shadow_walk_init_using_root(iterator, vcpu, vcpu->arch.mmu->root_hpa, in shadow_walk_init()
2164 if (iterator->level < PG_LEVEL_4K) in shadow_walk_okay()
2167 iterator->index = SHADOW_PT_INDEX(iterator->addr, iterator->level); in shadow_walk_okay()
2168 iterator->sptep = ((u64 *)__va(iterator->shadow_addr)) + iterator->index; in shadow_walk_okay()
2175 if (is_last_spte(spte, iterator->level)) { in __shadow_walk_next()
2176 iterator->level = 0; in __shadow_walk_next()
2180 iterator->shadow_addr = spte & PT64_BASE_ADDR_MASK; in __shadow_walk_next()
2181 --iterator->level; in __shadow_walk_next()
2186 __shadow_walk_next(iterator, *iterator->sptep); in shadow_walk_next()
2196 spte = make_nonleaf_spte(sp->spt, sp_ad_disabled(sp)); in link_shadow_page()
2202 if (sp->unsync_children || sp->unsync) in link_shadow_page()
2215 * sp's access: allow writable in the read-only sp, in validate_direct_spte()
2220 if (child->role.access == direct_access) in validate_direct_spte()
2224 kvm_flush_remote_tlbs_with_address(vcpu->kvm, child->gfn, 1); in validate_direct_spte()
2228 /* Returns the number of zapped non-leaf child shadow pages. */
2237 if (is_last_spte(pte, sp->role.level)) { in mmu_page_zap_pte()
2240 --kvm->stat.lpages; in mmu_page_zap_pte()
2251 child->role.guest_mode && !child->parent_ptes.val) in mmu_page_zap_pte()
2269 zapped += mmu_page_zap_pte(kvm, sp, sp->spt + i, invalid_list); in kvm_mmu_page_unlink_children()
2279 while ((sptep = rmap_get_first(&sp->parent_ptes, &iter))) in kvm_mmu_unlink_parents()
2291 if (parent->role.level == PG_LEVEL_4K) in mmu_zap_unsync_children()
2315 ++kvm->stat.mmu_shadow_zapped; in __kvm_mmu_prepare_zap_page()
2323 if (!sp->role.invalid && !sp->role.direct) in __kvm_mmu_prepare_zap_page()
2326 if (sp->unsync) in __kvm_mmu_prepare_zap_page()
2328 if (!sp->root_count) { in __kvm_mmu_prepare_zap_page()
2335 * !sp->root_count. in __kvm_mmu_prepare_zap_page()
2337 if (sp->role.invalid) in __kvm_mmu_prepare_zap_page()
2338 list_add(&sp->link, invalid_list); in __kvm_mmu_prepare_zap_page()
2340 list_move(&sp->link, invalid_list); in __kvm_mmu_prepare_zap_page()
2341 kvm_mod_used_mmu_pages(kvm, -1); in __kvm_mmu_prepare_zap_page()
2347 list_del(&sp->link); in __kvm_mmu_prepare_zap_page()
2358 if (sp->lpage_disallowed) in __kvm_mmu_prepare_zap_page()
2361 sp->role.invalid = 1; in __kvm_mmu_prepare_zap_page()
2384 * the page tables and see changes to vcpu->mode here. The barrier in kvm_mmu_commit_zap_page()
2394 WARN_ON(!sp->role.invalid || sp->root_count); in kvm_mmu_commit_zap_page()
2408 if (list_empty(&kvm->arch.active_mmu_pages)) in kvm_mmu_zap_oldest_mmu_pages()
2412 list_for_each_entry_safe(sp, tmp, &kvm->arch.active_mmu_pages, link) { in kvm_mmu_zap_oldest_mmu_pages()
2417 if (sp->root_count) in kvm_mmu_zap_oldest_mmu_pages()
2432 kvm->stat.mmu_recycled += total_zapped; in kvm_mmu_zap_oldest_mmu_pages()
2438 if (kvm->arch.n_max_mmu_pages > kvm->arch.n_used_mmu_pages) in kvm_mmu_available_pages()
2439 return kvm->arch.n_max_mmu_pages - in kvm_mmu_available_pages()
2440 kvm->arch.n_used_mmu_pages; in kvm_mmu_available_pages()
2447 unsigned long avail = kvm_mmu_available_pages(vcpu->kvm); in make_mmu_pages_available()
2452 kvm_mmu_zap_oldest_mmu_pages(vcpu->kvm, KVM_REFILL_PAGES - avail); in make_mmu_pages_available()
2454 if (!kvm_mmu_available_pages(vcpu->kvm)) in make_mmu_pages_available()
2455 return -ENOSPC; in make_mmu_pages_available()
2465 spin_lock(&kvm->mmu_lock); in kvm_mmu_change_mmu_pages()
2467 if (kvm->arch.n_used_mmu_pages > goal_nr_mmu_pages) { in kvm_mmu_change_mmu_pages()
2468 kvm_mmu_zap_oldest_mmu_pages(kvm, kvm->arch.n_used_mmu_pages - in kvm_mmu_change_mmu_pages()
2471 goal_nr_mmu_pages = kvm->arch.n_used_mmu_pages; in kvm_mmu_change_mmu_pages()
2474 kvm->arch.n_max_mmu_pages = goal_nr_mmu_pages; in kvm_mmu_change_mmu_pages()
2476 spin_unlock(&kvm->mmu_lock); in kvm_mmu_change_mmu_pages()
2487 spin_lock(&kvm->mmu_lock); in kvm_mmu_unprotect_page()
2490 sp->role.word); in kvm_mmu_unprotect_page()
2495 spin_unlock(&kvm->mmu_lock); in kvm_mmu_unprotect_page()
2504 ++vcpu->kvm->stat.mmu_unsync; in kvm_unsync_page()
2505 sp->unsync = 1; in kvm_unsync_page()
2518 for_each_gfn_indirect_valid_sp(vcpu->kvm, sp, gfn) { in mmu_need_write_protect()
2522 if (sp->unsync) in mmu_need_write_protect()
2525 WARN_ON(sp->role.level != PG_LEVEL_4K); in mmu_need_write_protect()
2534 * before the page had been marked as unsync-ed, something like the in mmu_need_write_protect()
2538 * --------------------------------------------------------------------- in mmu_need_write_protect()
2551 * 2.3 kvm_mmu_sync_pages() reads sp->unsync. in mmu_need_write_protect()
2560 * (sp->unsync = true) in mmu_need_write_protect()
2627 drop_spte(vcpu->kvm, sptep); in mmu_set_spte()
2642 kvm_flush_remote_tlbs_with_address(vcpu->kvm, gfn, in mmu_set_spte()
2660 ++vcpu->kvm->stat.lpages; in mmu_set_spte()
2691 unsigned int access = sp->role.access; in direct_pte_prefetch_many()
2695 gfn = kvm_mmu_page_get_gfn(sp, start - sp->spt); in direct_pte_prefetch_many()
2698 return -1; in direct_pte_prefetch_many()
2700 ret = gfn_to_page_many_atomic(slot, gfn, pages, end - start); in direct_pte_prefetch_many()
2702 return -1; in direct_pte_prefetch_many()
2705 mmu_set_spte(vcpu, start, access, false, sp->role.level, gfn, in direct_pte_prefetch_many()
2719 WARN_ON(!sp->role.direct); in __direct_pte_prefetch()
2721 i = (sptep - sp->spt) & ~(PTE_PREFETCH_NUM - 1); in __direct_pte_prefetch()
2722 spte = sp->spt + i; in __direct_pte_prefetch()
2750 if (sp->role.level > PG_LEVEL_4K) in direct_pte_prefetch()
2767 * Note, using the already-retrieved memslot and __gfn_to_hva_memslot() in host_pfn_mapping_level()
2770 * read-only memslots due to gfn_to_hva() assuming writes. Earlier in host_pfn_mapping_level()
2772 * read-only memslot. in host_pfn_mapping_level()
2776 pte = lookup_address_in_mm(vcpu->kvm->mm, hva, &level); in host_pfn_mapping_level()
2806 for ( ; max_level > PG_LEVEL_4K; max_level--) { in kvm_mmu_hugepage_adjust()
2808 if (!linfo->disallow_lpage) in kvm_mmu_hugepage_adjust()
2832 mask = KVM_PAGES_PER_HPAGE(level) - 1; in kvm_mmu_hugepage_adjust()
2854 u64 page_mask = KVM_PAGES_PER_HPAGE(level) - in disallowed_hugepage_adjust()
2855 KVM_PAGES_PER_HPAGE(level - 1); in disallowed_hugepage_adjust()
2857 (*goal_levelp)--; in disallowed_hugepage_adjust()
2875 if (WARN_ON(!VALID_PAGE(vcpu->arch.mmu->root_hpa))) in __direct_map()
2891 base_gfn = gfn & ~(KVM_PAGES_PER_HPAGE(it.level) - 1); in __direct_map()
2898 it.level - 1, true, ACC_ALL); in __direct_map()
2903 account_huge_nx_page(vcpu->kvm, sp); in __direct_map()
2914 ++vcpu->stat.pf_fixed; in __direct_map()
2938 return -EFAULT; in kvm_handle_bad_page()
2977 * is caused by write-protect, that means we just need change the W in page_fault_can_be_fast()
2978 * bit of the spte which can be done out of mmu-lock. in page_fault_can_be_fast()
2980 * However, if access tracking is disabled we know that a non-present in page_fault_can_be_fast()
3001 WARN_ON(!sp->role.direct); in fast_pf_fix_direct_spte()
3011 * so non-PML cases won't be impacted. in fast_pf_fix_direct_spte()
3021 * calculated by sp->gfn. in fast_pf_fix_direct_spte()
3023 gfn = kvm_mmu_page_get_gfn(sp, sptep - sp->spt); in fast_pf_fix_direct_spte()
3067 if (!is_last_spte(spte, sp->role.level)) in fast_page_fault()
3091 * Currently, to simplify the code, write-protection can in fast_page_fault()
3093 * write-protected for dirty-logging or access tracking. in fast_page_fault()
3100 * Do not fix write-permission on the large spte. Since in fast_page_fault()
3101 * we only dirty the first page into the dirty-bitmap in in fast_page_fault()
3110 if (sp->role.level > PG_LEVEL_4K) in fast_page_fault()
3156 if (sp->tdp_mmu_page) in mmu_free_root_page()
3158 else if (sp->role.invalid) in mmu_free_root_page()
3169 struct kvm *kvm = vcpu->kvm; in kvm_mmu_free_roots()
3177 if (!(free_active_root && VALID_PAGE(mmu->root_hpa))) { in kvm_mmu_free_roots()
3180 VALID_PAGE(mmu->prev_roots[i].hpa)) in kvm_mmu_free_roots()
3187 spin_lock(&kvm->mmu_lock); in kvm_mmu_free_roots()
3191 mmu_free_root_page(kvm, &mmu->prev_roots[i].hpa, in kvm_mmu_free_roots()
3195 if (mmu->shadow_root_level >= PT64_ROOT_4LEVEL && in kvm_mmu_free_roots()
3196 (mmu->root_level >= PT64_ROOT_4LEVEL || mmu->direct_map)) { in kvm_mmu_free_roots()
3197 mmu_free_root_page(kvm, &mmu->root_hpa, &invalid_list); in kvm_mmu_free_roots()
3200 if (mmu->pae_root[i] != 0) in kvm_mmu_free_roots()
3202 &mmu->pae_root[i], in kvm_mmu_free_roots()
3204 mmu->root_hpa = INVALID_PAGE; in kvm_mmu_free_roots()
3206 mmu->root_pgd = 0; in kvm_mmu_free_roots()
3210 spin_unlock(&kvm->mmu_lock); in kvm_mmu_free_roots()
3231 spin_lock(&vcpu->kvm->mmu_lock); in mmu_alloc_root()
3234 spin_unlock(&vcpu->kvm->mmu_lock); in mmu_alloc_root()
3238 ++sp->root_count; in mmu_alloc_root()
3240 spin_unlock(&vcpu->kvm->mmu_lock); in mmu_alloc_root()
3241 return __pa(sp->spt); in mmu_alloc_root()
3246 u8 shadow_root_level = vcpu->arch.mmu->shadow_root_level; in mmu_alloc_direct_roots()
3250 if (vcpu->kvm->arch.tdp_mmu_enabled) { in mmu_alloc_direct_roots()
3254 return -ENOSPC; in mmu_alloc_direct_roots()
3255 vcpu->arch.mmu->root_hpa = root; in mmu_alloc_direct_roots()
3261 return -ENOSPC; in mmu_alloc_direct_roots()
3262 vcpu->arch.mmu->root_hpa = root; in mmu_alloc_direct_roots()
3265 MMU_WARN_ON(VALID_PAGE(vcpu->arch.mmu->pae_root[i])); in mmu_alloc_direct_roots()
3267 root = mmu_alloc_root(vcpu, i << (30 - PAGE_SHIFT), in mmu_alloc_direct_roots()
3270 return -ENOSPC; in mmu_alloc_direct_roots()
3271 vcpu->arch.mmu->pae_root[i] = root | PT_PRESENT_MASK; in mmu_alloc_direct_roots()
3273 vcpu->arch.mmu->root_hpa = __pa(vcpu->arch.mmu->pae_root); in mmu_alloc_direct_roots()
3278 vcpu->arch.mmu->root_pgd = 0; in mmu_alloc_direct_roots()
3290 root_pgd = vcpu->arch.mmu->get_guest_pgd(vcpu); in mmu_alloc_shadow_roots()
3298 * write-protect the guest's page table root. in mmu_alloc_shadow_roots()
3300 if (vcpu->arch.mmu->root_level >= PT64_ROOT_4LEVEL) { in mmu_alloc_shadow_roots()
3301 MMU_WARN_ON(VALID_PAGE(vcpu->arch.mmu->root_hpa)); in mmu_alloc_shadow_roots()
3304 vcpu->arch.mmu->shadow_root_level, false); in mmu_alloc_shadow_roots()
3306 return -ENOSPC; in mmu_alloc_shadow_roots()
3307 vcpu->arch.mmu->root_hpa = root; in mmu_alloc_shadow_roots()
3312 * We shadow a 32 bit page table. This may be a legacy 2-level in mmu_alloc_shadow_roots()
3313 * or a PAE 3-level page table. In either case we need to be aware that in mmu_alloc_shadow_roots()
3317 if (vcpu->arch.mmu->shadow_root_level == PT64_ROOT_4LEVEL) in mmu_alloc_shadow_roots()
3321 MMU_WARN_ON(VALID_PAGE(vcpu->arch.mmu->pae_root[i])); in mmu_alloc_shadow_roots()
3322 if (vcpu->arch.mmu->root_level == PT32E_ROOT_LEVEL) { in mmu_alloc_shadow_roots()
3323 pdptr = vcpu->arch.mmu->get_pdptr(vcpu, i); in mmu_alloc_shadow_roots()
3325 vcpu->arch.mmu->pae_root[i] = 0; in mmu_alloc_shadow_roots()
3336 return -ENOSPC; in mmu_alloc_shadow_roots()
3337 vcpu->arch.mmu->pae_root[i] = root | pm_mask; in mmu_alloc_shadow_roots()
3339 vcpu->arch.mmu->root_hpa = __pa(vcpu->arch.mmu->pae_root); in mmu_alloc_shadow_roots()
3345 if (vcpu->arch.mmu->shadow_root_level == PT64_ROOT_4LEVEL) { in mmu_alloc_shadow_roots()
3346 if (vcpu->arch.mmu->lm_root == NULL) { in mmu_alloc_shadow_roots()
3358 lm_root[0] = __pa(vcpu->arch.mmu->pae_root) | pm_mask; in mmu_alloc_shadow_roots()
3360 vcpu->arch.mmu->lm_root = lm_root; in mmu_alloc_shadow_roots()
3363 vcpu->arch.mmu->root_hpa = __pa(vcpu->arch.mmu->lm_root); in mmu_alloc_shadow_roots()
3367 vcpu->arch.mmu->root_pgd = root_pgd; in mmu_alloc_shadow_roots()
3374 if (vcpu->arch.mmu->direct_map) in mmu_alloc_roots()
3385 if (vcpu->arch.mmu->direct_map) in kvm_mmu_sync_roots()
3388 if (!VALID_PAGE(vcpu->arch.mmu->root_hpa)) in kvm_mmu_sync_roots()
3393 if (vcpu->arch.mmu->root_level >= PT64_ROOT_4LEVEL) { in kvm_mmu_sync_roots()
3394 hpa_t root = vcpu->arch.mmu->root_hpa; in kvm_mmu_sync_roots()
3398 * Even if another CPU was marking the SP as unsync-ed in kvm_mmu_sync_roots()
3407 if (!smp_load_acquire(&sp->unsync) && in kvm_mmu_sync_roots()
3408 !smp_load_acquire(&sp->unsync_children)) in kvm_mmu_sync_roots()
3411 spin_lock(&vcpu->kvm->mmu_lock); in kvm_mmu_sync_roots()
3417 spin_unlock(&vcpu->kvm->mmu_lock); in kvm_mmu_sync_roots()
3421 spin_lock(&vcpu->kvm->mmu_lock); in kvm_mmu_sync_roots()
3425 hpa_t root = vcpu->arch.mmu->pae_root[i]; in kvm_mmu_sync_roots()
3435 spin_unlock(&vcpu->kvm->mmu_lock); in kvm_mmu_sync_roots()
3443 exception->error_code = 0; in nonpaging_gva_to_gpa()
3452 exception->error_code = 0; in nonpaging_gva_to_gpa_nested()
3453 return vcpu->arch.nested_mmu.translate_gpa(vcpu, vaddr, access, exception); in nonpaging_gva_to_gpa_nested()
3461 return pte & rsvd_check->rsvd_bits_mask[bit7][level-1]; in __is_rsvd_bits_set()
3466 return rsvd_check->bad_mt_xwr & BIT_ULL(pte & 0x3f); in __is_bad_mt_xwr()
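
Both checks above reduce to a single AND against a precomputed table: rsvd_bits_mask is indexed by the PTE's bit 7 (the large-page bit) and by level, and bad_mt_xwr is a 64-bit bitmap indexed by the PTE's low six bits. A toy version with a simplified struct and invented masks (the real ones come from __reset_rsvds_bits_mask*() based on MAXPHYADDR, NX, PSE and so on):

#include <stdint.h>
#include <stdbool.h>

struct rsvd_bits_validate {             /* simplified stand-in, not the kernel struct */
    uint64_t rsvd_bits_mask[2][5];      /* [bit 7 of the pte][level - 1] */
    uint64_t bad_mt_xwr;                /* bit i set: low-6-bit combination i is illegal */
};

static bool is_rsvd_bits_set(const struct rsvd_bits_validate *c, uint64_t pte, int level)
{
    int bit7 = (pte >> 7) & 1;

    return pte & c->rsvd_bits_mask[bit7][level - 1];
}

static bool is_bad_mt_xwr(const struct rsvd_bits_validate *c, uint64_t pte)
{
    return c->bad_mt_xwr & (1ULL << (pte & 0x3f));
}

int main(void)
{
    struct rsvd_bits_validate c = { 0 };

    /* invented example: bits 51:40 are reserved in a level-1 non-large PTE */
    c.rsvd_bits_mask[0][0] = ((1ULL << 52) - 1) & ~((1ULL << 40) - 1);
    /* invented example: flag the "write-only" (W=1, R=0, X=0) encoding as bad */
    c.bad_mt_xwr |= 1ULL << 2;

    return is_rsvd_bits_set(&c, 1ULL << 45, 1) && is_bad_mt_xwr(&c, 0x2) ? 0 : 1;
}
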
3486 * That SPTE may be non-present.
3491 int leaf = vcpu->arch.mmu->root_level; in get_walk()
3503 sptes[leaf - 1] = spte; in get_walk()
3520 int root = vcpu->arch.mmu->shadow_root_level; in get_mmio_spte()
3525 if (!VALID_PAGE(vcpu->arch.mmu->root_hpa)) { in get_mmio_spte()
3530 if (is_tdp_mmu_root(vcpu->kvm, vcpu->arch.mmu->root_hpa)) in get_mmio_spte()
3535 rsvd_check = &vcpu->arch.mmu->shadow_zero_check; in get_mmio_spte()
3537 for (level = root; level >= leaf; level--) { in get_mmio_spte()
3538 if (!is_shadow_present_pte(sptes[level - 1])) in get_mmio_spte()
3541 * Use a bitwise-OR instead of a logical-OR to aggregate the in get_mmio_spte()
3545 reserved |= __is_bad_mt_xwr(rsvd_check, sptes[level - 1]) | in get_mmio_spte()
3546 __is_rsvd_bits_set(rsvd_check, sptes[level - 1], in get_mmio_spte()
3553 for (level = root; level >= leaf; level--) in get_mmio_spte()
3554 pr_err("------ spte 0x%llx level %d.\n", in get_mmio_spte()
3555 sptes[level - 1], level); in get_mmio_spte()
3558 *sptep = sptes[leaf - 1]; in get_mmio_spte()
3573 return -EINVAL; in handle_mmio_page_fault()
3636 arch.token = (vcpu->arch.apf.id++ << 12) | vcpu->vcpu_id; in kvm_arch_setup_async_pf()
3638 arch.direct_map = vcpu->arch.mmu->direct_map; in kvm_arch_setup_async_pf()
3639 arch.cr3 = vcpu->arch.mmu->get_guest_pgd(vcpu); in kvm_arch_setup_async_pf()
3692 if (!is_tdp_mmu_root(vcpu->kvm, vcpu->arch.mmu->root_hpa)) { in direct_page_fault()
3702 mmu_seq = vcpu->kvm->mmu_notifier_seq; in direct_page_fault()
3712 spin_lock(&vcpu->kvm->mmu_lock); in direct_page_fault()
3713 if (mmu_notifier_retry(vcpu->kvm, mmu_seq)) in direct_page_fault()
3719 if (is_tdp_mmu_root(vcpu->kvm, vcpu->arch.mmu->root_hpa)) in direct_page_fault()
3727 spin_unlock(&vcpu->kvm->mmu_lock); in direct_page_fault()
3746 u32 flags = vcpu->arch.apf.host_apf_flags; in kvm_handle_page_fault()
3749 /* A 64-bit CR2 should be impossible on 32-bit KVM. */ in kvm_handle_page_fault()
3751 return -EFAULT; in kvm_handle_page_fault()
3754 vcpu->arch.l1tf_flush_l1d = true; in kvm_handle_page_fault()
3763 vcpu->arch.apf.host_apf_flags = 0; in kvm_handle_page_fault()
3782 max_level--) { in kvm_tdp_page_fault()
3784 gfn_t base = (gpa >> PAGE_SHIFT) & ~(page_num - 1); in kvm_tdp_page_fault()
3797 context->page_fault = nonpaging_page_fault; in nonpaging_init_context()
3798 context->gva_to_gpa = nonpaging_gva_to_gpa; in nonpaging_init_context()
3799 context->sync_page = nonpaging_sync_page; in nonpaging_init_context()
3800 context->invlpg = NULL; in nonpaging_init_context()
3801 context->update_pte = nonpaging_update_pte; in nonpaging_init_context()
3802 context->root_level = 0; in nonpaging_init_context()
3803 context->shadow_root_level = PT32E_ROOT_LEVEL; in nonpaging_init_context()
3804 context->direct_map = true; in nonpaging_init_context()
3805 context->nx = false; in nonpaging_init_context()
3811 return (role.direct || pgd == root->pgd) && in is_root_usable()
3812 VALID_PAGE(root->hpa) && to_shadow_page(root->hpa) && in is_root_usable()
3813 role.word == to_shadow_page(root->hpa)->role.word; in is_root_usable()
3819 * If a matching root was found, it is assigned to kvm_mmu->root_hpa and true is
3821 * Otherwise, the LRU root from the cache is assigned to kvm_mmu->root_hpa and
3829 struct kvm_mmu *mmu = vcpu->arch.mmu; in cached_root_available()
3831 root.pgd = mmu->root_pgd; in cached_root_available()
3832 root.hpa = mmu->root_hpa; in cached_root_available()
3838 swap(root, mmu->prev_roots[i]); in cached_root_available()
3844 mmu->root_hpa = root.hpa; in cached_root_available()
3845 mmu->root_pgd = root.pgd; in cached_root_available()
3853 struct kvm_mmu *mmu = vcpu->arch.mmu; in fast_pgd_switch()
3856 * For now, limit the fast switch to 64-bit hosts+VMs in order to avoid in fast_pgd_switch()
3857 * having to deal with PDPTEs. We may add support for 32-bit hosts/VMs in fast_pgd_switch()
3860 if (mmu->shadow_root_level >= PT64_ROOT_4LEVEL && in fast_pgd_switch()
3861 mmu->root_level >= PT64_ROOT_4LEVEL) in fast_pgd_switch()
3872 kvm_mmu_free_roots(vcpu, vcpu->arch.mmu, KVM_MMU_ROOT_CURRENT); in __kvm_mmu_new_pgd()
3891 * switching to a new CR3, that GVA->GPA mapping may no longer be in __kvm_mmu_new_pgd()
3903 to_shadow_page(vcpu->arch.mmu->root_hpa)); in __kvm_mmu_new_pgd()
3940 * The RHS has bit 7 set iff level < mmu->last_nonleaf_level. in is_last_gpte()
3944 gpte &= level - mmu->last_nonleaf_level; in is_last_gpte()
3951 gpte |= level - PG_LEVEL_4K - 1; in is_last_gpte()
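
The two gpte lines above lean on unsigned wrap-around: level and last_nonleaf_level are tiny, so "level - last_nonleaf_level" has bit 7 (the PT_PAGE_SIZE_MASK position) set exactly when the subtraction underflows, i.e. when level < last_nonleaf_level, and "level - PG_LEVEL_4K - 1" has bit 7 set only for level == PG_LEVEL_4K. A few concrete values, assuming PG_LEVEL_4K is 1 and 4-level paging:

#include <stdio.h>

#define PG_LEVEL_4K 1
#define BIT7        0x80u

int main(void)
{
    unsigned last_nonleaf_level = 4;    /* e.g. 4-level paging */
    unsigned level;

    for (level = 1; level <= 4; level++) {
        unsigned a = level - last_nonleaf_level;    /* wraps for level < 4       */
        unsigned b = level - PG_LEVEL_4K - 1;       /* wraps only for level == 1 */

        printf("level %u: bit7(level - last_nonleaf) = %d, bit7(level - PG_LEVEL_4K - 1) = %d\n",
               level, !!(a & BIT7), !!(b & BIT7));
    }
    return 0;
}
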
3979 rsvd_check->bad_mt_xwr = 0; in __reset_rsvds_bits_mask()
3987 * Non-leaf PML4Es and PDPEs reserve bit 8 (which would be the G bit for in __reset_rsvds_bits_mask()
3996 rsvd_check->rsvd_bits_mask[0][1] = 0; in __reset_rsvds_bits_mask()
3997 rsvd_check->rsvd_bits_mask[0][0] = 0; in __reset_rsvds_bits_mask()
3998 rsvd_check->rsvd_bits_mask[1][0] = in __reset_rsvds_bits_mask()
3999 rsvd_check->rsvd_bits_mask[0][0]; in __reset_rsvds_bits_mask()
4002 rsvd_check->rsvd_bits_mask[1][1] = 0; in __reset_rsvds_bits_mask()
4008 rsvd_check->rsvd_bits_mask[1][1] = rsvd_bits(17, 21); in __reset_rsvds_bits_mask()
4011 rsvd_check->rsvd_bits_mask[1][1] = rsvd_bits(13, 21); in __reset_rsvds_bits_mask()
4014 rsvd_check->rsvd_bits_mask[0][2] = in __reset_rsvds_bits_mask()
4017 rsvd_check->rsvd_bits_mask[0][1] = exb_bit_rsvd | in __reset_rsvds_bits_mask()
4019 rsvd_check->rsvd_bits_mask[0][0] = exb_bit_rsvd | in __reset_rsvds_bits_mask()
4021 rsvd_check->rsvd_bits_mask[1][1] = exb_bit_rsvd | in __reset_rsvds_bits_mask()
4024 rsvd_check->rsvd_bits_mask[1][0] = in __reset_rsvds_bits_mask()
4025 rsvd_check->rsvd_bits_mask[0][0]; in __reset_rsvds_bits_mask()
4028 rsvd_check->rsvd_bits_mask[0][4] = exb_bit_rsvd | in __reset_rsvds_bits_mask()
4031 rsvd_check->rsvd_bits_mask[1][4] = in __reset_rsvds_bits_mask()
4032 rsvd_check->rsvd_bits_mask[0][4]; in __reset_rsvds_bits_mask()
4035 rsvd_check->rsvd_bits_mask[0][3] = exb_bit_rsvd | in __reset_rsvds_bits_mask()
4038 rsvd_check->rsvd_bits_mask[0][2] = exb_bit_rsvd | in __reset_rsvds_bits_mask()
4041 rsvd_check->rsvd_bits_mask[0][1] = exb_bit_rsvd | in __reset_rsvds_bits_mask()
4043 rsvd_check->rsvd_bits_mask[0][0] = exb_bit_rsvd | in __reset_rsvds_bits_mask()
4045 rsvd_check->rsvd_bits_mask[1][3] = in __reset_rsvds_bits_mask()
4046 rsvd_check->rsvd_bits_mask[0][3]; in __reset_rsvds_bits_mask()
4047 rsvd_check->rsvd_bits_mask[1][2] = exb_bit_rsvd | in __reset_rsvds_bits_mask()
4050 rsvd_check->rsvd_bits_mask[1][1] = exb_bit_rsvd | in __reset_rsvds_bits_mask()
4053 rsvd_check->rsvd_bits_mask[1][0] = in __reset_rsvds_bits_mask()
4054 rsvd_check->rsvd_bits_mask[0][0]; in __reset_rsvds_bits_mask()
4062 __reset_rsvds_bits_mask(vcpu, &context->guest_rsvd_check, in reset_rsvds_bits_mask()
4063 cpuid_maxphyaddr(vcpu), context->root_level, in reset_rsvds_bits_mask()
4064 context->nx, in reset_rsvds_bits_mask()
4076 rsvd_check->rsvd_bits_mask[0][4] = in __reset_rsvds_bits_mask_ept()
4078 rsvd_check->rsvd_bits_mask[0][3] = in __reset_rsvds_bits_mask_ept()
4080 rsvd_check->rsvd_bits_mask[0][2] = in __reset_rsvds_bits_mask_ept()
4082 rsvd_check->rsvd_bits_mask[0][1] = in __reset_rsvds_bits_mask_ept()
4084 rsvd_check->rsvd_bits_mask[0][0] = rsvd_bits(maxphyaddr, 51); in __reset_rsvds_bits_mask_ept()
4087 rsvd_check->rsvd_bits_mask[1][4] = rsvd_check->rsvd_bits_mask[0][4]; in __reset_rsvds_bits_mask_ept()
4088 rsvd_check->rsvd_bits_mask[1][3] = rsvd_check->rsvd_bits_mask[0][3]; in __reset_rsvds_bits_mask_ept()
4089 rsvd_check->rsvd_bits_mask[1][2] = in __reset_rsvds_bits_mask_ept()
4091 rsvd_check->rsvd_bits_mask[1][1] = in __reset_rsvds_bits_mask_ept()
4093 rsvd_check->rsvd_bits_mask[1][0] = rsvd_check->rsvd_bits_mask[0][0]; in __reset_rsvds_bits_mask_ept()
4104 rsvd_check->bad_mt_xwr = bad_mt_xwr; in __reset_rsvds_bits_mask_ept()
4110 __reset_rsvds_bits_mask_ept(&context->guest_rsvd_check, in reset_rsvds_bits_mask_ept()
4122 bool uses_nx = context->nx || in reset_shadow_zero_bits_mask()
4123 context->mmu_role.base.smep_andnot_wp; in reset_shadow_zero_bits_mask()
4131 shadow_zero_check = &context->shadow_zero_check; in reset_shadow_zero_bits_mask()
4134 context->shadow_root_level, uses_nx, in reset_shadow_zero_bits_mask()
4141 for (i = context->shadow_root_level; --i >= 0;) { in reset_shadow_zero_bits_mask()
4142 shadow_zero_check->rsvd_bits_mask[0][i] &= ~shadow_me_mask; in reset_shadow_zero_bits_mask()
4143 shadow_zero_check->rsvd_bits_mask[1][i] &= ~shadow_me_mask; in reset_shadow_zero_bits_mask()
4157 * possible, however, kvm currently does not do execution-protection.
4166 shadow_zero_check = &context->shadow_zero_check; in reset_tdp_shadow_zero_bits_mask()
4171 context->shadow_root_level, false, in reset_tdp_shadow_zero_bits_mask()
4182 for (i = context->shadow_root_level; --i >= 0;) { in reset_tdp_shadow_zero_bits_mask()
4183 shadow_zero_check->rsvd_bits_mask[0][i] &= ~shadow_me_mask; in reset_tdp_shadow_zero_bits_mask()
4184 shadow_zero_check->rsvd_bits_mask[1][i] &= ~shadow_me_mask; in reset_tdp_shadow_zero_bits_mask()
4196 __reset_rsvds_bits_mask_ept(&context->shadow_zero_check, in reset_ept_shadow_zero_bits_mask()
4223 for (byte = 0; byte < ARRAY_SIZE(mmu->permissions); ++byte) { in update_permission_bitmask()
4231 /* Faults from writes to non-writable pages */ in update_permission_bitmask()
4235 /* Faults from fetches of non-executable pages*/ in update_permission_bitmask()
4247 if (!mmu->nx) in update_permission_bitmask()
4259 * SMAP:kernel-mode data accesses from user-mode in update_permission_bitmask()
4263 * - X86_CR4_SMAP is set in CR4 in update_permission_bitmask()
4264 * - A user page is accessed in update_permission_bitmask()
4265 * - The access is not a fetch in update_permission_bitmask()
4266 * - Page fault in kernel mode in update_permission_bitmask()
4267 * - if CPL = 3 or X86_EFLAGS_AC is clear in update_permission_bitmask()
4278 mmu->permissions[byte] = ff | uf | wf | smepf | smapf; in update_permission_bitmask()
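
The loop above precomputes, for each combination of fault bits (the "byte" index), a bitmask over the 16 possible pte access-rights values, so the fault-time check collapses to a shift and an AND. A stripped-down sketch of that consumer side; the index and bit layout here are simplified assumptions, not the exact PFERR encoding used by permission_fault():

#include <stdint.h>
#include <stdbool.h>

/*
 * permissions[fault_type] has bit 'pte_access' set if that combination of
 * fault type and pte access rights must fault.
 */
struct toy_mmu {
    uint16_t permissions[16];
};

static bool toy_permission_fault(const struct toy_mmu *mmu,
                                 unsigned int fault_type,   /* e.g. write|user|fetch bits */
                                 unsigned int pte_access)   /* 0..15: access bits of the pte */
{
    return (mmu->permissions[fault_type & 15] >> (pte_access & 15)) & 1;
}

int main(void)
{
    struct toy_mmu mmu = { 0 };

    /* pretend the precompute step decided: fault type 2 ("write") faults on
     * access-rights values 0..7 (the pte is not writable in those cases) */
    mmu.permissions[2] = 0x00ff;

    return toy_permission_fault(&mmu, 2, 3) && !toy_permission_fault(&mmu, 2, 9) ? 0 : 1;
}
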
4284 * user-mode addresses based on the value in the PKRU register. Protection
4293 * - PK is always zero unless CR4.PKE=1 and EFER.LMA=1
4294 * - PK is always zero if RSVD=1 (reserved bit set) or F=1 (instruction fetch)
4295 * - PK is always zero if U=0 in the page tables
4296 * - PKRU.WD is ignored if CR0.WP=0 and the access is a supervisor access.
4313 mmu->pkru_mask = 0; in update_pkru_bitmask()
4319 mmu->pkru_mask = 0; in update_pkru_bitmask()
4325 for (bit = 0; bit < ARRAY_SIZE(mmu->permissions); ++bit) { in update_pkru_bitmask()
4353 mmu->pkru_mask |= (pkey_bits & 3) << pfec; in update_pkru_bitmask()
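
Each protection key owns two bits in PKRU (AD in the even bit, WD in the odd bit of its pair), and the pkru_mask built above packs a 2-bit "does AD/WD matter" value per fault-bit combination. The per-key extraction is simply "(pkru >> (pkey * 2)) & 3"; a quick worked example with an invented PKRU value:

#include <stdio.h>

int main(void)
{
    unsigned int pkru = 0x0000000c;     /* key 1: AD=1 (bit 2), WD=1 (bit 3) */
    unsigned int pkey;

    for (pkey = 0; pkey < 4; pkey++) {
        unsigned int bits = (pkru >> (pkey * 2)) & 3;

        printf("pkey %u: access-disable=%u write-disable=%u\n",
               pkey, bits & 1, (bits >> 1) & 1);
    }
    return 0;
}
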
4359 unsigned root_level = mmu->root_level; in update_last_nonleaf_level()
4361 mmu->last_nonleaf_level = root_level; in update_last_nonleaf_level()
4363 mmu->last_nonleaf_level++; in update_last_nonleaf_level()
4370 context->nx = is_nx(vcpu); in paging64_init_context_common()
4371 context->root_level = level; in paging64_init_context_common()
4379 context->page_fault = paging64_page_fault; in paging64_init_context_common()
4380 context->gva_to_gpa = paging64_gva_to_gpa; in paging64_init_context_common()
4381 context->sync_page = paging64_sync_page; in paging64_init_context_common()
4382 context->invlpg = paging64_invlpg; in paging64_init_context_common()
4383 context->update_pte = paging64_update_pte; in paging64_init_context_common()
4384 context->shadow_root_level = level; in paging64_init_context_common()
4385 context->direct_map = false; in paging64_init_context_common()
4400 context->nx = false; in paging32_init_context()
4401 context->root_level = PT32_ROOT_LEVEL; in paging32_init_context()
4408 context->page_fault = paging32_page_fault; in paging32_init_context()
4409 context->gva_to_gpa = paging32_gva_to_gpa; in paging32_init_context()
4410 context->sync_page = paging32_sync_page; in paging32_init_context()
4411 context->invlpg = paging32_invlpg; in paging32_init_context()
4412 context->update_pte = paging32_update_pte; in paging32_init_context()
4413 context->shadow_root_level = PT32E_ROOT_LEVEL; in paging32_init_context()
4414 context->direct_map = false; in paging32_init_context()
4425 union kvm_mmu_extended_role ext = {0}; in kvm_calc_mmu_role_ext() local
4427 ext.cr0_pg = !!is_paging(vcpu); in kvm_calc_mmu_role_ext()
4428 ext.cr4_pae = !!is_pae(vcpu); in kvm_calc_mmu_role_ext()
4429 ext.cr4_smep = !!kvm_read_cr4_bits(vcpu, X86_CR4_SMEP); in kvm_calc_mmu_role_ext()
4430 ext.cr4_smap = !!kvm_read_cr4_bits(vcpu, X86_CR4_SMAP); in kvm_calc_mmu_role_ext()
4431 ext.cr4_pse = !!is_pse(vcpu); in kvm_calc_mmu_role_ext()
4432 ext.cr4_pke = !!kvm_read_cr4_bits(vcpu, X86_CR4_PKE); in kvm_calc_mmu_role_ext()
4433 ext.maxphyaddr = cpuid_maxphyaddr(vcpu); in kvm_calc_mmu_role_ext()
4435 ext.valid = 1; in kvm_calc_mmu_role_ext()
4437 return ext; in kvm_calc_mmu_role_ext()
4454 role.ext = kvm_calc_mmu_role_ext(vcpu); in kvm_calc_mmu_role_common()
4461 /* Use 5-level TDP if and only if it's useful/necessary. */ in kvm_mmu_get_tdp_level()
4483 struct kvm_mmu *context = &vcpu->arch.root_mmu; in init_kvm_tdp_mmu()
4487 if (new_role.as_u64 == context->mmu_role.as_u64) in init_kvm_tdp_mmu()
4490 context->mmu_role.as_u64 = new_role.as_u64; in init_kvm_tdp_mmu()
4491 context->page_fault = kvm_tdp_page_fault; in init_kvm_tdp_mmu()
4492 context->sync_page = nonpaging_sync_page; in init_kvm_tdp_mmu()
4493 context->invlpg = NULL; in init_kvm_tdp_mmu()
4494 context->update_pte = nonpaging_update_pte; in init_kvm_tdp_mmu()
4495 context->shadow_root_level = kvm_mmu_get_tdp_level(vcpu); in init_kvm_tdp_mmu()
4496 context->direct_map = true; in init_kvm_tdp_mmu()
4497 context->get_guest_pgd = get_cr3; in init_kvm_tdp_mmu()
4498 context->get_pdptr = kvm_pdptr_read; in init_kvm_tdp_mmu()
4499 context->inject_page_fault = kvm_inject_page_fault; in init_kvm_tdp_mmu()
4502 context->nx = false; in init_kvm_tdp_mmu()
4503 context->gva_to_gpa = nonpaging_gva_to_gpa; in init_kvm_tdp_mmu()
4504 context->root_level = 0; in init_kvm_tdp_mmu()
4506 context->nx = is_nx(vcpu); in init_kvm_tdp_mmu()
4507 context->root_level = is_la57_mode(vcpu) ? in init_kvm_tdp_mmu()
4510 context->gva_to_gpa = paging64_gva_to_gpa; in init_kvm_tdp_mmu()
4512 context->nx = is_nx(vcpu); in init_kvm_tdp_mmu()
4513 context->root_level = PT32E_ROOT_LEVEL; in init_kvm_tdp_mmu()
4515 context->gva_to_gpa = paging64_gva_to_gpa; in init_kvm_tdp_mmu()
4517 context->nx = false; in init_kvm_tdp_mmu()
4518 context->root_level = PT32_ROOT_LEVEL; in init_kvm_tdp_mmu()
4520 context->gva_to_gpa = paging32_gva_to_gpa; in init_kvm_tdp_mmu()
4534 role.base.smep_andnot_wp = role.ext.cr4_smep && in kvm_calc_shadow_root_page_role_common()
4536 role.base.smap_andnot_wp = role.ext.cr4_smap && in kvm_calc_shadow_root_page_role_common()
4574 context->mmu_role.as_u64 = new_role.as_u64; in shadow_mmu_init_context()
4580 struct kvm_mmu *context = &vcpu->arch.root_mmu; in kvm_init_shadow_mmu()
4584 if (new_role.as_u64 != context->mmu_role.as_u64) in kvm_init_shadow_mmu()
4603 struct kvm_mmu *context = &vcpu->arch.guest_mmu; in kvm_init_shadow_npt_mmu()
4606 context->shadow_root_level = new_role.base.level; in kvm_init_shadow_npt_mmu()
4610 if (new_role.as_u64 != context->mmu_role.as_u64) in kvm_init_shadow_npt_mmu()
4622 role.base.smm = vcpu->arch.root_mmu.mmu_role.base.smm; in kvm_calc_shadow_ept_root_page_role()
4638 role.ext = kvm_calc_mmu_role_ext(vcpu); in kvm_calc_shadow_ept_root_page_role()
4639 role.ext.execonly = execonly; in kvm_calc_shadow_ept_root_page_role()
4647 struct kvm_mmu *context = &vcpu->arch.guest_mmu; in kvm_init_shadow_ept_mmu()
4655 if (new_role.as_u64 == context->mmu_role.as_u64) in kvm_init_shadow_ept_mmu()
4658 context->shadow_root_level = level; in kvm_init_shadow_ept_mmu()
4660 context->nx = true; in kvm_init_shadow_ept_mmu()
4661 context->ept_ad = accessed_dirty; in kvm_init_shadow_ept_mmu()
4662 context->page_fault = ept_page_fault; in kvm_init_shadow_ept_mmu()
4663 context->gva_to_gpa = ept_gva_to_gpa; in kvm_init_shadow_ept_mmu()
4664 context->sync_page = ept_sync_page; in kvm_init_shadow_ept_mmu()
4665 context->invlpg = ept_invlpg; in kvm_init_shadow_ept_mmu()
4666 context->update_pte = ept_update_pte; in kvm_init_shadow_ept_mmu()
4667 context->root_level = level; in kvm_init_shadow_ept_mmu()
4668 context->direct_map = false; in kvm_init_shadow_ept_mmu()
4669 context->mmu_role.as_u64 = new_role.as_u64; in kvm_init_shadow_ept_mmu()
4681 struct kvm_mmu *context = &vcpu->arch.root_mmu; in init_kvm_softmmu()
4686 vcpu->arch.efer); in init_kvm_softmmu()
4688 context->get_guest_pgd = get_cr3; in init_kvm_softmmu()
4689 context->get_pdptr = kvm_pdptr_read; in init_kvm_softmmu()
4690 context->inject_page_fault = kvm_inject_page_fault; in init_kvm_softmmu()
4696 struct kvm_mmu *g_context = &vcpu->arch.nested_mmu; in init_kvm_nested_mmu()
4698 if (new_role.as_u64 == g_context->mmu_role.as_u64) in init_kvm_nested_mmu()
4701 g_context->mmu_role.as_u64 = new_role.as_u64; in init_kvm_nested_mmu()
4702 g_context->get_guest_pgd = get_cr3; in init_kvm_nested_mmu()
4703 g_context->get_pdptr = kvm_pdptr_read; in init_kvm_nested_mmu()
4704 g_context->inject_page_fault = kvm_inject_page_fault; in init_kvm_nested_mmu()
4710 g_context->invlpg = NULL; in init_kvm_nested_mmu()
4713 * Note that arch.mmu->gva_to_gpa translates l2_gpa to l1_gpa using in init_kvm_nested_mmu()
4721 g_context->nx = false; in init_kvm_nested_mmu()
4722 g_context->root_level = 0; in init_kvm_nested_mmu()
4723 g_context->gva_to_gpa = nonpaging_gva_to_gpa_nested; in init_kvm_nested_mmu()
4725 g_context->nx = is_nx(vcpu); in init_kvm_nested_mmu()
4726 g_context->root_level = is_la57_mode(vcpu) ? in init_kvm_nested_mmu()
4729 g_context->gva_to_gpa = paging64_gva_to_gpa_nested; in init_kvm_nested_mmu()
4731 g_context->nx = is_nx(vcpu); in init_kvm_nested_mmu()
4732 g_context->root_level = PT32E_ROOT_LEVEL; in init_kvm_nested_mmu()
4734 g_context->gva_to_gpa = paging64_gva_to_gpa_nested; in init_kvm_nested_mmu()
4736 g_context->nx = false; in init_kvm_nested_mmu()
4737 g_context->root_level = PT32_ROOT_LEVEL; in init_kvm_nested_mmu()
4739 g_context->gva_to_gpa = paging32_gva_to_gpa_nested; in init_kvm_nested_mmu()
4752 vcpu->arch.mmu->root_hpa = INVALID_PAGE; in kvm_init_mmu()
4755 vcpu->arch.mmu->prev_roots[i] = KVM_MMU_ROOT_INFO_INVALID; in kvm_init_mmu()
4791 r = mmu_topup_memory_caches(vcpu, !vcpu->arch.mmu->direct_map); in kvm_mmu_load()
4807 kvm_mmu_free_roots(vcpu, &vcpu->arch.root_mmu, KVM_MMU_ROOTS_ALL); in kvm_mmu_unload()
4808 WARN_ON(VALID_PAGE(vcpu->arch.root_mmu.root_hpa)); in kvm_mmu_unload()
4809 kvm_mmu_free_roots(vcpu, &vcpu->arch.guest_mmu, KVM_MMU_ROOTS_ALL); in kvm_mmu_unload()
4810 WARN_ON(VALID_PAGE(vcpu->arch.guest_mmu.root_hpa)); in kvm_mmu_unload()
4818 if (sp->role.level != PG_LEVEL_4K) { in mmu_pte_write_new_pte()
4819 ++vcpu->kvm->stat.mmu_pde_zapped; in mmu_pte_write_new_pte()
4823 ++vcpu->kvm->stat.mmu_pte_updated; in mmu_pte_write_new_pte()
4824 vcpu->arch.mmu->update_pte(vcpu, sp, spte, new); in mmu_pte_write_new_pte()
4852 /* Handle a 32-bit guest writing two halves of a 64-bit gpte */ in mmu_pte_write_fetch_gpte()
4873 * Skip write-flooding detection for a sp whose level is 1, because in detect_write_flooding()
4874 * it can become unsync, and then the guest page is not write-protected. in detect_write_flooding()
4876 if (sp->role.level == PG_LEVEL_4K) in detect_write_flooding()
4879 atomic_inc(&sp->write_flooding_count); in detect_write_flooding()
4880 return atomic_read(&sp->write_flooding_count) >= 3; in detect_write_flooding()
4893 gpa, bytes, sp->role.word); in detect_write_misaligned()
4896 pte_size = sp->role.gpte_is_8_bytes ? 8 : 4; in detect_write_misaligned()
4902 if (!(offset & (pte_size - 1)) && bytes == 1) in detect_write_misaligned()
4905 misaligned = (offset ^ (offset + bytes - 1)) & ~(pte_size - 1); in detect_write_misaligned()
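
The XOR line above detects a guest write that straddles a pte boundary: XOR-ing the first and last byte offsets and masking off the in-pte bits leaves a non-zero value only if the two offsets land in different ptes (or the write is larger than one pte). Two concrete cases with 8-byte ptes:

#include <stdio.h>

static int misaligned(unsigned int offset, unsigned int bytes, unsigned int pte_size)
{
    return (offset ^ (offset + bytes - 1)) & ~(pte_size - 1);
}

int main(void)
{
    /* a 4-byte write at offset 6 crosses from pte 0 into pte 1 -> misaligned */
    printf("offset 6, 4 bytes: %s\n", misaligned(6, 4, 8) ? "misaligned" : "ok");
    /* a 4-byte write at offset 8 stays within pte 1 -> ok */
    printf("offset 8, 4 bytes: %s\n", misaligned(8, 4, 8) ? "misaligned" : "ok");
    return 0;
}
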
4918 level = sp->role.level; in get_written_sptes()
4920 if (!sp->role.gpte_is_8_bytes) { in get_written_sptes()
4921 page_offset <<= 1; /* 32->64 */ in get_written_sptes()
4923 * A 32-bit pde maps 4MB while the shadow pdes map in get_written_sptes()
4934 if (quadrant != sp->role.quadrant) in get_written_sptes()
4938 spte = &sp->spt[page_offset / sizeof(*spte)]; in get_written_sptes()
4945 * - level: explicitly checked in mmu_pte_write_new_pte(), and will never
4947 * - access: updated based on the new guest PTE
4948 * - quadrant: handled by get_written_sptes()
4949 * - invalid: always false (loop only walks valid shadow pages)
4971 * write-protected, so we can exit simply. in kvm_mmu_pte_write()
4973 if (!READ_ONCE(vcpu->kvm->arch.indirect_shadow_pages)) in kvm_mmu_pte_write()
4987 spin_lock(&vcpu->kvm->mmu_lock); in kvm_mmu_pte_write()
4991 ++vcpu->kvm->stat.mmu_pte_write; in kvm_mmu_pte_write()
4994 for_each_gfn_indirect_valid_sp(vcpu->kvm, sp, gfn) { in kvm_mmu_pte_write()
4997 kvm_mmu_prepare_zap_page(vcpu->kvm, sp, &invalid_list); in kvm_mmu_pte_write()
4998 ++vcpu->kvm->stat.mmu_flooded; in kvm_mmu_pte_write()
5007 while (npte--) { in kvm_mmu_pte_write()
5008 u32 base_role = vcpu->arch.mmu->mmu_role.base.word; in kvm_mmu_pte_write()
5011 mmu_page_zap_pte(vcpu->kvm, sp, spte, NULL); in kvm_mmu_pte_write()
5013 !((sp->role.word ^ base_role) & ~role_ign.word) && in kvm_mmu_pte_write()
5023 spin_unlock(&vcpu->kvm->mmu_lock); in kvm_mmu_pte_write()
5031 if (vcpu->arch.mmu->direct_map) in kvm_mmu_unprotect_page_virt()
5036 r = kvm_mmu_unprotect_page(vcpu->kvm, gpa >> PAGE_SHIFT); in kvm_mmu_unprotect_page_virt()
5046 bool direct = vcpu->arch.mmu->direct_map; in kvm_mmu_page_fault()
5048 if (WARN_ON(!VALID_PAGE(vcpu->arch.mmu->root_hpa))) in kvm_mmu_page_fault()
5062 return -EIO; in kvm_mmu_page_fault()
5077 if (vcpu->arch.mmu->direct_map && in kvm_mmu_page_fault()
5079 kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(cr2_or_gpa)); in kvm_mmu_page_fault()
5084 * vcpu->arch.mmu.page_fault returned RET_PF_EMULATE, but we can still in kvm_mmu_page_fault()
5086 * re-execute the instruction that caused the page fault. Do not allow in kvm_mmu_page_fault()
5089 * faulting on the non-existent MMIO address. Retrying an instruction in kvm_mmu_page_fault()
5107 /* It's actually a GPA for vcpu->arch.guest_mmu. */ in kvm_mmu_invalidate_gva()
5108 if (mmu != &vcpu->arch.guest_mmu) { in kvm_mmu_invalidate_gva()
5109 /* INVLPG on a non-canonical address is a NOP according to the SDM. */ in kvm_mmu_invalidate_gva()
5116 if (!mmu->invlpg) in kvm_mmu_invalidate_gva()
5120 mmu->invlpg(vcpu, gva, mmu->root_hpa); in kvm_mmu_invalidate_gva()
5134 if (VALID_PAGE(mmu->prev_roots[i].hpa)) in kvm_mmu_invalidate_gva()
5135 mmu->invlpg(vcpu, gva, mmu->prev_roots[i].hpa); in kvm_mmu_invalidate_gva()
5137 mmu->invlpg(vcpu, gva, root_hpa); in kvm_mmu_invalidate_gva()
5144 kvm_mmu_invalidate_gva(vcpu, vcpu->arch.mmu, gva, INVALID_PAGE); in kvm_mmu_invlpg()
5145 ++vcpu->stat.invlpg; in kvm_mmu_invlpg()
5152 struct kvm_mmu *mmu = vcpu->arch.mmu; in kvm_mmu_invpcid_gva()
5157 mmu->invlpg(vcpu, gva, mmu->root_hpa); in kvm_mmu_invpcid_gva()
5162 if (VALID_PAGE(mmu->prev_roots[i].hpa) && in kvm_mmu_invpcid_gva()
5163 pcid == kvm_get_pcid(vcpu, mmu->prev_roots[i].pgd)) { in kvm_mmu_invpcid_gva()
5164 mmu->invlpg(vcpu, gva, mmu->prev_roots[i].hpa); in kvm_mmu_invpcid_gva()
5172 ++vcpu->stat.invlpg; in kvm_mmu_invpcid_gva()
5207 /* The caller must hold mmu_lock before calling this function. */
5221 if (need_resched() || spin_needbreak(&kvm->mmu_lock)) { in slot_handle_level_range()
5225 iterator.gfn - start_gfn + 1); in slot_handle_level_range()
5228 cond_resched_lock(&kvm->mmu_lock); in slot_handle_level_range()
5234 end_gfn - start_gfn + 1); in slot_handle_level_range()
5247 end_level, memslot->base_gfn, in slot_handle_level()
5248 memslot->base_gfn + memslot->npages - 1, in slot_handle_level()
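/*
 * Editor's sketch: slot_handle_level_range() above walks a gfn range under
 * mmu_lock but periodically breaks the lock -- flushing any pending TLB work
 * first -- whenever rescheduling is needed or someone else wants the lock.
 * A self-contained userspace illustration of that lock-break pattern with a
 * pthread mutex (the "flush" is simulated; this is not the kernel helper):
 */
#include <pthread.h>
#include <sched.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t toy_lock = PTHREAD_MUTEX_INITIALIZER;

/* Placeholder for need_resched() || spin_needbreak(). */
static bool should_break_lock(unsigned long iter)
{
	return (iter % 1024) == 0;
}

static void process_range(unsigned long start, unsigned long end)
{
	bool flush_pending = false;

	pthread_mutex_lock(&toy_lock);
	for (unsigned long gfn = start; gfn < end; gfn++) {
		flush_pending = true;	/* pretend this gfn changed a mapping */

		if (should_break_lock(gfn + 1)) {
			if (flush_pending) {
				printf("flush range %lu..%lu\n", start, gfn);
				flush_pending = false;
			}
			pthread_mutex_unlock(&toy_lock);	/* lock break */
			sched_yield();
			pthread_mutex_lock(&toy_lock);
		}
	}
	if (flush_pending)
		printf("final flush range %lu..%lu\n", start, end - 1);
	pthread_mutex_unlock(&toy_lock);
}

int main(void)
{
	process_range(0, 4096);
	return 0;
}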
5278 free_page((unsigned long)mmu->pae_root); in free_mmu_pages()
5279 free_page((unsigned long)mmu->lm_root); in free_mmu_pages()
5287 mmu->root_hpa = INVALID_PAGE; in __kvm_mmu_create()
5288 mmu->root_pgd = 0; in __kvm_mmu_create()
5289 mmu->translate_gpa = translate_gpa; in __kvm_mmu_create()
5291 mmu->prev_roots[i] = KVM_MMU_ROOT_INFO_INVALID; in __kvm_mmu_create()
5295 * while the PDP table is a per-vCPU construct that's allocated at MMU in __kvm_mmu_create()
5296 * creation. When emulating 32-bit mode, cr3 is only 32 bits even on in __kvm_mmu_create()
5299 * SVM's 32-bit NPT support, TDP paging doesn't use PAE paging and can in __kvm_mmu_create()
5307 return -ENOMEM; in __kvm_mmu_create()
5309 mmu->pae_root = page_address(page); in __kvm_mmu_create()
5311 mmu->pae_root[i] = INVALID_PAGE; in __kvm_mmu_create()
5320 vcpu->arch.mmu_pte_list_desc_cache.kmem_cache = pte_list_desc_cache; in kvm_mmu_create()
5321 vcpu->arch.mmu_pte_list_desc_cache.gfp_zero = __GFP_ZERO; in kvm_mmu_create()
5323 vcpu->arch.mmu_page_header_cache.kmem_cache = mmu_page_header_cache; in kvm_mmu_create()
5324 vcpu->arch.mmu_page_header_cache.gfp_zero = __GFP_ZERO; in kvm_mmu_create()
5326 vcpu->arch.mmu_shadow_page_cache.gfp_zero = __GFP_ZERO; in kvm_mmu_create()
5328 vcpu->arch.mmu = &vcpu->arch.root_mmu; in kvm_mmu_create()
5329 vcpu->arch.walk_mmu = &vcpu->arch.root_mmu; in kvm_mmu_create()
5331 vcpu->arch.nested_mmu.translate_gpa = translate_nested_gpa; in kvm_mmu_create()
5333 ret = __kvm_mmu_create(vcpu, &vcpu->arch.guest_mmu); in kvm_mmu_create()
5337 ret = __kvm_mmu_create(vcpu, &vcpu->arch.root_mmu); in kvm_mmu_create()
5343 free_mmu_pages(&vcpu->arch.guest_mmu); in kvm_mmu_create()
5355 &kvm->arch.active_mmu_pages, link) { in kvm_zap_obsolete_pages()
5368 if (WARN_ON(sp->role.invalid)) in kvm_zap_obsolete_pages()
5378 cond_resched_lock(&kvm->mmu_lock)) { in kvm_zap_obsolete_pages()
5384 &kvm->arch.zapped_obsolete_pages, &nr_zapped)) { in kvm_zap_obsolete_pages()
5395 kvm_mmu_commit_zap_page(kvm, &kvm->arch.zapped_obsolete_pages); in kvm_zap_obsolete_pages()
5399 * Fast invalidate all shadow pages and use lock-break technique
5404 * not use any resource of the slot being deleted, or of any slot,
5409 lockdep_assert_held(&kvm->slots_lock); in kvm_mmu_zap_all_fast()
5411 spin_lock(&kvm->mmu_lock); in kvm_mmu_zap_all_fast()
5421 kvm->arch.mmu_valid_gen = kvm->arch.mmu_valid_gen ? 0 : 1; in kvm_mmu_zap_all_fast()
5435 if (kvm->arch.tdp_mmu_enabled) in kvm_mmu_zap_all_fast()
5438 spin_unlock(&kvm->mmu_lock); in kvm_mmu_zap_all_fast()
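/*
 * Editor's sketch: the mmu_valid_gen flip above is what makes the fast
 * invalidation fast -- toggling the VM-wide generation marks every existing
 * shadow page obsolete in O(1), and the pages are then reclaimed lazily with
 * lock breaks.  Minimal illustration with a toy one-bit generation (assumed
 * layout, not the kernel's structures):
 */
#include <stdbool.h>
#include <stdio.h>

struct toy_vm          { unsigned long mmu_valid_gen : 1; };
struct toy_shadow_page { unsigned long mmu_valid_gen : 1; };

static bool is_obsolete(const struct toy_vm *vm, const struct toy_shadow_page *sp)
{
	return sp->mmu_valid_gen != vm->mmu_valid_gen;
}

int main(void)
{
	struct toy_vm vm = { .mmu_valid_gen = 0 };
	struct toy_shadow_page sp = { .mmu_valid_gen = vm.mmu_valid_gen };

	printf("before flip: obsolete=%d\n", is_obsolete(&vm, &sp));

	/* O(1) "zap all": every page created before the flip is now stale. */
	vm.mmu_valid_gen = vm.mmu_valid_gen ? 0 : 1;

	printf("after flip:  obsolete=%d\n", is_obsolete(&vm, &sp));
	return 0;
}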
5443 return unlikely(!list_empty_careful(&kvm->arch.zapped_obsolete_pages)); in kvm_has_zapped_obsolete_pages()
5455 struct kvm_page_track_notifier_node *node = &kvm->arch.mmu_sp_tracker; in kvm_mmu_init_vm()
5459 node->track_write = kvm_mmu_pte_write; in kvm_mmu_init_vm()
5460 node->track_flush_slot = kvm_mmu_invalidate_zap_pages_in_memslot; in kvm_mmu_init_vm()
5466 struct kvm_page_track_notifier_node *node = &kvm->arch.mmu_sp_tracker; in kvm_mmu_uninit_vm()
5480 spin_lock(&kvm->mmu_lock); in kvm_zap_gfn_range()
5486 start = max(gfn_start, memslot->base_gfn); in kvm_zap_gfn_range()
5487 end = min(gfn_end, memslot->base_gfn + memslot->npages); in kvm_zap_gfn_range()
5494 start, end - 1, true); in kvm_zap_gfn_range()
5498 if (kvm->arch.tdp_mmu_enabled) { in kvm_zap_gfn_range()
5504 spin_unlock(&kvm->mmu_lock); in kvm_zap_gfn_range()
5519 spin_lock(&kvm->mmu_lock); in kvm_mmu_slot_remove_write_access()
5522 if (kvm->arch.tdp_mmu_enabled) in kvm_mmu_slot_remove_write_access()
5524 spin_unlock(&kvm->mmu_lock); in kvm_mmu_slot_remove_write_access()
5562 if (sp->role.direct && !kvm_is_reserved_pfn(pfn) && in kvm_mmu_zap_collapsible_spte()
5568 kvm_flush_remote_tlbs_with_address(kvm, sp->gfn, in kvm_mmu_zap_collapsible_spte()
5569 KVM_PAGES_PER_HPAGE(sp->role.level)); in kvm_mmu_zap_collapsible_spte()
5583 /* FIXME: const-ify all uses of struct kvm_memory_slot. */ in kvm_mmu_zap_collapsible_sptes()
5584 spin_lock(&kvm->mmu_lock); in kvm_mmu_zap_collapsible_sptes()
5588 if (kvm->arch.tdp_mmu_enabled) in kvm_mmu_zap_collapsible_sptes()
5590 spin_unlock(&kvm->mmu_lock); in kvm_mmu_zap_collapsible_sptes()
5603 lockdep_assert_held(&kvm->slots_lock); in kvm_arch_flush_remote_tlbs_memslot()
5604 kvm_flush_remote_tlbs_with_address(kvm, memslot->base_gfn, in kvm_arch_flush_remote_tlbs_memslot()
5605 memslot->npages); in kvm_arch_flush_remote_tlbs_memslot()
5613 spin_lock(&kvm->mmu_lock); in kvm_mmu_slot_leaf_clear_dirty()
5615 if (kvm->arch.tdp_mmu_enabled) in kvm_mmu_slot_leaf_clear_dirty()
5617 spin_unlock(&kvm->mmu_lock); in kvm_mmu_slot_leaf_clear_dirty()
5635 spin_lock(&kvm->mmu_lock); in kvm_mmu_slot_largepage_remove_write_access()
5638 if (kvm->arch.tdp_mmu_enabled) in kvm_mmu_slot_largepage_remove_write_access()
5640 spin_unlock(&kvm->mmu_lock); in kvm_mmu_slot_largepage_remove_write_access()
5652 spin_lock(&kvm->mmu_lock); in kvm_mmu_slot_set_dirty()
5654 if (kvm->arch.tdp_mmu_enabled) in kvm_mmu_slot_set_dirty()
5656 spin_unlock(&kvm->mmu_lock); in kvm_mmu_slot_set_dirty()
5669 spin_lock(&kvm->mmu_lock); in kvm_mmu_zap_all()
5671 list_for_each_entry_safe(sp, node, &kvm->arch.active_mmu_pages, link) { in kvm_mmu_zap_all()
5672 if (WARN_ON(sp->role.invalid)) in kvm_mmu_zap_all()
5676 if (cond_resched_lock(&kvm->mmu_lock)) in kvm_mmu_zap_all()
5682 if (kvm->arch.tdp_mmu_enabled) in kvm_mmu_zap_all()
5685 spin_unlock(&kvm->mmu_lock); in kvm_mmu_zap_all()
5688 void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, u64 gen) in kvm_mmu_invalidate_mmio_sptes() argument
5690 WARN_ON(gen & KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS); in kvm_mmu_invalidate_mmio_sptes()
5692 gen &= MMIO_SPTE_GEN_MASK; in kvm_mmu_invalidate_mmio_sptes()
5701 gen &= ~((u64)KVM_ADDRESS_SPACE_NUM - 1); in kvm_mmu_invalidate_mmio_sptes()
5707 if (unlikely(gen == 0)) { in kvm_mmu_invalidate_mmio_sptes()
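/*
 * Editor's sketch: MMIO sptes cache only the low bits of the memslot
 * generation, so once the truncated value wraps back to zero a stale spte
 * could look current again and everything must be zapped.  Standalone
 * illustration of the wrap check; the mask width and address-space count
 * below are assumptions chosen for the example, not the kernel's constants.
 */
#include <stdint.h>
#include <stdio.h>

#define TOY_ADDRESS_SPACE_NUM  2ull
#define TOY_MMIO_GEN_MASK      ((1ull << 18) - 1)

/* Returns 1 when the truncated MMIO generation has wrapped to zero. */
static int mmio_gen_wrapped(uint64_t memslot_gen)
{
	uint64_t gen = memslot_gen & TOY_MMIO_GEN_MASK;

	/* Strip the address-space modifier before checking for a wrap. */
	gen &= ~(TOY_ADDRESS_SPACE_NUM - 1);
	return gen == 0;
}

int main(void)
{
	printf("%d\n", mmio_gen_wrapped(12345));       /* 0: ordinary generation   */
	printf("%d\n", mmio_gen_wrapped(1ull << 18));  /* 1: wrapped past the mask */
	printf("%d\n", mmio_gen_wrapped(1));           /* 1: only the AS bit set   */
	return 0;
}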
5717 int nr_to_scan = sc->nr_to_scan; in mmu_shrink_scan()
5727 * Never scan more than sc->nr_to_scan VM instances. in mmu_shrink_scan()
5732 if (!nr_to_scan--) in mmu_shrink_scan()
5735 * n_used_mmu_pages is accessed without holding kvm->mmu_lock in mmu_shrink_scan()
5740 if (!kvm->arch.n_used_mmu_pages && in mmu_shrink_scan()
5744 idx = srcu_read_lock(&kvm->srcu); in mmu_shrink_scan()
5745 spin_lock(&kvm->mmu_lock); in mmu_shrink_scan()
5749 &kvm->arch.zapped_obsolete_pages); in mmu_shrink_scan()
5753 freed = kvm_mmu_zap_oldest_mmu_pages(kvm, sc->nr_to_scan); in mmu_shrink_scan()
5756 spin_unlock(&kvm->mmu_lock); in mmu_shrink_scan()
5757 srcu_read_unlock(&kvm->srcu, idx); in mmu_shrink_scan()
5761 * per-vm shrinkers cry out in mmu_shrink_scan()
5764 list_move_tail(&kvm->vm_list, &vm_list); in mmu_shrink_scan()
5796 * PFEC.RSVD=1 on MMIO accesses. 64-bit PTEs (PAE, x86-64, and EPT in kvm_set_mmio_spte_mask()
5798 * 52-bit physical addresses then there are no reserved PA bits in the in kvm_set_mmio_spte_mask()
5833 return -EINVAL; in set_nx_huge_pages()
5843 mutex_lock(&kvm->slots_lock); in set_nx_huge_pages()
5845 mutex_unlock(&kvm->slots_lock); in set_nx_huge_pages()
5847 wake_up_process(kvm->arch.nx_lpage_recovery_thread); in set_nx_huge_pages()
5857 int ret = -ENOMEM; in kvm_mmu_module_init()
5859 if (nx_huge_pages == -1) in kvm_mmu_module_init()
5917 nr_pages += memslot->npages; in kvm_mmu_calculate_default_mmu_pages()
5929 free_mmu_pages(&vcpu->arch.root_mmu); in kvm_mmu_destroy()
5930 free_mmu_pages(&vcpu->arch.guest_mmu); in kvm_mmu_destroy()
5959 wake_up_process(kvm->arch.nx_lpage_recovery_thread); in set_nx_huge_pages_recovery_ratio()
5975 rcu_idx = srcu_read_lock(&kvm->srcu); in kvm_recover_nx_lpages()
5976 spin_lock(&kvm->mmu_lock); in kvm_recover_nx_lpages()
5979 to_zap = ratio ? DIV_ROUND_UP(kvm->stat.nx_lpage_splits, ratio) : 0; in kvm_recover_nx_lpages()
5980 for ( ; to_zap; --to_zap) { in kvm_recover_nx_lpages()
5981 if (list_empty(&kvm->arch.lpage_disallowed_mmu_pages)) in kvm_recover_nx_lpages()
5989 sp = list_first_entry(&kvm->arch.lpage_disallowed_mmu_pages, in kvm_recover_nx_lpages()
5992 WARN_ON_ONCE(!sp->lpage_disallowed); in kvm_recover_nx_lpages()
5993 if (sp->tdp_mmu_page) in kvm_recover_nx_lpages()
5994 kvm_tdp_mmu_zap_gfn_range(kvm, sp->gfn, in kvm_recover_nx_lpages()
5995 sp->gfn + KVM_PAGES_PER_HPAGE(sp->role.level)); in kvm_recover_nx_lpages()
5998 WARN_ON_ONCE(sp->lpage_disallowed); in kvm_recover_nx_lpages()
6001 if (need_resched() || spin_needbreak(&kvm->mmu_lock)) { in kvm_recover_nx_lpages()
6003 cond_resched_lock(&kvm->mmu_lock); in kvm_recover_nx_lpages()
6008 spin_unlock(&kvm->mmu_lock); in kvm_recover_nx_lpages()
6009 srcu_read_unlock(&kvm->srcu, rcu_idx); in kvm_recover_nx_lpages()
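/*
 * Editor's sketch: the to_zap computation above means each pass of the NX
 * huge page recovery worker reclaims at most 1/ratio of the currently split
 * huge pages, rounded up, while a ratio of 0 disables recovery entirely.
 * Tiny standalone illustration of the arithmetic (DIV_ROUND_UP spelled out
 * locally):
 */
#include <stdio.h>

#define TOY_DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

static unsigned long toy_to_zap(unsigned long nx_lpage_splits, unsigned int ratio)
{
	return ratio ? TOY_DIV_ROUND_UP(nx_lpage_splits, ratio) : 0;
}

int main(void)
{
	printf("%lu\n", toy_to_zap(1000, 60)); /* 17: ceil(1000 / 60)            */
	printf("%lu\n", toy_to_zap(59, 60));   /* 1:  always at least one if any */
	printf("%lu\n", toy_to_zap(1000, 0));  /* 0:  ratio 0 disables recovery  */
	return 0;
}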
6015 ? start_time + 60 * HZ - get_jiffies_64() in get_nx_lpage_recovery_timeout()
6049 "kvm-nx-lpage-recovery", in kvm_mmu_post_init_vm()
6050 &kvm->arch.nx_lpage_recovery_thread); in kvm_mmu_post_init_vm()
6052 kthread_unpark(kvm->arch.nx_lpage_recovery_thread); in kvm_mmu_post_init_vm()
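/*
 * Editor's sketch: the timeout fragment in get_nx_lpage_recovery_timeout()
 * schedules the next recovery pass relative to when the previous period
 * started, so the thread wakes roughly once per minute regardless of how long
 * a pass took.  A userspace illustration of that "absolute deadline"
 * computation using CLOCK_MONOTONIC (the 60-second period mirrors the
 * fragment; everything else is an assumption for the example):
 */
#include <stdint.h>
#include <stdio.h>
#include <time.h>

#define TOY_PERIOD_MS 60000	/* one pass per minute */

static int64_t now_ms(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return (int64_t)ts.tv_sec * 1000 + ts.tv_nsec / 1000000;
}

/* Milliseconds left until the next pass; may go negative if a pass overran. */
static int64_t next_timeout_ms(int64_t start_time_ms)
{
	return start_time_ms + TOY_PERIOD_MS - now_ms();
}

int main(void)
{
	int64_t start = now_ms();

	printf("sleep for ~%lld ms\n", (long long)next_timeout_ms(start));
	return 0;
}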
6059 if (kvm->arch.nx_lpage_recovery_thread) in kvm_mmu_pre_destroy_vm()
6060 kthread_stop(kvm->arch.nx_lpage_recovery_thread); in kvm_mmu_pre_destroy_vm()