// SPDX-License-Identifier: GPL-2.0-only
/*
 * kvm asynchronous fault support
 *
 * Copyright 2010 Red Hat, Inc.
 *
 * Author:
 *      Gleb Natapov <gleb@redhat.com>
 */

#include <linux/kvm_host.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/mmu_context.h>
#include <linux/sched/mm.h>

#include "async_pf.h"
#include <trace/events/kvm.h>

static inline void kvm_async_page_present_sync(struct kvm_vcpu *vcpu,
                                               struct kvm_async_pf *work)
{
#ifdef CONFIG_KVM_ASYNC_PF_SYNC
        kvm_arch_async_page_present(vcpu, work);
#endif
}

static inline void kvm_async_page_present_async(struct kvm_vcpu *vcpu,
                                                struct kvm_async_pf *work)
{
#ifndef CONFIG_KVM_ASYNC_PF_SYNC
        kvm_arch_async_page_present(vcpu, work);
#endif
}

static struct kmem_cache *async_pf_cache;

int kvm_async_pf_init(void)
{
        async_pf_cache = KMEM_CACHE(kvm_async_pf, 0);

        if (!async_pf_cache)
                return -ENOMEM;

        return 0;
}

void kvm_async_pf_deinit(void)
{
        kmem_cache_destroy(async_pf_cache);
        async_pf_cache = NULL;
}

void kvm_async_pf_vcpu_init(struct kvm_vcpu *vcpu)
{
        INIT_LIST_HEAD(&vcpu->async_pf.done);
        INIT_LIST_HEAD(&vcpu->async_pf.queue);
        spin_lock_init(&vcpu->async_pf.lock);
}

static void async_pf_execute(struct work_struct *work)
{
        struct kvm_async_pf *apf =
                container_of(work, struct kvm_async_pf, work);
        struct mm_struct *mm = apf->mm;
        struct kvm_vcpu *vcpu = apf->vcpu;
        unsigned long addr = apf->addr;
        gva_t gva = apf->gva;
        int locked = 1;

        might_sleep();

        /*
         * This work is run asynchronously to the task which owns
         * mm and might be done in another context, so we must
         * access remotely.
         */
        down_read(&mm->mmap_sem);
        get_user_pages_remote(NULL, mm, addr, 1, FOLL_WRITE, NULL, NULL,
                              &locked);
        if (locked)
                up_read(&mm->mmap_sem);

        kvm_async_page_present_sync(vcpu, apf);

        spin_lock(&vcpu->async_pf.lock);
        list_add_tail(&apf->link, &vcpu->async_pf.done);
        apf->vcpu = NULL;
        spin_unlock(&vcpu->async_pf.lock);

        /*
         * apf may be freed by kvm_check_async_pf_completion() after
         * this point
         */

        trace_kvm_async_pf_completed(addr, gva);

        if (swq_has_sleeper(&vcpu->wq))
                swake_up_one(&vcpu->wq);

        mmput(mm);
        kvm_put_kvm(vcpu->kvm);
}
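
/*
 * Note on lifetimes (a summary of the code above, not new behaviour): every
 * work item created by kvm_setup_async_pf() holds one mm reference (mmget)
 * and one kvm reference (kvm_get_kvm); async_pf_execute() drops both once
 * the page has been faulted in. Setting apf->vcpu to NULL under
 * async_pf.lock marks the item as already moved to the done list, which the
 * cancellation path below relies on.
 */
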
void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu)
{
        spin_lock(&vcpu->async_pf.lock);

        /* cancel outstanding work queue item */
        while (!list_empty(&vcpu->async_pf.queue)) {
                struct kvm_async_pf *work =
                        list_first_entry(&vcpu->async_pf.queue,
                                         typeof(*work), queue);
                list_del(&work->queue);

                /*
                 * We know it's present in vcpu->async_pf.done, do
                 * nothing here.
                 */
                if (!work->vcpu)
                        continue;

                spin_unlock(&vcpu->async_pf.lock);
#ifdef CONFIG_KVM_ASYNC_PF_SYNC
                flush_work(&work->work);
#else
                if (cancel_work_sync(&work->work)) {
                        mmput(work->mm);
                        kvm_put_kvm(vcpu->kvm); /* == work->vcpu->kvm */
                        kmem_cache_free(async_pf_cache, work);
                }
#endif
                spin_lock(&vcpu->async_pf.lock);
        }

        while (!list_empty(&vcpu->async_pf.done)) {
                struct kvm_async_pf *work =
                        list_first_entry(&vcpu->async_pf.done,
                                         typeof(*work), link);
                list_del(&work->link);
                kmem_cache_free(async_pf_cache, work);
        }
        spin_unlock(&vcpu->async_pf.lock);

        vcpu->async_pf.queued = 0;
}
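
/*
 * Cancellation semantics (restating the branches above): with
 * CONFIG_KVM_ASYNC_PF_SYNC the "page present" notification runs from the
 * worker itself, so the work must be flushed rather than cancelled. In the
 * async case, cancel_work_sync() returning true means the worker never ran,
 * so the references taken in kvm_setup_async_pf() are still owned by the
 * work item and must be dropped here; if it returns false, the worker has
 * run (or is running) and drops them itself in async_pf_execute().
 */
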
void kvm_check_async_pf_completion(struct kvm_vcpu *vcpu)
{
        struct kvm_async_pf *work;

        while (!list_empty_careful(&vcpu->async_pf.done) &&
               kvm_arch_can_inject_async_page_present(vcpu)) {
                spin_lock(&vcpu->async_pf.lock);
                work = list_first_entry(&vcpu->async_pf.done, typeof(*work),
                                        link);
                list_del(&work->link);
                spin_unlock(&vcpu->async_pf.lock);

                kvm_arch_async_page_ready(vcpu, work);
                kvm_async_page_present_async(vcpu, work);

                list_del(&work->queue);
                vcpu->async_pf.queued--;
                kmem_cache_free(async_pf_cache, work);
        }
}
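
/*
 * Note on the ordering above: kvm_arch_async_page_ready() gives the arch a
 * chance to act on the now-resolved fault (on x86, for instance, it can
 * re-run the page-fault handler to prefault the page) before
 * kvm_async_page_present_async() delivers the "page present" notification
 * to the guest. The unconditional list_del(&work->queue) also handles
 * "wakeup all" items, whose queue node is self-initialised in
 * kvm_async_pf_wakeup_all() precisely so that this list_del() is safe.
 */
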
int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, unsigned long hva,
                       struct kvm_arch_async_pf *arch)
{
        struct kvm_async_pf *work;

        if (vcpu->async_pf.queued >= ASYNC_PF_PER_VCPU)
                return 0;

        /* setup delayed work */

        /*
         * Do the allocation without waiting: if we are going to sleep
         * anyway, we may as well sleep faulting in the page.
         */
        work = kmem_cache_zalloc(async_pf_cache, GFP_NOWAIT | __GFP_NOWARN);
        if (!work)
                return 0;

        work->wakeup_all = false;
        work->vcpu = vcpu;
        work->gva = gva;
        work->addr = hva;
        work->arch = *arch;
        work->mm = current->mm;
        mmget(work->mm);
        kvm_get_kvm(work->vcpu->kvm);

        /*
         * This can't really happen; otherwise gfn_to_pfn_async
         * would have succeeded.
         */
        if (unlikely(kvm_is_error_hva(work->addr)))
                goto retry_sync;

        INIT_WORK(&work->work, async_pf_execute);
        if (!schedule_work(&work->work))
                goto retry_sync;

        list_add_tail(&work->queue, &vcpu->async_pf.queue);
        vcpu->async_pf.queued++;
        kvm_arch_async_page_not_present(vcpu, work);
        return 1;
retry_sync:
        kvm_put_kvm(work->vcpu->kvm);
        mmput(work->mm);
        kmem_cache_free(async_pf_cache, work);
        return 0;
}

int kvm_async_pf_wakeup_all(struct kvm_vcpu *vcpu)
{
        struct kvm_async_pf *work;

        if (!list_empty_careful(&vcpu->async_pf.done))
                return 0;

        work = kmem_cache_zalloc(async_pf_cache, GFP_ATOMIC);
        if (!work)
                return -ENOMEM;

        work->wakeup_all = true;
        INIT_LIST_HEAD(&work->queue); /* for list_del to work */

        spin_lock(&vcpu->async_pf.lock);
        list_add_tail(&work->link, &vcpu->async_pf.done);
        spin_unlock(&vcpu->async_pf.lock);

        vcpu->async_pf.queued++;
        return 0;
}
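
/*
 * Illustrative sketch (not part of this file): how arch code typically
 * drives this API. Names other than the kvm_* functions defined above are
 * assumptions for the example, modelled loosely on the x86 flow.
 *
 *      // In the arch page-fault handler, when the host page is not
 *      // resident and the vCPU may take an async fault:
 *      if (kvm_can_do_async_pf(vcpu)) {                 // hypothetical arch check
 *              struct kvm_arch_async_pf arch = { ... }; // arch-specific token
 *
 *              if (kvm_setup_async_pf(vcpu, gva, hva, &arch))
 *                      return RET_PF_RETRY;             // guest was told "page not present"
 *      }
 *
 *      // In the vcpu run loop, before (re)entering the guest:
 *      kvm_check_async_pf_completion(vcpu);
 *
 *      // When the guest disables async page faults, a vCPU still halted
 *      // waiting for a "page ready" event must be resumed:
 *      kvm_async_pf_wakeup_all(vcpu);
 */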