1ae1fae34SPekka Enberg #include "kvm/kvm.h" 272811558SPekka Enberg #include "kvm/read-write.h" 372811558SPekka Enberg #include "kvm/util.h" 424ed52dbSCyrill Gorcunov #include "kvm/strbuf.h" 54298ddadSSasha Levin #include "kvm/mutex.h" 64298ddadSSasha Levin #include "kvm/kvm-cpu.h" 74b1addaeSSasha Levin #include "kvm/kvm-ipc.h" 8eda03319SPekka Enberg 9d82350d3SWill Deacon #include <linux/kernel.h> 106c7d8514SPekka Enberg #include <linux/kvm.h> 11d82350d3SWill Deacon #include <linux/list.h> 12495fbd4eSSasha Levin #include <linux/err.h> 13f5ab5f67SPekka Enberg 144b1addaeSSasha Levin #include <sys/un.h> 15e2e876c2SMatt Evans #include <sys/stat.h> 164b1addaeSSasha Levin #include <sys/types.h> 174b1addaeSSasha Levin #include <sys/socket.h> 18ae1fae34SPekka Enberg #include <sys/ioctl.h> 191f9cff23SPekka Enberg #include <sys/mman.h> 202da26a59SPekka Enberg #include <stdbool.h> 2106e41eeaSPekka Enberg #include <limits.h> 22ce79f1caSPekka Enberg #include <signal.h> 23f5ab5f67SPekka Enberg #include <stdarg.h> 24b8f6afcdSPekka Enberg #include <stdlib.h> 25f5ab5f67SPekka Enberg #include <string.h> 260d1f17ecSPekka Enberg #include <unistd.h> 271f9cff23SPekka Enberg #include <stdio.h> 28b8f6afcdSPekka Enberg #include <fcntl.h> 29ce79f1caSPekka Enberg #include <time.h> 304298ddadSSasha Levin #include <sys/eventfd.h> 31c7828731SSasha Levin #include <asm/unistd.h> 3263bc8503SSasha Levin #include <dirent.h> 33b8f6afcdSPekka Enberg 34ae1fae34SPekka Enberg #define DEFINE_KVM_EXIT_REASON(reason) [reason] = #reason 350d1f17ecSPekka Enberg 36ae1fae34SPekka Enberg const char *kvm_exit_reasons[] = { 37ae1fae34SPekka Enberg DEFINE_KVM_EXIT_REASON(KVM_EXIT_UNKNOWN), 38ae1fae34SPekka Enberg DEFINE_KVM_EXIT_REASON(KVM_EXIT_EXCEPTION), 39ae1fae34SPekka Enberg DEFINE_KVM_EXIT_REASON(KVM_EXIT_IO), 40ae1fae34SPekka Enberg DEFINE_KVM_EXIT_REASON(KVM_EXIT_HYPERCALL), 41ae1fae34SPekka Enberg DEFINE_KVM_EXIT_REASON(KVM_EXIT_DEBUG), 42ae1fae34SPekka Enberg DEFINE_KVM_EXIT_REASON(KVM_EXIT_HLT), 43ae1fae34SPekka Enberg DEFINE_KVM_EXIT_REASON(KVM_EXIT_MMIO), 44ae1fae34SPekka Enberg DEFINE_KVM_EXIT_REASON(KVM_EXIT_IRQ_WINDOW_OPEN), 45ae1fae34SPekka Enberg DEFINE_KVM_EXIT_REASON(KVM_EXIT_SHUTDOWN), 46ae1fae34SPekka Enberg DEFINE_KVM_EXIT_REASON(KVM_EXIT_FAIL_ENTRY), 47ae1fae34SPekka Enberg DEFINE_KVM_EXIT_REASON(KVM_EXIT_INTR), 48ae1fae34SPekka Enberg DEFINE_KVM_EXIT_REASON(KVM_EXIT_SET_TPR), 49ae1fae34SPekka Enberg DEFINE_KVM_EXIT_REASON(KVM_EXIT_TPR_ACCESS), 50ae1fae34SPekka Enberg DEFINE_KVM_EXIT_REASON(KVM_EXIT_S390_SIEIC), 51ae1fae34SPekka Enberg DEFINE_KVM_EXIT_REASON(KVM_EXIT_S390_RESET), 52ae1fae34SPekka Enberg DEFINE_KVM_EXIT_REASON(KVM_EXIT_DCR), 53ae1fae34SPekka Enberg DEFINE_KVM_EXIT_REASON(KVM_EXIT_NMI), 54ae1fae34SPekka Enberg DEFINE_KVM_EXIT_REASON(KVM_EXIT_INTERNAL_ERROR), 5563e158a0SMatt Evans #ifdef CONFIG_PPC64 5663e158a0SMatt Evans DEFINE_KVM_EXIT_REASON(KVM_EXIT_PAPR_HCALL), 5763e158a0SMatt Evans #endif 589b1fb1c3SPekka Enberg }; 599b1fb1c3SPekka Enberg 604298ddadSSasha Levin static int pause_event; 614298ddadSSasha Levin static DEFINE_MUTEX(pause_lock); 62af7b0868SMatt Evans extern struct kvm_ext kvm_req_ext[]; 634298ddadSSasha Levin 649667701cSPekka Enberg static char kvm_dir[PATH_MAX]; 659667701cSPekka Enberg 6629f4ec31SJulien Thierry extern __thread struct kvm_cpu *current_kvm_cpu; 6729f4ec31SJulien Thierry 68495fbd4eSSasha Levin static int set_dir(const char *fmt, va_list args) 699667701cSPekka Enberg { 70dd188f9fSPekka Enberg char tmp[PATH_MAX]; 71dd188f9fSPekka Enberg 72dd188f9fSPekka Enberg vsnprintf(tmp, sizeof(tmp), fmt, args); 73dd188f9fSPekka Enberg 742bc995fbSPekka Enberg mkdir(tmp, 0777); 752bc995fbSPekka Enberg 76dd188f9fSPekka Enberg if (!realpath(tmp, kvm_dir)) 77495fbd4eSSasha Levin return -errno; 78f76a3285SPekka Enberg 79f76a3285SPekka Enberg strcat(kvm_dir, "/"); 80495fbd4eSSasha Levin 81495fbd4eSSasha Levin return 0; 829667701cSPekka Enberg } 839667701cSPekka Enberg 849667701cSPekka Enberg void kvm__set_dir(const char *fmt, ...) 859667701cSPekka Enberg { 869667701cSPekka Enberg va_list args; 879667701cSPekka Enberg 889667701cSPekka Enberg va_start(args, fmt); 899667701cSPekka Enberg set_dir(fmt, args); 909667701cSPekka Enberg va_end(args); 919667701cSPekka Enberg } 929667701cSPekka Enberg 939667701cSPekka Enberg const char *kvm__get_dir(void) 949667701cSPekka Enberg { 959667701cSPekka Enberg return kvm_dir; 969667701cSPekka Enberg } 979667701cSPekka Enberg 98663165a2SAndre Przywara bool kvm__supports_vm_extension(struct kvm *kvm, unsigned int extension) 99663165a2SAndre Przywara { 100663165a2SAndre Przywara static int supports_vm_ext_check = 0; 101663165a2SAndre Przywara int ret; 102663165a2SAndre Przywara 103663165a2SAndre Przywara switch (supports_vm_ext_check) { 104663165a2SAndre Przywara case 0: 105663165a2SAndre Przywara ret = ioctl(kvm->sys_fd, KVM_CHECK_EXTENSION, 106663165a2SAndre Przywara KVM_CAP_CHECK_EXTENSION_VM); 107663165a2SAndre Przywara if (ret <= 0) { 108663165a2SAndre Przywara supports_vm_ext_check = -1; 109663165a2SAndre Przywara return false; 110663165a2SAndre Przywara } 111663165a2SAndre Przywara supports_vm_ext_check = 1; 112663165a2SAndre Przywara /* fall through */ 113663165a2SAndre Przywara case 1: 114663165a2SAndre Przywara break; 115663165a2SAndre Przywara case -1: 116663165a2SAndre Przywara return false; 117663165a2SAndre Przywara } 118663165a2SAndre Przywara 119663165a2SAndre Przywara ret = ioctl(kvm->vm_fd, KVM_CHECK_EXTENSION, extension); 120663165a2SAndre Przywara if (ret < 0) 121663165a2SAndre Przywara return false; 122663165a2SAndre Przywara 123663165a2SAndre Przywara return ret; 124663165a2SAndre Przywara } 125663165a2SAndre Przywara 1261d6fb3f2SSasha Levin bool kvm__supports_extension(struct kvm *kvm, unsigned int extension) 127b8f6afcdSPekka Enberg { 12828fa19c0SPekka Enberg int ret; 129b8f6afcdSPekka Enberg 13043835ac9SSasha Levin ret = ioctl(kvm->sys_fd, KVM_CHECK_EXTENSION, extension); 1314076b041SPekka Enberg if (ret < 0) 1324076b041SPekka Enberg return false; 1334076b041SPekka Enberg 1344076b041SPekka Enberg return ret; 1354076b041SPekka Enberg } 1364076b041SPekka Enberg 13743835ac9SSasha Levin static int kvm__check_extensions(struct kvm *kvm) 13855e19624SCyrill Gorcunov { 139495fbd4eSSasha Levin int i; 14055e19624SCyrill Gorcunov 141af7b0868SMatt Evans for (i = 0; ; i++) { 142af7b0868SMatt Evans if (!kvm_req_ext[i].name) 143af7b0868SMatt Evans break; 14443835ac9SSasha Levin if (!kvm__supports_extension(kvm, kvm_req_ext[i].code)) { 14550687d87SWill Deacon pr_err("Unsupported KVM extension detected: %s", 14655e19624SCyrill Gorcunov kvm_req_ext[i].name); 147495fbd4eSSasha Levin return -i; 14855e19624SCyrill Gorcunov } 14955e19624SCyrill Gorcunov } 15055e19624SCyrill Gorcunov 15155e19624SCyrill Gorcunov return 0; 15255e19624SCyrill Gorcunov } 15355e19624SCyrill Gorcunov 15447621338SSasha Levin struct kvm *kvm__new(void) 1554076b041SPekka Enberg { 156495fbd4eSSasha Levin struct kvm *kvm = calloc(1, sizeof(*kvm)); 15743835ac9SSasha Levin if (!kvm) 158495fbd4eSSasha Levin return ERR_PTR(-ENOMEM); 1594076b041SPekka Enberg 1608d987725SAlexandru Elisei mutex_init(&kvm->mem_banks_lock); 161d648dbf5SCyrill Gorcunov kvm->sys_fd = -1; 162d648dbf5SCyrill Gorcunov kvm->vm_fd = -1; 163d648dbf5SCyrill Gorcunov 16420b65266SJulien Thierry #ifdef KVM_BRLOCK_DEBUG 16520b65266SJulien Thierry kvm->brlock_sem = (pthread_rwlock_t) PTHREAD_RWLOCK_INITIALIZER; 16620b65266SJulien Thierry #endif 16720b65266SJulien Thierry 16843835ac9SSasha Levin return kvm; 1694076b041SPekka Enberg } 1704076b041SPekka Enberg 171495fbd4eSSasha Levin int kvm__exit(struct kvm *kvm) 1729ef4c68eSPekka Enberg { 173d82350d3SWill Deacon struct kvm_mem_bank *bank, *tmp; 174495fbd4eSSasha Levin 175d82350d3SWill Deacon kvm__arch_delete_ram(kvm); 176d82350d3SWill Deacon 177d82350d3SWill Deacon list_for_each_entry_safe(bank, tmp, &kvm->mem_banks, list) { 178d82350d3SWill Deacon list_del(&bank->list); 179d82350d3SWill Deacon free(bank); 180d82350d3SWill Deacon } 181d82350d3SWill Deacon 182d82350d3SWill Deacon free(kvm); 183495fbd4eSSasha Levin return 0; 1849ef4c68eSPekka Enberg } 18549a8afd1SSasha Levin core_exit(kvm__exit); 1869ef4c68eSPekka Enberg 1878d987725SAlexandru Elisei int kvm__destroy_mem(struct kvm *kvm, u64 guest_phys, u64 size, 1888d987725SAlexandru Elisei void *userspace_addr) 1898d987725SAlexandru Elisei { 1908d987725SAlexandru Elisei struct kvm_userspace_memory_region mem; 1918d987725SAlexandru Elisei struct kvm_mem_bank *bank; 1928d987725SAlexandru Elisei int ret; 1938d987725SAlexandru Elisei 1948d987725SAlexandru Elisei mutex_lock(&kvm->mem_banks_lock); 1958d987725SAlexandru Elisei list_for_each_entry(bank, &kvm->mem_banks, list) 1968d987725SAlexandru Elisei if (bank->guest_phys_addr == guest_phys && 1978d987725SAlexandru Elisei bank->size == size && bank->host_addr == userspace_addr) 1988d987725SAlexandru Elisei break; 1998d987725SAlexandru Elisei 2008d987725SAlexandru Elisei if (&bank->list == &kvm->mem_banks) { 2018d987725SAlexandru Elisei pr_err("Region [%llx-%llx] not found", guest_phys, 2028d987725SAlexandru Elisei guest_phys + size - 1); 2038d987725SAlexandru Elisei ret = -EINVAL; 2048d987725SAlexandru Elisei goto out; 2058d987725SAlexandru Elisei } 2068d987725SAlexandru Elisei 2078d987725SAlexandru Elisei if (bank->type == KVM_MEM_TYPE_RESERVED) { 2088d987725SAlexandru Elisei pr_err("Cannot delete reserved region [%llx-%llx]", 2098d987725SAlexandru Elisei guest_phys, guest_phys + size - 1); 2108d987725SAlexandru Elisei ret = -EINVAL; 2118d987725SAlexandru Elisei goto out; 2128d987725SAlexandru Elisei } 2138d987725SAlexandru Elisei 2148d987725SAlexandru Elisei mem = (struct kvm_userspace_memory_region) { 2158d987725SAlexandru Elisei .slot = bank->slot, 2168d987725SAlexandru Elisei .guest_phys_addr = guest_phys, 2178d987725SAlexandru Elisei .memory_size = 0, 2188d987725SAlexandru Elisei .userspace_addr = (unsigned long)userspace_addr, 2198d987725SAlexandru Elisei }; 2208d987725SAlexandru Elisei 2218d987725SAlexandru Elisei ret = ioctl(kvm->vm_fd, KVM_SET_USER_MEMORY_REGION, &mem); 2228d987725SAlexandru Elisei if (ret < 0) { 2238d987725SAlexandru Elisei ret = -errno; 2248d987725SAlexandru Elisei goto out; 2258d987725SAlexandru Elisei } 2268d987725SAlexandru Elisei 2278d987725SAlexandru Elisei list_del(&bank->list); 2288d987725SAlexandru Elisei free(bank); 2298d987725SAlexandru Elisei kvm->mem_slots--; 2308d987725SAlexandru Elisei ret = 0; 2318d987725SAlexandru Elisei 2328d987725SAlexandru Elisei out: 2338d987725SAlexandru Elisei mutex_unlock(&kvm->mem_banks_lock); 2348d987725SAlexandru Elisei return ret; 2358d987725SAlexandru Elisei } 2368d987725SAlexandru Elisei 2378f46c736SJean-Philippe Brucker int kvm__register_mem(struct kvm *kvm, u64 guest_phys, u64 size, 2388f46c736SJean-Philippe Brucker void *userspace_addr, enum kvm_mem_type type) 2394076b041SPekka Enberg { 2402b0e3342SPekka Enberg struct kvm_userspace_memory_region mem; 241fa1076abSJean-Philippe Brucker struct kvm_mem_bank *merged = NULL; 242d82350d3SWill Deacon struct kvm_mem_bank *bank; 2438d987725SAlexandru Elisei struct list_head *prev_entry; 2448d987725SAlexandru Elisei u32 slot; 245*0480e04aSAndre Przywara u32 flags = 0; 246839051d9SSasha Levin int ret; 247839051d9SSasha Levin 2488d987725SAlexandru Elisei mutex_lock(&kvm->mem_banks_lock); 2498d987725SAlexandru Elisei /* Check for overlap and find first empty slot. */ 2508d987725SAlexandru Elisei slot = 0; 2518d987725SAlexandru Elisei prev_entry = &kvm->mem_banks; 252fa1076abSJean-Philippe Brucker list_for_each_entry(bank, &kvm->mem_banks, list) { 253fa1076abSJean-Philippe Brucker u64 bank_end = bank->guest_phys_addr + bank->size - 1; 254fa1076abSJean-Philippe Brucker u64 end = guest_phys + size - 1; 2558d987725SAlexandru Elisei if (guest_phys > bank_end || end < bank->guest_phys_addr) { 2568d987725SAlexandru Elisei /* 2578d987725SAlexandru Elisei * Keep the banks sorted ascending by slot, so it's 2588d987725SAlexandru Elisei * easier for us to find a free slot. 2598d987725SAlexandru Elisei */ 2608d987725SAlexandru Elisei if (bank->slot == slot) { 2618d987725SAlexandru Elisei slot++; 2628d987725SAlexandru Elisei prev_entry = &bank->list; 2638d987725SAlexandru Elisei } 264fa1076abSJean-Philippe Brucker continue; 2658d987725SAlexandru Elisei } 266fa1076abSJean-Philippe Brucker 267fa1076abSJean-Philippe Brucker /* Merge overlapping reserved regions */ 268fa1076abSJean-Philippe Brucker if (bank->type == KVM_MEM_TYPE_RESERVED && 269fa1076abSJean-Philippe Brucker type == KVM_MEM_TYPE_RESERVED) { 270fa1076abSJean-Philippe Brucker bank->guest_phys_addr = min(bank->guest_phys_addr, guest_phys); 271fa1076abSJean-Philippe Brucker bank->size = max(bank_end, end) - bank->guest_phys_addr + 1; 272fa1076abSJean-Philippe Brucker 273fa1076abSJean-Philippe Brucker if (merged) { 274fa1076abSJean-Philippe Brucker /* 275fa1076abSJean-Philippe Brucker * This is at least the second merge, remove 276fa1076abSJean-Philippe Brucker * previous result. 277fa1076abSJean-Philippe Brucker */ 278fa1076abSJean-Philippe Brucker list_del(&merged->list); 279fa1076abSJean-Philippe Brucker free(merged); 280fa1076abSJean-Philippe Brucker } 281fa1076abSJean-Philippe Brucker 282fa1076abSJean-Philippe Brucker guest_phys = bank->guest_phys_addr; 283fa1076abSJean-Philippe Brucker size = bank->size; 284fa1076abSJean-Philippe Brucker merged = bank; 285fa1076abSJean-Philippe Brucker 286fa1076abSJean-Philippe Brucker /* Keep checking that we don't overlap another region */ 287fa1076abSJean-Philippe Brucker continue; 288fa1076abSJean-Philippe Brucker } 289fa1076abSJean-Philippe Brucker 290fa1076abSJean-Philippe Brucker pr_err("%s region [%llx-%llx] would overlap %s region [%llx-%llx]", 291fa1076abSJean-Philippe Brucker kvm_mem_type_to_string(type), guest_phys, guest_phys + size - 1, 292fa1076abSJean-Philippe Brucker kvm_mem_type_to_string(bank->type), bank->guest_phys_addr, 293fa1076abSJean-Philippe Brucker bank->guest_phys_addr + bank->size - 1); 294fa1076abSJean-Philippe Brucker 2958d987725SAlexandru Elisei ret = -EINVAL; 2968d987725SAlexandru Elisei goto out; 297fa1076abSJean-Philippe Brucker } 298fa1076abSJean-Philippe Brucker 2998d987725SAlexandru Elisei if (merged) { 3008d987725SAlexandru Elisei ret = 0; 3018d987725SAlexandru Elisei goto out; 3028d987725SAlexandru Elisei } 303fa1076abSJean-Philippe Brucker 304d82350d3SWill Deacon bank = malloc(sizeof(*bank)); 3058d987725SAlexandru Elisei if (!bank) { 3068d987725SAlexandru Elisei ret = -ENOMEM; 3078d987725SAlexandru Elisei goto out; 3088d987725SAlexandru Elisei } 309d82350d3SWill Deacon 310d82350d3SWill Deacon INIT_LIST_HEAD(&bank->list); 311d82350d3SWill Deacon bank->guest_phys_addr = guest_phys; 312d82350d3SWill Deacon bank->host_addr = userspace_addr; 313d82350d3SWill Deacon bank->size = size; 3148f46c736SJean-Philippe Brucker bank->type = type; 3158d987725SAlexandru Elisei bank->slot = slot; 316d82350d3SWill Deacon 317*0480e04aSAndre Przywara if (type & KVM_MEM_TYPE_READONLY) 318*0480e04aSAndre Przywara flags |= KVM_MEM_READONLY; 319*0480e04aSAndre Przywara 320fa1076abSJean-Philippe Brucker if (type != KVM_MEM_TYPE_RESERVED) { 321839051d9SSasha Levin mem = (struct kvm_userspace_memory_region) { 3228d987725SAlexandru Elisei .slot = slot, 323*0480e04aSAndre Przywara .flags = flags, 324874467f8SSasha Levin .guest_phys_addr = guest_phys, 325874467f8SSasha Levin .memory_size = size, 326c4acb611SIngo Molnar .userspace_addr = (unsigned long)userspace_addr, 327839051d9SSasha Levin }; 328839051d9SSasha Levin 329874467f8SSasha Levin ret = ioctl(kvm->vm_fd, KVM_SET_USER_MEMORY_REGION, &mem); 3308d987725SAlexandru Elisei if (ret < 0) { 3318d987725SAlexandru Elisei ret = -errno; 3328d987725SAlexandru Elisei goto out; 3338d987725SAlexandru Elisei } 334fa1076abSJean-Philippe Brucker } 335495fbd4eSSasha Levin 3368d987725SAlexandru Elisei list_add(&bank->list, prev_entry); 3378d987725SAlexandru Elisei kvm->mem_slots++; 3388d987725SAlexandru Elisei ret = 0; 339fa1076abSJean-Philippe Brucker 3408d987725SAlexandru Elisei out: 3418d987725SAlexandru Elisei mutex_unlock(&kvm->mem_banks_lock); 3428d987725SAlexandru Elisei return ret; 343839051d9SSasha Levin } 344839051d9SSasha Levin 345f412251fSWill Deacon void *guest_flat_to_host(struct kvm *kvm, u64 offset) 346f412251fSWill Deacon { 347f412251fSWill Deacon struct kvm_mem_bank *bank; 348f412251fSWill Deacon 349f412251fSWill Deacon list_for_each_entry(bank, &kvm->mem_banks, list) { 350f412251fSWill Deacon u64 bank_start = bank->guest_phys_addr; 351f412251fSWill Deacon u64 bank_end = bank_start + bank->size; 352f412251fSWill Deacon 353f412251fSWill Deacon if (offset >= bank_start && offset < bank_end) 354f412251fSWill Deacon return bank->host_addr + (offset - bank_start); 355f412251fSWill Deacon } 356f412251fSWill Deacon 357f412251fSWill Deacon pr_warning("unable to translate guest address 0x%llx to host", 358f412251fSWill Deacon (unsigned long long)offset); 359f412251fSWill Deacon return NULL; 360f412251fSWill Deacon } 361f412251fSWill Deacon 3620cb41990SWill Deacon u64 host_to_guest_flat(struct kvm *kvm, void *ptr) 3630cb41990SWill Deacon { 3640cb41990SWill Deacon struct kvm_mem_bank *bank; 3650cb41990SWill Deacon 3660cb41990SWill Deacon list_for_each_entry(bank, &kvm->mem_banks, list) { 3670cb41990SWill Deacon void *bank_start = bank->host_addr; 3680cb41990SWill Deacon void *bank_end = bank_start + bank->size; 3690cb41990SWill Deacon 3700cb41990SWill Deacon if (ptr >= bank_start && ptr < bank_end) 3710cb41990SWill Deacon return bank->guest_phys_addr + (ptr - bank_start); 3720cb41990SWill Deacon } 3730cb41990SWill Deacon 3740cb41990SWill Deacon pr_warning("unable to translate host address %p to guest", ptr); 3750cb41990SWill Deacon return 0; 3760cb41990SWill Deacon } 3770cb41990SWill Deacon 3788f46c736SJean-Philippe Brucker /* 3798f46c736SJean-Philippe Brucker * Iterate over each registered memory bank. Call @fun for each bank with @data 3808f46c736SJean-Philippe Brucker * as argument. @type is a bitmask that allows to filter banks according to 3818f46c736SJean-Philippe Brucker * their type. 3828f46c736SJean-Philippe Brucker * 3838f46c736SJean-Philippe Brucker * If one call to @fun returns a non-zero value, stop iterating and return the 3848f46c736SJean-Philippe Brucker * value. Otherwise, return zero. 3858f46c736SJean-Philippe Brucker */ 3868f46c736SJean-Philippe Brucker int kvm__for_each_mem_bank(struct kvm *kvm, enum kvm_mem_type type, 3878f46c736SJean-Philippe Brucker int (*fun)(struct kvm *kvm, struct kvm_mem_bank *bank, void *data), 3888f46c736SJean-Philippe Brucker void *data) 3898f46c736SJean-Philippe Brucker { 3908f46c736SJean-Philippe Brucker int ret; 3918f46c736SJean-Philippe Brucker struct kvm_mem_bank *bank; 3928f46c736SJean-Philippe Brucker 3938f46c736SJean-Philippe Brucker list_for_each_entry(bank, &kvm->mem_banks, list) { 3948f46c736SJean-Philippe Brucker if (type != KVM_MEM_TYPE_ALL && !(bank->type & type)) 3958f46c736SJean-Philippe Brucker continue; 3968f46c736SJean-Philippe Brucker 3978f46c736SJean-Philippe Brucker ret = fun(kvm, bank, data); 3988f46c736SJean-Philippe Brucker if (ret) 3998f46c736SJean-Philippe Brucker break; 4008f46c736SJean-Philippe Brucker } 4018f46c736SJean-Philippe Brucker 4028f46c736SJean-Philippe Brucker return ret; 4038f46c736SJean-Philippe Brucker } 4048f46c736SJean-Philippe Brucker 4058259b8ccSSasha Levin int kvm__recommended_cpus(struct kvm *kvm) 406384922b3SPekka Enberg { 407384922b3SPekka Enberg int ret; 408384922b3SPekka Enberg 40943835ac9SSasha Levin ret = ioctl(kvm->sys_fd, KVM_CHECK_EXTENSION, KVM_CAP_NR_VCPUS); 4108259b8ccSSasha Levin if (ret <= 0) 4113b9b691dSMatt Evans /* 4123b9b691dSMatt Evans * api.txt states that if KVM_CAP_NR_VCPUS does not exist, 4133b9b691dSMatt Evans * assume 4. 4143b9b691dSMatt Evans */ 4153b9b691dSMatt Evans return 4; 416384922b3SPekka Enberg 417384922b3SPekka Enberg return ret; 418384922b3SPekka Enberg } 419384922b3SPekka Enberg 4208259b8ccSSasha Levin int kvm__max_cpus(struct kvm *kvm) 4218259b8ccSSasha Levin { 4228259b8ccSSasha Levin int ret; 4238259b8ccSSasha Levin 4248259b8ccSSasha Levin ret = ioctl(kvm->sys_fd, KVM_CHECK_EXTENSION, KVM_CAP_MAX_VCPUS); 4258259b8ccSSasha Levin if (ret <= 0) 4268259b8ccSSasha Levin ret = kvm__recommended_cpus(kvm); 4278259b8ccSSasha Levin 4288259b8ccSSasha Levin return ret; 4298259b8ccSSasha Levin } 4308259b8ccSSasha Levin 43147621338SSasha Levin int kvm__init(struct kvm *kvm) 432839051d9SSasha Levin { 4334076b041SPekka Enberg int ret; 4344076b041SPekka Enberg 435495fbd4eSSasha Levin if (!kvm__arch_cpu_supports_vm()) { 436495fbd4eSSasha Levin pr_err("Your CPU does not support hardware virtualization"); 4376fce7105SYang Bai ret = -ENOSYS; 4386fce7105SYang Bai goto err; 439495fbd4eSSasha Levin } 440c78b8713SAsias He 44147621338SSasha Levin kvm->sys_fd = open(kvm->cfg.dev, O_RDWR); 44243835ac9SSasha Levin if (kvm->sys_fd < 0) { 443d648dbf5SCyrill Gorcunov if (errno == ENOENT) 444495fbd4eSSasha Levin pr_err("'%s' not found. Please make sure your kernel has CONFIG_KVM " 44547621338SSasha Levin "enabled and that the KVM modules are loaded.", kvm->cfg.dev); 446d648dbf5SCyrill Gorcunov else if (errno == ENODEV) 447d648dbf5SCyrill Gorcunov pr_err("'%s' KVM driver not available.\n # (If the KVM " 448495fbd4eSSasha Levin "module is loaded then 'dmesg' may offer further clues " 44947621338SSasha Levin "about the failure.)", kvm->cfg.dev); 450d648dbf5SCyrill Gorcunov else 45147621338SSasha Levin pr_err("Could not open %s: ", kvm->cfg.dev); 452d648dbf5SCyrill Gorcunov 453495fbd4eSSasha Levin ret = -errno; 454d648dbf5SCyrill Gorcunov goto err_free; 4556d7c36ceSPekka Enberg } 456b8f6afcdSPekka Enberg 45743835ac9SSasha Levin ret = ioctl(kvm->sys_fd, KVM_GET_API_VERSION, 0); 458495fbd4eSSasha Levin if (ret != KVM_API_VERSION) { 459495fbd4eSSasha Levin pr_err("KVM_API_VERSION ioctl"); 460495fbd4eSSasha Levin ret = -errno; 461d648dbf5SCyrill Gorcunov goto err_sys_fd; 462495fbd4eSSasha Levin } 4636c7d8514SPekka Enberg 464b5a5cd67SAndreas Herrmann kvm->vm_fd = ioctl(kvm->sys_fd, KVM_CREATE_VM, KVM_VM_TYPE); 465495fbd4eSSasha Levin if (kvm->vm_fd < 0) { 46681404cdbSDavid Daney pr_err("KVM_CREATE_VM ioctl"); 467495fbd4eSSasha Levin ret = kvm->vm_fd; 468d648dbf5SCyrill Gorcunov goto err_sys_fd; 469495fbd4eSSasha Levin } 47028fa19c0SPekka Enberg 471495fbd4eSSasha Levin if (kvm__check_extensions(kvm)) { 4725f5b0144SWill Deacon pr_err("A required KVM extension is not supported by OS"); 473495fbd4eSSasha Levin ret = -ENOSYS; 4746fce7105SYang Bai goto err_vm_fd; 475495fbd4eSSasha Levin } 4769687927dSAsias He 47747621338SSasha Levin kvm__arch_init(kvm, kvm->cfg.hugetlbfs_path, kvm->cfg.ram_size); 4789687927dSAsias He 479d82350d3SWill Deacon INIT_LIST_HEAD(&kvm->mem_banks); 480abee258bSSasha Levin kvm__init_ram(kvm); 481abee258bSSasha Levin 482084a1356SSasha Levin if (!kvm->cfg.firmware_filename) { 483084a1356SSasha Levin if (!kvm__load_kernel(kvm, kvm->cfg.kernel_filename, 484ff7ba6faSWill Deacon kvm->cfg.initrd_filename, kvm->cfg.real_cmdline)) 485084a1356SSasha Levin die("unable to load kernel %s", kvm->cfg.kernel_filename); 486084a1356SSasha Levin } 487084a1356SSasha Levin 488084a1356SSasha Levin if (kvm->cfg.firmware_filename) { 489084a1356SSasha Levin if (!kvm__load_firmware(kvm, kvm->cfg.firmware_filename)) 490084a1356SSasha Levin die("unable to load firmware image %s: %s", kvm->cfg.firmware_filename, strerror(errno)); 491084a1356SSasha Levin } else { 492084a1356SSasha Levin ret = kvm__arch_setup_firmware(kvm); 493084a1356SSasha Levin if (ret < 0) 494084a1356SSasha Levin die("kvm__arch_setup_firmware() failed with error %d\n", ret); 495084a1356SSasha Levin } 496084a1356SSasha Levin 49747621338SSasha Levin return 0; 498d648dbf5SCyrill Gorcunov 4996fce7105SYang Bai err_vm_fd: 500495fbd4eSSasha Levin close(kvm->vm_fd); 501d648dbf5SCyrill Gorcunov err_sys_fd: 502495fbd4eSSasha Levin close(kvm->sys_fd); 503d648dbf5SCyrill Gorcunov err_free: 504495fbd4eSSasha Levin free(kvm); 5056fce7105SYang Bai err: 50647621338SSasha Levin return ret; 5074076b041SPekka Enberg } 50849a8afd1SSasha Levin core_init(kvm__init); 5094076b041SPekka Enberg 51072811558SPekka Enberg /* RFC 1952 */ 51172811558SPekka Enberg #define GZIP_ID1 0x1f 51272811558SPekka Enberg #define GZIP_ID2 0x8b 513663ce1dfSMatt Evans #define CPIO_MAGIC "0707" 514663ce1dfSMatt Evans /* initrd may be gzipped, or a plain cpio */ 51572811558SPekka Enberg static bool initrd_check(int fd) 51672811558SPekka Enberg { 517663ce1dfSMatt Evans unsigned char id[4]; 51872811558SPekka Enberg 51972811558SPekka Enberg if (read_in_full(fd, id, ARRAY_SIZE(id)) < 0) 52072811558SPekka Enberg return false; 52172811558SPekka Enberg 52272811558SPekka Enberg if (lseek(fd, 0, SEEK_SET) < 0) 52372811558SPekka Enberg die_perror("lseek"); 52472811558SPekka Enberg 525663ce1dfSMatt Evans return (id[0] == GZIP_ID1 && id[1] == GZIP_ID2) || 526663ce1dfSMatt Evans !memcmp(id, CPIO_MAGIC, 4); 52772811558SPekka Enberg } 52872811558SPekka Enberg 5296d1f350dSCyrill Gorcunov bool kvm__load_kernel(struct kvm *kvm, const char *kernel_filename, 530ff7ba6faSWill Deacon const char *initrd_filename, const char *kernel_cmdline) 531ae1fae34SPekka Enberg { 5327fb218bdSPekka Enberg bool ret; 5332065a6f7SCyrill Gorcunov int fd_kernel = -1, fd_initrd = -1; 534ae1fae34SPekka Enberg 5352065a6f7SCyrill Gorcunov fd_kernel = open(kernel_filename, O_RDONLY); 5362065a6f7SCyrill Gorcunov if (fd_kernel < 0) 5370b62d2bbSPekka Enberg die("Unable to open kernel %s", kernel_filename); 538ae1fae34SPekka Enberg 5392065a6f7SCyrill Gorcunov if (initrd_filename) { 5402065a6f7SCyrill Gorcunov fd_initrd = open(initrd_filename, O_RDONLY); 5412065a6f7SCyrill Gorcunov if (fd_initrd < 0) 5420b62d2bbSPekka Enberg die("Unable to open initrd %s", initrd_filename); 54372811558SPekka Enberg 54472811558SPekka Enberg if (!initrd_check(fd_initrd)) 54572811558SPekka Enberg die("%s is not an initrd", initrd_filename); 5462065a6f7SCyrill Gorcunov } 5472065a6f7SCyrill Gorcunov 548004f7684SAndre Przywara ret = kvm__arch_load_kernel_image(kvm, fd_kernel, fd_initrd, 549004f7684SAndre Przywara kernel_cmdline); 550009b0758SPekka Enberg 551604dbd63SMatt Evans if (initrd_filename) 552604dbd63SMatt Evans close(fd_initrd); 5535a6ac675SSasha Levin close(fd_kernel); 5545a6ac675SSasha Levin 555004f7684SAndre Przywara if (!ret) 556004f7684SAndre Przywara die("%s is not a valid kernel image", kernel_filename); 557ae1fae34SPekka Enberg return ret; 558ae1fae34SPekka Enberg } 559ae1fae34SPekka Enberg 560b2cf1e9fSAsias He void kvm__dump_mem(struct kvm *kvm, unsigned long addr, unsigned long size, int debug_fd) 561090f898eSCyrill Gorcunov { 562090f898eSCyrill Gorcunov unsigned char *p; 563090f898eSCyrill Gorcunov unsigned long n; 564090f898eSCyrill Gorcunov 565090f898eSCyrill Gorcunov size &= ~7; /* mod 8 */ 566090f898eSCyrill Gorcunov if (!size) 567090f898eSCyrill Gorcunov return; 568090f898eSCyrill Gorcunov 56943835ac9SSasha Levin p = guest_flat_to_host(kvm, addr); 570090f898eSCyrill Gorcunov 57148cf3877SPekka Enberg for (n = 0; n < size; n += 8) { 572b2cf1e9fSAsias He if (!host_ptr_in_ram(kvm, p + n)) { 573b2cf1e9fSAsias He dprintf(debug_fd, " 0x%08lx: <unknown>\n", addr + n); 574b2cf1e9fSAsias He continue; 575b2cf1e9fSAsias He } 576b2cf1e9fSAsias He dprintf(debug_fd, " 0x%08lx: %02x %02x %02x %02x %02x %02x %02x %02x\n", 577090f898eSCyrill Gorcunov addr + n, p[n + 0], p[n + 1], p[n + 2], p[n + 3], 578090f898eSCyrill Gorcunov p[n + 4], p[n + 5], p[n + 6], p[n + 7]); 579090f898eSCyrill Gorcunov } 58048cf3877SPekka Enberg } 5814298ddadSSasha Levin 5822aa76b26SWill Deacon void kvm__reboot(struct kvm *kvm) 5832aa76b26SWill Deacon { 5842aa76b26SWill Deacon /* Check if the guest is running */ 5852aa76b26SWill Deacon if (!kvm->cpus[0] || kvm->cpus[0]->thread == 0) 5862aa76b26SWill Deacon return; 5872aa76b26SWill Deacon 588e8cb90fbSWill Deacon pthread_kill(kvm->cpus[0]->thread, SIGKVMEXIT); 5892aa76b26SWill Deacon } 5902aa76b26SWill Deacon 591e8cb90fbSWill Deacon void kvm__continue(struct kvm *kvm) 592e8cb90fbSWill Deacon { 5932aa76b26SWill Deacon mutex_unlock(&pause_lock); 5942aa76b26SWill Deacon } 5952aa76b26SWill Deacon 5964346fd8fSSasha Levin void kvm__pause(struct kvm *kvm) 5974298ddadSSasha Levin { 5984298ddadSSasha Levin int i, paused_vcpus = 0; 5994298ddadSSasha Levin 600e8cb90fbSWill Deacon mutex_lock(&pause_lock); 601e8cb90fbSWill Deacon 6024298ddadSSasha Levin /* Check if the guest is running */ 60337b8e06bSJean-Philippe Brucker if (!kvm->cpus || !kvm->cpus[0] || kvm->cpus[0]->thread == 0) 6044298ddadSSasha Levin return; 6054298ddadSSasha Levin 6064298ddadSSasha Levin pause_event = eventfd(0, 0); 6074298ddadSSasha Levin if (pause_event < 0) 6084298ddadSSasha Levin die("Failed creating pause notification event"); 6092aa76b26SWill Deacon for (i = 0; i < kvm->nrcpus; i++) { 61029f4ec31SJulien Thierry if (kvm->cpus[i]->is_running && kvm->cpus[i]->paused == 0) 611df4239fbSSasha Levin pthread_kill(kvm->cpus[i]->thread, SIGKVMPAUSE); 6122aa76b26SWill Deacon else 6132aa76b26SWill Deacon paused_vcpus++; 6142aa76b26SWill Deacon } 6154298ddadSSasha Levin 6164298ddadSSasha Levin while (paused_vcpus < kvm->nrcpus) { 6174298ddadSSasha Levin u64 cur_read; 6184298ddadSSasha Levin 6194298ddadSSasha Levin if (read(pause_event, &cur_read, sizeof(cur_read)) < 0) 6204298ddadSSasha Levin die("Failed reading pause event"); 6214298ddadSSasha Levin paused_vcpus += cur_read; 6224298ddadSSasha Levin } 6234298ddadSSasha Levin close(pause_event); 6244298ddadSSasha Levin } 6254298ddadSSasha Levin 6264298ddadSSasha Levin void kvm__notify_paused(void) 6274298ddadSSasha Levin { 6284298ddadSSasha Levin u64 p = 1; 6294298ddadSSasha Levin 6304298ddadSSasha Levin if (write(pause_event, &p, sizeof(p)) < 0) 6314298ddadSSasha Levin die("Failed notifying of paused VCPU."); 6324298ddadSSasha Levin 6334298ddadSSasha Levin mutex_lock(&pause_lock); 63429f4ec31SJulien Thierry current_kvm_cpu->paused = 0; 6354298ddadSSasha Levin mutex_unlock(&pause_lock); 6364298ddadSSasha Levin } 637