1ae1fae34SPekka Enberg #include "kvm/kvm.h" 272811558SPekka Enberg #include "kvm/read-write.h" 372811558SPekka Enberg #include "kvm/util.h" 4*24ed52dbSCyrill Gorcunov #include "kvm/strbuf.h" 54298ddadSSasha Levin #include "kvm/mutex.h" 64298ddadSSasha Levin #include "kvm/kvm-cpu.h" 74b1addaeSSasha Levin #include "kvm/kvm-ipc.h" 8eda03319SPekka Enberg 96c7d8514SPekka Enberg #include <linux/kvm.h> 10495fbd4eSSasha Levin #include <linux/err.h> 11f5ab5f67SPekka Enberg 124b1addaeSSasha Levin #include <sys/un.h> 13e2e876c2SMatt Evans #include <sys/stat.h> 144b1addaeSSasha Levin #include <sys/types.h> 154b1addaeSSasha Levin #include <sys/socket.h> 16ae1fae34SPekka Enberg #include <sys/ioctl.h> 171f9cff23SPekka Enberg #include <sys/mman.h> 182da26a59SPekka Enberg #include <stdbool.h> 1906e41eeaSPekka Enberg #include <limits.h> 20ce79f1caSPekka Enberg #include <signal.h> 21f5ab5f67SPekka Enberg #include <stdarg.h> 22b8f6afcdSPekka Enberg #include <stdlib.h> 23f5ab5f67SPekka Enberg #include <string.h> 240d1f17ecSPekka Enberg #include <unistd.h> 251f9cff23SPekka Enberg #include <stdio.h> 26b8f6afcdSPekka Enberg #include <fcntl.h> 27ce79f1caSPekka Enberg #include <time.h> 284298ddadSSasha Levin #include <sys/eventfd.h> 29c7828731SSasha Levin #include <asm/unistd.h> 3063bc8503SSasha Levin #include <dirent.h> 31b8f6afcdSPekka Enberg 32ae1fae34SPekka Enberg #define DEFINE_KVM_EXIT_REASON(reason) [reason] = #reason 330d1f17ecSPekka Enberg 34ae1fae34SPekka Enberg const char *kvm_exit_reasons[] = { 35ae1fae34SPekka Enberg DEFINE_KVM_EXIT_REASON(KVM_EXIT_UNKNOWN), 36ae1fae34SPekka Enberg DEFINE_KVM_EXIT_REASON(KVM_EXIT_EXCEPTION), 37ae1fae34SPekka Enberg DEFINE_KVM_EXIT_REASON(KVM_EXIT_IO), 38ae1fae34SPekka Enberg DEFINE_KVM_EXIT_REASON(KVM_EXIT_HYPERCALL), 39ae1fae34SPekka Enberg DEFINE_KVM_EXIT_REASON(KVM_EXIT_DEBUG), 40ae1fae34SPekka Enberg DEFINE_KVM_EXIT_REASON(KVM_EXIT_HLT), 41ae1fae34SPekka Enberg DEFINE_KVM_EXIT_REASON(KVM_EXIT_MMIO), 42ae1fae34SPekka Enberg DEFINE_KVM_EXIT_REASON(KVM_EXIT_IRQ_WINDOW_OPEN), 43ae1fae34SPekka Enberg DEFINE_KVM_EXIT_REASON(KVM_EXIT_SHUTDOWN), 44ae1fae34SPekka Enberg DEFINE_KVM_EXIT_REASON(KVM_EXIT_FAIL_ENTRY), 45ae1fae34SPekka Enberg DEFINE_KVM_EXIT_REASON(KVM_EXIT_INTR), 46ae1fae34SPekka Enberg DEFINE_KVM_EXIT_REASON(KVM_EXIT_SET_TPR), 47ae1fae34SPekka Enberg DEFINE_KVM_EXIT_REASON(KVM_EXIT_TPR_ACCESS), 48ae1fae34SPekka Enberg DEFINE_KVM_EXIT_REASON(KVM_EXIT_S390_SIEIC), 49ae1fae34SPekka Enberg DEFINE_KVM_EXIT_REASON(KVM_EXIT_S390_RESET), 50ae1fae34SPekka Enberg DEFINE_KVM_EXIT_REASON(KVM_EXIT_DCR), 51ae1fae34SPekka Enberg DEFINE_KVM_EXIT_REASON(KVM_EXIT_NMI), 52ae1fae34SPekka Enberg DEFINE_KVM_EXIT_REASON(KVM_EXIT_INTERNAL_ERROR), 5363e158a0SMatt Evans #ifdef CONFIG_PPC64 5463e158a0SMatt Evans DEFINE_KVM_EXIT_REASON(KVM_EXIT_PAPR_HCALL), 5563e158a0SMatt Evans #endif 569b1fb1c3SPekka Enberg }; 579b1fb1c3SPekka Enberg 584298ddadSSasha Levin extern struct kvm *kvm; 59e2077857SMatt Evans extern struct kvm_cpu **kvm_cpus; 604298ddadSSasha Levin static int pause_event; 614298ddadSSasha Levin static DEFINE_MUTEX(pause_lock); 62af7b0868SMatt Evans extern struct kvm_ext kvm_req_ext[]; 634298ddadSSasha Levin 649667701cSPekka Enberg static char kvm_dir[PATH_MAX]; 659667701cSPekka Enberg 66495fbd4eSSasha Levin static int set_dir(const char *fmt, va_list args) 679667701cSPekka Enberg { 68dd188f9fSPekka Enberg char tmp[PATH_MAX]; 69dd188f9fSPekka Enberg 70dd188f9fSPekka Enberg vsnprintf(tmp, sizeof(tmp), fmt, args); 71dd188f9fSPekka Enberg 722bc995fbSPekka Enberg mkdir(tmp, 0777); 732bc995fbSPekka Enberg 74dd188f9fSPekka Enberg if (!realpath(tmp, kvm_dir)) 75495fbd4eSSasha Levin return -errno; 76f76a3285SPekka Enberg 77f76a3285SPekka Enberg strcat(kvm_dir, "/"); 78495fbd4eSSasha Levin 79495fbd4eSSasha Levin return 0; 809667701cSPekka Enberg } 819667701cSPekka Enberg 829667701cSPekka Enberg void kvm__set_dir(const char *fmt, ...) 839667701cSPekka Enberg { 849667701cSPekka Enberg va_list args; 859667701cSPekka Enberg 869667701cSPekka Enberg va_start(args, fmt); 879667701cSPekka Enberg set_dir(fmt, args); 889667701cSPekka Enberg va_end(args); 899667701cSPekka Enberg } 909667701cSPekka Enberg 919667701cSPekka Enberg const char *kvm__get_dir(void) 929667701cSPekka Enberg { 939667701cSPekka Enberg return kvm_dir; 949667701cSPekka Enberg } 959667701cSPekka Enberg 961d6fb3f2SSasha Levin bool kvm__supports_extension(struct kvm *kvm, unsigned int extension) 97b8f6afcdSPekka Enberg { 9828fa19c0SPekka Enberg int ret; 99b8f6afcdSPekka Enberg 10043835ac9SSasha Levin ret = ioctl(kvm->sys_fd, KVM_CHECK_EXTENSION, extension); 1014076b041SPekka Enberg if (ret < 0) 1024076b041SPekka Enberg return false; 1034076b041SPekka Enberg 1044076b041SPekka Enberg return ret; 1054076b041SPekka Enberg } 1064076b041SPekka Enberg 10743835ac9SSasha Levin static int kvm__check_extensions(struct kvm *kvm) 10855e19624SCyrill Gorcunov { 109495fbd4eSSasha Levin int i; 11055e19624SCyrill Gorcunov 111af7b0868SMatt Evans for (i = 0; ; i++) { 112af7b0868SMatt Evans if (!kvm_req_ext[i].name) 113af7b0868SMatt Evans break; 11443835ac9SSasha Levin if (!kvm__supports_extension(kvm, kvm_req_ext[i].code)) { 115599ed2a8SCyrill Gorcunov pr_err("Unsuppored KVM extension detected: %s", 11655e19624SCyrill Gorcunov kvm_req_ext[i].name); 117495fbd4eSSasha Levin return -i; 11855e19624SCyrill Gorcunov } 11955e19624SCyrill Gorcunov } 12055e19624SCyrill Gorcunov 12155e19624SCyrill Gorcunov return 0; 12255e19624SCyrill Gorcunov } 12355e19624SCyrill Gorcunov 1244076b041SPekka Enberg static struct kvm *kvm__new(void) 1254076b041SPekka Enberg { 126495fbd4eSSasha Levin struct kvm *kvm = calloc(1, sizeof(*kvm)); 12743835ac9SSasha Levin if (!kvm) 128495fbd4eSSasha Levin return ERR_PTR(-ENOMEM); 1294076b041SPekka Enberg 130d648dbf5SCyrill Gorcunov kvm->sys_fd = -1; 131d648dbf5SCyrill Gorcunov kvm->vm_fd = -1; 132d648dbf5SCyrill Gorcunov 13343835ac9SSasha Levin return kvm; 1344076b041SPekka Enberg } 1354076b041SPekka Enberg 136b91f1976SLai Jiangshan #define KVM_SOCK_SUFFIX ".sock" 137b91f1976SLai Jiangshan #define KVM_SOCK_SUFFIX_LEN ((ssize_t)sizeof(KVM_SOCK_SUFFIX) - 1) 138b91f1976SLai Jiangshan 1394b1addaeSSasha Levin static int kvm__create_socket(struct kvm *kvm) 1405358b0e6SSasha Levin { 1414b1addaeSSasha Levin char full_name[PATH_MAX]; 1424b1addaeSSasha Levin unsigned int s; 1434b1addaeSSasha Levin struct sockaddr_un local; 1444b1addaeSSasha Levin int len, r; 1455358b0e6SSasha Levin 146*24ed52dbSCyrill Gorcunov /* This usually 108 bytes long */ 147*24ed52dbSCyrill Gorcunov BUILD_BUG_ON(sizeof(local.sun_path) < 32); 148*24ed52dbSCyrill Gorcunov 1495358b0e6SSasha Levin if (!kvm->name) 150495fbd4eSSasha Levin return -EINVAL; 1515358b0e6SSasha Levin 152*24ed52dbSCyrill Gorcunov snprintf(full_name, sizeof(full_name), "%s/%s%s", 153*24ed52dbSCyrill Gorcunov kvm__get_dir(), kvm->name, KVM_SOCK_SUFFIX); 154495fbd4eSSasha Levin if (access(full_name, F_OK) == 0) { 155495fbd4eSSasha Levin pr_err("Socket file %s already exist", full_name); 156495fbd4eSSasha Levin return -EEXIST; 157495fbd4eSSasha Levin } 158fa0022d2SSasha Levin 1594b1addaeSSasha Levin s = socket(AF_UNIX, SOCK_STREAM, 0); 1604b1addaeSSasha Levin if (s < 0) 1614b1addaeSSasha Levin return s; 1624b1addaeSSasha Levin local.sun_family = AF_UNIX; 163*24ed52dbSCyrill Gorcunov strlcpy(local.sun_path, full_name, sizeof(local.sun_path)); 1644b1addaeSSasha Levin len = strlen(local.sun_path) + sizeof(local.sun_family); 1654b1addaeSSasha Levin r = bind(s, (struct sockaddr *)&local, len); 1664b1addaeSSasha Levin if (r < 0) 1674b1addaeSSasha Levin goto fail; 1684b1addaeSSasha Levin 1694b1addaeSSasha Levin r = listen(s, 5); 1704b1addaeSSasha Levin if (r < 0) 1714b1addaeSSasha Levin goto fail; 1724b1addaeSSasha Levin 1734b1addaeSSasha Levin return s; 1744b1addaeSSasha Levin 1754b1addaeSSasha Levin fail: 1764b1addaeSSasha Levin close(s); 177495fbd4eSSasha Levin return r; 1785358b0e6SSasha Levin } 1795358b0e6SSasha Levin 1804b1addaeSSasha Levin void kvm__remove_socket(const char *name) 1815358b0e6SSasha Levin { 1825358b0e6SSasha Levin char full_name[PATH_MAX]; 1835358b0e6SSasha Levin 184*24ed52dbSCyrill Gorcunov snprintf(full_name, sizeof(full_name), "%s/%s%s", 185*24ed52dbSCyrill Gorcunov kvm__get_dir(), name, KVM_SOCK_SUFFIX); 1865358b0e6SSasha Levin unlink(full_name); 1875358b0e6SSasha Levin } 1885358b0e6SSasha Levin 1894b1addaeSSasha Levin int kvm__get_sock_by_instance(const char *name) 1905358b0e6SSasha Levin { 1914b1addaeSSasha Levin int s, len, r; 1924b1addaeSSasha Levin char sock_file[PATH_MAX]; 1934b1addaeSSasha Levin struct sockaddr_un local; 1945358b0e6SSasha Levin 195*24ed52dbSCyrill Gorcunov snprintf(sock_file, sizeof(sock_file), "%s/%s%s", 196*24ed52dbSCyrill Gorcunov kvm__get_dir(), name, KVM_SOCK_SUFFIX); 1974b1addaeSSasha Levin s = socket(AF_UNIX, SOCK_STREAM, 0); 1985358b0e6SSasha Levin 1994b1addaeSSasha Levin local.sun_family = AF_UNIX; 200*24ed52dbSCyrill Gorcunov strlcpy(local.sun_path, sock_file, sizeof(local.sun_path)); 2014b1addaeSSasha Levin len = strlen(local.sun_path) + sizeof(local.sun_family); 2025358b0e6SSasha Levin 2034b1addaeSSasha Levin r = connect(s, &local, len); 204e3e9e392SSasha Levin if (r < 0 && errno == ECONNREFUSED) { 20549dca54dSLai Jiangshan /* Tell the user clean ghost socket file */ 20649dca54dSLai Jiangshan pr_err("\"%s\" could be a ghost socket file, please remove it", 20749dca54dSLai Jiangshan sock_file); 208495fbd4eSSasha Levin return r; 209e3e9e392SSasha Levin } else if (r < 0) { 210495fbd4eSSasha Levin return r; 211e3e9e392SSasha Levin } 2125358b0e6SSasha Levin 2134b1addaeSSasha Levin return s; 2145358b0e6SSasha Levin } 2155358b0e6SSasha Levin 2164b1addaeSSasha Levin int kvm__enumerate_instances(int (*callback)(const char *name, int fd)) 21763bc8503SSasha Levin { 2184b1addaeSSasha Levin int sock; 21963bc8503SSasha Levin DIR *dir; 22063bc8503SSasha Levin struct dirent entry, *result; 221886af5f2SLiming Wang int ret = 0; 22263bc8503SSasha Levin 223da08dcdbSLai Jiangshan dir = opendir(kvm__get_dir()); 2243d7f121cSLai Jiangshan if (!dir) 225495fbd4eSSasha Levin return -errno; 22663bc8503SSasha Levin 2273d7f121cSLai Jiangshan for (;;) { 22863bc8503SSasha Levin readdir_r(dir, &entry, &result); 22963bc8503SSasha Levin if (result == NULL) 23063bc8503SSasha Levin break; 2314b1addaeSSasha Levin if (entry.d_type == DT_SOCK) { 232b91f1976SLai Jiangshan ssize_t name_len = strlen(entry.d_name); 233b91f1976SLai Jiangshan char *p; 234b91f1976SLai Jiangshan 235b91f1976SLai Jiangshan if (name_len <= KVM_SOCK_SUFFIX_LEN) 236b91f1976SLai Jiangshan continue; 237b91f1976SLai Jiangshan 238b91f1976SLai Jiangshan p = &entry.d_name[name_len - KVM_SOCK_SUFFIX_LEN]; 239b91f1976SLai Jiangshan if (memcmp(KVM_SOCK_SUFFIX, p, KVM_SOCK_SUFFIX_LEN)) 240b91f1976SLai Jiangshan continue; 241b91f1976SLai Jiangshan 242b91f1976SLai Jiangshan *p = 0; 2434b1addaeSSasha Levin sock = kvm__get_sock_by_instance(entry.d_name); 244e3e9e392SSasha Levin if (sock < 0) 245e3e9e392SSasha Levin continue; 2464b1addaeSSasha Levin ret = callback(entry.d_name, sock); 2474b1addaeSSasha Levin close(sock); 248886af5f2SLiming Wang if (ret < 0) 249886af5f2SLiming Wang break; 25063bc8503SSasha Levin } 25163bc8503SSasha Levin } 25263bc8503SSasha Levin 2531a0ef251SSasha Levin closedir(dir); 2541a0ef251SSasha Levin 255886af5f2SLiming Wang return ret; 25663bc8503SSasha Levin } 25763bc8503SSasha Levin 258495fbd4eSSasha Levin int kvm__exit(struct kvm *kvm) 2599ef4c68eSPekka Enberg { 26043835ac9SSasha Levin kvm__stop_timer(kvm); 261fbfe68b7SSasha Levin 262e56e2de7SLai Jiangshan kvm__arch_delete_ram(kvm); 263c733c80bSSasha Levin kvm_ipc__stop(); 2644b1addaeSSasha Levin kvm__remove_socket(kvm->name); 265dfb2e458SCyrill Gorcunov free(kvm->name); 26643835ac9SSasha Levin free(kvm); 267495fbd4eSSasha Levin 268495fbd4eSSasha Levin return 0; 2699ef4c68eSPekka Enberg } 2709ef4c68eSPekka Enberg 27196feb589SPekka Enberg /* 27296feb589SPekka Enberg * Note: KVM_SET_USER_MEMORY_REGION assumes that we don't pass overlapping 27396feb589SPekka Enberg * memory regions to it. Therefore, be careful if you use this function for 27496feb589SPekka Enberg * registering memory regions for emulating hardware. 27596feb589SPekka Enberg */ 276495fbd4eSSasha Levin int kvm__register_mem(struct kvm *kvm, u64 guest_phys, u64 size, void *userspace_addr) 2774076b041SPekka Enberg { 2782b0e3342SPekka Enberg struct kvm_userspace_memory_region mem; 279839051d9SSasha Levin int ret; 280839051d9SSasha Levin 281839051d9SSasha Levin mem = (struct kvm_userspace_memory_region) { 28296feb589SPekka Enberg .slot = kvm->mem_slots++, 283874467f8SSasha Levin .guest_phys_addr = guest_phys, 284874467f8SSasha Levin .memory_size = size, 285c4acb611SIngo Molnar .userspace_addr = (unsigned long)userspace_addr, 286839051d9SSasha Levin }; 287839051d9SSasha Levin 288874467f8SSasha Levin ret = ioctl(kvm->vm_fd, KVM_SET_USER_MEMORY_REGION, &mem); 289839051d9SSasha Levin if (ret < 0) 290495fbd4eSSasha Levin return -errno; 291495fbd4eSSasha Levin 292495fbd4eSSasha Levin return 0; 293839051d9SSasha Levin } 294839051d9SSasha Levin 2958259b8ccSSasha Levin int kvm__recommended_cpus(struct kvm *kvm) 296384922b3SPekka Enberg { 297384922b3SPekka Enberg int ret; 298384922b3SPekka Enberg 29943835ac9SSasha Levin ret = ioctl(kvm->sys_fd, KVM_CHECK_EXTENSION, KVM_CAP_NR_VCPUS); 3008259b8ccSSasha Levin if (ret <= 0) 3013b9b691dSMatt Evans /* 3023b9b691dSMatt Evans * api.txt states that if KVM_CAP_NR_VCPUS does not exist, 3033b9b691dSMatt Evans * assume 4. 3043b9b691dSMatt Evans */ 3053b9b691dSMatt Evans return 4; 306384922b3SPekka Enberg 307384922b3SPekka Enberg return ret; 308384922b3SPekka Enberg } 309384922b3SPekka Enberg 3104b1addaeSSasha Levin static void kvm__pid(int fd, u32 type, u32 len, u8 *msg) 3114b1addaeSSasha Levin { 3124b1addaeSSasha Levin pid_t pid = getpid(); 3134b1addaeSSasha Levin int r = 0; 3144b1addaeSSasha Levin 3154b1addaeSSasha Levin if (type == KVM_IPC_PID) 3164b1addaeSSasha Levin r = write(fd, &pid, sizeof(pid)); 3174b1addaeSSasha Levin 3184b1addaeSSasha Levin if (r < 0) 3194b1addaeSSasha Levin pr_warning("Failed sending PID"); 3204b1addaeSSasha Levin } 3214b1addaeSSasha Levin 3228259b8ccSSasha Levin /* 3238259b8ccSSasha Levin * The following hack should be removed once 'x86: Raise the hard 3248259b8ccSSasha Levin * VCPU count limit' makes it's way into the mainline. 3258259b8ccSSasha Levin */ 3268259b8ccSSasha Levin #ifndef KVM_CAP_MAX_VCPUS 3278259b8ccSSasha Levin #define KVM_CAP_MAX_VCPUS 66 3288259b8ccSSasha Levin #endif 3298259b8ccSSasha Levin 3308259b8ccSSasha Levin int kvm__max_cpus(struct kvm *kvm) 3318259b8ccSSasha Levin { 3328259b8ccSSasha Levin int ret; 3338259b8ccSSasha Levin 3348259b8ccSSasha Levin ret = ioctl(kvm->sys_fd, KVM_CHECK_EXTENSION, KVM_CAP_MAX_VCPUS); 3358259b8ccSSasha Levin if (ret <= 0) 3368259b8ccSSasha Levin ret = kvm__recommended_cpus(kvm); 3378259b8ccSSasha Levin 3388259b8ccSSasha Levin return ret; 3398259b8ccSSasha Levin } 3408259b8ccSSasha Levin 34161061257SMatt Evans struct kvm *kvm__init(const char *kvm_dev, const char *hugetlbfs_path, u64 ram_size, const char *name) 342839051d9SSasha Levin { 34343835ac9SSasha Levin struct kvm *kvm; 3444076b041SPekka Enberg int ret; 3454076b041SPekka Enberg 346495fbd4eSSasha Levin if (!kvm__arch_cpu_supports_vm()) { 347495fbd4eSSasha Levin pr_err("Your CPU does not support hardware virtualization"); 3486fce7105SYang Bai ret = -ENOSYS; 3496fce7105SYang Bai goto err; 350495fbd4eSSasha Levin } 351c78b8713SAsias He 35243835ac9SSasha Levin kvm = kvm__new(); 353d648dbf5SCyrill Gorcunov if (IS_ERR(kvm)) 354495fbd4eSSasha Levin return kvm; 3554076b041SPekka Enberg 35643835ac9SSasha Levin kvm->sys_fd = open(kvm_dev, O_RDWR); 35743835ac9SSasha Levin if (kvm->sys_fd < 0) { 358d648dbf5SCyrill Gorcunov if (errno == ENOENT) 359495fbd4eSSasha Levin pr_err("'%s' not found. Please make sure your kernel has CONFIG_KVM " 360495fbd4eSSasha Levin "enabled and that the KVM modules are loaded.", kvm_dev); 361d648dbf5SCyrill Gorcunov else if (errno == ENODEV) 362d648dbf5SCyrill Gorcunov pr_err("'%s' KVM driver not available.\n # (If the KVM " 363495fbd4eSSasha Levin "module is loaded then 'dmesg' may offer further clues " 364495fbd4eSSasha Levin "about the failure.)", kvm_dev); 365d648dbf5SCyrill Gorcunov else 366495fbd4eSSasha Levin pr_err("Could not open %s: ", kvm_dev); 367d648dbf5SCyrill Gorcunov 368495fbd4eSSasha Levin ret = -errno; 369d648dbf5SCyrill Gorcunov goto err_free; 3706d7c36ceSPekka Enberg } 371b8f6afcdSPekka Enberg 37243835ac9SSasha Levin ret = ioctl(kvm->sys_fd, KVM_GET_API_VERSION, 0); 373495fbd4eSSasha Levin if (ret != KVM_API_VERSION) { 374495fbd4eSSasha Levin pr_err("KVM_API_VERSION ioctl"); 375495fbd4eSSasha Levin ret = -errno; 376d648dbf5SCyrill Gorcunov goto err_sys_fd; 377495fbd4eSSasha Levin } 3786c7d8514SPekka Enberg 37943835ac9SSasha Levin kvm->vm_fd = ioctl(kvm->sys_fd, KVM_CREATE_VM, 0); 380495fbd4eSSasha Levin if (kvm->vm_fd < 0) { 381495fbd4eSSasha Levin ret = kvm->vm_fd; 382d648dbf5SCyrill Gorcunov goto err_sys_fd; 383495fbd4eSSasha Levin } 38428fa19c0SPekka Enberg 385dfb2e458SCyrill Gorcunov kvm->name = strdup(name); 386dfb2e458SCyrill Gorcunov if (!kvm->name) { 387dfb2e458SCyrill Gorcunov ret = -ENOMEM; 3886fce7105SYang Bai goto err_vm_fd; 389dfb2e458SCyrill Gorcunov } 390dfb2e458SCyrill Gorcunov 391495fbd4eSSasha Levin if (kvm__check_extensions(kvm)) { 392495fbd4eSSasha Levin pr_err("A required KVM extention is not supported by OS"); 393495fbd4eSSasha Levin ret = -ENOSYS; 3946fce7105SYang Bai goto err_vm_fd; 395495fbd4eSSasha Levin } 3969687927dSAsias He 3977eff9f49SWanlong Gao kvm__arch_init(kvm, hugetlbfs_path, ram_size); 3989687927dSAsias He 399e21e8ff3SYang Bai ret = kvm_ipc__start(kvm__create_socket(kvm)); 400e21e8ff3SYang Bai if (ret < 0) { 401e21e8ff3SYang Bai pr_err("Starting ipc failed."); 402e21e8ff3SYang Bai goto err_vm_fd; 403e21e8ff3SYang Bai } 404e21e8ff3SYang Bai 4057db067beSYang Bai ret = kvm_ipc__register_handler(KVM_IPC_PID, kvm__pid); 4067db067beSYang Bai if (ret < 0) { 4077db067beSYang Bai pr_err("Register ipc handler failed."); 4087db067beSYang Bai goto err_ipc; 4097db067beSYang Bai } 410d648dbf5SCyrill Gorcunov 41143835ac9SSasha Levin return kvm; 412d648dbf5SCyrill Gorcunov 4137db067beSYang Bai err_ipc: 4147db067beSYang Bai kvm_ipc__stop(); 4156fce7105SYang Bai err_vm_fd: 416495fbd4eSSasha Levin close(kvm->vm_fd); 417d648dbf5SCyrill Gorcunov err_sys_fd: 418495fbd4eSSasha Levin close(kvm->sys_fd); 419d648dbf5SCyrill Gorcunov err_free: 420495fbd4eSSasha Levin free(kvm); 4216fce7105SYang Bai err: 422495fbd4eSSasha Levin return ERR_PTR(ret); 4234076b041SPekka Enberg } 4244076b041SPekka Enberg 42572811558SPekka Enberg /* RFC 1952 */ 42672811558SPekka Enberg #define GZIP_ID1 0x1f 42772811558SPekka Enberg #define GZIP_ID2 0x8b 428663ce1dfSMatt Evans #define CPIO_MAGIC "0707" 429663ce1dfSMatt Evans /* initrd may be gzipped, or a plain cpio */ 43072811558SPekka Enberg static bool initrd_check(int fd) 43172811558SPekka Enberg { 432663ce1dfSMatt Evans unsigned char id[4]; 43372811558SPekka Enberg 43472811558SPekka Enberg if (read_in_full(fd, id, ARRAY_SIZE(id)) < 0) 43572811558SPekka Enberg return false; 43672811558SPekka Enberg 43772811558SPekka Enberg if (lseek(fd, 0, SEEK_SET) < 0) 43872811558SPekka Enberg die_perror("lseek"); 43972811558SPekka Enberg 440663ce1dfSMatt Evans return (id[0] == GZIP_ID1 && id[1] == GZIP_ID2) || 441663ce1dfSMatt Evans !memcmp(id, CPIO_MAGIC, 4); 44272811558SPekka Enberg } 44372811558SPekka Enberg 4446d1f350dSCyrill Gorcunov bool kvm__load_kernel(struct kvm *kvm, const char *kernel_filename, 44553861c74SJohn Floren const char *initrd_filename, const char *kernel_cmdline, u16 vidmode) 446ae1fae34SPekka Enberg { 4477fb218bdSPekka Enberg bool ret; 4482065a6f7SCyrill Gorcunov int fd_kernel = -1, fd_initrd = -1; 449ae1fae34SPekka Enberg 4502065a6f7SCyrill Gorcunov fd_kernel = open(kernel_filename, O_RDONLY); 4512065a6f7SCyrill Gorcunov if (fd_kernel < 0) 4520b62d2bbSPekka Enberg die("Unable to open kernel %s", kernel_filename); 453ae1fae34SPekka Enberg 4542065a6f7SCyrill Gorcunov if (initrd_filename) { 4552065a6f7SCyrill Gorcunov fd_initrd = open(initrd_filename, O_RDONLY); 4562065a6f7SCyrill Gorcunov if (fd_initrd < 0) 4570b62d2bbSPekka Enberg die("Unable to open initrd %s", initrd_filename); 45872811558SPekka Enberg 45972811558SPekka Enberg if (!initrd_check(fd_initrd)) 46072811558SPekka Enberg die("%s is not an initrd", initrd_filename); 4612065a6f7SCyrill Gorcunov } 4622065a6f7SCyrill Gorcunov 46353861c74SJohn Floren ret = load_bzimage(kvm, fd_kernel, fd_initrd, kernel_cmdline, vidmode); 46428972750SCyrill Gorcunov 465009b0758SPekka Enberg if (ret) 466009b0758SPekka Enberg goto found_kernel; 467ae1fae34SPekka Enberg 4684542f276SCyrill Gorcunov pr_warning("%s is not a bzImage. Trying to load it as a flat binary...", kernel_filename); 4690b62d2bbSPekka Enberg 470604dbd63SMatt Evans ret = load_flat_binary(kvm, fd_kernel, fd_initrd, kernel_cmdline); 471604dbd63SMatt Evans 472009b0758SPekka Enberg if (ret) 473009b0758SPekka Enberg goto found_kernel; 474009b0758SPekka Enberg 475604dbd63SMatt Evans if (initrd_filename) 476604dbd63SMatt Evans close(fd_initrd); 4775a6ac675SSasha Levin close(fd_kernel); 4785a6ac675SSasha Levin 479009b0758SPekka Enberg die("%s is not a valid bzImage or flat binary", kernel_filename); 480009b0758SPekka Enberg 481009b0758SPekka Enberg found_kernel: 482604dbd63SMatt Evans if (initrd_filename) 483604dbd63SMatt Evans close(fd_initrd); 4845a6ac675SSasha Levin close(fd_kernel); 4855a6ac675SSasha Levin 486ae1fae34SPekka Enberg return ret; 487ae1fae34SPekka Enberg } 488ae1fae34SPekka Enberg 489ce79f1caSPekka Enberg #define TIMER_INTERVAL_NS 1000000 /* 1 msec */ 490ce79f1caSPekka Enberg 491ce79f1caSPekka Enberg /* 492ce79f1caSPekka Enberg * This function sets up a timer that's used to inject interrupts from the 493ce79f1caSPekka Enberg * userspace hypervisor into the guest at periodical intervals. Please note 494ce79f1caSPekka Enberg * that clock interrupt, for example, is not handled here. 495ce79f1caSPekka Enberg */ 49643835ac9SSasha Levin void kvm__start_timer(struct kvm *kvm) 497ce79f1caSPekka Enberg { 498ce79f1caSPekka Enberg struct itimerspec its; 499ce79f1caSPekka Enberg struct sigevent sev; 500ce79f1caSPekka Enberg 501ce79f1caSPekka Enberg memset(&sev, 0, sizeof(struct sigevent)); 502ce79f1caSPekka Enberg sev.sigev_value.sival_int = 0; 503c7828731SSasha Levin sev.sigev_notify = SIGEV_THREAD_ID; 504ce79f1caSPekka Enberg sev.sigev_signo = SIGALRM; 505c7828731SSasha Levin sev._sigev_un._tid = syscall(__NR_gettid); 506ce79f1caSPekka Enberg 50743835ac9SSasha Levin if (timer_create(CLOCK_REALTIME, &sev, &kvm->timerid) < 0) 508ce79f1caSPekka Enberg die("timer_create()"); 509ce79f1caSPekka Enberg 510ce79f1caSPekka Enberg its.it_value.tv_sec = TIMER_INTERVAL_NS / 1000000000; 511ce79f1caSPekka Enberg its.it_value.tv_nsec = TIMER_INTERVAL_NS % 1000000000; 512ce79f1caSPekka Enberg its.it_interval.tv_sec = its.it_value.tv_sec; 513ce79f1caSPekka Enberg its.it_interval.tv_nsec = its.it_value.tv_nsec; 514ce79f1caSPekka Enberg 51543835ac9SSasha Levin if (timer_settime(kvm->timerid, 0, &its, NULL) < 0) 516ce79f1caSPekka Enberg die("timer_settime()"); 517ce79f1caSPekka Enberg } 518ce79f1caSPekka Enberg 51943835ac9SSasha Levin void kvm__stop_timer(struct kvm *kvm) 520fbfe68b7SSasha Levin { 52143835ac9SSasha Levin if (kvm->timerid) 52243835ac9SSasha Levin if (timer_delete(kvm->timerid) < 0) 523fbfe68b7SSasha Levin die("timer_delete()"); 524fbfe68b7SSasha Levin 52543835ac9SSasha Levin kvm->timerid = 0; 526fbfe68b7SSasha Levin } 527fbfe68b7SSasha Levin 52843835ac9SSasha Levin void kvm__dump_mem(struct kvm *kvm, unsigned long addr, unsigned long size) 529090f898eSCyrill Gorcunov { 530090f898eSCyrill Gorcunov unsigned char *p; 531090f898eSCyrill Gorcunov unsigned long n; 532090f898eSCyrill Gorcunov 533090f898eSCyrill Gorcunov size &= ~7; /* mod 8 */ 534090f898eSCyrill Gorcunov if (!size) 535090f898eSCyrill Gorcunov return; 536090f898eSCyrill Gorcunov 53743835ac9SSasha Levin p = guest_flat_to_host(kvm, addr); 538090f898eSCyrill Gorcunov 53948cf3877SPekka Enberg for (n = 0; n < size; n += 8) { 54043835ac9SSasha Levin if (!host_ptr_in_ram(kvm, p + n)) 54148cf3877SPekka Enberg break; 54248cf3877SPekka Enberg 543090f898eSCyrill Gorcunov printf(" 0x%08lx: %02x %02x %02x %02x %02x %02x %02x %02x\n", 544090f898eSCyrill Gorcunov addr + n, p[n + 0], p[n + 1], p[n + 2], p[n + 3], 545090f898eSCyrill Gorcunov p[n + 4], p[n + 5], p[n + 6], p[n + 7]); 546090f898eSCyrill Gorcunov } 54748cf3877SPekka Enberg } 5484298ddadSSasha Levin 5494298ddadSSasha Levin void kvm__pause(void) 5504298ddadSSasha Levin { 5514298ddadSSasha Levin int i, paused_vcpus = 0; 5524298ddadSSasha Levin 5534298ddadSSasha Levin /* Check if the guest is running */ 5544298ddadSSasha Levin if (!kvm_cpus[0] || kvm_cpus[0]->thread == 0) 5554298ddadSSasha Levin return; 5564298ddadSSasha Levin 5574298ddadSSasha Levin mutex_lock(&pause_lock); 5584298ddadSSasha Levin 5594298ddadSSasha Levin pause_event = eventfd(0, 0); 5604298ddadSSasha Levin if (pause_event < 0) 5614298ddadSSasha Levin die("Failed creating pause notification event"); 5624298ddadSSasha Levin for (i = 0; i < kvm->nrcpus; i++) 5634298ddadSSasha Levin pthread_kill(kvm_cpus[i]->thread, SIGKVMPAUSE); 5644298ddadSSasha Levin 5654298ddadSSasha Levin while (paused_vcpus < kvm->nrcpus) { 5664298ddadSSasha Levin u64 cur_read; 5674298ddadSSasha Levin 5684298ddadSSasha Levin if (read(pause_event, &cur_read, sizeof(cur_read)) < 0) 5694298ddadSSasha Levin die("Failed reading pause event"); 5704298ddadSSasha Levin paused_vcpus += cur_read; 5714298ddadSSasha Levin } 5724298ddadSSasha Levin close(pause_event); 5734298ddadSSasha Levin } 5744298ddadSSasha Levin 5754298ddadSSasha Levin void kvm__continue(void) 5764298ddadSSasha Levin { 5774298ddadSSasha Levin /* Check if the guest is running */ 5784298ddadSSasha Levin if (!kvm_cpus[0] || kvm_cpus[0]->thread == 0) 5794298ddadSSasha Levin return; 5804298ddadSSasha Levin 5814298ddadSSasha Levin mutex_unlock(&pause_lock); 5824298ddadSSasha Levin } 5834298ddadSSasha Levin 5844298ddadSSasha Levin void kvm__notify_paused(void) 5854298ddadSSasha Levin { 5864298ddadSSasha Levin u64 p = 1; 5874298ddadSSasha Levin 5884298ddadSSasha Levin if (write(pause_event, &p, sizeof(p)) < 0) 5894298ddadSSasha Levin die("Failed notifying of paused VCPU."); 5904298ddadSSasha Levin 5914298ddadSSasha Levin mutex_lock(&pause_lock); 5924298ddadSSasha Levin mutex_unlock(&pause_lock); 5934298ddadSSasha Levin } 594