1ae1fae34SPekka Enberg #include "kvm/kvm.h" 2ae1fae34SPekka Enberg 3eda03319SPekka Enberg #include "kvm/interrupt.h" 4f3150089SPekka Enberg #include "kvm/util.h" 5eda03319SPekka Enberg 66c7d8514SPekka Enberg #include <linux/kvm.h> 7f5ab5f67SPekka Enberg 8f5ab5f67SPekka Enberg #include <asm/bootparam.h> 9f5ab5f67SPekka Enberg 10ae1fae34SPekka Enberg #include <sys/ioctl.h> 111f9cff23SPekka Enberg #include <inttypes.h> 121f9cff23SPekka Enberg #include <sys/mman.h> 132da26a59SPekka Enberg #include <stdbool.h> 146e5e8b8dSPekka Enberg #include <assert.h> 1506e41eeaSPekka Enberg #include <limits.h> 16f5ab5f67SPekka Enberg #include <stdarg.h> 17b8f6afcdSPekka Enberg #include <stdlib.h> 18f5ab5f67SPekka Enberg #include <string.h> 190d1f17ecSPekka Enberg #include <unistd.h> 201f9cff23SPekka Enberg #include <stdio.h> 21b8f6afcdSPekka Enberg #include <fcntl.h> 22b8f6afcdSPekka Enberg 231f9cff23SPekka Enberg /* 241f9cff23SPekka Enberg * Compatibility code. Remove this when we move to tools/kvm. 251f9cff23SPekka Enberg */ 261f9cff23SPekka Enberg #ifndef KVM_EXIT_INTERNAL_ERROR 271f9cff23SPekka Enberg # define KVM_EXIT_INTERNAL_ERROR 17 281f9cff23SPekka Enberg #endif 291f9cff23SPekka Enberg 30ae1fae34SPekka Enberg #define DEFINE_KVM_EXIT_REASON(reason) [reason] = #reason 310d1f17ecSPekka Enberg 32ae1fae34SPekka Enberg const char *kvm_exit_reasons[] = { 33ae1fae34SPekka Enberg DEFINE_KVM_EXIT_REASON(KVM_EXIT_UNKNOWN), 34ae1fae34SPekka Enberg DEFINE_KVM_EXIT_REASON(KVM_EXIT_EXCEPTION), 35ae1fae34SPekka Enberg DEFINE_KVM_EXIT_REASON(KVM_EXIT_IO), 36ae1fae34SPekka Enberg DEFINE_KVM_EXIT_REASON(KVM_EXIT_HYPERCALL), 37ae1fae34SPekka Enberg DEFINE_KVM_EXIT_REASON(KVM_EXIT_DEBUG), 38ae1fae34SPekka Enberg DEFINE_KVM_EXIT_REASON(KVM_EXIT_HLT), 39ae1fae34SPekka Enberg DEFINE_KVM_EXIT_REASON(KVM_EXIT_MMIO), 40ae1fae34SPekka Enberg DEFINE_KVM_EXIT_REASON(KVM_EXIT_IRQ_WINDOW_OPEN), 41ae1fae34SPekka Enberg DEFINE_KVM_EXIT_REASON(KVM_EXIT_SHUTDOWN), 42ae1fae34SPekka Enberg DEFINE_KVM_EXIT_REASON(KVM_EXIT_FAIL_ENTRY), 43ae1fae34SPekka Enberg DEFINE_KVM_EXIT_REASON(KVM_EXIT_INTR), 44ae1fae34SPekka Enberg DEFINE_KVM_EXIT_REASON(KVM_EXIT_SET_TPR), 45ae1fae34SPekka Enberg DEFINE_KVM_EXIT_REASON(KVM_EXIT_TPR_ACCESS), 46ae1fae34SPekka Enberg DEFINE_KVM_EXIT_REASON(KVM_EXIT_S390_SIEIC), 47ae1fae34SPekka Enberg DEFINE_KVM_EXIT_REASON(KVM_EXIT_S390_RESET), 48ae1fae34SPekka Enberg DEFINE_KVM_EXIT_REASON(KVM_EXIT_DCR), 49ae1fae34SPekka Enberg DEFINE_KVM_EXIT_REASON(KVM_EXIT_NMI), 50ae1fae34SPekka Enberg DEFINE_KVM_EXIT_REASON(KVM_EXIT_INTERNAL_ERROR), 519b1fb1c3SPekka Enberg }; 529b1fb1c3SPekka Enberg 536753ed2fSPekka Enberg static inline uint32_t segment_to_flat(uint16_t selector, uint16_t offset) 546753ed2fSPekka Enberg { 556753ed2fSPekka Enberg return ((uint32_t)selector << 4) + (uint32_t) offset; 566753ed2fSPekka Enberg } 576753ed2fSPekka Enberg 586753ed2fSPekka Enberg static inline void *guest_flat_to_host(struct kvm *self, unsigned long offset) 59ae1fae34SPekka Enberg { 60ae1fae34SPekka Enberg return self->ram_start + offset; 61ae1fae34SPekka Enberg } 62ae1fae34SPekka Enberg 636753ed2fSPekka Enberg static inline void *guest_real_to_host(struct kvm *self, uint16_t selector, uint16_t offset) 646753ed2fSPekka Enberg { 656753ed2fSPekka Enberg unsigned long flat = segment_to_flat(selector, offset); 666753ed2fSPekka Enberg 676753ed2fSPekka Enberg return guest_flat_to_host(self, flat); 686753ed2fSPekka Enberg } 696753ed2fSPekka Enberg 70ae1fae34SPekka Enberg static bool kvm__supports_extension(struct kvm *self, unsigned int extension) 71b8f6afcdSPekka Enberg { 7228fa19c0SPekka Enberg int ret; 73b8f6afcdSPekka Enberg 7473ac60e6SPekka Enberg ret = ioctl(self->sys_fd, KVM_CHECK_EXTENSION, extension); 754076b041SPekka Enberg if (ret < 0) 764076b041SPekka Enberg return false; 774076b041SPekka Enberg 784076b041SPekka Enberg return ret; 794076b041SPekka Enberg } 804076b041SPekka Enberg 814076b041SPekka Enberg static struct kvm *kvm__new(void) 824076b041SPekka Enberg { 834076b041SPekka Enberg struct kvm *self = calloc(1, sizeof *self); 844076b041SPekka Enberg 854076b041SPekka Enberg if (!self) 864076b041SPekka Enberg die("out of memory"); 874076b041SPekka Enberg 884076b041SPekka Enberg return self; 894076b041SPekka Enberg } 904076b041SPekka Enberg 919ef4c68eSPekka Enberg void kvm__delete(struct kvm *self) 929ef4c68eSPekka Enberg { 939ef4c68eSPekka Enberg free(self->ram_start); 949ef4c68eSPekka Enberg free(self); 959ef4c68eSPekka Enberg } 969ef4c68eSPekka Enberg 97ae1fae34SPekka Enberg struct kvm *kvm__init(void) 984076b041SPekka Enberg { 992b0e3342SPekka Enberg struct kvm_userspace_memory_region mem; 1004076b041SPekka Enberg struct kvm *self; 1010d1f17ecSPekka Enberg long page_size; 1021f9cff23SPekka Enberg int mmap_size; 1034076b041SPekka Enberg int ret; 1044076b041SPekka Enberg 1054076b041SPekka Enberg self = kvm__new(); 1064076b041SPekka Enberg 10773ac60e6SPekka Enberg self->sys_fd = open("/dev/kvm", O_RDWR); 10873ac60e6SPekka Enberg if (self->sys_fd < 0) 109f5ab5f67SPekka Enberg die_perror("open"); 110b8f6afcdSPekka Enberg 11173ac60e6SPekka Enberg ret = ioctl(self->sys_fd, KVM_GET_API_VERSION, 0); 1126c7d8514SPekka Enberg if (ret != KVM_API_VERSION) 113f5ab5f67SPekka Enberg die_perror("KVM_API_VERSION ioctl"); 1146c7d8514SPekka Enberg 11573ac60e6SPekka Enberg self->vm_fd = ioctl(self->sys_fd, KVM_CREATE_VM, 0); 11673ac60e6SPekka Enberg if (self->vm_fd < 0) 117f5ab5f67SPekka Enberg die_perror("KVM_CREATE_VM ioctl"); 11828fa19c0SPekka Enberg 1194076b041SPekka Enberg if (!kvm__supports_extension(self, KVM_CAP_USER_MEMORY)) 120f5ab5f67SPekka Enberg die("KVM_CAP_USER_MEMORY is not supported"); 1212da26a59SPekka Enberg 1220d1f17ecSPekka Enberg self->ram_size = 64UL * 1024UL * 1024UL; 1230d1f17ecSPekka Enberg 1240d1f17ecSPekka Enberg page_size = sysconf(_SC_PAGESIZE); 1250d1f17ecSPekka Enberg if (posix_memalign(&self->ram_start, page_size, self->ram_size) != 0) 1260d1f17ecSPekka Enberg die("out of memory"); 1270d1f17ecSPekka Enberg 1282b0e3342SPekka Enberg mem = (struct kvm_userspace_memory_region) { 1292b0e3342SPekka Enberg .slot = 0, 1302b0e3342SPekka Enberg .guest_phys_addr = 0x0UL, 1310d1f17ecSPekka Enberg .memory_size = self->ram_size, 1320d1f17ecSPekka Enberg .userspace_addr = (unsigned long) self->ram_start, 1332b0e3342SPekka Enberg }; 1342b0e3342SPekka Enberg 13573ac60e6SPekka Enberg ret = ioctl(self->vm_fd, KVM_SET_USER_MEMORY_REGION, &mem, 1); 1362b0e3342SPekka Enberg if (ret < 0) 137f5ab5f67SPekka Enberg die_perror("KVM_SET_USER_MEMORY_REGION ioctl"); 1382b0e3342SPekka Enberg 139895c2fefSPekka Enberg if (!kvm__supports_extension(self, KVM_CAP_SET_TSS_ADDR)) 140f5ab5f67SPekka Enberg die("KVM_CAP_SET_TSS_ADDR is not supported"); 141895c2fefSPekka Enberg 14273ac60e6SPekka Enberg ret = ioctl(self->vm_fd, KVM_SET_TSS_ADDR, 0xfffbd000); 143895c2fefSPekka Enberg if (ret < 0) 144f5ab5f67SPekka Enberg die_perror("KVM_SET_TSS_ADDR ioctl"); 145895c2fefSPekka Enberg 14673ac60e6SPekka Enberg self->vcpu_fd = ioctl(self->vm_fd, KVM_CREATE_VCPU, 0); 1472b0e3342SPekka Enberg if (self->vcpu_fd < 0) 148f5ab5f67SPekka Enberg die_perror("KVM_CREATE_VCPU ioctl"); 1492b0e3342SPekka Enberg 1501f9cff23SPekka Enberg mmap_size = ioctl(self->sys_fd, KVM_GET_VCPU_MMAP_SIZE, 0); 1511f9cff23SPekka Enberg if (mmap_size < 0) 152f5ab5f67SPekka Enberg die_perror("KVM_GET_VCPU_MMAP_SIZE ioctl"); 1531f9cff23SPekka Enberg 1541f9cff23SPekka Enberg self->kvm_run = mmap(NULL, mmap_size, PROT_READ|PROT_WRITE, MAP_SHARED, self->vcpu_fd, 0); 1551f9cff23SPekka Enberg if (self->kvm_run == MAP_FAILED) 1561f9cff23SPekka Enberg die("unable to mmap vcpu fd"); 1571f9cff23SPekka Enberg 1584076b041SPekka Enberg return self; 1594076b041SPekka Enberg } 1604076b041SPekka Enberg 161ae1fae34SPekka Enberg void kvm__enable_singlestep(struct kvm *self) 162aee6632eSPekka Enberg { 163aee6632eSPekka Enberg struct kvm_guest_debug debug = { 164aee6632eSPekka Enberg .control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP, 165aee6632eSPekka Enberg }; 166aee6632eSPekka Enberg 167aee6632eSPekka Enberg if (ioctl(self->vcpu_fd, KVM_SET_GUEST_DEBUG, &debug) < 0) 168b625d4b6SCyrill Gorcunov warning("KVM_SET_GUEST_DEBUG failed"); 169aee6632eSPekka Enberg } 170aee6632eSPekka Enberg 1715f6772b8SCyrill Gorcunov #define BOOT_LOADER_SELECTOR 0x1000 172b08e9ec4SPekka Enberg #define BOOT_LOADER_IP 0x0000 173dbdb74c2SPekka Enberg #define BOOT_LOADER_SP 0x8000 174009b0758SPekka Enberg 175edc8a14dSPekka Enberg static int load_flat_binary(struct kvm *self, int fd) 176009b0758SPekka Enberg { 177009b0758SPekka Enberg void *p; 178009b0758SPekka Enberg int nr; 179009b0758SPekka Enberg 180009b0758SPekka Enberg if (lseek(fd, 0, SEEK_SET) < 0) 181009b0758SPekka Enberg die_perror("lseek"); 182009b0758SPekka Enberg 1836753ed2fSPekka Enberg p = guest_real_to_host(self, BOOT_LOADER_SELECTOR, BOOT_LOADER_IP); 184009b0758SPekka Enberg 185009b0758SPekka Enberg while ((nr = read(fd, p, 65536)) > 0) 186009b0758SPekka Enberg p += nr; 187009b0758SPekka Enberg 188dbdb74c2SPekka Enberg self->boot_selector = BOOT_LOADER_SELECTOR; 189edc8a14dSPekka Enberg self->boot_ip = BOOT_LOADER_IP; 190dbdb74c2SPekka Enberg self->boot_sp = BOOT_LOADER_SP; 191edc8a14dSPekka Enberg 1927fb218bdSPekka Enberg return true; 193009b0758SPekka Enberg } 194009b0758SPekka Enberg 1957fb218bdSPekka Enberg /* 1967fb218bdSPekka Enberg * The protected mode kernel part of a modern bzImage is loaded at 1 MB by 1977fb218bdSPekka Enberg * default. 1987fb218bdSPekka Enberg */ 1997fb218bdSPekka Enberg #define BZ_KERNEL_START 0x100000UL 200ae1fae34SPekka Enberg 201ae1fae34SPekka Enberg static const char *BZIMAGE_MAGIC = "HdrS"; 202ae1fae34SPekka Enberg 20310943d14SPekka Enberg #define BZ_DEFAULT_SETUP_SECTS 4 20410943d14SPekka Enberg 2056d1f350dSCyrill Gorcunov static bool load_bzimage(struct kvm *self, int fd, const char *kernel_cmdline) 206ae1fae34SPekka Enberg { 207bc75b0aeSCyrill Gorcunov struct real_intr_desc intr; 20822489bb0SCyrill Gorcunov struct boot_params boot; 2094b62331fSPekka Enberg unsigned long setup_sects; 210305d9054SCyrill Gorcunov unsigned int intr_addr; 211debcfac0SCyrill Gorcunov size_t cmdline_size, cmdline_offset; 2127fb218bdSPekka Enberg ssize_t setup_size; 21322489bb0SCyrill Gorcunov void *p; 214ae1fae34SPekka Enberg int nr; 215ae1fae34SPekka Enberg 2165d67eaf6SPekka Enberg /* 2175d67eaf6SPekka Enberg * See Documentation/x86/boot.txt for details no bzImage on-disk and 2185d67eaf6SPekka Enberg * memory layout. 2195d67eaf6SPekka Enberg */ 2205d67eaf6SPekka Enberg 221009b0758SPekka Enberg if (lseek(fd, 0, SEEK_SET) < 0) 222009b0758SPekka Enberg die_perror("lseek"); 223009b0758SPekka Enberg 224ae1fae34SPekka Enberg read(fd, &boot, sizeof(boot)); 225ae1fae34SPekka Enberg 226ae1fae34SPekka Enberg if (memcmp(&boot.hdr.header, BZIMAGE_MAGIC, strlen(BZIMAGE_MAGIC)) != 0) 2277fb218bdSPekka Enberg return false; 228ae1fae34SPekka Enberg 229ad681038SCyrill Gorcunov if (boot.hdr.version < 0x0200) { 230ad681038SCyrill Gorcunov warning("Too old kernel"); 231ad681038SCyrill Gorcunov return false; 232ad681038SCyrill Gorcunov } 233ad681038SCyrill Gorcunov 234e93ab78aSPekka Enberg if (lseek(fd, 0, SEEK_SET) < 0) 235e93ab78aSPekka Enberg die_perror("lseek"); 236e93ab78aSPekka Enberg 2374cf542bbSCyrill Gorcunov if (!boot.hdr.setup_sects) 2384cf542bbSCyrill Gorcunov boot.hdr.setup_sects = BZ_DEFAULT_SETUP_SECTS; 23910943d14SPekka Enberg setup_sects = boot.hdr.setup_sects + 1; 24010943d14SPekka Enberg 24154d4a626SPekka Enberg setup_size = setup_sects << 9; 2426753ed2fSPekka Enberg p = guest_real_to_host(self, BOOT_LOADER_SELECTOR, BOOT_LOADER_IP); 243ae1fae34SPekka Enberg 2447fb218bdSPekka Enberg if (read(fd, p, setup_size) != setup_size) 2457fb218bdSPekka Enberg die_perror("read"); 2467fb218bdSPekka Enberg 2476753ed2fSPekka Enberg p = guest_flat_to_host(self, BZ_KERNEL_START); 248ae1fae34SPekka Enberg 249ae1fae34SPekka Enberg while ((nr = read(fd, p, 65536)) > 0) 250ae1fae34SPekka Enberg p += nr; 251ae1fae34SPekka Enberg 252debcfac0SCyrill Gorcunov if (boot.hdr.version < 0x0206) 253debcfac0SCyrill Gorcunov boot.hdr.cmdline_size = 256; 254debcfac0SCyrill Gorcunov 255debcfac0SCyrill Gorcunov if (kernel_cmdline) { 256debcfac0SCyrill Gorcunov cmdline_size = strlen(kernel_cmdline) + 1; 257debcfac0SCyrill Gorcunov if (cmdline_size > boot.hdr.cmdline_size) 258debcfac0SCyrill Gorcunov cmdline_size = boot.hdr.cmdline_size; 259ad681038SCyrill Gorcunov } else 260ad681038SCyrill Gorcunov cmdline_size = 0; 261ad681038SCyrill Gorcunov 262ad681038SCyrill Gorcunov 263ad681038SCyrill Gorcunov if (boot.hdr.version < 0x0202 || !(boot.hdr.loadflags & 0x01)) 264ad681038SCyrill Gorcunov cmdline_offset = (0x9ff0 - cmdline_size) & ~15; 265ad681038SCyrill Gorcunov else 266ad681038SCyrill Gorcunov cmdline_offset = 0x10000; 267ad681038SCyrill Gorcunov 26883f4324cSCyrill Gorcunov if (kernel_cmdline) { 26983f4324cSCyrill Gorcunov p = guest_flat_to_host(self, cmdline_offset); 27083f4324cSCyrill Gorcunov memset(p, 0, cmdline_size); 27183f4324cSCyrill Gorcunov strcpy(p, kernel_cmdline); 27283f4324cSCyrill Gorcunov } 27383f4324cSCyrill Gorcunov 274ad681038SCyrill Gorcunov if (boot.hdr.version >= 0x0200) { 275ad681038SCyrill Gorcunov if (boot.hdr.version >= 0x0202) { 276ad681038SCyrill Gorcunov boot.hdr.cmd_line_ptr = 277ad681038SCyrill Gorcunov (BOOT_LOADER_SELECTOR << 4) + cmdline_offset; 278ad681038SCyrill Gorcunov } else if (boot.hdr.version >= 0x0201) { 279ad681038SCyrill Gorcunov boot.hdr.heap_end_ptr = cmdline_offset - 0x0200; 280ad681038SCyrill Gorcunov boot.hdr.loadflags |= CAN_USE_HEAP; 281ad681038SCyrill Gorcunov } 282ad681038SCyrill Gorcunov 283debcfac0SCyrill Gorcunov } 284debcfac0SCyrill Gorcunov 285dbdb74c2SPekka Enberg self->boot_selector = BOOT_LOADER_SELECTOR; 286edc8a14dSPekka Enberg /* 287edc8a14dSPekka Enberg * The real-mode setup code starts at offset 0x200 of a bzImage. See 288edc8a14dSPekka Enberg * Documentation/x86/boot.txt for details. 289edc8a14dSPekka Enberg */ 290edc8a14dSPekka Enberg self->boot_ip = BOOT_LOADER_IP + 0x200; 291dbdb74c2SPekka Enberg self->boot_sp = BOOT_LOADER_SP; 292edc8a14dSPekka Enberg 293ea684828SCyrill Gorcunov /* 29422489bb0SCyrill Gorcunov * Setup a *fake* real mode vector table, it has only 29522489bb0SCyrill Gorcunov * one real hadler which does just iret 29622489bb0SCyrill Gorcunov * 297305d9054SCyrill Gorcunov * This is where the BIOS lives -- BDA area 298ea684828SCyrill Gorcunov */ 299305d9054SCyrill Gorcunov intr_addr = BIOS_INTR_NEXT(BDA_START + 0, 16); 300305d9054SCyrill Gorcunov p = guest_flat_to_host(self, intr_addr); 301305d9054SCyrill Gorcunov memcpy(p, intfake, intfake_size); 302bc75b0aeSCyrill Gorcunov intr = (struct real_intr_desc) { 303305d9054SCyrill Gorcunov .segment = REAL_SEGMENT(intr_addr), 30422489bb0SCyrill Gorcunov .offset = 0, 305ea684828SCyrill Gorcunov }; 306bc75b0aeSCyrill Gorcunov interrupt_table__setup(&self->interrupt_table, &intr); 307305d9054SCyrill Gorcunov 308305d9054SCyrill Gorcunov intr_addr = BIOS_INTR_NEXT(BDA_START + intfake_size, 16); 309305d9054SCyrill Gorcunov p = guest_flat_to_host(self, intr_addr); 310305d9054SCyrill Gorcunov memcpy(p, int10, int10_size); 311305d9054SCyrill Gorcunov intr = (struct real_intr_desc) { 312305d9054SCyrill Gorcunov .segment = REAL_SEGMENT(intr_addr), 313305d9054SCyrill Gorcunov .offset = 0, 314305d9054SCyrill Gorcunov }; 315305d9054SCyrill Gorcunov interrupt_table__set(&self->interrupt_table, &intr, 0x10); 316305d9054SCyrill Gorcunov 317305d9054SCyrill Gorcunov p = guest_flat_to_host(self, 0); 318bc75b0aeSCyrill Gorcunov interrupt_table__copy(&self->interrupt_table, p, REAL_INTR_SIZE); 319ea684828SCyrill Gorcunov 3207fb218bdSPekka Enberg return true; 321ae1fae34SPekka Enberg } 322ae1fae34SPekka Enberg 3236d1f350dSCyrill Gorcunov bool kvm__load_kernel(struct kvm *kvm, const char *kernel_filename, 3246d1f350dSCyrill Gorcunov const char *kernel_cmdline) 325ae1fae34SPekka Enberg { 3267fb218bdSPekka Enberg bool ret; 327ae1fae34SPekka Enberg int fd; 328ae1fae34SPekka Enberg 329ae1fae34SPekka Enberg fd = open(kernel_filename, O_RDONLY); 330ae1fae34SPekka Enberg if (fd < 0) 331ae1fae34SPekka Enberg die("unable to open kernel"); 332ae1fae34SPekka Enberg 3336d1f350dSCyrill Gorcunov ret = load_bzimage(kvm, fd, kernel_cmdline); 334009b0758SPekka Enberg if (ret) 335009b0758SPekka Enberg goto found_kernel; 336ae1fae34SPekka Enberg 337009b0758SPekka Enberg ret = load_flat_binary(kvm, fd); 338009b0758SPekka Enberg if (ret) 339009b0758SPekka Enberg goto found_kernel; 340009b0758SPekka Enberg 341009b0758SPekka Enberg die("%s is not a valid bzImage or flat binary", kernel_filename); 342009b0758SPekka Enberg 343009b0758SPekka Enberg found_kernel: 344ae1fae34SPekka Enberg return ret; 345ae1fae34SPekka Enberg } 346ae1fae34SPekka Enberg 34706e41eeaSPekka Enberg static inline uint64_t ip_flat_to_real(struct kvm *self, uint64_t ip) 34806e41eeaSPekka Enberg { 34906e41eeaSPekka Enberg uint64_t cs = self->sregs.cs.selector; 35006e41eeaSPekka Enberg 35106e41eeaSPekka Enberg return ip - (cs << 4); 35206e41eeaSPekka Enberg } 35306e41eeaSPekka Enberg 354*f01944c8SPekka Enberg static inline bool is_in_protected_mode(struct kvm *self) 355*f01944c8SPekka Enberg { 356*f01944c8SPekka Enberg return self->sregs.cr0 & 0x01; 357*f01944c8SPekka Enberg } 358*f01944c8SPekka Enberg 359f326512aSPekka Enberg static inline uint64_t ip_to_flat(struct kvm *self, uint64_t ip) 36006e41eeaSPekka Enberg { 361f326512aSPekka Enberg uint64_t cs; 362f326512aSPekka Enberg 363f326512aSPekka Enberg /* 364f326512aSPekka Enberg * NOTE! We should take code segment base address into account here. 365f326512aSPekka Enberg * Luckily it's usually zero because Linux uses flat memory model. 366f326512aSPekka Enberg */ 367*f01944c8SPekka Enberg if (is_in_protected_mode(self)) 368f326512aSPekka Enberg return ip; 369f326512aSPekka Enberg 370f326512aSPekka Enberg cs = self->sregs.cs.selector; 37106e41eeaSPekka Enberg 37206e41eeaSPekka Enberg return ip + (cs << 4); 37306e41eeaSPekka Enberg } 37406e41eeaSPekka Enberg 375dbdb74c2SPekka Enberg static inline uint32_t selector_to_base(uint16_t selector) 376dbdb74c2SPekka Enberg { 377dbdb74c2SPekka Enberg /* 378dbdb74c2SPekka Enberg * KVM on Intel requires 'base' to be 'selector * 16' in real mode. 379dbdb74c2SPekka Enberg */ 380dbdb74c2SPekka Enberg return (uint32_t)selector * 16; 381dbdb74c2SPekka Enberg } 382dbdb74c2SPekka Enberg 38353d48714SPekka Enberg static struct kvm_msrs *kvm_msrs__new(size_t nmsrs) 38453d48714SPekka Enberg { 38553d48714SPekka Enberg struct kvm_msrs *self = calloc(1, sizeof(*self) + (sizeof(struct kvm_msr_entry) * nmsrs)); 38653d48714SPekka Enberg 38753d48714SPekka Enberg if (!self) 38853d48714SPekka Enberg die("out of memory"); 38953d48714SPekka Enberg 39053d48714SPekka Enberg return self; 39153d48714SPekka Enberg } 39253d48714SPekka Enberg 39353d48714SPekka Enberg #define MSR_IA32_TIME_STAMP_COUNTER 0x10 39453d48714SPekka Enberg 39553d48714SPekka Enberg #define MSR_IA32_SYSENTER_CS 0x174 39653d48714SPekka Enberg #define MSR_IA32_SYSENTER_ESP 0x175 39753d48714SPekka Enberg #define MSR_IA32_SYSENTER_EIP 0x176 39853d48714SPekka Enberg 39953d48714SPekka Enberg #define MSR_IA32_STAR 0xc0000081 40053d48714SPekka Enberg #define MSR_IA32_LSTAR 0xc0000082 40153d48714SPekka Enberg #define MSR_IA32_CSTAR 0xc0000083 40253d48714SPekka Enberg #define MSR_IA32_FMASK 0xc0000084 40353d48714SPekka Enberg #define MSR_IA32_KERNEL_GS_BASE 0xc0000102 40453d48714SPekka Enberg 40553d48714SPekka Enberg #define KVM_MSR_ENTRY(_index, _data) \ 40653d48714SPekka Enberg (struct kvm_msr_entry) { .index = _index, .data = _data } 40753d48714SPekka Enberg 40853d48714SPekka Enberg static void kvm__setup_msrs(struct kvm *self) 40953d48714SPekka Enberg { 41053d48714SPekka Enberg unsigned long ndx = 0; 41153d48714SPekka Enberg 41253d48714SPekka Enberg self->msrs = kvm_msrs__new(100); 41353d48714SPekka Enberg 41453d48714SPekka Enberg self->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_SYSENTER_CS, 0x0); 41553d48714SPekka Enberg self->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_SYSENTER_ESP, 0x0); 41653d48714SPekka Enberg self->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_SYSENTER_EIP, 0x0); 41753d48714SPekka Enberg self->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_STAR, 0x0); 41853d48714SPekka Enberg self->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_CSTAR, 0x0); 41953d48714SPekka Enberg self->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_KERNEL_GS_BASE, 0x0); 42053d48714SPekka Enberg self->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_FMASK, 0x0); 42153d48714SPekka Enberg self->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_LSTAR, 0x0); 42253d48714SPekka Enberg self->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_TIME_STAMP_COUNTER, 0x0); 42353d48714SPekka Enberg 42453d48714SPekka Enberg self->msrs->nmsrs = ndx; 42553d48714SPekka Enberg 42653d48714SPekka Enberg if (ioctl(self->vcpu_fd, KVM_SET_MSRS, self->msrs) < 0) 42753d48714SPekka Enberg die_perror("KVM_SET_MSRS failed"); 42853d48714SPekka Enberg } 42953d48714SPekka Enberg 4303d78d3a5SPekka Enberg static void kvm__setup_fpu(struct kvm *self) 4313d78d3a5SPekka Enberg { 4323d78d3a5SPekka Enberg self->fpu = (struct kvm_fpu) { 4333d78d3a5SPekka Enberg .fcw = 0x37f, 4343d78d3a5SPekka Enberg .mxcsr = 0x1f80, 4353d78d3a5SPekka Enberg }; 4363d78d3a5SPekka Enberg 4373d78d3a5SPekka Enberg if (ioctl(self->vcpu_fd, KVM_SET_FPU, &self->fpu) < 0) 4383d78d3a5SPekka Enberg die_perror("KVM_SET_FPU failed"); 4393d78d3a5SPekka Enberg } 4403d78d3a5SPekka Enberg 4413d78d3a5SPekka Enberg static void kvm__setup_regs(struct kvm *self) 4423d78d3a5SPekka Enberg { 4433d78d3a5SPekka Enberg self->regs = (struct kvm_regs) { 4443d78d3a5SPekka Enberg /* We start the guest in 16-bit real mode */ 4453d78d3a5SPekka Enberg .rflags = 0x0000000000000002ULL, 4463d78d3a5SPekka Enberg 4473d78d3a5SPekka Enberg .rip = self->boot_ip, 4483d78d3a5SPekka Enberg .rsp = self->boot_sp, 4493d78d3a5SPekka Enberg .rbp = self->boot_sp, 4503d78d3a5SPekka Enberg }; 4513d78d3a5SPekka Enberg 4523d78d3a5SPekka Enberg if (self->regs.rip > USHRT_MAX) 4533d78d3a5SPekka Enberg die("ip 0x%" PRIx64 " is too high for real mode", (uint64_t) self->regs.rip); 4543d78d3a5SPekka Enberg 4553d78d3a5SPekka Enberg if (ioctl(self->vcpu_fd, KVM_SET_REGS, &self->regs) < 0) 4563d78d3a5SPekka Enberg die_perror("KVM_SET_REGS failed"); 4573d78d3a5SPekka Enberg } 4583d78d3a5SPekka Enberg 4593d78d3a5SPekka Enberg static void kvm__setup_sregs(struct kvm *self) 460ae1fae34SPekka Enberg { 46153602077SPekka Enberg self->sregs = (struct kvm_sregs) { 46253602077SPekka Enberg .cr0 = 0x60000010ULL, 46353602077SPekka Enberg .cs = (struct kvm_segment) { 464dbdb74c2SPekka Enberg .selector = self->boot_selector, 465dbdb74c2SPekka Enberg .base = selector_to_base(self->boot_selector), 46653602077SPekka Enberg .limit = 0xffffU, 46753602077SPekka Enberg .type = 0x0bU, 46853602077SPekka Enberg .present = 1, 46953602077SPekka Enberg .dpl = 0x03, 47053602077SPekka Enberg .s = 1, 47153602077SPekka Enberg }, 47253602077SPekka Enberg .ss = (struct kvm_segment) { 473dbdb74c2SPekka Enberg .selector = self->boot_selector, 474dbdb74c2SPekka Enberg .base = selector_to_base(self->boot_selector), 47553602077SPekka Enberg .limit = 0xffffU, 47653602077SPekka Enberg .type = 0x03U, 47753602077SPekka Enberg .present = 1, 47853602077SPekka Enberg .dpl = 0x03, 47953602077SPekka Enberg .s = 1, 48053602077SPekka Enberg }, 48153602077SPekka Enberg .ds = (struct kvm_segment) { 482dbdb74c2SPekka Enberg .selector = self->boot_selector, 483dbdb74c2SPekka Enberg .base = selector_to_base(self->boot_selector), 48453602077SPekka Enberg .limit = 0xffffU, 48553602077SPekka Enberg .type = 0x03U, 48653602077SPekka Enberg .present = 1, 48753602077SPekka Enberg .dpl = 0x03, 48853602077SPekka Enberg .s = 1, 48953602077SPekka Enberg }, 49053602077SPekka Enberg .es = (struct kvm_segment) { 491dbdb74c2SPekka Enberg .selector = self->boot_selector, 492dbdb74c2SPekka Enberg .base = selector_to_base(self->boot_selector), 49353602077SPekka Enberg .limit = 0xffffU, 49453602077SPekka Enberg .type = 0x03U, 49553602077SPekka Enberg .present = 1, 49653602077SPekka Enberg .dpl = 0x03, 49753602077SPekka Enberg .s = 1, 49853602077SPekka Enberg }, 49953602077SPekka Enberg .fs = (struct kvm_segment) { 500dbdb74c2SPekka Enberg .selector = self->boot_selector, 501dbdb74c2SPekka Enberg .base = selector_to_base(self->boot_selector), 50253602077SPekka Enberg .limit = 0xffffU, 50353602077SPekka Enberg .type = 0x03U, 50453602077SPekka Enberg .present = 1, 50553602077SPekka Enberg .dpl = 0x03, 50653602077SPekka Enberg .s = 1, 50753602077SPekka Enberg }, 50853602077SPekka Enberg .gs = (struct kvm_segment) { 509dbdb74c2SPekka Enberg .selector = self->boot_selector, 510dbdb74c2SPekka Enberg .base = selector_to_base(self->boot_selector), 51153602077SPekka Enberg .limit = 0xffffU, 51253602077SPekka Enberg .type = 0x03U, 51353602077SPekka Enberg .present = 1, 51453602077SPekka Enberg .dpl = 0x03, 51553602077SPekka Enberg .s = 1, 51653602077SPekka Enberg }, 51753602077SPekka Enberg .tr = (struct kvm_segment) { 51853602077SPekka Enberg .limit = 0xffffU, 51953602077SPekka Enberg .present = 1, 52053602077SPekka Enberg .type = 0x03U, 52153602077SPekka Enberg }, 52253602077SPekka Enberg .ldt = (struct kvm_segment) { 52353602077SPekka Enberg .limit = 0xffffU, 52453602077SPekka Enberg .present = 1, 5257085d935SPekka Enberg .type = 0x02U, 52653602077SPekka Enberg }, 52753602077SPekka Enberg .gdt = (struct kvm_dtable) { 52853602077SPekka Enberg .limit = 0xffffU, 52953602077SPekka Enberg }, 53053602077SPekka Enberg .idt = (struct kvm_dtable) { 53153602077SPekka Enberg .limit = 0xffffU, 53253602077SPekka Enberg }, 53353602077SPekka Enberg }; 53453602077SPekka Enberg 53553602077SPekka Enberg if (ioctl(self->vcpu_fd, KVM_SET_SREGS, &self->sregs) < 0) 53653602077SPekka Enberg die_perror("KVM_SET_SREGS failed"); 5373d78d3a5SPekka Enberg } 53806e41eeaSPekka Enberg 5393d78d3a5SPekka Enberg void kvm__reset_vcpu(struct kvm *self) 5403d78d3a5SPekka Enberg { 5413d78d3a5SPekka Enberg kvm__setup_sregs(self); 542dbdb74c2SPekka Enberg 5433d78d3a5SPekka Enberg kvm__setup_regs(self); 54406e41eeaSPekka Enberg 5453d78d3a5SPekka Enberg kvm__setup_fpu(self); 54653d48714SPekka Enberg 54753d48714SPekka Enberg kvm__setup_msrs(self); 548ae1fae34SPekka Enberg } 549ae1fae34SPekka Enberg 550ae1fae34SPekka Enberg void kvm__run(struct kvm *self) 551ae1fae34SPekka Enberg { 552ae1fae34SPekka Enberg if (ioctl(self->vcpu_fd, KVM_RUN, 0) < 0) 553ae1fae34SPekka Enberg die_perror("KVM_RUN failed"); 554ae1fae34SPekka Enberg } 555ae1fae34SPekka Enberg 5568351aaddSPekka Enberg static void print_dtable(const char *name, struct kvm_dtable *dtable) 5578351aaddSPekka Enberg { 5588351aaddSPekka Enberg printf(" %s %016" PRIx64 " %08" PRIx16 "\n", 5598351aaddSPekka Enberg name, (uint64_t) dtable->base, (uint16_t) dtable->limit); 5608351aaddSPekka Enberg } 5618351aaddSPekka Enberg 56253602077SPekka Enberg static void print_segment(const char *name, struct kvm_segment *seg) 56353602077SPekka Enberg { 564ce556636SPekka Enberg printf(" %s %04" PRIx16 " %016" PRIx64 " %08" PRIx32 " %02" PRIx8 " %x %x %x %x %x %x %x\n", 56553602077SPekka Enberg name, (uint16_t) seg->selector, (uint64_t) seg->base, (uint32_t) seg->limit, 566ce556636SPekka Enberg (uint8_t) seg->type, seg->present, seg->dpl, seg->db, seg->s, seg->l, seg->g, seg->avl); 56753602077SPekka Enberg } 56853602077SPekka Enberg 569ae1fae34SPekka Enberg void kvm__show_registers(struct kvm *self) 5707118d2caSPekka Enberg { 57153602077SPekka Enberg unsigned long cr0, cr2, cr3; 57253602077SPekka Enberg unsigned long cr4, cr8; 5737118d2caSPekka Enberg unsigned long rax, rbx, rcx; 5747118d2caSPekka Enberg unsigned long rdx, rsi, rdi; 5757118d2caSPekka Enberg unsigned long rbp, r8, r9; 5767118d2caSPekka Enberg unsigned long r10, r11, r12; 5777118d2caSPekka Enberg unsigned long r13, r14, r15; 5787118d2caSPekka Enberg unsigned long rip, rsp; 57953602077SPekka Enberg struct kvm_sregs sregs; 580a2fe6199SPekka Enberg unsigned long rflags; 5817118d2caSPekka Enberg struct kvm_regs regs; 582ce5e0ecbSPekka Enberg int i; 5837118d2caSPekka Enberg 5847118d2caSPekka Enberg if (ioctl(self->vcpu_fd, KVM_GET_REGS, ®s) < 0) 5857118d2caSPekka Enberg die("KVM_GET_REGS failed"); 5867118d2caSPekka Enberg 587a2fe6199SPekka Enberg rflags = regs.rflags; 588a2fe6199SPekka Enberg 5897118d2caSPekka Enberg rip = regs.rip; rsp = regs.rsp; 5907118d2caSPekka Enberg rax = regs.rax; rbx = regs.rbx; rcx = regs.rcx; 5917118d2caSPekka Enberg rdx = regs.rdx; rsi = regs.rsi; rdi = regs.rdi; 5927118d2caSPekka Enberg rbp = regs.rbp; r8 = regs.r8; r9 = regs.r9; 5937118d2caSPekka Enberg r10 = regs.r10; r11 = regs.r11; r12 = regs.r12; 5947118d2caSPekka Enberg r13 = regs.r13; r14 = regs.r14; r15 = regs.r15; 5957118d2caSPekka Enberg 5967118d2caSPekka Enberg printf("Registers:\n"); 5972177ec43SPekka Enberg printf(" rip: %016lx rsp: %016lx flags: %016lx\n", rip, rsp, rflags); 598ea2e4ea0SCyrill Gorcunov printf(" rax: %016lx rbx: %016lx rcx: %016lx\n", rax, rbx, rcx); 5997118d2caSPekka Enberg printf(" rdx: %016lx rsi: %016lx rdi: %016lx\n", rdx, rsi, rdi); 6007118d2caSPekka Enberg printf(" rbp: %016lx r8: %016lx r9: %016lx\n", rbp, r8, r9); 6017118d2caSPekka Enberg printf(" r10: %016lx r11: %016lx r12: %016lx\n", r10, r11, r12); 6027118d2caSPekka Enberg printf(" r13: %016lx r14: %016lx r15: %016lx\n", r13, r14, r15); 60353602077SPekka Enberg 60453602077SPekka Enberg if (ioctl(self->vcpu_fd, KVM_GET_SREGS, &sregs) < 0) 60553602077SPekka Enberg die("KVM_GET_REGS failed"); 60653602077SPekka Enberg 60753602077SPekka Enberg cr0 = sregs.cr0; cr2 = sregs.cr2; cr3 = sregs.cr3; 60853602077SPekka Enberg cr4 = sregs.cr4; cr8 = sregs.cr8; 60953602077SPekka Enberg 61053602077SPekka Enberg printf(" cr0: %016lx cr2: %016lx cr3: %016lx\n", cr0, cr2, cr3); 61153602077SPekka Enberg printf(" cr4: %016lx cr8: %016lx\n", cr4, cr8); 612ce5e0ecbSPekka Enberg printf("Segment registers:\n"); 613ce556636SPekka Enberg printf(" register selector base limit type p dpl db s l g avl\n"); 61453602077SPekka Enberg print_segment("cs ", &sregs.cs); 61553602077SPekka Enberg print_segment("ss ", &sregs.ss); 61653602077SPekka Enberg print_segment("ds ", &sregs.ds); 61753602077SPekka Enberg print_segment("es ", &sregs.es); 61853602077SPekka Enberg print_segment("fs ", &sregs.fs); 61953602077SPekka Enberg print_segment("gs ", &sregs.gs); 620ce5e0ecbSPekka Enberg print_segment("tr ", &sregs.tr); 621ce5e0ecbSPekka Enberg print_segment("ldt", &sregs.ldt); 6228351aaddSPekka Enberg print_dtable("gdt", &sregs.gdt); 6238351aaddSPekka Enberg print_dtable("idt", &sregs.idt); 6242049569dSPekka Enberg printf(" [ efer: %016lx apic base: %016lx nmi: %s ]\n", (uint64_t) sregs.efer, (uint64_t) sregs.apic_base, 6252049569dSPekka Enberg (self->nmi_disabled ? "disabled" : "enabled")); 626ce5e0ecbSPekka Enberg printf("Interrupt bitmap:\n"); 627ce5e0ecbSPekka Enberg printf(" "); 628ce5e0ecbSPekka Enberg for (i = 0; i < (KVM_NR_INTERRUPTS + 63) / 64; i++) 629ce5e0ecbSPekka Enberg printf("%016lx ", (uint64_t) sregs.interrupt_bitmap[i]); 630ce5e0ecbSPekka Enberg printf("\n"); 6317118d2caSPekka Enberg } 6327118d2caSPekka Enberg 633ae1fae34SPekka Enberg void kvm__show_code(struct kvm *self) 6346f10be05SPekka Enberg { 6356f10be05SPekka Enberg unsigned int code_bytes = 64; 6366f10be05SPekka Enberg unsigned int code_prologue = code_bytes * 43 / 64; 6376f10be05SPekka Enberg unsigned int code_len = code_bytes; 6386f10be05SPekka Enberg unsigned char c; 639ae1fae34SPekka Enberg unsigned int i; 6406f10be05SPekka Enberg uint8_t *ip; 6416f10be05SPekka Enberg 6422a601aafSPekka Enberg if (ioctl(self->vcpu_fd, KVM_GET_REGS, &self->regs) < 0) 6432a601aafSPekka Enberg die("KVM_GET_REGS failed"); 6442a601aafSPekka Enberg 6452a601aafSPekka Enberg if (ioctl(self->vcpu_fd, KVM_GET_SREGS, &self->sregs) < 0) 6462a601aafSPekka Enberg die("KVM_GET_SREGS failed"); 6472a601aafSPekka Enberg 648f326512aSPekka Enberg ip = guest_flat_to_host(self, ip_to_flat(self, self->regs.rip) - code_prologue); 6496f10be05SPekka Enberg 6506f10be05SPekka Enberg printf("Code: "); 6516f10be05SPekka Enberg 6526f10be05SPekka Enberg for (i = 0; i < code_len; i++, ip++) { 6536e8abc38SPekka Enberg c = *ip; 6546e8abc38SPekka Enberg 655f326512aSPekka Enberg if (ip == guest_flat_to_host(self, ip_to_flat(self, self->regs.rip))) 6566f10be05SPekka Enberg printf("<%02x> ", c); 6576f10be05SPekka Enberg else 6586f10be05SPekka Enberg printf("%02x ", c); 6596f10be05SPekka Enberg } 6606f10be05SPekka Enberg 6616f10be05SPekka Enberg printf("\n"); 6622a7d39a4SPekka Enberg 6632a7d39a4SPekka Enberg printf("Stack:\n"); 6642a7d39a4SPekka Enberg kvm__dump_mem(self, self->regs.rsp, 32); 6656f10be05SPekka Enberg } 666090f898eSCyrill Gorcunov 667*f01944c8SPekka Enberg void kvm__show_page_tables(struct kvm *self) 668*f01944c8SPekka Enberg { 669*f01944c8SPekka Enberg uint64_t *pte1; 670*f01944c8SPekka Enberg uint64_t *pte2; 671*f01944c8SPekka Enberg uint64_t *pte3; 672*f01944c8SPekka Enberg uint64_t *pte4; 673*f01944c8SPekka Enberg 674*f01944c8SPekka Enberg if (!is_in_protected_mode(self)) 675*f01944c8SPekka Enberg return; 676*f01944c8SPekka Enberg 677*f01944c8SPekka Enberg if (ioctl(self->vcpu_fd, KVM_GET_SREGS, &self->sregs) < 0) 678*f01944c8SPekka Enberg die("KVM_GET_SREGS failed"); 679*f01944c8SPekka Enberg 680*f01944c8SPekka Enberg pte4 = guest_flat_to_host(self, self->sregs.cr3); 681*f01944c8SPekka Enberg pte3 = guest_flat_to_host(self, (*pte4 & ~0xfff)); 682*f01944c8SPekka Enberg pte2 = guest_flat_to_host(self, (*pte3 & ~0xfff)); 683*f01944c8SPekka Enberg pte1 = guest_flat_to_host(self, (*pte2 & ~0xfff)); 684*f01944c8SPekka Enberg 685*f01944c8SPekka Enberg printf("Page Tables:\n"); 686*f01944c8SPekka Enberg if (*pte2 & (1 << 7)) 687*f01944c8SPekka Enberg printf(" pte4: %016lx pte3: %016lx pte2: %016lx\n", *pte4, *pte3, *pte2); 688*f01944c8SPekka Enberg else 689*f01944c8SPekka Enberg printf(" pte4: %016lx pte3: %016lx pte2: %016lx pte1: %016lx\n", *pte4, *pte3, *pte2, *pte1); 690*f01944c8SPekka Enberg } 691*f01944c8SPekka Enberg 692090f898eSCyrill Gorcunov void kvm__dump_mem(struct kvm *self, unsigned long addr, unsigned long size) 693090f898eSCyrill Gorcunov { 694090f898eSCyrill Gorcunov unsigned char *p; 695090f898eSCyrill Gorcunov unsigned long n; 696090f898eSCyrill Gorcunov 697090f898eSCyrill Gorcunov size &= ~7; /* mod 8 */ 698090f898eSCyrill Gorcunov if (!size) 699090f898eSCyrill Gorcunov return; 700090f898eSCyrill Gorcunov 7012a7d39a4SPekka Enberg p = guest_flat_to_host(self, addr); 702090f898eSCyrill Gorcunov 703090f898eSCyrill Gorcunov for (n = 0; n < size; n+=8) 704090f898eSCyrill Gorcunov printf(" 0x%08lx: %02x %02x %02x %02x %02x %02x %02x %02x\n", 705090f898eSCyrill Gorcunov addr + n, p[n + 0], p[n + 1], p[n + 2], p[n + 3], 706090f898eSCyrill Gorcunov p[n + 4], p[n + 5], p[n + 6], p[n + 7]); 707090f898eSCyrill Gorcunov } 708