1 #include "kvm/kvm-cpu.h" 2 3 #include "kvm/virtio-console.h" 4 #include "kvm/8250-serial.h" 5 #include "kvm/util.h" 6 #include "kvm/kvm.h" 7 8 #include <sys/ioctl.h> 9 #include <sys/mman.h> 10 #include <stdlib.h> 11 #include <errno.h> 12 #include <stdio.h> 13 14 static inline bool is_in_protected_mode(struct kvm_cpu *self) 15 { 16 return self->sregs.cr0 & 0x01; 17 } 18 19 static inline uint64_t ip_to_flat(struct kvm_cpu *self, uint64_t ip) 20 { 21 uint64_t cs; 22 23 /* 24 * NOTE! We should take code segment base address into account here. 25 * Luckily it's usually zero because Linux uses flat memory model. 26 */ 27 if (is_in_protected_mode(self)) 28 return ip; 29 30 cs = self->sregs.cs.selector; 31 32 return ip + (cs << 4); 33 } 34 35 static inline uint32_t selector_to_base(uint16_t selector) 36 { 37 /* 38 * KVM on Intel requires 'base' to be 'selector * 16' in real mode. 39 */ 40 return (uint32_t)selector * 16; 41 } 42 43 static struct kvm_cpu *kvm_cpu__new(struct kvm *kvm) 44 { 45 struct kvm_cpu *self; 46 47 self = calloc(1, sizeof *self); 48 if (!self) 49 return NULL; 50 51 self->kvm = kvm; 52 53 return self; 54 } 55 56 void kvm_cpu__delete(struct kvm_cpu *self) 57 { 58 if (self->msrs) 59 free(self->msrs); 60 61 free(self); 62 } 63 64 struct kvm_cpu *kvm_cpu__init(struct kvm *kvm) 65 { 66 struct kvm_cpu *self; 67 int mmap_size; 68 69 self = kvm_cpu__new(kvm); 70 if (!self) 71 return NULL; 72 73 self->vcpu_fd = ioctl(self->kvm->vm_fd, KVM_CREATE_VCPU, 0); 74 if (self->vcpu_fd < 0) 75 die_perror("KVM_CREATE_VCPU ioctl"); 76 77 mmap_size = ioctl(self->kvm->sys_fd, KVM_GET_VCPU_MMAP_SIZE, 0); 78 if (mmap_size < 0) 79 die_perror("KVM_GET_VCPU_MMAP_SIZE ioctl"); 80 81 self->kvm_run = mmap(NULL, mmap_size, PROT_READ|PROT_WRITE, MAP_SHARED, self->vcpu_fd, 0); 82 if (self->kvm_run == MAP_FAILED) 83 die("unable to mmap vcpu fd"); 84 85 return self; 86 } 87 88 void kvm_cpu__enable_singlestep(struct kvm_cpu *self) 89 { 90 struct kvm_guest_debug debug = { 91 .control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP, 92 }; 93 94 if (ioctl(self->vcpu_fd, KVM_SET_GUEST_DEBUG, &debug) < 0) 95 warning("KVM_SET_GUEST_DEBUG failed"); 96 } 97 98 static struct kvm_msrs *kvm_msrs__new(size_t nmsrs) 99 { 100 struct kvm_msrs *self = calloc(1, sizeof(*self) + (sizeof(struct kvm_msr_entry) * nmsrs)); 101 102 if (!self) 103 die("out of memory"); 104 105 return self; 106 } 107 108 #define MSR_IA32_TIME_STAMP_COUNTER 0x10 109 110 #define MSR_IA32_SYSENTER_CS 0x174 111 #define MSR_IA32_SYSENTER_ESP 0x175 112 #define MSR_IA32_SYSENTER_EIP 0x176 113 114 #define MSR_IA32_STAR 0xc0000081 115 #define MSR_IA32_LSTAR 0xc0000082 116 #define MSR_IA32_CSTAR 0xc0000083 117 #define MSR_IA32_FMASK 0xc0000084 118 #define MSR_IA32_KERNEL_GS_BASE 0xc0000102 119 120 #define KVM_MSR_ENTRY(_index, _data) \ 121 (struct kvm_msr_entry) { .index = _index, .data = _data } 122 123 static void kvm_cpu__setup_msrs(struct kvm_cpu *self) 124 { 125 unsigned long ndx = 0; 126 127 self->msrs = kvm_msrs__new(100); 128 129 self->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_SYSENTER_CS, 0x0); 130 self->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_SYSENTER_ESP, 0x0); 131 self->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_SYSENTER_EIP, 0x0); 132 #ifdef CONFIG_X86_64 133 self->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_STAR, 0x0); 134 self->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_CSTAR, 0x0); 135 self->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_KERNEL_GS_BASE, 0x0); 136 self->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_FMASK, 0x0); 137 self->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_LSTAR, 0x0); 138 #endif 139 self->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_TIME_STAMP_COUNTER, 0x0); 140 141 self->msrs->nmsrs = ndx; 142 143 if (ioctl(self->vcpu_fd, KVM_SET_MSRS, self->msrs) < 0) 144 die_perror("KVM_SET_MSRS failed"); 145 } 146 147 static void kvm_cpu__setup_fpu(struct kvm_cpu *self) 148 { 149 self->fpu = (struct kvm_fpu) { 150 .fcw = 0x37f, 151 .mxcsr = 0x1f80, 152 }; 153 154 if (ioctl(self->vcpu_fd, KVM_SET_FPU, &self->fpu) < 0) 155 die_perror("KVM_SET_FPU failed"); 156 } 157 158 static void kvm_cpu__setup_regs(struct kvm_cpu *self) 159 { 160 self->regs = (struct kvm_regs) { 161 /* We start the guest in 16-bit real mode */ 162 .rflags = 0x0000000000000002ULL, 163 164 .rip = self->kvm->boot_ip, 165 .rsp = self->kvm->boot_sp, 166 .rbp = self->kvm->boot_sp, 167 }; 168 169 if (self->regs.rip > USHRT_MAX) 170 die("ip 0x%" PRIx64 " is too high for real mode", (uint64_t) self->regs.rip); 171 172 if (ioctl(self->vcpu_fd, KVM_SET_REGS, &self->regs) < 0) 173 die_perror("KVM_SET_REGS failed"); 174 } 175 176 static void kvm_cpu__setup_sregs(struct kvm_cpu *self) 177 { 178 179 if (ioctl(self->vcpu_fd, KVM_GET_SREGS, &self->sregs) < 0) 180 die_perror("KVM_GET_SREGS failed"); 181 182 self->sregs.cs.selector = self->kvm->boot_selector; 183 self->sregs.cs.base = selector_to_base(self->kvm->boot_selector); 184 self->sregs.ss.selector = self->kvm->boot_selector; 185 self->sregs.ss.base = selector_to_base(self->kvm->boot_selector); 186 self->sregs.ds.selector = self->kvm->boot_selector; 187 self->sregs.ds.base = selector_to_base(self->kvm->boot_selector); 188 self->sregs.es.selector = self->kvm->boot_selector; 189 self->sregs.es.base = selector_to_base(self->kvm->boot_selector); 190 self->sregs.fs.selector = self->kvm->boot_selector; 191 self->sregs.fs.base = selector_to_base(self->kvm->boot_selector); 192 self->sregs.gs.selector = self->kvm->boot_selector; 193 self->sregs.gs.base = selector_to_base(self->kvm->boot_selector); 194 195 if (ioctl(self->vcpu_fd, KVM_SET_SREGS, &self->sregs) < 0) 196 die_perror("KVM_SET_SREGS failed"); 197 } 198 199 /** 200 * kvm_cpu__reset_vcpu - reset virtual CPU to a known state 201 */ 202 void kvm_cpu__reset_vcpu(struct kvm_cpu *self) 203 { 204 kvm_cpu__setup_sregs(self); 205 kvm_cpu__setup_regs(self); 206 kvm_cpu__setup_fpu(self); 207 kvm_cpu__setup_msrs(self); 208 } 209 210 static void print_dtable(const char *name, struct kvm_dtable *dtable) 211 { 212 printf(" %s %016" PRIx64 " %08" PRIx16 "\n", 213 name, (uint64_t) dtable->base, (uint16_t) dtable->limit); 214 } 215 216 static void print_segment(const char *name, struct kvm_segment *seg) 217 { 218 printf(" %s %04" PRIx16 " %016" PRIx64 " %08" PRIx32 " %02" PRIx8 " %x %x %x %x %x %x %x\n", 219 name, (uint16_t) seg->selector, (uint64_t) seg->base, (uint32_t) seg->limit, 220 (uint8_t) seg->type, seg->present, seg->dpl, seg->db, seg->s, seg->l, seg->g, seg->avl); 221 } 222 223 void kvm_cpu__show_registers(struct kvm_cpu *self) 224 { 225 unsigned long cr0, cr2, cr3; 226 unsigned long cr4, cr8; 227 unsigned long rax, rbx, rcx; 228 unsigned long rdx, rsi, rdi; 229 unsigned long rbp, r8, r9; 230 unsigned long r10, r11, r12; 231 unsigned long r13, r14, r15; 232 unsigned long rip, rsp; 233 struct kvm_sregs sregs; 234 unsigned long rflags; 235 struct kvm_regs regs; 236 int i; 237 238 if (ioctl(self->vcpu_fd, KVM_GET_REGS, ®s) < 0) 239 die("KVM_GET_REGS failed"); 240 241 rflags = regs.rflags; 242 243 rip = regs.rip; rsp = regs.rsp; 244 rax = regs.rax; rbx = regs.rbx; rcx = regs.rcx; 245 rdx = regs.rdx; rsi = regs.rsi; rdi = regs.rdi; 246 rbp = regs.rbp; r8 = regs.r8; r9 = regs.r9; 247 r10 = regs.r10; r11 = regs.r11; r12 = regs.r12; 248 r13 = regs.r13; r14 = regs.r14; r15 = regs.r15; 249 250 printf("Registers:\n"); 251 printf(" rip: %016lx rsp: %016lx flags: %016lx\n", rip, rsp, rflags); 252 printf(" rax: %016lx rbx: %016lx rcx: %016lx\n", rax, rbx, rcx); 253 printf(" rdx: %016lx rsi: %016lx rdi: %016lx\n", rdx, rsi, rdi); 254 printf(" rbp: %016lx r8: %016lx r9: %016lx\n", rbp, r8, r9); 255 printf(" r10: %016lx r11: %016lx r12: %016lx\n", r10, r11, r12); 256 printf(" r13: %016lx r14: %016lx r15: %016lx\n", r13, r14, r15); 257 258 if (ioctl(self->vcpu_fd, KVM_GET_SREGS, &sregs) < 0) 259 die("KVM_GET_REGS failed"); 260 261 cr0 = sregs.cr0; cr2 = sregs.cr2; cr3 = sregs.cr3; 262 cr4 = sregs.cr4; cr8 = sregs.cr8; 263 264 printf(" cr0: %016lx cr2: %016lx cr3: %016lx\n", cr0, cr2, cr3); 265 printf(" cr4: %016lx cr8: %016lx\n", cr4, cr8); 266 printf("Segment registers:\n"); 267 printf(" register selector base limit type p dpl db s l g avl\n"); 268 print_segment("cs ", &sregs.cs); 269 print_segment("ss ", &sregs.ss); 270 print_segment("ds ", &sregs.ds); 271 print_segment("es ", &sregs.es); 272 print_segment("fs ", &sregs.fs); 273 print_segment("gs ", &sregs.gs); 274 print_segment("tr ", &sregs.tr); 275 print_segment("ldt", &sregs.ldt); 276 print_dtable("gdt", &sregs.gdt); 277 print_dtable("idt", &sregs.idt); 278 printf(" [ efer: %016" PRIx64 " apic base: %016" PRIx64 " nmi: %s ]\n", 279 (uint64_t) sregs.efer, (uint64_t) sregs.apic_base, 280 (self->kvm->nmi_disabled ? "disabled" : "enabled")); 281 printf("Interrupt bitmap:\n"); 282 printf(" "); 283 for (i = 0; i < (KVM_NR_INTERRUPTS + 63) / 64; i++) 284 printf("%016" PRIx64 " ", (uint64_t) sregs.interrupt_bitmap[i]); 285 printf("\n"); 286 } 287 288 void kvm_cpu__show_code(struct kvm_cpu *self) 289 { 290 unsigned int code_bytes = 64; 291 unsigned int code_prologue = code_bytes * 43 / 64; 292 unsigned int code_len = code_bytes; 293 unsigned char c; 294 unsigned int i; 295 uint8_t *ip; 296 297 if (ioctl(self->vcpu_fd, KVM_GET_REGS, &self->regs) < 0) 298 die("KVM_GET_REGS failed"); 299 300 if (ioctl(self->vcpu_fd, KVM_GET_SREGS, &self->sregs) < 0) 301 die("KVM_GET_SREGS failed"); 302 303 ip = guest_flat_to_host(self->kvm, ip_to_flat(self, self->regs.rip) - code_prologue); 304 305 printf("Code: "); 306 307 for (i = 0; i < code_len; i++, ip++) { 308 if (!host_ptr_in_ram(self->kvm, ip)) 309 break; 310 311 c = *ip; 312 313 if (ip == guest_flat_to_host(self->kvm, ip_to_flat(self, self->regs.rip))) 314 printf("<%02x> ", c); 315 else 316 printf("%02x ", c); 317 } 318 319 printf("\n"); 320 321 printf("Stack:\n"); 322 kvm__dump_mem(self->kvm, self->regs.rsp, 32); 323 } 324 325 void kvm_cpu__show_page_tables(struct kvm_cpu *self) 326 { 327 uint64_t *pte1; 328 uint64_t *pte2; 329 uint64_t *pte3; 330 uint64_t *pte4; 331 332 if (!is_in_protected_mode(self)) 333 return; 334 335 if (ioctl(self->vcpu_fd, KVM_GET_SREGS, &self->sregs) < 0) 336 die("KVM_GET_SREGS failed"); 337 338 pte4 = guest_flat_to_host(self->kvm, self->sregs.cr3); 339 if (!host_ptr_in_ram(self->kvm, pte4)) 340 return; 341 342 pte3 = guest_flat_to_host(self->kvm, (*pte4 & ~0xfff)); 343 if (!host_ptr_in_ram(self->kvm, pte3)) 344 return; 345 346 pte2 = guest_flat_to_host(self->kvm, (*pte3 & ~0xfff)); 347 if (!host_ptr_in_ram(self->kvm, pte2)) 348 return; 349 350 pte1 = guest_flat_to_host(self->kvm, (*pte2 & ~0xfff)); 351 if (!host_ptr_in_ram(self->kvm, pte1)) 352 return; 353 354 printf("Page Tables:\n"); 355 if (*pte2 & (1 << 7)) 356 printf(" pte4: %016" PRIx64 " pte3: %016" PRIx64 357 " pte2: %016" PRIx64 "\n", 358 *pte4, *pte3, *pte2); 359 else 360 printf(" pte4: %016" PRIx64 " pte3: %016" PRIx64 " pte2: %016" 361 PRIx64 " pte1: %016" PRIx64 "\n", 362 *pte4, *pte3, *pte2, *pte1); 363 } 364 365 void kvm_cpu__run(struct kvm_cpu *self) 366 { 367 int err; 368 369 err = ioctl(self->vcpu_fd, KVM_RUN, 0); 370 if (err && (errno != EINTR && errno != EAGAIN)) 371 die_perror("KVM_RUN failed"); 372 } 373 374 int kvm_cpu__start(struct kvm_cpu *cpu) 375 { 376 kvm_cpu__setup_cpuid(cpu); 377 kvm_cpu__reset_vcpu(cpu); 378 379 for (;;) { 380 kvm_cpu__run(cpu); 381 382 switch (cpu->kvm_run->exit_reason) { 383 case KVM_EXIT_DEBUG: 384 kvm_cpu__show_registers(cpu); 385 kvm_cpu__show_code(cpu); 386 break; 387 case KVM_EXIT_IO: { 388 bool ret; 389 390 ret = kvm__emulate_io(cpu->kvm, 391 cpu->kvm_run->io.port, 392 (uint8_t *)cpu->kvm_run + 393 cpu->kvm_run->io.data_offset, 394 cpu->kvm_run->io.direction, 395 cpu->kvm_run->io.size, 396 cpu->kvm_run->io.count); 397 398 if (!ret) 399 goto panic_kvm; 400 break; 401 } 402 case KVM_EXIT_MMIO: { 403 bool ret; 404 405 ret = kvm__emulate_mmio(cpu->kvm, 406 cpu->kvm_run->mmio.phys_addr, 407 cpu->kvm_run->mmio.data, 408 cpu->kvm_run->mmio.len, 409 cpu->kvm_run->mmio.is_write); 410 411 if (!ret) 412 goto panic_kvm; 413 break; 414 } 415 case KVM_EXIT_INTR: { 416 serial8250__inject_interrupt(cpu->kvm); 417 virtio_console__inject_interrupt(cpu->kvm); 418 break; 419 } 420 case KVM_EXIT_SHUTDOWN: 421 goto exit_kvm; 422 default: 423 goto panic_kvm; 424 } 425 } 426 427 exit_kvm: 428 return 0; 429 430 panic_kvm: 431 return 1; 432 } 433