1 #include "kvm/kvm-cpu.h" 2 3 #include "kvm/symbol.h" 4 #include "kvm/util.h" 5 #include "kvm/kvm.h" 6 7 #include <asm/msr-index.h> 8 9 #include <sys/ioctl.h> 10 #include <sys/mman.h> 11 #include <signal.h> 12 #include <stdlib.h> 13 #include <string.h> 14 #include <errno.h> 15 #include <stdio.h> 16 17 #define PAGE_SIZE (sysconf(_SC_PAGE_SIZE)) 18 19 extern struct kvm_cpu *kvm_cpus[KVM_NR_CPUS]; 20 extern __thread struct kvm_cpu *current_kvm_cpu; 21 22 static inline bool is_in_protected_mode(struct kvm_cpu *vcpu) 23 { 24 return vcpu->sregs.cr0 & 0x01; 25 } 26 27 static inline u64 ip_to_flat(struct kvm_cpu *vcpu, u64 ip) 28 { 29 u64 cs; 30 31 /* 32 * NOTE! We should take code segment base address into account here. 33 * Luckily it's usually zero because Linux uses flat memory model. 34 */ 35 if (is_in_protected_mode(vcpu)) 36 return ip; 37 38 cs = vcpu->sregs.cs.selector; 39 40 return ip + (cs << 4); 41 } 42 43 static inline u32 selector_to_base(u16 selector) 44 { 45 /* 46 * KVM on Intel requires 'base' to be 'selector * 16' in real mode. 47 */ 48 return (u32)selector * 16; 49 } 50 51 static struct kvm_cpu *kvm_cpu__new(struct kvm *kvm) 52 { 53 struct kvm_cpu *vcpu; 54 55 vcpu = calloc(1, sizeof *vcpu); 56 if (!vcpu) 57 return NULL; 58 59 vcpu->kvm = kvm; 60 61 return vcpu; 62 } 63 64 void kvm_cpu__delete(struct kvm_cpu *vcpu) 65 { 66 if (vcpu->msrs) 67 free(vcpu->msrs); 68 69 free(vcpu); 70 } 71 72 struct kvm_cpu *kvm_cpu__init(struct kvm *kvm, unsigned long cpu_id) 73 { 74 struct kvm_cpu *vcpu; 75 int mmap_size; 76 int coalesced_offset; 77 78 vcpu = kvm_cpu__new(kvm); 79 if (!vcpu) 80 return NULL; 81 82 vcpu->cpu_id = cpu_id; 83 84 vcpu->vcpu_fd = ioctl(vcpu->kvm->vm_fd, KVM_CREATE_VCPU, cpu_id); 85 if (vcpu->vcpu_fd < 0) 86 die_perror("KVM_CREATE_VCPU ioctl"); 87 88 mmap_size = ioctl(vcpu->kvm->sys_fd, KVM_GET_VCPU_MMAP_SIZE, 0); 89 if (mmap_size < 0) 90 die_perror("KVM_GET_VCPU_MMAP_SIZE ioctl"); 91 92 vcpu->kvm_run = mmap(NULL, mmap_size, PROT_RW, MAP_SHARED, vcpu->vcpu_fd, 0); 93 if (vcpu->kvm_run == MAP_FAILED) 94 die("unable to mmap vcpu fd"); 95 96 coalesced_offset = ioctl(kvm->sys_fd, KVM_CHECK_EXTENSION, KVM_CAP_COALESCED_MMIO); 97 if (coalesced_offset) 98 vcpu->ring = (void *)vcpu->kvm_run + (coalesced_offset * PAGE_SIZE); 99 100 vcpu->is_running = true; 101 102 return vcpu; 103 } 104 105 void kvm_cpu__enable_singlestep(struct kvm_cpu *vcpu) 106 { 107 struct kvm_guest_debug debug = { 108 .control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP, 109 }; 110 111 if (ioctl(vcpu->vcpu_fd, KVM_SET_GUEST_DEBUG, &debug) < 0) 112 pr_warning("KVM_SET_GUEST_DEBUG failed"); 113 } 114 115 static struct kvm_msrs *kvm_msrs__new(size_t nmsrs) 116 { 117 struct kvm_msrs *vcpu = calloc(1, sizeof(*vcpu) + (sizeof(struct kvm_msr_entry) * nmsrs)); 118 119 if (!vcpu) 120 die("out of memory"); 121 122 return vcpu; 123 } 124 125 #define KVM_MSR_ENTRY(_index, _data) \ 126 (struct kvm_msr_entry) { .index = _index, .data = _data } 127 128 static void kvm_cpu__setup_msrs(struct kvm_cpu *vcpu) 129 { 130 unsigned long ndx = 0; 131 132 vcpu->msrs = kvm_msrs__new(100); 133 134 vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_SYSENTER_CS, 0x0); 135 vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_SYSENTER_ESP, 0x0); 136 vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_SYSENTER_EIP, 0x0); 137 #ifdef CONFIG_X86_64 138 vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_STAR, 0x0); 139 vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_CSTAR, 0x0); 140 vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_KERNEL_GS_BASE, 0x0); 141 vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_SYSCALL_MASK, 0x0); 142 vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_LSTAR, 0x0); 143 #endif 144 vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_TSC, 0x0); 145 vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_MISC_ENABLE, 146 MSR_IA32_MISC_ENABLE_FAST_STRING); 147 148 vcpu->msrs->nmsrs = ndx; 149 150 if (ioctl(vcpu->vcpu_fd, KVM_SET_MSRS, vcpu->msrs) < 0) 151 die_perror("KVM_SET_MSRS failed"); 152 } 153 154 static void kvm_cpu__setup_fpu(struct kvm_cpu *vcpu) 155 { 156 vcpu->fpu = (struct kvm_fpu) { 157 .fcw = 0x37f, 158 .mxcsr = 0x1f80, 159 }; 160 161 if (ioctl(vcpu->vcpu_fd, KVM_SET_FPU, &vcpu->fpu) < 0) 162 die_perror("KVM_SET_FPU failed"); 163 } 164 165 static void kvm_cpu__setup_regs(struct kvm_cpu *vcpu) 166 { 167 vcpu->regs = (struct kvm_regs) { 168 /* We start the guest in 16-bit real mode */ 169 .rflags = 0x0000000000000002ULL, 170 171 .rip = vcpu->kvm->boot_ip, 172 .rsp = vcpu->kvm->boot_sp, 173 .rbp = vcpu->kvm->boot_sp, 174 }; 175 176 if (vcpu->regs.rip > USHRT_MAX) 177 die("ip 0x%llx is too high for real mode", (u64) vcpu->regs.rip); 178 179 if (ioctl(vcpu->vcpu_fd, KVM_SET_REGS, &vcpu->regs) < 0) 180 die_perror("KVM_SET_REGS failed"); 181 } 182 183 static void kvm_cpu__setup_sregs(struct kvm_cpu *vcpu) 184 { 185 186 if (ioctl(vcpu->vcpu_fd, KVM_GET_SREGS, &vcpu->sregs) < 0) 187 die_perror("KVM_GET_SREGS failed"); 188 189 vcpu->sregs.cs.selector = vcpu->kvm->boot_selector; 190 vcpu->sregs.cs.base = selector_to_base(vcpu->kvm->boot_selector); 191 vcpu->sregs.ss.selector = vcpu->kvm->boot_selector; 192 vcpu->sregs.ss.base = selector_to_base(vcpu->kvm->boot_selector); 193 vcpu->sregs.ds.selector = vcpu->kvm->boot_selector; 194 vcpu->sregs.ds.base = selector_to_base(vcpu->kvm->boot_selector); 195 vcpu->sregs.es.selector = vcpu->kvm->boot_selector; 196 vcpu->sregs.es.base = selector_to_base(vcpu->kvm->boot_selector); 197 vcpu->sregs.fs.selector = vcpu->kvm->boot_selector; 198 vcpu->sregs.fs.base = selector_to_base(vcpu->kvm->boot_selector); 199 vcpu->sregs.gs.selector = vcpu->kvm->boot_selector; 200 vcpu->sregs.gs.base = selector_to_base(vcpu->kvm->boot_selector); 201 202 if (ioctl(vcpu->vcpu_fd, KVM_SET_SREGS, &vcpu->sregs) < 0) 203 die_perror("KVM_SET_SREGS failed"); 204 } 205 206 /** 207 * kvm_cpu__reset_vcpu - reset virtual CPU to a known state 208 */ 209 void kvm_cpu__reset_vcpu(struct kvm_cpu *vcpu) 210 { 211 kvm_cpu__setup_sregs(vcpu); 212 kvm_cpu__setup_regs(vcpu); 213 kvm_cpu__setup_fpu(vcpu); 214 kvm_cpu__setup_msrs(vcpu); 215 } 216 217 static void print_dtable(const char *name, struct kvm_dtable *dtable) 218 { 219 printf(" %s %016llx %08hx\n", 220 name, (u64) dtable->base, (u16) dtable->limit); 221 } 222 223 static void print_segment(const char *name, struct kvm_segment *seg) 224 { 225 printf(" %s %04hx %016llx %08x %02hhx %x %x %x %x %x %x %x\n", 226 name, (u16) seg->selector, (u64) seg->base, (u32) seg->limit, 227 (u8) seg->type, seg->present, seg->dpl, seg->db, seg->s, seg->l, seg->g, seg->avl); 228 } 229 230 void kvm_cpu__show_registers(struct kvm_cpu *vcpu) 231 { 232 unsigned long cr0, cr2, cr3; 233 unsigned long cr4, cr8; 234 unsigned long rax, rbx, rcx; 235 unsigned long rdx, rsi, rdi; 236 unsigned long rbp, r8, r9; 237 unsigned long r10, r11, r12; 238 unsigned long r13, r14, r15; 239 unsigned long rip, rsp; 240 struct kvm_sregs sregs; 241 unsigned long rflags; 242 struct kvm_regs regs; 243 int i; 244 245 if (ioctl(vcpu->vcpu_fd, KVM_GET_REGS, ®s) < 0) 246 die("KVM_GET_REGS failed"); 247 248 rflags = regs.rflags; 249 250 rip = regs.rip; rsp = regs.rsp; 251 rax = regs.rax; rbx = regs.rbx; rcx = regs.rcx; 252 rdx = regs.rdx; rsi = regs.rsi; rdi = regs.rdi; 253 rbp = regs.rbp; r8 = regs.r8; r9 = regs.r9; 254 r10 = regs.r10; r11 = regs.r11; r12 = regs.r12; 255 r13 = regs.r13; r14 = regs.r14; r15 = regs.r15; 256 257 printf("\n Registers:\n"); 258 printf( " ----------\n"); 259 printf(" rip: %016lx rsp: %016lx flags: %016lx\n", rip, rsp, rflags); 260 printf(" rax: %016lx rbx: %016lx rcx: %016lx\n", rax, rbx, rcx); 261 printf(" rdx: %016lx rsi: %016lx rdi: %016lx\n", rdx, rsi, rdi); 262 printf(" rbp: %016lx r8: %016lx r9: %016lx\n", rbp, r8, r9); 263 printf(" r10: %016lx r11: %016lx r12: %016lx\n", r10, r11, r12); 264 printf(" r13: %016lx r14: %016lx r15: %016lx\n", r13, r14, r15); 265 266 if (ioctl(vcpu->vcpu_fd, KVM_GET_SREGS, &sregs) < 0) 267 die("KVM_GET_REGS failed"); 268 269 cr0 = sregs.cr0; cr2 = sregs.cr2; cr3 = sregs.cr3; 270 cr4 = sregs.cr4; cr8 = sregs.cr8; 271 272 printf(" cr0: %016lx cr2: %016lx cr3: %016lx\n", cr0, cr2, cr3); 273 printf(" cr4: %016lx cr8: %016lx\n", cr4, cr8); 274 printf("\n Segment registers:\n"); 275 printf( " ------------------\n"); 276 printf(" register selector base limit type p dpl db s l g avl\n"); 277 print_segment("cs ", &sregs.cs); 278 print_segment("ss ", &sregs.ss); 279 print_segment("ds ", &sregs.ds); 280 print_segment("es ", &sregs.es); 281 print_segment("fs ", &sregs.fs); 282 print_segment("gs ", &sregs.gs); 283 print_segment("tr ", &sregs.tr); 284 print_segment("ldt", &sregs.ldt); 285 print_dtable("gdt", &sregs.gdt); 286 print_dtable("idt", &sregs.idt); 287 288 printf("\n APIC:\n"); 289 printf( " -----\n"); 290 printf(" efer: %016llx apic base: %016llx nmi: %s\n", 291 (u64) sregs.efer, (u64) sregs.apic_base, 292 (vcpu->kvm->nmi_disabled ? "disabled" : "enabled")); 293 294 printf("\n Interrupt bitmap:\n"); 295 printf( " -----------------\n"); 296 for (i = 0; i < (KVM_NR_INTERRUPTS + 63) / 64; i++) 297 printf(" %016llx", (u64) sregs.interrupt_bitmap[i]); 298 printf("\n"); 299 } 300 301 #define MAX_SYM_LEN 128 302 303 void kvm_cpu__show_code(struct kvm_cpu *vcpu) 304 { 305 unsigned int code_bytes = 64; 306 unsigned int code_prologue = code_bytes * 43 / 64; 307 unsigned int code_len = code_bytes; 308 char sym[MAX_SYM_LEN]; 309 unsigned char c; 310 unsigned int i; 311 u8 *ip; 312 313 if (ioctl(vcpu->vcpu_fd, KVM_GET_REGS, &vcpu->regs) < 0) 314 die("KVM_GET_REGS failed"); 315 316 if (ioctl(vcpu->vcpu_fd, KVM_GET_SREGS, &vcpu->sregs) < 0) 317 die("KVM_GET_SREGS failed"); 318 319 ip = guest_flat_to_host(vcpu->kvm, ip_to_flat(vcpu, vcpu->regs.rip) - code_prologue); 320 321 printf("\n Code:\n"); 322 printf( " -----\n"); 323 324 symbol__lookup(vcpu->kvm, vcpu->regs.rip, sym, MAX_SYM_LEN); 325 326 printf(" rip: [<%016lx>] %s\n\n", (unsigned long) vcpu->regs.rip, sym); 327 328 for (i = 0; i < code_len; i++, ip++) { 329 if (!host_ptr_in_ram(vcpu->kvm, ip)) 330 break; 331 332 c = *ip; 333 334 if (ip == guest_flat_to_host(vcpu->kvm, ip_to_flat(vcpu, vcpu->regs.rip))) 335 printf(" <%02x>", c); 336 else 337 printf(" %02x", c); 338 } 339 340 printf("\n"); 341 342 printf("\n Stack:\n"); 343 printf( " ------\n"); 344 kvm__dump_mem(vcpu->kvm, vcpu->regs.rsp, 32); 345 } 346 347 void kvm_cpu__show_page_tables(struct kvm_cpu *vcpu) 348 { 349 u64 *pte1; 350 u64 *pte2; 351 u64 *pte3; 352 u64 *pte4; 353 354 if (!is_in_protected_mode(vcpu)) 355 return; 356 357 if (ioctl(vcpu->vcpu_fd, KVM_GET_SREGS, &vcpu->sregs) < 0) 358 die("KVM_GET_SREGS failed"); 359 360 pte4 = guest_flat_to_host(vcpu->kvm, vcpu->sregs.cr3); 361 if (!host_ptr_in_ram(vcpu->kvm, pte4)) 362 return; 363 364 pte3 = guest_flat_to_host(vcpu->kvm, (*pte4 & ~0xfff)); 365 if (!host_ptr_in_ram(vcpu->kvm, pte3)) 366 return; 367 368 pte2 = guest_flat_to_host(vcpu->kvm, (*pte3 & ~0xfff)); 369 if (!host_ptr_in_ram(vcpu->kvm, pte2)) 370 return; 371 372 pte1 = guest_flat_to_host(vcpu->kvm, (*pte2 & ~0xfff)); 373 if (!host_ptr_in_ram(vcpu->kvm, pte1)) 374 return; 375 376 printf("Page Tables:\n"); 377 if (*pte2 & (1 << 7)) 378 printf(" pte4: %016llx pte3: %016llx" 379 " pte2: %016llx\n", 380 *pte4, *pte3, *pte2); 381 else 382 printf(" pte4: %016llx pte3: %016llx pte2: %016" 383 "llx pte1: %016llx\n", 384 *pte4, *pte3, *pte2, *pte1); 385 } 386 387 void kvm_cpu__run(struct kvm_cpu *vcpu) 388 { 389 int err; 390 391 err = ioctl(vcpu->vcpu_fd, KVM_RUN, 0); 392 if (err && (errno != EINTR && errno != EAGAIN)) 393 die_perror("KVM_RUN failed"); 394 } 395 396 static void kvm_cpu_signal_handler(int signum) 397 { 398 if (signum == SIGKVMEXIT) { 399 if (current_kvm_cpu && current_kvm_cpu->is_running) { 400 current_kvm_cpu->is_running = false; 401 pthread_kill(pthread_self(), SIGKVMEXIT); 402 } 403 } else if (signum == SIGKVMPAUSE) { 404 current_kvm_cpu->paused = 1; 405 } 406 } 407 408 static void kvm_cpu__handle_coalesced_mmio(struct kvm_cpu *cpu) 409 { 410 if (cpu->ring) { 411 while (cpu->ring->first != cpu->ring->last) { 412 struct kvm_coalesced_mmio *m; 413 m = &cpu->ring->coalesced_mmio[cpu->ring->first]; 414 kvm__emulate_mmio(cpu->kvm, 415 m->phys_addr, 416 m->data, 417 m->len, 418 1); 419 cpu->ring->first = (cpu->ring->first + 1) % KVM_COALESCED_MMIO_MAX; 420 } 421 } 422 } 423 424 void kvm_cpu__reboot(void) 425 { 426 int i; 427 428 for (i = 0; i < KVM_NR_CPUS; i++) 429 if (kvm_cpus[i]) 430 pthread_kill(kvm_cpus[i]->thread, SIGKVMEXIT); 431 } 432 433 int kvm_cpu__start(struct kvm_cpu *cpu) 434 { 435 sigset_t sigset; 436 437 sigemptyset(&sigset); 438 sigaddset(&sigset, SIGALRM); 439 440 pthread_sigmask(SIG_BLOCK, &sigset, NULL); 441 442 signal(SIGKVMEXIT, kvm_cpu_signal_handler); 443 signal(SIGKVMPAUSE, kvm_cpu_signal_handler); 444 445 kvm_cpu__setup_cpuid(cpu); 446 kvm_cpu__reset_vcpu(cpu); 447 448 if (cpu->kvm->single_step) 449 kvm_cpu__enable_singlestep(cpu); 450 451 while (cpu->is_running) { 452 if (cpu->paused) { 453 kvm__notify_paused(); 454 cpu->paused = 0; 455 } 456 457 kvm_cpu__run(cpu); 458 459 switch (cpu->kvm_run->exit_reason) { 460 case KVM_EXIT_UNKNOWN: 461 break; 462 case KVM_EXIT_DEBUG: 463 kvm_cpu__show_registers(cpu); 464 kvm_cpu__show_code(cpu); 465 break; 466 case KVM_EXIT_IO: { 467 bool ret; 468 469 ret = kvm__emulate_io(cpu->kvm, 470 cpu->kvm_run->io.port, 471 (u8 *)cpu->kvm_run + 472 cpu->kvm_run->io.data_offset, 473 cpu->kvm_run->io.direction, 474 cpu->kvm_run->io.size, 475 cpu->kvm_run->io.count); 476 477 if (!ret) 478 goto panic_kvm; 479 break; 480 } 481 case KVM_EXIT_MMIO: { 482 bool ret; 483 484 ret = kvm__emulate_mmio(cpu->kvm, 485 cpu->kvm_run->mmio.phys_addr, 486 cpu->kvm_run->mmio.data, 487 cpu->kvm_run->mmio.len, 488 cpu->kvm_run->mmio.is_write); 489 490 if (!ret) 491 goto panic_kvm; 492 break; 493 } 494 case KVM_EXIT_INTR: 495 if (cpu->is_running) 496 break; 497 goto exit_kvm; 498 case KVM_EXIT_SHUTDOWN: 499 goto exit_kvm; 500 default: 501 goto panic_kvm; 502 } 503 kvm_cpu__handle_coalesced_mmio(cpu); 504 } 505 506 exit_kvm: 507 return 0; 508 509 panic_kvm: 510 return 1; 511 } 512