1 #include "kvm/kvm-cpu.h" 2 3 #include "kvm/symbol.h" 4 #include "kvm/util.h" 5 #include "kvm/kvm.h" 6 7 #include <asm/msr-index.h> 8 9 #include <sys/ioctl.h> 10 #include <sys/mman.h> 11 #include <signal.h> 12 #include <stdlib.h> 13 #include <string.h> 14 #include <errno.h> 15 #include <stdio.h> 16 17 extern __thread struct kvm_cpu *current_kvm_cpu; 18 19 static inline bool is_in_protected_mode(struct kvm_cpu *vcpu) 20 { 21 return vcpu->sregs.cr0 & 0x01; 22 } 23 24 static inline u64 ip_to_flat(struct kvm_cpu *vcpu, u64 ip) 25 { 26 u64 cs; 27 28 /* 29 * NOTE! We should take code segment base address into account here. 30 * Luckily it's usually zero because Linux uses flat memory model. 31 */ 32 if (is_in_protected_mode(vcpu)) 33 return ip; 34 35 cs = vcpu->sregs.cs.selector; 36 37 return ip + (cs << 4); 38 } 39 40 static inline u32 selector_to_base(u16 selector) 41 { 42 /* 43 * KVM on Intel requires 'base' to be 'selector * 16' in real mode. 44 */ 45 return (u32)selector * 16; 46 } 47 48 static struct kvm_cpu *kvm_cpu__new(struct kvm *kvm) 49 { 50 struct kvm_cpu *vcpu; 51 52 vcpu = calloc(1, sizeof *vcpu); 53 if (!vcpu) 54 return NULL; 55 56 vcpu->kvm = kvm; 57 58 return vcpu; 59 } 60 61 void kvm_cpu__delete(struct kvm_cpu *vcpu) 62 { 63 if (vcpu->msrs) 64 free(vcpu->msrs); 65 66 free(vcpu); 67 } 68 69 struct kvm_cpu *kvm_cpu__init(struct kvm *kvm, unsigned long cpu_id) 70 { 71 struct kvm_cpu *vcpu; 72 int mmap_size; 73 74 vcpu = kvm_cpu__new(kvm); 75 if (!vcpu) 76 return NULL; 77 78 vcpu->cpu_id = cpu_id; 79 80 vcpu->vcpu_fd = ioctl(vcpu->kvm->vm_fd, KVM_CREATE_VCPU, cpu_id); 81 if (vcpu->vcpu_fd < 0) 82 die_perror("KVM_CREATE_VCPU ioctl"); 83 84 mmap_size = ioctl(vcpu->kvm->sys_fd, KVM_GET_VCPU_MMAP_SIZE, 0); 85 if (mmap_size < 0) 86 die_perror("KVM_GET_VCPU_MMAP_SIZE ioctl"); 87 88 vcpu->kvm_run = mmap(NULL, mmap_size, PROT_RW, MAP_SHARED, vcpu->vcpu_fd, 0); 89 if (vcpu->kvm_run == MAP_FAILED) 90 die("unable to mmap vcpu fd"); 91 92 vcpu->is_running = true; 93 94 return vcpu; 95 } 96 97 void kvm_cpu__enable_singlestep(struct kvm_cpu *vcpu) 98 { 99 struct kvm_guest_debug debug = { 100 .control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP, 101 }; 102 103 if (ioctl(vcpu->vcpu_fd, KVM_SET_GUEST_DEBUG, &debug) < 0) 104 pr_warning("KVM_SET_GUEST_DEBUG failed"); 105 } 106 107 static struct kvm_msrs *kvm_msrs__new(size_t nmsrs) 108 { 109 struct kvm_msrs *vcpu = calloc(1, sizeof(*vcpu) + (sizeof(struct kvm_msr_entry) * nmsrs)); 110 111 if (!vcpu) 112 die("out of memory"); 113 114 return vcpu; 115 } 116 117 #define KVM_MSR_ENTRY(_index, _data) \ 118 (struct kvm_msr_entry) { .index = _index, .data = _data } 119 120 static void kvm_cpu__setup_msrs(struct kvm_cpu *vcpu) 121 { 122 unsigned long ndx = 0; 123 124 vcpu->msrs = kvm_msrs__new(100); 125 126 vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_SYSENTER_CS, 0x0); 127 vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_SYSENTER_ESP, 0x0); 128 vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_SYSENTER_EIP, 0x0); 129 #ifdef CONFIG_X86_64 130 vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_STAR, 0x0); 131 vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_CSTAR, 0x0); 132 vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_KERNEL_GS_BASE, 0x0); 133 vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_SYSCALL_MASK, 0x0); 134 vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_LSTAR, 0x0); 135 #endif 136 vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_TSC, 0x0); 137 138 vcpu->msrs->nmsrs = ndx; 139 140 if (ioctl(vcpu->vcpu_fd, KVM_SET_MSRS, vcpu->msrs) < 0) 141 die_perror("KVM_SET_MSRS failed"); 142 } 143 144 static void kvm_cpu__setup_fpu(struct kvm_cpu *vcpu) 145 { 146 vcpu->fpu = (struct kvm_fpu) { 147 .fcw = 0x37f, 148 .mxcsr = 0x1f80, 149 }; 150 151 if (ioctl(vcpu->vcpu_fd, KVM_SET_FPU, &vcpu->fpu) < 0) 152 die_perror("KVM_SET_FPU failed"); 153 } 154 155 static void kvm_cpu__setup_regs(struct kvm_cpu *vcpu) 156 { 157 vcpu->regs = (struct kvm_regs) { 158 /* We start the guest in 16-bit real mode */ 159 .rflags = 0x0000000000000002ULL, 160 161 .rip = vcpu->kvm->boot_ip, 162 .rsp = vcpu->kvm->boot_sp, 163 .rbp = vcpu->kvm->boot_sp, 164 }; 165 166 if (vcpu->regs.rip > USHRT_MAX) 167 die("ip 0x%llx is too high for real mode", (u64) vcpu->regs.rip); 168 169 if (ioctl(vcpu->vcpu_fd, KVM_SET_REGS, &vcpu->regs) < 0) 170 die_perror("KVM_SET_REGS failed"); 171 } 172 173 static void kvm_cpu__setup_sregs(struct kvm_cpu *vcpu) 174 { 175 176 if (ioctl(vcpu->vcpu_fd, KVM_GET_SREGS, &vcpu->sregs) < 0) 177 die_perror("KVM_GET_SREGS failed"); 178 179 vcpu->sregs.cs.selector = vcpu->kvm->boot_selector; 180 vcpu->sregs.cs.base = selector_to_base(vcpu->kvm->boot_selector); 181 vcpu->sregs.ss.selector = vcpu->kvm->boot_selector; 182 vcpu->sregs.ss.base = selector_to_base(vcpu->kvm->boot_selector); 183 vcpu->sregs.ds.selector = vcpu->kvm->boot_selector; 184 vcpu->sregs.ds.base = selector_to_base(vcpu->kvm->boot_selector); 185 vcpu->sregs.es.selector = vcpu->kvm->boot_selector; 186 vcpu->sregs.es.base = selector_to_base(vcpu->kvm->boot_selector); 187 vcpu->sregs.fs.selector = vcpu->kvm->boot_selector; 188 vcpu->sregs.fs.base = selector_to_base(vcpu->kvm->boot_selector); 189 vcpu->sregs.gs.selector = vcpu->kvm->boot_selector; 190 vcpu->sregs.gs.base = selector_to_base(vcpu->kvm->boot_selector); 191 192 if (ioctl(vcpu->vcpu_fd, KVM_SET_SREGS, &vcpu->sregs) < 0) 193 die_perror("KVM_SET_SREGS failed"); 194 } 195 196 /** 197 * kvm_cpu__reset_vcpu - reset virtual CPU to a known state 198 */ 199 void kvm_cpu__reset_vcpu(struct kvm_cpu *vcpu) 200 { 201 kvm_cpu__setup_sregs(vcpu); 202 kvm_cpu__setup_regs(vcpu); 203 kvm_cpu__setup_fpu(vcpu); 204 kvm_cpu__setup_msrs(vcpu); 205 } 206 207 static void print_dtable(const char *name, struct kvm_dtable *dtable) 208 { 209 printf(" %s %016llx %08hx\n", 210 name, (u64) dtable->base, (u16) dtable->limit); 211 } 212 213 static void print_segment(const char *name, struct kvm_segment *seg) 214 { 215 printf(" %s %04hx %016llx %08x %02hhx %x %x %x %x %x %x %x\n", 216 name, (u16) seg->selector, (u64) seg->base, (u32) seg->limit, 217 (u8) seg->type, seg->present, seg->dpl, seg->db, seg->s, seg->l, seg->g, seg->avl); 218 } 219 220 void kvm_cpu__show_registers(struct kvm_cpu *vcpu) 221 { 222 unsigned long cr0, cr2, cr3; 223 unsigned long cr4, cr8; 224 unsigned long rax, rbx, rcx; 225 unsigned long rdx, rsi, rdi; 226 unsigned long rbp, r8, r9; 227 unsigned long r10, r11, r12; 228 unsigned long r13, r14, r15; 229 unsigned long rip, rsp; 230 struct kvm_sregs sregs; 231 unsigned long rflags; 232 struct kvm_regs regs; 233 int i; 234 235 if (ioctl(vcpu->vcpu_fd, KVM_GET_REGS, ®s) < 0) 236 die("KVM_GET_REGS failed"); 237 238 rflags = regs.rflags; 239 240 rip = regs.rip; rsp = regs.rsp; 241 rax = regs.rax; rbx = regs.rbx; rcx = regs.rcx; 242 rdx = regs.rdx; rsi = regs.rsi; rdi = regs.rdi; 243 rbp = regs.rbp; r8 = regs.r8; r9 = regs.r9; 244 r10 = regs.r10; r11 = regs.r11; r12 = regs.r12; 245 r13 = regs.r13; r14 = regs.r14; r15 = regs.r15; 246 247 printf("\n Registers:\n"); 248 printf( " ----------\n"); 249 printf(" rip: %016lx rsp: %016lx flags: %016lx\n", rip, rsp, rflags); 250 printf(" rax: %016lx rbx: %016lx rcx: %016lx\n", rax, rbx, rcx); 251 printf(" rdx: %016lx rsi: %016lx rdi: %016lx\n", rdx, rsi, rdi); 252 printf(" rbp: %016lx r8: %016lx r9: %016lx\n", rbp, r8, r9); 253 printf(" r10: %016lx r11: %016lx r12: %016lx\n", r10, r11, r12); 254 printf(" r13: %016lx r14: %016lx r15: %016lx\n", r13, r14, r15); 255 256 if (ioctl(vcpu->vcpu_fd, KVM_GET_SREGS, &sregs) < 0) 257 die("KVM_GET_REGS failed"); 258 259 cr0 = sregs.cr0; cr2 = sregs.cr2; cr3 = sregs.cr3; 260 cr4 = sregs.cr4; cr8 = sregs.cr8; 261 262 printf(" cr0: %016lx cr2: %016lx cr3: %016lx\n", cr0, cr2, cr3); 263 printf(" cr4: %016lx cr8: %016lx\n", cr4, cr8); 264 printf("\n Segment registers:\n"); 265 printf( " ------------------\n"); 266 printf(" register selector base limit type p dpl db s l g avl\n"); 267 print_segment("cs ", &sregs.cs); 268 print_segment("ss ", &sregs.ss); 269 print_segment("ds ", &sregs.ds); 270 print_segment("es ", &sregs.es); 271 print_segment("fs ", &sregs.fs); 272 print_segment("gs ", &sregs.gs); 273 print_segment("tr ", &sregs.tr); 274 print_segment("ldt", &sregs.ldt); 275 print_dtable("gdt", &sregs.gdt); 276 print_dtable("idt", &sregs.idt); 277 278 printf("\n APIC:\n"); 279 printf( " -----\n"); 280 printf(" efer: %016llx apic base: %016llx nmi: %s\n", 281 (u64) sregs.efer, (u64) sregs.apic_base, 282 (vcpu->kvm->nmi_disabled ? "disabled" : "enabled")); 283 284 printf("\n Interrupt bitmap:\n"); 285 printf( " -----------------\n"); 286 for (i = 0; i < (KVM_NR_INTERRUPTS + 63) / 64; i++) 287 printf(" %016llx", (u64) sregs.interrupt_bitmap[i]); 288 printf("\n"); 289 } 290 291 #define MAX_SYM_LEN 128 292 293 void kvm_cpu__show_code(struct kvm_cpu *vcpu) 294 { 295 unsigned int code_bytes = 64; 296 unsigned int code_prologue = code_bytes * 43 / 64; 297 unsigned int code_len = code_bytes; 298 char sym[MAX_SYM_LEN]; 299 unsigned char c; 300 unsigned int i; 301 u8 *ip; 302 303 if (ioctl(vcpu->vcpu_fd, KVM_GET_REGS, &vcpu->regs) < 0) 304 die("KVM_GET_REGS failed"); 305 306 if (ioctl(vcpu->vcpu_fd, KVM_GET_SREGS, &vcpu->sregs) < 0) 307 die("KVM_GET_SREGS failed"); 308 309 ip = guest_flat_to_host(vcpu->kvm, ip_to_flat(vcpu, vcpu->regs.rip) - code_prologue); 310 311 printf("\n Code:\n"); 312 printf( " -----\n"); 313 314 symbol__lookup(vcpu->kvm, vcpu->regs.rip, sym, MAX_SYM_LEN); 315 316 printf(" rip: [<%016lx>] %s\n\n", (unsigned long) vcpu->regs.rip, sym); 317 318 for (i = 0; i < code_len; i++, ip++) { 319 if (!host_ptr_in_ram(vcpu->kvm, ip)) 320 break; 321 322 c = *ip; 323 324 if (ip == guest_flat_to_host(vcpu->kvm, ip_to_flat(vcpu, vcpu->regs.rip))) 325 printf(" <%02x>", c); 326 else 327 printf(" %02x", c); 328 } 329 330 printf("\n"); 331 332 printf("\n Stack:\n"); 333 printf( " ------\n"); 334 kvm__dump_mem(vcpu->kvm, vcpu->regs.rsp, 32); 335 } 336 337 void kvm_cpu__show_page_tables(struct kvm_cpu *vcpu) 338 { 339 u64 *pte1; 340 u64 *pte2; 341 u64 *pte3; 342 u64 *pte4; 343 344 if (!is_in_protected_mode(vcpu)) 345 return; 346 347 if (ioctl(vcpu->vcpu_fd, KVM_GET_SREGS, &vcpu->sregs) < 0) 348 die("KVM_GET_SREGS failed"); 349 350 pte4 = guest_flat_to_host(vcpu->kvm, vcpu->sregs.cr3); 351 if (!host_ptr_in_ram(vcpu->kvm, pte4)) 352 return; 353 354 pte3 = guest_flat_to_host(vcpu->kvm, (*pte4 & ~0xfff)); 355 if (!host_ptr_in_ram(vcpu->kvm, pte3)) 356 return; 357 358 pte2 = guest_flat_to_host(vcpu->kvm, (*pte3 & ~0xfff)); 359 if (!host_ptr_in_ram(vcpu->kvm, pte2)) 360 return; 361 362 pte1 = guest_flat_to_host(vcpu->kvm, (*pte2 & ~0xfff)); 363 if (!host_ptr_in_ram(vcpu->kvm, pte1)) 364 return; 365 366 printf("Page Tables:\n"); 367 if (*pte2 & (1 << 7)) 368 printf(" pte4: %016llx pte3: %016llx" 369 " pte2: %016llx\n", 370 *pte4, *pte3, *pte2); 371 else 372 printf(" pte4: %016llx pte3: %016llx pte2: %016" 373 "llx pte1: %016llx\n", 374 *pte4, *pte3, *pte2, *pte1); 375 } 376 377 void kvm_cpu__run(struct kvm_cpu *vcpu) 378 { 379 int err; 380 381 err = ioctl(vcpu->vcpu_fd, KVM_RUN, 0); 382 if (err && (errno != EINTR && errno != EAGAIN)) 383 die_perror("KVM_RUN failed"); 384 } 385 386 static void kvm_cpu_exit_handler(int signum) 387 { 388 if (current_kvm_cpu->is_running) { 389 current_kvm_cpu->is_running = false; 390 pthread_kill(pthread_self(), SIGKVMEXIT); 391 } 392 } 393 394 int kvm_cpu__start(struct kvm_cpu *cpu) 395 { 396 sigset_t sigset; 397 398 sigemptyset(&sigset); 399 sigaddset(&sigset, SIGALRM); 400 401 pthread_sigmask(SIG_BLOCK, &sigset, NULL); 402 403 signal(SIGKVMEXIT, kvm_cpu_exit_handler); 404 405 kvm_cpu__setup_cpuid(cpu); 406 kvm_cpu__reset_vcpu(cpu); 407 408 for (;;) { 409 kvm_cpu__run(cpu); 410 411 switch (cpu->kvm_run->exit_reason) { 412 case KVM_EXIT_UNKNOWN: 413 break; 414 case KVM_EXIT_DEBUG: 415 kvm_cpu__show_registers(cpu); 416 kvm_cpu__show_code(cpu); 417 break; 418 case KVM_EXIT_IO: { 419 bool ret; 420 421 ret = kvm__emulate_io(cpu->kvm, 422 cpu->kvm_run->io.port, 423 (u8 *)cpu->kvm_run + 424 cpu->kvm_run->io.data_offset, 425 cpu->kvm_run->io.direction, 426 cpu->kvm_run->io.size, 427 cpu->kvm_run->io.count); 428 429 if (!ret) 430 goto panic_kvm; 431 break; 432 } 433 case KVM_EXIT_MMIO: { 434 bool ret; 435 436 ret = kvm__emulate_mmio(cpu->kvm, 437 cpu->kvm_run->mmio.phys_addr, 438 cpu->kvm_run->mmio.data, 439 cpu->kvm_run->mmio.len, 440 cpu->kvm_run->mmio.is_write); 441 442 if (!ret) 443 goto panic_kvm; 444 break; 445 } 446 case KVM_EXIT_INTR: 447 if (cpu->is_running) 448 break; 449 goto exit_kvm; 450 case KVM_EXIT_SHUTDOWN: 451 goto exit_kvm; 452 default: 453 goto panic_kvm; 454 } 455 } 456 457 exit_kvm: 458 return 0; 459 460 panic_kvm: 461 return 1; 462 } 463