1 #include "kvm/kvm-cpu.h" 2 3 #include "kvm/symbol.h" 4 #include "kvm/util.h" 5 #include "kvm/kvm.h" 6 7 #include <asm/msr-index.h> 8 #include <asm/apicdef.h> 9 #include <linux/err.h> 10 #include <sys/ioctl.h> 11 #include <sys/mman.h> 12 #include <signal.h> 13 #include <stdlib.h> 14 #include <string.h> 15 #include <errno.h> 16 #include <stdio.h> 17 18 static int debug_fd; 19 20 void kvm_cpu__set_debug_fd(int fd) 21 { 22 debug_fd = fd; 23 } 24 25 int kvm_cpu__get_debug_fd(void) 26 { 27 return debug_fd; 28 } 29 30 static inline bool is_in_protected_mode(struct kvm_cpu *vcpu) 31 { 32 return vcpu->sregs.cr0 & 0x01; 33 } 34 35 static inline u64 ip_to_flat(struct kvm_cpu *vcpu, u64 ip) 36 { 37 u64 cs; 38 39 /* 40 * NOTE! We should take code segment base address into account here. 41 * Luckily it's usually zero because Linux uses flat memory model. 42 */ 43 if (is_in_protected_mode(vcpu)) 44 return ip; 45 46 cs = vcpu->sregs.cs.selector; 47 48 return ip + (cs << 4); 49 } 50 51 static inline u32 selector_to_base(u16 selector) 52 { 53 /* 54 * KVM on Intel requires 'base' to be 'selector * 16' in real mode. 55 */ 56 return (u32)selector << 4; 57 } 58 59 static struct kvm_cpu *kvm_cpu__new(struct kvm *kvm) 60 { 61 struct kvm_cpu *vcpu; 62 63 vcpu = calloc(1, sizeof(*vcpu)); 64 if (!vcpu) 65 return NULL; 66 67 vcpu->kvm = kvm; 68 69 return vcpu; 70 } 71 72 void kvm_cpu__delete(struct kvm_cpu *vcpu) 73 { 74 if (vcpu->msrs) 75 free(vcpu->msrs); 76 77 free(vcpu); 78 } 79 80 static int kvm_cpu__set_lint(struct kvm_cpu *vcpu) 81 { 82 struct local_apic lapic; 83 84 if (ioctl(vcpu->vcpu_fd, KVM_GET_LAPIC, &lapic)) 85 return -1; 86 87 lapic.lvt_lint0.delivery_mode = APIC_MODE_EXTINT; 88 lapic.lvt_lint1.delivery_mode = APIC_MODE_NMI; 89 90 return ioctl(vcpu->vcpu_fd, KVM_SET_LAPIC, &lapic); 91 } 92 93 struct kvm_cpu *kvm_cpu__arch_init(struct kvm *kvm, unsigned long cpu_id) 94 { 95 struct kvm_cpu *vcpu; 96 int mmap_size; 97 int coalesced_offset; 98 99 vcpu = kvm_cpu__new(kvm); 100 if (!vcpu) 101 return NULL; 102 103 vcpu->cpu_id = cpu_id; 104 105 vcpu->vcpu_fd = ioctl(vcpu->kvm->vm_fd, KVM_CREATE_VCPU, cpu_id); 106 if (vcpu->vcpu_fd < 0) 107 die_perror("KVM_CREATE_VCPU ioctl"); 108 109 mmap_size = ioctl(vcpu->kvm->sys_fd, KVM_GET_VCPU_MMAP_SIZE, 0); 110 if (mmap_size < 0) 111 die_perror("KVM_GET_VCPU_MMAP_SIZE ioctl"); 112 113 vcpu->kvm_run = mmap(NULL, mmap_size, PROT_RW, MAP_SHARED, vcpu->vcpu_fd, 0); 114 if (vcpu->kvm_run == MAP_FAILED) 115 die("unable to mmap vcpu fd"); 116 117 coalesced_offset = ioctl(kvm->sys_fd, KVM_CHECK_EXTENSION, KVM_CAP_COALESCED_MMIO); 118 if (coalesced_offset) 119 vcpu->ring = (void *)vcpu->kvm_run + (coalesced_offset * PAGE_SIZE); 120 121 if (kvm_cpu__set_lint(vcpu)) 122 die_perror("KVM_SET_LAPIC failed"); 123 124 vcpu->is_running = true; 125 126 return vcpu; 127 } 128 129 static struct kvm_msrs *kvm_msrs__new(size_t nmsrs) 130 { 131 struct kvm_msrs *vcpu = calloc(1, sizeof(*vcpu) + (sizeof(struct kvm_msr_entry) * nmsrs)); 132 133 if (!vcpu) 134 die("out of memory"); 135 136 return vcpu; 137 } 138 139 #define KVM_MSR_ENTRY(_index, _data) \ 140 (struct kvm_msr_entry) { .index = _index, .data = _data } 141 142 static void kvm_cpu__setup_msrs(struct kvm_cpu *vcpu) 143 { 144 unsigned long ndx = 0; 145 146 vcpu->msrs = kvm_msrs__new(100); 147 148 vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_SYSENTER_CS, 0x0); 149 vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_SYSENTER_ESP, 0x0); 150 vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_SYSENTER_EIP, 0x0); 151 #ifdef CONFIG_X86_64 152 vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_STAR, 0x0); 153 vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_CSTAR, 0x0); 154 vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_KERNEL_GS_BASE, 0x0); 155 vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_SYSCALL_MASK, 0x0); 156 vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_LSTAR, 0x0); 157 #endif 158 vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_TSC, 0x0); 159 vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_MISC_ENABLE, 160 MSR_IA32_MISC_ENABLE_FAST_STRING); 161 162 vcpu->msrs->nmsrs = ndx; 163 164 if (ioctl(vcpu->vcpu_fd, KVM_SET_MSRS, vcpu->msrs) < 0) 165 die_perror("KVM_SET_MSRS failed"); 166 } 167 168 static void kvm_cpu__setup_fpu(struct kvm_cpu *vcpu) 169 { 170 vcpu->fpu = (struct kvm_fpu) { 171 .fcw = 0x37f, 172 .mxcsr = 0x1f80, 173 }; 174 175 if (ioctl(vcpu->vcpu_fd, KVM_SET_FPU, &vcpu->fpu) < 0) 176 die_perror("KVM_SET_FPU failed"); 177 } 178 179 static void kvm_cpu__setup_regs(struct kvm_cpu *vcpu) 180 { 181 vcpu->regs = (struct kvm_regs) { 182 /* We start the guest in 16-bit real mode */ 183 .rflags = 0x0000000000000002ULL, 184 185 .rip = vcpu->kvm->arch.boot_ip, 186 .rsp = vcpu->kvm->arch.boot_sp, 187 .rbp = vcpu->kvm->arch.boot_sp, 188 }; 189 190 if (vcpu->regs.rip > USHRT_MAX) 191 die("ip 0x%llx is too high for real mode", (u64)vcpu->regs.rip); 192 193 if (ioctl(vcpu->vcpu_fd, KVM_SET_REGS, &vcpu->regs) < 0) 194 die_perror("KVM_SET_REGS failed"); 195 } 196 197 static void kvm_cpu__setup_sregs(struct kvm_cpu *vcpu) 198 { 199 if (ioctl(vcpu->vcpu_fd, KVM_GET_SREGS, &vcpu->sregs) < 0) 200 die_perror("KVM_GET_SREGS failed"); 201 202 vcpu->sregs.cs.selector = vcpu->kvm->arch.boot_selector; 203 vcpu->sregs.cs.base = selector_to_base(vcpu->kvm->arch.boot_selector); 204 vcpu->sregs.ss.selector = vcpu->kvm->arch.boot_selector; 205 vcpu->sregs.ss.base = selector_to_base(vcpu->kvm->arch.boot_selector); 206 vcpu->sregs.ds.selector = vcpu->kvm->arch.boot_selector; 207 vcpu->sregs.ds.base = selector_to_base(vcpu->kvm->arch.boot_selector); 208 vcpu->sregs.es.selector = vcpu->kvm->arch.boot_selector; 209 vcpu->sregs.es.base = selector_to_base(vcpu->kvm->arch.boot_selector); 210 vcpu->sregs.fs.selector = vcpu->kvm->arch.boot_selector; 211 vcpu->sregs.fs.base = selector_to_base(vcpu->kvm->arch.boot_selector); 212 vcpu->sregs.gs.selector = vcpu->kvm->arch.boot_selector; 213 vcpu->sregs.gs.base = selector_to_base(vcpu->kvm->arch.boot_selector); 214 215 if (ioctl(vcpu->vcpu_fd, KVM_SET_SREGS, &vcpu->sregs) < 0) 216 die_perror("KVM_SET_SREGS failed"); 217 } 218 219 /** 220 * kvm_cpu__reset_vcpu - reset virtual CPU to a known state 221 */ 222 void kvm_cpu__reset_vcpu(struct kvm_cpu *vcpu) 223 { 224 kvm_cpu__setup_cpuid(vcpu); 225 kvm_cpu__setup_sregs(vcpu); 226 kvm_cpu__setup_regs(vcpu); 227 kvm_cpu__setup_fpu(vcpu); 228 kvm_cpu__setup_msrs(vcpu); 229 } 230 231 bool kvm_cpu__handle_exit(struct kvm_cpu *vcpu) 232 { 233 return false; 234 } 235 236 static void print_dtable(const char *name, struct kvm_dtable *dtable) 237 { 238 dprintf(debug_fd, " %s %016llx %08hx\n", 239 name, (u64) dtable->base, (u16) dtable->limit); 240 } 241 242 static void print_segment(const char *name, struct kvm_segment *seg) 243 { 244 dprintf(debug_fd, " %s %04hx %016llx %08x %02hhx %x %x %x %x %x %x %x\n", 245 name, (u16) seg->selector, (u64) seg->base, (u32) seg->limit, 246 (u8) seg->type, seg->present, seg->dpl, seg->db, seg->s, seg->l, seg->g, seg->avl); 247 } 248 249 void kvm_cpu__show_registers(struct kvm_cpu *vcpu) 250 { 251 unsigned long cr0, cr2, cr3; 252 unsigned long cr4, cr8; 253 unsigned long rax, rbx, rcx; 254 unsigned long rdx, rsi, rdi; 255 unsigned long rbp, r8, r9; 256 unsigned long r10, r11, r12; 257 unsigned long r13, r14, r15; 258 unsigned long rip, rsp; 259 struct kvm_sregs sregs; 260 unsigned long rflags; 261 struct kvm_regs regs; 262 int i; 263 264 if (ioctl(vcpu->vcpu_fd, KVM_GET_REGS, ®s) < 0) 265 die("KVM_GET_REGS failed"); 266 267 rflags = regs.rflags; 268 269 rip = regs.rip; rsp = regs.rsp; 270 rax = regs.rax; rbx = regs.rbx; rcx = regs.rcx; 271 rdx = regs.rdx; rsi = regs.rsi; rdi = regs.rdi; 272 rbp = regs.rbp; r8 = regs.r8; r9 = regs.r9; 273 r10 = regs.r10; r11 = regs.r11; r12 = regs.r12; 274 r13 = regs.r13; r14 = regs.r14; r15 = regs.r15; 275 276 dprintf(debug_fd, "\n Registers:\n"); 277 dprintf(debug_fd, " ----------\n"); 278 dprintf(debug_fd, " rip: %016lx rsp: %016lx flags: %016lx\n", rip, rsp, rflags); 279 dprintf(debug_fd, " rax: %016lx rbx: %016lx rcx: %016lx\n", rax, rbx, rcx); 280 dprintf(debug_fd, " rdx: %016lx rsi: %016lx rdi: %016lx\n", rdx, rsi, rdi); 281 dprintf(debug_fd, " rbp: %016lx r8: %016lx r9: %016lx\n", rbp, r8, r9); 282 dprintf(debug_fd, " r10: %016lx r11: %016lx r12: %016lx\n", r10, r11, r12); 283 dprintf(debug_fd, " r13: %016lx r14: %016lx r15: %016lx\n", r13, r14, r15); 284 285 if (ioctl(vcpu->vcpu_fd, KVM_GET_SREGS, &sregs) < 0) 286 die("KVM_GET_REGS failed"); 287 288 cr0 = sregs.cr0; cr2 = sregs.cr2; cr3 = sregs.cr3; 289 cr4 = sregs.cr4; cr8 = sregs.cr8; 290 291 dprintf(debug_fd, " cr0: %016lx cr2: %016lx cr3: %016lx\n", cr0, cr2, cr3); 292 dprintf(debug_fd, " cr4: %016lx cr8: %016lx\n", cr4, cr8); 293 dprintf(debug_fd, "\n Segment registers:\n"); 294 dprintf(debug_fd, " ------------------\n"); 295 dprintf(debug_fd, " register selector base limit type p dpl db s l g avl\n"); 296 print_segment("cs ", &sregs.cs); 297 print_segment("ss ", &sregs.ss); 298 print_segment("ds ", &sregs.ds); 299 print_segment("es ", &sregs.es); 300 print_segment("fs ", &sregs.fs); 301 print_segment("gs ", &sregs.gs); 302 print_segment("tr ", &sregs.tr); 303 print_segment("ldt", &sregs.ldt); 304 print_dtable("gdt", &sregs.gdt); 305 print_dtable("idt", &sregs.idt); 306 307 dprintf(debug_fd, "\n APIC:\n"); 308 dprintf(debug_fd, " -----\n"); 309 dprintf(debug_fd, " efer: %016llx apic base: %016llx nmi: %s\n", 310 (u64) sregs.efer, (u64) sregs.apic_base, 311 (vcpu->kvm->nmi_disabled ? "disabled" : "enabled")); 312 313 dprintf(debug_fd, "\n Interrupt bitmap:\n"); 314 dprintf(debug_fd, " -----------------\n"); 315 for (i = 0; i < (KVM_NR_INTERRUPTS + 63) / 64; i++) 316 dprintf(debug_fd, " %016llx", (u64) sregs.interrupt_bitmap[i]); 317 dprintf(debug_fd, "\n"); 318 } 319 320 #define MAX_SYM_LEN 128 321 322 void kvm_cpu__show_code(struct kvm_cpu *vcpu) 323 { 324 unsigned int code_bytes = 64; 325 unsigned int code_prologue = 43; 326 unsigned int code_len = code_bytes; 327 char sym[MAX_SYM_LEN] = SYMBOL_DEFAULT_UNKNOWN, *psym; 328 unsigned char c; 329 unsigned int i; 330 u8 *ip; 331 332 if (ioctl(vcpu->vcpu_fd, KVM_GET_REGS, &vcpu->regs) < 0) 333 die("KVM_GET_REGS failed"); 334 335 if (ioctl(vcpu->vcpu_fd, KVM_GET_SREGS, &vcpu->sregs) < 0) 336 die("KVM_GET_SREGS failed"); 337 338 ip = guest_flat_to_host(vcpu->kvm, ip_to_flat(vcpu, vcpu->regs.rip) - code_prologue); 339 340 dprintf(debug_fd, "\n Code:\n"); 341 dprintf(debug_fd, " -----\n"); 342 343 psym = symbol_lookup(vcpu->kvm, vcpu->regs.rip, sym, MAX_SYM_LEN); 344 if (IS_ERR(psym)) 345 dprintf(debug_fd, 346 "Warning: symbol_lookup() failed to find symbol " 347 "with error: %ld\n", PTR_ERR(psym)); 348 349 dprintf(debug_fd, " rip: [<%016lx>] %s\n\n", (unsigned long) vcpu->regs.rip, sym); 350 351 for (i = 0; i < code_len; i++, ip++) { 352 if (!host_ptr_in_ram(vcpu->kvm, ip)) 353 break; 354 355 c = *ip; 356 357 if (ip == guest_flat_to_host(vcpu->kvm, ip_to_flat(vcpu, vcpu->regs.rip))) 358 dprintf(debug_fd, " <%02x>", c); 359 else 360 dprintf(debug_fd, " %02x", c); 361 } 362 363 dprintf(debug_fd, "\n"); 364 365 dprintf(debug_fd, "\n Stack:\n"); 366 dprintf(debug_fd, " ------\n"); 367 dprintf(debug_fd, " rsp: [<%016lx>] \n", (unsigned long) vcpu->regs.rsp); 368 kvm__dump_mem(vcpu->kvm, vcpu->regs.rsp, 32, debug_fd); 369 } 370 371 void kvm_cpu__show_page_tables(struct kvm_cpu *vcpu) 372 { 373 u64 *pte1; 374 u64 *pte2; 375 u64 *pte3; 376 u64 *pte4; 377 378 if (!is_in_protected_mode(vcpu)) { 379 dprintf(debug_fd, "\n Page Tables:\n"); 380 dprintf(debug_fd, " ------\n"); 381 dprintf(debug_fd, " Not in protected mode\n"); 382 return; 383 } 384 385 if (ioctl(vcpu->vcpu_fd, KVM_GET_SREGS, &vcpu->sregs) < 0) 386 die("KVM_GET_SREGS failed"); 387 388 pte4 = guest_flat_to_host(vcpu->kvm, vcpu->sregs.cr3); 389 if (!host_ptr_in_ram(vcpu->kvm, pte4)) 390 return; 391 392 pte3 = guest_flat_to_host(vcpu->kvm, (*pte4 & ~0xfff)); 393 if (!host_ptr_in_ram(vcpu->kvm, pte3)) 394 return; 395 396 pte2 = guest_flat_to_host(vcpu->kvm, (*pte3 & ~0xfff)); 397 if (!host_ptr_in_ram(vcpu->kvm, pte2)) 398 return; 399 400 pte1 = guest_flat_to_host(vcpu->kvm, (*pte2 & ~0xfff)); 401 if (!host_ptr_in_ram(vcpu->kvm, pte1)) 402 return; 403 404 dprintf(debug_fd, "\n Page Tables:\n"); 405 dprintf(debug_fd, " ------\n"); 406 if (*pte2 & (1 << 7)) 407 dprintf(debug_fd, " pte4: %016llx pte3: %016llx" 408 " pte2: %016llx\n", 409 *pte4, *pte3, *pte2); 410 else 411 dprintf(debug_fd, " pte4: %016llx pte3: %016llx pte2: %016" 412 "llx pte1: %016llx\n", 413 *pte4, *pte3, *pte2, *pte1); 414 } 415 416 void kvm_cpu__arch_nmi(struct kvm_cpu *cpu) 417 { 418 struct kvm_lapic_state klapic; 419 struct local_apic *lapic = (void *)&klapic; 420 421 if (ioctl(cpu->vcpu_fd, KVM_GET_LAPIC, &klapic) != 0) 422 return; 423 424 if (lapic->lvt_lint1.mask) 425 return; 426 427 if (lapic->lvt_lint1.delivery_mode != APIC_MODE_NMI) 428 return; 429 430 ioctl(cpu->vcpu_fd, KVM_NMI); 431 } 432