1 #include "kvm/kvm-cpu.h" 2 3 #include "kvm/symbol.h" 4 #include "kvm/util.h" 5 #include "kvm/kvm.h" 6 7 #include <asm/msr-index.h> 8 9 #include <sys/ioctl.h> 10 #include <sys/mman.h> 11 #include <signal.h> 12 #include <stdlib.h> 13 #include <string.h> 14 #include <errno.h> 15 #include <stdio.h> 16 17 static int debug_fd; 18 19 void kvm_cpu__set_debug_fd(int fd) 20 { 21 debug_fd = fd; 22 } 23 24 int kvm_cpu__get_debug_fd(void) 25 { 26 return debug_fd; 27 } 28 29 static inline bool is_in_protected_mode(struct kvm_cpu *vcpu) 30 { 31 return vcpu->sregs.cr0 & 0x01; 32 } 33 34 static inline u64 ip_to_flat(struct kvm_cpu *vcpu, u64 ip) 35 { 36 u64 cs; 37 38 /* 39 * NOTE! We should take code segment base address into account here. 40 * Luckily it's usually zero because Linux uses flat memory model. 41 */ 42 if (is_in_protected_mode(vcpu)) 43 return ip; 44 45 cs = vcpu->sregs.cs.selector; 46 47 return ip + (cs << 4); 48 } 49 50 static inline u32 selector_to_base(u16 selector) 51 { 52 /* 53 * KVM on Intel requires 'base' to be 'selector * 16' in real mode. 54 */ 55 return (u32)selector * 16; 56 } 57 58 static struct kvm_cpu *kvm_cpu__new(struct kvm *kvm) 59 { 60 struct kvm_cpu *vcpu; 61 62 vcpu = calloc(1, sizeof *vcpu); 63 if (!vcpu) 64 return NULL; 65 66 vcpu->kvm = kvm; 67 68 return vcpu; 69 } 70 71 void kvm_cpu__delete(struct kvm_cpu *vcpu) 72 { 73 if (vcpu->msrs) 74 free(vcpu->msrs); 75 76 free(vcpu); 77 } 78 79 struct kvm_cpu *kvm_cpu__init(struct kvm *kvm, unsigned long cpu_id) 80 { 81 struct kvm_cpu *vcpu; 82 int mmap_size; 83 int coalesced_offset; 84 85 vcpu = kvm_cpu__new(kvm); 86 if (!vcpu) 87 return NULL; 88 89 vcpu->cpu_id = cpu_id; 90 91 vcpu->vcpu_fd = ioctl(vcpu->kvm->vm_fd, KVM_CREATE_VCPU, cpu_id); 92 if (vcpu->vcpu_fd < 0) 93 die_perror("KVM_CREATE_VCPU ioctl"); 94 95 mmap_size = ioctl(vcpu->kvm->sys_fd, KVM_GET_VCPU_MMAP_SIZE, 0); 96 if (mmap_size < 0) 97 die_perror("KVM_GET_VCPU_MMAP_SIZE ioctl"); 98 99 vcpu->kvm_run = mmap(NULL, mmap_size, PROT_RW, MAP_SHARED, vcpu->vcpu_fd, 0); 100 if (vcpu->kvm_run == MAP_FAILED) 101 die("unable to mmap vcpu fd"); 102 103 coalesced_offset = ioctl(kvm->sys_fd, KVM_CHECK_EXTENSION, KVM_CAP_COALESCED_MMIO); 104 if (coalesced_offset) 105 vcpu->ring = (void *)vcpu->kvm_run + (coalesced_offset * PAGE_SIZE); 106 107 vcpu->is_running = true; 108 109 return vcpu; 110 } 111 112 static struct kvm_msrs *kvm_msrs__new(size_t nmsrs) 113 { 114 struct kvm_msrs *vcpu = calloc(1, sizeof(*vcpu) + (sizeof(struct kvm_msr_entry) * nmsrs)); 115 116 if (!vcpu) 117 die("out of memory"); 118 119 return vcpu; 120 } 121 122 #define KVM_MSR_ENTRY(_index, _data) \ 123 (struct kvm_msr_entry) { .index = _index, .data = _data } 124 125 static void kvm_cpu__setup_msrs(struct kvm_cpu *vcpu) 126 { 127 unsigned long ndx = 0; 128 129 vcpu->msrs = kvm_msrs__new(100); 130 131 vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_SYSENTER_CS, 0x0); 132 vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_SYSENTER_ESP, 0x0); 133 vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_SYSENTER_EIP, 0x0); 134 #ifdef CONFIG_X86_64 135 vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_STAR, 0x0); 136 vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_CSTAR, 0x0); 137 vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_KERNEL_GS_BASE, 0x0); 138 vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_SYSCALL_MASK, 0x0); 139 vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_LSTAR, 0x0); 140 #endif 141 vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_TSC, 0x0); 142 vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_MISC_ENABLE, 143 MSR_IA32_MISC_ENABLE_FAST_STRING); 144 145 vcpu->msrs->nmsrs = ndx; 146 147 if (ioctl(vcpu->vcpu_fd, KVM_SET_MSRS, vcpu->msrs) < 0) 148 die_perror("KVM_SET_MSRS failed"); 149 } 150 151 static void kvm_cpu__setup_fpu(struct kvm_cpu *vcpu) 152 { 153 vcpu->fpu = (struct kvm_fpu) { 154 .fcw = 0x37f, 155 .mxcsr = 0x1f80, 156 }; 157 158 if (ioctl(vcpu->vcpu_fd, KVM_SET_FPU, &vcpu->fpu) < 0) 159 die_perror("KVM_SET_FPU failed"); 160 } 161 162 static void kvm_cpu__setup_regs(struct kvm_cpu *vcpu) 163 { 164 vcpu->regs = (struct kvm_regs) { 165 /* We start the guest in 16-bit real mode */ 166 .rflags = 0x0000000000000002ULL, 167 168 .rip = vcpu->kvm->boot_ip, 169 .rsp = vcpu->kvm->boot_sp, 170 .rbp = vcpu->kvm->boot_sp, 171 }; 172 173 if (vcpu->regs.rip > USHRT_MAX) 174 die("ip 0x%llx is too high for real mode", (u64) vcpu->regs.rip); 175 176 if (ioctl(vcpu->vcpu_fd, KVM_SET_REGS, &vcpu->regs) < 0) 177 die_perror("KVM_SET_REGS failed"); 178 } 179 180 static void kvm_cpu__setup_sregs(struct kvm_cpu *vcpu) 181 { 182 183 if (ioctl(vcpu->vcpu_fd, KVM_GET_SREGS, &vcpu->sregs) < 0) 184 die_perror("KVM_GET_SREGS failed"); 185 186 vcpu->sregs.cs.selector = vcpu->kvm->boot_selector; 187 vcpu->sregs.cs.base = selector_to_base(vcpu->kvm->boot_selector); 188 vcpu->sregs.ss.selector = vcpu->kvm->boot_selector; 189 vcpu->sregs.ss.base = selector_to_base(vcpu->kvm->boot_selector); 190 vcpu->sregs.ds.selector = vcpu->kvm->boot_selector; 191 vcpu->sregs.ds.base = selector_to_base(vcpu->kvm->boot_selector); 192 vcpu->sregs.es.selector = vcpu->kvm->boot_selector; 193 vcpu->sregs.es.base = selector_to_base(vcpu->kvm->boot_selector); 194 vcpu->sregs.fs.selector = vcpu->kvm->boot_selector; 195 vcpu->sregs.fs.base = selector_to_base(vcpu->kvm->boot_selector); 196 vcpu->sregs.gs.selector = vcpu->kvm->boot_selector; 197 vcpu->sregs.gs.base = selector_to_base(vcpu->kvm->boot_selector); 198 199 if (ioctl(vcpu->vcpu_fd, KVM_SET_SREGS, &vcpu->sregs) < 0) 200 die_perror("KVM_SET_SREGS failed"); 201 } 202 203 /** 204 * kvm_cpu__reset_vcpu - reset virtual CPU to a known state 205 */ 206 void kvm_cpu__reset_vcpu(struct kvm_cpu *vcpu) 207 { 208 kvm_cpu__setup_cpuid(vcpu); 209 kvm_cpu__setup_sregs(vcpu); 210 kvm_cpu__setup_regs(vcpu); 211 kvm_cpu__setup_fpu(vcpu); 212 kvm_cpu__setup_msrs(vcpu); 213 } 214 215 bool kvm_cpu__handle_exit(struct kvm_cpu *vcpu) 216 { 217 return false; 218 } 219 220 static void print_dtable(const char *name, struct kvm_dtable *dtable) 221 { 222 dprintf(debug_fd, " %s %016llx %08hx\n", 223 name, (u64) dtable->base, (u16) dtable->limit); 224 } 225 226 static void print_segment(const char *name, struct kvm_segment *seg) 227 { 228 dprintf(debug_fd, " %s %04hx %016llx %08x %02hhx %x %x %x %x %x %x %x\n", 229 name, (u16) seg->selector, (u64) seg->base, (u32) seg->limit, 230 (u8) seg->type, seg->present, seg->dpl, seg->db, seg->s, seg->l, seg->g, seg->avl); 231 } 232 233 void kvm_cpu__show_registers(struct kvm_cpu *vcpu) 234 { 235 unsigned long cr0, cr2, cr3; 236 unsigned long cr4, cr8; 237 unsigned long rax, rbx, rcx; 238 unsigned long rdx, rsi, rdi; 239 unsigned long rbp, r8, r9; 240 unsigned long r10, r11, r12; 241 unsigned long r13, r14, r15; 242 unsigned long rip, rsp; 243 struct kvm_sregs sregs; 244 unsigned long rflags; 245 struct kvm_regs regs; 246 int i; 247 248 if (ioctl(vcpu->vcpu_fd, KVM_GET_REGS, ®s) < 0) 249 die("KVM_GET_REGS failed"); 250 251 rflags = regs.rflags; 252 253 rip = regs.rip; rsp = regs.rsp; 254 rax = regs.rax; rbx = regs.rbx; rcx = regs.rcx; 255 rdx = regs.rdx; rsi = regs.rsi; rdi = regs.rdi; 256 rbp = regs.rbp; r8 = regs.r8; r9 = regs.r9; 257 r10 = regs.r10; r11 = regs.r11; r12 = regs.r12; 258 r13 = regs.r13; r14 = regs.r14; r15 = regs.r15; 259 260 dprintf(debug_fd, "\n Registers:\n"); 261 dprintf(debug_fd, " ----------\n"); 262 dprintf(debug_fd, " rip: %016lx rsp: %016lx flags: %016lx\n", rip, rsp, rflags); 263 dprintf(debug_fd, " rax: %016lx rbx: %016lx rcx: %016lx\n", rax, rbx, rcx); 264 dprintf(debug_fd, " rdx: %016lx rsi: %016lx rdi: %016lx\n", rdx, rsi, rdi); 265 dprintf(debug_fd, " rbp: %016lx r8: %016lx r9: %016lx\n", rbp, r8, r9); 266 dprintf(debug_fd, " r10: %016lx r11: %016lx r12: %016lx\n", r10, r11, r12); 267 dprintf(debug_fd, " r13: %016lx r14: %016lx r15: %016lx\n", r13, r14, r15); 268 269 if (ioctl(vcpu->vcpu_fd, KVM_GET_SREGS, &sregs) < 0) 270 die("KVM_GET_REGS failed"); 271 272 cr0 = sregs.cr0; cr2 = sregs.cr2; cr3 = sregs.cr3; 273 cr4 = sregs.cr4; cr8 = sregs.cr8; 274 275 dprintf(debug_fd, " cr0: %016lx cr2: %016lx cr3: %016lx\n", cr0, cr2, cr3); 276 dprintf(debug_fd, " cr4: %016lx cr8: %016lx\n", cr4, cr8); 277 dprintf(debug_fd, "\n Segment registers:\n"); 278 dprintf(debug_fd, " ------------------\n"); 279 dprintf(debug_fd, " register selector base limit type p dpl db s l g avl\n"); 280 print_segment("cs ", &sregs.cs); 281 print_segment("ss ", &sregs.ss); 282 print_segment("ds ", &sregs.ds); 283 print_segment("es ", &sregs.es); 284 print_segment("fs ", &sregs.fs); 285 print_segment("gs ", &sregs.gs); 286 print_segment("tr ", &sregs.tr); 287 print_segment("ldt", &sregs.ldt); 288 print_dtable("gdt", &sregs.gdt); 289 print_dtable("idt", &sregs.idt); 290 291 dprintf(debug_fd, "\n APIC:\n"); 292 dprintf(debug_fd, " -----\n"); 293 dprintf(debug_fd, " efer: %016llx apic base: %016llx nmi: %s\n", 294 (u64) sregs.efer, (u64) sregs.apic_base, 295 (vcpu->kvm->nmi_disabled ? "disabled" : "enabled")); 296 297 dprintf(debug_fd, "\n Interrupt bitmap:\n"); 298 dprintf(debug_fd, " -----------------\n"); 299 for (i = 0; i < (KVM_NR_INTERRUPTS + 63) / 64; i++) 300 dprintf(debug_fd, " %016llx", (u64) sregs.interrupt_bitmap[i]); 301 dprintf(debug_fd, "\n"); 302 } 303 304 #define MAX_SYM_LEN 128 305 306 void kvm_cpu__show_code(struct kvm_cpu *vcpu) 307 { 308 unsigned int code_bytes = 64; 309 unsigned int code_prologue = code_bytes * 43 / 64; 310 unsigned int code_len = code_bytes; 311 char sym[MAX_SYM_LEN]; 312 unsigned char c; 313 unsigned int i; 314 u8 *ip; 315 316 if (ioctl(vcpu->vcpu_fd, KVM_GET_REGS, &vcpu->regs) < 0) 317 die("KVM_GET_REGS failed"); 318 319 if (ioctl(vcpu->vcpu_fd, KVM_GET_SREGS, &vcpu->sregs) < 0) 320 die("KVM_GET_SREGS failed"); 321 322 ip = guest_flat_to_host(vcpu->kvm, ip_to_flat(vcpu, vcpu->regs.rip) - code_prologue); 323 324 dprintf(debug_fd, "\n Code:\n"); 325 dprintf(debug_fd, " -----\n"); 326 327 symbol__lookup(vcpu->kvm, vcpu->regs.rip, sym, MAX_SYM_LEN); 328 329 dprintf(debug_fd, " rip: [<%016lx>] %s\n\n", (unsigned long) vcpu->regs.rip, sym); 330 331 for (i = 0; i < code_len; i++, ip++) { 332 if (!host_ptr_in_ram(vcpu->kvm, ip)) 333 break; 334 335 c = *ip; 336 337 if (ip == guest_flat_to_host(vcpu->kvm, ip_to_flat(vcpu, vcpu->regs.rip))) 338 dprintf(debug_fd, " <%02x>", c); 339 else 340 dprintf(debug_fd, " %02x", c); 341 } 342 343 dprintf(debug_fd, "\n"); 344 345 dprintf(debug_fd, "\n Stack:\n"); 346 dprintf(debug_fd, " ------\n"); 347 kvm__dump_mem(vcpu->kvm, vcpu->regs.rsp, 32); 348 } 349 350 void kvm_cpu__show_page_tables(struct kvm_cpu *vcpu) 351 { 352 u64 *pte1; 353 u64 *pte2; 354 u64 *pte3; 355 u64 *pte4; 356 357 if (!is_in_protected_mode(vcpu)) 358 return; 359 360 if (ioctl(vcpu->vcpu_fd, KVM_GET_SREGS, &vcpu->sregs) < 0) 361 die("KVM_GET_SREGS failed"); 362 363 pte4 = guest_flat_to_host(vcpu->kvm, vcpu->sregs.cr3); 364 if (!host_ptr_in_ram(vcpu->kvm, pte4)) 365 return; 366 367 pte3 = guest_flat_to_host(vcpu->kvm, (*pte4 & ~0xfff)); 368 if (!host_ptr_in_ram(vcpu->kvm, pte3)) 369 return; 370 371 pte2 = guest_flat_to_host(vcpu->kvm, (*pte3 & ~0xfff)); 372 if (!host_ptr_in_ram(vcpu->kvm, pte2)) 373 return; 374 375 pte1 = guest_flat_to_host(vcpu->kvm, (*pte2 & ~0xfff)); 376 if (!host_ptr_in_ram(vcpu->kvm, pte1)) 377 return; 378 379 dprintf(debug_fd, "Page Tables:\n"); 380 if (*pte2 & (1 << 7)) 381 dprintf(debug_fd, " pte4: %016llx pte3: %016llx" 382 " pte2: %016llx\n", 383 *pte4, *pte3, *pte2); 384 else 385 dprintf(debug_fd, " pte4: %016llx pte3: %016llx pte2: %016" 386 "llx pte1: %016llx\n", 387 *pte4, *pte3, *pte2, *pte1); 388 } 389