1 #include "kvm/kvm-cpu.h" 2 3 #include "kvm/util.h" 4 #include "kvm/kvm.h" 5 6 #include <asm/msr-index.h> 7 8 #include <sys/ioctl.h> 9 #include <sys/mman.h> 10 #include <signal.h> 11 #include <stdlib.h> 12 #include <errno.h> 13 #include <stdio.h> 14 15 static inline bool is_in_protected_mode(struct kvm_cpu *self) 16 { 17 return self->sregs.cr0 & 0x01; 18 } 19 20 static inline u64 ip_to_flat(struct kvm_cpu *self, u64 ip) 21 { 22 u64 cs; 23 24 /* 25 * NOTE! We should take code segment base address into account here. 26 * Luckily it's usually zero because Linux uses flat memory model. 27 */ 28 if (is_in_protected_mode(self)) 29 return ip; 30 31 cs = self->sregs.cs.selector; 32 33 return ip + (cs << 4); 34 } 35 36 static inline u32 selector_to_base(u16 selector) 37 { 38 /* 39 * KVM on Intel requires 'base' to be 'selector * 16' in real mode. 40 */ 41 return (u32)selector * 16; 42 } 43 44 static struct kvm_cpu *kvm_cpu__new(struct kvm *kvm) 45 { 46 struct kvm_cpu *self; 47 48 self = calloc(1, sizeof *self); 49 if (!self) 50 return NULL; 51 52 self->kvm = kvm; 53 54 return self; 55 } 56 57 void kvm_cpu__delete(struct kvm_cpu *self) 58 { 59 if (self->msrs) 60 free(self->msrs); 61 62 free(self); 63 } 64 65 struct kvm_cpu *kvm_cpu__init(struct kvm *kvm, unsigned long cpu_id) 66 { 67 struct kvm_cpu *self; 68 int mmap_size; 69 70 self = kvm_cpu__new(kvm); 71 if (!self) 72 return NULL; 73 74 self->cpu_id = cpu_id; 75 76 self->vcpu_fd = ioctl(self->kvm->vm_fd, KVM_CREATE_VCPU, cpu_id); 77 if (self->vcpu_fd < 0) 78 die_perror("KVM_CREATE_VCPU ioctl"); 79 80 mmap_size = ioctl(self->kvm->sys_fd, KVM_GET_VCPU_MMAP_SIZE, 0); 81 if (mmap_size < 0) 82 die_perror("KVM_GET_VCPU_MMAP_SIZE ioctl"); 83 84 self->kvm_run = mmap(NULL, mmap_size, PROT_READ|PROT_WRITE, MAP_SHARED, self->vcpu_fd, 0); 85 if (self->kvm_run == MAP_FAILED) 86 die("unable to mmap vcpu fd"); 87 88 return self; 89 } 90 91 void kvm_cpu__enable_singlestep(struct kvm_cpu *self) 92 { 93 struct kvm_guest_debug debug = { 94 .control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP, 95 }; 96 97 if (ioctl(self->vcpu_fd, KVM_SET_GUEST_DEBUG, &debug) < 0) 98 warning("KVM_SET_GUEST_DEBUG failed"); 99 } 100 101 static struct kvm_msrs *kvm_msrs__new(size_t nmsrs) 102 { 103 struct kvm_msrs *self = calloc(1, sizeof(*self) + (sizeof(struct kvm_msr_entry) * nmsrs)); 104 105 if (!self) 106 die("out of memory"); 107 108 return self; 109 } 110 111 #define KVM_MSR_ENTRY(_index, _data) \ 112 (struct kvm_msr_entry) { .index = _index, .data = _data } 113 114 static void kvm_cpu__setup_msrs(struct kvm_cpu *self) 115 { 116 unsigned long ndx = 0; 117 118 self->msrs = kvm_msrs__new(100); 119 120 self->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_SYSENTER_CS, 0x0); 121 self->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_SYSENTER_ESP, 0x0); 122 self->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_SYSENTER_EIP, 0x0); 123 #ifdef CONFIG_X86_64 124 self->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_STAR, 0x0); 125 self->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_CSTAR, 0x0); 126 self->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_KERNEL_GS_BASE, 0x0); 127 self->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_SYSCALL_MASK, 0x0); 128 self->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_LSTAR, 0x0); 129 #endif 130 self->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_TSC, 0x0); 131 132 self->msrs->nmsrs = ndx; 133 134 if (ioctl(self->vcpu_fd, KVM_SET_MSRS, self->msrs) < 0) 135 die_perror("KVM_SET_MSRS failed"); 136 } 137 138 static void kvm_cpu__setup_fpu(struct kvm_cpu *self) 139 { 140 self->fpu = (struct kvm_fpu) { 141 .fcw = 0x37f, 142 .mxcsr = 0x1f80, 143 }; 144 145 if (ioctl(self->vcpu_fd, KVM_SET_FPU, &self->fpu) < 0) 146 die_perror("KVM_SET_FPU failed"); 147 } 148 149 static void kvm_cpu__setup_regs(struct kvm_cpu *self) 150 { 151 self->regs = (struct kvm_regs) { 152 /* We start the guest in 16-bit real mode */ 153 .rflags = 0x0000000000000002ULL, 154 155 .rip = self->kvm->boot_ip, 156 .rsp = self->kvm->boot_sp, 157 .rbp = self->kvm->boot_sp, 158 }; 159 160 if (self->regs.rip > USHRT_MAX) 161 die("ip 0x%llx is too high for real mode", (u64) self->regs.rip); 162 163 if (ioctl(self->vcpu_fd, KVM_SET_REGS, &self->regs) < 0) 164 die_perror("KVM_SET_REGS failed"); 165 } 166 167 static void kvm_cpu__setup_sregs(struct kvm_cpu *self) 168 { 169 170 if (ioctl(self->vcpu_fd, KVM_GET_SREGS, &self->sregs) < 0) 171 die_perror("KVM_GET_SREGS failed"); 172 173 self->sregs.cs.selector = self->kvm->boot_selector; 174 self->sregs.cs.base = selector_to_base(self->kvm->boot_selector); 175 self->sregs.ss.selector = self->kvm->boot_selector; 176 self->sregs.ss.base = selector_to_base(self->kvm->boot_selector); 177 self->sregs.ds.selector = self->kvm->boot_selector; 178 self->sregs.ds.base = selector_to_base(self->kvm->boot_selector); 179 self->sregs.es.selector = self->kvm->boot_selector; 180 self->sregs.es.base = selector_to_base(self->kvm->boot_selector); 181 self->sregs.fs.selector = self->kvm->boot_selector; 182 self->sregs.fs.base = selector_to_base(self->kvm->boot_selector); 183 self->sregs.gs.selector = self->kvm->boot_selector; 184 self->sregs.gs.base = selector_to_base(self->kvm->boot_selector); 185 186 if (ioctl(self->vcpu_fd, KVM_SET_SREGS, &self->sregs) < 0) 187 die_perror("KVM_SET_SREGS failed"); 188 } 189 190 /** 191 * kvm_cpu__reset_vcpu - reset virtual CPU to a known state 192 */ 193 void kvm_cpu__reset_vcpu(struct kvm_cpu *self) 194 { 195 kvm_cpu__setup_sregs(self); 196 kvm_cpu__setup_regs(self); 197 kvm_cpu__setup_fpu(self); 198 kvm_cpu__setup_msrs(self); 199 } 200 201 static void print_dtable(const char *name, struct kvm_dtable *dtable) 202 { 203 printf(" %s %016llx %08hx\n", 204 name, (u64) dtable->base, (u16) dtable->limit); 205 } 206 207 static void print_segment(const char *name, struct kvm_segment *seg) 208 { 209 printf(" %s %04hx %016llx %08x %02hhx %x %x %x %x %x %x %x\n", 210 name, (u16) seg->selector, (u64) seg->base, (u32) seg->limit, 211 (u8) seg->type, seg->present, seg->dpl, seg->db, seg->s, seg->l, seg->g, seg->avl); 212 } 213 214 void kvm_cpu__show_registers(struct kvm_cpu *self) 215 { 216 unsigned long cr0, cr2, cr3; 217 unsigned long cr4, cr8; 218 unsigned long rax, rbx, rcx; 219 unsigned long rdx, rsi, rdi; 220 unsigned long rbp, r8, r9; 221 unsigned long r10, r11, r12; 222 unsigned long r13, r14, r15; 223 unsigned long rip, rsp; 224 struct kvm_sregs sregs; 225 unsigned long rflags; 226 struct kvm_regs regs; 227 int i; 228 229 if (ioctl(self->vcpu_fd, KVM_GET_REGS, ®s) < 0) 230 die("KVM_GET_REGS failed"); 231 232 rflags = regs.rflags; 233 234 rip = regs.rip; rsp = regs.rsp; 235 rax = regs.rax; rbx = regs.rbx; rcx = regs.rcx; 236 rdx = regs.rdx; rsi = regs.rsi; rdi = regs.rdi; 237 rbp = regs.rbp; r8 = regs.r8; r9 = regs.r9; 238 r10 = regs.r10; r11 = regs.r11; r12 = regs.r12; 239 r13 = regs.r13; r14 = regs.r14; r15 = regs.r15; 240 241 printf("Registers:\n"); 242 printf(" rip: %016lx rsp: %016lx flags: %016lx\n", rip, rsp, rflags); 243 printf(" rax: %016lx rbx: %016lx rcx: %016lx\n", rax, rbx, rcx); 244 printf(" rdx: %016lx rsi: %016lx rdi: %016lx\n", rdx, rsi, rdi); 245 printf(" rbp: %016lx r8: %016lx r9: %016lx\n", rbp, r8, r9); 246 printf(" r10: %016lx r11: %016lx r12: %016lx\n", r10, r11, r12); 247 printf(" r13: %016lx r14: %016lx r15: %016lx\n", r13, r14, r15); 248 249 if (ioctl(self->vcpu_fd, KVM_GET_SREGS, &sregs) < 0) 250 die("KVM_GET_REGS failed"); 251 252 cr0 = sregs.cr0; cr2 = sregs.cr2; cr3 = sregs.cr3; 253 cr4 = sregs.cr4; cr8 = sregs.cr8; 254 255 printf(" cr0: %016lx cr2: %016lx cr3: %016lx\n", cr0, cr2, cr3); 256 printf(" cr4: %016lx cr8: %016lx\n", cr4, cr8); 257 printf("Segment registers:\n"); 258 printf(" register selector base limit type p dpl db s l g avl\n"); 259 print_segment("cs ", &sregs.cs); 260 print_segment("ss ", &sregs.ss); 261 print_segment("ds ", &sregs.ds); 262 print_segment("es ", &sregs.es); 263 print_segment("fs ", &sregs.fs); 264 print_segment("gs ", &sregs.gs); 265 print_segment("tr ", &sregs.tr); 266 print_segment("ldt", &sregs.ldt); 267 print_dtable("gdt", &sregs.gdt); 268 print_dtable("idt", &sregs.idt); 269 printf(" [ efer: %016llx apic base: %016llx nmi: %s ]\n", 270 (u64) sregs.efer, (u64) sregs.apic_base, 271 (self->kvm->nmi_disabled ? "disabled" : "enabled")); 272 printf("Interrupt bitmap:\n"); 273 printf(" "); 274 for (i = 0; i < (KVM_NR_INTERRUPTS + 63) / 64; i++) 275 printf("%016llx ", (u64) sregs.interrupt_bitmap[i]); 276 printf("\n"); 277 } 278 279 void kvm_cpu__show_code(struct kvm_cpu *self) 280 { 281 unsigned int code_bytes = 64; 282 unsigned int code_prologue = code_bytes * 43 / 64; 283 unsigned int code_len = code_bytes; 284 unsigned char c; 285 unsigned int i; 286 u8 *ip; 287 288 if (ioctl(self->vcpu_fd, KVM_GET_REGS, &self->regs) < 0) 289 die("KVM_GET_REGS failed"); 290 291 if (ioctl(self->vcpu_fd, KVM_GET_SREGS, &self->sregs) < 0) 292 die("KVM_GET_SREGS failed"); 293 294 ip = guest_flat_to_host(self->kvm, ip_to_flat(self, self->regs.rip) - code_prologue); 295 296 printf("Code: "); 297 298 for (i = 0; i < code_len; i++, ip++) { 299 if (!host_ptr_in_ram(self->kvm, ip)) 300 break; 301 302 c = *ip; 303 304 if (ip == guest_flat_to_host(self->kvm, ip_to_flat(self, self->regs.rip))) 305 printf("<%02x> ", c); 306 else 307 printf("%02x ", c); 308 } 309 310 printf("\n"); 311 312 printf("Stack:\n"); 313 kvm__dump_mem(self->kvm, self->regs.rsp, 32); 314 } 315 316 void kvm_cpu__show_page_tables(struct kvm_cpu *self) 317 { 318 u64 *pte1; 319 u64 *pte2; 320 u64 *pte3; 321 u64 *pte4; 322 323 if (!is_in_protected_mode(self)) 324 return; 325 326 if (ioctl(self->vcpu_fd, KVM_GET_SREGS, &self->sregs) < 0) 327 die("KVM_GET_SREGS failed"); 328 329 pte4 = guest_flat_to_host(self->kvm, self->sregs.cr3); 330 if (!host_ptr_in_ram(self->kvm, pte4)) 331 return; 332 333 pte3 = guest_flat_to_host(self->kvm, (*pte4 & ~0xfff)); 334 if (!host_ptr_in_ram(self->kvm, pte3)) 335 return; 336 337 pte2 = guest_flat_to_host(self->kvm, (*pte3 & ~0xfff)); 338 if (!host_ptr_in_ram(self->kvm, pte2)) 339 return; 340 341 pte1 = guest_flat_to_host(self->kvm, (*pte2 & ~0xfff)); 342 if (!host_ptr_in_ram(self->kvm, pte1)) 343 return; 344 345 printf("Page Tables:\n"); 346 if (*pte2 & (1 << 7)) 347 printf(" pte4: %016llx pte3: %016llx" 348 " pte2: %016llx\n", 349 *pte4, *pte3, *pte2); 350 else 351 printf(" pte4: %016llx pte3: %016llx pte2: %016" 352 "llx pte1: %016llx\n", 353 *pte4, *pte3, *pte2, *pte1); 354 } 355 356 void kvm_cpu__run(struct kvm_cpu *self) 357 { 358 int err; 359 360 err = ioctl(self->vcpu_fd, KVM_RUN, 0); 361 if (err && (errno != EINTR && errno != EAGAIN)) 362 die_perror("KVM_RUN failed"); 363 } 364 365 int kvm_cpu__start(struct kvm_cpu *cpu) 366 { 367 sigset_t sigset; 368 369 sigemptyset(&sigset); 370 sigaddset(&sigset, SIGALRM); 371 372 pthread_sigmask(SIG_BLOCK, &sigset, NULL); 373 374 kvm_cpu__setup_cpuid(cpu); 375 kvm_cpu__reset_vcpu(cpu); 376 377 for (;;) { 378 kvm_cpu__run(cpu); 379 380 switch (cpu->kvm_run->exit_reason) { 381 case KVM_EXIT_DEBUG: 382 kvm_cpu__show_registers(cpu); 383 kvm_cpu__show_code(cpu); 384 break; 385 case KVM_EXIT_IO: { 386 bool ret; 387 388 ret = kvm__emulate_io(cpu->kvm, 389 cpu->kvm_run->io.port, 390 (u8 *)cpu->kvm_run + 391 cpu->kvm_run->io.data_offset, 392 cpu->kvm_run->io.direction, 393 cpu->kvm_run->io.size, 394 cpu->kvm_run->io.count); 395 396 if (!ret) 397 goto panic_kvm; 398 break; 399 } 400 case KVM_EXIT_MMIO: { 401 bool ret; 402 403 ret = kvm__emulate_mmio(cpu->kvm, 404 cpu->kvm_run->mmio.phys_addr, 405 cpu->kvm_run->mmio.data, 406 cpu->kvm_run->mmio.len, 407 cpu->kvm_run->mmio.is_write); 408 409 if (!ret) 410 goto panic_kvm; 411 break; 412 } 413 case KVM_EXIT_INTR: 414 break; 415 case KVM_EXIT_SHUTDOWN: 416 goto exit_kvm; 417 default: 418 goto panic_kvm; 419 } 420 } 421 422 exit_kvm: 423 return 0; 424 425 panic_kvm: 426 return 1; 427 } 428