#include "kvm/kvm-cpu.h"

#include "kvm/util.h"
#include "kvm/kvm.h"

#include <sys/ioctl.h>
#include <sys/mman.h>
#include <inttypes.h>
#include <stdbool.h>
#include <limits.h>
#include <stdlib.h>
#include <errno.h>
#include <stdio.h>

static inline bool is_in_protected_mode(struct kvm_cpu *self)
{
	return self->sregs.cr0 & 0x01;
}

static inline uint64_t ip_to_flat(struct kvm_cpu *self, uint64_t ip)
{
	uint64_t cs;

	/*
	 * NOTE! We should take code segment base address into account here.
	 * Luckily it's usually zero because Linux uses flat memory model.
	 */
	if (is_in_protected_mode(self))
		return ip;

	cs = self->sregs.cs.selector;

	return ip + (cs << 4);
}

static inline uint32_t selector_to_base(uint16_t selector)
{
	/*
	 * KVM on Intel requires 'base' to be 'selector * 16' in real mode.
	 */
	return (uint32_t)selector * 16;
}

static struct kvm_cpu *kvm_cpu__new(struct kvm *kvm)
{
	struct kvm_cpu *self;

	self = calloc(1, sizeof *self);
	if (!self)
		return NULL;

	self->kvm = kvm;

	return self;
}

void kvm_cpu__delete(struct kvm_cpu *self)
{
	if (self->msrs)
		free(self->msrs);

	free(self);
}

struct kvm_cpu *kvm_cpu__init(struct kvm *kvm)
{
	struct kvm_cpu *self;
	int mmap_size;

	self = kvm_cpu__new(kvm);
	if (!self)
		return NULL;

	self->vcpu_fd = ioctl(self->kvm->vm_fd, KVM_CREATE_VCPU, 0);
	if (self->vcpu_fd < 0)
		die_perror("KVM_CREATE_VCPU ioctl");

	mmap_size = ioctl(self->kvm->sys_fd, KVM_GET_VCPU_MMAP_SIZE, 0);
	if (mmap_size < 0)
		die_perror("KVM_GET_VCPU_MMAP_SIZE ioctl");

	self->kvm_run = mmap(NULL, mmap_size, PROT_READ|PROT_WRITE, MAP_SHARED, self->vcpu_fd, 0);
	if (self->kvm_run == MAP_FAILED)
		die("unable to mmap vcpu fd");

	return self;
}

void kvm_cpu__enable_singlestep(struct kvm_cpu *self)
{
	struct kvm_guest_debug debug = {
		.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP,
	};

	if (ioctl(self->vcpu_fd, KVM_SET_GUEST_DEBUG, &debug) < 0)
		warning("KVM_SET_GUEST_DEBUG failed");
}

static struct kvm_msrs *kvm_msrs__new(size_t nmsrs)
{
	struct kvm_msrs *self = calloc(1, sizeof(*self) + (sizeof(struct kvm_msr_entry) * nmsrs));

	if (!self)
		die("out of memory");

	return self;
}

#define MSR_IA32_TIME_STAMP_COUNTER	0x10

#define MSR_IA32_SYSENTER_CS		0x174
#define MSR_IA32_SYSENTER_ESP		0x175
#define MSR_IA32_SYSENTER_EIP		0x176

#define MSR_IA32_STAR			0xc0000081
#define MSR_IA32_LSTAR			0xc0000082
#define MSR_IA32_CSTAR			0xc0000083
#define MSR_IA32_FMASK			0xc0000084
#define MSR_IA32_KERNEL_GS_BASE		0xc0000102

#define KVM_MSR_ENTRY(_index, _data) \
	(struct kvm_msr_entry) { .index = _index, .data = _data }

static void kvm_cpu__setup_msrs(struct kvm_cpu *self)
{
	unsigned long ndx = 0;

	self->msrs = kvm_msrs__new(100);

	self->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_SYSENTER_CS, 0x0);
	self->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_SYSENTER_ESP, 0x0);
	self->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_SYSENTER_EIP, 0x0);
#ifdef CONFIG_X86_64
	self->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_STAR, 0x0);
	self->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_CSTAR, 0x0);
	self->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_KERNEL_GS_BASE, 0x0);
	self->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_FMASK, 0x0);
	self->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_LSTAR, 0x0);
#endif
	self->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_TIME_STAMP_COUNTER, 0x0);

	self->msrs->nmsrs = ndx;

	if (ioctl(self->vcpu_fd, KVM_SET_MSRS, self->msrs) < 0)
		die_perror("KVM_SET_MSRS failed");
}

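/*
 * 0x37f is the x87 control word after FINIT (all x87 exceptions masked)
 * and 0x1f80 is the MXCSR default (all SSE exceptions masked).
 */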
static void kvm_cpu__setup_fpu(struct kvm_cpu *self)
{
	self->fpu = (struct kvm_fpu) {
		.fcw = 0x37f,
		.mxcsr = 0x1f80,
	};

	if (ioctl(self->vcpu_fd, KVM_SET_FPU, &self->fpu) < 0)
		die_perror("KVM_SET_FPU failed");
}

static void kvm_cpu__setup_regs(struct kvm_cpu *self)
{
	self->regs = (struct kvm_regs) {
		/* We start the guest in 16-bit real mode */
		.rflags = 0x0000000000000002ULL,

		.rip = self->kvm->boot_ip,
		.rsp = self->kvm->boot_sp,
		.rbp = self->kvm->boot_sp,
	};

	if (self->regs.rip > USHRT_MAX)
		die("ip 0x%" PRIx64 " is too high for real mode", (uint64_t) self->regs.rip);

	if (ioctl(self->vcpu_fd, KVM_SET_REGS, &self->regs) < 0)
		die_perror("KVM_SET_REGS failed");
}

static void kvm_cpu__setup_sregs(struct kvm_cpu *self)
{
	if (ioctl(self->vcpu_fd, KVM_GET_SREGS, &self->sregs) < 0)
		die_perror("KVM_GET_SREGS failed");

	self->sregs.cs.selector = self->kvm->boot_selector;
	self->sregs.cs.base = selector_to_base(self->kvm->boot_selector);
	self->sregs.ss.selector = self->kvm->boot_selector;
	self->sregs.ss.base = selector_to_base(self->kvm->boot_selector);
	self->sregs.ds.selector = self->kvm->boot_selector;
	self->sregs.ds.base = selector_to_base(self->kvm->boot_selector);
	self->sregs.es.selector = self->kvm->boot_selector;
	self->sregs.es.base = selector_to_base(self->kvm->boot_selector);
	self->sregs.fs.selector = self->kvm->boot_selector;
	self->sregs.fs.base = selector_to_base(self->kvm->boot_selector);
	self->sregs.gs.selector = self->kvm->boot_selector;
	self->sregs.gs.base = selector_to_base(self->kvm->boot_selector);

	if (ioctl(self->vcpu_fd, KVM_SET_SREGS, &self->sregs) < 0)
		die_perror("KVM_SET_SREGS failed");
}

/**
 * kvm_cpu__reset_vcpu - reset virtual CPU to a known state
 */
void kvm_cpu__reset_vcpu(struct kvm_cpu *self)
{
	kvm_cpu__setup_sregs(self);
	kvm_cpu__setup_regs(self);
	kvm_cpu__setup_fpu(self);
	kvm_cpu__setup_msrs(self);
}

static void print_dtable(const char *name, struct kvm_dtable *dtable)
{
	printf(" %s %016" PRIx64 " %08" PRIx16 "\n",
		name, (uint64_t) dtable->base, (uint16_t) dtable->limit);
}

static void print_segment(const char *name, struct kvm_segment *seg)
{
	printf(" %s %04" PRIx16 " %016" PRIx64 " %08" PRIx32 " %02" PRIx8 " %x %x %x %x %x %x %x\n",
		name, (uint16_t) seg->selector, (uint64_t) seg->base, (uint32_t) seg->limit,
		(uint8_t) seg->type, seg->present, seg->dpl, seg->db, seg->s, seg->l, seg->g, seg->avl);
}

void kvm_cpu__show_registers(struct kvm_cpu *self)
{
	unsigned long cr0, cr2, cr3;
	unsigned long cr4, cr8;
	unsigned long rax, rbx, rcx;
	unsigned long rdx, rsi, rdi;
	unsigned long rbp, r8, r9;
	unsigned long r10, r11, r12;
	unsigned long r13, r14, r15;
	unsigned long rip, rsp;
	struct kvm_sregs sregs;
	unsigned long rflags;
	struct kvm_regs regs;
	int i;

	if (ioctl(self->vcpu_fd, KVM_GET_REGS, &regs) < 0)
		die("KVM_GET_REGS failed");

	rflags = regs.rflags;

	rip = regs.rip; rsp = regs.rsp;
	rax = regs.rax; rbx = regs.rbx; rcx = regs.rcx;
	rdx = regs.rdx; rsi = regs.rsi; rdi = regs.rdi;
	rbp = regs.rbp; r8 = regs.r8; r9 = regs.r9;
	r10 = regs.r10; r11 = regs.r11; r12 = regs.r12;
	r13 = regs.r13; r14 = regs.r14; r15 = regs.r15;

	printf("Registers:\n");
	printf(" rip: %016lx rsp: %016lx flags: %016lx\n", rip, rsp, rflags);
	printf(" rax: %016lx rbx: %016lx rcx: %016lx\n", rax, rbx, rcx);
	printf(" rdx: %016lx rsi: %016lx rdi: %016lx\n", rdx, rsi, rdi);
	printf(" rbp: %016lx r8: %016lx r9: %016lx\n", rbp, r8, r9);
	printf(" r10: %016lx r11: %016lx r12: %016lx\n", r10, r11, r12);
	printf(" r13: %016lx r14: %016lx r15: %016lx\n", r13, r14, r15);

	if (ioctl(self->vcpu_fd, KVM_GET_SREGS, &sregs) < 0)
		die("KVM_GET_SREGS failed");

	cr0 = sregs.cr0; cr2 = sregs.cr2; cr3 = sregs.cr3;
	cr4 = sregs.cr4; cr8 = sregs.cr8;

	printf(" cr0: %016lx cr2: %016lx cr3: %016lx\n", cr0, cr2, cr3);
	printf(" cr4: %016lx cr8: %016lx\n", cr4, cr8);
	printf("Segment registers:\n");
	printf(" register selector base limit type p dpl db s l g avl\n");
	print_segment("cs ", &sregs.cs);
	print_segment("ss ", &sregs.ss);
	print_segment("ds ", &sregs.ds);
	print_segment("es ", &sregs.es);
	print_segment("fs ", &sregs.fs);
	print_segment("gs ", &sregs.gs);
	print_segment("tr ", &sregs.tr);
	print_segment("ldt", &sregs.ldt);
	print_dtable("gdt", &sregs.gdt);
	print_dtable("idt", &sregs.idt);
	printf(" [ efer: %016" PRIx64 " apic base: %016" PRIx64 " nmi: %s ]\n",
		(uint64_t) sregs.efer, (uint64_t) sregs.apic_base,
		(self->kvm->nmi_disabled ? "disabled" : "enabled"));
	printf("Interrupt bitmap:\n");
	printf(" ");
	for (i = 0; i < (KVM_NR_INTERRUPTS + 63) / 64; i++)
		printf("%016" PRIx64 " ", (uint64_t) sregs.interrupt_bitmap[i]);
	printf("\n");
}

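/*
 * Dump the instruction stream around the current instruction pointer in
 * the same style as a kernel oops "Code:" line: up to 64 bytes are
 * printed, roughly two thirds of them (code_prologue) before RIP, and
 * the byte at RIP itself is wrapped in <>.
 */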
void kvm_cpu__show_code(struct kvm_cpu *self)
{
	unsigned int code_bytes = 64;
	unsigned int code_prologue = code_bytes * 43 / 64;
	unsigned int code_len = code_bytes;
	unsigned char c;
	unsigned int i;
	uint8_t *ip;

	if (ioctl(self->vcpu_fd, KVM_GET_REGS, &self->regs) < 0)
		die("KVM_GET_REGS failed");

	if (ioctl(self->vcpu_fd, KVM_GET_SREGS, &self->sregs) < 0)
		die("KVM_GET_SREGS failed");

	ip = guest_flat_to_host(self->kvm, ip_to_flat(self, self->regs.rip) - code_prologue);

	printf("Code: ");

	for (i = 0; i < code_len; i++, ip++) {
		if (!host_ptr_in_ram(self->kvm, ip))
			break;

		c = *ip;

		if (ip == guest_flat_to_host(self->kvm, ip_to_flat(self, self->regs.rip)))
			printf("<%02x> ", c);
		else
			printf("%02x ", c);
	}

	printf("\n");

	printf("Stack:\n");
	kvm__dump_mem(self->kvm, self->regs.rsp, 32);
}

void kvm_cpu__show_page_tables(struct kvm_cpu *self)
{
	uint64_t *pte1;
	uint64_t *pte2;
	uint64_t *pte3;
	uint64_t *pte4;

	if (!is_in_protected_mode(self))
		return;

	if (ioctl(self->vcpu_fd, KVM_GET_SREGS, &self->sregs) < 0)
		die("KVM_GET_SREGS failed");

	pte4 = guest_flat_to_host(self->kvm, self->sregs.cr3);
	if (!host_ptr_in_ram(self->kvm, pte4))
		return;

	pte3 = guest_flat_to_host(self->kvm, (*pte4 & ~0xfff));
	if (!host_ptr_in_ram(self->kvm, pte3))
		return;

	pte2 = guest_flat_to_host(self->kvm, (*pte3 & ~0xfff));
	if (!host_ptr_in_ram(self->kvm, pte2))
		return;

	pte1 = guest_flat_to_host(self->kvm, (*pte2 & ~0xfff));
	if (!host_ptr_in_ram(self->kvm, pte1))
		return;

	printf("Page Tables:\n");
	if (*pte2 & (1 << 7))
		printf(" pte4: %016" PRIx64 " pte3: %016" PRIx64
			" pte2: %016" PRIx64 "\n",
			*pte4, *pte3, *pte2);
	else
		printf(" pte4: %016" PRIx64 " pte3: %016" PRIx64 " pte2: %016"
			PRIx64 " pte1: %016" PRIx64 "\n",
			*pte4, *pte3, *pte2, *pte1);
}

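/*
 * Enter the guest with KVM_RUN. The ioctl blocks until the guest exits
 * back to userspace (or a signal interrupts it, in which case errno is
 * EINTR/EAGAIN and the exit is not fatal). The reason for the exit is
 * left in the mmap'ed kvm_run structure, so a caller's main loop would
 * look roughly like this (sketch only; the dispatch code lives outside
 * this file):
 *
 *	for (;;) {
 *		kvm_cpu__run(cpu);
 *
 *		switch (cpu->kvm_run->exit_reason) {
 *		case KVM_EXIT_IO:
 *			... emulate port I/O ...
 *			break;
 *		default:
 *			...
 *		}
 *	}
 */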
void kvm_cpu__run(struct kvm_cpu *self)
{
	int err;

	err = ioctl(self->vcpu_fd, KVM_RUN, 0);
	if (err && (errno != EINTR && errno != EAGAIN))
		die_perror("KVM_RUN failed");
}
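
/*
 * Typical vcpu lifecycle using the helpers above (illustrative sketch;
 * the struct kvm instance is assumed to have been set up elsewhere):
 *
 *	struct kvm_cpu *cpu;
 *
 *	cpu = kvm_cpu__init(kvm);
 *	if (!cpu)
 *		die("out of memory");
 *
 *	kvm_cpu__reset_vcpu(cpu);
 *
 *	kvm_cpu__run(cpu);	(repeated until the guest shuts down)
 *
 *	kvm_cpu__delete(cpu);
 */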