1 #include "kvm/kvm-cpu.h"
2
3 #include "kvm/symbol.h"
4 #include "kvm/util.h"
5 #include "kvm/kvm.h"
6
7 #include <asm/apicdef.h>
8 #include <linux/err.h>
9 #include <sys/ioctl.h>
10 #include <sys/mman.h>
11 #include <signal.h>
12 #include <stdlib.h>
13 #include <string.h>
14 #include <errno.h>
15 #include <stdio.h>
16
/* Descriptor that every dprintf() dump in this file writes to. */
static int debug_fd;

/*
 * kvm_cpu__set_debug_fd - choose where the debug dumps go
 * @fd: descriptor stored for later use by the show_* helpers in this file
 */
void kvm_cpu__set_debug_fd(int fd)
{
	debug_fd = fd;
}
23
kvm_cpu__get_debug_fd(void)24 int kvm_cpu__get_debug_fd(void)
25 {
26 return debug_fd;
27 }
28
is_in_protected_mode(struct kvm_cpu * vcpu)29 static inline bool is_in_protected_mode(struct kvm_cpu *vcpu)
30 {
31 return vcpu->sregs.cr0 & 0x01;
32 }
33
/*
 * Translate an instruction pointer into a flat guest address.
 *
 * When is_in_protected_mode() is true @ip is returned untouched; otherwise
 * the CS selector from vcpu->sregs, shifted left by four, is added to it.
 */
static inline u64 ip_to_flat(struct kvm_cpu *vcpu, u64 ip)
{
	u64 segment_offset;

	/*
	 * NOTE! We should take code segment base address into account here.
	 * Luckily it's usually zero because Linux uses flat memory model.
	 */
	if (is_in_protected_mode(vcpu))
		return ip;

	segment_offset = (u64)vcpu->sregs.cs.selector << 4;

	return segment_offset + ip;
}
49
selector_to_base(u16 selector)50 static inline u32 selector_to_base(u16 selector)
51 {
52 /*
53 * KVM on Intel requires 'base' to be 'selector * 16' in real mode.
54 */
55 return (u32)selector << 4;
56 }
57
kvm_cpu__new(struct kvm * kvm)58 static struct kvm_cpu *kvm_cpu__new(struct kvm *kvm)
59 {
60 struct kvm_cpu *vcpu;
61
62 vcpu = calloc(1, sizeof(*vcpu));
63 if (!vcpu)
64 return NULL;
65
66 vcpu->kvm = kvm;
67
68 return vcpu;
69 }
70
kvm_cpu__delete(struct kvm_cpu * vcpu)71 void kvm_cpu__delete(struct kvm_cpu *vcpu)
72 {
73 if (vcpu->msrs)
74 free(vcpu->msrs);
75
76 free(vcpu);
77 }
78
kvm_cpu__set_lint(struct kvm_cpu * vcpu)79 static int kvm_cpu__set_lint(struct kvm_cpu *vcpu)
80 {
81 struct local_apic lapic;
82
83 if (ioctl(vcpu->vcpu_fd, KVM_GET_LAPIC, &lapic))
84 return -1;
85
86 lapic.lvt_lint0.delivery_mode = APIC_MODE_EXTINT;
87 lapic.lvt_lint1.delivery_mode = APIC_MODE_NMI;
88
89 return ioctl(vcpu->vcpu_fd, KVM_SET_LAPIC, &lapic);
90 }
91
kvm_cpu__arch_init(struct kvm * kvm,unsigned long cpu_id)92 struct kvm_cpu *kvm_cpu__arch_init(struct kvm *kvm, unsigned long cpu_id)
93 {
94 struct kvm_cpu *vcpu;
95 int mmap_size;
96 int coalesced_offset;
97
98 vcpu = kvm_cpu__new(kvm);
99 if (!vcpu)
100 return NULL;
101
102 vcpu->cpu_id = cpu_id;
103
104 vcpu->vcpu_fd = ioctl(vcpu->kvm->vm_fd, KVM_CREATE_VCPU, cpu_id);
105 if (vcpu->vcpu_fd < 0)
106 die_perror("KVM_CREATE_VCPU ioctl");
107
108 mmap_size = ioctl(vcpu->kvm->sys_fd, KVM_GET_VCPU_MMAP_SIZE, 0);
109 if (mmap_size < 0)
110 die_perror("KVM_GET_VCPU_MMAP_SIZE ioctl");
111
112 vcpu->kvm_run = mmap(NULL, mmap_size, PROT_RW, MAP_SHARED, vcpu->vcpu_fd, 0);
113 if (vcpu->kvm_run == MAP_FAILED)
114 die("unable to mmap vcpu fd");
115
116 coalesced_offset = ioctl(kvm->sys_fd, KVM_CHECK_EXTENSION, KVM_CAP_COALESCED_MMIO);
117 if (coalesced_offset)
118 vcpu->ring = (void *)vcpu->kvm_run + (coalesced_offset * PAGE_SIZE);
119
120 if (kvm_cpu__set_lint(vcpu))
121 die_perror("KVM_SET_LAPIC failed");
122
123 vcpu->is_running = true;
124
125 return vcpu;
126 }
127
kvm_msrs__new(size_t nmsrs)128 static struct kvm_msrs *kvm_msrs__new(size_t nmsrs)
129 {
130 struct kvm_msrs *vcpu = calloc(1, sizeof(*vcpu) + (sizeof(struct kvm_msr_entry) * nmsrs));
131
132 if (!vcpu)
133 die("out of memory");
134
135 return vcpu;
136 }
137
/* SYSENTER MSR indices */
#define MSR_IA32_SYSENTER_CS		0x00000174
#define MSR_IA32_SYSENTER_ESP		0x00000175
#define MSR_IA32_SYSENTER_EIP		0x00000176

/* SYSCALL / SWAPGS MSR indices */
#define MSR_STAR			0xc0000081 /* legacy mode SYSCALL target */
#define MSR_LSTAR			0xc0000082 /* long mode SYSCALL target */
#define MSR_CSTAR			0xc0000083 /* compat mode SYSCALL target */
#define MSR_SYSCALL_MASK		0xc0000084 /* EFLAGS mask for syscall */
#define MSR_KERNEL_GS_BASE		0xc0000102 /* SwapGS GS shadow */

#define MSR_IA32_TSC			0x00000010
#define MSR_IA32_MISC_ENABLE		0x000001a0

#define MSR_IA32_MISC_ENABLE_FAST_STRING_BIT	0
#define MSR_IA32_MISC_ENABLE_FAST_STRING	(1ULL << MSR_IA32_MISC_ENABLE_FAST_STRING_BIT)

/*
 * Build a struct kvm_msr_entry value for MSR @_index holding @_data.
 * Arguments and the whole expansion are parenthesized so the macro is safe
 * to use with arbitrary expressions and inside larger expressions.
 */
#define KVM_MSR_ENTRY(_index, _data)	\
	((struct kvm_msr_entry) { .index = (_index), .data = (_data) })
156
kvm_cpu__setup_msrs(struct kvm_cpu * vcpu)157 static void kvm_cpu__setup_msrs(struct kvm_cpu *vcpu)
158 {
159 unsigned long ndx = 0;
160
161 vcpu->msrs = kvm_msrs__new(100);
162
163 vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_SYSENTER_CS, 0x0);
164 vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_SYSENTER_ESP, 0x0);
165 vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_SYSENTER_EIP, 0x0);
166 #ifdef CONFIG_X86_64
167 vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_STAR, 0x0);
168 vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_CSTAR, 0x0);
169 vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_KERNEL_GS_BASE, 0x0);
170 vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_SYSCALL_MASK, 0x0);
171 vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_LSTAR, 0x0);
172 #endif
173 vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_TSC, 0x0);
174 vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_MISC_ENABLE,
175 MSR_IA32_MISC_ENABLE_FAST_STRING);
176
177 vcpu->msrs->nmsrs = ndx;
178
179 if (ioctl(vcpu->vcpu_fd, KVM_SET_MSRS, vcpu->msrs) < 0)
180 die_perror("KVM_SET_MSRS failed");
181 }
182
kvm_cpu__setup_fpu(struct kvm_cpu * vcpu)183 static void kvm_cpu__setup_fpu(struct kvm_cpu *vcpu)
184 {
185 vcpu->fpu = (struct kvm_fpu) {
186 .fcw = 0x37f,
187 .mxcsr = 0x1f80,
188 };
189
190 if (ioctl(vcpu->vcpu_fd, KVM_SET_FPU, &vcpu->fpu) < 0)
191 die_perror("KVM_SET_FPU failed");
192 }
193
kvm_cpu__setup_regs(struct kvm_cpu * vcpu)194 static void kvm_cpu__setup_regs(struct kvm_cpu *vcpu)
195 {
196 vcpu->regs = (struct kvm_regs) {
197 /* We start the guest in 16-bit real mode */
198 .rflags = 0x0000000000000002ULL,
199
200 .rip = vcpu->kvm->arch.boot_ip,
201 .rsp = vcpu->kvm->arch.boot_sp,
202 .rbp = vcpu->kvm->arch.boot_sp,
203 };
204
205 if (vcpu->regs.rip > USHRT_MAX)
206 die("ip 0x%llx is too high for real mode", (u64)vcpu->regs.rip);
207
208 if (ioctl(vcpu->vcpu_fd, KVM_SET_REGS, &vcpu->regs) < 0)
209 die_perror("KVM_SET_REGS failed");
210 }
211
kvm_cpu__setup_sregs(struct kvm_cpu * vcpu)212 static void kvm_cpu__setup_sregs(struct kvm_cpu *vcpu)
213 {
214 if (ioctl(vcpu->vcpu_fd, KVM_GET_SREGS, &vcpu->sregs) < 0)
215 die_perror("KVM_GET_SREGS failed");
216
217 vcpu->sregs.cs.selector = vcpu->kvm->arch.boot_selector;
218 vcpu->sregs.cs.base = selector_to_base(vcpu->kvm->arch.boot_selector);
219 vcpu->sregs.ss.selector = vcpu->kvm->arch.boot_selector;
220 vcpu->sregs.ss.base = selector_to_base(vcpu->kvm->arch.boot_selector);
221 vcpu->sregs.ds.selector = vcpu->kvm->arch.boot_selector;
222 vcpu->sregs.ds.base = selector_to_base(vcpu->kvm->arch.boot_selector);
223 vcpu->sregs.es.selector = vcpu->kvm->arch.boot_selector;
224 vcpu->sregs.es.base = selector_to_base(vcpu->kvm->arch.boot_selector);
225 vcpu->sregs.fs.selector = vcpu->kvm->arch.boot_selector;
226 vcpu->sregs.fs.base = selector_to_base(vcpu->kvm->arch.boot_selector);
227 vcpu->sregs.gs.selector = vcpu->kvm->arch.boot_selector;
228 vcpu->sregs.gs.base = selector_to_base(vcpu->kvm->arch.boot_selector);
229
230 if (ioctl(vcpu->vcpu_fd, KVM_SET_SREGS, &vcpu->sregs) < 0)
231 die_perror("KVM_SET_SREGS failed");
232 }
233
/**
 * kvm_cpu__reset_vcpu - reset virtual CPU to a known state
 * @vcpu: virtual CPU to program
 *
 * Runs the setup helpers in a fixed order: CPUID, special (segment)
 * registers, general-purpose registers, FPU, and finally MSRs.
 */
void kvm_cpu__reset_vcpu(struct kvm_cpu *vcpu)
{
	kvm_cpu__setup_cpuid(vcpu);

	kvm_cpu__setup_sregs(vcpu);
	kvm_cpu__setup_regs(vcpu);

	kvm_cpu__setup_fpu(vcpu);
	kvm_cpu__setup_msrs(vcpu);
}
245
/*
 * kvm_cpu__handle_exit - architecture hook for vcpu exits
 * @vcpu: unused
 *
 * Performs no work and always returns false.
 * NOTE(review): presumably false means "exit not handled here"; confirm
 * against the caller.
 */
bool kvm_cpu__handle_exit(struct kvm_cpu *vcpu)
{
	return false;
}
250
print_dtable(const char * name,struct kvm_dtable * dtable)251 static void print_dtable(const char *name, struct kvm_dtable *dtable)
252 {
253 dprintf(debug_fd, " %s %016llx %08hx\n",
254 name, (u64) dtable->base, (u16) dtable->limit);
255 }
256
print_segment(const char * name,struct kvm_segment * seg)257 static void print_segment(const char *name, struct kvm_segment *seg)
258 {
259 dprintf(debug_fd, " %s %04hx %016llx %08x %02hhx %x %x %x %x %x %x %x\n",
260 name, (u16) seg->selector, (u64) seg->base, (u32) seg->limit,
261 (u8) seg->type, seg->present, seg->dpl, seg->db, seg->s, seg->l, seg->g, seg->avl);
262 }
263
kvm_cpu__show_registers(struct kvm_cpu * vcpu)264 void kvm_cpu__show_registers(struct kvm_cpu *vcpu)
265 {
266 unsigned long cr0, cr2, cr3;
267 unsigned long cr4, cr8;
268 unsigned long rax, rbx, rcx;
269 unsigned long rdx, rsi, rdi;
270 unsigned long rbp, r8, r9;
271 unsigned long r10, r11, r12;
272 unsigned long r13, r14, r15;
273 unsigned long rip, rsp;
274 struct kvm_sregs sregs;
275 unsigned long rflags;
276 struct kvm_regs regs;
277 int i;
278
279 if (ioctl(vcpu->vcpu_fd, KVM_GET_REGS, ®s) < 0)
280 die("KVM_GET_REGS failed");
281
282 rflags = regs.rflags;
283
284 rip = regs.rip; rsp = regs.rsp;
285 rax = regs.rax; rbx = regs.rbx; rcx = regs.rcx;
286 rdx = regs.rdx; rsi = regs.rsi; rdi = regs.rdi;
287 rbp = regs.rbp; r8 = regs.r8; r9 = regs.r9;
288 r10 = regs.r10; r11 = regs.r11; r12 = regs.r12;
289 r13 = regs.r13; r14 = regs.r14; r15 = regs.r15;
290
291 dprintf(debug_fd, "\n Registers:\n");
292 dprintf(debug_fd, " ----------\n");
293 dprintf(debug_fd, " rip: %016lx rsp: %016lx flags: %016lx\n", rip, rsp, rflags);
294 dprintf(debug_fd, " rax: %016lx rbx: %016lx rcx: %016lx\n", rax, rbx, rcx);
295 dprintf(debug_fd, " rdx: %016lx rsi: %016lx rdi: %016lx\n", rdx, rsi, rdi);
296 dprintf(debug_fd, " rbp: %016lx r8: %016lx r9: %016lx\n", rbp, r8, r9);
297 dprintf(debug_fd, " r10: %016lx r11: %016lx r12: %016lx\n", r10, r11, r12);
298 dprintf(debug_fd, " r13: %016lx r14: %016lx r15: %016lx\n", r13, r14, r15);
299
300 if (ioctl(vcpu->vcpu_fd, KVM_GET_SREGS, &sregs) < 0)
301 die("KVM_GET_REGS failed");
302
303 cr0 = sregs.cr0; cr2 = sregs.cr2; cr3 = sregs.cr3;
304 cr4 = sregs.cr4; cr8 = sregs.cr8;
305
306 dprintf(debug_fd, " cr0: %016lx cr2: %016lx cr3: %016lx\n", cr0, cr2, cr3);
307 dprintf(debug_fd, " cr4: %016lx cr8: %016lx\n", cr4, cr8);
308 dprintf(debug_fd, "\n Segment registers:\n");
309 dprintf(debug_fd, " ------------------\n");
310 dprintf(debug_fd, " register selector base limit type p dpl db s l g avl\n");
311 print_segment("cs ", &sregs.cs);
312 print_segment("ss ", &sregs.ss);
313 print_segment("ds ", &sregs.ds);
314 print_segment("es ", &sregs.es);
315 print_segment("fs ", &sregs.fs);
316 print_segment("gs ", &sregs.gs);
317 print_segment("tr ", &sregs.tr);
318 print_segment("ldt", &sregs.ldt);
319 print_dtable("gdt", &sregs.gdt);
320 print_dtable("idt", &sregs.idt);
321
322 dprintf(debug_fd, "\n APIC:\n");
323 dprintf(debug_fd, " -----\n");
324 dprintf(debug_fd, " efer: %016llx apic base: %016llx nmi: %s\n",
325 (u64) sregs.efer, (u64) sregs.apic_base,
326 (vcpu->kvm->nmi_disabled ? "disabled" : "enabled"));
327
328 dprintf(debug_fd, "\n Interrupt bitmap:\n");
329 dprintf(debug_fd, " -----------------\n");
330 for (i = 0; i < (KVM_NR_INTERRUPTS + 63) / 64; i++)
331 dprintf(debug_fd, " %016llx", (u64) sregs.interrupt_bitmap[i]);
332 dprintf(debug_fd, "\n");
333 }
334
335 #define MAX_SYM_LEN 128
336
kvm_cpu__show_code(struct kvm_cpu * vcpu)337 void kvm_cpu__show_code(struct kvm_cpu *vcpu)
338 {
339 unsigned int code_bytes = 64;
340 unsigned int code_prologue = 43;
341 unsigned int code_len = code_bytes;
342 char sym[MAX_SYM_LEN] = SYMBOL_DEFAULT_UNKNOWN, *psym;
343 unsigned char c;
344 unsigned int i;
345 u8 *ip;
346
347 if (ioctl(vcpu->vcpu_fd, KVM_GET_REGS, &vcpu->regs) < 0)
348 die("KVM_GET_REGS failed");
349
350 if (ioctl(vcpu->vcpu_fd, KVM_GET_SREGS, &vcpu->sregs) < 0)
351 die("KVM_GET_SREGS failed");
352
353 ip = guest_flat_to_host(vcpu->kvm, ip_to_flat(vcpu, vcpu->regs.rip) - code_prologue);
354
355 dprintf(debug_fd, "\n Code:\n");
356 dprintf(debug_fd, " -----\n");
357
358 psym = symbol_lookup(vcpu->kvm, vcpu->regs.rip, sym, MAX_SYM_LEN);
359 if (IS_ERR(psym))
360 dprintf(debug_fd,
361 "Warning: symbol_lookup() failed to find symbol "
362 "with error: %ld\n", PTR_ERR(psym));
363
364 dprintf(debug_fd, " rip: [<%016lx>] %s\n\n", (unsigned long) vcpu->regs.rip, sym);
365
366 for (i = 0; i < code_len; i++, ip++) {
367 if (!host_ptr_in_ram(vcpu->kvm, ip))
368 break;
369
370 c = *ip;
371
372 if (ip == guest_flat_to_host(vcpu->kvm, ip_to_flat(vcpu, vcpu->regs.rip)))
373 dprintf(debug_fd, " <%02x>", c);
374 else
375 dprintf(debug_fd, " %02x", c);
376 }
377
378 dprintf(debug_fd, "\n");
379
380 dprintf(debug_fd, "\n Stack:\n");
381 dprintf(debug_fd, " ------\n");
382 dprintf(debug_fd, " rsp: [<%016lx>] \n", (unsigned long) vcpu->regs.rsp);
383 kvm__dump_mem(vcpu->kvm, vcpu->regs.rsp, 32, debug_fd);
384 }
385
kvm_cpu__show_page_tables(struct kvm_cpu * vcpu)386 void kvm_cpu__show_page_tables(struct kvm_cpu *vcpu)
387 {
388 u64 *pte1;
389 u64 *pte2;
390 u64 *pte3;
391 u64 *pte4;
392
393 if (!is_in_protected_mode(vcpu)) {
394 dprintf(debug_fd, "\n Page Tables:\n");
395 dprintf(debug_fd, " ------\n");
396 dprintf(debug_fd, " Not in protected mode\n");
397 return;
398 }
399
400 if (ioctl(vcpu->vcpu_fd, KVM_GET_SREGS, &vcpu->sregs) < 0)
401 die("KVM_GET_SREGS failed");
402
403 pte4 = guest_flat_to_host(vcpu->kvm, vcpu->sregs.cr3);
404 if (!host_ptr_in_ram(vcpu->kvm, pte4))
405 return;
406
407 pte3 = guest_flat_to_host(vcpu->kvm, (*pte4 & ~0xfff));
408 if (!host_ptr_in_ram(vcpu->kvm, pte3))
409 return;
410
411 pte2 = guest_flat_to_host(vcpu->kvm, (*pte3 & ~0xfff));
412 if (!host_ptr_in_ram(vcpu->kvm, pte2))
413 return;
414
415 pte1 = guest_flat_to_host(vcpu->kvm, (*pte2 & ~0xfff));
416 if (!host_ptr_in_ram(vcpu->kvm, pte1))
417 return;
418
419 dprintf(debug_fd, "\n Page Tables:\n");
420 dprintf(debug_fd, " ------\n");
421 if (*pte2 & (1 << 7))
422 dprintf(debug_fd, " pte4: %016llx pte3: %016llx"
423 " pte2: %016llx\n",
424 *pte4, *pte3, *pte2);
425 else
426 dprintf(debug_fd, " pte4: %016llx pte3: %016llx pte2: %016"
427 "llx pte1: %016llx\n",
428 *pte4, *pte3, *pte2, *pte1);
429 }
430
kvm_cpu__arch_nmi(struct kvm_cpu * cpu)431 void kvm_cpu__arch_nmi(struct kvm_cpu *cpu)
432 {
433 struct kvm_lapic_state klapic;
434 struct local_apic *lapic = (void *)&klapic;
435
436 if (ioctl(cpu->vcpu_fd, KVM_GET_LAPIC, &klapic) != 0)
437 return;
438
439 if (lapic->lvt_lint1.mask)
440 return;
441
442 if (lapic->lvt_lint1.delivery_mode != APIC_MODE_NMI)
443 return;
444
445 ioctl(cpu->vcpu_fd, KVM_NMI);
446 }
447