xref: /kvmtool/x86/kvm-cpu.c (revision 1cc05b24bfe0211bb408f3264af8e0c42dcdde9c)
1 #include "kvm/kvm-cpu.h"
2 
3 #include "kvm/symbol.h"
4 #include "kvm/util.h"
5 #include "kvm/kvm.h"
6 
7 #include <asm/apicdef.h>
8 #include <linux/err.h>
9 #include <sys/ioctl.h>
10 #include <sys/mman.h>
11 #include <signal.h>
12 #include <stdlib.h>
13 #include <string.h>
14 #include <errno.h>
15 #include <stdio.h>
16 
/*
 * Descriptor that the dump helpers in this file (registers, code,
 * page tables) write to via dprintf().
 */
static int debug_fd;

/* Choose the descriptor that subsequent dumps are written to. */
void kvm_cpu__set_debug_fd(int fd)
{
	debug_fd = fd;
}

/* Return the descriptor currently used for dumps. */
int kvm_cpu__get_debug_fd(void)
{
	return debug_fd;
}
28 
is_in_protected_mode(struct kvm_cpu * vcpu)29 static inline bool is_in_protected_mode(struct kvm_cpu *vcpu)
30 {
31 	return vcpu->sregs.cr0 & 0x01;
32 }
33 
/*
 * Convert an instruction pointer into a flat guest address.
 *
 * In protected mode @ip is returned as-is.  Otherwise the cached CS
 * selector, shifted left by four bits, is added to it.
 *
 * NOTE! The code segment base address should be taken into account in
 * protected mode.  Luckily it's usually zero because Linux uses a flat
 * memory model.
 */
static inline u64 ip_to_flat(struct kvm_cpu *vcpu, u64 ip)
{
	u64 segment_offset;

	if (is_in_protected_mode(vcpu))
		return ip;

	segment_offset = (u64)vcpu->sregs.cs.selector << 4;

	return ip + segment_offset;
}
49 
selector_to_base(u16 selector)50 static inline u32 selector_to_base(u16 selector)
51 {
52 	/*
53 	 * KVM on Intel requires 'base' to be 'selector * 16' in real mode.
54 	 */
55 	return (u32)selector << 4;
56 }
57 
kvm_cpu__new(struct kvm * kvm)58 static struct kvm_cpu *kvm_cpu__new(struct kvm *kvm)
59 {
60 	struct kvm_cpu *vcpu;
61 
62 	vcpu = calloc(1, sizeof(*vcpu));
63 	if (!vcpu)
64 		return NULL;
65 
66 	vcpu->kvm = kvm;
67 
68 	return vcpu;
69 }
70 
kvm_cpu__delete(struct kvm_cpu * vcpu)71 void kvm_cpu__delete(struct kvm_cpu *vcpu)
72 {
73 	if (vcpu->msrs)
74 		free(vcpu->msrs);
75 
76 	free(vcpu);
77 }
78 
kvm_cpu__set_lint(struct kvm_cpu * vcpu)79 static int kvm_cpu__set_lint(struct kvm_cpu *vcpu)
80 {
81 	struct local_apic lapic;
82 
83 	if (ioctl(vcpu->vcpu_fd, KVM_GET_LAPIC, &lapic))
84 		return -1;
85 
86 	lapic.lvt_lint0.delivery_mode = APIC_MODE_EXTINT;
87 	lapic.lvt_lint1.delivery_mode = APIC_MODE_NMI;
88 
89 	return ioctl(vcpu->vcpu_fd, KVM_SET_LAPIC, &lapic);
90 }
91 
kvm_cpu__arch_init(struct kvm * kvm,unsigned long cpu_id)92 struct kvm_cpu *kvm_cpu__arch_init(struct kvm *kvm, unsigned long cpu_id)
93 {
94 	struct kvm_cpu *vcpu;
95 	int mmap_size;
96 	int coalesced_offset;
97 
98 	vcpu = kvm_cpu__new(kvm);
99 	if (!vcpu)
100 		return NULL;
101 
102 	vcpu->cpu_id = cpu_id;
103 
104 	vcpu->vcpu_fd = ioctl(vcpu->kvm->vm_fd, KVM_CREATE_VCPU, cpu_id);
105 	if (vcpu->vcpu_fd < 0)
106 		die_perror("KVM_CREATE_VCPU ioctl");
107 
108 	mmap_size = ioctl(vcpu->kvm->sys_fd, KVM_GET_VCPU_MMAP_SIZE, 0);
109 	if (mmap_size < 0)
110 		die_perror("KVM_GET_VCPU_MMAP_SIZE ioctl");
111 
112 	vcpu->kvm_run = mmap(NULL, mmap_size, PROT_RW, MAP_SHARED, vcpu->vcpu_fd, 0);
113 	if (vcpu->kvm_run == MAP_FAILED)
114 		die("unable to mmap vcpu fd");
115 
116 	coalesced_offset = ioctl(kvm->sys_fd, KVM_CHECK_EXTENSION, KVM_CAP_COALESCED_MMIO);
117 	if (coalesced_offset)
118 		vcpu->ring = (void *)vcpu->kvm_run + (coalesced_offset * PAGE_SIZE);
119 
120 	if (kvm_cpu__set_lint(vcpu))
121 		die_perror("KVM_SET_LAPIC failed");
122 
123 	vcpu->is_running = true;
124 
125 	return vcpu;
126 }
127 
kvm_msrs__new(size_t nmsrs)128 static struct kvm_msrs *kvm_msrs__new(size_t nmsrs)
129 {
130 	struct kvm_msrs *vcpu = calloc(1, sizeof(*vcpu) + (sizeof(struct kvm_msr_entry) * nmsrs));
131 
132 	if (!vcpu)
133 		die("out of memory");
134 
135 	return vcpu;
136 }
137 
/* SYSENTER configuration MSRs */
#define MSR_IA32_SYSENTER_CS            0x00000174
#define MSR_IA32_SYSENTER_ESP           0x00000175
#define MSR_IA32_SYSENTER_EIP           0x00000176

/* SYSCALL/SWAPGS related MSRs */
#define MSR_STAR                0xc0000081 /* legacy mode SYSCALL target */
#define MSR_LSTAR               0xc0000082 /* long mode SYSCALL target */
#define MSR_CSTAR               0xc0000083 /* compat mode SYSCALL target */
#define MSR_SYSCALL_MASK        0xc0000084 /* EFLAGS mask for syscall */
#define MSR_KERNEL_GS_BASE      0xc0000102 /* SwapGS GS shadow */

#define MSR_IA32_TSC                    0x00000010
#define MSR_IA32_MISC_ENABLE            0x000001a0

/* Bit 0 of MSR_IA32_MISC_ENABLE and its mask */
#define MSR_IA32_MISC_ENABLE_FAST_STRING_BIT            0
#define MSR_IA32_MISC_ENABLE_FAST_STRING                (1ULL << MSR_IA32_MISC_ENABLE_FAST_STRING_BIT)

/* Build a struct kvm_msr_entry value holding MSR number _index and _data. */
#define KVM_MSR_ENTRY(_index, _data)	\
	(struct kvm_msr_entry) { .index = (_index), .data = (_data) }
156 
kvm_cpu__setup_msrs(struct kvm_cpu * vcpu)157 static void kvm_cpu__setup_msrs(struct kvm_cpu *vcpu)
158 {
159 	unsigned long ndx = 0;
160 
161 	vcpu->msrs = kvm_msrs__new(100);
162 
163 	vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_SYSENTER_CS,	0x0);
164 	vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_SYSENTER_ESP,	0x0);
165 	vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_SYSENTER_EIP,	0x0);
166 #ifdef CONFIG_X86_64
167 	vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_STAR,			0x0);
168 	vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_CSTAR,			0x0);
169 	vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_KERNEL_GS_BASE,		0x0);
170 	vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_SYSCALL_MASK,		0x0);
171 	vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_LSTAR,			0x0);
172 #endif
173 	vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_TSC,		0x0);
174 	vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_MISC_ENABLE,
175 						MSR_IA32_MISC_ENABLE_FAST_STRING);
176 
177 	vcpu->msrs->nmsrs = ndx;
178 
179 	if (ioctl(vcpu->vcpu_fd, KVM_SET_MSRS, vcpu->msrs) < 0)
180 		die_perror("KVM_SET_MSRS failed");
181 }
182 
kvm_cpu__setup_fpu(struct kvm_cpu * vcpu)183 static void kvm_cpu__setup_fpu(struct kvm_cpu *vcpu)
184 {
185 	vcpu->fpu = (struct kvm_fpu) {
186 		.fcw	= 0x37f,
187 		.mxcsr	= 0x1f80,
188 	};
189 
190 	if (ioctl(vcpu->vcpu_fd, KVM_SET_FPU, &vcpu->fpu) < 0)
191 		die_perror("KVM_SET_FPU failed");
192 }
193 
kvm_cpu__setup_regs(struct kvm_cpu * vcpu)194 static void kvm_cpu__setup_regs(struct kvm_cpu *vcpu)
195 {
196 	vcpu->regs = (struct kvm_regs) {
197 		/* We start the guest in 16-bit real mode  */
198 		.rflags	= 0x0000000000000002ULL,
199 
200 		.rip	= vcpu->kvm->arch.boot_ip,
201 		.rsp	= vcpu->kvm->arch.boot_sp,
202 		.rbp	= vcpu->kvm->arch.boot_sp,
203 	};
204 
205 	if (vcpu->regs.rip > USHRT_MAX)
206 		die("ip 0x%llx is too high for real mode", (u64)vcpu->regs.rip);
207 
208 	if (ioctl(vcpu->vcpu_fd, KVM_SET_REGS, &vcpu->regs) < 0)
209 		die_perror("KVM_SET_REGS failed");
210 }
211 
kvm_cpu__setup_sregs(struct kvm_cpu * vcpu)212 static void kvm_cpu__setup_sregs(struct kvm_cpu *vcpu)
213 {
214 	if (ioctl(vcpu->vcpu_fd, KVM_GET_SREGS, &vcpu->sregs) < 0)
215 		die_perror("KVM_GET_SREGS failed");
216 
217 	vcpu->sregs.cs.selector	= vcpu->kvm->arch.boot_selector;
218 	vcpu->sregs.cs.base	= selector_to_base(vcpu->kvm->arch.boot_selector);
219 	vcpu->sregs.ss.selector	= vcpu->kvm->arch.boot_selector;
220 	vcpu->sregs.ss.base	= selector_to_base(vcpu->kvm->arch.boot_selector);
221 	vcpu->sregs.ds.selector	= vcpu->kvm->arch.boot_selector;
222 	vcpu->sregs.ds.base	= selector_to_base(vcpu->kvm->arch.boot_selector);
223 	vcpu->sregs.es.selector	= vcpu->kvm->arch.boot_selector;
224 	vcpu->sregs.es.base	= selector_to_base(vcpu->kvm->arch.boot_selector);
225 	vcpu->sregs.fs.selector	= vcpu->kvm->arch.boot_selector;
226 	vcpu->sregs.fs.base	= selector_to_base(vcpu->kvm->arch.boot_selector);
227 	vcpu->sregs.gs.selector	= vcpu->kvm->arch.boot_selector;
228 	vcpu->sregs.gs.base	= selector_to_base(vcpu->kvm->arch.boot_selector);
229 
230 	if (ioctl(vcpu->vcpu_fd, KVM_SET_SREGS, &vcpu->sregs) < 0)
231 		die_perror("KVM_SET_SREGS failed");
232 }
233 
/**
 * kvm_cpu__reset_vcpu - reset virtual CPU to a known state
 *
 * Runs the setup steps in a fixed order: CPUID, special registers,
 * general-purpose registers, FPU state and finally the MSRs.
 */
void kvm_cpu__reset_vcpu(struct kvm_cpu *vcpu)
{
	/* CPUID first, then architectural register state. */
	kvm_cpu__setup_cpuid(vcpu);

	kvm_cpu__setup_sregs(vcpu);
	kvm_cpu__setup_regs(vcpu);

	kvm_cpu__setup_fpu(vcpu);
	kvm_cpu__setup_msrs(vcpu);
}
245 
/*
 * Architecture hook for vcpu exits.
 *
 * The x86 implementation ignores @vcpu and always returns false.
 * NOTE(review): presumably false tells the caller the exit was not handled
 * here - confirm against the generic caller.
 */
bool kvm_cpu__handle_exit(struct kvm_cpu *vcpu)
{
	(void)vcpu;

	return false;
}
250 
print_dtable(const char * name,struct kvm_dtable * dtable)251 static void print_dtable(const char *name, struct kvm_dtable *dtable)
252 {
253 	dprintf(debug_fd, " %s                 %016llx  %08hx\n",
254 		name, (u64) dtable->base, (u16) dtable->limit);
255 }
256 
print_segment(const char * name,struct kvm_segment * seg)257 static void print_segment(const char *name, struct kvm_segment *seg)
258 {
259 	dprintf(debug_fd, " %s       %04hx      %016llx  %08x  %02hhx    %x %x   %x  %x %x %x %x\n",
260 		name, (u16) seg->selector, (u64) seg->base, (u32) seg->limit,
261 		(u8) seg->type, seg->present, seg->dpl, seg->db, seg->s, seg->l, seg->g, seg->avl);
262 }
263 
kvm_cpu__show_registers(struct kvm_cpu * vcpu)264 void kvm_cpu__show_registers(struct kvm_cpu *vcpu)
265 {
266 	unsigned long cr0, cr2, cr3;
267 	unsigned long cr4, cr8;
268 	unsigned long rax, rbx, rcx;
269 	unsigned long rdx, rsi, rdi;
270 	unsigned long rbp,  r8,  r9;
271 	unsigned long r10, r11, r12;
272 	unsigned long r13, r14, r15;
273 	unsigned long rip, rsp;
274 	struct kvm_sregs sregs;
275 	unsigned long rflags;
276 	struct kvm_regs regs;
277 	int i;
278 
279 	if (ioctl(vcpu->vcpu_fd, KVM_GET_REGS, &regs) < 0)
280 		die("KVM_GET_REGS failed");
281 
282 	rflags = regs.rflags;
283 
284 	rip = regs.rip; rsp = regs.rsp;
285 	rax = regs.rax; rbx = regs.rbx; rcx = regs.rcx;
286 	rdx = regs.rdx; rsi = regs.rsi; rdi = regs.rdi;
287 	rbp = regs.rbp; r8  = regs.r8;  r9  = regs.r9;
288 	r10 = regs.r10; r11 = regs.r11; r12 = regs.r12;
289 	r13 = regs.r13; r14 = regs.r14; r15 = regs.r15;
290 
291 	dprintf(debug_fd, "\n Registers:\n");
292 	dprintf(debug_fd,   " ----------\n");
293 	dprintf(debug_fd, " rip: %016lx   rsp: %016lx flags: %016lx\n", rip, rsp, rflags);
294 	dprintf(debug_fd, " rax: %016lx   rbx: %016lx   rcx: %016lx\n", rax, rbx, rcx);
295 	dprintf(debug_fd, " rdx: %016lx   rsi: %016lx   rdi: %016lx\n", rdx, rsi, rdi);
296 	dprintf(debug_fd, " rbp: %016lx    r8: %016lx    r9: %016lx\n", rbp, r8,  r9);
297 	dprintf(debug_fd, " r10: %016lx   r11: %016lx   r12: %016lx\n", r10, r11, r12);
298 	dprintf(debug_fd, " r13: %016lx   r14: %016lx   r15: %016lx\n", r13, r14, r15);
299 
300 	if (ioctl(vcpu->vcpu_fd, KVM_GET_SREGS, &sregs) < 0)
301 		die("KVM_GET_REGS failed");
302 
303 	cr0 = sregs.cr0; cr2 = sregs.cr2; cr3 = sregs.cr3;
304 	cr4 = sregs.cr4; cr8 = sregs.cr8;
305 
306 	dprintf(debug_fd, " cr0: %016lx   cr2: %016lx   cr3: %016lx\n", cr0, cr2, cr3);
307 	dprintf(debug_fd, " cr4: %016lx   cr8: %016lx\n", cr4, cr8);
308 	dprintf(debug_fd, "\n Segment registers:\n");
309 	dprintf(debug_fd,   " ------------------\n");
310 	dprintf(debug_fd, " register  selector  base              limit     type  p dpl db s l g avl\n");
311 	print_segment("cs ", &sregs.cs);
312 	print_segment("ss ", &sregs.ss);
313 	print_segment("ds ", &sregs.ds);
314 	print_segment("es ", &sregs.es);
315 	print_segment("fs ", &sregs.fs);
316 	print_segment("gs ", &sregs.gs);
317 	print_segment("tr ", &sregs.tr);
318 	print_segment("ldt", &sregs.ldt);
319 	print_dtable("gdt", &sregs.gdt);
320 	print_dtable("idt", &sregs.idt);
321 
322 	dprintf(debug_fd, "\n APIC:\n");
323 	dprintf(debug_fd,   " -----\n");
324 	dprintf(debug_fd, " efer: %016llx  apic base: %016llx  nmi: %s\n",
325 		(u64) sregs.efer, (u64) sregs.apic_base,
326 		(vcpu->kvm->nmi_disabled ? "disabled" : "enabled"));
327 
328 	dprintf(debug_fd, "\n Interrupt bitmap:\n");
329 	dprintf(debug_fd,   " -----------------\n");
330 	for (i = 0; i < (KVM_NR_INTERRUPTS + 63) / 64; i++)
331 		dprintf(debug_fd, " %016llx", (u64) sregs.interrupt_bitmap[i]);
332 	dprintf(debug_fd, "\n");
333 }
334 
335 #define MAX_SYM_LEN 128
336 
kvm_cpu__show_code(struct kvm_cpu * vcpu)337 void kvm_cpu__show_code(struct kvm_cpu *vcpu)
338 {
339 	unsigned int code_bytes = 64;
340 	unsigned int code_prologue = 43;
341 	unsigned int code_len = code_bytes;
342 	char sym[MAX_SYM_LEN] = SYMBOL_DEFAULT_UNKNOWN, *psym;
343 	unsigned char c;
344 	unsigned int i;
345 	u8 *ip;
346 
347 	if (ioctl(vcpu->vcpu_fd, KVM_GET_REGS, &vcpu->regs) < 0)
348 		die("KVM_GET_REGS failed");
349 
350 	if (ioctl(vcpu->vcpu_fd, KVM_GET_SREGS, &vcpu->sregs) < 0)
351 		die("KVM_GET_SREGS failed");
352 
353 	ip = guest_flat_to_host(vcpu->kvm, ip_to_flat(vcpu, vcpu->regs.rip) - code_prologue);
354 
355 	dprintf(debug_fd, "\n Code:\n");
356 	dprintf(debug_fd,   " -----\n");
357 
358 	psym = symbol_lookup(vcpu->kvm, vcpu->regs.rip, sym, MAX_SYM_LEN);
359 	if (IS_ERR(psym))
360 		dprintf(debug_fd,
361 			"Warning: symbol_lookup() failed to find symbol "
362 			"with error: %ld\n", PTR_ERR(psym));
363 
364 	dprintf(debug_fd, " rip: [<%016lx>] %s\n\n", (unsigned long) vcpu->regs.rip, sym);
365 
366 	for (i = 0; i < code_len; i++, ip++) {
367 		if (!host_ptr_in_ram(vcpu->kvm, ip))
368 			break;
369 
370 		c = *ip;
371 
372 		if (ip == guest_flat_to_host(vcpu->kvm, ip_to_flat(vcpu, vcpu->regs.rip)))
373 			dprintf(debug_fd, " <%02x>", c);
374 		else
375 			dprintf(debug_fd, " %02x", c);
376 	}
377 
378 	dprintf(debug_fd, "\n");
379 
380 	dprintf(debug_fd, "\n Stack:\n");
381 	dprintf(debug_fd,   " ------\n");
382 	dprintf(debug_fd, " rsp: [<%016lx>] \n", (unsigned long) vcpu->regs.rsp);
383 	kvm__dump_mem(vcpu->kvm, vcpu->regs.rsp, 32, debug_fd);
384 }
385 
kvm_cpu__show_page_tables(struct kvm_cpu * vcpu)386 void kvm_cpu__show_page_tables(struct kvm_cpu *vcpu)
387 {
388 	u64 *pte1;
389 	u64 *pte2;
390 	u64 *pte3;
391 	u64 *pte4;
392 
393 	if (!is_in_protected_mode(vcpu)) {
394 		dprintf(debug_fd, "\n Page Tables:\n");
395 		dprintf(debug_fd, " ------\n");
396 		dprintf(debug_fd, " Not in protected mode\n");
397 		return;
398 	}
399 
400 	if (ioctl(vcpu->vcpu_fd, KVM_GET_SREGS, &vcpu->sregs) < 0)
401 		die("KVM_GET_SREGS failed");
402 
403 	pte4 = guest_flat_to_host(vcpu->kvm, vcpu->sregs.cr3);
404 	if (!host_ptr_in_ram(vcpu->kvm, pte4))
405 		return;
406 
407 	pte3 = guest_flat_to_host(vcpu->kvm, (*pte4 & ~0xfff));
408 	if (!host_ptr_in_ram(vcpu->kvm, pte3))
409 		return;
410 
411 	pte2 = guest_flat_to_host(vcpu->kvm, (*pte3 & ~0xfff));
412 	if (!host_ptr_in_ram(vcpu->kvm, pte2))
413 		return;
414 
415 	pte1 = guest_flat_to_host(vcpu->kvm, (*pte2 & ~0xfff));
416 	if (!host_ptr_in_ram(vcpu->kvm, pte1))
417 		return;
418 
419 	dprintf(debug_fd, "\n Page Tables:\n");
420 	dprintf(debug_fd, " ------\n");
421 	if (*pte2 & (1 << 7))
422 		dprintf(debug_fd, " pte4: %016llx   pte3: %016llx"
423 			"   pte2: %016llx\n",
424 			*pte4, *pte3, *pte2);
425 	else
426 		dprintf(debug_fd, " pte4: %016llx  pte3: %016llx   pte2: %016"
427 			"llx   pte1: %016llx\n",
428 			*pte4, *pte3, *pte2, *pte1);
429 }
430 
kvm_cpu__arch_nmi(struct kvm_cpu * cpu)431 void kvm_cpu__arch_nmi(struct kvm_cpu *cpu)
432 {
433 	struct kvm_lapic_state klapic;
434 	struct local_apic *lapic = (void *)&klapic;
435 
436 	if (ioctl(cpu->vcpu_fd, KVM_GET_LAPIC, &klapic) != 0)
437 		return;
438 
439 	if (lapic->lvt_lint1.mask)
440 		return;
441 
442 	if (lapic->lvt_lint1.delivery_mode != APIC_MODE_NMI)
443 		return;
444 
445 	ioctl(cpu->vcpu_fd, KVM_NMI);
446 }
447