xref: /kvmtool/x86/kvm-cpu.c (revision b2cf1e9f5f05cf75fb50b584fe37ab3b9f0f7cc0)
#include "kvm/kvm-cpu.h"

#include "kvm/symbol.h"
#include "kvm/util.h"
#include "kvm/kvm.h"

#include <asm/msr-index.h>
#include <asm/apicdef.h>
#include <linux/err.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <signal.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <stdio.h>

static int debug_fd;

void kvm_cpu__set_debug_fd(int fd)
{
	debug_fd = fd;
}

int kvm_cpu__get_debug_fd(void)
{
	return debug_fd;
}

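/*
 * CR0 bit 0 is the Protection Enable (PE) flag. Once the guest sets
 * it, the CPU leaves real mode and segment selectors become
 * descriptor table indices instead of paragraph addresses.
 */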
static inline bool is_in_protected_mode(struct kvm_cpu *vcpu)
{
	return vcpu->sregs.cr0 & 0x01;
}

static inline u64 ip_to_flat(struct kvm_cpu *vcpu, u64 ip)
{
	u64 cs;

	/*
	 * NOTE! We should take the code segment base address into account
	 * here. Luckily it's usually zero because Linux uses a flat memory
	 * model.
	 */
	if (is_in_protected_mode(vcpu))
		return ip;

	cs = vcpu->sregs.cs.selector;

	return ip + (cs << 4);
}

static inline u32 selector_to_base(u16 selector)
{
	/*
	 * KVM on Intel requires 'base' to be 'selector * 16' in real mode.
	 */
	return (u32)selector << 4;
}

static struct kvm_cpu *kvm_cpu__new(struct kvm *kvm)
{
	struct kvm_cpu *vcpu;

	vcpu = calloc(1, sizeof(*vcpu));
	if (!vcpu)
		return NULL;

	vcpu->kvm = kvm;

	return vcpu;
}

void kvm_cpu__delete(struct kvm_cpu *vcpu)
{
	free(vcpu->msrs);
	free(vcpu);
}

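/*
 * Route the local APIC's LINT0 pin to ExtINT so the guest receives
 * legacy PIC interrupts, and LINT1 to NMI, mirroring the wiring a
 * BIOS sets up on bare metal.
 */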
static int kvm_cpu__set_lint(struct kvm_cpu *vcpu)
{
	struct local_apic lapic;

	if (ioctl(vcpu->vcpu_fd, KVM_GET_LAPIC, &lapic))
		return -1;

	lapic.lvt_lint0.delivery_mode = APIC_MODE_EXTINT;
	lapic.lvt_lint1.delivery_mode = APIC_MODE_NMI;

	return ioctl(vcpu->vcpu_fd, KVM_SET_LAPIC, &lapic);
}

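/*
 * Create the vCPU, map the kvm_run structure shared with the kernel,
 * locate the coalesced MMIO ring that follows it (when the capability
 * is present), and set up the LINT pins before the first KVM_RUN.
 */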
struct kvm_cpu *kvm_cpu__arch_init(struct kvm *kvm, unsigned long cpu_id)
{
	struct kvm_cpu *vcpu;
	int mmap_size;
	int coalesced_offset;

	vcpu = kvm_cpu__new(kvm);
	if (!vcpu)
		return NULL;

	vcpu->cpu_id = cpu_id;

	vcpu->vcpu_fd = ioctl(vcpu->kvm->vm_fd, KVM_CREATE_VCPU, cpu_id);
	if (vcpu->vcpu_fd < 0)
		die_perror("KVM_CREATE_VCPU ioctl");

	mmap_size = ioctl(vcpu->kvm->sys_fd, KVM_GET_VCPU_MMAP_SIZE, 0);
	if (mmap_size < 0)
		die_perror("KVM_GET_VCPU_MMAP_SIZE ioctl");

	vcpu->kvm_run = mmap(NULL, mmap_size, PROT_RW, MAP_SHARED, vcpu->vcpu_fd, 0);
	if (vcpu->kvm_run == MAP_FAILED)
		die("unable to mmap vcpu fd");

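	/*
	 * For KVM_CAP_COALESCED_MMIO, KVM_CHECK_EXTENSION returns the
	 * offset, in pages, of the coalesced MMIO ring within the vcpu
	 * mmap, or zero if the capability is absent.
	 */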
	coalesced_offset = ioctl(kvm->sys_fd, KVM_CHECK_EXTENSION, KVM_CAP_COALESCED_MMIO);
	if (coalesced_offset)
		vcpu->ring = (void *)vcpu->kvm_run + (coalesced_offset * PAGE_SIZE);

	if (kvm_cpu__set_lint(vcpu))
		die_perror("KVM_SET_LAPIC failed");

	vcpu->is_running = true;

	return vcpu;
}

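/*
 * struct kvm_msrs ends in a flexible array of struct kvm_msr_entry,
 * so the allocation has to account for the requested number of
 * entries up front.
 */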
static struct kvm_msrs *kvm_msrs__new(size_t nmsrs)
{
	struct kvm_msrs *msrs = calloc(1, sizeof(*msrs) + (sizeof(struct kvm_msr_entry) * nmsrs));

	if (!msrs)
		die("out of memory");

	return msrs;
}

#define KVM_MSR_ENTRY(_index, _data)	\
	(struct kvm_msr_entry) { .index = _index, .data = _data }

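/*
 * Zero the SYSENTER/SYSCALL entry-point MSRs and the TSC so the guest
 * starts from a clean slate, and enable fast-string operations. The
 * allocation of 100 entries leaves ample headroom for the handful
 * actually filled in below.
 */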
static void kvm_cpu__setup_msrs(struct kvm_cpu *vcpu)
{
	unsigned long ndx = 0;

	vcpu->msrs = kvm_msrs__new(100);

	vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_SYSENTER_CS,	0x0);
	vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_SYSENTER_ESP,	0x0);
	vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_SYSENTER_EIP,	0x0);
#ifdef CONFIG_X86_64
	vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_STAR,			0x0);
	vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_CSTAR,			0x0);
	vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_KERNEL_GS_BASE,		0x0);
	vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_SYSCALL_MASK,		0x0);
	vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_LSTAR,			0x0);
#endif
	vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_TSC,		0x0);
	vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_MISC_ENABLE,
						MSR_IA32_MISC_ENABLE_FAST_STRING);

	vcpu->msrs->nmsrs = ndx;

	if (ioctl(vcpu->vcpu_fd, KVM_SET_MSRS, vcpu->msrs) < 0)
		die_perror("KVM_SET_MSRS failed");
}

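/*
 * 0x37f is the x87 control word after FINIT (all exceptions masked)
 * and 0x1f80 is the architectural MXCSR reset value, so the guest FPU
 * starts out as if the hardware had just been initialized.
 */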
static void kvm_cpu__setup_fpu(struct kvm_cpu *vcpu)
{
	vcpu->fpu = (struct kvm_fpu) {
		.fcw	= 0x37f,
		.mxcsr	= 0x1f80,
	};

	if (ioctl(vcpu->vcpu_fd, KVM_SET_FPU, &vcpu->fpu) < 0)
		die_perror("KVM_SET_FPU failed");
}

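/*
 * Bit 1 of RFLAGS is reserved and always reads as one; everything
 * else starts cleared. RIP and RSP come from the boot protocol, and
 * the instruction pointer must fit in 16 bits because the guest
 * starts in real mode.
 */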
static void kvm_cpu__setup_regs(struct kvm_cpu *vcpu)
{
	vcpu->regs = (struct kvm_regs) {
		/* We start the guest in 16-bit real mode */
		.rflags	= 0x0000000000000002ULL,

		.rip	= vcpu->kvm->arch.boot_ip,
		.rsp	= vcpu->kvm->arch.boot_sp,
		.rbp	= vcpu->kvm->arch.boot_sp,
	};

	if (vcpu->regs.rip > USHRT_MAX)
		die("ip 0x%llx is too high for real mode", (u64)vcpu->regs.rip);

	if (ioctl(vcpu->vcpu_fd, KVM_SET_REGS, &vcpu->regs) < 0)
		die_perror("KVM_SET_REGS failed");
}

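/*
 * Point every segment register at the boot selector, with the base
 * derived from it as real mode requires (see selector_to_base()).
 */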
static void kvm_cpu__setup_sregs(struct kvm_cpu *vcpu)
{
	if (ioctl(vcpu->vcpu_fd, KVM_GET_SREGS, &vcpu->sregs) < 0)
		die_perror("KVM_GET_SREGS failed");

	vcpu->sregs.cs.selector	= vcpu->kvm->arch.boot_selector;
	vcpu->sregs.cs.base	= selector_to_base(vcpu->kvm->arch.boot_selector);
	vcpu->sregs.ss.selector	= vcpu->kvm->arch.boot_selector;
	vcpu->sregs.ss.base	= selector_to_base(vcpu->kvm->arch.boot_selector);
	vcpu->sregs.ds.selector	= vcpu->kvm->arch.boot_selector;
	vcpu->sregs.ds.base	= selector_to_base(vcpu->kvm->arch.boot_selector);
	vcpu->sregs.es.selector	= vcpu->kvm->arch.boot_selector;
	vcpu->sregs.es.base	= selector_to_base(vcpu->kvm->arch.boot_selector);
	vcpu->sregs.fs.selector	= vcpu->kvm->arch.boot_selector;
	vcpu->sregs.fs.base	= selector_to_base(vcpu->kvm->arch.boot_selector);
	vcpu->sregs.gs.selector	= vcpu->kvm->arch.boot_selector;
	vcpu->sregs.gs.base	= selector_to_base(vcpu->kvm->arch.boot_selector);

	if (ioctl(vcpu->vcpu_fd, KVM_SET_SREGS, &vcpu->sregs) < 0)
		die_perror("KVM_SET_SREGS failed");
}

/**
 * kvm_cpu__reset_vcpu - reset virtual CPU to a known state
 */
void kvm_cpu__reset_vcpu(struct kvm_cpu *vcpu)
{
	kvm_cpu__setup_cpuid(vcpu);
	kvm_cpu__setup_sregs(vcpu);
	kvm_cpu__setup_regs(vcpu);
	kvm_cpu__setup_fpu(vcpu);
	kvm_cpu__setup_msrs(vcpu);
}

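/*
 * x86 has no architecture-specific exit reasons to handle here;
 * returning false leaves the exit to the generic dispatch code.
 */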
bool kvm_cpu__handle_exit(struct kvm_cpu *vcpu)
{
	return false;
}

static void print_dtable(const char *name, struct kvm_dtable *dtable)
{
	dprintf(debug_fd, " %s                 %016llx  %08hx\n",
		name, (u64) dtable->base, (u16) dtable->limit);
}

static void print_segment(const char *name, struct kvm_segment *seg)
{
	dprintf(debug_fd, " %s       %04hx      %016llx  %08x  %02hhx    %x %x   %x  %x %x %x %x\n",
		name, (u16) seg->selector, (u64) seg->base, (u32) seg->limit,
		(u8) seg->type, seg->present, seg->dpl, seg->db, seg->s, seg->l, seg->g, seg->avl);
}

void kvm_cpu__show_registers(struct kvm_cpu *vcpu)
{
	unsigned long cr0, cr2, cr3;
	unsigned long cr4, cr8;
	unsigned long rax, rbx, rcx;
	unsigned long rdx, rsi, rdi;
	unsigned long rbp,  r8,  r9;
	unsigned long r10, r11, r12;
	unsigned long r13, r14, r15;
	unsigned long rip, rsp;
	struct kvm_sregs sregs;
	unsigned long rflags;
	struct kvm_regs regs;
	int i;

	if (ioctl(vcpu->vcpu_fd, KVM_GET_REGS, &regs) < 0)
		die("KVM_GET_REGS failed");

	rflags = regs.rflags;

	rip = regs.rip; rsp = regs.rsp;
	rax = regs.rax; rbx = regs.rbx; rcx = regs.rcx;
	rdx = regs.rdx; rsi = regs.rsi; rdi = regs.rdi;
	rbp = regs.rbp; r8  = regs.r8;  r9  = regs.r9;
	r10 = regs.r10; r11 = regs.r11; r12 = regs.r12;
	r13 = regs.r13; r14 = regs.r14; r15 = regs.r15;

	dprintf(debug_fd, "\n Registers:\n");
	dprintf(debug_fd,   " ----------\n");
	dprintf(debug_fd, " rip: %016lx   rsp: %016lx flags: %016lx\n", rip, rsp, rflags);
	dprintf(debug_fd, " rax: %016lx   rbx: %016lx   rcx: %016lx\n", rax, rbx, rcx);
	dprintf(debug_fd, " rdx: %016lx   rsi: %016lx   rdi: %016lx\n", rdx, rsi, rdi);
	dprintf(debug_fd, " rbp: %016lx    r8: %016lx    r9: %016lx\n", rbp, r8,  r9);
	dprintf(debug_fd, " r10: %016lx   r11: %016lx   r12: %016lx\n", r10, r11, r12);
	dprintf(debug_fd, " r13: %016lx   r14: %016lx   r15: %016lx\n", r13, r14, r15);

	if (ioctl(vcpu->vcpu_fd, KVM_GET_SREGS, &sregs) < 0)
		die("KVM_GET_SREGS failed");

	cr0 = sregs.cr0; cr2 = sregs.cr2; cr3 = sregs.cr3;
	cr4 = sregs.cr4; cr8 = sregs.cr8;

	dprintf(debug_fd, " cr0: %016lx   cr2: %016lx   cr3: %016lx\n", cr0, cr2, cr3);
	dprintf(debug_fd, " cr4: %016lx   cr8: %016lx\n", cr4, cr8);
	dprintf(debug_fd, "\n Segment registers:\n");
	dprintf(debug_fd,   " ------------------\n");
	dprintf(debug_fd, " register  selector  base              limit     type  p dpl db s l g avl\n");
	print_segment("cs ", &sregs.cs);
	print_segment("ss ", &sregs.ss);
	print_segment("ds ", &sregs.ds);
	print_segment("es ", &sregs.es);
	print_segment("fs ", &sregs.fs);
	print_segment("gs ", &sregs.gs);
	print_segment("tr ", &sregs.tr);
	print_segment("ldt", &sregs.ldt);
	print_dtable("gdt", &sregs.gdt);
	print_dtable("idt", &sregs.idt);

	dprintf(debug_fd, "\n APIC:\n");
	dprintf(debug_fd,   " -----\n");
	dprintf(debug_fd, " efer: %016llx  apic base: %016llx  nmi: %s\n",
		(u64) sregs.efer, (u64) sregs.apic_base,
		(vcpu->kvm->nmi_disabled ? "disabled" : "enabled"));

	dprintf(debug_fd, "\n Interrupt bitmap:\n");
	dprintf(debug_fd,   " -----------------\n");
	for (i = 0; i < (KVM_NR_INTERRUPTS + 63) / 64; i++)
		dprintf(debug_fd, " %016llx", (u64) sregs.interrupt_bitmap[i]);
	dprintf(debug_fd, "\n");
}

#define MAX_SYM_LEN 128

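/*
 * Dump the 64 code bytes around RIP, starting 43 bytes before the
 * current instruction, in the style of the kernel's oops "Code:"
 * line; the byte at RIP is bracketed as <xx>.
 */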
void kvm_cpu__show_code(struct kvm_cpu *vcpu)
{
	unsigned int code_bytes = 64;
	unsigned int code_prologue = 43;
	unsigned int code_len = code_bytes;
	char sym[MAX_SYM_LEN] = SYMBOL_DEFAULT_UNKNOWN, *psym;
	unsigned char c;
	unsigned int i;
	u8 *ip;

	if (ioctl(vcpu->vcpu_fd, KVM_GET_REGS, &vcpu->regs) < 0)
		die("KVM_GET_REGS failed");

	if (ioctl(vcpu->vcpu_fd, KVM_GET_SREGS, &vcpu->sregs) < 0)
		die("KVM_GET_SREGS failed");

	ip = guest_flat_to_host(vcpu->kvm, ip_to_flat(vcpu, vcpu->regs.rip) - code_prologue);

	dprintf(debug_fd, "\n Code:\n");
	dprintf(debug_fd,   " -----\n");

	psym = symbol_lookup(vcpu->kvm, vcpu->regs.rip, sym, MAX_SYM_LEN);
	if (IS_ERR(psym))
		dprintf(debug_fd,
			"Warning: symbol_lookup() failed to find symbol "
			"with error: %ld\n", PTR_ERR(psym));

	dprintf(debug_fd, " rip: [<%016lx>] %s\n\n", (unsigned long) vcpu->regs.rip, sym);

	for (i = 0; i < code_len; i++, ip++) {
		if (!host_ptr_in_ram(vcpu->kvm, ip))
			break;

		c = *ip;

		if (ip == guest_flat_to_host(vcpu->kvm, ip_to_flat(vcpu, vcpu->regs.rip)))
			dprintf(debug_fd, " <%02x>", c);
		else
			dprintf(debug_fd, " %02x", c);
	}

	dprintf(debug_fd, "\n");

	dprintf(debug_fd, "\n Stack:\n");
	dprintf(debug_fd,   " ------\n");
	dprintf(debug_fd, " rsp: [<%016lx>]\n", (unsigned long) vcpu->regs.rsp);
	kvm__dump_mem(vcpu->kvm, vcpu->regs.rsp, 32, debug_fd);
}

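/*
 * Walk the four-level page tables starting at CR3 (PML4 -> PDPT ->
 * page directory -> page table). Bit 7 of the level-2 entry is the
 * Page Size flag: when set, the entry maps a 2 MiB page and there is
 * no level-1 table to print.
 */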
void kvm_cpu__show_page_tables(struct kvm_cpu *vcpu)
{
	u64 *pte1;
	u64 *pte2;
	u64 *pte3;
	u64 *pte4;

	if (!is_in_protected_mode(vcpu)) {
		dprintf(debug_fd, "\n Page Tables:\n");
		dprintf(debug_fd, " ------\n");
		dprintf(debug_fd, " Not in protected mode\n");
		return;
	}

	if (ioctl(vcpu->vcpu_fd, KVM_GET_SREGS, &vcpu->sregs) < 0)
		die("KVM_GET_SREGS failed");

	pte4 = guest_flat_to_host(vcpu->kvm, vcpu->sregs.cr3);
	if (!host_ptr_in_ram(vcpu->kvm, pte4))
		return;

	pte3 = guest_flat_to_host(vcpu->kvm, (*pte4 & ~0xfff));
	if (!host_ptr_in_ram(vcpu->kvm, pte3))
		return;

	pte2 = guest_flat_to_host(vcpu->kvm, (*pte3 & ~0xfff));
	if (!host_ptr_in_ram(vcpu->kvm, pte2))
		return;

	pte1 = guest_flat_to_host(vcpu->kvm, (*pte2 & ~0xfff));
	if (!host_ptr_in_ram(vcpu->kvm, pte1))
		return;

	dprintf(debug_fd, "\n Page Tables:\n");
	dprintf(debug_fd, " ------\n");
	if (*pte2 & (1 << 7))
		dprintf(debug_fd, " pte4: %016llx   pte3: %016llx"
			"   pte2: %016llx\n",
			*pte4, *pte3, *pte2);
	else
		dprintf(debug_fd, " pte4: %016llx   pte3: %016llx"
			"   pte2: %016llx   pte1: %016llx\n",
			*pte4, *pte3, *pte2, *pte1);
}

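/*
 * Inject an NMI only if the guest has LINT1 unmasked and programmed
 * for NMI delivery, the same gating a physical NMI pin gets from the
 * local APIC.
 */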
void kvm_cpu__arch_nmi(struct kvm_cpu *cpu)
{
	struct kvm_lapic_state klapic;
	struct local_apic *lapic = (void *)&klapic;

	if (ioctl(cpu->vcpu_fd, KVM_GET_LAPIC, &klapic) != 0)
		return;

	if (lapic->lvt_lint1.mask)
		return;

	if (lapic->lvt_lint1.delivery_mode != APIC_MODE_NMI)
		return;

	ioctl(cpu->vcpu_fd, KVM_NMI);
}