/* kvmtool: x86/kvm-cpu.c (revision 4b1c6f6e947ba8c35c0dc49346916817e943786f) */

#include "kvm/kvm-cpu.h"

#include "kvm/symbol.h"
#include "kvm/util.h"
#include "kvm/kvm.h"

#include <asm/msr-index.h>
#include <asm/apicdef.h>

#include <sys/ioctl.h>
#include <sys/mman.h>
#include <signal.h>
#include <stdlib.h>
#include <limits.h>	/* USHRT_MAX, used in kvm_cpu__setup_regs() */
#include <string.h>
#include <errno.h>
#include <stdio.h>

static int debug_fd;

void kvm_cpu__set_debug_fd(int fd)
{
	debug_fd = fd;
}

int kvm_cpu__get_debug_fd(void)
{
	return debug_fd;
}

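/*
 * CR0 bit 0 is PE (protection enable); once a guest sets it, the CPU has
 * left real mode and segment selectors can no longer be treated as
 * paragraph addresses.
 */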
static inline bool is_in_protected_mode(struct kvm_cpu *vcpu)
{
	return vcpu->sregs.cr0 & 0x01;
}

static inline u64 ip_to_flat(struct kvm_cpu *vcpu, u64 ip)
{
	u64 cs;

	/*
	 * NOTE! We should take the code segment base address into account here.
	 * Luckily it's usually zero because Linux uses a flat memory model.
	 */
	if (is_in_protected_mode(vcpu))
		return ip;

	cs = vcpu->sregs.cs.selector;

	return ip + (cs << 4);
}

static inline u32 selector_to_base(u16 selector)
{
	/*
	 * KVM on Intel requires 'base' to be 'selector * 16' in real mode.
	 */
	return (u32)selector * 16;
}
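
/*
 * Worked example of the real-mode address math above: the BIOS entry
 * point f000:fff0 corresponds to the flat address
 *
 *	(0xf000 << 4) + 0xfff0 = 0xf0000 + 0xfff0 = 0xffff0
 *
 * which is what ip_to_flat() computes, and selector_to_base(0xf000)
 * likewise yields the segment base 0xf0000.
 */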

static struct kvm_cpu *kvm_cpu__new(struct kvm *kvm)
{
	struct kvm_cpu *vcpu;

	vcpu		= calloc(1, sizeof *vcpu);
	if (!vcpu)
		return NULL;

	vcpu->kvm	= kvm;

	return vcpu;
}

void kvm_cpu__delete(struct kvm_cpu *vcpu)
{
	free(vcpu->msrs);
	free(vcpu);
}

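/*
 * Route the local APIC's LINT0 pin to ExtINT delivery so legacy
 * 8259-style interrupts can reach the vcpu, and LINT1 to NMI delivery,
 * matching how the pins are wired on a standard PC.
 */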
static int kvm_cpu__set_lint(struct kvm_cpu *vcpu)
{
	struct kvm_lapic_state klapic;
	struct local_apic *lapic = (void *)&klapic;
	u32 lvt;

	if (ioctl(vcpu->vcpu_fd, KVM_GET_LAPIC, &klapic))
		return -1;

	lvt = *(u32 *)&lapic->lvt_lint0;
	lvt = SET_APIC_DELIVERY_MODE(lvt, APIC_MODE_EXTINT);
	*(u32 *)&lapic->lvt_lint0 = lvt;

	lvt = *(u32 *)&lapic->lvt_lint1;
	lvt = SET_APIC_DELIVERY_MODE(lvt, APIC_MODE_NMI);
	*(u32 *)&lapic->lvt_lint1 = lvt;

	return ioctl(vcpu->vcpu_fd, KVM_SET_LAPIC, &klapic);
}

struct kvm_cpu *kvm_cpu__init(struct kvm *kvm, unsigned long cpu_id)
{
	struct kvm_cpu *vcpu;
	int mmap_size;
	int coalesced_offset;

	vcpu		= kvm_cpu__new(kvm);
	if (!vcpu)
		return NULL;

	vcpu->cpu_id	= cpu_id;

	vcpu->vcpu_fd = ioctl(vcpu->kvm->vm_fd, KVM_CREATE_VCPU, cpu_id);
	if (vcpu->vcpu_fd < 0)
		die_perror("KVM_CREATE_VCPU ioctl");

	mmap_size = ioctl(vcpu->kvm->sys_fd, KVM_GET_VCPU_MMAP_SIZE, 0);
	if (mmap_size < 0)
		die_perror("KVM_GET_VCPU_MMAP_SIZE ioctl");

	vcpu->kvm_run = mmap(NULL, mmap_size, PROT_RW, MAP_SHARED, vcpu->vcpu_fd, 0);
	if (vcpu->kvm_run == MAP_FAILED)
		die("unable to mmap vcpu fd");

	/* A positive return value is the page offset of the coalesced MMIO ring */
	coalesced_offset = ioctl(kvm->sys_fd, KVM_CHECK_EXTENSION, KVM_CAP_COALESCED_MMIO);
	if (coalesced_offset > 0)
		vcpu->ring = (void *)vcpu->kvm_run + (coalesced_offset * PAGE_SIZE);

	if (kvm_cpu__set_lint(vcpu))
		die_perror("kvm_cpu__set_lint failed");

	vcpu->is_running = true;

	return vcpu;
}
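
/*
 * Illustrative lifecycle sketch, not part of this file: a vcpu created
 * above is typically reset to a known state and then driven from a vcpu
 * thread, roughly
 *
 *	vcpu = kvm_cpu__init(kvm, 0);
 *	kvm_cpu__reset_vcpu(vcpu);
 *	while (vcpu->is_running)
 *		kvm_cpu__run(vcpu);
 *
 * (this assumes the generic kvm_cpu__run() KVM_RUN wrapper that lives
 * outside this file).
 */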

static struct kvm_msrs *kvm_msrs__new(size_t nmsrs)
{
	struct kvm_msrs *msrs = calloc(1, sizeof(*msrs) + (sizeof(struct kvm_msr_entry) * nmsrs));

	if (!msrs)
		die("out of memory");

	return msrs;
}

#define KVM_MSR_ENTRY(_index, _data)	\
	(struct kvm_msr_entry) { .index = _index, .data = _data }

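/*
 * KVM_MSR_ENTRY is a C99 compound literal: KVM_MSR_ENTRY(MSR_IA32_TSC, 0)
 * produces (struct kvm_msr_entry){ .index = MSR_IA32_TSC, .data = 0 }.
 * Most entries below carry a zero payload, resetting the guest's
 * sysenter/syscall and TSC state; MISC_ENABLE additionally turns on
 * fast string operations.
 */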
static void kvm_cpu__setup_msrs(struct kvm_cpu *vcpu)
{
	unsigned long ndx = 0;

	vcpu->msrs = kvm_msrs__new(100);

	vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_SYSENTER_CS,	0x0);
	vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_SYSENTER_ESP,	0x0);
	vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_SYSENTER_EIP,	0x0);
#ifdef CONFIG_X86_64
	vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_STAR,			0x0);
	vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_CSTAR,			0x0);
	vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_KERNEL_GS_BASE,		0x0);
	vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_SYSCALL_MASK,		0x0);
	vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_LSTAR,			0x0);
#endif
	vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_TSC,		0x0);
	vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_MISC_ENABLE,
						MSR_IA32_MISC_ENABLE_FAST_STRING);

	vcpu->msrs->nmsrs	= ndx;

	if (ioctl(vcpu->vcpu_fd, KVM_SET_MSRS, vcpu->msrs) < 0)
		die_perror("KVM_SET_MSRS failed");
}

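/*
 * 0x37f is the x87 control word after FINIT/reset (all exceptions masked,
 * extended precision, round-to-nearest) and 0x1f80 is the architectural
 * MXCSR default (all SSE exceptions masked), i.e. the state firmware
 * would leave behind on real hardware.
 */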
static void kvm_cpu__setup_fpu(struct kvm_cpu *vcpu)
{
	vcpu->fpu = (struct kvm_fpu) {
		.fcw		= 0x37f,
		.mxcsr		= 0x1f80,
	};

	if (ioctl(vcpu->vcpu_fd, KVM_SET_FPU, &vcpu->fpu) < 0)
		die_perror("KVM_SET_FPU failed");
}

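/*
 * rflags starts as 0x2 because bit 1 of RFLAGS is a reserved bit that
 * must always read as one; every other flag begins cleared.
 */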
static void kvm_cpu__setup_regs(struct kvm_cpu *vcpu)
{
	vcpu->regs = (struct kvm_regs) {
		/* We start the guest in 16-bit real mode */
		.rflags		= 0x0000000000000002ULL,

		.rip		= vcpu->kvm->boot_ip,
		.rsp		= vcpu->kvm->boot_sp,
		.rbp		= vcpu->kvm->boot_sp,
	};

	if (vcpu->regs.rip > USHRT_MAX)
		die("ip 0x%llx is too high for real mode", (u64) vcpu->regs.rip);

	if (ioctl(vcpu->vcpu_fd, KVM_SET_REGS, &vcpu->regs) < 0)
		die_perror("KVM_SET_REGS failed");
}

static void kvm_cpu__setup_sregs(struct kvm_cpu *vcpu)
{
	if (ioctl(vcpu->vcpu_fd, KVM_GET_SREGS, &vcpu->sregs) < 0)
		die_perror("KVM_GET_SREGS failed");

	vcpu->sregs.cs.selector	= vcpu->kvm->boot_selector;
	vcpu->sregs.cs.base	= selector_to_base(vcpu->kvm->boot_selector);
	vcpu->sregs.ss.selector	= vcpu->kvm->boot_selector;
	vcpu->sregs.ss.base	= selector_to_base(vcpu->kvm->boot_selector);
	vcpu->sregs.ds.selector	= vcpu->kvm->boot_selector;
	vcpu->sregs.ds.base	= selector_to_base(vcpu->kvm->boot_selector);
	vcpu->sregs.es.selector	= vcpu->kvm->boot_selector;
	vcpu->sregs.es.base	= selector_to_base(vcpu->kvm->boot_selector);
	vcpu->sregs.fs.selector	= vcpu->kvm->boot_selector;
	vcpu->sregs.fs.base	= selector_to_base(vcpu->kvm->boot_selector);
	vcpu->sregs.gs.selector	= vcpu->kvm->boot_selector;
	vcpu->sregs.gs.base	= selector_to_base(vcpu->kvm->boot_selector);

	if (ioctl(vcpu->vcpu_fd, KVM_SET_SREGS, &vcpu->sregs) < 0)
		die_perror("KVM_SET_SREGS failed");
}

/**
 * kvm_cpu__reset_vcpu - reset virtual CPU to a known state
 */
void kvm_cpu__reset_vcpu(struct kvm_cpu *vcpu)
{
	kvm_cpu__setup_cpuid(vcpu);
	kvm_cpu__setup_sregs(vcpu);
	kvm_cpu__setup_regs(vcpu);
	kvm_cpu__setup_fpu(vcpu);
	kvm_cpu__setup_msrs(vcpu);
}

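/*
 * No x86-specific KVM exit reasons are handled here; returning false
 * tells the generic run loop that the exit was not handled by the
 * architecture code.
 */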
bool kvm_cpu__handle_exit(struct kvm_cpu *vcpu)
{
	return false;
}

static void print_dtable(const char *name, struct kvm_dtable *dtable)
{
	dprintf(debug_fd, " %s                 %016llx  %08hx\n",
		name, (u64) dtable->base, (u16) dtable->limit);
}

static void print_segment(const char *name, struct kvm_segment *seg)
{
	dprintf(debug_fd, " %s       %04hx      %016llx  %08x  %02hhx    %x %x   %x  %x %x %x %x\n",
		name, (u16) seg->selector, (u64) seg->base, (u32) seg->limit,
		(u8) seg->type, seg->present, seg->dpl, seg->db, seg->s, seg->l, seg->g, seg->avl);
}

void kvm_cpu__show_registers(struct kvm_cpu *vcpu)
{
	unsigned long cr0, cr2, cr3;
	unsigned long cr4, cr8;
	unsigned long rax, rbx, rcx;
	unsigned long rdx, rsi, rdi;
	unsigned long rbp,  r8,  r9;
	unsigned long r10, r11, r12;
	unsigned long r13, r14, r15;
	unsigned long rip, rsp;
	struct kvm_sregs sregs;
	unsigned long rflags;
	struct kvm_regs regs;
	int i;

	if (ioctl(vcpu->vcpu_fd, KVM_GET_REGS, &regs) < 0)
		die("KVM_GET_REGS failed");

	rflags = regs.rflags;

	rip = regs.rip; rsp = regs.rsp;
	rax = regs.rax; rbx = regs.rbx; rcx = regs.rcx;
	rdx = regs.rdx; rsi = regs.rsi; rdi = regs.rdi;
	rbp = regs.rbp; r8  = regs.r8;  r9  = regs.r9;
	r10 = regs.r10; r11 = regs.r11; r12 = regs.r12;
	r13 = regs.r13; r14 = regs.r14; r15 = regs.r15;

	dprintf(debug_fd, "\n Registers:\n");
	dprintf(debug_fd,   " ----------\n");
	dprintf(debug_fd, " rip: %016lx   rsp: %016lx flags: %016lx\n", rip, rsp, rflags);
	dprintf(debug_fd, " rax: %016lx   rbx: %016lx   rcx: %016lx\n", rax, rbx, rcx);
	dprintf(debug_fd, " rdx: %016lx   rsi: %016lx   rdi: %016lx\n", rdx, rsi, rdi);
	dprintf(debug_fd, " rbp: %016lx    r8: %016lx    r9: %016lx\n", rbp, r8,  r9);
	dprintf(debug_fd, " r10: %016lx   r11: %016lx   r12: %016lx\n", r10, r11, r12);
	dprintf(debug_fd, " r13: %016lx   r14: %016lx   r15: %016lx\n", r13, r14, r15);

	if (ioctl(vcpu->vcpu_fd, KVM_GET_SREGS, &sregs) < 0)
		die("KVM_GET_SREGS failed");

	cr0 = sregs.cr0; cr2 = sregs.cr2; cr3 = sregs.cr3;
	cr4 = sregs.cr4; cr8 = sregs.cr8;

	dprintf(debug_fd, " cr0: %016lx   cr2: %016lx   cr3: %016lx\n", cr0, cr2, cr3);
	dprintf(debug_fd, " cr4: %016lx   cr8: %016lx\n", cr4, cr8);
	dprintf(debug_fd, "\n Segment registers:\n");
	dprintf(debug_fd,   " ------------------\n");
	dprintf(debug_fd, " register  selector  base              limit     type  p dpl db s l g avl\n");
	print_segment("cs ", &sregs.cs);
	print_segment("ss ", &sregs.ss);
	print_segment("ds ", &sregs.ds);
	print_segment("es ", &sregs.es);
	print_segment("fs ", &sregs.fs);
	print_segment("gs ", &sregs.gs);
	print_segment("tr ", &sregs.tr);
	print_segment("ldt", &sregs.ldt);
	print_dtable("gdt", &sregs.gdt);
	print_dtable("idt", &sregs.idt);

	dprintf(debug_fd, "\n APIC:\n");
	dprintf(debug_fd,   " -----\n");
	dprintf(debug_fd, " efer: %016llx  apic base: %016llx  nmi: %s\n",
		(u64) sregs.efer, (u64) sregs.apic_base,
		(vcpu->kvm->nmi_disabled ? "disabled" : "enabled"));

	dprintf(debug_fd, "\n Interrupt bitmap:\n");
	dprintf(debug_fd,   " -----------------\n");
	for (i = 0; i < (KVM_NR_INTERRUPTS + 63) / 64; i++)
		dprintf(debug_fd, " %016llx", (u64) sregs.interrupt_bitmap[i]);
	dprintf(debug_fd, "\n");
}

#define MAX_SYM_LEN		128

void kvm_cpu__show_code(struct kvm_cpu *vcpu)
{
	unsigned int code_bytes = 64;
	unsigned int code_prologue = code_bytes * 43 / 64;
	unsigned int code_len = code_bytes;
	char sym[MAX_SYM_LEN];
	unsigned char c;
	unsigned int i;
	u8 *ip;

	if (ioctl(vcpu->vcpu_fd, KVM_GET_REGS, &vcpu->regs) < 0)
		die("KVM_GET_REGS failed");

	if (ioctl(vcpu->vcpu_fd, KVM_GET_SREGS, &vcpu->sregs) < 0)
		die("KVM_GET_SREGS failed");

	ip = guest_flat_to_host(vcpu->kvm, ip_to_flat(vcpu, vcpu->regs.rip) - code_prologue);

	dprintf(debug_fd, "\n Code:\n");
	dprintf(debug_fd,   " -----\n");

	symbol__lookup(vcpu->kvm, vcpu->regs.rip, sym, MAX_SYM_LEN);

	dprintf(debug_fd, " rip: [<%016lx>] %s\n\n", (unsigned long) vcpu->regs.rip, sym);

	for (i = 0; i < code_len; i++, ip++) {
		if (!host_ptr_in_ram(vcpu->kvm, ip))
			break;

		c = *ip;

		if (ip == guest_flat_to_host(vcpu->kvm, ip_to_flat(vcpu, vcpu->regs.rip)))
			dprintf(debug_fd, " <%02x>", c);
		else
			dprintf(debug_fd, " %02x", c);
	}

	dprintf(debug_fd, "\n");

	dprintf(debug_fd, "\n Stack:\n");
	dprintf(debug_fd,   " ------\n");
	kvm__dump_mem(vcpu->kvm, vcpu->regs.rsp, 32);
}

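/*
 * Walk the 4-level x86-64 page tables rooted at CR3: pte4 points into the
 * PML4, pte3 into a PDPT, pte2 into a page directory and pte1 into a page
 * table. Bit 7 of a directory entry is PS (page size); when set, the
 * entry maps a 2 MiB page directly, so the dump below stops at pte2 in
 * that case.
 */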
void kvm_cpu__show_page_tables(struct kvm_cpu *vcpu)
{
	u64 *pte1;
	u64 *pte2;
	u64 *pte3;
	u64 *pte4;

	if (!is_in_protected_mode(vcpu))
		return;

	if (ioctl(vcpu->vcpu_fd, KVM_GET_SREGS, &vcpu->sregs) < 0)
		die("KVM_GET_SREGS failed");

	pte4	= guest_flat_to_host(vcpu->kvm, vcpu->sregs.cr3);
	if (!host_ptr_in_ram(vcpu->kvm, pte4))
		return;

	pte3	= guest_flat_to_host(vcpu->kvm, (*pte4 & ~0xfff));
	if (!host_ptr_in_ram(vcpu->kvm, pte3))
		return;

	pte2	= guest_flat_to_host(vcpu->kvm, (*pte3 & ~0xfff));
	if (!host_ptr_in_ram(vcpu->kvm, pte2))
		return;

	pte1	= guest_flat_to_host(vcpu->kvm, (*pte2 & ~0xfff));
	if (!host_ptr_in_ram(vcpu->kvm, pte1))
		return;

	dprintf(debug_fd, "Page Tables:\n");
	if (*pte2 & (1 << 7))
		dprintf(debug_fd, " pte4: %016llx   pte3: %016llx"
			"   pte2: %016llx\n",
			*pte4, *pte3, *pte2);
	else
		dprintf(debug_fd, " pte4: %016llx  pte3: %016llx   pte2: %016"
			"llx   pte1: %016llx\n",
			*pte4, *pte3, *pte2, *pte1);
}

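/*
 * Inject an NMI only if the guest itself has LINT1 unmasked and
 * programmed for NMI delivery, mirroring the wiring established in
 * kvm_cpu__set_lint() above.
 */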
void kvm_cpu__arch_nmi(struct kvm_cpu *cpu)
{
	struct kvm_lapic_state klapic;
	struct local_apic *lapic = (void *)&klapic;

	if (ioctl(cpu->vcpu_fd, KVM_GET_LAPIC, &klapic) != 0)
		return;

	if (lapic->lvt_lint1.mask)
		return;

	if (lapic->lvt_lint1.delivery_mode != APIC_MODE_NMI)
		return;

	ioctl(cpu->vcpu_fd, KVM_NMI);
}