xref: /kvmtool/kvm-cpu.c (revision f705405977c8166158b05706194501f2de30d09c)
1 #include "kvm/kvm-cpu.h"
2 
3 #include "kvm/symbol.h"
4 #include "kvm/util.h"
5 #include "kvm/kvm.h"
6 
7 #include <asm/msr-index.h>
8 
9 #include <sys/ioctl.h>
10 #include <sys/mman.h>
11 #include <signal.h>
12 #include <stdlib.h>
13 #include <string.h>
14 #include <errno.h>
15 #include <stdio.h>
16 
17 #define PAGE_SIZE (sysconf(_SC_PAGE_SIZE))
18 
19 extern struct kvm_cpu *kvm_cpus[KVM_NR_CPUS];
20 extern __thread struct kvm_cpu *current_kvm_cpu;
21 
22 static inline bool is_in_protected_mode(struct kvm_cpu *vcpu)
23 {
24 	return vcpu->sregs.cr0 & 0x01;
25 }
26 
27 static inline u64 ip_to_flat(struct kvm_cpu *vcpu, u64 ip)
28 {
29 	u64 cs;
30 
31 	/*
32 	 * NOTE! We should take code segment base address into account here.
33 	 * Luckily it's usually zero because Linux uses flat memory model.
34 	 */
35 	if (is_in_protected_mode(vcpu))
36 		return ip;
37 
38 	cs = vcpu->sregs.cs.selector;
39 
40 	return ip + (cs << 4);
41 }
42 
43 static inline u32 selector_to_base(u16 selector)
44 {
45 	/*
46 	 * KVM on Intel requires 'base' to be 'selector * 16' in real mode.
47 	 */
48 	return (u32)selector * 16;
49 }
50 
51 static struct kvm_cpu *kvm_cpu__new(struct kvm *kvm)
52 {
53 	struct kvm_cpu *vcpu;
54 
55 	vcpu		= calloc(1, sizeof *vcpu);
56 	if (!vcpu)
57 		return NULL;
58 
59 	vcpu->kvm	= kvm;
60 
61 	return vcpu;
62 }
63 
64 void kvm_cpu__delete(struct kvm_cpu *vcpu)
65 {
66 	if (vcpu->msrs)
67 		free(vcpu->msrs);
68 
69 	free(vcpu);
70 }
71 
72 struct kvm_cpu *kvm_cpu__init(struct kvm *kvm, unsigned long cpu_id)
73 {
74 	struct kvm_cpu *vcpu;
75 	int mmap_size;
76 	int coalesced_offset;
77 
78 	vcpu		= kvm_cpu__new(kvm);
79 	if (!vcpu)
80 		return NULL;
81 
82 	vcpu->cpu_id	= cpu_id;
83 
84 	vcpu->vcpu_fd = ioctl(vcpu->kvm->vm_fd, KVM_CREATE_VCPU, cpu_id);
85 	if (vcpu->vcpu_fd < 0)
86 		die_perror("KVM_CREATE_VCPU ioctl");
87 
88 	mmap_size = ioctl(vcpu->kvm->sys_fd, KVM_GET_VCPU_MMAP_SIZE, 0);
89 	if (mmap_size < 0)
90 		die_perror("KVM_GET_VCPU_MMAP_SIZE ioctl");
91 
92 	vcpu->kvm_run = mmap(NULL, mmap_size, PROT_RW, MAP_SHARED, vcpu->vcpu_fd, 0);
93 	if (vcpu->kvm_run == MAP_FAILED)
94 		die("unable to mmap vcpu fd");
95 
96 	coalesced_offset = ioctl(kvm->sys_fd, KVM_CHECK_EXTENSION, KVM_CAP_COALESCED_MMIO);
97 	if (coalesced_offset)
98 		vcpu->ring = (void *)vcpu->kvm_run + (coalesced_offset * PAGE_SIZE);
99 
100 	vcpu->is_running = true;
101 
102 	return vcpu;
103 }
104 
105 void kvm_cpu__enable_singlestep(struct kvm_cpu *vcpu)
106 {
107 	struct kvm_guest_debug debug = {
108 		.control	= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP,
109 	};
110 
111 	if (ioctl(vcpu->vcpu_fd, KVM_SET_GUEST_DEBUG, &debug) < 0)
112 		pr_warning("KVM_SET_GUEST_DEBUG failed");
113 }
114 
115 static struct kvm_msrs *kvm_msrs__new(size_t nmsrs)
116 {
117 	struct kvm_msrs *vcpu = calloc(1, sizeof(*vcpu) + (sizeof(struct kvm_msr_entry) * nmsrs));
118 
119 	if (!vcpu)
120 		die("out of memory");
121 
122 	return vcpu;
123 }
124 
/*
 * Build a struct kvm_msr_entry value for MSR number @_index holding @_data.
 * Members that are not named here are zero-initialized.
 */
#define KVM_MSR_ENTRY(_index, _data)				\
	((struct kvm_msr_entry) {				\
		.index	= (_index),				\
		.data	= (_data),				\
	})
127 
128 static void kvm_cpu__setup_msrs(struct kvm_cpu *vcpu)
129 {
130 	unsigned long ndx = 0;
131 
132 	vcpu->msrs = kvm_msrs__new(100);
133 
134 	vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_SYSENTER_CS,	0x0);
135 	vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_SYSENTER_ESP,	0x0);
136 	vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_SYSENTER_EIP,	0x0);
137 #ifdef CONFIG_X86_64
138 	vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_STAR,			0x0);
139 	vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_CSTAR,			0x0);
140 	vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_KERNEL_GS_BASE,		0x0);
141 	vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_SYSCALL_MASK,		0x0);
142 	vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_LSTAR,			0x0);
143 #endif
144 	vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_TSC,		0x0);
145 	vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_MISC_ENABLE,
146 						MSR_IA32_MISC_ENABLE_FAST_STRING);
147 
148 	vcpu->msrs->nmsrs	= ndx;
149 
150 	if (ioctl(vcpu->vcpu_fd, KVM_SET_MSRS, vcpu->msrs) < 0)
151 		die_perror("KVM_SET_MSRS failed");
152 }
153 
154 static void kvm_cpu__setup_fpu(struct kvm_cpu *vcpu)
155 {
156 	vcpu->fpu = (struct kvm_fpu) {
157 		.fcw		= 0x37f,
158 		.mxcsr		= 0x1f80,
159 	};
160 
161 	if (ioctl(vcpu->vcpu_fd, KVM_SET_FPU, &vcpu->fpu) < 0)
162 		die_perror("KVM_SET_FPU failed");
163 }
164 
165 static void kvm_cpu__setup_regs(struct kvm_cpu *vcpu)
166 {
167 	vcpu->regs = (struct kvm_regs) {
168 		/* We start the guest in 16-bit real mode  */
169 		.rflags		= 0x0000000000000002ULL,
170 
171 		.rip		= vcpu->kvm->boot_ip,
172 		.rsp		= vcpu->kvm->boot_sp,
173 		.rbp		= vcpu->kvm->boot_sp,
174 	};
175 
176 	if (vcpu->regs.rip > USHRT_MAX)
177 		die("ip 0x%llx is too high for real mode", (u64) vcpu->regs.rip);
178 
179 	if (ioctl(vcpu->vcpu_fd, KVM_SET_REGS, &vcpu->regs) < 0)
180 		die_perror("KVM_SET_REGS failed");
181 }
182 
183 static void kvm_cpu__setup_sregs(struct kvm_cpu *vcpu)
184 {
185 
186 	if (ioctl(vcpu->vcpu_fd, KVM_GET_SREGS, &vcpu->sregs) < 0)
187 		die_perror("KVM_GET_SREGS failed");
188 
189 	vcpu->sregs.cs.selector	= vcpu->kvm->boot_selector;
190 	vcpu->sregs.cs.base	= selector_to_base(vcpu->kvm->boot_selector);
191 	vcpu->sregs.ss.selector	= vcpu->kvm->boot_selector;
192 	vcpu->sregs.ss.base	= selector_to_base(vcpu->kvm->boot_selector);
193 	vcpu->sregs.ds.selector	= vcpu->kvm->boot_selector;
194 	vcpu->sregs.ds.base	= selector_to_base(vcpu->kvm->boot_selector);
195 	vcpu->sregs.es.selector	= vcpu->kvm->boot_selector;
196 	vcpu->sregs.es.base	= selector_to_base(vcpu->kvm->boot_selector);
197 	vcpu->sregs.fs.selector	= vcpu->kvm->boot_selector;
198 	vcpu->sregs.fs.base	= selector_to_base(vcpu->kvm->boot_selector);
199 	vcpu->sregs.gs.selector	= vcpu->kvm->boot_selector;
200 	vcpu->sregs.gs.base	= selector_to_base(vcpu->kvm->boot_selector);
201 
202 	if (ioctl(vcpu->vcpu_fd, KVM_SET_SREGS, &vcpu->sregs) < 0)
203 		die_perror("KVM_SET_SREGS failed");
204 }
205 
/**
 * kvm_cpu__reset_vcpu - bring a virtual CPU into its initial boot state
 * @vcpu: the virtual CPU to reset
 *
 * Programs, in this order, the special registers, the general purpose
 * registers, the FPU state and the MSRs of @vcpu.
 */
void kvm_cpu__reset_vcpu(struct kvm_cpu *vcpu)
{
	/* Segment selectors and bases. */
	kvm_cpu__setup_sregs(vcpu);

	/* Instruction pointer, stack pointer and flags. */
	kvm_cpu__setup_regs(vcpu);

	/* Floating point control state. */
	kvm_cpu__setup_fpu(vcpu);

	/* Model specific registers. */
	kvm_cpu__setup_msrs(vcpu);
}
216 
217 static void print_dtable(const char *name, struct kvm_dtable *dtable)
218 {
219 	printf(" %s                 %016llx  %08hx\n",
220 		name, (u64) dtable->base, (u16) dtable->limit);
221 }
222 
223 static void print_segment(const char *name, struct kvm_segment *seg)
224 {
225 	printf(" %s       %04hx      %016llx  %08x  %02hhx    %x %x   %x  %x %x %x %x\n",
226 		name, (u16) seg->selector, (u64) seg->base, (u32) seg->limit,
227 		(u8) seg->type, seg->present, seg->dpl, seg->db, seg->s, seg->l, seg->g, seg->avl);
228 }
229 
230 void kvm_cpu__show_registers(struct kvm_cpu *vcpu)
231 {
232 	unsigned long cr0, cr2, cr3;
233 	unsigned long cr4, cr8;
234 	unsigned long rax, rbx, rcx;
235 	unsigned long rdx, rsi, rdi;
236 	unsigned long rbp,  r8,  r9;
237 	unsigned long r10, r11, r12;
238 	unsigned long r13, r14, r15;
239 	unsigned long rip, rsp;
240 	struct kvm_sregs sregs;
241 	unsigned long rflags;
242 	struct kvm_regs regs;
243 	int i;
244 
245 	if (ioctl(vcpu->vcpu_fd, KVM_GET_REGS, &regs) < 0)
246 		die("KVM_GET_REGS failed");
247 
248 	rflags = regs.rflags;
249 
250 	rip = regs.rip; rsp = regs.rsp;
251 	rax = regs.rax; rbx = regs.rbx; rcx = regs.rcx;
252 	rdx = regs.rdx; rsi = regs.rsi; rdi = regs.rdi;
253 	rbp = regs.rbp; r8  = regs.r8;  r9  = regs.r9;
254 	r10 = regs.r10; r11 = regs.r11; r12 = regs.r12;
255 	r13 = regs.r13; r14 = regs.r14; r15 = regs.r15;
256 
257 	printf("\n Registers:\n");
258 	printf(  " ----------\n");
259 	printf(" rip: %016lx   rsp: %016lx flags: %016lx\n", rip, rsp, rflags);
260 	printf(" rax: %016lx   rbx: %016lx   rcx: %016lx\n", rax, rbx, rcx);
261 	printf(" rdx: %016lx   rsi: %016lx   rdi: %016lx\n", rdx, rsi, rdi);
262 	printf(" rbp: %016lx    r8: %016lx    r9: %016lx\n", rbp, r8,  r9);
263 	printf(" r10: %016lx   r11: %016lx   r12: %016lx\n", r10, r11, r12);
264 	printf(" r13: %016lx   r14: %016lx   r15: %016lx\n", r13, r14, r15);
265 
266 	if (ioctl(vcpu->vcpu_fd, KVM_GET_SREGS, &sregs) < 0)
267 		die("KVM_GET_REGS failed");
268 
269 	cr0 = sregs.cr0; cr2 = sregs.cr2; cr3 = sregs.cr3;
270 	cr4 = sregs.cr4; cr8 = sregs.cr8;
271 
272 	printf(" cr0: %016lx   cr2: %016lx   cr3: %016lx\n", cr0, cr2, cr3);
273 	printf(" cr4: %016lx   cr8: %016lx\n", cr4, cr8);
274 	printf("\n Segment registers:\n");
275 	printf(  " ------------------\n");
276 	printf(" register  selector  base              limit     type  p dpl db s l g avl\n");
277 	print_segment("cs ", &sregs.cs);
278 	print_segment("ss ", &sregs.ss);
279 	print_segment("ds ", &sregs.ds);
280 	print_segment("es ", &sregs.es);
281 	print_segment("fs ", &sregs.fs);
282 	print_segment("gs ", &sregs.gs);
283 	print_segment("tr ", &sregs.tr);
284 	print_segment("ldt", &sregs.ldt);
285 	print_dtable("gdt", &sregs.gdt);
286 	print_dtable("idt", &sregs.idt);
287 
288 	printf("\n APIC:\n");
289 	printf(  " -----\n");
290 	printf(" efer: %016llx  apic base: %016llx  nmi: %s\n",
291 		(u64) sregs.efer, (u64) sregs.apic_base,
292 		(vcpu->kvm->nmi_disabled ? "disabled" : "enabled"));
293 
294 	printf("\n Interrupt bitmap:\n");
295 	printf(  " -----------------\n");
296 	for (i = 0; i < (KVM_NR_INTERRUPTS + 63) / 64; i++)
297 		printf(" %016llx", (u64) sregs.interrupt_bitmap[i]);
298 	printf("\n");
299 }
300 
301 #define MAX_SYM_LEN		128
302 
303 void kvm_cpu__show_code(struct kvm_cpu *vcpu)
304 {
305 	unsigned int code_bytes = 64;
306 	unsigned int code_prologue = code_bytes * 43 / 64;
307 	unsigned int code_len = code_bytes;
308 	char sym[MAX_SYM_LEN];
309 	unsigned char c;
310 	unsigned int i;
311 	u8 *ip;
312 
313 	if (ioctl(vcpu->vcpu_fd, KVM_GET_REGS, &vcpu->regs) < 0)
314 		die("KVM_GET_REGS failed");
315 
316 	if (ioctl(vcpu->vcpu_fd, KVM_GET_SREGS, &vcpu->sregs) < 0)
317 		die("KVM_GET_SREGS failed");
318 
319 	ip = guest_flat_to_host(vcpu->kvm, ip_to_flat(vcpu, vcpu->regs.rip) - code_prologue);
320 
321 	printf("\n Code:\n");
322 	printf(  " -----\n");
323 
324 	symbol__lookup(vcpu->kvm, vcpu->regs.rip, sym, MAX_SYM_LEN);
325 
326 	printf(" rip: [<%016lx>] %s\n\n", (unsigned long) vcpu->regs.rip, sym);
327 
328 	for (i = 0; i < code_len; i++, ip++) {
329 		if (!host_ptr_in_ram(vcpu->kvm, ip))
330 			break;
331 
332 		c = *ip;
333 
334 		if (ip == guest_flat_to_host(vcpu->kvm, ip_to_flat(vcpu, vcpu->regs.rip)))
335 			printf(" <%02x>", c);
336 		else
337 			printf(" %02x", c);
338 	}
339 
340 	printf("\n");
341 
342 	printf("\n Stack:\n");
343 	printf(  " ------\n");
344 	kvm__dump_mem(vcpu->kvm, vcpu->regs.rsp, 32);
345 }
346 
347 void kvm_cpu__show_page_tables(struct kvm_cpu *vcpu)
348 {
349 	u64 *pte1;
350 	u64 *pte2;
351 	u64 *pte3;
352 	u64 *pte4;
353 
354 	if (!is_in_protected_mode(vcpu))
355 		return;
356 
357 	if (ioctl(vcpu->vcpu_fd, KVM_GET_SREGS, &vcpu->sregs) < 0)
358 		die("KVM_GET_SREGS failed");
359 
360 	pte4	= guest_flat_to_host(vcpu->kvm, vcpu->sregs.cr3);
361 	if (!host_ptr_in_ram(vcpu->kvm, pte4))
362 		return;
363 
364 	pte3	= guest_flat_to_host(vcpu->kvm, (*pte4 & ~0xfff));
365 	if (!host_ptr_in_ram(vcpu->kvm, pte3))
366 		return;
367 
368 	pte2	= guest_flat_to_host(vcpu->kvm, (*pte3 & ~0xfff));
369 	if (!host_ptr_in_ram(vcpu->kvm, pte2))
370 		return;
371 
372 	pte1	= guest_flat_to_host(vcpu->kvm, (*pte2 & ~0xfff));
373 	if (!host_ptr_in_ram(vcpu->kvm, pte1))
374 		return;
375 
376 	printf("Page Tables:\n");
377 	if (*pte2 & (1 << 7))
378 		printf(" pte4: %016llx   pte3: %016llx"
379 			"   pte2: %016llx\n",
380 			*pte4, *pte3, *pte2);
381 	else
382 		printf(" pte4: %016llx  pte3: %016llx   pte2: %016"
383 			"llx   pte1: %016llx\n",
384 			*pte4, *pte3, *pte2, *pte1);
385 }
386 
387 void kvm_cpu__run(struct kvm_cpu *vcpu)
388 {
389 	int err;
390 
391 	err = ioctl(vcpu->vcpu_fd, KVM_RUN, 0);
392 	if (err && (errno != EINTR && errno != EAGAIN))
393 		die_perror("KVM_RUN failed");
394 }
395 
396 static void kvm_cpu_signal_handler(int signum)
397 {
398 	if (signum == SIGKVMEXIT) {
399 		if (current_kvm_cpu && current_kvm_cpu->is_running) {
400 			current_kvm_cpu->is_running = false;
401 			pthread_kill(pthread_self(), SIGKVMEXIT);
402 		}
403 	} else if (signum == SIGKVMPAUSE) {
404 		current_kvm_cpu->paused = 1;
405 	}
406 }
407 
408 static void kvm_cpu__handle_coalesced_mmio(struct kvm_cpu *cpu)
409 {
410 	if (cpu->ring) {
411 		while (cpu->ring->first != cpu->ring->last) {
412 			struct kvm_coalesced_mmio *m;
413 			m = &cpu->ring->coalesced_mmio[cpu->ring->first];
414 			kvm__emulate_mmio(cpu->kvm,
415 					m->phys_addr,
416 					m->data,
417 					m->len,
418 					1);
419 			cpu->ring->first = (cpu->ring->first + 1) % KVM_COALESCED_MMIO_MAX;
420 		}
421 	}
422 }
423 
424 void kvm_cpu__reboot(void)
425 {
426 	int i;
427 
428 	for (i = 0; i < KVM_NR_CPUS; i++)
429 		if (kvm_cpus[i])
430 			pthread_kill(kvm_cpus[i]->thread, SIGKVMEXIT);
431 }
432 
433 int kvm_cpu__start(struct kvm_cpu *cpu)
434 {
435 	sigset_t sigset;
436 
437 	sigemptyset(&sigset);
438 	sigaddset(&sigset, SIGALRM);
439 
440 	pthread_sigmask(SIG_BLOCK, &sigset, NULL);
441 
442 	signal(SIGKVMEXIT, kvm_cpu_signal_handler);
443 	signal(SIGKVMPAUSE, kvm_cpu_signal_handler);
444 
445 	kvm_cpu__setup_cpuid(cpu);
446 	kvm_cpu__reset_vcpu(cpu);
447 
448 	if (cpu->kvm->single_step)
449 		kvm_cpu__enable_singlestep(cpu);
450 
451 	while (cpu->is_running) {
452 		if (cpu->paused) {
453 			kvm__notify_paused();
454 			cpu->paused = 0;
455 		}
456 
457 		kvm_cpu__run(cpu);
458 
459 		switch (cpu->kvm_run->exit_reason) {
460 		case KVM_EXIT_UNKNOWN:
461 			break;
462 		case KVM_EXIT_DEBUG:
463 			kvm_cpu__show_registers(cpu);
464 			kvm_cpu__show_code(cpu);
465 			break;
466 		case KVM_EXIT_IO: {
467 			bool ret;
468 
469 			ret = kvm__emulate_io(cpu->kvm,
470 					cpu->kvm_run->io.port,
471 					(u8 *)cpu->kvm_run +
472 					cpu->kvm_run->io.data_offset,
473 					cpu->kvm_run->io.direction,
474 					cpu->kvm_run->io.size,
475 					cpu->kvm_run->io.count);
476 
477 			if (!ret)
478 				goto panic_kvm;
479 			break;
480 		}
481 		case KVM_EXIT_MMIO: {
482 			bool ret;
483 
484 			ret = kvm__emulate_mmio(cpu->kvm,
485 					cpu->kvm_run->mmio.phys_addr,
486 					cpu->kvm_run->mmio.data,
487 					cpu->kvm_run->mmio.len,
488 					cpu->kvm_run->mmio.is_write);
489 
490 			if (!ret)
491 				goto panic_kvm;
492 			break;
493 		}
494 		case KVM_EXIT_INTR:
495 			if (cpu->is_running)
496 				break;
497 			goto exit_kvm;
498 		case KVM_EXIT_SHUTDOWN:
499 			goto exit_kvm;
500 		default:
501 			goto panic_kvm;
502 		}
503 		kvm_cpu__handle_coalesced_mmio(cpu);
504 	}
505 
506 exit_kvm:
507 	return 0;
508 
509 panic_kvm:
510 	return 1;
511 }
512