xref: /kvmtool/kvm-cpu.c (revision 4298ddade7b629079b3dfb0457cf4bcff7c47d3e)
1 #include "kvm/kvm-cpu.h"
2 
3 #include "kvm/symbol.h"
4 #include "kvm/util.h"
5 #include "kvm/kvm.h"
6 
7 #include <asm/msr-index.h>
8 
9 #include <sys/ioctl.h>
10 #include <sys/mman.h>
11 #include <signal.h>
12 #include <stdlib.h>
13 #include <string.h>
14 #include <errno.h>
15 #include <stdio.h>
16 
17 extern __thread struct kvm_cpu *current_kvm_cpu;
18 
19 static inline bool is_in_protected_mode(struct kvm_cpu *vcpu)
20 {
21 	return vcpu->sregs.cr0 & 0x01;
22 }
23 
24 static inline u64 ip_to_flat(struct kvm_cpu *vcpu, u64 ip)
25 {
26 	u64 cs;
27 
28 	/*
29 	 * NOTE! We should take code segment base address into account here.
30 	 * Luckily it's usually zero because Linux uses flat memory model.
31 	 */
32 	if (is_in_protected_mode(vcpu))
33 		return ip;
34 
35 	cs = vcpu->sregs.cs.selector;
36 
37 	return ip + (cs << 4);
38 }
39 
40 static inline u32 selector_to_base(u16 selector)
41 {
42 	/*
43 	 * KVM on Intel requires 'base' to be 'selector * 16' in real mode.
44 	 */
45 	return (u32)selector * 16;
46 }
47 
48 static struct kvm_cpu *kvm_cpu__new(struct kvm *kvm)
49 {
50 	struct kvm_cpu *vcpu;
51 
52 	vcpu		= calloc(1, sizeof *vcpu);
53 	if (!vcpu)
54 		return NULL;
55 
56 	vcpu->kvm	= kvm;
57 
58 	return vcpu;
59 }
60 
61 void kvm_cpu__delete(struct kvm_cpu *vcpu)
62 {
63 	if (vcpu->msrs)
64 		free(vcpu->msrs);
65 
66 	free(vcpu);
67 }
68 
69 struct kvm_cpu *kvm_cpu__init(struct kvm *kvm, unsigned long cpu_id)
70 {
71 	struct kvm_cpu *vcpu;
72 	int mmap_size;
73 
74 	vcpu		= kvm_cpu__new(kvm);
75 	if (!vcpu)
76 		return NULL;
77 
78 	vcpu->cpu_id	= cpu_id;
79 
80 	vcpu->vcpu_fd = ioctl(vcpu->kvm->vm_fd, KVM_CREATE_VCPU, cpu_id);
81 	if (vcpu->vcpu_fd < 0)
82 		die_perror("KVM_CREATE_VCPU ioctl");
83 
84 	mmap_size = ioctl(vcpu->kvm->sys_fd, KVM_GET_VCPU_MMAP_SIZE, 0);
85 	if (mmap_size < 0)
86 		die_perror("KVM_GET_VCPU_MMAP_SIZE ioctl");
87 
88 	vcpu->kvm_run = mmap(NULL, mmap_size, PROT_RW, MAP_SHARED, vcpu->vcpu_fd, 0);
89 	if (vcpu->kvm_run == MAP_FAILED)
90 		die("unable to mmap vcpu fd");
91 
92 	vcpu->is_running = true;
93 
94 	return vcpu;
95 }
96 
97 void kvm_cpu__enable_singlestep(struct kvm_cpu *vcpu)
98 {
99 	struct kvm_guest_debug debug = {
100 		.control	= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP,
101 	};
102 
103 	if (ioctl(vcpu->vcpu_fd, KVM_SET_GUEST_DEBUG, &debug) < 0)
104 		pr_warning("KVM_SET_GUEST_DEBUG failed");
105 }
106 
107 static struct kvm_msrs *kvm_msrs__new(size_t nmsrs)
108 {
109 	struct kvm_msrs *vcpu = calloc(1, sizeof(*vcpu) + (sizeof(struct kvm_msr_entry) * nmsrs));
110 
111 	if (!vcpu)
112 		die("out of memory");
113 
114 	return vcpu;
115 }
116 
/*
 * Build a struct kvm_msr_entry compound literal holding MSR number @_index
 * and value @_data; members not named here are zero-initialized.
 */
#define KVM_MSR_ENTRY(_index, _data)	\
	((struct kvm_msr_entry) { .index = (_index), .data = (_data) })
119 
120 static void kvm_cpu__setup_msrs(struct kvm_cpu *vcpu)
121 {
122 	unsigned long ndx = 0;
123 
124 	vcpu->msrs = kvm_msrs__new(100);
125 
126 	vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_SYSENTER_CS,	0x0);
127 	vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_SYSENTER_ESP,	0x0);
128 	vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_SYSENTER_EIP,	0x0);
129 #ifdef CONFIG_X86_64
130 	vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_STAR,			0x0);
131 	vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_CSTAR,			0x0);
132 	vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_KERNEL_GS_BASE,		0x0);
133 	vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_SYSCALL_MASK,		0x0);
134 	vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_LSTAR,			0x0);
135 #endif
136 	vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_TSC,		0x0);
137 
138 	vcpu->msrs->nmsrs	= ndx;
139 
140 	if (ioctl(vcpu->vcpu_fd, KVM_SET_MSRS, vcpu->msrs) < 0)
141 		die_perror("KVM_SET_MSRS failed");
142 }
143 
144 static void kvm_cpu__setup_fpu(struct kvm_cpu *vcpu)
145 {
146 	vcpu->fpu = (struct kvm_fpu) {
147 		.fcw		= 0x37f,
148 		.mxcsr		= 0x1f80,
149 	};
150 
151 	if (ioctl(vcpu->vcpu_fd, KVM_SET_FPU, &vcpu->fpu) < 0)
152 		die_perror("KVM_SET_FPU failed");
153 }
154 
155 static void kvm_cpu__setup_regs(struct kvm_cpu *vcpu)
156 {
157 	vcpu->regs = (struct kvm_regs) {
158 		/* We start the guest in 16-bit real mode  */
159 		.rflags		= 0x0000000000000002ULL,
160 
161 		.rip		= vcpu->kvm->boot_ip,
162 		.rsp		= vcpu->kvm->boot_sp,
163 		.rbp		= vcpu->kvm->boot_sp,
164 	};
165 
166 	if (vcpu->regs.rip > USHRT_MAX)
167 		die("ip 0x%llx is too high for real mode", (u64) vcpu->regs.rip);
168 
169 	if (ioctl(vcpu->vcpu_fd, KVM_SET_REGS, &vcpu->regs) < 0)
170 		die_perror("KVM_SET_REGS failed");
171 }
172 
173 static void kvm_cpu__setup_sregs(struct kvm_cpu *vcpu)
174 {
175 
176 	if (ioctl(vcpu->vcpu_fd, KVM_GET_SREGS, &vcpu->sregs) < 0)
177 		die_perror("KVM_GET_SREGS failed");
178 
179 	vcpu->sregs.cs.selector	= vcpu->kvm->boot_selector;
180 	vcpu->sregs.cs.base	= selector_to_base(vcpu->kvm->boot_selector);
181 	vcpu->sregs.ss.selector	= vcpu->kvm->boot_selector;
182 	vcpu->sregs.ss.base	= selector_to_base(vcpu->kvm->boot_selector);
183 	vcpu->sregs.ds.selector	= vcpu->kvm->boot_selector;
184 	vcpu->sregs.ds.base	= selector_to_base(vcpu->kvm->boot_selector);
185 	vcpu->sregs.es.selector	= vcpu->kvm->boot_selector;
186 	vcpu->sregs.es.base	= selector_to_base(vcpu->kvm->boot_selector);
187 	vcpu->sregs.fs.selector	= vcpu->kvm->boot_selector;
188 	vcpu->sregs.fs.base	= selector_to_base(vcpu->kvm->boot_selector);
189 	vcpu->sregs.gs.selector	= vcpu->kvm->boot_selector;
190 	vcpu->sregs.gs.base	= selector_to_base(vcpu->kvm->boot_selector);
191 
192 	if (ioctl(vcpu->vcpu_fd, KVM_SET_SREGS, &vcpu->sregs) < 0)
193 		die_perror("KVM_SET_SREGS failed");
194 }
195 
196 /**
197  * kvm_cpu__reset_vcpu - reset virtual CPU to a known state
198  */
199 void kvm_cpu__reset_vcpu(struct kvm_cpu *vcpu)
200 {
201 	kvm_cpu__setup_sregs(vcpu);
202 	kvm_cpu__setup_regs(vcpu);
203 	kvm_cpu__setup_fpu(vcpu);
204 	kvm_cpu__setup_msrs(vcpu);
205 }
206 
207 static void print_dtable(const char *name, struct kvm_dtable *dtable)
208 {
209 	printf(" %s                 %016llx  %08hx\n",
210 		name, (u64) dtable->base, (u16) dtable->limit);
211 }
212 
213 static void print_segment(const char *name, struct kvm_segment *seg)
214 {
215 	printf(" %s       %04hx      %016llx  %08x  %02hhx    %x %x   %x  %x %x %x %x\n",
216 		name, (u16) seg->selector, (u64) seg->base, (u32) seg->limit,
217 		(u8) seg->type, seg->present, seg->dpl, seg->db, seg->s, seg->l, seg->g, seg->avl);
218 }
219 
220 void kvm_cpu__show_registers(struct kvm_cpu *vcpu)
221 {
222 	unsigned long cr0, cr2, cr3;
223 	unsigned long cr4, cr8;
224 	unsigned long rax, rbx, rcx;
225 	unsigned long rdx, rsi, rdi;
226 	unsigned long rbp,  r8,  r9;
227 	unsigned long r10, r11, r12;
228 	unsigned long r13, r14, r15;
229 	unsigned long rip, rsp;
230 	struct kvm_sregs sregs;
231 	unsigned long rflags;
232 	struct kvm_regs regs;
233 	int i;
234 
235 	if (ioctl(vcpu->vcpu_fd, KVM_GET_REGS, &regs) < 0)
236 		die("KVM_GET_REGS failed");
237 
238 	rflags = regs.rflags;
239 
240 	rip = regs.rip; rsp = regs.rsp;
241 	rax = regs.rax; rbx = regs.rbx; rcx = regs.rcx;
242 	rdx = regs.rdx; rsi = regs.rsi; rdi = regs.rdi;
243 	rbp = regs.rbp; r8  = regs.r8;  r9  = regs.r9;
244 	r10 = regs.r10; r11 = regs.r11; r12 = regs.r12;
245 	r13 = regs.r13; r14 = regs.r14; r15 = regs.r15;
246 
247 	printf("\n Registers:\n");
248 	printf(  " ----------\n");
249 	printf(" rip: %016lx   rsp: %016lx flags: %016lx\n", rip, rsp, rflags);
250 	printf(" rax: %016lx   rbx: %016lx   rcx: %016lx\n", rax, rbx, rcx);
251 	printf(" rdx: %016lx   rsi: %016lx   rdi: %016lx\n", rdx, rsi, rdi);
252 	printf(" rbp: %016lx    r8: %016lx    r9: %016lx\n", rbp, r8,  r9);
253 	printf(" r10: %016lx   r11: %016lx   r12: %016lx\n", r10, r11, r12);
254 	printf(" r13: %016lx   r14: %016lx   r15: %016lx\n", r13, r14, r15);
255 
256 	if (ioctl(vcpu->vcpu_fd, KVM_GET_SREGS, &sregs) < 0)
257 		die("KVM_GET_REGS failed");
258 
259 	cr0 = sregs.cr0; cr2 = sregs.cr2; cr3 = sregs.cr3;
260 	cr4 = sregs.cr4; cr8 = sregs.cr8;
261 
262 	printf(" cr0: %016lx   cr2: %016lx   cr3: %016lx\n", cr0, cr2, cr3);
263 	printf(" cr4: %016lx   cr8: %016lx\n", cr4, cr8);
264 	printf("\n Segment registers:\n");
265 	printf(  " ------------------\n");
266 	printf(" register  selector  base              limit     type  p dpl db s l g avl\n");
267 	print_segment("cs ", &sregs.cs);
268 	print_segment("ss ", &sregs.ss);
269 	print_segment("ds ", &sregs.ds);
270 	print_segment("es ", &sregs.es);
271 	print_segment("fs ", &sregs.fs);
272 	print_segment("gs ", &sregs.gs);
273 	print_segment("tr ", &sregs.tr);
274 	print_segment("ldt", &sregs.ldt);
275 	print_dtable("gdt", &sregs.gdt);
276 	print_dtable("idt", &sregs.idt);
277 
278 	printf("\n APIC:\n");
279 	printf(  " -----\n");
280 	printf(" efer: %016llx  apic base: %016llx  nmi: %s\n",
281 		(u64) sregs.efer, (u64) sregs.apic_base,
282 		(vcpu->kvm->nmi_disabled ? "disabled" : "enabled"));
283 
284 	printf("\n Interrupt bitmap:\n");
285 	printf(  " -----------------\n");
286 	for (i = 0; i < (KVM_NR_INTERRUPTS + 63) / 64; i++)
287 		printf(" %016llx", (u64) sregs.interrupt_bitmap[i]);
288 	printf("\n");
289 }
290 
291 #define MAX_SYM_LEN		128
292 
293 void kvm_cpu__show_code(struct kvm_cpu *vcpu)
294 {
295 	unsigned int code_bytes = 64;
296 	unsigned int code_prologue = code_bytes * 43 / 64;
297 	unsigned int code_len = code_bytes;
298 	char sym[MAX_SYM_LEN];
299 	unsigned char c;
300 	unsigned int i;
301 	u8 *ip;
302 
303 	if (ioctl(vcpu->vcpu_fd, KVM_GET_REGS, &vcpu->regs) < 0)
304 		die("KVM_GET_REGS failed");
305 
306 	if (ioctl(vcpu->vcpu_fd, KVM_GET_SREGS, &vcpu->sregs) < 0)
307 		die("KVM_GET_SREGS failed");
308 
309 	ip = guest_flat_to_host(vcpu->kvm, ip_to_flat(vcpu, vcpu->regs.rip) - code_prologue);
310 
311 	printf("\n Code:\n");
312 	printf(  " -----\n");
313 
314 	symbol__lookup(vcpu->kvm, vcpu->regs.rip, sym, MAX_SYM_LEN);
315 
316 	printf(" rip: [<%016lx>] %s\n\n", (unsigned long) vcpu->regs.rip, sym);
317 
318 	for (i = 0; i < code_len; i++, ip++) {
319 		if (!host_ptr_in_ram(vcpu->kvm, ip))
320 			break;
321 
322 		c = *ip;
323 
324 		if (ip == guest_flat_to_host(vcpu->kvm, ip_to_flat(vcpu, vcpu->regs.rip)))
325 			printf(" <%02x>", c);
326 		else
327 			printf(" %02x", c);
328 	}
329 
330 	printf("\n");
331 
332 	printf("\n Stack:\n");
333 	printf(  " ------\n");
334 	kvm__dump_mem(vcpu->kvm, vcpu->regs.rsp, 32);
335 }
336 
337 void kvm_cpu__show_page_tables(struct kvm_cpu *vcpu)
338 {
339 	u64 *pte1;
340 	u64 *pte2;
341 	u64 *pte3;
342 	u64 *pte4;
343 
344 	if (!is_in_protected_mode(vcpu))
345 		return;
346 
347 	if (ioctl(vcpu->vcpu_fd, KVM_GET_SREGS, &vcpu->sregs) < 0)
348 		die("KVM_GET_SREGS failed");
349 
350 	pte4	= guest_flat_to_host(vcpu->kvm, vcpu->sregs.cr3);
351 	if (!host_ptr_in_ram(vcpu->kvm, pte4))
352 		return;
353 
354 	pte3	= guest_flat_to_host(vcpu->kvm, (*pte4 & ~0xfff));
355 	if (!host_ptr_in_ram(vcpu->kvm, pte3))
356 		return;
357 
358 	pte2	= guest_flat_to_host(vcpu->kvm, (*pte3 & ~0xfff));
359 	if (!host_ptr_in_ram(vcpu->kvm, pte2))
360 		return;
361 
362 	pte1	= guest_flat_to_host(vcpu->kvm, (*pte2 & ~0xfff));
363 	if (!host_ptr_in_ram(vcpu->kvm, pte1))
364 		return;
365 
366 	printf("Page Tables:\n");
367 	if (*pte2 & (1 << 7))
368 		printf(" pte4: %016llx   pte3: %016llx"
369 			"   pte2: %016llx\n",
370 			*pte4, *pte3, *pte2);
371 	else
372 		printf(" pte4: %016llx  pte3: %016llx   pte2: %016"
373 			"llx   pte1: %016llx\n",
374 			*pte4, *pte3, *pte2, *pte1);
375 }
376 
377 void kvm_cpu__run(struct kvm_cpu *vcpu)
378 {
379 	int err;
380 
381 	err = ioctl(vcpu->vcpu_fd, KVM_RUN, 0);
382 	if (err && (errno != EINTR && errno != EAGAIN))
383 		die_perror("KVM_RUN failed");
384 }
385 
386 static void kvm_cpu_signal_handler(int signum)
387 {
388 	if (signum == SIGKVMEXIT) {
389 		if (current_kvm_cpu->is_running) {
390 			current_kvm_cpu->is_running = false;
391 			pthread_kill(pthread_self(), SIGKVMEXIT);
392 		}
393 	} else if (signum == SIGKVMPAUSE) {
394 		current_kvm_cpu->paused = 1;
395 	}
396 }
397 
398 int kvm_cpu__start(struct kvm_cpu *cpu)
399 {
400 	sigset_t sigset;
401 
402 	sigemptyset(&sigset);
403 	sigaddset(&sigset, SIGALRM);
404 
405 	pthread_sigmask(SIG_BLOCK, &sigset, NULL);
406 
407 	signal(SIGKVMEXIT, kvm_cpu_signal_handler);
408 	signal(SIGKVMPAUSE, kvm_cpu_signal_handler);
409 
410 	kvm_cpu__setup_cpuid(cpu);
411 	kvm_cpu__reset_vcpu(cpu);
412 
413 	for (;;) {
414 		if (cpu->paused) {
415 			kvm__notify_paused();
416 			cpu->paused = 0;
417 		}
418 
419 		kvm_cpu__run(cpu);
420 
421 		switch (cpu->kvm_run->exit_reason) {
422 		case KVM_EXIT_UNKNOWN:
423 			break;
424 		case KVM_EXIT_DEBUG:
425 			kvm_cpu__show_registers(cpu);
426 			kvm_cpu__show_code(cpu);
427 			break;
428 		case KVM_EXIT_IO: {
429 			bool ret;
430 
431 			ret = kvm__emulate_io(cpu->kvm,
432 					cpu->kvm_run->io.port,
433 					(u8 *)cpu->kvm_run +
434 					cpu->kvm_run->io.data_offset,
435 					cpu->kvm_run->io.direction,
436 					cpu->kvm_run->io.size,
437 					cpu->kvm_run->io.count);
438 
439 			if (!ret)
440 				goto panic_kvm;
441 			break;
442 		}
443 		case KVM_EXIT_MMIO: {
444 			bool ret;
445 
446 			ret = kvm__emulate_mmio(cpu->kvm,
447 					cpu->kvm_run->mmio.phys_addr,
448 					cpu->kvm_run->mmio.data,
449 					cpu->kvm_run->mmio.len,
450 					cpu->kvm_run->mmio.is_write);
451 
452 			if (!ret)
453 				goto panic_kvm;
454 			break;
455 		}
456 		case KVM_EXIT_INTR:
457 			if (cpu->is_running)
458 				break;
459 			goto exit_kvm;
460 		case KVM_EXIT_SHUTDOWN:
461 			goto exit_kvm;
462 		default:
463 			goto panic_kvm;
464 		}
465 	}
466 
467 exit_kvm:
468 	return 0;
469 
470 panic_kvm:
471 	return 1;
472 }
473