xref: /kvmtool/kvm-cpu.c (revision 5d1a249ce504d81a87c548dcf110cc224a1c09d3)
1 #include "kvm/kvm-cpu.h"
2 
3 #include "kvm/virtio-console.h"
4 #include "kvm/8250-serial.h"
5 #include "kvm/util.h"
6 #include "kvm/kvm.h"
7 
8 #include <sys/ioctl.h>
9 #include <sys/mman.h>
10 #include <stdlib.h>
11 #include <errno.h>
12 #include <stdio.h>
13 
14 static inline bool is_in_protected_mode(struct kvm_cpu *self)
15 {
16 	return self->sregs.cr0 & 0x01;
17 }
18 
19 static inline uint64_t ip_to_flat(struct kvm_cpu *self, uint64_t ip)
20 {
21 	uint64_t cs;
22 
23 	/*
24 	 * NOTE! We should take code segment base address into account here.
25 	 * Luckily it's usually zero because Linux uses flat memory model.
26 	 */
27 	if (is_in_protected_mode(self))
28 		return ip;
29 
30 	cs = self->sregs.cs.selector;
31 
32 	return ip + (cs << 4);
33 }
34 
/*
 * Compute the real-mode segment base that belongs to @selector.
 *
 * KVM on Intel requires 'base' to be 'selector * 16' in real mode, which is
 * the selector shifted left by four bits.
 */
static inline uint32_t selector_to_base(uint16_t selector)
{
	uint32_t base = selector;

	base <<= 4;

	return base;
}
42 
43 static struct kvm_cpu *kvm_cpu__new(struct kvm *kvm)
44 {
45 	struct kvm_cpu *self;
46 
47 	self		= calloc(1, sizeof *self);
48 	if (!self)
49 		return NULL;
50 
51 	self->kvm	= kvm;
52 
53 	return self;
54 }
55 
56 void kvm_cpu__delete(struct kvm_cpu *self)
57 {
58 	if (self->msrs)
59 		free(self->msrs);
60 
61 	free(self);
62 }
63 
64 struct kvm_cpu *kvm_cpu__init(struct kvm *kvm)
65 {
66 	struct kvm_cpu *self;
67 	int mmap_size;
68 
69 	self		= kvm_cpu__new(kvm);
70 	if (!self)
71 		return NULL;
72 
73 	self->vcpu_fd = ioctl(self->kvm->vm_fd, KVM_CREATE_VCPU, 0);
74 	if (self->vcpu_fd < 0)
75 		die_perror("KVM_CREATE_VCPU ioctl");
76 
77 	mmap_size = ioctl(self->kvm->sys_fd, KVM_GET_VCPU_MMAP_SIZE, 0);
78 	if (mmap_size < 0)
79 		die_perror("KVM_GET_VCPU_MMAP_SIZE ioctl");
80 
81 	self->kvm_run = mmap(NULL, mmap_size, PROT_READ|PROT_WRITE, MAP_SHARED, self->vcpu_fd, 0);
82 	if (self->kvm_run == MAP_FAILED)
83 		die("unable to mmap vcpu fd");
84 
85 	return self;
86 }
87 
88 void kvm_cpu__enable_singlestep(struct kvm_cpu *self)
89 {
90 	struct kvm_guest_debug debug = {
91 		.control	= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP,
92 	};
93 
94 	if (ioctl(self->vcpu_fd, KVM_SET_GUEST_DEBUG, &debug) < 0)
95 		warning("KVM_SET_GUEST_DEBUG failed");
96 }
97 
98 static struct kvm_msrs *kvm_msrs__new(size_t nmsrs)
99 {
100 	struct kvm_msrs *self = calloc(1, sizeof(*self) + (sizeof(struct kvm_msr_entry) * nmsrs));
101 
102 	if (!self)
103 		die("out of memory");
104 
105 	return self;
106 }
107 
/* Indices of the model-specific registers initialised at vCPU reset. */
#define MSR_IA32_TIME_STAMP_COUNTER	0x10

#define MSR_IA32_SYSENTER_CS		0x174
#define MSR_IA32_SYSENTER_ESP		0x175
#define MSR_IA32_SYSENTER_EIP		0x176

#define MSR_IA32_STAR			0xc0000081
#define MSR_IA32_LSTAR			0xc0000082
#define MSR_IA32_CSTAR			0xc0000083
#define MSR_IA32_FMASK			0xc0000084
#define MSR_IA32_KERNEL_GS_BASE		0xc0000102

/*
 * Build a struct kvm_msr_entry value with the given index and data; all
 * other members are zero.  Arguments and the expansion are parenthesized
 * so the macro stays safe with arbitrary expressions.
 */
#define KVM_MSR_ENTRY(_index, _data)	\
	((struct kvm_msr_entry) { .index = (_index), .data = (_data) })
122 
123 static void kvm_cpu__setup_msrs(struct kvm_cpu *self)
124 {
125 	unsigned long ndx = 0;
126 
127 	self->msrs = kvm_msrs__new(100);
128 
129 	self->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_SYSENTER_CS,	0x0);
130 	self->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_SYSENTER_ESP,	0x0);
131 	self->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_SYSENTER_EIP,	0x0);
132 #ifdef CONFIG_X86_64
133 	self->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_STAR,		0x0);
134 	self->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_CSTAR,		0x0);
135 	self->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_KERNEL_GS_BASE,	0x0);
136 	self->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_FMASK,		0x0);
137 	self->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_LSTAR,		0x0);
138 #endif
139 	self->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_TIME_STAMP_COUNTER,	0x0);
140 
141 	self->msrs->nmsrs	= ndx;
142 
143 	if (ioctl(self->vcpu_fd, KVM_SET_MSRS, self->msrs) < 0)
144 		die_perror("KVM_SET_MSRS failed");
145 }
146 
147 static void kvm_cpu__setup_fpu(struct kvm_cpu *self)
148 {
149 	self->fpu = (struct kvm_fpu) {
150 		.fcw		= 0x37f,
151 		.mxcsr		= 0x1f80,
152 	};
153 
154 	if (ioctl(self->vcpu_fd, KVM_SET_FPU, &self->fpu) < 0)
155 		die_perror("KVM_SET_FPU failed");
156 }
157 
158 static void kvm_cpu__setup_regs(struct kvm_cpu *self)
159 {
160 	self->regs = (struct kvm_regs) {
161 		/* We start the guest in 16-bit real mode  */
162 		.rflags		= 0x0000000000000002ULL,
163 
164 		.rip		= self->kvm->boot_ip,
165 		.rsp		= self->kvm->boot_sp,
166 		.rbp		= self->kvm->boot_sp,
167 	};
168 
169 	if (self->regs.rip > USHRT_MAX)
170 		die("ip 0x%" PRIx64 " is too high for real mode", (uint64_t) self->regs.rip);
171 
172 	if (ioctl(self->vcpu_fd, KVM_SET_REGS, &self->regs) < 0)
173 		die_perror("KVM_SET_REGS failed");
174 }
175 
176 static void kvm_cpu__setup_sregs(struct kvm_cpu *self)
177 {
178 
179 	if (ioctl(self->vcpu_fd, KVM_GET_SREGS, &self->sregs) < 0)
180 		die_perror("KVM_GET_SREGS failed");
181 
182 	self->sregs.cs.selector	= self->kvm->boot_selector;
183 	self->sregs.cs.base	= selector_to_base(self->kvm->boot_selector);
184 	self->sregs.ss.selector	= self->kvm->boot_selector;
185 	self->sregs.ss.base	= selector_to_base(self->kvm->boot_selector);
186 	self->sregs.ds.selector	= self->kvm->boot_selector;
187 	self->sregs.ds.base	= selector_to_base(self->kvm->boot_selector);
188 	self->sregs.es.selector	= self->kvm->boot_selector;
189 	self->sregs.es.base	= selector_to_base(self->kvm->boot_selector);
190 	self->sregs.fs.selector	= self->kvm->boot_selector;
191 	self->sregs.fs.base	= selector_to_base(self->kvm->boot_selector);
192 	self->sregs.gs.selector	= self->kvm->boot_selector;
193 	self->sregs.gs.base	= selector_to_base(self->kvm->boot_selector);
194 
195 	if (ioctl(self->vcpu_fd, KVM_SET_SREGS, &self->sregs) < 0)
196 		die_perror("KVM_SET_SREGS failed");
197 }
198 
/**
 * kvm_cpu__reset_vcpu - bring a virtual CPU into its initial boot state
 * @self: the vCPU to reset
 *
 * Programs, in this order, the special registers, the general purpose
 * registers, the FPU state and the MSRs.  Each step terminates the program
 * if its KVM ioctl fails.
 */
void kvm_cpu__reset_vcpu(struct kvm_cpu *self)
{
	/* Segment state first, then the register file. */
	kvm_cpu__setup_sregs(self);
	kvm_cpu__setup_regs(self);

	/* Floating point and model-specific state last. */
	kvm_cpu__setup_fpu(self);
	kvm_cpu__setup_msrs(self);
}
209 
210 static void print_dtable(const char *name, struct kvm_dtable *dtable)
211 {
212 	printf(" %s                 %016" PRIx64 "  %08" PRIx16 "\n",
213 		name, (uint64_t) dtable->base, (uint16_t) dtable->limit);
214 }
215 
216 static void print_segment(const char *name, struct kvm_segment *seg)
217 {
218 	printf(" %s       %04" PRIx16 "      %016" PRIx64 "  %08" PRIx32 "  %02" PRIx8 "    %x %x   %x  %x %x %x %x\n",
219 		name, (uint16_t) seg->selector, (uint64_t) seg->base, (uint32_t) seg->limit,
220 		(uint8_t) seg->type, seg->present, seg->dpl, seg->db, seg->s, seg->l, seg->g, seg->avl);
221 }
222 
223 void kvm_cpu__show_registers(struct kvm_cpu *self)
224 {
225 	unsigned long cr0, cr2, cr3;
226 	unsigned long cr4, cr8;
227 	unsigned long rax, rbx, rcx;
228 	unsigned long rdx, rsi, rdi;
229 	unsigned long rbp,  r8,  r9;
230 	unsigned long r10, r11, r12;
231 	unsigned long r13, r14, r15;
232 	unsigned long rip, rsp;
233 	struct kvm_sregs sregs;
234 	unsigned long rflags;
235 	struct kvm_regs regs;
236 	int i;
237 
238 	if (ioctl(self->vcpu_fd, KVM_GET_REGS, &regs) < 0)
239 		die("KVM_GET_REGS failed");
240 
241 	rflags = regs.rflags;
242 
243 	rip = regs.rip; rsp = regs.rsp;
244 	rax = regs.rax; rbx = regs.rbx; rcx = regs.rcx;
245 	rdx = regs.rdx; rsi = regs.rsi; rdi = regs.rdi;
246 	rbp = regs.rbp; r8  = regs.r8;  r9  = regs.r9;
247 	r10 = regs.r10; r11 = regs.r11; r12 = regs.r12;
248 	r13 = regs.r13; r14 = regs.r14; r15 = regs.r15;
249 
250 	printf("Registers:\n");
251 	printf(" rip: %016lx   rsp: %016lx flags: %016lx\n", rip, rsp, rflags);
252 	printf(" rax: %016lx   rbx: %016lx   rcx: %016lx\n", rax, rbx, rcx);
253 	printf(" rdx: %016lx   rsi: %016lx   rdi: %016lx\n", rdx, rsi, rdi);
254 	printf(" rbp: %016lx   r8:  %016lx   r9:  %016lx\n", rbp, r8,  r9);
255 	printf(" r10: %016lx   r11: %016lx   r12: %016lx\n", r10, r11, r12);
256 	printf(" r13: %016lx   r14: %016lx   r15: %016lx\n", r13, r14, r15);
257 
258 	if (ioctl(self->vcpu_fd, KVM_GET_SREGS, &sregs) < 0)
259 		die("KVM_GET_REGS failed");
260 
261 	cr0 = sregs.cr0; cr2 = sregs.cr2; cr3 = sregs.cr3;
262 	cr4 = sregs.cr4; cr8 = sregs.cr8;
263 
264 	printf(" cr0: %016lx   cr2: %016lx   cr3: %016lx\n", cr0, cr2, cr3);
265 	printf(" cr4: %016lx   cr8: %016lx\n", cr4, cr8);
266 	printf("Segment registers:\n");
267 	printf(" register  selector  base              limit     type  p dpl db s l g avl\n");
268 	print_segment("cs ", &sregs.cs);
269 	print_segment("ss ", &sregs.ss);
270 	print_segment("ds ", &sregs.ds);
271 	print_segment("es ", &sregs.es);
272 	print_segment("fs ", &sregs.fs);
273 	print_segment("gs ", &sregs.gs);
274 	print_segment("tr ", &sregs.tr);
275 	print_segment("ldt", &sregs.ldt);
276 	print_dtable("gdt", &sregs.gdt);
277 	print_dtable("idt", &sregs.idt);
278 	printf(" [ efer: %016" PRIx64 "  apic base: %016" PRIx64 "  nmi: %s ]\n",
279 		(uint64_t) sregs.efer, (uint64_t) sregs.apic_base,
280 		(self->kvm->nmi_disabled ? "disabled" : "enabled"));
281 	printf("Interrupt bitmap:\n");
282 	printf(" ");
283 	for (i = 0; i < (KVM_NR_INTERRUPTS + 63) / 64; i++)
284 		printf("%016" PRIx64 " ", (uint64_t) sregs.interrupt_bitmap[i]);
285 	printf("\n");
286 }
287 
288 void kvm_cpu__show_code(struct kvm_cpu *self)
289 {
290 	unsigned int code_bytes = 64;
291 	unsigned int code_prologue = code_bytes * 43 / 64;
292 	unsigned int code_len = code_bytes;
293 	unsigned char c;
294 	unsigned int i;
295 	uint8_t *ip;
296 
297 	if (ioctl(self->vcpu_fd, KVM_GET_REGS, &self->regs) < 0)
298 		die("KVM_GET_REGS failed");
299 
300 	if (ioctl(self->vcpu_fd, KVM_GET_SREGS, &self->sregs) < 0)
301 		die("KVM_GET_SREGS failed");
302 
303 	ip = guest_flat_to_host(self->kvm, ip_to_flat(self, self->regs.rip) - code_prologue);
304 
305 	printf("Code: ");
306 
307 	for (i = 0; i < code_len; i++, ip++) {
308 		if (!host_ptr_in_ram(self->kvm, ip))
309 			break;
310 
311 		c = *ip;
312 
313 		if (ip == guest_flat_to_host(self->kvm, ip_to_flat(self, self->regs.rip)))
314 			printf("<%02x> ", c);
315 		else
316 			printf("%02x ", c);
317 	}
318 
319 	printf("\n");
320 
321 	printf("Stack:\n");
322 	kvm__dump_mem(self->kvm, self->regs.rsp, 32);
323 }
324 
325 void kvm_cpu__show_page_tables(struct kvm_cpu *self)
326 {
327 	uint64_t *pte1;
328 	uint64_t *pte2;
329 	uint64_t *pte3;
330 	uint64_t *pte4;
331 
332 	if (!is_in_protected_mode(self))
333 		return;
334 
335 	if (ioctl(self->vcpu_fd, KVM_GET_SREGS, &self->sregs) < 0)
336 		die("KVM_GET_SREGS failed");
337 
338 	pte4	= guest_flat_to_host(self->kvm, self->sregs.cr3);
339 	if (!host_ptr_in_ram(self->kvm, pte4))
340 		return;
341 
342 	pte3	= guest_flat_to_host(self->kvm, (*pte4 & ~0xfff));
343 	if (!host_ptr_in_ram(self->kvm, pte3))
344 		return;
345 
346 	pte2	= guest_flat_to_host(self->kvm, (*pte3 & ~0xfff));
347 	if (!host_ptr_in_ram(self->kvm, pte2))
348 		return;
349 
350 	pte1	= guest_flat_to_host(self->kvm, (*pte2 & ~0xfff));
351 	if (!host_ptr_in_ram(self->kvm, pte1))
352 		return;
353 
354 	printf("Page Tables:\n");
355 	if (*pte2 & (1 << 7))
356 		printf(" pte4: %016" PRIx64 "   pte3: %016" PRIx64
357 			"   pte2: %016" PRIx64 "\n",
358 			*pte4, *pte3, *pte2);
359 	else
360 		printf(" pte4: %016" PRIx64 "   pte3: %016" PRIx64 "   pte2: %016"
361 			PRIx64 "   pte1: %016" PRIx64 "\n",
362 			*pte4, *pte3, *pte2, *pte1);
363 }
364 
365 void kvm_cpu__run(struct kvm_cpu *self)
366 {
367 	int err;
368 
369 	err = ioctl(self->vcpu_fd, KVM_RUN, 0);
370 	if (err && (errno != EINTR && errno != EAGAIN))
371 		die_perror("KVM_RUN failed");
372 }
373 
374 int kvm_cpu__start(struct kvm_cpu *cpu)
375 {
376 	kvm_cpu__setup_cpuid(cpu);
377 	kvm_cpu__reset_vcpu(cpu);
378 
379 	for (;;) {
380 		kvm_cpu__run(cpu);
381 
382 		switch (cpu->kvm_run->exit_reason) {
383 		case KVM_EXIT_DEBUG:
384 			kvm_cpu__show_registers(cpu);
385 			kvm_cpu__show_code(cpu);
386 			break;
387 		case KVM_EXIT_IO: {
388 			bool ret;
389 
390 			ret = kvm__emulate_io(cpu->kvm,
391 					cpu->kvm_run->io.port,
392 					(uint8_t *)cpu->kvm_run +
393 					cpu->kvm_run->io.data_offset,
394 					cpu->kvm_run->io.direction,
395 					cpu->kvm_run->io.size,
396 					cpu->kvm_run->io.count);
397 
398 			if (!ret)
399 				goto panic_kvm;
400 			break;
401 		}
402 		case KVM_EXIT_MMIO: {
403 			bool ret;
404 
405 			ret = kvm__emulate_mmio(cpu->kvm,
406 					cpu->kvm_run->mmio.phys_addr,
407 					cpu->kvm_run->mmio.data,
408 					cpu->kvm_run->mmio.len,
409 					cpu->kvm_run->mmio.is_write);
410 
411 			if (!ret)
412 				goto panic_kvm;
413 			break;
414 		}
415 		case KVM_EXIT_INTR: {
416 			serial8250__inject_interrupt(cpu->kvm);
417 			virtio_console__inject_interrupt(cpu->kvm);
418 			break;
419 		}
420 		case KVM_EXIT_SHUTDOWN:
421 			goto exit_kvm;
422 		default:
423 			goto panic_kvm;
424 		}
425 	}
426 
427 exit_kvm:
428 	return 0;
429 
430 panic_kvm:
431 	return 1;
432 }
433