xref: /kvmtool/kvm-cpu.c (revision 5c3d55fa6f0e4a7591f7edc4dea1ad695d6e1935)
1 #include "kvm/kvm-cpu.h"
2 
3 #include "kvm/util.h"
4 #include "kvm/kvm.h"
5 
6 #include <sys/ioctl.h>
7 #include <sys/mman.h>
8 #include <stdlib.h>
9 #include <errno.h>
10 #include <stdio.h>
11 
12 static inline bool is_in_protected_mode(struct kvm_cpu *self)
13 {
14 	return self->sregs.cr0 & 0x01;
15 }
16 
17 static inline uint64_t ip_to_flat(struct kvm_cpu *self, uint64_t ip)
18 {
19 	uint64_t cs;
20 
21 	/*
22 	 * NOTE! We should take code segment base address into account here.
23 	 * Luckily it's usually zero because Linux uses flat memory model.
24 	 */
25 	if (is_in_protected_mode(self))
26 		return ip;
27 
28 	cs = self->sregs.cs.selector;
29 
30 	return ip + (cs << 4);
31 }
32 
/*
 * Derive the real-mode segment base that belongs to a selector.
 *
 * KVM on Intel insists that 'base' equals 'selector * 16' while the guest
 * runs in real mode, so the selector is widened to 32 bits and shifted left
 * by four.
 */
static inline uint32_t selector_to_base(uint16_t selector)
{
	uint32_t base = selector;

	return base << 4;
}
40 
41 static struct kvm_cpu *kvm_cpu__new(struct kvm *kvm)
42 {
43 	struct kvm_cpu *self;
44 
45 	self		= calloc(1, sizeof *self);
46 	if (!self)
47 		return NULL;
48 
49 	self->kvm	= kvm;
50 
51 	return self;
52 }
53 
54 void kvm_cpu__delete(struct kvm_cpu *self)
55 {
56 	if (self->msrs)
57 		free(self->msrs);
58 
59 	free(self);
60 }
61 
62 struct kvm_cpu *kvm_cpu__init(struct kvm *kvm)
63 {
64 	struct kvm_cpu *self;
65 	int mmap_size;
66 
67 	self		= kvm_cpu__new(kvm);
68 	if (!self)
69 		return NULL;
70 
71 	self->vcpu_fd = ioctl(self->kvm->vm_fd, KVM_CREATE_VCPU, 0);
72 	if (self->vcpu_fd < 0)
73 		die_perror("KVM_CREATE_VCPU ioctl");
74 
75 	mmap_size = ioctl(self->kvm->sys_fd, KVM_GET_VCPU_MMAP_SIZE, 0);
76 	if (mmap_size < 0)
77 		die_perror("KVM_GET_VCPU_MMAP_SIZE ioctl");
78 
79 	self->kvm_run = mmap(NULL, mmap_size, PROT_READ|PROT_WRITE, MAP_SHARED, self->vcpu_fd, 0);
80 	if (self->kvm_run == MAP_FAILED)
81 		die("unable to mmap vcpu fd");
82 
83 	return self;
84 }
85 
86 void kvm_cpu__enable_singlestep(struct kvm_cpu *self)
87 {
88 	struct kvm_guest_debug debug = {
89 		.control	= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP,
90 	};
91 
92 	if (ioctl(self->vcpu_fd, KVM_SET_GUEST_DEBUG, &debug) < 0)
93 		warning("KVM_SET_GUEST_DEBUG failed");
94 }
95 
96 static struct kvm_msrs *kvm_msrs__new(size_t nmsrs)
97 {
98 	struct kvm_msrs *self = calloc(1, sizeof(*self) + (sizeof(struct kvm_msr_entry) * nmsrs));
99 
100 	if (!self)
101 		die("out of memory");
102 
103 	return self;
104 }
105 
/* MSR indices that kvm_cpu__setup_msrs() programs into the vcpu. */
#define MSR_IA32_TIME_STAMP_COUNTER	0x10

/* SYSENTER group */
#define MSR_IA32_SYSENTER_CS		0x174
#define MSR_IA32_SYSENTER_ESP		0x175
#define MSR_IA32_SYSENTER_EIP		0x176

/* Indices in the 0xc000xxxx range; only used when CONFIG_X86_64 is set */
#define MSR_IA32_STAR			0xc0000081
#define MSR_IA32_LSTAR			0xc0000082
#define MSR_IA32_CSTAR			0xc0000083
#define MSR_IA32_FMASK			0xc0000084
#define MSR_IA32_KERNEL_GS_BASE		0xc0000102

/* Build a struct kvm_msr_entry compound literal from an index/value pair. */
#define KVM_MSR_ENTRY(_index, _data)				\
	(struct kvm_msr_entry) {				\
		.index	= (_index),				\
		.data	= (_data),				\
	}
120 
121 static void kvm_cpu__setup_msrs(struct kvm_cpu *self)
122 {
123 	unsigned long ndx = 0;
124 
125 	self->msrs = kvm_msrs__new(100);
126 
127 	self->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_SYSENTER_CS,	0x0);
128 	self->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_SYSENTER_ESP,	0x0);
129 	self->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_SYSENTER_EIP,	0x0);
130 #ifdef CONFIG_X86_64
131 	self->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_STAR,		0x0);
132 	self->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_CSTAR,		0x0);
133 	self->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_KERNEL_GS_BASE,	0x0);
134 	self->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_FMASK,		0x0);
135 	self->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_LSTAR,		0x0);
136 #endif
137 	self->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_TIME_STAMP_COUNTER,	0x0);
138 
139 	self->msrs->nmsrs	= ndx;
140 
141 	if (ioctl(self->vcpu_fd, KVM_SET_MSRS, self->msrs) < 0)
142 		die_perror("KVM_SET_MSRS failed");
143 }
144 
145 static void kvm_cpu__setup_fpu(struct kvm_cpu *self)
146 {
147 	self->fpu = (struct kvm_fpu) {
148 		.fcw		= 0x37f,
149 		.mxcsr		= 0x1f80,
150 	};
151 
152 	if (ioctl(self->vcpu_fd, KVM_SET_FPU, &self->fpu) < 0)
153 		die_perror("KVM_SET_FPU failed");
154 }
155 
156 static void kvm_cpu__setup_regs(struct kvm_cpu *self)
157 {
158 	self->regs = (struct kvm_regs) {
159 		/* We start the guest in 16-bit real mode  */
160 		.rflags		= 0x0000000000000002ULL,
161 
162 		.rip		= self->kvm->boot_ip,
163 		.rsp		= self->kvm->boot_sp,
164 		.rbp		= self->kvm->boot_sp,
165 	};
166 
167 	if (self->regs.rip > USHRT_MAX)
168 		die("ip 0x%" PRIx64 " is too high for real mode", (uint64_t) self->regs.rip);
169 
170 	if (ioctl(self->vcpu_fd, KVM_SET_REGS, &self->regs) < 0)
171 		die_perror("KVM_SET_REGS failed");
172 }
173 
174 static void kvm_cpu__setup_sregs(struct kvm_cpu *self)
175 {
176 
177 	if (ioctl(self->vcpu_fd, KVM_GET_SREGS, &self->sregs) < 0)
178 		die_perror("KVM_GET_SREGS failed");
179 
180 	self->sregs.cs.selector	= self->kvm->boot_selector;
181 	self->sregs.cs.base	= selector_to_base(self->kvm->boot_selector);
182 	self->sregs.ss.selector	= self->kvm->boot_selector;
183 	self->sregs.ss.base	= selector_to_base(self->kvm->boot_selector);
184 	self->sregs.ds.selector	= self->kvm->boot_selector;
185 	self->sregs.ds.base	= selector_to_base(self->kvm->boot_selector);
186 	self->sregs.es.selector	= self->kvm->boot_selector;
187 	self->sregs.es.base	= selector_to_base(self->kvm->boot_selector);
188 	self->sregs.fs.selector	= self->kvm->boot_selector;
189 	self->sregs.fs.base	= selector_to_base(self->kvm->boot_selector);
190 	self->sregs.gs.selector	= self->kvm->boot_selector;
191 	self->sregs.gs.base	= selector_to_base(self->kvm->boot_selector);
192 
193 	if (ioctl(self->vcpu_fd, KVM_SET_SREGS, &self->sregs) < 0)
194 		die_perror("KVM_SET_SREGS failed");
195 }
196 
/**
 * kvm_cpu__reset_vcpu - bring a virtual CPU into its known boot state
 * @self: vcpu to reset
 *
 * Programs, in this order, the segment registers, the general purpose
 * registers, the FPU state and the MSRs. Each step dies on failure, so a
 * normal return means all four were accepted by KVM.
 */
void kvm_cpu__reset_vcpu(struct kvm_cpu *self)
{
	/* Segment state first, then the registers that are relative to it. */
	kvm_cpu__setup_sregs(self);
	kvm_cpu__setup_regs(self);

	/* Floating point and model specific state last. */
	kvm_cpu__setup_fpu(self);
	kvm_cpu__setup_msrs(self);
}
207 
208 static void print_dtable(const char *name, struct kvm_dtable *dtable)
209 {
210 	printf(" %s                 %016" PRIx64 "  %08" PRIx16 "\n",
211 		name, (uint64_t) dtable->base, (uint16_t) dtable->limit);
212 }
213 
214 static void print_segment(const char *name, struct kvm_segment *seg)
215 {
216 	printf(" %s       %04" PRIx16 "      %016" PRIx64 "  %08" PRIx32 "  %02" PRIx8 "    %x %x   %x  %x %x %x %x\n",
217 		name, (uint16_t) seg->selector, (uint64_t) seg->base, (uint32_t) seg->limit,
218 		(uint8_t) seg->type, seg->present, seg->dpl, seg->db, seg->s, seg->l, seg->g, seg->avl);
219 }
220 
221 void kvm_cpu__show_registers(struct kvm_cpu *self)
222 {
223 	unsigned long cr0, cr2, cr3;
224 	unsigned long cr4, cr8;
225 	unsigned long rax, rbx, rcx;
226 	unsigned long rdx, rsi, rdi;
227 	unsigned long rbp,  r8,  r9;
228 	unsigned long r10, r11, r12;
229 	unsigned long r13, r14, r15;
230 	unsigned long rip, rsp;
231 	struct kvm_sregs sregs;
232 	unsigned long rflags;
233 	struct kvm_regs regs;
234 	int i;
235 
236 	if (ioctl(self->vcpu_fd, KVM_GET_REGS, &regs) < 0)
237 		die("KVM_GET_REGS failed");
238 
239 	rflags = regs.rflags;
240 
241 	rip = regs.rip; rsp = regs.rsp;
242 	rax = regs.rax; rbx = regs.rbx; rcx = regs.rcx;
243 	rdx = regs.rdx; rsi = regs.rsi; rdi = regs.rdi;
244 	rbp = regs.rbp; r8  = regs.r8;  r9  = regs.r9;
245 	r10 = regs.r10; r11 = regs.r11; r12 = regs.r12;
246 	r13 = regs.r13; r14 = regs.r14; r15 = regs.r15;
247 
248 	printf("Registers:\n");
249 	printf(" rip: %016lx   rsp: %016lx flags: %016lx\n", rip, rsp, rflags);
250 	printf(" rax: %016lx   rbx: %016lx   rcx: %016lx\n", rax, rbx, rcx);
251 	printf(" rdx: %016lx   rsi: %016lx   rdi: %016lx\n", rdx, rsi, rdi);
252 	printf(" rbp: %016lx   r8:  %016lx   r9:  %016lx\n", rbp, r8,  r9);
253 	printf(" r10: %016lx   r11: %016lx   r12: %016lx\n", r10, r11, r12);
254 	printf(" r13: %016lx   r14: %016lx   r15: %016lx\n", r13, r14, r15);
255 
256 	if (ioctl(self->vcpu_fd, KVM_GET_SREGS, &sregs) < 0)
257 		die("KVM_GET_REGS failed");
258 
259 	cr0 = sregs.cr0; cr2 = sregs.cr2; cr3 = sregs.cr3;
260 	cr4 = sregs.cr4; cr8 = sregs.cr8;
261 
262 	printf(" cr0: %016lx   cr2: %016lx   cr3: %016lx\n", cr0, cr2, cr3);
263 	printf(" cr4: %016lx   cr8: %016lx\n", cr4, cr8);
264 	printf("Segment registers:\n");
265 	printf(" register  selector  base              limit     type  p dpl db s l g avl\n");
266 	print_segment("cs ", &sregs.cs);
267 	print_segment("ss ", &sregs.ss);
268 	print_segment("ds ", &sregs.ds);
269 	print_segment("es ", &sregs.es);
270 	print_segment("fs ", &sregs.fs);
271 	print_segment("gs ", &sregs.gs);
272 	print_segment("tr ", &sregs.tr);
273 	print_segment("ldt", &sregs.ldt);
274 	print_dtable("gdt", &sregs.gdt);
275 	print_dtable("idt", &sregs.idt);
276 	printf(" [ efer: %016" PRIx64 "  apic base: %016" PRIx64 "  nmi: %s ]\n",
277 		(uint64_t) sregs.efer, (uint64_t) sregs.apic_base,
278 		(self->kvm->nmi_disabled ? "disabled" : "enabled"));
279 	printf("Interrupt bitmap:\n");
280 	printf(" ");
281 	for (i = 0; i < (KVM_NR_INTERRUPTS + 63) / 64; i++)
282 		printf("%016" PRIx64 " ", (uint64_t) sregs.interrupt_bitmap[i]);
283 	printf("\n");
284 }
285 
286 void kvm_cpu__show_code(struct kvm_cpu *self)
287 {
288 	unsigned int code_bytes = 64;
289 	unsigned int code_prologue = code_bytes * 43 / 64;
290 	unsigned int code_len = code_bytes;
291 	unsigned char c;
292 	unsigned int i;
293 	uint8_t *ip;
294 
295 	if (ioctl(self->vcpu_fd, KVM_GET_REGS, &self->regs) < 0)
296 		die("KVM_GET_REGS failed");
297 
298 	if (ioctl(self->vcpu_fd, KVM_GET_SREGS, &self->sregs) < 0)
299 		die("KVM_GET_SREGS failed");
300 
301 	ip = guest_flat_to_host(self->kvm, ip_to_flat(self, self->regs.rip) - code_prologue);
302 
303 	printf("Code: ");
304 
305 	for (i = 0; i < code_len; i++, ip++) {
306 		if (!host_ptr_in_ram(self->kvm, ip))
307 			break;
308 
309 		c = *ip;
310 
311 		if (ip == guest_flat_to_host(self->kvm, ip_to_flat(self, self->regs.rip)))
312 			printf("<%02x> ", c);
313 		else
314 			printf("%02x ", c);
315 	}
316 
317 	printf("\n");
318 
319 	printf("Stack:\n");
320 	kvm__dump_mem(self->kvm, self->regs.rsp, 32);
321 }
322 
323 void kvm_cpu__show_page_tables(struct kvm_cpu *self)
324 {
325 	uint64_t *pte1;
326 	uint64_t *pte2;
327 	uint64_t *pte3;
328 	uint64_t *pte4;
329 
330 	if (!is_in_protected_mode(self))
331 		return;
332 
333 	if (ioctl(self->vcpu_fd, KVM_GET_SREGS, &self->sregs) < 0)
334 		die("KVM_GET_SREGS failed");
335 
336 	pte4	= guest_flat_to_host(self->kvm, self->sregs.cr3);
337 	if (!host_ptr_in_ram(self->kvm, pte4))
338 		return;
339 
340 	pte3	= guest_flat_to_host(self->kvm, (*pte4 & ~0xfff));
341 	if (!host_ptr_in_ram(self->kvm, pte3))
342 		return;
343 
344 	pte2	= guest_flat_to_host(self->kvm, (*pte3 & ~0xfff));
345 	if (!host_ptr_in_ram(self->kvm, pte2))
346 		return;
347 
348 	pte1	= guest_flat_to_host(self->kvm, (*pte2 & ~0xfff));
349 	if (!host_ptr_in_ram(self->kvm, pte1))
350 		return;
351 
352 	printf("Page Tables:\n");
353 	if (*pte2 & (1 << 7))
354 		printf(" pte4: %016" PRIx64 "   pte3: %016" PRIx64
355 			"   pte2: %016" PRIx64 "\n",
356 			*pte4, *pte3, *pte2);
357 	else
358 		printf(" pte4: %016" PRIx64 "   pte3: %016" PRIx64 "   pte2: %016"
359 			PRIx64 "   pte1: %016" PRIx64 "\n",
360 			*pte4, *pte3, *pte2, *pte1);
361 }
362 
363 void kvm_cpu__run(struct kvm_cpu *self)
364 {
365 	int err;
366 
367 	err = ioctl(self->vcpu_fd, KVM_RUN, 0);
368 	if (err && (errno != EINTR && errno != EAGAIN))
369 		die_perror("KVM_RUN failed");
370 }
371