xref: /kvmtool/x86/kvm-cpu.c (revision 341ee0d4957dc91f36f24cd4e4b4ebd2f4da5632)
#include "kvm/kvm-cpu.h"

#include "kvm/symbol.h"
#include "kvm/util.h"
#include "kvm/kvm.h"

#include <asm/msr-index.h>

#include <sys/ioctl.h>
#include <sys/mman.h>
#include <signal.h>
#include <limits.h>	/* USHRT_MAX, used by the real-mode ip check */
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <stdio.h>

static int debug_fd;

void kvm_cpu__set_debug_fd(int fd)
{
	debug_fd = fd;
}

int kvm_cpu__get_debug_fd(void)
{
	return debug_fd;
}

static inline bool is_in_protected_mode(struct kvm_cpu *vcpu)
{
	return vcpu->sregs.cr0 & 0x01;	/* CR0.PE */
}

static inline u64 ip_to_flat(struct kvm_cpu *vcpu, u64 ip)
{
	u64 cs;

	/*
	 * NOTE! We should take the code segment base address into account
	 * here. Luckily it's usually zero because Linux uses a flat memory
	 * model.
	 */
	if (is_in_protected_mode(vcpu))
		return ip;

	cs = vcpu->sregs.cs.selector;

	return ip + (cs << 4);
}

static inline u32 selector_to_base(u16 selector)
{
	/*
	 * KVM on Intel requires 'base' to be 'selector * 16' in real mode.
	 */
	return (u32)selector * 16;
}
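
/*
 * Worked example (illustrative values): with cs.selector = 0x0f00 and
 * ip = 0x0100, ip_to_flat() computes 0x0100 + (0x0f00 << 4) = 0xf100,
 * and selector_to_base(0x0f00) returns the matching base 0xf000.
 */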

static struct kvm_cpu *kvm_cpu__new(struct kvm *kvm)
{
	struct kvm_cpu *vcpu;

	vcpu		= calloc(1, sizeof *vcpu);
	if (!vcpu)
		return NULL;

	vcpu->kvm	= kvm;

	return vcpu;
}

void kvm_cpu__delete(struct kvm_cpu *vcpu)
{
	free(vcpu->msrs);	/* free(NULL) is a no-op, so no check needed */
	free(vcpu);
}

struct kvm_cpu *kvm_cpu__init(struct kvm *kvm, unsigned long cpu_id)
{
	struct kvm_cpu *vcpu;
	int mmap_size;
	int coalesced_offset;

	vcpu		= kvm_cpu__new(kvm);
	if (!vcpu)
		return NULL;

	vcpu->cpu_id	= cpu_id;

	vcpu->vcpu_fd = ioctl(vcpu->kvm->vm_fd, KVM_CREATE_VCPU, cpu_id);
	if (vcpu->vcpu_fd < 0)
		die_perror("KVM_CREATE_VCPU ioctl");

	mmap_size = ioctl(vcpu->kvm->sys_fd, KVM_GET_VCPU_MMAP_SIZE, 0);
	if (mmap_size < 0)
		die_perror("KVM_GET_VCPU_MMAP_SIZE ioctl");

	vcpu->kvm_run = mmap(NULL, mmap_size, PROT_RW, MAP_SHARED, vcpu->vcpu_fd, 0);
	if (vcpu->kvm_run == MAP_FAILED)
		die("unable to mmap vcpu fd");

	/*
	 * KVM_CHECK_EXTENSION returns the ring's page offset within the
	 * kvm_run mapping when coalesced MMIO is supported, 0 when it is
	 * not, and -1 on error -- hence the explicit '> 0' check.
	 */
	coalesced_offset = ioctl(kvm->sys_fd, KVM_CHECK_EXTENSION, KVM_CAP_COALESCED_MMIO);
	if (coalesced_offset > 0)
		vcpu->ring = (void *)vcpu->kvm_run + (coalesced_offset * PAGE_SIZE);

	vcpu->is_running = true;

	return vcpu;
}
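
/*
 * Typical vcpu life cycle (a sketch; VM creation and the run loop live
 * elsewhere in kvmtool):
 *
 *	struct kvm_cpu *vcpu;
 *
 *	vcpu = kvm_cpu__init(kvm, 0);
 *	if (!vcpu)
 *		die("out of memory");
 *	kvm_cpu__reset_vcpu(vcpu);
 *	... run the guest, handle exits ...
 *	kvm_cpu__delete(vcpu);
 */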

static struct kvm_msrs *kvm_msrs__new(size_t nmsrs)
{
	struct kvm_msrs *msrs = calloc(1, sizeof(*msrs) + (sizeof(struct kvm_msr_entry) * nmsrs));

	if (!msrs)
		die("out of memory");

	return msrs;
}

#define KVM_MSR_ENTRY(_index, _data)	\
	(struct kvm_msr_entry) { .index = _index, .data = _data }
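
/*
 * For instance, KVM_MSR_ENTRY(MSR_IA32_TSC, 0x0) expands to the compound
 * literal (struct kvm_msr_entry) { .index = MSR_IA32_TSC, .data = 0x0 },
 * which is what allows the direct array assignments below.
 */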

static void kvm_cpu__setup_msrs(struct kvm_cpu *vcpu)
{
	unsigned long ndx = 0;

	/* 100 entries is generous headroom; only a handful are filled in. */
	vcpu->msrs = kvm_msrs__new(100);

	vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_SYSENTER_CS,	0x0);
	vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_SYSENTER_ESP,	0x0);
	vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_SYSENTER_EIP,	0x0);
#ifdef CONFIG_X86_64
	vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_STAR,			0x0);
	vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_CSTAR,			0x0);
	vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_KERNEL_GS_BASE,		0x0);
	vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_SYSCALL_MASK,		0x0);
	vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_LSTAR,			0x0);
#endif
	vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_TSC,		0x0);
	vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_MISC_ENABLE,
						MSR_IA32_MISC_ENABLE_FAST_STRING);

	vcpu->msrs->nmsrs	= ndx;

	if (ioctl(vcpu->vcpu_fd, KVM_SET_MSRS, vcpu->msrs) < 0)
		die_perror("KVM_SET_MSRS failed");
}
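
/*
 * The buffer handed to KVM_SET_MSRS is the variable-length layout from
 * <linux/kvm.h>: a struct kvm_msrs header (nmsrs, pad) immediately
 * followed by nmsrs struct kvm_msr_entry slots -- exactly what
 * kvm_msrs__new() allocates in a single block.
 */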

static void kvm_cpu__setup_fpu(struct kvm_cpu *vcpu)
{
	vcpu->fpu = (struct kvm_fpu) {
		.fcw		= 0x37f,	/* x87 reset default, all exceptions masked */
		.mxcsr		= 0x1f80,	/* SSE reset default, all exceptions masked */
	};

	if (ioctl(vcpu->vcpu_fd, KVM_SET_FPU, &vcpu->fpu) < 0)
		die_perror("KVM_SET_FPU failed");
}

static void kvm_cpu__setup_regs(struct kvm_cpu *vcpu)
{
	vcpu->regs = (struct kvm_regs) {
		/*
		 * We start the guest in 16-bit real mode. Bit 1 of RFLAGS
		 * is reserved and always reads as 1.
		 */
		.rflags		= 0x0000000000000002ULL,

		.rip		= vcpu->kvm->boot_ip,
		.rsp		= vcpu->kvm->boot_sp,
		.rbp		= vcpu->kvm->boot_sp,
	};

	if (vcpu->regs.rip > USHRT_MAX)
		die("ip 0x%llx is too high for real mode", (u64) vcpu->regs.rip);

	if (ioctl(vcpu->vcpu_fd, KVM_SET_REGS, &vcpu->regs) < 0)
		die_perror("KVM_SET_REGS failed");
}

static void kvm_cpu__setup_sregs(struct kvm_cpu *vcpu)
{
	if (ioctl(vcpu->vcpu_fd, KVM_GET_SREGS, &vcpu->sregs) < 0)
		die_perror("KVM_GET_SREGS failed");

	vcpu->sregs.cs.selector	= vcpu->kvm->boot_selector;
	vcpu->sregs.cs.base	= selector_to_base(vcpu->kvm->boot_selector);
	vcpu->sregs.ss.selector	= vcpu->kvm->boot_selector;
	vcpu->sregs.ss.base	= selector_to_base(vcpu->kvm->boot_selector);
	vcpu->sregs.ds.selector	= vcpu->kvm->boot_selector;
	vcpu->sregs.ds.base	= selector_to_base(vcpu->kvm->boot_selector);
	vcpu->sregs.es.selector	= vcpu->kvm->boot_selector;
	vcpu->sregs.es.base	= selector_to_base(vcpu->kvm->boot_selector);
	vcpu->sregs.fs.selector	= vcpu->kvm->boot_selector;
	vcpu->sregs.fs.base	= selector_to_base(vcpu->kvm->boot_selector);
	vcpu->sregs.gs.selector	= vcpu->kvm->boot_selector;
	vcpu->sregs.gs.base	= selector_to_base(vcpu->kvm->boot_selector);

	if (ioctl(vcpu->vcpu_fd, KVM_SET_SREGS, &vcpu->sregs) < 0)
		die_perror("KVM_SET_SREGS failed");
}
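
/*
 * Example (values as in kvmtool's x86 boot defaults; illustrative): with
 * a boot selector of 0x1000 every segment gets base 0x1000 * 16 = 0x10000,
 * so a boot_ip of 0x0200 starts execution at flat address 0x10200, the
 * real-mode entry point of the Linux boot protocol.
 */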

/**
 * kvm_cpu__reset_vcpu - reset virtual CPU to a known state
 */
void kvm_cpu__reset_vcpu(struct kvm_cpu *vcpu)
{
	kvm_cpu__setup_cpuid(vcpu);
	kvm_cpu__setup_sregs(vcpu);
	kvm_cpu__setup_regs(vcpu);
	kvm_cpu__setup_fpu(vcpu);
	kvm_cpu__setup_msrs(vcpu);
}

bool kvm_cpu__handle_exit(struct kvm_cpu *vcpu)
{
	/* No x86-specific exit reasons to handle here. */
	return false;
}

static void print_dtable(const char *name, struct kvm_dtable *dtable)
{
	dprintf(debug_fd, " %s                 %016llx  %08hx\n",
		name, (u64) dtable->base, (u16) dtable->limit);
}

static void print_segment(const char *name, struct kvm_segment *seg)
{
	dprintf(debug_fd, " %s       %04hx      %016llx  %08x  %02hhx    %x %x   %x  %x %x %x %x\n",
		name, (u16) seg->selector, (u64) seg->base, (u32) seg->limit,
		(u8) seg->type, seg->present, seg->dpl, seg->db, seg->s, seg->l, seg->g, seg->avl);
}

void kvm_cpu__show_registers(struct kvm_cpu *vcpu)
{
	unsigned long cr0, cr2, cr3;
	unsigned long cr4, cr8;
	unsigned long rax, rbx, rcx;
	unsigned long rdx, rsi, rdi;
	unsigned long rbp,  r8,  r9;
	unsigned long r10, r11, r12;
	unsigned long r13, r14, r15;
	unsigned long rip, rsp;
	struct kvm_sregs sregs;
	unsigned long rflags;
	struct kvm_regs regs;
	int i;

	if (ioctl(vcpu->vcpu_fd, KVM_GET_REGS, &regs) < 0)
		die("KVM_GET_REGS failed");

	rflags = regs.rflags;

	rip = regs.rip; rsp = regs.rsp;
	rax = regs.rax; rbx = regs.rbx; rcx = regs.rcx;
	rdx = regs.rdx; rsi = regs.rsi; rdi = regs.rdi;
	rbp = regs.rbp; r8  = regs.r8;  r9  = regs.r9;
	r10 = regs.r10; r11 = regs.r11; r12 = regs.r12;
	r13 = regs.r13; r14 = regs.r14; r15 = regs.r15;

	dprintf(debug_fd, "\n Registers:\n");
	dprintf(debug_fd,   " ----------\n");
	dprintf(debug_fd, " rip: %016lx   rsp: %016lx flags: %016lx\n", rip, rsp, rflags);
	dprintf(debug_fd, " rax: %016lx   rbx: %016lx   rcx: %016lx\n", rax, rbx, rcx);
	dprintf(debug_fd, " rdx: %016lx   rsi: %016lx   rdi: %016lx\n", rdx, rsi, rdi);
	dprintf(debug_fd, " rbp: %016lx    r8: %016lx    r9: %016lx\n", rbp, r8,  r9);
	dprintf(debug_fd, " r10: %016lx   r11: %016lx   r12: %016lx\n", r10, r11, r12);
	dprintf(debug_fd, " r13: %016lx   r14: %016lx   r15: %016lx\n", r13, r14, r15);

	if (ioctl(vcpu->vcpu_fd, KVM_GET_SREGS, &sregs) < 0)
		die("KVM_GET_SREGS failed");

	cr0 = sregs.cr0; cr2 = sregs.cr2; cr3 = sregs.cr3;
	cr4 = sregs.cr4; cr8 = sregs.cr8;

	dprintf(debug_fd, " cr0: %016lx   cr2: %016lx   cr3: %016lx\n", cr0, cr2, cr3);
	dprintf(debug_fd, " cr4: %016lx   cr8: %016lx\n", cr4, cr8);
	dprintf(debug_fd, "\n Segment registers:\n");
	dprintf(debug_fd,   " ------------------\n");
	dprintf(debug_fd, " register  selector  base              limit     type  p dpl db s l g avl\n");
	print_segment("cs ", &sregs.cs);
	print_segment("ss ", &sregs.ss);
	print_segment("ds ", &sregs.ds);
	print_segment("es ", &sregs.es);
	print_segment("fs ", &sregs.fs);
	print_segment("gs ", &sregs.gs);
	print_segment("tr ", &sregs.tr);
	print_segment("ldt", &sregs.ldt);
	print_dtable("gdt", &sregs.gdt);
	print_dtable("idt", &sregs.idt);

	dprintf(debug_fd, "\n APIC:\n");
	dprintf(debug_fd,   " -----\n");
	dprintf(debug_fd, " efer: %016llx  apic base: %016llx  nmi: %s\n",
		(u64) sregs.efer, (u64) sregs.apic_base,
		(vcpu->kvm->nmi_disabled ? "disabled" : "enabled"));

	dprintf(debug_fd, "\n Interrupt bitmap:\n");
	dprintf(debug_fd,   " -----------------\n");
	for (i = 0; i < (KVM_NR_INTERRUPTS + 63) / 64; i++)
		dprintf(debug_fd, " %016llx", (u64) sregs.interrupt_bitmap[i]);
	dprintf(debug_fd, "\n");
}

#define MAX_SYM_LEN		128

void kvm_cpu__show_code(struct kvm_cpu *vcpu)
{
	unsigned int code_bytes = 64;
	/*
	 * Same 43/64 split as the kernel's oops "Code:" dump: most of the
	 * bytes shown precede rip so the current instruction has context.
	 */
	unsigned int code_prologue = code_bytes * 43 / 64;
	unsigned int code_len = code_bytes;
	char sym[MAX_SYM_LEN];
	unsigned char c;
	unsigned int i;
	u8 *ip;

	if (ioctl(vcpu->vcpu_fd, KVM_GET_REGS, &vcpu->regs) < 0)
		die("KVM_GET_REGS failed");

	if (ioctl(vcpu->vcpu_fd, KVM_GET_SREGS, &vcpu->sregs) < 0)
		die("KVM_GET_SREGS failed");

	ip = guest_flat_to_host(vcpu->kvm, ip_to_flat(vcpu, vcpu->regs.rip) - code_prologue);

	dprintf(debug_fd, "\n Code:\n");
	dprintf(debug_fd,   " -----\n");

	symbol__lookup(vcpu->kvm, vcpu->regs.rip, sym, MAX_SYM_LEN);

	dprintf(debug_fd, " rip: [<%016lx>] %s\n\n", (unsigned long) vcpu->regs.rip, sym);

	for (i = 0; i < code_len; i++, ip++) {
		if (!host_ptr_in_ram(vcpu->kvm, ip))
			break;

		c = *ip;

		/* Highlight the byte at rip with angle brackets. */
		if (ip == guest_flat_to_host(vcpu->kvm, ip_to_flat(vcpu, vcpu->regs.rip)))
			dprintf(debug_fd, " <%02x>", c);
		else
			dprintf(debug_fd, " %02x", c);
	}

	dprintf(debug_fd, "\n");

	dprintf(debug_fd, "\n Stack:\n");
	dprintf(debug_fd,   " ------\n");
	kvm__dump_mem(vcpu->kvm, vcpu->regs.rsp, 32);
}

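/*
 * The walk below follows a single entry at each of the four x86-64
 * paging levels, i.e. only the translation of guest virtual address 0:
 * pte4 is the PML4 entry, pte3 the PDPT entry, pte2 the PD entry and
 * pte1 the PT entry. Bit 7 (PS) of the PD entry marks a 2 MiB page,
 * in which case there is no PT level to read.
 */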
void kvm_cpu__show_page_tables(struct kvm_cpu *vcpu)
{
	u64 *pte1;
	u64 *pte2;
	u64 *pte3;
	u64 *pte4;

	if (!is_in_protected_mode(vcpu))
		return;

	if (ioctl(vcpu->vcpu_fd, KVM_GET_SREGS, &vcpu->sregs) < 0)
		die("KVM_GET_SREGS failed");

	pte4	= guest_flat_to_host(vcpu->kvm, vcpu->sregs.cr3);
	if (!host_ptr_in_ram(vcpu->kvm, pte4))
		return;

	/* Mask off the low flag bits to get the next table's address. */
	pte3	= guest_flat_to_host(vcpu->kvm, (*pte4 & ~0xfff));
	if (!host_ptr_in_ram(vcpu->kvm, pte3))
		return;

	pte2	= guest_flat_to_host(vcpu->kvm, (*pte3 & ~0xfff));
	if (!host_ptr_in_ram(vcpu->kvm, pte2))
		return;

	pte1	= guest_flat_to_host(vcpu->kvm, (*pte2 & ~0xfff));
	if (!host_ptr_in_ram(vcpu->kvm, pte1))
		return;

	dprintf(debug_fd, "Page Tables:\n");
	if (*pte2 & (1 << 7))	/* PS set: 2 MiB page, no PT level */
		dprintf(debug_fd, " pte4: %016llx   pte3: %016llx"
			"   pte2: %016llx\n",
			*pte4, *pte3, *pte2);
	else
		dprintf(debug_fd, " pte4: %016llx   pte3: %016llx   pte2: %016"
			"llx   pte1: %016llx\n",
			*pte4, *pte3, *pte2, *pte1);
}
389