xref: /kvmtool/kvm.c (revision 3d78d3a59ab51c654f253b5117f041e90a9a983c)
1ae1fae34SPekka Enberg #include "kvm/kvm.h"
2ae1fae34SPekka Enberg 
3eda03319SPekka Enberg #include "kvm/interrupt.h"
4f3150089SPekka Enberg #include "kvm/util.h"
5eda03319SPekka Enberg 
66c7d8514SPekka Enberg #include <linux/kvm.h>
7f5ab5f67SPekka Enberg 
8f5ab5f67SPekka Enberg #include <asm/bootparam.h>
9f5ab5f67SPekka Enberg 
10ae1fae34SPekka Enberg #include <sys/ioctl.h>
111f9cff23SPekka Enberg #include <inttypes.h>
121f9cff23SPekka Enberg #include <sys/mman.h>
132da26a59SPekka Enberg #include <stdbool.h>
146e5e8b8dSPekka Enberg #include <assert.h>
1506e41eeaSPekka Enberg #include <limits.h>
16f5ab5f67SPekka Enberg #include <stdarg.h>
17b8f6afcdSPekka Enberg #include <stdlib.h>
18f5ab5f67SPekka Enberg #include <string.h>
190d1f17ecSPekka Enberg #include <unistd.h>
201f9cff23SPekka Enberg #include <stdio.h>
21b8f6afcdSPekka Enberg #include <fcntl.h>
22b8f6afcdSPekka Enberg 
231f9cff23SPekka Enberg /*
241f9cff23SPekka Enberg  * Compatibility code. Remove this when we move to tools/kvm.
251f9cff23SPekka Enberg  */
261f9cff23SPekka Enberg #ifndef KVM_EXIT_INTERNAL_ERROR
271f9cff23SPekka Enberg # define KVM_EXIT_INTERNAL_ERROR		17
281f9cff23SPekka Enberg #endif
291f9cff23SPekka Enberg 
30ae1fae34SPekka Enberg #define DEFINE_KVM_EXIT_REASON(reason) [reason] = #reason
310d1f17ecSPekka Enberg 
32ae1fae34SPekka Enberg const char *kvm_exit_reasons[] = {
33ae1fae34SPekka Enberg 	DEFINE_KVM_EXIT_REASON(KVM_EXIT_UNKNOWN),
34ae1fae34SPekka Enberg 	DEFINE_KVM_EXIT_REASON(KVM_EXIT_EXCEPTION),
35ae1fae34SPekka Enberg 	DEFINE_KVM_EXIT_REASON(KVM_EXIT_IO),
36ae1fae34SPekka Enberg 	DEFINE_KVM_EXIT_REASON(KVM_EXIT_HYPERCALL),
37ae1fae34SPekka Enberg 	DEFINE_KVM_EXIT_REASON(KVM_EXIT_DEBUG),
38ae1fae34SPekka Enberg 	DEFINE_KVM_EXIT_REASON(KVM_EXIT_HLT),
39ae1fae34SPekka Enberg 	DEFINE_KVM_EXIT_REASON(KVM_EXIT_MMIO),
40ae1fae34SPekka Enberg 	DEFINE_KVM_EXIT_REASON(KVM_EXIT_IRQ_WINDOW_OPEN),
41ae1fae34SPekka Enberg 	DEFINE_KVM_EXIT_REASON(KVM_EXIT_SHUTDOWN),
42ae1fae34SPekka Enberg 	DEFINE_KVM_EXIT_REASON(KVM_EXIT_FAIL_ENTRY),
43ae1fae34SPekka Enberg 	DEFINE_KVM_EXIT_REASON(KVM_EXIT_INTR),
44ae1fae34SPekka Enberg 	DEFINE_KVM_EXIT_REASON(KVM_EXIT_SET_TPR),
45ae1fae34SPekka Enberg 	DEFINE_KVM_EXIT_REASON(KVM_EXIT_TPR_ACCESS),
46ae1fae34SPekka Enberg 	DEFINE_KVM_EXIT_REASON(KVM_EXIT_S390_SIEIC),
47ae1fae34SPekka Enberg 	DEFINE_KVM_EXIT_REASON(KVM_EXIT_S390_RESET),
48ae1fae34SPekka Enberg 	DEFINE_KVM_EXIT_REASON(KVM_EXIT_DCR),
49ae1fae34SPekka Enberg 	DEFINE_KVM_EXIT_REASON(KVM_EXIT_NMI),
50ae1fae34SPekka Enberg 	DEFINE_KVM_EXIT_REASON(KVM_EXIT_INTERNAL_ERROR),
519b1fb1c3SPekka Enberg };
529b1fb1c3SPekka Enberg 
/*
 * Convert a real-mode selector:offset pair into a flat address, i.e.
 * selector * 16 + offset.
 */
static inline uint32_t segment_to_flat(uint16_t selector, uint16_t offset)
{
	uint32_t segment_base = (uint32_t) selector * 16;

	return segment_base + (uint32_t) offset;
}
576753ed2fSPekka Enberg 
586753ed2fSPekka Enberg static inline void *guest_flat_to_host(struct kvm *self, unsigned long offset)
59ae1fae34SPekka Enberg {
60ae1fae34SPekka Enberg 	return self->ram_start + offset;
61ae1fae34SPekka Enberg }
62ae1fae34SPekka Enberg 
/*
 * Translate a real-mode selector:offset pair into a host pointer inside the
 * guest RAM buffer.
 */
static inline void *guest_real_to_host(struct kvm *self, uint16_t selector, uint16_t offset)
{
	return guest_flat_to_host(self, segment_to_flat(selector, offset));
}
696753ed2fSPekka Enberg 
70ae1fae34SPekka Enberg static bool kvm__supports_extension(struct kvm *self, unsigned int extension)
71b8f6afcdSPekka Enberg {
7228fa19c0SPekka Enberg 	int ret;
73b8f6afcdSPekka Enberg 
7473ac60e6SPekka Enberg 	ret = ioctl(self->sys_fd, KVM_CHECK_EXTENSION, extension);
754076b041SPekka Enberg 	if (ret < 0)
764076b041SPekka Enberg 		return false;
774076b041SPekka Enberg 
784076b041SPekka Enberg 	return ret;
794076b041SPekka Enberg }
804076b041SPekka Enberg 
814076b041SPekka Enberg static struct kvm *kvm__new(void)
824076b041SPekka Enberg {
834076b041SPekka Enberg 	struct kvm *self = calloc(1, sizeof *self);
844076b041SPekka Enberg 
854076b041SPekka Enberg 	if (!self)
864076b041SPekka Enberg 		die("out of memory");
874076b041SPekka Enberg 
884076b041SPekka Enberg 	return self;
894076b041SPekka Enberg }
904076b041SPekka Enberg 
919ef4c68eSPekka Enberg void kvm__delete(struct kvm *self)
929ef4c68eSPekka Enberg {
939ef4c68eSPekka Enberg 	free(self->ram_start);
949ef4c68eSPekka Enberg 	free(self);
959ef4c68eSPekka Enberg }
969ef4c68eSPekka Enberg 
97ae1fae34SPekka Enberg struct kvm *kvm__init(void)
984076b041SPekka Enberg {
992b0e3342SPekka Enberg 	struct kvm_userspace_memory_region mem;
1004076b041SPekka Enberg 	struct kvm *self;
1010d1f17ecSPekka Enberg 	long page_size;
1021f9cff23SPekka Enberg 	int mmap_size;
1034076b041SPekka Enberg 	int ret;
1044076b041SPekka Enberg 
1054076b041SPekka Enberg 	self = kvm__new();
1064076b041SPekka Enberg 
10773ac60e6SPekka Enberg 	self->sys_fd = open("/dev/kvm", O_RDWR);
10873ac60e6SPekka Enberg 	if (self->sys_fd < 0)
109f5ab5f67SPekka Enberg 		die_perror("open");
110b8f6afcdSPekka Enberg 
11173ac60e6SPekka Enberg 	ret = ioctl(self->sys_fd, KVM_GET_API_VERSION, 0);
1126c7d8514SPekka Enberg 	if (ret != KVM_API_VERSION)
113f5ab5f67SPekka Enberg 		die_perror("KVM_API_VERSION ioctl");
1146c7d8514SPekka Enberg 
11573ac60e6SPekka Enberg 	self->vm_fd = ioctl(self->sys_fd, KVM_CREATE_VM, 0);
11673ac60e6SPekka Enberg 	if (self->vm_fd < 0)
117f5ab5f67SPekka Enberg 		die_perror("KVM_CREATE_VM ioctl");
11828fa19c0SPekka Enberg 
1194076b041SPekka Enberg 	if (!kvm__supports_extension(self, KVM_CAP_USER_MEMORY))
120f5ab5f67SPekka Enberg 		die("KVM_CAP_USER_MEMORY is not supported");
1212da26a59SPekka Enberg 
1220d1f17ecSPekka Enberg 	self->ram_size		= 64UL * 1024UL * 1024UL;
1230d1f17ecSPekka Enberg 
1240d1f17ecSPekka Enberg 	page_size	= sysconf(_SC_PAGESIZE);
1250d1f17ecSPekka Enberg 	if (posix_memalign(&self->ram_start, page_size, self->ram_size) != 0)
1260d1f17ecSPekka Enberg 		die("out of memory");
1270d1f17ecSPekka Enberg 
1282b0e3342SPekka Enberg 	mem = (struct kvm_userspace_memory_region) {
1292b0e3342SPekka Enberg 		.slot			= 0,
1302b0e3342SPekka Enberg 		.guest_phys_addr	= 0x0UL,
1310d1f17ecSPekka Enberg 		.memory_size		= self->ram_size,
1320d1f17ecSPekka Enberg 		.userspace_addr		= (unsigned long) self->ram_start,
1332b0e3342SPekka Enberg 	};
1342b0e3342SPekka Enberg 
13573ac60e6SPekka Enberg 	ret = ioctl(self->vm_fd, KVM_SET_USER_MEMORY_REGION, &mem, 1);
1362b0e3342SPekka Enberg 	if (ret < 0)
137f5ab5f67SPekka Enberg 		die_perror("KVM_SET_USER_MEMORY_REGION ioctl");
1382b0e3342SPekka Enberg 
139895c2fefSPekka Enberg 	if (!kvm__supports_extension(self, KVM_CAP_SET_TSS_ADDR))
140f5ab5f67SPekka Enberg 		die("KVM_CAP_SET_TSS_ADDR is not supported");
141895c2fefSPekka Enberg 
14273ac60e6SPekka Enberg 	ret = ioctl(self->vm_fd, KVM_SET_TSS_ADDR, 0xfffbd000);
143895c2fefSPekka Enberg 	if (ret < 0)
144f5ab5f67SPekka Enberg 		die_perror("KVM_SET_TSS_ADDR ioctl");
145895c2fefSPekka Enberg 
14673ac60e6SPekka Enberg 	self->vcpu_fd = ioctl(self->vm_fd, KVM_CREATE_VCPU, 0);
1472b0e3342SPekka Enberg 	if (self->vcpu_fd < 0)
148f5ab5f67SPekka Enberg 		die_perror("KVM_CREATE_VCPU ioctl");
1492b0e3342SPekka Enberg 
1501f9cff23SPekka Enberg 	mmap_size = ioctl(self->sys_fd, KVM_GET_VCPU_MMAP_SIZE, 0);
1511f9cff23SPekka Enberg 	if (mmap_size < 0)
152f5ab5f67SPekka Enberg 		die_perror("KVM_GET_VCPU_MMAP_SIZE ioctl");
1531f9cff23SPekka Enberg 
1541f9cff23SPekka Enberg 	self->kvm_run = mmap(NULL, mmap_size, PROT_READ|PROT_WRITE, MAP_SHARED, self->vcpu_fd, 0);
1551f9cff23SPekka Enberg 	if (self->kvm_run == MAP_FAILED)
1561f9cff23SPekka Enberg 		die("unable to mmap vcpu fd");
1571f9cff23SPekka Enberg 
1584076b041SPekka Enberg 	return self;
1594076b041SPekka Enberg }
1604076b041SPekka Enberg 
161ae1fae34SPekka Enberg void kvm__enable_singlestep(struct kvm *self)
162aee6632eSPekka Enberg {
163aee6632eSPekka Enberg 	struct kvm_guest_debug debug = {
164aee6632eSPekka Enberg 		.control	= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP,
165aee6632eSPekka Enberg 	};
166aee6632eSPekka Enberg 
167aee6632eSPekka Enberg 	if (ioctl(self->vcpu_fd, KVM_SET_GUEST_DEBUG, &debug) < 0)
168b625d4b6SCyrill Gorcunov 		warning("KVM_SET_GUEST_DEBUG failed");
169aee6632eSPekka Enberg }
170aee6632eSPekka Enberg 
1715f6772b8SCyrill Gorcunov #define BOOT_LOADER_SELECTOR	0x1000
172b08e9ec4SPekka Enberg #define BOOT_LOADER_IP		0x0000
173dbdb74c2SPekka Enberg #define BOOT_LOADER_SP		0x8000
174009b0758SPekka Enberg 
175edc8a14dSPekka Enberg static int load_flat_binary(struct kvm *self, int fd)
176009b0758SPekka Enberg {
177009b0758SPekka Enberg 	void *p;
178009b0758SPekka Enberg 	int nr;
179009b0758SPekka Enberg 
180009b0758SPekka Enberg 	if (lseek(fd, 0, SEEK_SET) < 0)
181009b0758SPekka Enberg 		die_perror("lseek");
182009b0758SPekka Enberg 
1836753ed2fSPekka Enberg 	p = guest_real_to_host(self, BOOT_LOADER_SELECTOR, BOOT_LOADER_IP);
184009b0758SPekka Enberg 
185009b0758SPekka Enberg 	while ((nr = read(fd, p, 65536)) > 0)
186009b0758SPekka Enberg 		p += nr;
187009b0758SPekka Enberg 
188dbdb74c2SPekka Enberg 	self->boot_selector	= BOOT_LOADER_SELECTOR;
189edc8a14dSPekka Enberg 	self->boot_ip		= BOOT_LOADER_IP;
190dbdb74c2SPekka Enberg 	self->boot_sp		= BOOT_LOADER_SP;
191edc8a14dSPekka Enberg 
1927fb218bdSPekka Enberg 	return true;
193009b0758SPekka Enberg }
194009b0758SPekka Enberg 
1957fb218bdSPekka Enberg /*
1967fb218bdSPekka Enberg  * The protected mode kernel part of a modern bzImage is loaded at 1 MB by
1977fb218bdSPekka Enberg  * default.
1987fb218bdSPekka Enberg  */
1997fb218bdSPekka Enberg #define BZ_KERNEL_START			0x100000UL
200ae1fae34SPekka Enberg 
201ae1fae34SPekka Enberg static const char *BZIMAGE_MAGIC	= "HdrS";
202ae1fae34SPekka Enberg 
20310943d14SPekka Enberg #define BZ_DEFAULT_SETUP_SECTS		4
20410943d14SPekka Enberg 
2056d1f350dSCyrill Gorcunov static bool load_bzimage(struct kvm *self, int fd, const char *kernel_cmdline)
206ae1fae34SPekka Enberg {
207bc75b0aeSCyrill Gorcunov 	struct real_intr_desc intr;
20822489bb0SCyrill Gorcunov 	struct boot_params boot;
2094b62331fSPekka Enberg 	unsigned long setup_sects;
210305d9054SCyrill Gorcunov 	unsigned int intr_addr;
211debcfac0SCyrill Gorcunov 	size_t cmdline_size, cmdline_offset;
2127fb218bdSPekka Enberg 	ssize_t setup_size;
21322489bb0SCyrill Gorcunov 	void *p;
214ae1fae34SPekka Enberg 	int nr;
215ae1fae34SPekka Enberg 
2165d67eaf6SPekka Enberg 	/*
2175d67eaf6SPekka Enberg 	 * See Documentation/x86/boot.txt for details no bzImage on-disk and
2185d67eaf6SPekka Enberg 	 * memory layout.
2195d67eaf6SPekka Enberg 	 */
2205d67eaf6SPekka Enberg 
221009b0758SPekka Enberg 	if (lseek(fd, 0, SEEK_SET) < 0)
222009b0758SPekka Enberg 		die_perror("lseek");
223009b0758SPekka Enberg 
224ae1fae34SPekka Enberg 	read(fd, &boot, sizeof(boot));
225ae1fae34SPekka Enberg 
226ae1fae34SPekka Enberg         if (memcmp(&boot.hdr.header, BZIMAGE_MAGIC, strlen(BZIMAGE_MAGIC)) != 0)
2277fb218bdSPekka Enberg 		return false;
228ae1fae34SPekka Enberg 
229ad681038SCyrill Gorcunov 	if (boot.hdr.version < 0x0200) {
230ad681038SCyrill Gorcunov 		warning("Too old kernel");
231ad681038SCyrill Gorcunov 		return false;
232ad681038SCyrill Gorcunov 	}
233ad681038SCyrill Gorcunov 
234e93ab78aSPekka Enberg 	if (lseek(fd, 0, SEEK_SET) < 0)
235e93ab78aSPekka Enberg 		die_perror("lseek");
236e93ab78aSPekka Enberg 
2374cf542bbSCyrill Gorcunov 	if (!boot.hdr.setup_sects)
2384cf542bbSCyrill Gorcunov 		boot.hdr.setup_sects = BZ_DEFAULT_SETUP_SECTS;
23910943d14SPekka Enberg 	setup_sects = boot.hdr.setup_sects + 1;
24010943d14SPekka Enberg 
24154d4a626SPekka Enberg 	setup_size = setup_sects << 9;
2426753ed2fSPekka Enberg 	p = guest_real_to_host(self, BOOT_LOADER_SELECTOR, BOOT_LOADER_IP);
243ae1fae34SPekka Enberg 
2447fb218bdSPekka Enberg 	if (read(fd, p, setup_size) != setup_size)
2457fb218bdSPekka Enberg 		die_perror("read");
2467fb218bdSPekka Enberg 
2476753ed2fSPekka Enberg 	p = guest_flat_to_host(self, BZ_KERNEL_START);
248ae1fae34SPekka Enberg 
249ae1fae34SPekka Enberg 	while ((nr = read(fd, p, 65536)) > 0)
250ae1fae34SPekka Enberg 		p += nr;
251ae1fae34SPekka Enberg 
252debcfac0SCyrill Gorcunov 	if (boot.hdr.version < 0x0202 || !(boot.hdr.loadflags & 0x01))
253debcfac0SCyrill Gorcunov 		cmdline_offset = (0x9ff0 - cmdline_size) & ~15;
254debcfac0SCyrill Gorcunov 	else
255debcfac0SCyrill Gorcunov 		cmdline_offset = 0x10000;
256debcfac0SCyrill Gorcunov 
257debcfac0SCyrill Gorcunov 	if (boot.hdr.version < 0x0206)
258debcfac0SCyrill Gorcunov 		boot.hdr.cmdline_size = 256;
259debcfac0SCyrill Gorcunov 
260debcfac0SCyrill Gorcunov 	if (kernel_cmdline) {
261debcfac0SCyrill Gorcunov 		cmdline_size = strlen(kernel_cmdline) + 1;
262debcfac0SCyrill Gorcunov 		if (cmdline_size > boot.hdr.cmdline_size)
263debcfac0SCyrill Gorcunov 			cmdline_size = boot.hdr.cmdline_size;
264debcfac0SCyrill Gorcunov 
265debcfac0SCyrill Gorcunov 		p = guest_flat_to_host(self, cmdline_offset);
266debcfac0SCyrill Gorcunov 		memset(p, 0, cmdline_size);
267debcfac0SCyrill Gorcunov 		strcpy(p, kernel_cmdline);
268ad681038SCyrill Gorcunov 	} else
269ad681038SCyrill Gorcunov 		cmdline_size = 0;
270ad681038SCyrill Gorcunov 
271ad681038SCyrill Gorcunov 
272ad681038SCyrill Gorcunov 	if (boot.hdr.version < 0x0202 || !(boot.hdr.loadflags & 0x01))
273ad681038SCyrill Gorcunov 		cmdline_offset = (0x9ff0 - cmdline_size) & ~15;
274ad681038SCyrill Gorcunov 	else
275ad681038SCyrill Gorcunov 		cmdline_offset = 0x10000;
276ad681038SCyrill Gorcunov 
277ad681038SCyrill Gorcunov 	if (boot.hdr.version >= 0x0200) {
278ad681038SCyrill Gorcunov 		if (boot.hdr.version >= 0x0202) {
279ad681038SCyrill Gorcunov 			boot.hdr.cmd_line_ptr =
280ad681038SCyrill Gorcunov 				(BOOT_LOADER_SELECTOR << 4) + cmdline_offset;
281ad681038SCyrill Gorcunov 		} else if (boot.hdr.version >= 0x0201) {
282ad681038SCyrill Gorcunov 			boot.hdr.heap_end_ptr = cmdline_offset - 0x0200;
283ad681038SCyrill Gorcunov 			boot.hdr.loadflags |= CAN_USE_HEAP;
284ad681038SCyrill Gorcunov 		}
285ad681038SCyrill Gorcunov 
286debcfac0SCyrill Gorcunov 	}
287debcfac0SCyrill Gorcunov 
288dbdb74c2SPekka Enberg 	self->boot_selector	= BOOT_LOADER_SELECTOR;
289edc8a14dSPekka Enberg 	/*
290edc8a14dSPekka Enberg 	 * The real-mode setup code starts at offset 0x200 of a bzImage. See
291edc8a14dSPekka Enberg 	 * Documentation/x86/boot.txt for details.
292edc8a14dSPekka Enberg 	 */
293edc8a14dSPekka Enberg 	self->boot_ip		= BOOT_LOADER_IP + 0x200;
294dbdb74c2SPekka Enberg 	self->boot_sp		= BOOT_LOADER_SP;
295edc8a14dSPekka Enberg 
296ea684828SCyrill Gorcunov 	/*
29722489bb0SCyrill Gorcunov 	 * Setup a *fake* real mode vector table, it has only
29822489bb0SCyrill Gorcunov 	 * one real hadler which does just iret
29922489bb0SCyrill Gorcunov 	 *
300305d9054SCyrill Gorcunov 	 * This is where the BIOS lives -- BDA area
301ea684828SCyrill Gorcunov 	 */
302305d9054SCyrill Gorcunov 	intr_addr = BIOS_INTR_NEXT(BDA_START + 0, 16);
303305d9054SCyrill Gorcunov 	p = guest_flat_to_host(self, intr_addr);
304305d9054SCyrill Gorcunov 	memcpy(p, intfake, intfake_size);
305bc75b0aeSCyrill Gorcunov 	intr = (struct real_intr_desc) {
306305d9054SCyrill Gorcunov 		.segment	= REAL_SEGMENT(intr_addr),
30722489bb0SCyrill Gorcunov 		.offset		= 0,
308ea684828SCyrill Gorcunov 	};
309bc75b0aeSCyrill Gorcunov 	interrupt_table__setup(&self->interrupt_table, &intr);
310305d9054SCyrill Gorcunov 
311305d9054SCyrill Gorcunov 	intr_addr = BIOS_INTR_NEXT(BDA_START + intfake_size, 16);
312305d9054SCyrill Gorcunov 	p = guest_flat_to_host(self, intr_addr);
313305d9054SCyrill Gorcunov 	memcpy(p, int10, int10_size);
314305d9054SCyrill Gorcunov 	intr = (struct real_intr_desc) {
315305d9054SCyrill Gorcunov 		.segment	= REAL_SEGMENT(intr_addr),
316305d9054SCyrill Gorcunov 		.offset		= 0,
317305d9054SCyrill Gorcunov 	};
318305d9054SCyrill Gorcunov 	interrupt_table__set(&self->interrupt_table, &intr, 0x10);
319305d9054SCyrill Gorcunov 
320305d9054SCyrill Gorcunov 	p = guest_flat_to_host(self, 0);
321bc75b0aeSCyrill Gorcunov 	interrupt_table__copy(&self->interrupt_table, p, REAL_INTR_SIZE);
322ea684828SCyrill Gorcunov 
3237fb218bdSPekka Enberg 	return true;
324ae1fae34SPekka Enberg }
325ae1fae34SPekka Enberg 
/*
 * Load the kernel image @kernel_filename into guest memory, first trying the
 * bzImage format and then falling back to a flat binary. @kernel_cmdline is
 * handed to the bzImage loader.
 *
 * The file is closed again before returning. Failing to open the file or to
 * recognise its format is fatal; otherwise returns true.
 */
bool kvm__load_kernel(struct kvm *kvm, const char *kernel_filename,
			const char *kernel_cmdline)
{
	bool ret;
	int fd;

	fd = open(kernel_filename, O_RDONLY);
	if (fd < 0)
		die("unable to open kernel");

	ret = load_bzimage(kvm, fd, kernel_cmdline);
	if (!ret)
		ret = load_flat_binary(kvm, fd);

	/* The image now lives in guest memory; do not leak the descriptor. */
	close(fd);

	if (!ret)
		die("%s is not a valid bzImage or flat binary", kernel_filename);

	return ret;
}
349ae1fae34SPekka Enberg 
35006e41eeaSPekka Enberg static inline uint64_t ip_flat_to_real(struct kvm *self, uint64_t ip)
35106e41eeaSPekka Enberg {
35206e41eeaSPekka Enberg 	uint64_t cs = self->sregs.cs.selector;
35306e41eeaSPekka Enberg 
35406e41eeaSPekka Enberg 	return ip - (cs << 4);
35506e41eeaSPekka Enberg }
35606e41eeaSPekka Enberg 
357f326512aSPekka Enberg static inline uint64_t ip_to_flat(struct kvm *self, uint64_t ip)
35806e41eeaSPekka Enberg {
359f326512aSPekka Enberg 	uint64_t cs;
360f326512aSPekka Enberg 
361f326512aSPekka Enberg 	/*
362f326512aSPekka Enberg 	 * NOTE! We should take code segment base address into account here.
363f326512aSPekka Enberg 	 * Luckily it's usually zero because Linux uses flat memory model.
364f326512aSPekka Enberg 	 */
365f326512aSPekka Enberg 	if (self->sregs.cr0 & 0x01)
366f326512aSPekka Enberg 		return ip;
367f326512aSPekka Enberg 
368f326512aSPekka Enberg 	cs = self->sregs.cs.selector;
36906e41eeaSPekka Enberg 
37006e41eeaSPekka Enberg 	return ip + (cs << 4);
37106e41eeaSPekka Enberg }
37206e41eeaSPekka Enberg 
/*
 * Segment base that goes with a real-mode selector.
 */
static inline uint32_t selector_to_base(uint16_t selector)
{
	uint32_t base = selector;

	/*
	 * KVM on Intel requires 'base' to be 'selector * 16' in real mode.
	 */
	return base << 4;
}
380dbdb74c2SPekka Enberg 
381*3d78d3a5SPekka Enberg static void kvm__setup_fpu(struct kvm *self)
382*3d78d3a5SPekka Enberg {
383*3d78d3a5SPekka Enberg 	self->fpu = (struct kvm_fpu) {
384*3d78d3a5SPekka Enberg 		.fcw		= 0x37f,
385*3d78d3a5SPekka Enberg 		.mxcsr		= 0x1f80,
386*3d78d3a5SPekka Enberg 	};
387*3d78d3a5SPekka Enberg 
388*3d78d3a5SPekka Enberg 	if (ioctl(self->vcpu_fd, KVM_SET_FPU, &self->fpu) < 0)
389*3d78d3a5SPekka Enberg 		die_perror("KVM_SET_FPU failed");
390*3d78d3a5SPekka Enberg }
391*3d78d3a5SPekka Enberg 
392*3d78d3a5SPekka Enberg static void kvm__setup_regs(struct kvm *self)
393*3d78d3a5SPekka Enberg {
394*3d78d3a5SPekka Enberg 	self->regs = (struct kvm_regs) {
395*3d78d3a5SPekka Enberg 		/* We start the guest in 16-bit real mode  */
396*3d78d3a5SPekka Enberg 		.rflags		= 0x0000000000000002ULL,
397*3d78d3a5SPekka Enberg 
398*3d78d3a5SPekka Enberg 		.rip		= self->boot_ip,
399*3d78d3a5SPekka Enberg 		.rsp		= self->boot_sp,
400*3d78d3a5SPekka Enberg 		.rbp		= self->boot_sp,
401*3d78d3a5SPekka Enberg 	};
402*3d78d3a5SPekka Enberg 
403*3d78d3a5SPekka Enberg 	if (self->regs.rip > USHRT_MAX)
404*3d78d3a5SPekka Enberg 		die("ip 0x%" PRIx64 " is too high for real mode", (uint64_t) self->regs.rip);
405*3d78d3a5SPekka Enberg 
406*3d78d3a5SPekka Enberg 	if (ioctl(self->vcpu_fd, KVM_SET_REGS, &self->regs) < 0)
407*3d78d3a5SPekka Enberg 		die_perror("KVM_SET_REGS failed");
408*3d78d3a5SPekka Enberg }
409*3d78d3a5SPekka Enberg 
410*3d78d3a5SPekka Enberg static void kvm__setup_sregs(struct kvm *self)
411ae1fae34SPekka Enberg {
41253602077SPekka Enberg 	self->sregs = (struct kvm_sregs) {
41353602077SPekka Enberg 		.cr0		= 0x60000010ULL,
41453602077SPekka Enberg 		.cs		= (struct kvm_segment) {
415dbdb74c2SPekka Enberg 			.selector	= self->boot_selector,
416dbdb74c2SPekka Enberg 			.base		= selector_to_base(self->boot_selector),
41753602077SPekka Enberg 			.limit		= 0xffffU,
41853602077SPekka Enberg 			.type		= 0x0bU,
41953602077SPekka Enberg 			.present	= 1,
42053602077SPekka Enberg 			.dpl		= 0x03,
42153602077SPekka Enberg 			.s		= 1,
42253602077SPekka Enberg 		},
42353602077SPekka Enberg 		.ss		= (struct kvm_segment) {
424dbdb74c2SPekka Enberg 			.selector	= self->boot_selector,
425dbdb74c2SPekka Enberg 			.base		= selector_to_base(self->boot_selector),
42653602077SPekka Enberg 			.limit		= 0xffffU,
42753602077SPekka Enberg 			.type		= 0x03U,
42853602077SPekka Enberg 			.present	= 1,
42953602077SPekka Enberg 			.dpl		= 0x03,
43053602077SPekka Enberg 			.s		= 1,
43153602077SPekka Enberg 		},
43253602077SPekka Enberg 		.ds		= (struct kvm_segment) {
433dbdb74c2SPekka Enberg 			.selector	= self->boot_selector,
434dbdb74c2SPekka Enberg 			.base		= selector_to_base(self->boot_selector),
43553602077SPekka Enberg 			.limit		= 0xffffU,
43653602077SPekka Enberg 			.type		= 0x03U,
43753602077SPekka Enberg 			.present	= 1,
43853602077SPekka Enberg 			.dpl		= 0x03,
43953602077SPekka Enberg 			.s		= 1,
44053602077SPekka Enberg 		},
44153602077SPekka Enberg 		.es		= (struct kvm_segment) {
442dbdb74c2SPekka Enberg 			.selector	= self->boot_selector,
443dbdb74c2SPekka Enberg 			.base		= selector_to_base(self->boot_selector),
44453602077SPekka Enberg 			.limit		= 0xffffU,
44553602077SPekka Enberg 			.type		= 0x03U,
44653602077SPekka Enberg 			.present	= 1,
44753602077SPekka Enberg 			.dpl		= 0x03,
44853602077SPekka Enberg 			.s		= 1,
44953602077SPekka Enberg 		},
45053602077SPekka Enberg 		.fs		= (struct kvm_segment) {
451dbdb74c2SPekka Enberg 			.selector	= self->boot_selector,
452dbdb74c2SPekka Enberg 			.base		= selector_to_base(self->boot_selector),
45353602077SPekka Enberg 			.limit		= 0xffffU,
45453602077SPekka Enberg 			.type		= 0x03U,
45553602077SPekka Enberg 			.present	= 1,
45653602077SPekka Enberg 			.dpl		= 0x03,
45753602077SPekka Enberg 			.s		= 1,
45853602077SPekka Enberg 		},
45953602077SPekka Enberg 		.gs		= (struct kvm_segment) {
460dbdb74c2SPekka Enberg 			.selector	= self->boot_selector,
461dbdb74c2SPekka Enberg 			.base		= selector_to_base(self->boot_selector),
46253602077SPekka Enberg 			.limit		= 0xffffU,
46353602077SPekka Enberg 			.type		= 0x03U,
46453602077SPekka Enberg 			.present	= 1,
46553602077SPekka Enberg 			.dpl		= 0x03,
46653602077SPekka Enberg 			.s		= 1,
46753602077SPekka Enberg 		},
46853602077SPekka Enberg 		.tr		= (struct kvm_segment) {
46953602077SPekka Enberg 			.limit		= 0xffffU,
47053602077SPekka Enberg 			.present	= 1,
47153602077SPekka Enberg 			.type		= 0x03U,
47253602077SPekka Enberg 		},
47353602077SPekka Enberg 		.ldt		= (struct kvm_segment) {
47453602077SPekka Enberg 			.limit		= 0xffffU,
47553602077SPekka Enberg 			.present	= 1,
4767085d935SPekka Enberg 			.type		= 0x02U,
47753602077SPekka Enberg 		},
47853602077SPekka Enberg 		.gdt		= (struct kvm_dtable) {
47953602077SPekka Enberg 			.limit		= 0xffffU,
48053602077SPekka Enberg 		},
48153602077SPekka Enberg 		.idt		= (struct kvm_dtable) {
48253602077SPekka Enberg 			.limit		= 0xffffU,
48353602077SPekka Enberg 		},
48453602077SPekka Enberg 	};
48553602077SPekka Enberg 
48653602077SPekka Enberg 	if (ioctl(self->vcpu_fd, KVM_SET_SREGS, &self->sregs) < 0)
48753602077SPekka Enberg 		die_perror("KVM_SET_SREGS failed");
488*3d78d3a5SPekka Enberg }
48906e41eeaSPekka Enberg 
/*
 * Bring the vcpu into its initial boot state: special registers first, then
 * the general purpose registers, then the FPU.
 */
void kvm__reset_vcpu(struct kvm *self)
{
	kvm__setup_sregs(self);
	kvm__setup_regs(self);
	kvm__setup_fpu(self);
}
498ae1fae34SPekka Enberg 
499ae1fae34SPekka Enberg void kvm__run(struct kvm *self)
500ae1fae34SPekka Enberg {
501ae1fae34SPekka Enberg 	if (ioctl(self->vcpu_fd, KVM_RUN, 0) < 0)
502ae1fae34SPekka Enberg 		die_perror("KVM_RUN failed");
503ae1fae34SPekka Enberg }
504ae1fae34SPekka Enberg 
50553602077SPekka Enberg static void print_segment(const char *name, struct kvm_segment *seg)
50653602077SPekka Enberg {
507ce556636SPekka Enberg 	printf(" %s       %04" PRIx16 "      %016" PRIx64 "  %08" PRIx32 "  %02" PRIx8 "    %x %x   %x  %x %x %x %x\n",
50853602077SPekka Enberg 		name, (uint16_t) seg->selector, (uint64_t) seg->base, (uint32_t) seg->limit,
509ce556636SPekka Enberg 		(uint8_t) seg->type, seg->present, seg->dpl, seg->db, seg->s, seg->l, seg->g, seg->avl);
51053602077SPekka Enberg }
51153602077SPekka Enberg 
512ae1fae34SPekka Enberg void kvm__show_registers(struct kvm *self)
5137118d2caSPekka Enberg {
51453602077SPekka Enberg 	unsigned long cr0, cr2, cr3;
51553602077SPekka Enberg 	unsigned long cr4, cr8;
5167118d2caSPekka Enberg 	unsigned long rax, rbx, rcx;
5177118d2caSPekka Enberg 	unsigned long rdx, rsi, rdi;
5187118d2caSPekka Enberg 	unsigned long rbp,  r8,  r9;
5197118d2caSPekka Enberg 	unsigned long r10, r11, r12;
5207118d2caSPekka Enberg 	unsigned long r13, r14, r15;
5217118d2caSPekka Enberg 	unsigned long rip, rsp;
52253602077SPekka Enberg 	struct kvm_sregs sregs;
523a2fe6199SPekka Enberg 	unsigned long rflags;
5247118d2caSPekka Enberg 	struct kvm_regs regs;
525ce5e0ecbSPekka Enberg 	int i;
5267118d2caSPekka Enberg 
5277118d2caSPekka Enberg 	if (ioctl(self->vcpu_fd, KVM_GET_REGS, &regs) < 0)
5287118d2caSPekka Enberg 		die("KVM_GET_REGS failed");
5297118d2caSPekka Enberg 
530a2fe6199SPekka Enberg 	rflags = regs.rflags;
531a2fe6199SPekka Enberg 
5327118d2caSPekka Enberg 	rip = regs.rip; rsp = regs.rsp;
5337118d2caSPekka Enberg 	rax = regs.rax; rbx = regs.rbx; rcx = regs.rcx;
5347118d2caSPekka Enberg 	rdx = regs.rdx; rsi = regs.rsi; rdi = regs.rdi;
5357118d2caSPekka Enberg 	rbp = regs.rbp; r8  = regs.r8;  r9  = regs.r9;
5367118d2caSPekka Enberg 	r10 = regs.r10; r11 = regs.r11; r12 = regs.r12;
5377118d2caSPekka Enberg 	r13 = regs.r13; r14 = regs.r14; r15 = regs.r15;
5387118d2caSPekka Enberg 
5397118d2caSPekka Enberg 	printf("Registers:\n");
5402177ec43SPekka Enberg 	printf(" rip: %016lx   rsp: %016lx flags: %016lx\n", rip, rsp, rflags);
541ea2e4ea0SCyrill Gorcunov 	printf(" rax: %016lx   rbx: %016lx   rcx: %016lx\n", rax, rbx, rcx);
5427118d2caSPekka Enberg 	printf(" rdx: %016lx   rsi: %016lx   rdi: %016lx\n", rdx, rsi, rdi);
5437118d2caSPekka Enberg 	printf(" rbp: %016lx   r8:  %016lx   r9:  %016lx\n", rbp, r8,  r9);
5447118d2caSPekka Enberg 	printf(" r10: %016lx   r11: %016lx   r12: %016lx\n", r10, r11, r12);
5457118d2caSPekka Enberg 	printf(" r13: %016lx   r14: %016lx   r15: %016lx\n", r13, r14, r15);
54653602077SPekka Enberg 
54753602077SPekka Enberg 	if (ioctl(self->vcpu_fd, KVM_GET_SREGS, &sregs) < 0)
54853602077SPekka Enberg 		die("KVM_GET_REGS failed");
54953602077SPekka Enberg 
55053602077SPekka Enberg 	cr0 = sregs.cr0; cr2 = sregs.cr2; cr3 = sregs.cr3;
55153602077SPekka Enberg 	cr4 = sregs.cr4; cr8 = sregs.cr8;
55253602077SPekka Enberg 
55353602077SPekka Enberg 	printf(" cr0: %016lx   cr2: %016lx   cr3: %016lx\n", cr0, cr2, cr3);
55453602077SPekka Enberg 	printf(" cr4: %016lx   cr8: %016lx\n", cr4, cr8);
555ce5e0ecbSPekka Enberg 	printf("Segment registers:\n");
556ce556636SPekka Enberg 	printf(" register  selector  base              limit     type  p dpl db s l g avl\n");
55753602077SPekka Enberg 	print_segment("cs ", &sregs.cs);
55853602077SPekka Enberg 	print_segment("ss ", &sregs.ss);
55953602077SPekka Enberg 	print_segment("ds ", &sregs.ds);
56053602077SPekka Enberg 	print_segment("es ", &sregs.es);
56153602077SPekka Enberg 	print_segment("fs ", &sregs.fs);
56253602077SPekka Enberg 	print_segment("gs ", &sregs.gs);
563ce5e0ecbSPekka Enberg 	print_segment("tr ", &sregs.tr);
564ce5e0ecbSPekka Enberg 	print_segment("ldt", &sregs.ldt);
5652049569dSPekka Enberg 	printf(" [ efer: %016lx  apic base: %016lx  nmi: %s ]\n", (uint64_t) sregs.efer, (uint64_t) sregs.apic_base,
5662049569dSPekka Enberg 		(self->nmi_disabled ? "disabled" : "enabled"));
567ce5e0ecbSPekka Enberg 	printf("Interrupt bitmap:\n");
568ce5e0ecbSPekka Enberg 	printf(" ");
569ce5e0ecbSPekka Enberg 	for (i = 0; i < (KVM_NR_INTERRUPTS + 63) / 64; i++)
570ce5e0ecbSPekka Enberg 		printf("%016lx ", (uint64_t) sregs.interrupt_bitmap[i]);
571ce5e0ecbSPekka Enberg 	printf("\n");
5727118d2caSPekka Enberg }
5737118d2caSPekka Enberg 
574ae1fae34SPekka Enberg void kvm__show_code(struct kvm *self)
5756f10be05SPekka Enberg {
5766f10be05SPekka Enberg 	unsigned int code_bytes = 64;
5776f10be05SPekka Enberg 	unsigned int code_prologue = code_bytes * 43 / 64;
5786f10be05SPekka Enberg 	unsigned int code_len = code_bytes;
5796f10be05SPekka Enberg 	unsigned char c;
580ae1fae34SPekka Enberg 	unsigned int i;
5816f10be05SPekka Enberg 	uint8_t *ip;
5826f10be05SPekka Enberg 
5832a601aafSPekka Enberg 	if (ioctl(self->vcpu_fd, KVM_GET_REGS, &self->regs) < 0)
5842a601aafSPekka Enberg 		die("KVM_GET_REGS failed");
5852a601aafSPekka Enberg 
5862a601aafSPekka Enberg 	if (ioctl(self->vcpu_fd, KVM_GET_SREGS, &self->sregs) < 0)
5872a601aafSPekka Enberg 		die("KVM_GET_SREGS failed");
5882a601aafSPekka Enberg 
589f326512aSPekka Enberg 	ip = guest_flat_to_host(self, ip_to_flat(self, self->regs.rip) - code_prologue);
5906f10be05SPekka Enberg 
5916f10be05SPekka Enberg 	printf("Code: ");
5926f10be05SPekka Enberg 
5936f10be05SPekka Enberg 	for (i = 0; i < code_len; i++, ip++) {
5946e8abc38SPekka Enberg 		c = *ip;
5956e8abc38SPekka Enberg 
596f326512aSPekka Enberg 		if (ip == guest_flat_to_host(self, ip_to_flat(self, self->regs.rip)))
5976f10be05SPekka Enberg 			printf("<%02x> ", c);
5986f10be05SPekka Enberg 		else
5996f10be05SPekka Enberg 			printf("%02x ", c);
6006f10be05SPekka Enberg 	}
6016f10be05SPekka Enberg 
6026f10be05SPekka Enberg 	printf("\n");
6036f10be05SPekka Enberg }
604090f898eSCyrill Gorcunov 
/*
 * Hex-dump guest memory starting at flat address @addr, eight bytes per
 * line. @size is rounded down to a multiple of eight; nothing at all is
 * printed when that leaves zero bytes. No range check is done on @addr.
 */
void kvm__dump_mem(struct kvm *self, unsigned long addr, unsigned long size)
{
	unsigned char *base;
	unsigned char *row;
	unsigned long off;

	/* Only whole 8-byte rows are printed. */
	size -= size % 8;
	if (size == 0)
		return;

	base = (unsigned char *)guest_flat_to_host(self, addr);

	printf("Guest memory dump:\n");

	for (off = 0; off < size; off += 8) {
		row = base + off;

		printf("0x%08lx: %02x%02x%02x%02x %02x%02x%02x%02x\n",
			addr + off, row[0], row[1], row[2], row[3],
				    row[4], row[5], row[6], row[7]);
	}
}
623