xref: /kvmtool/x86/kvm.c (revision f6b8ccc18fa40f99d9c074cd1ace5c765ea8d05e)
1af7b0868SMatt Evans #include "kvm/kvm.h"
2af7b0868SMatt Evans #include "kvm/boot-protocol.h"
3af7b0868SMatt Evans #include "kvm/cpufeature.h"
4af7b0868SMatt Evans #include "kvm/interrupt.h"
5af7b0868SMatt Evans #include "kvm/mptable.h"
6af7b0868SMatt Evans #include "kvm/util.h"
70b69bdefSMatt Evans #include "kvm/8250-serial.h"
80b69bdefSMatt Evans #include "kvm/virtio-console.h"
9af7b0868SMatt Evans 
10af7b0868SMatt Evans #include <asm/bootparam.h>
11af7b0868SMatt Evans #include <linux/kvm.h>
12af7b0868SMatt Evans 
13af7b0868SMatt Evans #include <sys/types.h>
14af7b0868SMatt Evans #include <sys/ioctl.h>
15af7b0868SMatt Evans #include <sys/mman.h>
16af7b0868SMatt Evans #include <sys/stat.h>
17af7b0868SMatt Evans #include <stdbool.h>
18af7b0868SMatt Evans #include <assert.h>
19af7b0868SMatt Evans #include <stdlib.h>
20af7b0868SMatt Evans #include <string.h>
21af7b0868SMatt Evans #include <unistd.h>
22af7b0868SMatt Evans #include <stdio.h>
23af7b0868SMatt Evans #include <fcntl.h>
24af7b0868SMatt Evans #include <asm/unistd.h>
25af7b0868SMatt Evans 
/*
 * Table of KVM capabilities for the x86 port, terminated by an all-zero
 * sentinel entry.
 *
 * NOTE(review): the name suggests these are the extensions that must be
 * present on the host; the code that walks this table is not in this file,
 * so confirm against the caller.
 */
struct kvm_ext kvm_req_ext[] = {
	{ DEFINE_KVM_EXT(KVM_CAP_COALESCED_MMIO) },
	{ DEFINE_KVM_EXT(KVM_CAP_SET_TSS_ADDR) },
	{ DEFINE_KVM_EXT(KVM_CAP_PIT2) },
	{ DEFINE_KVM_EXT(KVM_CAP_USER_MEMORY) },
	{ DEFINE_KVM_EXT(KVM_CAP_IRQ_ROUTING) },
	{ DEFINE_KVM_EXT(KVM_CAP_IRQCHIP) },
	{ DEFINE_KVM_EXT(KVM_CAP_HLT) },
	{ DEFINE_KVM_EXT(KVM_CAP_IRQ_INJECT_STATUS) },
	{ DEFINE_KVM_EXT(KVM_CAP_EXT_CPUID) },
	{ 0, 0 }	/* end-of-table marker */
};
38af7b0868SMatt Evans 
39af7b0868SMatt Evans bool kvm__arch_cpu_supports_vm(void)
40af7b0868SMatt Evans {
41af7b0868SMatt Evans 	struct cpuid_regs regs;
42af7b0868SMatt Evans 	u32 eax_base;
43af7b0868SMatt Evans 	int feature;
44af7b0868SMatt Evans 
45af7b0868SMatt Evans 	regs	= (struct cpuid_regs) {
46af7b0868SMatt Evans 		.eax		= 0x00,
47af7b0868SMatt Evans 	};
48af7b0868SMatt Evans 	host_cpuid(&regs);
49af7b0868SMatt Evans 
50af7b0868SMatt Evans 	switch (regs.ebx) {
51af7b0868SMatt Evans 	case CPUID_VENDOR_INTEL_1:
52af7b0868SMatt Evans 		eax_base	= 0x00;
53af7b0868SMatt Evans 		feature		= KVM__X86_FEATURE_VMX;
54af7b0868SMatt Evans 		break;
55af7b0868SMatt Evans 
56af7b0868SMatt Evans 	case CPUID_VENDOR_AMD_1:
57af7b0868SMatt Evans 		eax_base	= 0x80000000;
58af7b0868SMatt Evans 		feature		= KVM__X86_FEATURE_SVM;
59af7b0868SMatt Evans 		break;
60af7b0868SMatt Evans 
61af7b0868SMatt Evans 	default:
62af7b0868SMatt Evans 		return false;
63af7b0868SMatt Evans 	}
64af7b0868SMatt Evans 
65af7b0868SMatt Evans 	regs	= (struct cpuid_regs) {
66af7b0868SMatt Evans 		.eax		= eax_base,
67af7b0868SMatt Evans 	};
68af7b0868SMatt Evans 	host_cpuid(&regs);
69af7b0868SMatt Evans 
70af7b0868SMatt Evans 	if (regs.eax < eax_base + 0x01)
71af7b0868SMatt Evans 		return false;
72af7b0868SMatt Evans 
73af7b0868SMatt Evans 	regs	= (struct cpuid_regs) {
74af7b0868SMatt Evans 		.eax		= eax_base + 0x01
75af7b0868SMatt Evans 	};
76af7b0868SMatt Evans 	host_cpuid(&regs);
77af7b0868SMatt Evans 
78af7b0868SMatt Evans 	return regs.ecx & (1 << feature);
79af7b0868SMatt Evans }
80af7b0868SMatt Evans 
81af7b0868SMatt Evans /*
82af7b0868SMatt Evans  * Allocating RAM size bigger than 4GB requires us to leave a gap
83af7b0868SMatt Evans  * in the RAM which is used for PCI MMIO, hotplug, and unconfigured
84af7b0868SMatt Evans  * devices (see documentation of e820_setup_gap() for details).
85af7b0868SMatt Evans  *
86af7b0868SMatt Evans  * If we're required to initialize RAM bigger than 4GB, we will create
87af7b0868SMatt Evans  * a gap between 0xe0000000 and 0x100000000 in the guest virtual mem space.
88af7b0868SMatt Evans  */
89af7b0868SMatt Evans 
90af7b0868SMatt Evans void kvm__init_ram(struct kvm *kvm)
91af7b0868SMatt Evans {
92af7b0868SMatt Evans 	u64	phys_start, phys_size;
93af7b0868SMatt Evans 	void	*host_mem;
94af7b0868SMatt Evans 
95af7b0868SMatt Evans 	if (kvm->ram_size < KVM_32BIT_GAP_START) {
96af7b0868SMatt Evans 		/* Use a single block of RAM for 32bit RAM */
97af7b0868SMatt Evans 
98af7b0868SMatt Evans 		phys_start = 0;
99af7b0868SMatt Evans 		phys_size  = kvm->ram_size;
100af7b0868SMatt Evans 		host_mem   = kvm->ram_start;
101af7b0868SMatt Evans 
102af7b0868SMatt Evans 		kvm__register_mem(kvm, phys_start, phys_size, host_mem);
103af7b0868SMatt Evans 	} else {
104af7b0868SMatt Evans 		/* First RAM range from zero to the PCI gap: */
105af7b0868SMatt Evans 
106af7b0868SMatt Evans 		phys_start = 0;
107af7b0868SMatt Evans 		phys_size  = KVM_32BIT_GAP_START;
108af7b0868SMatt Evans 		host_mem   = kvm->ram_start;
109af7b0868SMatt Evans 
110af7b0868SMatt Evans 		kvm__register_mem(kvm, phys_start, phys_size, host_mem);
111af7b0868SMatt Evans 
112af7b0868SMatt Evans 		/* Second RAM range from 4GB to the end of RAM: */
113af7b0868SMatt Evans 
114af7b0868SMatt Evans 		phys_start = 0x100000000ULL;
115af7b0868SMatt Evans 		phys_size  = kvm->ram_size - phys_size;
116af7b0868SMatt Evans 		host_mem   = kvm->ram_start + phys_start;
117af7b0868SMatt Evans 
118af7b0868SMatt Evans 		kvm__register_mem(kvm, phys_start, phys_size, host_mem);
119af7b0868SMatt Evans 	}
120af7b0868SMatt Evans }
121af7b0868SMatt Evans 
/*
 * Write the x86 default kernel command line into @cmdline.
 *
 * @cmdline: destination buffer; its size is not passed in, so the caller
 *           must provide room for the full string plus the terminator.
 * @video:   true selects the VESA framebuffer console on tty0, false
 *           selects the serial console on ttyS0.
 */
void kvm__arch_set_cmdline(char *cmdline, bool video)
{
	const char *console_opts;

	console_opts = video
		? " video=vesafb console=tty0"
		: " console=ttyS0 earlyprintk=serial i8042.noaux=1";

	strcpy(cmdline, "noapic noacpi pci=conf1 reboot=k panic=1 i8042.direct=1 "
				"i8042.dumbkbd=1 i8042.nopnp=1");
	strcat(cmdline, console_opts);
}
1328e704a7aSMatt Evans 
133af7b0868SMatt Evans /* Architecture-specific KVM init */
134af7b0868SMatt Evans void kvm__arch_init(struct kvm *kvm, const char *kvm_dev, u64 ram_size, const char *name)
135af7b0868SMatt Evans {
136af7b0868SMatt Evans 	struct kvm_pit_config pit_config = { .flags = 0, };
137af7b0868SMatt Evans 	int ret;
138af7b0868SMatt Evans 
139af7b0868SMatt Evans 	ret = ioctl(kvm->vm_fd, KVM_SET_TSS_ADDR, 0xfffbd000);
140af7b0868SMatt Evans 	if (ret < 0)
141af7b0868SMatt Evans 		die_perror("KVM_SET_TSS_ADDR ioctl");
142af7b0868SMatt Evans 
143af7b0868SMatt Evans 	ret = ioctl(kvm->vm_fd, KVM_CREATE_PIT2, &pit_config);
144af7b0868SMatt Evans 	if (ret < 0)
145af7b0868SMatt Evans 		die_perror("KVM_CREATE_PIT2 ioctl");
146af7b0868SMatt Evans 
147af7b0868SMatt Evans 	kvm->ram_size		= ram_size;
148af7b0868SMatt Evans 
149af7b0868SMatt Evans 	if (kvm->ram_size < KVM_32BIT_GAP_START) {
150af7b0868SMatt Evans 		kvm->ram_start = mmap(NULL, ram_size, PROT_RW, MAP_ANON_NORESERVE, -1, 0);
151af7b0868SMatt Evans 	} else {
152af7b0868SMatt Evans 		kvm->ram_start = mmap(NULL, ram_size + KVM_32BIT_GAP_SIZE, PROT_RW, MAP_ANON_NORESERVE, -1, 0);
153af7b0868SMatt Evans 		if (kvm->ram_start != MAP_FAILED) {
154af7b0868SMatt Evans 			/*
155af7b0868SMatt Evans 			 * We mprotect the gap (see kvm__init_ram() for details) PROT_NONE so that
156af7b0868SMatt Evans 			 * if we accidently write to it, we will know.
157af7b0868SMatt Evans 			 */
158af7b0868SMatt Evans 			mprotect(kvm->ram_start + KVM_32BIT_GAP_START, KVM_32BIT_GAP_SIZE, PROT_NONE);
159af7b0868SMatt Evans 		}
160af7b0868SMatt Evans 	}
161af7b0868SMatt Evans 	if (kvm->ram_start == MAP_FAILED)
162af7b0868SMatt Evans 		die("out of memory");
163af7b0868SMatt Evans 
164af7b0868SMatt Evans 	madvise(kvm->ram_start, kvm->ram_size, MADV_MERGEABLE);
165af7b0868SMatt Evans 
166af7b0868SMatt Evans 	ret = ioctl(kvm->vm_fd, KVM_CREATE_IRQCHIP);
167af7b0868SMatt Evans 	if (ret < 0)
168af7b0868SMatt Evans 		die_perror("KVM_CREATE_IRQCHIP ioctl");
169af7b0868SMatt Evans }
170af7b0868SMatt Evans 
171af7b0868SMatt Evans void kvm__irq_line(struct kvm *kvm, int irq, int level)
172af7b0868SMatt Evans {
173af7b0868SMatt Evans 	struct kvm_irq_level irq_level;
174af7b0868SMatt Evans 
175af7b0868SMatt Evans 	irq_level	= (struct kvm_irq_level) {
176af7b0868SMatt Evans 		{
177af7b0868SMatt Evans 			.irq		= irq,
178af7b0868SMatt Evans 		},
179af7b0868SMatt Evans 		.level		= level,
180af7b0868SMatt Evans 	};
181af7b0868SMatt Evans 
182af7b0868SMatt Evans 	if (ioctl(kvm->vm_fd, KVM_IRQ_LINE, &irq_level) < 0)
183af7b0868SMatt Evans 		die_perror("KVM_IRQ_LINE failed");
184af7b0868SMatt Evans }
185af7b0868SMatt Evans 
/* Pulse guest interrupt line @irq: raise it, then lower it again. */
void kvm__irq_trigger(struct kvm *kvm, int irq)
{
	enum { IRQ_LEVEL_LOW = 0, IRQ_LEVEL_HIGH = 1 };

	kvm__irq_line(kvm, irq, IRQ_LEVEL_HIGH);
	kvm__irq_line(kvm, irq, IRQ_LEVEL_LOW);
}
191af7b0868SMatt Evans 
/*
 * Real-mode segment and offset at which the loaders below place the kernel
 * image, and the stack pointer handed to the boot CPU.
 */
#define BOOT_LOADER_SELECTOR	0x1000
#define BOOT_LOADER_IP		0x0000
#define BOOT_LOADER_SP		0x8000
/* Guest flat address where load_bzimage() stores the kernel command line. */
#define BOOT_CMDLINE_OFFSET	0x20000

/* Oldest bzImage header version load_bzimage() accepts. */
#define BOOT_PROTOCOL_REQUIRED	0x206
/* NOTE(review): not referenced in this file; presumably a boot-protocol loadflags bit - confirm. */
#define LOAD_HIGH		0x01
199af7b0868SMatt Evans 
200604dbd63SMatt Evans int load_flat_binary(struct kvm *kvm, int fd_kernel, int fd_initrd, const char *kernel_cmdline)
201af7b0868SMatt Evans {
202af7b0868SMatt Evans 	void *p;
203af7b0868SMatt Evans 	int nr;
204af7b0868SMatt Evans 
205604dbd63SMatt Evans 	/*
206604dbd63SMatt Evans 	 * Some architectures may support loading an initrd alongside the flat kernel,
207604dbd63SMatt Evans 	 * but we do not.
208604dbd63SMatt Evans 	 */
209604dbd63SMatt Evans 	if (fd_initrd != -1)
210604dbd63SMatt Evans 		pr_warning("Loading initrd with flat binary not supported.");
211604dbd63SMatt Evans 
212604dbd63SMatt Evans 	if (lseek(fd_kernel, 0, SEEK_SET) < 0)
213af7b0868SMatt Evans 		die_perror("lseek");
214af7b0868SMatt Evans 
215af7b0868SMatt Evans 	p = guest_real_to_host(kvm, BOOT_LOADER_SELECTOR, BOOT_LOADER_IP);
216af7b0868SMatt Evans 
217604dbd63SMatt Evans 	while ((nr = read(fd_kernel, p, 65536)) > 0)
218af7b0868SMatt Evans 		p += nr;
219af7b0868SMatt Evans 
220af7b0868SMatt Evans 	kvm->boot_selector	= BOOT_LOADER_SELECTOR;
221af7b0868SMatt Evans 	kvm->boot_ip		= BOOT_LOADER_IP;
222af7b0868SMatt Evans 	kvm->boot_sp		= BOOT_LOADER_SP;
223af7b0868SMatt Evans 
224af7b0868SMatt Evans 	return true;
225af7b0868SMatt Evans }
226af7b0868SMatt Evans 
227af7b0868SMatt Evans static const char *BZIMAGE_MAGIC	= "HdrS";
228af7b0868SMatt Evans 
229af7b0868SMatt Evans bool load_bzimage(struct kvm *kvm, int fd_kernel,
230af7b0868SMatt Evans 		  int fd_initrd, const char *kernel_cmdline, u16 vidmode)
231af7b0868SMatt Evans {
232af7b0868SMatt Evans 	struct boot_params *kern_boot;
233af7b0868SMatt Evans 	unsigned long setup_sects;
234af7b0868SMatt Evans 	struct boot_params boot;
235af7b0868SMatt Evans 	size_t cmdline_size;
236af7b0868SMatt Evans 	ssize_t setup_size;
237af7b0868SMatt Evans 	void *p;
238af7b0868SMatt Evans 	int nr;
239af7b0868SMatt Evans 
240af7b0868SMatt Evans 	/*
241af7b0868SMatt Evans 	 * See Documentation/x86/boot.txt for details no bzImage on-disk and
242af7b0868SMatt Evans 	 * memory layout.
243af7b0868SMatt Evans 	 */
244af7b0868SMatt Evans 
245af7b0868SMatt Evans 	if (lseek(fd_kernel, 0, SEEK_SET) < 0)
246af7b0868SMatt Evans 		die_perror("lseek");
247af7b0868SMatt Evans 
248af7b0868SMatt Evans 	if (read(fd_kernel, &boot, sizeof(boot)) != sizeof(boot))
249af7b0868SMatt Evans 		return false;
250af7b0868SMatt Evans 
251af7b0868SMatt Evans 	if (memcmp(&boot.hdr.header, BZIMAGE_MAGIC, strlen(BZIMAGE_MAGIC)))
252af7b0868SMatt Evans 		return false;
253af7b0868SMatt Evans 
254af7b0868SMatt Evans 	if (boot.hdr.version < BOOT_PROTOCOL_REQUIRED)
255af7b0868SMatt Evans 		die("Too old kernel");
256af7b0868SMatt Evans 
257af7b0868SMatt Evans 	if (lseek(fd_kernel, 0, SEEK_SET) < 0)
258af7b0868SMatt Evans 		die_perror("lseek");
259af7b0868SMatt Evans 
260af7b0868SMatt Evans 	if (!boot.hdr.setup_sects)
261af7b0868SMatt Evans 		boot.hdr.setup_sects = BZ_DEFAULT_SETUP_SECTS;
262af7b0868SMatt Evans 	setup_sects = boot.hdr.setup_sects + 1;
263af7b0868SMatt Evans 
264af7b0868SMatt Evans 	setup_size = setup_sects << 9;
265af7b0868SMatt Evans 	p = guest_real_to_host(kvm, BOOT_LOADER_SELECTOR, BOOT_LOADER_IP);
266af7b0868SMatt Evans 
267af7b0868SMatt Evans 	/* copy setup.bin to mem*/
268af7b0868SMatt Evans 	if (read(fd_kernel, p, setup_size) != setup_size)
269af7b0868SMatt Evans 		die_perror("read");
270af7b0868SMatt Evans 
271af7b0868SMatt Evans 	/* copy vmlinux.bin to BZ_KERNEL_START*/
272af7b0868SMatt Evans 	p = guest_flat_to_host(kvm, BZ_KERNEL_START);
273af7b0868SMatt Evans 
274af7b0868SMatt Evans 	while ((nr = read(fd_kernel, p, 65536)) > 0)
275af7b0868SMatt Evans 		p += nr;
276af7b0868SMatt Evans 
277af7b0868SMatt Evans 	p = guest_flat_to_host(kvm, BOOT_CMDLINE_OFFSET);
278af7b0868SMatt Evans 	if (kernel_cmdline) {
279af7b0868SMatt Evans 		cmdline_size = strlen(kernel_cmdline) + 1;
280af7b0868SMatt Evans 		if (cmdline_size > boot.hdr.cmdline_size)
281af7b0868SMatt Evans 			cmdline_size = boot.hdr.cmdline_size;
282af7b0868SMatt Evans 
283af7b0868SMatt Evans 		memset(p, 0, boot.hdr.cmdline_size);
284af7b0868SMatt Evans 		memcpy(p, kernel_cmdline, cmdline_size - 1);
285af7b0868SMatt Evans 	}
286af7b0868SMatt Evans 
287af7b0868SMatt Evans 	kern_boot	= guest_real_to_host(kvm, BOOT_LOADER_SELECTOR, 0x00);
288af7b0868SMatt Evans 
289af7b0868SMatt Evans 	kern_boot->hdr.cmd_line_ptr	= BOOT_CMDLINE_OFFSET;
290af7b0868SMatt Evans 	kern_boot->hdr.type_of_loader	= 0xff;
291af7b0868SMatt Evans 	kern_boot->hdr.heap_end_ptr	= 0xfe00;
292af7b0868SMatt Evans 	kern_boot->hdr.loadflags	|= CAN_USE_HEAP;
293af7b0868SMatt Evans 	kern_boot->hdr.vid_mode		= vidmode;
294af7b0868SMatt Evans 
295af7b0868SMatt Evans 	/*
296af7b0868SMatt Evans 	 * Read initrd image into guest memory
297af7b0868SMatt Evans 	 */
298af7b0868SMatt Evans 	if (fd_initrd >= 0) {
299af7b0868SMatt Evans 		struct stat initrd_stat;
300af7b0868SMatt Evans 		unsigned long addr;
301af7b0868SMatt Evans 
302af7b0868SMatt Evans 		if (fstat(fd_initrd, &initrd_stat))
303af7b0868SMatt Evans 			die_perror("fstat");
304af7b0868SMatt Evans 
305af7b0868SMatt Evans 		addr = boot.hdr.initrd_addr_max & ~0xfffff;
306af7b0868SMatt Evans 		for (;;) {
307af7b0868SMatt Evans 			if (addr < BZ_KERNEL_START)
308af7b0868SMatt Evans 				die("Not enough memory for initrd");
309af7b0868SMatt Evans 			else if (addr < (kvm->ram_size - initrd_stat.st_size))
310af7b0868SMatt Evans 				break;
311af7b0868SMatt Evans 			addr -= 0x100000;
312af7b0868SMatt Evans 		}
313af7b0868SMatt Evans 
314af7b0868SMatt Evans 		p = guest_flat_to_host(kvm, addr);
315af7b0868SMatt Evans 		nr = read(fd_initrd, p, initrd_stat.st_size);
316af7b0868SMatt Evans 		if (nr != initrd_stat.st_size)
317af7b0868SMatt Evans 			die("Failed to read initrd");
318af7b0868SMatt Evans 
319af7b0868SMatt Evans 		kern_boot->hdr.ramdisk_image	= addr;
320af7b0868SMatt Evans 		kern_boot->hdr.ramdisk_size	= initrd_stat.st_size;
321af7b0868SMatt Evans 	}
322af7b0868SMatt Evans 
323af7b0868SMatt Evans 	kvm->boot_selector	= BOOT_LOADER_SELECTOR;
324af7b0868SMatt Evans 	/*
325af7b0868SMatt Evans 	 * The real-mode setup code starts at offset 0x200 of a bzImage. See
326af7b0868SMatt Evans 	 * Documentation/x86/boot.txt for details.
327af7b0868SMatt Evans 	 */
328af7b0868SMatt Evans 	kvm->boot_ip		= BOOT_LOADER_IP + 0x200;
329af7b0868SMatt Evans 	kvm->boot_sp		= BOOT_LOADER_SP;
330af7b0868SMatt Evans 
331af7b0868SMatt Evans 	return true;
332af7b0868SMatt Evans }
333af7b0868SMatt Evans 
334af7b0868SMatt Evans /**
335af7b0868SMatt Evans  * kvm__arch_setup_firmware - inject BIOS into guest system memory
336af7b0868SMatt Evans  * @kvm - guest system descriptor
337af7b0868SMatt Evans  *
338af7b0868SMatt Evans  * This function is a main routine where we poke guest memory
339af7b0868SMatt Evans  * and install BIOS there.
340af7b0868SMatt Evans  */
341af7b0868SMatt Evans void kvm__arch_setup_firmware(struct kvm *kvm)
342af7b0868SMatt Evans {
343af7b0868SMatt Evans 	/* standart minimal configuration */
344af7b0868SMatt Evans 	setup_bios(kvm);
345af7b0868SMatt Evans 
346af7b0868SMatt Evans 	/* FIXME: SMP, ACPI and friends here */
347af7b0868SMatt Evans 
348af7b0868SMatt Evans 	/* MP table */
349af7b0868SMatt Evans 	mptable_setup(kvm, kvm->nrcpus);
350af7b0868SMatt Evans }
3510b69bdefSMatt Evans 
/*
 * Periodic x86 housekeeping: update the emulated 8250 serial consoles,
 * then let the virtio console inject its interrupt.
 */
void kvm__arch_periodic_poll(struct kvm *kvm)
{
	/* Serial ports first, virtio console second. */
	serial8250__update_consoles(kvm);
	virtio_console__inject_interrupt(kvm);
}
357