xref: /kvmtool/x86/kvm.c (revision e73a6b29f1ebf30c44f59a0a228ebed70aa76586)
1 #include "kvm/kvm.h"
2 #include "kvm/boot-protocol.h"
3 #include "kvm/cpufeature.h"
4 #include "kvm/interrupt.h"
5 #include "kvm/mptable.h"
6 #include "kvm/util.h"
7 #include "kvm/8250-serial.h"
8 #include "kvm/virtio-console.h"
9 
10 #include <asm/bootparam.h>
11 #include <linux/kvm.h>
12 #include <linux/kernel.h>
13 
14 #include <sys/types.h>
15 #include <sys/ioctl.h>
16 #include <sys/mman.h>
17 #include <sys/stat.h>
18 #include <stdbool.h>
19 #include <stdlib.h>
20 #include <string.h>
21 #include <unistd.h>
22 #include <stdio.h>
23 #include <fcntl.h>
24 
25 struct kvm_ext kvm_req_ext[] = {
26 	{ DEFINE_KVM_EXT(KVM_CAP_COALESCED_MMIO) },
27 	{ DEFINE_KVM_EXT(KVM_CAP_SET_TSS_ADDR) },
28 	{ DEFINE_KVM_EXT(KVM_CAP_PIT2) },
29 	{ DEFINE_KVM_EXT(KVM_CAP_USER_MEMORY) },
30 	{ DEFINE_KVM_EXT(KVM_CAP_IRQ_ROUTING) },
31 	{ DEFINE_KVM_EXT(KVM_CAP_IRQCHIP) },
32 	{ DEFINE_KVM_EXT(KVM_CAP_HLT) },
33 	{ DEFINE_KVM_EXT(KVM_CAP_IRQ_INJECT_STATUS) },
34 	{ DEFINE_KVM_EXT(KVM_CAP_EXT_CPUID) },
35 	{ 0, 0 }
36 };
37 
kvm__arch_default_ram_address(void)38 u64 kvm__arch_default_ram_address(void)
39 {
40 	return 0;
41 }
42 
/*
 * Arch hook for validating the configuration held in @kvm.
 *
 * Intentionally empty: this implementation performs no checks.
 */
void kvm__arch_validate_cfg(struct kvm *kvm)
{
	/* Nothing to validate. */
}
46 
kvm__arch_cpu_supports_vm(void)47 bool kvm__arch_cpu_supports_vm(void)
48 {
49 	struct cpuid_regs regs;
50 	u32 eax_base;
51 	int feature;
52 
53 	regs	= (struct cpuid_regs) {
54 		.eax		= 0x00,
55 	};
56 	host_cpuid(&regs);
57 
58 	switch (regs.ebx) {
59 	case CPUID_VENDOR_INTEL_1:
60 		eax_base	= 0x00;
61 		feature		= KVM__X86_FEATURE_VMX;
62 		break;
63 
64 	case CPUID_VENDOR_AMD_1:
65 		eax_base	= 0x80000000;
66 		feature		= KVM__X86_FEATURE_SVM;
67 		break;
68 
69 	default:
70 		return false;
71 	}
72 
73 	regs	= (struct cpuid_regs) {
74 		.eax		= eax_base,
75 	};
76 	host_cpuid(&regs);
77 
78 	if (regs.eax < eax_base + 0x01)
79 		return false;
80 
81 	regs	= (struct cpuid_regs) {
82 		.eax		= eax_base + 0x01
83 	};
84 	host_cpuid(&regs);
85 
86 	return regs.ecx & (1 << feature);
87 }
88 
89 /*
90  * Allocating RAM size bigger than 4GB requires us to leave a gap
91  * in the RAM which is used for PCI MMIO, hotplug, and unconfigured
92  * devices (see documentation of e820_setup_gap() for details).
93  *
94  * If we're required to initialize RAM bigger than 4GB, we will create
95  * a gap between 0xe0000000 and 0x100000000 in the guest virtual mem space.
96  */
97 
kvm__init_ram(struct kvm * kvm)98 void kvm__init_ram(struct kvm *kvm)
99 {
100 	u64	phys_start, phys_size;
101 	void	*host_mem;
102 
103 	if (kvm->ram_size < KVM_32BIT_GAP_START) {
104 		/* Use a single block of RAM for 32bit RAM */
105 
106 		phys_start = 0;
107 		phys_size  = kvm->ram_size;
108 		host_mem   = kvm->ram_start;
109 
110 		kvm__register_ram(kvm, phys_start, phys_size, host_mem);
111 	} else {
112 		/* First RAM range from zero to the PCI gap: */
113 
114 		phys_start = 0;
115 		phys_size  = KVM_32BIT_GAP_START;
116 		host_mem   = kvm->ram_start;
117 
118 		kvm__register_ram(kvm, phys_start, phys_size, host_mem);
119 
120 		/* Second RAM range from 4GB to the end of RAM: */
121 
122 		phys_start = KVM_32BIT_MAX_MEM_SIZE;
123 		phys_size  = kvm->ram_size - phys_start;
124 		host_mem   = kvm->ram_start + phys_start;
125 
126 		kvm__register_ram(kvm, phys_start, phys_size, host_mem);
127 	}
128 }
129 
/*
 * Arch-specific commandline setup.
 *
 * Writes the fixed x86 options into @cmdline, followed by either the
 * framebuffer option (@video true) or the serial/earlyprintk options
 * (@video false). @cmdline must be large enough for the result; no bounds
 * checking is done here.
 */
void kvm__arch_set_cmdline(char *cmdline, bool video)
{
	const char *console_opts = video ? " video=vesafb"
					 : " earlyprintk=serial i8042.noaux=1";

	strcpy(cmdline, "noapic noacpi pci=conf1 reboot=k panic=1 i8042.direct=1 "
				"i8042.dumbkbd=1 i8042.nopnp=1");
	strcat(cmdline, console_opts);
}
140 
141 /* Architecture-specific KVM init */
kvm__arch_init(struct kvm * kvm)142 void kvm__arch_init(struct kvm *kvm)
143 {
144 	const char *hugetlbfs_path = kvm->cfg.hugetlbfs_path;
145 	struct kvm_pit_config pit_config = { .flags = 0, };
146 	u64 ram_size = kvm->cfg.ram_size;
147 	int ret;
148 
149 	ret = ioctl(kvm->vm_fd, KVM_SET_TSS_ADDR, 0xfffbd000);
150 	if (ret < 0)
151 		die_perror("KVM_SET_TSS_ADDR ioctl");
152 
153 	if (ram_size < KVM_32BIT_GAP_START) {
154 		kvm->ram_size = ram_size;
155 		kvm->ram_start = mmap_anon_or_hugetlbfs(kvm, hugetlbfs_path, ram_size);
156 	} else {
157 		kvm->ram_start = mmap_anon_or_hugetlbfs(kvm, hugetlbfs_path, ram_size + KVM_32BIT_GAP_SIZE);
158 		kvm->ram_size = ram_size + KVM_32BIT_GAP_SIZE;
159 		if (kvm->ram_start != MAP_FAILED)
160 			/*
161 			 * We mprotect the gap (see kvm__init_ram() for details) PROT_NONE so that
162 			 * if we accidently write to it, we will know.
163 			 */
164 			mprotect(kvm->ram_start + KVM_32BIT_GAP_START, KVM_32BIT_GAP_SIZE, PROT_NONE);
165 	}
166 	if (kvm->ram_start == MAP_FAILED)
167 		die("out of memory");
168 
169 	madvise(kvm->ram_start, kvm->ram_size, MADV_MERGEABLE);
170 
171 	ret = ioctl(kvm->vm_fd, KVM_CREATE_IRQCHIP);
172 	if (ret < 0)
173 		die_perror("KVM_CREATE_IRQCHIP ioctl");
174 
175 	ret = ioctl(kvm->vm_fd, KVM_CREATE_PIT2, &pit_config);
176 	if (ret < 0)
177 		die_perror("KVM_CREATE_PIT2 ioctl");
178 }
179 
kvm__arch_delete_ram(struct kvm * kvm)180 void kvm__arch_delete_ram(struct kvm *kvm)
181 {
182 	munmap(kvm->ram_start, kvm->ram_size);
183 }
184 
kvm__irq_line(struct kvm * kvm,int irq,int level)185 void kvm__irq_line(struct kvm *kvm, int irq, int level)
186 {
187 	struct kvm_irq_level irq_level;
188 
189 	irq_level	= (struct kvm_irq_level) {
190 		{
191 			.irq		= irq,
192 		},
193 		.level		= level,
194 	};
195 
196 	if (ioctl(kvm->vm_fd, KVM_IRQ_LINE, &irq_level) < 0)
197 		die_perror("KVM_IRQ_LINE failed");
198 }
199 
/*
 * Pulse interrupt line @irq: drive it to level 1 and then straight back
 * to level 0.
 */
void kvm__irq_trigger(struct kvm *kvm, int irq)
{
	kvm__irq_line(kvm, irq, 1);	/* assert */
	kvm__irq_line(kvm, irq, 0);	/* deassert */
}
205 
/*
 * Real-mode segment:offset at which the kernel loaders in this file place
 * the boot image, and the initial stack pointer they record for it.
 */
#define BOOT_LOADER_SELECTOR	0x1000
#define BOOT_LOADER_IP		0x0000
#define BOOT_LOADER_SP		0x8000

/* Flat guest address the kernel command line is copied to. */
#define BOOT_CMDLINE_OFFSET	0x20000

/* Lowest bzImage header version (boot.hdr.version) the loader accepts. */
#define BOOT_PROTOCOL_REQUIRED	0x206

/* Not referenced in this part of the file; presumably a loadflags bit. */
#define LOAD_HIGH		0x01
/*
 * Translate a real-mode @selector:@offset pair into a host pointer.
 *
 * The flat guest address is (selector << 4) + offset; the conversion to a
 * host pointer is delegated to guest_flat_to_host().
 */
static inline void *guest_real_to_host(struct kvm *kvm, u16 selector, u16 offset)
{
	u32 segment_base = (u32)selector << 4;
	unsigned long linear = segment_base + offset;

	return guest_flat_to_host(kvm, linear);
}
220 
load_flat_binary(struct kvm * kvm,int fd_kernel)221 static bool load_flat_binary(struct kvm *kvm, int fd_kernel)
222 {
223 	void *p;
224 
225 	if (lseek(fd_kernel, 0, SEEK_SET) < 0)
226 		die_perror("lseek");
227 
228 	p = guest_real_to_host(kvm, BOOT_LOADER_SELECTOR, BOOT_LOADER_IP);
229 
230 	if (read_file(fd_kernel, p, kvm->cfg.ram_size) < 0)
231 		die_perror("read");
232 
233 	kvm->arch.boot_selector	= BOOT_LOADER_SELECTOR;
234 	kvm->arch.boot_ip	= BOOT_LOADER_IP;
235 	kvm->arch.boot_sp	= BOOT_LOADER_SP;
236 
237 	return true;
238 }
239 
240 static const char *BZIMAGE_MAGIC = "HdrS";
241 
load_bzimage(struct kvm * kvm,int fd_kernel,int fd_initrd,const char * kernel_cmdline)242 static bool load_bzimage(struct kvm *kvm, int fd_kernel, int fd_initrd,
243 			 const char *kernel_cmdline)
244 {
245 	struct boot_params *kern_boot;
246 	struct boot_params boot;
247 	size_t cmdline_size;
248 	ssize_t file_size;
249 	void *p;
250 	u16 vidmode;
251 
252 	/*
253 	 * See Documentation/x86/boot.txt for details no bzImage on-disk and
254 	 * memory layout.
255 	 */
256 
257 	if (read_in_full(fd_kernel, &boot, sizeof(boot)) != sizeof(boot))
258 		return false;
259 
260 	if (memcmp(&boot.hdr.header, BZIMAGE_MAGIC, strlen(BZIMAGE_MAGIC)))
261 		return false;
262 
263 	if (boot.hdr.version < BOOT_PROTOCOL_REQUIRED)
264 		die("Too old kernel");
265 
266 	if (lseek(fd_kernel, 0, SEEK_SET) < 0)
267 		die_perror("lseek");
268 
269 	if (!boot.hdr.setup_sects)
270 		boot.hdr.setup_sects = BZ_DEFAULT_SETUP_SECTS;
271 	file_size = (boot.hdr.setup_sects + 1) << 9;
272 	p = guest_real_to_host(kvm, BOOT_LOADER_SELECTOR, BOOT_LOADER_IP);
273 	if (read_in_full(fd_kernel, p, file_size) != file_size)
274 		die_perror("kernel setup read");
275 
276 	/* read actual kernel image (vmlinux.bin) to BZ_KERNEL_START */
277 	p = guest_flat_to_host(kvm, BZ_KERNEL_START);
278 	file_size = read_file(fd_kernel, p,
279 			      kvm->cfg.ram_size - BZ_KERNEL_START);
280 	if (file_size < 0)
281 		die_perror("kernel read");
282 
283 	p = guest_flat_to_host(kvm, BOOT_CMDLINE_OFFSET);
284 	if (kernel_cmdline) {
285 		cmdline_size = strlen(kernel_cmdline) + 1;
286 		if (cmdline_size > boot.hdr.cmdline_size)
287 			cmdline_size = boot.hdr.cmdline_size;
288 
289 		memset(p, 0, boot.hdr.cmdline_size);
290 		memcpy(p, kernel_cmdline, cmdline_size - 1);
291 	}
292 
293 	/* vidmode should be either specified or set by default */
294 	if (kvm->cfg.vnc || kvm->cfg.sdl || kvm->cfg.gtk) {
295 		if (!kvm->cfg.arch.vidmode)
296 			vidmode = 0x312;
297 		else
298 			vidmode = kvm->cfg.arch.vidmode;
299 	} else {
300 		vidmode = 0;
301 	}
302 
303 	kern_boot	= guest_real_to_host(kvm, BOOT_LOADER_SELECTOR, 0x00);
304 
305 	kern_boot->hdr.cmd_line_ptr	= BOOT_CMDLINE_OFFSET;
306 	kern_boot->hdr.type_of_loader	= 0xff;
307 	kern_boot->hdr.heap_end_ptr	= 0xfe00;
308 	kern_boot->hdr.loadflags	|= CAN_USE_HEAP;
309 	kern_boot->hdr.vid_mode		= vidmode;
310 
311 	/*
312 	 * Read initrd image into guest memory
313 	 */
314 	if (fd_initrd >= 0) {
315 		struct stat initrd_stat;
316 		unsigned long addr;
317 
318 		if (fstat(fd_initrd, &initrd_stat))
319 			die_perror("fstat");
320 
321 		addr = boot.hdr.initrd_addr_max & ~0xfffff;
322 		for (;;) {
323 			if (addr < BZ_KERNEL_START)
324 				die("Not enough memory for initrd");
325 			else if (addr < (kvm->ram_size - initrd_stat.st_size))
326 				break;
327 			addr -= 0x100000;
328 		}
329 
330 		p = guest_flat_to_host(kvm, addr);
331 		if (read_in_full(fd_initrd, p, initrd_stat.st_size) < 0)
332 			die("Failed to read initrd");
333 
334 		kern_boot->hdr.ramdisk_image	= addr;
335 		kern_boot->hdr.ramdisk_size	= initrd_stat.st_size;
336 	}
337 
338 	kvm->arch.boot_selector = BOOT_LOADER_SELECTOR;
339 	/*
340 	 * The real-mode setup code starts at offset 0x200 of a bzImage. See
341 	 * Documentation/x86/boot.txt for details.
342 	 */
343 	kvm->arch.boot_ip = BOOT_LOADER_IP + 0x200;
344 	kvm->arch.boot_sp = BOOT_LOADER_SP;
345 
346 	return true;
347 }
348 
/*
 * Load the guest kernel from @fd_kernel.
 *
 * The image is first tried as a bzImage. If that loader rejects it,
 * warnings are printed and the file is loaded as a flat binary instead,
 * in which case neither @kernel_cmdline nor the initrd is used.
 *
 * Returns true on a successful bzImage load, otherwise whatever
 * load_flat_binary() returns.
 */
bool kvm__arch_load_kernel_image(struct kvm *kvm, int fd_kernel, int fd_initrd,
				 const char *kernel_cmdline)
{
	bool is_bzimage = load_bzimage(kvm, fd_kernel, fd_initrd, kernel_cmdline);

	if (!is_bzimage) {
		pr_warning("Kernel image is not a bzImage.");
		pr_warning("Trying to load it as a flat binary (no cmdline support)");

		if (fd_initrd != -1)
			pr_warning("Loading initrd with flat binary not supported.");

		return load_flat_binary(kvm, fd_kernel);
	}

	return true;
}
362 
/**
 * kvm__arch_setup_firmware - inject BIOS into guest system memory
 * @kvm - guest system descriptor
 *
 * Main entry point for poking guest memory to install the BIOS; the work
 * itself is done by setup_bios().
 *
 * Always returns 0.
 */
int kvm__arch_setup_firmware(struct kvm *kvm)
{
	/* Standard minimal configuration. */
	setup_bios(kvm);

	/* FIXME: SMP, ACPI and friends here */

	return 0;
}
379 
/*
 * Counterpart of kvm__arch_setup_firmware(). Nothing is released here.
 *
 * Always returns 0.
 */
int kvm__arch_free_firmware(struct kvm *kvm)
{
	const int status = 0;

	return status;
}
384 
/*
 * Arch hook for terminal input: runs the 8250 serial console update and
 * then the virtio-console interrupt injection, in that order.
 */
void kvm__arch_read_term(struct kvm *kvm)
{
	/* 8250 serial consoles first... */
	serial8250__update_consoles(kvm);

	/* ...then the virtio console. */
	virtio_console__inject_interrupt(kvm);
}
390