1 #include "kvm/kvm.h"
2 #include "kvm/boot-protocol.h"
3 #include "kvm/cpufeature.h"
4 #include "kvm/interrupt.h"
5 #include "kvm/mptable.h"
6 #include "kvm/util.h"
7 #include "kvm/8250-serial.h"
8 #include "kvm/virtio-console.h"
9
10 #include <asm/bootparam.h>
11 #include <linux/kvm.h>
12 #include <linux/kernel.h>
13
14 #include <sys/types.h>
15 #include <sys/ioctl.h>
16 #include <sys/mman.h>
17 #include <sys/stat.h>
18 #include <stdbool.h>
19 #include <stdlib.h>
20 #include <string.h>
21 #include <unistd.h>
22 #include <stdio.h>
23 #include <fcntl.h>
24
/*
 * Table of KVM capabilities listed for this architecture.
 *
 * Each entry is built with DEFINE_KVM_EXT() from a KVM_CAP_* constant.
 * The all-zero entry at the end terminates the table.
 * NOTE(review): presumably consumed by generic code that checks each
 * capability against the host kernel -- confirm against the caller.
 */
struct kvm_ext kvm_req_ext[] = {
	{ DEFINE_KVM_EXT(KVM_CAP_COALESCED_MMIO) },
	{ DEFINE_KVM_EXT(KVM_CAP_SET_TSS_ADDR) },
	{ DEFINE_KVM_EXT(KVM_CAP_PIT2) },
	{ DEFINE_KVM_EXT(KVM_CAP_USER_MEMORY) },
	{ DEFINE_KVM_EXT(KVM_CAP_IRQ_ROUTING) },
	{ DEFINE_KVM_EXT(KVM_CAP_IRQCHIP) },
	{ DEFINE_KVM_EXT(KVM_CAP_HLT) },
	{ DEFINE_KVM_EXT(KVM_CAP_IRQ_INJECT_STATUS) },
	{ DEFINE_KVM_EXT(KVM_CAP_EXT_CPUID) },
	{ 0, 0 }	/* terminator */
};
37
kvm__arch_default_ram_address(void)38 u64 kvm__arch_default_ram_address(void)
39 {
40 return 0;
41 }
42
/*
 * Architecture hook for validating the VM configuration.
 * This implementation performs no checks.
 */
void kvm__arch_validate_cfg(struct kvm *kvm)
{
	(void)kvm;	/* unused: there is nothing to validate here */
}
46
kvm__arch_cpu_supports_vm(void)47 bool kvm__arch_cpu_supports_vm(void)
48 {
49 struct cpuid_regs regs;
50 u32 eax_base;
51 int feature;
52
53 regs = (struct cpuid_regs) {
54 .eax = 0x00,
55 };
56 host_cpuid(®s);
57
58 switch (regs.ebx) {
59 case CPUID_VENDOR_INTEL_1:
60 eax_base = 0x00;
61 feature = KVM__X86_FEATURE_VMX;
62 break;
63
64 case CPUID_VENDOR_AMD_1:
65 eax_base = 0x80000000;
66 feature = KVM__X86_FEATURE_SVM;
67 break;
68
69 default:
70 return false;
71 }
72
73 regs = (struct cpuid_regs) {
74 .eax = eax_base,
75 };
76 host_cpuid(®s);
77
78 if (regs.eax < eax_base + 0x01)
79 return false;
80
81 regs = (struct cpuid_regs) {
82 .eax = eax_base + 0x01
83 };
84 host_cpuid(®s);
85
86 return regs.ecx & (1 << feature);
87 }
88
89 /*
90 * Allocating RAM size bigger than 4GB requires us to leave a gap
91 * in the RAM which is used for PCI MMIO, hotplug, and unconfigured
92 * devices (see documentation of e820_setup_gap() for details).
93 *
94 * If we're required to initialize RAM bigger than 4GB, we will create
95 * a gap between 0xe0000000 and 0x100000000 in the guest virtual mem space.
96 */
97
kvm__init_ram(struct kvm * kvm)98 void kvm__init_ram(struct kvm *kvm)
99 {
100 u64 phys_start, phys_size;
101 void *host_mem;
102
103 if (kvm->ram_size < KVM_32BIT_GAP_START) {
104 /* Use a single block of RAM for 32bit RAM */
105
106 phys_start = 0;
107 phys_size = kvm->ram_size;
108 host_mem = kvm->ram_start;
109
110 kvm__register_ram(kvm, phys_start, phys_size, host_mem);
111 } else {
112 /* First RAM range from zero to the PCI gap: */
113
114 phys_start = 0;
115 phys_size = KVM_32BIT_GAP_START;
116 host_mem = kvm->ram_start;
117
118 kvm__register_ram(kvm, phys_start, phys_size, host_mem);
119
120 /* Second RAM range from 4GB to the end of RAM: */
121
122 phys_start = KVM_32BIT_MAX_MEM_SIZE;
123 phys_size = kvm->ram_size - phys_start;
124 host_mem = kvm->ram_start + phys_start;
125
126 kvm__register_ram(kvm, phys_start, phys_size, host_mem);
127 }
128 }
129
/*
 * Write the architecture-specific default kernel command line.
 *
 * @cmdline: destination buffer; the caller must make it large enough for
 *           the base options plus the selected suffix, since no bound is
 *           checked here.
 * @video:   when true, append the framebuffer option; otherwise append
 *           the serial early-printk options.
 */
void kvm__arch_set_cmdline(char *cmdline, bool video)
{
	const char *suffix = video
		? " video=vesafb"
		: " earlyprintk=serial i8042.noaux=1";

	strcpy(cmdline, "noapic noacpi pci=conf1 reboot=k panic=1 i8042.direct=1 "
			"i8042.dumbkbd=1 i8042.nopnp=1");
	strcat(cmdline, suffix);
}
140
141 /* Architecture-specific KVM init */
kvm__arch_init(struct kvm * kvm)142 void kvm__arch_init(struct kvm *kvm)
143 {
144 const char *hugetlbfs_path = kvm->cfg.hugetlbfs_path;
145 struct kvm_pit_config pit_config = { .flags = 0, };
146 u64 ram_size = kvm->cfg.ram_size;
147 int ret;
148
149 ret = ioctl(kvm->vm_fd, KVM_SET_TSS_ADDR, 0xfffbd000);
150 if (ret < 0)
151 die_perror("KVM_SET_TSS_ADDR ioctl");
152
153 if (ram_size < KVM_32BIT_GAP_START) {
154 kvm->ram_size = ram_size;
155 kvm->ram_start = mmap_anon_or_hugetlbfs(kvm, hugetlbfs_path, ram_size);
156 } else {
157 kvm->ram_start = mmap_anon_or_hugetlbfs(kvm, hugetlbfs_path, ram_size + KVM_32BIT_GAP_SIZE);
158 kvm->ram_size = ram_size + KVM_32BIT_GAP_SIZE;
159 if (kvm->ram_start != MAP_FAILED)
160 /*
161 * We mprotect the gap (see kvm__init_ram() for details) PROT_NONE so that
162 * if we accidently write to it, we will know.
163 */
164 mprotect(kvm->ram_start + KVM_32BIT_GAP_START, KVM_32BIT_GAP_SIZE, PROT_NONE);
165 }
166 if (kvm->ram_start == MAP_FAILED)
167 die("out of memory");
168
169 madvise(kvm->ram_start, kvm->ram_size, MADV_MERGEABLE);
170
171 ret = ioctl(kvm->vm_fd, KVM_CREATE_IRQCHIP);
172 if (ret < 0)
173 die_perror("KVM_CREATE_IRQCHIP ioctl");
174
175 ret = ioctl(kvm->vm_fd, KVM_CREATE_PIT2, &pit_config);
176 if (ret < 0)
177 die_perror("KVM_CREATE_PIT2 ioctl");
178 }
179
kvm__arch_delete_ram(struct kvm * kvm)180 void kvm__arch_delete_ram(struct kvm *kvm)
181 {
182 munmap(kvm->ram_start, kvm->ram_size);
183 }
184
kvm__irq_line(struct kvm * kvm,int irq,int level)185 void kvm__irq_line(struct kvm *kvm, int irq, int level)
186 {
187 struct kvm_irq_level irq_level;
188
189 irq_level = (struct kvm_irq_level) {
190 {
191 .irq = irq,
192 },
193 .level = level,
194 };
195
196 if (ioctl(kvm->vm_fd, KVM_IRQ_LINE, &irq_level) < 0)
197 die_perror("KVM_IRQ_LINE failed");
198 }
199
/*
 * Pulse an interrupt line: drive it to level 1 and then straight back
 * to level 0.
 */
void kvm__irq_trigger(struct kvm *kvm, int irq)
{
	enum { LINE_LOW = 0, LINE_HIGH = 1 };

	kvm__irq_line(kvm, irq, LINE_HIGH);
	kvm__irq_line(kvm, irq, LINE_LOW);
}
205
/*
 * Real-mode selector:offset at which kernel images are loaded, and the
 * stack pointer value stored as the initial boot_sp.
 */
#define BOOT_LOADER_SELECTOR	0x1000
#define BOOT_LOADER_IP		0x0000
#define BOOT_LOADER_SP		0x8000
/* Flat guest address at which the kernel command line is placed. */
#define BOOT_CMDLINE_OFFSET	0x20000

/* Lowest bzImage header version accepted by load_bzimage(). */
#define BOOT_PROTOCOL_REQUIRED	0x206
/* NOTE(review): not referenced in the visible part of this file. */
#define LOAD_HIGH		0x01
213
/*
 * Translate a real-mode selector:offset pair into a host pointer.
 *
 * The flat guest address is (selector << 4) + offset; the conversion to
 * a host pointer is delegated to guest_flat_to_host().
 */
static inline void *guest_real_to_host(struct kvm *kvm, u16 selector, u16 offset)
{
	unsigned long segment_base = (u32)selector << 4;
	unsigned long flat_addr = segment_base + offset;

	return guest_flat_to_host(kvm, flat_addr);
}
220
load_flat_binary(struct kvm * kvm,int fd_kernel)221 static bool load_flat_binary(struct kvm *kvm, int fd_kernel)
222 {
223 void *p;
224
225 if (lseek(fd_kernel, 0, SEEK_SET) < 0)
226 die_perror("lseek");
227
228 p = guest_real_to_host(kvm, BOOT_LOADER_SELECTOR, BOOT_LOADER_IP);
229
230 if (read_file(fd_kernel, p, kvm->cfg.ram_size) < 0)
231 die_perror("read");
232
233 kvm->arch.boot_selector = BOOT_LOADER_SELECTOR;
234 kvm->arch.boot_ip = BOOT_LOADER_IP;
235 kvm->arch.boot_sp = BOOT_LOADER_SP;
236
237 return true;
238 }
239
240 static const char *BZIMAGE_MAGIC = "HdrS";
241
load_bzimage(struct kvm * kvm,int fd_kernel,int fd_initrd,const char * kernel_cmdline)242 static bool load_bzimage(struct kvm *kvm, int fd_kernel, int fd_initrd,
243 const char *kernel_cmdline)
244 {
245 struct boot_params *kern_boot;
246 struct boot_params boot;
247 size_t cmdline_size;
248 ssize_t file_size;
249 void *p;
250 u16 vidmode;
251
252 /*
253 * See Documentation/x86/boot.txt for details no bzImage on-disk and
254 * memory layout.
255 */
256
257 if (read_in_full(fd_kernel, &boot, sizeof(boot)) != sizeof(boot))
258 return false;
259
260 if (memcmp(&boot.hdr.header, BZIMAGE_MAGIC, strlen(BZIMAGE_MAGIC)))
261 return false;
262
263 if (boot.hdr.version < BOOT_PROTOCOL_REQUIRED)
264 die("Too old kernel");
265
266 if (lseek(fd_kernel, 0, SEEK_SET) < 0)
267 die_perror("lseek");
268
269 if (!boot.hdr.setup_sects)
270 boot.hdr.setup_sects = BZ_DEFAULT_SETUP_SECTS;
271 file_size = (boot.hdr.setup_sects + 1) << 9;
272 p = guest_real_to_host(kvm, BOOT_LOADER_SELECTOR, BOOT_LOADER_IP);
273 if (read_in_full(fd_kernel, p, file_size) != file_size)
274 die_perror("kernel setup read");
275
276 /* read actual kernel image (vmlinux.bin) to BZ_KERNEL_START */
277 p = guest_flat_to_host(kvm, BZ_KERNEL_START);
278 file_size = read_file(fd_kernel, p,
279 kvm->cfg.ram_size - BZ_KERNEL_START);
280 if (file_size < 0)
281 die_perror("kernel read");
282
283 p = guest_flat_to_host(kvm, BOOT_CMDLINE_OFFSET);
284 if (kernel_cmdline) {
285 cmdline_size = strlen(kernel_cmdline) + 1;
286 if (cmdline_size > boot.hdr.cmdline_size)
287 cmdline_size = boot.hdr.cmdline_size;
288
289 memset(p, 0, boot.hdr.cmdline_size);
290 memcpy(p, kernel_cmdline, cmdline_size - 1);
291 }
292
293 /* vidmode should be either specified or set by default */
294 if (kvm->cfg.vnc || kvm->cfg.sdl || kvm->cfg.gtk) {
295 if (!kvm->cfg.arch.vidmode)
296 vidmode = 0x312;
297 else
298 vidmode = kvm->cfg.arch.vidmode;
299 } else {
300 vidmode = 0;
301 }
302
303 kern_boot = guest_real_to_host(kvm, BOOT_LOADER_SELECTOR, 0x00);
304
305 kern_boot->hdr.cmd_line_ptr = BOOT_CMDLINE_OFFSET;
306 kern_boot->hdr.type_of_loader = 0xff;
307 kern_boot->hdr.heap_end_ptr = 0xfe00;
308 kern_boot->hdr.loadflags |= CAN_USE_HEAP;
309 kern_boot->hdr.vid_mode = vidmode;
310
311 /*
312 * Read initrd image into guest memory
313 */
314 if (fd_initrd >= 0) {
315 struct stat initrd_stat;
316 unsigned long addr;
317
318 if (fstat(fd_initrd, &initrd_stat))
319 die_perror("fstat");
320
321 addr = boot.hdr.initrd_addr_max & ~0xfffff;
322 for (;;) {
323 if (addr < BZ_KERNEL_START)
324 die("Not enough memory for initrd");
325 else if (addr < (kvm->ram_size - initrd_stat.st_size))
326 break;
327 addr -= 0x100000;
328 }
329
330 p = guest_flat_to_host(kvm, addr);
331 if (read_in_full(fd_initrd, p, initrd_stat.st_size) < 0)
332 die("Failed to read initrd");
333
334 kern_boot->hdr.ramdisk_image = addr;
335 kern_boot->hdr.ramdisk_size = initrd_stat.st_size;
336 }
337
338 kvm->arch.boot_selector = BOOT_LOADER_SELECTOR;
339 /*
340 * The real-mode setup code starts at offset 0x200 of a bzImage. See
341 * Documentation/x86/boot.txt for details.
342 */
343 kvm->arch.boot_ip = BOOT_LOADER_IP + 0x200;
344 kvm->arch.boot_sp = BOOT_LOADER_SP;
345
346 return true;
347 }
348
/*
 * Load a kernel image into the guest.
 *
 * The image is first tried as a bzImage. If load_bzimage() rejects it,
 * warnings are printed and the file is loaded as a flat binary instead;
 * in that case the command line and any initrd are not used.
 *
 * Returns the result of whichever loader handled the image.
 */
bool kvm__arch_load_kernel_image(struct kvm *kvm, int fd_kernel, int fd_initrd,
				 const char *kernel_cmdline)
{
	bool is_bzimage = load_bzimage(kvm, fd_kernel, fd_initrd, kernel_cmdline);

	if (!is_bzimage) {
		pr_warning("Kernel image is not a bzImage.");
		pr_warning("Trying to load it as a flat binary (no cmdline support)");

		if (fd_initrd != -1)
			pr_warning("Loading initrd with flat binary not supported.");

		return load_flat_binary(kvm, fd_kernel);
	}

	return true;
}
362
/**
 * kvm__arch_setup_firmware - inject BIOS into guest system memory
 * @kvm - guest system descriptor
 *
 * Main entry point for installing the BIOS into guest memory; the work
 * itself is done by setup_bios(). Always returns 0.
 */
int kvm__arch_setup_firmware(struct kvm *kvm)
{
	const int status = 0;

	/* standard minimal configuration */
	setup_bios(kvm);

	/* FIXME: SMP, ACPI and friends here */

	return status;
}
379
/*
 * Counterpart of kvm__arch_setup_firmware(). It releases nothing and
 * always returns 0.
 */
int kvm__arch_free_firmware(struct kvm *kvm)
{
	(void)kvm;	/* unused: there is nothing to free */

	return 0;
}
384
/*
 * Terminal input hook: update the 8250 serial consoles first, then
 * inject the virtio console interrupt.
 */
void kvm__arch_read_term(struct kvm *kvm)
{
	/* Emulated 8250 serial ports. */
	serial8250__update_consoles(kvm);

	/* Virtio console device. */
	virtio_console__inject_interrupt(kvm);
}
390