1af7b0868SMatt Evans #include "kvm/kvm.h" 2af7b0868SMatt Evans #include "kvm/boot-protocol.h" 3af7b0868SMatt Evans #include "kvm/cpufeature.h" 4af7b0868SMatt Evans #include "kvm/interrupt.h" 5af7b0868SMatt Evans #include "kvm/mptable.h" 6af7b0868SMatt Evans #include "kvm/util.h" 70b69bdefSMatt Evans #include "kvm/8250-serial.h" 80b69bdefSMatt Evans #include "kvm/virtio-console.h" 9af7b0868SMatt Evans 10af7b0868SMatt Evans #include <asm/bootparam.h> 11af7b0868SMatt Evans #include <linux/kvm.h> 12af7b0868SMatt Evans 13af7b0868SMatt Evans #include <sys/types.h> 14af7b0868SMatt Evans #include <sys/ioctl.h> 15af7b0868SMatt Evans #include <sys/mman.h> 16af7b0868SMatt Evans #include <sys/stat.h> 17af7b0868SMatt Evans #include <stdbool.h> 18af7b0868SMatt Evans #include <assert.h> 19af7b0868SMatt Evans #include <stdlib.h> 20af7b0868SMatt Evans #include <string.h> 21af7b0868SMatt Evans #include <unistd.h> 22af7b0868SMatt Evans #include <stdio.h> 23af7b0868SMatt Evans #include <fcntl.h> 24af7b0868SMatt Evans #include <asm/unistd.h> 25af7b0868SMatt Evans 26af7b0868SMatt Evans struct kvm_ext kvm_req_ext[] = { 27af7b0868SMatt Evans { DEFINE_KVM_EXT(KVM_CAP_COALESCED_MMIO) }, 28af7b0868SMatt Evans { DEFINE_KVM_EXT(KVM_CAP_SET_TSS_ADDR) }, 29af7b0868SMatt Evans { DEFINE_KVM_EXT(KVM_CAP_PIT2) }, 30af7b0868SMatt Evans { DEFINE_KVM_EXT(KVM_CAP_USER_MEMORY) }, 31af7b0868SMatt Evans { DEFINE_KVM_EXT(KVM_CAP_IRQ_ROUTING) }, 32af7b0868SMatt Evans { DEFINE_KVM_EXT(KVM_CAP_IRQCHIP) }, 33af7b0868SMatt Evans { DEFINE_KVM_EXT(KVM_CAP_HLT) }, 34af7b0868SMatt Evans { DEFINE_KVM_EXT(KVM_CAP_IRQ_INJECT_STATUS) }, 35af7b0868SMatt Evans { DEFINE_KVM_EXT(KVM_CAP_EXT_CPUID) }, 36af7b0868SMatt Evans { 0, 0 } 37af7b0868SMatt Evans }; 38af7b0868SMatt Evans 39af7b0868SMatt Evans bool kvm__arch_cpu_supports_vm(void) 40af7b0868SMatt Evans { 41af7b0868SMatt Evans struct cpuid_regs regs; 42af7b0868SMatt Evans u32 eax_base; 43af7b0868SMatt Evans int feature; 44af7b0868SMatt Evans 45af7b0868SMatt Evans regs = (struct cpuid_regs) { 46af7b0868SMatt Evans .eax = 0x00, 47af7b0868SMatt Evans }; 48af7b0868SMatt Evans host_cpuid(®s); 49af7b0868SMatt Evans 50af7b0868SMatt Evans switch (regs.ebx) { 51af7b0868SMatt Evans case CPUID_VENDOR_INTEL_1: 52af7b0868SMatt Evans eax_base = 0x00; 53af7b0868SMatt Evans feature = KVM__X86_FEATURE_VMX; 54af7b0868SMatt Evans break; 55af7b0868SMatt Evans 56af7b0868SMatt Evans case CPUID_VENDOR_AMD_1: 57af7b0868SMatt Evans eax_base = 0x80000000; 58af7b0868SMatt Evans feature = KVM__X86_FEATURE_SVM; 59af7b0868SMatt Evans break; 60af7b0868SMatt Evans 61af7b0868SMatt Evans default: 62af7b0868SMatt Evans return false; 63af7b0868SMatt Evans } 64af7b0868SMatt Evans 65af7b0868SMatt Evans regs = (struct cpuid_regs) { 66af7b0868SMatt Evans .eax = eax_base, 67af7b0868SMatt Evans }; 68af7b0868SMatt Evans host_cpuid(®s); 69af7b0868SMatt Evans 70af7b0868SMatt Evans if (regs.eax < eax_base + 0x01) 71af7b0868SMatt Evans return false; 72af7b0868SMatt Evans 73af7b0868SMatt Evans regs = (struct cpuid_regs) { 74af7b0868SMatt Evans .eax = eax_base + 0x01 75af7b0868SMatt Evans }; 76af7b0868SMatt Evans host_cpuid(®s); 77af7b0868SMatt Evans 78af7b0868SMatt Evans return regs.ecx & (1 << feature); 79af7b0868SMatt Evans } 80af7b0868SMatt Evans 81af7b0868SMatt Evans /* 82af7b0868SMatt Evans * Allocating RAM size bigger than 4GB requires us to leave a gap 83af7b0868SMatt Evans * in the RAM which is used for PCI MMIO, hotplug, and unconfigured 84af7b0868SMatt Evans * devices (see documentation of e820_setup_gap() for details). 85af7b0868SMatt Evans * 86af7b0868SMatt Evans * If we're required to initialize RAM bigger than 4GB, we will create 87af7b0868SMatt Evans * a gap between 0xe0000000 and 0x100000000 in the guest virtual mem space. 88af7b0868SMatt Evans */ 89af7b0868SMatt Evans 90af7b0868SMatt Evans void kvm__init_ram(struct kvm *kvm) 91af7b0868SMatt Evans { 92af7b0868SMatt Evans u64 phys_start, phys_size; 93af7b0868SMatt Evans void *host_mem; 94af7b0868SMatt Evans 95af7b0868SMatt Evans if (kvm->ram_size < KVM_32BIT_GAP_START) { 96af7b0868SMatt Evans /* Use a single block of RAM for 32bit RAM */ 97af7b0868SMatt Evans 98af7b0868SMatt Evans phys_start = 0; 99af7b0868SMatt Evans phys_size = kvm->ram_size; 100af7b0868SMatt Evans host_mem = kvm->ram_start; 101af7b0868SMatt Evans 102af7b0868SMatt Evans kvm__register_mem(kvm, phys_start, phys_size, host_mem); 103af7b0868SMatt Evans } else { 104af7b0868SMatt Evans /* First RAM range from zero to the PCI gap: */ 105af7b0868SMatt Evans 106af7b0868SMatt Evans phys_start = 0; 107af7b0868SMatt Evans phys_size = KVM_32BIT_GAP_START; 108af7b0868SMatt Evans host_mem = kvm->ram_start; 109af7b0868SMatt Evans 110af7b0868SMatt Evans kvm__register_mem(kvm, phys_start, phys_size, host_mem); 111af7b0868SMatt Evans 112af7b0868SMatt Evans /* Second RAM range from 4GB to the end of RAM: */ 113af7b0868SMatt Evans 114af7b0868SMatt Evans phys_start = 0x100000000ULL; 115af7b0868SMatt Evans phys_size = kvm->ram_size - phys_size; 116af7b0868SMatt Evans host_mem = kvm->ram_start + phys_start; 117af7b0868SMatt Evans 118af7b0868SMatt Evans kvm__register_mem(kvm, phys_start, phys_size, host_mem); 119af7b0868SMatt Evans } 120af7b0868SMatt Evans } 121af7b0868SMatt Evans 1228e704a7aSMatt Evans /* Arch-specific commandline setup */ 1238e704a7aSMatt Evans void kvm__arch_set_cmdline(char *cmdline, bool video) 1248e704a7aSMatt Evans { 1258e704a7aSMatt Evans strcpy(cmdline, "noapic noacpi pci=conf1 reboot=k panic=1 i8042.direct=1 " 1268e704a7aSMatt Evans "i8042.dumbkbd=1 i8042.nopnp=1"); 1278e704a7aSMatt Evans if (video) { 1288e704a7aSMatt Evans strcat(cmdline, " video=vesafb console=tty0"); 1298e704a7aSMatt Evans } else 1308e704a7aSMatt Evans strcat(cmdline, " console=ttyS0 earlyprintk=serial i8042.noaux=1"); 1318e704a7aSMatt Evans } 1328e704a7aSMatt Evans 133af7b0868SMatt Evans /* Architecture-specific KVM init */ 134af7b0868SMatt Evans void kvm__arch_init(struct kvm *kvm, const char *kvm_dev, u64 ram_size, const char *name) 135af7b0868SMatt Evans { 136af7b0868SMatt Evans struct kvm_pit_config pit_config = { .flags = 0, }; 137af7b0868SMatt Evans int ret; 138af7b0868SMatt Evans 139af7b0868SMatt Evans ret = ioctl(kvm->vm_fd, KVM_SET_TSS_ADDR, 0xfffbd000); 140af7b0868SMatt Evans if (ret < 0) 141af7b0868SMatt Evans die_perror("KVM_SET_TSS_ADDR ioctl"); 142af7b0868SMatt Evans 143af7b0868SMatt Evans ret = ioctl(kvm->vm_fd, KVM_CREATE_PIT2, &pit_config); 144af7b0868SMatt Evans if (ret < 0) 145af7b0868SMatt Evans die_perror("KVM_CREATE_PIT2 ioctl"); 146af7b0868SMatt Evans 147af7b0868SMatt Evans kvm->ram_size = ram_size; 148af7b0868SMatt Evans 149af7b0868SMatt Evans if (kvm->ram_size < KVM_32BIT_GAP_START) { 150af7b0868SMatt Evans kvm->ram_start = mmap(NULL, ram_size, PROT_RW, MAP_ANON_NORESERVE, -1, 0); 151af7b0868SMatt Evans } else { 152af7b0868SMatt Evans kvm->ram_start = mmap(NULL, ram_size + KVM_32BIT_GAP_SIZE, PROT_RW, MAP_ANON_NORESERVE, -1, 0); 153af7b0868SMatt Evans if (kvm->ram_start != MAP_FAILED) { 154af7b0868SMatt Evans /* 155af7b0868SMatt Evans * We mprotect the gap (see kvm__init_ram() for details) PROT_NONE so that 156af7b0868SMatt Evans * if we accidently write to it, we will know. 157af7b0868SMatt Evans */ 158af7b0868SMatt Evans mprotect(kvm->ram_start + KVM_32BIT_GAP_START, KVM_32BIT_GAP_SIZE, PROT_NONE); 159af7b0868SMatt Evans } 160af7b0868SMatt Evans } 161af7b0868SMatt Evans if (kvm->ram_start == MAP_FAILED) 162af7b0868SMatt Evans die("out of memory"); 163af7b0868SMatt Evans 164af7b0868SMatt Evans madvise(kvm->ram_start, kvm->ram_size, MADV_MERGEABLE); 165af7b0868SMatt Evans 166af7b0868SMatt Evans ret = ioctl(kvm->vm_fd, KVM_CREATE_IRQCHIP); 167af7b0868SMatt Evans if (ret < 0) 168af7b0868SMatt Evans die_perror("KVM_CREATE_IRQCHIP ioctl"); 169af7b0868SMatt Evans } 170af7b0868SMatt Evans 171af7b0868SMatt Evans void kvm__irq_line(struct kvm *kvm, int irq, int level) 172af7b0868SMatt Evans { 173af7b0868SMatt Evans struct kvm_irq_level irq_level; 174af7b0868SMatt Evans 175af7b0868SMatt Evans irq_level = (struct kvm_irq_level) { 176af7b0868SMatt Evans { 177af7b0868SMatt Evans .irq = irq, 178af7b0868SMatt Evans }, 179af7b0868SMatt Evans .level = level, 180af7b0868SMatt Evans }; 181af7b0868SMatt Evans 182af7b0868SMatt Evans if (ioctl(kvm->vm_fd, KVM_IRQ_LINE, &irq_level) < 0) 183af7b0868SMatt Evans die_perror("KVM_IRQ_LINE failed"); 184af7b0868SMatt Evans } 185af7b0868SMatt Evans 186af7b0868SMatt Evans void kvm__irq_trigger(struct kvm *kvm, int irq) 187af7b0868SMatt Evans { 188af7b0868SMatt Evans kvm__irq_line(kvm, irq, 1); 189af7b0868SMatt Evans kvm__irq_line(kvm, irq, 0); 190af7b0868SMatt Evans } 191af7b0868SMatt Evans 192af7b0868SMatt Evans #define BOOT_LOADER_SELECTOR 0x1000 193af7b0868SMatt Evans #define BOOT_LOADER_IP 0x0000 194af7b0868SMatt Evans #define BOOT_LOADER_SP 0x8000 195af7b0868SMatt Evans #define BOOT_CMDLINE_OFFSET 0x20000 196af7b0868SMatt Evans 197af7b0868SMatt Evans #define BOOT_PROTOCOL_REQUIRED 0x206 198af7b0868SMatt Evans #define LOAD_HIGH 0x01 199af7b0868SMatt Evans 200604dbd63SMatt Evans int load_flat_binary(struct kvm *kvm, int fd_kernel, int fd_initrd, const char *kernel_cmdline) 201af7b0868SMatt Evans { 202af7b0868SMatt Evans void *p; 203af7b0868SMatt Evans int nr; 204af7b0868SMatt Evans 205604dbd63SMatt Evans /* 206604dbd63SMatt Evans * Some architectures may support loading an initrd alongside the flat kernel, 207604dbd63SMatt Evans * but we do not. 208604dbd63SMatt Evans */ 209604dbd63SMatt Evans if (fd_initrd != -1) 210604dbd63SMatt Evans pr_warning("Loading initrd with flat binary not supported."); 211604dbd63SMatt Evans 212604dbd63SMatt Evans if (lseek(fd_kernel, 0, SEEK_SET) < 0) 213af7b0868SMatt Evans die_perror("lseek"); 214af7b0868SMatt Evans 215af7b0868SMatt Evans p = guest_real_to_host(kvm, BOOT_LOADER_SELECTOR, BOOT_LOADER_IP); 216af7b0868SMatt Evans 217604dbd63SMatt Evans while ((nr = read(fd_kernel, p, 65536)) > 0) 218af7b0868SMatt Evans p += nr; 219af7b0868SMatt Evans 220af7b0868SMatt Evans kvm->boot_selector = BOOT_LOADER_SELECTOR; 221af7b0868SMatt Evans kvm->boot_ip = BOOT_LOADER_IP; 222af7b0868SMatt Evans kvm->boot_sp = BOOT_LOADER_SP; 223af7b0868SMatt Evans 224af7b0868SMatt Evans return true; 225af7b0868SMatt Evans } 226af7b0868SMatt Evans 227af7b0868SMatt Evans static const char *BZIMAGE_MAGIC = "HdrS"; 228af7b0868SMatt Evans 229af7b0868SMatt Evans bool load_bzimage(struct kvm *kvm, int fd_kernel, 230af7b0868SMatt Evans int fd_initrd, const char *kernel_cmdline, u16 vidmode) 231af7b0868SMatt Evans { 232af7b0868SMatt Evans struct boot_params *kern_boot; 233af7b0868SMatt Evans unsigned long setup_sects; 234af7b0868SMatt Evans struct boot_params boot; 235af7b0868SMatt Evans size_t cmdline_size; 236af7b0868SMatt Evans ssize_t setup_size; 237af7b0868SMatt Evans void *p; 238af7b0868SMatt Evans int nr; 239af7b0868SMatt Evans 240af7b0868SMatt Evans /* 241af7b0868SMatt Evans * See Documentation/x86/boot.txt for details no bzImage on-disk and 242af7b0868SMatt Evans * memory layout. 243af7b0868SMatt Evans */ 244af7b0868SMatt Evans 245af7b0868SMatt Evans if (lseek(fd_kernel, 0, SEEK_SET) < 0) 246af7b0868SMatt Evans die_perror("lseek"); 247af7b0868SMatt Evans 248af7b0868SMatt Evans if (read(fd_kernel, &boot, sizeof(boot)) != sizeof(boot)) 249af7b0868SMatt Evans return false; 250af7b0868SMatt Evans 251af7b0868SMatt Evans if (memcmp(&boot.hdr.header, BZIMAGE_MAGIC, strlen(BZIMAGE_MAGIC))) 252af7b0868SMatt Evans return false; 253af7b0868SMatt Evans 254af7b0868SMatt Evans if (boot.hdr.version < BOOT_PROTOCOL_REQUIRED) 255af7b0868SMatt Evans die("Too old kernel"); 256af7b0868SMatt Evans 257af7b0868SMatt Evans if (lseek(fd_kernel, 0, SEEK_SET) < 0) 258af7b0868SMatt Evans die_perror("lseek"); 259af7b0868SMatt Evans 260af7b0868SMatt Evans if (!boot.hdr.setup_sects) 261af7b0868SMatt Evans boot.hdr.setup_sects = BZ_DEFAULT_SETUP_SECTS; 262af7b0868SMatt Evans setup_sects = boot.hdr.setup_sects + 1; 263af7b0868SMatt Evans 264af7b0868SMatt Evans setup_size = setup_sects << 9; 265af7b0868SMatt Evans p = guest_real_to_host(kvm, BOOT_LOADER_SELECTOR, BOOT_LOADER_IP); 266af7b0868SMatt Evans 267af7b0868SMatt Evans /* copy setup.bin to mem*/ 268af7b0868SMatt Evans if (read(fd_kernel, p, setup_size) != setup_size) 269af7b0868SMatt Evans die_perror("read"); 270af7b0868SMatt Evans 271af7b0868SMatt Evans /* copy vmlinux.bin to BZ_KERNEL_START*/ 272af7b0868SMatt Evans p = guest_flat_to_host(kvm, BZ_KERNEL_START); 273af7b0868SMatt Evans 274af7b0868SMatt Evans while ((nr = read(fd_kernel, p, 65536)) > 0) 275af7b0868SMatt Evans p += nr; 276af7b0868SMatt Evans 277af7b0868SMatt Evans p = guest_flat_to_host(kvm, BOOT_CMDLINE_OFFSET); 278af7b0868SMatt Evans if (kernel_cmdline) { 279af7b0868SMatt Evans cmdline_size = strlen(kernel_cmdline) + 1; 280af7b0868SMatt Evans if (cmdline_size > boot.hdr.cmdline_size) 281af7b0868SMatt Evans cmdline_size = boot.hdr.cmdline_size; 282af7b0868SMatt Evans 283af7b0868SMatt Evans memset(p, 0, boot.hdr.cmdline_size); 284af7b0868SMatt Evans memcpy(p, kernel_cmdline, cmdline_size - 1); 285af7b0868SMatt Evans } 286af7b0868SMatt Evans 287af7b0868SMatt Evans kern_boot = guest_real_to_host(kvm, BOOT_LOADER_SELECTOR, 0x00); 288af7b0868SMatt Evans 289af7b0868SMatt Evans kern_boot->hdr.cmd_line_ptr = BOOT_CMDLINE_OFFSET; 290af7b0868SMatt Evans kern_boot->hdr.type_of_loader = 0xff; 291af7b0868SMatt Evans kern_boot->hdr.heap_end_ptr = 0xfe00; 292af7b0868SMatt Evans kern_boot->hdr.loadflags |= CAN_USE_HEAP; 293af7b0868SMatt Evans kern_boot->hdr.vid_mode = vidmode; 294af7b0868SMatt Evans 295af7b0868SMatt Evans /* 296af7b0868SMatt Evans * Read initrd image into guest memory 297af7b0868SMatt Evans */ 298af7b0868SMatt Evans if (fd_initrd >= 0) { 299af7b0868SMatt Evans struct stat initrd_stat; 300af7b0868SMatt Evans unsigned long addr; 301af7b0868SMatt Evans 302af7b0868SMatt Evans if (fstat(fd_initrd, &initrd_stat)) 303af7b0868SMatt Evans die_perror("fstat"); 304af7b0868SMatt Evans 305af7b0868SMatt Evans addr = boot.hdr.initrd_addr_max & ~0xfffff; 306af7b0868SMatt Evans for (;;) { 307af7b0868SMatt Evans if (addr < BZ_KERNEL_START) 308af7b0868SMatt Evans die("Not enough memory for initrd"); 309af7b0868SMatt Evans else if (addr < (kvm->ram_size - initrd_stat.st_size)) 310af7b0868SMatt Evans break; 311af7b0868SMatt Evans addr -= 0x100000; 312af7b0868SMatt Evans } 313af7b0868SMatt Evans 314af7b0868SMatt Evans p = guest_flat_to_host(kvm, addr); 315af7b0868SMatt Evans nr = read(fd_initrd, p, initrd_stat.st_size); 316af7b0868SMatt Evans if (nr != initrd_stat.st_size) 317af7b0868SMatt Evans die("Failed to read initrd"); 318af7b0868SMatt Evans 319af7b0868SMatt Evans kern_boot->hdr.ramdisk_image = addr; 320af7b0868SMatt Evans kern_boot->hdr.ramdisk_size = initrd_stat.st_size; 321af7b0868SMatt Evans } 322af7b0868SMatt Evans 323af7b0868SMatt Evans kvm->boot_selector = BOOT_LOADER_SELECTOR; 324af7b0868SMatt Evans /* 325af7b0868SMatt Evans * The real-mode setup code starts at offset 0x200 of a bzImage. See 326af7b0868SMatt Evans * Documentation/x86/boot.txt for details. 327af7b0868SMatt Evans */ 328af7b0868SMatt Evans kvm->boot_ip = BOOT_LOADER_IP + 0x200; 329af7b0868SMatt Evans kvm->boot_sp = BOOT_LOADER_SP; 330af7b0868SMatt Evans 331af7b0868SMatt Evans return true; 332af7b0868SMatt Evans } 333af7b0868SMatt Evans 334af7b0868SMatt Evans /** 335af7b0868SMatt Evans * kvm__arch_setup_firmware - inject BIOS into guest system memory 336af7b0868SMatt Evans * @kvm - guest system descriptor 337af7b0868SMatt Evans * 338af7b0868SMatt Evans * This function is a main routine where we poke guest memory 339af7b0868SMatt Evans * and install BIOS there. 340af7b0868SMatt Evans */ 341af7b0868SMatt Evans void kvm__arch_setup_firmware(struct kvm *kvm) 342af7b0868SMatt Evans { 343af7b0868SMatt Evans /* standart minimal configuration */ 344af7b0868SMatt Evans setup_bios(kvm); 345af7b0868SMatt Evans 346af7b0868SMatt Evans /* FIXME: SMP, ACPI and friends here */ 347af7b0868SMatt Evans 348af7b0868SMatt Evans /* MP table */ 349af7b0868SMatt Evans mptable_setup(kvm, kvm->nrcpus); 350af7b0868SMatt Evans } 3510b69bdefSMatt Evans 3520b69bdefSMatt Evans void kvm__arch_periodic_poll(struct kvm *kvm) 3530b69bdefSMatt Evans { 354*f6b8ccc1SThomas Gleixner serial8250__update_consoles(kvm); 3550b69bdefSMatt Evans virtio_console__inject_interrupt(kvm); 3560b69bdefSMatt Evans } 357