1af7b0868SMatt Evans #include "kvm/kvm.h" 2af7b0868SMatt Evans #include "kvm/boot-protocol.h" 3af7b0868SMatt Evans #include "kvm/cpufeature.h" 4af7b0868SMatt Evans #include "kvm/interrupt.h" 5af7b0868SMatt Evans #include "kvm/mptable.h" 6af7b0868SMatt Evans #include "kvm/util.h" 70b69bdefSMatt Evans #include "kvm/8250-serial.h" 80b69bdefSMatt Evans #include "kvm/virtio-console.h" 9af7b0868SMatt Evans 10af7b0868SMatt Evans #include <asm/bootparam.h> 11af7b0868SMatt Evans #include <linux/kvm.h> 12af7b0868SMatt Evans 13af7b0868SMatt Evans #include <sys/types.h> 14af7b0868SMatt Evans #include <sys/ioctl.h> 15af7b0868SMatt Evans #include <sys/mman.h> 16af7b0868SMatt Evans #include <sys/stat.h> 17af7b0868SMatt Evans #include <stdbool.h> 18af7b0868SMatt Evans #include <stdlib.h> 19af7b0868SMatt Evans #include <string.h> 20af7b0868SMatt Evans #include <unistd.h> 21af7b0868SMatt Evans #include <stdio.h> 22af7b0868SMatt Evans #include <fcntl.h> 23af7b0868SMatt Evans 24af7b0868SMatt Evans struct kvm_ext kvm_req_ext[] = { 25af7b0868SMatt Evans { DEFINE_KVM_EXT(KVM_CAP_COALESCED_MMIO) }, 26af7b0868SMatt Evans { DEFINE_KVM_EXT(KVM_CAP_SET_TSS_ADDR) }, 27af7b0868SMatt Evans { DEFINE_KVM_EXT(KVM_CAP_PIT2) }, 28af7b0868SMatt Evans { DEFINE_KVM_EXT(KVM_CAP_USER_MEMORY) }, 29af7b0868SMatt Evans { DEFINE_KVM_EXT(KVM_CAP_IRQ_ROUTING) }, 30af7b0868SMatt Evans { DEFINE_KVM_EXT(KVM_CAP_IRQCHIP) }, 31af7b0868SMatt Evans { DEFINE_KVM_EXT(KVM_CAP_HLT) }, 32af7b0868SMatt Evans { DEFINE_KVM_EXT(KVM_CAP_IRQ_INJECT_STATUS) }, 33af7b0868SMatt Evans { DEFINE_KVM_EXT(KVM_CAP_EXT_CPUID) }, 34af7b0868SMatt Evans { 0, 0 } 35af7b0868SMatt Evans }; 36af7b0868SMatt Evans 37af7b0868SMatt Evans bool kvm__arch_cpu_supports_vm(void) 38af7b0868SMatt Evans { 39af7b0868SMatt Evans struct cpuid_regs regs; 40af7b0868SMatt Evans u32 eax_base; 41af7b0868SMatt Evans int feature; 42af7b0868SMatt Evans 43af7b0868SMatt Evans regs = (struct cpuid_regs) { 44af7b0868SMatt Evans .eax = 0x00, 45af7b0868SMatt Evans }; 46af7b0868SMatt Evans host_cpuid(®s); 47af7b0868SMatt Evans 48af7b0868SMatt Evans switch (regs.ebx) { 49af7b0868SMatt Evans case CPUID_VENDOR_INTEL_1: 50af7b0868SMatt Evans eax_base = 0x00; 51af7b0868SMatt Evans feature = KVM__X86_FEATURE_VMX; 52af7b0868SMatt Evans break; 53af7b0868SMatt Evans 54af7b0868SMatt Evans case CPUID_VENDOR_AMD_1: 55af7b0868SMatt Evans eax_base = 0x80000000; 56af7b0868SMatt Evans feature = KVM__X86_FEATURE_SVM; 57af7b0868SMatt Evans break; 58af7b0868SMatt Evans 59af7b0868SMatt Evans default: 60af7b0868SMatt Evans return false; 61af7b0868SMatt Evans } 62af7b0868SMatt Evans 63af7b0868SMatt Evans regs = (struct cpuid_regs) { 64af7b0868SMatt Evans .eax = eax_base, 65af7b0868SMatt Evans }; 66af7b0868SMatt Evans host_cpuid(®s); 67af7b0868SMatt Evans 68af7b0868SMatt Evans if (regs.eax < eax_base + 0x01) 69af7b0868SMatt Evans return false; 70af7b0868SMatt Evans 71af7b0868SMatt Evans regs = (struct cpuid_regs) { 72af7b0868SMatt Evans .eax = eax_base + 0x01 73af7b0868SMatt Evans }; 74af7b0868SMatt Evans host_cpuid(®s); 75af7b0868SMatt Evans 76af7b0868SMatt Evans return regs.ecx & (1 << feature); 77af7b0868SMatt Evans } 78af7b0868SMatt Evans 79af7b0868SMatt Evans /* 80af7b0868SMatt Evans * Allocating RAM size bigger than 4GB requires us to leave a gap 81af7b0868SMatt Evans * in the RAM which is used for PCI MMIO, hotplug, and unconfigured 82af7b0868SMatt Evans * devices (see documentation of e820_setup_gap() for details). 83af7b0868SMatt Evans * 84af7b0868SMatt Evans * If we're required to initialize RAM bigger than 4GB, we will create 85af7b0868SMatt Evans * a gap between 0xe0000000 and 0x100000000 in the guest virtual mem space. 86af7b0868SMatt Evans */ 87af7b0868SMatt Evans 88af7b0868SMatt Evans void kvm__init_ram(struct kvm *kvm) 89af7b0868SMatt Evans { 90af7b0868SMatt Evans u64 phys_start, phys_size; 91af7b0868SMatt Evans void *host_mem; 92af7b0868SMatt Evans 93af7b0868SMatt Evans if (kvm->ram_size < KVM_32BIT_GAP_START) { 94af7b0868SMatt Evans /* Use a single block of RAM for 32bit RAM */ 95af7b0868SMatt Evans 96af7b0868SMatt Evans phys_start = 0; 97af7b0868SMatt Evans phys_size = kvm->ram_size; 98af7b0868SMatt Evans host_mem = kvm->ram_start; 99af7b0868SMatt Evans 100af7b0868SMatt Evans kvm__register_mem(kvm, phys_start, phys_size, host_mem); 101af7b0868SMatt Evans } else { 102af7b0868SMatt Evans /* First RAM range from zero to the PCI gap: */ 103af7b0868SMatt Evans 104af7b0868SMatt Evans phys_start = 0; 105af7b0868SMatt Evans phys_size = KVM_32BIT_GAP_START; 106af7b0868SMatt Evans host_mem = kvm->ram_start; 107af7b0868SMatt Evans 108af7b0868SMatt Evans kvm__register_mem(kvm, phys_start, phys_size, host_mem); 109af7b0868SMatt Evans 110af7b0868SMatt Evans /* Second RAM range from 4GB to the end of RAM: */ 111af7b0868SMatt Evans 112af7b0868SMatt Evans phys_start = 0x100000000ULL; 113af7b0868SMatt Evans phys_size = kvm->ram_size - phys_size; 114af7b0868SMatt Evans host_mem = kvm->ram_start + phys_start; 115af7b0868SMatt Evans 116af7b0868SMatt Evans kvm__register_mem(kvm, phys_start, phys_size, host_mem); 117af7b0868SMatt Evans } 118af7b0868SMatt Evans } 119af7b0868SMatt Evans 1208e704a7aSMatt Evans /* Arch-specific commandline setup */ 1218e704a7aSMatt Evans void kvm__arch_set_cmdline(char *cmdline, bool video) 1228e704a7aSMatt Evans { 1238e704a7aSMatt Evans strcpy(cmdline, "noapic noacpi pci=conf1 reboot=k panic=1 i8042.direct=1 " 1248e704a7aSMatt Evans "i8042.dumbkbd=1 i8042.nopnp=1"); 1253a60be06SSasha Levin if (video) 1268e704a7aSMatt Evans strcat(cmdline, " video=vesafb console=tty0"); 1273a60be06SSasha Levin else 1288e704a7aSMatt Evans strcat(cmdline, " console=ttyS0 earlyprintk=serial i8042.noaux=1"); 1298e704a7aSMatt Evans } 1308e704a7aSMatt Evans 13161061257SMatt Evans /* This function wraps the decision between hugetlbfs map (if requested) or normal mmap */ 13261061257SMatt Evans static void *mmap_anon_or_hugetlbfs(const char *hugetlbfs_path, u64 size) 13361061257SMatt Evans { 1343a60be06SSasha Levin if (hugetlbfs_path) 13561061257SMatt Evans /* 13661061257SMatt Evans * We don't /need/ to map guest RAM from hugetlbfs, but we do so 13761061257SMatt Evans * if the user specifies a hugetlbfs path. 13861061257SMatt Evans */ 13961061257SMatt Evans return mmap_hugetlbfs(hugetlbfs_path, size); 1403a60be06SSasha Levin else 14161061257SMatt Evans return mmap(NULL, size, PROT_RW, MAP_ANON_NORESERVE, -1, 0); 14261061257SMatt Evans } 14361061257SMatt Evans 144af7b0868SMatt Evans /* Architecture-specific KVM init */ 145*7eff9f49SWanlong Gao void kvm__arch_init(struct kvm *kvm, const char *hugetlbfs_path, u64 ram_size) 146af7b0868SMatt Evans { 147af7b0868SMatt Evans struct kvm_pit_config pit_config = { .flags = 0, }; 148af7b0868SMatt Evans int ret; 149af7b0868SMatt Evans 150af7b0868SMatt Evans ret = ioctl(kvm->vm_fd, KVM_SET_TSS_ADDR, 0xfffbd000); 151af7b0868SMatt Evans if (ret < 0) 152af7b0868SMatt Evans die_perror("KVM_SET_TSS_ADDR ioctl"); 153af7b0868SMatt Evans 154af7b0868SMatt Evans ret = ioctl(kvm->vm_fd, KVM_CREATE_PIT2, &pit_config); 155af7b0868SMatt Evans if (ret < 0) 156af7b0868SMatt Evans die_perror("KVM_CREATE_PIT2 ioctl"); 157af7b0868SMatt Evans 158af7b0868SMatt Evans kvm->ram_size = ram_size; 159af7b0868SMatt Evans 160af7b0868SMatt Evans if (kvm->ram_size < KVM_32BIT_GAP_START) { 16161061257SMatt Evans kvm->ram_start = mmap_anon_or_hugetlbfs(hugetlbfs_path, ram_size); 162af7b0868SMatt Evans } else { 16361061257SMatt Evans kvm->ram_start = mmap_anon_or_hugetlbfs(hugetlbfs_path, ram_size + KVM_32BIT_GAP_SIZE); 1643a60be06SSasha Levin if (kvm->ram_start != MAP_FAILED) 165af7b0868SMatt Evans /* 166af7b0868SMatt Evans * We mprotect the gap (see kvm__init_ram() for details) PROT_NONE so that 167af7b0868SMatt Evans * if we accidently write to it, we will know. 168af7b0868SMatt Evans */ 169af7b0868SMatt Evans mprotect(kvm->ram_start + KVM_32BIT_GAP_START, KVM_32BIT_GAP_SIZE, PROT_NONE); 170af7b0868SMatt Evans } 171af7b0868SMatt Evans if (kvm->ram_start == MAP_FAILED) 172af7b0868SMatt Evans die("out of memory"); 173af7b0868SMatt Evans 174af7b0868SMatt Evans madvise(kvm->ram_start, kvm->ram_size, MADV_MERGEABLE); 175af7b0868SMatt Evans 176af7b0868SMatt Evans ret = ioctl(kvm->vm_fd, KVM_CREATE_IRQCHIP); 177af7b0868SMatt Evans if (ret < 0) 178af7b0868SMatt Evans die_perror("KVM_CREATE_IRQCHIP ioctl"); 179af7b0868SMatt Evans } 180af7b0868SMatt Evans 181af7b0868SMatt Evans void kvm__irq_line(struct kvm *kvm, int irq, int level) 182af7b0868SMatt Evans { 183af7b0868SMatt Evans struct kvm_irq_level irq_level; 184af7b0868SMatt Evans 185af7b0868SMatt Evans irq_level = (struct kvm_irq_level) { 186af7b0868SMatt Evans { 187af7b0868SMatt Evans .irq = irq, 188af7b0868SMatt Evans }, 189af7b0868SMatt Evans .level = level, 190af7b0868SMatt Evans }; 191af7b0868SMatt Evans 192af7b0868SMatt Evans if (ioctl(kvm->vm_fd, KVM_IRQ_LINE, &irq_level) < 0) 193af7b0868SMatt Evans die_perror("KVM_IRQ_LINE failed"); 194af7b0868SMatt Evans } 195af7b0868SMatt Evans 196af7b0868SMatt Evans void kvm__irq_trigger(struct kvm *kvm, int irq) 197af7b0868SMatt Evans { 198af7b0868SMatt Evans kvm__irq_line(kvm, irq, 1); 199af7b0868SMatt Evans kvm__irq_line(kvm, irq, 0); 200af7b0868SMatt Evans } 201af7b0868SMatt Evans 202af7b0868SMatt Evans #define BOOT_LOADER_SELECTOR 0x1000 203af7b0868SMatt Evans #define BOOT_LOADER_IP 0x0000 204af7b0868SMatt Evans #define BOOT_LOADER_SP 0x8000 205af7b0868SMatt Evans #define BOOT_CMDLINE_OFFSET 0x20000 206af7b0868SMatt Evans 207af7b0868SMatt Evans #define BOOT_PROTOCOL_REQUIRED 0x206 208af7b0868SMatt Evans #define LOAD_HIGH 0x01 209af7b0868SMatt Evans 210604dbd63SMatt Evans int load_flat_binary(struct kvm *kvm, int fd_kernel, int fd_initrd, const char *kernel_cmdline) 211af7b0868SMatt Evans { 212af7b0868SMatt Evans void *p; 213af7b0868SMatt Evans int nr; 214af7b0868SMatt Evans 215604dbd63SMatt Evans /* 216604dbd63SMatt Evans * Some architectures may support loading an initrd alongside the flat kernel, 217604dbd63SMatt Evans * but we do not. 218604dbd63SMatt Evans */ 219604dbd63SMatt Evans if (fd_initrd != -1) 220604dbd63SMatt Evans pr_warning("Loading initrd with flat binary not supported."); 221604dbd63SMatt Evans 222604dbd63SMatt Evans if (lseek(fd_kernel, 0, SEEK_SET) < 0) 223af7b0868SMatt Evans die_perror("lseek"); 224af7b0868SMatt Evans 225af7b0868SMatt Evans p = guest_real_to_host(kvm, BOOT_LOADER_SELECTOR, BOOT_LOADER_IP); 226af7b0868SMatt Evans 227604dbd63SMatt Evans while ((nr = read(fd_kernel, p, 65536)) > 0) 228af7b0868SMatt Evans p += nr; 229af7b0868SMatt Evans 230af7b0868SMatt Evans kvm->boot_selector = BOOT_LOADER_SELECTOR; 231af7b0868SMatt Evans kvm->boot_ip = BOOT_LOADER_IP; 232af7b0868SMatt Evans kvm->boot_sp = BOOT_LOADER_SP; 233af7b0868SMatt Evans 234af7b0868SMatt Evans return true; 235af7b0868SMatt Evans } 236af7b0868SMatt Evans 237af7b0868SMatt Evans static const char *BZIMAGE_MAGIC = "HdrS"; 238af7b0868SMatt Evans 239af7b0868SMatt Evans bool load_bzimage(struct kvm *kvm, int fd_kernel, 240af7b0868SMatt Evans int fd_initrd, const char *kernel_cmdline, u16 vidmode) 241af7b0868SMatt Evans { 242af7b0868SMatt Evans struct boot_params *kern_boot; 243af7b0868SMatt Evans unsigned long setup_sects; 244af7b0868SMatt Evans struct boot_params boot; 245af7b0868SMatt Evans size_t cmdline_size; 246af7b0868SMatt Evans ssize_t setup_size; 247af7b0868SMatt Evans void *p; 248af7b0868SMatt Evans int nr; 249af7b0868SMatt Evans 250af7b0868SMatt Evans /* 251af7b0868SMatt Evans * See Documentation/x86/boot.txt for details no bzImage on-disk and 252af7b0868SMatt Evans * memory layout. 253af7b0868SMatt Evans */ 254af7b0868SMatt Evans 255af7b0868SMatt Evans if (lseek(fd_kernel, 0, SEEK_SET) < 0) 256af7b0868SMatt Evans die_perror("lseek"); 257af7b0868SMatt Evans 258af7b0868SMatt Evans if (read(fd_kernel, &boot, sizeof(boot)) != sizeof(boot)) 259af7b0868SMatt Evans return false; 260af7b0868SMatt Evans 261af7b0868SMatt Evans if (memcmp(&boot.hdr.header, BZIMAGE_MAGIC, strlen(BZIMAGE_MAGIC))) 262af7b0868SMatt Evans return false; 263af7b0868SMatt Evans 264af7b0868SMatt Evans if (boot.hdr.version < BOOT_PROTOCOL_REQUIRED) 265af7b0868SMatt Evans die("Too old kernel"); 266af7b0868SMatt Evans 267af7b0868SMatt Evans if (lseek(fd_kernel, 0, SEEK_SET) < 0) 268af7b0868SMatt Evans die_perror("lseek"); 269af7b0868SMatt Evans 270af7b0868SMatt Evans if (!boot.hdr.setup_sects) 271af7b0868SMatt Evans boot.hdr.setup_sects = BZ_DEFAULT_SETUP_SECTS; 272af7b0868SMatt Evans setup_sects = boot.hdr.setup_sects + 1; 273af7b0868SMatt Evans 274af7b0868SMatt Evans setup_size = setup_sects << 9; 275af7b0868SMatt Evans p = guest_real_to_host(kvm, BOOT_LOADER_SELECTOR, BOOT_LOADER_IP); 276af7b0868SMatt Evans 277af7b0868SMatt Evans /* copy setup.bin to mem*/ 278af7b0868SMatt Evans if (read(fd_kernel, p, setup_size) != setup_size) 279af7b0868SMatt Evans die_perror("read"); 280af7b0868SMatt Evans 281af7b0868SMatt Evans /* copy vmlinux.bin to BZ_KERNEL_START*/ 282af7b0868SMatt Evans p = guest_flat_to_host(kvm, BZ_KERNEL_START); 283af7b0868SMatt Evans 284af7b0868SMatt Evans while ((nr = read(fd_kernel, p, 65536)) > 0) 285af7b0868SMatt Evans p += nr; 286af7b0868SMatt Evans 287af7b0868SMatt Evans p = guest_flat_to_host(kvm, BOOT_CMDLINE_OFFSET); 288af7b0868SMatt Evans if (kernel_cmdline) { 289af7b0868SMatt Evans cmdline_size = strlen(kernel_cmdline) + 1; 290af7b0868SMatt Evans if (cmdline_size > boot.hdr.cmdline_size) 291af7b0868SMatt Evans cmdline_size = boot.hdr.cmdline_size; 292af7b0868SMatt Evans 293af7b0868SMatt Evans memset(p, 0, boot.hdr.cmdline_size); 294af7b0868SMatt Evans memcpy(p, kernel_cmdline, cmdline_size - 1); 295af7b0868SMatt Evans } 296af7b0868SMatt Evans 297af7b0868SMatt Evans kern_boot = guest_real_to_host(kvm, BOOT_LOADER_SELECTOR, 0x00); 298af7b0868SMatt Evans 299af7b0868SMatt Evans kern_boot->hdr.cmd_line_ptr = BOOT_CMDLINE_OFFSET; 300af7b0868SMatt Evans kern_boot->hdr.type_of_loader = 0xff; 301af7b0868SMatt Evans kern_boot->hdr.heap_end_ptr = 0xfe00; 302af7b0868SMatt Evans kern_boot->hdr.loadflags |= CAN_USE_HEAP; 303af7b0868SMatt Evans kern_boot->hdr.vid_mode = vidmode; 304af7b0868SMatt Evans 305af7b0868SMatt Evans /* 306af7b0868SMatt Evans * Read initrd image into guest memory 307af7b0868SMatt Evans */ 308af7b0868SMatt Evans if (fd_initrd >= 0) { 309af7b0868SMatt Evans struct stat initrd_stat; 310af7b0868SMatt Evans unsigned long addr; 311af7b0868SMatt Evans 312af7b0868SMatt Evans if (fstat(fd_initrd, &initrd_stat)) 313af7b0868SMatt Evans die_perror("fstat"); 314af7b0868SMatt Evans 315af7b0868SMatt Evans addr = boot.hdr.initrd_addr_max & ~0xfffff; 316af7b0868SMatt Evans for (;;) { 317af7b0868SMatt Evans if (addr < BZ_KERNEL_START) 318af7b0868SMatt Evans die("Not enough memory for initrd"); 319af7b0868SMatt Evans else if (addr < (kvm->ram_size - initrd_stat.st_size)) 320af7b0868SMatt Evans break; 321af7b0868SMatt Evans addr -= 0x100000; 322af7b0868SMatt Evans } 323af7b0868SMatt Evans 324af7b0868SMatt Evans p = guest_flat_to_host(kvm, addr); 325af7b0868SMatt Evans nr = read(fd_initrd, p, initrd_stat.st_size); 326af7b0868SMatt Evans if (nr != initrd_stat.st_size) 327af7b0868SMatt Evans die("Failed to read initrd"); 328af7b0868SMatt Evans 329af7b0868SMatt Evans kern_boot->hdr.ramdisk_image = addr; 330af7b0868SMatt Evans kern_boot->hdr.ramdisk_size = initrd_stat.st_size; 331af7b0868SMatt Evans } 332af7b0868SMatt Evans 333af7b0868SMatt Evans kvm->boot_selector = BOOT_LOADER_SELECTOR; 334af7b0868SMatt Evans /* 335af7b0868SMatt Evans * The real-mode setup code starts at offset 0x200 of a bzImage. See 336af7b0868SMatt Evans * Documentation/x86/boot.txt for details. 337af7b0868SMatt Evans */ 338af7b0868SMatt Evans kvm->boot_ip = BOOT_LOADER_IP + 0x200; 339af7b0868SMatt Evans kvm->boot_sp = BOOT_LOADER_SP; 340af7b0868SMatt Evans 341af7b0868SMatt Evans return true; 342af7b0868SMatt Evans } 343af7b0868SMatt Evans 344af7b0868SMatt Evans /** 345af7b0868SMatt Evans * kvm__arch_setup_firmware - inject BIOS into guest system memory 346af7b0868SMatt Evans * @kvm - guest system descriptor 347af7b0868SMatt Evans * 348af7b0868SMatt Evans * This function is a main routine where we poke guest memory 349af7b0868SMatt Evans * and install BIOS there. 350af7b0868SMatt Evans */ 351f7f9d02bSCyrill Gorcunov int kvm__arch_setup_firmware(struct kvm *kvm) 352af7b0868SMatt Evans { 353af7b0868SMatt Evans /* standart minimal configuration */ 354af7b0868SMatt Evans setup_bios(kvm); 355af7b0868SMatt Evans 356af7b0868SMatt Evans /* FIXME: SMP, ACPI and friends here */ 357af7b0868SMatt Evans 358af7b0868SMatt Evans /* MP table */ 359f7f9d02bSCyrill Gorcunov return mptable_setup(kvm, kvm->nrcpus); 360af7b0868SMatt Evans } 3610b69bdefSMatt Evans 3620b69bdefSMatt Evans void kvm__arch_periodic_poll(struct kvm *kvm) 3630b69bdefSMatt Evans { 364f6b8ccc1SThomas Gleixner serial8250__update_consoles(kvm); 3650b69bdefSMatt Evans virtio_console__inject_interrupt(kvm); 3660b69bdefSMatt Evans } 367