1af7b0868SMatt Evans #include "kvm/kvm.h" 2af7b0868SMatt Evans #include "kvm/boot-protocol.h" 3af7b0868SMatt Evans #include "kvm/cpufeature.h" 4af7b0868SMatt Evans #include "kvm/interrupt.h" 5af7b0868SMatt Evans #include "kvm/mptable.h" 6af7b0868SMatt Evans #include "kvm/util.h" 70b69bdefSMatt Evans #include "kvm/8250-serial.h" 80b69bdefSMatt Evans #include "kvm/virtio-console.h" 9af7b0868SMatt Evans 10af7b0868SMatt Evans #include <asm/bootparam.h> 11af7b0868SMatt Evans #include <linux/kvm.h> 12af7b0868SMatt Evans 13af7b0868SMatt Evans #include <sys/types.h> 14af7b0868SMatt Evans #include <sys/ioctl.h> 15af7b0868SMatt Evans #include <sys/mman.h> 16af7b0868SMatt Evans #include <sys/stat.h> 17af7b0868SMatt Evans #include <stdbool.h> 18af7b0868SMatt Evans #include <stdlib.h> 19af7b0868SMatt Evans #include <string.h> 20af7b0868SMatt Evans #include <unistd.h> 21af7b0868SMatt Evans #include <stdio.h> 22af7b0868SMatt Evans #include <fcntl.h> 23af7b0868SMatt Evans 24af7b0868SMatt Evans struct kvm_ext kvm_req_ext[] = { 25af7b0868SMatt Evans { DEFINE_KVM_EXT(KVM_CAP_COALESCED_MMIO) }, 26af7b0868SMatt Evans { DEFINE_KVM_EXT(KVM_CAP_SET_TSS_ADDR) }, 27af7b0868SMatt Evans { DEFINE_KVM_EXT(KVM_CAP_PIT2) }, 28af7b0868SMatt Evans { DEFINE_KVM_EXT(KVM_CAP_USER_MEMORY) }, 29af7b0868SMatt Evans { DEFINE_KVM_EXT(KVM_CAP_IRQ_ROUTING) }, 30af7b0868SMatt Evans { DEFINE_KVM_EXT(KVM_CAP_IRQCHIP) }, 31af7b0868SMatt Evans { DEFINE_KVM_EXT(KVM_CAP_HLT) }, 32af7b0868SMatt Evans { DEFINE_KVM_EXT(KVM_CAP_IRQ_INJECT_STATUS) }, 33af7b0868SMatt Evans { DEFINE_KVM_EXT(KVM_CAP_EXT_CPUID) }, 34af7b0868SMatt Evans { 0, 0 } 35af7b0868SMatt Evans }; 36af7b0868SMatt Evans 37af7b0868SMatt Evans bool kvm__arch_cpu_supports_vm(void) 38af7b0868SMatt Evans { 39af7b0868SMatt Evans struct cpuid_regs regs; 40af7b0868SMatt Evans u32 eax_base; 41af7b0868SMatt Evans int feature; 42af7b0868SMatt Evans 43af7b0868SMatt Evans regs = (struct cpuid_regs) { 44af7b0868SMatt Evans .eax = 0x00, 45af7b0868SMatt Evans }; 46af7b0868SMatt Evans host_cpuid(®s); 47af7b0868SMatt Evans 48af7b0868SMatt Evans switch (regs.ebx) { 49af7b0868SMatt Evans case CPUID_VENDOR_INTEL_1: 50af7b0868SMatt Evans eax_base = 0x00; 51af7b0868SMatt Evans feature = KVM__X86_FEATURE_VMX; 52af7b0868SMatt Evans break; 53af7b0868SMatt Evans 54af7b0868SMatt Evans case CPUID_VENDOR_AMD_1: 55af7b0868SMatt Evans eax_base = 0x80000000; 56af7b0868SMatt Evans feature = KVM__X86_FEATURE_SVM; 57af7b0868SMatt Evans break; 58af7b0868SMatt Evans 59af7b0868SMatt Evans default: 60af7b0868SMatt Evans return false; 61af7b0868SMatt Evans } 62af7b0868SMatt Evans 63af7b0868SMatt Evans regs = (struct cpuid_regs) { 64af7b0868SMatt Evans .eax = eax_base, 65af7b0868SMatt Evans }; 66af7b0868SMatt Evans host_cpuid(®s); 67af7b0868SMatt Evans 68af7b0868SMatt Evans if (regs.eax < eax_base + 0x01) 69af7b0868SMatt Evans return false; 70af7b0868SMatt Evans 71af7b0868SMatt Evans regs = (struct cpuid_regs) { 72af7b0868SMatt Evans .eax = eax_base + 0x01 73af7b0868SMatt Evans }; 74af7b0868SMatt Evans host_cpuid(®s); 75af7b0868SMatt Evans 76af7b0868SMatt Evans return regs.ecx & (1 << feature); 77af7b0868SMatt Evans } 78af7b0868SMatt Evans 79af7b0868SMatt Evans /* 80af7b0868SMatt Evans * Allocating RAM size bigger than 4GB requires us to leave a gap 81af7b0868SMatt Evans * in the RAM which is used for PCI MMIO, hotplug, and unconfigured 82af7b0868SMatt Evans * devices (see documentation of e820_setup_gap() for details). 83af7b0868SMatt Evans * 84af7b0868SMatt Evans * If we're required to initialize RAM bigger than 4GB, we will create 85af7b0868SMatt Evans * a gap between 0xe0000000 and 0x100000000 in the guest virtual mem space. 86af7b0868SMatt Evans */ 87af7b0868SMatt Evans 88af7b0868SMatt Evans void kvm__init_ram(struct kvm *kvm) 89af7b0868SMatt Evans { 90af7b0868SMatt Evans u64 phys_start, phys_size; 91af7b0868SMatt Evans void *host_mem; 92af7b0868SMatt Evans 93af7b0868SMatt Evans if (kvm->ram_size < KVM_32BIT_GAP_START) { 94af7b0868SMatt Evans /* Use a single block of RAM for 32bit RAM */ 95af7b0868SMatt Evans 96af7b0868SMatt Evans phys_start = 0; 97af7b0868SMatt Evans phys_size = kvm->ram_size; 98af7b0868SMatt Evans host_mem = kvm->ram_start; 99af7b0868SMatt Evans 100af7b0868SMatt Evans kvm__register_mem(kvm, phys_start, phys_size, host_mem); 101af7b0868SMatt Evans } else { 102af7b0868SMatt Evans /* First RAM range from zero to the PCI gap: */ 103af7b0868SMatt Evans 104af7b0868SMatt Evans phys_start = 0; 105af7b0868SMatt Evans phys_size = KVM_32BIT_GAP_START; 106af7b0868SMatt Evans host_mem = kvm->ram_start; 107af7b0868SMatt Evans 108af7b0868SMatt Evans kvm__register_mem(kvm, phys_start, phys_size, host_mem); 109af7b0868SMatt Evans 110af7b0868SMatt Evans /* Second RAM range from 4GB to the end of RAM: */ 111af7b0868SMatt Evans 112f7abc4cdSHongyong Zang phys_start = KVM_32BIT_MAX_MEM_SIZE; 113f7abc4cdSHongyong Zang phys_size = kvm->ram_size - phys_start; 114af7b0868SMatt Evans host_mem = kvm->ram_start + phys_start; 115af7b0868SMatt Evans 116af7b0868SMatt Evans kvm__register_mem(kvm, phys_start, phys_size, host_mem); 117af7b0868SMatt Evans } 118af7b0868SMatt Evans } 119af7b0868SMatt Evans 1208e704a7aSMatt Evans /* Arch-specific commandline setup */ 1218e704a7aSMatt Evans void kvm__arch_set_cmdline(char *cmdline, bool video) 1228e704a7aSMatt Evans { 1238e704a7aSMatt Evans strcpy(cmdline, "noapic noacpi pci=conf1 reboot=k panic=1 i8042.direct=1 " 1248e704a7aSMatt Evans "i8042.dumbkbd=1 i8042.nopnp=1"); 1253a60be06SSasha Levin if (video) 1268e704a7aSMatt Evans strcat(cmdline, " video=vesafb console=tty0"); 1273a60be06SSasha Levin else 1288e704a7aSMatt Evans strcat(cmdline, " console=ttyS0 earlyprintk=serial i8042.noaux=1"); 1298e704a7aSMatt Evans } 1308e704a7aSMatt Evans 131af7b0868SMatt Evans /* Architecture-specific KVM init */ 1327eff9f49SWanlong Gao void kvm__arch_init(struct kvm *kvm, const char *hugetlbfs_path, u64 ram_size) 133af7b0868SMatt Evans { 134af7b0868SMatt Evans struct kvm_pit_config pit_config = { .flags = 0, }; 135af7b0868SMatt Evans int ret; 136af7b0868SMatt Evans 137af7b0868SMatt Evans ret = ioctl(kvm->vm_fd, KVM_SET_TSS_ADDR, 0xfffbd000); 138af7b0868SMatt Evans if (ret < 0) 139af7b0868SMatt Evans die_perror("KVM_SET_TSS_ADDR ioctl"); 140af7b0868SMatt Evans 141af7b0868SMatt Evans ret = ioctl(kvm->vm_fd, KVM_CREATE_PIT2, &pit_config); 142af7b0868SMatt Evans if (ret < 0) 143af7b0868SMatt Evans die_perror("KVM_CREATE_PIT2 ioctl"); 144af7b0868SMatt Evans 145f7abc4cdSHongyong Zang if (ram_size < KVM_32BIT_GAP_START) { 146af7b0868SMatt Evans kvm->ram_size = ram_size; 1473ebd8e0bSMichael Ellerman kvm->ram_start = mmap_anon_or_hugetlbfs(kvm, hugetlbfs_path, ram_size); 148af7b0868SMatt Evans } else { 1493ebd8e0bSMichael Ellerman kvm->ram_start = mmap_anon_or_hugetlbfs(kvm, hugetlbfs_path, ram_size + KVM_32BIT_GAP_SIZE); 150f7abc4cdSHongyong Zang kvm->ram_size = ram_size + KVM_32BIT_GAP_SIZE; 1513a60be06SSasha Levin if (kvm->ram_start != MAP_FAILED) 152af7b0868SMatt Evans /* 153af7b0868SMatt Evans * We mprotect the gap (see kvm__init_ram() for details) PROT_NONE so that 154af7b0868SMatt Evans * if we accidently write to it, we will know. 155af7b0868SMatt Evans */ 156af7b0868SMatt Evans mprotect(kvm->ram_start + KVM_32BIT_GAP_START, KVM_32BIT_GAP_SIZE, PROT_NONE); 157af7b0868SMatt Evans } 158af7b0868SMatt Evans if (kvm->ram_start == MAP_FAILED) 159af7b0868SMatt Evans die("out of memory"); 160af7b0868SMatt Evans 161af7b0868SMatt Evans madvise(kvm->ram_start, kvm->ram_size, MADV_MERGEABLE); 162af7b0868SMatt Evans 163af7b0868SMatt Evans ret = ioctl(kvm->vm_fd, KVM_CREATE_IRQCHIP); 164af7b0868SMatt Evans if (ret < 0) 165af7b0868SMatt Evans die_perror("KVM_CREATE_IRQCHIP ioctl"); 166af7b0868SMatt Evans } 167af7b0868SMatt Evans 168e56e2de7SLai Jiangshan void kvm__arch_delete_ram(struct kvm *kvm) 169e56e2de7SLai Jiangshan { 170e56e2de7SLai Jiangshan munmap(kvm->ram_start, kvm->ram_size); 171e56e2de7SLai Jiangshan } 172e56e2de7SLai Jiangshan 173af7b0868SMatt Evans void kvm__irq_line(struct kvm *kvm, int irq, int level) 174af7b0868SMatt Evans { 175af7b0868SMatt Evans struct kvm_irq_level irq_level; 176af7b0868SMatt Evans 177af7b0868SMatt Evans irq_level = (struct kvm_irq_level) { 178af7b0868SMatt Evans { 179af7b0868SMatt Evans .irq = irq, 180af7b0868SMatt Evans }, 181af7b0868SMatt Evans .level = level, 182af7b0868SMatt Evans }; 183af7b0868SMatt Evans 184af7b0868SMatt Evans if (ioctl(kvm->vm_fd, KVM_IRQ_LINE, &irq_level) < 0) 185af7b0868SMatt Evans die_perror("KVM_IRQ_LINE failed"); 186af7b0868SMatt Evans } 187af7b0868SMatt Evans 188af7b0868SMatt Evans void kvm__irq_trigger(struct kvm *kvm, int irq) 189af7b0868SMatt Evans { 190af7b0868SMatt Evans kvm__irq_line(kvm, irq, 1); 191af7b0868SMatt Evans kvm__irq_line(kvm, irq, 0); 192af7b0868SMatt Evans } 193af7b0868SMatt Evans 194af7b0868SMatt Evans #define BOOT_LOADER_SELECTOR 0x1000 195af7b0868SMatt Evans #define BOOT_LOADER_IP 0x0000 196af7b0868SMatt Evans #define BOOT_LOADER_SP 0x8000 197af7b0868SMatt Evans #define BOOT_CMDLINE_OFFSET 0x20000 198af7b0868SMatt Evans 199af7b0868SMatt Evans #define BOOT_PROTOCOL_REQUIRED 0x206 200af7b0868SMatt Evans #define LOAD_HIGH 0x01 201af7b0868SMatt Evans 202604dbd63SMatt Evans int load_flat_binary(struct kvm *kvm, int fd_kernel, int fd_initrd, const char *kernel_cmdline) 203af7b0868SMatt Evans { 204af7b0868SMatt Evans void *p; 205af7b0868SMatt Evans int nr; 206af7b0868SMatt Evans 207604dbd63SMatt Evans /* 208604dbd63SMatt Evans * Some architectures may support loading an initrd alongside the flat kernel, 209604dbd63SMatt Evans * but we do not. 210604dbd63SMatt Evans */ 211604dbd63SMatt Evans if (fd_initrd != -1) 212604dbd63SMatt Evans pr_warning("Loading initrd with flat binary not supported."); 213604dbd63SMatt Evans 214604dbd63SMatt Evans if (lseek(fd_kernel, 0, SEEK_SET) < 0) 215af7b0868SMatt Evans die_perror("lseek"); 216af7b0868SMatt Evans 217af7b0868SMatt Evans p = guest_real_to_host(kvm, BOOT_LOADER_SELECTOR, BOOT_LOADER_IP); 218af7b0868SMatt Evans 219604dbd63SMatt Evans while ((nr = read(fd_kernel, p, 65536)) > 0) 220af7b0868SMatt Evans p += nr; 221af7b0868SMatt Evans 22242ac24f9SSasha Levin kvm->arch.boot_selector = BOOT_LOADER_SELECTOR; 22342ac24f9SSasha Levin kvm->arch.boot_ip = BOOT_LOADER_IP; 22442ac24f9SSasha Levin kvm->arch.boot_sp = BOOT_LOADER_SP; 225af7b0868SMatt Evans 226af7b0868SMatt Evans return true; 227af7b0868SMatt Evans } 228af7b0868SMatt Evans 229af7b0868SMatt Evans static const char *BZIMAGE_MAGIC = "HdrS"; 230af7b0868SMatt Evans 231af7b0868SMatt Evans bool load_bzimage(struct kvm *kvm, int fd_kernel, 232af7b0868SMatt Evans int fd_initrd, const char *kernel_cmdline, u16 vidmode) 233af7b0868SMatt Evans { 234af7b0868SMatt Evans struct boot_params *kern_boot; 235af7b0868SMatt Evans unsigned long setup_sects; 236af7b0868SMatt Evans struct boot_params boot; 237af7b0868SMatt Evans size_t cmdline_size; 238af7b0868SMatt Evans ssize_t setup_size; 239af7b0868SMatt Evans void *p; 240af7b0868SMatt Evans int nr; 241af7b0868SMatt Evans 242af7b0868SMatt Evans /* 243af7b0868SMatt Evans * See Documentation/x86/boot.txt for details no bzImage on-disk and 244af7b0868SMatt Evans * memory layout. 245af7b0868SMatt Evans */ 246af7b0868SMatt Evans 247af7b0868SMatt Evans if (lseek(fd_kernel, 0, SEEK_SET) < 0) 248af7b0868SMatt Evans die_perror("lseek"); 249af7b0868SMatt Evans 250af7b0868SMatt Evans if (read(fd_kernel, &boot, sizeof(boot)) != sizeof(boot)) 251af7b0868SMatt Evans return false; 252af7b0868SMatt Evans 253af7b0868SMatt Evans if (memcmp(&boot.hdr.header, BZIMAGE_MAGIC, strlen(BZIMAGE_MAGIC))) 254af7b0868SMatt Evans return false; 255af7b0868SMatt Evans 256af7b0868SMatt Evans if (boot.hdr.version < BOOT_PROTOCOL_REQUIRED) 257af7b0868SMatt Evans die("Too old kernel"); 258af7b0868SMatt Evans 259af7b0868SMatt Evans if (lseek(fd_kernel, 0, SEEK_SET) < 0) 260af7b0868SMatt Evans die_perror("lseek"); 261af7b0868SMatt Evans 262af7b0868SMatt Evans if (!boot.hdr.setup_sects) 263af7b0868SMatt Evans boot.hdr.setup_sects = BZ_DEFAULT_SETUP_SECTS; 264af7b0868SMatt Evans setup_sects = boot.hdr.setup_sects + 1; 265af7b0868SMatt Evans 266af7b0868SMatt Evans setup_size = setup_sects << 9; 267af7b0868SMatt Evans p = guest_real_to_host(kvm, BOOT_LOADER_SELECTOR, BOOT_LOADER_IP); 268af7b0868SMatt Evans 269af7b0868SMatt Evans /* copy setup.bin to mem*/ 270af7b0868SMatt Evans if (read(fd_kernel, p, setup_size) != setup_size) 271af7b0868SMatt Evans die_perror("read"); 272af7b0868SMatt Evans 273af7b0868SMatt Evans /* copy vmlinux.bin to BZ_KERNEL_START*/ 274af7b0868SMatt Evans p = guest_flat_to_host(kvm, BZ_KERNEL_START); 275af7b0868SMatt Evans 276af7b0868SMatt Evans while ((nr = read(fd_kernel, p, 65536)) > 0) 277af7b0868SMatt Evans p += nr; 278af7b0868SMatt Evans 279af7b0868SMatt Evans p = guest_flat_to_host(kvm, BOOT_CMDLINE_OFFSET); 280af7b0868SMatt Evans if (kernel_cmdline) { 281af7b0868SMatt Evans cmdline_size = strlen(kernel_cmdline) + 1; 282af7b0868SMatt Evans if (cmdline_size > boot.hdr.cmdline_size) 283af7b0868SMatt Evans cmdline_size = boot.hdr.cmdline_size; 284af7b0868SMatt Evans 285af7b0868SMatt Evans memset(p, 0, boot.hdr.cmdline_size); 286af7b0868SMatt Evans memcpy(p, kernel_cmdline, cmdline_size - 1); 287af7b0868SMatt Evans } 288af7b0868SMatt Evans 289af7b0868SMatt Evans kern_boot = guest_real_to_host(kvm, BOOT_LOADER_SELECTOR, 0x00); 290af7b0868SMatt Evans 291af7b0868SMatt Evans kern_boot->hdr.cmd_line_ptr = BOOT_CMDLINE_OFFSET; 292af7b0868SMatt Evans kern_boot->hdr.type_of_loader = 0xff; 293af7b0868SMatt Evans kern_boot->hdr.heap_end_ptr = 0xfe00; 294af7b0868SMatt Evans kern_boot->hdr.loadflags |= CAN_USE_HEAP; 295af7b0868SMatt Evans kern_boot->hdr.vid_mode = vidmode; 296af7b0868SMatt Evans 297af7b0868SMatt Evans /* 298af7b0868SMatt Evans * Read initrd image into guest memory 299af7b0868SMatt Evans */ 300af7b0868SMatt Evans if (fd_initrd >= 0) { 301af7b0868SMatt Evans struct stat initrd_stat; 302af7b0868SMatt Evans unsigned long addr; 303af7b0868SMatt Evans 304af7b0868SMatt Evans if (fstat(fd_initrd, &initrd_stat)) 305af7b0868SMatt Evans die_perror("fstat"); 306af7b0868SMatt Evans 307af7b0868SMatt Evans addr = boot.hdr.initrd_addr_max & ~0xfffff; 308af7b0868SMatt Evans for (;;) { 309af7b0868SMatt Evans if (addr < BZ_KERNEL_START) 310af7b0868SMatt Evans die("Not enough memory for initrd"); 311af7b0868SMatt Evans else if (addr < (kvm->ram_size - initrd_stat.st_size)) 312af7b0868SMatt Evans break; 313af7b0868SMatt Evans addr -= 0x100000; 314af7b0868SMatt Evans } 315af7b0868SMatt Evans 316af7b0868SMatt Evans p = guest_flat_to_host(kvm, addr); 317af7b0868SMatt Evans nr = read(fd_initrd, p, initrd_stat.st_size); 318af7b0868SMatt Evans if (nr != initrd_stat.st_size) 319af7b0868SMatt Evans die("Failed to read initrd"); 320af7b0868SMatt Evans 321af7b0868SMatt Evans kern_boot->hdr.ramdisk_image = addr; 322af7b0868SMatt Evans kern_boot->hdr.ramdisk_size = initrd_stat.st_size; 323af7b0868SMatt Evans } 324af7b0868SMatt Evans 32542ac24f9SSasha Levin kvm->arch.boot_selector = BOOT_LOADER_SELECTOR; 326af7b0868SMatt Evans /* 327af7b0868SMatt Evans * The real-mode setup code starts at offset 0x200 of a bzImage. See 328af7b0868SMatt Evans * Documentation/x86/boot.txt for details. 329af7b0868SMatt Evans */ 33042ac24f9SSasha Levin kvm->arch.boot_ip = BOOT_LOADER_IP + 0x200; 33142ac24f9SSasha Levin kvm->arch.boot_sp = BOOT_LOADER_SP; 332af7b0868SMatt Evans 333af7b0868SMatt Evans return true; 334af7b0868SMatt Evans } 335af7b0868SMatt Evans 336af7b0868SMatt Evans /** 337af7b0868SMatt Evans * kvm__arch_setup_firmware - inject BIOS into guest system memory 338af7b0868SMatt Evans * @kvm - guest system descriptor 339af7b0868SMatt Evans * 340af7b0868SMatt Evans * This function is a main routine where we poke guest memory 341af7b0868SMatt Evans * and install BIOS there. 342af7b0868SMatt Evans */ 343f7f9d02bSCyrill Gorcunov int kvm__arch_setup_firmware(struct kvm *kvm) 344af7b0868SMatt Evans { 345af7b0868SMatt Evans /* standart minimal configuration */ 346af7b0868SMatt Evans setup_bios(kvm); 347af7b0868SMatt Evans 348af7b0868SMatt Evans /* FIXME: SMP, ACPI and friends here */ 349af7b0868SMatt Evans 350*3d34111eSSasha Levin return 0; 3511add9f73SSasha Levin } 3521add9f73SSasha Levin 3531add9f73SSasha Levin int kvm__arch_free_firmware(struct kvm *kvm) 3541add9f73SSasha Levin { 355*3d34111eSSasha Levin return 0; 356af7b0868SMatt Evans } 3570b69bdefSMatt Evans 3580b69bdefSMatt Evans void kvm__arch_periodic_poll(struct kvm *kvm) 3590b69bdefSMatt Evans { 360f6b8ccc1SThomas Gleixner serial8250__update_consoles(kvm); 3610b69bdefSMatt Evans virtio_console__inject_interrupt(kvm); 3620b69bdefSMatt Evans } 363