1af7b0868SMatt Evans #include "kvm/kvm.h" 2af7b0868SMatt Evans #include "kvm/boot-protocol.h" 3af7b0868SMatt Evans #include "kvm/cpufeature.h" 4af7b0868SMatt Evans #include "kvm/interrupt.h" 5af7b0868SMatt Evans #include "kvm/mptable.h" 6af7b0868SMatt Evans #include "kvm/util.h" 7*0b69bdefSMatt Evans #include "kvm/8250-serial.h" 8*0b69bdefSMatt Evans #include "kvm/virtio-console.h" 9af7b0868SMatt Evans 10af7b0868SMatt Evans #include <asm/bootparam.h> 11af7b0868SMatt Evans #include <linux/kvm.h> 12af7b0868SMatt Evans 13af7b0868SMatt Evans #include <sys/types.h> 14af7b0868SMatt Evans #include <sys/ioctl.h> 15af7b0868SMatt Evans #include <sys/mman.h> 16af7b0868SMatt Evans #include <sys/stat.h> 17af7b0868SMatt Evans #include <stdbool.h> 18af7b0868SMatt Evans #include <assert.h> 19af7b0868SMatt Evans #include <stdlib.h> 20af7b0868SMatt Evans #include <string.h> 21af7b0868SMatt Evans #include <unistd.h> 22af7b0868SMatt Evans #include <stdio.h> 23af7b0868SMatt Evans #include <fcntl.h> 24af7b0868SMatt Evans #include <asm/unistd.h> 25af7b0868SMatt Evans 26af7b0868SMatt Evans struct kvm_ext kvm_req_ext[] = { 27af7b0868SMatt Evans { DEFINE_KVM_EXT(KVM_CAP_COALESCED_MMIO) }, 28af7b0868SMatt Evans { DEFINE_KVM_EXT(KVM_CAP_SET_TSS_ADDR) }, 29af7b0868SMatt Evans { DEFINE_KVM_EXT(KVM_CAP_PIT2) }, 30af7b0868SMatt Evans { DEFINE_KVM_EXT(KVM_CAP_USER_MEMORY) }, 31af7b0868SMatt Evans { DEFINE_KVM_EXT(KVM_CAP_IRQ_ROUTING) }, 32af7b0868SMatt Evans { DEFINE_KVM_EXT(KVM_CAP_IRQCHIP) }, 33af7b0868SMatt Evans { DEFINE_KVM_EXT(KVM_CAP_HLT) }, 34af7b0868SMatt Evans { DEFINE_KVM_EXT(KVM_CAP_IRQ_INJECT_STATUS) }, 35af7b0868SMatt Evans { DEFINE_KVM_EXT(KVM_CAP_EXT_CPUID) }, 36af7b0868SMatt Evans { 0, 0 } 37af7b0868SMatt Evans }; 38af7b0868SMatt Evans 39af7b0868SMatt Evans bool kvm__arch_cpu_supports_vm(void) 40af7b0868SMatt Evans { 41af7b0868SMatt Evans struct cpuid_regs regs; 42af7b0868SMatt Evans u32 eax_base; 43af7b0868SMatt Evans int feature; 44af7b0868SMatt Evans 45af7b0868SMatt Evans regs = (struct cpuid_regs) { 46af7b0868SMatt Evans .eax = 0x00, 47af7b0868SMatt Evans }; 48af7b0868SMatt Evans host_cpuid(®s); 49af7b0868SMatt Evans 50af7b0868SMatt Evans switch (regs.ebx) { 51af7b0868SMatt Evans case CPUID_VENDOR_INTEL_1: 52af7b0868SMatt Evans eax_base = 0x00; 53af7b0868SMatt Evans feature = KVM__X86_FEATURE_VMX; 54af7b0868SMatt Evans break; 55af7b0868SMatt Evans 56af7b0868SMatt Evans case CPUID_VENDOR_AMD_1: 57af7b0868SMatt Evans eax_base = 0x80000000; 58af7b0868SMatt Evans feature = KVM__X86_FEATURE_SVM; 59af7b0868SMatt Evans break; 60af7b0868SMatt Evans 61af7b0868SMatt Evans default: 62af7b0868SMatt Evans return false; 63af7b0868SMatt Evans } 64af7b0868SMatt Evans 65af7b0868SMatt Evans regs = (struct cpuid_regs) { 66af7b0868SMatt Evans .eax = eax_base, 67af7b0868SMatt Evans }; 68af7b0868SMatt Evans host_cpuid(®s); 69af7b0868SMatt Evans 70af7b0868SMatt Evans if (regs.eax < eax_base + 0x01) 71af7b0868SMatt Evans return false; 72af7b0868SMatt Evans 73af7b0868SMatt Evans regs = (struct cpuid_regs) { 74af7b0868SMatt Evans .eax = eax_base + 0x01 75af7b0868SMatt Evans }; 76af7b0868SMatt Evans host_cpuid(®s); 77af7b0868SMatt Evans 78af7b0868SMatt Evans return regs.ecx & (1 << feature); 79af7b0868SMatt Evans } 80af7b0868SMatt Evans 81af7b0868SMatt Evans /* 82af7b0868SMatt Evans * Allocating RAM size bigger than 4GB requires us to leave a gap 83af7b0868SMatt Evans * in the RAM which is used for PCI MMIO, hotplug, and unconfigured 84af7b0868SMatt Evans * devices (see documentation of e820_setup_gap() for details). 85af7b0868SMatt Evans * 86af7b0868SMatt Evans * If we're required to initialize RAM bigger than 4GB, we will create 87af7b0868SMatt Evans * a gap between 0xe0000000 and 0x100000000 in the guest virtual mem space. 88af7b0868SMatt Evans */ 89af7b0868SMatt Evans 90af7b0868SMatt Evans void kvm__init_ram(struct kvm *kvm) 91af7b0868SMatt Evans { 92af7b0868SMatt Evans u64 phys_start, phys_size; 93af7b0868SMatt Evans void *host_mem; 94af7b0868SMatt Evans 95af7b0868SMatt Evans if (kvm->ram_size < KVM_32BIT_GAP_START) { 96af7b0868SMatt Evans /* Use a single block of RAM for 32bit RAM */ 97af7b0868SMatt Evans 98af7b0868SMatt Evans phys_start = 0; 99af7b0868SMatt Evans phys_size = kvm->ram_size; 100af7b0868SMatt Evans host_mem = kvm->ram_start; 101af7b0868SMatt Evans 102af7b0868SMatt Evans kvm__register_mem(kvm, phys_start, phys_size, host_mem); 103af7b0868SMatt Evans } else { 104af7b0868SMatt Evans /* First RAM range from zero to the PCI gap: */ 105af7b0868SMatt Evans 106af7b0868SMatt Evans phys_start = 0; 107af7b0868SMatt Evans phys_size = KVM_32BIT_GAP_START; 108af7b0868SMatt Evans host_mem = kvm->ram_start; 109af7b0868SMatt Evans 110af7b0868SMatt Evans kvm__register_mem(kvm, phys_start, phys_size, host_mem); 111af7b0868SMatt Evans 112af7b0868SMatt Evans /* Second RAM range from 4GB to the end of RAM: */ 113af7b0868SMatt Evans 114af7b0868SMatt Evans phys_start = 0x100000000ULL; 115af7b0868SMatt Evans phys_size = kvm->ram_size - phys_size; 116af7b0868SMatt Evans host_mem = kvm->ram_start + phys_start; 117af7b0868SMatt Evans 118af7b0868SMatt Evans kvm__register_mem(kvm, phys_start, phys_size, host_mem); 119af7b0868SMatt Evans } 120af7b0868SMatt Evans } 121af7b0868SMatt Evans 122af7b0868SMatt Evans /* Architecture-specific KVM init */ 123af7b0868SMatt Evans void kvm__arch_init(struct kvm *kvm, const char *kvm_dev, u64 ram_size, const char *name) 124af7b0868SMatt Evans { 125af7b0868SMatt Evans struct kvm_pit_config pit_config = { .flags = 0, }; 126af7b0868SMatt Evans int ret; 127af7b0868SMatt Evans 128af7b0868SMatt Evans ret = ioctl(kvm->vm_fd, KVM_SET_TSS_ADDR, 0xfffbd000); 129af7b0868SMatt Evans if (ret < 0) 130af7b0868SMatt Evans die_perror("KVM_SET_TSS_ADDR ioctl"); 131af7b0868SMatt Evans 132af7b0868SMatt Evans ret = ioctl(kvm->vm_fd, KVM_CREATE_PIT2, &pit_config); 133af7b0868SMatt Evans if (ret < 0) 134af7b0868SMatt Evans die_perror("KVM_CREATE_PIT2 ioctl"); 135af7b0868SMatt Evans 136af7b0868SMatt Evans kvm->ram_size = ram_size; 137af7b0868SMatt Evans 138af7b0868SMatt Evans if (kvm->ram_size < KVM_32BIT_GAP_START) { 139af7b0868SMatt Evans kvm->ram_start = mmap(NULL, ram_size, PROT_RW, MAP_ANON_NORESERVE, -1, 0); 140af7b0868SMatt Evans } else { 141af7b0868SMatt Evans kvm->ram_start = mmap(NULL, ram_size + KVM_32BIT_GAP_SIZE, PROT_RW, MAP_ANON_NORESERVE, -1, 0); 142af7b0868SMatt Evans if (kvm->ram_start != MAP_FAILED) { 143af7b0868SMatt Evans /* 144af7b0868SMatt Evans * We mprotect the gap (see kvm__init_ram() for details) PROT_NONE so that 145af7b0868SMatt Evans * if we accidently write to it, we will know. 146af7b0868SMatt Evans */ 147af7b0868SMatt Evans mprotect(kvm->ram_start + KVM_32BIT_GAP_START, KVM_32BIT_GAP_SIZE, PROT_NONE); 148af7b0868SMatt Evans } 149af7b0868SMatt Evans } 150af7b0868SMatt Evans if (kvm->ram_start == MAP_FAILED) 151af7b0868SMatt Evans die("out of memory"); 152af7b0868SMatt Evans 153af7b0868SMatt Evans madvise(kvm->ram_start, kvm->ram_size, MADV_MERGEABLE); 154af7b0868SMatt Evans 155af7b0868SMatt Evans ret = ioctl(kvm->vm_fd, KVM_CREATE_IRQCHIP); 156af7b0868SMatt Evans if (ret < 0) 157af7b0868SMatt Evans die_perror("KVM_CREATE_IRQCHIP ioctl"); 158af7b0868SMatt Evans } 159af7b0868SMatt Evans 160af7b0868SMatt Evans void kvm__irq_line(struct kvm *kvm, int irq, int level) 161af7b0868SMatt Evans { 162af7b0868SMatt Evans struct kvm_irq_level irq_level; 163af7b0868SMatt Evans 164af7b0868SMatt Evans irq_level = (struct kvm_irq_level) { 165af7b0868SMatt Evans { 166af7b0868SMatt Evans .irq = irq, 167af7b0868SMatt Evans }, 168af7b0868SMatt Evans .level = level, 169af7b0868SMatt Evans }; 170af7b0868SMatt Evans 171af7b0868SMatt Evans if (ioctl(kvm->vm_fd, KVM_IRQ_LINE, &irq_level) < 0) 172af7b0868SMatt Evans die_perror("KVM_IRQ_LINE failed"); 173af7b0868SMatt Evans } 174af7b0868SMatt Evans 175af7b0868SMatt Evans void kvm__irq_trigger(struct kvm *kvm, int irq) 176af7b0868SMatt Evans { 177af7b0868SMatt Evans kvm__irq_line(kvm, irq, 1); 178af7b0868SMatt Evans kvm__irq_line(kvm, irq, 0); 179af7b0868SMatt Evans } 180af7b0868SMatt Evans 181af7b0868SMatt Evans #define BOOT_LOADER_SELECTOR 0x1000 182af7b0868SMatt Evans #define BOOT_LOADER_IP 0x0000 183af7b0868SMatt Evans #define BOOT_LOADER_SP 0x8000 184af7b0868SMatt Evans #define BOOT_CMDLINE_OFFSET 0x20000 185af7b0868SMatt Evans 186af7b0868SMatt Evans #define BOOT_PROTOCOL_REQUIRED 0x206 187af7b0868SMatt Evans #define LOAD_HIGH 0x01 188af7b0868SMatt Evans 189af7b0868SMatt Evans int load_flat_binary(struct kvm *kvm, int fd) 190af7b0868SMatt Evans { 191af7b0868SMatt Evans void *p; 192af7b0868SMatt Evans int nr; 193af7b0868SMatt Evans 194af7b0868SMatt Evans if (lseek(fd, 0, SEEK_SET) < 0) 195af7b0868SMatt Evans die_perror("lseek"); 196af7b0868SMatt Evans 197af7b0868SMatt Evans p = guest_real_to_host(kvm, BOOT_LOADER_SELECTOR, BOOT_LOADER_IP); 198af7b0868SMatt Evans 199af7b0868SMatt Evans while ((nr = read(fd, p, 65536)) > 0) 200af7b0868SMatt Evans p += nr; 201af7b0868SMatt Evans 202af7b0868SMatt Evans kvm->boot_selector = BOOT_LOADER_SELECTOR; 203af7b0868SMatt Evans kvm->boot_ip = BOOT_LOADER_IP; 204af7b0868SMatt Evans kvm->boot_sp = BOOT_LOADER_SP; 205af7b0868SMatt Evans 206af7b0868SMatt Evans return true; 207af7b0868SMatt Evans } 208af7b0868SMatt Evans 209af7b0868SMatt Evans static const char *BZIMAGE_MAGIC = "HdrS"; 210af7b0868SMatt Evans 211af7b0868SMatt Evans bool load_bzimage(struct kvm *kvm, int fd_kernel, 212af7b0868SMatt Evans int fd_initrd, const char *kernel_cmdline, u16 vidmode) 213af7b0868SMatt Evans { 214af7b0868SMatt Evans struct boot_params *kern_boot; 215af7b0868SMatt Evans unsigned long setup_sects; 216af7b0868SMatt Evans struct boot_params boot; 217af7b0868SMatt Evans size_t cmdline_size; 218af7b0868SMatt Evans ssize_t setup_size; 219af7b0868SMatt Evans void *p; 220af7b0868SMatt Evans int nr; 221af7b0868SMatt Evans 222af7b0868SMatt Evans /* 223af7b0868SMatt Evans * See Documentation/x86/boot.txt for details no bzImage on-disk and 224af7b0868SMatt Evans * memory layout. 225af7b0868SMatt Evans */ 226af7b0868SMatt Evans 227af7b0868SMatt Evans if (lseek(fd_kernel, 0, SEEK_SET) < 0) 228af7b0868SMatt Evans die_perror("lseek"); 229af7b0868SMatt Evans 230af7b0868SMatt Evans if (read(fd_kernel, &boot, sizeof(boot)) != sizeof(boot)) 231af7b0868SMatt Evans return false; 232af7b0868SMatt Evans 233af7b0868SMatt Evans if (memcmp(&boot.hdr.header, BZIMAGE_MAGIC, strlen(BZIMAGE_MAGIC))) 234af7b0868SMatt Evans return false; 235af7b0868SMatt Evans 236af7b0868SMatt Evans if (boot.hdr.version < BOOT_PROTOCOL_REQUIRED) 237af7b0868SMatt Evans die("Too old kernel"); 238af7b0868SMatt Evans 239af7b0868SMatt Evans if (lseek(fd_kernel, 0, SEEK_SET) < 0) 240af7b0868SMatt Evans die_perror("lseek"); 241af7b0868SMatt Evans 242af7b0868SMatt Evans if (!boot.hdr.setup_sects) 243af7b0868SMatt Evans boot.hdr.setup_sects = BZ_DEFAULT_SETUP_SECTS; 244af7b0868SMatt Evans setup_sects = boot.hdr.setup_sects + 1; 245af7b0868SMatt Evans 246af7b0868SMatt Evans setup_size = setup_sects << 9; 247af7b0868SMatt Evans p = guest_real_to_host(kvm, BOOT_LOADER_SELECTOR, BOOT_LOADER_IP); 248af7b0868SMatt Evans 249af7b0868SMatt Evans /* copy setup.bin to mem*/ 250af7b0868SMatt Evans if (read(fd_kernel, p, setup_size) != setup_size) 251af7b0868SMatt Evans die_perror("read"); 252af7b0868SMatt Evans 253af7b0868SMatt Evans /* copy vmlinux.bin to BZ_KERNEL_START*/ 254af7b0868SMatt Evans p = guest_flat_to_host(kvm, BZ_KERNEL_START); 255af7b0868SMatt Evans 256af7b0868SMatt Evans while ((nr = read(fd_kernel, p, 65536)) > 0) 257af7b0868SMatt Evans p += nr; 258af7b0868SMatt Evans 259af7b0868SMatt Evans p = guest_flat_to_host(kvm, BOOT_CMDLINE_OFFSET); 260af7b0868SMatt Evans if (kernel_cmdline) { 261af7b0868SMatt Evans cmdline_size = strlen(kernel_cmdline) + 1; 262af7b0868SMatt Evans if (cmdline_size > boot.hdr.cmdline_size) 263af7b0868SMatt Evans cmdline_size = boot.hdr.cmdline_size; 264af7b0868SMatt Evans 265af7b0868SMatt Evans memset(p, 0, boot.hdr.cmdline_size); 266af7b0868SMatt Evans memcpy(p, kernel_cmdline, cmdline_size - 1); 267af7b0868SMatt Evans } 268af7b0868SMatt Evans 269af7b0868SMatt Evans kern_boot = guest_real_to_host(kvm, BOOT_LOADER_SELECTOR, 0x00); 270af7b0868SMatt Evans 271af7b0868SMatt Evans kern_boot->hdr.cmd_line_ptr = BOOT_CMDLINE_OFFSET; 272af7b0868SMatt Evans kern_boot->hdr.type_of_loader = 0xff; 273af7b0868SMatt Evans kern_boot->hdr.heap_end_ptr = 0xfe00; 274af7b0868SMatt Evans kern_boot->hdr.loadflags |= CAN_USE_HEAP; 275af7b0868SMatt Evans kern_boot->hdr.vid_mode = vidmode; 276af7b0868SMatt Evans 277af7b0868SMatt Evans /* 278af7b0868SMatt Evans * Read initrd image into guest memory 279af7b0868SMatt Evans */ 280af7b0868SMatt Evans if (fd_initrd >= 0) { 281af7b0868SMatt Evans struct stat initrd_stat; 282af7b0868SMatt Evans unsigned long addr; 283af7b0868SMatt Evans 284af7b0868SMatt Evans if (fstat(fd_initrd, &initrd_stat)) 285af7b0868SMatt Evans die_perror("fstat"); 286af7b0868SMatt Evans 287af7b0868SMatt Evans addr = boot.hdr.initrd_addr_max & ~0xfffff; 288af7b0868SMatt Evans for (;;) { 289af7b0868SMatt Evans if (addr < BZ_KERNEL_START) 290af7b0868SMatt Evans die("Not enough memory for initrd"); 291af7b0868SMatt Evans else if (addr < (kvm->ram_size - initrd_stat.st_size)) 292af7b0868SMatt Evans break; 293af7b0868SMatt Evans addr -= 0x100000; 294af7b0868SMatt Evans } 295af7b0868SMatt Evans 296af7b0868SMatt Evans p = guest_flat_to_host(kvm, addr); 297af7b0868SMatt Evans nr = read(fd_initrd, p, initrd_stat.st_size); 298af7b0868SMatt Evans if (nr != initrd_stat.st_size) 299af7b0868SMatt Evans die("Failed to read initrd"); 300af7b0868SMatt Evans 301af7b0868SMatt Evans kern_boot->hdr.ramdisk_image = addr; 302af7b0868SMatt Evans kern_boot->hdr.ramdisk_size = initrd_stat.st_size; 303af7b0868SMatt Evans } 304af7b0868SMatt Evans 305af7b0868SMatt Evans kvm->boot_selector = BOOT_LOADER_SELECTOR; 306af7b0868SMatt Evans /* 307af7b0868SMatt Evans * The real-mode setup code starts at offset 0x200 of a bzImage. See 308af7b0868SMatt Evans * Documentation/x86/boot.txt for details. 309af7b0868SMatt Evans */ 310af7b0868SMatt Evans kvm->boot_ip = BOOT_LOADER_IP + 0x200; 311af7b0868SMatt Evans kvm->boot_sp = BOOT_LOADER_SP; 312af7b0868SMatt Evans 313af7b0868SMatt Evans return true; 314af7b0868SMatt Evans } 315af7b0868SMatt Evans 316af7b0868SMatt Evans /** 317af7b0868SMatt Evans * kvm__arch_setup_firmware - inject BIOS into guest system memory 318af7b0868SMatt Evans * @kvm - guest system descriptor 319af7b0868SMatt Evans * 320af7b0868SMatt Evans * This function is a main routine where we poke guest memory 321af7b0868SMatt Evans * and install BIOS there. 322af7b0868SMatt Evans */ 323af7b0868SMatt Evans void kvm__arch_setup_firmware(struct kvm *kvm) 324af7b0868SMatt Evans { 325af7b0868SMatt Evans /* standart minimal configuration */ 326af7b0868SMatt Evans setup_bios(kvm); 327af7b0868SMatt Evans 328af7b0868SMatt Evans /* FIXME: SMP, ACPI and friends here */ 329af7b0868SMatt Evans 330af7b0868SMatt Evans /* MP table */ 331af7b0868SMatt Evans mptable_setup(kvm, kvm->nrcpus); 332af7b0868SMatt Evans } 333*0b69bdefSMatt Evans 334*0b69bdefSMatt Evans void kvm__arch_periodic_poll(struct kvm *kvm) 335*0b69bdefSMatt Evans { 336*0b69bdefSMatt Evans serial8250__inject_interrupt(kvm); 337*0b69bdefSMatt Evans virtio_console__inject_interrupt(kvm); 338*0b69bdefSMatt Evans } 339