/*
 * Implementation of the "run" command: parses the command line, configures
 * the guest (memory, disks, 9p root filesystem, network, console, shmem),
 * boots the kernel and supervises the vCPU threads until the guest exits.
 */
#include "kvm/builtin-run.h"

#include "kvm/builtin-setup.h"
#include "kvm/virtio-balloon.h"
#include "kvm/virtio-console.h"
#include "kvm/parse-options.h"
#include "kvm/8250-serial.h"
#include "kvm/framebuffer.h"
#include "kvm/disk-image.h"
#include "kvm/threadpool.h"
#include "kvm/virtio-blk.h"
#include "kvm/virtio-net.h"
#include "kvm/virtio-rng.h"
#include "kvm/ioeventfd.h"
#include "kvm/virtio-9p.h"
#include "kvm/barrier.h"
#include "kvm/kvm-cpu.h"
#include "kvm/ioport.h"
#include "kvm/symbol.h"
#include "kvm/i8042.h"
#include "kvm/mutex.h"
#include "kvm/term.h"
#include "kvm/util.h"
#include "kvm/strbuf.h"
#include "kvm/vesa.h"
#include "kvm/irq.h"
#include "kvm/kvm.h"
#include "kvm/pci.h"
#include "kvm/rtc.h"
#include "kvm/sdl.h"
#include "kvm/vnc.h"
#include "kvm/guest_compat.h"
#include "kvm/pci-shmem.h"
#include "kvm/kvm-ipc.h"
#include "kvm/builtin-debug.h"

/*
 * NOTE(review): the header names of the system #include directives were
 * missing from this copy of the file. The list below is reconstructed from
 * the libc symbols used in this file -- confirm against the build.
 */
#include <sys/utsname.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <pthread.h>
#include <signal.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <limits.h>
#include <stdint.h>
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>

#define DEFAULT_KVM_DEV		"/dev/kvm"
#define DEFAULT_CONSOLE		"serial"
#define DEFAULT_NETWORK		"user"
#define DEFAULT_HOST_ADDR	"192.168.33.1"
#define DEFAULT_GUEST_ADDR	"192.168.33.15"
#define DEFAULT_GUEST_MAC	"02:15:15:15:15:15"
#define DEFAULT_HOST_MAC	"02:01:01:01:01:01"
#define DEFAULT_SCRIPT		"none"
const char *DEFAULT_SANDBOX_FILENAME = "guest/sandbox.sh";

#define MB_SHIFT		(20)
#define KB_SHIFT		(10)
#define GB_SHIFT		(30)
#define MIN_RAM_SIZE_MB		(64ULL)
#define MIN_RAM_SIZE_BYTE	(MIN_RAM_SIZE_MB << MB_SHIFT)

struct kvm *kvm;
struct kvm_cpu **kvm_cpus;
__thread struct kvm_cpu *current_kvm_cpu;

/* Guest RAM size: MiB while options are parsed, bytes once kvm_cmd_run() shifts it. */
static u64 ram_size;
static u8 image_count;
static u8 num_net_devices;
static bool virtio_rng;
static const char *kernel_cmdline;
static const char *kernel_filename;
static const char *vmlinux_filename;
static const char *initrd_filename;
static const char *image_filename[MAX_DISK_IMAGES];
static const char *console;
static const char *dev;
static const char *network;
static const char *host_ip;
static const char *guest_ip;
static const char *guest_mac;
static const char *host_mac;
static const char *script;
static const char *guest_name;
static const char *sandbox;
static const char *hugetlbfs_path;
static const char *custom_rootfs_name = "default";
static struct virtio_net_params *net_params;
static bool single_step;
static bool readonly_image[MAX_DISK_IMAGES];
static bool vnc;
static bool sdl;
static bool balloon;
static bool using_rootfs;
static bool custom_rootfs;
static bool no_net;
static bool no_dhcp;
extern bool ioport_debug;
static int kvm_run_wrapper;
extern int active_console;
extern int debug_iodelay;

bool do_debug_print = false;

static int nrcpus;
static int vidmode = -1;

static const char * const run_usage[] = {
	"lkvm run [<options>] [<kernel image>]",
	NULL
};

/*
 * Which front-end invoked the run command. KVM_RUN_DEFAULT must be the zero
 * value so that a zero-initialised kvm_run_wrapper does not compare equal to
 * KVM_RUN_SANDBOX before kvm_run_set_wrapper_sandbox() has been called.
 */
enum {
	KVM_RUN_DEFAULT,
	KVM_RUN_SANDBOX,
};

/* Mark this invocation as coming from the sandbox wrapper. */
void kvm_run_set_wrapper_sandbox(void)
{
	kvm_run_wrapper = KVM_RUN_SANDBOX;
}

/*
 * Option callback for -d/--disk.
 *
 * The argument is tried, in order, as:
 *   1. a directory path            -> registered as the 9p root ("/dev/root");
 *   2. a directory under kvm__get_dir() -> registered as a custom rootfs, with
 *      the host "/" additionally shared as "hostfs";
 *   3. a disk image name, optionally suffixed with ",ro" for read-only.
 *
 * The ",ro" suffix is cut off by writing a NUL into the argument string.
 * Dies on any failure; always returns 0 otherwise.
 */
static int img_name_parser(const struct option *opt, const char *arg, int unset)
{
	char *sep;
	struct stat st;
	char path[PATH_MAX];

	if (stat(arg, &st) == 0 &&
	    S_ISDIR(st.st_mode)) {
		char tmp[PATH_MAX];

		if (using_rootfs)
			die("Please use only one rootfs directory atmost");

		if (realpath(arg, tmp) == 0 ||
		    virtio_9p__register(kvm, tmp, "/dev/root") < 0)
			die("Unable to initialize virtio 9p");
		using_rootfs = 1;
		return 0;
	}

	snprintf(path, PATH_MAX, "%s%s", kvm__get_dir(), arg);

	if (stat(path, &st) == 0 &&
	    S_ISDIR(st.st_mode)) {
		char tmp[PATH_MAX];

		if (using_rootfs)
			die("Please use only one rootfs directory atmost");

		if (realpath(path, tmp) == 0 ||
		    virtio_9p__register(kvm, tmp, "/dev/root") < 0)
			die("Unable to initialize virtio 9p");
		if (virtio_9p__register(kvm, "/", "hostfs") < 0)
			die("Unable to initialize virtio 9p");
		kvm_setup_resolv(arg);
		using_rootfs = custom_rootfs = 1;
		custom_rootfs_name = arg;
		return 0;
	}

	if (image_count >= MAX_DISK_IMAGES)
		die("Currently only 4 images are supported");

	image_filename[image_count] = arg;
	sep = strstr(arg, ",");
	if (sep) {
		if (strcmp(sep + 1, "ro") == 0)
			readonly_image[image_count] = 1;
		*sep = 0;
	}

	image_count++;

	return 0;
}

/*
 * Option callback for --9p: registers a host directory as a 9p share.
 * Dies if the path cannot be resolved or registration fails.
 */
static int virtio_9p_rootdir_parser(const struct option *opt, const char *arg, int unset)
{
	char *tag_name;
	char tmp[PATH_MAX];

	/*
	 * 9p dir can be of the form dirname,tag_name or
	 * just dirname. In the latter case tag_name stays NULL and is
	 * passed through to virtio_9p__register() as such.
	 */
	tag_name = strstr(arg, ",");
	if (tag_name) {
		*tag_name = '\0';
		tag_name++;
	}
	if (realpath(arg, tmp)) {
		if (virtio_9p__register(kvm, tmp, tag_name) < 0)
			die("Unable to initialize virtio 9p");
	} else
		die("Failed resolving 9p path");
	return 0;
}

/* Option callback for --tty: hands the numeric tty id to term_set_tty(). */
static int tty_parser(const struct option *opt, const char *arg, int unset)
{
	int tty = atoi(arg);

	term_set_tty(tty);

	return 0;
}

/* Parse "xx:xx:xx:xx:xx:xx" into the six bytes starting at @mac. */
static inline void str_to_mac(const char *str, char *mac)
{
	sscanf(str, "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx",
		mac, mac+1, mac+2, mac+3, mac+4, mac+5);
}

/*
 * Apply one "param=val" pair of a --network argument to @p.
 *
 * Returns 0 on success (unrecognised parameter names are ignored) and -1
 * when mode=none was given, in which case no_net is set and the caller is
 * expected to drop the device. Dies on an unknown mode or on a second
 * user-mode device.
 */
static int set_net_param(struct virtio_net_params *p, const char *param,
				const char *val)
{
	if (strcmp(param, "guest_mac") == 0) {
		str_to_mac(val, p->guest_mac);
	} else if (strcmp(param, "mode") == 0) {
		if (!strncmp(val, "user", 4)) {
			int i;

			for (i = 0; i < num_net_devices; i++)
				if (net_params[i].mode == NET_MODE_USER)
					die("Only one usermode network device allowed at a time");
			p->mode = NET_MODE_USER;
		} else if (!strncmp(val, "tap", 3)) {
			p->mode = NET_MODE_TAP;
		} else if (!strncmp(val, "none", 4)) {
			no_net = 1;
			return -1;
		} else
			/* Report the offending value, not the unrelated 'network' global. */
			die("Unknown network mode %s, please use user, tap or none", val);
	} else if (strcmp(param, "script") == 0) {
		p->script = strdup(val);
	} else if (strcmp(param, "guest_ip") == 0) {
		p->guest_ip = strdup(val);
	} else if (strcmp(param, "host_ip") == 0) {
		p->host_ip = strdup(val);
	} else if (strcmp(param, "vhost") == 0) {
		p->vhost = atoi(val);
	} else if (strcmp(param, "fd") == 0) {
		p->fd = atoi(val);
	}

	return 0;
}

/*
 * Option callback for -n/--network: appends one NIC description to
 * net_params[].
 *
 * @arg is a "key=val,key=val" list (may be NULL for an all-defaults TAP
 * device). The default guest MAC has its last byte offset by the number of
 * devices already configured. If set_net_param() rejects a pair (mode=none),
 * the device is not added.
 */
static int netdev_parser(const struct option *opt, const char *arg, int unset)
{
	struct virtio_net_params p;
	char *buf = NULL, *cmd = NULL, *cur = NULL;
	bool on_cmd = true;

	if (arg) {
		buf = strdup(arg);
		if (buf == NULL)
			die("Failed allocating new net buffer");
		cur = strtok(buf, ",=");
	}

	p = (struct virtio_net_params) {
		.guest_ip	= DEFAULT_GUEST_ADDR,
		.host_ip	= DEFAULT_HOST_ADDR,
		.script		= DEFAULT_SCRIPT,
		.mode		= NET_MODE_TAP,
	};

	str_to_mac(DEFAULT_GUEST_MAC, p.guest_mac);
	p.guest_mac[5] += num_net_devices;

	/* Tokens alternate between parameter name and parameter value. */
	while (cur) {
		if (on_cmd) {
			cmd = cur;
		} else {
			if (set_net_param(&p, cmd, cur) < 0)
				goto done;
		}
		on_cmd = !on_cmd;

		cur = strtok(NULL, ",=");
	}

	num_net_devices++;

	net_params = realloc(net_params, num_net_devices * sizeof(*net_params));
	if (net_params == NULL)
		die("Failed adding new network device");

	net_params[num_net_devices - 1] = p;

done:
	free(buf);
	return 0;
}

/*
 * Option callback for --shmem.
 *
 * Accepted form: [pci:]<addr>:<size>[:handle=<handle>][:create]
 *   - <addr> and <size> are decimal, or hexadecimal when "0x" is present;
 *     either may be empty, selecting SHMEM_DEFAULT_ADDR / SHMEM_DEFAULT_SIZE;
 *   - <size> may carry a K/M/G suffix (optionally followed by B), base 2;
 *   - a missing handle selects SHMEM_DEFAULT_HANDLE.
 *
 * The resulting shmem_info is handed to pci_shmem__register_mem(). Dies on
 * malformed input or allocation failure; returns 0 otherwise.
 */
static int shmem_parser(const struct option *opt, const char *arg, int unset)
{
	const u64 default_size = SHMEM_DEFAULT_SIZE;
	const u64 default_phys_addr = SHMEM_DEFAULT_ADDR;
	const char *default_handle = SHMEM_DEFAULT_HANDLE;
	struct shmem_info *si = malloc(sizeof(*si));
	u64 phys_addr;
	u64 size;
	char *handle = NULL;
	int create = 0;
	const char *p = arg;
	char *next;
	int base = 10;
	int verbose = 0;

	const int skip_pci = strlen("pci:");

	if (si == NULL)
		die("shmem: failed allocating shmem info");

	if (verbose)
		pr_info("shmem_parser(%p,%s,%d)", opt, arg, unset);
	/*
	 * parse out optional addr family; only a leading prefix counts,
	 * since p is advanced past it unconditionally.
	 */
	if (strcasestr(p, "pci:") == p) {
		p += skip_pci;
	} else if (strcasestr(p, "mem:") == p) {
		die("I can't add to E820 map yet.\n");
	}
	/* parse out physical addr */
	base = 10;
	if (strcasestr(p, "0x"))
		base = 16;
	phys_addr = strtoll(p, &next, base);
	if (next == p && phys_addr == 0) {
		pr_info("shmem: no physical addr specified, using default.");
		phys_addr = default_phys_addr;
	}
	if (*next != ':' && *next != '\0')
		die("shmem: unexpected chars after phys addr.\n");
	if (*next == '\0')
		p = next;
	else
		p = next + 1;
	/* parse out size */
	base = 10;
	if (strcasestr(p, "0x"))
		base = 16;
	size = strtoll(p, &next, base);
	if (next == p && size == 0) {
		pr_info("shmem: no size specified, using default.");
		size = default_size;
	}
	/* look for [KMGkmg][Bb]*  uses base 2. */
	int skip_B = 0;
	if (strspn(next, "KMGkmg")) {	/* might have a prefix */
		if (*(next + 1) == 'B' || *(next + 1) == 'b')
			skip_B = 1;
		switch (*next) {
		case 'K':
		case 'k':
			size = size << KB_SHIFT;
			break;
		case 'M':
		case 'm':
			size = size << MB_SHIFT;
			break;
		case 'G':
		case 'g':
			size = size << GB_SHIFT;
			break;
		default:
			die("shmem: bug in detecting size prefix.");
			break;
		}
		next += 1 + skip_B;
	}
	if (*next != ':' && *next != '\0') {
		die("shmem: unexpected chars after phys size. <%c><%c>\n",
		    *next, *p);
	}
	if (*next == '\0')
		p = next;
	else
		p = next + 1;
	/* parse out optional shmem handle */
	const int skip_handle = strlen("handle=");
	next = strcasestr(p, "handle=");
	if (*p && next) {
		if (p != next)
			die("unexpected chars before handle\n");
		p += skip_handle;
		next = strchrnul(p, ':');
		if (next - p) {
			handle = malloc(next - p + 1);
			if (handle == NULL)
				die("shmem: failed allocating handle");
			memcpy(handle, p, next - p);
			handle[next - p] = '\0';
		}
		if (*next == '\0')
			p = next;
		else
			p = next + 1;
	}
	/* parse optional create flag to see if we should create shm seg. */
	if (*p && strcasestr(p, "create")) {
		create = 1;
		p += strlen("create");
	}
	if (*p != '\0')
		die("shmem: unexpected trailing chars\n");
	if (handle == NULL) {
		handle = strdup(default_handle);
		if (handle == NULL)
			die("shmem: failed allocating handle");
	}
	if (verbose) {
		pr_info("shmem: phys_addr = %llx", phys_addr);
		pr_info("shmem: size      = %llx", size);
		pr_info("shmem: handle    = %s", handle);
		pr_info("shmem: create    = %d", create);
	}

	si->phys_addr = phys_addr;
	si->size = size;
	si->handle = handle;
	si->create = create;
	pci_shmem__register_mem(si);	/* ownership of si, etc. passed on. */
	return 0;
}

/* Command-line options understood by "run". */
static const struct option options[] = {
	OPT_GROUP("Basic options:"),
	OPT_STRING('\0', "name", &guest_name, "guest name",
			"A name for the guest"),
	OPT_INTEGER('c', "cpus", &nrcpus, "Number of CPUs"),
	OPT_U64('m', "mem", &ram_size, "Virtual machine memory size in MiB."),
	OPT_CALLBACK('\0', "shmem", NULL,
		     "[pci:]<addr>:<size>[:handle=<handle>][:create]",
		     "Share host shmem with guest via pci device",
		     shmem_parser),
	OPT_CALLBACK('d', "disk", NULL, "image or rootfs_dir",
		     "Disk image or rootfs directory", img_name_parser),
	OPT_BOOLEAN('\0', "balloon", &balloon, "Enable virtio balloon"),
	OPT_BOOLEAN('\0', "vnc", &vnc, "Enable VNC framebuffer"),
	OPT_BOOLEAN('\0', "sdl", &sdl, "Enable SDL framebuffer"),
	OPT_BOOLEAN('\0', "rng", &virtio_rng,
			"Enable virtio Random Number Generator"),
	OPT_CALLBACK('\0', "9p", NULL, "dir_to_share,tag_name",
		     "Enable virtio 9p to share files between host and guest",
		     virtio_9p_rootdir_parser),
	OPT_STRING('\0', "console", &console, "serial, virtio or hv",
			"Console to use"),
	OPT_STRING('\0', "dev", &dev, "device_file", "KVM device file"),
	OPT_CALLBACK('\0', "tty", NULL, "tty id",
		     "Remap guest TTY into a pty on the host",
		     tty_parser),
	OPT_STRING('\0', "sandbox", &sandbox, "script",
			"Run this script when booting into custom rootfs"),
	OPT_STRING('\0', "hugetlbfs", &hugetlbfs_path, "path", "Hugetlbfs path"),

	OPT_GROUP("Kernel options:"),
	OPT_STRING('k', "kernel", &kernel_filename, "kernel",
			"Kernel to boot in virtual machine"),
	OPT_STRING('i', "initrd", &initrd_filename, "initrd",
			"Initial RAM disk image"),
	OPT_STRING('p', "params", &kernel_cmdline, "params",
			"Kernel command line arguments"),

	OPT_GROUP("Networking options:"),
	OPT_CALLBACK_DEFAULT('n', "network", NULL, "network params",
		     "Create a new guest NIC",
		     netdev_parser, NULL),
	OPT_BOOLEAN('\0', "no-dhcp", &no_dhcp, "Disable kernel DHCP in rootfs mode"),

	OPT_GROUP("BIOS options:"),
	OPT_INTEGER('\0', "vidmode", &vidmode,
		    "Video mode"),

	OPT_GROUP("Debug options:"),
	OPT_BOOLEAN('\0', "debug", &do_debug_print,
			"Enable debug messages"),
	OPT_BOOLEAN('\0', "debug-single-step", &single_step,
			"Enable single stepping"),
	OPT_BOOLEAN('\0', "debug-ioport", &ioport_debug,
			"Enable ioport debugging"),
	OPT_INTEGER('\0', "debug-iodelay", &debug_iodelay,
			"Delay IO by millisecond"),
	OPT_END()
};

/*
 * Serialize debug printout so that the output of multiple vcpus does not
 * get mixed up:
 */
static int printout_done;

/*
 * SIGUSR1 handler, run on a vCPU thread: dumps that vCPU's registers, code
 * and page tables to the debug fd, then sets printout_done so that
 * handle_debug() can move on to the next vCPU. Does nothing on threads
 * without a current vCPU or when the vCPU has an NMI pending.
 */
static void handle_sigusr1(int sig)
{
	struct kvm_cpu *cpu = current_kvm_cpu;
	int fd = kvm_cpu__get_debug_fd();

	if (!cpu || cpu->needs_nmi)
		return;

	dprintf(fd, "\n #\n # vCPU #%ld's dump:\n #\n", cpu->cpu_id);
	kvm_cpu__show_registers(cpu);
	kvm_cpu__show_code(cpu);
	kvm_cpu__show_page_tables(cpu);
	fflush(stdout);
	printout_done = 1;
	mb();
}

/* Pause/resume the guest on KVM_IPC_PAUSE / KVM_IPC_RESUME requests */
static int is_paused;

/*
 * IPC handler shared by KVM_IPC_PAUSE and KVM_IPC_RESUME. Requests that do
 * not change the current state (pause while paused, resume while running)
 * are ignored, as are messages carrying a payload.
 */
static void handle_pause(int fd, u32 type, u32 len, u8 *msg)
{
	if (WARN_ON(len))
		return;

	if (type == KVM_IPC_RESUME && is_paused) {
		kvm->vm_state = KVM_VMSTATE_RUNNING;
		kvm__continue();
	} else if (type == KVM_IPC_PAUSE && !is_paused) {
		kvm->vm_state = KVM_VMSTATE_PAUSED;
		kvm__pause();
	} else {
		return;
	}

	is_paused = !is_paused;
}

/* IPC handler for KVM_IPC_VMSTATE: writes kvm->vm_state back to @fd. */
static void handle_vmstate(int fd, u32 type, u32 len, u8 *msg)
{
	int r = 0;

	if (type == KVM_IPC_VMSTATE)
		r = write(fd, &kvm->vm_state, sizeof(kvm->vm_state));

	if (r < 0)
		pr_warning("Failed sending VMSTATE");
}

/*
 * IPC handler for KVM_IPC_DEBUG. The payload is a struct debug_cmd_params.
 *
 * With KVM_DEBUG_CMD_TYPE_NMI set, the selected vCPU is flagged as needing an
 * NMI and signalled. With KVM_DEBUG_CMD_TYPE_DUMP set, every vCPU in turn is
 * asked (via SIGUSR1) to dump its state to @fd, after which @fd is closed and
 * a sysrq is injected through the serial port.
 */
static void handle_debug(int fd, u32 type, u32 len, u8 *msg)
{
	int i;
	struct debug_cmd_params *params;
	u32 dbg_type;
	u32 vcpu;

	if (WARN_ON(type != KVM_IPC_DEBUG || len != sizeof(*params)))
		return;

	params = (void *)msg;
	dbg_type = params->dbg_type;
	vcpu = params->cpu;

	if (dbg_type & KVM_DEBUG_CMD_TYPE_NMI) {
		if ((int)vcpu >= kvm->nrcpus)
			return;

		kvm_cpus[vcpu]->needs_nmi = 1;
		pthread_kill(kvm_cpus[vcpu]->thread, SIGUSR1);
	}

	if (!(dbg_type & KVM_DEBUG_CMD_TYPE_DUMP))
		return;

	for (i = 0; i < nrcpus; i++) {
		struct kvm_cpu *cpu = kvm_cpus[i];

		if (!cpu)
			continue;

		printout_done = 0;

		kvm_cpu__set_debug_fd(fd);
		pthread_kill(cpu->thread, SIGUSR1);
		/*
		 * Wait for the vCPU to dump state before signalling
		 * the next thread. Since this is debug code it does
		 * not matter that we are burning CPU time a bit:
		 */
		while (!printout_done)
			mb();
	}

	close(fd);

	serial8250__inject_sysrq(kvm);
}

/* SIGALRM handler: forwards the tick to the architecture's periodic poll. */
static void handle_sigalrm(int sig)
{
	kvm__arch_periodic_poll(kvm);
}

/* IPC handler for KVM_IPC_STOP: reboots (i.e. shuts down) the vCPUs. */
static void handle_stop(int fd, u32 type, u32 len, u8 *msg)
{
	if (WARN_ON(type != KVM_IPC_STOP || len))
		return;

	kvm_cpu__reboot();
}

/*
 * Entry point of each vCPU thread.
 *
 * Returns NULL when kvm_cpu__start() returns 0, and a non-NULL value after
 * printing the exit reason and a register/code/page-table dump otherwise.
 * kvm_cmd_run() turns a non-NULL thread result into a failing exit code.
 */
static void *kvm_cpu_thread(void *arg)
{
	current_kvm_cpu		= arg;

	if (kvm_cpu__start(current_kvm_cpu))
		goto panic_kvm;

	kvm_cpu__delete(current_kvm_cpu);

	return (void *) (intptr_t) 0;

panic_kvm:
	fprintf(stderr, "KVM exit reason: %u (\"%s\")\n",
		current_kvm_cpu->kvm_run->exit_reason,
		kvm_exit_reasons[current_kvm_cpu->kvm_run->exit_reason]);
	if (current_kvm_cpu->kvm_run->exit_reason == KVM_EXIT_UNKNOWN)
		/* Hex digits to go with the "0x" prefix. */
		fprintf(stderr, "KVM exit code: 0x%llx\n",
			(unsigned long long)current_kvm_cpu->kvm_run->hw.hardware_exit_reason);

	kvm_cpu__set_debug_fd(STDOUT_FILENO);
	kvm_cpu__show_registers(current_kvm_cpu);
	kvm_cpu__show_code(current_kvm_cpu);
	kvm_cpu__show_page_tables(current_kvm_cpu);

	kvm_cpu__delete(current_kvm_cpu);

	return (void *) (intptr_t) 1;
}

/* Scratch buffer holding the kernel path chosen by find_kernel(). */
static char kernel[PATH_MAX];

/* Host kernel path prefixes; "-<uname release>" is appended before use. */
static const char *host_kernels[] = {
	"/boot/vmlinuz",
	"/boot/bzImage",
	NULL
};

/* Kernel images probed relative to the current working directory. */
static const char *default_kernels[] = {
	"./bzImage",
	"../../arch/" BUILD_ARCH "/boot/bzImage",
	NULL
};

/* vmlinux locations probed relative to the current working directory. */
static const char *default_vmlinux[] = {
	"../../../vmlinux",
	"../../vmlinux",
	NULL
};

/*
 * Print every location find_kernel() looks at, for the "no kernel found"
 * error path. Uses the shared 'kernel' buffer as scratch space.
 */
static void kernel_usage_with_options(void)
{
	const char **k;
	struct utsname uts;

	fprintf(stderr, "Fatal: could not find default kernel image in:\n");
	k = &default_kernels[0];
	while (*k) {
		fprintf(stderr, "\t%s\n", *k);
		k++;
	}

	if (uname(&uts) < 0)
		return;

	k = &host_kernels[0];
	while (*k) {
		if (snprintf(kernel, PATH_MAX, "%s-%s", *k, uts.release) < 0)
			return;
		fprintf(stderr, "\t%s\n", kernel);
		k++;
	}
	fprintf(stderr, "\nPlease see '%s run --help' for more options.\n\n",
		KVM_BINARY_NAME);
}

/*
 * Physical RAM of the host in MiB, or 0 if sysconf() cannot report it.
 */
static u64 host_ram_size(void)
{
	long page_size;
	long nr_pages;

	nr_pages	= sysconf(_SC_PHYS_PAGES);
	if (nr_pages < 0) {
		pr_warning("sysconf(_SC_PHYS_PAGES) failed");
		return 0;
	}

	page_size	= sysconf(_SC_PAGE_SIZE);
	if (page_size < 0) {
		pr_warning("sysconf(_SC_PAGE_SIZE) failed");
		return 0;
	}

	/* Widen before multiplying: the product does not fit a 32-bit long. */
	return ((u64)nr_pages * (u64)page_size) >> MB_SHIFT;
}

/*
 * If user didn't specify how much memory it wants to allocate for the guest,
 * avoid filling the whole host RAM.
 */
#define RAM_SIZE_RATIO		0.8

/*
 * Default guest RAM in MiB: 64 MiB per vCPU plus 192 MiB, capped at
 * RAM_SIZE_RATIO of host RAM (or at MIN_RAM_SIZE_MB when the host size is
 * unknown).
 */
static u64 get_ram_size(int nr_cpus)
{
	u64 available;
	u64 ram_size;

	ram_size	= 64 * (nr_cpus + 3);

	available	= host_ram_size() * RAM_SIZE_RATIO;
	if (!available)
		available = MIN_RAM_SIZE_MB;

	if (ram_size > available)
		ram_size	= available;

	return ram_size;
}

/*
 * Locate a kernel image: first the build-tree defaults, then the host
 * kernels matching the running release. Returns a pointer to the static
 * 'kernel' buffer, or NULL if nothing suitable is a regular file.
 */
static const char *find_kernel(void)
{
	const char **k;
	struct stat st;
	struct utsname uts;

	k = &default_kernels[0];
	while (*k) {
		if (stat(*k, &st) < 0 || !S_ISREG(st.st_mode)) {
			k++;
			continue;
		}
		/* snprintf always NUL-terminates, unlike strncpy. */
		snprintf(kernel, PATH_MAX, "%s", *k);
		return kernel;
	}

	if (uname(&uts) < 0)
		return NULL;

	k = &host_kernels[0];
	while (*k) {
		if (snprintf(kernel, PATH_MAX, "%s-%s", *k, uts.release) < 0)
			return NULL;

		if (stat(kernel, &st) < 0 || !S_ISREG(st.st_mode)) {
			k++;
			continue;
		}
		return kernel;

	}
	return NULL;
}

/* Return the first default_vmlinux[] entry that is a regular file, or NULL. */
static const char *find_vmlinux(void)
{
	const char **vmlinux;

	vmlinux = &default_vmlinux[0];
	while (*vmlinux) {
		struct stat st;

		if (stat(*vmlinux, &st) < 0 || !S_ISREG(st.st_mode)) {
			vmlinux++;
			continue;
		}
		return *vmlinux;
	}
	return NULL;
}

/* Print the usage text for the run command. */
void kvm_run_help(void)
{
	usage_with_options(run_usage, options);
}

/*
 * (Re)create <kvm dir><rootfs>/virt/init_stage2 as a symlink to
 * "/host/" followed by the resolved path of guest/init_stage2.
 *
 * Returns -ENOMEM if the source path cannot be resolved, otherwise the
 * result of symlink().
 */
static int kvm_custom_stage2(void)
{
	char tmp[PATH_MAX], dst[PATH_MAX], *src;
	const char *rootfs = custom_rootfs_name;
	int r;

	src = realpath("guest/init_stage2", NULL);
	if (src == NULL)
		return -ENOMEM;

	snprintf(tmp, PATH_MAX, "%s%s/virt/init_stage2", kvm__get_dir(), rootfs);
	remove(tmp);

	snprintf(dst, PATH_MAX, "/host/%s", src);
	r = symlink(dst, tmp);
	free(src);

	return r;
}

/*
 * Remove any stale <kvm dir><rootfs>/virt/sandbox.sh and, if a sandbox
 * script was requested, link it to "/host/" followed by the script's
 * resolved path.
 *
 * Returns 0 when no sandbox is configured, -ENOMEM if the script path cannot
 * be resolved, otherwise the result of symlink().
 */
static int kvm_run_set_sandbox(void)
{
	const char *guestfs_name = custom_rootfs_name;
	char path[PATH_MAX], script[PATH_MAX], *tmp;

	snprintf(path, PATH_MAX, "%s%s/virt/sandbox.sh", kvm__get_dir(), guestfs_name);

	remove(path);

	if (sandbox == NULL)
		return 0;

	tmp = realpath(sandbox, NULL);
	if (tmp == NULL)
		return -ENOMEM;

	snprintf(script, PATH_MAX, "/host/%s", tmp);
	free(tmp);

	return symlink(script, path);
}

/*
 * Write @arg to @fd quoted so that a shell reads it back as one literal
 * word: runs without single quotes are wrapped in '...', and each single
 * quote is written as "'". An empty argument is written as ''.
 * Dies if a write fails.
 */
static void kvm_write_sandbox_cmd_exactly(int fd, const char *arg)
{
	const char *single_quote;

	if (!*arg) { /* zero length string */
		if (write(fd, "''", 2) <= 0)
			die("Failed writing sandbox script");
		return;
	}

	while (*arg) {
		single_quote = strchrnul(arg, '\'');

		/* write non-single-quote string as #('string') */
		if (arg != single_quote) {
			if (write(fd, "'", 1) <= 0 ||
			    write(fd, arg, single_quote - arg) <= 0 ||
			    write(fd, "'", 1) <= 0)
				die("Failed writing sandbox script");
		}

		/* write single quote as #("'") */
		if (*single_quote) {
			if (write(fd, "\"'\"", 3) <= 0)
				die("Failed writing sandbox script");
		} else
			break;

		arg = single_quote + 1;
	}
}

/*
 * Replace the file named by 'sandbox' with a bash script whose single
 * command line is @argv[0..argc-1], each argument quoted by
 * kvm_write_sandbox_cmd_exactly(). Dies on any I/O failure.
 */
static void kvm_run_write_sandbox_cmd(const char **argv, int argc)
{
	const char script_hdr[] = "#! /bin/bash\n\n";
	int fd;

	remove(sandbox);

	fd = open(sandbox, O_RDWR | O_CREAT, 0777);
	if (fd < 0)
		die("Failed creating sandbox script");

	if (write(fd, script_hdr, sizeof(script_hdr) - 1) <= 0)
		die("Failed writing sandbox script");

	while (argc) {
		kvm_write_sandbox_cmd_exactly(fd, argv[0]);
		if (argc - 1)
			if (write(fd, " ", 1) <= 0)
				die("Failed writing sandbox script");
		argv++;
		argc--;
	}
	if (write(fd, "\n", 1) <= 0)
		die("Failed writing sandbox script");

	close(fd);
}

/*
 * Entry point of the run command.
 *
 * Parses @argv, fills in defaults for everything left unspecified, creates
 * the VM and its devices, loads the kernel, starts one thread per vCPU and
 * waits for the guest to finish.
 *
 * Returns 0 on a clean shutdown, EINVAL for unusable arguments or a missing
 * kernel, the kvm__arch_setup_firmware() error if firmware setup fails, and
 * 1 if a vCPU thread reported a failure. Unrecoverable setup errors
 * terminate the process through die().
 */
int kvm_cmd_run(int argc, const char **argv, const char *prefix)
{
	static char real_cmdline[2048], default_name[20];
	struct framebuffer *fb = NULL;
	unsigned int nr_online_cpus;
	int exit_code = 0;
	int max_cpus, recommended_cpus;
	int i;
	void *ret = NULL;

	signal(SIGALRM, handle_sigalrm);
	kvm_ipc__register_handler(KVM_IPC_DEBUG, handle_debug);
	signal(SIGUSR1, handle_sigusr1);
	kvm_ipc__register_handler(KVM_IPC_PAUSE, handle_pause);
	kvm_ipc__register_handler(KVM_IPC_RESUME, handle_pause);
	kvm_ipc__register_handler(KVM_IPC_STOP, handle_stop);
	kvm_ipc__register_handler(KVM_IPC_VMSTATE, handle_vmstate);

	nr_online_cpus = sysconf(_SC_NPROCESSORS_ONLN);

	while (argc != 0) {
		argc = parse_options(argc, argv, options, run_usage,
				PARSE_OPT_STOP_AT_NON_OPTION |
				PARSE_OPT_KEEP_DASHDASH);
		if (argc != 0) {
			/* Custom options, should have been handled elsewhere */
			if (strcmp(argv[0], "--") == 0) {
				if (kvm_run_wrapper == KVM_RUN_SANDBOX) {
					/* Everything after "--" is the sandboxed command. */
					sandbox = DEFAULT_SANDBOX_FILENAME;
					kvm_run_write_sandbox_cmd(argv+1, argc-1);
					break;
				}
			}

			if (kernel_filename) {
				fprintf(stderr, "Cannot handle parameter: "
						"%s\n", argv[0]);
				usage_with_options(run_usage, options);
				return EINVAL;
			}
			/* first unhandled parameter is treated as a kernel
			   image */
			kernel_filename = argv[0];
			argv++;
			argc--;
		}

	}

	if (!kernel_filename)
		kernel_filename = find_kernel();

	if (!kernel_filename) {
		kernel_usage_with_options();
		return EINVAL;
	}

	vmlinux_filename = find_vmlinux();

	if (nrcpus == 0)
		nrcpus = nr_online_cpus;

	if (!ram_size)
		ram_size	= get_ram_size(nrcpus);

	if (ram_size < MIN_RAM_SIZE_MB)
		die("Not enough memory specified: %lluMB (min %lluMB)", ram_size, MIN_RAM_SIZE_MB);

	if (ram_size > host_ram_size())
		pr_warning("Guest memory size %lluMB exceeds host physical RAM size %lluMB", ram_size, host_ram_size());

	/* From here on ram_size is in bytes. */
	ram_size <<= MB_SHIFT;

	if (!dev)
		dev = DEFAULT_KVM_DEV;

	if (!console)
		console = DEFAULT_CONSOLE;

	if (!strncmp(console, "virtio", 6))
		active_console  = CONSOLE_VIRTIO;
	else if (!strncmp(console, "serial", 6))
		active_console  = CONSOLE_8250;
	else if (!strncmp(console, "hv", 2))
		active_console = CONSOLE_HV;
	else
		pr_warning("No console!");

	if (!host_ip)
		host_ip = DEFAULT_HOST_ADDR;

	if (!guest_ip)
		guest_ip = DEFAULT_GUEST_ADDR;

	if (!guest_mac)
		guest_mac = DEFAULT_GUEST_MAC;

	if (!host_mac)
		host_mac = DEFAULT_HOST_MAC;

	if (!script)
		script = DEFAULT_SCRIPT;

	symbol__init(vmlinux_filename);

	term_init();

	if (!guest_name) {
		snprintf(default_name, sizeof(default_name), "guest-%u",
			 (unsigned int)getpid());
		guest_name = default_name;
	}

	kvm = kvm__init(dev, hugetlbfs_path, ram_size, guest_name);

	kvm->single_step = single_step;

	ioeventfd__init(kvm);

	max_cpus = kvm__max_cpus(kvm);
	recommended_cpus = kvm__recommended_cpus(kvm);

	if (nrcpus > max_cpus) {
		printf(" # Limit the number of CPUs to %d\n", max_cpus);
		nrcpus = max_cpus;
	} else if (nrcpus > recommended_cpus) {
		printf(" # Warning: The maximum recommended amount of VCPUs"
			" is %d\n", recommended_cpus);
	}

	kvm->nrcpus = nrcpus;

	/* Alloc one pointer too many, so array ends up 0-terminated */
	kvm_cpus = calloc(nrcpus + 1, sizeof(void *));
	if (!kvm_cpus)
		die("Couldn't allocate array for %d CPUs", nrcpus);

	irq__init(kvm);

	pci__init();

	/*
	 * vidmode is either specified by the user or set to a default here:
	 * 0x312 with a framebuffer, 0 without one.
	 */
	if (vnc || sdl) {
		if (vidmode == -1)
			vidmode = 0x312;
	} else
		vidmode = 0;

	/*
	 * Build the kernel command line. Every append is bounded by the
	 * buffer size: kernel_cmdline comes from the user and may already
	 * fill most of real_cmdline.
	 */
	memset(real_cmdline, 0, sizeof(real_cmdline));
	kvm__arch_set_cmdline(real_cmdline, vnc || sdl);

	if (strlen(real_cmdline) > 0)
		strlcat(real_cmdline, " ", sizeof(real_cmdline));

	if (kernel_cmdline)
		strlcat(real_cmdline, kernel_cmdline, sizeof(real_cmdline));

	/* No rootfs, disk or initrd given: fall back to the default rootfs. */
	if (!using_rootfs && !image_filename[0] && !initrd_filename) {
		char tmp[PATH_MAX];

		kvm_setup_create_new(custom_rootfs_name);
		kvm_setup_resolv(custom_rootfs_name);

		snprintf(tmp, PATH_MAX, "%s%s", kvm__get_dir(), "default");
		if (virtio_9p__register(kvm, tmp, "/dev/root") < 0)
			die("Unable to initialize virtio 9p");
		if (virtio_9p__register(kvm, "/", "hostfs") < 0)
			die("Unable to initialize virtio 9p");
		using_rootfs = custom_rootfs = 1;
	}

	if (using_rootfs) {
		strlcat(real_cmdline, " root=/dev/root rw rootflags=rw,trans=virtio,version=9p2000.L rootfstype=9p",
			sizeof(real_cmdline));
		if (custom_rootfs) {
			kvm_run_set_sandbox();

			strlcat(real_cmdline, " init=/virt/init", sizeof(real_cmdline));

			if (!no_dhcp)
				strlcat(real_cmdline, " ip=dhcp", sizeof(real_cmdline));
			if (kvm_custom_stage2())
				die("Failed linking stage 2 of init.");
		}
	} else if (!strstr(real_cmdline, "root=")) {
		strlcat(real_cmdline, " root=/dev/vda rw ", sizeof(real_cmdline));
	}

	if (image_count) {
		kvm->nr_disks = image_count;
		kvm->disks    = disk_image__open_all(image_filename, readonly_image, image_count);
		if (!kvm->disks)
			die("Unable to load all disk images.");

		virtio_blk__init_all(kvm);
	}

	printf(" # %s run -k %s -m %llu -c %d --name %s\n", KVM_BINARY_NAME,
		kernel_filename, (unsigned long long)(ram_size / 1024 / 1024),
		nrcpus, guest_name);

	if (!kvm__load_kernel(kvm, kernel_filename, initrd_filename,
				real_cmdline, vidmode))
		die("unable to load kernel %s", kernel_filename);

	kvm->vmlinux		= vmlinux_filename;

	ioport__setup_arch();

	rtc__init();

	serial8250__init(kvm);

	if (active_console == CONSOLE_VIRTIO)
		virtio_console__init(kvm);

	if (virtio_rng)
		virtio_rng__init(kvm);

	if (balloon)
		virtio_bln__init(kvm);

	if (!network)
		network = DEFAULT_NETWORK;

	virtio_9p__init(kvm);

	for (i = 0; i < num_net_devices; i++) {
		net_params[i].kvm = kvm;
		virtio_net__init(&net_params[i]);
	}

	/* No NIC requested and networking not disabled: add a user-mode NIC. */
	if (num_net_devices == 0 && no_net == 0) {
		struct virtio_net_params default_nic;

		default_nic = (struct virtio_net_params) {
			.guest_ip	= guest_ip,
			.host_ip	= host_ip,
			.kvm		= kvm,
			.script		= script,
			.mode		= NET_MODE_USER,
		};
		str_to_mac(guest_mac, default_nic.guest_mac);
		str_to_mac(host_mac, default_nic.host_mac);

		virtio_net__init(&default_nic);
	}

	kvm__init_ram(kvm);

#ifdef CONFIG_X86
	kbd__init(kvm);
#endif

	pci_shmem__init(kvm);

	if (vnc || sdl)
		fb = vesa__init(kvm);

	if (vnc) {
		if (fb)
			vnc__init(fb);
	}

	if (sdl) {
		if (fb)
			sdl__init(fb);
	}

	fb__start();

	/* Device init all done; firmware init must
	 * come after this (it may set up device trees etc.)
	 */

	kvm__start_timer(kvm);

	exit_code = kvm__arch_setup_firmware(kvm);
	if (exit_code)
		goto err;

	for (i = 0; i < nrcpus; i++) {
		kvm_cpus[i] = kvm_cpu__init(kvm, i);
		if (!kvm_cpus[i])
			die("unable to initialize KVM VCPU");
	}

	thread_pool__init(nr_online_cpus);
	ioeventfd__start();

	for (i = 0; i < nrcpus; i++) {
		if (pthread_create(&kvm_cpus[i]->thread, NULL, kvm_cpu_thread, kvm_cpus[i]) != 0)
			die("unable to create KVM VCPU thread");
	}

	/*
	 * Only VCPU #0 is going to exit by itself when shutting down.
	 * Its result is checked here so that a failure is reported even on
	 * a single-CPU guest, where the loop below never runs.
	 */
	if (pthread_join(kvm_cpus[0]->thread, &ret) != 0 || ret != NULL)
		exit_code = 1;

	for (i = 1; i < nrcpus; i++) {
		if (kvm_cpus[i]->is_running) {
			pthread_kill(kvm_cpus[i]->thread, SIGKVMEXIT);
			if (pthread_join(kvm_cpus[i]->thread, &ret) != 0)
				die("pthread_join");
		}
		if (ret != NULL)
			exit_code = 1;
	}

err:
	compat__print_all_messages();

	fb__stop();

	virtio_blk__delete_all(kvm);
	virtio_rng__delete_all(kvm);

	disk_image__close_all(kvm->disks, image_count);
	kvm__delete(kvm);

	if (!exit_code)
		printf("\n # KVM session ended normally.\n");

	return exit_code;
}