1 #include "kvm/builtin-run.h" 2 3 #include "kvm/builtin-setup.h" 4 #include "kvm/virtio-balloon.h" 5 #include "kvm/virtio-console.h" 6 #include "kvm/parse-options.h" 7 #include "kvm/8250-serial.h" 8 #include "kvm/framebuffer.h" 9 #include "kvm/disk-image.h" 10 #include "kvm/threadpool.h" 11 #include "kvm/virtio-blk.h" 12 #include "kvm/virtio-net.h" 13 #include "kvm/virtio-rng.h" 14 #include "kvm/ioeventfd.h" 15 #include "kvm/virtio-9p.h" 16 #include "kvm/barrier.h" 17 #include "kvm/kvm-cpu.h" 18 #include "kvm/ioport.h" 19 #include "kvm/symbol.h" 20 #include "kvm/i8042.h" 21 #include "kvm/mutex.h" 22 #include "kvm/term.h" 23 #include "kvm/util.h" 24 #include "kvm/strbuf.h" 25 #include "kvm/vesa.h" 26 #include "kvm/irq.h" 27 #include "kvm/kvm.h" 28 #include "kvm/pci.h" 29 #include "kvm/rtc.h" 30 #include "kvm/sdl.h" 31 #include "kvm/vnc.h" 32 #include "kvm/guest_compat.h" 33 #include "kvm/pci-shmem.h" 34 #include "kvm/kvm-ipc.h" 35 #include "kvm/builtin-debug.h" 36 37 #include <linux/types.h> 38 #include <linux/err.h> 39 40 #include <sys/utsname.h> 41 #include <sys/types.h> 42 #include <sys/stat.h> 43 #include <termios.h> 44 #include <signal.h> 45 #include <stdlib.h> 46 #include <string.h> 47 #include <unistd.h> 48 #include <ctype.h> 49 #include <stdio.h> 50 51 #define DEFAULT_KVM_DEV "/dev/kvm" 52 #define DEFAULT_CONSOLE "serial" 53 #define DEFAULT_NETWORK "user" 54 #define DEFAULT_HOST_ADDR "192.168.33.1" 55 #define DEFAULT_GUEST_ADDR "192.168.33.15" 56 #define DEFAULT_GUEST_MAC "02:15:15:15:15:15" 57 #define DEFAULT_HOST_MAC "02:01:01:01:01:01" 58 #define DEFAULT_SCRIPT "none" 59 const char *DEFAULT_SANDBOX_FILENAME = "guest/sandbox.sh"; 60 61 #define MB_SHIFT (20) 62 #define KB_SHIFT (10) 63 #define GB_SHIFT (30) 64 #define MIN_RAM_SIZE_MB (64ULL) 65 #define MIN_RAM_SIZE_BYTE (MIN_RAM_SIZE_MB << MB_SHIFT) 66 67 struct kvm *kvm; 68 struct kvm_cpu **kvm_cpus; 69 __thread struct kvm_cpu *current_kvm_cpu; 70 71 static struct disk_image_params disk_image[MAX_DISK_IMAGES]; 72 static u64 ram_size; 73 static u8 image_count; 74 static u8 num_net_devices; 75 static bool virtio_rng; 76 static const char *kernel_cmdline; 77 static const char *kernel_filename; 78 static const char *vmlinux_filename; 79 static const char *initrd_filename; 80 static const char *firmware_filename; 81 static const char *console; 82 static const char *dev; 83 static const char *network; 84 static const char *host_ip; 85 static const char *guest_ip; 86 static const char *guest_mac; 87 static const char *host_mac; 88 static const char *script; 89 static const char *guest_name; 90 static const char *sandbox; 91 static const char *hugetlbfs_path; 92 static const char *custom_rootfs_name = "default"; 93 static struct virtio_net_params *net_params; 94 static bool single_step; 95 static bool vnc; 96 static bool sdl; 97 static bool balloon; 98 static bool using_rootfs; 99 static bool custom_rootfs; 100 static bool no_net; 101 static bool no_dhcp; 102 extern bool ioport_debug; 103 extern bool mmio_debug; 104 static int kvm_run_wrapper; 105 extern int active_console; 106 extern int debug_iodelay; 107 108 bool do_debug_print = false; 109 110 static int nrcpus; 111 static int vidmode = -1; 112 113 static const char * const run_usage[] = { 114 "lkvm run [<options>] [<kernel image>]", 115 NULL 116 }; 117 118 enum { 119 KVM_RUN_DEFAULT, 120 KVM_RUN_SANDBOX, 121 }; 122 123 void kvm_run_set_wrapper_sandbox(void) 124 { 125 kvm_run_wrapper = KVM_RUN_SANDBOX; 126 } 127 128 static int img_name_parser(const struct option *opt, const char *arg, int unset) 129 { 130 char *sep; 131 struct stat st; 132 char path[PATH_MAX]; 133 134 if (stat(arg, &st) == 0 && 135 S_ISDIR(st.st_mode)) { 136 char tmp[PATH_MAX]; 137 138 if (using_rootfs) 139 die("Please use only one rootfs directory atmost"); 140 141 if (realpath(arg, tmp) == 0 || 142 virtio_9p__register(kvm, tmp, "/dev/root") < 0) 143 die("Unable to initialize virtio 9p"); 144 using_rootfs = 1; 145 return 0; 146 } 147 148 snprintf(path, PATH_MAX, "%s%s", kvm__get_dir(), arg); 149 150 if (stat(path, &st) == 0 && 151 S_ISDIR(st.st_mode)) { 152 char tmp[PATH_MAX]; 153 154 if (using_rootfs) 155 die("Please use only one rootfs directory atmost"); 156 157 if (realpath(path, tmp) == 0 || 158 virtio_9p__register(kvm, tmp, "/dev/root") < 0) 159 die("Unable to initialize virtio 9p"); 160 if (virtio_9p__register(kvm, "/", "hostfs") < 0) 161 die("Unable to initialize virtio 9p"); 162 kvm_setup_resolv(arg); 163 using_rootfs = custom_rootfs = 1; 164 custom_rootfs_name = arg; 165 return 0; 166 } 167 168 if (image_count >= MAX_DISK_IMAGES) 169 die("Currently only 4 images are supported"); 170 171 disk_image[image_count].filename = arg; 172 sep = strstr(arg, ","); 173 if (sep) { 174 if (strcmp(sep + 1, "ro") == 0) 175 disk_image[image_count].readonly = true; 176 *sep = 0; 177 } 178 179 image_count++; 180 181 return 0; 182 } 183 184 static int virtio_9p_rootdir_parser(const struct option *opt, const char *arg, int unset) 185 { 186 char *tag_name; 187 char tmp[PATH_MAX]; 188 189 /* 190 * 9p dir can be of the form dirname,tag_name or 191 * just dirname. In the later case we use the 192 * default tag name 193 */ 194 tag_name = strstr(arg, ","); 195 if (tag_name) { 196 *tag_name = '\0'; 197 tag_name++; 198 } 199 if (realpath(arg, tmp)) { 200 if (virtio_9p__register(kvm, tmp, tag_name) < 0) 201 die("Unable to initialize virtio 9p"); 202 } else 203 die("Failed resolving 9p path"); 204 return 0; 205 } 206 207 static int tty_parser(const struct option *opt, const char *arg, int unset) 208 { 209 int tty = atoi(arg); 210 211 term_set_tty(tty); 212 213 return 0; 214 } 215 216 static inline void str_to_mac(const char *str, char *mac) 217 { 218 sscanf(str, "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx", 219 mac, mac+1, mac+2, mac+3, mac+4, mac+5); 220 } 221 static int set_net_param(struct virtio_net_params *p, const char *param, 222 const char *val) 223 { 224 if (strcmp(param, "guest_mac") == 0) { 225 str_to_mac(val, p->guest_mac); 226 } else if (strcmp(param, "mode") == 0) { 227 if (!strncmp(val, "user", 4)) { 228 int i; 229 230 for (i = 0; i < num_net_devices; i++) 231 if (net_params[i].mode == NET_MODE_USER) 232 die("Only one usermode network device allowed at a time"); 233 p->mode = NET_MODE_USER; 234 } else if (!strncmp(val, "tap", 3)) { 235 p->mode = NET_MODE_TAP; 236 } else if (!strncmp(val, "none", 4)) { 237 no_net = 1; 238 return -1; 239 } else 240 die("Unkown network mode %s, please use user, tap or none", network); 241 } else if (strcmp(param, "script") == 0) { 242 p->script = strdup(val); 243 } else if (strcmp(param, "guest_ip") == 0) { 244 p->guest_ip = strdup(val); 245 } else if (strcmp(param, "host_ip") == 0) { 246 p->host_ip = strdup(val); 247 } else if (strcmp(param, "trans") == 0) { 248 p->trans = strdup(val); 249 } else if (strcmp(param, "vhost") == 0) { 250 p->vhost = atoi(val); 251 } else if (strcmp(param, "fd") == 0) { 252 p->fd = atoi(val); 253 } 254 255 return 0; 256 } 257 258 static int netdev_parser(const struct option *opt, const char *arg, int unset) 259 { 260 struct virtio_net_params p; 261 char *buf = NULL, *cmd = NULL, *cur = NULL; 262 bool on_cmd = true; 263 264 if (arg) { 265 buf = strdup(arg); 266 if (buf == NULL) 267 die("Failed allocating new net buffer"); 268 cur = strtok(buf, ",="); 269 } 270 271 p = (struct virtio_net_params) { 272 .guest_ip = DEFAULT_GUEST_ADDR, 273 .host_ip = DEFAULT_HOST_ADDR, 274 .script = DEFAULT_SCRIPT, 275 .mode = NET_MODE_TAP, 276 }; 277 278 str_to_mac(DEFAULT_GUEST_MAC, p.guest_mac); 279 p.guest_mac[5] += num_net_devices; 280 281 while (cur) { 282 if (on_cmd) { 283 cmd = cur; 284 } else { 285 if (set_net_param(&p, cmd, cur) < 0) 286 goto done; 287 } 288 on_cmd = !on_cmd; 289 290 cur = strtok(NULL, ",="); 291 }; 292 293 num_net_devices++; 294 295 net_params = realloc(net_params, num_net_devices * sizeof(*net_params)); 296 if (net_params == NULL) 297 die("Failed adding new network device"); 298 299 net_params[num_net_devices - 1] = p; 300 301 done: 302 free(buf); 303 return 0; 304 } 305 306 static int shmem_parser(const struct option *opt, const char *arg, int unset) 307 { 308 const u64 default_size = SHMEM_DEFAULT_SIZE; 309 const u64 default_phys_addr = SHMEM_DEFAULT_ADDR; 310 const char *default_handle = SHMEM_DEFAULT_HANDLE; 311 struct shmem_info *si = malloc(sizeof(struct shmem_info)); 312 u64 phys_addr; 313 u64 size; 314 char *handle = NULL; 315 int create = 0; 316 const char *p = arg; 317 char *next; 318 int base = 10; 319 int verbose = 0; 320 321 const int skip_pci = strlen("pci:"); 322 if (verbose) 323 pr_info("shmem_parser(%p,%s,%d)", opt, arg, unset); 324 /* parse out optional addr family */ 325 if (strcasestr(p, "pci:")) { 326 p += skip_pci; 327 } else if (strcasestr(p, "mem:")) { 328 die("I can't add to E820 map yet.\n"); 329 } 330 /* parse out physical addr */ 331 base = 10; 332 if (strcasestr(p, "0x")) 333 base = 16; 334 phys_addr = strtoll(p, &next, base); 335 if (next == p && phys_addr == 0) { 336 pr_info("shmem: no physical addr specified, using default."); 337 phys_addr = default_phys_addr; 338 } 339 if (*next != ':' && *next != '\0') 340 die("shmem: unexpected chars after phys addr.\n"); 341 if (*next == '\0') 342 p = next; 343 else 344 p = next + 1; 345 /* parse out size */ 346 base = 10; 347 if (strcasestr(p, "0x")) 348 base = 16; 349 size = strtoll(p, &next, base); 350 if (next == p && size == 0) { 351 pr_info("shmem: no size specified, using default."); 352 size = default_size; 353 } 354 /* look for [KMGkmg][Bb]* uses base 2. */ 355 int skip_B = 0; 356 if (strspn(next, "KMGkmg")) { /* might have a prefix */ 357 if (*(next + 1) == 'B' || *(next + 1) == 'b') 358 skip_B = 1; 359 switch (*next) { 360 case 'K': 361 case 'k': 362 size = size << KB_SHIFT; 363 break; 364 case 'M': 365 case 'm': 366 size = size << MB_SHIFT; 367 break; 368 case 'G': 369 case 'g': 370 size = size << GB_SHIFT; 371 break; 372 default: 373 die("shmem: bug in detecting size prefix."); 374 break; 375 } 376 next += 1 + skip_B; 377 } 378 if (*next != ':' && *next != '\0') { 379 die("shmem: unexpected chars after phys size. <%c><%c>\n", 380 *next, *p); 381 } 382 if (*next == '\0') 383 p = next; 384 else 385 p = next + 1; 386 /* parse out optional shmem handle */ 387 const int skip_handle = strlen("handle="); 388 next = strcasestr(p, "handle="); 389 if (*p && next) { 390 if (p != next) 391 die("unexpected chars before handle\n"); 392 p += skip_handle; 393 next = strchrnul(p, ':'); 394 if (next - p) { 395 handle = malloc(next - p + 1); 396 strncpy(handle, p, next - p); 397 handle[next - p] = '\0'; /* just in case. */ 398 } 399 if (*next == '\0') 400 p = next; 401 else 402 p = next + 1; 403 } 404 /* parse optional create flag to see if we should create shm seg. */ 405 if (*p && strcasestr(p, "create")) { 406 create = 1; 407 p += strlen("create"); 408 } 409 if (*p != '\0') 410 die("shmem: unexpected trailing chars\n"); 411 if (handle == NULL) { 412 handle = malloc(strlen(default_handle) + 1); 413 strcpy(handle, default_handle); 414 } 415 if (verbose) { 416 pr_info("shmem: phys_addr = %llx", phys_addr); 417 pr_info("shmem: size = %llx", size); 418 pr_info("shmem: handle = %s", handle); 419 pr_info("shmem: create = %d", create); 420 } 421 422 si->phys_addr = phys_addr; 423 si->size = size; 424 si->handle = handle; 425 si->create = create; 426 pci_shmem__register_mem(si); /* ownership of si, etc. passed on. */ 427 return 0; 428 } 429 430 static const struct option options[] = { 431 OPT_GROUP("Basic options:"), 432 OPT_STRING('\0', "name", &guest_name, "guest name", 433 "A name for the guest"), 434 OPT_INTEGER('c', "cpus", &nrcpus, "Number of CPUs"), 435 OPT_U64('m', "mem", &ram_size, "Virtual machine memory size in MiB."), 436 OPT_CALLBACK('\0', "shmem", NULL, 437 "[pci:]<addr>:<size>[:handle=<handle>][:create]", 438 "Share host shmem with guest via pci device", 439 shmem_parser), 440 OPT_CALLBACK('d', "disk", NULL, "image or rootfs_dir", "Disk image or rootfs directory", img_name_parser), 441 OPT_BOOLEAN('\0', "balloon", &balloon, "Enable virtio balloon"), 442 OPT_BOOLEAN('\0', "vnc", &vnc, "Enable VNC framebuffer"), 443 OPT_BOOLEAN('\0', "sdl", &sdl, "Enable SDL framebuffer"), 444 OPT_BOOLEAN('\0', "rng", &virtio_rng, "Enable virtio Random Number Generator"), 445 OPT_CALLBACK('\0', "9p", NULL, "dir_to_share,tag_name", 446 "Enable virtio 9p to share files between host and guest", virtio_9p_rootdir_parser), 447 OPT_STRING('\0', "console", &console, "serial, virtio or hv", 448 "Console to use"), 449 OPT_STRING('\0', "dev", &dev, "device_file", "KVM device file"), 450 OPT_CALLBACK('\0', "tty", NULL, "tty id", 451 "Remap guest TTY into a pty on the host", 452 tty_parser), 453 OPT_STRING('\0', "sandbox", &sandbox, "script", 454 "Run this script when booting into custom rootfs"), 455 OPT_STRING('\0', "hugetlbfs", &hugetlbfs_path, "path", "Hugetlbfs path"), 456 457 OPT_GROUP("Kernel options:"), 458 OPT_STRING('k', "kernel", &kernel_filename, "kernel", 459 "Kernel to boot in virtual machine"), 460 OPT_STRING('i', "initrd", &initrd_filename, "initrd", 461 "Initial RAM disk image"), 462 OPT_STRING('p', "params", &kernel_cmdline, "params", 463 "Kernel command line arguments"), 464 OPT_STRING('f', "firmware", &firmware_filename, "firmware", 465 "Firmware image to boot in virtual machine"), 466 467 OPT_GROUP("Networking options:"), 468 OPT_CALLBACK_DEFAULT('n', "network", NULL, "network params", 469 "Create a new guest NIC", 470 netdev_parser, NULL), 471 OPT_BOOLEAN('\0', "no-dhcp", &no_dhcp, "Disable kernel DHCP in rootfs mode"), 472 473 OPT_GROUP("BIOS options:"), 474 OPT_INTEGER('\0', "vidmode", &vidmode, 475 "Video mode"), 476 477 OPT_GROUP("Debug options:"), 478 OPT_BOOLEAN('\0', "debug", &do_debug_print, 479 "Enable debug messages"), 480 OPT_BOOLEAN('\0', "debug-single-step", &single_step, 481 "Enable single stepping"), 482 OPT_BOOLEAN('\0', "debug-ioport", &ioport_debug, 483 "Enable ioport debugging"), 484 OPT_BOOLEAN('\0', "debug-mmio", &mmio_debug, 485 "Enable MMIO debugging"), 486 OPT_INTEGER('\0', "debug-iodelay", &debug_iodelay, 487 "Delay IO by millisecond"), 488 OPT_END() 489 }; 490 491 /* 492 * Serialize debug printout so that the output of multiple vcpus does not 493 * get mixed up: 494 */ 495 static int printout_done; 496 497 static void handle_sigusr1(int sig) 498 { 499 struct kvm_cpu *cpu = current_kvm_cpu; 500 int fd = kvm_cpu__get_debug_fd(); 501 502 if (!cpu || cpu->needs_nmi) 503 return; 504 505 dprintf(fd, "\n #\n # vCPU #%ld's dump:\n #\n", cpu->cpu_id); 506 kvm_cpu__show_registers(cpu); 507 kvm_cpu__show_code(cpu); 508 kvm_cpu__show_page_tables(cpu); 509 fflush(stdout); 510 printout_done = 1; 511 mb(); 512 } 513 514 /* Pause/resume the guest using SIGUSR2 */ 515 static int is_paused; 516 517 static void handle_pause(int fd, u32 type, u32 len, u8 *msg) 518 { 519 if (WARN_ON(len)) 520 return; 521 522 if (type == KVM_IPC_RESUME && is_paused) { 523 kvm->vm_state = KVM_VMSTATE_RUNNING; 524 kvm__continue(); 525 } else if (type == KVM_IPC_PAUSE && !is_paused) { 526 kvm->vm_state = KVM_VMSTATE_PAUSED; 527 kvm__pause(); 528 } else { 529 return; 530 } 531 532 is_paused = !is_paused; 533 } 534 535 static void handle_vmstate(int fd, u32 type, u32 len, u8 *msg) 536 { 537 int r = 0; 538 539 if (type == KVM_IPC_VMSTATE) 540 r = write(fd, &kvm->vm_state, sizeof(kvm->vm_state)); 541 542 if (r < 0) 543 pr_warning("Failed sending VMSTATE"); 544 } 545 546 static void handle_debug(int fd, u32 type, u32 len, u8 *msg) 547 { 548 int i; 549 struct debug_cmd_params *params; 550 u32 dbg_type; 551 u32 vcpu; 552 553 if (WARN_ON(type != KVM_IPC_DEBUG || len != sizeof(*params))) 554 return; 555 556 params = (void *)msg; 557 dbg_type = params->dbg_type; 558 vcpu = params->cpu; 559 560 if (dbg_type & KVM_DEBUG_CMD_TYPE_SYSRQ) 561 serial8250__inject_sysrq(kvm, params->sysrq); 562 563 if (dbg_type & KVM_DEBUG_CMD_TYPE_NMI) { 564 if ((int)vcpu >= kvm->nrcpus) 565 return; 566 567 kvm_cpus[vcpu]->needs_nmi = 1; 568 pthread_kill(kvm_cpus[vcpu]->thread, SIGUSR1); 569 } 570 571 if (!(dbg_type & KVM_DEBUG_CMD_TYPE_DUMP)) 572 return; 573 574 for (i = 0; i < nrcpus; i++) { 575 struct kvm_cpu *cpu = kvm_cpus[i]; 576 577 if (!cpu) 578 continue; 579 580 printout_done = 0; 581 582 kvm_cpu__set_debug_fd(fd); 583 pthread_kill(cpu->thread, SIGUSR1); 584 /* 585 * Wait for the vCPU to dump state before signalling 586 * the next thread. Since this is debug code it does 587 * not matter that we are burning CPU time a bit: 588 */ 589 while (!printout_done) 590 mb(); 591 } 592 593 close(fd); 594 595 serial8250__inject_sysrq(kvm, 'p'); 596 } 597 598 static void handle_sigalrm(int sig) 599 { 600 kvm__arch_periodic_poll(kvm); 601 } 602 603 static void handle_stop(int fd, u32 type, u32 len, u8 *msg) 604 { 605 if (WARN_ON(type != KVM_IPC_STOP || len)) 606 return; 607 608 kvm_cpu__reboot(); 609 } 610 611 static void *kvm_cpu_thread(void *arg) 612 { 613 current_kvm_cpu = arg; 614 615 if (kvm_cpu__start(current_kvm_cpu)) 616 goto panic_kvm; 617 618 return (void *) (intptr_t) 0; 619 620 panic_kvm: 621 fprintf(stderr, "KVM exit reason: %u (\"%s\")\n", 622 current_kvm_cpu->kvm_run->exit_reason, 623 kvm_exit_reasons[current_kvm_cpu->kvm_run->exit_reason]); 624 if (current_kvm_cpu->kvm_run->exit_reason == KVM_EXIT_UNKNOWN) 625 fprintf(stderr, "KVM exit code: 0x%Lu\n", 626 current_kvm_cpu->kvm_run->hw.hardware_exit_reason); 627 628 kvm_cpu__set_debug_fd(STDOUT_FILENO); 629 kvm_cpu__show_registers(current_kvm_cpu); 630 kvm_cpu__show_code(current_kvm_cpu); 631 kvm_cpu__show_page_tables(current_kvm_cpu); 632 633 return (void *) (intptr_t) 1; 634 } 635 636 static char kernel[PATH_MAX]; 637 638 static const char *host_kernels[] = { 639 "/boot/vmlinuz", 640 "/boot/bzImage", 641 NULL 642 }; 643 644 static const char *default_kernels[] = { 645 "./bzImage", 646 "arch/" BUILD_ARCH "/boot/bzImage", 647 "../../arch/" BUILD_ARCH "/boot/bzImage", 648 NULL 649 }; 650 651 static const char *default_vmlinux[] = { 652 "vmlinux", 653 "../../../vmlinux", 654 "../../vmlinux", 655 NULL 656 }; 657 658 static void kernel_usage_with_options(void) 659 { 660 const char **k; 661 struct utsname uts; 662 663 fprintf(stderr, "Fatal: could not find default kernel image in:\n"); 664 k = &default_kernels[0]; 665 while (*k) { 666 fprintf(stderr, "\t%s\n", *k); 667 k++; 668 } 669 670 if (uname(&uts) < 0) 671 return; 672 673 k = &host_kernels[0]; 674 while (*k) { 675 if (snprintf(kernel, PATH_MAX, "%s-%s", *k, uts.release) < 0) 676 return; 677 fprintf(stderr, "\t%s\n", kernel); 678 k++; 679 } 680 fprintf(stderr, "\nPlease see '%s run --help' for more options.\n\n", 681 KVM_BINARY_NAME); 682 } 683 684 static u64 host_ram_size(void) 685 { 686 long page_size; 687 long nr_pages; 688 689 nr_pages = sysconf(_SC_PHYS_PAGES); 690 if (nr_pages < 0) { 691 pr_warning("sysconf(_SC_PHYS_PAGES) failed"); 692 return 0; 693 } 694 695 page_size = sysconf(_SC_PAGE_SIZE); 696 if (page_size < 0) { 697 pr_warning("sysconf(_SC_PAGE_SIZE) failed"); 698 return 0; 699 } 700 701 return (nr_pages * page_size) >> MB_SHIFT; 702 } 703 704 /* 705 * If user didn't specify how much memory it wants to allocate for the guest, 706 * avoid filling the whole host RAM. 707 */ 708 #define RAM_SIZE_RATIO 0.8 709 710 static u64 get_ram_size(int nr_cpus) 711 { 712 u64 available; 713 u64 ram_size; 714 715 ram_size = 64 * (nr_cpus + 3); 716 717 available = host_ram_size() * RAM_SIZE_RATIO; 718 if (!available) 719 available = MIN_RAM_SIZE_MB; 720 721 if (ram_size > available) 722 ram_size = available; 723 724 return ram_size; 725 } 726 727 static const char *find_kernel(void) 728 { 729 const char **k; 730 struct stat st; 731 struct utsname uts; 732 733 k = &default_kernels[0]; 734 while (*k) { 735 if (stat(*k, &st) < 0 || !S_ISREG(st.st_mode)) { 736 k++; 737 continue; 738 } 739 strncpy(kernel, *k, PATH_MAX); 740 return kernel; 741 } 742 743 if (uname(&uts) < 0) 744 return NULL; 745 746 k = &host_kernels[0]; 747 while (*k) { 748 if (snprintf(kernel, PATH_MAX, "%s-%s", *k, uts.release) < 0) 749 return NULL; 750 751 if (stat(kernel, &st) < 0 || !S_ISREG(st.st_mode)) { 752 k++; 753 continue; 754 } 755 return kernel; 756 757 } 758 return NULL; 759 } 760 761 static const char *find_vmlinux(void) 762 { 763 const char **vmlinux; 764 765 vmlinux = &default_vmlinux[0]; 766 while (*vmlinux) { 767 struct stat st; 768 769 if (stat(*vmlinux, &st) < 0 || !S_ISREG(st.st_mode)) { 770 vmlinux++; 771 continue; 772 } 773 return *vmlinux; 774 } 775 return NULL; 776 } 777 778 void kvm_run_help(void) 779 { 780 usage_with_options(run_usage, options); 781 } 782 783 static int kvm_custom_stage2(void) 784 { 785 char tmp[PATH_MAX], dst[PATH_MAX], *src; 786 const char *rootfs = custom_rootfs_name; 787 int r; 788 789 src = realpath("guest/init_stage2", NULL); 790 if (src == NULL) 791 return -ENOMEM; 792 793 snprintf(tmp, PATH_MAX, "%s%s/virt/init_stage2", kvm__get_dir(), rootfs); 794 remove(tmp); 795 796 snprintf(dst, PATH_MAX, "/host/%s", src); 797 r = symlink(dst, tmp); 798 free(src); 799 800 return r; 801 } 802 803 static int kvm_run_set_sandbox(void) 804 { 805 const char *guestfs_name = custom_rootfs_name; 806 char path[PATH_MAX], script[PATH_MAX], *tmp; 807 808 snprintf(path, PATH_MAX, "%s%s/virt/sandbox.sh", kvm__get_dir(), guestfs_name); 809 810 remove(path); 811 812 if (sandbox == NULL) 813 return 0; 814 815 tmp = realpath(sandbox, NULL); 816 if (tmp == NULL) 817 return -ENOMEM; 818 819 snprintf(script, PATH_MAX, "/host/%s", tmp); 820 free(tmp); 821 822 return symlink(script, path); 823 } 824 825 static void kvm_write_sandbox_cmd_exactly(int fd, const char *arg) 826 { 827 const char *single_quote; 828 829 if (!*arg) { /* zero length string */ 830 if (write(fd, "''", 2) <= 0) 831 die("Failed writing sandbox script"); 832 return; 833 } 834 835 while (*arg) { 836 single_quote = strchrnul(arg, '\''); 837 838 /* write non-single-quote string as #('string') */ 839 if (arg != single_quote) { 840 if (write(fd, "'", 1) <= 0 || 841 write(fd, arg, single_quote - arg) <= 0 || 842 write(fd, "'", 1) <= 0) 843 die("Failed writing sandbox script"); 844 } 845 846 /* write single quote as #("'") */ 847 if (*single_quote) { 848 if (write(fd, "\"'\"", 3) <= 0) 849 die("Failed writing sandbox script"); 850 } else 851 break; 852 853 arg = single_quote + 1; 854 } 855 } 856 857 static void resolve_program(const char *src, char *dst, size_t len) 858 { 859 struct stat st; 860 int err; 861 862 err = stat(src, &st); 863 864 if (!err && S_ISREG(st.st_mode)) { 865 char resolved_path[PATH_MAX]; 866 867 if (!realpath(src, resolved_path)) 868 die("Unable to resolve program %s: %s\n", src, strerror(errno)); 869 870 snprintf(dst, len, "/host%s", resolved_path); 871 } else 872 strncpy(dst, src, len); 873 } 874 875 static void kvm_run_write_sandbox_cmd(const char **argv, int argc) 876 { 877 const char script_hdr[] = "#! /bin/bash\n\n"; 878 char program[PATH_MAX]; 879 int fd; 880 881 remove(sandbox); 882 883 fd = open(sandbox, O_RDWR | O_CREAT, 0777); 884 if (fd < 0) 885 die("Failed creating sandbox script"); 886 887 if (write(fd, script_hdr, sizeof(script_hdr) - 1) <= 0) 888 die("Failed writing sandbox script"); 889 890 resolve_program(argv[0], program, PATH_MAX); 891 kvm_write_sandbox_cmd_exactly(fd, program); 892 893 argv++; 894 argc--; 895 896 while (argc) { 897 if (write(fd, " ", 1) <= 0) 898 die("Failed writing sandbox script"); 899 900 kvm_write_sandbox_cmd_exactly(fd, argv[0]); 901 argv++; 902 argc--; 903 } 904 if (write(fd, "\n", 1) <= 0) 905 die("Failed writing sandbox script"); 906 907 close(fd); 908 } 909 910 static int kvm_cmd_run_init(int argc, const char **argv) 911 { 912 static char real_cmdline[2048], default_name[20]; 913 struct framebuffer *fb = NULL; 914 unsigned int nr_online_cpus; 915 int max_cpus, recommended_cpus; 916 int i, r; 917 918 signal(SIGALRM, handle_sigalrm); 919 kvm_ipc__register_handler(KVM_IPC_DEBUG, handle_debug); 920 signal(SIGUSR1, handle_sigusr1); 921 kvm_ipc__register_handler(KVM_IPC_PAUSE, handle_pause); 922 kvm_ipc__register_handler(KVM_IPC_RESUME, handle_pause); 923 kvm_ipc__register_handler(KVM_IPC_STOP, handle_stop); 924 kvm_ipc__register_handler(KVM_IPC_VMSTATE, handle_vmstate); 925 926 nr_online_cpus = sysconf(_SC_NPROCESSORS_ONLN); 927 928 while (argc != 0) { 929 argc = parse_options(argc, argv, options, run_usage, 930 PARSE_OPT_STOP_AT_NON_OPTION | 931 PARSE_OPT_KEEP_DASHDASH); 932 if (argc != 0) { 933 /* Cusrom options, should have been handled elsewhere */ 934 if (strcmp(argv[0], "--") == 0) { 935 if (kvm_run_wrapper == KVM_RUN_SANDBOX) { 936 sandbox = DEFAULT_SANDBOX_FILENAME; 937 kvm_run_write_sandbox_cmd(argv+1, argc-1); 938 break; 939 } 940 } 941 942 if ((kvm_run_wrapper == KVM_RUN_DEFAULT && kernel_filename) || 943 (kvm_run_wrapper == KVM_RUN_SANDBOX && sandbox)) { 944 fprintf(stderr, "Cannot handle parameter: " 945 "%s\n", argv[0]); 946 usage_with_options(run_usage, options); 947 return EINVAL; 948 } 949 if (kvm_run_wrapper == KVM_RUN_SANDBOX) { 950 /* 951 * first unhandled parameter is treated as 952 * sandbox command 953 */ 954 sandbox = DEFAULT_SANDBOX_FILENAME; 955 kvm_run_write_sandbox_cmd(argv, argc); 956 } else { 957 /* 958 * first unhandled parameter is treated as a kernel 959 * image 960 */ 961 kernel_filename = argv[0]; 962 } 963 argv++; 964 argc--; 965 } 966 967 } 968 969 if (!kernel_filename) 970 kernel_filename = find_kernel(); 971 972 if (!kernel_filename) { 973 kernel_usage_with_options(); 974 return EINVAL; 975 } 976 977 vmlinux_filename = find_vmlinux(); 978 979 if (nrcpus == 0) 980 nrcpus = nr_online_cpus; 981 982 if (!ram_size) 983 ram_size = get_ram_size(nrcpus); 984 985 if (ram_size < MIN_RAM_SIZE_MB) 986 die("Not enough memory specified: %lluMB (min %lluMB)", ram_size, MIN_RAM_SIZE_MB); 987 988 if (ram_size > host_ram_size()) 989 pr_warning("Guest memory size %lluMB exceeds host physical RAM size %lluMB", ram_size, host_ram_size()); 990 991 ram_size <<= MB_SHIFT; 992 993 if (!dev) 994 dev = DEFAULT_KVM_DEV; 995 996 if (!console) 997 console = DEFAULT_CONSOLE; 998 999 if (!strncmp(console, "virtio", 6)) 1000 active_console = CONSOLE_VIRTIO; 1001 else if (!strncmp(console, "serial", 6)) 1002 active_console = CONSOLE_8250; 1003 else if (!strncmp(console, "hv", 2)) 1004 active_console = CONSOLE_HV; 1005 else 1006 pr_warning("No console!"); 1007 1008 if (!host_ip) 1009 host_ip = DEFAULT_HOST_ADDR; 1010 1011 if (!guest_ip) 1012 guest_ip = DEFAULT_GUEST_ADDR; 1013 1014 if (!guest_mac) 1015 guest_mac = DEFAULT_GUEST_MAC; 1016 1017 if (!host_mac) 1018 host_mac = DEFAULT_HOST_MAC; 1019 1020 if (!script) 1021 script = DEFAULT_SCRIPT; 1022 1023 term_init(); 1024 1025 if (!guest_name) { 1026 if (custom_rootfs) { 1027 guest_name = custom_rootfs_name; 1028 } else { 1029 sprintf(default_name, "guest-%u", getpid()); 1030 guest_name = default_name; 1031 } 1032 } 1033 1034 kvm = kvm__init(dev, hugetlbfs_path, ram_size, guest_name); 1035 if (IS_ERR(kvm)) { 1036 r = PTR_ERR(kvm); 1037 goto fail; 1038 } 1039 1040 kvm->single_step = single_step; 1041 1042 r = ioeventfd__init(kvm); 1043 if (r < 0) { 1044 pr_err("ioeventfd__init() failed with error %d\n", r); 1045 goto fail; 1046 } 1047 1048 max_cpus = kvm__max_cpus(kvm); 1049 recommended_cpus = kvm__recommended_cpus(kvm); 1050 1051 if (nrcpus > max_cpus) { 1052 printf(" # Limit the number of CPUs to %d\n", max_cpus); 1053 nrcpus = max_cpus; 1054 } else if (nrcpus > recommended_cpus) { 1055 printf(" # Warning: The maximum recommended amount of VCPUs" 1056 " is %d\n", recommended_cpus); 1057 } 1058 1059 kvm->nrcpus = nrcpus; 1060 1061 /* Alloc one pointer too many, so array ends up 0-terminated */ 1062 kvm_cpus = calloc(nrcpus + 1, sizeof(void *)); 1063 if (!kvm_cpus) 1064 die("Couldn't allocate array for %d CPUs", nrcpus); 1065 1066 r = irq__init(kvm); 1067 if (r < 0) { 1068 pr_err("irq__init() failed with error %d\n", r); 1069 goto fail; 1070 } 1071 1072 r = pci__init(kvm); 1073 if (r < 0) { 1074 pr_err("pci__init() failed with error %d\n", r); 1075 goto fail; 1076 } 1077 1078 r = ioport__init(kvm); 1079 if (r < 0) { 1080 pr_err("ioport__init() failed with error %d\n", r); 1081 goto fail; 1082 } 1083 1084 /* 1085 * vidmode should be either specified 1086 * either set by default 1087 */ 1088 if (vnc || sdl) { 1089 if (vidmode == -1) 1090 vidmode = 0x312; 1091 } else { 1092 vidmode = 0; 1093 } 1094 1095 memset(real_cmdline, 0, sizeof(real_cmdline)); 1096 kvm__arch_set_cmdline(real_cmdline, vnc || sdl); 1097 1098 if (strlen(real_cmdline) > 0) 1099 strcat(real_cmdline, " "); 1100 1101 if (kernel_cmdline) 1102 strlcat(real_cmdline, kernel_cmdline, sizeof(real_cmdline)); 1103 1104 if (!using_rootfs && !disk_image[0].filename && !initrd_filename) { 1105 char tmp[PATH_MAX]; 1106 1107 kvm_setup_create_new(custom_rootfs_name); 1108 kvm_setup_resolv(custom_rootfs_name); 1109 1110 snprintf(tmp, PATH_MAX, "%s%s", kvm__get_dir(), "default"); 1111 if (virtio_9p__register(kvm, tmp, "/dev/root") < 0) 1112 die("Unable to initialize virtio 9p"); 1113 if (virtio_9p__register(kvm, "/", "hostfs") < 0) 1114 die("Unable to initialize virtio 9p"); 1115 using_rootfs = custom_rootfs = 1; 1116 } 1117 1118 if (using_rootfs) { 1119 strcat(real_cmdline, " root=/dev/root rw rootflags=rw,trans=virtio,version=9p2000.L rootfstype=9p"); 1120 if (custom_rootfs) { 1121 kvm_run_set_sandbox(); 1122 1123 strcat(real_cmdline, " init=/virt/init"); 1124 1125 if (!no_dhcp) 1126 strcat(real_cmdline, " ip=dhcp"); 1127 if (kvm_custom_stage2()) 1128 die("Failed linking stage 2 of init."); 1129 } 1130 } else if (!strstr(real_cmdline, "root=")) { 1131 strlcat(real_cmdline, " root=/dev/vda rw ", sizeof(real_cmdline)); 1132 } 1133 1134 if (image_count) { 1135 kvm->nr_disks = image_count; 1136 kvm->disks = disk_image__open_all((struct disk_image_params *)&disk_image, image_count); 1137 if (IS_ERR(kvm->disks)) { 1138 r = PTR_ERR(kvm->disks); 1139 pr_err("disk_image__open_all() failed with error %ld\n", 1140 PTR_ERR(kvm->disks)); 1141 goto fail; 1142 } 1143 } 1144 1145 printf(" # %s run -k %s -m %Lu -c %d --name %s\n", KVM_BINARY_NAME, 1146 kernel_filename, ram_size / 1024 / 1024, nrcpus, guest_name); 1147 1148 if (!firmware_filename) { 1149 if (!kvm__load_kernel(kvm, kernel_filename, 1150 initrd_filename, real_cmdline, vidmode)) 1151 die("unable to load kernel %s", kernel_filename); 1152 1153 kvm->vmlinux = vmlinux_filename; 1154 r = symbol_init(kvm); 1155 if (r < 0) 1156 pr_debug("symbol_init() failed with error %d\n", r); 1157 } 1158 1159 ioport__setup_arch(); 1160 1161 r = rtc__init(kvm); 1162 if (r < 0) { 1163 pr_err("rtc__init() failed with error %d\n", r); 1164 goto fail; 1165 } 1166 1167 r = serial8250__init(kvm); 1168 if (r < 0) { 1169 pr_err("serial__init() failed with error %d\n", r); 1170 goto fail; 1171 } 1172 1173 r = virtio_blk__init(kvm); 1174 if (r < 0) { 1175 pr_err("virtio_blk__init() failed with error %d\n", r); 1176 goto fail; 1177 } 1178 1179 if (active_console == CONSOLE_VIRTIO) 1180 virtio_console__init(kvm); 1181 1182 if (virtio_rng) 1183 virtio_rng__init(kvm); 1184 1185 if (balloon) 1186 virtio_bln__init(kvm); 1187 1188 if (!network) 1189 network = DEFAULT_NETWORK; 1190 1191 virtio_9p__init(kvm); 1192 1193 for (i = 0; i < num_net_devices; i++) { 1194 net_params[i].kvm = kvm; 1195 virtio_net__init(&net_params[i]); 1196 } 1197 1198 if (num_net_devices == 0 && no_net == 0) { 1199 struct virtio_net_params net_params; 1200 1201 net_params = (struct virtio_net_params) { 1202 .guest_ip = guest_ip, 1203 .host_ip = host_ip, 1204 .kvm = kvm, 1205 .script = script, 1206 .mode = NET_MODE_USER, 1207 }; 1208 str_to_mac(guest_mac, net_params.guest_mac); 1209 str_to_mac(host_mac, net_params.host_mac); 1210 1211 virtio_net__init(&net_params); 1212 } 1213 1214 kvm__init_ram(kvm); 1215 1216 #ifdef CONFIG_X86 1217 kbd__init(kvm); 1218 #endif 1219 1220 pci_shmem__init(kvm); 1221 1222 if (vnc || sdl) { 1223 fb = vesa__init(kvm); 1224 if (IS_ERR(fb)) { 1225 pr_err("vesa__init() failed with error %ld\n", PTR_ERR(fb)); 1226 goto fail; 1227 } 1228 } 1229 1230 if (vnc && fb) { 1231 r = vnc__init(fb); 1232 if (r < 0) { 1233 pr_err("vnc__init() failed with error %d\n", r); 1234 goto fail; 1235 } 1236 } 1237 1238 if (sdl && fb) { 1239 sdl__init(fb); 1240 if (r < 0) { 1241 pr_err("sdl__init() failed with error %d\n", r); 1242 goto fail; 1243 } 1244 } 1245 1246 r = fb__start(); 1247 if (r < 0) { 1248 pr_err("fb__init() failed with error %d\n", r); 1249 goto fail; 1250 } 1251 1252 /* Device init all done; firmware init must 1253 * come after this (it may set up device trees etc.) 1254 */ 1255 1256 kvm__start_timer(kvm); 1257 1258 if (firmware_filename) { 1259 if (!kvm__load_firmware(kvm, firmware_filename)) 1260 die("unable to load firmware image %s: %s", firmware_filename, strerror(errno)); 1261 } else { 1262 kvm__arch_setup_firmware(kvm); 1263 if (r < 0) { 1264 pr_err("kvm__arch_setup_firmware() failed with error %d\n", r); 1265 goto fail; 1266 } 1267 } 1268 1269 for (i = 0; i < nrcpus; i++) { 1270 kvm_cpus[i] = kvm_cpu__init(kvm, i); 1271 if (!kvm_cpus[i]) 1272 die("unable to initialize KVM VCPU"); 1273 } 1274 1275 thread_pool__init(nr_online_cpus); 1276 fail: 1277 return r; 1278 } 1279 1280 static int kvm_cmd_run_work(void) 1281 { 1282 int i, r = -1; 1283 void *ret = NULL; 1284 1285 for (i = 0; i < nrcpus; i++) { 1286 if (pthread_create(&kvm_cpus[i]->thread, NULL, kvm_cpu_thread, kvm_cpus[i]) != 0) 1287 die("unable to create KVM VCPU thread"); 1288 } 1289 1290 /* Only VCPU #0 is going to exit by itself when shutting down */ 1291 if (pthread_join(kvm_cpus[0]->thread, &ret) != 0) 1292 r = 0; 1293 1294 kvm_cpu__delete(kvm_cpus[0]); 1295 kvm_cpus[0] = NULL; 1296 1297 for (i = 1; i < nrcpus; i++) { 1298 if (kvm_cpus[i]->is_running) { 1299 pthread_kill(kvm_cpus[i]->thread, SIGKVMEXIT); 1300 if (pthread_join(kvm_cpus[i]->thread, &ret) != 0) 1301 die("pthread_join"); 1302 kvm_cpu__delete(kvm_cpus[i]); 1303 } 1304 if (ret == NULL) 1305 r = 0; 1306 } 1307 1308 return r; 1309 } 1310 1311 static void kvm_cmd_run_exit(int guest_ret) 1312 { 1313 int r = 0; 1314 1315 compat__print_all_messages(); 1316 1317 r = symbol_exit(kvm); 1318 if (r < 0) 1319 pr_warning("symbol_exit() failed with error %d\n", r); 1320 1321 r = irq__exit(kvm); 1322 if (r < 0) 1323 pr_warning("irq__exit() failed with error %d\n", r); 1324 1325 fb__stop(); 1326 1327 r = virtio_blk__exit(kvm); 1328 if (r < 0) 1329 pr_warning("virtio_blk__exit() failed with error %d\n", r); 1330 1331 r = virtio_rng__exit(kvm); 1332 if (r < 0) 1333 pr_warning("virtio_rng__exit() failed with error %d\n", r); 1334 1335 r = disk_image__close_all(kvm->disks, image_count); 1336 if (r < 0) 1337 pr_warning("disk_image__close_all() failed with error %d\n", r); 1338 1339 r = serial8250__exit(kvm); 1340 if (r < 0) 1341 pr_warning("serial8250__exit() failed with error %d\n", r); 1342 1343 r = rtc__exit(kvm); 1344 if (r < 0) 1345 pr_warning("rtc__exit() failed with error %d\n", r); 1346 1347 r = kvm__arch_free_firmware(kvm); 1348 if (r < 0) 1349 pr_warning("kvm__arch_free_firmware() failed with error %d\n", r); 1350 1351 r = ioport__exit(kvm); 1352 if (r < 0) 1353 pr_warning("ioport__exit() failed with error %d\n", r); 1354 1355 r = ioeventfd__exit(kvm); 1356 if (r < 0) 1357 pr_warning("ioeventfd__exit() failed with error %d\n", r); 1358 1359 r = pci__exit(kvm); 1360 if (r < 0) 1361 pr_warning("pci__exit() failed with error %d\n", r); 1362 1363 r = kvm__exit(kvm); 1364 if (r < 0) 1365 pr_warning("pci__exit() failed with error %d\n", r); 1366 1367 free(kvm_cpus); 1368 1369 if (guest_ret == 0) 1370 printf("\n # KVM session ended normally.\n"); 1371 } 1372 1373 int kvm_cmd_run(int argc, const char **argv, const char *prefix) 1374 { 1375 int r, ret = -EFAULT; 1376 1377 r = kvm_cmd_run_init(argc, argv); 1378 if (r < 0) 1379 return r; 1380 1381 ret = kvm_cmd_run_work(); 1382 kvm_cmd_run_exit(ret); 1383 1384 return ret; 1385 } 1386