1 #include "kvm/builtin-run.h" 2 3 #include "kvm/builtin-setup.h" 4 #include "kvm/virtio-balloon.h" 5 #include "kvm/virtio-console.h" 6 #include "kvm/parse-options.h" 7 #include "kvm/8250-serial.h" 8 #include "kvm/framebuffer.h" 9 #include "kvm/disk-image.h" 10 #include "kvm/threadpool.h" 11 #include "kvm/virtio-blk.h" 12 #include "kvm/virtio-net.h" 13 #include "kvm/virtio-rng.h" 14 #include "kvm/ioeventfd.h" 15 #include "kvm/virtio-9p.h" 16 #include "kvm/barrier.h" 17 #include "kvm/kvm-cpu.h" 18 #include "kvm/ioport.h" 19 #include "kvm/symbol.h" 20 #include "kvm/i8042.h" 21 #include "kvm/mutex.h" 22 #include "kvm/term.h" 23 #include "kvm/util.h" 24 #include "kvm/strbuf.h" 25 #include "kvm/vesa.h" 26 #include "kvm/irq.h" 27 #include "kvm/kvm.h" 28 #include "kvm/pci.h" 29 #include "kvm/rtc.h" 30 #include "kvm/sdl.h" 31 #include "kvm/vnc.h" 32 #include "kvm/guest_compat.h" 33 #include "kvm/pci-shmem.h" 34 #include "kvm/kvm-ipc.h" 35 #include "kvm/builtin-debug.h" 36 37 #include <linux/types.h> 38 #include <linux/err.h> 39 40 #include <sys/utsname.h> 41 #include <sys/types.h> 42 #include <sys/stat.h> 43 #include <termios.h> 44 #include <signal.h> 45 #include <stdlib.h> 46 #include <string.h> 47 #include <unistd.h> 48 #include <ctype.h> 49 #include <stdio.h> 50 51 #define DEFAULT_KVM_DEV "/dev/kvm" 52 #define DEFAULT_CONSOLE "serial" 53 #define DEFAULT_NETWORK "user" 54 #define DEFAULT_HOST_ADDR "192.168.33.1" 55 #define DEFAULT_GUEST_ADDR "192.168.33.15" 56 #define DEFAULT_GUEST_MAC "02:15:15:15:15:15" 57 #define DEFAULT_HOST_MAC "02:01:01:01:01:01" 58 #define DEFAULT_SCRIPT "none" 59 const char *DEFAULT_SANDBOX_FILENAME = "guest/sandbox.sh"; 60 61 #define MB_SHIFT (20) 62 #define KB_SHIFT (10) 63 #define GB_SHIFT (30) 64 #define MIN_RAM_SIZE_MB (64ULL) 65 #define MIN_RAM_SIZE_BYTE (MIN_RAM_SIZE_MB << MB_SHIFT) 66 67 struct kvm *kvm; 68 struct kvm_cpu **kvm_cpus; 69 __thread struct kvm_cpu *current_kvm_cpu; 70 71 static struct disk_image_params disk_image[MAX_DISK_IMAGES]; 72 static u64 ram_size; 73 static u8 image_count; 74 static u8 num_net_devices; 75 static bool virtio_rng; 76 static const char *kernel_cmdline; 77 static const char *kernel_filename; 78 static const char *vmlinux_filename; 79 static const char *initrd_filename; 80 static const char *firmware_filename; 81 static const char *console; 82 static const char *dev; 83 static const char *network; 84 static const char *host_ip; 85 static const char *guest_ip; 86 static const char *guest_mac; 87 static const char *host_mac; 88 static const char *script; 89 static const char *guest_name; 90 static const char *sandbox; 91 static const char *hugetlbfs_path; 92 static const char *custom_rootfs_name = "default"; 93 static struct virtio_net_params *net_params; 94 static bool single_step; 95 static bool vnc; 96 static bool sdl; 97 static bool balloon; 98 static bool using_rootfs; 99 static bool custom_rootfs; 100 static bool no_net; 101 static bool no_dhcp; 102 extern bool ioport_debug; 103 extern bool mmio_debug; 104 static int kvm_run_wrapper; 105 extern int active_console; 106 extern int debug_iodelay; 107 108 bool do_debug_print = false; 109 110 static int nrcpus; 111 static int vidmode = -1; 112 113 static const char * const run_usage[] = { 114 "lkvm run [<options>] [<kernel image>]", 115 NULL 116 }; 117 118 enum { 119 KVM_RUN_DEFAULT, 120 KVM_RUN_SANDBOX, 121 }; 122 123 void kvm_run_set_wrapper_sandbox(void) 124 { 125 kvm_run_wrapper = KVM_RUN_SANDBOX; 126 } 127 128 static int img_name_parser(const struct option *opt, const char *arg, int unset) 129 { 130 char path[PATH_MAX]; 131 const char *cur; 132 struct stat st; 133 char *sep; 134 135 if (stat(arg, &st) == 0 && 136 S_ISDIR(st.st_mode)) { 137 char tmp[PATH_MAX]; 138 139 if (using_rootfs) 140 die("Please use only one rootfs directory atmost"); 141 142 if (realpath(arg, tmp) == 0 || 143 virtio_9p__register(kvm, tmp, "/dev/root") < 0) 144 die("Unable to initialize virtio 9p"); 145 using_rootfs = 1; 146 return 0; 147 } 148 149 snprintf(path, PATH_MAX, "%s%s", kvm__get_dir(), arg); 150 151 if (stat(path, &st) == 0 && 152 S_ISDIR(st.st_mode)) { 153 char tmp[PATH_MAX]; 154 155 if (using_rootfs) 156 die("Please use only one rootfs directory atmost"); 157 158 if (realpath(path, tmp) == 0 || 159 virtio_9p__register(kvm, tmp, "/dev/root") < 0) 160 die("Unable to initialize virtio 9p"); 161 if (virtio_9p__register(kvm, "/", "hostfs") < 0) 162 die("Unable to initialize virtio 9p"); 163 kvm_setup_resolv(arg); 164 using_rootfs = custom_rootfs = 1; 165 custom_rootfs_name = arg; 166 return 0; 167 } 168 169 if (image_count >= MAX_DISK_IMAGES) 170 die("Currently only 4 images are supported"); 171 172 disk_image[image_count].filename = arg; 173 cur = arg; 174 do { 175 sep = strstr(cur, ","); 176 if (sep) { 177 if (strncmp(sep + 1, "ro", 2) == 0) 178 disk_image[image_count].readonly = true; 179 else if (strncmp(sep + 1, "direct", 6) == 0) 180 disk_image[image_count].direct = true; 181 *sep = 0; 182 cur = sep + 1; 183 } 184 } while (sep); 185 186 image_count++; 187 188 return 0; 189 } 190 191 static int virtio_9p_rootdir_parser(const struct option *opt, const char *arg, int unset) 192 { 193 char *tag_name; 194 char tmp[PATH_MAX]; 195 196 /* 197 * 9p dir can be of the form dirname,tag_name or 198 * just dirname. In the later case we use the 199 * default tag name 200 */ 201 tag_name = strstr(arg, ","); 202 if (tag_name) { 203 *tag_name = '\0'; 204 tag_name++; 205 } 206 if (realpath(arg, tmp)) { 207 if (virtio_9p__register(kvm, tmp, tag_name) < 0) 208 die("Unable to initialize virtio 9p"); 209 } else 210 die("Failed resolving 9p path"); 211 return 0; 212 } 213 214 static int tty_parser(const struct option *opt, const char *arg, int unset) 215 { 216 int tty = atoi(arg); 217 218 term_set_tty(tty); 219 220 return 0; 221 } 222 223 static inline void str_to_mac(const char *str, char *mac) 224 { 225 sscanf(str, "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx", 226 mac, mac+1, mac+2, mac+3, mac+4, mac+5); 227 } 228 static int set_net_param(struct virtio_net_params *p, const char *param, 229 const char *val) 230 { 231 if (strcmp(param, "guest_mac") == 0) { 232 str_to_mac(val, p->guest_mac); 233 } else if (strcmp(param, "mode") == 0) { 234 if (!strncmp(val, "user", 4)) { 235 int i; 236 237 for (i = 0; i < num_net_devices; i++) 238 if (net_params[i].mode == NET_MODE_USER) 239 die("Only one usermode network device allowed at a time"); 240 p->mode = NET_MODE_USER; 241 } else if (!strncmp(val, "tap", 3)) { 242 p->mode = NET_MODE_TAP; 243 } else if (!strncmp(val, "none", 4)) { 244 no_net = 1; 245 return -1; 246 } else 247 die("Unkown network mode %s, please use user, tap or none", network); 248 } else if (strcmp(param, "script") == 0) { 249 p->script = strdup(val); 250 } else if (strcmp(param, "guest_ip") == 0) { 251 p->guest_ip = strdup(val); 252 } else if (strcmp(param, "host_ip") == 0) { 253 p->host_ip = strdup(val); 254 } else if (strcmp(param, "trans") == 0) { 255 p->trans = strdup(val); 256 } else if (strcmp(param, "vhost") == 0) { 257 p->vhost = atoi(val); 258 } else if (strcmp(param, "fd") == 0) { 259 p->fd = atoi(val); 260 } 261 262 return 0; 263 } 264 265 static int netdev_parser(const struct option *opt, const char *arg, int unset) 266 { 267 struct virtio_net_params p; 268 char *buf = NULL, *cmd = NULL, *cur = NULL; 269 bool on_cmd = true; 270 271 if (arg) { 272 buf = strdup(arg); 273 if (buf == NULL) 274 die("Failed allocating new net buffer"); 275 cur = strtok(buf, ",="); 276 } 277 278 p = (struct virtio_net_params) { 279 .guest_ip = DEFAULT_GUEST_ADDR, 280 .host_ip = DEFAULT_HOST_ADDR, 281 .script = DEFAULT_SCRIPT, 282 .mode = NET_MODE_TAP, 283 }; 284 285 str_to_mac(DEFAULT_GUEST_MAC, p.guest_mac); 286 p.guest_mac[5] += num_net_devices; 287 288 while (cur) { 289 if (on_cmd) { 290 cmd = cur; 291 } else { 292 if (set_net_param(&p, cmd, cur) < 0) 293 goto done; 294 } 295 on_cmd = !on_cmd; 296 297 cur = strtok(NULL, ",="); 298 }; 299 300 num_net_devices++; 301 302 net_params = realloc(net_params, num_net_devices * sizeof(*net_params)); 303 if (net_params == NULL) 304 die("Failed adding new network device"); 305 306 net_params[num_net_devices - 1] = p; 307 308 done: 309 free(buf); 310 return 0; 311 } 312 313 static int shmem_parser(const struct option *opt, const char *arg, int unset) 314 { 315 const u64 default_size = SHMEM_DEFAULT_SIZE; 316 const u64 default_phys_addr = SHMEM_DEFAULT_ADDR; 317 const char *default_handle = SHMEM_DEFAULT_HANDLE; 318 struct shmem_info *si = malloc(sizeof(struct shmem_info)); 319 u64 phys_addr; 320 u64 size; 321 char *handle = NULL; 322 int create = 0; 323 const char *p = arg; 324 char *next; 325 int base = 10; 326 int verbose = 0; 327 328 const int skip_pci = strlen("pci:"); 329 if (verbose) 330 pr_info("shmem_parser(%p,%s,%d)", opt, arg, unset); 331 /* parse out optional addr family */ 332 if (strcasestr(p, "pci:")) { 333 p += skip_pci; 334 } else if (strcasestr(p, "mem:")) { 335 die("I can't add to E820 map yet.\n"); 336 } 337 /* parse out physical addr */ 338 base = 10; 339 if (strcasestr(p, "0x")) 340 base = 16; 341 phys_addr = strtoll(p, &next, base); 342 if (next == p && phys_addr == 0) { 343 pr_info("shmem: no physical addr specified, using default."); 344 phys_addr = default_phys_addr; 345 } 346 if (*next != ':' && *next != '\0') 347 die("shmem: unexpected chars after phys addr.\n"); 348 if (*next == '\0') 349 p = next; 350 else 351 p = next + 1; 352 /* parse out size */ 353 base = 10; 354 if (strcasestr(p, "0x")) 355 base = 16; 356 size = strtoll(p, &next, base); 357 if (next == p && size == 0) { 358 pr_info("shmem: no size specified, using default."); 359 size = default_size; 360 } 361 /* look for [KMGkmg][Bb]* uses base 2. */ 362 int skip_B = 0; 363 if (strspn(next, "KMGkmg")) { /* might have a prefix */ 364 if (*(next + 1) == 'B' || *(next + 1) == 'b') 365 skip_B = 1; 366 switch (*next) { 367 case 'K': 368 case 'k': 369 size = size << KB_SHIFT; 370 break; 371 case 'M': 372 case 'm': 373 size = size << MB_SHIFT; 374 break; 375 case 'G': 376 case 'g': 377 size = size << GB_SHIFT; 378 break; 379 default: 380 die("shmem: bug in detecting size prefix."); 381 break; 382 } 383 next += 1 + skip_B; 384 } 385 if (*next != ':' && *next != '\0') { 386 die("shmem: unexpected chars after phys size. <%c><%c>\n", 387 *next, *p); 388 } 389 if (*next == '\0') 390 p = next; 391 else 392 p = next + 1; 393 /* parse out optional shmem handle */ 394 const int skip_handle = strlen("handle="); 395 next = strcasestr(p, "handle="); 396 if (*p && next) { 397 if (p != next) 398 die("unexpected chars before handle\n"); 399 p += skip_handle; 400 next = strchrnul(p, ':'); 401 if (next - p) { 402 handle = malloc(next - p + 1); 403 strncpy(handle, p, next - p); 404 handle[next - p] = '\0'; /* just in case. */ 405 } 406 if (*next == '\0') 407 p = next; 408 else 409 p = next + 1; 410 } 411 /* parse optional create flag to see if we should create shm seg. */ 412 if (*p && strcasestr(p, "create")) { 413 create = 1; 414 p += strlen("create"); 415 } 416 if (*p != '\0') 417 die("shmem: unexpected trailing chars\n"); 418 if (handle == NULL) { 419 handle = malloc(strlen(default_handle) + 1); 420 strcpy(handle, default_handle); 421 } 422 if (verbose) { 423 pr_info("shmem: phys_addr = %llx", phys_addr); 424 pr_info("shmem: size = %llx", size); 425 pr_info("shmem: handle = %s", handle); 426 pr_info("shmem: create = %d", create); 427 } 428 429 si->phys_addr = phys_addr; 430 si->size = size; 431 si->handle = handle; 432 si->create = create; 433 pci_shmem__register_mem(si); /* ownership of si, etc. passed on. */ 434 return 0; 435 } 436 437 static const struct option options[] = { 438 OPT_GROUP("Basic options:"), 439 OPT_STRING('\0', "name", &guest_name, "guest name", 440 "A name for the guest"), 441 OPT_INTEGER('c', "cpus", &nrcpus, "Number of CPUs"), 442 OPT_U64('m', "mem", &ram_size, "Virtual machine memory size in MiB."), 443 OPT_CALLBACK('\0', "shmem", NULL, 444 "[pci:]<addr>:<size>[:handle=<handle>][:create]", 445 "Share host shmem with guest via pci device", 446 shmem_parser), 447 OPT_CALLBACK('d', "disk", NULL, "image or rootfs_dir", "Disk image or rootfs directory", img_name_parser), 448 OPT_BOOLEAN('\0', "balloon", &balloon, "Enable virtio balloon"), 449 OPT_BOOLEAN('\0', "vnc", &vnc, "Enable VNC framebuffer"), 450 OPT_BOOLEAN('\0', "sdl", &sdl, "Enable SDL framebuffer"), 451 OPT_BOOLEAN('\0', "rng", &virtio_rng, "Enable virtio Random Number Generator"), 452 OPT_CALLBACK('\0', "9p", NULL, "dir_to_share,tag_name", 453 "Enable virtio 9p to share files between host and guest", virtio_9p_rootdir_parser), 454 OPT_STRING('\0', "console", &console, "serial, virtio or hv", 455 "Console to use"), 456 OPT_STRING('\0', "dev", &dev, "device_file", "KVM device file"), 457 OPT_CALLBACK('\0', "tty", NULL, "tty id", 458 "Remap guest TTY into a pty on the host", 459 tty_parser), 460 OPT_STRING('\0', "sandbox", &sandbox, "script", 461 "Run this script when booting into custom rootfs"), 462 OPT_STRING('\0', "hugetlbfs", &hugetlbfs_path, "path", "Hugetlbfs path"), 463 464 OPT_GROUP("Kernel options:"), 465 OPT_STRING('k', "kernel", &kernel_filename, "kernel", 466 "Kernel to boot in virtual machine"), 467 OPT_STRING('i', "initrd", &initrd_filename, "initrd", 468 "Initial RAM disk image"), 469 OPT_STRING('p', "params", &kernel_cmdline, "params", 470 "Kernel command line arguments"), 471 OPT_STRING('f', "firmware", &firmware_filename, "firmware", 472 "Firmware image to boot in virtual machine"), 473 474 OPT_GROUP("Networking options:"), 475 OPT_CALLBACK_DEFAULT('n', "network", NULL, "network params", 476 "Create a new guest NIC", 477 netdev_parser, NULL), 478 OPT_BOOLEAN('\0', "no-dhcp", &no_dhcp, "Disable kernel DHCP in rootfs mode"), 479 480 OPT_GROUP("BIOS options:"), 481 OPT_INTEGER('\0', "vidmode", &vidmode, 482 "Video mode"), 483 484 OPT_GROUP("Debug options:"), 485 OPT_BOOLEAN('\0', "debug", &do_debug_print, 486 "Enable debug messages"), 487 OPT_BOOLEAN('\0', "debug-single-step", &single_step, 488 "Enable single stepping"), 489 OPT_BOOLEAN('\0', "debug-ioport", &ioport_debug, 490 "Enable ioport debugging"), 491 OPT_BOOLEAN('\0', "debug-mmio", &mmio_debug, 492 "Enable MMIO debugging"), 493 OPT_INTEGER('\0', "debug-iodelay", &debug_iodelay, 494 "Delay IO by millisecond"), 495 OPT_END() 496 }; 497 498 /* 499 * Serialize debug printout so that the output of multiple vcpus does not 500 * get mixed up: 501 */ 502 static int printout_done; 503 504 static void handle_sigusr1(int sig) 505 { 506 struct kvm_cpu *cpu = current_kvm_cpu; 507 int fd = kvm_cpu__get_debug_fd(); 508 509 if (!cpu || cpu->needs_nmi) 510 return; 511 512 dprintf(fd, "\n #\n # vCPU #%ld's dump:\n #\n", cpu->cpu_id); 513 kvm_cpu__show_registers(cpu); 514 kvm_cpu__show_code(cpu); 515 kvm_cpu__show_page_tables(cpu); 516 fflush(stdout); 517 printout_done = 1; 518 mb(); 519 } 520 521 /* Pause/resume the guest using SIGUSR2 */ 522 static int is_paused; 523 524 static void handle_pause(int fd, u32 type, u32 len, u8 *msg) 525 { 526 if (WARN_ON(len)) 527 return; 528 529 if (type == KVM_IPC_RESUME && is_paused) { 530 kvm->vm_state = KVM_VMSTATE_RUNNING; 531 kvm__continue(); 532 } else if (type == KVM_IPC_PAUSE && !is_paused) { 533 kvm->vm_state = KVM_VMSTATE_PAUSED; 534 kvm__pause(); 535 } else { 536 return; 537 } 538 539 is_paused = !is_paused; 540 } 541 542 static void handle_vmstate(int fd, u32 type, u32 len, u8 *msg) 543 { 544 int r = 0; 545 546 if (type == KVM_IPC_VMSTATE) 547 r = write(fd, &kvm->vm_state, sizeof(kvm->vm_state)); 548 549 if (r < 0) 550 pr_warning("Failed sending VMSTATE"); 551 } 552 553 static void handle_debug(int fd, u32 type, u32 len, u8 *msg) 554 { 555 int i; 556 struct debug_cmd_params *params; 557 u32 dbg_type; 558 u32 vcpu; 559 560 if (WARN_ON(type != KVM_IPC_DEBUG || len != sizeof(*params))) 561 return; 562 563 params = (void *)msg; 564 dbg_type = params->dbg_type; 565 vcpu = params->cpu; 566 567 if (dbg_type & KVM_DEBUG_CMD_TYPE_SYSRQ) 568 serial8250__inject_sysrq(kvm, params->sysrq); 569 570 if (dbg_type & KVM_DEBUG_CMD_TYPE_NMI) { 571 if ((int)vcpu >= kvm->nrcpus) 572 return; 573 574 kvm_cpus[vcpu]->needs_nmi = 1; 575 pthread_kill(kvm_cpus[vcpu]->thread, SIGUSR1); 576 } 577 578 if (!(dbg_type & KVM_DEBUG_CMD_TYPE_DUMP)) 579 return; 580 581 for (i = 0; i < nrcpus; i++) { 582 struct kvm_cpu *cpu = kvm_cpus[i]; 583 584 if (!cpu) 585 continue; 586 587 printout_done = 0; 588 589 kvm_cpu__set_debug_fd(fd); 590 pthread_kill(cpu->thread, SIGUSR1); 591 /* 592 * Wait for the vCPU to dump state before signalling 593 * the next thread. Since this is debug code it does 594 * not matter that we are burning CPU time a bit: 595 */ 596 while (!printout_done) 597 mb(); 598 } 599 600 close(fd); 601 602 serial8250__inject_sysrq(kvm, 'p'); 603 } 604 605 static void handle_sigalrm(int sig) 606 { 607 kvm__arch_periodic_poll(kvm); 608 } 609 610 static void handle_stop(int fd, u32 type, u32 len, u8 *msg) 611 { 612 if (WARN_ON(type != KVM_IPC_STOP || len)) 613 return; 614 615 kvm_cpu__reboot(); 616 } 617 618 static void *kvm_cpu_thread(void *arg) 619 { 620 current_kvm_cpu = arg; 621 622 if (kvm_cpu__start(current_kvm_cpu)) 623 goto panic_kvm; 624 625 return (void *) (intptr_t) 0; 626 627 panic_kvm: 628 fprintf(stderr, "KVM exit reason: %u (\"%s\")\n", 629 current_kvm_cpu->kvm_run->exit_reason, 630 kvm_exit_reasons[current_kvm_cpu->kvm_run->exit_reason]); 631 if (current_kvm_cpu->kvm_run->exit_reason == KVM_EXIT_UNKNOWN) 632 fprintf(stderr, "KVM exit code: 0x%Lu\n", 633 current_kvm_cpu->kvm_run->hw.hardware_exit_reason); 634 635 kvm_cpu__set_debug_fd(STDOUT_FILENO); 636 kvm_cpu__show_registers(current_kvm_cpu); 637 kvm_cpu__show_code(current_kvm_cpu); 638 kvm_cpu__show_page_tables(current_kvm_cpu); 639 640 return (void *) (intptr_t) 1; 641 } 642 643 static char kernel[PATH_MAX]; 644 645 static const char *host_kernels[] = { 646 "/boot/vmlinuz", 647 "/boot/bzImage", 648 NULL 649 }; 650 651 static const char *default_kernels[] = { 652 "./bzImage", 653 "arch/" BUILD_ARCH "/boot/bzImage", 654 "../../arch/" BUILD_ARCH "/boot/bzImage", 655 NULL 656 }; 657 658 static const char *default_vmlinux[] = { 659 "vmlinux", 660 "../../../vmlinux", 661 "../../vmlinux", 662 NULL 663 }; 664 665 static void kernel_usage_with_options(void) 666 { 667 const char **k; 668 struct utsname uts; 669 670 fprintf(stderr, "Fatal: could not find default kernel image in:\n"); 671 k = &default_kernels[0]; 672 while (*k) { 673 fprintf(stderr, "\t%s\n", *k); 674 k++; 675 } 676 677 if (uname(&uts) < 0) 678 return; 679 680 k = &host_kernels[0]; 681 while (*k) { 682 if (snprintf(kernel, PATH_MAX, "%s-%s", *k, uts.release) < 0) 683 return; 684 fprintf(stderr, "\t%s\n", kernel); 685 k++; 686 } 687 fprintf(stderr, "\nPlease see '%s run --help' for more options.\n\n", 688 KVM_BINARY_NAME); 689 } 690 691 static u64 host_ram_size(void) 692 { 693 long page_size; 694 long nr_pages; 695 696 nr_pages = sysconf(_SC_PHYS_PAGES); 697 if (nr_pages < 0) { 698 pr_warning("sysconf(_SC_PHYS_PAGES) failed"); 699 return 0; 700 } 701 702 page_size = sysconf(_SC_PAGE_SIZE); 703 if (page_size < 0) { 704 pr_warning("sysconf(_SC_PAGE_SIZE) failed"); 705 return 0; 706 } 707 708 return (nr_pages * page_size) >> MB_SHIFT; 709 } 710 711 /* 712 * If user didn't specify how much memory it wants to allocate for the guest, 713 * avoid filling the whole host RAM. 714 */ 715 #define RAM_SIZE_RATIO 0.8 716 717 static u64 get_ram_size(int nr_cpus) 718 { 719 u64 available; 720 u64 ram_size; 721 722 ram_size = 64 * (nr_cpus + 3); 723 724 available = host_ram_size() * RAM_SIZE_RATIO; 725 if (!available) 726 available = MIN_RAM_SIZE_MB; 727 728 if (ram_size > available) 729 ram_size = available; 730 731 return ram_size; 732 } 733 734 static const char *find_kernel(void) 735 { 736 const char **k; 737 struct stat st; 738 struct utsname uts; 739 740 k = &default_kernels[0]; 741 while (*k) { 742 if (stat(*k, &st) < 0 || !S_ISREG(st.st_mode)) { 743 k++; 744 continue; 745 } 746 strncpy(kernel, *k, PATH_MAX); 747 return kernel; 748 } 749 750 if (uname(&uts) < 0) 751 return NULL; 752 753 k = &host_kernels[0]; 754 while (*k) { 755 if (snprintf(kernel, PATH_MAX, "%s-%s", *k, uts.release) < 0) 756 return NULL; 757 758 if (stat(kernel, &st) < 0 || !S_ISREG(st.st_mode)) { 759 k++; 760 continue; 761 } 762 return kernel; 763 764 } 765 return NULL; 766 } 767 768 static const char *find_vmlinux(void) 769 { 770 const char **vmlinux; 771 772 vmlinux = &default_vmlinux[0]; 773 while (*vmlinux) { 774 struct stat st; 775 776 if (stat(*vmlinux, &st) < 0 || !S_ISREG(st.st_mode)) { 777 vmlinux++; 778 continue; 779 } 780 return *vmlinux; 781 } 782 return NULL; 783 } 784 785 void kvm_run_help(void) 786 { 787 usage_with_options(run_usage, options); 788 } 789 790 static int kvm_custom_stage2(void) 791 { 792 char tmp[PATH_MAX], dst[PATH_MAX], *src; 793 const char *rootfs = custom_rootfs_name; 794 int r; 795 796 src = realpath("guest/init_stage2", NULL); 797 if (src == NULL) 798 return -ENOMEM; 799 800 snprintf(tmp, PATH_MAX, "%s%s/virt/init_stage2", kvm__get_dir(), rootfs); 801 remove(tmp); 802 803 snprintf(dst, PATH_MAX, "/host/%s", src); 804 r = symlink(dst, tmp); 805 free(src); 806 807 return r; 808 } 809 810 static int kvm_run_set_sandbox(void) 811 { 812 const char *guestfs_name = custom_rootfs_name; 813 char path[PATH_MAX], script[PATH_MAX], *tmp; 814 815 snprintf(path, PATH_MAX, "%s%s/virt/sandbox.sh", kvm__get_dir(), guestfs_name); 816 817 remove(path); 818 819 if (sandbox == NULL) 820 return 0; 821 822 tmp = realpath(sandbox, NULL); 823 if (tmp == NULL) 824 return -ENOMEM; 825 826 snprintf(script, PATH_MAX, "/host/%s", tmp); 827 free(tmp); 828 829 return symlink(script, path); 830 } 831 832 static void kvm_write_sandbox_cmd_exactly(int fd, const char *arg) 833 { 834 const char *single_quote; 835 836 if (!*arg) { /* zero length string */ 837 if (write(fd, "''", 2) <= 0) 838 die("Failed writing sandbox script"); 839 return; 840 } 841 842 while (*arg) { 843 single_quote = strchrnul(arg, '\''); 844 845 /* write non-single-quote string as #('string') */ 846 if (arg != single_quote) { 847 if (write(fd, "'", 1) <= 0 || 848 write(fd, arg, single_quote - arg) <= 0 || 849 write(fd, "'", 1) <= 0) 850 die("Failed writing sandbox script"); 851 } 852 853 /* write single quote as #("'") */ 854 if (*single_quote) { 855 if (write(fd, "\"'\"", 3) <= 0) 856 die("Failed writing sandbox script"); 857 } else 858 break; 859 860 arg = single_quote + 1; 861 } 862 } 863 864 static void resolve_program(const char *src, char *dst, size_t len) 865 { 866 struct stat st; 867 int err; 868 869 err = stat(src, &st); 870 871 if (!err && S_ISREG(st.st_mode)) { 872 char resolved_path[PATH_MAX]; 873 874 if (!realpath(src, resolved_path)) 875 die("Unable to resolve program %s: %s\n", src, strerror(errno)); 876 877 snprintf(dst, len, "/host%s", resolved_path); 878 } else 879 strncpy(dst, src, len); 880 } 881 882 static void kvm_run_write_sandbox_cmd(const char **argv, int argc) 883 { 884 const char script_hdr[] = "#! /bin/bash\n\n"; 885 char program[PATH_MAX]; 886 int fd; 887 888 remove(sandbox); 889 890 fd = open(sandbox, O_RDWR | O_CREAT, 0777); 891 if (fd < 0) 892 die("Failed creating sandbox script"); 893 894 if (write(fd, script_hdr, sizeof(script_hdr) - 1) <= 0) 895 die("Failed writing sandbox script"); 896 897 resolve_program(argv[0], program, PATH_MAX); 898 kvm_write_sandbox_cmd_exactly(fd, program); 899 900 argv++; 901 argc--; 902 903 while (argc) { 904 if (write(fd, " ", 1) <= 0) 905 die("Failed writing sandbox script"); 906 907 kvm_write_sandbox_cmd_exactly(fd, argv[0]); 908 argv++; 909 argc--; 910 } 911 if (write(fd, "\n", 1) <= 0) 912 die("Failed writing sandbox script"); 913 914 close(fd); 915 } 916 917 static int kvm_cmd_run_init(int argc, const char **argv) 918 { 919 static char real_cmdline[2048], default_name[20]; 920 struct framebuffer *fb = NULL; 921 unsigned int nr_online_cpus; 922 int max_cpus, recommended_cpus; 923 int i, r; 924 925 signal(SIGALRM, handle_sigalrm); 926 kvm_ipc__register_handler(KVM_IPC_DEBUG, handle_debug); 927 signal(SIGUSR1, handle_sigusr1); 928 kvm_ipc__register_handler(KVM_IPC_PAUSE, handle_pause); 929 kvm_ipc__register_handler(KVM_IPC_RESUME, handle_pause); 930 kvm_ipc__register_handler(KVM_IPC_STOP, handle_stop); 931 kvm_ipc__register_handler(KVM_IPC_VMSTATE, handle_vmstate); 932 933 nr_online_cpus = sysconf(_SC_NPROCESSORS_ONLN); 934 935 while (argc != 0) { 936 argc = parse_options(argc, argv, options, run_usage, 937 PARSE_OPT_STOP_AT_NON_OPTION | 938 PARSE_OPT_KEEP_DASHDASH); 939 if (argc != 0) { 940 /* Cusrom options, should have been handled elsewhere */ 941 if (strcmp(argv[0], "--") == 0) { 942 if (kvm_run_wrapper == KVM_RUN_SANDBOX) { 943 sandbox = DEFAULT_SANDBOX_FILENAME; 944 kvm_run_write_sandbox_cmd(argv+1, argc-1); 945 break; 946 } 947 } 948 949 if ((kvm_run_wrapper == KVM_RUN_DEFAULT && kernel_filename) || 950 (kvm_run_wrapper == KVM_RUN_SANDBOX && sandbox)) { 951 fprintf(stderr, "Cannot handle parameter: " 952 "%s\n", argv[0]); 953 usage_with_options(run_usage, options); 954 return EINVAL; 955 } 956 if (kvm_run_wrapper == KVM_RUN_SANDBOX) { 957 /* 958 * first unhandled parameter is treated as 959 * sandbox command 960 */ 961 sandbox = DEFAULT_SANDBOX_FILENAME; 962 kvm_run_write_sandbox_cmd(argv, argc); 963 } else { 964 /* 965 * first unhandled parameter is treated as a kernel 966 * image 967 */ 968 kernel_filename = argv[0]; 969 } 970 argv++; 971 argc--; 972 } 973 974 } 975 976 if (!kernel_filename) 977 kernel_filename = find_kernel(); 978 979 if (!kernel_filename) { 980 kernel_usage_with_options(); 981 return EINVAL; 982 } 983 984 vmlinux_filename = find_vmlinux(); 985 986 if (nrcpus == 0) 987 nrcpus = nr_online_cpus; 988 989 if (!ram_size) 990 ram_size = get_ram_size(nrcpus); 991 992 if (ram_size < MIN_RAM_SIZE_MB) 993 die("Not enough memory specified: %lluMB (min %lluMB)", ram_size, MIN_RAM_SIZE_MB); 994 995 if (ram_size > host_ram_size()) 996 pr_warning("Guest memory size %lluMB exceeds host physical RAM size %lluMB", ram_size, host_ram_size()); 997 998 ram_size <<= MB_SHIFT; 999 1000 if (!dev) 1001 dev = DEFAULT_KVM_DEV; 1002 1003 if (!console) 1004 console = DEFAULT_CONSOLE; 1005 1006 if (!strncmp(console, "virtio", 6)) 1007 active_console = CONSOLE_VIRTIO; 1008 else if (!strncmp(console, "serial", 6)) 1009 active_console = CONSOLE_8250; 1010 else if (!strncmp(console, "hv", 2)) 1011 active_console = CONSOLE_HV; 1012 else 1013 pr_warning("No console!"); 1014 1015 if (!host_ip) 1016 host_ip = DEFAULT_HOST_ADDR; 1017 1018 if (!guest_ip) 1019 guest_ip = DEFAULT_GUEST_ADDR; 1020 1021 if (!guest_mac) 1022 guest_mac = DEFAULT_GUEST_MAC; 1023 1024 if (!host_mac) 1025 host_mac = DEFAULT_HOST_MAC; 1026 1027 if (!script) 1028 script = DEFAULT_SCRIPT; 1029 1030 term_init(); 1031 1032 if (!guest_name) { 1033 if (custom_rootfs) { 1034 guest_name = custom_rootfs_name; 1035 } else { 1036 sprintf(default_name, "guest-%u", getpid()); 1037 guest_name = default_name; 1038 } 1039 } 1040 1041 kvm = kvm__init(dev, hugetlbfs_path, ram_size, guest_name); 1042 if (IS_ERR(kvm)) { 1043 r = PTR_ERR(kvm); 1044 goto fail; 1045 } 1046 1047 kvm->single_step = single_step; 1048 1049 r = ioeventfd__init(kvm); 1050 if (r < 0) { 1051 pr_err("ioeventfd__init() failed with error %d\n", r); 1052 goto fail; 1053 } 1054 1055 max_cpus = kvm__max_cpus(kvm); 1056 recommended_cpus = kvm__recommended_cpus(kvm); 1057 1058 if (nrcpus > max_cpus) { 1059 printf(" # Limit the number of CPUs to %d\n", max_cpus); 1060 nrcpus = max_cpus; 1061 } else if (nrcpus > recommended_cpus) { 1062 printf(" # Warning: The maximum recommended amount of VCPUs" 1063 " is %d\n", recommended_cpus); 1064 } 1065 1066 kvm->nrcpus = nrcpus; 1067 1068 /* Alloc one pointer too many, so array ends up 0-terminated */ 1069 kvm_cpus = calloc(nrcpus + 1, sizeof(void *)); 1070 if (!kvm_cpus) 1071 die("Couldn't allocate array for %d CPUs", nrcpus); 1072 1073 r = irq__init(kvm); 1074 if (r < 0) { 1075 pr_err("irq__init() failed with error %d\n", r); 1076 goto fail; 1077 } 1078 1079 r = pci__init(kvm); 1080 if (r < 0) { 1081 pr_err("pci__init() failed with error %d\n", r); 1082 goto fail; 1083 } 1084 1085 r = ioport__init(kvm); 1086 if (r < 0) { 1087 pr_err("ioport__init() failed with error %d\n", r); 1088 goto fail; 1089 } 1090 1091 /* 1092 * vidmode should be either specified 1093 * either set by default 1094 */ 1095 if (vnc || sdl) { 1096 if (vidmode == -1) 1097 vidmode = 0x312; 1098 } else { 1099 vidmode = 0; 1100 } 1101 1102 memset(real_cmdline, 0, sizeof(real_cmdline)); 1103 kvm__arch_set_cmdline(real_cmdline, vnc || sdl); 1104 1105 if (strlen(real_cmdline) > 0) 1106 strcat(real_cmdline, " "); 1107 1108 if (kernel_cmdline) 1109 strlcat(real_cmdline, kernel_cmdline, sizeof(real_cmdline)); 1110 1111 if (!using_rootfs && !disk_image[0].filename && !initrd_filename) { 1112 char tmp[PATH_MAX]; 1113 1114 kvm_setup_create_new(custom_rootfs_name); 1115 kvm_setup_resolv(custom_rootfs_name); 1116 1117 snprintf(tmp, PATH_MAX, "%s%s", kvm__get_dir(), "default"); 1118 if (virtio_9p__register(kvm, tmp, "/dev/root") < 0) 1119 die("Unable to initialize virtio 9p"); 1120 if (virtio_9p__register(kvm, "/", "hostfs") < 0) 1121 die("Unable to initialize virtio 9p"); 1122 using_rootfs = custom_rootfs = 1; 1123 } 1124 1125 if (using_rootfs) { 1126 strcat(real_cmdline, " root=/dev/root rw rootflags=rw,trans=virtio,version=9p2000.L rootfstype=9p"); 1127 if (custom_rootfs) { 1128 kvm_run_set_sandbox(); 1129 1130 strcat(real_cmdline, " init=/virt/init"); 1131 1132 if (!no_dhcp) 1133 strcat(real_cmdline, " ip=dhcp"); 1134 if (kvm_custom_stage2()) 1135 die("Failed linking stage 2 of init."); 1136 } 1137 } else if (!strstr(real_cmdline, "root=")) { 1138 strlcat(real_cmdline, " root=/dev/vda rw ", sizeof(real_cmdline)); 1139 } 1140 1141 if (image_count) { 1142 kvm->nr_disks = image_count; 1143 kvm->disks = disk_image__open_all((struct disk_image_params *)&disk_image, image_count); 1144 if (IS_ERR(kvm->disks)) { 1145 r = PTR_ERR(kvm->disks); 1146 pr_err("disk_image__open_all() failed with error %ld\n", 1147 PTR_ERR(kvm->disks)); 1148 goto fail; 1149 } 1150 } 1151 1152 printf(" # %s run -k %s -m %Lu -c %d --name %s\n", KVM_BINARY_NAME, 1153 kernel_filename, ram_size / 1024 / 1024, nrcpus, guest_name); 1154 1155 if (!firmware_filename) { 1156 if (!kvm__load_kernel(kvm, kernel_filename, 1157 initrd_filename, real_cmdline, vidmode)) 1158 die("unable to load kernel %s", kernel_filename); 1159 1160 kvm->vmlinux = vmlinux_filename; 1161 r = symbol_init(kvm); 1162 if (r < 0) 1163 pr_debug("symbol_init() failed with error %d\n", r); 1164 } 1165 1166 ioport__setup_arch(); 1167 1168 r = rtc__init(kvm); 1169 if (r < 0) { 1170 pr_err("rtc__init() failed with error %d\n", r); 1171 goto fail; 1172 } 1173 1174 r = serial8250__init(kvm); 1175 if (r < 0) { 1176 pr_err("serial__init() failed with error %d\n", r); 1177 goto fail; 1178 } 1179 1180 r = virtio_blk__init(kvm); 1181 if (r < 0) { 1182 pr_err("virtio_blk__init() failed with error %d\n", r); 1183 goto fail; 1184 } 1185 1186 if (active_console == CONSOLE_VIRTIO) 1187 virtio_console__init(kvm); 1188 1189 if (virtio_rng) 1190 virtio_rng__init(kvm); 1191 1192 if (balloon) 1193 virtio_bln__init(kvm); 1194 1195 if (!network) 1196 network = DEFAULT_NETWORK; 1197 1198 virtio_9p__init(kvm); 1199 1200 for (i = 0; i < num_net_devices; i++) { 1201 net_params[i].kvm = kvm; 1202 virtio_net__init(&net_params[i]); 1203 } 1204 1205 if (num_net_devices == 0 && no_net == 0) { 1206 struct virtio_net_params net_params; 1207 1208 net_params = (struct virtio_net_params) { 1209 .guest_ip = guest_ip, 1210 .host_ip = host_ip, 1211 .kvm = kvm, 1212 .script = script, 1213 .mode = NET_MODE_USER, 1214 }; 1215 str_to_mac(guest_mac, net_params.guest_mac); 1216 str_to_mac(host_mac, net_params.host_mac); 1217 1218 virtio_net__init(&net_params); 1219 } 1220 1221 kvm__init_ram(kvm); 1222 1223 #ifdef CONFIG_X86 1224 kbd__init(kvm); 1225 #endif 1226 1227 pci_shmem__init(kvm); 1228 1229 if (vnc || sdl) { 1230 fb = vesa__init(kvm); 1231 if (IS_ERR(fb)) { 1232 pr_err("vesa__init() failed with error %ld\n", PTR_ERR(fb)); 1233 goto fail; 1234 } 1235 } 1236 1237 if (vnc && fb) { 1238 r = vnc__init(fb); 1239 if (r < 0) { 1240 pr_err("vnc__init() failed with error %d\n", r); 1241 goto fail; 1242 } 1243 } 1244 1245 if (sdl && fb) { 1246 sdl__init(fb); 1247 if (r < 0) { 1248 pr_err("sdl__init() failed with error %d\n", r); 1249 goto fail; 1250 } 1251 } 1252 1253 r = fb__start(); 1254 if (r < 0) { 1255 pr_err("fb__init() failed with error %d\n", r); 1256 goto fail; 1257 } 1258 1259 /* Device init all done; firmware init must 1260 * come after this (it may set up device trees etc.) 1261 */ 1262 1263 kvm__start_timer(kvm); 1264 1265 if (firmware_filename) { 1266 if (!kvm__load_firmware(kvm, firmware_filename)) 1267 die("unable to load firmware image %s: %s", firmware_filename, strerror(errno)); 1268 } else { 1269 kvm__arch_setup_firmware(kvm); 1270 if (r < 0) { 1271 pr_err("kvm__arch_setup_firmware() failed with error %d\n", r); 1272 goto fail; 1273 } 1274 } 1275 1276 for (i = 0; i < nrcpus; i++) { 1277 kvm_cpus[i] = kvm_cpu__init(kvm, i); 1278 if (!kvm_cpus[i]) 1279 die("unable to initialize KVM VCPU"); 1280 } 1281 1282 thread_pool__init(nr_online_cpus); 1283 fail: 1284 return r; 1285 } 1286 1287 static int kvm_cmd_run_work(void) 1288 { 1289 int i, r = -1; 1290 void *ret = NULL; 1291 1292 for (i = 0; i < nrcpus; i++) { 1293 if (pthread_create(&kvm_cpus[i]->thread, NULL, kvm_cpu_thread, kvm_cpus[i]) != 0) 1294 die("unable to create KVM VCPU thread"); 1295 } 1296 1297 /* Only VCPU #0 is going to exit by itself when shutting down */ 1298 if (pthread_join(kvm_cpus[0]->thread, &ret) != 0) 1299 r = 0; 1300 1301 kvm_cpu__delete(kvm_cpus[0]); 1302 kvm_cpus[0] = NULL; 1303 1304 for (i = 1; i < nrcpus; i++) { 1305 if (kvm_cpus[i]->is_running) { 1306 pthread_kill(kvm_cpus[i]->thread, SIGKVMEXIT); 1307 if (pthread_join(kvm_cpus[i]->thread, &ret) != 0) 1308 die("pthread_join"); 1309 kvm_cpu__delete(kvm_cpus[i]); 1310 } 1311 if (ret == NULL) 1312 r = 0; 1313 } 1314 1315 return r; 1316 } 1317 1318 static void kvm_cmd_run_exit(int guest_ret) 1319 { 1320 int r = 0; 1321 1322 compat__print_all_messages(); 1323 1324 r = symbol_exit(kvm); 1325 if (r < 0) 1326 pr_warning("symbol_exit() failed with error %d\n", r); 1327 1328 r = irq__exit(kvm); 1329 if (r < 0) 1330 pr_warning("irq__exit() failed with error %d\n", r); 1331 1332 fb__stop(); 1333 1334 r = virtio_blk__exit(kvm); 1335 if (r < 0) 1336 pr_warning("virtio_blk__exit() failed with error %d\n", r); 1337 1338 r = virtio_rng__exit(kvm); 1339 if (r < 0) 1340 pr_warning("virtio_rng__exit() failed with error %d\n", r); 1341 1342 r = disk_image__close_all(kvm->disks, image_count); 1343 if (r < 0) 1344 pr_warning("disk_image__close_all() failed with error %d\n", r); 1345 1346 r = serial8250__exit(kvm); 1347 if (r < 0) 1348 pr_warning("serial8250__exit() failed with error %d\n", r); 1349 1350 r = rtc__exit(kvm); 1351 if (r < 0) 1352 pr_warning("rtc__exit() failed with error %d\n", r); 1353 1354 r = kvm__arch_free_firmware(kvm); 1355 if (r < 0) 1356 pr_warning("kvm__arch_free_firmware() failed with error %d\n", r); 1357 1358 r = ioport__exit(kvm); 1359 if (r < 0) 1360 pr_warning("ioport__exit() failed with error %d\n", r); 1361 1362 r = ioeventfd__exit(kvm); 1363 if (r < 0) 1364 pr_warning("ioeventfd__exit() failed with error %d\n", r); 1365 1366 r = pci__exit(kvm); 1367 if (r < 0) 1368 pr_warning("pci__exit() failed with error %d\n", r); 1369 1370 r = kvm__exit(kvm); 1371 if (r < 0) 1372 pr_warning("pci__exit() failed with error %d\n", r); 1373 1374 free(kvm_cpus); 1375 1376 if (guest_ret == 0) 1377 printf("\n # KVM session ended normally.\n"); 1378 } 1379 1380 int kvm_cmd_run(int argc, const char **argv, const char *prefix) 1381 { 1382 int r, ret = -EFAULT; 1383 1384 r = kvm_cmd_run_init(argc, argv); 1385 if (r < 0) 1386 return r; 1387 1388 ret = kvm_cmd_run_work(); 1389 kvm_cmd_run_exit(ret); 1390 1391 return ret; 1392 } 1393