1 #include "kvm/builtin-run.h" 2 3 #include "kvm/builtin-setup.h" 4 #include "kvm/virtio-balloon.h" 5 #include "kvm/virtio-console.h" 6 #include "kvm/parse-options.h" 7 #include "kvm/8250-serial.h" 8 #include "kvm/framebuffer.h" 9 #include "kvm/disk-image.h" 10 #include "kvm/threadpool.h" 11 #include "kvm/virtio-blk.h" 12 #include "kvm/virtio-net.h" 13 #include "kvm/virtio-rng.h" 14 #include "kvm/ioeventfd.h" 15 #include "kvm/virtio-9p.h" 16 #include "kvm/barrier.h" 17 #include "kvm/kvm-cpu.h" 18 #include "kvm/ioport.h" 19 #include "kvm/symbol.h" 20 #include "kvm/i8042.h" 21 #include "kvm/mutex.h" 22 #include "kvm/term.h" 23 #include "kvm/util.h" 24 #include "kvm/strbuf.h" 25 #include "kvm/vesa.h" 26 #include "kvm/irq.h" 27 #include "kvm/kvm.h" 28 #include "kvm/pci.h" 29 #include "kvm/rtc.h" 30 #include "kvm/sdl.h" 31 #include "kvm/vnc.h" 32 #include "kvm/guest_compat.h" 33 #include "kvm/pci-shmem.h" 34 #include "kvm/kvm-ipc.h" 35 #include "kvm/builtin-debug.h" 36 37 #include <linux/types.h> 38 #include <linux/err.h> 39 40 #include <sys/utsname.h> 41 #include <sys/types.h> 42 #include <sys/stat.h> 43 #include <termios.h> 44 #include <signal.h> 45 #include <stdlib.h> 46 #include <string.h> 47 #include <unistd.h> 48 #include <ctype.h> 49 #include <stdio.h> 50 51 #define DEFAULT_KVM_DEV "/dev/kvm" 52 #define DEFAULT_CONSOLE "serial" 53 #define DEFAULT_NETWORK "user" 54 #define DEFAULT_HOST_ADDR "192.168.33.1" 55 #define DEFAULT_GUEST_ADDR "192.168.33.15" 56 #define DEFAULT_GUEST_MAC "02:15:15:15:15:15" 57 #define DEFAULT_HOST_MAC "02:01:01:01:01:01" 58 #define DEFAULT_SCRIPT "none" 59 const char *DEFAULT_SANDBOX_FILENAME = "guest/sandbox.sh"; 60 61 #define MB_SHIFT (20) 62 #define KB_SHIFT (10) 63 #define GB_SHIFT (30) 64 #define MIN_RAM_SIZE_MB (64ULL) 65 #define MIN_RAM_SIZE_BYTE (MIN_RAM_SIZE_MB << MB_SHIFT) 66 67 struct kvm *kvm; 68 struct kvm_cpu **kvm_cpus; 69 __thread struct kvm_cpu *current_kvm_cpu; 70 71 static u64 ram_size; 72 static u8 image_count; 73 
static u8 num_net_devices; 74 static bool virtio_rng; 75 static const char *kernel_cmdline; 76 static const char *kernel_filename; 77 static const char *vmlinux_filename; 78 static const char *initrd_filename; 79 static const char *image_filename[MAX_DISK_IMAGES]; 80 static const char *console; 81 static const char *dev; 82 static const char *network; 83 static const char *host_ip; 84 static const char *guest_ip; 85 static const char *guest_mac; 86 static const char *host_mac; 87 static const char *script; 88 static const char *guest_name; 89 static const char *sandbox; 90 static const char *hugetlbfs_path; 91 static const char *custom_rootfs_name = "default"; 92 static struct virtio_net_params *net_params; 93 static bool single_step; 94 static bool readonly_image[MAX_DISK_IMAGES]; 95 static bool vnc; 96 static bool sdl; 97 static bool balloon; 98 static bool using_rootfs; 99 static bool custom_rootfs; 100 static bool no_net; 101 static bool no_dhcp; 102 extern bool ioport_debug; 103 static int kvm_run_wrapper; 104 extern int active_console; 105 extern int debug_iodelay; 106 107 bool do_debug_print = false; 108 109 static int nrcpus; 110 static int vidmode = -1; 111 112 static const char * const run_usage[] = { 113 "lkvm run [<options>] [<kernel image>]", 114 NULL 115 }; 116 117 enum { 118 KVM_RUN_DEFAULT, 119 KVM_RUN_SANDBOX, 120 }; 121 122 void kvm_run_set_wrapper_sandbox(void) 123 { 124 kvm_run_wrapper = KVM_RUN_SANDBOX; 125 } 126 127 static int img_name_parser(const struct option *opt, const char *arg, int unset) 128 { 129 char *sep; 130 struct stat st; 131 char path[PATH_MAX]; 132 133 if (stat(arg, &st) == 0 && 134 S_ISDIR(st.st_mode)) { 135 char tmp[PATH_MAX]; 136 137 if (using_rootfs) 138 die("Please use only one rootfs directory atmost"); 139 140 if (realpath(arg, tmp) == 0 || 141 virtio_9p__register(kvm, tmp, "/dev/root") < 0) 142 die("Unable to initialize virtio 9p"); 143 using_rootfs = 1; 144 return 0; 145 } 146 147 snprintf(path, PATH_MAX, "%s%s", 
kvm__get_dir(), arg); 148 149 if (stat(path, &st) == 0 && 150 S_ISDIR(st.st_mode)) { 151 char tmp[PATH_MAX]; 152 153 if (using_rootfs) 154 die("Please use only one rootfs directory atmost"); 155 156 if (realpath(path, tmp) == 0 || 157 virtio_9p__register(kvm, tmp, "/dev/root") < 0) 158 die("Unable to initialize virtio 9p"); 159 if (virtio_9p__register(kvm, "/", "hostfs") < 0) 160 die("Unable to initialize virtio 9p"); 161 kvm_setup_resolv(arg); 162 using_rootfs = custom_rootfs = 1; 163 custom_rootfs_name = arg; 164 return 0; 165 } 166 167 if (image_count >= MAX_DISK_IMAGES) 168 die("Currently only 4 images are supported"); 169 170 image_filename[image_count] = arg; 171 sep = strstr(arg, ","); 172 if (sep) { 173 if (strcmp(sep + 1, "ro") == 0) 174 readonly_image[image_count] = 1; 175 *sep = 0; 176 } 177 178 image_count++; 179 180 return 0; 181 } 182 183 static int virtio_9p_rootdir_parser(const struct option *opt, const char *arg, int unset) 184 { 185 char *tag_name; 186 char tmp[PATH_MAX]; 187 188 /* 189 * 9p dir can be of the form dirname,tag_name or 190 * just dirname. 
In the later case we use the 191 * default tag name 192 */ 193 tag_name = strstr(arg, ","); 194 if (tag_name) { 195 *tag_name = '\0'; 196 tag_name++; 197 } 198 if (realpath(arg, tmp)) { 199 if (virtio_9p__register(kvm, tmp, tag_name) < 0) 200 die("Unable to initialize virtio 9p"); 201 } else 202 die("Failed resolving 9p path"); 203 return 0; 204 } 205 206 static int tty_parser(const struct option *opt, const char *arg, int unset) 207 { 208 int tty = atoi(arg); 209 210 term_set_tty(tty); 211 212 return 0; 213 } 214 215 static inline void str_to_mac(const char *str, char *mac) 216 { 217 sscanf(str, "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx", 218 mac, mac+1, mac+2, mac+3, mac+4, mac+5); 219 } 220 static int set_net_param(struct virtio_net_params *p, const char *param, 221 const char *val) 222 { 223 if (strcmp(param, "guest_mac") == 0) { 224 str_to_mac(val, p->guest_mac); 225 } else if (strcmp(param, "mode") == 0) { 226 if (!strncmp(val, "user", 4)) { 227 int i; 228 229 for (i = 0; i < num_net_devices; i++) 230 if (net_params[i].mode == NET_MODE_USER) 231 die("Only one usermode network device allowed at a time"); 232 p->mode = NET_MODE_USER; 233 } else if (!strncmp(val, "tap", 3)) { 234 p->mode = NET_MODE_TAP; 235 } else if (!strncmp(val, "none", 4)) { 236 no_net = 1; 237 return -1; 238 } else 239 die("Unkown network mode %s, please use user, tap or none", network); 240 } else if (strcmp(param, "script") == 0) { 241 p->script = strdup(val); 242 } else if (strcmp(param, "guest_ip") == 0) { 243 p->guest_ip = strdup(val); 244 } else if (strcmp(param, "host_ip") == 0) { 245 p->host_ip = strdup(val); 246 } else if (strcmp(param, "vhost") == 0) { 247 p->vhost = atoi(val); 248 } else if (strcmp(param, "fd") == 0) { 249 p->fd = atoi(val); 250 } 251 252 return 0; 253 } 254 255 static int netdev_parser(const struct option *opt, const char *arg, int unset) 256 { 257 struct virtio_net_params p; 258 char *buf = NULL, *cmd = NULL, *cur = NULL; 259 bool on_cmd = true; 260 261 if (arg) { 262 buf 
= strdup(arg); 263 if (buf == NULL) 264 die("Failed allocating new net buffer"); 265 cur = strtok(buf, ",="); 266 } 267 268 p = (struct virtio_net_params) { 269 .guest_ip = DEFAULT_GUEST_ADDR, 270 .host_ip = DEFAULT_HOST_ADDR, 271 .script = DEFAULT_SCRIPT, 272 .mode = NET_MODE_TAP, 273 }; 274 275 str_to_mac(DEFAULT_GUEST_MAC, p.guest_mac); 276 p.guest_mac[5] += num_net_devices; 277 278 while (cur) { 279 if (on_cmd) { 280 cmd = cur; 281 } else { 282 if (set_net_param(&p, cmd, cur) < 0) 283 goto done; 284 } 285 on_cmd = !on_cmd; 286 287 cur = strtok(NULL, ",="); 288 }; 289 290 num_net_devices++; 291 292 net_params = realloc(net_params, num_net_devices * sizeof(*net_params)); 293 if (net_params == NULL) 294 die("Failed adding new network device"); 295 296 net_params[num_net_devices - 1] = p; 297 298 done: 299 free(buf); 300 return 0; 301 } 302 303 static int shmem_parser(const struct option *opt, const char *arg, int unset) 304 { 305 const u64 default_size = SHMEM_DEFAULT_SIZE; 306 const u64 default_phys_addr = SHMEM_DEFAULT_ADDR; 307 const char *default_handle = SHMEM_DEFAULT_HANDLE; 308 struct shmem_info *si = malloc(sizeof(struct shmem_info)); 309 u64 phys_addr; 310 u64 size; 311 char *handle = NULL; 312 int create = 0; 313 const char *p = arg; 314 char *next; 315 int base = 10; 316 int verbose = 0; 317 318 const int skip_pci = strlen("pci:"); 319 if (verbose) 320 pr_info("shmem_parser(%p,%s,%d)", opt, arg, unset); 321 /* parse out optional addr family */ 322 if (strcasestr(p, "pci:")) { 323 p += skip_pci; 324 } else if (strcasestr(p, "mem:")) { 325 die("I can't add to E820 map yet.\n"); 326 } 327 /* parse out physical addr */ 328 base = 10; 329 if (strcasestr(p, "0x")) 330 base = 16; 331 phys_addr = strtoll(p, &next, base); 332 if (next == p && phys_addr == 0) { 333 pr_info("shmem: no physical addr specified, using default."); 334 phys_addr = default_phys_addr; 335 } 336 if (*next != ':' && *next != '\0') 337 die("shmem: unexpected chars after phys addr.\n"); 338 
if (*next == '\0') 339 p = next; 340 else 341 p = next + 1; 342 /* parse out size */ 343 base = 10; 344 if (strcasestr(p, "0x")) 345 base = 16; 346 size = strtoll(p, &next, base); 347 if (next == p && size == 0) { 348 pr_info("shmem: no size specified, using default."); 349 size = default_size; 350 } 351 /* look for [KMGkmg][Bb]* uses base 2. */ 352 int skip_B = 0; 353 if (strspn(next, "KMGkmg")) { /* might have a prefix */ 354 if (*(next + 1) == 'B' || *(next + 1) == 'b') 355 skip_B = 1; 356 switch (*next) { 357 case 'K': 358 case 'k': 359 size = size << KB_SHIFT; 360 break; 361 case 'M': 362 case 'm': 363 size = size << MB_SHIFT; 364 break; 365 case 'G': 366 case 'g': 367 size = size << GB_SHIFT; 368 break; 369 default: 370 die("shmem: bug in detecting size prefix."); 371 break; 372 } 373 next += 1 + skip_B; 374 } 375 if (*next != ':' && *next != '\0') { 376 die("shmem: unexpected chars after phys size. <%c><%c>\n", 377 *next, *p); 378 } 379 if (*next == '\0') 380 p = next; 381 else 382 p = next + 1; 383 /* parse out optional shmem handle */ 384 const int skip_handle = strlen("handle="); 385 next = strcasestr(p, "handle="); 386 if (*p && next) { 387 if (p != next) 388 die("unexpected chars before handle\n"); 389 p += skip_handle; 390 next = strchrnul(p, ':'); 391 if (next - p) { 392 handle = malloc(next - p + 1); 393 strncpy(handle, p, next - p); 394 handle[next - p] = '\0'; /* just in case. */ 395 } 396 if (*next == '\0') 397 p = next; 398 else 399 p = next + 1; 400 } 401 /* parse optional create flag to see if we should create shm seg. 
*/ 402 if (*p && strcasestr(p, "create")) { 403 create = 1; 404 p += strlen("create"); 405 } 406 if (*p != '\0') 407 die("shmem: unexpected trailing chars\n"); 408 if (handle == NULL) { 409 handle = malloc(strlen(default_handle) + 1); 410 strcpy(handle, default_handle); 411 } 412 if (verbose) { 413 pr_info("shmem: phys_addr = %llx", phys_addr); 414 pr_info("shmem: size = %llx", size); 415 pr_info("shmem: handle = %s", handle); 416 pr_info("shmem: create = %d", create); 417 } 418 419 si->phys_addr = phys_addr; 420 si->size = size; 421 si->handle = handle; 422 si->create = create; 423 pci_shmem__register_mem(si); /* ownership of si, etc. passed on. */ 424 return 0; 425 } 426 427 static const struct option options[] = { 428 OPT_GROUP("Basic options:"), 429 OPT_STRING('\0', "name", &guest_name, "guest name", 430 "A name for the guest"), 431 OPT_INTEGER('c', "cpus", &nrcpus, "Number of CPUs"), 432 OPT_U64('m', "mem", &ram_size, "Virtual machine memory size in MiB."), 433 OPT_CALLBACK('\0', "shmem", NULL, 434 "[pci:]<addr>:<size>[:handle=<handle>][:create]", 435 "Share host shmem with guest via pci device", 436 shmem_parser), 437 OPT_CALLBACK('d', "disk", NULL, "image or rootfs_dir", "Disk image or rootfs directory", img_name_parser), 438 OPT_BOOLEAN('\0', "balloon", &balloon, "Enable virtio balloon"), 439 OPT_BOOLEAN('\0', "vnc", &vnc, "Enable VNC framebuffer"), 440 OPT_BOOLEAN('\0', "sdl", &sdl, "Enable SDL framebuffer"), 441 OPT_BOOLEAN('\0', "rng", &virtio_rng, "Enable virtio Random Number Generator"), 442 OPT_CALLBACK('\0', "9p", NULL, "dir_to_share,tag_name", 443 "Enable virtio 9p to share files between host and guest", virtio_9p_rootdir_parser), 444 OPT_STRING('\0', "console", &console, "serial, virtio or hv", 445 "Console to use"), 446 OPT_STRING('\0', "dev", &dev, "device_file", "KVM device file"), 447 OPT_CALLBACK('\0', "tty", NULL, "tty id", 448 "Remap guest TTY into a pty on the host", 449 tty_parser), 450 OPT_STRING('\0', "sandbox", &sandbox, "script", 451 
"Run this script when booting into custom rootfs"), 452 OPT_STRING('\0', "hugetlbfs", &hugetlbfs_path, "path", "Hugetlbfs path"), 453 454 OPT_GROUP("Kernel options:"), 455 OPT_STRING('k', "kernel", &kernel_filename, "kernel", 456 "Kernel to boot in virtual machine"), 457 OPT_STRING('i', "initrd", &initrd_filename, "initrd", 458 "Initial RAM disk image"), 459 OPT_STRING('p', "params", &kernel_cmdline, "params", 460 "Kernel command line arguments"), 461 462 OPT_GROUP("Networking options:"), 463 OPT_CALLBACK_DEFAULT('n', "network", NULL, "network params", 464 "Create a new guest NIC", 465 netdev_parser, NULL), 466 OPT_BOOLEAN('\0', "no-dhcp", &no_dhcp, "Disable kernel DHCP in rootfs mode"), 467 468 OPT_GROUP("BIOS options:"), 469 OPT_INTEGER('\0', "vidmode", &vidmode, 470 "Video mode"), 471 472 OPT_GROUP("Debug options:"), 473 OPT_BOOLEAN('\0', "debug", &do_debug_print, 474 "Enable debug messages"), 475 OPT_BOOLEAN('\0', "debug-single-step", &single_step, 476 "Enable single stepping"), 477 OPT_BOOLEAN('\0', "debug-ioport", &ioport_debug, 478 "Enable ioport debugging"), 479 OPT_INTEGER('\0', "debug-iodelay", &debug_iodelay, 480 "Delay IO by millisecond"), 481 OPT_END() 482 }; 483 484 /* 485 * Serialize debug printout so that the output of multiple vcpus does not 486 * get mixed up: 487 */ 488 static int printout_done; 489 490 static void handle_sigusr1(int sig) 491 { 492 struct kvm_cpu *cpu = current_kvm_cpu; 493 int fd = kvm_cpu__get_debug_fd(); 494 495 if (!cpu || cpu->needs_nmi) 496 return; 497 498 dprintf(fd, "\n #\n # vCPU #%ld's dump:\n #\n", cpu->cpu_id); 499 kvm_cpu__show_registers(cpu); 500 kvm_cpu__show_code(cpu); 501 kvm_cpu__show_page_tables(cpu); 502 fflush(stdout); 503 printout_done = 1; 504 mb(); 505 } 506 507 /* Pause/resume the guest using SIGUSR2 */ 508 static int is_paused; 509 510 static void handle_pause(int fd, u32 type, u32 len, u8 *msg) 511 { 512 if (WARN_ON(len)) 513 return; 514 515 if (type == KVM_IPC_RESUME && is_paused) { 516 kvm->vm_state = 
KVM_VMSTATE_RUNNING; 517 kvm__continue(); 518 } else if (type == KVM_IPC_PAUSE && !is_paused) { 519 kvm->vm_state = KVM_VMSTATE_PAUSED; 520 kvm__pause(); 521 } else { 522 return; 523 } 524 525 is_paused = !is_paused; 526 } 527 528 static void handle_vmstate(int fd, u32 type, u32 len, u8 *msg) 529 { 530 int r = 0; 531 532 if (type == KVM_IPC_VMSTATE) 533 r = write(fd, &kvm->vm_state, sizeof(kvm->vm_state)); 534 535 if (r < 0) 536 pr_warning("Failed sending VMSTATE"); 537 } 538 539 static void handle_debug(int fd, u32 type, u32 len, u8 *msg) 540 { 541 int i; 542 struct debug_cmd_params *params; 543 u32 dbg_type; 544 u32 vcpu; 545 546 if (WARN_ON(type != KVM_IPC_DEBUG || len != sizeof(*params))) 547 return; 548 549 params = (void *)msg; 550 dbg_type = params->dbg_type; 551 vcpu = params->cpu; 552 553 if (dbg_type & KVM_DEBUG_CMD_TYPE_NMI) { 554 if ((int)vcpu >= kvm->nrcpus) 555 return; 556 557 kvm_cpus[vcpu]->needs_nmi = 1; 558 pthread_kill(kvm_cpus[vcpu]->thread, SIGUSR1); 559 } 560 561 if (!(dbg_type & KVM_DEBUG_CMD_TYPE_DUMP)) 562 return; 563 564 for (i = 0; i < nrcpus; i++) { 565 struct kvm_cpu *cpu = kvm_cpus[i]; 566 567 if (!cpu) 568 continue; 569 570 printout_done = 0; 571 572 kvm_cpu__set_debug_fd(fd); 573 pthread_kill(cpu->thread, SIGUSR1); 574 /* 575 * Wait for the vCPU to dump state before signalling 576 * the next thread. 
Since this is debug code it does 577 * not matter that we are burning CPU time a bit: 578 */ 579 while (!printout_done) 580 mb(); 581 } 582 583 close(fd); 584 585 serial8250__inject_sysrq(kvm); 586 } 587 588 static void handle_sigalrm(int sig) 589 { 590 kvm__arch_periodic_poll(kvm); 591 } 592 593 static void handle_stop(int fd, u32 type, u32 len, u8 *msg) 594 { 595 if (WARN_ON(type != KVM_IPC_STOP || len)) 596 return; 597 598 kvm_cpu__reboot(); 599 } 600 601 static void *kvm_cpu_thread(void *arg) 602 { 603 current_kvm_cpu = arg; 604 605 if (kvm_cpu__start(current_kvm_cpu)) 606 goto panic_kvm; 607 608 return (void *) (intptr_t) 0; 609 610 panic_kvm: 611 fprintf(stderr, "KVM exit reason: %u (\"%s\")\n", 612 current_kvm_cpu->kvm_run->exit_reason, 613 kvm_exit_reasons[current_kvm_cpu->kvm_run->exit_reason]); 614 if (current_kvm_cpu->kvm_run->exit_reason == KVM_EXIT_UNKNOWN) 615 fprintf(stderr, "KVM exit code: 0x%Lu\n", 616 current_kvm_cpu->kvm_run->hw.hardware_exit_reason); 617 618 kvm_cpu__set_debug_fd(STDOUT_FILENO); 619 kvm_cpu__show_registers(current_kvm_cpu); 620 kvm_cpu__show_code(current_kvm_cpu); 621 kvm_cpu__show_page_tables(current_kvm_cpu); 622 623 return (void *) (intptr_t) 1; 624 } 625 626 static char kernel[PATH_MAX]; 627 628 static const char *host_kernels[] = { 629 "/boot/vmlinuz", 630 "/boot/bzImage", 631 NULL 632 }; 633 634 static const char *default_kernels[] = { 635 "./bzImage", 636 "arch/" BUILD_ARCH "/boot/bzImage", 637 "../../arch/" BUILD_ARCH "/boot/bzImage", 638 NULL 639 }; 640 641 static const char *default_vmlinux[] = { 642 "vmlinux", 643 "../../../vmlinux", 644 "../../vmlinux", 645 NULL 646 }; 647 648 static void kernel_usage_with_options(void) 649 { 650 const char **k; 651 struct utsname uts; 652 653 fprintf(stderr, "Fatal: could not find default kernel image in:\n"); 654 k = &default_kernels[0]; 655 while (*k) { 656 fprintf(stderr, "\t%s\n", *k); 657 k++; 658 } 659 660 if (uname(&uts) < 0) 661 return; 662 663 k = &host_kernels[0]; 664 
while (*k) { 665 if (snprintf(kernel, PATH_MAX, "%s-%s", *k, uts.release) < 0) 666 return; 667 fprintf(stderr, "\t%s\n", kernel); 668 k++; 669 } 670 fprintf(stderr, "\nPlease see '%s run --help' for more options.\n\n", 671 KVM_BINARY_NAME); 672 } 673 674 static u64 host_ram_size(void) 675 { 676 long page_size; 677 long nr_pages; 678 679 nr_pages = sysconf(_SC_PHYS_PAGES); 680 if (nr_pages < 0) { 681 pr_warning("sysconf(_SC_PHYS_PAGES) failed"); 682 return 0; 683 } 684 685 page_size = sysconf(_SC_PAGE_SIZE); 686 if (page_size < 0) { 687 pr_warning("sysconf(_SC_PAGE_SIZE) failed"); 688 return 0; 689 } 690 691 return (nr_pages * page_size) >> MB_SHIFT; 692 } 693 694 /* 695 * If user didn't specify how much memory it wants to allocate for the guest, 696 * avoid filling the whole host RAM. 697 */ 698 #define RAM_SIZE_RATIO 0.8 699 700 static u64 get_ram_size(int nr_cpus) 701 { 702 u64 available; 703 u64 ram_size; 704 705 ram_size = 64 * (nr_cpus + 3); 706 707 available = host_ram_size() * RAM_SIZE_RATIO; 708 if (!available) 709 available = MIN_RAM_SIZE_MB; 710 711 if (ram_size > available) 712 ram_size = available; 713 714 return ram_size; 715 } 716 717 static const char *find_kernel(void) 718 { 719 const char **k; 720 struct stat st; 721 struct utsname uts; 722 723 k = &default_kernels[0]; 724 while (*k) { 725 if (stat(*k, &st) < 0 || !S_ISREG(st.st_mode)) { 726 k++; 727 continue; 728 } 729 strncpy(kernel, *k, PATH_MAX); 730 return kernel; 731 } 732 733 if (uname(&uts) < 0) 734 return NULL; 735 736 k = &host_kernels[0]; 737 while (*k) { 738 if (snprintf(kernel, PATH_MAX, "%s-%s", *k, uts.release) < 0) 739 return NULL; 740 741 if (stat(kernel, &st) < 0 || !S_ISREG(st.st_mode)) { 742 k++; 743 continue; 744 } 745 return kernel; 746 747 } 748 return NULL; 749 } 750 751 static const char *find_vmlinux(void) 752 { 753 const char **vmlinux; 754 755 vmlinux = &default_vmlinux[0]; 756 while (*vmlinux) { 757 struct stat st; 758 759 if (stat(*vmlinux, &st) < 0 || 
/*
 * Write @arg to @fd quoted so that a shell reads it back verbatim.
 *
 * Runs of characters without a single quote are emitted as 'run'; each
 * single quote is emitted as "'".  An empty argument becomes ''.  Any
 * failing write is fatal.
 */
static void kvm_write_sandbox_cmd_exactly(int fd, const char *arg)
{
	size_t plain_len;

	/* zero length string */
	if (*arg == '\0') {
		if (write(fd, "''", 2) <= 0)
			die("Failed writing sandbox script");
		return;
	}

	do {
		/* length of the run up to the next single quote (or the end) */
		plain_len = strcspn(arg, "'");

		/* write non-single-quote string as #('string') */
		if (plain_len != 0) {
			if (write(fd, "'", 1) <= 0 ||
			    write(fd, arg, plain_len) <= 0 ||
			    write(fd, "'", 1) <= 0)
				die("Failed writing sandbox script");
		}

		/* reached the terminator: nothing left to quote */
		if (arg[plain_len] == '\0')
			break;

		/* write single quote as #("'") */
		if (write(fd, "\"'\"", 3) <= 0)
			die("Failed writing sandbox script");

		arg += plain_len + 1;
	} while (*arg != '\0');
}
848 { 849 const char script_hdr[] = "#! /bin/bash\n\n"; 850 int fd; 851 852 remove(sandbox); 853 854 fd = open(sandbox, O_RDWR | O_CREAT, 0777); 855 if (fd < 0) 856 die("Failed creating sandbox script"); 857 858 if (write(fd, script_hdr, sizeof(script_hdr) - 1) <= 0) 859 die("Failed writing sandbox script"); 860 861 while (argc) { 862 kvm_write_sandbox_cmd_exactly(fd, argv[0]); 863 if (argc - 1) 864 if (write(fd, " ", 1) <= 0) 865 die("Failed writing sandbox script"); 866 argv++; 867 argc--; 868 } 869 if (write(fd, "\n", 1) <= 0) 870 die("Failed writing sandbox script"); 871 872 close(fd); 873 } 874 875 static int kvm_cmd_run_init(int argc, const char **argv) 876 { 877 static char real_cmdline[2048], default_name[20]; 878 struct framebuffer *fb = NULL; 879 unsigned int nr_online_cpus; 880 int max_cpus, recommended_cpus; 881 int i, r; 882 883 signal(SIGALRM, handle_sigalrm); 884 kvm_ipc__register_handler(KVM_IPC_DEBUG, handle_debug); 885 signal(SIGUSR1, handle_sigusr1); 886 kvm_ipc__register_handler(KVM_IPC_PAUSE, handle_pause); 887 kvm_ipc__register_handler(KVM_IPC_RESUME, handle_pause); 888 kvm_ipc__register_handler(KVM_IPC_STOP, handle_stop); 889 kvm_ipc__register_handler(KVM_IPC_VMSTATE, handle_vmstate); 890 891 nr_online_cpus = sysconf(_SC_NPROCESSORS_ONLN); 892 893 while (argc != 0) { 894 argc = parse_options(argc, argv, options, run_usage, 895 PARSE_OPT_STOP_AT_NON_OPTION | 896 PARSE_OPT_KEEP_DASHDASH); 897 if (argc != 0) { 898 /* Cusrom options, should have been handled elsewhere */ 899 if (strcmp(argv[0], "--") == 0) { 900 if (kvm_run_wrapper == KVM_RUN_SANDBOX) { 901 sandbox = DEFAULT_SANDBOX_FILENAME; 902 kvm_run_write_sandbox_cmd(argv+1, argc-1); 903 break; 904 } 905 } 906 907 if ((kvm_run_wrapper == KVM_RUN_DEFAULT && kernel_filename) || 908 (kvm_run_wrapper == KVM_RUN_SANDBOX && sandbox)) { 909 fprintf(stderr, "Cannot handle parameter: " 910 "%s\n", argv[0]); 911 usage_with_options(run_usage, options); 912 return EINVAL; 913 } 914 if (kvm_run_wrapper 
== KVM_RUN_SANDBOX) { 915 /* 916 * first unhandled parameter is treated as 917 * sandbox command 918 */ 919 sandbox = DEFAULT_SANDBOX_FILENAME; 920 kvm_run_write_sandbox_cmd(argv, argc); 921 } else { 922 /* 923 * first unhandled parameter is treated as a kernel 924 * image 925 */ 926 kernel_filename = argv[0]; 927 } 928 argv++; 929 argc--; 930 } 931 932 } 933 934 if (!kernel_filename) 935 kernel_filename = find_kernel(); 936 937 if (!kernel_filename) { 938 kernel_usage_with_options(); 939 return EINVAL; 940 } 941 942 vmlinux_filename = find_vmlinux(); 943 944 if (nrcpus == 0) 945 nrcpus = nr_online_cpus; 946 947 if (!ram_size) 948 ram_size = get_ram_size(nrcpus); 949 950 if (ram_size < MIN_RAM_SIZE_MB) 951 die("Not enough memory specified: %lluMB (min %lluMB)", ram_size, MIN_RAM_SIZE_MB); 952 953 if (ram_size > host_ram_size()) 954 pr_warning("Guest memory size %lluMB exceeds host physical RAM size %lluMB", ram_size, host_ram_size()); 955 956 ram_size <<= MB_SHIFT; 957 958 if (!dev) 959 dev = DEFAULT_KVM_DEV; 960 961 if (!console) 962 console = DEFAULT_CONSOLE; 963 964 if (!strncmp(console, "virtio", 6)) 965 active_console = CONSOLE_VIRTIO; 966 else if (!strncmp(console, "serial", 6)) 967 active_console = CONSOLE_8250; 968 else if (!strncmp(console, "hv", 2)) 969 active_console = CONSOLE_HV; 970 else 971 pr_warning("No console!"); 972 973 if (!host_ip) 974 host_ip = DEFAULT_HOST_ADDR; 975 976 if (!guest_ip) 977 guest_ip = DEFAULT_GUEST_ADDR; 978 979 if (!guest_mac) 980 guest_mac = DEFAULT_GUEST_MAC; 981 982 if (!host_mac) 983 host_mac = DEFAULT_HOST_MAC; 984 985 if (!script) 986 script = DEFAULT_SCRIPT; 987 988 term_init(); 989 990 if (!guest_name) { 991 if (custom_rootfs) { 992 guest_name = custom_rootfs_name; 993 } else { 994 sprintf(default_name, "guest-%u", getpid()); 995 guest_name = default_name; 996 } 997 } 998 999 kvm = kvm__init(dev, hugetlbfs_path, ram_size, guest_name); 1000 1001 kvm->single_step = single_step; 1002 1003 r = ioeventfd__init(kvm); 1004 if 
(r < 0) { 1005 pr_err("ioeventfd__init() failed with error %d\n", r); 1006 goto fail; 1007 } 1008 1009 max_cpus = kvm__max_cpus(kvm); 1010 recommended_cpus = kvm__recommended_cpus(kvm); 1011 1012 if (nrcpus > max_cpus) { 1013 printf(" # Limit the number of CPUs to %d\n", max_cpus); 1014 nrcpus = max_cpus; 1015 } else if (nrcpus > recommended_cpus) { 1016 printf(" # Warning: The maximum recommended amount of VCPUs" 1017 " is %d\n", recommended_cpus); 1018 } 1019 1020 kvm->nrcpus = nrcpus; 1021 1022 /* Alloc one pointer too many, so array ends up 0-terminated */ 1023 kvm_cpus = calloc(nrcpus + 1, sizeof(void *)); 1024 if (!kvm_cpus) 1025 die("Couldn't allocate array for %d CPUs", nrcpus); 1026 1027 r = irq__init(kvm); 1028 if (r < 0) { 1029 pr_err("irq__init() failed with error %d\n", r); 1030 goto fail; 1031 } 1032 1033 pci__init(); 1034 1035 r = ioport__init(kvm); 1036 if (r < 0) { 1037 pr_err("ioport__init() failed with error %d\n", r); 1038 goto fail; 1039 } 1040 1041 /* 1042 * vidmode should be either specified 1043 * either set by default 1044 */ 1045 if (vnc || sdl) { 1046 if (vidmode == -1) 1047 vidmode = 0x312; 1048 } else { 1049 vidmode = 0; 1050 } 1051 1052 memset(real_cmdline, 0, sizeof(real_cmdline)); 1053 kvm__arch_set_cmdline(real_cmdline, vnc || sdl); 1054 1055 if (strlen(real_cmdline) > 0) 1056 strcat(real_cmdline, " "); 1057 1058 if (kernel_cmdline) 1059 strlcat(real_cmdline, kernel_cmdline, sizeof(real_cmdline)); 1060 1061 if (!using_rootfs && !image_filename[0] && !initrd_filename) { 1062 char tmp[PATH_MAX]; 1063 1064 kvm_setup_create_new(custom_rootfs_name); 1065 kvm_setup_resolv(custom_rootfs_name); 1066 1067 snprintf(tmp, PATH_MAX, "%s%s", kvm__get_dir(), "default"); 1068 if (virtio_9p__register(kvm, tmp, "/dev/root") < 0) 1069 die("Unable to initialize virtio 9p"); 1070 if (virtio_9p__register(kvm, "/", "hostfs") < 0) 1071 die("Unable to initialize virtio 9p"); 1072 using_rootfs = custom_rootfs = 1; 1073 } 1074 1075 if (using_rootfs) { 1076 
strcat(real_cmdline, " root=/dev/root rw rootflags=rw,trans=virtio,version=9p2000.L rootfstype=9p"); 1077 if (custom_rootfs) { 1078 kvm_run_set_sandbox(); 1079 1080 strcat(real_cmdline, " init=/virt/init"); 1081 1082 if (!no_dhcp) 1083 strcat(real_cmdline, " ip=dhcp"); 1084 if (kvm_custom_stage2()) 1085 die("Failed linking stage 2 of init."); 1086 } 1087 } else if (!strstr(real_cmdline, "root=")) { 1088 strlcat(real_cmdline, " root=/dev/vda rw ", sizeof(real_cmdline)); 1089 } 1090 1091 if (image_count) { 1092 kvm->nr_disks = image_count; 1093 kvm->disks = disk_image__open_all(image_filename, readonly_image, image_count); 1094 if (!kvm->disks) 1095 die("Unable to load all disk images."); 1096 1097 virtio_blk__init_all(kvm); 1098 } 1099 1100 printf(" # %s run -k %s -m %Lu -c %d --name %s\n", KVM_BINARY_NAME, 1101 kernel_filename, ram_size / 1024 / 1024, nrcpus, guest_name); 1102 1103 if (!kvm__load_kernel(kvm, kernel_filename, initrd_filename, 1104 real_cmdline, vidmode)) 1105 die("unable to load kernel %s", kernel_filename); 1106 1107 kvm->vmlinux = vmlinux_filename; 1108 r = symbol__init(kvm); 1109 if (r < 0) { 1110 pr_err("symbol__init() failed with error %d\n", r); 1111 goto fail; 1112 } 1113 1114 ioport__setup_arch(); 1115 1116 rtc__init(); 1117 1118 r = serial8250__init(kvm); 1119 if (r < 0) { 1120 pr_err("serial__init() failed with error %d\n", r); 1121 goto fail; 1122 } 1123 1124 if (active_console == CONSOLE_VIRTIO) 1125 virtio_console__init(kvm); 1126 1127 if (virtio_rng) 1128 virtio_rng__init(kvm); 1129 1130 if (balloon) 1131 virtio_bln__init(kvm); 1132 1133 if (!network) 1134 network = DEFAULT_NETWORK; 1135 1136 virtio_9p__init(kvm); 1137 1138 for (i = 0; i < num_net_devices; i++) { 1139 net_params[i].kvm = kvm; 1140 virtio_net__init(&net_params[i]); 1141 } 1142 1143 if (num_net_devices == 0 && no_net == 0) { 1144 struct virtio_net_params net_params; 1145 1146 net_params = (struct virtio_net_params) { 1147 .guest_ip = guest_ip, 1148 .host_ip = host_ip, 
1149 .kvm = kvm, 1150 .script = script, 1151 .mode = NET_MODE_USER, 1152 }; 1153 str_to_mac(guest_mac, net_params.guest_mac); 1154 str_to_mac(host_mac, net_params.host_mac); 1155 1156 virtio_net__init(&net_params); 1157 } 1158 1159 kvm__init_ram(kvm); 1160 1161 #ifdef CONFIG_X86 1162 kbd__init(kvm); 1163 #endif 1164 1165 pci_shmem__init(kvm); 1166 1167 if (vnc || sdl) { 1168 fb = vesa__init(kvm); 1169 if (IS_ERR(fb)) { 1170 pr_err("vesa__init() failed with error %ld\n", PTR_ERR(fb)); 1171 goto fail; 1172 } 1173 } 1174 1175 if (vnc && fb) { 1176 r = vnc__init(fb); 1177 if (r < 0) { 1178 pr_err("vnc__init() failed with error %d\n", r); 1179 goto fail; 1180 } 1181 } 1182 1183 if (sdl && fb) { 1184 sdl__init(fb); 1185 if (r < 0) { 1186 pr_err("sdl__init() failed with error %d\n", r); 1187 goto fail; 1188 } 1189 } 1190 1191 r = fb__start(); 1192 if (r < 0) { 1193 pr_err("fb__init() failed with error %d\n", r); 1194 goto fail; 1195 } 1196 1197 /* Device init all done; firmware init must 1198 * come after this (it may set up device trees etc.) 
1199 */ 1200 1201 kvm__start_timer(kvm); 1202 1203 kvm__arch_setup_firmware(kvm); 1204 1205 for (i = 0; i < nrcpus; i++) { 1206 kvm_cpus[i] = kvm_cpu__init(kvm, i); 1207 if (!kvm_cpus[i]) 1208 die("unable to initialize KVM VCPU"); 1209 } 1210 1211 thread_pool__init(nr_online_cpus); 1212 fail: 1213 return r; 1214 } 1215 1216 static int kvm_cmd_run_work(void) 1217 { 1218 int i, r = -1; 1219 void *ret = NULL; 1220 1221 for (i = 0; i < nrcpus; i++) { 1222 if (pthread_create(&kvm_cpus[i]->thread, NULL, kvm_cpu_thread, kvm_cpus[i]) != 0) 1223 die("unable to create KVM VCPU thread"); 1224 } 1225 1226 /* Only VCPU #0 is going to exit by itself when shutting down */ 1227 if (pthread_join(kvm_cpus[0]->thread, &ret) != 0) 1228 r = 0; 1229 1230 kvm_cpu__delete(kvm_cpus[0]); 1231 kvm_cpus[0] = NULL; 1232 1233 for (i = 1; i < nrcpus; i++) { 1234 if (kvm_cpus[i]->is_running) { 1235 pthread_kill(kvm_cpus[i]->thread, SIGKVMEXIT); 1236 if (pthread_join(kvm_cpus[i]->thread, &ret) != 0) 1237 die("pthread_join"); 1238 kvm_cpu__delete(kvm_cpus[i]); 1239 } 1240 if (ret == NULL) 1241 r = 0; 1242 } 1243 1244 return r; 1245 } 1246 1247 static void kvm_cmd_run_exit(int guest_ret) 1248 { 1249 int r = 0; 1250 1251 compat__print_all_messages(); 1252 1253 r = symbol__exit(kvm); 1254 if (r < 0) 1255 pr_warning("symbol__exit() failed with error %d\n", r); 1256 1257 r = irq__exit(kvm); 1258 if (r < 0) 1259 pr_warning("irq__exit() failed with error %d\n", r); 1260 1261 fb__stop(); 1262 1263 virtio_blk__delete_all(kvm); 1264 virtio_rng__delete_all(kvm); 1265 1266 disk_image__close_all(kvm->disks, image_count); 1267 free(kvm_cpus); 1268 1269 r = serial8250__exit(kvm); 1270 if (r < 0) 1271 pr_warning("serial8250__exit() failed with error %d\n", r); 1272 1273 r = ioport__exit(kvm); 1274 if (r < 0) 1275 pr_warning("ioport__exit() failed with error %d\n", r); 1276 1277 r = ioeventfd__exit(kvm); 1278 if (r < 0) 1279 pr_warning("ioeventfd__exit() failed with error %d\n", r); 1280 1281 kvm__delete(kvm); 1282 
/*
 * Entry point of the "run" command: initialise the guest, run it until the
 * vCPU threads finish, then tear everything down.
 *
 * Returns the (negative) initialisation error if setup fails, otherwise the
 * result of kvm_cmd_run_work().  @prefix is not used.
 */
int kvm_cmd_run(int argc, const char **argv, const char *prefix)
{
	int init_status;
	int guest_status = -EFAULT;

	init_status = kvm_cmd_run_init(argc, argv);
	if (init_status < 0)
		return init_status;

	guest_status = kvm_cmd_run_work();
	kvm_cmd_run_exit(guest_status);

	return guest_status;
}