#include "kvm/builtin-run.h"

#include "kvm/builtin-setup.h"
#include "kvm/virtio-balloon.h"
#include "kvm/virtio-console.h"
#include "kvm/parse-options.h"
#include "kvm/8250-serial.h"
#include "kvm/framebuffer.h"
#include "kvm/disk-image.h"
#include "kvm/threadpool.h"
#include "kvm/virtio-blk.h"
#include "kvm/virtio-net.h"
#include "kvm/virtio-rng.h"
#include "kvm/ioeventfd.h"
#include "kvm/virtio-9p.h"
#include "kvm/barrier.h"
#include "kvm/kvm-cpu.h"
#include "kvm/ioport.h"
#include "kvm/symbol.h"
#include "kvm/i8042.h"
#include "kvm/mutex.h"
#include "kvm/term.h"
#include "kvm/util.h"
#include "kvm/strbuf.h"
#include "kvm/vesa.h"
#include "kvm/irq.h"
#include "kvm/kvm.h"
#include "kvm/pci.h"
#include "kvm/rtc.h"
#include "kvm/sdl.h"
#include "kvm/vnc.h"
#include "kvm/guest_compat.h"
#include "kvm/pci-shmem.h"
#include "kvm/kvm-ipc.h"
#include "kvm/builtin-debug.h"

#include <linux/types.h>
#include <linux/err.h>

#include <sys/utsname.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <termios.h>
#include <signal.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <ctype.h>
#include <stdio.h>

/* Fallback values used when the matching command-line option is absent. */
#define DEFAULT_KVM_DEV		"/dev/kvm"
#define DEFAULT_CONSOLE		"serial"
#define DEFAULT_NETWORK		"user"
#define DEFAULT_HOST_ADDR	"192.168.33.1"
#define DEFAULT_GUEST_ADDR	"192.168.33.15"
#define DEFAULT_GUEST_MAC	"02:15:15:15:15:15"
#define DEFAULT_HOST_MAC	"02:01:01:01:01:01"
#define DEFAULT_SCRIPT		"none"
/* Path the sandbox wrapper script is written to in sandbox mode. */
const char *DEFAULT_SANDBOX_FILENAME = "guest/sandbox.sh";

/* Shift counts for converting between bytes and KiB/MiB/GiB. */
#define MB_SHIFT		(20)
#define KB_SHIFT		(10)
#define GB_SHIFT		(30)
/* Smallest guest RAM size accepted, in MiB and in bytes. */
#define MIN_RAM_SIZE_MB		(64ULL)
#define MIN_RAM_SIZE_BYTE	(MIN_RAM_SIZE_MB << MB_SHIFT)

/* The guest instance and its vCPU array (allocated during run init). */
struct kvm *kvm;
struct kvm_cpu **kvm_cpus;
/* Per-thread pointer to the vCPU a vCPU thread is driving. */
__thread struct kvm_cpu *current_kvm_cpu;

/* Guest RAM size: MiB while parsing options, converted to bytes later. */
static u64 ram_size;
/* Number of disk images collected from the command line. */
static u8 image_count;
static u8 num_net_devices; 74 static bool virtio_rng; 75 static const char *kernel_cmdline; 76 static const char *kernel_filename; 77 static const char *vmlinux_filename; 78 static const char *initrd_filename; 79 static const char *firmware_filename; 80 static const char *image_filename[MAX_DISK_IMAGES]; 81 static const char *console; 82 static const char *dev; 83 static const char *network; 84 static const char *host_ip; 85 static const char *guest_ip; 86 static const char *guest_mac; 87 static const char *host_mac; 88 static const char *script; 89 static const char *guest_name; 90 static const char *sandbox; 91 static const char *hugetlbfs_path; 92 static const char *custom_rootfs_name = "default"; 93 static struct virtio_net_params *net_params; 94 static bool single_step; 95 static bool readonly_image[MAX_DISK_IMAGES]; 96 static bool vnc; 97 static bool sdl; 98 static bool balloon; 99 static bool using_rootfs; 100 static bool custom_rootfs; 101 static bool no_net; 102 static bool no_dhcp; 103 extern bool ioport_debug; 104 extern bool mmio_debug; 105 static int kvm_run_wrapper; 106 extern int active_console; 107 extern int debug_iodelay; 108 109 bool do_debug_print = false; 110 111 static int nrcpus; 112 static int vidmode = -1; 113 114 static const char * const run_usage[] = { 115 "lkvm run [<options>] [<kernel image>]", 116 NULL 117 }; 118 119 enum { 120 KVM_RUN_DEFAULT, 121 KVM_RUN_SANDBOX, 122 }; 123 124 void kvm_run_set_wrapper_sandbox(void) 125 { 126 kvm_run_wrapper = KVM_RUN_SANDBOX; 127 } 128 129 static int img_name_parser(const struct option *opt, const char *arg, int unset) 130 { 131 char *sep; 132 struct stat st; 133 char path[PATH_MAX]; 134 135 if (stat(arg, &st) == 0 && 136 S_ISDIR(st.st_mode)) { 137 char tmp[PATH_MAX]; 138 139 if (using_rootfs) 140 die("Please use only one rootfs directory atmost"); 141 142 if (realpath(arg, tmp) == 0 || 143 virtio_9p__register(kvm, tmp, "/dev/root") < 0) 144 die("Unable to initialize virtio 9p"); 145 using_rootfs = 
1; 146 return 0; 147 } 148 149 snprintf(path, PATH_MAX, "%s%s", kvm__get_dir(), arg); 150 151 if (stat(path, &st) == 0 && 152 S_ISDIR(st.st_mode)) { 153 char tmp[PATH_MAX]; 154 155 if (using_rootfs) 156 die("Please use only one rootfs directory atmost"); 157 158 if (realpath(path, tmp) == 0 || 159 virtio_9p__register(kvm, tmp, "/dev/root") < 0) 160 die("Unable to initialize virtio 9p"); 161 if (virtio_9p__register(kvm, "/", "hostfs") < 0) 162 die("Unable to initialize virtio 9p"); 163 kvm_setup_resolv(arg); 164 using_rootfs = custom_rootfs = 1; 165 custom_rootfs_name = arg; 166 return 0; 167 } 168 169 if (image_count >= MAX_DISK_IMAGES) 170 die("Currently only 4 images are supported"); 171 172 image_filename[image_count] = arg; 173 sep = strstr(arg, ","); 174 if (sep) { 175 if (strcmp(sep + 1, "ro") == 0) 176 readonly_image[image_count] = 1; 177 *sep = 0; 178 } 179 180 image_count++; 181 182 return 0; 183 } 184 185 static int virtio_9p_rootdir_parser(const struct option *opt, const char *arg, int unset) 186 { 187 char *tag_name; 188 char tmp[PATH_MAX]; 189 190 /* 191 * 9p dir can be of the form dirname,tag_name or 192 * just dirname. 
In the later case we use the 193 * default tag name 194 */ 195 tag_name = strstr(arg, ","); 196 if (tag_name) { 197 *tag_name = '\0'; 198 tag_name++; 199 } 200 if (realpath(arg, tmp)) { 201 if (virtio_9p__register(kvm, tmp, tag_name) < 0) 202 die("Unable to initialize virtio 9p"); 203 } else 204 die("Failed resolving 9p path"); 205 return 0; 206 } 207 208 static int tty_parser(const struct option *opt, const char *arg, int unset) 209 { 210 int tty = atoi(arg); 211 212 term_set_tty(tty); 213 214 return 0; 215 } 216 217 static inline void str_to_mac(const char *str, char *mac) 218 { 219 sscanf(str, "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx", 220 mac, mac+1, mac+2, mac+3, mac+4, mac+5); 221 } 222 static int set_net_param(struct virtio_net_params *p, const char *param, 223 const char *val) 224 { 225 if (strcmp(param, "guest_mac") == 0) { 226 str_to_mac(val, p->guest_mac); 227 } else if (strcmp(param, "mode") == 0) { 228 if (!strncmp(val, "user", 4)) { 229 int i; 230 231 for (i = 0; i < num_net_devices; i++) 232 if (net_params[i].mode == NET_MODE_USER) 233 die("Only one usermode network device allowed at a time"); 234 p->mode = NET_MODE_USER; 235 } else if (!strncmp(val, "tap", 3)) { 236 p->mode = NET_MODE_TAP; 237 } else if (!strncmp(val, "none", 4)) { 238 no_net = 1; 239 return -1; 240 } else 241 die("Unkown network mode %s, please use user, tap or none", network); 242 } else if (strcmp(param, "script") == 0) { 243 p->script = strdup(val); 244 } else if (strcmp(param, "guest_ip") == 0) { 245 p->guest_ip = strdup(val); 246 } else if (strcmp(param, "host_ip") == 0) { 247 p->host_ip = strdup(val); 248 } else if (strcmp(param, "trans") == 0) { 249 p->trans = strdup(val); 250 } else if (strcmp(param, "vhost") == 0) { 251 p->vhost = atoi(val); 252 } else if (strcmp(param, "fd") == 0) { 253 p->fd = atoi(val); 254 } 255 256 return 0; 257 } 258 259 static int netdev_parser(const struct option *opt, const char *arg, int unset) 260 { 261 struct virtio_net_params p; 262 char *buf = NULL, 
*cmd = NULL, *cur = NULL; 263 bool on_cmd = true; 264 265 if (arg) { 266 buf = strdup(arg); 267 if (buf == NULL) 268 die("Failed allocating new net buffer"); 269 cur = strtok(buf, ",="); 270 } 271 272 p = (struct virtio_net_params) { 273 .guest_ip = DEFAULT_GUEST_ADDR, 274 .host_ip = DEFAULT_HOST_ADDR, 275 .script = DEFAULT_SCRIPT, 276 .mode = NET_MODE_TAP, 277 }; 278 279 str_to_mac(DEFAULT_GUEST_MAC, p.guest_mac); 280 p.guest_mac[5] += num_net_devices; 281 282 while (cur) { 283 if (on_cmd) { 284 cmd = cur; 285 } else { 286 if (set_net_param(&p, cmd, cur) < 0) 287 goto done; 288 } 289 on_cmd = !on_cmd; 290 291 cur = strtok(NULL, ",="); 292 }; 293 294 num_net_devices++; 295 296 net_params = realloc(net_params, num_net_devices * sizeof(*net_params)); 297 if (net_params == NULL) 298 die("Failed adding new network device"); 299 300 net_params[num_net_devices - 1] = p; 301 302 done: 303 free(buf); 304 return 0; 305 } 306 307 static int shmem_parser(const struct option *opt, const char *arg, int unset) 308 { 309 const u64 default_size = SHMEM_DEFAULT_SIZE; 310 const u64 default_phys_addr = SHMEM_DEFAULT_ADDR; 311 const char *default_handle = SHMEM_DEFAULT_HANDLE; 312 struct shmem_info *si = malloc(sizeof(struct shmem_info)); 313 u64 phys_addr; 314 u64 size; 315 char *handle = NULL; 316 int create = 0; 317 const char *p = arg; 318 char *next; 319 int base = 10; 320 int verbose = 0; 321 322 const int skip_pci = strlen("pci:"); 323 if (verbose) 324 pr_info("shmem_parser(%p,%s,%d)", opt, arg, unset); 325 /* parse out optional addr family */ 326 if (strcasestr(p, "pci:")) { 327 p += skip_pci; 328 } else if (strcasestr(p, "mem:")) { 329 die("I can't add to E820 map yet.\n"); 330 } 331 /* parse out physical addr */ 332 base = 10; 333 if (strcasestr(p, "0x")) 334 base = 16; 335 phys_addr = strtoll(p, &next, base); 336 if (next == p && phys_addr == 0) { 337 pr_info("shmem: no physical addr specified, using default."); 338 phys_addr = default_phys_addr; 339 } 340 if (*next != ':' 
&& *next != '\0') 341 die("shmem: unexpected chars after phys addr.\n"); 342 if (*next == '\0') 343 p = next; 344 else 345 p = next + 1; 346 /* parse out size */ 347 base = 10; 348 if (strcasestr(p, "0x")) 349 base = 16; 350 size = strtoll(p, &next, base); 351 if (next == p && size == 0) { 352 pr_info("shmem: no size specified, using default."); 353 size = default_size; 354 } 355 /* look for [KMGkmg][Bb]* uses base 2. */ 356 int skip_B = 0; 357 if (strspn(next, "KMGkmg")) { /* might have a prefix */ 358 if (*(next + 1) == 'B' || *(next + 1) == 'b') 359 skip_B = 1; 360 switch (*next) { 361 case 'K': 362 case 'k': 363 size = size << KB_SHIFT; 364 break; 365 case 'M': 366 case 'm': 367 size = size << MB_SHIFT; 368 break; 369 case 'G': 370 case 'g': 371 size = size << GB_SHIFT; 372 break; 373 default: 374 die("shmem: bug in detecting size prefix."); 375 break; 376 } 377 next += 1 + skip_B; 378 } 379 if (*next != ':' && *next != '\0') { 380 die("shmem: unexpected chars after phys size. <%c><%c>\n", 381 *next, *p); 382 } 383 if (*next == '\0') 384 p = next; 385 else 386 p = next + 1; 387 /* parse out optional shmem handle */ 388 const int skip_handle = strlen("handle="); 389 next = strcasestr(p, "handle="); 390 if (*p && next) { 391 if (p != next) 392 die("unexpected chars before handle\n"); 393 p += skip_handle; 394 next = strchrnul(p, ':'); 395 if (next - p) { 396 handle = malloc(next - p + 1); 397 strncpy(handle, p, next - p); 398 handle[next - p] = '\0'; /* just in case. */ 399 } 400 if (*next == '\0') 401 p = next; 402 else 403 p = next + 1; 404 } 405 /* parse optional create flag to see if we should create shm seg. 
*/ 406 if (*p && strcasestr(p, "create")) { 407 create = 1; 408 p += strlen("create"); 409 } 410 if (*p != '\0') 411 die("shmem: unexpected trailing chars\n"); 412 if (handle == NULL) { 413 handle = malloc(strlen(default_handle) + 1); 414 strcpy(handle, default_handle); 415 } 416 if (verbose) { 417 pr_info("shmem: phys_addr = %llx", phys_addr); 418 pr_info("shmem: size = %llx", size); 419 pr_info("shmem: handle = %s", handle); 420 pr_info("shmem: create = %d", create); 421 } 422 423 si->phys_addr = phys_addr; 424 si->size = size; 425 si->handle = handle; 426 si->create = create; 427 pci_shmem__register_mem(si); /* ownership of si, etc. passed on. */ 428 return 0; 429 } 430 431 static const struct option options[] = { 432 OPT_GROUP("Basic options:"), 433 OPT_STRING('\0', "name", &guest_name, "guest name", 434 "A name for the guest"), 435 OPT_INTEGER('c', "cpus", &nrcpus, "Number of CPUs"), 436 OPT_U64('m', "mem", &ram_size, "Virtual machine memory size in MiB."), 437 OPT_CALLBACK('\0', "shmem", NULL, 438 "[pci:]<addr>:<size>[:handle=<handle>][:create]", 439 "Share host shmem with guest via pci device", 440 shmem_parser), 441 OPT_CALLBACK('d', "disk", NULL, "image or rootfs_dir", "Disk image or rootfs directory", img_name_parser), 442 OPT_BOOLEAN('\0', "balloon", &balloon, "Enable virtio balloon"), 443 OPT_BOOLEAN('\0', "vnc", &vnc, "Enable VNC framebuffer"), 444 OPT_BOOLEAN('\0', "sdl", &sdl, "Enable SDL framebuffer"), 445 OPT_BOOLEAN('\0', "rng", &virtio_rng, "Enable virtio Random Number Generator"), 446 OPT_CALLBACK('\0', "9p", NULL, "dir_to_share,tag_name", 447 "Enable virtio 9p to share files between host and guest", virtio_9p_rootdir_parser), 448 OPT_STRING('\0', "console", &console, "serial, virtio or hv", 449 "Console to use"), 450 OPT_STRING('\0', "dev", &dev, "device_file", "KVM device file"), 451 OPT_CALLBACK('\0', "tty", NULL, "tty id", 452 "Remap guest TTY into a pty on the host", 453 tty_parser), 454 OPT_STRING('\0', "sandbox", &sandbox, "script", 455 
"Run this script when booting into custom rootfs"), 456 OPT_STRING('\0', "hugetlbfs", &hugetlbfs_path, "path", "Hugetlbfs path"), 457 458 OPT_GROUP("Kernel options:"), 459 OPT_STRING('k', "kernel", &kernel_filename, "kernel", 460 "Kernel to boot in virtual machine"), 461 OPT_STRING('i', "initrd", &initrd_filename, "initrd", 462 "Initial RAM disk image"), 463 OPT_STRING('p', "params", &kernel_cmdline, "params", 464 "Kernel command line arguments"), 465 OPT_STRING('f', "firmware", &firmware_filename, "firmware", 466 "Firmware image to boot in virtual machine"), 467 468 OPT_GROUP("Networking options:"), 469 OPT_CALLBACK_DEFAULT('n', "network", NULL, "network params", 470 "Create a new guest NIC", 471 netdev_parser, NULL), 472 OPT_BOOLEAN('\0', "no-dhcp", &no_dhcp, "Disable kernel DHCP in rootfs mode"), 473 474 OPT_GROUP("BIOS options:"), 475 OPT_INTEGER('\0', "vidmode", &vidmode, 476 "Video mode"), 477 478 OPT_GROUP("Debug options:"), 479 OPT_BOOLEAN('\0', "debug", &do_debug_print, 480 "Enable debug messages"), 481 OPT_BOOLEAN('\0', "debug-single-step", &single_step, 482 "Enable single stepping"), 483 OPT_BOOLEAN('\0', "debug-ioport", &ioport_debug, 484 "Enable ioport debugging"), 485 OPT_BOOLEAN('\0', "debug-mmio", &mmio_debug, 486 "Enable MMIO debugging"), 487 OPT_INTEGER('\0', "debug-iodelay", &debug_iodelay, 488 "Delay IO by millisecond"), 489 OPT_END() 490 }; 491 492 /* 493 * Serialize debug printout so that the output of multiple vcpus does not 494 * get mixed up: 495 */ 496 static int printout_done; 497 498 static void handle_sigusr1(int sig) 499 { 500 struct kvm_cpu *cpu = current_kvm_cpu; 501 int fd = kvm_cpu__get_debug_fd(); 502 503 if (!cpu || cpu->needs_nmi) 504 return; 505 506 dprintf(fd, "\n #\n # vCPU #%ld's dump:\n #\n", cpu->cpu_id); 507 kvm_cpu__show_registers(cpu); 508 kvm_cpu__show_code(cpu); 509 kvm_cpu__show_page_tables(cpu); 510 fflush(stdout); 511 printout_done = 1; 512 mb(); 513 } 514 515 /* Pause/resume the guest using SIGUSR2 */ 516 static 
int is_paused; 517 518 static void handle_pause(int fd, u32 type, u32 len, u8 *msg) 519 { 520 if (WARN_ON(len)) 521 return; 522 523 if (type == KVM_IPC_RESUME && is_paused) { 524 kvm->vm_state = KVM_VMSTATE_RUNNING; 525 kvm__continue(); 526 } else if (type == KVM_IPC_PAUSE && !is_paused) { 527 kvm->vm_state = KVM_VMSTATE_PAUSED; 528 kvm__pause(); 529 } else { 530 return; 531 } 532 533 is_paused = !is_paused; 534 } 535 536 static void handle_vmstate(int fd, u32 type, u32 len, u8 *msg) 537 { 538 int r = 0; 539 540 if (type == KVM_IPC_VMSTATE) 541 r = write(fd, &kvm->vm_state, sizeof(kvm->vm_state)); 542 543 if (r < 0) 544 pr_warning("Failed sending VMSTATE"); 545 } 546 547 static void handle_debug(int fd, u32 type, u32 len, u8 *msg) 548 { 549 int i; 550 struct debug_cmd_params *params; 551 u32 dbg_type; 552 u32 vcpu; 553 554 if (WARN_ON(type != KVM_IPC_DEBUG || len != sizeof(*params))) 555 return; 556 557 params = (void *)msg; 558 dbg_type = params->dbg_type; 559 vcpu = params->cpu; 560 561 if (dbg_type & KVM_DEBUG_CMD_TYPE_NMI) { 562 if ((int)vcpu >= kvm->nrcpus) 563 return; 564 565 kvm_cpus[vcpu]->needs_nmi = 1; 566 pthread_kill(kvm_cpus[vcpu]->thread, SIGUSR1); 567 } 568 569 if (!(dbg_type & KVM_DEBUG_CMD_TYPE_DUMP)) 570 return; 571 572 for (i = 0; i < nrcpus; i++) { 573 struct kvm_cpu *cpu = kvm_cpus[i]; 574 575 if (!cpu) 576 continue; 577 578 printout_done = 0; 579 580 kvm_cpu__set_debug_fd(fd); 581 pthread_kill(cpu->thread, SIGUSR1); 582 /* 583 * Wait for the vCPU to dump state before signalling 584 * the next thread. 
Since this is debug code it does 585 * not matter that we are burning CPU time a bit: 586 */ 587 while (!printout_done) 588 mb(); 589 } 590 591 close(fd); 592 593 serial8250__inject_sysrq(kvm); 594 } 595 596 static void handle_sigalrm(int sig) 597 { 598 kvm__arch_periodic_poll(kvm); 599 } 600 601 static void handle_stop(int fd, u32 type, u32 len, u8 *msg) 602 { 603 if (WARN_ON(type != KVM_IPC_STOP || len)) 604 return; 605 606 kvm_cpu__reboot(); 607 } 608 609 static void *kvm_cpu_thread(void *arg) 610 { 611 current_kvm_cpu = arg; 612 613 if (kvm_cpu__start(current_kvm_cpu)) 614 goto panic_kvm; 615 616 return (void *) (intptr_t) 0; 617 618 panic_kvm: 619 fprintf(stderr, "KVM exit reason: %u (\"%s\")\n", 620 current_kvm_cpu->kvm_run->exit_reason, 621 kvm_exit_reasons[current_kvm_cpu->kvm_run->exit_reason]); 622 if (current_kvm_cpu->kvm_run->exit_reason == KVM_EXIT_UNKNOWN) 623 fprintf(stderr, "KVM exit code: 0x%Lu\n", 624 current_kvm_cpu->kvm_run->hw.hardware_exit_reason); 625 626 kvm_cpu__set_debug_fd(STDOUT_FILENO); 627 kvm_cpu__show_registers(current_kvm_cpu); 628 kvm_cpu__show_code(current_kvm_cpu); 629 kvm_cpu__show_page_tables(current_kvm_cpu); 630 631 return (void *) (intptr_t) 1; 632 } 633 634 static char kernel[PATH_MAX]; 635 636 static const char *host_kernels[] = { 637 "/boot/vmlinuz", 638 "/boot/bzImage", 639 NULL 640 }; 641 642 static const char *default_kernels[] = { 643 "./bzImage", 644 "arch/" BUILD_ARCH "/boot/bzImage", 645 "../../arch/" BUILD_ARCH "/boot/bzImage", 646 NULL 647 }; 648 649 static const char *default_vmlinux[] = { 650 "vmlinux", 651 "../../../vmlinux", 652 "../../vmlinux", 653 NULL 654 }; 655 656 static void kernel_usage_with_options(void) 657 { 658 const char **k; 659 struct utsname uts; 660 661 fprintf(stderr, "Fatal: could not find default kernel image in:\n"); 662 k = &default_kernels[0]; 663 while (*k) { 664 fprintf(stderr, "\t%s\n", *k); 665 k++; 666 } 667 668 if (uname(&uts) < 0) 669 return; 670 671 k = &host_kernels[0]; 672 
while (*k) { 673 if (snprintf(kernel, PATH_MAX, "%s-%s", *k, uts.release) < 0) 674 return; 675 fprintf(stderr, "\t%s\n", kernel); 676 k++; 677 } 678 fprintf(stderr, "\nPlease see '%s run --help' for more options.\n\n", 679 KVM_BINARY_NAME); 680 } 681 682 static u64 host_ram_size(void) 683 { 684 long page_size; 685 long nr_pages; 686 687 nr_pages = sysconf(_SC_PHYS_PAGES); 688 if (nr_pages < 0) { 689 pr_warning("sysconf(_SC_PHYS_PAGES) failed"); 690 return 0; 691 } 692 693 page_size = sysconf(_SC_PAGE_SIZE); 694 if (page_size < 0) { 695 pr_warning("sysconf(_SC_PAGE_SIZE) failed"); 696 return 0; 697 } 698 699 return (nr_pages * page_size) >> MB_SHIFT; 700 } 701 702 /* 703 * If user didn't specify how much memory it wants to allocate for the guest, 704 * avoid filling the whole host RAM. 705 */ 706 #define RAM_SIZE_RATIO 0.8 707 708 static u64 get_ram_size(int nr_cpus) 709 { 710 u64 available; 711 u64 ram_size; 712 713 ram_size = 64 * (nr_cpus + 3); 714 715 available = host_ram_size() * RAM_SIZE_RATIO; 716 if (!available) 717 available = MIN_RAM_SIZE_MB; 718 719 if (ram_size > available) 720 ram_size = available; 721 722 return ram_size; 723 } 724 725 static const char *find_kernel(void) 726 { 727 const char **k; 728 struct stat st; 729 struct utsname uts; 730 731 k = &default_kernels[0]; 732 while (*k) { 733 if (stat(*k, &st) < 0 || !S_ISREG(st.st_mode)) { 734 k++; 735 continue; 736 } 737 strncpy(kernel, *k, PATH_MAX); 738 return kernel; 739 } 740 741 if (uname(&uts) < 0) 742 return NULL; 743 744 k = &host_kernels[0]; 745 while (*k) { 746 if (snprintf(kernel, PATH_MAX, "%s-%s", *k, uts.release) < 0) 747 return NULL; 748 749 if (stat(kernel, &st) < 0 || !S_ISREG(st.st_mode)) { 750 k++; 751 continue; 752 } 753 return kernel; 754 755 } 756 return NULL; 757 } 758 759 static const char *find_vmlinux(void) 760 { 761 const char **vmlinux; 762 763 vmlinux = &default_vmlinux[0]; 764 while (*vmlinux) { 765 struct stat st; 766 767 if (stat(*vmlinux, &st) < 0 || 
!S_ISREG(st.st_mode)) { 768 vmlinux++; 769 continue; 770 } 771 return *vmlinux; 772 } 773 return NULL; 774 } 775 776 void kvm_run_help(void) 777 { 778 usage_with_options(run_usage, options); 779 } 780 781 static int kvm_custom_stage2(void) 782 { 783 char tmp[PATH_MAX], dst[PATH_MAX], *src; 784 const char *rootfs = custom_rootfs_name; 785 int r; 786 787 src = realpath("guest/init_stage2", NULL); 788 if (src == NULL) 789 return -ENOMEM; 790 791 snprintf(tmp, PATH_MAX, "%s%s/virt/init_stage2", kvm__get_dir(), rootfs); 792 remove(tmp); 793 794 snprintf(dst, PATH_MAX, "/host/%s", src); 795 r = symlink(dst, tmp); 796 free(src); 797 798 return r; 799 } 800 801 static int kvm_run_set_sandbox(void) 802 { 803 const char *guestfs_name = custom_rootfs_name; 804 char path[PATH_MAX], script[PATH_MAX], *tmp; 805 806 snprintf(path, PATH_MAX, "%s%s/virt/sandbox.sh", kvm__get_dir(), guestfs_name); 807 808 remove(path); 809 810 if (sandbox == NULL) 811 return 0; 812 813 tmp = realpath(sandbox, NULL); 814 if (tmp == NULL) 815 return -ENOMEM; 816 817 snprintf(script, PATH_MAX, "/host/%s", tmp); 818 free(tmp); 819 820 return symlink(script, path); 821 } 822 823 static void kvm_write_sandbox_cmd_exactly(int fd, const char *arg) 824 { 825 const char *single_quote; 826 827 if (!*arg) { /* zero length string */ 828 if (write(fd, "''", 2) <= 0) 829 die("Failed writing sandbox script"); 830 return; 831 } 832 833 while (*arg) { 834 single_quote = strchrnul(arg, '\''); 835 836 /* write non-single-quote string as #('string') */ 837 if (arg != single_quote) { 838 if (write(fd, "'", 1) <= 0 || 839 write(fd, arg, single_quote - arg) <= 0 || 840 write(fd, "'", 1) <= 0) 841 die("Failed writing sandbox script"); 842 } 843 844 /* write single quote as #("'") */ 845 if (*single_quote) { 846 if (write(fd, "\"'\"", 3) <= 0) 847 die("Failed writing sandbox script"); 848 } else 849 break; 850 851 arg = single_quote + 1; 852 } 853 } 854 855 static void resolve_program(const char *src, char *dst, size_t len) 
856 { 857 struct stat st; 858 int err; 859 860 err = stat(src, &st); 861 862 if (!err && S_ISREG(st.st_mode)) { 863 char resolved_path[PATH_MAX]; 864 865 if (!realpath(src, resolved_path)) 866 die("Unable to resolve program %s: %s\n", src, strerror(errno)); 867 868 snprintf(dst, len, "/host%s", resolved_path); 869 } else 870 strncpy(dst, src, len); 871 } 872 873 static void kvm_run_write_sandbox_cmd(const char **argv, int argc) 874 { 875 const char script_hdr[] = "#! /bin/bash\n\n"; 876 char program[PATH_MAX]; 877 int fd; 878 879 remove(sandbox); 880 881 fd = open(sandbox, O_RDWR | O_CREAT, 0777); 882 if (fd < 0) 883 die("Failed creating sandbox script"); 884 885 if (write(fd, script_hdr, sizeof(script_hdr) - 1) <= 0) 886 die("Failed writing sandbox script"); 887 888 resolve_program(argv[0], program, PATH_MAX); 889 kvm_write_sandbox_cmd_exactly(fd, program); 890 891 argv++; 892 argc--; 893 894 while (argc) { 895 if (write(fd, " ", 1) <= 0) 896 die("Failed writing sandbox script"); 897 898 kvm_write_sandbox_cmd_exactly(fd, argv[0]); 899 argv++; 900 argc--; 901 } 902 if (write(fd, "\n", 1) <= 0) 903 die("Failed writing sandbox script"); 904 905 close(fd); 906 } 907 908 static int kvm_cmd_run_init(int argc, const char **argv) 909 { 910 static char real_cmdline[2048], default_name[20]; 911 struct framebuffer *fb = NULL; 912 unsigned int nr_online_cpus; 913 int max_cpus, recommended_cpus; 914 int i, r; 915 916 signal(SIGALRM, handle_sigalrm); 917 kvm_ipc__register_handler(KVM_IPC_DEBUG, handle_debug); 918 signal(SIGUSR1, handle_sigusr1); 919 kvm_ipc__register_handler(KVM_IPC_PAUSE, handle_pause); 920 kvm_ipc__register_handler(KVM_IPC_RESUME, handle_pause); 921 kvm_ipc__register_handler(KVM_IPC_STOP, handle_stop); 922 kvm_ipc__register_handler(KVM_IPC_VMSTATE, handle_vmstate); 923 924 nr_online_cpus = sysconf(_SC_NPROCESSORS_ONLN); 925 926 while (argc != 0) { 927 argc = parse_options(argc, argv, options, run_usage, 928 PARSE_OPT_STOP_AT_NON_OPTION | 929 
PARSE_OPT_KEEP_DASHDASH); 930 if (argc != 0) { 931 /* Cusrom options, should have been handled elsewhere */ 932 if (strcmp(argv[0], "--") == 0) { 933 if (kvm_run_wrapper == KVM_RUN_SANDBOX) { 934 sandbox = DEFAULT_SANDBOX_FILENAME; 935 kvm_run_write_sandbox_cmd(argv+1, argc-1); 936 break; 937 } 938 } 939 940 if ((kvm_run_wrapper == KVM_RUN_DEFAULT && kernel_filename) || 941 (kvm_run_wrapper == KVM_RUN_SANDBOX && sandbox)) { 942 fprintf(stderr, "Cannot handle parameter: " 943 "%s\n", argv[0]); 944 usage_with_options(run_usage, options); 945 return EINVAL; 946 } 947 if (kvm_run_wrapper == KVM_RUN_SANDBOX) { 948 /* 949 * first unhandled parameter is treated as 950 * sandbox command 951 */ 952 sandbox = DEFAULT_SANDBOX_FILENAME; 953 kvm_run_write_sandbox_cmd(argv, argc); 954 } else { 955 /* 956 * first unhandled parameter is treated as a kernel 957 * image 958 */ 959 kernel_filename = argv[0]; 960 } 961 argv++; 962 argc--; 963 } 964 965 } 966 967 if (!kernel_filename) 968 kernel_filename = find_kernel(); 969 970 if (!kernel_filename) { 971 kernel_usage_with_options(); 972 return EINVAL; 973 } 974 975 vmlinux_filename = find_vmlinux(); 976 977 if (nrcpus == 0) 978 nrcpus = nr_online_cpus; 979 980 if (!ram_size) 981 ram_size = get_ram_size(nrcpus); 982 983 if (ram_size < MIN_RAM_SIZE_MB) 984 die("Not enough memory specified: %lluMB (min %lluMB)", ram_size, MIN_RAM_SIZE_MB); 985 986 if (ram_size > host_ram_size()) 987 pr_warning("Guest memory size %lluMB exceeds host physical RAM size %lluMB", ram_size, host_ram_size()); 988 989 ram_size <<= MB_SHIFT; 990 991 if (!dev) 992 dev = DEFAULT_KVM_DEV; 993 994 if (!console) 995 console = DEFAULT_CONSOLE; 996 997 if (!strncmp(console, "virtio", 6)) 998 active_console = CONSOLE_VIRTIO; 999 else if (!strncmp(console, "serial", 6)) 1000 active_console = CONSOLE_8250; 1001 else if (!strncmp(console, "hv", 2)) 1002 active_console = CONSOLE_HV; 1003 else 1004 pr_warning("No console!"); 1005 1006 if (!host_ip) 1007 host_ip = 
DEFAULT_HOST_ADDR; 1008 1009 if (!guest_ip) 1010 guest_ip = DEFAULT_GUEST_ADDR; 1011 1012 if (!guest_mac) 1013 guest_mac = DEFAULT_GUEST_MAC; 1014 1015 if (!host_mac) 1016 host_mac = DEFAULT_HOST_MAC; 1017 1018 if (!script) 1019 script = DEFAULT_SCRIPT; 1020 1021 term_init(); 1022 1023 if (!guest_name) { 1024 if (custom_rootfs) { 1025 guest_name = custom_rootfs_name; 1026 } else { 1027 sprintf(default_name, "guest-%u", getpid()); 1028 guest_name = default_name; 1029 } 1030 } 1031 1032 kvm = kvm__init(dev, hugetlbfs_path, ram_size, guest_name); 1033 if (IS_ERR(kvm)) { 1034 r = PTR_ERR(kvm); 1035 goto fail; 1036 } 1037 1038 kvm->single_step = single_step; 1039 1040 r = ioeventfd__init(kvm); 1041 if (r < 0) { 1042 pr_err("ioeventfd__init() failed with error %d\n", r); 1043 goto fail; 1044 } 1045 1046 max_cpus = kvm__max_cpus(kvm); 1047 recommended_cpus = kvm__recommended_cpus(kvm); 1048 1049 if (nrcpus > max_cpus) { 1050 printf(" # Limit the number of CPUs to %d\n", max_cpus); 1051 nrcpus = max_cpus; 1052 } else if (nrcpus > recommended_cpus) { 1053 printf(" # Warning: The maximum recommended amount of VCPUs" 1054 " is %d\n", recommended_cpus); 1055 } 1056 1057 kvm->nrcpus = nrcpus; 1058 1059 /* Alloc one pointer too many, so array ends up 0-terminated */ 1060 kvm_cpus = calloc(nrcpus + 1, sizeof(void *)); 1061 if (!kvm_cpus) 1062 die("Couldn't allocate array for %d CPUs", nrcpus); 1063 1064 r = irq__init(kvm); 1065 if (r < 0) { 1066 pr_err("irq__init() failed with error %d\n", r); 1067 goto fail; 1068 } 1069 1070 r = pci__init(kvm); 1071 if (r < 0) { 1072 pr_err("pci__init() failed with error %d\n", r); 1073 goto fail; 1074 } 1075 1076 r = ioport__init(kvm); 1077 if (r < 0) { 1078 pr_err("ioport__init() failed with error %d\n", r); 1079 goto fail; 1080 } 1081 1082 /* 1083 * vidmode should be either specified 1084 * either set by default 1085 */ 1086 if (vnc || sdl) { 1087 if (vidmode == -1) 1088 vidmode = 0x312; 1089 } else { 1090 vidmode = 0; 1091 } 1092 1093 
memset(real_cmdline, 0, sizeof(real_cmdline)); 1094 kvm__arch_set_cmdline(real_cmdline, vnc || sdl); 1095 1096 if (strlen(real_cmdline) > 0) 1097 strcat(real_cmdline, " "); 1098 1099 if (kernel_cmdline) 1100 strlcat(real_cmdline, kernel_cmdline, sizeof(real_cmdline)); 1101 1102 if (!using_rootfs && !image_filename[0] && !initrd_filename) { 1103 char tmp[PATH_MAX]; 1104 1105 kvm_setup_create_new(custom_rootfs_name); 1106 kvm_setup_resolv(custom_rootfs_name); 1107 1108 snprintf(tmp, PATH_MAX, "%s%s", kvm__get_dir(), "default"); 1109 if (virtio_9p__register(kvm, tmp, "/dev/root") < 0) 1110 die("Unable to initialize virtio 9p"); 1111 if (virtio_9p__register(kvm, "/", "hostfs") < 0) 1112 die("Unable to initialize virtio 9p"); 1113 using_rootfs = custom_rootfs = 1; 1114 } 1115 1116 if (using_rootfs) { 1117 strcat(real_cmdline, " root=/dev/root rw rootflags=rw,trans=virtio,version=9p2000.L rootfstype=9p"); 1118 if (custom_rootfs) { 1119 kvm_run_set_sandbox(); 1120 1121 strcat(real_cmdline, " init=/virt/init"); 1122 1123 if (!no_dhcp) 1124 strcat(real_cmdline, " ip=dhcp"); 1125 if (kvm_custom_stage2()) 1126 die("Failed linking stage 2 of init."); 1127 } 1128 } else if (!strstr(real_cmdline, "root=")) { 1129 strlcat(real_cmdline, " root=/dev/vda rw ", sizeof(real_cmdline)); 1130 } 1131 1132 if (image_count) { 1133 kvm->nr_disks = image_count; 1134 kvm->disks = disk_image__open_all(image_filename, readonly_image, image_count); 1135 if (IS_ERR(kvm->disks)) { 1136 r = PTR_ERR(kvm->disks); 1137 pr_err("disk_image__open_all() failed with error %ld\n", 1138 PTR_ERR(kvm->disks)); 1139 goto fail; 1140 } 1141 } 1142 1143 printf(" # %s run -k %s -m %Lu -c %d --name %s\n", KVM_BINARY_NAME, 1144 kernel_filename, ram_size / 1024 / 1024, nrcpus, guest_name); 1145 1146 if (!firmware_filename) { 1147 if (!kvm__load_kernel(kvm, kernel_filename, 1148 initrd_filename, real_cmdline, vidmode)) 1149 die("unable to load kernel %s", kernel_filename); 1150 1151 kvm->vmlinux = vmlinux_filename; 1152 
r = symbol_init(kvm); 1153 if (r < 0) 1154 pr_debug("symbol_init() failed with error %d\n", r); 1155 } 1156 1157 ioport__setup_arch(); 1158 1159 r = rtc__init(kvm); 1160 if (r < 0) { 1161 pr_err("rtc__init() failed with error %d\n", r); 1162 goto fail; 1163 } 1164 1165 r = serial8250__init(kvm); 1166 if (r < 0) { 1167 pr_err("serial__init() failed with error %d\n", r); 1168 goto fail; 1169 } 1170 1171 r = virtio_blk__init(kvm); 1172 if (r < 0) { 1173 pr_err("virtio_blk__init() failed with error %d\n", r); 1174 goto fail; 1175 } 1176 1177 if (active_console == CONSOLE_VIRTIO) 1178 virtio_console__init(kvm); 1179 1180 if (virtio_rng) 1181 virtio_rng__init(kvm); 1182 1183 if (balloon) 1184 virtio_bln__init(kvm); 1185 1186 if (!network) 1187 network = DEFAULT_NETWORK; 1188 1189 virtio_9p__init(kvm); 1190 1191 for (i = 0; i < num_net_devices; i++) { 1192 net_params[i].kvm = kvm; 1193 virtio_net__init(&net_params[i]); 1194 } 1195 1196 if (num_net_devices == 0 && no_net == 0) { 1197 struct virtio_net_params net_params; 1198 1199 net_params = (struct virtio_net_params) { 1200 .guest_ip = guest_ip, 1201 .host_ip = host_ip, 1202 .kvm = kvm, 1203 .script = script, 1204 .mode = NET_MODE_USER, 1205 }; 1206 str_to_mac(guest_mac, net_params.guest_mac); 1207 str_to_mac(host_mac, net_params.host_mac); 1208 1209 virtio_net__init(&net_params); 1210 } 1211 1212 kvm__init_ram(kvm); 1213 1214 #ifdef CONFIG_X86 1215 kbd__init(kvm); 1216 #endif 1217 1218 pci_shmem__init(kvm); 1219 1220 if (vnc || sdl) { 1221 fb = vesa__init(kvm); 1222 if (IS_ERR(fb)) { 1223 pr_err("vesa__init() failed with error %ld\n", PTR_ERR(fb)); 1224 goto fail; 1225 } 1226 } 1227 1228 if (vnc && fb) { 1229 r = vnc__init(fb); 1230 if (r < 0) { 1231 pr_err("vnc__init() failed with error %d\n", r); 1232 goto fail; 1233 } 1234 } 1235 1236 if (sdl && fb) { 1237 sdl__init(fb); 1238 if (r < 0) { 1239 pr_err("sdl__init() failed with error %d\n", r); 1240 goto fail; 1241 } 1242 } 1243 1244 r = fb__start(); 1245 if (r < 0) { 
1246 pr_err("fb__init() failed with error %d\n", r); 1247 goto fail; 1248 } 1249 1250 /* Device init all done; firmware init must 1251 * come after this (it may set up device trees etc.) 1252 */ 1253 1254 kvm__start_timer(kvm); 1255 1256 if (firmware_filename) { 1257 if (!kvm__load_firmware(kvm, firmware_filename)) 1258 die("unable to load firmware image %s: %s", firmware_filename, strerror(errno)); 1259 } else { 1260 kvm__arch_setup_firmware(kvm); 1261 if (r < 0) { 1262 pr_err("kvm__arch_setup_firmware() failed with error %d\n", r); 1263 goto fail; 1264 } 1265 } 1266 1267 for (i = 0; i < nrcpus; i++) { 1268 kvm_cpus[i] = kvm_cpu__init(kvm, i); 1269 if (!kvm_cpus[i]) 1270 die("unable to initialize KVM VCPU"); 1271 } 1272 1273 thread_pool__init(nr_online_cpus); 1274 fail: 1275 return r; 1276 } 1277 1278 static int kvm_cmd_run_work(void) 1279 { 1280 int i, r = -1; 1281 void *ret = NULL; 1282 1283 for (i = 0; i < nrcpus; i++) { 1284 if (pthread_create(&kvm_cpus[i]->thread, NULL, kvm_cpu_thread, kvm_cpus[i]) != 0) 1285 die("unable to create KVM VCPU thread"); 1286 } 1287 1288 /* Only VCPU #0 is going to exit by itself when shutting down */ 1289 if (pthread_join(kvm_cpus[0]->thread, &ret) != 0) 1290 r = 0; 1291 1292 kvm_cpu__delete(kvm_cpus[0]); 1293 kvm_cpus[0] = NULL; 1294 1295 for (i = 1; i < nrcpus; i++) { 1296 if (kvm_cpus[i]->is_running) { 1297 pthread_kill(kvm_cpus[i]->thread, SIGKVMEXIT); 1298 if (pthread_join(kvm_cpus[i]->thread, &ret) != 0) 1299 die("pthread_join"); 1300 kvm_cpu__delete(kvm_cpus[i]); 1301 } 1302 if (ret == NULL) 1303 r = 0; 1304 } 1305 1306 return r; 1307 } 1308 1309 static void kvm_cmd_run_exit(int guest_ret) 1310 { 1311 int r = 0; 1312 1313 compat__print_all_messages(); 1314 1315 r = symbol_exit(kvm); 1316 if (r < 0) 1317 pr_warning("symbol_exit() failed with error %d\n", r); 1318 1319 r = irq__exit(kvm); 1320 if (r < 0) 1321 pr_warning("irq__exit() failed with error %d\n", r); 1322 1323 fb__stop(); 1324 1325 r = virtio_blk__exit(kvm); 1326 
if (r < 0) 1327 pr_warning("virtio_blk__exit() failed with error %d\n", r); 1328 1329 r = virtio_rng__exit(kvm); 1330 if (r < 0) 1331 pr_warning("virtio_rng__exit() failed with error %d\n", r); 1332 1333 r = disk_image__close_all(kvm->disks, image_count); 1334 if (r < 0) 1335 pr_warning("disk_image__close_all() failed with error %d\n", r); 1336 1337 r = serial8250__exit(kvm); 1338 if (r < 0) 1339 pr_warning("serial8250__exit() failed with error %d\n", r); 1340 1341 r = rtc__exit(kvm); 1342 if (r < 0) 1343 pr_warning("rtc__exit() failed with error %d\n", r); 1344 1345 r = kvm__arch_free_firmware(kvm); 1346 if (r < 0) 1347 pr_warning("kvm__arch_free_firmware() failed with error %d\n", r); 1348 1349 r = ioport__exit(kvm); 1350 if (r < 0) 1351 pr_warning("ioport__exit() failed with error %d\n", r); 1352 1353 r = ioeventfd__exit(kvm); 1354 if (r < 0) 1355 pr_warning("ioeventfd__exit() failed with error %d\n", r); 1356 1357 r = pci__exit(kvm); 1358 if (r < 0) 1359 pr_warning("pci__exit() failed with error %d\n", r); 1360 1361 r = kvm__exit(kvm); 1362 if (r < 0) 1363 pr_warning("pci__exit() failed with error %d\n", r); 1364 1365 free(kvm_cpus); 1366 1367 if (guest_ret == 0) 1368 printf("\n # KVM session ended normally.\n"); 1369 } 1370 1371 int kvm_cmd_run(int argc, const char **argv, const char *prefix) 1372 { 1373 int r, ret = -EFAULT; 1374 1375 r = kvm_cmd_run_init(argc, argv); 1376 if (r < 0) 1377 return r; 1378 1379 ret = kvm_cmd_run_work(); 1380 kvm_cmd_run_exit(ret); 1381 1382 return ret; 1383 } 1384