1 #include "kvm/builtin-run.h" 2 3 #include "kvm/builtin-setup.h" 4 #include "kvm/virtio-balloon.h" 5 #include "kvm/virtio-console.h" 6 #include "kvm/parse-options.h" 7 #include "kvm/8250-serial.h" 8 #include "kvm/framebuffer.h" 9 #include "kvm/disk-image.h" 10 #include "kvm/threadpool.h" 11 #include "kvm/virtio-scsi.h" 12 #include "kvm/virtio-blk.h" 13 #include "kvm/virtio-net.h" 14 #include "kvm/virtio-rng.h" 15 #include "kvm/ioeventfd.h" 16 #include "kvm/virtio-9p.h" 17 #include "kvm/barrier.h" 18 #include "kvm/kvm-cpu.h" 19 #include "kvm/ioport.h" 20 #include "kvm/symbol.h" 21 #include "kvm/i8042.h" 22 #include "kvm/mutex.h" 23 #include "kvm/term.h" 24 #include "kvm/util.h" 25 #include "kvm/strbuf.h" 26 #include "kvm/vesa.h" 27 #include "kvm/irq.h" 28 #include "kvm/kvm.h" 29 #include "kvm/pci.h" 30 #include "kvm/rtc.h" 31 #include "kvm/sdl.h" 32 #include "kvm/vnc.h" 33 #include "kvm/guest_compat.h" 34 #include "kvm/pci-shmem.h" 35 #include "kvm/kvm-ipc.h" 36 #include "kvm/builtin-debug.h" 37 38 #include <linux/types.h> 39 #include <linux/err.h> 40 41 #include <sys/utsname.h> 42 #include <sys/types.h> 43 #include <sys/stat.h> 44 #include <termios.h> 45 #include <signal.h> 46 #include <stdlib.h> 47 #include <string.h> 48 #include <unistd.h> 49 #include <ctype.h> 50 #include <stdio.h> 51 52 #define DEFAULT_KVM_DEV "/dev/kvm" 53 #define DEFAULT_CONSOLE "serial" 54 #define DEFAULT_NETWORK "user" 55 #define DEFAULT_HOST_ADDR "192.168.33.1" 56 #define DEFAULT_GUEST_ADDR "192.168.33.15" 57 #define DEFAULT_GUEST_MAC "02:15:15:15:15:15" 58 #define DEFAULT_HOST_MAC "02:01:01:01:01:01" 59 #define DEFAULT_SCRIPT "none" 60 const char *DEFAULT_SANDBOX_FILENAME = "guest/sandbox.sh"; 61 62 #define MB_SHIFT (20) 63 #define KB_SHIFT (10) 64 #define GB_SHIFT (30) 65 #define MIN_RAM_SIZE_MB (64ULL) 66 #define MIN_RAM_SIZE_BYTE (MIN_RAM_SIZE_MB << MB_SHIFT) 67 68 struct kvm *kvm; 69 struct kvm_cpu **kvm_cpus; 70 __thread struct kvm_cpu *current_kvm_cpu; 71 72 static struct disk_image_params disk_image[MAX_DISK_IMAGES]; 73 static u64 ram_size; 74 static u8 image_count; 75 static u8 num_net_devices; 76 static bool virtio_rng; 77 static const char *kernel_cmdline; 78 static const char *kernel_filename; 79 static const char *vmlinux_filename; 80 static const char *initrd_filename; 81 static const char *firmware_filename; 82 static const char *console; 83 static const char *dev; 84 static const char *network; 85 static const char *host_ip; 86 static const char *guest_ip; 87 static const char *guest_mac; 88 static const char *host_mac; 89 static const char *script; 90 static const char *guest_name; 91 static const char *sandbox; 92 static const char *hugetlbfs_path; 93 static const char *custom_rootfs_name = "default"; 94 static struct virtio_net_params *net_params; 95 static bool single_step; 96 static bool vnc; 97 static bool sdl; 98 static bool balloon; 99 static bool using_rootfs; 100 static bool custom_rootfs; 101 static bool no_net; 102 static bool no_dhcp; 103 extern bool ioport_debug; 104 extern bool mmio_debug; 105 static int kvm_run_wrapper; 106 extern int active_console; 107 extern int debug_iodelay; 108 109 bool do_debug_print = false; 110 111 static int nrcpus; 112 static int vidmode = -1; 113 114 extern char _binary_guest_init_start; 115 extern char _binary_guest_init_size; 116 117 static const char * const run_usage[] = { 118 "lkvm run [<options>] [<kernel image>]", 119 NULL 120 }; 121 122 enum { 123 KVM_RUN_DEFAULT, 124 KVM_RUN_SANDBOX, 125 }; 126 127 void kvm_run_set_wrapper_sandbox(void) 128 { 129 kvm_run_wrapper = KVM_RUN_SANDBOX; 130 } 131 132 static int img_name_parser(const struct option *opt, const char *arg, int unset) 133 { 134 char path[PATH_MAX]; 135 const char *cur; 136 struct stat st; 137 char *sep; 138 139 if (stat(arg, &st) == 0 && 140 S_ISDIR(st.st_mode)) { 141 char tmp[PATH_MAX]; 142 143 if (using_rootfs) 144 die("Please use only one rootfs directory atmost"); 145 146 if (realpath(arg, tmp) == 0 || 147 virtio_9p__register(kvm, tmp, "/dev/root") < 0) 148 die("Unable to initialize virtio 9p"); 149 using_rootfs = 1; 150 return 0; 151 } 152 153 snprintf(path, PATH_MAX, "%s%s", kvm__get_dir(), arg); 154 155 if (stat(path, &st) == 0 && 156 S_ISDIR(st.st_mode)) { 157 char tmp[PATH_MAX]; 158 159 if (using_rootfs) 160 die("Please use only one rootfs directory atmost"); 161 162 if (realpath(path, tmp) == 0 || 163 virtio_9p__register(kvm, tmp, "/dev/root") < 0) 164 die("Unable to initialize virtio 9p"); 165 if (virtio_9p__register(kvm, "/", "hostfs") < 0) 166 die("Unable to initialize virtio 9p"); 167 kvm_setup_resolv(arg); 168 using_rootfs = custom_rootfs = 1; 169 custom_rootfs_name = arg; 170 return 0; 171 } 172 173 if (image_count >= MAX_DISK_IMAGES) 174 die("Currently only 4 images are supported"); 175 176 disk_image[image_count].filename = arg; 177 cur = arg; 178 179 if (strncmp(arg, "scsi:", 5) == 0) { 180 sep = strstr(arg, ":"); 181 if (sep) 182 disk_image[image_count].wwpn = sep + 1; 183 sep = strstr(sep + 1, ":"); 184 if (sep) { 185 *sep = 0; 186 disk_image[image_count].tpgt = sep + 1; 187 } 188 cur = sep + 1; 189 } 190 191 do { 192 sep = strstr(cur, ","); 193 if (sep) { 194 if (strncmp(sep + 1, "ro", 2) == 0) 195 disk_image[image_count].readonly = true; 196 else if (strncmp(sep + 1, "direct", 6) == 0) 197 disk_image[image_count].direct = true; 198 *sep = 0; 199 cur = sep + 1; 200 } 201 } while (sep); 202 203 image_count++; 204 205 return 0; 206 } 207 208 static int virtio_9p_rootdir_parser(const struct option *opt, const char *arg, int unset) 209 { 210 char *tag_name; 211 char tmp[PATH_MAX]; 212 213 /* 214 * 9p dir can be of the form dirname,tag_name or 215 * just dirname. In the later case we use the 216 * default tag name 217 */ 218 tag_name = strstr(arg, ","); 219 if (tag_name) { 220 *tag_name = '\0'; 221 tag_name++; 222 } 223 if (realpath(arg, tmp)) { 224 if (virtio_9p__register(kvm, tmp, tag_name) < 0) 225 die("Unable to initialize virtio 9p"); 226 } else 227 die("Failed resolving 9p path"); 228 return 0; 229 } 230 231 static int tty_parser(const struct option *opt, const char *arg, int unset) 232 { 233 int tty = atoi(arg); 234 235 term_set_tty(tty); 236 237 return 0; 238 } 239 240 static inline void str_to_mac(const char *str, char *mac) 241 { 242 sscanf(str, "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx", 243 mac, mac+1, mac+2, mac+3, mac+4, mac+5); 244 } 245 static int set_net_param(struct virtio_net_params *p, const char *param, 246 const char *val) 247 { 248 if (strcmp(param, "guest_mac") == 0) { 249 str_to_mac(val, p->guest_mac); 250 } else if (strcmp(param, "mode") == 0) { 251 if (!strncmp(val, "user", 4)) { 252 int i; 253 254 for (i = 0; i < num_net_devices; i++) 255 if (net_params[i].mode == NET_MODE_USER) 256 die("Only one usermode network device allowed at a time"); 257 p->mode = NET_MODE_USER; 258 } else if (!strncmp(val, "tap", 3)) { 259 p->mode = NET_MODE_TAP; 260 } else if (!strncmp(val, "none", 4)) { 261 no_net = 1; 262 return -1; 263 } else 264 die("Unkown network mode %s, please use user, tap or none", network); 265 } else if (strcmp(param, "script") == 0) { 266 p->script = strdup(val); 267 } else if (strcmp(param, "guest_ip") == 0) { 268 p->guest_ip = strdup(val); 269 } else if (strcmp(param, "host_ip") == 0) { 270 p->host_ip = strdup(val); 271 } else if (strcmp(param, "trans") == 0) { 272 p->trans = strdup(val); 273 } else if (strcmp(param, "vhost") == 0) { 274 p->vhost = atoi(val); 275 } else if (strcmp(param, "fd") == 0) { 276 p->fd = atoi(val); 277 } else 278 die("Unknown network parameter %s", param); 279 280 return 0; 281 } 282 283 static int netdev_parser(const struct option *opt, const char *arg, int unset) 284 { 285 struct virtio_net_params p; 286 char *buf = NULL, *cmd = NULL, *cur = NULL; 287 bool on_cmd = true; 288 289 if (arg) { 290 buf = strdup(arg); 291 if (buf == NULL) 292 die("Failed allocating new net buffer"); 293 cur = strtok(buf, ",="); 294 } 295 296 p = (struct virtio_net_params) { 297 .guest_ip = DEFAULT_GUEST_ADDR, 298 .host_ip = DEFAULT_HOST_ADDR, 299 .script = DEFAULT_SCRIPT, 300 .mode = NET_MODE_TAP, 301 }; 302 303 str_to_mac(DEFAULT_GUEST_MAC, p.guest_mac); 304 p.guest_mac[5] += num_net_devices; 305 306 while (cur) { 307 if (on_cmd) { 308 cmd = cur; 309 } else { 310 if (set_net_param(&p, cmd, cur) < 0) 311 goto done; 312 } 313 on_cmd = !on_cmd; 314 315 cur = strtok(NULL, ",="); 316 }; 317 318 num_net_devices++; 319 320 net_params = realloc(net_params, num_net_devices * sizeof(*net_params)); 321 if (net_params == NULL) 322 die("Failed adding new network device"); 323 324 net_params[num_net_devices - 1] = p; 325 326 done: 327 free(buf); 328 return 0; 329 } 330 331 static int shmem_parser(const struct option *opt, const char *arg, int unset) 332 { 333 const u64 default_size = SHMEM_DEFAULT_SIZE; 334 const u64 default_phys_addr = SHMEM_DEFAULT_ADDR; 335 const char *default_handle = SHMEM_DEFAULT_HANDLE; 336 struct shmem_info *si = malloc(sizeof(struct shmem_info)); 337 u64 phys_addr; 338 u64 size; 339 char *handle = NULL; 340 int create = 0; 341 const char *p = arg; 342 char *next; 343 int base = 10; 344 int verbose = 0; 345 346 const int skip_pci = strlen("pci:"); 347 if (verbose) 348 pr_info("shmem_parser(%p,%s,%d)", opt, arg, unset); 349 /* parse out optional addr family */ 350 if (strcasestr(p, "pci:")) { 351 p += skip_pci; 352 } else if (strcasestr(p, "mem:")) { 353 die("I can't add to E820 map yet.\n"); 354 } 355 /* parse out physical addr */ 356 base = 10; 357 if (strcasestr(p, "0x")) 358 base = 16; 359 phys_addr = strtoll(p, &next, base); 360 if (next == p && phys_addr == 0) { 361 pr_info("shmem: no physical addr specified, using default."); 362 phys_addr = default_phys_addr; 363 } 364 if (*next != ':' && *next != '\0') 365 die("shmem: unexpected chars after phys addr.\n"); 366 if (*next == '\0') 367 p = next; 368 else 369 p = next + 1; 370 /* parse out size */ 371 base = 10; 372 if (strcasestr(p, "0x")) 373 base = 16; 374 size = strtoll(p, &next, base); 375 if (next == p && size == 0) { 376 pr_info("shmem: no size specified, using default."); 377 size = default_size; 378 } 379 /* look for [KMGkmg][Bb]* uses base 2. */ 380 int skip_B = 0; 381 if (strspn(next, "KMGkmg")) { /* might have a prefix */ 382 if (*(next + 1) == 'B' || *(next + 1) == 'b') 383 skip_B = 1; 384 switch (*next) { 385 case 'K': 386 case 'k': 387 size = size << KB_SHIFT; 388 break; 389 case 'M': 390 case 'm': 391 size = size << MB_SHIFT; 392 break; 393 case 'G': 394 case 'g': 395 size = size << GB_SHIFT; 396 break; 397 default: 398 die("shmem: bug in detecting size prefix."); 399 break; 400 } 401 next += 1 + skip_B; 402 } 403 if (*next != ':' && *next != '\0') { 404 die("shmem: unexpected chars after phys size. <%c><%c>\n", 405 *next, *p); 406 } 407 if (*next == '\0') 408 p = next; 409 else 410 p = next + 1; 411 /* parse out optional shmem handle */ 412 const int skip_handle = strlen("handle="); 413 next = strcasestr(p, "handle="); 414 if (*p && next) { 415 if (p != next) 416 die("unexpected chars before handle\n"); 417 p += skip_handle; 418 next = strchrnul(p, ':'); 419 if (next - p) { 420 handle = malloc(next - p + 1); 421 strncpy(handle, p, next - p); 422 handle[next - p] = '\0'; /* just in case. */ 423 } 424 if (*next == '\0') 425 p = next; 426 else 427 p = next + 1; 428 } 429 /* parse optional create flag to see if we should create shm seg. */ 430 if (*p && strcasestr(p, "create")) { 431 create = 1; 432 p += strlen("create"); 433 } 434 if (*p != '\0') 435 die("shmem: unexpected trailing chars\n"); 436 if (handle == NULL) { 437 handle = malloc(strlen(default_handle) + 1); 438 strcpy(handle, default_handle); 439 } 440 if (verbose) { 441 pr_info("shmem: phys_addr = %llx", phys_addr); 442 pr_info("shmem: size = %llx", size); 443 pr_info("shmem: handle = %s", handle); 444 pr_info("shmem: create = %d", create); 445 } 446 447 si->phys_addr = phys_addr; 448 si->size = size; 449 si->handle = handle; 450 si->create = create; 451 pci_shmem__register_mem(si); /* ownership of si, etc. passed on. */ 452 return 0; 453 } 454 455 static const struct option options[] = { 456 OPT_GROUP("Basic options:"), 457 OPT_STRING('\0', "name", &guest_name, "guest name", 458 "A name for the guest"), 459 OPT_INTEGER('c', "cpus", &nrcpus, "Number of CPUs"), 460 OPT_U64('m', "mem", &ram_size, "Virtual machine memory size in MiB."), 461 OPT_CALLBACK('\0', "shmem", NULL, 462 "[pci:]<addr>:<size>[:handle=<handle>][:create]", 463 "Share host shmem with guest via pci device", 464 shmem_parser), 465 OPT_CALLBACK('d', "disk", NULL, "image or rootfs_dir", "Disk image or rootfs directory", img_name_parser), 466 OPT_BOOLEAN('\0', "balloon", &balloon, "Enable virtio balloon"), 467 OPT_BOOLEAN('\0', "vnc", &vnc, "Enable VNC framebuffer"), 468 OPT_BOOLEAN('\0', "sdl", &sdl, "Enable SDL framebuffer"), 469 OPT_BOOLEAN('\0', "rng", &virtio_rng, "Enable virtio Random Number Generator"), 470 OPT_CALLBACK('\0', "9p", NULL, "dir_to_share,tag_name", 471 "Enable virtio 9p to share files between host and guest", virtio_9p_rootdir_parser), 472 OPT_STRING('\0', "console", &console, "serial, virtio or hv", 473 "Console to use"), 474 OPT_STRING('\0', "dev", &dev, "device_file", "KVM device file"), 475 OPT_CALLBACK('\0', "tty", NULL, "tty id", 476 "Remap guest TTY into a pty on the host", 477 tty_parser), 478 OPT_STRING('\0', "sandbox", &sandbox, "script", 479 "Run this script when booting into custom rootfs"), 480 OPT_STRING('\0', "hugetlbfs", &hugetlbfs_path, "path", "Hugetlbfs path"), 481 482 OPT_GROUP("Kernel options:"), 483 OPT_STRING('k', "kernel", &kernel_filename, "kernel", 484 "Kernel to boot in virtual machine"), 485 OPT_STRING('i', "initrd", &initrd_filename, "initrd", 486 "Initial RAM disk image"), 487 OPT_STRING('p', "params", &kernel_cmdline, "params", 488 "Kernel command line arguments"), 489 OPT_STRING('f', "firmware", &firmware_filename, "firmware", 490 "Firmware image to boot in virtual machine"), 491 492 OPT_GROUP("Networking options:"), 493 OPT_CALLBACK_DEFAULT('n', "network", NULL, "network params", 494 "Create a new guest NIC", 495 netdev_parser, NULL), 496 OPT_BOOLEAN('\0', "no-dhcp", &no_dhcp, "Disable kernel DHCP in rootfs mode"), 497 498 OPT_GROUP("BIOS options:"), 499 OPT_INTEGER('\0', "vidmode", &vidmode, 500 "Video mode"), 501 502 OPT_GROUP("Debug options:"), 503 OPT_BOOLEAN('\0', "debug", &do_debug_print, 504 "Enable debug messages"), 505 OPT_BOOLEAN('\0', "debug-single-step", &single_step, 506 "Enable single stepping"), 507 OPT_BOOLEAN('\0', "debug-ioport", &ioport_debug, 508 "Enable ioport debugging"), 509 OPT_BOOLEAN('\0', "debug-mmio", &mmio_debug, 510 "Enable MMIO debugging"), 511 OPT_INTEGER('\0', "debug-iodelay", &debug_iodelay, 512 "Delay IO by millisecond"), 513 OPT_END() 514 }; 515 516 /* 517 * Serialize debug printout so that the output of multiple vcpus does not 518 * get mixed up: 519 */ 520 static int printout_done; 521 522 static void handle_sigusr1(int sig) 523 { 524 struct kvm_cpu *cpu = current_kvm_cpu; 525 int fd = kvm_cpu__get_debug_fd(); 526 527 if (!cpu || cpu->needs_nmi) 528 return; 529 530 dprintf(fd, "\n #\n # vCPU #%ld's dump:\n #\n", cpu->cpu_id); 531 kvm_cpu__show_registers(cpu); 532 kvm_cpu__show_code(cpu); 533 kvm_cpu__show_page_tables(cpu); 534 fflush(stdout); 535 printout_done = 1; 536 mb(); 537 } 538 539 /* Pause/resume the guest using SIGUSR2 */ 540 static int is_paused; 541 542 static void handle_pause(int fd, u32 type, u32 len, u8 *msg) 543 { 544 if (WARN_ON(len)) 545 return; 546 547 if (type == KVM_IPC_RESUME && is_paused) { 548 kvm->vm_state = KVM_VMSTATE_RUNNING; 549 kvm__continue(); 550 } else if (type == KVM_IPC_PAUSE && !is_paused) { 551 kvm->vm_state = KVM_VMSTATE_PAUSED; 552 ioctl(kvm->vm_fd, KVM_KVMCLOCK_CTRL); 553 kvm__pause(); 554 } else { 555 return; 556 } 557 558 is_paused = !is_paused; 559 } 560 561 static void handle_vmstate(int fd, u32 type, u32 len, u8 *msg) 562 { 563 int r = 0; 564 565 if (type == KVM_IPC_VMSTATE) 566 r = write(fd, &kvm->vm_state, sizeof(kvm->vm_state)); 567 568 if (r < 0) 569 pr_warning("Failed sending VMSTATE"); 570 } 571 572 static void handle_debug(int fd, u32 type, u32 len, u8 *msg) 573 { 574 int i; 575 struct debug_cmd_params *params; 576 u32 dbg_type; 577 u32 vcpu; 578 579 if (WARN_ON(type != KVM_IPC_DEBUG || len != sizeof(*params))) 580 return; 581 582 params = (void *)msg; 583 dbg_type = params->dbg_type; 584 vcpu = params->cpu; 585 586 if (dbg_type & KVM_DEBUG_CMD_TYPE_SYSRQ) 587 serial8250__inject_sysrq(kvm, params->sysrq); 588 589 if (dbg_type & KVM_DEBUG_CMD_TYPE_NMI) { 590 if ((int)vcpu >= kvm->nrcpus) 591 return; 592 593 kvm_cpus[vcpu]->needs_nmi = 1; 594 pthread_kill(kvm_cpus[vcpu]->thread, SIGUSR1); 595 } 596 597 if (!(dbg_type & KVM_DEBUG_CMD_TYPE_DUMP)) 598 return; 599 600 for (i = 0; i < nrcpus; i++) { 601 struct kvm_cpu *cpu = kvm_cpus[i]; 602 603 if (!cpu) 604 continue; 605 606 printout_done = 0; 607 608 kvm_cpu__set_debug_fd(fd); 609 pthread_kill(cpu->thread, SIGUSR1); 610 /* 611 * Wait for the vCPU to dump state before signalling 612 * the next thread. Since this is debug code it does 613 * not matter that we are burning CPU time a bit: 614 */ 615 while (!printout_done) 616 mb(); 617 } 618 619 close(fd); 620 621 serial8250__inject_sysrq(kvm, 'p'); 622 } 623 624 static void handle_sigalrm(int sig) 625 { 626 kvm__arch_periodic_poll(kvm); 627 } 628 629 static void handle_stop(int fd, u32 type, u32 len, u8 *msg) 630 { 631 if (WARN_ON(type != KVM_IPC_STOP || len)) 632 return; 633 634 kvm_cpu__reboot(); 635 } 636 637 static void *kvm_cpu_thread(void *arg) 638 { 639 current_kvm_cpu = arg; 640 641 if (kvm_cpu__start(current_kvm_cpu)) 642 goto panic_kvm; 643 644 return (void *) (intptr_t) 0; 645 646 panic_kvm: 647 fprintf(stderr, "KVM exit reason: %u (\"%s\")\n", 648 current_kvm_cpu->kvm_run->exit_reason, 649 kvm_exit_reasons[current_kvm_cpu->kvm_run->exit_reason]); 650 if (current_kvm_cpu->kvm_run->exit_reason == KVM_EXIT_UNKNOWN) 651 fprintf(stderr, "KVM exit code: 0x%Lu\n", 652 current_kvm_cpu->kvm_run->hw.hardware_exit_reason); 653 654 kvm_cpu__set_debug_fd(STDOUT_FILENO); 655 kvm_cpu__show_registers(current_kvm_cpu); 656 kvm_cpu__show_code(current_kvm_cpu); 657 kvm_cpu__show_page_tables(current_kvm_cpu); 658 659 return (void *) (intptr_t) 1; 660 } 661 662 static char kernel[PATH_MAX]; 663 664 static const char *host_kernels[] = { 665 "/boot/vmlinuz", 666 "/boot/bzImage", 667 NULL 668 }; 669 670 static const char *default_kernels[] = { 671 "./bzImage", 672 "arch/" BUILD_ARCH "/boot/bzImage", 673 "../../arch/" BUILD_ARCH "/boot/bzImage", 674 NULL 675 }; 676 677 static const char *default_vmlinux[] = { 678 "vmlinux", 679 "../../../vmlinux", 680 "../../vmlinux", 681 NULL 682 }; 683 684 static void kernel_usage_with_options(void) 685 { 686 const char **k; 687 struct utsname uts; 688 689 fprintf(stderr, "Fatal: could not find default kernel image in:\n"); 690 k = &default_kernels[0]; 691 while (*k) { 692 fprintf(stderr, "\t%s\n", *k); 693 k++; 694 } 695 696 if (uname(&uts) < 0) 697 return; 698 699 k = &host_kernels[0]; 700 while (*k) { 701 if (snprintf(kernel, PATH_MAX, "%s-%s", *k, uts.release) < 0) 702 return; 703 fprintf(stderr, "\t%s\n", kernel); 704 k++; 705 } 706 fprintf(stderr, "\nPlease see '%s run --help' for more options.\n\n", 707 KVM_BINARY_NAME); 708 } 709 710 static u64 host_ram_size(void) 711 { 712 long page_size; 713 long nr_pages; 714 715 nr_pages = sysconf(_SC_PHYS_PAGES); 716 if (nr_pages < 0) { 717 pr_warning("sysconf(_SC_PHYS_PAGES) failed"); 718 return 0; 719 } 720 721 page_size = sysconf(_SC_PAGE_SIZE); 722 if (page_size < 0) { 723 pr_warning("sysconf(_SC_PAGE_SIZE) failed"); 724 return 0; 725 } 726 727 return (nr_pages * page_size) >> MB_SHIFT; 728 } 729 730 /* 731 * If user didn't specify how much memory it wants to allocate for the guest, 732 * avoid filling the whole host RAM. 733 */ 734 #define RAM_SIZE_RATIO 0.8 735 736 static u64 get_ram_size(int nr_cpus) 737 { 738 u64 available; 739 u64 ram_size; 740 741 ram_size = 64 * (nr_cpus + 3); 742 743 available = host_ram_size() * RAM_SIZE_RATIO; 744 if (!available) 745 available = MIN_RAM_SIZE_MB; 746 747 if (ram_size > available) 748 ram_size = available; 749 750 return ram_size; 751 } 752 753 static const char *find_kernel(void) 754 { 755 const char **k; 756 struct stat st; 757 struct utsname uts; 758 759 k = &default_kernels[0]; 760 while (*k) { 761 if (stat(*k, &st) < 0 || !S_ISREG(st.st_mode)) { 762 k++; 763 continue; 764 } 765 strncpy(kernel, *k, PATH_MAX); 766 return kernel; 767 } 768 769 if (uname(&uts) < 0) 770 return NULL; 771 772 k = &host_kernels[0]; 773 while (*k) { 774 if (snprintf(kernel, PATH_MAX, "%s-%s", *k, uts.release) < 0) 775 return NULL; 776 777 if (stat(kernel, &st) < 0 || !S_ISREG(st.st_mode)) { 778 k++; 779 continue; 780 } 781 return kernel; 782 783 } 784 return NULL; 785 } 786 787 static const char *find_vmlinux(void) 788 { 789 const char **vmlinux; 790 791 vmlinux = &default_vmlinux[0]; 792 while (*vmlinux) { 793 struct stat st; 794 795 if (stat(*vmlinux, &st) < 0 || !S_ISREG(st.st_mode)) { 796 vmlinux++; 797 continue; 798 } 799 return *vmlinux; 800 } 801 return NULL; 802 } 803 804 void kvm_run_help(void) 805 { 806 usage_with_options(run_usage, options); 807 } 808 809 static int kvm_setup_guest_init(void) 810 { 811 const char *rootfs = custom_rootfs_name; 812 char tmp[PATH_MAX]; 813 size_t size; 814 int fd, ret; 815 char *data; 816 817 /* Setup /virt/init */ 818 size = (size_t)&_binary_guest_init_size; 819 data = (char *)&_binary_guest_init_start; 820 snprintf(tmp, PATH_MAX, "%s%s/virt/init", kvm__get_dir(), rootfs); 821 remove(tmp); 822 fd = open(tmp, O_CREAT | O_WRONLY, 0755); 823 if (fd < 0) 824 die("Fail to setup %s", tmp); 825 ret = xwrite(fd, data, size); 826 if (ret < 0) 827 die("Fail to setup %s", tmp); 828 close(fd); 829 830 return 0; 831 } 832 833 static int kvm_run_set_sandbox(void) 834 { 835 const char *guestfs_name = custom_rootfs_name; 836 char path[PATH_MAX], script[PATH_MAX], *tmp; 837 838 snprintf(path, PATH_MAX, "%s%s/virt/sandbox.sh", kvm__get_dir(), guestfs_name); 839 840 remove(path); 841 842 if (sandbox == NULL) 843 return 0; 844 845 tmp = realpath(sandbox, NULL); 846 if (tmp == NULL) 847 return -ENOMEM; 848 849 snprintf(script, PATH_MAX, "/host/%s", tmp); 850 free(tmp); 851 852 return symlink(script, path); 853 } 854 855 static void kvm_write_sandbox_cmd_exactly(int fd, const char *arg) 856 { 857 const char *single_quote; 858 859 if (!*arg) { /* zero length string */ 860 if (write(fd, "''", 2) <= 0) 861 die("Failed writing sandbox script"); 862 return; 863 } 864 865 while (*arg) { 866 single_quote = strchrnul(arg, '\''); 867 868 /* write non-single-quote string as #('string') */ 869 if (arg != single_quote) { 870 if (write(fd, "'", 1) <= 0 || 871 write(fd, arg, single_quote - arg) <= 0 || 872 write(fd, "'", 1) <= 0) 873 die("Failed writing sandbox script"); 874 } 875 876 /* write single quote as #("'") */ 877 if (*single_quote) { 878 if (write(fd, "\"'\"", 3) <= 0) 879 die("Failed writing sandbox script"); 880 } else 881 break; 882 883 arg = single_quote + 1; 884 } 885 } 886 887 static void resolve_program(const char *src, char *dst, size_t len) 888 { 889 struct stat st; 890 int err; 891 892 err = stat(src, &st); 893 894 if (!err && S_ISREG(st.st_mode)) { 895 char resolved_path[PATH_MAX]; 896 897 if (!realpath(src, resolved_path)) 898 die("Unable to resolve program %s: %s\n", src, strerror(errno)); 899 900 snprintf(dst, len, "/host%s", resolved_path); 901 } else 902 strncpy(dst, src, len); 903 } 904 905 static void kvm_run_write_sandbox_cmd(const char **argv, int argc) 906 { 907 const char script_hdr[] = "#! /bin/bash\n\n"; 908 char program[PATH_MAX]; 909 int fd; 910 911 remove(sandbox); 912 913 fd = open(sandbox, O_RDWR | O_CREAT, 0777); 914 if (fd < 0) 915 die("Failed creating sandbox script"); 916 917 if (write(fd, script_hdr, sizeof(script_hdr) - 1) <= 0) 918 die("Failed writing sandbox script"); 919 920 resolve_program(argv[0], program, PATH_MAX); 921 kvm_write_sandbox_cmd_exactly(fd, program); 922 923 argv++; 924 argc--; 925 926 while (argc) { 927 if (write(fd, " ", 1) <= 0) 928 die("Failed writing sandbox script"); 929 930 kvm_write_sandbox_cmd_exactly(fd, argv[0]); 931 argv++; 932 argc--; 933 } 934 if (write(fd, "\n", 1) <= 0) 935 die("Failed writing sandbox script"); 936 937 close(fd); 938 } 939 940 static int kvm_cmd_run_init(int argc, const char **argv) 941 { 942 static char real_cmdline[2048], default_name[20]; 943 struct framebuffer *fb = NULL; 944 unsigned int nr_online_cpus; 945 int max_cpus, recommended_cpus; 946 int i, r; 947 948 signal(SIGALRM, handle_sigalrm); 949 kvm_ipc__register_handler(KVM_IPC_DEBUG, handle_debug); 950 signal(SIGUSR1, handle_sigusr1); 951 kvm_ipc__register_handler(KVM_IPC_PAUSE, handle_pause); 952 kvm_ipc__register_handler(KVM_IPC_RESUME, handle_pause); 953 kvm_ipc__register_handler(KVM_IPC_STOP, handle_stop); 954 kvm_ipc__register_handler(KVM_IPC_VMSTATE, handle_vmstate); 955 956 nr_online_cpus = sysconf(_SC_NPROCESSORS_ONLN); 957 958 while (argc != 0) { 959 argc = parse_options(argc, argv, options, run_usage, 960 PARSE_OPT_STOP_AT_NON_OPTION | 961 PARSE_OPT_KEEP_DASHDASH); 962 if (argc != 0) { 963 /* Cusrom options, should have been handled elsewhere */ 964 if (strcmp(argv[0], "--") == 0) { 965 if (kvm_run_wrapper == KVM_RUN_SANDBOX) { 966 sandbox = DEFAULT_SANDBOX_FILENAME; 967 kvm_run_write_sandbox_cmd(argv+1, argc-1); 968 break; 969 } 970 } 971 972 if ((kvm_run_wrapper == KVM_RUN_DEFAULT && kernel_filename) || 973 (kvm_run_wrapper == KVM_RUN_SANDBOX && sandbox)) { 974 fprintf(stderr, "Cannot handle parameter: " 975 "%s\n", argv[0]); 976 usage_with_options(run_usage, options); 977 return -EINVAL; 978 } 979 if (kvm_run_wrapper == KVM_RUN_SANDBOX) { 980 /* 981 * first unhandled parameter is treated as 982 * sandbox command 983 */ 984 sandbox = DEFAULT_SANDBOX_FILENAME; 985 kvm_run_write_sandbox_cmd(argv, argc); 986 } else { 987 /* 988 * first unhandled parameter is treated as a kernel 989 * image 990 */ 991 kernel_filename = argv[0]; 992 } 993 argv++; 994 argc--; 995 } 996 997 } 998 999 if (!kernel_filename) 1000 kernel_filename = find_kernel(); 1001 1002 if (!kernel_filename) { 1003 kernel_usage_with_options(); 1004 return -EINVAL; 1005 } 1006 1007 vmlinux_filename = find_vmlinux(); 1008 1009 if (nrcpus == 0) 1010 nrcpus = nr_online_cpus; 1011 1012 if (!ram_size) 1013 ram_size = get_ram_size(nrcpus); 1014 1015 if (ram_size < MIN_RAM_SIZE_MB) 1016 die("Not enough memory specified: %lluMB (min %lluMB)", ram_size, MIN_RAM_SIZE_MB); 1017 1018 if (ram_size > host_ram_size()) 1019 pr_warning("Guest memory size %lluMB exceeds host physical RAM size %lluMB", ram_size, host_ram_size()); 1020 1021 ram_size <<= MB_SHIFT; 1022 1023 if (!dev) 1024 dev = DEFAULT_KVM_DEV; 1025 1026 if (!console) 1027 console = DEFAULT_CONSOLE; 1028 1029 if (!strncmp(console, "virtio", 6)) 1030 active_console = CONSOLE_VIRTIO; 1031 else if (!strncmp(console, "serial", 6)) 1032 active_console = CONSOLE_8250; 1033 else if (!strncmp(console, "hv", 2)) 1034 active_console = CONSOLE_HV; 1035 else 1036 pr_warning("No console!"); 1037 1038 if (!host_ip) 1039 host_ip = DEFAULT_HOST_ADDR; 1040 1041 if (!guest_ip) 1042 guest_ip = DEFAULT_GUEST_ADDR; 1043 1044 if (!guest_mac) 1045 guest_mac = DEFAULT_GUEST_MAC; 1046 1047 if (!host_mac) 1048 host_mac = DEFAULT_HOST_MAC; 1049 1050 if (!script) 1051 script = DEFAULT_SCRIPT; 1052 1053 term_init(); 1054 1055 if (!guest_name) { 1056 if (custom_rootfs) { 1057 guest_name = custom_rootfs_name; 1058 } else { 1059 sprintf(default_name, "guest-%u", getpid()); 1060 guest_name = default_name; 1061 } 1062 } 1063 1064 kvm = kvm__init(dev, hugetlbfs_path, ram_size, guest_name); 1065 if (IS_ERR(kvm)) { 1066 r = PTR_ERR(kvm); 1067 goto fail; 1068 } 1069 1070 kvm->single_step = single_step; 1071 1072 r = ioeventfd__init(kvm); 1073 if (r < 0) { 1074 pr_err("ioeventfd__init() failed with error %d\n", r); 1075 goto fail; 1076 } 1077 1078 max_cpus = kvm__max_cpus(kvm); 1079 recommended_cpus = kvm__recommended_cpus(kvm); 1080 1081 if (nrcpus > max_cpus) { 1082 printf(" # Limit the number of CPUs to %d\n", max_cpus); 1083 nrcpus = max_cpus; 1084 } else if (nrcpus > recommended_cpus) { 1085 printf(" # Warning: The maximum recommended amount of VCPUs" 1086 " is %d\n", recommended_cpus); 1087 } 1088 1089 kvm->nrcpus = nrcpus; 1090 1091 /* Alloc one pointer too many, so array ends up 0-terminated */ 1092 kvm_cpus = calloc(nrcpus + 1, sizeof(void *)); 1093 if (!kvm_cpus) 1094 die("Couldn't allocate array for %d CPUs", nrcpus); 1095 1096 r = irq__init(kvm); 1097 if (r < 0) { 1098 pr_err("irq__init() failed with error %d\n", r); 1099 goto fail; 1100 } 1101 1102 r = pci__init(kvm); 1103 if (r < 0) { 1104 pr_err("pci__init() failed with error %d\n", r); 1105 goto fail; 1106 } 1107 1108 r = ioport__init(kvm); 1109 if (r < 0) { 1110 pr_err("ioport__init() failed with error %d\n", r); 1111 goto fail; 1112 } 1113 1114 /* 1115 * vidmode should be either specified 1116 * either set by default 1117 */ 1118 if (vnc || sdl) { 1119 if (vidmode == -1) 1120 vidmode = 0x312; 1121 } else { 1122 vidmode = 0; 1123 } 1124 1125 memset(real_cmdline, 0, sizeof(real_cmdline)); 1126 kvm__arch_set_cmdline(real_cmdline, vnc || sdl); 1127 1128 if (strlen(real_cmdline) > 0) 1129 strcat(real_cmdline, " "); 1130 1131 if (kernel_cmdline) 1132 strlcat(real_cmdline, kernel_cmdline, sizeof(real_cmdline)); 1133 1134 if (!using_rootfs && !disk_image[0].filename && !initrd_filename) { 1135 char tmp[PATH_MAX]; 1136 1137 kvm_setup_create_new(custom_rootfs_name); 1138 kvm_setup_resolv(custom_rootfs_name); 1139 1140 snprintf(tmp, PATH_MAX, "%s%s", kvm__get_dir(), "default"); 1141 if (virtio_9p__register(kvm, tmp, "/dev/root") < 0) 1142 die("Unable to initialize virtio 9p"); 1143 if (virtio_9p__register(kvm, "/", "hostfs") < 0) 1144 die("Unable to initialize virtio 9p"); 1145 using_rootfs = custom_rootfs = 1; 1146 } 1147 1148 if (using_rootfs) { 1149 strcat(real_cmdline, " root=/dev/root rw rootflags=rw,trans=virtio,version=9p2000.L rootfstype=9p"); 1150 if (custom_rootfs) { 1151 kvm_run_set_sandbox(); 1152 1153 strcat(real_cmdline, " init=/virt/init"); 1154 1155 if (!no_dhcp) 1156 strcat(real_cmdline, " ip=dhcp"); 1157 if (kvm_setup_guest_init()) 1158 die("Failed to setup init for guest."); 1159 } 1160 } else if (!strstr(real_cmdline, "root=")) { 1161 strlcat(real_cmdline, " root=/dev/vda rw ", sizeof(real_cmdline)); 1162 } 1163 1164 if (image_count) { 1165 kvm->nr_disks = image_count; 1166 kvm->disks = disk_image__open_all((struct disk_image_params *)&disk_image, image_count); 1167 if (IS_ERR(kvm->disks)) { 1168 r = PTR_ERR(kvm->disks); 1169 pr_err("disk_image__open_all() failed with error %ld\n", 1170 PTR_ERR(kvm->disks)); 1171 goto fail; 1172 } 1173 } 1174 1175 printf(" # %s run -k %s -m %Lu -c %d --name %s\n", KVM_BINARY_NAME, 1176 kernel_filename, ram_size / 1024 / 1024, nrcpus, guest_name); 1177 1178 if (!firmware_filename) { 1179 if (!kvm__load_kernel(kvm, kernel_filename, 1180 initrd_filename, real_cmdline, vidmode)) 1181 die("unable to load kernel %s", kernel_filename); 1182 1183 kvm->vmlinux = vmlinux_filename; 1184 r = symbol_init(kvm); 1185 if (r < 0) 1186 pr_debug("symbol_init() failed with error %d\n", r); 1187 } 1188 1189 ioport__setup_arch(); 1190 1191 r = rtc__init(kvm); 1192 if (r < 0) { 1193 pr_err("rtc__init() failed with error %d\n", r); 1194 goto fail; 1195 } 1196 1197 r = serial8250__init(kvm); 1198 if (r < 0) { 1199 pr_err("serial__init() failed with error %d\n", r); 1200 goto fail; 1201 } 1202 1203 r = virtio_blk__init(kvm); 1204 if (r < 0) { 1205 pr_err("virtio_blk__init() failed with error %d\n", r); 1206 goto fail; 1207 } 1208 1209 r = virtio_scsi_init(kvm); 1210 if (r < 0) { 1211 pr_err("virtio_scsi_init() failed with error %d\n", r); 1212 goto fail; 1213 } 1214 1215 1216 if (active_console == CONSOLE_VIRTIO) 1217 virtio_console__init(kvm); 1218 1219 if (virtio_rng) 1220 virtio_rng__init(kvm); 1221 1222 if (balloon) 1223 virtio_bln__init(kvm); 1224 1225 if (!network) 1226 network = DEFAULT_NETWORK; 1227 1228 virtio_9p__init(kvm); 1229 1230 for (i = 0; i < num_net_devices; i++) { 1231 net_params[i].kvm = kvm; 1232 virtio_net__init(&net_params[i]); 1233 } 1234 1235 if (num_net_devices == 0 && no_net == 0) { 1236 struct virtio_net_params net_params; 1237 1238 net_params = (struct virtio_net_params) { 1239 .guest_ip = guest_ip, 1240 .host_ip = host_ip, 1241 .kvm = kvm, 1242 .script = script, 1243 .mode = NET_MODE_USER, 1244 }; 1245 str_to_mac(guest_mac, net_params.guest_mac); 1246 str_to_mac(host_mac, net_params.host_mac); 1247 1248 virtio_net__init(&net_params); 1249 } 1250 1251 kvm__init_ram(kvm); 1252 1253 #ifdef CONFIG_X86 1254 kbd__init(kvm); 1255 #endif 1256 1257 pci_shmem__init(kvm); 1258 1259 if (vnc || sdl) { 1260 fb = vesa__init(kvm); 1261 if (IS_ERR(fb)) { 1262 pr_err("vesa__init() failed with error %ld\n", PTR_ERR(fb)); 1263 goto fail; 1264 } 1265 } 1266 1267 if (vnc && fb) { 1268 r = vnc__init(fb); 1269 if (r < 0) { 1270 pr_err("vnc__init() failed with error %d\n", r); 1271 goto fail; 1272 } 1273 } 1274 1275 if (sdl && fb) { 1276 sdl__init(fb); 1277 if (r < 0) { 1278 pr_err("sdl__init() failed with error %d\n", r); 1279 goto fail; 1280 } 1281 } 1282 1283 r = fb__start(); 1284 if (r < 0) { 1285 pr_err("fb__init() failed with error %d\n", r); 1286 goto fail; 1287 } 1288 1289 /* Device init all done; firmware init must 1290 * come after this (it may set up device trees etc.) 1291 */ 1292 1293 kvm__start_timer(kvm); 1294 1295 if (firmware_filename) { 1296 if (!kvm__load_firmware(kvm, firmware_filename)) 1297 die("unable to load firmware image %s: %s", firmware_filename, strerror(errno)); 1298 } else { 1299 kvm__arch_setup_firmware(kvm); 1300 if (r < 0) { 1301 pr_err("kvm__arch_setup_firmware() failed with error %d\n", r); 1302 goto fail; 1303 } 1304 } 1305 1306 for (i = 0; i < nrcpus; i++) { 1307 kvm_cpus[i] = kvm_cpu__init(kvm, i); 1308 if (!kvm_cpus[i]) 1309 die("unable to initialize KVM VCPU"); 1310 } 1311 1312 thread_pool__init(nr_online_cpus); 1313 fail: 1314 return r; 1315 } 1316 1317 static int kvm_cmd_run_work(void) 1318 { 1319 int i, r = -1; 1320 void *ret = NULL; 1321 1322 for (i = 0; i < nrcpus; i++) { 1323 if (pthread_create(&kvm_cpus[i]->thread, NULL, kvm_cpu_thread, kvm_cpus[i]) != 0) 1324 die("unable to create KVM VCPU thread"); 1325 } 1326 1327 /* Only VCPU #0 is going to exit by itself when shutting down */ 1328 if (pthread_join(kvm_cpus[0]->thread, &ret) != 0) 1329 r = 0; 1330 1331 kvm_cpu__delete(kvm_cpus[0]); 1332 kvm_cpus[0] = NULL; 1333 1334 for (i = 1; i < nrcpus; i++) { 1335 if (kvm_cpus[i]->is_running) { 1336 pthread_kill(kvm_cpus[i]->thread, SIGKVMEXIT); 1337 if (pthread_join(kvm_cpus[i]->thread, &ret) != 0) 1338 die("pthread_join"); 1339 kvm_cpu__delete(kvm_cpus[i]); 1340 } 1341 if (ret == NULL) 1342 r = 0; 1343 } 1344 1345 return r; 1346 } 1347 1348 static void kvm_cmd_run_exit(int guest_ret) 1349 { 1350 int r = 0; 1351 1352 compat__print_all_messages(); 1353 1354 r = symbol_exit(kvm); 1355 if (r < 0) 1356 pr_warning("symbol_exit() failed with error %d\n", r); 1357 1358 r = irq__exit(kvm); 1359 if (r < 0) 1360 pr_warning("irq__exit() failed with error %d\n", r); 1361 1362 fb__stop(); 1363 1364 r = virtio_scsi_exit(kvm); 1365 if (r < 0) 1366 pr_warning("virtio_scsi_exit() failed with error %d\n", r); 1367 1368 r = virtio_blk__exit(kvm); 1369 if (r < 0) 1370 pr_warning("virtio_blk__exit() failed with error %d\n", r); 1371 1372 r = virtio_rng__exit(kvm); 1373 if (r < 0) 1374 pr_warning("virtio_rng__exit() failed with error %d\n", r); 1375 1376 r = disk_image__close_all(kvm->disks, image_count); 1377 if (r < 0) 1378 pr_warning("disk_image__close_all() failed with error %d\n", r); 1379 1380 r = serial8250__exit(kvm); 1381 if (r < 0) 1382 pr_warning("serial8250__exit() failed with error %d\n", r); 1383 1384 r = rtc__exit(kvm); 1385 if (r < 0) 1386 pr_warning("rtc__exit() failed with error %d\n", r); 1387 1388 r = kvm__arch_free_firmware(kvm); 1389 if (r < 0) 1390 pr_warning("kvm__arch_free_firmware() failed with error %d\n", r); 1391 1392 r = ioport__exit(kvm); 1393 if (r < 0) 1394 pr_warning("ioport__exit() failed with error %d\n", r); 1395 1396 r = ioeventfd__exit(kvm); 1397 if (r < 0) 1398 pr_warning("ioeventfd__exit() failed with error %d\n", r); 1399 1400 r = pci__exit(kvm); 1401 if (r < 0) 1402 pr_warning("pci__exit() failed with error %d\n", r); 1403 1404 r = kvm__exit(kvm); 1405 if (r < 0) 1406 pr_warning("pci__exit() failed with error %d\n", r); 1407 1408 free(kvm_cpus); 1409 1410 if (guest_ret == 0) 1411 printf("\n # KVM session ended normally.\n"); 1412 } 1413 1414 int kvm_cmd_run(int argc, const char **argv, const char *prefix) 1415 { 1416 int r, ret = -EFAULT; 1417 1418 r = kvm_cmd_run_init(argc, argv); 1419 if (r < 0) 1420 return r; 1421 1422 ret = kvm_cmd_run_work(); 1423 kvm_cmd_run_exit(ret); 1424 1425 return ret; 1426 } 1427