1 #include "kvm/builtin-run.h" 2 3 #include "kvm/builtin-setup.h" 4 #include "kvm/virtio-balloon.h" 5 #include "kvm/virtio-console.h" 6 #include "kvm/parse-options.h" 7 #include "kvm/8250-serial.h" 8 #include "kvm/framebuffer.h" 9 #include "kvm/disk-image.h" 10 #include "kvm/threadpool.h" 11 #include "kvm/virtio-scsi.h" 12 #include "kvm/virtio-blk.h" 13 #include "kvm/virtio-net.h" 14 #include "kvm/virtio-rng.h" 15 #include "kvm/ioeventfd.h" 16 #include "kvm/virtio-9p.h" 17 #include "kvm/barrier.h" 18 #include "kvm/kvm-cpu.h" 19 #include "kvm/ioport.h" 20 #include "kvm/symbol.h" 21 #include "kvm/i8042.h" 22 #include "kvm/mutex.h" 23 #include "kvm/term.h" 24 #include "kvm/util.h" 25 #include "kvm/strbuf.h" 26 #include "kvm/vesa.h" 27 #include "kvm/irq.h" 28 #include "kvm/kvm.h" 29 #include "kvm/pci.h" 30 #include "kvm/rtc.h" 31 #include "kvm/sdl.h" 32 #include "kvm/vnc.h" 33 #include "kvm/guest_compat.h" 34 #include "kvm/pci-shmem.h" 35 #include "kvm/kvm-ipc.h" 36 #include "kvm/builtin-debug.h" 37 38 #include <linux/types.h> 39 #include <linux/err.h> 40 41 #include <sys/utsname.h> 42 #include <sys/types.h> 43 #include <sys/stat.h> 44 #include <termios.h> 45 #include <signal.h> 46 #include <stdlib.h> 47 #include <string.h> 48 #include <unistd.h> 49 #include <ctype.h> 50 #include <stdio.h> 51 52 #define DEFAULT_KVM_DEV "/dev/kvm" 53 #define DEFAULT_CONSOLE "serial" 54 #define DEFAULT_NETWORK "user" 55 #define DEFAULT_HOST_ADDR "192.168.33.1" 56 #define DEFAULT_GUEST_ADDR "192.168.33.15" 57 #define DEFAULT_GUEST_MAC "02:15:15:15:15:15" 58 #define DEFAULT_HOST_MAC "02:01:01:01:01:01" 59 #define DEFAULT_SCRIPT "none" 60 const char *DEFAULT_SANDBOX_FILENAME = "guest/sandbox.sh"; 61 62 #define MB_SHIFT (20) 63 #define KB_SHIFT (10) 64 #define GB_SHIFT (30) 65 #define MIN_RAM_SIZE_MB (64ULL) 66 #define MIN_RAM_SIZE_BYTE (MIN_RAM_SIZE_MB << MB_SHIFT) 67 68 struct kvm *kvm; 69 struct kvm_cpu **kvm_cpus; 70 __thread struct kvm_cpu *current_kvm_cpu; 71 72 static struct disk_image_params disk_image[MAX_DISK_IMAGES]; 73 static u64 ram_size; 74 static u8 image_count; 75 static u8 num_net_devices; 76 static bool virtio_rng; 77 static const char *kernel_cmdline; 78 static const char *kernel_filename; 79 static const char *vmlinux_filename; 80 static const char *initrd_filename; 81 static const char *firmware_filename; 82 static const char *console; 83 static const char *dev; 84 static const char *network; 85 static const char *host_ip; 86 static const char *guest_ip; 87 static const char *guest_mac; 88 static const char *host_mac; 89 static const char *script; 90 static const char *guest_name; 91 static const char *sandbox; 92 static const char *hugetlbfs_path; 93 static const char *custom_rootfs_name = "default"; 94 static struct virtio_net_params *net_params; 95 static bool single_step; 96 static bool vnc; 97 static bool sdl; 98 static bool balloon; 99 static bool using_rootfs; 100 static bool custom_rootfs; 101 static bool no_net; 102 static bool no_dhcp; 103 extern bool ioport_debug; 104 extern bool mmio_debug; 105 static int kvm_run_wrapper; 106 extern int active_console; 107 extern int debug_iodelay; 108 109 bool do_debug_print = false; 110 111 static int nrcpus; 112 static int vidmode = -1; 113 114 extern char _binary_guest_init_stage2_start; 115 extern char _binary_guest_init_stage2_size; 116 extern char _binary_guest_init_start; 117 extern char _binary_guest_init_size; 118 119 static const char * const run_usage[] = { 120 "lkvm run [<options>] [<kernel image>]", 121 NULL 122 }; 123 124 enum { 125 KVM_RUN_DEFAULT, 126 KVM_RUN_SANDBOX, 127 }; 128 129 void kvm_run_set_wrapper_sandbox(void) 130 { 131 kvm_run_wrapper = KVM_RUN_SANDBOX; 132 } 133 134 static int img_name_parser(const struct option *opt, const char *arg, int unset) 135 { 136 char path[PATH_MAX]; 137 const char *cur; 138 struct stat st; 139 char *sep; 140 141 if (stat(arg, &st) == 0 && 142 S_ISDIR(st.st_mode)) { 143 char tmp[PATH_MAX]; 144 145 if (using_rootfs) 146 die("Please use only one rootfs directory atmost"); 147 148 if (realpath(arg, tmp) == 0 || 149 virtio_9p__register(kvm, tmp, "/dev/root") < 0) 150 die("Unable to initialize virtio 9p"); 151 using_rootfs = 1; 152 return 0; 153 } 154 155 snprintf(path, PATH_MAX, "%s%s", kvm__get_dir(), arg); 156 157 if (stat(path, &st) == 0 && 158 S_ISDIR(st.st_mode)) { 159 char tmp[PATH_MAX]; 160 161 if (using_rootfs) 162 die("Please use only one rootfs directory atmost"); 163 164 if (realpath(path, tmp) == 0 || 165 virtio_9p__register(kvm, tmp, "/dev/root") < 0) 166 die("Unable to initialize virtio 9p"); 167 if (virtio_9p__register(kvm, "/", "hostfs") < 0) 168 die("Unable to initialize virtio 9p"); 169 kvm_setup_resolv(arg); 170 using_rootfs = custom_rootfs = 1; 171 custom_rootfs_name = arg; 172 return 0; 173 } 174 175 if (image_count >= MAX_DISK_IMAGES) 176 die("Currently only 4 images are supported"); 177 178 disk_image[image_count].filename = arg; 179 cur = arg; 180 181 if (strncmp(arg, "scsi:", 5) == 0) { 182 sep = strstr(arg, ":"); 183 if (sep) 184 disk_image[image_count].wwpn = sep + 1; 185 sep = strstr(sep + 1, ":"); 186 if (sep) { 187 *sep = 0; 188 disk_image[image_count].tpgt = sep + 1; 189 } 190 cur = sep + 1; 191 } 192 193 do { 194 sep = strstr(cur, ","); 195 if (sep) { 196 if (strncmp(sep + 1, "ro", 2) == 0) 197 disk_image[image_count].readonly = true; 198 else if (strncmp(sep + 1, "direct", 6) == 0) 199 disk_image[image_count].direct = true; 200 *sep = 0; 201 cur = sep + 1; 202 } 203 } while (sep); 204 205 image_count++; 206 207 return 0; 208 } 209 210 static int virtio_9p_rootdir_parser(const struct option *opt, const char *arg, int unset) 211 { 212 char *tag_name; 213 char tmp[PATH_MAX]; 214 215 /* 216 * 9p dir can be of the form dirname,tag_name or 217 * just dirname. In the later case we use the 218 * default tag name 219 */ 220 tag_name = strstr(arg, ","); 221 if (tag_name) { 222 *tag_name = '\0'; 223 tag_name++; 224 } 225 if (realpath(arg, tmp)) { 226 if (virtio_9p__register(kvm, tmp, tag_name) < 0) 227 die("Unable to initialize virtio 9p"); 228 } else 229 die("Failed resolving 9p path"); 230 return 0; 231 } 232 233 static int tty_parser(const struct option *opt, const char *arg, int unset) 234 { 235 int tty = atoi(arg); 236 237 term_set_tty(tty); 238 239 return 0; 240 } 241 242 static inline void str_to_mac(const char *str, char *mac) 243 { 244 sscanf(str, "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx", 245 mac, mac+1, mac+2, mac+3, mac+4, mac+5); 246 } 247 static int set_net_param(struct virtio_net_params *p, const char *param, 248 const char *val) 249 { 250 if (strcmp(param, "guest_mac") == 0) { 251 str_to_mac(val, p->guest_mac); 252 } else if (strcmp(param, "mode") == 0) { 253 if (!strncmp(val, "user", 4)) { 254 int i; 255 256 for (i = 0; i < num_net_devices; i++) 257 if (net_params[i].mode == NET_MODE_USER) 258 die("Only one usermode network device allowed at a time"); 259 p->mode = NET_MODE_USER; 260 } else if (!strncmp(val, "tap", 3)) { 261 p->mode = NET_MODE_TAP; 262 } else if (!strncmp(val, "none", 4)) { 263 no_net = 1; 264 return -1; 265 } else 266 die("Unkown network mode %s, please use user, tap or none", network); 267 } else if (strcmp(param, "script") == 0) { 268 p->script = strdup(val); 269 } else if (strcmp(param, "guest_ip") == 0) { 270 p->guest_ip = strdup(val); 271 } else if (strcmp(param, "host_ip") == 0) { 272 p->host_ip = strdup(val); 273 } else if (strcmp(param, "trans") == 0) { 274 p->trans = strdup(val); 275 } else if (strcmp(param, "vhost") == 0) { 276 p->vhost = atoi(val); 277 } else if (strcmp(param, "fd") == 0) { 278 p->fd = atoi(val); 279 } else 280 die("Unknown network parameter %s", param); 281 282 return 0; 283 } 284 285 static int netdev_parser(const struct option *opt, const char *arg, int unset) 286 { 287 struct virtio_net_params p; 288 char *buf = NULL, *cmd = NULL, *cur = NULL; 289 bool on_cmd = true; 290 291 if (arg) { 292 buf = strdup(arg); 293 if (buf == NULL) 294 die("Failed allocating new net buffer"); 295 cur = strtok(buf, ",="); 296 } 297 298 p = (struct virtio_net_params) { 299 .guest_ip = DEFAULT_GUEST_ADDR, 300 .host_ip = DEFAULT_HOST_ADDR, 301 .script = DEFAULT_SCRIPT, 302 .mode = NET_MODE_TAP, 303 }; 304 305 str_to_mac(DEFAULT_GUEST_MAC, p.guest_mac); 306 p.guest_mac[5] += num_net_devices; 307 308 while (cur) { 309 if (on_cmd) { 310 cmd = cur; 311 } else { 312 if (set_net_param(&p, cmd, cur) < 0) 313 goto done; 314 } 315 on_cmd = !on_cmd; 316 317 cur = strtok(NULL, ",="); 318 }; 319 320 num_net_devices++; 321 322 net_params = realloc(net_params, num_net_devices * sizeof(*net_params)); 323 if (net_params == NULL) 324 die("Failed adding new network device"); 325 326 net_params[num_net_devices - 1] = p; 327 328 done: 329 free(buf); 330 return 0; 331 } 332 333 static int shmem_parser(const struct option *opt, const char *arg, int unset) 334 { 335 const u64 default_size = SHMEM_DEFAULT_SIZE; 336 const u64 default_phys_addr = SHMEM_DEFAULT_ADDR; 337 const char *default_handle = SHMEM_DEFAULT_HANDLE; 338 struct shmem_info *si = malloc(sizeof(struct shmem_info)); 339 u64 phys_addr; 340 u64 size; 341 char *handle = NULL; 342 int create = 0; 343 const char *p = arg; 344 char *next; 345 int base = 10; 346 int verbose = 0; 347 348 const int skip_pci = strlen("pci:"); 349 if (verbose) 350 pr_info("shmem_parser(%p,%s,%d)", opt, arg, unset); 351 /* parse out optional addr family */ 352 if (strcasestr(p, "pci:")) { 353 p += skip_pci; 354 } else if (strcasestr(p, "mem:")) { 355 die("I can't add to E820 map yet.\n"); 356 } 357 /* parse out physical addr */ 358 base = 10; 359 if (strcasestr(p, "0x")) 360 base = 16; 361 phys_addr = strtoll(p, &next, base); 362 if (next == p && phys_addr == 0) { 363 pr_info("shmem: no physical addr specified, using default."); 364 phys_addr = default_phys_addr; 365 } 366 if (*next != ':' && *next != '\0') 367 die("shmem: unexpected chars after phys addr.\n"); 368 if (*next == '\0') 369 p = next; 370 else 371 p = next + 1; 372 /* parse out size */ 373 base = 10; 374 if (strcasestr(p, "0x")) 375 base = 16; 376 size = strtoll(p, &next, base); 377 if (next == p && size == 0) { 378 pr_info("shmem: no size specified, using default."); 379 size = default_size; 380 } 381 /* look for [KMGkmg][Bb]* uses base 2. */ 382 int skip_B = 0; 383 if (strspn(next, "KMGkmg")) { /* might have a prefix */ 384 if (*(next + 1) == 'B' || *(next + 1) == 'b') 385 skip_B = 1; 386 switch (*next) { 387 case 'K': 388 case 'k': 389 size = size << KB_SHIFT; 390 break; 391 case 'M': 392 case 'm': 393 size = size << MB_SHIFT; 394 break; 395 case 'G': 396 case 'g': 397 size = size << GB_SHIFT; 398 break; 399 default: 400 die("shmem: bug in detecting size prefix."); 401 break; 402 } 403 next += 1 + skip_B; 404 } 405 if (*next != ':' && *next != '\0') { 406 die("shmem: unexpected chars after phys size. <%c><%c>\n", 407 *next, *p); 408 } 409 if (*next == '\0') 410 p = next; 411 else 412 p = next + 1; 413 /* parse out optional shmem handle */ 414 const int skip_handle = strlen("handle="); 415 next = strcasestr(p, "handle="); 416 if (*p && next) { 417 if (p != next) 418 die("unexpected chars before handle\n"); 419 p += skip_handle; 420 next = strchrnul(p, ':'); 421 if (next - p) { 422 handle = malloc(next - p + 1); 423 strncpy(handle, p, next - p); 424 handle[next - p] = '\0'; /* just in case. */ 425 } 426 if (*next == '\0') 427 p = next; 428 else 429 p = next + 1; 430 } 431 /* parse optional create flag to see if we should create shm seg. */ 432 if (*p && strcasestr(p, "create")) { 433 create = 1; 434 p += strlen("create"); 435 } 436 if (*p != '\0') 437 die("shmem: unexpected trailing chars\n"); 438 if (handle == NULL) { 439 handle = malloc(strlen(default_handle) + 1); 440 strcpy(handle, default_handle); 441 } 442 if (verbose) { 443 pr_info("shmem: phys_addr = %llx", phys_addr); 444 pr_info("shmem: size = %llx", size); 445 pr_info("shmem: handle = %s", handle); 446 pr_info("shmem: create = %d", create); 447 } 448 449 si->phys_addr = phys_addr; 450 si->size = size; 451 si->handle = handle; 452 si->create = create; 453 pci_shmem__register_mem(si); /* ownership of si, etc. passed on. */ 454 return 0; 455 } 456 457 static const struct option options[] = { 458 OPT_GROUP("Basic options:"), 459 OPT_STRING('\0', "name", &guest_name, "guest name", 460 "A name for the guest"), 461 OPT_INTEGER('c', "cpus", &nrcpus, "Number of CPUs"), 462 OPT_U64('m', "mem", &ram_size, "Virtual machine memory size in MiB."), 463 OPT_CALLBACK('\0', "shmem", NULL, 464 "[pci:]<addr>:<size>[:handle=<handle>][:create]", 465 "Share host shmem with guest via pci device", 466 shmem_parser), 467 OPT_CALLBACK('d', "disk", NULL, "image or rootfs_dir", "Disk image or rootfs directory", img_name_parser), 468 OPT_BOOLEAN('\0', "balloon", &balloon, "Enable virtio balloon"), 469 OPT_BOOLEAN('\0', "vnc", &vnc, "Enable VNC framebuffer"), 470 OPT_BOOLEAN('\0', "sdl", &sdl, "Enable SDL framebuffer"), 471 OPT_BOOLEAN('\0', "rng", &virtio_rng, "Enable virtio Random Number Generator"), 472 OPT_CALLBACK('\0', "9p", NULL, "dir_to_share,tag_name", 473 "Enable virtio 9p to share files between host and guest", virtio_9p_rootdir_parser), 474 OPT_STRING('\0', "console", &console, "serial, virtio or hv", 475 "Console to use"), 476 OPT_STRING('\0', "dev", &dev, "device_file", "KVM device file"), 477 OPT_CALLBACK('\0', "tty", NULL, "tty id", 478 "Remap guest TTY into a pty on the host", 479 tty_parser), 480 OPT_STRING('\0', "sandbox", &sandbox, "script", 481 "Run this script when booting into custom rootfs"), 482 OPT_STRING('\0', "hugetlbfs", &hugetlbfs_path, "path", "Hugetlbfs path"), 483 484 OPT_GROUP("Kernel options:"), 485 OPT_STRING('k', "kernel", &kernel_filename, "kernel", 486 "Kernel to boot in virtual machine"), 487 OPT_STRING('i', "initrd", &initrd_filename, "initrd", 488 "Initial RAM disk image"), 489 OPT_STRING('p', "params", &kernel_cmdline, "params", 490 "Kernel command line arguments"), 491 OPT_STRING('f', "firmware", &firmware_filename, "firmware", 492 "Firmware image to boot in virtual machine"), 493 494 OPT_GROUP("Networking options:"), 495 OPT_CALLBACK_DEFAULT('n', "network", NULL, "network params", 496 "Create a new guest NIC", 497 netdev_parser, NULL), 498 OPT_BOOLEAN('\0', "no-dhcp", &no_dhcp, "Disable kernel DHCP in rootfs mode"), 499 500 OPT_GROUP("BIOS options:"), 501 OPT_INTEGER('\0', "vidmode", &vidmode, 502 "Video mode"), 503 504 OPT_GROUP("Debug options:"), 505 OPT_BOOLEAN('\0', "debug", &do_debug_print, 506 "Enable debug messages"), 507 OPT_BOOLEAN('\0', "debug-single-step", &single_step, 508 "Enable single stepping"), 509 OPT_BOOLEAN('\0', "debug-ioport", &ioport_debug, 510 "Enable ioport debugging"), 511 OPT_BOOLEAN('\0', "debug-mmio", &mmio_debug, 512 "Enable MMIO debugging"), 513 OPT_INTEGER('\0', "debug-iodelay", &debug_iodelay, 514 "Delay IO by millisecond"), 515 OPT_END() 516 }; 517 518 /* 519 * Serialize debug printout so that the output of multiple vcpus does not 520 * get mixed up: 521 */ 522 static int printout_done; 523 524 static void handle_sigusr1(int sig) 525 { 526 struct kvm_cpu *cpu = current_kvm_cpu; 527 int fd = kvm_cpu__get_debug_fd(); 528 529 if (!cpu || cpu->needs_nmi) 530 return; 531 532 dprintf(fd, "\n #\n # vCPU #%ld's dump:\n #\n", cpu->cpu_id); 533 kvm_cpu__show_registers(cpu); 534 kvm_cpu__show_code(cpu); 535 kvm_cpu__show_page_tables(cpu); 536 fflush(stdout); 537 printout_done = 1; 538 mb(); 539 } 540 541 /* Pause/resume the guest using SIGUSR2 */ 542 static int is_paused; 543 544 static void handle_pause(int fd, u32 type, u32 len, u8 *msg) 545 { 546 if (WARN_ON(len)) 547 return; 548 549 if (type == KVM_IPC_RESUME && is_paused) { 550 kvm->vm_state = KVM_VMSTATE_RUNNING; 551 kvm__continue(); 552 } else if (type == KVM_IPC_PAUSE && !is_paused) { 553 kvm->vm_state = KVM_VMSTATE_PAUSED; 554 ioctl(kvm->vm_fd, KVM_KVMCLOCK_CTRL); 555 kvm__pause(); 556 } else { 557 return; 558 } 559 560 is_paused = !is_paused; 561 } 562 563 static void handle_vmstate(int fd, u32 type, u32 len, u8 *msg) 564 { 565 int r = 0; 566 567 if (type == KVM_IPC_VMSTATE) 568 r = write(fd, &kvm->vm_state, sizeof(kvm->vm_state)); 569 570 if (r < 0) 571 pr_warning("Failed sending VMSTATE"); 572 } 573 574 static void handle_debug(int fd, u32 type, u32 len, u8 *msg) 575 { 576 int i; 577 struct debug_cmd_params *params; 578 u32 dbg_type; 579 u32 vcpu; 580 581 if (WARN_ON(type != KVM_IPC_DEBUG || len != sizeof(*params))) 582 return; 583 584 params = (void *)msg; 585 dbg_type = params->dbg_type; 586 vcpu = params->cpu; 587 588 if (dbg_type & KVM_DEBUG_CMD_TYPE_SYSRQ) 589 serial8250__inject_sysrq(kvm, params->sysrq); 590 591 if (dbg_type & KVM_DEBUG_CMD_TYPE_NMI) { 592 if ((int)vcpu >= kvm->nrcpus) 593 return; 594 595 kvm_cpus[vcpu]->needs_nmi = 1; 596 pthread_kill(kvm_cpus[vcpu]->thread, SIGUSR1); 597 } 598 599 if (!(dbg_type & KVM_DEBUG_CMD_TYPE_DUMP)) 600 return; 601 602 for (i = 0; i < nrcpus; i++) { 603 struct kvm_cpu *cpu = kvm_cpus[i]; 604 605 if (!cpu) 606 continue; 607 608 printout_done = 0; 609 610 kvm_cpu__set_debug_fd(fd); 611 pthread_kill(cpu->thread, SIGUSR1); 612 /* 613 * Wait for the vCPU to dump state before signalling 614 * the next thread. Since this is debug code it does 615 * not matter that we are burning CPU time a bit: 616 */ 617 while (!printout_done) 618 mb(); 619 } 620 621 close(fd); 622 623 serial8250__inject_sysrq(kvm, 'p'); 624 } 625 626 static void handle_sigalrm(int sig) 627 { 628 kvm__arch_periodic_poll(kvm); 629 } 630 631 static void handle_stop(int fd, u32 type, u32 len, u8 *msg) 632 { 633 if (WARN_ON(type != KVM_IPC_STOP || len)) 634 return; 635 636 kvm_cpu__reboot(); 637 } 638 639 static void *kvm_cpu_thread(void *arg) 640 { 641 current_kvm_cpu = arg; 642 643 if (kvm_cpu__start(current_kvm_cpu)) 644 goto panic_kvm; 645 646 return (void *) (intptr_t) 0; 647 648 panic_kvm: 649 fprintf(stderr, "KVM exit reason: %u (\"%s\")\n", 650 current_kvm_cpu->kvm_run->exit_reason, 651 kvm_exit_reasons[current_kvm_cpu->kvm_run->exit_reason]); 652 if (current_kvm_cpu->kvm_run->exit_reason == KVM_EXIT_UNKNOWN) 653 fprintf(stderr, "KVM exit code: 0x%Lu\n", 654 current_kvm_cpu->kvm_run->hw.hardware_exit_reason); 655 656 kvm_cpu__set_debug_fd(STDOUT_FILENO); 657 kvm_cpu__show_registers(current_kvm_cpu); 658 kvm_cpu__show_code(current_kvm_cpu); 659 kvm_cpu__show_page_tables(current_kvm_cpu); 660 661 return (void *) (intptr_t) 1; 662 } 663 664 static char kernel[PATH_MAX]; 665 666 static const char *host_kernels[] = { 667 "/boot/vmlinuz", 668 "/boot/bzImage", 669 NULL 670 }; 671 672 static const char *default_kernels[] = { 673 "./bzImage", 674 "arch/" BUILD_ARCH "/boot/bzImage", 675 "../../arch/" BUILD_ARCH "/boot/bzImage", 676 NULL 677 }; 678 679 static const char *default_vmlinux[] = { 680 "vmlinux", 681 "../../../vmlinux", 682 "../../vmlinux", 683 NULL 684 }; 685 686 static void kernel_usage_with_options(void) 687 { 688 const char **k; 689 struct utsname uts; 690 691 fprintf(stderr, "Fatal: could not find default kernel image in:\n"); 692 k = &default_kernels[0]; 693 while (*k) { 694 fprintf(stderr, "\t%s\n", *k); 695 k++; 696 } 697 698 if (uname(&uts) < 0) 699 return; 700 701 k = &host_kernels[0]; 702 while (*k) { 703 if (snprintf(kernel, PATH_MAX, "%s-%s", *k, uts.release) < 0) 704 return; 705 fprintf(stderr, "\t%s\n", kernel); 706 k++; 707 } 708 fprintf(stderr, "\nPlease see '%s run --help' for more options.\n\n", 709 KVM_BINARY_NAME); 710 } 711 712 static u64 host_ram_size(void) 713 { 714 long page_size; 715 long nr_pages; 716 717 nr_pages = sysconf(_SC_PHYS_PAGES); 718 if (nr_pages < 0) { 719 pr_warning("sysconf(_SC_PHYS_PAGES) failed"); 720 return 0; 721 } 722 723 page_size = sysconf(_SC_PAGE_SIZE); 724 if (page_size < 0) { 725 pr_warning("sysconf(_SC_PAGE_SIZE) failed"); 726 return 0; 727 } 728 729 return (nr_pages * page_size) >> MB_SHIFT; 730 } 731 732 /* 733 * If user didn't specify how much memory it wants to allocate for the guest, 734 * avoid filling the whole host RAM. 735 */ 736 #define RAM_SIZE_RATIO 0.8 737 738 static u64 get_ram_size(int nr_cpus) 739 { 740 u64 available; 741 u64 ram_size; 742 743 ram_size = 64 * (nr_cpus + 3); 744 745 available = host_ram_size() * RAM_SIZE_RATIO; 746 if (!available) 747 available = MIN_RAM_SIZE_MB; 748 749 if (ram_size > available) 750 ram_size = available; 751 752 return ram_size; 753 } 754 755 static const char *find_kernel(void) 756 { 757 const char **k; 758 struct stat st; 759 struct utsname uts; 760 761 k = &default_kernels[0]; 762 while (*k) { 763 if (stat(*k, &st) < 0 || !S_ISREG(st.st_mode)) { 764 k++; 765 continue; 766 } 767 strncpy(kernel, *k, PATH_MAX); 768 return kernel; 769 } 770 771 if (uname(&uts) < 0) 772 return NULL; 773 774 k = &host_kernels[0]; 775 while (*k) { 776 if (snprintf(kernel, PATH_MAX, "%s-%s", *k, uts.release) < 0) 777 return NULL; 778 779 if (stat(kernel, &st) < 0 || !S_ISREG(st.st_mode)) { 780 k++; 781 continue; 782 } 783 return kernel; 784 785 } 786 return NULL; 787 } 788 789 static const char *find_vmlinux(void) 790 { 791 const char **vmlinux; 792 793 vmlinux = &default_vmlinux[0]; 794 while (*vmlinux) { 795 struct stat st; 796 797 if (stat(*vmlinux, &st) < 0 || !S_ISREG(st.st_mode)) { 798 vmlinux++; 799 continue; 800 } 801 return *vmlinux; 802 } 803 return NULL; 804 } 805 806 void kvm_run_help(void) 807 { 808 usage_with_options(run_usage, options); 809 } 810 811 static int kvm_setup_guest_init(void) 812 { 813 const char *rootfs = custom_rootfs_name; 814 char tmp[PATH_MAX]; 815 size_t size; 816 int fd, ret; 817 char *data; 818 819 /* Setup /virt/init */ 820 size = (size_t)&_binary_guest_init_size; 821 data = (char *)&_binary_guest_init_start; 822 snprintf(tmp, PATH_MAX, "%s%s/virt/init", kvm__get_dir(), rootfs); 823 remove(tmp); 824 fd = open(tmp, O_CREAT | O_WRONLY, 0755); 825 if (fd < 0) 826 die("Fail to setup %s", tmp); 827 ret = xwrite(fd, data, size); 828 if (ret < 0) 829 die("Fail to setup %s", tmp); 830 close(fd); 831 832 /* Setup /virt/init_stage2 */ 833 size = (size_t)&_binary_guest_init_stage2_size; 834 data = (char *)&_binary_guest_init_stage2_start; 835 snprintf(tmp, PATH_MAX, "%s%s/virt/init_stage2", kvm__get_dir(), rootfs); 836 remove(tmp); 837 fd = open(tmp, O_CREAT | O_WRONLY, 0755); 838 if (fd < 0) 839 die("Fail to setup %s", tmp); 840 ret = xwrite(fd, data, size); 841 if (ret < 0) 842 die("Fail to setup %s", tmp); 843 close(fd); 844 845 return 0; 846 } 847 848 static int kvm_run_set_sandbox(void) 849 { 850 const char *guestfs_name = custom_rootfs_name; 851 char path[PATH_MAX], script[PATH_MAX], *tmp; 852 853 snprintf(path, PATH_MAX, "%s%s/virt/sandbox.sh", kvm__get_dir(), guestfs_name); 854 855 remove(path); 856 857 if (sandbox == NULL) 858 return 0; 859 860 tmp = realpath(sandbox, NULL); 861 if (tmp == NULL) 862 return -ENOMEM; 863 864 snprintf(script, PATH_MAX, "/host/%s", tmp); 865 free(tmp); 866 867 return symlink(script, path); 868 } 869 870 static void kvm_write_sandbox_cmd_exactly(int fd, const char *arg) 871 { 872 const char *single_quote; 873 874 if (!*arg) { /* zero length string */ 875 if (write(fd, "''", 2) <= 0) 876 die("Failed writing sandbox script"); 877 return; 878 } 879 880 while (*arg) { 881 single_quote = strchrnul(arg, '\''); 882 883 /* write non-single-quote string as #('string') */ 884 if (arg != single_quote) { 885 if (write(fd, "'", 1) <= 0 || 886 write(fd, arg, single_quote - arg) <= 0 || 887 write(fd, "'", 1) <= 0) 888 die("Failed writing sandbox script"); 889 } 890 891 /* write single quote as #("'") */ 892 if (*single_quote) { 893 if (write(fd, "\"'\"", 3) <= 0) 894 die("Failed writing sandbox script"); 895 } else 896 break; 897 898 arg = single_quote + 1; 899 } 900 } 901 902 static void resolve_program(const char *src, char *dst, size_t len) 903 { 904 struct stat st; 905 int err; 906 907 err = stat(src, &st); 908 909 if (!err && S_ISREG(st.st_mode)) { 910 char resolved_path[PATH_MAX]; 911 912 if (!realpath(src, resolved_path)) 913 die("Unable to resolve program %s: %s\n", src, strerror(errno)); 914 915 snprintf(dst, len, "/host%s", resolved_path); 916 } else 917 strncpy(dst, src, len); 918 } 919 920 static void kvm_run_write_sandbox_cmd(const char **argv, int argc) 921 { 922 const char script_hdr[] = "#! /bin/bash\n\n"; 923 char program[PATH_MAX]; 924 int fd; 925 926 remove(sandbox); 927 928 fd = open(sandbox, O_RDWR | O_CREAT, 0777); 929 if (fd < 0) 930 die("Failed creating sandbox script"); 931 932 if (write(fd, script_hdr, sizeof(script_hdr) - 1) <= 0) 933 die("Failed writing sandbox script"); 934 935 resolve_program(argv[0], program, PATH_MAX); 936 kvm_write_sandbox_cmd_exactly(fd, program); 937 938 argv++; 939 argc--; 940 941 while (argc) { 942 if (write(fd, " ", 1) <= 0) 943 die("Failed writing sandbox script"); 944 945 kvm_write_sandbox_cmd_exactly(fd, argv[0]); 946 argv++; 947 argc--; 948 } 949 if (write(fd, "\n", 1) <= 0) 950 die("Failed writing sandbox script"); 951 952 close(fd); 953 } 954 955 static int kvm_cmd_run_init(int argc, const char **argv) 956 { 957 static char real_cmdline[2048], default_name[20]; 958 struct framebuffer *fb = NULL; 959 unsigned int nr_online_cpus; 960 int max_cpus, recommended_cpus; 961 int i, r; 962 963 signal(SIGALRM, handle_sigalrm); 964 kvm_ipc__register_handler(KVM_IPC_DEBUG, handle_debug); 965 signal(SIGUSR1, handle_sigusr1); 966 kvm_ipc__register_handler(KVM_IPC_PAUSE, handle_pause); 967 kvm_ipc__register_handler(KVM_IPC_RESUME, handle_pause); 968 kvm_ipc__register_handler(KVM_IPC_STOP, handle_stop); 969 kvm_ipc__register_handler(KVM_IPC_VMSTATE, handle_vmstate); 970 971 nr_online_cpus = sysconf(_SC_NPROCESSORS_ONLN); 972 973 while (argc != 0) { 974 argc = parse_options(argc, argv, options, run_usage, 975 PARSE_OPT_STOP_AT_NON_OPTION | 976 PARSE_OPT_KEEP_DASHDASH); 977 if (argc != 0) { 978 /* Cusrom options, should have been handled elsewhere */ 979 if (strcmp(argv[0], "--") == 0) { 980 if (kvm_run_wrapper == KVM_RUN_SANDBOX) { 981 sandbox = DEFAULT_SANDBOX_FILENAME; 982 kvm_run_write_sandbox_cmd(argv+1, argc-1); 983 break; 984 } 985 } 986 987 if ((kvm_run_wrapper == KVM_RUN_DEFAULT && kernel_filename) || 988 (kvm_run_wrapper == KVM_RUN_SANDBOX && sandbox)) { 989 fprintf(stderr, "Cannot handle parameter: " 990 "%s\n", argv[0]); 991 usage_with_options(run_usage, options); 992 return -EINVAL; 993 } 994 if (kvm_run_wrapper == KVM_RUN_SANDBOX) { 995 /* 996 * first unhandled parameter is treated as 997 * sandbox command 998 */ 999 sandbox = DEFAULT_SANDBOX_FILENAME; 1000 kvm_run_write_sandbox_cmd(argv, argc); 1001 } else { 1002 /* 1003 * first unhandled parameter is treated as a kernel 1004 * image 1005 */ 1006 kernel_filename = argv[0]; 1007 } 1008 argv++; 1009 argc--; 1010 } 1011 1012 } 1013 1014 if (!kernel_filename) 1015 kernel_filename = find_kernel(); 1016 1017 if (!kernel_filename) { 1018 kernel_usage_with_options(); 1019 return -EINVAL; 1020 } 1021 1022 vmlinux_filename = find_vmlinux(); 1023 1024 if (nrcpus == 0) 1025 nrcpus = nr_online_cpus; 1026 1027 if (!ram_size) 1028 ram_size = get_ram_size(nrcpus); 1029 1030 if (ram_size < MIN_RAM_SIZE_MB) 1031 die("Not enough memory specified: %lluMB (min %lluMB)", ram_size, MIN_RAM_SIZE_MB); 1032 1033 if (ram_size > host_ram_size()) 1034 pr_warning("Guest memory size %lluMB exceeds host physical RAM size %lluMB", ram_size, host_ram_size()); 1035 1036 ram_size <<= MB_SHIFT; 1037 1038 if (!dev) 1039 dev = DEFAULT_KVM_DEV; 1040 1041 if (!console) 1042 console = DEFAULT_CONSOLE; 1043 1044 if (!strncmp(console, "virtio", 6)) 1045 active_console = CONSOLE_VIRTIO; 1046 else if (!strncmp(console, "serial", 6)) 1047 active_console = CONSOLE_8250; 1048 else if (!strncmp(console, "hv", 2)) 1049 active_console = CONSOLE_HV; 1050 else 1051 pr_warning("No console!"); 1052 1053 if (!host_ip) 1054 host_ip = DEFAULT_HOST_ADDR; 1055 1056 if (!guest_ip) 1057 guest_ip = DEFAULT_GUEST_ADDR; 1058 1059 if (!guest_mac) 1060 guest_mac = DEFAULT_GUEST_MAC; 1061 1062 if (!host_mac) 1063 host_mac = DEFAULT_HOST_MAC; 1064 1065 if (!script) 1066 script = DEFAULT_SCRIPT; 1067 1068 term_init(); 1069 1070 if (!guest_name) { 1071 if (custom_rootfs) { 1072 guest_name = custom_rootfs_name; 1073 } else { 1074 sprintf(default_name, "guest-%u", getpid()); 1075 guest_name = default_name; 1076 } 1077 } 1078 1079 kvm = kvm__init(dev, hugetlbfs_path, ram_size, guest_name); 1080 if (IS_ERR(kvm)) { 1081 r = PTR_ERR(kvm); 1082 goto fail; 1083 } 1084 1085 kvm->single_step = single_step; 1086 1087 r = ioeventfd__init(kvm); 1088 if (r < 0) { 1089 pr_err("ioeventfd__init() failed with error %d\n", r); 1090 goto fail; 1091 } 1092 1093 max_cpus = kvm__max_cpus(kvm); 1094 recommended_cpus = kvm__recommended_cpus(kvm); 1095 1096 if (nrcpus > max_cpus) { 1097 printf(" # Limit the number of CPUs to %d\n", max_cpus); 1098 nrcpus = max_cpus; 1099 } else if (nrcpus > recommended_cpus) { 1100 printf(" # Warning: The maximum recommended amount of VCPUs" 1101 " is %d\n", recommended_cpus); 1102 } 1103 1104 kvm->nrcpus = nrcpus; 1105 1106 /* Alloc one pointer too many, so array ends up 0-terminated */ 1107 kvm_cpus = calloc(nrcpus + 1, sizeof(void *)); 1108 if (!kvm_cpus) 1109 die("Couldn't allocate array for %d CPUs", nrcpus); 1110 1111 r = irq__init(kvm); 1112 if (r < 0) { 1113 pr_err("irq__init() failed with error %d\n", r); 1114 goto fail; 1115 } 1116 1117 r = pci__init(kvm); 1118 if (r < 0) { 1119 pr_err("pci__init() failed with error %d\n", r); 1120 goto fail; 1121 } 1122 1123 r = ioport__init(kvm); 1124 if (r < 0) { 1125 pr_err("ioport__init() failed with error %d\n", r); 1126 goto fail; 1127 } 1128 1129 /* 1130 * vidmode should be either specified 1131 * either set by default 1132 */ 1133 if (vnc || sdl) { 1134 if (vidmode == -1) 1135 vidmode = 0x312; 1136 } else { 1137 vidmode = 0; 1138 } 1139 1140 memset(real_cmdline, 0, sizeof(real_cmdline)); 1141 kvm__arch_set_cmdline(real_cmdline, vnc || sdl); 1142 1143 if (strlen(real_cmdline) > 0) 1144 strcat(real_cmdline, " "); 1145 1146 if (kernel_cmdline) 1147 strlcat(real_cmdline, kernel_cmdline, sizeof(real_cmdline)); 1148 1149 if (!using_rootfs && !disk_image[0].filename && !initrd_filename) { 1150 char tmp[PATH_MAX]; 1151 1152 kvm_setup_create_new(custom_rootfs_name); 1153 kvm_setup_resolv(custom_rootfs_name); 1154 1155 snprintf(tmp, PATH_MAX, "%s%s", kvm__get_dir(), "default"); 1156 if (virtio_9p__register(kvm, tmp, "/dev/root") < 0) 1157 die("Unable to initialize virtio 9p"); 1158 if (virtio_9p__register(kvm, "/", "hostfs") < 0) 1159 die("Unable to initialize virtio 9p"); 1160 using_rootfs = custom_rootfs = 1; 1161 } 1162 1163 if (using_rootfs) { 1164 strcat(real_cmdline, " root=/dev/root rw rootflags=rw,trans=virtio,version=9p2000.L rootfstype=9p"); 1165 if (custom_rootfs) { 1166 kvm_run_set_sandbox(); 1167 1168 strcat(real_cmdline, " init=/virt/init"); 1169 1170 if (!no_dhcp) 1171 strcat(real_cmdline, " ip=dhcp"); 1172 if (kvm_setup_guest_init()) 1173 die("Failed to setup init for guest."); 1174 } 1175 } else if (!strstr(real_cmdline, "root=")) { 1176 strlcat(real_cmdline, " root=/dev/vda rw ", sizeof(real_cmdline)); 1177 } 1178 1179 if (image_count) { 1180 kvm->nr_disks = image_count; 1181 kvm->disks = disk_image__open_all((struct disk_image_params *)&disk_image, image_count); 1182 if (IS_ERR(kvm->disks)) { 1183 r = PTR_ERR(kvm->disks); 1184 pr_err("disk_image__open_all() failed with error %ld\n", 1185 PTR_ERR(kvm->disks)); 1186 goto fail; 1187 } 1188 } 1189 1190 printf(" # %s run -k %s -m %Lu -c %d --name %s\n", KVM_BINARY_NAME, 1191 kernel_filename, ram_size / 1024 / 1024, nrcpus, guest_name); 1192 1193 if (!firmware_filename) { 1194 if (!kvm__load_kernel(kvm, kernel_filename, 1195 initrd_filename, real_cmdline, vidmode)) 1196 die("unable to load kernel %s", kernel_filename); 1197 1198 kvm->vmlinux = vmlinux_filename; 1199 r = symbol_init(kvm); 1200 if (r < 0) 1201 pr_debug("symbol_init() failed with error %d\n", r); 1202 } 1203 1204 ioport__setup_arch(); 1205 1206 r = rtc__init(kvm); 1207 if (r < 0) { 1208 pr_err("rtc__init() failed with error %d\n", r); 1209 goto fail; 1210 } 1211 1212 r = serial8250__init(kvm); 1213 if (r < 0) { 1214 pr_err("serial__init() failed with error %d\n", r); 1215 goto fail; 1216 } 1217 1218 r = virtio_blk__init(kvm); 1219 if (r < 0) { 1220 pr_err("virtio_blk__init() failed with error %d\n", r); 1221 goto fail; 1222 } 1223 1224 r = virtio_scsi_init(kvm); 1225 if (r < 0) { 1226 pr_err("virtio_scsi_init() failed with error %d\n", r); 1227 goto fail; 1228 } 1229 1230 1231 if (active_console == CONSOLE_VIRTIO) 1232 virtio_console__init(kvm); 1233 1234 if (virtio_rng) 1235 virtio_rng__init(kvm); 1236 1237 if (balloon) 1238 virtio_bln__init(kvm); 1239 1240 if (!network) 1241 network = DEFAULT_NETWORK; 1242 1243 virtio_9p__init(kvm); 1244 1245 for (i = 0; i < num_net_devices; i++) { 1246 net_params[i].kvm = kvm; 1247 virtio_net__init(&net_params[i]); 1248 } 1249 1250 if (num_net_devices == 0 && no_net == 0) { 1251 struct virtio_net_params net_params; 1252 1253 net_params = (struct virtio_net_params) { 1254 .guest_ip = guest_ip, 1255 .host_ip = host_ip, 1256 .kvm = kvm, 1257 .script = script, 1258 .mode = NET_MODE_USER, 1259 }; 1260 str_to_mac(guest_mac, net_params.guest_mac); 1261 str_to_mac(host_mac, net_params.host_mac); 1262 1263 virtio_net__init(&net_params); 1264 } 1265 1266 kvm__init_ram(kvm); 1267 1268 #ifdef CONFIG_X86 1269 kbd__init(kvm); 1270 #endif 1271 1272 pci_shmem__init(kvm); 1273 1274 if (vnc || sdl) { 1275 fb = vesa__init(kvm); 1276 if (IS_ERR(fb)) { 1277 pr_err("vesa__init() failed with error %ld\n", PTR_ERR(fb)); 1278 goto fail; 1279 } 1280 } 1281 1282 if (vnc && fb) { 1283 r = vnc__init(fb); 1284 if (r < 0) { 1285 pr_err("vnc__init() failed with error %d\n", r); 1286 goto fail; 1287 } 1288 } 1289 1290 if (sdl && fb) { 1291 sdl__init(fb); 1292 if (r < 0) { 1293 pr_err("sdl__init() failed with error %d\n", r); 1294 goto fail; 1295 } 1296 } 1297 1298 r = fb__start(); 1299 if (r < 0) { 1300 pr_err("fb__init() failed with error %d\n", r); 1301 goto fail; 1302 } 1303 1304 /* Device init all done; firmware init must 1305 * come after this (it may set up device trees etc.) 1306 */ 1307 1308 kvm__start_timer(kvm); 1309 1310 if (firmware_filename) { 1311 if (!kvm__load_firmware(kvm, firmware_filename)) 1312 die("unable to load firmware image %s: %s", firmware_filename, strerror(errno)); 1313 } else { 1314 kvm__arch_setup_firmware(kvm); 1315 if (r < 0) { 1316 pr_err("kvm__arch_setup_firmware() failed with error %d\n", r); 1317 goto fail; 1318 } 1319 } 1320 1321 for (i = 0; i < nrcpus; i++) { 1322 kvm_cpus[i] = kvm_cpu__init(kvm, i); 1323 if (!kvm_cpus[i]) 1324 die("unable to initialize KVM VCPU"); 1325 } 1326 1327 thread_pool__init(nr_online_cpus); 1328 fail: 1329 return r; 1330 } 1331 1332 static int kvm_cmd_run_work(void) 1333 { 1334 int i, r = -1; 1335 void *ret = NULL; 1336 1337 for (i = 0; i < nrcpus; i++) { 1338 if (pthread_create(&kvm_cpus[i]->thread, NULL, kvm_cpu_thread, kvm_cpus[i]) != 0) 1339 die("unable to create KVM VCPU thread"); 1340 } 1341 1342 /* Only VCPU #0 is going to exit by itself when shutting down */ 1343 if (pthread_join(kvm_cpus[0]->thread, &ret) != 0) 1344 r = 0; 1345 1346 kvm_cpu__delete(kvm_cpus[0]); 1347 kvm_cpus[0] = NULL; 1348 1349 for (i = 1; i < nrcpus; i++) { 1350 if (kvm_cpus[i]->is_running) { 1351 pthread_kill(kvm_cpus[i]->thread, SIGKVMEXIT); 1352 if (pthread_join(kvm_cpus[i]->thread, &ret) != 0) 1353 die("pthread_join"); 1354 kvm_cpu__delete(kvm_cpus[i]); 1355 } 1356 if (ret == NULL) 1357 r = 0; 1358 } 1359 1360 return r; 1361 } 1362 1363 static void kvm_cmd_run_exit(int guest_ret) 1364 { 1365 int r = 0; 1366 1367 compat__print_all_messages(); 1368 1369 r = symbol_exit(kvm); 1370 if (r < 0) 1371 pr_warning("symbol_exit() failed with error %d\n", r); 1372 1373 r = irq__exit(kvm); 1374 if (r < 0) 1375 pr_warning("irq__exit() failed with error %d\n", r); 1376 1377 fb__stop(); 1378 1379 r = virtio_scsi_exit(kvm); 1380 if (r < 0) 1381 pr_warning("virtio_scsi_exit() failed with error %d\n", r); 1382 1383 r = virtio_blk__exit(kvm); 1384 if (r < 0) 1385 pr_warning("virtio_blk__exit() failed with error %d\n", r); 1386 1387 r = virtio_rng__exit(kvm); 1388 if (r < 0) 1389 pr_warning("virtio_rng__exit() failed with error %d\n", r); 1390 1391 r = disk_image__close_all(kvm->disks, image_count); 1392 if (r < 0) 1393 pr_warning("disk_image__close_all() failed with error %d\n", r); 1394 1395 r = serial8250__exit(kvm); 1396 if (r < 0) 1397 pr_warning("serial8250__exit() failed with error %d\n", r); 1398 1399 r = rtc__exit(kvm); 1400 if (r < 0) 1401 pr_warning("rtc__exit() failed with error %d\n", r); 1402 1403 r = kvm__arch_free_firmware(kvm); 1404 if (r < 0) 1405 pr_warning("kvm__arch_free_firmware() failed with error %d\n", r); 1406 1407 r = ioport__exit(kvm); 1408 if (r < 0) 1409 pr_warning("ioport__exit() failed with error %d\n", r); 1410 1411 r = ioeventfd__exit(kvm); 1412 if (r < 0) 1413 pr_warning("ioeventfd__exit() failed with error %d\n", r); 1414 1415 r = pci__exit(kvm); 1416 if (r < 0) 1417 pr_warning("pci__exit() failed with error %d\n", r); 1418 1419 r = kvm__exit(kvm); 1420 if (r < 0) 1421 pr_warning("pci__exit() failed with error %d\n", r); 1422 1423 free(kvm_cpus); 1424 1425 if (guest_ret == 0) 1426 printf("\n # KVM session ended normally.\n"); 1427 } 1428 1429 int kvm_cmd_run(int argc, const char **argv, const char *prefix) 1430 { 1431 int r, ret = -EFAULT; 1432 1433 r = kvm_cmd_run_init(argc, argv); 1434 if (r < 0) 1435 return r; 1436 1437 ret = kvm_cmd_run_work(); 1438 kvm_cmd_run_exit(ret); 1439 1440 return ret; 1441 } 1442