1 #include "kvm/builtin-run.h" 2 3 #include "kvm/builtin-setup.h" 4 #include "kvm/virtio-balloon.h" 5 #include "kvm/virtio-console.h" 6 #include "kvm/parse-options.h" 7 #include "kvm/8250-serial.h" 8 #include "kvm/framebuffer.h" 9 #include "kvm/disk-image.h" 10 #include "kvm/threadpool.h" 11 #include "kvm/virtio-scsi.h" 12 #include "kvm/virtio-blk.h" 13 #include "kvm/virtio-net.h" 14 #include "kvm/virtio-rng.h" 15 #include "kvm/ioeventfd.h" 16 #include "kvm/virtio-9p.h" 17 #include "kvm/barrier.h" 18 #include "kvm/kvm-cpu.h" 19 #include "kvm/ioport.h" 20 #include "kvm/symbol.h" 21 #include "kvm/i8042.h" 22 #include "kvm/mutex.h" 23 #include "kvm/term.h" 24 #include "kvm/util.h" 25 #include "kvm/strbuf.h" 26 #include "kvm/vesa.h" 27 #include "kvm/irq.h" 28 #include "kvm/kvm.h" 29 #include "kvm/pci.h" 30 #include "kvm/rtc.h" 31 #include "kvm/sdl.h" 32 #include "kvm/vnc.h" 33 #include "kvm/guest_compat.h" 34 #include "kvm/pci-shmem.h" 35 #include "kvm/kvm-ipc.h" 36 #include "kvm/builtin-debug.h" 37 38 #include <linux/types.h> 39 #include <linux/err.h> 40 41 #include <sys/utsname.h> 42 #include <sys/types.h> 43 #include <sys/stat.h> 44 #include <termios.h> 45 #include <signal.h> 46 #include <stdlib.h> 47 #include <string.h> 48 #include <unistd.h> 49 #include <ctype.h> 50 #include <stdio.h> 51 52 #define DEFAULT_KVM_DEV "/dev/kvm" 53 #define DEFAULT_CONSOLE "serial" 54 #define DEFAULT_NETWORK "user" 55 #define DEFAULT_HOST_ADDR "192.168.33.1" 56 #define DEFAULT_GUEST_ADDR "192.168.33.15" 57 #define DEFAULT_GUEST_MAC "02:15:15:15:15:15" 58 #define DEFAULT_HOST_MAC "02:01:01:01:01:01" 59 #define DEFAULT_SCRIPT "none" 60 const char *DEFAULT_SANDBOX_FILENAME = "guest/sandbox.sh"; 61 62 #define MB_SHIFT (20) 63 #define KB_SHIFT (10) 64 #define GB_SHIFT (30) 65 #define MIN_RAM_SIZE_MB (64ULL) 66 #define MIN_RAM_SIZE_BYTE (MIN_RAM_SIZE_MB << MB_SHIFT) 67 68 struct kvm *kvm; 69 struct kvm_cpu **kvm_cpus; 70 __thread struct kvm_cpu *current_kvm_cpu; 71 72 static struct disk_image_params disk_image[MAX_DISK_IMAGES]; 73 static u64 ram_size; 74 static u8 image_count; 75 static u8 num_net_devices; 76 static bool virtio_rng; 77 static const char *kernel_cmdline; 78 static const char *kernel_filename; 79 static const char *vmlinux_filename; 80 static const char *initrd_filename; 81 static const char *firmware_filename; 82 static const char *console; 83 static const char *dev; 84 static const char *network; 85 static const char *host_ip; 86 static const char *guest_ip; 87 static const char *guest_mac; 88 static const char *host_mac; 89 static const char *script; 90 static const char *guest_name; 91 static const char *sandbox; 92 static const char *hugetlbfs_path; 93 static const char *custom_rootfs_name = "default"; 94 static struct virtio_net_params *net_params; 95 static bool single_step; 96 static bool vnc; 97 static bool sdl; 98 static bool balloon; 99 static bool using_rootfs; 100 static bool custom_rootfs; 101 static bool no_net; 102 static bool no_dhcp; 103 extern bool ioport_debug; 104 extern bool mmio_debug; 105 static int kvm_run_wrapper; 106 extern int active_console; 107 extern int debug_iodelay; 108 109 bool do_debug_print = false; 110 111 static int nrcpus; 112 static int vidmode = -1; 113 114 static const char * const run_usage[] = { 115 "lkvm run [<options>] [<kernel image>]", 116 NULL 117 }; 118 119 enum { 120 KVM_RUN_DEFAULT, 121 KVM_RUN_SANDBOX, 122 }; 123 124 void kvm_run_set_wrapper_sandbox(void) 125 { 126 kvm_run_wrapper = KVM_RUN_SANDBOX; 127 } 128 129 static int img_name_parser(const struct option *opt, const char *arg, int unset) 130 { 131 char path[PATH_MAX]; 132 const char *cur; 133 struct stat st; 134 char *sep; 135 136 if (stat(arg, &st) == 0 && 137 S_ISDIR(st.st_mode)) { 138 char tmp[PATH_MAX]; 139 140 if (using_rootfs) 141 die("Please use only one rootfs directory atmost"); 142 143 if (realpath(arg, tmp) == 0 || 144 virtio_9p__register(kvm, tmp, "/dev/root") < 0) 145 die("Unable to initialize virtio 9p"); 146 using_rootfs = 1; 147 return 0; 148 } 149 150 snprintf(path, PATH_MAX, "%s%s", kvm__get_dir(), arg); 151 152 if (stat(path, &st) == 0 && 153 S_ISDIR(st.st_mode)) { 154 char tmp[PATH_MAX]; 155 156 if (using_rootfs) 157 die("Please use only one rootfs directory atmost"); 158 159 if (realpath(path, tmp) == 0 || 160 virtio_9p__register(kvm, tmp, "/dev/root") < 0) 161 die("Unable to initialize virtio 9p"); 162 if (virtio_9p__register(kvm, "/", "hostfs") < 0) 163 die("Unable to initialize virtio 9p"); 164 kvm_setup_resolv(arg); 165 using_rootfs = custom_rootfs = 1; 166 custom_rootfs_name = arg; 167 return 0; 168 } 169 170 if (image_count >= MAX_DISK_IMAGES) 171 die("Currently only 4 images are supported"); 172 173 disk_image[image_count].filename = arg; 174 cur = arg; 175 176 if (strncmp(arg, "scsi:", 5) == 0) { 177 sep = strstr(arg, ":"); 178 if (sep) 179 disk_image[image_count].wwpn = sep + 1; 180 sep = strstr(sep + 1, ":"); 181 if (sep) { 182 *sep = 0; 183 disk_image[image_count].tpgt = sep + 1; 184 } 185 cur = sep + 1; 186 } 187 188 do { 189 sep = strstr(cur, ","); 190 if (sep) { 191 if (strncmp(sep + 1, "ro", 2) == 0) 192 disk_image[image_count].readonly = true; 193 else if (strncmp(sep + 1, "direct", 6) == 0) 194 disk_image[image_count].direct = true; 195 *sep = 0; 196 cur = sep + 1; 197 } 198 } while (sep); 199 200 image_count++; 201 202 return 0; 203 } 204 205 static int virtio_9p_rootdir_parser(const struct option *opt, const char *arg, int unset) 206 { 207 char *tag_name; 208 char tmp[PATH_MAX]; 209 210 /* 211 * 9p dir can be of the form dirname,tag_name or 212 * just dirname. In the later case we use the 213 * default tag name 214 */ 215 tag_name = strstr(arg, ","); 216 if (tag_name) { 217 *tag_name = '\0'; 218 tag_name++; 219 } 220 if (realpath(arg, tmp)) { 221 if (virtio_9p__register(kvm, tmp, tag_name) < 0) 222 die("Unable to initialize virtio 9p"); 223 } else 224 die("Failed resolving 9p path"); 225 return 0; 226 } 227 228 static int tty_parser(const struct option *opt, const char *arg, int unset) 229 { 230 int tty = atoi(arg); 231 232 term_set_tty(tty); 233 234 return 0; 235 } 236 237 static inline void str_to_mac(const char *str, char *mac) 238 { 239 sscanf(str, "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx", 240 mac, mac+1, mac+2, mac+3, mac+4, mac+5); 241 } 242 static int set_net_param(struct virtio_net_params *p, const char *param, 243 const char *val) 244 { 245 if (strcmp(param, "guest_mac") == 0) { 246 str_to_mac(val, p->guest_mac); 247 } else if (strcmp(param, "mode") == 0) { 248 if (!strncmp(val, "user", 4)) { 249 int i; 250 251 for (i = 0; i < num_net_devices; i++) 252 if (net_params[i].mode == NET_MODE_USER) 253 die("Only one usermode network device allowed at a time"); 254 p->mode = NET_MODE_USER; 255 } else if (!strncmp(val, "tap", 3)) { 256 p->mode = NET_MODE_TAP; 257 } else if (!strncmp(val, "none", 4)) { 258 no_net = 1; 259 return -1; 260 } else 261 die("Unkown network mode %s, please use user, tap or none", network); 262 } else if (strcmp(param, "script") == 0) { 263 p->script = strdup(val); 264 } else if (strcmp(param, "guest_ip") == 0) { 265 p->guest_ip = strdup(val); 266 } else if (strcmp(param, "host_ip") == 0) { 267 p->host_ip = strdup(val); 268 } else if (strcmp(param, "trans") == 0) { 269 p->trans = strdup(val); 270 } else if (strcmp(param, "vhost") == 0) { 271 p->vhost = atoi(val); 272 } else if (strcmp(param, "fd") == 0) { 273 p->fd = atoi(val); 274 } else 275 die("Unknown network parameter %s", param); 276 277 return 0; 278 } 279 280 static int netdev_parser(const struct option *opt, const char *arg, int unset) 281 { 282 struct virtio_net_params p; 283 char *buf = NULL, *cmd = NULL, *cur = NULL; 284 bool on_cmd = true; 285 286 if (arg) { 287 buf = strdup(arg); 288 if (buf == NULL) 289 die("Failed allocating new net buffer"); 290 cur = strtok(buf, ",="); 291 } 292 293 p = (struct virtio_net_params) { 294 .guest_ip = DEFAULT_GUEST_ADDR, 295 .host_ip = DEFAULT_HOST_ADDR, 296 .script = DEFAULT_SCRIPT, 297 .mode = NET_MODE_TAP, 298 }; 299 300 str_to_mac(DEFAULT_GUEST_MAC, p.guest_mac); 301 p.guest_mac[5] += num_net_devices; 302 303 while (cur) { 304 if (on_cmd) { 305 cmd = cur; 306 } else { 307 if (set_net_param(&p, cmd, cur) < 0) 308 goto done; 309 } 310 on_cmd = !on_cmd; 311 312 cur = strtok(NULL, ",="); 313 }; 314 315 num_net_devices++; 316 317 net_params = realloc(net_params, num_net_devices * sizeof(*net_params)); 318 if (net_params == NULL) 319 die("Failed adding new network device"); 320 321 net_params[num_net_devices - 1] = p; 322 323 done: 324 free(buf); 325 return 0; 326 } 327 328 static int shmem_parser(const struct option *opt, const char *arg, int unset) 329 { 330 const u64 default_size = SHMEM_DEFAULT_SIZE; 331 const u64 default_phys_addr = SHMEM_DEFAULT_ADDR; 332 const char *default_handle = SHMEM_DEFAULT_HANDLE; 333 struct shmem_info *si = malloc(sizeof(struct shmem_info)); 334 u64 phys_addr; 335 u64 size; 336 char *handle = NULL; 337 int create = 0; 338 const char *p = arg; 339 char *next; 340 int base = 10; 341 int verbose = 0; 342 343 const int skip_pci = strlen("pci:"); 344 if (verbose) 345 pr_info("shmem_parser(%p,%s,%d)", opt, arg, unset); 346 /* parse out optional addr family */ 347 if (strcasestr(p, "pci:")) { 348 p += skip_pci; 349 } else if (strcasestr(p, "mem:")) { 350 die("I can't add to E820 map yet.\n"); 351 } 352 /* parse out physical addr */ 353 base = 10; 354 if (strcasestr(p, "0x")) 355 base = 16; 356 phys_addr = strtoll(p, &next, base); 357 if (next == p && phys_addr == 0) { 358 pr_info("shmem: no physical addr specified, using default."); 359 phys_addr = default_phys_addr; 360 } 361 if (*next != ':' && *next != '\0') 362 die("shmem: unexpected chars after phys addr.\n"); 363 if (*next == '\0') 364 p = next; 365 else 366 p = next + 1; 367 /* parse out size */ 368 base = 10; 369 if (strcasestr(p, "0x")) 370 base = 16; 371 size = strtoll(p, &next, base); 372 if (next == p && size == 0) { 373 pr_info("shmem: no size specified, using default."); 374 size = default_size; 375 } 376 /* look for [KMGkmg][Bb]* uses base 2. */ 377 int skip_B = 0; 378 if (strspn(next, "KMGkmg")) { /* might have a prefix */ 379 if (*(next + 1) == 'B' || *(next + 1) == 'b') 380 skip_B = 1; 381 switch (*next) { 382 case 'K': 383 case 'k': 384 size = size << KB_SHIFT; 385 break; 386 case 'M': 387 case 'm': 388 size = size << MB_SHIFT; 389 break; 390 case 'G': 391 case 'g': 392 size = size << GB_SHIFT; 393 break; 394 default: 395 die("shmem: bug in detecting size prefix."); 396 break; 397 } 398 next += 1 + skip_B; 399 } 400 if (*next != ':' && *next != '\0') { 401 die("shmem: unexpected chars after phys size. <%c><%c>\n", 402 *next, *p); 403 } 404 if (*next == '\0') 405 p = next; 406 else 407 p = next + 1; 408 /* parse out optional shmem handle */ 409 const int skip_handle = strlen("handle="); 410 next = strcasestr(p, "handle="); 411 if (*p && next) { 412 if (p != next) 413 die("unexpected chars before handle\n"); 414 p += skip_handle; 415 next = strchrnul(p, ':'); 416 if (next - p) { 417 handle = malloc(next - p + 1); 418 strncpy(handle, p, next - p); 419 handle[next - p] = '\0'; /* just in case. */ 420 } 421 if (*next == '\0') 422 p = next; 423 else 424 p = next + 1; 425 } 426 /* parse optional create flag to see if we should create shm seg. */ 427 if (*p && strcasestr(p, "create")) { 428 create = 1; 429 p += strlen("create"); 430 } 431 if (*p != '\0') 432 die("shmem: unexpected trailing chars\n"); 433 if (handle == NULL) { 434 handle = malloc(strlen(default_handle) + 1); 435 strcpy(handle, default_handle); 436 } 437 if (verbose) { 438 pr_info("shmem: phys_addr = %llx", phys_addr); 439 pr_info("shmem: size = %llx", size); 440 pr_info("shmem: handle = %s", handle); 441 pr_info("shmem: create = %d", create); 442 } 443 444 si->phys_addr = phys_addr; 445 si->size = size; 446 si->handle = handle; 447 si->create = create; 448 pci_shmem__register_mem(si); /* ownership of si, etc. passed on. */ 449 return 0; 450 } 451 452 static const struct option options[] = { 453 OPT_GROUP("Basic options:"), 454 OPT_STRING('\0', "name", &guest_name, "guest name", 455 "A name for the guest"), 456 OPT_INTEGER('c', "cpus", &nrcpus, "Number of CPUs"), 457 OPT_U64('m', "mem", &ram_size, "Virtual machine memory size in MiB."), 458 OPT_CALLBACK('\0', "shmem", NULL, 459 "[pci:]<addr>:<size>[:handle=<handle>][:create]", 460 "Share host shmem with guest via pci device", 461 shmem_parser), 462 OPT_CALLBACK('d', "disk", NULL, "image or rootfs_dir", "Disk image or rootfs directory", img_name_parser), 463 OPT_BOOLEAN('\0', "balloon", &balloon, "Enable virtio balloon"), 464 OPT_BOOLEAN('\0', "vnc", &vnc, "Enable VNC framebuffer"), 465 OPT_BOOLEAN('\0', "sdl", &sdl, "Enable SDL framebuffer"), 466 OPT_BOOLEAN('\0', "rng", &virtio_rng, "Enable virtio Random Number Generator"), 467 OPT_CALLBACK('\0', "9p", NULL, "dir_to_share,tag_name", 468 "Enable virtio 9p to share files between host and guest", virtio_9p_rootdir_parser), 469 OPT_STRING('\0', "console", &console, "serial, virtio or hv", 470 "Console to use"), 471 OPT_STRING('\0', "dev", &dev, "device_file", "KVM device file"), 472 OPT_CALLBACK('\0', "tty", NULL, "tty id", 473 "Remap guest TTY into a pty on the host", 474 tty_parser), 475 OPT_STRING('\0', "sandbox", &sandbox, "script", 476 "Run this script when booting into custom rootfs"), 477 OPT_STRING('\0', "hugetlbfs", &hugetlbfs_path, "path", "Hugetlbfs path"), 478 479 OPT_GROUP("Kernel options:"), 480 OPT_STRING('k', "kernel", &kernel_filename, "kernel", 481 "Kernel to boot in virtual machine"), 482 OPT_STRING('i', "initrd", &initrd_filename, "initrd", 483 "Initial RAM disk image"), 484 OPT_STRING('p', "params", &kernel_cmdline, "params", 485 "Kernel command line arguments"), 486 OPT_STRING('f', "firmware", &firmware_filename, "firmware", 487 "Firmware image to boot in virtual machine"), 488 489 OPT_GROUP("Networking options:"), 490 OPT_CALLBACK_DEFAULT('n', "network", NULL, "network params", 491 "Create a new guest NIC", 492 netdev_parser, NULL), 493 OPT_BOOLEAN('\0', "no-dhcp", &no_dhcp, "Disable kernel DHCP in rootfs mode"), 494 495 OPT_GROUP("BIOS options:"), 496 OPT_INTEGER('\0', "vidmode", &vidmode, 497 "Video mode"), 498 499 OPT_GROUP("Debug options:"), 500 OPT_BOOLEAN('\0', "debug", &do_debug_print, 501 "Enable debug messages"), 502 OPT_BOOLEAN('\0', "debug-single-step", &single_step, 503 "Enable single stepping"), 504 OPT_BOOLEAN('\0', "debug-ioport", &ioport_debug, 505 "Enable ioport debugging"), 506 OPT_BOOLEAN('\0', "debug-mmio", &mmio_debug, 507 "Enable MMIO debugging"), 508 OPT_INTEGER('\0', "debug-iodelay", &debug_iodelay, 509 "Delay IO by millisecond"), 510 OPT_END() 511 }; 512 513 /* 514 * Serialize debug printout so that the output of multiple vcpus does not 515 * get mixed up: 516 */ 517 static int printout_done; 518 519 static void handle_sigusr1(int sig) 520 { 521 struct kvm_cpu *cpu = current_kvm_cpu; 522 int fd = kvm_cpu__get_debug_fd(); 523 524 if (!cpu || cpu->needs_nmi) 525 return; 526 527 dprintf(fd, "\n #\n # vCPU #%ld's dump:\n #\n", cpu->cpu_id); 528 kvm_cpu__show_registers(cpu); 529 kvm_cpu__show_code(cpu); 530 kvm_cpu__show_page_tables(cpu); 531 fflush(stdout); 532 printout_done = 1; 533 mb(); 534 } 535 536 /* Pause/resume the guest using SIGUSR2 */ 537 static int is_paused; 538 539 static void handle_pause(int fd, u32 type, u32 len, u8 *msg) 540 { 541 if (WARN_ON(len)) 542 return; 543 544 if (type == KVM_IPC_RESUME && is_paused) { 545 kvm->vm_state = KVM_VMSTATE_RUNNING; 546 kvm__continue(); 547 } else if (type == KVM_IPC_PAUSE && !is_paused) { 548 kvm->vm_state = KVM_VMSTATE_PAUSED; 549 ioctl(kvm->vm_fd, KVM_KVMCLOCK_CTRL); 550 kvm__pause(); 551 } else { 552 return; 553 } 554 555 is_paused = !is_paused; 556 } 557 558 static void handle_vmstate(int fd, u32 type, u32 len, u8 *msg) 559 { 560 int r = 0; 561 562 if (type == KVM_IPC_VMSTATE) 563 r = write(fd, &kvm->vm_state, sizeof(kvm->vm_state)); 564 565 if (r < 0) 566 pr_warning("Failed sending VMSTATE"); 567 } 568 569 static void handle_debug(int fd, u32 type, u32 len, u8 *msg) 570 { 571 int i; 572 struct debug_cmd_params *params; 573 u32 dbg_type; 574 u32 vcpu; 575 576 if (WARN_ON(type != KVM_IPC_DEBUG || len != sizeof(*params))) 577 return; 578 579 params = (void *)msg; 580 dbg_type = params->dbg_type; 581 vcpu = params->cpu; 582 583 if (dbg_type & KVM_DEBUG_CMD_TYPE_SYSRQ) 584 serial8250__inject_sysrq(kvm, params->sysrq); 585 586 if (dbg_type & KVM_DEBUG_CMD_TYPE_NMI) { 587 if ((int)vcpu >= kvm->nrcpus) 588 return; 589 590 kvm_cpus[vcpu]->needs_nmi = 1; 591 pthread_kill(kvm_cpus[vcpu]->thread, SIGUSR1); 592 } 593 594 if (!(dbg_type & KVM_DEBUG_CMD_TYPE_DUMP)) 595 return; 596 597 for (i = 0; i < nrcpus; i++) { 598 struct kvm_cpu *cpu = kvm_cpus[i]; 599 600 if (!cpu) 601 continue; 602 603 printout_done = 0; 604 605 kvm_cpu__set_debug_fd(fd); 606 pthread_kill(cpu->thread, SIGUSR1); 607 /* 608 * Wait for the vCPU to dump state before signalling 609 * the next thread. Since this is debug code it does 610 * not matter that we are burning CPU time a bit: 611 */ 612 while (!printout_done) 613 mb(); 614 } 615 616 close(fd); 617 618 serial8250__inject_sysrq(kvm, 'p'); 619 } 620 621 static void handle_sigalrm(int sig) 622 { 623 kvm__arch_periodic_poll(kvm); 624 } 625 626 static void handle_stop(int fd, u32 type, u32 len, u8 *msg) 627 { 628 if (WARN_ON(type != KVM_IPC_STOP || len)) 629 return; 630 631 kvm_cpu__reboot(); 632 } 633 634 static void *kvm_cpu_thread(void *arg) 635 { 636 current_kvm_cpu = arg; 637 638 if (kvm_cpu__start(current_kvm_cpu)) 639 goto panic_kvm; 640 641 return (void *) (intptr_t) 0; 642 643 panic_kvm: 644 fprintf(stderr, "KVM exit reason: %u (\"%s\")\n", 645 current_kvm_cpu->kvm_run->exit_reason, 646 kvm_exit_reasons[current_kvm_cpu->kvm_run->exit_reason]); 647 if (current_kvm_cpu->kvm_run->exit_reason == KVM_EXIT_UNKNOWN) 648 fprintf(stderr, "KVM exit code: 0x%Lu\n", 649 current_kvm_cpu->kvm_run->hw.hardware_exit_reason); 650 651 kvm_cpu__set_debug_fd(STDOUT_FILENO); 652 kvm_cpu__show_registers(current_kvm_cpu); 653 kvm_cpu__show_code(current_kvm_cpu); 654 kvm_cpu__show_page_tables(current_kvm_cpu); 655 656 return (void *) (intptr_t) 1; 657 } 658 659 static char kernel[PATH_MAX]; 660 661 static const char *host_kernels[] = { 662 "/boot/vmlinuz", 663 "/boot/bzImage", 664 NULL 665 }; 666 667 static const char *default_kernels[] = { 668 "./bzImage", 669 "arch/" BUILD_ARCH "/boot/bzImage", 670 "../../arch/" BUILD_ARCH "/boot/bzImage", 671 NULL 672 }; 673 674 static const char *default_vmlinux[] = { 675 "vmlinux", 676 "../../../vmlinux", 677 "../../vmlinux", 678 NULL 679 }; 680 681 static void kernel_usage_with_options(void) 682 { 683 const char **k; 684 struct utsname uts; 685 686 fprintf(stderr, "Fatal: could not find default kernel image in:\n"); 687 k = &default_kernels[0]; 688 while (*k) { 689 fprintf(stderr, "\t%s\n", *k); 690 k++; 691 } 692 693 if (uname(&uts) < 0) 694 return; 695 696 k = &host_kernels[0]; 697 while (*k) { 698 if (snprintf(kernel, PATH_MAX, "%s-%s", *k, uts.release) < 0) 699 return; 700 fprintf(stderr, "\t%s\n", kernel); 701 k++; 702 } 703 fprintf(stderr, "\nPlease see '%s run --help' for more options.\n\n", 704 KVM_BINARY_NAME); 705 } 706 707 static u64 host_ram_size(void) 708 { 709 long page_size; 710 long nr_pages; 711 712 nr_pages = sysconf(_SC_PHYS_PAGES); 713 if (nr_pages < 0) { 714 pr_warning("sysconf(_SC_PHYS_PAGES) failed"); 715 return 0; 716 } 717 718 page_size = sysconf(_SC_PAGE_SIZE); 719 if (page_size < 0) { 720 pr_warning("sysconf(_SC_PAGE_SIZE) failed"); 721 return 0; 722 } 723 724 return (nr_pages * page_size) >> MB_SHIFT; 725 } 726 727 /* 728 * If user didn't specify how much memory it wants to allocate for the guest, 729 * avoid filling the whole host RAM. 730 */ 731 #define RAM_SIZE_RATIO 0.8 732 733 static u64 get_ram_size(int nr_cpus) 734 { 735 u64 available; 736 u64 ram_size; 737 738 ram_size = 64 * (nr_cpus + 3); 739 740 available = host_ram_size() * RAM_SIZE_RATIO; 741 if (!available) 742 available = MIN_RAM_SIZE_MB; 743 744 if (ram_size > available) 745 ram_size = available; 746 747 return ram_size; 748 } 749 750 static const char *find_kernel(void) 751 { 752 const char **k; 753 struct stat st; 754 struct utsname uts; 755 756 k = &default_kernels[0]; 757 while (*k) { 758 if (stat(*k, &st) < 0 || !S_ISREG(st.st_mode)) { 759 k++; 760 continue; 761 } 762 strncpy(kernel, *k, PATH_MAX); 763 return kernel; 764 } 765 766 if (uname(&uts) < 0) 767 return NULL; 768 769 k = &host_kernels[0]; 770 while (*k) { 771 if (snprintf(kernel, PATH_MAX, "%s-%s", *k, uts.release) < 0) 772 return NULL; 773 774 if (stat(kernel, &st) < 0 || !S_ISREG(st.st_mode)) { 775 k++; 776 continue; 777 } 778 return kernel; 779 780 } 781 return NULL; 782 } 783 784 static const char *find_vmlinux(void) 785 { 786 const char **vmlinux; 787 788 vmlinux = &default_vmlinux[0]; 789 while (*vmlinux) { 790 struct stat st; 791 792 if (stat(*vmlinux, &st) < 0 || !S_ISREG(st.st_mode)) { 793 vmlinux++; 794 continue; 795 } 796 return *vmlinux; 797 } 798 return NULL; 799 } 800 801 void kvm_run_help(void) 802 { 803 usage_with_options(run_usage, options); 804 } 805 806 static int kvm_custom_stage2(void) 807 { 808 char tmp[PATH_MAX], dst[PATH_MAX], *src; 809 const char *rootfs = custom_rootfs_name; 810 int r; 811 812 src = realpath("guest/init_stage2", NULL); 813 if (src == NULL) 814 return -ENOMEM; 815 816 snprintf(tmp, PATH_MAX, "%s%s/virt/init_stage2", kvm__get_dir(), rootfs); 817 remove(tmp); 818 819 snprintf(dst, PATH_MAX, "/host/%s", src); 820 r = symlink(dst, tmp); 821 free(src); 822 823 return r; 824 } 825 826 static int kvm_run_set_sandbox(void) 827 { 828 const char *guestfs_name = custom_rootfs_name; 829 char path[PATH_MAX], script[PATH_MAX], *tmp; 830 831 snprintf(path, PATH_MAX, "%s%s/virt/sandbox.sh", kvm__get_dir(), guestfs_name); 832 833 remove(path); 834 835 if (sandbox == NULL) 836 return 0; 837 838 tmp = realpath(sandbox, NULL); 839 if (tmp == NULL) 840 return -ENOMEM; 841 842 snprintf(script, PATH_MAX, "/host/%s", tmp); 843 free(tmp); 844 845 return symlink(script, path); 846 } 847 848 static void kvm_write_sandbox_cmd_exactly(int fd, const char *arg) 849 { 850 const char *single_quote; 851 852 if (!*arg) { /* zero length string */ 853 if (write(fd, "''", 2) <= 0) 854 die("Failed writing sandbox script"); 855 return; 856 } 857 858 while (*arg) { 859 single_quote = strchrnul(arg, '\''); 860 861 /* write non-single-quote string as #('string') */ 862 if (arg != single_quote) { 863 if (write(fd, "'", 1) <= 0 || 864 write(fd, arg, single_quote - arg) <= 0 || 865 write(fd, "'", 1) <= 0) 866 die("Failed writing sandbox script"); 867 } 868 869 /* write single quote as #("'") */ 870 if (*single_quote) { 871 if (write(fd, "\"'\"", 3) <= 0) 872 die("Failed writing sandbox script"); 873 } else 874 break; 875 876 arg = single_quote + 1; 877 } 878 } 879 880 static void resolve_program(const char *src, char *dst, size_t len) 881 { 882 struct stat st; 883 int err; 884 885 err = stat(src, &st); 886 887 if (!err && S_ISREG(st.st_mode)) { 888 char resolved_path[PATH_MAX]; 889 890 if (!realpath(src, resolved_path)) 891 die("Unable to resolve program %s: %s\n", src, strerror(errno)); 892 893 snprintf(dst, len, "/host%s", resolved_path); 894 } else 895 strncpy(dst, src, len); 896 } 897 898 static void kvm_run_write_sandbox_cmd(const char **argv, int argc) 899 { 900 const char script_hdr[] = "#! /bin/bash\n\n"; 901 char program[PATH_MAX]; 902 int fd; 903 904 remove(sandbox); 905 906 fd = open(sandbox, O_RDWR | O_CREAT, 0777); 907 if (fd < 0) 908 die("Failed creating sandbox script"); 909 910 if (write(fd, script_hdr, sizeof(script_hdr) - 1) <= 0) 911 die("Failed writing sandbox script"); 912 913 resolve_program(argv[0], program, PATH_MAX); 914 kvm_write_sandbox_cmd_exactly(fd, program); 915 916 argv++; 917 argc--; 918 919 while (argc) { 920 if (write(fd, " ", 1) <= 0) 921 die("Failed writing sandbox script"); 922 923 kvm_write_sandbox_cmd_exactly(fd, argv[0]); 924 argv++; 925 argc--; 926 } 927 if (write(fd, "\n", 1) <= 0) 928 die("Failed writing sandbox script"); 929 930 close(fd); 931 } 932 933 static int kvm_cmd_run_init(int argc, const char **argv) 934 { 935 static char real_cmdline[2048], default_name[20]; 936 struct framebuffer *fb = NULL; 937 unsigned int nr_online_cpus; 938 int max_cpus, recommended_cpus; 939 int i, r; 940 941 signal(SIGALRM, handle_sigalrm); 942 kvm_ipc__register_handler(KVM_IPC_DEBUG, handle_debug); 943 signal(SIGUSR1, handle_sigusr1); 944 kvm_ipc__register_handler(KVM_IPC_PAUSE, handle_pause); 945 kvm_ipc__register_handler(KVM_IPC_RESUME, handle_pause); 946 kvm_ipc__register_handler(KVM_IPC_STOP, handle_stop); 947 kvm_ipc__register_handler(KVM_IPC_VMSTATE, handle_vmstate); 948 949 nr_online_cpus = sysconf(_SC_NPROCESSORS_ONLN); 950 951 while (argc != 0) { 952 argc = parse_options(argc, argv, options, run_usage, 953 PARSE_OPT_STOP_AT_NON_OPTION | 954 PARSE_OPT_KEEP_DASHDASH); 955 if (argc != 0) { 956 /* Cusrom options, should have been handled elsewhere */ 957 if (strcmp(argv[0], "--") == 0) { 958 if (kvm_run_wrapper == KVM_RUN_SANDBOX) { 959 sandbox = DEFAULT_SANDBOX_FILENAME; 960 kvm_run_write_sandbox_cmd(argv+1, argc-1); 961 break; 962 } 963 } 964 965 if ((kvm_run_wrapper == KVM_RUN_DEFAULT && kernel_filename) || 966 (kvm_run_wrapper == KVM_RUN_SANDBOX && sandbox)) { 967 fprintf(stderr, "Cannot handle parameter: " 968 "%s\n", argv[0]); 969 usage_with_options(run_usage, options); 970 return -EINVAL; 971 } 972 if (kvm_run_wrapper == KVM_RUN_SANDBOX) { 973 /* 974 * first unhandled parameter is treated as 975 * sandbox command 976 */ 977 sandbox = DEFAULT_SANDBOX_FILENAME; 978 kvm_run_write_sandbox_cmd(argv, argc); 979 } else { 980 /* 981 * first unhandled parameter is treated as a kernel 982 * image 983 */ 984 kernel_filename = argv[0]; 985 } 986 argv++; 987 argc--; 988 } 989 990 } 991 992 if (!kernel_filename) 993 kernel_filename = find_kernel(); 994 995 if (!kernel_filename) { 996 kernel_usage_with_options(); 997 return -EINVAL; 998 } 999 1000 vmlinux_filename = find_vmlinux(); 1001 1002 if (nrcpus == 0) 1003 nrcpus = nr_online_cpus; 1004 1005 if (!ram_size) 1006 ram_size = get_ram_size(nrcpus); 1007 1008 if (ram_size < MIN_RAM_SIZE_MB) 1009 die("Not enough memory specified: %lluMB (min %lluMB)", ram_size, MIN_RAM_SIZE_MB); 1010 1011 if (ram_size > host_ram_size()) 1012 pr_warning("Guest memory size %lluMB exceeds host physical RAM size %lluMB", ram_size, host_ram_size()); 1013 1014 ram_size <<= MB_SHIFT; 1015 1016 if (!dev) 1017 dev = DEFAULT_KVM_DEV; 1018 1019 if (!console) 1020 console = DEFAULT_CONSOLE; 1021 1022 if (!strncmp(console, "virtio", 6)) 1023 active_console = CONSOLE_VIRTIO; 1024 else if (!strncmp(console, "serial", 6)) 1025 active_console = CONSOLE_8250; 1026 else if (!strncmp(console, "hv", 2)) 1027 active_console = CONSOLE_HV; 1028 else 1029 pr_warning("No console!"); 1030 1031 if (!host_ip) 1032 host_ip = DEFAULT_HOST_ADDR; 1033 1034 if (!guest_ip) 1035 guest_ip = DEFAULT_GUEST_ADDR; 1036 1037 if (!guest_mac) 1038 guest_mac = DEFAULT_GUEST_MAC; 1039 1040 if (!host_mac) 1041 host_mac = DEFAULT_HOST_MAC; 1042 1043 if (!script) 1044 script = DEFAULT_SCRIPT; 1045 1046 term_init(); 1047 1048 if (!guest_name) { 1049 if (custom_rootfs) { 1050 guest_name = custom_rootfs_name; 1051 } else { 1052 sprintf(default_name, "guest-%u", getpid()); 1053 guest_name = default_name; 1054 } 1055 } 1056 1057 kvm = kvm__init(dev, hugetlbfs_path, ram_size, guest_name); 1058 if (IS_ERR(kvm)) { 1059 r = PTR_ERR(kvm); 1060 goto fail; 1061 } 1062 1063 kvm->single_step = single_step; 1064 1065 r = ioeventfd__init(kvm); 1066 if (r < 0) { 1067 pr_err("ioeventfd__init() failed with error %d\n", r); 1068 goto fail; 1069 } 1070 1071 max_cpus = kvm__max_cpus(kvm); 1072 recommended_cpus = kvm__recommended_cpus(kvm); 1073 1074 if (nrcpus > max_cpus) { 1075 printf(" # Limit the number of CPUs to %d\n", max_cpus); 1076 nrcpus = max_cpus; 1077 } else if (nrcpus > recommended_cpus) { 1078 printf(" # Warning: The maximum recommended amount of VCPUs" 1079 " is %d\n", recommended_cpus); 1080 } 1081 1082 kvm->nrcpus = nrcpus; 1083 1084 /* Alloc one pointer too many, so array ends up 0-terminated */ 1085 kvm_cpus = calloc(nrcpus + 1, sizeof(void *)); 1086 if (!kvm_cpus) 1087 die("Couldn't allocate array for %d CPUs", nrcpus); 1088 1089 r = irq__init(kvm); 1090 if (r < 0) { 1091 pr_err("irq__init() failed with error %d\n", r); 1092 goto fail; 1093 } 1094 1095 r = pci__init(kvm); 1096 if (r < 0) { 1097 pr_err("pci__init() failed with error %d\n", r); 1098 goto fail; 1099 } 1100 1101 r = ioport__init(kvm); 1102 if (r < 0) { 1103 pr_err("ioport__init() failed with error %d\n", r); 1104 goto fail; 1105 } 1106 1107 /* 1108 * vidmode should be either specified 1109 * either set by default 1110 */ 1111 if (vnc || sdl) { 1112 if (vidmode == -1) 1113 vidmode = 0x312; 1114 } else { 1115 vidmode = 0; 1116 } 1117 1118 memset(real_cmdline, 0, sizeof(real_cmdline)); 1119 kvm__arch_set_cmdline(real_cmdline, vnc || sdl); 1120 1121 if (strlen(real_cmdline) > 0) 1122 strcat(real_cmdline, " "); 1123 1124 if (kernel_cmdline) 1125 strlcat(real_cmdline, kernel_cmdline, sizeof(real_cmdline)); 1126 1127 if (!using_rootfs && !disk_image[0].filename && !initrd_filename) { 1128 char tmp[PATH_MAX]; 1129 1130 kvm_setup_create_new(custom_rootfs_name); 1131 kvm_setup_resolv(custom_rootfs_name); 1132 1133 snprintf(tmp, PATH_MAX, "%s%s", kvm__get_dir(), "default"); 1134 if (virtio_9p__register(kvm, tmp, "/dev/root") < 0) 1135 die("Unable to initialize virtio 9p"); 1136 if (virtio_9p__register(kvm, "/", "hostfs") < 0) 1137 die("Unable to initialize virtio 9p"); 1138 using_rootfs = custom_rootfs = 1; 1139 } 1140 1141 if (using_rootfs) { 1142 strcat(real_cmdline, " root=/dev/root rw rootflags=rw,trans=virtio,version=9p2000.L rootfstype=9p"); 1143 if (custom_rootfs) { 1144 kvm_run_set_sandbox(); 1145 1146 strcat(real_cmdline, " init=/virt/init"); 1147 1148 if (!no_dhcp) 1149 strcat(real_cmdline, " ip=dhcp"); 1150 if (kvm_custom_stage2()) 1151 die("Failed linking stage 2 of init."); 1152 } 1153 } else if (!strstr(real_cmdline, "root=")) { 1154 strlcat(real_cmdline, " root=/dev/vda rw ", sizeof(real_cmdline)); 1155 } 1156 1157 if (image_count) { 1158 kvm->nr_disks = image_count; 1159 kvm->disks = disk_image__open_all((struct disk_image_params *)&disk_image, image_count); 1160 if (IS_ERR(kvm->disks)) { 1161 r = PTR_ERR(kvm->disks); 1162 pr_err("disk_image__open_all() failed with error %ld\n", 1163 PTR_ERR(kvm->disks)); 1164 goto fail; 1165 } 1166 } 1167 1168 printf(" # %s run -k %s -m %Lu -c %d --name %s\n", KVM_BINARY_NAME, 1169 kernel_filename, ram_size / 1024 / 1024, nrcpus, guest_name); 1170 1171 if (!firmware_filename) { 1172 if (!kvm__load_kernel(kvm, kernel_filename, 1173 initrd_filename, real_cmdline, vidmode)) 1174 die("unable to load kernel %s", kernel_filename); 1175 1176 kvm->vmlinux = vmlinux_filename; 1177 r = symbol_init(kvm); 1178 if (r < 0) 1179 pr_debug("symbol_init() failed with error %d\n", r); 1180 } 1181 1182 ioport__setup_arch(); 1183 1184 r = rtc__init(kvm); 1185 if (r < 0) { 1186 pr_err("rtc__init() failed with error %d\n", r); 1187 goto fail; 1188 } 1189 1190 r = serial8250__init(kvm); 1191 if (r < 0) { 1192 pr_err("serial__init() failed with error %d\n", r); 1193 goto fail; 1194 } 1195 1196 r = virtio_blk__init(kvm); 1197 if (r < 0) { 1198 pr_err("virtio_blk__init() failed with error %d\n", r); 1199 goto fail; 1200 } 1201 1202 r = virtio_scsi_init(kvm); 1203 if (r < 0) { 1204 pr_err("virtio_scsi_init() failed with error %d\n", r); 1205 goto fail; 1206 } 1207 1208 1209 if (active_console == CONSOLE_VIRTIO) 1210 virtio_console__init(kvm); 1211 1212 if (virtio_rng) 1213 virtio_rng__init(kvm); 1214 1215 if (balloon) 1216 virtio_bln__init(kvm); 1217 1218 if (!network) 1219 network = DEFAULT_NETWORK; 1220 1221 virtio_9p__init(kvm); 1222 1223 for (i = 0; i < num_net_devices; i++) { 1224 net_params[i].kvm = kvm; 1225 virtio_net__init(&net_params[i]); 1226 } 1227 1228 if (num_net_devices == 0 && no_net == 0) { 1229 struct virtio_net_params net_params; 1230 1231 net_params = (struct virtio_net_params) { 1232 .guest_ip = guest_ip, 1233 .host_ip = host_ip, 1234 .kvm = kvm, 1235 .script = script, 1236 .mode = NET_MODE_USER, 1237 }; 1238 str_to_mac(guest_mac, net_params.guest_mac); 1239 str_to_mac(host_mac, net_params.host_mac); 1240 1241 virtio_net__init(&net_params); 1242 } 1243 1244 kvm__init_ram(kvm); 1245 1246 #ifdef CONFIG_X86 1247 kbd__init(kvm); 1248 #endif 1249 1250 pci_shmem__init(kvm); 1251 1252 if (vnc || sdl) { 1253 fb = vesa__init(kvm); 1254 if (IS_ERR(fb)) { 1255 pr_err("vesa__init() failed with error %ld\n", PTR_ERR(fb)); 1256 goto fail; 1257 } 1258 } 1259 1260 if (vnc && fb) { 1261 r = vnc__init(fb); 1262 if (r < 0) { 1263 pr_err("vnc__init() failed with error %d\n", r); 1264 goto fail; 1265 } 1266 } 1267 1268 if (sdl && fb) { 1269 sdl__init(fb); 1270 if (r < 0) { 1271 pr_err("sdl__init() failed with error %d\n", r); 1272 goto fail; 1273 } 1274 } 1275 1276 r = fb__start(); 1277 if (r < 0) { 1278 pr_err("fb__init() failed with error %d\n", r); 1279 goto fail; 1280 } 1281 1282 /* Device init all done; firmware init must 1283 * come after this (it may set up device trees etc.) 1284 */ 1285 1286 kvm__start_timer(kvm); 1287 1288 if (firmware_filename) { 1289 if (!kvm__load_firmware(kvm, firmware_filename)) 1290 die("unable to load firmware image %s: %s", firmware_filename, strerror(errno)); 1291 } else { 1292 kvm__arch_setup_firmware(kvm); 1293 if (r < 0) { 1294 pr_err("kvm__arch_setup_firmware() failed with error %d\n", r); 1295 goto fail; 1296 } 1297 } 1298 1299 for (i = 0; i < nrcpus; i++) { 1300 kvm_cpus[i] = kvm_cpu__init(kvm, i); 1301 if (!kvm_cpus[i]) 1302 die("unable to initialize KVM VCPU"); 1303 } 1304 1305 thread_pool__init(nr_online_cpus); 1306 fail: 1307 return r; 1308 } 1309 1310 static int kvm_cmd_run_work(void) 1311 { 1312 int i, r = -1; 1313 void *ret = NULL; 1314 1315 for (i = 0; i < nrcpus; i++) { 1316 if (pthread_create(&kvm_cpus[i]->thread, NULL, kvm_cpu_thread, kvm_cpus[i]) != 0) 1317 die("unable to create KVM VCPU thread"); 1318 } 1319 1320 /* Only VCPU #0 is going to exit by itself when shutting down */ 1321 if (pthread_join(kvm_cpus[0]->thread, &ret) != 0) 1322 r = 0; 1323 1324 kvm_cpu__delete(kvm_cpus[0]); 1325 kvm_cpus[0] = NULL; 1326 1327 for (i = 1; i < nrcpus; i++) { 1328 if (kvm_cpus[i]->is_running) { 1329 pthread_kill(kvm_cpus[i]->thread, SIGKVMEXIT); 1330 if (pthread_join(kvm_cpus[i]->thread, &ret) != 0) 1331 die("pthread_join"); 1332 kvm_cpu__delete(kvm_cpus[i]); 1333 } 1334 if (ret == NULL) 1335 r = 0; 1336 } 1337 1338 return r; 1339 } 1340 1341 static void kvm_cmd_run_exit(int guest_ret) 1342 { 1343 int r = 0; 1344 1345 compat__print_all_messages(); 1346 1347 r = symbol_exit(kvm); 1348 if (r < 0) 1349 pr_warning("symbol_exit() failed with error %d\n", r); 1350 1351 r = irq__exit(kvm); 1352 if (r < 0) 1353 pr_warning("irq__exit() failed with error %d\n", r); 1354 1355 fb__stop(); 1356 1357 r = virtio_scsi_exit(kvm); 1358 if (r < 0) 1359 pr_warning("virtio_scsi_exit() failed with error %d\n", r); 1360 1361 r = virtio_blk__exit(kvm); 1362 if (r < 0) 1363 pr_warning("virtio_blk__exit() failed with error %d\n", r); 1364 1365 r = virtio_rng__exit(kvm); 1366 if (r < 0) 1367 pr_warning("virtio_rng__exit() failed with error %d\n", r); 1368 1369 r = disk_image__close_all(kvm->disks, image_count); 1370 if (r < 0) 1371 pr_warning("disk_image__close_all() failed with error %d\n", r); 1372 1373 r = serial8250__exit(kvm); 1374 if (r < 0) 1375 pr_warning("serial8250__exit() failed with error %d\n", r); 1376 1377 r = rtc__exit(kvm); 1378 if (r < 0) 1379 pr_warning("rtc__exit() failed with error %d\n", r); 1380 1381 r = kvm__arch_free_firmware(kvm); 1382 if (r < 0) 1383 pr_warning("kvm__arch_free_firmware() failed with error %d\n", r); 1384 1385 r = ioport__exit(kvm); 1386 if (r < 0) 1387 pr_warning("ioport__exit() failed with error %d\n", r); 1388 1389 r = ioeventfd__exit(kvm); 1390 if (r < 0) 1391 pr_warning("ioeventfd__exit() failed with error %d\n", r); 1392 1393 r = pci__exit(kvm); 1394 if (r < 0) 1395 pr_warning("pci__exit() failed with error %d\n", r); 1396 1397 r = kvm__exit(kvm); 1398 if (r < 0) 1399 pr_warning("pci__exit() failed with error %d\n", r); 1400 1401 free(kvm_cpus); 1402 1403 if (guest_ret == 0) 1404 printf("\n # KVM session ended normally.\n"); 1405 } 1406 1407 int kvm_cmd_run(int argc, const char **argv, const char *prefix) 1408 { 1409 int r, ret = -EFAULT; 1410 1411 r = kvm_cmd_run_init(argc, argv); 1412 if (r < 0) 1413 return r; 1414 1415 ret = kvm_cmd_run_work(); 1416 kvm_cmd_run_exit(ret); 1417 1418 return ret; 1419 } 1420