1 #include "kvm/builtin-run.h" 2 3 #include "kvm/builtin-setup.h" 4 #include "kvm/virtio-balloon.h" 5 #include "kvm/virtio-console.h" 6 #include "kvm/parse-options.h" 7 #include "kvm/8250-serial.h" 8 #include "kvm/framebuffer.h" 9 #include "kvm/disk-image.h" 10 #include "kvm/threadpool.h" 11 #include "kvm/virtio-blk.h" 12 #include "kvm/virtio-net.h" 13 #include "kvm/virtio-rng.h" 14 #include "kvm/ioeventfd.h" 15 #include "kvm/virtio-9p.h" 16 #include "kvm/barrier.h" 17 #include "kvm/kvm-cpu.h" 18 #include "kvm/ioport.h" 19 #include "kvm/symbol.h" 20 #include "kvm/i8042.h" 21 #include "kvm/mutex.h" 22 #include "kvm/term.h" 23 #include "kvm/util.h" 24 #include "kvm/strbuf.h" 25 #include "kvm/vesa.h" 26 #include "kvm/irq.h" 27 #include "kvm/kvm.h" 28 #include "kvm/pci.h" 29 #include "kvm/rtc.h" 30 #include "kvm/sdl.h" 31 #include "kvm/vnc.h" 32 #include "kvm/guest_compat.h" 33 #include "kvm/pci-shmem.h" 34 #include "kvm/kvm-ipc.h" 35 #include "kvm/builtin-debug.h" 36 37 #include <linux/types.h> 38 #include <linux/err.h> 39 40 #include <sys/utsname.h> 41 #include <sys/types.h> 42 #include <sys/stat.h> 43 #include <termios.h> 44 #include <signal.h> 45 #include <stdlib.h> 46 #include <string.h> 47 #include <unistd.h> 48 #include <ctype.h> 49 #include <stdio.h> 50 51 #define DEFAULT_KVM_DEV "/dev/kvm" 52 #define DEFAULT_CONSOLE "serial" 53 #define DEFAULT_NETWORK "user" 54 #define DEFAULT_HOST_ADDR "192.168.33.1" 55 #define DEFAULT_GUEST_ADDR "192.168.33.15" 56 #define DEFAULT_GUEST_MAC "02:15:15:15:15:15" 57 #define DEFAULT_HOST_MAC "02:01:01:01:01:01" 58 #define DEFAULT_SCRIPT "none" 59 const char *DEFAULT_SANDBOX_FILENAME = "guest/sandbox.sh"; 60 61 #define MB_SHIFT (20) 62 #define KB_SHIFT (10) 63 #define GB_SHIFT (30) 64 #define MIN_RAM_SIZE_MB (64ULL) 65 #define MIN_RAM_SIZE_BYTE (MIN_RAM_SIZE_MB << MB_SHIFT) 66 67 struct kvm *kvm; 68 struct kvm_cpu **kvm_cpus; 69 __thread struct kvm_cpu *current_kvm_cpu; 70 71 static struct disk_image_params disk_image[MAX_DISK_IMAGES]; 72 static u64 ram_size; 73 static u8 image_count; 74 static u8 num_net_devices; 75 static bool virtio_rng; 76 static const char *kernel_cmdline; 77 static const char *kernel_filename; 78 static const char *vmlinux_filename; 79 static const char *initrd_filename; 80 static const char *firmware_filename; 81 static const char *console; 82 static const char *dev; 83 static const char *network; 84 static const char *host_ip; 85 static const char *guest_ip; 86 static const char *guest_mac; 87 static const char *host_mac; 88 static const char *script; 89 static const char *guest_name; 90 static const char *sandbox; 91 static const char *hugetlbfs_path; 92 static const char *custom_rootfs_name = "default"; 93 static struct virtio_net_params *net_params; 94 static bool single_step; 95 static bool vnc; 96 static bool sdl; 97 static bool balloon; 98 static bool using_rootfs; 99 static bool custom_rootfs; 100 static bool no_net; 101 static bool no_dhcp; 102 extern bool ioport_debug; 103 extern bool mmio_debug; 104 static int kvm_run_wrapper; 105 extern int active_console; 106 extern int debug_iodelay; 107 108 bool do_debug_print = false; 109 110 static int nrcpus; 111 static int vidmode = -1; 112 113 static const char * const run_usage[] = { 114 "lkvm run [<options>] [<kernel image>]", 115 NULL 116 }; 117 118 enum { 119 KVM_RUN_DEFAULT, 120 KVM_RUN_SANDBOX, 121 }; 122 123 void kvm_run_set_wrapper_sandbox(void) 124 { 125 kvm_run_wrapper = KVM_RUN_SANDBOX; 126 } 127 128 static int img_name_parser(const struct option *opt, const char *arg, int unset) 129 { 130 char path[PATH_MAX]; 131 const char *cur; 132 struct stat st; 133 char *sep; 134 135 if (stat(arg, &st) == 0 && 136 S_ISDIR(st.st_mode)) { 137 char tmp[PATH_MAX]; 138 139 if (using_rootfs) 140 die("Please use only one rootfs directory atmost"); 141 142 if (realpath(arg, tmp) == 0 || 143 virtio_9p__register(kvm, tmp, "/dev/root") < 0) 144 die("Unable to initialize virtio 9p"); 145 using_rootfs = 1; 146 return 0; 147 } 148 149 snprintf(path, PATH_MAX, "%s%s", kvm__get_dir(), arg); 150 151 if (stat(path, &st) == 0 && 152 S_ISDIR(st.st_mode)) { 153 char tmp[PATH_MAX]; 154 155 if (using_rootfs) 156 die("Please use only one rootfs directory atmost"); 157 158 if (realpath(path, tmp) == 0 || 159 virtio_9p__register(kvm, tmp, "/dev/root") < 0) 160 die("Unable to initialize virtio 9p"); 161 if (virtio_9p__register(kvm, "/", "hostfs") < 0) 162 die("Unable to initialize virtio 9p"); 163 kvm_setup_resolv(arg); 164 using_rootfs = custom_rootfs = 1; 165 custom_rootfs_name = arg; 166 return 0; 167 } 168 169 if (image_count >= MAX_DISK_IMAGES) 170 die("Currently only 4 images are supported"); 171 172 disk_image[image_count].filename = arg; 173 cur = arg; 174 do { 175 sep = strstr(cur, ","); 176 if (sep) { 177 if (strncmp(sep + 1, "ro", 2) == 0) 178 disk_image[image_count].readonly = true; 179 else if (strncmp(sep + 1, "direct", 6) == 0) 180 disk_image[image_count].direct = true; 181 *sep = 0; 182 cur = sep + 1; 183 } 184 } while (sep); 185 186 image_count++; 187 188 return 0; 189 } 190 191 static int virtio_9p_rootdir_parser(const struct option *opt, const char *arg, int unset) 192 { 193 char *tag_name; 194 char tmp[PATH_MAX]; 195 196 /* 197 * 9p dir can be of the form dirname,tag_name or 198 * just dirname. In the later case we use the 199 * default tag name 200 */ 201 tag_name = strstr(arg, ","); 202 if (tag_name) { 203 *tag_name = '\0'; 204 tag_name++; 205 } 206 if (realpath(arg, tmp)) { 207 if (virtio_9p__register(kvm, tmp, tag_name) < 0) 208 die("Unable to initialize virtio 9p"); 209 } else 210 die("Failed resolving 9p path"); 211 return 0; 212 } 213 214 static int tty_parser(const struct option *opt, const char *arg, int unset) 215 { 216 int tty = atoi(arg); 217 218 term_set_tty(tty); 219 220 return 0; 221 } 222 223 static inline void str_to_mac(const char *str, char *mac) 224 { 225 sscanf(str, "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx", 226 mac, mac+1, mac+2, mac+3, mac+4, mac+5); 227 } 228 static int set_net_param(struct virtio_net_params *p, const char *param, 229 const char *val) 230 { 231 if (strcmp(param, "guest_mac") == 0) { 232 str_to_mac(val, p->guest_mac); 233 } else if (strcmp(param, "mode") == 0) { 234 if (!strncmp(val, "user", 4)) { 235 int i; 236 237 for (i = 0; i < num_net_devices; i++) 238 if (net_params[i].mode == NET_MODE_USER) 239 die("Only one usermode network device allowed at a time"); 240 p->mode = NET_MODE_USER; 241 } else if (!strncmp(val, "tap", 3)) { 242 p->mode = NET_MODE_TAP; 243 } else if (!strncmp(val, "none", 4)) { 244 no_net = 1; 245 return -1; 246 } else 247 die("Unkown network mode %s, please use user, tap or none", network); 248 } else if (strcmp(param, "script") == 0) { 249 p->script = strdup(val); 250 } else if (strcmp(param, "guest_ip") == 0) { 251 p->guest_ip = strdup(val); 252 } else if (strcmp(param, "host_ip") == 0) { 253 p->host_ip = strdup(val); 254 } else if (strcmp(param, "trans") == 0) { 255 p->trans = strdup(val); 256 } else if (strcmp(param, "vhost") == 0) { 257 p->vhost = atoi(val); 258 } else if (strcmp(param, "fd") == 0) { 259 p->fd = atoi(val); 260 } else 261 die("Unknown network parameter %s", param); 262 263 return 0; 264 } 265 266 static int netdev_parser(const struct option *opt, const char *arg, int unset) 267 { 268 struct virtio_net_params p; 269 char *buf = NULL, *cmd = NULL, *cur = NULL; 270 bool on_cmd = true; 271 272 if (arg) { 273 buf = strdup(arg); 274 if (buf == NULL) 275 die("Failed allocating new net buffer"); 276 cur = strtok(buf, ",="); 277 } 278 279 p = (struct virtio_net_params) { 280 .guest_ip = DEFAULT_GUEST_ADDR, 281 .host_ip = DEFAULT_HOST_ADDR, 282 .script = DEFAULT_SCRIPT, 283 .mode = NET_MODE_TAP, 284 }; 285 286 str_to_mac(DEFAULT_GUEST_MAC, p.guest_mac); 287 p.guest_mac[5] += num_net_devices; 288 289 while (cur) { 290 if (on_cmd) { 291 cmd = cur; 292 } else { 293 if (set_net_param(&p, cmd, cur) < 0) 294 goto done; 295 } 296 on_cmd = !on_cmd; 297 298 cur = strtok(NULL, ",="); 299 }; 300 301 num_net_devices++; 302 303 net_params = realloc(net_params, num_net_devices * sizeof(*net_params)); 304 if (net_params == NULL) 305 die("Failed adding new network device"); 306 307 net_params[num_net_devices - 1] = p; 308 309 done: 310 free(buf); 311 return 0; 312 } 313 314 static int shmem_parser(const struct option *opt, const char *arg, int unset) 315 { 316 const u64 default_size = SHMEM_DEFAULT_SIZE; 317 const u64 default_phys_addr = SHMEM_DEFAULT_ADDR; 318 const char *default_handle = SHMEM_DEFAULT_HANDLE; 319 struct shmem_info *si = malloc(sizeof(struct shmem_info)); 320 u64 phys_addr; 321 u64 size; 322 char *handle = NULL; 323 int create = 0; 324 const char *p = arg; 325 char *next; 326 int base = 10; 327 int verbose = 0; 328 329 const int skip_pci = strlen("pci:"); 330 if (verbose) 331 pr_info("shmem_parser(%p,%s,%d)", opt, arg, unset); 332 /* parse out optional addr family */ 333 if (strcasestr(p, "pci:")) { 334 p += skip_pci; 335 } else if (strcasestr(p, "mem:")) { 336 die("I can't add to E820 map yet.\n"); 337 } 338 /* parse out physical addr */ 339 base = 10; 340 if (strcasestr(p, "0x")) 341 base = 16; 342 phys_addr = strtoll(p, &next, base); 343 if (next == p && phys_addr == 0) { 344 pr_info("shmem: no physical addr specified, using default."); 345 phys_addr = default_phys_addr; 346 } 347 if (*next != ':' && *next != '\0') 348 die("shmem: unexpected chars after phys addr.\n"); 349 if (*next == '\0') 350 p = next; 351 else 352 p = next + 1; 353 /* parse out size */ 354 base = 10; 355 if (strcasestr(p, "0x")) 356 base = 16; 357 size = strtoll(p, &next, base); 358 if (next == p && size == 0) { 359 pr_info("shmem: no size specified, using default."); 360 size = default_size; 361 } 362 /* look for [KMGkmg][Bb]* uses base 2. */ 363 int skip_B = 0; 364 if (strspn(next, "KMGkmg")) { /* might have a prefix */ 365 if (*(next + 1) == 'B' || *(next + 1) == 'b') 366 skip_B = 1; 367 switch (*next) { 368 case 'K': 369 case 'k': 370 size = size << KB_SHIFT; 371 break; 372 case 'M': 373 case 'm': 374 size = size << MB_SHIFT; 375 break; 376 case 'G': 377 case 'g': 378 size = size << GB_SHIFT; 379 break; 380 default: 381 die("shmem: bug in detecting size prefix."); 382 break; 383 } 384 next += 1 + skip_B; 385 } 386 if (*next != ':' && *next != '\0') { 387 die("shmem: unexpected chars after phys size. <%c><%c>\n", 388 *next, *p); 389 } 390 if (*next == '\0') 391 p = next; 392 else 393 p = next + 1; 394 /* parse out optional shmem handle */ 395 const int skip_handle = strlen("handle="); 396 next = strcasestr(p, "handle="); 397 if (*p && next) { 398 if (p != next) 399 die("unexpected chars before handle\n"); 400 p += skip_handle; 401 next = strchrnul(p, ':'); 402 if (next - p) { 403 handle = malloc(next - p + 1); 404 strncpy(handle, p, next - p); 405 handle[next - p] = '\0'; /* just in case. */ 406 } 407 if (*next == '\0') 408 p = next; 409 else 410 p = next + 1; 411 } 412 /* parse optional create flag to see if we should create shm seg. */ 413 if (*p && strcasestr(p, "create")) { 414 create = 1; 415 p += strlen("create"); 416 } 417 if (*p != '\0') 418 die("shmem: unexpected trailing chars\n"); 419 if (handle == NULL) { 420 handle = malloc(strlen(default_handle) + 1); 421 strcpy(handle, default_handle); 422 } 423 if (verbose) { 424 pr_info("shmem: phys_addr = %llx", phys_addr); 425 pr_info("shmem: size = %llx", size); 426 pr_info("shmem: handle = %s", handle); 427 pr_info("shmem: create = %d", create); 428 } 429 430 si->phys_addr = phys_addr; 431 si->size = size; 432 si->handle = handle; 433 si->create = create; 434 pci_shmem__register_mem(si); /* ownership of si, etc. passed on. */ 435 return 0; 436 } 437 438 static const struct option options[] = { 439 OPT_GROUP("Basic options:"), 440 OPT_STRING('\0', "name", &guest_name, "guest name", 441 "A name for the guest"), 442 OPT_INTEGER('c', "cpus", &nrcpus, "Number of CPUs"), 443 OPT_U64('m', "mem", &ram_size, "Virtual machine memory size in MiB."), 444 OPT_CALLBACK('\0', "shmem", NULL, 445 "[pci:]<addr>:<size>[:handle=<handle>][:create]", 446 "Share host shmem with guest via pci device", 447 shmem_parser), 448 OPT_CALLBACK('d', "disk", NULL, "image or rootfs_dir", "Disk image or rootfs directory", img_name_parser), 449 OPT_BOOLEAN('\0', "balloon", &balloon, "Enable virtio balloon"), 450 OPT_BOOLEAN('\0', "vnc", &vnc, "Enable VNC framebuffer"), 451 OPT_BOOLEAN('\0', "sdl", &sdl, "Enable SDL framebuffer"), 452 OPT_BOOLEAN('\0', "rng", &virtio_rng, "Enable virtio Random Number Generator"), 453 OPT_CALLBACK('\0', "9p", NULL, "dir_to_share,tag_name", 454 "Enable virtio 9p to share files between host and guest", virtio_9p_rootdir_parser), 455 OPT_STRING('\0', "console", &console, "serial, virtio or hv", 456 "Console to use"), 457 OPT_STRING('\0', "dev", &dev, "device_file", "KVM device file"), 458 OPT_CALLBACK('\0', "tty", NULL, "tty id", 459 "Remap guest TTY into a pty on the host", 460 tty_parser), 461 OPT_STRING('\0', "sandbox", &sandbox, "script", 462 "Run this script when booting into custom rootfs"), 463 OPT_STRING('\0', "hugetlbfs", &hugetlbfs_path, "path", "Hugetlbfs path"), 464 465 OPT_GROUP("Kernel options:"), 466 OPT_STRING('k', "kernel", &kernel_filename, "kernel", 467 "Kernel to boot in virtual machine"), 468 OPT_STRING('i', "initrd", &initrd_filename, "initrd", 469 "Initial RAM disk image"), 470 OPT_STRING('p', "params", &kernel_cmdline, "params", 471 "Kernel command line arguments"), 472 OPT_STRING('f', "firmware", &firmware_filename, "firmware", 473 "Firmware image to boot in virtual machine"), 474 475 OPT_GROUP("Networking options:"), 476 OPT_CALLBACK_DEFAULT('n', "network", NULL, "network params", 477 "Create a new guest NIC", 478 netdev_parser, NULL), 479 OPT_BOOLEAN('\0', "no-dhcp", &no_dhcp, "Disable kernel DHCP in rootfs mode"), 480 481 OPT_GROUP("BIOS options:"), 482 OPT_INTEGER('\0', "vidmode", &vidmode, 483 "Video mode"), 484 485 OPT_GROUP("Debug options:"), 486 OPT_BOOLEAN('\0', "debug", &do_debug_print, 487 "Enable debug messages"), 488 OPT_BOOLEAN('\0', "debug-single-step", &single_step, 489 "Enable single stepping"), 490 OPT_BOOLEAN('\0', "debug-ioport", &ioport_debug, 491 "Enable ioport debugging"), 492 OPT_BOOLEAN('\0', "debug-mmio", &mmio_debug, 493 "Enable MMIO debugging"), 494 OPT_INTEGER('\0', "debug-iodelay", &debug_iodelay, 495 "Delay IO by millisecond"), 496 OPT_END() 497 }; 498 499 /* 500 * Serialize debug printout so that the output of multiple vcpus does not 501 * get mixed up: 502 */ 503 static int printout_done; 504 505 static void handle_sigusr1(int sig) 506 { 507 struct kvm_cpu *cpu = current_kvm_cpu; 508 int fd = kvm_cpu__get_debug_fd(); 509 510 if (!cpu || cpu->needs_nmi) 511 return; 512 513 dprintf(fd, "\n #\n # vCPU #%ld's dump:\n #\n", cpu->cpu_id); 514 kvm_cpu__show_registers(cpu); 515 kvm_cpu__show_code(cpu); 516 kvm_cpu__show_page_tables(cpu); 517 fflush(stdout); 518 printout_done = 1; 519 mb(); 520 } 521 522 /* Pause/resume the guest using SIGUSR2 */ 523 static int is_paused; 524 525 static void handle_pause(int fd, u32 type, u32 len, u8 *msg) 526 { 527 if (WARN_ON(len)) 528 return; 529 530 if (type == KVM_IPC_RESUME && is_paused) { 531 kvm->vm_state = KVM_VMSTATE_RUNNING; 532 kvm__continue(); 533 } else if (type == KVM_IPC_PAUSE && !is_paused) { 534 kvm->vm_state = KVM_VMSTATE_PAUSED; 535 ioctl(kvm->vm_fd, KVM_KVMCLOCK_CTRL); 536 kvm__pause(); 537 } else { 538 return; 539 } 540 541 is_paused = !is_paused; 542 } 543 544 static void handle_vmstate(int fd, u32 type, u32 len, u8 *msg) 545 { 546 int r = 0; 547 548 if (type == KVM_IPC_VMSTATE) 549 r = write(fd, &kvm->vm_state, sizeof(kvm->vm_state)); 550 551 if (r < 0) 552 pr_warning("Failed sending VMSTATE"); 553 } 554 555 static void handle_debug(int fd, u32 type, u32 len, u8 *msg) 556 { 557 int i; 558 struct debug_cmd_params *params; 559 u32 dbg_type; 560 u32 vcpu; 561 562 if (WARN_ON(type != KVM_IPC_DEBUG || len != sizeof(*params))) 563 return; 564 565 params = (void *)msg; 566 dbg_type = params->dbg_type; 567 vcpu = params->cpu; 568 569 if (dbg_type & KVM_DEBUG_CMD_TYPE_SYSRQ) 570 serial8250__inject_sysrq(kvm, params->sysrq); 571 572 if (dbg_type & KVM_DEBUG_CMD_TYPE_NMI) { 573 if ((int)vcpu >= kvm->nrcpus) 574 return; 575 576 kvm_cpus[vcpu]->needs_nmi = 1; 577 pthread_kill(kvm_cpus[vcpu]->thread, SIGUSR1); 578 } 579 580 if (!(dbg_type & KVM_DEBUG_CMD_TYPE_DUMP)) 581 return; 582 583 for (i = 0; i < nrcpus; i++) { 584 struct kvm_cpu *cpu = kvm_cpus[i]; 585 586 if (!cpu) 587 continue; 588 589 printout_done = 0; 590 591 kvm_cpu__set_debug_fd(fd); 592 pthread_kill(cpu->thread, SIGUSR1); 593 /* 594 * Wait for the vCPU to dump state before signalling 595 * the next thread. Since this is debug code it does 596 * not matter that we are burning CPU time a bit: 597 */ 598 while (!printout_done) 599 mb(); 600 } 601 602 close(fd); 603 604 serial8250__inject_sysrq(kvm, 'p'); 605 } 606 607 static void handle_sigalrm(int sig) 608 { 609 kvm__arch_periodic_poll(kvm); 610 } 611 612 static void handle_stop(int fd, u32 type, u32 len, u8 *msg) 613 { 614 if (WARN_ON(type != KVM_IPC_STOP || len)) 615 return; 616 617 kvm_cpu__reboot(); 618 } 619 620 static void *kvm_cpu_thread(void *arg) 621 { 622 current_kvm_cpu = arg; 623 624 if (kvm_cpu__start(current_kvm_cpu)) 625 goto panic_kvm; 626 627 return (void *) (intptr_t) 0; 628 629 panic_kvm: 630 fprintf(stderr, "KVM exit reason: %u (\"%s\")\n", 631 current_kvm_cpu->kvm_run->exit_reason, 632 kvm_exit_reasons[current_kvm_cpu->kvm_run->exit_reason]); 633 if (current_kvm_cpu->kvm_run->exit_reason == KVM_EXIT_UNKNOWN) 634 fprintf(stderr, "KVM exit code: 0x%Lu\n", 635 current_kvm_cpu->kvm_run->hw.hardware_exit_reason); 636 637 kvm_cpu__set_debug_fd(STDOUT_FILENO); 638 kvm_cpu__show_registers(current_kvm_cpu); 639 kvm_cpu__show_code(current_kvm_cpu); 640 kvm_cpu__show_page_tables(current_kvm_cpu); 641 642 return (void *) (intptr_t) 1; 643 } 644 645 static char kernel[PATH_MAX]; 646 647 static const char *host_kernels[] = { 648 "/boot/vmlinuz", 649 "/boot/bzImage", 650 NULL 651 }; 652 653 static const char *default_kernels[] = { 654 "./bzImage", 655 "arch/" BUILD_ARCH "/boot/bzImage", 656 "../../arch/" BUILD_ARCH "/boot/bzImage", 657 NULL 658 }; 659 660 static const char *default_vmlinux[] = { 661 "vmlinux", 662 "../../../vmlinux", 663 "../../vmlinux", 664 NULL 665 }; 666 667 static void kernel_usage_with_options(void) 668 { 669 const char **k; 670 struct utsname uts; 671 672 fprintf(stderr, "Fatal: could not find default kernel image in:\n"); 673 k = &default_kernels[0]; 674 while (*k) { 675 fprintf(stderr, "\t%s\n", *k); 676 k++; 677 } 678 679 if (uname(&uts) < 0) 680 return; 681 682 k = &host_kernels[0]; 683 while (*k) { 684 if (snprintf(kernel, PATH_MAX, "%s-%s", *k, uts.release) < 0) 685 return; 686 fprintf(stderr, "\t%s\n", kernel); 687 k++; 688 } 689 fprintf(stderr, "\nPlease see '%s run --help' for more options.\n\n", 690 KVM_BINARY_NAME); 691 } 692 693 static u64 host_ram_size(void) 694 { 695 long page_size; 696 long nr_pages; 697 698 nr_pages = sysconf(_SC_PHYS_PAGES); 699 if (nr_pages < 0) { 700 pr_warning("sysconf(_SC_PHYS_PAGES) failed"); 701 return 0; 702 } 703 704 page_size = sysconf(_SC_PAGE_SIZE); 705 if (page_size < 0) { 706 pr_warning("sysconf(_SC_PAGE_SIZE) failed"); 707 return 0; 708 } 709 710 return (nr_pages * page_size) >> MB_SHIFT; 711 } 712 713 /* 714 * If user didn't specify how much memory it wants to allocate for the guest, 715 * avoid filling the whole host RAM. 716 */ 717 #define RAM_SIZE_RATIO 0.8 718 719 static u64 get_ram_size(int nr_cpus) 720 { 721 u64 available; 722 u64 ram_size; 723 724 ram_size = 64 * (nr_cpus + 3); 725 726 available = host_ram_size() * RAM_SIZE_RATIO; 727 if (!available) 728 available = MIN_RAM_SIZE_MB; 729 730 if (ram_size > available) 731 ram_size = available; 732 733 return ram_size; 734 } 735 736 static const char *find_kernel(void) 737 { 738 const char **k; 739 struct stat st; 740 struct utsname uts; 741 742 k = &default_kernels[0]; 743 while (*k) { 744 if (stat(*k, &st) < 0 || !S_ISREG(st.st_mode)) { 745 k++; 746 continue; 747 } 748 strncpy(kernel, *k, PATH_MAX); 749 return kernel; 750 } 751 752 if (uname(&uts) < 0) 753 return NULL; 754 755 k = &host_kernels[0]; 756 while (*k) { 757 if (snprintf(kernel, PATH_MAX, "%s-%s", *k, uts.release) < 0) 758 return NULL; 759 760 if (stat(kernel, &st) < 0 || !S_ISREG(st.st_mode)) { 761 k++; 762 continue; 763 } 764 return kernel; 765 766 } 767 return NULL; 768 } 769 770 static const char *find_vmlinux(void) 771 { 772 const char **vmlinux; 773 774 vmlinux = &default_vmlinux[0]; 775 while (*vmlinux) { 776 struct stat st; 777 778 if (stat(*vmlinux, &st) < 0 || !S_ISREG(st.st_mode)) { 779 vmlinux++; 780 continue; 781 } 782 return *vmlinux; 783 } 784 return NULL; 785 } 786 787 void kvm_run_help(void) 788 { 789 usage_with_options(run_usage, options); 790 } 791 792 static int kvm_custom_stage2(void) 793 { 794 char tmp[PATH_MAX], dst[PATH_MAX], *src; 795 const char *rootfs = custom_rootfs_name; 796 int r; 797 798 src = realpath("guest/init_stage2", NULL); 799 if (src == NULL) 800 return -ENOMEM; 801 802 snprintf(tmp, PATH_MAX, "%s%s/virt/init_stage2", kvm__get_dir(), rootfs); 803 remove(tmp); 804 805 snprintf(dst, PATH_MAX, "/host/%s", src); 806 r = symlink(dst, tmp); 807 free(src); 808 809 return r; 810 } 811 812 static int kvm_run_set_sandbox(void) 813 { 814 const char *guestfs_name = custom_rootfs_name; 815 char path[PATH_MAX], script[PATH_MAX], *tmp; 816 817 snprintf(path, PATH_MAX, "%s%s/virt/sandbox.sh", kvm__get_dir(), guestfs_name); 818 819 remove(path); 820 821 if (sandbox == NULL) 822 return 0; 823 824 tmp = realpath(sandbox, NULL); 825 if (tmp == NULL) 826 return -ENOMEM; 827 828 snprintf(script, PATH_MAX, "/host/%s", tmp); 829 free(tmp); 830 831 return symlink(script, path); 832 } 833 834 static void kvm_write_sandbox_cmd_exactly(int fd, const char *arg) 835 { 836 const char *single_quote; 837 838 if (!*arg) { /* zero length string */ 839 if (write(fd, "''", 2) <= 0) 840 die("Failed writing sandbox script"); 841 return; 842 } 843 844 while (*arg) { 845 single_quote = strchrnul(arg, '\''); 846 847 /* write non-single-quote string as #('string') */ 848 if (arg != single_quote) { 849 if (write(fd, "'", 1) <= 0 || 850 write(fd, arg, single_quote - arg) <= 0 || 851 write(fd, "'", 1) <= 0) 852 die("Failed writing sandbox script"); 853 } 854 855 /* write single quote as #("'") */ 856 if (*single_quote) { 857 if (write(fd, "\"'\"", 3) <= 0) 858 die("Failed writing sandbox script"); 859 } else 860 break; 861 862 arg = single_quote + 1; 863 } 864 } 865 866 static void resolve_program(const char *src, char *dst, size_t len) 867 { 868 struct stat st; 869 int err; 870 871 err = stat(src, &st); 872 873 if (!err && S_ISREG(st.st_mode)) { 874 char resolved_path[PATH_MAX]; 875 876 if (!realpath(src, resolved_path)) 877 die("Unable to resolve program %s: %s\n", src, strerror(errno)); 878 879 snprintf(dst, len, "/host%s", resolved_path); 880 } else 881 strncpy(dst, src, len); 882 } 883 884 static void kvm_run_write_sandbox_cmd(const char **argv, int argc) 885 { 886 const char script_hdr[] = "#! /bin/bash\n\n"; 887 char program[PATH_MAX]; 888 int fd; 889 890 remove(sandbox); 891 892 fd = open(sandbox, O_RDWR | O_CREAT, 0777); 893 if (fd < 0) 894 die("Failed creating sandbox script"); 895 896 if (write(fd, script_hdr, sizeof(script_hdr) - 1) <= 0) 897 die("Failed writing sandbox script"); 898 899 resolve_program(argv[0], program, PATH_MAX); 900 kvm_write_sandbox_cmd_exactly(fd, program); 901 902 argv++; 903 argc--; 904 905 while (argc) { 906 if (write(fd, " ", 1) <= 0) 907 die("Failed writing sandbox script"); 908 909 kvm_write_sandbox_cmd_exactly(fd, argv[0]); 910 argv++; 911 argc--; 912 } 913 if (write(fd, "\n", 1) <= 0) 914 die("Failed writing sandbox script"); 915 916 close(fd); 917 } 918 919 static int kvm_cmd_run_init(int argc, const char **argv) 920 { 921 static char real_cmdline[2048], default_name[20]; 922 struct framebuffer *fb = NULL; 923 unsigned int nr_online_cpus; 924 int max_cpus, recommended_cpus; 925 int i, r; 926 927 signal(SIGALRM, handle_sigalrm); 928 kvm_ipc__register_handler(KVM_IPC_DEBUG, handle_debug); 929 signal(SIGUSR1, handle_sigusr1); 930 kvm_ipc__register_handler(KVM_IPC_PAUSE, handle_pause); 931 kvm_ipc__register_handler(KVM_IPC_RESUME, handle_pause); 932 kvm_ipc__register_handler(KVM_IPC_STOP, handle_stop); 933 kvm_ipc__register_handler(KVM_IPC_VMSTATE, handle_vmstate); 934 935 nr_online_cpus = sysconf(_SC_NPROCESSORS_ONLN); 936 937 while (argc != 0) { 938 argc = parse_options(argc, argv, options, run_usage, 939 PARSE_OPT_STOP_AT_NON_OPTION | 940 PARSE_OPT_KEEP_DASHDASH); 941 if (argc != 0) { 942 /* Cusrom options, should have been handled elsewhere */ 943 if (strcmp(argv[0], "--") == 0) { 944 if (kvm_run_wrapper == KVM_RUN_SANDBOX) { 945 sandbox = DEFAULT_SANDBOX_FILENAME; 946 kvm_run_write_sandbox_cmd(argv+1, argc-1); 947 break; 948 } 949 } 950 951 if ((kvm_run_wrapper == KVM_RUN_DEFAULT && kernel_filename) || 952 (kvm_run_wrapper == KVM_RUN_SANDBOX && sandbox)) { 953 fprintf(stderr, "Cannot handle parameter: " 954 "%s\n", argv[0]); 955 usage_with_options(run_usage, options); 956 return -EINVAL; 957 } 958 if (kvm_run_wrapper == KVM_RUN_SANDBOX) { 959 /* 960 * first unhandled parameter is treated as 961 * sandbox command 962 */ 963 sandbox = DEFAULT_SANDBOX_FILENAME; 964 kvm_run_write_sandbox_cmd(argv, argc); 965 } else { 966 /* 967 * first unhandled parameter is treated as a kernel 968 * image 969 */ 970 kernel_filename = argv[0]; 971 } 972 argv++; 973 argc--; 974 } 975 976 } 977 978 if (!kernel_filename) 979 kernel_filename = find_kernel(); 980 981 if (!kernel_filename) { 982 kernel_usage_with_options(); 983 return -EINVAL; 984 } 985 986 vmlinux_filename = find_vmlinux(); 987 988 if (nrcpus == 0) 989 nrcpus = nr_online_cpus; 990 991 if (!ram_size) 992 ram_size = get_ram_size(nrcpus); 993 994 if (ram_size < MIN_RAM_SIZE_MB) 995 die("Not enough memory specified: %lluMB (min %lluMB)", ram_size, MIN_RAM_SIZE_MB); 996 997 if (ram_size > host_ram_size()) 998 pr_warning("Guest memory size %lluMB exceeds host physical RAM size %lluMB", ram_size, host_ram_size()); 999 1000 ram_size <<= MB_SHIFT; 1001 1002 if (!dev) 1003 dev = DEFAULT_KVM_DEV; 1004 1005 if (!console) 1006 console = DEFAULT_CONSOLE; 1007 1008 if (!strncmp(console, "virtio", 6)) 1009 active_console = CONSOLE_VIRTIO; 1010 else if (!strncmp(console, "serial", 6)) 1011 active_console = CONSOLE_8250; 1012 else if (!strncmp(console, "hv", 2)) 1013 active_console = CONSOLE_HV; 1014 else 1015 pr_warning("No console!"); 1016 1017 if (!host_ip) 1018 host_ip = DEFAULT_HOST_ADDR; 1019 1020 if (!guest_ip) 1021 guest_ip = DEFAULT_GUEST_ADDR; 1022 1023 if (!guest_mac) 1024 guest_mac = DEFAULT_GUEST_MAC; 1025 1026 if (!host_mac) 1027 host_mac = DEFAULT_HOST_MAC; 1028 1029 if (!script) 1030 script = DEFAULT_SCRIPT; 1031 1032 term_init(); 1033 1034 if (!guest_name) { 1035 if (custom_rootfs) { 1036 guest_name = custom_rootfs_name; 1037 } else { 1038 sprintf(default_name, "guest-%u", getpid()); 1039 guest_name = default_name; 1040 } 1041 } 1042 1043 kvm = kvm__init(dev, hugetlbfs_path, ram_size, guest_name); 1044 if (IS_ERR(kvm)) { 1045 r = PTR_ERR(kvm); 1046 goto fail; 1047 } 1048 1049 kvm->single_step = single_step; 1050 1051 r = ioeventfd__init(kvm); 1052 if (r < 0) { 1053 pr_err("ioeventfd__init() failed with error %d\n", r); 1054 goto fail; 1055 } 1056 1057 max_cpus = kvm__max_cpus(kvm); 1058 recommended_cpus = kvm__recommended_cpus(kvm); 1059 1060 if (nrcpus > max_cpus) { 1061 printf(" # Limit the number of CPUs to %d\n", max_cpus); 1062 nrcpus = max_cpus; 1063 } else if (nrcpus > recommended_cpus) { 1064 printf(" # Warning: The maximum recommended amount of VCPUs" 1065 " is %d\n", recommended_cpus); 1066 } 1067 1068 kvm->nrcpus = nrcpus; 1069 1070 /* Alloc one pointer too many, so array ends up 0-terminated */ 1071 kvm_cpus = calloc(nrcpus + 1, sizeof(void *)); 1072 if (!kvm_cpus) 1073 die("Couldn't allocate array for %d CPUs", nrcpus); 1074 1075 r = irq__init(kvm); 1076 if (r < 0) { 1077 pr_err("irq__init() failed with error %d\n", r); 1078 goto fail; 1079 } 1080 1081 r = pci__init(kvm); 1082 if (r < 0) { 1083 pr_err("pci__init() failed with error %d\n", r); 1084 goto fail; 1085 } 1086 1087 r = ioport__init(kvm); 1088 if (r < 0) { 1089 pr_err("ioport__init() failed with error %d\n", r); 1090 goto fail; 1091 } 1092 1093 /* 1094 * vidmode should be either specified 1095 * either set by default 1096 */ 1097 if (vnc || sdl) { 1098 if (vidmode == -1) 1099 vidmode = 0x312; 1100 } else { 1101 vidmode = 0; 1102 } 1103 1104 memset(real_cmdline, 0, sizeof(real_cmdline)); 1105 kvm__arch_set_cmdline(real_cmdline, vnc || sdl); 1106 1107 if (strlen(real_cmdline) > 0) 1108 strcat(real_cmdline, " "); 1109 1110 if (kernel_cmdline) 1111 strlcat(real_cmdline, kernel_cmdline, sizeof(real_cmdline)); 1112 1113 if (!using_rootfs && !disk_image[0].filename && !initrd_filename) { 1114 char tmp[PATH_MAX]; 1115 1116 kvm_setup_create_new(custom_rootfs_name); 1117 kvm_setup_resolv(custom_rootfs_name); 1118 1119 snprintf(tmp, PATH_MAX, "%s%s", kvm__get_dir(), "default"); 1120 if (virtio_9p__register(kvm, tmp, "/dev/root") < 0) 1121 die("Unable to initialize virtio 9p"); 1122 if (virtio_9p__register(kvm, "/", "hostfs") < 0) 1123 die("Unable to initialize virtio 9p"); 1124 using_rootfs = custom_rootfs = 1; 1125 } 1126 1127 if (using_rootfs) { 1128 strcat(real_cmdline, " root=/dev/root rw rootflags=rw,trans=virtio,version=9p2000.L rootfstype=9p"); 1129 if (custom_rootfs) { 1130 kvm_run_set_sandbox(); 1131 1132 strcat(real_cmdline, " init=/virt/init"); 1133 1134 if (!no_dhcp) 1135 strcat(real_cmdline, " ip=dhcp"); 1136 if (kvm_custom_stage2()) 1137 die("Failed linking stage 2 of init."); 1138 } 1139 } else if (!strstr(real_cmdline, "root=")) { 1140 strlcat(real_cmdline, " root=/dev/vda rw ", sizeof(real_cmdline)); 1141 } 1142 1143 if (image_count) { 1144 kvm->nr_disks = image_count; 1145 kvm->disks = disk_image__open_all((struct disk_image_params *)&disk_image, image_count); 1146 if (IS_ERR(kvm->disks)) { 1147 r = PTR_ERR(kvm->disks); 1148 pr_err("disk_image__open_all() failed with error %ld\n", 1149 PTR_ERR(kvm->disks)); 1150 goto fail; 1151 } 1152 } 1153 1154 printf(" # %s run -k %s -m %Lu -c %d --name %s\n", KVM_BINARY_NAME, 1155 kernel_filename, ram_size / 1024 / 1024, nrcpus, guest_name); 1156 1157 if (!firmware_filename) { 1158 if (!kvm__load_kernel(kvm, kernel_filename, 1159 initrd_filename, real_cmdline, vidmode)) 1160 die("unable to load kernel %s", kernel_filename); 1161 1162 kvm->vmlinux = vmlinux_filename; 1163 r = symbol_init(kvm); 1164 if (r < 0) 1165 pr_debug("symbol_init() failed with error %d\n", r); 1166 } 1167 1168 ioport__setup_arch(); 1169 1170 r = rtc__init(kvm); 1171 if (r < 0) { 1172 pr_err("rtc__init() failed with error %d\n", r); 1173 goto fail; 1174 } 1175 1176 r = serial8250__init(kvm); 1177 if (r < 0) { 1178 pr_err("serial__init() failed with error %d\n", r); 1179 goto fail; 1180 } 1181 1182 r = virtio_blk__init(kvm); 1183 if (r < 0) { 1184 pr_err("virtio_blk__init() failed with error %d\n", r); 1185 goto fail; 1186 } 1187 1188 if (active_console == CONSOLE_VIRTIO) 1189 virtio_console__init(kvm); 1190 1191 if (virtio_rng) 1192 virtio_rng__init(kvm); 1193 1194 if (balloon) 1195 virtio_bln__init(kvm); 1196 1197 if (!network) 1198 network = DEFAULT_NETWORK; 1199 1200 virtio_9p__init(kvm); 1201 1202 for (i = 0; i < num_net_devices; i++) { 1203 net_params[i].kvm = kvm; 1204 virtio_net__init(&net_params[i]); 1205 } 1206 1207 if (num_net_devices == 0 && no_net == 0) { 1208 struct virtio_net_params net_params; 1209 1210 net_params = (struct virtio_net_params) { 1211 .guest_ip = guest_ip, 1212 .host_ip = host_ip, 1213 .kvm = kvm, 1214 .script = script, 1215 .mode = NET_MODE_USER, 1216 }; 1217 str_to_mac(guest_mac, net_params.guest_mac); 1218 str_to_mac(host_mac, net_params.host_mac); 1219 1220 virtio_net__init(&net_params); 1221 } 1222 1223 kvm__init_ram(kvm); 1224 1225 #ifdef CONFIG_X86 1226 kbd__init(kvm); 1227 #endif 1228 1229 pci_shmem__init(kvm); 1230 1231 if (vnc || sdl) { 1232 fb = vesa__init(kvm); 1233 if (IS_ERR(fb)) { 1234 pr_err("vesa__init() failed with error %ld\n", PTR_ERR(fb)); 1235 goto fail; 1236 } 1237 } 1238 1239 if (vnc && fb) { 1240 r = vnc__init(fb); 1241 if (r < 0) { 1242 pr_err("vnc__init() failed with error %d\n", r); 1243 goto fail; 1244 } 1245 } 1246 1247 if (sdl && fb) { 1248 sdl__init(fb); 1249 if (r < 0) { 1250 pr_err("sdl__init() failed with error %d\n", r); 1251 goto fail; 1252 } 1253 } 1254 1255 r = fb__start(); 1256 if (r < 0) { 1257 pr_err("fb__init() failed with error %d\n", r); 1258 goto fail; 1259 } 1260 1261 /* Device init all done; firmware init must 1262 * come after this (it may set up device trees etc.) 1263 */ 1264 1265 kvm__start_timer(kvm); 1266 1267 if (firmware_filename) { 1268 if (!kvm__load_firmware(kvm, firmware_filename)) 1269 die("unable to load firmware image %s: %s", firmware_filename, strerror(errno)); 1270 } else { 1271 kvm__arch_setup_firmware(kvm); 1272 if (r < 0) { 1273 pr_err("kvm__arch_setup_firmware() failed with error %d\n", r); 1274 goto fail; 1275 } 1276 } 1277 1278 for (i = 0; i < nrcpus; i++) { 1279 kvm_cpus[i] = kvm_cpu__init(kvm, i); 1280 if (!kvm_cpus[i]) 1281 die("unable to initialize KVM VCPU"); 1282 } 1283 1284 thread_pool__init(nr_online_cpus); 1285 fail: 1286 return r; 1287 } 1288 1289 static int kvm_cmd_run_work(void) 1290 { 1291 int i, r = -1; 1292 void *ret = NULL; 1293 1294 for (i = 0; i < nrcpus; i++) { 1295 if (pthread_create(&kvm_cpus[i]->thread, NULL, kvm_cpu_thread, kvm_cpus[i]) != 0) 1296 die("unable to create KVM VCPU thread"); 1297 } 1298 1299 /* Only VCPU #0 is going to exit by itself when shutting down */ 1300 if (pthread_join(kvm_cpus[0]->thread, &ret) != 0) 1301 r = 0; 1302 1303 kvm_cpu__delete(kvm_cpus[0]); 1304 kvm_cpus[0] = NULL; 1305 1306 for (i = 1; i < nrcpus; i++) { 1307 if (kvm_cpus[i]->is_running) { 1308 pthread_kill(kvm_cpus[i]->thread, SIGKVMEXIT); 1309 if (pthread_join(kvm_cpus[i]->thread, &ret) != 0) 1310 die("pthread_join"); 1311 kvm_cpu__delete(kvm_cpus[i]); 1312 } 1313 if (ret == NULL) 1314 r = 0; 1315 } 1316 1317 return r; 1318 } 1319 1320 static void kvm_cmd_run_exit(int guest_ret) 1321 { 1322 int r = 0; 1323 1324 compat__print_all_messages(); 1325 1326 r = symbol_exit(kvm); 1327 if (r < 0) 1328 pr_warning("symbol_exit() failed with error %d\n", r); 1329 1330 r = irq__exit(kvm); 1331 if (r < 0) 1332 pr_warning("irq__exit() failed with error %d\n", r); 1333 1334 fb__stop(); 1335 1336 r = virtio_blk__exit(kvm); 1337 if (r < 0) 1338 pr_warning("virtio_blk__exit() failed with error %d\n", r); 1339 1340 r = virtio_rng__exit(kvm); 1341 if (r < 0) 1342 pr_warning("virtio_rng__exit() failed with error %d\n", r); 1343 1344 r = disk_image__close_all(kvm->disks, image_count); 1345 if (r < 0) 1346 pr_warning("disk_image__close_all() failed with error %d\n", r); 1347 1348 r = serial8250__exit(kvm); 1349 if (r < 0) 1350 pr_warning("serial8250__exit() failed with error %d\n", r); 1351 1352 r = rtc__exit(kvm); 1353 if (r < 0) 1354 pr_warning("rtc__exit() failed with error %d\n", r); 1355 1356 r = kvm__arch_free_firmware(kvm); 1357 if (r < 0) 1358 pr_warning("kvm__arch_free_firmware() failed with error %d\n", r); 1359 1360 r = ioport__exit(kvm); 1361 if (r < 0) 1362 pr_warning("ioport__exit() failed with error %d\n", r); 1363 1364 r = ioeventfd__exit(kvm); 1365 if (r < 0) 1366 pr_warning("ioeventfd__exit() failed with error %d\n", r); 1367 1368 r = pci__exit(kvm); 1369 if (r < 0) 1370 pr_warning("pci__exit() failed with error %d\n", r); 1371 1372 r = kvm__exit(kvm); 1373 if (r < 0) 1374 pr_warning("pci__exit() failed with error %d\n", r); 1375 1376 free(kvm_cpus); 1377 1378 if (guest_ret == 0) 1379 printf("\n # KVM session ended normally.\n"); 1380 } 1381 1382 int kvm_cmd_run(int argc, const char **argv, const char *prefix) 1383 { 1384 int r, ret = -EFAULT; 1385 1386 r = kvm_cmd_run_init(argc, argv); 1387 if (r < 0) 1388 return r; 1389 1390 ret = kvm_cmd_run_work(); 1391 kvm_cmd_run_exit(ret); 1392 1393 return ret; 1394 } 1395