1 #include "kvm/builtin-run.h" 2 3 #include "kvm/builtin-setup.h" 4 #include "kvm/virtio-balloon.h" 5 #include "kvm/virtio-console.h" 6 #include "kvm/parse-options.h" 7 #include "kvm/8250-serial.h" 8 #include "kvm/framebuffer.h" 9 #include "kvm/disk-image.h" 10 #include "kvm/threadpool.h" 11 #include "kvm/virtio-scsi.h" 12 #include "kvm/virtio-blk.h" 13 #include "kvm/virtio-net.h" 14 #include "kvm/virtio-rng.h" 15 #include "kvm/ioeventfd.h" 16 #include "kvm/virtio-9p.h" 17 #include "kvm/barrier.h" 18 #include "kvm/kvm-cpu.h" 19 #include "kvm/ioport.h" 20 #include "kvm/symbol.h" 21 #include "kvm/i8042.h" 22 #include "kvm/mutex.h" 23 #include "kvm/term.h" 24 #include "kvm/util.h" 25 #include "kvm/strbuf.h" 26 #include "kvm/vesa.h" 27 #include "kvm/irq.h" 28 #include "kvm/kvm.h" 29 #include "kvm/pci.h" 30 #include "kvm/rtc.h" 31 #include "kvm/sdl.h" 32 #include "kvm/vnc.h" 33 #include "kvm/guest_compat.h" 34 #include "kvm/pci-shmem.h" 35 #include "kvm/kvm-ipc.h" 36 #include "kvm/builtin-debug.h" 37 38 #include <linux/types.h> 39 #include <linux/err.h> 40 41 #include <sys/utsname.h> 42 #include <sys/types.h> 43 #include <sys/stat.h> 44 #include <termios.h> 45 #include <signal.h> 46 #include <stdlib.h> 47 #include <string.h> 48 #include <unistd.h> 49 #include <ctype.h> 50 #include <stdio.h> 51 52 #define MB_SHIFT (20) 53 #define KB_SHIFT (10) 54 #define GB_SHIFT (30) 55 56 struct kvm *kvm; 57 struct kvm_cpu **kvm_cpus; 58 __thread struct kvm_cpu *current_kvm_cpu; 59 60 static int kvm_run_wrapper; 61 62 bool do_debug_print = false; 63 64 static int vidmode = -1; 65 66 extern char _binary_guest_init_start; 67 extern char _binary_guest_init_size; 68 69 static const char * const run_usage[] = { 70 "lkvm run [<options>] [<kernel image>]", 71 NULL 72 }; 73 74 enum { 75 KVM_RUN_DEFAULT, 76 KVM_RUN_SANDBOX, 77 }; 78 79 static int img_name_parser(const struct option *opt, const char *arg, int unset) 80 { 81 char path[PATH_MAX]; 82 struct stat st; 83 struct kvm *kvm = opt->ptr; 84 85 if (stat(arg, &st) == 0 && 86 S_ISDIR(st.st_mode)) { 87 char tmp[PATH_MAX]; 88 89 if (kvm->cfg.using_rootfs) 90 die("Please use only one rootfs directory atmost"); 91 92 if (realpath(arg, tmp) == 0 || 93 virtio_9p__register(kvm, tmp, "/dev/root") < 0) 94 die("Unable to initialize virtio 9p"); 95 kvm->cfg.using_rootfs = 1; 96 return 0; 97 } 98 99 snprintf(path, PATH_MAX, "%s%s", kvm__get_dir(), arg); 100 101 if (stat(path, &st) == 0 && 102 S_ISDIR(st.st_mode)) { 103 char tmp[PATH_MAX]; 104 105 if (kvm->cfg.using_rootfs) 106 die("Please use only one rootfs directory atmost"); 107 108 if (realpath(path, tmp) == 0 || 109 virtio_9p__register(kvm, tmp, "/dev/root") < 0) 110 die("Unable to initialize virtio 9p"); 111 if (virtio_9p__register(kvm, "/", "hostfs") < 0) 112 die("Unable to initialize virtio 9p"); 113 kvm_setup_resolv(arg); 114 kvm->cfg.using_rootfs = kvm->cfg.custom_rootfs = 1; 115 kvm->cfg.custom_rootfs_name = arg; 116 return 0; 117 } 118 119 return disk_img_name_parser(opt, arg, unset); 120 } 121 122 void kvm_run_set_wrapper_sandbox(void) 123 { 124 kvm_run_wrapper = KVM_RUN_SANDBOX; 125 } 126 127 static int virtio_9p_rootdir_parser(const struct option *opt, const char *arg, int unset) 128 { 129 char *tag_name; 130 char tmp[PATH_MAX]; 131 132 /* 133 * 9p dir can be of the form dirname,tag_name or 134 * just dirname. In the later case we use the 135 * default tag name 136 */ 137 tag_name = strstr(arg, ","); 138 if (tag_name) { 139 *tag_name = '\0'; 140 tag_name++; 141 } 142 if (realpath(arg, tmp)) { 143 if (virtio_9p__register(kvm, tmp, tag_name) < 0) 144 die("Unable to initialize virtio 9p"); 145 } else 146 die("Failed resolving 9p path"); 147 return 0; 148 } 149 150 static int tty_parser(const struct option *opt, const char *arg, int unset) 151 { 152 int tty = atoi(arg); 153 154 term_set_tty(tty); 155 156 return 0; 157 } 158 159 static inline void str_to_mac(const char *str, char *mac) 160 { 161 sscanf(str, "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx", 162 mac, mac+1, mac+2, mac+3, mac+4, mac+5); 163 } 164 static int set_net_param(struct virtio_net_params *p, const char *param, 165 const char *val) 166 { 167 if (strcmp(param, "guest_mac") == 0) { 168 str_to_mac(val, p->guest_mac); 169 } else if (strcmp(param, "mode") == 0) { 170 if (!strncmp(val, "user", 4)) { 171 int i; 172 173 for (i = 0; i < kvm->cfg.num_net_devices; i++) 174 if (kvm->cfg.net_params[i].mode == NET_MODE_USER) 175 die("Only one usermode network device allowed at a time"); 176 p->mode = NET_MODE_USER; 177 } else if (!strncmp(val, "tap", 3)) { 178 p->mode = NET_MODE_TAP; 179 } else if (!strncmp(val, "none", 4)) { 180 kvm->cfg.no_net = 1; 181 return -1; 182 } else 183 die("Unknown network mode %s, please use user, tap or none", kvm->cfg.network); 184 } else if (strcmp(param, "script") == 0) { 185 p->script = strdup(val); 186 } else if (strcmp(param, "guest_ip") == 0) { 187 p->guest_ip = strdup(val); 188 } else if (strcmp(param, "host_ip") == 0) { 189 p->host_ip = strdup(val); 190 } else if (strcmp(param, "trans") == 0) { 191 p->trans = strdup(val); 192 } else if (strcmp(param, "vhost") == 0) { 193 p->vhost = atoi(val); 194 } else if (strcmp(param, "fd") == 0) { 195 p->fd = atoi(val); 196 } else 197 die("Unknown network parameter %s", param); 198 199 return 0; 200 } 201 202 static int netdev_parser(const struct option *opt, const char *arg, int unset) 203 { 204 struct virtio_net_params p; 205 char *buf = NULL, *cmd = NULL, *cur = NULL; 206 bool on_cmd = true; 207 208 if (arg) { 209 buf = strdup(arg); 210 if (buf == NULL) 211 die("Failed allocating new net buffer"); 212 cur = strtok(buf, ",="); 213 } 214 215 p = (struct virtio_net_params) { 216 .guest_ip = DEFAULT_GUEST_ADDR, 217 .host_ip = DEFAULT_HOST_ADDR, 218 .script = DEFAULT_SCRIPT, 219 .mode = NET_MODE_TAP, 220 }; 221 222 str_to_mac(DEFAULT_GUEST_MAC, p.guest_mac); 223 p.guest_mac[5] += kvm->cfg.num_net_devices; 224 225 while (cur) { 226 if (on_cmd) { 227 cmd = cur; 228 } else { 229 if (set_net_param(&p, cmd, cur) < 0) 230 goto done; 231 } 232 on_cmd = !on_cmd; 233 234 cur = strtok(NULL, ",="); 235 }; 236 237 kvm->cfg.num_net_devices++; 238 239 kvm->cfg.net_params = realloc(kvm->cfg.net_params, kvm->cfg.num_net_devices * sizeof(*kvm->cfg.net_params)); 240 if (kvm->cfg.net_params == NULL) 241 die("Failed adding new network device"); 242 243 kvm->cfg.net_params[kvm->cfg.num_net_devices - 1] = p; 244 245 done: 246 free(buf); 247 return 0; 248 } 249 250 static int shmem_parser(const struct option *opt, const char *arg, int unset) 251 { 252 const u64 default_size = SHMEM_DEFAULT_SIZE; 253 const u64 default_phys_addr = SHMEM_DEFAULT_ADDR; 254 const char *default_handle = SHMEM_DEFAULT_HANDLE; 255 struct shmem_info *si = malloc(sizeof(struct shmem_info)); 256 u64 phys_addr; 257 u64 size; 258 char *handle = NULL; 259 int create = 0; 260 const char *p = arg; 261 char *next; 262 int base = 10; 263 int verbose = 0; 264 265 const int skip_pci = strlen("pci:"); 266 if (verbose) 267 pr_info("shmem_parser(%p,%s,%d)", opt, arg, unset); 268 /* parse out optional addr family */ 269 if (strcasestr(p, "pci:")) { 270 p += skip_pci; 271 } else if (strcasestr(p, "mem:")) { 272 die("I can't add to E820 map yet.\n"); 273 } 274 /* parse out physical addr */ 275 base = 10; 276 if (strcasestr(p, "0x")) 277 base = 16; 278 phys_addr = strtoll(p, &next, base); 279 if (next == p && phys_addr == 0) { 280 pr_info("shmem: no physical addr specified, using default."); 281 phys_addr = default_phys_addr; 282 } 283 if (*next != ':' && *next != '\0') 284 die("shmem: unexpected chars after phys addr.\n"); 285 if (*next == '\0') 286 p = next; 287 else 288 p = next + 1; 289 /* parse out size */ 290 base = 10; 291 if (strcasestr(p, "0x")) 292 base = 16; 293 size = strtoll(p, &next, base); 294 if (next == p && size == 0) { 295 pr_info("shmem: no size specified, using default."); 296 size = default_size; 297 } 298 /* look for [KMGkmg][Bb]* uses base 2. */ 299 int skip_B = 0; 300 if (strspn(next, "KMGkmg")) { /* might have a prefix */ 301 if (*(next + 1) == 'B' || *(next + 1) == 'b') 302 skip_B = 1; 303 switch (*next) { 304 case 'K': 305 case 'k': 306 size = size << KB_SHIFT; 307 break; 308 case 'M': 309 case 'm': 310 size = size << MB_SHIFT; 311 break; 312 case 'G': 313 case 'g': 314 size = size << GB_SHIFT; 315 break; 316 default: 317 die("shmem: bug in detecting size prefix."); 318 break; 319 } 320 next += 1 + skip_B; 321 } 322 if (*next != ':' && *next != '\0') { 323 die("shmem: unexpected chars after phys size. <%c><%c>\n", 324 *next, *p); 325 } 326 if (*next == '\0') 327 p = next; 328 else 329 p = next + 1; 330 /* parse out optional shmem handle */ 331 const int skip_handle = strlen("handle="); 332 next = strcasestr(p, "handle="); 333 if (*p && next) { 334 if (p != next) 335 die("unexpected chars before handle\n"); 336 p += skip_handle; 337 next = strchrnul(p, ':'); 338 if (next - p) { 339 handle = malloc(next - p + 1); 340 strncpy(handle, p, next - p); 341 handle[next - p] = '\0'; /* just in case. */ 342 } 343 if (*next == '\0') 344 p = next; 345 else 346 p = next + 1; 347 } 348 /* parse optional create flag to see if we should create shm seg. */ 349 if (*p && strcasestr(p, "create")) { 350 create = 1; 351 p += strlen("create"); 352 } 353 if (*p != '\0') 354 die("shmem: unexpected trailing chars\n"); 355 if (handle == NULL) { 356 handle = malloc(strlen(default_handle) + 1); 357 strcpy(handle, default_handle); 358 } 359 if (verbose) { 360 pr_info("shmem: phys_addr = %llx", phys_addr); 361 pr_info("shmem: size = %llx", size); 362 pr_info("shmem: handle = %s", handle); 363 pr_info("shmem: create = %d", create); 364 } 365 366 si->phys_addr = phys_addr; 367 si->size = size; 368 si->handle = handle; 369 si->create = create; 370 pci_shmem__register_mem(si); /* ownership of si, etc. passed on. */ 371 return 0; 372 } 373 374 #define BUILD_OPTIONS(name, cfg, kvm) \ 375 struct option name[] = { \ 376 OPT_GROUP("Basic options:"), \ 377 OPT_STRING('\0', "name", &(cfg)->guest_name, "guest name", \ 378 "A name for the guest"), \ 379 OPT_INTEGER('c', "cpus", &(cfg)->nrcpus, "Number of CPUs"), \ 380 OPT_U64('m', "mem", &(cfg)->ram_size, "Virtual machine memory size\ 381 in MiB."), \ 382 OPT_CALLBACK('\0', "shmem", NULL, \ 383 "[pci:]<addr>:<size>[:handle=<handle>][:create]", \ 384 "Share host shmem with guest via pci device", \ 385 shmem_parser, NULL), \ 386 OPT_CALLBACK('d', "disk", kvm, "image or rootfs_dir", "Disk \ 387 image or rootfs directory", img_name_parser, \ 388 kvm), \ 389 OPT_BOOLEAN('\0', "balloon", &(cfg)->balloon, "Enable virtio \ 390 balloon"), \ 391 OPT_BOOLEAN('\0', "vnc", &(cfg)->vnc, "Enable VNC framebuffer"),\ 392 OPT_BOOLEAN('\0', "sdl", &(cfg)->sdl, "Enable SDL framebuffer"),\ 393 OPT_BOOLEAN('\0', "rng", &(cfg)->virtio_rng, "Enable virtio Random\ 394 Number Generator"), \ 395 OPT_CALLBACK('\0', "9p", NULL, "dir_to_share,tag_name", \ 396 "Enable virtio 9p to share files between host and \ 397 guest", virtio_9p_rootdir_parser, NULL), \ 398 OPT_STRING('\0', "console", &(cfg)->console, "serial, virtio or \ 399 hv", "Console to use"), \ 400 OPT_STRING('\0', "dev", &(cfg)->dev, "device_file", \ 401 "KVM device file"), \ 402 OPT_CALLBACK('\0', "tty", NULL, "tty id", \ 403 "Remap guest TTY into a pty on the host", \ 404 tty_parser, NULL), \ 405 OPT_STRING('\0', "sandbox", &(cfg)->sandbox, "script", \ 406 "Run this script when booting into custom \ 407 rootfs"), \ 408 OPT_STRING('\0', "hugetlbfs", &(cfg)->hugetlbfs_path, "path", \ 409 "Hugetlbfs path"), \ 410 \ 411 OPT_GROUP("Kernel options:"), \ 412 OPT_STRING('k', "kernel", &(cfg)->kernel_filename, "kernel", \ 413 "Kernel to boot in virtual machine"), \ 414 OPT_STRING('i', "initrd", &(cfg)->initrd_filename, "initrd", \ 415 "Initial RAM disk image"), \ 416 OPT_STRING('p', "params", &(cfg)->kernel_cmdline, "params", \ 417 "Kernel command line arguments"), \ 418 OPT_STRING('f', "firmware", &(cfg)->firmware_filename, "firmware",\ 419 "Firmware image to boot in virtual machine"), \ 420 \ 421 OPT_GROUP("Networking options:"), \ 422 OPT_CALLBACK_DEFAULT('n', "network", NULL, "network params", \ 423 "Create a new guest NIC", \ 424 netdev_parser, NULL, NULL), \ 425 OPT_BOOLEAN('\0', "no-dhcp", &(cfg)->no_dhcp, "Disable kernel DHCP\ 426 in rootfs mode"), \ 427 \ 428 OPT_GROUP("BIOS options:"), \ 429 OPT_INTEGER('\0', "vidmode", &vidmode, \ 430 "Video mode"), \ 431 \ 432 OPT_GROUP("Debug options:"), \ 433 OPT_BOOLEAN('\0', "debug", &do_debug_print, \ 434 "Enable debug messages"), \ 435 OPT_BOOLEAN('\0', "debug-single-step", &(cfg)->single_step, \ 436 "Enable single stepping"), \ 437 OPT_BOOLEAN('\0', "debug-ioport", &(cfg)->ioport_debug, \ 438 "Enable ioport debugging"), \ 439 OPT_BOOLEAN('\0', "debug-mmio", &(cfg)->mmio_debug, \ 440 "Enable MMIO debugging"), \ 441 OPT_INTEGER('\0', "debug-iodelay", &(cfg)->debug_iodelay, \ 442 "Delay IO by millisecond"), \ 443 OPT_END() \ 444 }; 445 446 /* 447 * Serialize debug printout so that the output of multiple vcpus does not 448 * get mixed up: 449 */ 450 static int printout_done; 451 452 static void handle_sigusr1(int sig) 453 { 454 struct kvm_cpu *cpu = current_kvm_cpu; 455 int fd = kvm_cpu__get_debug_fd(); 456 457 if (!cpu || cpu->needs_nmi) 458 return; 459 460 dprintf(fd, "\n #\n # vCPU #%ld's dump:\n #\n", cpu->cpu_id); 461 kvm_cpu__show_registers(cpu); 462 kvm_cpu__show_code(cpu); 463 kvm_cpu__show_page_tables(cpu); 464 fflush(stdout); 465 printout_done = 1; 466 mb(); 467 } 468 469 /* Pause/resume the guest using SIGUSR2 */ 470 static int is_paused; 471 472 static void handle_pause(int fd, u32 type, u32 len, u8 *msg) 473 { 474 if (WARN_ON(len)) 475 return; 476 477 if (type == KVM_IPC_RESUME && is_paused) { 478 kvm->vm_state = KVM_VMSTATE_RUNNING; 479 kvm__continue(); 480 } else if (type == KVM_IPC_PAUSE && !is_paused) { 481 kvm->vm_state = KVM_VMSTATE_PAUSED; 482 ioctl(kvm->vm_fd, KVM_KVMCLOCK_CTRL); 483 kvm__pause(); 484 } else { 485 return; 486 } 487 488 is_paused = !is_paused; 489 } 490 491 static void handle_vmstate(int fd, u32 type, u32 len, u8 *msg) 492 { 493 int r = 0; 494 495 if (type == KVM_IPC_VMSTATE) 496 r = write(fd, &kvm->vm_state, sizeof(kvm->vm_state)); 497 498 if (r < 0) 499 pr_warning("Failed sending VMSTATE"); 500 } 501 502 static void handle_debug(int fd, u32 type, u32 len, u8 *msg) 503 { 504 int i; 505 struct debug_cmd_params *params; 506 u32 dbg_type; 507 u32 vcpu; 508 509 if (WARN_ON(type != KVM_IPC_DEBUG || len != sizeof(*params))) 510 return; 511 512 params = (void *)msg; 513 dbg_type = params->dbg_type; 514 vcpu = params->cpu; 515 516 if (dbg_type & KVM_DEBUG_CMD_TYPE_SYSRQ) 517 serial8250__inject_sysrq(kvm, params->sysrq); 518 519 if (dbg_type & KVM_DEBUG_CMD_TYPE_NMI) { 520 if ((int)vcpu >= kvm->nrcpus) 521 return; 522 523 kvm_cpus[vcpu]->needs_nmi = 1; 524 pthread_kill(kvm_cpus[vcpu]->thread, SIGUSR1); 525 } 526 527 if (!(dbg_type & KVM_DEBUG_CMD_TYPE_DUMP)) 528 return; 529 530 for (i = 0; i < kvm->nrcpus; i++) { 531 struct kvm_cpu *cpu = kvm_cpus[i]; 532 533 if (!cpu) 534 continue; 535 536 printout_done = 0; 537 538 kvm_cpu__set_debug_fd(fd); 539 pthread_kill(cpu->thread, SIGUSR1); 540 /* 541 * Wait for the vCPU to dump state before signalling 542 * the next thread. Since this is debug code it does 543 * not matter that we are burning CPU time a bit: 544 */ 545 while (!printout_done) 546 mb(); 547 } 548 549 close(fd); 550 551 serial8250__inject_sysrq(kvm, 'p'); 552 } 553 554 static void handle_sigalrm(int sig) 555 { 556 kvm__arch_periodic_poll(kvm); 557 } 558 559 static void handle_stop(int fd, u32 type, u32 len, u8 *msg) 560 { 561 if (WARN_ON(type != KVM_IPC_STOP || len)) 562 return; 563 564 kvm_cpu__reboot(); 565 } 566 567 static void *kvm_cpu_thread(void *arg) 568 { 569 current_kvm_cpu = arg; 570 571 if (kvm_cpu__start(current_kvm_cpu)) 572 goto panic_kvm; 573 574 return (void *) (intptr_t) 0; 575 576 panic_kvm: 577 fprintf(stderr, "KVM exit reason: %u (\"%s\")\n", 578 current_kvm_cpu->kvm_run->exit_reason, 579 kvm_exit_reasons[current_kvm_cpu->kvm_run->exit_reason]); 580 if (current_kvm_cpu->kvm_run->exit_reason == KVM_EXIT_UNKNOWN) 581 fprintf(stderr, "KVM exit code: 0x%Lu\n", 582 current_kvm_cpu->kvm_run->hw.hardware_exit_reason); 583 584 kvm_cpu__set_debug_fd(STDOUT_FILENO); 585 kvm_cpu__show_registers(current_kvm_cpu); 586 kvm_cpu__show_code(current_kvm_cpu); 587 kvm_cpu__show_page_tables(current_kvm_cpu); 588 589 return (void *) (intptr_t) 1; 590 } 591 592 static char kernel[PATH_MAX]; 593 594 static const char *host_kernels[] = { 595 "/boot/vmlinuz", 596 "/boot/bzImage", 597 NULL 598 }; 599 600 static const char *default_kernels[] = { 601 "./bzImage", 602 "arch/" BUILD_ARCH "/boot/bzImage", 603 "../../arch/" BUILD_ARCH "/boot/bzImage", 604 NULL 605 }; 606 607 static const char *default_vmlinux[] = { 608 "vmlinux", 609 "../../../vmlinux", 610 "../../vmlinux", 611 NULL 612 }; 613 614 static void kernel_usage_with_options(void) 615 { 616 const char **k; 617 struct utsname uts; 618 619 fprintf(stderr, "Fatal: could not find default kernel image in:\n"); 620 k = &default_kernels[0]; 621 while (*k) { 622 fprintf(stderr, "\t%s\n", *k); 623 k++; 624 } 625 626 if (uname(&uts) < 0) 627 return; 628 629 k = &host_kernels[0]; 630 while (*k) { 631 if (snprintf(kernel, PATH_MAX, "%s-%s", *k, uts.release) < 0) 632 return; 633 fprintf(stderr, "\t%s\n", kernel); 634 k++; 635 } 636 fprintf(stderr, "\nPlease see '%s run --help' for more options.\n\n", 637 KVM_BINARY_NAME); 638 } 639 640 static u64 host_ram_size(void) 641 { 642 long page_size; 643 long nr_pages; 644 645 nr_pages = sysconf(_SC_PHYS_PAGES); 646 if (nr_pages < 0) { 647 pr_warning("sysconf(_SC_PHYS_PAGES) failed"); 648 return 0; 649 } 650 651 page_size = sysconf(_SC_PAGE_SIZE); 652 if (page_size < 0) { 653 pr_warning("sysconf(_SC_PAGE_SIZE) failed"); 654 return 0; 655 } 656 657 return (nr_pages * page_size) >> MB_SHIFT; 658 } 659 660 /* 661 * If user didn't specify how much memory it wants to allocate for the guest, 662 * avoid filling the whole host RAM. 663 */ 664 #define RAM_SIZE_RATIO 0.8 665 666 static u64 get_ram_size(int nr_cpus) 667 { 668 u64 available; 669 u64 ram_size; 670 671 ram_size = 64 * (nr_cpus + 3); 672 673 available = host_ram_size() * RAM_SIZE_RATIO; 674 if (!available) 675 available = MIN_RAM_SIZE_MB; 676 677 if (ram_size > available) 678 ram_size = available; 679 680 return ram_size; 681 } 682 683 static const char *find_kernel(void) 684 { 685 const char **k; 686 struct stat st; 687 struct utsname uts; 688 689 k = &default_kernels[0]; 690 while (*k) { 691 if (stat(*k, &st) < 0 || !S_ISREG(st.st_mode)) { 692 k++; 693 continue; 694 } 695 strncpy(kernel, *k, PATH_MAX); 696 return kernel; 697 } 698 699 if (uname(&uts) < 0) 700 return NULL; 701 702 k = &host_kernels[0]; 703 while (*k) { 704 if (snprintf(kernel, PATH_MAX, "%s-%s", *k, uts.release) < 0) 705 return NULL; 706 707 if (stat(kernel, &st) < 0 || !S_ISREG(st.st_mode)) { 708 k++; 709 continue; 710 } 711 return kernel; 712 713 } 714 return NULL; 715 } 716 717 static const char *find_vmlinux(void) 718 { 719 const char **vmlinux; 720 721 vmlinux = &default_vmlinux[0]; 722 while (*vmlinux) { 723 struct stat st; 724 725 if (stat(*vmlinux, &st) < 0 || !S_ISREG(st.st_mode)) { 726 vmlinux++; 727 continue; 728 } 729 return *vmlinux; 730 } 731 return NULL; 732 } 733 734 void kvm_run_help(void) 735 { 736 BUILD_OPTIONS(options, &kvm->cfg, kvm); 737 usage_with_options(run_usage, options); 738 } 739 740 static int kvm_setup_guest_init(void) 741 { 742 const char *rootfs = kvm->cfg.custom_rootfs_name; 743 char tmp[PATH_MAX]; 744 size_t size; 745 int fd, ret; 746 char *data; 747 748 /* Setup /virt/init */ 749 size = (size_t)&_binary_guest_init_size; 750 data = (char *)&_binary_guest_init_start; 751 snprintf(tmp, PATH_MAX, "%s%s/virt/init", kvm__get_dir(), rootfs); 752 remove(tmp); 753 fd = open(tmp, O_CREAT | O_WRONLY, 0755); 754 if (fd < 0) 755 die("Fail to setup %s", tmp); 756 ret = xwrite(fd, data, size); 757 if (ret < 0) 758 die("Fail to setup %s", tmp); 759 close(fd); 760 761 return 0; 762 } 763 764 static int kvm_run_set_sandbox(void) 765 { 766 const char *guestfs_name = kvm->cfg.custom_rootfs_name; 767 char path[PATH_MAX], script[PATH_MAX], *tmp; 768 769 snprintf(path, PATH_MAX, "%s%s/virt/sandbox.sh", kvm__get_dir(), guestfs_name); 770 771 remove(path); 772 773 if (kvm->cfg.sandbox == NULL) 774 return 0; 775 776 tmp = realpath(kvm->cfg.sandbox, NULL); 777 if (tmp == NULL) 778 return -ENOMEM; 779 780 snprintf(script, PATH_MAX, "/host/%s", tmp); 781 free(tmp); 782 783 return symlink(script, path); 784 } 785 786 static void kvm_write_sandbox_cmd_exactly(int fd, const char *arg) 787 { 788 const char *single_quote; 789 790 if (!*arg) { /* zero length string */ 791 if (write(fd, "''", 2) <= 0) 792 die("Failed writing sandbox script"); 793 return; 794 } 795 796 while (*arg) { 797 single_quote = strchrnul(arg, '\''); 798 799 /* write non-single-quote string as #('string') */ 800 if (arg != single_quote) { 801 if (write(fd, "'", 1) <= 0 || 802 write(fd, arg, single_quote - arg) <= 0 || 803 write(fd, "'", 1) <= 0) 804 die("Failed writing sandbox script"); 805 } 806 807 /* write single quote as #("'") */ 808 if (*single_quote) { 809 if (write(fd, "\"'\"", 3) <= 0) 810 die("Failed writing sandbox script"); 811 } else 812 break; 813 814 arg = single_quote + 1; 815 } 816 } 817 818 static void resolve_program(const char *src, char *dst, size_t len) 819 { 820 struct stat st; 821 int err; 822 823 err = stat(src, &st); 824 825 if (!err && S_ISREG(st.st_mode)) { 826 char resolved_path[PATH_MAX]; 827 828 if (!realpath(src, resolved_path)) 829 die("Unable to resolve program %s: %s\n", src, strerror(errno)); 830 831 snprintf(dst, len, "/host%s", resolved_path); 832 } else 833 strncpy(dst, src, len); 834 } 835 836 static void kvm_run_write_sandbox_cmd(const char **argv, int argc) 837 { 838 const char script_hdr[] = "#! /bin/bash\n\n"; 839 char program[PATH_MAX]; 840 int fd; 841 842 remove(kvm->cfg.sandbox); 843 844 fd = open(kvm->cfg.sandbox, O_RDWR | O_CREAT, 0777); 845 if (fd < 0) 846 die("Failed creating sandbox script"); 847 848 if (write(fd, script_hdr, sizeof(script_hdr) - 1) <= 0) 849 die("Failed writing sandbox script"); 850 851 resolve_program(argv[0], program, PATH_MAX); 852 kvm_write_sandbox_cmd_exactly(fd, program); 853 854 argv++; 855 argc--; 856 857 while (argc) { 858 if (write(fd, " ", 1) <= 0) 859 die("Failed writing sandbox script"); 860 861 kvm_write_sandbox_cmd_exactly(fd, argv[0]); 862 argv++; 863 argc--; 864 } 865 if (write(fd, "\n", 1) <= 0) 866 die("Failed writing sandbox script"); 867 868 close(fd); 869 } 870 871 static int kvm_cmd_run_init(int argc, const char **argv) 872 { 873 static char real_cmdline[2048], default_name[20]; 874 struct framebuffer *fb = NULL; 875 unsigned int nr_online_cpus; 876 int max_cpus, recommended_cpus; 877 int i, r; 878 879 kvm = kvm__new(); 880 if (IS_ERR(kvm)) 881 return PTR_ERR(kvm); 882 883 signal(SIGALRM, handle_sigalrm); 884 kvm_ipc__register_handler(KVM_IPC_DEBUG, handle_debug); 885 signal(SIGUSR1, handle_sigusr1); 886 kvm_ipc__register_handler(KVM_IPC_PAUSE, handle_pause); 887 kvm_ipc__register_handler(KVM_IPC_RESUME, handle_pause); 888 kvm_ipc__register_handler(KVM_IPC_STOP, handle_stop); 889 kvm_ipc__register_handler(KVM_IPC_VMSTATE, handle_vmstate); 890 891 nr_online_cpus = sysconf(_SC_NPROCESSORS_ONLN); 892 kvm->cfg.custom_rootfs_name = "default"; 893 894 while (argc != 0) { 895 BUILD_OPTIONS(options, &kvm->cfg, kvm); 896 argc = parse_options(argc, argv, options, run_usage, 897 PARSE_OPT_STOP_AT_NON_OPTION | 898 PARSE_OPT_KEEP_DASHDASH); 899 if (argc != 0) { 900 /* Cusrom options, should have been handled elsewhere */ 901 if (strcmp(argv[0], "--") == 0) { 902 if (kvm_run_wrapper == KVM_RUN_SANDBOX) { 903 kvm->cfg.sandbox = DEFAULT_SANDBOX_FILENAME; 904 kvm_run_write_sandbox_cmd(argv+1, argc-1); 905 break; 906 } 907 } 908 909 if ((kvm_run_wrapper == KVM_RUN_DEFAULT && kvm->cfg.kernel_filename) || 910 (kvm_run_wrapper == KVM_RUN_SANDBOX && kvm->cfg.sandbox)) { 911 fprintf(stderr, "Cannot handle parameter: " 912 "%s\n", argv[0]); 913 usage_with_options(run_usage, options); 914 free(kvm); 915 return -EINVAL; 916 } 917 if (kvm_run_wrapper == KVM_RUN_SANDBOX) { 918 /* 919 * first unhandled parameter is treated as 920 * sandbox command 921 */ 922 kvm->cfg.sandbox = DEFAULT_SANDBOX_FILENAME; 923 kvm_run_write_sandbox_cmd(argv, argc); 924 } else { 925 /* 926 * first unhandled parameter is treated as a kernel 927 * image 928 */ 929 kvm->cfg.kernel_filename = argv[0]; 930 } 931 argv++; 932 argc--; 933 } 934 935 } 936 937 kvm->nr_disks = kvm->cfg.image_count; 938 939 if (!kvm->cfg.kernel_filename) 940 kvm->cfg.kernel_filename = find_kernel(); 941 942 if (!kvm->cfg.kernel_filename) { 943 kernel_usage_with_options(); 944 return -EINVAL; 945 } 946 947 kvm->cfg.vmlinux_filename = find_vmlinux(); 948 949 if (kvm->cfg.nrcpus == 0) 950 kvm->cfg.nrcpus = nr_online_cpus; 951 952 if (!kvm->cfg.ram_size) 953 kvm->cfg.ram_size = get_ram_size(kvm->cfg.nrcpus); 954 955 if (kvm->cfg.ram_size < MIN_RAM_SIZE_MB) 956 die("Not enough memory specified: %lluMB (min %lluMB)", kvm->cfg.ram_size, MIN_RAM_SIZE_MB); 957 958 if (kvm->cfg.ram_size > host_ram_size()) 959 pr_warning("Guest memory size %lluMB exceeds host physical RAM size %lluMB", kvm->cfg.ram_size, host_ram_size()); 960 961 kvm->cfg.ram_size <<= MB_SHIFT; 962 963 if (!kvm->cfg.dev) 964 kvm->cfg.dev = DEFAULT_KVM_DEV; 965 966 if (!kvm->cfg.console) 967 kvm->cfg.console = DEFAULT_CONSOLE; 968 969 if (!strncmp(kvm->cfg.console, "virtio", 6)) 970 kvm->cfg.active_console = CONSOLE_VIRTIO; 971 else if (!strncmp(kvm->cfg.console, "serial", 6)) 972 kvm->cfg.active_console = CONSOLE_8250; 973 else if (!strncmp(kvm->cfg.console, "hv", 2)) 974 kvm->cfg.active_console = CONSOLE_HV; 975 else 976 pr_warning("No console!"); 977 978 if (!kvm->cfg.host_ip) 979 kvm->cfg.host_ip = DEFAULT_HOST_ADDR; 980 981 if (!kvm->cfg.guest_ip) 982 kvm->cfg.guest_ip = DEFAULT_GUEST_ADDR; 983 984 if (!kvm->cfg.guest_mac) 985 kvm->cfg.guest_mac = DEFAULT_GUEST_MAC; 986 987 if (!kvm->cfg.host_mac) 988 kvm->cfg.host_mac = DEFAULT_HOST_MAC; 989 990 if (!kvm->cfg.script) 991 kvm->cfg.script = DEFAULT_SCRIPT; 992 993 term_init(); 994 995 if (!kvm->cfg.guest_name) { 996 if (kvm->cfg.custom_rootfs) { 997 kvm->cfg.guest_name = kvm->cfg.custom_rootfs_name; 998 } else { 999 sprintf(default_name, "guest-%u", getpid()); 1000 kvm->cfg.guest_name = default_name; 1001 } 1002 } 1003 1004 r = kvm__init(kvm); 1005 if (r) 1006 goto fail; 1007 1008 r = ioeventfd__init(kvm); 1009 if (r < 0) { 1010 pr_err("ioeventfd__init() failed with error %d\n", r); 1011 goto fail; 1012 } 1013 1014 max_cpus = kvm__max_cpus(kvm); 1015 recommended_cpus = kvm__recommended_cpus(kvm); 1016 1017 if (kvm->cfg.nrcpus > max_cpus) { 1018 printf(" # Limit the number of CPUs to %d\n", max_cpus); 1019 kvm->cfg.nrcpus = max_cpus; 1020 } else if (kvm->cfg.nrcpus > recommended_cpus) { 1021 printf(" # Warning: The maximum recommended amount of VCPUs" 1022 " is %d\n", recommended_cpus); 1023 } 1024 1025 kvm->nrcpus = kvm->cfg.nrcpus; 1026 1027 /* Alloc one pointer too many, so array ends up 0-terminated */ 1028 kvm_cpus = calloc(kvm->nrcpus + 1, sizeof(void *)); 1029 if (!kvm_cpus) 1030 die("Couldn't allocate array for %d CPUs", kvm->nrcpus); 1031 1032 r = irq__init(kvm); 1033 if (r < 0) { 1034 pr_err("irq__init() failed with error %d\n", r); 1035 goto fail; 1036 } 1037 1038 r = pci__init(kvm); 1039 if (r < 0) { 1040 pr_err("pci__init() failed with error %d\n", r); 1041 goto fail; 1042 } 1043 1044 r = ioport__init(kvm); 1045 if (r < 0) { 1046 pr_err("ioport__init() failed with error %d\n", r); 1047 goto fail; 1048 } 1049 1050 /* 1051 * vidmode should be either specified 1052 * either set by default 1053 */ 1054 if (kvm->cfg.vnc || kvm->cfg.sdl) { 1055 if (vidmode == -1) 1056 vidmode = 0x312; 1057 } else { 1058 vidmode = 0; 1059 } 1060 1061 memset(real_cmdline, 0, sizeof(real_cmdline)); 1062 kvm__arch_set_cmdline(real_cmdline, kvm->cfg.vnc || kvm->cfg.sdl); 1063 1064 if (strlen(real_cmdline) > 0) 1065 strcat(real_cmdline, " "); 1066 1067 if (kvm->cfg.kernel_cmdline) 1068 strlcat(real_cmdline, kvm->cfg.kernel_cmdline, sizeof(real_cmdline)); 1069 1070 if (!kvm->cfg.using_rootfs && !kvm->cfg.disk_image[0].filename && !kvm->cfg.initrd_filename) { 1071 char tmp[PATH_MAX]; 1072 1073 kvm_setup_create_new(kvm->cfg.custom_rootfs_name); 1074 kvm_setup_resolv(kvm->cfg.custom_rootfs_name); 1075 1076 snprintf(tmp, PATH_MAX, "%s%s", kvm__get_dir(), "default"); 1077 if (virtio_9p__register(kvm, tmp, "/dev/root") < 0) 1078 die("Unable to initialize virtio 9p"); 1079 if (virtio_9p__register(kvm, "/", "hostfs") < 0) 1080 die("Unable to initialize virtio 9p"); 1081 kvm->cfg.using_rootfs = kvm->cfg.custom_rootfs = 1; 1082 } 1083 1084 if (kvm->cfg.using_rootfs) { 1085 strcat(real_cmdline, " root=/dev/root rw rootflags=rw,trans=virtio,version=9p2000.L rootfstype=9p"); 1086 if (kvm->cfg.custom_rootfs) { 1087 kvm_run_set_sandbox(); 1088 1089 strcat(real_cmdline, " init=/virt/init"); 1090 1091 if (!kvm->cfg.no_dhcp) 1092 strcat(real_cmdline, " ip=dhcp"); 1093 if (kvm_setup_guest_init()) 1094 die("Failed to setup init for guest."); 1095 } 1096 } else if (!strstr(real_cmdline, "root=")) { 1097 strlcat(real_cmdline, " root=/dev/vda rw ", sizeof(real_cmdline)); 1098 } 1099 1100 r = disk_image__init(kvm); 1101 if (r < 0) { 1102 pr_err("disk_image__init() failed with error %d\n", r); 1103 goto fail; 1104 } 1105 1106 printf(" # %s run -k %s -m %Lu -c %d --name %s\n", KVM_BINARY_NAME, 1107 kvm->cfg.kernel_filename, kvm->cfg.ram_size / 1024 / 1024, kvm->cfg.nrcpus, kvm->cfg.guest_name); 1108 1109 if (!kvm->cfg.firmware_filename) { 1110 if (!kvm__load_kernel(kvm, kvm->cfg.kernel_filename, 1111 kvm->cfg.initrd_filename, real_cmdline, vidmode)) 1112 die("unable to load kernel %s", kvm->cfg.kernel_filename); 1113 1114 kvm->vmlinux = kvm->cfg.vmlinux_filename; 1115 r = symbol_init(kvm); 1116 if (r < 0) 1117 pr_debug("symbol_init() failed with error %d\n", r); 1118 } 1119 1120 ioport__setup_arch(); 1121 1122 r = rtc__init(kvm); 1123 if (r < 0) { 1124 pr_err("rtc__init() failed with error %d\n", r); 1125 goto fail; 1126 } 1127 1128 r = serial8250__init(kvm); 1129 if (r < 0) { 1130 pr_err("serial__init() failed with error %d\n", r); 1131 goto fail; 1132 } 1133 1134 r = virtio_blk__init(kvm); 1135 if (r < 0) { 1136 pr_err("virtio_blk__init() failed with error %d\n", r); 1137 goto fail; 1138 } 1139 1140 r = virtio_scsi_init(kvm); 1141 if (r < 0) { 1142 pr_err("virtio_scsi_init() failed with error %d\n", r); 1143 goto fail; 1144 } 1145 1146 1147 if (kvm->cfg.active_console == CONSOLE_VIRTIO) 1148 virtio_console__init(kvm); 1149 1150 if (kvm->cfg.virtio_rng) 1151 virtio_rng__init(kvm); 1152 1153 if (kvm->cfg.balloon) 1154 virtio_bln__init(kvm); 1155 1156 if (!kvm->cfg.network) 1157 kvm->cfg.network = DEFAULT_NETWORK; 1158 1159 virtio_9p__init(kvm); 1160 1161 for (i = 0; i < kvm->cfg.num_net_devices; i++) { 1162 kvm->cfg.net_params[i].kvm = kvm; 1163 virtio_net__init(&kvm->cfg.net_params[i]); 1164 } 1165 1166 if (kvm->cfg.num_net_devices == 0 && kvm->cfg.no_net == 0) { 1167 struct virtio_net_params net_params; 1168 1169 net_params = (struct virtio_net_params) { 1170 .guest_ip = kvm->cfg.guest_ip, 1171 .host_ip = kvm->cfg.host_ip, 1172 .kvm = kvm, 1173 .script = kvm->cfg.script, 1174 .mode = NET_MODE_USER, 1175 }; 1176 str_to_mac(kvm->cfg.guest_mac, net_params.guest_mac); 1177 str_to_mac(kvm->cfg.host_mac, net_params.host_mac); 1178 1179 virtio_net__init(&net_params); 1180 } 1181 1182 kvm__init_ram(kvm); 1183 1184 #ifdef CONFIG_X86 1185 kbd__init(kvm); 1186 #endif 1187 1188 pci_shmem__init(kvm); 1189 1190 if (kvm->cfg.vnc || kvm->cfg.sdl) { 1191 fb = vesa__init(kvm); 1192 if (IS_ERR(fb)) { 1193 pr_err("vesa__init() failed with error %ld\n", PTR_ERR(fb)); 1194 goto fail; 1195 } 1196 } 1197 1198 if (kvm->cfg.vnc && fb) { 1199 r = vnc__init(fb); 1200 if (r < 0) { 1201 pr_err("vnc__init() failed with error %d\n", r); 1202 goto fail; 1203 } 1204 } 1205 1206 if (kvm->cfg.sdl && fb) { 1207 sdl__init(fb); 1208 if (r < 0) { 1209 pr_err("sdl__init() failed with error %d\n", r); 1210 goto fail; 1211 } 1212 } 1213 1214 r = fb__start(); 1215 if (r < 0) { 1216 pr_err("fb__init() failed with error %d\n", r); 1217 goto fail; 1218 } 1219 1220 /* Device init all done; firmware init must 1221 * come after this (it may set up device trees etc.) 1222 */ 1223 1224 kvm__start_timer(kvm); 1225 1226 if (kvm->cfg.firmware_filename) { 1227 if (!kvm__load_firmware(kvm, kvm->cfg.firmware_filename)) 1228 die("unable to load firmware image %s: %s", kvm->cfg.firmware_filename, strerror(errno)); 1229 } else { 1230 kvm__arch_setup_firmware(kvm); 1231 if (r < 0) { 1232 pr_err("kvm__arch_setup_firmware() failed with error %d\n", r); 1233 goto fail; 1234 } 1235 } 1236 1237 for (i = 0; i < kvm->nrcpus; i++) { 1238 kvm_cpus[i] = kvm_cpu__init(kvm, i); 1239 if (!kvm_cpus[i]) 1240 die("unable to initialize KVM VCPU"); 1241 } 1242 1243 thread_pool__init(nr_online_cpus); 1244 fail: 1245 return r; 1246 } 1247 1248 static int kvm_cmd_run_work(void) 1249 { 1250 int i, r = -1; 1251 void *ret = NULL; 1252 1253 for (i = 0; i < kvm->nrcpus; i++) { 1254 if (pthread_create(&kvm_cpus[i]->thread, NULL, kvm_cpu_thread, kvm_cpus[i]) != 0) 1255 die("unable to create KVM VCPU thread"); 1256 } 1257 1258 /* Only VCPU #0 is going to exit by itself when shutting down */ 1259 if (pthread_join(kvm_cpus[0]->thread, &ret) != 0) 1260 r = 0; 1261 1262 kvm_cpu__delete(kvm_cpus[0]); 1263 kvm_cpus[0] = NULL; 1264 1265 for (i = 1; i < kvm->nrcpus; i++) { 1266 if (kvm_cpus[i]->is_running) { 1267 pthread_kill(kvm_cpus[i]->thread, SIGKVMEXIT); 1268 if (pthread_join(kvm_cpus[i]->thread, &ret) != 0) 1269 die("pthread_join"); 1270 kvm_cpu__delete(kvm_cpus[i]); 1271 } 1272 if (ret == NULL) 1273 r = 0; 1274 } 1275 1276 return r; 1277 } 1278 1279 static void kvm_cmd_run_exit(int guest_ret) 1280 { 1281 int r = 0; 1282 1283 compat__print_all_messages(); 1284 1285 r = symbol_exit(kvm); 1286 if (r < 0) 1287 pr_warning("symbol_exit() failed with error %d\n", r); 1288 1289 r = irq__exit(kvm); 1290 if (r < 0) 1291 pr_warning("irq__exit() failed with error %d\n", r); 1292 1293 fb__stop(); 1294 1295 r = virtio_scsi_exit(kvm); 1296 if (r < 0) 1297 pr_warning("virtio_scsi_exit() failed with error %d\n", r); 1298 1299 r = virtio_blk__exit(kvm); 1300 if (r < 0) 1301 pr_warning("virtio_blk__exit() failed with error %d\n", r); 1302 1303 r = virtio_rng__exit(kvm); 1304 if (r < 0) 1305 pr_warning("virtio_rng__exit() failed with error %d\n", r); 1306 1307 r = disk_image__exit(kvm); 1308 if (r < 0) 1309 pr_warning("disk_image__exit() failed with error %d\n", r); 1310 1311 r = serial8250__exit(kvm); 1312 if (r < 0) 1313 pr_warning("serial8250__exit() failed with error %d\n", r); 1314 1315 r = rtc__exit(kvm); 1316 if (r < 0) 1317 pr_warning("rtc__exit() failed with error %d\n", r); 1318 1319 r = kvm__arch_free_firmware(kvm); 1320 if (r < 0) 1321 pr_warning("kvm__arch_free_firmware() failed with error %d\n", r); 1322 1323 r = ioport__exit(kvm); 1324 if (r < 0) 1325 pr_warning("ioport__exit() failed with error %d\n", r); 1326 1327 r = ioeventfd__exit(kvm); 1328 if (r < 0) 1329 pr_warning("ioeventfd__exit() failed with error %d\n", r); 1330 1331 r = pci__exit(kvm); 1332 if (r < 0) 1333 pr_warning("pci__exit() failed with error %d\n", r); 1334 1335 r = kvm__exit(kvm); 1336 if (r < 0) 1337 pr_warning("pci__exit() failed with error %d\n", r); 1338 1339 free(kvm_cpus); 1340 1341 if (guest_ret == 0) 1342 printf("\n # KVM session ended normally.\n"); 1343 } 1344 1345 int kvm_cmd_run(int argc, const char **argv, const char *prefix) 1346 { 1347 int r, ret = -EFAULT; 1348 1349 r = kvm_cmd_run_init(argc, argv); 1350 if (r < 0) 1351 return r; 1352 1353 ret = kvm_cmd_run_work(); 1354 kvm_cmd_run_exit(ret); 1355 1356 return ret; 1357 } 1358