1 #include "kvm/builtin-run.h" 2 3 #include "kvm/builtin-setup.h" 4 #include "kvm/virtio-balloon.h" 5 #include "kvm/virtio-console.h" 6 #include "kvm/parse-options.h" 7 #include "kvm/8250-serial.h" 8 #include "kvm/framebuffer.h" 9 #include "kvm/disk-image.h" 10 #include "kvm/threadpool.h" 11 #include "kvm/virtio-scsi.h" 12 #include "kvm/virtio-blk.h" 13 #include "kvm/virtio-net.h" 14 #include "kvm/virtio-rng.h" 15 #include "kvm/ioeventfd.h" 16 #include "kvm/virtio-9p.h" 17 #include "kvm/barrier.h" 18 #include "kvm/kvm-cpu.h" 19 #include "kvm/ioport.h" 20 #include "kvm/symbol.h" 21 #include "kvm/i8042.h" 22 #include "kvm/mutex.h" 23 #include "kvm/term.h" 24 #include "kvm/util.h" 25 #include "kvm/strbuf.h" 26 #include "kvm/vesa.h" 27 #include "kvm/irq.h" 28 #include "kvm/kvm.h" 29 #include "kvm/pci.h" 30 #include "kvm/rtc.h" 31 #include "kvm/sdl.h" 32 #include "kvm/vnc.h" 33 #include "kvm/guest_compat.h" 34 #include "kvm/pci-shmem.h" 35 #include "kvm/kvm-ipc.h" 36 #include "kvm/builtin-debug.h" 37 38 #include <linux/types.h> 39 #include <linux/err.h> 40 41 #include <sys/utsname.h> 42 #include <sys/types.h> 43 #include <sys/stat.h> 44 #include <termios.h> 45 #include <signal.h> 46 #include <stdlib.h> 47 #include <string.h> 48 #include <unistd.h> 49 #include <ctype.h> 50 #include <stdio.h> 51 52 #define MB_SHIFT (20) 53 #define KB_SHIFT (10) 54 #define GB_SHIFT (30) 55 56 struct kvm *kvm; 57 __thread struct kvm_cpu *current_kvm_cpu; 58 59 static int kvm_run_wrapper; 60 61 bool do_debug_print = false; 62 63 static int vidmode = -1; 64 65 extern char _binary_guest_init_start; 66 extern char _binary_guest_init_size; 67 68 static const char * const run_usage[] = { 69 "lkvm run [<options>] [<kernel image>]", 70 NULL 71 }; 72 73 enum { 74 KVM_RUN_DEFAULT, 75 KVM_RUN_SANDBOX, 76 }; 77 78 static int img_name_parser(const struct option *opt, const char *arg, int unset) 79 { 80 char path[PATH_MAX]; 81 struct stat st; 82 struct kvm *kvm = opt->ptr; 83 84 if (stat(arg, &st) == 0 && 85 S_ISDIR(st.st_mode)) { 86 char tmp[PATH_MAX]; 87 88 if (kvm->cfg.using_rootfs) 89 die("Please use only one rootfs directory atmost"); 90 91 if (realpath(arg, tmp) == 0 || 92 virtio_9p__register(kvm, tmp, "/dev/root") < 0) 93 die("Unable to initialize virtio 9p"); 94 kvm->cfg.using_rootfs = 1; 95 return 0; 96 } 97 98 snprintf(path, PATH_MAX, "%s%s", kvm__get_dir(), arg); 99 100 if (stat(path, &st) == 0 && 101 S_ISDIR(st.st_mode)) { 102 char tmp[PATH_MAX]; 103 104 if (kvm->cfg.using_rootfs) 105 die("Please use only one rootfs directory atmost"); 106 107 if (realpath(path, tmp) == 0 || 108 virtio_9p__register(kvm, tmp, "/dev/root") < 0) 109 die("Unable to initialize virtio 9p"); 110 if (virtio_9p__register(kvm, "/", "hostfs") < 0) 111 die("Unable to initialize virtio 9p"); 112 kvm_setup_resolv(arg); 113 kvm->cfg.using_rootfs = kvm->cfg.custom_rootfs = 1; 114 kvm->cfg.custom_rootfs_name = arg; 115 return 0; 116 } 117 118 return disk_img_name_parser(opt, arg, unset); 119 } 120 121 void kvm_run_set_wrapper_sandbox(void) 122 { 123 kvm_run_wrapper = KVM_RUN_SANDBOX; 124 } 125 126 static int virtio_9p_rootdir_parser(const struct option *opt, const char *arg, int unset) 127 { 128 char *tag_name; 129 char tmp[PATH_MAX]; 130 131 /* 132 * 9p dir can be of the form dirname,tag_name or 133 * just dirname. In the later case we use the 134 * default tag name 135 */ 136 tag_name = strstr(arg, ","); 137 if (tag_name) { 138 *tag_name = '\0'; 139 tag_name++; 140 } 141 if (realpath(arg, tmp)) { 142 if (virtio_9p__register(kvm, tmp, tag_name) < 0) 143 die("Unable to initialize virtio 9p"); 144 } else 145 die("Failed resolving 9p path"); 146 return 0; 147 } 148 149 static int tty_parser(const struct option *opt, const char *arg, int unset) 150 { 151 int tty = atoi(arg); 152 153 term_set_tty(tty); 154 155 return 0; 156 } 157 158 static inline void str_to_mac(const char *str, char *mac) 159 { 160 sscanf(str, "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx", 161 mac, mac+1, mac+2, mac+3, mac+4, mac+5); 162 } 163 static int set_net_param(struct virtio_net_params *p, const char *param, 164 const char *val) 165 { 166 if (strcmp(param, "guest_mac") == 0) { 167 str_to_mac(val, p->guest_mac); 168 } else if (strcmp(param, "mode") == 0) { 169 if (!strncmp(val, "user", 4)) { 170 int i; 171 172 for (i = 0; i < kvm->cfg.num_net_devices; i++) 173 if (kvm->cfg.net_params[i].mode == NET_MODE_USER) 174 die("Only one usermode network device allowed at a time"); 175 p->mode = NET_MODE_USER; 176 } else if (!strncmp(val, "tap", 3)) { 177 p->mode = NET_MODE_TAP; 178 } else if (!strncmp(val, "none", 4)) { 179 kvm->cfg.no_net = 1; 180 return -1; 181 } else 182 die("Unknown network mode %s, please use user, tap or none", kvm->cfg.network); 183 } else if (strcmp(param, "script") == 0) { 184 p->script = strdup(val); 185 } else if (strcmp(param, "guest_ip") == 0) { 186 p->guest_ip = strdup(val); 187 } else if (strcmp(param, "host_ip") == 0) { 188 p->host_ip = strdup(val); 189 } else if (strcmp(param, "trans") == 0) { 190 p->trans = strdup(val); 191 } else if (strcmp(param, "vhost") == 0) { 192 p->vhost = atoi(val); 193 } else if (strcmp(param, "fd") == 0) { 194 p->fd = atoi(val); 195 } else 196 die("Unknown network parameter %s", param); 197 198 return 0; 199 } 200 201 static int netdev_parser(const struct option *opt, const char *arg, int unset) 202 { 203 struct virtio_net_params p; 204 char *buf = NULL, *cmd = NULL, *cur = NULL; 205 bool on_cmd = true; 206 207 if (arg) { 208 buf = strdup(arg); 209 if (buf == NULL) 210 die("Failed allocating new net buffer"); 211 cur = strtok(buf, ",="); 212 } 213 214 p = (struct virtio_net_params) { 215 .guest_ip = DEFAULT_GUEST_ADDR, 216 .host_ip = DEFAULT_HOST_ADDR, 217 .script = DEFAULT_SCRIPT, 218 .mode = NET_MODE_TAP, 219 }; 220 221 str_to_mac(DEFAULT_GUEST_MAC, p.guest_mac); 222 p.guest_mac[5] += kvm->cfg.num_net_devices; 223 224 while (cur) { 225 if (on_cmd) { 226 cmd = cur; 227 } else { 228 if (set_net_param(&p, cmd, cur) < 0) 229 goto done; 230 } 231 on_cmd = !on_cmd; 232 233 cur = strtok(NULL, ",="); 234 }; 235 236 kvm->cfg.num_net_devices++; 237 238 kvm->cfg.net_params = realloc(kvm->cfg.net_params, kvm->cfg.num_net_devices * sizeof(*kvm->cfg.net_params)); 239 if (kvm->cfg.net_params == NULL) 240 die("Failed adding new network device"); 241 242 kvm->cfg.net_params[kvm->cfg.num_net_devices - 1] = p; 243 244 done: 245 free(buf); 246 return 0; 247 } 248 249 static int shmem_parser(const struct option *opt, const char *arg, int unset) 250 { 251 const u64 default_size = SHMEM_DEFAULT_SIZE; 252 const u64 default_phys_addr = SHMEM_DEFAULT_ADDR; 253 const char *default_handle = SHMEM_DEFAULT_HANDLE; 254 struct shmem_info *si = malloc(sizeof(struct shmem_info)); 255 u64 phys_addr; 256 u64 size; 257 char *handle = NULL; 258 int create = 0; 259 const char *p = arg; 260 char *next; 261 int base = 10; 262 int verbose = 0; 263 264 const int skip_pci = strlen("pci:"); 265 if (verbose) 266 pr_info("shmem_parser(%p,%s,%d)", opt, arg, unset); 267 /* parse out optional addr family */ 268 if (strcasestr(p, "pci:")) { 269 p += skip_pci; 270 } else if (strcasestr(p, "mem:")) { 271 die("I can't add to E820 map yet.\n"); 272 } 273 /* parse out physical addr */ 274 base = 10; 275 if (strcasestr(p, "0x")) 276 base = 16; 277 phys_addr = strtoll(p, &next, base); 278 if (next == p && phys_addr == 0) { 279 pr_info("shmem: no physical addr specified, using default."); 280 phys_addr = default_phys_addr; 281 } 282 if (*next != ':' && *next != '\0') 283 die("shmem: unexpected chars after phys addr.\n"); 284 if (*next == '\0') 285 p = next; 286 else 287 p = next + 1; 288 /* parse out size */ 289 base = 10; 290 if (strcasestr(p, "0x")) 291 base = 16; 292 size = strtoll(p, &next, base); 293 if (next == p && size == 0) { 294 pr_info("shmem: no size specified, using default."); 295 size = default_size; 296 } 297 /* look for [KMGkmg][Bb]* uses base 2. */ 298 int skip_B = 0; 299 if (strspn(next, "KMGkmg")) { /* might have a prefix */ 300 if (*(next + 1) == 'B' || *(next + 1) == 'b') 301 skip_B = 1; 302 switch (*next) { 303 case 'K': 304 case 'k': 305 size = size << KB_SHIFT; 306 break; 307 case 'M': 308 case 'm': 309 size = size << MB_SHIFT; 310 break; 311 case 'G': 312 case 'g': 313 size = size << GB_SHIFT; 314 break; 315 default: 316 die("shmem: bug in detecting size prefix."); 317 break; 318 } 319 next += 1 + skip_B; 320 } 321 if (*next != ':' && *next != '\0') { 322 die("shmem: unexpected chars after phys size. <%c><%c>\n", 323 *next, *p); 324 } 325 if (*next == '\0') 326 p = next; 327 else 328 p = next + 1; 329 /* parse out optional shmem handle */ 330 const int skip_handle = strlen("handle="); 331 next = strcasestr(p, "handle="); 332 if (*p && next) { 333 if (p != next) 334 die("unexpected chars before handle\n"); 335 p += skip_handle; 336 next = strchrnul(p, ':'); 337 if (next - p) { 338 handle = malloc(next - p + 1); 339 strncpy(handle, p, next - p); 340 handle[next - p] = '\0'; /* just in case. */ 341 } 342 if (*next == '\0') 343 p = next; 344 else 345 p = next + 1; 346 } 347 /* parse optional create flag to see if we should create shm seg. */ 348 if (*p && strcasestr(p, "create")) { 349 create = 1; 350 p += strlen("create"); 351 } 352 if (*p != '\0') 353 die("shmem: unexpected trailing chars\n"); 354 if (handle == NULL) { 355 handle = malloc(strlen(default_handle) + 1); 356 strcpy(handle, default_handle); 357 } 358 if (verbose) { 359 pr_info("shmem: phys_addr = %llx", phys_addr); 360 pr_info("shmem: size = %llx", size); 361 pr_info("shmem: handle = %s", handle); 362 pr_info("shmem: create = %d", create); 363 } 364 365 si->phys_addr = phys_addr; 366 si->size = size; 367 si->handle = handle; 368 si->create = create; 369 pci_shmem__register_mem(si); /* ownership of si, etc. passed on. */ 370 return 0; 371 } 372 373 #define BUILD_OPTIONS(name, cfg, kvm) \ 374 struct option name[] = { \ 375 OPT_GROUP("Basic options:"), \ 376 OPT_STRING('\0', "name", &(cfg)->guest_name, "guest name", \ 377 "A name for the guest"), \ 378 OPT_INTEGER('c', "cpus", &(cfg)->nrcpus, "Number of CPUs"), \ 379 OPT_U64('m', "mem", &(cfg)->ram_size, "Virtual machine memory size\ 380 in MiB."), \ 381 OPT_CALLBACK('\0', "shmem", NULL, \ 382 "[pci:]<addr>:<size>[:handle=<handle>][:create]", \ 383 "Share host shmem with guest via pci device", \ 384 shmem_parser, NULL), \ 385 OPT_CALLBACK('d', "disk", kvm, "image or rootfs_dir", "Disk \ 386 image or rootfs directory", img_name_parser, \ 387 kvm), \ 388 OPT_BOOLEAN('\0', "balloon", &(cfg)->balloon, "Enable virtio \ 389 balloon"), \ 390 OPT_BOOLEAN('\0', "vnc", &(cfg)->vnc, "Enable VNC framebuffer"),\ 391 OPT_BOOLEAN('\0', "sdl", &(cfg)->sdl, "Enable SDL framebuffer"),\ 392 OPT_BOOLEAN('\0', "rng", &(cfg)->virtio_rng, "Enable virtio Random\ 393 Number Generator"), \ 394 OPT_CALLBACK('\0', "9p", NULL, "dir_to_share,tag_name", \ 395 "Enable virtio 9p to share files between host and \ 396 guest", virtio_9p_rootdir_parser, NULL), \ 397 OPT_STRING('\0', "console", &(cfg)->console, "serial, virtio or \ 398 hv", "Console to use"), \ 399 OPT_STRING('\0', "dev", &(cfg)->dev, "device_file", \ 400 "KVM device file"), \ 401 OPT_CALLBACK('\0', "tty", NULL, "tty id", \ 402 "Remap guest TTY into a pty on the host", \ 403 tty_parser, NULL), \ 404 OPT_STRING('\0', "sandbox", &(cfg)->sandbox, "script", \ 405 "Run this script when booting into custom \ 406 rootfs"), \ 407 OPT_STRING('\0', "hugetlbfs", &(cfg)->hugetlbfs_path, "path", \ 408 "Hugetlbfs path"), \ 409 \ 410 OPT_GROUP("Kernel options:"), \ 411 OPT_STRING('k', "kernel", &(cfg)->kernel_filename, "kernel", \ 412 "Kernel to boot in virtual machine"), \ 413 OPT_STRING('i', "initrd", &(cfg)->initrd_filename, "initrd", \ 414 "Initial RAM disk image"), \ 415 OPT_STRING('p', "params", &(cfg)->kernel_cmdline, "params", \ 416 "Kernel command line arguments"), \ 417 OPT_STRING('f', "firmware", &(cfg)->firmware_filename, "firmware",\ 418 "Firmware image to boot in virtual machine"), \ 419 \ 420 OPT_GROUP("Networking options:"), \ 421 OPT_CALLBACK_DEFAULT('n', "network", NULL, "network params", \ 422 "Create a new guest NIC", \ 423 netdev_parser, NULL, NULL), \ 424 OPT_BOOLEAN('\0', "no-dhcp", &(cfg)->no_dhcp, "Disable kernel DHCP\ 425 in rootfs mode"), \ 426 \ 427 OPT_GROUP("BIOS options:"), \ 428 OPT_INTEGER('\0', "vidmode", &vidmode, \ 429 "Video mode"), \ 430 \ 431 OPT_GROUP("Debug options:"), \ 432 OPT_BOOLEAN('\0', "debug", &do_debug_print, \ 433 "Enable debug messages"), \ 434 OPT_BOOLEAN('\0', "debug-single-step", &(cfg)->single_step, \ 435 "Enable single stepping"), \ 436 OPT_BOOLEAN('\0', "debug-ioport", &(cfg)->ioport_debug, \ 437 "Enable ioport debugging"), \ 438 OPT_BOOLEAN('\0', "debug-mmio", &(cfg)->mmio_debug, \ 439 "Enable MMIO debugging"), \ 440 OPT_INTEGER('\0', "debug-iodelay", &(cfg)->debug_iodelay, \ 441 "Delay IO by millisecond"), \ 442 OPT_END() \ 443 }; 444 445 /* 446 * Serialize debug printout so that the output of multiple vcpus does not 447 * get mixed up: 448 */ 449 static int printout_done; 450 451 static void handle_sigusr1(int sig) 452 { 453 struct kvm_cpu *cpu = current_kvm_cpu; 454 int fd = kvm_cpu__get_debug_fd(); 455 456 if (!cpu || cpu->needs_nmi) 457 return; 458 459 dprintf(fd, "\n #\n # vCPU #%ld's dump:\n #\n", cpu->cpu_id); 460 kvm_cpu__show_registers(cpu); 461 kvm_cpu__show_code(cpu); 462 kvm_cpu__show_page_tables(cpu); 463 fflush(stdout); 464 printout_done = 1; 465 mb(); 466 } 467 468 /* Pause/resume the guest using SIGUSR2 */ 469 static int is_paused; 470 471 static void handle_pause(int fd, u32 type, u32 len, u8 *msg) 472 { 473 if (WARN_ON(len)) 474 return; 475 476 if (type == KVM_IPC_RESUME && is_paused) { 477 kvm->vm_state = KVM_VMSTATE_RUNNING; 478 kvm__continue(); 479 } else if (type == KVM_IPC_PAUSE && !is_paused) { 480 kvm->vm_state = KVM_VMSTATE_PAUSED; 481 ioctl(kvm->vm_fd, KVM_KVMCLOCK_CTRL); 482 kvm__pause(); 483 } else { 484 return; 485 } 486 487 is_paused = !is_paused; 488 } 489 490 static void handle_vmstate(int fd, u32 type, u32 len, u8 *msg) 491 { 492 int r = 0; 493 494 if (type == KVM_IPC_VMSTATE) 495 r = write(fd, &kvm->vm_state, sizeof(kvm->vm_state)); 496 497 if (r < 0) 498 pr_warning("Failed sending VMSTATE"); 499 } 500 501 static void handle_debug(int fd, u32 type, u32 len, u8 *msg) 502 { 503 int i; 504 struct debug_cmd_params *params; 505 u32 dbg_type; 506 u32 vcpu; 507 508 if (WARN_ON(type != KVM_IPC_DEBUG || len != sizeof(*params))) 509 return; 510 511 params = (void *)msg; 512 dbg_type = params->dbg_type; 513 vcpu = params->cpu; 514 515 if (dbg_type & KVM_DEBUG_CMD_TYPE_SYSRQ) 516 serial8250__inject_sysrq(kvm, params->sysrq); 517 518 if (dbg_type & KVM_DEBUG_CMD_TYPE_NMI) { 519 if ((int)vcpu >= kvm->nrcpus) 520 return; 521 522 kvm->cpus[vcpu]->needs_nmi = 1; 523 pthread_kill(kvm->cpus[vcpu]->thread, SIGUSR1); 524 } 525 526 if (!(dbg_type & KVM_DEBUG_CMD_TYPE_DUMP)) 527 return; 528 529 for (i = 0; i < kvm->nrcpus; i++) { 530 struct kvm_cpu *cpu = kvm->cpus[i]; 531 532 if (!cpu) 533 continue; 534 535 printout_done = 0; 536 537 kvm_cpu__set_debug_fd(fd); 538 pthread_kill(cpu->thread, SIGUSR1); 539 /* 540 * Wait for the vCPU to dump state before signalling 541 * the next thread. Since this is debug code it does 542 * not matter that we are burning CPU time a bit: 543 */ 544 while (!printout_done) 545 mb(); 546 } 547 548 close(fd); 549 550 serial8250__inject_sysrq(kvm, 'p'); 551 } 552 553 static void handle_sigalrm(int sig) 554 { 555 kvm__arch_periodic_poll(kvm); 556 } 557 558 static void handle_stop(int fd, u32 type, u32 len, u8 *msg) 559 { 560 if (WARN_ON(type != KVM_IPC_STOP || len)) 561 return; 562 563 kvm_cpu__reboot(kvm); 564 } 565 566 static void *kvm_cpu_thread(void *arg) 567 { 568 current_kvm_cpu = arg; 569 570 if (kvm_cpu__start(current_kvm_cpu)) 571 goto panic_kvm; 572 573 return (void *) (intptr_t) 0; 574 575 panic_kvm: 576 fprintf(stderr, "KVM exit reason: %u (\"%s\")\n", 577 current_kvm_cpu->kvm_run->exit_reason, 578 kvm_exit_reasons[current_kvm_cpu->kvm_run->exit_reason]); 579 if (current_kvm_cpu->kvm_run->exit_reason == KVM_EXIT_UNKNOWN) 580 fprintf(stderr, "KVM exit code: 0x%Lu\n", 581 current_kvm_cpu->kvm_run->hw.hardware_exit_reason); 582 583 kvm_cpu__set_debug_fd(STDOUT_FILENO); 584 kvm_cpu__show_registers(current_kvm_cpu); 585 kvm_cpu__show_code(current_kvm_cpu); 586 kvm_cpu__show_page_tables(current_kvm_cpu); 587 588 return (void *) (intptr_t) 1; 589 } 590 591 static char kernel[PATH_MAX]; 592 593 static const char *host_kernels[] = { 594 "/boot/vmlinuz", 595 "/boot/bzImage", 596 NULL 597 }; 598 599 static const char *default_kernels[] = { 600 "./bzImage", 601 "arch/" BUILD_ARCH "/boot/bzImage", 602 "../../arch/" BUILD_ARCH "/boot/bzImage", 603 NULL 604 }; 605 606 static const char *default_vmlinux[] = { 607 "vmlinux", 608 "../../../vmlinux", 609 "../../vmlinux", 610 NULL 611 }; 612 613 static void kernel_usage_with_options(void) 614 { 615 const char **k; 616 struct utsname uts; 617 618 fprintf(stderr, "Fatal: could not find default kernel image in:\n"); 619 k = &default_kernels[0]; 620 while (*k) { 621 fprintf(stderr, "\t%s\n", *k); 622 k++; 623 } 624 625 if (uname(&uts) < 0) 626 return; 627 628 k = &host_kernels[0]; 629 while (*k) { 630 if (snprintf(kernel, PATH_MAX, "%s-%s", *k, uts.release) < 0) 631 return; 632 fprintf(stderr, "\t%s\n", kernel); 633 k++; 634 } 635 fprintf(stderr, "\nPlease see '%s run --help' for more options.\n\n", 636 KVM_BINARY_NAME); 637 } 638 639 static u64 host_ram_size(void) 640 { 641 long page_size; 642 long nr_pages; 643 644 nr_pages = sysconf(_SC_PHYS_PAGES); 645 if (nr_pages < 0) { 646 pr_warning("sysconf(_SC_PHYS_PAGES) failed"); 647 return 0; 648 } 649 650 page_size = sysconf(_SC_PAGE_SIZE); 651 if (page_size < 0) { 652 pr_warning("sysconf(_SC_PAGE_SIZE) failed"); 653 return 0; 654 } 655 656 return (nr_pages * page_size) >> MB_SHIFT; 657 } 658 659 /* 660 * If user didn't specify how much memory it wants to allocate for the guest, 661 * avoid filling the whole host RAM. 662 */ 663 #define RAM_SIZE_RATIO 0.8 664 665 static u64 get_ram_size(int nr_cpus) 666 { 667 u64 available; 668 u64 ram_size; 669 670 ram_size = 64 * (nr_cpus + 3); 671 672 available = host_ram_size() * RAM_SIZE_RATIO; 673 if (!available) 674 available = MIN_RAM_SIZE_MB; 675 676 if (ram_size > available) 677 ram_size = available; 678 679 return ram_size; 680 } 681 682 static const char *find_kernel(void) 683 { 684 const char **k; 685 struct stat st; 686 struct utsname uts; 687 688 k = &default_kernels[0]; 689 while (*k) { 690 if (stat(*k, &st) < 0 || !S_ISREG(st.st_mode)) { 691 k++; 692 continue; 693 } 694 strncpy(kernel, *k, PATH_MAX); 695 return kernel; 696 } 697 698 if (uname(&uts) < 0) 699 return NULL; 700 701 k = &host_kernels[0]; 702 while (*k) { 703 if (snprintf(kernel, PATH_MAX, "%s-%s", *k, uts.release) < 0) 704 return NULL; 705 706 if (stat(kernel, &st) < 0 || !S_ISREG(st.st_mode)) { 707 k++; 708 continue; 709 } 710 return kernel; 711 712 } 713 return NULL; 714 } 715 716 static const char *find_vmlinux(void) 717 { 718 const char **vmlinux; 719 720 vmlinux = &default_vmlinux[0]; 721 while (*vmlinux) { 722 struct stat st; 723 724 if (stat(*vmlinux, &st) < 0 || !S_ISREG(st.st_mode)) { 725 vmlinux++; 726 continue; 727 } 728 return *vmlinux; 729 } 730 return NULL; 731 } 732 733 void kvm_run_help(void) 734 { 735 BUILD_OPTIONS(options, &kvm->cfg, kvm); 736 usage_with_options(run_usage, options); 737 } 738 739 static int kvm_setup_guest_init(void) 740 { 741 const char *rootfs = kvm->cfg.custom_rootfs_name; 742 char tmp[PATH_MAX]; 743 size_t size; 744 int fd, ret; 745 char *data; 746 747 /* Setup /virt/init */ 748 size = (size_t)&_binary_guest_init_size; 749 data = (char *)&_binary_guest_init_start; 750 snprintf(tmp, PATH_MAX, "%s%s/virt/init", kvm__get_dir(), rootfs); 751 remove(tmp); 752 fd = open(tmp, O_CREAT | O_WRONLY, 0755); 753 if (fd < 0) 754 die("Fail to setup %s", tmp); 755 ret = xwrite(fd, data, size); 756 if (ret < 0) 757 die("Fail to setup %s", tmp); 758 close(fd); 759 760 return 0; 761 } 762 763 static int kvm_run_set_sandbox(void) 764 { 765 const char *guestfs_name = kvm->cfg.custom_rootfs_name; 766 char path[PATH_MAX], script[PATH_MAX], *tmp; 767 768 snprintf(path, PATH_MAX, "%s%s/virt/sandbox.sh", kvm__get_dir(), guestfs_name); 769 770 remove(path); 771 772 if (kvm->cfg.sandbox == NULL) 773 return 0; 774 775 tmp = realpath(kvm->cfg.sandbox, NULL); 776 if (tmp == NULL) 777 return -ENOMEM; 778 779 snprintf(script, PATH_MAX, "/host/%s", tmp); 780 free(tmp); 781 782 return symlink(script, path); 783 } 784 785 static void kvm_write_sandbox_cmd_exactly(int fd, const char *arg) 786 { 787 const char *single_quote; 788 789 if (!*arg) { /* zero length string */ 790 if (write(fd, "''", 2) <= 0) 791 die("Failed writing sandbox script"); 792 return; 793 } 794 795 while (*arg) { 796 single_quote = strchrnul(arg, '\''); 797 798 /* write non-single-quote string as #('string') */ 799 if (arg != single_quote) { 800 if (write(fd, "'", 1) <= 0 || 801 write(fd, arg, single_quote - arg) <= 0 || 802 write(fd, "'", 1) <= 0) 803 die("Failed writing sandbox script"); 804 } 805 806 /* write single quote as #("'") */ 807 if (*single_quote) { 808 if (write(fd, "\"'\"", 3) <= 0) 809 die("Failed writing sandbox script"); 810 } else 811 break; 812 813 arg = single_quote + 1; 814 } 815 } 816 817 static void resolve_program(const char *src, char *dst, size_t len) 818 { 819 struct stat st; 820 int err; 821 822 err = stat(src, &st); 823 824 if (!err && S_ISREG(st.st_mode)) { 825 char resolved_path[PATH_MAX]; 826 827 if (!realpath(src, resolved_path)) 828 die("Unable to resolve program %s: %s\n", src, strerror(errno)); 829 830 snprintf(dst, len, "/host%s", resolved_path); 831 } else 832 strncpy(dst, src, len); 833 } 834 835 static void kvm_run_write_sandbox_cmd(const char **argv, int argc) 836 { 837 const char script_hdr[] = "#! /bin/bash\n\n"; 838 char program[PATH_MAX]; 839 int fd; 840 841 remove(kvm->cfg.sandbox); 842 843 fd = open(kvm->cfg.sandbox, O_RDWR | O_CREAT, 0777); 844 if (fd < 0) 845 die("Failed creating sandbox script"); 846 847 if (write(fd, script_hdr, sizeof(script_hdr) - 1) <= 0) 848 die("Failed writing sandbox script"); 849 850 resolve_program(argv[0], program, PATH_MAX); 851 kvm_write_sandbox_cmd_exactly(fd, program); 852 853 argv++; 854 argc--; 855 856 while (argc) { 857 if (write(fd, " ", 1) <= 0) 858 die("Failed writing sandbox script"); 859 860 kvm_write_sandbox_cmd_exactly(fd, argv[0]); 861 argv++; 862 argc--; 863 } 864 if (write(fd, "\n", 1) <= 0) 865 die("Failed writing sandbox script"); 866 867 close(fd); 868 } 869 870 static int kvm_cmd_run_init(int argc, const char **argv) 871 { 872 static char real_cmdline[2048], default_name[20]; 873 struct framebuffer *fb = NULL; 874 unsigned int nr_online_cpus; 875 int i, r; 876 877 kvm = kvm__new(); 878 if (IS_ERR(kvm)) 879 return PTR_ERR(kvm); 880 881 signal(SIGALRM, handle_sigalrm); 882 kvm_ipc__register_handler(KVM_IPC_DEBUG, handle_debug); 883 signal(SIGUSR1, handle_sigusr1); 884 kvm_ipc__register_handler(KVM_IPC_PAUSE, handle_pause); 885 kvm_ipc__register_handler(KVM_IPC_RESUME, handle_pause); 886 kvm_ipc__register_handler(KVM_IPC_STOP, handle_stop); 887 kvm_ipc__register_handler(KVM_IPC_VMSTATE, handle_vmstate); 888 889 nr_online_cpus = sysconf(_SC_NPROCESSORS_ONLN); 890 kvm->cfg.custom_rootfs_name = "default"; 891 892 while (argc != 0) { 893 BUILD_OPTIONS(options, &kvm->cfg, kvm); 894 argc = parse_options(argc, argv, options, run_usage, 895 PARSE_OPT_STOP_AT_NON_OPTION | 896 PARSE_OPT_KEEP_DASHDASH); 897 if (argc != 0) { 898 /* Cusrom options, should have been handled elsewhere */ 899 if (strcmp(argv[0], "--") == 0) { 900 if (kvm_run_wrapper == KVM_RUN_SANDBOX) { 901 kvm->cfg.sandbox = DEFAULT_SANDBOX_FILENAME; 902 kvm_run_write_sandbox_cmd(argv+1, argc-1); 903 break; 904 } 905 } 906 907 if ((kvm_run_wrapper == KVM_RUN_DEFAULT && kvm->cfg.kernel_filename) || 908 (kvm_run_wrapper == KVM_RUN_SANDBOX && kvm->cfg.sandbox)) { 909 fprintf(stderr, "Cannot handle parameter: " 910 "%s\n", argv[0]); 911 usage_with_options(run_usage, options); 912 free(kvm); 913 return -EINVAL; 914 } 915 if (kvm_run_wrapper == KVM_RUN_SANDBOX) { 916 /* 917 * first unhandled parameter is treated as 918 * sandbox command 919 */ 920 kvm->cfg.sandbox = DEFAULT_SANDBOX_FILENAME; 921 kvm_run_write_sandbox_cmd(argv, argc); 922 } else { 923 /* 924 * first unhandled parameter is treated as a kernel 925 * image 926 */ 927 kvm->cfg.kernel_filename = argv[0]; 928 } 929 argv++; 930 argc--; 931 } 932 933 } 934 935 kvm->nr_disks = kvm->cfg.image_count; 936 937 if (!kvm->cfg.kernel_filename) 938 kvm->cfg.kernel_filename = find_kernel(); 939 940 if (!kvm->cfg.kernel_filename) { 941 kernel_usage_with_options(); 942 return -EINVAL; 943 } 944 945 kvm->cfg.vmlinux_filename = find_vmlinux(); 946 947 if (kvm->cfg.nrcpus == 0) 948 kvm->cfg.nrcpus = nr_online_cpus; 949 950 if (!kvm->cfg.ram_size) 951 kvm->cfg.ram_size = get_ram_size(kvm->cfg.nrcpus); 952 953 if (kvm->cfg.ram_size < MIN_RAM_SIZE_MB) 954 die("Not enough memory specified: %lluMB (min %lluMB)", kvm->cfg.ram_size, MIN_RAM_SIZE_MB); 955 956 if (kvm->cfg.ram_size > host_ram_size()) 957 pr_warning("Guest memory size %lluMB exceeds host physical RAM size %lluMB", kvm->cfg.ram_size, host_ram_size()); 958 959 kvm->cfg.ram_size <<= MB_SHIFT; 960 961 if (!kvm->cfg.dev) 962 kvm->cfg.dev = DEFAULT_KVM_DEV; 963 964 if (!kvm->cfg.console) 965 kvm->cfg.console = DEFAULT_CONSOLE; 966 967 if (!strncmp(kvm->cfg.console, "virtio", 6)) 968 kvm->cfg.active_console = CONSOLE_VIRTIO; 969 else if (!strncmp(kvm->cfg.console, "serial", 6)) 970 kvm->cfg.active_console = CONSOLE_8250; 971 else if (!strncmp(kvm->cfg.console, "hv", 2)) 972 kvm->cfg.active_console = CONSOLE_HV; 973 else 974 pr_warning("No console!"); 975 976 if (!kvm->cfg.host_ip) 977 kvm->cfg.host_ip = DEFAULT_HOST_ADDR; 978 979 if (!kvm->cfg.guest_ip) 980 kvm->cfg.guest_ip = DEFAULT_GUEST_ADDR; 981 982 if (!kvm->cfg.guest_mac) 983 kvm->cfg.guest_mac = DEFAULT_GUEST_MAC; 984 985 if (!kvm->cfg.host_mac) 986 kvm->cfg.host_mac = DEFAULT_HOST_MAC; 987 988 if (!kvm->cfg.script) 989 kvm->cfg.script = DEFAULT_SCRIPT; 990 991 term_init(); 992 993 if (!kvm->cfg.guest_name) { 994 if (kvm->cfg.custom_rootfs) { 995 kvm->cfg.guest_name = kvm->cfg.custom_rootfs_name; 996 } else { 997 sprintf(default_name, "guest-%u", getpid()); 998 kvm->cfg.guest_name = default_name; 999 } 1000 } 1001 1002 r = kvm__init(kvm); 1003 if (r) 1004 goto fail; 1005 1006 r = ioeventfd__init(kvm); 1007 if (r < 0) { 1008 pr_err("ioeventfd__init() failed with error %d\n", r); 1009 goto fail; 1010 } 1011 1012 r = kvm_cpu__init(kvm); 1013 if (r < 0) { 1014 pr_err("kvm_cpu__init() failed with error %d\n", r); 1015 goto fail; 1016 } 1017 1018 r = irq__init(kvm); 1019 if (r < 0) { 1020 pr_err("irq__init() failed with error %d\n", r); 1021 goto fail; 1022 } 1023 1024 r = pci__init(kvm); 1025 if (r < 0) { 1026 pr_err("pci__init() failed with error %d\n", r); 1027 goto fail; 1028 } 1029 1030 r = ioport__init(kvm); 1031 if (r < 0) { 1032 pr_err("ioport__init() failed with error %d\n", r); 1033 goto fail; 1034 } 1035 1036 /* 1037 * vidmode should be either specified 1038 * either set by default 1039 */ 1040 if (kvm->cfg.vnc || kvm->cfg.sdl) { 1041 if (vidmode == -1) 1042 vidmode = 0x312; 1043 } else { 1044 vidmode = 0; 1045 } 1046 1047 memset(real_cmdline, 0, sizeof(real_cmdline)); 1048 kvm__arch_set_cmdline(real_cmdline, kvm->cfg.vnc || kvm->cfg.sdl); 1049 1050 if (strlen(real_cmdline) > 0) 1051 strcat(real_cmdline, " "); 1052 1053 if (kvm->cfg.kernel_cmdline) 1054 strlcat(real_cmdline, kvm->cfg.kernel_cmdline, sizeof(real_cmdline)); 1055 1056 if (!kvm->cfg.using_rootfs && !kvm->cfg.disk_image[0].filename && !kvm->cfg.initrd_filename) { 1057 char tmp[PATH_MAX]; 1058 1059 kvm_setup_create_new(kvm->cfg.custom_rootfs_name); 1060 kvm_setup_resolv(kvm->cfg.custom_rootfs_name); 1061 1062 snprintf(tmp, PATH_MAX, "%s%s", kvm__get_dir(), "default"); 1063 if (virtio_9p__register(kvm, tmp, "/dev/root") < 0) 1064 die("Unable to initialize virtio 9p"); 1065 if (virtio_9p__register(kvm, "/", "hostfs") < 0) 1066 die("Unable to initialize virtio 9p"); 1067 kvm->cfg.using_rootfs = kvm->cfg.custom_rootfs = 1; 1068 } 1069 1070 if (kvm->cfg.using_rootfs) { 1071 strcat(real_cmdline, " root=/dev/root rw rootflags=rw,trans=virtio,version=9p2000.L rootfstype=9p"); 1072 if (kvm->cfg.custom_rootfs) { 1073 kvm_run_set_sandbox(); 1074 1075 strcat(real_cmdline, " init=/virt/init"); 1076 1077 if (!kvm->cfg.no_dhcp) 1078 strcat(real_cmdline, " ip=dhcp"); 1079 if (kvm_setup_guest_init()) 1080 die("Failed to setup init for guest."); 1081 } 1082 } else if (!strstr(real_cmdline, "root=")) { 1083 strlcat(real_cmdline, " root=/dev/vda rw ", sizeof(real_cmdline)); 1084 } 1085 1086 r = disk_image__init(kvm); 1087 if (r < 0) { 1088 pr_err("disk_image__init() failed with error %d\n", r); 1089 goto fail; 1090 } 1091 1092 printf(" # %s run -k %s -m %Lu -c %d --name %s\n", KVM_BINARY_NAME, 1093 kvm->cfg.kernel_filename, kvm->cfg.ram_size / 1024 / 1024, kvm->cfg.nrcpus, kvm->cfg.guest_name); 1094 1095 if (!kvm->cfg.firmware_filename) { 1096 if (!kvm__load_kernel(kvm, kvm->cfg.kernel_filename, 1097 kvm->cfg.initrd_filename, real_cmdline, vidmode)) 1098 die("unable to load kernel %s", kvm->cfg.kernel_filename); 1099 1100 kvm->vmlinux = kvm->cfg.vmlinux_filename; 1101 r = symbol_init(kvm); 1102 if (r < 0) 1103 pr_debug("symbol_init() failed with error %d\n", r); 1104 } 1105 1106 ioport__setup_arch(); 1107 1108 r = rtc__init(kvm); 1109 if (r < 0) { 1110 pr_err("rtc__init() failed with error %d\n", r); 1111 goto fail; 1112 } 1113 1114 r = serial8250__init(kvm); 1115 if (r < 0) { 1116 pr_err("serial__init() failed with error %d\n", r); 1117 goto fail; 1118 } 1119 1120 r = virtio_blk__init(kvm); 1121 if (r < 0) { 1122 pr_err("virtio_blk__init() failed with error %d\n", r); 1123 goto fail; 1124 } 1125 1126 r = virtio_scsi_init(kvm); 1127 if (r < 0) { 1128 pr_err("virtio_scsi_init() failed with error %d\n", r); 1129 goto fail; 1130 } 1131 1132 1133 if (kvm->cfg.active_console == CONSOLE_VIRTIO) 1134 virtio_console__init(kvm); 1135 1136 if (kvm->cfg.virtio_rng) 1137 virtio_rng__init(kvm); 1138 1139 if (kvm->cfg.balloon) 1140 virtio_bln__init(kvm); 1141 1142 if (!kvm->cfg.network) 1143 kvm->cfg.network = DEFAULT_NETWORK; 1144 1145 virtio_9p__init(kvm); 1146 1147 for (i = 0; i < kvm->cfg.num_net_devices; i++) { 1148 kvm->cfg.net_params[i].kvm = kvm; 1149 virtio_net__init(&kvm->cfg.net_params[i]); 1150 } 1151 1152 if (kvm->cfg.num_net_devices == 0 && kvm->cfg.no_net == 0) { 1153 struct virtio_net_params net_params; 1154 1155 net_params = (struct virtio_net_params) { 1156 .guest_ip = kvm->cfg.guest_ip, 1157 .host_ip = kvm->cfg.host_ip, 1158 .kvm = kvm, 1159 .script = kvm->cfg.script, 1160 .mode = NET_MODE_USER, 1161 }; 1162 str_to_mac(kvm->cfg.guest_mac, net_params.guest_mac); 1163 str_to_mac(kvm->cfg.host_mac, net_params.host_mac); 1164 1165 virtio_net__init(&net_params); 1166 } 1167 1168 kvm__init_ram(kvm); 1169 1170 #ifdef CONFIG_X86 1171 kbd__init(kvm); 1172 #endif 1173 1174 pci_shmem__init(kvm); 1175 1176 if (kvm->cfg.vnc || kvm->cfg.sdl) { 1177 fb = vesa__init(kvm); 1178 if (IS_ERR(fb)) { 1179 pr_err("vesa__init() failed with error %ld\n", PTR_ERR(fb)); 1180 goto fail; 1181 } 1182 } 1183 1184 if (kvm->cfg.vnc && fb) { 1185 r = vnc__init(fb); 1186 if (r < 0) { 1187 pr_err("vnc__init() failed with error %d\n", r); 1188 goto fail; 1189 } 1190 } 1191 1192 if (kvm->cfg.sdl && fb) { 1193 sdl__init(fb); 1194 if (r < 0) { 1195 pr_err("sdl__init() failed with error %d\n", r); 1196 goto fail; 1197 } 1198 } 1199 1200 r = fb__init(kvm); 1201 if (r < 0) { 1202 pr_err("fb__init() failed with error %d\n", r); 1203 goto fail; 1204 } 1205 1206 /* 1207 * Device init all done; firmware init must 1208 * come after this (it may set up device trees etc.) 1209 */ 1210 1211 kvm__start_timer(kvm); 1212 1213 if (kvm->cfg.firmware_filename) { 1214 if (!kvm__load_firmware(kvm, kvm->cfg.firmware_filename)) 1215 die("unable to load firmware image %s: %s", kvm->cfg.firmware_filename, strerror(errno)); 1216 } else { 1217 kvm__arch_setup_firmware(kvm); 1218 if (r < 0) { 1219 pr_err("kvm__arch_setup_firmware() failed with error %d\n", r); 1220 goto fail; 1221 } 1222 } 1223 1224 thread_pool__init(nr_online_cpus); 1225 fail: 1226 return r; 1227 } 1228 1229 static int kvm_cmd_run_work(void) 1230 { 1231 int i; 1232 void *ret = NULL; 1233 1234 for (i = 0; i < kvm->nrcpus; i++) { 1235 if (pthread_create(&kvm->cpus[i]->thread, NULL, kvm_cpu_thread, kvm->cpus[i]) != 0) 1236 die("unable to create KVM VCPU thread"); 1237 } 1238 1239 /* Only VCPU #0 is going to exit by itself when shutting down */ 1240 return pthread_join(kvm->cpus[0]->thread, &ret); 1241 } 1242 1243 static void kvm_cmd_run_exit(int guest_ret) 1244 { 1245 int r = 0; 1246 1247 compat__print_all_messages(); 1248 1249 r = kvm_cpu__exit(kvm); 1250 if (r < 0) 1251 pr_warning("kvm_cpu__exit() failed with error %d\n", r); 1252 1253 r = symbol_exit(kvm); 1254 if (r < 0) 1255 pr_warning("symbol_exit() failed with error %d\n", r); 1256 1257 r = irq__exit(kvm); 1258 if (r < 0) 1259 pr_warning("irq__exit() failed with error %d\n", r); 1260 1261 r = fb__exit(kvm); 1262 if (r < 0) 1263 pr_warning("fb__exit() failed with error %d\n", r); 1264 1265 r = virtio_scsi_exit(kvm); 1266 if (r < 0) 1267 pr_warning("virtio_scsi_exit() failed with error %d\n", r); 1268 1269 r = virtio_blk__exit(kvm); 1270 if (r < 0) 1271 pr_warning("virtio_blk__exit() failed with error %d\n", r); 1272 1273 r = virtio_rng__exit(kvm); 1274 if (r < 0) 1275 pr_warning("virtio_rng__exit() failed with error %d\n", r); 1276 1277 r = disk_image__exit(kvm); 1278 if (r < 0) 1279 pr_warning("disk_image__exit() failed with error %d\n", r); 1280 1281 r = serial8250__exit(kvm); 1282 if (r < 0) 1283 pr_warning("serial8250__exit() failed with error %d\n", r); 1284 1285 r = rtc__exit(kvm); 1286 if (r < 0) 1287 pr_warning("rtc__exit() failed with error %d\n", r); 1288 1289 r = kvm__arch_free_firmware(kvm); 1290 if (r < 0) 1291 pr_warning("kvm__arch_free_firmware() failed with error %d\n", r); 1292 1293 r = ioport__exit(kvm); 1294 if (r < 0) 1295 pr_warning("ioport__exit() failed with error %d\n", r); 1296 1297 r = ioeventfd__exit(kvm); 1298 if (r < 0) 1299 pr_warning("ioeventfd__exit() failed with error %d\n", r); 1300 1301 r = pci__exit(kvm); 1302 if (r < 0) 1303 pr_warning("pci__exit() failed with error %d\n", r); 1304 1305 r = kvm__exit(kvm); 1306 if (r < 0) 1307 pr_warning("pci__exit() failed with error %d\n", r); 1308 1309 if (guest_ret == 0) 1310 printf("\n # KVM session ended normally.\n"); 1311 } 1312 1313 int kvm_cmd_run(int argc, const char **argv, const char *prefix) 1314 { 1315 int r, ret = -EFAULT; 1316 1317 r = kvm_cmd_run_init(argc, argv); 1318 if (r < 0) 1319 return r; 1320 1321 ret = kvm_cmd_run_work(); 1322 kvm_cmd_run_exit(ret); 1323 1324 return ret; 1325 } 1326