xref: /kvmtool/builtin-run.c (revision d50fe489e8d5008250c07efaa44c4c428a996f55)
1 #include "kvm/builtin-run.h"
2 
3 #include "kvm/builtin-setup.h"
4 #include "kvm/virtio-balloon.h"
5 #include "kvm/virtio-console.h"
6 #include "kvm/parse-options.h"
7 #include "kvm/8250-serial.h"
8 #include "kvm/framebuffer.h"
9 #include "kvm/disk-image.h"
10 #include "kvm/threadpool.h"
11 #include "kvm/virtio-blk.h"
12 #include "kvm/virtio-net.h"
13 #include "kvm/virtio-rng.h"
14 #include "kvm/ioeventfd.h"
15 #include "kvm/virtio-9p.h"
16 #include "kvm/barrier.h"
17 #include "kvm/kvm-cpu.h"
18 #include "kvm/ioport.h"
19 #include "kvm/symbol.h"
20 #include "kvm/i8042.h"
21 #include "kvm/mutex.h"
22 #include "kvm/term.h"
23 #include "kvm/util.h"
24 #include "kvm/vesa.h"
25 #include "kvm/irq.h"
26 #include "kvm/kvm.h"
27 #include "kvm/pci.h"
28 #include "kvm/rtc.h"
29 #include "kvm/sdl.h"
30 #include "kvm/vnc.h"
31 #include "kvm/guest_compat.h"
32 #include "kvm/pci-shmem.h"
33 #include "kvm/kvm-ipc.h"
34 
35 #include <linux/types.h>
36 
37 #include <sys/utsname.h>
38 #include <sys/types.h>
39 #include <sys/stat.h>
40 #include <termios.h>
41 #include <signal.h>
42 #include <stdlib.h>
43 #include <string.h>
44 #include <unistd.h>
45 #include <ctype.h>
46 #include <stdio.h>
47 
/* Fallback values applied by kvm_cmd_run()/netdev_parser() when nothing else was configured. */
#define DEFAULT_KVM_DEV		"/dev/kvm"
#define DEFAULT_CONSOLE		"serial"
#define DEFAULT_NETWORK		"user"
#define DEFAULT_HOST_ADDR	"192.168.33.1"
#define DEFAULT_GUEST_ADDR	"192.168.33.15"
#define DEFAULT_GUEST_MAC	"02:15:15:15:15:15"
#define DEFAULT_HOST_MAC	"02:01:01:01:01:01"
#define DEFAULT_SCRIPT		"none"

/* Shift counts used to scale a value by 2^20, 2^10 and 2^30 respectively. */
#define MB_SHIFT		(20)
#define KB_SHIFT		(10)
#define GB_SHIFT		(30)
/* Smallest guest RAM size accepted by kvm_cmd_run(), in MiB and in bytes. */
#define MIN_RAM_SIZE_MB		(64ULL)
#define MIN_RAM_SIZE_BYTE	(MIN_RAM_SIZE_MB << MB_SHIFT)
62 
/* The guest instance; assigned from kvm__init() in kvm_cmd_run(). */
struct kvm *kvm;
/* vCPU table; kvm_cmd_run() fills the first nrcpus entries. */
struct kvm_cpu *kvm_cpus[KVM_NR_CPUS];
/* Per-thread pointer to the vCPU handled by the calling thread (set in kvm_cpu_thread()). */
__thread struct kvm_cpu *current_kvm_cpu;

/* Guest RAM size: MiB as parsed from -m/--mem, shifted to bytes inside kvm_cmd_run(). */
static u64 ram_size;
/* Number of valid entries in image_filename[] / readonly_image[]. */
static u8  image_count;
/* Number of valid entries in net_params[]. */
static u8 num_net_devices;
static bool virtio_rng;
static const char *kernel_cmdline;
static const char *kernel_filename;
static const char *vmlinux_filename;
static const char *initrd_filename;
static const char *image_filename[MAX_DISK_IMAGES];
static const char *console;
static const char *dev;
/*
 * The following network strings are not bound to any option in this file;
 * kvm_cmd_run() fills them with the DEFAULT_* values when they are NULL.
 */
static const char *network;
static const char *host_ip;
static const char *guest_ip;
static const char *guest_mac;
static const char *host_mac;
static const char *script;
static const char *guest_name;
static const char *sandbox;
/* Heap array grown by netdev_parser(), one element per -n/--network option. */
static struct virtio_net_params *net_params;
static bool single_step;
static bool readonly_image[MAX_DISK_IMAGES];
static bool vnc;
static bool sdl;
static bool balloon;
/* Set when the guest root is a 9p share rather than a disk image. */
static bool using_rootfs;
/* Set when that 9p root lives under kvm__get_dir() (see img_name_parser()). */
static bool custom_rootfs;
/* Set by set_net_param() when "mode=none" is requested. */
static bool no_net;
static bool no_dhcp;
/* Defined outside this file. */
extern bool ioport_debug;
extern int  active_console;
extern int  debug_iodelay;

bool do_debug_print = false;

/* Requested vCPU count; 0 makes kvm_cmd_run() use the number of online host CPUs. */
static int nrcpus;
/* Video mode passed to kvm__load_kernel(); -1 means "not specified". */
static int vidmode = -1;

/* Usage banner handed to parse_options()/usage_with_options(). */
static const char * const run_usage[] = {
	"kvm run [<options>] [<kernel image>]",
	NULL
};
109 
110 static int img_name_parser(const struct option *opt, const char *arg, int unset)
111 {
112 	char *sep;
113 	struct stat st;
114 	char path[PATH_MAX];
115 
116 	if (stat(arg, &st) == 0 &&
117 	    S_ISDIR(st.st_mode)) {
118 		char tmp[PATH_MAX];
119 
120 		if (realpath(arg, tmp) == 0 ||
121 		    virtio_9p__register(kvm, tmp, "/dev/root") < 0)
122 			die("Unable to initialize virtio 9p");
123 		using_rootfs = 1;
124 		return 0;
125 	}
126 
127 	snprintf(path, PATH_MAX, "%s%s", kvm__get_dir(), arg);
128 
129 	if (stat(path, &st) == 0 &&
130 	    S_ISDIR(st.st_mode)) {
131 		char tmp[PATH_MAX];
132 
133 		if (realpath(path, tmp) == 0 ||
134 		    virtio_9p__register(kvm, tmp, "/dev/root") < 0)
135 			die("Unable to initialize virtio 9p");
136 		if (virtio_9p__register(kvm, "/", "hostfs") < 0)
137 			die("Unable to initialize virtio 9p");
138 		kvm_setup_resolv(arg);
139 		using_rootfs = custom_rootfs = 1;
140 		return 0;
141 	}
142 
143 	if (image_count >= MAX_DISK_IMAGES)
144 		die("Currently only 4 images are supported");
145 
146 	image_filename[image_count] = arg;
147 	sep = strstr(arg, ",");
148 	if (sep) {
149 		if (strcmp(sep + 1, "ro") == 0)
150 			readonly_image[image_count] = 1;
151 		*sep = 0;
152 	}
153 
154 	image_count++;
155 
156 	return 0;
157 }
158 
159 static int virtio_9p_rootdir_parser(const struct option *opt, const char *arg, int unset)
160 {
161 	char *tag_name;
162 	char tmp[PATH_MAX];
163 
164 	/*
165 	 * 9p dir can be of the form dirname,tag_name or
166 	 * just dirname. In the later case we use the
167 	 * default tag name
168 	 */
169 	tag_name = strstr(arg, ",");
170 	if (tag_name) {
171 		*tag_name = '\0';
172 		tag_name++;
173 	}
174 	if (realpath(arg, tmp)) {
175 		if (virtio_9p__register(kvm, tmp, tag_name) < 0)
176 			die("Unable to initialize virtio 9p");
177 	} else
178 		die("Failed resolving 9p path");
179 	return 0;
180 }
181 
/*
 * Option callback for --tty: converts the argument with atoi() and passes
 * the resulting id to term_set_tty(). Always returns 0.
 */
static int tty_parser(const struct option *opt, const char *arg, int unset)
{
	term_set_tty(atoi(arg));

	return 0;
}
190 
/*
 * Parse a textual MAC address ("xx:xx:xx:xx:xx:xx", hex octets) from @str
 * into the six bytes at @mac. Octets that fail to parse leave the
 * corresponding bytes of @mac untouched.
 */
static inline void str_to_mac(const char *str, char *mac)
{
	sscanf(str, "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx",
	       &mac[0], &mac[1], &mac[2], &mac[3], &mac[4], &mac[5]);
}
196 static int set_net_param(struct virtio_net_params *p, const char *param,
197 				const char *val)
198 {
199 	if (strcmp(param, "guest_mac") == 0) {
200 		str_to_mac(val, p->guest_mac);
201 	} else if (strcmp(param, "mode") == 0) {
202 		if (!strncmp(val, "user", 4)) {
203 			int i;
204 
205 			for (i = 0; i < num_net_devices; i++)
206 				if (net_params[i].mode == NET_MODE_USER)
207 					die("Only one usermode network device allowed at a time");
208 			p->mode = NET_MODE_USER;
209 		} else if (!strncmp(val, "tap", 3)) {
210 			p->mode = NET_MODE_TAP;
211 		} else if (!strncmp(val, "none", 4)) {
212 			no_net = 1;
213 			return -1;
214 		} else
215 			die("Unkown network mode %s, please use user, tap or none", network);
216 	} else if (strcmp(param, "script") == 0) {
217 		p->script = strdup(val);
218 	} else if (strcmp(param, "guest_ip") == 0) {
219 		p->guest_ip = strdup(val);
220 	} else if (strcmp(param, "host_ip") == 0) {
221 		p->host_ip = strdup(val);
222 	} else if (strcmp(param, "vhost") == 0) {
223 		p->vhost = atoi(val);
224 	} else if (strcmp(param, "fd") == 0) {
225 		p->fd = atoi(val);
226 	}
227 
228 	return 0;
229 }
230 
231 static int netdev_parser(const struct option *opt, const char *arg, int unset)
232 {
233 	struct virtio_net_params p;
234 	char *buf = NULL, *cmd = NULL, *cur = NULL;
235 	bool on_cmd = true;
236 
237 	if (arg) {
238 		buf = strdup(arg);
239 		if (buf == NULL)
240 			die("Failed allocating new net buffer");
241 		cur = strtok(buf, ",=");
242 	}
243 
244 	p = (struct virtio_net_params) {
245 		.guest_ip	= DEFAULT_GUEST_ADDR,
246 		.host_ip	= DEFAULT_HOST_ADDR,
247 		.script		= DEFAULT_SCRIPT,
248 		.mode		= NET_MODE_TAP,
249 	};
250 
251 	str_to_mac(DEFAULT_GUEST_MAC, p.guest_mac);
252 	p.guest_mac[5] += num_net_devices;
253 
254 	while (cur) {
255 		if (on_cmd) {
256 			cmd = cur;
257 		} else {
258 			if (set_net_param(&p, cmd, cur) < 0)
259 				goto done;
260 		}
261 		on_cmd = !on_cmd;
262 
263 		cur = strtok(NULL, ",=");
264 	};
265 
266 	num_net_devices++;
267 
268 	net_params = realloc(net_params, num_net_devices * sizeof(*net_params));
269 	if (net_params == NULL)
270 		die("Failed adding new network device");
271 
272 	net_params[num_net_devices - 1] = p;
273 
274 done:
275 	free(buf);
276 	return 0;
277 }
278 
279 static int shmem_parser(const struct option *opt, const char *arg, int unset)
280 {
281 	const u64 default_size = SHMEM_DEFAULT_SIZE;
282 	const u64 default_phys_addr = SHMEM_DEFAULT_ADDR;
283 	const char *default_handle = SHMEM_DEFAULT_HANDLE;
284 	struct shmem_info *si = malloc(sizeof(struct shmem_info));
285 	u64 phys_addr;
286 	u64 size;
287 	char *handle = NULL;
288 	int create = 0;
289 	const char *p = arg;
290 	char *next;
291 	int base = 10;
292 	int verbose = 0;
293 
294 	const int skip_pci = strlen("pci:");
295 	if (verbose)
296 		pr_info("shmem_parser(%p,%s,%d)", opt, arg, unset);
297 	/* parse out optional addr family */
298 	if (strcasestr(p, "pci:")) {
299 		p += skip_pci;
300 	} else if (strcasestr(p, "mem:")) {
301 		die("I can't add to E820 map yet.\n");
302 	}
303 	/* parse out physical addr */
304 	base = 10;
305 	if (strcasestr(p, "0x"))
306 		base = 16;
307 	phys_addr = strtoll(p, &next, base);
308 	if (next == p && phys_addr == 0) {
309 		pr_info("shmem: no physical addr specified, using default.");
310 		phys_addr = default_phys_addr;
311 	}
312 	if (*next != ':' && *next != '\0')
313 		die("shmem: unexpected chars after phys addr.\n");
314 	if (*next == '\0')
315 		p = next;
316 	else
317 		p = next + 1;
318 	/* parse out size */
319 	base = 10;
320 	if (strcasestr(p, "0x"))
321 		base = 16;
322 	size = strtoll(p, &next, base);
323 	if (next == p && size == 0) {
324 		pr_info("shmem: no size specified, using default.");
325 		size = default_size;
326 	}
327 	/* look for [KMGkmg][Bb]*  uses base 2. */
328 	int skip_B = 0;
329 	if (strspn(next, "KMGkmg")) {	/* might have a prefix */
330 		if (*(next + 1) == 'B' || *(next + 1) == 'b')
331 			skip_B = 1;
332 		switch (*next) {
333 		case 'K':
334 		case 'k':
335 			size = size << KB_SHIFT;
336 			break;
337 		case 'M':
338 		case 'm':
339 			size = size << MB_SHIFT;
340 			break;
341 		case 'G':
342 		case 'g':
343 			size = size << GB_SHIFT;
344 			break;
345 		default:
346 			die("shmem: bug in detecting size prefix.");
347 			break;
348 		}
349 		next += 1 + skip_B;
350 	}
351 	if (*next != ':' && *next != '\0') {
352 		die("shmem: unexpected chars after phys size. <%c><%c>\n",
353 		    *next, *p);
354 	}
355 	if (*next == '\0')
356 		p = next;
357 	else
358 		p = next + 1;
359 	/* parse out optional shmem handle */
360 	const int skip_handle = strlen("handle=");
361 	next = strcasestr(p, "handle=");
362 	if (*p && next) {
363 		if (p != next)
364 			die("unexpected chars before handle\n");
365 		p += skip_handle;
366 		next = strchrnul(p, ':');
367 		if (next - p) {
368 			handle = malloc(next - p + 1);
369 			strncpy(handle, p, next - p);
370 			handle[next - p] = '\0';	/* just in case. */
371 		}
372 		if (*next == '\0')
373 			p = next;
374 		else
375 			p = next + 1;
376 	}
377 	/* parse optional create flag to see if we should create shm seg. */
378 	if (*p && strcasestr(p, "create")) {
379 		create = 1;
380 		p += strlen("create");
381 	}
382 	if (*p != '\0')
383 		die("shmem: unexpected trailing chars\n");
384 	if (handle == NULL) {
385 		handle = malloc(strlen(default_handle) + 1);
386 		strcpy(handle, default_handle);
387 	}
388 	if (verbose) {
389 		pr_info("shmem: phys_addr = %llx", phys_addr);
390 		pr_info("shmem: size      = %llx", size);
391 		pr_info("shmem: handle    = %s", handle);
392 		pr_info("shmem: create    = %d", create);
393 	}
394 
395 	si->phys_addr = phys_addr;
396 	si->size = size;
397 	si->handle = handle;
398 	si->create = create;
399 	pci_shmem__register_mem(si);	/* ownership of si, etc. passed on. */
400 	return 0;
401 }
402 
/*
 * Command line options understood by "kvm run". Simple options store their
 * value directly into the file-level variables referenced below; the
 * OPT_CALLBACK entries are processed by the parser functions defined above.
 */
static const struct option options[] = {
	OPT_GROUP("Basic options:"),
	OPT_STRING('\0', "name", &guest_name, "guest name",
			"A name for the guest"),
	OPT_INTEGER('c', "cpus", &nrcpus, "Number of CPUs"),
	OPT_U64('m', "mem", &ram_size, "Virtual machine memory size in MiB."),
	OPT_CALLBACK('\0', "shmem", NULL,
		     "[pci:]<addr>:<size>[:handle=<handle>][:create]",
		     "Share host shmem with guest via pci device",
		     shmem_parser),
	OPT_CALLBACK('d', "disk", NULL, "image or rootfs_dir", "Disk image or rootfs directory", img_name_parser),
	OPT_BOOLEAN('\0', "balloon", &balloon, "Enable virtio balloon"),
	OPT_BOOLEAN('\0', "vnc", &vnc, "Enable VNC framebuffer"),
	OPT_BOOLEAN('\0', "sdl", &sdl, "Enable SDL framebuffer"),
	OPT_BOOLEAN('\0', "rng", &virtio_rng, "Enable virtio Random Number Generator"),
	OPT_CALLBACK('\0', "9p", NULL, "dir_to_share,tag_name",
		     "Enable virtio 9p to share files between host and guest", virtio_9p_rootdir_parser),
	OPT_STRING('\0', "console", &console, "serial, virtio or hv",
			"Console to use"),
	OPT_STRING('\0', "dev", &dev, "device_file", "KVM device file"),
	OPT_CALLBACK('\0', "tty", NULL, "tty id",
		     "Remap guest TTY into a pty on the host",
		     tty_parser),
	OPT_STRING('\0', "sandbox", &sandbox, "script",
			"Run this script when booting into custom rootfs"),

	OPT_GROUP("Kernel options:"),
	OPT_STRING('k', "kernel", &kernel_filename, "kernel",
			"Kernel to boot in virtual machine"),
	OPT_STRING('i', "initrd", &initrd_filename, "initrd",
			"Initial RAM disk image"),
	OPT_STRING('p', "params", &kernel_cmdline, "params",
			"Kernel command line arguments"),

	OPT_GROUP("Networking options:"),
	OPT_CALLBACK_DEFAULT('n', "network", NULL, "network params",
		     "Create a new guest NIC",
		     netdev_parser, NULL),
	OPT_BOOLEAN('\0', "no-dhcp", &no_dhcp, "Disable kernel DHCP in rootfs mode"),

	OPT_GROUP("BIOS options:"),
	OPT_INTEGER('\0', "vidmode", &vidmode,
		    "Video mode"),

	OPT_GROUP("Debug options:"),
	OPT_BOOLEAN('\0', "debug", &do_debug_print,
			"Enable debug messages"),
	OPT_BOOLEAN('\0', "debug-single-step", &single_step,
			"Enable single stepping"),
	OPT_BOOLEAN('\0', "debug-ioport", &ioport_debug,
			"Enable ioport debugging"),
	OPT_INTEGER('\0', "debug-iodelay", &debug_iodelay,
			"Delay IO by millisecond"),
	OPT_END()
};
458 
459 /*
460  * Serialize debug printout so that the output of multiple vcpus does not
461  * get mixed up:
462  */
463 static int printout_done;
464 
465 static void handle_sigusr1(int sig)
466 {
467 	struct kvm_cpu *cpu = current_kvm_cpu;
468 	int fd = kvm_cpu__get_debug_fd();
469 
470 	if (!cpu)
471 		return;
472 
473 	dprintf(fd, "\n #\n # vCPU #%ld's dump:\n #\n", cpu->cpu_id);
474 	kvm_cpu__show_registers(cpu);
475 	kvm_cpu__show_code(cpu);
476 	kvm_cpu__show_page_tables(cpu);
477 	fflush(stdout);
478 	printout_done = 1;
479 	mb();
480 }
481 
482 /* Pause/resume the guest using SIGUSR2 */
483 static int is_paused;
484 
485 static void handle_pause(int fd, u32 type, u32 len, u8 *msg)
486 {
487 	if (type == KVM_IPC_RESUME && is_paused)
488 		kvm__continue();
489 	else if (type == KVM_IPC_PAUSE && !is_paused)
490 		kvm__pause();
491 	else
492 		return;
493 
494 	is_paused = !is_paused;
495 	pr_info("Guest %s\n", is_paused ? "paused" : "resumed");
496 }
497 
498 static void handle_debug(int fd, u32 type, u32 len, u8 *msg)
499 {
500 	int i;
501 
502 	for (i = 0; i < nrcpus; i++) {
503 		struct kvm_cpu *cpu = kvm_cpus[i];
504 
505 		if (!cpu)
506 			continue;
507 
508 		printout_done = 0;
509 
510 		kvm_cpu__set_debug_fd(fd);
511 		pthread_kill(cpu->thread, SIGUSR1);
512 		/*
513 		 * Wait for the vCPU to dump state before signalling
514 		 * the next thread. Since this is debug code it does
515 		 * not matter that we are burning CPU time a bit:
516 		 */
517 		while (!printout_done)
518 			mb();
519 	}
520 
521 	close(fd);
522 
523 	serial8250__inject_sysrq(kvm);
524 }
525 
526 static void handle_sigalrm(int sig)
527 {
528 	kvm__arch_periodic_poll(kvm);
529 }
530 
531 static void handle_stop(int fd, u32 type, u32 len, u8 *msg)
532 {
533 	kvm_cpu__reboot();
534 }
535 
536 static void *kvm_cpu_thread(void *arg)
537 {
538 	current_kvm_cpu		= arg;
539 
540 	if (kvm_cpu__start(current_kvm_cpu))
541 		goto panic_kvm;
542 
543 	kvm_cpu__delete(current_kvm_cpu);
544 
545 	return (void *) (intptr_t) 0;
546 
547 panic_kvm:
548 	fprintf(stderr, "KVM exit reason: %u (\"%s\")\n",
549 		current_kvm_cpu->kvm_run->exit_reason,
550 		kvm_exit_reasons[current_kvm_cpu->kvm_run->exit_reason]);
551 	if (current_kvm_cpu->kvm_run->exit_reason == KVM_EXIT_UNKNOWN)
552 		fprintf(stderr, "KVM exit code: 0x%Lu\n",
553 			current_kvm_cpu->kvm_run->hw.hardware_exit_reason);
554 
555 	kvm_cpu__set_debug_fd(STDOUT_FILENO);
556 	kvm_cpu__show_registers(current_kvm_cpu);
557 	kvm_cpu__show_code(current_kvm_cpu);
558 	kvm_cpu__show_page_tables(current_kvm_cpu);
559 
560 	kvm_cpu__delete(current_kvm_cpu);
561 
562 	return (void *) (intptr_t) 1;
563 }
564 
/*
 * Buffer holding the kernel path returned by find_kernel(); it is also used
 * as scratch space by kernel_usage_with_options().
 */
static char kernel[PATH_MAX];

/* Path prefixes to which "-<uname release>" is appended when searching the host. */
static const char *host_kernels[] = {
	"/boot/vmlinuz",
	"/boot/bzImage",
	NULL
};

/* Kernel image paths tried first by find_kernel(); relative to the current directory. */
static const char *default_kernels[] = {
	"./bzImage",
	"../../arch/" BUILD_ARCH "/boot/bzImage",
	NULL
};

/* Candidate vmlinux paths tried by find_vmlinux(). */
static const char *default_vmlinux[] = {
	"../../../vmlinux",
	"../../vmlinux",
	NULL
};
584 
585 static void kernel_usage_with_options(void)
586 {
587 	const char **k;
588 	struct utsname uts;
589 
590 	fprintf(stderr, "Fatal: could not find default kernel image in:\n");
591 	k = &default_kernels[0];
592 	while (*k) {
593 		fprintf(stderr, "\t%s\n", *k);
594 		k++;
595 	}
596 
597 	if (uname(&uts) < 0)
598 		return;
599 
600 	k = &host_kernels[0];
601 	while (*k) {
602 		if (snprintf(kernel, PATH_MAX, "%s-%s", *k, uts.release) < 0)
603 			return;
604 		fprintf(stderr, "\t%s\n", kernel);
605 		k++;
606 	}
607 	fprintf(stderr, "\nPlease see 'kvm run --help' for more options.\n\n");
608 }
609 
610 static u64 host_ram_size(void)
611 {
612 	long page_size;
613 	long nr_pages;
614 
615 	nr_pages	= sysconf(_SC_PHYS_PAGES);
616 	if (nr_pages < 0) {
617 		pr_warning("sysconf(_SC_PHYS_PAGES) failed");
618 		return 0;
619 	}
620 
621 	page_size	= sysconf(_SC_PAGE_SIZE);
622 	if (page_size < 0) {
623 		pr_warning("sysconf(_SC_PAGE_SIZE) failed");
624 		return 0;
625 	}
626 
627 	return (nr_pages * page_size) >> MB_SHIFT;
628 }
629 
630 /*
631  * If user didn't specify how much memory it wants to allocate for the guest,
632  * avoid filling the whole host RAM.
633  */
634 #define RAM_SIZE_RATIO		0.8
635 
636 static u64 get_ram_size(int nr_cpus)
637 {
638 	u64 available;
639 	u64 ram_size;
640 
641 	ram_size	= 64 * (nr_cpus + 3);
642 
643 	available	= host_ram_size() * RAM_SIZE_RATIO;
644 	if (!available)
645 		available = MIN_RAM_SIZE_MB;
646 
647 	if (ram_size > available)
648 		ram_size	= available;
649 
650 	return ram_size;
651 }
652 
653 static const char *find_kernel(void)
654 {
655 	const char **k;
656 	struct stat st;
657 	struct utsname uts;
658 
659 	k = &default_kernels[0];
660 	while (*k) {
661 		if (stat(*k, &st) < 0 || !S_ISREG(st.st_mode)) {
662 			k++;
663 			continue;
664 		}
665 		strncpy(kernel, *k, PATH_MAX);
666 		return kernel;
667 	}
668 
669 	if (uname(&uts) < 0)
670 		return NULL;
671 
672 	k = &host_kernels[0];
673 	while (*k) {
674 		if (snprintf(kernel, PATH_MAX, "%s-%s", *k, uts.release) < 0)
675 			return NULL;
676 
677 		if (stat(kernel, &st) < 0 || !S_ISREG(st.st_mode)) {
678 			k++;
679 			continue;
680 		}
681 		return kernel;
682 
683 	}
684 	return NULL;
685 }
686 
687 static const char *find_vmlinux(void)
688 {
689 	const char **vmlinux;
690 
691 	vmlinux = &default_vmlinux[0];
692 	while (*vmlinux) {
693 		struct stat st;
694 
695 		if (stat(*vmlinux, &st) < 0 || !S_ISREG(st.st_mode)) {
696 			vmlinux++;
697 			continue;
698 		}
699 		return *vmlinux;
700 	}
701 	return NULL;
702 }
703 
704 void kvm_run_help(void)
705 {
706 	usage_with_options(run_usage, options);
707 }
708 
709 static int kvm_custom_stage2(void)
710 {
711 	char tmp[PATH_MAX], dst[PATH_MAX], *src;
712 	const char *rootfs;
713 	int r;
714 
715 	src = realpath("guest/init_stage2", NULL);
716 	if (src == NULL)
717 		return -ENOMEM;
718 
719 	if (image_filename[0] == NULL)
720 		rootfs = "default";
721 	else
722 		rootfs = image_filename[0];
723 
724 	snprintf(tmp, PATH_MAX, "%s%s/virt/init_stage2", kvm__get_dir(), rootfs);
725 	remove(tmp);
726 
727 	snprintf(dst, PATH_MAX, "/host/%s", src);
728 	r = symlink(dst, tmp);
729 	free(src);
730 
731 	return r;
732 }
733 
734 static int kvm_run_set_sandbox(void)
735 {
736 	const char *guestfs_name = "default";
737 	char path[PATH_MAX], script[PATH_MAX], *tmp;
738 
739 	if (image_filename[0])
740 		guestfs_name = image_filename[0];
741 
742 	snprintf(path, PATH_MAX, "%s%s/virt/sandbox.sh", kvm__get_dir(), guestfs_name);
743 
744 	remove(path);
745 
746 	if (sandbox == NULL)
747 		return 0;
748 
749 	tmp = realpath(sandbox, NULL);
750 	if (tmp == NULL)
751 		return -ENOMEM;
752 
753 	snprintf(script, PATH_MAX, "/host/%s", tmp);
754 	free(tmp);
755 
756 	return symlink(script, path);
757 }
758 
759 int kvm_cmd_run(int argc, const char **argv, const char *prefix)
760 {
761 	static char real_cmdline[2048], default_name[20];
762 	struct framebuffer *fb = NULL;
763 	unsigned int nr_online_cpus;
764 	int exit_code = 0;
765 	int max_cpus, recommended_cpus;
766 	int i;
767 	void *ret;
768 
769 	signal(SIGALRM, handle_sigalrm);
770 	kvm_ipc__register_handler(KVM_IPC_DEBUG, handle_debug);
771 	signal(SIGUSR1, handle_sigusr1);
772 	kvm_ipc__register_handler(KVM_IPC_PAUSE, handle_pause);
773 	kvm_ipc__register_handler(KVM_IPC_RESUME, handle_pause);
774 	kvm_ipc__register_handler(KVM_IPC_STOP, handle_stop);
775 
776 	nr_online_cpus = sysconf(_SC_NPROCESSORS_ONLN);
777 
778 	while (argc != 0) {
779 		argc = parse_options(argc, argv, options, run_usage,
780 				PARSE_OPT_STOP_AT_NON_OPTION);
781 		if (argc != 0) {
782 			if (kernel_filename) {
783 				fprintf(stderr, "Cannot handle parameter: "
784 						"%s\n", argv[0]);
785 				usage_with_options(run_usage, options);
786 				return EINVAL;
787 			}
788 			/* first unhandled parameter is treated as a kernel
789 			   image
790 			 */
791 			kernel_filename = argv[0];
792 			argv++;
793 			argc--;
794 		}
795 
796 	}
797 
798 	if (!kernel_filename)
799 		kernel_filename = find_kernel();
800 
801 	if (!kernel_filename) {
802 		kernel_usage_with_options();
803 		return EINVAL;
804 	}
805 
806 	vmlinux_filename = find_vmlinux();
807 
808 	if (nrcpus == 0)
809 		nrcpus = nr_online_cpus;
810 	else if (nrcpus < 1 || nrcpus > KVM_NR_CPUS)
811 		die("Number of CPUs %d is out of [1;%d] range", nrcpus, KVM_NR_CPUS);
812 
813 	if (!ram_size)
814 		ram_size	= get_ram_size(nrcpus);
815 
816 	if (ram_size < MIN_RAM_SIZE_MB)
817 		die("Not enough memory specified: %lluMB (min %lluMB)", ram_size, MIN_RAM_SIZE_MB);
818 
819 	if (ram_size > host_ram_size())
820 		pr_warning("Guest memory size %lluMB exceeds host physical RAM size %lluMB", ram_size, host_ram_size());
821 
822 	ram_size <<= MB_SHIFT;
823 
824 	if (!dev)
825 		dev = DEFAULT_KVM_DEV;
826 
827 	if (!console)
828 		console = DEFAULT_CONSOLE;
829 
830 	if (!strncmp(console, "virtio", 6))
831 		active_console  = CONSOLE_VIRTIO;
832 	else if (!strncmp(console, "serial", 6))
833 		active_console  = CONSOLE_8250;
834 	else if (!strncmp(console, "hv", 2))
835 		active_console = CONSOLE_HV;
836 	else
837 		pr_warning("No console!");
838 
839 	if (!host_ip)
840 		host_ip = DEFAULT_HOST_ADDR;
841 
842 	if (!guest_ip)
843 		guest_ip = DEFAULT_GUEST_ADDR;
844 
845 	if (!guest_mac)
846 		guest_mac = DEFAULT_GUEST_MAC;
847 
848 	if (!host_mac)
849 		host_mac = DEFAULT_HOST_MAC;
850 
851 	if (!script)
852 		script = DEFAULT_SCRIPT;
853 
854 	symbol__init(vmlinux_filename);
855 
856 	term_init();
857 
858 	if (!guest_name) {
859 		sprintf(default_name, "guest-%u", getpid());
860 		guest_name = default_name;
861 	}
862 
863 	kvm = kvm__init(dev, ram_size, guest_name);
864 
865 	kvm->single_step = single_step;
866 
867 	ioeventfd__init();
868 
869 	max_cpus = kvm__max_cpus(kvm);
870 	recommended_cpus = kvm__recommended_cpus(kvm);
871 
872 	if (nrcpus > max_cpus) {
873 		printf("  # Limit the number of CPUs to %d\n", max_cpus);
874 		nrcpus = max_cpus;
875 	} else if (nrcpus > recommended_cpus) {
876 		printf("  # Warning: The maximum recommended amount of VCPUs"
877 			" is %d\n", recommended_cpus);
878 	}
879 
880 	kvm->nrcpus = nrcpus;
881 
882 	irq__init(kvm);
883 
884 	pci__init();
885 
886 	/*
887 	 * vidmode should be either specified
888 	 * either set by default
889 	 */
890 	if (vnc || sdl) {
891 		if (vidmode == -1)
892 			vidmode = 0x312;
893 	} else
894 		vidmode = 0;
895 
896 	memset(real_cmdline, 0, sizeof(real_cmdline));
897 	kvm__arch_set_cmdline(real_cmdline, vnc || sdl);
898 
899 	if (strlen(real_cmdline) > 0)
900 		strcat(real_cmdline, " ");
901 
902 	if (kernel_cmdline)
903 		strlcat(real_cmdline, kernel_cmdline, sizeof(real_cmdline));
904 
905 	if (!using_rootfs && !image_filename[0]) {
906 		char tmp[PATH_MAX];
907 
908 		kvm_setup_create_new("default");
909 		kvm_setup_resolv("default");
910 
911 		snprintf(tmp, PATH_MAX, "%s%s", kvm__get_dir(), "default");
912 		if (virtio_9p__register(kvm, tmp, "/dev/root") < 0)
913 			die("Unable to initialize virtio 9p");
914 		if (virtio_9p__register(kvm, "/", "hostfs") < 0)
915 			die("Unable to initialize virtio 9p");
916 		using_rootfs = custom_rootfs = 1;
917 	}
918 
919 	if (using_rootfs) {
920 		strcat(real_cmdline, " root=/dev/root rw rootflags=rw,trans=virtio,version=9p2000.L rootfstype=9p");
921 		if (custom_rootfs) {
922 			kvm_run_set_sandbox();
923 
924 			strcat(real_cmdline, " init=/virt/init");
925 
926 			if (!no_dhcp)
927 				strcat(real_cmdline, "  ip=dhcp");
928 			if (kvm_custom_stage2())
929 				die("Failed linking stage 2 of init.");
930 		}
931 	} else if (!strstr(real_cmdline, "root=")) {
932 		strlcat(real_cmdline, " root=/dev/vda rw ", sizeof(real_cmdline));
933 	}
934 
935 	if (image_count) {
936 		kvm->nr_disks = image_count;
937 		kvm->disks    = disk_image__open_all(image_filename, readonly_image, image_count);
938 		if (!kvm->disks)
939 			die("Unable to load all disk images.");
940 
941 		virtio_blk__init_all(kvm);
942 	}
943 
944 	printf("  # kvm run -k %s -m %Lu -c %d --name %s\n", kernel_filename, ram_size / 1024 / 1024, nrcpus, guest_name);
945 
946 	if (!kvm__load_kernel(kvm, kernel_filename, initrd_filename,
947 				real_cmdline, vidmode))
948 		die("unable to load kernel %s", kernel_filename);
949 
950 	kvm->vmlinux		= vmlinux_filename;
951 
952 	ioport__setup_arch();
953 
954 	rtc__init();
955 
956 	serial8250__init(kvm);
957 
958 	if (active_console == CONSOLE_VIRTIO)
959 		virtio_console__init(kvm);
960 
961 	if (virtio_rng)
962 		virtio_rng__init(kvm);
963 
964 	if (balloon)
965 		virtio_bln__init(kvm);
966 
967 	if (!network)
968 		network = DEFAULT_NETWORK;
969 
970 	virtio_9p__init(kvm);
971 
972 	for (i = 0; i < num_net_devices; i++) {
973 		net_params[i].kvm = kvm;
974 		virtio_net__init(&net_params[i]);
975 	}
976 
977 	if (num_net_devices == 0 && no_net == 0) {
978 		struct virtio_net_params net_params;
979 
980 		net_params = (struct virtio_net_params) {
981 			.guest_ip	= guest_ip,
982 			.host_ip	= host_ip,
983 			.kvm		= kvm,
984 			.script		= script,
985 			.mode		= NET_MODE_USER,
986 		};
987 		str_to_mac(guest_mac, net_params.guest_mac);
988 		str_to_mac(host_mac, net_params.host_mac);
989 
990 		virtio_net__init(&net_params);
991 	}
992 
993 	kvm__init_ram(kvm);
994 
995 #ifdef CONFIG_X86
996 	kbd__init(kvm);
997 #endif
998 
999 	pci_shmem__init(kvm);
1000 
1001 	if (vnc || sdl)
1002 		fb = vesa__init(kvm);
1003 
1004 	if (vnc) {
1005 		if (fb)
1006 			vnc__init(fb);
1007 	}
1008 
1009 	if (sdl) {
1010 		if (fb)
1011 			sdl__init(fb);
1012 	}
1013 
1014 	fb__start();
1015 
1016 	/* Device init all done; firmware init must
1017 	 * come after this (it may set up device trees etc.)
1018 	 */
1019 
1020 	kvm__start_timer(kvm);
1021 
1022 	kvm__arch_setup_firmware(kvm);
1023 
1024 	for (i = 0; i < nrcpus; i++) {
1025 		kvm_cpus[i] = kvm_cpu__init(kvm, i);
1026 		if (!kvm_cpus[i])
1027 			die("unable to initialize KVM VCPU");
1028 	}
1029 
1030 	thread_pool__init(nr_online_cpus);
1031 	ioeventfd__start();
1032 
1033 	for (i = 0; i < nrcpus; i++) {
1034 		if (pthread_create(&kvm_cpus[i]->thread, NULL, kvm_cpu_thread, kvm_cpus[i]) != 0)
1035 			die("unable to create KVM VCPU thread");
1036 	}
1037 
1038 	/* Only VCPU #0 is going to exit by itself when shutting down */
1039 	if (pthread_join(kvm_cpus[0]->thread, &ret) != 0)
1040 		exit_code = 1;
1041 
1042 	for (i = 1; i < nrcpus; i++) {
1043 		if (kvm_cpus[i]->is_running) {
1044 			pthread_kill(kvm_cpus[i]->thread, SIGKVMEXIT);
1045 			if (pthread_join(kvm_cpus[i]->thread, &ret) != 0)
1046 				die("pthread_join");
1047 		}
1048 		if (ret != NULL)
1049 			exit_code = 1;
1050 	}
1051 
1052 	compat__print_all_messages();
1053 
1054 	fb__stop();
1055 
1056 	virtio_blk__delete_all(kvm);
1057 	virtio_rng__delete_all(kvm);
1058 
1059 	disk_image__close_all(kvm->disks, image_count);
1060 	kvm__delete(kvm);
1061 
1062 	if (!exit_code)
1063 		printf("\n  # KVM session ended normally.\n");
1064 
1065 	return exit_code;
1066 }
1067