xref: /kvmtool/builtin-run.c (revision 3c29e2aabd59b4ed6eb9a4495224dd8eede138de)
1 #include "kvm/builtin-run.h"
2 
3 #include "kvm/builtin-setup.h"
4 #include "kvm/virtio-balloon.h"
5 #include "kvm/virtio-console.h"
6 #include "kvm/parse-options.h"
7 #include "kvm/8250-serial.h"
8 #include "kvm/framebuffer.h"
9 #include "kvm/disk-image.h"
10 #include "kvm/threadpool.h"
11 #include "kvm/virtio-blk.h"
12 #include "kvm/virtio-net.h"
13 #include "kvm/virtio-rng.h"
14 #include "kvm/ioeventfd.h"
15 #include "kvm/virtio-9p.h"
16 #include "kvm/barrier.h"
17 #include "kvm/kvm-cpu.h"
18 #include "kvm/ioport.h"
19 #include "kvm/symbol.h"
20 #include "kvm/i8042.h"
21 #include "kvm/mutex.h"
22 #include "kvm/term.h"
23 #include "kvm/util.h"
24 #include "kvm/vesa.h"
25 #include "kvm/irq.h"
26 #include "kvm/kvm.h"
27 #include "kvm/pci.h"
28 #include "kvm/rtc.h"
29 #include "kvm/sdl.h"
30 #include "kvm/vnc.h"
31 #include "kvm/guest_compat.h"
32 #include "kvm/pci-shmem.h"
33 #include "kvm/kvm-ipc.h"
34 
35 #include <linux/types.h>
36 
37 #include <sys/utsname.h>
38 #include <sys/types.h>
39 #include <sys/stat.h>
40 #include <termios.h>
41 #include <signal.h>
42 #include <stdlib.h>
43 #include <string.h>
44 #include <unistd.h>
45 #include <ctype.h>
46 #include <stdio.h>
47 
/*
 * Fallback values used by kvm_cmd_run() and netdev_parser() when the
 * corresponding setting was not supplied.
 */
#define DEFAULT_KVM_DEV		"/dev/kvm"
#define DEFAULT_CONSOLE		"serial"
#define DEFAULT_NETWORK		"user"
#define DEFAULT_HOST_ADDR	"192.168.33.1"
#define DEFAULT_GUEST_ADDR	"192.168.33.15"
#define DEFAULT_GUEST_MAC	"02:15:15:15:15:15"
#define DEFAULT_HOST_MAC	"02:01:01:01:01:01"
#define DEFAULT_SCRIPT		"none"
/* Script path used when the sandbox wrapper sees a "--" separator. */
const char *DEFAULT_SANDBOX_FILENAME = "guest/sandbox.sh";

/* Shift counts for scaling a value by 2^20, 2^10 and 2^30 respectively. */
#define MB_SHIFT		(20)
#define KB_SHIFT		(10)
#define GB_SHIFT		(30)
/* Smallest guest RAM size accepted by kvm_cmd_run(), in MiB and in bytes. */
#define MIN_RAM_SIZE_MB		(64ULL)
#define MIN_RAM_SIZE_BYTE	(MIN_RAM_SIZE_MB << MB_SHIFT)
63 
/* The guest instance; assigned from kvm__init() in kvm_cmd_run(). */
struct kvm *kvm;
/* One entry per vCPU, filled in by kvm_cmd_run(). */
struct kvm_cpu *kvm_cpus[KVM_NR_CPUS];
/* Per-thread pointer to the vCPU a thread is running; set in kvm_cpu_thread(). */
__thread struct kvm_cpu *current_kvm_cpu;

/*
 * State filled in by the option table and its callbacks, then consumed by
 * kvm_cmd_run().
 */
/* Guest RAM size: MiB while parsing, converted to bytes in kvm_cmd_run(). */
static u64 ram_size;
/* Number of valid entries in image_filename[] / readonly_image[]. */
static u8  image_count;
/* Number of valid entries in net_params[]. */
static u8 num_net_devices;
static bool virtio_rng;
static const char *kernel_cmdline;
static const char *kernel_filename;
static const char *vmlinux_filename;
static const char *initrd_filename;
static const char *image_filename[MAX_DISK_IMAGES];
static const char *console;
static const char *dev;
/*
 * The following strings have no option in this file's table; kvm_cmd_run()
 * points them at the DEFAULT_* values when they are still NULL.
 */
static const char *network;
static const char *host_ip;
static const char *guest_ip;
static const char *guest_mac;
static const char *host_mac;
static const char *script;
static const char *guest_name;
static const char *sandbox;
/* Array of NICs requested with --network; grown by netdev_parser(). */
static struct virtio_net_params *net_params;
static bool single_step;
static bool readonly_image[MAX_DISK_IMAGES];
static bool vnc;
static bool sdl;
static bool balloon;
/* Set when the guest root is a 9p export rather than a disk image. */
static bool using_rootfs;
/* Set when the 9p root is a rootfs directory below kvm__get_dir(). */
static bool custom_rootfs;
/* Set by set_net_param() when "mode=none" was requested. */
static bool no_net;
static bool no_dhcp;
extern bool ioport_debug;
/* Wrapper mode; written by kvm_run_set_wrapper_sandbox(). */
static int  kvm_run_wrapper;
extern int  active_console;
extern int  debug_iodelay;

bool do_debug_print = false;

/* Requested vCPU count; 0 means "use the number of online host CPUs". */
static int nrcpus;
/* Video mode passed to kvm__load_kernel(); -1 means "not specified". */
static int vidmode = -1;
106 
/* NULL-terminated usage lines passed to parse_options()/usage_with_options(). */
static const char * const run_usage[] = {
	"kvm run [<options>] [<kernel image>]",
	NULL
};
111 
/*
 * Values for kvm_run_wrapper.
 *
 * KVM_RUN_DEFAULT must be the zero value: kvm_run_wrapper is a
 * zero-initialised static, so a plain "kvm run" has to compare unequal to
 * KVM_RUN_SANDBOX unless kvm_run_set_wrapper_sandbox() was called.
 */
enum {
	KVM_RUN_DEFAULT,
	KVM_RUN_SANDBOX,
};
115 
116 void kvm_run_set_wrapper_sandbox(void)
117 {
118 	kvm_run_wrapper = KVM_RUN_SANDBOX;
119 }
120 
121 static int img_name_parser(const struct option *opt, const char *arg, int unset)
122 {
123 	char *sep;
124 	struct stat st;
125 	char path[PATH_MAX];
126 
127 	if (stat(arg, &st) == 0 &&
128 	    S_ISDIR(st.st_mode)) {
129 		char tmp[PATH_MAX];
130 
131 		if (realpath(arg, tmp) == 0 ||
132 		    virtio_9p__register(kvm, tmp, "/dev/root") < 0)
133 			die("Unable to initialize virtio 9p");
134 		using_rootfs = 1;
135 		return 0;
136 	}
137 
138 	snprintf(path, PATH_MAX, "%s%s", kvm__get_dir(), arg);
139 
140 	if (stat(path, &st) == 0 &&
141 	    S_ISDIR(st.st_mode)) {
142 		char tmp[PATH_MAX];
143 
144 		if (realpath(path, tmp) == 0 ||
145 		    virtio_9p__register(kvm, tmp, "/dev/root") < 0)
146 			die("Unable to initialize virtio 9p");
147 		if (virtio_9p__register(kvm, "/", "hostfs") < 0)
148 			die("Unable to initialize virtio 9p");
149 		kvm_setup_resolv(arg);
150 		using_rootfs = custom_rootfs = 1;
151 		return 0;
152 	}
153 
154 	if (image_count >= MAX_DISK_IMAGES)
155 		die("Currently only 4 images are supported");
156 
157 	image_filename[image_count] = arg;
158 	sep = strstr(arg, ",");
159 	if (sep) {
160 		if (strcmp(sep + 1, "ro") == 0)
161 			readonly_image[image_count] = 1;
162 		*sep = 0;
163 	}
164 
165 	image_count++;
166 
167 	return 0;
168 }
169 
170 static int virtio_9p_rootdir_parser(const struct option *opt, const char *arg, int unset)
171 {
172 	char *tag_name;
173 	char tmp[PATH_MAX];
174 
175 	/*
176 	 * 9p dir can be of the form dirname,tag_name or
177 	 * just dirname. In the later case we use the
178 	 * default tag name
179 	 */
180 	tag_name = strstr(arg, ",");
181 	if (tag_name) {
182 		*tag_name = '\0';
183 		tag_name++;
184 	}
185 	if (realpath(arg, tmp)) {
186 		if (virtio_9p__register(kvm, tmp, tag_name) < 0)
187 			die("Unable to initialize virtio 9p");
188 	} else
189 		die("Failed resolving 9p path");
190 	return 0;
191 }
192 
/*
 * Option callback for --tty: converts @arg with atoi() and passes the
 * result to term_set_tty(). Always returns 0.
 */
static int tty_parser(const struct option *opt, const char *arg, int unset)
{
	term_set_tty(atoi(arg));
	return 0;
}
201 
/*
 * Parse a colon-separated hexadecimal MAC address ("aa:bb:cc:dd:ee:ff")
 * from @str into the six bytes starting at @mac. Bytes that sscanf() fails
 * to convert are left untouched.
 */
static inline void str_to_mac(const char *str, char *mac)
{
	sscanf(str, "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx",
	       &mac[0], &mac[1], &mac[2], &mac[3], &mac[4], &mac[5]);
}
207 static int set_net_param(struct virtio_net_params *p, const char *param,
208 				const char *val)
209 {
210 	if (strcmp(param, "guest_mac") == 0) {
211 		str_to_mac(val, p->guest_mac);
212 	} else if (strcmp(param, "mode") == 0) {
213 		if (!strncmp(val, "user", 4)) {
214 			int i;
215 
216 			for (i = 0; i < num_net_devices; i++)
217 				if (net_params[i].mode == NET_MODE_USER)
218 					die("Only one usermode network device allowed at a time");
219 			p->mode = NET_MODE_USER;
220 		} else if (!strncmp(val, "tap", 3)) {
221 			p->mode = NET_MODE_TAP;
222 		} else if (!strncmp(val, "none", 4)) {
223 			no_net = 1;
224 			return -1;
225 		} else
226 			die("Unkown network mode %s, please use user, tap or none", network);
227 	} else if (strcmp(param, "script") == 0) {
228 		p->script = strdup(val);
229 	} else if (strcmp(param, "guest_ip") == 0) {
230 		p->guest_ip = strdup(val);
231 	} else if (strcmp(param, "host_ip") == 0) {
232 		p->host_ip = strdup(val);
233 	} else if (strcmp(param, "vhost") == 0) {
234 		p->vhost = atoi(val);
235 	} else if (strcmp(param, "fd") == 0) {
236 		p->fd = atoi(val);
237 	}
238 
239 	return 0;
240 }
241 
242 static int netdev_parser(const struct option *opt, const char *arg, int unset)
243 {
244 	struct virtio_net_params p;
245 	char *buf = NULL, *cmd = NULL, *cur = NULL;
246 	bool on_cmd = true;
247 
248 	if (arg) {
249 		buf = strdup(arg);
250 		if (buf == NULL)
251 			die("Failed allocating new net buffer");
252 		cur = strtok(buf, ",=");
253 	}
254 
255 	p = (struct virtio_net_params) {
256 		.guest_ip	= DEFAULT_GUEST_ADDR,
257 		.host_ip	= DEFAULT_HOST_ADDR,
258 		.script		= DEFAULT_SCRIPT,
259 		.mode		= NET_MODE_TAP,
260 	};
261 
262 	str_to_mac(DEFAULT_GUEST_MAC, p.guest_mac);
263 	p.guest_mac[5] += num_net_devices;
264 
265 	while (cur) {
266 		if (on_cmd) {
267 			cmd = cur;
268 		} else {
269 			if (set_net_param(&p, cmd, cur) < 0)
270 				goto done;
271 		}
272 		on_cmd = !on_cmd;
273 
274 		cur = strtok(NULL, ",=");
275 	};
276 
277 	num_net_devices++;
278 
279 	net_params = realloc(net_params, num_net_devices * sizeof(*net_params));
280 	if (net_params == NULL)
281 		die("Failed adding new network device");
282 
283 	net_params[num_net_devices - 1] = p;
284 
285 done:
286 	free(buf);
287 	return 0;
288 }
289 
290 static int shmem_parser(const struct option *opt, const char *arg, int unset)
291 {
292 	const u64 default_size = SHMEM_DEFAULT_SIZE;
293 	const u64 default_phys_addr = SHMEM_DEFAULT_ADDR;
294 	const char *default_handle = SHMEM_DEFAULT_HANDLE;
295 	struct shmem_info *si = malloc(sizeof(struct shmem_info));
296 	u64 phys_addr;
297 	u64 size;
298 	char *handle = NULL;
299 	int create = 0;
300 	const char *p = arg;
301 	char *next;
302 	int base = 10;
303 	int verbose = 0;
304 
305 	const int skip_pci = strlen("pci:");
306 	if (verbose)
307 		pr_info("shmem_parser(%p,%s,%d)", opt, arg, unset);
308 	/* parse out optional addr family */
309 	if (strcasestr(p, "pci:")) {
310 		p += skip_pci;
311 	} else if (strcasestr(p, "mem:")) {
312 		die("I can't add to E820 map yet.\n");
313 	}
314 	/* parse out physical addr */
315 	base = 10;
316 	if (strcasestr(p, "0x"))
317 		base = 16;
318 	phys_addr = strtoll(p, &next, base);
319 	if (next == p && phys_addr == 0) {
320 		pr_info("shmem: no physical addr specified, using default.");
321 		phys_addr = default_phys_addr;
322 	}
323 	if (*next != ':' && *next != '\0')
324 		die("shmem: unexpected chars after phys addr.\n");
325 	if (*next == '\0')
326 		p = next;
327 	else
328 		p = next + 1;
329 	/* parse out size */
330 	base = 10;
331 	if (strcasestr(p, "0x"))
332 		base = 16;
333 	size = strtoll(p, &next, base);
334 	if (next == p && size == 0) {
335 		pr_info("shmem: no size specified, using default.");
336 		size = default_size;
337 	}
338 	/* look for [KMGkmg][Bb]*  uses base 2. */
339 	int skip_B = 0;
340 	if (strspn(next, "KMGkmg")) {	/* might have a prefix */
341 		if (*(next + 1) == 'B' || *(next + 1) == 'b')
342 			skip_B = 1;
343 		switch (*next) {
344 		case 'K':
345 		case 'k':
346 			size = size << KB_SHIFT;
347 			break;
348 		case 'M':
349 		case 'm':
350 			size = size << MB_SHIFT;
351 			break;
352 		case 'G':
353 		case 'g':
354 			size = size << GB_SHIFT;
355 			break;
356 		default:
357 			die("shmem: bug in detecting size prefix.");
358 			break;
359 		}
360 		next += 1 + skip_B;
361 	}
362 	if (*next != ':' && *next != '\0') {
363 		die("shmem: unexpected chars after phys size. <%c><%c>\n",
364 		    *next, *p);
365 	}
366 	if (*next == '\0')
367 		p = next;
368 	else
369 		p = next + 1;
370 	/* parse out optional shmem handle */
371 	const int skip_handle = strlen("handle=");
372 	next = strcasestr(p, "handle=");
373 	if (*p && next) {
374 		if (p != next)
375 			die("unexpected chars before handle\n");
376 		p += skip_handle;
377 		next = strchrnul(p, ':');
378 		if (next - p) {
379 			handle = malloc(next - p + 1);
380 			strncpy(handle, p, next - p);
381 			handle[next - p] = '\0';	/* just in case. */
382 		}
383 		if (*next == '\0')
384 			p = next;
385 		else
386 			p = next + 1;
387 	}
388 	/* parse optional create flag to see if we should create shm seg. */
389 	if (*p && strcasestr(p, "create")) {
390 		create = 1;
391 		p += strlen("create");
392 	}
393 	if (*p != '\0')
394 		die("shmem: unexpected trailing chars\n");
395 	if (handle == NULL) {
396 		handle = malloc(strlen(default_handle) + 1);
397 		strcpy(handle, default_handle);
398 	}
399 	if (verbose) {
400 		pr_info("shmem: phys_addr = %llx", phys_addr);
401 		pr_info("shmem: size      = %llx", size);
402 		pr_info("shmem: handle    = %s", handle);
403 		pr_info("shmem: create    = %d", create);
404 	}
405 
406 	si->phys_addr = phys_addr;
407 	si->size = size;
408 	si->handle = handle;
409 	si->create = create;
410 	pci_shmem__register_mem(si);	/* ownership of si, etc. passed on. */
411 	return 0;
412 }
413 
/*
 * Command-line option table for "kvm run", consumed by parse_options() and
 * usage_with_options(). Entries either store straight into one of the
 * file-scope variables above or dispatch to one of the *_parser callbacks.
 */
static const struct option options[] = {
	OPT_GROUP("Basic options:"),
	OPT_STRING('\0', "name", &guest_name, "guest name",
			"A name for the guest"),
	OPT_INTEGER('c', "cpus", &nrcpus, "Number of CPUs"),
	OPT_U64('m', "mem", &ram_size, "Virtual machine memory size in MiB."),
	OPT_CALLBACK('\0', "shmem", NULL,
		     "[pci:]<addr>:<size>[:handle=<handle>][:create]",
		     "Share host shmem with guest via pci device",
		     shmem_parser),
	OPT_CALLBACK('d', "disk", NULL, "image or rootfs_dir", "Disk image or rootfs directory", img_name_parser),
	OPT_BOOLEAN('\0', "balloon", &balloon, "Enable virtio balloon"),
	OPT_BOOLEAN('\0', "vnc", &vnc, "Enable VNC framebuffer"),
	OPT_BOOLEAN('\0', "sdl", &sdl, "Enable SDL framebuffer"),
	OPT_BOOLEAN('\0', "rng", &virtio_rng, "Enable virtio Random Number Generator"),
	OPT_CALLBACK('\0', "9p", NULL, "dir_to_share,tag_name",
		     "Enable virtio 9p to share files between host and guest", virtio_9p_rootdir_parser),
	OPT_STRING('\0', "console", &console, "serial, virtio or hv",
			"Console to use"),
	OPT_STRING('\0', "dev", &dev, "device_file", "KVM device file"),
	OPT_CALLBACK('\0', "tty", NULL, "tty id",
		     "Remap guest TTY into a pty on the host",
		     tty_parser),
	OPT_STRING('\0', "sandbox", &sandbox, "script",
			"Run this script when booting into custom rootfs"),

	OPT_GROUP("Kernel options:"),
	OPT_STRING('k', "kernel", &kernel_filename, "kernel",
			"Kernel to boot in virtual machine"),
	OPT_STRING('i', "initrd", &initrd_filename, "initrd",
			"Initial RAM disk image"),
	OPT_STRING('p', "params", &kernel_cmdline, "params",
			"Kernel command line arguments"),

	OPT_GROUP("Networking options:"),
	OPT_CALLBACK_DEFAULT('n', "network", NULL, "network params",
		     "Create a new guest NIC",
		     netdev_parser, NULL),
	OPT_BOOLEAN('\0', "no-dhcp", &no_dhcp, "Disable kernel DHCP in rootfs mode"),

	OPT_GROUP("BIOS options:"),
	OPT_INTEGER('\0', "vidmode", &vidmode,
		    "Video mode"),

	OPT_GROUP("Debug options:"),
	OPT_BOOLEAN('\0', "debug", &do_debug_print,
			"Enable debug messages"),
	OPT_BOOLEAN('\0', "debug-single-step", &single_step,
			"Enable single stepping"),
	OPT_BOOLEAN('\0', "debug-ioport", &ioport_debug,
			"Enable ioport debugging"),
	OPT_INTEGER('\0', "debug-iodelay", &debug_iodelay,
			"Delay IO by millisecond"),
	OPT_END()
};
469 
470 /*
471  * Serialize debug printout so that the output of multiple vcpus does not
472  * get mixed up:
473  */
474 static int printout_done;
475 
476 static void handle_sigusr1(int sig)
477 {
478 	struct kvm_cpu *cpu = current_kvm_cpu;
479 	int fd = kvm_cpu__get_debug_fd();
480 
481 	if (!cpu)
482 		return;
483 
484 	dprintf(fd, "\n #\n # vCPU #%ld's dump:\n #\n", cpu->cpu_id);
485 	kvm_cpu__show_registers(cpu);
486 	kvm_cpu__show_code(cpu);
487 	kvm_cpu__show_page_tables(cpu);
488 	fflush(stdout);
489 	printout_done = 1;
490 	mb();
491 }
492 
493 /* Pause/resume the guest using SIGUSR2 */
494 static int is_paused;
495 
496 static void handle_pause(int fd, u32 type, u32 len, u8 *msg)
497 {
498 	if (type == KVM_IPC_RESUME && is_paused)
499 		kvm__continue();
500 	else if (type == KVM_IPC_PAUSE && !is_paused)
501 		kvm__pause();
502 	else
503 		return;
504 
505 	is_paused = !is_paused;
506 	pr_info("Guest %s\n", is_paused ? "paused" : "resumed");
507 }
508 
509 static void handle_debug(int fd, u32 type, u32 len, u8 *msg)
510 {
511 	int i;
512 
513 	for (i = 0; i < nrcpus; i++) {
514 		struct kvm_cpu *cpu = kvm_cpus[i];
515 
516 		if (!cpu)
517 			continue;
518 
519 		printout_done = 0;
520 
521 		kvm_cpu__set_debug_fd(fd);
522 		pthread_kill(cpu->thread, SIGUSR1);
523 		/*
524 		 * Wait for the vCPU to dump state before signalling
525 		 * the next thread. Since this is debug code it does
526 		 * not matter that we are burning CPU time a bit:
527 		 */
528 		while (!printout_done)
529 			mb();
530 	}
531 
532 	close(fd);
533 
534 	serial8250__inject_sysrq(kvm);
535 }
536 
537 static void handle_sigalrm(int sig)
538 {
539 	kvm__arch_periodic_poll(kvm);
540 }
541 
542 static void handle_stop(int fd, u32 type, u32 len, u8 *msg)
543 {
544 	kvm_cpu__reboot();
545 }
546 
547 static void *kvm_cpu_thread(void *arg)
548 {
549 	current_kvm_cpu		= arg;
550 
551 	if (kvm_cpu__start(current_kvm_cpu))
552 		goto panic_kvm;
553 
554 	kvm_cpu__delete(current_kvm_cpu);
555 
556 	return (void *) (intptr_t) 0;
557 
558 panic_kvm:
559 	fprintf(stderr, "KVM exit reason: %u (\"%s\")\n",
560 		current_kvm_cpu->kvm_run->exit_reason,
561 		kvm_exit_reasons[current_kvm_cpu->kvm_run->exit_reason]);
562 	if (current_kvm_cpu->kvm_run->exit_reason == KVM_EXIT_UNKNOWN)
563 		fprintf(stderr, "KVM exit code: 0x%Lu\n",
564 			current_kvm_cpu->kvm_run->hw.hardware_exit_reason);
565 
566 	kvm_cpu__set_debug_fd(STDOUT_FILENO);
567 	kvm_cpu__show_registers(current_kvm_cpu);
568 	kvm_cpu__show_code(current_kvm_cpu);
569 	kvm_cpu__show_page_tables(current_kvm_cpu);
570 
571 	kvm_cpu__delete(current_kvm_cpu);
572 
573 	return (void *) (intptr_t) 1;
574 }
575 
/* Scratch buffer holding the kernel path chosen or printed by the helpers below. */
static char kernel[PATH_MAX];

/* Path prefixes to which "-<uname release>" is appended when searching the host. */
static const char *host_kernels[] = {
	"/boot/vmlinuz",
	"/boot/bzImage",
	NULL
};

/* Kernel image paths tried first, before looking at the host's kernels. */
static const char *default_kernels[] = {
	"./bzImage",
	"../../arch/" BUILD_ARCH "/boot/bzImage",
	NULL
};

/* Candidate vmlinux paths tried by find_vmlinux(). */
static const char *default_vmlinux[] = {
	"../../../vmlinux",
	"../../vmlinux",
	NULL
};
595 
596 static void kernel_usage_with_options(void)
597 {
598 	const char **k;
599 	struct utsname uts;
600 
601 	fprintf(stderr, "Fatal: could not find default kernel image in:\n");
602 	k = &default_kernels[0];
603 	while (*k) {
604 		fprintf(stderr, "\t%s\n", *k);
605 		k++;
606 	}
607 
608 	if (uname(&uts) < 0)
609 		return;
610 
611 	k = &host_kernels[0];
612 	while (*k) {
613 		if (snprintf(kernel, PATH_MAX, "%s-%s", *k, uts.release) < 0)
614 			return;
615 		fprintf(stderr, "\t%s\n", kernel);
616 		k++;
617 	}
618 	fprintf(stderr, "\nPlease see 'kvm run --help' for more options.\n\n");
619 }
620 
621 static u64 host_ram_size(void)
622 {
623 	long page_size;
624 	long nr_pages;
625 
626 	nr_pages	= sysconf(_SC_PHYS_PAGES);
627 	if (nr_pages < 0) {
628 		pr_warning("sysconf(_SC_PHYS_PAGES) failed");
629 		return 0;
630 	}
631 
632 	page_size	= sysconf(_SC_PAGE_SIZE);
633 	if (page_size < 0) {
634 		pr_warning("sysconf(_SC_PAGE_SIZE) failed");
635 		return 0;
636 	}
637 
638 	return (nr_pages * page_size) >> MB_SHIFT;
639 }
640 
641 /*
642  * If user didn't specify how much memory it wants to allocate for the guest,
643  * avoid filling the whole host RAM.
644  */
645 #define RAM_SIZE_RATIO		0.8
646 
647 static u64 get_ram_size(int nr_cpus)
648 {
649 	u64 available;
650 	u64 ram_size;
651 
652 	ram_size	= 64 * (nr_cpus + 3);
653 
654 	available	= host_ram_size() * RAM_SIZE_RATIO;
655 	if (!available)
656 		available = MIN_RAM_SIZE_MB;
657 
658 	if (ram_size > available)
659 		ram_size	= available;
660 
661 	return ram_size;
662 }
663 
664 static const char *find_kernel(void)
665 {
666 	const char **k;
667 	struct stat st;
668 	struct utsname uts;
669 
670 	k = &default_kernels[0];
671 	while (*k) {
672 		if (stat(*k, &st) < 0 || !S_ISREG(st.st_mode)) {
673 			k++;
674 			continue;
675 		}
676 		strncpy(kernel, *k, PATH_MAX);
677 		return kernel;
678 	}
679 
680 	if (uname(&uts) < 0)
681 		return NULL;
682 
683 	k = &host_kernels[0];
684 	while (*k) {
685 		if (snprintf(kernel, PATH_MAX, "%s-%s", *k, uts.release) < 0)
686 			return NULL;
687 
688 		if (stat(kernel, &st) < 0 || !S_ISREG(st.st_mode)) {
689 			k++;
690 			continue;
691 		}
692 		return kernel;
693 
694 	}
695 	return NULL;
696 }
697 
698 static const char *find_vmlinux(void)
699 {
700 	const char **vmlinux;
701 
702 	vmlinux = &default_vmlinux[0];
703 	while (*vmlinux) {
704 		struct stat st;
705 
706 		if (stat(*vmlinux, &st) < 0 || !S_ISREG(st.st_mode)) {
707 			vmlinux++;
708 			continue;
709 		}
710 		return *vmlinux;
711 	}
712 	return NULL;
713 }
714 
715 void kvm_run_help(void)
716 {
717 	usage_with_options(run_usage, options);
718 }
719 
720 static int kvm_custom_stage2(void)
721 {
722 	char tmp[PATH_MAX], dst[PATH_MAX], *src;
723 	const char *rootfs;
724 	int r;
725 
726 	src = realpath("guest/init_stage2", NULL);
727 	if (src == NULL)
728 		return -ENOMEM;
729 
730 	if (image_filename[0] == NULL)
731 		rootfs = "default";
732 	else
733 		rootfs = image_filename[0];
734 
735 	snprintf(tmp, PATH_MAX, "%s%s/virt/init_stage2", kvm__get_dir(), rootfs);
736 	remove(tmp);
737 
738 	snprintf(dst, PATH_MAX, "/host/%s", src);
739 	r = symlink(dst, tmp);
740 	free(src);
741 
742 	return r;
743 }
744 
745 static int kvm_run_set_sandbox(void)
746 {
747 	const char *guestfs_name = "default";
748 	char path[PATH_MAX], script[PATH_MAX], *tmp;
749 
750 	if (image_filename[0])
751 		guestfs_name = image_filename[0];
752 
753 	snprintf(path, PATH_MAX, "%s%s/virt/sandbox.sh", kvm__get_dir(), guestfs_name);
754 
755 	remove(path);
756 
757 	if (sandbox == NULL)
758 		return 0;
759 
760 	tmp = realpath(sandbox, NULL);
761 	if (tmp == NULL)
762 		return -ENOMEM;
763 
764 	snprintf(script, PATH_MAX, "/host/%s", tmp);
765 	free(tmp);
766 
767 	return symlink(script, path);
768 }
769 
770 static void kvm_run_write_sandbox_cmd(const char **argv, int argc)
771 {
772 	const char script_hdr[] = "#! /bin/bash\n\n";
773 	int fd;
774 
775 	remove(sandbox);
776 
777 	fd = open(sandbox, O_RDWR | O_CREAT, 0777);
778 	if (fd < 0)
779 		die("Failed creating sandbox script");
780 
781 	if (write(fd, script_hdr, sizeof(script_hdr) - 1) <= 0)
782 		die("Failed writing sandbox script");
783 
784 	while (argc) {
785 		if (write(fd, argv[0], strlen(argv[0])) <= 0)
786 			die("Failed writing sandbox script");
787 		if (argc - 1)
788 			if (write(fd, " ", 1) <= 0)
789 				die("Failed writing sandbox script");
790 		argv++;
791 		argc--;
792 	}
793 	if (write(fd, "\n", 1) <= 0)
794 		die("Failed writing sandbox script");
795 
796 	close(fd);
797 }
798 
799 int kvm_cmd_run(int argc, const char **argv, const char *prefix)
800 {
801 	static char real_cmdline[2048], default_name[20];
802 	struct framebuffer *fb = NULL;
803 	unsigned int nr_online_cpus;
804 	int exit_code = 0;
805 	int max_cpus, recommended_cpus;
806 	int i;
807 	void *ret;
808 
809 	signal(SIGALRM, handle_sigalrm);
810 	kvm_ipc__register_handler(KVM_IPC_DEBUG, handle_debug);
811 	signal(SIGUSR1, handle_sigusr1);
812 	kvm_ipc__register_handler(KVM_IPC_PAUSE, handle_pause);
813 	kvm_ipc__register_handler(KVM_IPC_RESUME, handle_pause);
814 	kvm_ipc__register_handler(KVM_IPC_STOP, handle_stop);
815 
816 	nr_online_cpus = sysconf(_SC_NPROCESSORS_ONLN);
817 
818 	while (argc != 0) {
819 		argc = parse_options(argc, argv, options, run_usage,
820 				PARSE_OPT_STOP_AT_NON_OPTION |
821 				PARSE_OPT_KEEP_DASHDASH);
822 		if (argc != 0) {
823 			/* Cusrom options, should have been handled elsewhere */
824 			if (strcmp(argv[0], "--") == 0) {
825 				if (kvm_run_wrapper == KVM_RUN_SANDBOX) {
826 					sandbox = DEFAULT_SANDBOX_FILENAME;
827 					kvm_run_write_sandbox_cmd(argv+1, argc-1);
828 					break;
829 				}
830 			}
831 
832 			if (kernel_filename) {
833 				fprintf(stderr, "Cannot handle parameter: "
834 						"%s\n", argv[0]);
835 				usage_with_options(run_usage, options);
836 				return EINVAL;
837 			}
838 			/* first unhandled parameter is treated as a kernel
839 			   image
840 			 */
841 			kernel_filename = argv[0];
842 			argv++;
843 			argc--;
844 		}
845 
846 	}
847 
848 	if (!kernel_filename)
849 		kernel_filename = find_kernel();
850 
851 	if (!kernel_filename) {
852 		kernel_usage_with_options();
853 		return EINVAL;
854 	}
855 
856 	vmlinux_filename = find_vmlinux();
857 
858 	if (nrcpus == 0)
859 		nrcpus = nr_online_cpus;
860 	else if (nrcpus < 1 || nrcpus > KVM_NR_CPUS)
861 		die("Number of CPUs %d is out of [1;%d] range", nrcpus, KVM_NR_CPUS);
862 
863 	if (!ram_size)
864 		ram_size	= get_ram_size(nrcpus);
865 
866 	if (ram_size < MIN_RAM_SIZE_MB)
867 		die("Not enough memory specified: %lluMB (min %lluMB)", ram_size, MIN_RAM_SIZE_MB);
868 
869 	if (ram_size > host_ram_size())
870 		pr_warning("Guest memory size %lluMB exceeds host physical RAM size %lluMB", ram_size, host_ram_size());
871 
872 	ram_size <<= MB_SHIFT;
873 
874 	if (!dev)
875 		dev = DEFAULT_KVM_DEV;
876 
877 	if (!console)
878 		console = DEFAULT_CONSOLE;
879 
880 	if (!strncmp(console, "virtio", 6))
881 		active_console  = CONSOLE_VIRTIO;
882 	else if (!strncmp(console, "serial", 6))
883 		active_console  = CONSOLE_8250;
884 	else if (!strncmp(console, "hv", 2))
885 		active_console = CONSOLE_HV;
886 	else
887 		pr_warning("No console!");
888 
889 	if (!host_ip)
890 		host_ip = DEFAULT_HOST_ADDR;
891 
892 	if (!guest_ip)
893 		guest_ip = DEFAULT_GUEST_ADDR;
894 
895 	if (!guest_mac)
896 		guest_mac = DEFAULT_GUEST_MAC;
897 
898 	if (!host_mac)
899 		host_mac = DEFAULT_HOST_MAC;
900 
901 	if (!script)
902 		script = DEFAULT_SCRIPT;
903 
904 	symbol__init(vmlinux_filename);
905 
906 	term_init();
907 
908 	if (!guest_name) {
909 		sprintf(default_name, "guest-%u", getpid());
910 		guest_name = default_name;
911 	}
912 
913 	kvm = kvm__init(dev, ram_size, guest_name);
914 
915 	kvm->single_step = single_step;
916 
917 	ioeventfd__init();
918 
919 	max_cpus = kvm__max_cpus(kvm);
920 	recommended_cpus = kvm__recommended_cpus(kvm);
921 
922 	if (nrcpus > max_cpus) {
923 		printf("  # Limit the number of CPUs to %d\n", max_cpus);
924 		nrcpus = max_cpus;
925 	} else if (nrcpus > recommended_cpus) {
926 		printf("  # Warning: The maximum recommended amount of VCPUs"
927 			" is %d\n", recommended_cpus);
928 	}
929 
930 	kvm->nrcpus = nrcpus;
931 
932 	irq__init(kvm);
933 
934 	pci__init();
935 
936 	/*
937 	 * vidmode should be either specified
938 	 * either set by default
939 	 */
940 	if (vnc || sdl) {
941 		if (vidmode == -1)
942 			vidmode = 0x312;
943 	} else
944 		vidmode = 0;
945 
946 	memset(real_cmdline, 0, sizeof(real_cmdline));
947 	kvm__arch_set_cmdline(real_cmdline, vnc || sdl);
948 
949 	if (strlen(real_cmdline) > 0)
950 		strcat(real_cmdline, " ");
951 
952 	if (kernel_cmdline)
953 		strlcat(real_cmdline, kernel_cmdline, sizeof(real_cmdline));
954 
955 	if (!using_rootfs && !image_filename[0]) {
956 		char tmp[PATH_MAX];
957 
958 		kvm_setup_create_new("default");
959 		kvm_setup_resolv("default");
960 
961 		snprintf(tmp, PATH_MAX, "%s%s", kvm__get_dir(), "default");
962 		if (virtio_9p__register(kvm, tmp, "/dev/root") < 0)
963 			die("Unable to initialize virtio 9p");
964 		if (virtio_9p__register(kvm, "/", "hostfs") < 0)
965 			die("Unable to initialize virtio 9p");
966 		using_rootfs = custom_rootfs = 1;
967 	}
968 
969 	if (using_rootfs) {
970 		strcat(real_cmdline, " root=/dev/root rw rootflags=rw,trans=virtio,version=9p2000.L rootfstype=9p");
971 		if (custom_rootfs) {
972 			kvm_run_set_sandbox();
973 
974 			strcat(real_cmdline, " init=/virt/init");
975 
976 			if (!no_dhcp)
977 				strcat(real_cmdline, "  ip=dhcp");
978 			if (kvm_custom_stage2())
979 				die("Failed linking stage 2 of init.");
980 		}
981 	} else if (!strstr(real_cmdline, "root=")) {
982 		strlcat(real_cmdline, " root=/dev/vda rw ", sizeof(real_cmdline));
983 	}
984 
985 	if (image_count) {
986 		kvm->nr_disks = image_count;
987 		kvm->disks    = disk_image__open_all(image_filename, readonly_image, image_count);
988 		if (!kvm->disks)
989 			die("Unable to load all disk images.");
990 
991 		virtio_blk__init_all(kvm);
992 	}
993 
994 	printf("  # kvm run -k %s -m %Lu -c %d --name %s\n", kernel_filename, ram_size / 1024 / 1024, nrcpus, guest_name);
995 
996 	if (!kvm__load_kernel(kvm, kernel_filename, initrd_filename,
997 				real_cmdline, vidmode))
998 		die("unable to load kernel %s", kernel_filename);
999 
1000 	kvm->vmlinux		= vmlinux_filename;
1001 
1002 	ioport__setup_arch();
1003 
1004 	rtc__init();
1005 
1006 	serial8250__init(kvm);
1007 
1008 	if (active_console == CONSOLE_VIRTIO)
1009 		virtio_console__init(kvm);
1010 
1011 	if (virtio_rng)
1012 		virtio_rng__init(kvm);
1013 
1014 	if (balloon)
1015 		virtio_bln__init(kvm);
1016 
1017 	if (!network)
1018 		network = DEFAULT_NETWORK;
1019 
1020 	virtio_9p__init(kvm);
1021 
1022 	for (i = 0; i < num_net_devices; i++) {
1023 		net_params[i].kvm = kvm;
1024 		virtio_net__init(&net_params[i]);
1025 	}
1026 
1027 	if (num_net_devices == 0 && no_net == 0) {
1028 		struct virtio_net_params net_params;
1029 
1030 		net_params = (struct virtio_net_params) {
1031 			.guest_ip	= guest_ip,
1032 			.host_ip	= host_ip,
1033 			.kvm		= kvm,
1034 			.script		= script,
1035 			.mode		= NET_MODE_USER,
1036 		};
1037 		str_to_mac(guest_mac, net_params.guest_mac);
1038 		str_to_mac(host_mac, net_params.host_mac);
1039 
1040 		virtio_net__init(&net_params);
1041 	}
1042 
1043 	kvm__init_ram(kvm);
1044 
1045 #ifdef CONFIG_X86
1046 	kbd__init(kvm);
1047 #endif
1048 
1049 	pci_shmem__init(kvm);
1050 
1051 	if (vnc || sdl)
1052 		fb = vesa__init(kvm);
1053 
1054 	if (vnc) {
1055 		if (fb)
1056 			vnc__init(fb);
1057 	}
1058 
1059 	if (sdl) {
1060 		if (fb)
1061 			sdl__init(fb);
1062 	}
1063 
1064 	fb__start();
1065 
1066 	/* Device init all done; firmware init must
1067 	 * come after this (it may set up device trees etc.)
1068 	 */
1069 
1070 	kvm__start_timer(kvm);
1071 
1072 	kvm__arch_setup_firmware(kvm);
1073 
1074 	for (i = 0; i < nrcpus; i++) {
1075 		kvm_cpus[i] = kvm_cpu__init(kvm, i);
1076 		if (!kvm_cpus[i])
1077 			die("unable to initialize KVM VCPU");
1078 	}
1079 
1080 	thread_pool__init(nr_online_cpus);
1081 	ioeventfd__start();
1082 
1083 	for (i = 0; i < nrcpus; i++) {
1084 		if (pthread_create(&kvm_cpus[i]->thread, NULL, kvm_cpu_thread, kvm_cpus[i]) != 0)
1085 			die("unable to create KVM VCPU thread");
1086 	}
1087 
1088 	/* Only VCPU #0 is going to exit by itself when shutting down */
1089 	if (pthread_join(kvm_cpus[0]->thread, &ret) != 0)
1090 		exit_code = 1;
1091 
1092 	for (i = 1; i < nrcpus; i++) {
1093 		if (kvm_cpus[i]->is_running) {
1094 			pthread_kill(kvm_cpus[i]->thread, SIGKVMEXIT);
1095 			if (pthread_join(kvm_cpus[i]->thread, &ret) != 0)
1096 				die("pthread_join");
1097 		}
1098 		if (ret != NULL)
1099 			exit_code = 1;
1100 	}
1101 
1102 	compat__print_all_messages();
1103 
1104 	fb__stop();
1105 
1106 	virtio_blk__delete_all(kvm);
1107 	virtio_rng__delete_all(kvm);
1108 
1109 	disk_image__close_all(kvm->disks, image_count);
1110 	kvm__delete(kvm);
1111 
1112 	if (!exit_code)
1113 		printf("\n  # KVM session ended normally.\n");
1114 
1115 	return exit_code;
1116 }
1117