xref: /kvmtool/builtin-run.c (revision 4b1c6f6e947ba8c35c0dc49346916817e943786f)
1 #include "kvm/builtin-run.h"
2 
3 #include "kvm/builtin-setup.h"
4 #include "kvm/virtio-balloon.h"
5 #include "kvm/virtio-console.h"
6 #include "kvm/parse-options.h"
7 #include "kvm/8250-serial.h"
8 #include "kvm/framebuffer.h"
9 #include "kvm/disk-image.h"
10 #include "kvm/threadpool.h"
11 #include "kvm/virtio-blk.h"
12 #include "kvm/virtio-net.h"
13 #include "kvm/virtio-rng.h"
14 #include "kvm/ioeventfd.h"
15 #include "kvm/virtio-9p.h"
16 #include "kvm/barrier.h"
17 #include "kvm/kvm-cpu.h"
18 #include "kvm/ioport.h"
19 #include "kvm/symbol.h"
20 #include "kvm/i8042.h"
21 #include "kvm/mutex.h"
22 #include "kvm/term.h"
23 #include "kvm/util.h"
24 #include "kvm/vesa.h"
25 #include "kvm/irq.h"
26 #include "kvm/kvm.h"
27 #include "kvm/pci.h"
28 #include "kvm/rtc.h"
29 #include "kvm/sdl.h"
30 #include "kvm/vnc.h"
31 #include "kvm/guest_compat.h"
32 #include "kvm/pci-shmem.h"
33 #include "kvm/kvm-ipc.h"
34 #include "kvm/builtin-debug.h"
35 
36 #include <linux/types.h>
37 
38 #include <sys/utsname.h>
39 #include <sys/types.h>
40 #include <sys/stat.h>
41 #include <termios.h>
42 #include <signal.h>
43 #include <stdlib.h>
44 #include <string.h>
45 #include <unistd.h>
46 #include <ctype.h>
47 #include <stdio.h>
48 
/* Fallback values used by "kvm run" when the matching option is not given. */

/* KVM device node and guest console type. */
#define DEFAULT_KVM_DEV		"/dev/kvm"
#define DEFAULT_CONSOLE		"serial"

/* Networking defaults: mode, addresses, MACs and the setup script. */
#define DEFAULT_NETWORK		"user"
#define DEFAULT_HOST_ADDR	"192.168.33.1"
#define DEFAULT_GUEST_ADDR	"192.168.33.15"
#define DEFAULT_GUEST_MAC	"02:15:15:15:15:15"
#define DEFAULT_HOST_MAC	"02:01:01:01:01:01"
#define DEFAULT_SCRIPT		"none"

/* Script file written/used when running through the sandbox wrapper. */
const char *DEFAULT_SANDBOX_FILENAME = "guest/sandbox.sh";
58 
/* Left-shift amounts that convert a count of KiB/MiB/GiB into bytes. */
#define KB_SHIFT		(10)
#define MB_SHIFT		(20)
#define GB_SHIFT		(30)

/* Smallest guest RAM size accepted, expressed in MiB and in bytes. */
#define MIN_RAM_SIZE_MB		(64ULL)
#define MIN_RAM_SIZE_BYTE	(MIN_RAM_SIZE_MB << MB_SHIFT)
64 
65 struct kvm *kvm;
66 struct kvm_cpu *kvm_cpus[KVM_NR_CPUS];
67 __thread struct kvm_cpu *current_kvm_cpu;
68 
69 static u64 ram_size;
70 static u8  image_count;
71 static u8 num_net_devices;
72 static bool virtio_rng;
73 static const char *kernel_cmdline;
74 static const char *kernel_filename;
75 static const char *vmlinux_filename;
76 static const char *initrd_filename;
77 static const char *image_filename[MAX_DISK_IMAGES];
78 static const char *console;
79 static const char *dev;
80 static const char *network;
81 static const char *host_ip;
82 static const char *guest_ip;
83 static const char *guest_mac;
84 static const char *host_mac;
85 static const char *script;
86 static const char *guest_name;
87 static const char *sandbox;
88 static struct virtio_net_params *net_params;
89 static bool single_step;
90 static bool readonly_image[MAX_DISK_IMAGES];
91 static bool vnc;
92 static bool sdl;
93 static bool balloon;
94 static bool using_rootfs;
95 static bool custom_rootfs;
96 static bool no_net;
97 static bool no_dhcp;
98 extern bool ioport_debug;
99 static int  kvm_run_wrapper;
100 extern int  active_console;
101 extern int  debug_iodelay;
102 
103 bool do_debug_print = false;
104 
105 static int nrcpus;
106 static int vidmode = -1;
107 
/* Usage banner handed to the option parser; NULL-terminated. */
static const char * const run_usage[] = {
	"kvm run [<options>] [<kernel image>]",
	NULL
};

/* Values stored in kvm_run_wrapper. */
enum {
	KVM_RUN_SANDBOX,
};
116 
117 void kvm_run_set_wrapper_sandbox(void)
118 {
119 	kvm_run_wrapper = KVM_RUN_SANDBOX;
120 }
121 
122 static int img_name_parser(const struct option *opt, const char *arg, int unset)
123 {
124 	char *sep;
125 	struct stat st;
126 	char path[PATH_MAX];
127 
128 	if (stat(arg, &st) == 0 &&
129 	    S_ISDIR(st.st_mode)) {
130 		char tmp[PATH_MAX];
131 
132 		if (realpath(arg, tmp) == 0 ||
133 		    virtio_9p__register(kvm, tmp, "/dev/root") < 0)
134 			die("Unable to initialize virtio 9p");
135 		using_rootfs = 1;
136 		return 0;
137 	}
138 
139 	snprintf(path, PATH_MAX, "%s%s", kvm__get_dir(), arg);
140 
141 	if (stat(path, &st) == 0 &&
142 	    S_ISDIR(st.st_mode)) {
143 		char tmp[PATH_MAX];
144 
145 		if (realpath(path, tmp) == 0 ||
146 		    virtio_9p__register(kvm, tmp, "/dev/root") < 0)
147 			die("Unable to initialize virtio 9p");
148 		if (virtio_9p__register(kvm, "/", "hostfs") < 0)
149 			die("Unable to initialize virtio 9p");
150 		kvm_setup_resolv(arg);
151 		using_rootfs = custom_rootfs = 1;
152 		return 0;
153 	}
154 
155 	if (image_count >= MAX_DISK_IMAGES)
156 		die("Currently only 4 images are supported");
157 
158 	image_filename[image_count] = arg;
159 	sep = strstr(arg, ",");
160 	if (sep) {
161 		if (strcmp(sep + 1, "ro") == 0)
162 			readonly_image[image_count] = 1;
163 		*sep = 0;
164 	}
165 
166 	image_count++;
167 
168 	return 0;
169 }
170 
171 static int virtio_9p_rootdir_parser(const struct option *opt, const char *arg, int unset)
172 {
173 	char *tag_name;
174 	char tmp[PATH_MAX];
175 
176 	/*
177 	 * 9p dir can be of the form dirname,tag_name or
178 	 * just dirname. In the later case we use the
179 	 * default tag name
180 	 */
181 	tag_name = strstr(arg, ",");
182 	if (tag_name) {
183 		*tag_name = '\0';
184 		tag_name++;
185 	}
186 	if (realpath(arg, tmp)) {
187 		if (virtio_9p__register(kvm, tmp, tag_name) < 0)
188 			die("Unable to initialize virtio 9p");
189 	} else
190 		die("Failed resolving 9p path");
191 	return 0;
192 }
193 
/*
 * --tty callback: converts the argument with atoi() and hands the resulting
 * tty id to term_set_tty(). Always returns 0.
 */
static int tty_parser(const struct option *opt, const char *arg, int unset)
{
	term_set_tty(atoi(arg));

	return 0;
}
202 
/*
 * Parse a textual MAC address ("xx:xx:xx:xx:xx:xx", hex octets) into the
 * six bytes at @mac.
 *
 * The octets are parsed into a temporary buffer and copied out only when all
 * six were converted, so a malformed string leaves @mac untouched instead of
 * half-overwritten. The temporary is unsigned char, which is the type the
 * "%hhx" conversion requires.
 */
static inline void str_to_mac(const char *str, char *mac)
{
	unsigned char octets[6];

	if (sscanf(str, "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx",
		   &octets[0], &octets[1], &octets[2],
		   &octets[3], &octets[4], &octets[5]) != 6)
		return;

	memcpy(mac, octets, sizeof(octets));
}
208 static int set_net_param(struct virtio_net_params *p, const char *param,
209 				const char *val)
210 {
211 	if (strcmp(param, "guest_mac") == 0) {
212 		str_to_mac(val, p->guest_mac);
213 	} else if (strcmp(param, "mode") == 0) {
214 		if (!strncmp(val, "user", 4)) {
215 			int i;
216 
217 			for (i = 0; i < num_net_devices; i++)
218 				if (net_params[i].mode == NET_MODE_USER)
219 					die("Only one usermode network device allowed at a time");
220 			p->mode = NET_MODE_USER;
221 		} else if (!strncmp(val, "tap", 3)) {
222 			p->mode = NET_MODE_TAP;
223 		} else if (!strncmp(val, "none", 4)) {
224 			no_net = 1;
225 			return -1;
226 		} else
227 			die("Unkown network mode %s, please use user, tap or none", network);
228 	} else if (strcmp(param, "script") == 0) {
229 		p->script = strdup(val);
230 	} else if (strcmp(param, "guest_ip") == 0) {
231 		p->guest_ip = strdup(val);
232 	} else if (strcmp(param, "host_ip") == 0) {
233 		p->host_ip = strdup(val);
234 	} else if (strcmp(param, "vhost") == 0) {
235 		p->vhost = atoi(val);
236 	} else if (strcmp(param, "fd") == 0) {
237 		p->fd = atoi(val);
238 	}
239 
240 	return 0;
241 }
242 
243 static int netdev_parser(const struct option *opt, const char *arg, int unset)
244 {
245 	struct virtio_net_params p;
246 	char *buf = NULL, *cmd = NULL, *cur = NULL;
247 	bool on_cmd = true;
248 
249 	if (arg) {
250 		buf = strdup(arg);
251 		if (buf == NULL)
252 			die("Failed allocating new net buffer");
253 		cur = strtok(buf, ",=");
254 	}
255 
256 	p = (struct virtio_net_params) {
257 		.guest_ip	= DEFAULT_GUEST_ADDR,
258 		.host_ip	= DEFAULT_HOST_ADDR,
259 		.script		= DEFAULT_SCRIPT,
260 		.mode		= NET_MODE_TAP,
261 	};
262 
263 	str_to_mac(DEFAULT_GUEST_MAC, p.guest_mac);
264 	p.guest_mac[5] += num_net_devices;
265 
266 	while (cur) {
267 		if (on_cmd) {
268 			cmd = cur;
269 		} else {
270 			if (set_net_param(&p, cmd, cur) < 0)
271 				goto done;
272 		}
273 		on_cmd = !on_cmd;
274 
275 		cur = strtok(NULL, ",=");
276 	};
277 
278 	num_net_devices++;
279 
280 	net_params = realloc(net_params, num_net_devices * sizeof(*net_params));
281 	if (net_params == NULL)
282 		die("Failed adding new network device");
283 
284 	net_params[num_net_devices - 1] = p;
285 
286 done:
287 	free(buf);
288 	return 0;
289 }
290 
291 static int shmem_parser(const struct option *opt, const char *arg, int unset)
292 {
293 	const u64 default_size = SHMEM_DEFAULT_SIZE;
294 	const u64 default_phys_addr = SHMEM_DEFAULT_ADDR;
295 	const char *default_handle = SHMEM_DEFAULT_HANDLE;
296 	struct shmem_info *si = malloc(sizeof(struct shmem_info));
297 	u64 phys_addr;
298 	u64 size;
299 	char *handle = NULL;
300 	int create = 0;
301 	const char *p = arg;
302 	char *next;
303 	int base = 10;
304 	int verbose = 0;
305 
306 	const int skip_pci = strlen("pci:");
307 	if (verbose)
308 		pr_info("shmem_parser(%p,%s,%d)", opt, arg, unset);
309 	/* parse out optional addr family */
310 	if (strcasestr(p, "pci:")) {
311 		p += skip_pci;
312 	} else if (strcasestr(p, "mem:")) {
313 		die("I can't add to E820 map yet.\n");
314 	}
315 	/* parse out physical addr */
316 	base = 10;
317 	if (strcasestr(p, "0x"))
318 		base = 16;
319 	phys_addr = strtoll(p, &next, base);
320 	if (next == p && phys_addr == 0) {
321 		pr_info("shmem: no physical addr specified, using default.");
322 		phys_addr = default_phys_addr;
323 	}
324 	if (*next != ':' && *next != '\0')
325 		die("shmem: unexpected chars after phys addr.\n");
326 	if (*next == '\0')
327 		p = next;
328 	else
329 		p = next + 1;
330 	/* parse out size */
331 	base = 10;
332 	if (strcasestr(p, "0x"))
333 		base = 16;
334 	size = strtoll(p, &next, base);
335 	if (next == p && size == 0) {
336 		pr_info("shmem: no size specified, using default.");
337 		size = default_size;
338 	}
339 	/* look for [KMGkmg][Bb]*  uses base 2. */
340 	int skip_B = 0;
341 	if (strspn(next, "KMGkmg")) {	/* might have a prefix */
342 		if (*(next + 1) == 'B' || *(next + 1) == 'b')
343 			skip_B = 1;
344 		switch (*next) {
345 		case 'K':
346 		case 'k':
347 			size = size << KB_SHIFT;
348 			break;
349 		case 'M':
350 		case 'm':
351 			size = size << MB_SHIFT;
352 			break;
353 		case 'G':
354 		case 'g':
355 			size = size << GB_SHIFT;
356 			break;
357 		default:
358 			die("shmem: bug in detecting size prefix.");
359 			break;
360 		}
361 		next += 1 + skip_B;
362 	}
363 	if (*next != ':' && *next != '\0') {
364 		die("shmem: unexpected chars after phys size. <%c><%c>\n",
365 		    *next, *p);
366 	}
367 	if (*next == '\0')
368 		p = next;
369 	else
370 		p = next + 1;
371 	/* parse out optional shmem handle */
372 	const int skip_handle = strlen("handle=");
373 	next = strcasestr(p, "handle=");
374 	if (*p && next) {
375 		if (p != next)
376 			die("unexpected chars before handle\n");
377 		p += skip_handle;
378 		next = strchrnul(p, ':');
379 		if (next - p) {
380 			handle = malloc(next - p + 1);
381 			strncpy(handle, p, next - p);
382 			handle[next - p] = '\0';	/* just in case. */
383 		}
384 		if (*next == '\0')
385 			p = next;
386 		else
387 			p = next + 1;
388 	}
389 	/* parse optional create flag to see if we should create shm seg. */
390 	if (*p && strcasestr(p, "create")) {
391 		create = 1;
392 		p += strlen("create");
393 	}
394 	if (*p != '\0')
395 		die("shmem: unexpected trailing chars\n");
396 	if (handle == NULL) {
397 		handle = malloc(strlen(default_handle) + 1);
398 		strcpy(handle, default_handle);
399 	}
400 	if (verbose) {
401 		pr_info("shmem: phys_addr = %llx", phys_addr);
402 		pr_info("shmem: size      = %llx", size);
403 		pr_info("shmem: handle    = %s", handle);
404 		pr_info("shmem: create    = %d", create);
405 	}
406 
407 	si->phys_addr = phys_addr;
408 	si->size = size;
409 	si->handle = handle;
410 	si->create = create;
411 	pci_shmem__register_mem(si);	/* ownership of si, etc. passed on. */
412 	return 0;
413 }
414 
415 static const struct option options[] = {
416 	OPT_GROUP("Basic options:"),
417 	OPT_STRING('\0', "name", &guest_name, "guest name",
418 			"A name for the guest"),
419 	OPT_INTEGER('c', "cpus", &nrcpus, "Number of CPUs"),
420 	OPT_U64('m', "mem", &ram_size, "Virtual machine memory size in MiB."),
421 	OPT_CALLBACK('\0', "shmem", NULL,
422 		     "[pci:]<addr>:<size>[:handle=<handle>][:create]",
423 		     "Share host shmem with guest via pci device",
424 		     shmem_parser),
425 	OPT_CALLBACK('d', "disk", NULL, "image or rootfs_dir", "Disk image or rootfs directory", img_name_parser),
426 	OPT_BOOLEAN('\0', "balloon", &balloon, "Enable virtio balloon"),
427 	OPT_BOOLEAN('\0', "vnc", &vnc, "Enable VNC framebuffer"),
428 	OPT_BOOLEAN('\0', "sdl", &sdl, "Enable SDL framebuffer"),
429 	OPT_BOOLEAN('\0', "rng", &virtio_rng, "Enable virtio Random Number Generator"),
430 	OPT_CALLBACK('\0', "9p", NULL, "dir_to_share,tag_name",
431 		     "Enable virtio 9p to share files between host and guest", virtio_9p_rootdir_parser),
432 	OPT_STRING('\0', "console", &console, "serial, virtio or hv",
433 			"Console to use"),
434 	OPT_STRING('\0', "dev", &dev, "device_file", "KVM device file"),
435 	OPT_CALLBACK('\0', "tty", NULL, "tty id",
436 		     "Remap guest TTY into a pty on the host",
437 		     tty_parser),
438 	OPT_STRING('\0', "sandbox", &sandbox, "script",
439 			"Run this script when booting into custom rootfs"),
440 
441 	OPT_GROUP("Kernel options:"),
442 	OPT_STRING('k', "kernel", &kernel_filename, "kernel",
443 			"Kernel to boot in virtual machine"),
444 	OPT_STRING('i', "initrd", &initrd_filename, "initrd",
445 			"Initial RAM disk image"),
446 	OPT_STRING('p', "params", &kernel_cmdline, "params",
447 			"Kernel command line arguments"),
448 
449 	OPT_GROUP("Networking options:"),
450 	OPT_CALLBACK_DEFAULT('n', "network", NULL, "network params",
451 		     "Create a new guest NIC",
452 		     netdev_parser, NULL),
453 	OPT_BOOLEAN('\0', "no-dhcp", &no_dhcp, "Disable kernel DHCP in rootfs mode"),
454 
455 	OPT_GROUP("BIOS options:"),
456 	OPT_INTEGER('\0', "vidmode", &vidmode,
457 		    "Video mode"),
458 
459 	OPT_GROUP("Debug options:"),
460 	OPT_BOOLEAN('\0', "debug", &do_debug_print,
461 			"Enable debug messages"),
462 	OPT_BOOLEAN('\0', "debug-single-step", &single_step,
463 			"Enable single stepping"),
464 	OPT_BOOLEAN('\0', "debug-ioport", &ioport_debug,
465 			"Enable ioport debugging"),
466 	OPT_INTEGER('\0', "debug-iodelay", &debug_iodelay,
467 			"Delay IO by millisecond"),
468 	OPT_END()
469 };
470 
471 /*
472  * Serialize debug printout so that the output of multiple vcpus does not
473  * get mixed up:
474  */
475 static int printout_done;
476 
477 static void handle_sigusr1(int sig)
478 {
479 	struct kvm_cpu *cpu = current_kvm_cpu;
480 	int fd = kvm_cpu__get_debug_fd();
481 
482 	if (!cpu || cpu->needs_nmi)
483 		return;
484 
485 	dprintf(fd, "\n #\n # vCPU #%ld's dump:\n #\n", cpu->cpu_id);
486 	kvm_cpu__show_registers(cpu);
487 	kvm_cpu__show_code(cpu);
488 	kvm_cpu__show_page_tables(cpu);
489 	fflush(stdout);
490 	printout_done = 1;
491 	mb();
492 }
493 
494 /* Pause/resume the guest using SIGUSR2 */
495 static int is_paused;
496 
497 static void handle_pause(int fd, u32 type, u32 len, u8 *msg)
498 {
499 	if (type == KVM_IPC_RESUME && is_paused)
500 		kvm__continue();
501 	else if (type == KVM_IPC_PAUSE && !is_paused)
502 		kvm__pause();
503 	else
504 		return;
505 
506 	is_paused = !is_paused;
507 	pr_info("Guest %s\n", is_paused ? "paused" : "resumed");
508 }
509 
510 static void handle_debug(int fd, u32 type, u32 len, u8 *msg)
511 {
512 	int i;
513 	u32 dbg_type = *(u32 *)msg;
514 	int vcpu = *(((u32 *)msg) + 1);
515 
516 	if (dbg_type & KVM_DEBUG_CMD_TYPE_NMI) {
517 		if (vcpu >= kvm->nrcpus)
518 			return;
519 
520 		kvm_cpus[vcpu]->needs_nmi = 1;
521 		pthread_kill(kvm_cpus[vcpu]->thread, SIGUSR1);
522 	}
523 
524 	if (!(dbg_type & KVM_DEBUG_CMD_TYPE_DUMP))
525 		return;
526 
527 	for (i = 0; i < nrcpus; i++) {
528 		struct kvm_cpu *cpu = kvm_cpus[i];
529 
530 		if (!cpu)
531 			continue;
532 
533 		printout_done = 0;
534 
535 		kvm_cpu__set_debug_fd(fd);
536 		pthread_kill(cpu->thread, SIGUSR1);
537 		/*
538 		 * Wait for the vCPU to dump state before signalling
539 		 * the next thread. Since this is debug code it does
540 		 * not matter that we are burning CPU time a bit:
541 		 */
542 		while (!printout_done)
543 			mb();
544 	}
545 
546 	close(fd);
547 
548 	serial8250__inject_sysrq(kvm);
549 }
550 
551 static void handle_sigalrm(int sig)
552 {
553 	kvm__arch_periodic_poll(kvm);
554 }
555 
556 static void handle_stop(int fd, u32 type, u32 len, u8 *msg)
557 {
558 	kvm_cpu__reboot();
559 }
560 
561 static void *kvm_cpu_thread(void *arg)
562 {
563 	current_kvm_cpu		= arg;
564 
565 	if (kvm_cpu__start(current_kvm_cpu))
566 		goto panic_kvm;
567 
568 	kvm_cpu__delete(current_kvm_cpu);
569 
570 	return (void *) (intptr_t) 0;
571 
572 panic_kvm:
573 	fprintf(stderr, "KVM exit reason: %u (\"%s\")\n",
574 		current_kvm_cpu->kvm_run->exit_reason,
575 		kvm_exit_reasons[current_kvm_cpu->kvm_run->exit_reason]);
576 	if (current_kvm_cpu->kvm_run->exit_reason == KVM_EXIT_UNKNOWN)
577 		fprintf(stderr, "KVM exit code: 0x%Lu\n",
578 			current_kvm_cpu->kvm_run->hw.hardware_exit_reason);
579 
580 	kvm_cpu__set_debug_fd(STDOUT_FILENO);
581 	kvm_cpu__show_registers(current_kvm_cpu);
582 	kvm_cpu__show_code(current_kvm_cpu);
583 	kvm_cpu__show_page_tables(current_kvm_cpu);
584 
585 	kvm_cpu__delete(current_kvm_cpu);
586 
587 	return (void *) (intptr_t) 1;
588 }
589 
590 static char kernel[PATH_MAX];
591 
592 static const char *host_kernels[] = {
593 	"/boot/vmlinuz",
594 	"/boot/bzImage",
595 	NULL
596 };
597 
598 static const char *default_kernels[] = {
599 	"./bzImage",
600 	"../../arch/" BUILD_ARCH "/boot/bzImage",
601 	NULL
602 };
603 
604 static const char *default_vmlinux[] = {
605 	"../../../vmlinux",
606 	"../../vmlinux",
607 	NULL
608 };
609 
610 static void kernel_usage_with_options(void)
611 {
612 	const char **k;
613 	struct utsname uts;
614 
615 	fprintf(stderr, "Fatal: could not find default kernel image in:\n");
616 	k = &default_kernels[0];
617 	while (*k) {
618 		fprintf(stderr, "\t%s\n", *k);
619 		k++;
620 	}
621 
622 	if (uname(&uts) < 0)
623 		return;
624 
625 	k = &host_kernels[0];
626 	while (*k) {
627 		if (snprintf(kernel, PATH_MAX, "%s-%s", *k, uts.release) < 0)
628 			return;
629 		fprintf(stderr, "\t%s\n", kernel);
630 		k++;
631 	}
632 	fprintf(stderr, "\nPlease see 'kvm run --help' for more options.\n\n");
633 }
634 
635 static u64 host_ram_size(void)
636 {
637 	long page_size;
638 	long nr_pages;
639 
640 	nr_pages	= sysconf(_SC_PHYS_PAGES);
641 	if (nr_pages < 0) {
642 		pr_warning("sysconf(_SC_PHYS_PAGES) failed");
643 		return 0;
644 	}
645 
646 	page_size	= sysconf(_SC_PAGE_SIZE);
647 	if (page_size < 0) {
648 		pr_warning("sysconf(_SC_PAGE_SIZE) failed");
649 		return 0;
650 	}
651 
652 	return (nr_pages * page_size) >> MB_SHIFT;
653 }
654 
655 /*
656  * If user didn't specify how much memory it wants to allocate for the guest,
657  * avoid filling the whole host RAM.
658  */
659 #define RAM_SIZE_RATIO		0.8
660 
661 static u64 get_ram_size(int nr_cpus)
662 {
663 	u64 available;
664 	u64 ram_size;
665 
666 	ram_size	= 64 * (nr_cpus + 3);
667 
668 	available	= host_ram_size() * RAM_SIZE_RATIO;
669 	if (!available)
670 		available = MIN_RAM_SIZE_MB;
671 
672 	if (ram_size > available)
673 		ram_size	= available;
674 
675 	return ram_size;
676 }
677 
678 static const char *find_kernel(void)
679 {
680 	const char **k;
681 	struct stat st;
682 	struct utsname uts;
683 
684 	k = &default_kernels[0];
685 	while (*k) {
686 		if (stat(*k, &st) < 0 || !S_ISREG(st.st_mode)) {
687 			k++;
688 			continue;
689 		}
690 		strncpy(kernel, *k, PATH_MAX);
691 		return kernel;
692 	}
693 
694 	if (uname(&uts) < 0)
695 		return NULL;
696 
697 	k = &host_kernels[0];
698 	while (*k) {
699 		if (snprintf(kernel, PATH_MAX, "%s-%s", *k, uts.release) < 0)
700 			return NULL;
701 
702 		if (stat(kernel, &st) < 0 || !S_ISREG(st.st_mode)) {
703 			k++;
704 			continue;
705 		}
706 		return kernel;
707 
708 	}
709 	return NULL;
710 }
711 
712 static const char *find_vmlinux(void)
713 {
714 	const char **vmlinux;
715 
716 	vmlinux = &default_vmlinux[0];
717 	while (*vmlinux) {
718 		struct stat st;
719 
720 		if (stat(*vmlinux, &st) < 0 || !S_ISREG(st.st_mode)) {
721 			vmlinux++;
722 			continue;
723 		}
724 		return *vmlinux;
725 	}
726 	return NULL;
727 }
728 
729 void kvm_run_help(void)
730 {
731 	usage_with_options(run_usage, options);
732 }
733 
734 static int kvm_custom_stage2(void)
735 {
736 	char tmp[PATH_MAX], dst[PATH_MAX], *src;
737 	const char *rootfs;
738 	int r;
739 
740 	src = realpath("guest/init_stage2", NULL);
741 	if (src == NULL)
742 		return -ENOMEM;
743 
744 	if (image_filename[0] == NULL)
745 		rootfs = "default";
746 	else
747 		rootfs = image_filename[0];
748 
749 	snprintf(tmp, PATH_MAX, "%s%s/virt/init_stage2", kvm__get_dir(), rootfs);
750 	remove(tmp);
751 
752 	snprintf(dst, PATH_MAX, "/host/%s", src);
753 	r = symlink(dst, tmp);
754 	free(src);
755 
756 	return r;
757 }
758 
759 static int kvm_run_set_sandbox(void)
760 {
761 	const char *guestfs_name = "default";
762 	char path[PATH_MAX], script[PATH_MAX], *tmp;
763 
764 	if (image_filename[0])
765 		guestfs_name = image_filename[0];
766 
767 	snprintf(path, PATH_MAX, "%s%s/virt/sandbox.sh", kvm__get_dir(), guestfs_name);
768 
769 	remove(path);
770 
771 	if (sandbox == NULL)
772 		return 0;
773 
774 	tmp = realpath(sandbox, NULL);
775 	if (tmp == NULL)
776 		return -ENOMEM;
777 
778 	snprintf(script, PATH_MAX, "/host/%s", tmp);
779 	free(tmp);
780 
781 	return symlink(script, path);
782 }
783 
784 static void kvm_run_write_sandbox_cmd(const char **argv, int argc)
785 {
786 	const char script_hdr[] = "#! /bin/bash\n\n";
787 	int fd;
788 
789 	remove(sandbox);
790 
791 	fd = open(sandbox, O_RDWR | O_CREAT, 0777);
792 	if (fd < 0)
793 		die("Failed creating sandbox script");
794 
795 	if (write(fd, script_hdr, sizeof(script_hdr) - 1) <= 0)
796 		die("Failed writing sandbox script");
797 
798 	while (argc) {
799 		if (write(fd, argv[0], strlen(argv[0])) <= 0)
800 			die("Failed writing sandbox script");
801 		if (argc - 1)
802 			if (write(fd, " ", 1) <= 0)
803 				die("Failed writing sandbox script");
804 		argv++;
805 		argc--;
806 	}
807 	if (write(fd, "\n", 1) <= 0)
808 		die("Failed writing sandbox script");
809 
810 	close(fd);
811 }
812 
813 int kvm_cmd_run(int argc, const char **argv, const char *prefix)
814 {
815 	static char real_cmdline[2048], default_name[20];
816 	struct framebuffer *fb = NULL;
817 	unsigned int nr_online_cpus;
818 	int exit_code = 0;
819 	int max_cpus, recommended_cpus;
820 	int i;
821 	void *ret;
822 
823 	signal(SIGALRM, handle_sigalrm);
824 	kvm_ipc__register_handler(KVM_IPC_DEBUG, handle_debug);
825 	signal(SIGUSR1, handle_sigusr1);
826 	kvm_ipc__register_handler(KVM_IPC_PAUSE, handle_pause);
827 	kvm_ipc__register_handler(KVM_IPC_RESUME, handle_pause);
828 	kvm_ipc__register_handler(KVM_IPC_STOP, handle_stop);
829 
830 	nr_online_cpus = sysconf(_SC_NPROCESSORS_ONLN);
831 
832 	while (argc != 0) {
833 		argc = parse_options(argc, argv, options, run_usage,
834 				PARSE_OPT_STOP_AT_NON_OPTION |
835 				PARSE_OPT_KEEP_DASHDASH);
836 		if (argc != 0) {
837 			/* Cusrom options, should have been handled elsewhere */
838 			if (strcmp(argv[0], "--") == 0) {
839 				if (kvm_run_wrapper == KVM_RUN_SANDBOX) {
840 					sandbox = DEFAULT_SANDBOX_FILENAME;
841 					kvm_run_write_sandbox_cmd(argv+1, argc-1);
842 					break;
843 				}
844 			}
845 
846 			if (kernel_filename) {
847 				fprintf(stderr, "Cannot handle parameter: "
848 						"%s\n", argv[0]);
849 				usage_with_options(run_usage, options);
850 				return EINVAL;
851 			}
852 			/* first unhandled parameter is treated as a kernel
853 			   image
854 			 */
855 			kernel_filename = argv[0];
856 			argv++;
857 			argc--;
858 		}
859 
860 	}
861 
862 	if (!kernel_filename)
863 		kernel_filename = find_kernel();
864 
865 	if (!kernel_filename) {
866 		kernel_usage_with_options();
867 		return EINVAL;
868 	}
869 
870 	vmlinux_filename = find_vmlinux();
871 
872 	if (nrcpus == 0)
873 		nrcpus = nr_online_cpus;
874 	else if (nrcpus < 1 || nrcpus > KVM_NR_CPUS)
875 		die("Number of CPUs %d is out of [1;%d] range", nrcpus, KVM_NR_CPUS);
876 
877 	if (!ram_size)
878 		ram_size	= get_ram_size(nrcpus);
879 
880 	if (ram_size < MIN_RAM_SIZE_MB)
881 		die("Not enough memory specified: %lluMB (min %lluMB)", ram_size, MIN_RAM_SIZE_MB);
882 
883 	if (ram_size > host_ram_size())
884 		pr_warning("Guest memory size %lluMB exceeds host physical RAM size %lluMB", ram_size, host_ram_size());
885 
886 	ram_size <<= MB_SHIFT;
887 
888 	if (!dev)
889 		dev = DEFAULT_KVM_DEV;
890 
891 	if (!console)
892 		console = DEFAULT_CONSOLE;
893 
894 	if (!strncmp(console, "virtio", 6))
895 		active_console  = CONSOLE_VIRTIO;
896 	else if (!strncmp(console, "serial", 6))
897 		active_console  = CONSOLE_8250;
898 	else if (!strncmp(console, "hv", 2))
899 		active_console = CONSOLE_HV;
900 	else
901 		pr_warning("No console!");
902 
903 	if (!host_ip)
904 		host_ip = DEFAULT_HOST_ADDR;
905 
906 	if (!guest_ip)
907 		guest_ip = DEFAULT_GUEST_ADDR;
908 
909 	if (!guest_mac)
910 		guest_mac = DEFAULT_GUEST_MAC;
911 
912 	if (!host_mac)
913 		host_mac = DEFAULT_HOST_MAC;
914 
915 	if (!script)
916 		script = DEFAULT_SCRIPT;
917 
918 	symbol__init(vmlinux_filename);
919 
920 	term_init();
921 
922 	if (!guest_name) {
923 		sprintf(default_name, "guest-%u", getpid());
924 		guest_name = default_name;
925 	}
926 
927 	kvm = kvm__init(dev, ram_size, guest_name);
928 
929 	kvm->single_step = single_step;
930 
931 	ioeventfd__init();
932 
933 	max_cpus = kvm__max_cpus(kvm);
934 	recommended_cpus = kvm__recommended_cpus(kvm);
935 
936 	if (nrcpus > max_cpus) {
937 		printf("  # Limit the number of CPUs to %d\n", max_cpus);
938 		nrcpus = max_cpus;
939 	} else if (nrcpus > recommended_cpus) {
940 		printf("  # Warning: The maximum recommended amount of VCPUs"
941 			" is %d\n", recommended_cpus);
942 	}
943 
944 	kvm->nrcpus = nrcpus;
945 
946 	irq__init(kvm);
947 
948 	pci__init();
949 
950 	/*
951 	 * vidmode should be either specified
952 	 * either set by default
953 	 */
954 	if (vnc || sdl) {
955 		if (vidmode == -1)
956 			vidmode = 0x312;
957 	} else
958 		vidmode = 0;
959 
960 	memset(real_cmdline, 0, sizeof(real_cmdline));
961 	kvm__arch_set_cmdline(real_cmdline, vnc || sdl);
962 
963 	if (strlen(real_cmdline) > 0)
964 		strcat(real_cmdline, " ");
965 
966 	if (kernel_cmdline)
967 		strlcat(real_cmdline, kernel_cmdline, sizeof(real_cmdline));
968 
969 	if (!using_rootfs && !image_filename[0]) {
970 		char tmp[PATH_MAX];
971 
972 		kvm_setup_create_new("default");
973 		kvm_setup_resolv("default");
974 
975 		snprintf(tmp, PATH_MAX, "%s%s", kvm__get_dir(), "default");
976 		if (virtio_9p__register(kvm, tmp, "/dev/root") < 0)
977 			die("Unable to initialize virtio 9p");
978 		if (virtio_9p__register(kvm, "/", "hostfs") < 0)
979 			die("Unable to initialize virtio 9p");
980 		using_rootfs = custom_rootfs = 1;
981 	}
982 
983 	if (using_rootfs) {
984 		strcat(real_cmdline, " root=/dev/root rw rootflags=rw,trans=virtio,version=9p2000.L rootfstype=9p");
985 		if (custom_rootfs) {
986 			kvm_run_set_sandbox();
987 
988 			strcat(real_cmdline, " init=/virt/init");
989 
990 			if (!no_dhcp)
991 				strcat(real_cmdline, "  ip=dhcp");
992 			if (kvm_custom_stage2())
993 				die("Failed linking stage 2 of init.");
994 		}
995 	} else if (!strstr(real_cmdline, "root=")) {
996 		strlcat(real_cmdline, " root=/dev/vda rw ", sizeof(real_cmdline));
997 	}
998 
999 	if (image_count) {
1000 		kvm->nr_disks = image_count;
1001 		kvm->disks    = disk_image__open_all(image_filename, readonly_image, image_count);
1002 		if (!kvm->disks)
1003 			die("Unable to load all disk images.");
1004 
1005 		virtio_blk__init_all(kvm);
1006 	}
1007 
1008 	printf("  # kvm run -k %s -m %Lu -c %d --name %s\n", kernel_filename, ram_size / 1024 / 1024, nrcpus, guest_name);
1009 
1010 	if (!kvm__load_kernel(kvm, kernel_filename, initrd_filename,
1011 				real_cmdline, vidmode))
1012 		die("unable to load kernel %s", kernel_filename);
1013 
1014 	kvm->vmlinux		= vmlinux_filename;
1015 
1016 	ioport__setup_arch();
1017 
1018 	rtc__init();
1019 
1020 	serial8250__init(kvm);
1021 
1022 	if (active_console == CONSOLE_VIRTIO)
1023 		virtio_console__init(kvm);
1024 
1025 	if (virtio_rng)
1026 		virtio_rng__init(kvm);
1027 
1028 	if (balloon)
1029 		virtio_bln__init(kvm);
1030 
1031 	if (!network)
1032 		network = DEFAULT_NETWORK;
1033 
1034 	virtio_9p__init(kvm);
1035 
1036 	for (i = 0; i < num_net_devices; i++) {
1037 		net_params[i].kvm = kvm;
1038 		virtio_net__init(&net_params[i]);
1039 	}
1040 
1041 	if (num_net_devices == 0 && no_net == 0) {
1042 		struct virtio_net_params net_params;
1043 
1044 		net_params = (struct virtio_net_params) {
1045 			.guest_ip	= guest_ip,
1046 			.host_ip	= host_ip,
1047 			.kvm		= kvm,
1048 			.script		= script,
1049 			.mode		= NET_MODE_USER,
1050 		};
1051 		str_to_mac(guest_mac, net_params.guest_mac);
1052 		str_to_mac(host_mac, net_params.host_mac);
1053 
1054 		virtio_net__init(&net_params);
1055 	}
1056 
1057 	kvm__init_ram(kvm);
1058 
1059 #ifdef CONFIG_X86
1060 	kbd__init(kvm);
1061 #endif
1062 
1063 	pci_shmem__init(kvm);
1064 
1065 	if (vnc || sdl)
1066 		fb = vesa__init(kvm);
1067 
1068 	if (vnc) {
1069 		if (fb)
1070 			vnc__init(fb);
1071 	}
1072 
1073 	if (sdl) {
1074 		if (fb)
1075 			sdl__init(fb);
1076 	}
1077 
1078 	fb__start();
1079 
1080 	/* Device init all done; firmware init must
1081 	 * come after this (it may set up device trees etc.)
1082 	 */
1083 
1084 	kvm__start_timer(kvm);
1085 
1086 	kvm__arch_setup_firmware(kvm);
1087 
1088 	for (i = 0; i < nrcpus; i++) {
1089 		kvm_cpus[i] = kvm_cpu__init(kvm, i);
1090 		if (!kvm_cpus[i])
1091 			die("unable to initialize KVM VCPU");
1092 	}
1093 
1094 	thread_pool__init(nr_online_cpus);
1095 	ioeventfd__start();
1096 
1097 	for (i = 0; i < nrcpus; i++) {
1098 		if (pthread_create(&kvm_cpus[i]->thread, NULL, kvm_cpu_thread, kvm_cpus[i]) != 0)
1099 			die("unable to create KVM VCPU thread");
1100 	}
1101 
1102 	/* Only VCPU #0 is going to exit by itself when shutting down */
1103 	if (pthread_join(kvm_cpus[0]->thread, &ret) != 0)
1104 		exit_code = 1;
1105 
1106 	for (i = 1; i < nrcpus; i++) {
1107 		if (kvm_cpus[i]->is_running) {
1108 			pthread_kill(kvm_cpus[i]->thread, SIGKVMEXIT);
1109 			if (pthread_join(kvm_cpus[i]->thread, &ret) != 0)
1110 				die("pthread_join");
1111 		}
1112 		if (ret != NULL)
1113 			exit_code = 1;
1114 	}
1115 
1116 	compat__print_all_messages();
1117 
1118 	fb__stop();
1119 
1120 	virtio_blk__delete_all(kvm);
1121 	virtio_rng__delete_all(kvm);
1122 
1123 	disk_image__close_all(kvm->disks, image_count);
1124 	kvm__delete(kvm);
1125 
1126 	if (!exit_code)
1127 		printf("\n  # KVM session ended normally.\n");
1128 
1129 	return exit_code;
1130 }
1131