xref: /kvmtool/builtin-run.c (revision ec52d504239182f1a208bfc0000339a7601ae48e)
1 #include "kvm/builtin-run.h"
2 
3 #include "kvm/builtin-setup.h"
4 #include "kvm/virtio-balloon.h"
5 #include "kvm/virtio-console.h"
6 #include "kvm/parse-options.h"
7 #include "kvm/8250-serial.h"
8 #include "kvm/framebuffer.h"
9 #include "kvm/disk-image.h"
10 #include "kvm/threadpool.h"
11 #include "kvm/virtio-blk.h"
12 #include "kvm/virtio-net.h"
13 #include "kvm/virtio-rng.h"
14 #include "kvm/ioeventfd.h"
15 #include "kvm/virtio-9p.h"
16 #include "kvm/barrier.h"
17 #include "kvm/kvm-cpu.h"
18 #include "kvm/ioport.h"
19 #include "kvm/symbol.h"
20 #include "kvm/i8042.h"
21 #include "kvm/mutex.h"
22 #include "kvm/term.h"
23 #include "kvm/util.h"
24 #include "kvm/strbuf.h"
25 #include "kvm/vesa.h"
26 #include "kvm/irq.h"
27 #include "kvm/kvm.h"
28 #include "kvm/pci.h"
29 #include "kvm/rtc.h"
30 #include "kvm/sdl.h"
31 #include "kvm/vnc.h"
32 #include "kvm/guest_compat.h"
33 #include "kvm/pci-shmem.h"
34 #include "kvm/kvm-ipc.h"
35 #include "kvm/builtin-debug.h"
36 
37 #include <linux/types.h>
38 
39 #include <sys/utsname.h>
40 #include <sys/types.h>
41 #include <sys/stat.h>
42 #include <termios.h>
43 #include <signal.h>
44 #include <stdlib.h>
45 #include <string.h>
46 #include <unistd.h>
47 #include <ctype.h>
48 #include <stdio.h>
49 
/*
 * Built-in defaults. kvm_cmd_run() and netdev_parser() fall back to these
 * when the corresponding setting was not given on the command line.
 */
#define DEFAULT_KVM_DEV		"/dev/kvm"
#define DEFAULT_CONSOLE		"serial"
#define DEFAULT_NETWORK		"user"
#define DEFAULT_HOST_ADDR	"192.168.33.1"
#define DEFAULT_GUEST_ADDR	"192.168.33.15"
#define DEFAULT_GUEST_MAC	"02:15:15:15:15:15"
#define DEFAULT_HOST_MAC	"02:01:01:01:01:01"
#define DEFAULT_SCRIPT		"none"
/* Script written and used by kvm_cmd_run() when a "--" command is wrapped. */
const char *DEFAULT_SANDBOX_FILENAME = "guest/sandbox.sh";

/* Shift counts for converting between bytes and KiB / MiB / GiB. */
#define MB_SHIFT		(20)
#define KB_SHIFT		(10)
#define GB_SHIFT		(30)
/* Smallest guest RAM size accepted by kvm_cmd_run(), in MiB and in bytes. */
#define MIN_RAM_SIZE_MB		(64ULL)
#define MIN_RAM_SIZE_BYTE	(MIN_RAM_SIZE_MB << MB_SHIFT)
65 
/* The virtual machine; assigned from kvm__init() in kvm_cmd_run(). */
struct kvm *kvm;
/* One entry per vCPU, filled in by kvm_cmd_run() for indices below nrcpus. */
struct kvm_cpu *kvm_cpus[KVM_NR_CPUS];
/* Per-thread pointer to the vCPU a thread runs; set in kvm_cpu_thread(). */
__thread struct kvm_cpu *current_kvm_cpu;

/*
 * Settings collected while parsing the command line (see options[] and the
 * *_parser() callbacks) and consumed by kvm_cmd_run().
 */
static u64 ram_size;
static u8  image_count;
static u8 num_net_devices;
static bool virtio_rng;
static const char *kernel_cmdline;
static const char *kernel_filename;
static const char *vmlinux_filename;
static const char *initrd_filename;
static const char *image_filename[MAX_DISK_IMAGES];
static const char *console;
static const char *dev;
static const char *network;
static const char *host_ip;
static const char *guest_ip;
static const char *guest_mac;
static const char *host_mac;
static const char *script;
static const char *guest_name;
static const char *sandbox;
/* Heap array with num_net_devices entries, grown by netdev_parser(). */
static struct virtio_net_params *net_params;
static bool single_step;
static bool readonly_image[MAX_DISK_IMAGES];
static bool vnc;
static bool sdl;
static bool balloon;
/* Set when the guest root is a host directory exported over virtio 9p. */
static bool using_rootfs;
/* Set in addition when that directory is a rootfs below kvm__get_dir(). */
static bool custom_rootfs;
static bool no_net;
static bool no_dhcp;
extern bool ioport_debug;
/* One of the KVM_RUN_* values; see kvm_run_set_wrapper_sandbox(). */
static int  kvm_run_wrapper;
extern int  active_console;
extern int  debug_iodelay;

bool do_debug_print = false;

/* Number of vCPUs; 0 means "not specified" until kvm_cmd_run() picks one. */
static int nrcpus;
/* Video mode; -1 means "not specified" until kvm_cmd_run() picks one. */
static int vidmode = -1;
108 
/* NULL-terminated usage lines handed to parse_options()/usage_with_options(). */
static const char * const run_usage[] = {
	"kvm run [<options>] [<kernel image>]",
	NULL
};
113 
/*
 * Values for kvm_run_wrapper.
 *
 * KVM_RUN_DEFAULT must stay first so that it has the value 0: kvm_run_wrapper
 * is a zero-initialised static, and without a distinct default value a plain
 * "kvm run" would be indistinguishable from a run started through the sandbox
 * wrapper.
 */
enum {
	KVM_RUN_DEFAULT,
	KVM_RUN_SANDBOX,
};
117 
118 void kvm_run_set_wrapper_sandbox(void)
119 {
120 	kvm_run_wrapper = KVM_RUN_SANDBOX;
121 }
122 
123 static int img_name_parser(const struct option *opt, const char *arg, int unset)
124 {
125 	char *sep;
126 	struct stat st;
127 	char path[PATH_MAX];
128 
129 	if (stat(arg, &st) == 0 &&
130 	    S_ISDIR(st.st_mode)) {
131 		char tmp[PATH_MAX];
132 
133 		if (realpath(arg, tmp) == 0 ||
134 		    virtio_9p__register(kvm, tmp, "/dev/root") < 0)
135 			die("Unable to initialize virtio 9p");
136 		using_rootfs = 1;
137 		return 0;
138 	}
139 
140 	snprintf(path, PATH_MAX, "%s%s", kvm__get_dir(), arg);
141 
142 	if (stat(path, &st) == 0 &&
143 	    S_ISDIR(st.st_mode)) {
144 		char tmp[PATH_MAX];
145 
146 		if (realpath(path, tmp) == 0 ||
147 		    virtio_9p__register(kvm, tmp, "/dev/root") < 0)
148 			die("Unable to initialize virtio 9p");
149 		if (virtio_9p__register(kvm, "/", "hostfs") < 0)
150 			die("Unable to initialize virtio 9p");
151 		kvm_setup_resolv(arg);
152 		using_rootfs = custom_rootfs = 1;
153 		return 0;
154 	}
155 
156 	if (image_count >= MAX_DISK_IMAGES)
157 		die("Currently only 4 images are supported");
158 
159 	image_filename[image_count] = arg;
160 	sep = strstr(arg, ",");
161 	if (sep) {
162 		if (strcmp(sep + 1, "ro") == 0)
163 			readonly_image[image_count] = 1;
164 		*sep = 0;
165 	}
166 
167 	image_count++;
168 
169 	return 0;
170 }
171 
172 static int virtio_9p_rootdir_parser(const struct option *opt, const char *arg, int unset)
173 {
174 	char *tag_name;
175 	char tmp[PATH_MAX];
176 
177 	/*
178 	 * 9p dir can be of the form dirname,tag_name or
179 	 * just dirname. In the later case we use the
180 	 * default tag name
181 	 */
182 	tag_name = strstr(arg, ",");
183 	if (tag_name) {
184 		*tag_name = '\0';
185 		tag_name++;
186 	}
187 	if (realpath(arg, tmp)) {
188 		if (virtio_9p__register(kvm, tmp, tag_name) < 0)
189 			die("Unable to initialize virtio 9p");
190 	} else
191 		die("Failed resolving 9p path");
192 	return 0;
193 }
194 
/*
 * Option callback for --tty: hand the given tty id to term_set_tty().
 *
 * The id must be a complete decimal number that fits in an int. Anything
 * else is rejected through die() instead of being silently read as tty 0,
 * which is what atoi() would do.
 *
 * Always returns 0.
 */
static int tty_parser(const struct option *opt, const char *arg, int unset)
{
	char *end;
	long tty;

	tty = strtol(arg, &end, 10);
	if (end == arg || *end != '\0' || tty != (long)(int)tty)
		die("Invalid tty id '%s'", arg);

	term_set_tty((int)tty);

	return 0;
}
203 
/*
 * Convert a textual MAC address ("xx:xx:xx:xx:xx:xx", hexadecimal) into six
 * raw bytes stored at @mac. Bytes that cannot be parsed are left untouched.
 */
static inline void str_to_mac(const char *str, char *mac)
{
	sscanf(str, "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx",
	       &mac[0], &mac[1], &mac[2], &mac[3], &mac[4], &mac[5]);
}
209 static int set_net_param(struct virtio_net_params *p, const char *param,
210 				const char *val)
211 {
212 	if (strcmp(param, "guest_mac") == 0) {
213 		str_to_mac(val, p->guest_mac);
214 	} else if (strcmp(param, "mode") == 0) {
215 		if (!strncmp(val, "user", 4)) {
216 			int i;
217 
218 			for (i = 0; i < num_net_devices; i++)
219 				if (net_params[i].mode == NET_MODE_USER)
220 					die("Only one usermode network device allowed at a time");
221 			p->mode = NET_MODE_USER;
222 		} else if (!strncmp(val, "tap", 3)) {
223 			p->mode = NET_MODE_TAP;
224 		} else if (!strncmp(val, "none", 4)) {
225 			no_net = 1;
226 			return -1;
227 		} else
228 			die("Unkown network mode %s, please use user, tap or none", network);
229 	} else if (strcmp(param, "script") == 0) {
230 		p->script = strdup(val);
231 	} else if (strcmp(param, "guest_ip") == 0) {
232 		p->guest_ip = strdup(val);
233 	} else if (strcmp(param, "host_ip") == 0) {
234 		p->host_ip = strdup(val);
235 	} else if (strcmp(param, "vhost") == 0) {
236 		p->vhost = atoi(val);
237 	} else if (strcmp(param, "fd") == 0) {
238 		p->fd = atoi(val);
239 	}
240 
241 	return 0;
242 }
243 
244 static int netdev_parser(const struct option *opt, const char *arg, int unset)
245 {
246 	struct virtio_net_params p;
247 	char *buf = NULL, *cmd = NULL, *cur = NULL;
248 	bool on_cmd = true;
249 
250 	if (arg) {
251 		buf = strdup(arg);
252 		if (buf == NULL)
253 			die("Failed allocating new net buffer");
254 		cur = strtok(buf, ",=");
255 	}
256 
257 	p = (struct virtio_net_params) {
258 		.guest_ip	= DEFAULT_GUEST_ADDR,
259 		.host_ip	= DEFAULT_HOST_ADDR,
260 		.script		= DEFAULT_SCRIPT,
261 		.mode		= NET_MODE_TAP,
262 	};
263 
264 	str_to_mac(DEFAULT_GUEST_MAC, p.guest_mac);
265 	p.guest_mac[5] += num_net_devices;
266 
267 	while (cur) {
268 		if (on_cmd) {
269 			cmd = cur;
270 		} else {
271 			if (set_net_param(&p, cmd, cur) < 0)
272 				goto done;
273 		}
274 		on_cmd = !on_cmd;
275 
276 		cur = strtok(NULL, ",=");
277 	};
278 
279 	num_net_devices++;
280 
281 	net_params = realloc(net_params, num_net_devices * sizeof(*net_params));
282 	if (net_params == NULL)
283 		die("Failed adding new network device");
284 
285 	net_params[num_net_devices - 1] = p;
286 
287 done:
288 	free(buf);
289 	return 0;
290 }
291 
292 static int shmem_parser(const struct option *opt, const char *arg, int unset)
293 {
294 	const u64 default_size = SHMEM_DEFAULT_SIZE;
295 	const u64 default_phys_addr = SHMEM_DEFAULT_ADDR;
296 	const char *default_handle = SHMEM_DEFAULT_HANDLE;
297 	struct shmem_info *si = malloc(sizeof(struct shmem_info));
298 	u64 phys_addr;
299 	u64 size;
300 	char *handle = NULL;
301 	int create = 0;
302 	const char *p = arg;
303 	char *next;
304 	int base = 10;
305 	int verbose = 0;
306 
307 	const int skip_pci = strlen("pci:");
308 	if (verbose)
309 		pr_info("shmem_parser(%p,%s,%d)", opt, arg, unset);
310 	/* parse out optional addr family */
311 	if (strcasestr(p, "pci:")) {
312 		p += skip_pci;
313 	} else if (strcasestr(p, "mem:")) {
314 		die("I can't add to E820 map yet.\n");
315 	}
316 	/* parse out physical addr */
317 	base = 10;
318 	if (strcasestr(p, "0x"))
319 		base = 16;
320 	phys_addr = strtoll(p, &next, base);
321 	if (next == p && phys_addr == 0) {
322 		pr_info("shmem: no physical addr specified, using default.");
323 		phys_addr = default_phys_addr;
324 	}
325 	if (*next != ':' && *next != '\0')
326 		die("shmem: unexpected chars after phys addr.\n");
327 	if (*next == '\0')
328 		p = next;
329 	else
330 		p = next + 1;
331 	/* parse out size */
332 	base = 10;
333 	if (strcasestr(p, "0x"))
334 		base = 16;
335 	size = strtoll(p, &next, base);
336 	if (next == p && size == 0) {
337 		pr_info("shmem: no size specified, using default.");
338 		size = default_size;
339 	}
340 	/* look for [KMGkmg][Bb]*  uses base 2. */
341 	int skip_B = 0;
342 	if (strspn(next, "KMGkmg")) {	/* might have a prefix */
343 		if (*(next + 1) == 'B' || *(next + 1) == 'b')
344 			skip_B = 1;
345 		switch (*next) {
346 		case 'K':
347 		case 'k':
348 			size = size << KB_SHIFT;
349 			break;
350 		case 'M':
351 		case 'm':
352 			size = size << MB_SHIFT;
353 			break;
354 		case 'G':
355 		case 'g':
356 			size = size << GB_SHIFT;
357 			break;
358 		default:
359 			die("shmem: bug in detecting size prefix.");
360 			break;
361 		}
362 		next += 1 + skip_B;
363 	}
364 	if (*next != ':' && *next != '\0') {
365 		die("shmem: unexpected chars after phys size. <%c><%c>\n",
366 		    *next, *p);
367 	}
368 	if (*next == '\0')
369 		p = next;
370 	else
371 		p = next + 1;
372 	/* parse out optional shmem handle */
373 	const int skip_handle = strlen("handle=");
374 	next = strcasestr(p, "handle=");
375 	if (*p && next) {
376 		if (p != next)
377 			die("unexpected chars before handle\n");
378 		p += skip_handle;
379 		next = strchrnul(p, ':');
380 		if (next - p) {
381 			handle = malloc(next - p + 1);
382 			strncpy(handle, p, next - p);
383 			handle[next - p] = '\0';	/* just in case. */
384 		}
385 		if (*next == '\0')
386 			p = next;
387 		else
388 			p = next + 1;
389 	}
390 	/* parse optional create flag to see if we should create shm seg. */
391 	if (*p && strcasestr(p, "create")) {
392 		create = 1;
393 		p += strlen("create");
394 	}
395 	if (*p != '\0')
396 		die("shmem: unexpected trailing chars\n");
397 	if (handle == NULL) {
398 		handle = malloc(strlen(default_handle) + 1);
399 		strcpy(handle, default_handle);
400 	}
401 	if (verbose) {
402 		pr_info("shmem: phys_addr = %llx", phys_addr);
403 		pr_info("shmem: size      = %llx", size);
404 		pr_info("shmem: handle    = %s", handle);
405 		pr_info("shmem: create    = %d", create);
406 	}
407 
408 	si->phys_addr = phys_addr;
409 	si->size = size;
410 	si->handle = handle;
411 	si->create = create;
412 	pci_shmem__register_mem(si);	/* ownership of si, etc. passed on. */
413 	return 0;
414 }
415 
/*
 * Command-line options understood by "kvm run", grouped the way they are
 * handed to usage_with_options(). Plain options store into the file-level
 * variables above; OPT_CALLBACK entries name the *_parser() function that is
 * given the option's argument.
 */
static const struct option options[] = {
	OPT_GROUP("Basic options:"),
	OPT_STRING('\0', "name", &guest_name, "guest name",
			"A name for the guest"),
	OPT_INTEGER('c', "cpus", &nrcpus, "Number of CPUs"),
	OPT_U64('m', "mem", &ram_size, "Virtual machine memory size in MiB."),
	OPT_CALLBACK('\0', "shmem", NULL,
		     "[pci:]<addr>:<size>[:handle=<handle>][:create]",
		     "Share host shmem with guest via pci device",
		     shmem_parser),
	OPT_CALLBACK('d', "disk", NULL, "image or rootfs_dir", "Disk image or rootfs directory", img_name_parser),
	OPT_BOOLEAN('\0', "balloon", &balloon, "Enable virtio balloon"),
	OPT_BOOLEAN('\0', "vnc", &vnc, "Enable VNC framebuffer"),
	OPT_BOOLEAN('\0', "sdl", &sdl, "Enable SDL framebuffer"),
	OPT_BOOLEAN('\0', "rng", &virtio_rng, "Enable virtio Random Number Generator"),
	OPT_CALLBACK('\0', "9p", NULL, "dir_to_share,tag_name",
		     "Enable virtio 9p to share files between host and guest", virtio_9p_rootdir_parser),
	OPT_STRING('\0', "console", &console, "serial, virtio or hv",
			"Console to use"),
	OPT_STRING('\0', "dev", &dev, "device_file", "KVM device file"),
	OPT_CALLBACK('\0', "tty", NULL, "tty id",
		     "Remap guest TTY into a pty on the host",
		     tty_parser),
	OPT_STRING('\0', "sandbox", &sandbox, "script",
			"Run this script when booting into custom rootfs"),

	OPT_GROUP("Kernel options:"),
	OPT_STRING('k', "kernel", &kernel_filename, "kernel",
			"Kernel to boot in virtual machine"),
	OPT_STRING('i', "initrd", &initrd_filename, "initrd",
			"Initial RAM disk image"),
	OPT_STRING('p', "params", &kernel_cmdline, "params",
			"Kernel command line arguments"),

	OPT_GROUP("Networking options:"),
	/* May be given several times; each occurrence describes one NIC. */
	OPT_CALLBACK_DEFAULT('n', "network", NULL, "network params",
		     "Create a new guest NIC",
		     netdev_parser, NULL),
	OPT_BOOLEAN('\0', "no-dhcp", &no_dhcp, "Disable kernel DHCP in rootfs mode"),

	OPT_GROUP("BIOS options:"),
	OPT_INTEGER('\0', "vidmode", &vidmode,
		    "Video mode"),

	OPT_GROUP("Debug options:"),
	OPT_BOOLEAN('\0', "debug", &do_debug_print,
			"Enable debug messages"),
	OPT_BOOLEAN('\0', "debug-single-step", &single_step,
			"Enable single stepping"),
	OPT_BOOLEAN('\0', "debug-ioport", &ioport_debug,
			"Enable ioport debugging"),
	OPT_INTEGER('\0', "debug-iodelay", &debug_iodelay,
			"Delay IO by millisecond"),
	OPT_END()
};
471 
/*
 * Serializes the debug printout so that the output of multiple vCPUs does
 * not get mixed up: handle_debug() clears this flag before signalling a vCPU
 * thread and spins until handle_sigusr1() sets it again after dumping that
 * vCPU's state.
 */
static int printout_done;
477 
478 static void handle_sigusr1(int sig)
479 {
480 	struct kvm_cpu *cpu = current_kvm_cpu;
481 	int fd = kvm_cpu__get_debug_fd();
482 
483 	if (!cpu || cpu->needs_nmi)
484 		return;
485 
486 	dprintf(fd, "\n #\n # vCPU #%ld's dump:\n #\n", cpu->cpu_id);
487 	kvm_cpu__show_registers(cpu);
488 	kvm_cpu__show_code(cpu);
489 	kvm_cpu__show_page_tables(cpu);
490 	fflush(stdout);
491 	printout_done = 1;
492 	mb();
493 }
494 
/*
 * Non-zero while the guest is paused. Toggled by handle_pause() in response
 * to KVM_IPC_PAUSE / KVM_IPC_RESUME messages.
 * NOTE(review): an earlier comment here mentioned SIGUSR2; no SIGUSR2
 * handling is visible in this file - confirm inside kvm__pause().
 */
static int is_paused;
497 
498 static void handle_pause(int fd, u32 type, u32 len, u8 *msg)
499 {
500 	if (type == KVM_IPC_RESUME && is_paused)
501 		kvm__continue();
502 	else if (type == KVM_IPC_PAUSE && !is_paused)
503 		kvm__pause();
504 	else
505 		return;
506 
507 	is_paused = !is_paused;
508 	pr_info("Guest %s\n", is_paused ? "paused" : "resumed");
509 }
510 
511 static void handle_debug(int fd, u32 type, u32 len, u8 *msg)
512 {
513 	int i;
514 	struct debug_cmd_params *params = (void *)msg;
515 	u32 dbg_type = params->dbg_type;
516 	u32 vcpu = params->cpu;
517 
518 	if (dbg_type & KVM_DEBUG_CMD_TYPE_NMI) {
519 		if ((int)vcpu >= kvm->nrcpus)
520 			return;
521 
522 		kvm_cpus[vcpu]->needs_nmi = 1;
523 		pthread_kill(kvm_cpus[vcpu]->thread, SIGUSR1);
524 	}
525 
526 	if (!(dbg_type & KVM_DEBUG_CMD_TYPE_DUMP))
527 		return;
528 
529 	for (i = 0; i < nrcpus; i++) {
530 		struct kvm_cpu *cpu = kvm_cpus[i];
531 
532 		if (!cpu)
533 			continue;
534 
535 		printout_done = 0;
536 
537 		kvm_cpu__set_debug_fd(fd);
538 		pthread_kill(cpu->thread, SIGUSR1);
539 		/*
540 		 * Wait for the vCPU to dump state before signalling
541 		 * the next thread. Since this is debug code it does
542 		 * not matter that we are burning CPU time a bit:
543 		 */
544 		while (!printout_done)
545 			mb();
546 	}
547 
548 	close(fd);
549 
550 	serial8250__inject_sysrq(kvm);
551 }
552 
553 static void handle_sigalrm(int sig)
554 {
555 	kvm__arch_periodic_poll(kvm);
556 }
557 
558 static void handle_stop(int fd, u32 type, u32 len, u8 *msg)
559 {
560 	kvm_cpu__reboot();
561 }
562 
563 static void *kvm_cpu_thread(void *arg)
564 {
565 	current_kvm_cpu		= arg;
566 
567 	if (kvm_cpu__start(current_kvm_cpu))
568 		goto panic_kvm;
569 
570 	kvm_cpu__delete(current_kvm_cpu);
571 
572 	return (void *) (intptr_t) 0;
573 
574 panic_kvm:
575 	fprintf(stderr, "KVM exit reason: %u (\"%s\")\n",
576 		current_kvm_cpu->kvm_run->exit_reason,
577 		kvm_exit_reasons[current_kvm_cpu->kvm_run->exit_reason]);
578 	if (current_kvm_cpu->kvm_run->exit_reason == KVM_EXIT_UNKNOWN)
579 		fprintf(stderr, "KVM exit code: 0x%Lu\n",
580 			current_kvm_cpu->kvm_run->hw.hardware_exit_reason);
581 
582 	kvm_cpu__set_debug_fd(STDOUT_FILENO);
583 	kvm_cpu__show_registers(current_kvm_cpu);
584 	kvm_cpu__show_code(current_kvm_cpu);
585 	kvm_cpu__show_page_tables(current_kvm_cpu);
586 
587 	kvm_cpu__delete(current_kvm_cpu);
588 
589 	return (void *) (intptr_t) 1;
590 }
591 
/* Scratch buffer holding the kernel path composed by find_kernel(). */
static char kernel[PATH_MAX];

/*
 * Prefixes of host kernel images; find_kernel() appends "-<release>" of the
 * running host kernel (as reported by uname()) to each of them.
 */
static const char *host_kernels[] = {
	"/boot/vmlinuz",
	"/boot/bzImage",
	NULL
};

/* Kernel images tried first, in this order, when none was specified. */
static const char *default_kernels[] = {
	"./bzImage",
	"../../arch/" BUILD_ARCH "/boot/bzImage",
	NULL
};

/* Candidate vmlinux locations probed by find_vmlinux(). */
static const char *default_vmlinux[] = {
	"../../../vmlinux",
	"../../vmlinux",
	NULL
};
611 
612 static void kernel_usage_with_options(void)
613 {
614 	const char **k;
615 	struct utsname uts;
616 
617 	fprintf(stderr, "Fatal: could not find default kernel image in:\n");
618 	k = &default_kernels[0];
619 	while (*k) {
620 		fprintf(stderr, "\t%s\n", *k);
621 		k++;
622 	}
623 
624 	if (uname(&uts) < 0)
625 		return;
626 
627 	k = &host_kernels[0];
628 	while (*k) {
629 		if (snprintf(kernel, PATH_MAX, "%s-%s", *k, uts.release) < 0)
630 			return;
631 		fprintf(stderr, "\t%s\n", kernel);
632 		k++;
633 	}
634 	fprintf(stderr, "\nPlease see 'kvm run --help' for more options.\n\n");
635 }
636 
637 static u64 host_ram_size(void)
638 {
639 	long page_size;
640 	long nr_pages;
641 
642 	nr_pages	= sysconf(_SC_PHYS_PAGES);
643 	if (nr_pages < 0) {
644 		pr_warning("sysconf(_SC_PHYS_PAGES) failed");
645 		return 0;
646 	}
647 
648 	page_size	= sysconf(_SC_PAGE_SIZE);
649 	if (page_size < 0) {
650 		pr_warning("sysconf(_SC_PAGE_SIZE) failed");
651 		return 0;
652 	}
653 
654 	return (nr_pages * page_size) >> MB_SHIFT;
655 }
656 
/*
 * If the user didn't specify how much memory to allocate for the guest,
 * avoid filling the whole host RAM: get_ram_size() caps its default at this
 * fraction of the host's physical memory.
 */
#define RAM_SIZE_RATIO		0.8
662 
663 static u64 get_ram_size(int nr_cpus)
664 {
665 	u64 available;
666 	u64 ram_size;
667 
668 	ram_size	= 64 * (nr_cpus + 3);
669 
670 	available	= host_ram_size() * RAM_SIZE_RATIO;
671 	if (!available)
672 		available = MIN_RAM_SIZE_MB;
673 
674 	if (ram_size > available)
675 		ram_size	= available;
676 
677 	return ram_size;
678 }
679 
680 static const char *find_kernel(void)
681 {
682 	const char **k;
683 	struct stat st;
684 	struct utsname uts;
685 
686 	k = &default_kernels[0];
687 	while (*k) {
688 		if (stat(*k, &st) < 0 || !S_ISREG(st.st_mode)) {
689 			k++;
690 			continue;
691 		}
692 		strncpy(kernel, *k, PATH_MAX);
693 		return kernel;
694 	}
695 
696 	if (uname(&uts) < 0)
697 		return NULL;
698 
699 	k = &host_kernels[0];
700 	while (*k) {
701 		if (snprintf(kernel, PATH_MAX, "%s-%s", *k, uts.release) < 0)
702 			return NULL;
703 
704 		if (stat(kernel, &st) < 0 || !S_ISREG(st.st_mode)) {
705 			k++;
706 			continue;
707 		}
708 		return kernel;
709 
710 	}
711 	return NULL;
712 }
713 
714 static const char *find_vmlinux(void)
715 {
716 	const char **vmlinux;
717 
718 	vmlinux = &default_vmlinux[0];
719 	while (*vmlinux) {
720 		struct stat st;
721 
722 		if (stat(*vmlinux, &st) < 0 || !S_ISREG(st.st_mode)) {
723 			vmlinux++;
724 			continue;
725 		}
726 		return *vmlinux;
727 	}
728 	return NULL;
729 }
730 
731 void kvm_run_help(void)
732 {
733 	usage_with_options(run_usage, options);
734 }
735 
736 static int kvm_custom_stage2(void)
737 {
738 	char tmp[PATH_MAX], dst[PATH_MAX], *src;
739 	const char *rootfs;
740 	int r;
741 
742 	src = realpath("guest/init_stage2", NULL);
743 	if (src == NULL)
744 		return -ENOMEM;
745 
746 	if (image_filename[0] == NULL)
747 		rootfs = "default";
748 	else
749 		rootfs = image_filename[0];
750 
751 	snprintf(tmp, PATH_MAX, "%s%s/virt/init_stage2", kvm__get_dir(), rootfs);
752 	remove(tmp);
753 
754 	snprintf(dst, PATH_MAX, "/host/%s", src);
755 	r = symlink(dst, tmp);
756 	free(src);
757 
758 	return r;
759 }
760 
761 static int kvm_run_set_sandbox(void)
762 {
763 	const char *guestfs_name = "default";
764 	char path[PATH_MAX], script[PATH_MAX], *tmp;
765 
766 	if (image_filename[0])
767 		guestfs_name = image_filename[0];
768 
769 	snprintf(path, PATH_MAX, "%s%s/virt/sandbox.sh", kvm__get_dir(), guestfs_name);
770 
771 	remove(path);
772 
773 	if (sandbox == NULL)
774 		return 0;
775 
776 	tmp = realpath(sandbox, NULL);
777 	if (tmp == NULL)
778 		return -ENOMEM;
779 
780 	snprintf(script, PATH_MAX, "/host/%s", tmp);
781 	free(tmp);
782 
783 	return symlink(script, path);
784 }
785 
786 static void kvm_run_write_sandbox_cmd(const char **argv, int argc)
787 {
788 	const char script_hdr[] = "#! /bin/bash\n\n";
789 	int fd;
790 
791 	remove(sandbox);
792 
793 	fd = open(sandbox, O_RDWR | O_CREAT, 0777);
794 	if (fd < 0)
795 		die("Failed creating sandbox script");
796 
797 	if (write(fd, script_hdr, sizeof(script_hdr) - 1) <= 0)
798 		die("Failed writing sandbox script");
799 
800 	while (argc) {
801 		if (write(fd, argv[0], strlen(argv[0])) <= 0)
802 			die("Failed writing sandbox script");
803 		if (argc - 1)
804 			if (write(fd, " ", 1) <= 0)
805 				die("Failed writing sandbox script");
806 		argv++;
807 		argc--;
808 	}
809 	if (write(fd, "\n", 1) <= 0)
810 		die("Failed writing sandbox script");
811 
812 	close(fd);
813 }
814 
815 int kvm_cmd_run(int argc, const char **argv, const char *prefix)
816 {
817 	static char real_cmdline[2048], default_name[20];
818 	struct framebuffer *fb = NULL;
819 	unsigned int nr_online_cpus;
820 	int exit_code = 0;
821 	int max_cpus, recommended_cpus;
822 	int i;
823 	void *ret;
824 
825 	signal(SIGALRM, handle_sigalrm);
826 	kvm_ipc__register_handler(KVM_IPC_DEBUG, handle_debug);
827 	signal(SIGUSR1, handle_sigusr1);
828 	kvm_ipc__register_handler(KVM_IPC_PAUSE, handle_pause);
829 	kvm_ipc__register_handler(KVM_IPC_RESUME, handle_pause);
830 	kvm_ipc__register_handler(KVM_IPC_STOP, handle_stop);
831 
832 	nr_online_cpus = sysconf(_SC_NPROCESSORS_ONLN);
833 
834 	while (argc != 0) {
835 		argc = parse_options(argc, argv, options, run_usage,
836 				PARSE_OPT_STOP_AT_NON_OPTION |
837 				PARSE_OPT_KEEP_DASHDASH);
838 		if (argc != 0) {
839 			/* Cusrom options, should have been handled elsewhere */
840 			if (strcmp(argv[0], "--") == 0) {
841 				if (kvm_run_wrapper == KVM_RUN_SANDBOX) {
842 					sandbox = DEFAULT_SANDBOX_FILENAME;
843 					kvm_run_write_sandbox_cmd(argv+1, argc-1);
844 					break;
845 				}
846 			}
847 
848 			if (kernel_filename) {
849 				fprintf(stderr, "Cannot handle parameter: "
850 						"%s\n", argv[0]);
851 				usage_with_options(run_usage, options);
852 				return EINVAL;
853 			}
854 			/* first unhandled parameter is treated as a kernel
855 			   image
856 			 */
857 			kernel_filename = argv[0];
858 			argv++;
859 			argc--;
860 		}
861 
862 	}
863 
864 	if (!kernel_filename)
865 		kernel_filename = find_kernel();
866 
867 	if (!kernel_filename) {
868 		kernel_usage_with_options();
869 		return EINVAL;
870 	}
871 
872 	vmlinux_filename = find_vmlinux();
873 
874 	if (nrcpus == 0)
875 		nrcpus = nr_online_cpus;
876 	else if (nrcpus < 1 || nrcpus > KVM_NR_CPUS)
877 		die("Number of CPUs %d is out of [1;%d] range", nrcpus, KVM_NR_CPUS);
878 
879 	if (!ram_size)
880 		ram_size	= get_ram_size(nrcpus);
881 
882 	if (ram_size < MIN_RAM_SIZE_MB)
883 		die("Not enough memory specified: %lluMB (min %lluMB)", ram_size, MIN_RAM_SIZE_MB);
884 
885 	if (ram_size > host_ram_size())
886 		pr_warning("Guest memory size %lluMB exceeds host physical RAM size %lluMB", ram_size, host_ram_size());
887 
888 	ram_size <<= MB_SHIFT;
889 
890 	if (!dev)
891 		dev = DEFAULT_KVM_DEV;
892 
893 	if (!console)
894 		console = DEFAULT_CONSOLE;
895 
896 	if (!strncmp(console, "virtio", 6))
897 		active_console  = CONSOLE_VIRTIO;
898 	else if (!strncmp(console, "serial", 6))
899 		active_console  = CONSOLE_8250;
900 	else if (!strncmp(console, "hv", 2))
901 		active_console = CONSOLE_HV;
902 	else
903 		pr_warning("No console!");
904 
905 	if (!host_ip)
906 		host_ip = DEFAULT_HOST_ADDR;
907 
908 	if (!guest_ip)
909 		guest_ip = DEFAULT_GUEST_ADDR;
910 
911 	if (!guest_mac)
912 		guest_mac = DEFAULT_GUEST_MAC;
913 
914 	if (!host_mac)
915 		host_mac = DEFAULT_HOST_MAC;
916 
917 	if (!script)
918 		script = DEFAULT_SCRIPT;
919 
920 	symbol__init(vmlinux_filename);
921 
922 	term_init();
923 
924 	if (!guest_name) {
925 		sprintf(default_name, "guest-%u", getpid());
926 		guest_name = default_name;
927 	}
928 
929 	kvm = kvm__init(dev, ram_size, guest_name);
930 
931 	kvm->single_step = single_step;
932 
933 	ioeventfd__init();
934 
935 	max_cpus = kvm__max_cpus(kvm);
936 	recommended_cpus = kvm__recommended_cpus(kvm);
937 
938 	if (nrcpus > max_cpus) {
939 		printf("  # Limit the number of CPUs to %d\n", max_cpus);
940 		nrcpus = max_cpus;
941 	} else if (nrcpus > recommended_cpus) {
942 		printf("  # Warning: The maximum recommended amount of VCPUs"
943 			" is %d\n", recommended_cpus);
944 	}
945 
946 	kvm->nrcpus = nrcpus;
947 
948 	irq__init(kvm);
949 
950 	pci__init();
951 
952 	/*
953 	 * vidmode should be either specified
954 	 * either set by default
955 	 */
956 	if (vnc || sdl) {
957 		if (vidmode == -1)
958 			vidmode = 0x312;
959 	} else
960 		vidmode = 0;
961 
962 	memset(real_cmdline, 0, sizeof(real_cmdline));
963 	kvm__arch_set_cmdline(real_cmdline, vnc || sdl);
964 
965 	if (strlen(real_cmdline) > 0)
966 		strcat(real_cmdline, " ");
967 
968 	if (kernel_cmdline)
969 		strlcat(real_cmdline, kernel_cmdline, sizeof(real_cmdline));
970 
971 	if (!using_rootfs && !image_filename[0]) {
972 		char tmp[PATH_MAX];
973 
974 		kvm_setup_create_new("default");
975 		kvm_setup_resolv("default");
976 
977 		snprintf(tmp, PATH_MAX, "%s%s", kvm__get_dir(), "default");
978 		if (virtio_9p__register(kvm, tmp, "/dev/root") < 0)
979 			die("Unable to initialize virtio 9p");
980 		if (virtio_9p__register(kvm, "/", "hostfs") < 0)
981 			die("Unable to initialize virtio 9p");
982 		using_rootfs = custom_rootfs = 1;
983 	}
984 
985 	if (using_rootfs) {
986 		strcat(real_cmdline, " root=/dev/root rw rootflags=rw,trans=virtio,version=9p2000.L rootfstype=9p");
987 		if (custom_rootfs) {
988 			kvm_run_set_sandbox();
989 
990 			strcat(real_cmdline, " init=/virt/init");
991 
992 			if (!no_dhcp)
993 				strcat(real_cmdline, "  ip=dhcp");
994 			if (kvm_custom_stage2())
995 				die("Failed linking stage 2 of init.");
996 		}
997 	} else if (!strstr(real_cmdline, "root=")) {
998 		strlcat(real_cmdline, " root=/dev/vda rw ", sizeof(real_cmdline));
999 	}
1000 
1001 	if (image_count) {
1002 		kvm->nr_disks = image_count;
1003 		kvm->disks    = disk_image__open_all(image_filename, readonly_image, image_count);
1004 		if (!kvm->disks)
1005 			die("Unable to load all disk images.");
1006 
1007 		virtio_blk__init_all(kvm);
1008 	}
1009 
1010 	printf("  # kvm run -k %s -m %Lu -c %d --name %s\n", kernel_filename, ram_size / 1024 / 1024, nrcpus, guest_name);
1011 
1012 	if (!kvm__load_kernel(kvm, kernel_filename, initrd_filename,
1013 				real_cmdline, vidmode))
1014 		die("unable to load kernel %s", kernel_filename);
1015 
1016 	kvm->vmlinux		= vmlinux_filename;
1017 
1018 	ioport__setup_arch();
1019 
1020 	rtc__init();
1021 
1022 	serial8250__init(kvm);
1023 
1024 	if (active_console == CONSOLE_VIRTIO)
1025 		virtio_console__init(kvm);
1026 
1027 	if (virtio_rng)
1028 		virtio_rng__init(kvm);
1029 
1030 	if (balloon)
1031 		virtio_bln__init(kvm);
1032 
1033 	if (!network)
1034 		network = DEFAULT_NETWORK;
1035 
1036 	virtio_9p__init(kvm);
1037 
1038 	for (i = 0; i < num_net_devices; i++) {
1039 		net_params[i].kvm = kvm;
1040 		virtio_net__init(&net_params[i]);
1041 	}
1042 
1043 	if (num_net_devices == 0 && no_net == 0) {
1044 		struct virtio_net_params net_params;
1045 
1046 		net_params = (struct virtio_net_params) {
1047 			.guest_ip	= guest_ip,
1048 			.host_ip	= host_ip,
1049 			.kvm		= kvm,
1050 			.script		= script,
1051 			.mode		= NET_MODE_USER,
1052 		};
1053 		str_to_mac(guest_mac, net_params.guest_mac);
1054 		str_to_mac(host_mac, net_params.host_mac);
1055 
1056 		virtio_net__init(&net_params);
1057 	}
1058 
1059 	kvm__init_ram(kvm);
1060 
1061 #ifdef CONFIG_X86
1062 	kbd__init(kvm);
1063 #endif
1064 
1065 	pci_shmem__init(kvm);
1066 
1067 	if (vnc || sdl)
1068 		fb = vesa__init(kvm);
1069 
1070 	if (vnc) {
1071 		if (fb)
1072 			vnc__init(fb);
1073 	}
1074 
1075 	if (sdl) {
1076 		if (fb)
1077 			sdl__init(fb);
1078 	}
1079 
1080 	fb__start();
1081 
1082 	/* Device init all done; firmware init must
1083 	 * come after this (it may set up device trees etc.)
1084 	 */
1085 
1086 	kvm__start_timer(kvm);
1087 
1088 	kvm__arch_setup_firmware(kvm);
1089 
1090 	for (i = 0; i < nrcpus; i++) {
1091 		kvm_cpus[i] = kvm_cpu__init(kvm, i);
1092 		if (!kvm_cpus[i])
1093 			die("unable to initialize KVM VCPU");
1094 	}
1095 
1096 	thread_pool__init(nr_online_cpus);
1097 	ioeventfd__start();
1098 
1099 	for (i = 0; i < nrcpus; i++) {
1100 		if (pthread_create(&kvm_cpus[i]->thread, NULL, kvm_cpu_thread, kvm_cpus[i]) != 0)
1101 			die("unable to create KVM VCPU thread");
1102 	}
1103 
1104 	/* Only VCPU #0 is going to exit by itself when shutting down */
1105 	if (pthread_join(kvm_cpus[0]->thread, &ret) != 0)
1106 		exit_code = 1;
1107 
1108 	for (i = 1; i < nrcpus; i++) {
1109 		if (kvm_cpus[i]->is_running) {
1110 			pthread_kill(kvm_cpus[i]->thread, SIGKVMEXIT);
1111 			if (pthread_join(kvm_cpus[i]->thread, &ret) != 0)
1112 				die("pthread_join");
1113 		}
1114 		if (ret != NULL)
1115 			exit_code = 1;
1116 	}
1117 
1118 	compat__print_all_messages();
1119 
1120 	fb__stop();
1121 
1122 	virtio_blk__delete_all(kvm);
1123 	virtio_rng__delete_all(kvm);
1124 
1125 	disk_image__close_all(kvm->disks, image_count);
1126 	kvm__delete(kvm);
1127 
1128 	if (!exit_code)
1129 		printf("\n  # KVM session ended normally.\n");
1130 
1131 	return exit_code;
1132 }
1133