xref: /kvmtool/builtin-run.c (revision 69205aa12bc4d49078ce129299c9d8f748a4f1c6)
1 #include "kvm/builtin-run.h"
2 
3 #include "kvm/builtin-setup.h"
4 #include "kvm/virtio-balloon.h"
5 #include "kvm/virtio-console.h"
6 #include "kvm/parse-options.h"
7 #include "kvm/8250-serial.h"
8 #include "kvm/framebuffer.h"
9 #include "kvm/disk-image.h"
10 #include "kvm/threadpool.h"
11 #include "kvm/virtio-blk.h"
12 #include "kvm/virtio-net.h"
13 #include "kvm/virtio-rng.h"
14 #include "kvm/ioeventfd.h"
15 #include "kvm/virtio-9p.h"
16 #include "kvm/barrier.h"
17 #include "kvm/kvm-cpu.h"
18 #include "kvm/ioport.h"
19 #include "kvm/symbol.h"
20 #include "kvm/i8042.h"
21 #include "kvm/mutex.h"
22 #include "kvm/term.h"
23 #include "kvm/util.h"
24 #include "kvm/strbuf.h"
25 #include "kvm/vesa.h"
26 #include "kvm/irq.h"
27 #include "kvm/kvm.h"
28 #include "kvm/pci.h"
29 #include "kvm/rtc.h"
30 #include "kvm/sdl.h"
31 #include "kvm/vnc.h"
32 #include "kvm/guest_compat.h"
33 #include "kvm/pci-shmem.h"
34 #include "kvm/kvm-ipc.h"
35 #include "kvm/builtin-debug.h"
36 
37 #include <linux/types.h>
38 #include <linux/err.h>
39 
40 #include <sys/utsname.h>
41 #include <sys/types.h>
42 #include <sys/stat.h>
43 #include <termios.h>
44 #include <signal.h>
45 #include <stdlib.h>
46 #include <string.h>
47 #include <unistd.h>
48 #include <ctype.h>
49 #include <stdio.h>
50 
51 #define DEFAULT_KVM_DEV		"/dev/kvm"
52 #define DEFAULT_CONSOLE		"serial"
53 #define DEFAULT_NETWORK		"user"
54 #define DEFAULT_HOST_ADDR	"192.168.33.1"
55 #define DEFAULT_GUEST_ADDR	"192.168.33.15"
56 #define DEFAULT_GUEST_MAC	"02:15:15:15:15:15"
57 #define DEFAULT_HOST_MAC	"02:01:01:01:01:01"
58 #define DEFAULT_SCRIPT		"none"
59 const char *DEFAULT_SANDBOX_FILENAME = "guest/sandbox.sh";
60 
61 #define MB_SHIFT		(20)
62 #define KB_SHIFT		(10)
63 #define GB_SHIFT		(30)
64 #define MIN_RAM_SIZE_MB		(64ULL)
65 #define MIN_RAM_SIZE_BYTE	(MIN_RAM_SIZE_MB << MB_SHIFT)
66 
67 struct kvm *kvm;
68 struct kvm_cpu **kvm_cpus;
69 __thread struct kvm_cpu *current_kvm_cpu;
70 
71 static u64 ram_size;
72 static u8  image_count;
73 static u8 num_net_devices;
74 static bool virtio_rng;
75 static const char *kernel_cmdline;
76 static const char *kernel_filename;
77 static const char *vmlinux_filename;
78 static const char *initrd_filename;
79 static const char *firmware_filename;
80 static const char *image_filename[MAX_DISK_IMAGES];
81 static const char *console;
82 static const char *dev;
83 static const char *network;
84 static const char *host_ip;
85 static const char *guest_ip;
86 static const char *guest_mac;
87 static const char *host_mac;
88 static const char *script;
89 static const char *guest_name;
90 static const char *sandbox;
91 static const char *hugetlbfs_path;
92 static const char *custom_rootfs_name = "default";
93 static struct virtio_net_params *net_params;
94 static bool single_step;
95 static bool readonly_image[MAX_DISK_IMAGES];
96 static bool vnc;
97 static bool sdl;
98 static bool balloon;
99 static bool using_rootfs;
100 static bool custom_rootfs;
101 static bool no_net;
102 static bool no_dhcp;
103 extern bool ioport_debug;
104 static int  kvm_run_wrapper;
105 extern int  active_console;
106 extern int  debug_iodelay;
107 
108 bool do_debug_print = false;
109 
110 static int nrcpus;
111 static int vidmode = -1;
112 
113 static const char * const run_usage[] = {
114 	"lkvm run [<options>] [<kernel image>]",
115 	NULL
116 };
117 
118 enum {
119 	KVM_RUN_DEFAULT,
120 	KVM_RUN_SANDBOX,
121 };
122 
/*
 * Select the sandbox wrapper mode for the run command.
 *
 * With kvm_run_wrapper set to KVM_RUN_SANDBOX, kvm_cmd_run_init() treats
 * the first non-option argument as the command to write into the sandbox
 * script instead of treating it as a kernel image.
 */
void kvm_run_set_wrapper_sandbox(void)
{
	kvm_run_wrapper = KVM_RUN_SANDBOX;
}
127 
128 static int img_name_parser(const struct option *opt, const char *arg, int unset)
129 {
130 	char *sep;
131 	struct stat st;
132 	char path[PATH_MAX];
133 
134 	if (stat(arg, &st) == 0 &&
135 	    S_ISDIR(st.st_mode)) {
136 		char tmp[PATH_MAX];
137 
138 		if (using_rootfs)
139 			die("Please use only one rootfs directory atmost");
140 
141 		if (realpath(arg, tmp) == 0 ||
142 		    virtio_9p__register(kvm, tmp, "/dev/root") < 0)
143 			die("Unable to initialize virtio 9p");
144 		using_rootfs = 1;
145 		return 0;
146 	}
147 
148 	snprintf(path, PATH_MAX, "%s%s", kvm__get_dir(), arg);
149 
150 	if (stat(path, &st) == 0 &&
151 	    S_ISDIR(st.st_mode)) {
152 		char tmp[PATH_MAX];
153 
154 		if (using_rootfs)
155 			die("Please use only one rootfs directory atmost");
156 
157 		if (realpath(path, tmp) == 0 ||
158 		    virtio_9p__register(kvm, tmp, "/dev/root") < 0)
159 			die("Unable to initialize virtio 9p");
160 		if (virtio_9p__register(kvm, "/", "hostfs") < 0)
161 			die("Unable to initialize virtio 9p");
162 		kvm_setup_resolv(arg);
163 		using_rootfs = custom_rootfs = 1;
164 		custom_rootfs_name = arg;
165 		return 0;
166 	}
167 
168 	if (image_count >= MAX_DISK_IMAGES)
169 		die("Currently only 4 images are supported");
170 
171 	image_filename[image_count] = arg;
172 	sep = strstr(arg, ",");
173 	if (sep) {
174 		if (strcmp(sep + 1, "ro") == 0)
175 			readonly_image[image_count] = 1;
176 		*sep = 0;
177 	}
178 
179 	image_count++;
180 
181 	return 0;
182 }
183 
184 static int virtio_9p_rootdir_parser(const struct option *opt, const char *arg, int unset)
185 {
186 	char *tag_name;
187 	char tmp[PATH_MAX];
188 
189 	/*
190 	 * 9p dir can be of the form dirname,tag_name or
191 	 * just dirname. In the later case we use the
192 	 * default tag name
193 	 */
194 	tag_name = strstr(arg, ",");
195 	if (tag_name) {
196 		*tag_name = '\0';
197 		tag_name++;
198 	}
199 	if (realpath(arg, tmp)) {
200 		if (virtio_9p__register(kvm, tmp, tag_name) < 0)
201 			die("Unable to initialize virtio 9p");
202 	} else
203 		die("Failed resolving 9p path");
204 	return 0;
205 }
206 
/*
 * Option callback for --tty: converts @arg with atoi() and passes the
 * resulting tty id to term_set_tty(). Always returns 0.
 */
static int tty_parser(const struct option *opt, const char *arg, int unset)
{
	term_set_tty(atoi(arg));

	return 0;
}
215 
/*
 * Parse a textual MAC address ("aa:bb:cc:dd:ee:ff", hex octets) from @str
 * into the six bytes starting at @mac. Octets that fail to parse are left
 * untouched; the sscanf() result is not reported to the caller.
 */
static inline void str_to_mac(const char *str, char *mac)
{
	sscanf(str, "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx",
	       &mac[0], &mac[1], &mac[2],
	       &mac[3], &mac[4], &mac[5]);
}
221 static int set_net_param(struct virtio_net_params *p, const char *param,
222 				const char *val)
223 {
224 	if (strcmp(param, "guest_mac") == 0) {
225 		str_to_mac(val, p->guest_mac);
226 	} else if (strcmp(param, "mode") == 0) {
227 		if (!strncmp(val, "user", 4)) {
228 			int i;
229 
230 			for (i = 0; i < num_net_devices; i++)
231 				if (net_params[i].mode == NET_MODE_USER)
232 					die("Only one usermode network device allowed at a time");
233 			p->mode = NET_MODE_USER;
234 		} else if (!strncmp(val, "tap", 3)) {
235 			p->mode = NET_MODE_TAP;
236 		} else if (!strncmp(val, "none", 4)) {
237 			no_net = 1;
238 			return -1;
239 		} else
240 			die("Unkown network mode %s, please use user, tap or none", network);
241 	} else if (strcmp(param, "script") == 0) {
242 		p->script = strdup(val);
243 	} else if (strcmp(param, "guest_ip") == 0) {
244 		p->guest_ip = strdup(val);
245 	} else if (strcmp(param, "host_ip") == 0) {
246 		p->host_ip = strdup(val);
247 	} else if (strcmp(param, "trans") == 0) {
248 		p->trans = strdup(val);
249 	} else if (strcmp(param, "vhost") == 0) {
250 		p->vhost = atoi(val);
251 	} else if (strcmp(param, "fd") == 0) {
252 		p->fd = atoi(val);
253 	}
254 
255 	return 0;
256 }
257 
258 static int netdev_parser(const struct option *opt, const char *arg, int unset)
259 {
260 	struct virtio_net_params p;
261 	char *buf = NULL, *cmd = NULL, *cur = NULL;
262 	bool on_cmd = true;
263 
264 	if (arg) {
265 		buf = strdup(arg);
266 		if (buf == NULL)
267 			die("Failed allocating new net buffer");
268 		cur = strtok(buf, ",=");
269 	}
270 
271 	p = (struct virtio_net_params) {
272 		.guest_ip	= DEFAULT_GUEST_ADDR,
273 		.host_ip	= DEFAULT_HOST_ADDR,
274 		.script		= DEFAULT_SCRIPT,
275 		.mode		= NET_MODE_TAP,
276 	};
277 
278 	str_to_mac(DEFAULT_GUEST_MAC, p.guest_mac);
279 	p.guest_mac[5] += num_net_devices;
280 
281 	while (cur) {
282 		if (on_cmd) {
283 			cmd = cur;
284 		} else {
285 			if (set_net_param(&p, cmd, cur) < 0)
286 				goto done;
287 		}
288 		on_cmd = !on_cmd;
289 
290 		cur = strtok(NULL, ",=");
291 	};
292 
293 	num_net_devices++;
294 
295 	net_params = realloc(net_params, num_net_devices * sizeof(*net_params));
296 	if (net_params == NULL)
297 		die("Failed adding new network device");
298 
299 	net_params[num_net_devices - 1] = p;
300 
301 done:
302 	free(buf);
303 	return 0;
304 }
305 
306 static int shmem_parser(const struct option *opt, const char *arg, int unset)
307 {
308 	const u64 default_size = SHMEM_DEFAULT_SIZE;
309 	const u64 default_phys_addr = SHMEM_DEFAULT_ADDR;
310 	const char *default_handle = SHMEM_DEFAULT_HANDLE;
311 	struct shmem_info *si = malloc(sizeof(struct shmem_info));
312 	u64 phys_addr;
313 	u64 size;
314 	char *handle = NULL;
315 	int create = 0;
316 	const char *p = arg;
317 	char *next;
318 	int base = 10;
319 	int verbose = 0;
320 
321 	const int skip_pci = strlen("pci:");
322 	if (verbose)
323 		pr_info("shmem_parser(%p,%s,%d)", opt, arg, unset);
324 	/* parse out optional addr family */
325 	if (strcasestr(p, "pci:")) {
326 		p += skip_pci;
327 	} else if (strcasestr(p, "mem:")) {
328 		die("I can't add to E820 map yet.\n");
329 	}
330 	/* parse out physical addr */
331 	base = 10;
332 	if (strcasestr(p, "0x"))
333 		base = 16;
334 	phys_addr = strtoll(p, &next, base);
335 	if (next == p && phys_addr == 0) {
336 		pr_info("shmem: no physical addr specified, using default.");
337 		phys_addr = default_phys_addr;
338 	}
339 	if (*next != ':' && *next != '\0')
340 		die("shmem: unexpected chars after phys addr.\n");
341 	if (*next == '\0')
342 		p = next;
343 	else
344 		p = next + 1;
345 	/* parse out size */
346 	base = 10;
347 	if (strcasestr(p, "0x"))
348 		base = 16;
349 	size = strtoll(p, &next, base);
350 	if (next == p && size == 0) {
351 		pr_info("shmem: no size specified, using default.");
352 		size = default_size;
353 	}
354 	/* look for [KMGkmg][Bb]*  uses base 2. */
355 	int skip_B = 0;
356 	if (strspn(next, "KMGkmg")) {	/* might have a prefix */
357 		if (*(next + 1) == 'B' || *(next + 1) == 'b')
358 			skip_B = 1;
359 		switch (*next) {
360 		case 'K':
361 		case 'k':
362 			size = size << KB_SHIFT;
363 			break;
364 		case 'M':
365 		case 'm':
366 			size = size << MB_SHIFT;
367 			break;
368 		case 'G':
369 		case 'g':
370 			size = size << GB_SHIFT;
371 			break;
372 		default:
373 			die("shmem: bug in detecting size prefix.");
374 			break;
375 		}
376 		next += 1 + skip_B;
377 	}
378 	if (*next != ':' && *next != '\0') {
379 		die("shmem: unexpected chars after phys size. <%c><%c>\n",
380 		    *next, *p);
381 	}
382 	if (*next == '\0')
383 		p = next;
384 	else
385 		p = next + 1;
386 	/* parse out optional shmem handle */
387 	const int skip_handle = strlen("handle=");
388 	next = strcasestr(p, "handle=");
389 	if (*p && next) {
390 		if (p != next)
391 			die("unexpected chars before handle\n");
392 		p += skip_handle;
393 		next = strchrnul(p, ':');
394 		if (next - p) {
395 			handle = malloc(next - p + 1);
396 			strncpy(handle, p, next - p);
397 			handle[next - p] = '\0';	/* just in case. */
398 		}
399 		if (*next == '\0')
400 			p = next;
401 		else
402 			p = next + 1;
403 	}
404 	/* parse optional create flag to see if we should create shm seg. */
405 	if (*p && strcasestr(p, "create")) {
406 		create = 1;
407 		p += strlen("create");
408 	}
409 	if (*p != '\0')
410 		die("shmem: unexpected trailing chars\n");
411 	if (handle == NULL) {
412 		handle = malloc(strlen(default_handle) + 1);
413 		strcpy(handle, default_handle);
414 	}
415 	if (verbose) {
416 		pr_info("shmem: phys_addr = %llx", phys_addr);
417 		pr_info("shmem: size      = %llx", size);
418 		pr_info("shmem: handle    = %s", handle);
419 		pr_info("shmem: create    = %d", create);
420 	}
421 
422 	si->phys_addr = phys_addr;
423 	si->size = size;
424 	si->handle = handle;
425 	si->create = create;
426 	pci_shmem__register_mem(si);	/* ownership of si, etc. passed on. */
427 	return 0;
428 }
429 
/*
 * Command line options understood by 'lkvm run', grouped the way they are
 * shown in the usage output. Simple options store straight into the
 * file-scope variables named here; the OPT_CALLBACK entries (shmem, disk,
 * 9p, tty, network) are handled by the *_parser() functions above.
 */
static const struct option options[] = {
	OPT_GROUP("Basic options:"),
	OPT_STRING('\0', "name", &guest_name, "guest name",
			"A name for the guest"),
	OPT_INTEGER('c', "cpus", &nrcpus, "Number of CPUs"),
	/* Given in MiB; converted to bytes later in kvm_cmd_run_init(). */
	OPT_U64('m', "mem", &ram_size, "Virtual machine memory size in MiB."),
	OPT_CALLBACK('\0', "shmem", NULL,
		     "[pci:]<addr>:<size>[:handle=<handle>][:create]",
		     "Share host shmem with guest via pci device",
		     shmem_parser),
	OPT_CALLBACK('d', "disk", NULL, "image or rootfs_dir", "Disk image or rootfs directory", img_name_parser),
	OPT_BOOLEAN('\0', "balloon", &balloon, "Enable virtio balloon"),
	OPT_BOOLEAN('\0', "vnc", &vnc, "Enable VNC framebuffer"),
	OPT_BOOLEAN('\0', "sdl", &sdl, "Enable SDL framebuffer"),
	OPT_BOOLEAN('\0', "rng", &virtio_rng, "Enable virtio Random Number Generator"),
	OPT_CALLBACK('\0', "9p", NULL, "dir_to_share,tag_name",
		     "Enable virtio 9p to share files between host and guest", virtio_9p_rootdir_parser),
	OPT_STRING('\0', "console", &console, "serial, virtio or hv",
			"Console to use"),
	OPT_STRING('\0', "dev", &dev, "device_file", "KVM device file"),
	OPT_CALLBACK('\0', "tty", NULL, "tty id",
		     "Remap guest TTY into a pty on the host",
		     tty_parser),
	OPT_STRING('\0', "sandbox", &sandbox, "script",
			"Run this script when booting into custom rootfs"),
	OPT_STRING('\0', "hugetlbfs", &hugetlbfs_path, "path", "Hugetlbfs path"),

	OPT_GROUP("Kernel options:"),
	OPT_STRING('k', "kernel", &kernel_filename, "kernel",
			"Kernel to boot in virtual machine"),
	OPT_STRING('i', "initrd", &initrd_filename, "initrd",
			"Initial RAM disk image"),
	OPT_STRING('p', "params", &kernel_cmdline, "params",
			"Kernel command line arguments"),
	OPT_STRING('f', "firmware", &firmware_filename, "firmware",
			"Firmware image to boot in virtual machine"),

	OPT_GROUP("Networking options:"),
	/* May be given several times; each use adds one entry to net_params. */
	OPT_CALLBACK_DEFAULT('n', "network", NULL, "network params",
		     "Create a new guest NIC",
		     netdev_parser, NULL),
	OPT_BOOLEAN('\0', "no-dhcp", &no_dhcp, "Disable kernel DHCP in rootfs mode"),

	OPT_GROUP("BIOS options:"),
	OPT_INTEGER('\0', "vidmode", &vidmode,
		    "Video mode"),

	OPT_GROUP("Debug options:"),
	OPT_BOOLEAN('\0', "debug", &do_debug_print,
			"Enable debug messages"),
	OPT_BOOLEAN('\0', "debug-single-step", &single_step,
			"Enable single stepping"),
	OPT_BOOLEAN('\0', "debug-ioport", &ioport_debug,
			"Enable ioport debugging"),
	OPT_INTEGER('\0', "debug-iodelay", &debug_iodelay,
			"Delay IO by millisecond"),
	OPT_END()
};
488 
489 /*
490  * Serialize debug printout so that the output of multiple vcpus does not
491  * get mixed up:
492  */
493 static int printout_done;
494 
495 static void handle_sigusr1(int sig)
496 {
497 	struct kvm_cpu *cpu = current_kvm_cpu;
498 	int fd = kvm_cpu__get_debug_fd();
499 
500 	if (!cpu || cpu->needs_nmi)
501 		return;
502 
503 	dprintf(fd, "\n #\n # vCPU #%ld's dump:\n #\n", cpu->cpu_id);
504 	kvm_cpu__show_registers(cpu);
505 	kvm_cpu__show_code(cpu);
506 	kvm_cpu__show_page_tables(cpu);
507 	fflush(stdout);
508 	printout_done = 1;
509 	mb();
510 }
511 
/* Pause/resume the guest in response to KVM_IPC_PAUSE/KVM_IPC_RESUME messages */
513 static int is_paused;
514 
515 static void handle_pause(int fd, u32 type, u32 len, u8 *msg)
516 {
517 	if (WARN_ON(len))
518 		return;
519 
520 	if (type == KVM_IPC_RESUME && is_paused) {
521 		kvm->vm_state = KVM_VMSTATE_RUNNING;
522 		kvm__continue();
523 	} else if (type == KVM_IPC_PAUSE && !is_paused) {
524 		kvm->vm_state = KVM_VMSTATE_PAUSED;
525 		kvm__pause();
526 	} else {
527 		return;
528 	}
529 
530 	is_paused = !is_paused;
531 }
532 
533 static void handle_vmstate(int fd, u32 type, u32 len, u8 *msg)
534 {
535 	int r = 0;
536 
537 	if (type == KVM_IPC_VMSTATE)
538 		r = write(fd, &kvm->vm_state, sizeof(kvm->vm_state));
539 
540 	if (r < 0)
541 		pr_warning("Failed sending VMSTATE");
542 }
543 
544 static void handle_debug(int fd, u32 type, u32 len, u8 *msg)
545 {
546 	int i;
547 	struct debug_cmd_params *params;
548 	u32 dbg_type;
549 	u32 vcpu;
550 
551 	if (WARN_ON(type != KVM_IPC_DEBUG || len != sizeof(*params)))
552 		return;
553 
554 	params = (void *)msg;
555 	dbg_type = params->dbg_type;
556 	vcpu = params->cpu;
557 
558 	if (dbg_type & KVM_DEBUG_CMD_TYPE_NMI) {
559 		if ((int)vcpu >= kvm->nrcpus)
560 			return;
561 
562 		kvm_cpus[vcpu]->needs_nmi = 1;
563 		pthread_kill(kvm_cpus[vcpu]->thread, SIGUSR1);
564 	}
565 
566 	if (!(dbg_type & KVM_DEBUG_CMD_TYPE_DUMP))
567 		return;
568 
569 	for (i = 0; i < nrcpus; i++) {
570 		struct kvm_cpu *cpu = kvm_cpus[i];
571 
572 		if (!cpu)
573 			continue;
574 
575 		printout_done = 0;
576 
577 		kvm_cpu__set_debug_fd(fd);
578 		pthread_kill(cpu->thread, SIGUSR1);
579 		/*
580 		 * Wait for the vCPU to dump state before signalling
581 		 * the next thread. Since this is debug code it does
582 		 * not matter that we are burning CPU time a bit:
583 		 */
584 		while (!printout_done)
585 			mb();
586 	}
587 
588 	close(fd);
589 
590 	serial8250__inject_sysrq(kvm);
591 }
592 
/*
 * SIGALRM handler (installed in kvm_cmd_run_init()): forwards each alarm
 * to the architecture's periodic poll hook for the global VM.
 */
static void handle_sigalrm(int sig)
{
	kvm__arch_periodic_poll(kvm);
}
597 
/*
 * IPC handler for KVM_IPC_STOP. A message of another type, or one that
 * carries a payload, is rejected with a warning; otherwise the request is
 * passed on to kvm_cpu__reboot().
 */
static void handle_stop(int fd, u32 type, u32 len, u8 *msg)
{
	if (WARN_ON(type != KVM_IPC_STOP || len))
		return;

	kvm_cpu__reboot();
}
605 
606 static void *kvm_cpu_thread(void *arg)
607 {
608 	current_kvm_cpu		= arg;
609 
610 	if (kvm_cpu__start(current_kvm_cpu))
611 		goto panic_kvm;
612 
613 	return (void *) (intptr_t) 0;
614 
615 panic_kvm:
616 	fprintf(stderr, "KVM exit reason: %u (\"%s\")\n",
617 		current_kvm_cpu->kvm_run->exit_reason,
618 		kvm_exit_reasons[current_kvm_cpu->kvm_run->exit_reason]);
619 	if (current_kvm_cpu->kvm_run->exit_reason == KVM_EXIT_UNKNOWN)
620 		fprintf(stderr, "KVM exit code: 0x%Lu\n",
621 			current_kvm_cpu->kvm_run->hw.hardware_exit_reason);
622 
623 	kvm_cpu__set_debug_fd(STDOUT_FILENO);
624 	kvm_cpu__show_registers(current_kvm_cpu);
625 	kvm_cpu__show_code(current_kvm_cpu);
626 	kvm_cpu__show_page_tables(current_kvm_cpu);
627 
628 	return (void *) (intptr_t) 1;
629 }
630 
/* Scratch buffer holding the kernel path chosen by find_kernel(). */
static char kernel[PATH_MAX];

/*
 * Host kernel image prefixes; find_kernel() appends "-<uname release>"
 * to each of them before checking whether the file exists.
 */
static const char *host_kernels[] = {
	"/boot/vmlinuz",
	"/boot/bzImage",
	NULL
};

/*
 * Kernel images tried first by find_kernel(), in order. Relative paths
 * are resolved against the current working directory.
 */
static const char *default_kernels[] = {
	"./bzImage",
	"arch/" BUILD_ARCH "/boot/bzImage",
	"../../arch/" BUILD_ARCH "/boot/bzImage",
	NULL
};

/* Candidate vmlinux locations searched, in order, by find_vmlinux(). */
static const char *default_vmlinux[] = {
	"vmlinux",
	"../../../vmlinux",
	"../../vmlinux",
	NULL
};
652 
653 static void kernel_usage_with_options(void)
654 {
655 	const char **k;
656 	struct utsname uts;
657 
658 	fprintf(stderr, "Fatal: could not find default kernel image in:\n");
659 	k = &default_kernels[0];
660 	while (*k) {
661 		fprintf(stderr, "\t%s\n", *k);
662 		k++;
663 	}
664 
665 	if (uname(&uts) < 0)
666 		return;
667 
668 	k = &host_kernels[0];
669 	while (*k) {
670 		if (snprintf(kernel, PATH_MAX, "%s-%s", *k, uts.release) < 0)
671 			return;
672 		fprintf(stderr, "\t%s\n", kernel);
673 		k++;
674 	}
675 	fprintf(stderr, "\nPlease see '%s run --help' for more options.\n\n",
676 		KVM_BINARY_NAME);
677 }
678 
679 static u64 host_ram_size(void)
680 {
681 	long page_size;
682 	long nr_pages;
683 
684 	nr_pages	= sysconf(_SC_PHYS_PAGES);
685 	if (nr_pages < 0) {
686 		pr_warning("sysconf(_SC_PHYS_PAGES) failed");
687 		return 0;
688 	}
689 
690 	page_size	= sysconf(_SC_PAGE_SIZE);
691 	if (page_size < 0) {
692 		pr_warning("sysconf(_SC_PAGE_SIZE) failed");
693 		return 0;
694 	}
695 
696 	return (nr_pages * page_size) >> MB_SHIFT;
697 }
698 
699 /*
700  * If user didn't specify how much memory it wants to allocate for the guest,
701  * avoid filling the whole host RAM.
702  */
703 #define RAM_SIZE_RATIO		0.8
704 
705 static u64 get_ram_size(int nr_cpus)
706 {
707 	u64 available;
708 	u64 ram_size;
709 
710 	ram_size	= 64 * (nr_cpus + 3);
711 
712 	available	= host_ram_size() * RAM_SIZE_RATIO;
713 	if (!available)
714 		available = MIN_RAM_SIZE_MB;
715 
716 	if (ram_size > available)
717 		ram_size	= available;
718 
719 	return ram_size;
720 }
721 
722 static const char *find_kernel(void)
723 {
724 	const char **k;
725 	struct stat st;
726 	struct utsname uts;
727 
728 	k = &default_kernels[0];
729 	while (*k) {
730 		if (stat(*k, &st) < 0 || !S_ISREG(st.st_mode)) {
731 			k++;
732 			continue;
733 		}
734 		strncpy(kernel, *k, PATH_MAX);
735 		return kernel;
736 	}
737 
738 	if (uname(&uts) < 0)
739 		return NULL;
740 
741 	k = &host_kernels[0];
742 	while (*k) {
743 		if (snprintf(kernel, PATH_MAX, "%s-%s", *k, uts.release) < 0)
744 			return NULL;
745 
746 		if (stat(kernel, &st) < 0 || !S_ISREG(st.st_mode)) {
747 			k++;
748 			continue;
749 		}
750 		return kernel;
751 
752 	}
753 	return NULL;
754 }
755 
756 static const char *find_vmlinux(void)
757 {
758 	const char **vmlinux;
759 
760 	vmlinux = &default_vmlinux[0];
761 	while (*vmlinux) {
762 		struct stat st;
763 
764 		if (stat(*vmlinux, &st) < 0 || !S_ISREG(st.st_mode)) {
765 			vmlinux++;
766 			continue;
767 		}
768 		return *vmlinux;
769 	}
770 	return NULL;
771 }
772 
/*
 * Show help for the run command by handing its usage string and option
 * table to usage_with_options().
 */
void kvm_run_help(void)
{
	usage_with_options(run_usage, options);
}
777 
778 static int kvm_custom_stage2(void)
779 {
780 	char tmp[PATH_MAX], dst[PATH_MAX], *src;
781 	const char *rootfs = custom_rootfs_name;
782 	int r;
783 
784 	src = realpath("guest/init_stage2", NULL);
785 	if (src == NULL)
786 		return -ENOMEM;
787 
788 	snprintf(tmp, PATH_MAX, "%s%s/virt/init_stage2", kvm__get_dir(), rootfs);
789 	remove(tmp);
790 
791 	snprintf(dst, PATH_MAX, "/host/%s", src);
792 	r = symlink(dst, tmp);
793 	free(src);
794 
795 	return r;
796 }
797 
798 static int kvm_run_set_sandbox(void)
799 {
800 	const char *guestfs_name = custom_rootfs_name;
801 	char path[PATH_MAX], script[PATH_MAX], *tmp;
802 
803 	snprintf(path, PATH_MAX, "%s%s/virt/sandbox.sh", kvm__get_dir(), guestfs_name);
804 
805 	remove(path);
806 
807 	if (sandbox == NULL)
808 		return 0;
809 
810 	tmp = realpath(sandbox, NULL);
811 	if (tmp == NULL)
812 		return -ENOMEM;
813 
814 	snprintf(script, PATH_MAX, "/host/%s", tmp);
815 	free(tmp);
816 
817 	return symlink(script, path);
818 }
819 
/* Write @len bytes of @buf to the sandbox script or die trying. */
static void sandbox_script_emit(int fd, const char *buf, size_t len)
{
	if (write(fd, buf, len) <= 0)
		die("Failed writing sandbox script");
}

/*
 * Write @arg to @fd quoted so that a shell reads it back as exactly one
 * word with exactly the original characters:
 *   - an empty argument becomes '';
 *   - every run of characters without a single quote is wrapped in
 *     single quotes;
 *   - every single quote is written as "'".
 * Any failed write ends in die().
 */
static void kvm_write_sandbox_cmd_exactly(int fd, const char *arg)
{
	if (*arg == '\0') {
		sandbox_script_emit(fd, "''", 2);
		return;
	}

	while (*arg != '\0') {
		const char *quote = strchrnul(arg, '\'');

		/* Literal run up to the next single quote (or the end). */
		if (quote != arg) {
			sandbox_script_emit(fd, "'", 1);
			sandbox_script_emit(fd, arg, quote - arg);
			sandbox_script_emit(fd, "'", 1);
		}

		if (*quote == '\0')
			break;

		/* The single quote itself, protected by double quotes. */
		sandbox_script_emit(fd, "\"'\"", 3);

		arg = quote + 1;
	}
}
851 
852 static void resolve_program(const char *src, char *dst, size_t len)
853 {
854 	struct stat st;
855 	int err;
856 
857 	err = stat(src, &st);
858 
859 	if (!err && S_ISREG(st.st_mode)) {
860 		char resolved_path[PATH_MAX];
861 
862 		if (!realpath(src, resolved_path))
863 			die("Unable to resolve program %s: %s\n", src, strerror(errno));
864 
865 		snprintf(dst, len, "/host%s", resolved_path);
866 	} else
867 		strncpy(dst, src, len);
868 }
869 
870 static void kvm_run_write_sandbox_cmd(const char **argv, int argc)
871 {
872 	const char script_hdr[] = "#! /bin/bash\n\n";
873 	char program[PATH_MAX];
874 	int fd;
875 
876 	remove(sandbox);
877 
878 	fd = open(sandbox, O_RDWR | O_CREAT, 0777);
879 	if (fd < 0)
880 		die("Failed creating sandbox script");
881 
882 	if (write(fd, script_hdr, sizeof(script_hdr) - 1) <= 0)
883 		die("Failed writing sandbox script");
884 
885 	resolve_program(argv[0], program, PATH_MAX);
886 	kvm_write_sandbox_cmd_exactly(fd, program);
887 
888 	argv++;
889 	argc--;
890 
891 	while (argc) {
892 		if (write(fd, " ", 1) <= 0)
893 			die("Failed writing sandbox script");
894 
895 		kvm_write_sandbox_cmd_exactly(fd, argv[0]);
896 		argv++;
897 		argc--;
898 	}
899 	if (write(fd, "\n", 1) <= 0)
900 		die("Failed writing sandbox script");
901 
902 	close(fd);
903 }
904 
905 static int kvm_cmd_run_init(int argc, const char **argv)
906 {
907 	static char real_cmdline[2048], default_name[20];
908 	struct framebuffer *fb = NULL;
909 	unsigned int nr_online_cpus;
910 	int max_cpus, recommended_cpus;
911 	int i, r;
912 
913 	signal(SIGALRM, handle_sigalrm);
914 	kvm_ipc__register_handler(KVM_IPC_DEBUG, handle_debug);
915 	signal(SIGUSR1, handle_sigusr1);
916 	kvm_ipc__register_handler(KVM_IPC_PAUSE, handle_pause);
917 	kvm_ipc__register_handler(KVM_IPC_RESUME, handle_pause);
918 	kvm_ipc__register_handler(KVM_IPC_STOP, handle_stop);
919 	kvm_ipc__register_handler(KVM_IPC_VMSTATE, handle_vmstate);
920 
921 	nr_online_cpus = sysconf(_SC_NPROCESSORS_ONLN);
922 
923 	while (argc != 0) {
924 		argc = parse_options(argc, argv, options, run_usage,
925 				PARSE_OPT_STOP_AT_NON_OPTION |
926 				PARSE_OPT_KEEP_DASHDASH);
927 		if (argc != 0) {
			/* Custom options, should have been handled elsewhere */
929 			if (strcmp(argv[0], "--") == 0) {
930 				if (kvm_run_wrapper == KVM_RUN_SANDBOX) {
931 					sandbox = DEFAULT_SANDBOX_FILENAME;
932 					kvm_run_write_sandbox_cmd(argv+1, argc-1);
933 					break;
934 				}
935 			}
936 
937 			if ((kvm_run_wrapper == KVM_RUN_DEFAULT && kernel_filename) ||
938 				(kvm_run_wrapper == KVM_RUN_SANDBOX && sandbox)) {
939 				fprintf(stderr, "Cannot handle parameter: "
940 						"%s\n", argv[0]);
941 				usage_with_options(run_usage, options);
942 				return EINVAL;
943 			}
944 			if (kvm_run_wrapper == KVM_RUN_SANDBOX) {
945 				/*
946 				 * first unhandled parameter is treated as
947 				 * sandbox command
948 				 */
949 				sandbox = DEFAULT_SANDBOX_FILENAME;
950 				kvm_run_write_sandbox_cmd(argv, argc);
951 			} else {
952 				/*
953 				 * first unhandled parameter is treated as a kernel
954 				 * image
955 				 */
956 				kernel_filename = argv[0];
957 			}
958 			argv++;
959 			argc--;
960 		}
961 
962 	}
963 
964 	if (!kernel_filename)
965 		kernel_filename = find_kernel();
966 
967 	if (!kernel_filename) {
968 		kernel_usage_with_options();
969 		return EINVAL;
970 	}
971 
972 	vmlinux_filename = find_vmlinux();
973 
974 	if (nrcpus == 0)
975 		nrcpus = nr_online_cpus;
976 
977 	if (!ram_size)
978 		ram_size	= get_ram_size(nrcpus);
979 
980 	if (ram_size < MIN_RAM_SIZE_MB)
981 		die("Not enough memory specified: %lluMB (min %lluMB)", ram_size, MIN_RAM_SIZE_MB);
982 
983 	if (ram_size > host_ram_size())
984 		pr_warning("Guest memory size %lluMB exceeds host physical RAM size %lluMB", ram_size, host_ram_size());
985 
986 	ram_size <<= MB_SHIFT;
987 
988 	if (!dev)
989 		dev = DEFAULT_KVM_DEV;
990 
991 	if (!console)
992 		console = DEFAULT_CONSOLE;
993 
994 	if (!strncmp(console, "virtio", 6))
995 		active_console  = CONSOLE_VIRTIO;
996 	else if (!strncmp(console, "serial", 6))
997 		active_console  = CONSOLE_8250;
998 	else if (!strncmp(console, "hv", 2))
999 		active_console = CONSOLE_HV;
1000 	else
1001 		pr_warning("No console!");
1002 
1003 	if (!host_ip)
1004 		host_ip = DEFAULT_HOST_ADDR;
1005 
1006 	if (!guest_ip)
1007 		guest_ip = DEFAULT_GUEST_ADDR;
1008 
1009 	if (!guest_mac)
1010 		guest_mac = DEFAULT_GUEST_MAC;
1011 
1012 	if (!host_mac)
1013 		host_mac = DEFAULT_HOST_MAC;
1014 
1015 	if (!script)
1016 		script = DEFAULT_SCRIPT;
1017 
1018 	term_init();
1019 
1020 	if (!guest_name) {
1021 		if (custom_rootfs) {
1022 			guest_name = custom_rootfs_name;
1023 		} else {
1024 			sprintf(default_name, "guest-%u", getpid());
1025 			guest_name = default_name;
1026 		}
1027 	}
1028 
1029 	kvm = kvm__init(dev, hugetlbfs_path, ram_size, guest_name);
1030 	if (IS_ERR(kvm)) {
1031 		r = PTR_ERR(kvm);
1032 		goto fail;
1033 	}
1034 
1035 	kvm->single_step = single_step;
1036 
1037 	r = ioeventfd__init(kvm);
1038 	if (r < 0) {
1039 		pr_err("ioeventfd__init() failed with error %d\n", r);
1040 		goto fail;
1041 	}
1042 
1043 	max_cpus = kvm__max_cpus(kvm);
1044 	recommended_cpus = kvm__recommended_cpus(kvm);
1045 
1046 	if (nrcpus > max_cpus) {
1047 		printf("  # Limit the number of CPUs to %d\n", max_cpus);
1048 		nrcpus = max_cpus;
1049 	} else if (nrcpus > recommended_cpus) {
1050 		printf("  # Warning: The maximum recommended amount of VCPUs"
1051 			" is %d\n", recommended_cpus);
1052 	}
1053 
1054 	kvm->nrcpus = nrcpus;
1055 
1056 	/* Alloc one pointer too many, so array ends up 0-terminated */
1057 	kvm_cpus = calloc(nrcpus + 1, sizeof(void *));
1058 	if (!kvm_cpus)
1059 		die("Couldn't allocate array for %d CPUs", nrcpus);
1060 
1061 	r = irq__init(kvm);
1062 	if (r < 0) {
1063 		pr_err("irq__init() failed with error %d\n", r);
1064 		goto fail;
1065 	}
1066 
1067 	r = pci__init(kvm);
1068 	if (r < 0) {
1069 		pr_err("pci__init() failed with error %d\n", r);
1070 		goto fail;
1071 	}
1072 
1073 	r = ioport__init(kvm);
1074 	if (r < 0) {
1075 		pr_err("ioport__init() failed with error %d\n", r);
1076 		goto fail;
1077 	}
1078 
	/*
	 * vidmode is either specified by the user
	 * or set to a default here
	 */
1083 	if (vnc || sdl) {
1084 		if (vidmode == -1)
1085 			vidmode = 0x312;
1086 	} else {
1087 		vidmode = 0;
1088 	}
1089 
1090 	memset(real_cmdline, 0, sizeof(real_cmdline));
1091 	kvm__arch_set_cmdline(real_cmdline, vnc || sdl);
1092 
1093 	if (strlen(real_cmdline) > 0)
1094 		strcat(real_cmdline, " ");
1095 
1096 	if (kernel_cmdline)
1097 		strlcat(real_cmdline, kernel_cmdline, sizeof(real_cmdline));
1098 
1099 	if (!using_rootfs && !image_filename[0] && !initrd_filename) {
1100 		char tmp[PATH_MAX];
1101 
1102 		kvm_setup_create_new(custom_rootfs_name);
1103 		kvm_setup_resolv(custom_rootfs_name);
1104 
1105 		snprintf(tmp, PATH_MAX, "%s%s", kvm__get_dir(), "default");
1106 		if (virtio_9p__register(kvm, tmp, "/dev/root") < 0)
1107 			die("Unable to initialize virtio 9p");
1108 		if (virtio_9p__register(kvm, "/", "hostfs") < 0)
1109 			die("Unable to initialize virtio 9p");
1110 		using_rootfs = custom_rootfs = 1;
1111 	}
1112 
1113 	if (using_rootfs) {
1114 		strcat(real_cmdline, " root=/dev/root rw rootflags=rw,trans=virtio,version=9p2000.L rootfstype=9p");
1115 		if (custom_rootfs) {
1116 			kvm_run_set_sandbox();
1117 
1118 			strcat(real_cmdline, " init=/virt/init");
1119 
1120 			if (!no_dhcp)
1121 				strcat(real_cmdline, "  ip=dhcp");
1122 			if (kvm_custom_stage2())
1123 				die("Failed linking stage 2 of init.");
1124 		}
1125 	} else if (!strstr(real_cmdline, "root=")) {
1126 		strlcat(real_cmdline, " root=/dev/vda rw ", sizeof(real_cmdline));
1127 	}
1128 
1129 	if (image_count) {
1130 		kvm->nr_disks = image_count;
1131 		kvm->disks = disk_image__open_all(image_filename, readonly_image, image_count);
1132 		if (IS_ERR(kvm->disks)) {
1133 			r = PTR_ERR(kvm->disks);
1134 			pr_err("disk_image__open_all() failed with error %ld\n",
1135 					PTR_ERR(kvm->disks));
1136 			goto fail;
1137 		}
1138 	}
1139 
1140 	printf("  # %s run -k %s -m %Lu -c %d --name %s\n", KVM_BINARY_NAME,
1141 		kernel_filename, ram_size / 1024 / 1024, nrcpus, guest_name);
1142 
1143 	if (!firmware_filename) {
1144 		if (!kvm__load_kernel(kvm, kernel_filename,
1145 				initrd_filename, real_cmdline, vidmode))
1146 			die("unable to load kernel %s", kernel_filename);
1147 
1148 		kvm->vmlinux = vmlinux_filename;
1149 		r = symbol_init(kvm);
1150 		if (r < 0)
1151 			pr_debug("symbol_init() failed with error %d\n", r);
1152 	}
1153 
1154 	ioport__setup_arch();
1155 
1156 	r = rtc__init(kvm);
1157 	if (r < 0) {
1158 		pr_err("rtc__init() failed with error %d\n", r);
1159 		goto fail;
1160 	}
1161 
1162 	r = serial8250__init(kvm);
1163 	if (r < 0) {
1164 		pr_err("serial__init() failed with error %d\n", r);
1165 		goto fail;
1166 	}
1167 
1168 	r = virtio_blk__init(kvm);
1169 	if (r < 0) {
1170 		pr_err("virtio_blk__init() failed with error %d\n", r);
1171 		goto fail;
1172 	}
1173 
1174 	if (active_console == CONSOLE_VIRTIO)
1175 		virtio_console__init(kvm);
1176 
1177 	if (virtio_rng)
1178 		virtio_rng__init(kvm);
1179 
1180 	if (balloon)
1181 		virtio_bln__init(kvm);
1182 
1183 	if (!network)
1184 		network = DEFAULT_NETWORK;
1185 
1186 	virtio_9p__init(kvm);
1187 
1188 	for (i = 0; i < num_net_devices; i++) {
1189 		net_params[i].kvm = kvm;
1190 		virtio_net__init(&net_params[i]);
1191 	}
1192 
1193 	if (num_net_devices == 0 && no_net == 0) {
1194 		struct virtio_net_params net_params;
1195 
1196 		net_params = (struct virtio_net_params) {
1197 			.guest_ip	= guest_ip,
1198 			.host_ip	= host_ip,
1199 			.kvm		= kvm,
1200 			.script		= script,
1201 			.mode		= NET_MODE_USER,
1202 		};
1203 		str_to_mac(guest_mac, net_params.guest_mac);
1204 		str_to_mac(host_mac, net_params.host_mac);
1205 
1206 		virtio_net__init(&net_params);
1207 	}
1208 
1209 	kvm__init_ram(kvm);
1210 
1211 #ifdef CONFIG_X86
1212 	kbd__init(kvm);
1213 #endif
1214 
1215 	pci_shmem__init(kvm);
1216 
1217 	if (vnc || sdl) {
1218 		fb = vesa__init(kvm);
1219 		if (IS_ERR(fb)) {
1220 			pr_err("vesa__init() failed with error %ld\n", PTR_ERR(fb));
1221 			goto fail;
1222 		}
1223 	}
1224 
1225 	if (vnc && fb) {
1226 		r = vnc__init(fb);
1227 		if (r < 0) {
1228 			pr_err("vnc__init() failed with error %d\n", r);
1229 			goto fail;
1230 		}
1231 	}
1232 
1233 	if (sdl && fb) {
1234 		sdl__init(fb);
1235 		if (r < 0) {
1236 			pr_err("sdl__init() failed with error %d\n", r);
1237 			goto fail;
1238 		}
1239 	}
1240 
1241 	r = fb__start();
1242 	if (r < 0) {
1243 		pr_err("fb__init() failed with error %d\n", r);
1244 		goto fail;
1245 	}
1246 
1247 	/* Device init all done; firmware init must
1248 	 * come after this (it may set up device trees etc.)
1249 	 */
1250 
1251 	kvm__start_timer(kvm);
1252 
1253 	if (firmware_filename) {
1254 		if (!kvm__load_firmware(kvm, firmware_filename))
1255 			die("unable to load firmware image %s: %s", firmware_filename, strerror(errno));
1256 	} else {
1257 		kvm__arch_setup_firmware(kvm);
1258 		if (r < 0) {
1259 			pr_err("kvm__arch_setup_firmware() failed with error %d\n", r);
1260 			goto fail;
1261 		}
1262 	}
1263 
1264 	for (i = 0; i < nrcpus; i++) {
1265 		kvm_cpus[i] = kvm_cpu__init(kvm, i);
1266 		if (!kvm_cpus[i])
1267 			die("unable to initialize KVM VCPU");
1268 	}
1269 
1270 	thread_pool__init(nr_online_cpus);
1271 fail:
1272 	return r;
1273 }
1274 
1275 static int kvm_cmd_run_work(void)
1276 {
1277 	int i, r = -1;
1278 	void *ret = NULL;
1279 
1280 	for (i = 0; i < nrcpus; i++) {
1281 		if (pthread_create(&kvm_cpus[i]->thread, NULL, kvm_cpu_thread, kvm_cpus[i]) != 0)
1282 			die("unable to create KVM VCPU thread");
1283 	}
1284 
1285 	/* Only VCPU #0 is going to exit by itself when shutting down */
1286 	if (pthread_join(kvm_cpus[0]->thread, &ret) != 0)
1287 		r = 0;
1288 
1289 	kvm_cpu__delete(kvm_cpus[0]);
1290 	kvm_cpus[0] = NULL;
1291 
1292 	for (i = 1; i < nrcpus; i++) {
1293 		if (kvm_cpus[i]->is_running) {
1294 			pthread_kill(kvm_cpus[i]->thread, SIGKVMEXIT);
1295 			if (pthread_join(kvm_cpus[i]->thread, &ret) != 0)
1296 				die("pthread_join");
1297 			kvm_cpu__delete(kvm_cpus[i]);
1298 		}
1299 		if (ret == NULL)
1300 			r = 0;
1301 	}
1302 
1303 	return r;
1304 }
1305 
1306 static void kvm_cmd_run_exit(int guest_ret)
1307 {
1308 	int r = 0;
1309 
1310 	compat__print_all_messages();
1311 
1312 	r = symbol_exit(kvm);
1313 	if (r < 0)
1314 		pr_warning("symbol_exit() failed with error %d\n", r);
1315 
1316 	r = irq__exit(kvm);
1317 	if (r < 0)
1318 		pr_warning("irq__exit() failed with error %d\n", r);
1319 
1320 	fb__stop();
1321 
1322 	r = virtio_blk__exit(kvm);
1323 	if (r < 0)
1324 		pr_warning("virtio_blk__exit() failed with error %d\n", r);
1325 
1326 	r = virtio_rng__exit(kvm);
1327 	if (r < 0)
1328 		pr_warning("virtio_rng__exit() failed with error %d\n", r);
1329 
1330 	r = disk_image__close_all(kvm->disks, image_count);
1331 	if (r < 0)
1332 		pr_warning("disk_image__close_all() failed with error %d\n", r);
1333 
1334 	r = serial8250__exit(kvm);
1335 	if (r < 0)
1336 		pr_warning("serial8250__exit() failed with error %d\n", r);
1337 
1338 	r = rtc__exit(kvm);
1339 	if (r < 0)
1340 		pr_warning("rtc__exit() failed with error %d\n", r);
1341 
1342 	r = kvm__arch_free_firmware(kvm);
1343 	if (r < 0)
1344 		pr_warning("kvm__arch_free_firmware() failed with error %d\n", r);
1345 
1346 	r = ioport__exit(kvm);
1347 	if (r < 0)
1348 		pr_warning("ioport__exit() failed with error %d\n", r);
1349 
1350 	r = ioeventfd__exit(kvm);
1351 	if (r < 0)
1352 		pr_warning("ioeventfd__exit() failed with error %d\n", r);
1353 
1354 	r = pci__exit(kvm);
1355 	if (r < 0)
1356 		pr_warning("pci__exit() failed with error %d\n", r);
1357 
1358 	r = kvm__exit(kvm);
1359 	if (r < 0)
1360 		pr_warning("pci__exit() failed with error %d\n", r);
1361 
1362 	free(kvm_cpus);
1363 
1364 	if (guest_ret == 0)
1365 		printf("\n  # KVM session ended normally.\n");
1366 }
1367 
/*
 * Entry point of the "run" command: initialise the guest, run it, then
 * tear everything down.
 *
 * @argc, @argv: command-line arguments, forwarded to kvm_cmd_run_init().
 * @prefix:      not used by this function.
 *
 * Returns the negative value from kvm_cmd_run_init() if initialisation
 * fails (no teardown is performed in that case); otherwise returns the
 * result of kvm_cmd_run_work().
 */
int kvm_cmd_run(int argc, const char **argv, const char *prefix)
{
	int init_status;
	int guest_status;

	init_status = kvm_cmd_run_init(argc, argv);
	if (init_status < 0)
		return init_status;

	/* The guest's result also decides what the teardown step reports. */
	guest_status = kvm_cmd_run_work();
	kvm_cmd_run_exit(guest_status);

	return guest_status;
}
1381