xref: /kvmtool/builtin-run.c (revision 5ad8db5edcb2bed4bef6924c286d7575c4cae72e)
1 #include "kvm/builtin-run.h"
2 
3 #include "kvm/builtin-setup.h"
4 #include "kvm/virtio-balloon.h"
5 #include "kvm/virtio-console.h"
6 #include "kvm/parse-options.h"
7 #include "kvm/8250-serial.h"
8 #include "kvm/framebuffer.h"
9 #include "kvm/disk-image.h"
10 #include "kvm/threadpool.h"
11 #include "kvm/virtio-blk.h"
12 #include "kvm/virtio-net.h"
13 #include "kvm/virtio-rng.h"
14 #include "kvm/ioeventfd.h"
15 #include "kvm/virtio-9p.h"
16 #include "kvm/barrier.h"
17 #include "kvm/kvm-cpu.h"
18 #include "kvm/ioport.h"
19 #include "kvm/symbol.h"
20 #include "kvm/i8042.h"
21 #include "kvm/mutex.h"
22 #include "kvm/term.h"
23 #include "kvm/util.h"
24 #include "kvm/strbuf.h"
25 #include "kvm/vesa.h"
26 #include "kvm/irq.h"
27 #include "kvm/kvm.h"
28 #include "kvm/pci.h"
29 #include "kvm/rtc.h"
30 #include "kvm/sdl.h"
31 #include "kvm/vnc.h"
32 #include "kvm/guest_compat.h"
33 #include "kvm/pci-shmem.h"
34 #include "kvm/kvm-ipc.h"
35 #include "kvm/builtin-debug.h"
36 
37 #include <linux/types.h>
38 #include <linux/err.h>
39 
40 #include <sys/utsname.h>
41 #include <sys/types.h>
42 #include <sys/stat.h>
43 #include <termios.h>
44 #include <signal.h>
45 #include <stdlib.h>
46 #include <string.h>
47 #include <unistd.h>
48 #include <ctype.h>
49 #include <stdio.h>
50 
/*
 * Fallback values for settings the user did not supply on the command
 * line (device node, console type, networking parameters, tap script).
 */
#define DEFAULT_KVM_DEV		"/dev/kvm"
#define DEFAULT_CONSOLE		"serial"
#define DEFAULT_NETWORK		"user"
#define DEFAULT_HOST_ADDR	"192.168.33.1"
#define DEFAULT_GUEST_ADDR	"192.168.33.15"
#define DEFAULT_GUEST_MAC	"02:15:15:15:15:15"
#define DEFAULT_HOST_MAC	"02:01:01:01:01:01"
#define DEFAULT_SCRIPT		"none"
/* Path the sandbox wrapper writes its generated shell script to. */
const char *DEFAULT_SANDBOX_FILENAME = "guest/sandbox.sh";

/* Shift counts for scaling a value by KiB, MiB or GiB. */
#define MB_SHIFT		(20)
#define KB_SHIFT		(10)
#define GB_SHIFT		(30)
/* Smallest guest RAM size accepted, expressed in MiB and in bytes. */
#define MIN_RAM_SIZE_MB		(64ULL)
#define MIN_RAM_SIZE_BYTE	(MIN_RAM_SIZE_MB << MB_SHIFT)
66 
/* The virtual machine instance and the array of its vCPUs. */
struct kvm *kvm;
struct kvm_cpu **kvm_cpus;
/* Per-thread pointer to the vCPU a thread drives; set in kvm_cpu_thread(). */
__thread struct kvm_cpu *current_kvm_cpu;

/*
 * Run configuration. Most of these are written by the options[] table or
 * by its callback parsers and then consumed by kvm_cmd_run_init().
 */
static u64 ram_size;
static u8  image_count;
static u8 num_net_devices;
static bool virtio_rng;
static const char *kernel_cmdline;
static const char *kernel_filename;
static const char *vmlinux_filename;
static const char *initrd_filename;
static const char *firmware_filename;
static const char *image_filename[MAX_DISK_IMAGES];
static const char *console;
static const char *dev;
static const char *network;
static const char *host_ip;
static const char *guest_ip;
static const char *guest_mac;
static const char *host_mac;
static const char *script;
static const char *guest_name;
static const char *sandbox;
static const char *hugetlbfs_path;
static const char *custom_rootfs_name = "default";
/* Growable array of NICs requested with -n/--network (see netdev_parser()). */
static struct virtio_net_params *net_params;
static bool single_step;
static bool readonly_image[MAX_DISK_IMAGES];
static bool vnc;
static bool sdl;
static bool balloon;
static bool using_rootfs;
static bool custom_rootfs;
static bool no_net;
static bool no_dhcp;
extern bool ioport_debug;
/* One of the KVM_RUN_* values below. */
static int  kvm_run_wrapper;
extern int  active_console;
extern int  debug_iodelay;

bool do_debug_print = false;

static int nrcpus;
/* -1 means "not specified by the user". */
static int vidmode = -1;

static const char * const run_usage[] = {
	"lkvm run [<options>] [<kernel image>]",
	NULL
};

/*
 * Selects how the first non-option argument is interpreted: as a kernel
 * image (default) or as the command to run inside a sandbox.
 */
enum {
	KVM_RUN_DEFAULT,
	KVM_RUN_SANDBOX,
};
122 
/* Switch the run command into sandbox-wrapper mode (KVM_RUN_SANDBOX). */
void kvm_run_set_wrapper_sandbox(void)
{
	kvm_run_wrapper = KVM_RUN_SANDBOX;
}
127 
128 static int img_name_parser(const struct option *opt, const char *arg, int unset)
129 {
130 	char *sep;
131 	struct stat st;
132 	char path[PATH_MAX];
133 
134 	if (stat(arg, &st) == 0 &&
135 	    S_ISDIR(st.st_mode)) {
136 		char tmp[PATH_MAX];
137 
138 		if (using_rootfs)
139 			die("Please use only one rootfs directory atmost");
140 
141 		if (realpath(arg, tmp) == 0 ||
142 		    virtio_9p__register(kvm, tmp, "/dev/root") < 0)
143 			die("Unable to initialize virtio 9p");
144 		using_rootfs = 1;
145 		return 0;
146 	}
147 
148 	snprintf(path, PATH_MAX, "%s%s", kvm__get_dir(), arg);
149 
150 	if (stat(path, &st) == 0 &&
151 	    S_ISDIR(st.st_mode)) {
152 		char tmp[PATH_MAX];
153 
154 		if (using_rootfs)
155 			die("Please use only one rootfs directory atmost");
156 
157 		if (realpath(path, tmp) == 0 ||
158 		    virtio_9p__register(kvm, tmp, "/dev/root") < 0)
159 			die("Unable to initialize virtio 9p");
160 		if (virtio_9p__register(kvm, "/", "hostfs") < 0)
161 			die("Unable to initialize virtio 9p");
162 		kvm_setup_resolv(arg);
163 		using_rootfs = custom_rootfs = 1;
164 		custom_rootfs_name = arg;
165 		return 0;
166 	}
167 
168 	if (image_count >= MAX_DISK_IMAGES)
169 		die("Currently only 4 images are supported");
170 
171 	image_filename[image_count] = arg;
172 	sep = strstr(arg, ",");
173 	if (sep) {
174 		if (strcmp(sep + 1, "ro") == 0)
175 			readonly_image[image_count] = 1;
176 		*sep = 0;
177 	}
178 
179 	image_count++;
180 
181 	return 0;
182 }
183 
184 static int virtio_9p_rootdir_parser(const struct option *opt, const char *arg, int unset)
185 {
186 	char *tag_name;
187 	char tmp[PATH_MAX];
188 
189 	/*
190 	 * 9p dir can be of the form dirname,tag_name or
191 	 * just dirname. In the later case we use the
192 	 * default tag name
193 	 */
194 	tag_name = strstr(arg, ",");
195 	if (tag_name) {
196 		*tag_name = '\0';
197 		tag_name++;
198 	}
199 	if (realpath(arg, tmp)) {
200 		if (virtio_9p__register(kvm, tmp, tag_name) < 0)
201 			die("Unable to initialize virtio 9p");
202 	} else
203 		die("Failed resolving 9p path");
204 	return 0;
205 }
206 
/* Option callback for --tty: passes the numeric tty id to term_set_tty(). */
static int tty_parser(const struct option *opt, const char *arg, int unset)
{
	term_set_tty(atoi(arg));
	return 0;
}
215 
/*
 * Parse a colon-separated hexadecimal MAC address ("aa:bb:cc:dd:ee:ff")
 * from @str into the six bytes at @mac. Bytes that fail to parse are left
 * as they were.
 */
static inline void str_to_mac(const char *str, char *mac)
{
	char *octet = mac;

	sscanf(str, "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx",
	       &octet[0], &octet[1], &octet[2],
	       &octet[3], &octet[4], &octet[5]);
}
221 static int set_net_param(struct virtio_net_params *p, const char *param,
222 				const char *val)
223 {
224 	if (strcmp(param, "guest_mac") == 0) {
225 		str_to_mac(val, p->guest_mac);
226 	} else if (strcmp(param, "mode") == 0) {
227 		if (!strncmp(val, "user", 4)) {
228 			int i;
229 
230 			for (i = 0; i < num_net_devices; i++)
231 				if (net_params[i].mode == NET_MODE_USER)
232 					die("Only one usermode network device allowed at a time");
233 			p->mode = NET_MODE_USER;
234 		} else if (!strncmp(val, "tap", 3)) {
235 			p->mode = NET_MODE_TAP;
236 		} else if (!strncmp(val, "none", 4)) {
237 			no_net = 1;
238 			return -1;
239 		} else
240 			die("Unkown network mode %s, please use user, tap or none", network);
241 	} else if (strcmp(param, "script") == 0) {
242 		p->script = strdup(val);
243 	} else if (strcmp(param, "guest_ip") == 0) {
244 		p->guest_ip = strdup(val);
245 	} else if (strcmp(param, "host_ip") == 0) {
246 		p->host_ip = strdup(val);
247 	} else if (strcmp(param, "vhost") == 0) {
248 		p->vhost = atoi(val);
249 	} else if (strcmp(param, "fd") == 0) {
250 		p->fd = atoi(val);
251 	}
252 
253 	return 0;
254 }
255 
256 static int netdev_parser(const struct option *opt, const char *arg, int unset)
257 {
258 	struct virtio_net_params p;
259 	char *buf = NULL, *cmd = NULL, *cur = NULL;
260 	bool on_cmd = true;
261 
262 	if (arg) {
263 		buf = strdup(arg);
264 		if (buf == NULL)
265 			die("Failed allocating new net buffer");
266 		cur = strtok(buf, ",=");
267 	}
268 
269 	p = (struct virtio_net_params) {
270 		.guest_ip	= DEFAULT_GUEST_ADDR,
271 		.host_ip	= DEFAULT_HOST_ADDR,
272 		.script		= DEFAULT_SCRIPT,
273 		.mode		= NET_MODE_TAP,
274 	};
275 
276 	str_to_mac(DEFAULT_GUEST_MAC, p.guest_mac);
277 	p.guest_mac[5] += num_net_devices;
278 
279 	while (cur) {
280 		if (on_cmd) {
281 			cmd = cur;
282 		} else {
283 			if (set_net_param(&p, cmd, cur) < 0)
284 				goto done;
285 		}
286 		on_cmd = !on_cmd;
287 
288 		cur = strtok(NULL, ",=");
289 	};
290 
291 	num_net_devices++;
292 
293 	net_params = realloc(net_params, num_net_devices * sizeof(*net_params));
294 	if (net_params == NULL)
295 		die("Failed adding new network device");
296 
297 	net_params[num_net_devices - 1] = p;
298 
299 done:
300 	free(buf);
301 	return 0;
302 }
303 
304 static int shmem_parser(const struct option *opt, const char *arg, int unset)
305 {
306 	const u64 default_size = SHMEM_DEFAULT_SIZE;
307 	const u64 default_phys_addr = SHMEM_DEFAULT_ADDR;
308 	const char *default_handle = SHMEM_DEFAULT_HANDLE;
309 	struct shmem_info *si = malloc(sizeof(struct shmem_info));
310 	u64 phys_addr;
311 	u64 size;
312 	char *handle = NULL;
313 	int create = 0;
314 	const char *p = arg;
315 	char *next;
316 	int base = 10;
317 	int verbose = 0;
318 
319 	const int skip_pci = strlen("pci:");
320 	if (verbose)
321 		pr_info("shmem_parser(%p,%s,%d)", opt, arg, unset);
322 	/* parse out optional addr family */
323 	if (strcasestr(p, "pci:")) {
324 		p += skip_pci;
325 	} else if (strcasestr(p, "mem:")) {
326 		die("I can't add to E820 map yet.\n");
327 	}
328 	/* parse out physical addr */
329 	base = 10;
330 	if (strcasestr(p, "0x"))
331 		base = 16;
332 	phys_addr = strtoll(p, &next, base);
333 	if (next == p && phys_addr == 0) {
334 		pr_info("shmem: no physical addr specified, using default.");
335 		phys_addr = default_phys_addr;
336 	}
337 	if (*next != ':' && *next != '\0')
338 		die("shmem: unexpected chars after phys addr.\n");
339 	if (*next == '\0')
340 		p = next;
341 	else
342 		p = next + 1;
343 	/* parse out size */
344 	base = 10;
345 	if (strcasestr(p, "0x"))
346 		base = 16;
347 	size = strtoll(p, &next, base);
348 	if (next == p && size == 0) {
349 		pr_info("shmem: no size specified, using default.");
350 		size = default_size;
351 	}
352 	/* look for [KMGkmg][Bb]*  uses base 2. */
353 	int skip_B = 0;
354 	if (strspn(next, "KMGkmg")) {	/* might have a prefix */
355 		if (*(next + 1) == 'B' || *(next + 1) == 'b')
356 			skip_B = 1;
357 		switch (*next) {
358 		case 'K':
359 		case 'k':
360 			size = size << KB_SHIFT;
361 			break;
362 		case 'M':
363 		case 'm':
364 			size = size << MB_SHIFT;
365 			break;
366 		case 'G':
367 		case 'g':
368 			size = size << GB_SHIFT;
369 			break;
370 		default:
371 			die("shmem: bug in detecting size prefix.");
372 			break;
373 		}
374 		next += 1 + skip_B;
375 	}
376 	if (*next != ':' && *next != '\0') {
377 		die("shmem: unexpected chars after phys size. <%c><%c>\n",
378 		    *next, *p);
379 	}
380 	if (*next == '\0')
381 		p = next;
382 	else
383 		p = next + 1;
384 	/* parse out optional shmem handle */
385 	const int skip_handle = strlen("handle=");
386 	next = strcasestr(p, "handle=");
387 	if (*p && next) {
388 		if (p != next)
389 			die("unexpected chars before handle\n");
390 		p += skip_handle;
391 		next = strchrnul(p, ':');
392 		if (next - p) {
393 			handle = malloc(next - p + 1);
394 			strncpy(handle, p, next - p);
395 			handle[next - p] = '\0';	/* just in case. */
396 		}
397 		if (*next == '\0')
398 			p = next;
399 		else
400 			p = next + 1;
401 	}
402 	/* parse optional create flag to see if we should create shm seg. */
403 	if (*p && strcasestr(p, "create")) {
404 		create = 1;
405 		p += strlen("create");
406 	}
407 	if (*p != '\0')
408 		die("shmem: unexpected trailing chars\n");
409 	if (handle == NULL) {
410 		handle = malloc(strlen(default_handle) + 1);
411 		strcpy(handle, default_handle);
412 	}
413 	if (verbose) {
414 		pr_info("shmem: phys_addr = %llx", phys_addr);
415 		pr_info("shmem: size      = %llx", size);
416 		pr_info("shmem: handle    = %s", handle);
417 		pr_info("shmem: create    = %d", create);
418 	}
419 
420 	si->phys_addr = phys_addr;
421 	si->size = size;
422 	si->handle = handle;
423 	si->create = create;
424 	pci_shmem__register_mem(si);	/* ownership of si, etc. passed on. */
425 	return 0;
426 }
427 
/*
 * Command-line options understood by the run command. Plain options store
 * straight into the file-scope configuration variables; OPT_CALLBACK
 * entries hand their argument to the named parser function instead.
 */
static const struct option options[] = {
	OPT_GROUP("Basic options:"),
	OPT_STRING('\0', "name", &guest_name, "guest name",
			"A name for the guest"),
	OPT_INTEGER('c', "cpus", &nrcpus, "Number of CPUs"),
	OPT_U64('m', "mem", &ram_size, "Virtual machine memory size in MiB."),
	OPT_CALLBACK('\0', "shmem", NULL,
		     "[pci:]<addr>:<size>[:handle=<handle>][:create]",
		     "Share host shmem with guest via pci device",
		     shmem_parser),
	OPT_CALLBACK('d', "disk", NULL, "image or rootfs_dir", "Disk image or rootfs directory", img_name_parser),
	OPT_BOOLEAN('\0', "balloon", &balloon, "Enable virtio balloon"),
	OPT_BOOLEAN('\0', "vnc", &vnc, "Enable VNC framebuffer"),
	OPT_BOOLEAN('\0', "sdl", &sdl, "Enable SDL framebuffer"),
	OPT_BOOLEAN('\0', "rng", &virtio_rng, "Enable virtio Random Number Generator"),
	OPT_CALLBACK('\0', "9p", NULL, "dir_to_share,tag_name",
		     "Enable virtio 9p to share files between host and guest", virtio_9p_rootdir_parser),
	OPT_STRING('\0', "console", &console, "serial, virtio or hv",
			"Console to use"),
	OPT_STRING('\0', "dev", &dev, "device_file", "KVM device file"),
	OPT_CALLBACK('\0', "tty", NULL, "tty id",
		     "Remap guest TTY into a pty on the host",
		     tty_parser),
	OPT_STRING('\0', "sandbox", &sandbox, "script",
			"Run this script when booting into custom rootfs"),
	OPT_STRING('\0', "hugetlbfs", &hugetlbfs_path, "path", "Hugetlbfs path"),

	OPT_GROUP("Kernel options:"),
	OPT_STRING('k', "kernel", &kernel_filename, "kernel",
			"Kernel to boot in virtual machine"),
	OPT_STRING('i', "initrd", &initrd_filename, "initrd",
			"Initial RAM disk image"),
	OPT_STRING('p', "params", &kernel_cmdline, "params",
			"Kernel command line arguments"),
	OPT_STRING('f', "firmware", &firmware_filename, "firmware",
			"Firmware image to boot in virtual machine"),

	OPT_GROUP("Networking options:"),
	OPT_CALLBACK_DEFAULT('n', "network", NULL, "network params",
		     "Create a new guest NIC",
		     netdev_parser, NULL),
	OPT_BOOLEAN('\0', "no-dhcp", &no_dhcp, "Disable kernel DHCP in rootfs mode"),

	OPT_GROUP("BIOS options:"),
	OPT_INTEGER('\0', "vidmode", &vidmode,
		    "Video mode"),

	OPT_GROUP("Debug options:"),
	OPT_BOOLEAN('\0', "debug", &do_debug_print,
			"Enable debug messages"),
	OPT_BOOLEAN('\0', "debug-single-step", &single_step,
			"Enable single stepping"),
	OPT_BOOLEAN('\0', "debug-ioport", &ioport_debug,
			"Enable ioport debugging"),
	OPT_INTEGER('\0', "debug-iodelay", &debug_iodelay,
			"Delay IO by millisecond"),
	OPT_END()
};
486 
487 /*
488  * Serialize debug printout so that the output of multiple vcpus does not
489  * get mixed up:
490  */
491 static int printout_done;
492 
493 static void handle_sigusr1(int sig)
494 {
495 	struct kvm_cpu *cpu = current_kvm_cpu;
496 	int fd = kvm_cpu__get_debug_fd();
497 
498 	if (!cpu || cpu->needs_nmi)
499 		return;
500 
501 	dprintf(fd, "\n #\n # vCPU #%ld's dump:\n #\n", cpu->cpu_id);
502 	kvm_cpu__show_registers(cpu);
503 	kvm_cpu__show_code(cpu);
504 	kvm_cpu__show_page_tables(cpu);
505 	fflush(stdout);
506 	printout_done = 1;
507 	mb();
508 }
509 
510 /* Pause/resume the guest using SIGUSR2 */
511 static int is_paused;
512 
513 static void handle_pause(int fd, u32 type, u32 len, u8 *msg)
514 {
515 	if (WARN_ON(len))
516 		return;
517 
518 	if (type == KVM_IPC_RESUME && is_paused) {
519 		kvm->vm_state = KVM_VMSTATE_RUNNING;
520 		kvm__continue();
521 	} else if (type == KVM_IPC_PAUSE && !is_paused) {
522 		kvm->vm_state = KVM_VMSTATE_PAUSED;
523 		kvm__pause();
524 	} else {
525 		return;
526 	}
527 
528 	is_paused = !is_paused;
529 }
530 
531 static void handle_vmstate(int fd, u32 type, u32 len, u8 *msg)
532 {
533 	int r = 0;
534 
535 	if (type == KVM_IPC_VMSTATE)
536 		r = write(fd, &kvm->vm_state, sizeof(kvm->vm_state));
537 
538 	if (r < 0)
539 		pr_warning("Failed sending VMSTATE");
540 }
541 
542 static void handle_debug(int fd, u32 type, u32 len, u8 *msg)
543 {
544 	int i;
545 	struct debug_cmd_params *params;
546 	u32 dbg_type;
547 	u32 vcpu;
548 
549 	if (WARN_ON(type != KVM_IPC_DEBUG || len != sizeof(*params)))
550 		return;
551 
552 	params = (void *)msg;
553 	dbg_type = params->dbg_type;
554 	vcpu = params->cpu;
555 
556 	if (dbg_type & KVM_DEBUG_CMD_TYPE_NMI) {
557 		if ((int)vcpu >= kvm->nrcpus)
558 			return;
559 
560 		kvm_cpus[vcpu]->needs_nmi = 1;
561 		pthread_kill(kvm_cpus[vcpu]->thread, SIGUSR1);
562 	}
563 
564 	if (!(dbg_type & KVM_DEBUG_CMD_TYPE_DUMP))
565 		return;
566 
567 	for (i = 0; i < nrcpus; i++) {
568 		struct kvm_cpu *cpu = kvm_cpus[i];
569 
570 		if (!cpu)
571 			continue;
572 
573 		printout_done = 0;
574 
575 		kvm_cpu__set_debug_fd(fd);
576 		pthread_kill(cpu->thread, SIGUSR1);
577 		/*
578 		 * Wait for the vCPU to dump state before signalling
579 		 * the next thread. Since this is debug code it does
580 		 * not matter that we are burning CPU time a bit:
581 		 */
582 		while (!printout_done)
583 			mb();
584 	}
585 
586 	close(fd);
587 
588 	serial8250__inject_sysrq(kvm);
589 }
590 
/* SIGALRM handler: runs the architecture's periodic poll for the VM. */
static void handle_sigalrm(int sig)
{
	kvm__arch_periodic_poll(kvm);
}
595 
596 static void handle_stop(int fd, u32 type, u32 len, u8 *msg)
597 {
598 	if (WARN_ON(type != KVM_IPC_STOP || len))
599 		return;
600 
601 	kvm_cpu__reboot();
602 }
603 
604 static void *kvm_cpu_thread(void *arg)
605 {
606 	current_kvm_cpu		= arg;
607 
608 	if (kvm_cpu__start(current_kvm_cpu))
609 		goto panic_kvm;
610 
611 	return (void *) (intptr_t) 0;
612 
613 panic_kvm:
614 	fprintf(stderr, "KVM exit reason: %u (\"%s\")\n",
615 		current_kvm_cpu->kvm_run->exit_reason,
616 		kvm_exit_reasons[current_kvm_cpu->kvm_run->exit_reason]);
617 	if (current_kvm_cpu->kvm_run->exit_reason == KVM_EXIT_UNKNOWN)
618 		fprintf(stderr, "KVM exit code: 0x%Lu\n",
619 			current_kvm_cpu->kvm_run->hw.hardware_exit_reason);
620 
621 	kvm_cpu__set_debug_fd(STDOUT_FILENO);
622 	kvm_cpu__show_registers(current_kvm_cpu);
623 	kvm_cpu__show_code(current_kvm_cpu);
624 	kvm_cpu__show_page_tables(current_kvm_cpu);
625 
626 	return (void *) (intptr_t) 1;
627 }
628 
/* Scratch buffer holding the kernel path chosen or printed by the helpers below. */
static char kernel[PATH_MAX];

/* Host kernel path prefixes; "-<uname release>" is appended before use. */
static const char *host_kernels[] = {
	"/boot/vmlinuz",
	"/boot/bzImage",
	NULL
};

/* Kernel image locations tried first, relative to the working directory. */
static const char *default_kernels[] = {
	"./bzImage",
	"arch/" BUILD_ARCH "/boot/bzImage",
	"../../arch/" BUILD_ARCH "/boot/bzImage",
	NULL
};

/* Candidate locations of an uncompressed vmlinux (see find_vmlinux()). */
static const char *default_vmlinux[] = {
	"vmlinux",
	"../../../vmlinux",
	"../../vmlinux",
	NULL
};
650 
651 static void kernel_usage_with_options(void)
652 {
653 	const char **k;
654 	struct utsname uts;
655 
656 	fprintf(stderr, "Fatal: could not find default kernel image in:\n");
657 	k = &default_kernels[0];
658 	while (*k) {
659 		fprintf(stderr, "\t%s\n", *k);
660 		k++;
661 	}
662 
663 	if (uname(&uts) < 0)
664 		return;
665 
666 	k = &host_kernels[0];
667 	while (*k) {
668 		if (snprintf(kernel, PATH_MAX, "%s-%s", *k, uts.release) < 0)
669 			return;
670 		fprintf(stderr, "\t%s\n", kernel);
671 		k++;
672 	}
673 	fprintf(stderr, "\nPlease see '%s run --help' for more options.\n\n",
674 		KVM_BINARY_NAME);
675 }
676 
677 static u64 host_ram_size(void)
678 {
679 	long page_size;
680 	long nr_pages;
681 
682 	nr_pages	= sysconf(_SC_PHYS_PAGES);
683 	if (nr_pages < 0) {
684 		pr_warning("sysconf(_SC_PHYS_PAGES) failed");
685 		return 0;
686 	}
687 
688 	page_size	= sysconf(_SC_PAGE_SIZE);
689 	if (page_size < 0) {
690 		pr_warning("sysconf(_SC_PAGE_SIZE) failed");
691 		return 0;
692 	}
693 
694 	return (nr_pages * page_size) >> MB_SHIFT;
695 }
696 
697 /*
698  * If user didn't specify how much memory it wants to allocate for the guest,
699  * avoid filling the whole host RAM.
700  */
701 #define RAM_SIZE_RATIO		0.8
702 
703 static u64 get_ram_size(int nr_cpus)
704 {
705 	u64 available;
706 	u64 ram_size;
707 
708 	ram_size	= 64 * (nr_cpus + 3);
709 
710 	available	= host_ram_size() * RAM_SIZE_RATIO;
711 	if (!available)
712 		available = MIN_RAM_SIZE_MB;
713 
714 	if (ram_size > available)
715 		ram_size	= available;
716 
717 	return ram_size;
718 }
719 
720 static const char *find_kernel(void)
721 {
722 	const char **k;
723 	struct stat st;
724 	struct utsname uts;
725 
726 	k = &default_kernels[0];
727 	while (*k) {
728 		if (stat(*k, &st) < 0 || !S_ISREG(st.st_mode)) {
729 			k++;
730 			continue;
731 		}
732 		strncpy(kernel, *k, PATH_MAX);
733 		return kernel;
734 	}
735 
736 	if (uname(&uts) < 0)
737 		return NULL;
738 
739 	k = &host_kernels[0];
740 	while (*k) {
741 		if (snprintf(kernel, PATH_MAX, "%s-%s", *k, uts.release) < 0)
742 			return NULL;
743 
744 		if (stat(kernel, &st) < 0 || !S_ISREG(st.st_mode)) {
745 			k++;
746 			continue;
747 		}
748 		return kernel;
749 
750 	}
751 	return NULL;
752 }
753 
754 static const char *find_vmlinux(void)
755 {
756 	const char **vmlinux;
757 
758 	vmlinux = &default_vmlinux[0];
759 	while (*vmlinux) {
760 		struct stat st;
761 
762 		if (stat(*vmlinux, &st) < 0 || !S_ISREG(st.st_mode)) {
763 			vmlinux++;
764 			continue;
765 		}
766 		return *vmlinux;
767 	}
768 	return NULL;
769 }
770 
/* Print the usage line and the option table of the run command. */
void kvm_run_help(void)
{
	usage_with_options(run_usage, options);
}
775 
776 static int kvm_custom_stage2(void)
777 {
778 	char tmp[PATH_MAX], dst[PATH_MAX], *src;
779 	const char *rootfs = custom_rootfs_name;
780 	int r;
781 
782 	src = realpath("guest/init_stage2", NULL);
783 	if (src == NULL)
784 		return -ENOMEM;
785 
786 	snprintf(tmp, PATH_MAX, "%s%s/virt/init_stage2", kvm__get_dir(), rootfs);
787 	remove(tmp);
788 
789 	snprintf(dst, PATH_MAX, "/host/%s", src);
790 	r = symlink(dst, tmp);
791 	free(src);
792 
793 	return r;
794 }
795 
796 static int kvm_run_set_sandbox(void)
797 {
798 	const char *guestfs_name = custom_rootfs_name;
799 	char path[PATH_MAX], script[PATH_MAX], *tmp;
800 
801 	snprintf(path, PATH_MAX, "%s%s/virt/sandbox.sh", kvm__get_dir(), guestfs_name);
802 
803 	remove(path);
804 
805 	if (sandbox == NULL)
806 		return 0;
807 
808 	tmp = realpath(sandbox, NULL);
809 	if (tmp == NULL)
810 		return -ENOMEM;
811 
812 	snprintf(script, PATH_MAX, "/host/%s", tmp);
813 	free(tmp);
814 
815 	return symlink(script, path);
816 }
817 
/*
 * Write @arg to @fd quoted for the shell so that it is read back
 * verbatim. Runs without a single quote are wrapped in single quotes, and
 * each single quote is emitted as "'" (double-quoted). An empty string
 * becomes ''. Any failed write is fatal.
 */
static void kvm_write_sandbox_cmd_exactly(int fd, const char *arg)
{
	const char *run_end;

	if (*arg == '\0') { /* empty argument */
		if (write(fd, "''", 2) <= 0)
			die("Failed writing sandbox script");
		return;
	}

	for (; *arg; arg = run_end + 1) {
		run_end = strchrnul(arg, '\'');

		/* the quote-free run, if any, goes out as 'run' */
		if (run_end != arg) {
			if (write(fd, "'", 1) <= 0 ||
			    write(fd, arg, run_end - arg) <= 0 ||
			    write(fd, "'", 1) <= 0)
				die("Failed writing sandbox script");
		}

		/* end of string reached: nothing left to quote */
		if (*run_end == '\0')
			break;

		/* the single quote itself goes out as "'" */
		if (write(fd, "\"'\"", 3) <= 0)
			die("Failed writing sandbox script");
	}
}
849 
850 static void kvm_run_write_sandbox_cmd(const char **argv, int argc)
851 {
852 	const char script_hdr[] = "#! /bin/bash\n\n";
853 	int fd;
854 
855 	remove(sandbox);
856 
857 	fd = open(sandbox, O_RDWR | O_CREAT, 0777);
858 	if (fd < 0)
859 		die("Failed creating sandbox script");
860 
861 	if (write(fd, script_hdr, sizeof(script_hdr) - 1) <= 0)
862 		die("Failed writing sandbox script");
863 
864 	while (argc) {
865 		kvm_write_sandbox_cmd_exactly(fd, argv[0]);
866 		if (argc - 1)
867 			if (write(fd, " ", 1) <= 0)
868 				die("Failed writing sandbox script");
869 		argv++;
870 		argc--;
871 	}
872 	if (write(fd, "\n", 1) <= 0)
873 		die("Failed writing sandbox script");
874 
875 	close(fd);
876 }
877 
878 static int kvm_cmd_run_init(int argc, const char **argv)
879 {
880 	static char real_cmdline[2048], default_name[20];
881 	struct framebuffer *fb = NULL;
882 	unsigned int nr_online_cpus;
883 	int max_cpus, recommended_cpus;
884 	int i, r;
885 
886 	signal(SIGALRM, handle_sigalrm);
887 	kvm_ipc__register_handler(KVM_IPC_DEBUG, handle_debug);
888 	signal(SIGUSR1, handle_sigusr1);
889 	kvm_ipc__register_handler(KVM_IPC_PAUSE, handle_pause);
890 	kvm_ipc__register_handler(KVM_IPC_RESUME, handle_pause);
891 	kvm_ipc__register_handler(KVM_IPC_STOP, handle_stop);
892 	kvm_ipc__register_handler(KVM_IPC_VMSTATE, handle_vmstate);
893 
894 	nr_online_cpus = sysconf(_SC_NPROCESSORS_ONLN);
895 
896 	while (argc != 0) {
897 		argc = parse_options(argc, argv, options, run_usage,
898 				PARSE_OPT_STOP_AT_NON_OPTION |
899 				PARSE_OPT_KEEP_DASHDASH);
900 		if (argc != 0) {
901 			/* Cusrom options, should have been handled elsewhere */
902 			if (strcmp(argv[0], "--") == 0) {
903 				if (kvm_run_wrapper == KVM_RUN_SANDBOX) {
904 					sandbox = DEFAULT_SANDBOX_FILENAME;
905 					kvm_run_write_sandbox_cmd(argv+1, argc-1);
906 					break;
907 				}
908 			}
909 
910 			if ((kvm_run_wrapper == KVM_RUN_DEFAULT && kernel_filename) ||
911 				(kvm_run_wrapper == KVM_RUN_SANDBOX && sandbox)) {
912 				fprintf(stderr, "Cannot handle parameter: "
913 						"%s\n", argv[0]);
914 				usage_with_options(run_usage, options);
915 				return EINVAL;
916 			}
917 			if (kvm_run_wrapper == KVM_RUN_SANDBOX) {
918 				/*
919 				 * first unhandled parameter is treated as
920 				 * sandbox command
921 				 */
922 				sandbox = DEFAULT_SANDBOX_FILENAME;
923 				kvm_run_write_sandbox_cmd(argv, argc);
924 			} else {
925 				/*
926 				 * first unhandled parameter is treated as a kernel
927 				 * image
928 				 */
929 				kernel_filename = argv[0];
930 			}
931 			argv++;
932 			argc--;
933 		}
934 
935 	}
936 
937 	if (!kernel_filename)
938 		kernel_filename = find_kernel();
939 
940 	if (!kernel_filename) {
941 		kernel_usage_with_options();
942 		return EINVAL;
943 	}
944 
945 	vmlinux_filename = find_vmlinux();
946 
947 	if (nrcpus == 0)
948 		nrcpus = nr_online_cpus;
949 
950 	if (!ram_size)
951 		ram_size	= get_ram_size(nrcpus);
952 
953 	if (ram_size < MIN_RAM_SIZE_MB)
954 		die("Not enough memory specified: %lluMB (min %lluMB)", ram_size, MIN_RAM_SIZE_MB);
955 
956 	if (ram_size > host_ram_size())
957 		pr_warning("Guest memory size %lluMB exceeds host physical RAM size %lluMB", ram_size, host_ram_size());
958 
959 	ram_size <<= MB_SHIFT;
960 
961 	if (!dev)
962 		dev = DEFAULT_KVM_DEV;
963 
964 	if (!console)
965 		console = DEFAULT_CONSOLE;
966 
967 	if (!strncmp(console, "virtio", 6))
968 		active_console  = CONSOLE_VIRTIO;
969 	else if (!strncmp(console, "serial", 6))
970 		active_console  = CONSOLE_8250;
971 	else if (!strncmp(console, "hv", 2))
972 		active_console = CONSOLE_HV;
973 	else
974 		pr_warning("No console!");
975 
976 	if (!host_ip)
977 		host_ip = DEFAULT_HOST_ADDR;
978 
979 	if (!guest_ip)
980 		guest_ip = DEFAULT_GUEST_ADDR;
981 
982 	if (!guest_mac)
983 		guest_mac = DEFAULT_GUEST_MAC;
984 
985 	if (!host_mac)
986 		host_mac = DEFAULT_HOST_MAC;
987 
988 	if (!script)
989 		script = DEFAULT_SCRIPT;
990 
991 	term_init();
992 
993 	if (!guest_name) {
994 		if (custom_rootfs) {
995 			guest_name = custom_rootfs_name;
996 		} else {
997 			sprintf(default_name, "guest-%u", getpid());
998 			guest_name = default_name;
999 		}
1000 	}
1001 
1002 	kvm = kvm__init(dev, hugetlbfs_path, ram_size, guest_name);
1003 	if (IS_ERR(kvm)) {
1004 		r = PTR_ERR(kvm);
1005 		goto fail;
1006 	}
1007 
1008 	kvm->single_step = single_step;
1009 
1010 	r = ioeventfd__init(kvm);
1011 	if (r < 0) {
1012 		pr_err("ioeventfd__init() failed with error %d\n", r);
1013 		goto fail;
1014 	}
1015 
1016 	max_cpus = kvm__max_cpus(kvm);
1017 	recommended_cpus = kvm__recommended_cpus(kvm);
1018 
1019 	if (nrcpus > max_cpus) {
1020 		printf("  # Limit the number of CPUs to %d\n", max_cpus);
1021 		nrcpus = max_cpus;
1022 	} else if (nrcpus > recommended_cpus) {
1023 		printf("  # Warning: The maximum recommended amount of VCPUs"
1024 			" is %d\n", recommended_cpus);
1025 	}
1026 
1027 	kvm->nrcpus = nrcpus;
1028 
1029 	/* Alloc one pointer too many, so array ends up 0-terminated */
1030 	kvm_cpus = calloc(nrcpus + 1, sizeof(void *));
1031 	if (!kvm_cpus)
1032 		die("Couldn't allocate array for %d CPUs", nrcpus);
1033 
1034 	r = irq__init(kvm);
1035 	if (r < 0) {
1036 		pr_err("irq__init() failed with error %d\n", r);
1037 		goto fail;
1038 	}
1039 
1040 	r = pci__init(kvm);
1041 	if (r < 0) {
1042 		pr_err("pci__init() failed with error %d\n", r);
1043 		goto fail;
1044 	}
1045 
1046 	r = ioport__init(kvm);
1047 	if (r < 0) {
1048 		pr_err("ioport__init() failed with error %d\n", r);
1049 		goto fail;
1050 	}
1051 
1052 	/*
1053 	 * vidmode should be either specified
1054 	 * either set by default
1055 	 */
1056 	if (vnc || sdl) {
1057 		if (vidmode == -1)
1058 			vidmode = 0x312;
1059 	} else {
1060 		vidmode = 0;
1061 	}
1062 
1063 	memset(real_cmdline, 0, sizeof(real_cmdline));
1064 	kvm__arch_set_cmdline(real_cmdline, vnc || sdl);
1065 
1066 	if (strlen(real_cmdline) > 0)
1067 		strcat(real_cmdline, " ");
1068 
1069 	if (kernel_cmdline)
1070 		strlcat(real_cmdline, kernel_cmdline, sizeof(real_cmdline));
1071 
1072 	if (!using_rootfs && !image_filename[0] && !initrd_filename) {
1073 		char tmp[PATH_MAX];
1074 
1075 		kvm_setup_create_new(custom_rootfs_name);
1076 		kvm_setup_resolv(custom_rootfs_name);
1077 
1078 		snprintf(tmp, PATH_MAX, "%s%s", kvm__get_dir(), "default");
1079 		if (virtio_9p__register(kvm, tmp, "/dev/root") < 0)
1080 			die("Unable to initialize virtio 9p");
1081 		if (virtio_9p__register(kvm, "/", "hostfs") < 0)
1082 			die("Unable to initialize virtio 9p");
1083 		using_rootfs = custom_rootfs = 1;
1084 	}
1085 
1086 	if (using_rootfs) {
1087 		strcat(real_cmdline, " root=/dev/root rw rootflags=rw,trans=virtio,version=9p2000.L rootfstype=9p");
1088 		if (custom_rootfs) {
1089 			kvm_run_set_sandbox();
1090 
1091 			strcat(real_cmdline, " init=/virt/init");
1092 
1093 			if (!no_dhcp)
1094 				strcat(real_cmdline, "  ip=dhcp");
1095 			if (kvm_custom_stage2())
1096 				die("Failed linking stage 2 of init.");
1097 		}
1098 	} else if (!strstr(real_cmdline, "root=")) {
1099 		strlcat(real_cmdline, " root=/dev/vda rw ", sizeof(real_cmdline));
1100 	}
1101 
1102 	if (image_count) {
1103 		kvm->nr_disks = image_count;
1104 		kvm->disks = disk_image__open_all(image_filename, readonly_image, image_count);
1105 		if (IS_ERR(kvm->disks)) {
1106 			r = PTR_ERR(kvm->disks);
1107 			pr_err("disk_image__open_all() failed with error %ld\n",
1108 					PTR_ERR(kvm->disks));
1109 			goto fail;
1110 		}
1111 	}
1112 
1113 	printf("  # %s run -k %s -m %Lu -c %d --name %s\n", KVM_BINARY_NAME,
1114 		kernel_filename, ram_size / 1024 / 1024, nrcpus, guest_name);
1115 
1116 	if (!firmware_filename) {
1117 		if (!kvm__load_kernel(kvm, kernel_filename,
1118 				initrd_filename, real_cmdline, vidmode))
1119 			die("unable to load kernel %s", kernel_filename);
1120 
1121 		kvm->vmlinux = vmlinux_filename;
1122 		r = symbol_init(kvm);
1123 		if (r < 0)
1124 			pr_debug("symbol_init() failed with error %d\n", r);
1125 	}
1126 
1127 	ioport__setup_arch();
1128 
1129 	r = rtc__init(kvm);
1130 	if (r < 0) {
1131 		pr_err("rtc__init() failed with error %d\n", r);
1132 		goto fail;
1133 	}
1134 
1135 	r = serial8250__init(kvm);
1136 	if (r < 0) {
1137 		pr_err("serial__init() failed with error %d\n", r);
1138 		goto fail;
1139 	}
1140 
1141 	r = virtio_blk__init(kvm);
1142 	if (r < 0) {
1143 		pr_err("virtio_blk__init() failed with error %d\n", r);
1144 		goto fail;
1145 	}
1146 
1147 	if (active_console == CONSOLE_VIRTIO)
1148 		virtio_console__init(kvm);
1149 
1150 	if (virtio_rng)
1151 		virtio_rng__init(kvm);
1152 
1153 	if (balloon)
1154 		virtio_bln__init(kvm);
1155 
1156 	if (!network)
1157 		network = DEFAULT_NETWORK;
1158 
1159 	virtio_9p__init(kvm);
1160 
1161 	for (i = 0; i < num_net_devices; i++) {
1162 		net_params[i].kvm = kvm;
1163 		virtio_net__init(&net_params[i]);
1164 	}
1165 
1166 	if (num_net_devices == 0 && no_net == 0) {
1167 		struct virtio_net_params net_params;
1168 
1169 		net_params = (struct virtio_net_params) {
1170 			.guest_ip	= guest_ip,
1171 			.host_ip	= host_ip,
1172 			.kvm		= kvm,
1173 			.script		= script,
1174 			.mode		= NET_MODE_USER,
1175 		};
1176 		str_to_mac(guest_mac, net_params.guest_mac);
1177 		str_to_mac(host_mac, net_params.host_mac);
1178 
1179 		virtio_net__init(&net_params);
1180 	}
1181 
1182 	kvm__init_ram(kvm);
1183 
1184 #ifdef CONFIG_X86
1185 	kbd__init(kvm);
1186 #endif
1187 
1188 	pci_shmem__init(kvm);
1189 
1190 	if (vnc || sdl) {
1191 		fb = vesa__init(kvm);
1192 		if (IS_ERR(fb)) {
1193 			pr_err("vesa__init() failed with error %ld\n", PTR_ERR(fb));
1194 			goto fail;
1195 		}
1196 	}
1197 
1198 	if (vnc && fb) {
1199 		r = vnc__init(fb);
1200 		if (r < 0) {
1201 			pr_err("vnc__init() failed with error %d\n", r);
1202 			goto fail;
1203 		}
1204 	}
1205 
1206 	if (sdl && fb) {
1207 		sdl__init(fb);
1208 		if (r < 0) {
1209 			pr_err("sdl__init() failed with error %d\n", r);
1210 			goto fail;
1211 		}
1212 	}
1213 
1214 	r = fb__start();
1215 	if (r < 0) {
1216 		pr_err("fb__init() failed with error %d\n", r);
1217 		goto fail;
1218 	}
1219 
1220 	/* Device init all done; firmware init must
1221 	 * come after this (it may set up device trees etc.)
1222 	 */
1223 
1224 	kvm__start_timer(kvm);
1225 
1226 	if (firmware_filename) {
1227 		if (!kvm__load_firmware(kvm, firmware_filename))
1228 			die("unable to load firmware image %s: %s", firmware_filename, strerror(errno));
1229 	} else {
1230 		kvm__arch_setup_firmware(kvm);
1231 		if (r < 0) {
1232 			pr_err("kvm__arch_setup_firmware() failed with error %d\n", r);
1233 			goto fail;
1234 		}
1235 	}
1236 
1237 	for (i = 0; i < nrcpus; i++) {
1238 		kvm_cpus[i] = kvm_cpu__init(kvm, i);
1239 		if (!kvm_cpus[i])
1240 			die("unable to initialize KVM VCPU");
1241 	}
1242 
1243 	thread_pool__init(nr_online_cpus);
1244 fail:
1245 	return r;
1246 }
1247 
1248 static int kvm_cmd_run_work(void)
1249 {
1250 	int i, r = -1;
1251 	void *ret = NULL;
1252 
1253 	for (i = 0; i < nrcpus; i++) {
1254 		if (pthread_create(&kvm_cpus[i]->thread, NULL, kvm_cpu_thread, kvm_cpus[i]) != 0)
1255 			die("unable to create KVM VCPU thread");
1256 	}
1257 
1258 	/* Only VCPU #0 is going to exit by itself when shutting down */
1259 	if (pthread_join(kvm_cpus[0]->thread, &ret) != 0)
1260 		r = 0;
1261 
1262 	kvm_cpu__delete(kvm_cpus[0]);
1263 	kvm_cpus[0] = NULL;
1264 
1265 	for (i = 1; i < nrcpus; i++) {
1266 		if (kvm_cpus[i]->is_running) {
1267 			pthread_kill(kvm_cpus[i]->thread, SIGKVMEXIT);
1268 			if (pthread_join(kvm_cpus[i]->thread, &ret) != 0)
1269 				die("pthread_join");
1270 			kvm_cpu__delete(kvm_cpus[i]);
1271 		}
1272 		if (ret == NULL)
1273 			r = 0;
1274 	}
1275 
1276 	return r;
1277 }
1278 
1279 static void kvm_cmd_run_exit(int guest_ret)
1280 {
1281 	int r = 0;
1282 
1283 	compat__print_all_messages();
1284 
1285 	r = symbol_exit(kvm);
1286 	if (r < 0)
1287 		pr_warning("symbol_exit() failed with error %d\n", r);
1288 
1289 	r = irq__exit(kvm);
1290 	if (r < 0)
1291 		pr_warning("irq__exit() failed with error %d\n", r);
1292 
1293 	fb__stop();
1294 
1295 	r = virtio_blk__exit(kvm);
1296 	if (r < 0)
1297 		pr_warning("virtio_blk__exit() failed with error %d\n", r);
1298 
1299 	r = virtio_rng__exit(kvm);
1300 	if (r < 0)
1301 		pr_warning("virtio_rng__exit() failed with error %d\n", r);
1302 
1303 	r = disk_image__close_all(kvm->disks, image_count);
1304 	if (r < 0)
1305 		pr_warning("disk_image__close_all() failed with error %d\n", r);
1306 
1307 	r = serial8250__exit(kvm);
1308 	if (r < 0)
1309 		pr_warning("serial8250__exit() failed with error %d\n", r);
1310 
1311 	r = rtc__exit(kvm);
1312 	if (r < 0)
1313 		pr_warning("rtc__exit() failed with error %d\n", r);
1314 
1315 	r = kvm__arch_free_firmware(kvm);
1316 	if (r < 0)
1317 		pr_warning("kvm__arch_free_firmware() failed with error %d\n", r);
1318 
1319 	r = ioport__exit(kvm);
1320 	if (r < 0)
1321 		pr_warning("ioport__exit() failed with error %d\n", r);
1322 
1323 	r = ioeventfd__exit(kvm);
1324 	if (r < 0)
1325 		pr_warning("ioeventfd__exit() failed with error %d\n", r);
1326 
1327 	r = pci__exit(kvm);
1328 	if (r < 0)
1329 		pr_warning("pci__exit() failed with error %d\n", r);
1330 
1331 	r = kvm__exit(kvm);
1332 	if (r < 0)
1333 		pr_warning("pci__exit() failed with error %d\n", r);
1334 
1335 	free(kvm_cpus);
1336 
1337 	if (guest_ret == 0)
1338 		printf("\n  # KVM session ended normally.\n");
1339 }
1340 
/*
 * Entry point of the "run" command: initialise the guest, run it until it
 * stops, then tear everything down.
 *
 * @argc/@argv: command-line arguments, handed to kvm_cmd_run_init().
 * @prefix:     not used by this function.
 *
 * Returns the negative value from kvm_cmd_run_init() if initialisation
 * failed (no teardown is performed in that case), otherwise the value
 * returned by kvm_cmd_run_work().
 */
int kvm_cmd_run(int argc, const char **argv, const char *prefix)
{
	int init_status;
	int guest_status;

	init_status = kvm_cmd_run_init(argc, argv);
	if (init_status < 0)
		return init_status;

	/* The guest's result also selects the message printed on exit. */
	guest_status = kvm_cmd_run_work();
	kvm_cmd_run_exit(guest_status);

	return guest_status;
}
1353 }
1354