xref: /kvmtool/builtin-run.c (revision b816364a4a22603a13b8a34d63542eb0765103a1)
1 #include "kvm/builtin-run.h"
2 
3 #include "kvm/builtin-setup.h"
4 #include "kvm/virtio-balloon.h"
5 #include "kvm/virtio-console.h"
6 #include "kvm/parse-options.h"
7 #include "kvm/8250-serial.h"
8 #include "kvm/framebuffer.h"
9 #include "kvm/disk-image.h"
10 #include "kvm/threadpool.h"
11 #include "kvm/virtio-scsi.h"
12 #include "kvm/virtio-blk.h"
13 #include "kvm/virtio-net.h"
14 #include "kvm/virtio-rng.h"
15 #include "kvm/ioeventfd.h"
16 #include "kvm/virtio-9p.h"
17 #include "kvm/barrier.h"
18 #include "kvm/kvm-cpu.h"
19 #include "kvm/ioport.h"
20 #include "kvm/symbol.h"
21 #include "kvm/i8042.h"
22 #include "kvm/mutex.h"
23 #include "kvm/term.h"
24 #include "kvm/util.h"
25 #include "kvm/strbuf.h"
26 #include "kvm/vesa.h"
27 #include "kvm/irq.h"
28 #include "kvm/kvm.h"
29 #include "kvm/pci.h"
30 #include "kvm/rtc.h"
31 #include "kvm/sdl.h"
32 #include "kvm/vnc.h"
33 #include "kvm/guest_compat.h"
34 #include "kvm/pci-shmem.h"
35 #include "kvm/kvm-ipc.h"
36 #include "kvm/builtin-debug.h"
37 
38 #include <linux/types.h>
39 #include <linux/err.h>
40 
41 #include <sys/utsname.h>
42 #include <sys/types.h>
43 #include <sys/stat.h>
44 #include <termios.h>
45 #include <signal.h>
46 #include <stdlib.h>
47 #include <string.h>
48 #include <unistd.h>
49 #include <ctype.h>
50 #include <stdio.h>
51 
/* Fallback values used when the matching run option is not given. */
#define DEFAULT_KVM_DEV		"/dev/kvm"
#define DEFAULT_CONSOLE		"serial"
#define DEFAULT_NETWORK		"user"
#define DEFAULT_HOST_ADDR	"192.168.33.1"
#define DEFAULT_GUEST_ADDR	"192.168.33.15"
#define DEFAULT_GUEST_MAC	"02:15:15:15:15:15"
#define DEFAULT_HOST_MAC	"02:01:01:01:01:01"
#define DEFAULT_SCRIPT		"none"
/* Path assigned to cfg.sandbox when the run command acts as sandbox wrapper. */
const char *DEFAULT_SANDBOX_FILENAME = "guest/sandbox.sh";

/* Shift counts used to scale a KiB/MiB/GiB count to bytes (and back). */
#define MB_SHIFT		(20)
#define KB_SHIFT		(10)
#define GB_SHIFT		(30)
/* Smallest guest RAM size accepted, expressed in MiB and in bytes. */
#define MIN_RAM_SIZE_MB		(64ULL)
#define MIN_RAM_SIZE_BYTE	(MIN_RAM_SIZE_MB << MB_SHIFT)
67 
/* The VM instance; assigned from kvm__init() during run initialisation. */
struct kvm *kvm;
/* Array of vCPU pointers, allocated with one extra zeroed slot as terminator. */
struct kvm_cpu **kvm_cpus;
/* Per-thread pointer to the vCPU served by this thread (set in kvm_cpu_thread()). */
__thread struct kvm_cpu *current_kvm_cpu;
71 
/*
 * Configuration gathered for "lkvm run": filled in by the option table
 * (BUILD_OPTIONS), by the option callbacks in this file, and by the defaults
 * applied during run initialisation. A single global instance, cfg, exists.
 */
struct kvm_config {
	struct disk_image_params disk_image[MAX_DISK_IMAGES];
	u64 ram_size;			/* given in MiB; shifted to bytes during run init */
	u8  image_count;		/* number of used entries in disk_image[] */
	u8 num_net_devices;		/* number of entries in net_params[] */
	bool virtio_rng;
	const char *kernel_cmdline;
	const char *kernel_filename;
	const char *vmlinux_filename;
	const char *initrd_filename;
	const char *firmware_filename;
	const char *console;
	const char *dev;
	const char *network;
	const char *host_ip;
	const char *guest_ip;
	const char *guest_mac;
	const char *host_mac;
	const char *script;
	const char *guest_name;
	const char *sandbox;		/* sandbox script path, or NULL when unused */
	const char *hugetlbfs_path;
	const char *custom_rootfs_name;	/* rootfs name below kvm__get_dir() */
	struct virtio_net_params *net_params;	/* grown with realloc() by netdev_parser() */
	bool single_step;
	bool vnc;
	bool sdl;
	bool balloon;
	bool using_rootfs;		/* a 9p root directory has been registered */
	bool custom_rootfs;		/* the root is a rootfs below kvm__get_dir() */
	bool no_net;			/* set when "mode=none" was requested */
	bool no_dhcp;
} cfg;
105 
/* Debug switches defined elsewhere; exposed here as command-line options. */
extern bool ioport_debug;
extern bool mmio_debug;
/* One of KVM_RUN_DEFAULT / KVM_RUN_SANDBOX; see kvm_run_set_wrapper_sandbox(). */
static int  kvm_run_wrapper;
extern int  active_console;
extern int  debug_iodelay;

/* Target of the --debug option. */
bool do_debug_print = false;

/* Target of -c/--cpus; 0 means "use the number of online host CPUs". */
static int nrcpus;
/* Target of --vidmode; -1 means the user did not specify a mode. */
static int vidmode = -1;

/*
 * Embedded guest init image. kvm_setup_guest_init() uses the address of
 * _binary_guest_init_start as the data pointer and the address of
 * _binary_guest_init_size as the byte count.
 */
extern char _binary_guest_init_start;
extern char _binary_guest_init_size;
119 
/* NULL-terminated usage lines handed to parse_options()/usage_with_options(). */
static const char * const run_usage[] = {
	"lkvm run [<options>] [<kernel image>]",
	NULL
};

/*
 * Values of kvm_run_wrapper: decides whether the first non-option argument
 * is taken as a kernel image (default) or as the sandbox command.
 */
enum {
	KVM_RUN_DEFAULT,
	KVM_RUN_SANDBOX,
};
129 
/*
 * Switch the run command into sandbox-wrapper mode: subsequent argument
 * parsing treats the first unhandled argument as the sandbox command.
 */
void kvm_run_set_wrapper_sandbox(void)
{
	kvm_run_wrapper = KVM_RUN_SANDBOX;
}
134 
135 static int img_name_parser(const struct option *opt, const char *arg, int unset)
136 {
137 	char path[PATH_MAX];
138 	const char *cur;
139 	struct stat st;
140 	char *sep;
141 
142 	if (stat(arg, &st) == 0 &&
143 	    S_ISDIR(st.st_mode)) {
144 		char tmp[PATH_MAX];
145 
146 		if (cfg.using_rootfs)
147 			die("Please use only one rootfs directory atmost");
148 
149 		if (realpath(arg, tmp) == 0 ||
150 		    virtio_9p__register(kvm, tmp, "/dev/root") < 0)
151 			die("Unable to initialize virtio 9p");
152 		cfg.using_rootfs = 1;
153 		return 0;
154 	}
155 
156 	snprintf(path, PATH_MAX, "%s%s", kvm__get_dir(), arg);
157 
158 	if (stat(path, &st) == 0 &&
159 	    S_ISDIR(st.st_mode)) {
160 		char tmp[PATH_MAX];
161 
162 		if (cfg.using_rootfs)
163 			die("Please use only one rootfs directory atmost");
164 
165 		if (realpath(path, tmp) == 0 ||
166 		    virtio_9p__register(kvm, tmp, "/dev/root") < 0)
167 			die("Unable to initialize virtio 9p");
168 		if (virtio_9p__register(kvm, "/", "hostfs") < 0)
169 			die("Unable to initialize virtio 9p");
170 		kvm_setup_resolv(arg);
171 		cfg.using_rootfs = cfg.custom_rootfs = 1;
172 		cfg.custom_rootfs_name = arg;
173 		return 0;
174 	}
175 
176 	if (cfg.image_count >= MAX_DISK_IMAGES)
177 		die("Currently only 4 images are supported");
178 
179 	cfg.disk_image[cfg.image_count].filename = arg;
180 	cur = arg;
181 
182 	if (strncmp(arg, "scsi:", 5) == 0) {
183 		sep = strstr(arg, ":");
184 		if (sep)
185 			cfg.disk_image[cfg.image_count].wwpn = sep + 1;
186 		sep = strstr(sep + 1, ":");
187 		if (sep) {
188 			*sep = 0;
189 			cfg.disk_image[cfg.image_count].tpgt = sep + 1;
190 		}
191 		cur = sep + 1;
192 	}
193 
194 	do {
195 		sep = strstr(cur, ",");
196 		if (sep) {
197 			if (strncmp(sep + 1, "ro", 2) == 0)
198 				cfg.disk_image[cfg.image_count].readonly = true;
199 			else if (strncmp(sep + 1, "direct", 6) == 0)
200 				cfg.disk_image[cfg.image_count].direct = true;
201 			*sep = 0;
202 			cur = sep + 1;
203 		}
204 	} while (sep);
205 
206 	cfg.image_count++;
207 
208 	return 0;
209 }
210 
211 static int virtio_9p_rootdir_parser(const struct option *opt, const char *arg, int unset)
212 {
213 	char *tag_name;
214 	char tmp[PATH_MAX];
215 
216 	/*
217 	 * 9p dir can be of the form dirname,tag_name or
218 	 * just dirname. In the later case we use the
219 	 * default tag name
220 	 */
221 	tag_name = strstr(arg, ",");
222 	if (tag_name) {
223 		*tag_name = '\0';
224 		tag_name++;
225 	}
226 	if (realpath(arg, tmp)) {
227 		if (virtio_9p__register(kvm, tmp, tag_name) < 0)
228 			die("Unable to initialize virtio 9p");
229 	} else
230 		die("Failed resolving 9p path");
231 	return 0;
232 }
233 
/*
 * Option callback for --tty: converts the argument with atoi() and hands the
 * resulting tty id to term_set_tty(). Always returns 0.
 */
static int tty_parser(const struct option *opt, const char *arg, int unset)
{
	term_set_tty(atoi(arg));
	return 0;
}
242 
/*
 * Parse a textual MAC address ("xx:xx:xx:xx:xx:xx", hexadecimal octets) into
 * the six bytes starting at @mac. The sscanf() result is not inspected, so
 * bytes that could not be parsed keep their previous contents.
 */
static inline void str_to_mac(const char *str, char *mac)
{
	sscanf(str, "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx",
	       &mac[0], &mac[1], &mac[2],
	       &mac[3], &mac[4], &mac[5]);
}
248 static int set_net_param(struct virtio_net_params *p, const char *param,
249 				const char *val)
250 {
251 	if (strcmp(param, "guest_mac") == 0) {
252 		str_to_mac(val, p->guest_mac);
253 	} else if (strcmp(param, "mode") == 0) {
254 		if (!strncmp(val, "user", 4)) {
255 			int i;
256 
257 			for (i = 0; i < cfg.num_net_devices; i++)
258 				if (cfg.net_params[i].mode == NET_MODE_USER)
259 					die("Only one usermode network device allowed at a time");
260 			p->mode = NET_MODE_USER;
261 		} else if (!strncmp(val, "tap", 3)) {
262 			p->mode = NET_MODE_TAP;
263 		} else if (!strncmp(val, "none", 4)) {
264 			cfg.no_net = 1;
265 			return -1;
266 		} else
267 			die("Unknown network mode %s, please use user, tap or none", cfg.network);
268 	} else if (strcmp(param, "script") == 0) {
269 		p->script = strdup(val);
270 	} else if (strcmp(param, "guest_ip") == 0) {
271 		p->guest_ip = strdup(val);
272 	} else if (strcmp(param, "host_ip") == 0) {
273 		p->host_ip = strdup(val);
274 	} else if (strcmp(param, "trans") == 0) {
275 		p->trans = strdup(val);
276 	} else if (strcmp(param, "vhost") == 0) {
277 		p->vhost = atoi(val);
278 	} else if (strcmp(param, "fd") == 0) {
279 		p->fd = atoi(val);
280 	} else
281 		die("Unknown network parameter %s", param);
282 
283 	return 0;
284 }
285 
286 static int netdev_parser(const struct option *opt, const char *arg, int unset)
287 {
288 	struct virtio_net_params p;
289 	char *buf = NULL, *cmd = NULL, *cur = NULL;
290 	bool on_cmd = true;
291 
292 	if (arg) {
293 		buf = strdup(arg);
294 		if (buf == NULL)
295 			die("Failed allocating new net buffer");
296 		cur = strtok(buf, ",=");
297 	}
298 
299 	p = (struct virtio_net_params) {
300 		.guest_ip	= DEFAULT_GUEST_ADDR,
301 		.host_ip	= DEFAULT_HOST_ADDR,
302 		.script		= DEFAULT_SCRIPT,
303 		.mode		= NET_MODE_TAP,
304 	};
305 
306 	str_to_mac(DEFAULT_GUEST_MAC, p.guest_mac);
307 	p.guest_mac[5] += cfg.num_net_devices;
308 
309 	while (cur) {
310 		if (on_cmd) {
311 			cmd = cur;
312 		} else {
313 			if (set_net_param(&p, cmd, cur) < 0)
314 				goto done;
315 		}
316 		on_cmd = !on_cmd;
317 
318 		cur = strtok(NULL, ",=");
319 	};
320 
321 	cfg.num_net_devices++;
322 
323 	cfg.net_params = realloc(cfg.net_params, cfg.num_net_devices * sizeof(*cfg.net_params));
324 	if (cfg.net_params == NULL)
325 		die("Failed adding new network device");
326 
327 	cfg.net_params[cfg.num_net_devices - 1] = p;
328 
329 done:
330 	free(buf);
331 	return 0;
332 }
333 
334 static int shmem_parser(const struct option *opt, const char *arg, int unset)
335 {
336 	const u64 default_size = SHMEM_DEFAULT_SIZE;
337 	const u64 default_phys_addr = SHMEM_DEFAULT_ADDR;
338 	const char *default_handle = SHMEM_DEFAULT_HANDLE;
339 	struct shmem_info *si = malloc(sizeof(struct shmem_info));
340 	u64 phys_addr;
341 	u64 size;
342 	char *handle = NULL;
343 	int create = 0;
344 	const char *p = arg;
345 	char *next;
346 	int base = 10;
347 	int verbose = 0;
348 
349 	const int skip_pci = strlen("pci:");
350 	if (verbose)
351 		pr_info("shmem_parser(%p,%s,%d)", opt, arg, unset);
352 	/* parse out optional addr family */
353 	if (strcasestr(p, "pci:")) {
354 		p += skip_pci;
355 	} else if (strcasestr(p, "mem:")) {
356 		die("I can't add to E820 map yet.\n");
357 	}
358 	/* parse out physical addr */
359 	base = 10;
360 	if (strcasestr(p, "0x"))
361 		base = 16;
362 	phys_addr = strtoll(p, &next, base);
363 	if (next == p && phys_addr == 0) {
364 		pr_info("shmem: no physical addr specified, using default.");
365 		phys_addr = default_phys_addr;
366 	}
367 	if (*next != ':' && *next != '\0')
368 		die("shmem: unexpected chars after phys addr.\n");
369 	if (*next == '\0')
370 		p = next;
371 	else
372 		p = next + 1;
373 	/* parse out size */
374 	base = 10;
375 	if (strcasestr(p, "0x"))
376 		base = 16;
377 	size = strtoll(p, &next, base);
378 	if (next == p && size == 0) {
379 		pr_info("shmem: no size specified, using default.");
380 		size = default_size;
381 	}
382 	/* look for [KMGkmg][Bb]*  uses base 2. */
383 	int skip_B = 0;
384 	if (strspn(next, "KMGkmg")) {	/* might have a prefix */
385 		if (*(next + 1) == 'B' || *(next + 1) == 'b')
386 			skip_B = 1;
387 		switch (*next) {
388 		case 'K':
389 		case 'k':
390 			size = size << KB_SHIFT;
391 			break;
392 		case 'M':
393 		case 'm':
394 			size = size << MB_SHIFT;
395 			break;
396 		case 'G':
397 		case 'g':
398 			size = size << GB_SHIFT;
399 			break;
400 		default:
401 			die("shmem: bug in detecting size prefix.");
402 			break;
403 		}
404 		next += 1 + skip_B;
405 	}
406 	if (*next != ':' && *next != '\0') {
407 		die("shmem: unexpected chars after phys size. <%c><%c>\n",
408 		    *next, *p);
409 	}
410 	if (*next == '\0')
411 		p = next;
412 	else
413 		p = next + 1;
414 	/* parse out optional shmem handle */
415 	const int skip_handle = strlen("handle=");
416 	next = strcasestr(p, "handle=");
417 	if (*p && next) {
418 		if (p != next)
419 			die("unexpected chars before handle\n");
420 		p += skip_handle;
421 		next = strchrnul(p, ':');
422 		if (next - p) {
423 			handle = malloc(next - p + 1);
424 			strncpy(handle, p, next - p);
425 			handle[next - p] = '\0';	/* just in case. */
426 		}
427 		if (*next == '\0')
428 			p = next;
429 		else
430 			p = next + 1;
431 	}
432 	/* parse optional create flag to see if we should create shm seg. */
433 	if (*p && strcasestr(p, "create")) {
434 		create = 1;
435 		p += strlen("create");
436 	}
437 	if (*p != '\0')
438 		die("shmem: unexpected trailing chars\n");
439 	if (handle == NULL) {
440 		handle = malloc(strlen(default_handle) + 1);
441 		strcpy(handle, default_handle);
442 	}
443 	if (verbose) {
444 		pr_info("shmem: phys_addr = %llx", phys_addr);
445 		pr_info("shmem: size      = %llx", size);
446 		pr_info("shmem: handle    = %s", handle);
447 		pr_info("shmem: create    = %d", create);
448 	}
449 
450 	si->phys_addr = phys_addr;
451 	si->size = size;
452 	si->handle = handle;
453 	si->create = create;
454 	pci_shmem__register_mem(si);	/* ownership of si, etc. passed on. */
455 	return 0;
456 }
457 
/*
 * BUILD_OPTIONS(name, cfg) declares "struct option name[]", the option table
 * of "lkvm run". @cfg is a pointer to the struct kvm_config receiving the
 * values; a few options store into file-scope variables (nrcpus, vidmode,
 * the debug switches) or are handled by the parser callbacks in this file.
 *
 * Help strings that span several source lines are written as adjacent string
 * literals. Continuing a line inside a literal would splice the indentation
 * of the following line into the help text.
 */
#define BUILD_OPTIONS(name, cfg)					\
	struct option name[] = {					\
	OPT_GROUP("Basic options:"),					\
	OPT_STRING('\0', "name", &(cfg)->guest_name, "guest name",	\
			"A name for the guest"),			\
	OPT_INTEGER('c', "cpus", &nrcpus, "Number of CPUs"),		\
	OPT_U64('m', "mem", &(cfg)->ram_size,				\
		"Virtual machine memory size in MiB."),			\
	OPT_CALLBACK('\0', "shmem", NULL,				\
		     "[pci:]<addr>:<size>[:handle=<handle>][:create]",	\
		     "Share host shmem with guest via pci device",	\
		     shmem_parser),					\
	OPT_CALLBACK('d', "disk", NULL, "image or rootfs_dir",		\
		     "Disk image or rootfs directory",			\
		     img_name_parser),					\
	OPT_BOOLEAN('\0', "balloon", &(cfg)->balloon,			\
			"Enable virtio balloon"),			\
	OPT_BOOLEAN('\0', "vnc", &(cfg)->vnc, "Enable VNC framebuffer"),\
	OPT_BOOLEAN('\0', "sdl", &(cfg)->sdl, "Enable SDL framebuffer"),\
	OPT_BOOLEAN('\0', "rng", &(cfg)->virtio_rng,			\
			"Enable virtio Random Number Generator"),	\
	OPT_CALLBACK('\0', "9p", NULL, "dir_to_share,tag_name",		\
		     "Enable virtio 9p to share files between host and "\
		     "guest", virtio_9p_rootdir_parser),		\
	OPT_STRING('\0', "console", &(cfg)->console,			\
			"serial, virtio or hv", "Console to use"),	\
	OPT_STRING('\0', "dev", &(cfg)->dev, "device_file",		\
			"KVM device file"),				\
	OPT_CALLBACK('\0', "tty", NULL, "tty id",			\
		     "Remap guest TTY into a pty on the host",		\
		     tty_parser),					\
	OPT_STRING('\0', "sandbox", &(cfg)->sandbox, "script",		\
			"Run this script when booting into custom "	\
			"rootfs"),					\
	OPT_STRING('\0', "hugetlbfs", &(cfg)->hugetlbfs_path, "path",	\
			"Hugetlbfs path"),				\
									\
	OPT_GROUP("Kernel options:"),					\
	OPT_STRING('k', "kernel", &(cfg)->kernel_filename, "kernel",	\
			"Kernel to boot in virtual machine"),		\
	OPT_STRING('i', "initrd", &(cfg)->initrd_filename, "initrd",	\
			"Initial RAM disk image"),			\
	OPT_STRING('p', "params", &(cfg)->kernel_cmdline, "params",	\
			"Kernel command line arguments"),		\
	OPT_STRING('f', "firmware", &(cfg)->firmware_filename, "firmware",\
			"Firmware image to boot in virtual machine"),	\
									\
	OPT_GROUP("Networking options:"),				\
	OPT_CALLBACK_DEFAULT('n', "network", NULL, "network params",	\
		     "Create a new guest NIC",				\
		     netdev_parser, NULL),				\
	OPT_BOOLEAN('\0', "no-dhcp", &(cfg)->no_dhcp,			\
			"Disable kernel DHCP in rootfs mode"),		\
									\
	OPT_GROUP("BIOS options:"),					\
	OPT_INTEGER('\0', "vidmode", &vidmode,				\
		    "Video mode"),					\
									\
	OPT_GROUP("Debug options:"),					\
	OPT_BOOLEAN('\0', "debug", &do_debug_print,			\
			"Enable debug messages"),			\
	OPT_BOOLEAN('\0', "debug-single-step", &(cfg)->single_step,	\
			"Enable single stepping"),			\
	OPT_BOOLEAN('\0', "debug-ioport", &ioport_debug,		\
			"Enable ioport debugging"),			\
	OPT_BOOLEAN('\0', "debug-mmio", &mmio_debug,			\
			"Enable MMIO debugging"),			\
	OPT_INTEGER('\0', "debug-iodelay", &debug_iodelay,		\
			"Delay IO by millisecond"),			\
	OPT_END()							\
	};
528 
529 /*
530  * Serialize debug printout so that the output of multiple vcpus does not
531  * get mixed up:
532  */
533 static int printout_done;
534 
535 static void handle_sigusr1(int sig)
536 {
537 	struct kvm_cpu *cpu = current_kvm_cpu;
538 	int fd = kvm_cpu__get_debug_fd();
539 
540 	if (!cpu || cpu->needs_nmi)
541 		return;
542 
543 	dprintf(fd, "\n #\n # vCPU #%ld's dump:\n #\n", cpu->cpu_id);
544 	kvm_cpu__show_registers(cpu);
545 	kvm_cpu__show_code(cpu);
546 	kvm_cpu__show_page_tables(cpu);
547 	fflush(stdout);
548 	printout_done = 1;
549 	mb();
550 }
551 
552 /* Pause/resume the guest using SIGUSR2 */
553 static int is_paused;
554 
555 static void handle_pause(int fd, u32 type, u32 len, u8 *msg)
556 {
557 	if (WARN_ON(len))
558 		return;
559 
560 	if (type == KVM_IPC_RESUME && is_paused) {
561 		kvm->vm_state = KVM_VMSTATE_RUNNING;
562 		kvm__continue();
563 	} else if (type == KVM_IPC_PAUSE && !is_paused) {
564 		kvm->vm_state = KVM_VMSTATE_PAUSED;
565 		ioctl(kvm->vm_fd, KVM_KVMCLOCK_CTRL);
566 		kvm__pause();
567 	} else {
568 		return;
569 	}
570 
571 	is_paused = !is_paused;
572 }
573 
574 static void handle_vmstate(int fd, u32 type, u32 len, u8 *msg)
575 {
576 	int r = 0;
577 
578 	if (type == KVM_IPC_VMSTATE)
579 		r = write(fd, &kvm->vm_state, sizeof(kvm->vm_state));
580 
581 	if (r < 0)
582 		pr_warning("Failed sending VMSTATE");
583 }
584 
585 static void handle_debug(int fd, u32 type, u32 len, u8 *msg)
586 {
587 	int i;
588 	struct debug_cmd_params *params;
589 	u32 dbg_type;
590 	u32 vcpu;
591 
592 	if (WARN_ON(type != KVM_IPC_DEBUG || len != sizeof(*params)))
593 		return;
594 
595 	params = (void *)msg;
596 	dbg_type = params->dbg_type;
597 	vcpu = params->cpu;
598 
599 	if (dbg_type & KVM_DEBUG_CMD_TYPE_SYSRQ)
600 		serial8250__inject_sysrq(kvm, params->sysrq);
601 
602 	if (dbg_type & KVM_DEBUG_CMD_TYPE_NMI) {
603 		if ((int)vcpu >= kvm->nrcpus)
604 			return;
605 
606 		kvm_cpus[vcpu]->needs_nmi = 1;
607 		pthread_kill(kvm_cpus[vcpu]->thread, SIGUSR1);
608 	}
609 
610 	if (!(dbg_type & KVM_DEBUG_CMD_TYPE_DUMP))
611 		return;
612 
613 	for (i = 0; i < nrcpus; i++) {
614 		struct kvm_cpu *cpu = kvm_cpus[i];
615 
616 		if (!cpu)
617 			continue;
618 
619 		printout_done = 0;
620 
621 		kvm_cpu__set_debug_fd(fd);
622 		pthread_kill(cpu->thread, SIGUSR1);
623 		/*
624 		 * Wait for the vCPU to dump state before signalling
625 		 * the next thread. Since this is debug code it does
626 		 * not matter that we are burning CPU time a bit:
627 		 */
628 		while (!printout_done)
629 			mb();
630 	}
631 
632 	close(fd);
633 
634 	serial8250__inject_sysrq(kvm, 'p');
635 }
636 
/*
 * SIGALRM handler: forwards each alarm to the architecture-specific periodic
 * poll routine. The signal number is not used.
 */
static void handle_sigalrm(int sig)
{
	kvm__arch_periodic_poll(kvm);
}
641 
642 static void handle_stop(int fd, u32 type, u32 len, u8 *msg)
643 {
644 	if (WARN_ON(type != KVM_IPC_STOP || len))
645 		return;
646 
647 	kvm_cpu__reboot();
648 }
649 
650 static void *kvm_cpu_thread(void *arg)
651 {
652 	current_kvm_cpu		= arg;
653 
654 	if (kvm_cpu__start(current_kvm_cpu))
655 		goto panic_kvm;
656 
657 	return (void *) (intptr_t) 0;
658 
659 panic_kvm:
660 	fprintf(stderr, "KVM exit reason: %u (\"%s\")\n",
661 		current_kvm_cpu->kvm_run->exit_reason,
662 		kvm_exit_reasons[current_kvm_cpu->kvm_run->exit_reason]);
663 	if (current_kvm_cpu->kvm_run->exit_reason == KVM_EXIT_UNKNOWN)
664 		fprintf(stderr, "KVM exit code: 0x%Lu\n",
665 			current_kvm_cpu->kvm_run->hw.hardware_exit_reason);
666 
667 	kvm_cpu__set_debug_fd(STDOUT_FILENO);
668 	kvm_cpu__show_registers(current_kvm_cpu);
669 	kvm_cpu__show_code(current_kvm_cpu);
670 	kvm_cpu__show_page_tables(current_kvm_cpu);
671 
672 	return (void *) (intptr_t) 1;
673 }
674 
/* Buffer holding the kernel path produced by the kernel search helpers. */
static char kernel[PATH_MAX];

/*
 * Host kernel path prefixes; "-<uname release>" is appended to each before it
 * is probed. NULL-terminated.
 */
static const char *host_kernels[] = {
	"/boot/vmlinuz",
	"/boot/bzImage",
	NULL
};

/* Relative kernel image paths probed first, in this order. NULL-terminated. */
static const char *default_kernels[] = {
	"./bzImage",
	"arch/" BUILD_ARCH "/boot/bzImage",
	"../../arch/" BUILD_ARCH "/boot/bzImage",
	NULL
};

/* Relative vmlinux paths probed by find_vmlinux(), in order. NULL-terminated. */
static const char *default_vmlinux[] = {
	"vmlinux",
	"../../../vmlinux",
	"../../vmlinux",
	NULL
};
696 
697 static void kernel_usage_with_options(void)
698 {
699 	const char **k;
700 	struct utsname uts;
701 
702 	fprintf(stderr, "Fatal: could not find default kernel image in:\n");
703 	k = &default_kernels[0];
704 	while (*k) {
705 		fprintf(stderr, "\t%s\n", *k);
706 		k++;
707 	}
708 
709 	if (uname(&uts) < 0)
710 		return;
711 
712 	k = &host_kernels[0];
713 	while (*k) {
714 		if (snprintf(kernel, PATH_MAX, "%s-%s", *k, uts.release) < 0)
715 			return;
716 		fprintf(stderr, "\t%s\n", kernel);
717 		k++;
718 	}
719 	fprintf(stderr, "\nPlease see '%s run --help' for more options.\n\n",
720 		KVM_BINARY_NAME);
721 }
722 
723 static u64 host_ram_size(void)
724 {
725 	long page_size;
726 	long nr_pages;
727 
728 	nr_pages	= sysconf(_SC_PHYS_PAGES);
729 	if (nr_pages < 0) {
730 		pr_warning("sysconf(_SC_PHYS_PAGES) failed");
731 		return 0;
732 	}
733 
734 	page_size	= sysconf(_SC_PAGE_SIZE);
735 	if (page_size < 0) {
736 		pr_warning("sysconf(_SC_PAGE_SIZE) failed");
737 		return 0;
738 	}
739 
740 	return (nr_pages * page_size) >> MB_SHIFT;
741 }
742 
743 /*
744  * If user didn't specify how much memory it wants to allocate for the guest,
745  * avoid filling the whole host RAM.
746  */
747 #define RAM_SIZE_RATIO		0.8
748 
749 static u64 get_ram_size(int nr_cpus)
750 {
751 	u64 available;
752 	u64 ram_size;
753 
754 	ram_size	= 64 * (nr_cpus + 3);
755 
756 	available	= host_ram_size() * RAM_SIZE_RATIO;
757 	if (!available)
758 		available = MIN_RAM_SIZE_MB;
759 
760 	if (ram_size > available)
761 		ram_size	= available;
762 
763 	return ram_size;
764 }
765 
766 static const char *find_kernel(void)
767 {
768 	const char **k;
769 	struct stat st;
770 	struct utsname uts;
771 
772 	k = &default_kernels[0];
773 	while (*k) {
774 		if (stat(*k, &st) < 0 || !S_ISREG(st.st_mode)) {
775 			k++;
776 			continue;
777 		}
778 		strncpy(kernel, *k, PATH_MAX);
779 		return kernel;
780 	}
781 
782 	if (uname(&uts) < 0)
783 		return NULL;
784 
785 	k = &host_kernels[0];
786 	while (*k) {
787 		if (snprintf(kernel, PATH_MAX, "%s-%s", *k, uts.release) < 0)
788 			return NULL;
789 
790 		if (stat(kernel, &st) < 0 || !S_ISREG(st.st_mode)) {
791 			k++;
792 			continue;
793 		}
794 		return kernel;
795 
796 	}
797 	return NULL;
798 }
799 
800 static const char *find_vmlinux(void)
801 {
802 	const char **vmlinux;
803 
804 	vmlinux = &default_vmlinux[0];
805 	while (*vmlinux) {
806 		struct stat st;
807 
808 		if (stat(*vmlinux, &st) < 0 || !S_ISREG(st.st_mode)) {
809 			vmlinux++;
810 			continue;
811 		}
812 		return *vmlinux;
813 	}
814 	return NULL;
815 }
816 
/* Print the usage text and the full option table of the run command. */
void kvm_run_help(void)
{
	/* Declares the local option table "options", bound to the global cfg. */
	BUILD_OPTIONS(options, &cfg);
	usage_with_options(run_usage, options);
}
822 
823 static int kvm_setup_guest_init(void)
824 {
825 	const char *rootfs = cfg.custom_rootfs_name;
826 	char tmp[PATH_MAX];
827 	size_t size;
828 	int fd, ret;
829 	char *data;
830 
831 	/* Setup /virt/init */
832 	size = (size_t)&_binary_guest_init_size;
833 	data = (char *)&_binary_guest_init_start;
834 	snprintf(tmp, PATH_MAX, "%s%s/virt/init", kvm__get_dir(), rootfs);
835 	remove(tmp);
836 	fd = open(tmp, O_CREAT | O_WRONLY, 0755);
837 	if (fd < 0)
838 		die("Fail to setup %s", tmp);
839 	ret = xwrite(fd, data, size);
840 	if (ret < 0)
841 		die("Fail to setup %s", tmp);
842 	close(fd);
843 
844 	return 0;
845 }
846 
847 static int kvm_run_set_sandbox(void)
848 {
849 	const char *guestfs_name = cfg.custom_rootfs_name;
850 	char path[PATH_MAX], script[PATH_MAX], *tmp;
851 
852 	snprintf(path, PATH_MAX, "%s%s/virt/sandbox.sh", kvm__get_dir(), guestfs_name);
853 
854 	remove(path);
855 
856 	if (cfg.sandbox == NULL)
857 		return 0;
858 
859 	tmp = realpath(cfg.sandbox, NULL);
860 	if (tmp == NULL)
861 		return -ENOMEM;
862 
863 	snprintf(script, PATH_MAX, "/host/%s", tmp);
864 	free(tmp);
865 
866 	return symlink(script, path);
867 }
868 
/*
 * Write @arg to @fd quoted so that a shell reads it back as one literal word.
 *
 * Runs of characters other than the single quote are emitted as '...'; each
 * single quote is emitted as "'". An empty argument becomes ''. Any write
 * that does not report progress terminates through die().
 */
static void kvm_write_sandbox_cmd_exactly(int fd, const char *arg)
{
	if (*arg == '\0') {
		/* zero length string */
		if (write(fd, "''", 2) <= 0)
			die("Failed writing sandbox script");
		return;
	}

	do {
		size_t plain_len = strcspn(arg, "'");
		const char *stop = arg + plain_len;

		/* write non-single-quote string as #('string') */
		if (plain_len != 0) {
			if (write(fd, "'", 1) <= 0 ||
			    write(fd, arg, plain_len) <= 0 ||
			    write(fd, "'", 1) <= 0)
				die("Failed writing sandbox script");
		}

		if (*stop == '\0')
			break;

		/* write single quote as #("'") */
		if (write(fd, "\"'\"", 3) <= 0)
			die("Failed writing sandbox script");

		arg = stop + 1;
	} while (*arg != '\0');
}
900 
901 static void resolve_program(const char *src, char *dst, size_t len)
902 {
903 	struct stat st;
904 	int err;
905 
906 	err = stat(src, &st);
907 
908 	if (!err && S_ISREG(st.st_mode)) {
909 		char resolved_path[PATH_MAX];
910 
911 		if (!realpath(src, resolved_path))
912 			die("Unable to resolve program %s: %s\n", src, strerror(errno));
913 
914 		snprintf(dst, len, "/host%s", resolved_path);
915 	} else
916 		strncpy(dst, src, len);
917 }
918 
919 static void kvm_run_write_sandbox_cmd(const char **argv, int argc)
920 {
921 	const char script_hdr[] = "#! /bin/bash\n\n";
922 	char program[PATH_MAX];
923 	int fd;
924 
925 	remove(cfg.sandbox);
926 
927 	fd = open(cfg.sandbox, O_RDWR | O_CREAT, 0777);
928 	if (fd < 0)
929 		die("Failed creating sandbox script");
930 
931 	if (write(fd, script_hdr, sizeof(script_hdr) - 1) <= 0)
932 		die("Failed writing sandbox script");
933 
934 	resolve_program(argv[0], program, PATH_MAX);
935 	kvm_write_sandbox_cmd_exactly(fd, program);
936 
937 	argv++;
938 	argc--;
939 
940 	while (argc) {
941 		if (write(fd, " ", 1) <= 0)
942 			die("Failed writing sandbox script");
943 
944 		kvm_write_sandbox_cmd_exactly(fd, argv[0]);
945 		argv++;
946 		argc--;
947 	}
948 	if (write(fd, "\n", 1) <= 0)
949 		die("Failed writing sandbox script");
950 
951 	close(fd);
952 }
953 
954 static int kvm_cmd_run_init(int argc, const char **argv)
955 {
956 	static char real_cmdline[2048], default_name[20];
957 	struct framebuffer *fb = NULL;
958 	unsigned int nr_online_cpus;
959 	int max_cpus, recommended_cpus;
960 	int i, r;
961 	BUILD_OPTIONS(options, &cfg);
962 
963 	signal(SIGALRM, handle_sigalrm);
964 	kvm_ipc__register_handler(KVM_IPC_DEBUG, handle_debug);
965 	signal(SIGUSR1, handle_sigusr1);
966 	kvm_ipc__register_handler(KVM_IPC_PAUSE, handle_pause);
967 	kvm_ipc__register_handler(KVM_IPC_RESUME, handle_pause);
968 	kvm_ipc__register_handler(KVM_IPC_STOP, handle_stop);
969 	kvm_ipc__register_handler(KVM_IPC_VMSTATE, handle_vmstate);
970 
971 	nr_online_cpus = sysconf(_SC_NPROCESSORS_ONLN);
972 	cfg.custom_rootfs_name = "default";
973 
974 	while (argc != 0) {
975 		argc = parse_options(argc, argv, options, run_usage,
976 				PARSE_OPT_STOP_AT_NON_OPTION |
977 				PARSE_OPT_KEEP_DASHDASH);
978 		if (argc != 0) {
979 			/* Cusrom options, should have been handled elsewhere */
980 			if (strcmp(argv[0], "--") == 0) {
981 				if (kvm_run_wrapper == KVM_RUN_SANDBOX) {
982 					cfg.sandbox = DEFAULT_SANDBOX_FILENAME;
983 					kvm_run_write_sandbox_cmd(argv+1, argc-1);
984 					break;
985 				}
986 			}
987 
988 			if ((kvm_run_wrapper == KVM_RUN_DEFAULT && cfg.kernel_filename) ||
989 				(kvm_run_wrapper == KVM_RUN_SANDBOX && cfg.sandbox)) {
990 				fprintf(stderr, "Cannot handle parameter: "
991 						"%s\n", argv[0]);
992 				usage_with_options(run_usage, options);
993 				return -EINVAL;
994 			}
995 			if (kvm_run_wrapper == KVM_RUN_SANDBOX) {
996 				/*
997 				 * first unhandled parameter is treated as
998 				 * sandbox command
999 				 */
1000 				cfg.sandbox = DEFAULT_SANDBOX_FILENAME;
1001 				kvm_run_write_sandbox_cmd(argv, argc);
1002 			} else {
1003 				/*
1004 				 * first unhandled parameter is treated as a kernel
1005 				 * image
1006 				 */
1007 				cfg.kernel_filename = argv[0];
1008 			}
1009 			argv++;
1010 			argc--;
1011 		}
1012 
1013 	}
1014 
1015 	if (!cfg.kernel_filename)
1016 		cfg.kernel_filename = find_kernel();
1017 
1018 	if (!cfg.kernel_filename) {
1019 		kernel_usage_with_options();
1020 		return -EINVAL;
1021 	}
1022 
1023 	cfg.vmlinux_filename = find_vmlinux();
1024 
1025 	if (nrcpus == 0)
1026 		nrcpus = nr_online_cpus;
1027 
1028 	if (!cfg.ram_size)
1029 		cfg.ram_size = get_ram_size(nrcpus);
1030 
1031 	if (cfg.ram_size < MIN_RAM_SIZE_MB)
1032 		die("Not enough memory specified: %lluMB (min %lluMB)", cfg.ram_size, MIN_RAM_SIZE_MB);
1033 
1034 	if (cfg.ram_size > host_ram_size())
1035 		pr_warning("Guest memory size %lluMB exceeds host physical RAM size %lluMB", cfg.ram_size, host_ram_size());
1036 
1037 	cfg.ram_size <<= MB_SHIFT;
1038 
1039 	if (!cfg.dev)
1040 		cfg.dev = DEFAULT_KVM_DEV;
1041 
1042 	if (!cfg.console)
1043 		cfg.console = DEFAULT_CONSOLE;
1044 
1045 	if (!strncmp(cfg.console, "virtio", 6))
1046 		active_console  = CONSOLE_VIRTIO;
1047 	else if (!strncmp(cfg.console, "serial", 6))
1048 		active_console  = CONSOLE_8250;
1049 	else if (!strncmp(cfg.console, "hv", 2))
1050 		active_console = CONSOLE_HV;
1051 	else
1052 		pr_warning("No console!");
1053 
1054 	if (!cfg.host_ip)
1055 		cfg.host_ip = DEFAULT_HOST_ADDR;
1056 
1057 	if (!cfg.guest_ip)
1058 		cfg.guest_ip = DEFAULT_GUEST_ADDR;
1059 
1060 	if (!cfg.guest_mac)
1061 		cfg.guest_mac = DEFAULT_GUEST_MAC;
1062 
1063 	if (!cfg.host_mac)
1064 		cfg.host_mac = DEFAULT_HOST_MAC;
1065 
1066 	if (!cfg.script)
1067 		cfg.script = DEFAULT_SCRIPT;
1068 
1069 	term_init();
1070 
1071 	if (!cfg.guest_name) {
1072 		if (cfg.custom_rootfs) {
1073 			cfg.guest_name = cfg.custom_rootfs_name;
1074 		} else {
1075 			sprintf(default_name, "guest-%u", getpid());
1076 			cfg.guest_name = default_name;
1077 		}
1078 	}
1079 
1080 	kvm = kvm__init(cfg.dev, cfg.hugetlbfs_path, cfg.ram_size, cfg.guest_name);
1081 	if (IS_ERR(kvm)) {
1082 		r = PTR_ERR(kvm);
1083 		goto fail;
1084 	}
1085 
1086 	kvm->single_step = cfg.single_step;
1087 
1088 	r = ioeventfd__init(kvm);
1089 	if (r < 0) {
1090 		pr_err("ioeventfd__init() failed with error %d\n", r);
1091 		goto fail;
1092 	}
1093 
1094 	max_cpus = kvm__max_cpus(kvm);
1095 	recommended_cpus = kvm__recommended_cpus(kvm);
1096 
1097 	if (nrcpus > max_cpus) {
1098 		printf("  # Limit the number of CPUs to %d\n", max_cpus);
1099 		nrcpus = max_cpus;
1100 	} else if (nrcpus > recommended_cpus) {
1101 		printf("  # Warning: The maximum recommended amount of VCPUs"
1102 			" is %d\n", recommended_cpus);
1103 	}
1104 
1105 	kvm->nrcpus = nrcpus;
1106 
1107 	/* Alloc one pointer too many, so array ends up 0-terminated */
1108 	kvm_cpus = calloc(nrcpus + 1, sizeof(void *));
1109 	if (!kvm_cpus)
1110 		die("Couldn't allocate array for %d CPUs", nrcpus);
1111 
1112 	r = irq__init(kvm);
1113 	if (r < 0) {
1114 		pr_err("irq__init() failed with error %d\n", r);
1115 		goto fail;
1116 	}
1117 
1118 	r = pci__init(kvm);
1119 	if (r < 0) {
1120 		pr_err("pci__init() failed with error %d\n", r);
1121 		goto fail;
1122 	}
1123 
1124 	r = ioport__init(kvm);
1125 	if (r < 0) {
1126 		pr_err("ioport__init() failed with error %d\n", r);
1127 		goto fail;
1128 	}
1129 
1130 	/*
1131 	 * vidmode should be either specified
1132 	 * either set by default
1133 	 */
1134 	if (cfg.vnc || cfg.sdl) {
1135 		if (vidmode == -1)
1136 			vidmode = 0x312;
1137 	} else {
1138 		vidmode = 0;
1139 	}
1140 
1141 	memset(real_cmdline, 0, sizeof(real_cmdline));
1142 	kvm__arch_set_cmdline(real_cmdline, cfg.vnc || cfg.sdl);
1143 
1144 	if (strlen(real_cmdline) > 0)
1145 		strcat(real_cmdline, " ");
1146 
1147 	if (cfg.kernel_cmdline)
1148 		strlcat(real_cmdline, cfg.kernel_cmdline, sizeof(real_cmdline));
1149 
1150 	if (!cfg.using_rootfs && !cfg.disk_image[0].filename && !cfg.initrd_filename) {
1151 		char tmp[PATH_MAX];
1152 
1153 		kvm_setup_create_new(cfg.custom_rootfs_name);
1154 		kvm_setup_resolv(cfg.custom_rootfs_name);
1155 
1156 		snprintf(tmp, PATH_MAX, "%s%s", kvm__get_dir(), "default");
1157 		if (virtio_9p__register(kvm, tmp, "/dev/root") < 0)
1158 			die("Unable to initialize virtio 9p");
1159 		if (virtio_9p__register(kvm, "/", "hostfs") < 0)
1160 			die("Unable to initialize virtio 9p");
1161 		cfg.using_rootfs = cfg.custom_rootfs = 1;
1162 	}
1163 
1164 	if (cfg.using_rootfs) {
1165 		strcat(real_cmdline, " root=/dev/root rw rootflags=rw,trans=virtio,version=9p2000.L rootfstype=9p");
1166 		if (cfg.custom_rootfs) {
1167 			kvm_run_set_sandbox();
1168 
1169 			strcat(real_cmdline, " init=/virt/init");
1170 
1171 			if (!cfg.no_dhcp)
1172 				strcat(real_cmdline, "  ip=dhcp");
1173 			if (kvm_setup_guest_init())
1174 				die("Failed to setup init for guest.");
1175 		}
1176 	} else if (!strstr(real_cmdline, "root=")) {
1177 		strlcat(real_cmdline, " root=/dev/vda rw ", sizeof(real_cmdline));
1178 	}
1179 
1180 	if (cfg.image_count) {
1181 		kvm->nr_disks = cfg.image_count;
1182 		kvm->disks = disk_image__open_all((struct disk_image_params *)&cfg.disk_image, cfg.image_count);
1183 		if (IS_ERR(kvm->disks)) {
1184 			r = PTR_ERR(kvm->disks);
1185 			pr_err("disk_image__open_all() failed with error %ld\n",
1186 					PTR_ERR(kvm->disks));
1187 			goto fail;
1188 		}
1189 	}
1190 
1191 	printf("  # %s run -k %s -m %Lu -c %d --name %s\n", KVM_BINARY_NAME,
1192 		cfg.kernel_filename, cfg.ram_size / 1024 / 1024, nrcpus, cfg.guest_name);
1193 
1194 	if (!cfg.firmware_filename) {
1195 		if (!kvm__load_kernel(kvm, cfg.kernel_filename,
1196 				cfg.initrd_filename, real_cmdline, vidmode))
1197 			die("unable to load kernel %s", cfg.kernel_filename);
1198 
1199 		kvm->vmlinux = cfg.vmlinux_filename;
1200 		r = symbol_init(kvm);
1201 		if (r < 0)
1202 			pr_debug("symbol_init() failed with error %d\n", r);
1203 	}
1204 
1205 	ioport__setup_arch();
1206 
1207 	r = rtc__init(kvm);
1208 	if (r < 0) {
1209 		pr_err("rtc__init() failed with error %d\n", r);
1210 		goto fail;
1211 	}
1212 
1213 	r = serial8250__init(kvm);
1214 	if (r < 0) {
1215 		pr_err("serial__init() failed with error %d\n", r);
1216 		goto fail;
1217 	}
1218 
1219 	r = virtio_blk__init(kvm);
1220 	if (r < 0) {
1221 		pr_err("virtio_blk__init() failed with error %d\n", r);
1222 		goto fail;
1223 	}
1224 
1225 	r = virtio_scsi_init(kvm);
1226 	if (r < 0) {
1227 		pr_err("virtio_scsi_init() failed with error %d\n", r);
1228 		goto fail;
1229 	}
1230 
1231 
1232 	if (active_console == CONSOLE_VIRTIO)
1233 		virtio_console__init(kvm);
1234 
1235 	if (cfg.virtio_rng)
1236 		virtio_rng__init(kvm);
1237 
1238 	if (cfg.balloon)
1239 		virtio_bln__init(kvm);
1240 
1241 	if (!cfg.network)
1242 		cfg.network = DEFAULT_NETWORK;
1243 
1244 	virtio_9p__init(kvm);
1245 
1246 	for (i = 0; i < cfg.num_net_devices; i++) {
1247 		cfg.net_params[i].kvm = kvm;
1248 		virtio_net__init(&cfg.net_params[i]);
1249 	}
1250 
1251 	if (cfg.num_net_devices == 0 && cfg.no_net == 0) {
1252 		struct virtio_net_params net_params;
1253 
1254 		net_params = (struct virtio_net_params) {
1255 			.guest_ip	= cfg.guest_ip,
1256 			.host_ip	= cfg.host_ip,
1257 			.kvm		= kvm,
1258 			.script		= cfg.script,
1259 			.mode		= NET_MODE_USER,
1260 		};
1261 		str_to_mac(cfg.guest_mac, net_params.guest_mac);
1262 		str_to_mac(cfg.host_mac, net_params.host_mac);
1263 
1264 		virtio_net__init(&net_params);
1265 	}
1266 
1267 	kvm__init_ram(kvm);
1268 
1269 #ifdef CONFIG_X86
1270 	kbd__init(kvm);
1271 #endif
1272 
1273 	pci_shmem__init(kvm);
1274 
1275 	if (cfg.vnc || cfg.sdl) {
1276 		fb = vesa__init(kvm);
1277 		if (IS_ERR(fb)) {
1278 			pr_err("vesa__init() failed with error %ld\n", PTR_ERR(fb));
1279 			goto fail;
1280 		}
1281 	}
1282 
1283 	if (cfg.vnc && fb) {
1284 		r = vnc__init(fb);
1285 		if (r < 0) {
1286 			pr_err("vnc__init() failed with error %d\n", r);
1287 			goto fail;
1288 		}
1289 	}
1290 
1291 	if (cfg.sdl && fb) {
1292 		sdl__init(fb);
1293 		if (r < 0) {
1294 			pr_err("sdl__init() failed with error %d\n", r);
1295 			goto fail;
1296 		}
1297 	}
1298 
1299 	r = fb__start();
1300 	if (r < 0) {
1301 		pr_err("fb__init() failed with error %d\n", r);
1302 		goto fail;
1303 	}
1304 
1305 	/* Device init all done; firmware init must
1306 	 * come after this (it may set up device trees etc.)
1307 	 */
1308 
1309 	kvm__start_timer(kvm);
1310 
1311 	if (cfg.firmware_filename) {
1312 		if (!kvm__load_firmware(kvm, cfg.firmware_filename))
1313 			die("unable to load firmware image %s: %s", cfg.firmware_filename, strerror(errno));
1314 	} else {
1315 		kvm__arch_setup_firmware(kvm);
1316 		if (r < 0) {
1317 			pr_err("kvm__arch_setup_firmware() failed with error %d\n", r);
1318 			goto fail;
1319 		}
1320 	}
1321 
1322 	for (i = 0; i < nrcpus; i++) {
1323 		kvm_cpus[i] = kvm_cpu__init(kvm, i);
1324 		if (!kvm_cpus[i])
1325 			die("unable to initialize KVM VCPU");
1326 	}
1327 
1328 	thread_pool__init(nr_online_cpus);
1329 fail:
1330 	return r;
1331 }
1332 
1333 static int kvm_cmd_run_work(void)
1334 {
1335 	int i, r = -1;
1336 	void *ret = NULL;
1337 
1338 	for (i = 0; i < nrcpus; i++) {
1339 		if (pthread_create(&kvm_cpus[i]->thread, NULL, kvm_cpu_thread, kvm_cpus[i]) != 0)
1340 			die("unable to create KVM VCPU thread");
1341 	}
1342 
1343 	/* Only VCPU #0 is going to exit by itself when shutting down */
1344 	if (pthread_join(kvm_cpus[0]->thread, &ret) != 0)
1345 		r = 0;
1346 
1347 	kvm_cpu__delete(kvm_cpus[0]);
1348 	kvm_cpus[0] = NULL;
1349 
1350 	for (i = 1; i < nrcpus; i++) {
1351 		if (kvm_cpus[i]->is_running) {
1352 			pthread_kill(kvm_cpus[i]->thread, SIGKVMEXIT);
1353 			if (pthread_join(kvm_cpus[i]->thread, &ret) != 0)
1354 				die("pthread_join");
1355 			kvm_cpu__delete(kvm_cpus[i]);
1356 		}
1357 		if (ret == NULL)
1358 			r = 0;
1359 	}
1360 
1361 	return r;
1362 }
1363 
1364 static void kvm_cmd_run_exit(int guest_ret)
1365 {
1366 	int r = 0;
1367 
1368 	compat__print_all_messages();
1369 
1370 	r = symbol_exit(kvm);
1371 	if (r < 0)
1372 		pr_warning("symbol_exit() failed with error %d\n", r);
1373 
1374 	r = irq__exit(kvm);
1375 	if (r < 0)
1376 		pr_warning("irq__exit() failed with error %d\n", r);
1377 
1378 	fb__stop();
1379 
1380 	r = virtio_scsi_exit(kvm);
1381 	if (r < 0)
1382 		pr_warning("virtio_scsi_exit() failed with error %d\n", r);
1383 
1384 	r = virtio_blk__exit(kvm);
1385 	if (r < 0)
1386 		pr_warning("virtio_blk__exit() failed with error %d\n", r);
1387 
1388 	r = virtio_rng__exit(kvm);
1389 	if (r < 0)
1390 		pr_warning("virtio_rng__exit() failed with error %d\n", r);
1391 
1392 	r = disk_image__close_all(kvm->disks, cfg.image_count);
1393 	if (r < 0)
1394 		pr_warning("disk_image__close_all() failed with error %d\n", r);
1395 
1396 	r = serial8250__exit(kvm);
1397 	if (r < 0)
1398 		pr_warning("serial8250__exit() failed with error %d\n", r);
1399 
1400 	r = rtc__exit(kvm);
1401 	if (r < 0)
1402 		pr_warning("rtc__exit() failed with error %d\n", r);
1403 
1404 	r = kvm__arch_free_firmware(kvm);
1405 	if (r < 0)
1406 		pr_warning("kvm__arch_free_firmware() failed with error %d\n", r);
1407 
1408 	r = ioport__exit(kvm);
1409 	if (r < 0)
1410 		pr_warning("ioport__exit() failed with error %d\n", r);
1411 
1412 	r = ioeventfd__exit(kvm);
1413 	if (r < 0)
1414 		pr_warning("ioeventfd__exit() failed with error %d\n", r);
1415 
1416 	r = pci__exit(kvm);
1417 	if (r < 0)
1418 		pr_warning("pci__exit() failed with error %d\n", r);
1419 
1420 	r = kvm__exit(kvm);
1421 	if (r < 0)
1422 		pr_warning("pci__exit() failed with error %d\n", r);
1423 
1424 	free(kvm_cpus);
1425 
1426 	if (guest_ret == 0)
1427 		printf("\n  # KVM session ended normally.\n");
1428 }
1429 
/*
 * Entry point of the "run" command: initialise the guest, run it until
 * it stops, then tear everything down.
 *
 * When kvm_cmd_run_init() reports a negative value, that value is
 * returned immediately and neither the run nor the teardown step is
 * executed.  Otherwise the result of kvm_cmd_run_work() is handed to
 * kvm_cmd_run_exit() and returned.  The 'prefix' argument is not used.
 */
int kvm_cmd_run(int argc, const char **argv, const char *prefix)
{
	int guest_status;
	int init_status = kvm_cmd_run_init(argc, argv);

	if (init_status < 0)
		return init_status;

	guest_status = kvm_cmd_run_work();
	kvm_cmd_run_exit(guest_status);

	return guest_status;
}
1443