xref: /kvmtool/builtin-run.c (revision 20715a221d1f50d9eebd9ec8bfeb15d3ccb24b36)
1 #include "kvm/builtin-run.h"
2 
3 #include "kvm/builtin-setup.h"
4 #include "kvm/virtio-balloon.h"
5 #include "kvm/virtio-console.h"
6 #include "kvm/parse-options.h"
7 #include "kvm/8250-serial.h"
8 #include "kvm/framebuffer.h"
9 #include "kvm/disk-image.h"
10 #include "kvm/threadpool.h"
11 #include "kvm/virtio-blk.h"
12 #include "kvm/virtio-net.h"
13 #include "kvm/virtio-rng.h"
14 #include "kvm/ioeventfd.h"
15 #include "kvm/virtio-9p.h"
16 #include "kvm/barrier.h"
17 #include "kvm/kvm-cpu.h"
18 #include "kvm/ioport.h"
19 #include "kvm/symbol.h"
20 #include "kvm/i8042.h"
21 #include "kvm/mutex.h"
22 #include "kvm/term.h"
23 #include "kvm/util.h"
24 #include "kvm/strbuf.h"
25 #include "kvm/vesa.h"
26 #include "kvm/irq.h"
27 #include "kvm/kvm.h"
28 #include "kvm/pci.h"
29 #include "kvm/rtc.h"
30 #include "kvm/sdl.h"
31 #include "kvm/vnc.h"
32 #include "kvm/guest_compat.h"
33 #include "kvm/pci-shmem.h"
34 #include "kvm/kvm-ipc.h"
35 #include "kvm/builtin-debug.h"
36 
37 #include <linux/types.h>
38 #include <linux/err.h>
39 
40 #include <sys/utsname.h>
41 #include <sys/types.h>
42 #include <sys/stat.h>
43 #include <termios.h>
44 #include <signal.h>
45 #include <stdlib.h>
46 #include <string.h>
47 #include <unistd.h>
48 #include <ctype.h>
49 #include <stdio.h>
50 
/*
 * Built-in defaults. kvm_cmd_run_init() falls back to these when the
 * corresponding setting is still unset after option parsing, and
 * netdev_parser() uses the address/MAC/script values to seed each new NIC.
 */
#define DEFAULT_KVM_DEV		"/dev/kvm"
#define DEFAULT_CONSOLE		"serial"
#define DEFAULT_NETWORK		"user"
#define DEFAULT_HOST_ADDR	"192.168.33.1"
#define DEFAULT_GUEST_ADDR	"192.168.33.15"
#define DEFAULT_GUEST_MAC	"02:15:15:15:15:15"
#define DEFAULT_HOST_MAC	"02:01:01:01:01:01"
#define DEFAULT_SCRIPT		"none"
/* Path of the script generated by kvm_run_write_sandbox_cmd(). */
const char *DEFAULT_SANDBOX_FILENAME = "guest/sandbox.sh";

/* Shift counts used to scale sizes by KiB, MiB and GiB. */
#define MB_SHIFT		(20)
#define KB_SHIFT		(10)
#define GB_SHIFT		(30)
/* Smallest guest RAM size kvm_cmd_run_init() accepts. */
#define MIN_RAM_SIZE_MB		(64ULL)
#define MIN_RAM_SIZE_BYTE	(MIN_RAM_SIZE_MB << MB_SHIFT)
66 
/* The VM instance and its vCPU array; both are set up in kvm_cmd_run_init(). */
struct kvm *kvm;
struct kvm_cpu **kvm_cpus;
/* Per-thread pointer to the vCPU a thread drives; assigned in kvm_cpu_thread(). */
__thread struct kvm_cpu *current_kvm_cpu;

/*
 * Run configuration. Most of these are filled in by the option table
 * (options[]) or by its callbacks while the command line is parsed.
 */
/* Guest RAM: MiB while parsing, converted to bytes in kvm_cmd_run_init(). */
static u64 ram_size;
/* Number of entries used in image_filename[] / readonly_image[]. */
static u8  image_count;
/* Number of entries in the net_params array. */
static u8 num_net_devices;
static bool virtio_rng;
static const char *kernel_cmdline;
static const char *kernel_filename;
static const char *vmlinux_filename;
static const char *initrd_filename;
static const char *image_filename[MAX_DISK_IMAGES];
static const char *console;
static const char *dev;
/*
 * The following six are not bound to any option in this file; they are only
 * given their DEFAULT_* values in kvm_cmd_run_init().
 */
static const char *network;
static const char *host_ip;
static const char *guest_ip;
static const char *guest_mac;
static const char *host_mac;
static const char *script;
static const char *guest_name;
static const char *sandbox;
static const char *hugetlbfs_path;
/* Name of the rootfs under kvm__get_dir(); replaced by img_name_parser(). */
static const char *custom_rootfs_name = "default";
/* Array of NIC descriptions grown by netdev_parser(). */
static struct virtio_net_params *net_params;
static bool single_step;
static bool readonly_image[MAX_DISK_IMAGES];
static bool vnc;
static bool sdl;
static bool balloon;
/* Set once a 9p root filesystem has been registered. */
static bool using_rootfs;
/* Set when the 9p root is a rootfs located under kvm__get_dir(). */
static bool custom_rootfs;
/* Set by set_net_param() when "mode=none" is requested. */
static bool no_net;
static bool no_dhcp;
/* Defined in another translation unit. */
extern bool ioport_debug;
/* KVM_RUN_DEFAULT or KVM_RUN_SANDBOX; see kvm_run_set_wrapper_sandbox(). */
static int  kvm_run_wrapper;
/* Defined in another translation unit. */
extern int  active_console;
extern int  debug_iodelay;

/* Bound to the --debug option. */
bool do_debug_print = false;

/* Requested vCPU count; 0 means "use the number of online host CPUs". */
static int nrcpus;
/* Requested video mode; -1 means "not specified". */
static int vidmode = -1;
111 
/* Usage synopsis handed to parse_options() / usage_with_options(). */
static const char * const run_usage[] = {
	"lkvm run [<options>] [<kernel image>]",
	NULL
};

/*
 * Values for kvm_run_wrapper: selects how kvm_cmd_run_init() interprets the
 * first non-option argument (kernel image vs. sandbox command).
 */
enum {
	KVM_RUN_DEFAULT,
	KVM_RUN_SANDBOX,
};
121 
/*
 * Switch the run command into sandbox mode: kvm_cmd_run_init() will then
 * treat leftover command-line arguments as the sandbox command instead of a
 * kernel image.
 */
void kvm_run_set_wrapper_sandbox(void)
{
	kvm_run_wrapper = KVM_RUN_SANDBOX;
}
126 
127 static int img_name_parser(const struct option *opt, const char *arg, int unset)
128 {
129 	char *sep;
130 	struct stat st;
131 	char path[PATH_MAX];
132 
133 	if (stat(arg, &st) == 0 &&
134 	    S_ISDIR(st.st_mode)) {
135 		char tmp[PATH_MAX];
136 
137 		if (using_rootfs)
138 			die("Please use only one rootfs directory atmost");
139 
140 		if (realpath(arg, tmp) == 0 ||
141 		    virtio_9p__register(kvm, tmp, "/dev/root") < 0)
142 			die("Unable to initialize virtio 9p");
143 		using_rootfs = 1;
144 		return 0;
145 	}
146 
147 	snprintf(path, PATH_MAX, "%s%s", kvm__get_dir(), arg);
148 
149 	if (stat(path, &st) == 0 &&
150 	    S_ISDIR(st.st_mode)) {
151 		char tmp[PATH_MAX];
152 
153 		if (using_rootfs)
154 			die("Please use only one rootfs directory atmost");
155 
156 		if (realpath(path, tmp) == 0 ||
157 		    virtio_9p__register(kvm, tmp, "/dev/root") < 0)
158 			die("Unable to initialize virtio 9p");
159 		if (virtio_9p__register(kvm, "/", "hostfs") < 0)
160 			die("Unable to initialize virtio 9p");
161 		kvm_setup_resolv(arg);
162 		using_rootfs = custom_rootfs = 1;
163 		custom_rootfs_name = arg;
164 		return 0;
165 	}
166 
167 	if (image_count >= MAX_DISK_IMAGES)
168 		die("Currently only 4 images are supported");
169 
170 	image_filename[image_count] = arg;
171 	sep = strstr(arg, ",");
172 	if (sep) {
173 		if (strcmp(sep + 1, "ro") == 0)
174 			readonly_image[image_count] = 1;
175 		*sep = 0;
176 	}
177 
178 	image_count++;
179 
180 	return 0;
181 }
182 
183 static int virtio_9p_rootdir_parser(const struct option *opt, const char *arg, int unset)
184 {
185 	char *tag_name;
186 	char tmp[PATH_MAX];
187 
188 	/*
189 	 * 9p dir can be of the form dirname,tag_name or
190 	 * just dirname. In the later case we use the
191 	 * default tag name
192 	 */
193 	tag_name = strstr(arg, ",");
194 	if (tag_name) {
195 		*tag_name = '\0';
196 		tag_name++;
197 	}
198 	if (realpath(arg, tmp)) {
199 		if (virtio_9p__register(kvm, tmp, tag_name) < 0)
200 			die("Unable to initialize virtio 9p");
201 	} else
202 		die("Failed resolving 9p path");
203 	return 0;
204 }
205 
/*
 * Option callback for --tty: converts @arg with atoi() and hands the result
 * to term_set_tty(). Always returns 0.
 */
static int tty_parser(const struct option *opt, const char *arg, int unset)
{
	term_set_tty(atoi(arg));
	return 0;
}
214 
/*
 * Parse a MAC address written as six ':'-separated hex bytes ("aa:bb:...")
 * from @str into @mac[0..5]. If @str is malformed, only the leading bytes
 * that were converted successfully are stored; the rest of @mac is untouched.
 */
static inline void str_to_mac(const char *str, char *mac)
{
	unsigned char octets[6];
	int parsed;

	parsed = sscanf(str, "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx",
			&octets[0], &octets[1], &octets[2],
			&octets[3], &octets[4], &octets[5]);
	if (parsed > 0)
		memcpy(mac, octets, parsed);
}
220 static int set_net_param(struct virtio_net_params *p, const char *param,
221 				const char *val)
222 {
223 	if (strcmp(param, "guest_mac") == 0) {
224 		str_to_mac(val, p->guest_mac);
225 	} else if (strcmp(param, "mode") == 0) {
226 		if (!strncmp(val, "user", 4)) {
227 			int i;
228 
229 			for (i = 0; i < num_net_devices; i++)
230 				if (net_params[i].mode == NET_MODE_USER)
231 					die("Only one usermode network device allowed at a time");
232 			p->mode = NET_MODE_USER;
233 		} else if (!strncmp(val, "tap", 3)) {
234 			p->mode = NET_MODE_TAP;
235 		} else if (!strncmp(val, "none", 4)) {
236 			no_net = 1;
237 			return -1;
238 		} else
239 			die("Unkown network mode %s, please use user, tap or none", network);
240 	} else if (strcmp(param, "script") == 0) {
241 		p->script = strdup(val);
242 	} else if (strcmp(param, "guest_ip") == 0) {
243 		p->guest_ip = strdup(val);
244 	} else if (strcmp(param, "host_ip") == 0) {
245 		p->host_ip = strdup(val);
246 	} else if (strcmp(param, "vhost") == 0) {
247 		p->vhost = atoi(val);
248 	} else if (strcmp(param, "fd") == 0) {
249 		p->fd = atoi(val);
250 	}
251 
252 	return 0;
253 }
254 
255 static int netdev_parser(const struct option *opt, const char *arg, int unset)
256 {
257 	struct virtio_net_params p;
258 	char *buf = NULL, *cmd = NULL, *cur = NULL;
259 	bool on_cmd = true;
260 
261 	if (arg) {
262 		buf = strdup(arg);
263 		if (buf == NULL)
264 			die("Failed allocating new net buffer");
265 		cur = strtok(buf, ",=");
266 	}
267 
268 	p = (struct virtio_net_params) {
269 		.guest_ip	= DEFAULT_GUEST_ADDR,
270 		.host_ip	= DEFAULT_HOST_ADDR,
271 		.script		= DEFAULT_SCRIPT,
272 		.mode		= NET_MODE_TAP,
273 	};
274 
275 	str_to_mac(DEFAULT_GUEST_MAC, p.guest_mac);
276 	p.guest_mac[5] += num_net_devices;
277 
278 	while (cur) {
279 		if (on_cmd) {
280 			cmd = cur;
281 		} else {
282 			if (set_net_param(&p, cmd, cur) < 0)
283 				goto done;
284 		}
285 		on_cmd = !on_cmd;
286 
287 		cur = strtok(NULL, ",=");
288 	};
289 
290 	num_net_devices++;
291 
292 	net_params = realloc(net_params, num_net_devices * sizeof(*net_params));
293 	if (net_params == NULL)
294 		die("Failed adding new network device");
295 
296 	net_params[num_net_devices - 1] = p;
297 
298 done:
299 	free(buf);
300 	return 0;
301 }
302 
303 static int shmem_parser(const struct option *opt, const char *arg, int unset)
304 {
305 	const u64 default_size = SHMEM_DEFAULT_SIZE;
306 	const u64 default_phys_addr = SHMEM_DEFAULT_ADDR;
307 	const char *default_handle = SHMEM_DEFAULT_HANDLE;
308 	struct shmem_info *si = malloc(sizeof(struct shmem_info));
309 	u64 phys_addr;
310 	u64 size;
311 	char *handle = NULL;
312 	int create = 0;
313 	const char *p = arg;
314 	char *next;
315 	int base = 10;
316 	int verbose = 0;
317 
318 	const int skip_pci = strlen("pci:");
319 	if (verbose)
320 		pr_info("shmem_parser(%p,%s,%d)", opt, arg, unset);
321 	/* parse out optional addr family */
322 	if (strcasestr(p, "pci:")) {
323 		p += skip_pci;
324 	} else if (strcasestr(p, "mem:")) {
325 		die("I can't add to E820 map yet.\n");
326 	}
327 	/* parse out physical addr */
328 	base = 10;
329 	if (strcasestr(p, "0x"))
330 		base = 16;
331 	phys_addr = strtoll(p, &next, base);
332 	if (next == p && phys_addr == 0) {
333 		pr_info("shmem: no physical addr specified, using default.");
334 		phys_addr = default_phys_addr;
335 	}
336 	if (*next != ':' && *next != '\0')
337 		die("shmem: unexpected chars after phys addr.\n");
338 	if (*next == '\0')
339 		p = next;
340 	else
341 		p = next + 1;
342 	/* parse out size */
343 	base = 10;
344 	if (strcasestr(p, "0x"))
345 		base = 16;
346 	size = strtoll(p, &next, base);
347 	if (next == p && size == 0) {
348 		pr_info("shmem: no size specified, using default.");
349 		size = default_size;
350 	}
351 	/* look for [KMGkmg][Bb]*  uses base 2. */
352 	int skip_B = 0;
353 	if (strspn(next, "KMGkmg")) {	/* might have a prefix */
354 		if (*(next + 1) == 'B' || *(next + 1) == 'b')
355 			skip_B = 1;
356 		switch (*next) {
357 		case 'K':
358 		case 'k':
359 			size = size << KB_SHIFT;
360 			break;
361 		case 'M':
362 		case 'm':
363 			size = size << MB_SHIFT;
364 			break;
365 		case 'G':
366 		case 'g':
367 			size = size << GB_SHIFT;
368 			break;
369 		default:
370 			die("shmem: bug in detecting size prefix.");
371 			break;
372 		}
373 		next += 1 + skip_B;
374 	}
375 	if (*next != ':' && *next != '\0') {
376 		die("shmem: unexpected chars after phys size. <%c><%c>\n",
377 		    *next, *p);
378 	}
379 	if (*next == '\0')
380 		p = next;
381 	else
382 		p = next + 1;
383 	/* parse out optional shmem handle */
384 	const int skip_handle = strlen("handle=");
385 	next = strcasestr(p, "handle=");
386 	if (*p && next) {
387 		if (p != next)
388 			die("unexpected chars before handle\n");
389 		p += skip_handle;
390 		next = strchrnul(p, ':');
391 		if (next - p) {
392 			handle = malloc(next - p + 1);
393 			strncpy(handle, p, next - p);
394 			handle[next - p] = '\0';	/* just in case. */
395 		}
396 		if (*next == '\0')
397 			p = next;
398 		else
399 			p = next + 1;
400 	}
401 	/* parse optional create flag to see if we should create shm seg. */
402 	if (*p && strcasestr(p, "create")) {
403 		create = 1;
404 		p += strlen("create");
405 	}
406 	if (*p != '\0')
407 		die("shmem: unexpected trailing chars\n");
408 	if (handle == NULL) {
409 		handle = malloc(strlen(default_handle) + 1);
410 		strcpy(handle, default_handle);
411 	}
412 	if (verbose) {
413 		pr_info("shmem: phys_addr = %llx", phys_addr);
414 		pr_info("shmem: size      = %llx", size);
415 		pr_info("shmem: handle    = %s", handle);
416 		pr_info("shmem: create    = %d", create);
417 	}
418 
419 	si->phys_addr = phys_addr;
420 	si->size = size;
421 	si->handle = handle;
422 	si->create = create;
423 	pci_shmem__register_mem(si);	/* ownership of si, etc. passed on. */
424 	return 0;
425 }
426 
/*
 * Option table for "lkvm run", consumed by parse_options() in
 * kvm_cmd_run_init() and by usage_with_options(). Plain options store
 * straight into the file-scope configuration variables; OPT_CALLBACK entries
 * invoke the parser functions defined above as each option is encountered.
 */
static const struct option options[] = {
	OPT_GROUP("Basic options:"),
	OPT_STRING('\0', "name", &guest_name, "guest name",
			"A name for the guest"),
	OPT_INTEGER('c', "cpus", &nrcpus, "Number of CPUs"),
	OPT_U64('m', "mem", &ram_size, "Virtual machine memory size in MiB."),
	OPT_CALLBACK('\0', "shmem", NULL,
		     "[pci:]<addr>:<size>[:handle=<handle>][:create]",
		     "Share host shmem with guest via pci device",
		     shmem_parser),
	OPT_CALLBACK('d', "disk", NULL, "image or rootfs_dir", "Disk image or rootfs directory", img_name_parser),
	OPT_BOOLEAN('\0', "balloon", &balloon, "Enable virtio balloon"),
	OPT_BOOLEAN('\0', "vnc", &vnc, "Enable VNC framebuffer"),
	OPT_BOOLEAN('\0', "sdl", &sdl, "Enable SDL framebuffer"),
	OPT_BOOLEAN('\0', "rng", &virtio_rng, "Enable virtio Random Number Generator"),
	OPT_CALLBACK('\0', "9p", NULL, "dir_to_share,tag_name",
		     "Enable virtio 9p to share files between host and guest", virtio_9p_rootdir_parser),
	OPT_STRING('\0', "console", &console, "serial, virtio or hv",
			"Console to use"),
	OPT_STRING('\0', "dev", &dev, "device_file", "KVM device file"),
	OPT_CALLBACK('\0', "tty", NULL, "tty id",
		     "Remap guest TTY into a pty on the host",
		     tty_parser),
	OPT_STRING('\0', "sandbox", &sandbox, "script",
			"Run this script when booting into custom rootfs"),
	OPT_STRING('\0', "hugetlbfs", &hugetlbfs_path, "path", "Hugetlbfs path"),

	OPT_GROUP("Kernel options:"),
	OPT_STRING('k', "kernel", &kernel_filename, "kernel",
			"Kernel to boot in virtual machine"),
	OPT_STRING('i', "initrd", &initrd_filename, "initrd",
			"Initial RAM disk image"),
	OPT_STRING('p', "params", &kernel_cmdline, "params",
			"Kernel command line arguments"),

	OPT_GROUP("Networking options:"),
	/* May be given several times; each occurrence describes one NIC. */
	OPT_CALLBACK_DEFAULT('n', "network", NULL, "network params",
		     "Create a new guest NIC",
		     netdev_parser, NULL),
	OPT_BOOLEAN('\0', "no-dhcp", &no_dhcp, "Disable kernel DHCP in rootfs mode"),

	OPT_GROUP("BIOS options:"),
	OPT_INTEGER('\0', "vidmode", &vidmode,
		    "Video mode"),

	OPT_GROUP("Debug options:"),
	OPT_BOOLEAN('\0', "debug", &do_debug_print,
			"Enable debug messages"),
	OPT_BOOLEAN('\0', "debug-single-step", &single_step,
			"Enable single stepping"),
	OPT_BOOLEAN('\0', "debug-ioport", &ioport_debug,
			"Enable ioport debugging"),
	OPT_INTEGER('\0', "debug-iodelay", &debug_iodelay,
			"Delay IO by millisecond"),
	OPT_END()
};
483 
484 /*
485  * Serialize debug printout so that the output of multiple vcpus does not
486  * get mixed up:
487  */
488 static int printout_done;
489 
490 static void handle_sigusr1(int sig)
491 {
492 	struct kvm_cpu *cpu = current_kvm_cpu;
493 	int fd = kvm_cpu__get_debug_fd();
494 
495 	if (!cpu || cpu->needs_nmi)
496 		return;
497 
498 	dprintf(fd, "\n #\n # vCPU #%ld's dump:\n #\n", cpu->cpu_id);
499 	kvm_cpu__show_registers(cpu);
500 	kvm_cpu__show_code(cpu);
501 	kvm_cpu__show_page_tables(cpu);
502 	fflush(stdout);
503 	printout_done = 1;
504 	mb();
505 }
506 
507 /* Pause/resume the guest using SIGUSR2 */
508 static int is_paused;
509 
510 static void handle_pause(int fd, u32 type, u32 len, u8 *msg)
511 {
512 	if (WARN_ON(len))
513 		return;
514 
515 	if (type == KVM_IPC_RESUME && is_paused) {
516 		kvm->vm_state = KVM_VMSTATE_RUNNING;
517 		kvm__continue();
518 	} else if (type == KVM_IPC_PAUSE && !is_paused) {
519 		kvm->vm_state = KVM_VMSTATE_PAUSED;
520 		kvm__pause();
521 	} else {
522 		return;
523 	}
524 
525 	is_paused = !is_paused;
526 }
527 
528 static void handle_vmstate(int fd, u32 type, u32 len, u8 *msg)
529 {
530 	int r = 0;
531 
532 	if (type == KVM_IPC_VMSTATE)
533 		r = write(fd, &kvm->vm_state, sizeof(kvm->vm_state));
534 
535 	if (r < 0)
536 		pr_warning("Failed sending VMSTATE");
537 }
538 
539 static void handle_debug(int fd, u32 type, u32 len, u8 *msg)
540 {
541 	int i;
542 	struct debug_cmd_params *params;
543 	u32 dbg_type;
544 	u32 vcpu;
545 
546 	if (WARN_ON(type != KVM_IPC_DEBUG || len != sizeof(*params)))
547 		return;
548 
549 	params = (void *)msg;
550 	dbg_type = params->dbg_type;
551 	vcpu = params->cpu;
552 
553 	if (dbg_type & KVM_DEBUG_CMD_TYPE_NMI) {
554 		if ((int)vcpu >= kvm->nrcpus)
555 			return;
556 
557 		kvm_cpus[vcpu]->needs_nmi = 1;
558 		pthread_kill(kvm_cpus[vcpu]->thread, SIGUSR1);
559 	}
560 
561 	if (!(dbg_type & KVM_DEBUG_CMD_TYPE_DUMP))
562 		return;
563 
564 	for (i = 0; i < nrcpus; i++) {
565 		struct kvm_cpu *cpu = kvm_cpus[i];
566 
567 		if (!cpu)
568 			continue;
569 
570 		printout_done = 0;
571 
572 		kvm_cpu__set_debug_fd(fd);
573 		pthread_kill(cpu->thread, SIGUSR1);
574 		/*
575 		 * Wait for the vCPU to dump state before signalling
576 		 * the next thread. Since this is debug code it does
577 		 * not matter that we are burning CPU time a bit:
578 		 */
579 		while (!printout_done)
580 			mb();
581 	}
582 
583 	close(fd);
584 
585 	serial8250__inject_sysrq(kvm);
586 }
587 
/*
 * SIGALRM handler (installed in kvm_cmd_run_init()): forwards each alarm to
 * the architecture's periodic poll hook.
 */
static void handle_sigalrm(int sig)
{
	kvm__arch_periodic_poll(kvm);
}
592 
/*
 * IPC handler for KVM_IPC_STOP: calls kvm_cpu__reboot(). The request is
 * ignored (with a warning) if it has the wrong type or carries a payload.
 */
static void handle_stop(int fd, u32 type, u32 len, u8 *msg)
{
	if (WARN_ON(type != KVM_IPC_STOP || len))
		return;

	kvm_cpu__reboot();
}
600 
601 static void *kvm_cpu_thread(void *arg)
602 {
603 	current_kvm_cpu		= arg;
604 
605 	if (kvm_cpu__start(current_kvm_cpu))
606 		goto panic_kvm;
607 
608 	return (void *) (intptr_t) 0;
609 
610 panic_kvm:
611 	fprintf(stderr, "KVM exit reason: %u (\"%s\")\n",
612 		current_kvm_cpu->kvm_run->exit_reason,
613 		kvm_exit_reasons[current_kvm_cpu->kvm_run->exit_reason]);
614 	if (current_kvm_cpu->kvm_run->exit_reason == KVM_EXIT_UNKNOWN)
615 		fprintf(stderr, "KVM exit code: 0x%Lu\n",
616 			current_kvm_cpu->kvm_run->hw.hardware_exit_reason);
617 
618 	kvm_cpu__set_debug_fd(STDOUT_FILENO);
619 	kvm_cpu__show_registers(current_kvm_cpu);
620 	kvm_cpu__show_code(current_kvm_cpu);
621 	kvm_cpu__show_page_tables(current_kvm_cpu);
622 
623 	return (void *) (intptr_t) 1;
624 }
625 
/* Scratch buffer holding the kernel path produced by find_kernel(). */
static char kernel[PATH_MAX];

/*
 * Host kernel path prefixes; find_kernel() appends "-<uname release>" to
 * each before probing it.
 */
static const char *host_kernels[] = {
	"/boot/vmlinuz",
	"/boot/bzImage",
	NULL
};

/* Kernel image locations probed first, in order, by find_kernel(). */
static const char *default_kernels[] = {
	"./bzImage",
	"arch/" BUILD_ARCH "/boot/bzImage",
	"../../arch/" BUILD_ARCH "/boot/bzImage",
	NULL
};

/* vmlinux locations probed, in order, by find_vmlinux(). */
static const char *default_vmlinux[] = {
	"vmlinux",
	"../../../vmlinux",
	"../../vmlinux",
	NULL
};
647 
648 static void kernel_usage_with_options(void)
649 {
650 	const char **k;
651 	struct utsname uts;
652 
653 	fprintf(stderr, "Fatal: could not find default kernel image in:\n");
654 	k = &default_kernels[0];
655 	while (*k) {
656 		fprintf(stderr, "\t%s\n", *k);
657 		k++;
658 	}
659 
660 	if (uname(&uts) < 0)
661 		return;
662 
663 	k = &host_kernels[0];
664 	while (*k) {
665 		if (snprintf(kernel, PATH_MAX, "%s-%s", *k, uts.release) < 0)
666 			return;
667 		fprintf(stderr, "\t%s\n", kernel);
668 		k++;
669 	}
670 	fprintf(stderr, "\nPlease see '%s run --help' for more options.\n\n",
671 		KVM_BINARY_NAME);
672 }
673 
674 static u64 host_ram_size(void)
675 {
676 	long page_size;
677 	long nr_pages;
678 
679 	nr_pages	= sysconf(_SC_PHYS_PAGES);
680 	if (nr_pages < 0) {
681 		pr_warning("sysconf(_SC_PHYS_PAGES) failed");
682 		return 0;
683 	}
684 
685 	page_size	= sysconf(_SC_PAGE_SIZE);
686 	if (page_size < 0) {
687 		pr_warning("sysconf(_SC_PAGE_SIZE) failed");
688 		return 0;
689 	}
690 
691 	return (nr_pages * page_size) >> MB_SHIFT;
692 }
693 
694 /*
695  * If user didn't specify how much memory it wants to allocate for the guest,
696  * avoid filling the whole host RAM.
697  */
698 #define RAM_SIZE_RATIO		0.8
699 
700 static u64 get_ram_size(int nr_cpus)
701 {
702 	u64 available;
703 	u64 ram_size;
704 
705 	ram_size	= 64 * (nr_cpus + 3);
706 
707 	available	= host_ram_size() * RAM_SIZE_RATIO;
708 	if (!available)
709 		available = MIN_RAM_SIZE_MB;
710 
711 	if (ram_size > available)
712 		ram_size	= available;
713 
714 	return ram_size;
715 }
716 
717 static const char *find_kernel(void)
718 {
719 	const char **k;
720 	struct stat st;
721 	struct utsname uts;
722 
723 	k = &default_kernels[0];
724 	while (*k) {
725 		if (stat(*k, &st) < 0 || !S_ISREG(st.st_mode)) {
726 			k++;
727 			continue;
728 		}
729 		strncpy(kernel, *k, PATH_MAX);
730 		return kernel;
731 	}
732 
733 	if (uname(&uts) < 0)
734 		return NULL;
735 
736 	k = &host_kernels[0];
737 	while (*k) {
738 		if (snprintf(kernel, PATH_MAX, "%s-%s", *k, uts.release) < 0)
739 			return NULL;
740 
741 		if (stat(kernel, &st) < 0 || !S_ISREG(st.st_mode)) {
742 			k++;
743 			continue;
744 		}
745 		return kernel;
746 
747 	}
748 	return NULL;
749 }
750 
751 static const char *find_vmlinux(void)
752 {
753 	const char **vmlinux;
754 
755 	vmlinux = &default_vmlinux[0];
756 	while (*vmlinux) {
757 		struct stat st;
758 
759 		if (stat(*vmlinux, &st) < 0 || !S_ISREG(st.st_mode)) {
760 			vmlinux++;
761 			continue;
762 		}
763 		return *vmlinux;
764 	}
765 	return NULL;
766 }
767 
/* Print the usage text and option list of the run command. */
void kvm_run_help(void)
{
	usage_with_options(run_usage, options);
}
772 
773 static int kvm_custom_stage2(void)
774 {
775 	char tmp[PATH_MAX], dst[PATH_MAX], *src;
776 	const char *rootfs = custom_rootfs_name;
777 	int r;
778 
779 	src = realpath("guest/init_stage2", NULL);
780 	if (src == NULL)
781 		return -ENOMEM;
782 
783 	snprintf(tmp, PATH_MAX, "%s%s/virt/init_stage2", kvm__get_dir(), rootfs);
784 	remove(tmp);
785 
786 	snprintf(dst, PATH_MAX, "/host/%s", src);
787 	r = symlink(dst, tmp);
788 	free(src);
789 
790 	return r;
791 }
792 
793 static int kvm_run_set_sandbox(void)
794 {
795 	const char *guestfs_name = custom_rootfs_name;
796 	char path[PATH_MAX], script[PATH_MAX], *tmp;
797 
798 	snprintf(path, PATH_MAX, "%s%s/virt/sandbox.sh", kvm__get_dir(), guestfs_name);
799 
800 	remove(path);
801 
802 	if (sandbox == NULL)
803 		return 0;
804 
805 	tmp = realpath(sandbox, NULL);
806 	if (tmp == NULL)
807 		return -ENOMEM;
808 
809 	snprintf(script, PATH_MAX, "/host/%s", tmp);
810 	free(tmp);
811 
812 	return symlink(script, path);
813 }
814 
/* Write @len bytes of @buf to @fd; a write() result <= 0 is fatal. */
static void sandbox_write_or_die(int fd, const void *buf, size_t len)
{
	if (write(fd, buf, len) <= 0)
		die("Failed writing sandbox script");
}

/*
 * Write @arg to @fd quoted so that a shell reads it back as one literal
 * word: runs of ordinary characters are wrapped in single quotes, each
 * embedded single quote is emitted as "'" (double-quoted), and an empty
 * string becomes ''.
 */
static void kvm_write_sandbox_cmd_exactly(int fd, const char *arg)
{
	const char *quote;

	if (*arg == '\0') {
		sandbox_write_or_die(fd, "''", 2);
		return;
	}

	while (*arg) {
		quote = strchrnul(arg, '\'');

		/* Text before the next single quote (if any): 'text' */
		if (quote != arg) {
			sandbox_write_or_die(fd, "'", 1);
			sandbox_write_or_die(fd, arg, quote - arg);
			sandbox_write_or_die(fd, "'", 1);
		}

		if (*quote == '\0')
			break;

		/* The single quote itself: "'" */
		sandbox_write_or_die(fd, "\"'\"", 3);

		arg = quote + 1;
	}
}
846 
847 static void kvm_run_write_sandbox_cmd(const char **argv, int argc)
848 {
849 	const char script_hdr[] = "#! /bin/bash\n\n";
850 	int fd;
851 
852 	remove(sandbox);
853 
854 	fd = open(sandbox, O_RDWR | O_CREAT, 0777);
855 	if (fd < 0)
856 		die("Failed creating sandbox script");
857 
858 	if (write(fd, script_hdr, sizeof(script_hdr) - 1) <= 0)
859 		die("Failed writing sandbox script");
860 
861 	while (argc) {
862 		kvm_write_sandbox_cmd_exactly(fd, argv[0]);
863 		if (argc - 1)
864 			if (write(fd, " ", 1) <= 0)
865 				die("Failed writing sandbox script");
866 		argv++;
867 		argc--;
868 	}
869 	if (write(fd, "\n", 1) <= 0)
870 		die("Failed writing sandbox script");
871 
872 	close(fd);
873 }
874 
875 static int kvm_cmd_run_init(int argc, const char **argv)
876 {
877 	static char real_cmdline[2048], default_name[20];
878 	struct framebuffer *fb = NULL;
879 	unsigned int nr_online_cpus;
880 	int max_cpus, recommended_cpus;
881 	int i, r;
882 
883 	signal(SIGALRM, handle_sigalrm);
884 	kvm_ipc__register_handler(KVM_IPC_DEBUG, handle_debug);
885 	signal(SIGUSR1, handle_sigusr1);
886 	kvm_ipc__register_handler(KVM_IPC_PAUSE, handle_pause);
887 	kvm_ipc__register_handler(KVM_IPC_RESUME, handle_pause);
888 	kvm_ipc__register_handler(KVM_IPC_STOP, handle_stop);
889 	kvm_ipc__register_handler(KVM_IPC_VMSTATE, handle_vmstate);
890 
891 	nr_online_cpus = sysconf(_SC_NPROCESSORS_ONLN);
892 
893 	while (argc != 0) {
894 		argc = parse_options(argc, argv, options, run_usage,
895 				PARSE_OPT_STOP_AT_NON_OPTION |
896 				PARSE_OPT_KEEP_DASHDASH);
897 		if (argc != 0) {
898 			/* Cusrom options, should have been handled elsewhere */
899 			if (strcmp(argv[0], "--") == 0) {
900 				if (kvm_run_wrapper == KVM_RUN_SANDBOX) {
901 					sandbox = DEFAULT_SANDBOX_FILENAME;
902 					kvm_run_write_sandbox_cmd(argv+1, argc-1);
903 					break;
904 				}
905 			}
906 
907 			if ((kvm_run_wrapper == KVM_RUN_DEFAULT && kernel_filename) ||
908 				(kvm_run_wrapper == KVM_RUN_SANDBOX && sandbox)) {
909 				fprintf(stderr, "Cannot handle parameter: "
910 						"%s\n", argv[0]);
911 				usage_with_options(run_usage, options);
912 				return EINVAL;
913 			}
914 			if (kvm_run_wrapper == KVM_RUN_SANDBOX) {
915 				/*
916 				 * first unhandled parameter is treated as
917 				 * sandbox command
918 				 */
919 				sandbox = DEFAULT_SANDBOX_FILENAME;
920 				kvm_run_write_sandbox_cmd(argv, argc);
921 			} else {
922 				/*
923 				 * first unhandled parameter is treated as a kernel
924 				 * image
925 				 */
926 				kernel_filename = argv[0];
927 			}
928 			argv++;
929 			argc--;
930 		}
931 
932 	}
933 
934 	if (!kernel_filename)
935 		kernel_filename = find_kernel();
936 
937 	if (!kernel_filename) {
938 		kernel_usage_with_options();
939 		return EINVAL;
940 	}
941 
942 	vmlinux_filename = find_vmlinux();
943 
944 	if (nrcpus == 0)
945 		nrcpus = nr_online_cpus;
946 
947 	if (!ram_size)
948 		ram_size	= get_ram_size(nrcpus);
949 
950 	if (ram_size < MIN_RAM_SIZE_MB)
951 		die("Not enough memory specified: %lluMB (min %lluMB)", ram_size, MIN_RAM_SIZE_MB);
952 
953 	if (ram_size > host_ram_size())
954 		pr_warning("Guest memory size %lluMB exceeds host physical RAM size %lluMB", ram_size, host_ram_size());
955 
956 	ram_size <<= MB_SHIFT;
957 
958 	if (!dev)
959 		dev = DEFAULT_KVM_DEV;
960 
961 	if (!console)
962 		console = DEFAULT_CONSOLE;
963 
964 	if (!strncmp(console, "virtio", 6))
965 		active_console  = CONSOLE_VIRTIO;
966 	else if (!strncmp(console, "serial", 6))
967 		active_console  = CONSOLE_8250;
968 	else if (!strncmp(console, "hv", 2))
969 		active_console = CONSOLE_HV;
970 	else
971 		pr_warning("No console!");
972 
973 	if (!host_ip)
974 		host_ip = DEFAULT_HOST_ADDR;
975 
976 	if (!guest_ip)
977 		guest_ip = DEFAULT_GUEST_ADDR;
978 
979 	if (!guest_mac)
980 		guest_mac = DEFAULT_GUEST_MAC;
981 
982 	if (!host_mac)
983 		host_mac = DEFAULT_HOST_MAC;
984 
985 	if (!script)
986 		script = DEFAULT_SCRIPT;
987 
988 	term_init();
989 
990 	if (!guest_name) {
991 		if (custom_rootfs) {
992 			guest_name = custom_rootfs_name;
993 		} else {
994 			sprintf(default_name, "guest-%u", getpid());
995 			guest_name = default_name;
996 		}
997 	}
998 
999 	kvm = kvm__init(dev, hugetlbfs_path, ram_size, guest_name);
1000 
1001 	kvm->single_step = single_step;
1002 
1003 	r = ioeventfd__init(kvm);
1004 	if (r < 0) {
1005 		pr_err("ioeventfd__init() failed with error %d\n", r);
1006 		goto fail;
1007 	}
1008 
1009 	max_cpus = kvm__max_cpus(kvm);
1010 	recommended_cpus = kvm__recommended_cpus(kvm);
1011 
1012 	if (nrcpus > max_cpus) {
1013 		printf("  # Limit the number of CPUs to %d\n", max_cpus);
1014 		nrcpus = max_cpus;
1015 	} else if (nrcpus > recommended_cpus) {
1016 		printf("  # Warning: The maximum recommended amount of VCPUs"
1017 			" is %d\n", recommended_cpus);
1018 	}
1019 
1020 	kvm->nrcpus = nrcpus;
1021 
1022 	/* Alloc one pointer too many, so array ends up 0-terminated */
1023 	kvm_cpus = calloc(nrcpus + 1, sizeof(void *));
1024 	if (!kvm_cpus)
1025 		die("Couldn't allocate array for %d CPUs", nrcpus);
1026 
1027 	r = irq__init(kvm);
1028 	if (r < 0) {
1029 		pr_err("irq__init() failed with error %d\n", r);
1030 		goto fail;
1031 	}
1032 
1033 	pci__init();
1034 
1035 	r = ioport__init(kvm);
1036 	if (r < 0) {
1037 		pr_err("ioport__init() failed with error %d\n", r);
1038 		goto fail;
1039 	}
1040 
1041 	/*
1042 	 * vidmode should be either specified
1043 	 * either set by default
1044 	 */
1045 	if (vnc || sdl) {
1046 		if (vidmode == -1)
1047 			vidmode = 0x312;
1048 	} else {
1049 		vidmode = 0;
1050 	}
1051 
1052 	memset(real_cmdline, 0, sizeof(real_cmdline));
1053 	kvm__arch_set_cmdline(real_cmdline, vnc || sdl);
1054 
1055 	if (strlen(real_cmdline) > 0)
1056 		strcat(real_cmdline, " ");
1057 
1058 	if (kernel_cmdline)
1059 		strlcat(real_cmdline, kernel_cmdline, sizeof(real_cmdline));
1060 
1061 	if (!using_rootfs && !image_filename[0] && !initrd_filename) {
1062 		char tmp[PATH_MAX];
1063 
1064 		kvm_setup_create_new(custom_rootfs_name);
1065 		kvm_setup_resolv(custom_rootfs_name);
1066 
1067 		snprintf(tmp, PATH_MAX, "%s%s", kvm__get_dir(), "default");
1068 		if (virtio_9p__register(kvm, tmp, "/dev/root") < 0)
1069 			die("Unable to initialize virtio 9p");
1070 		if (virtio_9p__register(kvm, "/", "hostfs") < 0)
1071 			die("Unable to initialize virtio 9p");
1072 		using_rootfs = custom_rootfs = 1;
1073 	}
1074 
1075 	if (using_rootfs) {
1076 		strcat(real_cmdline, " root=/dev/root rw rootflags=rw,trans=virtio,version=9p2000.L rootfstype=9p");
1077 		if (custom_rootfs) {
1078 			kvm_run_set_sandbox();
1079 
1080 			strcat(real_cmdline, " init=/virt/init");
1081 
1082 			if (!no_dhcp)
1083 				strcat(real_cmdline, "  ip=dhcp");
1084 			if (kvm_custom_stage2())
1085 				die("Failed linking stage 2 of init.");
1086 		}
1087 	} else if (!strstr(real_cmdline, "root=")) {
1088 		strlcat(real_cmdline, " root=/dev/vda rw ", sizeof(real_cmdline));
1089 	}
1090 
1091 	if (image_count) {
1092 		kvm->nr_disks = image_count;
1093 		kvm->disks    = disk_image__open_all(image_filename, readonly_image, image_count);
1094 		if (!kvm->disks)
1095 			die("Unable to load all disk images.");
1096 
1097 		virtio_blk__init_all(kvm);
1098 	}
1099 
1100 	printf("  # %s run -k %s -m %Lu -c %d --name %s\n", KVM_BINARY_NAME,
1101 		kernel_filename, ram_size / 1024 / 1024, nrcpus, guest_name);
1102 
1103 	if (!kvm__load_kernel(kvm, kernel_filename, initrd_filename,
1104 				real_cmdline, vidmode))
1105 		die("unable to load kernel %s", kernel_filename);
1106 
1107 	kvm->vmlinux = vmlinux_filename;
1108 	r = symbol__init(kvm);
1109 	if (r < 0) {
1110 		pr_err("symbol__init() failed with error %d\n", r);
1111 		goto fail;
1112 	}
1113 
1114 	ioport__setup_arch();
1115 
1116 	rtc__init();
1117 
1118 	r = serial8250__init(kvm);
1119 	if (r < 0) {
1120 		pr_err("serial__init() failed with error %d\n", r);
1121 		goto fail;
1122 	}
1123 
1124 	if (active_console == CONSOLE_VIRTIO)
1125 		virtio_console__init(kvm);
1126 
1127 	if (virtio_rng)
1128 		virtio_rng__init(kvm);
1129 
1130 	if (balloon)
1131 		virtio_bln__init(kvm);
1132 
1133 	if (!network)
1134 		network = DEFAULT_NETWORK;
1135 
1136 	virtio_9p__init(kvm);
1137 
1138 	for (i = 0; i < num_net_devices; i++) {
1139 		net_params[i].kvm = kvm;
1140 		virtio_net__init(&net_params[i]);
1141 	}
1142 
1143 	if (num_net_devices == 0 && no_net == 0) {
1144 		struct virtio_net_params net_params;
1145 
1146 		net_params = (struct virtio_net_params) {
1147 			.guest_ip	= guest_ip,
1148 			.host_ip	= host_ip,
1149 			.kvm		= kvm,
1150 			.script		= script,
1151 			.mode		= NET_MODE_USER,
1152 		};
1153 		str_to_mac(guest_mac, net_params.guest_mac);
1154 		str_to_mac(host_mac, net_params.host_mac);
1155 
1156 		virtio_net__init(&net_params);
1157 	}
1158 
1159 	kvm__init_ram(kvm);
1160 
1161 #ifdef CONFIG_X86
1162 	kbd__init(kvm);
1163 #endif
1164 
1165 	pci_shmem__init(kvm);
1166 
1167 	if (vnc || sdl) {
1168 		fb = vesa__init(kvm);
1169 		if (IS_ERR(fb)) {
1170 			pr_err("vesa__init() failed with error %ld\n", PTR_ERR(fb));
1171 			goto fail;
1172 		}
1173 	}
1174 
1175 	if (vnc && fb) {
1176 		r = vnc__init(fb);
1177 		if (r < 0) {
1178 			pr_err("vnc__init() failed with error %d\n", r);
1179 			goto fail;
1180 		}
1181 	}
1182 
1183 	if (sdl && fb) {
1184 		sdl__init(fb);
1185 		if (r < 0) {
1186 			pr_err("sdl__init() failed with error %d\n", r);
1187 			goto fail;
1188 		}
1189 	}
1190 
1191 	r = fb__start();
1192 	if (r < 0) {
1193 		pr_err("fb__init() failed with error %d\n", r);
1194 		goto fail;
1195 	}
1196 
1197 	/* Device init all done; firmware init must
1198 	 * come after this (it may set up device trees etc.)
1199 	 */
1200 
1201 	kvm__start_timer(kvm);
1202 
1203 	kvm__arch_setup_firmware(kvm);
1204 
1205 	for (i = 0; i < nrcpus; i++) {
1206 		kvm_cpus[i] = kvm_cpu__init(kvm, i);
1207 		if (!kvm_cpus[i])
1208 			die("unable to initialize KVM VCPU");
1209 	}
1210 
1211 	thread_pool__init(nr_online_cpus);
1212 fail:
1213 	return r;
1214 }
1215 
1216 static int kvm_cmd_run_work(void)
1217 {
1218 	int i, r = -1;
1219 	void *ret = NULL;
1220 
1221 	for (i = 0; i < nrcpus; i++) {
1222 		if (pthread_create(&kvm_cpus[i]->thread, NULL, kvm_cpu_thread, kvm_cpus[i]) != 0)
1223 			die("unable to create KVM VCPU thread");
1224 	}
1225 
1226 	/* Only VCPU #0 is going to exit by itself when shutting down */
1227 	if (pthread_join(kvm_cpus[0]->thread, &ret) != 0)
1228 		r = 0;
1229 
1230 	kvm_cpu__delete(kvm_cpus[0]);
1231 	kvm_cpus[0] = NULL;
1232 
1233 	for (i = 1; i < nrcpus; i++) {
1234 		if (kvm_cpus[i]->is_running) {
1235 			pthread_kill(kvm_cpus[i]->thread, SIGKVMEXIT);
1236 			if (pthread_join(kvm_cpus[i]->thread, &ret) != 0)
1237 				die("pthread_join");
1238 			kvm_cpu__delete(kvm_cpus[i]);
1239 		}
1240 		if (ret == NULL)
1241 			r = 0;
1242 	}
1243 
1244 	return r;
1245 }
1246 
/*
 * Tear the VM down after the guest has stopped.
 *
 * @guest_ret: result of kvm_cmd_run_work(); when it is 0 a "session ended
 * normally" line is printed at the end.
 *
 * Failures of individual teardown steps are only reported as warnings; the
 * remaining steps still run.
 */
static void kvm_cmd_run_exit(int guest_ret)
{
	int r = 0;

	compat__print_all_messages();

	r = symbol__exit(kvm);
	if (r < 0)
		pr_warning("symbol__exit() failed with error %d\n", r);

	r = irq__exit(kvm);
	if (r < 0)
		pr_warning("irq__exit() failed with error %d\n", r);

	fb__stop();

	virtio_blk__delete_all(kvm);
	virtio_rng__delete_all(kvm);

	disk_image__close_all(kvm->disks, image_count);
	/* Only the pointer array is freed here; vCPUs are deleted in kvm_cmd_run_work(). */
	free(kvm_cpus);

	r = serial8250__exit(kvm);
	if (r < 0)
		pr_warning("serial8250__exit() failed with error %d\n", r);

	r = ioport__exit(kvm);
	if (r < 0)
		pr_warning("ioport__exit() failed with error %d\n", r);

	r = ioeventfd__exit(kvm);
	if (r < 0)
		pr_warning("ioeventfd__exit() failed with error %d\n", r);

	/* The kvm instance goes last, after everything that took it as an argument. */
	kvm__delete(kvm);

	if (guest_ret == 0)
		printf("\n  # KVM session ended normally.\n");
}
1286 
/*
 * Entry point of the "run" command: initialise the VM, run the guest and
 * tear everything down again.
 *
 * @argc/@argv: arguments after the sub-command name.
 * @prefix:     unused.
 *
 * If kvm_cmd_run_init() returns a negative value it is returned as is and
 * nothing else happens; otherwise the result of kvm_cmd_run_work() is
 * returned after kvm_cmd_run_exit() has run.
 */
int kvm_cmd_run(int argc, const char **argv, const char *prefix)
{
	int guest_ret;
	int init_ret = kvm_cmd_run_init(argc, argv);

	if (init_ret < 0)
		return init_ret;

	guest_ret = kvm_cmd_run_work();
	kvm_cmd_run_exit(guest_ret);

	return guest_ret;
}
1300