xref: /kvmtool/builtin-run.c (revision e3c4f8aa776211ff22c6f6af2aabb92c52b6725e)
1 #include "kvm/builtin-run.h"
2 
3 #include "kvm/builtin-setup.h"
4 #include "kvm/virtio-balloon.h"
5 #include "kvm/virtio-console.h"
6 #include "kvm/parse-options.h"
7 #include "kvm/8250-serial.h"
8 #include "kvm/framebuffer.h"
9 #include "kvm/disk-image.h"
10 #include "kvm/threadpool.h"
11 #include "kvm/virtio-blk.h"
12 #include "kvm/virtio-net.h"
13 #include "kvm/virtio-rng.h"
14 #include "kvm/ioeventfd.h"
15 #include "kvm/virtio-9p.h"
16 #include "kvm/barrier.h"
17 #include "kvm/kvm-cpu.h"
18 #include "kvm/ioport.h"
19 #include "kvm/symbol.h"
20 #include "kvm/i8042.h"
21 #include "kvm/mutex.h"
22 #include "kvm/term.h"
23 #include "kvm/util.h"
24 #include "kvm/strbuf.h"
25 #include "kvm/vesa.h"
26 #include "kvm/irq.h"
27 #include "kvm/kvm.h"
28 #include "kvm/pci.h"
29 #include "kvm/rtc.h"
30 #include "kvm/sdl.h"
31 #include "kvm/vnc.h"
32 #include "kvm/guest_compat.h"
33 #include "kvm/pci-shmem.h"
34 #include "kvm/kvm-ipc.h"
35 #include "kvm/builtin-debug.h"
36 
37 #include <linux/types.h>
38 
39 #include <sys/utsname.h>
40 #include <sys/types.h>
41 #include <sys/stat.h>
42 #include <termios.h>
43 #include <signal.h>
44 #include <stdlib.h>
45 #include <string.h>
46 #include <unistd.h>
47 #include <ctype.h>
48 #include <stdio.h>
49 
/*
 * Fallback values applied by kvm_cmd_run_init() (and netdev_parser() for the
 * per-NIC addresses/script) when the corresponding setting was not supplied.
 */
#define DEFAULT_KVM_DEV		"/dev/kvm"
#define DEFAULT_CONSOLE		"serial"
#define DEFAULT_NETWORK		"user"
#define DEFAULT_HOST_ADDR	"192.168.33.1"
#define DEFAULT_GUEST_ADDR	"192.168.33.15"
#define DEFAULT_GUEST_MAC	"02:15:15:15:15:15"
#define DEFAULT_HOST_MAC	"02:01:01:01:01:01"
#define DEFAULT_SCRIPT		"none"
/* Script path used for the sandbox command when running in sandbox mode. */
const char *DEFAULT_SANDBOX_FILENAME = "guest/sandbox.sh";

/* Shift amounts for converting between bytes and KiB/MiB/GiB. */
#define MB_SHIFT		(20)
#define KB_SHIFT		(10)
#define GB_SHIFT		(30)
/* Smallest guest RAM size accepted by kvm_cmd_run_init(), in MiB and bytes. */
#define MIN_RAM_SIZE_MB		(64ULL)
#define MIN_RAM_SIZE_BYTE	(MIN_RAM_SIZE_MB << MB_SHIFT)
65 
/* The guest instance, its vCPU array and the vCPU owned by the calling thread. */
struct kvm *kvm;
struct kvm_cpu **kvm_cpus;
__thread struct kvm_cpu *current_kvm_cpu;

/*
 * Run configuration. Most of these are written either directly through the
 * option table below or by its parser callbacks, then consumed by
 * kvm_cmd_run_init().
 */
static u64 ram_size;		/* in MiB until kvm_cmd_run_init() shifts it to bytes */
static u8  image_count;		/* number of valid entries in image_filename[] */
static u8 num_net_devices;	/* number of valid entries in net_params[] */
static bool virtio_rng;
static const char *kernel_cmdline;
static const char *kernel_filename;
static const char *vmlinux_filename;
static const char *initrd_filename;
static const char *image_filename[MAX_DISK_IMAGES];
static const char *console;
static const char *dev;
static const char *network;
static const char *host_ip;
static const char *guest_ip;
static const char *guest_mac;
static const char *host_mac;
static const char *script;
static const char *guest_name;
static const char *sandbox;
static const char *hugetlbfs_path;
static const char *custom_rootfs_name = "default";
static struct virtio_net_params *net_params;	/* grown by netdev_parser() */
static bool single_step;
static bool readonly_image[MAX_DISK_IMAGES];
static bool vnc;
static bool sdl;
static bool balloon;
static bool using_rootfs;	/* a 9p "/dev/root" export has been registered */
static bool custom_rootfs;	/* the rootfs lives under kvm__get_dir() */
static bool no_net;		/* set when "mode=none" is given to --network */
static bool no_dhcp;
extern bool ioport_debug;
static int  kvm_run_wrapper;	/* KVM_RUN_DEFAULT or KVM_RUN_SANDBOX */
extern int  active_console;
extern int  debug_iodelay;

bool do_debug_print = false;

static int nrcpus;
static int vidmode = -1;	/* -1 means "not given on the command line" */

/* Usage line handed to the option parser. */
static const char * const run_usage[] = {
	"lkvm run [<options>] [<kernel image>]",
	NULL
};

/* How the first non-option argument is interpreted (see kvm_cmd_run_init()). */
enum {
	KVM_RUN_DEFAULT,
	KVM_RUN_SANDBOX,
};
120 
121 void kvm_run_set_wrapper_sandbox(void)
122 {
123 	kvm_run_wrapper = KVM_RUN_SANDBOX;
124 }
125 
126 static int img_name_parser(const struct option *opt, const char *arg, int unset)
127 {
128 	char *sep;
129 	struct stat st;
130 	char path[PATH_MAX];
131 
132 	if (stat(arg, &st) == 0 &&
133 	    S_ISDIR(st.st_mode)) {
134 		char tmp[PATH_MAX];
135 
136 		if (using_rootfs)
137 			die("Please use only one rootfs directory atmost");
138 
139 		if (realpath(arg, tmp) == 0 ||
140 		    virtio_9p__register(kvm, tmp, "/dev/root") < 0)
141 			die("Unable to initialize virtio 9p");
142 		using_rootfs = 1;
143 		return 0;
144 	}
145 
146 	snprintf(path, PATH_MAX, "%s%s", kvm__get_dir(), arg);
147 
148 	if (stat(path, &st) == 0 &&
149 	    S_ISDIR(st.st_mode)) {
150 		char tmp[PATH_MAX];
151 
152 		if (using_rootfs)
153 			die("Please use only one rootfs directory atmost");
154 
155 		if (realpath(path, tmp) == 0 ||
156 		    virtio_9p__register(kvm, tmp, "/dev/root") < 0)
157 			die("Unable to initialize virtio 9p");
158 		if (virtio_9p__register(kvm, "/", "hostfs") < 0)
159 			die("Unable to initialize virtio 9p");
160 		kvm_setup_resolv(arg);
161 		using_rootfs = custom_rootfs = 1;
162 		custom_rootfs_name = arg;
163 		return 0;
164 	}
165 
166 	if (image_count >= MAX_DISK_IMAGES)
167 		die("Currently only 4 images are supported");
168 
169 	image_filename[image_count] = arg;
170 	sep = strstr(arg, ",");
171 	if (sep) {
172 		if (strcmp(sep + 1, "ro") == 0)
173 			readonly_image[image_count] = 1;
174 		*sep = 0;
175 	}
176 
177 	image_count++;
178 
179 	return 0;
180 }
181 
182 static int virtio_9p_rootdir_parser(const struct option *opt, const char *arg, int unset)
183 {
184 	char *tag_name;
185 	char tmp[PATH_MAX];
186 
187 	/*
188 	 * 9p dir can be of the form dirname,tag_name or
189 	 * just dirname. In the later case we use the
190 	 * default tag name
191 	 */
192 	tag_name = strstr(arg, ",");
193 	if (tag_name) {
194 		*tag_name = '\0';
195 		tag_name++;
196 	}
197 	if (realpath(arg, tmp)) {
198 		if (virtio_9p__register(kvm, tmp, tag_name) < 0)
199 			die("Unable to initialize virtio 9p");
200 	} else
201 		die("Failed resolving 9p path");
202 	return 0;
203 }
204 
/* --tty callback: hand the numeric tty id in @arg to term_set_tty(). */
static int tty_parser(const struct option *opt, const char *arg, int unset)
{
	term_set_tty(atoi(arg));

	return 0;
}
213 
/*
 * Parse a textual MAC address ("xx:xx:xx:xx:xx:xx", hex octets) from @str
 * into the six bytes at @mac. Bytes that fail to parse are left untouched.
 */
static inline void str_to_mac(const char *str, char *mac)
{
	sscanf(str, "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx",
	       &mac[0], &mac[1], &mac[2], &mac[3], &mac[4], &mac[5]);
}
219 static int set_net_param(struct virtio_net_params *p, const char *param,
220 				const char *val)
221 {
222 	if (strcmp(param, "guest_mac") == 0) {
223 		str_to_mac(val, p->guest_mac);
224 	} else if (strcmp(param, "mode") == 0) {
225 		if (!strncmp(val, "user", 4)) {
226 			int i;
227 
228 			for (i = 0; i < num_net_devices; i++)
229 				if (net_params[i].mode == NET_MODE_USER)
230 					die("Only one usermode network device allowed at a time");
231 			p->mode = NET_MODE_USER;
232 		} else if (!strncmp(val, "tap", 3)) {
233 			p->mode = NET_MODE_TAP;
234 		} else if (!strncmp(val, "none", 4)) {
235 			no_net = 1;
236 			return -1;
237 		} else
238 			die("Unkown network mode %s, please use user, tap or none", network);
239 	} else if (strcmp(param, "script") == 0) {
240 		p->script = strdup(val);
241 	} else if (strcmp(param, "guest_ip") == 0) {
242 		p->guest_ip = strdup(val);
243 	} else if (strcmp(param, "host_ip") == 0) {
244 		p->host_ip = strdup(val);
245 	} else if (strcmp(param, "vhost") == 0) {
246 		p->vhost = atoi(val);
247 	} else if (strcmp(param, "fd") == 0) {
248 		p->fd = atoi(val);
249 	}
250 
251 	return 0;
252 }
253 
254 static int netdev_parser(const struct option *opt, const char *arg, int unset)
255 {
256 	struct virtio_net_params p;
257 	char *buf = NULL, *cmd = NULL, *cur = NULL;
258 	bool on_cmd = true;
259 
260 	if (arg) {
261 		buf = strdup(arg);
262 		if (buf == NULL)
263 			die("Failed allocating new net buffer");
264 		cur = strtok(buf, ",=");
265 	}
266 
267 	p = (struct virtio_net_params) {
268 		.guest_ip	= DEFAULT_GUEST_ADDR,
269 		.host_ip	= DEFAULT_HOST_ADDR,
270 		.script		= DEFAULT_SCRIPT,
271 		.mode		= NET_MODE_TAP,
272 	};
273 
274 	str_to_mac(DEFAULT_GUEST_MAC, p.guest_mac);
275 	p.guest_mac[5] += num_net_devices;
276 
277 	while (cur) {
278 		if (on_cmd) {
279 			cmd = cur;
280 		} else {
281 			if (set_net_param(&p, cmd, cur) < 0)
282 				goto done;
283 		}
284 		on_cmd = !on_cmd;
285 
286 		cur = strtok(NULL, ",=");
287 	};
288 
289 	num_net_devices++;
290 
291 	net_params = realloc(net_params, num_net_devices * sizeof(*net_params));
292 	if (net_params == NULL)
293 		die("Failed adding new network device");
294 
295 	net_params[num_net_devices - 1] = p;
296 
297 done:
298 	free(buf);
299 	return 0;
300 }
301 
302 static int shmem_parser(const struct option *opt, const char *arg, int unset)
303 {
304 	const u64 default_size = SHMEM_DEFAULT_SIZE;
305 	const u64 default_phys_addr = SHMEM_DEFAULT_ADDR;
306 	const char *default_handle = SHMEM_DEFAULT_HANDLE;
307 	struct shmem_info *si = malloc(sizeof(struct shmem_info));
308 	u64 phys_addr;
309 	u64 size;
310 	char *handle = NULL;
311 	int create = 0;
312 	const char *p = arg;
313 	char *next;
314 	int base = 10;
315 	int verbose = 0;
316 
317 	const int skip_pci = strlen("pci:");
318 	if (verbose)
319 		pr_info("shmem_parser(%p,%s,%d)", opt, arg, unset);
320 	/* parse out optional addr family */
321 	if (strcasestr(p, "pci:")) {
322 		p += skip_pci;
323 	} else if (strcasestr(p, "mem:")) {
324 		die("I can't add to E820 map yet.\n");
325 	}
326 	/* parse out physical addr */
327 	base = 10;
328 	if (strcasestr(p, "0x"))
329 		base = 16;
330 	phys_addr = strtoll(p, &next, base);
331 	if (next == p && phys_addr == 0) {
332 		pr_info("shmem: no physical addr specified, using default.");
333 		phys_addr = default_phys_addr;
334 	}
335 	if (*next != ':' && *next != '\0')
336 		die("shmem: unexpected chars after phys addr.\n");
337 	if (*next == '\0')
338 		p = next;
339 	else
340 		p = next + 1;
341 	/* parse out size */
342 	base = 10;
343 	if (strcasestr(p, "0x"))
344 		base = 16;
345 	size = strtoll(p, &next, base);
346 	if (next == p && size == 0) {
347 		pr_info("shmem: no size specified, using default.");
348 		size = default_size;
349 	}
350 	/* look for [KMGkmg][Bb]*  uses base 2. */
351 	int skip_B = 0;
352 	if (strspn(next, "KMGkmg")) {	/* might have a prefix */
353 		if (*(next + 1) == 'B' || *(next + 1) == 'b')
354 			skip_B = 1;
355 		switch (*next) {
356 		case 'K':
357 		case 'k':
358 			size = size << KB_SHIFT;
359 			break;
360 		case 'M':
361 		case 'm':
362 			size = size << MB_SHIFT;
363 			break;
364 		case 'G':
365 		case 'g':
366 			size = size << GB_SHIFT;
367 			break;
368 		default:
369 			die("shmem: bug in detecting size prefix.");
370 			break;
371 		}
372 		next += 1 + skip_B;
373 	}
374 	if (*next != ':' && *next != '\0') {
375 		die("shmem: unexpected chars after phys size. <%c><%c>\n",
376 		    *next, *p);
377 	}
378 	if (*next == '\0')
379 		p = next;
380 	else
381 		p = next + 1;
382 	/* parse out optional shmem handle */
383 	const int skip_handle = strlen("handle=");
384 	next = strcasestr(p, "handle=");
385 	if (*p && next) {
386 		if (p != next)
387 			die("unexpected chars before handle\n");
388 		p += skip_handle;
389 		next = strchrnul(p, ':');
390 		if (next - p) {
391 			handle = malloc(next - p + 1);
392 			strncpy(handle, p, next - p);
393 			handle[next - p] = '\0';	/* just in case. */
394 		}
395 		if (*next == '\0')
396 			p = next;
397 		else
398 			p = next + 1;
399 	}
400 	/* parse optional create flag to see if we should create shm seg. */
401 	if (*p && strcasestr(p, "create")) {
402 		create = 1;
403 		p += strlen("create");
404 	}
405 	if (*p != '\0')
406 		die("shmem: unexpected trailing chars\n");
407 	if (handle == NULL) {
408 		handle = malloc(strlen(default_handle) + 1);
409 		strcpy(handle, default_handle);
410 	}
411 	if (verbose) {
412 		pr_info("shmem: phys_addr = %llx", phys_addr);
413 		pr_info("shmem: size      = %llx", size);
414 		pr_info("shmem: handle    = %s", handle);
415 		pr_info("shmem: create    = %d", create);
416 	}
417 
418 	si->phys_addr = phys_addr;
419 	si->size = size;
420 	si->handle = handle;
421 	si->create = create;
422 	pci_shmem__register_mem(si);	/* ownership of si, etc. passed on. */
423 	return 0;
424 }
425 
/*
 * Command line options understood by the run command. Plain options store
 * straight into the file-scope variables above; OPT_CALLBACK entries go
 * through the *_parser() functions defined earlier in this file.
 */
static const struct option options[] = {
	OPT_GROUP("Basic options:"),
	OPT_STRING('\0', "name", &guest_name, "guest name",
			"A name for the guest"),
	OPT_INTEGER('c', "cpus", &nrcpus, "Number of CPUs"),
	OPT_U64('m', "mem", &ram_size, "Virtual machine memory size in MiB."),
	OPT_CALLBACK('\0', "shmem", NULL,
		     "[pci:]<addr>:<size>[:handle=<handle>][:create]",
		     "Share host shmem with guest via pci device",
		     shmem_parser),
	OPT_CALLBACK('d', "disk", NULL, "image or rootfs_dir", "Disk image or rootfs directory", img_name_parser),
	OPT_BOOLEAN('\0', "balloon", &balloon, "Enable virtio balloon"),
	OPT_BOOLEAN('\0', "vnc", &vnc, "Enable VNC framebuffer"),
	OPT_BOOLEAN('\0', "sdl", &sdl, "Enable SDL framebuffer"),
	OPT_BOOLEAN('\0', "rng", &virtio_rng, "Enable virtio Random Number Generator"),
	OPT_CALLBACK('\0', "9p", NULL, "dir_to_share,tag_name",
		     "Enable virtio 9p to share files between host and guest", virtio_9p_rootdir_parser),
	OPT_STRING('\0', "console", &console, "serial, virtio or hv",
			"Console to use"),
	OPT_STRING('\0', "dev", &dev, "device_file", "KVM device file"),
	OPT_CALLBACK('\0', "tty", NULL, "tty id",
		     "Remap guest TTY into a pty on the host",
		     tty_parser),
	OPT_STRING('\0', "sandbox", &sandbox, "script",
			"Run this script when booting into custom rootfs"),
	OPT_STRING('\0', "hugetlbfs", &hugetlbfs_path, "path", "Hugetlbfs path"),

	OPT_GROUP("Kernel options:"),
	OPT_STRING('k', "kernel", &kernel_filename, "kernel",
			"Kernel to boot in virtual machine"),
	OPT_STRING('i', "initrd", &initrd_filename, "initrd",
			"Initial RAM disk image"),
	OPT_STRING('p', "params", &kernel_cmdline, "params",
			"Kernel command line arguments"),

	OPT_GROUP("Networking options:"),
	/* May be given several times; each use adds one NIC (netdev_parser). */
	OPT_CALLBACK_DEFAULT('n', "network", NULL, "network params",
		     "Create a new guest NIC",
		     netdev_parser, NULL),
	OPT_BOOLEAN('\0', "no-dhcp", &no_dhcp, "Disable kernel DHCP in rootfs mode"),

	OPT_GROUP("BIOS options:"),
	OPT_INTEGER('\0', "vidmode", &vidmode,
		    "Video mode"),

	OPT_GROUP("Debug options:"),
	OPT_BOOLEAN('\0', "debug", &do_debug_print,
			"Enable debug messages"),
	OPT_BOOLEAN('\0', "debug-single-step", &single_step,
			"Enable single stepping"),
	OPT_BOOLEAN('\0', "debug-ioport", &ioport_debug,
			"Enable ioport debugging"),
	OPT_INTEGER('\0', "debug-iodelay", &debug_iodelay,
			"Delay IO by millisecond"),
	OPT_END()
};
482 
483 /*
484  * Serialize debug printout so that the output of multiple vcpus does not
485  * get mixed up:
486  */
487 static int printout_done;
488 
489 static void handle_sigusr1(int sig)
490 {
491 	struct kvm_cpu *cpu = current_kvm_cpu;
492 	int fd = kvm_cpu__get_debug_fd();
493 
494 	if (!cpu || cpu->needs_nmi)
495 		return;
496 
497 	dprintf(fd, "\n #\n # vCPU #%ld's dump:\n #\n", cpu->cpu_id);
498 	kvm_cpu__show_registers(cpu);
499 	kvm_cpu__show_code(cpu);
500 	kvm_cpu__show_page_tables(cpu);
501 	fflush(stdout);
502 	printout_done = 1;
503 	mb();
504 }
505 
506 /* Pause/resume the guest using SIGUSR2 */
507 static int is_paused;
508 
509 static void handle_pause(int fd, u32 type, u32 len, u8 *msg)
510 {
511 	if (WARN_ON(len))
512 		return;
513 
514 	if (type == KVM_IPC_RESUME && is_paused) {
515 		kvm->vm_state = KVM_VMSTATE_RUNNING;
516 		kvm__continue();
517 	} else if (type == KVM_IPC_PAUSE && !is_paused) {
518 		kvm->vm_state = KVM_VMSTATE_PAUSED;
519 		kvm__pause();
520 	} else {
521 		return;
522 	}
523 
524 	is_paused = !is_paused;
525 }
526 
527 static void handle_vmstate(int fd, u32 type, u32 len, u8 *msg)
528 {
529 	int r = 0;
530 
531 	if (type == KVM_IPC_VMSTATE)
532 		r = write(fd, &kvm->vm_state, sizeof(kvm->vm_state));
533 
534 	if (r < 0)
535 		pr_warning("Failed sending VMSTATE");
536 }
537 
538 static void handle_debug(int fd, u32 type, u32 len, u8 *msg)
539 {
540 	int i;
541 	struct debug_cmd_params *params;
542 	u32 dbg_type;
543 	u32 vcpu;
544 
545 	if (WARN_ON(type != KVM_IPC_DEBUG || len != sizeof(*params)))
546 		return;
547 
548 	params = (void *)msg;
549 	dbg_type = params->dbg_type;
550 	vcpu = params->cpu;
551 
552 	if (dbg_type & KVM_DEBUG_CMD_TYPE_NMI) {
553 		if ((int)vcpu >= kvm->nrcpus)
554 			return;
555 
556 		kvm_cpus[vcpu]->needs_nmi = 1;
557 		pthread_kill(kvm_cpus[vcpu]->thread, SIGUSR1);
558 	}
559 
560 	if (!(dbg_type & KVM_DEBUG_CMD_TYPE_DUMP))
561 		return;
562 
563 	for (i = 0; i < nrcpus; i++) {
564 		struct kvm_cpu *cpu = kvm_cpus[i];
565 
566 		if (!cpu)
567 			continue;
568 
569 		printout_done = 0;
570 
571 		kvm_cpu__set_debug_fd(fd);
572 		pthread_kill(cpu->thread, SIGUSR1);
573 		/*
574 		 * Wait for the vCPU to dump state before signalling
575 		 * the next thread. Since this is debug code it does
576 		 * not matter that we are burning CPU time a bit:
577 		 */
578 		while (!printout_done)
579 			mb();
580 	}
581 
582 	close(fd);
583 
584 	serial8250__inject_sysrq(kvm);
585 }
586 
587 static void handle_sigalrm(int sig)
588 {
589 	kvm__arch_periodic_poll(kvm);
590 }
591 
592 static void handle_stop(int fd, u32 type, u32 len, u8 *msg)
593 {
594 	if (WARN_ON(type != KVM_IPC_STOP || len))
595 		return;
596 
597 	kvm_cpu__reboot();
598 }
599 
600 static void *kvm_cpu_thread(void *arg)
601 {
602 	current_kvm_cpu		= arg;
603 
604 	if (kvm_cpu__start(current_kvm_cpu))
605 		goto panic_kvm;
606 
607 	return (void *) (intptr_t) 0;
608 
609 panic_kvm:
610 	fprintf(stderr, "KVM exit reason: %u (\"%s\")\n",
611 		current_kvm_cpu->kvm_run->exit_reason,
612 		kvm_exit_reasons[current_kvm_cpu->kvm_run->exit_reason]);
613 	if (current_kvm_cpu->kvm_run->exit_reason == KVM_EXIT_UNKNOWN)
614 		fprintf(stderr, "KVM exit code: 0x%Lu\n",
615 			current_kvm_cpu->kvm_run->hw.hardware_exit_reason);
616 
617 	kvm_cpu__set_debug_fd(STDOUT_FILENO);
618 	kvm_cpu__show_registers(current_kvm_cpu);
619 	kvm_cpu__show_code(current_kvm_cpu);
620 	kvm_cpu__show_page_tables(current_kvm_cpu);
621 
622 	return (void *) (intptr_t) 1;
623 }
624 
/* Scratch buffer holding the kernel path built by find_kernel(). */
static char kernel[PATH_MAX];

/*
 * Host kernel image prefixes; find_kernel() appends "-<uname release>"
 * to each of them.
 */
static const char *host_kernels[] = {
	"/boot/vmlinuz",
	"/boot/bzImage",
	NULL
};

/* Kernel image locations tried first, relative to the working directory. */
static const char *default_kernels[] = {
	"./bzImage",
	"arch/" BUILD_ARCH "/boot/bzImage",
	"../../arch/" BUILD_ARCH "/boot/bzImage",
	NULL
};

/* Candidate vmlinux locations probed by find_vmlinux(). */
static const char *default_vmlinux[] = {
	"vmlinux",
	"../../../vmlinux",
	"../../vmlinux",
	NULL
};
646 
647 static void kernel_usage_with_options(void)
648 {
649 	const char **k;
650 	struct utsname uts;
651 
652 	fprintf(stderr, "Fatal: could not find default kernel image in:\n");
653 	k = &default_kernels[0];
654 	while (*k) {
655 		fprintf(stderr, "\t%s\n", *k);
656 		k++;
657 	}
658 
659 	if (uname(&uts) < 0)
660 		return;
661 
662 	k = &host_kernels[0];
663 	while (*k) {
664 		if (snprintf(kernel, PATH_MAX, "%s-%s", *k, uts.release) < 0)
665 			return;
666 		fprintf(stderr, "\t%s\n", kernel);
667 		k++;
668 	}
669 	fprintf(stderr, "\nPlease see '%s run --help' for more options.\n\n",
670 		KVM_BINARY_NAME);
671 }
672 
673 static u64 host_ram_size(void)
674 {
675 	long page_size;
676 	long nr_pages;
677 
678 	nr_pages	= sysconf(_SC_PHYS_PAGES);
679 	if (nr_pages < 0) {
680 		pr_warning("sysconf(_SC_PHYS_PAGES) failed");
681 		return 0;
682 	}
683 
684 	page_size	= sysconf(_SC_PAGE_SIZE);
685 	if (page_size < 0) {
686 		pr_warning("sysconf(_SC_PAGE_SIZE) failed");
687 		return 0;
688 	}
689 
690 	return (nr_pages * page_size) >> MB_SHIFT;
691 }
692 
693 /*
694  * If user didn't specify how much memory it wants to allocate for the guest,
695  * avoid filling the whole host RAM.
696  */
697 #define RAM_SIZE_RATIO		0.8
698 
699 static u64 get_ram_size(int nr_cpus)
700 {
701 	u64 available;
702 	u64 ram_size;
703 
704 	ram_size	= 64 * (nr_cpus + 3);
705 
706 	available	= host_ram_size() * RAM_SIZE_RATIO;
707 	if (!available)
708 		available = MIN_RAM_SIZE_MB;
709 
710 	if (ram_size > available)
711 		ram_size	= available;
712 
713 	return ram_size;
714 }
715 
716 static const char *find_kernel(void)
717 {
718 	const char **k;
719 	struct stat st;
720 	struct utsname uts;
721 
722 	k = &default_kernels[0];
723 	while (*k) {
724 		if (stat(*k, &st) < 0 || !S_ISREG(st.st_mode)) {
725 			k++;
726 			continue;
727 		}
728 		strncpy(kernel, *k, PATH_MAX);
729 		return kernel;
730 	}
731 
732 	if (uname(&uts) < 0)
733 		return NULL;
734 
735 	k = &host_kernels[0];
736 	while (*k) {
737 		if (snprintf(kernel, PATH_MAX, "%s-%s", *k, uts.release) < 0)
738 			return NULL;
739 
740 		if (stat(kernel, &st) < 0 || !S_ISREG(st.st_mode)) {
741 			k++;
742 			continue;
743 		}
744 		return kernel;
745 
746 	}
747 	return NULL;
748 }
749 
750 static const char *find_vmlinux(void)
751 {
752 	const char **vmlinux;
753 
754 	vmlinux = &default_vmlinux[0];
755 	while (*vmlinux) {
756 		struct stat st;
757 
758 		if (stat(*vmlinux, &st) < 0 || !S_ISREG(st.st_mode)) {
759 			vmlinux++;
760 			continue;
761 		}
762 		return *vmlinux;
763 	}
764 	return NULL;
765 }
766 
767 void kvm_run_help(void)
768 {
769 	usage_with_options(run_usage, options);
770 }
771 
772 static int kvm_custom_stage2(void)
773 {
774 	char tmp[PATH_MAX], dst[PATH_MAX], *src;
775 	const char *rootfs = custom_rootfs_name;
776 	int r;
777 
778 	src = realpath("guest/init_stage2", NULL);
779 	if (src == NULL)
780 		return -ENOMEM;
781 
782 	snprintf(tmp, PATH_MAX, "%s%s/virt/init_stage2", kvm__get_dir(), rootfs);
783 	remove(tmp);
784 
785 	snprintf(dst, PATH_MAX, "/host/%s", src);
786 	r = symlink(dst, tmp);
787 	free(src);
788 
789 	return r;
790 }
791 
792 static int kvm_run_set_sandbox(void)
793 {
794 	const char *guestfs_name = custom_rootfs_name;
795 	char path[PATH_MAX], script[PATH_MAX], *tmp;
796 
797 	snprintf(path, PATH_MAX, "%s%s/virt/sandbox.sh", kvm__get_dir(), guestfs_name);
798 
799 	remove(path);
800 
801 	if (sandbox == NULL)
802 		return 0;
803 
804 	tmp = realpath(sandbox, NULL);
805 	if (tmp == NULL)
806 		return -ENOMEM;
807 
808 	snprintf(script, PATH_MAX, "/host/%s", tmp);
809 	free(tmp);
810 
811 	return symlink(script, path);
812 }
813 
/*
 * Write @arg to @fd quoted for the shell so that it is passed on verbatim:
 * runs without single quotes are emitted as 'text', each single quote as
 * "'", and the empty string as ''. Any failed write is fatal.
 */
static void kvm_write_sandbox_cmd_exactly(int fd, const char *arg)
{
	if (*arg == '\0') {
		if (write(fd, "''", 2) <= 0)
			die("Failed writing sandbox script");
		return;
	}

	do {
		const char *quote = strchrnul(arg, '\'');

		/* Everything up to the next single quote goes out as 'text'. */
		if (quote != arg) {
			if (write(fd, "'", 1) <= 0)
				die("Failed writing sandbox script");
			if (write(fd, arg, quote - arg) <= 0)
				die("Failed writing sandbox script");
			if (write(fd, "'", 1) <= 0)
				die("Failed writing sandbox script");
		}

		if (*quote == '\0')
			break;

		/* The single quote itself is wrapped in double quotes. */
		if (write(fd, "\"'\"", 3) <= 0)
			die("Failed writing sandbox script");

		arg = quote + 1;
	} while (*arg);
}
845 
846 static void kvm_run_write_sandbox_cmd(const char **argv, int argc)
847 {
848 	const char script_hdr[] = "#! /bin/bash\n\n";
849 	int fd;
850 
851 	remove(sandbox);
852 
853 	fd = open(sandbox, O_RDWR | O_CREAT, 0777);
854 	if (fd < 0)
855 		die("Failed creating sandbox script");
856 
857 	if (write(fd, script_hdr, sizeof(script_hdr) - 1) <= 0)
858 		die("Failed writing sandbox script");
859 
860 	while (argc) {
861 		kvm_write_sandbox_cmd_exactly(fd, argv[0]);
862 		if (argc - 1)
863 			if (write(fd, " ", 1) <= 0)
864 				die("Failed writing sandbox script");
865 		argv++;
866 		argc--;
867 	}
868 	if (write(fd, "\n", 1) <= 0)
869 		die("Failed writing sandbox script");
870 
871 	close(fd);
872 }
873 
874 static int kvm_cmd_run_init(int argc, const char **argv)
875 {
876 	static char real_cmdline[2048], default_name[20];
877 	struct framebuffer *fb = NULL;
878 	unsigned int nr_online_cpus;
879 	int max_cpus, recommended_cpus;
880 	int i, r;
881 
882 	signal(SIGALRM, handle_sigalrm);
883 	kvm_ipc__register_handler(KVM_IPC_DEBUG, handle_debug);
884 	signal(SIGUSR1, handle_sigusr1);
885 	kvm_ipc__register_handler(KVM_IPC_PAUSE, handle_pause);
886 	kvm_ipc__register_handler(KVM_IPC_RESUME, handle_pause);
887 	kvm_ipc__register_handler(KVM_IPC_STOP, handle_stop);
888 	kvm_ipc__register_handler(KVM_IPC_VMSTATE, handle_vmstate);
889 
890 	nr_online_cpus = sysconf(_SC_NPROCESSORS_ONLN);
891 
892 	while (argc != 0) {
893 		argc = parse_options(argc, argv, options, run_usage,
894 				PARSE_OPT_STOP_AT_NON_OPTION |
895 				PARSE_OPT_KEEP_DASHDASH);
896 		if (argc != 0) {
897 			/* Cusrom options, should have been handled elsewhere */
898 			if (strcmp(argv[0], "--") == 0) {
899 				if (kvm_run_wrapper == KVM_RUN_SANDBOX) {
900 					sandbox = DEFAULT_SANDBOX_FILENAME;
901 					kvm_run_write_sandbox_cmd(argv+1, argc-1);
902 					break;
903 				}
904 			}
905 
906 			if ((kvm_run_wrapper == KVM_RUN_DEFAULT && kernel_filename) ||
907 				(kvm_run_wrapper == KVM_RUN_SANDBOX && sandbox)) {
908 				fprintf(stderr, "Cannot handle parameter: "
909 						"%s\n", argv[0]);
910 				usage_with_options(run_usage, options);
911 				return EINVAL;
912 			}
913 			if (kvm_run_wrapper == KVM_RUN_SANDBOX) {
914 				/*
915 				 * first unhandled parameter is treated as
916 				 * sandbox command
917 				 */
918 				sandbox = DEFAULT_SANDBOX_FILENAME;
919 				kvm_run_write_sandbox_cmd(argv, argc);
920 			} else {
921 				/*
922 				 * first unhandled parameter is treated as a kernel
923 				 * image
924 				 */
925 				kernel_filename = argv[0];
926 			}
927 			argv++;
928 			argc--;
929 		}
930 
931 	}
932 
933 	if (!kernel_filename)
934 		kernel_filename = find_kernel();
935 
936 	if (!kernel_filename) {
937 		kernel_usage_with_options();
938 		return EINVAL;
939 	}
940 
941 	vmlinux_filename = find_vmlinux();
942 
943 	if (nrcpus == 0)
944 		nrcpus = nr_online_cpus;
945 
946 	if (!ram_size)
947 		ram_size	= get_ram_size(nrcpus);
948 
949 	if (ram_size < MIN_RAM_SIZE_MB)
950 		die("Not enough memory specified: %lluMB (min %lluMB)", ram_size, MIN_RAM_SIZE_MB);
951 
952 	if (ram_size > host_ram_size())
953 		pr_warning("Guest memory size %lluMB exceeds host physical RAM size %lluMB", ram_size, host_ram_size());
954 
955 	ram_size <<= MB_SHIFT;
956 
957 	if (!dev)
958 		dev = DEFAULT_KVM_DEV;
959 
960 	if (!console)
961 		console = DEFAULT_CONSOLE;
962 
963 	if (!strncmp(console, "virtio", 6))
964 		active_console  = CONSOLE_VIRTIO;
965 	else if (!strncmp(console, "serial", 6))
966 		active_console  = CONSOLE_8250;
967 	else if (!strncmp(console, "hv", 2))
968 		active_console = CONSOLE_HV;
969 	else
970 		pr_warning("No console!");
971 
972 	if (!host_ip)
973 		host_ip = DEFAULT_HOST_ADDR;
974 
975 	if (!guest_ip)
976 		guest_ip = DEFAULT_GUEST_ADDR;
977 
978 	if (!guest_mac)
979 		guest_mac = DEFAULT_GUEST_MAC;
980 
981 	if (!host_mac)
982 		host_mac = DEFAULT_HOST_MAC;
983 
984 	if (!script)
985 		script = DEFAULT_SCRIPT;
986 
987 	term_init();
988 
989 	if (!guest_name) {
990 		if (custom_rootfs) {
991 			guest_name = custom_rootfs_name;
992 		} else {
993 			sprintf(default_name, "guest-%u", getpid());
994 			guest_name = default_name;
995 		}
996 	}
997 
998 	kvm = kvm__init(dev, hugetlbfs_path, ram_size, guest_name);
999 
1000 	kvm->single_step = single_step;
1001 
1002 	ioeventfd__init(kvm);
1003 
1004 	max_cpus = kvm__max_cpus(kvm);
1005 	recommended_cpus = kvm__recommended_cpus(kvm);
1006 
1007 	if (nrcpus > max_cpus) {
1008 		printf("  # Limit the number of CPUs to %d\n", max_cpus);
1009 		nrcpus = max_cpus;
1010 	} else if (nrcpus > recommended_cpus) {
1011 		printf("  # Warning: The maximum recommended amount of VCPUs"
1012 			" is %d\n", recommended_cpus);
1013 	}
1014 
1015 	kvm->nrcpus = nrcpus;
1016 
1017 	/* Alloc one pointer too many, so array ends up 0-terminated */
1018 	kvm_cpus = calloc(nrcpus + 1, sizeof(void *));
1019 	if (!kvm_cpus)
1020 		die("Couldn't allocate array for %d CPUs", nrcpus);
1021 
1022 	r = irq__init(kvm);
1023 	if (r < 0) {
1024 		pr_err("irq__init() failed with error %d\n", r);
1025 		goto fail;
1026 	}
1027 
1028 	pci__init();
1029 
1030 	/*
1031 	 * vidmode should be either specified
1032 	 * either set by default
1033 	 */
1034 	if (vnc || sdl) {
1035 		if (vidmode == -1)
1036 			vidmode = 0x312;
1037 	} else
1038 		vidmode = 0;
1039 
1040 	memset(real_cmdline, 0, sizeof(real_cmdline));
1041 	kvm__arch_set_cmdline(real_cmdline, vnc || sdl);
1042 
1043 	if (strlen(real_cmdline) > 0)
1044 		strcat(real_cmdline, " ");
1045 
1046 	if (kernel_cmdline)
1047 		strlcat(real_cmdline, kernel_cmdline, sizeof(real_cmdline));
1048 
1049 	if (!using_rootfs && !image_filename[0] && !initrd_filename) {
1050 		char tmp[PATH_MAX];
1051 
1052 		kvm_setup_create_new(custom_rootfs_name);
1053 		kvm_setup_resolv(custom_rootfs_name);
1054 
1055 		snprintf(tmp, PATH_MAX, "%s%s", kvm__get_dir(), "default");
1056 		if (virtio_9p__register(kvm, tmp, "/dev/root") < 0)
1057 			die("Unable to initialize virtio 9p");
1058 		if (virtio_9p__register(kvm, "/", "hostfs") < 0)
1059 			die("Unable to initialize virtio 9p");
1060 		using_rootfs = custom_rootfs = 1;
1061 	}
1062 
1063 	if (using_rootfs) {
1064 		strcat(real_cmdline, " root=/dev/root rw rootflags=rw,trans=virtio,version=9p2000.L rootfstype=9p");
1065 		if (custom_rootfs) {
1066 			kvm_run_set_sandbox();
1067 
1068 			strcat(real_cmdline, " init=/virt/init");
1069 
1070 			if (!no_dhcp)
1071 				strcat(real_cmdline, "  ip=dhcp");
1072 			if (kvm_custom_stage2())
1073 				die("Failed linking stage 2 of init.");
1074 		}
1075 	} else if (!strstr(real_cmdline, "root=")) {
1076 		strlcat(real_cmdline, " root=/dev/vda rw ", sizeof(real_cmdline));
1077 	}
1078 
1079 	if (image_count) {
1080 		kvm->nr_disks = image_count;
1081 		kvm->disks    = disk_image__open_all(image_filename, readonly_image, image_count);
1082 		if (!kvm->disks)
1083 			die("Unable to load all disk images.");
1084 
1085 		virtio_blk__init_all(kvm);
1086 	}
1087 
1088 	printf("  # %s run -k %s -m %Lu -c %d --name %s\n", KVM_BINARY_NAME,
1089 		kernel_filename, ram_size / 1024 / 1024, nrcpus, guest_name);
1090 
1091 	if (!kvm__load_kernel(kvm, kernel_filename, initrd_filename,
1092 				real_cmdline, vidmode))
1093 		die("unable to load kernel %s", kernel_filename);
1094 
1095 	kvm->vmlinux = vmlinux_filename;
1096 	r = symbol__init(kvm);
1097 	if (r < 0) {
1098 		pr_err("symbol__init() failed with error %d\n", r);
1099 		goto fail;
1100 	}
1101 
1102 	ioport__setup_arch();
1103 
1104 	rtc__init();
1105 
1106 	serial8250__init(kvm);
1107 
1108 	if (active_console == CONSOLE_VIRTIO)
1109 		virtio_console__init(kvm);
1110 
1111 	if (virtio_rng)
1112 		virtio_rng__init(kvm);
1113 
1114 	if (balloon)
1115 		virtio_bln__init(kvm);
1116 
1117 	if (!network)
1118 		network = DEFAULT_NETWORK;
1119 
1120 	virtio_9p__init(kvm);
1121 
1122 	for (i = 0; i < num_net_devices; i++) {
1123 		net_params[i].kvm = kvm;
1124 		virtio_net__init(&net_params[i]);
1125 	}
1126 
1127 	if (num_net_devices == 0 && no_net == 0) {
1128 		struct virtio_net_params net_params;
1129 
1130 		net_params = (struct virtio_net_params) {
1131 			.guest_ip	= guest_ip,
1132 			.host_ip	= host_ip,
1133 			.kvm		= kvm,
1134 			.script		= script,
1135 			.mode		= NET_MODE_USER,
1136 		};
1137 		str_to_mac(guest_mac, net_params.guest_mac);
1138 		str_to_mac(host_mac, net_params.host_mac);
1139 
1140 		virtio_net__init(&net_params);
1141 	}
1142 
1143 	kvm__init_ram(kvm);
1144 
1145 #ifdef CONFIG_X86
1146 	kbd__init(kvm);
1147 #endif
1148 
1149 	pci_shmem__init(kvm);
1150 
1151 	if (vnc || sdl)
1152 		fb = vesa__init(kvm);
1153 
1154 	if (vnc) {
1155 		if (fb)
1156 			vnc__init(fb);
1157 	}
1158 
1159 	if (sdl) {
1160 		if (fb)
1161 			sdl__init(fb);
1162 	}
1163 
1164 	fb__start();
1165 
1166 	/* Device init all done; firmware init must
1167 	 * come after this (it may set up device trees etc.)
1168 	 */
1169 
1170 	kvm__start_timer(kvm);
1171 
1172 	kvm__arch_setup_firmware(kvm);
1173 
1174 	for (i = 0; i < nrcpus; i++) {
1175 		kvm_cpus[i] = kvm_cpu__init(kvm, i);
1176 		if (!kvm_cpus[i])
1177 			die("unable to initialize KVM VCPU");
1178 	}
1179 
1180 	thread_pool__init(nr_online_cpus);
1181 	ioeventfd__start();
1182 
1183 fail:
1184 	return r;
1185 }
1186 
1187 static int kvm_cmd_run_work(void)
1188 {
1189 	int i, r = -1;
1190 	void *ret = NULL;
1191 
1192 	for (i = 0; i < nrcpus; i++) {
1193 		if (pthread_create(&kvm_cpus[i]->thread, NULL, kvm_cpu_thread, kvm_cpus[i]) != 0)
1194 			die("unable to create KVM VCPU thread");
1195 	}
1196 
1197 	/* Only VCPU #0 is going to exit by itself when shutting down */
1198 	if (pthread_join(kvm_cpus[0]->thread, &ret) != 0)
1199 		r = 0;
1200 
1201 	kvm_cpu__delete(kvm_cpus[0]);
1202 
1203 	for (i = 1; i < nrcpus; i++) {
1204 		if (kvm_cpus[i]->is_running) {
1205 			pthread_kill(kvm_cpus[i]->thread, SIGKVMEXIT);
1206 			if (pthread_join(kvm_cpus[i]->thread, &ret) != 0)
1207 				die("pthread_join");
1208 			kvm_cpu__delete(kvm_cpus[i]);
1209 		}
1210 		if (ret == NULL)
1211 			r = 0;
1212 	}
1213 
1214 	return r;
1215 }
1216 
1217 static void kvm_cmd_run_exit(int guest_ret)
1218 {
1219 	int r = 0;
1220 
1221 	compat__print_all_messages();
1222 
1223 	r = symbol__exit(kvm);
1224 	if (r < 0)
1225 		pr_warning("symbol__exit() failed with error %d\n", r);
1226 
1227 	r = irq__exit(kvm);
1228 	if (r < 0)
1229 		pr_warning("irq__exit() failed with error %d\n", r);
1230 
1231 	fb__stop();
1232 
1233 	virtio_blk__delete_all(kvm);
1234 	virtio_rng__delete_all(kvm);
1235 
1236 	disk_image__close_all(kvm->disks, image_count);
1237 	free(kvm_cpus);
1238 	kvm__delete(kvm);
1239 
1240 	if (guest_ret == 0)
1241 		printf("\n  # KVM session ended normally.\n");
1242 }
1243 
/*
 * Entry point of the run command: initialise the guest, run it until the
 * vCPUs stop, then clean up.
 *
 * Returns the negative value reported by kvm_cmd_run_init() if setup
 * fails, otherwise the result of kvm_cmd_run_work(). @prefix is unused.
 */
int kvm_cmd_run(int argc, const char **argv, const char *prefix)
{
	int guest_ret;
	int err = kvm_cmd_run_init(argc, argv);

	if (err < 0)
		return err;

	guest_ret = kvm_cmd_run_work();
	kvm_cmd_run_exit(guest_ret);

	return guest_ret;
}
1257