xref: /kvmtool/builtin-run.c (revision 89e0575a5e208762a7d7cc4a0a08261019593cdd)
1 #include "kvm/builtin-run.h"
2 
3 #include "kvm/builtin-setup.h"
4 #include "kvm/virtio-balloon.h"
5 #include "kvm/virtio-console.h"
6 #include "kvm/parse-options.h"
7 #include "kvm/8250-serial.h"
8 #include "kvm/framebuffer.h"
9 #include "kvm/disk-image.h"
10 #include "kvm/threadpool.h"
11 #include "kvm/virtio-blk.h"
12 #include "kvm/virtio-net.h"
13 #include "kvm/virtio-rng.h"
14 #include "kvm/ioeventfd.h"
15 #include "kvm/virtio-9p.h"
16 #include "kvm/barrier.h"
17 #include "kvm/kvm-cpu.h"
18 #include "kvm/ioport.h"
19 #include "kvm/symbol.h"
20 #include "kvm/i8042.h"
21 #include "kvm/mutex.h"
22 #include "kvm/term.h"
23 #include "kvm/util.h"
24 #include "kvm/strbuf.h"
25 #include "kvm/vesa.h"
26 #include "kvm/irq.h"
27 #include "kvm/kvm.h"
28 #include "kvm/pci.h"
29 #include "kvm/rtc.h"
30 #include "kvm/sdl.h"
31 #include "kvm/vnc.h"
32 #include "kvm/guest_compat.h"
33 #include "kvm/pci-shmem.h"
34 #include "kvm/kvm-ipc.h"
35 #include "kvm/builtin-debug.h"
36 
37 #include <linux/types.h>
38 
39 #include <sys/utsname.h>
40 #include <sys/types.h>
41 #include <sys/stat.h>
42 #include <termios.h>
43 #include <signal.h>
44 #include <stdlib.h>
45 #include <string.h>
46 #include <unistd.h>
47 #include <ctype.h>
48 #include <stdio.h>
49 
/* Fallback values used when the matching run option is not supplied. */
#define DEFAULT_KVM_DEV		"/dev/kvm"
#define DEFAULT_CONSOLE		"serial"
#define DEFAULT_NETWORK		"user"
#define DEFAULT_SCRIPT		"none"

/* Default IPv4 and MAC addresses for the host and guest ends of a NIC. */
#define DEFAULT_HOST_ADDR	"192.168.33.1"
#define DEFAULT_GUEST_ADDR	"192.168.33.15"
#define DEFAULT_HOST_MAC	"02:01:01:01:01:01"
#define DEFAULT_GUEST_MAC	"02:15:15:15:15:15"

/* Script path assigned to 'sandbox' when a command follows "--". */
const char *DEFAULT_SANDBOX_FILENAME = "guest/sandbox.sh";

/* Shift counts for converting to/from KiB, MiB and GiB. */
#define KB_SHIFT		(10)
#define MB_SHIFT		(20)
#define GB_SHIFT		(30)

/* Smallest guest RAM size accepted by kvm_cmd_run(). */
#define MIN_RAM_SIZE_MB		(64ULL)
#define MIN_RAM_SIZE_BYTE	(MIN_RAM_SIZE_MB << MB_SHIFT)
65 
66 struct kvm *kvm;
67 struct kvm_cpu **kvm_cpus;
68 __thread struct kvm_cpu *current_kvm_cpu;
69 
70 static u64 ram_size;
71 static u8  image_count;
72 static u8 num_net_devices;
73 static bool virtio_rng;
74 static const char *kernel_cmdline;
75 static const char *kernel_filename;
76 static const char *vmlinux_filename;
77 static const char *initrd_filename;
78 static const char *image_filename[MAX_DISK_IMAGES];
79 static const char *console;
80 static const char *dev;
81 static const char *network;
82 static const char *host_ip;
83 static const char *guest_ip;
84 static const char *guest_mac;
85 static const char *host_mac;
86 static const char *script;
87 static const char *guest_name;
88 static const char *sandbox;
89 static const char *hugetlbfs_path;
90 static const char *custom_rootfs_name = "default";
91 static struct virtio_net_params *net_params;
92 static bool single_step;
93 static bool readonly_image[MAX_DISK_IMAGES];
94 static bool vnc;
95 static bool sdl;
96 static bool balloon;
97 static bool using_rootfs;
98 static bool custom_rootfs;
99 static bool no_net;
100 static bool no_dhcp;
101 extern bool ioport_debug;
102 static int  kvm_run_wrapper;
103 extern int  active_console;
104 extern int  debug_iodelay;
105 
106 bool do_debug_print = false;
107 
108 static int nrcpus;
109 static int vidmode = -1;
110 
/* NULL-terminated usage lines handed to parse_options()/usage_with_options(). */
static const char * const run_usage[] = {
	[0] = "lkvm run [<options>] [<kernel image>]",
	[1] = NULL
};
115 
/*
 * Values for kvm_run_wrapper.
 *
 * KVM_RUN_DEFAULT must come first so that it is 0: kvm_run_wrapper is a
 * zero-initialised static, and with KVM_RUN_SANDBOX as the only (and
 * therefore zero-valued) enumerator every run looked like a sandbox run
 * even when kvm_run_set_wrapper_sandbox() was never called.
 */
enum {
	KVM_RUN_DEFAULT,
	KVM_RUN_SANDBOX,
};
119 
120 void kvm_run_set_wrapper_sandbox(void)
121 {
122 	kvm_run_wrapper = KVM_RUN_SANDBOX;
123 }
124 
125 static int img_name_parser(const struct option *opt, const char *arg, int unset)
126 {
127 	char *sep;
128 	struct stat st;
129 	char path[PATH_MAX];
130 
131 	if (stat(arg, &st) == 0 &&
132 	    S_ISDIR(st.st_mode)) {
133 		char tmp[PATH_MAX];
134 
135 		if (using_rootfs)
136 			die("Please use only one rootfs directory atmost");
137 
138 		if (realpath(arg, tmp) == 0 ||
139 		    virtio_9p__register(kvm, tmp, "/dev/root") < 0)
140 			die("Unable to initialize virtio 9p");
141 		using_rootfs = 1;
142 		return 0;
143 	}
144 
145 	snprintf(path, PATH_MAX, "%s%s", kvm__get_dir(), arg);
146 
147 	if (stat(path, &st) == 0 &&
148 	    S_ISDIR(st.st_mode)) {
149 		char tmp[PATH_MAX];
150 
151 		if (using_rootfs)
152 			die("Please use only one rootfs directory atmost");
153 
154 		if (realpath(path, tmp) == 0 ||
155 		    virtio_9p__register(kvm, tmp, "/dev/root") < 0)
156 			die("Unable to initialize virtio 9p");
157 		if (virtio_9p__register(kvm, "/", "hostfs") < 0)
158 			die("Unable to initialize virtio 9p");
159 		kvm_setup_resolv(arg);
160 		using_rootfs = custom_rootfs = 1;
161 		custom_rootfs_name = arg;
162 		return 0;
163 	}
164 
165 	if (image_count >= MAX_DISK_IMAGES)
166 		die("Currently only 4 images are supported");
167 
168 	image_filename[image_count] = arg;
169 	sep = strstr(arg, ",");
170 	if (sep) {
171 		if (strcmp(sep + 1, "ro") == 0)
172 			readonly_image[image_count] = 1;
173 		*sep = 0;
174 	}
175 
176 	image_count++;
177 
178 	return 0;
179 }
180 
181 static int virtio_9p_rootdir_parser(const struct option *opt, const char *arg, int unset)
182 {
183 	char *tag_name;
184 	char tmp[PATH_MAX];
185 
186 	/*
187 	 * 9p dir can be of the form dirname,tag_name or
188 	 * just dirname. In the later case we use the
189 	 * default tag name
190 	 */
191 	tag_name = strstr(arg, ",");
192 	if (tag_name) {
193 		*tag_name = '\0';
194 		tag_name++;
195 	}
196 	if (realpath(arg, tmp)) {
197 		if (virtio_9p__register(kvm, tmp, tag_name) < 0)
198 			die("Unable to initialize virtio 9p");
199 	} else
200 		die("Failed resolving 9p path");
201 	return 0;
202 }
203 
/*
 * Option callback for --tty: converts the argument with atoi() and passes
 * the resulting id to term_set_tty(). Always returns 0.
 */
static int tty_parser(const struct option *opt, const char *arg, int unset)
{
	term_set_tty(atoi(arg));
	return 0;
}
212 
/*
 * Parse a colon-separated hexadecimal MAC string ("aa:bb:cc:dd:ee:ff")
 * into the six bytes at 'mac'. Bytes that fail to parse are left untouched.
 */
static inline void str_to_mac(const char *str, char *mac)
{
	unsigned char *octet = (unsigned char *)mac;

	sscanf(str, "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx",
	       &octet[0], &octet[1], &octet[2],
	       &octet[3], &octet[4], &octet[5]);
}
218 static int set_net_param(struct virtio_net_params *p, const char *param,
219 				const char *val)
220 {
221 	if (strcmp(param, "guest_mac") == 0) {
222 		str_to_mac(val, p->guest_mac);
223 	} else if (strcmp(param, "mode") == 0) {
224 		if (!strncmp(val, "user", 4)) {
225 			int i;
226 
227 			for (i = 0; i < num_net_devices; i++)
228 				if (net_params[i].mode == NET_MODE_USER)
229 					die("Only one usermode network device allowed at a time");
230 			p->mode = NET_MODE_USER;
231 		} else if (!strncmp(val, "tap", 3)) {
232 			p->mode = NET_MODE_TAP;
233 		} else if (!strncmp(val, "none", 4)) {
234 			no_net = 1;
235 			return -1;
236 		} else
237 			die("Unkown network mode %s, please use user, tap or none", network);
238 	} else if (strcmp(param, "script") == 0) {
239 		p->script = strdup(val);
240 	} else if (strcmp(param, "guest_ip") == 0) {
241 		p->guest_ip = strdup(val);
242 	} else if (strcmp(param, "host_ip") == 0) {
243 		p->host_ip = strdup(val);
244 	} else if (strcmp(param, "vhost") == 0) {
245 		p->vhost = atoi(val);
246 	} else if (strcmp(param, "fd") == 0) {
247 		p->fd = atoi(val);
248 	}
249 
250 	return 0;
251 }
252 
253 static int netdev_parser(const struct option *opt, const char *arg, int unset)
254 {
255 	struct virtio_net_params p;
256 	char *buf = NULL, *cmd = NULL, *cur = NULL;
257 	bool on_cmd = true;
258 
259 	if (arg) {
260 		buf = strdup(arg);
261 		if (buf == NULL)
262 			die("Failed allocating new net buffer");
263 		cur = strtok(buf, ",=");
264 	}
265 
266 	p = (struct virtio_net_params) {
267 		.guest_ip	= DEFAULT_GUEST_ADDR,
268 		.host_ip	= DEFAULT_HOST_ADDR,
269 		.script		= DEFAULT_SCRIPT,
270 		.mode		= NET_MODE_TAP,
271 	};
272 
273 	str_to_mac(DEFAULT_GUEST_MAC, p.guest_mac);
274 	p.guest_mac[5] += num_net_devices;
275 
276 	while (cur) {
277 		if (on_cmd) {
278 			cmd = cur;
279 		} else {
280 			if (set_net_param(&p, cmd, cur) < 0)
281 				goto done;
282 		}
283 		on_cmd = !on_cmd;
284 
285 		cur = strtok(NULL, ",=");
286 	};
287 
288 	num_net_devices++;
289 
290 	net_params = realloc(net_params, num_net_devices * sizeof(*net_params));
291 	if (net_params == NULL)
292 		die("Failed adding new network device");
293 
294 	net_params[num_net_devices - 1] = p;
295 
296 done:
297 	free(buf);
298 	return 0;
299 }
300 
301 static int shmem_parser(const struct option *opt, const char *arg, int unset)
302 {
303 	const u64 default_size = SHMEM_DEFAULT_SIZE;
304 	const u64 default_phys_addr = SHMEM_DEFAULT_ADDR;
305 	const char *default_handle = SHMEM_DEFAULT_HANDLE;
306 	struct shmem_info *si = malloc(sizeof(struct shmem_info));
307 	u64 phys_addr;
308 	u64 size;
309 	char *handle = NULL;
310 	int create = 0;
311 	const char *p = arg;
312 	char *next;
313 	int base = 10;
314 	int verbose = 0;
315 
316 	const int skip_pci = strlen("pci:");
317 	if (verbose)
318 		pr_info("shmem_parser(%p,%s,%d)", opt, arg, unset);
319 	/* parse out optional addr family */
320 	if (strcasestr(p, "pci:")) {
321 		p += skip_pci;
322 	} else if (strcasestr(p, "mem:")) {
323 		die("I can't add to E820 map yet.\n");
324 	}
325 	/* parse out physical addr */
326 	base = 10;
327 	if (strcasestr(p, "0x"))
328 		base = 16;
329 	phys_addr = strtoll(p, &next, base);
330 	if (next == p && phys_addr == 0) {
331 		pr_info("shmem: no physical addr specified, using default.");
332 		phys_addr = default_phys_addr;
333 	}
334 	if (*next != ':' && *next != '\0')
335 		die("shmem: unexpected chars after phys addr.\n");
336 	if (*next == '\0')
337 		p = next;
338 	else
339 		p = next + 1;
340 	/* parse out size */
341 	base = 10;
342 	if (strcasestr(p, "0x"))
343 		base = 16;
344 	size = strtoll(p, &next, base);
345 	if (next == p && size == 0) {
346 		pr_info("shmem: no size specified, using default.");
347 		size = default_size;
348 	}
349 	/* look for [KMGkmg][Bb]*  uses base 2. */
350 	int skip_B = 0;
351 	if (strspn(next, "KMGkmg")) {	/* might have a prefix */
352 		if (*(next + 1) == 'B' || *(next + 1) == 'b')
353 			skip_B = 1;
354 		switch (*next) {
355 		case 'K':
356 		case 'k':
357 			size = size << KB_SHIFT;
358 			break;
359 		case 'M':
360 		case 'm':
361 			size = size << MB_SHIFT;
362 			break;
363 		case 'G':
364 		case 'g':
365 			size = size << GB_SHIFT;
366 			break;
367 		default:
368 			die("shmem: bug in detecting size prefix.");
369 			break;
370 		}
371 		next += 1 + skip_B;
372 	}
373 	if (*next != ':' && *next != '\0') {
374 		die("shmem: unexpected chars after phys size. <%c><%c>\n",
375 		    *next, *p);
376 	}
377 	if (*next == '\0')
378 		p = next;
379 	else
380 		p = next + 1;
381 	/* parse out optional shmem handle */
382 	const int skip_handle = strlen("handle=");
383 	next = strcasestr(p, "handle=");
384 	if (*p && next) {
385 		if (p != next)
386 			die("unexpected chars before handle\n");
387 		p += skip_handle;
388 		next = strchrnul(p, ':');
389 		if (next - p) {
390 			handle = malloc(next - p + 1);
391 			strncpy(handle, p, next - p);
392 			handle[next - p] = '\0';	/* just in case. */
393 		}
394 		if (*next == '\0')
395 			p = next;
396 		else
397 			p = next + 1;
398 	}
399 	/* parse optional create flag to see if we should create shm seg. */
400 	if (*p && strcasestr(p, "create")) {
401 		create = 1;
402 		p += strlen("create");
403 	}
404 	if (*p != '\0')
405 		die("shmem: unexpected trailing chars\n");
406 	if (handle == NULL) {
407 		handle = malloc(strlen(default_handle) + 1);
408 		strcpy(handle, default_handle);
409 	}
410 	if (verbose) {
411 		pr_info("shmem: phys_addr = %llx", phys_addr);
412 		pr_info("shmem: size      = %llx", size);
413 		pr_info("shmem: handle    = %s", handle);
414 		pr_info("shmem: create    = %d", create);
415 	}
416 
417 	si->phys_addr = phys_addr;
418 	si->size = size;
419 	si->handle = handle;
420 	si->create = create;
421 	pci_shmem__register_mem(si);	/* ownership of si, etc. passed on. */
422 	return 0;
423 }
424 
425 static const struct option options[] = {
426 	OPT_GROUP("Basic options:"),
427 	OPT_STRING('\0', "name", &guest_name, "guest name",
428 			"A name for the guest"),
429 	OPT_INTEGER('c', "cpus", &nrcpus, "Number of CPUs"),
430 	OPT_U64('m', "mem", &ram_size, "Virtual machine memory size in MiB."),
431 	OPT_CALLBACK('\0', "shmem", NULL,
432 		     "[pci:]<addr>:<size>[:handle=<handle>][:create]",
433 		     "Share host shmem with guest via pci device",
434 		     shmem_parser),
435 	OPT_CALLBACK('d', "disk", NULL, "image or rootfs_dir", "Disk image or rootfs directory", img_name_parser),
436 	OPT_BOOLEAN('\0', "balloon", &balloon, "Enable virtio balloon"),
437 	OPT_BOOLEAN('\0', "vnc", &vnc, "Enable VNC framebuffer"),
438 	OPT_BOOLEAN('\0', "sdl", &sdl, "Enable SDL framebuffer"),
439 	OPT_BOOLEAN('\0', "rng", &virtio_rng, "Enable virtio Random Number Generator"),
440 	OPT_CALLBACK('\0', "9p", NULL, "dir_to_share,tag_name",
441 		     "Enable virtio 9p to share files between host and guest", virtio_9p_rootdir_parser),
442 	OPT_STRING('\0', "console", &console, "serial, virtio or hv",
443 			"Console to use"),
444 	OPT_STRING('\0', "dev", &dev, "device_file", "KVM device file"),
445 	OPT_CALLBACK('\0', "tty", NULL, "tty id",
446 		     "Remap guest TTY into a pty on the host",
447 		     tty_parser),
448 	OPT_STRING('\0', "sandbox", &sandbox, "script",
449 			"Run this script when booting into custom rootfs"),
450 	OPT_STRING('\0', "hugetlbfs", &hugetlbfs_path, "path", "Hugetlbfs path"),
451 
452 	OPT_GROUP("Kernel options:"),
453 	OPT_STRING('k', "kernel", &kernel_filename, "kernel",
454 			"Kernel to boot in virtual machine"),
455 	OPT_STRING('i', "initrd", &initrd_filename, "initrd",
456 			"Initial RAM disk image"),
457 	OPT_STRING('p', "params", &kernel_cmdline, "params",
458 			"Kernel command line arguments"),
459 
460 	OPT_GROUP("Networking options:"),
461 	OPT_CALLBACK_DEFAULT('n', "network", NULL, "network params",
462 		     "Create a new guest NIC",
463 		     netdev_parser, NULL),
464 	OPT_BOOLEAN('\0', "no-dhcp", &no_dhcp, "Disable kernel DHCP in rootfs mode"),
465 
466 	OPT_GROUP("BIOS options:"),
467 	OPT_INTEGER('\0', "vidmode", &vidmode,
468 		    "Video mode"),
469 
470 	OPT_GROUP("Debug options:"),
471 	OPT_BOOLEAN('\0', "debug", &do_debug_print,
472 			"Enable debug messages"),
473 	OPT_BOOLEAN('\0', "debug-single-step", &single_step,
474 			"Enable single stepping"),
475 	OPT_BOOLEAN('\0', "debug-ioport", &ioport_debug,
476 			"Enable ioport debugging"),
477 	OPT_INTEGER('\0', "debug-iodelay", &debug_iodelay,
478 			"Delay IO by millisecond"),
479 	OPT_END()
480 };
481 
482 /*
483  * Serialize debug printout so that the output of multiple vcpus does not
484  * get mixed up:
485  */
486 static int printout_done;
487 
488 static void handle_sigusr1(int sig)
489 {
490 	struct kvm_cpu *cpu = current_kvm_cpu;
491 	int fd = kvm_cpu__get_debug_fd();
492 
493 	if (!cpu || cpu->needs_nmi)
494 		return;
495 
496 	dprintf(fd, "\n #\n # vCPU #%ld's dump:\n #\n", cpu->cpu_id);
497 	kvm_cpu__show_registers(cpu);
498 	kvm_cpu__show_code(cpu);
499 	kvm_cpu__show_page_tables(cpu);
500 	fflush(stdout);
501 	printout_done = 1;
502 	mb();
503 }
504 
505 /* Pause/resume the guest using SIGUSR2 */
506 static int is_paused;
507 
508 static void handle_pause(int fd, u32 type, u32 len, u8 *msg)
509 {
510 	if (WARN_ON(len))
511 		return;
512 
513 	if (type == KVM_IPC_RESUME && is_paused) {
514 		kvm->vm_state = KVM_VMSTATE_RUNNING;
515 		kvm__continue();
516 	} else if (type == KVM_IPC_PAUSE && !is_paused) {
517 		kvm->vm_state = KVM_VMSTATE_PAUSED;
518 		kvm__pause();
519 	} else {
520 		return;
521 	}
522 
523 	is_paused = !is_paused;
524 }
525 
526 static void handle_vmstate(int fd, u32 type, u32 len, u8 *msg)
527 {
528 	int r = 0;
529 
530 	if (type == KVM_IPC_VMSTATE)
531 		r = write(fd, &kvm->vm_state, sizeof(kvm->vm_state));
532 
533 	if (r < 0)
534 		pr_warning("Failed sending VMSTATE");
535 }
536 
537 static void handle_debug(int fd, u32 type, u32 len, u8 *msg)
538 {
539 	int i;
540 	struct debug_cmd_params *params;
541 	u32 dbg_type;
542 	u32 vcpu;
543 
544 	if (WARN_ON(type != KVM_IPC_DEBUG || len != sizeof(*params)))
545 		return;
546 
547 	params = (void *)msg;
548 	dbg_type = params->dbg_type;
549 	vcpu = params->cpu;
550 
551 	if (dbg_type & KVM_DEBUG_CMD_TYPE_NMI) {
552 		if ((int)vcpu >= kvm->nrcpus)
553 			return;
554 
555 		kvm_cpus[vcpu]->needs_nmi = 1;
556 		pthread_kill(kvm_cpus[vcpu]->thread, SIGUSR1);
557 	}
558 
559 	if (!(dbg_type & KVM_DEBUG_CMD_TYPE_DUMP))
560 		return;
561 
562 	for (i = 0; i < nrcpus; i++) {
563 		struct kvm_cpu *cpu = kvm_cpus[i];
564 
565 		if (!cpu)
566 			continue;
567 
568 		printout_done = 0;
569 
570 		kvm_cpu__set_debug_fd(fd);
571 		pthread_kill(cpu->thread, SIGUSR1);
572 		/*
573 		 * Wait for the vCPU to dump state before signalling
574 		 * the next thread. Since this is debug code it does
575 		 * not matter that we are burning CPU time a bit:
576 		 */
577 		while (!printout_done)
578 			mb();
579 	}
580 
581 	close(fd);
582 
583 	serial8250__inject_sysrq(kvm);
584 }
585 
586 static void handle_sigalrm(int sig)
587 {
588 	kvm__arch_periodic_poll(kvm);
589 }
590 
591 static void handle_stop(int fd, u32 type, u32 len, u8 *msg)
592 {
593 	if (WARN_ON(type != KVM_IPC_STOP || len))
594 		return;
595 
596 	kvm_cpu__reboot();
597 }
598 
599 static void *kvm_cpu_thread(void *arg)
600 {
601 	current_kvm_cpu		= arg;
602 
603 	if (kvm_cpu__start(current_kvm_cpu))
604 		goto panic_kvm;
605 
606 	return (void *) (intptr_t) 0;
607 
608 panic_kvm:
609 	fprintf(stderr, "KVM exit reason: %u (\"%s\")\n",
610 		current_kvm_cpu->kvm_run->exit_reason,
611 		kvm_exit_reasons[current_kvm_cpu->kvm_run->exit_reason]);
612 	if (current_kvm_cpu->kvm_run->exit_reason == KVM_EXIT_UNKNOWN)
613 		fprintf(stderr, "KVM exit code: 0x%Lu\n",
614 			current_kvm_cpu->kvm_run->hw.hardware_exit_reason);
615 
616 	kvm_cpu__set_debug_fd(STDOUT_FILENO);
617 	kvm_cpu__show_registers(current_kvm_cpu);
618 	kvm_cpu__show_code(current_kvm_cpu);
619 	kvm_cpu__show_page_tables(current_kvm_cpu);
620 
621 	return (void *) (intptr_t) 1;
622 }
623 
624 static char kernel[PATH_MAX];
625 
626 static const char *host_kernels[] = {
627 	"/boot/vmlinuz",
628 	"/boot/bzImage",
629 	NULL
630 };
631 
632 static const char *default_kernels[] = {
633 	"./bzImage",
634 	"arch/" BUILD_ARCH "/boot/bzImage",
635 	"../../arch/" BUILD_ARCH "/boot/bzImage",
636 	NULL
637 };
638 
639 static const char *default_vmlinux[] = {
640 	"vmlinux",
641 	"../../../vmlinux",
642 	"../../vmlinux",
643 	NULL
644 };
645 
646 static void kernel_usage_with_options(void)
647 {
648 	const char **k;
649 	struct utsname uts;
650 
651 	fprintf(stderr, "Fatal: could not find default kernel image in:\n");
652 	k = &default_kernels[0];
653 	while (*k) {
654 		fprintf(stderr, "\t%s\n", *k);
655 		k++;
656 	}
657 
658 	if (uname(&uts) < 0)
659 		return;
660 
661 	k = &host_kernels[0];
662 	while (*k) {
663 		if (snprintf(kernel, PATH_MAX, "%s-%s", *k, uts.release) < 0)
664 			return;
665 		fprintf(stderr, "\t%s\n", kernel);
666 		k++;
667 	}
668 	fprintf(stderr, "\nPlease see '%s run --help' for more options.\n\n",
669 		KVM_BINARY_NAME);
670 }
671 
672 static u64 host_ram_size(void)
673 {
674 	long page_size;
675 	long nr_pages;
676 
677 	nr_pages	= sysconf(_SC_PHYS_PAGES);
678 	if (nr_pages < 0) {
679 		pr_warning("sysconf(_SC_PHYS_PAGES) failed");
680 		return 0;
681 	}
682 
683 	page_size	= sysconf(_SC_PAGE_SIZE);
684 	if (page_size < 0) {
685 		pr_warning("sysconf(_SC_PAGE_SIZE) failed");
686 		return 0;
687 	}
688 
689 	return (nr_pages * page_size) >> MB_SHIFT;
690 }
691 
692 /*
693  * If user didn't specify how much memory it wants to allocate for the guest,
694  * avoid filling the whole host RAM.
695  */
696 #define RAM_SIZE_RATIO		0.8
697 
698 static u64 get_ram_size(int nr_cpus)
699 {
700 	u64 available;
701 	u64 ram_size;
702 
703 	ram_size	= 64 * (nr_cpus + 3);
704 
705 	available	= host_ram_size() * RAM_SIZE_RATIO;
706 	if (!available)
707 		available = MIN_RAM_SIZE_MB;
708 
709 	if (ram_size > available)
710 		ram_size	= available;
711 
712 	return ram_size;
713 }
714 
715 static const char *find_kernel(void)
716 {
717 	const char **k;
718 	struct stat st;
719 	struct utsname uts;
720 
721 	k = &default_kernels[0];
722 	while (*k) {
723 		if (stat(*k, &st) < 0 || !S_ISREG(st.st_mode)) {
724 			k++;
725 			continue;
726 		}
727 		strncpy(kernel, *k, PATH_MAX);
728 		return kernel;
729 	}
730 
731 	if (uname(&uts) < 0)
732 		return NULL;
733 
734 	k = &host_kernels[0];
735 	while (*k) {
736 		if (snprintf(kernel, PATH_MAX, "%s-%s", *k, uts.release) < 0)
737 			return NULL;
738 
739 		if (stat(kernel, &st) < 0 || !S_ISREG(st.st_mode)) {
740 			k++;
741 			continue;
742 		}
743 		return kernel;
744 
745 	}
746 	return NULL;
747 }
748 
749 static const char *find_vmlinux(void)
750 {
751 	const char **vmlinux;
752 
753 	vmlinux = &default_vmlinux[0];
754 	while (*vmlinux) {
755 		struct stat st;
756 
757 		if (stat(*vmlinux, &st) < 0 || !S_ISREG(st.st_mode)) {
758 			vmlinux++;
759 			continue;
760 		}
761 		return *vmlinux;
762 	}
763 	return NULL;
764 }
765 
766 void kvm_run_help(void)
767 {
768 	usage_with_options(run_usage, options);
769 }
770 
771 static int kvm_custom_stage2(void)
772 {
773 	char tmp[PATH_MAX], dst[PATH_MAX], *src;
774 	const char *rootfs = custom_rootfs_name;
775 	int r;
776 
777 	src = realpath("guest/init_stage2", NULL);
778 	if (src == NULL)
779 		return -ENOMEM;
780 
781 	snprintf(tmp, PATH_MAX, "%s%s/virt/init_stage2", kvm__get_dir(), rootfs);
782 	remove(tmp);
783 
784 	snprintf(dst, PATH_MAX, "/host/%s", src);
785 	r = symlink(dst, tmp);
786 	free(src);
787 
788 	return r;
789 }
790 
791 static int kvm_run_set_sandbox(void)
792 {
793 	const char *guestfs_name = custom_rootfs_name;
794 	char path[PATH_MAX], script[PATH_MAX], *tmp;
795 
796 	snprintf(path, PATH_MAX, "%s%s/virt/sandbox.sh", kvm__get_dir(), guestfs_name);
797 
798 	remove(path);
799 
800 	if (sandbox == NULL)
801 		return 0;
802 
803 	tmp = realpath(sandbox, NULL);
804 	if (tmp == NULL)
805 		return -ENOMEM;
806 
807 	snprintf(script, PATH_MAX, "/host/%s", tmp);
808 	free(tmp);
809 
810 	return symlink(script, path);
811 }
812 
/*
 * Write 'arg' to 'fd' quoted so that a shell reads it back verbatim.
 *
 * Runs of characters without a single quote are emitted as 'run'; each
 * single quote is emitted as "'". An empty argument becomes ''.
 * Any failed write terminates through die().
 */
static void kvm_write_sandbox_cmd_exactly(int fd, const char *arg)
{
	const char *seg_end;

	if (arg[0] == '\0') { /* zero length string */
		if (write(fd, "''", 2) <= 0)
			die("Failed writing sandbox script");
		return;
	}

	for (; *arg; arg = seg_end + 1) {
		/* seg_end is the next single quote, or the terminating NUL. */
		seg_end = strchrnul(arg, '\'');

		if (seg_end != arg) {
			if (write(fd, "'", 1) <= 0 ||
			    write(fd, arg, seg_end - arg) <= 0 ||
			    write(fd, "'", 1) <= 0)
				die("Failed writing sandbox script");
		}

		if (*seg_end == '\0')
			break;

		if (write(fd, "\"'\"", 3) <= 0)
			die("Failed writing sandbox script");
	}
}
844 
845 static void kvm_run_write_sandbox_cmd(const char **argv, int argc)
846 {
847 	const char script_hdr[] = "#! /bin/bash\n\n";
848 	int fd;
849 
850 	remove(sandbox);
851 
852 	fd = open(sandbox, O_RDWR | O_CREAT, 0777);
853 	if (fd < 0)
854 		die("Failed creating sandbox script");
855 
856 	if (write(fd, script_hdr, sizeof(script_hdr) - 1) <= 0)
857 		die("Failed writing sandbox script");
858 
859 	while (argc) {
860 		kvm_write_sandbox_cmd_exactly(fd, argv[0]);
861 		if (argc - 1)
862 			if (write(fd, " ", 1) <= 0)
863 				die("Failed writing sandbox script");
864 		argv++;
865 		argc--;
866 	}
867 	if (write(fd, "\n", 1) <= 0)
868 		die("Failed writing sandbox script");
869 
870 	close(fd);
871 }
872 
873 int kvm_cmd_run(int argc, const char **argv, const char *prefix)
874 {
875 	static char real_cmdline[2048], default_name[20];
876 	struct framebuffer *fb = NULL;
877 	unsigned int nr_online_cpus;
878 	int exit_code = 0;
879 	int max_cpus, recommended_cpus;
880 	int i;
881 	void *ret;
882 
883 	signal(SIGALRM, handle_sigalrm);
884 	kvm_ipc__register_handler(KVM_IPC_DEBUG, handle_debug);
885 	signal(SIGUSR1, handle_sigusr1);
886 	kvm_ipc__register_handler(KVM_IPC_PAUSE, handle_pause);
887 	kvm_ipc__register_handler(KVM_IPC_RESUME, handle_pause);
888 	kvm_ipc__register_handler(KVM_IPC_STOP, handle_stop);
889 	kvm_ipc__register_handler(KVM_IPC_VMSTATE, handle_vmstate);
890 
891 	nr_online_cpus = sysconf(_SC_NPROCESSORS_ONLN);
892 
893 	while (argc != 0) {
894 		argc = parse_options(argc, argv, options, run_usage,
895 				PARSE_OPT_STOP_AT_NON_OPTION |
896 				PARSE_OPT_KEEP_DASHDASH);
897 		if (argc != 0) {
898 			/* Cusrom options, should have been handled elsewhere */
899 			if (strcmp(argv[0], "--") == 0) {
900 				if (kvm_run_wrapper == KVM_RUN_SANDBOX) {
901 					sandbox = DEFAULT_SANDBOX_FILENAME;
902 					kvm_run_write_sandbox_cmd(argv+1, argc-1);
903 					break;
904 				}
905 			}
906 
907 			if (kernel_filename) {
908 				fprintf(stderr, "Cannot handle parameter: "
909 						"%s\n", argv[0]);
910 				usage_with_options(run_usage, options);
911 				return EINVAL;
912 			}
913 			/* first unhandled parameter is treated as a kernel
914 			   image
915 			 */
916 			kernel_filename = argv[0];
917 			argv++;
918 			argc--;
919 		}
920 
921 	}
922 
923 	if (!kernel_filename)
924 		kernel_filename = find_kernel();
925 
926 	if (!kernel_filename) {
927 		kernel_usage_with_options();
928 		return EINVAL;
929 	}
930 
931 	vmlinux_filename = find_vmlinux();
932 
933 	if (nrcpus == 0)
934 		nrcpus = nr_online_cpus;
935 
936 	if (!ram_size)
937 		ram_size	= get_ram_size(nrcpus);
938 
939 	if (ram_size < MIN_RAM_SIZE_MB)
940 		die("Not enough memory specified: %lluMB (min %lluMB)", ram_size, MIN_RAM_SIZE_MB);
941 
942 	if (ram_size > host_ram_size())
943 		pr_warning("Guest memory size %lluMB exceeds host physical RAM size %lluMB", ram_size, host_ram_size());
944 
945 	ram_size <<= MB_SHIFT;
946 
947 	if (!dev)
948 		dev = DEFAULT_KVM_DEV;
949 
950 	if (!console)
951 		console = DEFAULT_CONSOLE;
952 
953 	if (!strncmp(console, "virtio", 6))
954 		active_console  = CONSOLE_VIRTIO;
955 	else if (!strncmp(console, "serial", 6))
956 		active_console  = CONSOLE_8250;
957 	else if (!strncmp(console, "hv", 2))
958 		active_console = CONSOLE_HV;
959 	else
960 		pr_warning("No console!");
961 
962 	if (!host_ip)
963 		host_ip = DEFAULT_HOST_ADDR;
964 
965 	if (!guest_ip)
966 		guest_ip = DEFAULT_GUEST_ADDR;
967 
968 	if (!guest_mac)
969 		guest_mac = DEFAULT_GUEST_MAC;
970 
971 	if (!host_mac)
972 		host_mac = DEFAULT_HOST_MAC;
973 
974 	if (!script)
975 		script = DEFAULT_SCRIPT;
976 
977 	symbol__init(vmlinux_filename);
978 
979 	term_init();
980 
981 	if (!guest_name) {
982 		if (custom_rootfs) {
983 			guest_name = custom_rootfs_name;
984 		} else {
985 			sprintf(default_name, "guest-%u", getpid());
986 			guest_name = default_name;
987 		}
988 	}
989 
990 	kvm = kvm__init(dev, hugetlbfs_path, ram_size, guest_name);
991 
992 	kvm->single_step = single_step;
993 
994 	ioeventfd__init(kvm);
995 
996 	max_cpus = kvm__max_cpus(kvm);
997 	recommended_cpus = kvm__recommended_cpus(kvm);
998 
999 	if (nrcpus > max_cpus) {
1000 		printf("  # Limit the number of CPUs to %d\n", max_cpus);
1001 		nrcpus = max_cpus;
1002 	} else if (nrcpus > recommended_cpus) {
1003 		printf("  # Warning: The maximum recommended amount of VCPUs"
1004 			" is %d\n", recommended_cpus);
1005 	}
1006 
1007 	kvm->nrcpus = nrcpus;
1008 
1009 	/* Alloc one pointer too many, so array ends up 0-terminated */
1010 	kvm_cpus = calloc(nrcpus + 1, sizeof(void *));
1011 	if (!kvm_cpus)
1012 		die("Couldn't allocate array for %d CPUs", nrcpus);
1013 
1014 	irq__init(kvm);
1015 
1016 	pci__init();
1017 
1018 	/*
1019 	 * vidmode should be either specified
1020 	 * either set by default
1021 	 */
1022 	if (vnc || sdl) {
1023 		if (vidmode == -1)
1024 			vidmode = 0x312;
1025 	} else
1026 		vidmode = 0;
1027 
1028 	memset(real_cmdline, 0, sizeof(real_cmdline));
1029 	kvm__arch_set_cmdline(real_cmdline, vnc || sdl);
1030 
1031 	if (strlen(real_cmdline) > 0)
1032 		strcat(real_cmdline, " ");
1033 
1034 	if (kernel_cmdline)
1035 		strlcat(real_cmdline, kernel_cmdline, sizeof(real_cmdline));
1036 
1037 	if (!using_rootfs && !image_filename[0] && !initrd_filename) {
1038 		char tmp[PATH_MAX];
1039 
1040 		kvm_setup_create_new(custom_rootfs_name);
1041 		kvm_setup_resolv(custom_rootfs_name);
1042 
1043 		snprintf(tmp, PATH_MAX, "%s%s", kvm__get_dir(), "default");
1044 		if (virtio_9p__register(kvm, tmp, "/dev/root") < 0)
1045 			die("Unable to initialize virtio 9p");
1046 		if (virtio_9p__register(kvm, "/", "hostfs") < 0)
1047 			die("Unable to initialize virtio 9p");
1048 		using_rootfs = custom_rootfs = 1;
1049 	}
1050 
1051 	if (using_rootfs) {
1052 		strcat(real_cmdline, " root=/dev/root rw rootflags=rw,trans=virtio,version=9p2000.L rootfstype=9p");
1053 		if (custom_rootfs) {
1054 			kvm_run_set_sandbox();
1055 
1056 			strcat(real_cmdline, " init=/virt/init");
1057 
1058 			if (!no_dhcp)
1059 				strcat(real_cmdline, "  ip=dhcp");
1060 			if (kvm_custom_stage2())
1061 				die("Failed linking stage 2 of init.");
1062 		}
1063 	} else if (!strstr(real_cmdline, "root=")) {
1064 		strlcat(real_cmdline, " root=/dev/vda rw ", sizeof(real_cmdline));
1065 	}
1066 
1067 	if (image_count) {
1068 		kvm->nr_disks = image_count;
1069 		kvm->disks    = disk_image__open_all(image_filename, readonly_image, image_count);
1070 		if (!kvm->disks)
1071 			die("Unable to load all disk images.");
1072 
1073 		virtio_blk__init_all(kvm);
1074 	}
1075 
1076 	printf("  # %s run -k %s -m %Lu -c %d --name %s\n", KVM_BINARY_NAME,
1077 		kernel_filename, ram_size / 1024 / 1024, nrcpus, guest_name);
1078 
1079 	if (!kvm__load_kernel(kvm, kernel_filename, initrd_filename,
1080 				real_cmdline, vidmode))
1081 		die("unable to load kernel %s", kernel_filename);
1082 
1083 	kvm->vmlinux		= vmlinux_filename;
1084 
1085 	ioport__setup_arch();
1086 
1087 	rtc__init();
1088 
1089 	serial8250__init(kvm);
1090 
1091 	if (active_console == CONSOLE_VIRTIO)
1092 		virtio_console__init(kvm);
1093 
1094 	if (virtio_rng)
1095 		virtio_rng__init(kvm);
1096 
1097 	if (balloon)
1098 		virtio_bln__init(kvm);
1099 
1100 	if (!network)
1101 		network = DEFAULT_NETWORK;
1102 
1103 	virtio_9p__init(kvm);
1104 
1105 	for (i = 0; i < num_net_devices; i++) {
1106 		net_params[i].kvm = kvm;
1107 		virtio_net__init(&net_params[i]);
1108 	}
1109 
1110 	if (num_net_devices == 0 && no_net == 0) {
1111 		struct virtio_net_params net_params;
1112 
1113 		net_params = (struct virtio_net_params) {
1114 			.guest_ip	= guest_ip,
1115 			.host_ip	= host_ip,
1116 			.kvm		= kvm,
1117 			.script		= script,
1118 			.mode		= NET_MODE_USER,
1119 		};
1120 		str_to_mac(guest_mac, net_params.guest_mac);
1121 		str_to_mac(host_mac, net_params.host_mac);
1122 
1123 		virtio_net__init(&net_params);
1124 	}
1125 
1126 	kvm__init_ram(kvm);
1127 
1128 #ifdef CONFIG_X86
1129 	kbd__init(kvm);
1130 #endif
1131 
1132 	pci_shmem__init(kvm);
1133 
1134 	if (vnc || sdl)
1135 		fb = vesa__init(kvm);
1136 
1137 	if (vnc) {
1138 		if (fb)
1139 			vnc__init(fb);
1140 	}
1141 
1142 	if (sdl) {
1143 		if (fb)
1144 			sdl__init(fb);
1145 	}
1146 
1147 	fb__start();
1148 
1149 	/* Device init all done; firmware init must
1150 	 * come after this (it may set up device trees etc.)
1151 	 */
1152 
1153 	kvm__start_timer(kvm);
1154 
1155 	exit_code = kvm__arch_setup_firmware(kvm);
1156 	if (exit_code)
1157 		goto err;
1158 
1159 	for (i = 0; i < nrcpus; i++) {
1160 		kvm_cpus[i] = kvm_cpu__init(kvm, i);
1161 		if (!kvm_cpus[i])
1162 			die("unable to initialize KVM VCPU");
1163 	}
1164 
1165 	thread_pool__init(nr_online_cpus);
1166 	ioeventfd__start();
1167 
1168 	for (i = 0; i < nrcpus; i++) {
1169 		if (pthread_create(&kvm_cpus[i]->thread, NULL, kvm_cpu_thread, kvm_cpus[i]) != 0)
1170 			die("unable to create KVM VCPU thread");
1171 	}
1172 
1173 	/* Only VCPU #0 is going to exit by itself when shutting down */
1174 	if (pthread_join(kvm_cpus[0]->thread, &ret) != 0)
1175 		exit_code = 1;
1176 
1177 	kvm_cpu__delete(kvm_cpus[0]);
1178 
1179 	for (i = 1; i < nrcpus; i++) {
1180 		if (kvm_cpus[i]->is_running) {
1181 			pthread_kill(kvm_cpus[i]->thread, SIGKVMEXIT);
1182 			if (pthread_join(kvm_cpus[i]->thread, &ret) != 0)
1183 				die("pthread_join");
1184 			kvm_cpu__delete(kvm_cpus[i]);
1185 		}
1186 		if (ret != NULL)
1187 			exit_code = 1;
1188 	}
1189 
1190 err:
1191 	compat__print_all_messages();
1192 
1193 	fb__stop();
1194 
1195 	virtio_blk__delete_all(kvm);
1196 	virtio_rng__delete_all(kvm);
1197 
1198 	disk_image__close_all(kvm->disks, image_count);
1199 	kvm__delete(kvm);
1200 
1201 	if (!exit_code)
1202 		printf("\n  # KVM session ended normally.\n");
1203 
1204 	return exit_code;
1205 }
1206