xref: /src/usr.sbin/bhyve/amd64/bhyverun_machdep.c (revision 68ad2b0d7af2a3571c4abac9afa712f9b09b721c)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2011 NetApp, Inc.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 
29 #include <assert.h>
30 #include <err.h>
31 #include <stdbool.h>
32 #include <stdlib.h>
33 #include <sysexits.h>
34 
35 #include <vmmapi.h>
36 
37 #include "acpi.h"
38 #include "atkbdc.h"
39 #include "bhyverun.h"
40 #include "bootrom.h"
41 #include "config.h"
42 #include "debug.h"
43 #include "e820.h"
44 #include "fwctl.h"
45 #include "ioapic.h"
46 #include "inout.h"
47 #include "kernemu_dev.h"
48 #include "mptbl.h"
49 #include "pci_emul.h"
50 #include "pci_irq.h"
51 #include "pci_lpc.h"
52 #include "rtc.h"
53 #include "smbiostbl.h"
54 #include "xmsr.h"
55 
/*
 * Initialise the configuration store and seed it with the built-in
 * defaults.  The defaults are set prior to option parsing.
 */
void
bhyve_init_config(void)
{
	init_config();

	/* ACPI table defaults. */
	set_config_bool("acpi_tables", true);
	set_config_bool("acpi_tables_in_memory", true);

	/* Guest memory size default. */
	set_config_value("memory.size", "256M");

	/* MSR handling defaults. */
	set_config_bool("x86.strictmsr", true);
	set_config_bool("x86.verbosemsr", false);

	/* Default value for the "lpc.fwcfg" setting. */
	set_config_value("lpc.fwcfg", "bhyve");
}
69 
/*
 * Write the command synopsis and the per-option help text to stderr and
 * terminate the process with exit status 'code'.  This function does not
 * return.
 */
void
bhyve_usage(int code)
{
	const char *name = getprogname();
	/*
	 * The synopsis continuation lines are padded with as many blanks as
	 * the program name is long so that they line up beneath it.
	 */
	const int pad = (int)strlen(name);

	fprintf(stderr,
	    "Usage: %s [-aCDeHhPSuWwxY]\n"
	    "       %*s [-c [[cpus=]numcpus][,sockets=n][,cores=n][,threads=n]]\n"
	    "       %*s [-G port] [-k config_file] [-l lpc] [-m mem] [-o var=value]\n"
	    "       %*s [-p vcpu:hostcpu] [-r file] [-s pci] [-U uuid] vmname\n"
	    "       -a: local apic is in xAPIC mode (deprecated)\n"
	    "       -C: include guest memory in core file\n"
	    "       -c: number of CPUs and/or topology specification\n"
	    "       -D: destroy on power-off\n"
	    "       -e: exit on unhandled I/O access\n"
	    "       -G: start a debug server\n"
	    "       -H: vmexit from the guest on HLT\n"
	    "       -h: help\n"
	    "       -k: key=value flat config file\n"
	    "       -K: PS2 keyboard layout\n"
	    "       -l: LPC device configuration\n"
	    "       -M: monitor mode\n"
	    "       -m: memory size\n"
	    "       -n: NUMA domain specification\n"
	    "       -o: set config 'var' to 'value'\n"
	    "       -P: vmexit from the guest on pause\n"
	    "       -p: pin 'vcpu' to 'hostcpu'\n"
#ifdef BHYVE_SNAPSHOT
	    "       -r: path to checkpoint file\n"
#endif
	    "       -S: guest memory cannot be swapped\n"
	    "       -s: <slot,driver,configinfo> PCI slot config\n"
	    "       -U: UUID\n"
	    "       -u: RTC keeps UTC time\n"
	    "       -W: force virtio to use single-vector MSI\n"
	    "       -w: ignore unimplemented MSRs\n"
	    "       -x: local APIC is in x2APIC mode\n"
	    "       -Y: disable MPtable generation\n",
	    name, pad, "", pad, "", pad, "");
	exit(code);
}
114 
115 void
116 bhyve_optparse(int argc, char **argv)
117 {
118 	const char *optstr;
119 	int c;
120 
121 #ifdef BHYVE_SNAPSHOT
122 	optstr = "aehuwxACDHIMPSWYk:f:o:p:G:c:s:m:n:l:K:U:r:";
123 #else
124 	optstr = "aehuwxACDHIMPSWYk:f:o:p:G:c:s:m:n:l:K:U:";
125 #endif
126 	while ((c = getopt(argc, argv, optstr)) != -1) {
127 		switch (c) {
128 		case 'a':
129 			set_config_bool("x86.x2apic", false);
130 			break;
131 		case 'A':
132 			/*
133 			 * NOP. For backward compatibility. Most systems don't
134 			 * work properly without sane ACPI tables. Therefore,
135 			 * we're always generating them.
136 			 */
137 			break;
138 		case 'D':
139 			set_config_bool("destroy_on_poweroff", true);
140 			break;
141 		case 'p':
142 			if (bhyve_pincpu_parse(optarg) != 0) {
143 				errx(EX_USAGE, "invalid vcpu pinning "
144 				    "configuration '%s'", optarg);
145 			}
146 			break;
147 		case 'c':
148 			if (bhyve_topology_parse(optarg) != 0) {
149 			    errx(EX_USAGE, "invalid cpu topology "
150 				"'%s'", optarg);
151 			}
152 			break;
153 		case 'C':
154 			set_config_bool("memory.guest_in_core", true);
155 			break;
156 		case 'f':
157 			if (qemu_fwcfg_parse_cmdline_arg(optarg) != 0) {
158 				errx(EX_USAGE, "invalid fwcfg item '%s'",
159 				    optarg);
160 			}
161 			break;
162 		case 'G':
163 			bhyve_parse_gdb_options(optarg);
164 			break;
165 		case 'k':
166 			bhyve_parse_simple_config_file(optarg);
167 			break;
168 		case 'K':
169 			set_config_value("keyboard.layout", optarg);
170 			break;
171 		case 'l':
172 			if (strncmp(optarg, "help", strlen(optarg)) == 0) {
173 				lpc_print_supported_devices();
174 				exit(0);
175 			} else if (lpc_device_parse(optarg) != 0) {
176 				errx(EX_USAGE, "invalid lpc device "
177 				    "configuration '%s'", optarg);
178 			}
179 			break;
180 #ifdef BHYVE_SNAPSHOT
181 		case 'r':
182 			restore_file = optarg;
183 			break;
184 #endif
185 		case 's':
186 			if (strncmp(optarg, "help", strlen(optarg)) == 0) {
187 				pci_print_supported_devices();
188 				exit(0);
189 			} else if (pci_parse_slot(optarg) != 0)
190 				exit(BHYVE_EXIT_ERROR);
191 			else
192 				break;
193 		case 'S':
194 			set_config_bool("memory.wired", true);
195 			break;
196 		case 'm':
197 			set_config_value("memory.size", optarg);
198 			break;
199 		case 'M':
200 			set_config_bool("monitor", true);
201 			break;
202 		case 'n':
203 			if (bhyve_numa_parse(optarg) != 0)
204 				errx(EX_USAGE,
205 				    "invalid NUMA configuration "
206 				    "'%s'",
207 				    optarg);
208 			if (!get_config_bool("acpi_tables"))
209 				errx(EX_USAGE, "NUMA emulation requires ACPI");
210 			break;
211 		case 'o':
212 			if (!bhyve_parse_config_option(optarg)) {
213 				errx(EX_USAGE,
214 				    "invalid configuration option '%s'",
215 				    optarg);
216 			}
217 			break;
218 		case 'H':
219 			set_config_bool("x86.vmexit_on_hlt", true);
220 			break;
221 		case 'I':
222 			/*
223 			 * The "-I" option was used to add an ioapic to the
224 			 * virtual machine.
225 			 *
226 			 * An ioapic is now provided unconditionally for each
227 			 * virtual machine and this option is now deprecated.
228 			 */
229 			break;
230 		case 'P':
231 			set_config_bool("x86.vmexit_on_pause", true);
232 			break;
233 		case 'e':
234 			set_config_bool("x86.strictio", true);
235 			break;
236 		case 'u':
237 			set_config_bool("rtc.use_localtime", false);
238 			break;
239 		case 'U':
240 			set_config_value("uuid", optarg);
241 			break;
242 		case 'w':
243 			set_config_bool("x86.strictmsr", false);
244 			break;
245 		case 'W':
246 			set_config_bool("virtio_msix", false);
247 			break;
248 		case 'x':
249 			set_config_bool("x86.x2apic", true);
250 			break;
251 		case 'Y':
252 			set_config_bool("x86.mptable", false);
253 			break;
254 		case 'h':
255 			bhyve_usage(0);
256 		default:
257 			bhyve_usage(1);
258 		}
259 	}
260 
261 	/* Handle backwards compatibility aliases in config options. */
262 	if (get_config_value("lpc.bootrom") != NULL &&
263 	    get_config_value("bootrom") == NULL) {
264 		warnx("lpc.bootrom is deprecated, use '-o bootrom' instead");
265 		set_config_value("bootrom", get_config_value("lpc.bootrom"));
266 	}
267 	if (get_config_value("lpc.bootvars") != NULL &&
268 	    get_config_value("bootvars") == NULL) {
269 		warnx("lpc.bootvars is deprecated, use '-o bootvars' instead");
270 		set_config_value("bootvars", get_config_value("lpc.bootvars"));
271 	}
272 }
273 
274 void
275 bhyve_init_vcpu(struct vcpu *vcpu)
276 {
277 	int err, tmp;
278 
279 	if (get_config_bool_default("x86.vmexit_on_hlt", false)) {
280 		err = vm_get_capability(vcpu, VM_CAP_HALT_EXIT, &tmp);
281 		if (err < 0) {
282 			EPRINTLN("VM exit on HLT not supported");
283 			exit(BHYVE_EXIT_ERROR);
284 		}
285 		vm_set_capability(vcpu, VM_CAP_HALT_EXIT, 1);
286 	}
287 
288 	if (get_config_bool_default("x86.vmexit_on_pause", false)) {
289 		/*
290 		 * pause exit support required for this mode
291 		 */
292 		err = vm_get_capability(vcpu, VM_CAP_PAUSE_EXIT, &tmp);
293 		if (err < 0) {
294 			EPRINTLN("SMP mux requested, no pause support");
295 			exit(BHYVE_EXIT_ERROR);
296 		}
297 		vm_set_capability(vcpu, VM_CAP_PAUSE_EXIT, 1);
298 	}
299 
300 	if (get_config_bool_default("x86.x2apic", false))
301 		err = vm_set_x2apic_state(vcpu, X2APIC_ENABLED);
302 	else
303 		err = vm_set_x2apic_state(vcpu, X2APIC_DISABLED);
304 
305 	if (err) {
306 		EPRINTLN("Unable to set x2apic state (%d)", err);
307 		exit(BHYVE_EXIT_ERROR);
308 	}
309 
310 	vm_set_capability(vcpu, VM_CAP_ENABLE_INVPCID, 1);
311 
312 	err = vm_set_capability(vcpu, VM_CAP_IPI_EXIT, 1);
313 	assert(err == 0);
314 }
315 
316 void
317 bhyve_start_vcpu(struct vcpu *vcpu, bool bsp)
318 {
319 	int error;
320 
321 	if (bsp) {
322 		if (bootrom_boot()) {
323 			error = vm_set_capability(vcpu,
324 			    VM_CAP_UNRESTRICTED_GUEST, 1);
325 			if (error != 0) {
326 				err(4, "ROM boot failed: unrestricted guest "
327 				    "capability not available");
328 			}
329 			error = vcpu_reset(vcpu);
330 			assert(error == 0);
331 		}
332 	} else {
333 		bhyve_init_vcpu(vcpu);
334 
335 		/*
336 		 * Enable the 'unrestricted guest' mode for APs.
337 		 *
338 		 * APs startup in power-on 16-bit mode.
339 		 */
340 		error = vm_set_capability(vcpu, VM_CAP_UNRESTRICTED_GUEST, 1);
341 		assert(error == 0);
342 	}
343 
344 	fbsdrun_addcpu(vcpu_id(vcpu));
345 }
346 
347 int
348 bhyve_init_platform(struct vmctx *ctx, struct vcpu *bsp __unused)
349 {
350 	int error;
351 
352 	error = init_msr();
353 	if (error != 0)
354 		return (error);
355 	init_inout();
356 	kernemu_dev_init();
357 	atkbdc_init(ctx);
358 	pci_irq_init(ctx);
359 	ioapic_init(ctx);
360 	rtc_init(ctx);
361 	sci_init(ctx);
362 	error = e820_init(ctx);
363 	if (error != 0)
364 		return (error);
365 	error = bootrom_loadrom(ctx);
366 	if (error != 0)
367 		return (error);
368 
369 	return (0);
370 }
371 
372 int
373 bhyve_init_platform_late(struct vmctx *ctx, struct vcpu *bsp __unused)
374 {
375 	int error;
376 
377 	if (get_config_bool_default("x86.mptable", true)) {
378 		error = mptable_build(ctx, guest_ncpus);
379 		if (error != 0)
380 			return (error);
381 	}
382 	error = smbios_build(ctx);
383 	if (error != 0)
384 		return (error);
385 	error = e820_finalize();
386 	if (error != 0)
387 		return (error);
388 
389 	if (bootrom_boot() && strcmp(lpc_fwcfg(), "bhyve") == 0)
390 		fwctl_init();
391 
392 	if (get_config_bool("acpi_tables")) {
393 		error = acpi_build(ctx, guest_ncpus);
394 		assert(error == 0);
395 	}
396 
397 	return (0);
398 }
399