xref: /qemu/hw/i386/x86-common.c (revision b061f0598b9231f7992aff4fcdf3f336f9747d11)
/*
 * Copyright (c) 2003-2004 Fabrice Bellard
 * Copyright (c) 2019, 2024 Red Hat, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
23*b061f059SPaolo Bonzini #include "qemu/osdep.h"
24*b061f059SPaolo Bonzini #include "qemu/error-report.h"
25*b061f059SPaolo Bonzini #include "qemu/cutils.h"
26*b061f059SPaolo Bonzini #include "qemu/units.h"
27*b061f059SPaolo Bonzini #include "qemu/datadir.h"
28*b061f059SPaolo Bonzini #include "qapi/error.h"
29*b061f059SPaolo Bonzini #include "sysemu/numa.h"
30*b061f059SPaolo Bonzini #include "sysemu/sysemu.h"
31*b061f059SPaolo Bonzini #include "sysemu/xen.h"
32*b061f059SPaolo Bonzini #include "trace.h"
33*b061f059SPaolo Bonzini 
34*b061f059SPaolo Bonzini #include "hw/i386/x86.h"
35*b061f059SPaolo Bonzini #include "target/i386/cpu.h"
36*b061f059SPaolo Bonzini #include "hw/rtc/mc146818rtc.h"
37*b061f059SPaolo Bonzini #include "target/i386/sev.h"
38*b061f059SPaolo Bonzini 
39*b061f059SPaolo Bonzini #include "hw/acpi/cpu_hotplug.h"
40*b061f059SPaolo Bonzini #include "hw/irq.h"
41*b061f059SPaolo Bonzini #include "hw/loader.h"
42*b061f059SPaolo Bonzini #include "multiboot.h"
43*b061f059SPaolo Bonzini #include "elf.h"
44*b061f059SPaolo Bonzini #include "standard-headers/asm-x86/bootparam.h"
45*b061f059SPaolo Bonzini #include CONFIG_DEVICES
46*b061f059SPaolo Bonzini #include "kvm/kvm_i386.h"
47*b061f059SPaolo Bonzini 
48*b061f059SPaolo Bonzini #ifdef CONFIG_XEN_EMU
49*b061f059SPaolo Bonzini #include "hw/xen/xen.h"
50*b061f059SPaolo Bonzini #include "hw/i386/kvm/xen_evtchn.h"
51*b061f059SPaolo Bonzini #endif
52*b061f059SPaolo Bonzini 
/*
 * Physical address of the PVH entry point, as read from the kernel's ELF
 * note by read_pvh_start_addr().  Zero until a note has been parsed.
 */
static size_t pvh_start_addr;
55*b061f059SPaolo Bonzini 
56*b061f059SPaolo Bonzini static void x86_cpu_new(X86MachineState *x86ms, int64_t apic_id, Error **errp)
57*b061f059SPaolo Bonzini {
58*b061f059SPaolo Bonzini     Object *cpu = object_new(MACHINE(x86ms)->cpu_type);
59*b061f059SPaolo Bonzini 
60*b061f059SPaolo Bonzini     if (!object_property_set_uint(cpu, "apic-id", apic_id, errp)) {
61*b061f059SPaolo Bonzini         goto out;
62*b061f059SPaolo Bonzini     }
63*b061f059SPaolo Bonzini     qdev_realize(DEVICE(cpu), NULL, errp);
64*b061f059SPaolo Bonzini 
65*b061f059SPaolo Bonzini out:
66*b061f059SPaolo Bonzini     object_unref(cpu);
67*b061f059SPaolo Bonzini }
68*b061f059SPaolo Bonzini 
69*b061f059SPaolo Bonzini void x86_cpus_init(X86MachineState *x86ms, int default_cpu_version)
70*b061f059SPaolo Bonzini {
71*b061f059SPaolo Bonzini     int i;
72*b061f059SPaolo Bonzini     const CPUArchIdList *possible_cpus;
73*b061f059SPaolo Bonzini     MachineState *ms = MACHINE(x86ms);
74*b061f059SPaolo Bonzini     MachineClass *mc = MACHINE_GET_CLASS(x86ms);
75*b061f059SPaolo Bonzini 
76*b061f059SPaolo Bonzini     x86_cpu_set_default_version(default_cpu_version);
77*b061f059SPaolo Bonzini 
78*b061f059SPaolo Bonzini     /*
79*b061f059SPaolo Bonzini      * Calculates the limit to CPU APIC ID values
80*b061f059SPaolo Bonzini      *
81*b061f059SPaolo Bonzini      * Limit for the APIC ID value, so that all
82*b061f059SPaolo Bonzini      * CPU APIC IDs are < x86ms->apic_id_limit.
83*b061f059SPaolo Bonzini      *
84*b061f059SPaolo Bonzini      * This is used for FW_CFG_MAX_CPUS. See comments on fw_cfg_arch_create().
85*b061f059SPaolo Bonzini      */
86*b061f059SPaolo Bonzini     x86ms->apic_id_limit = x86_cpu_apic_id_from_index(x86ms,
87*b061f059SPaolo Bonzini                                                       ms->smp.max_cpus - 1) + 1;
88*b061f059SPaolo Bonzini 
89*b061f059SPaolo Bonzini     /*
90*b061f059SPaolo Bonzini      * Can we support APIC ID 255 or higher?  With KVM, that requires
91*b061f059SPaolo Bonzini      * both in-kernel lapic and X2APIC userspace API.
92*b061f059SPaolo Bonzini      *
93*b061f059SPaolo Bonzini      * kvm_enabled() must go first to ensure that kvm_* references are
94*b061f059SPaolo Bonzini      * not emitted for the linker to consume (kvm_enabled() is
95*b061f059SPaolo Bonzini      * a literal `0` in configurations where kvm_* aren't defined)
96*b061f059SPaolo Bonzini      */
97*b061f059SPaolo Bonzini     if (kvm_enabled() && x86ms->apic_id_limit > 255 &&
98*b061f059SPaolo Bonzini         kvm_irqchip_in_kernel() && !kvm_enable_x2apic()) {
99*b061f059SPaolo Bonzini         error_report("current -smp configuration requires kernel "
100*b061f059SPaolo Bonzini                      "irqchip and X2APIC API support.");
101*b061f059SPaolo Bonzini         exit(EXIT_FAILURE);
102*b061f059SPaolo Bonzini     }
103*b061f059SPaolo Bonzini 
104*b061f059SPaolo Bonzini     if (kvm_enabled()) {
105*b061f059SPaolo Bonzini         kvm_set_max_apic_id(x86ms->apic_id_limit);
106*b061f059SPaolo Bonzini     }
107*b061f059SPaolo Bonzini 
108*b061f059SPaolo Bonzini     if (!kvm_irqchip_in_kernel()) {
109*b061f059SPaolo Bonzini         apic_set_max_apic_id(x86ms->apic_id_limit);
110*b061f059SPaolo Bonzini     }
111*b061f059SPaolo Bonzini 
112*b061f059SPaolo Bonzini     possible_cpus = mc->possible_cpu_arch_ids(ms);
113*b061f059SPaolo Bonzini     for (i = 0; i < ms->smp.cpus; i++) {
114*b061f059SPaolo Bonzini         x86_cpu_new(x86ms, possible_cpus->cpus[i].arch_id, &error_fatal);
115*b061f059SPaolo Bonzini     }
116*b061f059SPaolo Bonzini }
117*b061f059SPaolo Bonzini 
118*b061f059SPaolo Bonzini void x86_rtc_set_cpus_count(ISADevice *s, uint16_t cpus_count)
119*b061f059SPaolo Bonzini {
120*b061f059SPaolo Bonzini     MC146818RtcState *rtc = MC146818_RTC(s);
121*b061f059SPaolo Bonzini 
122*b061f059SPaolo Bonzini     if (cpus_count > 0xff) {
123*b061f059SPaolo Bonzini         /*
124*b061f059SPaolo Bonzini          * If the number of CPUs can't be represented in 8 bits, the
125*b061f059SPaolo Bonzini          * BIOS must use "FW_CFG_NB_CPUS". Set RTC field to 0 just
126*b061f059SPaolo Bonzini          * to make old BIOSes fail more predictably.
127*b061f059SPaolo Bonzini          */
128*b061f059SPaolo Bonzini         mc146818rtc_set_cmos_data(rtc, 0x5f, 0);
129*b061f059SPaolo Bonzini     } else {
130*b061f059SPaolo Bonzini         mc146818rtc_set_cmos_data(rtc, 0x5f, cpus_count - 1);
131*b061f059SPaolo Bonzini     }
132*b061f059SPaolo Bonzini }
133*b061f059SPaolo Bonzini 
134*b061f059SPaolo Bonzini static int x86_apic_cmp(const void *a, const void *b)
135*b061f059SPaolo Bonzini {
136*b061f059SPaolo Bonzini    CPUArchId *apic_a = (CPUArchId *)a;
137*b061f059SPaolo Bonzini    CPUArchId *apic_b = (CPUArchId *)b;
138*b061f059SPaolo Bonzini 
139*b061f059SPaolo Bonzini    return apic_a->arch_id - apic_b->arch_id;
140*b061f059SPaolo Bonzini }
141*b061f059SPaolo Bonzini 
142*b061f059SPaolo Bonzini /*
143*b061f059SPaolo Bonzini  * returns pointer to CPUArchId descriptor that matches CPU's apic_id
144*b061f059SPaolo Bonzini  * in ms->possible_cpus->cpus, if ms->possible_cpus->cpus has no
145*b061f059SPaolo Bonzini  * entry corresponding to CPU's apic_id returns NULL.
146*b061f059SPaolo Bonzini  */
147*b061f059SPaolo Bonzini static CPUArchId *x86_find_cpu_slot(MachineState *ms, uint32_t id, int *idx)
148*b061f059SPaolo Bonzini {
149*b061f059SPaolo Bonzini     CPUArchId apic_id, *found_cpu;
150*b061f059SPaolo Bonzini 
151*b061f059SPaolo Bonzini     apic_id.arch_id = id;
152*b061f059SPaolo Bonzini     found_cpu = bsearch(&apic_id, ms->possible_cpus->cpus,
153*b061f059SPaolo Bonzini         ms->possible_cpus->len, sizeof(*ms->possible_cpus->cpus),
154*b061f059SPaolo Bonzini         x86_apic_cmp);
155*b061f059SPaolo Bonzini     if (found_cpu && idx) {
156*b061f059SPaolo Bonzini         *idx = found_cpu - ms->possible_cpus->cpus;
157*b061f059SPaolo Bonzini     }
158*b061f059SPaolo Bonzini     return found_cpu;
159*b061f059SPaolo Bonzini }
160*b061f059SPaolo Bonzini 
161*b061f059SPaolo Bonzini void x86_cpu_plug(HotplugHandler *hotplug_dev,
162*b061f059SPaolo Bonzini                   DeviceState *dev, Error **errp)
163*b061f059SPaolo Bonzini {
164*b061f059SPaolo Bonzini     CPUArchId *found_cpu;
165*b061f059SPaolo Bonzini     Error *local_err = NULL;
166*b061f059SPaolo Bonzini     X86CPU *cpu = X86_CPU(dev);
167*b061f059SPaolo Bonzini     X86MachineState *x86ms = X86_MACHINE(hotplug_dev);
168*b061f059SPaolo Bonzini 
169*b061f059SPaolo Bonzini     if (x86ms->acpi_dev) {
170*b061f059SPaolo Bonzini         hotplug_handler_plug(x86ms->acpi_dev, dev, &local_err);
171*b061f059SPaolo Bonzini         if (local_err) {
172*b061f059SPaolo Bonzini             goto out;
173*b061f059SPaolo Bonzini         }
174*b061f059SPaolo Bonzini     }
175*b061f059SPaolo Bonzini 
176*b061f059SPaolo Bonzini     /* increment the number of CPUs */
177*b061f059SPaolo Bonzini     x86ms->boot_cpus++;
178*b061f059SPaolo Bonzini     if (x86ms->rtc) {
179*b061f059SPaolo Bonzini         x86_rtc_set_cpus_count(x86ms->rtc, x86ms->boot_cpus);
180*b061f059SPaolo Bonzini     }
181*b061f059SPaolo Bonzini     if (x86ms->fw_cfg) {
182*b061f059SPaolo Bonzini         fw_cfg_modify_i16(x86ms->fw_cfg, FW_CFG_NB_CPUS, x86ms->boot_cpus);
183*b061f059SPaolo Bonzini     }
184*b061f059SPaolo Bonzini 
185*b061f059SPaolo Bonzini     found_cpu = x86_find_cpu_slot(MACHINE(x86ms), cpu->apic_id, NULL);
186*b061f059SPaolo Bonzini     found_cpu->cpu = CPU(dev);
187*b061f059SPaolo Bonzini out:
188*b061f059SPaolo Bonzini     error_propagate(errp, local_err);
189*b061f059SPaolo Bonzini }
190*b061f059SPaolo Bonzini 
191*b061f059SPaolo Bonzini void x86_cpu_unplug_request_cb(HotplugHandler *hotplug_dev,
192*b061f059SPaolo Bonzini                                DeviceState *dev, Error **errp)
193*b061f059SPaolo Bonzini {
194*b061f059SPaolo Bonzini     int idx = -1;
195*b061f059SPaolo Bonzini     X86CPU *cpu = X86_CPU(dev);
196*b061f059SPaolo Bonzini     X86MachineState *x86ms = X86_MACHINE(hotplug_dev);
197*b061f059SPaolo Bonzini 
198*b061f059SPaolo Bonzini     if (!x86ms->acpi_dev) {
199*b061f059SPaolo Bonzini         error_setg(errp, "CPU hot unplug not supported without ACPI");
200*b061f059SPaolo Bonzini         return;
201*b061f059SPaolo Bonzini     }
202*b061f059SPaolo Bonzini 
203*b061f059SPaolo Bonzini     x86_find_cpu_slot(MACHINE(x86ms), cpu->apic_id, &idx);
204*b061f059SPaolo Bonzini     assert(idx != -1);
205*b061f059SPaolo Bonzini     if (idx == 0) {
206*b061f059SPaolo Bonzini         error_setg(errp, "Boot CPU is unpluggable");
207*b061f059SPaolo Bonzini         return;
208*b061f059SPaolo Bonzini     }
209*b061f059SPaolo Bonzini 
210*b061f059SPaolo Bonzini     hotplug_handler_unplug_request(x86ms->acpi_dev, dev,
211*b061f059SPaolo Bonzini                                    errp);
212*b061f059SPaolo Bonzini }
213*b061f059SPaolo Bonzini 
214*b061f059SPaolo Bonzini void x86_cpu_unplug_cb(HotplugHandler *hotplug_dev,
215*b061f059SPaolo Bonzini                        DeviceState *dev, Error **errp)
216*b061f059SPaolo Bonzini {
217*b061f059SPaolo Bonzini     CPUArchId *found_cpu;
218*b061f059SPaolo Bonzini     Error *local_err = NULL;
219*b061f059SPaolo Bonzini     X86CPU *cpu = X86_CPU(dev);
220*b061f059SPaolo Bonzini     X86MachineState *x86ms = X86_MACHINE(hotplug_dev);
221*b061f059SPaolo Bonzini 
222*b061f059SPaolo Bonzini     hotplug_handler_unplug(x86ms->acpi_dev, dev, &local_err);
223*b061f059SPaolo Bonzini     if (local_err) {
224*b061f059SPaolo Bonzini         goto out;
225*b061f059SPaolo Bonzini     }
226*b061f059SPaolo Bonzini 
227*b061f059SPaolo Bonzini     found_cpu = x86_find_cpu_slot(MACHINE(x86ms), cpu->apic_id, NULL);
228*b061f059SPaolo Bonzini     found_cpu->cpu = NULL;
229*b061f059SPaolo Bonzini     qdev_unrealize(dev);
230*b061f059SPaolo Bonzini 
231*b061f059SPaolo Bonzini     /* decrement the number of CPUs */
232*b061f059SPaolo Bonzini     x86ms->boot_cpus--;
233*b061f059SPaolo Bonzini     /* Update the number of CPUs in CMOS */
234*b061f059SPaolo Bonzini     x86_rtc_set_cpus_count(x86ms->rtc, x86ms->boot_cpus);
235*b061f059SPaolo Bonzini     fw_cfg_modify_i16(x86ms->fw_cfg, FW_CFG_NB_CPUS, x86ms->boot_cpus);
236*b061f059SPaolo Bonzini  out:
237*b061f059SPaolo Bonzini     error_propagate(errp, local_err);
238*b061f059SPaolo Bonzini }
239*b061f059SPaolo Bonzini 
240*b061f059SPaolo Bonzini void x86_cpu_pre_plug(HotplugHandler *hotplug_dev,
241*b061f059SPaolo Bonzini                       DeviceState *dev, Error **errp)
242*b061f059SPaolo Bonzini {
243*b061f059SPaolo Bonzini     int idx;
244*b061f059SPaolo Bonzini     CPUState *cs;
245*b061f059SPaolo Bonzini     CPUArchId *cpu_slot;
246*b061f059SPaolo Bonzini     X86CPUTopoIDs topo_ids;
247*b061f059SPaolo Bonzini     X86CPU *cpu = X86_CPU(dev);
248*b061f059SPaolo Bonzini     CPUX86State *env = &cpu->env;
249*b061f059SPaolo Bonzini     MachineState *ms = MACHINE(hotplug_dev);
250*b061f059SPaolo Bonzini     X86MachineState *x86ms = X86_MACHINE(hotplug_dev);
251*b061f059SPaolo Bonzini     unsigned int smp_cores = ms->smp.cores;
252*b061f059SPaolo Bonzini     unsigned int smp_threads = ms->smp.threads;
253*b061f059SPaolo Bonzini     X86CPUTopoInfo topo_info;
254*b061f059SPaolo Bonzini 
255*b061f059SPaolo Bonzini     if (!object_dynamic_cast(OBJECT(cpu), ms->cpu_type)) {
256*b061f059SPaolo Bonzini         error_setg(errp, "Invalid CPU type, expected cpu type: '%s'",
257*b061f059SPaolo Bonzini                    ms->cpu_type);
258*b061f059SPaolo Bonzini         return;
259*b061f059SPaolo Bonzini     }
260*b061f059SPaolo Bonzini 
261*b061f059SPaolo Bonzini     if (x86ms->acpi_dev) {
262*b061f059SPaolo Bonzini         Error *local_err = NULL;
263*b061f059SPaolo Bonzini 
264*b061f059SPaolo Bonzini         hotplug_handler_pre_plug(HOTPLUG_HANDLER(x86ms->acpi_dev), dev,
265*b061f059SPaolo Bonzini                                  &local_err);
266*b061f059SPaolo Bonzini         if (local_err) {
267*b061f059SPaolo Bonzini             error_propagate(errp, local_err);
268*b061f059SPaolo Bonzini             return;
269*b061f059SPaolo Bonzini         }
270*b061f059SPaolo Bonzini     }
271*b061f059SPaolo Bonzini 
272*b061f059SPaolo Bonzini     init_topo_info(&topo_info, x86ms);
273*b061f059SPaolo Bonzini 
274*b061f059SPaolo Bonzini     env->nr_dies = ms->smp.dies;
275*b061f059SPaolo Bonzini 
276*b061f059SPaolo Bonzini     /*
277*b061f059SPaolo Bonzini      * If APIC ID is not set,
278*b061f059SPaolo Bonzini      * set it based on socket/die/core/thread properties.
279*b061f059SPaolo Bonzini      */
280*b061f059SPaolo Bonzini     if (cpu->apic_id == UNASSIGNED_APIC_ID) {
281*b061f059SPaolo Bonzini         int max_socket = (ms->smp.max_cpus - 1) /
282*b061f059SPaolo Bonzini                                 smp_threads / smp_cores / ms->smp.dies;
283*b061f059SPaolo Bonzini 
284*b061f059SPaolo Bonzini         /*
285*b061f059SPaolo Bonzini          * die-id was optional in QEMU 4.0 and older, so keep it optional
286*b061f059SPaolo Bonzini          * if there's only one die per socket.
287*b061f059SPaolo Bonzini          */
288*b061f059SPaolo Bonzini         if (cpu->die_id < 0 && ms->smp.dies == 1) {
289*b061f059SPaolo Bonzini             cpu->die_id = 0;
290*b061f059SPaolo Bonzini         }
291*b061f059SPaolo Bonzini 
292*b061f059SPaolo Bonzini         if (cpu->socket_id < 0) {
293*b061f059SPaolo Bonzini             error_setg(errp, "CPU socket-id is not set");
294*b061f059SPaolo Bonzini             return;
295*b061f059SPaolo Bonzini         } else if (cpu->socket_id > max_socket) {
296*b061f059SPaolo Bonzini             error_setg(errp, "Invalid CPU socket-id: %u must be in range 0:%u",
297*b061f059SPaolo Bonzini                        cpu->socket_id, max_socket);
298*b061f059SPaolo Bonzini             return;
299*b061f059SPaolo Bonzini         }
300*b061f059SPaolo Bonzini         if (cpu->die_id < 0) {
301*b061f059SPaolo Bonzini             error_setg(errp, "CPU die-id is not set");
302*b061f059SPaolo Bonzini             return;
303*b061f059SPaolo Bonzini         } else if (cpu->die_id > ms->smp.dies - 1) {
304*b061f059SPaolo Bonzini             error_setg(errp, "Invalid CPU die-id: %u must be in range 0:%u",
305*b061f059SPaolo Bonzini                        cpu->die_id, ms->smp.dies - 1);
306*b061f059SPaolo Bonzini             return;
307*b061f059SPaolo Bonzini         }
308*b061f059SPaolo Bonzini         if (cpu->core_id < 0) {
309*b061f059SPaolo Bonzini             error_setg(errp, "CPU core-id is not set");
310*b061f059SPaolo Bonzini             return;
311*b061f059SPaolo Bonzini         } else if (cpu->core_id > (smp_cores - 1)) {
312*b061f059SPaolo Bonzini             error_setg(errp, "Invalid CPU core-id: %u must be in range 0:%u",
313*b061f059SPaolo Bonzini                        cpu->core_id, smp_cores - 1);
314*b061f059SPaolo Bonzini             return;
315*b061f059SPaolo Bonzini         }
316*b061f059SPaolo Bonzini         if (cpu->thread_id < 0) {
317*b061f059SPaolo Bonzini             error_setg(errp, "CPU thread-id is not set");
318*b061f059SPaolo Bonzini             return;
319*b061f059SPaolo Bonzini         } else if (cpu->thread_id > (smp_threads - 1)) {
320*b061f059SPaolo Bonzini             error_setg(errp, "Invalid CPU thread-id: %u must be in range 0:%u",
321*b061f059SPaolo Bonzini                        cpu->thread_id, smp_threads - 1);
322*b061f059SPaolo Bonzini             return;
323*b061f059SPaolo Bonzini         }
324*b061f059SPaolo Bonzini 
325*b061f059SPaolo Bonzini         topo_ids.pkg_id = cpu->socket_id;
326*b061f059SPaolo Bonzini         topo_ids.die_id = cpu->die_id;
327*b061f059SPaolo Bonzini         topo_ids.core_id = cpu->core_id;
328*b061f059SPaolo Bonzini         topo_ids.smt_id = cpu->thread_id;
329*b061f059SPaolo Bonzini         cpu->apic_id = x86_apicid_from_topo_ids(&topo_info, &topo_ids);
330*b061f059SPaolo Bonzini     }
331*b061f059SPaolo Bonzini 
332*b061f059SPaolo Bonzini     cpu_slot = x86_find_cpu_slot(MACHINE(x86ms), cpu->apic_id, &idx);
333*b061f059SPaolo Bonzini     if (!cpu_slot) {
334*b061f059SPaolo Bonzini         x86_topo_ids_from_apicid(cpu->apic_id, &topo_info, &topo_ids);
335*b061f059SPaolo Bonzini         error_setg(errp,
336*b061f059SPaolo Bonzini             "Invalid CPU [socket: %u, die: %u, core: %u, thread: %u] with"
337*b061f059SPaolo Bonzini             " APIC ID %" PRIu32 ", valid index range 0:%d",
338*b061f059SPaolo Bonzini             topo_ids.pkg_id, topo_ids.die_id, topo_ids.core_id, topo_ids.smt_id,
339*b061f059SPaolo Bonzini             cpu->apic_id, ms->possible_cpus->len - 1);
340*b061f059SPaolo Bonzini         return;
341*b061f059SPaolo Bonzini     }
342*b061f059SPaolo Bonzini 
343*b061f059SPaolo Bonzini     if (cpu_slot->cpu) {
344*b061f059SPaolo Bonzini         error_setg(errp, "CPU[%d] with APIC ID %" PRIu32 " exists",
345*b061f059SPaolo Bonzini                    idx, cpu->apic_id);
346*b061f059SPaolo Bonzini         return;
347*b061f059SPaolo Bonzini     }
348*b061f059SPaolo Bonzini 
349*b061f059SPaolo Bonzini     /* if 'address' properties socket-id/core-id/thread-id are not set, set them
350*b061f059SPaolo Bonzini      * so that machine_query_hotpluggable_cpus would show correct values
351*b061f059SPaolo Bonzini      */
352*b061f059SPaolo Bonzini     /* TODO: move socket_id/core_id/thread_id checks into x86_cpu_realizefn()
353*b061f059SPaolo Bonzini      * once -smp refactoring is complete and there will be CPU private
354*b061f059SPaolo Bonzini      * CPUState::nr_cores and CPUState::nr_threads fields instead of globals */
355*b061f059SPaolo Bonzini     x86_topo_ids_from_apicid(cpu->apic_id, &topo_info, &topo_ids);
356*b061f059SPaolo Bonzini     if (cpu->socket_id != -1 && cpu->socket_id != topo_ids.pkg_id) {
357*b061f059SPaolo Bonzini         error_setg(errp, "property socket-id: %u doesn't match set apic-id:"
358*b061f059SPaolo Bonzini             " 0x%x (socket-id: %u)", cpu->socket_id, cpu->apic_id,
359*b061f059SPaolo Bonzini             topo_ids.pkg_id);
360*b061f059SPaolo Bonzini         return;
361*b061f059SPaolo Bonzini     }
362*b061f059SPaolo Bonzini     cpu->socket_id = topo_ids.pkg_id;
363*b061f059SPaolo Bonzini 
364*b061f059SPaolo Bonzini     if (cpu->die_id != -1 && cpu->die_id != topo_ids.die_id) {
365*b061f059SPaolo Bonzini         error_setg(errp, "property die-id: %u doesn't match set apic-id:"
366*b061f059SPaolo Bonzini             " 0x%x (die-id: %u)", cpu->die_id, cpu->apic_id, topo_ids.die_id);
367*b061f059SPaolo Bonzini         return;
368*b061f059SPaolo Bonzini     }
369*b061f059SPaolo Bonzini     cpu->die_id = topo_ids.die_id;
370*b061f059SPaolo Bonzini 
371*b061f059SPaolo Bonzini     if (cpu->core_id != -1 && cpu->core_id != topo_ids.core_id) {
372*b061f059SPaolo Bonzini         error_setg(errp, "property core-id: %u doesn't match set apic-id:"
373*b061f059SPaolo Bonzini             " 0x%x (core-id: %u)", cpu->core_id, cpu->apic_id,
374*b061f059SPaolo Bonzini             topo_ids.core_id);
375*b061f059SPaolo Bonzini         return;
376*b061f059SPaolo Bonzini     }
377*b061f059SPaolo Bonzini     cpu->core_id = topo_ids.core_id;
378*b061f059SPaolo Bonzini 
379*b061f059SPaolo Bonzini     if (cpu->thread_id != -1 && cpu->thread_id != topo_ids.smt_id) {
380*b061f059SPaolo Bonzini         error_setg(errp, "property thread-id: %u doesn't match set apic-id:"
381*b061f059SPaolo Bonzini             " 0x%x (thread-id: %u)", cpu->thread_id, cpu->apic_id,
382*b061f059SPaolo Bonzini             topo_ids.smt_id);
383*b061f059SPaolo Bonzini         return;
384*b061f059SPaolo Bonzini     }
385*b061f059SPaolo Bonzini     cpu->thread_id = topo_ids.smt_id;
386*b061f059SPaolo Bonzini 
387*b061f059SPaolo Bonzini     /*
388*b061f059SPaolo Bonzini     * kvm_enabled() must go first to ensure that kvm_* references are
389*b061f059SPaolo Bonzini     * not emitted for the linker to consume (kvm_enabled() is
390*b061f059SPaolo Bonzini     * a literal `0` in configurations where kvm_* aren't defined)
391*b061f059SPaolo Bonzini     */
392*b061f059SPaolo Bonzini     if (kvm_enabled() && hyperv_feat_enabled(cpu, HYPERV_FEAT_VPINDEX) &&
393*b061f059SPaolo Bonzini         !kvm_hv_vpindex_settable()) {
394*b061f059SPaolo Bonzini         error_setg(errp, "kernel doesn't allow setting HyperV VP_INDEX");
395*b061f059SPaolo Bonzini         return;
396*b061f059SPaolo Bonzini     }
397*b061f059SPaolo Bonzini 
398*b061f059SPaolo Bonzini     cs = CPU(cpu);
399*b061f059SPaolo Bonzini     cs->cpu_index = idx;
400*b061f059SPaolo Bonzini 
401*b061f059SPaolo Bonzini     numa_cpu_pre_plug(cpu_slot, dev, errp);
402*b061f059SPaolo Bonzini }
403*b061f059SPaolo Bonzini 
/*
 * Return the size in bytes of the open stream @f, leaving the stream's
 * current position unchanged.
 *
 * Returns -1 if the position cannot be queried, if seeking fails, or if
 * the original position cannot be restored (the stream would otherwise be
 * left at an unexpected offset for the caller's subsequent reads).
 */
static long get_file_size(FILE *f)
{
    long where, size;

    /* XXX: on Unix systems, using fstat() probably makes more sense */

    where = ftell(f);
    if (where < 0) {
        return -1;
    }

    if (fseek(f, 0, SEEK_END) != 0) {
        return -1;
    }
    /* ftell() itself yields -1 on failure, which is passed through. */
    size = ftell(f);

    if (fseek(f, where, SEEK_SET) != 0) {
        return -1;
    }

    return size;
}
417*b061f059SPaolo Bonzini 
418*b061f059SPaolo Bonzini void gsi_handler(void *opaque, int n, int level)
419*b061f059SPaolo Bonzini {
420*b061f059SPaolo Bonzini     GSIState *s = opaque;
421*b061f059SPaolo Bonzini 
422*b061f059SPaolo Bonzini     trace_x86_gsi_interrupt(n, level);
423*b061f059SPaolo Bonzini     switch (n) {
424*b061f059SPaolo Bonzini     case 0 ... ISA_NUM_IRQS - 1:
425*b061f059SPaolo Bonzini         if (s->i8259_irq[n]) {
426*b061f059SPaolo Bonzini             /* Under KVM, Kernel will forward to both PIC and IOAPIC */
427*b061f059SPaolo Bonzini             qemu_set_irq(s->i8259_irq[n], level);
428*b061f059SPaolo Bonzini         }
429*b061f059SPaolo Bonzini         /* fall through */
430*b061f059SPaolo Bonzini     case ISA_NUM_IRQS ... IOAPIC_NUM_PINS - 1:
431*b061f059SPaolo Bonzini #ifdef CONFIG_XEN_EMU
432*b061f059SPaolo Bonzini         /*
433*b061f059SPaolo Bonzini          * Xen delivers the GSI to the Legacy PIC (not that Legacy PIC
434*b061f059SPaolo Bonzini          * routing actually works properly under Xen). And then to
435*b061f059SPaolo Bonzini          * *either* the PIRQ handling or the I/OAPIC depending on
436*b061f059SPaolo Bonzini          * whether the former wants it.
437*b061f059SPaolo Bonzini          */
438*b061f059SPaolo Bonzini         if (xen_mode == XEN_EMULATE && xen_evtchn_set_gsi(n, level)) {
439*b061f059SPaolo Bonzini             break;
440*b061f059SPaolo Bonzini         }
441*b061f059SPaolo Bonzini #endif
442*b061f059SPaolo Bonzini         qemu_set_irq(s->ioapic_irq[n], level);
443*b061f059SPaolo Bonzini         break;
444*b061f059SPaolo Bonzini     case IO_APIC_SECONDARY_IRQBASE
445*b061f059SPaolo Bonzini         ... IO_APIC_SECONDARY_IRQBASE + IOAPIC_NUM_PINS - 1:
446*b061f059SPaolo Bonzini         qemu_set_irq(s->ioapic2_irq[n - IO_APIC_SECONDARY_IRQBASE], level);
447*b061f059SPaolo Bonzini         break;
448*b061f059SPaolo Bonzini     }
449*b061f059SPaolo Bonzini }
450*b061f059SPaolo Bonzini 
451*b061f059SPaolo Bonzini void ioapic_init_gsi(GSIState *gsi_state, Object *parent)
452*b061f059SPaolo Bonzini {
453*b061f059SPaolo Bonzini     DeviceState *dev;
454*b061f059SPaolo Bonzini     SysBusDevice *d;
455*b061f059SPaolo Bonzini     unsigned int i;
456*b061f059SPaolo Bonzini 
457*b061f059SPaolo Bonzini     assert(parent);
458*b061f059SPaolo Bonzini     if (kvm_ioapic_in_kernel()) {
459*b061f059SPaolo Bonzini         dev = qdev_new(TYPE_KVM_IOAPIC);
460*b061f059SPaolo Bonzini     } else {
461*b061f059SPaolo Bonzini         dev = qdev_new(TYPE_IOAPIC);
462*b061f059SPaolo Bonzini     }
463*b061f059SPaolo Bonzini     object_property_add_child(parent, "ioapic", OBJECT(dev));
464*b061f059SPaolo Bonzini     d = SYS_BUS_DEVICE(dev);
465*b061f059SPaolo Bonzini     sysbus_realize_and_unref(d, &error_fatal);
466*b061f059SPaolo Bonzini     sysbus_mmio_map(d, 0, IO_APIC_DEFAULT_ADDRESS);
467*b061f059SPaolo Bonzini 
468*b061f059SPaolo Bonzini     for (i = 0; i < IOAPIC_NUM_PINS; i++) {
469*b061f059SPaolo Bonzini         gsi_state->ioapic_irq[i] = qdev_get_gpio_in(dev, i);
470*b061f059SPaolo Bonzini     }
471*b061f059SPaolo Bonzini }
472*b061f059SPaolo Bonzini 
473*b061f059SPaolo Bonzini DeviceState *ioapic_init_secondary(GSIState *gsi_state)
474*b061f059SPaolo Bonzini {
475*b061f059SPaolo Bonzini     DeviceState *dev;
476*b061f059SPaolo Bonzini     SysBusDevice *d;
477*b061f059SPaolo Bonzini     unsigned int i;
478*b061f059SPaolo Bonzini 
479*b061f059SPaolo Bonzini     dev = qdev_new(TYPE_IOAPIC);
480*b061f059SPaolo Bonzini     d = SYS_BUS_DEVICE(dev);
481*b061f059SPaolo Bonzini     sysbus_realize_and_unref(d, &error_fatal);
482*b061f059SPaolo Bonzini     sysbus_mmio_map(d, 0, IO_APIC_SECONDARY_ADDRESS);
483*b061f059SPaolo Bonzini 
484*b061f059SPaolo Bonzini     for (i = 0; i < IOAPIC_NUM_PINS; i++) {
485*b061f059SPaolo Bonzini         gsi_state->ioapic2_irq[i] = qdev_get_gpio_in(dev, i);
486*b061f059SPaolo Bonzini     }
487*b061f059SPaolo Bonzini     return dev;
488*b061f059SPaolo Bonzini }
489*b061f059SPaolo Bonzini 
490*b061f059SPaolo Bonzini /*
491*b061f059SPaolo Bonzini  * The entry point into the kernel for PVH boot is different from
492*b061f059SPaolo Bonzini  * the native entry point.  The PVH entry is defined by the x86/HVM
493*b061f059SPaolo Bonzini  * direct boot ABI and is available in an ELFNOTE in the kernel binary.
494*b061f059SPaolo Bonzini  *
495*b061f059SPaolo Bonzini  * This function is passed to load_elf() when it is called from
496*b061f059SPaolo Bonzini  * load_elfboot() which then additionally checks for an ELF Note of
497*b061f059SPaolo Bonzini  * type XEN_ELFNOTE_PHYS32_ENTRY and passes it to this function to
498*b061f059SPaolo Bonzini  * parse the PVH entry address from the ELF Note.
499*b061f059SPaolo Bonzini  *
500*b061f059SPaolo Bonzini  * Due to trickery in elf_opts.h, load_elf() is actually available as
501*b061f059SPaolo Bonzini  * load_elf32() or load_elf64() and this routine needs to be able
502*b061f059SPaolo Bonzini  * to deal with being called as 32 or 64 bit.
503*b061f059SPaolo Bonzini  *
504*b061f059SPaolo Bonzini  * The address of the PVH entry point is saved to the 'pvh_start_addr'
505*b061f059SPaolo Bonzini  * global variable.  (although the entry point is 32-bit, the kernel
506*b061f059SPaolo Bonzini  * binary can be either 32-bit or 64-bit).
507*b061f059SPaolo Bonzini  */
508*b061f059SPaolo Bonzini static uint64_t read_pvh_start_addr(void *arg1, void *arg2, bool is64)
509*b061f059SPaolo Bonzini {
510*b061f059SPaolo Bonzini     size_t *elf_note_data_addr;
511*b061f059SPaolo Bonzini 
512*b061f059SPaolo Bonzini     /* Check if ELF Note header passed in is valid */
513*b061f059SPaolo Bonzini     if (arg1 == NULL) {
514*b061f059SPaolo Bonzini         return 0;
515*b061f059SPaolo Bonzini     }
516*b061f059SPaolo Bonzini 
517*b061f059SPaolo Bonzini     if (is64) {
518*b061f059SPaolo Bonzini         struct elf64_note *nhdr64 = (struct elf64_note *)arg1;
519*b061f059SPaolo Bonzini         uint64_t nhdr_size64 = sizeof(struct elf64_note);
520*b061f059SPaolo Bonzini         uint64_t phdr_align = *(uint64_t *)arg2;
521*b061f059SPaolo Bonzini         uint64_t nhdr_namesz = nhdr64->n_namesz;
522*b061f059SPaolo Bonzini 
523*b061f059SPaolo Bonzini         elf_note_data_addr =
524*b061f059SPaolo Bonzini             ((void *)nhdr64) + nhdr_size64 +
525*b061f059SPaolo Bonzini             QEMU_ALIGN_UP(nhdr_namesz, phdr_align);
526*b061f059SPaolo Bonzini 
527*b061f059SPaolo Bonzini         pvh_start_addr = *elf_note_data_addr;
528*b061f059SPaolo Bonzini     } else {
529*b061f059SPaolo Bonzini         struct elf32_note *nhdr32 = (struct elf32_note *)arg1;
530*b061f059SPaolo Bonzini         uint32_t nhdr_size32 = sizeof(struct elf32_note);
531*b061f059SPaolo Bonzini         uint32_t phdr_align = *(uint32_t *)arg2;
532*b061f059SPaolo Bonzini         uint32_t nhdr_namesz = nhdr32->n_namesz;
533*b061f059SPaolo Bonzini 
534*b061f059SPaolo Bonzini         elf_note_data_addr =
535*b061f059SPaolo Bonzini             ((void *)nhdr32) + nhdr_size32 +
536*b061f059SPaolo Bonzini             QEMU_ALIGN_UP(nhdr_namesz, phdr_align);
537*b061f059SPaolo Bonzini 
538*b061f059SPaolo Bonzini         pvh_start_addr = *(uint32_t *)elf_note_data_addr;
539*b061f059SPaolo Bonzini     }
540*b061f059SPaolo Bonzini 
541*b061f059SPaolo Bonzini     return pvh_start_addr;
542*b061f059SPaolo Bonzini }
543*b061f059SPaolo Bonzini 
544*b061f059SPaolo Bonzini static bool load_elfboot(const char *kernel_filename,
545*b061f059SPaolo Bonzini                          int kernel_file_size,
546*b061f059SPaolo Bonzini                          uint8_t *header,
547*b061f059SPaolo Bonzini                          size_t pvh_xen_start_addr,
548*b061f059SPaolo Bonzini                          FWCfgState *fw_cfg)
549*b061f059SPaolo Bonzini {
550*b061f059SPaolo Bonzini     uint32_t flags = 0;
551*b061f059SPaolo Bonzini     uint32_t mh_load_addr = 0;
552*b061f059SPaolo Bonzini     uint32_t elf_kernel_size = 0;
553*b061f059SPaolo Bonzini     uint64_t elf_entry;
554*b061f059SPaolo Bonzini     uint64_t elf_low, elf_high;
555*b061f059SPaolo Bonzini     int kernel_size;
556*b061f059SPaolo Bonzini 
557*b061f059SPaolo Bonzini     if (ldl_p(header) != 0x464c457f) {
558*b061f059SPaolo Bonzini         return false; /* no elfboot */
559*b061f059SPaolo Bonzini     }
560*b061f059SPaolo Bonzini 
561*b061f059SPaolo Bonzini     bool elf_is64 = header[EI_CLASS] == ELFCLASS64;
562*b061f059SPaolo Bonzini     flags = elf_is64 ?
563*b061f059SPaolo Bonzini         ((Elf64_Ehdr *)header)->e_flags : ((Elf32_Ehdr *)header)->e_flags;
564*b061f059SPaolo Bonzini 
565*b061f059SPaolo Bonzini     if (flags & 0x00010004) { /* LOAD_ELF_HEADER_HAS_ADDR */
566*b061f059SPaolo Bonzini         error_report("elfboot unsupported flags = %x", flags);
567*b061f059SPaolo Bonzini         exit(1);
568*b061f059SPaolo Bonzini     }
569*b061f059SPaolo Bonzini 
570*b061f059SPaolo Bonzini     uint64_t elf_note_type = XEN_ELFNOTE_PHYS32_ENTRY;
571*b061f059SPaolo Bonzini     kernel_size = load_elf(kernel_filename, read_pvh_start_addr,
572*b061f059SPaolo Bonzini                            NULL, &elf_note_type, &elf_entry,
573*b061f059SPaolo Bonzini                            &elf_low, &elf_high, NULL, 0, I386_ELF_MACHINE,
574*b061f059SPaolo Bonzini                            0, 0);
575*b061f059SPaolo Bonzini 
576*b061f059SPaolo Bonzini     if (kernel_size < 0) {
577*b061f059SPaolo Bonzini         error_report("Error while loading elf kernel");
578*b061f059SPaolo Bonzini         exit(1);
579*b061f059SPaolo Bonzini     }
580*b061f059SPaolo Bonzini     mh_load_addr = elf_low;
581*b061f059SPaolo Bonzini     elf_kernel_size = elf_high - elf_low;
582*b061f059SPaolo Bonzini 
583*b061f059SPaolo Bonzini     if (pvh_start_addr == 0) {
584*b061f059SPaolo Bonzini         error_report("Error loading uncompressed kernel without PVH ELF Note");
585*b061f059SPaolo Bonzini         exit(1);
586*b061f059SPaolo Bonzini     }
587*b061f059SPaolo Bonzini     fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_ENTRY, pvh_start_addr);
588*b061f059SPaolo Bonzini     fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_ADDR, mh_load_addr);
589*b061f059SPaolo Bonzini     fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_SIZE, elf_kernel_size);
590*b061f059SPaolo Bonzini 
591*b061f059SPaolo Bonzini     return true;
592*b061f059SPaolo Bonzini }
593*b061f059SPaolo Bonzini 
594*b061f059SPaolo Bonzini void x86_load_linux(X86MachineState *x86ms,
595*b061f059SPaolo Bonzini                     FWCfgState *fw_cfg,
596*b061f059SPaolo Bonzini                     int acpi_data_size,
597*b061f059SPaolo Bonzini                     bool pvh_enabled)
598*b061f059SPaolo Bonzini {
599*b061f059SPaolo Bonzini     bool linuxboot_dma_enabled = X86_MACHINE_GET_CLASS(x86ms)->fwcfg_dma_enabled;
600*b061f059SPaolo Bonzini     uint16_t protocol;
601*b061f059SPaolo Bonzini     int setup_size, kernel_size, cmdline_size;
602*b061f059SPaolo Bonzini     int dtb_size, setup_data_offset;
603*b061f059SPaolo Bonzini     uint32_t initrd_max;
604*b061f059SPaolo Bonzini     uint8_t header[8192], *setup, *kernel;
605*b061f059SPaolo Bonzini     hwaddr real_addr, prot_addr, cmdline_addr, initrd_addr = 0;
606*b061f059SPaolo Bonzini     FILE *f;
607*b061f059SPaolo Bonzini     char *vmode;
608*b061f059SPaolo Bonzini     MachineState *machine = MACHINE(x86ms);
609*b061f059SPaolo Bonzini     struct setup_data *setup_data;
610*b061f059SPaolo Bonzini     const char *kernel_filename = machine->kernel_filename;
611*b061f059SPaolo Bonzini     const char *initrd_filename = machine->initrd_filename;
612*b061f059SPaolo Bonzini     const char *dtb_filename = machine->dtb;
613*b061f059SPaolo Bonzini     const char *kernel_cmdline = machine->kernel_cmdline;
614*b061f059SPaolo Bonzini     SevKernelLoaderContext sev_load_ctx = {};
615*b061f059SPaolo Bonzini 
616*b061f059SPaolo Bonzini     /* Align to 16 bytes as a paranoia measure */
617*b061f059SPaolo Bonzini     cmdline_size = (strlen(kernel_cmdline) + 16) & ~15;
618*b061f059SPaolo Bonzini 
619*b061f059SPaolo Bonzini     /* load the kernel header */
620*b061f059SPaolo Bonzini     f = fopen(kernel_filename, "rb");
621*b061f059SPaolo Bonzini     if (!f) {
622*b061f059SPaolo Bonzini         fprintf(stderr, "qemu: could not open kernel file '%s': %s\n",
623*b061f059SPaolo Bonzini                 kernel_filename, strerror(errno));
624*b061f059SPaolo Bonzini         exit(1);
625*b061f059SPaolo Bonzini     }
626*b061f059SPaolo Bonzini 
627*b061f059SPaolo Bonzini     kernel_size = get_file_size(f);
628*b061f059SPaolo Bonzini     if (!kernel_size ||
629*b061f059SPaolo Bonzini         fread(header, 1, MIN(ARRAY_SIZE(header), kernel_size), f) !=
630*b061f059SPaolo Bonzini         MIN(ARRAY_SIZE(header), kernel_size)) {
631*b061f059SPaolo Bonzini         fprintf(stderr, "qemu: could not load kernel '%s': %s\n",
632*b061f059SPaolo Bonzini                 kernel_filename, strerror(errno));
633*b061f059SPaolo Bonzini         exit(1);
634*b061f059SPaolo Bonzini     }
635*b061f059SPaolo Bonzini 
636*b061f059SPaolo Bonzini     /* kernel protocol version */
637*b061f059SPaolo Bonzini     if (ldl_p(header + 0x202) == 0x53726448) {
638*b061f059SPaolo Bonzini         protocol = lduw_p(header + 0x206);
639*b061f059SPaolo Bonzini     } else {
640*b061f059SPaolo Bonzini         /*
641*b061f059SPaolo Bonzini          * This could be a multiboot kernel. If it is, let's stop treating it
642*b061f059SPaolo Bonzini          * like a Linux kernel.
643*b061f059SPaolo Bonzini          * Note: some multiboot images could be in the ELF format (the same of
644*b061f059SPaolo Bonzini          * PVH), so we try multiboot first since we check the multiboot magic
645*b061f059SPaolo Bonzini          * header before to load it.
646*b061f059SPaolo Bonzini          */
647*b061f059SPaolo Bonzini         if (load_multiboot(x86ms, fw_cfg, f, kernel_filename, initrd_filename,
648*b061f059SPaolo Bonzini                            kernel_cmdline, kernel_size, header)) {
649*b061f059SPaolo Bonzini             return;
650*b061f059SPaolo Bonzini         }
651*b061f059SPaolo Bonzini         /*
652*b061f059SPaolo Bonzini          * Check if the file is an uncompressed kernel file (ELF) and load it,
653*b061f059SPaolo Bonzini          * saving the PVH entry point used by the x86/HVM direct boot ABI.
654*b061f059SPaolo Bonzini          * If load_elfboot() is successful, populate the fw_cfg info.
655*b061f059SPaolo Bonzini          */
656*b061f059SPaolo Bonzini         if (pvh_enabled &&
657*b061f059SPaolo Bonzini             load_elfboot(kernel_filename, kernel_size,
658*b061f059SPaolo Bonzini                          header, pvh_start_addr, fw_cfg)) {
659*b061f059SPaolo Bonzini             fclose(f);
660*b061f059SPaolo Bonzini 
661*b061f059SPaolo Bonzini             fw_cfg_add_i32(fw_cfg, FW_CFG_CMDLINE_SIZE,
662*b061f059SPaolo Bonzini                 strlen(kernel_cmdline) + 1);
663*b061f059SPaolo Bonzini             fw_cfg_add_string(fw_cfg, FW_CFG_CMDLINE_DATA, kernel_cmdline);
664*b061f059SPaolo Bonzini 
665*b061f059SPaolo Bonzini             fw_cfg_add_i32(fw_cfg, FW_CFG_SETUP_SIZE, sizeof(header));
666*b061f059SPaolo Bonzini             fw_cfg_add_bytes(fw_cfg, FW_CFG_SETUP_DATA,
667*b061f059SPaolo Bonzini                              header, sizeof(header));
668*b061f059SPaolo Bonzini 
669*b061f059SPaolo Bonzini             /* load initrd */
670*b061f059SPaolo Bonzini             if (initrd_filename) {
671*b061f059SPaolo Bonzini                 GMappedFile *mapped_file;
672*b061f059SPaolo Bonzini                 gsize initrd_size;
673*b061f059SPaolo Bonzini                 gchar *initrd_data;
674*b061f059SPaolo Bonzini                 GError *gerr = NULL;
675*b061f059SPaolo Bonzini 
676*b061f059SPaolo Bonzini                 mapped_file = g_mapped_file_new(initrd_filename, false, &gerr);
677*b061f059SPaolo Bonzini                 if (!mapped_file) {
678*b061f059SPaolo Bonzini                     fprintf(stderr, "qemu: error reading initrd %s: %s\n",
679*b061f059SPaolo Bonzini                             initrd_filename, gerr->message);
680*b061f059SPaolo Bonzini                     exit(1);
681*b061f059SPaolo Bonzini                 }
682*b061f059SPaolo Bonzini                 x86ms->initrd_mapped_file = mapped_file;
683*b061f059SPaolo Bonzini 
684*b061f059SPaolo Bonzini                 initrd_data = g_mapped_file_get_contents(mapped_file);
685*b061f059SPaolo Bonzini                 initrd_size = g_mapped_file_get_length(mapped_file);
686*b061f059SPaolo Bonzini                 initrd_max = x86ms->below_4g_mem_size - acpi_data_size - 1;
687*b061f059SPaolo Bonzini                 if (initrd_size >= initrd_max) {
688*b061f059SPaolo Bonzini                     fprintf(stderr, "qemu: initrd is too large, cannot support."
689*b061f059SPaolo Bonzini                             "(max: %"PRIu32", need %"PRId64")\n",
690*b061f059SPaolo Bonzini                             initrd_max, (uint64_t)initrd_size);
691*b061f059SPaolo Bonzini                     exit(1);
692*b061f059SPaolo Bonzini                 }
693*b061f059SPaolo Bonzini 
694*b061f059SPaolo Bonzini                 initrd_addr = (initrd_max - initrd_size) & ~4095;
695*b061f059SPaolo Bonzini 
696*b061f059SPaolo Bonzini                 fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_ADDR, initrd_addr);
697*b061f059SPaolo Bonzini                 fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_SIZE, initrd_size);
698*b061f059SPaolo Bonzini                 fw_cfg_add_bytes(fw_cfg, FW_CFG_INITRD_DATA, initrd_data,
699*b061f059SPaolo Bonzini                                  initrd_size);
700*b061f059SPaolo Bonzini             }
701*b061f059SPaolo Bonzini 
702*b061f059SPaolo Bonzini             option_rom[nb_option_roms].bootindex = 0;
703*b061f059SPaolo Bonzini             option_rom[nb_option_roms].name = "pvh.bin";
704*b061f059SPaolo Bonzini             nb_option_roms++;
705*b061f059SPaolo Bonzini 
706*b061f059SPaolo Bonzini             return;
707*b061f059SPaolo Bonzini         }
708*b061f059SPaolo Bonzini         protocol = 0;
709*b061f059SPaolo Bonzini     }
710*b061f059SPaolo Bonzini 
711*b061f059SPaolo Bonzini     if (protocol < 0x200 || !(header[0x211] & 0x01)) {
712*b061f059SPaolo Bonzini         /* Low kernel */
713*b061f059SPaolo Bonzini         real_addr    = 0x90000;
714*b061f059SPaolo Bonzini         cmdline_addr = 0x9a000 - cmdline_size;
715*b061f059SPaolo Bonzini         prot_addr    = 0x10000;
716*b061f059SPaolo Bonzini     } else if (protocol < 0x202) {
717*b061f059SPaolo Bonzini         /* High but ancient kernel */
718*b061f059SPaolo Bonzini         real_addr    = 0x90000;
719*b061f059SPaolo Bonzini         cmdline_addr = 0x9a000 - cmdline_size;
720*b061f059SPaolo Bonzini         prot_addr    = 0x100000;
721*b061f059SPaolo Bonzini     } else {
722*b061f059SPaolo Bonzini         /* High and recent kernel */
723*b061f059SPaolo Bonzini         real_addr    = 0x10000;
724*b061f059SPaolo Bonzini         cmdline_addr = 0x20000;
725*b061f059SPaolo Bonzini         prot_addr    = 0x100000;
726*b061f059SPaolo Bonzini     }
727*b061f059SPaolo Bonzini 
728*b061f059SPaolo Bonzini     /* highest address for loading the initrd */
729*b061f059SPaolo Bonzini     if (protocol >= 0x20c &&
730*b061f059SPaolo Bonzini         lduw_p(header + 0x236) & XLF_CAN_BE_LOADED_ABOVE_4G) {
731*b061f059SPaolo Bonzini         /*
732*b061f059SPaolo Bonzini          * Linux has supported initrd up to 4 GB for a very long time (2007,
733*b061f059SPaolo Bonzini          * long before XLF_CAN_BE_LOADED_ABOVE_4G which was added in 2013),
734*b061f059SPaolo Bonzini          * though it only sets initrd_max to 2 GB to "work around bootloader
735*b061f059SPaolo Bonzini          * bugs". Luckily, QEMU firmware(which does something like bootloader)
736*b061f059SPaolo Bonzini          * has supported this.
737*b061f059SPaolo Bonzini          *
738*b061f059SPaolo Bonzini          * It's believed that if XLF_CAN_BE_LOADED_ABOVE_4G is set, initrd can
739*b061f059SPaolo Bonzini          * be loaded into any address.
740*b061f059SPaolo Bonzini          *
741*b061f059SPaolo Bonzini          * In addition, initrd_max is uint32_t simply because QEMU doesn't
742*b061f059SPaolo Bonzini          * support the 64-bit boot protocol (specifically the ext_ramdisk_image
743*b061f059SPaolo Bonzini          * field).
744*b061f059SPaolo Bonzini          *
745*b061f059SPaolo Bonzini          * Therefore here just limit initrd_max to UINT32_MAX simply as well.
746*b061f059SPaolo Bonzini          */
747*b061f059SPaolo Bonzini         initrd_max = UINT32_MAX;
748*b061f059SPaolo Bonzini     } else if (protocol >= 0x203) {
749*b061f059SPaolo Bonzini         initrd_max = ldl_p(header + 0x22c);
750*b061f059SPaolo Bonzini     } else {
751*b061f059SPaolo Bonzini         initrd_max = 0x37ffffff;
752*b061f059SPaolo Bonzini     }
753*b061f059SPaolo Bonzini 
754*b061f059SPaolo Bonzini     if (initrd_max >= x86ms->below_4g_mem_size - acpi_data_size) {
755*b061f059SPaolo Bonzini         initrd_max = x86ms->below_4g_mem_size - acpi_data_size - 1;
756*b061f059SPaolo Bonzini     }
757*b061f059SPaolo Bonzini 
758*b061f059SPaolo Bonzini     fw_cfg_add_i32(fw_cfg, FW_CFG_CMDLINE_ADDR, cmdline_addr);
759*b061f059SPaolo Bonzini     fw_cfg_add_i32(fw_cfg, FW_CFG_CMDLINE_SIZE, strlen(kernel_cmdline) + 1);
760*b061f059SPaolo Bonzini     fw_cfg_add_string(fw_cfg, FW_CFG_CMDLINE_DATA, kernel_cmdline);
761*b061f059SPaolo Bonzini     sev_load_ctx.cmdline_data = (char *)kernel_cmdline;
762*b061f059SPaolo Bonzini     sev_load_ctx.cmdline_size = strlen(kernel_cmdline) + 1;
763*b061f059SPaolo Bonzini 
764*b061f059SPaolo Bonzini     if (protocol >= 0x202) {
765*b061f059SPaolo Bonzini         stl_p(header + 0x228, cmdline_addr);
766*b061f059SPaolo Bonzini     } else {
767*b061f059SPaolo Bonzini         stw_p(header + 0x20, 0xA33F);
768*b061f059SPaolo Bonzini         stw_p(header + 0x22, cmdline_addr - real_addr);
769*b061f059SPaolo Bonzini     }
770*b061f059SPaolo Bonzini 
771*b061f059SPaolo Bonzini     /* handle vga= parameter */
772*b061f059SPaolo Bonzini     vmode = strstr(kernel_cmdline, "vga=");
773*b061f059SPaolo Bonzini     if (vmode) {
774*b061f059SPaolo Bonzini         unsigned int video_mode;
775*b061f059SPaolo Bonzini         const char *end;
776*b061f059SPaolo Bonzini         int ret;
777*b061f059SPaolo Bonzini         /* skip "vga=" */
778*b061f059SPaolo Bonzini         vmode += 4;
779*b061f059SPaolo Bonzini         if (!strncmp(vmode, "normal", 6)) {
780*b061f059SPaolo Bonzini             video_mode = 0xffff;
781*b061f059SPaolo Bonzini         } else if (!strncmp(vmode, "ext", 3)) {
782*b061f059SPaolo Bonzini             video_mode = 0xfffe;
783*b061f059SPaolo Bonzini         } else if (!strncmp(vmode, "ask", 3)) {
784*b061f059SPaolo Bonzini             video_mode = 0xfffd;
785*b061f059SPaolo Bonzini         } else {
786*b061f059SPaolo Bonzini             ret = qemu_strtoui(vmode, &end, 0, &video_mode);
787*b061f059SPaolo Bonzini             if (ret != 0 || (*end && *end != ' ')) {
788*b061f059SPaolo Bonzini                 fprintf(stderr, "qemu: invalid 'vga=' kernel parameter.\n");
789*b061f059SPaolo Bonzini                 exit(1);
790*b061f059SPaolo Bonzini             }
791*b061f059SPaolo Bonzini         }
792*b061f059SPaolo Bonzini         stw_p(header + 0x1fa, video_mode);
793*b061f059SPaolo Bonzini     }
794*b061f059SPaolo Bonzini 
795*b061f059SPaolo Bonzini     /* loader type */
796*b061f059SPaolo Bonzini     /*
797*b061f059SPaolo Bonzini      * High nybble = B reserved for QEMU; low nybble is revision number.
798*b061f059SPaolo Bonzini      * If this code is substantially changed, you may want to consider
799*b061f059SPaolo Bonzini      * incrementing the revision.
800*b061f059SPaolo Bonzini      */
801*b061f059SPaolo Bonzini     if (protocol >= 0x200) {
802*b061f059SPaolo Bonzini         header[0x210] = 0xB0;
803*b061f059SPaolo Bonzini     }
804*b061f059SPaolo Bonzini     /* heap */
805*b061f059SPaolo Bonzini     if (protocol >= 0x201) {
806*b061f059SPaolo Bonzini         header[0x211] |= 0x80; /* CAN_USE_HEAP */
807*b061f059SPaolo Bonzini         stw_p(header + 0x224, cmdline_addr - real_addr - 0x200);
808*b061f059SPaolo Bonzini     }
809*b061f059SPaolo Bonzini 
810*b061f059SPaolo Bonzini     /* load initrd */
811*b061f059SPaolo Bonzini     if (initrd_filename) {
812*b061f059SPaolo Bonzini         GMappedFile *mapped_file;
813*b061f059SPaolo Bonzini         gsize initrd_size;
814*b061f059SPaolo Bonzini         gchar *initrd_data;
815*b061f059SPaolo Bonzini         GError *gerr = NULL;
816*b061f059SPaolo Bonzini 
817*b061f059SPaolo Bonzini         if (protocol < 0x200) {
818*b061f059SPaolo Bonzini             fprintf(stderr, "qemu: linux kernel too old to load a ram disk\n");
819*b061f059SPaolo Bonzini             exit(1);
820*b061f059SPaolo Bonzini         }
821*b061f059SPaolo Bonzini 
822*b061f059SPaolo Bonzini         mapped_file = g_mapped_file_new(initrd_filename, false, &gerr);
823*b061f059SPaolo Bonzini         if (!mapped_file) {
824*b061f059SPaolo Bonzini             fprintf(stderr, "qemu: error reading initrd %s: %s\n",
825*b061f059SPaolo Bonzini                     initrd_filename, gerr->message);
826*b061f059SPaolo Bonzini             exit(1);
827*b061f059SPaolo Bonzini         }
828*b061f059SPaolo Bonzini         x86ms->initrd_mapped_file = mapped_file;
829*b061f059SPaolo Bonzini 
830*b061f059SPaolo Bonzini         initrd_data = g_mapped_file_get_contents(mapped_file);
831*b061f059SPaolo Bonzini         initrd_size = g_mapped_file_get_length(mapped_file);
832*b061f059SPaolo Bonzini         if (initrd_size >= initrd_max) {
833*b061f059SPaolo Bonzini             fprintf(stderr, "qemu: initrd is too large, cannot support."
834*b061f059SPaolo Bonzini                     "(max: %"PRIu32", need %"PRId64")\n",
835*b061f059SPaolo Bonzini                     initrd_max, (uint64_t)initrd_size);
836*b061f059SPaolo Bonzini             exit(1);
837*b061f059SPaolo Bonzini         }
838*b061f059SPaolo Bonzini 
839*b061f059SPaolo Bonzini         initrd_addr = (initrd_max - initrd_size) & ~4095;
840*b061f059SPaolo Bonzini 
841*b061f059SPaolo Bonzini         fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_ADDR, initrd_addr);
842*b061f059SPaolo Bonzini         fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_SIZE, initrd_size);
843*b061f059SPaolo Bonzini         fw_cfg_add_bytes(fw_cfg, FW_CFG_INITRD_DATA, initrd_data, initrd_size);
844*b061f059SPaolo Bonzini         sev_load_ctx.initrd_data = initrd_data;
845*b061f059SPaolo Bonzini         sev_load_ctx.initrd_size = initrd_size;
846*b061f059SPaolo Bonzini 
847*b061f059SPaolo Bonzini         stl_p(header + 0x218, initrd_addr);
848*b061f059SPaolo Bonzini         stl_p(header + 0x21c, initrd_size);
849*b061f059SPaolo Bonzini     }
850*b061f059SPaolo Bonzini 
851*b061f059SPaolo Bonzini     /* load kernel and setup */
852*b061f059SPaolo Bonzini     setup_size = header[0x1f1];
853*b061f059SPaolo Bonzini     if (setup_size == 0) {
854*b061f059SPaolo Bonzini         setup_size = 4;
855*b061f059SPaolo Bonzini     }
856*b061f059SPaolo Bonzini     setup_size = (setup_size + 1) * 512;
857*b061f059SPaolo Bonzini     if (setup_size > kernel_size) {
858*b061f059SPaolo Bonzini         fprintf(stderr, "qemu: invalid kernel header\n");
859*b061f059SPaolo Bonzini         exit(1);
860*b061f059SPaolo Bonzini     }
861*b061f059SPaolo Bonzini     kernel_size -= setup_size;
862*b061f059SPaolo Bonzini 
863*b061f059SPaolo Bonzini     setup  = g_malloc(setup_size);
864*b061f059SPaolo Bonzini     kernel = g_malloc(kernel_size);
865*b061f059SPaolo Bonzini     fseek(f, 0, SEEK_SET);
866*b061f059SPaolo Bonzini     if (fread(setup, 1, setup_size, f) != setup_size) {
867*b061f059SPaolo Bonzini         fprintf(stderr, "fread() failed\n");
868*b061f059SPaolo Bonzini         exit(1);
869*b061f059SPaolo Bonzini     }
870*b061f059SPaolo Bonzini     if (fread(kernel, 1, kernel_size, f) != kernel_size) {
871*b061f059SPaolo Bonzini         fprintf(stderr, "fread() failed\n");
872*b061f059SPaolo Bonzini         exit(1);
873*b061f059SPaolo Bonzini     }
874*b061f059SPaolo Bonzini     fclose(f);
875*b061f059SPaolo Bonzini 
876*b061f059SPaolo Bonzini     /* append dtb to kernel */
877*b061f059SPaolo Bonzini     if (dtb_filename) {
878*b061f059SPaolo Bonzini         if (protocol < 0x209) {
879*b061f059SPaolo Bonzini             fprintf(stderr, "qemu: Linux kernel too old to load a dtb\n");
880*b061f059SPaolo Bonzini             exit(1);
881*b061f059SPaolo Bonzini         }
882*b061f059SPaolo Bonzini 
883*b061f059SPaolo Bonzini         dtb_size = get_image_size(dtb_filename);
884*b061f059SPaolo Bonzini         if (dtb_size <= 0) {
885*b061f059SPaolo Bonzini             fprintf(stderr, "qemu: error reading dtb %s: %s\n",
886*b061f059SPaolo Bonzini                     dtb_filename, strerror(errno));
887*b061f059SPaolo Bonzini             exit(1);
888*b061f059SPaolo Bonzini         }
889*b061f059SPaolo Bonzini 
890*b061f059SPaolo Bonzini         setup_data_offset = QEMU_ALIGN_UP(kernel_size, 16);
891*b061f059SPaolo Bonzini         kernel_size = setup_data_offset + sizeof(struct setup_data) + dtb_size;
892*b061f059SPaolo Bonzini         kernel = g_realloc(kernel, kernel_size);
893*b061f059SPaolo Bonzini 
894*b061f059SPaolo Bonzini         stq_p(header + 0x250, prot_addr + setup_data_offset);
895*b061f059SPaolo Bonzini 
896*b061f059SPaolo Bonzini         setup_data = (struct setup_data *)(kernel + setup_data_offset);
897*b061f059SPaolo Bonzini         setup_data->next = 0;
898*b061f059SPaolo Bonzini         setup_data->type = cpu_to_le32(SETUP_DTB);
899*b061f059SPaolo Bonzini         setup_data->len = cpu_to_le32(dtb_size);
900*b061f059SPaolo Bonzini 
901*b061f059SPaolo Bonzini         load_image_size(dtb_filename, setup_data->data, dtb_size);
902*b061f059SPaolo Bonzini     }
903*b061f059SPaolo Bonzini 
904*b061f059SPaolo Bonzini     /*
905*b061f059SPaolo Bonzini      * If we're starting an encrypted VM, it will be OVMF based, which uses the
906*b061f059SPaolo Bonzini      * efi stub for booting and doesn't require any values to be placed in the
907*b061f059SPaolo Bonzini      * kernel header.  We therefore don't update the header so the hash of the
908*b061f059SPaolo Bonzini      * kernel on the other side of the fw_cfg interface matches the hash of the
909*b061f059SPaolo Bonzini      * file the user passed in.
910*b061f059SPaolo Bonzini      */
911*b061f059SPaolo Bonzini     if (!sev_enabled()) {
912*b061f059SPaolo Bonzini         memcpy(setup, header, MIN(sizeof(header), setup_size));
913*b061f059SPaolo Bonzini     }
914*b061f059SPaolo Bonzini 
915*b061f059SPaolo Bonzini     fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_ADDR, prot_addr);
916*b061f059SPaolo Bonzini     fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_SIZE, kernel_size);
917*b061f059SPaolo Bonzini     fw_cfg_add_bytes(fw_cfg, FW_CFG_KERNEL_DATA, kernel, kernel_size);
918*b061f059SPaolo Bonzini     sev_load_ctx.kernel_data = (char *)kernel;
919*b061f059SPaolo Bonzini     sev_load_ctx.kernel_size = kernel_size;
920*b061f059SPaolo Bonzini 
921*b061f059SPaolo Bonzini     fw_cfg_add_i32(fw_cfg, FW_CFG_SETUP_ADDR, real_addr);
922*b061f059SPaolo Bonzini     fw_cfg_add_i32(fw_cfg, FW_CFG_SETUP_SIZE, setup_size);
923*b061f059SPaolo Bonzini     fw_cfg_add_bytes(fw_cfg, FW_CFG_SETUP_DATA, setup, setup_size);
924*b061f059SPaolo Bonzini     sev_load_ctx.setup_data = (char *)setup;
925*b061f059SPaolo Bonzini     sev_load_ctx.setup_size = setup_size;
926*b061f059SPaolo Bonzini 
927*b061f059SPaolo Bonzini     if (sev_enabled()) {
928*b061f059SPaolo Bonzini         sev_add_kernel_loader_hashes(&sev_load_ctx, &error_fatal);
929*b061f059SPaolo Bonzini     }
930*b061f059SPaolo Bonzini 
931*b061f059SPaolo Bonzini     option_rom[nb_option_roms].bootindex = 0;
932*b061f059SPaolo Bonzini     option_rom[nb_option_roms].name = "linuxboot.bin";
933*b061f059SPaolo Bonzini     if (linuxboot_dma_enabled && fw_cfg_dma_enabled(fw_cfg)) {
934*b061f059SPaolo Bonzini         option_rom[nb_option_roms].name = "linuxboot_dma.bin";
935*b061f059SPaolo Bonzini     }
936*b061f059SPaolo Bonzini     nb_option_roms++;
937*b061f059SPaolo Bonzini }
938*b061f059SPaolo Bonzini 
939*b061f059SPaolo Bonzini void x86_isa_bios_init(MemoryRegion *isa_bios, MemoryRegion *isa_memory,
940*b061f059SPaolo Bonzini                        MemoryRegion *bios, bool read_only)
941*b061f059SPaolo Bonzini {
942*b061f059SPaolo Bonzini     uint64_t bios_size = memory_region_size(bios);
943*b061f059SPaolo Bonzini     uint64_t isa_bios_size = MIN(bios_size, 128 * KiB);
944*b061f059SPaolo Bonzini 
945*b061f059SPaolo Bonzini     memory_region_init_alias(isa_bios, NULL, "isa-bios", bios,
946*b061f059SPaolo Bonzini                              bios_size - isa_bios_size, isa_bios_size);
947*b061f059SPaolo Bonzini     memory_region_add_subregion_overlap(isa_memory, 1 * MiB - isa_bios_size,
948*b061f059SPaolo Bonzini                                         isa_bios, 1);
949*b061f059SPaolo Bonzini     memory_region_set_readonly(isa_bios, read_only);
950*b061f059SPaolo Bonzini }
951*b061f059SPaolo Bonzini 
952*b061f059SPaolo Bonzini void x86_bios_rom_init(X86MachineState *x86ms, const char *default_firmware,
953*b061f059SPaolo Bonzini                        MemoryRegion *rom_memory, bool isapc_ram_fw)
954*b061f059SPaolo Bonzini {
955*b061f059SPaolo Bonzini     const char *bios_name;
956*b061f059SPaolo Bonzini     char *filename;
957*b061f059SPaolo Bonzini     int bios_size;
958*b061f059SPaolo Bonzini     ssize_t ret;
959*b061f059SPaolo Bonzini 
960*b061f059SPaolo Bonzini     /* BIOS load */
961*b061f059SPaolo Bonzini     bios_name = MACHINE(x86ms)->firmware ?: default_firmware;
962*b061f059SPaolo Bonzini     filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, bios_name);
963*b061f059SPaolo Bonzini     if (filename) {
964*b061f059SPaolo Bonzini         bios_size = get_image_size(filename);
965*b061f059SPaolo Bonzini     } else {
966*b061f059SPaolo Bonzini         bios_size = -1;
967*b061f059SPaolo Bonzini     }
968*b061f059SPaolo Bonzini     if (bios_size <= 0 ||
969*b061f059SPaolo Bonzini         (bios_size % 65536) != 0) {
970*b061f059SPaolo Bonzini         goto bios_error;
971*b061f059SPaolo Bonzini     }
972*b061f059SPaolo Bonzini     memory_region_init_ram(&x86ms->bios, NULL, "pc.bios", bios_size,
973*b061f059SPaolo Bonzini                            &error_fatal);
974*b061f059SPaolo Bonzini     if (sev_enabled()) {
975*b061f059SPaolo Bonzini         /*
976*b061f059SPaolo Bonzini          * The concept of a "reset" simply doesn't exist for
977*b061f059SPaolo Bonzini          * confidential computing guests, we have to destroy and
978*b061f059SPaolo Bonzini          * re-launch them instead.  So there is no need to register
979*b061f059SPaolo Bonzini          * the firmware as rom to properly re-initialize on reset.
980*b061f059SPaolo Bonzini          * Just go for a straight file load instead.
981*b061f059SPaolo Bonzini          */
982*b061f059SPaolo Bonzini         void *ptr = memory_region_get_ram_ptr(&x86ms->bios);
983*b061f059SPaolo Bonzini         load_image_size(filename, ptr, bios_size);
984*b061f059SPaolo Bonzini         x86_firmware_configure(ptr, bios_size);
985*b061f059SPaolo Bonzini     } else {
986*b061f059SPaolo Bonzini         memory_region_set_readonly(&x86ms->bios, !isapc_ram_fw);
987*b061f059SPaolo Bonzini         ret = rom_add_file_fixed(bios_name, (uint32_t)(-bios_size), -1);
988*b061f059SPaolo Bonzini         if (ret != 0) {
989*b061f059SPaolo Bonzini             goto bios_error;
990*b061f059SPaolo Bonzini         }
991*b061f059SPaolo Bonzini     }
992*b061f059SPaolo Bonzini     g_free(filename);
993*b061f059SPaolo Bonzini 
994*b061f059SPaolo Bonzini     /* map the last 128KB of the BIOS in ISA space */
995*b061f059SPaolo Bonzini     x86_isa_bios_init(&x86ms->isa_bios, rom_memory, &x86ms->bios,
996*b061f059SPaolo Bonzini                       !isapc_ram_fw);
997*b061f059SPaolo Bonzini 
998*b061f059SPaolo Bonzini     /* map all the bios at the top of memory */
999*b061f059SPaolo Bonzini     memory_region_add_subregion(rom_memory,
1000*b061f059SPaolo Bonzini                                 (uint32_t)(-bios_size),
1001*b061f059SPaolo Bonzini                                 &x86ms->bios);
1002*b061f059SPaolo Bonzini     return;
1003*b061f059SPaolo Bonzini 
1004*b061f059SPaolo Bonzini bios_error:
1005*b061f059SPaolo Bonzini     fprintf(stderr, "qemu: could not load PC BIOS '%s'\n", bios_name);
1006*b061f059SPaolo Bonzini     exit(1);
1007*b061f059SPaolo Bonzini }
1008