1 /* Copyright 2008 IBM Corporation
2 * 2008 Red Hat, Inc.
3 * Copyright 2011 Intel Corporation
4 * Copyright 2016 Veertu, Inc.
5 * Copyright 2017 The Android Open Source Project
6 *
7 * QEMU Hypervisor.framework support
8 *
9 * This program is free software; you can redistribute it and/or
10 * modify it under the terms of version 2 of the GNU General Public
11 * License as published by the Free Software Foundation.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, see <http://www.gnu.org/licenses/>.
20 *
21 * This file contain code under public domain from the hvdos project:
22 * https://github.com/mist64/hvdos
23 *
24 * Parts Copyright (c) 2011 NetApp, Inc.
25 * All rights reserved.
26 *
27 * Redistribution and use in source and binary forms, with or without
28 * modification, are permitted provided that the following conditions
29 * are met:
30 * 1. Redistributions of source code must retain the above copyright
31 * notice, this list of conditions and the following disclaimer.
32 * 2. Redistributions in binary form must reproduce the above copyright
33 * notice, this list of conditions and the following disclaimer in the
34 * documentation and/or other materials provided with the distribution.
35 *
36 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
37 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
38 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
39 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
40 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
41 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
42 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
43 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
44 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
45 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
46 * SUCH DAMAGE.
47 */
48
49 #include "qemu/osdep.h"
50 #include "qemu/error-report.h"
51 #include "qemu/memalign.h"
52 #include "qapi/error.h"
53 #include "migration/blocker.h"
54
55 #include "system/hvf.h"
56 #include "system/hvf_int.h"
57 #include "system/runstate.h"
58 #include "system/cpus.h"
59 #include "hvf-i386.h"
60 #include "vmcs.h"
61 #include "vmx.h"
62 #include "emulate/x86.h"
63 #include "x86_descr.h"
64 #include "emulate/x86_flags.h"
65 #include "x86_mmu.h"
66 #include "emulate/x86_decode.h"
67 #include "emulate/x86_emu.h"
68 #include "x86_task.h"
69 #include "x86hvf.h"
70
71 #include <Hypervisor/hv.h>
72 #include <Hypervisor/hv_vmx.h>
73 #include <sys/sysctl.h>
74
75 #include "hw/i386/apic_internal.h"
76 #include "qemu/main-loop.h"
77 #include "qemu/accel.h"
78 #include "target/i386/cpu.h"
79 #include "exec/target_page.h"
80
/*
 * Migration blocker set up by hvf_arch_init_vcpu() when the CPU model has
 * the CPUID_APM_INVTSC feature bit.  It is file-scope because that setup is
 * skipped whenever this pointer is already non-NULL.
 */
static Error *invtsc_mig_blocker;
82
/*
 * Propagate the emulated APIC's task priority to the vCPU.
 *
 * The value returned by cpu_get_apic_tpr() is shifted left by four and
 * written to HV_X86_TPR.  The VMCS TPR threshold is then set to 0 when
 * apic_get_highest_priority_irr() returns -1, and otherwise to the smaller
 * of that IRR value and the shifted TPR, shifted right by four.
 */
void vmx_update_tpr(CPUState *cpu)
{
    /* TODO: need to integrate APIC handling */
    X86CPU *x86_cpu = X86_CPU(cpu);
    int tpr = cpu_get_apic_tpr(x86_cpu->apic_state) << 4;
    int irr = apic_get_highest_priority_irr(x86_cpu->apic_state);
    int threshold = 0;

    if (irr != -1) {
        threshold = (irr > tpr ? tpr : irr) >> 4;
    }

    wreg(cpu->accel->fd, HV_X86_TPR, tpr);
    wvmcs(cpu->accel->fd, VMCS_TPR_THRESHOLD, threshold);
}
98
/*
 * Read HV_X86_TPR back from the vCPU, drop its low four bits and hand the
 * result to the emulated APIC through cpu_set_apic_tpr().
 */
static void update_apic_tpr(CPUState *cpu)
{
    int shifted_tpr = rreg(cpu->accel->fd, HV_X86_TPR) >> 4;

    cpu_set_apic_tpr(X86_CPU(cpu)->apic_state, shifted_tpr);
}
105
/*
 * Mask for the low eight bits of the IDT-vectoring information value; used
 * by the task-switch exit handler to pass the vector on.
 */
#define VECTORING_INFO_VECTOR_MASK 0xff
107
/*
 * Perform @count back-to-back port I/O accesses of @size bytes each on
 * @port, stepping through @buffer by @size bytes per access.
 *
 * @env:       unused here.
 * @direction: forwarded unchanged to address_space_rw(); callers in this
 *             file pass 0 for IN and 1 for OUT.
 *
 * The return value of address_space_rw() is not inspected.
 */
void hvf_handle_io(CPUState *env, uint16_t port, void *buffer,
                   int direction, int size, int count)
{
    uint8_t *cursor = buffer;
    int remaining;

    for (remaining = count; remaining > 0; remaining--) {
        address_space_rw(&address_space_io, port, MEMTXATTRS_UNSPECIFIED,
                         cursor, size, direction);
        cursor += size;
    }
}
121
/*
 * Decide whether an EPT violation has to be handled by emulating the
 * faulting instruction.
 *
 * @slot:     memory slot covering @gpa, or NULL if there is none.
 * @gpa:      guest-physical address that faulted.
 * @ept_qual: exit qualification of the EPT violation.
 *
 * Side effect: for a write into a slot flagged HVF_SLOT_LOG, the page is
 * marked dirty and re-protected read/write/exec before the result is
 * decided.
 *
 * Returns true when the caller should decode and emulate the access.
 */
static bool ept_emulation_fault(hvf_slot *slot, uint64_t gpa, uint64_t ept_qual)
{
    bool is_read = (ept_qual & EPT_VIOLATION_DATA_READ) != 0;
    bool is_write = (ept_qual & EPT_VIOLATION_DATA_WRITE) != 0;

    /* EPT fault on an instruction fetch doesn't make sense here */
    if (ept_qual & EPT_VIOLATION_INST_FETCH) {
        return false;
    }

    /* EPT fault must be a read fault or a write fault */
    if (!is_read && !is_write) {
        return false;
    }

    /* Dirty logging: record the write and lift the protection again. */
    if (is_write && slot && (slot->flags & HVF_SLOT_LOG)) {
        uint64_t page_base = gpa & ~(TARGET_PAGE_SIZE - 1u);

        memory_region_set_dirty(slot->region, gpa - slot->start, 1);
        hv_vm_protect(page_base, TARGET_PAGE_SIZE,
                      HV_MEMORY_READ | HV_MEMORY_WRITE | HV_MEMORY_EXEC);
    }

    /*
     * The EPT violation must have been caused by accessing a
     * guest-physical address that is a translation of a guest-linear
     * address.
     */
    if (!(ept_qual & EPT_VIOLATION_GLA_VALID) ||
        !(ept_qual & EPT_VIOLATION_XLAT_VALID)) {
        return false;
    }

    /* No slot at all: emulate. */
    if (!slot) {
        return true;
    }

    /* RAM never needs emulation; ROMD only for accesses that are not reads. */
    if (memory_region_is_ram(slot->region)) {
        return false;
    }
    if (is_read && memory_region_is_romd(slot->region)) {
        return false;
    }
    return true;
}
166
/*
 * Release the per-vCPU buffers that hvf_arch_init_vcpu() allocated.
 *
 * emu_mmio_buf comes from g_new() and is released with g_free().
 * xsave_buf comes from qemu_memalign() and therefore has to be released
 * with qemu_vfree(); it was previously never freed.  Both pointers are
 * cleared afterwards so a stale pointer cannot be reused or freed twice.
 */
void hvf_arch_vcpu_destroy(CPUState *cpu)
{
    X86CPU *x86_cpu = X86_CPU(cpu);
    CPUX86State *env = &x86_cpu->env;

    g_free(env->emu_mmio_buf);
    env->emu_mmio_buf = NULL;

    qemu_vfree(env->xsave_buf);
    env->xsave_buf = NULL;
}
174
/*
 * Fill in env->tsc_khz from the host's "machdep.tsc.frequency" sysctl.
 *
 * Does nothing when tsc_khz is already non-zero, and leaves it at zero when
 * the sysctl call fails.
 */
static void init_tsc_freq(CPUX86State *env)
{
    /*
     * Zero-initialised so that a result shorter than the buffer cannot
     * leave indeterminate upper bytes in the value.
     */
    uint64_t tsc_freq = 0;
    size_t length = sizeof(tsc_freq);

    if (env->tsc_khz != 0) {
        return;
    }

    if (sysctlbyname("machdep.tsc.frequency", &tsc_freq, &length, NULL, 0)) {
        return;
    }
    env->tsc_khz = tsc_freq / 1000; /* Hz to KHz */
}
190
/*
 * Fill in env->apic_bus_freq from the host's "hw.busfrequency" sysctl.
 *
 * Does nothing when apic_bus_freq is already non-zero, and leaves it at
 * zero when the sysctl call fails.  The value is stored as returned; the
 * CPUID wrapper in this file divides it by 1000 ("Hz to KHz").
 */
static void init_apic_bus_freq(CPUX86State *env)
{
    /*
     * Zero-initialised so that a result shorter than the buffer cannot
     * leave indeterminate upper bytes in the value.
     */
    uint64_t bus_freq = 0;
    size_t length = sizeof(bus_freq);

    if (env->apic_bus_freq != 0) {
        return;
    }

    if (sysctlbyname("hw.busfrequency", &bus_freq, &length, NULL, 0)) {
        return;
    }
    env->apic_bus_freq = bus_freq;
}
206
/* Returns true once env->tsc_khz holds a non-zero value. */
static inline bool tsc_is_known(CPUX86State *env)
{
    return env->tsc_khz ? true : false;
}
211
/* Returns true once env->apic_bus_freq holds a non-zero value. */
static inline bool apic_bus_freq_is_known(CPUX86State *env)
{
    return env->apic_bus_freq ? true : false;
}
216
/*
 * Kick a vCPU: first through the generic cpus_kick_thread(), then by asking
 * Hypervisor.framework to interrupt this single vCPU.
 */
void hvf_kick_vcpu_thread(CPUState *cpu)
{
    cpus_kick_thread(cpu);

    /* One-element list of vCPU ids: just this vCPU. */
    hv_vcpu_interrupt(&cpu->accel->fd, 1);
}
222
/*
 * Architecture-specific accelerator initialisation hook.  This
 * implementation has nothing to set up and always reports success (0).
 */
int hvf_arch_init(void)
{
    return 0;
}
227
hvf_arch_vm_create(MachineState * ms,uint32_t pa_range)228 hv_return_t hvf_arch_vm_create(MachineState *ms, uint32_t pa_range)
229 {
230 return hv_vm_create(HV_VM_DEFAULT);
231 }
232
/*
 * Emulator callback: fetch segment @seg of vCPU @s in VMX form and convert
 * it into the x86 descriptor layout stored in @desc.
 */
static void hvf_read_segment_descriptor(CPUState *s,
                                        struct x86_segment_descriptor *desc,
                                        X86Seg seg)
{
    struct vmx_segment raw;

    vmx_read_segment_descriptor(s, &raw, seg);
    vmx_segment_to_x86_descriptor(s, &raw, desc);
}
240
/*
 * Emulator callback: read @bytes bytes at guest virtual address @gva into
 * @data.  Thin forwarder to vmx_read_mem().
 */
static void hvf_read_mem(CPUState *cpu, void *data, target_ulong gva,
                         int bytes)
{
    vmx_read_mem(cpu, data, gva, bytes);
}
245
/*
 * Emulator callback: write @bytes bytes from @data to guest virtual address
 * @gva.  Note that vmx_write_mem() takes the address before the buffer,
 * the reverse of this callback's parameter order.
 */
static void hvf_write_mem(CPUState *cpu, void *data, target_ulong gva,
                          int bytes)
{
    vmx_write_mem(cpu, gva, data, bytes);
}
250
251 static const struct x86_emul_ops hvf_x86_emul_ops = {
252 .read_mem = hvf_read_mem,
253 .write_mem = hvf_write_mem,
254 .read_segment_descriptor = hvf_read_segment_descriptor,
255 .handle_io = hvf_handle_io,
256 .simulate_rdmsr = hvf_simulate_rdmsr,
257 .simulate_wrmsr = hvf_simulate_wrmsr,
258 };
259
/*
 * x86-specific vCPU initialisation for HVF.
 *
 * In order, this:
 *  - registers the emulator callbacks and initialises the decoder;
 *  - allocates the shared capability structure (first call only) and the
 *    per-vCPU MMIO emulation buffer;
 *  - when vmware-cpuid-freq is requested, probes the TSC and APIC bus
 *    frequencies and reports an error if either stays unknown;
 *  - installs a migration blocker when the CPU model exposes invtsc;
 *  - reads the VMX capabilities (aborting on failure) and programs the
 *    pin-based, primary, secondary and entry controls;
 *  - allocates the XSAVE buffer and enables the native MSRs.
 *
 * Returns 0 on success, or the negative value from migrate_add_blocker().
 */
int hvf_arch_init_vcpu(CPUState *cpu)
{
    /* MSRs passed to hv_vcpu_enable_native_msr(), in the original order. */
    static const uint32_t native_msrs[] = {
        MSR_STAR,
        MSR_LSTAR,
        MSR_CSTAR,
        MSR_FMASK,
        MSR_FSBASE,
        MSR_GSBASE,
        MSR_KERNELGSBASE,
        MSR_TSC_AUX,
        MSR_IA32_TSC,
        MSR_IA32_SYSENTER_CS,
        MSR_IA32_SYSENTER_EIP,
        MSR_IA32_SYSENTER_ESP,
    };
    X86CPU *x86cpu = X86_CPU(cpu);
    CPUX86State *env = &x86cpu->env;
    struct hvf_vcpu_caps *caps;
    Error *local_err = NULL;
    uint64_t sec_ctls;
    size_t i;

    init_emu(&hvf_x86_emul_ops);
    init_decoder();

    if (hvf_state->hvf_caps == NULL) {
        hvf_state->hvf_caps = g_new0(struct hvf_vcpu_caps, 1);
    }
    caps = hvf_state->hvf_caps;
    env->emu_mmio_buf = g_new(char, 4096);

    if (x86cpu->vmware_cpuid_freq) {
        init_tsc_freq(env);
        init_apic_bus_freq(env);

        if (!tsc_is_known(env) || !apic_bus_freq_is_known(env)) {
            error_report("vmware-cpuid-freq: feature couldn't be enabled");
        }
    }

    /* invtsc makes the CPU state non-migratable: register a blocker once. */
    if ((env->features[FEAT_8000_0007_EDX] & CPUID_APM_INVTSC) &&
        invtsc_mig_blocker == NULL) {
        int r;

        error_setg(&invtsc_mig_blocker,
                   "State blocked by non-migratable CPU device (invtsc flag)");
        r = migrate_add_blocker(&invtsc_mig_blocker, &local_err);
        if (r < 0) {
            error_report_err(local_err);
            return r;
        }
    }

    /* Any capability that cannot be read is fatal. */
    if (hv_vmx_read_capability(HV_VMX_CAP_PINBASED,
                               &caps->vmx_cap_pinbased) ||
        hv_vmx_read_capability(HV_VMX_CAP_PROCBASED,
                               &caps->vmx_cap_procbased) ||
        hv_vmx_read_capability(HV_VMX_CAP_PROCBASED2,
                               &caps->vmx_cap_procbased2) ||
        hv_vmx_read_capability(HV_VMX_CAP_ENTRY,
                               &caps->vmx_cap_entry)) {
        abort();
    }

    /* set VMCS control fields */
    wvmcs(cpu->accel->fd, VMCS_PIN_BASED_CTLS,
          cap2ctrl(caps->vmx_cap_pinbased,
                   VMCS_PIN_BASED_CTLS_EXTINT |
                   VMCS_PIN_BASED_CTLS_NMI |
                   VMCS_PIN_BASED_CTLS_VNMI));

    /* SEC_CONTROL is OR-ed in after cap2ctrl(), not passed through it. */
    wvmcs(cpu->accel->fd, VMCS_PRI_PROC_BASED_CTLS,
          cap2ctrl(caps->vmx_cap_procbased,
                   VMCS_PRI_PROC_BASED_CTLS_HLT |
                   VMCS_PRI_PROC_BASED_CTLS_MWAIT |
                   VMCS_PRI_PROC_BASED_CTLS_TSC_OFFSET |
                   VMCS_PRI_PROC_BASED_CTLS_TPR_SHADOW) |
          VMCS_PRI_PROC_BASED_CTLS_SEC_CONTROL);

    sec_ctls = VMCS_PRI_PROC_BASED2_CTLS_APIC_ACCESSES;

    /* Is RDTSCP support in CPUID? If so, enable it in the VMCS. */
    if (hvf_get_supported_cpuid(0x80000001, 0, R_EDX) & CPUID_EXT2_RDTSCP) {
        sec_ctls |= VMCS_PRI_PROC_BASED2_CTLS_RDTSCP;
    }

    wvmcs(cpu->accel->fd, VMCS_SEC_PROC_BASED_CTLS,
          cap2ctrl(caps->vmx_cap_procbased2, sec_ctls));

    wvmcs(cpu->accel->fd, VMCS_ENTRY_CTLS,
          cap2ctrl(caps->vmx_cap_entry, 0));

    /* No bits are set in the exception bitmap. */
    wvmcs(cpu->accel->fd, VMCS_EXCEPTION_BITMAP, 0);

    wvmcs(cpu->accel->fd, VMCS_TPR_THRESHOLD, 0);

    env->xsave_buf_len = 4096;
    env->xsave_buf = qemu_memalign(4096, env->xsave_buf_len);

    /*
     * The allocated storage must be large enough for all of the
     * possible XSAVE state components.
     */
    assert(hvf_get_supported_cpuid(0xd, 0, R_ECX) <= env->xsave_buf_len);

    for (i = 0; i < sizeof(native_msrs) / sizeof(native_msrs[0]); i++) {
        hv_vcpu_enable_native_msr(cpu->accel->fd, native_msrs[i], 1);
    }

    return 0;
}
369
/*
 * Capture event state after a VM exit into the CPUX86State.
 *
 * @ins_len:     instruction length reported for the exit.
 * @idtvec_info: IDT-vectoring information read from the VMCS.
 *
 * All pending/injected event fields are reset first.  If @idtvec_info is
 * valid, they are repopulated from it: interrupt vector, NMI flag or
 * exception number, plus the instruction length for software events and
 * the error code when one is flagged.  Any other vectoring type aborts.
 * Finally the NMI-blocking and interrupt-inhibit flags are mirrored from
 * the guest interruptibility state.
 */
static void hvf_store_events(CPUState *cpu, uint32_t ins_len,
                             uint64_t idtvec_info)
{
    CPUX86State *env = &X86_CPU(cpu)->env;

    /* Start from "nothing pending, nothing injected". */
    env->exception_nr = -1;
    env->exception_pending = 0;
    env->exception_injected = 0;
    env->interrupt_injected = -1;
    env->nmi_injected = false;
    env->ins_len = 0;
    env->has_error_code = false;

    if (idtvec_info & VMCS_IDT_VEC_VALID) {
        const uint64_t vec_type = idtvec_info & VMCS_IDT_VEC_TYPE;

        switch (vec_type) {
        case VMCS_IDT_VEC_HWINTR:
        case VMCS_IDT_VEC_SWINTR:
            env->interrupt_injected = idtvec_info & VMCS_IDT_VEC_VECNUM;
            break;
        case VMCS_IDT_VEC_NMI:
            env->nmi_injected = true;
            break;
        case VMCS_IDT_VEC_HWEXCEPTION:
        case VMCS_IDT_VEC_SWEXCEPTION:
            env->exception_nr = idtvec_info & VMCS_IDT_VEC_VECNUM;
            env->exception_injected = 1;
            break;
        case VMCS_IDT_VEC_PRIV_SWEXCEPTION:
        default:
            abort();
        }

        /* Only software-generated events carry an instruction length. */
        if (vec_type == VMCS_IDT_VEC_SWEXCEPTION ||
            vec_type == VMCS_IDT_VEC_SWINTR) {
            env->ins_len = ins_len;
        }

        if (idtvec_info & VMCS_IDT_VEC_ERRCODE_VALID) {
            env->has_error_code = true;
            env->error_code = rvmcs(cpu->accel->fd, VMCS_IDT_VECTORING_ERROR);
        }
    }

    if (rvmcs(cpu->accel->fd, VMCS_GUEST_INTERRUPTIBILITY) &
        VMCS_INTERRUPTIBILITY_NMI_BLOCKING) {
        env->hflags2 |= HF2_NMI_MASK;
    } else {
        env->hflags2 &= ~HF2_NMI_MASK;
    }

    if (rvmcs(cpu->accel->fd, VMCS_GUEST_INTERRUPTIBILITY) &
        (VMCS_INTERRUPTIBILITY_STI_BLOCKING |
         VMCS_INTERRUPTIBILITY_MOVSS_BLOCKING)) {
        env->hflags |= HF_INHIBIT_IRQ_MASK;
    } else {
        env->hflags &= ~HF_INHIBIT_IRQ_MASK;
    }
}
423
/*
 * Wrapper around cpu_x86_cpuid() that adds leaves 0x40000000 and
 * 0x40000010, providing vmware-cpuid-freq support for HVF.
 *
 * Leaves 0x40000001-0x4000000F are filled with zeros.  Leaf 0x40000000
 * does not advertise HVF: the hypervisor signature is left empty.
 *
 * The extra leaves are only served when both the TSC and APIC bus
 * frequencies are known; otherwise, and for every index outside
 * 0x40000000-0x40000010, the request goes to cpu_x86_cpuid() unchanged.
 */
static void hvf_cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
                              uint32_t *eax, uint32_t *ebx,
                              uint32_t *ecx, uint32_t *edx)
{
    const bool hv_leaf = index >= 0x40000000 && index <= 0x40000010;

    if (!hv_leaf || !tsc_is_known(env) || !apic_bus_freq_is_known(env)) {
        cpu_x86_cpuid(env, index, count, eax, ebx, ecx, edx);
        return;
    }

    /* Every leaf in the range reads as zero unless overridden below. */
    *eax = 0;
    *ebx = 0;
    *ecx = 0;
    *edx = 0;

    if (index == 0x40000000) {
        *eax = 0x40000010; /* Max available cpuid leaf */
    } else if (index == 0x40000010) {
        *eax = env->tsc_khz;
        *ebx = env->apic_bus_freq / 1000; /* Hz to KHz */
    }
}
465
/*
 * Copy the general-purpose registers, RFLAGS and RIP from the vCPU into
 * the CPUX86State, then refresh the lazy flags via rflags_to_lflags().
 */
void hvf_load_regs(CPUState *cs)
{
    CPUX86State *env = &X86_CPU(cs)->env;
    int reg;

    /* The first eight registers are mapped one by one. */
    RRX(env, R_EAX) = rreg(cs->accel->fd, HV_X86_RAX);
    RRX(env, R_EBX) = rreg(cs->accel->fd, HV_X86_RBX);
    RRX(env, R_ECX) = rreg(cs->accel->fd, HV_X86_RCX);
    RRX(env, R_EDX) = rreg(cs->accel->fd, HV_X86_RDX);
    RRX(env, R_ESI) = rreg(cs->accel->fd, HV_X86_RSI);
    RRX(env, R_EDI) = rreg(cs->accel->fd, HV_X86_RDI);
    RRX(env, R_ESP) = rreg(cs->accel->fd, HV_X86_RSP);
    RRX(env, R_EBP) = rreg(cs->accel->fd, HV_X86_RBP);

    /*
     * Registers 8..15 are addressed as HV_X86_RAX + index; presumably the
     * HV_X86_* ids for R8..R15 sit at those offsets (confirm against the
     * Hypervisor.framework headers).
     */
    for (reg = 8; reg < 16; reg++) {
        RRX(env, reg) = rreg(cs->accel->fd, HV_X86_RAX + reg);
    }

    env->eflags = rreg(cs->accel->fd, HV_X86_RFLAGS);
    rflags_to_lflags(env);
    env->eip = rreg(cs->accel->fd, HV_X86_RIP);
}
488
/*
 * Write the general-purpose registers, RFLAGS and RIP from the
 * CPUX86State back to the vCPU.  The lazy flags are folded into
 * env->eflags first, and RIP goes through macvm_set_rip().
 */
void hvf_store_regs(CPUState *cs)
{
    CPUX86State *env = &X86_CPU(cs)->env;
    int reg;

    wreg(cs->accel->fd, HV_X86_RAX, RAX(env));
    wreg(cs->accel->fd, HV_X86_RBX, RBX(env));
    wreg(cs->accel->fd, HV_X86_RCX, RCX(env));
    wreg(cs->accel->fd, HV_X86_RDX, RDX(env));
    wreg(cs->accel->fd, HV_X86_RSI, RSI(env));
    wreg(cs->accel->fd, HV_X86_RDI, RDI(env));
    wreg(cs->accel->fd, HV_X86_RBP, RBP(env));
    wreg(cs->accel->fd, HV_X86_RSP, RSP(env));

    /* Registers 8..15, addressed as HV_X86_RAX + index. */
    for (reg = 8; reg < 16; reg++) {
        wreg(cs->accel->fd, HV_X86_RAX + reg, RRX(env, reg));
    }

    lflags_to_rflags(env);
    wreg(cs->accel->fd, HV_X86_RFLAGS, env->eflags);
    macvm_set_rip(cs, env->eip);
}
511
/*
 * Emulate RDMSR for the MSR number held in ECX.
 *
 * The 64-bit result is split across RAX (low 32 bits) and RDX (high 32
 * bits).  MSRs without a case below read as zero.  A failing APIC MSR read
 * raises #GP through x86_emul_raise_exception(); RAX/RDX are still written
 * afterwards.  Reads of STAR, LSTAR and CSTAR abort.
 */
void hvf_simulate_rdmsr(CPUState *cs)
{
    X86CPU *cpu = X86_CPU(cs);
    CPUX86State *env = &cpu->env;
    const uint32_t msr = ECX(env);
    uint64_t val = 0;

    switch (msr) {
    case MSR_IA32_TSC:
        val = rdtscp() + rvmcs(cs->accel->fd, VMCS_TSC_OFFSET);
        break;
    case MSR_IA32_APICBASE:
        val = cpu_get_apic_base(cpu->apic_state);
        break;
    case MSR_APIC_START ... MSR_APIC_END: {
        int index = (uint32_t)env->regs[R_ECX] - MSR_APIC_START;

        if (apic_msr_read(index, &val) < 0) {
            x86_emul_raise_exception(env, EXCP0D_GPF, 0);
        }
        break;
    }
    case MSR_IA32_UCODE_REV:
        val = cpu->ucode_rev;
        break;
    case MSR_EFER:
        val = rvmcs(cs->accel->fd, VMCS_GUEST_IA32_EFER);
        break;
    case MSR_FSBASE:
        val = rvmcs(cs->accel->fd, VMCS_GUEST_FS_BASE);
        break;
    case MSR_GSBASE:
        val = rvmcs(cs->accel->fd, VMCS_GUEST_GS_BASE);
        break;
    case MSR_KERNELGSBASE:
        /*
         * NOTE(review): this reads VMCS_HOST_FS_BASE for the kernel GS
         * base, which looks surprising -- confirm it is intended.
         */
        val = rvmcs(cs->accel->fd, VMCS_HOST_FS_BASE);
        break;
    case MSR_STAR:
    case MSR_LSTAR:
    case MSR_CSTAR:
        abort();
        break;
    case MSR_IA32_MISC_ENABLE:
        val = env->msr_ia32_misc_enable;
        break;
    case MSR_MTRRphysBase(0):
    case MSR_MTRRphysBase(1):
    case MSR_MTRRphysBase(2):
    case MSR_MTRRphysBase(3):
    case MSR_MTRRphysBase(4):
    case MSR_MTRRphysBase(5):
    case MSR_MTRRphysBase(6):
    case MSR_MTRRphysBase(7):
        val = env->mtrr_var[(msr - MSR_MTRRphysBase(0)) / 2].base;
        break;
    case MSR_MTRRphysMask(0):
    case MSR_MTRRphysMask(1):
    case MSR_MTRRphysMask(2):
    case MSR_MTRRphysMask(3):
    case MSR_MTRRphysMask(4):
    case MSR_MTRRphysMask(5):
    case MSR_MTRRphysMask(6):
    case MSR_MTRRphysMask(7):
        val = env->mtrr_var[(msr - MSR_MTRRphysMask(0)) / 2].mask;
        break;
    case MSR_MTRRfix64K_00000:
        val = env->mtrr_fixed[0];
        break;
    case MSR_MTRRfix16K_80000:
    case MSR_MTRRfix16K_A0000:
        /* The 16K entries occupy mtrr_fixed[] starting at slot 1. */
        val = env->mtrr_fixed[msr - MSR_MTRRfix16K_80000 + 1];
        break;
    case MSR_MTRRfix4K_C0000:
    case MSR_MTRRfix4K_C8000:
    case MSR_MTRRfix4K_D0000:
    case MSR_MTRRfix4K_D8000:
    case MSR_MTRRfix4K_E0000:
    case MSR_MTRRfix4K_E8000:
    case MSR_MTRRfix4K_F0000:
    case MSR_MTRRfix4K_F8000:
        /* The 4K entries occupy mtrr_fixed[] starting at slot 3. */
        val = env->mtrr_fixed[msr - MSR_MTRRfix4K_C0000 + 3];
        break;
    case MSR_MTRRdefType:
        val = env->mtrr_deftype;
        break;
    case MSR_CORE_THREAD_COUNT:
        val = cpu_x86_get_msr_core_thread_count(cpu);
        break;
    default:
        /* Unknown MSRs read as zero. */
        val = 0;
        break;
    }

    RAX(env) = (uint32_t)val;
    RDX(env) = (uint32_t)(val >> 32);
}
616
/*
 * Emulate WRMSR: the MSR number comes from ECX and the 64-bit value from
 * EDX:EAX.
 *
 * Writes to MSR_IA32_TSC and to MSRs without a case below are ignored.  A
 * failing APIC base or APIC MSR write raises #GP through
 * x86_emul_raise_exception().  Writes to STAR, LSTAR and CSTAR abort.
 */
void hvf_simulate_wrmsr(CPUState *cs)
{
    X86CPU *cpu = X86_CPU(cs);
    CPUX86State *env = &cpu->env;
    const uint32_t msr = ECX(env);
    const uint64_t data = ((uint64_t)EDX(env) << 32) | EAX(env);

    switch (msr) {
    case MSR_IA32_TSC:
        /* Ignored. */
        break;
    case MSR_IA32_APICBASE:
        if (cpu_set_apic_base(cpu->apic_state, data) < 0) {
            x86_emul_raise_exception(env, EXCP0D_GPF, 0);
        }
        break;
    case MSR_APIC_START ... MSR_APIC_END: {
        int index = (uint32_t)env->regs[R_ECX] - MSR_APIC_START;

        if (apic_msr_write(index, data) < 0) {
            x86_emul_raise_exception(env, EXCP0D_GPF, 0);
        }
        break;
    }
    case MSR_FSBASE:
        wvmcs(cs->accel->fd, VMCS_GUEST_FS_BASE, data);
        break;
    case MSR_GSBASE:
        wvmcs(cs->accel->fd, VMCS_GUEST_GS_BASE, data);
        break;
    case MSR_KERNELGSBASE:
        /*
         * NOTE(review): this writes VMCS_HOST_FS_BASE for the kernel GS
         * base, which looks surprising -- confirm it is intended.
         */
        wvmcs(cs->accel->fd, VMCS_HOST_FS_BASE, data);
        break;
    case MSR_STAR:
    case MSR_LSTAR:
    case MSR_CSTAR:
        abort();
        break;
    case MSR_EFER:
        wvmcs(cs->accel->fd, VMCS_GUEST_IA32_EFER, data);
        /* The TLB is invalidated whenever the written value has NXE set. */
        if (data & MSR_EFER_NXE) {
            hv_vcpu_invalidate_tlb(cs->accel->fd);
        }
        break;
    case MSR_MTRRphysBase(0):
    case MSR_MTRRphysBase(1):
    case MSR_MTRRphysBase(2):
    case MSR_MTRRphysBase(3):
    case MSR_MTRRphysBase(4):
    case MSR_MTRRphysBase(5):
    case MSR_MTRRphysBase(6):
    case MSR_MTRRphysBase(7):
        env->mtrr_var[(msr - MSR_MTRRphysBase(0)) / 2].base = data;
        break;
    case MSR_MTRRphysMask(0):
    case MSR_MTRRphysMask(1):
    case MSR_MTRRphysMask(2):
    case MSR_MTRRphysMask(3):
    case MSR_MTRRphysMask(4):
    case MSR_MTRRphysMask(5):
    case MSR_MTRRphysMask(6):
    case MSR_MTRRphysMask(7):
        env->mtrr_var[(msr - MSR_MTRRphysMask(0)) / 2].mask = data;
        break;
    case MSR_MTRRfix64K_00000:
        env->mtrr_fixed[msr - MSR_MTRRfix64K_00000] = data;
        break;
    case MSR_MTRRfix16K_80000:
    case MSR_MTRRfix16K_A0000:
        /* The 16K entries occupy mtrr_fixed[] starting at slot 1. */
        env->mtrr_fixed[msr - MSR_MTRRfix16K_80000 + 1] = data;
        break;
    case MSR_MTRRfix4K_C0000:
    case MSR_MTRRfix4K_C8000:
    case MSR_MTRRfix4K_D0000:
    case MSR_MTRRfix4K_D8000:
    case MSR_MTRRfix4K_E0000:
    case MSR_MTRRfix4K_E8000:
    case MSR_MTRRfix4K_F0000:
    case MSR_MTRRfix4K_F8000:
        /* The 4K entries occupy mtrr_fixed[] starting at slot 3. */
        env->mtrr_fixed[msr - MSR_MTRRfix4K_C0000 + 3] = data;
        break;
    case MSR_MTRRdefType:
        env->mtrr_deftype = data;
        break;
    default:
        /* Writes to unknown MSRs are dropped. */
        break;
    }
}
723
/*
 * Main vCPU run loop: enter the guest, handle the resulting VM exit and
 * repeat until a handler produces a non-zero result.
 *
 * Each iteration flushes dirty register state, injects pending interrupts,
 * updates the TPR, drops the BQL and runs the vCPU.  After the exit the
 * event state, RIP and RFLAGS are read back, the BQL is re-taken and the
 * exit reason is dispatched.
 *
 * Returns EXCP_HLT when hvf_process_events() says so, when a non-BSP vCPU
 * is halted, or when HLT executes with nothing pending.  Returns
 * EXCP_INTERRUPT when an interrupt was injected or an exit needs the outer
 * loop.  A TPR exit leaves the loop with the value 1.
 */
int hvf_vcpu_exec(CPUState *cpu)
{
    X86CPU *x86_cpu = X86_CPU(cpu);
    CPUX86State *env = &x86_cpu->env;
    int ret = 0;
    uint64_t rip = 0;

    if (hvf_process_events(cpu)) {
        return EXCP_HLT;
    }

    do {
        /* Push QEMU-side register changes to the vCPU before running. */
        if (cpu->accel->dirty) {
            hvf_put_registers(cpu);
            cpu->accel->dirty = false;
        }

        if (hvf_inject_interrupts(cpu)) {
            return EXCP_INTERRUPT;
        }
        vmx_update_tpr(cpu);

        /* The guest runs with the BQL released. */
        bql_unlock();
        if (!cpu_is_bsp(X86_CPU(cpu)) && cpu->halted) {
            bql_lock();
            return EXCP_HLT;
        }

        hv_return_t run_status = hv_vcpu_run_until(cpu->accel->fd,
                                                   HV_DEADLINE_FOREVER);
        assert_hvf_ok(run_status);

        /* handle VMEXIT */
        uint64_t exit_reason = rvmcs(cpu->accel->fd, VMCS_EXIT_REASON);
        uint64_t qual = rvmcs(cpu->accel->fd, VMCS_EXIT_QUALIFICATION);
        uint32_t insn_len = (uint32_t)rvmcs(cpu->accel->fd,
                                            VMCS_EXIT_INSTRUCTION_LENGTH);
        uint64_t idtvec = rvmcs(cpu->accel->fd, VMCS_IDT_VECTORING_INFO);

        hvf_store_events(cpu, insn_len, idtvec);
        rip = rreg(cpu->accel->fd, HV_X86_RIP);
        env->eflags = rreg(cpu->accel->fd, HV_X86_RFLAGS);

        bql_lock();

        update_apic_tpr(cpu);
        current_cpu = cpu;

        ret = 0;
        switch (exit_reason) {
        case EXIT_REASON_HLT: {
            bool wake_pending;

            macvm_set_rip(cpu, rip + insn_len);

            /*
             * Stay awake if a deliverable hard interrupt, an NMI or an
             * event that was being delivered at exit time is pending.
             */
            wake_pending = ((cpu->interrupt_request & CPU_INTERRUPT_HARD) &&
                            (env->eflags & IF_MASK)) ||
                           (cpu->interrupt_request & CPU_INTERRUPT_NMI) ||
                           (idtvec & VMCS_IDT_VEC_VALID);
            if (wake_pending) {
                ret = EXCP_INTERRUPT;
            } else {
                cpu->halted = 1;
                ret = EXCP_HLT;
            }
            break;
        }
        case EXIT_REASON_MWAIT:
            ret = EXCP_INTERRUPT;
            break;
        /* Need to check if MMIO or unmapped fault */
        case EXIT_REASON_EPT_FAULT: {
            hvf_slot *slot;
            uint64_t gpa = rvmcs(cpu->accel->fd, VMCS_GUEST_PHYSICAL_ADDRESS);

            if (!(idtvec & VMCS_IDT_VEC_VALID) &&
                (qual & EXIT_QUAL_NMIUDTI)) {
                vmx_set_nmi_blocking(cpu);
            }

            slot = hvf_find_overlap_slot(gpa, 1);
            /* mmio */
            if (ept_emulation_fault(slot, gpa, qual)) {
                struct x86_decode decode;

                hvf_load_regs(cpu);
                decode_instruction(env, &decode);
                exec_instruction(env, &decode);
                hvf_store_regs(cpu);
            }
            break;
        }
        case EXIT_REASON_INOUT: {
            /* Fields of the I/O exit qualification. */
            uint32_t in = (qual & 8) != 0;
            uint32_t size = (qual & 7) + 1;
            uint32_t string = (qual & 16) != 0;
            uint32_t port = qual >> 16;
            struct x86_decode decode;

            if (!string) {
                if (in) {
                    uint64_t val = 0;

                    hvf_load_regs(cpu);
                    hvf_handle_io(env_cpu(env), port, &val, 0, size, 1);
                    switch (size) {
                    case 1:
                        AL(env) = val;
                        break;
                    case 2:
                        AX(env) = val;
                        break;
                    case 4:
                        RAX(env) = (uint32_t)val;
                        break;
                    default:
                        RAX(env) = (uint64_t)val;
                        break;
                    }
                    env->eip += insn_len;
                    hvf_store_regs(cpu);
                } else {
                    RAX(env) = rreg(cpu->accel->fd, HV_X86_RAX);
                    hvf_handle_io(env_cpu(env), port, &RAX(env), 1, size, 1);
                    macvm_set_rip(cpu, rip + insn_len);
                }
                break;
            }

            /* String I/O goes through the instruction emulator. */
            hvf_load_regs(cpu);
            decode_instruction(env, &decode);
            assert(insn_len == decode.len);
            exec_instruction(env, &decode);
            hvf_store_regs(cpu);
            break;
        }
        case EXIT_REASON_CPUID: {
            uint32_t rax = (uint32_t)rreg(cpu->accel->fd, HV_X86_RAX);
            uint32_t rbx = (uint32_t)rreg(cpu->accel->fd, HV_X86_RBX);
            uint32_t rcx = (uint32_t)rreg(cpu->accel->fd, HV_X86_RCX);
            uint32_t rdx = (uint32_t)rreg(cpu->accel->fd, HV_X86_RDX);

            if (rax == 1) {
                /* CPUID1.ecx.OSXSAVE needs to know CR4 */
                env->cr[4] = rvmcs(cpu->accel->fd, VMCS_GUEST_CR4);
            }
            hvf_cpu_x86_cpuid(env, rax, rcx, &rax, &rbx, &rcx, &rdx);

            wreg(cpu->accel->fd, HV_X86_RAX, rax);
            wreg(cpu->accel->fd, HV_X86_RBX, rbx);
            wreg(cpu->accel->fd, HV_X86_RCX, rcx);
            wreg(cpu->accel->fd, HV_X86_RDX, rdx);

            macvm_set_rip(cpu, rip + insn_len);
            break;
        }
        case EXIT_REASON_XSETBV: {
            uint32_t eax = (uint32_t)rreg(cpu->accel->fd, HV_X86_RAX);
            uint32_t ecx = (uint32_t)rreg(cpu->accel->fd, HV_X86_RCX);
            uint32_t edx = (uint32_t)rreg(cpu->accel->fd, HV_X86_RDX);

            /* Only register index 0 is applied; others are skipped. */
            if (!ecx) {
                env->xcr0 = ((uint64_t)edx << 32) | eax;
                wreg(cpu->accel->fd, HV_X86_XCR0, env->xcr0 | 1);
            }
            macvm_set_rip(cpu, rip + insn_len);
            break;
        }
        case EXIT_REASON_INTR_WINDOW:
            vmx_clear_int_window_exiting(cpu);
            ret = EXCP_INTERRUPT;
            break;
        case EXIT_REASON_NMI_WINDOW:
            vmx_clear_nmi_window_exiting(cpu);
            ret = EXCP_INTERRUPT;
            break;
        case EXIT_REASON_EXT_INTR:
            /* force exit and allow io handling */
            ret = EXCP_INTERRUPT;
            break;
        case EXIT_REASON_RDMSR:
        case EXIT_REASON_WRMSR:
            hvf_load_regs(cpu);
            if (exit_reason == EXIT_REASON_RDMSR) {
                hvf_simulate_rdmsr(cpu);
            } else {
                hvf_simulate_wrmsr(cpu);
            }
            env->eip += insn_len;
            hvf_store_regs(cpu);
            break;
        case EXIT_REASON_CR_ACCESS: {
            int cr;
            int reg;

            hvf_load_regs(cpu);
            cr = qual & 15;
            reg = (qual >> 8) & 15;

            switch (cr) {
            case 0x0:
                macvm_set_cr0(cpu->accel->fd, RRX(env, reg));
                break;
            case 4:
                macvm_set_cr4(cpu->accel->fd, RRX(env, reg));
                break;
            case 8:
                /* Bit 4 of the qualification selects read vs. write. */
                if (qual & 0x10) {
                    RRX(env, reg) = cpu_get_apic_tpr(x86_cpu->apic_state);
                } else {
                    int tpr = RRX(env, reg);

                    cpu_set_apic_tpr(x86_cpu->apic_state, tpr);
                    ret = EXCP_INTERRUPT;
                }
                break;
            default:
                error_report("Unrecognized CR %d", cr);
                abort();
            }
            env->eip += insn_len;
            hvf_store_regs(cpu);
            break;
        }
        case EXIT_REASON_APIC_ACCESS: { /* TODO */
            struct x86_decode decode;

            hvf_load_regs(cpu);
            decode_instruction(env, &decode);
            exec_instruction(env, &decode);
            hvf_store_regs(cpu);
            break;
        }
        case EXIT_REASON_TPR:
            /* Leaves the loop with the literal value 1. */
            ret = 1;
            break;
        case EXIT_REASON_TASK_SWITCH: {
            uint64_t vinfo = rvmcs(cpu->accel->fd, VMCS_IDT_VECTORING_INFO);
            x86_segment_selector sel = {.sel = qual & 0xffff};

            vmx_handle_task_switch(cpu, sel, (qual >> 30) & 0x3,
                                   vinfo & VMCS_INTR_VALID,
                                   vinfo & VECTORING_INFO_VECTOR_MASK,
                                   vinfo & VMCS_INTR_T_MASK);
            break;
        }
        case EXIT_REASON_TRIPLE_FAULT:
            qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
            ret = EXCP_INTERRUPT;
            break;
        case EXIT_REASON_RDPMC:
            /* RDPMC reads as zero. */
            wreg(cpu->accel->fd, HV_X86_RAX, 0);
            wreg(cpu->accel->fd, HV_X86_RDX, 0);
            macvm_set_rip(cpu, rip + insn_len);
            break;
        case VMX_REASON_VMCALL:
            /* Queue #GP(0); RIP is deliberately left where it is. */
            env->exception_nr = EXCP0D_GPF;
            env->exception_injected = 1;
            env->has_error_code = true;
            env->error_code = 0;
            break;
        default:
            error_report("%llx: unhandled exit %llx", rip, exit_reason);
        }
    } while (ret == 0);

    return ret;
}
993
/*
 * Software breakpoints are not implemented for this target: both arguments
 * are ignored and -ENOSYS is always returned.
 */
int hvf_arch_insert_sw_breakpoint(CPUState *cpu, struct hvf_sw_breakpoint *bp)
{
    return -ENOSYS;
}
998
/*
 * Counterpart of hvf_arch_insert_sw_breakpoint(); likewise unimplemented,
 * so both arguments are ignored and -ENOSYS is always returned.
 */
int hvf_arch_remove_sw_breakpoint(CPUState *cpu, struct hvf_sw_breakpoint *bp)
{
    return -ENOSYS;
}
1003
/*
 * Hardware breakpoints are not implemented for this target: all arguments
 * are ignored and -ENOSYS is always returned.
 */
int hvf_arch_insert_hw_breakpoint(vaddr addr, vaddr len, int type)
{
    return -ENOSYS;
}
1008
/*
 * Counterpart of hvf_arch_insert_hw_breakpoint(); likewise unimplemented,
 * so all arguments are ignored and -ENOSYS is always returned.
 */
int hvf_arch_remove_hw_breakpoint(vaddr addr, vaddr len, int type)
{
    return -ENOSYS;
}
1013
/*
 * Nothing to do: hardware breakpoints can never be inserted on this target
 * because hvf_arch_insert_hw_breakpoint() always fails.
 */
void hvf_arch_remove_all_hw_breakpoints(void)
{
}
1017
/* Guest-debug state update hook; intentionally empty on this target. */
void hvf_arch_update_guest_debug(CPUState *cpu)
{
}
1021
/*
 * Reports whether guest debugging is available on this target.  It is not:
 * this always returns false.
 */
bool hvf_arch_supports_guest_debug(void)
{
    return false;
}
1026