xref: /qemu/target/i386/hvf/hvf.c (revision 63d8bc669302ec22bd394c45380848a2d5947943)
1 /* Copyright 2008 IBM Corporation
2  *           2008 Red Hat, Inc.
3  * Copyright 2011 Intel Corporation
4  * Copyright 2016 Veertu, Inc.
5  * Copyright 2017 The Android Open Source Project
6  *
7  * QEMU Hypervisor.framework support
8  *
9  * This program is free software; you can redistribute it and/or
10  * modify it under the terms of version 2 of the GNU General Public
11  * License as published by the Free Software Foundation.
12  *
13  * This program is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16  * General Public License for more details.
17  *
18  * You should have received a copy of the GNU General Public License
19  * along with this program; if not, see <http://www.gnu.org/licenses/>.
20  *
21  * This file contain code under public domain from the hvdos project:
22  * https://github.com/mist64/hvdos
23  *
24  * Parts Copyright (c) 2011 NetApp, Inc.
25  * All rights reserved.
26  *
27  * Redistribution and use in source and binary forms, with or without
28  * modification, are permitted provided that the following conditions
29  * are met:
30  * 1. Redistributions of source code must retain the above copyright
31  *    notice, this list of conditions and the following disclaimer.
32  * 2. Redistributions in binary form must reproduce the above copyright
33  *    notice, this list of conditions and the following disclaimer in the
34  *    documentation and/or other materials provided with the distribution.
35  *
36  * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
37  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
38  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
39  * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
40  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
41  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
42  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
43  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
44  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
45  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
46  * SUCH DAMAGE.
47  */
48 
49 #include "qemu/osdep.h"
50 #include "qemu/error-report.h"
51 #include "qemu/memalign.h"
52 #include "qapi/error.h"
53 #include "migration/blocker.h"
54 
55 #include "system/hvf.h"
56 #include "system/hvf_int.h"
57 #include "system/runstate.h"
58 #include "system/cpus.h"
59 #include "hvf-i386.h"
60 #include "vmcs.h"
61 #include "vmx.h"
62 #include "x86.h"
63 #include "x86_descr.h"
64 #include "x86_flags.h"
65 #include "x86_mmu.h"
66 #include "x86_decode.h"
67 #include "x86_emu.h"
68 #include "x86_task.h"
69 #include "x86hvf.h"
70 
71 #include <Hypervisor/hv.h>
72 #include <Hypervisor/hv_vmx.h>
73 #include <sys/sysctl.h>
74 
75 #include "hw/i386/apic_internal.h"
76 #include "qemu/main-loop.h"
77 #include "qemu/accel.h"
78 #include "target/i386/cpu.h"
79 
/*
 * Migration blocker used by hvf_arch_init_vcpu() when a vCPU advertises the
 * invtsc CPUID flag.  It is compared against NULL there so that the blocker
 * is only set up once.
 */
static Error *invtsc_mig_blocker;
81 
82 void vmx_update_tpr(CPUState *cpu)
83 {
84     /* TODO: need integrate APIC handling */
85     X86CPU *x86_cpu = X86_CPU(cpu);
86     int tpr = cpu_get_apic_tpr(x86_cpu->apic_state) << 4;
87     int irr = apic_get_highest_priority_irr(x86_cpu->apic_state);
88 
89     wreg(cpu->accel->fd, HV_X86_TPR, tpr);
90     if (irr == -1) {
91         wvmcs(cpu->accel->fd, VMCS_TPR_THRESHOLD, 0);
92     } else {
93         wvmcs(cpu->accel->fd, VMCS_TPR_THRESHOLD, (irr > tpr) ? tpr >> 4 :
94               irr >> 4);
95     }
96 }
97 
98 static void update_apic_tpr(CPUState *cpu)
99 {
100     X86CPU *x86_cpu = X86_CPU(cpu);
101     int tpr = rreg(cpu->accel->fd, HV_X86_TPR) >> 4;
102     cpu_set_apic_tpr(x86_cpu->apic_state, tpr);
103 }
104 
/*
 * Mask for the low eight bits of the IDT-vectoring information value; the
 * task-switch exit handler applies it before passing the result on to
 * vmx_handle_task_switch().
 */
#define VECTORING_INFO_VECTOR_MASK     0xff
106 
107 void hvf_handle_io(CPUState *env, uint16_t port, void *buffer,
108                   int direction, int size, int count)
109 {
110     int i;
111     uint8_t *ptr = buffer;
112 
113     for (i = 0; i < count; i++) {
114         address_space_rw(&address_space_io, port, MEMTXATTRS_UNSPECIFIED,
115                          ptr, size,
116                          direction);
117         ptr += size;
118     }
119 }
120 
121 static bool ept_emulation_fault(hvf_slot *slot, uint64_t gpa, uint64_t ept_qual)
122 {
123     int read, write;
124 
125     /* EPT fault on an instruction fetch doesn't make sense here */
126     if (ept_qual & EPT_VIOLATION_INST_FETCH) {
127         return false;
128     }
129 
130     /* EPT fault must be a read fault or a write fault */
131     read = ept_qual & EPT_VIOLATION_DATA_READ ? 1 : 0;
132     write = ept_qual & EPT_VIOLATION_DATA_WRITE ? 1 : 0;
133     if ((read | write) == 0) {
134         return false;
135     }
136 
137     if (write && slot) {
138         if (slot->flags & HVF_SLOT_LOG) {
139             uint64_t dirty_page_start = gpa & ~(TARGET_PAGE_SIZE - 1u);
140             memory_region_set_dirty(slot->region, gpa - slot->start, 1);
141             hv_vm_protect(dirty_page_start, TARGET_PAGE_SIZE,
142                           HV_MEMORY_READ | HV_MEMORY_WRITE | HV_MEMORY_EXEC);
143         }
144     }
145 
146     /*
147      * The EPT violation must have been caused by accessing a
148      * guest-physical address that is a translation of a guest-linear
149      * address.
150      */
151     if ((ept_qual & EPT_VIOLATION_GLA_VALID) == 0 ||
152         (ept_qual & EPT_VIOLATION_XLAT_VALID) == 0) {
153         return false;
154     }
155 
156     if (!slot) {
157         return true;
158     }
159     if (!memory_region_is_ram(slot->region) &&
160         !(read && memory_region_is_romd(slot->region))) {
161         return true;
162     }
163     return false;
164 }
165 
166 void hvf_arch_vcpu_destroy(CPUState *cpu)
167 {
168     X86CPU *x86_cpu = X86_CPU(cpu);
169     CPUX86State *env = &x86_cpu->env;
170 
171     g_free(env->emu_mmio_buf);
172 }
173 
174 static void init_tsc_freq(CPUX86State *env)
175 {
176     size_t length;
177     uint64_t tsc_freq;
178 
179     if (env->tsc_khz != 0) {
180         return;
181     }
182 
183     length = sizeof(uint64_t);
184     if (sysctlbyname("machdep.tsc.frequency", &tsc_freq, &length, NULL, 0)) {
185         return;
186     }
187     env->tsc_khz = tsc_freq / 1000;  /* Hz to KHz */
188 }
189 
190 static void init_apic_bus_freq(CPUX86State *env)
191 {
192     size_t length;
193     uint64_t bus_freq;
194 
195     if (env->apic_bus_freq != 0) {
196         return;
197     }
198 
199     length = sizeof(uint64_t);
200     if (sysctlbyname("hw.busfrequency", &bus_freq, &length, NULL, 0)) {
201         return;
202     }
203     env->apic_bus_freq = bus_freq;
204 }
205 
206 static inline bool tsc_is_known(CPUX86State *env)
207 {
208     return env->tsc_khz != 0;
209 }
210 
211 static inline bool apic_bus_freq_is_known(CPUX86State *env)
212 {
213     return env->apic_bus_freq != 0;
214 }
215 
216 void hvf_kick_vcpu_thread(CPUState *cpu)
217 {
218     cpus_kick_thread(cpu);
219     hv_vcpu_interrupt(&cpu->accel->fd, 1);
220 }
221 
/* Architecture-specific accelerator init hook; a no-op here, always returns 0. */
int hvf_arch_init(void)
{
    return 0;
}
226 
227 hv_return_t hvf_arch_vm_create(MachineState *ms, uint32_t pa_range)
228 {
229     return hv_vm_create(HV_VM_DEFAULT);
230 }
231 
232 static void hvf_read_segment_descriptor(CPUState *s, struct x86_segment_descriptor *desc,
233                                         X86Seg seg)
234 {
235     struct vmx_segment vmx_segment;
236     vmx_read_segment_descriptor(s, &vmx_segment, seg);
237     vmx_segment_to_x86_descriptor(s, &vmx_segment, desc);
238 }
239 
240 static void hvf_read_mem(CPUState *cpu, void *data, target_ulong gva, int bytes)
241 {
242     vmx_read_mem(cpu, data, gva, bytes);
243 }
244 
245 static void hvf_write_mem(CPUState *cpu, void *data, target_ulong gva, int bytes)
246 {
247     vmx_write_mem(cpu, gva, data, bytes);
248 }
249 
250 static const struct x86_emul_ops hvf_x86_emul_ops = {
251     .read_mem = hvf_read_mem,
252     .write_mem = hvf_write_mem,
253     .read_segment_descriptor = hvf_read_segment_descriptor,
254     .handle_io = hvf_handle_io,
255 };
256 
257 int hvf_arch_init_vcpu(CPUState *cpu)
258 {
259     X86CPU *x86cpu = X86_CPU(cpu);
260     CPUX86State *env = &x86cpu->env;
261     Error *local_err = NULL;
262     int r;
263     uint64_t reqCap;
264 
265     init_emu(&hvf_x86_emul_ops);
266     init_decoder();
267 
268     if (hvf_state->hvf_caps == NULL) {
269         hvf_state->hvf_caps = g_new0(struct hvf_vcpu_caps, 1);
270     }
271     env->emu_mmio_buf = g_new(char, 4096);
272 
273     if (x86cpu->vmware_cpuid_freq) {
274         init_tsc_freq(env);
275         init_apic_bus_freq(env);
276 
277         if (!tsc_is_known(env) || !apic_bus_freq_is_known(env)) {
278             error_report("vmware-cpuid-freq: feature couldn't be enabled");
279         }
280     }
281 
282     if ((env->features[FEAT_8000_0007_EDX] & CPUID_APM_INVTSC) &&
283         invtsc_mig_blocker == NULL) {
284         error_setg(&invtsc_mig_blocker,
285                    "State blocked by non-migratable CPU device (invtsc flag)");
286         r = migrate_add_blocker(&invtsc_mig_blocker, &local_err);
287         if (r < 0) {
288             error_report_err(local_err);
289             return r;
290         }
291     }
292 
293 
294     if (hv_vmx_read_capability(HV_VMX_CAP_PINBASED,
295         &hvf_state->hvf_caps->vmx_cap_pinbased)) {
296         abort();
297     }
298     if (hv_vmx_read_capability(HV_VMX_CAP_PROCBASED,
299         &hvf_state->hvf_caps->vmx_cap_procbased)) {
300         abort();
301     }
302     if (hv_vmx_read_capability(HV_VMX_CAP_PROCBASED2,
303         &hvf_state->hvf_caps->vmx_cap_procbased2)) {
304         abort();
305     }
306     if (hv_vmx_read_capability(HV_VMX_CAP_ENTRY,
307         &hvf_state->hvf_caps->vmx_cap_entry)) {
308         abort();
309     }
310 
311     /* set VMCS control fields */
312     wvmcs(cpu->accel->fd, VMCS_PIN_BASED_CTLS,
313           cap2ctrl(hvf_state->hvf_caps->vmx_cap_pinbased,
314                    VMCS_PIN_BASED_CTLS_EXTINT |
315                    VMCS_PIN_BASED_CTLS_NMI |
316                    VMCS_PIN_BASED_CTLS_VNMI));
317     wvmcs(cpu->accel->fd, VMCS_PRI_PROC_BASED_CTLS,
318           cap2ctrl(hvf_state->hvf_caps->vmx_cap_procbased,
319                    VMCS_PRI_PROC_BASED_CTLS_HLT |
320                    VMCS_PRI_PROC_BASED_CTLS_MWAIT |
321                    VMCS_PRI_PROC_BASED_CTLS_TSC_OFFSET |
322                    VMCS_PRI_PROC_BASED_CTLS_TPR_SHADOW) |
323           VMCS_PRI_PROC_BASED_CTLS_SEC_CONTROL);
324 
325     reqCap = VMCS_PRI_PROC_BASED2_CTLS_APIC_ACCESSES;
326 
327     /* Is RDTSCP support in CPUID?  If so, enable it in the VMCS. */
328     if (hvf_get_supported_cpuid(0x80000001, 0, R_EDX) & CPUID_EXT2_RDTSCP) {
329         reqCap |= VMCS_PRI_PROC_BASED2_CTLS_RDTSCP;
330     }
331 
332     wvmcs(cpu->accel->fd, VMCS_SEC_PROC_BASED_CTLS,
333           cap2ctrl(hvf_state->hvf_caps->vmx_cap_procbased2, reqCap));
334 
335     wvmcs(cpu->accel->fd, VMCS_ENTRY_CTLS,
336           cap2ctrl(hvf_state->hvf_caps->vmx_cap_entry, 0));
337     wvmcs(cpu->accel->fd, VMCS_EXCEPTION_BITMAP, 0); /* Double fault */
338 
339     wvmcs(cpu->accel->fd, VMCS_TPR_THRESHOLD, 0);
340 
341     x86cpu = X86_CPU(cpu);
342     x86cpu->env.xsave_buf_len = 4096;
343     x86cpu->env.xsave_buf = qemu_memalign(4096, x86cpu->env.xsave_buf_len);
344 
345     /*
346      * The allocated storage must be large enough for all of the
347      * possible XSAVE state components.
348      */
349     assert(hvf_get_supported_cpuid(0xd, 0, R_ECX) <= x86cpu->env.xsave_buf_len);
350 
351     hv_vcpu_enable_native_msr(cpu->accel->fd, MSR_STAR, 1);
352     hv_vcpu_enable_native_msr(cpu->accel->fd, MSR_LSTAR, 1);
353     hv_vcpu_enable_native_msr(cpu->accel->fd, MSR_CSTAR, 1);
354     hv_vcpu_enable_native_msr(cpu->accel->fd, MSR_FMASK, 1);
355     hv_vcpu_enable_native_msr(cpu->accel->fd, MSR_FSBASE, 1);
356     hv_vcpu_enable_native_msr(cpu->accel->fd, MSR_GSBASE, 1);
357     hv_vcpu_enable_native_msr(cpu->accel->fd, MSR_KERNELGSBASE, 1);
358     hv_vcpu_enable_native_msr(cpu->accel->fd, MSR_TSC_AUX, 1);
359     hv_vcpu_enable_native_msr(cpu->accel->fd, MSR_IA32_TSC, 1);
360     hv_vcpu_enable_native_msr(cpu->accel->fd, MSR_IA32_SYSENTER_CS, 1);
361     hv_vcpu_enable_native_msr(cpu->accel->fd, MSR_IA32_SYSENTER_EIP, 1);
362     hv_vcpu_enable_native_msr(cpu->accel->fd, MSR_IA32_SYSENTER_ESP, 1);
363 
364     return 0;
365 }
366 
367 static void hvf_store_events(CPUState *cpu, uint32_t ins_len, uint64_t idtvec_info)
368 {
369     X86CPU *x86_cpu = X86_CPU(cpu);
370     CPUX86State *env = &x86_cpu->env;
371 
372     env->exception_nr = -1;
373     env->exception_pending = 0;
374     env->exception_injected = 0;
375     env->interrupt_injected = -1;
376     env->nmi_injected = false;
377     env->ins_len = 0;
378     env->has_error_code = false;
379     if (idtvec_info & VMCS_IDT_VEC_VALID) {
380         switch (idtvec_info & VMCS_IDT_VEC_TYPE) {
381         case VMCS_IDT_VEC_HWINTR:
382         case VMCS_IDT_VEC_SWINTR:
383             env->interrupt_injected = idtvec_info & VMCS_IDT_VEC_VECNUM;
384             break;
385         case VMCS_IDT_VEC_NMI:
386             env->nmi_injected = true;
387             break;
388         case VMCS_IDT_VEC_HWEXCEPTION:
389         case VMCS_IDT_VEC_SWEXCEPTION:
390             env->exception_nr = idtvec_info & VMCS_IDT_VEC_VECNUM;
391             env->exception_injected = 1;
392             break;
393         case VMCS_IDT_VEC_PRIV_SWEXCEPTION:
394         default:
395             abort();
396         }
397         if ((idtvec_info & VMCS_IDT_VEC_TYPE) == VMCS_IDT_VEC_SWEXCEPTION ||
398             (idtvec_info & VMCS_IDT_VEC_TYPE) == VMCS_IDT_VEC_SWINTR) {
399             env->ins_len = ins_len;
400         }
401         if (idtvec_info & VMCS_IDT_VEC_ERRCODE_VALID) {
402             env->has_error_code = true;
403             env->error_code = rvmcs(cpu->accel->fd, VMCS_IDT_VECTORING_ERROR);
404         }
405     }
406     if ((rvmcs(cpu->accel->fd, VMCS_GUEST_INTERRUPTIBILITY) &
407         VMCS_INTERRUPTIBILITY_NMI_BLOCKING)) {
408         env->hflags2 |= HF2_NMI_MASK;
409     } else {
410         env->hflags2 &= ~HF2_NMI_MASK;
411     }
412     if (rvmcs(cpu->accel->fd, VMCS_GUEST_INTERRUPTIBILITY) &
413          (VMCS_INTERRUPTIBILITY_STI_BLOCKING |
414          VMCS_INTERRUPTIBILITY_MOVSS_BLOCKING)) {
415         env->hflags |= HF_INHIBIT_IRQ_MASK;
416     } else {
417         env->hflags &= ~HF_INHIBIT_IRQ_MASK;
418     }
419 }
420 
421 static void hvf_cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
422                               uint32_t *eax, uint32_t *ebx,
423                               uint32_t *ecx, uint32_t *edx)
424 {
425     /*
426      * A wrapper extends cpu_x86_cpuid with 0x40000000 and 0x40000010 leafs,
427      * leafs 0x40000001-0x4000000F are filled with zeros
428      * Provides vmware-cpuid-freq support to hvf
429      *
430      * Note: leaf 0x40000000 not exposes HVF,
431      * leaving hypervisor signature empty
432      */
433 
434     if (index < 0x40000000 || index > 0x40000010 ||
435         !tsc_is_known(env) || !apic_bus_freq_is_known(env)) {
436 
437         cpu_x86_cpuid(env, index, count, eax, ebx, ecx, edx);
438         return;
439     }
440 
441     switch (index) {
442     case 0x40000000:
443         *eax = 0x40000010;    /* Max available cpuid leaf */
444         *ebx = 0;             /* Leave signature empty */
445         *ecx = 0;
446         *edx = 0;
447         break;
448     case 0x40000010:
449         *eax = env->tsc_khz;
450         *ebx = env->apic_bus_freq / 1000; /* Hz to KHz */
451         *ecx = 0;
452         *edx = 0;
453         break;
454     default:
455         *eax = 0;
456         *ebx = 0;
457         *ecx = 0;
458         *edx = 0;
459         break;
460     }
461 }
462 
463 void hvf_load_regs(CPUState *cs)
464 {
465     X86CPU *cpu = X86_CPU(cs);
466     CPUX86State *env = &cpu->env;
467 
468     int i = 0;
469     RRX(env, R_EAX) = rreg(cs->accel->fd, HV_X86_RAX);
470     RRX(env, R_EBX) = rreg(cs->accel->fd, HV_X86_RBX);
471     RRX(env, R_ECX) = rreg(cs->accel->fd, HV_X86_RCX);
472     RRX(env, R_EDX) = rreg(cs->accel->fd, HV_X86_RDX);
473     RRX(env, R_ESI) = rreg(cs->accel->fd, HV_X86_RSI);
474     RRX(env, R_EDI) = rreg(cs->accel->fd, HV_X86_RDI);
475     RRX(env, R_ESP) = rreg(cs->accel->fd, HV_X86_RSP);
476     RRX(env, R_EBP) = rreg(cs->accel->fd, HV_X86_RBP);
477     for (i = 8; i < 16; i++) {
478         RRX(env, i) = rreg(cs->accel->fd, HV_X86_RAX + i);
479     }
480 
481     env->eflags = rreg(cs->accel->fd, HV_X86_RFLAGS);
482     rflags_to_lflags(env);
483     env->eip = rreg(cs->accel->fd, HV_X86_RIP);
484 }
485 
486 void hvf_store_regs(CPUState *cs)
487 {
488     X86CPU *cpu = X86_CPU(cs);
489     CPUX86State *env = &cpu->env;
490 
491     int i = 0;
492     wreg(cs->accel->fd, HV_X86_RAX, RAX(env));
493     wreg(cs->accel->fd, HV_X86_RBX, RBX(env));
494     wreg(cs->accel->fd, HV_X86_RCX, RCX(env));
495     wreg(cs->accel->fd, HV_X86_RDX, RDX(env));
496     wreg(cs->accel->fd, HV_X86_RSI, RSI(env));
497     wreg(cs->accel->fd, HV_X86_RDI, RDI(env));
498     wreg(cs->accel->fd, HV_X86_RBP, RBP(env));
499     wreg(cs->accel->fd, HV_X86_RSP, RSP(env));
500     for (i = 8; i < 16; i++) {
501         wreg(cs->accel->fd, HV_X86_RAX + i, RRX(env, i));
502     }
503 
504     lflags_to_rflags(env);
505     wreg(cs->accel->fd, HV_X86_RFLAGS, env->eflags);
506     macvm_set_rip(cs, env->eip);
507 }
508 
509 void hvf_simulate_rdmsr(CPUX86State *env)
510 {
511     X86CPU *cpu = env_archcpu(env);
512     CPUState *cs = env_cpu(env);
513     uint32_t msr = ECX(env);
514     uint64_t val = 0;
515 
516     switch (msr) {
517     case MSR_IA32_TSC:
518         val = rdtscp() + rvmcs(cs->accel->fd, VMCS_TSC_OFFSET);
519         break;
520     case MSR_IA32_APICBASE:
521         val = cpu_get_apic_base(cpu->apic_state);
522         break;
523     case MSR_APIC_START ... MSR_APIC_END: {
524         int ret;
525         int index = (uint32_t)env->regs[R_ECX] - MSR_APIC_START;
526 
527         ret = apic_msr_read(index, &val);
528         if (ret < 0) {
529             x86_emul_raise_exception(env, EXCP0D_GPF, 0);
530         }
531 
532         break;
533     }
534     case MSR_IA32_UCODE_REV:
535         val = cpu->ucode_rev;
536         break;
537     case MSR_EFER:
538         val = rvmcs(cs->accel->fd, VMCS_GUEST_IA32_EFER);
539         break;
540     case MSR_FSBASE:
541         val = rvmcs(cs->accel->fd, VMCS_GUEST_FS_BASE);
542         break;
543     case MSR_GSBASE:
544         val = rvmcs(cs->accel->fd, VMCS_GUEST_GS_BASE);
545         break;
546     case MSR_KERNELGSBASE:
547         val = rvmcs(cs->accel->fd, VMCS_HOST_FS_BASE);
548         break;
549     case MSR_STAR:
550         abort();
551         break;
552     case MSR_LSTAR:
553         abort();
554         break;
555     case MSR_CSTAR:
556         abort();
557         break;
558     case MSR_IA32_MISC_ENABLE:
559         val = env->msr_ia32_misc_enable;
560         break;
561     case MSR_MTRRphysBase(0):
562     case MSR_MTRRphysBase(1):
563     case MSR_MTRRphysBase(2):
564     case MSR_MTRRphysBase(3):
565     case MSR_MTRRphysBase(4):
566     case MSR_MTRRphysBase(5):
567     case MSR_MTRRphysBase(6):
568     case MSR_MTRRphysBase(7):
569         val = env->mtrr_var[(ECX(env) - MSR_MTRRphysBase(0)) / 2].base;
570         break;
571     case MSR_MTRRphysMask(0):
572     case MSR_MTRRphysMask(1):
573     case MSR_MTRRphysMask(2):
574     case MSR_MTRRphysMask(3):
575     case MSR_MTRRphysMask(4):
576     case MSR_MTRRphysMask(5):
577     case MSR_MTRRphysMask(6):
578     case MSR_MTRRphysMask(7):
579         val = env->mtrr_var[(ECX(env) - MSR_MTRRphysMask(0)) / 2].mask;
580         break;
581     case MSR_MTRRfix64K_00000:
582         val = env->mtrr_fixed[0];
583         break;
584     case MSR_MTRRfix16K_80000:
585     case MSR_MTRRfix16K_A0000:
586         val = env->mtrr_fixed[ECX(env) - MSR_MTRRfix16K_80000 + 1];
587         break;
588     case MSR_MTRRfix4K_C0000:
589     case MSR_MTRRfix4K_C8000:
590     case MSR_MTRRfix4K_D0000:
591     case MSR_MTRRfix4K_D8000:
592     case MSR_MTRRfix4K_E0000:
593     case MSR_MTRRfix4K_E8000:
594     case MSR_MTRRfix4K_F0000:
595     case MSR_MTRRfix4K_F8000:
596         val = env->mtrr_fixed[ECX(env) - MSR_MTRRfix4K_C0000 + 3];
597         break;
598     case MSR_MTRRdefType:
599         val = env->mtrr_deftype;
600         break;
601     case MSR_CORE_THREAD_COUNT:
602         val = cpu_x86_get_msr_core_thread_count(cpu);
603         break;
604     default:
605         /* fprintf(stderr, "%s: unknown msr 0x%x\n", __func__, msr); */
606         val = 0;
607         break;
608     }
609 
610     RAX(env) = (uint32_t)val;
611     RDX(env) = (uint32_t)(val >> 32);
612 }
613 
614 void hvf_simulate_wrmsr(CPUX86State *env)
615 {
616     X86CPU *cpu = env_archcpu(env);
617     CPUState *cs = env_cpu(env);
618     uint32_t msr = ECX(env);
619     uint64_t data = ((uint64_t)EDX(env) << 32) | EAX(env);
620 
621     switch (msr) {
622     case MSR_IA32_TSC:
623         break;
624     case MSR_IA32_APICBASE: {
625         int r;
626 
627         r = cpu_set_apic_base(cpu->apic_state, data);
628         if (r < 0) {
629             x86_emul_raise_exception(env, EXCP0D_GPF, 0);
630         }
631 
632         break;
633     }
634     case MSR_APIC_START ... MSR_APIC_END: {
635         int ret;
636         int index = (uint32_t)env->regs[R_ECX] - MSR_APIC_START;
637 
638         ret = apic_msr_write(index, data);
639         if (ret < 0) {
640             x86_emul_raise_exception(env, EXCP0D_GPF, 0);
641         }
642 
643         break;
644     }
645     case MSR_FSBASE:
646         wvmcs(cs->accel->fd, VMCS_GUEST_FS_BASE, data);
647         break;
648     case MSR_GSBASE:
649         wvmcs(cs->accel->fd, VMCS_GUEST_GS_BASE, data);
650         break;
651     case MSR_KERNELGSBASE:
652         wvmcs(cs->accel->fd, VMCS_HOST_FS_BASE, data);
653         break;
654     case MSR_STAR:
655         abort();
656         break;
657     case MSR_LSTAR:
658         abort();
659         break;
660     case MSR_CSTAR:
661         abort();
662         break;
663     case MSR_EFER:
664         /*printf("new efer %llx\n", EFER(cs));*/
665         wvmcs(cs->accel->fd, VMCS_GUEST_IA32_EFER, data);
666         if (data & MSR_EFER_NXE) {
667             hv_vcpu_invalidate_tlb(cs->accel->fd);
668         }
669         break;
670     case MSR_MTRRphysBase(0):
671     case MSR_MTRRphysBase(1):
672     case MSR_MTRRphysBase(2):
673     case MSR_MTRRphysBase(3):
674     case MSR_MTRRphysBase(4):
675     case MSR_MTRRphysBase(5):
676     case MSR_MTRRphysBase(6):
677     case MSR_MTRRphysBase(7):
678         env->mtrr_var[(ECX(env) - MSR_MTRRphysBase(0)) / 2].base = data;
679         break;
680     case MSR_MTRRphysMask(0):
681     case MSR_MTRRphysMask(1):
682     case MSR_MTRRphysMask(2):
683     case MSR_MTRRphysMask(3):
684     case MSR_MTRRphysMask(4):
685     case MSR_MTRRphysMask(5):
686     case MSR_MTRRphysMask(6):
687     case MSR_MTRRphysMask(7):
688         env->mtrr_var[(ECX(env) - MSR_MTRRphysMask(0)) / 2].mask = data;
689         break;
690     case MSR_MTRRfix64K_00000:
691         env->mtrr_fixed[ECX(env) - MSR_MTRRfix64K_00000] = data;
692         break;
693     case MSR_MTRRfix16K_80000:
694     case MSR_MTRRfix16K_A0000:
695         env->mtrr_fixed[ECX(env) - MSR_MTRRfix16K_80000 + 1] = data;
696         break;
697     case MSR_MTRRfix4K_C0000:
698     case MSR_MTRRfix4K_C8000:
699     case MSR_MTRRfix4K_D0000:
700     case MSR_MTRRfix4K_D8000:
701     case MSR_MTRRfix4K_E0000:
702     case MSR_MTRRfix4K_E8000:
703     case MSR_MTRRfix4K_F0000:
704     case MSR_MTRRfix4K_F8000:
705         env->mtrr_fixed[ECX(env) - MSR_MTRRfix4K_C0000 + 3] = data;
706         break;
707     case MSR_MTRRdefType:
708         env->mtrr_deftype = data;
709         break;
710     default:
711         break;
712     }
713 
714     /* Related to support known hypervisor interface */
715     /* if (g_hypervisor_iface)
716          g_hypervisor_iface->wrmsr_handler(cs, msr, data);
717 
718     printf("write msr %llx\n", RCX(cs));*/
719 }
720 
721 int hvf_vcpu_exec(CPUState *cpu)
722 {
723     X86CPU *x86_cpu = X86_CPU(cpu);
724     CPUX86State *env = &x86_cpu->env;
725     int ret = 0;
726     uint64_t rip = 0;
727 
728     if (hvf_process_events(cpu)) {
729         return EXCP_HLT;
730     }
731 
732     do {
733         if (cpu->accel->dirty) {
734             hvf_put_registers(cpu);
735             cpu->accel->dirty = false;
736         }
737 
738         if (hvf_inject_interrupts(cpu)) {
739             return EXCP_INTERRUPT;
740         }
741         vmx_update_tpr(cpu);
742 
743         bql_unlock();
744         if (!cpu_is_bsp(X86_CPU(cpu)) && cpu->halted) {
745             bql_lock();
746             return EXCP_HLT;
747         }
748 
749         hv_return_t r = hv_vcpu_run_until(cpu->accel->fd, HV_DEADLINE_FOREVER);
750         assert_hvf_ok(r);
751 
752         /* handle VMEXIT */
753         uint64_t exit_reason = rvmcs(cpu->accel->fd, VMCS_EXIT_REASON);
754         uint64_t exit_qual = rvmcs(cpu->accel->fd, VMCS_EXIT_QUALIFICATION);
755         uint32_t ins_len = (uint32_t)rvmcs(cpu->accel->fd,
756                                            VMCS_EXIT_INSTRUCTION_LENGTH);
757 
758         uint64_t idtvec_info = rvmcs(cpu->accel->fd, VMCS_IDT_VECTORING_INFO);
759 
760         hvf_store_events(cpu, ins_len, idtvec_info);
761         rip = rreg(cpu->accel->fd, HV_X86_RIP);
762         env->eflags = rreg(cpu->accel->fd, HV_X86_RFLAGS);
763 
764         bql_lock();
765 
766         update_apic_tpr(cpu);
767         current_cpu = cpu;
768 
769         ret = 0;
770         switch (exit_reason) {
771         case EXIT_REASON_HLT: {
772             macvm_set_rip(cpu, rip + ins_len);
773             if (!((cpu->interrupt_request & CPU_INTERRUPT_HARD) &&
774                 (env->eflags & IF_MASK))
775                 && !(cpu->interrupt_request & CPU_INTERRUPT_NMI) &&
776                 !(idtvec_info & VMCS_IDT_VEC_VALID)) {
777                 cpu->halted = 1;
778                 ret = EXCP_HLT;
779                 break;
780             }
781             ret = EXCP_INTERRUPT;
782             break;
783         }
784         case EXIT_REASON_MWAIT: {
785             ret = EXCP_INTERRUPT;
786             break;
787         }
788         /* Need to check if MMIO or unmapped fault */
789         case EXIT_REASON_EPT_FAULT:
790         {
791             hvf_slot *slot;
792             uint64_t gpa = rvmcs(cpu->accel->fd, VMCS_GUEST_PHYSICAL_ADDRESS);
793 
794             if (((idtvec_info & VMCS_IDT_VEC_VALID) == 0) &&
795                 ((exit_qual & EXIT_QUAL_NMIUDTI) != 0)) {
796                 vmx_set_nmi_blocking(cpu);
797             }
798 
799             slot = hvf_find_overlap_slot(gpa, 1);
800             /* mmio */
801             if (ept_emulation_fault(slot, gpa, exit_qual)) {
802                 struct x86_decode decode;
803 
804                 hvf_load_regs(cpu);
805                 decode_instruction(env, &decode);
806                 exec_instruction(env, &decode);
807                 hvf_store_regs(cpu);
808                 break;
809             }
810             break;
811         }
812         case EXIT_REASON_INOUT:
813         {
814             uint32_t in = (exit_qual & 8) != 0;
815             uint32_t size =  (exit_qual & 7) + 1;
816             uint32_t string =  (exit_qual & 16) != 0;
817             uint32_t port =  exit_qual >> 16;
818             /*uint32_t rep = (exit_qual & 0x20) != 0;*/
819 
820             if (!string && in) {
821                 uint64_t val = 0;
822                 hvf_load_regs(cpu);
823                 hvf_handle_io(env_cpu(env), port, &val, 0, size, 1);
824                 if (size == 1) {
825                     AL(env) = val;
826                 } else if (size == 2) {
827                     AX(env) = val;
828                 } else if (size == 4) {
829                     RAX(env) = (uint32_t)val;
830                 } else {
831                     RAX(env) = (uint64_t)val;
832                 }
833                 env->eip += ins_len;
834                 hvf_store_regs(cpu);
835                 break;
836             } else if (!string && !in) {
837                 RAX(env) = rreg(cpu->accel->fd, HV_X86_RAX);
838                 hvf_handle_io(env_cpu(env), port, &RAX(env), 1, size, 1);
839                 macvm_set_rip(cpu, rip + ins_len);
840                 break;
841             }
842             struct x86_decode decode;
843 
844             hvf_load_regs(cpu);
845             decode_instruction(env, &decode);
846             assert(ins_len == decode.len);
847             exec_instruction(env, &decode);
848             hvf_store_regs(cpu);
849 
850             break;
851         }
852         case EXIT_REASON_CPUID: {
853             uint32_t rax = (uint32_t)rreg(cpu->accel->fd, HV_X86_RAX);
854             uint32_t rbx = (uint32_t)rreg(cpu->accel->fd, HV_X86_RBX);
855             uint32_t rcx = (uint32_t)rreg(cpu->accel->fd, HV_X86_RCX);
856             uint32_t rdx = (uint32_t)rreg(cpu->accel->fd, HV_X86_RDX);
857 
858             if (rax == 1) {
859                 /* CPUID1.ecx.OSXSAVE needs to know CR4 */
860                 env->cr[4] = rvmcs(cpu->accel->fd, VMCS_GUEST_CR4);
861             }
862             hvf_cpu_x86_cpuid(env, rax, rcx, &rax, &rbx, &rcx, &rdx);
863 
864             wreg(cpu->accel->fd, HV_X86_RAX, rax);
865             wreg(cpu->accel->fd, HV_X86_RBX, rbx);
866             wreg(cpu->accel->fd, HV_X86_RCX, rcx);
867             wreg(cpu->accel->fd, HV_X86_RDX, rdx);
868 
869             macvm_set_rip(cpu, rip + ins_len);
870             break;
871         }
872         case EXIT_REASON_XSETBV: {
873             uint32_t eax = (uint32_t)rreg(cpu->accel->fd, HV_X86_RAX);
874             uint32_t ecx = (uint32_t)rreg(cpu->accel->fd, HV_X86_RCX);
875             uint32_t edx = (uint32_t)rreg(cpu->accel->fd, HV_X86_RDX);
876 
877             if (ecx) {
878                 macvm_set_rip(cpu, rip + ins_len);
879                 break;
880             }
881             env->xcr0 = ((uint64_t)edx << 32) | eax;
882             wreg(cpu->accel->fd, HV_X86_XCR0, env->xcr0 | 1);
883             macvm_set_rip(cpu, rip + ins_len);
884             break;
885         }
886         case EXIT_REASON_INTR_WINDOW:
887             vmx_clear_int_window_exiting(cpu);
888             ret = EXCP_INTERRUPT;
889             break;
890         case EXIT_REASON_NMI_WINDOW:
891             vmx_clear_nmi_window_exiting(cpu);
892             ret = EXCP_INTERRUPT;
893             break;
894         case EXIT_REASON_EXT_INTR:
895             /* force exit and allow io handling */
896             ret = EXCP_INTERRUPT;
897             break;
898         case EXIT_REASON_RDMSR:
899         case EXIT_REASON_WRMSR:
900         {
901             hvf_load_regs(cpu);
902             if (exit_reason == EXIT_REASON_RDMSR) {
903                 hvf_simulate_rdmsr(env);
904             } else {
905                 hvf_simulate_wrmsr(env);
906             }
907             env->eip += ins_len;
908             hvf_store_regs(cpu);
909             break;
910         }
911         case EXIT_REASON_CR_ACCESS: {
912             int cr;
913             int reg;
914 
915             hvf_load_regs(cpu);
916             cr = exit_qual & 15;
917             reg = (exit_qual >> 8) & 15;
918 
919             switch (cr) {
920             case 0x0: {
921                 macvm_set_cr0(cpu->accel->fd, RRX(env, reg));
922                 break;
923             }
924             case 4: {
925                 macvm_set_cr4(cpu->accel->fd, RRX(env, reg));
926                 break;
927             }
928             case 8: {
929                 if (exit_qual & 0x10) {
930                     RRX(env, reg) = cpu_get_apic_tpr(x86_cpu->apic_state);
931                 } else {
932                     int tpr = RRX(env, reg);
933                     cpu_set_apic_tpr(x86_cpu->apic_state, tpr);
934                     ret = EXCP_INTERRUPT;
935                 }
936                 break;
937             }
938             default:
939                 error_report("Unrecognized CR %d", cr);
940                 abort();
941             }
942             env->eip += ins_len;
943             hvf_store_regs(cpu);
944             break;
945         }
946         case EXIT_REASON_APIC_ACCESS: { /* TODO */
947             struct x86_decode decode;
948 
949             hvf_load_regs(cpu);
950             decode_instruction(env, &decode);
951             exec_instruction(env, &decode);
952             hvf_store_regs(cpu);
953             break;
954         }
955         case EXIT_REASON_TPR: {
956             ret = 1;
957             break;
958         }
959         case EXIT_REASON_TASK_SWITCH: {
960             uint64_t vinfo = rvmcs(cpu->accel->fd, VMCS_IDT_VECTORING_INFO);
961             x86_segment_selector sel = {.sel = exit_qual & 0xffff};
962             vmx_handle_task_switch(cpu, sel, (exit_qual >> 30) & 0x3,
963              vinfo & VMCS_INTR_VALID, vinfo & VECTORING_INFO_VECTOR_MASK, vinfo
964              & VMCS_INTR_T_MASK);
965             break;
966         }
967         case EXIT_REASON_TRIPLE_FAULT: {
968             qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
969             ret = EXCP_INTERRUPT;
970             break;
971         }
972         case EXIT_REASON_RDPMC:
973             wreg(cpu->accel->fd, HV_X86_RAX, 0);
974             wreg(cpu->accel->fd, HV_X86_RDX, 0);
975             macvm_set_rip(cpu, rip + ins_len);
976             break;
977         case VMX_REASON_VMCALL:
978             env->exception_nr = EXCP0D_GPF;
979             env->exception_injected = 1;
980             env->has_error_code = true;
981             env->error_code = 0;
982             break;
983         default:
984             error_report("%llx: unhandled exit %llx", rip, exit_reason);
985         }
986     } while (ret == 0);
987 
988     return ret;
989 }
990 
991 int hvf_arch_insert_sw_breakpoint(CPUState *cpu, struct hvf_sw_breakpoint *bp)
992 {
993     return -ENOSYS;
994 }
995 
996 int hvf_arch_remove_sw_breakpoint(CPUState *cpu, struct hvf_sw_breakpoint *bp)
997 {
998     return -ENOSYS;
999 }
1000 
1001 int hvf_arch_insert_hw_breakpoint(vaddr addr, vaddr len, int type)
1002 {
1003     return -ENOSYS;
1004 }
1005 
1006 int hvf_arch_remove_hw_breakpoint(vaddr addr, vaddr len, int type)
1007 {
1008     return -ENOSYS;
1009 }
1010 
/* No hardware breakpoints are ever installed here, so this does nothing. */
void hvf_arch_remove_all_hw_breakpoints(void)
{
}
1014 
1015 void hvf_arch_update_guest_debug(CPUState *cpu)
1016 {
1017 }
1018 
/* Reports whether guest debugging is available; always false here. */
bool hvf_arch_supports_guest_debug(void)
{
    return false;
}
1023