xref: /qemu/target/i386/hvf/hvf.c (revision 444bae08bbdae175b14cc96a11af8640eb262963)
1 /* Copyright 2008 IBM Corporation
2  *           2008 Red Hat, Inc.
3  * Copyright 2011 Intel Corporation
4  * Copyright 2016 Veertu, Inc.
5  * Copyright 2017 The Android Open Source Project
6  *
7  * QEMU Hypervisor.framework support
8  *
9  * This program is free software; you can redistribute it and/or
10  * modify it under the terms of version 2 of the GNU General Public
11  * License as published by the Free Software Foundation.
12  *
13  * This program is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16  * General Public License for more details.
17  *
18  * You should have received a copy of the GNU General Public License
19  * along with this program; if not, see <http://www.gnu.org/licenses/>.
20  *
21  * This file contain code under public domain from the hvdos project:
22  * https://github.com/mist64/hvdos
23  *
24  * Parts Copyright (c) 2011 NetApp, Inc.
25  * All rights reserved.
26  *
27  * Redistribution and use in source and binary forms, with or without
28  * modification, are permitted provided that the following conditions
29  * are met:
30  * 1. Redistributions of source code must retain the above copyright
31  *    notice, this list of conditions and the following disclaimer.
32  * 2. Redistributions in binary form must reproduce the above copyright
33  *    notice, this list of conditions and the following disclaimer in the
34  *    documentation and/or other materials provided with the distribution.
35  *
36  * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
37  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
38  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
39  * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
40  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
41  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
42  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
43  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
44  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
45  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
46  * SUCH DAMAGE.
47  */
48 
49 #include "qemu/osdep.h"
50 #include "qemu/error-report.h"
51 #include "qemu/memalign.h"
52 #include "qapi/error.h"
53 #include "migration/blocker.h"
54 
55 #include "system/hvf.h"
56 #include "system/hvf_int.h"
57 #include "system/runstate.h"
58 #include "system/cpus.h"
59 #include "hvf-i386.h"
60 #include "vmcs.h"
61 #include "vmx.h"
62 #include "x86.h"
63 #include "x86_descr.h"
64 #include "x86_flags.h"
65 #include "x86_mmu.h"
66 #include "x86_decode.h"
67 #include "x86_emu.h"
68 #include "x86_task.h"
69 #include "x86hvf.h"
70 
71 #include <Hypervisor/hv.h>
72 #include <Hypervisor/hv_vmx.h>
73 #include <sys/sysctl.h>
74 
75 #include "hw/i386/apic_internal.h"
76 #include "qemu/main-loop.h"
77 #include "qemu/accel.h"
78 #include "target/i386/cpu.h"
79 
/*
 * Migration blocker created by hvf_arch_init_vcpu() the first time a vCPU
 * exposing the CPUID_APM_INVTSC feature bit is initialised; NULL until then.
 */
static Error *invtsc_mig_blocker;
81 
82 void vmx_update_tpr(CPUState *cpu)
83 {
84     /* TODO: need integrate APIC handling */
85     X86CPU *x86_cpu = X86_CPU(cpu);
86     int tpr = cpu_get_apic_tpr(x86_cpu->apic_state) << 4;
87     int irr = apic_get_highest_priority_irr(x86_cpu->apic_state);
88 
89     wreg(cpu->accel->fd, HV_X86_TPR, tpr);
90     if (irr == -1) {
91         wvmcs(cpu->accel->fd, VMCS_TPR_THRESHOLD, 0);
92     } else {
93         wvmcs(cpu->accel->fd, VMCS_TPR_THRESHOLD, (irr > tpr) ? tpr >> 4 :
94               irr >> 4);
95     }
96 }
97 
98 static void update_apic_tpr(CPUState *cpu)
99 {
100     X86CPU *x86_cpu = X86_CPU(cpu);
101     int tpr = rreg(cpu->accel->fd, HV_X86_TPR) >> 4;
102     cpu_set_apic_tpr(x86_cpu->apic_state, tpr);
103 }
104 
/*
 * Mask for the low 8 bits of the VMCS IDT-vectoring information value;
 * used by the task-switch exit handler to pass the vector field on.
 */
#define VECTORING_INFO_VECTOR_MASK     0xff
106 
107 void hvf_handle_io(CPUState *env, uint16_t port, void *buffer,
108                   int direction, int size, int count)
109 {
110     int i;
111     uint8_t *ptr = buffer;
112 
113     for (i = 0; i < count; i++) {
114         address_space_rw(&address_space_io, port, MEMTXATTRS_UNSPECIFIED,
115                          ptr, size,
116                          direction);
117         ptr += size;
118     }
119 }
120 
121 static bool ept_emulation_fault(hvf_slot *slot, uint64_t gpa, uint64_t ept_qual)
122 {
123     int read, write;
124 
125     /* EPT fault on an instruction fetch doesn't make sense here */
126     if (ept_qual & EPT_VIOLATION_INST_FETCH) {
127         return false;
128     }
129 
130     /* EPT fault must be a read fault or a write fault */
131     read = ept_qual & EPT_VIOLATION_DATA_READ ? 1 : 0;
132     write = ept_qual & EPT_VIOLATION_DATA_WRITE ? 1 : 0;
133     if ((read | write) == 0) {
134         return false;
135     }
136 
137     if (write && slot) {
138         if (slot->flags & HVF_SLOT_LOG) {
139             uint64_t dirty_page_start = gpa & ~(TARGET_PAGE_SIZE - 1u);
140             memory_region_set_dirty(slot->region, gpa - slot->start, 1);
141             hv_vm_protect(dirty_page_start, TARGET_PAGE_SIZE,
142                           HV_MEMORY_READ | HV_MEMORY_WRITE | HV_MEMORY_EXEC);
143         }
144     }
145 
146     /*
147      * The EPT violation must have been caused by accessing a
148      * guest-physical address that is a translation of a guest-linear
149      * address.
150      */
151     if ((ept_qual & EPT_VIOLATION_GLA_VALID) == 0 ||
152         (ept_qual & EPT_VIOLATION_XLAT_VALID) == 0) {
153         return false;
154     }
155 
156     if (!slot) {
157         return true;
158     }
159     if (!memory_region_is_ram(slot->region) &&
160         !(read && memory_region_is_romd(slot->region))) {
161         return true;
162     }
163     return false;
164 }
165 
166 void hvf_arch_vcpu_destroy(CPUState *cpu)
167 {
168     X86CPU *x86_cpu = X86_CPU(cpu);
169     CPUX86State *env = &x86_cpu->env;
170 
171     g_free(env->hvf_mmio_buf);
172 }
173 
/*
 * Fill in env->tsc_khz from the "machdep.tsc.frequency" sysctl (reported
 * in Hz) unless a non-zero value is already present.  If the sysctl fails
 * the field is left at zero.
 */
static void init_tsc_freq(CPUX86State *env)
{
    uint64_t host_tsc_hz;
    size_t len = sizeof(uint64_t);

    if (env->tsc_khz == 0 &&
        sysctlbyname("machdep.tsc.frequency", &host_tsc_hz, &len,
                     NULL, 0) == 0) {
        env->tsc_khz = host_tsc_hz / 1000;  /* Hz to KHz */
    }
}
189 
/*
 * Fill in env->apic_bus_freq from the "hw.busfrequency" sysctl unless a
 * non-zero value is already present.  The value is stored as reported
 * (no unit conversion); if the sysctl fails the field is left at zero.
 */
static void init_apic_bus_freq(CPUX86State *env)
{
    uint64_t host_bus_freq;
    size_t len = sizeof(uint64_t);

    if (env->apic_bus_freq == 0 &&
        sysctlbyname("hw.busfrequency", &host_bus_freq, &len,
                     NULL, 0) == 0) {
        env->apic_bus_freq = host_bus_freq;
    }
}
205 
206 static inline bool tsc_is_known(CPUX86State *env)
207 {
208     return env->tsc_khz != 0;
209 }
210 
211 static inline bool apic_bus_freq_is_known(CPUX86State *env)
212 {
213     return env->apic_bus_freq != 0;
214 }
215 
/*
 * Kick a vCPU: first signal its host thread through cpus_kick_thread(),
 * then ask Hypervisor.framework to interrupt that single vCPU via
 * hv_vcpu_interrupt().
 */
void hvf_kick_vcpu_thread(CPUState *cpu)
{
    cpus_kick_thread(cpu);
    /* One-element vCPU list: just this vCPU's id */
    hv_vcpu_interrupt(&cpu->accel->fd, 1);
}
221 
/* Architecture-level HVF init hook; x86 has nothing to do and returns 0. */
int hvf_arch_init(void)
{
    return 0;
}
226 
/*
 * Create the VM with default options.  Neither @ms nor @pa_range is used
 * on x86; the result of hv_vm_create(HV_VM_DEFAULT) is returned as is.
 */
hv_return_t hvf_arch_vm_create(MachineState *ms, uint32_t pa_range)
{
    return hv_vm_create(HV_VM_DEFAULT);
}
231 
232 static void hvf_read_segment_descriptor(CPUState *s, struct x86_segment_descriptor *desc,
233                                         X86Seg seg)
234 {
235     struct vmx_segment vmx_segment;
236     vmx_read_segment_descriptor(s, &vmx_segment, seg);
237     vmx_segment_to_x86_descriptor(s, &vmx_segment, desc);
238 }
239 
/*
 * x86_emul_ops.read_mem callback: forwards to vmx_read_mem() to copy
 * @bytes bytes starting at guest virtual address @gva into @data.
 */
static void hvf_read_mem(CPUState *cpu, void *data, target_ulong gva, int bytes)
{
    vmx_read_mem(cpu, data, gva, bytes);
}
244 
/*
 * Callbacks handed to init_emu() in hvf_arch_init_vcpu() so the x86
 * instruction emulator can read guest memory, read segment descriptors
 * and perform port I/O through HVF.
 */
static const struct x86_emul_ops hvf_x86_emul_ops = {
    .read_mem = hvf_read_mem,
    .read_segment_descriptor = hvf_read_segment_descriptor,
    .handle_io = hvf_handle_io,
};
250 
251 int hvf_arch_init_vcpu(CPUState *cpu)
252 {
253     X86CPU *x86cpu = X86_CPU(cpu);
254     CPUX86State *env = &x86cpu->env;
255     Error *local_err = NULL;
256     int r;
257     uint64_t reqCap;
258 
259     init_emu(&hvf_x86_emul_ops);
260     init_decoder();
261 
262     if (hvf_state->hvf_caps == NULL) {
263         hvf_state->hvf_caps = g_new0(struct hvf_vcpu_caps, 1);
264     }
265     env->hvf_mmio_buf = g_new(char, 4096);
266 
267     if (x86cpu->vmware_cpuid_freq) {
268         init_tsc_freq(env);
269         init_apic_bus_freq(env);
270 
271         if (!tsc_is_known(env) || !apic_bus_freq_is_known(env)) {
272             error_report("vmware-cpuid-freq: feature couldn't be enabled");
273         }
274     }
275 
276     if ((env->features[FEAT_8000_0007_EDX] & CPUID_APM_INVTSC) &&
277         invtsc_mig_blocker == NULL) {
278         error_setg(&invtsc_mig_blocker,
279                    "State blocked by non-migratable CPU device (invtsc flag)");
280         r = migrate_add_blocker(&invtsc_mig_blocker, &local_err);
281         if (r < 0) {
282             error_report_err(local_err);
283             return r;
284         }
285     }
286 
287 
288     if (hv_vmx_read_capability(HV_VMX_CAP_PINBASED,
289         &hvf_state->hvf_caps->vmx_cap_pinbased)) {
290         abort();
291     }
292     if (hv_vmx_read_capability(HV_VMX_CAP_PROCBASED,
293         &hvf_state->hvf_caps->vmx_cap_procbased)) {
294         abort();
295     }
296     if (hv_vmx_read_capability(HV_VMX_CAP_PROCBASED2,
297         &hvf_state->hvf_caps->vmx_cap_procbased2)) {
298         abort();
299     }
300     if (hv_vmx_read_capability(HV_VMX_CAP_ENTRY,
301         &hvf_state->hvf_caps->vmx_cap_entry)) {
302         abort();
303     }
304 
305     /* set VMCS control fields */
306     wvmcs(cpu->accel->fd, VMCS_PIN_BASED_CTLS,
307           cap2ctrl(hvf_state->hvf_caps->vmx_cap_pinbased,
308                    VMCS_PIN_BASED_CTLS_EXTINT |
309                    VMCS_PIN_BASED_CTLS_NMI |
310                    VMCS_PIN_BASED_CTLS_VNMI));
311     wvmcs(cpu->accel->fd, VMCS_PRI_PROC_BASED_CTLS,
312           cap2ctrl(hvf_state->hvf_caps->vmx_cap_procbased,
313                    VMCS_PRI_PROC_BASED_CTLS_HLT |
314                    VMCS_PRI_PROC_BASED_CTLS_MWAIT |
315                    VMCS_PRI_PROC_BASED_CTLS_TSC_OFFSET |
316                    VMCS_PRI_PROC_BASED_CTLS_TPR_SHADOW) |
317           VMCS_PRI_PROC_BASED_CTLS_SEC_CONTROL);
318 
319     reqCap = VMCS_PRI_PROC_BASED2_CTLS_APIC_ACCESSES;
320 
321     /* Is RDTSCP support in CPUID?  If so, enable it in the VMCS. */
322     if (hvf_get_supported_cpuid(0x80000001, 0, R_EDX) & CPUID_EXT2_RDTSCP) {
323         reqCap |= VMCS_PRI_PROC_BASED2_CTLS_RDTSCP;
324     }
325 
326     wvmcs(cpu->accel->fd, VMCS_SEC_PROC_BASED_CTLS,
327           cap2ctrl(hvf_state->hvf_caps->vmx_cap_procbased2, reqCap));
328 
329     wvmcs(cpu->accel->fd, VMCS_ENTRY_CTLS,
330           cap2ctrl(hvf_state->hvf_caps->vmx_cap_entry, 0));
331     wvmcs(cpu->accel->fd, VMCS_EXCEPTION_BITMAP, 0); /* Double fault */
332 
333     wvmcs(cpu->accel->fd, VMCS_TPR_THRESHOLD, 0);
334 
335     x86cpu = X86_CPU(cpu);
336     x86cpu->env.xsave_buf_len = 4096;
337     x86cpu->env.xsave_buf = qemu_memalign(4096, x86cpu->env.xsave_buf_len);
338 
339     /*
340      * The allocated storage must be large enough for all of the
341      * possible XSAVE state components.
342      */
343     assert(hvf_get_supported_cpuid(0xd, 0, R_ECX) <= x86cpu->env.xsave_buf_len);
344 
345     hv_vcpu_enable_native_msr(cpu->accel->fd, MSR_STAR, 1);
346     hv_vcpu_enable_native_msr(cpu->accel->fd, MSR_LSTAR, 1);
347     hv_vcpu_enable_native_msr(cpu->accel->fd, MSR_CSTAR, 1);
348     hv_vcpu_enable_native_msr(cpu->accel->fd, MSR_FMASK, 1);
349     hv_vcpu_enable_native_msr(cpu->accel->fd, MSR_FSBASE, 1);
350     hv_vcpu_enable_native_msr(cpu->accel->fd, MSR_GSBASE, 1);
351     hv_vcpu_enable_native_msr(cpu->accel->fd, MSR_KERNELGSBASE, 1);
352     hv_vcpu_enable_native_msr(cpu->accel->fd, MSR_TSC_AUX, 1);
353     hv_vcpu_enable_native_msr(cpu->accel->fd, MSR_IA32_TSC, 1);
354     hv_vcpu_enable_native_msr(cpu->accel->fd, MSR_IA32_SYSENTER_CS, 1);
355     hv_vcpu_enable_native_msr(cpu->accel->fd, MSR_IA32_SYSENTER_EIP, 1);
356     hv_vcpu_enable_native_msr(cpu->accel->fd, MSR_IA32_SYSENTER_ESP, 1);
357 
358     return 0;
359 }
360 
361 static void hvf_store_events(CPUState *cpu, uint32_t ins_len, uint64_t idtvec_info)
362 {
363     X86CPU *x86_cpu = X86_CPU(cpu);
364     CPUX86State *env = &x86_cpu->env;
365 
366     env->exception_nr = -1;
367     env->exception_pending = 0;
368     env->exception_injected = 0;
369     env->interrupt_injected = -1;
370     env->nmi_injected = false;
371     env->ins_len = 0;
372     env->has_error_code = false;
373     if (idtvec_info & VMCS_IDT_VEC_VALID) {
374         switch (idtvec_info & VMCS_IDT_VEC_TYPE) {
375         case VMCS_IDT_VEC_HWINTR:
376         case VMCS_IDT_VEC_SWINTR:
377             env->interrupt_injected = idtvec_info & VMCS_IDT_VEC_VECNUM;
378             break;
379         case VMCS_IDT_VEC_NMI:
380             env->nmi_injected = true;
381             break;
382         case VMCS_IDT_VEC_HWEXCEPTION:
383         case VMCS_IDT_VEC_SWEXCEPTION:
384             env->exception_nr = idtvec_info & VMCS_IDT_VEC_VECNUM;
385             env->exception_injected = 1;
386             break;
387         case VMCS_IDT_VEC_PRIV_SWEXCEPTION:
388         default:
389             abort();
390         }
391         if ((idtvec_info & VMCS_IDT_VEC_TYPE) == VMCS_IDT_VEC_SWEXCEPTION ||
392             (idtvec_info & VMCS_IDT_VEC_TYPE) == VMCS_IDT_VEC_SWINTR) {
393             env->ins_len = ins_len;
394         }
395         if (idtvec_info & VMCS_IDT_VEC_ERRCODE_VALID) {
396             env->has_error_code = true;
397             env->error_code = rvmcs(cpu->accel->fd, VMCS_IDT_VECTORING_ERROR);
398         }
399     }
400     if ((rvmcs(cpu->accel->fd, VMCS_GUEST_INTERRUPTIBILITY) &
401         VMCS_INTERRUPTIBILITY_NMI_BLOCKING)) {
402         env->hflags2 |= HF2_NMI_MASK;
403     } else {
404         env->hflags2 &= ~HF2_NMI_MASK;
405     }
406     if (rvmcs(cpu->accel->fd, VMCS_GUEST_INTERRUPTIBILITY) &
407          (VMCS_INTERRUPTIBILITY_STI_BLOCKING |
408          VMCS_INTERRUPTIBILITY_MOVSS_BLOCKING)) {
409         env->hflags |= HF_INHIBIT_IRQ_MASK;
410     } else {
411         env->hflags &= ~HF_INHIBIT_IRQ_MASK;
412     }
413 }
414 
415 static void hvf_cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
416                               uint32_t *eax, uint32_t *ebx,
417                               uint32_t *ecx, uint32_t *edx)
418 {
419     /*
420      * A wrapper extends cpu_x86_cpuid with 0x40000000 and 0x40000010 leafs,
421      * leafs 0x40000001-0x4000000F are filled with zeros
422      * Provides vmware-cpuid-freq support to hvf
423      *
424      * Note: leaf 0x40000000 not exposes HVF,
425      * leaving hypervisor signature empty
426      */
427 
428     if (index < 0x40000000 || index > 0x40000010 ||
429         !tsc_is_known(env) || !apic_bus_freq_is_known(env)) {
430 
431         cpu_x86_cpuid(env, index, count, eax, ebx, ecx, edx);
432         return;
433     }
434 
435     switch (index) {
436     case 0x40000000:
437         *eax = 0x40000010;    /* Max available cpuid leaf */
438         *ebx = 0;             /* Leave signature empty */
439         *ecx = 0;
440         *edx = 0;
441         break;
442     case 0x40000010:
443         *eax = env->tsc_khz;
444         *ebx = env->apic_bus_freq / 1000; /* Hz to KHz */
445         *ecx = 0;
446         *edx = 0;
447         break;
448     default:
449         *eax = 0;
450         *ebx = 0;
451         *ecx = 0;
452         *edx = 0;
453         break;
454     }
455 }
456 
457 void hvf_load_regs(CPUState *cs)
458 {
459     X86CPU *cpu = X86_CPU(cs);
460     CPUX86State *env = &cpu->env;
461 
462     int i = 0;
463     RRX(env, R_EAX) = rreg(cs->accel->fd, HV_X86_RAX);
464     RRX(env, R_EBX) = rreg(cs->accel->fd, HV_X86_RBX);
465     RRX(env, R_ECX) = rreg(cs->accel->fd, HV_X86_RCX);
466     RRX(env, R_EDX) = rreg(cs->accel->fd, HV_X86_RDX);
467     RRX(env, R_ESI) = rreg(cs->accel->fd, HV_X86_RSI);
468     RRX(env, R_EDI) = rreg(cs->accel->fd, HV_X86_RDI);
469     RRX(env, R_ESP) = rreg(cs->accel->fd, HV_X86_RSP);
470     RRX(env, R_EBP) = rreg(cs->accel->fd, HV_X86_RBP);
471     for (i = 8; i < 16; i++) {
472         RRX(env, i) = rreg(cs->accel->fd, HV_X86_RAX + i);
473     }
474 
475     env->eflags = rreg(cs->accel->fd, HV_X86_RFLAGS);
476     rflags_to_lflags(env);
477     env->eip = rreg(cs->accel->fd, HV_X86_RIP);
478 }
479 
480 void hvf_store_regs(CPUState *cs)
481 {
482     X86CPU *cpu = X86_CPU(cs);
483     CPUX86State *env = &cpu->env;
484 
485     int i = 0;
486     wreg(cs->accel->fd, HV_X86_RAX, RAX(env));
487     wreg(cs->accel->fd, HV_X86_RBX, RBX(env));
488     wreg(cs->accel->fd, HV_X86_RCX, RCX(env));
489     wreg(cs->accel->fd, HV_X86_RDX, RDX(env));
490     wreg(cs->accel->fd, HV_X86_RSI, RSI(env));
491     wreg(cs->accel->fd, HV_X86_RDI, RDI(env));
492     wreg(cs->accel->fd, HV_X86_RBP, RBP(env));
493     wreg(cs->accel->fd, HV_X86_RSP, RSP(env));
494     for (i = 8; i < 16; i++) {
495         wreg(cs->accel->fd, HV_X86_RAX + i, RRX(env, i));
496     }
497 
498     lflags_to_rflags(env);
499     wreg(cs->accel->fd, HV_X86_RFLAGS, env->eflags);
500     macvm_set_rip(cs, env->eip);
501 }
502 
503 void hvf_simulate_rdmsr(CPUX86State *env)
504 {
505     X86CPU *cpu = env_archcpu(env);
506     CPUState *cs = env_cpu(env);
507     uint32_t msr = ECX(env);
508     uint64_t val = 0;
509 
510     switch (msr) {
511     case MSR_IA32_TSC:
512         val = rdtscp() + rvmcs(cs->accel->fd, VMCS_TSC_OFFSET);
513         break;
514     case MSR_IA32_APICBASE:
515         val = cpu_get_apic_base(cpu->apic_state);
516         break;
517     case MSR_APIC_START ... MSR_APIC_END: {
518         int ret;
519         int index = (uint32_t)env->regs[R_ECX] - MSR_APIC_START;
520 
521         ret = apic_msr_read(index, &val);
522         if (ret < 0) {
523             x86_emul_raise_exception(env, EXCP0D_GPF, 0);
524         }
525 
526         break;
527     }
528     case MSR_IA32_UCODE_REV:
529         val = cpu->ucode_rev;
530         break;
531     case MSR_EFER:
532         val = rvmcs(cs->accel->fd, VMCS_GUEST_IA32_EFER);
533         break;
534     case MSR_FSBASE:
535         val = rvmcs(cs->accel->fd, VMCS_GUEST_FS_BASE);
536         break;
537     case MSR_GSBASE:
538         val = rvmcs(cs->accel->fd, VMCS_GUEST_GS_BASE);
539         break;
540     case MSR_KERNELGSBASE:
541         val = rvmcs(cs->accel->fd, VMCS_HOST_FS_BASE);
542         break;
543     case MSR_STAR:
544         abort();
545         break;
546     case MSR_LSTAR:
547         abort();
548         break;
549     case MSR_CSTAR:
550         abort();
551         break;
552     case MSR_IA32_MISC_ENABLE:
553         val = env->msr_ia32_misc_enable;
554         break;
555     case MSR_MTRRphysBase(0):
556     case MSR_MTRRphysBase(1):
557     case MSR_MTRRphysBase(2):
558     case MSR_MTRRphysBase(3):
559     case MSR_MTRRphysBase(4):
560     case MSR_MTRRphysBase(5):
561     case MSR_MTRRphysBase(6):
562     case MSR_MTRRphysBase(7):
563         val = env->mtrr_var[(ECX(env) - MSR_MTRRphysBase(0)) / 2].base;
564         break;
565     case MSR_MTRRphysMask(0):
566     case MSR_MTRRphysMask(1):
567     case MSR_MTRRphysMask(2):
568     case MSR_MTRRphysMask(3):
569     case MSR_MTRRphysMask(4):
570     case MSR_MTRRphysMask(5):
571     case MSR_MTRRphysMask(6):
572     case MSR_MTRRphysMask(7):
573         val = env->mtrr_var[(ECX(env) - MSR_MTRRphysMask(0)) / 2].mask;
574         break;
575     case MSR_MTRRfix64K_00000:
576         val = env->mtrr_fixed[0];
577         break;
578     case MSR_MTRRfix16K_80000:
579     case MSR_MTRRfix16K_A0000:
580         val = env->mtrr_fixed[ECX(env) - MSR_MTRRfix16K_80000 + 1];
581         break;
582     case MSR_MTRRfix4K_C0000:
583     case MSR_MTRRfix4K_C8000:
584     case MSR_MTRRfix4K_D0000:
585     case MSR_MTRRfix4K_D8000:
586     case MSR_MTRRfix4K_E0000:
587     case MSR_MTRRfix4K_E8000:
588     case MSR_MTRRfix4K_F0000:
589     case MSR_MTRRfix4K_F8000:
590         val = env->mtrr_fixed[ECX(env) - MSR_MTRRfix4K_C0000 + 3];
591         break;
592     case MSR_MTRRdefType:
593         val = env->mtrr_deftype;
594         break;
595     case MSR_CORE_THREAD_COUNT:
596         val = cpu_x86_get_msr_core_thread_count(cpu);
597         break;
598     default:
599         /* fprintf(stderr, "%s: unknown msr 0x%x\n", __func__, msr); */
600         val = 0;
601         break;
602     }
603 
604     RAX(env) = (uint32_t)val;
605     RDX(env) = (uint32_t)(val >> 32);
606 }
607 
608 void hvf_simulate_wrmsr(CPUX86State *env)
609 {
610     X86CPU *cpu = env_archcpu(env);
611     CPUState *cs = env_cpu(env);
612     uint32_t msr = ECX(env);
613     uint64_t data = ((uint64_t)EDX(env) << 32) | EAX(env);
614 
615     switch (msr) {
616     case MSR_IA32_TSC:
617         break;
618     case MSR_IA32_APICBASE: {
619         int r;
620 
621         r = cpu_set_apic_base(cpu->apic_state, data);
622         if (r < 0) {
623             x86_emul_raise_exception(env, EXCP0D_GPF, 0);
624         }
625 
626         break;
627     }
628     case MSR_APIC_START ... MSR_APIC_END: {
629         int ret;
630         int index = (uint32_t)env->regs[R_ECX] - MSR_APIC_START;
631 
632         ret = apic_msr_write(index, data);
633         if (ret < 0) {
634             x86_emul_raise_exception(env, EXCP0D_GPF, 0);
635         }
636 
637         break;
638     }
639     case MSR_FSBASE:
640         wvmcs(cs->accel->fd, VMCS_GUEST_FS_BASE, data);
641         break;
642     case MSR_GSBASE:
643         wvmcs(cs->accel->fd, VMCS_GUEST_GS_BASE, data);
644         break;
645     case MSR_KERNELGSBASE:
646         wvmcs(cs->accel->fd, VMCS_HOST_FS_BASE, data);
647         break;
648     case MSR_STAR:
649         abort();
650         break;
651     case MSR_LSTAR:
652         abort();
653         break;
654     case MSR_CSTAR:
655         abort();
656         break;
657     case MSR_EFER:
658         /*printf("new efer %llx\n", EFER(cs));*/
659         wvmcs(cs->accel->fd, VMCS_GUEST_IA32_EFER, data);
660         if (data & MSR_EFER_NXE) {
661             hv_vcpu_invalidate_tlb(cs->accel->fd);
662         }
663         break;
664     case MSR_MTRRphysBase(0):
665     case MSR_MTRRphysBase(1):
666     case MSR_MTRRphysBase(2):
667     case MSR_MTRRphysBase(3):
668     case MSR_MTRRphysBase(4):
669     case MSR_MTRRphysBase(5):
670     case MSR_MTRRphysBase(6):
671     case MSR_MTRRphysBase(7):
672         env->mtrr_var[(ECX(env) - MSR_MTRRphysBase(0)) / 2].base = data;
673         break;
674     case MSR_MTRRphysMask(0):
675     case MSR_MTRRphysMask(1):
676     case MSR_MTRRphysMask(2):
677     case MSR_MTRRphysMask(3):
678     case MSR_MTRRphysMask(4):
679     case MSR_MTRRphysMask(5):
680     case MSR_MTRRphysMask(6):
681     case MSR_MTRRphysMask(7):
682         env->mtrr_var[(ECX(env) - MSR_MTRRphysMask(0)) / 2].mask = data;
683         break;
684     case MSR_MTRRfix64K_00000:
685         env->mtrr_fixed[ECX(env) - MSR_MTRRfix64K_00000] = data;
686         break;
687     case MSR_MTRRfix16K_80000:
688     case MSR_MTRRfix16K_A0000:
689         env->mtrr_fixed[ECX(env) - MSR_MTRRfix16K_80000 + 1] = data;
690         break;
691     case MSR_MTRRfix4K_C0000:
692     case MSR_MTRRfix4K_C8000:
693     case MSR_MTRRfix4K_D0000:
694     case MSR_MTRRfix4K_D8000:
695     case MSR_MTRRfix4K_E0000:
696     case MSR_MTRRfix4K_E8000:
697     case MSR_MTRRfix4K_F0000:
698     case MSR_MTRRfix4K_F8000:
699         env->mtrr_fixed[ECX(env) - MSR_MTRRfix4K_C0000 + 3] = data;
700         break;
701     case MSR_MTRRdefType:
702         env->mtrr_deftype = data;
703         break;
704     default:
705         break;
706     }
707 
708     /* Related to support known hypervisor interface */
709     /* if (g_hypervisor_iface)
710          g_hypervisor_iface->wrmsr_handler(cs, msr, data);
711 
712     printf("write msr %llx\n", RCX(cs));*/
713 }
714 
715 int hvf_vcpu_exec(CPUState *cpu)
716 {
717     X86CPU *x86_cpu = X86_CPU(cpu);
718     CPUX86State *env = &x86_cpu->env;
719     int ret = 0;
720     uint64_t rip = 0;
721 
722     if (hvf_process_events(cpu)) {
723         return EXCP_HLT;
724     }
725 
726     do {
727         if (cpu->accel->dirty) {
728             hvf_put_registers(cpu);
729             cpu->accel->dirty = false;
730         }
731 
732         if (hvf_inject_interrupts(cpu)) {
733             return EXCP_INTERRUPT;
734         }
735         vmx_update_tpr(cpu);
736 
737         bql_unlock();
738         if (!cpu_is_bsp(X86_CPU(cpu)) && cpu->halted) {
739             bql_lock();
740             return EXCP_HLT;
741         }
742 
743         hv_return_t r = hv_vcpu_run_until(cpu->accel->fd, HV_DEADLINE_FOREVER);
744         assert_hvf_ok(r);
745 
746         /* handle VMEXIT */
747         uint64_t exit_reason = rvmcs(cpu->accel->fd, VMCS_EXIT_REASON);
748         uint64_t exit_qual = rvmcs(cpu->accel->fd, VMCS_EXIT_QUALIFICATION);
749         uint32_t ins_len = (uint32_t)rvmcs(cpu->accel->fd,
750                                            VMCS_EXIT_INSTRUCTION_LENGTH);
751 
752         uint64_t idtvec_info = rvmcs(cpu->accel->fd, VMCS_IDT_VECTORING_INFO);
753 
754         hvf_store_events(cpu, ins_len, idtvec_info);
755         rip = rreg(cpu->accel->fd, HV_X86_RIP);
756         env->eflags = rreg(cpu->accel->fd, HV_X86_RFLAGS);
757 
758         bql_lock();
759 
760         update_apic_tpr(cpu);
761         current_cpu = cpu;
762 
763         ret = 0;
764         switch (exit_reason) {
765         case EXIT_REASON_HLT: {
766             macvm_set_rip(cpu, rip + ins_len);
767             if (!((cpu->interrupt_request & CPU_INTERRUPT_HARD) &&
768                 (env->eflags & IF_MASK))
769                 && !(cpu->interrupt_request & CPU_INTERRUPT_NMI) &&
770                 !(idtvec_info & VMCS_IDT_VEC_VALID)) {
771                 cpu->halted = 1;
772                 ret = EXCP_HLT;
773                 break;
774             }
775             ret = EXCP_INTERRUPT;
776             break;
777         }
778         case EXIT_REASON_MWAIT: {
779             ret = EXCP_INTERRUPT;
780             break;
781         }
782         /* Need to check if MMIO or unmapped fault */
783         case EXIT_REASON_EPT_FAULT:
784         {
785             hvf_slot *slot;
786             uint64_t gpa = rvmcs(cpu->accel->fd, VMCS_GUEST_PHYSICAL_ADDRESS);
787 
788             if (((idtvec_info & VMCS_IDT_VEC_VALID) == 0) &&
789                 ((exit_qual & EXIT_QUAL_NMIUDTI) != 0)) {
790                 vmx_set_nmi_blocking(cpu);
791             }
792 
793             slot = hvf_find_overlap_slot(gpa, 1);
794             /* mmio */
795             if (ept_emulation_fault(slot, gpa, exit_qual)) {
796                 struct x86_decode decode;
797 
798                 hvf_load_regs(cpu);
799                 decode_instruction(env, &decode);
800                 exec_instruction(env, &decode);
801                 hvf_store_regs(cpu);
802                 break;
803             }
804             break;
805         }
806         case EXIT_REASON_INOUT:
807         {
808             uint32_t in = (exit_qual & 8) != 0;
809             uint32_t size =  (exit_qual & 7) + 1;
810             uint32_t string =  (exit_qual & 16) != 0;
811             uint32_t port =  exit_qual >> 16;
812             /*uint32_t rep = (exit_qual & 0x20) != 0;*/
813 
814             if (!string && in) {
815                 uint64_t val = 0;
816                 hvf_load_regs(cpu);
817                 hvf_handle_io(env_cpu(env), port, &val, 0, size, 1);
818                 if (size == 1) {
819                     AL(env) = val;
820                 } else if (size == 2) {
821                     AX(env) = val;
822                 } else if (size == 4) {
823                     RAX(env) = (uint32_t)val;
824                 } else {
825                     RAX(env) = (uint64_t)val;
826                 }
827                 env->eip += ins_len;
828                 hvf_store_regs(cpu);
829                 break;
830             } else if (!string && !in) {
831                 RAX(env) = rreg(cpu->accel->fd, HV_X86_RAX);
832                 hvf_handle_io(env_cpu(env), port, &RAX(env), 1, size, 1);
833                 macvm_set_rip(cpu, rip + ins_len);
834                 break;
835             }
836             struct x86_decode decode;
837 
838             hvf_load_regs(cpu);
839             decode_instruction(env, &decode);
840             assert(ins_len == decode.len);
841             exec_instruction(env, &decode);
842             hvf_store_regs(cpu);
843 
844             break;
845         }
846         case EXIT_REASON_CPUID: {
847             uint32_t rax = (uint32_t)rreg(cpu->accel->fd, HV_X86_RAX);
848             uint32_t rbx = (uint32_t)rreg(cpu->accel->fd, HV_X86_RBX);
849             uint32_t rcx = (uint32_t)rreg(cpu->accel->fd, HV_X86_RCX);
850             uint32_t rdx = (uint32_t)rreg(cpu->accel->fd, HV_X86_RDX);
851 
852             if (rax == 1) {
853                 /* CPUID1.ecx.OSXSAVE needs to know CR4 */
854                 env->cr[4] = rvmcs(cpu->accel->fd, VMCS_GUEST_CR4);
855             }
856             hvf_cpu_x86_cpuid(env, rax, rcx, &rax, &rbx, &rcx, &rdx);
857 
858             wreg(cpu->accel->fd, HV_X86_RAX, rax);
859             wreg(cpu->accel->fd, HV_X86_RBX, rbx);
860             wreg(cpu->accel->fd, HV_X86_RCX, rcx);
861             wreg(cpu->accel->fd, HV_X86_RDX, rdx);
862 
863             macvm_set_rip(cpu, rip + ins_len);
864             break;
865         }
866         case EXIT_REASON_XSETBV: {
867             uint32_t eax = (uint32_t)rreg(cpu->accel->fd, HV_X86_RAX);
868             uint32_t ecx = (uint32_t)rreg(cpu->accel->fd, HV_X86_RCX);
869             uint32_t edx = (uint32_t)rreg(cpu->accel->fd, HV_X86_RDX);
870 
871             if (ecx) {
872                 macvm_set_rip(cpu, rip + ins_len);
873                 break;
874             }
875             env->xcr0 = ((uint64_t)edx << 32) | eax;
876             wreg(cpu->accel->fd, HV_X86_XCR0, env->xcr0 | 1);
877             macvm_set_rip(cpu, rip + ins_len);
878             break;
879         }
880         case EXIT_REASON_INTR_WINDOW:
881             vmx_clear_int_window_exiting(cpu);
882             ret = EXCP_INTERRUPT;
883             break;
884         case EXIT_REASON_NMI_WINDOW:
885             vmx_clear_nmi_window_exiting(cpu);
886             ret = EXCP_INTERRUPT;
887             break;
888         case EXIT_REASON_EXT_INTR:
889             /* force exit and allow io handling */
890             ret = EXCP_INTERRUPT;
891             break;
892         case EXIT_REASON_RDMSR:
893         case EXIT_REASON_WRMSR:
894         {
895             hvf_load_regs(cpu);
896             if (exit_reason == EXIT_REASON_RDMSR) {
897                 hvf_simulate_rdmsr(env);
898             } else {
899                 hvf_simulate_wrmsr(env);
900             }
901             env->eip += ins_len;
902             hvf_store_regs(cpu);
903             break;
904         }
905         case EXIT_REASON_CR_ACCESS: {
906             int cr;
907             int reg;
908 
909             hvf_load_regs(cpu);
910             cr = exit_qual & 15;
911             reg = (exit_qual >> 8) & 15;
912 
913             switch (cr) {
914             case 0x0: {
915                 macvm_set_cr0(cpu->accel->fd, RRX(env, reg));
916                 break;
917             }
918             case 4: {
919                 macvm_set_cr4(cpu->accel->fd, RRX(env, reg));
920                 break;
921             }
922             case 8: {
923                 if (exit_qual & 0x10) {
924                     RRX(env, reg) = cpu_get_apic_tpr(x86_cpu->apic_state);
925                 } else {
926                     int tpr = RRX(env, reg);
927                     cpu_set_apic_tpr(x86_cpu->apic_state, tpr);
928                     ret = EXCP_INTERRUPT;
929                 }
930                 break;
931             }
932             default:
933                 error_report("Unrecognized CR %d", cr);
934                 abort();
935             }
936             env->eip += ins_len;
937             hvf_store_regs(cpu);
938             break;
939         }
940         case EXIT_REASON_APIC_ACCESS: { /* TODO */
941             struct x86_decode decode;
942 
943             hvf_load_regs(cpu);
944             decode_instruction(env, &decode);
945             exec_instruction(env, &decode);
946             hvf_store_regs(cpu);
947             break;
948         }
949         case EXIT_REASON_TPR: {
950             ret = 1;
951             break;
952         }
953         case EXIT_REASON_TASK_SWITCH: {
954             uint64_t vinfo = rvmcs(cpu->accel->fd, VMCS_IDT_VECTORING_INFO);
955             x86_segment_selector sel = {.sel = exit_qual & 0xffff};
956             vmx_handle_task_switch(cpu, sel, (exit_qual >> 30) & 0x3,
957              vinfo & VMCS_INTR_VALID, vinfo & VECTORING_INFO_VECTOR_MASK, vinfo
958              & VMCS_INTR_T_MASK);
959             break;
960         }
961         case EXIT_REASON_TRIPLE_FAULT: {
962             qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
963             ret = EXCP_INTERRUPT;
964             break;
965         }
966         case EXIT_REASON_RDPMC:
967             wreg(cpu->accel->fd, HV_X86_RAX, 0);
968             wreg(cpu->accel->fd, HV_X86_RDX, 0);
969             macvm_set_rip(cpu, rip + ins_len);
970             break;
971         case VMX_REASON_VMCALL:
972             env->exception_nr = EXCP0D_GPF;
973             env->exception_injected = 1;
974             env->has_error_code = true;
975             env->error_code = 0;
976             break;
977         default:
978             error_report("%llx: unhandled exit %llx", rip, exit_reason);
979         }
980     } while (ret == 0);
981 
982     return ret;
983 }
984 
/* Software breakpoints are not implemented here; always returns -ENOSYS. */
int hvf_arch_insert_sw_breakpoint(CPUState *cpu, struct hvf_sw_breakpoint *bp)
{
    return -ENOSYS;
}
989 
/* Software breakpoints are not implemented here; always returns -ENOSYS. */
int hvf_arch_remove_sw_breakpoint(CPUState *cpu, struct hvf_sw_breakpoint *bp)
{
    return -ENOSYS;
}
994 
/* Hardware breakpoints are not implemented here; always returns -ENOSYS. */
int hvf_arch_insert_hw_breakpoint(vaddr addr, vaddr len, int type)
{
    return -ENOSYS;
}
999 
/* Hardware breakpoints are not implemented here; always returns -ENOSYS. */
int hvf_arch_remove_hw_breakpoint(vaddr addr, vaddr len, int type)
{
    return -ENOSYS;
}
1004 
/* Nothing to remove: hardware breakpoints can never be inserted here. */
void hvf_arch_remove_all_hw_breakpoints(void)
{
}
1008 
/* Intentionally empty: hvf_arch_supports_guest_debug() returns false. */
void hvf_arch_update_guest_debug(CPUState *cpu)
{
}
1012 
/* Guest debugging is not supported by this backend; always returns false. */
bool hvf_arch_supports_guest_debug(void)
{
    return false;
}
1017