xref: /qemu/target/i386/whpx/whpx-all.c (revision adff55b520ef9ad2907a91409b152220c1ba8051)
1  /*
2   * QEMU Windows Hypervisor Platform accelerator (WHPX)
3   *
4   * Copyright Microsoft Corp. 2017
5   *
6   * This work is licensed under the terms of the GNU GPL, version 2 or later.
7   * See the COPYING file in the top-level directory.
8   *
9   */
10  
11  #include "qemu/osdep.h"
12  #include "cpu.h"
13  #include "exec/address-spaces.h"
14  #include "exec/ioport.h"
15  #include "gdbstub/helpers.h"
16  #include "qemu/accel.h"
17  #include "sysemu/whpx.h"
18  #include "sysemu/cpus.h"
19  #include "sysemu/runstate.h"
20  #include "qemu/main-loop.h"
21  #include "hw/boards.h"
22  #include "hw/intc/ioapic.h"
23  #include "hw/i386/apic_internal.h"
24  #include "qemu/error-report.h"
25  #include "qapi/error.h"
26  #include "qapi/qapi-types-common.h"
27  #include "qapi/qapi-visit-common.h"
28  #include "migration/blocker.h"
29  #include <winerror.h>
30  
31  #include "whpx-internal.h"
32  #include "whpx-accel-ops.h"
33  
34  #include <winhvplatform.h>
35  #include <winhvemulation.h>
36  
37  #define HYPERV_APIC_BUS_FREQUENCY      (200000000ULL)
38  
/*
 * Registers synchronized between QEMU and the hypervisor on every full
 * state transfer.
 *
 * NOTE: whpx_set_registers() and whpx_get_registers() walk this table with
 * a running index and assert the expected name at each position — keep the
 * order here in lockstep with those functions when editing.
 */
static const WHV_REGISTER_NAME whpx_register_names[] = {

    /* X64 General purpose registers */
    WHvX64RegisterRax,
    WHvX64RegisterRcx,
    WHvX64RegisterRdx,
    WHvX64RegisterRbx,
    WHvX64RegisterRsp,
    WHvX64RegisterRbp,
    WHvX64RegisterRsi,
    WHvX64RegisterRdi,
    WHvX64RegisterR8,
    WHvX64RegisterR9,
    WHvX64RegisterR10,
    WHvX64RegisterR11,
    WHvX64RegisterR12,
    WHvX64RegisterR13,
    WHvX64RegisterR14,
    WHvX64RegisterR15,
    WHvX64RegisterRip,
    WHvX64RegisterRflags,

    /* X64 Segment registers */
    WHvX64RegisterEs,
    WHvX64RegisterCs,
    WHvX64RegisterSs,
    WHvX64RegisterDs,
    WHvX64RegisterFs,
    WHvX64RegisterGs,
    WHvX64RegisterLdtr,
    WHvX64RegisterTr,

    /* X64 Table registers */
    WHvX64RegisterIdtr,
    WHvX64RegisterGdtr,

    /* X64 Control Registers */
    WHvX64RegisterCr0,
    WHvX64RegisterCr2,
    WHvX64RegisterCr3,
    WHvX64RegisterCr4,
    WHvX64RegisterCr8,

    /* X64 Debug Registers (intentionally not synchronized) */
    /*
     * WHvX64RegisterDr0,
     * WHvX64RegisterDr1,
     * WHvX64RegisterDr2,
     * WHvX64RegisterDr3,
     * WHvX64RegisterDr6,
     * WHvX64RegisterDr7,
     */

    /* X64 Floating Point and Vector Registers */
    WHvX64RegisterXmm0,
    WHvX64RegisterXmm1,
    WHvX64RegisterXmm2,
    WHvX64RegisterXmm3,
    WHvX64RegisterXmm4,
    WHvX64RegisterXmm5,
    WHvX64RegisterXmm6,
    WHvX64RegisterXmm7,
    WHvX64RegisterXmm8,
    WHvX64RegisterXmm9,
    WHvX64RegisterXmm10,
    WHvX64RegisterXmm11,
    WHvX64RegisterXmm12,
    WHvX64RegisterXmm13,
    WHvX64RegisterXmm14,
    WHvX64RegisterXmm15,
    WHvX64RegisterFpMmx0,
    WHvX64RegisterFpMmx1,
    WHvX64RegisterFpMmx2,
    WHvX64RegisterFpMmx3,
    WHvX64RegisterFpMmx4,
    WHvX64RegisterFpMmx5,
    WHvX64RegisterFpMmx6,
    WHvX64RegisterFpMmx7,
    WHvX64RegisterFpControlStatus,
    WHvX64RegisterXmmControlStatus,

    /* X64 MSRs */
    WHvX64RegisterEfer,
#ifdef TARGET_X86_64
    WHvX64RegisterKernelGsBase,
#endif
    WHvX64RegisterApicBase,
    /* WHvX64RegisterPat, */
    WHvX64RegisterSysenterCs,
    WHvX64RegisterSysenterEip,
    WHvX64RegisterSysenterEsp,
    WHvX64RegisterStar,
#ifdef TARGET_X86_64
    WHvX64RegisterLstar,
    WHvX64RegisterCstar,
    WHvX64RegisterSfmask,
#endif

    /* Interrupt / Event Registers (intentionally not synchronized) */
    /*
     * WHvRegisterPendingInterruption,
     * WHvRegisterInterruptState,
     * WHvRegisterPendingEvent0,
     * WHvRegisterPendingEvent1
     * WHvX64RegisterDeliverabilityNotifications,
     */
};
146  
/* Value buffer matching whpx_register_names entry-for-entry. */
struct whpx_register_set {
    WHV_REGISTER_VALUE values[RTL_NUMBER_OF(whpx_register_names)];
};
150  
151  /*
152   * The current implementation of instruction stepping sets the TF flag
153   * in RFLAGS, causing the CPU to raise an INT1 after each instruction.
154   * This corresponds to the WHvX64ExceptionTypeDebugTrapOrFault exception.
155   *
156   * This approach has a few limitations:
157   *     1. Stepping over a PUSHF/SAHF instruction will save the TF flag
158   *        along with the other flags, possibly restoring it later. It would
159   *        result in another INT1 when the flags are restored, triggering
160   *        a stop in gdb that could be cleared by doing another step.
161   *
162   *        Stepping over a POPF/LAHF instruction will let it overwrite the
163   *        TF flags, ending the stepping mode.
164   *
165   *     2. Stepping over an instruction raising an exception (e.g. INT, DIV,
166   *        or anything that could result in a page fault) will save the flags
167   *        to the stack, clear the TF flag, and let the guest execute the
168   *        handler. Normally, the guest will restore the original flags,
169   *        that will continue single-stepping.
170   *
171   *     3. Debuggers running on the guest may wish to set TF to do instruction
172   *        stepping. INT1 events generated by it would be intercepted by us,
173   *        as long as the gdb is connected to QEMU.
174   *
175   * In practice this means that:
176   *     1. Stepping through flags-modifying instructions may cause gdb to
177   *        continue or stop in unexpected places. This will be fully recoverable
178   *        and will not crash the target.
179   *
180   *     2. Stepping over an instruction that triggers an exception will step
181   *        over the exception handler, not into it.
182   *
183   *     3. Debugging the guest via gdb, while running debugger on the guest
184   *        at the same time may lead to unexpected effects. Removing all
185   *        breakpoints set via QEMU will prevent any further interference
186   *        with the guest-level debuggers.
187   *
188   * The limitations can be addressed as shown below:
189   *     1. PUSHF/SAHF/POPF/LAHF/IRET instructions can be emulated instead of
190   *        stepping through them. The exact semantics of the instructions is
191   *        defined in the "Combined Volume Set of Intel 64 and IA-32
192   *        Architectures Software Developer's Manuals", however it involves a
193   *        fair amount of corner cases due to compatibility with real mode,
194   *        virtual 8086 mode, and differences between 64-bit and 32-bit modes.
195   *
196   *     2. We could step into the guest's exception handlers using the following
197   *        sequence:
198   *          a. Temporarily enable catching of all exception types via
199   *             whpx_set_exception_exit_bitmap().
200   *          b. Once an exception is intercepted, read the IDT/GDT and locate
201   *             the original handler.
202   *          c. Patch the original handler, injecting an INT3 at the beginning.
203   *          d. Update the exception exit bitmap to only catch the
204   *             WHvX64ExceptionTypeBreakpointTrap exception.
205   *          e. Let the affected CPU run in the exclusive mode.
206   *          f. Restore the original handler and the exception exit bitmap.
207   *        Note that handling all corner cases related to IDT/GDT is harder
208   *        than it may seem. See x86_cpu_get_phys_page_attrs_debug() for a
209   *        rough idea.
210   *
211   *     3. In order to properly support guest-level debugging in parallel with
212   *        the QEMU-level debugging, we would need to be able to pass some INT1
213   *        events to the guest. This could be done via the following methods:
214   *          a. Using the WHvRegisterPendingEvent register. As of Windows 21H1,
215   *             it seems to only work for interrupts and not software
216   *             exceptions.
217   *          b. Locating and patching the original handler by parsing IDT/GDT.
218   *             This involves relatively complex logic outlined in the previous
219   *             paragraph.
220   *          c. Emulating the exception invocation (i.e. manually updating RIP,
221   *             RFLAGS, and pushing the old values to stack). This is even more
222   *             complicated than the previous option, since it involves checking
223   *             CPL, gate attributes, and doing various adjustments depending
224   *             on the current CPU mode, whether the CPL is changing, etc.
225   */
/*
 * Requested single-instruction stepping behavior (see the TF-flag-based
 * stepping discussion above for its limitations).
 */
typedef enum WhpxStepMode {
    /* No stepping requested. */
    WHPX_STEP_NONE = 0,
    /* Halt other VCPUs */
    WHPX_STEP_EXCLUSIVE,
} WhpxStepMode;
231  
/* Per-vCPU WHPX accelerator state, hung off CPUState::accel. */
struct AccelCPUState {
    /* Instruction-emulator handle used for MMIO/port-I/O emulation. */
    WHV_EMULATOR_HANDLE emulator;
    /* NOTE(review): flags below are managed by the vCPU run loop, which is
     * outside this section — confirm exact semantics against that code. */
    bool window_registered;
    bool interruptable;
    bool ready_for_pic_interrupt;
    /* Last TPR value synced with the hypervisor, in CR8 encoding. */
    uint64_t tpr;
    /* Last APIC base MSR value synced with the hypervisor. */
    uint64_t apic_base;
    bool interruption_pending;

    /* Must be the last field as it may have a tail */
    WHV_RUN_VP_EXIT_CONTEXT exit_ctx;
};
244  
/* True when the WHPX accelerator is enabled. */
static bool whpx_allowed;
/* NOTE(review): presumably set once whp_dispatch has been populated from
 * the WHP DLLs — the loader code is outside this section; confirm there. */
static bool whp_dispatch_initialized;
/* Module handles, presumably for WinHvPlatform.dll / WinHvEmulation.dll. */
static HMODULE hWinHvPlatform, hWinHvEmulation;
static uint32_t max_vcpu_index;
/* XSAVE capabilities reported by the hypervisor; gates xcr0 syncing. */
static WHV_PROCESSOR_XSAVE_FEATURES whpx_xsave_cap;

struct whpx_state whpx_global;
struct WHPDispatch whp_dispatch;
253  
254  static bool whpx_has_xsave(void)
255  {
256      return whpx_xsave_cap.XsaveSupport;
257  }
258  
259  static WHV_X64_SEGMENT_REGISTER whpx_seg_q2h(const SegmentCache *qs, int v86,
260                                               int r86)
261  {
262      WHV_X64_SEGMENT_REGISTER hs;
263      unsigned flags = qs->flags;
264  
265      hs.Base = qs->base;
266      hs.Limit = qs->limit;
267      hs.Selector = qs->selector;
268  
269      if (v86) {
270          hs.Attributes = 0;
271          hs.SegmentType = 3;
272          hs.Present = 1;
273          hs.DescriptorPrivilegeLevel = 3;
274          hs.NonSystemSegment = 1;
275  
276      } else {
277          hs.Attributes = (flags >> DESC_TYPE_SHIFT);
278  
279          if (r86) {
280              /* hs.Base &= 0xfffff; */
281          }
282      }
283  
284      return hs;
285  }
286  
287  static SegmentCache whpx_seg_h2q(const WHV_X64_SEGMENT_REGISTER *hs)
288  {
289      SegmentCache qs;
290  
291      qs.base = hs->Base;
292      qs.limit = hs->Limit;
293      qs.selector = hs->Selector;
294  
295      qs.flags = ((uint32_t)hs->Attributes) << DESC_TYPE_SHIFT;
296  
297      return qs;
298  }
299  
300  /* X64 Extended Control Registers */
301  static void whpx_set_xcrs(CPUState *cpu)
302  {
303      CPUX86State *env = cpu_env(cpu);
304      HRESULT hr;
305      struct whpx_state *whpx = &whpx_global;
306      WHV_REGISTER_VALUE xcr0;
307      WHV_REGISTER_NAME xcr0_name = WHvX64RegisterXCr0;
308  
309      if (!whpx_has_xsave()) {
310          return;
311      }
312  
313      /* Only xcr0 is supported by the hypervisor currently */
314      xcr0.Reg64 = env->xcr0;
315      hr = whp_dispatch.WHvSetVirtualProcessorRegisters(
316          whpx->partition, cpu->cpu_index, &xcr0_name, 1, &xcr0);
317      if (FAILED(hr)) {
318          error_report("WHPX: Failed to set register xcr0, hr=%08lx", hr);
319      }
320  }
321  
322  static int whpx_set_tsc(CPUState *cpu)
323  {
324      CPUX86State *env = cpu_env(cpu);
325      WHV_REGISTER_NAME tsc_reg = WHvX64RegisterTsc;
326      WHV_REGISTER_VALUE tsc_val;
327      HRESULT hr;
328      struct whpx_state *whpx = &whpx_global;
329  
330      /*
331       * Suspend the partition prior to setting the TSC to reduce the variance
332       * in TSC across vCPUs. When the first vCPU runs post suspend, the
333       * partition is automatically resumed.
334       */
335      if (whp_dispatch.WHvSuspendPartitionTime) {
336  
337          /*
338           * Unable to suspend partition while setting TSC is not a fatal
339           * error. It just increases the likelihood of TSC variance between
340           * vCPUs and some guest OS are able to handle that just fine.
341           */
342          hr = whp_dispatch.WHvSuspendPartitionTime(whpx->partition);
343          if (FAILED(hr)) {
344              warn_report("WHPX: Failed to suspend partition, hr=%08lx", hr);
345          }
346      }
347  
348      tsc_val.Reg64 = env->tsc;
349      hr = whp_dispatch.WHvSetVirtualProcessorRegisters(
350          whpx->partition, cpu->cpu_index, &tsc_reg, 1, &tsc_val);
351      if (FAILED(hr)) {
352          error_report("WHPX: Failed to set TSC, hr=%08lx", hr);
353          return -1;
354      }
355  
356      return 0;
357  }
358  
359  /*
360   * The CR8 register in the CPU is mapped to the TPR register of the APIC,
361   * however, they use a slightly different encoding. Specifically:
362   *
363   *     APIC.TPR[bits 7:4] = CR8[bits 3:0]
364   *
365   * This mechanism is described in section 10.8.6.1 of Volume 3 of Intel 64
366   * and IA-32 Architectures Software Developer's Manual.
367   *
368   * The functions below translate the value of CR8 to TPR and vice versa.
369   */
370  
/*
 * Convert an APIC TPR value to its CR8 encoding:
 * CR8[3:0] = APIC.TPR[7:4] (Intel SDM vol. 3, section 10.8.6.1).
 */
static uint64_t whpx_apic_tpr_to_cr8(uint64_t tpr)
{
    return tpr / 16;
}
375  
/*
 * Convert a CR8 value to the APIC TPR encoding:
 * APIC.TPR[7:4] = CR8[3:0] (Intel SDM vol. 3, section 10.8.6.1).
 */
static uint64_t whpx_cr8_to_apic_tpr(uint64_t cr8)
{
    return cr8 * 16;
}
380  
/*
 * Write the QEMU CPU state (env) into the hypervisor's virtual processor.
 *
 * All registers listed in whpx_register_names are packed into a
 * whpx_register_set in table order and pushed with one
 * WHvSetVirtualProcessorRegisters() call; the asserts keep the running
 * index 'idx' in lockstep with that table.
 *
 * 'level' limits how much state is transferred: MSRs with guest side
 * effects (currently only the TSC) are written only on full state updates
 * (level >= WHPX_SET_RESET_STATE).
 */
static void whpx_set_registers(CPUState *cpu, int level)
{
    struct whpx_state *whpx = &whpx_global;
    AccelCPUState *vcpu = cpu->accel;
    X86CPU *x86_cpu = X86_CPU(cpu);
    CPUX86State *env = &x86_cpu->env;
    struct whpx_register_set vcxt;
    HRESULT hr;
    int idx;
    int idx_next;
    int i;
    int v86, r86;

    /* Only touch vCPU state from its own thread or while it is stopped. */
    assert(cpu_is_stopped(cpu) || qemu_cpu_is_self(cpu));

    /*
     * Following MSRs have side effects on the guest or are too heavy for
     * runtime. Limit them to full state update.
     */
    if (level >= WHPX_SET_RESET_STATE) {
        whpx_set_tsc(cpu);
    }

    memset(&vcxt, 0, sizeof(struct whpx_register_set));

    /* v86: virtual-8086 mode; r86: real mode (CR0.PE clear). */
    v86 = (env->eflags & VM_MASK);
    r86 = !(env->cr[0] & CR0_PE_MASK);

    /* Cache CR8-encoded TPR and APIC base for later change detection. */
    vcpu->tpr = whpx_apic_tpr_to_cr8(cpu_get_apic_tpr(x86_cpu->apic_state));
    vcpu->apic_base = cpu_get_apic_base(x86_cpu->apic_state);

    idx = 0;

    /* Indexes for first 16 registers match between HV and QEMU definitions */
    idx_next = 16;
    for (idx = 0; idx < CPU_NB_REGS; idx += 1) {
        vcxt.values[idx].Reg64 = (uint64_t)env->regs[idx];
    }
    idx = idx_next;

    /* Same goes for RIP and RFLAGS */
    assert(whpx_register_names[idx] == WHvX64RegisterRip);
    vcxt.values[idx++].Reg64 = env->eip;

    assert(whpx_register_names[idx] == WHvX64RegisterRflags);
    vcxt.values[idx++].Reg64 = env->eflags;

    /*
     * Translate 6+4 segment registers. HV and QEMU order matches.
     * NOTE: these asserts compare idx against WHV enum constants directly,
     * relying on the low enum values coinciding numerically with the
     * registers' positions in whpx_register_names.
     */
    assert(idx == WHvX64RegisterEs);
    for (i = 0; i < 6; i += 1, idx += 1) {
        vcxt.values[idx].Segment = whpx_seg_q2h(&env->segs[i], v86, r86);
    }

    assert(idx == WHvX64RegisterLdtr);
    vcxt.values[idx++].Segment = whpx_seg_q2h(&env->ldt, 0, 0);

    assert(idx == WHvX64RegisterTr);
    vcxt.values[idx++].Segment = whpx_seg_q2h(&env->tr, 0, 0);

    assert(idx == WHvX64RegisterIdtr);
    vcxt.values[idx].Table.Base = env->idt.base;
    vcxt.values[idx].Table.Limit = env->idt.limit;
    idx += 1;

    assert(idx == WHvX64RegisterGdtr);
    vcxt.values[idx].Table.Base = env->gdt.base;
    vcxt.values[idx].Table.Limit = env->gdt.limit;
    idx += 1;

    /* CR0, 2, 3, 4, 8 */
    assert(whpx_register_names[idx] == WHvX64RegisterCr0);
    vcxt.values[idx++].Reg64 = env->cr[0];
    assert(whpx_register_names[idx] == WHvX64RegisterCr2);
    vcxt.values[idx++].Reg64 = env->cr[2];
    assert(whpx_register_names[idx] == WHvX64RegisterCr3);
    vcxt.values[idx++].Reg64 = env->cr[3];
    assert(whpx_register_names[idx] == WHvX64RegisterCr4);
    vcxt.values[idx++].Reg64 = env->cr[4];
    /* CR8 carries the TPR in its CR8 encoding (TPR[7:4] -> CR8[3:0]). */
    assert(whpx_register_names[idx] == WHvX64RegisterCr8);
    vcxt.values[idx++].Reg64 = vcpu->tpr;

    /* 8 Debug Registers - Skipped */

    /*
     * Extended control registers needs to be handled separately depending
     * on whether xsave is supported/enabled or not.
     */
    whpx_set_xcrs(cpu);

    /* 16 XMM registers */
    assert(whpx_register_names[idx] == WHvX64RegisterXmm0);
    idx_next = idx + 16;
    for (i = 0; i < sizeof(env->xmm_regs) / sizeof(ZMMReg); i += 1, idx += 1) {
        vcxt.values[idx].Reg128.Low64 = env->xmm_regs[i].ZMM_Q(0);
        vcxt.values[idx].Reg128.High64 = env->xmm_regs[i].ZMM_Q(1);
    }
    idx = idx_next;

    /* 8 FP registers */
    assert(whpx_register_names[idx] == WHvX64RegisterFpMmx0);
    for (i = 0; i < 8; i += 1, idx += 1) {
        vcxt.values[idx].Fp.AsUINT128.Low64 = env->fpregs[i].mmx.MMX_Q(0);
        /* vcxt.values[idx].Fp.AsUINT128.High64 =
               env->fpregs[i].mmx.MMX_Q(1);
        */
    }

    /* FP control status register */
    assert(whpx_register_names[idx] == WHvX64RegisterFpControlStatus);
    vcxt.values[idx].FpControlStatus.FpControl = env->fpuc;
    /* Merge the stack-top pointer (fpstt) into FPU status bits 13:11. */
    vcxt.values[idx].FpControlStatus.FpStatus =
        (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
    vcxt.values[idx].FpControlStatus.FpTag = 0;
    for (i = 0; i < 8; ++i) {
        /* QEMU's fptags uses inverted polarity vs. the hardware tag word. */
        vcxt.values[idx].FpControlStatus.FpTag |= (!env->fptags[i]) << i;
    }
    vcxt.values[idx].FpControlStatus.Reserved = 0;
    vcxt.values[idx].FpControlStatus.LastFpOp = env->fpop;
    vcxt.values[idx].FpControlStatus.LastFpRip = env->fpip;
    idx += 1;

    /* XMM control status register */
    assert(whpx_register_names[idx] == WHvX64RegisterXmmControlStatus);
    vcxt.values[idx].XmmControlStatus.LastFpRdp = 0;
    vcxt.values[idx].XmmControlStatus.XmmStatusControl = env->mxcsr;
    vcxt.values[idx].XmmControlStatus.XmmStatusControlMask = 0x0000ffff;
    idx += 1;

    /* MSRs */
    assert(whpx_register_names[idx] == WHvX64RegisterEfer);
    vcxt.values[idx++].Reg64 = env->efer;
#ifdef TARGET_X86_64
    assert(whpx_register_names[idx] == WHvX64RegisterKernelGsBase);
    vcxt.values[idx++].Reg64 = env->kernelgsbase;
#endif

    assert(whpx_register_names[idx] == WHvX64RegisterApicBase);
    vcxt.values[idx++].Reg64 = vcpu->apic_base;

    /* WHvX64RegisterPat - Skipped */

    assert(whpx_register_names[idx] == WHvX64RegisterSysenterCs);
    vcxt.values[idx++].Reg64 = env->sysenter_cs;
    assert(whpx_register_names[idx] == WHvX64RegisterSysenterEip);
    vcxt.values[idx++].Reg64 = env->sysenter_eip;
    assert(whpx_register_names[idx] == WHvX64RegisterSysenterEsp);
    vcxt.values[idx++].Reg64 = env->sysenter_esp;
    assert(whpx_register_names[idx] == WHvX64RegisterStar);
    vcxt.values[idx++].Reg64 = env->star;
#ifdef TARGET_X86_64
    assert(whpx_register_names[idx] == WHvX64RegisterLstar);
    vcxt.values[idx++].Reg64 = env->lstar;
    assert(whpx_register_names[idx] == WHvX64RegisterCstar);
    vcxt.values[idx++].Reg64 = env->cstar;
    assert(whpx_register_names[idx] == WHvX64RegisterSfmask);
    vcxt.values[idx++].Reg64 = env->fmask;
#endif

    /* Interrupt / Event Registers - Skipped */

    /* The walk above must have consumed the whole register table. */
    assert(idx == RTL_NUMBER_OF(whpx_register_names));

    hr = whp_dispatch.WHvSetVirtualProcessorRegisters(
        whpx->partition, cpu->cpu_index,
        whpx_register_names,
        RTL_NUMBER_OF(whpx_register_names),
        &vcxt.values[0]);

    if (FAILED(hr)) {
        error_report("WHPX: Failed to set virtual processor context, hr=%08lx",
                     hr);
    }

    return;
}
556  
557  static int whpx_get_tsc(CPUState *cpu)
558  {
559      CPUX86State *env = cpu_env(cpu);
560      WHV_REGISTER_NAME tsc_reg = WHvX64RegisterTsc;
561      WHV_REGISTER_VALUE tsc_val;
562      HRESULT hr;
563      struct whpx_state *whpx = &whpx_global;
564  
565      hr = whp_dispatch.WHvGetVirtualProcessorRegisters(
566          whpx->partition, cpu->cpu_index, &tsc_reg, 1, &tsc_val);
567      if (FAILED(hr)) {
568          error_report("WHPX: Failed to get TSC, hr=%08lx", hr);
569          return -1;
570      }
571  
572      env->tsc = tsc_val.Reg64;
573      return 0;
574  }
575  
576  /* X64 Extended Control Registers */
577  static void whpx_get_xcrs(CPUState *cpu)
578  {
579      CPUX86State *env = cpu_env(cpu);
580      HRESULT hr;
581      struct whpx_state *whpx = &whpx_global;
582      WHV_REGISTER_VALUE xcr0;
583      WHV_REGISTER_NAME xcr0_name = WHvX64RegisterXCr0;
584  
585      if (!whpx_has_xsave()) {
586          return;
587      }
588  
589      /* Only xcr0 is supported by the hypervisor currently */
590      hr = whp_dispatch.WHvGetVirtualProcessorRegisters(
591          whpx->partition, cpu->cpu_index, &xcr0_name, 1, &xcr0);
592      if (FAILED(hr)) {
593          error_report("WHPX: Failed to get register xcr0, hr=%08lx", hr);
594          return;
595      }
596  
597      env->xcr0 = xcr0.Reg64;
598  }
599  
/*
 * Read the virtual processor state out of the hypervisor and unpack it
 * into the QEMU CPU state (env).
 *
 * Mirror of whpx_set_registers(): all registers in whpx_register_names are
 * fetched with one WHvGetVirtualProcessorRegisters() call and consumed in
 * table order, with asserts keeping the running index 'idx' in lockstep.
 */
static void whpx_get_registers(CPUState *cpu)
{
    struct whpx_state *whpx = &whpx_global;
    AccelCPUState *vcpu = cpu->accel;
    X86CPU *x86_cpu = X86_CPU(cpu);
    CPUX86State *env = &x86_cpu->env;
    struct whpx_register_set vcxt;
    uint64_t tpr, apic_base;
    HRESULT hr;
    int idx;
    int idx_next;
    int i;

    /* Only touch vCPU state from its own thread or while it is stopped. */
    assert(cpu_is_stopped(cpu) || qemu_cpu_is_self(cpu));

    /* Refresh the cached TSC; it stays valid while the VM is not running. */
    if (!env->tsc_valid) {
        whpx_get_tsc(cpu);
        env->tsc_valid = !runstate_is_running();
    }

    hr = whp_dispatch.WHvGetVirtualProcessorRegisters(
        whpx->partition, cpu->cpu_index,
        whpx_register_names,
        RTL_NUMBER_OF(whpx_register_names),
        &vcxt.values[0]);
    if (FAILED(hr)) {
        error_report("WHPX: Failed to get virtual processor context, hr=%08lx",
                     hr);
    }

    if (whpx_apic_in_platform()) {
        /*
         * Fetch the TPR value from the emulated APIC. It may get overwritten
         * below with the value from CR8 returned by
         * WHvGetVirtualProcessorRegisters().
         */
        whpx_apic_get(x86_cpu->apic_state);
        vcpu->tpr = whpx_apic_tpr_to_cr8(
            cpu_get_apic_tpr(x86_cpu->apic_state));
    }

    idx = 0;

    /* Indexes for first 16 registers match between HV and QEMU definitions */
    idx_next = 16;
    for (idx = 0; idx < CPU_NB_REGS; idx += 1) {
        env->regs[idx] = vcxt.values[idx].Reg64;
    }
    idx = idx_next;

    /* Same goes for RIP and RFLAGS */
    assert(whpx_register_names[idx] == WHvX64RegisterRip);
    env->eip = vcxt.values[idx++].Reg64;
    assert(whpx_register_names[idx] == WHvX64RegisterRflags);
    env->eflags = vcxt.values[idx++].Reg64;

    /*
     * Translate 6+4 segment registers. HV and QEMU order matches.
     * NOTE: these asserts compare idx against WHV enum constants directly,
     * relying on the low enum values coinciding numerically with the
     * registers' positions in whpx_register_names.
     */
    assert(idx == WHvX64RegisterEs);
    for (i = 0; i < 6; i += 1, idx += 1) {
        env->segs[i] = whpx_seg_h2q(&vcxt.values[idx].Segment);
    }

    assert(idx == WHvX64RegisterLdtr);
    env->ldt = whpx_seg_h2q(&vcxt.values[idx++].Segment);
    assert(idx == WHvX64RegisterTr);
    env->tr = whpx_seg_h2q(&vcxt.values[idx++].Segment);
    assert(idx == WHvX64RegisterIdtr);
    env->idt.base = vcxt.values[idx].Table.Base;
    env->idt.limit = vcxt.values[idx].Table.Limit;
    idx += 1;
    assert(idx == WHvX64RegisterGdtr);
    env->gdt.base = vcxt.values[idx].Table.Base;
    env->gdt.limit = vcxt.values[idx].Table.Limit;
    idx += 1;

    /* CR0, 2, 3, 4, 8 */
    assert(whpx_register_names[idx] == WHvX64RegisterCr0);
    env->cr[0] = vcxt.values[idx++].Reg64;
    assert(whpx_register_names[idx] == WHvX64RegisterCr2);
    env->cr[2] = vcxt.values[idx++].Reg64;
    assert(whpx_register_names[idx] == WHvX64RegisterCr3);
    env->cr[3] = vcxt.values[idx++].Reg64;
    assert(whpx_register_names[idx] == WHvX64RegisterCr4);
    env->cr[4] = vcxt.values[idx++].Reg64;
    /* Propagate a changed CR8/TPR to the emulated APIC. */
    assert(whpx_register_names[idx] == WHvX64RegisterCr8);
    tpr = vcxt.values[idx++].Reg64;
    if (tpr != vcpu->tpr) {
        vcpu->tpr = tpr;
        cpu_set_apic_tpr(x86_cpu->apic_state, whpx_cr8_to_apic_tpr(tpr));
    }

    /* 8 Debug Registers - Skipped */

    /*
     * Extended control registers needs to be handled separately depending
     * on whether xsave is supported/enabled or not.
     */
    whpx_get_xcrs(cpu);

    /* 16 XMM registers */
    assert(whpx_register_names[idx] == WHvX64RegisterXmm0);
    idx_next = idx + 16;
    for (i = 0; i < sizeof(env->xmm_regs) / sizeof(ZMMReg); i += 1, idx += 1) {
        env->xmm_regs[i].ZMM_Q(0) = vcxt.values[idx].Reg128.Low64;
        env->xmm_regs[i].ZMM_Q(1) = vcxt.values[idx].Reg128.High64;
    }
    idx = idx_next;

    /* 8 FP registers */
    assert(whpx_register_names[idx] == WHvX64RegisterFpMmx0);
    for (i = 0; i < 8; i += 1, idx += 1) {
        env->fpregs[i].mmx.MMX_Q(0) = vcxt.values[idx].Fp.AsUINT128.Low64;
        /* env->fpregs[i].mmx.MMX_Q(1) =
               vcxt.values[idx].Fp.AsUINT128.High64;
        */
    }

    /* FP control status register */
    assert(whpx_register_names[idx] == WHvX64RegisterFpControlStatus);
    env->fpuc = vcxt.values[idx].FpControlStatus.FpControl;
    /* Split the stack-top pointer (bits 13:11) back out of the status word. */
    env->fpstt = (vcxt.values[idx].FpControlStatus.FpStatus >> 11) & 0x7;
    env->fpus = vcxt.values[idx].FpControlStatus.FpStatus & ~0x3800;
    for (i = 0; i < 8; ++i) {
        /* QEMU's fptags uses inverted polarity vs. the hardware tag word. */
        env->fptags[i] = !((vcxt.values[idx].FpControlStatus.FpTag >> i) & 1);
    }
    env->fpop = vcxt.values[idx].FpControlStatus.LastFpOp;
    env->fpip = vcxt.values[idx].FpControlStatus.LastFpRip;
    idx += 1;

    /* XMM control status register */
    assert(whpx_register_names[idx] == WHvX64RegisterXmmControlStatus);
    env->mxcsr = vcxt.values[idx].XmmControlStatus.XmmStatusControl;
    idx += 1;

    /* MSRs */
    assert(whpx_register_names[idx] == WHvX64RegisterEfer);
    env->efer = vcxt.values[idx++].Reg64;
#ifdef TARGET_X86_64
    assert(whpx_register_names[idx] == WHvX64RegisterKernelGsBase);
    env->kernelgsbase = vcxt.values[idx++].Reg64;
#endif

    /* Propagate a changed APIC base to the emulated APIC. */
    assert(whpx_register_names[idx] == WHvX64RegisterApicBase);
    apic_base = vcxt.values[idx++].Reg64;
    if (apic_base != vcpu->apic_base) {
        vcpu->apic_base = apic_base;
        cpu_set_apic_base(x86_cpu->apic_state, vcpu->apic_base);
    }

    /* WHvX64RegisterPat - Skipped */

    assert(whpx_register_names[idx] == WHvX64RegisterSysenterCs);
    env->sysenter_cs = vcxt.values[idx++].Reg64;
    assert(whpx_register_names[idx] == WHvX64RegisterSysenterEip);
    env->sysenter_eip = vcxt.values[idx++].Reg64;
    assert(whpx_register_names[idx] == WHvX64RegisterSysenterEsp);
    env->sysenter_esp = vcxt.values[idx++].Reg64;
    assert(whpx_register_names[idx] == WHvX64RegisterStar);
    env->star = vcxt.values[idx++].Reg64;
#ifdef TARGET_X86_64
    assert(whpx_register_names[idx] == WHvX64RegisterLstar);
    env->lstar = vcxt.values[idx++].Reg64;
    assert(whpx_register_names[idx] == WHvX64RegisterCstar);
    env->cstar = vcxt.values[idx++].Reg64;
    assert(whpx_register_names[idx] == WHvX64RegisterSfmask);
    env->fmask = vcxt.values[idx++].Reg64;
#endif

    /* Interrupt / Event Registers - Skipped */

    /* The walk above must have consumed the whole register table. */
    assert(idx == RTL_NUMBER_OF(whpx_register_names));

    if (whpx_apic_in_platform()) {
        whpx_apic_get(x86_cpu->apic_state);
    }

    x86_update_hflags(env);

    return;
}
780  
781  static HRESULT CALLBACK whpx_emu_ioport_callback(
782      void *ctx,
783      WHV_EMULATOR_IO_ACCESS_INFO *IoAccess)
784  {
785      MemTxAttrs attrs = { 0 };
786      address_space_rw(&address_space_io, IoAccess->Port, attrs,
787                       &IoAccess->Data, IoAccess->AccessSize,
788                       IoAccess->Direction);
789      return S_OK;
790  }
791  
792  static HRESULT CALLBACK whpx_emu_mmio_callback(
793      void *ctx,
794      WHV_EMULATOR_MEMORY_ACCESS_INFO *ma)
795  {
796      cpu_physical_memory_rw(ma->GpaAddress, ma->Data, ma->AccessSize,
797                             ma->Direction);
798      return S_OK;
799  }
800  
801  static HRESULT CALLBACK whpx_emu_getreg_callback(
802      void *ctx,
803      const WHV_REGISTER_NAME *RegisterNames,
804      UINT32 RegisterCount,
805      WHV_REGISTER_VALUE *RegisterValues)
806  {
807      HRESULT hr;
808      struct whpx_state *whpx = &whpx_global;
809      CPUState *cpu = (CPUState *)ctx;
810  
811      hr = whp_dispatch.WHvGetVirtualProcessorRegisters(
812          whpx->partition, cpu->cpu_index,
813          RegisterNames, RegisterCount,
814          RegisterValues);
815      if (FAILED(hr)) {
816          error_report("WHPX: Failed to get virtual processor registers,"
817                       " hr=%08lx", hr);
818      }
819  
820      return hr;
821  }
822  
823  static HRESULT CALLBACK whpx_emu_setreg_callback(
824      void *ctx,
825      const WHV_REGISTER_NAME *RegisterNames,
826      UINT32 RegisterCount,
827      const WHV_REGISTER_VALUE *RegisterValues)
828  {
829      HRESULT hr;
830      struct whpx_state *whpx = &whpx_global;
831      CPUState *cpu = (CPUState *)ctx;
832  
833      hr = whp_dispatch.WHvSetVirtualProcessorRegisters(
834          whpx->partition, cpu->cpu_index,
835          RegisterNames, RegisterCount,
836          RegisterValues);
837      if (FAILED(hr)) {
838          error_report("WHPX: Failed to set virtual processor registers,"
839                       " hr=%08lx", hr);
840      }
841  
842      /*
843       * The emulator just successfully wrote the register state. We clear the
844       * dirty state so we avoid the double write on resume of the VP.
845       */
846      cpu->vcpu_dirty = false;
847  
848      return hr;
849  }
850  
851  static HRESULT CALLBACK whpx_emu_translate_callback(
852      void *ctx,
853      WHV_GUEST_VIRTUAL_ADDRESS Gva,
854      WHV_TRANSLATE_GVA_FLAGS TranslateFlags,
855      WHV_TRANSLATE_GVA_RESULT_CODE *TranslationResult,
856      WHV_GUEST_PHYSICAL_ADDRESS *Gpa)
857  {
858      HRESULT hr;
859      struct whpx_state *whpx = &whpx_global;
860      CPUState *cpu = (CPUState *)ctx;
861      WHV_TRANSLATE_GVA_RESULT res;
862  
863      hr = whp_dispatch.WHvTranslateGva(whpx->partition, cpu->cpu_index,
864                                        Gva, TranslateFlags, &res, Gpa);
865      if (FAILED(hr)) {
866          error_report("WHPX: Failed to translate GVA, hr=%08lx", hr);
867      } else {
868          *TranslationResult = res.ResultCode;
869      }
870  
871      return hr;
872  }
873  
/*
 * Callback table used by the WHPX instruction emulator to access vCPU
 * registers, guest memory, I/O ports, and address translation while
 * emulating intercepted MMIO/PIO instructions (see whpx_handle_mmio()
 * and whpx_handle_portio() below).
 */
static const WHV_EMULATOR_CALLBACKS whpx_emu_callbacks = {
    .Size = sizeof(WHV_EMULATOR_CALLBACKS),
    .WHvEmulatorIoPortCallback = whpx_emu_ioport_callback,
    .WHvEmulatorMemoryCallback = whpx_emu_mmio_callback,
    .WHvEmulatorGetVirtualProcessorRegisters = whpx_emu_getreg_callback,
    .WHvEmulatorSetVirtualProcessorRegisters = whpx_emu_setreg_callback,
    .WHvEmulatorTranslateGvaPage = whpx_emu_translate_callback,
};
882  
883  static int whpx_handle_mmio(CPUState *cpu, WHV_MEMORY_ACCESS_CONTEXT *ctx)
884  {
885      HRESULT hr;
886      AccelCPUState *vcpu = cpu->accel;
887      WHV_EMULATOR_STATUS emu_status;
888  
889      hr = whp_dispatch.WHvEmulatorTryMmioEmulation(
890          vcpu->emulator, cpu,
891          &vcpu->exit_ctx.VpContext, ctx,
892          &emu_status);
893      if (FAILED(hr)) {
894          error_report("WHPX: Failed to parse MMIO access, hr=%08lx", hr);
895          return -1;
896      }
897  
898      if (!emu_status.EmulationSuccessful) {
899          error_report("WHPX: Failed to emulate MMIO access with"
900                       " EmulatorReturnStatus: %u", emu_status.AsUINT32);
901          return -1;
902      }
903  
904      return 0;
905  }
906  
907  static int whpx_handle_portio(CPUState *cpu,
908                                WHV_X64_IO_PORT_ACCESS_CONTEXT *ctx)
909  {
910      HRESULT hr;
911      AccelCPUState *vcpu = cpu->accel;
912      WHV_EMULATOR_STATUS emu_status;
913  
914      hr = whp_dispatch.WHvEmulatorTryIoEmulation(
915          vcpu->emulator, cpu,
916          &vcpu->exit_ctx.VpContext, ctx,
917          &emu_status);
918      if (FAILED(hr)) {
919          error_report("WHPX: Failed to parse PortIO access, hr=%08lx", hr);
920          return -1;
921      }
922  
923      if (!emu_status.EmulationSuccessful) {
924          error_report("WHPX: Failed to emulate PortIO access with"
925                       " EmulatorReturnStatus: %u", emu_status.AsUINT32);
926          return -1;
927      }
928  
929      return 0;
930  }
931  
932  /*
933   * Controls whether we should intercept various exceptions on the guest,
934   * namely breakpoint/single-step events.
935   *
936   * The 'exceptions' argument accepts a bitmask, e.g:
937   * (1 << WHvX64ExceptionTypeDebugTrapOrFault) | (...)
938   */
939  static HRESULT whpx_set_exception_exit_bitmap(UINT64 exceptions)
940  {
941      struct whpx_state *whpx = &whpx_global;
942      WHV_PARTITION_PROPERTY prop = { 0, };
943      HRESULT hr;
944  
945      if (exceptions == whpx->exception_exit_bitmap) {
946          return S_OK;
947      }
948  
949      prop.ExceptionExitBitmap = exceptions;
950  
951      hr = whp_dispatch.WHvSetPartitionProperty(
952          whpx->partition,
953          WHvPartitionPropertyCodeExceptionExitBitmap,
954          &prop,
955          sizeof(WHV_PARTITION_PROPERTY));
956  
957      if (SUCCEEDED(hr)) {
958          whpx->exception_exit_bitmap = exceptions;
959      }
960  
961      return hr;
962  }
963  
964  
965  /*
966   * This function is called before/after stepping over a single instruction.
967   * It will update the CPU registers to arm/disarm the instruction stepping
968   * accordingly.
969   */
970  static HRESULT whpx_vcpu_configure_single_stepping(CPUState *cpu,
971      bool set,
972      uint64_t *exit_context_rflags)
973  {
974      WHV_REGISTER_NAME reg_name;
975      WHV_REGISTER_VALUE reg_value;
976      HRESULT hr;
977      struct whpx_state *whpx = &whpx_global;
978  
979      /*
980       * If we are trying to step over a single instruction, we need to set the
981       * TF bit in rflags. Otherwise, clear it.
982       */
983      reg_name = WHvX64RegisterRflags;
984      hr = whp_dispatch.WHvGetVirtualProcessorRegisters(
985          whpx->partition,
986          cpu->cpu_index,
987          &reg_name,
988          1,
989          &reg_value);
990  
991      if (FAILED(hr)) {
992          error_report("WHPX: Failed to get rflags, hr=%08lx", hr);
993          return hr;
994      }
995  
996      if (exit_context_rflags) {
997          assert(*exit_context_rflags == reg_value.Reg64);
998      }
999  
1000      if (set) {
1001          /* Raise WHvX64ExceptionTypeDebugTrapOrFault after each instruction */
1002          reg_value.Reg64 |= TF_MASK;
1003      } else {
1004          reg_value.Reg64 &= ~TF_MASK;
1005      }
1006  
1007      if (exit_context_rflags) {
1008          *exit_context_rflags = reg_value.Reg64;
1009      }
1010  
1011      hr = whp_dispatch.WHvSetVirtualProcessorRegisters(
1012          whpx->partition,
1013          cpu->cpu_index,
1014          &reg_name,
1015          1,
1016          &reg_value);
1017  
1018      if (FAILED(hr)) {
1019          error_report("WHPX: Failed to set rflags,"
1020              " hr=%08lx",
1021              hr);
1022          return hr;
1023      }
1024  
1025      reg_name = WHvRegisterInterruptState;
1026      reg_value.Reg64 = 0;
1027  
1028      /* Suspend delivery of hardware interrupts during single-stepping. */
1029      reg_value.InterruptState.InterruptShadow = set != 0;
1030  
1031      hr = whp_dispatch.WHvSetVirtualProcessorRegisters(
1032      whpx->partition,
1033          cpu->cpu_index,
1034          &reg_name,
1035          1,
1036          &reg_value);
1037  
1038      if (FAILED(hr)) {
1039          error_report("WHPX: Failed to set InterruptState,"
1040              " hr=%08lx",
1041              hr);
1042          return hr;
1043      }
1044  
1045      if (!set) {
1046          /*
1047           * We have just finished stepping over a single instruction,
1048           * and intercepted the INT1 generated by it.
1049           * We need to now hide the INT1 from the guest,
1050           * as it would not be expecting it.
1051           */
1052  
1053          reg_name = WHvX64RegisterPendingDebugException;
1054          hr = whp_dispatch.WHvGetVirtualProcessorRegisters(
1055          whpx->partition,
1056              cpu->cpu_index,
1057              &reg_name,
1058              1,
1059              &reg_value);
1060  
1061          if (FAILED(hr)) {
1062              error_report("WHPX: Failed to get pending debug exceptions,"
1063                           "hr=%08lx", hr);
1064              return hr;
1065          }
1066  
1067          if (reg_value.PendingDebugException.SingleStep) {
1068              reg_value.PendingDebugException.SingleStep = 0;
1069  
1070              hr = whp_dispatch.WHvSetVirtualProcessorRegisters(
1071                  whpx->partition,
1072                  cpu->cpu_index,
1073                  &reg_name,
1074                  1,
1075                  &reg_value);
1076  
1077              if (FAILED(hr)) {
1078                  error_report("WHPX: Failed to clear pending debug exceptions,"
1079                               "hr=%08lx", hr);
1080               return hr;
1081              }
1082          }
1083  
1084      }
1085  
1086      return S_OK;
1087  }
1088  
1089  /* Tries to find a breakpoint at the specified address. */
1090  static struct whpx_breakpoint *whpx_lookup_breakpoint_by_addr(uint64_t address)
1091  {
1092      struct whpx_state *whpx = &whpx_global;
1093      int i;
1094  
1095      if (whpx->breakpoints.breakpoints) {
1096          for (i = 0; i < whpx->breakpoints.breakpoints->used; i++) {
1097              if (address == whpx->breakpoints.breakpoints->data[i].address) {
1098                  return &whpx->breakpoints.breakpoints->data[i];
1099              }
1100          }
1101      }
1102  
1103      return NULL;
1104  }
1105  
1106  /*
1107   * Linux uses int3 (0xCC) during startup (see int3_selftest()) and for
1108   * debugging user-mode applications. Since the WHPX API does not offer
1109   * an easy way to pass the intercepted exception back to the guest, we
1110   * resort to using INT1 instead, and let the guest always handle INT3.
1111   */
1112  static const uint8_t whpx_breakpoint_instruction = 0xF1;
1113  
1114  /*
1115   * The WHPX QEMU backend implements breakpoints by writing the INT1
1116   * instruction into memory (ignoring the DRx registers). This raises a few
1117   * issues that need to be carefully handled:
1118   *
1119   * 1. Although unlikely, other parts of QEMU may set multiple breakpoints
1120   *    at the same location, and later remove them in arbitrary order.
1121   *    This should not cause memory corruption, and should only remove the
1122   *    physical breakpoint instruction when the last QEMU breakpoint is gone.
1123   *
1124   * 2. Writing arbitrary virtual memory may fail if it's not mapped to a valid
1125   *    physical location. Hence, physically adding/removing a breakpoint can
1126   *    theoretically fail at any time. We need to keep track of it.
1127   *
1128   * The function below rebuilds a list of low-level breakpoints (one per
1129   * address, tracking the original instruction and any errors) from the list of
1130   * high-level breakpoints (set via cpu_breakpoint_insert()).
1131   *
1132   * In order to optimize performance, this function stores the list of
1133   * high-level breakpoints (a.k.a. CPU breakpoints) used to compute the
1134   * low-level ones, so that it won't be re-invoked until these breakpoints
1135   * change.
1136   *
1137   * Note that this function decides which breakpoints should be inserted into,
1138   * memory, but doesn't actually do it. The memory accessing is done in
1139   * whpx_apply_breakpoints().
1140   */
1141  static void whpx_translate_cpu_breakpoints(
1142      struct whpx_breakpoints *breakpoints,
1143      CPUState *cpu,
1144      int cpu_breakpoint_count)
1145  {
1146      CPUBreakpoint *bp;
1147      int cpu_bp_index = 0;
1148  
1149      breakpoints->original_addresses =
1150          g_renew(vaddr, breakpoints->original_addresses, cpu_breakpoint_count);
1151  
1152      breakpoints->original_address_count = cpu_breakpoint_count;
1153  
1154      int max_breakpoints = cpu_breakpoint_count +
1155          (breakpoints->breakpoints ? breakpoints->breakpoints->used : 0);
1156  
1157      struct whpx_breakpoint_collection *new_breakpoints =
1158          g_malloc0(sizeof(struct whpx_breakpoint_collection)
1159                    + max_breakpoints * sizeof(struct whpx_breakpoint));
1160  
1161      new_breakpoints->allocated = max_breakpoints;
1162      new_breakpoints->used = 0;
1163  
1164      /*
1165       * 1. Preserve all old breakpoints that could not be automatically
1166       * cleared when the CPU got stopped.
1167       */
1168      if (breakpoints->breakpoints) {
1169          int i;
1170          for (i = 0; i < breakpoints->breakpoints->used; i++) {
1171              if (breakpoints->breakpoints->data[i].state != WHPX_BP_CLEARED) {
1172                  new_breakpoints->data[new_breakpoints->used++] =
1173                      breakpoints->breakpoints->data[i];
1174              }
1175          }
1176      }
1177  
1178      /* 2. Map all CPU breakpoints to WHPX breakpoints */
1179      QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
1180          int i;
1181          bool found = false;
1182  
1183          /* This will be used to detect changed CPU breakpoints later. */
1184          breakpoints->original_addresses[cpu_bp_index++] = bp->pc;
1185  
1186          for (i = 0; i < new_breakpoints->used; i++) {
1187              /*
1188               * WARNING: This loop has O(N^2) complexity, where N is the
1189               * number of breakpoints. It should not be a bottleneck in
1190               * real-world scenarios, since it only needs to run once after
1191               * the breakpoints have been modified.
1192               * If this ever becomes a concern, it can be optimized by storing
1193               * high-level breakpoint objects in a tree or hash map.
1194               */
1195  
1196              if (new_breakpoints->data[i].address == bp->pc) {
1197                  /* There was already a breakpoint at this address. */
1198                  if (new_breakpoints->data[i].state == WHPX_BP_CLEAR_PENDING) {
1199                      new_breakpoints->data[i].state = WHPX_BP_SET;
1200                  } else if (new_breakpoints->data[i].state == WHPX_BP_SET) {
1201                      new_breakpoints->data[i].state = WHPX_BP_SET_PENDING;
1202                  }
1203  
1204                  found = true;
1205                  break;
1206              }
1207          }
1208  
1209          if (!found && new_breakpoints->used < new_breakpoints->allocated) {
1210              /* No WHPX breakpoint at this address. Create one. */
1211              new_breakpoints->data[new_breakpoints->used].address = bp->pc;
1212              new_breakpoints->data[new_breakpoints->used].state =
1213                  WHPX_BP_SET_PENDING;
1214              new_breakpoints->used++;
1215          }
1216      }
1217  
1218      /*
1219       * Free the previous breakpoint list. This can be optimized by keeping
1220       * it as shadow buffer for the next computation instead of freeing
1221       * it immediately.
1222       */
1223      g_free(breakpoints->breakpoints);
1224  
1225      breakpoints->breakpoints = new_breakpoints;
1226  }
1227  
1228  /*
1229   * Physically inserts/removes the breakpoints by reading and writing the
1230   * physical memory, keeping a track of the failed attempts.
1231   *
1232   * Passing resuming=true  will try to set all previously unset breakpoints.
1233   * Passing resuming=false will remove all inserted ones.
1234   */
1235  static void whpx_apply_breakpoints(
1236      struct whpx_breakpoint_collection *breakpoints,
1237      CPUState *cpu,
1238      bool resuming)
1239  {
1240      int i, rc;
1241      if (!breakpoints) {
1242          return;
1243      }
1244  
1245      for (i = 0; i < breakpoints->used; i++) {
1246          /* Decide what to do right now based on the last known state. */
1247          WhpxBreakpointState state = breakpoints->data[i].state;
1248          switch (state) {
1249          case WHPX_BP_CLEARED:
1250              if (resuming) {
1251                  state = WHPX_BP_SET_PENDING;
1252              }
1253              break;
1254          case WHPX_BP_SET_PENDING:
1255              if (!resuming) {
1256                  state = WHPX_BP_CLEARED;
1257              }
1258              break;
1259          case WHPX_BP_SET:
1260              if (!resuming) {
1261                  state = WHPX_BP_CLEAR_PENDING;
1262              }
1263              break;
1264          case WHPX_BP_CLEAR_PENDING:
1265              if (resuming) {
1266                  state = WHPX_BP_SET;
1267              }
1268              break;
1269          }
1270  
1271          if (state == WHPX_BP_SET_PENDING) {
1272              /* Remember the original instruction. */
1273              rc = cpu_memory_rw_debug(cpu,
1274                  breakpoints->data[i].address,
1275                  &breakpoints->data[i].original_instruction,
1276                  1,
1277                  false);
1278  
1279              if (!rc) {
1280                  /* Write the breakpoint instruction. */
1281                  rc = cpu_memory_rw_debug(cpu,
1282                      breakpoints->data[i].address,
1283                      (void *)&whpx_breakpoint_instruction,
1284                      1,
1285                      true);
1286              }
1287  
1288              if (!rc) {
1289                  state = WHPX_BP_SET;
1290              }
1291  
1292          }
1293  
1294          if (state == WHPX_BP_CLEAR_PENDING) {
1295              /* Restore the original instruction. */
1296              rc = cpu_memory_rw_debug(cpu,
1297                  breakpoints->data[i].address,
1298                  &breakpoints->data[i].original_instruction,
1299                  1,
1300                  true);
1301  
1302              if (!rc) {
1303                  state = WHPX_BP_CLEARED;
1304              }
1305          }
1306  
1307          breakpoints->data[i].state = state;
1308      }
1309  }
1310  
1311  /*
1312   * This function is called when the a VCPU is about to start and no other
1313   * VCPUs have been started so far. Since the VCPU start order could be
1314   * arbitrary, it doesn't have to be VCPU#0.
1315   *
1316   * It is used to commit the breakpoints into memory, and configure WHPX
1317   * to intercept debug exceptions.
1318   *
1319   * Note that whpx_set_exception_exit_bitmap() cannot be called if one or
1320   * more VCPUs are already running, so this is the best place to do it.
1321   */
1322  static int whpx_first_vcpu_starting(CPUState *cpu)
1323  {
1324      struct whpx_state *whpx = &whpx_global;
1325      HRESULT hr;
1326  
1327      g_assert(qemu_mutex_iothread_locked());
1328  
1329      if (!QTAILQ_EMPTY(&cpu->breakpoints) ||
1330              (whpx->breakpoints.breakpoints &&
1331               whpx->breakpoints.breakpoints->used)) {
1332          CPUBreakpoint *bp;
1333          int i = 0;
1334          bool update_pending = false;
1335  
1336          QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
1337              if (i >= whpx->breakpoints.original_address_count ||
1338                  bp->pc != whpx->breakpoints.original_addresses[i]) {
1339                  update_pending = true;
1340              }
1341  
1342              i++;
1343          }
1344  
1345          if (i != whpx->breakpoints.original_address_count) {
1346              update_pending = true;
1347          }
1348  
1349          if (update_pending) {
1350              /*
1351               * The CPU breakpoints have changed since the last call to
1352               * whpx_translate_cpu_breakpoints(). WHPX breakpoints must
1353               * now be recomputed.
1354               */
1355              whpx_translate_cpu_breakpoints(&whpx->breakpoints, cpu, i);
1356          }
1357  
1358          /* Actually insert the breakpoints into the memory. */
1359          whpx_apply_breakpoints(whpx->breakpoints.breakpoints, cpu, true);
1360      }
1361  
1362      uint64_t exception_mask;
1363      if (whpx->step_pending ||
1364          (whpx->breakpoints.breakpoints &&
1365           whpx->breakpoints.breakpoints->used)) {
1366          /*
1367           * We are either attempting to single-step one or more CPUs, or
1368           * have one or more breakpoints enabled. Both require intercepting
1369           * the WHvX64ExceptionTypeBreakpointTrap exception.
1370           */
1371  
1372          exception_mask = 1UL << WHvX64ExceptionTypeDebugTrapOrFault;
1373      } else {
1374          /* Let the guest handle all exceptions. */
1375          exception_mask = 0;
1376      }
1377  
1378      hr = whpx_set_exception_exit_bitmap(exception_mask);
1379      if (!SUCCEEDED(hr)) {
1380          error_report("WHPX: Failed to update exception exit mask,"
1381                       "hr=%08lx.", hr);
1382          return 1;
1383      }
1384  
1385      return 0;
1386  }
1387  
1388  /*
1389   * This function is called when the last VCPU has finished running.
1390   * It is used to remove any previously set breakpoints from memory.
1391   */
1392  static int whpx_last_vcpu_stopping(CPUState *cpu)
1393  {
1394      whpx_apply_breakpoints(whpx_global.breakpoints.breakpoints, cpu, false);
1395      return 0;
1396  }
1397  
1398  /* Returns the address of the next instruction that is about to be executed. */
1399  static vaddr whpx_vcpu_get_pc(CPUState *cpu, bool exit_context_valid)
1400  {
1401      if (cpu->vcpu_dirty) {
1402          /* The CPU registers have been modified by other parts of QEMU. */
1403          CPUArchState *env = cpu_env(cpu);
1404          return env->eip;
1405      } else if (exit_context_valid) {
1406          /*
1407           * The CPU registers have not been modified by neither other parts
1408           * of QEMU, nor this port by calling WHvSetVirtualProcessorRegisters().
1409           * This is the most common case.
1410           */
1411          AccelCPUState *vcpu = cpu->accel;
1412          return vcpu->exit_ctx.VpContext.Rip;
1413      } else {
1414          /*
1415           * The CPU registers have been modified by a call to
1416           * WHvSetVirtualProcessorRegisters() and must be re-queried from
1417           * the target.
1418           */
1419          WHV_REGISTER_VALUE reg_value;
1420          WHV_REGISTER_NAME reg_name = WHvX64RegisterRip;
1421          HRESULT hr;
1422          struct whpx_state *whpx = &whpx_global;
1423  
1424          hr = whp_dispatch.WHvGetVirtualProcessorRegisters(
1425              whpx->partition,
1426              cpu->cpu_index,
1427              &reg_name,
1428              1,
1429              &reg_value);
1430  
1431          if (FAILED(hr)) {
1432              error_report("WHPX: Failed to get PC, hr=%08lx", hr);
1433              return 0;
1434          }
1435  
1436          return reg_value.Reg64;
1437      }
1438  }
1439  
1440  static int whpx_handle_halt(CPUState *cpu)
1441  {
1442      CPUX86State *env = cpu_env(cpu);
1443      int ret = 0;
1444  
1445      qemu_mutex_lock_iothread();
1446      if (!((cpu->interrupt_request & CPU_INTERRUPT_HARD) &&
1447            (env->eflags & IF_MASK)) &&
1448          !(cpu->interrupt_request & CPU_INTERRUPT_NMI)) {
1449          cpu->exception_index = EXCP_HLT;
1450          cpu->halted = true;
1451          ret = 1;
1452      }
1453      qemu_mutex_unlock_iothread();
1454  
1455      return ret;
1456  }
1457  
/*
 * Prepares the VCPU for execution: queues up to three register writes
 * (pending interruption/event, CR8, deliverability notifications) while
 * holding the iothread lock, then commits them with a single
 * WHvSetVirtualProcessorRegisters() call.
 */
static void whpx_vcpu_pre_run(CPUState *cpu)
{
    HRESULT hr;
    struct whpx_state *whpx = &whpx_global;
    AccelCPUState *vcpu = cpu->accel;
    X86CPU *x86_cpu = X86_CPU(cpu);
    CPUX86State *env = &x86_cpu->env;
    int irq;
    uint8_t tpr;
    WHV_X64_PENDING_INTERRUPTION_REGISTER new_int;
    UINT32 reg_count = 0;
    WHV_REGISTER_VALUE reg_values[3];
    WHV_REGISTER_NAME reg_names[3];

    memset(&new_int, 0, sizeof(new_int));
    memset(reg_values, 0, sizeof(reg_values));

    qemu_mutex_lock_iothread();

    /* Inject NMI */
    if (!vcpu->interruption_pending &&
        cpu->interrupt_request & (CPU_INTERRUPT_NMI | CPU_INTERRUPT_SMI)) {
        if (cpu->interrupt_request & CPU_INTERRUPT_NMI) {
            cpu->interrupt_request &= ~CPU_INTERRUPT_NMI;
            vcpu->interruptable = false;
            new_int.InterruptionType = WHvX64PendingNmi;
            new_int.InterruptionPending = 1;
            /* x86 NMIs are delivered through fixed vector 2. */
            new_int.InterruptionVector = 2;
        }
        if (cpu->interrupt_request & CPU_INTERRUPT_SMI) {
            /* NOTE(review): SMI is only acknowledged here, not injected. */
            cpu->interrupt_request &= ~CPU_INTERRUPT_SMI;
        }
    }

    /*
     * Force the VCPU out of its inner loop to process any INIT requests or
     * commit pending TPR access.
     */
    if (cpu->interrupt_request & (CPU_INTERRUPT_INIT | CPU_INTERRUPT_TPR)) {
        if ((cpu->interrupt_request & CPU_INTERRUPT_INIT) &&
            !(env->hflags & HF_SMM_MASK)) {
            cpu->exit_request = 1;
        }
        if (cpu->interrupt_request & CPU_INTERRUPT_TPR) {
            cpu->exit_request = 1;
        }
    }

    /* Get pending hard interruption or replay one that was overwritten */
    if (!whpx_apic_in_platform()) {
        /* QEMU-emulated APIC: inject PIC interrupts as pending interruption. */
        if (!vcpu->interruption_pending &&
            vcpu->interruptable && (env->eflags & IF_MASK)) {
            assert(!new_int.InterruptionPending);
            if (cpu->interrupt_request & CPU_INTERRUPT_HARD) {
                cpu->interrupt_request &= ~CPU_INTERRUPT_HARD;
                irq = cpu_get_pic_interrupt(env);
                if (irq >= 0) {
                    new_int.InterruptionType = WHvX64PendingInterrupt;
                    new_int.InterruptionPending = 1;
                    new_int.InterruptionVector = irq;
                }
            }
        }

        /* Setup interrupt state if new one was prepared */
        if (new_int.InterruptionPending) {
            reg_values[reg_count].PendingInterruption = new_int;
            reg_names[reg_count] = WHvRegisterPendingInterruption;
            reg_count += 1;
        }
    } else if (vcpu->ready_for_pic_interrupt &&
               (cpu->interrupt_request & CPU_INTERRUPT_HARD)) {
        /* In-platform APIC: deliver PIC interrupts as ExtInt events. */
        cpu->interrupt_request &= ~CPU_INTERRUPT_HARD;
        irq = cpu_get_pic_interrupt(env);
        if (irq >= 0) {
            reg_names[reg_count] = WHvRegisterPendingEvent;
            reg_values[reg_count].ExtIntEvent = (WHV_X64_PENDING_EXT_INT_EVENT)
            {
                .EventPending = 1,
                .EventType = WHvX64PendingEventExtInt,
                .Vector = irq,
            };
            reg_count += 1;
        }
     }

    /* Sync the TPR to the CR8 if was modified during the intercept */
    tpr = whpx_apic_tpr_to_cr8(cpu_get_apic_tpr(x86_cpu->apic_state));
    if (tpr != vcpu->tpr) {
        vcpu->tpr = tpr;
        reg_values[reg_count].Reg64 = tpr;
        cpu->exit_request = 1;
        reg_names[reg_count] = WHvX64RegisterCr8;
        reg_count += 1;
    }

    /* Update the state of the interrupt delivery notification */
    if (!vcpu->window_registered &&
        cpu->interrupt_request & CPU_INTERRUPT_HARD) {
        reg_values[reg_count].DeliverabilityNotifications =
            (WHV_X64_DELIVERABILITY_NOTIFICATIONS_REGISTER) {
                .InterruptNotification = 1
            };
        vcpu->window_registered = 1;
        reg_names[reg_count] = WHvX64RegisterDeliverabilityNotifications;
        reg_count += 1;
    }

    qemu_mutex_unlock_iothread();
    vcpu->ready_for_pic_interrupt = false;

    if (reg_count) {
        /* Commit all queued register updates in one hypercall. */
        hr = whp_dispatch.WHvSetVirtualProcessorRegisters(
            whpx->partition, cpu->cpu_index,
            reg_names, reg_count, reg_values);
        if (FAILED(hr)) {
            error_report("WHPX: Failed to set interrupt state registers,"
                         " hr=%08lx", hr);
        }
    }

    return;
}
1581  
1582  static void whpx_vcpu_post_run(CPUState *cpu)
1583  {
1584      AccelCPUState *vcpu = cpu->accel;
1585      X86CPU *x86_cpu = X86_CPU(cpu);
1586      CPUX86State *env = &x86_cpu->env;
1587  
1588      env->eflags = vcpu->exit_ctx.VpContext.Rflags;
1589  
1590      uint64_t tpr = vcpu->exit_ctx.VpContext.Cr8;
1591      if (vcpu->tpr != tpr) {
1592          vcpu->tpr = tpr;
1593          qemu_mutex_lock_iothread();
1594          cpu_set_apic_tpr(x86_cpu->apic_state, whpx_cr8_to_apic_tpr(vcpu->tpr));
1595          qemu_mutex_unlock_iothread();
1596      }
1597  
1598      vcpu->interruption_pending =
1599          vcpu->exit_ctx.VpContext.ExecutionState.InterruptionPending;
1600  
1601      vcpu->interruptable =
1602          !vcpu->exit_ctx.VpContext.ExecutionState.InterruptShadow;
1603  
1604      return;
1605  }
1606  
/*
 * Processes interrupt_request flags that must be handled outside the
 * hypervisor run loop: INIT, SIPI, APIC polling, TPR access reports,
 * and waking a halted CPU. Called before resuming the VCPU.
 */
static void whpx_vcpu_process_async_events(CPUState *cpu)
{
    X86CPU *x86_cpu = X86_CPU(cpu);
    CPUX86State *env = &x86_cpu->env;
    AccelCPUState *vcpu = cpu->accel;

    /* Handle an INIT request, unless the CPU is in SMM. */
    if ((cpu->interrupt_request & CPU_INTERRUPT_INIT) &&
        !(env->hflags & HF_SMM_MASK)) {
        whpx_cpu_synchronize_state(cpu);
        do_cpu_init(x86_cpu);
        vcpu->interruptable = true;
    }

    if (cpu->interrupt_request & CPU_INTERRUPT_POLL) {
        cpu->interrupt_request &= ~CPU_INTERRUPT_POLL;
        apic_poll_irq(x86_cpu->apic_state);
    }

    /* Wake a halted CPU if an unmasked interrupt or an NMI is pending. */
    if (((cpu->interrupt_request & CPU_INTERRUPT_HARD) &&
         (env->eflags & IF_MASK)) ||
        (cpu->interrupt_request & CPU_INTERRUPT_NMI)) {
        cpu->halted = false;
    }

    if (cpu->interrupt_request & CPU_INTERRUPT_SIPI) {
        whpx_cpu_synchronize_state(cpu);
        do_cpu_sipi(x86_cpu);
    }

    /* Report a pending TPR access to the APIC emulation. */
    if (cpu->interrupt_request & CPU_INTERRUPT_TPR) {
        cpu->interrupt_request &= ~CPU_INTERRUPT_TPR;
        whpx_cpu_synchronize_state(cpu);
        apic_handle_tpr_access_report(x86_cpu->apic_state, env->eip,
                                      env->tpr_access_type);
    }

    return;
}
1645  
1646  static int whpx_vcpu_run(CPUState *cpu)
1647  {
1648      HRESULT hr;
1649      struct whpx_state *whpx = &whpx_global;
1650      AccelCPUState *vcpu = cpu->accel;
1651      struct whpx_breakpoint *stepped_over_bp = NULL;
1652      WhpxStepMode exclusive_step_mode = WHPX_STEP_NONE;
1653      int ret;
1654  
1655      g_assert(qemu_mutex_iothread_locked());
1656  
1657      if (whpx->running_cpus++ == 0) {
1658          /* Insert breakpoints into memory, update exception exit bitmap. */
1659          ret = whpx_first_vcpu_starting(cpu);
1660          if (ret != 0) {
1661              return ret;
1662          }
1663      }
1664  
1665      if (whpx->breakpoints.breakpoints &&
1666          whpx->breakpoints.breakpoints->used > 0)
1667      {
1668          uint64_t pc = whpx_vcpu_get_pc(cpu, true);
1669          stepped_over_bp = whpx_lookup_breakpoint_by_addr(pc);
1670          if (stepped_over_bp && stepped_over_bp->state != WHPX_BP_SET) {
1671              stepped_over_bp = NULL;
1672          }
1673  
1674          if (stepped_over_bp) {
1675              /*
1676               * We are trying to run the instruction overwritten by an active
1677               * breakpoint. We will temporarily disable the breakpoint, suspend
1678               * other CPUs, and step over the instruction.
1679               */
1680              exclusive_step_mode = WHPX_STEP_EXCLUSIVE;
1681          }
1682      }
1683  
1684      if (exclusive_step_mode == WHPX_STEP_NONE) {
1685          whpx_vcpu_process_async_events(cpu);
1686          if (cpu->halted && !whpx_apic_in_platform()) {
1687              cpu->exception_index = EXCP_HLT;
1688              qatomic_set(&cpu->exit_request, false);
1689              return 0;
1690          }
1691      }
1692  
1693      qemu_mutex_unlock_iothread();
1694  
1695      if (exclusive_step_mode != WHPX_STEP_NONE) {
1696          start_exclusive();
1697          g_assert(cpu == current_cpu);
1698          g_assert(!cpu->running);
1699          cpu->running = true;
1700  
1701          hr = whpx_set_exception_exit_bitmap(
1702              1UL << WHvX64ExceptionTypeDebugTrapOrFault);
1703          if (!SUCCEEDED(hr)) {
1704              error_report("WHPX: Failed to update exception exit mask, "
1705                           "hr=%08lx.", hr);
1706              return 1;
1707          }
1708  
1709          if (stepped_over_bp) {
1710              /* Temporarily disable the triggered breakpoint. */
1711              cpu_memory_rw_debug(cpu,
1712                  stepped_over_bp->address,
1713                  &stepped_over_bp->original_instruction,
1714                  1,
1715                  true);
1716          }
1717      } else {
1718          cpu_exec_start(cpu);
1719      }
1720  
1721      do {
1722          if (cpu->vcpu_dirty) {
1723              whpx_set_registers(cpu, WHPX_SET_RUNTIME_STATE);
1724              cpu->vcpu_dirty = false;
1725          }
1726  
1727          if (exclusive_step_mode == WHPX_STEP_NONE) {
1728              whpx_vcpu_pre_run(cpu);
1729  
1730              if (qatomic_read(&cpu->exit_request)) {
1731                  whpx_vcpu_kick(cpu);
1732              }
1733          }
1734  
1735          if (exclusive_step_mode != WHPX_STEP_NONE || cpu->singlestep_enabled) {
1736              whpx_vcpu_configure_single_stepping(cpu, true, NULL);
1737          }
1738  
1739          hr = whp_dispatch.WHvRunVirtualProcessor(
1740              whpx->partition, cpu->cpu_index,
1741              &vcpu->exit_ctx, sizeof(vcpu->exit_ctx));
1742  
1743          if (FAILED(hr)) {
1744              error_report("WHPX: Failed to exec a virtual processor,"
1745                           " hr=%08lx", hr);
1746              ret = -1;
1747              break;
1748          }
1749  
1750          if (exclusive_step_mode != WHPX_STEP_NONE || cpu->singlestep_enabled) {
1751              whpx_vcpu_configure_single_stepping(cpu,
1752                  false,
1753                  &vcpu->exit_ctx.VpContext.Rflags);
1754          }
1755  
1756          whpx_vcpu_post_run(cpu);
1757  
1758          switch (vcpu->exit_ctx.ExitReason) {
1759          case WHvRunVpExitReasonMemoryAccess:
1760              ret = whpx_handle_mmio(cpu, &vcpu->exit_ctx.MemoryAccess);
1761              break;
1762  
1763          case WHvRunVpExitReasonX64IoPortAccess:
1764              ret = whpx_handle_portio(cpu, &vcpu->exit_ctx.IoPortAccess);
1765              break;
1766  
1767          case WHvRunVpExitReasonX64InterruptWindow:
1768              vcpu->ready_for_pic_interrupt = 1;
1769              vcpu->window_registered = 0;
1770              ret = 0;
1771              break;
1772  
1773          case WHvRunVpExitReasonX64ApicEoi:
1774              assert(whpx_apic_in_platform());
1775              ioapic_eoi_broadcast(vcpu->exit_ctx.ApicEoi.InterruptVector);
1776              break;
1777  
1778          case WHvRunVpExitReasonX64Halt:
1779              /*
1780               * WARNING: as of build 19043.1526 (21H1), this exit reason is no
1781               * longer used.
1782               */
1783              ret = whpx_handle_halt(cpu);
1784              break;
1785  
1786          case WHvRunVpExitReasonX64ApicInitSipiTrap: {
1787              WHV_INTERRUPT_CONTROL ipi = {0};
1788              uint64_t icr = vcpu->exit_ctx.ApicInitSipi.ApicIcr;
1789              uint32_t delivery_mode =
1790                  (icr & APIC_ICR_DELIV_MOD) >> APIC_ICR_DELIV_MOD_SHIFT;
1791              int dest_shorthand =
1792                  (icr & APIC_ICR_DEST_SHORT) >> APIC_ICR_DEST_SHORT_SHIFT;
1793              bool broadcast = false;
1794              bool include_self = false;
1795              uint32_t i;
1796  
1797              /* We only registered for INIT and SIPI exits. */
1798              if ((delivery_mode != APIC_DM_INIT) &&
1799                  (delivery_mode != APIC_DM_SIPI)) {
1800                  error_report(
1801                      "WHPX: Unexpected APIC exit that is not a INIT or SIPI");
1802                  break;
1803              }
1804  
1805              if (delivery_mode == APIC_DM_INIT) {
1806                  ipi.Type = WHvX64InterruptTypeInit;
1807              } else {
1808                  ipi.Type = WHvX64InterruptTypeSipi;
1809              }
1810  
1811              ipi.DestinationMode =
1812                  ((icr & APIC_ICR_DEST_MOD) >> APIC_ICR_DEST_MOD_SHIFT) ?
1813                      WHvX64InterruptDestinationModeLogical :
1814                      WHvX64InterruptDestinationModePhysical;
1815  
1816              ipi.TriggerMode =
1817                  ((icr & APIC_ICR_TRIGGER_MOD) >> APIC_ICR_TRIGGER_MOD_SHIFT) ?
1818                      WHvX64InterruptTriggerModeLevel :
1819                      WHvX64InterruptTriggerModeEdge;
1820  
1821              ipi.Vector = icr & APIC_VECTOR_MASK;
1822              switch (dest_shorthand) {
1823              /* no shorthand. Bits 56-63 contain the destination. */
1824              case 0:
1825                  ipi.Destination = (icr >> 56) & APIC_VECTOR_MASK;
1826                  hr = whp_dispatch.WHvRequestInterrupt(whpx->partition,
1827                          &ipi, sizeof(ipi));
1828                  if (FAILED(hr)) {
1829                      error_report("WHPX: Failed to request interrupt  hr=%08lx",
1830                          hr);
1831                  }
1832  
1833                  break;
1834  
1835              /* self */
1836              case 1:
1837                  include_self = true;
1838                  break;
1839  
1840              /* broadcast, including self */
1841              case 2:
1842                  broadcast = true;
1843                  include_self = true;
1844                  break;
1845  
1846              /* broadcast, excluding self */
1847              case 3:
1848                  broadcast = true;
1849                  break;
1850              }
1851  
1852              if (!broadcast && !include_self) {
1853                  break;
1854              }
1855  
1856              for (i = 0; i <= max_vcpu_index; i++) {
1857                  if (i == cpu->cpu_index && !include_self) {
1858                      continue;
1859                  }
1860  
1861                  /*
1862                   * Assuming that APIC Ids are identity mapped since
1863                   * WHvX64RegisterApicId & WHvX64RegisterInitialApicId registers
1864                   * are not handled yet and the hypervisor doesn't allow the
1865                   * guest to modify the APIC ID.
1866                   */
1867                  ipi.Destination = i;
1868                  hr = whp_dispatch.WHvRequestInterrupt(whpx->partition,
1869                          &ipi, sizeof(ipi));
1870                  if (FAILED(hr)) {
1871                      error_report(
1872                          "WHPX: Failed to request SIPI for %d,  hr=%08lx",
1873                          i, hr);
1874                  }
1875              }
1876  
1877              break;
1878          }
1879  
1880          case WHvRunVpExitReasonCanceled:
1881              if (exclusive_step_mode != WHPX_STEP_NONE) {
1882                  /*
1883                   * We are trying to step over a single instruction, and
1884                   * likely got a request to stop from another thread.
1885                   * Delay it until we are done stepping
1886                   * over.
1887                   */
1888                  ret = 0;
1889              } else {
1890                  cpu->exception_index = EXCP_INTERRUPT;
1891                  ret = 1;
1892              }
1893              break;
1894          case WHvRunVpExitReasonX64MsrAccess: {
1895              WHV_REGISTER_VALUE reg_values[3] = {0};
1896              WHV_REGISTER_NAME reg_names[3];
1897              UINT32 reg_count;
1898  
1899              reg_names[0] = WHvX64RegisterRip;
1900              reg_names[1] = WHvX64RegisterRax;
1901              reg_names[2] = WHvX64RegisterRdx;
1902  
1903              reg_values[0].Reg64 =
1904                  vcpu->exit_ctx.VpContext.Rip +
1905                  vcpu->exit_ctx.VpContext.InstructionLength;
1906  
1907              /*
1908               * For all unsupported MSR access we:
1909               *     ignore writes
1910               *     return 0 on read.
1911               */
1912              reg_count = vcpu->exit_ctx.MsrAccess.AccessInfo.IsWrite ?
1913                          1 : 3;
1914  
1915              hr = whp_dispatch.WHvSetVirtualProcessorRegisters(
1916                  whpx->partition,
1917                  cpu->cpu_index,
1918                  reg_names, reg_count,
1919                  reg_values);
1920  
1921              if (FAILED(hr)) {
1922                  error_report("WHPX: Failed to set MsrAccess state "
1923                               " registers, hr=%08lx", hr);
1924              }
1925              ret = 0;
1926              break;
1927          }
1928          case WHvRunVpExitReasonX64Cpuid: {
1929              WHV_REGISTER_VALUE reg_values[5];
1930              WHV_REGISTER_NAME reg_names[5];
1931              UINT32 reg_count = 5;
1932              UINT64 cpuid_fn, rip = 0, rax = 0, rcx = 0, rdx = 0, rbx = 0;
1933              X86CPU *x86_cpu = X86_CPU(cpu);
1934              CPUX86State *env = &x86_cpu->env;
1935  
1936              memset(reg_values, 0, sizeof(reg_values));
1937  
1938              rip = vcpu->exit_ctx.VpContext.Rip +
1939                    vcpu->exit_ctx.VpContext.InstructionLength;
1940              cpuid_fn = vcpu->exit_ctx.CpuidAccess.Rax;
1941  
1942              /*
1943               * Ideally, these should be supplied to the hypervisor during VCPU
1944               * initialization and it should be able to satisfy this request.
1945               * But, currently, WHPX doesn't support setting CPUID values in the
1946               * hypervisor once the partition has been setup, which is too late
1947               * since VCPUs are realized later. For now, use the values from
1948               * QEMU to satisfy these requests, until WHPX adds support for
1949               * being able to set these values in the hypervisor at runtime.
1950               */
1951              cpu_x86_cpuid(env, cpuid_fn, 0, (UINT32 *)&rax, (UINT32 *)&rbx,
1952                  (UINT32 *)&rcx, (UINT32 *)&rdx);
1953              switch (cpuid_fn) {
1954              case 0x40000000:
1955                  /* Expose the vmware cpu frequency cpuid leaf */
1956                  rax = 0x40000010;
1957                  rbx = rcx = rdx = 0;
1958                  break;
1959  
1960              case 0x40000010:
1961                  rax = env->tsc_khz;
1962                  rbx = env->apic_bus_freq / 1000; /* Hz to KHz */
1963                  rcx = rdx = 0;
1964                  break;
1965  
1966              case 0x80000001:
1967                  /* Remove any support of OSVW */
1968                  rcx &= ~CPUID_EXT3_OSVW;
1969                  break;
1970              }
1971  
1972              reg_names[0] = WHvX64RegisterRip;
1973              reg_names[1] = WHvX64RegisterRax;
1974              reg_names[2] = WHvX64RegisterRcx;
1975              reg_names[3] = WHvX64RegisterRdx;
1976              reg_names[4] = WHvX64RegisterRbx;
1977  
1978              reg_values[0].Reg64 = rip;
1979              reg_values[1].Reg64 = rax;
1980              reg_values[2].Reg64 = rcx;
1981              reg_values[3].Reg64 = rdx;
1982              reg_values[4].Reg64 = rbx;
1983  
1984              hr = whp_dispatch.WHvSetVirtualProcessorRegisters(
1985                  whpx->partition, cpu->cpu_index,
1986                  reg_names,
1987                  reg_count,
1988                  reg_values);
1989  
1990              if (FAILED(hr)) {
1991                  error_report("WHPX: Failed to set CpuidAccess state registers,"
1992                               " hr=%08lx", hr);
1993              }
1994              ret = 0;
1995              break;
1996          }
1997          case WHvRunVpExitReasonException:
1998              whpx_get_registers(cpu);
1999  
2000              if ((vcpu->exit_ctx.VpException.ExceptionType ==
2001                   WHvX64ExceptionTypeDebugTrapOrFault) &&
2002                  (vcpu->exit_ctx.VpException.InstructionByteCount >= 1) &&
2003                  (vcpu->exit_ctx.VpException.InstructionBytes[0] ==
2004                   whpx_breakpoint_instruction)) {
2005                  /* Stopped at a software breakpoint. */
2006                  cpu->exception_index = EXCP_DEBUG;
2007              } else if ((vcpu->exit_ctx.VpException.ExceptionType ==
2008                          WHvX64ExceptionTypeDebugTrapOrFault) &&
2009                         !cpu->singlestep_enabled) {
2010                  /*
2011                   * Just finished stepping over a breakpoint, but the
2012                   * gdb does not expect us to do single-stepping.
2013                   * Don't do anything special.
2014                   */
2015                  cpu->exception_index = EXCP_INTERRUPT;
2016              } else {
2017                  /* Another exception or debug event. Report it to GDB. */
2018                  cpu->exception_index = EXCP_DEBUG;
2019              }
2020  
2021              ret = 1;
2022              break;
2023          case WHvRunVpExitReasonNone:
2024          case WHvRunVpExitReasonUnrecoverableException:
2025          case WHvRunVpExitReasonInvalidVpRegisterValue:
2026          case WHvRunVpExitReasonUnsupportedFeature:
2027          default:
2028              error_report("WHPX: Unexpected VP exit code %d",
2029                           vcpu->exit_ctx.ExitReason);
2030              whpx_get_registers(cpu);
2031              qemu_mutex_lock_iothread();
2032              qemu_system_guest_panicked(cpu_get_crash_info(cpu));
2033              qemu_mutex_unlock_iothread();
2034              break;
2035          }
2036  
2037      } while (!ret);
2038  
2039      if (stepped_over_bp) {
2040          /* Restore the breakpoint we stepped over */
2041          cpu_memory_rw_debug(cpu,
2042              stepped_over_bp->address,
2043              (void *)&whpx_breakpoint_instruction,
2044              1,
2045              true);
2046      }
2047  
2048      if (exclusive_step_mode != WHPX_STEP_NONE) {
2049          g_assert(cpu_in_exclusive_context(cpu));
2050          cpu->running = false;
2051          end_exclusive();
2052  
2053          exclusive_step_mode = WHPX_STEP_NONE;
2054      } else {
2055          cpu_exec_end(cpu);
2056      }
2057  
2058      qemu_mutex_lock_iothread();
2059      current_cpu = cpu;
2060  
2061      if (--whpx->running_cpus == 0) {
2062          whpx_last_vcpu_stopping(cpu);
2063      }
2064  
2065      qatomic_set(&cpu->exit_request, false);
2066  
2067      return ret < 0;
2068  }
2069  
2070  static void do_whpx_cpu_synchronize_state(CPUState *cpu, run_on_cpu_data arg)
2071  {
2072      if (!cpu->vcpu_dirty) {
2073          whpx_get_registers(cpu);
2074          cpu->vcpu_dirty = true;
2075      }
2076  }
2077  
2078  static void do_whpx_cpu_synchronize_post_reset(CPUState *cpu,
2079                                                 run_on_cpu_data arg)
2080  {
2081      whpx_set_registers(cpu, WHPX_SET_RESET_STATE);
2082      cpu->vcpu_dirty = false;
2083  }
2084  
2085  static void do_whpx_cpu_synchronize_post_init(CPUState *cpu,
2086                                                run_on_cpu_data arg)
2087  {
2088      whpx_set_registers(cpu, WHPX_SET_FULL_STATE);
2089      cpu->vcpu_dirty = false;
2090  }
2091  
static void do_whpx_cpu_synchronize_pre_loadvm(CPUState *cpu,
                                               run_on_cpu_data arg)
{
    /*
     * Mark QEMU's register copy as authoritative before loadvm overwrites
     * it; the hypervisor state will be re-uploaded on the next run.
     */
    cpu->vcpu_dirty = true;
}
2097  
2098  /*
2099   * CPU support.
2100   */
2101  
2102  void whpx_cpu_synchronize_state(CPUState *cpu)
2103  {
2104      if (!cpu->vcpu_dirty) {
2105          run_on_cpu(cpu, do_whpx_cpu_synchronize_state, RUN_ON_CPU_NULL);
2106      }
2107  }
2108  
/* Schedule a post-reset state upload on the vCPU's own thread. */
void whpx_cpu_synchronize_post_reset(CPUState *cpu)
{
    run_on_cpu(cpu, do_whpx_cpu_synchronize_post_reset, RUN_ON_CPU_NULL);
}
2113  
/* Schedule a post-init full-state upload on the vCPU's own thread. */
void whpx_cpu_synchronize_post_init(CPUState *cpu)
{
    run_on_cpu(cpu, do_whpx_cpu_synchronize_post_init, RUN_ON_CPU_NULL);
}
2118  
/* Mark QEMU's state authoritative before loadvm, on the vCPU's thread. */
void whpx_cpu_synchronize_pre_loadvm(CPUState *cpu)
{
    run_on_cpu(cpu, do_whpx_cpu_synchronize_pre_loadvm, RUN_ON_CPU_NULL);
}
2123  
/*
 * Record whether the debugger requested single-stepping before the VM
 * resumes; consulted later when deciding how to run the vCPU.
 */
void whpx_cpu_synchronize_pre_resume(bool step_pending)
{
    whpx_global.step_pending = step_pending;
}
2128  
2129  /*
2130   * Vcpu support.
2131   */
2132  
2133  static Error *whpx_migration_blocker;
2134  
2135  static void whpx_cpu_update_state(void *opaque, bool running, RunState state)
2136  {
2137      CPUX86State *env = opaque;
2138  
2139      if (running) {
2140          env->tsc_valid = false;
2141      }
2142  }
2143  
2144  int whpx_init_vcpu(CPUState *cpu)
2145  {
2146      HRESULT hr;
2147      struct whpx_state *whpx = &whpx_global;
2148      AccelCPUState *vcpu = NULL;
2149      Error *local_error = NULL;
2150      X86CPU *x86_cpu = X86_CPU(cpu);
2151      CPUX86State *env = &x86_cpu->env;
2152      UINT64 freq = 0;
2153      int ret;
2154  
2155      /* Add migration blockers for all unsupported features of the
2156       * Windows Hypervisor Platform
2157       */
2158      if (whpx_migration_blocker == NULL) {
2159          error_setg(&whpx_migration_blocker,
2160                 "State blocked due to non-migratable CPUID feature support,"
2161                 "dirty memory tracking support, and XSAVE/XRSTOR support");
2162  
2163          if (migrate_add_blocker(&whpx_migration_blocker, &local_error) < 0) {
2164              error_report_err(local_error);
2165              ret = -EINVAL;
2166              goto error;
2167          }
2168      }
2169  
2170      vcpu = g_new0(AccelCPUState, 1);
2171  
2172      hr = whp_dispatch.WHvEmulatorCreateEmulator(
2173          &whpx_emu_callbacks,
2174          &vcpu->emulator);
2175      if (FAILED(hr)) {
2176          error_report("WHPX: Failed to setup instruction completion support,"
2177                       " hr=%08lx", hr);
2178          ret = -EINVAL;
2179          goto error;
2180      }
2181  
2182      hr = whp_dispatch.WHvCreateVirtualProcessor(
2183          whpx->partition, cpu->cpu_index, 0);
2184      if (FAILED(hr)) {
2185          error_report("WHPX: Failed to create a virtual processor,"
2186                       " hr=%08lx", hr);
2187          whp_dispatch.WHvEmulatorDestroyEmulator(vcpu->emulator);
2188          ret = -EINVAL;
2189          goto error;
2190      }
2191  
2192      /*
2193       * vcpu's TSC frequency is either specified by user, or use the value
2194       * provided by Hyper-V if the former is not present. In the latter case, we
2195       * query it from Hyper-V and record in env->tsc_khz, so that vcpu's TSC
2196       * frequency can be migrated later via this field.
2197       */
2198      if (!env->tsc_khz) {
2199          hr = whp_dispatch.WHvGetCapability(
2200              WHvCapabilityCodeProcessorClockFrequency, &freq, sizeof(freq),
2201                  NULL);
2202          if (hr != WHV_E_UNKNOWN_CAPABILITY) {
2203              if (FAILED(hr)) {
2204                  printf("WHPX: Failed to query tsc frequency, hr=0x%08lx\n", hr);
2205              } else {
2206                  env->tsc_khz = freq / 1000; /* Hz to KHz */
2207              }
2208          }
2209      }
2210  
2211      env->apic_bus_freq = HYPERV_APIC_BUS_FREQUENCY;
2212      hr = whp_dispatch.WHvGetCapability(
2213          WHvCapabilityCodeInterruptClockFrequency, &freq, sizeof(freq), NULL);
2214      if (hr != WHV_E_UNKNOWN_CAPABILITY) {
2215          if (FAILED(hr)) {
2216              printf("WHPX: Failed to query apic bus frequency hr=0x%08lx\n", hr);
2217          } else {
2218              env->apic_bus_freq = freq;
2219          }
2220      }
2221  
2222      /*
2223       * If the vmware cpuid frequency leaf option is set, and we have a valid
2224       * tsc value, trap the corresponding cpuid's.
2225       */
2226      if (x86_cpu->vmware_cpuid_freq && env->tsc_khz) {
2227          UINT32 cpuidExitList[] = {1, 0x80000001, 0x40000000, 0x40000010};
2228  
2229          hr = whp_dispatch.WHvSetPartitionProperty(
2230                  whpx->partition,
2231                  WHvPartitionPropertyCodeCpuidExitList,
2232                  cpuidExitList,
2233                  RTL_NUMBER_OF(cpuidExitList) * sizeof(UINT32));
2234  
2235          if (FAILED(hr)) {
2236              error_report("WHPX: Failed to set partition CpuidExitList hr=%08lx",
2237                          hr);
2238              ret = -EINVAL;
2239              goto error;
2240          }
2241      }
2242  
2243      vcpu->interruptable = true;
2244      cpu->vcpu_dirty = true;
2245      cpu->accel = vcpu;
2246      max_vcpu_index = max(max_vcpu_index, cpu->cpu_index);
2247      qemu_add_vm_change_state_handler(whpx_cpu_update_state, env);
2248  
2249      return 0;
2250  
2251  error:
2252      g_free(vcpu);
2253  
2254      return ret;
2255  }
2256  
2257  int whpx_vcpu_exec(CPUState *cpu)
2258  {
2259      int ret;
2260      int fatal;
2261  
2262      for (;;) {
2263          if (cpu->exception_index >= EXCP_INTERRUPT) {
2264              ret = cpu->exception_index;
2265              cpu->exception_index = -1;
2266              break;
2267          }
2268  
2269          fatal = whpx_vcpu_run(cpu);
2270  
2271          if (fatal) {
2272              error_report("WHPX: Failed to exec a virtual processor");
2273              abort();
2274          }
2275      }
2276  
2277      return ret;
2278  }
2279  
2280  void whpx_destroy_vcpu(CPUState *cpu)
2281  {
2282      struct whpx_state *whpx = &whpx_global;
2283      AccelCPUState *vcpu = cpu->accel;
2284  
2285      whp_dispatch.WHvDeleteVirtualProcessor(whpx->partition, cpu->cpu_index);
2286      whp_dispatch.WHvEmulatorDestroyEmulator(vcpu->emulator);
2287      g_free(cpu->accel);
2288      return;
2289  }
2290  
/*
 * Force the vCPU out of WHvRunVirtualProcessor so pending work (exit
 * requests, interrupts) can be serviced.
 */
void whpx_vcpu_kick(CPUState *cpu)
{
    struct whpx_state *whpx = &whpx_global;
    whp_dispatch.WHvCancelRunVirtualProcessor(
        whpx->partition, cpu->cpu_index, 0);
}
2297  
2298  /*
2299   * Memory support.
2300   */
2301  
2302  static void whpx_update_mapping(hwaddr start_pa, ram_addr_t size,
2303                                  void *host_va, int add, int rom,
2304                                  const char *name)
2305  {
2306      struct whpx_state *whpx = &whpx_global;
2307      HRESULT hr;
2308  
2309      /*
2310      if (add) {
2311          printf("WHPX: ADD PA:%p Size:%p, Host:%p, %s, '%s'\n",
2312                 (void*)start_pa, (void*)size, host_va,
2313                 (rom ? "ROM" : "RAM"), name);
2314      } else {
2315          printf("WHPX: DEL PA:%p Size:%p, Host:%p,      '%s'\n",
2316                 (void*)start_pa, (void*)size, host_va, name);
2317      }
2318      */
2319  
2320      if (add) {
2321          hr = whp_dispatch.WHvMapGpaRange(whpx->partition,
2322                                           host_va,
2323                                           start_pa,
2324                                           size,
2325                                           (WHvMapGpaRangeFlagRead |
2326                                            WHvMapGpaRangeFlagExecute |
2327                                            (rom ? 0 : WHvMapGpaRangeFlagWrite)));
2328      } else {
2329          hr = whp_dispatch.WHvUnmapGpaRange(whpx->partition,
2330                                             start_pa,
2331                                             size);
2332      }
2333  
2334      if (FAILED(hr)) {
2335          error_report("WHPX: Failed to %s GPA range '%s' PA:%p, Size:%p bytes,"
2336                       " Host:%p, hr=%08lx",
2337                       (add ? "MAP" : "UNMAP"), name,
2338                       (void *)(uintptr_t)start_pa, (void *)size, host_va, hr);
2339      }
2340  }
2341  
/*
 * Translate a memory-listener section into a WHPX GPA mapping update.
 *
 * Only RAM-backed regions are mapped. The section is shrunk to the
 * largest host-page-aligned sub-range it contains; if nothing
 * page-aligned remains, the section is silently skipped.
 */
static void whpx_process_section(MemoryRegionSection *section, int add)
{
    MemoryRegion *mr = section->mr;
    hwaddr start_pa = section->offset_within_address_space;
    ram_addr_t size = int128_get64(section->size);
    unsigned int delta;
    uint64_t host_va;

    if (!memory_region_is_ram(mr)) {
        return;
    }

    /* Bytes needed to round start_pa up to the next host page boundary. */
    delta = qemu_real_host_page_size() - (start_pa & ~qemu_real_host_page_mask());
    delta &= ~qemu_real_host_page_mask();
    if (delta > size) {
        return;
    }
    start_pa += delta;
    size -= delta;
    /* Truncate the tail so the length is a whole number of host pages. */
    size &= qemu_real_host_page_mask();
    if (!size || (start_pa & ~qemu_real_host_page_mask())) {
        return;
    }

    /* Host virtual address of the (now page-aligned) start of the range. */
    host_va = (uintptr_t)memory_region_get_ram_ptr(mr)
            + section->offset_within_region + delta;

    whpx_update_mapping(start_pa, size, (void *)(uintptr_t)host_va, add,
                        memory_region_is_rom(mr), mr->name);
}
2372  
/* Memory listener: a region became visible; reference it and map it. */
static void whpx_region_add(MemoryListener *listener,
                           MemoryRegionSection *section)
{
    memory_region_ref(section->mr);
    whpx_process_section(section, 1);
}
2379  
/* Memory listener: a region went away; unmap it, then drop the reference. */
static void whpx_region_del(MemoryListener *listener,
                           MemoryRegionSection *section)
{
    whpx_process_section(section, 0);
    memory_region_unref(section->mr);
}
2386  
/* Memory listener hook: WHPX needs no work at transaction start. */
static void whpx_transaction_begin(MemoryListener *listener)
{
}
2390  
/* Memory listener hook: WHPX needs no work at transaction commit. */
static void whpx_transaction_commit(MemoryListener *listener)
{
}
2394  
2395  static void whpx_log_sync(MemoryListener *listener,
2396                           MemoryRegionSection *section)
2397  {
2398      MemoryRegion *mr = section->mr;
2399  
2400      if (!memory_region_is_ram(mr)) {
2401          return;
2402      }
2403  
2404      memory_region_set_dirty(mr, 0, int128_get64(section->size));
2405  }
2406  
/* Hooks that keep the WHPX GPA mappings in sync with QEMU's memory map. */
static MemoryListener whpx_memory_listener = {
    .name = "whpx",
    .begin = whpx_transaction_begin,
    .commit = whpx_transaction_commit,
    .region_add = whpx_region_add,
    .region_del = whpx_region_del,
    .log_sync = whpx_log_sync,
    .priority = MEMORY_LISTENER_PRIORITY_ACCEL,
};
2416  
/* Attach the WHPX memory listener to the system address space. */
static void whpx_memory_init(void)
{
    memory_listener_register(&whpx_memory_listener, &address_space_memory);
}
2421  
2422  /*
2423   * Load the functions from the given library, using the given handle. If a
2424   * handle is provided, it is used, otherwise the library is opened. The
2425   * handle will be updated on return with the opened one.
2426   */
static bool load_whp_dispatch_fns(HMODULE *handle,
    WHPFunctionList function_list)
{
    HMODULE hLib = *handle;

    #define WINHV_PLATFORM_DLL "WinHvPlatform.dll"
    #define WINHV_EMULATION_DLL "WinHvEmulation.dll"
    /* Optional symbols: leave the dispatch slot NULL when absent. */
    #define WHP_LOAD_FIELD_OPTIONAL(return_type, function_name, signature) \
        whp_dispatch.function_name = \
            (function_name ## _t)GetProcAddress(hLib, #function_name); \

    /* Required symbols: a missing one aborts loading of the whole list. */
    #define WHP_LOAD_FIELD(return_type, function_name, signature) \
        whp_dispatch.function_name = \
            (function_name ## _t)GetProcAddress(hLib, #function_name); \
        if (!whp_dispatch.function_name) { \
            error_report("Could not load function %s", #function_name); \
            goto error; \
        } \

    /* Open the DLL only if the caller hasn't already loaded it. */
    #define WHP_LOAD_LIB(lib_name, handle_lib) \
    if (!handle_lib) { \
        handle_lib = LoadLibrary(lib_name); \
        if (!handle_lib) { \
            error_report("Could not load library %s.", lib_name); \
            goto error; \
        } \
    } \

    /* Resolve the requested function list from the matching DLL. */
    switch (function_list) {
    case WINHV_PLATFORM_FNS_DEFAULT:
        WHP_LOAD_LIB(WINHV_PLATFORM_DLL, hLib)
        LIST_WINHVPLATFORM_FUNCTIONS(WHP_LOAD_FIELD)
        break;

    case WINHV_EMULATION_FNS_DEFAULT:
        WHP_LOAD_LIB(WINHV_EMULATION_DLL, hLib)
        LIST_WINHVEMULATION_FUNCTIONS(WHP_LOAD_FIELD)
        break;

    case WINHV_PLATFORM_FNS_SUPPLEMENTAL:
        WHP_LOAD_LIB(WINHV_PLATFORM_DLL, hLib)
        LIST_WINHVPLATFORM_FUNCTIONS_SUPPLEMENTAL(WHP_LOAD_FIELD_OPTIONAL)
        break;
    }

    *handle = hLib;
    return true;

error:
    /*
     * NOTE(review): when the caller passed in an already-loaded handle,
     * this FreeLibrary drops a reference the caller still holds in
     * *handle (which is not cleared here) — verify callers never reuse
     * the handle after a failed call.
     */
    if (hLib) {
        FreeLibrary(hLib);
    }

    return false;
}
2482  
/*
 * QOM property setter for "kernel-irqchip". Accepts on/off; "split" is
 * rejected with an error since WHPX cannot emulate only part of the
 * interrupt controller in the hypervisor.
 */
static void whpx_set_kernel_irqchip(Object *obj, Visitor *v,
                                   const char *name, void *opaque,
                                   Error **errp)
{
    struct whpx_state *whpx = &whpx_global;
    OnOffSplit mode;

    if (!visit_type_OnOffSplit(v, name, &mode, errp)) {
        return;
    }

    switch (mode) {
    case ON_OFF_SPLIT_ON:
        /* "required" makes a later APIC-emulation failure fatal. */
        whpx->kernel_irqchip_allowed = true;
        whpx->kernel_irqchip_required = true;
        break;

    case ON_OFF_SPLIT_OFF:
        whpx->kernel_irqchip_allowed = false;
        whpx->kernel_irqchip_required = false;
        break;

    case ON_OFF_SPLIT_SPLIT:
        error_setg(errp, "WHPX: split irqchip currently not supported");
        error_append_hint(errp,
            "Try without kernel-irqchip or with kernel-irqchip=on|off");
        break;

    default:
        /*
         * The value was checked in visit_type_OnOffSplit() above. If
         * we get here, then something is wrong in QEMU.
         */
        abort();
    }
}
2519  
2520  /*
2521   * Partition support
2522   */
2523  
2524  static int whpx_accel_init(MachineState *ms)
2525  {
2526      struct whpx_state *whpx;
2527      int ret;
2528      HRESULT hr;
2529      WHV_CAPABILITY whpx_cap;
2530      UINT32 whpx_cap_size;
2531      WHV_PARTITION_PROPERTY prop;
2532      UINT32 cpuidExitList[] = {1, 0x80000001};
2533      WHV_CAPABILITY_FEATURES features = {0};
2534  
2535      whpx = &whpx_global;
2536  
2537      if (!init_whp_dispatch()) {
2538          ret = -ENOSYS;
2539          goto error;
2540      }
2541  
2542      whpx->mem_quota = ms->ram_size;
2543  
2544      hr = whp_dispatch.WHvGetCapability(
2545          WHvCapabilityCodeHypervisorPresent, &whpx_cap,
2546          sizeof(whpx_cap), &whpx_cap_size);
2547      if (FAILED(hr) || !whpx_cap.HypervisorPresent) {
2548          error_report("WHPX: No accelerator found, hr=%08lx", hr);
2549          ret = -ENOSPC;
2550          goto error;
2551      }
2552  
2553      hr = whp_dispatch.WHvGetCapability(
2554          WHvCapabilityCodeFeatures, &features, sizeof(features), NULL);
2555      if (FAILED(hr)) {
2556          error_report("WHPX: Failed to query capabilities, hr=%08lx", hr);
2557          ret = -EINVAL;
2558          goto error;
2559      }
2560  
2561      hr = whp_dispatch.WHvCreatePartition(&whpx->partition);
2562      if (FAILED(hr)) {
2563          error_report("WHPX: Failed to create partition, hr=%08lx", hr);
2564          ret = -EINVAL;
2565          goto error;
2566      }
2567  
2568      /*
2569       * Query the XSAVE capability of the partition. Any error here is not
2570       * considered fatal.
2571       */
2572      hr = whp_dispatch.WHvGetPartitionProperty(
2573          whpx->partition,
2574          WHvPartitionPropertyCodeProcessorXsaveFeatures,
2575          &whpx_xsave_cap,
2576          sizeof(whpx_xsave_cap),
2577          &whpx_cap_size);
2578  
2579      /*
2580       * Windows version which don't support this property will return with the
2581       * specific error code.
2582       */
2583      if (FAILED(hr) && hr != WHV_E_UNKNOWN_PROPERTY) {
2584          error_report("WHPX: Failed to query XSAVE capability, hr=%08lx", hr);
2585      }
2586  
2587      if (!whpx_has_xsave()) {
2588          printf("WHPX: Partition is not XSAVE capable\n");
2589      }
2590  
2591      memset(&prop, 0, sizeof(WHV_PARTITION_PROPERTY));
2592      prop.ProcessorCount = ms->smp.cpus;
2593      hr = whp_dispatch.WHvSetPartitionProperty(
2594          whpx->partition,
2595          WHvPartitionPropertyCodeProcessorCount,
2596          &prop,
2597          sizeof(WHV_PARTITION_PROPERTY));
2598  
2599      if (FAILED(hr)) {
2600          error_report("WHPX: Failed to set partition processor count to %u,"
2601                       " hr=%08lx", prop.ProcessorCount, hr);
2602          ret = -EINVAL;
2603          goto error;
2604      }
2605  
2606      /*
2607       * Error out if WHP doesn't support apic emulation and user is requiring
2608       * it.
2609       */
2610      if (whpx->kernel_irqchip_required && (!features.LocalApicEmulation ||
2611              !whp_dispatch.WHvSetVirtualProcessorInterruptControllerState2)) {
2612          error_report("WHPX: kernel irqchip requested, but unavailable. "
2613              "Try without kernel-irqchip or with kernel-irqchip=off");
2614          ret = -EINVAL;
2615          goto error;
2616      }
2617  
2618      if (whpx->kernel_irqchip_allowed && features.LocalApicEmulation &&
2619          whp_dispatch.WHvSetVirtualProcessorInterruptControllerState2) {
2620          WHV_X64_LOCAL_APIC_EMULATION_MODE mode =
2621              WHvX64LocalApicEmulationModeXApic;
2622          printf("WHPX: setting APIC emulation mode in the hypervisor\n");
2623          hr = whp_dispatch.WHvSetPartitionProperty(
2624              whpx->partition,
2625              WHvPartitionPropertyCodeLocalApicEmulationMode,
2626              &mode,
2627              sizeof(mode));
2628          if (FAILED(hr)) {
2629              error_report("WHPX: Failed to enable kernel irqchip hr=%08lx", hr);
2630              if (whpx->kernel_irqchip_required) {
2631                  error_report("WHPX: kernel irqchip requested, but unavailable");
2632                  ret = -EINVAL;
2633                  goto error;
2634              }
2635          } else {
2636              whpx->apic_in_platform = true;
2637          }
2638      }
2639  
2640      /* Register for MSR and CPUID exits */
2641      memset(&prop, 0, sizeof(WHV_PARTITION_PROPERTY));
2642      prop.ExtendedVmExits.X64MsrExit = 1;
2643      prop.ExtendedVmExits.X64CpuidExit = 1;
2644      prop.ExtendedVmExits.ExceptionExit = 1;
2645      if (whpx_apic_in_platform()) {
2646          prop.ExtendedVmExits.X64ApicInitSipiExitTrap = 1;
2647      }
2648  
2649      hr = whp_dispatch.WHvSetPartitionProperty(
2650              whpx->partition,
2651              WHvPartitionPropertyCodeExtendedVmExits,
2652              &prop,
2653              sizeof(WHV_PARTITION_PROPERTY));
2654      if (FAILED(hr)) {
2655          error_report("WHPX: Failed to enable MSR & CPUIDexit, hr=%08lx", hr);
2656          ret = -EINVAL;
2657          goto error;
2658      }
2659  
2660      hr = whp_dispatch.WHvSetPartitionProperty(
2661          whpx->partition,
2662          WHvPartitionPropertyCodeCpuidExitList,
2663          cpuidExitList,
2664          RTL_NUMBER_OF(cpuidExitList) * sizeof(UINT32));
2665  
2666      if (FAILED(hr)) {
2667          error_report("WHPX: Failed to set partition CpuidExitList hr=%08lx",
2668                       hr);
2669          ret = -EINVAL;
2670          goto error;
2671      }
2672  
2673      /*
2674       * We do not want to intercept any exceptions from the guest,
2675       * until we actually start debugging with gdb.
2676       */
2677      whpx->exception_exit_bitmap = -1;
2678      hr = whpx_set_exception_exit_bitmap(0);
2679  
2680      if (FAILED(hr)) {
2681          error_report("WHPX: Failed to set exception exit bitmap, hr=%08lx", hr);
2682          ret = -EINVAL;
2683          goto error;
2684      }
2685  
2686      hr = whp_dispatch.WHvSetupPartition(whpx->partition);
2687      if (FAILED(hr)) {
2688          error_report("WHPX: Failed to setup partition, hr=%08lx", hr);
2689          ret = -EINVAL;
2690          goto error;
2691      }
2692  
2693      whpx_memory_init();
2694  
2695      printf("Windows Hypervisor Platform accelerator is operational\n");
2696      return 0;
2697  
2698  error:
2699  
2700      if (NULL != whpx->partition) {
2701          whp_dispatch.WHvDeletePartition(whpx->partition);
2702          whpx->partition = NULL;
2703      }
2704  
2705      return ret;
2706  }
2707  
/*
 * Return non-zero when the WHPX accelerator has been selected and
 * successfully initialized (mirrors the AccelClass "allowed" flag).
 */
int whpx_enabled(void)
{
    return whpx_allowed;
}
2712  
2713  bool whpx_apic_in_platform(void) {
2714      return whpx_global.apic_in_platform;
2715  }
2716  
2717  static void whpx_accel_class_init(ObjectClass *oc, void *data)
2718  {
2719      AccelClass *ac = ACCEL_CLASS(oc);
2720      ac->name = "WHPX";
2721      ac->init_machine = whpx_accel_init;
2722      ac->allowed = &whpx_allowed;
2723  
2724      object_class_property_add(oc, "kernel-irqchip", "on|off|split",
2725          NULL, whpx_set_kernel_irqchip,
2726          NULL, NULL);
2727      object_class_property_set_description(oc, "kernel-irqchip",
2728          "Configure WHPX in-kernel irqchip");
2729  }
2730  
2731  static void whpx_accel_instance_init(Object *obj)
2732  {
2733      struct whpx_state *whpx = &whpx_global;
2734  
2735      memset(whpx, 0, sizeof(struct whpx_state));
2736      /* Turn on kernel-irqchip, by default */
2737      whpx->kernel_irqchip_allowed = true;
2738  }
2739  
2740  static const TypeInfo whpx_accel_type = {
2741      .name = ACCEL_CLASS_NAME("whpx"),
2742      .parent = TYPE_ACCEL,
2743      .instance_init = whpx_accel_instance_init,
2744      .class_init = whpx_accel_class_init,
2745  };
2746  
/* Register the WHPX accelerator QOM type with QEMU's type system. */
static void whpx_type_init(void)
{
    type_register_static(&whpx_accel_type);
}
2751  
2752  bool init_whp_dispatch(void)
2753  {
2754      if (whp_dispatch_initialized) {
2755          return true;
2756      }
2757  
2758      if (!load_whp_dispatch_fns(&hWinHvPlatform, WINHV_PLATFORM_FNS_DEFAULT)) {
2759          goto error;
2760      }
2761  
2762      if (!load_whp_dispatch_fns(&hWinHvEmulation, WINHV_EMULATION_FNS_DEFAULT)) {
2763          goto error;
2764      }
2765  
2766      assert(load_whp_dispatch_fns(&hWinHvPlatform,
2767          WINHV_PLATFORM_FNS_SUPPLEMENTAL));
2768      whp_dispatch_initialized = true;
2769  
2770      return true;
2771  error:
2772      if (hWinHvPlatform) {
2773          FreeLibrary(hWinHvPlatform);
2774      }
2775  
2776      if (hWinHvEmulation) {
2777          FreeLibrary(hWinHvEmulation);
2778      }
2779  
2780      return false;
2781  }
2782  
/* Run whpx_type_init() at module load so the accelerator is selectable. */
type_init(whpx_type_init);
2784