xref: /qemu/hw/ppc/spapr_nested.c (revision 01499add2ae6529589002860e1880ff193a6578a)
1 #include "qemu/osdep.h"
2 #include "qemu/cutils.h"
3 #include "exec/exec-all.h"
4 #include "exec/cputlb.h"
5 #include "exec/target_long.h"
6 #include "helper_regs.h"
7 #include "hw/ppc/ppc.h"
8 #include "hw/ppc/spapr.h"
9 #include "hw/ppc/spapr_cpu_core.h"
10 #include "hw/ppc/spapr_nested.h"
11 #include "mmu-book3s-v3.h"
12 #include "cpu-models.h"
13 #include "qemu/log.h"
14 
15 void spapr_nested_reset(SpaprMachineState *spapr)
16 {
17     if (spapr_get_cap(spapr, SPAPR_CAP_NESTED_KVM_HV)) {
18         spapr_unregister_nested_hv();
19         spapr_register_nested_hv();
20     } else if (spapr_get_cap(spapr, SPAPR_CAP_NESTED_PAPR)) {
21         spapr->nested.capabilities_set = false;
22         spapr_unregister_nested_papr();
23         spapr_register_nested_papr();
24         spapr_nested_gsb_init();
25     } else {
26         spapr->nested.api = 0;
27     }
28 }
29 
30 uint8_t spapr_nested_api(SpaprMachineState *spapr)
31 {
32     return spapr->nested.api;
33 }
34 
35 #ifdef CONFIG_TCG
36 
37 bool spapr_get_pate_nested_hv(SpaprMachineState *spapr, PowerPCCPU *cpu,
38                               target_ulong lpid, ppc_v3_pate_t *entry)
39 {
40     uint64_t patb, pats;
41 
42     assert(lpid != 0);
43 
44     patb = spapr->nested.ptcr & PTCR_PATB;
45     pats = spapr->nested.ptcr & PTCR_PATS;
46 
47     /* Check if partition table is properly aligned */
48     if (patb & MAKE_64BIT_MASK(0, pats + 12)) {
49         return false;
50     }
51 
52     /* Calculate number of entries */
53     pats = 1ull << (pats + 12 - 4);
54     if (pats <= lpid) {
55         return false;
56     }
57 
58     /* Grab entry */
59     patb += 16 * lpid;
60     entry->dw0 = ldq_phys(CPU(cpu)->as, patb);
61     entry->dw1 = ldq_phys(CPU(cpu)->as, patb + 8);
62     return true;
63 }
64 
65 static
66 SpaprMachineStateNestedGuest *spapr_get_nested_guest(SpaprMachineState *spapr,
67                                                      target_ulong guestid)
68 {
69     return spapr->nested.guests ?
70         g_hash_table_lookup(spapr->nested.guests,
71                             GINT_TO_POINTER(guestid)) : NULL;
72 }
73 
74 bool spapr_get_pate_nested_papr(SpaprMachineState *spapr, PowerPCCPU *cpu,
75                                 target_ulong lpid, ppc_v3_pate_t *entry)
76 {
77     SpaprMachineStateNestedGuest *guest;
78     assert(lpid != 0);
79     guest = spapr_get_nested_guest(spapr, lpid);
80     if (!guest) {
81         return false;
82     }
83 
84     entry->dw0 = guest->parttbl[0];
85     entry->dw1 = guest->parttbl[1];
86     return true;
87 }
88 
89 #define PRTS_MASK      0x1f
90 
91 static target_ulong h_set_ptbl(PowerPCCPU *cpu,
92                                SpaprMachineState *spapr,
93                                target_ulong opcode,
94                                target_ulong *args)
95 {
96     target_ulong ptcr = args[0];
97 
98     if (!spapr_get_cap(spapr, SPAPR_CAP_NESTED_KVM_HV)) {
99         return H_FUNCTION;
100     }
101 
102     if ((ptcr & PRTS_MASK) + 12 - 4 > 12) {
103         return H_PARAMETER;
104     }
105 
106     spapr->nested.ptcr = ptcr; /* Save new partition table */
107 
108     return H_SUCCESS;
109 }
110 
111 static target_ulong h_tlb_invalidate(PowerPCCPU *cpu,
112                                      SpaprMachineState *spapr,
113                                      target_ulong opcode,
114                                      target_ulong *args)
115 {
116     /*
117      * The spapr virtual hypervisor nested HV implementation retains no L2
118      * translation state except for TLB. And the TLB is always invalidated
119      * across L1<->L2 transitions, so nothing is required here.
120      */
121 
122     return H_SUCCESS;
123 }
124 
125 static target_ulong h_copy_tofrom_guest(PowerPCCPU *cpu,
126                                         SpaprMachineState *spapr,
127                                         target_ulong opcode,
128                                         target_ulong *args)
129 {
130     /*
131      * This HCALL is not required, L1 KVM will take a slow path and walk the
132      * page tables manually to do the data copy.
133      */
134     return H_FUNCTION;
135 }
136 
137 static void nested_save_state(struct nested_ppc_state *save, PowerPCCPU *cpu)
138 {
139     CPUPPCState *env = &cpu->env;
140     SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
141 
142     memcpy(save->gpr, env->gpr, sizeof(save->gpr));
143 
144     save->lr = env->lr;
145     save->ctr = env->ctr;
146     save->cfar = env->cfar;
147     save->msr = env->msr;
148     save->nip = env->nip;
149 
150     save->cr = ppc_get_cr(env);
151     save->xer = cpu_read_xer(env);
152 
153     save->lpcr = env->spr[SPR_LPCR];
154     save->lpidr = env->spr[SPR_LPIDR];
155     save->pcr = env->spr[SPR_PCR];
156     save->dpdes = env->spr[SPR_DPDES];
157     save->hfscr = env->spr[SPR_HFSCR];
158     save->srr0 = env->spr[SPR_SRR0];
159     save->srr1 = env->spr[SPR_SRR1];
160     save->sprg0 = env->spr[SPR_SPRG0];
161     save->sprg1 = env->spr[SPR_SPRG1];
162     save->sprg2 = env->spr[SPR_SPRG2];
163     save->sprg3 = env->spr[SPR_SPRG3];
164     save->pidr = env->spr[SPR_BOOKS_PID];
165     save->ppr = env->spr[SPR_PPR];
166 
167     if (spapr_nested_api(spapr) == NESTED_API_PAPR) {
168         save->amor = env->spr[SPR_AMOR];
169         save->dawr0 = env->spr[SPR_DAWR0];
170         save->dawrx0 = env->spr[SPR_DAWRX0];
171         save->ciabr = env->spr[SPR_CIABR];
172         save->purr = env->spr[SPR_PURR];
173         save->spurr = env->spr[SPR_SPURR];
174         save->ic = env->spr[SPR_IC];
175         save->vtb = env->spr[SPR_VTB];
176         save->hdar = env->spr[SPR_HDAR];
177         save->hdsisr = env->spr[SPR_HDSISR];
178         save->heir = env->spr[SPR_HEIR];
179         save->asdr = env->spr[SPR_ASDR];
180         save->dawr1 = env->spr[SPR_DAWR1];
181         save->dawrx1 = env->spr[SPR_DAWRX1];
182         save->dexcr = env->spr[SPR_DEXCR];
183         save->hdexcr = env->spr[SPR_HDEXCR];
184         save->hashkeyr = env->spr[SPR_HASHKEYR];
185         save->hashpkeyr = env->spr[SPR_HASHPKEYR];
186         memcpy(save->vsr, env->vsr, sizeof(save->vsr));
187         save->ebbhr = env->spr[SPR_EBBHR];
188         save->tar = env->spr[SPR_TAR];
189         save->ebbrr = env->spr[SPR_EBBRR];
190         save->bescr = env->spr[SPR_BESCR];
191         save->iamr = env->spr[SPR_IAMR];
192         save->amr = env->spr[SPR_AMR];
193         save->uamor = env->spr[SPR_UAMOR];
194         save->dscr = env->spr[SPR_DSCR];
195         save->fscr = env->spr[SPR_FSCR];
196         save->pspb = env->spr[SPR_PSPB];
197         save->ctrl = env->spr[SPR_CTRL];
198         save->vrsave = env->spr[SPR_VRSAVE];
199         save->dar = env->spr[SPR_DAR];
200         save->dsisr = env->spr[SPR_DSISR];
201         save->pmc1 = env->spr[SPR_POWER_PMC1];
202         save->pmc2 = env->spr[SPR_POWER_PMC2];
203         save->pmc3 = env->spr[SPR_POWER_PMC3];
204         save->pmc4 = env->spr[SPR_POWER_PMC4];
205         save->pmc5 = env->spr[SPR_POWER_PMC5];
206         save->pmc6 = env->spr[SPR_POWER_PMC6];
207         save->mmcr0 = env->spr[SPR_POWER_MMCR0];
208         save->mmcr1 = env->spr[SPR_POWER_MMCR1];
209         save->mmcr2 = env->spr[SPR_POWER_MMCR2];
210         save->mmcra = env->spr[SPR_POWER_MMCRA];
211         save->sdar = env->spr[SPR_POWER_SDAR];
212         save->siar = env->spr[SPR_POWER_SIAR];
213         save->sier = env->spr[SPR_POWER_SIER];
214         save->vscr = ppc_get_vscr(env);
215         save->fpscr = env->fpscr;
216     } else if (spapr_nested_api(spapr) == NESTED_API_KVM_HV) {
217         save->tb_offset = env->tb_env->tb_offset;
218     }
219 }
220 
221 static void nested_post_load_state(CPUPPCState *env, CPUState *cs)
222 {
223     /*
224      * compute hflags and possible interrupts.
225      */
226     hreg_compute_hflags(env);
227     ppc_maybe_interrupt(env);
228     /*
229      * Nested HV does not tag TLB entries between L1 and L2, so must
230      * flush on transition.
231      */
232     tlb_flush(cs);
233     env->reserve_addr = -1; /* Reset the reservation */
234 }
235 
236 static void nested_load_state(PowerPCCPU *cpu, struct nested_ppc_state *load)
237 {
238     CPUPPCState *env = &cpu->env;
239     SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
240 
241     memcpy(env->gpr, load->gpr, sizeof(env->gpr));
242 
243     env->lr = load->lr;
244     env->ctr = load->ctr;
245     env->cfar = load->cfar;
246     env->msr = load->msr;
247     env->nip = load->nip;
248 
249     ppc_set_cr(env, load->cr);
250     cpu_write_xer(env, load->xer);
251 
252     env->spr[SPR_LPCR] = load->lpcr;
253     env->spr[SPR_LPIDR] = load->lpidr;
254     env->spr[SPR_PCR] = load->pcr;
255     env->spr[SPR_DPDES] = load->dpdes;
256     env->spr[SPR_HFSCR] = load->hfscr;
257     env->spr[SPR_SRR0] = load->srr0;
258     env->spr[SPR_SRR1] = load->srr1;
259     env->spr[SPR_SPRG0] = load->sprg0;
260     env->spr[SPR_SPRG1] = load->sprg1;
261     env->spr[SPR_SPRG2] = load->sprg2;
262     env->spr[SPR_SPRG3] = load->sprg3;
263     env->spr[SPR_BOOKS_PID] = load->pidr;
264     env->spr[SPR_PPR] = load->ppr;
265 
266     if (spapr_nested_api(spapr) == NESTED_API_PAPR) {
267         env->spr[SPR_AMOR] = load->amor;
268         env->spr[SPR_DAWR0] = load->dawr0;
269         env->spr[SPR_DAWRX0] = load->dawrx0;
270         env->spr[SPR_CIABR] = load->ciabr;
271         env->spr[SPR_PURR] = load->purr;
272         env->spr[SPR_SPURR] = load->purr;
273         env->spr[SPR_IC] = load->ic;
274         env->spr[SPR_VTB] = load->vtb;
275         env->spr[SPR_HDAR] = load->hdar;
276         env->spr[SPR_HDSISR] = load->hdsisr;
277         env->spr[SPR_HEIR] = load->heir;
278         env->spr[SPR_ASDR] = load->asdr;
279         env->spr[SPR_DAWR1] = load->dawr1;
280         env->spr[SPR_DAWRX1] = load->dawrx1;
281         env->spr[SPR_DEXCR] = load->dexcr;
282         env->spr[SPR_HDEXCR] = load->hdexcr;
283         env->spr[SPR_HASHKEYR] = load->hashkeyr;
284         env->spr[SPR_HASHPKEYR] = load->hashpkeyr;
285         memcpy(env->vsr, load->vsr, sizeof(env->vsr));
286         env->spr[SPR_EBBHR] = load->ebbhr;
287         env->spr[SPR_TAR] = load->tar;
288         env->spr[SPR_EBBRR] = load->ebbrr;
289         env->spr[SPR_BESCR] = load->bescr;
290         env->spr[SPR_IAMR] = load->iamr;
291         env->spr[SPR_AMR] = load->amr;
292         env->spr[SPR_UAMOR] = load->uamor;
293         env->spr[SPR_DSCR] = load->dscr;
294         env->spr[SPR_FSCR] = load->fscr;
295         env->spr[SPR_PSPB] = load->pspb;
296         env->spr[SPR_CTRL] = load->ctrl;
297         env->spr[SPR_VRSAVE] = load->vrsave;
298         env->spr[SPR_DAR] = load->dar;
299         env->spr[SPR_DSISR] = load->dsisr;
300         env->spr[SPR_POWER_PMC1] = load->pmc1;
301         env->spr[SPR_POWER_PMC2] = load->pmc2;
302         env->spr[SPR_POWER_PMC3] = load->pmc3;
303         env->spr[SPR_POWER_PMC4] = load->pmc4;
304         env->spr[SPR_POWER_PMC5] = load->pmc5;
305         env->spr[SPR_POWER_PMC6] = load->pmc6;
306         env->spr[SPR_POWER_MMCR0] = load->mmcr0;
307         env->spr[SPR_POWER_MMCR1] = load->mmcr1;
308         env->spr[SPR_POWER_MMCR2] = load->mmcr2;
309         env->spr[SPR_POWER_MMCRA] = load->mmcra;
310         env->spr[SPR_POWER_SDAR] = load->sdar;
311         env->spr[SPR_POWER_SIAR] = load->siar;
312         env->spr[SPR_POWER_SIER] = load->sier;
313         ppc_store_vscr(env, load->vscr);
314         ppc_store_fpscr(env, load->fpscr);
315     } else if (spapr_nested_api(spapr) == NESTED_API_KVM_HV) {
316         env->tb_env->tb_offset = load->tb_offset;
317     }
318 }
319 
320 /*
321  * When this handler returns, the environment is switched to the L2 guest
322  * and TCG begins running that. spapr_exit_nested() performs the switch from
323  * L2 back to L1 and returns from the H_ENTER_NESTED hcall.
324  */
325 static target_ulong h_enter_nested(PowerPCCPU *cpu,
326                                    SpaprMachineState *spapr,
327                                    target_ulong opcode,
328                                    target_ulong *args)
329 {
330     PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu);
331     CPUPPCState *env = &cpu->env;
332     CPUState *cs = CPU(cpu);
333     SpaprCpuState *spapr_cpu = spapr_cpu_state(cpu);
334     struct nested_ppc_state l2_state;
335     target_ulong hv_ptr = args[0];
336     target_ulong regs_ptr = args[1];
337     target_ulong hdec, now = cpu_ppc_load_tbl(env);
338     target_ulong lpcr, lpcr_mask;
339     struct kvmppc_hv_guest_state *hvstate;
340     struct kvmppc_hv_guest_state hv_state;
341     struct kvmppc_pt_regs *regs;
342     hwaddr len;
343 
344     if (spapr->nested.ptcr == 0) {
345         return H_NOT_AVAILABLE;
346     }
347 
348     len = sizeof(*hvstate);
349     hvstate = address_space_map(CPU(cpu)->as, hv_ptr, &len, false,
350                                 MEMTXATTRS_UNSPECIFIED);
351     if (len != sizeof(*hvstate)) {
352         address_space_unmap(CPU(cpu)->as, hvstate, len, 0, false);
353         return H_PARAMETER;
354     }
355 
356     memcpy(&hv_state, hvstate, len);
357 
358     address_space_unmap(CPU(cpu)->as, hvstate, len, len, false);
359 
360     /*
361      * We accept versions 1 and 2. Version 2 fields are unused because TCG
362      * does not implement DAWR*.
363      */
364     if (hv_state.version > HV_GUEST_STATE_VERSION) {
365         return H_PARAMETER;
366     }
367 
368     if (hv_state.lpid == 0) {
369         return H_PARAMETER;
370     }
371 
372     spapr_cpu->nested_host_state = g_try_new(struct nested_ppc_state, 1);
373     if (!spapr_cpu->nested_host_state) {
374         return H_NO_MEM;
375     }
376 
377     assert(env->spr[SPR_LPIDR] == 0);
378     assert(env->spr[SPR_DPDES] == 0);
379     nested_save_state(spapr_cpu->nested_host_state, cpu);
380 
381     len = sizeof(*regs);
382     regs = address_space_map(CPU(cpu)->as, regs_ptr, &len, false,
383                                 MEMTXATTRS_UNSPECIFIED);
384     if (!regs || len != sizeof(*regs)) {
385         address_space_unmap(CPU(cpu)->as, regs, len, 0, false);
386         g_free(spapr_cpu->nested_host_state);
387         return H_P2;
388     }
389 
390     len = sizeof(l2_state.gpr);
391     assert(len == sizeof(regs->gpr));
392     memcpy(l2_state.gpr, regs->gpr, len);
393 
394     l2_state.lr = regs->link;
395     l2_state.ctr = regs->ctr;
396     l2_state.xer = regs->xer;
397     l2_state.cr = regs->ccr;
398     l2_state.msr = regs->msr;
399     l2_state.nip = regs->nip;
400 
401     address_space_unmap(CPU(cpu)->as, regs, len, len, false);
402 
403     l2_state.cfar = hv_state.cfar;
404     l2_state.lpidr = hv_state.lpid;
405 
406     lpcr_mask = LPCR_DPFD | LPCR_ILE | LPCR_AIL | LPCR_LD | LPCR_MER;
407     lpcr = (env->spr[SPR_LPCR] & ~lpcr_mask) | (hv_state.lpcr & lpcr_mask);
408     lpcr |= LPCR_HR | LPCR_UPRT | LPCR_GTSE | LPCR_HVICE | LPCR_HDICE;
409     lpcr &= ~LPCR_LPES0;
410     l2_state.lpcr = lpcr & pcc->lpcr_mask;
411 
412     l2_state.pcr = hv_state.pcr;
413     /* hv_state.amor is not used */
414     l2_state.dpdes = hv_state.dpdes;
415     l2_state.hfscr = hv_state.hfscr;
416     /* TCG does not implement DAWR*, CIABR, PURR, SPURR, IC, VTB, HEIR SPRs*/
417     l2_state.srr0 = hv_state.srr0;
418     l2_state.srr1 = hv_state.srr1;
419     l2_state.sprg0 = hv_state.sprg[0];
420     l2_state.sprg1 = hv_state.sprg[1];
421     l2_state.sprg2 = hv_state.sprg[2];
422     l2_state.sprg3 = hv_state.sprg[3];
423     l2_state.pidr = hv_state.pidr;
424     l2_state.ppr = hv_state.ppr;
425     l2_state.tb_offset = env->tb_env->tb_offset + hv_state.tb_offset;
426 
427     /*
428      * Switch to the nested guest environment and start the "hdec" timer.
429      */
430     nested_load_state(cpu, &l2_state);
431     nested_post_load_state(env, cs);
432 
433     hdec = hv_state.hdec_expiry - now;
434     cpu_ppc_hdecr_init(env);
435     cpu_ppc_store_hdecr(env, hdec);
436 
437     /*
438      * The hv_state.vcpu_token is not needed. It is used by the KVM
439      * implementation to remember which L2 vCPU last ran on which physical
440      * CPU so as to invalidate process scope translations if it is moved
441      * between physical CPUs. For now TLBs are always flushed on L1<->L2
442      * transitions so this is not a problem.
443      *
444      * Could validate that the same vcpu_token does not attempt to run on
445      * different L1 vCPUs at the same time, but that would be a L1 KVM bug
446      * and it's not obviously worth a new data structure to do it.
447      */
448 
449     spapr_cpu->in_nested = true;
450 
451     /*
452      * The spapr hcall helper sets env->gpr[3] to the return value, but at
453      * this point the L1 is not returning from the hcall but rather we
454      * start running the L2, so r3 must not be clobbered, so return env->gpr[3]
455      * to leave it unchanged.
456      */
457     return env->gpr[3];
458 }
459 
460 static void spapr_exit_nested_hv(PowerPCCPU *cpu, int excp)
461 {
462     CPUPPCState *env = &cpu->env;
463     CPUState *cs = CPU(cpu);
464     SpaprCpuState *spapr_cpu = spapr_cpu_state(cpu);
465     struct nested_ppc_state l2_state;
466     target_ulong hv_ptr = spapr_cpu->nested_host_state->gpr[4];
467     target_ulong regs_ptr = spapr_cpu->nested_host_state->gpr[5];
468     target_ulong hsrr0, hsrr1, hdar, asdr, hdsisr;
469     struct kvmppc_hv_guest_state *hvstate;
470     struct kvmppc_pt_regs *regs;
471     hwaddr len;
472 
473     nested_save_state(&l2_state, cpu);
474     hsrr0 = env->spr[SPR_HSRR0];
475     hsrr1 = env->spr[SPR_HSRR1];
476     hdar = env->spr[SPR_HDAR];
477     hdsisr = env->spr[SPR_HDSISR];
478     asdr = env->spr[SPR_ASDR];
479 
480     /*
481      * Switch back to the host environment (including for any error).
482      */
483     assert(env->spr[SPR_LPIDR] != 0);
484     nested_load_state(cpu, spapr_cpu->nested_host_state);
485     nested_post_load_state(env, cs);
486     env->gpr[3] = env->excp_vectors[excp]; /* hcall return value */
487 
488     cpu_ppc_hdecr_exit(env);
489 
490     spapr_cpu->in_nested = false;
491 
492     g_free(spapr_cpu->nested_host_state);
493     spapr_cpu->nested_host_state = NULL;
494 
495     len = sizeof(*hvstate);
496     hvstate = address_space_map(CPU(cpu)->as, hv_ptr, &len, true,
497                                 MEMTXATTRS_UNSPECIFIED);
498     if (len != sizeof(*hvstate)) {
499         address_space_unmap(CPU(cpu)->as, hvstate, len, 0, true);
500         env->gpr[3] = H_PARAMETER;
501         return;
502     }
503 
504     hvstate->cfar = l2_state.cfar;
505     hvstate->lpcr = l2_state.lpcr;
506     hvstate->pcr = l2_state.pcr;
507     hvstate->dpdes = l2_state.dpdes;
508     hvstate->hfscr = l2_state.hfscr;
509 
510     if (excp == POWERPC_EXCP_HDSI) {
511         hvstate->hdar = hdar;
512         hvstate->hdsisr = hdsisr;
513         hvstate->asdr = asdr;
514     } else if (excp == POWERPC_EXCP_HISI) {
515         hvstate->asdr = asdr;
516     }
517 
518     /* HEIR should be implemented for HV mode and saved here. */
519     hvstate->srr0 = l2_state.srr0;
520     hvstate->srr1 = l2_state.srr1;
521     hvstate->sprg[0] = l2_state.sprg0;
522     hvstate->sprg[1] = l2_state.sprg1;
523     hvstate->sprg[2] = l2_state.sprg2;
524     hvstate->sprg[3] = l2_state.sprg3;
525     hvstate->pidr = l2_state.pidr;
526     hvstate->ppr = l2_state.ppr;
527 
528     /* Is it okay to specify write length larger than actual data written? */
529     address_space_unmap(CPU(cpu)->as, hvstate, len, len, true);
530 
531     len = sizeof(*regs);
532     regs = address_space_map(CPU(cpu)->as, regs_ptr, &len, true,
533                                 MEMTXATTRS_UNSPECIFIED);
534     if (!regs || len != sizeof(*regs)) {
535         address_space_unmap(CPU(cpu)->as, regs, len, 0, true);
536         env->gpr[3] = H_P2;
537         return;
538     }
539 
540     len = sizeof(env->gpr);
541     assert(len == sizeof(regs->gpr));
542     memcpy(regs->gpr, l2_state.gpr, len);
543 
544     regs->link = l2_state.lr;
545     regs->ctr = l2_state.ctr;
546     regs->xer = l2_state.xer;
547     regs->ccr = l2_state.cr;
548 
549     if (excp == POWERPC_EXCP_MCHECK ||
550         excp == POWERPC_EXCP_RESET ||
551         excp == POWERPC_EXCP_SYSCALL) {
552         regs->nip = l2_state.srr0;
553         regs->msr = l2_state.srr1 & env->msr_mask;
554     } else {
555         regs->nip = hsrr0;
556         regs->msr = hsrr1 & env->msr_mask;
557     }
558 
559     /* Is it okay to specify write length larger than actual data written? */
560     address_space_unmap(CPU(cpu)->as, regs, len, len, true);
561 }
562 
563 static bool spapr_nested_vcpu_check(SpaprMachineStateNestedGuest *guest,
564                                     target_ulong vcpuid, bool inoutbuf)
565 {
566     struct SpaprMachineStateNestedGuestVcpu *vcpu;
567     /*
568      * Perform sanity checks for the provided vcpuid of a guest.
569      * For now, ensure its valid, allocated and enabled for use.
570      */
571 
572     if (vcpuid >= PAPR_NESTED_GUEST_VCPU_MAX) {
573         return false;
574     }
575 
576     if (!(vcpuid < guest->nr_vcpus)) {
577         return false;
578     }
579 
580     vcpu = &guest->vcpus[vcpuid];
581     if (!vcpu->enabled) {
582         return false;
583     }
584 
585     if (!inoutbuf) {
586         return true;
587     }
588 
589     /* Check to see if the in/out buffers are registered */
590     if (vcpu->runbufin.addr && vcpu->runbufout.addr) {
591         return true;
592     }
593 
594     return false;
595 }
596 
597 static void *get_vcpu_state_ptr(SpaprMachineState *spapr,
598                                 SpaprMachineStateNestedGuest *guest,
599                                 target_ulong vcpuid)
600 {
601     assert(spapr_nested_vcpu_check(guest, vcpuid, false));
602     return &guest->vcpus[vcpuid].state;
603 }
604 
605 static void *get_vcpu_ptr(SpaprMachineState *spapr,
606                           SpaprMachineStateNestedGuest *guest,
607                           target_ulong vcpuid)
608 {
609     assert(spapr_nested_vcpu_check(guest, vcpuid, false));
610     return &guest->vcpus[vcpuid];
611 }
612 
613 static void *get_guest_ptr(SpaprMachineState *spapr,
614                            SpaprMachineStateNestedGuest *guest,
615                            target_ulong vcpuid)
616 {
617     return guest; /* for GSBE_NESTED */
618 }
619 
620 static void *get_machine_ptr(SpaprMachineState *spapr,
621                              SpaprMachineStateNestedGuest *guest,
622                              target_ulong vcpuid)
623 {
624     /* ignore guest and vcpuid for this */
625     return &spapr->nested;
626 }
627 
/*
 * Copy one 64-bit value between internal state (@a, host endian) and the
 * element buffer (@b, big endian).
 *
 * set=1 means the L1 is trying to set some state: @b is converted from big
 *       endian and stored in @a.
 * set=0 means the L1 is trying to get some state: @a is converted to big
 *       endian and stored in @b.
 */
static void copy_state_8to8(void *a, void *b, bool set)
{
    uint64_t *internal = a;
    uint64_t *element = b;

    if (!set) {
        *element = cpu_to_be64(*internal);
        return;
    }

    *internal = be64_to_cpu(*element);
}
642 
/*
 * Copy one 32-bit value between internal state (@a, host endian) and the
 * element buffer (@b, big endian). @set selects the direction: true stores
 * into @a, false stores into @b.
 */
static void copy_state_4to4(void *a, void *b, bool set)
{
    uint32_t *internal = a;
    uint32_t *element = b;

    if (!set) {
        *element = cpu_to_be32(*internal);
        return;
    }

    *internal = be32_to_cpu(*element);
}
651 
/*
 * Copy a 16-byte value, handled as two 64-bit halves, between internal
 * state (@a) and the element buffer (@b, big endian). Each half is
 * byte-swapped as needed and the two halves trade places. @set selects the
 * direction: true stores into @a, false stores into @b.
 */
static void copy_state_16to16(void *a, void *b, bool set)
{
    uint64_t *internal = a;
    uint64_t *element = b;

    if (set) {
        internal[1] = be64_to_cpu(element[0]);
        internal[0] = be64_to_cpu(element[1]);
        return;
    }

    element[1] = cpu_to_be64(internal[0]);
    element[0] = cpu_to_be64(internal[1]);
}
670 
/*
 * Copy between a 64-bit internal value (@a) and a 32-bit big-endian
 * element (@b). Setting zero-extends the element into @a; getting
 * truncates @a to 32 bits before storing it in @b.
 */
static void copy_state_4to8(void *a, void *b, bool set)
{
    uint64_t *internal = a;
    uint32_t *element = b;

    if (set) {
        uint32_t narrow = be32_to_cpu(*element);

        *internal = (uint64_t)narrow;
    } else {
        uint32_t narrow = (uint32_t)*internal;

        *element = cpu_to_be32(narrow);
    }
}
679 
680 static void copy_state_pagetbl(void *a, void *b, bool set)
681 {
682     uint64_t *pagetbl;
683     uint64_t *buf; /* 3 double words */
684     uint64_t rts;
685 
686     assert(set);
687 
688     pagetbl = a;
689     buf = b;
690 
691     *pagetbl = be64_to_cpu(buf[0]);
692     /* as per ISA section 6.7.6.1 */
693     *pagetbl |= PATE0_HR; /* Host Radix bit is 1 */
694 
695     /* RTS */
696     rts = be64_to_cpu(buf[1]);
697     assert(rts == 52);
698     rts = rts - 31; /* since radix tree size = 2^(RTS+31) */
699     *pagetbl |=  ((rts & 0x7) << 5); /* RTS2 is bit 56:58 */
700     *pagetbl |=  (((rts >> 3) & 0x3) << 61); /* RTS1 is bit 1:2 */
701 
702     /* RPDS {Size = 2^(RPDS+3) , RPDS >=5} */
703     *pagetbl |= 63 - clz64(be64_to_cpu(buf[2])) - 3;
704 }
705 
/*
 * Build the process table doubleword in @a from the two big-endian
 * doublewords in @b (base, size in bytes). Only the set direction is
 * supported (asserted).
 *
 * Only two sizes are accepted: 2^12 (encoded as PRTS 0) and 2^24 (encoded
 * as PRTS 12); any other size hits g_assert_not_reached().
 */
static void copy_state_proctbl(void *a, void *b, bool set)
{
    uint64_t *proctbl = a;
    uint64_t *element = b; /* 2 double words */
    uint64_t size_bytes;
    uint64_t prts;

    assert(set);

    /* PRTS: Process Table Size = 2^(12+PRTS) */
    size_bytes = be64_to_cpu(element[1]);
    if (size_bytes == (1ULL << 12)) {
        prts = 0;
    } else if (size_bytes == (1ULL << 24)) {
        prts = 12;
    } else {
        g_assert_not_reached();
    }

    /* PRTB: Process Table Base, combined with the size encoding */
    *proctbl = be64_to_cpu(element[0]) | prts;
}
726 
727 static void copy_state_runbuf(void *a, void *b, bool set)
728 {
729     uint64_t *buf; /* 2 double words */
730     struct SpaprMachineStateNestedGuestVcpuRunBuf *runbuf;
731 
732     assert(set);
733 
734     runbuf = a;
735     buf = b;
736 
737     runbuf->addr = be64_to_cpu(buf[0]);
738     assert(runbuf->addr);
739 
740     /* per spec */
741     assert(be64_to_cpu(buf[1]) <= 16384);
742 
743     /*
744      * This will also hit in the input buffer but should be fine for
745      * now. If not we can split this function.
746      */
747     assert(be64_to_cpu(buf[1]) >= VCPU_OUT_BUF_MIN_SZ);
748 
749     runbuf->size = be64_to_cpu(buf[1]);
750 }
751 
752 /* tell the L1 how big we want the output vcpu run buffer */
753 static void out_buf_min_size(void *a, void *b, bool set)
754 {
755     uint64_t *buf; /* 1 double word */
756 
757     assert(!set);
758 
759     buf = b;
760 
761     buf[0] = cpu_to_be64(VCPU_OUT_BUF_MIN_SZ);
762 }
763 
764 static void copy_logical_pvr(void *a, void *b, bool set)
765 {
766     SpaprMachineStateNestedGuest *guest;
767     uint32_t *buf; /* 1 word */
768     uint32_t *pvr_logical_ptr;
769     uint32_t pvr_logical;
770     target_ulong pcr = 0;
771 
772     pvr_logical_ptr = a;
773     buf = b;
774 
775     if (!set) {
776         buf[0] = cpu_to_be32(*pvr_logical_ptr);
777         return;
778     }
779 
780     pvr_logical = be32_to_cpu(buf[0]);
781 
782     *pvr_logical_ptr = pvr_logical;
783 
784     if (*pvr_logical_ptr) {
785         switch (*pvr_logical_ptr) {
786         case CPU_POWERPC_LOGICAL_3_10_P11:
787         case CPU_POWERPC_LOGICAL_3_10:
788             pcr = PCR_COMPAT_3_10 | PCR_COMPAT_3_00;
789             break;
790         case CPU_POWERPC_LOGICAL_3_00:
791             pcr = PCR_COMPAT_3_00;
792             break;
793         default:
794             qemu_log_mask(LOG_GUEST_ERROR,
795                           "Could not set PCR for LPVR=0x%08x\n",
796                           *pvr_logical_ptr);
797             return;
798         }
799     }
800 
801     guest = container_of(pvr_logical_ptr,
802                          struct SpaprMachineStateNestedGuest,
803                          pvr_logical);
804     for (int i = 0; i < guest->nr_vcpus; i++) {
805         guest->vcpus[i].state.pcr = ~pcr | HVMASK_PCR;
806     }
807 }
808 
809 static void copy_tb_offset(void *a, void *b, bool set)
810 {
811     SpaprMachineStateNestedGuest *guest;
812     uint64_t *buf; /* 1 double word */
813     uint64_t *tb_offset_ptr;
814     uint64_t tb_offset;
815 
816     tb_offset_ptr = a;
817     buf = b;
818 
819     if (!set) {
820         buf[0] = cpu_to_be64(*tb_offset_ptr);
821         return;
822     }
823 
824     tb_offset = be64_to_cpu(buf[0]);
825     /* need to copy this to the individual tb_offset for each vcpu */
826     guest = container_of(tb_offset_ptr,
827                          struct SpaprMachineStateNestedGuest,
828                          tb_offset);
829     for (int i = 0; i < guest->nr_vcpus; i++) {
830         guest->vcpus[i].tb_offset = tb_offset;
831     }
832 }
833 
/*
 * Get or set the 64-bit value at @a (the hdecr expiry timebase) through
 * the one-doubleword big-endian element at @b. @set selects the
 * direction: true stores into @a, false stores into @b.
 */
static void copy_state_hdecr(void *a, void *b, bool set)
{
    uint64_t *expiry_tb = a;
    uint64_t *element = b; /* 1 double word */

    if (set) {
        *expiry_tb = be64_to_cpu(element[0]);
    } else {
        element[0] = cpu_to_be64(*expiry_tb);
    }
}
849 
850 struct guest_state_element_type guest_state_element_types[] = {
851     GUEST_STATE_ELEMENT_NOP(GSB_HV_VCPU_IGNORED_ID, 0),
852     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_GPR0,  gpr[0]),
853     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_GPR1,  gpr[1]),
854     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_GPR2,  gpr[2]),
855     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_GPR3,  gpr[3]),
856     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_GPR4,  gpr[4]),
857     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_GPR5,  gpr[5]),
858     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_GPR6,  gpr[6]),
859     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_GPR7,  gpr[7]),
860     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_GPR8,  gpr[8]),
861     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_GPR9,  gpr[9]),
862     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_GPR10, gpr[10]),
863     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_GPR11, gpr[11]),
864     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_GPR12, gpr[12]),
865     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_GPR13, gpr[13]),
866     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_GPR14, gpr[14]),
867     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_GPR15, gpr[15]),
868     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_GPR16, gpr[16]),
869     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_GPR17, gpr[17]),
870     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_GPR18, gpr[18]),
871     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_GPR19, gpr[19]),
872     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_GPR20, gpr[20]),
873     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_GPR21, gpr[21]),
874     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_GPR22, gpr[22]),
875     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_GPR23, gpr[23]),
876     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_GPR24, gpr[24]),
877     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_GPR25, gpr[25]),
878     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_GPR26, gpr[26]),
879     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_GPR27, gpr[27]),
880     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_GPR28, gpr[28]),
881     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_GPR29, gpr[29]),
882     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_GPR30, gpr[30]),
883     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_GPR31, gpr[31]),
884     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_NIA, nip),
885     GSE_ENV_DWM(GSB_VCPU_SPR_MSR, msr, HVMASK_MSR),
886     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_CTR, ctr),
887     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_LR, lr),
888     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_XER, xer),
889     GUEST_STATE_ELEMENT_ENV_WW(GSB_VCPU_SPR_CR, cr),
890     GUEST_STATE_ELEMENT_NOP_DW(GSB_VCPU_SPR_MMCR3),
891     GUEST_STATE_ELEMENT_NOP_DW(GSB_VCPU_SPR_SIER2),
892     GUEST_STATE_ELEMENT_NOP_DW(GSB_VCPU_SPR_SIER3),
893     GUEST_STATE_ELEMENT_NOP_W(GSB_VCPU_SPR_WORT),
894     GSE_ENV_DWM(GSB_VCPU_SPR_LPCR, lpcr, HVMASK_LPCR),
895     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_AMOR, amor),
896     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_HFSCR, hfscr),
897     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_DAWR0, dawr0),
898     GUEST_STATE_ELEMENT_ENV_W(GSB_VCPU_SPR_DAWRX0, dawrx0),
899     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_CIABR, ciabr),
900     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_PURR,  purr),
901     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_SPURR, spurr),
902     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_IC,    ic),
903     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_VTB,   vtb),
904     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_HDAR,  hdar),
905     GUEST_STATE_ELEMENT_ENV_W(GSB_VCPU_SPR_HDSISR, hdsisr),
906     GUEST_STATE_ELEMENT_ENV_W(GSB_VCPU_SPR_HEIR,   heir),
907     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_ASDR,  asdr),
908     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_SRR0,  srr0),
909     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_SRR1,  srr1),
910     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_SPRG0, sprg0),
911     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_SPRG1, sprg1),
912     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_SPRG2, sprg2),
913     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_SPRG3, sprg3),
914     GUEST_STATE_ELEMENT_ENV_W(GSB_VCPU_SPR_PIDR,   pidr),
915     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_CFAR,  cfar),
916     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_PPR,   ppr),
917     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_DAWR1, dawr1),
918     GUEST_STATE_ELEMENT_ENV_W(GSB_VCPU_SPR_DAWRX1, dawrx1),
919     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_DEXCR, dexcr),
920     GSE_ENV_DWM(GSB_VCPU_SPR_HDEXCR, hdexcr, HVMASK_HDEXCR),
921     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_HASHKEYR, hashkeyr),
922     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_HASHPKEYR, hashpkeyr),
923     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR0, vsr[0]),
924     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR1, vsr[1]),
925     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR2, vsr[2]),
926     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR3, vsr[3]),
927     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR4, vsr[4]),
928     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR5, vsr[5]),
929     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR6, vsr[6]),
930     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR7, vsr[7]),
931     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR8, vsr[8]),
932     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR9, vsr[9]),
933     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR10, vsr[10]),
934     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR11, vsr[11]),
935     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR12, vsr[12]),
936     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR13, vsr[13]),
937     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR14, vsr[14]),
938     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR15, vsr[15]),
939     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR16, vsr[16]),
940     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR17, vsr[17]),
941     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR18, vsr[18]),
942     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR19, vsr[19]),
943     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR20, vsr[20]),
944     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR21, vsr[21]),
945     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR22, vsr[22]),
946     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR23, vsr[23]),
947     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR24, vsr[24]),
948     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR25, vsr[25]),
949     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR26, vsr[26]),
950     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR27, vsr[27]),
951     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR28, vsr[28]),
952     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR29, vsr[29]),
953     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR30, vsr[30]),
954     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR31, vsr[31]),
955     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR32, vsr[32]),
956     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR33, vsr[33]),
957     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR34, vsr[34]),
958     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR35, vsr[35]),
959     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR36, vsr[36]),
960     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR37, vsr[37]),
961     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR38, vsr[38]),
962     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR39, vsr[39]),
963     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR40, vsr[40]),
964     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR41, vsr[41]),
965     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR42, vsr[42]),
966     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR43, vsr[43]),
967     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR44, vsr[44]),
968     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR45, vsr[45]),
969     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR46, vsr[46]),
970     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR47, vsr[47]),
971     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR48, vsr[48]),
972     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR49, vsr[49]),
973     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR50, vsr[50]),
974     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR51, vsr[51]),
975     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR52, vsr[52]),
976     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR53, vsr[53]),
977     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR54, vsr[54]),
978     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR55, vsr[55]),
979     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR56, vsr[56]),
980     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR57, vsr[57]),
981     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR58, vsr[58]),
982     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR59, vsr[59]),
983     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR60, vsr[60]),
984     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR61, vsr[61]),
985     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR62, vsr[62]),
986     GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR63, vsr[63]),
987     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_EBBHR, ebbhr),
988     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_TAR,   tar),
989     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_EBBRR, ebbrr),
990     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_BESCR, bescr),
991     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_IAMR,  iamr),
992     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_AMR,   amr),
993     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_UAMOR, uamor),
994     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_DSCR,  dscr),
995     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_FSCR,  fscr),
996     GUEST_STATE_ELEMENT_ENV_W(GSB_VCPU_SPR_PSPB,   pspb),
997     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_CTRL,  ctrl),
998     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_DPDES, dpdes),
999     GUEST_STATE_ELEMENT_ENV_W(GSB_VCPU_SPR_VRSAVE, vrsave),
1000     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_DAR,   dar),
1001     GUEST_STATE_ELEMENT_ENV_W(GSB_VCPU_SPR_DSISR,  dsisr),
1002     GUEST_STATE_ELEMENT_ENV_W(GSB_VCPU_SPR_PMC1,   pmc1),
1003     GUEST_STATE_ELEMENT_ENV_W(GSB_VCPU_SPR_PMC2,   pmc2),
1004     GUEST_STATE_ELEMENT_ENV_W(GSB_VCPU_SPR_PMC3,   pmc3),
1005     GUEST_STATE_ELEMENT_ENV_W(GSB_VCPU_SPR_PMC4,   pmc4),
1006     GUEST_STATE_ELEMENT_ENV_W(GSB_VCPU_SPR_PMC5,   pmc5),
1007     GUEST_STATE_ELEMENT_ENV_W(GSB_VCPU_SPR_PMC6,   pmc6),
1008     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_MMCR0, mmcr0),
1009     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_MMCR1, mmcr1),
1010     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_MMCR2, mmcr2),
1011     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_MMCRA, mmcra),
1012     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_SDAR , sdar),
1013     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_SIAR , siar),
1014     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_SIER , sier),
1015     GUEST_STATE_ELEMENT_ENV_WW(GSB_VCPU_SPR_VSCR,  vscr),
1016     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_FPSCR, fpscr),
1017     GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_DEC_EXPIRE_TB, dec_expiry_tb),
1018     GSBE_NESTED(GSB_PART_SCOPED_PAGETBL, 0x18, parttbl[0],  copy_state_pagetbl),
1019     GSBE_NESTED(GSB_PROCESS_TBL,         0x10, parttbl[1],  copy_state_proctbl),
1020     GSBE_NESTED(GSB_VCPU_LPVR,           0x4,  pvr_logical, copy_logical_pvr),
1021     GSBE_NESTED_MSK(GSB_TB_OFFSET, 0x8, tb_offset, copy_tb_offset,
1022                     HVMASK_TB_OFFSET),
1023     GSBE_NESTED_VCPU(GSB_VCPU_IN_BUFFER, 0x10, runbufin,    copy_state_runbuf),
1024     GSBE_NESTED_VCPU(GSB_VCPU_OUT_BUFFER, 0x10, runbufout,   copy_state_runbuf),
1025     GSBE_NESTED_VCPU(GSB_VCPU_OUT_BUF_MIN_SZ, 0x8, runbufout, out_buf_min_size),
1026     GSBE_NESTED_VCPU(GSB_VCPU_HDEC_EXPIRY_TB, 0x8, hdecr_expiry_tb,
1027                      copy_state_hdecr),
1028     GSBE_NESTED_MACHINE_DW(GSB_L0_GUEST_HEAP_INUSE, l0_guest_heap_inuse),
1029     GSBE_NESTED_MACHINE_DW(GSB_L0_GUEST_HEAP_MAX, l0_guest_heap_max),
1030     GSBE_NESTED_MACHINE_DW(GSB_L0_GUEST_PGTABLE_SIZE_INUSE,
1031                            l0_guest_pgtable_size_inuse),
1032     GSBE_NESTED_MACHINE_DW(GSB_L0_GUEST_PGTABLE_SIZE_MAX,
1033                            l0_guest_pgtable_size_max),
1034     GSBE_NESTED_MACHINE_DW(GSB_L0_GUEST_PGTABLE_RECLAIMED,
1035                            l0_guest_pgtable_reclaimed),
1036 };
1037 
1038 void spapr_nested_gsb_init(void)
1039 {
1040     struct guest_state_element_type *type;
1041 
1042     /* Init the guest state elements lookup table, flags for now */
1043     for (int i = 0; i < ARRAY_SIZE(guest_state_element_types); i++) {
1044         type = &guest_state_element_types[i];
1045 
1046         assert(type->id <= GSB_LAST);
1047         if (type->id >= GSB_VCPU_SPR_HDAR)
1048             /* 0xf000 - 0xf005 Thread + RO */
1049             type->flags = GUEST_STATE_ELEMENT_TYPE_FLAG_READ_ONLY;
1050         else if (type->id >= GSB_VCPU_IN_BUFFER)
1051             /* 0x0c00 - 0xf000 Thread + RW */
1052             type->flags = 0;
1053         else if (type->id >= GSB_L0_GUEST_HEAP_INUSE)
1054 
1055             /*0x0800 - 0x0804 Hostwide Counters + RO */
1056             type->flags = GUEST_STATE_ELEMENT_TYPE_FLAG_HOST_WIDE |
1057                           GUEST_STATE_ELEMENT_TYPE_FLAG_READ_ONLY;
1058         else if (type->id >= GSB_VCPU_LPVR)
1059             /* 0x0003 - 0x07ff Guest + RW */
1060             type->flags = GUEST_STATE_ELEMENT_TYPE_FLAG_GUEST_WIDE;
1061         else if (type->id >= GSB_HV_VCPU_STATE_SIZE)
1062             /* 0x0001 - 0x0002 Guest + RO */
1063             type->flags = GUEST_STATE_ELEMENT_TYPE_FLAG_READ_ONLY |
1064                           GUEST_STATE_ELEMENT_TYPE_FLAG_GUEST_WIDE;
1065     }
1066 }
1067 
1068 static struct guest_state_element *guest_state_element_next(
1069     struct guest_state_element *element,
1070     int64_t *len,
1071     int64_t *num_elements)
1072 {
1073     uint16_t size;
1074 
1075     /* size is of element->value[] only. Not whole guest_state_element */
1076     size = be16_to_cpu(element->size);
1077 
1078     if (len) {
1079         *len -= size + offsetof(struct guest_state_element, value);
1080     }
1081 
1082     if (num_elements) {
1083         *num_elements -= 1;
1084     }
1085 
1086     return (struct guest_state_element *)(element->value + size);
1087 }
1088 
1089 static
1090 struct guest_state_element_type *guest_state_element_type_find(uint16_t id)
1091 {
1092     int i;
1093 
1094     for (i = 0; i < ARRAY_SIZE(guest_state_element_types); i++)
1095         if (id == guest_state_element_types[i].id) {
1096             return &guest_state_element_types[i];
1097         }
1098 
1099     return NULL;
1100 }
1101 
1102 static void log_element(struct guest_state_element *element,
1103                         struct guest_state_request *gsr)
1104 {
1105     qemu_log_mask(LOG_GUEST_ERROR, "h_guest_%s_state id:0x%04x size:0x%04x",
1106                   gsr->flags & GUEST_STATE_REQUEST_SET ? "set" : "get",
1107                   be16_to_cpu(element->id), be16_to_cpu(element->size));
1108     qemu_log_mask(LOG_GUEST_ERROR, "buf:0x%016"PRIx64" ...\n",
1109                   be64_to_cpu(*(uint64_t *)element->value));
1110 }
1111 
1112 static bool guest_state_request_check(struct guest_state_request *gsr)
1113 {
1114     int64_t num_elements, len = gsr->len;
1115     struct guest_state_buffer *gsb = gsr->gsb;
1116     struct guest_state_element *element;
1117     struct guest_state_element_type *type;
1118     uint16_t id, size;
1119 
1120     /* gsb->num_elements = 0 == 32 bits long */
1121     assert(len >= 4);
1122 
1123     num_elements = be32_to_cpu(gsb->num_elements);
1124     element = gsb->elements;
1125     len -= sizeof(gsb->num_elements);
1126 
1127     /* Walk the buffer to validate the length */
1128     while (num_elements) {
1129 
1130         id = be16_to_cpu(element->id);
1131         size = be16_to_cpu(element->size);
1132 
1133         if (false) {
1134             log_element(element, gsr);
1135         }
1136         /* buffer size too small */
1137         if (len < 0) {
1138             return false;
1139         }
1140 
1141         type = guest_state_element_type_find(id);
1142         if (!type) {
1143             qemu_log_mask(LOG_GUEST_ERROR, "Element ID %04x unknown\n", id);
1144             log_element(element, gsr);
1145             return false;
1146         }
1147 
1148         if (id == GSB_HV_VCPU_IGNORED_ID) {
1149             goto next_element;
1150         }
1151 
1152         if (size != type->size) {
1153             qemu_log_mask(LOG_GUEST_ERROR, "Size mismatch. Element ID:%04x."
1154                           "Size Exp:%i Got:%i\n", id, type->size, size);
1155             log_element(element, gsr);
1156             return false;
1157         }
1158 
1159         if ((type->flags & GUEST_STATE_ELEMENT_TYPE_FLAG_READ_ONLY) &&
1160             (gsr->flags & GUEST_STATE_REQUEST_SET)) {
1161             qemu_log_mask(LOG_GUEST_ERROR, "Trying to set a read-only Element "
1162                           "ID:%04x.\n", id);
1163             return false;
1164         }
1165 
1166         if (type->flags & GUEST_STATE_ELEMENT_TYPE_FLAG_HOST_WIDE) {
1167             /* Hostwide elements cant be clubbed with other types */
1168             if (!(gsr->flags & GUEST_STATE_REQUEST_HOST_WIDE)) {
1169                 qemu_log_mask(LOG_GUEST_ERROR, "trying to get/set a host wide "
1170                               "Element ID:%04x.\n", id);
1171                 return false;
1172             }
1173         } else  if (type->flags & GUEST_STATE_ELEMENT_TYPE_FLAG_GUEST_WIDE) {
1174             /* guest wide element type */
1175             if (!(gsr->flags & GUEST_STATE_REQUEST_GUEST_WIDE)) {
1176                 qemu_log_mask(LOG_GUEST_ERROR, "trying to get/set a guest wide "
1177                               "Element ID:%04x.\n", id);
1178                 return false;
1179             }
1180         } else {
1181             /* thread wide element type */
1182             if (gsr->flags & (GUEST_STATE_REQUEST_GUEST_WIDE |
1183                               GUEST_STATE_REQUEST_HOST_WIDE)) {
1184                 qemu_log_mask(LOG_GUEST_ERROR, "trying to get/set a thread wide"
1185                             " Element ID:%04x.\n", id);
1186                 return false;
1187             }
1188         }
1189 next_element:
1190         element = guest_state_element_next(element, &len, &num_elements);
1191 
1192     }
1193     return true;
1194 }
1195 
1196 static bool is_gsr_invalid(struct guest_state_request *gsr,
1197                                    struct guest_state_element *element,
1198                                    struct guest_state_element_type *type)
1199 {
1200     if ((gsr->flags & GUEST_STATE_REQUEST_SET) &&
1201         (*(uint64_t *)(element->value) & ~(type->mask))) {
1202         log_element(element, gsr);
1203         qemu_log_mask(LOG_GUEST_ERROR, "L1 can't set reserved bits "
1204                       "(allowed mask: 0x%08"PRIx64")\n", type->mask);
1205         return true;
1206     }
1207     return false;
1208 }
1209 
1210 static target_ulong h_guest_get_capabilities(PowerPCCPU *cpu,
1211                                              SpaprMachineState *spapr,
1212                                              target_ulong opcode,
1213                                              target_ulong *args)
1214 {
1215     CPUPPCState *env = &cpu->env;
1216     target_ulong flags = args[0];
1217 
1218     if (flags) { /* don't handle any flags capabilities for now */
1219         return H_PARAMETER;
1220     }
1221 
1222     /* P11 capabilities */
1223     if (ppc_check_compat(cpu, CPU_POWERPC_LOGICAL_3_10_P11, 0,
1224         spapr->max_compat_pvr)) {
1225         env->gpr[4] |= H_GUEST_CAPABILITIES_P11_MODE;
1226     }
1227 
1228     /* P10 capabilities */
1229     if (ppc_check_compat(cpu, CPU_POWERPC_LOGICAL_3_10, 0,
1230         spapr->max_compat_pvr)) {
1231         env->gpr[4] |= H_GUEST_CAPABILITIES_P10_MODE;
1232     }
1233 
1234     /* P9 capabilities */
1235     if (ppc_check_compat(cpu, CPU_POWERPC_LOGICAL_3_00, 0,
1236         spapr->max_compat_pvr)) {
1237         env->gpr[4] |= H_GUEST_CAPABILITIES_P9_MODE;
1238     }
1239 
1240     return H_SUCCESS;
1241 }
1242 
1243 static target_ulong h_guest_set_capabilities(PowerPCCPU *cpu,
1244                                              SpaprMachineState *spapr,
1245                                              target_ulong opcode,
1246                                               target_ulong *args)
1247 {
1248     CPUPPCState *env = &cpu->env;
1249     target_ulong flags = args[0];
1250     target_ulong capabilities = args[1];
1251     env->gpr[4] = 0;
1252 
1253     if (flags) { /* don't handle any flags capabilities for now */
1254         return H_PARAMETER;
1255     }
1256 
1257     if (capabilities & H_GUEST_CAPABILITIES_COPY_MEM) {
1258         env->gpr[4] = 1;
1259         return H_P2; /* isn't supported */
1260     }
1261 
1262     /*
1263      * If there are no capabilities configured, set the R5 to the index of
1264      * the first supported Power Processor Mode
1265      */
1266     if (!capabilities) {
1267         env->gpr[4] = 1;
1268 
1269         /* set R5 to the first supported Power Processor Mode */
1270         if (ppc_check_compat(cpu, CPU_POWERPC_LOGICAL_3_10_P11, 0,
1271                              spapr->max_compat_pvr)) {
1272             env->gpr[5] = H_GUEST_CAP_P11_MODE_BMAP;
1273         } else if (ppc_check_compat(cpu, CPU_POWERPC_LOGICAL_3_10, 0,
1274                              spapr->max_compat_pvr)) {
1275             env->gpr[5] = H_GUEST_CAP_P10_MODE_BMAP;
1276         } else if (ppc_check_compat(cpu, CPU_POWERPC_LOGICAL_3_00, 0,
1277                                     spapr->max_compat_pvr)) {
1278             env->gpr[5] = H_GUEST_CAP_P9_MODE_BMAP;
1279         }
1280 
1281         return H_P2;
1282     }
1283 
1284     /*
1285      * If an invalid capability is set, R5 should contain the index of the
1286      * invalid capability bit
1287      */
1288     if (capabilities & ~H_GUEST_CAP_VALID_MASK) {
1289         env->gpr[4] = 1;
1290 
1291         /* Set R5 to the index of the invalid capability */
1292         env->gpr[5] = 63 - ctz64(capabilities);
1293 
1294         return H_P2;
1295     }
1296 
1297     if (!spapr->nested.capabilities_set) {
1298         spapr->nested.capabilities_set = true;
1299         spapr->nested.pvr_base = env->spr[SPR_PVR];
1300         return H_SUCCESS;
1301     } else {
1302         return H_STATE;
1303     }
1304 }
1305 
1306 static void
1307 destroy_guest_helper(gpointer value)
1308 {
1309     struct SpaprMachineStateNestedGuest *guest = value;
1310     g_free(guest->vcpus);
1311     g_free(guest);
1312 }
1313 
1314 static target_ulong h_guest_create(PowerPCCPU *cpu,
1315                                    SpaprMachineState *spapr,
1316                                    target_ulong opcode,
1317                                    target_ulong *args)
1318 {
1319     CPUPPCState *env = &cpu->env;
1320     target_ulong flags = args[0];
1321     target_ulong continue_token = args[1];
1322     uint64_t guestid;
1323     int nguests = 0;
1324     struct SpaprMachineStateNestedGuest *guest;
1325 
1326     if (flags) { /* don't handle any flags for now */
1327         return H_UNSUPPORTED_FLAG;
1328     }
1329 
1330     if (continue_token != -1) {
1331         return H_P2;
1332     }
1333 
1334     if (!spapr->nested.capabilities_set) {
1335         return H_STATE;
1336     }
1337 
1338     if (!spapr->nested.guests) {
1339         spapr->nested.guests = g_hash_table_new_full(NULL,
1340                                                      NULL,
1341                                                      NULL,
1342                                                      destroy_guest_helper);
1343     }
1344 
1345     nguests = g_hash_table_size(spapr->nested.guests);
1346 
1347     if (nguests == PAPR_NESTED_GUEST_MAX) {
1348         return H_NO_MEM;
1349     }
1350 
1351     /* Lookup for available guestid */
1352     for (guestid = 1; guestid < PAPR_NESTED_GUEST_MAX; guestid++) {
1353         if (!(g_hash_table_lookup(spapr->nested.guests,
1354                                   GINT_TO_POINTER(guestid)))) {
1355             break;
1356         }
1357     }
1358 
1359     if (guestid == PAPR_NESTED_GUEST_MAX) {
1360         return H_NO_MEM;
1361     }
1362 
1363     guest = g_try_new0(struct SpaprMachineStateNestedGuest, 1);
1364     if (!guest) {
1365         return H_NO_MEM;
1366     }
1367 
1368     guest->pvr_logical = spapr->nested.pvr_base;
1369     g_hash_table_insert(spapr->nested.guests, GINT_TO_POINTER(guestid), guest);
1370     env->gpr[4] = guestid;
1371 
1372     return H_SUCCESS;
1373 }
1374 
1375 static target_ulong h_guest_delete(PowerPCCPU *cpu,
1376                                    SpaprMachineState *spapr,
1377                                    target_ulong opcode,
1378                                    target_ulong *args)
1379 {
1380     target_ulong flags = args[0];
1381     target_ulong guestid = args[1];
1382     struct SpaprMachineStateNestedGuest *guest;
1383 
1384     /*
1385      * handle flag deleteAllGuests, if set:
1386      * guestid is ignored and all guests are deleted
1387      *
1388      */
1389     if (flags & ~H_GUEST_DELETE_ALL_FLAG) {
1390         return H_UNSUPPORTED_FLAG; /* other flag bits reserved */
1391     } else if (flags & H_GUEST_DELETE_ALL_FLAG) {
1392         g_hash_table_destroy(spapr->nested.guests);
1393         return H_SUCCESS;
1394     }
1395 
1396     guest = g_hash_table_lookup(spapr->nested.guests, GINT_TO_POINTER(guestid));
1397     if (!guest) {
1398         return H_P2;
1399     }
1400 
1401     g_hash_table_remove(spapr->nested.guests, GINT_TO_POINTER(guestid));
1402 
1403     return H_SUCCESS;
1404 }
1405 
1406 static target_ulong h_guest_create_vcpu(PowerPCCPU *cpu,
1407                                         SpaprMachineState *spapr,
1408                                         target_ulong opcode,
1409                                         target_ulong *args)
1410 {
1411     target_ulong flags = args[0];
1412     target_ulong guestid = args[1];
1413     target_ulong vcpuid = args[2];
1414     SpaprMachineStateNestedGuest *guest;
1415 
1416     if (flags) { /* don't handle any flags for now */
1417         return H_UNSUPPORTED_FLAG;
1418     }
1419 
1420     guest = spapr_get_nested_guest(spapr, guestid);
1421     if (!guest) {
1422         return H_P2;
1423     }
1424 
1425     if (vcpuid < guest->nr_vcpus) {
1426         qemu_log_mask(LOG_UNIMP, "vcpuid " TARGET_FMT_ld " already in use.",
1427                       vcpuid);
1428         return H_IN_USE;
1429     }
1430     /* linear vcpuid allocation only */
1431     assert(vcpuid == guest->nr_vcpus);
1432 
1433     if (guest->nr_vcpus >= PAPR_NESTED_GUEST_VCPU_MAX) {
1434         return H_P3;
1435     }
1436 
1437     SpaprMachineStateNestedGuestVcpu *vcpus, *curr_vcpu;
1438     vcpus = g_try_renew(struct SpaprMachineStateNestedGuestVcpu,
1439                         guest->vcpus,
1440                         guest->nr_vcpus + 1);
1441     if (!vcpus) {
1442         return H_NO_MEM;
1443     }
1444     guest->vcpus = vcpus;
1445     curr_vcpu = &vcpus[guest->nr_vcpus];
1446     memset(curr_vcpu, 0, sizeof(SpaprMachineStateNestedGuestVcpu));
1447 
1448     curr_vcpu->enabled = true;
1449     guest->nr_vcpus++;
1450 
1451     return H_SUCCESS;
1452 }
1453 
1454 static target_ulong getset_state(SpaprMachineState *spapr,
1455                                  SpaprMachineStateNestedGuest *guest,
1456                                  uint64_t vcpuid,
1457                                  struct guest_state_request *gsr)
1458 {
1459     void *ptr;
1460     uint16_t id;
1461     struct guest_state_element *element;
1462     struct guest_state_element_type *type;
1463     int64_t lenleft, num_elements;
1464 
1465     lenleft = gsr->len;
1466 
1467     if (!guest_state_request_check(gsr)) {
1468         return H_P3;
1469     }
1470 
1471     num_elements = be32_to_cpu(gsr->gsb->num_elements);
1472     element = gsr->gsb->elements;
1473     /* Process the elements */
1474     while (num_elements) {
1475         type = NULL;
1476         /* log_element(element, gsr); */
1477 
1478         id = be16_to_cpu(element->id);
1479         if (id == GSB_HV_VCPU_IGNORED_ID) {
1480             goto next_element;
1481         }
1482 
1483         type = guest_state_element_type_find(id);
1484         assert(type);
1485 
1486         /* Get pointer to guest data to get/set */
1487         if (type->location && type->copy) {
1488             ptr = type->location(spapr, guest, vcpuid);
1489             assert(ptr);
1490             if (!~(type->mask) && is_gsr_invalid(gsr, element, type)) {
1491                 return H_INVALID_ELEMENT_VALUE;
1492             }
1493             type->copy(ptr + type->offset, element->value,
1494                        gsr->flags & GUEST_STATE_REQUEST_SET ? true : false);
1495         }
1496 
1497 next_element:
1498         element = guest_state_element_next(element, &lenleft, &num_elements);
1499     }
1500 
1501     return H_SUCCESS;
1502 }
1503 
1504 static target_ulong map_and_getset_state(PowerPCCPU *cpu,
1505                                          SpaprMachineState *spapr,
1506                                          SpaprMachineStateNestedGuest *guest,
1507                                          uint64_t vcpuid,
1508                                          struct guest_state_request *gsr)
1509 {
1510     target_ulong rc;
1511     int64_t len;
1512     bool is_write;
1513 
1514     len = gsr->len;
1515     /* only get_state would require write access to the provided buffer */
1516     is_write = (gsr->flags & GUEST_STATE_REQUEST_SET) ? false : true;
1517     gsr->gsb = address_space_map(CPU(cpu)->as, gsr->buf, (uint64_t *)&len,
1518                                  is_write, MEMTXATTRS_UNSPECIFIED);
1519     if (!gsr->gsb) {
1520         rc = H_P3;
1521         goto out1;
1522     }
1523 
1524     if (len != gsr->len) {
1525         rc = H_P3;
1526         goto out1;
1527     }
1528 
1529     rc = getset_state(spapr, guest, vcpuid, gsr);
1530 
1531 out1:
1532     address_space_unmap(CPU(cpu)->as, gsr->gsb, len, is_write, len);
1533     return rc;
1534 }
1535 
1536 static target_ulong h_guest_getset_state(PowerPCCPU *cpu,
1537                                          SpaprMachineState *spapr,
1538                                          target_ulong *args,
1539                                          bool set)
1540 {
1541     target_ulong flags = args[0];
1542     target_ulong lpid = args[1];
1543     target_ulong vcpuid = args[2];
1544     target_ulong buf = args[3];
1545     target_ulong buflen = args[4];
1546     struct guest_state_request gsr;
1547     SpaprMachineStateNestedGuest *guest = NULL;
1548 
1549     gsr.buf = buf;
1550     assert(buflen <= GSB_MAX_BUF_SIZE);
1551     gsr.len = buflen;
1552     gsr.flags = 0;
1553 
1554     /* Works for both get/set state */
1555     if ((flags & H_GUEST_GET_STATE_FLAGS_GUEST_WIDE) ||
1556         (flags & H_GUEST_SET_STATE_FLAGS_GUEST_WIDE)) {
1557         gsr.flags |= GUEST_STATE_REQUEST_GUEST_WIDE;
1558     }
1559 
1560     if (set) {
1561         if (flags & ~H_GUEST_SET_STATE_FLAGS_MASK) {
1562             return H_PARAMETER;
1563         }
1564         gsr.flags |= GUEST_STATE_REQUEST_SET;
1565     } else {
1566         /*
1567          * No reserved fields to be set in flags nor both
1568          * GUEST/HOST wide bits
1569          */
1570         if ((flags & ~H_GUEST_GET_STATE_FLAGS_MASK) ||
1571             (flags == H_GUEST_GET_STATE_FLAGS_MASK)) {
1572             return H_PARAMETER;
1573         }
1574 
1575         if (flags & H_GUEST_GET_STATE_FLAGS_HOST_WIDE) {
1576             gsr.flags |= GUEST_STATE_REQUEST_HOST_WIDE;
1577         }
1578     }
1579 
1580     if (!(gsr.flags & GUEST_STATE_REQUEST_HOST_WIDE)) {
1581         guest = spapr_get_nested_guest(spapr, lpid);
1582         if (!guest) {
1583             return H_P2;
1584         }
1585     }
1586     return map_and_getset_state(cpu, spapr, guest, vcpuid, &gsr);
1587 }
1588 
1589 static target_ulong h_guest_set_state(PowerPCCPU *cpu,
1590                                       SpaprMachineState *spapr,
1591                                       target_ulong opcode,
1592                                       target_ulong *args)
1593 {
1594     return h_guest_getset_state(cpu, spapr, args, true);
1595 }
1596 
1597 static target_ulong h_guest_get_state(PowerPCCPU *cpu,
1598                                       SpaprMachineState *spapr,
1599                                       target_ulong opcode,
1600                                       target_ulong *args)
1601 {
1602     return h_guest_getset_state(cpu, spapr, args, false);
1603 }
1604 
1605 static void exit_nested_store_l2(PowerPCCPU *cpu, int excp,
1606                                  SpaprMachineStateNestedGuestVcpu *vcpu)
1607 {
1608     CPUPPCState *env = &cpu->env;
1609     SpaprCpuState *spapr_cpu = spapr_cpu_state(cpu);
1610     target_ulong now, hdar, hdsisr, asdr;
1611 
1612     assert(sizeof(env->gpr) == sizeof(vcpu->state.gpr)); /* sanity check */
1613 
1614     now = cpu_ppc_load_tbl(env); /* L2 timebase */
1615     now -= vcpu->tb_offset; /* L1 timebase */
1616     vcpu->state.dec_expiry_tb = now - cpu_ppc_load_decr(env);
1617     cpu_ppc_store_decr(env, spapr_cpu->nested_host_state->dec_expiry_tb - now);
1618     /* backup hdar, hdsisr, asdr if reqd later below */
1619     hdar   = vcpu->state.hdar;
1620     hdsisr = vcpu->state.hdsisr;
1621     asdr   = vcpu->state.asdr;
1622 
1623     nested_save_state(&vcpu->state, cpu);
1624 
1625     if (excp == POWERPC_EXCP_MCHECK ||
1626         excp == POWERPC_EXCP_RESET ||
1627         excp == POWERPC_EXCP_SYSCALL) {
1628         vcpu->state.nip = env->spr[SPR_SRR0];
1629         vcpu->state.msr = env->spr[SPR_SRR1] & env->msr_mask;
1630     } else {
1631         vcpu->state.nip = env->spr[SPR_HSRR0];
1632         vcpu->state.msr = env->spr[SPR_HSRR1] & env->msr_mask;
1633     }
1634 
1635     /* hdar, hdsisr, asdr should be retained unless certain exceptions */
1636     if ((excp != POWERPC_EXCP_HDSI) && (excp != POWERPC_EXCP_HISI)) {
1637         vcpu->state.asdr = asdr;
1638     } else if (excp != POWERPC_EXCP_HDSI) {
1639         vcpu->state.hdar   = hdar;
1640         vcpu->state.hdsisr = hdsisr;
1641     }
1642 }
1643 
1644 static int get_exit_ids(uint64_t srr0, uint16_t ids[16])
1645 {
1646     int nr;
1647 
1648     switch (srr0) {
1649     case 0xc00:
1650         nr = 10;
1651         ids[0] = GSB_VCPU_GPR3;
1652         ids[1] = GSB_VCPU_GPR4;
1653         ids[2] = GSB_VCPU_GPR5;
1654         ids[3] = GSB_VCPU_GPR6;
1655         ids[4] = GSB_VCPU_GPR7;
1656         ids[5] = GSB_VCPU_GPR8;
1657         ids[6] = GSB_VCPU_GPR9;
1658         ids[7] = GSB_VCPU_GPR10;
1659         ids[8] = GSB_VCPU_GPR11;
1660         ids[9] = GSB_VCPU_GPR12;
1661         break;
1662     case 0xe00:
1663         nr = 5;
1664         ids[0] = GSB_VCPU_SPR_HDAR;
1665         ids[1] = GSB_VCPU_SPR_HDSISR;
1666         ids[2] = GSB_VCPU_SPR_ASDR;
1667         ids[3] = GSB_VCPU_SPR_NIA;
1668         ids[4] = GSB_VCPU_SPR_MSR;
1669         break;
1670     case 0xe20:
1671         nr = 4;
1672         ids[0] = GSB_VCPU_SPR_HDAR;
1673         ids[1] = GSB_VCPU_SPR_ASDR;
1674         ids[2] = GSB_VCPU_SPR_NIA;
1675         ids[3] = GSB_VCPU_SPR_MSR;
1676         break;
1677     case 0xe40:
1678         nr = 3;
1679         ids[0] = GSB_VCPU_SPR_HEIR;
1680         ids[1] = GSB_VCPU_SPR_NIA;
1681         ids[2] = GSB_VCPU_SPR_MSR;
1682         break;
1683     case 0xf80:
1684         nr = 3;
1685         ids[0] = GSB_VCPU_SPR_HFSCR;
1686         ids[1] = GSB_VCPU_SPR_NIA;
1687         ids[2] = GSB_VCPU_SPR_MSR;
1688         break;
1689     default:
1690         nr = 0;
1691         break;
1692     }
1693 
1694     return nr;
1695 }
1696 
1697 static void exit_process_output_buffer(SpaprMachineState *spapr,
1698                                        PowerPCCPU *cpu,
1699                                        SpaprMachineStateNestedGuest *guest,
1700                                        target_ulong vcpuid,
1701                                        target_ulong *r3)
1702 {
1703     SpaprMachineStateNestedGuestVcpu *vcpu = &guest->vcpus[vcpuid];
1704     struct guest_state_request gsr;
1705     struct guest_state_buffer *gsb;
1706     struct guest_state_element *element;
1707     struct guest_state_element_type *type;
1708     int exit_id_count = 0;
1709     uint16_t exit_cause_ids[16];
1710     hwaddr len;
1711 
1712     len = vcpu->runbufout.size;
1713     gsb = address_space_map(CPU(cpu)->as, vcpu->runbufout.addr, &len, true,
1714                             MEMTXATTRS_UNSPECIFIED);
1715     if (!gsb || len != vcpu->runbufout.size) {
1716         address_space_unmap(CPU(cpu)->as, gsb, len, true, len);
1717         *r3 = H_P2;
1718         return;
1719     }
1720 
1721     exit_id_count = get_exit_ids(*r3, exit_cause_ids);
1722 
1723     /* Create a buffer of elements to send back */
1724     gsb->num_elements = cpu_to_be32(exit_id_count);
1725     element = gsb->elements;
1726     for (int i = 0; i < exit_id_count; i++) {
1727         type = guest_state_element_type_find(exit_cause_ids[i]);
1728         assert(type);
1729         element->id = cpu_to_be16(exit_cause_ids[i]);
1730         element->size = cpu_to_be16(type->size);
1731         element = guest_state_element_next(element, NULL, NULL);
1732     }
1733     gsr.gsb = gsb;
1734     gsr.len = VCPU_OUT_BUF_MIN_SZ;
1735     gsr.flags = 0; /* get + never guest wide */
1736     getset_state(spapr, guest, vcpuid, &gsr);
1737 
1738     address_space_unmap(CPU(cpu)->as, gsb, len, true, len);
1739 }
1740 
1741 static
1742 void spapr_exit_nested_papr(SpaprMachineState *spapr, PowerPCCPU *cpu, int excp)
1743 {
1744     CPUPPCState *env = &cpu->env;
1745     CPUState *cs = CPU(cpu);
1746     SpaprCpuState *spapr_cpu = spapr_cpu_state(cpu);
1747     target_ulong r3_return = env->excp_vectors[excp]; /* hcall return value */
1748     target_ulong lpid = 0, vcpuid = 0;
1749     struct SpaprMachineStateNestedGuestVcpu *vcpu = NULL;
1750     struct SpaprMachineStateNestedGuest *guest = NULL;
1751 
1752     lpid = spapr_cpu->nested_host_state->gpr[5];
1753     vcpuid = spapr_cpu->nested_host_state->gpr[6];
1754     guest = spapr_get_nested_guest(spapr, lpid);
1755     assert(guest);
1756     spapr_nested_vcpu_check(guest, vcpuid, false);
1757     vcpu = &guest->vcpus[vcpuid];
1758 
1759     exit_nested_store_l2(cpu, excp, vcpu);
1760     /* do the output buffer for run_vcpu*/
1761     exit_process_output_buffer(spapr, cpu, guest, vcpuid, &r3_return);
1762 
1763     assert(env->spr[SPR_LPIDR] != 0);
1764     nested_load_state(cpu, spapr_cpu->nested_host_state);
1765     cpu_ppc_decrease_tb_by_offset(env, vcpu->tb_offset);
1766     env->gpr[3] = H_SUCCESS;
1767     env->gpr[4] = r3_return;
1768     nested_post_load_state(env, cs);
1769     cpu_ppc_hdecr_exit(env);
1770 
1771     spapr_cpu->in_nested = false;
1772     g_free(spapr_cpu->nested_host_state);
1773     spapr_cpu->nested_host_state = NULL;
1774 }
1775 
1776 void spapr_exit_nested(PowerPCCPU *cpu, int excp)
1777 {
1778     SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
1779     SpaprCpuState *spapr_cpu = spapr_cpu_state(cpu);
1780 
1781     assert(spapr_cpu->in_nested);
1782     if (spapr_nested_api(spapr) == NESTED_API_KVM_HV) {
1783         spapr_exit_nested_hv(cpu, excp);
1784     } else if (spapr_nested_api(spapr) == NESTED_API_PAPR) {
1785         spapr_exit_nested_papr(spapr, cpu, excp);
1786     } else {
1787         g_assert_not_reached();
1788     }
1789 }
1790 
1791 static void nested_papr_load_l2(PowerPCCPU *cpu,
1792                                 CPUPPCState *env,
1793                                 SpaprMachineStateNestedGuestVcpu *vcpu,
1794                                 target_ulong now)
1795 {
1796     PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu);
1797     target_ulong lpcr, lpcr_mask, hdec;
1798     lpcr_mask = LPCR_DPFD | LPCR_ILE | LPCR_AIL | LPCR_LD | LPCR_MER;
1799 
1800     assert(vcpu);
1801     assert(sizeof(env->gpr) == sizeof(vcpu->state.gpr));
1802     nested_load_state(cpu, &vcpu->state);
1803     lpcr = (env->spr[SPR_LPCR] & ~lpcr_mask) |
1804            (vcpu->state.lpcr & lpcr_mask);
1805     lpcr |= LPCR_HR | LPCR_UPRT | LPCR_GTSE | LPCR_HVICE | LPCR_HDICE;
1806     lpcr &= ~LPCR_LPES0;
1807     env->spr[SPR_LPCR] = lpcr & pcc->lpcr_mask;
1808 
1809     hdec = vcpu->hdecr_expiry_tb - now;
1810     cpu_ppc_store_decr(env, vcpu->state.dec_expiry_tb - now);
1811     cpu_ppc_hdecr_init(env);
1812     cpu_ppc_store_hdecr(env, hdec);
1813 
1814     cpu_ppc_increase_tb_by_offset(env, vcpu->tb_offset);
1815 }
1816 
1817 static void nested_papr_run_vcpu(PowerPCCPU *cpu,
1818                                  uint64_t lpid,
1819                                  SpaprMachineStateNestedGuestVcpu *vcpu)
1820 {
1821     SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
1822     CPUPPCState *env = &cpu->env;
1823     CPUState *cs = CPU(cpu);
1824     SpaprCpuState *spapr_cpu = spapr_cpu_state(cpu);
1825     target_ulong now = cpu_ppc_load_tbl(env);
1826 
1827     assert(env->spr[SPR_LPIDR] == 0);
1828     assert(spapr->nested.api); /* ensure API version is initialized */
1829     spapr_cpu->nested_host_state = g_try_new(struct nested_ppc_state, 1);
1830     assert(spapr_cpu->nested_host_state);
1831     nested_save_state(spapr_cpu->nested_host_state, cpu);
1832     spapr_cpu->nested_host_state->dec_expiry_tb = now - cpu_ppc_load_decr(env);
1833     nested_papr_load_l2(cpu, env, vcpu, now);
1834     env->spr[SPR_LPIDR] = lpid; /* post load l2 */
1835 
1836     spapr_cpu->in_nested = true;
1837     nested_post_load_state(env, cs);
1838 }
1839 
1840 static target_ulong h_guest_run_vcpu(PowerPCCPU *cpu,
1841                                      SpaprMachineState *spapr,
1842                                      target_ulong opcode,
1843                                      target_ulong *args)
1844 {
1845     CPUPPCState *env = &cpu->env;
1846     target_ulong flags = args[0];
1847     target_ulong lpid = args[1];
1848     target_ulong vcpuid = args[2];
1849     struct SpaprMachineStateNestedGuestVcpu *vcpu;
1850     struct guest_state_request gsr;
1851     SpaprMachineStateNestedGuest *guest;
1852     target_ulong rc;
1853 
1854     if (flags) /* don't handle any flags for now */
1855         return H_PARAMETER;
1856 
1857     guest = spapr_get_nested_guest(spapr, lpid);
1858     if (!guest) {
1859         return H_P2;
1860     }
1861     if (!spapr_nested_vcpu_check(guest, vcpuid, true)) {
1862         return H_P3;
1863     }
1864 
1865     if (guest->parttbl[0] == 0) {
1866         /* At least need a partition scoped radix tree */
1867         return H_NOT_AVAILABLE;
1868     }
1869 
1870     vcpu = &guest->vcpus[vcpuid];
1871 
1872     /* Read run_vcpu input buffer to update state */
1873     gsr.buf = vcpu->runbufin.addr;
1874     gsr.len = vcpu->runbufin.size;
1875     gsr.flags = GUEST_STATE_REQUEST_SET; /* Thread wide + writing */
1876     rc = map_and_getset_state(cpu, spapr,  guest, vcpuid, &gsr);
1877     if (rc == H_SUCCESS) {
1878         nested_papr_run_vcpu(cpu, lpid, vcpu);
1879     } else {
1880         env->gpr[3] = rc;
1881     }
1882     return env->gpr[3];
1883 }
1884 
1885 void spapr_register_nested_hv(void)
1886 {
1887     spapr_register_hypercall(KVMPPC_H_SET_PARTITION_TABLE, h_set_ptbl);
1888     spapr_register_hypercall(KVMPPC_H_ENTER_NESTED, h_enter_nested);
1889     spapr_register_hypercall(KVMPPC_H_TLB_INVALIDATE, h_tlb_invalidate);
1890     spapr_register_hypercall(KVMPPC_H_COPY_TOFROM_GUEST, h_copy_tofrom_guest);
1891 }
1892 
1893 void spapr_unregister_nested_hv(void)
1894 {
1895     spapr_unregister_hypercall(KVMPPC_H_SET_PARTITION_TABLE);
1896     spapr_unregister_hypercall(KVMPPC_H_ENTER_NESTED);
1897     spapr_unregister_hypercall(KVMPPC_H_TLB_INVALIDATE);
1898     spapr_unregister_hypercall(KVMPPC_H_COPY_TOFROM_GUEST);
1899 }
1900 
1901 void spapr_register_nested_papr(void)
1902 {
1903     spapr_register_hypercall(H_GUEST_GET_CAPABILITIES,
1904                              h_guest_get_capabilities);
1905     spapr_register_hypercall(H_GUEST_SET_CAPABILITIES,
1906                              h_guest_set_capabilities);
1907     spapr_register_hypercall(H_GUEST_CREATE, h_guest_create);
1908     spapr_register_hypercall(H_GUEST_DELETE, h_guest_delete);
1909     spapr_register_hypercall(H_GUEST_CREATE_VCPU, h_guest_create_vcpu);
1910     spapr_register_hypercall(H_GUEST_SET_STATE, h_guest_set_state);
1911     spapr_register_hypercall(H_GUEST_GET_STATE, h_guest_get_state);
1912     spapr_register_hypercall(H_GUEST_RUN_VCPU, h_guest_run_vcpu);
1913 }
1914 
1915 void spapr_unregister_nested_papr(void)
1916 {
1917     spapr_unregister_hypercall(H_GUEST_GET_CAPABILITIES);
1918     spapr_unregister_hypercall(H_GUEST_SET_CAPABILITIES);
1919     spapr_unregister_hypercall(H_GUEST_CREATE);
1920     spapr_unregister_hypercall(H_GUEST_DELETE);
1921     spapr_unregister_hypercall(H_GUEST_CREATE_VCPU);
1922     spapr_unregister_hypercall(H_GUEST_SET_STATE);
1923     spapr_unregister_hypercall(H_GUEST_GET_STATE);
1924     spapr_unregister_hypercall(H_GUEST_RUN_VCPU);
1925 }
1926 
1927 #else
1928 void spapr_exit_nested(PowerPCCPU *cpu, int excp)
1929 {
1930     g_assert_not_reached();
1931 }
1932 
/* Stub for the #else branch: there are no KVM-HV hypercalls to register. */
void spapr_register_nested_hv(void)
{
    /* intentionally empty */
}
1937 
/* Stub for the #else branch: there are no KVM-HV hypercalls to unregister. */
void spapr_unregister_nested_hv(void)
{
    /* intentionally empty */
}
1942 
1943 bool spapr_get_pate_nested_hv(SpaprMachineState *spapr, PowerPCCPU *cpu,
1944                               target_ulong lpid, ppc_v3_pate_t *entry)
1945 {
1946     return false;
1947 }
1948 
1949 bool spapr_get_pate_nested_papr(SpaprMachineState *spapr, PowerPCCPU *cpu,
1950                                 target_ulong lpid, ppc_v3_pate_t *entry)
1951 {
1952     return false;
1953 }
1954 
/* Stub for the #else branch: there are no PAPR hypercalls to register. */
void spapr_register_nested_papr(void)
{
    /* intentionally empty */
}
1959 
/* Stub for the #else branch: there are no PAPR hypercalls to unregister. */
void spapr_unregister_nested_papr(void)
{
    /* intentionally empty */
}
1964 
/* Stub for the #else branch: no guest state buffer setup is performed. */
void spapr_nested_gsb_init(void)
{
    /* intentionally empty */
}
1969 
1970 #endif
1971