/*
 *  PowerPC Radix MMU emulation helpers for QEMU.
 *
 *  Copyright (c) 2016 Suraj Jitindar Singh, IBM Corporation
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "cpu.h"
#include "exec/exec-all.h"
#include "exec/page-protection.h"
#include "qemu/error-report.h"
#include "system/kvm.h"
#include "system/memory.h"
#include "kvm_ppc.h"
#include "exec/log.h"
#include "internal.h"
#include "mmu-radix64.h"
#include "mmu-book3s-v3.h"
#include "mmu-books.h"

/* Radix Partition Table Entry Fields */
#define PATE1_R_PRTB           0x0FFFFFFFFFFFF000
#define PATE1_R_PRTS           0x000000000000001F

/* Radix Process Table Entry Fields */
#define PRTBE_R_GET_RTS(rts) \
    ((((rts >> 58) & 0x18) | ((rts >> 5) & 0x7)) + 31)
#define PRTBE_R_RPDB            0x0FFFFFFFFFFFFF00
#define PRTBE_R_RPDS            0x000000000000001F

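/*
 * Worked example for PRTBE_R_GET_RTS() above (illustrative values): the
 * 5-bit Radix Tree Size is split across the entry, with RTS[4:3] held in
 * doubleword bits 62:61 and RTS[2:0] in bits 7:5 (LSB-0 numbering), and
 * the resulting address-space size is RTS + 31 bits. Assuming a standard
 * 52-bit tree, RTS = 0b10101 = 21:
 *
 *   ((prtbe0 >> 58) & 0x18) = 0x10      (doubleword bits 62:61 = 0b10)
 *   ((prtbe0 >> 5) & 0x7)   = 0x05      (doubleword bits 7:5 = 0b101)
 *   PRTBE_R_GET_RTS(prtbe0) = (0x10 | 0x5) + 31 = 52
 */
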
/* Radix Page Directory/Table Entry Fields */
#define R_PTE_VALID             0x8000000000000000
#define R_PTE_LEAF              0x4000000000000000
#define R_PTE_SW0               0x2000000000000000
#define R_PTE_RPN               0x01FFFFFFFFFFF000
#define R_PTE_SW1               0x0000000000000E00
#define R_GET_SW(sw)            (((sw >> 58) & 0x8) | ((sw >> 9) & 0x7))
#define R_PTE_R                 0x0000000000000100
#define R_PTE_C                 0x0000000000000080
#define R_PTE_ATT               0x0000000000000030
#define R_PTE_ATT_NORMAL        0x0000000000000000
#define R_PTE_ATT_SAO           0x0000000000000010
#define R_PTE_ATT_NI_IO         0x0000000000000020
#define R_PTE_ATT_TOLERANT_IO   0x0000000000000030
#define R_PTE_EAA_PRIV          0x0000000000000008
#define R_PTE_EAA_R             0x0000000000000004
#define R_PTE_EAA_RW            0x0000000000000002
#define R_PTE_EAA_X             0x0000000000000001
#define R_PDE_NLB               PRTBE_R_RPDB
#define R_PDE_NLS               PRTBE_R_RPDS

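/*
 * Illustrative decode of a leaf PTE using the masks above; the value is
 * hypothetical, chosen to show a valid, referenced and changed 4 KiB
 * read/write page at real address 0x12345000:
 *
 *   pte = 0xC000000012345186
 *       = R_PTE_VALID | R_PTE_LEAF           (0xC000000000000000)
 *       | (0x12345000 & R_PTE_RPN)           (real page number)
 *       | R_PTE_R | R_PTE_C                  (0x0000000000000180)
 *       | R_PTE_EAA_R | R_PTE_EAA_RW         (0x0000000000000006)
 */
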
static bool ppc_radix64_get_fully_qualified_addr(const CPUPPCState *env,
                                                 vaddr eaddr,
                                                 uint64_t *lpid, uint64_t *pid)
{
    /* When EA(2:11) are nonzero, raise a segment interrupt */
    if (eaddr & ~R_EADDR_VALID_MASK) {
        return false;
    }

    if (FIELD_EX64(env->msr, MSR, HV)) { /* MSR[HV] -> Hypervisor/bare metal */
        switch (eaddr & R_EADDR_QUADRANT) {
        case R_EADDR_QUADRANT0:
            *lpid = 0;
            *pid = env->spr[SPR_BOOKS_PID];
            break;
        case R_EADDR_QUADRANT1:
            *lpid = env->spr[SPR_LPIDR];
            *pid = env->spr[SPR_BOOKS_PID];
            break;
        case R_EADDR_QUADRANT2:
            *lpid = env->spr[SPR_LPIDR];
            *pid = 0;
            break;
        case R_EADDR_QUADRANT3:
            *lpid = 0;
            *pid = 0;
            break;
        default:
            g_assert_not_reached();
        }
    } else {  /* !MSR[HV] -> Guest */
        switch (eaddr & R_EADDR_QUADRANT) {
        case R_EADDR_QUADRANT0: /* Guest application */
            *lpid = env->spr[SPR_LPIDR];
            *pid = env->spr[SPR_BOOKS_PID];
            break;
        case R_EADDR_QUADRANT1: /* Illegal */
        case R_EADDR_QUADRANT2:
            return false;
        case R_EADDR_QUADRANT3: /* Guest OS */
            *lpid = env->spr[SPR_LPIDR];
            *pid = 0; /* pid set to 0 -> addresses guest operating system */
            break;
        default:
            g_assert_not_reached();
        }
    }

    return true;
}

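/*
 * Example of the quadrant selection above (SPR values hypothetical): the
 * quadrant is EA bits 63:62, so with MSR[HV] = 0 a guest kernel access to
 * eaddr 0xC000000000001000 falls in R_EADDR_QUADRANT3 and is qualified as
 * lpid = SPR_LPIDR, pid = 0, while a guest userspace access to
 * 0x0000000000001000 (quadrant 0) uses pid = SPR_BOOKS_PID instead.
 */
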
static void ppc_radix64_raise_segi(PowerPCCPU *cpu, MMUAccessType access_type,
                                   vaddr eaddr)
{
    CPUState *cs = CPU(cpu);
    CPUPPCState *env = &cpu->env;

    switch (access_type) {
    case MMU_INST_FETCH:
        /* Instruction Segment Interrupt */
        cs->exception_index = POWERPC_EXCP_ISEG;
        break;
    case MMU_DATA_STORE:
    case MMU_DATA_LOAD:
        /* Data Segment Interrupt */
        cs->exception_index = POWERPC_EXCP_DSEG;
        env->spr[SPR_DAR] = eaddr;
        break;
    default:
        g_assert_not_reached();
    }
    env->error_code = 0;
}

static inline const char *access_str(MMUAccessType access_type)
{
    return access_type == MMU_DATA_LOAD ? "reading" :
        (access_type == MMU_DATA_STORE ? "writing" : "execute");
}

static void ppc_radix64_raise_si(PowerPCCPU *cpu, MMUAccessType access_type,
                                 vaddr eaddr, uint32_t cause)
{
    CPUState *cs = CPU(cpu);
    CPUPPCState *env = &cpu->env;

    qemu_log_mask(CPU_LOG_MMU, "%s for %s @0x%"VADDR_PRIx" cause %08x\n",
                  __func__, access_str(access_type),
                  eaddr, cause);

    switch (access_type) {
    case MMU_INST_FETCH:
        /* Instruction Storage Interrupt */
        cs->exception_index = POWERPC_EXCP_ISI;
        env->error_code = cause;
        break;
    case MMU_DATA_STORE:
        cause |= DSISR_ISSTORE;
        /* fall through */
    case MMU_DATA_LOAD:
        /* Data Storage Interrupt */
        cs->exception_index = POWERPC_EXCP_DSI;
        env->spr[SPR_DSISR] = cause;
        env->spr[SPR_DAR] = eaddr;
        env->error_code = 0;
        break;
    default:
        g_assert_not_reached();
    }
}

static void ppc_radix64_raise_hsi(PowerPCCPU *cpu, MMUAccessType access_type,
                                  vaddr eaddr, hwaddr g_raddr, uint32_t cause)
{
    CPUState *cs = CPU(cpu);
    CPUPPCState *env = &cpu->env;

    env->error_code = 0;
    if (cause & DSISR_PRTABLE_FAULT) {
        /* HDSI PRTABLE_FAULT gets the originating access type in error_code */
        env->error_code = access_type;
        access_type = MMU_DATA_LOAD;
    }

    qemu_log_mask(CPU_LOG_MMU, "%s for %s @0x%"VADDR_PRIx" 0x%"
                  HWADDR_PRIx" cause %08x\n",
                  __func__, access_str(access_type),
                  eaddr, g_raddr, cause);

    switch (access_type) {
    case MMU_INST_FETCH:
        /* H Instruction Storage Interrupt */
        cs->exception_index = POWERPC_EXCP_HISI;
        env->spr[SPR_ASDR] = g_raddr;
        env->error_code = cause;
        break;
    case MMU_DATA_STORE:
        cause |= DSISR_ISSTORE;
        /* fall through */
    case MMU_DATA_LOAD:
        /* H Data Storage Interrupt */
        cs->exception_index = POWERPC_EXCP_HDSI;
        env->spr[SPR_HDSISR] = cause;
        env->spr[SPR_HDAR] = eaddr;
        env->spr[SPR_ASDR] = g_raddr;
        break;
    default:
        g_assert_not_reached();
    }
}

static int ppc_radix64_get_prot_eaa(uint64_t pte)
{
    return (pte & R_PTE_EAA_R ? PAGE_READ : 0) |
           (pte & R_PTE_EAA_RW ? PAGE_READ | PAGE_WRITE : 0) |
           (pte & R_PTE_EAA_X ? PAGE_EXEC : 0);
}

static int ppc_radix64_get_prot_amr(const PowerPCCPU *cpu)
{
    const CPUPPCState *env = &cpu->env;
    int amr = env->spr[SPR_AMR] >> 62; /* We only care about key0 AMR63:62 */
    int iamr = env->spr[SPR_IAMR] >> 62; /* We only care about key0 IAMR63:62 */

    return (amr & 0x2 ? 0 : PAGE_WRITE) | /* Access denied if bit is set */
           (amr & 0x1 ? 0 : PAGE_READ) |
           (iamr & 0x1 ? 0 : PAGE_EXEC);
}

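/*
 * Worked example for the key 0 AMR/IAMR decode above (SPR values are
 * hypothetical): with SPR_AMR = 0x8000000000000000 (AMR[63:62] = 0b10)
 * and SPR_IAMR = 0, amr = 0x2 and iamr = 0x0, so the function returns
 * PAGE_READ | PAGE_EXEC: key 0 stores are denied while loads and
 * instruction fetches remain allowed.
 */
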
static bool ppc_radix64_check_prot(PowerPCCPU *cpu, MMUAccessType access_type,
                                   uint64_t pte, int *fault_cause, int *prot,
                                   int mmu_idx, bool partition_scoped)
{
    CPUPPCState *env = &cpu->env;

    /* Check Page Attributes (pte58:59) */
    if ((pte & R_PTE_ATT) == R_PTE_ATT_NI_IO && access_type == MMU_INST_FETCH) {
        /*
         * Radix PTE entries with the non-idempotent I/O attribute are treated
         * as guarded storage
         */
        *fault_cause |= SRR1_NOEXEC_GUARD;
        return true;
    }

    /* Determine permissions allowed by Encoded Access Authority */
    if (!partition_scoped && (pte & R_PTE_EAA_PRIV) &&
        FIELD_EX64(env->msr, MSR, PR)) {
        *prot = 0;
    } else if (mmuidx_pr(mmu_idx) || (pte & R_PTE_EAA_PRIV) ||
               partition_scoped) {
        *prot = ppc_radix64_get_prot_eaa(pte);
    } else { /* !mmuidx_pr && !(pte & R_PTE_EAA_PRIV) && !partition_scoped */
        *prot = ppc_radix64_get_prot_eaa(pte);
        *prot &= ppc_radix64_get_prot_amr(cpu); /* Least combined permissions */
    }

    /* Check if the requested access type is allowed */
    if (!check_prot_access_type(*prot, access_type)) {
        /* Page Protected for that Access */
        *fault_cause |= access_type == MMU_INST_FETCH ? SRR1_NOEXEC_GUARD :
                                                        DSISR_PROTFAULT;
        return true;
    }

    return false;
}

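/*
 * Example of the EAA_PRIV handling above: for a process-scoped
 * (partition_scoped == false) problem-state access (MSR[PR] = 1) to a page
 * whose PTE sets R_PTE_EAA_PRIV | R_PTE_EAA_RW, *prot is forced to 0 and
 * even a load faults with DSISR_PROTFAULT; the same PTE accessed in
 * privileged state takes the second branch and grants
 * PAGE_READ | PAGE_WRITE via ppc_radix64_get_prot_eaa().
 */
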
static int ppc_radix64_check_rc(MMUAccessType access_type, uint64_t pte)
{
    switch (access_type) {
    case MMU_DATA_STORE:
        if (!(pte & R_PTE_C)) {
            break;
        }
        /* fall through */
    case MMU_INST_FETCH:
    case MMU_DATA_LOAD:
        if (!(pte & R_PTE_R)) {
            break;
        }

        /* R/C bits are already set appropriately for this access */
        return 0;
    }

    return 1;
}

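/*
 * Example of the R/C check above: a store (MMU_DATA_STORE) to a page with
 * R_PTE_R set but R_PTE_C clear breaks out at the first case and returns 1,
 * which the callers report as a DSISR_ATOMIC_RC interrupt; a load from the
 * same page sees R_PTE_R already set and returns 0.
 */
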
static bool ppc_radix64_is_valid_level(int level, int psize, uint64_t nls)
{
    bool ret;

    /*
     * Check if this is a valid level, according to the POWER9 and POWER10
     * Processor User's Manuals, sections 4.10.4.1 and 5.10.6.1, respectively:
     * Supported Radix Tree Configurations and Resulting Page Sizes.
     *
     * Note: these checks are specific to POWER9 and POWER10 CPUs. Any future
     * CPU that supports a different Radix MMU configuration will need its
     * own implementation.
     */
    switch (level) {
    case 0:     /* Root Page Dir */
        ret = psize == 52 && nls == 13;
        break;
    case 1:
    case 2:
        ret = nls == 9;
        break;
    case 3:
        ret = nls == 9 || nls == 5;
        break;
    default:
        ret = false;
    }

    if (unlikely(!ret)) {
        qemu_log_mask(LOG_GUEST_ERROR, "invalid radix configuration: "
                      "level %d size %d nls %"PRIu64"\n",
                      level, psize, nls);
    }
    return ret;
}

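/*
 * Worked example of a configuration accepted above: a 52-bit tree starts
 * with psize = 52 and nls = 13 at the root, then nls = 9 at levels 1-3,
 * consuming 13 + 9 + 9 + 9 = 40 bits and leaving a 12-bit page offset
 * (4 KiB leaves); nls = 5 at level 3 instead leaves a 16-bit offset
 * (64 KiB leaves). A leaf entry reached at the level 2 or level 1 step
 * (21 or 30 bits remaining) corresponds to 2 MiB or 1 GiB pages
 * respectively.
 */
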
static int ppc_radix64_next_level(AddressSpace *as, vaddr eaddr,
                                  uint64_t *pte_addr, uint64_t *nls,
                                  int *psize, uint64_t *pte, int *fault_cause)
{
    uint64_t index, mask, nlb, pde;

    /* Read page <directory/table> entry from guest address space */
    pde = ldq_phys(as, *pte_addr);
    if (!(pde & R_PTE_VALID)) {         /* Invalid Entry */
        *fault_cause |= DSISR_NOPTE;
        return 1;
    }

    *pte = pde;
    *psize -= *nls;
    if (!(pde & R_PTE_LEAF)) { /* Prepare for next iteration */
        *nls = pde & R_PDE_NLS;
        index = eaddr >> (*psize - *nls);       /* Shift */
        index &= ((1UL << *nls) - 1);           /* Mask */
        nlb = pde & R_PDE_NLB;
        mask = MAKE_64BIT_MASK(0, *nls + 3);

        if (nlb & mask) {
            qemu_log_mask(LOG_GUEST_ERROR,
                "%s: misaligned page dir/table base: 0x%" PRIx64
                " page dir size: 0x%" PRIx64 "\n",
                __func__, nlb, mask + 1);
            nlb &= ~mask;
        }
        *pte_addr = nlb + index * sizeof(pde);
    }
    return 0;
}

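/*
 * Index arithmetic example for ppc_radix64_next_level() above, assuming a
 * standard 52-bit tree: after the root level (*psize becomes 52 - 13 = 39)
 * a next-level PDE with nls = 9 yields
 *
 *   index = (eaddr >> (39 - 9)) & ((1UL << 9) - 1)
 *
 * i.e. EA bits 38:30 select one of 512 eight-byte entries, and the
 * directory base in R_PDE_NLB must be clear in the low nls + 3 = 12 bits
 * (4 KiB aligned).
 */
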
static int ppc_radix64_walk_tree(AddressSpace *as, vaddr eaddr,
                                 uint64_t base_addr, uint64_t nls,
                                 hwaddr *raddr, int *psize, uint64_t *pte,
                                 int *fault_cause, hwaddr *pte_addr)
{
    uint64_t index, pde, rpn, mask;
    int level = 0;

    index = eaddr >> (*psize - nls);    /* Shift */
    index &= ((1UL << nls) - 1);        /* Mask */
    mask = MAKE_64BIT_MASK(0, nls + 3);

    if (base_addr & mask) {
        qemu_log_mask(LOG_GUEST_ERROR,
            "%s: misaligned page dir base: 0x%" PRIx64
            " page dir size: 0x%" PRIx64 "\n",
            __func__, base_addr, mask + 1);
        base_addr &= ~mask;
    }
    *pte_addr = base_addr + index * sizeof(pde);

    do {
        int ret;

        if (!ppc_radix64_is_valid_level(level++, *psize, nls)) {
            *fault_cause |= DSISR_R_BADCONFIG;
            return 1;
        }

        ret = ppc_radix64_next_level(as, eaddr, pte_addr, &nls, psize, &pde,
                                     fault_cause);
        if (ret) {
            return ret;
        }
    } while (!(pde & R_PTE_LEAF));

    *pte = pde;
    rpn = pde & R_PTE_RPN;
    mask = (1UL << *psize) - 1;

    /* OR the RPN's high bits with the EA's low bits to get the real addr */
    *raddr = (rpn & ~mask) | (eaddr & mask);
    return 0;
}

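/*
 * Real-address composition example for the tree walk above (addresses are
 * hypothetical): with a 2 MiB leaf (*psize = 21), mask = 0x1FFFFF, so for
 * RPN bits 0x10000000 and an EA whose low bits are 0x123456:
 *
 *   *raddr = (0x10000000 & ~0x1FFFFF) | (0x123456 & 0x1FFFFF)
 *          = 0x10000000 | 0x123456
 *          = 0x10123456
 */
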
static bool validate_pate(PowerPCCPU *cpu, uint64_t lpid, ppc_v3_pate_t *pate)
{
    CPUPPCState *env = &cpu->env;

    if (!(pate->dw0 & PATE0_HR)) {
        return false;
    }
    if (lpid == 0 && !FIELD_EX64(env->msr, MSR, HV)) {
        return false;
    }
    if ((pate->dw0 & PATE1_R_PRTS) < 5) {
        return false;
    }
    /* More checks ... */
    return true;
}

static int ppc_radix64_partition_scoped_xlate(PowerPCCPU *cpu,
                                              MMUAccessType orig_access_type,
                                              vaddr eaddr, hwaddr g_raddr,
                                              ppc_v3_pate_t pate,
                                              hwaddr *h_raddr, int *h_prot,
                                              int *h_page_size, bool pde_addr,
                                              int mmu_idx, uint64_t lpid,
                                              bool guest_visible)
{
    MMUAccessType access_type = orig_access_type;
    int fault_cause = 0;
    hwaddr pte_addr;
    uint64_t pte;

    if (pde_addr) {
        /*
         * Translation of process-scoped tables/directories is performed as
         * a read-access.
         */
        access_type = MMU_DATA_LOAD;
    }

    qemu_log_mask(CPU_LOG_MMU, "%s for %s @0x%"VADDR_PRIx
                  " mmu_idx %u 0x%"HWADDR_PRIx"\n",
                  __func__, access_str(access_type),
                  eaddr, mmu_idx, g_raddr);

    *h_page_size = PRTBE_R_GET_RTS(pate.dw0);
    /* No valid pte or access denied due to protection */
    if (ppc_radix64_walk_tree(CPU(cpu)->as, g_raddr, pate.dw0 & PRTBE_R_RPDB,
                              pate.dw0 & PRTBE_R_RPDS, h_raddr, h_page_size,
                              &pte, &fault_cause, &pte_addr) ||
        ppc_radix64_check_prot(cpu, access_type, pte,
                               &fault_cause, h_prot, mmu_idx, true)) {
        if (pde_addr) { /* address being translated was that of a guest pde */
            fault_cause |= DSISR_PRTABLE_FAULT;
        }
        if (guest_visible) {
            ppc_radix64_raise_hsi(cpu, orig_access_type,
                                  eaddr, g_raddr, fault_cause);
        }
        return 1;
    }

    if (guest_visible) {
        if (ppc_radix64_check_rc(access_type, pte)) {
            /*
             * Per ISA 3.1 Book III, 7.5.3 and 7.5.5, failure to set R/C during
             * partition-scoped translation when effLPID = 0 results in normal
             * (non-Hypervisor) Data and Instruction Storage Interrupts
             * respectively.
             *
             * ISA 3.0 is ambiguous about this, but tests on POWER9 hardware
             * seem to exhibit the same behavior.
             */
            if (lpid > 0) {
                ppc_radix64_raise_hsi(cpu, access_type, eaddr, g_raddr,
                                      DSISR_ATOMIC_RC);
            } else {
                ppc_radix64_raise_si(cpu, access_type, eaddr, DSISR_ATOMIC_RC);
            }
            return 1;
        }
    }

    return 0;
}

/*
 * The spapr vhc has a flat partition scope provided by QEMU memory when
 * not nested.
 *
 * When running a nested guest, the addressing is 2-level radix on top of the
 * vhc memory, so it works practically identically to the bare metal 2-level
 * radix. So that code is selected directly. A cleaner and more flexible nested
 * hypervisor implementation would allow the vhc to provide a ->nested_xlate()
 * function but that is not required for the moment.
 */
static bool vhyp_flat_addressing(PowerPCCPU *cpu)
{
    if (cpu->vhyp) {
        return !vhyp_cpu_in_nested(cpu);
    }
    return false;
}

static int ppc_radix64_process_scoped_xlate(PowerPCCPU *cpu,
                                            MMUAccessType access_type,
                                            vaddr eaddr, uint64_t pid,
                                            ppc_v3_pate_t pate, hwaddr *g_raddr,
                                            int *g_prot, int *g_page_size,
                                            int mmu_idx, uint64_t lpid,
                                            bool guest_visible)
{
    CPUState *cs = CPU(cpu);
    CPUPPCState *env = &cpu->env;
    uint64_t offset, size, prtb, prtbe_addr, prtbe0, base_addr, nls, index, pte;
    int fault_cause = 0, h_page_size, h_prot;
    hwaddr h_raddr, pte_addr;
    int ret;

    qemu_log_mask(CPU_LOG_MMU, "%s for %s @0x%"VADDR_PRIx
                  " mmu_idx %u pid %"PRIu64"\n",
                  __func__, access_str(access_type),
                  eaddr, mmu_idx, pid);

    prtb = (pate.dw1 & PATE1_R_PRTB);
    size = 1ULL << ((pate.dw1 & PATE1_R_PRTS) + 12);
    if (prtb & (size - 1)) {
        /* Process Table not properly aligned */
        if (guest_visible) {
            ppc_radix64_raise_si(cpu, access_type, eaddr, DSISR_R_BADCONFIG);
        }
        return 1;
    }

    /* Index Process Table by PID to Find Corresponding Process Table Entry */
    offset = pid * sizeof(struct prtb_entry);
    if (offset >= size) {
        /* offset exceeds size of the process table */
        if (guest_visible) {
            ppc_radix64_raise_si(cpu, access_type, eaddr, DSISR_NOPTE);
        }
        return 1;
    }
    prtbe_addr = prtb + offset;

    if (vhyp_flat_addressing(cpu)) {
        prtbe0 = ldq_phys(cs->as, prtbe_addr);
    } else {
        /*
         * Process table addresses are subject to partition-scoped
         * translation.
         *
         * On a Radix host, the partition-scoped page table for LPID=0
         * is only used to translate the effective addresses of the
         * process table entries.
         */
        /* mmu_idx is 5 because we're translating from hypervisor scope */
        ret = ppc_radix64_partition_scoped_xlate(cpu, access_type, eaddr,
                                                 prtbe_addr, pate, &h_raddr,
                                                 &h_prot, &h_page_size, true,
                                                 5, lpid, guest_visible);
        if (ret) {
            return ret;
        }
        prtbe0 = ldq_phys(cs->as, h_raddr);
    }

    /*
     * Some Linux kernels use a zero process table entry with PID != 0 for a
     * kernel context without userspace, in order to fault on NULL
     * dereference, because using PIDR=0 for the kernel causes the Q0 page
     * table to be used to translate Q3 as well. Check for that case here to
     * avoid the invalid configuration message.
     */
    if (unlikely(!prtbe0)) {
        if (guest_visible) {
            ppc_radix64_raise_si(cpu, access_type, eaddr, DSISR_R_BADCONFIG);
        }
        return 1;
    }

    /* Walk Radix Tree from Process Table Entry to Convert EA to RA */
    *g_page_size = PRTBE_R_GET_RTS(prtbe0);
    base_addr = prtbe0 & PRTBE_R_RPDB;
    nls = prtbe0 & PRTBE_R_RPDS;
    if (FIELD_EX64(env->msr, MSR, HV) || vhyp_flat_addressing(cpu)) {
        /*
         * Can treat process table addresses as real addresses
         */
        ret = ppc_radix64_walk_tree(cs->as, eaddr & R_EADDR_MASK, base_addr,
                                    nls, g_raddr, g_page_size, &pte,
                                    &fault_cause, &pte_addr);
        if (ret) {
            /* No valid PTE */
            if (guest_visible) {
                ppc_radix64_raise_si(cpu, access_type, eaddr, fault_cause);
            }
            return ret;
        }
    } else {
        uint64_t rpn, mask;
        int level = 0;

        index = (eaddr & R_EADDR_MASK) >> (*g_page_size - nls); /* Shift */
        index &= ((1UL << nls) - 1);                            /* Mask */
        pte_addr = base_addr + (index * sizeof(pte));

        /*
         * Each process table address is subject to a partition-scoped
         * translation
         */
        do {
            /* mmu_idx is 5 because we're translating from hypervisor scope */
            ret = ppc_radix64_partition_scoped_xlate(cpu, access_type, eaddr,
                                                     pte_addr, pate, &h_raddr,
                                                     &h_prot, &h_page_size,
                                                     true, 5, lpid,
                                                     guest_visible);
            if (ret) {
                return ret;
            }

            if (!ppc_radix64_is_valid_level(level++, *g_page_size, nls)) {
                fault_cause |= DSISR_R_BADCONFIG;
                ret = 1;
            } else {
                ret = ppc_radix64_next_level(cs->as, eaddr & R_EADDR_MASK,
                                             &h_raddr, &nls, g_page_size,
                                             &pte, &fault_cause);
            }

            if (ret) {
                /* No valid pte */
                if (guest_visible) {
                    ppc_radix64_raise_si(cpu, access_type, eaddr, fault_cause);
                }
                return ret;
            }
            pte_addr = h_raddr;
        } while (!(pte & R_PTE_LEAF));

        rpn = pte & R_PTE_RPN;
        mask = (1UL << *g_page_size) - 1;

        /* OR the RPN's high bits with the EA's low bits to get the real addr */
        *g_raddr = (rpn & ~mask) | (eaddr & mask);
    }

    if (ppc_radix64_check_prot(cpu, access_type, pte, &fault_cause,
                               g_prot, mmu_idx, false)) {
        /* Access denied due to protection */
        if (guest_visible) {
            ppc_radix64_raise_si(cpu, access_type, eaddr, fault_cause);
        }
        return 1;
    }

    if (guest_visible) {
        /* R/C bits not appropriately set for access */
        if (ppc_radix64_check_rc(access_type, pte)) {
            ppc_radix64_raise_si(cpu, access_type, eaddr, DSISR_ATOMIC_RC);
            return 1;
        }
    }

    return 0;
}

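/*
 * Note on the nested walk above: when the addressing is not flat, every
 * PDE/PTE address of the process-scoped walk is itself run through
 * ppc_radix64_partition_scoped_xlate(), so a 4-level process-scoped walk
 * under a 4-level partition-scoped tree can cost on the order of 4 x 4
 * loads plus the process-table entry translation. Hardware typically hides
 * this with page-walk caches; this code simply re-walks on every
 * translation.
 */
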
/*
 * Radix tree translation is a two-step translation process:
 *
 * 1. Process-scoped translation:   Guest Eff Addr  -> Guest Real Addr
 * 2. Partition-scoped translation: Guest Real Addr -> Host Real Addr
 *
 *                                  MSR[HV]
 *              +-------------+----------------+---------------+
 *              |             |     HV = 0     |     HV = 1    |
 *              +-------------+----------------+---------------+
 *              | Relocation  |    Partition   |      No       |
 *              | = Off       |     Scoped     |  Translation  |
 *  Relocation  +-------------+----------------+---------------+
 *              | Relocation  |   Partition &  |    Process    |
 *              | = On        | Process Scoped |    Scoped     |
 *              +-------------+----------------+---------------+
 */
static bool ppc_radix64_xlate_impl(PowerPCCPU *cpu, vaddr eaddr,
                                   MMUAccessType access_type, hwaddr *raddr,
                                   int *psizep, int *protp, int mmu_idx,
                                   bool guest_visible)
{
    CPUPPCState *env = &cpu->env;
    uint64_t lpid, pid;
    ppc_v3_pate_t pate;
    int psize, prot;
    hwaddr g_raddr;
    bool relocation;

    assert(!(mmuidx_hv(mmu_idx) && cpu->vhyp));

    relocation = !mmuidx_real(mmu_idx);

    /* HV or virtual hypervisor Real Mode Access */
    if (!relocation && (mmuidx_hv(mmu_idx) || vhyp_flat_addressing(cpu))) {
        /* In real mode top 4 effective addr bits (mostly) ignored */
        *raddr = eaddr & 0x0FFFFFFFFFFFFFFFULL;

        /* In HV mode, add HRMOR if top EA bit is clear */
        if (mmuidx_hv(mmu_idx) || !env->has_hv_mode) {
            if (!(eaddr >> 63)) {
                *raddr |= env->spr[SPR_HRMOR];
            }
        }
        *protp = PAGE_READ | PAGE_WRITE | PAGE_EXEC;
        *psizep = TARGET_PAGE_BITS;
        return true;
    }

    /*
     * Check UPRT (we avoid the check in real mode to deal with
     * transitional states during kexec).
     */
    if (guest_visible && !ppc64_use_proc_tbl(cpu)) {
        qemu_log_mask(LOG_GUEST_ERROR,
                      "LPCR:UPRT not set in radix mode ! LPCR="
                      TARGET_FMT_lx "\n", env->spr[SPR_LPCR]);
    }

    /* Virtual Mode Access - get the fully qualified address */
    if (!ppc_radix64_get_fully_qualified_addr(&cpu->env, eaddr, &lpid, &pid)) {
        if (guest_visible) {
            ppc_radix64_raise_segi(cpu, access_type, eaddr);
        }
        return false;
    }

    /* Get Partition Table */
    if (cpu->vhyp) {
        if (!cpu->vhyp_class->get_pate(cpu->vhyp, cpu, lpid, &pate)) {
            if (guest_visible) {
                ppc_radix64_raise_hsi(cpu, access_type, eaddr, eaddr,
                                      DSISR_R_BADCONFIG);
            }
            return false;
        }
    } else {
        if (!ppc64_v3_get_pate(cpu, lpid, &pate)) {
            if (guest_visible) {
                ppc_radix64_raise_hsi(cpu, access_type, eaddr, eaddr,
                                      DSISR_R_BADCONFIG);
            }
            return false;
        }
        if (!validate_pate(cpu, lpid, &pate)) {
            if (guest_visible) {
                ppc_radix64_raise_hsi(cpu, access_type, eaddr, eaddr,
                                      DSISR_R_BADCONFIG);
            }
            return false;
        }
    }

    *psizep = INT_MAX;
    *protp = PAGE_READ | PAGE_WRITE | PAGE_EXEC;

    /*
     * Perform process-scoped translation if relocation enabled.
     *
     * - Translates an effective address to a host real address in
     *   quadrants 0 and 3 when HV=1.
     *
     * - Translates an effective address to a guest real address.
     */
    if (relocation) {
        int ret = ppc_radix64_process_scoped_xlate(cpu, access_type, eaddr, pid,
                                                   pate, &g_raddr, &prot,
                                                   &psize, mmu_idx, lpid,
                                                   guest_visible);
        if (ret) {
            return false;
        }
        *psizep = MIN(*psizep, psize);
        *protp &= prot;
    } else {
        g_raddr = eaddr & R_EADDR_MASK;
    }

    if (vhyp_flat_addressing(cpu)) {
        *raddr = g_raddr;
    } else {
        /*
         * Perform partition-scoped translation if !HV or HV access to
         * quadrants 1 or 2. Translates a guest real address to a host
         * real address.
         */
        if (lpid || !mmuidx_hv(mmu_idx)) {
            int ret;

            ret = ppc_radix64_partition_scoped_xlate(cpu, access_type, eaddr,
                                                     g_raddr, pate, raddr,
                                                     &prot, &psize, false,
                                                     mmu_idx, lpid,
                                                     guest_visible);
            if (ret) {
                return false;
            }
            *psizep = MIN(*psizep, psize);
            *protp &= prot;
        } else {
            *raddr = g_raddr;
        }
    }

    return true;
}

bool ppc_radix64_xlate(PowerPCCPU *cpu, vaddr eaddr, MMUAccessType access_type,
                       hwaddr *raddrp, int *psizep, int *protp, int mmu_idx,
                       bool guest_visible)
{
    bool ret = ppc_radix64_xlate_impl(cpu, eaddr, access_type, raddrp,
                                      psizep, protp, mmu_idx, guest_visible);

    qemu_log_mask(CPU_LOG_MMU, "%s for %s @0x%"VADDR_PRIx
                  " mmu_idx %u (prot %c%c%c) -> 0x%"HWADDR_PRIx"\n",
                  __func__, access_str(access_type),
                  eaddr, mmu_idx,
                  *protp & PAGE_READ ? 'r' : '-',
                  *protp & PAGE_WRITE ? 'w' : '-',
                  *protp & PAGE_EXEC ? 'x' : '-',
                  *raddrp);

    return ret;
}
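
/*
 * Illustrative caller sketch (not part of this file; names abbreviated):
 * the softmmu fault path, e.g. ppc_xlate() in mmu_helper.c, resolves an
 * access roughly as follows:
 *
 *   hwaddr raddr;
 *   int psize, prot;
 *
 *   if (ppc_radix64_xlate(cpu, eaddr, access_type, &raddr,
 *                         &psize, &prot, mmu_idx, true)) {
 *       ... install the translation, clamped to psize ...
 *   } else {
 *       ... the raise_* helpers above have already set the interrupt
 *           state (exception_index, DAR/DSISR, etc.) to be delivered ...
 *   }
 */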