xref: /qemu/hw/riscv/riscv-iommu.c (revision f07a5674cf97b8473e5d06d7b1df9b51e97d553f)
1 /*
2  * QEMU emulation of an RISC-V IOMMU
3  *
4  * Copyright (C) 2021-2023, Rivos Inc.
5  *
6  * This program is free software; you can redistribute it and/or modify it
7  * under the terms and conditions of the GNU General Public License,
8  * version 2 or later, as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License along
16  * with this program; if not, see <http://www.gnu.org/licenses/>.
17  */
18 
19 #include "qemu/osdep.h"
20 #include "qom/object.h"
21 #include "hw/pci/pci_bus.h"
22 #include "hw/pci/pci_device.h"
23 #include "hw/qdev-properties.h"
24 #include "hw/riscv/riscv_hart.h"
25 #include "migration/vmstate.h"
26 #include "qapi/error.h"
27 #include "qemu/timer.h"
28 
29 #include "cpu_bits.h"
30 #include "riscv-iommu.h"
31 #include "riscv-iommu-bits.h"
32 #include "riscv-iommu-hpm.h"
33 #include "trace.h"
34 
35 #define LIMIT_CACHE_CTX               (1U << 7)
36 #define LIMIT_CACHE_IOT               (1U << 20)
37 
38 /* Physical page number coversions */
39 #define PPN_PHYS(ppn)                 ((ppn) << TARGET_PAGE_BITS)
40 #define PPN_DOWN(phy)                 ((phy) >> TARGET_PAGE_BITS)
41 
42 typedef struct RISCVIOMMUEntry RISCVIOMMUEntry;
43 
44 /* Device assigned I/O address space */
45 struct RISCVIOMMUSpace {
46     IOMMUMemoryRegion iova_mr;  /* IOVA memory region for attached device */
47     AddressSpace iova_as;       /* IOVA address space for attached device */
48     RISCVIOMMUState *iommu;     /* Managing IOMMU device state */
49     uint32_t devid;             /* Requester identifier, AKA device_id */
50     bool notifier;              /* IOMMU unmap notifier enabled */
51     QLIST_ENTRY(RISCVIOMMUSpace) list;
52 };
53 
54 typedef enum RISCVIOMMUTransTag {
55     RISCV_IOMMU_TRANS_TAG_BY,  /* Bypass */
56     RISCV_IOMMU_TRANS_TAG_SS,  /* Single Stage */
57     RISCV_IOMMU_TRANS_TAG_VG,  /* G-stage only */
58     RISCV_IOMMU_TRANS_TAG_VN,  /* Nested translation */
59 } RISCVIOMMUTransTag;
60 
61 /* Address translation cache entry */
62 struct RISCVIOMMUEntry {
63     RISCVIOMMUTransTag tag;     /* Translation Tag */
64     uint64_t iova:44;           /* IOVA Page Number */
65     uint64_t pscid:20;          /* Process Soft-Context identifier */
66     uint64_t phys:44;           /* Physical Page Number */
67     uint64_t gscid:16;          /* Guest Soft-Context identifier */
68     uint64_t perm:2;            /* IOMMU_RW flags */
69 };
70 
71 /* IOMMU index for transactions without process_id specified. */
72 #define RISCV_IOMMU_NOPROCID 0
73 
74 static uint8_t riscv_iommu_get_icvec_vector(uint32_t icvec, uint32_t vec_type)
75 {
76     switch (vec_type) {
77     case RISCV_IOMMU_INTR_CQ:
78         return icvec & RISCV_IOMMU_ICVEC_CIV;
79     case RISCV_IOMMU_INTR_FQ:
80         return (icvec & RISCV_IOMMU_ICVEC_FIV) >> 4;
81     case RISCV_IOMMU_INTR_PM:
82         return (icvec & RISCV_IOMMU_ICVEC_PMIV) >> 8;
83     case RISCV_IOMMU_INTR_PQ:
84         return (icvec & RISCV_IOMMU_ICVEC_PIV) >> 12;
85     default:
86         g_assert_not_reached();
87     }
88 }
89 
90 void riscv_iommu_notify(RISCVIOMMUState *s, int vec_type)
91 {
92     uint32_t ipsr, icvec, vector;
93 
94     if (!s->notify) {
95         return;
96     }
97 
98     icvec = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_ICVEC);
99     ipsr = riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_IPSR, (1 << vec_type), 0);
100 
101     if (!(ipsr & (1 << vec_type))) {
102         vector = riscv_iommu_get_icvec_vector(icvec, vec_type);
103         s->notify(s, vector);
104         trace_riscv_iommu_notify_int_vector(vec_type, vector);
105     }
106 }
107 
108 static void riscv_iommu_fault(RISCVIOMMUState *s,
109                               struct riscv_iommu_fq_record *ev)
110 {
111     uint32_t ctrl = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_FQCSR);
112     uint32_t head = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_FQH) & s->fq_mask;
113     uint32_t tail = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_FQT) & s->fq_mask;
114     uint32_t next = (tail + 1) & s->fq_mask;
115     uint32_t devid = get_field(ev->hdr, RISCV_IOMMU_FQ_HDR_DID);
116 
117     trace_riscv_iommu_flt(s->parent_obj.id, PCI_BUS_NUM(devid), PCI_SLOT(devid),
118                           PCI_FUNC(devid), ev->hdr, ev->iotval);
119 
120     if (!(ctrl & RISCV_IOMMU_FQCSR_FQON) ||
121         !!(ctrl & (RISCV_IOMMU_FQCSR_FQOF | RISCV_IOMMU_FQCSR_FQMF))) {
122         return;
123     }
124 
125     if (head == next) {
126         riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_FQCSR,
127                               RISCV_IOMMU_FQCSR_FQOF, 0);
128     } else {
129         dma_addr_t addr = s->fq_addr + tail * sizeof(*ev);
130         if (dma_memory_write(s->target_as, addr, ev, sizeof(*ev),
131                              MEMTXATTRS_UNSPECIFIED) != MEMTX_OK) {
132             riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_FQCSR,
133                                   RISCV_IOMMU_FQCSR_FQMF, 0);
134         } else {
135             riscv_iommu_reg_set32(s, RISCV_IOMMU_REG_FQT, next);
136         }
137     }
138 
139     if (ctrl & RISCV_IOMMU_FQCSR_FIE) {
140         riscv_iommu_notify(s, RISCV_IOMMU_INTR_FQ);
141     }
142 }
143 
144 static void riscv_iommu_pri(RISCVIOMMUState *s,
145     struct riscv_iommu_pq_record *pr)
146 {
147     uint32_t ctrl = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_PQCSR);
148     uint32_t head = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_PQH) & s->pq_mask;
149     uint32_t tail = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_PQT) & s->pq_mask;
150     uint32_t next = (tail + 1) & s->pq_mask;
151     uint32_t devid = get_field(pr->hdr, RISCV_IOMMU_PREQ_HDR_DID);
152 
153     trace_riscv_iommu_pri(s->parent_obj.id, PCI_BUS_NUM(devid), PCI_SLOT(devid),
154                           PCI_FUNC(devid), pr->payload);
155 
156     if (!(ctrl & RISCV_IOMMU_PQCSR_PQON) ||
157         !!(ctrl & (RISCV_IOMMU_PQCSR_PQOF | RISCV_IOMMU_PQCSR_PQMF))) {
158         return;
159     }
160 
161     if (head == next) {
162         riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_PQCSR,
163                               RISCV_IOMMU_PQCSR_PQOF, 0);
164     } else {
165         dma_addr_t addr = s->pq_addr + tail * sizeof(*pr);
166         if (dma_memory_write(s->target_as, addr, pr, sizeof(*pr),
167                              MEMTXATTRS_UNSPECIFIED) != MEMTX_OK) {
168             riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_PQCSR,
169                                   RISCV_IOMMU_PQCSR_PQMF, 0);
170         } else {
171             riscv_iommu_reg_set32(s, RISCV_IOMMU_REG_PQT, next);
172         }
173     }
174 
175     if (ctrl & RISCV_IOMMU_PQCSR_PIE) {
176         riscv_iommu_notify(s, RISCV_IOMMU_INTR_PQ);
177     }
178 }
179 
180 /*
181  * Discards all bits from 'val' whose matching bits in the same
182  * positions in the mask 'ext' are zeros, and packs the remaining
183  * bits from 'val' contiguously at the least-significant end of the
184  * result, keeping the same bit order as 'val' and filling any
185  * other bits at the most-significant end of the result with zeros.
186  *
187  * For example, for the following 'val' and 'ext', the return 'ret'
188  * will be:
189  *
190  * val = a b c d e f g h
191  * ext = 1 0 1 0 0 1 1 0
192  * ret = 0 0 0 0 a c f g
193  *
194  * This function, taken from the riscv-iommu 1.0 spec, section 2.3.3
195  * "Process to translate addresses of MSIs", is similar to bit manip
196  * function PEXT (Parallel bits extract) from x86.
197  */
198 static uint64_t riscv_iommu_pext_u64(uint64_t val, uint64_t ext)
199 {
200     uint64_t ret = 0;
201     uint64_t rot = 1;
202 
203     while (ext) {
204         if (ext & 1) {
205             if (val & 1) {
206                 ret |= rot;
207             }
208             rot <<= 1;
209         }
210         val >>= 1;
211         ext >>= 1;
212     }
213 
214     return ret;
215 }
216 
217 /* Check if GPA matches MSI/MRIF pattern. */
218 static bool riscv_iommu_msi_check(RISCVIOMMUState *s, RISCVIOMMUContext *ctx,
219     dma_addr_t gpa)
220 {
221     if (!s->enable_msi) {
222         return false;
223     }
224 
225     if (get_field(ctx->msiptp, RISCV_IOMMU_DC_MSIPTP_MODE) !=
226         RISCV_IOMMU_DC_MSIPTP_MODE_FLAT) {
227         return false; /* Invalid MSI/MRIF mode */
228     }
229 
230     if ((PPN_DOWN(gpa) ^ ctx->msi_addr_pattern) & ~ctx->msi_addr_mask) {
231         return false; /* GPA not in MSI range defined by AIA IMSIC rules. */
232     }
233 
234     return true;
235 }
236 
237 /*
238  * RISCV IOMMU Address Translation Lookup - Page Table Walk
239  *
240  * Note: Code is based on get_physical_address() from target/riscv/cpu_helper.c
241  * Both implementation can be merged into single helper function in future.
242  * Keeping them separate for now, as error reporting and flow specifics are
243  * sufficiently different for separate implementation.
244  *
245  * @s        : IOMMU Device State
246  * @ctx      : Translation context for device id and process address space id.
247  * @iotlb    : translation data: physical address and access mode.
248  * @return   : success or fault cause code.
249  */
250 static int riscv_iommu_spa_fetch(RISCVIOMMUState *s, RISCVIOMMUContext *ctx,
251     IOMMUTLBEntry *iotlb)
252 {
253     dma_addr_t addr, base;
254     uint64_t satp, gatp, pte;
255     bool en_s, en_g;
256     struct {
257         unsigned char step;
258         unsigned char levels;
259         unsigned char ptidxbits;
260         unsigned char ptesize;
261     } sc[2];
262     /* Translation stage phase */
263     enum {
264         S_STAGE = 0,
265         G_STAGE = 1,
266     } pass;
267     MemTxResult ret;
268 
269     satp = get_field(ctx->satp, RISCV_IOMMU_ATP_MODE_FIELD);
270     gatp = get_field(ctx->gatp, RISCV_IOMMU_ATP_MODE_FIELD);
271 
272     en_s = satp != RISCV_IOMMU_DC_FSC_MODE_BARE;
273     en_g = gatp != RISCV_IOMMU_DC_IOHGATP_MODE_BARE;
274 
275     /*
276      * Early check for MSI address match when IOVA == GPA.
277      * Note that the (!en_s) condition means that the MSI
278      * page table may only be used when guest pages are
279      * mapped using the g-stage page table, whether single-
280      * or two-stage paging is enabled. It's unavoidable though,
281      * because the spec mandates that we do a first-stage
282      * translation before we check the MSI page table, which
283      * means we can't do an early MSI check unless we have
284      * strictly !en_s.
285      */
286     if (!en_s && (iotlb->perm & IOMMU_WO) &&
287         riscv_iommu_msi_check(s, ctx, iotlb->iova)) {
288         iotlb->target_as = &s->trap_as;
289         iotlb->translated_addr = iotlb->iova;
290         iotlb->addr_mask = ~TARGET_PAGE_MASK;
291         return 0;
292     }
293 
294     /* Exit early for pass-through mode. */
295     if (!(en_s || en_g)) {
296         iotlb->translated_addr = iotlb->iova;
297         iotlb->addr_mask = ~TARGET_PAGE_MASK;
298         /* Allow R/W in pass-through mode */
299         iotlb->perm = IOMMU_RW;
300         return 0;
301     }
302 
303     /* S/G translation parameters. */
304     for (pass = 0; pass < 2; pass++) {
305         uint32_t sv_mode;
306 
307         sc[pass].step = 0;
308         if (pass ? (s->fctl & RISCV_IOMMU_FCTL_GXL) :
309             (ctx->tc & RISCV_IOMMU_DC_TC_SXL)) {
310             /* 32bit mode for GXL/SXL == 1 */
311             switch (pass ? gatp : satp) {
312             case RISCV_IOMMU_DC_IOHGATP_MODE_BARE:
313                 sc[pass].levels    = 0;
314                 sc[pass].ptidxbits = 0;
315                 sc[pass].ptesize   = 0;
316                 break;
317             case RISCV_IOMMU_DC_IOHGATP_MODE_SV32X4:
318                 sv_mode = pass ? RISCV_IOMMU_CAP_SV32X4 : RISCV_IOMMU_CAP_SV32;
319                 if (!(s->cap & sv_mode)) {
320                     return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED;
321                 }
322                 sc[pass].levels    = 2;
323                 sc[pass].ptidxbits = 10;
324                 sc[pass].ptesize   = 4;
325                 break;
326             default:
327                 return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED;
328             }
329         } else {
330             /* 64bit mode for GXL/SXL == 0 */
331             switch (pass ? gatp : satp) {
332             case RISCV_IOMMU_DC_IOHGATP_MODE_BARE:
333                 sc[pass].levels    = 0;
334                 sc[pass].ptidxbits = 0;
335                 sc[pass].ptesize   = 0;
336                 break;
337             case RISCV_IOMMU_DC_IOHGATP_MODE_SV39X4:
338                 sv_mode = pass ? RISCV_IOMMU_CAP_SV39X4 : RISCV_IOMMU_CAP_SV39;
339                 if (!(s->cap & sv_mode)) {
340                     return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED;
341                 }
342                 sc[pass].levels    = 3;
343                 sc[pass].ptidxbits = 9;
344                 sc[pass].ptesize   = 8;
345                 break;
346             case RISCV_IOMMU_DC_IOHGATP_MODE_SV48X4:
347                 sv_mode = pass ? RISCV_IOMMU_CAP_SV48X4 : RISCV_IOMMU_CAP_SV48;
348                 if (!(s->cap & sv_mode)) {
349                     return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED;
350                 }
351                 sc[pass].levels    = 4;
352                 sc[pass].ptidxbits = 9;
353                 sc[pass].ptesize   = 8;
354                 break;
355             case RISCV_IOMMU_DC_IOHGATP_MODE_SV57X4:
356                 sv_mode = pass ? RISCV_IOMMU_CAP_SV57X4 : RISCV_IOMMU_CAP_SV57;
357                 if (!(s->cap & sv_mode)) {
358                     return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED;
359                 }
360                 sc[pass].levels    = 5;
361                 sc[pass].ptidxbits = 9;
362                 sc[pass].ptesize   = 8;
363                 break;
364             default:
365                 return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED;
366             }
367         }
368     };
369 
370     /* S/G stages translation tables root pointers */
371     gatp = PPN_PHYS(get_field(ctx->gatp, RISCV_IOMMU_ATP_PPN_FIELD));
372     satp = PPN_PHYS(get_field(ctx->satp, RISCV_IOMMU_ATP_PPN_FIELD));
373     addr = (en_s && en_g) ? satp : iotlb->iova;
374     base = en_g ? gatp : satp;
375     pass = en_g ? G_STAGE : S_STAGE;
376 
377     do {
378         const unsigned widened = (pass && !sc[pass].step) ? 2 : 0;
379         const unsigned va_bits = widened + sc[pass].ptidxbits;
380         const unsigned va_skip = TARGET_PAGE_BITS + sc[pass].ptidxbits *
381                                  (sc[pass].levels - 1 - sc[pass].step);
382         const unsigned idx = (addr >> va_skip) & ((1 << va_bits) - 1);
383         const dma_addr_t pte_addr = base + idx * sc[pass].ptesize;
384         const bool ade =
385             ctx->tc & (pass ? RISCV_IOMMU_DC_TC_GADE : RISCV_IOMMU_DC_TC_SADE);
386 
387         /* Address range check before first level lookup */
388         if (!sc[pass].step) {
389             const uint64_t va_len = va_skip + va_bits;
390             const uint64_t va_mask = (1ULL << va_len) - 1;
391 
392             if (pass == S_STAGE && va_len > 32) {
393                 target_ulong mask, masked_msbs;
394 
395                 mask = (1L << (TARGET_LONG_BITS - (va_len - 1))) - 1;
396                 masked_msbs = (addr >> (va_len - 1)) & mask;
397 
398                 if (masked_msbs != 0 && masked_msbs != mask) {
399                     return (iotlb->perm & IOMMU_WO) ?
400                                 RISCV_IOMMU_FQ_CAUSE_WR_FAULT_S :
401                                 RISCV_IOMMU_FQ_CAUSE_RD_FAULT_S;
402                 }
403             } else {
404                 if ((addr & va_mask) != addr) {
405                     return (iotlb->perm & IOMMU_WO) ?
406                                 RISCV_IOMMU_FQ_CAUSE_WR_FAULT_VS :
407                                 RISCV_IOMMU_FQ_CAUSE_RD_FAULT_VS;
408                 }
409             }
410         }
411 
412 
413         if (pass == S_STAGE) {
414             riscv_iommu_hpm_incr_ctr(s, ctx, RISCV_IOMMU_HPMEVENT_S_VS_WALKS);
415         } else {
416             riscv_iommu_hpm_incr_ctr(s, ctx, RISCV_IOMMU_HPMEVENT_G_WALKS);
417         }
418 
419         /* Read page table entry */
420         if (sc[pass].ptesize == 4) {
421             uint32_t pte32 = 0;
422             ret = ldl_le_dma(s->target_as, pte_addr, &pte32,
423                              MEMTXATTRS_UNSPECIFIED);
424             pte = pte32;
425         } else {
426             ret = ldq_le_dma(s->target_as, pte_addr, &pte,
427                              MEMTXATTRS_UNSPECIFIED);
428         }
429         if (ret != MEMTX_OK) {
430             return (iotlb->perm & IOMMU_WO) ? RISCV_IOMMU_FQ_CAUSE_WR_FAULT
431                                             : RISCV_IOMMU_FQ_CAUSE_RD_FAULT;
432         }
433 
434         sc[pass].step++;
435         hwaddr ppn = pte >> PTE_PPN_SHIFT;
436 
437         if (!(pte & PTE_V)) {
438             break;                /* Invalid PTE */
439         } else if (!(pte & (PTE_R | PTE_W | PTE_X))) {
440             base = PPN_PHYS(ppn); /* Inner PTE, continue walking */
441         } else if ((pte & (PTE_R | PTE_W | PTE_X)) == PTE_W) {
442             break;                /* Reserved leaf PTE flags: PTE_W */
443         } else if ((pte & (PTE_R | PTE_W | PTE_X)) == (PTE_W | PTE_X)) {
444             break;                /* Reserved leaf PTE flags: PTE_W + PTE_X */
445         } else if (ppn & ((1ULL << (va_skip - TARGET_PAGE_BITS)) - 1)) {
446             break;                /* Misaligned PPN */
447         } else if ((iotlb->perm & IOMMU_RO) && !(pte & PTE_R)) {
448             break;                /* Read access check failed */
449         } else if ((iotlb->perm & IOMMU_WO) && !(pte & PTE_W)) {
450             break;                /* Write access check failed */
451         } else if ((iotlb->perm & IOMMU_RO) && !ade && !(pte & PTE_A)) {
452             break;                /* Access bit not set */
453         } else if ((iotlb->perm & IOMMU_WO) && !ade && !(pte & PTE_D)) {
454             break;                /* Dirty bit not set */
455         } else {
456             /* Leaf PTE, translation completed. */
457             sc[pass].step = sc[pass].levels;
458             base = PPN_PHYS(ppn) | (addr & ((1ULL << va_skip) - 1));
459             /* Update address mask based on smallest translation granularity */
460             iotlb->addr_mask &= (1ULL << va_skip) - 1;
461             /* Continue with S-Stage translation? */
462             if (pass && sc[0].step != sc[0].levels) {
463                 pass = S_STAGE;
464                 addr = iotlb->iova;
465                 continue;
466             }
467             /* Translation phase completed (GPA or SPA) */
468             iotlb->translated_addr = base;
469             iotlb->perm = (pte & PTE_W) ? ((pte & PTE_R) ? IOMMU_RW : IOMMU_WO)
470                                                          : IOMMU_RO;
471 
472             /* Check MSI GPA address match */
473             if (pass == S_STAGE && (iotlb->perm & IOMMU_WO) &&
474                 riscv_iommu_msi_check(s, ctx, base)) {
475                 /* Trap MSI writes and return GPA address. */
476                 iotlb->target_as = &s->trap_as;
477                 iotlb->addr_mask = ~TARGET_PAGE_MASK;
478                 return 0;
479             }
480 
481             /* Continue with G-Stage translation? */
482             if (!pass && en_g) {
483                 pass = G_STAGE;
484                 addr = base;
485                 base = gatp;
486                 sc[pass].step = 0;
487                 continue;
488             }
489 
490             return 0;
491         }
492 
493         if (sc[pass].step == sc[pass].levels) {
494             break; /* Can't find leaf PTE */
495         }
496 
497         /* Continue with G-Stage translation? */
498         if (!pass && en_g) {
499             pass = G_STAGE;
500             addr = base;
501             base = gatp;
502             sc[pass].step = 0;
503         }
504     } while (1);
505 
506     return (iotlb->perm & IOMMU_WO) ?
507                 (pass ? RISCV_IOMMU_FQ_CAUSE_WR_FAULT_VS :
508                         RISCV_IOMMU_FQ_CAUSE_WR_FAULT_S) :
509                 (pass ? RISCV_IOMMU_FQ_CAUSE_RD_FAULT_VS :
510                         RISCV_IOMMU_FQ_CAUSE_RD_FAULT_S);
511 }
512 
513 static void riscv_iommu_report_fault(RISCVIOMMUState *s,
514                                      RISCVIOMMUContext *ctx,
515                                      uint32_t fault_type, uint32_t cause,
516                                      bool pv,
517                                      uint64_t iotval, uint64_t iotval2)
518 {
519     struct riscv_iommu_fq_record ev = { 0 };
520 
521     if (ctx->tc & RISCV_IOMMU_DC_TC_DTF) {
522         switch (cause) {
523         case RISCV_IOMMU_FQ_CAUSE_DMA_DISABLED:
524         case RISCV_IOMMU_FQ_CAUSE_DDT_LOAD_FAULT:
525         case RISCV_IOMMU_FQ_CAUSE_DDT_INVALID:
526         case RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED:
527         case RISCV_IOMMU_FQ_CAUSE_DDT_CORRUPTED:
528         case RISCV_IOMMU_FQ_CAUSE_INTERNAL_DP_ERROR:
529         case RISCV_IOMMU_FQ_CAUSE_MSI_WR_FAULT:
530             break;
531         default:
532             /* DTF prevents reporting a fault for this given cause */
533             return;
534         }
535     }
536 
537     ev.hdr = set_field(ev.hdr, RISCV_IOMMU_FQ_HDR_CAUSE, cause);
538     ev.hdr = set_field(ev.hdr, RISCV_IOMMU_FQ_HDR_TTYPE, fault_type);
539     ev.hdr = set_field(ev.hdr, RISCV_IOMMU_FQ_HDR_DID, ctx->devid);
540     ev.hdr = set_field(ev.hdr, RISCV_IOMMU_FQ_HDR_PV, true);
541 
542     if (pv) {
543         ev.hdr = set_field(ev.hdr, RISCV_IOMMU_FQ_HDR_PID, ctx->process_id);
544     }
545 
546     ev.iotval = iotval;
547     ev.iotval2 = iotval2;
548 
549     riscv_iommu_fault(s, &ev);
550 }
551 
552 /* Redirect MSI write for given GPA. */
553 static MemTxResult riscv_iommu_msi_write(RISCVIOMMUState *s,
554     RISCVIOMMUContext *ctx, uint64_t gpa, uint64_t data,
555     unsigned size, MemTxAttrs attrs)
556 {
557     MemTxResult res;
558     dma_addr_t addr;
559     uint64_t intn;
560     uint32_t n190;
561     uint64_t pte[2];
562     int fault_type = RISCV_IOMMU_FQ_TTYPE_UADDR_WR;
563     int cause;
564 
565     /* Interrupt File Number */
566     intn = riscv_iommu_pext_u64(PPN_DOWN(gpa), ctx->msi_addr_mask);
567     if (intn >= 256) {
568         /* Interrupt file number out of range */
569         res = MEMTX_ACCESS_ERROR;
570         cause = RISCV_IOMMU_FQ_CAUSE_MSI_LOAD_FAULT;
571         goto err;
572     }
573 
574     /* fetch MSI PTE */
575     addr = PPN_PHYS(get_field(ctx->msiptp, RISCV_IOMMU_DC_MSIPTP_PPN));
576     addr = addr | (intn * sizeof(pte));
577     res = dma_memory_read(s->target_as, addr, &pte, sizeof(pte),
578             MEMTXATTRS_UNSPECIFIED);
579     if (res != MEMTX_OK) {
580         if (res == MEMTX_DECODE_ERROR) {
581             cause = RISCV_IOMMU_FQ_CAUSE_MSI_PT_CORRUPTED;
582         } else {
583             cause = RISCV_IOMMU_FQ_CAUSE_MSI_LOAD_FAULT;
584         }
585         goto err;
586     }
587 
588     le64_to_cpus(&pte[0]);
589     le64_to_cpus(&pte[1]);
590 
591     if (!(pte[0] & RISCV_IOMMU_MSI_PTE_V) || (pte[0] & RISCV_IOMMU_MSI_PTE_C)) {
592         /*
593          * The spec mentions that: "If msipte.C == 1, then further
594          * processing to interpret the PTE is implementation
595          * defined.". We'll abort with cause = 262 for this
596          * case too.
597          */
598         res = MEMTX_ACCESS_ERROR;
599         cause = RISCV_IOMMU_FQ_CAUSE_MSI_INVALID;
600         goto err;
601     }
602 
603     switch (get_field(pte[0], RISCV_IOMMU_MSI_PTE_M)) {
604     case RISCV_IOMMU_MSI_PTE_M_BASIC:
605         /* MSI Pass-through mode */
606         addr = PPN_PHYS(get_field(pte[0], RISCV_IOMMU_MSI_PTE_PPN));
607 
608         trace_riscv_iommu_msi(s->parent_obj.id, PCI_BUS_NUM(ctx->devid),
609                               PCI_SLOT(ctx->devid), PCI_FUNC(ctx->devid),
610                               gpa, addr);
611 
612         res = dma_memory_write(s->target_as, addr, &data, size, attrs);
613         if (res != MEMTX_OK) {
614             cause = RISCV_IOMMU_FQ_CAUSE_MSI_WR_FAULT;
615             goto err;
616         }
617 
618         return MEMTX_OK;
619     case RISCV_IOMMU_MSI_PTE_M_MRIF:
620         /* MRIF mode, continue. */
621         break;
622     default:
623         res = MEMTX_ACCESS_ERROR;
624         cause = RISCV_IOMMU_FQ_CAUSE_MSI_MISCONFIGURED;
625         goto err;
626     }
627 
628     /*
629      * Report an error for interrupt identities exceeding the maximum allowed
630      * for an IMSIC interrupt file (2047) or destination address is not 32-bit
631      * aligned. See IOMMU Specification, Chapter 2.3. MSI page tables.
632      */
633     if ((data > 2047) || (gpa & 3)) {
634         res = MEMTX_ACCESS_ERROR;
635         cause = RISCV_IOMMU_FQ_CAUSE_MSI_MISCONFIGURED;
636         goto err;
637     }
638 
639     /* MSI MRIF mode, non atomic pending bit update */
640 
641     /* MRIF pending bit address */
642     addr = get_field(pte[0], RISCV_IOMMU_MSI_PTE_MRIF_ADDR) << 9;
643     addr = addr | ((data & 0x7c0) >> 3);
644 
645     trace_riscv_iommu_msi(s->parent_obj.id, PCI_BUS_NUM(ctx->devid),
646                           PCI_SLOT(ctx->devid), PCI_FUNC(ctx->devid),
647                           gpa, addr);
648 
649     /* MRIF pending bit mask */
650     data = 1ULL << (data & 0x03f);
651     res = dma_memory_read(s->target_as, addr, &intn, sizeof(intn), attrs);
652     if (res != MEMTX_OK) {
653         cause = RISCV_IOMMU_FQ_CAUSE_MSI_LOAD_FAULT;
654         goto err;
655     }
656 
657     intn = intn | data;
658     res = dma_memory_write(s->target_as, addr, &intn, sizeof(intn), attrs);
659     if (res != MEMTX_OK) {
660         cause = RISCV_IOMMU_FQ_CAUSE_MSI_WR_FAULT;
661         goto err;
662     }
663 
664     /* Get MRIF enable bits */
665     addr = addr + sizeof(intn);
666     res = dma_memory_read(s->target_as, addr, &intn, sizeof(intn), attrs);
667     if (res != MEMTX_OK) {
668         cause = RISCV_IOMMU_FQ_CAUSE_MSI_LOAD_FAULT;
669         goto err;
670     }
671 
672     if (!(intn & data)) {
673         /* notification disabled, MRIF update completed. */
674         return MEMTX_OK;
675     }
676 
677     /* Send notification message */
678     addr = PPN_PHYS(get_field(pte[1], RISCV_IOMMU_MSI_MRIF_NPPN));
679     n190 = get_field(pte[1], RISCV_IOMMU_MSI_MRIF_NID) |
680           (get_field(pte[1], RISCV_IOMMU_MSI_MRIF_NID_MSB) << 10);
681 
682     res = dma_memory_write(s->target_as, addr, &n190, sizeof(n190), attrs);
683     if (res != MEMTX_OK) {
684         cause = RISCV_IOMMU_FQ_CAUSE_MSI_WR_FAULT;
685         goto err;
686     }
687 
688     trace_riscv_iommu_mrif_notification(s->parent_obj.id, n190, addr);
689 
690     return MEMTX_OK;
691 
692 err:
693     riscv_iommu_report_fault(s, ctx, fault_type, cause,
694                              !!ctx->process_id, 0, 0);
695     return res;
696 }
697 
698 /*
699  * Check device context configuration as described by the
700  * riscv-iommu spec section "Device-context configuration
701  * checks".
702  */
703 static bool riscv_iommu_validate_device_ctx(RISCVIOMMUState *s,
704                                             RISCVIOMMUContext *ctx)
705 {
706     uint32_t fsc_mode, msi_mode;
707     uint64_t gatp;
708 
709     if (!(s->cap & RISCV_IOMMU_CAP_ATS) &&
710         (ctx->tc & RISCV_IOMMU_DC_TC_EN_ATS ||
711          ctx->tc & RISCV_IOMMU_DC_TC_EN_PRI ||
712          ctx->tc & RISCV_IOMMU_DC_TC_PRPR)) {
713         return false;
714     }
715 
716     if (!(ctx->tc & RISCV_IOMMU_DC_TC_EN_ATS) &&
717         (ctx->tc & RISCV_IOMMU_DC_TC_T2GPA ||
718          ctx->tc & RISCV_IOMMU_DC_TC_EN_PRI)) {
719         return false;
720     }
721 
722     if (!(ctx->tc & RISCV_IOMMU_DC_TC_EN_PRI) &&
723         ctx->tc & RISCV_IOMMU_DC_TC_PRPR) {
724         return false;
725     }
726 
727     if (!(s->cap & RISCV_IOMMU_CAP_T2GPA) &&
728         ctx->tc & RISCV_IOMMU_DC_TC_T2GPA) {
729         return false;
730     }
731 
732     if (s->cap & RISCV_IOMMU_CAP_MSI_FLAT) {
733         msi_mode = get_field(ctx->msiptp, RISCV_IOMMU_DC_MSIPTP_MODE);
734 
735         if (msi_mode != RISCV_IOMMU_DC_MSIPTP_MODE_OFF &&
736             msi_mode != RISCV_IOMMU_DC_MSIPTP_MODE_FLAT) {
737             return false;
738         }
739     }
740 
741     gatp = get_field(ctx->gatp, RISCV_IOMMU_ATP_MODE_FIELD);
742     if (ctx->tc & RISCV_IOMMU_DC_TC_T2GPA &&
743         gatp == RISCV_IOMMU_DC_IOHGATP_MODE_BARE) {
744         return false;
745     }
746 
747     fsc_mode = get_field(ctx->satp, RISCV_IOMMU_DC_FSC_MODE);
748 
749     if (ctx->tc & RISCV_IOMMU_DC_TC_PDTV) {
750         switch (fsc_mode) {
751         case RISCV_IOMMU_DC_FSC_PDTP_MODE_PD8:
752             if (!(s->cap & RISCV_IOMMU_CAP_PD8)) {
753                 return false;
754             }
755             break;
756         case RISCV_IOMMU_DC_FSC_PDTP_MODE_PD17:
757             if (!(s->cap & RISCV_IOMMU_CAP_PD17)) {
758                 return false;
759             }
760             break;
761         case RISCV_IOMMU_DC_FSC_PDTP_MODE_PD20:
762             if (!(s->cap & RISCV_IOMMU_CAP_PD20)) {
763                 return false;
764             }
765             break;
766         }
767     } else {
768         /* DC.tc.PDTV is 0 */
769         if (ctx->tc & RISCV_IOMMU_DC_TC_DPE) {
770             return false;
771         }
772 
773         if (ctx->tc & RISCV_IOMMU_DC_TC_SXL) {
774             if (fsc_mode == RISCV_IOMMU_CAP_SV32 &&
775                 !(s->cap & RISCV_IOMMU_CAP_SV32)) {
776                 return false;
777             }
778         } else {
779             switch (fsc_mode) {
780             case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV39:
781                 if (!(s->cap & RISCV_IOMMU_CAP_SV39)) {
782                     return false;
783                 }
784                 break;
785             case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV48:
786                 if (!(s->cap & RISCV_IOMMU_CAP_SV48)) {
787                     return false;
788                 }
789             break;
790             case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV57:
791                 if (!(s->cap & RISCV_IOMMU_CAP_SV57)) {
792                     return false;
793                 }
794                 break;
795             }
796         }
797     }
798 
799     /*
800      * CAP_END is always zero (only one endianess). FCTL_BE is
801      * always zero (little-endian accesses). Thus TC_SBE must
802      * always be LE, i.e. zero.
803      */
804     if (ctx->tc & RISCV_IOMMU_DC_TC_SBE) {
805         return false;
806     }
807 
808     return true;
809 }
810 
811 /*
812  * Validate process context (PC) according to section
813  * "Process-context configuration checks".
814  */
815 static bool riscv_iommu_validate_process_ctx(RISCVIOMMUState *s,
816                                              RISCVIOMMUContext *ctx)
817 {
818     uint32_t mode;
819 
820     if (get_field(ctx->ta, RISCV_IOMMU_PC_TA_RESERVED)) {
821         return false;
822     }
823 
824     if (get_field(ctx->satp, RISCV_IOMMU_PC_FSC_RESERVED)) {
825         return false;
826     }
827 
828     mode = get_field(ctx->satp, RISCV_IOMMU_DC_FSC_MODE);
829     switch (mode) {
830     case RISCV_IOMMU_DC_FSC_MODE_BARE:
831     /* sv39 and sv32 modes have the same value (8) */
832     case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV39:
833     case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV48:
834     case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV57:
835         break;
836     default:
837         return false;
838     }
839 
840     if (ctx->tc & RISCV_IOMMU_DC_TC_SXL) {
841         if (mode == RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV32 &&
842             !(s->cap & RISCV_IOMMU_CAP_SV32)) {
843                 return false;
844         }
845     } else {
846         switch (mode) {
847         case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV39:
848             if (!(s->cap & RISCV_IOMMU_CAP_SV39)) {
849                 return false;
850             }
851             break;
852         case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV48:
853             if (!(s->cap & RISCV_IOMMU_CAP_SV48)) {
854                 return false;
855             }
856             break;
857         case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV57:
858             if (!(s->cap & RISCV_IOMMU_CAP_SV57)) {
859                 return false;
860             }
861             break;
862         }
863     }
864 
865     return true;
866 }
867 
868 /*
869  * RISC-V IOMMU Device Context Loopkup - Device Directory Tree Walk
870  *
871  * @s         : IOMMU Device State
872  * @ctx       : Device Translation Context with devid and process_id set.
873  * @return    : success or fault code.
874  */
875 static int riscv_iommu_ctx_fetch(RISCVIOMMUState *s, RISCVIOMMUContext *ctx)
876 {
877     const uint64_t ddtp = s->ddtp;
878     unsigned mode = get_field(ddtp, RISCV_IOMMU_DDTP_MODE);
879     dma_addr_t addr = PPN_PHYS(get_field(ddtp, RISCV_IOMMU_DDTP_PPN));
880     struct riscv_iommu_dc dc;
881     /* Device Context format: 0: extended (64 bytes) | 1: base (32 bytes) */
882     const int dc_fmt = !s->enable_msi;
883     const size_t dc_len = sizeof(dc) >> dc_fmt;
884     int depth;
885     uint64_t de;
886 
887     switch (mode) {
888     case RISCV_IOMMU_DDTP_MODE_OFF:
889         return RISCV_IOMMU_FQ_CAUSE_DMA_DISABLED;
890 
891     case RISCV_IOMMU_DDTP_MODE_BARE:
892         /* mock up pass-through translation context */
893         ctx->gatp = set_field(0, RISCV_IOMMU_ATP_MODE_FIELD,
894             RISCV_IOMMU_DC_IOHGATP_MODE_BARE);
895         ctx->satp = set_field(0, RISCV_IOMMU_ATP_MODE_FIELD,
896             RISCV_IOMMU_DC_FSC_MODE_BARE);
897 
898         ctx->tc = RISCV_IOMMU_DC_TC_V;
899         if (s->enable_ats) {
900             ctx->tc |= RISCV_IOMMU_DC_TC_EN_ATS;
901         }
902 
903         ctx->ta = 0;
904         ctx->msiptp = 0;
905         return 0;
906 
907     case RISCV_IOMMU_DDTP_MODE_1LVL:
908         depth = 0;
909         break;
910 
911     case RISCV_IOMMU_DDTP_MODE_2LVL:
912         depth = 1;
913         break;
914 
915     case RISCV_IOMMU_DDTP_MODE_3LVL:
916         depth = 2;
917         break;
918 
919     default:
920         return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED;
921     }
922 
923     /*
924      * Check supported device id width (in bits).
925      * See IOMMU Specification, Chapter 6. Software guidelines.
926      * - if extended device-context format is used:
927      *   1LVL: 6, 2LVL: 15, 3LVL: 24
928      * - if base device-context format is used:
929      *   1LVL: 7, 2LVL: 16, 3LVL: 24
930      */
931     if (ctx->devid >= (1 << (depth * 9 + 6 + (dc_fmt && depth != 2)))) {
932         return RISCV_IOMMU_FQ_CAUSE_TTYPE_BLOCKED;
933     }
934 
935     /* Device directory tree walk */
936     for (; depth-- > 0; ) {
937         riscv_iommu_hpm_incr_ctr(s, ctx, RISCV_IOMMU_HPMEVENT_DD_WALK);
938         /*
939          * Select device id index bits based on device directory tree level
940          * and device context format.
941          * See IOMMU Specification, Chapter 2. Data Structures.
942          * - if extended device-context format is used:
943          *   device index: [23:15][14:6][5:0]
944          * - if base device-context format is used:
945          *   device index: [23:16][15:7][6:0]
946          */
947         const int split = depth * 9 + 6 + dc_fmt;
948         addr |= ((ctx->devid >> split) << 3) & ~TARGET_PAGE_MASK;
949         if (dma_memory_read(s->target_as, addr, &de, sizeof(de),
950                             MEMTXATTRS_UNSPECIFIED) != MEMTX_OK) {
951             return RISCV_IOMMU_FQ_CAUSE_DDT_LOAD_FAULT;
952         }
953         le64_to_cpus(&de);
954         if (!(de & RISCV_IOMMU_DDTE_VALID)) {
955             /* invalid directory entry */
956             return RISCV_IOMMU_FQ_CAUSE_DDT_INVALID;
957         }
958         if (de & ~(RISCV_IOMMU_DDTE_PPN | RISCV_IOMMU_DDTE_VALID)) {
959             /* reserved bits set */
960             return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED;
961         }
962         addr = PPN_PHYS(get_field(de, RISCV_IOMMU_DDTE_PPN));
963     }
964 
965     riscv_iommu_hpm_incr_ctr(s, ctx, RISCV_IOMMU_HPMEVENT_DD_WALK);
966 
967     /* index into device context entry page */
968     addr |= (ctx->devid * dc_len) & ~TARGET_PAGE_MASK;
969 
970     memset(&dc, 0, sizeof(dc));
971     if (dma_memory_read(s->target_as, addr, &dc, dc_len,
972                         MEMTXATTRS_UNSPECIFIED) != MEMTX_OK) {
973         return RISCV_IOMMU_FQ_CAUSE_DDT_LOAD_FAULT;
974     }
975 
976     /* Set translation context. */
977     ctx->tc = le64_to_cpu(dc.tc);
978     ctx->gatp = le64_to_cpu(dc.iohgatp);
979     ctx->satp = le64_to_cpu(dc.fsc);
980     ctx->ta = le64_to_cpu(dc.ta);
981     ctx->msiptp = le64_to_cpu(dc.msiptp);
982     ctx->msi_addr_mask = le64_to_cpu(dc.msi_addr_mask);
983     ctx->msi_addr_pattern = le64_to_cpu(dc.msi_addr_pattern);
984 
985     if (!(ctx->tc & RISCV_IOMMU_DC_TC_V)) {
986         return RISCV_IOMMU_FQ_CAUSE_DDT_INVALID;
987     }
988 
989     if (!riscv_iommu_validate_device_ctx(s, ctx)) {
990         return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED;
991     }
992 
993     /* FSC field checks */
994     mode = get_field(ctx->satp, RISCV_IOMMU_DC_FSC_MODE);
995     addr = PPN_PHYS(get_field(ctx->satp, RISCV_IOMMU_DC_FSC_PPN));
996 
997     if (!(ctx->tc & RISCV_IOMMU_DC_TC_PDTV)) {
998         if (ctx->process_id != RISCV_IOMMU_NOPROCID) {
999             /* PID is disabled */
1000             return RISCV_IOMMU_FQ_CAUSE_TTYPE_BLOCKED;
1001         }
1002         if (mode > RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV57) {
1003             /* Invalid translation mode */
1004             return RISCV_IOMMU_FQ_CAUSE_DDT_INVALID;
1005         }
1006         return 0;
1007     }
1008 
1009     if (ctx->process_id == RISCV_IOMMU_NOPROCID) {
1010         if (!(ctx->tc & RISCV_IOMMU_DC_TC_DPE)) {
1011             /* No default process_id enabled, set BARE mode */
1012             ctx->satp = 0ULL;
1013             return 0;
1014         } else {
1015             /* Use default process_id #0 */
1016             ctx->process_id = 0;
1017         }
1018     }
1019 
1020     if (mode == RISCV_IOMMU_DC_FSC_MODE_BARE) {
1021         /* No S-Stage translation, done. */
1022         return 0;
1023     }
1024 
1025     /* FSC.TC.PDTV enabled */
1026     if (mode > RISCV_IOMMU_DC_FSC_PDTP_MODE_PD20) {
1027         /* Invalid PDTP.MODE */
1028         return RISCV_IOMMU_FQ_CAUSE_PDT_MISCONFIGURED;
1029     }
1030 
1031     for (depth = mode - RISCV_IOMMU_DC_FSC_PDTP_MODE_PD8; depth-- > 0; ) {
1032         riscv_iommu_hpm_incr_ctr(s, ctx, RISCV_IOMMU_HPMEVENT_PD_WALK);
1033 
1034         /*
1035          * Select process id index bits based on process directory tree
1036          * level. See IOMMU Specification, 2.2. Process-Directory-Table.
1037          */
1038         const int split = depth * 9 + 8;
1039         addr |= ((ctx->process_id >> split) << 3) & ~TARGET_PAGE_MASK;
1040         if (dma_memory_read(s->target_as, addr, &de, sizeof(de),
1041                             MEMTXATTRS_UNSPECIFIED) != MEMTX_OK) {
1042             return RISCV_IOMMU_FQ_CAUSE_PDT_LOAD_FAULT;
1043         }
1044         le64_to_cpus(&de);
1045         if (!(de & RISCV_IOMMU_PC_TA_V)) {
1046             return RISCV_IOMMU_FQ_CAUSE_PDT_INVALID;
1047         }
1048         addr = PPN_PHYS(get_field(de, RISCV_IOMMU_PC_FSC_PPN));
1049     }
1050 
1051     riscv_iommu_hpm_incr_ctr(s, ctx, RISCV_IOMMU_HPMEVENT_PD_WALK);
1052 
1053     /* Leaf entry in PDT */
1054     addr |= (ctx->process_id << 4) & ~TARGET_PAGE_MASK;
1055     if (dma_memory_read(s->target_as, addr, &dc.ta, sizeof(uint64_t) * 2,
1056                         MEMTXATTRS_UNSPECIFIED) != MEMTX_OK) {
1057         return RISCV_IOMMU_FQ_CAUSE_PDT_LOAD_FAULT;
1058     }
1059 
1060     /* Use FSC and TA from process directory entry. */
1061     ctx->ta = le64_to_cpu(dc.ta);
1062     ctx->satp = le64_to_cpu(dc.fsc);
1063 
1064     if (!(ctx->ta & RISCV_IOMMU_PC_TA_V)) {
1065         return RISCV_IOMMU_FQ_CAUSE_PDT_INVALID;
1066     }
1067 
1068     if (!riscv_iommu_validate_process_ctx(s, ctx)) {
1069         return RISCV_IOMMU_FQ_CAUSE_PDT_MISCONFIGURED;
1070     }
1071 
1072     return 0;
1073 }
1074 
1075 /* Translation Context cache support */
1076 static gboolean riscv_iommu_ctx_equal(gconstpointer v1, gconstpointer v2)
1077 {
1078     RISCVIOMMUContext *c1 = (RISCVIOMMUContext *) v1;
1079     RISCVIOMMUContext *c2 = (RISCVIOMMUContext *) v2;
1080     return c1->devid == c2->devid &&
1081            c1->process_id == c2->process_id;
1082 }
1083 
1084 static guint riscv_iommu_ctx_hash(gconstpointer v)
1085 {
1086     RISCVIOMMUContext *ctx = (RISCVIOMMUContext *) v;
1087     /*
1088      * Generate simple hash of (process_id, devid)
1089      * assuming 24-bit wide devid.
1090      */
1091     return (guint)(ctx->devid) + ((guint)(ctx->process_id) << 24);
1092 }
1093 
1094 static void riscv_iommu_ctx_inval_devid_procid(gpointer key, gpointer value,
1095                                                gpointer data)
1096 {
1097     RISCVIOMMUContext *ctx = (RISCVIOMMUContext *) value;
1098     RISCVIOMMUContext *arg = (RISCVIOMMUContext *) data;
1099     if (ctx->tc & RISCV_IOMMU_DC_TC_V &&
1100         ctx->devid == arg->devid &&
1101         ctx->process_id == arg->process_id) {
1102         ctx->tc &= ~RISCV_IOMMU_DC_TC_V;
1103     }
1104 }
1105 
1106 static void riscv_iommu_ctx_inval_devid(gpointer key, gpointer value,
1107                                         gpointer data)
1108 {
1109     RISCVIOMMUContext *ctx = (RISCVIOMMUContext *) value;
1110     RISCVIOMMUContext *arg = (RISCVIOMMUContext *) data;
1111     if (ctx->tc & RISCV_IOMMU_DC_TC_V &&
1112         ctx->devid == arg->devid) {
1113         ctx->tc &= ~RISCV_IOMMU_DC_TC_V;
1114     }
1115 }
1116 
1117 static void riscv_iommu_ctx_inval_all(gpointer key, gpointer value,
1118                                       gpointer data)
1119 {
1120     RISCVIOMMUContext *ctx = (RISCVIOMMUContext *) value;
1121     if (ctx->tc & RISCV_IOMMU_DC_TC_V) {
1122         ctx->tc &= ~RISCV_IOMMU_DC_TC_V;
1123     }
1124 }
1125 
1126 static void riscv_iommu_ctx_inval(RISCVIOMMUState *s, GHFunc func,
1127                                   uint32_t devid, uint32_t process_id)
1128 {
1129     GHashTable *ctx_cache;
1130     RISCVIOMMUContext key = {
1131         .devid = devid,
1132         .process_id = process_id,
1133     };
1134     ctx_cache = g_hash_table_ref(s->ctx_cache);
1135     g_hash_table_foreach(ctx_cache, func, &key);
1136     g_hash_table_unref(ctx_cache);
1137 }
1138 
1139 /* Find or allocate translation context for a given {device_id, process_id} */
1140 static RISCVIOMMUContext *riscv_iommu_ctx(RISCVIOMMUState *s,
1141                                           unsigned devid, unsigned process_id,
1142                                           void **ref)
1143 {
1144     GHashTable *ctx_cache;
1145     RISCVIOMMUContext *ctx;
1146     RISCVIOMMUContext key = {
1147         .devid = devid,
1148         .process_id = process_id,
1149     };
1150 
1151     ctx_cache = g_hash_table_ref(s->ctx_cache);
1152     ctx = g_hash_table_lookup(ctx_cache, &key);
1153 
1154     if (ctx && (ctx->tc & RISCV_IOMMU_DC_TC_V)) {
1155         *ref = ctx_cache;
1156         return ctx;
1157     }
1158 
1159     ctx = g_new0(RISCVIOMMUContext, 1);
1160     ctx->devid = devid;
1161     ctx->process_id = process_id;
1162 
1163     int fault = riscv_iommu_ctx_fetch(s, ctx);
1164     if (!fault) {
1165         if (g_hash_table_size(ctx_cache) >= LIMIT_CACHE_CTX) {
1166             g_hash_table_unref(ctx_cache);
1167             ctx_cache = g_hash_table_new_full(riscv_iommu_ctx_hash,
1168                                               riscv_iommu_ctx_equal,
1169                                               g_free, NULL);
1170             g_hash_table_ref(ctx_cache);
1171             g_hash_table_unref(qatomic_xchg(&s->ctx_cache, ctx_cache));
1172         }
1173         g_hash_table_add(ctx_cache, ctx);
1174         *ref = ctx_cache;
1175         return ctx;
1176     }
1177 
1178     g_hash_table_unref(ctx_cache);
1179     *ref = NULL;
1180 
1181     riscv_iommu_report_fault(s, ctx, RISCV_IOMMU_FQ_TTYPE_UADDR_RD,
1182                              fault, !!process_id, 0, 0);
1183 
1184     g_free(ctx);
1185     return NULL;
1186 }
1187 
1188 static void riscv_iommu_ctx_put(RISCVIOMMUState *s, void *ref)
1189 {
1190     if (ref) {
1191         g_hash_table_unref((GHashTable *)ref);
1192     }
1193 }
1194 
1195 /* Find or allocate address space for a given device */
1196 static AddressSpace *riscv_iommu_space(RISCVIOMMUState *s, uint32_t devid)
1197 {
1198     RISCVIOMMUSpace *as;
1199 
1200     /* FIXME: PCIe bus remapping for attached endpoints. */
1201     devid |= s->bus << 8;
1202 
1203     QLIST_FOREACH(as, &s->spaces, list) {
1204         if (as->devid == devid) {
1205             break;
1206         }
1207     }
1208 
1209     if (as == NULL) {
1210         char name[64];
1211         as = g_new0(RISCVIOMMUSpace, 1);
1212 
1213         as->iommu = s;
1214         as->devid = devid;
1215 
1216         snprintf(name, sizeof(name), "riscv-iommu-%04x:%02x.%d-iova",
1217             PCI_BUS_NUM(as->devid), PCI_SLOT(as->devid), PCI_FUNC(as->devid));
1218 
1219         /* IOVA address space, untranslated addresses */
1220         memory_region_init_iommu(&as->iova_mr, sizeof(as->iova_mr),
1221             TYPE_RISCV_IOMMU_MEMORY_REGION,
1222             OBJECT(as), "riscv_iommu", UINT64_MAX);
1223         address_space_init(&as->iova_as, MEMORY_REGION(&as->iova_mr), name);
1224 
1225         QLIST_INSERT_HEAD(&s->spaces, as, list);
1226 
1227         trace_riscv_iommu_new(s->parent_obj.id, PCI_BUS_NUM(as->devid),
1228                 PCI_SLOT(as->devid), PCI_FUNC(as->devid));
1229     }
1230     return &as->iova_as;
1231 }
1232 
1233 /* Translation Object cache support */
1234 static gboolean riscv_iommu_iot_equal(gconstpointer v1, gconstpointer v2)
1235 {
1236     RISCVIOMMUEntry *t1 = (RISCVIOMMUEntry *) v1;
1237     RISCVIOMMUEntry *t2 = (RISCVIOMMUEntry *) v2;
1238     return t1->gscid == t2->gscid && t1->pscid == t2->pscid &&
1239            t1->iova == t2->iova && t1->tag == t2->tag;
1240 }
1241 
1242 static guint riscv_iommu_iot_hash(gconstpointer v)
1243 {
1244     RISCVIOMMUEntry *t = (RISCVIOMMUEntry *) v;
1245     return (guint)t->iova;
1246 }
1247 
1248 /* GV: 0 AV: 0 PSCV: 0 GVMA: 0 */
1249 /* GV: 0 AV: 0 GVMA: 1 */
1250 static
1251 void riscv_iommu_iot_inval_all(gpointer key, gpointer value, gpointer data)
1252 {
1253     RISCVIOMMUEntry *iot = (RISCVIOMMUEntry *) value;
1254     RISCVIOMMUEntry *arg = (RISCVIOMMUEntry *) data;
1255     if (iot->tag == arg->tag) {
1256         iot->perm = IOMMU_NONE;
1257     }
1258 }
1259 
1260 /* GV: 0 AV: 0 PSCV: 1 GVMA: 0 */
1261 static
1262 void riscv_iommu_iot_inval_pscid(gpointer key, gpointer value, gpointer data)
1263 {
1264     RISCVIOMMUEntry *iot = (RISCVIOMMUEntry *) value;
1265     RISCVIOMMUEntry *arg = (RISCVIOMMUEntry *) data;
1266     if (iot->tag == arg->tag &&
1267         iot->pscid == arg->pscid) {
1268         iot->perm = IOMMU_NONE;
1269     }
1270 }
1271 
1272 /* GV: 0 AV: 1 PSCV: 0 GVMA: 0 */
1273 static
1274 void riscv_iommu_iot_inval_iova(gpointer key, gpointer value, gpointer data)
1275 {
1276     RISCVIOMMUEntry *iot = (RISCVIOMMUEntry *) value;
1277     RISCVIOMMUEntry *arg = (RISCVIOMMUEntry *) data;
1278     if (iot->tag == arg->tag &&
1279         iot->iova == arg->iova) {
1280         iot->perm = IOMMU_NONE;
1281     }
1282 }
1283 
1284 /* GV: 0 AV: 1 PSCV: 1 GVMA: 0 */
1285 static void riscv_iommu_iot_inval_pscid_iova(gpointer key, gpointer value,
1286                                              gpointer data)
1287 {
1288     RISCVIOMMUEntry *iot = (RISCVIOMMUEntry *) value;
1289     RISCVIOMMUEntry *arg = (RISCVIOMMUEntry *) data;
1290     if (iot->tag == arg->tag &&
1291         iot->pscid == arg->pscid &&
1292         iot->iova == arg->iova) {
1293         iot->perm = IOMMU_NONE;
1294     }
1295 }
1296 
1297 /* GV: 1 AV: 0 PSCV: 0 GVMA: 0 */
1298 /* GV: 1 AV: 0 GVMA: 1 */
1299 static
1300 void riscv_iommu_iot_inval_gscid(gpointer key, gpointer value, gpointer data)
1301 {
1302     RISCVIOMMUEntry *iot = (RISCVIOMMUEntry *) value;
1303     RISCVIOMMUEntry *arg = (RISCVIOMMUEntry *) data;
1304     if (iot->tag == arg->tag &&
1305         iot->gscid == arg->gscid) {
1306         iot->perm = IOMMU_NONE;
1307     }
1308 }
1309 
1310 /* GV: 1 AV: 0 PSCV: 1 GVMA: 0 */
1311 static void riscv_iommu_iot_inval_gscid_pscid(gpointer key, gpointer value,
1312                                               gpointer data)
1313 {
1314     RISCVIOMMUEntry *iot = (RISCVIOMMUEntry *) value;
1315     RISCVIOMMUEntry *arg = (RISCVIOMMUEntry *) data;
1316     if (iot->tag == arg->tag &&
1317         iot->gscid == arg->gscid &&
1318         iot->pscid == arg->pscid) {
1319         iot->perm = IOMMU_NONE;
1320     }
1321 }
1322 
1323 /* GV: 1 AV: 1 PSCV: 0 GVMA: 0 */
1324 /* GV: 1 AV: 1 GVMA: 1 */
1325 static void riscv_iommu_iot_inval_gscid_iova(gpointer key, gpointer value,
1326                                              gpointer data)
1327 {
1328     RISCVIOMMUEntry *iot = (RISCVIOMMUEntry *) value;
1329     RISCVIOMMUEntry *arg = (RISCVIOMMUEntry *) data;
1330     if (iot->tag == arg->tag &&
1331         iot->gscid == arg->gscid &&
1332         iot->iova == arg->iova) {
1333         iot->perm = IOMMU_NONE;
1334     }
1335 }
1336 
1337 /* GV: 1 AV: 1 PSCV: 1 GVMA: 0 */
1338 static void riscv_iommu_iot_inval_gscid_pscid_iova(gpointer key, gpointer value,
1339                                                    gpointer data)
1340 {
1341     RISCVIOMMUEntry *iot = (RISCVIOMMUEntry *) value;
1342     RISCVIOMMUEntry *arg = (RISCVIOMMUEntry *) data;
1343     if (iot->tag == arg->tag &&
1344         iot->gscid == arg->gscid &&
1345         iot->pscid == arg->pscid &&
1346         iot->iova == arg->iova) {
1347         iot->perm = IOMMU_NONE;
1348     }
1349 }
1350 
1351 /* caller should keep ref-count for iot_cache object */
1352 static RISCVIOMMUEntry *riscv_iommu_iot_lookup(RISCVIOMMUContext *ctx,
1353     GHashTable *iot_cache, hwaddr iova, RISCVIOMMUTransTag transtag)
1354 {
1355     RISCVIOMMUEntry key = {
1356         .tag   = transtag,
1357         .gscid = get_field(ctx->gatp, RISCV_IOMMU_DC_IOHGATP_GSCID),
1358         .pscid = get_field(ctx->ta, RISCV_IOMMU_DC_TA_PSCID),
1359         .iova  = PPN_DOWN(iova),
1360     };
1361     return g_hash_table_lookup(iot_cache, &key);
1362 }
1363 
1364 /* caller should keep ref-count for iot_cache object */
1365 static void riscv_iommu_iot_update(RISCVIOMMUState *s,
1366     GHashTable *iot_cache, RISCVIOMMUEntry *iot)
1367 {
1368     if (!s->iot_limit) {
1369         return;
1370     }
1371 
1372     if (g_hash_table_size(s->iot_cache) >= s->iot_limit) {
1373         iot_cache = g_hash_table_new_full(riscv_iommu_iot_hash,
1374                                           riscv_iommu_iot_equal,
1375                                           g_free, NULL);
1376         g_hash_table_unref(qatomic_xchg(&s->iot_cache, iot_cache));
1377     }
1378     g_hash_table_add(iot_cache, iot);
1379 }
1380 
1381 static void riscv_iommu_iot_inval(RISCVIOMMUState *s, GHFunc func,
1382     uint32_t gscid, uint32_t pscid, hwaddr iova, RISCVIOMMUTransTag transtag)
1383 {
1384     GHashTable *iot_cache;
1385     RISCVIOMMUEntry key = {
1386         .tag = transtag,
1387         .gscid = gscid,
1388         .pscid = pscid,
1389         .iova  = PPN_DOWN(iova),
1390     };
1391 
1392     iot_cache = g_hash_table_ref(s->iot_cache);
1393     g_hash_table_foreach(iot_cache, func, &key);
1394     g_hash_table_unref(iot_cache);
1395 }
1396 
1397 static RISCVIOMMUTransTag riscv_iommu_get_transtag(RISCVIOMMUContext *ctx)
1398 {
1399     uint64_t satp = get_field(ctx->satp, RISCV_IOMMU_ATP_MODE_FIELD);
1400     uint64_t gatp = get_field(ctx->gatp, RISCV_IOMMU_ATP_MODE_FIELD);
1401 
1402     if (satp == RISCV_IOMMU_DC_FSC_MODE_BARE) {
1403         return (gatp == RISCV_IOMMU_DC_IOHGATP_MODE_BARE) ?
1404             RISCV_IOMMU_TRANS_TAG_BY : RISCV_IOMMU_TRANS_TAG_VG;
1405     } else {
1406         return (gatp == RISCV_IOMMU_DC_IOHGATP_MODE_BARE) ?
1407             RISCV_IOMMU_TRANS_TAG_SS : RISCV_IOMMU_TRANS_TAG_VN;
1408     }
1409 }
1410 
1411 static int riscv_iommu_translate(RISCVIOMMUState *s, RISCVIOMMUContext *ctx,
1412     IOMMUTLBEntry *iotlb, bool enable_cache)
1413 {
1414     RISCVIOMMUTransTag transtag = riscv_iommu_get_transtag(ctx);
1415     RISCVIOMMUEntry *iot;
1416     IOMMUAccessFlags perm;
1417     bool enable_pid;
1418     bool enable_pri;
1419     GHashTable *iot_cache;
1420     int fault;
1421 
1422     riscv_iommu_hpm_incr_ctr(s, ctx, RISCV_IOMMU_HPMEVENT_URQ);
1423 
1424     iot_cache = g_hash_table_ref(s->iot_cache);
1425     /*
1426      * TC[32] is reserved for custom extensions, used here to temporarily
1427      * enable automatic page-request generation for ATS queries.
1428      */
1429     enable_pri = (iotlb->perm == IOMMU_NONE) && (ctx->tc & BIT_ULL(32));
1430     enable_pid = (ctx->tc & RISCV_IOMMU_DC_TC_PDTV);
1431 
1432     /* Check for ATS request. */
1433     if (iotlb->perm == IOMMU_NONE) {
1434         riscv_iommu_hpm_incr_ctr(s, ctx, RISCV_IOMMU_HPMEVENT_ATS_RQ);
1435         /* Check if ATS is disabled. */
1436         if (!(ctx->tc & RISCV_IOMMU_DC_TC_EN_ATS)) {
1437             enable_pri = false;
1438             fault = RISCV_IOMMU_FQ_CAUSE_TTYPE_BLOCKED;
1439             goto done;
1440         }
1441     }
1442 
1443     iot = riscv_iommu_iot_lookup(ctx, iot_cache, iotlb->iova, transtag);
1444     perm = iot ? iot->perm : IOMMU_NONE;
1445     if (perm != IOMMU_NONE) {
1446         iotlb->translated_addr = PPN_PHYS(iot->phys);
1447         iotlb->addr_mask = ~TARGET_PAGE_MASK;
1448         iotlb->perm = perm;
1449         fault = 0;
1450         goto done;
1451     }
1452 
1453     riscv_iommu_hpm_incr_ctr(s, ctx, RISCV_IOMMU_HPMEVENT_TLB_MISS);
1454 
1455     /* Translate using device directory / page table information. */
1456     fault = riscv_iommu_spa_fetch(s, ctx, iotlb);
1457 
1458     if (!fault && iotlb->target_as == &s->trap_as) {
1459         /* Do not cache trapped MSI translations */
1460         goto done;
1461     }
1462 
1463     /*
1464      * We made an implementation choice to not cache identity-mapped
1465      * translations, as allowed by the specification, to avoid
1466      * translation cache evictions for other devices sharing the
1467      * IOMMU hardware model.
1468      */
1469     if (!fault && iotlb->translated_addr != iotlb->iova && enable_cache) {
1470         iot = g_new0(RISCVIOMMUEntry, 1);
1471         iot->iova = PPN_DOWN(iotlb->iova);
1472         iot->phys = PPN_DOWN(iotlb->translated_addr);
1473         iot->gscid = get_field(ctx->gatp, RISCV_IOMMU_DC_IOHGATP_GSCID);
1474         iot->pscid = get_field(ctx->ta, RISCV_IOMMU_DC_TA_PSCID);
1475         iot->perm = iotlb->perm;
1476         iot->tag = transtag;
1477         riscv_iommu_iot_update(s, iot_cache, iot);
1478     }
1479 
1480 done:
1481     g_hash_table_unref(iot_cache);
1482 
1483     if (enable_pri && fault) {
1484         struct riscv_iommu_pq_record pr = {0};
1485         if (enable_pid) {
1486             pr.hdr = set_field(RISCV_IOMMU_PREQ_HDR_PV,
1487                                RISCV_IOMMU_PREQ_HDR_PID, ctx->process_id);
1488         }
1489         pr.hdr = set_field(pr.hdr, RISCV_IOMMU_PREQ_HDR_DID, ctx->devid);
1490         pr.payload = (iotlb->iova & TARGET_PAGE_MASK) |
1491                      RISCV_IOMMU_PREQ_PAYLOAD_M;
1492         riscv_iommu_pri(s, &pr);
1493         return fault;
1494     }
1495 
1496     if (fault) {
1497         unsigned ttype = RISCV_IOMMU_FQ_TTYPE_PCIE_ATS_REQ;
1498 
1499         if (iotlb->perm & IOMMU_RW) {
1500             ttype = RISCV_IOMMU_FQ_TTYPE_UADDR_WR;
1501         } else if (iotlb->perm & IOMMU_RO) {
1502             ttype = RISCV_IOMMU_FQ_TTYPE_UADDR_RD;
1503         }
1504 
1505         riscv_iommu_report_fault(s, ctx, ttype, fault, enable_pid,
1506                                  iotlb->iova, iotlb->translated_addr);
1507         return fault;
1508     }
1509 
1510     return 0;
1511 }
1512 
1513 /* IOMMU Command Interface */
1514 static MemTxResult riscv_iommu_iofence(RISCVIOMMUState *s, bool notify,
1515     uint64_t addr, uint32_t data)
1516 {
1517     /*
1518      * ATS processing in this implementation of the IOMMU is synchronous,
1519      * no need to wait for completions here.
1520      */
1521     if (!notify) {
1522         return MEMTX_OK;
1523     }
1524 
1525     return dma_memory_write(s->target_as, addr, &data, sizeof(data),
1526         MEMTXATTRS_UNSPECIFIED);
1527 }
1528 
1529 static void riscv_iommu_ats(RISCVIOMMUState *s,
1530     struct riscv_iommu_command *cmd, IOMMUNotifierFlag flag,
1531     IOMMUAccessFlags perm,
1532     void (*trace_fn)(const char *id))
1533 {
1534     RISCVIOMMUSpace *as = NULL;
1535     IOMMUNotifier *n;
1536     IOMMUTLBEvent event;
1537     uint32_t pid;
1538     uint32_t devid;
1539     const bool pv = cmd->dword0 & RISCV_IOMMU_CMD_ATS_PV;
1540 
1541     if (cmd->dword0 & RISCV_IOMMU_CMD_ATS_DSV) {
1542         /* Use device segment and requester id */
1543         devid = get_field(cmd->dword0,
1544             RISCV_IOMMU_CMD_ATS_DSEG | RISCV_IOMMU_CMD_ATS_RID);
1545     } else {
1546         devid = get_field(cmd->dword0, RISCV_IOMMU_CMD_ATS_RID);
1547     }
1548 
1549     pid = get_field(cmd->dword0, RISCV_IOMMU_CMD_ATS_PID);
1550 
1551     QLIST_FOREACH(as, &s->spaces, list) {
1552         if (as->devid == devid) {
1553             break;
1554         }
1555     }
1556 
1557     if (!as || !as->notifier) {
1558         return;
1559     }
1560 
1561     event.type = flag;
1562     event.entry.perm = perm;
1563     event.entry.target_as = s->target_as;
1564 
1565     IOMMU_NOTIFIER_FOREACH(n, &as->iova_mr) {
1566         if (!pv || n->iommu_idx == pid) {
1567             event.entry.iova = n->start;
1568             event.entry.addr_mask = n->end - n->start;
1569             trace_fn(as->iova_mr.parent_obj.name);
1570             memory_region_notify_iommu_one(n, &event);
1571         }
1572     }
1573 }
1574 
1575 static void riscv_iommu_ats_inval(RISCVIOMMUState *s,
1576     struct riscv_iommu_command *cmd)
1577 {
1578     return riscv_iommu_ats(s, cmd, IOMMU_NOTIFIER_DEVIOTLB_UNMAP, IOMMU_NONE,
1579                            trace_riscv_iommu_ats_inval);
1580 }
1581 
1582 static void riscv_iommu_ats_prgr(RISCVIOMMUState *s,
1583     struct riscv_iommu_command *cmd)
1584 {
1585     unsigned resp_code = get_field(cmd->dword1,
1586                                    RISCV_IOMMU_CMD_ATS_PRGR_RESP_CODE);
1587 
1588     /* Using the access flag to carry response code information */
1589     IOMMUAccessFlags perm = resp_code ? IOMMU_NONE : IOMMU_RW;
1590     return riscv_iommu_ats(s, cmd, IOMMU_NOTIFIER_MAP, perm,
1591                            trace_riscv_iommu_ats_prgr);
1592 }
1593 
1594 static void riscv_iommu_process_ddtp(RISCVIOMMUState *s)
1595 {
1596     uint64_t old_ddtp = s->ddtp;
1597     uint64_t new_ddtp = riscv_iommu_reg_get64(s, RISCV_IOMMU_REG_DDTP);
1598     unsigned new_mode = get_field(new_ddtp, RISCV_IOMMU_DDTP_MODE);
1599     unsigned old_mode = get_field(old_ddtp, RISCV_IOMMU_DDTP_MODE);
1600     bool ok = false;
1601 
1602     /*
1603      * Check for allowed DDTP.MODE transitions:
1604      * {OFF, BARE}        -> {OFF, BARE, 1LVL, 2LVL, 3LVL}
1605      * {1LVL, 2LVL, 3LVL} -> {OFF, BARE}
1606      */
1607     if (new_mode == old_mode ||
1608         new_mode == RISCV_IOMMU_DDTP_MODE_OFF ||
1609         new_mode == RISCV_IOMMU_DDTP_MODE_BARE) {
1610         ok = true;
1611     } else if (new_mode == RISCV_IOMMU_DDTP_MODE_1LVL ||
1612                new_mode == RISCV_IOMMU_DDTP_MODE_2LVL ||
1613                new_mode == RISCV_IOMMU_DDTP_MODE_3LVL) {
1614         ok = old_mode == RISCV_IOMMU_DDTP_MODE_OFF ||
1615              old_mode == RISCV_IOMMU_DDTP_MODE_BARE;
1616     }
1617 
1618     if (ok) {
1619         /* clear reserved and busy bits, report back sanitized version */
1620         new_ddtp = set_field(new_ddtp & RISCV_IOMMU_DDTP_PPN,
1621                              RISCV_IOMMU_DDTP_MODE, new_mode);
1622     } else {
1623         new_ddtp = old_ddtp;
1624     }
1625     s->ddtp = new_ddtp;
1626 
1627     riscv_iommu_reg_set64(s, RISCV_IOMMU_REG_DDTP, new_ddtp);
1628 }
1629 
1630 /* Command function and opcode field. */
1631 #define RISCV_IOMMU_CMD(func, op) (((func) << 7) | (op))
1632 
1633 static void riscv_iommu_process_cq_tail(RISCVIOMMUState *s)
1634 {
1635     struct riscv_iommu_command cmd;
1636     MemTxResult res;
1637     dma_addr_t addr;
1638     uint32_t tail, head, ctrl;
1639     uint64_t cmd_opcode;
1640     GHFunc func;
1641 
1642     ctrl = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_CQCSR);
1643     tail = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_CQT) & s->cq_mask;
1644     head = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_CQH) & s->cq_mask;
1645 
1646     /* Check for pending error or queue processing disabled */
1647     if (!(ctrl & RISCV_IOMMU_CQCSR_CQON) ||
1648         !!(ctrl & (RISCV_IOMMU_CQCSR_CMD_ILL | RISCV_IOMMU_CQCSR_CQMF))) {
1649         return;
1650     }
1651 
1652     while (tail != head) {
1653         addr = s->cq_addr  + head * sizeof(cmd);
1654         res = dma_memory_read(s->target_as, addr, &cmd, sizeof(cmd),
1655                               MEMTXATTRS_UNSPECIFIED);
1656 
1657         if (res != MEMTX_OK) {
1658             riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_CQCSR,
1659                                   RISCV_IOMMU_CQCSR_CQMF, 0);
1660             goto fault;
1661         }
1662 
1663         trace_riscv_iommu_cmd(s->parent_obj.id, cmd.dword0, cmd.dword1);
1664 
1665         cmd_opcode = get_field(cmd.dword0,
1666                                RISCV_IOMMU_CMD_OPCODE | RISCV_IOMMU_CMD_FUNC);
1667 
1668         switch (cmd_opcode) {
1669         case RISCV_IOMMU_CMD(RISCV_IOMMU_CMD_IOFENCE_FUNC_C,
1670                              RISCV_IOMMU_CMD_IOFENCE_OPCODE):
1671             res = riscv_iommu_iofence(s,
1672                 cmd.dword0 & RISCV_IOMMU_CMD_IOFENCE_AV, cmd.dword1 << 2,
1673                 get_field(cmd.dword0, RISCV_IOMMU_CMD_IOFENCE_DATA));
1674 
1675             if (res != MEMTX_OK) {
1676                 riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_CQCSR,
1677                                       RISCV_IOMMU_CQCSR_CQMF, 0);
1678                 goto fault;
1679             }
1680             break;
1681 
1682         case RISCV_IOMMU_CMD(RISCV_IOMMU_CMD_IOTINVAL_FUNC_GVMA,
1683                              RISCV_IOMMU_CMD_IOTINVAL_OPCODE):
1684         {
1685             bool gv = !!(cmd.dword0 & RISCV_IOMMU_CMD_IOTINVAL_GV);
1686             bool av = !!(cmd.dword0 & RISCV_IOMMU_CMD_IOTINVAL_AV);
1687             bool pscv = !!(cmd.dword0 & RISCV_IOMMU_CMD_IOTINVAL_PSCV);
1688             uint32_t gscid = get_field(cmd.dword0,
1689                                        RISCV_IOMMU_CMD_IOTINVAL_GSCID);
1690             uint32_t pscid = get_field(cmd.dword0,
1691                                        RISCV_IOMMU_CMD_IOTINVAL_PSCID);
1692             hwaddr iova = (cmd.dword1 << 2) & TARGET_PAGE_MASK;
1693 
1694             if (pscv) {
1695                 /* illegal command arguments IOTINVAL.GVMA & PSCV == 1 */
1696                 goto cmd_ill;
1697             }
1698 
1699             func = riscv_iommu_iot_inval_all;
1700 
1701             if (gv) {
1702                 func = (av) ? riscv_iommu_iot_inval_gscid_iova :
1703                               riscv_iommu_iot_inval_gscid;
1704             }
1705 
1706             riscv_iommu_iot_inval(
1707                 s, func, gscid, pscid, iova, RISCV_IOMMU_TRANS_TAG_VG);
1708 
1709             riscv_iommu_iot_inval(
1710                 s, func, gscid, pscid, iova, RISCV_IOMMU_TRANS_TAG_VN);
1711             break;
1712         }
1713 
1714         case RISCV_IOMMU_CMD(RISCV_IOMMU_CMD_IOTINVAL_FUNC_VMA,
1715                              RISCV_IOMMU_CMD_IOTINVAL_OPCODE):
1716         {
1717             bool gv = !!(cmd.dword0 & RISCV_IOMMU_CMD_IOTINVAL_GV);
1718             bool av = !!(cmd.dword0 & RISCV_IOMMU_CMD_IOTINVAL_AV);
1719             bool pscv = !!(cmd.dword0 & RISCV_IOMMU_CMD_IOTINVAL_PSCV);
1720             uint32_t gscid = get_field(cmd.dword0,
1721                                        RISCV_IOMMU_CMD_IOTINVAL_GSCID);
1722             uint32_t pscid = get_field(cmd.dword0,
1723                                        RISCV_IOMMU_CMD_IOTINVAL_PSCID);
1724             hwaddr iova = (cmd.dword1 << 2) & TARGET_PAGE_MASK;
1725             RISCVIOMMUTransTag transtag;
1726 
1727             if (gv) {
1728                 transtag = RISCV_IOMMU_TRANS_TAG_VN;
1729                 if (pscv) {
1730                     func = (av) ? riscv_iommu_iot_inval_gscid_pscid_iova :
1731                                   riscv_iommu_iot_inval_gscid_pscid;
1732                 } else {
1733                     func = (av) ? riscv_iommu_iot_inval_gscid_iova :
1734                                   riscv_iommu_iot_inval_gscid;
1735                 }
1736             } else {
1737                 transtag = RISCV_IOMMU_TRANS_TAG_SS;
1738                 if (pscv) {
1739                     func = (av) ? riscv_iommu_iot_inval_pscid_iova :
1740                                   riscv_iommu_iot_inval_pscid;
1741                 } else {
1742                     func = (av) ? riscv_iommu_iot_inval_iova :
1743                                   riscv_iommu_iot_inval_all;
1744                 }
1745             }
1746 
1747             riscv_iommu_iot_inval(s, func, gscid, pscid, iova, transtag);
1748             break;
1749         }
1750 
1751         case RISCV_IOMMU_CMD(RISCV_IOMMU_CMD_IODIR_FUNC_INVAL_DDT,
1752                              RISCV_IOMMU_CMD_IODIR_OPCODE):
1753             if (!(cmd.dword0 & RISCV_IOMMU_CMD_IODIR_DV)) {
1754                 /* invalidate all device context cache mappings */
1755                 func = riscv_iommu_ctx_inval_all;
1756             } else {
1757                 /* invalidate all device context matching DID */
1758                 func = riscv_iommu_ctx_inval_devid;
1759             }
1760             riscv_iommu_ctx_inval(s, func,
1761                 get_field(cmd.dword0, RISCV_IOMMU_CMD_IODIR_DID), 0);
1762             break;
1763 
1764         case RISCV_IOMMU_CMD(RISCV_IOMMU_CMD_IODIR_FUNC_INVAL_PDT,
1765                              RISCV_IOMMU_CMD_IODIR_OPCODE):
1766             if (!(cmd.dword0 & RISCV_IOMMU_CMD_IODIR_DV)) {
1767                 /* illegal command arguments IODIR_PDT & DV == 0 */
1768                 goto cmd_ill;
1769             } else {
1770                 func = riscv_iommu_ctx_inval_devid_procid;
1771             }
1772             riscv_iommu_ctx_inval(s, func,
1773                 get_field(cmd.dword0, RISCV_IOMMU_CMD_IODIR_DID),
1774                 get_field(cmd.dword0, RISCV_IOMMU_CMD_IODIR_PID));
1775             break;
1776 
1777         /* ATS commands */
1778         case RISCV_IOMMU_CMD(RISCV_IOMMU_CMD_ATS_FUNC_INVAL,
1779                              RISCV_IOMMU_CMD_ATS_OPCODE):
1780             if (!s->enable_ats) {
1781                 goto cmd_ill;
1782             }
1783 
1784             riscv_iommu_ats_inval(s, &cmd);
1785             break;
1786 
1787         case RISCV_IOMMU_CMD(RISCV_IOMMU_CMD_ATS_FUNC_PRGR,
1788                              RISCV_IOMMU_CMD_ATS_OPCODE):
1789             if (!s->enable_ats) {
1790                 goto cmd_ill;
1791             }
1792 
1793             riscv_iommu_ats_prgr(s, &cmd);
1794             break;
1795 
1796         default:
1797         cmd_ill:
1798             /* Invalid instruction, do not advance instruction index. */
1799             riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_CQCSR,
1800                 RISCV_IOMMU_CQCSR_CMD_ILL, 0);
1801             goto fault;
1802         }
1803 
1804         /* Advance and update head pointer after command completes. */
1805         head = (head + 1) & s->cq_mask;
1806         riscv_iommu_reg_set32(s, RISCV_IOMMU_REG_CQH, head);
1807     }
1808     return;
1809 
1810 fault:
1811     if (ctrl & RISCV_IOMMU_CQCSR_CIE) {
1812         riscv_iommu_notify(s, RISCV_IOMMU_INTR_CQ);
1813     }
1814 }
1815 
1816 static void riscv_iommu_process_cq_control(RISCVIOMMUState *s)
1817 {
1818     uint64_t base;
1819     uint32_t ctrl_set = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_CQCSR);
1820     uint32_t ctrl_clr;
1821     bool enable = !!(ctrl_set & RISCV_IOMMU_CQCSR_CQEN);
1822     bool active = !!(ctrl_set & RISCV_IOMMU_CQCSR_CQON);
1823 
1824     if (enable && !active) {
1825         base = riscv_iommu_reg_get64(s, RISCV_IOMMU_REG_CQB);
1826         s->cq_mask = (2ULL << get_field(base, RISCV_IOMMU_CQB_LOG2SZ)) - 1;
1827         s->cq_addr = PPN_PHYS(get_field(base, RISCV_IOMMU_CQB_PPN));
1828         stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_CQT], ~s->cq_mask);
1829         stl_le_p(&s->regs_rw[RISCV_IOMMU_REG_CQH], 0);
1830         stl_le_p(&s->regs_rw[RISCV_IOMMU_REG_CQT], 0);
1831         ctrl_set = RISCV_IOMMU_CQCSR_CQON;
1832         ctrl_clr = RISCV_IOMMU_CQCSR_BUSY | RISCV_IOMMU_CQCSR_CQMF |
1833                    RISCV_IOMMU_CQCSR_CMD_ILL | RISCV_IOMMU_CQCSR_CMD_TO |
1834                    RISCV_IOMMU_CQCSR_FENCE_W_IP;
1835     } else if (!enable && active) {
1836         stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_CQT], ~0);
1837         ctrl_set = 0;
1838         ctrl_clr = RISCV_IOMMU_CQCSR_BUSY | RISCV_IOMMU_CQCSR_CQON;
1839     } else {
1840         ctrl_set = 0;
1841         ctrl_clr = RISCV_IOMMU_CQCSR_BUSY;
1842     }
1843 
1844     riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_CQCSR, ctrl_set, ctrl_clr);
1845 }
1846 
1847 static void riscv_iommu_process_fq_control(RISCVIOMMUState *s)
1848 {
1849     uint64_t base;
1850     uint32_t ctrl_set = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_FQCSR);
1851     uint32_t ctrl_clr;
1852     bool enable = !!(ctrl_set & RISCV_IOMMU_FQCSR_FQEN);
1853     bool active = !!(ctrl_set & RISCV_IOMMU_FQCSR_FQON);
1854 
1855     if (enable && !active) {
1856         base = riscv_iommu_reg_get64(s, RISCV_IOMMU_REG_FQB);
1857         s->fq_mask = (2ULL << get_field(base, RISCV_IOMMU_FQB_LOG2SZ)) - 1;
1858         s->fq_addr = PPN_PHYS(get_field(base, RISCV_IOMMU_FQB_PPN));
1859         stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_FQH], ~s->fq_mask);
1860         stl_le_p(&s->regs_rw[RISCV_IOMMU_REG_FQH], 0);
1861         stl_le_p(&s->regs_rw[RISCV_IOMMU_REG_FQT], 0);
1862         ctrl_set = RISCV_IOMMU_FQCSR_FQON;
1863         ctrl_clr = RISCV_IOMMU_FQCSR_BUSY | RISCV_IOMMU_FQCSR_FQMF |
1864             RISCV_IOMMU_FQCSR_FQOF;
1865     } else if (!enable && active) {
1866         stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_FQH], ~0);
1867         ctrl_set = 0;
1868         ctrl_clr = RISCV_IOMMU_FQCSR_BUSY | RISCV_IOMMU_FQCSR_FQON;
1869     } else {
1870         ctrl_set = 0;
1871         ctrl_clr = RISCV_IOMMU_FQCSR_BUSY;
1872     }
1873 
1874     riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_FQCSR, ctrl_set, ctrl_clr);
1875 }
1876 
1877 static void riscv_iommu_process_pq_control(RISCVIOMMUState *s)
1878 {
1879     uint64_t base;
1880     uint32_t ctrl_set = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_PQCSR);
1881     uint32_t ctrl_clr;
1882     bool enable = !!(ctrl_set & RISCV_IOMMU_PQCSR_PQEN);
1883     bool active = !!(ctrl_set & RISCV_IOMMU_PQCSR_PQON);
1884 
1885     if (enable && !active) {
1886         base = riscv_iommu_reg_get64(s, RISCV_IOMMU_REG_PQB);
1887         s->pq_mask = (2ULL << get_field(base, RISCV_IOMMU_PQB_LOG2SZ)) - 1;
1888         s->pq_addr = PPN_PHYS(get_field(base, RISCV_IOMMU_PQB_PPN));
1889         stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_PQH], ~s->pq_mask);
1890         stl_le_p(&s->regs_rw[RISCV_IOMMU_REG_PQH], 0);
1891         stl_le_p(&s->regs_rw[RISCV_IOMMU_REG_PQT], 0);
1892         ctrl_set = RISCV_IOMMU_PQCSR_PQON;
1893         ctrl_clr = RISCV_IOMMU_PQCSR_BUSY | RISCV_IOMMU_PQCSR_PQMF |
1894             RISCV_IOMMU_PQCSR_PQOF;
1895     } else if (!enable && active) {
1896         stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_PQH], ~0);
1897         ctrl_set = 0;
1898         ctrl_clr = RISCV_IOMMU_PQCSR_BUSY | RISCV_IOMMU_PQCSR_PQON;
1899     } else {
1900         ctrl_set = 0;
1901         ctrl_clr = RISCV_IOMMU_PQCSR_BUSY;
1902     }
1903 
1904     riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_PQCSR, ctrl_set, ctrl_clr);
1905 }
1906 
1907 static void riscv_iommu_process_dbg(RISCVIOMMUState *s)
1908 {
1909     uint64_t iova = riscv_iommu_reg_get64(s, RISCV_IOMMU_REG_TR_REQ_IOVA);
1910     uint64_t ctrl = riscv_iommu_reg_get64(s, RISCV_IOMMU_REG_TR_REQ_CTL);
1911     unsigned devid = get_field(ctrl, RISCV_IOMMU_TR_REQ_CTL_DID);
1912     unsigned pid = get_field(ctrl, RISCV_IOMMU_TR_REQ_CTL_PID);
1913     RISCVIOMMUContext *ctx;
1914     void *ref;
1915 
1916     if (!(ctrl & RISCV_IOMMU_TR_REQ_CTL_GO_BUSY)) {
1917         return;
1918     }
1919 
1920     ctx = riscv_iommu_ctx(s, devid, pid, &ref);
1921     if (ctx == NULL) {
1922         riscv_iommu_reg_set64(s, RISCV_IOMMU_REG_TR_RESPONSE,
1923                                  RISCV_IOMMU_TR_RESPONSE_FAULT |
1924                                  (RISCV_IOMMU_FQ_CAUSE_DMA_DISABLED << 10));
1925     } else {
1926         IOMMUTLBEntry iotlb = {
1927             .iova = iova,
1928             .perm = ctrl & RISCV_IOMMU_TR_REQ_CTL_NW ? IOMMU_RO : IOMMU_RW,
1929             .addr_mask = ~0,
1930             .target_as = NULL,
1931         };
1932         int fault = riscv_iommu_translate(s, ctx, &iotlb, false);
1933         if (fault) {
1934             iova = RISCV_IOMMU_TR_RESPONSE_FAULT | (((uint64_t) fault) << 10);
1935         } else {
1936             iova = iotlb.translated_addr & ~iotlb.addr_mask;
1937             iova >>= TARGET_PAGE_BITS;
1938             iova &= RISCV_IOMMU_TR_RESPONSE_PPN;
1939 
1940             /* We do not support superpages (> 4kbs) for now */
1941             iova &= ~RISCV_IOMMU_TR_RESPONSE_S;
1942         }
1943         riscv_iommu_reg_set64(s, RISCV_IOMMU_REG_TR_RESPONSE, iova);
1944     }
1945 
1946     riscv_iommu_reg_mod64(s, RISCV_IOMMU_REG_TR_REQ_CTL, 0,
1947         RISCV_IOMMU_TR_REQ_CTL_GO_BUSY);
1948     riscv_iommu_ctx_put(s, ref);
1949 }
1950 
1951 typedef void riscv_iommu_process_fn(RISCVIOMMUState *s);
1952 
1953 static void riscv_iommu_update_icvec(RISCVIOMMUState *s, uint64_t data)
1954 {
1955     uint64_t icvec = 0;
1956 
1957     icvec |= MIN(data & RISCV_IOMMU_ICVEC_CIV,
1958                  s->icvec_avail_vectors & RISCV_IOMMU_ICVEC_CIV);
1959 
1960     icvec |= MIN(data & RISCV_IOMMU_ICVEC_FIV,
1961                  s->icvec_avail_vectors & RISCV_IOMMU_ICVEC_FIV);
1962 
1963     icvec |= MIN(data & RISCV_IOMMU_ICVEC_PMIV,
1964                  s->icvec_avail_vectors & RISCV_IOMMU_ICVEC_PMIV);
1965 
1966     icvec |= MIN(data & RISCV_IOMMU_ICVEC_PIV,
1967                  s->icvec_avail_vectors & RISCV_IOMMU_ICVEC_PIV);
1968 
1969     trace_riscv_iommu_icvec_write(data, icvec);
1970 
1971     riscv_iommu_reg_set64(s, RISCV_IOMMU_REG_ICVEC, icvec);
1972 }
1973 
1974 static void riscv_iommu_update_ipsr(RISCVIOMMUState *s, uint64_t data)
1975 {
1976     uint32_t cqcsr, fqcsr, pqcsr;
1977     uint32_t ipsr_set = 0;
1978     uint32_t ipsr_clr = 0;
1979 
1980     if (data & RISCV_IOMMU_IPSR_CIP) {
1981         cqcsr = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_CQCSR);
1982 
1983         if (cqcsr & RISCV_IOMMU_CQCSR_CIE &&
1984             (cqcsr & RISCV_IOMMU_CQCSR_FENCE_W_IP ||
1985              cqcsr & RISCV_IOMMU_CQCSR_CMD_ILL ||
1986              cqcsr & RISCV_IOMMU_CQCSR_CMD_TO ||
1987              cqcsr & RISCV_IOMMU_CQCSR_CQMF)) {
1988             ipsr_set |= RISCV_IOMMU_IPSR_CIP;
1989         } else {
1990             ipsr_clr |= RISCV_IOMMU_IPSR_CIP;
1991         }
1992     } else {
1993         ipsr_clr |= RISCV_IOMMU_IPSR_CIP;
1994     }
1995 
1996     if (data & RISCV_IOMMU_IPSR_FIP) {
1997         fqcsr = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_FQCSR);
1998 
1999         if (fqcsr & RISCV_IOMMU_FQCSR_FIE &&
2000             (fqcsr & RISCV_IOMMU_FQCSR_FQOF ||
2001              fqcsr & RISCV_IOMMU_FQCSR_FQMF)) {
2002             ipsr_set |= RISCV_IOMMU_IPSR_FIP;
2003         } else {
2004             ipsr_clr |= RISCV_IOMMU_IPSR_FIP;
2005         }
2006     } else {
2007         ipsr_clr |= RISCV_IOMMU_IPSR_FIP;
2008     }
2009 
2010     if (data & RISCV_IOMMU_IPSR_PIP) {
2011         pqcsr = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_PQCSR);
2012 
2013         if (pqcsr & RISCV_IOMMU_PQCSR_PIE &&
2014             (pqcsr & RISCV_IOMMU_PQCSR_PQOF ||
2015              pqcsr & RISCV_IOMMU_PQCSR_PQMF)) {
2016             ipsr_set |= RISCV_IOMMU_IPSR_PIP;
2017         } else {
2018             ipsr_clr |= RISCV_IOMMU_IPSR_PIP;
2019         }
2020     } else {
2021         ipsr_clr |= RISCV_IOMMU_IPSR_PIP;
2022     }
2023 
2024     riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_IPSR, ipsr_set, ipsr_clr);
2025 }
2026 
2027 static void riscv_iommu_process_hpm_writes(RISCVIOMMUState *s,
2028                                            uint32_t regb,
2029                                            bool prev_cy_inh)
2030 {
2031     switch (regb) {
2032     case RISCV_IOMMU_REG_IOCOUNTINH:
2033         riscv_iommu_process_iocntinh_cy(s, prev_cy_inh);
2034         break;
2035 
2036     case RISCV_IOMMU_REG_IOHPMCYCLES:
2037     case RISCV_IOMMU_REG_IOHPMCYCLES + 4:
2038         riscv_iommu_process_hpmcycle_write(s);
2039         break;
2040 
2041     case RISCV_IOMMU_REG_IOHPMEVT_BASE ...
2042         RISCV_IOMMU_REG_IOHPMEVT(RISCV_IOMMU_IOCOUNT_NUM) + 4:
2043         riscv_iommu_process_hpmevt_write(s, regb & ~7);
2044         break;
2045     }
2046 }
2047 
2048 /*
2049  * Write the resulting value of 'data' for the reg specified
2050  * by 'reg_addr', after considering read-only/read-write/write-clear
2051  * bits, in the pointer 'dest'.
2052  *
2053  * The result is written in little-endian.
2054  */
2055 static void riscv_iommu_write_reg_val(RISCVIOMMUState *s,
2056                                       void *dest, hwaddr reg_addr,
2057                                       int size, uint64_t data)
2058 {
2059     uint64_t ro = ldn_le_p(&s->regs_ro[reg_addr], size);
2060     uint64_t wc = ldn_le_p(&s->regs_wc[reg_addr], size);
2061     uint64_t rw = ldn_le_p(&s->regs_rw[reg_addr], size);
2062 
2063     stn_le_p(dest, size, ((rw & ro) | (data & ~ro)) & ~(data & wc));
2064 }
2065 
2066 static MemTxResult riscv_iommu_mmio_write(void *opaque, hwaddr addr,
2067                                           uint64_t data, unsigned size,
2068                                           MemTxAttrs attrs)
2069 {
2070     riscv_iommu_process_fn *process_fn = NULL;
2071     RISCVIOMMUState *s = opaque;
2072     uint32_t regb = addr & ~3;
2073     uint32_t busy = 0;
2074     uint64_t val = 0;
2075     bool cy_inh = false;
2076 
2077     if ((addr & (size - 1)) != 0) {
2078         /* Unsupported MMIO alignment or access size */
2079         return MEMTX_ERROR;
2080     }
2081 
2082     if (addr + size > RISCV_IOMMU_REG_MSI_CONFIG) {
2083         /* Unsupported MMIO access location. */
2084         return MEMTX_ACCESS_ERROR;
2085     }
2086 
2087     /* Track actionable MMIO write. */
2088     switch (regb) {
2089     case RISCV_IOMMU_REG_DDTP:
2090     case RISCV_IOMMU_REG_DDTP + 4:
2091         process_fn = riscv_iommu_process_ddtp;
2092         regb = RISCV_IOMMU_REG_DDTP;
2093         busy = RISCV_IOMMU_DDTP_BUSY;
2094         break;
2095 
2096     case RISCV_IOMMU_REG_CQT:
2097         process_fn = riscv_iommu_process_cq_tail;
2098         break;
2099 
2100     case RISCV_IOMMU_REG_CQCSR:
2101         process_fn = riscv_iommu_process_cq_control;
2102         busy = RISCV_IOMMU_CQCSR_BUSY;
2103         break;
2104 
2105     case RISCV_IOMMU_REG_FQCSR:
2106         process_fn = riscv_iommu_process_fq_control;
2107         busy = RISCV_IOMMU_FQCSR_BUSY;
2108         break;
2109 
2110     case RISCV_IOMMU_REG_PQCSR:
2111         process_fn = riscv_iommu_process_pq_control;
2112         busy = RISCV_IOMMU_PQCSR_BUSY;
2113         break;
2114 
2115     case RISCV_IOMMU_REG_ICVEC:
2116     case RISCV_IOMMU_REG_IPSR:
2117         /*
2118          * ICVEC and IPSR have special read/write procedures. We'll
2119          * call their respective helpers and exit.
2120          */
2121         riscv_iommu_write_reg_val(s, &val, addr, size, data);
2122 
2123         /*
2124          * 'val' is stored as LE. Switch to host endianess
2125          * before using it.
2126          */
2127         val = le64_to_cpu(val);
2128 
2129         if (regb == RISCV_IOMMU_REG_ICVEC) {
2130             riscv_iommu_update_icvec(s, val);
2131         } else {
2132             riscv_iommu_update_ipsr(s, val);
2133         }
2134 
2135         return MEMTX_OK;
2136 
2137     case RISCV_IOMMU_REG_TR_REQ_CTL:
2138         process_fn = riscv_iommu_process_dbg;
2139         regb = RISCV_IOMMU_REG_TR_REQ_CTL;
2140         busy = RISCV_IOMMU_TR_REQ_CTL_GO_BUSY;
2141         break;
2142 
2143     case RISCV_IOMMU_REG_IOCOUNTINH:
2144         if (addr != RISCV_IOMMU_REG_IOCOUNTINH) {
2145             break;
2146         }
2147         /* Store previous value of CY bit. */
2148         cy_inh = !!(riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_IOCOUNTINH) &
2149             RISCV_IOMMU_IOCOUNTINH_CY);
2150         break;
2151 
2152 
2153     default:
2154         break;
2155     }
2156 
2157     /*
2158      * Registers update might be not synchronized with core logic.
2159      * If system software updates register when relevant BUSY bit
2160      * is set IOMMU behavior of additional writes to the register
2161      * is UNSPECIFIED.
2162      */
2163     riscv_iommu_write_reg_val(s, &s->regs_rw[addr], addr, size, data);
2164 
2165     /* Busy flag update, MSB 4-byte register. */
2166     if (busy) {
2167         uint32_t rw = ldl_le_p(&s->regs_rw[regb]);
2168         stl_le_p(&s->regs_rw[regb], rw | busy);
2169     }
2170 
2171     /* Process HPM writes and update any internal state if needed. */
2172     if (regb >= RISCV_IOMMU_REG_IOCOUNTOVF &&
2173         regb <= (RISCV_IOMMU_REG_IOHPMEVT(RISCV_IOMMU_IOCOUNT_NUM) + 4)) {
2174         riscv_iommu_process_hpm_writes(s, regb, cy_inh);
2175     }
2176 
2177     if (process_fn) {
2178         process_fn(s);
2179     }
2180 
2181     return MEMTX_OK;
2182 }
2183 
2184 static MemTxResult riscv_iommu_mmio_read(void *opaque, hwaddr addr,
2185     uint64_t *data, unsigned size, MemTxAttrs attrs)
2186 {
2187     RISCVIOMMUState *s = opaque;
2188     uint64_t val = -1;
2189     uint8_t *ptr;
2190 
2191     if ((addr & (size - 1)) != 0) {
2192         /* Unsupported MMIO alignment. */
2193         return MEMTX_ERROR;
2194     }
2195 
2196     if (addr + size > RISCV_IOMMU_REG_MSI_CONFIG) {
2197         return MEMTX_ACCESS_ERROR;
2198     }
2199 
2200     /* Compute cycle register value. */
2201     if ((addr & ~7) == RISCV_IOMMU_REG_IOHPMCYCLES) {
2202         val = riscv_iommu_hpmcycle_read(s);
2203         ptr = (uint8_t *)&val + (addr & 7);
2204     } else if ((addr & ~3) == RISCV_IOMMU_REG_IOCOUNTOVF) {
2205         /*
2206          * Software can read RISCV_IOMMU_REG_IOCOUNTOVF before timer
2207          * callback completes. In which case CY_OF bit in
2208          * RISCV_IOMMU_IOHPMCYCLES_OVF would be 0. Here we take the
2209          * CY_OF bit state from RISCV_IOMMU_REG_IOHPMCYCLES register as
2210          * it's not dependent over the timer callback and is computed
2211          * from cycle overflow.
2212          */
2213         val = ldq_le_p(&s->regs_rw[addr]);
2214         val |= (riscv_iommu_hpmcycle_read(s) & RISCV_IOMMU_IOHPMCYCLES_OVF)
2215                    ? RISCV_IOMMU_IOCOUNTOVF_CY
2216                    : 0;
2217         ptr = (uint8_t *)&val + (addr & 3);
2218     } else {
2219         ptr = &s->regs_rw[addr];
2220     }
2221 
2222     val = ldn_le_p(ptr, size);
2223 
2224     *data = val;
2225 
2226     return MEMTX_OK;
2227 }
2228 
2229 static const MemoryRegionOps riscv_iommu_mmio_ops = {
2230     .read_with_attrs = riscv_iommu_mmio_read,
2231     .write_with_attrs = riscv_iommu_mmio_write,
2232     .endianness = DEVICE_NATIVE_ENDIAN,
2233     .impl = {
2234         .min_access_size = 4,
2235         .max_access_size = 8,
2236         .unaligned = false,
2237     },
2238     .valid = {
2239         .min_access_size = 4,
2240         .max_access_size = 8,
2241     }
2242 };
2243 
2244 /*
2245  * Translations matching MSI pattern check are redirected to "riscv-iommu-trap"
2246  * memory region as untranslated address, for additional MSI/MRIF interception
2247  * by IOMMU interrupt remapping implementation.
2248  * Note: Device emulation code generating an MSI is expected to provide a valid
2249  * memory transaction attributes with requested_id set.
2250  */
2251 static MemTxResult riscv_iommu_trap_write(void *opaque, hwaddr addr,
2252     uint64_t data, unsigned size, MemTxAttrs attrs)
2253 {
2254     RISCVIOMMUState* s = (RISCVIOMMUState *)opaque;
2255     RISCVIOMMUContext *ctx;
2256     MemTxResult res;
2257     void *ref;
2258     uint32_t devid = attrs.requester_id;
2259 
2260     if (attrs.unspecified) {
2261         return MEMTX_ACCESS_ERROR;
2262     }
2263 
2264     /* FIXME: PCIe bus remapping for attached endpoints. */
2265     devid |= s->bus << 8;
2266 
2267     ctx = riscv_iommu_ctx(s, devid, 0, &ref);
2268     if (ctx == NULL) {
2269         res = MEMTX_ACCESS_ERROR;
2270     } else {
2271         res = riscv_iommu_msi_write(s, ctx, addr, data, size, attrs);
2272     }
2273     riscv_iommu_ctx_put(s, ref);
2274     return res;
2275 }
2276 
2277 static MemTxResult riscv_iommu_trap_read(void *opaque, hwaddr addr,
2278     uint64_t *data, unsigned size, MemTxAttrs attrs)
2279 {
2280     return MEMTX_ACCESS_ERROR;
2281 }
2282 
2283 static const MemoryRegionOps riscv_iommu_trap_ops = {
2284     .read_with_attrs = riscv_iommu_trap_read,
2285     .write_with_attrs = riscv_iommu_trap_write,
2286     .endianness = DEVICE_LITTLE_ENDIAN,
2287     .impl = {
2288         .min_access_size = 4,
2289         .max_access_size = 8,
2290         .unaligned = true,
2291     },
2292     .valid = {
2293         .min_access_size = 4,
2294         .max_access_size = 8,
2295     }
2296 };
2297 
2298 void riscv_iommu_set_cap_igs(RISCVIOMMUState *s, riscv_iommu_igs_mode mode)
2299 {
2300     s->cap = set_field(s->cap, RISCV_IOMMU_CAP_IGS, mode);
2301 }
2302 
2303 static void riscv_iommu_instance_init(Object *obj)
2304 {
2305     RISCVIOMMUState *s = RISCV_IOMMU(obj);
2306 
2307     /* Enable translation debug interface */
2308     s->cap = RISCV_IOMMU_CAP_DBG;
2309 
2310     /* Report QEMU target physical address space limits */
2311     s->cap = set_field(s->cap, RISCV_IOMMU_CAP_PAS,
2312                        TARGET_PHYS_ADDR_SPACE_BITS);
2313 
2314     /* TODO: method to report supported PID bits */
2315     s->pid_bits = 8; /* restricted to size of MemTxAttrs.pid */
2316     s->cap |= RISCV_IOMMU_CAP_PD8;
2317 
2318     /* register storage */
2319     s->regs_rw = g_new0(uint8_t, RISCV_IOMMU_REG_SIZE);
2320     s->regs_ro = g_new0(uint8_t, RISCV_IOMMU_REG_SIZE);
2321     s->regs_wc = g_new0(uint8_t, RISCV_IOMMU_REG_SIZE);
2322 
2323      /* Mark all registers read-only */
2324     memset(s->regs_ro, 0xff, RISCV_IOMMU_REG_SIZE);
2325 
2326     /* Device translation context cache */
2327     s->ctx_cache = g_hash_table_new_full(riscv_iommu_ctx_hash,
2328                                          riscv_iommu_ctx_equal,
2329                                          g_free, NULL);
2330 
2331     s->iot_cache = g_hash_table_new_full(riscv_iommu_iot_hash,
2332                                          riscv_iommu_iot_equal,
2333                                          g_free, NULL);
2334 
2335     s->iommus.le_next = NULL;
2336     s->iommus.le_prev = NULL;
2337     QLIST_INIT(&s->spaces);
2338 }
2339 
2340 static void riscv_iommu_realize(DeviceState *dev, Error **errp)
2341 {
2342     RISCVIOMMUState *s = RISCV_IOMMU(dev);
2343 
2344     s->cap |= s->version & RISCV_IOMMU_CAP_VERSION;
2345     if (s->enable_msi) {
2346         s->cap |= RISCV_IOMMU_CAP_MSI_FLAT | RISCV_IOMMU_CAP_MSI_MRIF;
2347     }
2348     if (s->enable_ats) {
2349         s->cap |= RISCV_IOMMU_CAP_ATS;
2350     }
2351     if (s->enable_s_stage) {
2352         s->cap |= RISCV_IOMMU_CAP_SV32 | RISCV_IOMMU_CAP_SV39 |
2353                   RISCV_IOMMU_CAP_SV48 | RISCV_IOMMU_CAP_SV57;
2354     }
2355     if (s->enable_g_stage) {
2356         s->cap |= RISCV_IOMMU_CAP_SV32X4 | RISCV_IOMMU_CAP_SV39X4 |
2357                   RISCV_IOMMU_CAP_SV48X4 | RISCV_IOMMU_CAP_SV57X4;
2358     }
2359 
2360     if (s->hpm_cntrs > 0) {
2361         /* Clip number of HPM counters to maximum supported (31). */
2362         if (s->hpm_cntrs > RISCV_IOMMU_IOCOUNT_NUM) {
2363             s->hpm_cntrs = RISCV_IOMMU_IOCOUNT_NUM;
2364         }
2365         /* Enable hardware performance monitor interface */
2366         s->cap |= RISCV_IOMMU_CAP_HPM;
2367     }
2368 
2369     /* Out-of-reset translation mode: OFF (DMA disabled) BARE (passthrough) */
2370     s->ddtp = set_field(0, RISCV_IOMMU_DDTP_MODE, s->enable_off ?
2371                         RISCV_IOMMU_DDTP_MODE_OFF : RISCV_IOMMU_DDTP_MODE_BARE);
2372 
2373     /*
2374      * Register complete MMIO space, including MSI/PBA registers.
2375      * Note, PCIDevice implementation will add overlapping MR for MSI/PBA,
2376      * managed directly by the PCIDevice implementation.
2377      */
2378     memory_region_init_io(&s->regs_mr, OBJECT(dev), &riscv_iommu_mmio_ops, s,
2379         "riscv-iommu-regs", RISCV_IOMMU_REG_SIZE);
2380 
2381     /* Set power-on register state */
2382     stq_le_p(&s->regs_rw[RISCV_IOMMU_REG_CAP], s->cap);
2383     stq_le_p(&s->regs_rw[RISCV_IOMMU_REG_FCTL], 0);
2384     stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_FCTL],
2385              ~(RISCV_IOMMU_FCTL_BE | RISCV_IOMMU_FCTL_WSI));
2386     stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_DDTP],
2387         ~(RISCV_IOMMU_DDTP_PPN | RISCV_IOMMU_DDTP_MODE));
2388     stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_CQB],
2389         ~(RISCV_IOMMU_CQB_LOG2SZ | RISCV_IOMMU_CQB_PPN));
2390     stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_FQB],
2391         ~(RISCV_IOMMU_FQB_LOG2SZ | RISCV_IOMMU_FQB_PPN));
2392     stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_PQB],
2393         ~(RISCV_IOMMU_PQB_LOG2SZ | RISCV_IOMMU_PQB_PPN));
2394     stl_le_p(&s->regs_wc[RISCV_IOMMU_REG_CQCSR], RISCV_IOMMU_CQCSR_CQMF |
2395         RISCV_IOMMU_CQCSR_CMD_TO | RISCV_IOMMU_CQCSR_CMD_ILL);
2396     stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_CQCSR], RISCV_IOMMU_CQCSR_CQON |
2397         RISCV_IOMMU_CQCSR_BUSY);
2398     stl_le_p(&s->regs_wc[RISCV_IOMMU_REG_FQCSR], RISCV_IOMMU_FQCSR_FQMF |
2399         RISCV_IOMMU_FQCSR_FQOF);
2400     stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_FQCSR], RISCV_IOMMU_FQCSR_FQON |
2401         RISCV_IOMMU_FQCSR_BUSY);
2402     stl_le_p(&s->regs_wc[RISCV_IOMMU_REG_PQCSR], RISCV_IOMMU_PQCSR_PQMF |
2403         RISCV_IOMMU_PQCSR_PQOF);
2404     stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_PQCSR], RISCV_IOMMU_PQCSR_PQON |
2405         RISCV_IOMMU_PQCSR_BUSY);
2406     stl_le_p(&s->regs_wc[RISCV_IOMMU_REG_IPSR], ~0);
2407     stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_ICVEC], 0);
2408     stq_le_p(&s->regs_rw[RISCV_IOMMU_REG_DDTP], s->ddtp);
2409     /* If debug registers enabled. */
2410     if (s->cap & RISCV_IOMMU_CAP_DBG) {
2411         stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_TR_REQ_IOVA], 0);
2412         stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_TR_REQ_CTL],
2413             RISCV_IOMMU_TR_REQ_CTL_GO_BUSY);
2414     }
2415 
2416     /* If HPM registers are enabled. */
2417     if (s->cap & RISCV_IOMMU_CAP_HPM) {
2418         /* +1 for cycle counter bit. */
2419         stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_IOCOUNTINH],
2420                  ~((2 << s->hpm_cntrs) - 1));
2421         stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_IOHPMCYCLES], 0);
2422         memset(&s->regs_ro[RISCV_IOMMU_REG_IOHPMCTR_BASE],
2423                0x00, s->hpm_cntrs * 8);
2424         memset(&s->regs_ro[RISCV_IOMMU_REG_IOHPMEVT_BASE],
2425                0x00, s->hpm_cntrs * 8);
2426     }
2427 
2428     /* Memory region for downstream access, if specified. */
2429     if (s->target_mr) {
2430         s->target_as = g_new0(AddressSpace, 1);
2431         address_space_init(s->target_as, s->target_mr,
2432             "riscv-iommu-downstream");
2433     } else {
2434         /* Fallback to global system memory. */
2435         s->target_as = &address_space_memory;
2436     }
2437 
2438     /* Memory region for untranslated MRIF/MSI writes */
2439     memory_region_init_io(&s->trap_mr, OBJECT(dev), &riscv_iommu_trap_ops, s,
2440             "riscv-iommu-trap", ~0ULL);
2441     address_space_init(&s->trap_as, &s->trap_mr, "riscv-iommu-trap-as");
2442 
2443     if (s->cap & RISCV_IOMMU_CAP_HPM) {
2444         s->hpm_timer =
2445             timer_new_ns(QEMU_CLOCK_VIRTUAL, riscv_iommu_hpm_timer_cb, s);
2446         s->hpm_event_ctr_map = g_hash_table_new(g_direct_hash, g_direct_equal);
2447     }
2448 }
2449 
2450 static void riscv_iommu_unrealize(DeviceState *dev)
2451 {
2452     RISCVIOMMUState *s = RISCV_IOMMU(dev);
2453 
2454     g_hash_table_unref(s->iot_cache);
2455     g_hash_table_unref(s->ctx_cache);
2456 
2457     if (s->cap & RISCV_IOMMU_CAP_HPM) {
2458         g_hash_table_unref(s->hpm_event_ctr_map);
2459         timer_free(s->hpm_timer);
2460     }
2461 }
2462 
2463 void riscv_iommu_reset(RISCVIOMMUState *s)
2464 {
2465     uint32_t reg_clr;
2466     int ddtp_mode;
2467 
2468     /*
2469      * Clear DDTP while setting DDTP_mode back to user
2470      * initial setting.
2471      */
2472     ddtp_mode = s->enable_off ?
2473                 RISCV_IOMMU_DDTP_MODE_OFF : RISCV_IOMMU_DDTP_MODE_BARE;
2474     s->ddtp = set_field(0, RISCV_IOMMU_DDTP_MODE, ddtp_mode);
2475     riscv_iommu_reg_set64(s, RISCV_IOMMU_REG_DDTP, s->ddtp);
2476 
2477     reg_clr = RISCV_IOMMU_CQCSR_CQEN | RISCV_IOMMU_CQCSR_CIE |
2478               RISCV_IOMMU_CQCSR_CQON | RISCV_IOMMU_CQCSR_BUSY;
2479     riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_CQCSR, 0, reg_clr);
2480 
2481     reg_clr = RISCV_IOMMU_FQCSR_FQEN | RISCV_IOMMU_FQCSR_FIE |
2482               RISCV_IOMMU_FQCSR_FQON | RISCV_IOMMU_FQCSR_BUSY;
2483     riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_FQCSR, 0, reg_clr);
2484 
2485     reg_clr = RISCV_IOMMU_PQCSR_PQEN | RISCV_IOMMU_PQCSR_PIE |
2486               RISCV_IOMMU_PQCSR_PQON | RISCV_IOMMU_PQCSR_BUSY;
2487     riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_PQCSR, 0, reg_clr);
2488 
2489     riscv_iommu_reg_mod64(s, RISCV_IOMMU_REG_TR_REQ_CTL, 0,
2490                           RISCV_IOMMU_TR_REQ_CTL_GO_BUSY);
2491 
2492     riscv_iommu_reg_set32(s, RISCV_IOMMU_REG_IPSR, 0);
2493 
2494     g_hash_table_remove_all(s->ctx_cache);
2495     g_hash_table_remove_all(s->iot_cache);
2496 }
2497 
2498 static const Property riscv_iommu_properties[] = {
2499     DEFINE_PROP_UINT32("version", RISCVIOMMUState, version,
2500         RISCV_IOMMU_SPEC_DOT_VER),
2501     DEFINE_PROP_UINT32("bus", RISCVIOMMUState, bus, 0x0),
2502     DEFINE_PROP_UINT32("ioatc-limit", RISCVIOMMUState, iot_limit,
2503         LIMIT_CACHE_IOT),
2504     DEFINE_PROP_BOOL("intremap", RISCVIOMMUState, enable_msi, TRUE),
2505     DEFINE_PROP_BOOL("ats", RISCVIOMMUState, enable_ats, TRUE),
2506     DEFINE_PROP_BOOL("off", RISCVIOMMUState, enable_off, TRUE),
2507     DEFINE_PROP_BOOL("s-stage", RISCVIOMMUState, enable_s_stage, TRUE),
2508     DEFINE_PROP_BOOL("g-stage", RISCVIOMMUState, enable_g_stage, TRUE),
2509     DEFINE_PROP_LINK("downstream-mr", RISCVIOMMUState, target_mr,
2510         TYPE_MEMORY_REGION, MemoryRegion *),
2511     DEFINE_PROP_UINT8("hpm-counters", RISCVIOMMUState, hpm_cntrs,
2512                       RISCV_IOMMU_IOCOUNT_NUM),
2513 };
2514 
2515 static void riscv_iommu_class_init(ObjectClass *klass, void* data)
2516 {
2517     DeviceClass *dc = DEVICE_CLASS(klass);
2518 
2519     /* internal device for riscv-iommu-{pci/sys}, not user-creatable */
2520     dc->user_creatable = false;
2521     dc->realize = riscv_iommu_realize;
2522     dc->unrealize = riscv_iommu_unrealize;
2523     device_class_set_props(dc, riscv_iommu_properties);
2524 }
2525 
2526 static const TypeInfo riscv_iommu_info = {
2527     .name = TYPE_RISCV_IOMMU,
2528     .parent = TYPE_DEVICE,
2529     .instance_size = sizeof(RISCVIOMMUState),
2530     .instance_init = riscv_iommu_instance_init,
2531     .class_init = riscv_iommu_class_init,
2532 };
2533 
2534 static const char *IOMMU_FLAG_STR[] = {
2535     "NA",
2536     "RO",
2537     "WR",
2538     "RW",
2539 };
2540 
2541 /* RISC-V IOMMU Memory Region - Address Translation Space */
2542 static IOMMUTLBEntry riscv_iommu_memory_region_translate(
2543     IOMMUMemoryRegion *iommu_mr, hwaddr addr,
2544     IOMMUAccessFlags flag, int iommu_idx)
2545 {
2546     RISCVIOMMUSpace *as = container_of(iommu_mr, RISCVIOMMUSpace, iova_mr);
2547     RISCVIOMMUContext *ctx;
2548     void *ref;
2549     IOMMUTLBEntry iotlb = {
2550         .iova = addr,
2551         .target_as = as->iommu->target_as,
2552         .addr_mask = ~0ULL,
2553         .perm = flag,
2554     };
2555 
2556     ctx = riscv_iommu_ctx(as->iommu, as->devid, iommu_idx, &ref);
2557     if (ctx == NULL) {
2558         /* Translation disabled or invalid. */
2559         iotlb.addr_mask = 0;
2560         iotlb.perm = IOMMU_NONE;
2561     } else if (riscv_iommu_translate(as->iommu, ctx, &iotlb, true)) {
2562         /* Translation disabled or fault reported. */
2563         iotlb.addr_mask = 0;
2564         iotlb.perm = IOMMU_NONE;
2565     }
2566 
2567     /* Trace all dma translations with original access flags. */
2568     trace_riscv_iommu_dma(as->iommu->parent_obj.id, PCI_BUS_NUM(as->devid),
2569                           PCI_SLOT(as->devid), PCI_FUNC(as->devid), iommu_idx,
2570                           IOMMU_FLAG_STR[flag & IOMMU_RW], iotlb.iova,
2571                           iotlb.translated_addr);
2572 
2573     riscv_iommu_ctx_put(as->iommu, ref);
2574 
2575     return iotlb;
2576 }
2577 
2578 static int riscv_iommu_memory_region_notify(
2579     IOMMUMemoryRegion *iommu_mr, IOMMUNotifierFlag old,
2580     IOMMUNotifierFlag new, Error **errp)
2581 {
2582     RISCVIOMMUSpace *as = container_of(iommu_mr, RISCVIOMMUSpace, iova_mr);
2583 
2584     if (old == IOMMU_NOTIFIER_NONE) {
2585         as->notifier = true;
2586         trace_riscv_iommu_notifier_add(iommu_mr->parent_obj.name);
2587     } else if (new == IOMMU_NOTIFIER_NONE) {
2588         as->notifier = false;
2589         trace_riscv_iommu_notifier_del(iommu_mr->parent_obj.name);
2590     }
2591 
2592     return 0;
2593 }
2594 
2595 static inline bool pci_is_iommu(PCIDevice *pdev)
2596 {
2597     return pci_get_word(pdev->config + PCI_CLASS_DEVICE) == 0x0806;
2598 }
2599 
2600 static AddressSpace *riscv_iommu_find_as(PCIBus *bus, void *opaque, int devfn)
2601 {
2602     RISCVIOMMUState *s = (RISCVIOMMUState *) opaque;
2603     PCIDevice *pdev = pci_find_device(bus, pci_bus_num(bus), devfn);
2604     AddressSpace *as = NULL;
2605 
2606     if (pdev && pci_is_iommu(pdev)) {
2607         return s->target_as;
2608     }
2609 
2610     /* Find first registered IOMMU device */
2611     while (s->iommus.le_prev) {
2612         s = *(s->iommus.le_prev);
2613     }
2614 
2615     /* Find first matching IOMMU */
2616     while (s != NULL && as == NULL) {
2617         as = riscv_iommu_space(s, PCI_BUILD_BDF(pci_bus_num(bus), devfn));
2618         s = s->iommus.le_next;
2619     }
2620 
2621     return as ? as : &address_space_memory;
2622 }
2623 
2624 static const PCIIOMMUOps riscv_iommu_ops = {
2625     .get_address_space = riscv_iommu_find_as,
2626 };
2627 
2628 void riscv_iommu_pci_setup_iommu(RISCVIOMMUState *iommu, PCIBus *bus,
2629         Error **errp)
2630 {
2631     if (bus->iommu_ops &&
2632         bus->iommu_ops->get_address_space == riscv_iommu_find_as) {
2633         /* Allow multiple IOMMUs on the same PCIe bus, link known devices */
2634         RISCVIOMMUState *last = (RISCVIOMMUState *)bus->iommu_opaque;
2635         QLIST_INSERT_AFTER(last, iommu, iommus);
2636     } else if (!bus->iommu_ops && !bus->iommu_opaque) {
2637         pci_setup_iommu(bus, &riscv_iommu_ops, iommu);
2638     } else {
2639         error_setg(errp, "can't register secondary IOMMU for PCI bus #%d",
2640             pci_bus_num(bus));
2641     }
2642 }
2643 
2644 static int riscv_iommu_memory_region_index(IOMMUMemoryRegion *iommu_mr,
2645     MemTxAttrs attrs)
2646 {
2647     return attrs.unspecified ? RISCV_IOMMU_NOPROCID : (int)attrs.pid;
2648 }
2649 
2650 static int riscv_iommu_memory_region_index_len(IOMMUMemoryRegion *iommu_mr)
2651 {
2652     RISCVIOMMUSpace *as = container_of(iommu_mr, RISCVIOMMUSpace, iova_mr);
2653     return 1 << as->iommu->pid_bits;
2654 }
2655 
2656 static void riscv_iommu_memory_region_init(ObjectClass *klass, void *data)
2657 {
2658     IOMMUMemoryRegionClass *imrc = IOMMU_MEMORY_REGION_CLASS(klass);
2659 
2660     imrc->translate = riscv_iommu_memory_region_translate;
2661     imrc->notify_flag_changed = riscv_iommu_memory_region_notify;
2662     imrc->attrs_to_index = riscv_iommu_memory_region_index;
2663     imrc->num_indexes = riscv_iommu_memory_region_index_len;
2664 }
2665 
2666 static const TypeInfo riscv_iommu_memory_region_info = {
2667     .parent = TYPE_IOMMU_MEMORY_REGION,
2668     .name = TYPE_RISCV_IOMMU_MEMORY_REGION,
2669     .class_init = riscv_iommu_memory_region_init,
2670 };
2671 
2672 static void riscv_iommu_register_mr_types(void)
2673 {
2674     type_register_static(&riscv_iommu_memory_region_info);
2675     type_register_static(&riscv_iommu_info);
2676 }
2677 
2678 type_init(riscv_iommu_register_mr_types);
2679