/*
 * QEMU emulation of a RISC-V IOMMU
 *
 * Copyright (C) 2021-2023, Rivos Inc.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2 or later, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "qom/object.h"
#include "exec/target_page.h"
#include "hw/pci/pci_bus.h"
#include "hw/pci/pci_device.h"
#include "hw/qdev-properties.h"
#include "hw/riscv/riscv_hart.h"
#include "migration/vmstate.h"
#include "qapi/error.h"
#include "qemu/timer.h"

#include "cpu_bits.h"
#include "riscv-iommu.h"
#include "riscv-iommu-bits.h"
#include "riscv-iommu-hpm.h"
#include "trace.h"

#define LIMIT_CACHE_CTX          (1U << 7)
#define LIMIT_CACHE_IOT          (1U << 20)

/* Physical page number conversions */
#define PPN_PHYS(ppn)            ((ppn) << TARGET_PAGE_BITS)
#define PPN_DOWN(phy)            ((phy) >> TARGET_PAGE_BITS)

typedef struct RISCVIOMMUEntry RISCVIOMMUEntry;

/* Device assigned I/O address space */
struct RISCVIOMMUSpace {
    IOMMUMemoryRegion iova_mr;  /* IOVA memory region for attached device */
    AddressSpace iova_as;       /* IOVA address space for attached device */
    RISCVIOMMUState *iommu;     /* Managing IOMMU device state */
    uint32_t devid;             /* Requester identifier, AKA device_id */
    bool notifier;              /* IOMMU unmap notifier enabled */
    QLIST_ENTRY(RISCVIOMMUSpace) list;
};

typedef enum RISCVIOMMUTransTag {
    RISCV_IOMMU_TRANS_TAG_BY,   /* Bypass */
    RISCV_IOMMU_TRANS_TAG_SS,   /* Single Stage */
    RISCV_IOMMU_TRANS_TAG_VG,   /* G-stage only */
    RISCV_IOMMU_TRANS_TAG_VN,   /* Nested translation */
} RISCVIOMMUTransTag;

/* Address translation cache entry */
struct RISCVIOMMUEntry {
    RISCVIOMMUTransTag tag;     /* Translation Tag */
    uint64_t iova:44;           /* IOVA Page Number */
    uint64_t pscid:20;          /* Process Soft-Context identifier */
    uint64_t phys:44;           /* Physical Page Number */
    uint64_t gscid:16;          /* Guest Soft-Context identifier */
    uint64_t perm:2;            /* IOMMU_RW flags */
};

/* IOMMU index for transactions without process_id specified.
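 *
 * Note (model behaviour, summarised from riscv_iommu_ctx_fetch() below):
 * a request carrying no PASID is looked up with this value; when the
 * matched device context has DC.tc.PDTV and DC.tc.DPE set, process_id 0
 * is substituted as the default process context.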
*/ 73 #define RISCV_IOMMU_NOPROCID 0 74 75 static uint8_t riscv_iommu_get_icvec_vector(uint32_t icvec, uint32_t vec_type) 76 { 77 switch (vec_type) { 78 case RISCV_IOMMU_INTR_CQ: 79 return icvec & RISCV_IOMMU_ICVEC_CIV; 80 case RISCV_IOMMU_INTR_FQ: 81 return (icvec & RISCV_IOMMU_ICVEC_FIV) >> 4; 82 case RISCV_IOMMU_INTR_PM: 83 return (icvec & RISCV_IOMMU_ICVEC_PMIV) >> 8; 84 case RISCV_IOMMU_INTR_PQ: 85 return (icvec & RISCV_IOMMU_ICVEC_PIV) >> 12; 86 default: 87 g_assert_not_reached(); 88 } 89 } 90 91 void riscv_iommu_notify(RISCVIOMMUState *s, int vec_type) 92 { 93 uint32_t ipsr, icvec, vector; 94 95 if (!s->notify) { 96 return; 97 } 98 99 icvec = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_ICVEC); 100 ipsr = riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_IPSR, (1 << vec_type), 0); 101 102 if (!(ipsr & (1 << vec_type))) { 103 vector = riscv_iommu_get_icvec_vector(icvec, vec_type); 104 s->notify(s, vector); 105 trace_riscv_iommu_notify_int_vector(vec_type, vector); 106 } 107 } 108 109 static void riscv_iommu_fault(RISCVIOMMUState *s, 110 struct riscv_iommu_fq_record *ev) 111 { 112 uint32_t ctrl = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_FQCSR); 113 uint32_t head = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_FQH) & s->fq_mask; 114 uint32_t tail = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_FQT) & s->fq_mask; 115 uint32_t next = (tail + 1) & s->fq_mask; 116 uint32_t devid = get_field(ev->hdr, RISCV_IOMMU_FQ_HDR_DID); 117 118 trace_riscv_iommu_flt(s->parent_obj.id, PCI_BUS_NUM(devid), PCI_SLOT(devid), 119 PCI_FUNC(devid), ev->hdr, ev->iotval); 120 121 if (!(ctrl & RISCV_IOMMU_FQCSR_FQON) || 122 !!(ctrl & (RISCV_IOMMU_FQCSR_FQOF | RISCV_IOMMU_FQCSR_FQMF))) { 123 return; 124 } 125 126 if (head == next) { 127 riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_FQCSR, 128 RISCV_IOMMU_FQCSR_FQOF, 0); 129 } else { 130 dma_addr_t addr = s->fq_addr + tail * sizeof(*ev); 131 if (dma_memory_write(s->target_as, addr, ev, sizeof(*ev), 132 MEMTXATTRS_UNSPECIFIED) != MEMTX_OK) { 133 riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_FQCSR, 134 RISCV_IOMMU_FQCSR_FQMF, 0); 135 } else { 136 riscv_iommu_reg_set32(s, RISCV_IOMMU_REG_FQT, next); 137 } 138 } 139 140 if (ctrl & RISCV_IOMMU_FQCSR_FIE) { 141 riscv_iommu_notify(s, RISCV_IOMMU_INTR_FQ); 142 } 143 } 144 145 static void riscv_iommu_pri(RISCVIOMMUState *s, 146 struct riscv_iommu_pq_record *pr) 147 { 148 uint32_t ctrl = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_PQCSR); 149 uint32_t head = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_PQH) & s->pq_mask; 150 uint32_t tail = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_PQT) & s->pq_mask; 151 uint32_t next = (tail + 1) & s->pq_mask; 152 uint32_t devid = get_field(pr->hdr, RISCV_IOMMU_PREQ_HDR_DID); 153 154 trace_riscv_iommu_pri(s->parent_obj.id, PCI_BUS_NUM(devid), PCI_SLOT(devid), 155 PCI_FUNC(devid), pr->payload); 156 157 if (!(ctrl & RISCV_IOMMU_PQCSR_PQON) || 158 !!(ctrl & (RISCV_IOMMU_PQCSR_PQOF | RISCV_IOMMU_PQCSR_PQMF))) { 159 return; 160 } 161 162 if (head == next) { 163 riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_PQCSR, 164 RISCV_IOMMU_PQCSR_PQOF, 0); 165 } else { 166 dma_addr_t addr = s->pq_addr + tail * sizeof(*pr); 167 if (dma_memory_write(s->target_as, addr, pr, sizeof(*pr), 168 MEMTXATTRS_UNSPECIFIED) != MEMTX_OK) { 169 riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_PQCSR, 170 RISCV_IOMMU_PQCSR_PQMF, 0); 171 } else { 172 riscv_iommu_reg_set32(s, RISCV_IOMMU_REG_PQT, next); 173 } 174 } 175 176 if (ctrl & RISCV_IOMMU_PQCSR_PIE) { 177 riscv_iommu_notify(s, RISCV_IOMMU_INTR_PQ); 178 } 179 } 180 181 /* 182 * Discards all bits from 'val' whose matching 
bits in the same
 * positions in the mask 'ext' are zeros, and packs the remaining
 * bits from 'val' contiguously at the least-significant end of the
 * result, keeping the same bit order as 'val' and filling any
 * other bits at the most-significant end of the result with zeros.
 *
 * For example, for the following 'val' and 'ext', the return 'ret'
 * will be:
 *
 * val = a b c d e f g h
 * ext = 1 0 1 0 0 1 1 0
 * ret = 0 0 0 0 a c f g
 *
 * This function, taken from the riscv-iommu 1.0 spec, section 2.3.3
 * "Process to translate addresses of MSIs", is similar to the x86
 * bit-manipulation instruction PEXT (parallel bits extract).
 */
static uint64_t riscv_iommu_pext_u64(uint64_t val, uint64_t ext)
{
    uint64_t ret = 0;
    uint64_t rot = 1;

    while (ext) {
        if (ext & 1) {
            if (val & 1) {
                ret |= rot;
            }
            rot <<= 1;
        }
        val >>= 1;
        ext >>= 1;
    }

    return ret;
}

/* Check if GPA matches MSI/MRIF pattern. */
static bool riscv_iommu_msi_check(RISCVIOMMUState *s, RISCVIOMMUContext *ctx,
                                  dma_addr_t gpa)
{
    if (!s->enable_msi) {
        return false;
    }

    if (get_field(ctx->msiptp, RISCV_IOMMU_DC_MSIPTP_MODE) !=
        RISCV_IOMMU_DC_MSIPTP_MODE_FLAT) {
        return false; /* Invalid MSI/MRIF mode */
    }

    if ((PPN_DOWN(gpa) ^ ctx->msi_addr_pattern) & ~ctx->msi_addr_mask) {
        return false; /* GPA not in MSI range defined by AIA IMSIC rules. */
    }

    return true;
}

/*
 * RISC-V IOMMU Address Translation Lookup - Page Table Walk
 *
 * Note: Code is based on get_physical_address() from target/riscv/cpu_helper.c
 * Both implementations can be merged into a single helper function in the
 * future. Keeping them separate for now, as error reporting and flow
 * specifics are sufficiently different for separate implementations.
 *
 * @s      : IOMMU Device State
 * @ctx    : Translation context for device id and process address space id.
 * @iotlb  : translation data: physical address and access mode.
 * @return : success or fault cause code.
 */
static int riscv_iommu_spa_fetch(RISCVIOMMUState *s, RISCVIOMMUContext *ctx,
                                 IOMMUTLBEntry *iotlb)
{
    dma_addr_t addr, base;
    uint64_t satp, gatp, pte;
    bool en_s, en_g;
    struct {
        unsigned char step;
        unsigned char levels;
        unsigned char ptidxbits;
        unsigned char ptesize;
    } sc[2];
    /* Translation stage phase */
    enum {
        S_STAGE = 0,
        G_STAGE = 1,
    } pass;
    MemTxResult ret;

    satp = get_field(ctx->satp, RISCV_IOMMU_ATP_MODE_FIELD);
    gatp = get_field(ctx->gatp, RISCV_IOMMU_ATP_MODE_FIELD);

    en_s = satp != RISCV_IOMMU_DC_FSC_MODE_BARE;
    en_g = gatp != RISCV_IOMMU_DC_IOHGATP_MODE_BARE;

    /*
     * Early check for MSI address match when IOVA == GPA.
     * Note that the (!en_s) condition means that the MSI
     * page table may only be used when guest pages are
     * mapped using the g-stage page table, whether single-
     * or two-stage paging is enabled. It's unavoidable though,
     * because the spec mandates that we do a first-stage
     * translation before we check the MSI page table, which
     * means we can't do an early MSI check unless we have
     * strictly !en_s.
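     *
     * Illustrative example (values are made up, not taken from the spec):
     * assuming MSI flat mode is enabled, with ctx->msi_addr_mask = 0xff
     * and ctx->msi_addr_pattern = 0x12300, a write to GPA 0x12345000
     * passes the check in riscv_iommu_msi_check() because
     * (PPN_DOWN(gpa) ^ pattern) & ~mask == 0, and the interrupt file
     * number later derived in riscv_iommu_msi_write() is
     * riscv_iommu_pext_u64(PPN_DOWN(gpa), mask) == 0x45.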
286 */ 287 if (!en_s && (iotlb->perm & IOMMU_WO) && 288 riscv_iommu_msi_check(s, ctx, iotlb->iova)) { 289 iotlb->target_as = &s->trap_as; 290 iotlb->translated_addr = iotlb->iova; 291 iotlb->addr_mask = ~TARGET_PAGE_MASK; 292 return 0; 293 } 294 295 /* Exit early for pass-through mode. */ 296 if (!(en_s || en_g)) { 297 iotlb->translated_addr = iotlb->iova; 298 iotlb->addr_mask = ~TARGET_PAGE_MASK; 299 /* Allow R/W in pass-through mode */ 300 iotlb->perm = IOMMU_RW; 301 return 0; 302 } 303 304 /* S/G translation parameters. */ 305 for (pass = 0; pass < 2; pass++) { 306 uint32_t sv_mode; 307 308 sc[pass].step = 0; 309 if (pass ? (s->fctl & RISCV_IOMMU_FCTL_GXL) : 310 (ctx->tc & RISCV_IOMMU_DC_TC_SXL)) { 311 /* 32bit mode for GXL/SXL == 1 */ 312 switch (pass ? gatp : satp) { 313 case RISCV_IOMMU_DC_IOHGATP_MODE_BARE: 314 sc[pass].levels = 0; 315 sc[pass].ptidxbits = 0; 316 sc[pass].ptesize = 0; 317 break; 318 case RISCV_IOMMU_DC_IOHGATP_MODE_SV32X4: 319 sv_mode = pass ? RISCV_IOMMU_CAP_SV32X4 : RISCV_IOMMU_CAP_SV32; 320 if (!(s->cap & sv_mode)) { 321 return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED; 322 } 323 sc[pass].levels = 2; 324 sc[pass].ptidxbits = 10; 325 sc[pass].ptesize = 4; 326 break; 327 default: 328 return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED; 329 } 330 } else { 331 /* 64bit mode for GXL/SXL == 0 */ 332 switch (pass ? gatp : satp) { 333 case RISCV_IOMMU_DC_IOHGATP_MODE_BARE: 334 sc[pass].levels = 0; 335 sc[pass].ptidxbits = 0; 336 sc[pass].ptesize = 0; 337 break; 338 case RISCV_IOMMU_DC_IOHGATP_MODE_SV39X4: 339 sv_mode = pass ? RISCV_IOMMU_CAP_SV39X4 : RISCV_IOMMU_CAP_SV39; 340 if (!(s->cap & sv_mode)) { 341 return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED; 342 } 343 sc[pass].levels = 3; 344 sc[pass].ptidxbits = 9; 345 sc[pass].ptesize = 8; 346 break; 347 case RISCV_IOMMU_DC_IOHGATP_MODE_SV48X4: 348 sv_mode = pass ? RISCV_IOMMU_CAP_SV48X4 : RISCV_IOMMU_CAP_SV48; 349 if (!(s->cap & sv_mode)) { 350 return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED; 351 } 352 sc[pass].levels = 4; 353 sc[pass].ptidxbits = 9; 354 sc[pass].ptesize = 8; 355 break; 356 case RISCV_IOMMU_DC_IOHGATP_MODE_SV57X4: 357 sv_mode = pass ? RISCV_IOMMU_CAP_SV57X4 : RISCV_IOMMU_CAP_SV57; 358 if (!(s->cap & sv_mode)) { 359 return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED; 360 } 361 sc[pass].levels = 5; 362 sc[pass].ptidxbits = 9; 363 sc[pass].ptesize = 8; 364 break; 365 default: 366 return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED; 367 } 368 } 369 }; 370 371 /* S/G stages translation tables root pointers */ 372 gatp = PPN_PHYS(get_field(ctx->gatp, RISCV_IOMMU_ATP_PPN_FIELD)); 373 satp = PPN_PHYS(get_field(ctx->satp, RISCV_IOMMU_ATP_PPN_FIELD)); 374 addr = (en_s && en_g) ? satp : iotlb->iova; 375 base = en_g ? gatp : satp; 376 pass = en_g ? G_STAGE : S_STAGE; 377 378 do { 379 const unsigned widened = (pass && !sc[pass].step) ? 2 : 0; 380 const unsigned va_bits = widened + sc[pass].ptidxbits; 381 const unsigned va_skip = TARGET_PAGE_BITS + sc[pass].ptidxbits * 382 (sc[pass].levels - 1 - sc[pass].step); 383 const unsigned idx = (addr >> va_skip) & ((1 << va_bits) - 1); 384 const dma_addr_t pte_addr = base + idx * sc[pass].ptesize; 385 const bool ade = 386 ctx->tc & (pass ? 
RISCV_IOMMU_DC_TC_GADE : RISCV_IOMMU_DC_TC_SADE); 387 388 /* Address range check before first level lookup */ 389 if (!sc[pass].step) { 390 const uint64_t va_len = va_skip + va_bits; 391 const uint64_t va_mask = (1ULL << va_len) - 1; 392 393 if (pass == S_STAGE && va_len > 32) { 394 target_ulong mask, masked_msbs; 395 396 mask = (1L << (TARGET_LONG_BITS - (va_len - 1))) - 1; 397 masked_msbs = (addr >> (va_len - 1)) & mask; 398 399 if (masked_msbs != 0 && masked_msbs != mask) { 400 return (iotlb->perm & IOMMU_WO) ? 401 RISCV_IOMMU_FQ_CAUSE_WR_FAULT_S : 402 RISCV_IOMMU_FQ_CAUSE_RD_FAULT_S; 403 } 404 } else { 405 if ((addr & va_mask) != addr) { 406 return (iotlb->perm & IOMMU_WO) ? 407 RISCV_IOMMU_FQ_CAUSE_WR_FAULT_VS : 408 RISCV_IOMMU_FQ_CAUSE_RD_FAULT_VS; 409 } 410 } 411 } 412 413 414 if (pass == S_STAGE) { 415 riscv_iommu_hpm_incr_ctr(s, ctx, RISCV_IOMMU_HPMEVENT_S_VS_WALKS); 416 } else { 417 riscv_iommu_hpm_incr_ctr(s, ctx, RISCV_IOMMU_HPMEVENT_G_WALKS); 418 } 419 420 /* Read page table entry */ 421 if (sc[pass].ptesize == 4) { 422 uint32_t pte32 = 0; 423 ret = ldl_le_dma(s->target_as, pte_addr, &pte32, 424 MEMTXATTRS_UNSPECIFIED); 425 pte = pte32; 426 } else { 427 ret = ldq_le_dma(s->target_as, pte_addr, &pte, 428 MEMTXATTRS_UNSPECIFIED); 429 } 430 if (ret != MEMTX_OK) { 431 return (iotlb->perm & IOMMU_WO) ? RISCV_IOMMU_FQ_CAUSE_WR_FAULT 432 : RISCV_IOMMU_FQ_CAUSE_RD_FAULT; 433 } 434 435 sc[pass].step++; 436 hwaddr ppn = pte >> PTE_PPN_SHIFT; 437 438 if (!(pte & PTE_V)) { 439 break; /* Invalid PTE */ 440 } else if (!(pte & (PTE_R | PTE_W | PTE_X))) { 441 base = PPN_PHYS(ppn); /* Inner PTE, continue walking */ 442 } else if ((pte & (PTE_R | PTE_W | PTE_X)) == PTE_W) { 443 break; /* Reserved leaf PTE flags: PTE_W */ 444 } else if ((pte & (PTE_R | PTE_W | PTE_X)) == (PTE_W | PTE_X)) { 445 break; /* Reserved leaf PTE flags: PTE_W + PTE_X */ 446 } else if (ppn & ((1ULL << (va_skip - TARGET_PAGE_BITS)) - 1)) { 447 break; /* Misaligned PPN */ 448 } else if ((iotlb->perm & IOMMU_RO) && !(pte & PTE_R)) { 449 break; /* Read access check failed */ 450 } else if ((iotlb->perm & IOMMU_WO) && !(pte & PTE_W)) { 451 break; /* Write access check failed */ 452 } else if ((iotlb->perm & IOMMU_RO) && !ade && !(pte & PTE_A)) { 453 break; /* Access bit not set */ 454 } else if ((iotlb->perm & IOMMU_WO) && !ade && !(pte & PTE_D)) { 455 break; /* Dirty bit not set */ 456 } else { 457 /* Leaf PTE, translation completed. */ 458 sc[pass].step = sc[pass].levels; 459 base = PPN_PHYS(ppn) | (addr & ((1ULL << va_skip) - 1)); 460 /* Update address mask based on smallest translation granularity */ 461 iotlb->addr_mask &= (1ULL << va_skip) - 1; 462 /* Continue with S-Stage translation? */ 463 if (pass && sc[0].step != sc[0].levels) { 464 pass = S_STAGE; 465 addr = iotlb->iova; 466 continue; 467 } 468 /* Translation phase completed (GPA or SPA) */ 469 iotlb->translated_addr = base; 470 iotlb->perm = (pte & PTE_W) ? ((pte & PTE_R) ? IOMMU_RW : IOMMU_WO) 471 : IOMMU_RO; 472 473 /* Check MSI GPA address match */ 474 if (pass == S_STAGE && (iotlb->perm & IOMMU_WO) && 475 riscv_iommu_msi_check(s, ctx, base)) { 476 /* Trap MSI writes and return GPA address. */ 477 iotlb->target_as = &s->trap_as; 478 iotlb->addr_mask = ~TARGET_PAGE_MASK; 479 return 0; 480 } 481 482 /* Continue with G-Stage translation? 
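     * (Taken when an S-stage leaf was just resolved: the GPA computed
     * into 'base' is re-walked through the G-stage table rooted at
     * 'gatp' by the code below.)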
*/ 483 if (!pass && en_g) { 484 pass = G_STAGE; 485 addr = base; 486 base = gatp; 487 sc[pass].step = 0; 488 continue; 489 } 490 491 return 0; 492 } 493 494 if (sc[pass].step == sc[pass].levels) { 495 break; /* Can't find leaf PTE */ 496 } 497 498 /* Continue with G-Stage translation? */ 499 if (!pass && en_g) { 500 pass = G_STAGE; 501 addr = base; 502 base = gatp; 503 sc[pass].step = 0; 504 } 505 } while (1); 506 507 return (iotlb->perm & IOMMU_WO) ? 508 (pass ? RISCV_IOMMU_FQ_CAUSE_WR_FAULT_VS : 509 RISCV_IOMMU_FQ_CAUSE_WR_FAULT_S) : 510 (pass ? RISCV_IOMMU_FQ_CAUSE_RD_FAULT_VS : 511 RISCV_IOMMU_FQ_CAUSE_RD_FAULT_S); 512 } 513 514 static void riscv_iommu_report_fault(RISCVIOMMUState *s, 515 RISCVIOMMUContext *ctx, 516 uint32_t fault_type, uint32_t cause, 517 bool pv, 518 uint64_t iotval, uint64_t iotval2) 519 { 520 struct riscv_iommu_fq_record ev = { 0 }; 521 522 if (ctx->tc & RISCV_IOMMU_DC_TC_DTF) { 523 switch (cause) { 524 case RISCV_IOMMU_FQ_CAUSE_DMA_DISABLED: 525 case RISCV_IOMMU_FQ_CAUSE_DDT_LOAD_FAULT: 526 case RISCV_IOMMU_FQ_CAUSE_DDT_INVALID: 527 case RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED: 528 case RISCV_IOMMU_FQ_CAUSE_DDT_CORRUPTED: 529 case RISCV_IOMMU_FQ_CAUSE_INTERNAL_DP_ERROR: 530 case RISCV_IOMMU_FQ_CAUSE_MSI_WR_FAULT: 531 break; 532 default: 533 /* DTF prevents reporting a fault for this given cause */ 534 return; 535 } 536 } 537 538 ev.hdr = set_field(ev.hdr, RISCV_IOMMU_FQ_HDR_CAUSE, cause); 539 ev.hdr = set_field(ev.hdr, RISCV_IOMMU_FQ_HDR_TTYPE, fault_type); 540 ev.hdr = set_field(ev.hdr, RISCV_IOMMU_FQ_HDR_DID, ctx->devid); 541 ev.hdr = set_field(ev.hdr, RISCV_IOMMU_FQ_HDR_PV, true); 542 543 if (pv) { 544 ev.hdr = set_field(ev.hdr, RISCV_IOMMU_FQ_HDR_PID, ctx->process_id); 545 } 546 547 ev.iotval = iotval; 548 ev.iotval2 = iotval2; 549 550 riscv_iommu_fault(s, &ev); 551 } 552 553 /* Redirect MSI write for given GPA. */ 554 static MemTxResult riscv_iommu_msi_write(RISCVIOMMUState *s, 555 RISCVIOMMUContext *ctx, uint64_t gpa, uint64_t data, 556 unsigned size, MemTxAttrs attrs) 557 { 558 MemTxResult res; 559 dma_addr_t addr; 560 uint64_t intn; 561 uint32_t n190; 562 uint64_t pte[2]; 563 int fault_type = RISCV_IOMMU_FQ_TTYPE_UADDR_WR; 564 int cause; 565 566 /* Interrupt File Number */ 567 intn = riscv_iommu_pext_u64(PPN_DOWN(gpa), ctx->msi_addr_mask); 568 if (intn >= 256) { 569 /* Interrupt file number out of range */ 570 res = MEMTX_ACCESS_ERROR; 571 cause = RISCV_IOMMU_FQ_CAUSE_MSI_LOAD_FAULT; 572 goto err; 573 } 574 575 /* fetch MSI PTE */ 576 addr = PPN_PHYS(get_field(ctx->msiptp, RISCV_IOMMU_DC_MSIPTP_PPN)); 577 addr = addr | (intn * sizeof(pte)); 578 res = dma_memory_read(s->target_as, addr, &pte, sizeof(pte), 579 MEMTXATTRS_UNSPECIFIED); 580 if (res != MEMTX_OK) { 581 if (res == MEMTX_DECODE_ERROR) { 582 cause = RISCV_IOMMU_FQ_CAUSE_MSI_PT_CORRUPTED; 583 } else { 584 cause = RISCV_IOMMU_FQ_CAUSE_MSI_LOAD_FAULT; 585 } 586 goto err; 587 } 588 589 le64_to_cpus(&pte[0]); 590 le64_to_cpus(&pte[1]); 591 592 if (!(pte[0] & RISCV_IOMMU_MSI_PTE_V) || (pte[0] & RISCV_IOMMU_MSI_PTE_C)) { 593 /* 594 * The spec mentions that: "If msipte.C == 1, then further 595 * processing to interpret the PTE is implementation 596 * defined.". We'll abort with cause = 262 for this 597 * case too. 
598 */ 599 res = MEMTX_ACCESS_ERROR; 600 cause = RISCV_IOMMU_FQ_CAUSE_MSI_INVALID; 601 goto err; 602 } 603 604 switch (get_field(pte[0], RISCV_IOMMU_MSI_PTE_M)) { 605 case RISCV_IOMMU_MSI_PTE_M_BASIC: 606 /* MSI Pass-through mode */ 607 addr = PPN_PHYS(get_field(pte[0], RISCV_IOMMU_MSI_PTE_PPN)); 608 609 trace_riscv_iommu_msi(s->parent_obj.id, PCI_BUS_NUM(ctx->devid), 610 PCI_SLOT(ctx->devid), PCI_FUNC(ctx->devid), 611 gpa, addr); 612 613 res = dma_memory_write(s->target_as, addr, &data, size, attrs); 614 if (res != MEMTX_OK) { 615 cause = RISCV_IOMMU_FQ_CAUSE_MSI_WR_FAULT; 616 goto err; 617 } 618 619 return MEMTX_OK; 620 case RISCV_IOMMU_MSI_PTE_M_MRIF: 621 /* MRIF mode, continue. */ 622 break; 623 default: 624 res = MEMTX_ACCESS_ERROR; 625 cause = RISCV_IOMMU_FQ_CAUSE_MSI_MISCONFIGURED; 626 goto err; 627 } 628 629 /* 630 * Report an error for interrupt identities exceeding the maximum allowed 631 * for an IMSIC interrupt file (2047) or destination address is not 32-bit 632 * aligned. See IOMMU Specification, Chapter 2.3. MSI page tables. 633 */ 634 if ((data > 2047) || (gpa & 3)) { 635 res = MEMTX_ACCESS_ERROR; 636 cause = RISCV_IOMMU_FQ_CAUSE_MSI_MISCONFIGURED; 637 goto err; 638 } 639 640 /* MSI MRIF mode, non atomic pending bit update */ 641 642 /* MRIF pending bit address */ 643 addr = get_field(pte[0], RISCV_IOMMU_MSI_PTE_MRIF_ADDR) << 9; 644 addr = addr | ((data & 0x7c0) >> 3); 645 646 trace_riscv_iommu_msi(s->parent_obj.id, PCI_BUS_NUM(ctx->devid), 647 PCI_SLOT(ctx->devid), PCI_FUNC(ctx->devid), 648 gpa, addr); 649 650 /* MRIF pending bit mask */ 651 data = 1ULL << (data & 0x03f); 652 res = dma_memory_read(s->target_as, addr, &intn, sizeof(intn), attrs); 653 if (res != MEMTX_OK) { 654 cause = RISCV_IOMMU_FQ_CAUSE_MSI_LOAD_FAULT; 655 goto err; 656 } 657 658 intn = intn | data; 659 res = dma_memory_write(s->target_as, addr, &intn, sizeof(intn), attrs); 660 if (res != MEMTX_OK) { 661 cause = RISCV_IOMMU_FQ_CAUSE_MSI_WR_FAULT; 662 goto err; 663 } 664 665 /* Get MRIF enable bits */ 666 addr = addr + sizeof(intn); 667 res = dma_memory_read(s->target_as, addr, &intn, sizeof(intn), attrs); 668 if (res != MEMTX_OK) { 669 cause = RISCV_IOMMU_FQ_CAUSE_MSI_LOAD_FAULT; 670 goto err; 671 } 672 673 if (!(intn & data)) { 674 /* notification disabled, MRIF update completed. */ 675 return MEMTX_OK; 676 } 677 678 /* Send notification message */ 679 addr = PPN_PHYS(get_field(pte[1], RISCV_IOMMU_MSI_MRIF_NPPN)); 680 n190 = get_field(pte[1], RISCV_IOMMU_MSI_MRIF_NID) | 681 (get_field(pte[1], RISCV_IOMMU_MSI_MRIF_NID_MSB) << 10); 682 683 res = dma_memory_write(s->target_as, addr, &n190, sizeof(n190), attrs); 684 if (res != MEMTX_OK) { 685 cause = RISCV_IOMMU_FQ_CAUSE_MSI_WR_FAULT; 686 goto err; 687 } 688 689 trace_riscv_iommu_mrif_notification(s->parent_obj.id, n190, addr); 690 691 return MEMTX_OK; 692 693 err: 694 riscv_iommu_report_fault(s, ctx, fault_type, cause, 695 !!ctx->process_id, 0, 0); 696 return res; 697 } 698 699 /* 700 * Check device context configuration as described by the 701 * riscv-iommu spec section "Device-context configuration 702 * checks". 
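 *
 * In short, the checks below reject: ATS/PRI/PRPR bits without CAP_ATS,
 * T2GPA or PRI without EN_ATS, PRPR without EN_PRI, T2GPA without
 * CAP_T2GPA or with a Bare iohgatp, msiptp and fsc modes the CAP
 * register does not advertise, DPE with PDTV clear, and a big-endian
 * SBE setting (this model only implements little-endian accesses).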
703 */ 704 static bool riscv_iommu_validate_device_ctx(RISCVIOMMUState *s, 705 RISCVIOMMUContext *ctx) 706 { 707 uint32_t fsc_mode, msi_mode; 708 uint64_t gatp; 709 710 if (!(s->cap & RISCV_IOMMU_CAP_ATS) && 711 (ctx->tc & RISCV_IOMMU_DC_TC_EN_ATS || 712 ctx->tc & RISCV_IOMMU_DC_TC_EN_PRI || 713 ctx->tc & RISCV_IOMMU_DC_TC_PRPR)) { 714 return false; 715 } 716 717 if (!(ctx->tc & RISCV_IOMMU_DC_TC_EN_ATS) && 718 (ctx->tc & RISCV_IOMMU_DC_TC_T2GPA || 719 ctx->tc & RISCV_IOMMU_DC_TC_EN_PRI)) { 720 return false; 721 } 722 723 if (!(ctx->tc & RISCV_IOMMU_DC_TC_EN_PRI) && 724 ctx->tc & RISCV_IOMMU_DC_TC_PRPR) { 725 return false; 726 } 727 728 if (!(s->cap & RISCV_IOMMU_CAP_T2GPA) && 729 ctx->tc & RISCV_IOMMU_DC_TC_T2GPA) { 730 return false; 731 } 732 733 if (s->cap & RISCV_IOMMU_CAP_MSI_FLAT) { 734 msi_mode = get_field(ctx->msiptp, RISCV_IOMMU_DC_MSIPTP_MODE); 735 736 if (msi_mode != RISCV_IOMMU_DC_MSIPTP_MODE_OFF && 737 msi_mode != RISCV_IOMMU_DC_MSIPTP_MODE_FLAT) { 738 return false; 739 } 740 } 741 742 gatp = get_field(ctx->gatp, RISCV_IOMMU_ATP_MODE_FIELD); 743 if (ctx->tc & RISCV_IOMMU_DC_TC_T2GPA && 744 gatp == RISCV_IOMMU_DC_IOHGATP_MODE_BARE) { 745 return false; 746 } 747 748 fsc_mode = get_field(ctx->satp, RISCV_IOMMU_DC_FSC_MODE); 749 750 if (ctx->tc & RISCV_IOMMU_DC_TC_PDTV) { 751 switch (fsc_mode) { 752 case RISCV_IOMMU_DC_FSC_PDTP_MODE_PD8: 753 if (!(s->cap & RISCV_IOMMU_CAP_PD8)) { 754 return false; 755 } 756 break; 757 case RISCV_IOMMU_DC_FSC_PDTP_MODE_PD17: 758 if (!(s->cap & RISCV_IOMMU_CAP_PD17)) { 759 return false; 760 } 761 break; 762 case RISCV_IOMMU_DC_FSC_PDTP_MODE_PD20: 763 if (!(s->cap & RISCV_IOMMU_CAP_PD20)) { 764 return false; 765 } 766 break; 767 } 768 } else { 769 /* DC.tc.PDTV is 0 */ 770 if (ctx->tc & RISCV_IOMMU_DC_TC_DPE) { 771 return false; 772 } 773 774 if (ctx->tc & RISCV_IOMMU_DC_TC_SXL) { 775 if (fsc_mode == RISCV_IOMMU_CAP_SV32 && 776 !(s->cap & RISCV_IOMMU_CAP_SV32)) { 777 return false; 778 } 779 } else { 780 switch (fsc_mode) { 781 case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV39: 782 if (!(s->cap & RISCV_IOMMU_CAP_SV39)) { 783 return false; 784 } 785 break; 786 case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV48: 787 if (!(s->cap & RISCV_IOMMU_CAP_SV48)) { 788 return false; 789 } 790 break; 791 case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV57: 792 if (!(s->cap & RISCV_IOMMU_CAP_SV57)) { 793 return false; 794 } 795 break; 796 } 797 } 798 } 799 800 /* 801 * CAP_END is always zero (only one endianess). FCTL_BE is 802 * always zero (little-endian accesses). Thus TC_SBE must 803 * always be LE, i.e. zero. 804 */ 805 if (ctx->tc & RISCV_IOMMU_DC_TC_SBE) { 806 return false; 807 } 808 809 return true; 810 } 811 812 /* 813 * Validate process context (PC) according to section 814 * "Process-context configuration checks". 
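 *
 * In short: reserved bits in PC.ta and PC.fsc must be zero, and fsc.MODE
 * must be Bare or one of the Sv39/Sv48/Sv57 encodings (interpreted as
 * Sv32 when SXL=1) that the CAP register advertises.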
815 */ 816 static bool riscv_iommu_validate_process_ctx(RISCVIOMMUState *s, 817 RISCVIOMMUContext *ctx) 818 { 819 uint32_t mode; 820 821 if (get_field(ctx->ta, RISCV_IOMMU_PC_TA_RESERVED)) { 822 return false; 823 } 824 825 if (get_field(ctx->satp, RISCV_IOMMU_PC_FSC_RESERVED)) { 826 return false; 827 } 828 829 mode = get_field(ctx->satp, RISCV_IOMMU_DC_FSC_MODE); 830 switch (mode) { 831 case RISCV_IOMMU_DC_FSC_MODE_BARE: 832 /* sv39 and sv32 modes have the same value (8) */ 833 case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV39: 834 case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV48: 835 case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV57: 836 break; 837 default: 838 return false; 839 } 840 841 if (ctx->tc & RISCV_IOMMU_DC_TC_SXL) { 842 if (mode == RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV32 && 843 !(s->cap & RISCV_IOMMU_CAP_SV32)) { 844 return false; 845 } 846 } else { 847 switch (mode) { 848 case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV39: 849 if (!(s->cap & RISCV_IOMMU_CAP_SV39)) { 850 return false; 851 } 852 break; 853 case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV48: 854 if (!(s->cap & RISCV_IOMMU_CAP_SV48)) { 855 return false; 856 } 857 break; 858 case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV57: 859 if (!(s->cap & RISCV_IOMMU_CAP_SV57)) { 860 return false; 861 } 862 break; 863 } 864 } 865 866 return true; 867 } 868 869 /* 870 * RISC-V IOMMU Device Context Loopkup - Device Directory Tree Walk 871 * 872 * @s : IOMMU Device State 873 * @ctx : Device Translation Context with devid and process_id set. 874 * @return : success or fault code. 875 */ 876 static int riscv_iommu_ctx_fetch(RISCVIOMMUState *s, RISCVIOMMUContext *ctx) 877 { 878 const uint64_t ddtp = s->ddtp; 879 unsigned mode = get_field(ddtp, RISCV_IOMMU_DDTP_MODE); 880 dma_addr_t addr = PPN_PHYS(get_field(ddtp, RISCV_IOMMU_DDTP_PPN)); 881 struct riscv_iommu_dc dc; 882 /* Device Context format: 0: extended (64 bytes) | 1: base (32 bytes) */ 883 const int dc_fmt = !s->enable_msi; 884 const size_t dc_len = sizeof(dc) >> dc_fmt; 885 int depth; 886 uint64_t de; 887 888 switch (mode) { 889 case RISCV_IOMMU_DDTP_MODE_OFF: 890 return RISCV_IOMMU_FQ_CAUSE_DMA_DISABLED; 891 892 case RISCV_IOMMU_DDTP_MODE_BARE: 893 /* mock up pass-through translation context */ 894 ctx->gatp = set_field(0, RISCV_IOMMU_ATP_MODE_FIELD, 895 RISCV_IOMMU_DC_IOHGATP_MODE_BARE); 896 ctx->satp = set_field(0, RISCV_IOMMU_ATP_MODE_FIELD, 897 RISCV_IOMMU_DC_FSC_MODE_BARE); 898 899 ctx->tc = RISCV_IOMMU_DC_TC_V; 900 if (s->enable_ats) { 901 ctx->tc |= RISCV_IOMMU_DC_TC_EN_ATS; 902 } 903 904 ctx->ta = 0; 905 ctx->msiptp = 0; 906 return 0; 907 908 case RISCV_IOMMU_DDTP_MODE_1LVL: 909 depth = 0; 910 break; 911 912 case RISCV_IOMMU_DDTP_MODE_2LVL: 913 depth = 1; 914 break; 915 916 case RISCV_IOMMU_DDTP_MODE_3LVL: 917 depth = 2; 918 break; 919 920 default: 921 return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED; 922 } 923 924 /* 925 * Check supported device id width (in bits). 926 * See IOMMU Specification, Chapter 6. Software guidelines. 927 * - if extended device-context format is used: 928 * 1LVL: 6, 2LVL: 15, 3LVL: 24 929 * - if base device-context format is used: 930 * 1LVL: 7, 2LVL: 16, 3LVL: 24 931 */ 932 if (ctx->devid >= (1 << (depth * 9 + 6 + (dc_fmt && depth != 2)))) { 933 return RISCV_IOMMU_FQ_CAUSE_TTYPE_BLOCKED; 934 } 935 936 /* Device directory tree walk */ 937 for (; depth-- > 0; ) { 938 riscv_iommu_hpm_incr_ctr(s, ctx, RISCV_IOMMU_HPMEVENT_DD_WALK); 939 /* 940 * Select device id index bits based on device directory tree level 941 * and device context format. 942 * See IOMMU Specification, Chapter 2. Data Structures. 
943 * - if extended device-context format is used: 944 * device index: [23:15][14:6][5:0] 945 * - if base device-context format is used: 946 * device index: [23:16][15:7][6:0] 947 */ 948 const int split = depth * 9 + 6 + dc_fmt; 949 addr |= ((ctx->devid >> split) << 3) & ~TARGET_PAGE_MASK; 950 if (dma_memory_read(s->target_as, addr, &de, sizeof(de), 951 MEMTXATTRS_UNSPECIFIED) != MEMTX_OK) { 952 return RISCV_IOMMU_FQ_CAUSE_DDT_LOAD_FAULT; 953 } 954 le64_to_cpus(&de); 955 if (!(de & RISCV_IOMMU_DDTE_VALID)) { 956 /* invalid directory entry */ 957 return RISCV_IOMMU_FQ_CAUSE_DDT_INVALID; 958 } 959 if (de & ~(RISCV_IOMMU_DDTE_PPN | RISCV_IOMMU_DDTE_VALID)) { 960 /* reserved bits set */ 961 return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED; 962 } 963 addr = PPN_PHYS(get_field(de, RISCV_IOMMU_DDTE_PPN)); 964 } 965 966 riscv_iommu_hpm_incr_ctr(s, ctx, RISCV_IOMMU_HPMEVENT_DD_WALK); 967 968 /* index into device context entry page */ 969 addr |= (ctx->devid * dc_len) & ~TARGET_PAGE_MASK; 970 971 memset(&dc, 0, sizeof(dc)); 972 if (dma_memory_read(s->target_as, addr, &dc, dc_len, 973 MEMTXATTRS_UNSPECIFIED) != MEMTX_OK) { 974 return RISCV_IOMMU_FQ_CAUSE_DDT_LOAD_FAULT; 975 } 976 977 /* Set translation context. */ 978 ctx->tc = le64_to_cpu(dc.tc); 979 ctx->gatp = le64_to_cpu(dc.iohgatp); 980 ctx->satp = le64_to_cpu(dc.fsc); 981 ctx->ta = le64_to_cpu(dc.ta); 982 ctx->msiptp = le64_to_cpu(dc.msiptp); 983 ctx->msi_addr_mask = le64_to_cpu(dc.msi_addr_mask); 984 ctx->msi_addr_pattern = le64_to_cpu(dc.msi_addr_pattern); 985 986 if (!(ctx->tc & RISCV_IOMMU_DC_TC_V)) { 987 return RISCV_IOMMU_FQ_CAUSE_DDT_INVALID; 988 } 989 990 if (!riscv_iommu_validate_device_ctx(s, ctx)) { 991 return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED; 992 } 993 994 /* FSC field checks */ 995 mode = get_field(ctx->satp, RISCV_IOMMU_DC_FSC_MODE); 996 addr = PPN_PHYS(get_field(ctx->satp, RISCV_IOMMU_DC_FSC_PPN)); 997 998 if (!(ctx->tc & RISCV_IOMMU_DC_TC_PDTV)) { 999 if (ctx->process_id != RISCV_IOMMU_NOPROCID) { 1000 /* PID is disabled */ 1001 return RISCV_IOMMU_FQ_CAUSE_TTYPE_BLOCKED; 1002 } 1003 if (mode > RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV57) { 1004 /* Invalid translation mode */ 1005 return RISCV_IOMMU_FQ_CAUSE_DDT_INVALID; 1006 } 1007 return 0; 1008 } 1009 1010 if (ctx->process_id == RISCV_IOMMU_NOPROCID) { 1011 if (!(ctx->tc & RISCV_IOMMU_DC_TC_DPE)) { 1012 /* No default process_id enabled, set BARE mode */ 1013 ctx->satp = 0ULL; 1014 return 0; 1015 } else { 1016 /* Use default process_id #0 */ 1017 ctx->process_id = 0; 1018 } 1019 } 1020 1021 if (mode == RISCV_IOMMU_DC_FSC_MODE_BARE) { 1022 /* No S-Stage translation, done. */ 1023 return 0; 1024 } 1025 1026 /* FSC.TC.PDTV enabled */ 1027 if (mode > RISCV_IOMMU_DC_FSC_PDTP_MODE_PD20) { 1028 /* Invalid PDTP.MODE */ 1029 return RISCV_IOMMU_FQ_CAUSE_PDT_MISCONFIGURED; 1030 } 1031 1032 for (depth = mode - RISCV_IOMMU_DC_FSC_PDTP_MODE_PD8; depth-- > 0; ) { 1033 riscv_iommu_hpm_incr_ctr(s, ctx, RISCV_IOMMU_HPMEVENT_PD_WALK); 1034 1035 /* 1036 * Select process id index bits based on process directory tree 1037 * level. See IOMMU Specification, 2.2. Process-Directory-Table. 
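         *
         * Worked example (hypothetical process_id, not from the spec text):
         * in PD20 mode the loop below runs with depth = 1 and 0, giving
         *   split = 17 -> non-leaf index = process_id[19:17]
         *   split =  8 -> non-leaf index = process_id[16:8]
         * and the leaf lookup after the loop reads the 16-byte process
         * context at offset (process_id[7:0] * 16) within the last page.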
1038 */ 1039 const int split = depth * 9 + 8; 1040 addr |= ((ctx->process_id >> split) << 3) & ~TARGET_PAGE_MASK; 1041 if (dma_memory_read(s->target_as, addr, &de, sizeof(de), 1042 MEMTXATTRS_UNSPECIFIED) != MEMTX_OK) { 1043 return RISCV_IOMMU_FQ_CAUSE_PDT_LOAD_FAULT; 1044 } 1045 le64_to_cpus(&de); 1046 if (!(de & RISCV_IOMMU_PDTE_VALID)) { 1047 return RISCV_IOMMU_FQ_CAUSE_PDT_INVALID; 1048 } 1049 addr = PPN_PHYS(get_field(de, RISCV_IOMMU_PDTE_PPN)); 1050 } 1051 1052 riscv_iommu_hpm_incr_ctr(s, ctx, RISCV_IOMMU_HPMEVENT_PD_WALK); 1053 1054 /* Leaf entry in PDT */ 1055 addr |= (ctx->process_id << 4) & ~TARGET_PAGE_MASK; 1056 if (dma_memory_read(s->target_as, addr, &dc.ta, sizeof(uint64_t) * 2, 1057 MEMTXATTRS_UNSPECIFIED) != MEMTX_OK) { 1058 return RISCV_IOMMU_FQ_CAUSE_PDT_LOAD_FAULT; 1059 } 1060 1061 /* Use FSC and TA from process directory entry. */ 1062 ctx->ta = le64_to_cpu(dc.ta); 1063 ctx->satp = le64_to_cpu(dc.fsc); 1064 1065 if (!(ctx->ta & RISCV_IOMMU_PC_TA_V)) { 1066 return RISCV_IOMMU_FQ_CAUSE_PDT_INVALID; 1067 } 1068 1069 if (!riscv_iommu_validate_process_ctx(s, ctx)) { 1070 return RISCV_IOMMU_FQ_CAUSE_PDT_MISCONFIGURED; 1071 } 1072 1073 return 0; 1074 } 1075 1076 /* Translation Context cache support */ 1077 static gboolean riscv_iommu_ctx_equal(gconstpointer v1, gconstpointer v2) 1078 { 1079 RISCVIOMMUContext *c1 = (RISCVIOMMUContext *) v1; 1080 RISCVIOMMUContext *c2 = (RISCVIOMMUContext *) v2; 1081 return c1->devid == c2->devid && 1082 c1->process_id == c2->process_id; 1083 } 1084 1085 static guint riscv_iommu_ctx_hash(gconstpointer v) 1086 { 1087 RISCVIOMMUContext *ctx = (RISCVIOMMUContext *) v; 1088 /* 1089 * Generate simple hash of (process_id, devid) 1090 * assuming 24-bit wide devid. 1091 */ 1092 return (guint)(ctx->devid) + ((guint)(ctx->process_id) << 24); 1093 } 1094 1095 static void riscv_iommu_ctx_inval_devid_procid(gpointer key, gpointer value, 1096 gpointer data) 1097 { 1098 RISCVIOMMUContext *ctx = (RISCVIOMMUContext *) value; 1099 RISCVIOMMUContext *arg = (RISCVIOMMUContext *) data; 1100 if (ctx->tc & RISCV_IOMMU_DC_TC_V && 1101 ctx->devid == arg->devid && 1102 ctx->process_id == arg->process_id) { 1103 ctx->tc &= ~RISCV_IOMMU_DC_TC_V; 1104 } 1105 } 1106 1107 static void riscv_iommu_ctx_inval_devid(gpointer key, gpointer value, 1108 gpointer data) 1109 { 1110 RISCVIOMMUContext *ctx = (RISCVIOMMUContext *) value; 1111 RISCVIOMMUContext *arg = (RISCVIOMMUContext *) data; 1112 if (ctx->tc & RISCV_IOMMU_DC_TC_V && 1113 ctx->devid == arg->devid) { 1114 ctx->tc &= ~RISCV_IOMMU_DC_TC_V; 1115 } 1116 } 1117 1118 static void riscv_iommu_ctx_inval_all(gpointer key, gpointer value, 1119 gpointer data) 1120 { 1121 RISCVIOMMUContext *ctx = (RISCVIOMMUContext *) value; 1122 if (ctx->tc & RISCV_IOMMU_DC_TC_V) { 1123 ctx->tc &= ~RISCV_IOMMU_DC_TC_V; 1124 } 1125 } 1126 1127 static void riscv_iommu_ctx_inval(RISCVIOMMUState *s, GHFunc func, 1128 uint32_t devid, uint32_t process_id) 1129 { 1130 GHashTable *ctx_cache; 1131 RISCVIOMMUContext key = { 1132 .devid = devid, 1133 .process_id = process_id, 1134 }; 1135 ctx_cache = g_hash_table_ref(s->ctx_cache); 1136 g_hash_table_foreach(ctx_cache, func, &key); 1137 g_hash_table_unref(ctx_cache); 1138 } 1139 1140 /* Find or allocate translation context for a given {device_id, process_id} */ 1141 static RISCVIOMMUContext *riscv_iommu_ctx(RISCVIOMMUState *s, 1142 unsigned devid, unsigned process_id, 1143 void **ref) 1144 { 1145 GHashTable *ctx_cache; 1146 RISCVIOMMUContext *ctx; 1147 RISCVIOMMUContext key = { 1148 .devid = devid, 1149 
.process_id = process_id, 1150 }; 1151 1152 ctx_cache = g_hash_table_ref(s->ctx_cache); 1153 ctx = g_hash_table_lookup(ctx_cache, &key); 1154 1155 if (ctx && (ctx->tc & RISCV_IOMMU_DC_TC_V)) { 1156 *ref = ctx_cache; 1157 return ctx; 1158 } 1159 1160 ctx = g_new0(RISCVIOMMUContext, 1); 1161 ctx->devid = devid; 1162 ctx->process_id = process_id; 1163 1164 int fault = riscv_iommu_ctx_fetch(s, ctx); 1165 if (!fault) { 1166 if (g_hash_table_size(ctx_cache) >= LIMIT_CACHE_CTX) { 1167 g_hash_table_unref(ctx_cache); 1168 ctx_cache = g_hash_table_new_full(riscv_iommu_ctx_hash, 1169 riscv_iommu_ctx_equal, 1170 g_free, NULL); 1171 g_hash_table_ref(ctx_cache); 1172 g_hash_table_unref(qatomic_xchg(&s->ctx_cache, ctx_cache)); 1173 } 1174 g_hash_table_add(ctx_cache, ctx); 1175 *ref = ctx_cache; 1176 return ctx; 1177 } 1178 1179 g_hash_table_unref(ctx_cache); 1180 *ref = NULL; 1181 1182 riscv_iommu_report_fault(s, ctx, RISCV_IOMMU_FQ_TTYPE_UADDR_RD, 1183 fault, !!process_id, 0, 0); 1184 1185 g_free(ctx); 1186 return NULL; 1187 } 1188 1189 static void riscv_iommu_ctx_put(RISCVIOMMUState *s, void *ref) 1190 { 1191 if (ref) { 1192 g_hash_table_unref((GHashTable *)ref); 1193 } 1194 } 1195 1196 /* Find or allocate address space for a given device */ 1197 static AddressSpace *riscv_iommu_space(RISCVIOMMUState *s, uint32_t devid) 1198 { 1199 RISCVIOMMUSpace *as; 1200 1201 /* FIXME: PCIe bus remapping for attached endpoints. */ 1202 devid |= s->bus << 8; 1203 1204 QLIST_FOREACH(as, &s->spaces, list) { 1205 if (as->devid == devid) { 1206 break; 1207 } 1208 } 1209 1210 if (as == NULL) { 1211 char name[64]; 1212 as = g_new0(RISCVIOMMUSpace, 1); 1213 1214 as->iommu = s; 1215 as->devid = devid; 1216 1217 snprintf(name, sizeof(name), "riscv-iommu-%04x:%02x.%d-iova", 1218 PCI_BUS_NUM(as->devid), PCI_SLOT(as->devid), PCI_FUNC(as->devid)); 1219 1220 /* IOVA address space, untranslated addresses */ 1221 memory_region_init_iommu(&as->iova_mr, sizeof(as->iova_mr), 1222 TYPE_RISCV_IOMMU_MEMORY_REGION, 1223 OBJECT(as), "riscv_iommu", UINT64_MAX); 1224 address_space_init(&as->iova_as, MEMORY_REGION(&as->iova_mr), name); 1225 1226 QLIST_INSERT_HEAD(&s->spaces, as, list); 1227 1228 trace_riscv_iommu_new(s->parent_obj.id, PCI_BUS_NUM(as->devid), 1229 PCI_SLOT(as->devid), PCI_FUNC(as->devid)); 1230 } 1231 return &as->iova_as; 1232 } 1233 1234 /* Translation Object cache support */ 1235 static gboolean riscv_iommu_iot_equal(gconstpointer v1, gconstpointer v2) 1236 { 1237 RISCVIOMMUEntry *t1 = (RISCVIOMMUEntry *) v1; 1238 RISCVIOMMUEntry *t2 = (RISCVIOMMUEntry *) v2; 1239 return t1->gscid == t2->gscid && t1->pscid == t2->pscid && 1240 t1->iova == t2->iova && t1->tag == t2->tag; 1241 } 1242 1243 static guint riscv_iommu_iot_hash(gconstpointer v) 1244 { 1245 RISCVIOMMUEntry *t = (RISCVIOMMUEntry *) v; 1246 return (guint)t->iova; 1247 } 1248 1249 /* GV: 0 AV: 0 PSCV: 0 GVMA: 0 */ 1250 /* GV: 0 AV: 0 GVMA: 1 */ 1251 static 1252 void riscv_iommu_iot_inval_all(gpointer key, gpointer value, gpointer data) 1253 { 1254 RISCVIOMMUEntry *iot = (RISCVIOMMUEntry *) value; 1255 RISCVIOMMUEntry *arg = (RISCVIOMMUEntry *) data; 1256 if (iot->tag == arg->tag) { 1257 iot->perm = IOMMU_NONE; 1258 } 1259 } 1260 1261 /* GV: 0 AV: 0 PSCV: 1 GVMA: 0 */ 1262 static 1263 void riscv_iommu_iot_inval_pscid(gpointer key, gpointer value, gpointer data) 1264 { 1265 RISCVIOMMUEntry *iot = (RISCVIOMMUEntry *) value; 1266 RISCVIOMMUEntry *arg = (RISCVIOMMUEntry *) data; 1267 if (iot->tag == arg->tag && 1268 iot->pscid == arg->pscid) { 1269 iot->perm = IOMMU_NONE; 
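        /*
         * Invalidation is lazy: clearing 'perm' to IOMMU_NONE is enough,
         * since riscv_iommu_translate() treats a cached entry with
         * IOMMU_NONE permissions as a miss; the entry itself stays in
         * the hash table until the cache is recreated on overflow.
         */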
1270 } 1271 } 1272 1273 /* GV: 0 AV: 1 PSCV: 0 GVMA: 0 */ 1274 static 1275 void riscv_iommu_iot_inval_iova(gpointer key, gpointer value, gpointer data) 1276 { 1277 RISCVIOMMUEntry *iot = (RISCVIOMMUEntry *) value; 1278 RISCVIOMMUEntry *arg = (RISCVIOMMUEntry *) data; 1279 if (iot->tag == arg->tag && 1280 iot->iova == arg->iova) { 1281 iot->perm = IOMMU_NONE; 1282 } 1283 } 1284 1285 /* GV: 0 AV: 1 PSCV: 1 GVMA: 0 */ 1286 static void riscv_iommu_iot_inval_pscid_iova(gpointer key, gpointer value, 1287 gpointer data) 1288 { 1289 RISCVIOMMUEntry *iot = (RISCVIOMMUEntry *) value; 1290 RISCVIOMMUEntry *arg = (RISCVIOMMUEntry *) data; 1291 if (iot->tag == arg->tag && 1292 iot->pscid == arg->pscid && 1293 iot->iova == arg->iova) { 1294 iot->perm = IOMMU_NONE; 1295 } 1296 } 1297 1298 /* GV: 1 AV: 0 PSCV: 0 GVMA: 0 */ 1299 /* GV: 1 AV: 0 GVMA: 1 */ 1300 static 1301 void riscv_iommu_iot_inval_gscid(gpointer key, gpointer value, gpointer data) 1302 { 1303 RISCVIOMMUEntry *iot = (RISCVIOMMUEntry *) value; 1304 RISCVIOMMUEntry *arg = (RISCVIOMMUEntry *) data; 1305 if (iot->tag == arg->tag && 1306 iot->gscid == arg->gscid) { 1307 iot->perm = IOMMU_NONE; 1308 } 1309 } 1310 1311 /* GV: 1 AV: 0 PSCV: 1 GVMA: 0 */ 1312 static void riscv_iommu_iot_inval_gscid_pscid(gpointer key, gpointer value, 1313 gpointer data) 1314 { 1315 RISCVIOMMUEntry *iot = (RISCVIOMMUEntry *) value; 1316 RISCVIOMMUEntry *arg = (RISCVIOMMUEntry *) data; 1317 if (iot->tag == arg->tag && 1318 iot->gscid == arg->gscid && 1319 iot->pscid == arg->pscid) { 1320 iot->perm = IOMMU_NONE; 1321 } 1322 } 1323 1324 /* GV: 1 AV: 1 PSCV: 0 GVMA: 0 */ 1325 /* GV: 1 AV: 1 GVMA: 1 */ 1326 static void riscv_iommu_iot_inval_gscid_iova(gpointer key, gpointer value, 1327 gpointer data) 1328 { 1329 RISCVIOMMUEntry *iot = (RISCVIOMMUEntry *) value; 1330 RISCVIOMMUEntry *arg = (RISCVIOMMUEntry *) data; 1331 if (iot->tag == arg->tag && 1332 iot->gscid == arg->gscid && 1333 iot->iova == arg->iova) { 1334 iot->perm = IOMMU_NONE; 1335 } 1336 } 1337 1338 /* GV: 1 AV: 1 PSCV: 1 GVMA: 0 */ 1339 static void riscv_iommu_iot_inval_gscid_pscid_iova(gpointer key, gpointer value, 1340 gpointer data) 1341 { 1342 RISCVIOMMUEntry *iot = (RISCVIOMMUEntry *) value; 1343 RISCVIOMMUEntry *arg = (RISCVIOMMUEntry *) data; 1344 if (iot->tag == arg->tag && 1345 iot->gscid == arg->gscid && 1346 iot->pscid == arg->pscid && 1347 iot->iova == arg->iova) { 1348 iot->perm = IOMMU_NONE; 1349 } 1350 } 1351 1352 /* caller should keep ref-count for iot_cache object */ 1353 static RISCVIOMMUEntry *riscv_iommu_iot_lookup(RISCVIOMMUContext *ctx, 1354 GHashTable *iot_cache, hwaddr iova, RISCVIOMMUTransTag transtag) 1355 { 1356 RISCVIOMMUEntry key = { 1357 .tag = transtag, 1358 .gscid = get_field(ctx->gatp, RISCV_IOMMU_DC_IOHGATP_GSCID), 1359 .pscid = get_field(ctx->ta, RISCV_IOMMU_DC_TA_PSCID), 1360 .iova = PPN_DOWN(iova), 1361 }; 1362 return g_hash_table_lookup(iot_cache, &key); 1363 } 1364 1365 /* caller should keep ref-count for iot_cache object */ 1366 static void riscv_iommu_iot_update(RISCVIOMMUState *s, 1367 GHashTable *iot_cache, RISCVIOMMUEntry *iot) 1368 { 1369 if (!s->iot_limit) { 1370 return; 1371 } 1372 1373 if (g_hash_table_size(s->iot_cache) >= s->iot_limit) { 1374 iot_cache = g_hash_table_new_full(riscv_iommu_iot_hash, 1375 riscv_iommu_iot_equal, 1376 g_free, NULL); 1377 g_hash_table_unref(qatomic_xchg(&s->iot_cache, iot_cache)); 1378 } 1379 g_hash_table_add(iot_cache, iot); 1380 } 1381 1382 static void riscv_iommu_iot_inval(RISCVIOMMUState *s, GHFunc func, 1383 uint32_t gscid, 
uint32_t pscid, hwaddr iova, RISCVIOMMUTransTag transtag) 1384 { 1385 GHashTable *iot_cache; 1386 RISCVIOMMUEntry key = { 1387 .tag = transtag, 1388 .gscid = gscid, 1389 .pscid = pscid, 1390 .iova = PPN_DOWN(iova), 1391 }; 1392 1393 iot_cache = g_hash_table_ref(s->iot_cache); 1394 g_hash_table_foreach(iot_cache, func, &key); 1395 g_hash_table_unref(iot_cache); 1396 } 1397 1398 static RISCVIOMMUTransTag riscv_iommu_get_transtag(RISCVIOMMUContext *ctx) 1399 { 1400 uint64_t satp = get_field(ctx->satp, RISCV_IOMMU_ATP_MODE_FIELD); 1401 uint64_t gatp = get_field(ctx->gatp, RISCV_IOMMU_ATP_MODE_FIELD); 1402 1403 if (satp == RISCV_IOMMU_DC_FSC_MODE_BARE) { 1404 return (gatp == RISCV_IOMMU_DC_IOHGATP_MODE_BARE) ? 1405 RISCV_IOMMU_TRANS_TAG_BY : RISCV_IOMMU_TRANS_TAG_VG; 1406 } else { 1407 return (gatp == RISCV_IOMMU_DC_IOHGATP_MODE_BARE) ? 1408 RISCV_IOMMU_TRANS_TAG_SS : RISCV_IOMMU_TRANS_TAG_VN; 1409 } 1410 } 1411 1412 static int riscv_iommu_translate(RISCVIOMMUState *s, RISCVIOMMUContext *ctx, 1413 IOMMUTLBEntry *iotlb, bool enable_cache) 1414 { 1415 RISCVIOMMUTransTag transtag = riscv_iommu_get_transtag(ctx); 1416 RISCVIOMMUEntry *iot; 1417 IOMMUAccessFlags perm; 1418 bool enable_pid; 1419 bool enable_pri; 1420 GHashTable *iot_cache; 1421 int fault; 1422 1423 riscv_iommu_hpm_incr_ctr(s, ctx, RISCV_IOMMU_HPMEVENT_URQ); 1424 1425 iot_cache = g_hash_table_ref(s->iot_cache); 1426 /* 1427 * TC[32] is reserved for custom extensions, used here to temporarily 1428 * enable automatic page-request generation for ATS queries. 1429 */ 1430 enable_pri = (iotlb->perm == IOMMU_NONE) && (ctx->tc & BIT_ULL(32)); 1431 enable_pid = (ctx->tc & RISCV_IOMMU_DC_TC_PDTV); 1432 1433 /* Check for ATS request. */ 1434 if (iotlb->perm == IOMMU_NONE) { 1435 riscv_iommu_hpm_incr_ctr(s, ctx, RISCV_IOMMU_HPMEVENT_ATS_RQ); 1436 /* Check if ATS is disabled. */ 1437 if (!(ctx->tc & RISCV_IOMMU_DC_TC_EN_ATS)) { 1438 enable_pri = false; 1439 fault = RISCV_IOMMU_FQ_CAUSE_TTYPE_BLOCKED; 1440 goto done; 1441 } 1442 } 1443 1444 iot = riscv_iommu_iot_lookup(ctx, iot_cache, iotlb->iova, transtag); 1445 perm = iot ? iot->perm : IOMMU_NONE; 1446 if (perm != IOMMU_NONE) { 1447 iotlb->translated_addr = PPN_PHYS(iot->phys); 1448 iotlb->addr_mask = ~TARGET_PAGE_MASK; 1449 iotlb->perm = perm; 1450 fault = 0; 1451 goto done; 1452 } 1453 1454 riscv_iommu_hpm_incr_ctr(s, ctx, RISCV_IOMMU_HPMEVENT_TLB_MISS); 1455 1456 /* Translate using device directory / page table information. */ 1457 fault = riscv_iommu_spa_fetch(s, ctx, iotlb); 1458 1459 if (!fault && iotlb->target_as == &s->trap_as) { 1460 /* Do not cache trapped MSI translations */ 1461 goto done; 1462 } 1463 1464 /* 1465 * We made an implementation choice to not cache identity-mapped 1466 * translations, as allowed by the specification, to avoid 1467 * translation cache evictions for other devices sharing the 1468 * IOMMU hardware model. 
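     *
     * Cached entries are keyed on (tag, GSCID, PSCID, IOVA page number)
     * and record only the translated PPN and permissions; see
     * RISCVIOMMUEntry and riscv_iommu_iot_equal() above.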
1469 */ 1470 if (!fault && iotlb->translated_addr != iotlb->iova && enable_cache) { 1471 iot = g_new0(RISCVIOMMUEntry, 1); 1472 iot->iova = PPN_DOWN(iotlb->iova); 1473 iot->phys = PPN_DOWN(iotlb->translated_addr); 1474 iot->gscid = get_field(ctx->gatp, RISCV_IOMMU_DC_IOHGATP_GSCID); 1475 iot->pscid = get_field(ctx->ta, RISCV_IOMMU_DC_TA_PSCID); 1476 iot->perm = iotlb->perm; 1477 iot->tag = transtag; 1478 riscv_iommu_iot_update(s, iot_cache, iot); 1479 } 1480 1481 done: 1482 g_hash_table_unref(iot_cache); 1483 1484 if (enable_pri && fault) { 1485 struct riscv_iommu_pq_record pr = {0}; 1486 if (enable_pid) { 1487 pr.hdr = set_field(RISCV_IOMMU_PREQ_HDR_PV, 1488 RISCV_IOMMU_PREQ_HDR_PID, ctx->process_id); 1489 } 1490 pr.hdr = set_field(pr.hdr, RISCV_IOMMU_PREQ_HDR_DID, ctx->devid); 1491 pr.payload = (iotlb->iova & TARGET_PAGE_MASK) | 1492 RISCV_IOMMU_PREQ_PAYLOAD_M; 1493 riscv_iommu_pri(s, &pr); 1494 return fault; 1495 } 1496 1497 if (fault) { 1498 unsigned ttype = RISCV_IOMMU_FQ_TTYPE_PCIE_ATS_REQ; 1499 1500 if (iotlb->perm & IOMMU_RW) { 1501 ttype = RISCV_IOMMU_FQ_TTYPE_UADDR_WR; 1502 } else if (iotlb->perm & IOMMU_RO) { 1503 ttype = RISCV_IOMMU_FQ_TTYPE_UADDR_RD; 1504 } 1505 1506 riscv_iommu_report_fault(s, ctx, ttype, fault, enable_pid, 1507 iotlb->iova, iotlb->translated_addr); 1508 return fault; 1509 } 1510 1511 return 0; 1512 } 1513 1514 /* IOMMU Command Interface */ 1515 static MemTxResult riscv_iommu_iofence(RISCVIOMMUState *s, bool notify, 1516 uint64_t addr, uint32_t data) 1517 { 1518 /* 1519 * ATS processing in this implementation of the IOMMU is synchronous, 1520 * no need to wait for completions here. 1521 */ 1522 if (!notify) { 1523 return MEMTX_OK; 1524 } 1525 1526 return dma_memory_write(s->target_as, addr, &data, sizeof(data), 1527 MEMTXATTRS_UNSPECIFIED); 1528 } 1529 1530 static void riscv_iommu_ats(RISCVIOMMUState *s, 1531 struct riscv_iommu_command *cmd, IOMMUNotifierFlag flag, 1532 IOMMUAccessFlags perm, 1533 void (*trace_fn)(const char *id)) 1534 { 1535 RISCVIOMMUSpace *as = NULL; 1536 IOMMUNotifier *n; 1537 IOMMUTLBEvent event; 1538 uint32_t pid; 1539 uint32_t devid; 1540 const bool pv = cmd->dword0 & RISCV_IOMMU_CMD_ATS_PV; 1541 1542 if (cmd->dword0 & RISCV_IOMMU_CMD_ATS_DSV) { 1543 /* Use device segment and requester id */ 1544 devid = get_field(cmd->dword0, 1545 RISCV_IOMMU_CMD_ATS_DSEG | RISCV_IOMMU_CMD_ATS_RID); 1546 } else { 1547 devid = get_field(cmd->dword0, RISCV_IOMMU_CMD_ATS_RID); 1548 } 1549 1550 pid = get_field(cmd->dword0, RISCV_IOMMU_CMD_ATS_PID); 1551 1552 QLIST_FOREACH(as, &s->spaces, list) { 1553 if (as->devid == devid) { 1554 break; 1555 } 1556 } 1557 1558 if (!as || !as->notifier) { 1559 return; 1560 } 1561 1562 event.type = flag; 1563 event.entry.perm = perm; 1564 event.entry.target_as = s->target_as; 1565 1566 IOMMU_NOTIFIER_FOREACH(n, &as->iova_mr) { 1567 if (!pv || n->iommu_idx == pid) { 1568 event.entry.iova = n->start; 1569 event.entry.addr_mask = n->end - n->start; 1570 trace_fn(as->iova_mr.parent_obj.name); 1571 memory_region_notify_iommu_one(n, &event); 1572 } 1573 } 1574 } 1575 1576 static void riscv_iommu_ats_inval(RISCVIOMMUState *s, 1577 struct riscv_iommu_command *cmd) 1578 { 1579 return riscv_iommu_ats(s, cmd, IOMMU_NOTIFIER_DEVIOTLB_UNMAP, IOMMU_NONE, 1580 trace_riscv_iommu_ats_inval); 1581 } 1582 1583 static void riscv_iommu_ats_prgr(RISCVIOMMUState *s, 1584 struct riscv_iommu_command *cmd) 1585 { 1586 unsigned resp_code = get_field(cmd->dword1, 1587 RISCV_IOMMU_CMD_ATS_PRGR_RESP_CODE); 1588 1589 /* Using the access flag to carry 
response code information */ 1590 IOMMUAccessFlags perm = resp_code ? IOMMU_NONE : IOMMU_RW; 1591 return riscv_iommu_ats(s, cmd, IOMMU_NOTIFIER_MAP, perm, 1592 trace_riscv_iommu_ats_prgr); 1593 } 1594 1595 static void riscv_iommu_process_ddtp(RISCVIOMMUState *s) 1596 { 1597 uint64_t old_ddtp = s->ddtp; 1598 uint64_t new_ddtp = riscv_iommu_reg_get64(s, RISCV_IOMMU_REG_DDTP); 1599 unsigned new_mode = get_field(new_ddtp, RISCV_IOMMU_DDTP_MODE); 1600 unsigned old_mode = get_field(old_ddtp, RISCV_IOMMU_DDTP_MODE); 1601 bool ok = false; 1602 1603 /* 1604 * Check for allowed DDTP.MODE transitions: 1605 * {OFF, BARE} -> {OFF, BARE, 1LVL, 2LVL, 3LVL} 1606 * {1LVL, 2LVL, 3LVL} -> {OFF, BARE} 1607 */ 1608 if (new_mode == old_mode || 1609 new_mode == RISCV_IOMMU_DDTP_MODE_OFF || 1610 new_mode == RISCV_IOMMU_DDTP_MODE_BARE) { 1611 ok = true; 1612 } else if (new_mode == RISCV_IOMMU_DDTP_MODE_1LVL || 1613 new_mode == RISCV_IOMMU_DDTP_MODE_2LVL || 1614 new_mode == RISCV_IOMMU_DDTP_MODE_3LVL) { 1615 ok = old_mode == RISCV_IOMMU_DDTP_MODE_OFF || 1616 old_mode == RISCV_IOMMU_DDTP_MODE_BARE; 1617 } 1618 1619 if (ok) { 1620 /* clear reserved and busy bits, report back sanitized version */ 1621 new_ddtp = set_field(new_ddtp & RISCV_IOMMU_DDTP_PPN, 1622 RISCV_IOMMU_DDTP_MODE, new_mode); 1623 } else { 1624 new_ddtp = old_ddtp; 1625 } 1626 s->ddtp = new_ddtp; 1627 1628 riscv_iommu_reg_set64(s, RISCV_IOMMU_REG_DDTP, new_ddtp); 1629 } 1630 1631 /* Command function and opcode field. */ 1632 #define RISCV_IOMMU_CMD(func, op) (((func) << 7) | (op)) 1633 1634 static void riscv_iommu_process_cq_tail(RISCVIOMMUState *s) 1635 { 1636 struct riscv_iommu_command cmd; 1637 MemTxResult res; 1638 dma_addr_t addr; 1639 uint32_t tail, head, ctrl; 1640 uint64_t cmd_opcode; 1641 GHFunc func; 1642 1643 ctrl = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_CQCSR); 1644 tail = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_CQT) & s->cq_mask; 1645 head = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_CQH) & s->cq_mask; 1646 1647 /* Check for pending error or queue processing disabled */ 1648 if (!(ctrl & RISCV_IOMMU_CQCSR_CQON) || 1649 !!(ctrl & (RISCV_IOMMU_CQCSR_CMD_ILL | RISCV_IOMMU_CQCSR_CQMF))) { 1650 return; 1651 } 1652 1653 while (tail != head) { 1654 addr = s->cq_addr + head * sizeof(cmd); 1655 res = dma_memory_read(s->target_as, addr, &cmd, sizeof(cmd), 1656 MEMTXATTRS_UNSPECIFIED); 1657 1658 if (res != MEMTX_OK) { 1659 riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_CQCSR, 1660 RISCV_IOMMU_CQCSR_CQMF, 0); 1661 goto fault; 1662 } 1663 1664 trace_riscv_iommu_cmd(s->parent_obj.id, cmd.dword0, cmd.dword1); 1665 1666 cmd_opcode = get_field(cmd.dword0, 1667 RISCV_IOMMU_CMD_OPCODE | RISCV_IOMMU_CMD_FUNC); 1668 1669 switch (cmd_opcode) { 1670 case RISCV_IOMMU_CMD(RISCV_IOMMU_CMD_IOFENCE_FUNC_C, 1671 RISCV_IOMMU_CMD_IOFENCE_OPCODE): 1672 res = riscv_iommu_iofence(s, 1673 cmd.dword0 & RISCV_IOMMU_CMD_IOFENCE_AV, cmd.dword1 << 2, 1674 get_field(cmd.dword0, RISCV_IOMMU_CMD_IOFENCE_DATA)); 1675 1676 if (res != MEMTX_OK) { 1677 riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_CQCSR, 1678 RISCV_IOMMU_CQCSR_CQMF, 0); 1679 goto fault; 1680 } 1681 break; 1682 1683 case RISCV_IOMMU_CMD(RISCV_IOMMU_CMD_IOTINVAL_FUNC_GVMA, 1684 RISCV_IOMMU_CMD_IOTINVAL_OPCODE): 1685 { 1686 bool gv = !!(cmd.dword0 & RISCV_IOMMU_CMD_IOTINVAL_GV); 1687 bool av = !!(cmd.dword0 & RISCV_IOMMU_CMD_IOTINVAL_AV); 1688 bool pscv = !!(cmd.dword0 & RISCV_IOMMU_CMD_IOTINVAL_PSCV); 1689 uint32_t gscid = get_field(cmd.dword0, 1690 RISCV_IOMMU_CMD_IOTINVAL_GSCID); 1691 uint32_t pscid = get_field(cmd.dword0, 1692 
RISCV_IOMMU_CMD_IOTINVAL_PSCID); 1693 hwaddr iova = (cmd.dword1 << 2) & TARGET_PAGE_MASK; 1694 1695 if (pscv) { 1696 /* illegal command arguments IOTINVAL.GVMA & PSCV == 1 */ 1697 goto cmd_ill; 1698 } 1699 1700 func = riscv_iommu_iot_inval_all; 1701 1702 if (gv) { 1703 func = (av) ? riscv_iommu_iot_inval_gscid_iova : 1704 riscv_iommu_iot_inval_gscid; 1705 } 1706 1707 riscv_iommu_iot_inval( 1708 s, func, gscid, pscid, iova, RISCV_IOMMU_TRANS_TAG_VG); 1709 1710 riscv_iommu_iot_inval( 1711 s, func, gscid, pscid, iova, RISCV_IOMMU_TRANS_TAG_VN); 1712 break; 1713 } 1714 1715 case RISCV_IOMMU_CMD(RISCV_IOMMU_CMD_IOTINVAL_FUNC_VMA, 1716 RISCV_IOMMU_CMD_IOTINVAL_OPCODE): 1717 { 1718 bool gv = !!(cmd.dword0 & RISCV_IOMMU_CMD_IOTINVAL_GV); 1719 bool av = !!(cmd.dword0 & RISCV_IOMMU_CMD_IOTINVAL_AV); 1720 bool pscv = !!(cmd.dword0 & RISCV_IOMMU_CMD_IOTINVAL_PSCV); 1721 uint32_t gscid = get_field(cmd.dword0, 1722 RISCV_IOMMU_CMD_IOTINVAL_GSCID); 1723 uint32_t pscid = get_field(cmd.dword0, 1724 RISCV_IOMMU_CMD_IOTINVAL_PSCID); 1725 hwaddr iova = (cmd.dword1 << 2) & TARGET_PAGE_MASK; 1726 RISCVIOMMUTransTag transtag; 1727 1728 if (gv) { 1729 transtag = RISCV_IOMMU_TRANS_TAG_VN; 1730 if (pscv) { 1731 func = (av) ? riscv_iommu_iot_inval_gscid_pscid_iova : 1732 riscv_iommu_iot_inval_gscid_pscid; 1733 } else { 1734 func = (av) ? riscv_iommu_iot_inval_gscid_iova : 1735 riscv_iommu_iot_inval_gscid; 1736 } 1737 } else { 1738 transtag = RISCV_IOMMU_TRANS_TAG_SS; 1739 if (pscv) { 1740 func = (av) ? riscv_iommu_iot_inval_pscid_iova : 1741 riscv_iommu_iot_inval_pscid; 1742 } else { 1743 func = (av) ? riscv_iommu_iot_inval_iova : 1744 riscv_iommu_iot_inval_all; 1745 } 1746 } 1747 1748 riscv_iommu_iot_inval(s, func, gscid, pscid, iova, transtag); 1749 break; 1750 } 1751 1752 case RISCV_IOMMU_CMD(RISCV_IOMMU_CMD_IODIR_FUNC_INVAL_DDT, 1753 RISCV_IOMMU_CMD_IODIR_OPCODE): 1754 if (!(cmd.dword0 & RISCV_IOMMU_CMD_IODIR_DV)) { 1755 /* invalidate all device context cache mappings */ 1756 func = riscv_iommu_ctx_inval_all; 1757 } else { 1758 /* invalidate all device context matching DID */ 1759 func = riscv_iommu_ctx_inval_devid; 1760 } 1761 riscv_iommu_ctx_inval(s, func, 1762 get_field(cmd.dword0, RISCV_IOMMU_CMD_IODIR_DID), 0); 1763 break; 1764 1765 case RISCV_IOMMU_CMD(RISCV_IOMMU_CMD_IODIR_FUNC_INVAL_PDT, 1766 RISCV_IOMMU_CMD_IODIR_OPCODE): 1767 if (!(cmd.dword0 & RISCV_IOMMU_CMD_IODIR_DV)) { 1768 /* illegal command arguments IODIR_PDT & DV == 0 */ 1769 goto cmd_ill; 1770 } else { 1771 func = riscv_iommu_ctx_inval_devid_procid; 1772 } 1773 riscv_iommu_ctx_inval(s, func, 1774 get_field(cmd.dword0, RISCV_IOMMU_CMD_IODIR_DID), 1775 get_field(cmd.dword0, RISCV_IOMMU_CMD_IODIR_PID)); 1776 break; 1777 1778 /* ATS commands */ 1779 case RISCV_IOMMU_CMD(RISCV_IOMMU_CMD_ATS_FUNC_INVAL, 1780 RISCV_IOMMU_CMD_ATS_OPCODE): 1781 if (!s->enable_ats) { 1782 goto cmd_ill; 1783 } 1784 1785 riscv_iommu_ats_inval(s, &cmd); 1786 break; 1787 1788 case RISCV_IOMMU_CMD(RISCV_IOMMU_CMD_ATS_FUNC_PRGR, 1789 RISCV_IOMMU_CMD_ATS_OPCODE): 1790 if (!s->enable_ats) { 1791 goto cmd_ill; 1792 } 1793 1794 riscv_iommu_ats_prgr(s, &cmd); 1795 break; 1796 1797 default: 1798 cmd_ill: 1799 /* Invalid instruction, do not advance instruction index. */ 1800 riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_CQCSR, 1801 RISCV_IOMMU_CQCSR_CMD_ILL, 0); 1802 goto fault; 1803 } 1804 1805 /* Advance and update head pointer after command completes. 
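         * On a queue memory fault or an illegal command the code above
         * jumps to 'fault' instead, leaving CQH pointing at the offending
         * entry so software can inspect it after clearing the error.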
*/ 1806 head = (head + 1) & s->cq_mask; 1807 riscv_iommu_reg_set32(s, RISCV_IOMMU_REG_CQH, head); 1808 } 1809 return; 1810 1811 fault: 1812 if (ctrl & RISCV_IOMMU_CQCSR_CIE) { 1813 riscv_iommu_notify(s, RISCV_IOMMU_INTR_CQ); 1814 } 1815 } 1816 1817 static void riscv_iommu_process_cq_control(RISCVIOMMUState *s) 1818 { 1819 uint64_t base; 1820 uint32_t ctrl_set = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_CQCSR); 1821 uint32_t ctrl_clr; 1822 bool enable = !!(ctrl_set & RISCV_IOMMU_CQCSR_CQEN); 1823 bool active = !!(ctrl_set & RISCV_IOMMU_CQCSR_CQON); 1824 1825 if (enable && !active) { 1826 base = riscv_iommu_reg_get64(s, RISCV_IOMMU_REG_CQB); 1827 s->cq_mask = (2ULL << get_field(base, RISCV_IOMMU_CQB_LOG2SZ)) - 1; 1828 s->cq_addr = PPN_PHYS(get_field(base, RISCV_IOMMU_CQB_PPN)); 1829 stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_CQT], ~s->cq_mask); 1830 stl_le_p(&s->regs_rw[RISCV_IOMMU_REG_CQH], 0); 1831 stl_le_p(&s->regs_rw[RISCV_IOMMU_REG_CQT], 0); 1832 ctrl_set = RISCV_IOMMU_CQCSR_CQON; 1833 ctrl_clr = RISCV_IOMMU_CQCSR_BUSY | RISCV_IOMMU_CQCSR_CQMF | 1834 RISCV_IOMMU_CQCSR_CMD_ILL | RISCV_IOMMU_CQCSR_CMD_TO | 1835 RISCV_IOMMU_CQCSR_FENCE_W_IP; 1836 } else if (!enable && active) { 1837 stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_CQT], ~0); 1838 ctrl_set = 0; 1839 ctrl_clr = RISCV_IOMMU_CQCSR_BUSY | RISCV_IOMMU_CQCSR_CQON; 1840 } else { 1841 ctrl_set = 0; 1842 ctrl_clr = RISCV_IOMMU_CQCSR_BUSY; 1843 } 1844 1845 riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_CQCSR, ctrl_set, ctrl_clr); 1846 } 1847 1848 static void riscv_iommu_process_fq_control(RISCVIOMMUState *s) 1849 { 1850 uint64_t base; 1851 uint32_t ctrl_set = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_FQCSR); 1852 uint32_t ctrl_clr; 1853 bool enable = !!(ctrl_set & RISCV_IOMMU_FQCSR_FQEN); 1854 bool active = !!(ctrl_set & RISCV_IOMMU_FQCSR_FQON); 1855 1856 if (enable && !active) { 1857 base = riscv_iommu_reg_get64(s, RISCV_IOMMU_REG_FQB); 1858 s->fq_mask = (2ULL << get_field(base, RISCV_IOMMU_FQB_LOG2SZ)) - 1; 1859 s->fq_addr = PPN_PHYS(get_field(base, RISCV_IOMMU_FQB_PPN)); 1860 stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_FQH], ~s->fq_mask); 1861 stl_le_p(&s->regs_rw[RISCV_IOMMU_REG_FQH], 0); 1862 stl_le_p(&s->regs_rw[RISCV_IOMMU_REG_FQT], 0); 1863 ctrl_set = RISCV_IOMMU_FQCSR_FQON; 1864 ctrl_clr = RISCV_IOMMU_FQCSR_BUSY | RISCV_IOMMU_FQCSR_FQMF | 1865 RISCV_IOMMU_FQCSR_FQOF; 1866 } else if (!enable && active) { 1867 stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_FQH], ~0); 1868 ctrl_set = 0; 1869 ctrl_clr = RISCV_IOMMU_FQCSR_BUSY | RISCV_IOMMU_FQCSR_FQON; 1870 } else { 1871 ctrl_set = 0; 1872 ctrl_clr = RISCV_IOMMU_FQCSR_BUSY; 1873 } 1874 1875 riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_FQCSR, ctrl_set, ctrl_clr); 1876 } 1877 1878 static void riscv_iommu_process_pq_control(RISCVIOMMUState *s) 1879 { 1880 uint64_t base; 1881 uint32_t ctrl_set = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_PQCSR); 1882 uint32_t ctrl_clr; 1883 bool enable = !!(ctrl_set & RISCV_IOMMU_PQCSR_PQEN); 1884 bool active = !!(ctrl_set & RISCV_IOMMU_PQCSR_PQON); 1885 1886 if (enable && !active) { 1887 base = riscv_iommu_reg_get64(s, RISCV_IOMMU_REG_PQB); 1888 s->pq_mask = (2ULL << get_field(base, RISCV_IOMMU_PQB_LOG2SZ)) - 1; 1889 s->pq_addr = PPN_PHYS(get_field(base, RISCV_IOMMU_PQB_PPN)); 1890 stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_PQH], ~s->pq_mask); 1891 stl_le_p(&s->regs_rw[RISCV_IOMMU_REG_PQH], 0); 1892 stl_le_p(&s->regs_rw[RISCV_IOMMU_REG_PQT], 0); 1893 ctrl_set = RISCV_IOMMU_PQCSR_PQON; 1894 ctrl_clr = RISCV_IOMMU_PQCSR_BUSY | RISCV_IOMMU_PQCSR_PQMF | 1895 RISCV_IOMMU_PQCSR_PQOF; 1896 } else if (!enable 
static void riscv_iommu_process_pq_control(RISCVIOMMUState *s)
{
    uint64_t base;
    uint32_t ctrl_set = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_PQCSR);
    uint32_t ctrl_clr;
    bool enable = !!(ctrl_set & RISCV_IOMMU_PQCSR_PQEN);
    bool active = !!(ctrl_set & RISCV_IOMMU_PQCSR_PQON);

    if (enable && !active) {
        base = riscv_iommu_reg_get64(s, RISCV_IOMMU_REG_PQB);
        s->pq_mask = (2ULL << get_field(base, RISCV_IOMMU_PQB_LOG2SZ)) - 1;
        s->pq_addr = PPN_PHYS(get_field(base, RISCV_IOMMU_PQB_PPN));
        stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_PQH], ~s->pq_mask);
        stl_le_p(&s->regs_rw[RISCV_IOMMU_REG_PQH], 0);
        stl_le_p(&s->regs_rw[RISCV_IOMMU_REG_PQT], 0);
        ctrl_set = RISCV_IOMMU_PQCSR_PQON;
        ctrl_clr = RISCV_IOMMU_PQCSR_BUSY | RISCV_IOMMU_PQCSR_PQMF |
                   RISCV_IOMMU_PQCSR_PQOF;
    } else if (!enable && active) {
        stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_PQH], ~0);
        ctrl_set = 0;
        ctrl_clr = RISCV_IOMMU_PQCSR_BUSY | RISCV_IOMMU_PQCSR_PQON;
    } else {
        ctrl_set = 0;
        ctrl_clr = RISCV_IOMMU_PQCSR_BUSY;
    }

    riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_PQCSR, ctrl_set, ctrl_clr);
}

static void riscv_iommu_process_dbg(RISCVIOMMUState *s)
{
    uint64_t iova = riscv_iommu_reg_get64(s, RISCV_IOMMU_REG_TR_REQ_IOVA);
    uint64_t ctrl = riscv_iommu_reg_get64(s, RISCV_IOMMU_REG_TR_REQ_CTL);
    unsigned devid = get_field(ctrl, RISCV_IOMMU_TR_REQ_CTL_DID);
    unsigned pid = get_field(ctrl, RISCV_IOMMU_TR_REQ_CTL_PID);
    RISCVIOMMUContext *ctx;
    void *ref;

    if (!(ctrl & RISCV_IOMMU_TR_REQ_CTL_GO_BUSY)) {
        return;
    }

    ctx = riscv_iommu_ctx(s, devid, pid, &ref);
    if (ctx == NULL) {
        riscv_iommu_reg_set64(s, RISCV_IOMMU_REG_TR_RESPONSE,
                              RISCV_IOMMU_TR_RESPONSE_FAULT |
                              (RISCV_IOMMU_FQ_CAUSE_DMA_DISABLED << 10));
    } else {
        IOMMUTLBEntry iotlb = {
            .iova = iova,
            .perm = ctrl & RISCV_IOMMU_TR_REQ_CTL_NW ? IOMMU_RO : IOMMU_RW,
            .addr_mask = ~0,
            .target_as = NULL,
        };
        int fault = riscv_iommu_translate(s, ctx, &iotlb, false);
        if (fault) {
            iova = RISCV_IOMMU_TR_RESPONSE_FAULT | (((uint64_t) fault) << 10);
        } else {
            iova = iotlb.translated_addr & ~iotlb.addr_mask;
            iova >>= TARGET_PAGE_BITS;
            iova &= RISCV_IOMMU_TR_RESPONSE_PPN;

            /* We do not support superpages (> 4 KiB pages) for now */
            iova &= ~RISCV_IOMMU_TR_RESPONSE_S;
        }
        riscv_iommu_reg_set64(s, RISCV_IOMMU_REG_TR_RESPONSE, iova);
    }

    riscv_iommu_reg_mod64(s, RISCV_IOMMU_REG_TR_REQ_CTL, 0,
                          RISCV_IOMMU_TR_REQ_CTL_GO_BUSY);
    riscv_iommu_ctx_put(s, ref);
}

typedef void riscv_iommu_process_fn(RISCVIOMMUState *s);

static void riscv_iommu_update_icvec(RISCVIOMMUState *s, uint64_t data)
{
    uint64_t icvec = 0;

    icvec |= MIN(data & RISCV_IOMMU_ICVEC_CIV,
                 s->icvec_avail_vectors & RISCV_IOMMU_ICVEC_CIV);

    icvec |= MIN(data & RISCV_IOMMU_ICVEC_FIV,
                 s->icvec_avail_vectors & RISCV_IOMMU_ICVEC_FIV);

    icvec |= MIN(data & RISCV_IOMMU_ICVEC_PMIV,
                 s->icvec_avail_vectors & RISCV_IOMMU_ICVEC_PMIV);

    icvec |= MIN(data & RISCV_IOMMU_ICVEC_PIV,
                 s->icvec_avail_vectors & RISCV_IOMMU_ICVEC_PIV);

    trace_riscv_iommu_icvec_write(data, icvec);

    riscv_iommu_reg_set64(s, RISCV_IOMMU_REG_ICVEC, icvec);
}
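
/*
 * Recompute IPSR from the value written by software: each interrupt-pending
 * bit is kept set only when it is set in the incoming value and the
 * corresponding queue CSR still reports an enabled interrupt cause;
 * otherwise the bit is cleared.
 */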
static void riscv_iommu_update_ipsr(RISCVIOMMUState *s, uint64_t data)
{
    uint32_t cqcsr, fqcsr, pqcsr;
    uint32_t ipsr_set = 0;
    uint32_t ipsr_clr = 0;

    if (data & RISCV_IOMMU_IPSR_CIP) {
        cqcsr = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_CQCSR);

        if (cqcsr & RISCV_IOMMU_CQCSR_CIE &&
            (cqcsr & RISCV_IOMMU_CQCSR_FENCE_W_IP ||
             cqcsr & RISCV_IOMMU_CQCSR_CMD_ILL ||
             cqcsr & RISCV_IOMMU_CQCSR_CMD_TO ||
             cqcsr & RISCV_IOMMU_CQCSR_CQMF)) {
            ipsr_set |= RISCV_IOMMU_IPSR_CIP;
        } else {
            ipsr_clr |= RISCV_IOMMU_IPSR_CIP;
        }
    } else {
        ipsr_clr |= RISCV_IOMMU_IPSR_CIP;
    }

    if (data & RISCV_IOMMU_IPSR_FIP) {
        fqcsr = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_FQCSR);

        if (fqcsr & RISCV_IOMMU_FQCSR_FIE &&
            (fqcsr & RISCV_IOMMU_FQCSR_FQOF ||
             fqcsr & RISCV_IOMMU_FQCSR_FQMF)) {
            ipsr_set |= RISCV_IOMMU_IPSR_FIP;
        } else {
            ipsr_clr |= RISCV_IOMMU_IPSR_FIP;
        }
    } else {
        ipsr_clr |= RISCV_IOMMU_IPSR_FIP;
    }

    if (data & RISCV_IOMMU_IPSR_PIP) {
        pqcsr = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_PQCSR);

        if (pqcsr & RISCV_IOMMU_PQCSR_PIE &&
            (pqcsr & RISCV_IOMMU_PQCSR_PQOF ||
             pqcsr & RISCV_IOMMU_PQCSR_PQMF)) {
            ipsr_set |= RISCV_IOMMU_IPSR_PIP;
        } else {
            ipsr_clr |= RISCV_IOMMU_IPSR_PIP;
        }
    } else {
        ipsr_clr |= RISCV_IOMMU_IPSR_PIP;
    }

    riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_IPSR, ipsr_set, ipsr_clr);
}

static void riscv_iommu_process_hpm_writes(RISCVIOMMUState *s,
                                           uint32_t regb,
                                           bool prev_cy_inh)
{
    switch (regb) {
    case RISCV_IOMMU_REG_IOCOUNTINH:
        riscv_iommu_process_iocntinh_cy(s, prev_cy_inh);
        break;

    case RISCV_IOMMU_REG_IOHPMCYCLES:
    case RISCV_IOMMU_REG_IOHPMCYCLES + 4:
        riscv_iommu_process_hpmcycle_write(s);
        break;

    case RISCV_IOMMU_REG_IOHPMEVT_BASE ...
         RISCV_IOMMU_REG_IOHPMEVT(RISCV_IOMMU_IOCOUNT_NUM) + 4:
        riscv_iommu_process_hpmevt_write(s, regb & ~7);
        break;
    }
}

/*
 * Compute the new value of the register at 'reg_addr' for a write of
 * 'data', applying the register's read-only, read-write and
 * write-1-to-clear bit masks, and store it at the pointer 'dest'.
 *
 * The result is stored in little-endian byte order.
 */
static void riscv_iommu_write_reg_val(RISCVIOMMUState *s,
                                      void *dest, hwaddr reg_addr,
                                      int size, uint64_t data)
{
    uint64_t ro = ldn_le_p(&s->regs_ro[reg_addr], size);
    uint64_t wc = ldn_le_p(&s->regs_wc[reg_addr], size);
    uint64_t rw = ldn_le_p(&s->regs_rw[reg_addr], size);

    stn_le_p(dest, size, ((rw & ro) | (data & ~ro)) & ~(data & wc));
}

static MemTxResult riscv_iommu_mmio_write(void *opaque, hwaddr addr,
                                          uint64_t data, unsigned size,
                                          MemTxAttrs attrs)
{
    riscv_iommu_process_fn *process_fn = NULL;
    RISCVIOMMUState *s = opaque;
    uint32_t regb = addr & ~3;
    uint32_t busy = 0;
    uint64_t val = 0;
    bool cy_inh = false;

    if ((addr & (size - 1)) != 0) {
        /* Unsupported MMIO alignment or access size */
        return MEMTX_ERROR;
    }

    if (addr + size > RISCV_IOMMU_REG_MSI_CONFIG) {
        /* Unsupported MMIO access location. */
        return MEMTX_ACCESS_ERROR;
    }

    /* Track actionable MMIO write. */
    switch (regb) {
    case RISCV_IOMMU_REG_DDTP:
    case RISCV_IOMMU_REG_DDTP + 4:
        process_fn = riscv_iommu_process_ddtp;
        regb = RISCV_IOMMU_REG_DDTP;
        busy = RISCV_IOMMU_DDTP_BUSY;
        break;

    case RISCV_IOMMU_REG_CQT:
        process_fn = riscv_iommu_process_cq_tail;
        break;

    case RISCV_IOMMU_REG_CQCSR:
        process_fn = riscv_iommu_process_cq_control;
        busy = RISCV_IOMMU_CQCSR_BUSY;
        break;

    case RISCV_IOMMU_REG_FQCSR:
        process_fn = riscv_iommu_process_fq_control;
        busy = RISCV_IOMMU_FQCSR_BUSY;
        break;

    case RISCV_IOMMU_REG_PQCSR:
        process_fn = riscv_iommu_process_pq_control;
        busy = RISCV_IOMMU_PQCSR_BUSY;
        break;

    case RISCV_IOMMU_REG_ICVEC:
    case RISCV_IOMMU_REG_IPSR:
        /*
         * ICVEC and IPSR have special read/write procedures. We'll
         * call their respective helpers and exit.
         */
        riscv_iommu_write_reg_val(s, &val, addr, size, data);

        /*
         * 'val' is stored as LE. Switch to host endianness
         * before using it.
         */
        val = le64_to_cpu(val);

        if (regb == RISCV_IOMMU_REG_ICVEC) {
            riscv_iommu_update_icvec(s, val);
        } else {
            riscv_iommu_update_ipsr(s, val);
        }

        return MEMTX_OK;

    case RISCV_IOMMU_REG_TR_REQ_CTL:
        process_fn = riscv_iommu_process_dbg;
        regb = RISCV_IOMMU_REG_TR_REQ_CTL;
        busy = RISCV_IOMMU_TR_REQ_CTL_GO_BUSY;
        break;

    case RISCV_IOMMU_REG_IOCOUNTINH:
        if (addr != RISCV_IOMMU_REG_IOCOUNTINH) {
            break;
        }
        /* Store previous value of CY bit. */
        cy_inh = !!(riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_IOCOUNTINH) &
                    RISCV_IOMMU_IOCOUNTINH_CY);
        break;

    default:
        break;
    }

    /*
     * Register updates might not be synchronized with the core logic.
     * If system software updates a register while the relevant BUSY bit
     * is set, the IOMMU behavior for such additional writes to the
     * register is UNSPECIFIED.
     */
    riscv_iommu_write_reg_val(s, &s->regs_rw[addr], addr, size, data);

    /* Busy flag update, MSB 4-byte register. */
    if (busy) {
        uint32_t rw = ldl_le_p(&s->regs_rw[regb]);
        stl_le_p(&s->regs_rw[regb], rw | busy);
    }

    /* Process HPM writes and update any internal state if needed. */
    if (regb >= RISCV_IOMMU_REG_IOCOUNTOVF &&
        regb <= (RISCV_IOMMU_REG_IOHPMEVT(RISCV_IOMMU_IOCOUNT_NUM) + 4)) {
        riscv_iommu_process_hpm_writes(s, regb, cy_inh);
    }

    if (process_fn) {
        process_fn(s);
    }

    return MEMTX_OK;
}

static MemTxResult riscv_iommu_mmio_read(void *opaque, hwaddr addr,
                                         uint64_t *data, unsigned size,
                                         MemTxAttrs attrs)
{
    RISCVIOMMUState *s = opaque;
    uint64_t val = -1;
    uint8_t *ptr;

    if ((addr & (size - 1)) != 0) {
        /* Unsupported MMIO alignment. */
        return MEMTX_ERROR;
    }

    if (addr + size > RISCV_IOMMU_REG_MSI_CONFIG) {
        return MEMTX_ACCESS_ERROR;
    }

    /* Compute cycle register value. */
    if ((addr & ~7) == RISCV_IOMMU_REG_IOHPMCYCLES) {
        val = riscv_iommu_hpmcycle_read(s);
        ptr = (uint8_t *)&val + (addr & 7);
    } else if ((addr & ~3) == RISCV_IOMMU_REG_IOCOUNTOVF) {
        /*
         * Software can read RISCV_IOMMU_REG_IOCOUNTOVF before the timer
         * callback completes, in which case the CY_OF bit in
         * RISCV_IOMMU_IOHPMCYCLES_OVF would still be 0. Take the CY_OF
         * bit state from the RISCV_IOMMU_REG_IOHPMCYCLES register instead,
         * as it does not depend on the timer callback and is computed
         * directly from the cycle overflow.
         */
        val = ldq_le_p(&s->regs_rw[addr]);
        val |= (riscv_iommu_hpmcycle_read(s) & RISCV_IOMMU_IOHPMCYCLES_OVF)
               ? RISCV_IOMMU_IOCOUNTOVF_CY
               : 0;
        ptr = (uint8_t *)&val + (addr & 3);
    } else {
        ptr = &s->regs_rw[addr];
    }

    val = ldn_le_p(ptr, size);

    *data = val;

    return MEMTX_OK;
}

static const MemoryRegionOps riscv_iommu_mmio_ops = {
    .read_with_attrs = riscv_iommu_mmio_read,
    .write_with_attrs = riscv_iommu_mmio_write,
    .endianness = DEVICE_NATIVE_ENDIAN,
    .impl = {
        .min_access_size = 4,
        .max_access_size = 8,
        .unaligned = false,
    },
    .valid = {
        .min_access_size = 4,
        .max_access_size = 8,
    }
};

/*
 * Translations matching the MSI pattern check are redirected to the
 * "riscv-iommu-trap" memory region as an untranslated address, for
 * additional MSI/MRIF interception by the IOMMU interrupt remapping
 * implementation.
 * Note: device emulation code generating an MSI is expected to provide
 * valid memory transaction attributes with the requester_id set.
 */
static MemTxResult riscv_iommu_trap_write(void *opaque, hwaddr addr,
                                          uint64_t data, unsigned size,
                                          MemTxAttrs attrs)
{
    RISCVIOMMUState *s = (RISCVIOMMUState *)opaque;
    RISCVIOMMUContext *ctx;
    MemTxResult res;
    void *ref;
    uint32_t devid = attrs.requester_id;

    if (attrs.unspecified) {
        return MEMTX_ACCESS_ERROR;
    }

    /* FIXME: PCIe bus remapping for attached endpoints. */
    devid |= s->bus << 8;

    ctx = riscv_iommu_ctx(s, devid, 0, &ref);
    if (ctx == NULL) {
        res = MEMTX_ACCESS_ERROR;
    } else {
        res = riscv_iommu_msi_write(s, ctx, addr, data, size, attrs);
    }
    riscv_iommu_ctx_put(s, ref);
    return res;
}

static MemTxResult riscv_iommu_trap_read(void *opaque, hwaddr addr,
                                         uint64_t *data, unsigned size,
                                         MemTxAttrs attrs)
{
    return MEMTX_ACCESS_ERROR;
}

static const MemoryRegionOps riscv_iommu_trap_ops = {
    .read_with_attrs = riscv_iommu_trap_read,
    .write_with_attrs = riscv_iommu_trap_write,
    .endianness = DEVICE_LITTLE_ENDIAN,
    .impl = {
        .min_access_size = 4,
        .max_access_size = 8,
        .unaligned = true,
    },
    .valid = {
        .min_access_size = 4,
        .max_access_size = 8,
    }
};

void riscv_iommu_set_cap_igs(RISCVIOMMUState *s, riscv_iommu_igs_mode mode)
{
    s->cap = set_field(s->cap, RISCV_IOMMU_CAP_IGS, mode);
}

static void riscv_iommu_instance_init(Object *obj)
{
    RISCVIOMMUState *s = RISCV_IOMMU(obj);

    /* Enable translation debug interface */
    s->cap = RISCV_IOMMU_CAP_DBG;

    /* Report QEMU target physical address space limits */
    s->cap = set_field(s->cap, RISCV_IOMMU_CAP_PAS,
                       TARGET_PHYS_ADDR_SPACE_BITS);

    /* TODO: method to report supported PID bits */
    s->pid_bits = 8; /* restricted to size of MemTxAttrs.pid */
    s->cap |= RISCV_IOMMU_CAP_PD8;

    /* register storage */
    s->regs_rw = g_new0(uint8_t, RISCV_IOMMU_REG_SIZE);
    s->regs_ro = g_new0(uint8_t, RISCV_IOMMU_REG_SIZE);
    s->regs_wc = g_new0(uint8_t, RISCV_IOMMU_REG_SIZE);

    /* Mark all registers read-only */
    memset(s->regs_ro, 0xff, RISCV_IOMMU_REG_SIZE);

    /* Device translation context cache */
    s->ctx_cache = g_hash_table_new_full(riscv_iommu_ctx_hash,
                                         riscv_iommu_ctx_equal,
                                         g_free, NULL);

    s->iot_cache = g_hash_table_new_full(riscv_iommu_iot_hash,
                                         riscv_iommu_iot_equal,
                                         g_free, NULL);

    s->iommus.le_next = NULL;
    s->iommus.le_prev = NULL;
    QLIST_INIT(&s->spaces);
}
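
/*
 * Realize: finalize the capabilities register from the user-configurable
 * properties, set the power-on register state (including the read-only and
 * write-1-to-clear masks) and create the MMIO register space, the downstream
 * address space and the MSI/MRIF trap address space.
 */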
static void riscv_iommu_realize(DeviceState *dev, Error **errp)
{
    RISCVIOMMUState *s = RISCV_IOMMU(dev);

    s->cap |= s->version & RISCV_IOMMU_CAP_VERSION;
    if (s->enable_msi) {
        s->cap |= RISCV_IOMMU_CAP_MSI_FLAT | RISCV_IOMMU_CAP_MSI_MRIF;
    }
    if (s->enable_ats) {
        s->cap |= RISCV_IOMMU_CAP_ATS;
    }
    if (s->enable_s_stage) {
        s->cap |= RISCV_IOMMU_CAP_SV32 | RISCV_IOMMU_CAP_SV39 |
                  RISCV_IOMMU_CAP_SV48 | RISCV_IOMMU_CAP_SV57;
    }
    if (s->enable_g_stage) {
        s->cap |= RISCV_IOMMU_CAP_SV32X4 | RISCV_IOMMU_CAP_SV39X4 |
                  RISCV_IOMMU_CAP_SV48X4 | RISCV_IOMMU_CAP_SV57X4;
    }

    if (s->hpm_cntrs > 0) {
        /* Clip the number of HPM counters to the maximum supported (31). */
        if (s->hpm_cntrs > RISCV_IOMMU_IOCOUNT_NUM) {
            s->hpm_cntrs = RISCV_IOMMU_IOCOUNT_NUM;
        }
        /* Enable hardware performance monitor interface */
        s->cap |= RISCV_IOMMU_CAP_HPM;
    }

    /*
     * Out-of-reset translation mode: OFF (DMA disabled) or
     * BARE (passthrough).
     */
    s->ddtp = set_field(0, RISCV_IOMMU_DDTP_MODE, s->enable_off ?
                        RISCV_IOMMU_DDTP_MODE_OFF : RISCV_IOMMU_DDTP_MODE_BARE);

    /*
     * Register the complete MMIO space, including MSI/PBA registers.
     * Note: the PCIDevice implementation will add an overlapping MR for
     * MSI/PBA, managed directly by the PCIDevice implementation.
     */
    memory_region_init_io(&s->regs_mr, OBJECT(dev), &riscv_iommu_mmio_ops, s,
                          "riscv-iommu-regs", RISCV_IOMMU_REG_SIZE);

    /* Set power-on register state */
    stq_le_p(&s->regs_rw[RISCV_IOMMU_REG_CAP], s->cap);
    stq_le_p(&s->regs_rw[RISCV_IOMMU_REG_FCTL], 0);
    stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_FCTL],
             ~(RISCV_IOMMU_FCTL_BE | RISCV_IOMMU_FCTL_WSI));
    stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_DDTP],
             ~(RISCV_IOMMU_DDTP_PPN | RISCV_IOMMU_DDTP_MODE));
    stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_CQB],
             ~(RISCV_IOMMU_CQB_LOG2SZ | RISCV_IOMMU_CQB_PPN));
    stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_FQB],
             ~(RISCV_IOMMU_FQB_LOG2SZ | RISCV_IOMMU_FQB_PPN));
    stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_PQB],
             ~(RISCV_IOMMU_PQB_LOG2SZ | RISCV_IOMMU_PQB_PPN));
    stl_le_p(&s->regs_wc[RISCV_IOMMU_REG_CQCSR], RISCV_IOMMU_CQCSR_CQMF |
             RISCV_IOMMU_CQCSR_CMD_TO | RISCV_IOMMU_CQCSR_CMD_ILL);
    stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_CQCSR], RISCV_IOMMU_CQCSR_CQON |
             RISCV_IOMMU_CQCSR_BUSY);
    stl_le_p(&s->regs_wc[RISCV_IOMMU_REG_FQCSR], RISCV_IOMMU_FQCSR_FQMF |
             RISCV_IOMMU_FQCSR_FQOF);
    stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_FQCSR], RISCV_IOMMU_FQCSR_FQON |
             RISCV_IOMMU_FQCSR_BUSY);
    stl_le_p(&s->regs_wc[RISCV_IOMMU_REG_PQCSR], RISCV_IOMMU_PQCSR_PQMF |
             RISCV_IOMMU_PQCSR_PQOF);
    stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_PQCSR], RISCV_IOMMU_PQCSR_PQON |
             RISCV_IOMMU_PQCSR_BUSY);
    stl_le_p(&s->regs_wc[RISCV_IOMMU_REG_IPSR], ~0);
    stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_ICVEC], 0);
    stq_le_p(&s->regs_rw[RISCV_IOMMU_REG_DDTP], s->ddtp);
    /* If debug registers enabled. */
    if (s->cap & RISCV_IOMMU_CAP_DBG) {
        stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_TR_REQ_IOVA], 0);
        stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_TR_REQ_CTL],
                 RISCV_IOMMU_TR_REQ_CTL_GO_BUSY);
    }

    /* If HPM registers are enabled. */
    if (s->cap & RISCV_IOMMU_CAP_HPM) {
        /* +1 for cycle counter bit. */
        stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_IOCOUNTINH],
                 ~((2 << s->hpm_cntrs) - 1));
        stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_IOHPMCYCLES], 0);
        memset(&s->regs_ro[RISCV_IOMMU_REG_IOHPMCTR_BASE],
               0x00, s->hpm_cntrs * 8);
        memset(&s->regs_ro[RISCV_IOMMU_REG_IOHPMEVT_BASE],
               0x00, s->hpm_cntrs * 8);
    }

    /* Memory region for downstream access, if specified. */
    if (s->target_mr) {
        s->target_as = g_new0(AddressSpace, 1);
        address_space_init(s->target_as, s->target_mr,
                           "riscv-iommu-downstream");
    } else {
        /* Fall back to global system memory. */
        s->target_as = &address_space_memory;
    }

    /* Memory region for untranslated MRIF/MSI writes */
    memory_region_init_io(&s->trap_mr, OBJECT(dev), &riscv_iommu_trap_ops, s,
                          "riscv-iommu-trap", ~0ULL);
    address_space_init(&s->trap_as, &s->trap_mr, "riscv-iommu-trap-as");

    if (s->cap & RISCV_IOMMU_CAP_HPM) {
        s->hpm_timer =
            timer_new_ns(QEMU_CLOCK_VIRTUAL, riscv_iommu_hpm_timer_cb, s);
        s->hpm_event_ctr_map = g_hash_table_new(g_direct_hash, g_direct_equal);
    }
}

static void riscv_iommu_unrealize(DeviceState *dev)
{
    RISCVIOMMUState *s = RISCV_IOMMU(dev);

    g_hash_table_unref(s->iot_cache);
    g_hash_table_unref(s->ctx_cache);

    if (s->cap & RISCV_IOMMU_CAP_HPM) {
        g_hash_table_unref(s->hpm_event_ctr_map);
        timer_free(s->hpm_timer);
    }
}
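
/*
 * Device reset: restore DDTP to its configured out-of-reset mode, disable
 * the command, fault and page-request queues, clear pending interrupt and
 * debug-request state, and drop all cached device contexts and translations.
 */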
void riscv_iommu_reset(RISCVIOMMUState *s)
{
    uint32_t reg_clr;
    int ddtp_mode;

    /*
     * Clear DDTP while setting DDTP.mode back to its user-configured
     * initial setting.
     */
    ddtp_mode = s->enable_off ?
                RISCV_IOMMU_DDTP_MODE_OFF : RISCV_IOMMU_DDTP_MODE_BARE;
    s->ddtp = set_field(0, RISCV_IOMMU_DDTP_MODE, ddtp_mode);
    riscv_iommu_reg_set64(s, RISCV_IOMMU_REG_DDTP, s->ddtp);

    reg_clr = RISCV_IOMMU_CQCSR_CQEN | RISCV_IOMMU_CQCSR_CIE |
              RISCV_IOMMU_CQCSR_CQON | RISCV_IOMMU_CQCSR_BUSY;
    riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_CQCSR, 0, reg_clr);

    reg_clr = RISCV_IOMMU_FQCSR_FQEN | RISCV_IOMMU_FQCSR_FIE |
              RISCV_IOMMU_FQCSR_FQON | RISCV_IOMMU_FQCSR_BUSY;
    riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_FQCSR, 0, reg_clr);

    reg_clr = RISCV_IOMMU_PQCSR_PQEN | RISCV_IOMMU_PQCSR_PIE |
              RISCV_IOMMU_PQCSR_PQON | RISCV_IOMMU_PQCSR_BUSY;
    riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_PQCSR, 0, reg_clr);

    riscv_iommu_reg_mod64(s, RISCV_IOMMU_REG_TR_REQ_CTL, 0,
                          RISCV_IOMMU_TR_REQ_CTL_GO_BUSY);

    riscv_iommu_reg_set32(s, RISCV_IOMMU_REG_IPSR, 0);

    g_hash_table_remove_all(s->ctx_cache);
    g_hash_table_remove_all(s->iot_cache);
}
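
/*
 * User-configurable properties of the common IOMMU device. By default MSI
 * interrupt remapping, ATS and both translation stages are enabled, and
 * the out-of-reset DDTP mode is OFF.
 */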
static const Property riscv_iommu_properties[] = {
    DEFINE_PROP_UINT32("version", RISCVIOMMUState, version,
                       RISCV_IOMMU_SPEC_DOT_VER),
    DEFINE_PROP_UINT32("bus", RISCVIOMMUState, bus, 0x0),
    DEFINE_PROP_UINT32("ioatc-limit", RISCVIOMMUState, iot_limit,
                       LIMIT_CACHE_IOT),
    DEFINE_PROP_BOOL("intremap", RISCVIOMMUState, enable_msi, TRUE),
    DEFINE_PROP_BOOL("ats", RISCVIOMMUState, enable_ats, TRUE),
    DEFINE_PROP_BOOL("off", RISCVIOMMUState, enable_off, TRUE),
    DEFINE_PROP_BOOL("s-stage", RISCVIOMMUState, enable_s_stage, TRUE),
    DEFINE_PROP_BOOL("g-stage", RISCVIOMMUState, enable_g_stage, TRUE),
    DEFINE_PROP_LINK("downstream-mr", RISCVIOMMUState, target_mr,
                     TYPE_MEMORY_REGION, MemoryRegion *),
    DEFINE_PROP_UINT8("hpm-counters", RISCVIOMMUState, hpm_cntrs,
                      RISCV_IOMMU_IOCOUNT_NUM),
};

static void riscv_iommu_class_init(ObjectClass *klass, const void *data)
{
    DeviceClass *dc = DEVICE_CLASS(klass);

    /* internal device for riscv-iommu-{pci/sys}, not user-creatable */
    dc->user_creatable = false;
    dc->realize = riscv_iommu_realize;
    dc->unrealize = riscv_iommu_unrealize;
    device_class_set_props(dc, riscv_iommu_properties);
}

static const TypeInfo riscv_iommu_info = {
    .name = TYPE_RISCV_IOMMU,
    .parent = TYPE_DEVICE,
    .instance_size = sizeof(RISCVIOMMUState),
    .instance_init = riscv_iommu_instance_init,
    .class_init = riscv_iommu_class_init,
};

static const char *IOMMU_FLAG_STR[] = {
    "NA",
    "RO",
    "WR",
    "RW",
};

/* RISC-V IOMMU Memory Region - Address Translation Space */
static IOMMUTLBEntry riscv_iommu_memory_region_translate(
    IOMMUMemoryRegion *iommu_mr, hwaddr addr,
    IOMMUAccessFlags flag, int iommu_idx)
{
    RISCVIOMMUSpace *as = container_of(iommu_mr, RISCVIOMMUSpace, iova_mr);
    RISCVIOMMUContext *ctx;
    void *ref;
    IOMMUTLBEntry iotlb = {
        .iova = addr,
        .target_as = as->iommu->target_as,
        .addr_mask = ~0ULL,
        .perm = flag,
    };

    ctx = riscv_iommu_ctx(as->iommu, as->devid, iommu_idx, &ref);
    if (ctx == NULL) {
        /* Translation disabled or invalid. */
        iotlb.addr_mask = 0;
        iotlb.perm = IOMMU_NONE;
    } else if (riscv_iommu_translate(as->iommu, ctx, &iotlb, true)) {
        /* Translation disabled or fault reported. */
        iotlb.addr_mask = 0;
        iotlb.perm = IOMMU_NONE;
    }

    /* Trace all dma translations with original access flags. */
    trace_riscv_iommu_dma(as->iommu->parent_obj.id, PCI_BUS_NUM(as->devid),
                          PCI_SLOT(as->devid), PCI_FUNC(as->devid), iommu_idx,
                          IOMMU_FLAG_STR[flag & IOMMU_RW], iotlb.iova,
                          iotlb.translated_addr);

    riscv_iommu_ctx_put(as->iommu, ref);

    return iotlb;
}

static int riscv_iommu_memory_region_notify(
    IOMMUMemoryRegion *iommu_mr, IOMMUNotifierFlag old,
    IOMMUNotifierFlag new, Error **errp)
{
    RISCVIOMMUSpace *as = container_of(iommu_mr, RISCVIOMMUSpace, iova_mr);

    if (old == IOMMU_NOTIFIER_NONE) {
        as->notifier = true;
        trace_riscv_iommu_notifier_add(iommu_mr->parent_obj.name);
    } else if (new == IOMMU_NOTIFIER_NONE) {
        as->notifier = false;
        trace_riscv_iommu_notifier_del(iommu_mr->parent_obj.name);
    }

    return 0;
}
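
/*
 * PCI class code 0x0806 (Base System Peripheral: IOMMU) identifies an IOMMU
 * endpoint; riscv_iommu_find_as() below hands such devices the downstream
 * address space directly so their traffic bypasses translation.
 */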
static inline bool pci_is_iommu(PCIDevice *pdev)
{
    return pci_get_word(pdev->config + PCI_CLASS_DEVICE) == 0x0806;
}

static AddressSpace *riscv_iommu_find_as(PCIBus *bus, void *opaque, int devfn)
{
    RISCVIOMMUState *s = (RISCVIOMMUState *) opaque;
    PCIDevice *pdev = pci_find_device(bus, pci_bus_num(bus), devfn);
    AddressSpace *as = NULL;

    if (pdev && pci_is_iommu(pdev)) {
        return s->target_as;
    }

    /* Find first registered IOMMU device */
    while (s->iommus.le_prev) {
        s = *(s->iommus.le_prev);
    }

    /* Find first matching IOMMU */
    while (s != NULL && as == NULL) {
        as = riscv_iommu_space(s, PCI_BUILD_BDF(pci_bus_num(bus), devfn));
        s = s->iommus.le_next;
    }

    return as ? as : &address_space_memory;
}

static const PCIIOMMUOps riscv_iommu_ops = {
    .get_address_space = riscv_iommu_find_as,
};

void riscv_iommu_pci_setup_iommu(RISCVIOMMUState *iommu, PCIBus *bus,
                                 Error **errp)
{
    if (bus->iommu_ops &&
        bus->iommu_ops->get_address_space == riscv_iommu_find_as) {
        /* Allow multiple IOMMUs on the same PCIe bus, link known devices */
        RISCVIOMMUState *last = (RISCVIOMMUState *)bus->iommu_opaque;
        QLIST_INSERT_AFTER(last, iommu, iommus);
    } else if (!bus->iommu_ops && !bus->iommu_opaque) {
        pci_setup_iommu(bus, &riscv_iommu_ops, iommu);
    } else {
        error_setg(errp, "can't register secondary IOMMU for PCI bus #%d",
                   pci_bus_num(bus));
    }
}

static int riscv_iommu_memory_region_index(IOMMUMemoryRegion *iommu_mr,
                                           MemTxAttrs attrs)
{
    return attrs.unspecified ? RISCV_IOMMU_NOPROCID : (int)attrs.pid;
}

static int riscv_iommu_memory_region_index_len(IOMMUMemoryRegion *iommu_mr)
{
    RISCVIOMMUSpace *as = container_of(iommu_mr, RISCVIOMMUSpace, iova_mr);
    return 1 << as->iommu->pid_bits;
}

static void riscv_iommu_memory_region_init(ObjectClass *klass,
                                           const void *data)
{
    IOMMUMemoryRegionClass *imrc = IOMMU_MEMORY_REGION_CLASS(klass);

    imrc->translate = riscv_iommu_memory_region_translate;
    imrc->notify_flag_changed = riscv_iommu_memory_region_notify;
    imrc->attrs_to_index = riscv_iommu_memory_region_index;
    imrc->num_indexes = riscv_iommu_memory_region_index_len;
}

static const TypeInfo riscv_iommu_memory_region_info = {
    .parent = TYPE_IOMMU_MEMORY_REGION,
    .name = TYPE_RISCV_IOMMU_MEMORY_REGION,
    .class_init = riscv_iommu_memory_region_init,
};

static void riscv_iommu_register_mr_types(void)
{
    type_register_static(&riscv_iommu_memory_region_info);
    type_register_static(&riscv_iommu_info);
}

type_init(riscv_iommu_register_mr_types);