/*
 * QEMU emulation of a RISC-V IOMMU
 *
 * Copyright (C) 2021-2023, Rivos Inc.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2 or later, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "qom/object.h"
#include "hw/pci/pci_bus.h"
#include "hw/pci/pci_device.h"
#include "hw/qdev-properties.h"
#include "hw/riscv/riscv_hart.h"
#include "migration/vmstate.h"
#include "qapi/error.h"
#include "qemu/timer.h"

#include "cpu_bits.h"
#include "riscv-iommu.h"
#include "riscv-iommu-bits.h"
#include "riscv-iommu-hpm.h"
#include "trace.h"

#define LIMIT_CACHE_CTX (1U << 7)
#define LIMIT_CACHE_IOT (1U << 20)

/* Physical page number conversions */
#define PPN_PHYS(ppn) ((ppn) << TARGET_PAGE_BITS)
#define PPN_DOWN(phy) ((phy) >> TARGET_PAGE_BITS)

typedef struct RISCVIOMMUEntry RISCVIOMMUEntry;

/* Device assigned I/O address space */
struct RISCVIOMMUSpace {
    IOMMUMemoryRegion iova_mr;  /* IOVA memory region for attached device */
    AddressSpace iova_as;       /* IOVA address space for attached device */
    RISCVIOMMUState *iommu;     /* Managing IOMMU device state */
    uint32_t devid;             /* Requester identifier, AKA device_id */
    bool notifier;              /* IOMMU unmap notifier enabled */
    QLIST_ENTRY(RISCVIOMMUSpace) list;
};

typedef enum RISCVIOMMUTransTag {
    RISCV_IOMMU_TRANS_TAG_BY,  /* Bypass */
    RISCV_IOMMU_TRANS_TAG_SS,  /* Single-stage */
    RISCV_IOMMU_TRANS_TAG_VG,  /* G-stage only */
    RISCV_IOMMU_TRANS_TAG_VN,  /* Nested translation */
} RISCVIOMMUTransTag;

/* Address translation cache entry */
struct RISCVIOMMUEntry {
    RISCVIOMMUTransTag tag;     /* Translation Tag */
    uint64_t iova:44;           /* IOVA Page Number */
    uint64_t pscid:20;          /* Process Soft-Context identifier */
    uint64_t phys:44;           /* Physical Page Number */
    uint64_t gscid:16;          /* Guest Soft-Context identifier */
    uint64_t perm:2;            /* IOMMU_RW flags */
};

/* IOMMU index for transactions without process_id specified. */
#define RISCV_IOMMU_NOPROCID 0
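/*
 * ICVEC packs one 4-bit vector index per interrupt source: CIV in
 * bits [3:0], FIV in [7:4], PMIV in [11:8] and PIV in [15:12],
 * matching the shifts used below.
 */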
static uint8_t riscv_iommu_get_icvec_vector(uint32_t icvec, uint32_t vec_type)
{
    switch (vec_type) {
    case RISCV_IOMMU_INTR_CQ:
        return icvec & RISCV_IOMMU_ICVEC_CIV;
    case RISCV_IOMMU_INTR_FQ:
        return (icvec & RISCV_IOMMU_ICVEC_FIV) >> 4;
    case RISCV_IOMMU_INTR_PM:
        return (icvec & RISCV_IOMMU_ICVEC_PMIV) >> 8;
    case RISCV_IOMMU_INTR_PQ:
        return (icvec & RISCV_IOMMU_ICVEC_PIV) >> 12;
    default:
        g_assert_not_reached();
    }
}

void riscv_iommu_notify(RISCVIOMMUState *s, int vec_type)
{
    uint32_t ipsr, icvec, vector;

    if (!s->notify) {
        return;
    }

    icvec = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_ICVEC);
    ipsr = riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_IPSR, (1 << vec_type), 0);

    if (!(ipsr & (1 << vec_type))) {
        vector = riscv_iommu_get_icvec_vector(icvec, vec_type);
        s->notify(s, vector);
        trace_riscv_iommu_notify_int_vector(vec_type, vector);
    }
}

static void riscv_iommu_fault(RISCVIOMMUState *s,
                              struct riscv_iommu_fq_record *ev)
{
    uint32_t ctrl = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_FQCSR);
    uint32_t head = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_FQH) & s->fq_mask;
    uint32_t tail = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_FQT) & s->fq_mask;
    uint32_t next = (tail + 1) & s->fq_mask;
    uint32_t devid = get_field(ev->hdr, RISCV_IOMMU_FQ_HDR_DID);

    trace_riscv_iommu_flt(s->parent_obj.id, PCI_BUS_NUM(devid),
                          PCI_SLOT(devid), PCI_FUNC(devid),
                          ev->hdr, ev->iotval);

    if (!(ctrl & RISCV_IOMMU_FQCSR_FQON) ||
        !!(ctrl & (RISCV_IOMMU_FQCSR_FQOF | RISCV_IOMMU_FQCSR_FQMF))) {
        return;
    }

    if (head == next) {
        riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_FQCSR,
                              RISCV_IOMMU_FQCSR_FQOF, 0);
    } else {
        dma_addr_t addr = s->fq_addr + tail * sizeof(*ev);
        if (dma_memory_write(s->target_as, addr, ev, sizeof(*ev),
                             MEMTXATTRS_UNSPECIFIED) != MEMTX_OK) {
            riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_FQCSR,
                                  RISCV_IOMMU_FQCSR_FQMF, 0);
        } else {
            riscv_iommu_reg_set32(s, RISCV_IOMMU_REG_FQT, next);
        }
    }

    if (ctrl & RISCV_IOMMU_FQCSR_FIE) {
        riscv_iommu_notify(s, RISCV_IOMMU_INTR_FQ);
    }
}

static void riscv_iommu_pri(RISCVIOMMUState *s,
    struct riscv_iommu_pq_record *pr)
{
    uint32_t ctrl = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_PQCSR);
    uint32_t head = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_PQH) & s->pq_mask;
    uint32_t tail = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_PQT) & s->pq_mask;
    uint32_t next = (tail + 1) & s->pq_mask;
    uint32_t devid = get_field(pr->hdr, RISCV_IOMMU_PREQ_HDR_DID);

    trace_riscv_iommu_pri(s->parent_obj.id, PCI_BUS_NUM(devid),
                          PCI_SLOT(devid), PCI_FUNC(devid), pr->payload);

    if (!(ctrl & RISCV_IOMMU_PQCSR_PQON) ||
        !!(ctrl & (RISCV_IOMMU_PQCSR_PQOF | RISCV_IOMMU_PQCSR_PQMF))) {
        return;
    }

    if (head == next) {
        riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_PQCSR,
                              RISCV_IOMMU_PQCSR_PQOF, 0);
    } else {
        dma_addr_t addr = s->pq_addr + tail * sizeof(*pr);
        if (dma_memory_write(s->target_as, addr, pr, sizeof(*pr),
                             MEMTXATTRS_UNSPECIFIED) != MEMTX_OK) {
            riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_PQCSR,
                                  RISCV_IOMMU_PQCSR_PQMF, 0);
        } else {
            riscv_iommu_reg_set32(s, RISCV_IOMMU_REG_PQT, next);
        }
    }

    if (ctrl & RISCV_IOMMU_PQCSR_PIE) {
        riscv_iommu_notify(s, RISCV_IOMMU_INTR_PQ);
    }
}
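/*
 * Both queue producers above share the same circular-buffer
 * arithmetic: for a queue of 2^N entries, mask = 2^N - 1, and the
 * queue is full when ((tail + 1) & mask) == head. Hypothetical
 * example with a 4-entry queue (mask = 3): head = 2, tail = 1 gives
 * next = 2 == head, so the overflow flag is raised instead of
 * writing the record.
 */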
/*
 * Discards all bits from 'val' whose matching bits in the same
 * positions in the mask 'ext' are zeros, and packs the remaining
 * bits from 'val' contiguously at the least-significant end of the
 * result, keeping the same bit order as 'val' and filling any
 * other bits at the most-significant end of the result with zeros.
 *
 * For example, for the following 'val' and 'ext', the return 'ret'
 * will be:
 *
 * val = a b c d e f g h
 * ext = 1 0 1 0 0 1 1 0
 * ret = 0 0 0 0 a c f g
 *
 * This function, taken from the riscv-iommu 1.0 spec, section 2.3.3
 * "Process to translate addresses of MSIs", is similar to the x86
 * bit-manipulation instruction PEXT (parallel bits extract).
 */
static uint64_t riscv_iommu_pext_u64(uint64_t val, uint64_t ext)
{
    uint64_t ret = 0;
    uint64_t rot = 1;

    while (ext) {
        if (ext & 1) {
            if (val & 1) {
                ret |= rot;
            }
            rot <<= 1;
        }
        val >>= 1;
        ext >>= 1;
    }

    return ret;
}

/* Check if GPA matches MSI/MRIF pattern. */
static bool riscv_iommu_msi_check(RISCVIOMMUState *s, RISCVIOMMUContext *ctx,
    dma_addr_t gpa)
{
    if (!s->enable_msi) {
        return false;
    }

    if (get_field(ctx->msiptp, RISCV_IOMMU_DC_MSIPTP_MODE) !=
        RISCV_IOMMU_DC_MSIPTP_MODE_FLAT) {
        return false; /* Invalid MSI/MRIF mode */
    }

    if ((PPN_DOWN(gpa) ^ ctx->msi_addr_pattern) & ~ctx->msi_addr_mask) {
        return false; /* GPA not in MSI range defined by AIA IMSIC rules. */
    }

    return true;
}
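/*
 * Worked example of the pattern match above (values are illustrative
 * only): with msi_addr_mask = 0xff and msi_addr_pattern = 0x12300,
 * bits covered by the mask are don't-care, so any GPA whose PPN lies
 * in 0x12300..0x123ff is treated as an MSI write.
 */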
/*
 * RISC-V IOMMU Address Translation Lookup - Page Table Walk
 *
 * Note: Code is based on get_physical_address() from target/riscv/cpu_helper.c
 * Both implementations can be merged into a single helper function in the
 * future. Keeping them separate for now, as error reporting and flow
 * specifics are sufficiently different for separate implementation.
 *
 * @s      : IOMMU Device State
 * @ctx    : Translation context for device id and process address space id.
 * @iotlb  : translation data: physical address and access mode.
 * @return : success or fault cause code.
 */
static int riscv_iommu_spa_fetch(RISCVIOMMUState *s, RISCVIOMMUContext *ctx,
    IOMMUTLBEntry *iotlb)
{
    dma_addr_t addr, base;
    uint64_t satp, gatp, pte;
    bool en_s, en_g;
    struct {
        unsigned char step;
        unsigned char levels;
        unsigned char ptidxbits;
        unsigned char ptesize;
    } sc[2];
    /* Translation stage phase */
    enum {
        S_STAGE = 0,
        G_STAGE = 1,
    } pass;
    MemTxResult ret;

    satp = get_field(ctx->satp, RISCV_IOMMU_ATP_MODE_FIELD);
    gatp = get_field(ctx->gatp, RISCV_IOMMU_ATP_MODE_FIELD);

    en_s = satp != RISCV_IOMMU_DC_FSC_MODE_BARE;
    en_g = gatp != RISCV_IOMMU_DC_IOHGATP_MODE_BARE;

    /*
     * Early check for MSI address match when IOVA == GPA.
     * Note that the (!en_s) condition means that the MSI
     * page table may only be used when guest pages are
     * mapped using the g-stage page table, whether single-
     * or two-stage paging is enabled. It's unavoidable though,
     * because the spec mandates that we do a first-stage
     * translation before we check the MSI page table, which
     * means we can't do an early MSI check unless we have
     * strictly !en_s.
     */
    if (!en_s && (iotlb->perm & IOMMU_WO) &&
        riscv_iommu_msi_check(s, ctx, iotlb->iova)) {
        iotlb->target_as = &s->trap_as;
        iotlb->translated_addr = iotlb->iova;
        iotlb->addr_mask = ~TARGET_PAGE_MASK;
        return 0;
    }

    /* Exit early for pass-through mode. */
    if (!(en_s || en_g)) {
        iotlb->translated_addr = iotlb->iova;
        iotlb->addr_mask = ~TARGET_PAGE_MASK;
        /* Allow R/W in pass-through mode */
        iotlb->perm = IOMMU_RW;
        return 0;
    }

    /* S/G translation parameters. */
    for (pass = 0; pass < 2; pass++) {
        uint32_t sv_mode;

        sc[pass].step = 0;
        if (pass ? (s->fctl & RISCV_IOMMU_FCTL_GXL) :
            (ctx->tc & RISCV_IOMMU_DC_TC_SXL)) {
            /* 32-bit mode for GXL/SXL == 1 */
            switch (pass ? gatp : satp) {
            case RISCV_IOMMU_DC_IOHGATP_MODE_BARE:
                sc[pass].levels = 0;
                sc[pass].ptidxbits = 0;
                sc[pass].ptesize = 0;
                break;
            case RISCV_IOMMU_DC_IOHGATP_MODE_SV32X4:
                sv_mode = pass ? RISCV_IOMMU_CAP_SV32X4 : RISCV_IOMMU_CAP_SV32;
                if (!(s->cap & sv_mode)) {
                    return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED;
                }
                sc[pass].levels = 2;
                sc[pass].ptidxbits = 10;
                sc[pass].ptesize = 4;
                break;
            default:
                return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED;
            }
        } else {
            /* 64-bit mode for GXL/SXL == 0 */
            switch (pass ? gatp : satp) {
            case RISCV_IOMMU_DC_IOHGATP_MODE_BARE:
                sc[pass].levels = 0;
                sc[pass].ptidxbits = 0;
                sc[pass].ptesize = 0;
                break;
            case RISCV_IOMMU_DC_IOHGATP_MODE_SV39X4:
                sv_mode = pass ? RISCV_IOMMU_CAP_SV39X4 : RISCV_IOMMU_CAP_SV39;
                if (!(s->cap & sv_mode)) {
                    return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED;
                }
                sc[pass].levels = 3;
                sc[pass].ptidxbits = 9;
                sc[pass].ptesize = 8;
                break;
            case RISCV_IOMMU_DC_IOHGATP_MODE_SV48X4:
                sv_mode = pass ? RISCV_IOMMU_CAP_SV48X4 : RISCV_IOMMU_CAP_SV48;
                if (!(s->cap & sv_mode)) {
                    return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED;
                }
                sc[pass].levels = 4;
                sc[pass].ptidxbits = 9;
                sc[pass].ptesize = 8;
                break;
            case RISCV_IOMMU_DC_IOHGATP_MODE_SV57X4:
                sv_mode = pass ? RISCV_IOMMU_CAP_SV57X4 : RISCV_IOMMU_CAP_SV57;
                if (!(s->cap & sv_mode)) {
                    return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED;
                }
                sc[pass].levels = 5;
                sc[pass].ptidxbits = 9;
                sc[pass].ptesize = 8;
                break;
            default:
                return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED;
            }
        }
    }

    /* S/G stages translation tables root pointers */
    gatp = PPN_PHYS(get_field(ctx->gatp, RISCV_IOMMU_ATP_PPN_FIELD));
    satp = PPN_PHYS(get_field(ctx->satp, RISCV_IOMMU_ATP_PPN_FIELD));
    addr = (en_s && en_g) ? satp : iotlb->iova;
    base = en_g ? gatp : satp;
    pass = en_g ? G_STAGE : S_STAGE;
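    /*
     * Walk index arithmetic used in the loop below, illustrated for
     * Sv39 (levels = 3, ptidxbits = 9): at the root, step == 0, so
     * va_skip = 12 + 9 * 2 = 30 and idx = (addr >> 30) & 0x1ff; each
     * further level peels off 9 more bits until va_skip reaches 12.
     * A G-stage root is "widened" by 2 extra index bits (the X4
     * modes). In the nested case (en_s && en_g) the walk starts in
     * G_STAGE with addr = satp, so the S-stage root pointer - and,
     * later, every S-stage table address - is itself translated by
     * the G-stage table.
     */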
    do {
        const unsigned widened = (pass && !sc[pass].step) ? 2 : 0;
        const unsigned va_bits = widened + sc[pass].ptidxbits;
        const unsigned va_skip = TARGET_PAGE_BITS + sc[pass].ptidxbits *
                                 (sc[pass].levels - 1 - sc[pass].step);
        const unsigned idx = (addr >> va_skip) & ((1 << va_bits) - 1);
        const dma_addr_t pte_addr = base + idx * sc[pass].ptesize;
        const bool ade =
            ctx->tc & (pass ? RISCV_IOMMU_DC_TC_GADE : RISCV_IOMMU_DC_TC_SADE);

        /* Address range check before first level lookup */
        if (!sc[pass].step) {
            const uint64_t va_len = va_skip + va_bits;
            const uint64_t va_mask = (1ULL << va_len) - 1;

            if (pass == S_STAGE && va_len > 32) {
                target_ulong mask, masked_msbs;

                mask = (1L << (TARGET_LONG_BITS - (va_len - 1))) - 1;
                masked_msbs = (addr >> (va_len - 1)) & mask;

                if (masked_msbs != 0 && masked_msbs != mask) {
                    return (iotlb->perm & IOMMU_WO) ?
                                RISCV_IOMMU_FQ_CAUSE_WR_FAULT_S :
                                RISCV_IOMMU_FQ_CAUSE_RD_FAULT_S;
                }
            } else {
                if ((addr & va_mask) != addr) {
                    return (iotlb->perm & IOMMU_WO) ?
                                RISCV_IOMMU_FQ_CAUSE_WR_FAULT_VS :
                                RISCV_IOMMU_FQ_CAUSE_RD_FAULT_VS;
                }
            }
        }

        if (pass == S_STAGE) {
            riscv_iommu_hpm_incr_ctr(s, ctx, RISCV_IOMMU_HPMEVENT_S_VS_WALKS);
        } else {
            riscv_iommu_hpm_incr_ctr(s, ctx, RISCV_IOMMU_HPMEVENT_G_WALKS);
        }

        /* Read page table entry */
        if (sc[pass].ptesize == 4) {
            uint32_t pte32 = 0;
            ret = ldl_le_dma(s->target_as, pte_addr, &pte32,
                             MEMTXATTRS_UNSPECIFIED);
            pte = pte32;
        } else {
            ret = ldq_le_dma(s->target_as, pte_addr, &pte,
                             MEMTXATTRS_UNSPECIFIED);
        }
        if (ret != MEMTX_OK) {
            return (iotlb->perm & IOMMU_WO) ? RISCV_IOMMU_FQ_CAUSE_WR_FAULT
                                            : RISCV_IOMMU_FQ_CAUSE_RD_FAULT;
        }

        sc[pass].step++;
        hwaddr ppn = pte >> PTE_PPN_SHIFT;

        if (!(pte & PTE_V)) {
            break; /* Invalid PTE */
        } else if (!(pte & (PTE_R | PTE_W | PTE_X))) {
            base = PPN_PHYS(ppn); /* Inner PTE, continue walking */
        } else if ((pte & (PTE_R | PTE_W | PTE_X)) == PTE_W) {
            break; /* Reserved leaf PTE flags: PTE_W */
        } else if ((pte & (PTE_R | PTE_W | PTE_X)) == (PTE_W | PTE_X)) {
            break; /* Reserved leaf PTE flags: PTE_W + PTE_X */
        } else if (ppn & ((1ULL << (va_skip - TARGET_PAGE_BITS)) - 1)) {
            break; /* Misaligned PPN */
        } else if ((iotlb->perm & IOMMU_RO) && !(pte & PTE_R)) {
            break; /* Read access check failed */
        } else if ((iotlb->perm & IOMMU_WO) && !(pte & PTE_W)) {
            break; /* Write access check failed */
        } else if ((iotlb->perm & IOMMU_RO) && !ade && !(pte & PTE_A)) {
            break; /* Access bit not set */
        } else if ((iotlb->perm & IOMMU_WO) && !ade && !(pte & PTE_D)) {
            break; /* Dirty bit not set */
        } else {
            /* Leaf PTE, translation completed. */
            sc[pass].step = sc[pass].levels;
            base = PPN_PHYS(ppn) | (addr & ((1ULL << va_skip) - 1));
            /* Update address mask based on smallest translation granularity */
            iotlb->addr_mask &= (1ULL << va_skip) - 1;
            /* Continue with S-Stage translation? */
            if (pass && sc[0].step != sc[0].levels) {
                pass = S_STAGE;
                addr = iotlb->iova;
                continue;
            }
            /* Translation phase completed (GPA or SPA) */
            iotlb->translated_addr = base;
            iotlb->perm = (pte & PTE_W) ? ((pte & PTE_R) ? IOMMU_RW : IOMMU_WO)
                                        : IOMMU_RO;

            /* Check MSI GPA address match */
            if (pass == S_STAGE && (iotlb->perm & IOMMU_WO) &&
                riscv_iommu_msi_check(s, ctx, base)) {
                /* Trap MSI writes and return GPA address. */
                iotlb->target_as = &s->trap_as;
                iotlb->addr_mask = ~TARGET_PAGE_MASK;
                return 0;
            }

            /* Continue with G-Stage translation? */
            if (!pass && en_g) {
                pass = G_STAGE;
                addr = base;
                base = gatp;
                sc[pass].step = 0;
                continue;
            }

            return 0;
        }

        if (sc[pass].step == sc[pass].levels) {
            break; /* Can't find leaf PTE */
        }

        /* Continue with G-Stage translation? */
        if (!pass && en_g) {
            pass = G_STAGE;
            addr = base;
            base = gatp;
            sc[pass].step = 0;
        }
    } while (1);

    return (iotlb->perm & IOMMU_WO) ?
                (pass ? RISCV_IOMMU_FQ_CAUSE_WR_FAULT_VS :
                        RISCV_IOMMU_FQ_CAUSE_WR_FAULT_S) :
                (pass ? RISCV_IOMMU_FQ_CAUSE_RD_FAULT_VS :
                        RISCV_IOMMU_FQ_CAUSE_RD_FAULT_S);
}
static void riscv_iommu_report_fault(RISCVIOMMUState *s,
                                     RISCVIOMMUContext *ctx,
                                     uint32_t fault_type, uint32_t cause,
                                     bool pv,
                                     uint64_t iotval, uint64_t iotval2)
{
    struct riscv_iommu_fq_record ev = { 0 };

    if (ctx->tc & RISCV_IOMMU_DC_TC_DTF) {
        switch (cause) {
        case RISCV_IOMMU_FQ_CAUSE_DMA_DISABLED:
        case RISCV_IOMMU_FQ_CAUSE_DDT_LOAD_FAULT:
        case RISCV_IOMMU_FQ_CAUSE_DDT_INVALID:
        case RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED:
        case RISCV_IOMMU_FQ_CAUSE_DDT_CORRUPTED:
        case RISCV_IOMMU_FQ_CAUSE_INTERNAL_DP_ERROR:
        case RISCV_IOMMU_FQ_CAUSE_MSI_WR_FAULT:
            break;
        default:
            /* DTF prevents reporting a fault for this given cause */
            return;
        }
    }

    ev.hdr = set_field(ev.hdr, RISCV_IOMMU_FQ_HDR_CAUSE, cause);
    ev.hdr = set_field(ev.hdr, RISCV_IOMMU_FQ_HDR_TTYPE, fault_type);
    ev.hdr = set_field(ev.hdr, RISCV_IOMMU_FQ_HDR_DID, ctx->devid);

    if (pv) {
        /* Only flag the process_id as valid (PV) when one was supplied. */
        ev.hdr = set_field(ev.hdr, RISCV_IOMMU_FQ_HDR_PV, true);
        ev.hdr = set_field(ev.hdr, RISCV_IOMMU_FQ_HDR_PID, ctx->process_id);
    }

    ev.iotval = iotval;
    ev.iotval2 = iotval2;

    riscv_iommu_fault(s, &ev);
}

/* Redirect MSI write for given GPA. */
static MemTxResult riscv_iommu_msi_write(RISCVIOMMUState *s,
    RISCVIOMMUContext *ctx, uint64_t gpa, uint64_t data,
    unsigned size, MemTxAttrs attrs)
{
    MemTxResult res;
    dma_addr_t addr;
    uint64_t intn;
    uint32_t n190;
    uint64_t pte[2];
    int fault_type = RISCV_IOMMU_FQ_TTYPE_UADDR_WR;
    int cause;

    /* Interrupt File Number */
    intn = riscv_iommu_pext_u64(PPN_DOWN(gpa), ctx->msi_addr_mask);
    if (intn >= 256) {
        /* Interrupt file number out of range */
        res = MEMTX_ACCESS_ERROR;
        cause = RISCV_IOMMU_FQ_CAUSE_MSI_LOAD_FAULT;
        goto err;
    }

    /* fetch MSI PTE */
    addr = PPN_PHYS(get_field(ctx->msiptp, RISCV_IOMMU_DC_MSIPTP_PPN));
    addr = addr | (intn * sizeof(pte));
    res = dma_memory_read(s->target_as, addr, &pte, sizeof(pte),
                          MEMTXATTRS_UNSPECIFIED);
    if (res != MEMTX_OK) {
        if (res == MEMTX_DECODE_ERROR) {
            cause = RISCV_IOMMU_FQ_CAUSE_MSI_PT_CORRUPTED;
        } else {
            cause = RISCV_IOMMU_FQ_CAUSE_MSI_LOAD_FAULT;
        }
        goto err;
    }

    le64_to_cpus(&pte[0]);
    le64_to_cpus(&pte[1]);

    if (!(pte[0] & RISCV_IOMMU_MSI_PTE_V) || (pte[0] & RISCV_IOMMU_MSI_PTE_C)) {
        /*
         * The spec mentions that: "If msipte.C == 1, then further
         * processing to interpret the PTE is implementation
         * defined.". We'll abort with cause = 262 for this
         * case too.
         */
        res = MEMTX_ACCESS_ERROR;
        cause = RISCV_IOMMU_FQ_CAUSE_MSI_INVALID;
        goto err;
    }

    switch (get_field(pte[0], RISCV_IOMMU_MSI_PTE_M)) {
    case RISCV_IOMMU_MSI_PTE_M_BASIC:
        /* MSI Pass-through mode */
        addr = PPN_PHYS(get_field(pte[0], RISCV_IOMMU_MSI_PTE_PPN));

        trace_riscv_iommu_msi(s->parent_obj.id, PCI_BUS_NUM(ctx->devid),
                              PCI_SLOT(ctx->devid), PCI_FUNC(ctx->devid),
                              gpa, addr);

        res = dma_memory_write(s->target_as, addr, &data, size, attrs);
        if (res != MEMTX_OK) {
            cause = RISCV_IOMMU_FQ_CAUSE_MSI_WR_FAULT;
            goto err;
        }

        return MEMTX_OK;
    case RISCV_IOMMU_MSI_PTE_M_MRIF:
        /* MRIF mode, continue. */
        break;
    default:
        res = MEMTX_ACCESS_ERROR;
        cause = RISCV_IOMMU_FQ_CAUSE_MSI_MISCONFIGURED;
        goto err;
    }

    /*
     * Report an error for interrupt identities exceeding the maximum allowed
     * for an IMSIC interrupt file (2047), or when the destination address is
     * not 32-bit aligned. See IOMMU Specification, Chapter 2.3. MSI page
     * tables.
     */
    if ((data > 2047) || (gpa & 3)) {
        res = MEMTX_ACCESS_ERROR;
        cause = RISCV_IOMMU_FQ_CAUSE_MSI_MISCONFIGURED;
        goto err;
    }

    /* MSI MRIF mode, non-atomic pending bit update */

    /* MRIF pending bit address */
    addr = get_field(pte[0], RISCV_IOMMU_MSI_PTE_MRIF_ADDR) << 9;
    addr = addr | ((data & 0x7c0) >> 3);

    trace_riscv_iommu_msi(s->parent_obj.id, PCI_BUS_NUM(ctx->devid),
                          PCI_SLOT(ctx->devid), PCI_FUNC(ctx->devid),
                          gpa, addr);

    /* MRIF pending bit mask */
    data = 1ULL << (data & 0x03f);
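    /*
     * Worked example of the MRIF geometry above: for interrupt
     * identity 75 (0x4b), (data & 0x7c0) >> 3 = 8 selects the second
     * 64-bit pending-bit doubleword, and 1ULL << (75 & 0x3f) sets
     * bit 11 within it - i.e. identity 75 lives at dword 1, bit 11.
     */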
    res = dma_memory_read(s->target_as, addr, &intn, sizeof(intn), attrs);
    if (res != MEMTX_OK) {
        cause = RISCV_IOMMU_FQ_CAUSE_MSI_LOAD_FAULT;
        goto err;
    }

    intn = intn | data;
    res = dma_memory_write(s->target_as, addr, &intn, sizeof(intn), attrs);
    if (res != MEMTX_OK) {
        cause = RISCV_IOMMU_FQ_CAUSE_MSI_WR_FAULT;
        goto err;
    }

    /* Get MRIF enable bits */
    addr = addr + sizeof(intn);
    res = dma_memory_read(s->target_as, addr, &intn, sizeof(intn), attrs);
    if (res != MEMTX_OK) {
        cause = RISCV_IOMMU_FQ_CAUSE_MSI_LOAD_FAULT;
        goto err;
    }

    if (!(intn & data)) {
        /* notification disabled, MRIF update completed. */
        return MEMTX_OK;
    }

    /* Send notification message */
    addr = PPN_PHYS(get_field(pte[1], RISCV_IOMMU_MSI_MRIF_NPPN));
    n190 = get_field(pte[1], RISCV_IOMMU_MSI_MRIF_NID) |
           (get_field(pte[1], RISCV_IOMMU_MSI_MRIF_NID_MSB) << 10);

    res = dma_memory_write(s->target_as, addr, &n190, sizeof(n190), attrs);
    if (res != MEMTX_OK) {
        cause = RISCV_IOMMU_FQ_CAUSE_MSI_WR_FAULT;
        goto err;
    }

    trace_riscv_iommu_mrif_notification(s->parent_obj.id, n190, addr);

    return MEMTX_OK;

err:
    riscv_iommu_report_fault(s, ctx, fault_type, cause,
                             !!ctx->process_id, 0, 0);
    return res;
}
/*
 * Check device context configuration as described by the
 * riscv-iommu spec section "Device-context configuration
 * checks".
 */
static bool riscv_iommu_validate_device_ctx(RISCVIOMMUState *s,
                                            RISCVIOMMUContext *ctx)
{
    uint32_t fsc_mode, msi_mode;
    uint64_t gatp;

    if (!(s->cap & RISCV_IOMMU_CAP_ATS) &&
        (ctx->tc & RISCV_IOMMU_DC_TC_EN_ATS ||
         ctx->tc & RISCV_IOMMU_DC_TC_EN_PRI ||
         ctx->tc & RISCV_IOMMU_DC_TC_PRPR)) {
        return false;
    }

    if (!(ctx->tc & RISCV_IOMMU_DC_TC_EN_ATS) &&
        (ctx->tc & RISCV_IOMMU_DC_TC_T2GPA ||
         ctx->tc & RISCV_IOMMU_DC_TC_EN_PRI)) {
        return false;
    }

    if (!(ctx->tc & RISCV_IOMMU_DC_TC_EN_PRI) &&
        ctx->tc & RISCV_IOMMU_DC_TC_PRPR) {
        return false;
    }

    if (!(s->cap & RISCV_IOMMU_CAP_T2GPA) &&
        ctx->tc & RISCV_IOMMU_DC_TC_T2GPA) {
        return false;
    }

    if (s->cap & RISCV_IOMMU_CAP_MSI_FLAT) {
        msi_mode = get_field(ctx->msiptp, RISCV_IOMMU_DC_MSIPTP_MODE);

        if (msi_mode != RISCV_IOMMU_DC_MSIPTP_MODE_OFF &&
            msi_mode != RISCV_IOMMU_DC_MSIPTP_MODE_FLAT) {
            return false;
        }
    }

    gatp = get_field(ctx->gatp, RISCV_IOMMU_ATP_MODE_FIELD);
    if (ctx->tc & RISCV_IOMMU_DC_TC_T2GPA &&
        gatp == RISCV_IOMMU_DC_IOHGATP_MODE_BARE) {
        return false;
    }

    fsc_mode = get_field(ctx->satp, RISCV_IOMMU_DC_FSC_MODE);

    if (ctx->tc & RISCV_IOMMU_DC_TC_PDTV) {
        switch (fsc_mode) {
        case RISCV_IOMMU_DC_FSC_PDTP_MODE_PD8:
            if (!(s->cap & RISCV_IOMMU_CAP_PD8)) {
                return false;
            }
            break;
        case RISCV_IOMMU_DC_FSC_PDTP_MODE_PD17:
            if (!(s->cap & RISCV_IOMMU_CAP_PD17)) {
                return false;
            }
            break;
        case RISCV_IOMMU_DC_FSC_PDTP_MODE_PD20:
            if (!(s->cap & RISCV_IOMMU_CAP_PD20)) {
                return false;
            }
            break;
        }
    } else {
        /* DC.tc.PDTV is 0 */
        if (ctx->tc & RISCV_IOMMU_DC_TC_DPE) {
            return false;
        }

        if (ctx->tc & RISCV_IOMMU_DC_TC_SXL) {
            if (fsc_mode == RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV32 &&
                !(s->cap & RISCV_IOMMU_CAP_SV32)) {
                return false;
            }
        } else {
            switch (fsc_mode) {
            case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV39:
                if (!(s->cap & RISCV_IOMMU_CAP_SV39)) {
                    return false;
                }
                break;
            case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV48:
                if (!(s->cap & RISCV_IOMMU_CAP_SV48)) {
                    return false;
                }
                break;
            case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV57:
                if (!(s->cap & RISCV_IOMMU_CAP_SV57)) {
                    return false;
                }
                break;
            }
        }
    }

    /*
     * CAP_END is always zero (only one endianness). FCTL_BE is
     * always zero (little-endian accesses). Thus TC_SBE must
     * always be LE, i.e. zero.
     */
    if (ctx->tc & RISCV_IOMMU_DC_TC_SBE) {
        return false;
    }

    return true;
}
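/*
 * For reference, the simplest context that passes every check above
 * (illustrative, not taken from the spec): tc with only the V bit
 * set, iohgatp and fsc in BARE mode, and msiptp mode OFF - a plain
 * pass-through device context with ATS/PRI/PDT features disabled.
 */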
/*
 * Validate process context (PC) according to section
 * "Process-context configuration checks".
 */
static bool riscv_iommu_validate_process_ctx(RISCVIOMMUState *s,
                                             RISCVIOMMUContext *ctx)
{
    uint32_t mode;

    if (get_field(ctx->ta, RISCV_IOMMU_PC_TA_RESERVED)) {
        return false;
    }

    if (get_field(ctx->satp, RISCV_IOMMU_PC_FSC_RESERVED)) {
        return false;
    }

    mode = get_field(ctx->satp, RISCV_IOMMU_DC_FSC_MODE);
    switch (mode) {
    case RISCV_IOMMU_DC_FSC_MODE_BARE:
    /* sv39 and sv32 modes have the same value (8) */
    case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV39:
    case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV48:
    case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV57:
        break;
    default:
        return false;
    }

    if (ctx->tc & RISCV_IOMMU_DC_TC_SXL) {
        if (mode == RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV32 &&
            !(s->cap & RISCV_IOMMU_CAP_SV32)) {
            return false;
        }
    } else {
        switch (mode) {
        case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV39:
            if (!(s->cap & RISCV_IOMMU_CAP_SV39)) {
                return false;
            }
            break;
        case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV48:
            if (!(s->cap & RISCV_IOMMU_CAP_SV48)) {
                return false;
            }
            break;
        case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV57:
            if (!(s->cap & RISCV_IOMMU_CAP_SV57)) {
                return false;
            }
            break;
        }
    }

    return true;
}

/*
 * RISC-V IOMMU Device Context Lookup - Device Directory Tree Walk
 *
 * @s      : IOMMU Device State
 * @ctx    : Device Translation Context with devid and process_id set.
 * @return : success or fault code.
 */
static int riscv_iommu_ctx_fetch(RISCVIOMMUState *s, RISCVIOMMUContext *ctx)
{
    const uint64_t ddtp = s->ddtp;
    unsigned mode = get_field(ddtp, RISCV_IOMMU_DDTP_MODE);
    dma_addr_t addr = PPN_PHYS(get_field(ddtp, RISCV_IOMMU_DDTP_PPN));
    struct riscv_iommu_dc dc;
    /* Device Context format: 0: extended (64 bytes) | 1: base (32 bytes) */
    const int dc_fmt = !s->enable_msi;
    const size_t dc_len = sizeof(dc) >> dc_fmt;
    int depth;
    uint64_t de;

    switch (mode) {
    case RISCV_IOMMU_DDTP_MODE_OFF:
        return RISCV_IOMMU_FQ_CAUSE_DMA_DISABLED;

    case RISCV_IOMMU_DDTP_MODE_BARE:
        /* mock up pass-through translation context */
        ctx->gatp = set_field(0, RISCV_IOMMU_ATP_MODE_FIELD,
                              RISCV_IOMMU_DC_IOHGATP_MODE_BARE);
        ctx->satp = set_field(0, RISCV_IOMMU_ATP_MODE_FIELD,
                              RISCV_IOMMU_DC_FSC_MODE_BARE);

        ctx->tc = RISCV_IOMMU_DC_TC_V;
        if (s->enable_ats) {
            ctx->tc |= RISCV_IOMMU_DC_TC_EN_ATS;
        }

        ctx->ta = 0;
        ctx->msiptp = 0;
        return 0;

    case RISCV_IOMMU_DDTP_MODE_1LVL:
        depth = 0;
        break;

    case RISCV_IOMMU_DDTP_MODE_2LVL:
        depth = 1;
        break;

    case RISCV_IOMMU_DDTP_MODE_3LVL:
        depth = 2;
        break;

    default:
        return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED;
    }

    /*
     * Check supported device id width (in bits).
     * See IOMMU Specification, Chapter 6. Software guidelines.
     * - if extended device-context format is used:
     *   1LVL: 6, 2LVL: 15, 3LVL: 24
     * - if base device-context format is used:
     *   1LVL: 7, 2LVL: 16, 3LVL: 24
     */
    if (ctx->devid >= (1 << (depth * 9 + 6 + (dc_fmt && depth != 2)))) {
        return RISCV_IOMMU_FQ_CAUSE_TTYPE_BLOCKED;
    }
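    /*
     * Worked example for the walk below (DDTP_MODE_3LVL, base
     * format): devid 0x123456 is split into non-leaf indexes 0x12
     * (bits [23:16]) and 0x68 (bits [15:7]), with leaf index 0x56
     * (bits [6:0]) applied to the final device-context page.
     */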
    /* Device directory tree walk */
    for (; depth-- > 0; ) {
        riscv_iommu_hpm_incr_ctr(s, ctx, RISCV_IOMMU_HPMEVENT_DD_WALK);
        /*
         * Select device id index bits based on device directory tree level
         * and device context format.
         * See IOMMU Specification, Chapter 2. Data Structures.
         * - if extended device-context format is used:
         *   device index: [23:15][14:6][5:0]
         * - if base device-context format is used:
         *   device index: [23:16][15:7][6:0]
         */
        const int split = depth * 9 + 6 + dc_fmt;
        addr |= ((ctx->devid >> split) << 3) & ~TARGET_PAGE_MASK;
        if (dma_memory_read(s->target_as, addr, &de, sizeof(de),
                            MEMTXATTRS_UNSPECIFIED) != MEMTX_OK) {
            return RISCV_IOMMU_FQ_CAUSE_DDT_LOAD_FAULT;
        }
        le64_to_cpus(&de);
        if (!(de & RISCV_IOMMU_DDTE_VALID)) {
            /* invalid directory entry */
            return RISCV_IOMMU_FQ_CAUSE_DDT_INVALID;
        }
        if (de & ~(RISCV_IOMMU_DDTE_PPN | RISCV_IOMMU_DDTE_VALID)) {
            /* reserved bits set */
            return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED;
        }
        addr = PPN_PHYS(get_field(de, RISCV_IOMMU_DDTE_PPN));
    }

    riscv_iommu_hpm_incr_ctr(s, ctx, RISCV_IOMMU_HPMEVENT_DD_WALK);

    /* index into device context entry page */
    addr |= (ctx->devid * dc_len) & ~TARGET_PAGE_MASK;

    memset(&dc, 0, sizeof(dc));
    if (dma_memory_read(s->target_as, addr, &dc, dc_len,
                        MEMTXATTRS_UNSPECIFIED) != MEMTX_OK) {
        return RISCV_IOMMU_FQ_CAUSE_DDT_LOAD_FAULT;
    }

    /* Set translation context. */
    ctx->tc = le64_to_cpu(dc.tc);
    ctx->gatp = le64_to_cpu(dc.iohgatp);
    ctx->satp = le64_to_cpu(dc.fsc);
    ctx->ta = le64_to_cpu(dc.ta);
    ctx->msiptp = le64_to_cpu(dc.msiptp);
    ctx->msi_addr_mask = le64_to_cpu(dc.msi_addr_mask);
    ctx->msi_addr_pattern = le64_to_cpu(dc.msi_addr_pattern);

    if (!(ctx->tc & RISCV_IOMMU_DC_TC_V)) {
        return RISCV_IOMMU_FQ_CAUSE_DDT_INVALID;
    }

    if (!riscv_iommu_validate_device_ctx(s, ctx)) {
        return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED;
    }

    /* FSC field checks */
    mode = get_field(ctx->satp, RISCV_IOMMU_DC_FSC_MODE);
    addr = PPN_PHYS(get_field(ctx->satp, RISCV_IOMMU_DC_FSC_PPN));

    if (!(ctx->tc & RISCV_IOMMU_DC_TC_PDTV)) {
        if (ctx->process_id != RISCV_IOMMU_NOPROCID) {
            /* PID is disabled */
            return RISCV_IOMMU_FQ_CAUSE_TTYPE_BLOCKED;
        }
        if (mode > RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV57) {
            /* Invalid translation mode */
            return RISCV_IOMMU_FQ_CAUSE_DDT_INVALID;
        }
        return 0;
    }

    if (ctx->process_id == RISCV_IOMMU_NOPROCID) {
        if (!(ctx->tc & RISCV_IOMMU_DC_TC_DPE)) {
            /* No default process_id enabled, set BARE mode */
            ctx->satp = 0ULL;
            return 0;
        } else {
            /* Use default process_id #0 */
            ctx->process_id = 0;
        }
    }

    if (mode == RISCV_IOMMU_DC_FSC_MODE_BARE) {
        /* No S-Stage translation, done. */
        return 0;
    }

    /* FSC.TC.PDTV enabled */
    if (mode > RISCV_IOMMU_DC_FSC_PDTP_MODE_PD20) {
        /* Invalid PDTP.MODE */
        return RISCV_IOMMU_FQ_CAUSE_PDT_MISCONFIGURED;
    }
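    /*
     * Worked example for the PDT walk below (PD20 mode, i.e. three
     * levels): a 20-bit process_id is split into non-leaf indexes
     * [19:17] and [16:8], with leaf index [7:0] applied to the final
     * page of 16-byte process-context entries.
     */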
    for (depth = mode - RISCV_IOMMU_DC_FSC_PDTP_MODE_PD8; depth-- > 0; ) {
        riscv_iommu_hpm_incr_ctr(s, ctx, RISCV_IOMMU_HPMEVENT_PD_WALK);

        /*
         * Select process id index bits based on process directory tree
         * level. See IOMMU Specification, 2.2. Process-Directory-Table.
         */
        const int split = depth * 9 + 8;
        addr |= ((ctx->process_id >> split) << 3) & ~TARGET_PAGE_MASK;
        if (dma_memory_read(s->target_as, addr, &de, sizeof(de),
                            MEMTXATTRS_UNSPECIFIED) != MEMTX_OK) {
            return RISCV_IOMMU_FQ_CAUSE_PDT_LOAD_FAULT;
        }
        le64_to_cpus(&de);
        if (!(de & RISCV_IOMMU_PC_TA_V)) {
            return RISCV_IOMMU_FQ_CAUSE_PDT_INVALID;
        }
        addr = PPN_PHYS(get_field(de, RISCV_IOMMU_PC_FSC_PPN));
    }

    riscv_iommu_hpm_incr_ctr(s, ctx, RISCV_IOMMU_HPMEVENT_PD_WALK);

    /* Leaf entry in PDT */
    addr |= (ctx->process_id << 4) & ~TARGET_PAGE_MASK;
    if (dma_memory_read(s->target_as, addr, &dc.ta, sizeof(uint64_t) * 2,
                        MEMTXATTRS_UNSPECIFIED) != MEMTX_OK) {
        return RISCV_IOMMU_FQ_CAUSE_PDT_LOAD_FAULT;
    }

    /* Use FSC and TA from process directory entry. */
    ctx->ta = le64_to_cpu(dc.ta);
    ctx->satp = le64_to_cpu(dc.fsc);

    if (!(ctx->ta & RISCV_IOMMU_PC_TA_V)) {
        return RISCV_IOMMU_FQ_CAUSE_PDT_INVALID;
    }

    if (!riscv_iommu_validate_process_ctx(s, ctx)) {
        return RISCV_IOMMU_FQ_CAUSE_PDT_MISCONFIGURED;
    }

    return 0;
}

/* Translation Context cache support */
static gboolean riscv_iommu_ctx_equal(gconstpointer v1, gconstpointer v2)
{
    RISCVIOMMUContext *c1 = (RISCVIOMMUContext *) v1;
    RISCVIOMMUContext *c2 = (RISCVIOMMUContext *) v2;
    return c1->devid == c2->devid &&
           c1->process_id == c2->process_id;
}

static guint riscv_iommu_ctx_hash(gconstpointer v)
{
    RISCVIOMMUContext *ctx = (RISCVIOMMUContext *) v;
    /*
     * Generate simple hash of (process_id, devid)
     * assuming 24-bit wide devid.
     */
    return (guint)(ctx->devid) + ((guint)(ctx->process_id) << 24);
}

static void riscv_iommu_ctx_inval_devid_procid(gpointer key, gpointer value,
                                               gpointer data)
{
    RISCVIOMMUContext *ctx = (RISCVIOMMUContext *) value;
    RISCVIOMMUContext *arg = (RISCVIOMMUContext *) data;
    if (ctx->tc & RISCV_IOMMU_DC_TC_V &&
        ctx->devid == arg->devid &&
        ctx->process_id == arg->process_id) {
        ctx->tc &= ~RISCV_IOMMU_DC_TC_V;
    }
}

static void riscv_iommu_ctx_inval_devid(gpointer key, gpointer value,
                                        gpointer data)
{
    RISCVIOMMUContext *ctx = (RISCVIOMMUContext *) value;
    RISCVIOMMUContext *arg = (RISCVIOMMUContext *) data;
    if (ctx->tc & RISCV_IOMMU_DC_TC_V &&
        ctx->devid == arg->devid) {
        ctx->tc &= ~RISCV_IOMMU_DC_TC_V;
    }
}

static void riscv_iommu_ctx_inval_all(gpointer key, gpointer value,
                                      gpointer data)
{
    RISCVIOMMUContext *ctx = (RISCVIOMMUContext *) value;
    if (ctx->tc & RISCV_IOMMU_DC_TC_V) {
        ctx->tc &= ~RISCV_IOMMU_DC_TC_V;
    }
}

static void riscv_iommu_ctx_inval(RISCVIOMMUState *s, GHFunc func,
                                  uint32_t devid, uint32_t process_id)
{
    GHashTable *ctx_cache;
    RISCVIOMMUContext key = {
        .devid = devid,
        .process_id = process_id,
    };
    ctx_cache = g_hash_table_ref(s->ctx_cache);
    g_hash_table_foreach(ctx_cache, func, &key);
    g_hash_table_unref(ctx_cache);
}
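/*
 * Lookups below take a reference on the hash table itself, so a
 * concurrent cache reset (the qatomic_xchg() in riscv_iommu_ctx())
 * cannot free the table while it is in use; callers hand the
 * reference back through riscv_iommu_ctx_put().
 */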
/* Find or allocate translation context for a given {device_id, process_id} */
static RISCVIOMMUContext *riscv_iommu_ctx(RISCVIOMMUState *s,
                                          unsigned devid, unsigned process_id,
                                          void **ref)
{
    GHashTable *ctx_cache;
    RISCVIOMMUContext *ctx;
    RISCVIOMMUContext key = {
        .devid = devid,
        .process_id = process_id,
    };

    ctx_cache = g_hash_table_ref(s->ctx_cache);
    ctx = g_hash_table_lookup(ctx_cache, &key);

    if (ctx && (ctx->tc & RISCV_IOMMU_DC_TC_V)) {
        *ref = ctx_cache;
        return ctx;
    }

    ctx = g_new0(RISCVIOMMUContext, 1);
    ctx->devid = devid;
    ctx->process_id = process_id;

    int fault = riscv_iommu_ctx_fetch(s, ctx);
    if (!fault) {
        if (g_hash_table_size(ctx_cache) >= LIMIT_CACHE_CTX) {
            g_hash_table_unref(ctx_cache);
            ctx_cache = g_hash_table_new_full(riscv_iommu_ctx_hash,
                                              riscv_iommu_ctx_equal,
                                              g_free, NULL);
            g_hash_table_ref(ctx_cache);
            g_hash_table_unref(qatomic_xchg(&s->ctx_cache, ctx_cache));
        }
        g_hash_table_add(ctx_cache, ctx);
        *ref = ctx_cache;
        return ctx;
    }

    g_hash_table_unref(ctx_cache);
    *ref = NULL;

    riscv_iommu_report_fault(s, ctx, RISCV_IOMMU_FQ_TTYPE_UADDR_RD,
                             fault, !!process_id, 0, 0);

    g_free(ctx);
    return NULL;
}

static void riscv_iommu_ctx_put(RISCVIOMMUState *s, void *ref)
{
    if (ref) {
        g_hash_table_unref((GHashTable *)ref);
    }
}

/* Find or allocate address space for a given device */
static AddressSpace *riscv_iommu_space(RISCVIOMMUState *s, uint32_t devid)
{
    RISCVIOMMUSpace *as;

    /* FIXME: PCIe bus remapping for attached endpoints. */
    devid |= s->bus << 8;

    QLIST_FOREACH(as, &s->spaces, list) {
        if (as->devid == devid) {
            break;
        }
    }

    if (as == NULL) {
        char name[64];
        as = g_new0(RISCVIOMMUSpace, 1);

        as->iommu = s;
        as->devid = devid;

        snprintf(name, sizeof(name), "riscv-iommu-%04x:%02x.%d-iova",
                 PCI_BUS_NUM(as->devid), PCI_SLOT(as->devid),
                 PCI_FUNC(as->devid));

        /* IOVA address space, untranslated addresses */
        memory_region_init_iommu(&as->iova_mr, sizeof(as->iova_mr),
                                 TYPE_RISCV_IOMMU_MEMORY_REGION,
                                 OBJECT(as), "riscv_iommu", UINT64_MAX);
        address_space_init(&as->iova_as, MEMORY_REGION(&as->iova_mr), name);

        QLIST_INSERT_HEAD(&s->spaces, as, list);

        trace_riscv_iommu_new(s->parent_obj.id, PCI_BUS_NUM(as->devid),
                              PCI_SLOT(as->devid), PCI_FUNC(as->devid));
    }
    return &as->iova_as;
}

/* Translation Object cache support */
static gboolean riscv_iommu_iot_equal(gconstpointer v1, gconstpointer v2)
{
    RISCVIOMMUEntry *t1 = (RISCVIOMMUEntry *) v1;
    RISCVIOMMUEntry *t2 = (RISCVIOMMUEntry *) v2;
    return t1->gscid == t2->gscid && t1->pscid == t2->pscid &&
           t1->iova == t2->iova && t1->tag == t2->tag;
}

static guint riscv_iommu_iot_hash(gconstpointer v)
{
    RISCVIOMMUEntry *t = (RISCVIOMMUEntry *) v;
    return (guint)t->iova;
}
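/*
 * IOTLB invalidation match helpers. Each one implements a row of the
 * GV/AV/PSCV match matrix noted in the comments below; the command
 * processing in riscv_iommu_process_cq_tail() picks the helper that
 * matches the IOTINVAL operands.
 */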
/* GV: 0 AV: 0 PSCV: 0 GVMA: 0 */
/* GV: 0 AV: 0 GVMA: 1 */
static
void riscv_iommu_iot_inval_all(gpointer key, gpointer value, gpointer data)
{
    RISCVIOMMUEntry *iot = (RISCVIOMMUEntry *) value;
    RISCVIOMMUEntry *arg = (RISCVIOMMUEntry *) data;
    if (iot->tag == arg->tag) {
        iot->perm = IOMMU_NONE;
    }
}

/* GV: 0 AV: 0 PSCV: 1 GVMA: 0 */
static
void riscv_iommu_iot_inval_pscid(gpointer key, gpointer value, gpointer data)
{
    RISCVIOMMUEntry *iot = (RISCVIOMMUEntry *) value;
    RISCVIOMMUEntry *arg = (RISCVIOMMUEntry *) data;
    if (iot->tag == arg->tag &&
        iot->pscid == arg->pscid) {
        iot->perm = IOMMU_NONE;
    }
}

/* GV: 0 AV: 1 PSCV: 0 GVMA: 0 */
static
void riscv_iommu_iot_inval_iova(gpointer key, gpointer value, gpointer data)
{
    RISCVIOMMUEntry *iot = (RISCVIOMMUEntry *) value;
    RISCVIOMMUEntry *arg = (RISCVIOMMUEntry *) data;
    if (iot->tag == arg->tag &&
        iot->iova == arg->iova) {
        iot->perm = IOMMU_NONE;
    }
}

/* GV: 0 AV: 1 PSCV: 1 GVMA: 0 */
static void riscv_iommu_iot_inval_pscid_iova(gpointer key, gpointer value,
                                             gpointer data)
{
    RISCVIOMMUEntry *iot = (RISCVIOMMUEntry *) value;
    RISCVIOMMUEntry *arg = (RISCVIOMMUEntry *) data;
    if (iot->tag == arg->tag &&
        iot->pscid == arg->pscid &&
        iot->iova == arg->iova) {
        iot->perm = IOMMU_NONE;
    }
}

/* GV: 1 AV: 0 PSCV: 0 GVMA: 0 */
/* GV: 1 AV: 0 GVMA: 1 */
static
void riscv_iommu_iot_inval_gscid(gpointer key, gpointer value, gpointer data)
{
    RISCVIOMMUEntry *iot = (RISCVIOMMUEntry *) value;
    RISCVIOMMUEntry *arg = (RISCVIOMMUEntry *) data;
    if (iot->tag == arg->tag &&
        iot->gscid == arg->gscid) {
        iot->perm = IOMMU_NONE;
    }
}

/* GV: 1 AV: 0 PSCV: 1 GVMA: 0 */
static void riscv_iommu_iot_inval_gscid_pscid(gpointer key, gpointer value,
                                              gpointer data)
{
    RISCVIOMMUEntry *iot = (RISCVIOMMUEntry *) value;
    RISCVIOMMUEntry *arg = (RISCVIOMMUEntry *) data;
    if (iot->tag == arg->tag &&
        iot->gscid == arg->gscid &&
        iot->pscid == arg->pscid) {
        iot->perm = IOMMU_NONE;
    }
}

/* GV: 1 AV: 1 PSCV: 0 GVMA: 0 */
/* GV: 1 AV: 1 GVMA: 1 */
static void riscv_iommu_iot_inval_gscid_iova(gpointer key, gpointer value,
                                             gpointer data)
{
    RISCVIOMMUEntry *iot = (RISCVIOMMUEntry *) value;
    RISCVIOMMUEntry *arg = (RISCVIOMMUEntry *) data;
    if (iot->tag == arg->tag &&
        iot->gscid == arg->gscid &&
        iot->iova == arg->iova) {
        iot->perm = IOMMU_NONE;
    }
}

/* GV: 1 AV: 1 PSCV: 1 GVMA: 0 */
static void riscv_iommu_iot_inval_gscid_pscid_iova(gpointer key, gpointer value,
                                                   gpointer data)
{
    RISCVIOMMUEntry *iot = (RISCVIOMMUEntry *) value;
    RISCVIOMMUEntry *arg = (RISCVIOMMUEntry *) data;
    if (iot->tag == arg->tag &&
        iot->gscid == arg->gscid &&
        iot->pscid == arg->pscid &&
        iot->iova == arg->iova) {
        iot->perm = IOMMU_NONE;
    }
}
/* caller should keep ref-count for iot_cache object */
static RISCVIOMMUEntry *riscv_iommu_iot_lookup(RISCVIOMMUContext *ctx,
    GHashTable *iot_cache, hwaddr iova, RISCVIOMMUTransTag transtag)
{
    RISCVIOMMUEntry key = {
        .tag = transtag,
        .gscid = get_field(ctx->gatp, RISCV_IOMMU_DC_IOHGATP_GSCID),
        .pscid = get_field(ctx->ta, RISCV_IOMMU_DC_TA_PSCID),
        .iova = PPN_DOWN(iova),
    };
    return g_hash_table_lookup(iot_cache, &key);
}

/* caller should keep ref-count for iot_cache object */
static void riscv_iommu_iot_update(RISCVIOMMUState *s,
    GHashTable *iot_cache, RISCVIOMMUEntry *iot)
{
    if (!s->iot_limit) {
        return;
    }

    if (g_hash_table_size(s->iot_cache) >= s->iot_limit) {
        iot_cache = g_hash_table_new_full(riscv_iommu_iot_hash,
                                          riscv_iommu_iot_equal,
                                          g_free, NULL);
        g_hash_table_unref(qatomic_xchg(&s->iot_cache, iot_cache));
    }
    g_hash_table_add(iot_cache, iot);
}

static void riscv_iommu_iot_inval(RISCVIOMMUState *s, GHFunc func,
    uint32_t gscid, uint32_t pscid, hwaddr iova, RISCVIOMMUTransTag transtag)
{
    GHashTable *iot_cache;
    RISCVIOMMUEntry key = {
        .tag = transtag,
        .gscid = gscid,
        .pscid = pscid,
        .iova = PPN_DOWN(iova),
    };

    iot_cache = g_hash_table_ref(s->iot_cache);
    g_hash_table_foreach(iot_cache, func, &key);
    g_hash_table_unref(iot_cache);
}

static RISCVIOMMUTransTag riscv_iommu_get_transtag(RISCVIOMMUContext *ctx)
{
    uint64_t satp = get_field(ctx->satp, RISCV_IOMMU_ATP_MODE_FIELD);
    uint64_t gatp = get_field(ctx->gatp, RISCV_IOMMU_ATP_MODE_FIELD);

    if (satp == RISCV_IOMMU_DC_FSC_MODE_BARE) {
        return (gatp == RISCV_IOMMU_DC_IOHGATP_MODE_BARE) ?
               RISCV_IOMMU_TRANS_TAG_BY : RISCV_IOMMU_TRANS_TAG_VG;
    } else {
        return (gatp == RISCV_IOMMU_DC_IOHGATP_MODE_BARE) ?
               RISCV_IOMMU_TRANS_TAG_SS : RISCV_IOMMU_TRANS_TAG_VN;
    }
}
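/*
 * Tag selection summary: BY = both stages Bare, SS = S-stage only,
 * VG = G-stage only, VN = both stages active. IOTINVAL.GVMA sweeps
 * the VG and VN tags, while IOTINVAL.VMA sweeps VN when GV = 1 and
 * SS otherwise (see riscv_iommu_process_cq_tail() below).
 */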
static int riscv_iommu_translate(RISCVIOMMUState *s, RISCVIOMMUContext *ctx,
    IOMMUTLBEntry *iotlb, bool enable_cache)
{
    RISCVIOMMUTransTag transtag = riscv_iommu_get_transtag(ctx);
    RISCVIOMMUEntry *iot;
    IOMMUAccessFlags perm;
    bool enable_pid;
    bool enable_pri;
    GHashTable *iot_cache;
    int fault;

    riscv_iommu_hpm_incr_ctr(s, ctx, RISCV_IOMMU_HPMEVENT_URQ);

    iot_cache = g_hash_table_ref(s->iot_cache);
    /*
     * TC[32] is reserved for custom extensions, used here to temporarily
     * enable automatic page-request generation for ATS queries.
     */
    enable_pri = (iotlb->perm == IOMMU_NONE) && (ctx->tc & BIT_ULL(32));
    enable_pid = (ctx->tc & RISCV_IOMMU_DC_TC_PDTV);

    /* Check for ATS request. */
    if (iotlb->perm == IOMMU_NONE) {
        riscv_iommu_hpm_incr_ctr(s, ctx, RISCV_IOMMU_HPMEVENT_ATS_RQ);
        /* Check if ATS is disabled. */
        if (!(ctx->tc & RISCV_IOMMU_DC_TC_EN_ATS)) {
            enable_pri = false;
            fault = RISCV_IOMMU_FQ_CAUSE_TTYPE_BLOCKED;
            goto done;
        }
    }

    iot = riscv_iommu_iot_lookup(ctx, iot_cache, iotlb->iova, transtag);
    perm = iot ? iot->perm : IOMMU_NONE;
    if (perm != IOMMU_NONE) {
        iotlb->translated_addr = PPN_PHYS(iot->phys);
        iotlb->addr_mask = ~TARGET_PAGE_MASK;
        iotlb->perm = perm;
        fault = 0;
        goto done;
    }

    riscv_iommu_hpm_incr_ctr(s, ctx, RISCV_IOMMU_HPMEVENT_TLB_MISS);

    /* Translate using device directory / page table information. */
    fault = riscv_iommu_spa_fetch(s, ctx, iotlb);

    if (!fault && iotlb->target_as == &s->trap_as) {
        /* Do not cache trapped MSI translations */
        goto done;
    }

    /*
     * We made an implementation choice to not cache identity-mapped
     * translations, as allowed by the specification, to avoid
     * translation cache evictions for other devices sharing the
     * IOMMU hardware model.
     */
    if (!fault && iotlb->translated_addr != iotlb->iova && enable_cache) {
        iot = g_new0(RISCVIOMMUEntry, 1);
        iot->iova = PPN_DOWN(iotlb->iova);
        iot->phys = PPN_DOWN(iotlb->translated_addr);
        iot->gscid = get_field(ctx->gatp, RISCV_IOMMU_DC_IOHGATP_GSCID);
        iot->pscid = get_field(ctx->ta, RISCV_IOMMU_DC_TA_PSCID);
        iot->perm = iotlb->perm;
        iot->tag = transtag;
        riscv_iommu_iot_update(s, iot_cache, iot);
    }

done:
    g_hash_table_unref(iot_cache);

    if (enable_pri && fault) {
        struct riscv_iommu_pq_record pr = {0};
        if (enable_pid) {
            pr.hdr = set_field(RISCV_IOMMU_PREQ_HDR_PV,
                               RISCV_IOMMU_PREQ_HDR_PID, ctx->process_id);
        }
        pr.hdr = set_field(pr.hdr, RISCV_IOMMU_PREQ_HDR_DID, ctx->devid);
        pr.payload = (iotlb->iova & TARGET_PAGE_MASK) |
                     RISCV_IOMMU_PREQ_PAYLOAD_M;
        riscv_iommu_pri(s, &pr);
        return fault;
    }

    if (fault) {
        unsigned ttype = RISCV_IOMMU_FQ_TTYPE_PCIE_ATS_REQ;

        if (iotlb->perm & IOMMU_RW) {
            ttype = RISCV_IOMMU_FQ_TTYPE_UADDR_WR;
        } else if (iotlb->perm & IOMMU_RO) {
            ttype = RISCV_IOMMU_FQ_TTYPE_UADDR_RD;
        }

        riscv_iommu_report_fault(s, ctx, ttype, fault, enable_pid,
                                 iotlb->iova, iotlb->translated_addr);
        return fault;
    }

    return 0;
}

/* IOMMU Command Interface */
static MemTxResult riscv_iommu_iofence(RISCVIOMMUState *s, bool notify,
    uint64_t addr, uint32_t data)
{
    /*
     * ATS processing in this implementation of the IOMMU is synchronous,
     * no need to wait for completions here.
     */
    if (!notify) {
        return MEMTX_OK;
    }

    return dma_memory_write(s->target_as, addr, &data, sizeof(data),
                            MEMTXATTRS_UNSPECIFIED);
}
static void riscv_iommu_ats(RISCVIOMMUState *s,
    struct riscv_iommu_command *cmd, IOMMUNotifierFlag flag,
    IOMMUAccessFlags perm,
    void (*trace_fn)(const char *id))
{
    RISCVIOMMUSpace *as = NULL;
    IOMMUNotifier *n;
    IOMMUTLBEvent event;
    uint32_t pid;
    uint32_t devid;
    const bool pv = cmd->dword0 & RISCV_IOMMU_CMD_ATS_PV;

    if (cmd->dword0 & RISCV_IOMMU_CMD_ATS_DSV) {
        /* Use device segment and requester id */
        devid = get_field(cmd->dword0,
            RISCV_IOMMU_CMD_ATS_DSEG | RISCV_IOMMU_CMD_ATS_RID);
    } else {
        devid = get_field(cmd->dword0, RISCV_IOMMU_CMD_ATS_RID);
    }

    pid = get_field(cmd->dword0, RISCV_IOMMU_CMD_ATS_PID);

    QLIST_FOREACH(as, &s->spaces, list) {
        if (as->devid == devid) {
            break;
        }
    }

    if (!as || !as->notifier) {
        return;
    }

    event.type = flag;
    event.entry.perm = perm;
    event.entry.target_as = s->target_as;

    IOMMU_NOTIFIER_FOREACH(n, &as->iova_mr) {
        if (!pv || n->iommu_idx == pid) {
            event.entry.iova = n->start;
            event.entry.addr_mask = n->end - n->start;
            trace_fn(as->iova_mr.parent_obj.name);
            memory_region_notify_iommu_one(n, &event);
        }
    }
}

static void riscv_iommu_ats_inval(RISCVIOMMUState *s,
    struct riscv_iommu_command *cmd)
{
    return riscv_iommu_ats(s, cmd, IOMMU_NOTIFIER_DEVIOTLB_UNMAP, IOMMU_NONE,
                           trace_riscv_iommu_ats_inval);
}

static void riscv_iommu_ats_prgr(RISCVIOMMUState *s,
    struct riscv_iommu_command *cmd)
{
    unsigned resp_code = get_field(cmd->dword1,
                                   RISCV_IOMMU_CMD_ATS_PRGR_RESP_CODE);

    /* Using the access flag to carry response code information */
    IOMMUAccessFlags perm = resp_code ? IOMMU_NONE : IOMMU_RW;
    return riscv_iommu_ats(s, cmd, IOMMU_NOTIFIER_MAP, perm,
                           trace_riscv_iommu_ats_prgr);
}

static void riscv_iommu_process_ddtp(RISCVIOMMUState *s)
{
    uint64_t old_ddtp = s->ddtp;
    uint64_t new_ddtp = riscv_iommu_reg_get64(s, RISCV_IOMMU_REG_DDTP);
    unsigned new_mode = get_field(new_ddtp, RISCV_IOMMU_DDTP_MODE);
    unsigned old_mode = get_field(old_ddtp, RISCV_IOMMU_DDTP_MODE);
    bool ok = false;

    /*
     * Check for allowed DDTP.MODE transitions:
     * {OFF, BARE}        -> {OFF, BARE, 1LVL, 2LVL, 3LVL}
     * {1LVL, 2LVL, 3LVL} -> {OFF, BARE}
     */
    if (new_mode == old_mode ||
        new_mode == RISCV_IOMMU_DDTP_MODE_OFF ||
        new_mode == RISCV_IOMMU_DDTP_MODE_BARE) {
        ok = true;
    } else if (new_mode == RISCV_IOMMU_DDTP_MODE_1LVL ||
               new_mode == RISCV_IOMMU_DDTP_MODE_2LVL ||
               new_mode == RISCV_IOMMU_DDTP_MODE_3LVL) {
        ok = old_mode == RISCV_IOMMU_DDTP_MODE_OFF ||
             old_mode == RISCV_IOMMU_DDTP_MODE_BARE;
    }

    if (ok) {
        /* clear reserved and busy bits, report back sanitized version */
        new_ddtp = set_field(new_ddtp & RISCV_IOMMU_DDTP_PPN,
                             RISCV_IOMMU_DDTP_MODE, new_mode);
    } else {
        new_ddtp = old_ddtp;
    }
    s->ddtp = new_ddtp;

    riscv_iommu_reg_set64(s, RISCV_IOMMU_REG_DDTP, new_ddtp);
}

/* Command function and opcode field. */
#define RISCV_IOMMU_CMD(func, op) (((func) << 7) | (op))
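/*
 * The dispatch below extracts dword0's opcode and func3 fields with a
 * single combined mask, so each case label built with
 * RISCV_IOMMU_CMD(func, op) == ((func) << 7) | (op) matches both
 * fields at once.
 */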
static void riscv_iommu_process_cq_tail(RISCVIOMMUState *s)
{
    struct riscv_iommu_command cmd;
    MemTxResult res;
    dma_addr_t addr;
    uint32_t tail, head, ctrl;
    uint64_t cmd_opcode;
    GHFunc func;

    ctrl = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_CQCSR);
    tail = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_CQT) & s->cq_mask;
    head = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_CQH) & s->cq_mask;

    /* Check for pending error or queue processing disabled */
    if (!(ctrl & RISCV_IOMMU_CQCSR_CQON) ||
        !!(ctrl & (RISCV_IOMMU_CQCSR_CMD_ILL | RISCV_IOMMU_CQCSR_CQMF))) {
        return;
    }

    while (tail != head) {
        addr = s->cq_addr + head * sizeof(cmd);
        res = dma_memory_read(s->target_as, addr, &cmd, sizeof(cmd),
                              MEMTXATTRS_UNSPECIFIED);

        if (res != MEMTX_OK) {
            riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_CQCSR,
                                  RISCV_IOMMU_CQCSR_CQMF, 0);
            goto fault;
        }

        trace_riscv_iommu_cmd(s->parent_obj.id, cmd.dword0, cmd.dword1);

        cmd_opcode = get_field(cmd.dword0,
                               RISCV_IOMMU_CMD_OPCODE | RISCV_IOMMU_CMD_FUNC);

        switch (cmd_opcode) {
        case RISCV_IOMMU_CMD(RISCV_IOMMU_CMD_IOFENCE_FUNC_C,
                             RISCV_IOMMU_CMD_IOFENCE_OPCODE):
            res = riscv_iommu_iofence(s,
                cmd.dword0 & RISCV_IOMMU_CMD_IOFENCE_AV, cmd.dword1 << 2,
                get_field(cmd.dword0, RISCV_IOMMU_CMD_IOFENCE_DATA));

            if (res != MEMTX_OK) {
                riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_CQCSR,
                                      RISCV_IOMMU_CQCSR_CQMF, 0);
                goto fault;
            }
            break;

        case RISCV_IOMMU_CMD(RISCV_IOMMU_CMD_IOTINVAL_FUNC_GVMA,
                             RISCV_IOMMU_CMD_IOTINVAL_OPCODE):
        {
            bool gv = !!(cmd.dword0 & RISCV_IOMMU_CMD_IOTINVAL_GV);
            bool av = !!(cmd.dword0 & RISCV_IOMMU_CMD_IOTINVAL_AV);
            bool pscv = !!(cmd.dword0 & RISCV_IOMMU_CMD_IOTINVAL_PSCV);
            uint32_t gscid = get_field(cmd.dword0,
                                       RISCV_IOMMU_CMD_IOTINVAL_GSCID);
            uint32_t pscid = get_field(cmd.dword0,
                                       RISCV_IOMMU_CMD_IOTINVAL_PSCID);
            hwaddr iova = (cmd.dword1 << 2) & TARGET_PAGE_MASK;

            if (pscv) {
                /* illegal command arguments IOTINVAL.GVMA & PSCV == 1 */
                goto cmd_ill;
            }

            func = riscv_iommu_iot_inval_all;

            if (gv) {
                func = (av) ? riscv_iommu_iot_inval_gscid_iova :
                              riscv_iommu_iot_inval_gscid;
            }

            riscv_iommu_iot_inval(
                s, func, gscid, pscid, iova, RISCV_IOMMU_TRANS_TAG_VG);

            riscv_iommu_iot_inval(
                s, func, gscid, pscid, iova, RISCV_IOMMU_TRANS_TAG_VN);
            break;
        }

        case RISCV_IOMMU_CMD(RISCV_IOMMU_CMD_IOTINVAL_FUNC_VMA,
                             RISCV_IOMMU_CMD_IOTINVAL_OPCODE):
        {
            bool gv = !!(cmd.dword0 & RISCV_IOMMU_CMD_IOTINVAL_GV);
            bool av = !!(cmd.dword0 & RISCV_IOMMU_CMD_IOTINVAL_AV);
            bool pscv = !!(cmd.dword0 & RISCV_IOMMU_CMD_IOTINVAL_PSCV);
            uint32_t gscid = get_field(cmd.dword0,
                                       RISCV_IOMMU_CMD_IOTINVAL_GSCID);
            uint32_t pscid = get_field(cmd.dword0,
                                       RISCV_IOMMU_CMD_IOTINVAL_PSCID);
            hwaddr iova = (cmd.dword1 << 2) & TARGET_PAGE_MASK;
            RISCVIOMMUTransTag transtag;

            if (gv) {
                transtag = RISCV_IOMMU_TRANS_TAG_VN;
                if (pscv) {
                    func = (av) ? riscv_iommu_iot_inval_gscid_pscid_iova :
                                  riscv_iommu_iot_inval_gscid_pscid;
                } else {
                    func = (av) ? riscv_iommu_iot_inval_gscid_iova :
                                  riscv_iommu_iot_inval_gscid;
                }
            } else {
                transtag = RISCV_IOMMU_TRANS_TAG_SS;
                if (pscv) {
                    func = (av) ? riscv_iommu_iot_inval_pscid_iova :
                                  riscv_iommu_iot_inval_pscid;
                } else {
                    func = (av) ? riscv_iommu_iot_inval_iova :
                                  riscv_iommu_iot_inval_all;
                }
            }

            riscv_iommu_iot_inval(s, func, gscid, pscid, iova, transtag);
            break;
        }

        case RISCV_IOMMU_CMD(RISCV_IOMMU_CMD_IODIR_FUNC_INVAL_DDT,
                             RISCV_IOMMU_CMD_IODIR_OPCODE):
            if (!(cmd.dword0 & RISCV_IOMMU_CMD_IODIR_DV)) {
                /* invalidate all device context cache mappings */
                func = riscv_iommu_ctx_inval_all;
            } else {
                /* invalidate all device context matching DID */
                func = riscv_iommu_ctx_inval_devid;
            }
            riscv_iommu_ctx_inval(s, func,
                get_field(cmd.dword0, RISCV_IOMMU_CMD_IODIR_DID), 0);
            break;

        case RISCV_IOMMU_CMD(RISCV_IOMMU_CMD_IODIR_FUNC_INVAL_PDT,
                             RISCV_IOMMU_CMD_IODIR_OPCODE):
            if (!(cmd.dword0 & RISCV_IOMMU_CMD_IODIR_DV)) {
                /* illegal command arguments IODIR_PDT & DV == 0 */
                goto cmd_ill;
            } else {
                func = riscv_iommu_ctx_inval_devid_procid;
            }
            riscv_iommu_ctx_inval(s, func,
                get_field(cmd.dword0, RISCV_IOMMU_CMD_IODIR_DID),
                get_field(cmd.dword0, RISCV_IOMMU_CMD_IODIR_PID));
            break;

        /* ATS commands */
        case RISCV_IOMMU_CMD(RISCV_IOMMU_CMD_ATS_FUNC_INVAL,
                             RISCV_IOMMU_CMD_ATS_OPCODE):
            if (!s->enable_ats) {
                goto cmd_ill;
            }

            riscv_iommu_ats_inval(s, &cmd);
            break;

        case RISCV_IOMMU_CMD(RISCV_IOMMU_CMD_ATS_FUNC_PRGR,
                             RISCV_IOMMU_CMD_ATS_OPCODE):
            if (!s->enable_ats) {
                goto cmd_ill;
            }

            riscv_iommu_ats_prgr(s, &cmd);
            break;

        default:
        cmd_ill:
            /* Invalid instruction, do not advance instruction index. */
            riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_CQCSR,
                                  RISCV_IOMMU_CQCSR_CMD_ILL, 0);
            goto fault;
        }
        /* Advance and update head pointer after command completes. */
        head = (head + 1) & s->cq_mask;
        riscv_iommu_reg_set32(s, RISCV_IOMMU_REG_CQH, head);
    }
    return;

fault:
    if (ctrl & RISCV_IOMMU_CQCSR_CIE) {
        riscv_iommu_notify(s, RISCV_IOMMU_INTR_CQ);
    }
}

static void riscv_iommu_process_cq_control(RISCVIOMMUState *s)
{
    uint64_t base;
    uint32_t ctrl_set = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_CQCSR);
    uint32_t ctrl_clr;
    bool enable = !!(ctrl_set & RISCV_IOMMU_CQCSR_CQEN);
    bool active = !!(ctrl_set & RISCV_IOMMU_CQCSR_CQON);

    if (enable && !active) {
        base = riscv_iommu_reg_get64(s, RISCV_IOMMU_REG_CQB);
        s->cq_mask = (2ULL << get_field(base, RISCV_IOMMU_CQB_LOG2SZ)) - 1;
        s->cq_addr = PPN_PHYS(get_field(base, RISCV_IOMMU_CQB_PPN));
        stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_CQT], ~s->cq_mask);
        stl_le_p(&s->regs_rw[RISCV_IOMMU_REG_CQH], 0);
        stl_le_p(&s->regs_rw[RISCV_IOMMU_REG_CQT], 0);
        ctrl_set = RISCV_IOMMU_CQCSR_CQON;
        ctrl_clr = RISCV_IOMMU_CQCSR_BUSY | RISCV_IOMMU_CQCSR_CQMF |
                   RISCV_IOMMU_CQCSR_CMD_ILL | RISCV_IOMMU_CQCSR_CMD_TO |
                   RISCV_IOMMU_CQCSR_FENCE_W_IP;
    } else if (!enable && active) {
        stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_CQT], ~0);
        ctrl_set = 0;
        ctrl_clr = RISCV_IOMMU_CQCSR_BUSY | RISCV_IOMMU_CQCSR_CQON;
    } else {
        ctrl_set = 0;
        ctrl_clr = RISCV_IOMMU_CQCSR_BUSY;
    }

    riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_CQCSR, ctrl_set, ctrl_clr);
}
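/*
 * Note on the queue geometry used above and below: LOG2SZ holds
 * log2(entries) - 1, so (2ULL << LOG2SZ) - 1 yields the index mask;
 * e.g. a register value of 9 gives 1024 entries and a mask of 0x3ff.
 * The same encoding applies to FQB and PQB.
 */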

static void riscv_iommu_process_pq_control(RISCVIOMMUState *s)
{
    uint64_t base;
    uint32_t ctrl_set = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_PQCSR);
    uint32_t ctrl_clr;
    bool enable = !!(ctrl_set & RISCV_IOMMU_PQCSR_PQEN);
    bool active = !!(ctrl_set & RISCV_IOMMU_PQCSR_PQON);

    if (enable && !active) {
        base = riscv_iommu_reg_get64(s, RISCV_IOMMU_REG_PQB);
        s->pq_mask = (2ULL << get_field(base, RISCV_IOMMU_PQB_LOG2SZ)) - 1;
        s->pq_addr = PPN_PHYS(get_field(base, RISCV_IOMMU_PQB_PPN));
        stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_PQH], ~s->pq_mask);
        stl_le_p(&s->regs_rw[RISCV_IOMMU_REG_PQH], 0);
        stl_le_p(&s->regs_rw[RISCV_IOMMU_REG_PQT], 0);
        ctrl_set = RISCV_IOMMU_PQCSR_PQON;
        ctrl_clr = RISCV_IOMMU_PQCSR_BUSY | RISCV_IOMMU_PQCSR_PQMF |
                   RISCV_IOMMU_PQCSR_PQOF;
    } else if (!enable && active) {
        stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_PQH], ~0);
        ctrl_set = 0;
        ctrl_clr = RISCV_IOMMU_PQCSR_BUSY | RISCV_IOMMU_PQCSR_PQON;
    } else {
        ctrl_set = 0;
        ctrl_clr = RISCV_IOMMU_PQCSR_BUSY;
    }

    riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_PQCSR, ctrl_set, ctrl_clr);
}

static void riscv_iommu_process_dbg(RISCVIOMMUState *s)
{
    uint64_t iova = riscv_iommu_reg_get64(s, RISCV_IOMMU_REG_TR_REQ_IOVA);
    uint64_t ctrl = riscv_iommu_reg_get64(s, RISCV_IOMMU_REG_TR_REQ_CTL);
    unsigned devid = get_field(ctrl, RISCV_IOMMU_TR_REQ_CTL_DID);
    unsigned pid = get_field(ctrl, RISCV_IOMMU_TR_REQ_CTL_PID);
    RISCVIOMMUContext *ctx;
    void *ref;

    if (!(ctrl & RISCV_IOMMU_TR_REQ_CTL_GO_BUSY)) {
        return;
    }

    ctx = riscv_iommu_ctx(s, devid, pid, &ref);
    if (ctx == NULL) {
        riscv_iommu_reg_set64(s, RISCV_IOMMU_REG_TR_RESPONSE,
                              RISCV_IOMMU_TR_RESPONSE_FAULT |
                              (RISCV_IOMMU_FQ_CAUSE_DMA_DISABLED << 10));
    } else {
        IOMMUTLBEntry iotlb = {
            .iova = iova,
            .perm = ctrl & RISCV_IOMMU_TR_REQ_CTL_NW ? IOMMU_RO : IOMMU_RW,
            .addr_mask = ~0,
            .target_as = NULL,
        };
        int fault = riscv_iommu_translate(s, ctx, &iotlb, false);
        if (fault) {
            iova = RISCV_IOMMU_TR_RESPONSE_FAULT | (((uint64_t) fault) << 10);
        } else {
            iova = iotlb.translated_addr & ~iotlb.addr_mask;
            iova >>= TARGET_PAGE_BITS;
            iova &= RISCV_IOMMU_TR_RESPONSE_PPN;

            /* We do not support superpages (> 4 KiB) for now */
            iova &= ~RISCV_IOMMU_TR_RESPONSE_S;
        }
        riscv_iommu_reg_set64(s, RISCV_IOMMU_REG_TR_RESPONSE, iova);
    }

    riscv_iommu_reg_mod64(s, RISCV_IOMMU_REG_TR_REQ_CTL, 0,
                          RISCV_IOMMU_TR_REQ_CTL_GO_BUSY);
    riscv_iommu_ctx_put(s, ref);
}

typedef void riscv_iommu_process_fn(RISCVIOMMUState *s);

static void riscv_iommu_update_icvec(RISCVIOMMUState *s, uint64_t data)
{
    uint64_t icvec = 0;

    icvec |= MIN(data & RISCV_IOMMU_ICVEC_CIV,
                 s->icvec_avail_vectors & RISCV_IOMMU_ICVEC_CIV);

    icvec |= MIN(data & RISCV_IOMMU_ICVEC_FIV,
                 s->icvec_avail_vectors & RISCV_IOMMU_ICVEC_FIV);

    icvec |= MIN(data & RISCV_IOMMU_ICVEC_PMIV,
                 s->icvec_avail_vectors & RISCV_IOMMU_ICVEC_PMIV);

    icvec |= MIN(data & RISCV_IOMMU_ICVEC_PIV,
                 s->icvec_avail_vectors & RISCV_IOMMU_ICVEC_PIV);

    trace_riscv_iommu_icvec_write(data, icvec);

    riscv_iommu_reg_set64(s, RISCV_IOMMU_REG_ICVEC, icvec);
}
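
/*
 * Note: writing IPSR below re-evaluates each pending bit rather than
 * clearing it unconditionally: a bit written as 1 stays set while its
 * queue still reports an enabled, active cause (e.g. FIP remains while
 * FQCSR has FQOF or FQMF set together with FIE); otherwise the bit is
 * cleared.
 */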

static void riscv_iommu_update_ipsr(RISCVIOMMUState *s, uint64_t data)
{
    uint32_t cqcsr, fqcsr, pqcsr;
    uint32_t ipsr_set = 0;
    uint32_t ipsr_clr = 0;

    if (data & RISCV_IOMMU_IPSR_CIP) {
        cqcsr = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_CQCSR);

        if (cqcsr & RISCV_IOMMU_CQCSR_CIE &&
            (cqcsr & RISCV_IOMMU_CQCSR_FENCE_W_IP ||
             cqcsr & RISCV_IOMMU_CQCSR_CMD_ILL ||
             cqcsr & RISCV_IOMMU_CQCSR_CMD_TO ||
             cqcsr & RISCV_IOMMU_CQCSR_CQMF)) {
            ipsr_set |= RISCV_IOMMU_IPSR_CIP;
        } else {
            ipsr_clr |= RISCV_IOMMU_IPSR_CIP;
        }
    } else {
        ipsr_clr |= RISCV_IOMMU_IPSR_CIP;
    }

    if (data & RISCV_IOMMU_IPSR_FIP) {
        fqcsr = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_FQCSR);

        if (fqcsr & RISCV_IOMMU_FQCSR_FIE &&
            (fqcsr & RISCV_IOMMU_FQCSR_FQOF ||
             fqcsr & RISCV_IOMMU_FQCSR_FQMF)) {
            ipsr_set |= RISCV_IOMMU_IPSR_FIP;
        } else {
            ipsr_clr |= RISCV_IOMMU_IPSR_FIP;
        }
    } else {
        ipsr_clr |= RISCV_IOMMU_IPSR_FIP;
    }

    if (data & RISCV_IOMMU_IPSR_PIP) {
        pqcsr = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_PQCSR);

        if (pqcsr & RISCV_IOMMU_PQCSR_PIE &&
            (pqcsr & RISCV_IOMMU_PQCSR_PQOF ||
             pqcsr & RISCV_IOMMU_PQCSR_PQMF)) {
            ipsr_set |= RISCV_IOMMU_IPSR_PIP;
        } else {
            ipsr_clr |= RISCV_IOMMU_IPSR_PIP;
        }
    } else {
        ipsr_clr |= RISCV_IOMMU_IPSR_PIP;
    }

    riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_IPSR, ipsr_set, ipsr_clr);
}

static void riscv_iommu_process_hpm_writes(RISCVIOMMUState *s,
                                           uint32_t regb,
                                           bool prev_cy_inh)
{
    switch (regb) {
    case RISCV_IOMMU_REG_IOCOUNTINH:
        riscv_iommu_process_iocntinh_cy(s, prev_cy_inh);
        break;

    case RISCV_IOMMU_REG_IOHPMCYCLES:
    case RISCV_IOMMU_REG_IOHPMCYCLES + 4:
        riscv_iommu_process_hpmcycle_write(s);
        break;

    case RISCV_IOMMU_REG_IOHPMEVT_BASE ...
         RISCV_IOMMU_REG_IOHPMEVT(RISCV_IOMMU_IOCOUNT_NUM) + 4:
        riscv_iommu_process_hpmevt_write(s, regb & ~7);
        break;
    }
}

/*
 * Compute the new value of the register at 'reg_addr' for a write of
 * 'data', honoring its read-only/read-write/write-clear bits, and store
 * the result at the pointer 'dest'.
 *
 * The result is written in little-endian.
 */
static void riscv_iommu_write_reg_val(RISCVIOMMUState *s,
                                      void *dest, hwaddr reg_addr,
                                      int size, uint64_t data)
{
    uint64_t ro = ldn_le_p(&s->regs_ro[reg_addr], size);
    uint64_t wc = ldn_le_p(&s->regs_wc[reg_addr], size);
    uint64_t rw = ldn_le_p(&s->regs_rw[reg_addr], size);

    stn_le_p(dest, size, ((rw & ro) | (data & ~ro)) & ~(data & wc));
}
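
/*
 * Worked example of the masking above, for a 32-bit register with
 * ro = 0xFFFF0000, wc = 0x000000FF, rw = 0x12340000 and an incoming
 * write of data = 0x00ABCDEF: the read-only half keeps its old value
 * (rw & ro = 0x12340000), the writable half takes the new bits
 * (data & ~ro = 0x0000CDEF), and the write-clear bits set in data are
 * then cleared (~(data & wc) masks off 0xEF), leaving 0x1234CD00.
 */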

static MemTxResult riscv_iommu_mmio_write(void *opaque, hwaddr addr,
                                          uint64_t data, unsigned size,
                                          MemTxAttrs attrs)
{
    riscv_iommu_process_fn *process_fn = NULL;
    RISCVIOMMUState *s = opaque;
    uint32_t regb = addr & ~3;
    uint32_t busy = 0;
    uint64_t val = 0;
    bool cy_inh = false;

    if ((addr & (size - 1)) != 0) {
        /* Unsupported MMIO alignment or access size */
        return MEMTX_ERROR;
    }

    if (addr + size > RISCV_IOMMU_REG_MSI_CONFIG) {
        /* Unsupported MMIO access location. */
        return MEMTX_ACCESS_ERROR;
    }

    /* Track actionable MMIO write. */
    switch (regb) {
    case RISCV_IOMMU_REG_DDTP:
    case RISCV_IOMMU_REG_DDTP + 4:
        process_fn = riscv_iommu_process_ddtp;
        regb = RISCV_IOMMU_REG_DDTP;
        busy = RISCV_IOMMU_DDTP_BUSY;
        break;

    case RISCV_IOMMU_REG_CQT:
        process_fn = riscv_iommu_process_cq_tail;
        break;

    case RISCV_IOMMU_REG_CQCSR:
        process_fn = riscv_iommu_process_cq_control;
        busy = RISCV_IOMMU_CQCSR_BUSY;
        break;

    case RISCV_IOMMU_REG_FQCSR:
        process_fn = riscv_iommu_process_fq_control;
        busy = RISCV_IOMMU_FQCSR_BUSY;
        break;

    case RISCV_IOMMU_REG_PQCSR:
        process_fn = riscv_iommu_process_pq_control;
        busy = RISCV_IOMMU_PQCSR_BUSY;
        break;

    case RISCV_IOMMU_REG_ICVEC:
    case RISCV_IOMMU_REG_IPSR:
        /*
         * ICVEC and IPSR have special read/write procedures. We'll
         * call their respective helpers and exit.
         */
        riscv_iommu_write_reg_val(s, &val, addr, size, data);

        /*
         * 'val' is stored as LE. Switch to host endianness
         * before using it.
         */
        val = le64_to_cpu(val);

        if (regb == RISCV_IOMMU_REG_ICVEC) {
            riscv_iommu_update_icvec(s, val);
        } else {
            riscv_iommu_update_ipsr(s, val);
        }

        return MEMTX_OK;

    case RISCV_IOMMU_REG_TR_REQ_CTL:
        process_fn = riscv_iommu_process_dbg;
        regb = RISCV_IOMMU_REG_TR_REQ_CTL;
        busy = RISCV_IOMMU_TR_REQ_CTL_GO_BUSY;
        break;

    case RISCV_IOMMU_REG_IOCOUNTINH:
        if (addr != RISCV_IOMMU_REG_IOCOUNTINH) {
            break;
        }
        /* Store previous value of CY bit. */
        cy_inh = !!(riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_IOCOUNTINH) &
                    RISCV_IOMMU_IOCOUNTINH_CY);
        break;

    default:
        break;
    }

    /*
     * Register updates might not be synchronized with the core logic.
     * If system software updates a register while the relevant BUSY bit
     * is set, the IOMMU behavior for the additional writes is
     * UNSPECIFIED.
     */
    riscv_iommu_write_reg_val(s, &s->regs_rw[addr], addr, size, data);

    /* Busy flag update, MSB 4-byte register. */
    if (busy) {
        uint32_t rw = ldl_le_p(&s->regs_rw[regb]);
        stl_le_p(&s->regs_rw[regb], rw | busy);
    }

    /* Process HPM writes and update any internal state if needed. */
    if (regb >= RISCV_IOMMU_REG_IOCOUNTOVF &&
        regb <= (RISCV_IOMMU_REG_IOHPMEVT(RISCV_IOMMU_IOCOUNT_NUM) + 4)) {
        riscv_iommu_process_hpm_writes(s, regb, cy_inh);
    }

    if (process_fn) {
        process_fn(s);
    }

    return MEMTX_OK;
}

static MemTxResult riscv_iommu_mmio_read(void *opaque, hwaddr addr,
                                         uint64_t *data, unsigned size,
                                         MemTxAttrs attrs)
{
    RISCVIOMMUState *s = opaque;
    uint64_t val = -1;
    uint8_t *ptr;

    if ((addr & (size - 1)) != 0) {
        /* Unsupported MMIO alignment. */
        return MEMTX_ERROR;
    }

    if (addr + size > RISCV_IOMMU_REG_MSI_CONFIG) {
        return MEMTX_ACCESS_ERROR;
    }

    /* Compute cycle register value. */
    if ((addr & ~7) == RISCV_IOMMU_REG_IOHPMCYCLES) {
        val = riscv_iommu_hpmcycle_read(s);
        ptr = (uint8_t *)&val + (addr & 7);
    } else if ((addr & ~3) == RISCV_IOMMU_REG_IOCOUNTOVF) {
        /*
         * Software can read RISCV_IOMMU_REG_IOCOUNTOVF before the timer
         * callback completes, in which case the CY_OF bit in
         * RISCV_IOMMU_IOHPMCYCLES_OVF would still be 0. Here we take the
         * CY_OF bit state from the RISCV_IOMMU_REG_IOHPMCYCLES register,
         * as it does not depend on the timer callback and is computed
         * from the cycle overflow.
         */
        val = ldq_le_p(&s->regs_rw[addr]);
        val |= (riscv_iommu_hpmcycle_read(s) & RISCV_IOMMU_IOHPMCYCLES_OVF)
               ? RISCV_IOMMU_IOCOUNTOVF_CY
               : 0;
        ptr = (uint8_t *)&val + (addr & 3);
    } else {
        ptr = &s->regs_rw[addr];
    }

    val = ldn_le_p(ptr, size);

    *data = val;

    return MEMTX_OK;
}

static const MemoryRegionOps riscv_iommu_mmio_ops = {
    .read_with_attrs = riscv_iommu_mmio_read,
    .write_with_attrs = riscv_iommu_mmio_write,
    .endianness = DEVICE_NATIVE_ENDIAN,
    .impl = {
        .min_access_size = 4,
        .max_access_size = 8,
        .unaligned = false,
    },
    .valid = {
        .min_access_size = 4,
        .max_access_size = 8,
    }
};
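
/*
 * Note: the .impl/.valid bounds above already restrict accesses to 4 or
 * 8 bytes; the handlers additionally reject offsets that are not
 * naturally aligned, e.g. an 8-byte access at offset 0x4 fails the
 * (addr & (size - 1)) check and returns MEMTX_ERROR.
 */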

/*
 * Translations matching MSI pattern check are redirected to "riscv-iommu-trap"
 * memory region as untranslated address, for additional MSI/MRIF interception
 * by IOMMU interrupt remapping implementation.
 * Note: Device emulation code generating an MSI is expected to provide valid
 * memory transaction attributes with requester_id set.
 */
static MemTxResult riscv_iommu_trap_write(void *opaque, hwaddr addr,
                                          uint64_t data, unsigned size,
                                          MemTxAttrs attrs)
{
    RISCVIOMMUState *s = opaque;
    RISCVIOMMUContext *ctx;
    MemTxResult res;
    void *ref;
    uint32_t devid = attrs.requester_id;

    if (attrs.unspecified) {
        return MEMTX_ACCESS_ERROR;
    }

    /* FIXME: PCIe bus remapping for attached endpoints. */
    devid |= s->bus << 8;

    ctx = riscv_iommu_ctx(s, devid, 0, &ref);
    if (ctx == NULL) {
        res = MEMTX_ACCESS_ERROR;
    } else {
        res = riscv_iommu_msi_write(s, ctx, addr, data, size, attrs);
    }
    riscv_iommu_ctx_put(s, ref);
    return res;
}

static MemTxResult riscv_iommu_trap_read(void *opaque, hwaddr addr,
                                         uint64_t *data, unsigned size,
                                         MemTxAttrs attrs)
{
    return MEMTX_ACCESS_ERROR;
}

static const MemoryRegionOps riscv_iommu_trap_ops = {
    .read_with_attrs = riscv_iommu_trap_read,
    .write_with_attrs = riscv_iommu_trap_write,
    .endianness = DEVICE_LITTLE_ENDIAN,
    .impl = {
        .min_access_size = 4,
        .max_access_size = 8,
        .unaligned = true,
    },
    .valid = {
        .min_access_size = 4,
        .max_access_size = 8,
    }
};

void riscv_iommu_set_cap_igs(RISCVIOMMUState *s, riscv_iommu_igs_mode mode)
{
    s->cap = set_field(s->cap, RISCV_IOMMU_CAP_IGS, mode);
}

static void riscv_iommu_instance_init(Object *obj)
{
    RISCVIOMMUState *s = RISCV_IOMMU(obj);

    /* Enable translation debug interface */
    s->cap = RISCV_IOMMU_CAP_DBG;

    /* Report QEMU target physical address space limits */
    s->cap = set_field(s->cap, RISCV_IOMMU_CAP_PAS,
                       TARGET_PHYS_ADDR_SPACE_BITS);

    /* TODO: method to report supported PID bits */
    s->pid_bits = 8; /* restricted to size of MemTxAttrs.pid */
    s->cap |= RISCV_IOMMU_CAP_PD8;

    /* register storage */
    s->regs_rw = g_new0(uint8_t, RISCV_IOMMU_REG_SIZE);
    s->regs_ro = g_new0(uint8_t, RISCV_IOMMU_REG_SIZE);
    s->regs_wc = g_new0(uint8_t, RISCV_IOMMU_REG_SIZE);

    /* Mark all registers read-only */
    memset(s->regs_ro, 0xff, RISCV_IOMMU_REG_SIZE);

    /* Device translation context cache */
    s->ctx_cache = g_hash_table_new_full(riscv_iommu_ctx_hash,
                                         riscv_iommu_ctx_equal,
                                         g_free, NULL);

    s->iot_cache = g_hash_table_new_full(riscv_iommu_iot_hash,
                                         riscv_iommu_iot_equal,
                                         g_free, NULL);

    s->iommus.le_next = NULL;
    s->iommus.le_prev = NULL;
    QLIST_INIT(&s->spaces);
}
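
/*
 * Note: riscv_iommu_instance_init() above marks the entire register file
 * read-only (regs_ro filled with 0xff); riscv_iommu_realize() below then
 * selectively clears read-only bits for the fields software may write,
 * so any register not explicitly listed there remains read-only.
 */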
static void riscv_iommu_realize(DeviceState *dev, Error **errp)
{
    RISCVIOMMUState *s = RISCV_IOMMU(dev);

    s->cap |= s->version & RISCV_IOMMU_CAP_VERSION;
    if (s->enable_msi) {
        s->cap |= RISCV_IOMMU_CAP_MSI_FLAT | RISCV_IOMMU_CAP_MSI_MRIF;
    }
    if (s->enable_ats) {
        s->cap |= RISCV_IOMMU_CAP_ATS;
    }
    if (s->enable_s_stage) {
        s->cap |= RISCV_IOMMU_CAP_SV32 | RISCV_IOMMU_CAP_SV39 |
                  RISCV_IOMMU_CAP_SV48 | RISCV_IOMMU_CAP_SV57;
    }
    if (s->enable_g_stage) {
        s->cap |= RISCV_IOMMU_CAP_SV32X4 | RISCV_IOMMU_CAP_SV39X4 |
                  RISCV_IOMMU_CAP_SV48X4 | RISCV_IOMMU_CAP_SV57X4;
    }

    if (s->hpm_cntrs > 0) {
        /* Clip number of HPM counters to the maximum supported (31). */
        if (s->hpm_cntrs > RISCV_IOMMU_IOCOUNT_NUM) {
            s->hpm_cntrs = RISCV_IOMMU_IOCOUNT_NUM;
        }
        /* Enable hardware performance monitor interface */
        s->cap |= RISCV_IOMMU_CAP_HPM;
    }

    /* Out-of-reset translation mode: OFF (DMA disabled) or BARE (passthrough) */
    s->ddtp = set_field(0, RISCV_IOMMU_DDTP_MODE, s->enable_off ?
                        RISCV_IOMMU_DDTP_MODE_OFF : RISCV_IOMMU_DDTP_MODE_BARE);

    /*
     * Register complete MMIO space, including MSI/PBA registers.
     * Note: the PCIDevice implementation will add an overlapping MR
     * for MSI/PBA, which it manages directly.
     */
    memory_region_init_io(&s->regs_mr, OBJECT(dev), &riscv_iommu_mmio_ops, s,
                          "riscv-iommu-regs", RISCV_IOMMU_REG_SIZE);

    /* Set power-on register state */
    stq_le_p(&s->regs_rw[RISCV_IOMMU_REG_CAP], s->cap);
    stq_le_p(&s->regs_rw[RISCV_IOMMU_REG_FCTL], 0);
    stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_FCTL],
             ~(RISCV_IOMMU_FCTL_BE | RISCV_IOMMU_FCTL_WSI));
    stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_DDTP],
             ~(RISCV_IOMMU_DDTP_PPN | RISCV_IOMMU_DDTP_MODE));
    stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_CQB],
             ~(RISCV_IOMMU_CQB_LOG2SZ | RISCV_IOMMU_CQB_PPN));
    stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_FQB],
             ~(RISCV_IOMMU_FQB_LOG2SZ | RISCV_IOMMU_FQB_PPN));
    stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_PQB],
             ~(RISCV_IOMMU_PQB_LOG2SZ | RISCV_IOMMU_PQB_PPN));
    stl_le_p(&s->regs_wc[RISCV_IOMMU_REG_CQCSR], RISCV_IOMMU_CQCSR_CQMF |
             RISCV_IOMMU_CQCSR_CMD_TO | RISCV_IOMMU_CQCSR_CMD_ILL);
    stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_CQCSR], RISCV_IOMMU_CQCSR_CQON |
             RISCV_IOMMU_CQCSR_BUSY);
    stl_le_p(&s->regs_wc[RISCV_IOMMU_REG_FQCSR], RISCV_IOMMU_FQCSR_FQMF |
             RISCV_IOMMU_FQCSR_FQOF);
    stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_FQCSR], RISCV_IOMMU_FQCSR_FQON |
             RISCV_IOMMU_FQCSR_BUSY);
    stl_le_p(&s->regs_wc[RISCV_IOMMU_REG_PQCSR], RISCV_IOMMU_PQCSR_PQMF |
             RISCV_IOMMU_PQCSR_PQOF);
    stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_PQCSR], RISCV_IOMMU_PQCSR_PQON |
             RISCV_IOMMU_PQCSR_BUSY);
    stl_le_p(&s->regs_wc[RISCV_IOMMU_REG_IPSR], ~0);
    stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_ICVEC], 0);
    stq_le_p(&s->regs_rw[RISCV_IOMMU_REG_DDTP], s->ddtp);
    /* If debug registers enabled. */
    if (s->cap & RISCV_IOMMU_CAP_DBG) {
        stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_TR_REQ_IOVA], 0);
        stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_TR_REQ_CTL],
                 RISCV_IOMMU_TR_REQ_CTL_GO_BUSY);
    }

    /* If HPM registers are enabled. */
    if (s->cap & RISCV_IOMMU_CAP_HPM) {
        /* +1 for cycle counter bit. */
        stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_IOCOUNTINH],
                 ~((2 << s->hpm_cntrs) - 1));
        stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_IOHPMCYCLES], 0);
        memset(&s->regs_ro[RISCV_IOMMU_REG_IOHPMCTR_BASE],
               0x00, s->hpm_cntrs * 8);
        memset(&s->regs_ro[RISCV_IOMMU_REG_IOHPMEVT_BASE],
               0x00, s->hpm_cntrs * 8);
    }
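
    /*
     * Illustration of the IOCOUNTINH mask above: with hpm_cntrs = 8,
     * (2 << 8) - 1 = 0x1FF, so the nine low inhibit bits (CY plus HPM
     * counters 1..8) become writable while the remaining bits stay
     * read-only.
     */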

    /* Memory region for downstream access, if specified. */
    if (s->target_mr) {
        s->target_as = g_new0(AddressSpace, 1);
        address_space_init(s->target_as, s->target_mr,
                           "riscv-iommu-downstream");
    } else {
        /* Fallback to global system memory. */
        s->target_as = &address_space_memory;
    }

    /* Memory region for untranslated MRIF/MSI writes */
    memory_region_init_io(&s->trap_mr, OBJECT(dev), &riscv_iommu_trap_ops, s,
                          "riscv-iommu-trap", ~0ULL);
    address_space_init(&s->trap_as, &s->trap_mr, "riscv-iommu-trap-as");

    if (s->cap & RISCV_IOMMU_CAP_HPM) {
        s->hpm_timer =
            timer_new_ns(QEMU_CLOCK_VIRTUAL, riscv_iommu_hpm_timer_cb, s);
        s->hpm_event_ctr_map = g_hash_table_new(g_direct_hash, g_direct_equal);
    }
}

static void riscv_iommu_unrealize(DeviceState *dev)
{
    RISCVIOMMUState *s = RISCV_IOMMU(dev);

    g_hash_table_unref(s->iot_cache);
    g_hash_table_unref(s->ctx_cache);

    if (s->cap & RISCV_IOMMU_CAP_HPM) {
        g_hash_table_unref(s->hpm_event_ctr_map);
        timer_free(s->hpm_timer);
    }
}

void riscv_iommu_reset(RISCVIOMMUState *s)
{
    uint32_t reg_clr;
    int ddtp_mode;

    /*
     * Clear DDTP while setting the DDTP mode back to the
     * initial setting chosen by the user.
     */
    ddtp_mode = s->enable_off ?
                RISCV_IOMMU_DDTP_MODE_OFF : RISCV_IOMMU_DDTP_MODE_BARE;
    s->ddtp = set_field(0, RISCV_IOMMU_DDTP_MODE, ddtp_mode);
    riscv_iommu_reg_set64(s, RISCV_IOMMU_REG_DDTP, s->ddtp);

    reg_clr = RISCV_IOMMU_CQCSR_CQEN | RISCV_IOMMU_CQCSR_CIE |
              RISCV_IOMMU_CQCSR_CQON | RISCV_IOMMU_CQCSR_BUSY;
    riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_CQCSR, 0, reg_clr);

    reg_clr = RISCV_IOMMU_FQCSR_FQEN | RISCV_IOMMU_FQCSR_FIE |
              RISCV_IOMMU_FQCSR_FQON | RISCV_IOMMU_FQCSR_BUSY;
    riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_FQCSR, 0, reg_clr);

    reg_clr = RISCV_IOMMU_PQCSR_PQEN | RISCV_IOMMU_PQCSR_PIE |
              RISCV_IOMMU_PQCSR_PQON | RISCV_IOMMU_PQCSR_BUSY;
    riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_PQCSR, 0, reg_clr);

    riscv_iommu_reg_mod64(s, RISCV_IOMMU_REG_TR_REQ_CTL, 0,
                          RISCV_IOMMU_TR_REQ_CTL_GO_BUSY);

    riscv_iommu_reg_set32(s, RISCV_IOMMU_REG_IPSR, 0);

    g_hash_table_remove_all(s->ctx_cache);
    g_hash_table_remove_all(s->iot_cache);
}

static const Property riscv_iommu_properties[] = {
    DEFINE_PROP_UINT32("version", RISCVIOMMUState, version,
                       RISCV_IOMMU_SPEC_DOT_VER),
    DEFINE_PROP_UINT32("bus", RISCVIOMMUState, bus, 0x0),
    DEFINE_PROP_UINT32("ioatc-limit", RISCVIOMMUState, iot_limit,
                       LIMIT_CACHE_IOT),
    DEFINE_PROP_BOOL("intremap", RISCVIOMMUState, enable_msi, TRUE),
    DEFINE_PROP_BOOL("ats", RISCVIOMMUState, enable_ats, TRUE),
    DEFINE_PROP_BOOL("off", RISCVIOMMUState, enable_off, TRUE),
    DEFINE_PROP_BOOL("s-stage", RISCVIOMMUState, enable_s_stage, TRUE),
    DEFINE_PROP_BOOL("g-stage", RISCVIOMMUState, enable_g_stage, TRUE),
    DEFINE_PROP_LINK("downstream-mr", RISCVIOMMUState, target_mr,
                     TYPE_MEMORY_REGION, MemoryRegion *),
    DEFINE_PROP_UINT8("hpm-counters", RISCVIOMMUState, hpm_cntrs,
                      RISCV_IOMMU_IOCOUNT_NUM),
};

static void riscv_iommu_class_init(ObjectClass *klass, void *data)
{
    DeviceClass *dc = DEVICE_CLASS(klass);

    /* internal device for riscv-iommu-{pci/sys}, not user-creatable */
    dc->user_creatable = false;
    dc->realize = riscv_iommu_realize;
    dc->unrealize = riscv_iommu_unrealize;
    device_class_set_props(dc, riscv_iommu_properties);
}
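
/*
 * Note: all boolean feature properties above default to enabled; the
 * "off" property only selects the out-of-reset ddtp mode (OFF rather
 * than BARE), it does not disable the device itself.
 */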

static const TypeInfo riscv_iommu_info = {
    .name = TYPE_RISCV_IOMMU,
    .parent = TYPE_DEVICE,
    .instance_size = sizeof(RISCVIOMMUState),
    .instance_init = riscv_iommu_instance_init,
    .class_init = riscv_iommu_class_init,
};

static const char *IOMMU_FLAG_STR[] = {
    "NA",
    "RO",
    "WR",
    "RW",
};

/* RISC-V IOMMU Memory Region - Address Translation Space */
static IOMMUTLBEntry riscv_iommu_memory_region_translate(
    IOMMUMemoryRegion *iommu_mr, hwaddr addr,
    IOMMUAccessFlags flag, int iommu_idx)
{
    RISCVIOMMUSpace *as = container_of(iommu_mr, RISCVIOMMUSpace, iova_mr);
    RISCVIOMMUContext *ctx;
    void *ref;
    IOMMUTLBEntry iotlb = {
        .iova = addr,
        .target_as = as->iommu->target_as,
        .addr_mask = ~0ULL,
        .perm = flag,
    };

    ctx = riscv_iommu_ctx(as->iommu, as->devid, iommu_idx, &ref);
    if (ctx == NULL) {
        /* Translation disabled or invalid. */
        iotlb.addr_mask = 0;
        iotlb.perm = IOMMU_NONE;
    } else if (riscv_iommu_translate(as->iommu, ctx, &iotlb, true)) {
        /* Translation disabled or fault reported. */
        iotlb.addr_mask = 0;
        iotlb.perm = IOMMU_NONE;
    }

    /* Trace all DMA translations with the original access flags. */
    trace_riscv_iommu_dma(as->iommu->parent_obj.id, PCI_BUS_NUM(as->devid),
                          PCI_SLOT(as->devid), PCI_FUNC(as->devid), iommu_idx,
                          IOMMU_FLAG_STR[flag & IOMMU_RW], iotlb.iova,
                          iotlb.translated_addr);

    riscv_iommu_ctx_put(as->iommu, ref);

    return iotlb;
}

static int riscv_iommu_memory_region_notify(
    IOMMUMemoryRegion *iommu_mr, IOMMUNotifierFlag old,
    IOMMUNotifierFlag new, Error **errp)
{
    RISCVIOMMUSpace *as = container_of(iommu_mr, RISCVIOMMUSpace, iova_mr);

    if (old == IOMMU_NOTIFIER_NONE) {
        as->notifier = true;
        trace_riscv_iommu_notifier_add(iommu_mr->parent_obj.name);
    } else if (new == IOMMU_NOTIFIER_NONE) {
        as->notifier = false;
        trace_riscv_iommu_notifier_del(iommu_mr->parent_obj.name);
    }

    return 0;
}

static inline bool pci_is_iommu(PCIDevice *pdev)
{
    return pci_get_word(pdev->config + PCI_CLASS_DEVICE) == 0x0806;
}
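
/*
 * Note: 0x0806 above is the PCI class code for an IOMMU (base class 0x08,
 * base system peripheral; subclass 0x06, IOMMU).  The lookup below first
 * walks back to the head of the per-bus IOMMU list, then scans forward
 * for the first instance claiming the requester, so multiple IOMMU
 * devices can share a single PCIe bus.
 */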
static AddressSpace *riscv_iommu_find_as(PCIBus *bus, void *opaque, int devfn)
{
    RISCVIOMMUState *s = (RISCVIOMMUState *) opaque;
    PCIDevice *pdev = pci_find_device(bus, pci_bus_num(bus), devfn);
    AddressSpace *as = NULL;

    if (pdev && pci_is_iommu(pdev)) {
        return s->target_as;
    }

    /* Find first registered IOMMU device */
    while (s->iommus.le_prev) {
        s = *(s->iommus.le_prev);
    }

    /* Find first matching IOMMU */
    while (s != NULL && as == NULL) {
        as = riscv_iommu_space(s, PCI_BUILD_BDF(pci_bus_num(bus), devfn));
        s = s->iommus.le_next;
    }

    return as ? as : &address_space_memory;
}

static const PCIIOMMUOps riscv_iommu_ops = {
    .get_address_space = riscv_iommu_find_as,
};

void riscv_iommu_pci_setup_iommu(RISCVIOMMUState *iommu, PCIBus *bus,
                                 Error **errp)
{
    if (bus->iommu_ops &&
        bus->iommu_ops->get_address_space == riscv_iommu_find_as) {
        /* Allow multiple IOMMUs on the same PCIe bus, link known devices */
        RISCVIOMMUState *last = (RISCVIOMMUState *)bus->iommu_opaque;
        QLIST_INSERT_AFTER(last, iommu, iommus);
    } else if (!bus->iommu_ops && !bus->iommu_opaque) {
        pci_setup_iommu(bus, &riscv_iommu_ops, iommu);
    } else {
        error_setg(errp, "can't register secondary IOMMU for PCI bus #%d",
                   pci_bus_num(bus));
    }
}

static int riscv_iommu_memory_region_index(IOMMUMemoryRegion *iommu_mr,
                                           MemTxAttrs attrs)
{
    return attrs.unspecified ? RISCV_IOMMU_NOPROCID : (int)attrs.pid;
}

static int riscv_iommu_memory_region_index_len(IOMMUMemoryRegion *iommu_mr)
{
    RISCVIOMMUSpace *as = container_of(iommu_mr, RISCVIOMMUSpace, iova_mr);
    return 1 << as->iommu->pid_bits;
}

static void riscv_iommu_memory_region_init(ObjectClass *klass, void *data)
{
    IOMMUMemoryRegionClass *imrc = IOMMU_MEMORY_REGION_CLASS(klass);

    imrc->translate = riscv_iommu_memory_region_translate;
    imrc->notify_flag_changed = riscv_iommu_memory_region_notify;
    imrc->attrs_to_index = riscv_iommu_memory_region_index;
    imrc->num_indexes = riscv_iommu_memory_region_index_len;
}

static const TypeInfo riscv_iommu_memory_region_info = {
    .parent = TYPE_IOMMU_MEMORY_REGION,
    .name = TYPE_RISCV_IOMMU_MEMORY_REGION,
    .class_init = riscv_iommu_memory_region_init,
};

static void riscv_iommu_register_mr_types(void)
{
    type_register_static(&riscv_iommu_memory_region_info);
    type_register_static(&riscv_iommu_info);
}

type_init(riscv_iommu_register_mr_types);