/*
 * QEMU emulation of an RISC-V IOMMU
 *
 * Copyright (C) 2021-2023, Rivos Inc.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2 or later, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "qom/object.h"
#include "hw/pci/pci_bus.h"
#include "hw/pci/pci_device.h"
#include "hw/qdev-properties.h"
#include "hw/riscv/riscv_hart.h"
#include "migration/vmstate.h"
#include "qapi/error.h"
#include "qemu/timer.h"

#include "cpu_bits.h"
#include "riscv-iommu.h"
#include "riscv-iommu-bits.h"
#include "trace.h"

/*
 * Translation context cache threshold: riscv_iommu_ctx() swaps in a fresh,
 * empty cache once the current one holds this many entries.
 */
#define LIMIT_CACHE_CTX               (1U << 7)
/*
 * NOTE(review): presumably the analogous threshold for the address
 * translation (IOTLB) cache; its use is not visible in this part of the file.
 */
#define LIMIT_CACHE_IOT               (1U << 20)

/* Physical page number conversions (page number <-> byte address) */
#define PPN_PHYS(ppn)                 ((ppn) << TARGET_PAGE_BITS)
#define PPN_DOWN(phy)                 ((phy) >> TARGET_PAGE_BITS)

typedef struct RISCVIOMMUContext RISCVIOMMUContext;
typedef struct RISCVIOMMUEntry RISCVIOMMUEntry;

/* Device assigned I/O address space */
struct RISCVIOMMUSpace {
    IOMMUMemoryRegion iova_mr;  /* IOVA memory region for attached device */
    AddressSpace iova_as;       /* IOVA address space for attached device */
    RISCVIOMMUState *iommu;     /* Managing IOMMU device state */
    uint32_t devid;             /* Requester identifier, AKA device_id */
    bool notifier;              /* IOMMU unmap notifier enabled */
    QLIST_ENTRY(RISCVIOMMUSpace) list; /* Linkage in a list of spaces */
};

/* Device translation context state.
*/ 55 struct RISCVIOMMUContext { 56 uint64_t devid:24; /* Requester Id, AKA device_id */ 57 uint64_t process_id:20; /* Process ID. PASID for PCIe */ 58 uint64_t tc; /* Translation Control */ 59 uint64_t ta; /* Translation Attributes */ 60 uint64_t satp; /* S-Stage address translation and protection */ 61 uint64_t gatp; /* G-Stage address translation and protection */ 62 uint64_t msi_addr_mask; /* MSI filtering - address mask */ 63 uint64_t msi_addr_pattern; /* MSI filtering - address pattern */ 64 uint64_t msiptp; /* MSI redirection page table pointer */ 65 }; 66 67 /* Address translation cache entry */ 68 struct RISCVIOMMUEntry { 69 uint64_t iova:44; /* IOVA Page Number */ 70 uint64_t pscid:20; /* Process Soft-Context identifier */ 71 uint64_t phys:44; /* Physical Page Number */ 72 uint64_t gscid:16; /* Guest Soft-Context identifier */ 73 uint64_t perm:2; /* IOMMU_RW flags */ 74 }; 75 76 /* IOMMU index for transactions without process_id specified. */ 77 #define RISCV_IOMMU_NOPROCID 0 78 79 static uint8_t riscv_iommu_get_icvec_vector(uint32_t icvec, uint32_t vec_type) 80 { 81 switch (vec_type) { 82 case RISCV_IOMMU_INTR_CQ: 83 return icvec & RISCV_IOMMU_ICVEC_CIV; 84 case RISCV_IOMMU_INTR_FQ: 85 return (icvec & RISCV_IOMMU_ICVEC_FIV) >> 4; 86 case RISCV_IOMMU_INTR_PM: 87 return (icvec & RISCV_IOMMU_ICVEC_PMIV) >> 8; 88 case RISCV_IOMMU_INTR_PQ: 89 return (icvec & RISCV_IOMMU_ICVEC_PIV) >> 12; 90 default: 91 g_assert_not_reached(); 92 } 93 } 94 95 static void riscv_iommu_notify(RISCVIOMMUState *s, int vec_type) 96 { 97 uint32_t ipsr, icvec, vector; 98 99 if (!s->notify) { 100 return; 101 } 102 103 icvec = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_ICVEC); 104 ipsr = riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_IPSR, (1 << vec_type), 0); 105 106 if (!(ipsr & (1 << vec_type))) { 107 vector = riscv_iommu_get_icvec_vector(icvec, vec_type); 108 s->notify(s, vector); 109 trace_riscv_iommu_notify_int_vector(vec_type, vector); 110 } 111 } 112 113 static void 
riscv_iommu_fault(RISCVIOMMUState *s, 114 struct riscv_iommu_fq_record *ev) 115 { 116 uint32_t ctrl = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_FQCSR); 117 uint32_t head = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_FQH) & s->fq_mask; 118 uint32_t tail = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_FQT) & s->fq_mask; 119 uint32_t next = (tail + 1) & s->fq_mask; 120 uint32_t devid = get_field(ev->hdr, RISCV_IOMMU_FQ_HDR_DID); 121 122 trace_riscv_iommu_flt(s->parent_obj.id, PCI_BUS_NUM(devid), PCI_SLOT(devid), 123 PCI_FUNC(devid), ev->hdr, ev->iotval); 124 125 if (!(ctrl & RISCV_IOMMU_FQCSR_FQON) || 126 !!(ctrl & (RISCV_IOMMU_FQCSR_FQOF | RISCV_IOMMU_FQCSR_FQMF))) { 127 return; 128 } 129 130 if (head == next) { 131 riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_FQCSR, 132 RISCV_IOMMU_FQCSR_FQOF, 0); 133 } else { 134 dma_addr_t addr = s->fq_addr + tail * sizeof(*ev); 135 if (dma_memory_write(s->target_as, addr, ev, sizeof(*ev), 136 MEMTXATTRS_UNSPECIFIED) != MEMTX_OK) { 137 riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_FQCSR, 138 RISCV_IOMMU_FQCSR_FQMF, 0); 139 } else { 140 riscv_iommu_reg_set32(s, RISCV_IOMMU_REG_FQT, next); 141 } 142 } 143 144 if (ctrl & RISCV_IOMMU_FQCSR_FIE) { 145 riscv_iommu_notify(s, RISCV_IOMMU_INTR_FQ); 146 } 147 } 148 149 static void riscv_iommu_pri(RISCVIOMMUState *s, 150 struct riscv_iommu_pq_record *pr) 151 { 152 uint32_t ctrl = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_PQCSR); 153 uint32_t head = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_PQH) & s->pq_mask; 154 uint32_t tail = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_PQT) & s->pq_mask; 155 uint32_t next = (tail + 1) & s->pq_mask; 156 uint32_t devid = get_field(pr->hdr, RISCV_IOMMU_PREQ_HDR_DID); 157 158 trace_riscv_iommu_pri(s->parent_obj.id, PCI_BUS_NUM(devid), PCI_SLOT(devid), 159 PCI_FUNC(devid), pr->payload); 160 161 if (!(ctrl & RISCV_IOMMU_PQCSR_PQON) || 162 !!(ctrl & (RISCV_IOMMU_PQCSR_PQOF | RISCV_IOMMU_PQCSR_PQMF))) { 163 return; 164 } 165 166 if (head == next) { 167 riscv_iommu_reg_mod32(s, 
RISCV_IOMMU_REG_PQCSR, 168 RISCV_IOMMU_PQCSR_PQOF, 0); 169 } else { 170 dma_addr_t addr = s->pq_addr + tail * sizeof(*pr); 171 if (dma_memory_write(s->target_as, addr, pr, sizeof(*pr), 172 MEMTXATTRS_UNSPECIFIED) != MEMTX_OK) { 173 riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_PQCSR, 174 RISCV_IOMMU_PQCSR_PQMF, 0); 175 } else { 176 riscv_iommu_reg_set32(s, RISCV_IOMMU_REG_PQT, next); 177 } 178 } 179 180 if (ctrl & RISCV_IOMMU_PQCSR_PIE) { 181 riscv_iommu_notify(s, RISCV_IOMMU_INTR_PQ); 182 } 183 } 184 185 /* 186 * Discards all bits from 'val' whose matching bits in the same 187 * positions in the mask 'ext' are zeros, and packs the remaining 188 * bits from 'val' contiguously at the least-significant end of the 189 * result, keeping the same bit order as 'val' and filling any 190 * other bits at the most-significant end of the result with zeros. 191 * 192 * For example, for the following 'val' and 'ext', the return 'ret' 193 * will be: 194 * 195 * val = a b c d e f g h 196 * ext = 1 0 1 0 0 1 1 0 197 * ret = 0 0 0 0 a c f g 198 * 199 * This function, taken from the riscv-iommu 1.0 spec, section 2.3.3 200 * "Process to translate addresses of MSIs", is similar to bit manip 201 * function PEXT (Parallel bits extract) from x86. 202 */ 203 static uint64_t riscv_iommu_pext_u64(uint64_t val, uint64_t ext) 204 { 205 uint64_t ret = 0; 206 uint64_t rot = 1; 207 208 while (ext) { 209 if (ext & 1) { 210 if (val & 1) { 211 ret |= rot; 212 } 213 rot <<= 1; 214 } 215 val >>= 1; 216 ext >>= 1; 217 } 218 219 return ret; 220 } 221 222 /* Check if GPA matches MSI/MRIF pattern. 
*/ 223 static bool riscv_iommu_msi_check(RISCVIOMMUState *s, RISCVIOMMUContext *ctx, 224 dma_addr_t gpa) 225 { 226 if (!s->enable_msi) { 227 return false; 228 } 229 230 if (get_field(ctx->msiptp, RISCV_IOMMU_DC_MSIPTP_MODE) != 231 RISCV_IOMMU_DC_MSIPTP_MODE_FLAT) { 232 return false; /* Invalid MSI/MRIF mode */ 233 } 234 235 if ((PPN_DOWN(gpa) ^ ctx->msi_addr_pattern) & ~ctx->msi_addr_mask) { 236 return false; /* GPA not in MSI range defined by AIA IMSIC rules. */ 237 } 238 239 return true; 240 } 241 242 /* 243 * RISCV IOMMU Address Translation Lookup - Page Table Walk 244 * 245 * Note: Code is based on get_physical_address() from target/riscv/cpu_helper.c 246 * Both implementation can be merged into single helper function in future. 247 * Keeping them separate for now, as error reporting and flow specifics are 248 * sufficiently different for separate implementation. 249 * 250 * @s : IOMMU Device State 251 * @ctx : Translation context for device id and process address space id. 252 * @iotlb : translation data: physical address and access mode. 253 * @return : success or fault cause code. 254 */ 255 static int riscv_iommu_spa_fetch(RISCVIOMMUState *s, RISCVIOMMUContext *ctx, 256 IOMMUTLBEntry *iotlb) 257 { 258 dma_addr_t addr, base; 259 uint64_t satp, gatp, pte; 260 bool en_s, en_g; 261 struct { 262 unsigned char step; 263 unsigned char levels; 264 unsigned char ptidxbits; 265 unsigned char ptesize; 266 } sc[2]; 267 /* Translation stage phase */ 268 enum { 269 S_STAGE = 0, 270 G_STAGE = 1, 271 } pass; 272 MemTxResult ret; 273 274 satp = get_field(ctx->satp, RISCV_IOMMU_ATP_MODE_FIELD); 275 gatp = get_field(ctx->gatp, RISCV_IOMMU_ATP_MODE_FIELD); 276 277 en_s = satp != RISCV_IOMMU_DC_FSC_MODE_BARE; 278 en_g = gatp != RISCV_IOMMU_DC_IOHGATP_MODE_BARE; 279 280 /* 281 * Early check for MSI address match when IOVA == GPA. 
282 * Note that the (!en_s) condition means that the MSI 283 * page table may only be used when guest pages are 284 * mapped using the g-stage page table, whether single- 285 * or two-stage paging is enabled. It's unavoidable though, 286 * because the spec mandates that we do a first-stage 287 * translation before we check the MSI page table, which 288 * means we can't do an early MSI check unless we have 289 * strictly !en_s. 290 */ 291 if (!en_s && (iotlb->perm & IOMMU_WO) && 292 riscv_iommu_msi_check(s, ctx, iotlb->iova)) { 293 iotlb->target_as = &s->trap_as; 294 iotlb->translated_addr = iotlb->iova; 295 iotlb->addr_mask = ~TARGET_PAGE_MASK; 296 return 0; 297 } 298 299 /* Exit early for pass-through mode. */ 300 if (!(en_s || en_g)) { 301 iotlb->translated_addr = iotlb->iova; 302 iotlb->addr_mask = ~TARGET_PAGE_MASK; 303 /* Allow R/W in pass-through mode */ 304 iotlb->perm = IOMMU_RW; 305 return 0; 306 } 307 308 /* S/G translation parameters. */ 309 for (pass = 0; pass < 2; pass++) { 310 uint32_t sv_mode; 311 312 sc[pass].step = 0; 313 if (pass ? (s->fctl & RISCV_IOMMU_FCTL_GXL) : 314 (ctx->tc & RISCV_IOMMU_DC_TC_SXL)) { 315 /* 32bit mode for GXL/SXL == 1 */ 316 switch (pass ? gatp : satp) { 317 case RISCV_IOMMU_DC_IOHGATP_MODE_BARE: 318 sc[pass].levels = 0; 319 sc[pass].ptidxbits = 0; 320 sc[pass].ptesize = 0; 321 break; 322 case RISCV_IOMMU_DC_IOHGATP_MODE_SV32X4: 323 sv_mode = pass ? RISCV_IOMMU_CAP_SV32X4 : RISCV_IOMMU_CAP_SV32; 324 if (!(s->cap & sv_mode)) { 325 return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED; 326 } 327 sc[pass].levels = 2; 328 sc[pass].ptidxbits = 10; 329 sc[pass].ptesize = 4; 330 break; 331 default: 332 return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED; 333 } 334 } else { 335 /* 64bit mode for GXL/SXL == 0 */ 336 switch (pass ? gatp : satp) { 337 case RISCV_IOMMU_DC_IOHGATP_MODE_BARE: 338 sc[pass].levels = 0; 339 sc[pass].ptidxbits = 0; 340 sc[pass].ptesize = 0; 341 break; 342 case RISCV_IOMMU_DC_IOHGATP_MODE_SV39X4: 343 sv_mode = pass ? 
RISCV_IOMMU_CAP_SV39X4 : RISCV_IOMMU_CAP_SV39; 344 if (!(s->cap & sv_mode)) { 345 return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED; 346 } 347 sc[pass].levels = 3; 348 sc[pass].ptidxbits = 9; 349 sc[pass].ptesize = 8; 350 break; 351 case RISCV_IOMMU_DC_IOHGATP_MODE_SV48X4: 352 sv_mode = pass ? RISCV_IOMMU_CAP_SV48X4 : RISCV_IOMMU_CAP_SV48; 353 if (!(s->cap & sv_mode)) { 354 return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED; 355 } 356 sc[pass].levels = 4; 357 sc[pass].ptidxbits = 9; 358 sc[pass].ptesize = 8; 359 break; 360 case RISCV_IOMMU_DC_IOHGATP_MODE_SV57X4: 361 sv_mode = pass ? RISCV_IOMMU_CAP_SV57X4 : RISCV_IOMMU_CAP_SV57; 362 if (!(s->cap & sv_mode)) { 363 return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED; 364 } 365 sc[pass].levels = 5; 366 sc[pass].ptidxbits = 9; 367 sc[pass].ptesize = 8; 368 break; 369 default: 370 return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED; 371 } 372 } 373 }; 374 375 /* S/G stages translation tables root pointers */ 376 gatp = PPN_PHYS(get_field(ctx->gatp, RISCV_IOMMU_ATP_PPN_FIELD)); 377 satp = PPN_PHYS(get_field(ctx->satp, RISCV_IOMMU_ATP_PPN_FIELD)); 378 addr = (en_s && en_g) ? satp : iotlb->iova; 379 base = en_g ? gatp : satp; 380 pass = en_g ? G_STAGE : S_STAGE; 381 382 do { 383 const unsigned widened = (pass && !sc[pass].step) ? 2 : 0; 384 const unsigned va_bits = widened + sc[pass].ptidxbits; 385 const unsigned va_skip = TARGET_PAGE_BITS + sc[pass].ptidxbits * 386 (sc[pass].levels - 1 - sc[pass].step); 387 const unsigned idx = (addr >> va_skip) & ((1 << va_bits) - 1); 388 const dma_addr_t pte_addr = base + idx * sc[pass].ptesize; 389 const bool ade = 390 ctx->tc & (pass ? 
RISCV_IOMMU_DC_TC_GADE : RISCV_IOMMU_DC_TC_SADE); 391 392 /* Address range check before first level lookup */ 393 if (!sc[pass].step) { 394 const uint64_t va_len = va_skip + va_bits; 395 const uint64_t va_mask = (1ULL << va_len) - 1; 396 397 if (pass == S_STAGE && va_len > 32) { 398 target_ulong mask, masked_msbs; 399 400 mask = (1L << (TARGET_LONG_BITS - (va_len - 1))) - 1; 401 masked_msbs = (addr >> (va_len - 1)) & mask; 402 403 if (masked_msbs != 0 && masked_msbs != mask) { 404 return (iotlb->perm & IOMMU_WO) ? 405 RISCV_IOMMU_FQ_CAUSE_WR_FAULT_S : 406 RISCV_IOMMU_FQ_CAUSE_RD_FAULT_S; 407 } 408 } else { 409 if ((addr & va_mask) != addr) { 410 return (iotlb->perm & IOMMU_WO) ? 411 RISCV_IOMMU_FQ_CAUSE_WR_FAULT_VS : 412 RISCV_IOMMU_FQ_CAUSE_RD_FAULT_VS; 413 } 414 } 415 } 416 417 /* Read page table entry */ 418 if (sc[pass].ptesize == 4) { 419 uint32_t pte32 = 0; 420 ret = ldl_le_dma(s->target_as, pte_addr, &pte32, 421 MEMTXATTRS_UNSPECIFIED); 422 pte = pte32; 423 } else { 424 ret = ldq_le_dma(s->target_as, pte_addr, &pte, 425 MEMTXATTRS_UNSPECIFIED); 426 } 427 if (ret != MEMTX_OK) { 428 return (iotlb->perm & IOMMU_WO) ? 
RISCV_IOMMU_FQ_CAUSE_WR_FAULT 429 : RISCV_IOMMU_FQ_CAUSE_RD_FAULT; 430 } 431 432 sc[pass].step++; 433 hwaddr ppn = pte >> PTE_PPN_SHIFT; 434 435 if (!(pte & PTE_V)) { 436 break; /* Invalid PTE */ 437 } else if (!(pte & (PTE_R | PTE_W | PTE_X))) { 438 base = PPN_PHYS(ppn); /* Inner PTE, continue walking */ 439 } else if ((pte & (PTE_R | PTE_W | PTE_X)) == PTE_W) { 440 break; /* Reserved leaf PTE flags: PTE_W */ 441 } else if ((pte & (PTE_R | PTE_W | PTE_X)) == (PTE_W | PTE_X)) { 442 break; /* Reserved leaf PTE flags: PTE_W + PTE_X */ 443 } else if (ppn & ((1ULL << (va_skip - TARGET_PAGE_BITS)) - 1)) { 444 break; /* Misaligned PPN */ 445 } else if ((iotlb->perm & IOMMU_RO) && !(pte & PTE_R)) { 446 break; /* Read access check failed */ 447 } else if ((iotlb->perm & IOMMU_WO) && !(pte & PTE_W)) { 448 break; /* Write access check failed */ 449 } else if ((iotlb->perm & IOMMU_RO) && !ade && !(pte & PTE_A)) { 450 break; /* Access bit not set */ 451 } else if ((iotlb->perm & IOMMU_WO) && !ade && !(pte & PTE_D)) { 452 break; /* Dirty bit not set */ 453 } else { 454 /* Leaf PTE, translation completed. */ 455 sc[pass].step = sc[pass].levels; 456 base = PPN_PHYS(ppn) | (addr & ((1ULL << va_skip) - 1)); 457 /* Update address mask based on smallest translation granularity */ 458 iotlb->addr_mask &= (1ULL << va_skip) - 1; 459 /* Continue with S-Stage translation? */ 460 if (pass && sc[0].step != sc[0].levels) { 461 pass = S_STAGE; 462 addr = iotlb->iova; 463 continue; 464 } 465 /* Translation phase completed (GPA or SPA) */ 466 iotlb->translated_addr = base; 467 iotlb->perm = (pte & PTE_W) ? ((pte & PTE_R) ? IOMMU_RW : IOMMU_WO) 468 : IOMMU_RO; 469 470 /* Check MSI GPA address match */ 471 if (pass == S_STAGE && (iotlb->perm & IOMMU_WO) && 472 riscv_iommu_msi_check(s, ctx, base)) { 473 /* Trap MSI writes and return GPA address. */ 474 iotlb->target_as = &s->trap_as; 475 iotlb->addr_mask = ~TARGET_PAGE_MASK; 476 return 0; 477 } 478 479 /* Continue with G-Stage translation? 
*/ 480 if (!pass && en_g) { 481 pass = G_STAGE; 482 addr = base; 483 base = gatp; 484 sc[pass].step = 0; 485 continue; 486 } 487 488 return 0; 489 } 490 491 if (sc[pass].step == sc[pass].levels) { 492 break; /* Can't find leaf PTE */ 493 } 494 495 /* Continue with G-Stage translation? */ 496 if (!pass && en_g) { 497 pass = G_STAGE; 498 addr = base; 499 base = gatp; 500 sc[pass].step = 0; 501 } 502 } while (1); 503 504 return (iotlb->perm & IOMMU_WO) ? 505 (pass ? RISCV_IOMMU_FQ_CAUSE_WR_FAULT_VS : 506 RISCV_IOMMU_FQ_CAUSE_WR_FAULT_S) : 507 (pass ? RISCV_IOMMU_FQ_CAUSE_RD_FAULT_VS : 508 RISCV_IOMMU_FQ_CAUSE_RD_FAULT_S); 509 } 510 511 static void riscv_iommu_report_fault(RISCVIOMMUState *s, 512 RISCVIOMMUContext *ctx, 513 uint32_t fault_type, uint32_t cause, 514 bool pv, 515 uint64_t iotval, uint64_t iotval2) 516 { 517 struct riscv_iommu_fq_record ev = { 0 }; 518 519 if (ctx->tc & RISCV_IOMMU_DC_TC_DTF) { 520 switch (cause) { 521 case RISCV_IOMMU_FQ_CAUSE_DMA_DISABLED: 522 case RISCV_IOMMU_FQ_CAUSE_DDT_LOAD_FAULT: 523 case RISCV_IOMMU_FQ_CAUSE_DDT_INVALID: 524 case RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED: 525 case RISCV_IOMMU_FQ_CAUSE_DDT_CORRUPTED: 526 case RISCV_IOMMU_FQ_CAUSE_INTERNAL_DP_ERROR: 527 case RISCV_IOMMU_FQ_CAUSE_MSI_WR_FAULT: 528 break; 529 default: 530 /* DTF prevents reporting a fault for this given cause */ 531 return; 532 } 533 } 534 535 ev.hdr = set_field(ev.hdr, RISCV_IOMMU_FQ_HDR_CAUSE, cause); 536 ev.hdr = set_field(ev.hdr, RISCV_IOMMU_FQ_HDR_TTYPE, fault_type); 537 ev.hdr = set_field(ev.hdr, RISCV_IOMMU_FQ_HDR_DID, ctx->devid); 538 ev.hdr = set_field(ev.hdr, RISCV_IOMMU_FQ_HDR_PV, true); 539 540 if (pv) { 541 ev.hdr = set_field(ev.hdr, RISCV_IOMMU_FQ_HDR_PID, ctx->process_id); 542 } 543 544 ev.iotval = iotval; 545 ev.iotval2 = iotval2; 546 547 riscv_iommu_fault(s, &ev); 548 } 549 550 /* Redirect MSI write for given GPA. 
*/ 551 static MemTxResult riscv_iommu_msi_write(RISCVIOMMUState *s, 552 RISCVIOMMUContext *ctx, uint64_t gpa, uint64_t data, 553 unsigned size, MemTxAttrs attrs) 554 { 555 MemTxResult res; 556 dma_addr_t addr; 557 uint64_t intn; 558 uint32_t n190; 559 uint64_t pte[2]; 560 int fault_type = RISCV_IOMMU_FQ_TTYPE_UADDR_WR; 561 int cause; 562 563 /* Interrupt File Number */ 564 intn = riscv_iommu_pext_u64(PPN_DOWN(gpa), ctx->msi_addr_mask); 565 if (intn >= 256) { 566 /* Interrupt file number out of range */ 567 res = MEMTX_ACCESS_ERROR; 568 cause = RISCV_IOMMU_FQ_CAUSE_MSI_LOAD_FAULT; 569 goto err; 570 } 571 572 /* fetch MSI PTE */ 573 addr = PPN_PHYS(get_field(ctx->msiptp, RISCV_IOMMU_DC_MSIPTP_PPN)); 574 addr = addr | (intn * sizeof(pte)); 575 res = dma_memory_read(s->target_as, addr, &pte, sizeof(pte), 576 MEMTXATTRS_UNSPECIFIED); 577 if (res != MEMTX_OK) { 578 if (res == MEMTX_DECODE_ERROR) { 579 cause = RISCV_IOMMU_FQ_CAUSE_MSI_PT_CORRUPTED; 580 } else { 581 cause = RISCV_IOMMU_FQ_CAUSE_MSI_LOAD_FAULT; 582 } 583 goto err; 584 } 585 586 le64_to_cpus(&pte[0]); 587 le64_to_cpus(&pte[1]); 588 589 if (!(pte[0] & RISCV_IOMMU_MSI_PTE_V) || (pte[0] & RISCV_IOMMU_MSI_PTE_C)) { 590 /* 591 * The spec mentions that: "If msipte.C == 1, then further 592 * processing to interpret the PTE is implementation 593 * defined.". We'll abort with cause = 262 for this 594 * case too. 
595 */ 596 res = MEMTX_ACCESS_ERROR; 597 cause = RISCV_IOMMU_FQ_CAUSE_MSI_INVALID; 598 goto err; 599 } 600 601 switch (get_field(pte[0], RISCV_IOMMU_MSI_PTE_M)) { 602 case RISCV_IOMMU_MSI_PTE_M_BASIC: 603 /* MSI Pass-through mode */ 604 addr = PPN_PHYS(get_field(pte[0], RISCV_IOMMU_MSI_PTE_PPN)); 605 606 trace_riscv_iommu_msi(s->parent_obj.id, PCI_BUS_NUM(ctx->devid), 607 PCI_SLOT(ctx->devid), PCI_FUNC(ctx->devid), 608 gpa, addr); 609 610 res = dma_memory_write(s->target_as, addr, &data, size, attrs); 611 if (res != MEMTX_OK) { 612 cause = RISCV_IOMMU_FQ_CAUSE_MSI_WR_FAULT; 613 goto err; 614 } 615 616 return MEMTX_OK; 617 case RISCV_IOMMU_MSI_PTE_M_MRIF: 618 /* MRIF mode, continue. */ 619 break; 620 default: 621 res = MEMTX_ACCESS_ERROR; 622 cause = RISCV_IOMMU_FQ_CAUSE_MSI_MISCONFIGURED; 623 goto err; 624 } 625 626 /* 627 * Report an error for interrupt identities exceeding the maximum allowed 628 * for an IMSIC interrupt file (2047) or destination address is not 32-bit 629 * aligned. See IOMMU Specification, Chapter 2.3. MSI page tables. 
630 */ 631 if ((data > 2047) || (gpa & 3)) { 632 res = MEMTX_ACCESS_ERROR; 633 cause = RISCV_IOMMU_FQ_CAUSE_MSI_MISCONFIGURED; 634 goto err; 635 } 636 637 /* MSI MRIF mode, non atomic pending bit update */ 638 639 /* MRIF pending bit address */ 640 addr = get_field(pte[0], RISCV_IOMMU_MSI_PTE_MRIF_ADDR) << 9; 641 addr = addr | ((data & 0x7c0) >> 3); 642 643 trace_riscv_iommu_msi(s->parent_obj.id, PCI_BUS_NUM(ctx->devid), 644 PCI_SLOT(ctx->devid), PCI_FUNC(ctx->devid), 645 gpa, addr); 646 647 /* MRIF pending bit mask */ 648 data = 1ULL << (data & 0x03f); 649 res = dma_memory_read(s->target_as, addr, &intn, sizeof(intn), attrs); 650 if (res != MEMTX_OK) { 651 cause = RISCV_IOMMU_FQ_CAUSE_MSI_LOAD_FAULT; 652 goto err; 653 } 654 655 intn = intn | data; 656 res = dma_memory_write(s->target_as, addr, &intn, sizeof(intn), attrs); 657 if (res != MEMTX_OK) { 658 cause = RISCV_IOMMU_FQ_CAUSE_MSI_WR_FAULT; 659 goto err; 660 } 661 662 /* Get MRIF enable bits */ 663 addr = addr + sizeof(intn); 664 res = dma_memory_read(s->target_as, addr, &intn, sizeof(intn), attrs); 665 if (res != MEMTX_OK) { 666 cause = RISCV_IOMMU_FQ_CAUSE_MSI_LOAD_FAULT; 667 goto err; 668 } 669 670 if (!(intn & data)) { 671 /* notification disabled, MRIF update completed. 
*/ 672 return MEMTX_OK; 673 } 674 675 /* Send notification message */ 676 addr = PPN_PHYS(get_field(pte[1], RISCV_IOMMU_MSI_MRIF_NPPN)); 677 n190 = get_field(pte[1], RISCV_IOMMU_MSI_MRIF_NID) | 678 (get_field(pte[1], RISCV_IOMMU_MSI_MRIF_NID_MSB) << 10); 679 680 res = dma_memory_write(s->target_as, addr, &n190, sizeof(n190), attrs); 681 if (res != MEMTX_OK) { 682 cause = RISCV_IOMMU_FQ_CAUSE_MSI_WR_FAULT; 683 goto err; 684 } 685 686 trace_riscv_iommu_mrif_notification(s->parent_obj.id, n190, addr); 687 688 return MEMTX_OK; 689 690 err: 691 riscv_iommu_report_fault(s, ctx, fault_type, cause, 692 !!ctx->process_id, 0, 0); 693 return res; 694 } 695 696 /* 697 * Check device context configuration as described by the 698 * riscv-iommu spec section "Device-context configuration 699 * checks". 700 */ 701 static bool riscv_iommu_validate_device_ctx(RISCVIOMMUState *s, 702 RISCVIOMMUContext *ctx) 703 { 704 uint32_t fsc_mode, msi_mode; 705 uint64_t gatp; 706 707 if (!(s->cap & RISCV_IOMMU_CAP_ATS) && 708 (ctx->tc & RISCV_IOMMU_DC_TC_EN_ATS || 709 ctx->tc & RISCV_IOMMU_DC_TC_EN_PRI || 710 ctx->tc & RISCV_IOMMU_DC_TC_PRPR)) { 711 return false; 712 } 713 714 if (!(ctx->tc & RISCV_IOMMU_DC_TC_EN_ATS) && 715 (ctx->tc & RISCV_IOMMU_DC_TC_T2GPA || 716 ctx->tc & RISCV_IOMMU_DC_TC_EN_PRI)) { 717 return false; 718 } 719 720 if (!(ctx->tc & RISCV_IOMMU_DC_TC_EN_PRI) && 721 ctx->tc & RISCV_IOMMU_DC_TC_PRPR) { 722 return false; 723 } 724 725 if (!(s->cap & RISCV_IOMMU_CAP_T2GPA) && 726 ctx->tc & RISCV_IOMMU_DC_TC_T2GPA) { 727 return false; 728 } 729 730 if (s->cap & RISCV_IOMMU_CAP_MSI_FLAT) { 731 msi_mode = get_field(ctx->msiptp, RISCV_IOMMU_DC_MSIPTP_MODE); 732 733 if (msi_mode != RISCV_IOMMU_DC_MSIPTP_MODE_OFF && 734 msi_mode != RISCV_IOMMU_DC_MSIPTP_MODE_FLAT) { 735 return false; 736 } 737 } 738 739 gatp = get_field(ctx->gatp, RISCV_IOMMU_ATP_MODE_FIELD); 740 if (ctx->tc & RISCV_IOMMU_DC_TC_T2GPA && 741 gatp == RISCV_IOMMU_DC_IOHGATP_MODE_BARE) { 742 return false; 743 } 744 745 
fsc_mode = get_field(ctx->satp, RISCV_IOMMU_DC_FSC_MODE); 746 747 if (ctx->tc & RISCV_IOMMU_DC_TC_PDTV) { 748 switch (fsc_mode) { 749 case RISCV_IOMMU_DC_FSC_PDTP_MODE_PD8: 750 if (!(s->cap & RISCV_IOMMU_CAP_PD8)) { 751 return false; 752 } 753 break; 754 case RISCV_IOMMU_DC_FSC_PDTP_MODE_PD17: 755 if (!(s->cap & RISCV_IOMMU_CAP_PD17)) { 756 return false; 757 } 758 break; 759 case RISCV_IOMMU_DC_FSC_PDTP_MODE_PD20: 760 if (!(s->cap & RISCV_IOMMU_CAP_PD20)) { 761 return false; 762 } 763 break; 764 } 765 } else { 766 /* DC.tc.PDTV is 0 */ 767 if (ctx->tc & RISCV_IOMMU_DC_TC_DPE) { 768 return false; 769 } 770 771 if (ctx->tc & RISCV_IOMMU_DC_TC_SXL) { 772 if (fsc_mode == RISCV_IOMMU_CAP_SV32 && 773 !(s->cap & RISCV_IOMMU_CAP_SV32)) { 774 return false; 775 } 776 } else { 777 switch (fsc_mode) { 778 case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV39: 779 if (!(s->cap & RISCV_IOMMU_CAP_SV39)) { 780 return false; 781 } 782 break; 783 case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV48: 784 if (!(s->cap & RISCV_IOMMU_CAP_SV48)) { 785 return false; 786 } 787 break; 788 case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV57: 789 if (!(s->cap & RISCV_IOMMU_CAP_SV57)) { 790 return false; 791 } 792 break; 793 } 794 } 795 } 796 797 /* 798 * CAP_END is always zero (only one endianess). FCTL_BE is 799 * always zero (little-endian accesses). Thus TC_SBE must 800 * always be LE, i.e. zero. 801 */ 802 if (ctx->tc & RISCV_IOMMU_DC_TC_SBE) { 803 return false; 804 } 805 806 return true; 807 } 808 809 /* 810 * Validate process context (PC) according to section 811 * "Process-context configuration checks". 
812 */ 813 static bool riscv_iommu_validate_process_ctx(RISCVIOMMUState *s, 814 RISCVIOMMUContext *ctx) 815 { 816 uint32_t mode; 817 818 if (get_field(ctx->ta, RISCV_IOMMU_PC_TA_RESERVED)) { 819 return false; 820 } 821 822 if (get_field(ctx->satp, RISCV_IOMMU_PC_FSC_RESERVED)) { 823 return false; 824 } 825 826 mode = get_field(ctx->satp, RISCV_IOMMU_DC_FSC_MODE); 827 switch (mode) { 828 case RISCV_IOMMU_DC_FSC_MODE_BARE: 829 /* sv39 and sv32 modes have the same value (8) */ 830 case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV39: 831 case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV48: 832 case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV57: 833 break; 834 default: 835 return false; 836 } 837 838 if (ctx->tc & RISCV_IOMMU_DC_TC_SXL) { 839 if (mode == RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV32 && 840 !(s->cap & RISCV_IOMMU_CAP_SV32)) { 841 return false; 842 } 843 } else { 844 switch (mode) { 845 case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV39: 846 if (!(s->cap & RISCV_IOMMU_CAP_SV39)) { 847 return false; 848 } 849 break; 850 case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV48: 851 if (!(s->cap & RISCV_IOMMU_CAP_SV48)) { 852 return false; 853 } 854 break; 855 case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV57: 856 if (!(s->cap & RISCV_IOMMU_CAP_SV57)) { 857 return false; 858 } 859 break; 860 } 861 } 862 863 return true; 864 } 865 866 /* 867 * RISC-V IOMMU Device Context Loopkup - Device Directory Tree Walk 868 * 869 * @s : IOMMU Device State 870 * @ctx : Device Translation Context with devid and process_id set. 871 * @return : success or fault code. 
 */
static int riscv_iommu_ctx_fetch(RISCVIOMMUState *s, RISCVIOMMUContext *ctx)
{
    const uint64_t ddtp = s->ddtp;
    unsigned mode = get_field(ddtp, RISCV_IOMMU_DDTP_MODE);
    dma_addr_t addr = PPN_PHYS(get_field(ddtp, RISCV_IOMMU_DDTP_PPN));
    struct riscv_iommu_dc dc;
    /* Device Context format: 0: extended (64 bytes) | 1: base (32 bytes) */
    const int dc_fmt = !s->enable_msi;
    /* Bytes to read per device context: half of the struct in base format */
    const size_t dc_len = sizeof(dc) >> dc_fmt;
    int depth;
    uint64_t de;

    switch (mode) {
    case RISCV_IOMMU_DDTP_MODE_OFF:
        return RISCV_IOMMU_FQ_CAUSE_DMA_DISABLED;

    case RISCV_IOMMU_DDTP_MODE_BARE:
        /* mock up pass-through translation context */
        ctx->gatp = set_field(0, RISCV_IOMMU_ATP_MODE_FIELD,
                              RISCV_IOMMU_DC_IOHGATP_MODE_BARE);
        ctx->satp = set_field(0, RISCV_IOMMU_ATP_MODE_FIELD,
                              RISCV_IOMMU_DC_FSC_MODE_BARE);

        ctx->tc = RISCV_IOMMU_DC_TC_V;
        if (s->enable_ats) {
            ctx->tc |= RISCV_IOMMU_DC_TC_EN_ATS;
        }

        ctx->ta = 0;
        ctx->msiptp = 0;
        return 0;

    /* 'depth' is the number of non-leaf directory levels to walk. */
    case RISCV_IOMMU_DDTP_MODE_1LVL:
        depth = 0;
        break;

    case RISCV_IOMMU_DDTP_MODE_2LVL:
        depth = 1;
        break;

    case RISCV_IOMMU_DDTP_MODE_3LVL:
        depth = 2;
        break;

    default:
        return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED;
    }

    /*
     * Check supported device id width (in bits).
     * See IOMMU Specification, Chapter 6. Software guidelines.
     * - if extended device-context format is used:
     *   1LVL: 6, 2LVL: 15, 3LVL: 24
     * - if base device-context format is used:
     *   1LVL: 7, 2LVL: 16, 3LVL: 24
     */
    if (ctx->devid >= (1 << (depth * 9 + 6 + (dc_fmt && depth != 2)))) {
        return RISCV_IOMMU_FQ_CAUSE_TTYPE_BLOCKED;
    }

    /* Device directory tree walk */
    for (; depth-- > 0; ) {
        /*
         * Select device id index bits based on device directory tree level
         * and device context format.
         * See IOMMU Specification, Chapter 2. Data Structures.
         * - if extended device-context format is used:
         *   device index: [23:15][14:6][5:0]
         * - if base device-context format is used:
         *   device index: [23:16][15:7][6:0]
         */
        const int split = depth * 9 + 6 + dc_fmt;
        /* Each directory entry is 8 bytes; keep the offset within one page. */
        addr |= ((ctx->devid >> split) << 3) & ~TARGET_PAGE_MASK;
        if (dma_memory_read(s->target_as, addr, &de, sizeof(de),
                            MEMTXATTRS_UNSPECIFIED) != MEMTX_OK) {
            return RISCV_IOMMU_FQ_CAUSE_DDT_LOAD_FAULT;
        }
        le64_to_cpus(&de);
        if (!(de & RISCV_IOMMU_DDTE_VALID)) {
            /* invalid directory entry */
            return RISCV_IOMMU_FQ_CAUSE_DDT_INVALID;
        }
        if (de & ~(RISCV_IOMMU_DDTE_PPN | RISCV_IOMMU_DDTE_VALID)) {
            /* reserved bits set */
            return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED;
        }
        addr = PPN_PHYS(get_field(de, RISCV_IOMMU_DDTE_PPN));
    }

    /* index into device context entry page */
    addr |= (ctx->devid * dc_len) & ~TARGET_PAGE_MASK;

    /* Zero first so fields beyond dc_len read as 0 in the base format. */
    memset(&dc, 0, sizeof(dc));
    if (dma_memory_read(s->target_as, addr, &dc, dc_len,
                        MEMTXATTRS_UNSPECIFIED) != MEMTX_OK) {
        return RISCV_IOMMU_FQ_CAUSE_DDT_LOAD_FAULT;
    }

    /* Set translation context. */
    ctx->tc = le64_to_cpu(dc.tc);
    ctx->gatp = le64_to_cpu(dc.iohgatp);
    ctx->satp = le64_to_cpu(dc.fsc);
    ctx->ta = le64_to_cpu(dc.ta);
    ctx->msiptp = le64_to_cpu(dc.msiptp);
    ctx->msi_addr_mask = le64_to_cpu(dc.msi_addr_mask);
    ctx->msi_addr_pattern = le64_to_cpu(dc.msi_addr_pattern);

    if (!(ctx->tc & RISCV_IOMMU_DC_TC_V)) {
        return RISCV_IOMMU_FQ_CAUSE_DDT_INVALID;
    }

    if (!riscv_iommu_validate_device_ctx(s, ctx)) {
        return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED;
    }

    /* FSC field checks */
    mode = get_field(ctx->satp, RISCV_IOMMU_DC_FSC_MODE);
    addr = PPN_PHYS(get_field(ctx->satp, RISCV_IOMMU_DC_FSC_PPN));

    if (!(ctx->tc & RISCV_IOMMU_DC_TC_PDTV)) {
        /* No process directory: DC.fsc is used directly for translation. */
        if (ctx->process_id != RISCV_IOMMU_NOPROCID) {
            /* PID is disabled */
            return RISCV_IOMMU_FQ_CAUSE_TTYPE_BLOCKED;
        }
        if (mode > RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV57) {
            /* Invalid translation mode */
            return RISCV_IOMMU_FQ_CAUSE_DDT_INVALID;
        }
        return 0;
    }

    if (ctx->process_id == RISCV_IOMMU_NOPROCID) {
        if (!(ctx->tc & RISCV_IOMMU_DC_TC_DPE)) {
            /* No default process_id enabled, set BARE mode */
            ctx->satp = 0ULL;
            return 0;
        } else {
            /* Use default process_id #0 */
            ctx->process_id = 0;
        }
    }

    if (mode == RISCV_IOMMU_DC_FSC_MODE_BARE) {
        /* No S-Stage translation, done. */
        return 0;
    }

    /* FSC.TC.PDTV enabled */
    if (mode > RISCV_IOMMU_DC_FSC_PDTP_MODE_PD20) {
        /* Invalid PDTP.MODE */
        return RISCV_IOMMU_FQ_CAUSE_PDT_MISCONFIGURED;
    }

    /*
     * Process directory tree walk over the non-leaf levels.
     *
     * NOTE(review): non-leaf entries are decoded here with the
     * process-context masks (RISCV_IOMMU_PC_TA_V, RISCV_IOMMU_PC_FSC_PPN),
     * while the device directory walk above uses dedicated DDTE masks -
     * confirm these match the spec's non-leaf PDT entry layout.
     */
    for (depth = mode - RISCV_IOMMU_DC_FSC_PDTP_MODE_PD8; depth-- > 0; ) {
        /*
         * Select process id index bits based on process directory tree
         * level. See IOMMU Specification, 2.2. Process-Directory-Table.
         */
        const int split = depth * 9 + 8;
        addr |= ((ctx->process_id >> split) << 3) & ~TARGET_PAGE_MASK;
        if (dma_memory_read(s->target_as, addr, &de, sizeof(de),
                            MEMTXATTRS_UNSPECIFIED) != MEMTX_OK) {
            return RISCV_IOMMU_FQ_CAUSE_PDT_LOAD_FAULT;
        }
        le64_to_cpus(&de);
        if (!(de & RISCV_IOMMU_PC_TA_V)) {
            return RISCV_IOMMU_FQ_CAUSE_PDT_INVALID;
        }
        addr = PPN_PHYS(get_field(de, RISCV_IOMMU_PC_FSC_PPN));
    }

    /* Leaf entry in PDT */
    addr |= (ctx->process_id << 4) & ~TARGET_PAGE_MASK;
    /*
     * The 16-byte process context is read over dc.ta and the 8 bytes that
     * follow it; the code below relies on that being dc.fsc.
     */
    if (dma_memory_read(s->target_as, addr, &dc.ta, sizeof(uint64_t) * 2,
                        MEMTXATTRS_UNSPECIFIED) != MEMTX_OK) {
        return RISCV_IOMMU_FQ_CAUSE_PDT_LOAD_FAULT;
    }

    /* Use FSC and TA from process directory entry. */
    ctx->ta = le64_to_cpu(dc.ta);
    ctx->satp = le64_to_cpu(dc.fsc);

    if (!(ctx->ta & RISCV_IOMMU_PC_TA_V)) {
        return RISCV_IOMMU_FQ_CAUSE_PDT_INVALID;
    }

    if (!riscv_iommu_validate_process_ctx(s, ctx)) {
        return RISCV_IOMMU_FQ_CAUSE_PDT_MISCONFIGURED;
    }

    return 0;
}

/* Translation Context cache support */

/* Hash table equality: two contexts match on the {devid, process_id} pair. */
static gboolean riscv_iommu_ctx_equal(gconstpointer v1, gconstpointer v2)
{
    RISCVIOMMUContext *c1 = (RISCVIOMMUContext *) v1;
    RISCVIOMMUContext *c2 = (RISCVIOMMUContext *) v2;
    return c1->devid == c2->devid &&
           c1->process_id == c2->process_id;
}

/* Hash table hash function over the {devid, process_id} pair. */
static guint riscv_iommu_ctx_hash(gconstpointer v)
{
    RISCVIOMMUContext *ctx = (RISCVIOMMUContext *) v;
    /*
     * Generate simple hash of (process_id, devid)
     * assuming 24-bit wide devid.
     */
    return (guint)(ctx->devid) + ((guint)(ctx->process_id) << 24);
}

/*
 * g_hash_table_foreach() callback: clear the valid bit of the cached context
 * 'value' when both its devid and process_id match those of 'data'.
 */
static void riscv_iommu_ctx_inval_devid_procid(gpointer key, gpointer value,
                                               gpointer data)
{
    RISCVIOMMUContext *ctx = (RISCVIOMMUContext *) value;
    RISCVIOMMUContext *arg = (RISCVIOMMUContext *) data;
    if (ctx->tc & RISCV_IOMMU_DC_TC_V &&
        ctx->devid == arg->devid &&
        ctx->process_id == arg->process_id) {
        ctx->tc &= ~RISCV_IOMMU_DC_TC_V;
    }
}

/*
 * g_hash_table_foreach() callback: clear the valid bit of the cached context
 * 'value' when its devid matches that of 'data' (any process_id).
 */
static void riscv_iommu_ctx_inval_devid(gpointer key, gpointer value,
                                        gpointer data)
{
    RISCVIOMMUContext *ctx = (RISCVIOMMUContext *) value;
    RISCVIOMMUContext *arg = (RISCVIOMMUContext *) data;
    if (ctx->tc & RISCV_IOMMU_DC_TC_V &&
        ctx->devid == arg->devid) {
        ctx->tc &= ~RISCV_IOMMU_DC_TC_V;
    }
}

/*
 * g_hash_table_foreach() callback: clear the valid bit of every cached
 * context; 'data' is not used.
 */
static void riscv_iommu_ctx_inval_all(gpointer key, gpointer value,
                                      gpointer data)
{
    RISCVIOMMUContext *ctx = (RISCVIOMMUContext *) value;
    if (ctx->tc & RISCV_IOMMU_DC_TC_V) {
        ctx->tc &= ~RISCV_IOMMU_DC_TC_V;
    }
}

/*
 * Run the invalidation callback 'func' over every entry of the context
 * cache, passing it a key built from 'devid' and 'process_id'. An extra
 * reference on the hash table is held for the duration of the iteration.
 */
static void riscv_iommu_ctx_inval(RISCVIOMMUState *s, GHFunc func,
                                  uint32_t devid, uint32_t process_id)
{
    GHashTable *ctx_cache;
    RISCVIOMMUContext key = {
        .devid = devid,
        .process_id = process_id,
    };
    ctx_cache = g_hash_table_ref(s->ctx_cache);
    g_hash_table_foreach(ctx_cache, func, &key);
    g_hash_table_unref(ctx_cache);
}

/*
 * Find or allocate translation context for a given {device_id, process_id}
 *
 * On success returns the context and stores in *ref a referenced hash table
 * that the caller must release with riscv_iommu_ctx_put(). On failure a
 * fault is reported, *ref is set to NULL and NULL is returned.
 */
static RISCVIOMMUContext *riscv_iommu_ctx(RISCVIOMMUState *s,
                                          unsigned devid, unsigned process_id,
                                          void **ref)
{
    GHashTable *ctx_cache;
    RISCVIOMMUContext *ctx;
    RISCVIOMMUContext key = {
        .devid = devid,
        .process_id = process_id,
    };

    ctx_cache = g_hash_table_ref(s->ctx_cache);
    ctx = g_hash_table_lookup(ctx_cache, &key);

    /* Cache hit on a still-valid entry: hand out the reference taken above. */
    if (ctx && (ctx->tc & RISCV_IOMMU_DC_TC_V)) {
        *ref = ctx_cache;
        return ctx;
    }

    /* Miss, or the entry was invalidated: fetch a fresh context. */
    ctx = g_new0(RISCVIOMMUContext, 1);
    ctx->devid = devid;
    ctx->process_id = process_id;

    int fault = riscv_iommu_ctx_fetch(s, ctx);
    if (!fault) {
        if (g_hash_table_size(ctx_cache) >= LIMIT_CACHE_CTX) {
            /*
             * Cache is full: drop our reference to the old table and
             * install a new, empty one in its place. The extra ref taken
             * on the new table is the one returned through *ref.
             */
            g_hash_table_unref(ctx_cache);
            ctx_cache = g_hash_table_new_full(riscv_iommu_ctx_hash,
                                              riscv_iommu_ctx_equal,
                                              g_free, NULL);
            g_hash_table_ref(ctx_cache);
            g_hash_table_unref(qatomic_xchg(&s->ctx_cache, ctx_cache));
        }
        /* The table owns 'ctx' from here on (freed through g_free). */
        g_hash_table_add(ctx_cache, ctx);
        *ref = ctx_cache;
        return ctx;
    }

    g_hash_table_unref(ctx_cache);
    *ref = NULL;

    riscv_iommu_report_fault(s, ctx, RISCV_IOMMU_FQ_TTYPE_UADDR_RD,
                             fault, !!process_id, 0, 0);

    g_free(ctx);
    return NULL;
}

/* Release the hash table reference handed out by riscv_iommu_ctx(), if any. */
static void riscv_iommu_ctx_put(RISCVIOMMUState *s, void *ref)
{
    if (ref) {
        g_hash_table_unref((GHashTable *)ref);
    }
}

/* Find or allocate address space for a given device */
static AddressSpace *riscv_iommu_space(RISCVIOMMUState *s, uint32_t devid)
{
    RISCVIOMMUSpace *as;

    /* FIXME: PCIe bus remapping for attached endpoints.
*/ 1192 devid |= s->bus << 8; 1193 1194 QLIST_FOREACH(as, &s->spaces, list) { 1195 if (as->devid == devid) { 1196 break; 1197 } 1198 } 1199 1200 if (as == NULL) { 1201 char name[64]; 1202 as = g_new0(RISCVIOMMUSpace, 1); 1203 1204 as->iommu = s; 1205 as->devid = devid; 1206 1207 snprintf(name, sizeof(name), "riscv-iommu-%04x:%02x.%d-iova", 1208 PCI_BUS_NUM(as->devid), PCI_SLOT(as->devid), PCI_FUNC(as->devid)); 1209 1210 /* IOVA address space, untranslated addresses */ 1211 memory_region_init_iommu(&as->iova_mr, sizeof(as->iova_mr), 1212 TYPE_RISCV_IOMMU_MEMORY_REGION, 1213 OBJECT(as), "riscv_iommu", UINT64_MAX); 1214 address_space_init(&as->iova_as, MEMORY_REGION(&as->iova_mr), name); 1215 1216 QLIST_INSERT_HEAD(&s->spaces, as, list); 1217 1218 trace_riscv_iommu_new(s->parent_obj.id, PCI_BUS_NUM(as->devid), 1219 PCI_SLOT(as->devid), PCI_FUNC(as->devid)); 1220 } 1221 return &as->iova_as; 1222 } 1223 1224 /* Translation Object cache support */ 1225 static gboolean riscv_iommu_iot_equal(gconstpointer v1, gconstpointer v2) 1226 { 1227 RISCVIOMMUEntry *t1 = (RISCVIOMMUEntry *) v1; 1228 RISCVIOMMUEntry *t2 = (RISCVIOMMUEntry *) v2; 1229 return t1->gscid == t2->gscid && t1->pscid == t2->pscid && 1230 t1->iova == t2->iova; 1231 } 1232 1233 static guint riscv_iommu_iot_hash(gconstpointer v) 1234 { 1235 RISCVIOMMUEntry *t = (RISCVIOMMUEntry *) v; 1236 return (guint)t->iova; 1237 } 1238 1239 /* GV: 1 PSCV: 1 AV: 1 */ 1240 static void riscv_iommu_iot_inval_pscid_iova(gpointer key, gpointer value, 1241 gpointer data) 1242 { 1243 RISCVIOMMUEntry *iot = (RISCVIOMMUEntry *) value; 1244 RISCVIOMMUEntry *arg = (RISCVIOMMUEntry *) data; 1245 if (iot->gscid == arg->gscid && 1246 iot->pscid == arg->pscid && 1247 iot->iova == arg->iova) { 1248 iot->perm = IOMMU_NONE; 1249 } 1250 } 1251 1252 /* GV: 1 PSCV: 1 AV: 0 */ 1253 static void riscv_iommu_iot_inval_pscid(gpointer key, gpointer value, 1254 gpointer data) 1255 { 1256 RISCVIOMMUEntry *iot = (RISCVIOMMUEntry *) value; 1257 
RISCVIOMMUEntry *arg = (RISCVIOMMUEntry *) data; 1258 if (iot->gscid == arg->gscid && 1259 iot->pscid == arg->pscid) { 1260 iot->perm = IOMMU_NONE; 1261 } 1262 } 1263 1264 /* GV: 1 GVMA: 1 */ 1265 static void riscv_iommu_iot_inval_gscid_gpa(gpointer key, gpointer value, 1266 gpointer data) 1267 { 1268 RISCVIOMMUEntry *iot = (RISCVIOMMUEntry *) value; 1269 RISCVIOMMUEntry *arg = (RISCVIOMMUEntry *) data; 1270 if (iot->gscid == arg->gscid) { 1271 /* simplified cache, no GPA matching */ 1272 iot->perm = IOMMU_NONE; 1273 } 1274 } 1275 1276 /* GV: 1 GVMA: 0 */ 1277 static void riscv_iommu_iot_inval_gscid(gpointer key, gpointer value, 1278 gpointer data) 1279 { 1280 RISCVIOMMUEntry *iot = (RISCVIOMMUEntry *) value; 1281 RISCVIOMMUEntry *arg = (RISCVIOMMUEntry *) data; 1282 if (iot->gscid == arg->gscid) { 1283 iot->perm = IOMMU_NONE; 1284 } 1285 } 1286 1287 /* GV: 0 */ 1288 static void riscv_iommu_iot_inval_all(gpointer key, gpointer value, 1289 gpointer data) 1290 { 1291 RISCVIOMMUEntry *iot = (RISCVIOMMUEntry *) value; 1292 iot->perm = IOMMU_NONE; 1293 } 1294 1295 /* caller should keep ref-count for iot_cache object */ 1296 static RISCVIOMMUEntry *riscv_iommu_iot_lookup(RISCVIOMMUContext *ctx, 1297 GHashTable *iot_cache, hwaddr iova) 1298 { 1299 RISCVIOMMUEntry key = { 1300 .gscid = get_field(ctx->gatp, RISCV_IOMMU_DC_IOHGATP_GSCID), 1301 .pscid = get_field(ctx->ta, RISCV_IOMMU_DC_TA_PSCID), 1302 .iova = PPN_DOWN(iova), 1303 }; 1304 return g_hash_table_lookup(iot_cache, &key); 1305 } 1306 1307 /* caller should keep ref-count for iot_cache object */ 1308 static void riscv_iommu_iot_update(RISCVIOMMUState *s, 1309 GHashTable *iot_cache, RISCVIOMMUEntry *iot) 1310 { 1311 if (!s->iot_limit) { 1312 return; 1313 } 1314 1315 if (g_hash_table_size(s->iot_cache) >= s->iot_limit) { 1316 iot_cache = g_hash_table_new_full(riscv_iommu_iot_hash, 1317 riscv_iommu_iot_equal, 1318 g_free, NULL); 1319 g_hash_table_unref(qatomic_xchg(&s->iot_cache, iot_cache)); 1320 } 1321 
g_hash_table_add(iot_cache, iot); 1322 } 1323 1324 static void riscv_iommu_iot_inval(RISCVIOMMUState *s, GHFunc func, 1325 uint32_t gscid, uint32_t pscid, hwaddr iova) 1326 { 1327 GHashTable *iot_cache; 1328 RISCVIOMMUEntry key = { 1329 .gscid = gscid, 1330 .pscid = pscid, 1331 .iova = PPN_DOWN(iova), 1332 }; 1333 1334 iot_cache = g_hash_table_ref(s->iot_cache); 1335 g_hash_table_foreach(iot_cache, func, &key); 1336 g_hash_table_unref(iot_cache); 1337 } 1338 1339 static int riscv_iommu_translate(RISCVIOMMUState *s, RISCVIOMMUContext *ctx, 1340 IOMMUTLBEntry *iotlb, bool enable_cache) 1341 { 1342 RISCVIOMMUEntry *iot; 1343 IOMMUAccessFlags perm; 1344 bool enable_pid; 1345 bool enable_pri; 1346 GHashTable *iot_cache; 1347 int fault; 1348 1349 iot_cache = g_hash_table_ref(s->iot_cache); 1350 /* 1351 * TC[32] is reserved for custom extensions, used here to temporarily 1352 * enable automatic page-request generation for ATS queries. 1353 */ 1354 enable_pri = (iotlb->perm == IOMMU_NONE) && (ctx->tc & BIT_ULL(32)); 1355 enable_pid = (ctx->tc & RISCV_IOMMU_DC_TC_PDTV); 1356 1357 /* Check for ATS request. */ 1358 if (iotlb->perm == IOMMU_NONE) { 1359 /* Check if ATS is disabled. */ 1360 if (!(ctx->tc & RISCV_IOMMU_DC_TC_EN_ATS)) { 1361 enable_pri = false; 1362 fault = RISCV_IOMMU_FQ_CAUSE_TTYPE_BLOCKED; 1363 goto done; 1364 } 1365 } 1366 1367 iot = riscv_iommu_iot_lookup(ctx, iot_cache, iotlb->iova); 1368 perm = iot ? iot->perm : IOMMU_NONE; 1369 if (perm != IOMMU_NONE) { 1370 iotlb->translated_addr = PPN_PHYS(iot->phys); 1371 iotlb->addr_mask = ~TARGET_PAGE_MASK; 1372 iotlb->perm = perm; 1373 fault = 0; 1374 goto done; 1375 } 1376 1377 /* Translate using device directory / page table information. 
*/ 1378 fault = riscv_iommu_spa_fetch(s, ctx, iotlb); 1379 1380 if (!fault && iotlb->target_as == &s->trap_as) { 1381 /* Do not cache trapped MSI translations */ 1382 goto done; 1383 } 1384 1385 /* 1386 * We made an implementation choice to not cache identity-mapped 1387 * translations, as allowed by the specification, to avoid 1388 * translation cache evictions for other devices sharing the 1389 * IOMMU hardware model. 1390 */ 1391 if (!fault && iotlb->translated_addr != iotlb->iova && enable_cache) { 1392 iot = g_new0(RISCVIOMMUEntry, 1); 1393 iot->iova = PPN_DOWN(iotlb->iova); 1394 iot->phys = PPN_DOWN(iotlb->translated_addr); 1395 iot->gscid = get_field(ctx->gatp, RISCV_IOMMU_DC_IOHGATP_GSCID); 1396 iot->pscid = get_field(ctx->ta, RISCV_IOMMU_DC_TA_PSCID); 1397 iot->perm = iotlb->perm; 1398 riscv_iommu_iot_update(s, iot_cache, iot); 1399 } 1400 1401 done: 1402 g_hash_table_unref(iot_cache); 1403 1404 if (enable_pri && fault) { 1405 struct riscv_iommu_pq_record pr = {0}; 1406 if (enable_pid) { 1407 pr.hdr = set_field(RISCV_IOMMU_PREQ_HDR_PV, 1408 RISCV_IOMMU_PREQ_HDR_PID, ctx->process_id); 1409 } 1410 pr.hdr = set_field(pr.hdr, RISCV_IOMMU_PREQ_HDR_DID, ctx->devid); 1411 pr.payload = (iotlb->iova & TARGET_PAGE_MASK) | 1412 RISCV_IOMMU_PREQ_PAYLOAD_M; 1413 riscv_iommu_pri(s, &pr); 1414 return fault; 1415 } 1416 1417 if (fault) { 1418 unsigned ttype = RISCV_IOMMU_FQ_TTYPE_PCIE_ATS_REQ; 1419 1420 if (iotlb->perm & IOMMU_RW) { 1421 ttype = RISCV_IOMMU_FQ_TTYPE_UADDR_WR; 1422 } else if (iotlb->perm & IOMMU_RO) { 1423 ttype = RISCV_IOMMU_FQ_TTYPE_UADDR_RD; 1424 } 1425 1426 riscv_iommu_report_fault(s, ctx, ttype, fault, enable_pid, 1427 iotlb->iova, iotlb->translated_addr); 1428 return fault; 1429 } 1430 1431 return 0; 1432 } 1433 1434 /* IOMMU Command Interface */ 1435 static MemTxResult riscv_iommu_iofence(RISCVIOMMUState *s, bool notify, 1436 uint64_t addr, uint32_t data) 1437 { 1438 /* 1439 * ATS processing in this implementation of the IOMMU is synchronous, 1440 * 
no need to wait for completions here. 1441 */ 1442 if (!notify) { 1443 return MEMTX_OK; 1444 } 1445 1446 return dma_memory_write(s->target_as, addr, &data, sizeof(data), 1447 MEMTXATTRS_UNSPECIFIED); 1448 } 1449 1450 static void riscv_iommu_ats(RISCVIOMMUState *s, 1451 struct riscv_iommu_command *cmd, IOMMUNotifierFlag flag, 1452 IOMMUAccessFlags perm, 1453 void (*trace_fn)(const char *id)) 1454 { 1455 RISCVIOMMUSpace *as = NULL; 1456 IOMMUNotifier *n; 1457 IOMMUTLBEvent event; 1458 uint32_t pid; 1459 uint32_t devid; 1460 const bool pv = cmd->dword0 & RISCV_IOMMU_CMD_ATS_PV; 1461 1462 if (cmd->dword0 & RISCV_IOMMU_CMD_ATS_DSV) { 1463 /* Use device segment and requester id */ 1464 devid = get_field(cmd->dword0, 1465 RISCV_IOMMU_CMD_ATS_DSEG | RISCV_IOMMU_CMD_ATS_RID); 1466 } else { 1467 devid = get_field(cmd->dword0, RISCV_IOMMU_CMD_ATS_RID); 1468 } 1469 1470 pid = get_field(cmd->dword0, RISCV_IOMMU_CMD_ATS_PID); 1471 1472 QLIST_FOREACH(as, &s->spaces, list) { 1473 if (as->devid == devid) { 1474 break; 1475 } 1476 } 1477 1478 if (!as || !as->notifier) { 1479 return; 1480 } 1481 1482 event.type = flag; 1483 event.entry.perm = perm; 1484 event.entry.target_as = s->target_as; 1485 1486 IOMMU_NOTIFIER_FOREACH(n, &as->iova_mr) { 1487 if (!pv || n->iommu_idx == pid) { 1488 event.entry.iova = n->start; 1489 event.entry.addr_mask = n->end - n->start; 1490 trace_fn(as->iova_mr.parent_obj.name); 1491 memory_region_notify_iommu_one(n, &event); 1492 } 1493 } 1494 } 1495 1496 static void riscv_iommu_ats_inval(RISCVIOMMUState *s, 1497 struct riscv_iommu_command *cmd) 1498 { 1499 return riscv_iommu_ats(s, cmd, IOMMU_NOTIFIER_DEVIOTLB_UNMAP, IOMMU_NONE, 1500 trace_riscv_iommu_ats_inval); 1501 } 1502 1503 static void riscv_iommu_ats_prgr(RISCVIOMMUState *s, 1504 struct riscv_iommu_command *cmd) 1505 { 1506 unsigned resp_code = get_field(cmd->dword1, 1507 RISCV_IOMMU_CMD_ATS_PRGR_RESP_CODE); 1508 1509 /* Using the access flag to carry response code information */ 1510 
IOMMUAccessFlags perm = resp_code ? IOMMU_NONE : IOMMU_RW; 1511 return riscv_iommu_ats(s, cmd, IOMMU_NOTIFIER_MAP, perm, 1512 trace_riscv_iommu_ats_prgr); 1513 } 1514 1515 static void riscv_iommu_process_ddtp(RISCVIOMMUState *s) 1516 { 1517 uint64_t old_ddtp = s->ddtp; 1518 uint64_t new_ddtp = riscv_iommu_reg_get64(s, RISCV_IOMMU_REG_DDTP); 1519 unsigned new_mode = get_field(new_ddtp, RISCV_IOMMU_DDTP_MODE); 1520 unsigned old_mode = get_field(old_ddtp, RISCV_IOMMU_DDTP_MODE); 1521 bool ok = false; 1522 1523 /* 1524 * Check for allowed DDTP.MODE transitions: 1525 * {OFF, BARE} -> {OFF, BARE, 1LVL, 2LVL, 3LVL} 1526 * {1LVL, 2LVL, 3LVL} -> {OFF, BARE} 1527 */ 1528 if (new_mode == old_mode || 1529 new_mode == RISCV_IOMMU_DDTP_MODE_OFF || 1530 new_mode == RISCV_IOMMU_DDTP_MODE_BARE) { 1531 ok = true; 1532 } else if (new_mode == RISCV_IOMMU_DDTP_MODE_1LVL || 1533 new_mode == RISCV_IOMMU_DDTP_MODE_2LVL || 1534 new_mode == RISCV_IOMMU_DDTP_MODE_3LVL) { 1535 ok = old_mode == RISCV_IOMMU_DDTP_MODE_OFF || 1536 old_mode == RISCV_IOMMU_DDTP_MODE_BARE; 1537 } 1538 1539 if (ok) { 1540 /* clear reserved and busy bits, report back sanitized version */ 1541 new_ddtp = set_field(new_ddtp & RISCV_IOMMU_DDTP_PPN, 1542 RISCV_IOMMU_DDTP_MODE, new_mode); 1543 } else { 1544 new_ddtp = old_ddtp; 1545 } 1546 s->ddtp = new_ddtp; 1547 1548 riscv_iommu_reg_set64(s, RISCV_IOMMU_REG_DDTP, new_ddtp); 1549 } 1550 1551 /* Command function and opcode field. 
*/
#define RISCV_IOMMU_CMD(func, op) (((func) << 7) | (op))

/*
 * Process pending commands in the command queue, from the current head
 * (CQH) up to the tail (CQT).
 *
 * Nothing is done when the queue is not active (CQON clear) or an error
 * (CMD_ILL or CQMF) is already pending. Each command is read from guest
 * memory at s->cq_addr and dispatched on its combined function/opcode
 * field. CQH is advanced only after a command completes.
 *
 * A memory access failure sets CQMF and an illegal command sets CMD_ILL;
 * in both cases processing stops without advancing CQH, and a command
 * queue interrupt is raised when CIE is set.
 */
static void riscv_iommu_process_cq_tail(RISCVIOMMUState *s)
{
    struct riscv_iommu_command cmd;
    MemTxResult res;
    dma_addr_t addr;
    uint32_t tail, head, ctrl;
    uint64_t cmd_opcode;
    GHFunc func;

    ctrl = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_CQCSR);
    tail = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_CQT) & s->cq_mask;
    head = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_CQH) & s->cq_mask;

    /* Check for pending error or queue processing disabled */
    if (!(ctrl & RISCV_IOMMU_CQCSR_CQON) ||
        !!(ctrl & (RISCV_IOMMU_CQCSR_CMD_ILL | RISCV_IOMMU_CQCSR_CQMF))) {
        return;
    }

    while (tail != head) {
        addr = s->cq_addr  + head * sizeof(cmd);
        res = dma_memory_read(s->target_as, addr, &cmd, sizeof(cmd),
                              MEMTXATTRS_UNSPECIFIED);

        if (res != MEMTX_OK) {
            riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_CQCSR,
                                  RISCV_IOMMU_CQCSR_CQMF, 0);
            goto fault;
        }

        trace_riscv_iommu_cmd(s->parent_obj.id, cmd.dword0, cmd.dword1);

        cmd_opcode = get_field(cmd.dword0,
                               RISCV_IOMMU_CMD_OPCODE | RISCV_IOMMU_CMD_FUNC);

        switch (cmd_opcode) {
        case RISCV_IOMMU_CMD(RISCV_IOMMU_CMD_IOFENCE_FUNC_C,
                             RISCV_IOMMU_CMD_IOFENCE_OPCODE):
            /* dword1 << 2 is passed as the completion write address. */
            res = riscv_iommu_iofence(s,
                cmd.dword0 & RISCV_IOMMU_CMD_IOFENCE_AV, cmd.dword1 << 2,
                get_field(cmd.dword0, RISCV_IOMMU_CMD_IOFENCE_DATA));

            if (res != MEMTX_OK) {
                riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_CQCSR,
                                      RISCV_IOMMU_CQCSR_CQMF, 0);
                goto fault;
            }
            break;

        case RISCV_IOMMU_CMD(RISCV_IOMMU_CMD_IOTINVAL_FUNC_GVMA,
                             RISCV_IOMMU_CMD_IOTINVAL_OPCODE):
            if (cmd.dword0 & RISCV_IOMMU_CMD_IOTINVAL_PSCV) {
                /* illegal command arguments IOTINVAL.GVMA & PSCV == 1 */
                goto cmd_ill;
            } else if (!(cmd.dword0 & RISCV_IOMMU_CMD_IOTINVAL_GV)) {
                /* invalidate all cache mappings */
                func = riscv_iommu_iot_inval_all;
            } else if (!(cmd.dword0 & RISCV_IOMMU_CMD_IOTINVAL_AV)) {
                /* invalidate cache matching GSCID */
                func = riscv_iommu_iot_inval_gscid;
            } else {
                /* invalidate cache matching GSCID and ADDR (GPA) */
                func = riscv_iommu_iot_inval_gscid_gpa;
            }
            riscv_iommu_iot_inval(s, func,
                get_field(cmd.dword0, RISCV_IOMMU_CMD_IOTINVAL_GSCID), 0,
                cmd.dword1 << 2 & TARGET_PAGE_MASK);
            break;

        case RISCV_IOMMU_CMD(RISCV_IOMMU_CMD_IOTINVAL_FUNC_VMA,
                             RISCV_IOMMU_CMD_IOTINVAL_OPCODE):
            if (!(cmd.dword0 & RISCV_IOMMU_CMD_IOTINVAL_GV)) {
                /* invalidate all cache mappings, simplified model */
                func = riscv_iommu_iot_inval_all;
            } else if (!(cmd.dword0 & RISCV_IOMMU_CMD_IOTINVAL_PSCV)) {
                /* invalidate cache matching GSCID, simplified model */
                func = riscv_iommu_iot_inval_gscid;
            } else if (!(cmd.dword0 & RISCV_IOMMU_CMD_IOTINVAL_AV)) {
                /* invalidate cache matching GSCID and PSCID */
                func = riscv_iommu_iot_inval_pscid;
            } else {
                /* invalidate cache matching GSCID and PSCID and ADDR (IOVA) */
                func = riscv_iommu_iot_inval_pscid_iova;
            }
            riscv_iommu_iot_inval(s, func,
                get_field(cmd.dword0, RISCV_IOMMU_CMD_IOTINVAL_GSCID),
                get_field(cmd.dword0, RISCV_IOMMU_CMD_IOTINVAL_PSCID),
                cmd.dword1 << 2 & TARGET_PAGE_MASK);
            break;

        case RISCV_IOMMU_CMD(RISCV_IOMMU_CMD_IODIR_FUNC_INVAL_DDT,
                             RISCV_IOMMU_CMD_IODIR_OPCODE):
            if (!(cmd.dword0 & RISCV_IOMMU_CMD_IODIR_DV)) {
                /* invalidate all device context cache mappings */
                func = riscv_iommu_ctx_inval_all;
            } else {
                /* invalidate all device context matching DID */
                func = riscv_iommu_ctx_inval_devid;
            }
            riscv_iommu_ctx_inval(s, func,
                get_field(cmd.dword0, RISCV_IOMMU_CMD_IODIR_DID), 0);
            break;

        case RISCV_IOMMU_CMD(RISCV_IOMMU_CMD_IODIR_FUNC_INVAL_PDT,
                             RISCV_IOMMU_CMD_IODIR_OPCODE):
            if (!(cmd.dword0 & RISCV_IOMMU_CMD_IODIR_DV)) {
                /* illegal command arguments IODIR_PDT & DV == 0 */
                goto cmd_ill;
            } else {
                func = riscv_iommu_ctx_inval_devid_procid;
            }
            riscv_iommu_ctx_inval(s, func,
                get_field(cmd.dword0, RISCV_IOMMU_CMD_IODIR_DID),
                get_field(cmd.dword0, RISCV_IOMMU_CMD_IODIR_PID));
            break;

        /* ATS commands */
        case RISCV_IOMMU_CMD(RISCV_IOMMU_CMD_ATS_FUNC_INVAL,
                             RISCV_IOMMU_CMD_ATS_OPCODE):
            if (!s->enable_ats) {
                goto cmd_ill;
            }

            riscv_iommu_ats_inval(s, &cmd);
            break;

        case RISCV_IOMMU_CMD(RISCV_IOMMU_CMD_ATS_FUNC_PRGR,
                             RISCV_IOMMU_CMD_ATS_OPCODE):
            if (!s->enable_ats) {
                goto cmd_ill;
            }

            riscv_iommu_ats_prgr(s, &cmd);
            break;

        default:
        cmd_ill:
            /* Invalid instruction, do not advance instruction index. */
            riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_CQCSR,
                RISCV_IOMMU_CQCSR_CMD_ILL, 0);
            goto fault;
        }

        /* Advance and update head pointer after command completes. */
        head = (head + 1) & s->cq_mask;
        riscv_iommu_reg_set32(s, RISCV_IOMMU_REG_CQH, head);
    }
    return;

fault:
    if (ctrl & RISCV_IOMMU_CQCSR_CIE) {
        riscv_iommu_notify(s, RISCV_IOMMU_INTR_CQ);
    }
}

/*
 * Handle a write to the command queue control register (CQCSR).
 *
 * Enable request (CQEN set, CQON clear): latch the queue index mask and
 * base address from CQB, make the CQT bits above the mask read-only, reset
 * CQH and CQT to zero, then set CQON and clear BUSY and the error bits.
 * Disable request (CQEN clear, CQON set): make all CQT bits read-only and
 * clear BUSY and CQON. Otherwise only BUSY is cleared.
 */
static void riscv_iommu_process_cq_control(RISCVIOMMUState *s)
{
    uint64_t base;
    uint32_t ctrl_set = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_CQCSR);
    uint32_t ctrl_clr;
    bool enable = !!(ctrl_set & RISCV_IOMMU_CQCSR_CQEN);
    bool active = !!(ctrl_set & RISCV_IOMMU_CQCSR_CQON);

    if (enable && !active) {
        base = riscv_iommu_reg_get64(s, RISCV_IOMMU_REG_CQB);
        s->cq_mask = (2ULL << get_field(base, RISCV_IOMMU_CQB_LOG2SZ)) - 1;
        s->cq_addr = PPN_PHYS(get_field(base, RISCV_IOMMU_CQB_PPN));
        stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_CQT], ~s->cq_mask);
        stl_le_p(&s->regs_rw[RISCV_IOMMU_REG_CQH], 0);
        stl_le_p(&s->regs_rw[RISCV_IOMMU_REG_CQT], 0);
        ctrl_set = RISCV_IOMMU_CQCSR_CQON;
        ctrl_clr = RISCV_IOMMU_CQCSR_BUSY | RISCV_IOMMU_CQCSR_CQMF |
                   RISCV_IOMMU_CQCSR_CMD_ILL | RISCV_IOMMU_CQCSR_CMD_TO |
                   RISCV_IOMMU_CQCSR_FENCE_W_IP;
    } else if (!enable && active) {
        stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_CQT], ~0);
        ctrl_set = 0;
        ctrl_clr = RISCV_IOMMU_CQCSR_BUSY | RISCV_IOMMU_CQCSR_CQON;
    } else {
        ctrl_set = 0;
        ctrl_clr = RISCV_IOMMU_CQCSR_BUSY;
    }

    riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_CQCSR, ctrl_set, ctrl_clr);
}

/*
 * Handle a write to the fault queue control register (FQCSR).
 *
 * Same sequence as the command queue handler, using FQB, FQH and FQT; here
 * it is the head register (FQH) whose bits are made read-only.
 */
static void riscv_iommu_process_fq_control(RISCVIOMMUState *s)
{
    uint64_t base;
    uint32_t ctrl_set = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_FQCSR);
    uint32_t ctrl_clr;
    bool enable = !!(ctrl_set & RISCV_IOMMU_FQCSR_FQEN);
    bool active = !!(ctrl_set & RISCV_IOMMU_FQCSR_FQON);

    if (enable && !active) {
        base = riscv_iommu_reg_get64(s, RISCV_IOMMU_REG_FQB);
        s->fq_mask = (2ULL << get_field(base, RISCV_IOMMU_FQB_LOG2SZ)) - 1;
        s->fq_addr = PPN_PHYS(get_field(base, RISCV_IOMMU_FQB_PPN));
        stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_FQH], ~s->fq_mask);
        stl_le_p(&s->regs_rw[RISCV_IOMMU_REG_FQH], 0);
        stl_le_p(&s->regs_rw[RISCV_IOMMU_REG_FQT], 0);
        ctrl_set = RISCV_IOMMU_FQCSR_FQON;
        ctrl_clr = RISCV_IOMMU_FQCSR_BUSY | RISCV_IOMMU_FQCSR_FQMF |
            RISCV_IOMMU_FQCSR_FQOF;
    } else if (!enable && active) {
        stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_FQH], ~0);
        ctrl_set = 0;
        ctrl_clr = RISCV_IOMMU_FQCSR_BUSY | RISCV_IOMMU_FQCSR_FQON;
    } else {
        ctrl_set = 0;
        ctrl_clr = RISCV_IOMMU_FQCSR_BUSY;
    }

    riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_FQCSR, ctrl_set, ctrl_clr);
}

/*
 * Handle a write to the page-request queue control register (PQCSR).
 *
 * Same sequence as the fault queue handler, using PQB, PQH and PQT.
 */
static void riscv_iommu_process_pq_control(RISCVIOMMUState *s)
{
    uint64_t base;
    uint32_t ctrl_set = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_PQCSR);
    uint32_t ctrl_clr;
    bool enable = !!(ctrl_set & RISCV_IOMMU_PQCSR_PQEN);
    bool active = !!(ctrl_set & RISCV_IOMMU_PQCSR_PQON);

    if (enable && !active) {
        base = riscv_iommu_reg_get64(s, RISCV_IOMMU_REG_PQB);
        s->pq_mask = (2ULL << get_field(base, RISCV_IOMMU_PQB_LOG2SZ)) - 1;
        s->pq_addr = PPN_PHYS(get_field(base, RISCV_IOMMU_PQB_PPN));
        stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_PQH], ~s->pq_mask);
        stl_le_p(&s->regs_rw[RISCV_IOMMU_REG_PQH], 0);
        stl_le_p(&s->regs_rw[RISCV_IOMMU_REG_PQT], 0);
        ctrl_set = RISCV_IOMMU_PQCSR_PQON;
        ctrl_clr = RISCV_IOMMU_PQCSR_BUSY | RISCV_IOMMU_PQCSR_PQMF |
            RISCV_IOMMU_PQCSR_PQOF;
    } else if (!enable && active) {
        stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_PQH], ~0);
        ctrl_set = 0;
        ctrl_clr = RISCV_IOMMU_PQCSR_BUSY | RISCV_IOMMU_PQCSR_PQON;
    } else {
        ctrl_set = 0;
        ctrl_clr = RISCV_IOMMU_PQCSR_BUSY;
    }

    riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_PQCSR, ctrl_set, ctrl_clr);
}

/*
 * Handle a write to the translation-request control register (debug
 * interface).
 *
 * When GO_BUSY is set, the IOVA from TR_REQ_IOVA is translated for the
 * device/process ids given in TR_REQ_CTL, with translation caching
 * disabled. The result (a fault code, or the translated page number) is
 * written to TR_RESPONSE, and GO_BUSY is cleared.
 */
static void riscv_iommu_process_dbg(RISCVIOMMUState *s)
{
    uint64_t iova = riscv_iommu_reg_get64(s, RISCV_IOMMU_REG_TR_REQ_IOVA);
    uint64_t ctrl = riscv_iommu_reg_get64(s, RISCV_IOMMU_REG_TR_REQ_CTL);
    unsigned devid = get_field(ctrl, RISCV_IOMMU_TR_REQ_CTL_DID);
    unsigned pid = get_field(ctrl, RISCV_IOMMU_TR_REQ_CTL_PID);
    RISCVIOMMUContext *ctx;
    void *ref;

    if (!(ctrl & RISCV_IOMMU_TR_REQ_CTL_GO_BUSY)) {
        return;
    }

    ctx = riscv_iommu_ctx(s, devid, pid, &ref);
    if (ctx == NULL) {
        /* No usable context: respond with a DMA-disabled fault. */
        riscv_iommu_reg_set64(s, RISCV_IOMMU_REG_TR_RESPONSE,
                                 RISCV_IOMMU_TR_RESPONSE_FAULT |
                                 (RISCV_IOMMU_FQ_CAUSE_DMA_DISABLED << 10));
    } else {
        IOMMUTLBEntry iotlb = {
            .iova = iova,
            .perm = ctrl & RISCV_IOMMU_TR_REQ_CTL_NW ? IOMMU_RO : IOMMU_RW,
            .addr_mask = ~0,
            .target_as = NULL,
        };
        int fault = riscv_iommu_translate(s, ctx, &iotlb, false);
        if (fault) {
            iova = RISCV_IOMMU_TR_RESPONSE_FAULT | (((uint64_t) fault) << 10);
        } else {
            iova = iotlb.translated_addr & ~iotlb.addr_mask;
            iova >>= TARGET_PAGE_BITS;
            iova &= RISCV_IOMMU_TR_RESPONSE_PPN;

            /* We do not support superpages (> 4 KiB) for now */
            iova &= ~RISCV_IOMMU_TR_RESPONSE_S;
        }
        riscv_iommu_reg_set64(s, RISCV_IOMMU_REG_TR_RESPONSE, iova);
    }

    riscv_iommu_reg_mod64(s, RISCV_IOMMU_REG_TR_REQ_CTL, 0,
        RISCV_IOMMU_TR_REQ_CTL_GO_BUSY);
    /* ref is NULL when the context lookup failed; ctx_put accepts that. */
    riscv_iommu_ctx_put(s, ref);
}

/* Signature of the per-register handlers run after an MMIO write. */
typedef void riscv_iommu_process_fn(RISCVIOMMUState *s);

/*
 * Update ICVEC from a software-written value: each of the four vector
 * fields (CIV, FIV, PMIV, PIV) is limited to the corresponding field of
 * s->icvec_avail_vectors before the register is written.
 */
static void riscv_iommu_update_icvec(RISCVIOMMUState *s, uint64_t data)
{
    uint64_t icvec = 0;

    icvec |= MIN(data & RISCV_IOMMU_ICVEC_CIV,
                 s->icvec_avail_vectors & RISCV_IOMMU_ICVEC_CIV);

    icvec |= MIN(data & RISCV_IOMMU_ICVEC_FIV,
                 s->icvec_avail_vectors & RISCV_IOMMU_ICVEC_FIV);

    icvec |= MIN(data & RISCV_IOMMU_ICVEC_PMIV,
                 s->icvec_avail_vectors & RISCV_IOMMU_ICVEC_PMIV);

    icvec |= MIN(data & RISCV_IOMMU_ICVEC_PIV,
                 s->icvec_avail_vectors & RISCV_IOMMU_ICVEC_PIV);

    trace_riscv_iommu_icvec_write(data, icvec);

    riscv_iommu_reg_set64(s, RISCV_IOMMU_REG_ICVEC, icvec);
}

/*
 * Update IPSR from a resulting register value 'data'.
 *
 * For each of CIP, FIP and PIP: the bit is placed in the set mask only when
 * it is set in 'data' and the matching queue CSR has its interrupt enable
 * bit plus at least one pending condition bit set; otherwise the bit is
 * placed in the clear mask. Both masks are then applied to IPSR.
 */
static void riscv_iommu_update_ipsr(RISCVIOMMUState *s, uint64_t data)
{
    uint32_t cqcsr, fqcsr, pqcsr;
    uint32_t ipsr_set = 0;
    uint32_t ipsr_clr = 0;

    if (data & RISCV_IOMMU_IPSR_CIP) {
        cqcsr = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_CQCSR);

        if (cqcsr & RISCV_IOMMU_CQCSR_CIE &&
            (cqcsr & RISCV_IOMMU_CQCSR_FENCE_W_IP ||
             cqcsr & RISCV_IOMMU_CQCSR_CMD_ILL ||
             cqcsr & RISCV_IOMMU_CQCSR_CMD_TO ||
             cqcsr & RISCV_IOMMU_CQCSR_CQMF)) {
            ipsr_set |= RISCV_IOMMU_IPSR_CIP;
        } else {
            ipsr_clr |= RISCV_IOMMU_IPSR_CIP;
        }
    } else {
        ipsr_clr |= RISCV_IOMMU_IPSR_CIP;
    }

    if (data & RISCV_IOMMU_IPSR_FIP) {
        fqcsr = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_FQCSR);

        if (fqcsr & RISCV_IOMMU_FQCSR_FIE &&
            (fqcsr & RISCV_IOMMU_FQCSR_FQOF ||
             fqcsr & RISCV_IOMMU_FQCSR_FQMF)) {
            ipsr_set |= RISCV_IOMMU_IPSR_FIP;
        } else {
            ipsr_clr |= RISCV_IOMMU_IPSR_FIP;
        }
    } else {
        ipsr_clr |= RISCV_IOMMU_IPSR_FIP;
    }

    if (data & RISCV_IOMMU_IPSR_PIP) {
        pqcsr = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_PQCSR);

        if (pqcsr & RISCV_IOMMU_PQCSR_PIE &&
            (pqcsr & RISCV_IOMMU_PQCSR_PQOF ||
             pqcsr & RISCV_IOMMU_PQCSR_PQMF)) {
            ipsr_set |= RISCV_IOMMU_IPSR_PIP;
        } else {
            ipsr_clr |= RISCV_IOMMU_IPSR_PIP;
        }
    } else {
        ipsr_clr |= RISCV_IOMMU_IPSR_PIP;
    }

    riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_IPSR, ipsr_set, ipsr_clr);
}

/*
 * Write the resulting value of 'data' for the reg specified
 * by 'reg_addr', after considering read-only/read-write/write-clear
 * bits, in the pointer 'dest'.
 *
 * The result is written in little-endian.
*/
static void riscv_iommu_write_reg_val(RISCVIOMMUState *s,
                                      void *dest, hwaddr reg_addr,
                                      int size, uint64_t data)
{
    uint64_t ro = ldn_le_p(&s->regs_ro[reg_addr], size);
    uint64_t wc = ldn_le_p(&s->regs_wc[reg_addr], size);
    uint64_t rw = ldn_le_p(&s->regs_rw[reg_addr], size);

    /*
     * Bits set in 'ro' keep their current value, all other bits take the
     * written value; finally, bits that are both written as 1 and set in
     * 'wc' are cleared.
     */
    stn_le_p(dest, size, ((rw & ro) | (data & ~ro)) & ~(data & wc));
}

/*
 * MMIO write handler for the IOMMU register file.
 *
 * Rejects accesses that are not aligned to their size (MEMTX_ERROR) or that
 * extend past RISCV_IOMMU_REG_MSI_CONFIG (MEMTX_ACCESS_ERROR). ICVEC and
 * IPSR writes are handled by their dedicated helpers. Any other write is
 * merged into the register storage; for registers with an associated
 * handler the BUSY bit (if any) is set and the handler is then run.
 */
static MemTxResult riscv_iommu_mmio_write(void *opaque, hwaddr addr,
                                          uint64_t data, unsigned size,
                                          MemTxAttrs attrs)
{
    riscv_iommu_process_fn *process_fn = NULL;
    RISCVIOMMUState *s = opaque;
    /* Address rounded down to a 4-byte boundary, used for dispatch. */
    uint32_t regb = addr & ~3;
    uint32_t busy = 0;
    uint64_t val = 0;

    if ((addr & (size - 1)) != 0) {
        /* Unsupported MMIO alignment or access size */
        return MEMTX_ERROR;
    }

    if (addr + size > RISCV_IOMMU_REG_MSI_CONFIG) {
        /* Unsupported MMIO access location. */
        return MEMTX_ACCESS_ERROR;
    }

    /* Track actionable MMIO write. */
    switch (regb) {
    case RISCV_IOMMU_REG_DDTP:
    case RISCV_IOMMU_REG_DDTP + 4:
        process_fn = riscv_iommu_process_ddtp;
        /* The BUSY bit is updated in the low word of DDTP. */
        regb = RISCV_IOMMU_REG_DDTP;
        busy = RISCV_IOMMU_DDTP_BUSY;
        break;

    case RISCV_IOMMU_REG_CQT:
        process_fn = riscv_iommu_process_cq_tail;
        break;

    case RISCV_IOMMU_REG_CQCSR:
        process_fn = riscv_iommu_process_cq_control;
        busy = RISCV_IOMMU_CQCSR_BUSY;
        break;

    case RISCV_IOMMU_REG_FQCSR:
        process_fn = riscv_iommu_process_fq_control;
        busy = RISCV_IOMMU_FQCSR_BUSY;
        break;

    case RISCV_IOMMU_REG_PQCSR:
        process_fn = riscv_iommu_process_pq_control;
        busy = RISCV_IOMMU_PQCSR_BUSY;
        break;

    case RISCV_IOMMU_REG_ICVEC:
    case RISCV_IOMMU_REG_IPSR:
        /*
         * ICVEC and IPSR have special read/write procedures. We'll
         * call their respective helpers and exit.
         */
        riscv_iommu_write_reg_val(s, &val, addr, size, data);

        /*
         * 'val' is stored as LE. Switch to host endianness
         * before using it.
         */
        val = le64_to_cpu(val);

        if (regb == RISCV_IOMMU_REG_ICVEC) {
            riscv_iommu_update_icvec(s, val);
        } else {
            riscv_iommu_update_ipsr(s, val);
        }

        return MEMTX_OK;

    case RISCV_IOMMU_REG_TR_REQ_CTL:
        process_fn = riscv_iommu_process_dbg;
        regb = RISCV_IOMMU_REG_TR_REQ_CTL;
        busy = RISCV_IOMMU_TR_REQ_CTL_GO_BUSY;
        break;

    default:
        break;
    }

    /*
     * Register updates might not be synchronized with the core logic.
     * If system software updates a register while the relevant BUSY bit
     * is set, the IOMMU behavior for additional writes to that register
     * is UNSPECIFIED.
     */
    riscv_iommu_write_reg_val(s, &s->regs_rw[addr], addr, size, data);

    /* Busy flag update, MSB 4-byte register. */
    if (busy) {
        uint32_t rw = ldl_le_p(&s->regs_rw[regb]);
        stl_le_p(&s->regs_rw[regb], rw | busy);
    }

    if (process_fn) {
        process_fn(s);
    }

    return MEMTX_OK;
}

/*
 * MMIO read handler for the IOMMU register file: returns the stored
 * little-endian register value of the requested size. Applies the same
 * alignment and range checks as the write handler.
 */
static MemTxResult riscv_iommu_mmio_read(void *opaque, hwaddr addr,
    uint64_t *data, unsigned size, MemTxAttrs attrs)
{
    RISCVIOMMUState *s = opaque;
    uint64_t val = -1;
    uint8_t *ptr;

    if ((addr & (size - 1)) != 0) {
        /* Unsupported MMIO alignment. */
        return MEMTX_ERROR;
    }

    if (addr + size > RISCV_IOMMU_REG_MSI_CONFIG) {
        return MEMTX_ACCESS_ERROR;
    }

    ptr = &s->regs_rw[addr];
    val = ldn_le_p(ptr, size);

    *data = val;

    return MEMTX_OK;
}

/* Register file access callbacks; 4- and 8-byte accesses are accepted. */
static const MemoryRegionOps riscv_iommu_mmio_ops = {
    .read_with_attrs = riscv_iommu_mmio_read,
    .write_with_attrs = riscv_iommu_mmio_write,
    .endianness = DEVICE_NATIVE_ENDIAN,
    .impl = {
        .min_access_size = 4,
        .max_access_size = 8,
        .unaligned = false,
    },
    .valid = {
        .min_access_size = 4,
        .max_access_size = 8,
    }
};

/*
 * Translations matching MSI pattern check are redirected to "riscv-iommu-trap"
 * memory region as untranslated address, for additional MSI/MRIF interception
 * by IOMMU interrupt remapping implementation.
 * Note: Device emulation code generating an MSI is expected to provide a valid
 * memory transaction attributes with requested_id set.
 *
 * Writes with unspecified attributes, or for which no translation context
 * can be obtained, fail with MEMTX_ACCESS_ERROR; otherwise the result of
 * riscv_iommu_msi_write() is returned.
 */
static MemTxResult riscv_iommu_trap_write(void *opaque, hwaddr addr,
    uint64_t data, unsigned size, MemTxAttrs attrs)
{
    RISCVIOMMUState* s = (RISCVIOMMUState *)opaque;
    RISCVIOMMUContext *ctx;
    MemTxResult res;
    void *ref;
    uint32_t devid = attrs.requester_id;

    if (attrs.unspecified) {
        return MEMTX_ACCESS_ERROR;
    }

    /* FIXME: PCIe bus remapping for attached endpoints. */
    devid |= s->bus << 8;

    ctx = riscv_iommu_ctx(s, devid, 0, &ref);
    if (ctx == NULL) {
        res = MEMTX_ACCESS_ERROR;
    } else {
        res = riscv_iommu_msi_write(s, ctx, addr, data, size, attrs);
    }
    /* ref is NULL when the context lookup failed; ctx_put accepts that. */
    riscv_iommu_ctx_put(s, ref);
    return res;
}

/* Reads from the MSI trap region always fail. */
static MemTxResult riscv_iommu_trap_read(void *opaque, hwaddr addr,
    uint64_t *data, unsigned size, MemTxAttrs attrs)
{
    return MEMTX_ACCESS_ERROR;
}

/* MSI trap region access callbacks. */
static const MemoryRegionOps riscv_iommu_trap_ops = {
    .read_with_attrs = riscv_iommu_trap_read,
    .write_with_attrs = riscv_iommu_trap_write,
    .endianness = DEVICE_LITTLE_ENDIAN,
    .impl = {
        .min_access_size = 4,
        .max_access_size = 8,
        .unaligned = true,
    },
    .valid = {
        .min_access_size = 4,
        .max_access_size = 8,
    }
};

/* Set the IGS field of the capabilities value to 'mode'. */
void riscv_iommu_set_cap_igs(RISCVIOMMUState *s, riscv_iommu_igs_mode mode)
{
    s->cap = set_field(s->cap, RISCV_IOMMU_CAP_IGS, mode);
}

/*
 * Instance initialiser: sets the base capability bits, allocates the three
 * register arrays (current values, read-only mask, write-clear mask) with
 * every bit initially read-only, and creates the empty context and
 * translation caches and the address space list.
 */
static void riscv_iommu_instance_init(Object *obj)
{
    RISCVIOMMUState *s = RISCV_IOMMU(obj);

    /* Enable translation debug interface */
    s->cap = RISCV_IOMMU_CAP_DBG;

    /* Report QEMU target physical address space limits */
    s->cap = set_field(s->cap, RISCV_IOMMU_CAP_PAS,
                       TARGET_PHYS_ADDR_SPACE_BITS);

    /* TODO: method to report supported PID bits */
    s->pid_bits = 8; /* restricted to size of MemTxAttrs.pid */
    s->cap |= RISCV_IOMMU_CAP_PD8;

    /* register storage */
    s->regs_rw = g_new0(uint8_t, RISCV_IOMMU_REG_SIZE);
    s->regs_ro = g_new0(uint8_t, RISCV_IOMMU_REG_SIZE);
    s->regs_wc = g_new0(uint8_t, RISCV_IOMMU_REG_SIZE);

     /* Mark all registers read-only */
    memset(s->regs_ro, 0xff, RISCV_IOMMU_REG_SIZE);

    /* Device translation context cache */
    s->ctx_cache = g_hash_table_new_full(riscv_iommu_ctx_hash,
                                         riscv_iommu_ctx_equal,
                                         g_free, NULL);

    /* Address translation cache */
    s->iot_cache = g_hash_table_new_full(riscv_iommu_iot_hash,
                                         riscv_iommu_iot_equal,
                                         g_free, NULL);

    s->iommus.le_next = NULL;
    s->iommus.le_prev = NULL;
    QLIST_INIT(&s->spaces);
}

static void riscv_iommu_realize(DeviceState *dev, Error **errp)
{
    RISCVIOMMUState *s = RISCV_IOMMU(dev);

    s->cap |= s->version & RISCV_IOMMU_CAP_VERSION;
    if (s->enable_msi) {
        s->cap |= RISCV_IOMMU_CAP_MSI_FLAT | RISCV_IOMMU_CAP_MSI_MRIF;
    }
    if (s->enable_ats) {
        s->cap |= RISCV_IOMMU_CAP_ATS;
    }
    if (s->enable_s_stage) {
        s->cap |= RISCV_IOMMU_CAP_SV32 | RISCV_IOMMU_CAP_SV39 |
                  RISCV_IOMMU_CAP_SV48 | RISCV_IOMMU_CAP_SV57;
    }
    if (s->enable_g_stage) {
        s->cap |= RISCV_IOMMU_CAP_SV32X4 | RISCV_IOMMU_CAP_SV39X4 |
                  RISCV_IOMMU_CAP_SV48X4 | RISCV_IOMMU_CAP_SV57X4;
    }

    /* Out-of-reset translation mode: OFF (DMA disabled) BARE (passthrough) */
    s->ddtp = set_field(0, RISCV_IOMMU_DDTP_MODE, s->enable_off ?
                        RISCV_IOMMU_DDTP_MODE_OFF : RISCV_IOMMU_DDTP_MODE_BARE);

    /*
     * Register complete MMIO space, including MSI/PBA registers.
     * Note, PCIDevice implementation will add overlapping MR for MSI/PBA,
     * managed directly by the PCIDevice implementation.
2202 */ 2203 memory_region_init_io(&s->regs_mr, OBJECT(dev), &riscv_iommu_mmio_ops, s, 2204 "riscv-iommu-regs", RISCV_IOMMU_REG_SIZE); 2205 2206 /* Set power-on register state */ 2207 stq_le_p(&s->regs_rw[RISCV_IOMMU_REG_CAP], s->cap); 2208 stq_le_p(&s->regs_rw[RISCV_IOMMU_REG_FCTL], 0); 2209 stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_FCTL], 2210 ~(RISCV_IOMMU_FCTL_BE | RISCV_IOMMU_FCTL_WSI)); 2211 stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_DDTP], 2212 ~(RISCV_IOMMU_DDTP_PPN | RISCV_IOMMU_DDTP_MODE)); 2213 stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_CQB], 2214 ~(RISCV_IOMMU_CQB_LOG2SZ | RISCV_IOMMU_CQB_PPN)); 2215 stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_FQB], 2216 ~(RISCV_IOMMU_FQB_LOG2SZ | RISCV_IOMMU_FQB_PPN)); 2217 stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_PQB], 2218 ~(RISCV_IOMMU_PQB_LOG2SZ | RISCV_IOMMU_PQB_PPN)); 2219 stl_le_p(&s->regs_wc[RISCV_IOMMU_REG_CQCSR], RISCV_IOMMU_CQCSR_CQMF | 2220 RISCV_IOMMU_CQCSR_CMD_TO | RISCV_IOMMU_CQCSR_CMD_ILL); 2221 stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_CQCSR], RISCV_IOMMU_CQCSR_CQON | 2222 RISCV_IOMMU_CQCSR_BUSY); 2223 stl_le_p(&s->regs_wc[RISCV_IOMMU_REG_FQCSR], RISCV_IOMMU_FQCSR_FQMF | 2224 RISCV_IOMMU_FQCSR_FQOF); 2225 stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_FQCSR], RISCV_IOMMU_FQCSR_FQON | 2226 RISCV_IOMMU_FQCSR_BUSY); 2227 stl_le_p(&s->regs_wc[RISCV_IOMMU_REG_PQCSR], RISCV_IOMMU_PQCSR_PQMF | 2228 RISCV_IOMMU_PQCSR_PQOF); 2229 stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_PQCSR], RISCV_IOMMU_PQCSR_PQON | 2230 RISCV_IOMMU_PQCSR_BUSY); 2231 stl_le_p(&s->regs_wc[RISCV_IOMMU_REG_IPSR], ~0); 2232 stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_ICVEC], 0); 2233 stq_le_p(&s->regs_rw[RISCV_IOMMU_REG_DDTP], s->ddtp); 2234 /* If debug registers enabled. */ 2235 if (s->cap & RISCV_IOMMU_CAP_DBG) { 2236 stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_TR_REQ_IOVA], 0); 2237 stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_TR_REQ_CTL], 2238 RISCV_IOMMU_TR_REQ_CTL_GO_BUSY); 2239 } 2240 2241 /* Memory region for downstream access, if specified. 
*/ 2242 if (s->target_mr) { 2243 s->target_as = g_new0(AddressSpace, 1); 2244 address_space_init(s->target_as, s->target_mr, 2245 "riscv-iommu-downstream"); 2246 } else { 2247 /* Fallback to global system memory. */ 2248 s->target_as = &address_space_memory; 2249 } 2250 2251 /* Memory region for untranslated MRIF/MSI writes */ 2252 memory_region_init_io(&s->trap_mr, OBJECT(dev), &riscv_iommu_trap_ops, s, 2253 "riscv-iommu-trap", ~0ULL); 2254 address_space_init(&s->trap_as, &s->trap_mr, "riscv-iommu-trap-as"); 2255 } 2256 2257 static void riscv_iommu_unrealize(DeviceState *dev) 2258 { 2259 RISCVIOMMUState *s = RISCV_IOMMU(dev); 2260 2261 g_hash_table_unref(s->iot_cache); 2262 g_hash_table_unref(s->ctx_cache); 2263 } 2264 2265 void riscv_iommu_reset(RISCVIOMMUState *s) 2266 { 2267 uint32_t reg_clr; 2268 int ddtp_mode; 2269 2270 /* 2271 * Clear DDTP while setting DDTP_mode back to user 2272 * initial setting. 2273 */ 2274 ddtp_mode = s->enable_off ? 2275 RISCV_IOMMU_DDTP_MODE_OFF : RISCV_IOMMU_DDTP_MODE_BARE; 2276 s->ddtp = set_field(0, RISCV_IOMMU_DDTP_MODE, ddtp_mode); 2277 riscv_iommu_reg_set64(s, RISCV_IOMMU_REG_DDTP, s->ddtp); 2278 2279 reg_clr = RISCV_IOMMU_CQCSR_CQEN | RISCV_IOMMU_CQCSR_CIE | 2280 RISCV_IOMMU_CQCSR_CQON | RISCV_IOMMU_CQCSR_BUSY; 2281 riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_CQCSR, 0, reg_clr); 2282 2283 reg_clr = RISCV_IOMMU_FQCSR_FQEN | RISCV_IOMMU_FQCSR_FIE | 2284 RISCV_IOMMU_FQCSR_FQON | RISCV_IOMMU_FQCSR_BUSY; 2285 riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_FQCSR, 0, reg_clr); 2286 2287 reg_clr = RISCV_IOMMU_PQCSR_PQEN | RISCV_IOMMU_PQCSR_PIE | 2288 RISCV_IOMMU_PQCSR_PQON | RISCV_IOMMU_PQCSR_BUSY; 2289 riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_PQCSR, 0, reg_clr); 2290 2291 riscv_iommu_reg_mod64(s, RISCV_IOMMU_REG_TR_REQ_CTL, 0, 2292 RISCV_IOMMU_TR_REQ_CTL_GO_BUSY); 2293 2294 riscv_iommu_reg_set32(s, RISCV_IOMMU_REG_IPSR, 0); 2295 2296 g_hash_table_remove_all(s->ctx_cache); 2297 g_hash_table_remove_all(s->iot_cache); 2298 } 2299 2300 static 
const Property riscv_iommu_properties[] = { 2301 DEFINE_PROP_UINT32("version", RISCVIOMMUState, version, 2302 RISCV_IOMMU_SPEC_DOT_VER), 2303 DEFINE_PROP_UINT32("bus", RISCVIOMMUState, bus, 0x0), 2304 DEFINE_PROP_UINT32("ioatc-limit", RISCVIOMMUState, iot_limit, 2305 LIMIT_CACHE_IOT), 2306 DEFINE_PROP_BOOL("intremap", RISCVIOMMUState, enable_msi, TRUE), 2307 DEFINE_PROP_BOOL("ats", RISCVIOMMUState, enable_ats, TRUE), 2308 DEFINE_PROP_BOOL("off", RISCVIOMMUState, enable_off, TRUE), 2309 DEFINE_PROP_BOOL("s-stage", RISCVIOMMUState, enable_s_stage, TRUE), 2310 DEFINE_PROP_BOOL("g-stage", RISCVIOMMUState, enable_g_stage, TRUE), 2311 DEFINE_PROP_LINK("downstream-mr", RISCVIOMMUState, target_mr, 2312 TYPE_MEMORY_REGION, MemoryRegion *), 2313 }; 2314 2315 static void riscv_iommu_class_init(ObjectClass *klass, void* data) 2316 { 2317 DeviceClass *dc = DEVICE_CLASS(klass); 2318 2319 /* internal device for riscv-iommu-{pci/sys}, not user-creatable */ 2320 dc->user_creatable = false; 2321 dc->realize = riscv_iommu_realize; 2322 dc->unrealize = riscv_iommu_unrealize; 2323 device_class_set_props(dc, riscv_iommu_properties); 2324 } 2325 2326 static const TypeInfo riscv_iommu_info = { 2327 .name = TYPE_RISCV_IOMMU, 2328 .parent = TYPE_DEVICE, 2329 .instance_size = sizeof(RISCVIOMMUState), 2330 .instance_init = riscv_iommu_instance_init, 2331 .class_init = riscv_iommu_class_init, 2332 }; 2333 2334 static const char *IOMMU_FLAG_STR[] = { 2335 "NA", 2336 "RO", 2337 "WR", 2338 "RW", 2339 }; 2340 2341 /* RISC-V IOMMU Memory Region - Address Translation Space */ 2342 static IOMMUTLBEntry riscv_iommu_memory_region_translate( 2343 IOMMUMemoryRegion *iommu_mr, hwaddr addr, 2344 IOMMUAccessFlags flag, int iommu_idx) 2345 { 2346 RISCVIOMMUSpace *as = container_of(iommu_mr, RISCVIOMMUSpace, iova_mr); 2347 RISCVIOMMUContext *ctx; 2348 void *ref; 2349 IOMMUTLBEntry iotlb = { 2350 .iova = addr, 2351 .target_as = as->iommu->target_as, 2352 .addr_mask = ~0ULL, 2353 .perm = flag, 2354 }; 2355 2356 
ctx = riscv_iommu_ctx(as->iommu, as->devid, iommu_idx, &ref); 2357 if (ctx == NULL) { 2358 /* Translation disabled or invalid. */ 2359 iotlb.addr_mask = 0; 2360 iotlb.perm = IOMMU_NONE; 2361 } else if (riscv_iommu_translate(as->iommu, ctx, &iotlb, true)) { 2362 /* Translation disabled or fault reported. */ 2363 iotlb.addr_mask = 0; 2364 iotlb.perm = IOMMU_NONE; 2365 } 2366 2367 /* Trace all dma translations with original access flags. */ 2368 trace_riscv_iommu_dma(as->iommu->parent_obj.id, PCI_BUS_NUM(as->devid), 2369 PCI_SLOT(as->devid), PCI_FUNC(as->devid), iommu_idx, 2370 IOMMU_FLAG_STR[flag & IOMMU_RW], iotlb.iova, 2371 iotlb.translated_addr); 2372 2373 riscv_iommu_ctx_put(as->iommu, ref); 2374 2375 return iotlb; 2376 } 2377 2378 static int riscv_iommu_memory_region_notify( 2379 IOMMUMemoryRegion *iommu_mr, IOMMUNotifierFlag old, 2380 IOMMUNotifierFlag new, Error **errp) 2381 { 2382 RISCVIOMMUSpace *as = container_of(iommu_mr, RISCVIOMMUSpace, iova_mr); 2383 2384 if (old == IOMMU_NOTIFIER_NONE) { 2385 as->notifier = true; 2386 trace_riscv_iommu_notifier_add(iommu_mr->parent_obj.name); 2387 } else if (new == IOMMU_NOTIFIER_NONE) { 2388 as->notifier = false; 2389 trace_riscv_iommu_notifier_del(iommu_mr->parent_obj.name); 2390 } 2391 2392 return 0; 2393 } 2394 2395 static inline bool pci_is_iommu(PCIDevice *pdev) 2396 { 2397 return pci_get_word(pdev->config + PCI_CLASS_DEVICE) == 0x0806; 2398 } 2399 2400 static AddressSpace *riscv_iommu_find_as(PCIBus *bus, void *opaque, int devfn) 2401 { 2402 RISCVIOMMUState *s = (RISCVIOMMUState *) opaque; 2403 PCIDevice *pdev = pci_find_device(bus, pci_bus_num(bus), devfn); 2404 AddressSpace *as = NULL; 2405 2406 if (pdev && pci_is_iommu(pdev)) { 2407 return s->target_as; 2408 } 2409 2410 /* Find first registered IOMMU device */ 2411 while (s->iommus.le_prev) { 2412 s = *(s->iommus.le_prev); 2413 } 2414 2415 /* Find first matching IOMMU */ 2416 while (s != NULL && as == NULL) { 2417 as = riscv_iommu_space(s, 
PCI_BUILD_BDF(pci_bus_num(bus), devfn)); 2418 s = s->iommus.le_next; 2419 } 2420 2421 return as ? as : &address_space_memory; 2422 } 2423 2424 static const PCIIOMMUOps riscv_iommu_ops = { 2425 .get_address_space = riscv_iommu_find_as, 2426 }; 2427 2428 void riscv_iommu_pci_setup_iommu(RISCVIOMMUState *iommu, PCIBus *bus, 2429 Error **errp) 2430 { 2431 if (bus->iommu_ops && 2432 bus->iommu_ops->get_address_space == riscv_iommu_find_as) { 2433 /* Allow multiple IOMMUs on the same PCIe bus, link known devices */ 2434 RISCVIOMMUState *last = (RISCVIOMMUState *)bus->iommu_opaque; 2435 QLIST_INSERT_AFTER(last, iommu, iommus); 2436 } else if (!bus->iommu_ops && !bus->iommu_opaque) { 2437 pci_setup_iommu(bus, &riscv_iommu_ops, iommu); 2438 } else { 2439 error_setg(errp, "can't register secondary IOMMU for PCI bus #%d", 2440 pci_bus_num(bus)); 2441 } 2442 } 2443 2444 static int riscv_iommu_memory_region_index(IOMMUMemoryRegion *iommu_mr, 2445 MemTxAttrs attrs) 2446 { 2447 return attrs.unspecified ? 
RISCV_IOMMU_NOPROCID : (int)attrs.pid; 2448 } 2449 2450 static int riscv_iommu_memory_region_index_len(IOMMUMemoryRegion *iommu_mr) 2451 { 2452 RISCVIOMMUSpace *as = container_of(iommu_mr, RISCVIOMMUSpace, iova_mr); 2453 return 1 << as->iommu->pid_bits; 2454 } 2455 2456 static void riscv_iommu_memory_region_init(ObjectClass *klass, void *data) 2457 { 2458 IOMMUMemoryRegionClass *imrc = IOMMU_MEMORY_REGION_CLASS(klass); 2459 2460 imrc->translate = riscv_iommu_memory_region_translate; 2461 imrc->notify_flag_changed = riscv_iommu_memory_region_notify; 2462 imrc->attrs_to_index = riscv_iommu_memory_region_index; 2463 imrc->num_indexes = riscv_iommu_memory_region_index_len; 2464 } 2465 2466 static const TypeInfo riscv_iommu_memory_region_info = { 2467 .parent = TYPE_IOMMU_MEMORY_REGION, 2468 .name = TYPE_RISCV_IOMMU_MEMORY_REGION, 2469 .class_init = riscv_iommu_memory_region_init, 2470 }; 2471 2472 static void riscv_iommu_register_mr_types(void) 2473 { 2474 type_register_static(&riscv_iommu_memory_region_info); 2475 type_register_static(&riscv_iommu_info); 2476 } 2477 2478 type_init(riscv_iommu_register_mr_types); 2479