1 /* 2 * QEMU emulation of an RISC-V IOMMU 3 * 4 * Copyright (C) 2021-2023, Rivos Inc. 5 * 6 * This program is free software; you can redistribute it and/or modify it 7 * under the terms and conditions of the GNU General Public License, 8 * version 2 or later, as published by the Free Software Foundation. 9 * 10 * This program is distributed in the hope that it will be useful, 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 * GNU General Public License for more details. 14 * 15 * You should have received a copy of the GNU General Public License along 16 * with this program; if not, see <http://www.gnu.org/licenses/>. 17 */ 18 19 #include "qemu/osdep.h" 20 #include "qom/object.h" 21 #include "hw/pci/pci_bus.h" 22 #include "hw/pci/pci_device.h" 23 #include "hw/qdev-properties.h" 24 #include "hw/riscv/riscv_hart.h" 25 #include "migration/vmstate.h" 26 #include "qapi/error.h" 27 #include "qemu/timer.h" 28 29 #include "cpu_bits.h" 30 #include "riscv-iommu.h" 31 #include "riscv-iommu-bits.h" 32 #include "trace.h" 33 34 #define LIMIT_CACHE_CTX (1U << 7) 35 #define LIMIT_CACHE_IOT (1U << 20) 36 37 /* Physical page number coversions */ 38 #define PPN_PHYS(ppn) ((ppn) << TARGET_PAGE_BITS) 39 #define PPN_DOWN(phy) ((phy) >> TARGET_PAGE_BITS) 40 41 typedef struct RISCVIOMMUContext RISCVIOMMUContext; 42 typedef struct RISCVIOMMUEntry RISCVIOMMUEntry; 43 44 /* Device assigned I/O address space */ 45 struct RISCVIOMMUSpace { 46 IOMMUMemoryRegion iova_mr; /* IOVA memory region for attached device */ 47 AddressSpace iova_as; /* IOVA address space for attached device */ 48 RISCVIOMMUState *iommu; /* Managing IOMMU device state */ 49 uint32_t devid; /* Requester identifier, AKA device_id */ 50 bool notifier; /* IOMMU unmap notifier enabled */ 51 QLIST_ENTRY(RISCVIOMMUSpace) list; 52 }; 53 54 /* Device translation context state. */ 55 struct RISCVIOMMUContext { 56 uint64_t devid:24; /* Requester Id, AKA device_id */ 57 uint64_t process_id:20; /* Process ID. PASID for PCIe */ 58 uint64_t tc; /* Translation Control */ 59 uint64_t ta; /* Translation Attributes */ 60 uint64_t satp; /* S-Stage address translation and protection */ 61 uint64_t gatp; /* G-Stage address translation and protection */ 62 uint64_t msi_addr_mask; /* MSI filtering - address mask */ 63 uint64_t msi_addr_pattern; /* MSI filtering - address pattern */ 64 uint64_t msiptp; /* MSI redirection page table pointer */ 65 }; 66 67 typedef enum RISCVIOMMUTransTag { 68 RISCV_IOMMU_TRANS_TAG_BY, /* Bypass */ 69 RISCV_IOMMU_TRANS_TAG_SS, /* Single Stage */ 70 RISCV_IOMMU_TRANS_TAG_VG, /* G-stage only */ 71 RISCV_IOMMU_TRANS_TAG_VN, /* Nested translation */ 72 } RISCVIOMMUTransTag; 73 74 /* Address translation cache entry */ 75 struct RISCVIOMMUEntry { 76 RISCVIOMMUTransTag tag; /* Translation Tag */ 77 uint64_t iova:44; /* IOVA Page Number */ 78 uint64_t pscid:20; /* Process Soft-Context identifier */ 79 uint64_t phys:44; /* Physical Page Number */ 80 uint64_t gscid:16; /* Guest Soft-Context identifier */ 81 uint64_t perm:2; /* IOMMU_RW flags */ 82 }; 83 84 /* IOMMU index for transactions without process_id specified. */ 85 #define RISCV_IOMMU_NOPROCID 0 86 87 static uint8_t riscv_iommu_get_icvec_vector(uint32_t icvec, uint32_t vec_type) 88 { 89 switch (vec_type) { 90 case RISCV_IOMMU_INTR_CQ: 91 return icvec & RISCV_IOMMU_ICVEC_CIV; 92 case RISCV_IOMMU_INTR_FQ: 93 return (icvec & RISCV_IOMMU_ICVEC_FIV) >> 4; 94 case RISCV_IOMMU_INTR_PM: 95 return (icvec & RISCV_IOMMU_ICVEC_PMIV) >> 8; 96 case RISCV_IOMMU_INTR_PQ: 97 return (icvec & RISCV_IOMMU_ICVEC_PIV) >> 12; 98 default: 99 g_assert_not_reached(); 100 } 101 } 102 103 static void riscv_iommu_notify(RISCVIOMMUState *s, int vec_type) 104 { 105 uint32_t ipsr, icvec, vector; 106 107 if (!s->notify) { 108 return; 109 } 110 111 icvec = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_ICVEC); 112 ipsr = riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_IPSR, (1 << vec_type), 0); 113 114 if (!(ipsr & (1 << vec_type))) { 115 vector = riscv_iommu_get_icvec_vector(icvec, vec_type); 116 s->notify(s, vector); 117 trace_riscv_iommu_notify_int_vector(vec_type, vector); 118 } 119 } 120 121 static void riscv_iommu_fault(RISCVIOMMUState *s, 122 struct riscv_iommu_fq_record *ev) 123 { 124 uint32_t ctrl = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_FQCSR); 125 uint32_t head = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_FQH) & s->fq_mask; 126 uint32_t tail = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_FQT) & s->fq_mask; 127 uint32_t next = (tail + 1) & s->fq_mask; 128 uint32_t devid = get_field(ev->hdr, RISCV_IOMMU_FQ_HDR_DID); 129 130 trace_riscv_iommu_flt(s->parent_obj.id, PCI_BUS_NUM(devid), PCI_SLOT(devid), 131 PCI_FUNC(devid), ev->hdr, ev->iotval); 132 133 if (!(ctrl & RISCV_IOMMU_FQCSR_FQON) || 134 !!(ctrl & (RISCV_IOMMU_FQCSR_FQOF | RISCV_IOMMU_FQCSR_FQMF))) { 135 return; 136 } 137 138 if (head == next) { 139 riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_FQCSR, 140 RISCV_IOMMU_FQCSR_FQOF, 0); 141 } else { 142 dma_addr_t addr = s->fq_addr + tail * sizeof(*ev); 143 if (dma_memory_write(s->target_as, addr, ev, sizeof(*ev), 144 MEMTXATTRS_UNSPECIFIED) != MEMTX_OK) { 145 riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_FQCSR, 146 RISCV_IOMMU_FQCSR_FQMF, 0); 147 } else { 148 riscv_iommu_reg_set32(s, RISCV_IOMMU_REG_FQT, next); 149 } 150 } 151 152 if (ctrl & RISCV_IOMMU_FQCSR_FIE) { 153 riscv_iommu_notify(s, RISCV_IOMMU_INTR_FQ); 154 } 155 } 156 157 static void riscv_iommu_pri(RISCVIOMMUState *s, 158 struct riscv_iommu_pq_record *pr) 159 { 160 uint32_t ctrl = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_PQCSR); 161 uint32_t head = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_PQH) & s->pq_mask; 162 uint32_t tail = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_PQT) & s->pq_mask; 163 uint32_t next = (tail + 1) & s->pq_mask; 164 uint32_t devid = get_field(pr->hdr, RISCV_IOMMU_PREQ_HDR_DID); 165 166 trace_riscv_iommu_pri(s->parent_obj.id, PCI_BUS_NUM(devid), PCI_SLOT(devid), 167 PCI_FUNC(devid), pr->payload); 168 169 if (!(ctrl & RISCV_IOMMU_PQCSR_PQON) || 170 !!(ctrl & (RISCV_IOMMU_PQCSR_PQOF | RISCV_IOMMU_PQCSR_PQMF))) { 171 return; 172 } 173 174 if (head == next) { 175 riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_PQCSR, 176 RISCV_IOMMU_PQCSR_PQOF, 0); 177 } else { 178 dma_addr_t addr = s->pq_addr + tail * sizeof(*pr); 179 if (dma_memory_write(s->target_as, addr, pr, sizeof(*pr), 180 MEMTXATTRS_UNSPECIFIED) != MEMTX_OK) { 181 riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_PQCSR, 182 RISCV_IOMMU_PQCSR_PQMF, 0); 183 } else { 184 riscv_iommu_reg_set32(s, RISCV_IOMMU_REG_PQT, next); 185 } 186 } 187 188 if (ctrl & RISCV_IOMMU_PQCSR_PIE) { 189 riscv_iommu_notify(s, RISCV_IOMMU_INTR_PQ); 190 } 191 } 192 193 /* 194 * Discards all bits from 'val' whose matching bits in the same 195 * positions in the mask 'ext' are zeros, and packs the remaining 196 * bits from 'val' contiguously at the least-significant end of the 197 * result, keeping the same bit order as 'val' and filling any 198 * other bits at the most-significant end of the result with zeros. 199 * 200 * For example, for the following 'val' and 'ext', the return 'ret' 201 * will be: 202 * 203 * val = a b c d e f g h 204 * ext = 1 0 1 0 0 1 1 0 205 * ret = 0 0 0 0 a c f g 206 * 207 * This function, taken from the riscv-iommu 1.0 spec, section 2.3.3 208 * "Process to translate addresses of MSIs", is similar to bit manip 209 * function PEXT (Parallel bits extract) from x86. 210 */ 211 static uint64_t riscv_iommu_pext_u64(uint64_t val, uint64_t ext) 212 { 213 uint64_t ret = 0; 214 uint64_t rot = 1; 215 216 while (ext) { 217 if (ext & 1) { 218 if (val & 1) { 219 ret |= rot; 220 } 221 rot <<= 1; 222 } 223 val >>= 1; 224 ext >>= 1; 225 } 226 227 return ret; 228 } 229 230 /* Check if GPA matches MSI/MRIF pattern. */ 231 static bool riscv_iommu_msi_check(RISCVIOMMUState *s, RISCVIOMMUContext *ctx, 232 dma_addr_t gpa) 233 { 234 if (!s->enable_msi) { 235 return false; 236 } 237 238 if (get_field(ctx->msiptp, RISCV_IOMMU_DC_MSIPTP_MODE) != 239 RISCV_IOMMU_DC_MSIPTP_MODE_FLAT) { 240 return false; /* Invalid MSI/MRIF mode */ 241 } 242 243 if ((PPN_DOWN(gpa) ^ ctx->msi_addr_pattern) & ~ctx->msi_addr_mask) { 244 return false; /* GPA not in MSI range defined by AIA IMSIC rules. */ 245 } 246 247 return true; 248 } 249 250 /* 251 * RISCV IOMMU Address Translation Lookup - Page Table Walk 252 * 253 * Note: Code is based on get_physical_address() from target/riscv/cpu_helper.c 254 * Both implementation can be merged into single helper function in future. 255 * Keeping them separate for now, as error reporting and flow specifics are 256 * sufficiently different for separate implementation. 257 * 258 * @s : IOMMU Device State 259 * @ctx : Translation context for device id and process address space id. 260 * @iotlb : translation data: physical address and access mode. 261 * @return : success or fault cause code. 262 */ 263 static int riscv_iommu_spa_fetch(RISCVIOMMUState *s, RISCVIOMMUContext *ctx, 264 IOMMUTLBEntry *iotlb) 265 { 266 dma_addr_t addr, base; 267 uint64_t satp, gatp, pte; 268 bool en_s, en_g; 269 struct { 270 unsigned char step; 271 unsigned char levels; 272 unsigned char ptidxbits; 273 unsigned char ptesize; 274 } sc[2]; 275 /* Translation stage phase */ 276 enum { 277 S_STAGE = 0, 278 G_STAGE = 1, 279 } pass; 280 MemTxResult ret; 281 282 satp = get_field(ctx->satp, RISCV_IOMMU_ATP_MODE_FIELD); 283 gatp = get_field(ctx->gatp, RISCV_IOMMU_ATP_MODE_FIELD); 284 285 en_s = satp != RISCV_IOMMU_DC_FSC_MODE_BARE; 286 en_g = gatp != RISCV_IOMMU_DC_IOHGATP_MODE_BARE; 287 288 /* 289 * Early check for MSI address match when IOVA == GPA. 290 * Note that the (!en_s) condition means that the MSI 291 * page table may only be used when guest pages are 292 * mapped using the g-stage page table, whether single- 293 * or two-stage paging is enabled. It's unavoidable though, 294 * because the spec mandates that we do a first-stage 295 * translation before we check the MSI page table, which 296 * means we can't do an early MSI check unless we have 297 * strictly !en_s. 298 */ 299 if (!en_s && (iotlb->perm & IOMMU_WO) && 300 riscv_iommu_msi_check(s, ctx, iotlb->iova)) { 301 iotlb->target_as = &s->trap_as; 302 iotlb->translated_addr = iotlb->iova; 303 iotlb->addr_mask = ~TARGET_PAGE_MASK; 304 return 0; 305 } 306 307 /* Exit early for pass-through mode. */ 308 if (!(en_s || en_g)) { 309 iotlb->translated_addr = iotlb->iova; 310 iotlb->addr_mask = ~TARGET_PAGE_MASK; 311 /* Allow R/W in pass-through mode */ 312 iotlb->perm = IOMMU_RW; 313 return 0; 314 } 315 316 /* S/G translation parameters. */ 317 for (pass = 0; pass < 2; pass++) { 318 uint32_t sv_mode; 319 320 sc[pass].step = 0; 321 if (pass ? (s->fctl & RISCV_IOMMU_FCTL_GXL) : 322 (ctx->tc & RISCV_IOMMU_DC_TC_SXL)) { 323 /* 32bit mode for GXL/SXL == 1 */ 324 switch (pass ? gatp : satp) { 325 case RISCV_IOMMU_DC_IOHGATP_MODE_BARE: 326 sc[pass].levels = 0; 327 sc[pass].ptidxbits = 0; 328 sc[pass].ptesize = 0; 329 break; 330 case RISCV_IOMMU_DC_IOHGATP_MODE_SV32X4: 331 sv_mode = pass ? RISCV_IOMMU_CAP_SV32X4 : RISCV_IOMMU_CAP_SV32; 332 if (!(s->cap & sv_mode)) { 333 return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED; 334 } 335 sc[pass].levels = 2; 336 sc[pass].ptidxbits = 10; 337 sc[pass].ptesize = 4; 338 break; 339 default: 340 return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED; 341 } 342 } else { 343 /* 64bit mode for GXL/SXL == 0 */ 344 switch (pass ? gatp : satp) { 345 case RISCV_IOMMU_DC_IOHGATP_MODE_BARE: 346 sc[pass].levels = 0; 347 sc[pass].ptidxbits = 0; 348 sc[pass].ptesize = 0; 349 break; 350 case RISCV_IOMMU_DC_IOHGATP_MODE_SV39X4: 351 sv_mode = pass ? RISCV_IOMMU_CAP_SV39X4 : RISCV_IOMMU_CAP_SV39; 352 if (!(s->cap & sv_mode)) { 353 return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED; 354 } 355 sc[pass].levels = 3; 356 sc[pass].ptidxbits = 9; 357 sc[pass].ptesize = 8; 358 break; 359 case RISCV_IOMMU_DC_IOHGATP_MODE_SV48X4: 360 sv_mode = pass ? RISCV_IOMMU_CAP_SV48X4 : RISCV_IOMMU_CAP_SV48; 361 if (!(s->cap & sv_mode)) { 362 return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED; 363 } 364 sc[pass].levels = 4; 365 sc[pass].ptidxbits = 9; 366 sc[pass].ptesize = 8; 367 break; 368 case RISCV_IOMMU_DC_IOHGATP_MODE_SV57X4: 369 sv_mode = pass ? RISCV_IOMMU_CAP_SV57X4 : RISCV_IOMMU_CAP_SV57; 370 if (!(s->cap & sv_mode)) { 371 return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED; 372 } 373 sc[pass].levels = 5; 374 sc[pass].ptidxbits = 9; 375 sc[pass].ptesize = 8; 376 break; 377 default: 378 return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED; 379 } 380 } 381 }; 382 383 /* S/G stages translation tables root pointers */ 384 gatp = PPN_PHYS(get_field(ctx->gatp, RISCV_IOMMU_ATP_PPN_FIELD)); 385 satp = PPN_PHYS(get_field(ctx->satp, RISCV_IOMMU_ATP_PPN_FIELD)); 386 addr = (en_s && en_g) ? satp : iotlb->iova; 387 base = en_g ? gatp : satp; 388 pass = en_g ? G_STAGE : S_STAGE; 389 390 do { 391 const unsigned widened = (pass && !sc[pass].step) ? 2 : 0; 392 const unsigned va_bits = widened + sc[pass].ptidxbits; 393 const unsigned va_skip = TARGET_PAGE_BITS + sc[pass].ptidxbits * 394 (sc[pass].levels - 1 - sc[pass].step); 395 const unsigned idx = (addr >> va_skip) & ((1 << va_bits) - 1); 396 const dma_addr_t pte_addr = base + idx * sc[pass].ptesize; 397 const bool ade = 398 ctx->tc & (pass ? RISCV_IOMMU_DC_TC_GADE : RISCV_IOMMU_DC_TC_SADE); 399 400 /* Address range check before first level lookup */ 401 if (!sc[pass].step) { 402 const uint64_t va_len = va_skip + va_bits; 403 const uint64_t va_mask = (1ULL << va_len) - 1; 404 405 if (pass == S_STAGE && va_len > 32) { 406 target_ulong mask, masked_msbs; 407 408 mask = (1L << (TARGET_LONG_BITS - (va_len - 1))) - 1; 409 masked_msbs = (addr >> (va_len - 1)) & mask; 410 411 if (masked_msbs != 0 && masked_msbs != mask) { 412 return (iotlb->perm & IOMMU_WO) ? 413 RISCV_IOMMU_FQ_CAUSE_WR_FAULT_S : 414 RISCV_IOMMU_FQ_CAUSE_RD_FAULT_S; 415 } 416 } else { 417 if ((addr & va_mask) != addr) { 418 return (iotlb->perm & IOMMU_WO) ? 419 RISCV_IOMMU_FQ_CAUSE_WR_FAULT_VS : 420 RISCV_IOMMU_FQ_CAUSE_RD_FAULT_VS; 421 } 422 } 423 } 424 425 /* Read page table entry */ 426 if (sc[pass].ptesize == 4) { 427 uint32_t pte32 = 0; 428 ret = ldl_le_dma(s->target_as, pte_addr, &pte32, 429 MEMTXATTRS_UNSPECIFIED); 430 pte = pte32; 431 } else { 432 ret = ldq_le_dma(s->target_as, pte_addr, &pte, 433 MEMTXATTRS_UNSPECIFIED); 434 } 435 if (ret != MEMTX_OK) { 436 return (iotlb->perm & IOMMU_WO) ? RISCV_IOMMU_FQ_CAUSE_WR_FAULT 437 : RISCV_IOMMU_FQ_CAUSE_RD_FAULT; 438 } 439 440 sc[pass].step++; 441 hwaddr ppn = pte >> PTE_PPN_SHIFT; 442 443 if (!(pte & PTE_V)) { 444 break; /* Invalid PTE */ 445 } else if (!(pte & (PTE_R | PTE_W | PTE_X))) { 446 base = PPN_PHYS(ppn); /* Inner PTE, continue walking */ 447 } else if ((pte & (PTE_R | PTE_W | PTE_X)) == PTE_W) { 448 break; /* Reserved leaf PTE flags: PTE_W */ 449 } else if ((pte & (PTE_R | PTE_W | PTE_X)) == (PTE_W | PTE_X)) { 450 break; /* Reserved leaf PTE flags: PTE_W + PTE_X */ 451 } else if (ppn & ((1ULL << (va_skip - TARGET_PAGE_BITS)) - 1)) { 452 break; /* Misaligned PPN */ 453 } else if ((iotlb->perm & IOMMU_RO) && !(pte & PTE_R)) { 454 break; /* Read access check failed */ 455 } else if ((iotlb->perm & IOMMU_WO) && !(pte & PTE_W)) { 456 break; /* Write access check failed */ 457 } else if ((iotlb->perm & IOMMU_RO) && !ade && !(pte & PTE_A)) { 458 break; /* Access bit not set */ 459 } else if ((iotlb->perm & IOMMU_WO) && !ade && !(pte & PTE_D)) { 460 break; /* Dirty bit not set */ 461 } else { 462 /* Leaf PTE, translation completed. */ 463 sc[pass].step = sc[pass].levels; 464 base = PPN_PHYS(ppn) | (addr & ((1ULL << va_skip) - 1)); 465 /* Update address mask based on smallest translation granularity */ 466 iotlb->addr_mask &= (1ULL << va_skip) - 1; 467 /* Continue with S-Stage translation? */ 468 if (pass && sc[0].step != sc[0].levels) { 469 pass = S_STAGE; 470 addr = iotlb->iova; 471 continue; 472 } 473 /* Translation phase completed (GPA or SPA) */ 474 iotlb->translated_addr = base; 475 iotlb->perm = (pte & PTE_W) ? ((pte & PTE_R) ? IOMMU_RW : IOMMU_WO) 476 : IOMMU_RO; 477 478 /* Check MSI GPA address match */ 479 if (pass == S_STAGE && (iotlb->perm & IOMMU_WO) && 480 riscv_iommu_msi_check(s, ctx, base)) { 481 /* Trap MSI writes and return GPA address. */ 482 iotlb->target_as = &s->trap_as; 483 iotlb->addr_mask = ~TARGET_PAGE_MASK; 484 return 0; 485 } 486 487 /* Continue with G-Stage translation? */ 488 if (!pass && en_g) { 489 pass = G_STAGE; 490 addr = base; 491 base = gatp; 492 sc[pass].step = 0; 493 continue; 494 } 495 496 return 0; 497 } 498 499 if (sc[pass].step == sc[pass].levels) { 500 break; /* Can't find leaf PTE */ 501 } 502 503 /* Continue with G-Stage translation? */ 504 if (!pass && en_g) { 505 pass = G_STAGE; 506 addr = base; 507 base = gatp; 508 sc[pass].step = 0; 509 } 510 } while (1); 511 512 return (iotlb->perm & IOMMU_WO) ? 513 (pass ? RISCV_IOMMU_FQ_CAUSE_WR_FAULT_VS : 514 RISCV_IOMMU_FQ_CAUSE_WR_FAULT_S) : 515 (pass ? RISCV_IOMMU_FQ_CAUSE_RD_FAULT_VS : 516 RISCV_IOMMU_FQ_CAUSE_RD_FAULT_S); 517 } 518 519 static void riscv_iommu_report_fault(RISCVIOMMUState *s, 520 RISCVIOMMUContext *ctx, 521 uint32_t fault_type, uint32_t cause, 522 bool pv, 523 uint64_t iotval, uint64_t iotval2) 524 { 525 struct riscv_iommu_fq_record ev = { 0 }; 526 527 if (ctx->tc & RISCV_IOMMU_DC_TC_DTF) { 528 switch (cause) { 529 case RISCV_IOMMU_FQ_CAUSE_DMA_DISABLED: 530 case RISCV_IOMMU_FQ_CAUSE_DDT_LOAD_FAULT: 531 case RISCV_IOMMU_FQ_CAUSE_DDT_INVALID: 532 case RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED: 533 case RISCV_IOMMU_FQ_CAUSE_DDT_CORRUPTED: 534 case RISCV_IOMMU_FQ_CAUSE_INTERNAL_DP_ERROR: 535 case RISCV_IOMMU_FQ_CAUSE_MSI_WR_FAULT: 536 break; 537 default: 538 /* DTF prevents reporting a fault for this given cause */ 539 return; 540 } 541 } 542 543 ev.hdr = set_field(ev.hdr, RISCV_IOMMU_FQ_HDR_CAUSE, cause); 544 ev.hdr = set_field(ev.hdr, RISCV_IOMMU_FQ_HDR_TTYPE, fault_type); 545 ev.hdr = set_field(ev.hdr, RISCV_IOMMU_FQ_HDR_DID, ctx->devid); 546 ev.hdr = set_field(ev.hdr, RISCV_IOMMU_FQ_HDR_PV, true); 547 548 if (pv) { 549 ev.hdr = set_field(ev.hdr, RISCV_IOMMU_FQ_HDR_PID, ctx->process_id); 550 } 551 552 ev.iotval = iotval; 553 ev.iotval2 = iotval2; 554 555 riscv_iommu_fault(s, &ev); 556 } 557 558 /* Redirect MSI write for given GPA. */ 559 static MemTxResult riscv_iommu_msi_write(RISCVIOMMUState *s, 560 RISCVIOMMUContext *ctx, uint64_t gpa, uint64_t data, 561 unsigned size, MemTxAttrs attrs) 562 { 563 MemTxResult res; 564 dma_addr_t addr; 565 uint64_t intn; 566 uint32_t n190; 567 uint64_t pte[2]; 568 int fault_type = RISCV_IOMMU_FQ_TTYPE_UADDR_WR; 569 int cause; 570 571 /* Interrupt File Number */ 572 intn = riscv_iommu_pext_u64(PPN_DOWN(gpa), ctx->msi_addr_mask); 573 if (intn >= 256) { 574 /* Interrupt file number out of range */ 575 res = MEMTX_ACCESS_ERROR; 576 cause = RISCV_IOMMU_FQ_CAUSE_MSI_LOAD_FAULT; 577 goto err; 578 } 579 580 /* fetch MSI PTE */ 581 addr = PPN_PHYS(get_field(ctx->msiptp, RISCV_IOMMU_DC_MSIPTP_PPN)); 582 addr = addr | (intn * sizeof(pte)); 583 res = dma_memory_read(s->target_as, addr, &pte, sizeof(pte), 584 MEMTXATTRS_UNSPECIFIED); 585 if (res != MEMTX_OK) { 586 if (res == MEMTX_DECODE_ERROR) { 587 cause = RISCV_IOMMU_FQ_CAUSE_MSI_PT_CORRUPTED; 588 } else { 589 cause = RISCV_IOMMU_FQ_CAUSE_MSI_LOAD_FAULT; 590 } 591 goto err; 592 } 593 594 le64_to_cpus(&pte[0]); 595 le64_to_cpus(&pte[1]); 596 597 if (!(pte[0] & RISCV_IOMMU_MSI_PTE_V) || (pte[0] & RISCV_IOMMU_MSI_PTE_C)) { 598 /* 599 * The spec mentions that: "If msipte.C == 1, then further 600 * processing to interpret the PTE is implementation 601 * defined.". We'll abort with cause = 262 for this 602 * case too. 603 */ 604 res = MEMTX_ACCESS_ERROR; 605 cause = RISCV_IOMMU_FQ_CAUSE_MSI_INVALID; 606 goto err; 607 } 608 609 switch (get_field(pte[0], RISCV_IOMMU_MSI_PTE_M)) { 610 case RISCV_IOMMU_MSI_PTE_M_BASIC: 611 /* MSI Pass-through mode */ 612 addr = PPN_PHYS(get_field(pte[0], RISCV_IOMMU_MSI_PTE_PPN)); 613 614 trace_riscv_iommu_msi(s->parent_obj.id, PCI_BUS_NUM(ctx->devid), 615 PCI_SLOT(ctx->devid), PCI_FUNC(ctx->devid), 616 gpa, addr); 617 618 res = dma_memory_write(s->target_as, addr, &data, size, attrs); 619 if (res != MEMTX_OK) { 620 cause = RISCV_IOMMU_FQ_CAUSE_MSI_WR_FAULT; 621 goto err; 622 } 623 624 return MEMTX_OK; 625 case RISCV_IOMMU_MSI_PTE_M_MRIF: 626 /* MRIF mode, continue. */ 627 break; 628 default: 629 res = MEMTX_ACCESS_ERROR; 630 cause = RISCV_IOMMU_FQ_CAUSE_MSI_MISCONFIGURED; 631 goto err; 632 } 633 634 /* 635 * Report an error for interrupt identities exceeding the maximum allowed 636 * for an IMSIC interrupt file (2047) or destination address is not 32-bit 637 * aligned. See IOMMU Specification, Chapter 2.3. MSI page tables. 638 */ 639 if ((data > 2047) || (gpa & 3)) { 640 res = MEMTX_ACCESS_ERROR; 641 cause = RISCV_IOMMU_FQ_CAUSE_MSI_MISCONFIGURED; 642 goto err; 643 } 644 645 /* MSI MRIF mode, non atomic pending bit update */ 646 647 /* MRIF pending bit address */ 648 addr = get_field(pte[0], RISCV_IOMMU_MSI_PTE_MRIF_ADDR) << 9; 649 addr = addr | ((data & 0x7c0) >> 3); 650 651 trace_riscv_iommu_msi(s->parent_obj.id, PCI_BUS_NUM(ctx->devid), 652 PCI_SLOT(ctx->devid), PCI_FUNC(ctx->devid), 653 gpa, addr); 654 655 /* MRIF pending bit mask */ 656 data = 1ULL << (data & 0x03f); 657 res = dma_memory_read(s->target_as, addr, &intn, sizeof(intn), attrs); 658 if (res != MEMTX_OK) { 659 cause = RISCV_IOMMU_FQ_CAUSE_MSI_LOAD_FAULT; 660 goto err; 661 } 662 663 intn = intn | data; 664 res = dma_memory_write(s->target_as, addr, &intn, sizeof(intn), attrs); 665 if (res != MEMTX_OK) { 666 cause = RISCV_IOMMU_FQ_CAUSE_MSI_WR_FAULT; 667 goto err; 668 } 669 670 /* Get MRIF enable bits */ 671 addr = addr + sizeof(intn); 672 res = dma_memory_read(s->target_as, addr, &intn, sizeof(intn), attrs); 673 if (res != MEMTX_OK) { 674 cause = RISCV_IOMMU_FQ_CAUSE_MSI_LOAD_FAULT; 675 goto err; 676 } 677 678 if (!(intn & data)) { 679 /* notification disabled, MRIF update completed. */ 680 return MEMTX_OK; 681 } 682 683 /* Send notification message */ 684 addr = PPN_PHYS(get_field(pte[1], RISCV_IOMMU_MSI_MRIF_NPPN)); 685 n190 = get_field(pte[1], RISCV_IOMMU_MSI_MRIF_NID) | 686 (get_field(pte[1], RISCV_IOMMU_MSI_MRIF_NID_MSB) << 10); 687 688 res = dma_memory_write(s->target_as, addr, &n190, sizeof(n190), attrs); 689 if (res != MEMTX_OK) { 690 cause = RISCV_IOMMU_FQ_CAUSE_MSI_WR_FAULT; 691 goto err; 692 } 693 694 trace_riscv_iommu_mrif_notification(s->parent_obj.id, n190, addr); 695 696 return MEMTX_OK; 697 698 err: 699 riscv_iommu_report_fault(s, ctx, fault_type, cause, 700 !!ctx->process_id, 0, 0); 701 return res; 702 } 703 704 /* 705 * Check device context configuration as described by the 706 * riscv-iommu spec section "Device-context configuration 707 * checks". 708 */ 709 static bool riscv_iommu_validate_device_ctx(RISCVIOMMUState *s, 710 RISCVIOMMUContext *ctx) 711 { 712 uint32_t fsc_mode, msi_mode; 713 uint64_t gatp; 714 715 if (!(s->cap & RISCV_IOMMU_CAP_ATS) && 716 (ctx->tc & RISCV_IOMMU_DC_TC_EN_ATS || 717 ctx->tc & RISCV_IOMMU_DC_TC_EN_PRI || 718 ctx->tc & RISCV_IOMMU_DC_TC_PRPR)) { 719 return false; 720 } 721 722 if (!(ctx->tc & RISCV_IOMMU_DC_TC_EN_ATS) && 723 (ctx->tc & RISCV_IOMMU_DC_TC_T2GPA || 724 ctx->tc & RISCV_IOMMU_DC_TC_EN_PRI)) { 725 return false; 726 } 727 728 if (!(ctx->tc & RISCV_IOMMU_DC_TC_EN_PRI) && 729 ctx->tc & RISCV_IOMMU_DC_TC_PRPR) { 730 return false; 731 } 732 733 if (!(s->cap & RISCV_IOMMU_CAP_T2GPA) && 734 ctx->tc & RISCV_IOMMU_DC_TC_T2GPA) { 735 return false; 736 } 737 738 if (s->cap & RISCV_IOMMU_CAP_MSI_FLAT) { 739 msi_mode = get_field(ctx->msiptp, RISCV_IOMMU_DC_MSIPTP_MODE); 740 741 if (msi_mode != RISCV_IOMMU_DC_MSIPTP_MODE_OFF && 742 msi_mode != RISCV_IOMMU_DC_MSIPTP_MODE_FLAT) { 743 return false; 744 } 745 } 746 747 gatp = get_field(ctx->gatp, RISCV_IOMMU_ATP_MODE_FIELD); 748 if (ctx->tc & RISCV_IOMMU_DC_TC_T2GPA && 749 gatp == RISCV_IOMMU_DC_IOHGATP_MODE_BARE) { 750 return false; 751 } 752 753 fsc_mode = get_field(ctx->satp, RISCV_IOMMU_DC_FSC_MODE); 754 755 if (ctx->tc & RISCV_IOMMU_DC_TC_PDTV) { 756 switch (fsc_mode) { 757 case RISCV_IOMMU_DC_FSC_PDTP_MODE_PD8: 758 if (!(s->cap & RISCV_IOMMU_CAP_PD8)) { 759 return false; 760 } 761 break; 762 case RISCV_IOMMU_DC_FSC_PDTP_MODE_PD17: 763 if (!(s->cap & RISCV_IOMMU_CAP_PD17)) { 764 return false; 765 } 766 break; 767 case RISCV_IOMMU_DC_FSC_PDTP_MODE_PD20: 768 if (!(s->cap & RISCV_IOMMU_CAP_PD20)) { 769 return false; 770 } 771 break; 772 } 773 } else { 774 /* DC.tc.PDTV is 0 */ 775 if (ctx->tc & RISCV_IOMMU_DC_TC_DPE) { 776 return false; 777 } 778 779 if (ctx->tc & RISCV_IOMMU_DC_TC_SXL) { 780 if (fsc_mode == RISCV_IOMMU_CAP_SV32 && 781 !(s->cap & RISCV_IOMMU_CAP_SV32)) { 782 return false; 783 } 784 } else { 785 switch (fsc_mode) { 786 case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV39: 787 if (!(s->cap & RISCV_IOMMU_CAP_SV39)) { 788 return false; 789 } 790 break; 791 case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV48: 792 if (!(s->cap & RISCV_IOMMU_CAP_SV48)) { 793 return false; 794 } 795 break; 796 case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV57: 797 if (!(s->cap & RISCV_IOMMU_CAP_SV57)) { 798 return false; 799 } 800 break; 801 } 802 } 803 } 804 805 /* 806 * CAP_END is always zero (only one endianess). FCTL_BE is 807 * always zero (little-endian accesses). Thus TC_SBE must 808 * always be LE, i.e. zero. 809 */ 810 if (ctx->tc & RISCV_IOMMU_DC_TC_SBE) { 811 return false; 812 } 813 814 return true; 815 } 816 817 /* 818 * Validate process context (PC) according to section 819 * "Process-context configuration checks". 820 */ 821 static bool riscv_iommu_validate_process_ctx(RISCVIOMMUState *s, 822 RISCVIOMMUContext *ctx) 823 { 824 uint32_t mode; 825 826 if (get_field(ctx->ta, RISCV_IOMMU_PC_TA_RESERVED)) { 827 return false; 828 } 829 830 if (get_field(ctx->satp, RISCV_IOMMU_PC_FSC_RESERVED)) { 831 return false; 832 } 833 834 mode = get_field(ctx->satp, RISCV_IOMMU_DC_FSC_MODE); 835 switch (mode) { 836 case RISCV_IOMMU_DC_FSC_MODE_BARE: 837 /* sv39 and sv32 modes have the same value (8) */ 838 case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV39: 839 case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV48: 840 case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV57: 841 break; 842 default: 843 return false; 844 } 845 846 if (ctx->tc & RISCV_IOMMU_DC_TC_SXL) { 847 if (mode == RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV32 && 848 !(s->cap & RISCV_IOMMU_CAP_SV32)) { 849 return false; 850 } 851 } else { 852 switch (mode) { 853 case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV39: 854 if (!(s->cap & RISCV_IOMMU_CAP_SV39)) { 855 return false; 856 } 857 break; 858 case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV48: 859 if (!(s->cap & RISCV_IOMMU_CAP_SV48)) { 860 return false; 861 } 862 break; 863 case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV57: 864 if (!(s->cap & RISCV_IOMMU_CAP_SV57)) { 865 return false; 866 } 867 break; 868 } 869 } 870 871 return true; 872 } 873 874 /* 875 * RISC-V IOMMU Device Context Loopkup - Device Directory Tree Walk 876 * 877 * @s : IOMMU Device State 878 * @ctx : Device Translation Context with devid and process_id set. 879 * @return : success or fault code. 880 */ 881 static int riscv_iommu_ctx_fetch(RISCVIOMMUState *s, RISCVIOMMUContext *ctx) 882 { 883 const uint64_t ddtp = s->ddtp; 884 unsigned mode = get_field(ddtp, RISCV_IOMMU_DDTP_MODE); 885 dma_addr_t addr = PPN_PHYS(get_field(ddtp, RISCV_IOMMU_DDTP_PPN)); 886 struct riscv_iommu_dc dc; 887 /* Device Context format: 0: extended (64 bytes) | 1: base (32 bytes) */ 888 const int dc_fmt = !s->enable_msi; 889 const size_t dc_len = sizeof(dc) >> dc_fmt; 890 int depth; 891 uint64_t de; 892 893 switch (mode) { 894 case RISCV_IOMMU_DDTP_MODE_OFF: 895 return RISCV_IOMMU_FQ_CAUSE_DMA_DISABLED; 896 897 case RISCV_IOMMU_DDTP_MODE_BARE: 898 /* mock up pass-through translation context */ 899 ctx->gatp = set_field(0, RISCV_IOMMU_ATP_MODE_FIELD, 900 RISCV_IOMMU_DC_IOHGATP_MODE_BARE); 901 ctx->satp = set_field(0, RISCV_IOMMU_ATP_MODE_FIELD, 902 RISCV_IOMMU_DC_FSC_MODE_BARE); 903 904 ctx->tc = RISCV_IOMMU_DC_TC_V; 905 if (s->enable_ats) { 906 ctx->tc |= RISCV_IOMMU_DC_TC_EN_ATS; 907 } 908 909 ctx->ta = 0; 910 ctx->msiptp = 0; 911 return 0; 912 913 case RISCV_IOMMU_DDTP_MODE_1LVL: 914 depth = 0; 915 break; 916 917 case RISCV_IOMMU_DDTP_MODE_2LVL: 918 depth = 1; 919 break; 920 921 case RISCV_IOMMU_DDTP_MODE_3LVL: 922 depth = 2; 923 break; 924 925 default: 926 return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED; 927 } 928 929 /* 930 * Check supported device id width (in bits). 931 * See IOMMU Specification, Chapter 6. Software guidelines. 932 * - if extended device-context format is used: 933 * 1LVL: 6, 2LVL: 15, 3LVL: 24 934 * - if base device-context format is used: 935 * 1LVL: 7, 2LVL: 16, 3LVL: 24 936 */ 937 if (ctx->devid >= (1 << (depth * 9 + 6 + (dc_fmt && depth != 2)))) { 938 return RISCV_IOMMU_FQ_CAUSE_TTYPE_BLOCKED; 939 } 940 941 /* Device directory tree walk */ 942 for (; depth-- > 0; ) { 943 /* 944 * Select device id index bits based on device directory tree level 945 * and device context format. 946 * See IOMMU Specification, Chapter 2. Data Structures. 947 * - if extended device-context format is used: 948 * device index: [23:15][14:6][5:0] 949 * - if base device-context format is used: 950 * device index: [23:16][15:7][6:0] 951 */ 952 const int split = depth * 9 + 6 + dc_fmt; 953 addr |= ((ctx->devid >> split) << 3) & ~TARGET_PAGE_MASK; 954 if (dma_memory_read(s->target_as, addr, &de, sizeof(de), 955 MEMTXATTRS_UNSPECIFIED) != MEMTX_OK) { 956 return RISCV_IOMMU_FQ_CAUSE_DDT_LOAD_FAULT; 957 } 958 le64_to_cpus(&de); 959 if (!(de & RISCV_IOMMU_DDTE_VALID)) { 960 /* invalid directory entry */ 961 return RISCV_IOMMU_FQ_CAUSE_DDT_INVALID; 962 } 963 if (de & ~(RISCV_IOMMU_DDTE_PPN | RISCV_IOMMU_DDTE_VALID)) { 964 /* reserved bits set */ 965 return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED; 966 } 967 addr = PPN_PHYS(get_field(de, RISCV_IOMMU_DDTE_PPN)); 968 } 969 970 /* index into device context entry page */ 971 addr |= (ctx->devid * dc_len) & ~TARGET_PAGE_MASK; 972 973 memset(&dc, 0, sizeof(dc)); 974 if (dma_memory_read(s->target_as, addr, &dc, dc_len, 975 MEMTXATTRS_UNSPECIFIED) != MEMTX_OK) { 976 return RISCV_IOMMU_FQ_CAUSE_DDT_LOAD_FAULT; 977 } 978 979 /* Set translation context. */ 980 ctx->tc = le64_to_cpu(dc.tc); 981 ctx->gatp = le64_to_cpu(dc.iohgatp); 982 ctx->satp = le64_to_cpu(dc.fsc); 983 ctx->ta = le64_to_cpu(dc.ta); 984 ctx->msiptp = le64_to_cpu(dc.msiptp); 985 ctx->msi_addr_mask = le64_to_cpu(dc.msi_addr_mask); 986 ctx->msi_addr_pattern = le64_to_cpu(dc.msi_addr_pattern); 987 988 if (!(ctx->tc & RISCV_IOMMU_DC_TC_V)) { 989 return RISCV_IOMMU_FQ_CAUSE_DDT_INVALID; 990 } 991 992 if (!riscv_iommu_validate_device_ctx(s, ctx)) { 993 return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED; 994 } 995 996 /* FSC field checks */ 997 mode = get_field(ctx->satp, RISCV_IOMMU_DC_FSC_MODE); 998 addr = PPN_PHYS(get_field(ctx->satp, RISCV_IOMMU_DC_FSC_PPN)); 999 1000 if (!(ctx->tc & RISCV_IOMMU_DC_TC_PDTV)) { 1001 if (ctx->process_id != RISCV_IOMMU_NOPROCID) { 1002 /* PID is disabled */ 1003 return RISCV_IOMMU_FQ_CAUSE_TTYPE_BLOCKED; 1004 } 1005 if (mode > RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV57) { 1006 /* Invalid translation mode */ 1007 return RISCV_IOMMU_FQ_CAUSE_DDT_INVALID; 1008 } 1009 return 0; 1010 } 1011 1012 if (ctx->process_id == RISCV_IOMMU_NOPROCID) { 1013 if (!(ctx->tc & RISCV_IOMMU_DC_TC_DPE)) { 1014 /* No default process_id enabled, set BARE mode */ 1015 ctx->satp = 0ULL; 1016 return 0; 1017 } else { 1018 /* Use default process_id #0 */ 1019 ctx->process_id = 0; 1020 } 1021 } 1022 1023 if (mode == RISCV_IOMMU_DC_FSC_MODE_BARE) { 1024 /* No S-Stage translation, done. */ 1025 return 0; 1026 } 1027 1028 /* FSC.TC.PDTV enabled */ 1029 if (mode > RISCV_IOMMU_DC_FSC_PDTP_MODE_PD20) { 1030 /* Invalid PDTP.MODE */ 1031 return RISCV_IOMMU_FQ_CAUSE_PDT_MISCONFIGURED; 1032 } 1033 1034 for (depth = mode - RISCV_IOMMU_DC_FSC_PDTP_MODE_PD8; depth-- > 0; ) { 1035 /* 1036 * Select process id index bits based on process directory tree 1037 * level. See IOMMU Specification, 2.2. Process-Directory-Table. 1038 */ 1039 const int split = depth * 9 + 8; 1040 addr |= ((ctx->process_id >> split) << 3) & ~TARGET_PAGE_MASK; 1041 if (dma_memory_read(s->target_as, addr, &de, sizeof(de), 1042 MEMTXATTRS_UNSPECIFIED) != MEMTX_OK) { 1043 return RISCV_IOMMU_FQ_CAUSE_PDT_LOAD_FAULT; 1044 } 1045 le64_to_cpus(&de); 1046 if (!(de & RISCV_IOMMU_PC_TA_V)) { 1047 return RISCV_IOMMU_FQ_CAUSE_PDT_INVALID; 1048 } 1049 addr = PPN_PHYS(get_field(de, RISCV_IOMMU_PC_FSC_PPN)); 1050 } 1051 1052 /* Leaf entry in PDT */ 1053 addr |= (ctx->process_id << 4) & ~TARGET_PAGE_MASK; 1054 if (dma_memory_read(s->target_as, addr, &dc.ta, sizeof(uint64_t) * 2, 1055 MEMTXATTRS_UNSPECIFIED) != MEMTX_OK) { 1056 return RISCV_IOMMU_FQ_CAUSE_PDT_LOAD_FAULT; 1057 } 1058 1059 /* Use FSC and TA from process directory entry. */ 1060 ctx->ta = le64_to_cpu(dc.ta); 1061 ctx->satp = le64_to_cpu(dc.fsc); 1062 1063 if (!(ctx->ta & RISCV_IOMMU_PC_TA_V)) { 1064 return RISCV_IOMMU_FQ_CAUSE_PDT_INVALID; 1065 } 1066 1067 if (!riscv_iommu_validate_process_ctx(s, ctx)) { 1068 return RISCV_IOMMU_FQ_CAUSE_PDT_MISCONFIGURED; 1069 } 1070 1071 return 0; 1072 } 1073 1074 /* Translation Context cache support */ 1075 static gboolean riscv_iommu_ctx_equal(gconstpointer v1, gconstpointer v2) 1076 { 1077 RISCVIOMMUContext *c1 = (RISCVIOMMUContext *) v1; 1078 RISCVIOMMUContext *c2 = (RISCVIOMMUContext *) v2; 1079 return c1->devid == c2->devid && 1080 c1->process_id == c2->process_id; 1081 } 1082 1083 static guint riscv_iommu_ctx_hash(gconstpointer v) 1084 { 1085 RISCVIOMMUContext *ctx = (RISCVIOMMUContext *) v; 1086 /* 1087 * Generate simple hash of (process_id, devid) 1088 * assuming 24-bit wide devid. 1089 */ 1090 return (guint)(ctx->devid) + ((guint)(ctx->process_id) << 24); 1091 } 1092 1093 static void riscv_iommu_ctx_inval_devid_procid(gpointer key, gpointer value, 1094 gpointer data) 1095 { 1096 RISCVIOMMUContext *ctx = (RISCVIOMMUContext *) value; 1097 RISCVIOMMUContext *arg = (RISCVIOMMUContext *) data; 1098 if (ctx->tc & RISCV_IOMMU_DC_TC_V && 1099 ctx->devid == arg->devid && 1100 ctx->process_id == arg->process_id) { 1101 ctx->tc &= ~RISCV_IOMMU_DC_TC_V; 1102 } 1103 } 1104 1105 static void riscv_iommu_ctx_inval_devid(gpointer key, gpointer value, 1106 gpointer data) 1107 { 1108 RISCVIOMMUContext *ctx = (RISCVIOMMUContext *) value; 1109 RISCVIOMMUContext *arg = (RISCVIOMMUContext *) data; 1110 if (ctx->tc & RISCV_IOMMU_DC_TC_V && 1111 ctx->devid == arg->devid) { 1112 ctx->tc &= ~RISCV_IOMMU_DC_TC_V; 1113 } 1114 } 1115 1116 static void riscv_iommu_ctx_inval_all(gpointer key, gpointer value, 1117 gpointer data) 1118 { 1119 RISCVIOMMUContext *ctx = (RISCVIOMMUContext *) value; 1120 if (ctx->tc & RISCV_IOMMU_DC_TC_V) { 1121 ctx->tc &= ~RISCV_IOMMU_DC_TC_V; 1122 } 1123 } 1124 1125 static void riscv_iommu_ctx_inval(RISCVIOMMUState *s, GHFunc func, 1126 uint32_t devid, uint32_t process_id) 1127 { 1128 GHashTable *ctx_cache; 1129 RISCVIOMMUContext key = { 1130 .devid = devid, 1131 .process_id = process_id, 1132 }; 1133 ctx_cache = g_hash_table_ref(s->ctx_cache); 1134 g_hash_table_foreach(ctx_cache, func, &key); 1135 g_hash_table_unref(ctx_cache); 1136 } 1137 1138 /* Find or allocate translation context for a given {device_id, process_id} */ 1139 static RISCVIOMMUContext *riscv_iommu_ctx(RISCVIOMMUState *s, 1140 unsigned devid, unsigned process_id, 1141 void **ref) 1142 { 1143 GHashTable *ctx_cache; 1144 RISCVIOMMUContext *ctx; 1145 RISCVIOMMUContext key = { 1146 .devid = devid, 1147 .process_id = process_id, 1148 }; 1149 1150 ctx_cache = g_hash_table_ref(s->ctx_cache); 1151 ctx = g_hash_table_lookup(ctx_cache, &key); 1152 1153 if (ctx && (ctx->tc & RISCV_IOMMU_DC_TC_V)) { 1154 *ref = ctx_cache; 1155 return ctx; 1156 } 1157 1158 ctx = g_new0(RISCVIOMMUContext, 1); 1159 ctx->devid = devid; 1160 ctx->process_id = process_id; 1161 1162 int fault = riscv_iommu_ctx_fetch(s, ctx); 1163 if (!fault) { 1164 if (g_hash_table_size(ctx_cache) >= LIMIT_CACHE_CTX) { 1165 g_hash_table_unref(ctx_cache); 1166 ctx_cache = g_hash_table_new_full(riscv_iommu_ctx_hash, 1167 riscv_iommu_ctx_equal, 1168 g_free, NULL); 1169 g_hash_table_ref(ctx_cache); 1170 g_hash_table_unref(qatomic_xchg(&s->ctx_cache, ctx_cache)); 1171 } 1172 g_hash_table_add(ctx_cache, ctx); 1173 *ref = ctx_cache; 1174 return ctx; 1175 } 1176 1177 g_hash_table_unref(ctx_cache); 1178 *ref = NULL; 1179 1180 riscv_iommu_report_fault(s, ctx, RISCV_IOMMU_FQ_TTYPE_UADDR_RD, 1181 fault, !!process_id, 0, 0); 1182 1183 g_free(ctx); 1184 return NULL; 1185 } 1186 1187 static void riscv_iommu_ctx_put(RISCVIOMMUState *s, void *ref) 1188 { 1189 if (ref) { 1190 g_hash_table_unref((GHashTable *)ref); 1191 } 1192 } 1193 1194 /* Find or allocate address space for a given device */ 1195 static AddressSpace *riscv_iommu_space(RISCVIOMMUState *s, uint32_t devid) 1196 { 1197 RISCVIOMMUSpace *as; 1198 1199 /* FIXME: PCIe bus remapping for attached endpoints. */ 1200 devid |= s->bus << 8; 1201 1202 QLIST_FOREACH(as, &s->spaces, list) { 1203 if (as->devid == devid) { 1204 break; 1205 } 1206 } 1207 1208 if (as == NULL) { 1209 char name[64]; 1210 as = g_new0(RISCVIOMMUSpace, 1); 1211 1212 as->iommu = s; 1213 as->devid = devid; 1214 1215 snprintf(name, sizeof(name), "riscv-iommu-%04x:%02x.%d-iova", 1216 PCI_BUS_NUM(as->devid), PCI_SLOT(as->devid), PCI_FUNC(as->devid)); 1217 1218 /* IOVA address space, untranslated addresses */ 1219 memory_region_init_iommu(&as->iova_mr, sizeof(as->iova_mr), 1220 TYPE_RISCV_IOMMU_MEMORY_REGION, 1221 OBJECT(as), "riscv_iommu", UINT64_MAX); 1222 address_space_init(&as->iova_as, MEMORY_REGION(&as->iova_mr), name); 1223 1224 QLIST_INSERT_HEAD(&s->spaces, as, list); 1225 1226 trace_riscv_iommu_new(s->parent_obj.id, PCI_BUS_NUM(as->devid), 1227 PCI_SLOT(as->devid), PCI_FUNC(as->devid)); 1228 } 1229 return &as->iova_as; 1230 } 1231 1232 /* Translation Object cache support */ 1233 static gboolean riscv_iommu_iot_equal(gconstpointer v1, gconstpointer v2) 1234 { 1235 RISCVIOMMUEntry *t1 = (RISCVIOMMUEntry *) v1; 1236 RISCVIOMMUEntry *t2 = (RISCVIOMMUEntry *) v2; 1237 return t1->gscid == t2->gscid && t1->pscid == t2->pscid && 1238 t1->iova == t2->iova && t1->tag == t2->tag; 1239 } 1240 1241 static guint riscv_iommu_iot_hash(gconstpointer v) 1242 { 1243 RISCVIOMMUEntry *t = (RISCVIOMMUEntry *) v; 1244 return (guint)t->iova; 1245 } 1246 1247 /* GV: 0 AV: 0 PSCV: 0 GVMA: 0 */ 1248 /* GV: 0 AV: 0 GVMA: 1 */ 1249 static 1250 void riscv_iommu_iot_inval_all(gpointer key, gpointer value, gpointer data) 1251 { 1252 RISCVIOMMUEntry *iot = (RISCVIOMMUEntry *) value; 1253 RISCVIOMMUEntry *arg = (RISCVIOMMUEntry *) data; 1254 if (iot->tag == arg->tag) { 1255 iot->perm = IOMMU_NONE; 1256 } 1257 } 1258 1259 /* GV: 0 AV: 0 PSCV: 1 GVMA: 0 */ 1260 static 1261 void riscv_iommu_iot_inval_pscid(gpointer key, gpointer value, gpointer data) 1262 { 1263 RISCVIOMMUEntry *iot = (RISCVIOMMUEntry *) value; 1264 RISCVIOMMUEntry *arg = (RISCVIOMMUEntry *) data; 1265 if (iot->tag == arg->tag && 1266 iot->pscid == arg->pscid) { 1267 iot->perm = IOMMU_NONE; 1268 } 1269 } 1270 1271 /* GV: 0 AV: 1 PSCV: 0 GVMA: 0 */ 1272 static 1273 void riscv_iommu_iot_inval_iova(gpointer key, gpointer value, gpointer data) 1274 { 1275 RISCVIOMMUEntry *iot = (RISCVIOMMUEntry *) value; 1276 RISCVIOMMUEntry *arg = (RISCVIOMMUEntry *) data; 1277 if (iot->tag == arg->tag && 1278 iot->iova == arg->iova) { 1279 iot->perm = IOMMU_NONE; 1280 } 1281 } 1282 1283 /* GV: 0 AV: 1 PSCV: 1 GVMA: 0 */ 1284 static void riscv_iommu_iot_inval_pscid_iova(gpointer key, gpointer value, 1285 gpointer data) 1286 { 1287 RISCVIOMMUEntry *iot = (RISCVIOMMUEntry *) value; 1288 RISCVIOMMUEntry *arg = (RISCVIOMMUEntry *) data; 1289 if (iot->tag == arg->tag && 1290 iot->pscid == arg->pscid && 1291 iot->iova == arg->iova) { 1292 iot->perm = IOMMU_NONE; 1293 } 1294 } 1295 1296 /* GV: 1 AV: 0 PSCV: 0 GVMA: 0 */ 1297 /* GV: 1 AV: 0 GVMA: 1 */ 1298 static 1299 void riscv_iommu_iot_inval_gscid(gpointer key, gpointer value, gpointer data) 1300 { 1301 RISCVIOMMUEntry *iot = (RISCVIOMMUEntry *) value; 1302 RISCVIOMMUEntry *arg = (RISCVIOMMUEntry *) data; 1303 if (iot->tag == arg->tag && 1304 iot->gscid == arg->gscid) { 1305 iot->perm = IOMMU_NONE; 1306 } 1307 } 1308 1309 /* GV: 1 AV: 0 PSCV: 1 GVMA: 0 */ 1310 static void riscv_iommu_iot_inval_gscid_pscid(gpointer key, gpointer value, 1311 gpointer data) 1312 { 1313 RISCVIOMMUEntry *iot = (RISCVIOMMUEntry *) value; 1314 RISCVIOMMUEntry *arg = (RISCVIOMMUEntry *) data; 1315 if (iot->tag == arg->tag && 1316 iot->gscid == arg->gscid && 1317 iot->pscid == arg->pscid) { 1318 iot->perm = IOMMU_NONE; 1319 } 1320 } 1321 1322 /* GV: 1 AV: 1 PSCV: 0 GVMA: 0 */ 1323 /* GV: 1 AV: 1 GVMA: 1 */ 1324 static void riscv_iommu_iot_inval_gscid_iova(gpointer key, gpointer value, 1325 gpointer data) 1326 { 1327 RISCVIOMMUEntry *iot = (RISCVIOMMUEntry *) value; 1328 RISCVIOMMUEntry *arg = (RISCVIOMMUEntry *) data; 1329 if (iot->tag == arg->tag && 1330 iot->gscid == arg->gscid && 1331 iot->iova == arg->iova) { 1332 iot->perm = IOMMU_NONE; 1333 } 1334 } 1335 1336 /* GV: 1 AV: 1 PSCV: 1 GVMA: 0 */ 1337 static void riscv_iommu_iot_inval_gscid_pscid_iova(gpointer key, gpointer value, 1338 gpointer data) 1339 { 1340 RISCVIOMMUEntry *iot = (RISCVIOMMUEntry *) value; 1341 RISCVIOMMUEntry *arg = (RISCVIOMMUEntry *) data; 1342 if (iot->tag == arg->tag && 1343 iot->gscid == arg->gscid && 1344 iot->pscid == arg->pscid && 1345 iot->iova == arg->iova) { 1346 iot->perm = IOMMU_NONE; 1347 } 1348 } 1349 1350 /* caller should keep ref-count for iot_cache object */ 1351 static RISCVIOMMUEntry *riscv_iommu_iot_lookup(RISCVIOMMUContext *ctx, 1352 GHashTable *iot_cache, hwaddr iova, RISCVIOMMUTransTag transtag) 1353 { 1354 RISCVIOMMUEntry key = { 1355 .tag = transtag, 1356 .gscid = get_field(ctx->gatp, RISCV_IOMMU_DC_IOHGATP_GSCID), 1357 .pscid = get_field(ctx->ta, RISCV_IOMMU_DC_TA_PSCID), 1358 .iova = PPN_DOWN(iova), 1359 }; 1360 return g_hash_table_lookup(iot_cache, &key); 1361 } 1362 1363 /* caller should keep ref-count for iot_cache object */ 1364 static void riscv_iommu_iot_update(RISCVIOMMUState *s, 1365 GHashTable *iot_cache, RISCVIOMMUEntry *iot) 1366 { 1367 if (!s->iot_limit) { 1368 return; 1369 } 1370 1371 if (g_hash_table_size(s->iot_cache) >= s->iot_limit) { 1372 iot_cache = g_hash_table_new_full(riscv_iommu_iot_hash, 1373 riscv_iommu_iot_equal, 1374 g_free, NULL); 1375 g_hash_table_unref(qatomic_xchg(&s->iot_cache, iot_cache)); 1376 } 1377 g_hash_table_add(iot_cache, iot); 1378 } 1379 1380 static void riscv_iommu_iot_inval(RISCVIOMMUState *s, GHFunc func, 1381 uint32_t gscid, uint32_t pscid, hwaddr iova, RISCVIOMMUTransTag transtag) 1382 { 1383 GHashTable *iot_cache; 1384 RISCVIOMMUEntry key = { 1385 .tag = transtag, 1386 .gscid = gscid, 1387 .pscid = pscid, 1388 .iova = PPN_DOWN(iova), 1389 }; 1390 1391 iot_cache = g_hash_table_ref(s->iot_cache); 1392 g_hash_table_foreach(iot_cache, func, &key); 1393 g_hash_table_unref(iot_cache); 1394 } 1395 1396 static RISCVIOMMUTransTag riscv_iommu_get_transtag(RISCVIOMMUContext *ctx) 1397 { 1398 uint64_t satp = get_field(ctx->satp, RISCV_IOMMU_ATP_MODE_FIELD); 1399 uint64_t gatp = get_field(ctx->gatp, RISCV_IOMMU_ATP_MODE_FIELD); 1400 1401 if (satp == RISCV_IOMMU_DC_FSC_MODE_BARE) { 1402 return (gatp == RISCV_IOMMU_DC_IOHGATP_MODE_BARE) ? 1403 RISCV_IOMMU_TRANS_TAG_BY : RISCV_IOMMU_TRANS_TAG_VG; 1404 } else { 1405 return (gatp == RISCV_IOMMU_DC_IOHGATP_MODE_BARE) ? 1406 RISCV_IOMMU_TRANS_TAG_SS : RISCV_IOMMU_TRANS_TAG_VN; 1407 } 1408 } 1409 1410 static int riscv_iommu_translate(RISCVIOMMUState *s, RISCVIOMMUContext *ctx, 1411 IOMMUTLBEntry *iotlb, bool enable_cache) 1412 { 1413 RISCVIOMMUTransTag transtag = riscv_iommu_get_transtag(ctx); 1414 RISCVIOMMUEntry *iot; 1415 IOMMUAccessFlags perm; 1416 bool enable_pid; 1417 bool enable_pri; 1418 GHashTable *iot_cache; 1419 int fault; 1420 1421 iot_cache = g_hash_table_ref(s->iot_cache); 1422 /* 1423 * TC[32] is reserved for custom extensions, used here to temporarily 1424 * enable automatic page-request generation for ATS queries. 1425 */ 1426 enable_pri = (iotlb->perm == IOMMU_NONE) && (ctx->tc & BIT_ULL(32)); 1427 enable_pid = (ctx->tc & RISCV_IOMMU_DC_TC_PDTV); 1428 1429 /* Check for ATS request. */ 1430 if (iotlb->perm == IOMMU_NONE) { 1431 /* Check if ATS is disabled. */ 1432 if (!(ctx->tc & RISCV_IOMMU_DC_TC_EN_ATS)) { 1433 enable_pri = false; 1434 fault = RISCV_IOMMU_FQ_CAUSE_TTYPE_BLOCKED; 1435 goto done; 1436 } 1437 } 1438 1439 iot = riscv_iommu_iot_lookup(ctx, iot_cache, iotlb->iova, transtag); 1440 perm = iot ? iot->perm : IOMMU_NONE; 1441 if (perm != IOMMU_NONE) { 1442 iotlb->translated_addr = PPN_PHYS(iot->phys); 1443 iotlb->addr_mask = ~TARGET_PAGE_MASK; 1444 iotlb->perm = perm; 1445 fault = 0; 1446 goto done; 1447 } 1448 1449 /* Translate using device directory / page table information. */ 1450 fault = riscv_iommu_spa_fetch(s, ctx, iotlb); 1451 1452 if (!fault && iotlb->target_as == &s->trap_as) { 1453 /* Do not cache trapped MSI translations */ 1454 goto done; 1455 } 1456 1457 /* 1458 * We made an implementation choice to not cache identity-mapped 1459 * translations, as allowed by the specification, to avoid 1460 * translation cache evictions for other devices sharing the 1461 * IOMMU hardware model. 1462 */ 1463 if (!fault && iotlb->translated_addr != iotlb->iova && enable_cache) { 1464 iot = g_new0(RISCVIOMMUEntry, 1); 1465 iot->iova = PPN_DOWN(iotlb->iova); 1466 iot->phys = PPN_DOWN(iotlb->translated_addr); 1467 iot->gscid = get_field(ctx->gatp, RISCV_IOMMU_DC_IOHGATP_GSCID); 1468 iot->pscid = get_field(ctx->ta, RISCV_IOMMU_DC_TA_PSCID); 1469 iot->perm = iotlb->perm; 1470 iot->tag = transtag; 1471 riscv_iommu_iot_update(s, iot_cache, iot); 1472 } 1473 1474 done: 1475 g_hash_table_unref(iot_cache); 1476 1477 if (enable_pri && fault) { 1478 struct riscv_iommu_pq_record pr = {0}; 1479 if (enable_pid) { 1480 pr.hdr = set_field(RISCV_IOMMU_PREQ_HDR_PV, 1481 RISCV_IOMMU_PREQ_HDR_PID, ctx->process_id); 1482 } 1483 pr.hdr = set_field(pr.hdr, RISCV_IOMMU_PREQ_HDR_DID, ctx->devid); 1484 pr.payload = (iotlb->iova & TARGET_PAGE_MASK) | 1485 RISCV_IOMMU_PREQ_PAYLOAD_M; 1486 riscv_iommu_pri(s, &pr); 1487 return fault; 1488 } 1489 1490 if (fault) { 1491 unsigned ttype = RISCV_IOMMU_FQ_TTYPE_PCIE_ATS_REQ; 1492 1493 if (iotlb->perm & IOMMU_RW) { 1494 ttype = RISCV_IOMMU_FQ_TTYPE_UADDR_WR; 1495 } else if (iotlb->perm & IOMMU_RO) { 1496 ttype = RISCV_IOMMU_FQ_TTYPE_UADDR_RD; 1497 } 1498 1499 riscv_iommu_report_fault(s, ctx, ttype, fault, enable_pid, 1500 iotlb->iova, iotlb->translated_addr); 1501 return fault; 1502 } 1503 1504 return 0; 1505 } 1506 1507 /* IOMMU Command Interface */ 1508 static MemTxResult riscv_iommu_iofence(RISCVIOMMUState *s, bool notify, 1509 uint64_t addr, uint32_t data) 1510 { 1511 /* 1512 * ATS processing in this implementation of the IOMMU is synchronous, 1513 * no need to wait for completions here. 1514 */ 1515 if (!notify) { 1516 return MEMTX_OK; 1517 } 1518 1519 return dma_memory_write(s->target_as, addr, &data, sizeof(data), 1520 MEMTXATTRS_UNSPECIFIED); 1521 } 1522 1523 static void riscv_iommu_ats(RISCVIOMMUState *s, 1524 struct riscv_iommu_command *cmd, IOMMUNotifierFlag flag, 1525 IOMMUAccessFlags perm, 1526 void (*trace_fn)(const char *id)) 1527 { 1528 RISCVIOMMUSpace *as = NULL; 1529 IOMMUNotifier *n; 1530 IOMMUTLBEvent event; 1531 uint32_t pid; 1532 uint32_t devid; 1533 const bool pv = cmd->dword0 & RISCV_IOMMU_CMD_ATS_PV; 1534 1535 if (cmd->dword0 & RISCV_IOMMU_CMD_ATS_DSV) { 1536 /* Use device segment and requester id */ 1537 devid = get_field(cmd->dword0, 1538 RISCV_IOMMU_CMD_ATS_DSEG | RISCV_IOMMU_CMD_ATS_RID); 1539 } else { 1540 devid = get_field(cmd->dword0, RISCV_IOMMU_CMD_ATS_RID); 1541 } 1542 1543 pid = get_field(cmd->dword0, RISCV_IOMMU_CMD_ATS_PID); 1544 1545 QLIST_FOREACH(as, &s->spaces, list) { 1546 if (as->devid == devid) { 1547 break; 1548 } 1549 } 1550 1551 if (!as || !as->notifier) { 1552 return; 1553 } 1554 1555 event.type = flag; 1556 event.entry.perm = perm; 1557 event.entry.target_as = s->target_as; 1558 1559 IOMMU_NOTIFIER_FOREACH(n, &as->iova_mr) { 1560 if (!pv || n->iommu_idx == pid) { 1561 event.entry.iova = n->start; 1562 event.entry.addr_mask = n->end - n->start; 1563 trace_fn(as->iova_mr.parent_obj.name); 1564 memory_region_notify_iommu_one(n, &event); 1565 } 1566 } 1567 } 1568 1569 static void riscv_iommu_ats_inval(RISCVIOMMUState *s, 1570 struct riscv_iommu_command *cmd) 1571 { 1572 return riscv_iommu_ats(s, cmd, IOMMU_NOTIFIER_DEVIOTLB_UNMAP, IOMMU_NONE, 1573 trace_riscv_iommu_ats_inval); 1574 } 1575 1576 static void riscv_iommu_ats_prgr(RISCVIOMMUState *s, 1577 struct riscv_iommu_command *cmd) 1578 { 1579 unsigned resp_code = get_field(cmd->dword1, 1580 RISCV_IOMMU_CMD_ATS_PRGR_RESP_CODE); 1581 1582 /* Using the access flag to carry response code information */ 1583 IOMMUAccessFlags perm = resp_code ? IOMMU_NONE : IOMMU_RW; 1584 return riscv_iommu_ats(s, cmd, IOMMU_NOTIFIER_MAP, perm, 1585 trace_riscv_iommu_ats_prgr); 1586 } 1587 1588 static void riscv_iommu_process_ddtp(RISCVIOMMUState *s) 1589 { 1590 uint64_t old_ddtp = s->ddtp; 1591 uint64_t new_ddtp = riscv_iommu_reg_get64(s, RISCV_IOMMU_REG_DDTP); 1592 unsigned new_mode = get_field(new_ddtp, RISCV_IOMMU_DDTP_MODE); 1593 unsigned old_mode = get_field(old_ddtp, RISCV_IOMMU_DDTP_MODE); 1594 bool ok = false; 1595 1596 /* 1597 * Check for allowed DDTP.MODE transitions: 1598 * {OFF, BARE} -> {OFF, BARE, 1LVL, 2LVL, 3LVL} 1599 * {1LVL, 2LVL, 3LVL} -> {OFF, BARE} 1600 */ 1601 if (new_mode == old_mode || 1602 new_mode == RISCV_IOMMU_DDTP_MODE_OFF || 1603 new_mode == RISCV_IOMMU_DDTP_MODE_BARE) { 1604 ok = true; 1605 } else if (new_mode == RISCV_IOMMU_DDTP_MODE_1LVL || 1606 new_mode == RISCV_IOMMU_DDTP_MODE_2LVL || 1607 new_mode == RISCV_IOMMU_DDTP_MODE_3LVL) { 1608 ok = old_mode == RISCV_IOMMU_DDTP_MODE_OFF || 1609 old_mode == RISCV_IOMMU_DDTP_MODE_BARE; 1610 } 1611 1612 if (ok) { 1613 /* clear reserved and busy bits, report back sanitized version */ 1614 new_ddtp = set_field(new_ddtp & RISCV_IOMMU_DDTP_PPN, 1615 RISCV_IOMMU_DDTP_MODE, new_mode); 1616 } else { 1617 new_ddtp = old_ddtp; 1618 } 1619 s->ddtp = new_ddtp; 1620 1621 riscv_iommu_reg_set64(s, RISCV_IOMMU_REG_DDTP, new_ddtp); 1622 } 1623 1624 /* Command function and opcode field. */ 1625 #define RISCV_IOMMU_CMD(func, op) (((func) << 7) | (op)) 1626 1627 static void riscv_iommu_process_cq_tail(RISCVIOMMUState *s) 1628 { 1629 struct riscv_iommu_command cmd; 1630 MemTxResult res; 1631 dma_addr_t addr; 1632 uint32_t tail, head, ctrl; 1633 uint64_t cmd_opcode; 1634 GHFunc func; 1635 1636 ctrl = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_CQCSR); 1637 tail = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_CQT) & s->cq_mask; 1638 head = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_CQH) & s->cq_mask; 1639 1640 /* Check for pending error or queue processing disabled */ 1641 if (!(ctrl & RISCV_IOMMU_CQCSR_CQON) || 1642 !!(ctrl & (RISCV_IOMMU_CQCSR_CMD_ILL | RISCV_IOMMU_CQCSR_CQMF))) { 1643 return; 1644 } 1645 1646 while (tail != head) { 1647 addr = s->cq_addr + head * sizeof(cmd); 1648 res = dma_memory_read(s->target_as, addr, &cmd, sizeof(cmd), 1649 MEMTXATTRS_UNSPECIFIED); 1650 1651 if (res != MEMTX_OK) { 1652 riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_CQCSR, 1653 RISCV_IOMMU_CQCSR_CQMF, 0); 1654 goto fault; 1655 } 1656 1657 trace_riscv_iommu_cmd(s->parent_obj.id, cmd.dword0, cmd.dword1); 1658 1659 cmd_opcode = get_field(cmd.dword0, 1660 RISCV_IOMMU_CMD_OPCODE | RISCV_IOMMU_CMD_FUNC); 1661 1662 switch (cmd_opcode) { 1663 case RISCV_IOMMU_CMD(RISCV_IOMMU_CMD_IOFENCE_FUNC_C, 1664 RISCV_IOMMU_CMD_IOFENCE_OPCODE): 1665 res = riscv_iommu_iofence(s, 1666 cmd.dword0 & RISCV_IOMMU_CMD_IOFENCE_AV, cmd.dword1 << 2, 1667 get_field(cmd.dword0, RISCV_IOMMU_CMD_IOFENCE_DATA)); 1668 1669 if (res != MEMTX_OK) { 1670 riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_CQCSR, 1671 RISCV_IOMMU_CQCSR_CQMF, 0); 1672 goto fault; 1673 } 1674 break; 1675 1676 case RISCV_IOMMU_CMD(RISCV_IOMMU_CMD_IOTINVAL_FUNC_GVMA, 1677 RISCV_IOMMU_CMD_IOTINVAL_OPCODE): 1678 { 1679 bool gv = !!(cmd.dword0 & RISCV_IOMMU_CMD_IOTINVAL_GV); 1680 bool av = !!(cmd.dword0 & RISCV_IOMMU_CMD_IOTINVAL_AV); 1681 bool pscv = !!(cmd.dword0 & RISCV_IOMMU_CMD_IOTINVAL_PSCV); 1682 uint32_t gscid = get_field(cmd.dword0, 1683 RISCV_IOMMU_CMD_IOTINVAL_GSCID); 1684 uint32_t pscid = get_field(cmd.dword0, 1685 RISCV_IOMMU_CMD_IOTINVAL_PSCID); 1686 hwaddr iova = (cmd.dword1 << 2) & TARGET_PAGE_MASK; 1687 1688 if (pscv) { 1689 /* illegal command arguments IOTINVAL.GVMA & PSCV == 1 */ 1690 goto cmd_ill; 1691 } 1692 1693 func = riscv_iommu_iot_inval_all; 1694 1695 if (gv) { 1696 func = (av) ? riscv_iommu_iot_inval_gscid_iova : 1697 riscv_iommu_iot_inval_gscid; 1698 } 1699 1700 riscv_iommu_iot_inval( 1701 s, func, gscid, pscid, iova, RISCV_IOMMU_TRANS_TAG_VG); 1702 1703 riscv_iommu_iot_inval( 1704 s, func, gscid, pscid, iova, RISCV_IOMMU_TRANS_TAG_VN); 1705 break; 1706 } 1707 1708 case RISCV_IOMMU_CMD(RISCV_IOMMU_CMD_IOTINVAL_FUNC_VMA, 1709 RISCV_IOMMU_CMD_IOTINVAL_OPCODE): 1710 { 1711 bool gv = !!(cmd.dword0 & RISCV_IOMMU_CMD_IOTINVAL_GV); 1712 bool av = !!(cmd.dword0 & RISCV_IOMMU_CMD_IOTINVAL_AV); 1713 bool pscv = !!(cmd.dword0 & RISCV_IOMMU_CMD_IOTINVAL_PSCV); 1714 uint32_t gscid = get_field(cmd.dword0, 1715 RISCV_IOMMU_CMD_IOTINVAL_GSCID); 1716 uint32_t pscid = get_field(cmd.dword0, 1717 RISCV_IOMMU_CMD_IOTINVAL_PSCID); 1718 hwaddr iova = (cmd.dword1 << 2) & TARGET_PAGE_MASK; 1719 RISCVIOMMUTransTag transtag; 1720 1721 if (gv) { 1722 transtag = RISCV_IOMMU_TRANS_TAG_VN; 1723 if (pscv) { 1724 func = (av) ? riscv_iommu_iot_inval_gscid_pscid_iova : 1725 riscv_iommu_iot_inval_gscid_pscid; 1726 } else { 1727 func = (av) ? riscv_iommu_iot_inval_gscid_iova : 1728 riscv_iommu_iot_inval_gscid; 1729 } 1730 } else { 1731 transtag = RISCV_IOMMU_TRANS_TAG_SS; 1732 if (pscv) { 1733 func = (av) ? riscv_iommu_iot_inval_pscid_iova : 1734 riscv_iommu_iot_inval_pscid; 1735 } else { 1736 func = (av) ? riscv_iommu_iot_inval_iova : 1737 riscv_iommu_iot_inval_all; 1738 } 1739 } 1740 1741 riscv_iommu_iot_inval(s, func, gscid, pscid, iova, transtag); 1742 break; 1743 } 1744 1745 case RISCV_IOMMU_CMD(RISCV_IOMMU_CMD_IODIR_FUNC_INVAL_DDT, 1746 RISCV_IOMMU_CMD_IODIR_OPCODE): 1747 if (!(cmd.dword0 & RISCV_IOMMU_CMD_IODIR_DV)) { 1748 /* invalidate all device context cache mappings */ 1749 func = riscv_iommu_ctx_inval_all; 1750 } else { 1751 /* invalidate all device context matching DID */ 1752 func = riscv_iommu_ctx_inval_devid; 1753 } 1754 riscv_iommu_ctx_inval(s, func, 1755 get_field(cmd.dword0, RISCV_IOMMU_CMD_IODIR_DID), 0); 1756 break; 1757 1758 case RISCV_IOMMU_CMD(RISCV_IOMMU_CMD_IODIR_FUNC_INVAL_PDT, 1759 RISCV_IOMMU_CMD_IODIR_OPCODE): 1760 if (!(cmd.dword0 & RISCV_IOMMU_CMD_IODIR_DV)) { 1761 /* illegal command arguments IODIR_PDT & DV == 0 */ 1762 goto cmd_ill; 1763 } else { 1764 func = riscv_iommu_ctx_inval_devid_procid; 1765 } 1766 riscv_iommu_ctx_inval(s, func, 1767 get_field(cmd.dword0, RISCV_IOMMU_CMD_IODIR_DID), 1768 get_field(cmd.dword0, RISCV_IOMMU_CMD_IODIR_PID)); 1769 break; 1770 1771 /* ATS commands */ 1772 case RISCV_IOMMU_CMD(RISCV_IOMMU_CMD_ATS_FUNC_INVAL, 1773 RISCV_IOMMU_CMD_ATS_OPCODE): 1774 if (!s->enable_ats) { 1775 goto cmd_ill; 1776 } 1777 1778 riscv_iommu_ats_inval(s, &cmd); 1779 break; 1780 1781 case RISCV_IOMMU_CMD(RISCV_IOMMU_CMD_ATS_FUNC_PRGR, 1782 RISCV_IOMMU_CMD_ATS_OPCODE): 1783 if (!s->enable_ats) { 1784 goto cmd_ill; 1785 } 1786 1787 riscv_iommu_ats_prgr(s, &cmd); 1788 break; 1789 1790 default: 1791 cmd_ill: 1792 /* Invalid instruction, do not advance instruction index. */ 1793 riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_CQCSR, 1794 RISCV_IOMMU_CQCSR_CMD_ILL, 0); 1795 goto fault; 1796 } 1797 1798 /* Advance and update head pointer after command completes. */ 1799 head = (head + 1) & s->cq_mask; 1800 riscv_iommu_reg_set32(s, RISCV_IOMMU_REG_CQH, head); 1801 } 1802 return; 1803 1804 fault: 1805 if (ctrl & RISCV_IOMMU_CQCSR_CIE) { 1806 riscv_iommu_notify(s, RISCV_IOMMU_INTR_CQ); 1807 } 1808 } 1809 1810 static void riscv_iommu_process_cq_control(RISCVIOMMUState *s) 1811 { 1812 uint64_t base; 1813 uint32_t ctrl_set = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_CQCSR); 1814 uint32_t ctrl_clr; 1815 bool enable = !!(ctrl_set & RISCV_IOMMU_CQCSR_CQEN); 1816 bool active = !!(ctrl_set & RISCV_IOMMU_CQCSR_CQON); 1817 1818 if (enable && !active) { 1819 base = riscv_iommu_reg_get64(s, RISCV_IOMMU_REG_CQB); 1820 s->cq_mask = (2ULL << get_field(base, RISCV_IOMMU_CQB_LOG2SZ)) - 1; 1821 s->cq_addr = PPN_PHYS(get_field(base, RISCV_IOMMU_CQB_PPN)); 1822 stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_CQT], ~s->cq_mask); 1823 stl_le_p(&s->regs_rw[RISCV_IOMMU_REG_CQH], 0); 1824 stl_le_p(&s->regs_rw[RISCV_IOMMU_REG_CQT], 0); 1825 ctrl_set = RISCV_IOMMU_CQCSR_CQON; 1826 ctrl_clr = RISCV_IOMMU_CQCSR_BUSY | RISCV_IOMMU_CQCSR_CQMF | 1827 RISCV_IOMMU_CQCSR_CMD_ILL | RISCV_IOMMU_CQCSR_CMD_TO | 1828 RISCV_IOMMU_CQCSR_FENCE_W_IP; 1829 } else if (!enable && active) { 1830 stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_CQT], ~0); 1831 ctrl_set = 0; 1832 ctrl_clr = RISCV_IOMMU_CQCSR_BUSY | RISCV_IOMMU_CQCSR_CQON; 1833 } else { 1834 ctrl_set = 0; 1835 ctrl_clr = RISCV_IOMMU_CQCSR_BUSY; 1836 } 1837 1838 riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_CQCSR, ctrl_set, ctrl_clr); 1839 } 1840 1841 static void riscv_iommu_process_fq_control(RISCVIOMMUState *s) 1842 { 1843 uint64_t base; 1844 uint32_t ctrl_set = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_FQCSR); 1845 uint32_t ctrl_clr; 1846 bool enable = !!(ctrl_set & RISCV_IOMMU_FQCSR_FQEN); 1847 bool active = !!(ctrl_set & RISCV_IOMMU_FQCSR_FQON); 1848 1849 if (enable && !active) { 1850 base = riscv_iommu_reg_get64(s, RISCV_IOMMU_REG_FQB); 1851 s->fq_mask = (2ULL << get_field(base, RISCV_IOMMU_FQB_LOG2SZ)) - 1; 1852 s->fq_addr = PPN_PHYS(get_field(base, RISCV_IOMMU_FQB_PPN)); 1853 stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_FQH], ~s->fq_mask); 1854 stl_le_p(&s->regs_rw[RISCV_IOMMU_REG_FQH], 0); 1855 stl_le_p(&s->regs_rw[RISCV_IOMMU_REG_FQT], 0); 1856 ctrl_set = RISCV_IOMMU_FQCSR_FQON; 1857 ctrl_clr = RISCV_IOMMU_FQCSR_BUSY | RISCV_IOMMU_FQCSR_FQMF | 1858 RISCV_IOMMU_FQCSR_FQOF; 1859 } else if (!enable && active) { 1860 stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_FQH], ~0); 1861 ctrl_set = 0; 1862 ctrl_clr = RISCV_IOMMU_FQCSR_BUSY | RISCV_IOMMU_FQCSR_FQON; 1863 } else { 1864 ctrl_set = 0; 1865 ctrl_clr = RISCV_IOMMU_FQCSR_BUSY; 1866 } 1867 1868 riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_FQCSR, ctrl_set, ctrl_clr); 1869 } 1870 1871 static void riscv_iommu_process_pq_control(RISCVIOMMUState *s) 1872 { 1873 uint64_t base; 1874 uint32_t ctrl_set = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_PQCSR); 1875 uint32_t ctrl_clr; 1876 bool enable = !!(ctrl_set & RISCV_IOMMU_PQCSR_PQEN); 1877 bool active = !!(ctrl_set & RISCV_IOMMU_PQCSR_PQON); 1878 1879 if (enable && !active) { 1880 base = riscv_iommu_reg_get64(s, RISCV_IOMMU_REG_PQB); 1881 s->pq_mask = (2ULL << get_field(base, RISCV_IOMMU_PQB_LOG2SZ)) - 1; 1882 s->pq_addr = PPN_PHYS(get_field(base, RISCV_IOMMU_PQB_PPN)); 1883 stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_PQH], ~s->pq_mask); 1884 stl_le_p(&s->regs_rw[RISCV_IOMMU_REG_PQH], 0); 1885 stl_le_p(&s->regs_rw[RISCV_IOMMU_REG_PQT], 0); 1886 ctrl_set = RISCV_IOMMU_PQCSR_PQON; 1887 ctrl_clr = RISCV_IOMMU_PQCSR_BUSY | RISCV_IOMMU_PQCSR_PQMF | 1888 RISCV_IOMMU_PQCSR_PQOF; 1889 } else if (!enable && active) { 1890 stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_PQH], ~0); 1891 ctrl_set = 0; 1892 ctrl_clr = RISCV_IOMMU_PQCSR_BUSY | RISCV_IOMMU_PQCSR_PQON; 1893 } else { 1894 ctrl_set = 0; 1895 ctrl_clr = RISCV_IOMMU_PQCSR_BUSY; 1896 } 1897 1898 riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_PQCSR, ctrl_set, ctrl_clr); 1899 } 1900 1901 static void riscv_iommu_process_dbg(RISCVIOMMUState *s) 1902 { 1903 uint64_t iova = riscv_iommu_reg_get64(s, RISCV_IOMMU_REG_TR_REQ_IOVA); 1904 uint64_t ctrl = riscv_iommu_reg_get64(s, RISCV_IOMMU_REG_TR_REQ_CTL); 1905 unsigned devid = get_field(ctrl, RISCV_IOMMU_TR_REQ_CTL_DID); 1906 unsigned pid = get_field(ctrl, RISCV_IOMMU_TR_REQ_CTL_PID); 1907 RISCVIOMMUContext *ctx; 1908 void *ref; 1909 1910 if (!(ctrl & RISCV_IOMMU_TR_REQ_CTL_GO_BUSY)) { 1911 return; 1912 } 1913 1914 ctx = riscv_iommu_ctx(s, devid, pid, &ref); 1915 if (ctx == NULL) { 1916 riscv_iommu_reg_set64(s, RISCV_IOMMU_REG_TR_RESPONSE, 1917 RISCV_IOMMU_TR_RESPONSE_FAULT | 1918 (RISCV_IOMMU_FQ_CAUSE_DMA_DISABLED << 10)); 1919 } else { 1920 IOMMUTLBEntry iotlb = { 1921 .iova = iova, 1922 .perm = ctrl & RISCV_IOMMU_TR_REQ_CTL_NW ? IOMMU_RO : IOMMU_RW, 1923 .addr_mask = ~0, 1924 .target_as = NULL, 1925 }; 1926 int fault = riscv_iommu_translate(s, ctx, &iotlb, false); 1927 if (fault) { 1928 iova = RISCV_IOMMU_TR_RESPONSE_FAULT | (((uint64_t) fault) << 10); 1929 } else { 1930 iova = iotlb.translated_addr & ~iotlb.addr_mask; 1931 iova >>= TARGET_PAGE_BITS; 1932 iova &= RISCV_IOMMU_TR_RESPONSE_PPN; 1933 1934 /* We do not support superpages (> 4kbs) for now */ 1935 iova &= ~RISCV_IOMMU_TR_RESPONSE_S; 1936 } 1937 riscv_iommu_reg_set64(s, RISCV_IOMMU_REG_TR_RESPONSE, iova); 1938 } 1939 1940 riscv_iommu_reg_mod64(s, RISCV_IOMMU_REG_TR_REQ_CTL, 0, 1941 RISCV_IOMMU_TR_REQ_CTL_GO_BUSY); 1942 riscv_iommu_ctx_put(s, ref); 1943 } 1944 1945 typedef void riscv_iommu_process_fn(RISCVIOMMUState *s); 1946 1947 static void riscv_iommu_update_icvec(RISCVIOMMUState *s, uint64_t data) 1948 { 1949 uint64_t icvec = 0; 1950 1951 icvec |= MIN(data & RISCV_IOMMU_ICVEC_CIV, 1952 s->icvec_avail_vectors & RISCV_IOMMU_ICVEC_CIV); 1953 1954 icvec |= MIN(data & RISCV_IOMMU_ICVEC_FIV, 1955 s->icvec_avail_vectors & RISCV_IOMMU_ICVEC_FIV); 1956 1957 icvec |= MIN(data & RISCV_IOMMU_ICVEC_PMIV, 1958 s->icvec_avail_vectors & RISCV_IOMMU_ICVEC_PMIV); 1959 1960 icvec |= MIN(data & RISCV_IOMMU_ICVEC_PIV, 1961 s->icvec_avail_vectors & RISCV_IOMMU_ICVEC_PIV); 1962 1963 trace_riscv_iommu_icvec_write(data, icvec); 1964 1965 riscv_iommu_reg_set64(s, RISCV_IOMMU_REG_ICVEC, icvec); 1966 } 1967 1968 static void riscv_iommu_update_ipsr(RISCVIOMMUState *s, uint64_t data) 1969 { 1970 uint32_t cqcsr, fqcsr, pqcsr; 1971 uint32_t ipsr_set = 0; 1972 uint32_t ipsr_clr = 0; 1973 1974 if (data & RISCV_IOMMU_IPSR_CIP) { 1975 cqcsr = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_CQCSR); 1976 1977 if (cqcsr & RISCV_IOMMU_CQCSR_CIE && 1978 (cqcsr & RISCV_IOMMU_CQCSR_FENCE_W_IP || 1979 cqcsr & RISCV_IOMMU_CQCSR_CMD_ILL || 1980 cqcsr & RISCV_IOMMU_CQCSR_CMD_TO || 1981 cqcsr & RISCV_IOMMU_CQCSR_CQMF)) { 1982 ipsr_set |= RISCV_IOMMU_IPSR_CIP; 1983 } else { 1984 ipsr_clr |= RISCV_IOMMU_IPSR_CIP; 1985 } 1986 } else { 1987 ipsr_clr |= RISCV_IOMMU_IPSR_CIP; 1988 } 1989 1990 if (data & RISCV_IOMMU_IPSR_FIP) { 1991 fqcsr = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_FQCSR); 1992 1993 if (fqcsr & RISCV_IOMMU_FQCSR_FIE && 1994 (fqcsr & RISCV_IOMMU_FQCSR_FQOF || 1995 fqcsr & RISCV_IOMMU_FQCSR_FQMF)) { 1996 ipsr_set |= RISCV_IOMMU_IPSR_FIP; 1997 } else { 1998 ipsr_clr |= RISCV_IOMMU_IPSR_FIP; 1999 } 2000 } else { 2001 ipsr_clr |= RISCV_IOMMU_IPSR_FIP; 2002 } 2003 2004 if (data & RISCV_IOMMU_IPSR_PIP) { 2005 pqcsr = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_PQCSR); 2006 2007 if (pqcsr & RISCV_IOMMU_PQCSR_PIE && 2008 (pqcsr & RISCV_IOMMU_PQCSR_PQOF || 2009 pqcsr & RISCV_IOMMU_PQCSR_PQMF)) { 2010 ipsr_set |= RISCV_IOMMU_IPSR_PIP; 2011 } else { 2012 ipsr_clr |= RISCV_IOMMU_IPSR_PIP; 2013 } 2014 } else { 2015 ipsr_clr |= RISCV_IOMMU_IPSR_PIP; 2016 } 2017 2018 riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_IPSR, ipsr_set, ipsr_clr); 2019 } 2020 2021 /* 2022 * Write the resulting value of 'data' for the reg specified 2023 * by 'reg_addr', after considering read-only/read-write/write-clear 2024 * bits, in the pointer 'dest'. 2025 * 2026 * The result is written in little-endian. 2027 */ 2028 static void riscv_iommu_write_reg_val(RISCVIOMMUState *s, 2029 void *dest, hwaddr reg_addr, 2030 int size, uint64_t data) 2031 { 2032 uint64_t ro = ldn_le_p(&s->regs_ro[reg_addr], size); 2033 uint64_t wc = ldn_le_p(&s->regs_wc[reg_addr], size); 2034 uint64_t rw = ldn_le_p(&s->regs_rw[reg_addr], size); 2035 2036 stn_le_p(dest, size, ((rw & ro) | (data & ~ro)) & ~(data & wc)); 2037 } 2038 2039 static MemTxResult riscv_iommu_mmio_write(void *opaque, hwaddr addr, 2040 uint64_t data, unsigned size, 2041 MemTxAttrs attrs) 2042 { 2043 riscv_iommu_process_fn *process_fn = NULL; 2044 RISCVIOMMUState *s = opaque; 2045 uint32_t regb = addr & ~3; 2046 uint32_t busy = 0; 2047 uint64_t val = 0; 2048 2049 if ((addr & (size - 1)) != 0) { 2050 /* Unsupported MMIO alignment or access size */ 2051 return MEMTX_ERROR; 2052 } 2053 2054 if (addr + size > RISCV_IOMMU_REG_MSI_CONFIG) { 2055 /* Unsupported MMIO access location. */ 2056 return MEMTX_ACCESS_ERROR; 2057 } 2058 2059 /* Track actionable MMIO write. */ 2060 switch (regb) { 2061 case RISCV_IOMMU_REG_DDTP: 2062 case RISCV_IOMMU_REG_DDTP + 4: 2063 process_fn = riscv_iommu_process_ddtp; 2064 regb = RISCV_IOMMU_REG_DDTP; 2065 busy = RISCV_IOMMU_DDTP_BUSY; 2066 break; 2067 2068 case RISCV_IOMMU_REG_CQT: 2069 process_fn = riscv_iommu_process_cq_tail; 2070 break; 2071 2072 case RISCV_IOMMU_REG_CQCSR: 2073 process_fn = riscv_iommu_process_cq_control; 2074 busy = RISCV_IOMMU_CQCSR_BUSY; 2075 break; 2076 2077 case RISCV_IOMMU_REG_FQCSR: 2078 process_fn = riscv_iommu_process_fq_control; 2079 busy = RISCV_IOMMU_FQCSR_BUSY; 2080 break; 2081 2082 case RISCV_IOMMU_REG_PQCSR: 2083 process_fn = riscv_iommu_process_pq_control; 2084 busy = RISCV_IOMMU_PQCSR_BUSY; 2085 break; 2086 2087 case RISCV_IOMMU_REG_ICVEC: 2088 case RISCV_IOMMU_REG_IPSR: 2089 /* 2090 * ICVEC and IPSR have special read/write procedures. We'll 2091 * call their respective helpers and exit. 2092 */ 2093 riscv_iommu_write_reg_val(s, &val, addr, size, data); 2094 2095 /* 2096 * 'val' is stored as LE. Switch to host endianess 2097 * before using it. 2098 */ 2099 val = le64_to_cpu(val); 2100 2101 if (regb == RISCV_IOMMU_REG_ICVEC) { 2102 riscv_iommu_update_icvec(s, val); 2103 } else { 2104 riscv_iommu_update_ipsr(s, val); 2105 } 2106 2107 return MEMTX_OK; 2108 2109 case RISCV_IOMMU_REG_TR_REQ_CTL: 2110 process_fn = riscv_iommu_process_dbg; 2111 regb = RISCV_IOMMU_REG_TR_REQ_CTL; 2112 busy = RISCV_IOMMU_TR_REQ_CTL_GO_BUSY; 2113 break; 2114 2115 default: 2116 break; 2117 } 2118 2119 /* 2120 * Registers update might be not synchronized with core logic. 2121 * If system software updates register when relevant BUSY bit 2122 * is set IOMMU behavior of additional writes to the register 2123 * is UNSPECIFIED. 2124 */ 2125 riscv_iommu_write_reg_val(s, &s->regs_rw[addr], addr, size, data); 2126 2127 /* Busy flag update, MSB 4-byte register. */ 2128 if (busy) { 2129 uint32_t rw = ldl_le_p(&s->regs_rw[regb]); 2130 stl_le_p(&s->regs_rw[regb], rw | busy); 2131 } 2132 2133 if (process_fn) { 2134 process_fn(s); 2135 } 2136 2137 return MEMTX_OK; 2138 } 2139 2140 static MemTxResult riscv_iommu_mmio_read(void *opaque, hwaddr addr, 2141 uint64_t *data, unsigned size, MemTxAttrs attrs) 2142 { 2143 RISCVIOMMUState *s = opaque; 2144 uint64_t val = -1; 2145 uint8_t *ptr; 2146 2147 if ((addr & (size - 1)) != 0) { 2148 /* Unsupported MMIO alignment. */ 2149 return MEMTX_ERROR; 2150 } 2151 2152 if (addr + size > RISCV_IOMMU_REG_MSI_CONFIG) { 2153 return MEMTX_ACCESS_ERROR; 2154 } 2155 2156 ptr = &s->regs_rw[addr]; 2157 val = ldn_le_p(ptr, size); 2158 2159 *data = val; 2160 2161 return MEMTX_OK; 2162 } 2163 2164 static const MemoryRegionOps riscv_iommu_mmio_ops = { 2165 .read_with_attrs = riscv_iommu_mmio_read, 2166 .write_with_attrs = riscv_iommu_mmio_write, 2167 .endianness = DEVICE_NATIVE_ENDIAN, 2168 .impl = { 2169 .min_access_size = 4, 2170 .max_access_size = 8, 2171 .unaligned = false, 2172 }, 2173 .valid = { 2174 .min_access_size = 4, 2175 .max_access_size = 8, 2176 } 2177 }; 2178 2179 /* 2180 * Translations matching MSI pattern check are redirected to "riscv-iommu-trap" 2181 * memory region as untranslated address, for additional MSI/MRIF interception 2182 * by IOMMU interrupt remapping implementation. 2183 * Note: Device emulation code generating an MSI is expected to provide a valid 2184 * memory transaction attributes with requested_id set. 2185 */ 2186 static MemTxResult riscv_iommu_trap_write(void *opaque, hwaddr addr, 2187 uint64_t data, unsigned size, MemTxAttrs attrs) 2188 { 2189 RISCVIOMMUState* s = (RISCVIOMMUState *)opaque; 2190 RISCVIOMMUContext *ctx; 2191 MemTxResult res; 2192 void *ref; 2193 uint32_t devid = attrs.requester_id; 2194 2195 if (attrs.unspecified) { 2196 return MEMTX_ACCESS_ERROR; 2197 } 2198 2199 /* FIXME: PCIe bus remapping for attached endpoints. */ 2200 devid |= s->bus << 8; 2201 2202 ctx = riscv_iommu_ctx(s, devid, 0, &ref); 2203 if (ctx == NULL) { 2204 res = MEMTX_ACCESS_ERROR; 2205 } else { 2206 res = riscv_iommu_msi_write(s, ctx, addr, data, size, attrs); 2207 } 2208 riscv_iommu_ctx_put(s, ref); 2209 return res; 2210 } 2211 2212 static MemTxResult riscv_iommu_trap_read(void *opaque, hwaddr addr, 2213 uint64_t *data, unsigned size, MemTxAttrs attrs) 2214 { 2215 return MEMTX_ACCESS_ERROR; 2216 } 2217 2218 static const MemoryRegionOps riscv_iommu_trap_ops = { 2219 .read_with_attrs = riscv_iommu_trap_read, 2220 .write_with_attrs = riscv_iommu_trap_write, 2221 .endianness = DEVICE_LITTLE_ENDIAN, 2222 .impl = { 2223 .min_access_size = 4, 2224 .max_access_size = 8, 2225 .unaligned = true, 2226 }, 2227 .valid = { 2228 .min_access_size = 4, 2229 .max_access_size = 8, 2230 } 2231 }; 2232 2233 void riscv_iommu_set_cap_igs(RISCVIOMMUState *s, riscv_iommu_igs_mode mode) 2234 { 2235 s->cap = set_field(s->cap, RISCV_IOMMU_CAP_IGS, mode); 2236 } 2237 2238 static void riscv_iommu_instance_init(Object *obj) 2239 { 2240 RISCVIOMMUState *s = RISCV_IOMMU(obj); 2241 2242 /* Enable translation debug interface */ 2243 s->cap = RISCV_IOMMU_CAP_DBG; 2244 2245 /* Report QEMU target physical address space limits */ 2246 s->cap = set_field(s->cap, RISCV_IOMMU_CAP_PAS, 2247 TARGET_PHYS_ADDR_SPACE_BITS); 2248 2249 /* TODO: method to report supported PID bits */ 2250 s->pid_bits = 8; /* restricted to size of MemTxAttrs.pid */ 2251 s->cap |= RISCV_IOMMU_CAP_PD8; 2252 2253 /* register storage */ 2254 s->regs_rw = g_new0(uint8_t, RISCV_IOMMU_REG_SIZE); 2255 s->regs_ro = g_new0(uint8_t, RISCV_IOMMU_REG_SIZE); 2256 s->regs_wc = g_new0(uint8_t, RISCV_IOMMU_REG_SIZE); 2257 2258 /* Mark all registers read-only */ 2259 memset(s->regs_ro, 0xff, RISCV_IOMMU_REG_SIZE); 2260 2261 /* Device translation context cache */ 2262 s->ctx_cache = g_hash_table_new_full(riscv_iommu_ctx_hash, 2263 riscv_iommu_ctx_equal, 2264 g_free, NULL); 2265 2266 s->iot_cache = g_hash_table_new_full(riscv_iommu_iot_hash, 2267 riscv_iommu_iot_equal, 2268 g_free, NULL); 2269 2270 s->iommus.le_next = NULL; 2271 s->iommus.le_prev = NULL; 2272 QLIST_INIT(&s->spaces); 2273 } 2274 2275 static void riscv_iommu_realize(DeviceState *dev, Error **errp) 2276 { 2277 RISCVIOMMUState *s = RISCV_IOMMU(dev); 2278 2279 s->cap |= s->version & RISCV_IOMMU_CAP_VERSION; 2280 if (s->enable_msi) { 2281 s->cap |= RISCV_IOMMU_CAP_MSI_FLAT | RISCV_IOMMU_CAP_MSI_MRIF; 2282 } 2283 if (s->enable_ats) { 2284 s->cap |= RISCV_IOMMU_CAP_ATS; 2285 } 2286 if (s->enable_s_stage) { 2287 s->cap |= RISCV_IOMMU_CAP_SV32 | RISCV_IOMMU_CAP_SV39 | 2288 RISCV_IOMMU_CAP_SV48 | RISCV_IOMMU_CAP_SV57; 2289 } 2290 if (s->enable_g_stage) { 2291 s->cap |= RISCV_IOMMU_CAP_SV32X4 | RISCV_IOMMU_CAP_SV39X4 | 2292 RISCV_IOMMU_CAP_SV48X4 | RISCV_IOMMU_CAP_SV57X4; 2293 } 2294 2295 /* Out-of-reset translation mode: OFF (DMA disabled) BARE (passthrough) */ 2296 s->ddtp = set_field(0, RISCV_IOMMU_DDTP_MODE, s->enable_off ? 2297 RISCV_IOMMU_DDTP_MODE_OFF : RISCV_IOMMU_DDTP_MODE_BARE); 2298 2299 /* 2300 * Register complete MMIO space, including MSI/PBA registers. 2301 * Note, PCIDevice implementation will add overlapping MR for MSI/PBA, 2302 * managed directly by the PCIDevice implementation. 2303 */ 2304 memory_region_init_io(&s->regs_mr, OBJECT(dev), &riscv_iommu_mmio_ops, s, 2305 "riscv-iommu-regs", RISCV_IOMMU_REG_SIZE); 2306 2307 /* Set power-on register state */ 2308 stq_le_p(&s->regs_rw[RISCV_IOMMU_REG_CAP], s->cap); 2309 stq_le_p(&s->regs_rw[RISCV_IOMMU_REG_FCTL], 0); 2310 stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_FCTL], 2311 ~(RISCV_IOMMU_FCTL_BE | RISCV_IOMMU_FCTL_WSI)); 2312 stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_DDTP], 2313 ~(RISCV_IOMMU_DDTP_PPN | RISCV_IOMMU_DDTP_MODE)); 2314 stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_CQB], 2315 ~(RISCV_IOMMU_CQB_LOG2SZ | RISCV_IOMMU_CQB_PPN)); 2316 stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_FQB], 2317 ~(RISCV_IOMMU_FQB_LOG2SZ | RISCV_IOMMU_FQB_PPN)); 2318 stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_PQB], 2319 ~(RISCV_IOMMU_PQB_LOG2SZ | RISCV_IOMMU_PQB_PPN)); 2320 stl_le_p(&s->regs_wc[RISCV_IOMMU_REG_CQCSR], RISCV_IOMMU_CQCSR_CQMF | 2321 RISCV_IOMMU_CQCSR_CMD_TO | RISCV_IOMMU_CQCSR_CMD_ILL); 2322 stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_CQCSR], RISCV_IOMMU_CQCSR_CQON | 2323 RISCV_IOMMU_CQCSR_BUSY); 2324 stl_le_p(&s->regs_wc[RISCV_IOMMU_REG_FQCSR], RISCV_IOMMU_FQCSR_FQMF | 2325 RISCV_IOMMU_FQCSR_FQOF); 2326 stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_FQCSR], RISCV_IOMMU_FQCSR_FQON | 2327 RISCV_IOMMU_FQCSR_BUSY); 2328 stl_le_p(&s->regs_wc[RISCV_IOMMU_REG_PQCSR], RISCV_IOMMU_PQCSR_PQMF | 2329 RISCV_IOMMU_PQCSR_PQOF); 2330 stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_PQCSR], RISCV_IOMMU_PQCSR_PQON | 2331 RISCV_IOMMU_PQCSR_BUSY); 2332 stl_le_p(&s->regs_wc[RISCV_IOMMU_REG_IPSR], ~0); 2333 stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_ICVEC], 0); 2334 stq_le_p(&s->regs_rw[RISCV_IOMMU_REG_DDTP], s->ddtp); 2335 /* If debug registers enabled. */ 2336 if (s->cap & RISCV_IOMMU_CAP_DBG) { 2337 stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_TR_REQ_IOVA], 0); 2338 stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_TR_REQ_CTL], 2339 RISCV_IOMMU_TR_REQ_CTL_GO_BUSY); 2340 } 2341 2342 /* Memory region for downstream access, if specified. */ 2343 if (s->target_mr) { 2344 s->target_as = g_new0(AddressSpace, 1); 2345 address_space_init(s->target_as, s->target_mr, 2346 "riscv-iommu-downstream"); 2347 } else { 2348 /* Fallback to global system memory. */ 2349 s->target_as = &address_space_memory; 2350 } 2351 2352 /* Memory region for untranslated MRIF/MSI writes */ 2353 memory_region_init_io(&s->trap_mr, OBJECT(dev), &riscv_iommu_trap_ops, s, 2354 "riscv-iommu-trap", ~0ULL); 2355 address_space_init(&s->trap_as, &s->trap_mr, "riscv-iommu-trap-as"); 2356 } 2357 2358 static void riscv_iommu_unrealize(DeviceState *dev) 2359 { 2360 RISCVIOMMUState *s = RISCV_IOMMU(dev); 2361 2362 g_hash_table_unref(s->iot_cache); 2363 g_hash_table_unref(s->ctx_cache); 2364 } 2365 2366 void riscv_iommu_reset(RISCVIOMMUState *s) 2367 { 2368 uint32_t reg_clr; 2369 int ddtp_mode; 2370 2371 /* 2372 * Clear DDTP while setting DDTP_mode back to user 2373 * initial setting. 2374 */ 2375 ddtp_mode = s->enable_off ? 2376 RISCV_IOMMU_DDTP_MODE_OFF : RISCV_IOMMU_DDTP_MODE_BARE; 2377 s->ddtp = set_field(0, RISCV_IOMMU_DDTP_MODE, ddtp_mode); 2378 riscv_iommu_reg_set64(s, RISCV_IOMMU_REG_DDTP, s->ddtp); 2379 2380 reg_clr = RISCV_IOMMU_CQCSR_CQEN | RISCV_IOMMU_CQCSR_CIE | 2381 RISCV_IOMMU_CQCSR_CQON | RISCV_IOMMU_CQCSR_BUSY; 2382 riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_CQCSR, 0, reg_clr); 2383 2384 reg_clr = RISCV_IOMMU_FQCSR_FQEN | RISCV_IOMMU_FQCSR_FIE | 2385 RISCV_IOMMU_FQCSR_FQON | RISCV_IOMMU_FQCSR_BUSY; 2386 riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_FQCSR, 0, reg_clr); 2387 2388 reg_clr = RISCV_IOMMU_PQCSR_PQEN | RISCV_IOMMU_PQCSR_PIE | 2389 RISCV_IOMMU_PQCSR_PQON | RISCV_IOMMU_PQCSR_BUSY; 2390 riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_PQCSR, 0, reg_clr); 2391 2392 riscv_iommu_reg_mod64(s, RISCV_IOMMU_REG_TR_REQ_CTL, 0, 2393 RISCV_IOMMU_TR_REQ_CTL_GO_BUSY); 2394 2395 riscv_iommu_reg_set32(s, RISCV_IOMMU_REG_IPSR, 0); 2396 2397 g_hash_table_remove_all(s->ctx_cache); 2398 g_hash_table_remove_all(s->iot_cache); 2399 } 2400 2401 static const Property riscv_iommu_properties[] = { 2402 DEFINE_PROP_UINT32("version", RISCVIOMMUState, version, 2403 RISCV_IOMMU_SPEC_DOT_VER), 2404 DEFINE_PROP_UINT32("bus", RISCVIOMMUState, bus, 0x0), 2405 DEFINE_PROP_UINT32("ioatc-limit", RISCVIOMMUState, iot_limit, 2406 LIMIT_CACHE_IOT), 2407 DEFINE_PROP_BOOL("intremap", RISCVIOMMUState, enable_msi, TRUE), 2408 DEFINE_PROP_BOOL("ats", RISCVIOMMUState, enable_ats, TRUE), 2409 DEFINE_PROP_BOOL("off", RISCVIOMMUState, enable_off, TRUE), 2410 DEFINE_PROP_BOOL("s-stage", RISCVIOMMUState, enable_s_stage, TRUE), 2411 DEFINE_PROP_BOOL("g-stage", RISCVIOMMUState, enable_g_stage, TRUE), 2412 DEFINE_PROP_LINK("downstream-mr", RISCVIOMMUState, target_mr, 2413 TYPE_MEMORY_REGION, MemoryRegion *), 2414 }; 2415 2416 static void riscv_iommu_class_init(ObjectClass *klass, void* data) 2417 { 2418 DeviceClass *dc = DEVICE_CLASS(klass); 2419 2420 /* internal device for riscv-iommu-{pci/sys}, not user-creatable */ 2421 dc->user_creatable = false; 2422 dc->realize = riscv_iommu_realize; 2423 dc->unrealize = riscv_iommu_unrealize; 2424 device_class_set_props(dc, riscv_iommu_properties); 2425 } 2426 2427 static const TypeInfo riscv_iommu_info = { 2428 .name = TYPE_RISCV_IOMMU, 2429 .parent = TYPE_DEVICE, 2430 .instance_size = sizeof(RISCVIOMMUState), 2431 .instance_init = riscv_iommu_instance_init, 2432 .class_init = riscv_iommu_class_init, 2433 }; 2434 2435 static const char *IOMMU_FLAG_STR[] = { 2436 "NA", 2437 "RO", 2438 "WR", 2439 "RW", 2440 }; 2441 2442 /* RISC-V IOMMU Memory Region - Address Translation Space */ 2443 static IOMMUTLBEntry riscv_iommu_memory_region_translate( 2444 IOMMUMemoryRegion *iommu_mr, hwaddr addr, 2445 IOMMUAccessFlags flag, int iommu_idx) 2446 { 2447 RISCVIOMMUSpace *as = container_of(iommu_mr, RISCVIOMMUSpace, iova_mr); 2448 RISCVIOMMUContext *ctx; 2449 void *ref; 2450 IOMMUTLBEntry iotlb = { 2451 .iova = addr, 2452 .target_as = as->iommu->target_as, 2453 .addr_mask = ~0ULL, 2454 .perm = flag, 2455 }; 2456 2457 ctx = riscv_iommu_ctx(as->iommu, as->devid, iommu_idx, &ref); 2458 if (ctx == NULL) { 2459 /* Translation disabled or invalid. */ 2460 iotlb.addr_mask = 0; 2461 iotlb.perm = IOMMU_NONE; 2462 } else if (riscv_iommu_translate(as->iommu, ctx, &iotlb, true)) { 2463 /* Translation disabled or fault reported. */ 2464 iotlb.addr_mask = 0; 2465 iotlb.perm = IOMMU_NONE; 2466 } 2467 2468 /* Trace all dma translations with original access flags. */ 2469 trace_riscv_iommu_dma(as->iommu->parent_obj.id, PCI_BUS_NUM(as->devid), 2470 PCI_SLOT(as->devid), PCI_FUNC(as->devid), iommu_idx, 2471 IOMMU_FLAG_STR[flag & IOMMU_RW], iotlb.iova, 2472 iotlb.translated_addr); 2473 2474 riscv_iommu_ctx_put(as->iommu, ref); 2475 2476 return iotlb; 2477 } 2478 2479 static int riscv_iommu_memory_region_notify( 2480 IOMMUMemoryRegion *iommu_mr, IOMMUNotifierFlag old, 2481 IOMMUNotifierFlag new, Error **errp) 2482 { 2483 RISCVIOMMUSpace *as = container_of(iommu_mr, RISCVIOMMUSpace, iova_mr); 2484 2485 if (old == IOMMU_NOTIFIER_NONE) { 2486 as->notifier = true; 2487 trace_riscv_iommu_notifier_add(iommu_mr->parent_obj.name); 2488 } else if (new == IOMMU_NOTIFIER_NONE) { 2489 as->notifier = false; 2490 trace_riscv_iommu_notifier_del(iommu_mr->parent_obj.name); 2491 } 2492 2493 return 0; 2494 } 2495 2496 static inline bool pci_is_iommu(PCIDevice *pdev) 2497 { 2498 return pci_get_word(pdev->config + PCI_CLASS_DEVICE) == 0x0806; 2499 } 2500 2501 static AddressSpace *riscv_iommu_find_as(PCIBus *bus, void *opaque, int devfn) 2502 { 2503 RISCVIOMMUState *s = (RISCVIOMMUState *) opaque; 2504 PCIDevice *pdev = pci_find_device(bus, pci_bus_num(bus), devfn); 2505 AddressSpace *as = NULL; 2506 2507 if (pdev && pci_is_iommu(pdev)) { 2508 return s->target_as; 2509 } 2510 2511 /* Find first registered IOMMU device */ 2512 while (s->iommus.le_prev) { 2513 s = *(s->iommus.le_prev); 2514 } 2515 2516 /* Find first matching IOMMU */ 2517 while (s != NULL && as == NULL) { 2518 as = riscv_iommu_space(s, PCI_BUILD_BDF(pci_bus_num(bus), devfn)); 2519 s = s->iommus.le_next; 2520 } 2521 2522 return as ? as : &address_space_memory; 2523 } 2524 2525 static const PCIIOMMUOps riscv_iommu_ops = { 2526 .get_address_space = riscv_iommu_find_as, 2527 }; 2528 2529 void riscv_iommu_pci_setup_iommu(RISCVIOMMUState *iommu, PCIBus *bus, 2530 Error **errp) 2531 { 2532 if (bus->iommu_ops && 2533 bus->iommu_ops->get_address_space == riscv_iommu_find_as) { 2534 /* Allow multiple IOMMUs on the same PCIe bus, link known devices */ 2535 RISCVIOMMUState *last = (RISCVIOMMUState *)bus->iommu_opaque; 2536 QLIST_INSERT_AFTER(last, iommu, iommus); 2537 } else if (!bus->iommu_ops && !bus->iommu_opaque) { 2538 pci_setup_iommu(bus, &riscv_iommu_ops, iommu); 2539 } else { 2540 error_setg(errp, "can't register secondary IOMMU for PCI bus #%d", 2541 pci_bus_num(bus)); 2542 } 2543 } 2544 2545 static int riscv_iommu_memory_region_index(IOMMUMemoryRegion *iommu_mr, 2546 MemTxAttrs attrs) 2547 { 2548 return attrs.unspecified ? RISCV_IOMMU_NOPROCID : (int)attrs.pid; 2549 } 2550 2551 static int riscv_iommu_memory_region_index_len(IOMMUMemoryRegion *iommu_mr) 2552 { 2553 RISCVIOMMUSpace *as = container_of(iommu_mr, RISCVIOMMUSpace, iova_mr); 2554 return 1 << as->iommu->pid_bits; 2555 } 2556 2557 static void riscv_iommu_memory_region_init(ObjectClass *klass, void *data) 2558 { 2559 IOMMUMemoryRegionClass *imrc = IOMMU_MEMORY_REGION_CLASS(klass); 2560 2561 imrc->translate = riscv_iommu_memory_region_translate; 2562 imrc->notify_flag_changed = riscv_iommu_memory_region_notify; 2563 imrc->attrs_to_index = riscv_iommu_memory_region_index; 2564 imrc->num_indexes = riscv_iommu_memory_region_index_len; 2565 } 2566 2567 static const TypeInfo riscv_iommu_memory_region_info = { 2568 .parent = TYPE_IOMMU_MEMORY_REGION, 2569 .name = TYPE_RISCV_IOMMU_MEMORY_REGION, 2570 .class_init = riscv_iommu_memory_region_init, 2571 }; 2572 2573 static void riscv_iommu_register_mr_types(void) 2574 { 2575 type_register_static(&riscv_iommu_memory_region_info); 2576 type_register_static(&riscv_iommu_info); 2577 } 2578 2579 type_init(riscv_iommu_register_mr_types); 2580