1 /* 2 * QEMU emulation of an Intel IOMMU (VT-d) 3 * (DMA Remapping device) 4 * 5 * Copyright (C) 2013 Knut Omang, Oracle <knut.omang@oracle.com> 6 * Copyright (C) 2014 Le Tan, <tamlokveer@gmail.com> 7 * 8 * This program is free software; you can redistribute it and/or modify 9 * it under the terms of the GNU General Public License as published by 10 * the Free Software Foundation; either version 2 of the License, or 11 * (at your option) any later version. 12 13 * This program is distributed in the hope that it will be useful, 14 * but WITHOUT ANY WARRANTY; without even the implied warranty of 15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 * GNU General Public License for more details. 17 18 * You should have received a copy of the GNU General Public License along 19 * with this program; if not, see <http://www.gnu.org/licenses/>. 20 */ 21 22 #include "qemu/osdep.h" 23 #include "qemu/error-report.h" 24 #include "hw/sysbus.h" 25 #include "exec/address-spaces.h" 26 #include "intel_iommu_internal.h" 27 #include "hw/pci/pci.h" 28 #include "hw/pci/pci_bus.h" 29 #include "hw/i386/pc.h" 30 #include "hw/i386/apic-msidef.h" 31 #include "hw/boards.h" 32 #include "hw/i386/x86-iommu.h" 33 #include "hw/pci-host/q35.h" 34 #include "sysemu/kvm.h" 35 36 /*#define DEBUG_INTEL_IOMMU*/ 37 #ifdef DEBUG_INTEL_IOMMU 38 enum { 39 DEBUG_GENERAL, DEBUG_CSR, DEBUG_INV, DEBUG_MMU, DEBUG_FLOG, 40 DEBUG_CACHE, DEBUG_IR, 41 }; 42 #define VTD_DBGBIT(x) (1 << DEBUG_##x) 43 static int vtd_dbgflags = VTD_DBGBIT(GENERAL) | VTD_DBGBIT(CSR); 44 45 #define VTD_DPRINTF(what, fmt, ...) do { \ 46 if (vtd_dbgflags & VTD_DBGBIT(what)) { \ 47 fprintf(stderr, "(vtd)%s: " fmt "\n", __func__, \ 48 ## __VA_ARGS__); } \ 49 } while (0) 50 #else 51 #define VTD_DPRINTF(what, fmt, ...) 
do {} while (0) 52 #endif 53 54 static void vtd_define_quad(IntelIOMMUState *s, hwaddr addr, uint64_t val, 55 uint64_t wmask, uint64_t w1cmask) 56 { 57 stq_le_p(&s->csr[addr], val); 58 stq_le_p(&s->wmask[addr], wmask); 59 stq_le_p(&s->w1cmask[addr], w1cmask); 60 } 61 62 static void vtd_define_quad_wo(IntelIOMMUState *s, hwaddr addr, uint64_t mask) 63 { 64 stq_le_p(&s->womask[addr], mask); 65 } 66 67 static void vtd_define_long(IntelIOMMUState *s, hwaddr addr, uint32_t val, 68 uint32_t wmask, uint32_t w1cmask) 69 { 70 stl_le_p(&s->csr[addr], val); 71 stl_le_p(&s->wmask[addr], wmask); 72 stl_le_p(&s->w1cmask[addr], w1cmask); 73 } 74 75 static void vtd_define_long_wo(IntelIOMMUState *s, hwaddr addr, uint32_t mask) 76 { 77 stl_le_p(&s->womask[addr], mask); 78 } 79 80 /* "External" get/set operations */ 81 static void vtd_set_quad(IntelIOMMUState *s, hwaddr addr, uint64_t val) 82 { 83 uint64_t oldval = ldq_le_p(&s->csr[addr]); 84 uint64_t wmask = ldq_le_p(&s->wmask[addr]); 85 uint64_t w1cmask = ldq_le_p(&s->w1cmask[addr]); 86 stq_le_p(&s->csr[addr], 87 ((oldval & ~wmask) | (val & wmask)) & ~(w1cmask & val)); 88 } 89 90 static void vtd_set_long(IntelIOMMUState *s, hwaddr addr, uint32_t val) 91 { 92 uint32_t oldval = ldl_le_p(&s->csr[addr]); 93 uint32_t wmask = ldl_le_p(&s->wmask[addr]); 94 uint32_t w1cmask = ldl_le_p(&s->w1cmask[addr]); 95 stl_le_p(&s->csr[addr], 96 ((oldval & ~wmask) | (val & wmask)) & ~(w1cmask & val)); 97 } 98 99 static uint64_t vtd_get_quad(IntelIOMMUState *s, hwaddr addr) 100 { 101 uint64_t val = ldq_le_p(&s->csr[addr]); 102 uint64_t womask = ldq_le_p(&s->womask[addr]); 103 return val & ~womask; 104 } 105 106 static uint32_t vtd_get_long(IntelIOMMUState *s, hwaddr addr) 107 { 108 uint32_t val = ldl_le_p(&s->csr[addr]); 109 uint32_t womask = ldl_le_p(&s->womask[addr]); 110 return val & ~womask; 111 } 112 113 /* "Internal" get/set operations */ 114 static uint64_t vtd_get_quad_raw(IntelIOMMUState *s, hwaddr addr) 115 { 116 return ldq_le_p(&s->csr[addr]); 117 } 118 119 static uint32_t vtd_get_long_raw(IntelIOMMUState *s, hwaddr addr) 120 { 121 return ldl_le_p(&s->csr[addr]); 122 } 123 124 static void vtd_set_quad_raw(IntelIOMMUState *s, hwaddr addr, uint64_t val) 125 { 126 stq_le_p(&s->csr[addr], val); 127 } 128 129 static uint32_t vtd_set_clear_mask_long(IntelIOMMUState *s, hwaddr addr, 130 uint32_t clear, uint32_t mask) 131 { 132 uint32_t new_val = (ldl_le_p(&s->csr[addr]) & ~clear) | mask; 133 stl_le_p(&s->csr[addr], new_val); 134 return new_val; 135 } 136 137 static uint64_t vtd_set_clear_mask_quad(IntelIOMMUState *s, hwaddr addr, 138 uint64_t clear, uint64_t mask) 139 { 140 uint64_t new_val = (ldq_le_p(&s->csr[addr]) & ~clear) | mask; 141 stq_le_p(&s->csr[addr], new_val); 142 return new_val; 143 } 144 145 /* GHashTable functions */ 146 static gboolean vtd_uint64_equal(gconstpointer v1, gconstpointer v2) 147 { 148 return *((const uint64_t *)v1) == *((const uint64_t *)v2); 149 } 150 151 static guint vtd_uint64_hash(gconstpointer v) 152 { 153 return (guint)*(const uint64_t *)v; 154 } 155 156 static gboolean vtd_hash_remove_by_domain(gpointer key, gpointer value, 157 gpointer user_data) 158 { 159 VTDIOTLBEntry *entry = (VTDIOTLBEntry *)value; 160 uint16_t domain_id = *(uint16_t *)user_data; 161 return entry->domain_id == domain_id; 162 } 163 164 /* The shift of an addr for a certain level of paging structure */ 165 static inline uint32_t vtd_slpt_level_shift(uint32_t level) 166 { 167 return VTD_PAGE_SHIFT_4K + (level - 1) * VTD_SL_LEVEL_BITS; 168 } 169 170 static inline 
uint64_t vtd_slpt_level_page_mask(uint32_t level) 171 { 172 return ~((1ULL << vtd_slpt_level_shift(level)) - 1); 173 } 174 175 static gboolean vtd_hash_remove_by_page(gpointer key, gpointer value, 176 gpointer user_data) 177 { 178 VTDIOTLBEntry *entry = (VTDIOTLBEntry *)value; 179 VTDIOTLBPageInvInfo *info = (VTDIOTLBPageInvInfo *)user_data; 180 uint64_t gfn = (info->addr >> VTD_PAGE_SHIFT_4K) & info->mask; 181 uint64_t gfn_tlb = (info->addr & entry->mask) >> VTD_PAGE_SHIFT_4K; 182 return (entry->domain_id == info->domain_id) && 183 (((entry->gfn & info->mask) == gfn) || 184 (entry->gfn == gfn_tlb)); 185 } 186 187 /* Reset all the gen of VTDAddressSpace to zero and set the gen of 188 * IntelIOMMUState to 1. 189 */ 190 static void vtd_reset_context_cache(IntelIOMMUState *s) 191 { 192 VTDAddressSpace *vtd_as; 193 VTDBus *vtd_bus; 194 GHashTableIter bus_it; 195 uint32_t devfn_it; 196 197 g_hash_table_iter_init(&bus_it, s->vtd_as_by_busptr); 198 199 VTD_DPRINTF(CACHE, "global context_cache_gen=1"); 200 while (g_hash_table_iter_next (&bus_it, NULL, (void**)&vtd_bus)) { 201 for (devfn_it = 0; devfn_it < X86_IOMMU_PCI_DEVFN_MAX; ++devfn_it) { 202 vtd_as = vtd_bus->dev_as[devfn_it]; 203 if (!vtd_as) { 204 continue; 205 } 206 vtd_as->context_cache_entry.context_cache_gen = 0; 207 } 208 } 209 s->context_cache_gen = 1; 210 } 211 212 static void vtd_reset_iotlb(IntelIOMMUState *s) 213 { 214 assert(s->iotlb); 215 g_hash_table_remove_all(s->iotlb); 216 } 217 218 static uint64_t vtd_get_iotlb_key(uint64_t gfn, uint8_t source_id, 219 uint32_t level) 220 { 221 return gfn | ((uint64_t)(source_id) << VTD_IOTLB_SID_SHIFT) | 222 ((uint64_t)(level) << VTD_IOTLB_LVL_SHIFT); 223 } 224 225 static uint64_t vtd_get_iotlb_gfn(hwaddr addr, uint32_t level) 226 { 227 return (addr & vtd_slpt_level_page_mask(level)) >> VTD_PAGE_SHIFT_4K; 228 } 229 230 static VTDIOTLBEntry *vtd_lookup_iotlb(IntelIOMMUState *s, uint16_t source_id, 231 hwaddr addr) 232 { 233 VTDIOTLBEntry *entry; 234 uint64_t key; 235 int level; 236 237 for (level = VTD_SL_PT_LEVEL; level < VTD_SL_PML4_LEVEL; level++) { 238 key = vtd_get_iotlb_key(vtd_get_iotlb_gfn(addr, level), 239 source_id, level); 240 entry = g_hash_table_lookup(s->iotlb, &key); 241 if (entry) { 242 goto out; 243 } 244 } 245 246 out: 247 return entry; 248 } 249 250 static void vtd_update_iotlb(IntelIOMMUState *s, uint16_t source_id, 251 uint16_t domain_id, hwaddr addr, uint64_t slpte, 252 bool read_flags, bool write_flags, 253 uint32_t level) 254 { 255 VTDIOTLBEntry *entry = g_malloc(sizeof(*entry)); 256 uint64_t *key = g_malloc(sizeof(*key)); 257 uint64_t gfn = vtd_get_iotlb_gfn(addr, level); 258 259 VTD_DPRINTF(CACHE, "update iotlb sid 0x%"PRIx16 " gpa 0x%"PRIx64 260 " slpte 0x%"PRIx64 " did 0x%"PRIx16, source_id, addr, slpte, 261 domain_id); 262 if (g_hash_table_size(s->iotlb) >= VTD_IOTLB_MAX_SIZE) { 263 VTD_DPRINTF(CACHE, "iotlb exceeds size limit, forced to reset"); 264 vtd_reset_iotlb(s); 265 } 266 267 entry->gfn = gfn; 268 entry->domain_id = domain_id; 269 entry->slpte = slpte; 270 entry->read_flags = read_flags; 271 entry->write_flags = write_flags; 272 entry->mask = vtd_slpt_level_page_mask(level); 273 *key = vtd_get_iotlb_key(gfn, source_id, level); 274 g_hash_table_replace(s->iotlb, key, entry); 275 } 276 277 /* Given the reg addr of both the message data and address, generate an 278 * interrupt via MSI. 
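 *
 * Illustrative note (not in the original comment): the two registers hold a
 * standard MSI address/data pair, so a driver arming the fault event with,
 * say, vector 0x30 targeting APIC ID 0 would have programmed roughly
 *
 *     FEADDR_REG = 0xfee00000;     (MSI address window, destination ID 0)
 *     FEDATA_REG = 0x00000030;     (fixed delivery mode, vector 0x30)
 *
 * and this helper simply replays that pair as a 32-bit store into the
 * system address space.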
 */
static void vtd_generate_interrupt(IntelIOMMUState *s, hwaddr mesg_addr_reg,
                                   hwaddr mesg_data_reg)
{
    hwaddr addr;
    uint32_t data;

    assert(mesg_data_reg < DMAR_REG_SIZE);
    assert(mesg_addr_reg < DMAR_REG_SIZE);

    addr = vtd_get_long_raw(s, mesg_addr_reg);
    data = vtd_get_long_raw(s, mesg_data_reg);

    VTD_DPRINTF(FLOG, "msi: addr 0x%"PRIx64 " data 0x%"PRIx32, addr, data);
    address_space_stl_le(&address_space_memory, addr, data,
                         MEMTXATTRS_UNSPECIFIED, NULL);
}

/* Generate a fault event to software via MSI if conditions are met.
 * Notice that the value of FSTS_REG being passed to it should be the one
 * before any update.
 */
static void vtd_generate_fault_event(IntelIOMMUState *s, uint32_t pre_fsts)
{
    if (pre_fsts & VTD_FSTS_PPF || pre_fsts & VTD_FSTS_PFO ||
        pre_fsts & VTD_FSTS_IQE) {
        VTD_DPRINTF(FLOG, "there are previous interrupt conditions "
                    "to be serviced by software, fault event is not generated "
                    "(FSTS_REG 0x%"PRIx32 ")", pre_fsts);
        return;
    }
    vtd_set_clear_mask_long(s, DMAR_FECTL_REG, 0, VTD_FECTL_IP);
    if (vtd_get_long_raw(s, DMAR_FECTL_REG) & VTD_FECTL_IM) {
        VTD_DPRINTF(FLOG, "Interrupt Mask set, fault event is not generated");
    } else {
        vtd_generate_interrupt(s, DMAR_FEADDR_REG, DMAR_FEDATA_REG);
        vtd_set_clear_mask_long(s, DMAR_FECTL_REG, VTD_FECTL_IP, 0);
    }
}

/* Check if the Fault (F) field of the Fault Recording Register referenced by
 * @index is Set.
 */
static bool vtd_is_frcd_set(IntelIOMMUState *s, uint16_t index)
{
    /* Each reg is 128-bit */
    hwaddr addr = DMAR_FRCD_REG_OFFSET + (((uint64_t)index) << 4);
    addr += 8; /* Access the high 64-bit half */

    assert(index < DMAR_FRCD_REG_NR);

    return vtd_get_quad_raw(s, addr) & VTD_FRCD_F;
}

/* Update the PPF field of Fault Status Register.
 * Should be called whenever the F field of any fault recording register
 * is changed.
 */
static void vtd_update_fsts_ppf(IntelIOMMUState *s)
{
    uint32_t i;
    uint32_t ppf_mask = 0;

    for (i = 0; i < DMAR_FRCD_REG_NR; i++) {
        if (vtd_is_frcd_set(s, i)) {
            ppf_mask = VTD_FSTS_PPF;
            break;
        }
    }
    vtd_set_clear_mask_long(s, DMAR_FSTS_REG, VTD_FSTS_PPF, ppf_mask);
    VTD_DPRINTF(FLOG, "set PPF of FSTS_REG to %d", ppf_mask ?
                1 : 0);
}

static void vtd_set_frcd_and_update_ppf(IntelIOMMUState *s, uint16_t index)
{
    /* Each reg is 128-bit */
    hwaddr addr = DMAR_FRCD_REG_OFFSET + (((uint64_t)index) << 4);
    addr += 8; /* Access the high 64-bit half */

    assert(index < DMAR_FRCD_REG_NR);

    vtd_set_clear_mask_quad(s, addr, 0, VTD_FRCD_F);
    vtd_update_fsts_ppf(s);
}

/* Must not update F field now, should be done later */
static void vtd_record_frcd(IntelIOMMUState *s, uint16_t index,
                            uint16_t source_id, hwaddr addr,
                            VTDFaultReason fault, bool is_write)
{
    uint64_t hi = 0, lo;
    hwaddr frcd_reg_addr = DMAR_FRCD_REG_OFFSET + (((uint64_t)index) << 4);

    assert(index < DMAR_FRCD_REG_NR);

    lo = VTD_FRCD_FI(addr);
    hi = VTD_FRCD_SID(source_id) | VTD_FRCD_FR(fault);
    if (!is_write) {
        hi |= VTD_FRCD_T;
    }
    vtd_set_quad_raw(s, frcd_reg_addr, lo);
    vtd_set_quad_raw(s, frcd_reg_addr + 8, hi);
    VTD_DPRINTF(FLOG, "record to FRCD_REG #%"PRIu16 ": hi 0x%"PRIx64
                ", lo 0x%"PRIx64, index, hi, lo);
}

/* Try to collapse multiple pending faults from the same requester */
static bool vtd_try_collapse_fault(IntelIOMMUState *s, uint16_t source_id)
{
    uint32_t i;
    uint64_t frcd_reg;
    hwaddr addr = DMAR_FRCD_REG_OFFSET + 8; /* The high 64-bit half */

    for (i = 0; i < DMAR_FRCD_REG_NR; i++) {
        frcd_reg = vtd_get_quad_raw(s, addr);
        VTD_DPRINTF(FLOG, "frcd_reg #%d 0x%"PRIx64, i, frcd_reg);
        if ((frcd_reg & VTD_FRCD_F) &&
            ((frcd_reg & VTD_FRCD_SID_MASK) == source_id)) {
            return true;
        }
        addr += 16; /* 128-bit for each */
    }
    return false;
}

/* Log and report a DMAR (address translation) fault to software */
static void vtd_report_dmar_fault(IntelIOMMUState *s, uint16_t source_id,
                                  hwaddr addr, VTDFaultReason fault,
                                  bool is_write)
{
    uint32_t fsts_reg = vtd_get_long_raw(s, DMAR_FSTS_REG);

    assert(fault < VTD_FR_MAX);

    if (fault == VTD_FR_RESERVED_ERR) {
        /* This is not a normal fault reason case. Drop it. */
        return;
    }
    VTD_DPRINTF(FLOG, "sid 0x%"PRIx16 ", fault %d, addr 0x%"PRIx64
                ", is_write %d", source_id, fault, addr, is_write);
    if (fsts_reg & VTD_FSTS_PFO) {
        VTD_DPRINTF(FLOG, "new fault is not recorded due to "
                    "Primary Fault Overflow");
        return;
    }
    if (vtd_try_collapse_fault(s, source_id)) {
        VTD_DPRINTF(FLOG, "new fault is not recorded due to "
                    "compression of faults");
        return;
    }
    if (vtd_is_frcd_set(s, s->next_frcd_reg)) {
        VTD_DPRINTF(FLOG, "Primary Fault Overflow and "
                    "new fault is not recorded, set PFO field");
        vtd_set_clear_mask_long(s, DMAR_FSTS_REG, 0, VTD_FSTS_PFO);
        return;
    }

    vtd_record_frcd(s, s->next_frcd_reg, source_id, addr, fault, is_write);

    if (fsts_reg & VTD_FSTS_PPF) {
        VTD_DPRINTF(FLOG, "there are pending faults already, "
                    "fault event is not generated");
        vtd_set_frcd_and_update_ppf(s, s->next_frcd_reg);
        s->next_frcd_reg++;
        if (s->next_frcd_reg == DMAR_FRCD_REG_NR) {
            s->next_frcd_reg = 0;
        }
    } else {
        vtd_set_clear_mask_long(s, DMAR_FSTS_REG, VTD_FSTS_FRI_MASK,
                                VTD_FSTS_FRI(s->next_frcd_reg));
        vtd_set_frcd_and_update_ppf(s, s->next_frcd_reg); /* Will set PPF */
        s->next_frcd_reg++;
        if (s->next_frcd_reg == DMAR_FRCD_REG_NR) {
            s->next_frcd_reg = 0;
        }
        /* This case actually causes the PPF to be Set.
         * So generate fault event (interrupt).
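         *
         * Worked example (not part of the original comment): with all FRCD
         * registers clear, the first fault lands in FRCD[0]; FRI is set to 0,
         * recording FRCD[0] raises PPF, and because the saved fsts_reg still
         * had PPF clear, vtd_generate_fault_event() raises the MSI unless the
         * IM bit in FECTL_REG masks it.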
         */
        vtd_generate_fault_event(s, fsts_reg);
    }
}

/* Handle Invalidation Queue Errors of the queued invalidation interface */
static void vtd_handle_inv_queue_error(IntelIOMMUState *s)
{
    uint32_t fsts_reg = vtd_get_long_raw(s, DMAR_FSTS_REG);

    vtd_set_clear_mask_long(s, DMAR_FSTS_REG, 0, VTD_FSTS_IQE);
    vtd_generate_fault_event(s, fsts_reg);
}

/* Set the IWC field and try to generate an invalidation completion interrupt */
static void vtd_generate_completion_event(IntelIOMMUState *s)
{
    VTD_DPRINTF(INV, "completes an invalidation wait command with "
                "Interrupt Flag");
    if (vtd_get_long_raw(s, DMAR_ICS_REG) & VTD_ICS_IWC) {
        VTD_DPRINTF(INV, "there is a previous interrupt condition to be "
                    "serviced by software, "
                    "new invalidation event is not generated");
        return;
    }
    vtd_set_clear_mask_long(s, DMAR_ICS_REG, 0, VTD_ICS_IWC);
    vtd_set_clear_mask_long(s, DMAR_IECTL_REG, 0, VTD_IECTL_IP);
    if (vtd_get_long_raw(s, DMAR_IECTL_REG) & VTD_IECTL_IM) {
        VTD_DPRINTF(INV, "IM field in IECTL_REG is set, new invalidation "
                    "event is not generated");
        return;
    } else {
        /* Generate the interrupt event */
        vtd_generate_interrupt(s, DMAR_IEADDR_REG, DMAR_IEDATA_REG);
        vtd_set_clear_mask_long(s, DMAR_IECTL_REG, VTD_IECTL_IP, 0);
    }
}

static inline bool vtd_root_entry_present(VTDRootEntry *root)
{
    return root->val & VTD_ROOT_ENTRY_P;
}

static int vtd_get_root_entry(IntelIOMMUState *s, uint8_t index,
                              VTDRootEntry *re)
{
    dma_addr_t addr;

    addr = s->root + index * sizeof(*re);
    if (dma_memory_read(&address_space_memory, addr, re, sizeof(*re))) {
        VTD_DPRINTF(GENERAL, "error: fail to access root-entry at 0x%"PRIx64
                    " + %"PRIu8, s->root, index);
        re->val = 0;
        return -VTD_FR_ROOT_TABLE_INV;
    }
    re->val = le64_to_cpu(re->val);
    return 0;
}

static inline bool vtd_context_entry_present(VTDContextEntry *context)
{
    return context->lo & VTD_CONTEXT_ENTRY_P;
}

static int vtd_get_context_entry_from_root(VTDRootEntry *root, uint8_t index,
                                           VTDContextEntry *ce)
{
    dma_addr_t addr;

    if (!vtd_root_entry_present(root)) {
        VTD_DPRINTF(GENERAL, "error: root-entry is not present");
        return -VTD_FR_ROOT_ENTRY_P;
    }
    addr = (root->val & VTD_ROOT_ENTRY_CTP) + index * sizeof(*ce);
    if (dma_memory_read(&address_space_memory, addr, ce, sizeof(*ce))) {
        VTD_DPRINTF(GENERAL, "error: fail to access context-entry at 0x%"PRIx64
                    " + %"PRIu8,
                    (uint64_t)(root->val & VTD_ROOT_ENTRY_CTP), index);
        return -VTD_FR_CONTEXT_TABLE_INV;
    }
    ce->lo = le64_to_cpu(ce->lo);
    ce->hi = le64_to_cpu(ce->hi);
    return 0;
}

static inline dma_addr_t vtd_get_slpt_base_from_context(VTDContextEntry *ce)
{
    return ce->lo & VTD_CONTEXT_ENTRY_SLPTPTR;
}

static inline uint64_t vtd_get_slpte_addr(uint64_t slpte)
{
    return slpte & VTD_SL_PT_BASE_ADDR_MASK;
}

/* Whether the pte indicates the address of the page frame */
static inline bool vtd_is_last_slpte(uint64_t slpte, uint32_t level)
{
    return level == VTD_SL_PT_LEVEL || (slpte & VTD_SL_PT_PAGE_SIZE_MASK);
}

/* Get the content of an slpte located in @base_addr[@index] */
static uint64_t vtd_get_slpte(dma_addr_t base_addr, uint32_t index)
{
    uint64_t slpte;

    assert(index < VTD_SL_PT_ENTRY_NR);

    if
(dma_memory_read(&address_space_memory, 567 base_addr + index * sizeof(slpte), &slpte, 568 sizeof(slpte))) { 569 slpte = (uint64_t)-1; 570 return slpte; 571 } 572 slpte = le64_to_cpu(slpte); 573 return slpte; 574 } 575 576 /* Given a gpa and the level of paging structure, return the offset of current 577 * level. 578 */ 579 static inline uint32_t vtd_gpa_level_offset(uint64_t gpa, uint32_t level) 580 { 581 return (gpa >> vtd_slpt_level_shift(level)) & 582 ((1ULL << VTD_SL_LEVEL_BITS) - 1); 583 } 584 585 /* Check Capability Register to see if the @level of page-table is supported */ 586 static inline bool vtd_is_level_supported(IntelIOMMUState *s, uint32_t level) 587 { 588 return VTD_CAP_SAGAW_MASK & s->cap & 589 (1ULL << (level - 2 + VTD_CAP_SAGAW_SHIFT)); 590 } 591 592 /* Get the page-table level that hardware should use for the second-level 593 * page-table walk from the Address Width field of context-entry. 594 */ 595 static inline uint32_t vtd_get_level_from_context_entry(VTDContextEntry *ce) 596 { 597 return 2 + (ce->hi & VTD_CONTEXT_ENTRY_AW); 598 } 599 600 static inline uint32_t vtd_get_agaw_from_context_entry(VTDContextEntry *ce) 601 { 602 return 30 + (ce->hi & VTD_CONTEXT_ENTRY_AW) * 9; 603 } 604 605 static const uint64_t vtd_paging_entry_rsvd_field[] = { 606 [0] = ~0ULL, 607 /* For not large page */ 608 [1] = 0x800ULL | ~(VTD_HAW_MASK | VTD_SL_IGN_COM), 609 [2] = 0x800ULL | ~(VTD_HAW_MASK | VTD_SL_IGN_COM), 610 [3] = 0x800ULL | ~(VTD_HAW_MASK | VTD_SL_IGN_COM), 611 [4] = 0x880ULL | ~(VTD_HAW_MASK | VTD_SL_IGN_COM), 612 /* For large page */ 613 [5] = 0x800ULL | ~(VTD_HAW_MASK | VTD_SL_IGN_COM), 614 [6] = 0x1ff800ULL | ~(VTD_HAW_MASK | VTD_SL_IGN_COM), 615 [7] = 0x3ffff800ULL | ~(VTD_HAW_MASK | VTD_SL_IGN_COM), 616 [8] = 0x880ULL | ~(VTD_HAW_MASK | VTD_SL_IGN_COM), 617 }; 618 619 static bool vtd_slpte_nonzero_rsvd(uint64_t slpte, uint32_t level) 620 { 621 if (slpte & VTD_SL_PT_PAGE_SIZE_MASK) { 622 /* Maybe large page */ 623 return slpte & vtd_paging_entry_rsvd_field[level + 4]; 624 } else { 625 return slpte & vtd_paging_entry_rsvd_field[level]; 626 } 627 } 628 629 /* Given the @gpa, get relevant @slptep. @slpte_level will be the last level 630 * of the translation, can be used for deciding the size of large page. 631 */ 632 static int vtd_gpa_to_slpte(VTDContextEntry *ce, uint64_t gpa, bool is_write, 633 uint64_t *slptep, uint32_t *slpte_level, 634 bool *reads, bool *writes) 635 { 636 dma_addr_t addr = vtd_get_slpt_base_from_context(ce); 637 uint32_t level = vtd_get_level_from_context_entry(ce); 638 uint32_t offset; 639 uint64_t slpte; 640 uint32_t ce_agaw = vtd_get_agaw_from_context_entry(ce); 641 uint64_t access_right_check; 642 643 /* Check if @gpa is above 2^X-1, where X is the minimum of MGAW in CAP_REG 644 * and AW in context-entry. 645 */ 646 if (gpa & ~((1ULL << MIN(ce_agaw, VTD_MGAW)) - 1)) { 647 VTD_DPRINTF(GENERAL, "error: gpa 0x%"PRIx64 " exceeds limits", gpa); 648 return -VTD_FR_ADDR_BEYOND_MGAW; 649 } 650 651 /* FIXME: what is the Atomics request here? */ 652 access_right_check = is_write ? 
VTD_SL_W : VTD_SL_R; 653 654 while (true) { 655 offset = vtd_gpa_level_offset(gpa, level); 656 slpte = vtd_get_slpte(addr, offset); 657 658 if (slpte == (uint64_t)-1) { 659 VTD_DPRINTF(GENERAL, "error: fail to access second-level paging " 660 "entry at level %"PRIu32 " for gpa 0x%"PRIx64, 661 level, gpa); 662 if (level == vtd_get_level_from_context_entry(ce)) { 663 /* Invalid programming of context-entry */ 664 return -VTD_FR_CONTEXT_ENTRY_INV; 665 } else { 666 return -VTD_FR_PAGING_ENTRY_INV; 667 } 668 } 669 *reads = (*reads) && (slpte & VTD_SL_R); 670 *writes = (*writes) && (slpte & VTD_SL_W); 671 if (!(slpte & access_right_check)) { 672 VTD_DPRINTF(GENERAL, "error: lack of %s permission for " 673 "gpa 0x%"PRIx64 " slpte 0x%"PRIx64, 674 (is_write ? "write" : "read"), gpa, slpte); 675 return is_write ? -VTD_FR_WRITE : -VTD_FR_READ; 676 } 677 if (vtd_slpte_nonzero_rsvd(slpte, level)) { 678 VTD_DPRINTF(GENERAL, "error: non-zero reserved field in second " 679 "level paging entry level %"PRIu32 " slpte 0x%"PRIx64, 680 level, slpte); 681 return -VTD_FR_PAGING_ENTRY_RSVD; 682 } 683 684 if (vtd_is_last_slpte(slpte, level)) { 685 *slptep = slpte; 686 *slpte_level = level; 687 return 0; 688 } 689 addr = vtd_get_slpte_addr(slpte); 690 level--; 691 } 692 } 693 694 /* Map a device to its corresponding domain (context-entry) */ 695 static int vtd_dev_to_context_entry(IntelIOMMUState *s, uint8_t bus_num, 696 uint8_t devfn, VTDContextEntry *ce) 697 { 698 VTDRootEntry re; 699 int ret_fr; 700 701 ret_fr = vtd_get_root_entry(s, bus_num, &re); 702 if (ret_fr) { 703 return ret_fr; 704 } 705 706 if (!vtd_root_entry_present(&re)) { 707 VTD_DPRINTF(GENERAL, "error: root-entry #%"PRIu8 " is not present", 708 bus_num); 709 return -VTD_FR_ROOT_ENTRY_P; 710 } else if (re.rsvd || (re.val & VTD_ROOT_ENTRY_RSVD)) { 711 VTD_DPRINTF(GENERAL, "error: non-zero reserved field in root-entry " 712 "hi 0x%"PRIx64 " lo 0x%"PRIx64, re.rsvd, re.val); 713 return -VTD_FR_ROOT_ENTRY_RSVD; 714 } 715 716 ret_fr = vtd_get_context_entry_from_root(&re, devfn, ce); 717 if (ret_fr) { 718 return ret_fr; 719 } 720 721 if (!vtd_context_entry_present(ce)) { 722 VTD_DPRINTF(GENERAL, 723 "error: context-entry #%"PRIu8 "(bus #%"PRIu8 ") " 724 "is not present", devfn, bus_num); 725 return -VTD_FR_CONTEXT_ENTRY_P; 726 } else if ((ce->hi & VTD_CONTEXT_ENTRY_RSVD_HI) || 727 (ce->lo & VTD_CONTEXT_ENTRY_RSVD_LO)) { 728 VTD_DPRINTF(GENERAL, 729 "error: non-zero reserved field in context-entry " 730 "hi 0x%"PRIx64 " lo 0x%"PRIx64, ce->hi, ce->lo); 731 return -VTD_FR_CONTEXT_ENTRY_RSVD; 732 } 733 /* Check if the programming of context-entry is valid */ 734 if (!vtd_is_level_supported(s, vtd_get_level_from_context_entry(ce))) { 735 VTD_DPRINTF(GENERAL, "error: unsupported Address Width value in " 736 "context-entry hi 0x%"PRIx64 " lo 0x%"PRIx64, 737 ce->hi, ce->lo); 738 return -VTD_FR_CONTEXT_ENTRY_INV; 739 } else if (ce->lo & VTD_CONTEXT_ENTRY_TT) { 740 VTD_DPRINTF(GENERAL, "error: unsupported Translation Type in " 741 "context-entry hi 0x%"PRIx64 " lo 0x%"PRIx64, 742 ce->hi, ce->lo); 743 return -VTD_FR_CONTEXT_ENTRY_INV; 744 } 745 return 0; 746 } 747 748 static inline uint16_t vtd_make_source_id(uint8_t bus_num, uint8_t devfn) 749 { 750 return ((bus_num & 0xffUL) << 8) | (devfn & 0xffUL); 751 } 752 753 static const bool vtd_qualified_faults[] = { 754 [VTD_FR_RESERVED] = false, 755 [VTD_FR_ROOT_ENTRY_P] = false, 756 [VTD_FR_CONTEXT_ENTRY_P] = true, 757 [VTD_FR_CONTEXT_ENTRY_INV] = true, 758 [VTD_FR_ADDR_BEYOND_MGAW] = true, 759 [VTD_FR_WRITE] = true, 760 
    [VTD_FR_READ] = true,
    [VTD_FR_PAGING_ENTRY_INV] = true,
    [VTD_FR_ROOT_TABLE_INV] = false,
    [VTD_FR_CONTEXT_TABLE_INV] = false,
    [VTD_FR_ROOT_ENTRY_RSVD] = false,
    [VTD_FR_PAGING_ENTRY_RSVD] = true,
    [VTD_FR_CONTEXT_ENTRY_TT] = true,
    [VTD_FR_RESERVED_ERR] = false,
    [VTD_FR_MAX] = false,
};

/* To see if a fault condition is "qualified", which is reported to software
 * only if the FPD field in the context-entry used to process the faulting
 * request is 0.
 */
static inline bool vtd_is_qualified_fault(VTDFaultReason fault)
{
    return vtd_qualified_faults[fault];
}

static inline bool vtd_is_interrupt_addr(hwaddr addr)
{
    return VTD_INTERRUPT_ADDR_FIRST <= addr && addr <= VTD_INTERRUPT_ADDR_LAST;
}

/* Map dev to context-entry then do a paging-structures walk to do an iommu
 * translation.
 *
 * Called from RCU critical section.
 *
 * @bus_num: The bus number
 * @devfn: The devfn, which is the combination of device and function numbers
 * @is_write: The access is a write operation
 * @entry: IOMMUTLBEntry that contains the addr to be translated and the result
 */
static void vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus,
                                   uint8_t devfn, hwaddr addr, bool is_write,
                                   IOMMUTLBEntry *entry)
{
    IntelIOMMUState *s = vtd_as->iommu_state;
    VTDContextEntry ce;
    uint8_t bus_num = pci_bus_num(bus);
    VTDContextCacheEntry *cc_entry = &vtd_as->context_cache_entry;
    uint64_t slpte, page_mask;
    uint32_t level;
    uint16_t source_id = vtd_make_source_id(bus_num, devfn);
    int ret_fr;
    bool is_fpd_set = false;
    bool reads = true;
    bool writes = true;
    VTDIOTLBEntry *iotlb_entry;

    /* Check if the request is in interrupt address range */
    if (vtd_is_interrupt_addr(addr)) {
        if (is_write) {
            /* FIXME: since we don't know the length of the access here, we
             * treat Non-DWORD length write requests without PASID as
             * interrupt requests, too. Without interrupt remapping support,
             * we just use 1:1 mapping.
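             *
             * Illustrative note (not in the original comment): the 1:1
             * mapping below just echoes the page back, e.g. a write to
             * 0xfee01004 yields iova = translated_addr = 0xfee01000,
             * addr_mask = 0xfff and IOMMU_WO permission, so guest MSI writes
             * still reach the interrupt controller untranslated.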
             */
            VTD_DPRINTF(MMU, "write request to interrupt address "
                        "gpa 0x%"PRIx64, addr);
            entry->iova = addr & VTD_PAGE_MASK_4K;
            entry->translated_addr = addr & VTD_PAGE_MASK_4K;
            entry->addr_mask = ~VTD_PAGE_MASK_4K;
            entry->perm = IOMMU_WO;
            return;
        } else {
            VTD_DPRINTF(GENERAL, "error: read request from interrupt address "
                        "gpa 0x%"PRIx64, addr);
            vtd_report_dmar_fault(s, source_id, addr, VTD_FR_READ, is_write);
            return;
        }
    }
    /* Try to fetch slpte from the IOTLB */
    iotlb_entry = vtd_lookup_iotlb(s, source_id, addr);
    if (iotlb_entry) {
        VTD_DPRINTF(CACHE, "hit iotlb sid 0x%"PRIx16 " gpa 0x%"PRIx64
                    " slpte 0x%"PRIx64 " did 0x%"PRIx16, source_id, addr,
                    iotlb_entry->slpte, iotlb_entry->domain_id);
        slpte = iotlb_entry->slpte;
        reads = iotlb_entry->read_flags;
        writes = iotlb_entry->write_flags;
        page_mask = iotlb_entry->mask;
        goto out;
    }
    /* Try to fetch context-entry from cache first */
    if (cc_entry->context_cache_gen == s->context_cache_gen) {
        VTD_DPRINTF(CACHE, "hit context-cache bus %d devfn %d "
                    "(hi %"PRIx64 " lo %"PRIx64 " gen %"PRIu32 ")",
                    bus_num, devfn, cc_entry->context_entry.hi,
                    cc_entry->context_entry.lo, cc_entry->context_cache_gen);
        ce = cc_entry->context_entry;
        is_fpd_set = ce.lo & VTD_CONTEXT_ENTRY_FPD;
    } else {
        ret_fr = vtd_dev_to_context_entry(s, bus_num, devfn, &ce);
        is_fpd_set = ce.lo & VTD_CONTEXT_ENTRY_FPD;
        if (ret_fr) {
            ret_fr = -ret_fr;
            if (is_fpd_set && vtd_is_qualified_fault(ret_fr)) {
                VTD_DPRINTF(FLOG, "fault processing is disabled for DMA "
                            "requests through this context-entry "
                            "(with FPD Set)");
            } else {
                vtd_report_dmar_fault(s, source_id, addr, ret_fr, is_write);
            }
            return;
        }
        /* Update context-cache */
        VTD_DPRINTF(CACHE, "update context-cache bus %d devfn %d "
                    "(hi %"PRIx64 " lo %"PRIx64 " gen %"PRIu32 "->%"PRIu32 ")",
                    bus_num, devfn, ce.hi, ce.lo,
                    cc_entry->context_cache_gen, s->context_cache_gen);
        cc_entry->context_entry = ce;
        cc_entry->context_cache_gen = s->context_cache_gen;
    }

    ret_fr = vtd_gpa_to_slpte(&ce, addr, is_write, &slpte, &level,
                              &reads, &writes);
    if (ret_fr) {
        ret_fr = -ret_fr;
        if (is_fpd_set && vtd_is_qualified_fault(ret_fr)) {
            VTD_DPRINTF(FLOG, "fault processing is disabled for DMA requests "
                        "through this context-entry (with FPD Set)");
        } else {
            vtd_report_dmar_fault(s, source_id, addr, ret_fr, is_write);
        }
        return;
    }

    page_mask = vtd_slpt_level_page_mask(level);
    vtd_update_iotlb(s, source_id, VTD_CONTEXT_ENTRY_DID(ce.hi), addr, slpte,
                     reads, writes, level);
out:
    entry->iova = addr & page_mask;
    entry->translated_addr = vtd_get_slpte_addr(slpte) & page_mask;
    entry->addr_mask = ~page_mask;
    entry->perm = (writes ? 2 : 0) + (reads ? 1 : 0);
}

static void vtd_root_table_setup(IntelIOMMUState *s)
{
    s->root = vtd_get_quad_raw(s, DMAR_RTADDR_REG);
    s->root_extended = s->root & VTD_RTADDR_RTT;
    s->root &= VTD_RTADDR_ADDR_MASK;

    VTD_DPRINTF(CSR, "root_table addr 0x%"PRIx64 " %s", s->root,
                (s->root_extended ?
"(extended)" : "")); 908 } 909 910 static void vtd_iec_notify_all(IntelIOMMUState *s, bool global, 911 uint32_t index, uint32_t mask) 912 { 913 x86_iommu_iec_notify_all(X86_IOMMU_DEVICE(s), global, index, mask); 914 } 915 916 static void vtd_interrupt_remap_table_setup(IntelIOMMUState *s) 917 { 918 uint64_t value = 0; 919 value = vtd_get_quad_raw(s, DMAR_IRTA_REG); 920 s->intr_size = 1UL << ((value & VTD_IRTA_SIZE_MASK) + 1); 921 s->intr_root = value & VTD_IRTA_ADDR_MASK; 922 s->intr_eime = value & VTD_IRTA_EIME; 923 924 /* Notify global invalidation */ 925 vtd_iec_notify_all(s, true, 0, 0); 926 927 VTD_DPRINTF(CSR, "int remap table addr 0x%"PRIx64 " size %"PRIu32, 928 s->intr_root, s->intr_size); 929 } 930 931 static void vtd_context_global_invalidate(IntelIOMMUState *s) 932 { 933 s->context_cache_gen++; 934 if (s->context_cache_gen == VTD_CONTEXT_CACHE_GEN_MAX) { 935 vtd_reset_context_cache(s); 936 } 937 } 938 939 940 /* Find the VTD address space currently associated with a given bus number, 941 */ 942 static VTDBus *vtd_find_as_from_bus_num(IntelIOMMUState *s, uint8_t bus_num) 943 { 944 VTDBus *vtd_bus = s->vtd_as_by_bus_num[bus_num]; 945 if (!vtd_bus) { 946 /* Iterate over the registered buses to find the one 947 * which currently hold this bus number, and update the bus_num lookup table: 948 */ 949 GHashTableIter iter; 950 951 g_hash_table_iter_init(&iter, s->vtd_as_by_busptr); 952 while (g_hash_table_iter_next (&iter, NULL, (void**)&vtd_bus)) { 953 if (pci_bus_num(vtd_bus->bus) == bus_num) { 954 s->vtd_as_by_bus_num[bus_num] = vtd_bus; 955 return vtd_bus; 956 } 957 } 958 } 959 return vtd_bus; 960 } 961 962 /* Do a context-cache device-selective invalidation. 963 * @func_mask: FM field after shifting 964 */ 965 static void vtd_context_device_invalidate(IntelIOMMUState *s, 966 uint16_t source_id, 967 uint16_t func_mask) 968 { 969 uint16_t mask; 970 VTDBus *vtd_bus; 971 VTDAddressSpace *vtd_as; 972 uint16_t devfn; 973 uint16_t devfn_it; 974 975 switch (func_mask & 3) { 976 case 0: 977 mask = 0; /* No bits in the SID field masked */ 978 break; 979 case 1: 980 mask = 4; /* Mask bit 2 in the SID field */ 981 break; 982 case 2: 983 mask = 6; /* Mask bit 2:1 in the SID field */ 984 break; 985 case 3: 986 mask = 7; /* Mask bit 2:0 in the SID field */ 987 break; 988 } 989 VTD_DPRINTF(INV, "device-selective invalidation source 0x%"PRIx16 990 " mask %"PRIu16, source_id, mask); 991 vtd_bus = vtd_find_as_from_bus_num(s, VTD_SID_TO_BUS(source_id)); 992 if (vtd_bus) { 993 devfn = VTD_SID_TO_DEVFN(source_id); 994 for (devfn_it = 0; devfn_it < X86_IOMMU_PCI_DEVFN_MAX; ++devfn_it) { 995 vtd_as = vtd_bus->dev_as[devfn_it]; 996 if (vtd_as && ((devfn_it & mask) == (devfn & mask))) { 997 VTD_DPRINTF(INV, "invalidate context-cahce of devfn 0x%"PRIx16, 998 devfn_it); 999 vtd_as->context_cache_entry.context_cache_gen = 0; 1000 } 1001 } 1002 } 1003 } 1004 1005 /* Context-cache invalidation 1006 * Returns the Context Actual Invalidation Granularity. 
1007 * @val: the content of the CCMD_REG 1008 */ 1009 static uint64_t vtd_context_cache_invalidate(IntelIOMMUState *s, uint64_t val) 1010 { 1011 uint64_t caig; 1012 uint64_t type = val & VTD_CCMD_CIRG_MASK; 1013 1014 switch (type) { 1015 case VTD_CCMD_DOMAIN_INVL: 1016 VTD_DPRINTF(INV, "domain-selective invalidation domain 0x%"PRIx16, 1017 (uint16_t)VTD_CCMD_DID(val)); 1018 /* Fall through */ 1019 case VTD_CCMD_GLOBAL_INVL: 1020 VTD_DPRINTF(INV, "global invalidation"); 1021 caig = VTD_CCMD_GLOBAL_INVL_A; 1022 vtd_context_global_invalidate(s); 1023 break; 1024 1025 case VTD_CCMD_DEVICE_INVL: 1026 caig = VTD_CCMD_DEVICE_INVL_A; 1027 vtd_context_device_invalidate(s, VTD_CCMD_SID(val), VTD_CCMD_FM(val)); 1028 break; 1029 1030 default: 1031 VTD_DPRINTF(GENERAL, "error: invalid granularity"); 1032 caig = 0; 1033 } 1034 return caig; 1035 } 1036 1037 static void vtd_iotlb_global_invalidate(IntelIOMMUState *s) 1038 { 1039 vtd_reset_iotlb(s); 1040 } 1041 1042 static void vtd_iotlb_domain_invalidate(IntelIOMMUState *s, uint16_t domain_id) 1043 { 1044 g_hash_table_foreach_remove(s->iotlb, vtd_hash_remove_by_domain, 1045 &domain_id); 1046 } 1047 1048 static void vtd_iotlb_page_invalidate(IntelIOMMUState *s, uint16_t domain_id, 1049 hwaddr addr, uint8_t am) 1050 { 1051 VTDIOTLBPageInvInfo info; 1052 1053 assert(am <= VTD_MAMV); 1054 info.domain_id = domain_id; 1055 info.addr = addr; 1056 info.mask = ~((1 << am) - 1); 1057 g_hash_table_foreach_remove(s->iotlb, vtd_hash_remove_by_page, &info); 1058 } 1059 1060 /* Flush IOTLB 1061 * Returns the IOTLB Actual Invalidation Granularity. 1062 * @val: the content of the IOTLB_REG 1063 */ 1064 static uint64_t vtd_iotlb_flush(IntelIOMMUState *s, uint64_t val) 1065 { 1066 uint64_t iaig; 1067 uint64_t type = val & VTD_TLB_FLUSH_GRANU_MASK; 1068 uint16_t domain_id; 1069 hwaddr addr; 1070 uint8_t am; 1071 1072 switch (type) { 1073 case VTD_TLB_GLOBAL_FLUSH: 1074 VTD_DPRINTF(INV, "global invalidation"); 1075 iaig = VTD_TLB_GLOBAL_FLUSH_A; 1076 vtd_iotlb_global_invalidate(s); 1077 break; 1078 1079 case VTD_TLB_DSI_FLUSH: 1080 domain_id = VTD_TLB_DID(val); 1081 VTD_DPRINTF(INV, "domain-selective invalidation domain 0x%"PRIx16, 1082 domain_id); 1083 iaig = VTD_TLB_DSI_FLUSH_A; 1084 vtd_iotlb_domain_invalidate(s, domain_id); 1085 break; 1086 1087 case VTD_TLB_PSI_FLUSH: 1088 domain_id = VTD_TLB_DID(val); 1089 addr = vtd_get_quad_raw(s, DMAR_IVA_REG); 1090 am = VTD_IVA_AM(addr); 1091 addr = VTD_IVA_ADDR(addr); 1092 VTD_DPRINTF(INV, "page-selective invalidation domain 0x%"PRIx16 1093 " addr 0x%"PRIx64 " mask %"PRIu8, domain_id, addr, am); 1094 if (am > VTD_MAMV) { 1095 VTD_DPRINTF(GENERAL, "error: supported max address mask value is " 1096 "%"PRIu8, (uint8_t)VTD_MAMV); 1097 iaig = 0; 1098 break; 1099 } 1100 iaig = VTD_TLB_PSI_FLUSH_A; 1101 vtd_iotlb_page_invalidate(s, domain_id, addr, am); 1102 break; 1103 1104 default: 1105 VTD_DPRINTF(GENERAL, "error: invalid granularity"); 1106 iaig = 0; 1107 } 1108 return iaig; 1109 } 1110 1111 static inline bool vtd_queued_inv_enable_check(IntelIOMMUState *s) 1112 { 1113 return s->iq_tail == 0; 1114 } 1115 1116 static inline bool vtd_queued_inv_disable_check(IntelIOMMUState *s) 1117 { 1118 return s->qi_enabled && (s->iq_tail == s->iq_head) && 1119 (s->iq_last_desc_type == VTD_INV_DESC_WAIT); 1120 } 1121 1122 static void vtd_handle_gcmd_qie(IntelIOMMUState *s, bool en) 1123 { 1124 uint64_t iqa_val = vtd_get_quad_raw(s, DMAR_IQA_REG); 1125 1126 VTD_DPRINTF(INV, "Queued Invalidation Enable %s", (en ? 
"on" : "off")); 1127 if (en) { 1128 if (vtd_queued_inv_enable_check(s)) { 1129 s->iq = iqa_val & VTD_IQA_IQA_MASK; 1130 /* 2^(x+8) entries */ 1131 s->iq_size = 1UL << ((iqa_val & VTD_IQA_QS) + 8); 1132 s->qi_enabled = true; 1133 VTD_DPRINTF(INV, "DMAR_IQA_REG 0x%"PRIx64, iqa_val); 1134 VTD_DPRINTF(INV, "Invalidation Queue addr 0x%"PRIx64 " size %d", 1135 s->iq, s->iq_size); 1136 /* Ok - report back to driver */ 1137 vtd_set_clear_mask_long(s, DMAR_GSTS_REG, 0, VTD_GSTS_QIES); 1138 } else { 1139 VTD_DPRINTF(GENERAL, "error: can't enable Queued Invalidation: " 1140 "tail %"PRIu16, s->iq_tail); 1141 } 1142 } else { 1143 if (vtd_queued_inv_disable_check(s)) { 1144 /* disable Queued Invalidation */ 1145 vtd_set_quad_raw(s, DMAR_IQH_REG, 0); 1146 s->iq_head = 0; 1147 s->qi_enabled = false; 1148 /* Ok - report back to driver */ 1149 vtd_set_clear_mask_long(s, DMAR_GSTS_REG, VTD_GSTS_QIES, 0); 1150 } else { 1151 VTD_DPRINTF(GENERAL, "error: can't disable Queued Invalidation: " 1152 "head %"PRIu16 ", tail %"PRIu16 1153 ", last_descriptor %"PRIu8, 1154 s->iq_head, s->iq_tail, s->iq_last_desc_type); 1155 } 1156 } 1157 } 1158 1159 /* Set Root Table Pointer */ 1160 static void vtd_handle_gcmd_srtp(IntelIOMMUState *s) 1161 { 1162 VTD_DPRINTF(CSR, "set Root Table Pointer"); 1163 1164 vtd_root_table_setup(s); 1165 /* Ok - report back to driver */ 1166 vtd_set_clear_mask_long(s, DMAR_GSTS_REG, 0, VTD_GSTS_RTPS); 1167 } 1168 1169 /* Set Interrupt Remap Table Pointer */ 1170 static void vtd_handle_gcmd_sirtp(IntelIOMMUState *s) 1171 { 1172 VTD_DPRINTF(CSR, "set Interrupt Remap Table Pointer"); 1173 1174 vtd_interrupt_remap_table_setup(s); 1175 /* Ok - report back to driver */ 1176 vtd_set_clear_mask_long(s, DMAR_GSTS_REG, 0, VTD_GSTS_IRTPS); 1177 } 1178 1179 /* Handle Translation Enable/Disable */ 1180 static void vtd_handle_gcmd_te(IntelIOMMUState *s, bool en) 1181 { 1182 VTD_DPRINTF(CSR, "Translation Enable %s", (en ? "on" : "off")); 1183 1184 if (en) { 1185 s->dmar_enabled = true; 1186 /* Ok - report back to driver */ 1187 vtd_set_clear_mask_long(s, DMAR_GSTS_REG, 0, VTD_GSTS_TES); 1188 } else { 1189 s->dmar_enabled = false; 1190 1191 /* Clear the index of Fault Recording Register */ 1192 s->next_frcd_reg = 0; 1193 /* Ok - report back to driver */ 1194 vtd_set_clear_mask_long(s, DMAR_GSTS_REG, VTD_GSTS_TES, 0); 1195 } 1196 } 1197 1198 /* Handle Interrupt Remap Enable/Disable */ 1199 static void vtd_handle_gcmd_ire(IntelIOMMUState *s, bool en) 1200 { 1201 VTD_DPRINTF(CSR, "Interrupt Remap Enable %s", (en ? 
"on" : "off")); 1202 1203 if (en) { 1204 s->intr_enabled = true; 1205 /* Ok - report back to driver */ 1206 vtd_set_clear_mask_long(s, DMAR_GSTS_REG, 0, VTD_GSTS_IRES); 1207 } else { 1208 s->intr_enabled = false; 1209 /* Ok - report back to driver */ 1210 vtd_set_clear_mask_long(s, DMAR_GSTS_REG, VTD_GSTS_IRES, 0); 1211 } 1212 } 1213 1214 /* Handle write to Global Command Register */ 1215 static void vtd_handle_gcmd_write(IntelIOMMUState *s) 1216 { 1217 uint32_t status = vtd_get_long_raw(s, DMAR_GSTS_REG); 1218 uint32_t val = vtd_get_long_raw(s, DMAR_GCMD_REG); 1219 uint32_t changed = status ^ val; 1220 1221 VTD_DPRINTF(CSR, "value 0x%"PRIx32 " status 0x%"PRIx32, val, status); 1222 if (changed & VTD_GCMD_TE) { 1223 /* Translation enable/disable */ 1224 vtd_handle_gcmd_te(s, val & VTD_GCMD_TE); 1225 } 1226 if (val & VTD_GCMD_SRTP) { 1227 /* Set/update the root-table pointer */ 1228 vtd_handle_gcmd_srtp(s); 1229 } 1230 if (changed & VTD_GCMD_QIE) { 1231 /* Queued Invalidation Enable */ 1232 vtd_handle_gcmd_qie(s, val & VTD_GCMD_QIE); 1233 } 1234 if (val & VTD_GCMD_SIRTP) { 1235 /* Set/update the interrupt remapping root-table pointer */ 1236 vtd_handle_gcmd_sirtp(s); 1237 } 1238 if (changed & VTD_GCMD_IRE) { 1239 /* Interrupt remap enable/disable */ 1240 vtd_handle_gcmd_ire(s, val & VTD_GCMD_IRE); 1241 } 1242 } 1243 1244 /* Handle write to Context Command Register */ 1245 static void vtd_handle_ccmd_write(IntelIOMMUState *s) 1246 { 1247 uint64_t ret; 1248 uint64_t val = vtd_get_quad_raw(s, DMAR_CCMD_REG); 1249 1250 /* Context-cache invalidation request */ 1251 if (val & VTD_CCMD_ICC) { 1252 if (s->qi_enabled) { 1253 VTD_DPRINTF(GENERAL, "error: Queued Invalidation enabled, " 1254 "should not use register-based invalidation"); 1255 return; 1256 } 1257 ret = vtd_context_cache_invalidate(s, val); 1258 /* Invalidation completed. Change something to show */ 1259 vtd_set_clear_mask_quad(s, DMAR_CCMD_REG, VTD_CCMD_ICC, 0ULL); 1260 ret = vtd_set_clear_mask_quad(s, DMAR_CCMD_REG, VTD_CCMD_CAIG_MASK, 1261 ret); 1262 VTD_DPRINTF(INV, "CCMD_REG write-back val: 0x%"PRIx64, ret); 1263 } 1264 } 1265 1266 /* Handle write to IOTLB Invalidation Register */ 1267 static void vtd_handle_iotlb_write(IntelIOMMUState *s) 1268 { 1269 uint64_t ret; 1270 uint64_t val = vtd_get_quad_raw(s, DMAR_IOTLB_REG); 1271 1272 /* IOTLB invalidation request */ 1273 if (val & VTD_TLB_IVT) { 1274 if (s->qi_enabled) { 1275 VTD_DPRINTF(GENERAL, "error: Queued Invalidation enabled, " 1276 "should not use register-based invalidation"); 1277 return; 1278 } 1279 ret = vtd_iotlb_flush(s, val); 1280 /* Invalidation completed. 
Change something to show */ 1281 vtd_set_clear_mask_quad(s, DMAR_IOTLB_REG, VTD_TLB_IVT, 0ULL); 1282 ret = vtd_set_clear_mask_quad(s, DMAR_IOTLB_REG, 1283 VTD_TLB_FLUSH_GRANU_MASK_A, ret); 1284 VTD_DPRINTF(INV, "IOTLB_REG write-back val: 0x%"PRIx64, ret); 1285 } 1286 } 1287 1288 /* Fetch an Invalidation Descriptor from the Invalidation Queue */ 1289 static bool vtd_get_inv_desc(dma_addr_t base_addr, uint32_t offset, 1290 VTDInvDesc *inv_desc) 1291 { 1292 dma_addr_t addr = base_addr + offset * sizeof(*inv_desc); 1293 if (dma_memory_read(&address_space_memory, addr, inv_desc, 1294 sizeof(*inv_desc))) { 1295 VTD_DPRINTF(GENERAL, "error: fail to fetch Invalidation Descriptor " 1296 "base_addr 0x%"PRIx64 " offset %"PRIu32, base_addr, offset); 1297 inv_desc->lo = 0; 1298 inv_desc->hi = 0; 1299 1300 return false; 1301 } 1302 inv_desc->lo = le64_to_cpu(inv_desc->lo); 1303 inv_desc->hi = le64_to_cpu(inv_desc->hi); 1304 return true; 1305 } 1306 1307 static bool vtd_process_wait_desc(IntelIOMMUState *s, VTDInvDesc *inv_desc) 1308 { 1309 if ((inv_desc->hi & VTD_INV_DESC_WAIT_RSVD_HI) || 1310 (inv_desc->lo & VTD_INV_DESC_WAIT_RSVD_LO)) { 1311 VTD_DPRINTF(GENERAL, "error: non-zero reserved field in Invalidation " 1312 "Wait Descriptor hi 0x%"PRIx64 " lo 0x%"PRIx64, 1313 inv_desc->hi, inv_desc->lo); 1314 return false; 1315 } 1316 if (inv_desc->lo & VTD_INV_DESC_WAIT_SW) { 1317 /* Status Write */ 1318 uint32_t status_data = (uint32_t)(inv_desc->lo >> 1319 VTD_INV_DESC_WAIT_DATA_SHIFT); 1320 1321 assert(!(inv_desc->lo & VTD_INV_DESC_WAIT_IF)); 1322 1323 /* FIXME: need to be masked with HAW? */ 1324 dma_addr_t status_addr = inv_desc->hi; 1325 VTD_DPRINTF(INV, "status data 0x%x, status addr 0x%"PRIx64, 1326 status_data, status_addr); 1327 status_data = cpu_to_le32(status_data); 1328 if (dma_memory_write(&address_space_memory, status_addr, &status_data, 1329 sizeof(status_data))) { 1330 VTD_DPRINTF(GENERAL, "error: fail to perform a coherent write"); 1331 return false; 1332 } 1333 } else if (inv_desc->lo & VTD_INV_DESC_WAIT_IF) { 1334 /* Interrupt flag */ 1335 VTD_DPRINTF(INV, "Invalidation Wait Descriptor interrupt completion"); 1336 vtd_generate_completion_event(s); 1337 } else { 1338 VTD_DPRINTF(GENERAL, "error: invalid Invalidation Wait Descriptor: " 1339 "hi 0x%"PRIx64 " lo 0x%"PRIx64, inv_desc->hi, inv_desc->lo); 1340 return false; 1341 } 1342 return true; 1343 } 1344 1345 static bool vtd_process_context_cache_desc(IntelIOMMUState *s, 1346 VTDInvDesc *inv_desc) 1347 { 1348 if ((inv_desc->lo & VTD_INV_DESC_CC_RSVD) || inv_desc->hi) { 1349 VTD_DPRINTF(GENERAL, "error: non-zero reserved field in Context-cache " 1350 "Invalidate Descriptor"); 1351 return false; 1352 } 1353 switch (inv_desc->lo & VTD_INV_DESC_CC_G) { 1354 case VTD_INV_DESC_CC_DOMAIN: 1355 VTD_DPRINTF(INV, "domain-selective invalidation domain 0x%"PRIx16, 1356 (uint16_t)VTD_INV_DESC_CC_DID(inv_desc->lo)); 1357 /* Fall through */ 1358 case VTD_INV_DESC_CC_GLOBAL: 1359 VTD_DPRINTF(INV, "global invalidation"); 1360 vtd_context_global_invalidate(s); 1361 break; 1362 1363 case VTD_INV_DESC_CC_DEVICE: 1364 vtd_context_device_invalidate(s, VTD_INV_DESC_CC_SID(inv_desc->lo), 1365 VTD_INV_DESC_CC_FM(inv_desc->lo)); 1366 break; 1367 1368 default: 1369 VTD_DPRINTF(GENERAL, "error: invalid granularity in Context-cache " 1370 "Invalidate Descriptor hi 0x%"PRIx64 " lo 0x%"PRIx64, 1371 inv_desc->hi, inv_desc->lo); 1372 return false; 1373 } 1374 return true; 1375 } 1376 1377 static bool vtd_process_iotlb_desc(IntelIOMMUState *s, VTDInvDesc *inv_desc) 1378 { 
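    /* Illustrative example (not in the original code; field layout assumed
     * from the VT-d spec): a page-selective invalidation of 8 pages (am = 3)
     * at IOVA 0x100000 in domain 1 would arrive here roughly as
     *     lo = VTD_INV_DESC_IOTLB | VTD_INV_DESC_IOTLB_PAGE | (1 << 16);
     *     hi = 0x100000 | 3;
     * i.e. granularity and domain-id in the low 64 bits, address and
     * address-mask in the high 64 bits.
     */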
1379 uint16_t domain_id; 1380 uint8_t am; 1381 hwaddr addr; 1382 1383 if ((inv_desc->lo & VTD_INV_DESC_IOTLB_RSVD_LO) || 1384 (inv_desc->hi & VTD_INV_DESC_IOTLB_RSVD_HI)) { 1385 VTD_DPRINTF(GENERAL, "error: non-zero reserved field in IOTLB " 1386 "Invalidate Descriptor hi 0x%"PRIx64 " lo 0x%"PRIx64, 1387 inv_desc->hi, inv_desc->lo); 1388 return false; 1389 } 1390 1391 switch (inv_desc->lo & VTD_INV_DESC_IOTLB_G) { 1392 case VTD_INV_DESC_IOTLB_GLOBAL: 1393 VTD_DPRINTF(INV, "global invalidation"); 1394 vtd_iotlb_global_invalidate(s); 1395 break; 1396 1397 case VTD_INV_DESC_IOTLB_DOMAIN: 1398 domain_id = VTD_INV_DESC_IOTLB_DID(inv_desc->lo); 1399 VTD_DPRINTF(INV, "domain-selective invalidation domain 0x%"PRIx16, 1400 domain_id); 1401 vtd_iotlb_domain_invalidate(s, domain_id); 1402 break; 1403 1404 case VTD_INV_DESC_IOTLB_PAGE: 1405 domain_id = VTD_INV_DESC_IOTLB_DID(inv_desc->lo); 1406 addr = VTD_INV_DESC_IOTLB_ADDR(inv_desc->hi); 1407 am = VTD_INV_DESC_IOTLB_AM(inv_desc->hi); 1408 VTD_DPRINTF(INV, "page-selective invalidation domain 0x%"PRIx16 1409 " addr 0x%"PRIx64 " mask %"PRIu8, domain_id, addr, am); 1410 if (am > VTD_MAMV) { 1411 VTD_DPRINTF(GENERAL, "error: supported max address mask value is " 1412 "%"PRIu8, (uint8_t)VTD_MAMV); 1413 return false; 1414 } 1415 vtd_iotlb_page_invalidate(s, domain_id, addr, am); 1416 break; 1417 1418 default: 1419 VTD_DPRINTF(GENERAL, "error: invalid granularity in IOTLB Invalidate " 1420 "Descriptor hi 0x%"PRIx64 " lo 0x%"PRIx64, 1421 inv_desc->hi, inv_desc->lo); 1422 return false; 1423 } 1424 return true; 1425 } 1426 1427 static bool vtd_process_inv_iec_desc(IntelIOMMUState *s, 1428 VTDInvDesc *inv_desc) 1429 { 1430 VTD_DPRINTF(INV, "inv ir glob %d index %d mask %d", 1431 inv_desc->iec.granularity, 1432 inv_desc->iec.index, 1433 inv_desc->iec.index_mask); 1434 1435 vtd_iec_notify_all(s, !inv_desc->iec.granularity, 1436 inv_desc->iec.index, 1437 inv_desc->iec.index_mask); 1438 1439 return true; 1440 } 1441 1442 static bool vtd_process_inv_desc(IntelIOMMUState *s) 1443 { 1444 VTDInvDesc inv_desc; 1445 uint8_t desc_type; 1446 1447 VTD_DPRINTF(INV, "iq head %"PRIu16, s->iq_head); 1448 if (!vtd_get_inv_desc(s->iq, s->iq_head, &inv_desc)) { 1449 s->iq_last_desc_type = VTD_INV_DESC_NONE; 1450 return false; 1451 } 1452 desc_type = inv_desc.lo & VTD_INV_DESC_TYPE; 1453 /* FIXME: should update at first or at last? 
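     * (As written, it is updated first, so even if the descriptor below turns
     * out to be malformed, iq_last_desc_type already records the type that
     * stalled the queue.)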
*/ 1454 s->iq_last_desc_type = desc_type; 1455 1456 switch (desc_type) { 1457 case VTD_INV_DESC_CC: 1458 VTD_DPRINTF(INV, "Context-cache Invalidate Descriptor hi 0x%"PRIx64 1459 " lo 0x%"PRIx64, inv_desc.hi, inv_desc.lo); 1460 if (!vtd_process_context_cache_desc(s, &inv_desc)) { 1461 return false; 1462 } 1463 break; 1464 1465 case VTD_INV_DESC_IOTLB: 1466 VTD_DPRINTF(INV, "IOTLB Invalidate Descriptor hi 0x%"PRIx64 1467 " lo 0x%"PRIx64, inv_desc.hi, inv_desc.lo); 1468 if (!vtd_process_iotlb_desc(s, &inv_desc)) { 1469 return false; 1470 } 1471 break; 1472 1473 case VTD_INV_DESC_WAIT: 1474 VTD_DPRINTF(INV, "Invalidation Wait Descriptor hi 0x%"PRIx64 1475 " lo 0x%"PRIx64, inv_desc.hi, inv_desc.lo); 1476 if (!vtd_process_wait_desc(s, &inv_desc)) { 1477 return false; 1478 } 1479 break; 1480 1481 case VTD_INV_DESC_IEC: 1482 VTD_DPRINTF(INV, "Invalidation Interrupt Entry Cache " 1483 "Descriptor hi 0x%"PRIx64 " lo 0x%"PRIx64, 1484 inv_desc.hi, inv_desc.lo); 1485 if (!vtd_process_inv_iec_desc(s, &inv_desc)) { 1486 return false; 1487 } 1488 break; 1489 1490 default: 1491 VTD_DPRINTF(GENERAL, "error: unkonw Invalidation Descriptor type " 1492 "hi 0x%"PRIx64 " lo 0x%"PRIx64 " type %"PRIu8, 1493 inv_desc.hi, inv_desc.lo, desc_type); 1494 return false; 1495 } 1496 s->iq_head++; 1497 if (s->iq_head == s->iq_size) { 1498 s->iq_head = 0; 1499 } 1500 return true; 1501 } 1502 1503 /* Try to fetch and process more Invalidation Descriptors */ 1504 static void vtd_fetch_inv_desc(IntelIOMMUState *s) 1505 { 1506 VTD_DPRINTF(INV, "fetch Invalidation Descriptors"); 1507 if (s->iq_tail >= s->iq_size) { 1508 /* Detects an invalid Tail pointer */ 1509 VTD_DPRINTF(GENERAL, "error: iq_tail is %"PRIu16 1510 " while iq_size is %"PRIu16, s->iq_tail, s->iq_size); 1511 vtd_handle_inv_queue_error(s); 1512 return; 1513 } 1514 while (s->iq_head != s->iq_tail) { 1515 if (!vtd_process_inv_desc(s)) { 1516 /* Invalidation Queue Errors */ 1517 vtd_handle_inv_queue_error(s); 1518 break; 1519 } 1520 /* Must update the IQH_REG in time */ 1521 vtd_set_quad_raw(s, DMAR_IQH_REG, 1522 (((uint64_t)(s->iq_head)) << VTD_IQH_QH_SHIFT) & 1523 VTD_IQH_QH_MASK); 1524 } 1525 } 1526 1527 /* Handle write to Invalidation Queue Tail Register */ 1528 static void vtd_handle_iqt_write(IntelIOMMUState *s) 1529 { 1530 uint64_t val = vtd_get_quad_raw(s, DMAR_IQT_REG); 1531 1532 s->iq_tail = VTD_IQT_QT(val); 1533 VTD_DPRINTF(INV, "set iq tail %"PRIu16, s->iq_tail); 1534 if (s->qi_enabled && !(vtd_get_long_raw(s, DMAR_FSTS_REG) & VTD_FSTS_IQE)) { 1535 /* Process Invalidation Queue here */ 1536 vtd_fetch_inv_desc(s); 1537 } 1538 } 1539 1540 static void vtd_handle_fsts_write(IntelIOMMUState *s) 1541 { 1542 uint32_t fsts_reg = vtd_get_long_raw(s, DMAR_FSTS_REG); 1543 uint32_t fectl_reg = vtd_get_long_raw(s, DMAR_FECTL_REG); 1544 uint32_t status_fields = VTD_FSTS_PFO | VTD_FSTS_PPF | VTD_FSTS_IQE; 1545 1546 if ((fectl_reg & VTD_FECTL_IP) && !(fsts_reg & status_fields)) { 1547 vtd_set_clear_mask_long(s, DMAR_FECTL_REG, VTD_FECTL_IP, 0); 1548 VTD_DPRINTF(FLOG, "all pending interrupt conditions serviced, clear " 1549 "IP field of FECTL_REG"); 1550 } 1551 /* FIXME: when IQE is Clear, should we try to fetch some Invalidation 1552 * Descriptors if there are any when Queued Invalidation is enabled? 1553 */ 1554 } 1555 1556 static void vtd_handle_fectl_write(IntelIOMMUState *s) 1557 { 1558 uint32_t fectl_reg; 1559 /* FIXME: when software clears the IM field, check the IP field. 
But do we 1560 * need to compare the old value and the new value to conclude that 1561 * software clears the IM field? Or just check if the IM field is zero? 1562 */ 1563 fectl_reg = vtd_get_long_raw(s, DMAR_FECTL_REG); 1564 if ((fectl_reg & VTD_FECTL_IP) && !(fectl_reg & VTD_FECTL_IM)) { 1565 vtd_generate_interrupt(s, DMAR_FEADDR_REG, DMAR_FEDATA_REG); 1566 vtd_set_clear_mask_long(s, DMAR_FECTL_REG, VTD_FECTL_IP, 0); 1567 VTD_DPRINTF(FLOG, "IM field is cleared, generate " 1568 "fault event interrupt"); 1569 } 1570 } 1571 1572 static void vtd_handle_ics_write(IntelIOMMUState *s) 1573 { 1574 uint32_t ics_reg = vtd_get_long_raw(s, DMAR_ICS_REG); 1575 uint32_t iectl_reg = vtd_get_long_raw(s, DMAR_IECTL_REG); 1576 1577 if ((iectl_reg & VTD_IECTL_IP) && !(ics_reg & VTD_ICS_IWC)) { 1578 vtd_set_clear_mask_long(s, DMAR_IECTL_REG, VTD_IECTL_IP, 0); 1579 VTD_DPRINTF(INV, "pending completion interrupt condition serviced, " 1580 "clear IP field of IECTL_REG"); 1581 } 1582 } 1583 1584 static void vtd_handle_iectl_write(IntelIOMMUState *s) 1585 { 1586 uint32_t iectl_reg; 1587 /* FIXME: when software clears the IM field, check the IP field. But do we 1588 * need to compare the old value and the new value to conclude that 1589 * software clears the IM field? Or just check if the IM field is zero? 1590 */ 1591 iectl_reg = vtd_get_long_raw(s, DMAR_IECTL_REG); 1592 if ((iectl_reg & VTD_IECTL_IP) && !(iectl_reg & VTD_IECTL_IM)) { 1593 vtd_generate_interrupt(s, DMAR_IEADDR_REG, DMAR_IEDATA_REG); 1594 vtd_set_clear_mask_long(s, DMAR_IECTL_REG, VTD_IECTL_IP, 0); 1595 VTD_DPRINTF(INV, "IM field is cleared, generate " 1596 "invalidation event interrupt"); 1597 } 1598 } 1599 1600 static uint64_t vtd_mem_read(void *opaque, hwaddr addr, unsigned size) 1601 { 1602 IntelIOMMUState *s = opaque; 1603 uint64_t val; 1604 1605 if (addr + size > DMAR_REG_SIZE) { 1606 VTD_DPRINTF(GENERAL, "error: addr outside region: max 0x%"PRIx64 1607 ", got 0x%"PRIx64 " %d", 1608 (uint64_t)DMAR_REG_SIZE, addr, size); 1609 return (uint64_t)-1; 1610 } 1611 1612 switch (addr) { 1613 /* Root Table Address Register, 64-bit */ 1614 case DMAR_RTADDR_REG: 1615 if (size == 4) { 1616 val = s->root & ((1ULL << 32) - 1); 1617 } else { 1618 val = s->root; 1619 } 1620 break; 1621 1622 case DMAR_RTADDR_REG_HI: 1623 assert(size == 4); 1624 val = s->root >> 32; 1625 break; 1626 1627 /* Invalidation Queue Address Register, 64-bit */ 1628 case DMAR_IQA_REG: 1629 val = s->iq | (vtd_get_quad(s, DMAR_IQA_REG) & VTD_IQA_QS); 1630 if (size == 4) { 1631 val = val & ((1ULL << 32) - 1); 1632 } 1633 break; 1634 1635 case DMAR_IQA_REG_HI: 1636 assert(size == 4); 1637 val = s->iq >> 32; 1638 break; 1639 1640 default: 1641 if (size == 4) { 1642 val = vtd_get_long(s, addr); 1643 } else { 1644 val = vtd_get_quad(s, addr); 1645 } 1646 } 1647 VTD_DPRINTF(CSR, "addr 0x%"PRIx64 " size %d val 0x%"PRIx64, 1648 addr, size, val); 1649 return val; 1650 } 1651 1652 static void vtd_mem_write(void *opaque, hwaddr addr, 1653 uint64_t val, unsigned size) 1654 { 1655 IntelIOMMUState *s = opaque; 1656 1657 if (addr + size > DMAR_REG_SIZE) { 1658 VTD_DPRINTF(GENERAL, "error: addr outside region: max 0x%"PRIx64 1659 ", got 0x%"PRIx64 " %d", 1660 (uint64_t)DMAR_REG_SIZE, addr, size); 1661 return; 1662 } 1663 1664 switch (addr) { 1665 /* Global Command Register, 32-bit */ 1666 case DMAR_GCMD_REG: 1667 VTD_DPRINTF(CSR, "DMAR_GCMD_REG write addr 0x%"PRIx64 1668 ", size %d, val 0x%"PRIx64, addr, size, val); 1669 vtd_set_long(s, addr, val); 1670 vtd_handle_gcmd_write(s); 1671 break; 1672 1673 
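    /* Note (added): the 64-bit CCMD and IOTLB registers below accept either
     * one 8-byte write or a pair of 4-byte writes (low half, then the _HI
     * half); the invalidation itself is only carried out once the high half,
     * or the full quad, has been written.
     */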
/* Context Command Register, 64-bit */ 1674 case DMAR_CCMD_REG: 1675 VTD_DPRINTF(CSR, "DMAR_CCMD_REG write addr 0x%"PRIx64 1676 ", size %d, val 0x%"PRIx64, addr, size, val); 1677 if (size == 4) { 1678 vtd_set_long(s, addr, val); 1679 } else { 1680 vtd_set_quad(s, addr, val); 1681 vtd_handle_ccmd_write(s); 1682 } 1683 break; 1684 1685 case DMAR_CCMD_REG_HI: 1686 VTD_DPRINTF(CSR, "DMAR_CCMD_REG_HI write addr 0x%"PRIx64 1687 ", size %d, val 0x%"PRIx64, addr, size, val); 1688 assert(size == 4); 1689 vtd_set_long(s, addr, val); 1690 vtd_handle_ccmd_write(s); 1691 break; 1692 1693 /* IOTLB Invalidation Register, 64-bit */ 1694 case DMAR_IOTLB_REG: 1695 VTD_DPRINTF(INV, "DMAR_IOTLB_REG write addr 0x%"PRIx64 1696 ", size %d, val 0x%"PRIx64, addr, size, val); 1697 if (size == 4) { 1698 vtd_set_long(s, addr, val); 1699 } else { 1700 vtd_set_quad(s, addr, val); 1701 vtd_handle_iotlb_write(s); 1702 } 1703 break; 1704 1705 case DMAR_IOTLB_REG_HI: 1706 VTD_DPRINTF(INV, "DMAR_IOTLB_REG_HI write addr 0x%"PRIx64 1707 ", size %d, val 0x%"PRIx64, addr, size, val); 1708 assert(size == 4); 1709 vtd_set_long(s, addr, val); 1710 vtd_handle_iotlb_write(s); 1711 break; 1712 1713 /* Invalidate Address Register, 64-bit */ 1714 case DMAR_IVA_REG: 1715 VTD_DPRINTF(INV, "DMAR_IVA_REG write addr 0x%"PRIx64 1716 ", size %d, val 0x%"PRIx64, addr, size, val); 1717 if (size == 4) { 1718 vtd_set_long(s, addr, val); 1719 } else { 1720 vtd_set_quad(s, addr, val); 1721 } 1722 break; 1723 1724 case DMAR_IVA_REG_HI: 1725 VTD_DPRINTF(INV, "DMAR_IVA_REG_HI write addr 0x%"PRIx64 1726 ", size %d, val 0x%"PRIx64, addr, size, val); 1727 assert(size == 4); 1728 vtd_set_long(s, addr, val); 1729 break; 1730 1731 /* Fault Status Register, 32-bit */ 1732 case DMAR_FSTS_REG: 1733 VTD_DPRINTF(FLOG, "DMAR_FSTS_REG write addr 0x%"PRIx64 1734 ", size %d, val 0x%"PRIx64, addr, size, val); 1735 assert(size == 4); 1736 vtd_set_long(s, addr, val); 1737 vtd_handle_fsts_write(s); 1738 break; 1739 1740 /* Fault Event Control Register, 32-bit */ 1741 case DMAR_FECTL_REG: 1742 VTD_DPRINTF(FLOG, "DMAR_FECTL_REG write addr 0x%"PRIx64 1743 ", size %d, val 0x%"PRIx64, addr, size, val); 1744 assert(size == 4); 1745 vtd_set_long(s, addr, val); 1746 vtd_handle_fectl_write(s); 1747 break; 1748 1749 /* Fault Event Data Register, 32-bit */ 1750 case DMAR_FEDATA_REG: 1751 VTD_DPRINTF(FLOG, "DMAR_FEDATA_REG write addr 0x%"PRIx64 1752 ", size %d, val 0x%"PRIx64, addr, size, val); 1753 assert(size == 4); 1754 vtd_set_long(s, addr, val); 1755 break; 1756 1757 /* Fault Event Address Register, 32-bit */ 1758 case DMAR_FEADDR_REG: 1759 VTD_DPRINTF(FLOG, "DMAR_FEADDR_REG write addr 0x%"PRIx64 1760 ", size %d, val 0x%"PRIx64, addr, size, val); 1761 assert(size == 4); 1762 vtd_set_long(s, addr, val); 1763 break; 1764 1765 /* Fault Event Upper Address Register, 32-bit */ 1766 case DMAR_FEUADDR_REG: 1767 VTD_DPRINTF(FLOG, "DMAR_FEUADDR_REG write addr 0x%"PRIx64 1768 ", size %d, val 0x%"PRIx64, addr, size, val); 1769 assert(size == 4); 1770 vtd_set_long(s, addr, val); 1771 break; 1772 1773 /* Protected Memory Enable Register, 32-bit */ 1774 case DMAR_PMEN_REG: 1775 VTD_DPRINTF(CSR, "DMAR_PMEN_REG write addr 0x%"PRIx64 1776 ", size %d, val 0x%"PRIx64, addr, size, val); 1777 assert(size == 4); 1778 vtd_set_long(s, addr, val); 1779 break; 1780 1781 /* Root Table Address Register, 64-bit */ 1782 case DMAR_RTADDR_REG: 1783 VTD_DPRINTF(CSR, "DMAR_RTADDR_REG write addr 0x%"PRIx64 1784 ", size %d, val 0x%"PRIx64, addr, size, val); 1785 if (size == 4) { 1786 vtd_set_long(s, addr, val); 
1787 } else { 1788 vtd_set_quad(s, addr, val); 1789 } 1790 break; 1791 1792 case DMAR_RTADDR_REG_HI: 1793 VTD_DPRINTF(CSR, "DMAR_RTADDR_REG_HI write addr 0x%"PRIx64 1794 ", size %d, val 0x%"PRIx64, addr, size, val); 1795 assert(size == 4); 1796 vtd_set_long(s, addr, val); 1797 break; 1798 1799 /* Invalidation Queue Tail Register, 64-bit */ 1800 case DMAR_IQT_REG: 1801 VTD_DPRINTF(INV, "DMAR_IQT_REG write addr 0x%"PRIx64 1802 ", size %d, val 0x%"PRIx64, addr, size, val); 1803 if (size == 4) { 1804 vtd_set_long(s, addr, val); 1805 } else { 1806 vtd_set_quad(s, addr, val); 1807 } 1808 vtd_handle_iqt_write(s); 1809 break; 1810 1811 case DMAR_IQT_REG_HI: 1812 VTD_DPRINTF(INV, "DMAR_IQT_REG_HI write addr 0x%"PRIx64 1813 ", size %d, val 0x%"PRIx64, addr, size, val); 1814 assert(size == 4); 1815 vtd_set_long(s, addr, val); 1816 /* 19:63 of IQT_REG is RsvdZ, do nothing here */ 1817 break; 1818 1819 /* Invalidation Queue Address Register, 64-bit */ 1820 case DMAR_IQA_REG: 1821 VTD_DPRINTF(INV, "DMAR_IQA_REG write addr 0x%"PRIx64 1822 ", size %d, val 0x%"PRIx64, addr, size, val); 1823 if (size == 4) { 1824 vtd_set_long(s, addr, val); 1825 } else { 1826 vtd_set_quad(s, addr, val); 1827 } 1828 break; 1829 1830 case DMAR_IQA_REG_HI: 1831 VTD_DPRINTF(INV, "DMAR_IQA_REG_HI write addr 0x%"PRIx64 1832 ", size %d, val 0x%"PRIx64, addr, size, val); 1833 assert(size == 4); 1834 vtd_set_long(s, addr, val); 1835 break; 1836 1837 /* Invalidation Completion Status Register, 32-bit */ 1838 case DMAR_ICS_REG: 1839 VTD_DPRINTF(INV, "DMAR_ICS_REG write addr 0x%"PRIx64 1840 ", size %d, val 0x%"PRIx64, addr, size, val); 1841 assert(size == 4); 1842 vtd_set_long(s, addr, val); 1843 vtd_handle_ics_write(s); 1844 break; 1845 1846 /* Invalidation Event Control Register, 32-bit */ 1847 case DMAR_IECTL_REG: 1848 VTD_DPRINTF(INV, "DMAR_IECTL_REG write addr 0x%"PRIx64 1849 ", size %d, val 0x%"PRIx64, addr, size, val); 1850 assert(size == 4); 1851 vtd_set_long(s, addr, val); 1852 vtd_handle_iectl_write(s); 1853 break; 1854 1855 /* Invalidation Event Data Register, 32-bit */ 1856 case DMAR_IEDATA_REG: 1857 VTD_DPRINTF(INV, "DMAR_IEDATA_REG write addr 0x%"PRIx64 1858 ", size %d, val 0x%"PRIx64, addr, size, val); 1859 assert(size == 4); 1860 vtd_set_long(s, addr, val); 1861 break; 1862 1863 /* Invalidation Event Address Register, 32-bit */ 1864 case DMAR_IEADDR_REG: 1865 VTD_DPRINTF(INV, "DMAR_IEADDR_REG write addr 0x%"PRIx64 1866 ", size %d, val 0x%"PRIx64, addr, size, val); 1867 assert(size == 4); 1868 vtd_set_long(s, addr, val); 1869 break; 1870 1871 /* Invalidation Event Upper Address Register, 32-bit */ 1872 case DMAR_IEUADDR_REG: 1873 VTD_DPRINTF(INV, "DMAR_IEUADDR_REG write addr 0x%"PRIx64 1874 ", size %d, val 0x%"PRIx64, addr, size, val); 1875 assert(size == 4); 1876 vtd_set_long(s, addr, val); 1877 break; 1878 1879 /* Fault Recording Registers, 128-bit */ 1880 case DMAR_FRCD_REG_0_0: 1881 VTD_DPRINTF(FLOG, "DMAR_FRCD_REG_0_0 write addr 0x%"PRIx64 1882 ", size %d, val 0x%"PRIx64, addr, size, val); 1883 if (size == 4) { 1884 vtd_set_long(s, addr, val); 1885 } else { 1886 vtd_set_quad(s, addr, val); 1887 } 1888 break; 1889 1890 case DMAR_FRCD_REG_0_1: 1891 VTD_DPRINTF(FLOG, "DMAR_FRCD_REG_0_1 write addr 0x%"PRIx64 1892 ", size %d, val 0x%"PRIx64, addr, size, val); 1893 assert(size == 4); 1894 vtd_set_long(s, addr, val); 1895 break; 1896 1897 case DMAR_FRCD_REG_0_2: 1898 VTD_DPRINTF(FLOG, "DMAR_FRCD_REG_0_2 write addr 0x%"PRIx64 1899 ", size %d, val 0x%"PRIx64, addr, size, val); 1900 if (size == 4) { 1901 vtd_set_long(s, addr, 
val); 1902 } else { 1903 vtd_set_quad(s, addr, val); 1904 /* May clear bit 127 (Fault), update PPF */ 1905 vtd_update_fsts_ppf(s); 1906 } 1907 break; 1908 1909 case DMAR_FRCD_REG_0_3: 1910 VTD_DPRINTF(FLOG, "DMAR_FRCD_REG_0_3 write addr 0x%"PRIx64 1911 ", size %d, val 0x%"PRIx64, addr, size, val); 1912 assert(size == 4); 1913 vtd_set_long(s, addr, val); 1914 /* May clear bit 127 (Fault), update PPF */ 1915 vtd_update_fsts_ppf(s); 1916 break; 1917 1918 case DMAR_IRTA_REG: 1919 VTD_DPRINTF(IR, "DMAR_IRTA_REG write addr 0x%"PRIx64 1920 ", size %d, val 0x%"PRIx64, addr, size, val); 1921 if (size == 4) { 1922 vtd_set_long(s, addr, val); 1923 } else { 1924 vtd_set_quad(s, addr, val); 1925 } 1926 break; 1927 1928 case DMAR_IRTA_REG_HI: 1929 VTD_DPRINTF(IR, "DMAR_IRTA_REG_HI write addr 0x%"PRIx64 1930 ", size %d, val 0x%"PRIx64, addr, size, val); 1931 assert(size == 4); 1932 vtd_set_long(s, addr, val); 1933 break; 1934 1935 default: 1936 VTD_DPRINTF(GENERAL, "error: unhandled reg write addr 0x%"PRIx64 1937 ", size %d, val 0x%"PRIx64, addr, size, val); 1938 if (size == 4) { 1939 vtd_set_long(s, addr, val); 1940 } else { 1941 vtd_set_quad(s, addr, val); 1942 } 1943 } 1944 } 1945 1946 static IOMMUTLBEntry vtd_iommu_translate(MemoryRegion *iommu, hwaddr addr, 1947 bool is_write) 1948 { 1949 VTDAddressSpace *vtd_as = container_of(iommu, VTDAddressSpace, iommu); 1950 IntelIOMMUState *s = vtd_as->iommu_state; 1951 IOMMUTLBEntry ret = { 1952 .target_as = &address_space_memory, 1953 .iova = addr, 1954 .translated_addr = 0, 1955 .addr_mask = ~(hwaddr)0, 1956 .perm = IOMMU_NONE, 1957 }; 1958 1959 if (!s->dmar_enabled) { 1960 /* DMAR disabled, passthrough, use 4k-page*/ 1961 ret.iova = addr & VTD_PAGE_MASK_4K; 1962 ret.translated_addr = addr & VTD_PAGE_MASK_4K; 1963 ret.addr_mask = ~VTD_PAGE_MASK_4K; 1964 ret.perm = IOMMU_RW; 1965 return ret; 1966 } 1967 1968 vtd_do_iommu_translate(vtd_as, vtd_as->bus, vtd_as->devfn, addr, 1969 is_write, &ret); 1970 VTD_DPRINTF(MMU, 1971 "bus %"PRIu8 " slot %"PRIu8 " func %"PRIu8 " devfn %"PRIu8 1972 " gpa 0x%"PRIx64 " hpa 0x%"PRIx64, pci_bus_num(vtd_as->bus), 1973 VTD_PCI_SLOT(vtd_as->devfn), VTD_PCI_FUNC(vtd_as->devfn), 1974 vtd_as->devfn, addr, ret.translated_addr); 1975 return ret; 1976 } 1977 1978 static void vtd_iommu_notify_flag_changed(MemoryRegion *iommu, 1979 IOMMUNotifierFlag old, 1980 IOMMUNotifierFlag new) 1981 { 1982 VTDAddressSpace *vtd_as = container_of(iommu, VTDAddressSpace, iommu); 1983 1984 if (new & IOMMU_NOTIFIER_MAP) { 1985 error_report("Device at bus %s addr %02x.%d requires iommu " 1986 "notifier which is currently not supported by " 1987 "intel-iommu emulation", 1988 vtd_as->bus->qbus.name, PCI_SLOT(vtd_as->devfn), 1989 PCI_FUNC(vtd_as->devfn)); 1990 exit(1); 1991 } 1992 } 1993 1994 static const VMStateDescription vtd_vmstate = { 1995 .name = "iommu-intel", 1996 .unmigratable = 1, 1997 }; 1998 1999 static const MemoryRegionOps vtd_mem_ops = { 2000 .read = vtd_mem_read, 2001 .write = vtd_mem_write, 2002 .endianness = DEVICE_LITTLE_ENDIAN, 2003 .impl = { 2004 .min_access_size = 4, 2005 .max_access_size = 8, 2006 }, 2007 .valid = { 2008 .min_access_size = 4, 2009 .max_access_size = 8, 2010 }, 2011 }; 2012 2013 static Property vtd_properties[] = { 2014 DEFINE_PROP_UINT32("version", IntelIOMMUState, version, 0), 2015 DEFINE_PROP_END_OF_LIST(), 2016 }; 2017 2018 /* Read IRTE entry with specific index */ 2019 static int vtd_irte_get(IntelIOMMUState *iommu, uint16_t index, 2020 VTD_IR_TableEntry *entry, uint16_t sid) 2021 { 2022 static const uint16_t 
vtd_svt_mask[VTD_SQ_MAX] = \ 2023 {0xffff, 0xfffb, 0xfff9, 0xfff8}; 2024 dma_addr_t addr = 0x00; 2025 uint16_t mask, source_id; 2026 uint8_t bus, bus_max, bus_min; 2027 2028 addr = iommu->intr_root + index * sizeof(*entry); 2029 if (dma_memory_read(&address_space_memory, addr, entry, 2030 sizeof(*entry))) { 2031 VTD_DPRINTF(GENERAL, "error: fail to access IR root at 0x%"PRIx64 2032 " + %"PRIu16, iommu->intr_root, index); 2033 return -VTD_FR_IR_ROOT_INVAL; 2034 } 2035 2036 if (!entry->irte.present) { 2037 VTD_DPRINTF(GENERAL, "error: present flag not set in IRTE" 2038 " entry index %u value 0x%"PRIx64 " 0x%"PRIx64, 2039 index, le64_to_cpu(entry->data[1]), 2040 le64_to_cpu(entry->data[0])); 2041 return -VTD_FR_IR_ENTRY_P; 2042 } 2043 2044 if (entry->irte.__reserved_0 || entry->irte.__reserved_1 || 2045 entry->irte.__reserved_2) { 2046 VTD_DPRINTF(GENERAL, "error: IRTE entry index %"PRIu16 2047 " reserved fields non-zero: 0x%"PRIx64 " 0x%"PRIx64, 2048 index, le64_to_cpu(entry->data[1]), 2049 le64_to_cpu(entry->data[0])); 2050 return -VTD_FR_IR_IRTE_RSVD; 2051 } 2052 2053 if (sid != X86_IOMMU_SID_INVALID) { 2054 /* Validate IRTE SID */ 2055 source_id = le32_to_cpu(entry->irte.source_id); 2056 switch (entry->irte.sid_vtype) { 2057 case VTD_SVT_NONE: 2058 VTD_DPRINTF(IR, "No SID validation for IRTE index %d", index); 2059 break; 2060 2061 case VTD_SVT_ALL: 2062 mask = vtd_svt_mask[entry->irte.sid_q]; 2063 if ((source_id & mask) != (sid & mask)) { 2064 VTD_DPRINTF(GENERAL, "SID validation for IRTE index " 2065 "%d failed (reqid 0x%04x sid 0x%04x)", index, 2066 sid, source_id); 2067 return -VTD_FR_IR_SID_ERR; 2068 } 2069 break; 2070 2071 case VTD_SVT_BUS: 2072 bus_max = source_id >> 8; 2073 bus_min = source_id & 0xff; 2074 bus = sid >> 8; 2075 if (bus > bus_max || bus < bus_min) { 2076 VTD_DPRINTF(GENERAL, "SID validation for IRTE index %d " 2077 "failed (bus %d outside %d-%d)", index, bus, 2078 bus_min, bus_max); 2079 return -VTD_FR_IR_SID_ERR; 2080 } 2081 break; 2082 2083 default: 2084 VTD_DPRINTF(GENERAL, "Invalid SVT bits (0x%x) in IRTE index " 2085 "%d", entry->irte.sid_vtype, index); 2086 /* Take this as verification failure. 
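             * The spec defines only SVT values 0 (no verification),
             * 1 (verify against the source-id under the SQ mask) and
             * 2 (verify against a bus range), so treat anything else as a
             * failure rather than skipping validation.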
*/ 2087 return -VTD_FR_IR_SID_ERR; 2088 break; 2089 } 2090 } 2091 2092 return 0; 2093 } 2094 2095 /* Fetch IRQ information of specific IR index */ 2096 static int vtd_remap_irq_get(IntelIOMMUState *iommu, uint16_t index, 2097 VTDIrq *irq, uint16_t sid) 2098 { 2099 VTD_IR_TableEntry irte = {}; 2100 int ret = 0; 2101 2102 ret = vtd_irte_get(iommu, index, &irte, sid); 2103 if (ret) { 2104 return ret; 2105 } 2106 2107 irq->trigger_mode = irte.irte.trigger_mode; 2108 irq->vector = irte.irte.vector; 2109 irq->delivery_mode = irte.irte.delivery_mode; 2110 irq->dest = le32_to_cpu(irte.irte.dest_id); 2111 if (!iommu->intr_eime) { 2112 #define VTD_IR_APIC_DEST_MASK (0xff00ULL) 2113 #define VTD_IR_APIC_DEST_SHIFT (8) 2114 irq->dest = (irq->dest & VTD_IR_APIC_DEST_MASK) >> 2115 VTD_IR_APIC_DEST_SHIFT; 2116 } 2117 irq->dest_mode = irte.irte.dest_mode; 2118 irq->redir_hint = irte.irte.redir_hint; 2119 2120 VTD_DPRINTF(IR, "remapping interrupt index %d: trig:%u,vec:%u," 2121 "deliver:%u,dest:%u,dest_mode:%u", index, 2122 irq->trigger_mode, irq->vector, irq->delivery_mode, 2123 irq->dest, irq->dest_mode); 2124 2125 return 0; 2126 } 2127 2128 /* Generate one MSI message from VTDIrq info */ 2129 static void vtd_generate_msi_message(VTDIrq *irq, MSIMessage *msg_out) 2130 { 2131 VTD_MSIMessage msg = {}; 2132 2133 /* Generate address bits */ 2134 msg.dest_mode = irq->dest_mode; 2135 msg.redir_hint = irq->redir_hint; 2136 msg.dest = irq->dest; 2137 msg.__addr_head = cpu_to_le32(0xfee); 2138 /* Keep this from original MSI address bits */ 2139 msg.__not_used = irq->msi_addr_last_bits; 2140 2141 /* Generate data bits */ 2142 msg.vector = irq->vector; 2143 msg.delivery_mode = irq->delivery_mode; 2144 msg.level = 1; 2145 msg.trigger_mode = irq->trigger_mode; 2146 2147 msg_out->address = msg.msi_addr; 2148 msg_out->data = msg.msi_data; 2149 } 2150 2151 /* Interrupt remapping for MSI/MSI-X entry */ 2152 static int vtd_interrupt_remap_msi(IntelIOMMUState *iommu, 2153 MSIMessage *origin, 2154 MSIMessage *translated, 2155 uint16_t sid) 2156 { 2157 int ret = 0; 2158 VTD_IR_MSIAddress addr; 2159 uint16_t index; 2160 VTDIrq irq = {}; 2161 2162 assert(origin && translated); 2163 2164 if (!iommu || !iommu->intr_enabled) { 2165 goto do_not_translate; 2166 } 2167 2168 if (origin->address & VTD_MSI_ADDR_HI_MASK) { 2169 VTD_DPRINTF(GENERAL, "error: MSI addr high 32 bits nonzero" 2170 " during interrupt remapping: 0x%"PRIx32, 2171 (uint32_t)((origin->address & VTD_MSI_ADDR_HI_MASK) >> \ 2172 VTD_MSI_ADDR_HI_SHIFT)); 2173 return -VTD_FR_IR_REQ_RSVD; 2174 } 2175 2176 addr.data = origin->address & VTD_MSI_ADDR_LO_MASK; 2177 if (le16_to_cpu(addr.addr.__head) != 0xfee) { 2178 VTD_DPRINTF(GENERAL, "error: MSI addr low 32 bits invalid: " 2179 "0x%"PRIx32, addr.data); 2180 return -VTD_FR_IR_REQ_RSVD; 2181 } 2182 2183 /* This is compatible mode. 
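     * Bit 4 of the MSI address is the Interrupt Format bit: compatibility
     * format requests (bit clear) are delivered unchanged, while remappable
     * format requests carry an IRTE handle in address bits 19:5 plus bit 2,
     * and a SubHandle Valid flag in bit 3, which is what gets decoded below.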
     */
    if (addr.addr.int_mode != VTD_IR_INT_FORMAT_REMAP) {
        goto do_not_translate;
    }

    index = addr.addr.index_h << 15 | le16_to_cpu(addr.addr.index_l);

#define  VTD_IR_MSI_DATA_SUBHANDLE       (0x0000ffff)
#define  VTD_IR_MSI_DATA_RESERVED        (0xffff0000)

    if (addr.addr.sub_valid) {
        /* See VT-d spec 5.1.2.2 and 5.1.3 on subhandle */
        index += origin->data & VTD_IR_MSI_DATA_SUBHANDLE;
    }

    ret = vtd_remap_irq_get(iommu, index, &irq, sid);
    if (ret) {
        return ret;
    }

    if (addr.addr.sub_valid) {
        VTD_DPRINTF(IR, "received MSI interrupt");
        if (origin->data & VTD_IR_MSI_DATA_RESERVED) {
            VTD_DPRINTF(GENERAL, "error: MSI data bits non-zero for "
                        "interrupt remappable entry: 0x%"PRIx32,
                        origin->data);
            return -VTD_FR_IR_REQ_RSVD;
        }
    } else {
        uint8_t vector = origin->data & 0xff;
        uint8_t trigger_mode = (origin->data >> MSI_DATA_TRIGGER_SHIFT) & 0x1;

        VTD_DPRINTF(IR, "received IOAPIC interrupt");
        /* IOAPIC entry vector should be aligned with IRTE vector
         * (see vt-d spec 5.1.5.1). */
        if (vector != irq.vector) {
            VTD_DPRINTF(GENERAL, "IOAPIC vector inconsistent: "
                        "entry: %d, IRTE: %d, index: %d",
                        vector, irq.vector, index);
        }

        /* The Trigger Mode field must match the Trigger Mode in the IRTE.
         * (see vt-d spec 5.1.5.1). */
        if (trigger_mode != irq.trigger_mode) {
            VTD_DPRINTF(GENERAL, "IOAPIC trigger mode inconsistent: "
                        "entry: %u, IRTE: %u, index: %d",
                        trigger_mode, irq.trigger_mode, index);
        }
    }

    /*
     * We'd better keep the last two bits, assuming that the guest OS
     * might modify them. Keeping them does not hurt after all.
     */
    irq.msi_addr_last_bits = addr.addr.__not_care;

    /* Translate VTDIrq to MSI message */
    vtd_generate_msi_message(&irq, translated);

    VTD_DPRINTF(IR, "mapping MSI 0x%"PRIx64":0x%"PRIx32 " -> "
                "0x%"PRIx64":0x%"PRIx32, origin->address, origin->data,
                translated->address, translated->data);
    return 0;

do_not_translate:
    memcpy(translated, origin, sizeof(*origin));
    return 0;
}

static int vtd_int_remap(X86IOMMUState *iommu, MSIMessage *src,
                         MSIMessage *dst, uint16_t sid)
{
    return vtd_interrupt_remap_msi(INTEL_IOMMU_DEVICE(iommu),
                                   src, dst, sid);
}

static MemTxResult vtd_mem_ir_read(void *opaque, hwaddr addr,
                                   uint64_t *data, unsigned size,
                                   MemTxAttrs attrs)
{
    return MEMTX_OK;
}

static MemTxResult vtd_mem_ir_write(void *opaque, hwaddr addr,
                                    uint64_t value, unsigned size,
                                    MemTxAttrs attrs)
{
    int ret = 0;
    MSIMessage from = {}, to = {};
    uint16_t sid = X86_IOMMU_SID_INVALID;

    from.address = (uint64_t) addr + VTD_INTERRUPT_ADDR_FIRST;
    from.data = (uint32_t) value;

    if (!attrs.unspecified) {
        /* We have explicit Source ID */
        sid = attrs.requester_id;
    }

    ret = vtd_interrupt_remap_msi(opaque, &from, &to, sid);
    if (ret) {
        /* TODO: report error */
        VTD_DPRINTF(GENERAL, "int remap fail for addr 0x%"PRIx64
                    " data 0x%"PRIx32, from.address, from.data);
        /* Drop this interrupt */
        return MEMTX_ERROR;
    }

    VTD_DPRINTF(IR, "delivering MSI 0x%"PRIx64":0x%"PRIx32
                " for device sid 0x%04x",
                to.address, to.data, sid);

    if (dma_memory_write(&address_space_memory, to.address,
                         &to.data, size)) {
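        /* A failed write of the translated message is only logged; the
         * function still returns MEMTX_OK below, so the interrupt is
         * silently dropped rather than surfaced as a bus error. */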
        VTD_DPRINTF(GENERAL, "error: fail to write 0x%"PRIx64
                    " value 0x%"PRIx32, to.address, to.data);
    }

    return MEMTX_OK;
}

static const MemoryRegionOps vtd_mem_ir_ops = {
    .read_with_attrs = vtd_mem_ir_read,
    .write_with_attrs = vtd_mem_ir_write,
    .endianness = DEVICE_LITTLE_ENDIAN,
    .impl = {
        .min_access_size = 4,
        .max_access_size = 4,
    },
    .valid = {
        .min_access_size = 4,
        .max_access_size = 4,
    },
};

VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus, int devfn)
{
    uintptr_t key = (uintptr_t)bus;
    VTDBus *vtd_bus = g_hash_table_lookup(s->vtd_as_by_busptr, &key);
    VTDAddressSpace *vtd_dev_as;

    if (!vtd_bus) {
        uintptr_t *new_key = g_malloc(sizeof(*new_key));

        /* No corresponding free() */
        vtd_bus = g_malloc0(sizeof(VTDBus) + sizeof(VTDAddressSpace *) *
                            X86_IOMMU_PCI_DEVFN_MAX);
        vtd_bus->bus = bus;
        /* The hash table keeps a pointer to its key, so insert a heap copy
         * instead of the address of the local 'key' above. */
        *new_key = (uintptr_t)bus;
        g_hash_table_insert(s->vtd_as_by_busptr, new_key, vtd_bus);
    }

    vtd_dev_as = vtd_bus->dev_as[devfn];

    if (!vtd_dev_as) {
        vtd_bus->dev_as[devfn] = vtd_dev_as = g_malloc0(sizeof(VTDAddressSpace));

        vtd_dev_as->bus = bus;
        vtd_dev_as->devfn = (uint8_t)devfn;
        vtd_dev_as->iommu_state = s;
        vtd_dev_as->context_cache_entry.context_cache_gen = 0;
        memory_region_init_iommu(&vtd_dev_as->iommu, OBJECT(s),
                                 &s->iommu_ops, "intel_iommu", UINT64_MAX);
        memory_region_init_io(&vtd_dev_as->iommu_ir, OBJECT(s),
                              &vtd_mem_ir_ops, s, "intel_iommu_ir",
                              VTD_INTERRUPT_ADDR_SIZE);
        memory_region_add_subregion(&vtd_dev_as->iommu, VTD_INTERRUPT_ADDR_FIRST,
                                    &vtd_dev_as->iommu_ir);
        address_space_init(&vtd_dev_as->as,
                           &vtd_dev_as->iommu, "intel_iommu");
    }
    return vtd_dev_as;
}
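/*
 * Illustrative usage sketch (not code from this file): a caller holding a
 * (bus, devfn) pair would fetch the per-device address space and then go
 * through the normal memory API, e.g.
 *
 *     VTDAddressSpace *vtd_as = vtd_find_add_as(s, bus, devfn);
 *     address_space_rw(&vtd_as->as, iova, MEMTXATTRS_UNSPECIFIED,
 *                      buf, len, is_write);
 *
 * The VTDBus wrapper and the per-devfn VTDAddressSpace slots are allocated
 * lazily on first lookup and are kept for the lifetime of the machine.
 */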
/* Do the initialization. It will also be called when reset, so pay
 * attention when adding new initialization code.
 */
static void vtd_init(IntelIOMMUState *s)
{
    X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(s);

    memset(s->csr, 0, DMAR_REG_SIZE);
    memset(s->wmask, 0, DMAR_REG_SIZE);
    memset(s->w1cmask, 0, DMAR_REG_SIZE);
    memset(s->womask, 0, DMAR_REG_SIZE);

    s->iommu_ops.translate = vtd_iommu_translate;
    s->iommu_ops.notify_flag_changed = vtd_iommu_notify_flag_changed;
    s->root = 0;
    s->root_extended = false;
    s->dmar_enabled = false;
    s->iq_head = 0;
    s->iq_tail = 0;
    s->iq = 0;
    s->iq_size = 0;
    s->qi_enabled = false;
    s->iq_last_desc_type = VTD_INV_DESC_NONE;
    s->next_frcd_reg = 0;
    s->cap = VTD_CAP_FRO | VTD_CAP_NFR | VTD_CAP_ND | VTD_CAP_MGAW |
             VTD_CAP_SAGAW | VTD_CAP_MAMV | VTD_CAP_PSI | VTD_CAP_SLLPS;
    s->ecap = VTD_ECAP_QI | VTD_ECAP_IRO;

    if (x86_iommu->intr_supported) {
        s->ecap |= VTD_ECAP_IR | VTD_ECAP_EIM | VTD_ECAP_MHMV;
    }

    vtd_reset_context_cache(s);
    vtd_reset_iotlb(s);

    /* Define registers with default values and bit semantics */
    vtd_define_long(s, DMAR_VER_REG, 0x10UL, 0, 0);
    vtd_define_quad(s, DMAR_CAP_REG, s->cap, 0, 0);
    vtd_define_quad(s, DMAR_ECAP_REG, s->ecap, 0, 0);
    vtd_define_long(s, DMAR_GCMD_REG, 0, 0xff800000UL, 0);
    vtd_define_long_wo(s, DMAR_GCMD_REG, 0xff800000UL);
    vtd_define_long(s, DMAR_GSTS_REG, 0, 0, 0);
    vtd_define_quad(s, DMAR_RTADDR_REG, 0, 0xfffffffffffff000ULL, 0);
    vtd_define_quad(s, DMAR_CCMD_REG, 0, 0xe0000003ffffffffULL, 0);
    vtd_define_quad_wo(s, DMAR_CCMD_REG, 0x3ffff0000ULL);

    /* Advanced Fault Logging not supported */
    vtd_define_long(s, DMAR_FSTS_REG, 0, 0, 0x11UL);
    vtd_define_long(s, DMAR_FECTL_REG, 0x80000000UL, 0x80000000UL, 0);
    vtd_define_long(s, DMAR_FEDATA_REG, 0, 0x0000ffffUL, 0);
    vtd_define_long(s, DMAR_FEADDR_REG, 0, 0xfffffffcUL, 0);

    /* Treated as RsvdZ when EIM in ECAP_REG is not supported
     * vtd_define_long(s, DMAR_FEUADDR_REG, 0, 0xffffffffUL, 0);
     */
    vtd_define_long(s, DMAR_FEUADDR_REG, 0, 0, 0);

    /* Treated as RO for implementations that report the PLMR and PHMR
     * fields as Clear in the CAP_REG.
     * vtd_define_long(s, DMAR_PMEN_REG, 0, 0x80000000UL, 0);
     */
    vtd_define_long(s, DMAR_PMEN_REG, 0, 0, 0);

    vtd_define_quad(s, DMAR_IQH_REG, 0, 0, 0);
    vtd_define_quad(s, DMAR_IQT_REG, 0, 0x7fff0ULL, 0);
    vtd_define_quad(s, DMAR_IQA_REG, 0, 0xfffffffffffff007ULL, 0);
    vtd_define_long(s, DMAR_ICS_REG, 0, 0, 0x1UL);
    vtd_define_long(s, DMAR_IECTL_REG, 0x80000000UL, 0x80000000UL, 0);
    vtd_define_long(s, DMAR_IEDATA_REG, 0, 0xffffffffUL, 0);
    vtd_define_long(s, DMAR_IEADDR_REG, 0, 0xfffffffcUL, 0);
    /* Treated as RsvdZ when EIM in ECAP_REG is not supported */
    vtd_define_long(s, DMAR_IEUADDR_REG, 0, 0, 0);

    /* IOTLB registers */
    vtd_define_quad(s, DMAR_IOTLB_REG, 0, 0xb003ffff00000000ULL, 0);
    vtd_define_quad(s, DMAR_IVA_REG, 0, 0xfffffffffffff07fULL, 0);
    vtd_define_quad_wo(s, DMAR_IVA_REG, 0xfffffffffffff07fULL);

    /* Fault Recording Registers, 128-bit */
    vtd_define_quad(s, DMAR_FRCD_REG_0_0, 0, 0, 0);
    vtd_define_quad(s, DMAR_FRCD_REG_0_2, 0, 0, 0x8000000000000000ULL);
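    /*
     * Note on the definitions above: the last argument of vtd_define_long()
     * and vtd_define_quad() is the write-1-to-clear mask.  For example,
     * writing 1 to bit 0 of DMAR_ICS_REG clears the IWC status bit, and
     * writing 1 to bit 63 of DMAR_FRCD_REG_0_2 (bit 127 of the 128-bit
     * fault record, the F bit) is how software acknowledges a recorded
     * fault.
     */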
    /*
     * Interrupt remapping registers.
     */
    vtd_define_quad(s, DMAR_IRTA_REG, 0, 0xfffffffffffff80fULL, 0);
}

/* Should not reset address_spaces when reset because devices will still use
 * the address space they got at first (won't ask the bus again).
 */
static void vtd_reset(DeviceState *dev)
{
    IntelIOMMUState *s = INTEL_IOMMU_DEVICE(dev);

    VTD_DPRINTF(GENERAL, "");
    vtd_init(s);
}

static AddressSpace *vtd_host_dma_iommu(PCIBus *bus, void *opaque, int devfn)
{
    IntelIOMMUState *s = opaque;
    VTDAddressSpace *vtd_as;

    /* devfn indexes dev_as[], which has X86_IOMMU_PCI_DEVFN_MAX entries */
    assert(0 <= devfn && devfn < X86_IOMMU_PCI_DEVFN_MAX);

    vtd_as = vtd_find_add_as(s, bus, devfn);
    return &vtd_as->as;
}

static void vtd_realize(DeviceState *dev, Error **errp)
{
    PCMachineState *pcms = PC_MACHINE(qdev_get_machine());
    PCIBus *bus = pcms->bus;
    IntelIOMMUState *s = INTEL_IOMMU_DEVICE(dev);
    X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(dev);

    VTD_DPRINTF(GENERAL, "");
    x86_iommu->type = TYPE_INTEL;
    memset(s->vtd_as_by_bus_num, 0, sizeof(s->vtd_as_by_bus_num));
    memory_region_init_io(&s->csrmem, OBJECT(s), &vtd_mem_ops, s,
                          "intel_iommu", DMAR_REG_SIZE);
    sysbus_init_mmio(SYS_BUS_DEVICE(s), &s->csrmem);
    /* No corresponding destroy */
    s->iotlb = g_hash_table_new_full(vtd_uint64_hash, vtd_uint64_equal,
                                     g_free, g_free);
    s->vtd_as_by_busptr = g_hash_table_new_full(vtd_uint64_hash, vtd_uint64_equal,
                                                g_free, g_free);
    vtd_init(s);
    sysbus_mmio_map(SYS_BUS_DEVICE(s), 0, Q35_HOST_BRIDGE_IOMMU_ADDR);
    pci_setup_iommu(bus, vtd_host_dma_iommu, dev);
    /* Pseudo address space under root PCI bus. */
    pcms->ioapic_as = vtd_host_dma_iommu(bus, s, Q35_PSEUDO_DEVFN_IOAPIC);

    /* Currently Intel IOMMU IR only supports "kernel-irqchip={off|split}" */
    if (x86_iommu->intr_supported && kvm_irqchip_in_kernel() &&
        !kvm_irqchip_is_split()) {
        error_report("Intel Interrupt Remapping cannot work with "
                     "kernel-irqchip=on, please use 'split|off'.");
        exit(1);
    }
}

static void vtd_class_init(ObjectClass *klass, void *data)
{
    DeviceClass *dc = DEVICE_CLASS(klass);
    X86IOMMUClass *x86_class = X86_IOMMU_CLASS(klass);

    dc->reset = vtd_reset;
    dc->vmsd = &vtd_vmstate;
    dc->props = vtd_properties;
    dc->hotpluggable = false;
    x86_class->realize = vtd_realize;
    x86_class->int_remap = vtd_int_remap;
}

static const TypeInfo vtd_info = {
    .name = TYPE_INTEL_IOMMU_DEVICE,
    .parent = TYPE_X86_IOMMU_DEVICE,
    .instance_size = sizeof(IntelIOMMUState),
    .class_init = vtd_class_init,
};

static void vtd_register_types(void)
{
    VTD_DPRINTF(GENERAL, "");
    type_register_static(&vtd_info);
}

type_init(vtd_register_types)
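/*
 * Example invocation (assumed command-line syntax; the "intremap" property
 * comes from the x86-iommu base class, and kernel-irqchip must be "split"
 * or "off" as enforced in vtd_realize() above):
 *
 *     qemu-system-x86_64 -M q35,kernel-irqchip=split \
 *         -device intel-iommu,intremap=on ...
 */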