/*
 * Intel IOMMU APIs
 *
 * Copyright (C) 2016 Red Hat, Inc.
 *
 * Authors:
 *   Peter Xu <peterx@redhat.com>,
 *
 * This work is licensed under the terms of the GNU LGPL, version 2 or
 * later.
 */

#include "intel-iommu.h"
#include "libcflat.h"
#include "pci.h"
#include "atomic.h"
#include "alloc_page.h"

/*
 * VT-d in QEMU currently only supports a 39-bit address width, which
 * means 3-level page table translation.
 */
#define VTD_PAGE_LEVEL      3
#define VTD_CE_AW_39BIT     0x1

typedef uint64_t vtd_pte_t;

struct vtd_root_entry {
	/* Quad 1 */
	uint64_t present:1;
	uint64_t __reserved:11;
	uint64_t context_table_p:52;
	/* Quad 2 */
	uint64_t __reserved_2;
} __attribute__ ((packed));
typedef struct vtd_root_entry vtd_re_t;

struct vtd_context_entry {
	/* Quad 1 */
	uint64_t present:1;
	uint64_t disable_fault_report:1;
	uint64_t trans_type:2;
	uint64_t __reserved:8;
	uint64_t slptptr:52;
	/* Quad 2 */
	uint64_t addr_width:3;
	uint64_t __ignore:4;
	uint64_t __reserved_2:1;
	uint64_t domain_id:16;
	uint64_t __reserved_3:40;
} __attribute__ ((packed));
typedef struct vtd_context_entry vtd_ce_t;

struct vtd_irte {
	uint32_t present:1;
	uint32_t fault_disable:1;    /* Fault Processing Disable */
	uint32_t dest_mode:1;        /* Destination Mode */
	uint32_t redir_hint:1;       /* Redirection Hint */
	uint32_t trigger_mode:1;     /* Trigger Mode */
	uint32_t delivery_mode:3;    /* Delivery Mode */
	uint32_t __avail:4;          /* Available spaces for software */
	uint32_t __reserved_0:3;     /* Reserved 0 */
	uint32_t irte_mode:1;        /* IRTE Mode */
	uint32_t vector:8;           /* Interrupt Vector */
	uint32_t __reserved_1:8;     /* Reserved 1 */
	uint32_t dest_id;            /* Destination ID */
	uint16_t source_id:16;       /* Source-ID */
	uint64_t sid_q:2;            /* Source-ID Qualifier */
	uint64_t sid_vtype:2;        /* Source-ID Validation Type */
	uint64_t __reserved_2:44;    /* Reserved 2 */
} __attribute__ ((packed));
typedef struct vtd_irte vtd_irte_t;

#define VTD_RTA_MASK  (PAGE_MASK)
#define VTD_IRTA_MASK (PAGE_MASK)

void *vtd_reg_base;

static uint64_t vtd_root_table(void)
{
	/* No extended root table support yet */
	return vtd_readq(DMAR_RTADDR_REG) & VTD_RTA_MASK;
}

static uint64_t vtd_ir_table(void)
{
	return vtd_readq(DMAR_IRTA_REG) & VTD_IRTA_MASK;
}

static void vtd_gcmd_or(uint32_t cmd)
{
	uint32_t status;

	/* We only allow setting one bit at a time */
	assert(is_power_of_2(cmd));

	status = vtd_readl(DMAR_GSTS_REG);
	vtd_writel(DMAR_GCMD_REG, status | cmd);

	if (cmd & VTD_GCMD_ONE_SHOT_BITS) {
		/* One-shot bits take effect immediately */
		return;
	}

	/* Make sure the IOMMU handled our command request */
	while (!(vtd_readl(DMAR_GSTS_REG) & cmd))
		cpu_relax();
}

static void vtd_dump_init_info(void)
{
	uint32_t version;

	version = vtd_readl(DMAR_VER_REG);

	/* Major version (bits 7:4) >= 1 */
	assert(((version >> 4) & 0xf) >= 1);

	printf("VT-d version: %#x\n", version);
	printf(" cap:  %#018lx\n", vtd_readq(DMAR_CAP_REG));
	printf(" ecap: %#018lx\n", vtd_readq(DMAR_ECAP_REG));
}

static void vtd_setup_root_table(void)
{
	void *root = alloc_page();

	memset(root, 0, PAGE_SIZE);
	vtd_writeq(DMAR_RTADDR_REG, virt_to_phys(root));
	vtd_gcmd_or(VTD_GCMD_ROOT);
	printf("DMAR table address: %#018lx\n", vtd_root_table());
}

static void vtd_setup_ir_table(void)
{
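	/*
	 * The interrupt remapping table is a contiguous array of
	 * 16-byte IRTEs, indexed by the handle encoded into remappable
	 * MSI addresses and IOAPIC RTEs (see vtd_setup_msi() and
	 * vtd_setup_ioapic_irq() below). Note that only a single page
	 * (256 entries) is allocated here, even though the size field
	 * programmed below advertises 2^16 entries.
	 */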
	void *root = alloc_page();

	memset(root, 0, PAGE_SIZE);
	/* 0xf stands for table size (2^(0xf+1) == 65536) */
	vtd_writeq(DMAR_IRTA_REG, virt_to_phys(root) | 0xf);
	vtd_gcmd_or(VTD_GCMD_IR_TABLE);
	printf("IR table address: %#018lx\n", vtd_ir_table());
}

static void vtd_install_pte(vtd_pte_t *root, iova_t iova,
			    phys_addr_t pa, int level_target)
{
	int level;
	unsigned int offset;
	void *page;

	for (level = VTD_PAGE_LEVEL; level > level_target; level--) {
		offset = PGDIR_OFFSET(iova, level);
		if (!(root[offset] & VTD_PTE_RW)) {
			page = alloc_page();
			memset(page, 0, PAGE_SIZE);
			root[offset] = virt_to_phys(page) | VTD_PTE_RW;
		}
		root = (uint64_t *)(phys_to_virt(root[offset] &
						 VTD_PTE_ADDR));
	}

	offset = PGDIR_OFFSET(iova, level);
	root[offset] = pa | VTD_PTE_RW;
	if (level != 1) {
		/* This is a huge page */
		root[offset] |= VTD_PTE_HUGE;
	}
}

/**
 * vtd_map_range: setup IO address mapping for a specific memory range
 *
 * @sid: source ID of the device to setup
 * @iova: start IO virtual address
 * @pa: start physical address
 * @size: size of the mapping area
 */
void vtd_map_range(uint16_t sid, iova_t iova, phys_addr_t pa, size_t size)
{
	uint8_t bus_n, devfn;
	void *slptptr;
	vtd_ce_t *ce;
	vtd_re_t *re = phys_to_virt(vtd_root_table());

	assert(IS_ALIGNED(iova, SZ_4K));
	assert(IS_ALIGNED(pa, SZ_4K));
	assert(IS_ALIGNED(size, SZ_4K));

	bus_n = PCI_BDF_GET_BUS(sid);
	devfn = PCI_BDF_GET_DEVFN(sid);

	/* Point to the correct root entry */
	re += bus_n;

	if (!re->present) {
		ce = alloc_page();
		memset(ce, 0, PAGE_SIZE);
		memset(re, 0, sizeof(*re));
		re->context_table_p = virt_to_phys(ce) >> VTD_PAGE_SHIFT;
		re->present = 1;
		printf("allocated vt-d root entry for PCI bus %d\n",
		       bus_n);
	} else
		ce = phys_to_virt(re->context_table_p << VTD_PAGE_SHIFT);

	/* Point to the correct context entry */
	ce += devfn;

	if (!ce->present) {
		slptptr = alloc_page();
		memset(slptptr, 0, PAGE_SIZE);
		memset(ce, 0, sizeof(*ce));
		/* To make it simple, domain ID is the same as SID */
		ce->domain_id = sid;
		/* We only test the 39-bit width case (3-level paging) */
		ce->addr_width = VTD_CE_AW_39BIT;
		ce->slptptr = virt_to_phys(slptptr) >> VTD_PAGE_SHIFT;
		ce->trans_type = VTD_CONTEXT_TT_MULTI_LEVEL;
		ce->present = 1;
		/* No error reporting yet */
		ce->disable_fault_report = 1;
		printf("allocated vt-d context entry for devfn %#x\n",
		       devfn);
	} else
		slptptr = phys_to_virt(ce->slptptr << VTD_PAGE_SHIFT);

	while (size) {
		/* TODO: currently we only map 4K pages (level = 1) */
		printf("map 4K page IOVA %#lx to %#lx (sid=%#06x)\n",
		       iova, pa, sid);
		vtd_install_pte(slptptr, iova, pa, 1);
		size -= VTD_PAGE_SIZE;
		iova += VTD_PAGE_SIZE;
		pa += VTD_PAGE_SIZE;
	}
}

static uint16_t vtd_intr_index_alloc(void)
{
	static volatile int index_ctr = 0;
	int ctr;

	assert(index_ctr < 65535);
	ctr = atomic_inc_fetch(&index_ctr);
	printf("INTR: alloc IRTE index %d\n", ctr);
	return ctr;
}

static void vtd_setup_irte(struct pci_dev *dev, vtd_irte_t *irte,
			   int vector, int dest_id, trigger_mode_t trigger)
{
	assert(sizeof(vtd_irte_t) == 16);
	memset(irte, 0, sizeof(*irte));
	irte->fault_disable = 1;
	irte->dest_mode = 0;		/* physical */
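	/*
	 * dest_mode == 0 together with delivery_mode == 0 below means
	 * the remapped interrupt is delivered as a fixed interrupt to
	 * the physical APIC ID stored in dest_id.
	 */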
	irte->trigger_mode = trigger;
	irte->delivery_mode = 0;	/* fixed */
	irte->irte_mode = 0;		/* remapped */
	irte->vector = vector;
	irte->dest_id = dest_id;
	irte->source_id = dev->bdf;
	irte->sid_q = 0;
	irte->sid_vtype = 1;		/* full-SID verify */
	irte->present = 1;
}

struct vtd_msi_addr {
	uint32_t __dont_care:2;
	uint32_t handle_15:1;		/* handle[15] */
	uint32_t shv:1;
	uint32_t interrupt_format:1;
	uint32_t handle_0_14:15;	/* handle[0:14] */
	uint32_t head:12;		/* 0xfee */
	uint32_t addr_hi;		/* not used except with x2apic */
} __attribute__ ((packed));
typedef struct vtd_msi_addr vtd_msi_addr_t;

struct vtd_msi_data {
	uint16_t __reserved;
	uint16_t subhandle;
} __attribute__ ((packed));
typedef struct vtd_msi_data vtd_msi_data_t;

struct vtd_ioapic_entry {
	uint64_t vector:8;
	uint64_t __zeros:3;
	uint64_t index_15:1;
	uint64_t delivery_status:1;
	uint64_t polarity:1;
	uint64_t remote_irr:1;
	uint64_t trigger_mode:1;
	uint64_t mask:1;
	uint64_t __zeros_2:31;
	uint64_t interrupt_format:1;
	uint64_t index_0_14:15;
} __attribute__ ((packed));
typedef struct vtd_ioapic_entry vtd_ioapic_entry_t;

/**
 * vtd_setup_msi - setup an MSI message for a device
 *
 * @dev: PCI device to setup MSI for
 * @vector: interrupt vector
 * @dest_id: destination processor
 */
bool vtd_setup_msi(struct pci_dev *dev, int vector, int dest_id)
{
	vtd_msi_data_t msi_data = {};
	vtd_msi_addr_t msi_addr = {};
	vtd_irte_t *irte = phys_to_virt(vtd_ir_table());
	uint16_t index = vtd_intr_index_alloc();

	assert(sizeof(vtd_msi_addr_t) == 8);
	assert(sizeof(vtd_msi_data_t) == 4);

	/* Use an edge-triggered IRQ by default */
	vtd_setup_irte(dev, irte + index, vector,
		       dest_id, TRIGGER_EDGE);

	msi_addr.handle_15 = index >> 15 & 1;
	msi_addr.shv = 0;
	msi_addr.interrupt_format = 1;
	msi_addr.handle_0_14 = index & 0x7fff;
	msi_addr.head = 0xfee;
	msi_data.subhandle = 0;

	printf("%s: msi_addr=%#" PRIx64 ", msi_data=%#x\n", __func__,
	       *(uint64_t *)&msi_addr, *(uint32_t *)&msi_data);

	return pci_setup_msi(dev, *(uint64_t *)&msi_addr,
			     *(uint32_t *)&msi_data);
}

void vtd_setup_ioapic_irq(struct pci_dev *dev, int vector,
			  int dest_id, trigger_mode_t trigger)
{
	vtd_ioapic_entry_t entry = {};
	vtd_irte_t *irte = phys_to_virt(vtd_ir_table());
	ioapic_redir_entry_t *entry_2 = (ioapic_redir_entry_t *)&entry;
	uint16_t index = vtd_intr_index_alloc();
	uint8_t line;

	assert(dev);
	assert(sizeof(vtd_ioapic_entry_t) == 8);

	vtd_setup_irte(dev, irte + index, vector,
		       dest_id, trigger);

	entry.vector = vector;
	entry.trigger_mode = trigger;
	entry.index_15 = (index >> 15) & 1;
	entry.interrupt_format = 1;
	entry.index_0_14 = index & 0x7fff;

	line = pci_intx_line(dev);
	ioapic_write_redir(line, *entry_2);
}

void vtd_init(void)
{
	vtd_reg_base = ioremap(Q35_HOST_BRIDGE_IOMMU_ADDR, PAGE_SIZE);

	vtd_dump_init_info();
	vtd_gcmd_or(VTD_GCMD_QI);	/* Enable QI */
	vtd_setup_root_table();
	vtd_setup_ir_table();
	vtd_gcmd_or(VTD_GCMD_DMAR);	/* Enable DMAR */
	vtd_gcmd_or(VTD_GCMD_IR);	/* Enable IR */
}
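
/*
 * Example usage (an illustrative sketch, not part of the original
 * file): a unit test would typically bring the IOMMU up, map a DMA
 * buffer for a device, and register a remapped MSI, roughly as
 * follows. pci_find_dev() and pci_dev_init() are assumed from this
 * tree's lib/pci.h; "vendor", "device" and "vector" are placeholders
 * the test supplies:
 *
 *	struct pci_dev dev;
 *	void *buf = alloc_page();
 *
 *	pci_dev_init(&dev, pci_find_dev(vendor, device));
 *	vtd_init();
 *	vtd_map_range(dev.bdf, 0, virt_to_phys(buf), PAGE_SIZE);
 *	vtd_setup_msi(&dev, vector, 0);
 */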