/*
 * Intel IOMMU APIs
 *
 * Copyright (C) 2016 Red Hat, Inc.
 *
 * Authors:
 *   Peter Xu <peterx@redhat.com>,
 *
 * This work is licensed under the terms of the GNU LGPL, version 2 or
 * later.
 */

#include "intel-iommu.h"
#include "libcflat.h"
#include "pci.h"
#include "atomic.h"

/*
 * VT-d in QEMU currently only supports a 39-bit address width, which
 * means 3-level page table translation.
 */
#define VTD_PAGE_LEVEL      3
#define VTD_CE_AW_39BIT     0x1

typedef uint64_t vtd_pte_t;

struct vtd_root_entry {
	/* Quad 1 */
	uint64_t present:1;
	uint64_t __reserved:11;
	uint64_t context_table_p:52;
	/* Quad 2 */
	uint64_t __reserved_2;
} __attribute__ ((packed));
typedef struct vtd_root_entry vtd_re_t;

struct vtd_context_entry {
	/* Quad 1 */
	uint64_t present:1;
	uint64_t disable_fault_report:1;
	uint64_t trans_type:2;
	uint64_t __reserved:8;
	uint64_t slptptr:52;
	/* Quad 2 */
	uint64_t addr_width:3;
	uint64_t __ignore:4;
	uint64_t __reserved_2:1;
	uint64_t domain_id:16;
	uint64_t __reserved_3:40;
} __attribute__ ((packed));
typedef struct vtd_context_entry vtd_ce_t;

struct vtd_irte {
	uint32_t present:1;
	uint32_t fault_disable:1;    /* Fault Processing Disable */
	uint32_t dest_mode:1;        /* Destination Mode */
	uint32_t redir_hint:1;       /* Redirection Hint */
	uint32_t trigger_mode:1;     /* Trigger Mode */
	uint32_t delivery_mode:3;    /* Delivery Mode */
	uint32_t __avail:4;          /* Available space for software */
	uint32_t __reserved_0:3;     /* Reserved 0 */
	uint32_t irte_mode:1;        /* IRTE Mode */
	uint32_t vector:8;           /* Interrupt Vector */
	uint32_t __reserved_1:8;     /* Reserved 1 */
	uint32_t dest_id;            /* Destination ID */
	uint16_t source_id:16;       /* Source-ID */
	uint64_t sid_q:2;            /* Source-ID Qualifier */
	uint64_t sid_vtype:2;        /* Source-ID Validation Type */
	uint64_t __reserved_2:44;    /* Reserved 2 */
} __attribute__ ((packed));
typedef struct vtd_irte vtd_irte_t;

#define VTD_RTA_MASK  (PAGE_MASK)
#define VTD_IRTA_MASK (PAGE_MASK)

void *vtd_reg_base;

static uint64_t vtd_root_table(void)
{
	/* No extended root table support yet */
	return vtd_readq(DMAR_RTADDR_REG) & VTD_RTA_MASK;
}

static uint64_t vtd_ir_table(void)
{
	return vtd_readq(DMAR_IRTA_REG) & VTD_IRTA_MASK;
}

static void vtd_gcmd_or(uint32_t cmd)
{
	uint32_t status;

	/* We only allow setting one bit at a time */
	assert(is_power_of_2(cmd));

	status = vtd_readl(DMAR_GSTS_REG);
	vtd_writel(DMAR_GCMD_REG, status | cmd);

	if (cmd & VTD_GCMD_ONE_SHOT_BITS) {
		/* One-shot bits take effect immediately */
		return;
	}

	/* Make sure the IOMMU handled our command request */
	while (!(vtd_readl(DMAR_GSTS_REG) & cmd))
		cpu_relax();
}

static void vtd_dump_init_info(void)
{
	uint32_t version;

	version = vtd_readl(DMAR_VER_REG);

	/* Major version (bits 7:4) must be >= 1 */
	assert(((version >> 4) & 0xf) >= 1);

	printf("VT-d version: 0x%x\n", version);
	printf("     cap:     0x%016lx\n", vtd_readq(DMAR_CAP_REG));
	printf("     ecap:    0x%016lx\n", vtd_readq(DMAR_ECAP_REG));
}

static void vtd_setup_root_table(void)
{
	void *root = alloc_page();

	memset(root, 0, PAGE_SIZE);
	vtd_writeq(DMAR_RTADDR_REG, virt_to_phys(root));
	vtd_gcmd_or(VTD_GCMD_ROOT);
	printf("DMAR table address: 0x%016lx\n", vtd_root_table());
}
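
/*
 * Illustrative sketch, not part of the original API: with the 39-bit
 * address width above, an IOVA decomposes into three 9-bit table
 * indexes (bits 38:30, 29:21, 20:12) plus a 12-bit page offset.  This
 * hypothetical helper mirrors what PGDIR_OFFSET() is assumed to
 * compute per level in vtd_install_pte() below.
 */
static inline unsigned int vtd_iova_level_index(iova_t iova, int level)
{
	/* Level 1 covers bits 20:12; each level above adds 9 bits */
	return (iova >> (VTD_PAGE_SHIFT + 9 * (level - 1))) & 0x1ff;
}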

static void vtd_setup_ir_table(void)
{
	void *root = alloc_page();

	memset(root, 0, PAGE_SIZE);
	/* 0xf is the table size field (2^(0xf+1) == 65536 entries) */
	vtd_writeq(DMAR_IRTA_REG, virt_to_phys(root) | 0xf);
	vtd_gcmd_or(VTD_GCMD_IR_TABLE);
	printf("IR table address: 0x%016lx\n", vtd_ir_table());
}

static void vtd_install_pte(vtd_pte_t *root, iova_t iova,
			    phys_addr_t pa, int level_target)
{
	int level;
	unsigned int offset;
	void *page;

	for (level = VTD_PAGE_LEVEL; level > level_target; level--) {
		offset = PGDIR_OFFSET(iova, level);
		if (!(root[offset] & VTD_PTE_RW)) {
			page = alloc_page();
			memset(page, 0, PAGE_SIZE);
			root[offset] = virt_to_phys(page) | VTD_PTE_RW;
		}
		root = (uint64_t *)(phys_to_virt(root[offset] &
						 VTD_PTE_ADDR));
	}

	offset = PGDIR_OFFSET(iova, level);
	root[offset] = pa | VTD_PTE_RW;
	if (level != 1) {
		/* This is a huge page */
		root[offset] |= VTD_PTE_HUGE;
	}
}

#define VTD_PHYS_TO_VIRT(x) \
	((void *)(((uint64_t)phys_to_virt(x)) << VTD_PAGE_SHIFT))

/**
 * vtd_map_range: setup IO address mapping for a specific memory range
 *
 * @sid: source ID of the device to set up
 * @iova: start IO virtual address
 * @pa: start physical address
 * @size: size of the mapping area
 */
void vtd_map_range(uint16_t sid, iova_t iova, phys_addr_t pa, size_t size)
{
	uint8_t bus_n, devfn;
	void *slptptr;
	vtd_ce_t *ce;
	vtd_re_t *re = phys_to_virt(vtd_root_table());

	assert(IS_ALIGNED(iova, SZ_4K));
	assert(IS_ALIGNED(pa, SZ_4K));
	assert(IS_ALIGNED(size, SZ_4K));

	bus_n = PCI_BDF_GET_BUS(sid);
	devfn = PCI_BDF_GET_DEVFN(sid);

	/* Point to the correct root entry */
	re += bus_n;

	if (!re->present) {
		ce = alloc_page();
		memset(ce, 0, PAGE_SIZE);
		memset(re, 0, sizeof(*re));
		re->context_table_p = virt_to_phys(ce) >> VTD_PAGE_SHIFT;
		re->present = 1;
		printf("allocated vt-d root entry for PCI bus %d\n",
		       bus_n);
	} else
		ce = VTD_PHYS_TO_VIRT(re->context_table_p);

	/* Point to the correct context entry */
	ce += devfn;

	if (!ce->present) {
		slptptr = alloc_page();
		memset(slptptr, 0, PAGE_SIZE);
		memset(ce, 0, sizeof(*ce));
		/* To keep it simple, use the SID as the domain ID */
		ce->domain_id = sid;
		/* We only test the 39-bit width case (3-level paging) */
		ce->addr_width = VTD_CE_AW_39BIT;
		ce->slptptr = virt_to_phys(slptptr) >> VTD_PAGE_SHIFT;
		ce->trans_type = VTD_CONTEXT_TT_MULTI_LEVEL;
		ce->present = 1;
		/* No error reporting yet */
		ce->disable_fault_report = 1;
		printf("allocated vt-d context entry for devfn 0x%x\n",
		       devfn);
	} else
		slptptr = VTD_PHYS_TO_VIRT(ce->slptptr);

	while (size) {
		/* TODO: currently we only map 4K pages (level = 1) */
		printf("map 4K page IOVA 0x%lx to 0x%lx (sid=0x%04x)\n",
		       iova, pa, sid);
		vtd_install_pte(slptptr, iova, pa, 1);
		size -= VTD_PAGE_SIZE;
		iova += VTD_PAGE_SIZE;
		pa += VTD_PAGE_SIZE;
	}
}

static uint16_t vtd_intr_index_alloc(void)
{
	static volatile int index_ctr = 0;
	int ctr;

	assert(index_ctr < 65535);
	ctr = atomic_inc_fetch(&index_ctr);
	printf("INTR: alloc IRTE index %d\n", ctr);
	return ctr;
}
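
/*
 * Illustrative helper, an assumption rather than part of the original
 * file: an IRTA size field S means the table holds 2^(S+1) IRTEs, so
 * the 0xf programmed in vtd_setup_ir_table() yields 65536 entries.
 * That is why the allocator above hands out at most 65535 indexes.
 */
static inline uint32_t vtd_ir_table_entries(uint32_t size_field)
{
	return 1U << (size_field + 1);
}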

static void vtd_setup_irte(struct pci_dev *dev, vtd_irte_t *irte,
			   int vector, int dest_id, trigger_mode_t trigger)
{
	assert(sizeof(vtd_irte_t) == 16);
	memset(irte, 0, sizeof(*irte));
	irte->fault_disable = 1;
	irte->dest_mode = 0;	 /* physical */
	irte->trigger_mode = trigger;
	irte->delivery_mode = 0; /* fixed */
	irte->irte_mode = 0;	 /* remapped */
	irte->vector = vector;
	irte->dest_id = dest_id;
	irte->source_id = dev->bdf;
	irte->sid_q = 0;
	irte->sid_vtype = 1;	 /* full-sid verify */
	irte->present = 1;
}

struct vtd_msi_addr {
	uint32_t __dont_care:2;
	uint32_t handle_15:1;	 /* handle[15] */
	uint32_t shv:1;
	uint32_t interrupt_format:1;
	uint32_t handle_0_14:15; /* handle[0:14] */
	uint32_t head:12;	 /* 0xfee */
	uint32_t addr_hi;	 /* not used except with x2apic */
} __attribute__ ((packed));
typedef struct vtd_msi_addr vtd_msi_addr_t;

struct vtd_msi_data {
	uint16_t __reserved;
	uint16_t subhandle;
} __attribute__ ((packed));
typedef struct vtd_msi_data vtd_msi_data_t;

/**
 * vtd_setup_msi - setup the MSI message for a device
 *
 * @dev: PCI device to set up MSI for
 * @vector: interrupt vector
 * @dest_id: destination processor
 */
bool vtd_setup_msi(struct pci_dev *dev, int vector, int dest_id)
{
	vtd_msi_data_t msi_data = {};
	vtd_msi_addr_t msi_addr = {};
	vtd_irte_t *irte = phys_to_virt(vtd_ir_table());
	uint16_t index = vtd_intr_index_alloc();

	assert(sizeof(vtd_msi_addr_t) == 8);
	assert(sizeof(vtd_msi_data_t) == 4);

	/* Use an edge-triggered IRQ by default */
	vtd_setup_irte(dev, irte + index, vector,
		       dest_id, TRIGGER_EDGE);

	msi_addr.handle_15 = index >> 15 & 1;
	msi_addr.shv = 0;
	msi_addr.interrupt_format = 1;
	msi_addr.handle_0_14 = index & 0x7fff;
	msi_addr.head = 0xfee;
	msi_data.subhandle = 0;

	return pci_setup_msi(dev, *(uint64_t *)&msi_addr,
			     *(uint32_t *)&msi_data);
}

void vtd_init(void)
{
	setup_vm();
	smp_init();

	vtd_reg_base = ioremap(Q35_HOST_BRIDGE_IOMMU_ADDR, PAGE_SIZE);

	vtd_dump_init_info();
	vtd_gcmd_or(VTD_GCMD_QI);	/* Enable QI */
	vtd_setup_root_table();
	vtd_setup_ir_table();
	vtd_gcmd_or(VTD_GCMD_DMAR);	/* Enable DMAR */
	vtd_gcmd_or(VTD_GCMD_IR);	/* Enable IR */
}
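
/*
 * Minimal usage sketch, not part of the original file: how a test
 * might drive these APIs end to end.  "dev", "buf" and vector 0x42
 * are hypothetical placeholders.
 */
static inline void vtd_usage_sketch(struct pci_dev *dev, void *buf)
{
	vtd_init();
	/* Identity-map one 4K page at "buf" for DMA from "dev" */
	vtd_map_range(dev->bdf, virt_to_phys(buf),
		      virt_to_phys(buf), PAGE_SIZE);
	/* Remap the device's MSI to vector 0x42 on CPU 0 */
	vtd_setup_msi(dev, 0x42, 0);
}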