/*
 * Intel IOMMU APIs
 *
 * Copyright (C) 2016 Red Hat, Inc.
 *
 * Authors:
 *   Peter Xu <peterx@redhat.com>,
 *
 * This work is licensed under the terms of the GNU LGPL, version 2 or
 * later.
 */

#include "intel-iommu.h"
#include "libcflat.h"
#include "pci.h"
#include "atomic.h"
#include "alloc_page.h"

/*
 * VT-d in QEMU currently only supports a 39-bit address width,
 * which corresponds to 3-level translation.
 */
#define VTD_PAGE_LEVEL 3
#define VTD_CE_AW_39BIT 0x1

typedef uint64_t vtd_pte_t;

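/*
 * Root entry: one per PCI bus number.  The root table has 256
 * entries; a present entry points to the context table for the
 * devices on that bus.
 */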
struct vtd_root_entry {
	/* Quad 1 */
	uint64_t present:1;
	uint64_t __reserved:11;
	uint64_t context_table_p:52;
	/* Quad 2 */
	uint64_t __reserved_2;
} __attribute__ ((packed));
typedef struct vtd_root_entry vtd_re_t;

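/*
 * Context entry: one per devfn under a bus.  A present entry carries
 * the domain ID, the address width, and the second-level page table
 * pointer (SLPTPTR) used for DMA remapping.
 */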
struct vtd_context_entry {
	/* Quad 1 */
	uint64_t present:1;
	uint64_t disable_fault_report:1;
	uint64_t trans_type:2;
	uint64_t __reserved:8;
	uint64_t slptptr:52;
	/* Quad 2 */
	uint64_t addr_width:3;
	uint64_t __ignore:4;
	uint64_t __reserved_2:1;
	uint64_t domain_id:16;
	uint64_t __reserved_3:40;
} __attribute__ ((packed));
typedef struct vtd_context_entry vtd_ce_t;

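/*
 * Interrupt Remapping Table Entry (IRTE).  Each entry is 16 bytes
 * and is selected by the handle (index) carried in a remappable MSI
 * address or IOAPIC RTE; it describes the real interrupt to deliver.
 */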
struct vtd_irte {
	uint32_t present:1;
	uint32_t fault_disable:1;    /* Fault Processing Disable */
	uint32_t dest_mode:1;        /* Destination Mode */
	uint32_t redir_hint:1;       /* Redirection Hint */
	uint32_t trigger_mode:1;     /* Trigger Mode */
	uint32_t delivery_mode:3;    /* Delivery Mode */
	uint32_t __avail:4;          /* Available spaces for software */
	uint32_t __reserved_0:3;     /* Reserved 0 */
	uint32_t irte_mode:1;        /* IRTE Mode */
	uint32_t vector:8;           /* Interrupt Vector */
	uint32_t __reserved_1:8;     /* Reserved 1 */
	uint32_t dest_id;            /* Destination ID */
	uint16_t source_id:16;       /* Source-ID */
	uint64_t sid_q:2;            /* Source-ID Qualifier */
	uint64_t sid_vtype:2;        /* Source-ID Validation Type */
	uint64_t __reserved_2:44;    /* Reserved 2 */
} __attribute__ ((packed));
typedef struct vtd_irte vtd_irte_t;

#define VTD_RTA_MASK  (PAGE_MASK)
#define VTD_IRTA_MASK (PAGE_MASK)

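/* MMIO base of the DMAR unit, mapped by vtd_init() */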
void *vtd_reg_base;

static uint64_t vtd_root_table(void)
{
	/* No extended root table support yet */
	return vtd_readq(DMAR_RTADDR_REG) & VTD_RTA_MASK;
}

static uint64_t vtd_ir_table(void)
{
	return vtd_readq(DMAR_IRTA_REG) & VTD_IRTA_MASK;
}

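/*
 * Set one command bit in the global command register and, for bits
 * that are not one-shot, wait until the corresponding bit shows up
 * in the global status register.  GCMD_REG is write-only, so the
 * currently enabled bits are reconstructed from GSTS_REG.
 */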
static void vtd_gcmd_or(uint32_t cmd)
{
	uint32_t status;

	/* We only allow setting one bit at a time */
	assert(is_power_of_2(cmd));

	status = vtd_readl(DMAR_GSTS_REG);
	vtd_writel(DMAR_GCMD_REG, status | cmd);

	if (cmd & VTD_GCMD_ONE_SHOT_BITS) {
		/* One-shot bits take effect immediately */
		return;
	}

	/* Make sure the IOMMU has handled our command request */
	while (!(vtd_readl(DMAR_GSTS_REG) & cmd))
		cpu_relax();
}

static void vtd_dump_init_info(void)
{
	uint32_t version;

	version = vtd_readl(DMAR_VER_REG);

	/* Major version (VER_REG bits 7:4) must be >= 1 */
	assert(((version >> 4) & 0xf) >= 1);

	printf("VT-d version: %#x\n", version);
	printf(" cap: %#018lx\n", vtd_readq(DMAR_CAP_REG));
	printf(" ecap: %#018lx\n", vtd_readq(DMAR_ECAP_REG));
}

static void vtd_setup_root_table(void)
{
	void *root = alloc_page();

	vtd_writeq(DMAR_RTADDR_REG, virt_to_phys(root));
	vtd_gcmd_or(VTD_GCMD_ROOT);
	printf("DMAR table address: %#018lx\n", vtd_root_table());
}

static void vtd_setup_ir_table(void)
{
	void *root = alloc_page();

	/* 0xf is the size field: the table holds 2^(0xf+1) == 65536 entries */
	vtd_writeq(DMAR_IRTA_REG, virt_to_phys(root) | 0xf);
	vtd_gcmd_or(VTD_GCMD_IR_TABLE);
	printf("IR table address: %#018lx\n", vtd_ir_table());
}

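/*
 * Walk the second-level page table from the top level (3) down to
 * level_target, allocating intermediate page-table pages on demand,
 * then install a read/write mapping at the target level.  Mappings
 * installed above level 1 are marked as huge pages.
 */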
static void vtd_install_pte(vtd_pte_t *root, iova_t iova,
			    phys_addr_t pa, int level_target)
{
	int level;
	unsigned int offset;
	void *page;

	for (level = VTD_PAGE_LEVEL; level > level_target; level--) {
		offset = PGDIR_OFFSET(iova, level);
		if (!(root[offset] & VTD_PTE_RW)) {
			page = alloc_page();
			root[offset] = virt_to_phys(page) | VTD_PTE_RW;
		}
		root = (uint64_t *)(phys_to_virt(root[offset] &
						 VTD_PTE_ADDR));
	}

	offset = PGDIR_OFFSET(iova, level);
	root[offset] = pa | VTD_PTE_RW;
	if (level != 1) {
		/* This is a huge page */
		root[offset] |= VTD_PTE_HUGE;
	}
}

/**
 * vtd_map_range: set up IO address mappings for a specific memory range
 *
 * @sid: source ID of the device to set up
 * @iova: start IO virtual address
 * @pa: start physical address
 * @size: size of the mapping area
 */
void vtd_map_range(uint16_t sid, iova_t iova, phys_addr_t pa, size_t size)
{
	uint8_t bus_n, devfn;
	void *slptptr;
	vtd_ce_t *ce;
	vtd_re_t *re = phys_to_virt(vtd_root_table());

	assert(IS_ALIGNED(iova, SZ_4K));
	assert(IS_ALIGNED(pa, SZ_4K));
	assert(IS_ALIGNED(size, SZ_4K));

	bus_n = PCI_BDF_GET_BUS(sid);
	devfn = PCI_BDF_GET_DEVFN(sid);

	/* Point to the correct root entry */
	re += bus_n;

	if (!re->present) {
		ce = alloc_page();
		memset(re, 0, sizeof(*re));
		re->context_table_p = virt_to_phys(ce) >> VTD_PAGE_SHIFT;
		re->present = 1;
		printf("allocated vt-d root entry for PCI bus %d\n",
		       bus_n);
	} else
		ce = phys_to_virt(re->context_table_p << VTD_PAGE_SHIFT);

	/* Point to the correct context entry */
	ce += devfn;

	if (!ce->present) {
		slptptr = alloc_page();
		memset(ce, 0, sizeof(*ce));
		/* To keep it simple, the domain ID is the same as the SID */
		ce->domain_id = sid;
		/* We only test the 39-bit width case (3-level paging) */
		ce->addr_width = VTD_CE_AW_39BIT;
		ce->slptptr = virt_to_phys(slptptr) >> VTD_PAGE_SHIFT;
		ce->trans_type = VTD_CONTEXT_TT_MULTI_LEVEL;
		ce->present = 1;
		/* No error reporting yet */
		ce->disable_fault_report = 1;
		printf("allocated vt-d context entry for devfn %#x\n",
		       devfn);
	} else
		slptptr = phys_to_virt(ce->slptptr << VTD_PAGE_SHIFT);

	while (size) {
		/* TODO: currently we only map 4K pages (level = 1) */
		printf("map 4K page IOVA %#lx to %#lx (sid=%#06x)\n",
		       iova, pa, sid);
		vtd_install_pte(slptptr, iova, pa, 1);
		size -= VTD_PAGE_SIZE;
		iova += VTD_PAGE_SIZE;
		pa += VTD_PAGE_SIZE;
	}
}
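
/*
 * Illustrative usage (assuming a hypothetical struct pci_dev *dev and
 * an already-initialized IOMMU): identity-map four pages of a DMA
 * buffer for the device:
 *
 *	void *buf = alloc_pages(2);
 *
 *	vtd_map_range(dev->bdf, (iova_t)virt_to_phys(buf),
 *		      virt_to_phys(buf), 4 * PAGE_SIZE);
 */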

static uint16_t vtd_intr_index_alloc(void)
{
	static volatile int index_ctr = 0;
	int ctr;

	assert(index_ctr < 65535);
	ctr = atomic_inc_fetch(&index_ctr);
	printf("INTR: alloc IRTE index %d\n", ctr);
	return ctr;
}

static void vtd_setup_irte(struct pci_dev *dev, vtd_irte_t *irte,
			   int vector, int dest_id, trigger_mode_t trigger)
{
	assert(sizeof(vtd_irte_t) == 16);
	memset(irte, 0, sizeof(*irte));
	irte->fault_disable = 1;
	irte->dest_mode = 0;	/* physical */
	irte->trigger_mode = trigger;
	irte->delivery_mode = 0;	/* fixed */
	irte->irte_mode = 0;	/* remapped */
	irte->vector = vector;
	irte->dest_id = dest_id;
	irte->source_id = dev->bdf;
	irte->sid_q = 0;
	irte->sid_vtype = 1;	/* full-SID verify */
	irte->present = 1;
}

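/*
 * Remappable-format MSI address.  When interrupt_format is 1, the
 * address carries an IRTE handle (split into bit 15 and bits 0-14)
 * instead of a destination APIC ID; SHV selects whether the MSI
 * data's subhandle is added to the handle.
 */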
struct vtd_msi_addr {
	uint32_t __dont_care:2;
	uint32_t handle_15:1;	/* handle[15] */
	uint32_t shv:1;
	uint32_t interrupt_format:1;
	uint32_t handle_0_14:15;	/* handle[0:14] */
	uint32_t head:12;	/* 0xfee */
	uint32_t addr_hi;	/* not used except with x2apic */
} __attribute__ ((packed));
typedef struct vtd_msi_addr vtd_msi_addr_t;

struct vtd_msi_data {
	uint16_t __reserved;
	uint16_t subhandle;
} __attribute__ ((packed));
typedef struct vtd_msi_data vtd_msi_data_t;

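/*
 * Remappable-format IOAPIC redirection table entry.  As with MSI,
 * interrupt_format 1 means the entry carries an IRTE index rather
 * than a destination, again split into bit 15 and bits 0-14.
 */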
struct vtd_ioapic_entry {
	uint64_t vector:8;
	uint64_t __zeros:3;
	uint64_t index_15:1;
	uint64_t delivery_status:1;
	uint64_t polarity:1;
	uint64_t remote_irr:1;
	uint64_t trigger_mode:1;
	uint64_t mask:1;
	uint64_t __zeros_2:31;
	uint64_t interrupt_format:1;
	uint64_t index_0_14:15;
} __attribute__ ((packed));
typedef struct vtd_ioapic_entry vtd_ioapic_entry_t;

/**
 * vtd_setup_msi - set up the MSI message for a device
 *
 * @dev: PCI device to set up MSI for
 * @vector: interrupt vector
 * @dest_id: destination processor
 */
bool vtd_setup_msi(struct pci_dev *dev, int vector, int dest_id)
{
	vtd_msi_data_t msi_data = {};
	vtd_msi_addr_t msi_addr = {};
	vtd_irte_t *irte = phys_to_virt(vtd_ir_table());
	uint16_t index = vtd_intr_index_alloc();

	assert(sizeof(vtd_msi_addr_t) == 8);
	assert(sizeof(vtd_msi_data_t) == 4);

	/* Use an edge-triggered IRQ by default */
	vtd_setup_irte(dev, irte + index, vector,
		       dest_id, TRIGGER_EDGE);

	msi_addr.handle_15 = (index >> 15) & 1;
	msi_addr.shv = 0;
	msi_addr.interrupt_format = 1;
	msi_addr.handle_0_14 = index & 0x7fff;
	msi_addr.head = 0xfee;
	msi_data.subhandle = 0;

	printf("%s: msi_addr=%#" PRIx64 ", msi_data=%#x\n", __func__,
	       *(uint64_t *)&msi_addr, *(uint32_t *)&msi_data);

	return pci_setup_msi(dev, *(uint64_t *)&msi_addr,
			     *(uint32_t *)&msi_data);
}

void vtd_setup_ioapic_irq(struct pci_dev *dev, int vector,
			  int dest_id, trigger_mode_t trigger)
{
	vtd_ioapic_entry_t entry = {};
	vtd_irte_t *irte = phys_to_virt(vtd_ir_table());
	ioapic_redir_entry_t *entry_2 = (ioapic_redir_entry_t *)&entry;
	uint16_t index = vtd_intr_index_alloc();
	uint8_t line;

	assert(dev);
	assert(sizeof(vtd_ioapic_entry_t) == 8);

	vtd_setup_irte(dev, irte + index, vector,
		       dest_id, trigger);

	entry.vector = vector;
	entry.trigger_mode = trigger;
	entry.index_15 = (index >> 15) & 1;
	entry.interrupt_format = 1;
	entry.index_0_14 = index & 0x7fff;

	line = pci_intx_line(dev);
	ioapic_write_redir(line, *entry_2);
}

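/*
 * Map the Q35 DMAR MMIO window, then bring the IOMMU up: enable
 * queued invalidation, install the root and interrupt-remapping
 * tables, and finally enable DMA and interrupt remapping.
 */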
void vtd_init(void)
{
	vtd_reg_base = ioremap(Q35_HOST_BRIDGE_IOMMU_ADDR, PAGE_SIZE);

	vtd_dump_init_info();
	vtd_gcmd_or(VTD_GCMD_QI);	/* Enable QI */
	vtd_setup_root_table();
	vtd_setup_ir_table();
	vtd_gcmd_or(VTD_GCMD_DMAR);	/* Enable DMAR */
	vtd_gcmd_or(VTD_GCMD_IR);	/* Enable IR */
}