xref: /kvm-unit-tests/lib/x86/intel-iommu.c (revision 4363f1d9a646a5c7ea673bee8fc33ca6f2cddbd8)
1 /*
2  * Intel IOMMU APIs
3  *
4  * Copyright (C) 2016 Red Hat, Inc.
5  *
6  * Authors:
7  *   Peter Xu <peterx@redhat.com>,
8  *
9  * This work is licensed under the terms of the GNU LGPL, version 2 or
10  * later.
11  */
12 
13 #include "intel-iommu.h"
14 #include "libcflat.h"
15 #include "pci.h"
16 #include "atomic.h"
17 #include "alloc_page.h"
18 
19 /*
20  * VT-d in QEMU currently only support 39 bits address width, which is
21  * 3-level translation.
22  */
#define VTD_PAGE_LEVEL      3	/* page-table levels for the 39-bit case */
#define VTD_CE_AW_39BIT     0x1	/* context-entry address-width encoding */

typedef uint64_t vtd_pte_t;	/* second-level page-table entry */
27 
/*
 * Root table entry (legacy mode): one per PCI bus, pointing to that
 * bus's context table.  Allocated lazily in vtd_map_range().
 */
struct vtd_root_entry {
	/* Quad 1 */
	uint64_t present:1;
	uint64_t __reserved:11;
	/* 4K-aligned PFN of the context table (phys addr >> VTD_PAGE_SHIFT) */
	uint64_t context_table_p:52;
	/* Quad 2 */
	uint64_t __reserved_2;
} __attribute__ ((packed));
typedef struct vtd_root_entry vtd_re_t;
37 
/*
 * Context table entry (legacy mode): one per devfn, selecting the
 * second-level page table that translates DMA for that device.
 */
struct vtd_context_entry {
	/* Quad 1 */
	uint64_t present:1;
	uint64_t disable_fault_report:1;
	uint64_t trans_type:2;		/* translation type */
	uint64_t __reserved:8;
	/* 4K-aligned PFN of the second-level page-table root */
	uint64_t slptptr:52;
	/* Quad 2 */
	uint64_t addr_width:3;		/* address-width encoding (VTD_CE_AW_*) */
	uint64_t __ignore:4;
	uint64_t __reserved_2:1;
	uint64_t domain_id:16;
	uint64_t __reserved_3:40;
} __attribute__ ((packed));
typedef struct vtd_context_entry vtd_ce_t;
53 
/*
 * Interrupt Remapping Table Entry (128 bits).  Filled in by
 * vtd_setup_irte(); only remapped-mode (irte_mode == 0) fields are used.
 */
struct vtd_irte {
	uint32_t present:1;
	uint32_t fault_disable:1;    /* Fault Processing Disable */
	uint32_t dest_mode:1;        /* Destination Mode */
	uint32_t redir_hint:1;       /* Redirection Hint */
	uint32_t trigger_mode:1;     /* Trigger Mode */
	uint32_t delivery_mode:3;    /* Delivery Mode */
	uint32_t __avail:4;          /* Available spaces for software */
	uint32_t __reserved_0:3;     /* Reserved 0 */
	uint32_t irte_mode:1;        /* IRTE Mode */
	uint32_t vector:8;           /* Interrupt Vector */
	uint32_t __reserved_1:8;     /* Reserved 1 */
	uint32_t dest_id;            /* Destination ID */
	uint16_t source_id:16;       /* Source-ID */
	uint64_t sid_q:2;            /* Source-ID Qualifier */
	uint64_t sid_vtype:2;        /* Source-ID Validation Type */
	uint64_t __reserved_2:44;    /* Reserved 2 */
} __attribute__ ((packed));
typedef struct vtd_irte vtd_irte_t;
73 
/* Masks extracting the 4K-aligned table base from RTADDR/IRTA registers */
#define VTD_RTA_MASK  (PAGE_MASK)
#define VTD_IRTA_MASK (PAGE_MASK)

/* Virtual base of the VT-d MMIO register window, set up in vtd_init() */
void *vtd_reg_base;
78 
79 static uint64_t vtd_root_table(void)
80 {
81 	/* No extend root table support yet */
82 	return vtd_readq(DMAR_RTADDR_REG) & VTD_RTA_MASK;
83 }
84 
85 static uint64_t vtd_ir_table(void)
86 {
87 	return vtd_readq(DMAR_IRTA_REG) & VTD_IRTA_MASK;
88 }
89 
90 static void vtd_gcmd_or(uint32_t cmd)
91 {
92 	uint32_t status;
93 
94 	/* We only allow set one bit for each time */
95 	assert(is_power_of_2(cmd));
96 
97 	status = vtd_readl(DMAR_GSTS_REG);
98 	vtd_writel(DMAR_GCMD_REG, status | cmd);
99 
100 	if (cmd & VTD_GCMD_ONE_SHOT_BITS) {
101 		/* One-shot bits are taking effect immediately */
102 		return;
103 	}
104 
105 	/* Make sure IOMMU handled our command request */
106 	while (!(vtd_readl(DMAR_GSTS_REG) & cmd))
107 		cpu_relax();
108 }
109 
110 static void vtd_dump_init_info(void)
111 {
112 	uint32_t version;
113 
114 	version = vtd_readl(DMAR_VER_REG);
115 
116 	/* Major version >= 1 */
117 	assert(((version >> 3) & 0xf) >= 1);
118 
119 	printf("VT-d version:   %#x\n", version);
120 	printf("     cap:       %#018lx\n", vtd_readq(DMAR_CAP_REG));
121 	printf("     ecap:      %#018lx\n", vtd_readq(DMAR_ECAP_REG));
122 }
123 
124 static void vtd_setup_root_table(void)
125 {
126 	void *root = alloc_page();
127 
128 	memset(root, 0, PAGE_SIZE);
129 	vtd_writeq(DMAR_RTADDR_REG, virt_to_phys(root));
130 	vtd_gcmd_or(VTD_GCMD_ROOT);
131 	printf("DMAR table address: %#018lx\n", vtd_root_table());
132 }
133 
134 static void vtd_setup_ir_table(void)
135 {
136 	void *root = alloc_page();
137 
138 	memset(root, 0, PAGE_SIZE);
139 	/* 0xf stands for table size (2^(0xf+1) == 65536) */
140 	vtd_writeq(DMAR_IRTA_REG, virt_to_phys(root) | 0xf);
141 	vtd_gcmd_or(VTD_GCMD_IR_TABLE);
142 	printf("IR table address: %#018lx\n", vtd_ir_table());
143 }
144 
145 static void vtd_install_pte(vtd_pte_t *root, iova_t iova,
146 			    phys_addr_t pa, int level_target)
147 {
148 	int level;
149 	unsigned int offset;
150 	void *page;
151 
152 	for (level = VTD_PAGE_LEVEL; level > level_target; level--) {
153 		offset = PGDIR_OFFSET(iova, level);
154 		if (!(root[offset] & VTD_PTE_RW)) {
155 			page = alloc_page();
156 			memset(page, 0, PAGE_SIZE);
157 			root[offset] = virt_to_phys(page) | VTD_PTE_RW;
158 		}
159 		root = (uint64_t *)(phys_to_virt(root[offset] &
160 						 VTD_PTE_ADDR));
161 	}
162 
163 	offset = PGDIR_OFFSET(iova, level);
164 	root[offset] = pa | VTD_PTE_RW;
165 	if (level != 1) {
166 		/* This is huge page */
167 		root[offset] |= VTD_PTE_HUGE;
168 	}
169 }
170 
171 /**
172  * vtd_map_range: setup IO address mapping for specific memory range
173  *
174  * @sid: source ID of the device to setup
175  * @iova: start IO virtual address
176  * @pa: start physical address
177  * @size: size of the mapping area
178  */
void vtd_map_range(uint16_t sid, iova_t iova, phys_addr_t pa, size_t size)
{
	uint8_t bus_n, devfn;
	void *slptptr;
	vtd_ce_t *ce;
	vtd_re_t *re = phys_to_virt(vtd_root_table());

	/* Only whole, 4K-aligned pages can be mapped */
	assert(IS_ALIGNED(iova, SZ_4K));
	assert(IS_ALIGNED(pa, SZ_4K));
	assert(IS_ALIGNED(size, SZ_4K));

	bus_n = PCI_BDF_GET_BUS(sid);
	devfn = PCI_BDF_GET_DEVFN(sid);

	/* Point to the correct root entry */
	re += bus_n;

	/* Lazily allocate the per-bus context table on first use */
	if (!re->present) {
		ce = alloc_page();
		memset(ce, 0, PAGE_SIZE);
		memset(re, 0, sizeof(*re));
		/* The entry holds a PFN, hence the page shift */
		re->context_table_p = virt_to_phys(ce) >> VTD_PAGE_SHIFT;
		re->present = 1;
		printf("allocated vt-d root entry for PCI bus %d\n",
		       bus_n);
	} else
		ce = phys_to_virt(re->context_table_p << VTD_PAGE_SHIFT);

	/* Point to the correct context entry */
	ce += devfn;

	/* Lazily allocate the device's second-level page-table root */
	if (!ce->present) {
		slptptr = alloc_page();
		memset(slptptr, 0, PAGE_SIZE);
		memset(ce, 0, sizeof(*ce));
		/* To make it simple, domain ID is the same as SID */
		ce->domain_id = sid;
		/* We only test 39 bits width case (3-level paging) */
		ce->addr_width = VTD_CE_AW_39BIT;
		ce->slptptr = virt_to_phys(slptptr) >> VTD_PAGE_SHIFT;
		ce->trans_type = VTD_CONTEXT_TT_MULTI_LEVEL;
		ce->present = 1;
		/* No error reporting yet */
		ce->disable_fault_report = 1;
		printf("allocated vt-d context entry for devfn %#x\n",
		       devfn);
	} else
		slptptr = phys_to_virt(ce->slptptr << VTD_PAGE_SHIFT);

	/* Install leaf PTEs one 4K page at a time over the whole range */
	while (size) {
		/* TODO: currently we only map 4K pages (level = 1) */
		printf("map 4K page IOVA %#lx to %#lx (sid=%#06x)\n",
		       iova, pa, sid);
		vtd_install_pte(slptptr, iova, pa, 1);
		size -= VTD_PAGE_SIZE;
		iova += VTD_PAGE_SIZE;
		pa += VTD_PAGE_SIZE;
	}
}
238 
239 static uint16_t vtd_intr_index_alloc(void)
240 {
241 	static volatile int index_ctr = 0;
242 	int ctr;
243 
244 	assert(index_ctr < 65535);
245 	ctr = atomic_inc_fetch(&index_ctr);
246 	printf("INTR: alloc IRTE index %d\n", ctr);
247 	return ctr;
248 }
249 
/*
 * Fill one interrupt remapping table entry in remapped mode.  The entry
 * may be live to hardware, so all fields are written before the present
 * bit is set last.
 */
static void vtd_setup_irte(struct pci_dev *dev, vtd_irte_t *irte,
			   int vector, int dest_id, trigger_mode_t trigger)
{
	/* The hardware IRTE is exactly 128 bits wide */
	assert(sizeof(vtd_irte_t) == 16);
	memset(irte, 0, sizeof(*irte));
	irte->fault_disable = 1;
	irte->dest_mode = 0;	 /* physical */
	irte->trigger_mode = trigger;
	irte->delivery_mode = 0; /* fixed */
	irte->irte_mode = 0;	 /* remapped */
	irte->vector = vector;
	irte->dest_id = dest_id;
	irte->source_id = dev->bdf;
	irte->sid_q = 0;
	irte->sid_vtype = 1;     /* full-sid verify */
	/* Mark valid only after every other field is in place */
	irte->present = 1;
}
267 
/*
 * MSI address in VT-d remappable format: the 16-bit IRTE handle is
 * split between handle_0_14 and handle_15 (see vtd_setup_msi()).
 */
struct vtd_msi_addr {
	uint32_t __dont_care:2;
	uint32_t handle_15:1;	 /* handle[15] */
	uint32_t shv:1;		 /* subhandle valid */
	uint32_t interrupt_format:1;	/* 1 == remappable format */
	uint32_t handle_0_14:15; /* handle[0:14] */
	uint32_t head:12;	 /* 0xfee */
	uint32_t addr_hi;	 /* not used except with x2apic */
} __attribute__ ((packed));
typedef struct vtd_msi_addr vtd_msi_addr_t;
278 
/* MSI data in remappable format: only the subhandle field is meaningful */
struct vtd_msi_data {
	uint16_t __reserved;
	uint16_t subhandle;
} __attribute__ ((packed));
typedef struct vtd_msi_data vtd_msi_data_t;
284 
/*
 * IOAPIC redirection entry in VT-d remappable format: the IRTE index is
 * split between index_0_14 and index_15 (see vtd_setup_ioapic_irq()).
 */
struct vtd_ioapic_entry {
	uint64_t vector:8;
	uint64_t __zeros:3;
	uint64_t index_15:1;
	uint64_t delivery_status:1;
	uint64_t polarity:1;
	uint64_t remote_irr:1;
	uint64_t trigger_mode:1;
	uint64_t mask:1;
	uint64_t __zeros_2:31;
	uint64_t interrupt_format:1;	/* 1 == remappable format */
	uint64_t index_0_14:15;
} __attribute__ ((packed));
typedef struct vtd_ioapic_entry vtd_ioapic_entry_t;
299 
300 /**
301  * vtd_setup_msi - setup MSI message for a device
302  *
303  * @dev: PCI device to setup MSI
304  * @vector: interrupt vector
305  * @dest_id: destination processor
306  */
307 bool vtd_setup_msi(struct pci_dev *dev, int vector, int dest_id)
308 {
309 	vtd_msi_data_t msi_data = {};
310 	vtd_msi_addr_t msi_addr = {};
311 	vtd_irte_t *irte = phys_to_virt(vtd_ir_table());
312 	uint16_t index = vtd_intr_index_alloc();
313 
314 	assert(sizeof(vtd_msi_addr_t) == 8);
315 	assert(sizeof(vtd_msi_data_t) == 4);
316 
317 	/* Use edge irq as default */
318 	vtd_setup_irte(dev, irte + index, vector,
319 		       dest_id, TRIGGER_EDGE);
320 
321 	msi_addr.handle_15 = index >> 15 & 1;
322 	msi_addr.shv = 0;
323 	msi_addr.interrupt_format = 1;
324 	msi_addr.handle_0_14 = index & 0x7fff;
325 	msi_addr.head = 0xfee;
326 	msi_data.subhandle = 0;
327 
328 	printf("%s: msi_addr=%#" PRIx64 ", msi_data=%#x\n", __func__,
329 		*(uint64_t *)&msi_addr, *(uint32_t *)&msi_data);
330 
331 	return pci_setup_msi(dev, *(uint64_t *)&msi_addr,
332 			     *(uint32_t *)&msi_data);
333 }
334 
335 void vtd_setup_ioapic_irq(struct pci_dev *dev, int vector,
336 			  int dest_id, trigger_mode_t trigger)
337 {
338 	vtd_ioapic_entry_t entry = {};
339 	vtd_irte_t *irte = phys_to_virt(vtd_ir_table());
340 	ioapic_redir_entry_t *entry_2 = (ioapic_redir_entry_t *)&entry;
341 	uint16_t index = vtd_intr_index_alloc();
342 	uint8_t line;
343 
344 	assert(dev);
345 	assert(sizeof(vtd_ioapic_entry_t) == 8);
346 
347 	vtd_setup_irte(dev, irte + index, vector,
348 		       dest_id, trigger);
349 
350 	entry.vector = vector;
351 	entry.trigger_mode = trigger;
352 	entry.index_15 = (index >> 15) & 1;
353 	entry.interrupt_format = 1;
354 	entry.index_0_14 = index & 0x7fff;
355 
356 	line = pci_intx_line(dev);
357 	ioapic_write_redir(line, *entry_2);
358 }
359 
void vtd_init(void)
{
	/* Map the Q35 IOMMU register window used by the vtd accessors */
	vtd_reg_base = ioremap(Q35_HOST_BRIDGE_IOMMU_ADDR, PAGE_SIZE);

	vtd_dump_init_info();
	/* Tables are installed before translation/remapping is enabled */
	vtd_gcmd_or(VTD_GCMD_QI); /* Enable QI */
	vtd_setup_root_table();
	vtd_setup_ir_table();
	vtd_gcmd_or(VTD_GCMD_DMAR); /* Enable DMAR */
	vtd_gcmd_or(VTD_GCMD_IR);   /* Enable IR */
}
371