/*
 * Intel IOMMU APIs
 *
 * Copyright (C) 2016 Red Hat, Inc.
 *
 * Authors:
 *   Peter Xu <peterx@redhat.com>,
 *
 * This work is licensed under the terms of the GNU LGPL, version 2 or
 * later.
 */

#include "intel-iommu.h"
#include "libcflat.h"
#include "pci.h"
#include "atomic.h"

/*
 * VT-d in QEMU currently only supports a 39-bit address width, which
 * corresponds to 3-level page-table translation.
 */
#define VTD_PAGE_LEVEL      3
#define VTD_CE_AW_39BIT     0x1

typedef uint64_t vtd_pte_t;

struct vtd_root_entry {
	/* Quad 1 */
	uint64_t present:1;
	uint64_t __reserved:11;
	uint64_t context_table_p:52;
	/* Quad 2 */
	uint64_t __reserved_2;
} __attribute__ ((packed));
typedef struct vtd_root_entry vtd_re_t;

struct vtd_context_entry {
	/* Quad 1 */
	uint64_t present:1;
	uint64_t disable_fault_report:1;
	uint64_t trans_type:2;
	uint64_t __reserved:8;
	uint64_t slptptr:52;
	/* Quad 2 */
	uint64_t addr_width:3;
	uint64_t __ignore:4;
	uint64_t __reserved_2:1;
	uint64_t domain_id:16;
	uint64_t __reserved_3:40;
} __attribute__ ((packed));
typedef struct vtd_context_entry vtd_ce_t;

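/*
 * Orientation note (summarizing how the code below indexes these
 * structures, not quoting the spec): the root table is one page of 256
 * 16-byte entries indexed by PCI bus number, and each context table is
 * one page of 256 16-byte entries indexed by devfn.  For example, for
 * source-id 0x00a8 (bus 0, devfn 0xa8), translation starts at root[0],
 * which points to a context table, and context[0xa8] then points to
 * the second-level page table.
 */
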
struct vtd_irte {
	uint32_t present:1;
	uint32_t fault_disable:1;    /* Fault Processing Disable */
	uint32_t dest_mode:1;        /* Destination Mode */
	uint32_t redir_hint:1;       /* Redirection Hint */
	uint32_t trigger_mode:1;     /* Trigger Mode */
	uint32_t delivery_mode:3;    /* Delivery Mode */
	uint32_t __avail:4;          /* Available space for software */
	uint32_t __reserved_0:3;     /* Reserved 0 */
	uint32_t irte_mode:1;        /* IRTE Mode */
	uint32_t vector:8;           /* Interrupt Vector */
	uint32_t __reserved_1:8;     /* Reserved 1 */
	uint32_t dest_id;            /* Destination ID */
	uint16_t source_id:16;       /* Source-ID */
	uint64_t sid_q:2;            /* Source-ID Qualifier */
	uint64_t sid_vtype:2;        /* Source-ID Validation Type */
	uint64_t __reserved_2:44;    /* Reserved 2 */
} __attribute__ ((packed));
typedef struct vtd_irte vtd_irte_t;

#define VTD_RTA_MASK  (PAGE_MASK)
#define VTD_IRTA_MASK (PAGE_MASK)

void *vtd_reg_base;

static uint64_t vtd_root_table(void)
{
	/* No extended root table support yet */
	return vtd_readq(DMAR_RTADDR_REG) & VTD_RTA_MASK;
}

static uint64_t vtd_ir_table(void)
{
	return vtd_readq(DMAR_IRTA_REG) & VTD_IRTA_MASK;
}

static void vtd_gcmd_or(uint32_t cmd)
{
	uint32_t status;

	/* We only allow setting one bit at a time */
	assert(is_power_of_2(cmd));

	status = vtd_readl(DMAR_GSTS_REG);
	vtd_writel(DMAR_GCMD_REG, status | cmd);

	if (cmd & VTD_GCMD_ONE_SHOT_BITS) {
		/* One-shot bits take effect immediately */
		return;
	}

	/* Make sure the IOMMU handled our command request */
	while (!(vtd_readl(DMAR_GSTS_REG) & cmd))
		cpu_relax();
}

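/*
 * Note on the handshake in vtd_gcmd_or() (a summary of the code above,
 * not of spec text): a GCMD write must carry the current state of the
 * other control bits, which is why GSTS is read first and OR-ed with
 * the new command.  For non-one-shot bits, hardware reports completion
 * by setting the matching GSTS bit, e.g.:
 *
 *	vtd_gcmd_or(VTD_GCMD_DMAR);	// returns once GSTS reports DMAR on
 */
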
static void vtd_dump_init_info(void)
{
	uint32_t version;

	version = vtd_readl(DMAR_VER_REG);

	/* Major version (bits 7:4) must be >= 1 */
	assert(((version >> 4) & 0xf) >= 1);

	printf("VT-d version:   0x%x\n", version);
	printf("     cap:       0x%016lx\n", vtd_readq(DMAR_CAP_REG));
	printf("     ecap:      0x%016lx\n", vtd_readq(DMAR_ECAP_REG));
}

static void vtd_setup_root_table(void)
{
	void *root = alloc_page();

	memset(root, 0, PAGE_SIZE);
	vtd_writeq(DMAR_RTADDR_REG, virt_to_phys(root));
	vtd_gcmd_or(VTD_GCMD_ROOT);
	printf("DMAR table address: 0x%016lx\n", vtd_root_table());
}

static void vtd_setup_ir_table(void)
{
	void *root = alloc_page();

	memset(root, 0, PAGE_SIZE);
	/* The size field 0xf encodes the entry count: 2^(0xf+1) == 65536 */
	vtd_writeq(DMAR_IRTA_REG, virt_to_phys(root) | 0xf);
	vtd_gcmd_or(VTD_GCMD_IR_TABLE);
	printf("IR table address: 0x%016lx\n", vtd_ir_table());
}

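/*
 * Note (derived from the code above, not from external docs): the IRTA
 * size field advertises 65536 entries, but only one 4 KiB page is
 * allocated.  At 16 bytes per IRTE, that backs just the first
 *
 *	PAGE_SIZE / sizeof(vtd_irte_t) == 4096 / 16 == 256
 *
 * indexes, so tests using this table must stay below index 256.
 */
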
static void vtd_install_pte(vtd_pte_t *root, iova_t iova,
			    phys_addr_t pa, int level_target)
{
	int level;
	unsigned int offset;
	void *page;

	for (level = VTD_PAGE_LEVEL; level > level_target; level--) {
		offset = PGDIR_OFFSET(iova, level);
		if (!(root[offset] & VTD_PTE_RW)) {
			page = alloc_page();
			memset(page, 0, PAGE_SIZE);
			root[offset] = virt_to_phys(page) | VTD_PTE_RW;
		}
		root = (uint64_t *)(phys_to_virt(root[offset] &
						 VTD_PTE_ADDR));
	}

	offset = PGDIR_OFFSET(iova, level);
	root[offset] = pa | VTD_PTE_RW;
	if (level != 1) {
		/* This is a huge page */
		root[offset] |= VTD_PTE_HUGE;
	}
}

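/*
 * Worked example for the walk above, assuming PGDIR_OFFSET() extracts
 * the 9-bit table index for a given level, i.e. roughly
 * (iova >> (12 + 9 * (level - 1))) & 0x1ff (see the lib/x86 paging
 * helpers for the real definition):
 *
 *	iova_t iova = 0x40403000;	// (1UL << 30) | (2UL << 21) | (3UL << 12)
 *
 *	PGDIR_OFFSET(iova, 3);		// == 1, level-3 (top) index
 *	PGDIR_OFFSET(iova, 2);		// == 2, level-2 index
 *	PGDIR_OFFSET(iova, 1);		// == 3, level-1 (4K PTE) index
 */
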
#define  VTD_PHYS_TO_VIRT(x) \
	((void *)(((uint64_t)phys_to_virt(x)) << VTD_PAGE_SHIFT))

/**
 * vtd_map_range: set up an IO address mapping for a specific memory range
 *
 * @sid: source ID of the device to set up
 * @iova: start IO virtual address
 * @pa: start physical address
 * @size: size of the mapping area
 */
void vtd_map_range(uint16_t sid, iova_t iova, phys_addr_t pa, size_t size)
{
	uint8_t bus_n, devfn;
	void *slptptr;
	vtd_ce_t *ce;
	vtd_re_t *re = phys_to_virt(vtd_root_table());

	assert(IS_ALIGNED(iova, SZ_4K));
	assert(IS_ALIGNED(pa, SZ_4K));
	assert(IS_ALIGNED(size, SZ_4K));

	bus_n = PCI_BDF_GET_BUS(sid);
	devfn = PCI_BDF_GET_DEVFN(sid);

	/* Point to the correct root entry */
	re += bus_n;

	if (!re->present) {
		ce = alloc_page();
		memset(ce, 0, PAGE_SIZE);
		memset(re, 0, sizeof(*re));
		re->context_table_p = virt_to_phys(ce) >> VTD_PAGE_SHIFT;
		re->present = 1;
		printf("allocated vt-d root entry for PCI bus %d\n",
		       bus_n);
	} else
		ce = VTD_PHYS_TO_VIRT(re->context_table_p);

	/* Point to the correct context entry */
	ce += devfn;

	if (!ce->present) {
		slptptr = alloc_page();
		memset(slptptr, 0, PAGE_SIZE);
		memset(ce, 0, sizeof(*ce));
		/* For simplicity, the domain ID is the same as the SID */
		ce->domain_id = sid;
		/* We only test the 39-bit width case (3-level paging) */
		ce->addr_width = VTD_CE_AW_39BIT;
		ce->slptptr = virt_to_phys(slptptr) >> VTD_PAGE_SHIFT;
		ce->trans_type = VTD_CONTEXT_TT_MULTI_LEVEL;
		ce->present = 1;
		/* No error reporting yet */
		ce->disable_fault_report = 1;
		printf("allocated vt-d context entry for devfn 0x%x\n",
		       devfn);
	} else
		slptptr = VTD_PHYS_TO_VIRT(ce->slptptr);

	while (size) {
		/* TODO: currently we only map 4K pages (level = 1) */
		printf("map 4K page IOVA 0x%lx to 0x%lx (sid=0x%04x)\n",
		       iova, pa, sid);
		vtd_install_pte(slptptr, iova, pa, 1);
		size -= VTD_PAGE_SIZE;
		iova += VTD_PAGE_SIZE;
		pa += VTD_PAGE_SIZE;
	}
}

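/*
 * A minimal usage sketch for vtd_map_range() (hypothetical caller;
 * "dev" stands in for whatever assigned PCI device a test drives):
 *
 *	void *buf = alloc_page();
 *
 *	vtd_map_range(dev->bdf, 0, virt_to_phys(buf), PAGE_SIZE);
 *	// DMA from the device to IOVA 0x0 now lands in buf.
 */
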
static uint16_t vtd_intr_index_alloc(void)
{
	static volatile int index_ctr = 0;
	int ctr;

	assert(index_ctr < 65535);
	ctr = atomic_inc_fetch(&index_ctr);
	printf("INTR: alloc IRTE index %d\n", ctr);
	return ctr;
}

static void vtd_setup_irte(struct pci_dev *dev, vtd_irte_t *irte,
			   int vector, int dest_id, trigger_mode_t trigger)
{
	assert(sizeof(vtd_irte_t) == 16);
	memset(irte, 0, sizeof(*irte));
	irte->fault_disable = 1;
	irte->dest_mode = 0;	 /* physical */
	irte->trigger_mode = trigger;
	irte->delivery_mode = 0; /* fixed */
	irte->irte_mode = 0;	 /* remapped */
	irte->vector = vector;
	irte->dest_id = dest_id;
	irte->source_id = dev->bdf;
	irte->sid_q = 0;
	irte->sid_vtype = 1;     /* full-sid verify */
	irte->present = 1;
}

struct vtd_msi_addr {
	uint32_t __dont_care:2;
	uint32_t handle_15:1;	 /* handle[15] */
	uint32_t shv:1;
	uint32_t interrupt_format:1;
	uint32_t handle_0_14:15; /* handle[0:14] */
	uint32_t head:12;	 /* 0xfee */
	uint32_t addr_hi;	 /* not used except with x2apic */
} __attribute__ ((packed));
typedef struct vtd_msi_addr vtd_msi_addr_t;

struct vtd_msi_data {
	uint16_t __reserved;
	uint16_t subhandle;
} __attribute__ ((packed));
typedef struct vtd_msi_data vtd_msi_data_t;

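/*
 * Worked example for the remappable MSI address layout above (plain
 * bitfield arithmetic, shown for orientation): with interrupt_format
 * = 1, head = 0xfee and handle (IRTE index) = 5, the 32-bit address is
 *
 *	(0xfee << 20) | (5 << 5) | (1 << 4) == 0xfee000b0
 *
 * i.e. bits 19:5 carry handle[14:0] and bit 4 marks the message as
 * remappable rather than a compatibility-format MSI.
 */
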
/**
 * vtd_setup_msi - set up the MSI message for a device
 *
 * @dev: PCI device to set up MSI for
 * @vector: interrupt vector
 * @dest_id: destination processor
 */
bool vtd_setup_msi(struct pci_dev *dev, int vector, int dest_id)
{
	vtd_msi_data_t msi_data = {};
	vtd_msi_addr_t msi_addr = {};
	vtd_irte_t *irte = phys_to_virt(vtd_ir_table());
	uint16_t index = vtd_intr_index_alloc();

	assert(sizeof(vtd_msi_addr_t) == 8);
	assert(sizeof(vtd_msi_data_t) == 4);

	/* Use an edge-triggered interrupt by default */
	vtd_setup_irte(dev, irte + index, vector,
		       dest_id, TRIGGER_EDGE);

	msi_addr.handle_15 = (index >> 15) & 1;
	msi_addr.shv = 0;
	msi_addr.interrupt_format = 1;
	msi_addr.handle_0_14 = index & 0x7fff;
	msi_addr.head = 0xfee;
	msi_data.subhandle = 0;

	return pci_setup_msi(dev, *(uint64_t *)&msi_addr,
			     *(uint32_t *)&msi_data);
}

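/*
 * A minimal usage sketch for vtd_setup_msi() (hypothetical test code;
 * the vector number and "my_isr" are placeholders, and the ISR
 * registration is assumed to come from the usual lib/x86 helpers):
 *
 *	handle_irq(0x42, my_isr);
 *	vtd_setup_msi(dev, 0x42, 0);
 *	// the device's MSI now arrives remapped as vector 0x42 on CPU 0
 */
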
void vtd_init(void)
{
	setup_vm();
	smp_init();

	vtd_reg_base = ioremap(Q35_HOST_BRIDGE_IOMMU_ADDR, PAGE_SIZE);

	vtd_dump_init_info();
	vtd_gcmd_or(VTD_GCMD_QI); /* Enable QI */
	vtd_setup_root_table();
	vtd_setup_ir_table();
	vtd_gcmd_or(VTD_GCMD_DMAR); /* Enable DMAR */
	vtd_gcmd_or(VTD_GCMD_IR);   /* Enable IR */
}
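
/*
 * A minimal sketch of how a test might drive this file end to end
 * (hypothetical test code; the device setup is a placeholder):
 *
 *	struct pci_dev dev;
 *
 *	vtd_init();				// QI + DMAR + IR enabled
 *	pci_dev_init(&dev, bdf);		// hypothetical device lookup
 *	vtd_map_range(dev.bdf, 0, virt_to_phys(alloc_page()), PAGE_SIZE);
 *	vtd_setup_msi(&dev, 0x42, 0);
 */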
333