xref: /linux/drivers/iommu/intel/pasid.c (revision ab93e0dd72c37d378dd936f031ffb83ff2bd87ce)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * intel-pasid.c - PASID idr, table and entry manipulation
4  *
5  * Copyright (C) 2018 Intel Corporation
6  *
7  * Author: Lu Baolu <baolu.lu@linux.intel.com>
8  */
9 
10 #define pr_fmt(fmt)	"DMAR: " fmt
11 
12 #include <linux/bitops.h>
13 #include <linux/cpufeature.h>
14 #include <linux/dmar.h>
15 #include <linux/iommu.h>
16 #include <linux/memory.h>
17 #include <linux/pci.h>
18 #include <linux/pci-ats.h>
19 #include <linux/spinlock.h>
20 
21 #include "iommu.h"
22 #include "pasid.h"
23 #include "../iommu-pages.h"
24 
25 /*
26  * Intel IOMMU system-wide PASID namespace:
27  */
28 u32 intel_pasid_max_id = PASID_MAX;
29 
30 /*
31  * Per device pasid table management:
32  */
33 
34 /*
35  * Allocate a pasid table for @dev. It should be called in a
36  * single-thread context.
37  */
38 int intel_pasid_alloc_table(struct device *dev)
39 {
40 	struct device_domain_info *info;
41 	struct pasid_table *pasid_table;
42 	struct pasid_dir_entry *dir;
43 	u32 max_pasid = 0;
44 	int order, size;
45 
46 	might_sleep();
47 	info = dev_iommu_priv_get(dev);
48 	if (WARN_ON(!info || !dev_is_pci(dev)))
49 		return -ENODEV;
50 	if (WARN_ON(info->pasid_table))
51 		return -EEXIST;
52 
53 	pasid_table = kzalloc(sizeof(*pasid_table), GFP_KERNEL);
54 	if (!pasid_table)
55 		return -ENOMEM;
56 
57 	if (info->pasid_supported)
58 		max_pasid = min_t(u32, pci_max_pasids(to_pci_dev(dev)),
59 				  intel_pasid_max_id);
60 
61 	size = max_pasid >> (PASID_PDE_SHIFT - 3);
62 	order = size ? get_order(size) : 0;
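	/*
	 * A sketch of the arithmetic above, assuming PASID_PDE_SHIFT is 6 as
	 * defined in pasid.h: each PASID directory entry is 8 bytes and
	 * covers 2^PASID_PDE_SHIFT PASIDs, so the directory needs
	 * max_pasid / 64 entries, i.e. max_pasid >> (PASID_PDE_SHIFT - 3)
	 * bytes. For a 20-bit PASID space that is 16384 entries (128 KiB).
	 * The allocation below is rounded up to a power-of-two page count,
	 * and max_pasid is later recomputed from that rounded-up size.
	 */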
63 	dir = iommu_alloc_pages_node_sz(info->iommu->node, GFP_KERNEL,
64 					1 << (order + PAGE_SHIFT));
65 	if (!dir) {
66 		kfree(pasid_table);
67 		return -ENOMEM;
68 	}
69 
70 	pasid_table->table = dir;
71 	pasid_table->max_pasid = 1 << (order + PAGE_SHIFT + 3);
72 	info->pasid_table = pasid_table;
73 
74 	if (!ecap_coherent(info->iommu->ecap))
75 		clflush_cache_range(pasid_table->table, (1 << order) * PAGE_SIZE);
76 
77 	return 0;
78 }
79 
80 void intel_pasid_free_table(struct device *dev)
81 {
82 	struct device_domain_info *info;
83 	struct pasid_table *pasid_table;
84 	struct pasid_dir_entry *dir;
85 	struct pasid_entry *table;
86 	int i, max_pde;
87 
88 	info = dev_iommu_priv_get(dev);
89 	if (!info || !dev_is_pci(dev) || !info->pasid_table)
90 		return;
91 
92 	pasid_table = info->pasid_table;
93 	info->pasid_table = NULL;
94 
95 	/* Free scalable mode PASID directory tables: */
96 	dir = pasid_table->table;
97 	max_pde = pasid_table->max_pasid >> PASID_PDE_SHIFT;
98 	for (i = 0; i < max_pde; i++) {
99 		table = get_pasid_table_from_pde(&dir[i]);
100 		iommu_free_pages(table);
101 	}
102 
103 	iommu_free_pages(pasid_table->table);
104 	kfree(pasid_table);
105 }
106 
107 struct pasid_table *intel_pasid_get_table(struct device *dev)
108 {
109 	struct device_domain_info *info;
110 
111 	info = dev_iommu_priv_get(dev);
112 	if (!info)
113 		return NULL;
114 
115 	return info->pasid_table;
116 }
117 
118 static int intel_pasid_get_dev_max_id(struct device *dev)
119 {
120 	struct device_domain_info *info;
121 
122 	info = dev_iommu_priv_get(dev);
123 	if (!info || !info->pasid_table)
124 		return 0;
125 
126 	return info->pasid_table->max_pasid;
127 }
128 
129 static struct pasid_entry *intel_pasid_get_entry(struct device *dev, u32 pasid)
130 {
131 	struct device_domain_info *info;
132 	struct pasid_table *pasid_table;
133 	struct pasid_dir_entry *dir;
134 	struct pasid_entry *entries;
135 	int dir_index, index;
136 
137 	pasid_table = intel_pasid_get_table(dev);
138 	if (WARN_ON(!pasid_table || pasid >= intel_pasid_get_dev_max_id(dev)))
139 		return NULL;
140 
141 	dir = pasid_table->table;
142 	info = dev_iommu_priv_get(dev);
143 	dir_index = pasid >> PASID_PDE_SHIFT;
144 	index = pasid & PASID_PTE_MASK;
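	/*
	 * Worked example, assuming PASID_PDE_SHIFT == 6 and
	 * PASID_PTE_MASK == 0x3f: PASID 0x1234 resolves to directory entry
	 * 0x48 and index 0x34 within that entry's 4 KiB PASID table page.
	 */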
145 
146 retry:
147 	entries = get_pasid_table_from_pde(&dir[dir_index]);
148 	if (!entries) {
149 		u64 tmp;
150 
151 		entries = iommu_alloc_pages_node_sz(info->iommu->node,
152 						    GFP_ATOMIC, SZ_4K);
153 		if (!entries)
154 			return NULL;
155 
156 		/*
157 		 * The pasid directory table entry won't be freed after
158 		 * allocation, so there is no race with freeing or clearing.
159 		 * However, this entry might be populated by another thread
160 		 * while we are preparing it; if so, use theirs via the retry.
161 		 */
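		/*
		 * Note on the cmpxchg below: try_cmpxchg64() installs the new
		 * table only if the directory entry is still zero. On failure
		 * another thread has already installed a table, so the freshly
		 * allocated page is freed and the retry path picks up theirs.
		 */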
162 		tmp = 0ULL;
163 		if (!try_cmpxchg64(&dir[dir_index].val, &tmp,
164 				   (u64)virt_to_phys(entries) | PASID_PTE_PRESENT)) {
165 			iommu_free_pages(entries);
166 			goto retry;
167 		}
168 		if (!ecap_coherent(info->iommu->ecap)) {
169 			clflush_cache_range(entries, VTD_PAGE_SIZE);
170 			clflush_cache_range(&dir[dir_index].val, sizeof(*dir));
171 		}
172 	}
173 
174 	return &entries[index];
175 }
176 
177 /*
178  * Interfaces for PASID table entry manipulation:
179  */
180 static void
181 intel_pasid_clear_entry(struct device *dev, u32 pasid, bool fault_ignore)
182 {
183 	struct pasid_entry *pe;
184 
185 	pe = intel_pasid_get_entry(dev, pasid);
186 	if (WARN_ON(!pe))
187 		return;
188 
189 	if (fault_ignore && pasid_pte_is_present(pe))
190 		pasid_clear_entry_with_fpd(pe);
191 	else
192 		pasid_clear_entry(pe);
193 }
194 
195 static void
196 pasid_cache_invalidation_with_pasid(struct intel_iommu *iommu,
197 				    u16 did, u32 pasid)
198 {
199 	struct qi_desc desc;
200 
201 	desc.qw0 = QI_PC_DID(did) | QI_PC_GRAN(QI_PC_PASID_SEL) |
202 		QI_PC_PASID(pasid) | QI_PC_TYPE;
203 	desc.qw1 = 0;
204 	desc.qw2 = 0;
205 	desc.qw3 = 0;
206 
207 	qi_submit_sync(iommu, &desc, 1, 0);
208 }
209 
210 static void
211 devtlb_invalidation_with_pasid(struct intel_iommu *iommu,
212 			       struct device *dev, u32 pasid)
213 {
214 	struct device_domain_info *info;
215 	u16 sid, qdep, pfsid;
216 
217 	info = dev_iommu_priv_get(dev);
218 	if (!info || !info->ats_enabled)
219 		return;
220 
221 	if (pci_dev_is_disconnected(to_pci_dev(dev)))
222 		return;
223 
224 	sid = PCI_DEVID(info->bus, info->devfn);
225 	qdep = info->ats_qdep;
226 	pfsid = info->pfsid;
227 
228 	/*
229 	 * PASID 0 indicates RID2PASID (a DMA request w/o PASID), so a devTLB
230 	 * flush w/o PASID should be used. For a non-zero PASID under SVA
231 	 * usage, the device could do DMA with multiple PASIDs, so it is more
232 	 * efficient to flush the devTLB entries specific to the PASID.
233 	 */
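	/*
	 * In both cases below, address 0 with size order 64 - VTD_PAGE_SHIFT
	 * spans the whole address space, i.e. effectively a full device-TLB
	 * flush for the target.
	 */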
234 	if (pasid == IOMMU_NO_PASID)
235 		qi_flush_dev_iotlb(iommu, sid, pfsid, qdep, 0, 64 - VTD_PAGE_SHIFT);
236 	else
237 		qi_flush_dev_iotlb_pasid(iommu, sid, pfsid, pasid, qdep, 0, 64 - VTD_PAGE_SHIFT);
238 }
239 
240 void intel_pasid_tear_down_entry(struct intel_iommu *iommu, struct device *dev,
241 				 u32 pasid, bool fault_ignore)
242 {
243 	struct pasid_entry *pte;
244 	u16 did, pgtt;
245 
246 	spin_lock(&iommu->lock);
247 	pte = intel_pasid_get_entry(dev, pasid);
248 	if (WARN_ON(!pte)) {
249 		spin_unlock(&iommu->lock);
250 		return;
251 	}
252 
253 	if (!pasid_pte_is_present(pte)) {
254 		if (!pasid_pte_is_fault_disabled(pte)) {
255 			WARN_ON(READ_ONCE(pte->val[0]) != 0);
256 			spin_unlock(&iommu->lock);
257 			return;
258 		}
259 
260 		/*
261 		 * When a PASID is used for SVA by a device, it's possible
262 		 * that the pasid entry is non-present with the Fault
263 		 * Processing Disabled bit set. Clear the pasid entry and
264 		 * drain the PRQ for the PASID before returning.
265 		 */
266 		pasid_clear_entry(pte);
267 		spin_unlock(&iommu->lock);
268 		intel_iommu_drain_pasid_prq(dev, pasid);
269 
270 		return;
271 	}
272 
273 	did = pasid_get_domain_id(pte);
274 	pgtt = pasid_pte_get_pgtt(pte);
275 	intel_pasid_clear_entry(dev, pasid, fault_ignore);
276 	spin_unlock(&iommu->lock);
277 
278 	if (!ecap_coherent(iommu->ecap))
279 		clflush_cache_range(pte, sizeof(*pte));
280 
281 	pasid_cache_invalidation_with_pasid(iommu, did, pasid);
282 
283 	if (pgtt == PASID_ENTRY_PGTT_PT || pgtt == PASID_ENTRY_PGTT_FL_ONLY)
284 		qi_flush_piotlb(iommu, did, pasid, 0, -1, 0);
285 	else
286 		iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);
287 
288 	devtlb_invalidation_with_pasid(iommu, dev, pasid);
289 	if (!fault_ignore)
290 		intel_iommu_drain_pasid_prq(dev, pasid);
291 }
292 
293 /*
294  * This function flushes the caches for a newly set up pasid table entry.
295  * The caller should not modify in-use pasid table entries.
296  */
297 static void pasid_flush_caches(struct intel_iommu *iommu,
298 				struct pasid_entry *pte,
299 			       u32 pasid, u16 did)
300 {
301 	if (!ecap_coherent(iommu->ecap))
302 		clflush_cache_range(pte, sizeof(*pte));
303 
304 	if (cap_caching_mode(iommu->cap)) {
305 		pasid_cache_invalidation_with_pasid(iommu, did, pasid);
306 		qi_flush_piotlb(iommu, did, pasid, 0, -1, 0);
307 	} else {
308 		iommu_flush_write_buffer(iommu);
309 	}
310 }
311 
312 /*
313  * This function is supposed to be used after the caller updates the fields
314  * of a pasid table entry, except for the SSADE and P bits. It does the
315  * following:
316  * - Flush the cacheline if needed
317  * - Flush the caches per Table 28 "Guidance to Software for Invalidations"
318  *   of the VT-d spec 5.0.
319  */
320 static void intel_pasid_flush_present(struct intel_iommu *iommu,
321 				      struct device *dev,
322 				      u32 pasid, u16 did,
323 				      struct pasid_entry *pte)
324 {
325 	if (!ecap_coherent(iommu->ecap))
326 		clflush_cache_range(pte, sizeof(*pte));
327 
328 	/*
329 	 * VT-d spec 5.0, Table 28 gives this guidance for cache invalidation:
330 	 *
331 	 * - PASID-selective-within-Domain PASID-cache invalidation
332 	 * - PASID-selective PASID-based IOTLB invalidation
333 	 * - If (pasid is RID_PASID)
334 	 *    - Global Device-TLB invalidation to affected functions
335 	 *   Else
336 	 *    - PASID-based Device-TLB invalidation (with S=1 and
337 	 *      Addr[63:12]=0x7FFFFFFF_FFFFF) to affected functions
338 	 */
339 	pasid_cache_invalidation_with_pasid(iommu, did, pasid);
340 	qi_flush_piotlb(iommu, did, pasid, 0, -1, 0);
341 
342 	devtlb_invalidation_with_pasid(iommu, dev, pasid);
343 }
344 
345 /*
346  * Set up the scalable mode pasid table entry for the first-level-only
347  * translation type.
348  */
349 static void pasid_pte_config_first_level(struct intel_iommu *iommu,
350 					 struct pasid_entry *pte,
351 					 phys_addr_t fsptptr, u16 did,
352 					 int flags)
353 {
354 	lockdep_assert_held(&iommu->lock);
355 
356 	pasid_clear_entry(pte);
357 
358 	/* Setup the first level page table pointer: */
359 	pasid_set_flptr(pte, fsptptr);
360 
361 	if (flags & PASID_FLAG_FL5LP)
362 		pasid_set_flpm(pte, 1);
363 
364 	if (flags & PASID_FLAG_PAGE_SNOOP)
365 		pasid_set_pgsnp(pte);
366 
367 	pasid_set_domain_id(pte, did);
368 	pasid_set_address_width(pte, iommu->agaw);
369 	pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap));
370 
371 	/* Setup Present and PASID Granular Transfer Type: */
372 	pasid_set_translation_type(pte, PASID_ENTRY_PGTT_FL_ONLY);
373 	pasid_set_present(pte);
374 }
375 
376 int intel_pasid_setup_first_level(struct intel_iommu *iommu, struct device *dev,
377 				  phys_addr_t fsptptr, u32 pasid, u16 did,
378 				  int flags)
379 {
380 	struct pasid_entry *pte;
381 
382 	if (!ecap_flts(iommu->ecap)) {
383 		pr_err("No first level translation support on %s\n",
384 		       iommu->name);
385 		return -EINVAL;
386 	}
387 
388 	if ((flags & PASID_FLAG_FL5LP) && !cap_fl5lp_support(iommu->cap)) {
389 		pr_err("No 5-level paging support for first-level on %s\n",
390 		       iommu->name);
391 		return -EINVAL;
392 	}
393 
394 	spin_lock(&iommu->lock);
395 	pte = intel_pasid_get_entry(dev, pasid);
396 	if (!pte) {
397 		spin_unlock(&iommu->lock);
398 		return -ENODEV;
399 	}
400 
401 	if (pasid_pte_is_present(pte)) {
402 		spin_unlock(&iommu->lock);
403 		return -EBUSY;
404 	}
405 
406 	pasid_pte_config_first_level(iommu, pte, fsptptr, did, flags);
407 
408 	spin_unlock(&iommu->lock);
409 
410 	pasid_flush_caches(iommu, pte, pasid, did);
411 
412 	return 0;
413 }
414 
415 int intel_pasid_replace_first_level(struct intel_iommu *iommu,
416 				    struct device *dev, phys_addr_t fsptptr,
417 				    u32 pasid, u16 did, u16 old_did,
418 				    int flags)
419 {
420 	struct pasid_entry *pte, new_pte;
421 
422 	if (!ecap_flts(iommu->ecap)) {
423 		pr_err("No first level translation support on %s\n",
424 		       iommu->name);
425 		return -EINVAL;
426 	}
427 
428 	if ((flags & PASID_FLAG_FL5LP) && !cap_fl5lp_support(iommu->cap)) {
429 		pr_err("No 5-level paging support for first-level on %s\n",
430 		       iommu->name);
431 		return -EINVAL;
432 	}
433 
434 	pasid_pte_config_first_level(iommu, &new_pte, fsptptr, did, flags);
435 
436 	spin_lock(&iommu->lock);
437 	pte = intel_pasid_get_entry(dev, pasid);
438 	if (!pte) {
439 		spin_unlock(&iommu->lock);
440 		return -ENODEV;
441 	}
442 
443 	if (!pasid_pte_is_present(pte)) {
444 		spin_unlock(&iommu->lock);
445 		return -EINVAL;
446 	}
447 
448 	WARN_ON(old_did != pasid_get_domain_id(pte));
449 
450 	*pte = new_pte;
451 	spin_unlock(&iommu->lock);
452 
453 	intel_pasid_flush_present(iommu, dev, pasid, old_did, pte);
454 	intel_iommu_drain_pasid_prq(dev, pasid);
455 
456 	return 0;
457 }
458 
459 /*
460  * Set up the scalable mode pasid entry for the second-level-only translation type.
461  */
462 static void pasid_pte_config_second_level(struct intel_iommu *iommu,
463 					  struct pasid_entry *pte,
464 					  u64 pgd_val, int agaw, u16 did,
465 					  bool dirty_tracking)
466 {
467 	lockdep_assert_held(&iommu->lock);
468 
469 	pasid_clear_entry(pte);
470 	pasid_set_domain_id(pte, did);
471 	pasid_set_slptr(pte, pgd_val);
472 	pasid_set_address_width(pte, agaw);
473 	pasid_set_translation_type(pte, PASID_ENTRY_PGTT_SL_ONLY);
474 	pasid_set_fault_enable(pte);
475 	pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap));
476 	if (dirty_tracking)
477 		pasid_set_ssade(pte);
478 
479 	pasid_set_present(pte);
480 }
481 
482 int intel_pasid_setup_second_level(struct intel_iommu *iommu,
483 				   struct dmar_domain *domain,
484 				   struct device *dev, u32 pasid)
485 {
486 	struct pasid_entry *pte;
487 	struct dma_pte *pgd;
488 	u64 pgd_val;
489 	u16 did;
490 
491 	/*
492 	 * If hardware advertises no support for second level
493 	 * translation, return directly.
494 	 */
495 	if (!ecap_slts(iommu->ecap)) {
496 		pr_err("No second level translation support on %s\n",
497 		       iommu->name);
498 		return -EINVAL;
499 	}
500 
501 	pgd = domain->pgd;
502 	pgd_val = virt_to_phys(pgd);
503 	did = domain_id_iommu(domain, iommu);
504 
505 	spin_lock(&iommu->lock);
506 	pte = intel_pasid_get_entry(dev, pasid);
507 	if (!pte) {
508 		spin_unlock(&iommu->lock);
509 		return -ENODEV;
510 	}
511 
512 	if (pasid_pte_is_present(pte)) {
513 		spin_unlock(&iommu->lock);
514 		return -EBUSY;
515 	}
516 
517 	pasid_pte_config_second_level(iommu, pte, pgd_val, domain->agaw,
518 				      did, domain->dirty_tracking);
519 	spin_unlock(&iommu->lock);
520 
521 	pasid_flush_caches(iommu, pte, pasid, did);
522 
523 	return 0;
524 }
525 
526 int intel_pasid_replace_second_level(struct intel_iommu *iommu,
527 				     struct dmar_domain *domain,
528 				     struct device *dev, u16 old_did,
529 				     u32 pasid)
530 {
531 	struct pasid_entry *pte, new_pte;
532 	struct dma_pte *pgd;
533 	u64 pgd_val;
534 	u16 did;
535 
536 	/*
537 	 * If hardware advertises no support for second level
538 	 * translation, return directly.
539 	 */
540 	if (!ecap_slts(iommu->ecap)) {
541 		pr_err("No second level translation support on %s\n",
542 		       iommu->name);
543 		return -EINVAL;
544 	}
545 
546 	pgd = domain->pgd;
547 	pgd_val = virt_to_phys(pgd);
548 	did = domain_id_iommu(domain, iommu);
549 
550 	pasid_pte_config_second_level(iommu, &new_pte, pgd_val,
551 				      domain->agaw, did,
552 				      domain->dirty_tracking);
553 
554 	spin_lock(&iommu->lock);
555 	pte = intel_pasid_get_entry(dev, pasid);
556 	if (!pte) {
557 		spin_unlock(&iommu->lock);
558 		return -ENODEV;
559 	}
560 
561 	if (!pasid_pte_is_present(pte)) {
562 		spin_unlock(&iommu->lock);
563 		return -EINVAL;
564 	}
565 
566 	WARN_ON(old_did != pasid_get_domain_id(pte));
567 
568 	*pte = new_pte;
569 	spin_unlock(&iommu->lock);
570 
571 	intel_pasid_flush_present(iommu, dev, pasid, old_did, pte);
572 	intel_iommu_drain_pasid_prq(dev, pasid);
573 
574 	return 0;
575 }
576 
577 /*
578  * Set up dirty tracking on a second-level-only or nested translation type.
579  */
580 int intel_pasid_setup_dirty_tracking(struct intel_iommu *iommu,
581 				     struct device *dev, u32 pasid,
582 				     bool enabled)
583 {
584 	struct pasid_entry *pte;
585 	u16 did, pgtt;
586 
587 	spin_lock(&iommu->lock);
588 
589 	pte = intel_pasid_get_entry(dev, pasid);
590 	if (!pte) {
591 		spin_unlock(&iommu->lock);
592 		dev_err_ratelimited(
593 			dev, "Failed to get pasid entry of PASID %d\n", pasid);
594 		return -ENODEV;
595 	}
596 
597 	did = pasid_get_domain_id(pte);
598 	pgtt = pasid_pte_get_pgtt(pte);
599 	if (pgtt != PASID_ENTRY_PGTT_SL_ONLY &&
600 	    pgtt != PASID_ENTRY_PGTT_NESTED) {
601 		spin_unlock(&iommu->lock);
602 		dev_err_ratelimited(
603 			dev,
604 			"Dirty tracking not supported on translation type %d\n",
605 			pgtt);
606 		return -EOPNOTSUPP;
607 	}
608 
609 	if (pasid_get_ssade(pte) == enabled) {
610 		spin_unlock(&iommu->lock);
611 		return 0;
612 	}
613 
614 	if (enabled)
615 		pasid_set_ssade(pte);
616 	else
617 		pasid_clear_ssade(pte);
618 	spin_unlock(&iommu->lock);
619 
620 	if (!ecap_coherent(iommu->ecap))
621 		clflush_cache_range(pte, sizeof(*pte));
622 
623 	/*
624 	 * From VT-d spec table 25 "Guidance to Software for Invalidations":
625 	 *
626 	 * - PASID-selective-within-Domain PASID-cache invalidation
627 	 *   If (PGTT=SS or Nested)
628 	 *    - Domain-selective IOTLB invalidation
629 	 *   Else
630 	 *    - PASID-selective PASID-based IOTLB invalidation
631 	 * - If (pasid is RID_PASID)
632 	 *    - Global Device-TLB invalidation to affected functions
633 	 *   Else
634 	 *    - PASID-based Device-TLB invalidation (with S=1 and
635 	 *      Addr[63:12]=0x7FFFFFFF_FFFFF) to affected functions
636 	 */
637 	pasid_cache_invalidation_with_pasid(iommu, did, pasid);
638 
639 	iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);
640 
641 	devtlb_invalidation_with_pasid(iommu, dev, pasid);
642 
643 	return 0;
644 }
645 
646 /*
647  * Set up the scalable mode pasid entry for passthrough translation type.
648  */
649 static void pasid_pte_config_pass_through(struct intel_iommu *iommu,
650 					  struct pasid_entry *pte, u16 did)
651 {
652 	lockdep_assert_held(&iommu->lock);
653 
654 	pasid_clear_entry(pte);
655 	pasid_set_domain_id(pte, did);
656 	pasid_set_address_width(pte, iommu->agaw);
657 	pasid_set_translation_type(pte, PASID_ENTRY_PGTT_PT);
658 	pasid_set_fault_enable(pte);
659 	pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap));
660 	pasid_set_present(pte);
661 }
662 
663 int intel_pasid_setup_pass_through(struct intel_iommu *iommu,
664 				   struct device *dev, u32 pasid)
665 {
666 	u16 did = FLPT_DEFAULT_DID;
667 	struct pasid_entry *pte;
668 
669 	spin_lock(&iommu->lock);
670 	pte = intel_pasid_get_entry(dev, pasid);
671 	if (!pte) {
672 		spin_unlock(&iommu->lock);
673 		return -ENODEV;
674 	}
675 
676 	if (pasid_pte_is_present(pte)) {
677 		spin_unlock(&iommu->lock);
678 		return -EBUSY;
679 	}
680 
681 	pasid_pte_config_pass_through(iommu, pte, did);
682 	spin_unlock(&iommu->lock);
683 
684 	pasid_flush_caches(iommu, pte, pasid, did);
685 
686 	return 0;
687 }
688 
689 int intel_pasid_replace_pass_through(struct intel_iommu *iommu,
690 				     struct device *dev, u16 old_did,
691 				     u32 pasid)
692 {
693 	struct pasid_entry *pte, new_pte;
694 	u16 did = FLPT_DEFAULT_DID;
695 
696 	pasid_pte_config_pass_through(iommu, &new_pte, did);
697 
698 	spin_lock(&iommu->lock);
699 	pte = intel_pasid_get_entry(dev, pasid);
700 	if (!pte) {
701 		spin_unlock(&iommu->lock);
702 		return -ENODEV;
703 	}
704 
705 	if (!pasid_pte_is_present(pte)) {
706 		spin_unlock(&iommu->lock);
707 		return -EINVAL;
708 	}
709 
710 	WARN_ON(old_did != pasid_get_domain_id(pte));
711 
712 	*pte = new_pte;
713 	spin_unlock(&iommu->lock);
714 
715 	intel_pasid_flush_present(iommu, dev, pasid, old_did, pte);
716 	intel_iommu_drain_pasid_prq(dev, pasid);
717 
718 	return 0;
719 }
720 
721 /*
722  * Set the page snoop control for a pasid entry which has been set up.
723  */
724 void intel_pasid_setup_page_snoop_control(struct intel_iommu *iommu,
725 					  struct device *dev, u32 pasid)
726 {
727 	struct pasid_entry *pte;
728 	u16 did;
729 
730 	spin_lock(&iommu->lock);
731 	pte = intel_pasid_get_entry(dev, pasid);
732 	if (WARN_ON(!pte || !pasid_pte_is_present(pte))) {
733 		spin_unlock(&iommu->lock);
734 		return;
735 	}
736 
737 	pasid_set_pgsnp(pte);
738 	did = pasid_get_domain_id(pte);
739 	spin_unlock(&iommu->lock);
740 
741 	intel_pasid_flush_present(iommu, dev, pasid, did, pte);
742 }
743 
744 static void pasid_pte_config_nestd(struct intel_iommu *iommu,
745 				   struct pasid_entry *pte,
746 				   struct iommu_hwpt_vtd_s1 *s1_cfg,
747 				   struct dmar_domain *s2_domain,
748 				   u16 did)
749 {
750 	struct dma_pte *pgd = s2_domain->pgd;
751 
752 	lockdep_assert_held(&iommu->lock);
753 
754 	pasid_clear_entry(pte);
755 
756 	if (s1_cfg->addr_width == ADDR_WIDTH_5LEVEL)
757 		pasid_set_flpm(pte, 1);
758 
759 	pasid_set_flptr(pte, s1_cfg->pgtbl_addr);
760 
761 	if (s1_cfg->flags & IOMMU_VTD_S1_SRE) {
762 		pasid_set_sre(pte);
763 		if (s1_cfg->flags & IOMMU_VTD_S1_WPE)
764 			pasid_set_wpe(pte);
765 	}
766 
767 	if (s1_cfg->flags & IOMMU_VTD_S1_EAFE)
768 		pasid_set_eafe(pte);
769 
770 	if (s2_domain->force_snooping)
771 		pasid_set_pgsnp(pte);
772 
773 	pasid_set_slptr(pte, virt_to_phys(pgd));
774 	pasid_set_fault_enable(pte);
775 	pasid_set_domain_id(pte, did);
776 	pasid_set_address_width(pte, s2_domain->agaw);
777 	pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap));
778 	if (s2_domain->dirty_tracking)
779 		pasid_set_ssade(pte);
780 	pasid_set_translation_type(pte, PASID_ENTRY_PGTT_NESTED);
781 	pasid_set_present(pte);
782 }
783 
784 /**
785  * intel_pasid_setup_nested() - Set up PASID entry for nested translation.
786  * @iommu:      IOMMU which the device belongs to
787  * @dev:        Device to be set up for translation
788  * @pasid:      PASID to be programmed in the device PASID table
789  * @domain:     User stage-1 domain nested on a stage-2 domain
790  *
791  * This is used for nested translation. The input domain should be of
792  * nested type and nested on a parent domain with the 'is_nested_parent'
793  * flag set.
794  */
795 int intel_pasid_setup_nested(struct intel_iommu *iommu, struct device *dev,
796 			     u32 pasid, struct dmar_domain *domain)
797 {
798 	struct iommu_hwpt_vtd_s1 *s1_cfg = &domain->s1_cfg;
799 	struct dmar_domain *s2_domain = domain->s2_domain;
800 	u16 did = domain_id_iommu(domain, iommu);
801 	struct pasid_entry *pte;
802 
803 	/* Address width should match the address width supported by hardware */
804 	switch (s1_cfg->addr_width) {
805 	case ADDR_WIDTH_4LEVEL:
806 		break;
807 	case ADDR_WIDTH_5LEVEL:
808 		if (!cap_fl5lp_support(iommu->cap)) {
809 			dev_err_ratelimited(dev,
810 					    "5-level paging not supported\n");
811 			return -EINVAL;
812 		}
813 		break;
814 	default:
815 		dev_err_ratelimited(dev, "Invalid stage-1 address width %d\n",
816 				    s1_cfg->addr_width);
817 		return -EINVAL;
818 	}
819 
820 	if ((s1_cfg->flags & IOMMU_VTD_S1_SRE) && !ecap_srs(iommu->ecap)) {
821 		pr_err_ratelimited("No supervisor request support on %s\n",
822 				   iommu->name);
823 		return -EINVAL;
824 	}
825 
826 	if ((s1_cfg->flags & IOMMU_VTD_S1_EAFE) && !ecap_eafs(iommu->ecap)) {
827 		pr_err_ratelimited("No extended access flag support on %s\n",
828 				   iommu->name);
829 		return -EINVAL;
830 	}
831 
832 	spin_lock(&iommu->lock);
833 	pte = intel_pasid_get_entry(dev, pasid);
834 	if (!pte) {
835 		spin_unlock(&iommu->lock);
836 		return -ENODEV;
837 	}
838 	if (pasid_pte_is_present(pte)) {
839 		spin_unlock(&iommu->lock);
840 		return -EBUSY;
841 	}
842 
843 	pasid_pte_config_nestd(iommu, pte, s1_cfg, s2_domain, did);
844 	spin_unlock(&iommu->lock);
845 
846 	pasid_flush_caches(iommu, pte, pasid, did);
847 
848 	return 0;
849 }
850 
851 int intel_pasid_replace_nested(struct intel_iommu *iommu,
852 			       struct device *dev, u32 pasid,
853 			       u16 old_did, struct dmar_domain *domain)
854 {
855 	struct iommu_hwpt_vtd_s1 *s1_cfg = &domain->s1_cfg;
856 	struct dmar_domain *s2_domain = domain->s2_domain;
857 	u16 did = domain_id_iommu(domain, iommu);
858 	struct pasid_entry *pte, new_pte;
859 
860 	/* Address width should match the address width supported by hardware */
861 	switch (s1_cfg->addr_width) {
862 	case ADDR_WIDTH_4LEVEL:
863 		break;
864 	case ADDR_WIDTH_5LEVEL:
865 		if (!cap_fl5lp_support(iommu->cap)) {
866 			dev_err_ratelimited(dev,
867 					    "5-level paging not supported\n");
868 			return -EINVAL;
869 		}
870 		break;
871 	default:
872 		dev_err_ratelimited(dev, "Invalid stage-1 address width %d\n",
873 				    s1_cfg->addr_width);
874 		return -EINVAL;
875 	}
876 
877 	if ((s1_cfg->flags & IOMMU_VTD_S1_SRE) && !ecap_srs(iommu->ecap)) {
878 		pr_err_ratelimited("No supervisor request support on %s\n",
879 				   iommu->name);
880 		return -EINVAL;
881 	}
882 
883 	if ((s1_cfg->flags & IOMMU_VTD_S1_EAFE) && !ecap_eafs(iommu->ecap)) {
884 		pr_err_ratelimited("No extended access flag support on %s\n",
885 				   iommu->name);
886 		return -EINVAL;
887 	}
888 
889 	pasid_pte_config_nestd(iommu, &new_pte, s1_cfg, s2_domain, did);
890 
891 	spin_lock(&iommu->lock);
892 	pte = intel_pasid_get_entry(dev, pasid);
893 	if (!pte) {
894 		spin_unlock(&iommu->lock);
895 		return -ENODEV;
896 	}
897 
898 	if (!pasid_pte_is_present(pte)) {
899 		spin_unlock(&iommu->lock);
900 		return -EINVAL;
901 	}
902 
903 	WARN_ON(old_did != pasid_get_domain_id(pte));
904 
905 	*pte = new_pte;
906 	spin_unlock(&iommu->lock);
907 
908 	intel_pasid_flush_present(iommu, dev, pasid, old_did, pte);
909 	intel_iommu_drain_pasid_prq(dev, pasid);
910 
911 	return 0;
912 }
913 
914 /*
915  * Interfaces to setup or teardown a pasid table to the scalable-mode
916  * context table entry:
917  */
918 
919 static void device_pasid_table_teardown(struct device *dev, u8 bus, u8 devfn)
920 {
921 	struct device_domain_info *info = dev_iommu_priv_get(dev);
922 	struct intel_iommu *iommu = info->iommu;
923 	struct context_entry *context;
924 	u16 did;
925 
926 	spin_lock(&iommu->lock);
927 	context = iommu_context_addr(iommu, bus, devfn, false);
928 	if (!context) {
929 		spin_unlock(&iommu->lock);
930 		return;
931 	}
932 
933 	did = context_domain_id(context);
934 	context_clear_entry(context);
935 	__iommu_flush_cache(iommu, context, sizeof(*context));
936 	spin_unlock(&iommu->lock);
937 	intel_context_flush_no_pasid(info, context, did);
938 }
939 
940 static int pci_pasid_table_teardown(struct pci_dev *pdev, u16 alias, void *data)
941 {
942 	struct device *dev = data;
943 
944 	if (dev == &pdev->dev)
945 		device_pasid_table_teardown(dev, PCI_BUS_NUM(alias), alias & 0xff);
946 
947 	return 0;
948 }
949 
950 void intel_pasid_teardown_sm_context(struct device *dev)
951 {
952 	struct device_domain_info *info = dev_iommu_priv_get(dev);
953 
954 	if (!dev_is_pci(dev)) {
955 		device_pasid_table_teardown(dev, info->bus, info->devfn);
956 		return;
957 	}
958 
959 	pci_for_each_dma_alias(to_pci_dev(dev), pci_pasid_table_teardown, dev);
960 }
961 
962 /*
963  * Get the PASID directory size for a scalable mode context entry.
964  * A value of X in the PDTS field of a scalable mode context entry
965  * indicates a PASID directory with 2^(X + 7) entries.
966  */
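/*
 * A worked example of the encoding, assuming PASID_PDE_SHIFT is 6: with
 * max_pasid = 1 << 20, max_pde is 1 << 14, find_first_bit() returns 14,
 * and the function returns 7, i.e. PDTS = 7 encodes a directory with
 * 2^(7 + 7) = 16384 entries.
 */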
967 static unsigned long context_get_sm_pds(struct pasid_table *table)
968 {
969 	unsigned long pds, max_pde;
970 
971 	max_pde = table->max_pasid >> PASID_PDE_SHIFT;
972 	pds = find_first_bit(&max_pde, MAX_NR_PASID_BITS);
973 	if (pds < 7)
974 		return 0;
975 
976 	return pds - 7;
977 }
978 
979 static int context_entry_set_pasid_table(struct context_entry *context,
980 					 struct device *dev)
981 {
982 	struct device_domain_info *info = dev_iommu_priv_get(dev);
983 	struct pasid_table *table = info->pasid_table;
984 	struct intel_iommu *iommu = info->iommu;
985 	unsigned long pds;
986 
987 	context_clear_entry(context);
988 
989 	pds = context_get_sm_pds(table);
990 	context->lo = (u64)virt_to_phys(table->table) | context_pdts(pds);
991 	context_set_sm_rid2pasid(context, IOMMU_NO_PASID);
992 
993 	if (info->ats_supported)
994 		context_set_sm_dte(context);
995 	if (info->pasid_supported)
996 		context_set_pasid(context);
997 	if (info->pri_supported)
998 		context_set_sm_pre(context);
999 
1000 	context_set_fault_enable(context);
1001 	context_set_present(context);
1002 	__iommu_flush_cache(iommu, context, sizeof(*context));
1003 
1004 	return 0;
1005 }
1006 
1007 static int device_pasid_table_setup(struct device *dev, u8 bus, u8 devfn)
1008 {
1009 	struct device_domain_info *info = dev_iommu_priv_get(dev);
1010 	struct intel_iommu *iommu = info->iommu;
1011 	struct context_entry *context;
1012 
1013 	spin_lock(&iommu->lock);
1014 	context = iommu_context_addr(iommu, bus, devfn, true);
1015 	if (!context) {
1016 		spin_unlock(&iommu->lock);
1017 		return -ENOMEM;
1018 	}
1019 
1020 	if (context_present(context) && !context_copied(iommu, bus, devfn)) {
1021 		spin_unlock(&iommu->lock);
1022 		return 0;
1023 	}
1024 
1025 	if (context_copied(iommu, bus, devfn)) {
1026 		context_clear_entry(context);
1027 		__iommu_flush_cache(iommu, context, sizeof(*context));
1028 
1029 		/*
1030 		 * For kdump cases, old valid entries may be cached due to
1031 		 * in-flight DMA and the copied page table, but they are never
1032 		 * explicitly unmapped, so we need explicit cache flushes for
1033 		 * all affected domain IDs and PASIDs used in the copied PASID
1034 		 * table. Since we have no idea which domain IDs and PASIDs
1035 		 * were used in the copied tables, upgrade to global PASID and
1036 		 * IOTLB cache invalidation.
1037 		 */
1038 		iommu->flush.flush_context(iommu, 0,
1039 					   PCI_DEVID(bus, devfn),
1040 					   DMA_CCMD_MASK_NOBIT,
1041 					   DMA_CCMD_DEVICE_INVL);
1042 		qi_flush_pasid_cache(iommu, 0, QI_PC_GLOBAL, 0);
1043 		iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
1044 		devtlb_invalidation_with_pasid(iommu, dev, IOMMU_NO_PASID);
1045 
1046 		/*
1047 		 * At this point, the device is supposed to have finished its
1048 		 * reset at driver probe time, so no in-flight DMA will exist
1049 		 * and nothing further needs to be done hereafter.
1050 		 */
1051 		clear_context_copied(iommu, bus, devfn);
1052 	}
1053 
1054 	context_entry_set_pasid_table(context, dev);
1055 	spin_unlock(&iommu->lock);
1056 
1057 	/*
1058 	 * It's a non-present to present mapping. If hardware doesn't cache
1059 	 * non-present entries, we don't need to flush the caches. If it does
1060 	 * cache non-present entries, then it does so in the special
1061 	 * domain #0, which we have to flush:
1062 	 */
1063 	if (cap_caching_mode(iommu->cap)) {
1064 		iommu->flush.flush_context(iommu, 0,
1065 					   PCI_DEVID(bus, devfn),
1066 					   DMA_CCMD_MASK_NOBIT,
1067 					   DMA_CCMD_DEVICE_INVL);
1068 		iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_DSI_FLUSH);
1069 	}
1070 
1071 	return 0;
1072 }
1073 
1074 static int pci_pasid_table_setup(struct pci_dev *pdev, u16 alias, void *data)
1075 {
1076 	struct device *dev = data;
1077 
1078 	if (dev != &pdev->dev)
1079 		return 0;
1080 
1081 	return device_pasid_table_setup(dev, PCI_BUS_NUM(alias), alias & 0xff);
1082 }
1083 
1084 /*
1085  * Set the device's PASID table to its context table entry.
1086  *
1087  * The PASID table is set to the context entries of both device itself
1088  * and its alias requester ID for DMA.
1089  */
1090 int intel_pasid_setup_sm_context(struct device *dev)
1091 {
1092 	struct device_domain_info *info = dev_iommu_priv_get(dev);
1093 
1094 	if (!dev_is_pci(dev))
1095 		return device_pasid_table_setup(dev, info->bus, info->devfn);
1096 
1097 	return pci_for_each_dma_alias(to_pci_dev(dev), pci_pasid_table_setup, dev);
1098 }
1099 
1100 /*
1101  * Global Device-TLB invalidation following changes in a context entry which
1102  * was present.
1103  */
1104 static void __context_flush_dev_iotlb(struct device_domain_info *info)
1105 {
1106 	if (!info->ats_enabled)
1107 		return;
1108 
1109 	qi_flush_dev_iotlb(info->iommu, PCI_DEVID(info->bus, info->devfn),
1110 			   info->pfsid, info->ats_qdep, 0, MAX_AGAW_PFN_WIDTH);
1111 
1112 	/*
1113 	 * There is no guarantee that the device DMA is stopped when it reaches
1114 	 * here. Therefore, always attempt the extra device TLB invalidation
1115 	 * quirk. The impact on performance is acceptable since this is not a
1116 	 * performance-critical path.
1117 	 */
1118 	quirk_extra_dev_tlb_flush(info, 0, MAX_AGAW_PFN_WIDTH, IOMMU_NO_PASID,
1119 				  info->ats_qdep);
1120 }
1121 
1122 /*
1123  * Cache invalidations after a change in a context table entry that was
1124  * present, per Section 6.5.3.3 of the spec (Guidance to Software for
1125  * Invalidations). This helper can only be used when the IOMMU is working
1126  * in legacy mode, or in scalable mode while all PASID table entries of
1127  * the device are non-present.
1128  */
1129 void intel_context_flush_no_pasid(struct device_domain_info *info,
1130 				  struct context_entry *context, u16 did)
1131 {
1132 	struct intel_iommu *iommu = info->iommu;
1133 
1134 	/*
1135 	 * Device-selective context-cache invalidation. The Domain-ID field
1136 	 * of the Context-cache Invalidate Descriptor is ignored by hardware
1137 	 * when operating in scalable mode. Therefore the @did value doesn't
1138 	 * matter in scalable mode.
1139 	 */
1140 	iommu->flush.flush_context(iommu, did, PCI_DEVID(info->bus, info->devfn),
1141 				   DMA_CCMD_MASK_NOBIT, DMA_CCMD_DEVICE_INVL);
1142 
1143 	/*
1144 	 * For legacy mode:
1145 	 * - Domain-selective IOTLB invalidation
1146 	 * - Global Device-TLB invalidation to all affected functions
1147 	 */
1148 	if (!sm_supported(iommu)) {
1149 		iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);
1150 		__context_flush_dev_iotlb(info);
1151 
1152 		return;
1153 	}
1154 
1155 	__context_flush_dev_iotlb(info);
1156 }
1157