1 /*
2  * Copyright (c) 2006, Intel Corporation.
3  *
4  * This program is free software; you can redistribute it and/or modify it
5  * under the terms and conditions of the GNU General Public License,
6  * version 2, as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope it will be useful, but WITHOUT
9  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
11  * more details.
12  *
13  * You should have received a copy of the GNU General Public License along with
14  * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
15  * Place - Suite 330, Boston, MA 02111-1307 USA.
16  *
17  * Copyright (C) 2006-2008 Intel Corporation
18  * Author: Ashok Raj <ashok.raj@intel.com>
19  * Author: Shaohua Li <shaohua.li@intel.com>
20  * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
21  * Author: Fenghua Yu <fenghua.yu@intel.com>
22  */
23 
24 #include <linux/init.h>
25 #include <linux/bitmap.h>
26 #include <linux/debugfs.h>
27 #include <linux/export.h>
28 #include <linux/slab.h>
29 #include <linux/irq.h>
30 #include <linux/interrupt.h>
31 #include <linux/spinlock.h>
32 #include <linux/pci.h>
33 #include <linux/dmar.h>
34 #include <linux/dma-mapping.h>
35 #include <linux/mempool.h>
36 #include <linux/timer.h>
37 #include <linux/iova.h>
38 #include <linux/iommu.h>
39 #include <linux/intel-iommu.h>
40 #include <linux/syscore_ops.h>
41 #include <linux/tboot.h>
42 #include <linux/dmi.h>
43 #include <linux/pci-ats.h>
44 #include <linux/memblock.h>
45 #include <asm/cacheflush.h>
46 #include <asm/iommu.h>
47 
48 #define ROOT_SIZE		VTD_PAGE_SIZE
49 #define CONTEXT_SIZE		VTD_PAGE_SIZE
50 
51 #define IS_BRIDGE_HOST_DEVICE(pdev) \
52 			    ((pdev->class >> 8) == PCI_CLASS_BRIDGE_HOST)
53 #define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
54 #define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
55 #define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e)
56 
57 #define IOAPIC_RANGE_START	(0xfee00000)
58 #define IOAPIC_RANGE_END	(0xfeefffff)
59 #define IOVA_START_ADDR		(0x1000)
60 
61 #define DEFAULT_DOMAIN_ADDRESS_WIDTH 48
62 
63 #define MAX_AGAW_WIDTH 64
64 
65 #define __DOMAIN_MAX_PFN(gaw)  ((((uint64_t)1) << (gaw-VTD_PAGE_SHIFT)) - 1)
66 #define __DOMAIN_MAX_ADDR(gaw) ((((uint64_t)1) << gaw) - 1)
67 
68 /* We limit DOMAIN_MAX_PFN to fit in an unsigned long, and DOMAIN_MAX_ADDR
69    to match. That way, we can use 'unsigned long' for PFNs with impunity. */
70 #define DOMAIN_MAX_PFN(gaw)	((unsigned long) min_t(uint64_t, \
71 				__DOMAIN_MAX_PFN(gaw), (unsigned long)-1))
72 #define DOMAIN_MAX_ADDR(gaw)	(((uint64_t)__DOMAIN_MAX_PFN(gaw)) << VTD_PAGE_SHIFT)
73 
74 #define IOVA_PFN(addr)		((addr) >> PAGE_SHIFT)
75 #define DMA_32BIT_PFN		IOVA_PFN(DMA_BIT_MASK(32))
76 #define DMA_64BIT_PFN		IOVA_PFN(DMA_BIT_MASK(64))
77 
78 /* page table handling */
79 #define LEVEL_STRIDE		(9)
80 #define LEVEL_MASK		(((u64)1 << LEVEL_STRIDE) - 1)
81 
82 /*
83  * This bitmap is used to advertise the page sizes our hardware supports
84  * to the IOMMU core, which will then use this information to split
85  * physically contiguous memory regions it is mapping into page sizes
86  * that we support.
87  *
88  * Traditionally the IOMMU core just handed us the mappings directly,
89  * after making sure the size is an order of a 4KiB page and that the
90  * mapping has natural alignment.
91  *
92  * To retain this behavior, we currently advertise that we support
93  * all page sizes that are an order of 4KiB.
94  *
95  * If at some point we'd like to utilize the IOMMU core's new behavior,
96  * we could change this to advertise the real page sizes we support.
97  */
98 #define INTEL_IOMMU_PGSIZES	(~0xFFFUL)
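/*
 * Editor's note, derived from the mask above: ~0xFFFUL clears bits 0-11 and
 * sets every bit from 12 upward, i.e. it advertises all power-of-two sizes
 * >= 4KiB (bit 12 = 4KiB, bit 21 = 2MiB, bit 30 = 1GiB, ...).
 */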
99 
100 static inline int agaw_to_level(int agaw)
101 {
102 	return agaw + 2;
103 }
104 
105 static inline int agaw_to_width(int agaw)
106 {
107 	return 30 + agaw * LEVEL_STRIDE;
108 }
109 
110 static inline int width_to_agaw(int width)
111 {
112 	return (width - 30) / LEVEL_STRIDE;
113 }
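/*
 * Worked example for the agaw helpers above: agaw counts 9-bit page-table
 * levels beyond the 2-level/30-bit base, so agaw 1 -> 39-bit width, 3-level
 * table; agaw 2 -> 48-bit width, 4-level table (the common configuration).
 */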
114 
115 static inline unsigned int level_to_offset_bits(int level)
116 {
117 	return (level - 1) * LEVEL_STRIDE;
118 }
119 
120 static inline int pfn_level_offset(unsigned long pfn, int level)
121 {
122 	return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK;
123 }
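/*
 * Example: for a VT-d pfn, level 1 indexes bits 0-8, level 2 bits 9-17,
 * level 3 bits 18-26, and so on -- each level consumes LEVEL_STRIDE (9)
 * bits, matching 512 entries per 4KiB page-table page.
 */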
124 
125 static inline unsigned long level_mask(int level)
126 {
127 	return -1UL << level_to_offset_bits(level);
128 }
129 
130 static inline unsigned long level_size(int level)
131 {
132 	return 1UL << level_to_offset_bits(level);
133 }
134 
135 static inline unsigned long align_to_level(unsigned long pfn, int level)
136 {
137 	return (pfn + level_size(level) - 1) & level_mask(level);
138 }
139 
140 static inline unsigned long lvl_to_nr_pages(unsigned int lvl)
141 {
142 	return  1 << ((lvl - 1) * LEVEL_STRIDE);
143 }
144 
145 /* VT-d pages must always be _smaller_ than MM pages. Otherwise things
146    are never going to work. */
147 static inline unsigned long dma_to_mm_pfn(unsigned long dma_pfn)
148 {
149 	return dma_pfn >> (PAGE_SHIFT - VTD_PAGE_SHIFT);
150 }
151 
152 static inline unsigned long mm_to_dma_pfn(unsigned long mm_pfn)
153 {
154 	return mm_pfn << (PAGE_SHIFT - VTD_PAGE_SHIFT);
155 }
156 static inline unsigned long page_to_dma_pfn(struct page *pg)
157 {
158 	return mm_to_dma_pfn(page_to_pfn(pg));
159 }
160 static inline unsigned long virt_to_dma_pfn(void *p)
161 {
162 	return page_to_dma_pfn(virt_to_page(p));
163 }
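/*
 * Note: with 4KiB MM pages (PAGE_SHIFT == VTD_PAGE_SHIFT == 12) the
 * conversions above are identity; with larger MM pages (e.g. 64KiB) one
 * MM pfn corresponds to several consecutive VT-d pfns.
 */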
164 
165 /* global iommu list, set NULL for ignored DMAR units */
166 static struct intel_iommu **g_iommus;
167 
168 static void __init check_tylersburg_isoch(void);
169 static int rwbf_quirk;
170 
171 /*
172  * set to 1 to panic kernel if can't successfully enable VT-d
173  * (used when kernel is launched w/ TXT)
174  */
175 static int force_on = 0;
176 
177 /*
178  * 0: Present
179  * 1-11: Reserved
180  * 12-63: Context Ptr (12 - (haw-1))
181  * 64-127: Reserved
182  */
183 struct root_entry {
184 	u64	val;
185 	u64	rsvd1;
186 };
187 #define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
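/* 4KiB root table / 16-byte entries = 256 root entries, one per PCI bus. */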
188 static inline bool root_present(struct root_entry *root)
189 {
190 	return (root->val & 1);
191 }
192 static inline void set_root_present(struct root_entry *root)
193 {
194 	root->val |= 1;
195 }
196 static inline void set_root_value(struct root_entry *root, unsigned long value)
197 {
198 	root->val |= value & VTD_PAGE_MASK;
199 }
200 
201 static inline struct context_entry *
202 get_context_addr_from_root(struct root_entry *root)
203 {
204 	return (struct context_entry *)
205 		(root_present(root)?phys_to_virt(
206 		root->val & VTD_PAGE_MASK) :
207 		NULL);
208 }
209 
210 /*
211  * low 64 bits:
212  * 0: present
213  * 1: fault processing disable
214  * 2-3: translation type
215  * 12-63: address space root
216  * high 64 bits:
217  * 0-2: address width
218  * 3-6: aval
219  * 8-23: domain id
220  */
221 struct context_entry {
222 	u64 lo;
223 	u64 hi;
224 };
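/*
 * Each bus's context table is likewise one 4KiB page: 256 16-byte entries,
 * indexed by devfn (see device_to_context_entry() below).
 */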
225 
226 static inline bool context_present(struct context_entry *context)
227 {
228 	return (context->lo & 1);
229 }
230 static inline void context_set_present(struct context_entry *context)
231 {
232 	context->lo |= 1;
233 }
234 
235 static inline void context_set_fault_enable(struct context_entry *context)
236 {
237 	context->lo &= (((u64)-1) << 2) | 1;
238 }
239 
240 static inline void context_set_translation_type(struct context_entry *context,
241 						unsigned long value)
242 {
243 	context->lo &= (((u64)-1) << 4) | 3;
244 	context->lo |= (value & 3) << 2;
245 }
246 
247 static inline void context_set_address_root(struct context_entry *context,
248 					    unsigned long value)
249 {
250 	context->lo |= value & VTD_PAGE_MASK;
251 }
252 
253 static inline void context_set_address_width(struct context_entry *context,
254 					     unsigned long value)
255 {
256 	context->hi |= value & 7;
257 }
258 
259 static inline void context_set_domain_id(struct context_entry *context,
260 					 unsigned long value)
261 {
262 	context->hi |= (value & ((1 << 16) - 1)) << 8;
263 }
264 
265 static inline void context_clear_entry(struct context_entry *context)
266 {
267 	context->lo = 0;
268 	context->hi = 0;
269 }
270 
271 /*
272  * 0: readable
273  * 1: writable
274  * 2-6: reserved
275  * 7: super page
276  * 8-10: available
277  * 11: snoop behavior
278  * 12-63: Host physical address
279  */
280 struct dma_pte {
281 	u64 val;
282 };
283 
284 static inline void dma_clear_pte(struct dma_pte *pte)
285 {
286 	pte->val = 0;
287 }
288 
289 static inline void dma_set_pte_readable(struct dma_pte *pte)
290 {
291 	pte->val |= DMA_PTE_READ;
292 }
293 
294 static inline void dma_set_pte_writable(struct dma_pte *pte)
295 {
296 	pte->val |= DMA_PTE_WRITE;
297 }
298 
299 static inline void dma_set_pte_snp(struct dma_pte *pte)
300 {
301 	pte->val |= DMA_PTE_SNP;
302 }
303 
304 static inline void dma_set_pte_prot(struct dma_pte *pte, unsigned long prot)
305 {
306 	pte->val = (pte->val & ~3) | (prot & 3);
307 }
308 
309 static inline u64 dma_pte_addr(struct dma_pte *pte)
310 {
311 #ifdef CONFIG_64BIT
312 	return pte->val & VTD_PAGE_MASK;
313 #else
314 	/* Must have a full atomic 64-bit read */
315 	return  __cmpxchg64(&pte->val, 0ULL, 0ULL) & VTD_PAGE_MASK;
316 #endif
317 }
318 
319 static inline void dma_set_pte_pfn(struct dma_pte *pte, unsigned long pfn)
320 {
321 	pte->val |= (uint64_t)pfn << VTD_PAGE_SHIFT;
322 }
323 
324 static inline bool dma_pte_present(struct dma_pte *pte)
325 {
326 	return (pte->val & 3) != 0;
327 }
328 
329 static inline bool dma_pte_superpage(struct dma_pte *pte)
330 {
331 	return (pte->val & (1 << 7));
332 }
333 
334 static inline int first_pte_in_page(struct dma_pte *pte)
335 {
336 	return !((unsigned long)pte & ~VTD_PAGE_MASK);
337 }
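/*
 * first_pte_in_page(): a page-table page holds 512 PTEs in 4KiB, so a PTE
 * pointer with no bits set below VTD_PAGE_SHIFT is the first entry of its
 * page -- used to detect when a walk crosses into a new table page.
 */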
338 
339 /*
340  * This domain is a static identity mapping domain.
341  *	1. This domain creates a static 1:1 mapping to all usable memory.
342  * 	2. It maps to each iommu if successful.
343  *	3. Each iommu maps to this domain if successful.
344  */
345 static struct dmar_domain *si_domain;
346 static int hw_pass_through = 1;
347 
348 /* devices under the same p2p bridge are owned in one domain */
349 #define DOMAIN_FLAG_P2P_MULTIPLE_DEVICES (1 << 0)
350 
351 /* domain represents a virtual machine; more than one device
352  * across iommus may be owned in one domain, e.g. a kvm guest.
353  */
354 #define DOMAIN_FLAG_VIRTUAL_MACHINE	(1 << 1)
355 
356 /* si_domain contains multiple devices */
357 #define DOMAIN_FLAG_STATIC_IDENTITY	(1 << 2)
358 
359 struct dmar_domain {
360 	int	id;			/* domain id */
361 	int	nid;			/* node id */
362 	unsigned long iommu_bmp;	/* bitmap of iommus this domain uses */
363 
364 	struct list_head devices; 	/* all devices' list */
365 	struct iova_domain iovad;	/* iova's that belong to this domain */
366 
367 	struct dma_pte	*pgd;		/* virtual address */
368 	int		gaw;		/* max guest address width */
369 
370 	/* adjusted guest address width, 0 is level 2 30-bit */
371 	int		agaw;
372 
373 	int		flags;		/* flags to find out type of domain */
374 
375 	int		iommu_coherency;/* indicate coherency of iommu access */
376 	int		iommu_snooping; /* indicate snooping control feature*/
377 	int		iommu_count;	/* reference count of iommu */
378 	int		iommu_superpage;/* Level of superpages supported:
379 					   0 == 4KiB (no superpages), 1 == 2MiB,
380 					   2 == 1GiB, 3 == 512GiB, 4 == 1TiB */
381 	spinlock_t	iommu_lock;	/* protect iommu set in domain */
382 	u64		max_addr;	/* maximum mapped address */
383 };
384 
385 /* PCI domain-device relationship */
386 struct device_domain_info {
387 	struct list_head link;	/* link to domain siblings */
388 	struct list_head global; /* link to global list */
389 	int segment;		/* PCI domain */
390 	u8 bus;			/* PCI bus number */
391 	u8 devfn;		/* PCI devfn number */
392 	struct pci_dev *dev; /* it's NULL for PCIe-to-PCI bridge */
393 	struct intel_iommu *iommu; /* IOMMU used by this device */
394 	struct dmar_domain *domain; /* pointer to domain */
395 };
396 
397 static void flush_unmaps_timeout(unsigned long data);
398 
399 DEFINE_TIMER(unmap_timer,  flush_unmaps_timeout, 0, 0);
400 
401 #define HIGH_WATER_MARK 250
402 struct deferred_flush_tables {
403 	int next;
404 	struct iova *iova[HIGH_WATER_MARK];
405 	struct dmar_domain *domain[HIGH_WATER_MARK];
406 };
407 
408 static struct deferred_flush_tables *deferred_flush;
409 
410 /* bitmap for indexing intel_iommus */
411 static int g_num_of_iommus;
412 
413 static DEFINE_SPINLOCK(async_umap_flush_lock);
414 static LIST_HEAD(unmaps_to_do);
415 
416 static int timer_on;
417 static long list_size;
418 
419 static void domain_remove_dev_info(struct dmar_domain *domain);
420 
421 #ifdef CONFIG_INTEL_IOMMU_DEFAULT_ON
422 int dmar_disabled = 0;
423 #else
424 int dmar_disabled = 1;
425 #endif /*CONFIG_INTEL_IOMMU_DEFAULT_ON*/
426 
427 int intel_iommu_enabled = 0;
428 EXPORT_SYMBOL_GPL(intel_iommu_enabled);
429 
430 static int dmar_map_gfx = 1;
431 static int dmar_forcedac;
432 static int intel_iommu_strict;
433 static int intel_iommu_superpage = 1;
434 
435 int intel_iommu_gfx_mapped;
436 EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped);
437 
438 #define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
439 static DEFINE_SPINLOCK(device_domain_lock);
440 static LIST_HEAD(device_domain_list);
441 
442 static struct iommu_ops intel_iommu_ops;
443 
444 static int __init intel_iommu_setup(char *str)
445 {
446 	if (!str)
447 		return -EINVAL;
448 	while (*str) {
449 		if (!strncmp(str, "on", 2)) {
450 			dmar_disabled = 0;
451 			printk(KERN_INFO "Intel-IOMMU: enabled\n");
452 		} else if (!strncmp(str, "off", 3)) {
453 			dmar_disabled = 1;
454 			printk(KERN_INFO "Intel-IOMMU: disabled\n");
455 		} else if (!strncmp(str, "igfx_off", 8)) {
456 			dmar_map_gfx = 0;
457 			printk(KERN_INFO
458 				"Intel-IOMMU: disable GFX device mapping\n");
459 		} else if (!strncmp(str, "forcedac", 8)) {
460 			printk(KERN_INFO
461 				"Intel-IOMMU: Forcing DAC for PCI devices\n");
462 			dmar_forcedac = 1;
463 		} else if (!strncmp(str, "strict", 6)) {
464 			printk(KERN_INFO
465 				"Intel-IOMMU: disable batched IOTLB flush\n");
466 			intel_iommu_strict = 1;
467 		} else if (!strncmp(str, "sp_off", 6)) {
468 			printk(KERN_INFO
469 				"Intel-IOMMU: disable supported super page\n");
470 			intel_iommu_superpage = 0;
471 		}
472 
473 		str += strcspn(str, ",");
474 		while (*str == ',')
475 			str++;
476 	}
477 	return 0;
478 }
479 __setup("intel_iommu=", intel_iommu_setup);
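/*
 * The parser above accepts a comma-separated list on the kernel command
 * line, e.g. "intel_iommu=on,strict,sp_off" enables the IOMMU, disables
 * batched IOTLB flushing and turns superpage support off.
 */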
480 
481 static struct kmem_cache *iommu_domain_cache;
482 static struct kmem_cache *iommu_devinfo_cache;
483 static struct kmem_cache *iommu_iova_cache;
484 
485 static inline void *alloc_pgtable_page(int node)
486 {
487 	struct page *page;
488 	void *vaddr = NULL;
489 
490 	page = alloc_pages_node(node, GFP_ATOMIC | __GFP_ZERO, 0);
491 	if (page)
492 		vaddr = page_address(page);
493 	return vaddr;
494 }
495 
496 static inline void free_pgtable_page(void *vaddr)
497 {
498 	free_page((unsigned long)vaddr);
499 }
500 
501 static inline void *alloc_domain_mem(void)
502 {
503 	return kmem_cache_alloc(iommu_domain_cache, GFP_ATOMIC);
504 }
505 
506 static void free_domain_mem(void *vaddr)
507 {
508 	kmem_cache_free(iommu_domain_cache, vaddr);
509 }
510 
511 static inline void *alloc_devinfo_mem(void)
512 {
513 	return kmem_cache_alloc(iommu_devinfo_cache, GFP_ATOMIC);
514 }
515 
516 static inline void free_devinfo_mem(void *vaddr)
517 {
518 	kmem_cache_free(iommu_devinfo_cache, vaddr);
519 }
520 
521 struct iova *alloc_iova_mem(void)
522 {
523 	return kmem_cache_alloc(iommu_iova_cache, GFP_ATOMIC);
524 }
525 
526 void free_iova_mem(struct iova *iova)
527 {
528 	kmem_cache_free(iommu_iova_cache, iova);
529 }
530 
531 
532 static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw)
533 {
534 	unsigned long sagaw;
535 	int agaw = -1;
536 
537 	sagaw = cap_sagaw(iommu->cap);
538 	for (agaw = width_to_agaw(max_gaw);
539 	     agaw >= 0; agaw--) {
540 		if (test_bit(agaw, &sagaw))
541 			break;
542 	}
543 
544 	return agaw;
545 }
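/*
 * Example: if cap_sagaw() reports bits 1 and 2 (39- and 48-bit tables) and
 * max_gaw is 48, the loop starts at agaw 2 and finds it set, so agaw 2 is
 * returned; a 39-bit-only IOMMU would fall back to agaw 1.
 */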
546 
547 /*
548  * Calculate max SAGAW for each iommu.
549  */
550 int iommu_calculate_max_sagaw(struct intel_iommu *iommu)
551 {
552 	return __iommu_calculate_agaw(iommu, MAX_AGAW_WIDTH);
553 }
554 
555 /*
556  * Calculate agaw for each iommu.
557  * "SAGAW" may be different across iommus; use a default agaw, and
558  * fall back to a smaller supported agaw for iommus that don't support it.
559  */
560 int iommu_calculate_agaw(struct intel_iommu *iommu)
561 {
562 	return __iommu_calculate_agaw(iommu, DEFAULT_DOMAIN_ADDRESS_WIDTH);
563 }
564 
565 /* This function only returns a single iommu in a domain */
566 static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
567 {
568 	int iommu_id;
569 
570 	/* si_domain and vm domain should not get here. */
571 	BUG_ON(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE);
572 	BUG_ON(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY);
573 
574 	iommu_id = find_first_bit(&domain->iommu_bmp, g_num_of_iommus);
575 	if (iommu_id < 0 || iommu_id >= g_num_of_iommus)
576 		return NULL;
577 
578 	return g_iommus[iommu_id];
579 }
580 
581 static void domain_update_iommu_coherency(struct dmar_domain *domain)
582 {
583 	int i;
584 
585 	domain->iommu_coherency = 1;
586 
587 	for_each_set_bit(i, &domain->iommu_bmp, g_num_of_iommus) {
588 		if (!ecap_coherent(g_iommus[i]->ecap)) {
589 			domain->iommu_coherency = 0;
590 			break;
591 		}
592 	}
593 }
594 
595 static void domain_update_iommu_snooping(struct dmar_domain *domain)
596 {
597 	int i;
598 
599 	domain->iommu_snooping = 1;
600 
601 	for_each_set_bit(i, &domain->iommu_bmp, g_num_of_iommus) {
602 		if (!ecap_sc_support(g_iommus[i]->ecap)) {
603 			domain->iommu_snooping = 0;
604 			break;
605 		}
606 	}
607 }
608 
609 static void domain_update_iommu_superpage(struct dmar_domain *domain)
610 {
611 	struct dmar_drhd_unit *drhd;
612 	struct intel_iommu *iommu = NULL;
613 	int mask = 0xf;
614 
615 	if (!intel_iommu_superpage) {
616 		domain->iommu_superpage = 0;
617 		return;
618 	}
619 
620 	/* set iommu_superpage to the smallest common denominator */
621 	for_each_active_iommu(iommu, drhd) {
622 		mask &= cap_super_page_val(iommu->cap);
623 		if (!mask) {
624 			break;
625 		}
626 	}
627 	domain->iommu_superpage = fls(mask);
628 }
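/*
 * cap_super_page_val() is a 4-bit field (bit 0 = 2MiB, bit 1 = 1GiB, ...),
 * so AND-ing it across all active IOMMUs and taking fls() yields the largest
 * superpage level every unit supports, e.g. mask 0x1 -> iommu_superpage 1
 * (2MiB only); mask 0 -> no superpages.
 */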
629 
630 /* Some capabilities may be different across iommus */
631 static void domain_update_iommu_cap(struct dmar_domain *domain)
632 {
633 	domain_update_iommu_coherency(domain);
634 	domain_update_iommu_snooping(domain);
635 	domain_update_iommu_superpage(domain);
636 }
637 
638 static struct intel_iommu *device_to_iommu(int segment, u8 bus, u8 devfn)
639 {
640 	struct dmar_drhd_unit *drhd = NULL;
641 	int i;
642 
643 	for_each_drhd_unit(drhd) {
644 		if (drhd->ignored)
645 			continue;
646 		if (segment != drhd->segment)
647 			continue;
648 
649 		for (i = 0; i < drhd->devices_cnt; i++) {
650 			if (drhd->devices[i] &&
651 			    drhd->devices[i]->bus->number == bus &&
652 			    drhd->devices[i]->devfn == devfn)
653 				return drhd->iommu;
654 			if (drhd->devices[i] &&
655 			    drhd->devices[i]->subordinate &&
656 			    drhd->devices[i]->subordinate->number <= bus &&
657 			    drhd->devices[i]->subordinate->subordinate >= bus)
658 				return drhd->iommu;
659 		}
660 
661 		if (drhd->include_all)
662 			return drhd->iommu;
663 	}
664 
665 	return NULL;
666 }
667 
668 static void domain_flush_cache(struct dmar_domain *domain,
669 			       void *addr, int size)
670 {
671 	if (!domain->iommu_coherency)
672 		clflush_cache_range(addr, size);
673 }
674 
675 /* Gets context entry for a given bus and devfn */
676 static struct context_entry * device_to_context_entry(struct intel_iommu *iommu,
677 		u8 bus, u8 devfn)
678 {
679 	struct root_entry *root;
680 	struct context_entry *context;
681 	unsigned long phy_addr;
682 	unsigned long flags;
683 
684 	spin_lock_irqsave(&iommu->lock, flags);
685 	root = &iommu->root_entry[bus];
686 	context = get_context_addr_from_root(root);
687 	if (!context) {
688 		context = (struct context_entry *)
689 				alloc_pgtable_page(iommu->node);
690 		if (!context) {
691 			spin_unlock_irqrestore(&iommu->lock, flags);
692 			return NULL;
693 		}
694 		__iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
695 		phy_addr = virt_to_phys((void *)context);
696 		set_root_value(root, phy_addr);
697 		set_root_present(root);
698 		__iommu_flush_cache(iommu, root, sizeof(*root));
699 	}
700 	spin_unlock_irqrestore(&iommu->lock, flags);
701 	return &context[devfn];
702 }
703 
704 static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
705 {
706 	struct root_entry *root;
707 	struct context_entry *context;
708 	int ret;
709 	unsigned long flags;
710 
711 	spin_lock_irqsave(&iommu->lock, flags);
712 	root = &iommu->root_entry[bus];
713 	context = get_context_addr_from_root(root);
714 	if (!context) {
715 		ret = 0;
716 		goto out;
717 	}
718 	ret = context_present(&context[devfn]);
719 out:
720 	spin_unlock_irqrestore(&iommu->lock, flags);
721 	return ret;
722 }
723 
724 static void clear_context_table(struct intel_iommu *iommu, u8 bus, u8 devfn)
725 {
726 	struct root_entry *root;
727 	struct context_entry *context;
728 	unsigned long flags;
729 
730 	spin_lock_irqsave(&iommu->lock, flags);
731 	root = &iommu->root_entry[bus];
732 	context = get_context_addr_from_root(root);
733 	if (context) {
734 		context_clear_entry(&context[devfn]);
735 		__iommu_flush_cache(iommu, &context[devfn], \
736 			sizeof(*context));
737 	}
738 	spin_unlock_irqrestore(&iommu->lock, flags);
739 }
740 
741 static void free_context_table(struct intel_iommu *iommu)
742 {
743 	struct root_entry *root;
744 	int i;
745 	unsigned long flags;
746 	struct context_entry *context;
747 
748 	spin_lock_irqsave(&iommu->lock, flags);
749 	if (!iommu->root_entry) {
750 		goto out;
751 	}
752 	for (i = 0; i < ROOT_ENTRY_NR; i++) {
753 		root = &iommu->root_entry[i];
754 		context = get_context_addr_from_root(root);
755 		if (context)
756 			free_pgtable_page(context);
757 	}
758 	free_pgtable_page(iommu->root_entry);
759 	iommu->root_entry = NULL;
760 out:
761 	spin_unlock_irqrestore(&iommu->lock, flags);
762 }
763 
764 static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
765 				      unsigned long pfn, int target_level)
766 {
767 	int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
768 	struct dma_pte *parent, *pte = NULL;
769 	int level = agaw_to_level(domain->agaw);
770 	int offset;
771 
772 	BUG_ON(!domain->pgd);
773 	BUG_ON(addr_width < BITS_PER_LONG && pfn >> addr_width);
774 	parent = domain->pgd;
775 
776 	while (level > 0) {
777 		void *tmp_page;
778 
779 		offset = pfn_level_offset(pfn, level);
780 		pte = &parent[offset];
781 		if (!target_level && (dma_pte_superpage(pte) || !dma_pte_present(pte)))
782 			break;
783 		if (level == target_level)
784 			break;
785 
786 		if (!dma_pte_present(pte)) {
787 			uint64_t pteval;
788 
789 			tmp_page = alloc_pgtable_page(domain->nid);
790 
791 			if (!tmp_page)
792 				return NULL;
793 
794 			domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE);
795 			pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE;
796 			if (cmpxchg64(&pte->val, 0ULL, pteval)) {
797 				/* Someone else set it while we were thinking; use theirs. */
798 				free_pgtable_page(tmp_page);
799 			} else {
800 				dma_pte_addr(pte);
801 				domain_flush_cache(domain, pte, sizeof(*pte));
802 			}
803 		}
804 		parent = phys_to_virt(dma_pte_addr(pte));
805 		level--;
806 	}
807 
808 	return pte;
809 }
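/*
 * Walk sketch for the function above: with agaw 2 (4-level table) a pfn's
 * bits [35:27] index level 4, [26:18] level 3, [17:9] level 2 and [8:0]
 * level 1. target_level 0 means "descend to whatever leaf exists", which
 * stops early at superpage entries.
 */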
810 
811 
812 /* return address's pte at specific level */
813 static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain,
814 					 unsigned long pfn,
815 					 int level, int *large_page)
816 {
817 	struct dma_pte *parent, *pte = NULL;
818 	int total = agaw_to_level(domain->agaw);
819 	int offset;
820 
821 	parent = domain->pgd;
822 	while (level <= total) {
823 		offset = pfn_level_offset(pfn, total);
824 		pte = &parent[offset];
825 		if (level == total)
826 			return pte;
827 
828 		if (!dma_pte_present(pte)) {
829 			*large_page = total;
830 			break;
831 		}
832 
833 		if (pte->val & DMA_PTE_LARGE_PAGE) {
834 			*large_page = total;
835 			return pte;
836 		}
837 
838 		parent = phys_to_virt(dma_pte_addr(pte));
839 		total--;
840 	}
841 	return NULL;
842 }
843 
844 /* clear last level pte; a tlb flush should follow */
845 static int dma_pte_clear_range(struct dmar_domain *domain,
846 				unsigned long start_pfn,
847 				unsigned long last_pfn)
848 {
849 	int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
850 	unsigned int large_page = 1;
851 	struct dma_pte *first_pte, *pte;
852 	int order;
853 
854 	BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
855 	BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width);
856 	BUG_ON(start_pfn > last_pfn);
857 
858 	/* we don't need lock here; nobody else touches the iova range */
859 	do {
860 		large_page = 1;
861 		first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1, &large_page);
862 		if (!pte) {
863 			start_pfn = align_to_level(start_pfn + 1, large_page + 1);
864 			continue;
865 		}
866 		do {
867 			dma_clear_pte(pte);
868 			start_pfn += lvl_to_nr_pages(large_page);
869 			pte++;
870 		} while (start_pfn <= last_pfn && !first_pte_in_page(pte));
871 
872 		domain_flush_cache(domain, first_pte,
873 				   (void *)pte - (void *)first_pte);
874 
875 	} while (start_pfn && start_pfn <= last_pfn);
876 
877 	order = (large_page - 1) * 9;
878 	return order;
879 }
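/*
 * The returned order is relative to 4KiB VT-d pages for the last leaf level
 * seen: 0 for ordinary PTEs (large_page == 1), 9 if a 2MiB superpage leaf
 * was cleared, 18 for 1GiB.
 */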
880 
881 /* free page table pages. last level pte should already be cleared */
882 static void dma_pte_free_pagetable(struct dmar_domain *domain,
883 				   unsigned long start_pfn,
884 				   unsigned long last_pfn)
885 {
886 	int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
887 	struct dma_pte *first_pte, *pte;
888 	int total = agaw_to_level(domain->agaw);
889 	int level;
890 	unsigned long tmp;
891 	int large_page = 2;
892 
893 	BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
894 	BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width);
895 	BUG_ON(start_pfn > last_pfn);
896 
897 	/* We don't need lock here; nobody else touches the iova range */
898 	level = 2;
899 	while (level <= total) {
900 		tmp = align_to_level(start_pfn, level);
901 
902 		/* If we can't even clear one PTE at this level, we're done */
903 		if (tmp + level_size(level) - 1 > last_pfn)
904 			return;
905 
906 		do {
907 			large_page = level;
908 			first_pte = pte = dma_pfn_level_pte(domain, tmp, level, &large_page);
909 			if (large_page > level)
910 				level = large_page + 1;
911 			if (!pte) {
912 				tmp = align_to_level(tmp + 1, level + 1);
913 				continue;
914 			}
915 			do {
916 				if (dma_pte_present(pte)) {
917 					free_pgtable_page(phys_to_virt(dma_pte_addr(pte)));
918 					dma_clear_pte(pte);
919 				}
920 				pte++;
921 				tmp += level_size(level);
922 			} while (!first_pte_in_page(pte) &&
923 				 tmp + level_size(level) - 1 <= last_pfn);
924 
925 			domain_flush_cache(domain, first_pte,
926 					   (void *)pte - (void *)first_pte);
927 
928 		} while (tmp && tmp + level_size(level) - 1 <= last_pfn);
929 		level++;
930 	}
931 	/* free pgd */
932 	if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
933 		free_pgtable_page(domain->pgd);
934 		domain->pgd = NULL;
935 	}
936 }
937 
938 /* iommu handling */
939 static int iommu_alloc_root_entry(struct intel_iommu *iommu)
940 {
941 	struct root_entry *root;
942 	unsigned long flags;
943 
944 	root = (struct root_entry *)alloc_pgtable_page(iommu->node);
945 	if (!root)
946 		return -ENOMEM;
947 
948 	__iommu_flush_cache(iommu, root, ROOT_SIZE);
949 
950 	spin_lock_irqsave(&iommu->lock, flags);
951 	iommu->root_entry = root;
952 	spin_unlock_irqrestore(&iommu->lock, flags);
953 
954 	return 0;
955 }
956 
957 static void iommu_set_root_entry(struct intel_iommu *iommu)
958 {
959 	void *addr;
960 	u32 sts;
961 	unsigned long flag;
962 
963 	addr = iommu->root_entry;
964 
965 	raw_spin_lock_irqsave(&iommu->register_lock, flag);
966 	dmar_writeq(iommu->reg + DMAR_RTADDR_REG, virt_to_phys(addr));
967 
968 	writel(iommu->gcmd | DMA_GCMD_SRTP, iommu->reg + DMAR_GCMD_REG);
969 
970 	/* Make sure hardware complete it */
971 	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
972 		      readl, (sts & DMA_GSTS_RTPS), sts);
973 
974 	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
975 }
976 
977 static void iommu_flush_write_buffer(struct intel_iommu *iommu)
978 {
979 	u32 val;
980 	unsigned long flag;
981 
982 	if (!rwbf_quirk && !cap_rwbf(iommu->cap))
983 		return;
984 
985 	raw_spin_lock_irqsave(&iommu->register_lock, flag);
986 	writel(iommu->gcmd | DMA_GCMD_WBF, iommu->reg + DMAR_GCMD_REG);
987 
988 	/* Make sure hardware complete it */
989 	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
990 		      readl, (!(val & DMA_GSTS_WBFS)), val);
991 
992 	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
993 }
994 
995 /* return value determines if we need a write buffer flush */
996 static void __iommu_flush_context(struct intel_iommu *iommu,
997 				  u16 did, u16 source_id, u8 function_mask,
998 				  u64 type)
999 {
1000 	u64 val = 0;
1001 	unsigned long flag;
1002 
1003 	switch (type) {
1004 	case DMA_CCMD_GLOBAL_INVL:
1005 		val = DMA_CCMD_GLOBAL_INVL;
1006 		break;
1007 	case DMA_CCMD_DOMAIN_INVL:
1008 		val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
1009 		break;
1010 	case DMA_CCMD_DEVICE_INVL:
1011 		val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
1012 			| DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
1013 		break;
1014 	default:
1015 		BUG();
1016 	}
1017 	val |= DMA_CCMD_ICC;
1018 
1019 	raw_spin_lock_irqsave(&iommu->register_lock, flag);
1020 	dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);
1021 
1022 	/* Make sure hardware complete it */
1023 	IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
1024 		dmar_readq, (!(val & DMA_CCMD_ICC)), val);
1025 
1026 	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1027 }
1028 
1029 /* return value determines if we need a write buffer flush */
1030 static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
1031 				u64 addr, unsigned int size_order, u64 type)
1032 {
1033 	int tlb_offset = ecap_iotlb_offset(iommu->ecap);
1034 	u64 val = 0, val_iva = 0;
1035 	unsigned long flag;
1036 
1037 	switch (type) {
1038 	case DMA_TLB_GLOBAL_FLUSH:
1039 		/* global flush doesn't need to set IVA_REG */
1040 		val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
1041 		break;
1042 	case DMA_TLB_DSI_FLUSH:
1043 		val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
1044 		break;
1045 	case DMA_TLB_PSI_FLUSH:
1046 		val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
1047 		/* Note: always flush non-leaf currently */
1048 		val_iva = size_order | addr;
1049 		break;
1050 	default:
1051 		BUG();
1052 	}
1053 	/* Note: set drain read/write */
1054 #if 0
1055 	/*
1056 	 * This is probably to be super secure.. Looks like we can
1057 	 * ignore it without any impact.
1058 	 */
1059 	if (cap_read_drain(iommu->cap))
1060 		val |= DMA_TLB_READ_DRAIN;
1061 #endif
1062 	if (cap_write_drain(iommu->cap))
1063 		val |= DMA_TLB_WRITE_DRAIN;
1064 
1065 	raw_spin_lock_irqsave(&iommu->register_lock, flag);
1066 	/* Note: Only uses first TLB reg currently */
1067 	if (val_iva)
1068 		dmar_writeq(iommu->reg + tlb_offset, val_iva);
1069 	dmar_writeq(iommu->reg + tlb_offset + 8, val);
1070 
1071 	/* Make sure hardware complete it */
1072 	IOMMU_WAIT_OP(iommu, tlb_offset + 8,
1073 		dmar_readq, (!(val & DMA_TLB_IVT)), val);
1074 
1075 	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1076 
1077 	/* check IOTLB invalidation granularity */
1078 	if (DMA_TLB_IAIG(val) == 0)
1079 		printk(KERN_ERR"IOMMU: flush IOTLB failed\n");
1080 	if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
1081 		pr_debug("IOMMU: tlb flush request %Lx, actual %Lx\n",
1082 			(unsigned long long)DMA_TLB_IIRG(type),
1083 			(unsigned long long)DMA_TLB_IAIG(val));
1084 }
1085 
1086 static struct device_domain_info *iommu_support_dev_iotlb(
1087 	struct dmar_domain *domain, int segment, u8 bus, u8 devfn)
1088 {
1089 	int found = 0;
1090 	unsigned long flags;
1091 	struct device_domain_info *info;
1092 	struct intel_iommu *iommu = device_to_iommu(segment, bus, devfn);
1093 
1094 	if (!ecap_dev_iotlb_support(iommu->ecap))
1095 		return NULL;
1096 
1097 	if (!iommu->qi)
1098 		return NULL;
1099 
1100 	spin_lock_irqsave(&device_domain_lock, flags);
1101 	list_for_each_entry(info, &domain->devices, link)
1102 		if (info->bus == bus && info->devfn == devfn) {
1103 			found = 1;
1104 			break;
1105 		}
1106 	spin_unlock_irqrestore(&device_domain_lock, flags);
1107 
1108 	if (!found || !info->dev)
1109 		return NULL;
1110 
1111 	if (!pci_find_ext_capability(info->dev, PCI_EXT_CAP_ID_ATS))
1112 		return NULL;
1113 
1114 	if (!dmar_find_matched_atsr_unit(info->dev))
1115 		return NULL;
1116 
1117 	info->iommu = iommu;
1118 
1119 	return info;
1120 }
1121 
1122 static void iommu_enable_dev_iotlb(struct device_domain_info *info)
1123 {
1124 	if (!info)
1125 		return;
1126 
1127 	pci_enable_ats(info->dev, VTD_PAGE_SHIFT);
1128 }
1129 
1130 static void iommu_disable_dev_iotlb(struct device_domain_info *info)
1131 {
1132 	if (!info->dev || !pci_ats_enabled(info->dev))
1133 		return;
1134 
1135 	pci_disable_ats(info->dev);
1136 }
1137 
1138 static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
1139 				  u64 addr, unsigned mask)
1140 {
1141 	u16 sid, qdep;
1142 	unsigned long flags;
1143 	struct device_domain_info *info;
1144 
1145 	spin_lock_irqsave(&device_domain_lock, flags);
1146 	list_for_each_entry(info, &domain->devices, link) {
1147 		if (!info->dev || !pci_ats_enabled(info->dev))
1148 			continue;
1149 
1150 		sid = info->bus << 8 | info->devfn;
1151 		qdep = pci_ats_queue_depth(info->dev);
1152 		qi_flush_dev_iotlb(info->iommu, sid, qdep, addr, mask);
1153 	}
1154 	spin_unlock_irqrestore(&device_domain_lock, flags);
1155 }
1156 
1157 static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
1158 				  unsigned long pfn, unsigned int pages, int map)
1159 {
1160 	unsigned int mask = ilog2(__roundup_pow_of_two(pages));
1161 	uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT;
1162 
1163 	BUG_ON(pages == 0);
1164 
1165 	/*
1166 	 * Fall back to domain-selective flush if no PSI support or the size is
1167 	 * too big.
1168 	 * PSI requires page size to be 2 ^ x, and the base address is naturally
1169 	 * aligned to the size
1170 	 */
1171 	if (!cap_pgsel_inv(iommu->cap) || mask > cap_max_amask_val(iommu->cap))
1172 		iommu->flush.flush_iotlb(iommu, did, 0, 0,
1173 						DMA_TLB_DSI_FLUSH);
1174 	else
1175 		iommu->flush.flush_iotlb(iommu, did, addr, mask,
1176 						DMA_TLB_PSI_FLUSH);
1177 
1178 	/*
1179 	 * In caching mode, changes of pages from non-present to present require
1180 	 * flush. However, device IOTLB doesn't need to be flushed in this case.
1181 	 */
1182 	if (!cap_caching_mode(iommu->cap) || !map)
1183 		iommu_flush_dev_iotlb(iommu->domains[did], addr, mask);
1184 }
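/*
 * PSI mask example: pages is rounded up to a power of two before ilog2, so
 * flushing 5 pages yields mask 3, i.e. an aligned 8-page (32KiB) range is
 * invalidated; oversized or unsupported requests fall back to a DSI flush.
 */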
1185 
1186 static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
1187 {
1188 	u32 pmen;
1189 	unsigned long flags;
1190 
1191 	raw_spin_lock_irqsave(&iommu->register_lock, flags);
1192 	pmen = readl(iommu->reg + DMAR_PMEN_REG);
1193 	pmen &= ~DMA_PMEN_EPM;
1194 	writel(pmen, iommu->reg + DMAR_PMEN_REG);
1195 
1196 	/* wait for the protected region status bit to clear */
1197 	IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
1198 		readl, !(pmen & DMA_PMEN_PRS), pmen);
1199 
1200 	raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1201 }
1202 
1203 static int iommu_enable_translation(struct intel_iommu *iommu)
1204 {
1205 	u32 sts;
1206 	unsigned long flags;
1207 
1208 	raw_spin_lock_irqsave(&iommu->register_lock, flags);
1209 	iommu->gcmd |= DMA_GCMD_TE;
1210 	writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1211 
1212 	/* Make sure hardware complete it */
1213 	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1214 		      readl, (sts & DMA_GSTS_TES), sts);
1215 
1216 	raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1217 	return 0;
1218 }
1219 
1220 static int iommu_disable_translation(struct intel_iommu *iommu)
1221 {
1222 	u32 sts;
1223 	unsigned long flag;
1224 
1225 	raw_spin_lock_irqsave(&iommu->register_lock, flag);
1226 	iommu->gcmd &= ~DMA_GCMD_TE;
1227 	writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1228 
1229 	/* Make sure hardware complete it */
1230 	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1231 		      readl, (!(sts & DMA_GSTS_TES)), sts);
1232 
1233 	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1234 	return 0;
1235 }
1236 
1237 
1238 static int iommu_init_domains(struct intel_iommu *iommu)
1239 {
1240 	unsigned long ndomains;
1241 	unsigned long nlongs;
1242 
1243 	ndomains = cap_ndoms(iommu->cap);
1244 	pr_debug("IOMMU %d: Number of Domains supported <%ld>\n", iommu->seq_id,
1245 			ndomains);
1246 	nlongs = BITS_TO_LONGS(ndomains);
1247 
1248 	spin_lock_init(&iommu->lock);
1249 
1250 	/* TBD: there might be 64K domains,
1251 	 * consider other allocation for future chip
1252 	 */
1253 	iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
1254 	if (!iommu->domain_ids) {
1255 		printk(KERN_ERR "Allocating domain id array failed\n");
1256 		return -ENOMEM;
1257 	}
1258 	iommu->domains = kcalloc(ndomains, sizeof(struct dmar_domain *),
1259 			GFP_KERNEL);
1260 	if (!iommu->domains) {
1261 		printk(KERN_ERR "Allocating domain array failed\n");
1262 		return -ENOMEM;
1263 	}
1264 
1265 	/*
1266 	 * if Caching mode is set, then invalid translations are tagged
1267 	 * with domainid 0. Hence we need to pre-allocate it.
1268 	 */
1269 	if (cap_caching_mode(iommu->cap))
1270 		set_bit(0, iommu->domain_ids);
1271 	return 0;
1272 }
1273 
1274 
1275 static void domain_exit(struct dmar_domain *domain);
1276 static void vm_domain_exit(struct dmar_domain *domain);
1277 
1278 void free_dmar_iommu(struct intel_iommu *iommu)
1279 {
1280 	struct dmar_domain *domain;
1281 	int i;
1282 	unsigned long flags;
1283 
1284 	if ((iommu->domains) && (iommu->domain_ids)) {
1285 		for_each_set_bit(i, iommu->domain_ids, cap_ndoms(iommu->cap)) {
1286 			domain = iommu->domains[i];
1287 			clear_bit(i, iommu->domain_ids);
1288 
1289 			spin_lock_irqsave(&domain->iommu_lock, flags);
1290 			if (--domain->iommu_count == 0) {
1291 				if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE)
1292 					vm_domain_exit(domain);
1293 				else
1294 					domain_exit(domain);
1295 			}
1296 			spin_unlock_irqrestore(&domain->iommu_lock, flags);
1297 		}
1298 	}
1299 
1300 	if (iommu->gcmd & DMA_GCMD_TE)
1301 		iommu_disable_translation(iommu);
1302 
1303 	if (iommu->irq) {
1304 		irq_set_handler_data(iommu->irq, NULL);
1305 		/* This will mask the irq */
1306 		free_irq(iommu->irq, iommu);
1307 		destroy_irq(iommu->irq);
1308 	}
1309 
1310 	kfree(iommu->domains);
1311 	kfree(iommu->domain_ids);
1312 
1313 	g_iommus[iommu->seq_id] = NULL;
1314 
1315 	/* if all iommus are freed, free g_iommus */
1316 	for (i = 0; i < g_num_of_iommus; i++) {
1317 		if (g_iommus[i])
1318 			break;
1319 	}
1320 
1321 	if (i == g_num_of_iommus)
1322 		kfree(g_iommus);
1323 
1324 	/* free context mapping */
1325 	free_context_table(iommu);
1326 }
1327 
1328 static struct dmar_domain *alloc_domain(void)
1329 {
1330 	struct dmar_domain *domain;
1331 
1332 	domain = alloc_domain_mem();
1333 	if (!domain)
1334 		return NULL;
1335 
1336 	domain->nid = -1;
1337 	memset(&domain->iommu_bmp, 0, sizeof(unsigned long));
1338 	domain->flags = 0;
1339 
1340 	return domain;
1341 }
1342 
1343 static int iommu_attach_domain(struct dmar_domain *domain,
1344 			       struct intel_iommu *iommu)
1345 {
1346 	int num;
1347 	unsigned long ndomains;
1348 	unsigned long flags;
1349 
1350 	ndomains = cap_ndoms(iommu->cap);
1351 
1352 	spin_lock_irqsave(&iommu->lock, flags);
1353 
1354 	num = find_first_zero_bit(iommu->domain_ids, ndomains);
1355 	if (num >= ndomains) {
1356 		spin_unlock_irqrestore(&iommu->lock, flags);
1357 		printk(KERN_ERR "IOMMU: no free domain ids\n");
1358 		return -ENOMEM;
1359 	}
1360 
1361 	domain->id = num;
1362 	set_bit(num, iommu->domain_ids);
1363 	set_bit(iommu->seq_id, &domain->iommu_bmp);
1364 	iommu->domains[num] = domain;
1365 	spin_unlock_irqrestore(&iommu->lock, flags);
1366 
1367 	return 0;
1368 }
1369 
1370 static void iommu_detach_domain(struct dmar_domain *domain,
1371 				struct intel_iommu *iommu)
1372 {
1373 	unsigned long flags;
1374 	int num, ndomains;
1375 	int found = 0;
1376 
1377 	spin_lock_irqsave(&iommu->lock, flags);
1378 	ndomains = cap_ndoms(iommu->cap);
1379 	for_each_set_bit(num, iommu->domain_ids, ndomains) {
1380 		if (iommu->domains[num] == domain) {
1381 			found = 1;
1382 			break;
1383 		}
1384 	}
1385 
1386 	if (found) {
1387 		clear_bit(num, iommu->domain_ids);
1388 		clear_bit(iommu->seq_id, &domain->iommu_bmp);
1389 		iommu->domains[num] = NULL;
1390 	}
1391 	spin_unlock_irqrestore(&iommu->lock, flags);
1392 }
1393 
1394 static struct iova_domain reserved_iova_list;
1395 static struct lock_class_key reserved_rbtree_key;
1396 
1397 static int dmar_init_reserved_ranges(void)
1398 {
1399 	struct pci_dev *pdev = NULL;
1400 	struct iova *iova;
1401 	int i;
1402 
1403 	init_iova_domain(&reserved_iova_list, DMA_32BIT_PFN);
1404 
1405 	lockdep_set_class(&reserved_iova_list.iova_rbtree_lock,
1406 		&reserved_rbtree_key);
1407 
1408 	/* IOAPIC ranges shouldn't be accessed by DMA */
1409 	iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
1410 		IOVA_PFN(IOAPIC_RANGE_END));
1411 	if (!iova) {
1412 		printk(KERN_ERR "Reserve IOAPIC range failed\n");
1413 		return -ENODEV;
1414 	}
1415 
1416 	/* Reserve all PCI MMIO to avoid peer-to-peer access */
1417 	for_each_pci_dev(pdev) {
1418 		struct resource *r;
1419 
1420 		for (i = 0; i < PCI_NUM_RESOURCES; i++) {
1421 			r = &pdev->resource[i];
1422 			if (!r->flags || !(r->flags & IORESOURCE_MEM))
1423 				continue;
1424 			iova = reserve_iova(&reserved_iova_list,
1425 					    IOVA_PFN(r->start),
1426 					    IOVA_PFN(r->end));
1427 			if (!iova) {
1428 				printk(KERN_ERR "Reserve iova failed\n");
1429 				return -ENODEV;
1430 			}
1431 		}
1432 	}
1433 	return 0;
1434 }
1435 
1436 static void domain_reserve_special_ranges(struct dmar_domain *domain)
1437 {
1438 	copy_reserved_iova(&reserved_iova_list, &domain->iovad);
1439 }
1440 
1441 static inline int guestwidth_to_adjustwidth(int gaw)
1442 {
1443 	int agaw;
1444 	int r = (gaw - 12) % 9;
1445 
1446 	if (r == 0)
1447 		agaw = gaw;
1448 	else
1449 		agaw = gaw + 9 - r;
1450 	if (agaw > 64)
1451 		agaw = 64;
1452 	return agaw;
1453 }
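/*
 * Example: gaw 48 -> (48-12) % 9 == 0, so agaw stays 48; gaw 40 ->
 * remainder 1, so it is rounded up to 48; results are capped at 64.
 */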
1454 
1455 static int domain_init(struct dmar_domain *domain, int guest_width)
1456 {
1457 	struct intel_iommu *iommu;
1458 	int adjust_width, agaw;
1459 	unsigned long sagaw;
1460 
1461 	init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
1462 	spin_lock_init(&domain->iommu_lock);
1463 
1464 	domain_reserve_special_ranges(domain);
1465 
1466 	/* calculate AGAW */
1467 	iommu = domain_get_iommu(domain);
1468 	if (guest_width > cap_mgaw(iommu->cap))
1469 		guest_width = cap_mgaw(iommu->cap);
1470 	domain->gaw = guest_width;
1471 	adjust_width = guestwidth_to_adjustwidth(guest_width);
1472 	agaw = width_to_agaw(adjust_width);
1473 	sagaw = cap_sagaw(iommu->cap);
1474 	if (!test_bit(agaw, &sagaw)) {
1475 		/* hardware doesn't support it, choose a bigger one */
1476 		pr_debug("IOMMU: hardware doesn't support agaw %d\n", agaw);
1477 		agaw = find_next_bit(&sagaw, 5, agaw);
1478 		if (agaw >= 5)
1479 			return -ENODEV;
1480 	}
1481 	domain->agaw = agaw;
1482 	INIT_LIST_HEAD(&domain->devices);
1483 
1484 	if (ecap_coherent(iommu->ecap))
1485 		domain->iommu_coherency = 1;
1486 	else
1487 		domain->iommu_coherency = 0;
1488 
1489 	if (ecap_sc_support(iommu->ecap))
1490 		domain->iommu_snooping = 1;
1491 	else
1492 		domain->iommu_snooping = 0;
1493 
1494 	domain->iommu_superpage = fls(cap_super_page_val(iommu->cap));
1495 	domain->iommu_count = 1;
1496 	domain->nid = iommu->node;
1497 
1498 	/* always allocate the top pgd */
1499 	domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
1500 	if (!domain->pgd)
1501 		return -ENOMEM;
1502 	__iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE);
1503 	return 0;
1504 }
1505 
1506 static void domain_exit(struct dmar_domain *domain)
1507 {
1508 	struct dmar_drhd_unit *drhd;
1509 	struct intel_iommu *iommu;
1510 
1511 	/* Domain 0 is reserved, so don't process it */
1512 	if (!domain)
1513 		return;
1514 
1515 	/* Flush any lazy unmaps that may reference this domain */
1516 	if (!intel_iommu_strict)
1517 		flush_unmaps_timeout(0);
1518 
1519 	domain_remove_dev_info(domain);
1520 	/* destroy iovas */
1521 	put_iova_domain(&domain->iovad);
1522 
1523 	/* clear ptes */
1524 	dma_pte_clear_range(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
1525 
1526 	/* free page tables */
1527 	dma_pte_free_pagetable(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
1528 
1529 	for_each_active_iommu(iommu, drhd)
1530 		if (test_bit(iommu->seq_id, &domain->iommu_bmp))
1531 			iommu_detach_domain(domain, iommu);
1532 
1533 	free_domain_mem(domain);
1534 }
1535 
1536 static int domain_context_mapping_one(struct dmar_domain *domain, int segment,
1537 				 u8 bus, u8 devfn, int translation)
1538 {
1539 	struct context_entry *context;
1540 	unsigned long flags;
1541 	struct intel_iommu *iommu;
1542 	struct dma_pte *pgd;
1543 	unsigned long num;
1544 	unsigned long ndomains;
1545 	int id;
1546 	int agaw;
1547 	struct device_domain_info *info = NULL;
1548 
1549 	pr_debug("Set context mapping for %02x:%02x.%d\n",
1550 		bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
1551 
1552 	BUG_ON(!domain->pgd);
1553 	BUG_ON(translation != CONTEXT_TT_PASS_THROUGH &&
1554 	       translation != CONTEXT_TT_MULTI_LEVEL);
1555 
1556 	iommu = device_to_iommu(segment, bus, devfn);
1557 	if (!iommu)
1558 		return -ENODEV;
1559 
1560 	context = device_to_context_entry(iommu, bus, devfn);
1561 	if (!context)
1562 		return -ENOMEM;
1563 	spin_lock_irqsave(&iommu->lock, flags);
1564 	if (context_present(context)) {
1565 		spin_unlock_irqrestore(&iommu->lock, flags);
1566 		return 0;
1567 	}
1568 
1569 	id = domain->id;
1570 	pgd = domain->pgd;
1571 
1572 	if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE ||
1573 	    domain->flags & DOMAIN_FLAG_STATIC_IDENTITY) {
1574 		int found = 0;
1575 
1576 		/* find an available domain id for this device in iommu */
1577 		ndomains = cap_ndoms(iommu->cap);
1578 		for_each_set_bit(num, iommu->domain_ids, ndomains) {
1579 			if (iommu->domains[num] == domain) {
1580 				id = num;
1581 				found = 1;
1582 				break;
1583 			}
1584 		}
1585 
1586 		if (found == 0) {
1587 			num = find_first_zero_bit(iommu->domain_ids, ndomains);
1588 			if (num >= ndomains) {
1589 				spin_unlock_irqrestore(&iommu->lock, flags);
1590 				printk(KERN_ERR "IOMMU: no free domain ids\n");
1591 				return -EFAULT;
1592 			}
1593 
1594 			set_bit(num, iommu->domain_ids);
1595 			iommu->domains[num] = domain;
1596 			id = num;
1597 		}
1598 
1599 		/* Skip top levels of page tables for
1600 		 * an iommu which has a smaller agaw than the default.
1601 		 * Unnecessary for PT mode.
1602 		 */
1603 		if (translation != CONTEXT_TT_PASS_THROUGH) {
1604 			for (agaw = domain->agaw; agaw != iommu->agaw; agaw--) {
1605 				pgd = phys_to_virt(dma_pte_addr(pgd));
1606 				if (!dma_pte_present(pgd)) {
1607 					spin_unlock_irqrestore(&iommu->lock, flags);
1608 					return -ENOMEM;
1609 				}
1610 			}
1611 		}
1612 	}
1613 
1614 	context_set_domain_id(context, id);
1615 
1616 	if (translation != CONTEXT_TT_PASS_THROUGH) {
1617 		info = iommu_support_dev_iotlb(domain, segment, bus, devfn);
1618 		translation = info ? CONTEXT_TT_DEV_IOTLB :
1619 				     CONTEXT_TT_MULTI_LEVEL;
1620 	}
1621 	/*
1622 	 * In pass through mode, AW must be programmed to indicate the largest
1623 	 * AGAW value supported by hardware. And ASR is ignored by hardware.
1624 	 */
1625 	if (unlikely(translation == CONTEXT_TT_PASS_THROUGH))
1626 		context_set_address_width(context, iommu->msagaw);
1627 	else {
1628 		context_set_address_root(context, virt_to_phys(pgd));
1629 		context_set_address_width(context, iommu->agaw);
1630 	}
1631 
1632 	context_set_translation_type(context, translation);
1633 	context_set_fault_enable(context);
1634 	context_set_present(context);
1635 	domain_flush_cache(domain, context, sizeof(*context));
1636 
1637 	/*
1638 	 * It's a non-present to present mapping. If hardware doesn't cache
1639 	 * non-present entries we only need to flush the write-buffer. If it
1640 	 * _does_ cache non-present entries, then it does so in the special
1641 	 * domain #0, which we have to flush:
1642 	 */
1643 	if (cap_caching_mode(iommu->cap)) {
1644 		iommu->flush.flush_context(iommu, 0,
1645 					   (((u16)bus) << 8) | devfn,
1646 					   DMA_CCMD_MASK_NOBIT,
1647 					   DMA_CCMD_DEVICE_INVL);
1648 		iommu->flush.flush_iotlb(iommu, domain->id, 0, 0, DMA_TLB_DSI_FLUSH);
1649 	} else {
1650 		iommu_flush_write_buffer(iommu);
1651 	}
1652 	iommu_enable_dev_iotlb(info);
1653 	spin_unlock_irqrestore(&iommu->lock, flags);
1654 
1655 	spin_lock_irqsave(&domain->iommu_lock, flags);
1656 	if (!test_and_set_bit(iommu->seq_id, &domain->iommu_bmp)) {
1657 		domain->iommu_count++;
1658 		if (domain->iommu_count == 1)
1659 			domain->nid = iommu->node;
1660 		domain_update_iommu_cap(domain);
1661 	}
1662 	spin_unlock_irqrestore(&domain->iommu_lock, flags);
1663 	return 0;
1664 }
1665 
1666 static int
1667 domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev,
1668 			int translation)
1669 {
1670 	int ret;
1671 	struct pci_dev *tmp, *parent;
1672 
1673 	ret = domain_context_mapping_one(domain, pci_domain_nr(pdev->bus),
1674 					 pdev->bus->number, pdev->devfn,
1675 					 translation);
1676 	if (ret)
1677 		return ret;
1678 
1679 	/* dependent device mapping */
1680 	tmp = pci_find_upstream_pcie_bridge(pdev);
1681 	if (!tmp)
1682 		return 0;
1683 	/* Secondary interface's bus number and devfn 0 */
1684 	parent = pdev->bus->self;
1685 	while (parent != tmp) {
1686 		ret = domain_context_mapping_one(domain,
1687 						 pci_domain_nr(parent->bus),
1688 						 parent->bus->number,
1689 						 parent->devfn, translation);
1690 		if (ret)
1691 			return ret;
1692 		parent = parent->bus->self;
1693 	}
1694 	if (pci_is_pcie(tmp)) /* this is a PCIe-to-PCI bridge */
1695 		return domain_context_mapping_one(domain,
1696 					pci_domain_nr(tmp->subordinate),
1697 					tmp->subordinate->number, 0,
1698 					translation);
1699 	else /* this is a legacy PCI bridge */
1700 		return domain_context_mapping_one(domain,
1701 						  pci_domain_nr(tmp->bus),
1702 						  tmp->bus->number,
1703 						  tmp->devfn,
1704 						  translation);
1705 }
1706 
1707 static int domain_context_mapped(struct pci_dev *pdev)
1708 {
1709 	int ret;
1710 	struct pci_dev *tmp, *parent;
1711 	struct intel_iommu *iommu;
1712 
1713 	iommu = device_to_iommu(pci_domain_nr(pdev->bus), pdev->bus->number,
1714 				pdev->devfn);
1715 	if (!iommu)
1716 		return -ENODEV;
1717 
1718 	ret = device_context_mapped(iommu, pdev->bus->number, pdev->devfn);
1719 	if (!ret)
1720 		return ret;
1721 	/* dependent device mapping */
1722 	tmp = pci_find_upstream_pcie_bridge(pdev);
1723 	if (!tmp)
1724 		return ret;
1725 	/* Secondary interface's bus number and devfn 0 */
1726 	parent = pdev->bus->self;
1727 	while (parent != tmp) {
1728 		ret = device_context_mapped(iommu, parent->bus->number,
1729 					    parent->devfn);
1730 		if (!ret)
1731 			return ret;
1732 		parent = parent->bus->self;
1733 	}
1734 	if (pci_is_pcie(tmp))
1735 		return device_context_mapped(iommu, tmp->subordinate->number,
1736 					     0);
1737 	else
1738 		return device_context_mapped(iommu, tmp->bus->number,
1739 					     tmp->devfn);
1740 }
1741 
1742 /* Returns a number of VTD pages, but aligned to MM page size */
1743 static inline unsigned long aligned_nrpages(unsigned long host_addr,
1744 					    size_t size)
1745 {
1746 	host_addr &= ~PAGE_MASK;
1747 	return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT;
1748 }
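/*
 * Example (4KiB MM pages): host_addr offset 0x800 with size 0x1000 spans
 * two MM pages, so PAGE_ALIGN(0x1800) >> VTD_PAGE_SHIFT returns 2 VT-d
 * pages.
 */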
1749 
1750 /* Return largest possible superpage level for a given mapping */
1751 static inline int hardware_largepage_caps(struct dmar_domain *domain,
1752 					  unsigned long iov_pfn,
1753 					  unsigned long phy_pfn,
1754 					  unsigned long pages)
1755 {
1756 	int support, level = 1;
1757 	unsigned long pfnmerge;
1758 
1759 	support = domain->iommu_superpage;
1760 
1761 	/* To use a large page, the virtual *and* physical addresses
1762 	   must be aligned to 2MiB/1GiB/etc. Lower bits set in either
1763 	   of them will mean we have to use smaller pages. So just
1764 	   merge them and check both at once. */
1765 	pfnmerge = iov_pfn | phy_pfn;
1766 
1767 	while (support && !(pfnmerge & ~VTD_STRIDE_MASK)) {
1768 		pages >>= VTD_STRIDE_SHIFT;
1769 		if (!pages)
1770 			break;
1771 		pfnmerge >>= VTD_STRIDE_SHIFT;
1772 		level++;
1773 		support--;
1774 	}
1775 	return level;
1776 }
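
/*
 * Editor's sketch (illustrative only): assuming VTD_STRIDE_SHIFT == 9, a
 * request for iov_pfn 0x200, phy_pfn 0x400 and 0x400 pages merges to
 * pfnmerge = 0x200 | 0x400 = 0x600, whose low nine bits are clear, and the
 * 0x400 remaining pages cover a full 512-page (2MiB) superpage:
 *
 *	pfnmerge & 0x1ff == 0;		// both sides 2MiB-aligned
 *	0x400 >> 9 == 2;		// enough pages left -> level 2
 *
 * so with domain->iommu_superpage >= 1 this run can use 2MiB pages. Shifting
 * pfnmerge again leaves 0x3, so level 3 (1GiB) is ruled out here.
 */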
1777 
1778 static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
1779 			    struct scatterlist *sg, unsigned long phys_pfn,
1780 			    unsigned long nr_pages, int prot)
1781 {
1782 	struct dma_pte *first_pte = NULL, *pte = NULL;
1783 	phys_addr_t uninitialized_var(pteval);
1784 	int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
1785 	unsigned long sg_res;
1786 	unsigned int largepage_lvl = 0;
1787 	unsigned long lvl_pages = 0;
1788 
1789 	BUG_ON(addr_width < BITS_PER_LONG && (iov_pfn + nr_pages - 1) >> addr_width);
1790 
1791 	if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
1792 		return -EINVAL;
1793 
1794 	prot &= DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP;
1795 
1796 	if (sg)
1797 		sg_res = 0;
1798 	else {
1799 		sg_res = nr_pages + 1;
1800 		pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | prot;
1801 	}
1802 
1803 	while (nr_pages > 0) {
1804 		uint64_t tmp;
1805 
1806 		if (!sg_res) {
1807 			sg_res = aligned_nrpages(sg->offset, sg->length);
1808 			sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + sg->offset;
1809 			sg->dma_length = sg->length;
1810 			pteval = page_to_phys(sg_page(sg)) | prot;
1811 			phys_pfn = pteval >> VTD_PAGE_SHIFT;
1812 		}
1813 
1814 		if (!pte) {
1815 			largepage_lvl = hardware_largepage_caps(domain, iov_pfn, phys_pfn, sg_res);
1816 
1817 			first_pte = pte = pfn_to_dma_pte(domain, iov_pfn, largepage_lvl);
1818 			if (!pte)
1819 				return -ENOMEM;
1820 			/* It is a large page */
1821 			if (largepage_lvl > 1)
1822 				pteval |= DMA_PTE_LARGE_PAGE;
1823 			else
1824 				pteval &= ~(uint64_t)DMA_PTE_LARGE_PAGE;
1825 
1826 		}
1827 		/* We don't need lock here, nobody else
1828 		 * touches the iova range
1829 		 */
1830 		tmp = cmpxchg64_local(&pte->val, 0ULL, pteval);
1831 		if (tmp) {
1832 			static int dumps = 5;
1833 			printk(KERN_CRIT "ERROR: DMA PTE for vPFN 0x%lx already set (to %llx not %llx)\n",
1834 			       iov_pfn, tmp, (unsigned long long)pteval);
1835 			if (dumps) {
1836 				dumps--;
1837 				debug_dma_dump_mappings(NULL);
1838 			}
1839 			WARN_ON(1);
1840 		}
1841 
1842 		lvl_pages = lvl_to_nr_pages(largepage_lvl);
1843 
1844 		BUG_ON(nr_pages < lvl_pages);
1845 		BUG_ON(sg_res < lvl_pages);
1846 
1847 		nr_pages -= lvl_pages;
1848 		iov_pfn += lvl_pages;
1849 		phys_pfn += lvl_pages;
1850 		pteval += lvl_pages * VTD_PAGE_SIZE;
1851 		sg_res -= lvl_pages;
1852 
1853 		/* If the next PTE would be the first in a new page, then we
1854 		   need to flush the cache on the entries we've just written.
1855 		   And then we'll need to recalculate 'pte', so clear it and
1856 		   let it get set again in the if (!pte) block above.
1857 
1858 		   If we're done (!nr_pages) we need to flush the cache too.
1859 
1860 		   Also if we've been setting superpages, we may need to
1861 		   recalculate 'pte' and switch back to smaller pages for the
1862 		   end of the mapping, if the trailing size is not enough to
1863 		   use another superpage (i.e. sg_res < lvl_pages). */
1864 		pte++;
1865 		if (!nr_pages || first_pte_in_page(pte) ||
1866 		    (largepage_lvl > 1 && sg_res < lvl_pages)) {
1867 			domain_flush_cache(domain, first_pte,
1868 					   (void *)pte - (void *)first_pte);
1869 			pte = NULL;
1870 		}
1871 
1872 		if (!sg_res && nr_pages)
1873 			sg = sg_next(sg);
1874 	}
1875 	return 0;
1876 }
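
/*
 * Editor's note (a sketch of the two call patterns, not authoritative): for
 * physically contiguous memory the caller passes sg == NULL and a phys_pfn,
 * so sg_res is primed to nr_pages + 1 and the scatterlist branch is never
 * taken; for a scatterlist the caller passes phys_pfn == 0 and each element
 * refills sg_res/pteval in turn. In both cases PTEs are written lock-free
 * with cmpxchg64_local() and flushed in batches: once per page of PTEs, at
 * the end of the mapping, or whenever a superpage run ends early.
 */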
1877 
1878 static inline int domain_sg_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
1879 				    struct scatterlist *sg, unsigned long nr_pages,
1880 				    int prot)
1881 {
1882 	return __domain_mapping(domain, iov_pfn, sg, 0, nr_pages, prot);
1883 }
1884 
1885 static inline int domain_pfn_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
1886 				     unsigned long phys_pfn, unsigned long nr_pages,
1887 				     int prot)
1888 {
1889 	return __domain_mapping(domain, iov_pfn, NULL, phys_pfn, nr_pages, prot);
1890 }
1891 
1892 static void iommu_detach_dev(struct intel_iommu *iommu, u8 bus, u8 devfn)
1893 {
1894 	if (!iommu)
1895 		return;
1896 
1897 	clear_context_table(iommu, bus, devfn);
1898 	iommu->flush.flush_context(iommu, 0, 0, 0,
1899 					   DMA_CCMD_GLOBAL_INVL);
1900 	iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
1901 }
1902 
1903 static void domain_remove_dev_info(struct dmar_domain *domain)
1904 {
1905 	struct device_domain_info *info;
1906 	unsigned long flags;
1907 	struct intel_iommu *iommu;
1908 
1909 	spin_lock_irqsave(&device_domain_lock, flags);
1910 	while (!list_empty(&domain->devices)) {
1911 		info = list_entry(domain->devices.next,
1912 			struct device_domain_info, link);
1913 		list_del(&info->link);
1914 		list_del(&info->global);
1915 		if (info->dev)
1916 			info->dev->dev.archdata.iommu = NULL;
1917 		spin_unlock_irqrestore(&device_domain_lock, flags);
1918 
1919 		iommu_disable_dev_iotlb(info);
1920 		iommu = device_to_iommu(info->segment, info->bus, info->devfn);
1921 		iommu_detach_dev(iommu, info->bus, info->devfn);
1922 		free_devinfo_mem(info);
1923 
1924 		spin_lock_irqsave(&device_domain_lock, flags);
1925 	}
1926 	spin_unlock_irqrestore(&device_domain_lock, flags);
1927 }
1928 
1929 /*
1930  * find_domain
1931  * Note: we use struct pci_dev->dev.archdata.iommu to store the info
1932  */
1933 static struct dmar_domain *
1934 find_domain(struct pci_dev *pdev)
1935 {
1936 	struct device_domain_info *info;
1937 
1938 	/* No lock here, assumes no domain exit in normal case */
1939 	info = pdev->dev.archdata.iommu;
1940 	if (info)
1941 		return info->domain;
1942 	return NULL;
1943 }
1944 
1945 /* domain is initialized */
1946 static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw)
1947 {
1948 	struct dmar_domain *domain, *found = NULL;
1949 	struct intel_iommu *iommu;
1950 	struct dmar_drhd_unit *drhd;
1951 	struct device_domain_info *info, *tmp;
1952 	struct pci_dev *dev_tmp;
1953 	unsigned long flags;
1954 	int bus = 0, devfn = 0;
1955 	int segment;
1956 	int ret;
1957 
1958 	domain = find_domain(pdev);
1959 	if (domain)
1960 		return domain;
1961 
1962 	segment = pci_domain_nr(pdev->bus);
1963 
1964 	dev_tmp = pci_find_upstream_pcie_bridge(pdev);
1965 	if (dev_tmp) {
1966 		if (pci_is_pcie(dev_tmp)) {
1967 			bus = dev_tmp->subordinate->number;
1968 			devfn = 0;
1969 		} else {
1970 			bus = dev_tmp->bus->number;
1971 			devfn = dev_tmp->devfn;
1972 		}
1973 		spin_lock_irqsave(&device_domain_lock, flags);
1974 		list_for_each_entry(info, &device_domain_list, global) {
1975 			if (info->segment == segment &&
1976 			    info->bus == bus && info->devfn == devfn) {
1977 				found = info->domain;
1978 				break;
1979 			}
1980 		}
1981 		spin_unlock_irqrestore(&device_domain_lock, flags);
1982 		/* pcie-pci bridge already has a domain, use it */
1983 		if (found) {
1984 			domain = found;
1985 			goto found_domain;
1986 		}
1987 	}
1988 
1989 	domain = alloc_domain();
1990 	if (!domain)
1991 		goto error;
1992 
1993 	/* Allocate new domain for the device */
1994 	drhd = dmar_find_matched_drhd_unit(pdev);
1995 	if (!drhd) {
1996 		printk(KERN_ERR "IOMMU: can't find DMAR for device %s\n",
1997 			pci_name(pdev));
1998 		return NULL;
1999 	}
2000 	iommu = drhd->iommu;
2001 
2002 	ret = iommu_attach_domain(domain, iommu);
2003 	if (ret) {
2004 		free_domain_mem(domain);
2005 		goto error;
2006 	}
2007 
2008 	if (domain_init(domain, gaw)) {
2009 		domain_exit(domain);
2010 		goto error;
2011 	}
2012 
2013 	/* register pcie-to-pci device */
2014 	if (dev_tmp) {
2015 		info = alloc_devinfo_mem();
2016 		if (!info) {
2017 			domain_exit(domain);
2018 			goto error;
2019 		}
2020 		info->segment = segment;
2021 		info->bus = bus;
2022 		info->devfn = devfn;
2023 		info->dev = NULL;
2024 		info->domain = domain;
2025 		/* This domain is shared by devices under p2p bridge */
2026 		domain->flags |= DOMAIN_FLAG_P2P_MULTIPLE_DEVICES;
2027 
2028 		/* pcie-to-pci bridge already has a domain, use it */
2029 		found = NULL;
2030 		spin_lock_irqsave(&device_domain_lock, flags);
2031 		list_for_each_entry(tmp, &device_domain_list, global) {
2032 			if (tmp->segment == segment &&
2033 			    tmp->bus == bus && tmp->devfn == devfn) {
2034 				found = tmp->domain;
2035 				break;
2036 			}
2037 		}
2038 		if (found) {
2039 			spin_unlock_irqrestore(&device_domain_lock, flags);
2040 			free_devinfo_mem(info);
2041 			domain_exit(domain);
2042 			domain = found;
2043 		} else {
2044 			list_add(&info->link, &domain->devices);
2045 			list_add(&info->global, &device_domain_list);
2046 			spin_unlock_irqrestore(&device_domain_lock, flags);
2047 		}
2048 	}
2049 
2050 found_domain:
2051 	info = alloc_devinfo_mem();
2052 	if (!info)
2053 		goto error;
2054 	info->segment = segment;
2055 	info->bus = pdev->bus->number;
2056 	info->devfn = pdev->devfn;
2057 	info->dev = pdev;
2058 	info->domain = domain;
2059 	spin_lock_irqsave(&device_domain_lock, flags);
2060 	/* somebody is fast */
2061 	found = find_domain(pdev);
2062 	if (found != NULL) {
2063 		spin_unlock_irqrestore(&device_domain_lock, flags);
2064 		if (found != domain) {
2065 			domain_exit(domain);
2066 			domain = found;
2067 		}
2068 		free_devinfo_mem(info);
2069 		return domain;
2070 	}
2071 	list_add(&info->link, &domain->devices);
2072 	list_add(&info->global, &device_domain_list);
2073 	pdev->dev.archdata.iommu = info;
2074 	spin_unlock_irqrestore(&device_domain_lock, flags);
2075 	return domain;
2076 error:
2077 	/* recheck it here, maybe others set it */
2078 	return find_domain(pdev);
2079 }
2080 
2081 static int iommu_identity_mapping;
2082 #define IDENTMAP_ALL		1
2083 #define IDENTMAP_GFX		2
2084 #define IDENTMAP_AZALIA		4
2085 
2086 static int iommu_domain_identity_map(struct dmar_domain *domain,
2087 				     unsigned long long start,
2088 				     unsigned long long end)
2089 {
2090 	unsigned long first_vpfn = start >> VTD_PAGE_SHIFT;
2091 	unsigned long last_vpfn = end >> VTD_PAGE_SHIFT;
2092 
2093 	if (!reserve_iova(&domain->iovad, dma_to_mm_pfn(first_vpfn),
2094 			  dma_to_mm_pfn(last_vpfn))) {
2095 		printk(KERN_ERR "IOMMU: reserve iova failed\n");
2096 		return -ENOMEM;
2097 	}
2098 
2099 	pr_debug("Mapping reserved region %llx-%llx for domain %d\n",
2100 		 start, end, domain->id);
2101 	/*
2102 	 * RMRR range might have overlap with physical memory range,
2103 	 * clear it first
2104 	 */
2105 	dma_pte_clear_range(domain, first_vpfn, last_vpfn);
2106 
2107 	return domain_pfn_mapping(domain, first_vpfn, first_vpfn,
2108 				  last_vpfn - first_vpfn + 1,
2109 				  DMA_PTE_READ|DMA_PTE_WRITE);
2110 }
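
/*
 * Editor's sketch (hypothetical numbers, illustrative only): identity-mapping
 * an RMRR at 0xdd000000-0xdd1fffff does three things in order: reserve that
 * IOVA range so the allocator never hands it out to a driver, clear any stale
 * PTEs covering it, and then install a 1:1 mapping, e.g.
 *
 *	iommu_domain_identity_map(domain, 0xdd000000ULL, 0xdd1fffffULL);
 *
 * which maps DMA pfn 0xdd000 through 0xdd1ff each onto the physical pfn of
 * the same number, readable and writable.
 */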
2111 
2112 static int iommu_prepare_identity_map(struct pci_dev *pdev,
2113 				      unsigned long long start,
2114 				      unsigned long long end)
2115 {
2116 	struct dmar_domain *domain;
2117 	int ret;
2118 
2119 	domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
2120 	if (!domain)
2121 		return -ENOMEM;
2122 
2123 	/* For _hardware_ passthrough, don't bother. But for software
2124 	   passthrough, we do it anyway -- it may indicate a memory
2125 	   range which is reserved in E820 and so didn't get set up
2126 	   in si_domain to start with */
2127 	if (domain == si_domain && hw_pass_through) {
2128 		printk(KERN_INFO "Ignoring identity map for HW passthrough device %s [0x%Lx - 0x%Lx]\n",
2129 		       pci_name(pdev), start, end);
2130 		return 0;
2131 	}
2132 
2133 	printk(KERN_INFO
2134 	       "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
2135 	       pci_name(pdev), start, end);
2136 
2137 	if (end < start) {
2138 		WARN(1, "Your BIOS is broken; RMRR ends before it starts!\n"
2139 			"BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2140 			dmi_get_system_info(DMI_BIOS_VENDOR),
2141 			dmi_get_system_info(DMI_BIOS_VERSION),
2142 		     dmi_get_system_info(DMI_PRODUCT_VERSION));
2143 		ret = -EIO;
2144 		goto error;
2145 	}
2146 
2147 	if (end >> agaw_to_width(domain->agaw)) {
2148 		WARN(1, "Your BIOS is broken; RMRR exceeds permitted address width (%d bits)\n"
2149 		     "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2150 		     agaw_to_width(domain->agaw),
2151 		     dmi_get_system_info(DMI_BIOS_VENDOR),
2152 		     dmi_get_system_info(DMI_BIOS_VERSION),
2153 		     dmi_get_system_info(DMI_PRODUCT_VERSION));
2154 		ret = -EIO;
2155 		goto error;
2156 	}
2157 
2158 	ret = iommu_domain_identity_map(domain, start, end);
2159 	if (ret)
2160 		goto error;
2161 
2162 	/* context entry init */
2163 	ret = domain_context_mapping(domain, pdev, CONTEXT_TT_MULTI_LEVEL);
2164 	if (ret)
2165 		goto error;
2166 
2167 	return 0;
2168 
2169  error:
2170 	domain_exit(domain);
2171 	return ret;
2172 }
2173 
2174 static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
2175 	struct pci_dev *pdev)
2176 {
2177 	if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
2178 		return 0;
2179 	return iommu_prepare_identity_map(pdev, rmrr->base_address,
2180 		rmrr->end_address);
2181 }
2182 
2183 #ifdef CONFIG_INTEL_IOMMU_FLOPPY_WA
2184 static inline void iommu_prepare_isa(void)
2185 {
2186 	struct pci_dev *pdev;
2187 	int ret;
2188 
2189 	pdev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL);
2190 	if (!pdev)
2191 		return;
2192 
2193 	printk(KERN_INFO "IOMMU: Prepare 0-16MiB unity mapping for LPC\n");
2194 	ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024 - 1);
2195 
2196 	if (ret)
2197 		printk(KERN_ERR "IOMMU: Failed to create 0-16MiB identity map; "
2198 		       "floppy might not work\n");
2199 
2200 }
2201 #else
2202 static inline void iommu_prepare_isa(void)
2203 {
2204 	return;
2205 }
2206 #endif /* !CONFIG_INTEL_IOMMU_FLOPPY_WA */
2207 
2208 static int md_domain_init(struct dmar_domain *domain, int guest_width);
2209 
2210 static int __init si_domain_init(int hw)
2211 {
2212 	struct dmar_drhd_unit *drhd;
2213 	struct intel_iommu *iommu;
2214 	int nid, ret = 0;
2215 
2216 	si_domain = alloc_domain();
2217 	if (!si_domain)
2218 		return -EFAULT;
2219 
2220 	pr_debug("Identity mapping domain is domain %d\n", si_domain->id);
2221 
2222 	for_each_active_iommu(iommu, drhd) {
2223 		ret = iommu_attach_domain(si_domain, iommu);
2224 		if (ret) {
2225 			domain_exit(si_domain);
2226 			return -EFAULT;
2227 		}
2228 	}
2229 
2230 	if (md_domain_init(si_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
2231 		domain_exit(si_domain);
2232 		return -EFAULT;
2233 	}
2234 
2235 	si_domain->flags = DOMAIN_FLAG_STATIC_IDENTITY;
2236 
2237 	if (hw)
2238 		return 0;
2239 
2240 	for_each_online_node(nid) {
2241 		unsigned long start_pfn, end_pfn;
2242 		int i;
2243 
2244 		for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
2245 			ret = iommu_domain_identity_map(si_domain,
2246 					PFN_PHYS(start_pfn), PFN_PHYS(end_pfn));
2247 			if (ret)
2248 				return ret;
2249 		}
2250 	}
2251 
2252 	return 0;
2253 }
2254 
2255 static void domain_remove_one_dev_info(struct dmar_domain *domain,
2256 					  struct pci_dev *pdev);
2257 static int identity_mapping(struct pci_dev *pdev)
2258 {
2259 	struct device_domain_info *info;
2260 
2261 	if (likely(!iommu_identity_mapping))
2262 		return 0;
2263 
2264 	info = pdev->dev.archdata.iommu;
2265 	if (info && info != DUMMY_DEVICE_DOMAIN_INFO)
2266 		return (info->domain == si_domain);
2267 
2268 	return 0;
2269 }
2270 
2271 static int domain_add_dev_info(struct dmar_domain *domain,
2272 			       struct pci_dev *pdev,
2273 			       int translation)
2274 {
2275 	struct device_domain_info *info;
2276 	unsigned long flags;
2277 	int ret;
2278 
2279 	info = alloc_devinfo_mem();
2280 	if (!info)
2281 		return -ENOMEM;
2282 
2283 	ret = domain_context_mapping(domain, pdev, translation);
2284 	if (ret) {
2285 		free_devinfo_mem(info);
2286 		return ret;
2287 	}
2288 
2289 	info->segment = pci_domain_nr(pdev->bus);
2290 	info->bus = pdev->bus->number;
2291 	info->devfn = pdev->devfn;
2292 	info->dev = pdev;
2293 	info->domain = domain;
2294 
2295 	spin_lock_irqsave(&device_domain_lock, flags);
2296 	list_add(&info->link, &domain->devices);
2297 	list_add(&info->global, &device_domain_list);
2298 	pdev->dev.archdata.iommu = info;
2299 	spin_unlock_irqrestore(&device_domain_lock, flags);
2300 
2301 	return 0;
2302 }
2303 
2304 static int iommu_should_identity_map(struct pci_dev *pdev, int startup)
2305 {
2306 	if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev))
2307 		return 1;
2308 
2309 	if ((iommu_identity_mapping & IDENTMAP_GFX) && IS_GFX_DEVICE(pdev))
2310 		return 1;
2311 
2312 	if (!(iommu_identity_mapping & IDENTMAP_ALL))
2313 		return 0;
2314 
2315 	/*
2316 	 * We want to start off with all devices in the 1:1 domain, and
2317 	 * take them out later if we find they can't access all of memory.
2318 	 *
2319 	 * However, we can't do this for PCI devices behind bridges,
2320 	 * because all PCI devices behind the same bridge will end up
2321 	 * with the same source-id on their transactions.
2322 	 *
2323 	 * Practically speaking, we can't change things around for these
2324 	 * devices at run-time, because we can't be sure there'll be no
2325 	 * DMA transactions in flight for any of their siblings.
2326 	 *
2327 	 * So PCI devices (unless they're on the root bus) as well as
2328 	 * their parent PCI-PCI or PCIe-PCI bridges must be left _out_ of
2329 	 * the 1:1 domain, just in _case_ one of their siblings turns out
2330 	 * not to be able to map all of memory.
2331 	 */
2332 	if (!pci_is_pcie(pdev)) {
2333 		if (!pci_is_root_bus(pdev->bus))
2334 			return 0;
2335 		if (pdev->class >> 8 == PCI_CLASS_BRIDGE_PCI)
2336 			return 0;
2337 	} else if (pdev->pcie_type == PCI_EXP_TYPE_PCI_BRIDGE)
2338 		return 0;
2339 
2340 	/*
2341 	 * At boot time, we don't yet know if devices will be 64-bit capable.
2342 	 * Assume that they will -- if they turn out not to be, then we can
2343 	 * take them out of the 1:1 domain later.
2344 	 */
2345 	if (!startup) {
2346 		/*
2347 		 * If the device's dma_mask is less than the system's memory
2348 		 * size then this is not a candidate for identity mapping.
2349 		 */
2350 		u64 dma_mask = pdev->dma_mask;
2351 
2352 		if (pdev->dev.coherent_dma_mask &&
2353 		    pdev->dev.coherent_dma_mask < dma_mask)
2354 			dma_mask = pdev->dev.coherent_dma_mask;
2355 
2356 		return dma_mask >= dma_get_required_mask(&pdev->dev);
2357 	}
2358 
2359 	return 1;
2360 }
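
/*
 * Editor's worked example (illustrative only): at run time (startup == 0) a
 * device with a 32-bit dma_mask on a machine whose highest populated address
 * needs more than 32 bits fails the final check --
 *
 *	dma_mask = 0x00000000ffffffffULL;
 *	dma_get_required_mask(&pdev->dev)	-> e.g. 0x1ffffffffULL
 *
 * -- so iommu_should_identity_map() returns 0 and the device is kept out of
 * (or later removed from) the 1:1 si_domain, falling back to DMA remapping.
 */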
2361 
2362 static int __init iommu_prepare_static_identity_mapping(int hw)
2363 {
2364 	struct pci_dev *pdev = NULL;
2365 	int ret;
2366 
2367 	ret = si_domain_init(hw);
2368 	if (ret)
2369 		return -EFAULT;
2370 
2371 	for_each_pci_dev(pdev) {
2372 		/* Skip Host/PCI Bridge devices */
2373 		if (IS_BRIDGE_HOST_DEVICE(pdev))
2374 			continue;
2375 		if (iommu_should_identity_map(pdev, 1)) {
2376 			printk(KERN_INFO "IOMMU: %s identity mapping for device %s\n",
2377 			       hw ? "hardware" : "software", pci_name(pdev));
2378 
2379 			ret = domain_add_dev_info(si_domain, pdev,
2380 						     hw ? CONTEXT_TT_PASS_THROUGH :
2381 						     CONTEXT_TT_MULTI_LEVEL);
2382 			if (ret)
2383 				return ret;
2384 		}
2385 	}
2386 
2387 	return 0;
2388 }
2389 
2390 static int __init init_dmars(void)
2391 {
2392 	struct dmar_drhd_unit *drhd;
2393 	struct dmar_rmrr_unit *rmrr;
2394 	struct pci_dev *pdev;
2395 	struct intel_iommu *iommu;
2396 	int i, ret;
2397 
2398 	/*
2399 	 * for each drhd
2400 	 *    allocate root
2401 	 *    initialize and program root entry to not present
2402 	 * endfor
2403 	 */
2404 	for_each_drhd_unit(drhd) {
2405 		g_num_of_iommus++;
2406 		/*
2407 		 * lock not needed as this is only incremented in the
2408 		 * single-threaded kernel __init code path; all other
2409 		 * accesses are read-only
2410 		 */
2411 	}
2412 
2413 	g_iommus = kcalloc(g_num_of_iommus, sizeof(struct intel_iommu *),
2414 			GFP_KERNEL);
2415 	if (!g_iommus) {
2416 		printk(KERN_ERR "Allocating global iommu array failed\n");
2417 		ret = -ENOMEM;
2418 		goto error;
2419 	}
2420 
2421 	deferred_flush = kzalloc(g_num_of_iommus *
2422 		sizeof(struct deferred_flush_tables), GFP_KERNEL);
2423 	if (!deferred_flush) {
2424 		ret = -ENOMEM;
2425 		goto error;
2426 	}
2427 
2428 	for_each_drhd_unit(drhd) {
2429 		if (drhd->ignored)
2430 			continue;
2431 
2432 		iommu = drhd->iommu;
2433 		g_iommus[iommu->seq_id] = iommu;
2434 
2435 		ret = iommu_init_domains(iommu);
2436 		if (ret)
2437 			goto error;
2438 
2439 		/*
2440 		 * TBD:
2441 		 * we could share the same root & context tables
2442 		 * among all IOMMUs. Need to split it later.
2443 		 */
2444 		ret = iommu_alloc_root_entry(iommu);
2445 		if (ret) {
2446 			printk(KERN_ERR "IOMMU: allocate root entry failed\n");
2447 			goto error;
2448 		}
2449 		if (!ecap_pass_through(iommu->ecap))
2450 			hw_pass_through = 0;
2451 	}
2452 
2453 	/*
2454 	 * Start from the sane iommu hardware state.
2455 	 */
2456 	for_each_drhd_unit(drhd) {
2457 		if (drhd->ignored)
2458 			continue;
2459 
2460 		iommu = drhd->iommu;
2461 
2462 		/*
2463 		 * If the queued invalidation is already initialized by us
2464 		 * (for example, while enabling interrupt-remapping) then
2465 		 * we got the things already rolling from a sane state.
2466 		 */
2467 		if (iommu->qi)
2468 			continue;
2469 
2470 		/*
2471 		 * Clear any previous faults.
2472 		 */
2473 		dmar_fault(-1, iommu);
2474 		/*
2475 		 * Disable queued invalidation if supported and already enabled
2476 		 * before OS handover.
2477 		 */
2478 		dmar_disable_qi(iommu);
2479 	}
2480 
2481 	for_each_drhd_unit(drhd) {
2482 		if (drhd->ignored)
2483 			continue;
2484 
2485 		iommu = drhd->iommu;
2486 
2487 		if (dmar_enable_qi(iommu)) {
2488 			/*
2489 			 * Queued Invalidate not enabled, use Register Based
2490 			 * Invalidate
2491 			 */
2492 			iommu->flush.flush_context = __iommu_flush_context;
2493 			iommu->flush.flush_iotlb = __iommu_flush_iotlb;
2494 			printk(KERN_INFO "IOMMU %d 0x%Lx: using Register based "
2495 			       "invalidation\n",
2496 				iommu->seq_id,
2497 			       (unsigned long long)drhd->reg_base_addr);
2498 		} else {
2499 			iommu->flush.flush_context = qi_flush_context;
2500 			iommu->flush.flush_iotlb = qi_flush_iotlb;
2501 			printk(KERN_INFO "IOMMU %d 0x%Lx: using Queued "
2502 			       "invalidation\n",
2503 				iommu->seq_id,
2504 			       (unsigned long long)drhd->reg_base_addr);
2505 		}
2506 	}
2507 
2508 	if (iommu_pass_through)
2509 		iommu_identity_mapping |= IDENTMAP_ALL;
2510 
2511 #ifdef CONFIG_INTEL_IOMMU_BROKEN_GFX_WA
2512 	iommu_identity_mapping |= IDENTMAP_GFX;
2513 #endif
2514 
2515 	check_tylersburg_isoch();
2516 
2517 	/*
2518 	 * If pass through is not set or not enabled, setup context entries for
2519 	 * identity mappings for rmrr, gfx, and isa and may fall back to static
2520 	 * identity mapping if iommu_identity_mapping is set.
2521 	 */
2522 	if (iommu_identity_mapping) {
2523 		ret = iommu_prepare_static_identity_mapping(hw_pass_through);
2524 		if (ret) {
2525 			printk(KERN_CRIT "Failed to setup IOMMU pass-through\n");
2526 			goto error;
2527 		}
2528 	}
2529 	/*
2530 	 * For each rmrr
2531 	 *   for each dev attached to rmrr
2532 	 *   do
2533 	 *     locate drhd for dev, alloc domain for dev
2534 	 *     allocate free domain
2535 	 *     allocate page table entries for rmrr
2536 	 *     if context not allocated for bus
2537 	 *           allocate and init context
2538 	 *           set present in root table for this bus
2539 	 *     init context with domain, translation etc
2540 	 *    endfor
2541 	 * endfor
2542 	 */
2543 	printk(KERN_INFO "IOMMU: Setting RMRR:\n");
2544 	for_each_rmrr_units(rmrr) {
2545 		for (i = 0; i < rmrr->devices_cnt; i++) {
2546 			pdev = rmrr->devices[i];
2547 			/*
2548 			 * some BIOSes list non-existent devices in the
2549 			 * DMAR table.
2550 			 */
2551 			if (!pdev)
2552 				continue;
2553 			ret = iommu_prepare_rmrr_dev(rmrr, pdev);
2554 			if (ret)
2555 				printk(KERN_ERR
2556 				       "IOMMU: mapping reserved region failed\n");
2557 		}
2558 	}
2559 
2560 	iommu_prepare_isa();
2561 
2562 	/*
2563 	 * for each drhd
2564 	 *   enable fault log
2565 	 *   global invalidate context cache
2566 	 *   global invalidate iotlb
2567 	 *   enable translation
2568 	 */
2569 	for_each_drhd_unit(drhd) {
2570 		if (drhd->ignored) {
2571 			/*
2572 			 * we always have to disable PMRs or DMA may fail on
2573 			 * this device
2574 			 */
2575 			if (force_on)
2576 				iommu_disable_protect_mem_regions(drhd->iommu);
2577 			continue;
2578 		}
2579 		iommu = drhd->iommu;
2580 
2581 		iommu_flush_write_buffer(iommu);
2582 
2583 		ret = dmar_set_interrupt(iommu);
2584 		if (ret)
2585 			goto error;
2586 
2587 		iommu_set_root_entry(iommu);
2588 
2589 		iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
2590 		iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
2591 
2592 		ret = iommu_enable_translation(iommu);
2593 		if (ret)
2594 			goto error;
2595 
2596 		iommu_disable_protect_mem_regions(iommu);
2597 	}
2598 
2599 	return 0;
2600 error:
2601 	for_each_drhd_unit(drhd) {
2602 		if (drhd->ignored)
2603 			continue;
2604 		iommu = drhd->iommu;
2605 		free_iommu(iommu);
2606 	}
2607 	kfree(g_iommus);
2608 	return ret;
2609 }
2610 
2611 /* This takes a number of _MM_ pages, not VTD pages */
2612 static struct iova *intel_alloc_iova(struct device *dev,
2613 				     struct dmar_domain *domain,
2614 				     unsigned long nrpages, uint64_t dma_mask)
2615 {
2616 	struct pci_dev *pdev = to_pci_dev(dev);
2617 	struct iova *iova = NULL;
2618 
2619 	/* Restrict dma_mask to the width that the iommu can handle */
2620 	dma_mask = min_t(uint64_t, DOMAIN_MAX_ADDR(domain->gaw), dma_mask);
2621 
2622 	if (!dmar_forcedac && dma_mask > DMA_BIT_MASK(32)) {
2623 		/*
2624 		 * First try to allocate an io virtual address in
2625 		 * DMA_BIT_MASK(32) and if that fails then try allocating
2626 		 * from higher range
2627 		 */
2628 		iova = alloc_iova(&domain->iovad, nrpages,
2629 				  IOVA_PFN(DMA_BIT_MASK(32)), 1);
2630 		if (iova)
2631 			return iova;
2632 	}
2633 	iova = alloc_iova(&domain->iovad, nrpages, IOVA_PFN(dma_mask), 1);
2634 	if (unlikely(!iova)) {
2635 		printk(KERN_ERR "Allocating %ld-page iova for %s failed",
2636 		       nrpages, pci_name(pdev));
2637 		return NULL;
2638 	}
2639 
2640 	return iova;
2641 }
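
/*
 * Editor's note (sketch of the allocation policy, not authoritative): unless
 * the user forced DAC addressing with intel_iommu=forcedac, a 64-bit capable
 * device first gets a try at the space below 4GiB, e.g.
 *
 *	iova = alloc_iova(&domain->iovad, nrpages,
 *			  IOVA_PFN(DMA_BIT_MASK(32)), 1);
 *
 * and only when that range is exhausted does the allocation retry against
 * the full (device- and domain-limited) dma_mask, keeping single-address-
 * cycle addressing in use for as long as possible.
 */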
2642 
2643 static struct dmar_domain *__get_valid_domain_for_dev(struct pci_dev *pdev)
2644 {
2645 	struct dmar_domain *domain;
2646 	int ret;
2647 
2648 	domain = get_domain_for_dev(pdev,
2649 			DEFAULT_DOMAIN_ADDRESS_WIDTH);
2650 	if (!domain) {
2651 		printk(KERN_ERR
2652 			"Allocating domain for %s failed", pci_name(pdev));
2653 		return NULL;
2654 	}
2655 
2656 	/* make sure context mapping is ok */
2657 	if (unlikely(!domain_context_mapped(pdev))) {
2658 		ret = domain_context_mapping(domain, pdev,
2659 					     CONTEXT_TT_MULTI_LEVEL);
2660 		if (ret) {
2661 			printk(KERN_ERR
2662 				"Domain context map for %s failed",
2663 				pci_name(pdev));
2664 			return NULL;
2665 		}
2666 	}
2667 
2668 	return domain;
2669 }
2670 
2671 static inline struct dmar_domain *get_valid_domain_for_dev(struct pci_dev *dev)
2672 {
2673 	struct device_domain_info *info;
2674 
2675 	/* No lock here, assumes no domain exit in normal case */
2676 	info = dev->dev.archdata.iommu;
2677 	if (likely(info))
2678 		return info->domain;
2679 
2680 	return __get_valid_domain_for_dev(dev);
2681 }
2682 
2683 static int iommu_dummy(struct pci_dev *pdev)
2684 {
2685 	return pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO;
2686 }
2687 
2688 /* Check if the pdev needs to go through non-identity map and unmap process.*/
2689 static int iommu_no_mapping(struct device *dev)
2690 {
2691 	struct pci_dev *pdev;
2692 	int found;
2693 
2694 	if (unlikely(dev->bus != &pci_bus_type))
2695 		return 1;
2696 
2697 	pdev = to_pci_dev(dev);
2698 	if (iommu_dummy(pdev))
2699 		return 1;
2700 
2701 	if (!iommu_identity_mapping)
2702 		return 0;
2703 
2704 	found = identity_mapping(pdev);
2705 	if (found) {
2706 		if (iommu_should_identity_map(pdev, 0))
2707 			return 1;
2708 		else {
2709 			/*
2710 			 * 32 bit DMA is removed from si_domain and fall back
2711 			 * to non-identity mapping.
2712 			 */
2713 			domain_remove_one_dev_info(si_domain, pdev);
2714 			printk(KERN_INFO "32bit %s uses non-identity mapping\n",
2715 			       pci_name(pdev));
2716 			return 0;
2717 		}
2718 	} else {
2719 		/*
2720 		 * In case of a detached 64 bit DMA device from vm, the device
2721 		 * is put into si_domain for identity mapping.
2722 		 */
2723 		if (iommu_should_identity_map(pdev, 0)) {
2724 			int ret;
2725 			ret = domain_add_dev_info(si_domain, pdev,
2726 						  hw_pass_through ?
2727 						  CONTEXT_TT_PASS_THROUGH :
2728 						  CONTEXT_TT_MULTI_LEVEL);
2729 			if (!ret) {
2730 				printk(KERN_INFO "64bit %s uses identity mapping\n",
2731 				       pci_name(pdev));
2732 				return 1;
2733 			}
2734 		}
2735 	}
2736 
2737 	return 0;
2738 }
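
/*
 * Editor's summary (a sketch of the decisions above, not authoritative): at
 * map time a device can migrate between the static identity domain and a
 * private remapping domain --
 *
 *	in si_domain  + should map 1:1	-> stay, skip translation (return 1)
 *	in si_domain  + 32-bit only	-> drop out of si_domain  (return 0)
 *	not in domain + should map 1:1	-> late-add to si_domain  (return 1)
 *	otherwise			-> use normal DMA remapping (return 0)
 *
 * The late-add case covers e.g. a 64-bit device handed back from a virtual
 * machine domain.
 */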
2739 
2740 static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
2741 				     size_t size, int dir, u64 dma_mask)
2742 {
2743 	struct pci_dev *pdev = to_pci_dev(hwdev);
2744 	struct dmar_domain *domain;
2745 	phys_addr_t start_paddr;
2746 	struct iova *iova;
2747 	int prot = 0;
2748 	int ret;
2749 	struct intel_iommu *iommu;
2750 	unsigned long paddr_pfn = paddr >> PAGE_SHIFT;
2751 
2752 	BUG_ON(dir == DMA_NONE);
2753 
2754 	if (iommu_no_mapping(hwdev))
2755 		return paddr;
2756 
2757 	domain = get_valid_domain_for_dev(pdev);
2758 	if (!domain)
2759 		return 0;
2760 
2761 	iommu = domain_get_iommu(domain);
2762 	size = aligned_nrpages(paddr, size);
2763 
2764 	iova = intel_alloc_iova(hwdev, domain, dma_to_mm_pfn(size), dma_mask);
2765 	if (!iova)
2766 		goto error;
2767 
2768 	/*
2769 	 * Check if DMAR supports zero-length reads on write only
2770 	 * mappings.
2771 	 */
2772 	if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
2773 			!cap_zlr(iommu->cap))
2774 		prot |= DMA_PTE_READ;
2775 	if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
2776 		prot |= DMA_PTE_WRITE;
2777 	/*
2778 	 * paddr to (paddr + size) might cover only part of a page; we map the
2779 	 * whole page.  Note: if two parts of one page are mapped separately,
2780 	 * we might end up with two guest addresses mapping to the same host
2781 	 * paddr, but this is not a big problem
2782 	 */
2783 	ret = domain_pfn_mapping(domain, mm_to_dma_pfn(iova->pfn_lo),
2784 				 mm_to_dma_pfn(paddr_pfn), size, prot);
2785 	if (ret)
2786 		goto error;
2787 
2788 	/* it's a non-present to present mapping. Only flush if caching mode */
2789 	if (cap_caching_mode(iommu->cap))
2790 		iommu_flush_iotlb_psi(iommu, domain->id, mm_to_dma_pfn(iova->pfn_lo), size, 1);
2791 	else
2792 		iommu_flush_write_buffer(iommu);
2793 
2794 	start_paddr = (phys_addr_t)iova->pfn_lo << PAGE_SHIFT;
2795 	start_paddr += paddr & ~PAGE_MASK;
2796 	return start_paddr;
2797 
2798 error:
2799 	if (iova)
2800 		__free_iova(&domain->iovad, iova);
2801 	printk(KERN_ERR "Device %s request: %zx@%llx dir %d --- failed\n",
2802 		pci_name(pdev), size, (unsigned long long)paddr, dir);
2803 	return 0;
2804 }
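
/*
 * Editor's worked example (illustrative only): mapping 0x1a00 bytes starting
 * at physical address 0x12345600 first rounds up to aligned_nrpages() == 2
 * VT-d pages, allocates an IOVA of that size, maps both whole pages in the
 * domain's page table, and returns
 *
 *	((phys_addr_t)iova->pfn_lo << PAGE_SHIFT) + (0x12345600 & ~PAGE_MASK)
 *
 * i.e. the sub-page offset 0x600 is preserved in the bus address handed back
 * to the driver.
 */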
2805 
2806 static dma_addr_t intel_map_page(struct device *dev, struct page *page,
2807 				 unsigned long offset, size_t size,
2808 				 enum dma_data_direction dir,
2809 				 struct dma_attrs *attrs)
2810 {
2811 	return __intel_map_single(dev, page_to_phys(page) + offset, size,
2812 				  dir, to_pci_dev(dev)->dma_mask);
2813 }
2814 
2815 static void flush_unmaps(void)
2816 {
2817 	int i, j;
2818 
2819 	timer_on = 0;
2820 
2821 	/* just flush them all */
2822 	for (i = 0; i < g_num_of_iommus; i++) {
2823 		struct intel_iommu *iommu = g_iommus[i];
2824 		if (!iommu)
2825 			continue;
2826 
2827 		if (!deferred_flush[i].next)
2828 			continue;
2829 
2830 		/* In caching mode, global flushes turn emulation expensive */
2831 		if (!cap_caching_mode(iommu->cap))
2832 			iommu->flush.flush_iotlb(iommu, 0, 0, 0,
2833 					 DMA_TLB_GLOBAL_FLUSH);
2834 		for (j = 0; j < deferred_flush[i].next; j++) {
2835 			unsigned long mask;
2836 			struct iova *iova = deferred_flush[i].iova[j];
2837 			struct dmar_domain *domain = deferred_flush[i].domain[j];
2838 
2839 			/* On real hardware multiple invalidations are expensive */
2840 			if (cap_caching_mode(iommu->cap))
2841 				iommu_flush_iotlb_psi(iommu, domain->id,
2842 				iova->pfn_lo, iova->pfn_hi - iova->pfn_lo + 1, 0);
2843 			else {
2844 				mask = ilog2(mm_to_dma_pfn(iova->pfn_hi - iova->pfn_lo + 1));
2845 				iommu_flush_dev_iotlb(deferred_flush[i].domain[j],
2846 						(uint64_t)iova->pfn_lo << PAGE_SHIFT, mask);
2847 			}
2848 			__free_iova(&deferred_flush[i].domain[j]->iovad, iova);
2849 		}
2850 		deferred_flush[i].next = 0;
2851 	}
2852 
2853 	list_size = 0;
2854 }
2855 
2856 static void flush_unmaps_timeout(unsigned long data)
2857 {
2858 	unsigned long flags;
2859 
2860 	spin_lock_irqsave(&async_umap_flush_lock, flags);
2861 	flush_unmaps();
2862 	spin_unlock_irqrestore(&async_umap_flush_lock, flags);
2863 }
2864 
2865 static void add_unmap(struct dmar_domain *dom, struct iova *iova)
2866 {
2867 	unsigned long flags;
2868 	int next, iommu_id;
2869 	struct intel_iommu *iommu;
2870 
2871 	spin_lock_irqsave(&async_umap_flush_lock, flags);
2872 	if (list_size == HIGH_WATER_MARK)
2873 		flush_unmaps();
2874 
2875 	iommu = domain_get_iommu(dom);
2876 	iommu_id = iommu->seq_id;
2877 
2878 	next = deferred_flush[iommu_id].next;
2879 	deferred_flush[iommu_id].domain[next] = dom;
2880 	deferred_flush[iommu_id].iova[next] = iova;
2881 	deferred_flush[iommu_id].next++;
2882 
2883 	if (!timer_on) {
2884 		mod_timer(&unmap_timer, jiffies + msecs_to_jiffies(10));
2885 		timer_on = 1;
2886 	}
2887 	list_size++;
2888 	spin_unlock_irqrestore(&async_umap_flush_lock, flags);
2889 }
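
/*
 * Editor's note (sketch of the batching trade-off, not authoritative): in
 * non-strict mode unmapped IOVAs are parked per-IOMMU rather than flushed
 * one by one; the batch is drained either when HIGH_WATER_MARK entries have
 * accumulated or when the 10ms timer armed here fires, e.g.
 *
 *	mod_timer(&unmap_timer, jiffies + msecs_to_jiffies(10));
 *
 * Until that flush happens the device could still hit a stale IOTLB entry,
 * which is the usual latency-vs-safety trade of deferred invalidation (and
 * why intel_iommu=strict exists).
 */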
2890 
2891 static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr,
2892 			     size_t size, enum dma_data_direction dir,
2893 			     struct dma_attrs *attrs)
2894 {
2895 	struct pci_dev *pdev = to_pci_dev(dev);
2896 	struct dmar_domain *domain;
2897 	unsigned long start_pfn, last_pfn;
2898 	struct iova *iova;
2899 	struct intel_iommu *iommu;
2900 
2901 	if (iommu_no_mapping(dev))
2902 		return;
2903 
2904 	domain = find_domain(pdev);
2905 	BUG_ON(!domain);
2906 
2907 	iommu = domain_get_iommu(domain);
2908 
2909 	iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr));
2910 	if (WARN_ONCE(!iova, "Driver unmaps unmatched page at PFN %llx\n",
2911 		      (unsigned long long)dev_addr))
2912 		return;
2913 
2914 	start_pfn = mm_to_dma_pfn(iova->pfn_lo);
2915 	last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1;
2916 
2917 	pr_debug("Device %s unmapping: pfn %lx-%lx\n",
2918 		 pci_name(pdev), start_pfn, last_pfn);
2919 
2920 	/*  clear the whole page */
2921 	dma_pte_clear_range(domain, start_pfn, last_pfn);
2922 
2923 	/* free page tables */
2924 	dma_pte_free_pagetable(domain, start_pfn, last_pfn);
2925 
2926 	if (intel_iommu_strict) {
2927 		iommu_flush_iotlb_psi(iommu, domain->id, start_pfn,
2928 				      last_pfn - start_pfn + 1, 0);
2929 		/* free iova */
2930 		__free_iova(&domain->iovad, iova);
2931 	} else {
2932 		add_unmap(domain, iova);
2933 		/*
2934 		 * queue up the release of the unmap to save the 1/6th of the
2935 		 * cpu used up by the iotlb flush operation...
2936 		 */
2937 	}
2938 }
2939 
2940 static void *intel_alloc_coherent(struct device *hwdev, size_t size,
2941 				  dma_addr_t *dma_handle, gfp_t flags)
2942 {
2943 	void *vaddr;
2944 	int order;
2945 
2946 	size = PAGE_ALIGN(size);
2947 	order = get_order(size);
2948 
2949 	if (!iommu_no_mapping(hwdev))
2950 		flags &= ~(GFP_DMA | GFP_DMA32);
2951 	else if (hwdev->coherent_dma_mask < dma_get_required_mask(hwdev)) {
2952 		if (hwdev->coherent_dma_mask < DMA_BIT_MASK(32))
2953 			flags |= GFP_DMA;
2954 		else
2955 			flags |= GFP_DMA32;
2956 	}
2957 
2958 	vaddr = (void *)__get_free_pages(flags, order);
2959 	if (!vaddr)
2960 		return NULL;
2961 	memset(vaddr, 0, size);
2962 
2963 	*dma_handle = __intel_map_single(hwdev, virt_to_bus(vaddr), size,
2964 					 DMA_BIDIRECTIONAL,
2965 					 hwdev->coherent_dma_mask);
2966 	if (*dma_handle)
2967 		return vaddr;
2968 	free_pages((unsigned long)vaddr, order);
2969 	return NULL;
2970 }
2971 
2972 static void intel_free_coherent(struct device *hwdev, size_t size, void *vaddr,
2973 				dma_addr_t dma_handle)
2974 {
2975 	int order;
2976 
2977 	size = PAGE_ALIGN(size);
2978 	order = get_order(size);
2979 
2980 	intel_unmap_page(hwdev, dma_handle, size, DMA_BIDIRECTIONAL, NULL);
2981 	free_pages((unsigned long)vaddr, order);
2982 }
2983 
2984 static void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
2985 			   int nelems, enum dma_data_direction dir,
2986 			   struct dma_attrs *attrs)
2987 {
2988 	struct pci_dev *pdev = to_pci_dev(hwdev);
2989 	struct dmar_domain *domain;
2990 	unsigned long start_pfn, last_pfn;
2991 	struct iova *iova;
2992 	struct intel_iommu *iommu;
2993 
2994 	if (iommu_no_mapping(hwdev))
2995 		return;
2996 
2997 	domain = find_domain(pdev);
2998 	BUG_ON(!domain);
2999 
3000 	iommu = domain_get_iommu(domain);
3001 
3002 	iova = find_iova(&domain->iovad, IOVA_PFN(sglist[0].dma_address));
3003 	if (WARN_ONCE(!iova, "Driver unmaps unmatched sglist at PFN %llx\n",
3004 		      (unsigned long long)sglist[0].dma_address))
3005 		return;
3006 
3007 	start_pfn = mm_to_dma_pfn(iova->pfn_lo);
3008 	last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1;
3009 
3010 	/*  clear the whole page */
3011 	dma_pte_clear_range(domain, start_pfn, last_pfn);
3012 
3013 	/* free page tables */
3014 	dma_pte_free_pagetable(domain, start_pfn, last_pfn);
3015 
3016 	if (intel_iommu_strict) {
3017 		iommu_flush_iotlb_psi(iommu, domain->id, start_pfn,
3018 				      last_pfn - start_pfn + 1, 0);
3019 		/* free iova */
3020 		__free_iova(&domain->iovad, iova);
3021 	} else {
3022 		add_unmap(domain, iova);
3023 		/*
3024 		 * queue up the release of the unmap to save the 1/6th of the
3025 		 * cpu used up by the iotlb flush operation...
3026 		 */
3027 	}
3028 }
3029 
3030 static int intel_nontranslate_map_sg(struct device *hddev,
3031 	struct scatterlist *sglist, int nelems, int dir)
3032 {
3033 	int i;
3034 	struct scatterlist *sg;
3035 
3036 	for_each_sg(sglist, sg, nelems, i) {
3037 		BUG_ON(!sg_page(sg));
3038 		sg->dma_address = page_to_phys(sg_page(sg)) + sg->offset;
3039 		sg->dma_length = sg->length;
3040 	}
3041 	return nelems;
3042 }
3043 
3044 static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems,
3045 			enum dma_data_direction dir, struct dma_attrs *attrs)
3046 {
3047 	int i;
3048 	struct pci_dev *pdev = to_pci_dev(hwdev);
3049 	struct dmar_domain *domain;
3050 	size_t size = 0;
3051 	int prot = 0;
3052 	struct iova *iova = NULL;
3053 	int ret;
3054 	struct scatterlist *sg;
3055 	unsigned long start_vpfn;
3056 	struct intel_iommu *iommu;
3057 
3058 	BUG_ON(dir == DMA_NONE);
3059 	if (iommu_no_mapping(hwdev))
3060 		return intel_nontranslate_map_sg(hwdev, sglist, nelems, dir);
3061 
3062 	domain = get_valid_domain_for_dev(pdev);
3063 	if (!domain)
3064 		return 0;
3065 
3066 	iommu = domain_get_iommu(domain);
3067 
3068 	for_each_sg(sglist, sg, nelems, i)
3069 		size += aligned_nrpages(sg->offset, sg->length);
3070 
3071 	iova = intel_alloc_iova(hwdev, domain, dma_to_mm_pfn(size),
3072 				pdev->dma_mask);
3073 	if (!iova) {
3074 		sglist->dma_length = 0;
3075 		return 0;
3076 	}
3077 
3078 	/*
3079 	 * Check if DMAR supports zero-length reads on write only
3080 	 * mappings.
3081 	 */
3082 	if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
3083 			!cap_zlr(iommu->cap))
3084 		prot |= DMA_PTE_READ;
3085 	if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3086 		prot |= DMA_PTE_WRITE;
3087 
3088 	start_vpfn = mm_to_dma_pfn(iova->pfn_lo);
3089 
3090 	ret = domain_sg_mapping(domain, start_vpfn, sglist, size, prot);
3091 	if (unlikely(ret)) {
3092 		/*  clear the page */
3093 		dma_pte_clear_range(domain, start_vpfn,
3094 				    start_vpfn + size - 1);
3095 		/* free page tables */
3096 		dma_pte_free_pagetable(domain, start_vpfn,
3097 				       start_vpfn + size - 1);
3098 		/* free iova */
3099 		__free_iova(&domain->iovad, iova);
3100 		return 0;
3101 	}
3102 
3103 	/* it's a non-present to present mapping. Only flush if caching mode */
3104 	if (cap_caching_mode(iommu->cap))
3105 		iommu_flush_iotlb_psi(iommu, domain->id, start_vpfn, size, 1);
3106 	else
3107 		iommu_flush_write_buffer(iommu);
3108 
3109 	return nelems;
3110 }
3111 
3112 static int intel_mapping_error(struct device *dev, dma_addr_t dma_addr)
3113 {
3114 	return !dma_addr;
3115 }
3116 
3117 struct dma_map_ops intel_dma_ops = {
3118 	.alloc_coherent = intel_alloc_coherent,
3119 	.free_coherent = intel_free_coherent,
3120 	.map_sg = intel_map_sg,
3121 	.unmap_sg = intel_unmap_sg,
3122 	.map_page = intel_map_page,
3123 	.unmap_page = intel_unmap_page,
3124 	.mapping_error = intel_mapping_error,
3125 };
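
/*
 * Editor's usage sketch (illustrative, assumes an ordinary PCI driver): once
 * intel_iommu_init() installs this table as the global dma_ops, a driver's
 * regular streaming-DMA calls are routed through it, e.g.
 *
 *	dma_addr_t bus = dma_map_page(&pdev->dev, page, 0, PAGE_SIZE,
 *				      DMA_TO_DEVICE);
 *	if (dma_mapping_error(&pdev->dev, bus))
 *		goto err;	 // intel_mapping_error(): handle is 0
 *	...
 *	dma_unmap_page(&pdev->dev, bus, PAGE_SIZE, DMA_TO_DEVICE);
 *
 * dma_map_page() ends up in intel_map_page() above, and the handle it
 * returns is an IOVA, not a physical address.
 */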
3126 
3127 static inline int iommu_domain_cache_init(void)
3128 {
3129 	int ret = 0;
3130 
3131 	iommu_domain_cache = kmem_cache_create("iommu_domain",
3132 					 sizeof(struct dmar_domain),
3133 					 0,
3134 					 SLAB_HWCACHE_ALIGN,
3135 
3136 					 NULL);
3137 	if (!iommu_domain_cache) {
3138 		printk(KERN_ERR "Couldn't create iommu_domain cache\n");
3139 		ret = -ENOMEM;
3140 	}
3141 
3142 	return ret;
3143 }
3144 
3145 static inline int iommu_devinfo_cache_init(void)
3146 {
3147 	int ret = 0;
3148 
3149 	iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
3150 					 sizeof(struct device_domain_info),
3151 					 0,
3152 					 SLAB_HWCACHE_ALIGN,
3153 					 NULL);
3154 	if (!iommu_devinfo_cache) {
3155 		printk(KERN_ERR "Couldn't create devinfo cache\n");
3156 		ret = -ENOMEM;
3157 	}
3158 
3159 	return ret;
3160 }
3161 
3162 static inline int iommu_iova_cache_init(void)
3163 {
3164 	int ret = 0;
3165 
3166 	iommu_iova_cache = kmem_cache_create("iommu_iova",
3167 					 sizeof(struct iova),
3168 					 0,
3169 					 SLAB_HWCACHE_ALIGN,
3170 					 NULL);
3171 	if (!iommu_iova_cache) {
3172 		printk(KERN_ERR "Couldn't create iova cache\n");
3173 		ret = -ENOMEM;
3174 	}
3175 
3176 	return ret;
3177 }
3178 
3179 static int __init iommu_init_mempool(void)
3180 {
3181 	int ret;
3182 	ret = iommu_iova_cache_init();
3183 	if (ret)
3184 		return ret;
3185 
3186 	ret = iommu_domain_cache_init();
3187 	if (ret)
3188 		goto domain_error;
3189 
3190 	ret = iommu_devinfo_cache_init();
3191 	if (!ret)
3192 		return ret;
3193 
3194 	kmem_cache_destroy(iommu_domain_cache);
3195 domain_error:
3196 	kmem_cache_destroy(iommu_iova_cache);
3197 
3198 	return -ENOMEM;
3199 }
3200 
3201 static void __init iommu_exit_mempool(void)
3202 {
3203 	kmem_cache_destroy(iommu_devinfo_cache);
3204 	kmem_cache_destroy(iommu_domain_cache);
3205 	kmem_cache_destroy(iommu_iova_cache);
3206 
3207 }
3208 
3209 static void quirk_ioat_snb_local_iommu(struct pci_dev *pdev)
3210 {
3211 	struct dmar_drhd_unit *drhd;
3212 	u32 vtbar;
3213 	int rc;
3214 
3215 	/* We know that this device on this chipset has its own IOMMU.
3216 	 * If we find it under a different IOMMU, then the BIOS is lying
3217 	 * to us. Hope that the IOMMU for this device is actually
3218 	 * disabled, and it needs no translation...
3219 	 */
3220 	rc = pci_bus_read_config_dword(pdev->bus, PCI_DEVFN(0, 0), 0xb0, &vtbar);
3221 	if (rc) {
3222 		/* "can't" happen */
3223 		dev_info(&pdev->dev, "failed to run vt-d quirk\n");
3224 		return;
3225 	}
3226 	vtbar &= 0xffff0000;
3227 
3228 	/* we know that this iommu should be at offset 0xa000 from vtbar */
3229 	drhd = dmar_find_matched_drhd_unit(pdev);
3230 	if (WARN_TAINT_ONCE(!drhd || drhd->reg_base_addr - vtbar != 0xa000,
3231 			    TAINT_FIRMWARE_WORKAROUND,
3232 			    "BIOS assigned incorrect VT-d unit for Intel(R) QuickData Technology device\n"))
3233 		pdev->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
3234 }
3235 DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB, quirk_ioat_snb_local_iommu);
3236 
3237 static void __init init_no_remapping_devices(void)
3238 {
3239 	struct dmar_drhd_unit *drhd;
3240 
3241 	for_each_drhd_unit(drhd) {
3242 		if (!drhd->include_all) {
3243 			int i;
3244 			for (i = 0; i < drhd->devices_cnt; i++)
3245 				if (drhd->devices[i] != NULL)
3246 					break;
3247 			/* ignore DMAR unit if no pci devices exist */
3248 			if (i == drhd->devices_cnt)
3249 				drhd->ignored = 1;
3250 		}
3251 	}
3252 
3253 	for_each_drhd_unit(drhd) {
3254 		int i;
3255 		if (drhd->ignored || drhd->include_all)
3256 			continue;
3257 
3258 		for (i = 0; i < drhd->devices_cnt; i++)
3259 			if (drhd->devices[i] &&
3260 			    !IS_GFX_DEVICE(drhd->devices[i]))
3261 				break;
3262 
3263 		if (i < drhd->devices_cnt)
3264 			continue;
3265 
3266 		/* This IOMMU has *only* gfx devices. Either bypass it or
3267 		   set the gfx_mapped flag, as appropriate */
3268 		if (dmar_map_gfx) {
3269 			intel_iommu_gfx_mapped = 1;
3270 		} else {
3271 			drhd->ignored = 1;
3272 			for (i = 0; i < drhd->devices_cnt; i++) {
3273 				if (!drhd->devices[i])
3274 					continue;
3275 				drhd->devices[i]->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
3276 			}
3277 		}
3278 	}
3279 }
3280 
3281 #ifdef CONFIG_SUSPEND
3282 static int init_iommu_hw(void)
3283 {
3284 	struct dmar_drhd_unit *drhd;
3285 	struct intel_iommu *iommu = NULL;
3286 
3287 	for_each_active_iommu(iommu, drhd)
3288 		if (iommu->qi)
3289 			dmar_reenable_qi(iommu);
3290 
3291 	for_each_iommu(iommu, drhd) {
3292 		if (drhd->ignored) {
3293 			/*
3294 			 * we always have to disable PMRs or DMA may fail on
3295 			 * this device
3296 			 */
3297 			if (force_on)
3298 				iommu_disable_protect_mem_regions(iommu);
3299 			continue;
3300 		}
3301 
3302 		iommu_flush_write_buffer(iommu);
3303 
3304 		iommu_set_root_entry(iommu);
3305 
3306 		iommu->flush.flush_context(iommu, 0, 0, 0,
3307 					   DMA_CCMD_GLOBAL_INVL);
3308 		iommu->flush.flush_iotlb(iommu, 0, 0, 0,
3309 					 DMA_TLB_GLOBAL_FLUSH);
3310 		if (iommu_enable_translation(iommu))
3311 			return 1;
3312 		iommu_disable_protect_mem_regions(iommu);
3313 	}
3314 
3315 	return 0;
3316 }
3317 
3318 static void iommu_flush_all(void)
3319 {
3320 	struct dmar_drhd_unit *drhd;
3321 	struct intel_iommu *iommu;
3322 
3323 	for_each_active_iommu(iommu, drhd) {
3324 		iommu->flush.flush_context(iommu, 0, 0, 0,
3325 					   DMA_CCMD_GLOBAL_INVL);
3326 		iommu->flush.flush_iotlb(iommu, 0, 0, 0,
3327 					 DMA_TLB_GLOBAL_FLUSH);
3328 	}
3329 }
3330 
3331 static int iommu_suspend(void)
3332 {
3333 	struct dmar_drhd_unit *drhd;
3334 	struct intel_iommu *iommu = NULL;
3335 	unsigned long flag;
3336 
3337 	for_each_active_iommu(iommu, drhd) {
3338 		iommu->iommu_state = kzalloc(sizeof(u32) * MAX_SR_DMAR_REGS,
3339 						 GFP_ATOMIC);
3340 		if (!iommu->iommu_state)
3341 			goto nomem;
3342 	}
3343 
3344 	iommu_flush_all();
3345 
3346 	for_each_active_iommu(iommu, drhd) {
3347 		iommu_disable_translation(iommu);
3348 
3349 		raw_spin_lock_irqsave(&iommu->register_lock, flag);
3350 
3351 		iommu->iommu_state[SR_DMAR_FECTL_REG] =
3352 			readl(iommu->reg + DMAR_FECTL_REG);
3353 		iommu->iommu_state[SR_DMAR_FEDATA_REG] =
3354 			readl(iommu->reg + DMAR_FEDATA_REG);
3355 		iommu->iommu_state[SR_DMAR_FEADDR_REG] =
3356 			readl(iommu->reg + DMAR_FEADDR_REG);
3357 		iommu->iommu_state[SR_DMAR_FEUADDR_REG] =
3358 			readl(iommu->reg + DMAR_FEUADDR_REG);
3359 
3360 		raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
3361 	}
3362 	return 0;
3363 
3364 nomem:
3365 	for_each_active_iommu(iommu, drhd)
3366 		kfree(iommu->iommu_state);
3367 
3368 	return -ENOMEM;
3369 }
3370 
3371 static void iommu_resume(void)
3372 {
3373 	struct dmar_drhd_unit *drhd;
3374 	struct intel_iommu *iommu = NULL;
3375 	unsigned long flag;
3376 
3377 	if (init_iommu_hw()) {
3378 		if (force_on)
3379 			panic("tboot: IOMMU setup failed, DMAR can not resume!\n");
3380 		else
3381 			WARN(1, "IOMMU setup failed, DMAR can not resume!\n");
3382 		return;
3383 	}
3384 
3385 	for_each_active_iommu(iommu, drhd) {
3386 
3387 		raw_spin_lock_irqsave(&iommu->register_lock, flag);
3388 
3389 		writel(iommu->iommu_state[SR_DMAR_FECTL_REG],
3390 			iommu->reg + DMAR_FECTL_REG);
3391 		writel(iommu->iommu_state[SR_DMAR_FEDATA_REG],
3392 			iommu->reg + DMAR_FEDATA_REG);
3393 		writel(iommu->iommu_state[SR_DMAR_FEADDR_REG],
3394 			iommu->reg + DMAR_FEADDR_REG);
3395 		writel(iommu->iommu_state[SR_DMAR_FEUADDR_REG],
3396 			iommu->reg + DMAR_FEUADDR_REG);
3397 
3398 		raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
3399 	}
3400 
3401 	for_each_active_iommu(iommu, drhd)
3402 		kfree(iommu->iommu_state);
3403 }
3404 
3405 static struct syscore_ops iommu_syscore_ops = {
3406 	.resume		= iommu_resume,
3407 	.suspend	= iommu_suspend,
3408 };
3409 
3410 static void __init init_iommu_pm_ops(void)
3411 {
3412 	register_syscore_ops(&iommu_syscore_ops);
3413 }
3414 
3415 #else
3416 static inline void init_iommu_pm_ops(void) {}
3417 #endif	/* CONFIG_SUSPEND */
3418 
3419 LIST_HEAD(dmar_rmrr_units);
3420 
3421 static void __init dmar_register_rmrr_unit(struct dmar_rmrr_unit *rmrr)
3422 {
3423 	list_add(&rmrr->list, &dmar_rmrr_units);
3424 }
3425 
3426 
3427 int __init dmar_parse_one_rmrr(struct acpi_dmar_header *header)
3428 {
3429 	struct acpi_dmar_reserved_memory *rmrr;
3430 	struct dmar_rmrr_unit *rmrru;
3431 
3432 	rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL);
3433 	if (!rmrru)
3434 		return -ENOMEM;
3435 
3436 	rmrru->hdr = header;
3437 	rmrr = (struct acpi_dmar_reserved_memory *)header;
3438 	rmrru->base_address = rmrr->base_address;
3439 	rmrru->end_address = rmrr->end_address;
3440 
3441 	dmar_register_rmrr_unit(rmrru);
3442 	return 0;
3443 }
3444 
3445 static int __init
3446 rmrr_parse_dev(struct dmar_rmrr_unit *rmrru)
3447 {
3448 	struct acpi_dmar_reserved_memory *rmrr;
3449 	int ret;
3450 
3451 	rmrr = (struct acpi_dmar_reserved_memory *) rmrru->hdr;
3452 	ret = dmar_parse_dev_scope((void *)(rmrr + 1),
3453 		((void *)rmrr) + rmrr->header.length,
3454 		&rmrru->devices_cnt, &rmrru->devices, rmrr->segment);
3455 
3456 	if (ret || (rmrru->devices_cnt == 0)) {
3457 		list_del(&rmrru->list);
3458 		kfree(rmrru);
3459 	}
3460 	return ret;
3461 }
3462 
3463 static LIST_HEAD(dmar_atsr_units);
3464 
3465 int __init dmar_parse_one_atsr(struct acpi_dmar_header *hdr)
3466 {
3467 	struct acpi_dmar_atsr *atsr;
3468 	struct dmar_atsr_unit *atsru;
3469 
3470 	atsr = container_of(hdr, struct acpi_dmar_atsr, header);
3471 	atsru = kzalloc(sizeof(*atsru), GFP_KERNEL);
3472 	if (!atsru)
3473 		return -ENOMEM;
3474 
3475 	atsru->hdr = hdr;
3476 	atsru->include_all = atsr->flags & 0x1;
3477 
3478 	list_add(&atsru->list, &dmar_atsr_units);
3479 
3480 	return 0;
3481 }
3482 
3483 static int __init atsr_parse_dev(struct dmar_atsr_unit *atsru)
3484 {
3485 	int rc;
3486 	struct acpi_dmar_atsr *atsr;
3487 
3488 	if (atsru->include_all)
3489 		return 0;
3490 
3491 	atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
3492 	rc = dmar_parse_dev_scope((void *)(atsr + 1),
3493 				(void *)atsr + atsr->header.length,
3494 				&atsru->devices_cnt, &atsru->devices,
3495 				atsr->segment);
3496 	if (rc || !atsru->devices_cnt) {
3497 		list_del(&atsru->list);
3498 		kfree(atsru);
3499 	}
3500 
3501 	return rc;
3502 }
3503 
3504 int dmar_find_matched_atsr_unit(struct pci_dev *dev)
3505 {
3506 	int i;
3507 	struct pci_bus *bus;
3508 	struct acpi_dmar_atsr *atsr;
3509 	struct dmar_atsr_unit *atsru;
3510 
3511 	dev = pci_physfn(dev);
3512 
3513 	list_for_each_entry(atsru, &dmar_atsr_units, list) {
3514 		atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
3515 		if (atsr->segment == pci_domain_nr(dev->bus))
3516 			goto found;
3517 	}
3518 
3519 	return 0;
3520 
3521 found:
3522 	for (bus = dev->bus; bus; bus = bus->parent) {
3523 		struct pci_dev *bridge = bus->self;
3524 
3525 		if (!bridge || !pci_is_pcie(bridge) ||
3526 		    bridge->pcie_type == PCI_EXP_TYPE_PCI_BRIDGE)
3527 			return 0;
3528 
3529 		if (bridge->pcie_type == PCI_EXP_TYPE_ROOT_PORT) {
3530 			for (i = 0; i < atsru->devices_cnt; i++)
3531 				if (atsru->devices[i] == bridge)
3532 					return 1;
3533 			break;
3534 		}
3535 	}
3536 
3537 	if (atsru->include_all)
3538 		return 1;
3539 
3540 	return 0;
3541 }
3542 
3543 int __init dmar_parse_rmrr_atsr_dev(void)
3544 {
3545 	struct dmar_rmrr_unit *rmrr, *rmrr_n;
3546 	struct dmar_atsr_unit *atsr, *atsr_n;
3547 	int ret = 0;
3548 
3549 	list_for_each_entry_safe(rmrr, rmrr_n, &dmar_rmrr_units, list) {
3550 		ret = rmrr_parse_dev(rmrr);
3551 		if (ret)
3552 			return ret;
3553 	}
3554 
3555 	list_for_each_entry_safe(atsr, atsr_n, &dmar_atsr_units, list) {
3556 		ret = atsr_parse_dev(atsr);
3557 		if (ret)
3558 			return ret;
3559 	}
3560 
3561 	return ret;
3562 }
3563 
3564 /*
3565  * Here we only respond to a device being unbound from its driver.
3566  *
3567  * A newly added device is not attached to its DMAR domain here yet; that
3568  * happens when the device's first iova mapping is set up.
3569  */
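/*
 * Illustrative example (hypothetical device and driver names): unbinding a
 * device through sysfs, e.g.
 *
 *	echo 0000:03:00.0 > /sys/bus/pci/drivers/e1000e/unbind
 *
 * raises BUS_NOTIFY_UNBOUND_DRIVER on the PCI bus notifier chain, which lands
 * in device_notifier() below and, unless pass-through is in use, removes the
 * device from its domain and, if the domain is then empty (and is neither a
 * virtual-machine nor a static-identity domain), destroys it.
 */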
3570 static int device_notifier(struct notifier_block *nb,
3571 				  unsigned long action, void *data)
3572 {
3573 	struct device *dev = data;
3574 	struct pci_dev *pdev = to_pci_dev(dev);
3575 	struct dmar_domain *domain;
3576 
3577 	if (iommu_no_mapping(dev))
3578 		return 0;
3579 
3580 	domain = find_domain(pdev);
3581 	if (!domain)
3582 		return 0;
3583 
3584 	if (action == BUS_NOTIFY_UNBOUND_DRIVER && !iommu_pass_through) {
3585 		domain_remove_one_dev_info(domain, pdev);
3586 
3587 		if (!(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) &&
3588 		    !(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY) &&
3589 		    list_empty(&domain->devices))
3590 			domain_exit(domain);
3591 	}
3592 
3593 	return 0;
3594 }
3595 
3596 static struct notifier_block device_nb = {
3597 	.notifier_call = device_notifier,
3598 };
3599 
3600 int __init intel_iommu_init(void)
3601 {
3602 	int ret = 0;
3603 
3604 	/* VT-d is required for a TXT/tboot launch, so enforce that */
3605 	force_on = tboot_force_iommu();
3606 
3607 	if (dmar_table_init()) {
3608 		if (force_on)
3609 			panic("tboot: Failed to initialize DMAR table\n");
3610 		return -ENODEV;
3611 	}
3612 
3613 	if (dmar_dev_scope_init() < 0) {
3614 		if (force_on)
3615 			panic("tboot: Failed to initialize DMAR device scope\n");
3616 		return -ENODEV;
3617 	}
3618 
3619 	if (no_iommu || dmar_disabled)
3620 		return -ENODEV;
3621 
3622 	if (iommu_init_mempool()) {
3623 		if (force_on)
3624 			panic("tboot: Failed to initialize iommu memory\n");
3625 		return -ENODEV;
3626 	}
3627 
3628 	if (list_empty(&dmar_rmrr_units))
3629 		printk(KERN_INFO "DMAR: No RMRR found\n");
3630 
3631 	if (list_empty(&dmar_atsr_units))
3632 		printk(KERN_INFO "DMAR: No ATSR found\n");
3633 
3634 	if (dmar_init_reserved_ranges()) {
3635 		if (force_on)
3636 			panic("tboot: Failed to reserve iommu ranges\n");
3637 		return -ENODEV;
3638 	}
3639 
3640 	init_no_remapping_devices();
3641 
3642 	ret = init_dmars();
3643 	if (ret) {
3644 		if (force_on)
3645 			panic("tboot: Failed to initialize DMARs\n");
3646 		printk(KERN_ERR "IOMMU: dmar init failed\n");
3647 		put_iova_domain(&reserved_iova_list);
3648 		iommu_exit_mempool();
3649 		return ret;
3650 	}
3651 	printk(KERN_INFO
3652 	"PCI-DMA: Intel(R) Virtualization Technology for Directed I/O\n");
3653 
3654 	init_timer(&unmap_timer);
3655 #ifdef CONFIG_SWIOTLB
3656 	swiotlb = 0;
3657 #endif
3658 	dma_ops = &intel_dma_ops;
3659 
3660 	init_iommu_pm_ops();
3661 
3662 	bus_set_iommu(&pci_bus_type, &intel_iommu_ops);
3663 
3664 	bus_register_notifier(&pci_bus_type, &device_nb);
3665 
3666 	intel_iommu_enabled = 1;
3667 
3668 	return 0;
3669 }
3670 
3671 static void iommu_detach_dependent_devices(struct intel_iommu *iommu,
3672 					   struct pci_dev *pdev)
3673 {
3674 	struct pci_dev *tmp, *parent;
3675 
3676 	if (!iommu || !pdev)
3677 		return;
3678 
3679 	/* dependent device detach */
3680 	tmp = pci_find_upstream_pcie_bridge(pdev);
3681 	/* Secondary interface's bus number and devfn 0 */
3682 	if (tmp) {
3683 		parent = pdev->bus->self;
3684 		while (parent != tmp) {
3685 			iommu_detach_dev(iommu, parent->bus->number,
3686 					 parent->devfn);
3687 			parent = parent->bus->self;
3688 		}
3689 		if (pci_is_pcie(tmp)) /* this is a PCIe-to-PCI bridge */
3690 			iommu_detach_dev(iommu,
3691 				tmp->subordinate->number, 0);
3692 		else /* this is a legacy PCI bridge */
3693 			iommu_detach_dev(iommu, tmp->bus->number,
3694 					 tmp->devfn);
3695 	}
3696 }
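/*
 * Illustrative walk for a hypothetical topology: a conventional PCI device at
 * 0000:05:03.0 behind a PCIe-to-PCI bridge at 0000:03:00.0 whose secondary bus
 * is 05.  pci_find_upstream_pcie_bridge() returns that bridge, the while loop
 * above detaches any intermediate bridge contexts, and because the bridge is
 * PCIe the final call clears the context entry for (bus 05, devfn 0), the
 * source-id such a bridge presents for DMA forwarded from its secondary side.
 */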
3697 
3698 static void domain_remove_one_dev_info(struct dmar_domain *domain,
3699 					  struct pci_dev *pdev)
3700 {
3701 	struct device_domain_info *info;
3702 	struct intel_iommu *iommu;
3703 	unsigned long flags;
3704 	int found = 0;
3705 	struct list_head *entry, *tmp;
3706 
3707 	iommu = device_to_iommu(pci_domain_nr(pdev->bus), pdev->bus->number,
3708 				pdev->devfn);
3709 	if (!iommu)
3710 		return;
3711 
3712 	spin_lock_irqsave(&device_domain_lock, flags);
3713 	list_for_each_safe(entry, tmp, &domain->devices) {
3714 		info = list_entry(entry, struct device_domain_info, link);
3715 		if (info->segment == pci_domain_nr(pdev->bus) &&
3716 		    info->bus == pdev->bus->number &&
3717 		    info->devfn == pdev->devfn) {
3718 			list_del(&info->link);
3719 			list_del(&info->global);
3720 			if (info->dev)
3721 				info->dev->dev.archdata.iommu = NULL;
3722 			spin_unlock_irqrestore(&device_domain_lock, flags);
3723 
3724 			iommu_disable_dev_iotlb(info);
3725 			iommu_detach_dev(iommu, info->bus, info->devfn);
3726 			iommu_detach_dependent_devices(iommu, pdev);
3727 			free_devinfo_mem(info);
3728 
3729 			spin_lock_irqsave(&device_domain_lock, flags);
3730 
3731 			if (found)
3732 				break;
3733 			else
3734 				continue;
3735 		}
3736 
3737 		/* If there are no other devices under the same iommu
3738 		 * owned by this domain, clear this iommu in iommu_bmp and
3739 		 * update the iommu count and coherency.
3740 		 */
3741 		if (iommu == device_to_iommu(info->segment, info->bus,
3742 					    info->devfn))
3743 			found = 1;
3744 	}
3745 
3746 	spin_unlock_irqrestore(&device_domain_lock, flags);
3747 
3748 	if (found == 0) {
3749 		unsigned long tmp_flags;
3750 		spin_lock_irqsave(&domain->iommu_lock, tmp_flags);
3751 		clear_bit(iommu->seq_id, &domain->iommu_bmp);
3752 		domain->iommu_count--;
3753 		domain_update_iommu_cap(domain);
3754 		spin_unlock_irqrestore(&domain->iommu_lock, tmp_flags);
3755 
3756 		if (!(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) &&
3757 		    !(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY)) {
3758 			spin_lock_irqsave(&iommu->lock, tmp_flags);
3759 			clear_bit(domain->id, iommu->domain_ids);
3760 			iommu->domains[domain->id] = NULL;
3761 			spin_unlock_irqrestore(&iommu->lock, tmp_flags);
3762 		}
3763 	}
3764 }
3765 
3766 static void vm_domain_remove_all_dev_info(struct dmar_domain *domain)
3767 {
3768 	struct device_domain_info *info;
3769 	struct intel_iommu *iommu;
3770 	unsigned long flags1, flags2;
3771 
3772 	spin_lock_irqsave(&device_domain_lock, flags1);
3773 	while (!list_empty(&domain->devices)) {
3774 		info = list_entry(domain->devices.next,
3775 			struct device_domain_info, link);
3776 		list_del(&info->link);
3777 		list_del(&info->global);
3778 		if (info->dev)
3779 			info->dev->dev.archdata.iommu = NULL;
3780 
3781 		spin_unlock_irqrestore(&device_domain_lock, flags1);
3782 
3783 		iommu_disable_dev_iotlb(info);
3784 		iommu = device_to_iommu(info->segment, info->bus, info->devfn);
3785 		iommu_detach_dev(iommu, info->bus, info->devfn);
3786 		iommu_detach_dependent_devices(iommu, info->dev);
3787 
3788 		/* clear this iommu in iommu_bmp, update iommu count
3789 		 * and capabilities
3790 		 */
3791 		spin_lock_irqsave(&domain->iommu_lock, flags2);
3792 		if (test_and_clear_bit(iommu->seq_id,
3793 				       &domain->iommu_bmp)) {
3794 			domain->iommu_count--;
3795 			domain_update_iommu_cap(domain);
3796 		}
3797 		spin_unlock_irqrestore(&domain->iommu_lock, flags2);
3798 
3799 		free_devinfo_mem(info);
3800 		spin_lock_irqsave(&device_domain_lock, flags1);
3801 	}
3802 	spin_unlock_irqrestore(&device_domain_lock, flags1);
3803 }
3804 
3805 /* domain id for virtual machine domains; it is never set in a context entry */
3806 static unsigned long vm_domid;
3807 
3808 static struct dmar_domain *iommu_alloc_vm_domain(void)
3809 {
3810 	struct dmar_domain *domain;
3811 
3812 	domain = alloc_domain_mem();
3813 	if (!domain)
3814 		return NULL;
3815 
3816 	domain->id = vm_domid++;
3817 	domain->nid = -1;
3818 	memset(&domain->iommu_bmp, 0, sizeof(unsigned long));
3819 	domain->flags = DOMAIN_FLAG_VIRTUAL_MACHINE;
3820 
3821 	return domain;
3822 }
3823 
3824 static int md_domain_init(struct dmar_domain *domain, int guest_width)
3825 {
3826 	int adjust_width;
3827 
3828 	init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
3829 	spin_lock_init(&domain->iommu_lock);
3830 
3831 	domain_reserve_special_ranges(domain);
3832 
3833 	/* calculate AGAW */
3834 	domain->gaw = guest_width;
3835 	adjust_width = guestwidth_to_adjustwidth(guest_width);
3836 	domain->agaw = width_to_agaw(adjust_width);
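	/*
	 * Worked example (assuming the helpers round up to the next 9-bit
	 * level boundary, as elsewhere in this file): guest_width =
	 * DEFAULT_DOMAIN_ADDRESS_WIDTH (48) already sits on a level
	 * boundary, so adjust_width stays 48 and width_to_agaw(48) yields
	 * agaw 2, i.e. a 4-level page table.
	 */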
3837 
3838 	INIT_LIST_HEAD(&domain->devices);
3839 
3840 	domain->iommu_count = 0;
3841 	domain->iommu_coherency = 0;
3842 	domain->iommu_snooping = 0;
3843 	domain->iommu_superpage = 0;
3844 	domain->max_addr = 0;
3845 	domain->nid = -1;
3846 
3847 	/* always allocate the top pgd */
3848 	domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
3849 	if (!domain->pgd)
3850 		return -ENOMEM;
3851 	domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
3852 	return 0;
3853 }
3854 
3855 static void iommu_free_vm_domain(struct dmar_domain *domain)
3856 {
3857 	unsigned long flags;
3858 	struct dmar_drhd_unit *drhd;
3859 	struct intel_iommu *iommu;
3860 	unsigned long i;
3861 	unsigned long ndomains;
3862 
3863 	for_each_drhd_unit(drhd) {
3864 		if (drhd->ignored)
3865 			continue;
3866 		iommu = drhd->iommu;
3867 
3868 		ndomains = cap_ndoms(iommu->cap);
3869 		for_each_set_bit(i, iommu->domain_ids, ndomains) {
3870 			if (iommu->domains[i] == domain) {
3871 				spin_lock_irqsave(&iommu->lock, flags);
3872 				clear_bit(i, iommu->domain_ids);
3873 				iommu->domains[i] = NULL;
3874 				spin_unlock_irqrestore(&iommu->lock, flags);
3875 				break;
3876 			}
3877 		}
3878 	}
3879 }
3880 
3881 static void vm_domain_exit(struct dmar_domain *domain)
3882 {
3883 	/* Domain 0 is reserved, so don't process it */
3884 	if (!domain)
3885 		return;
3886 
3887 	vm_domain_remove_all_dev_info(domain);
3888 	/* destroy iovas */
3889 	put_iova_domain(&domain->iovad);
3890 
3891 	/* clear ptes */
3892 	dma_pte_clear_range(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
3893 
3894 	/* free page tables */
3895 	dma_pte_free_pagetable(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
3896 
3897 	iommu_free_vm_domain(domain);
3898 	free_domain_mem(domain);
3899 }
3900 
3901 static int intel_iommu_domain_init(struct iommu_domain *domain)
3902 {
3903 	struct dmar_domain *dmar_domain;
3904 
3905 	dmar_domain = iommu_alloc_vm_domain();
3906 	if (!dmar_domain) {
3907 		printk(KERN_ERR
3908 			"intel_iommu_domain_init: dmar_domain == NULL\n");
3909 		return -ENOMEM;
3910 	}
3911 	if (md_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
3912 		printk(KERN_ERR
3913 			"intel_iommu_domain_init() failed\n");
3914 		vm_domain_exit(dmar_domain);
3915 		return -ENOMEM;
3916 	}
3917 	domain_update_iommu_cap(dmar_domain);
3918 	domain->priv = dmar_domain;
3919 
3920 	return 0;
3921 }
3922 
3923 static void intel_iommu_domain_destroy(struct iommu_domain *domain)
3924 {
3925 	struct dmar_domain *dmar_domain = domain->priv;
3926 
3927 	domain->priv = NULL;
3928 	vm_domain_exit(dmar_domain);
3929 }
3930 
3931 static int intel_iommu_attach_device(struct iommu_domain *domain,
3932 				     struct device *dev)
3933 {
3934 	struct dmar_domain *dmar_domain = domain->priv;
3935 	struct pci_dev *pdev = to_pci_dev(dev);
3936 	struct intel_iommu *iommu;
3937 	int addr_width;
3938 
3939 	/* normally pdev is not mapped */
3940 	if (unlikely(domain_context_mapped(pdev))) {
3941 		struct dmar_domain *old_domain;
3942 
3943 		old_domain = find_domain(pdev);
3944 		if (old_domain) {
3945 			if (dmar_domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE ||
3946 			    dmar_domain->flags & DOMAIN_FLAG_STATIC_IDENTITY)
3947 				domain_remove_one_dev_info(old_domain, pdev);
3948 			else
3949 				domain_remove_dev_info(old_domain);
3950 		}
3951 	}
3952 
3953 	iommu = device_to_iommu(pci_domain_nr(pdev->bus), pdev->bus->number,
3954 				pdev->devfn);
3955 	if (!iommu)
3956 		return -ENODEV;
3957 
3958 	/* check if this iommu agaw is sufficient for max mapped address */
3959 	addr_width = agaw_to_width(iommu->agaw);
3960 	if (addr_width > cap_mgaw(iommu->cap))
3961 		addr_width = cap_mgaw(iommu->cap);
3962 
3963 	if (dmar_domain->max_addr > (1LL << addr_width)) {
3964 		printk(KERN_ERR "%s: iommu width (%d) is not "
3965 		       "sufficient for the mapped address (%llx)\n",
3966 		       __func__, addr_width, dmar_domain->max_addr);
3967 		return -EFAULT;
3968 	}
3969 	dmar_domain->gaw = addr_width;
3970 
3971 	/*
3972 	 * Knock out extra levels of page tables if necessary
3973 	 */
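	/*
	 * For instance (hypothetical widths): a domain built with a 57-bit
	 * gaw (agaw 3, five levels) being attached to an iommu that only
	 * supports agaw 2 sheds its top-level table once, leaving the
	 * 4-level table the hardware can actually walk.
	 */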
3974 	while (iommu->agaw < dmar_domain->agaw) {
3975 		struct dma_pte *pte;
3976 
3977 		pte = dmar_domain->pgd;
3978 		if (dma_pte_present(pte)) {
3979 			dmar_domain->pgd = (struct dma_pte *)
3980 				phys_to_virt(dma_pte_addr(pte));
3981 			free_pgtable_page(pte);
3982 		}
3983 		dmar_domain->agaw--;
3984 	}
3985 
3986 	return domain_add_dev_info(dmar_domain, pdev, CONTEXT_TT_MULTI_LEVEL);
3987 }
3988 
3989 static void intel_iommu_detach_device(struct iommu_domain *domain,
3990 				      struct device *dev)
3991 {
3992 	struct dmar_domain *dmar_domain = domain->priv;
3993 	struct pci_dev *pdev = to_pci_dev(dev);
3994 
3995 	domain_remove_one_dev_info(dmar_domain, pdev);
3996 }
3997 
3998 static int intel_iommu_map(struct iommu_domain *domain,
3999 			   unsigned long iova, phys_addr_t hpa,
4000 			   size_t size, int iommu_prot)
4001 {
4002 	struct dmar_domain *dmar_domain = domain->priv;
4003 	u64 max_addr;
4004 	int prot = 0;
4005 	int ret;
4006 
4007 	if (iommu_prot & IOMMU_READ)
4008 		prot |= DMA_PTE_READ;
4009 	if (iommu_prot & IOMMU_WRITE)
4010 		prot |= DMA_PTE_WRITE;
4011 	if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping)
4012 		prot |= DMA_PTE_SNP;
4013 
4014 	max_addr = iova + size;
4015 	if (dmar_domain->max_addr < max_addr) {
4016 		u64 end;
4017 
4018 		/* check if minimum agaw is sufficient for mapped address */
4019 		end = __DOMAIN_MAX_ADDR(dmar_domain->gaw) + 1;
4020 		if (end < max_addr) {
4021 			printk(KERN_ERR "%s: iommu width (%d) is not "
4022 			       "sufficient for the mapped address (%llx)\n",
4023 			       __func__, dmar_domain->gaw, max_addr);
4024 			return -EFAULT;
4025 		}
4026 		dmar_domain->max_addr = max_addr;
4027 	}
4028 	/* Round up size to next multiple of PAGE_SIZE, if it and
4029 	   the low bits of hpa would take us onto the next page */
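	/*
	 * Worked example (assuming 4KiB VT-d pages, hypothetical values):
	 * hpa = 0x1234 with size = 0x2000 touches bytes up to 0x3233, so
	 * aligned_nrpages() reports 3 pages rather than the 2 that a naive
	 * size >> VTD_PAGE_SHIFT would give.
	 */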
4030 	size = aligned_nrpages(hpa, size);
4031 	ret = domain_pfn_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT,
4032 				 hpa >> VTD_PAGE_SHIFT, size, prot);
4033 	return ret;
4034 }
4035 
4036 static size_t intel_iommu_unmap(struct iommu_domain *domain,
4037 			     unsigned long iova, size_t size)
4038 {
4039 	struct dmar_domain *dmar_domain = domain->priv;
4040 	int order;
4041 
4042 	order = dma_pte_clear_range(dmar_domain, iova >> VTD_PAGE_SHIFT,
4043 			    (iova + size - 1) >> VTD_PAGE_SHIFT);
4044 
4045 	if (dmar_domain->max_addr == iova + size)
4046 		dmar_domain->max_addr = iova;
4047 
4048 	return PAGE_SIZE << order;
4049 }
4050 
4051 static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
4052 					    unsigned long iova)
4053 {
4054 	struct dmar_domain *dmar_domain = domain->priv;
4055 	struct dma_pte *pte;
4056 	u64 phys = 0;
4057 
4058 	pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, 0);
4059 	if (pte)
4060 		phys = dma_pte_addr(pte);
4061 
4062 	return phys;
4063 }
4064 
4065 static int intel_iommu_domain_has_cap(struct iommu_domain *domain,
4066 				      unsigned long cap)
4067 {
4068 	struct dmar_domain *dmar_domain = domain->priv;
4069 
4070 	if (cap == IOMMU_CAP_CACHE_COHERENCY)
4071 		return dmar_domain->iommu_snooping;
4072 	if (cap == IOMMU_CAP_INTR_REMAP)
4073 		return intr_remapping_enabled;
4074 
4075 	return 0;
4076 }
4077 
4078 /*
4079  * Group numbers are arbitrary.  Devices with the same group number
4080  * indicate that the iommu cannot differentiate between them.  To avoid
4081  * tracking used groups we just use the seg|bus|devfn of the lowest
4082  * level at which we're able to differentiate devices.
4083  */
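/*
 * Illustrative packing (hypothetical device, little-endian layout of the
 * union below): an endpoint at 0000:03:00.1 with no upstream legacy bridge
 * yields id.pci = { .devfn = 0x01, .bus = 0x03, .segment = 0x0000 }, i.e.
 * groupid 0x00000301.  With iommu_group_mf set and the device not a VF,
 * the devfn is masked down to function 0, giving 0x00000300 instead.
 */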
4084 static int intel_iommu_device_group(struct device *dev, unsigned int *groupid)
4085 {
4086 	struct pci_dev *pdev = to_pci_dev(dev);
4087 	struct pci_dev *bridge;
4088 	union {
4089 		struct {
4090 			u8 devfn;
4091 			u8 bus;
4092 			u16 segment;
4093 		} pci;
4094 		u32 group;
4095 	} id;
4096 
4097 	if (iommu_no_mapping(dev))
4098 		return -ENODEV;
4099 
4100 	id.pci.segment = pci_domain_nr(pdev->bus);
4101 	id.pci.bus = pdev->bus->number;
4102 	id.pci.devfn = pdev->devfn;
4103 
4104 	if (!device_to_iommu(id.pci.segment, id.pci.bus, id.pci.devfn))
4105 		return -ENODEV;
4106 
4107 	bridge = pci_find_upstream_pcie_bridge(pdev);
4108 	if (bridge) {
4109 		if (pci_is_pcie(bridge)) {
4110 			id.pci.bus = bridge->subordinate->number;
4111 			id.pci.devfn = 0;
4112 		} else {
4113 			id.pci.bus = bridge->bus->number;
4114 			id.pci.devfn = bridge->devfn;
4115 		}
4116 	}
4117 
4118 	if (!pdev->is_virtfn && iommu_group_mf)
4119 		id.pci.devfn = PCI_DEVFN(PCI_SLOT(id.pci.devfn), 0);
4120 
4121 	*groupid = id.group;
4122 
4123 	return 0;
4124 }
4125 
4126 static struct iommu_ops intel_iommu_ops = {
4127 	.domain_init	= intel_iommu_domain_init,
4128 	.domain_destroy = intel_iommu_domain_destroy,
4129 	.attach_dev	= intel_iommu_attach_device,
4130 	.detach_dev	= intel_iommu_detach_device,
4131 	.map		= intel_iommu_map,
4132 	.unmap		= intel_iommu_unmap,
4133 	.iova_to_phys	= intel_iommu_iova_to_phys,
4134 	.domain_has_cap = intel_iommu_domain_has_cap,
4135 	.device_group	= intel_iommu_device_group,
4136 	.pgsize_bitmap	= INTEL_IOMMU_PGSIZES,
4137 };
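/*
 * Illustrative only, not part of this driver: after bus_set_iommu() in
 * intel_iommu_init() registers these ops, a consumer such as device
 * assignment reaches them through the generic IOMMU API.  A minimal sketch,
 * assuming the generic interface of this kernel generation and a
 * hypothetical pdev, iova and phys:
 *
 *	struct iommu_domain *dom = iommu_domain_alloc(&pci_bus_type);
 *
 *	if (dom && !iommu_attach_device(dom, &pdev->dev)) {
 *		iommu_map(dom, iova, phys, PAGE_SIZE,
 *			  IOMMU_READ | IOMMU_WRITE);
 *		...
 *		iommu_unmap(dom, iova, PAGE_SIZE);
 *		iommu_detach_device(dom, &pdev->dev);
 *	}
 *	if (dom)
 *		iommu_domain_free(dom);
 *
 * Those calls dispatch to .domain_init, .attach_dev, .map, .unmap,
 * .detach_dev and .domain_destroy above.
 */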
4138 
4139 static void __devinit quirk_iommu_rwbf(struct pci_dev *dev)
4140 {
4141 	/*
4142 	 * Mobile 4 Series Chipset neglects to set RWBF capability,
4143 	 * but needs it:
4144 	 */
4145 	printk(KERN_INFO "DMAR: Forcing write-buffer flush capability\n");
4146 	rwbf_quirk = 1;
4147 
4148 	/* https://bugzilla.redhat.com/show_bug.cgi?id=538163 */
4149 	if (dev->revision == 0x07) {
4150 		printk(KERN_INFO "DMAR: Disabling IOMMU for graphics on this chipset\n");
4151 		dmar_map_gfx = 0;
4152 	}
4153 }
4154 
4155 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_rwbf);
4156 
4157 #define GGC 0x52
4158 #define GGC_MEMORY_SIZE_MASK	(0xf << 8)
4159 #define GGC_MEMORY_SIZE_NONE	(0x0 << 8)
4160 #define GGC_MEMORY_SIZE_1M	(0x1 << 8)
4161 #define GGC_MEMORY_SIZE_2M	(0x3 << 8)
4162 #define GGC_MEMORY_VT_ENABLED	(0x8 << 8)
4163 #define GGC_MEMORY_SIZE_2M_VT	(0x9 << 8)
4164 #define GGC_MEMORY_SIZE_3M_VT	(0xa << 8)
4165 #define GGC_MEMORY_SIZE_4M_VT	(0xb << 8)
4166 
4167 static void __devinit quirk_calpella_no_shadow_gtt(struct pci_dev *dev)
4168 {
4169 	unsigned short ggc;
4170 
4171 	if (pci_read_config_word(dev, GGC, &ggc))
4172 		return;
4173 
4174 	if (!(ggc & GGC_MEMORY_VT_ENABLED)) {
4175 		printk(KERN_INFO "DMAR: BIOS has allocated no shadow GTT; disabling IOMMU for graphics\n");
4176 		dmar_map_gfx = 0;
4177 	} else if (dmar_map_gfx) {
4178 		/* we have to ensure the gfx device is idle before we flush */
4179 		printk(KERN_INFO "DMAR: Disabling batched IOTLB flush on Ironlake\n");
4180 		intel_iommu_strict = 1;
4181 	}
4182 }
4183 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0040, quirk_calpella_no_shadow_gtt);
4184 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, quirk_calpella_no_shadow_gtt);
4185 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0062, quirk_calpella_no_shadow_gtt);
4186 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x006a, quirk_calpella_no_shadow_gtt);
4187 
4188 /* On Tylersburg chipsets, some BIOSes have been known to enable the
4189    ISOCH DMAR unit for the Azalia sound device, but not give it any
4190    TLB entries, which causes it to deadlock. Check for that.  We do
4191    this in a function called from init_dmars(), instead of in a PCI
4192    quirk, because we don't want to print the obnoxious "BIOS broken"
4193    message if VT-d is actually disabled.
4194 */
4195 static void __init check_tylersburg_isoch(void)
4196 {
4197 	struct pci_dev *pdev;
4198 	uint32_t vtisochctrl;
4199 
4200 	/* If there's no Azalia in the system anyway, forget it. */
4201 	pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x3a3e, NULL);
4202 	if (!pdev)
4203 		return;
4204 	pci_dev_put(pdev);
4205 
4206 	/* System Management Registers. Might be hidden, in which case
4207 	   we can't do the sanity check. But that's OK, because the
4208 	   known-broken BIOSes _don't_ actually hide it, so far. */
4209 	pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x342e, NULL);
4210 	if (!pdev)
4211 		return;
4212 
4213 	if (pci_read_config_dword(pdev, 0x188, &vtisochctrl)) {
4214 		pci_dev_put(pdev);
4215 		return;
4216 	}
4217 
4218 	pci_dev_put(pdev);
4219 
4220 	/* If Azalia DMA is routed to the non-isoch DMAR unit, fine. */
4221 	if (vtisochctrl & 1)
4222 		return;
4223 
4224 	/* Drop all bits other than the number of TLB entries */
4225 	vtisochctrl &= 0x1c;
4226 
4227 	/* If we have the recommended number of TLB entries (16), fine. */
4228 	if (vtisochctrl == 0x10)
4229 		return;
4230 
4231 	/* Zero TLB entries? You get to ride the short bus to school. */
4232 	if (!vtisochctrl) {
4233 		WARN(1, "Your BIOS is broken; DMA routed to ISOCH DMAR unit but no TLB space.\n"
4234 		     "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
4235 		     dmi_get_system_info(DMI_BIOS_VENDOR),
4236 		     dmi_get_system_info(DMI_BIOS_VERSION),
4237 		     dmi_get_system_info(DMI_PRODUCT_VERSION));
4238 		iommu_identity_mapping |= IDENTMAP_AZALIA;
4239 		return;
4240 	}
4241 
4242 	printk(KERN_WARNING "DMAR: Recommended TLB entries for ISOCH unit is 16; your BIOS set %d\n",
4243 	       vtisochctrl);
4244 }
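/*
 * Illustrative VTISOCHCTRL readings (config offset 0x188 above, values
 * hypothetical): 0x01 means Azalia DMA is routed to the non-isoch unit and
 * the check above bails out early; 0x10 means the isoch unit has the
 * recommended 16 TLB entries and is likewise accepted; 0x00 is the broken
 * case that triggers the WARN and forces identity mapping for the Azalia
 * device.  Any other non-zero value only draws the warning printk.
 */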
4245