drivers/iommu/intel-iommu.c
1 /*
2  * Copyright © 2006-2014 Intel Corporation.
3  *
4  * This program is free software; you can redistribute it and/or modify it
5  * under the terms and conditions of the GNU General Public License,
6  * version 2, as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope it will be useful, but WITHOUT
9  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
11  * more details.
12  *
13  * Authors: David Woodhouse <dwmw2@infradead.org>,
14  *          Ashok Raj <ashok.raj@intel.com>,
15  *          Shaohua Li <shaohua.li@intel.com>,
16  *          Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>,
17  *          Fenghua Yu <fenghua.yu@intel.com>
18  */
19
20 #include <linux/init.h>
21 #include <linux/bitmap.h>
22 #include <linux/debugfs.h>
23 #include <linux/export.h>
24 #include <linux/slab.h>
25 #include <linux/irq.h>
26 #include <linux/interrupt.h>
27 #include <linux/spinlock.h>
28 #include <linux/pci.h>
29 #include <linux/dmar.h>
30 #include <linux/dma-mapping.h>
31 #include <linux/mempool.h>
32 #include <linux/memory.h>
33 #include <linux/timer.h>
34 #include <linux/iova.h>
35 #include <linux/iommu.h>
36 #include <linux/intel-iommu.h>
37 #include <linux/syscore_ops.h>
38 #include <linux/tboot.h>
39 #include <linux/dmi.h>
40 #include <linux/pci-ats.h>
41 #include <linux/memblock.h>
42 #include <linux/dma-contiguous.h>
43 #include <asm/irq_remapping.h>
44 #include <asm/cacheflush.h>
45 #include <asm/iommu.h>
46
47 #include "irq_remapping.h"
48
49 #define ROOT_SIZE               VTD_PAGE_SIZE
50 #define CONTEXT_SIZE            VTD_PAGE_SIZE
51
52 #define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
53 #define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
54 #define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e)
55
56 #define IOAPIC_RANGE_START      (0xfee00000)
57 #define IOAPIC_RANGE_END        (0xfeefffff)
58 #define IOVA_START_ADDR         (0x1000)
59
60 #define DEFAULT_DOMAIN_ADDRESS_WIDTH 48
61
62 #define MAX_AGAW_WIDTH 64
63 #define MAX_AGAW_PFN_WIDTH      (MAX_AGAW_WIDTH - VTD_PAGE_SHIFT)
64
65 #define __DOMAIN_MAX_PFN(gaw)  ((((uint64_t)1) << (gaw-VTD_PAGE_SHIFT)) - 1)
66 #define __DOMAIN_MAX_ADDR(gaw) ((((uint64_t)1) << gaw) - 1)
67
68 /* We limit DOMAIN_MAX_PFN to fit in an unsigned long, and DOMAIN_MAX_ADDR
69    to match. That way, we can use 'unsigned long' for PFNs with impunity. */
70 #define DOMAIN_MAX_PFN(gaw)     ((unsigned long) min_t(uint64_t, \
71                                 __DOMAIN_MAX_PFN(gaw), (unsigned long)-1))
72 #define DOMAIN_MAX_ADDR(gaw)    (((uint64_t)__DOMAIN_MAX_PFN(gaw)) << VTD_PAGE_SHIFT)
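/*
 * Worked example (added for illustration): with the default 48-bit
 * guest address width and VTD_PAGE_SHIFT == 12,
 *
 *	__DOMAIN_MAX_PFN(48) == (1ULL << 36) - 1 == 0xfffffffff
 *	DOMAIN_MAX_ADDR(48)  == 0xfffffffff << 12 == 0xfffffffff000
 *
 * and on a 32-bit kernel DOMAIN_MAX_PFN() additionally clamps the
 * result to ULONG_MAX so it still fits in an unsigned long.
 */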
73
74 /* IO virtual address start page frame number */
75 #define IOVA_START_PFN          (1)
76
77 #define IOVA_PFN(addr)          ((addr) >> PAGE_SHIFT)
78 #define DMA_32BIT_PFN           IOVA_PFN(DMA_BIT_MASK(32))
79 #define DMA_64BIT_PFN           IOVA_PFN(DMA_BIT_MASK(64))
80
81 /* page table handling */
82 #define LEVEL_STRIDE            (9)
83 #define LEVEL_MASK              (((u64)1 << LEVEL_STRIDE) - 1)
84
85 /*
86  * This bitmap is used to advertise the page sizes our hardware supports
87  * to the IOMMU core, which will then use this information to split
88  * physically contiguous memory regions it is mapping into page sizes
89  * that we support.
90  *
91  * Traditionally the IOMMU core just handed us the mappings directly,
92  * after making sure the size is a power-of-two multiple of 4KiB and that the
93  * mapping has natural alignment.
94  *
95  * To retain this behavior, we currently advertise that we support
96  * all page sizes that are a power-of-two multiple of 4KiB.
97  *
98  * If at some point we'd like to utilize the IOMMU core's new behavior,
99  * we could change this to advertise the real page sizes we support.
100  */
101 #define INTEL_IOMMU_PGSIZES     (~0xFFFUL)
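/*
 * Added note: ~0xFFFUL clears bits 0-11 and sets every higher bit, so
 * the bitmap advertises 4KiB, 8KiB, 16KiB, ... i.e. every power-of-two
 * size that is a multiple of 4KiB, which is exactly the behaviour
 * described above.
 */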
102
103 static inline int agaw_to_level(int agaw)
104 {
105         return agaw + 2;
106 }
107
108 static inline int agaw_to_width(int agaw)
109 {
110         return min_t(int, 30 + agaw * LEVEL_STRIDE, MAX_AGAW_WIDTH);
111 }
112
113 static inline int width_to_agaw(int width)
114 {
115         return DIV_ROUND_UP(width - 30, LEVEL_STRIDE);
116 }
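/*
 * Worked example (added): a 48-bit width gives
 * width_to_agaw(48) == DIV_ROUND_UP(18, 9) == 2, agaw_to_level(2) == 4
 * (a 4-level page table) and agaw_to_width(2) == 48 again; a 39-bit
 * width maps to agaw 1, i.e. a 3-level table.
 */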
117
118 static inline unsigned int level_to_offset_bits(int level)
119 {
120         return (level - 1) * LEVEL_STRIDE;
121 }
122
123 static inline int pfn_level_offset(unsigned long pfn, int level)
124 {
125         return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK;
126 }
127
128 static inline unsigned long level_mask(int level)
129 {
130         return -1UL << level_to_offset_bits(level);
131 }
132
133 static inline unsigned long level_size(int level)
134 {
135         return 1UL << level_to_offset_bits(level);
136 }
137
138 static inline unsigned long align_to_level(unsigned long pfn, int level)
139 {
140         return (pfn + level_size(level) - 1) & level_mask(level);
141 }
142
143 static inline unsigned long lvl_to_nr_pages(unsigned int lvl)
144 {
145         return  1 << min_t(int, (lvl - 1) * LEVEL_STRIDE, MAX_AGAW_PFN_WIDTH);
146 }
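/*
 * For reference (added): lvl_to_nr_pages(1) == 1 (4KiB),
 * lvl_to_nr_pages(2) == 512 (2MiB) and lvl_to_nr_pages(3) == 262144
 * (1GiB), i.e. the number of 4KiB VT-d pages covered by one PTE at
 * that level.
 */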
147
148 /* VT-d pages must never be larger than MM pages. Otherwise things
149    are never going to work. */
150 static inline unsigned long dma_to_mm_pfn(unsigned long dma_pfn)
151 {
152         return dma_pfn >> (PAGE_SHIFT - VTD_PAGE_SHIFT);
153 }
154
155 static inline unsigned long mm_to_dma_pfn(unsigned long mm_pfn)
156 {
157         return mm_pfn << (PAGE_SHIFT - VTD_PAGE_SHIFT);
158 }
159 static inline unsigned long page_to_dma_pfn(struct page *pg)
160 {
161         return mm_to_dma_pfn(page_to_pfn(pg));
162 }
163 static inline unsigned long virt_to_dma_pfn(void *p)
164 {
165         return page_to_dma_pfn(virt_to_page(p));
166 }
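/*
 * Example (added): on x86 PAGE_SHIFT == VTD_PAGE_SHIFT == 12, so the
 * mm<->dma PFN conversions above are identity operations; with 64KiB
 * MM pages one mm PFN would correspond to sixteen 4KiB VT-d PFNs.
 */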
167
168 /* global iommu list, set NULL for ignored DMAR units */
169 static struct intel_iommu **g_iommus;
170
171 static void __init check_tylersburg_isoch(void);
172 static int rwbf_quirk;
173
174 /*
175  * set to 1 to panic kernel if can't successfully enable VT-d
176  * (used when kernel is launched w/ TXT)
177  */
178 static int force_on = 0;
179
180 /*
181  * 0: Present
182  * 1-11: Reserved
183  * 12-63: Context Ptr (12 - (haw-1))
184  * 64-127: Reserved
185  */
186 struct root_entry {
187         u64     val;
188         u64     rsvd1;
189 };
190 #define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
191 static inline bool root_present(struct root_entry *root)
192 {
193         return (root->val & 1);
194 }
195 static inline void set_root_present(struct root_entry *root)
196 {
197         root->val |= 1;
198 }
199 static inline void set_root_value(struct root_entry *root, unsigned long value)
200 {
201         root->val &= ~VTD_PAGE_MASK;
202         root->val |= value & VTD_PAGE_MASK;
203 }
204
205 static inline struct context_entry *
206 get_context_addr_from_root(struct root_entry *root)
207 {
208         return (struct context_entry *)
209                 (root_present(root)?phys_to_virt(
210                 root->val & VTD_PAGE_MASK) :
211                 NULL);
212 }
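/*
 * Usage sketch (added; this mirrors device_to_context_entry() further
 * down, which is where a root entry actually gets populated):
 *
 *	phy_addr = virt_to_phys((void *)context_table);
 *	set_root_value(root, phy_addr);
 *	set_root_present(root);
 *	__iommu_flush_cache(iommu, root, sizeof(*root));
 */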
213
214 /*
215  * low 64 bits:
216  * 0: present
217  * 1: fault processing disable
218  * 2-3: translation type
219  * 12-63: address space root
220  * high 64 bits:
221  * 0-2: address width
222  * 3-6: avail (available to software)
223  * 8-23: domain id
224  */
225 struct context_entry {
226         u64 lo;
227         u64 hi;
228 };
229
230 static inline bool context_present(struct context_entry *context)
231 {
232         return (context->lo & 1);
233 }
234 static inline void context_set_present(struct context_entry *context)
235 {
236         context->lo |= 1;
237 }
238
239 static inline void context_set_fault_enable(struct context_entry *context)
240 {
241         context->lo &= (((u64)-1) << 2) | 1;
242 }
243
244 static inline void context_set_translation_type(struct context_entry *context,
245                                                 unsigned long value)
246 {
247         context->lo &= (((u64)-1) << 4) | 3;
248         context->lo |= (value & 3) << 2;
249 }
250
251 static inline void context_set_address_root(struct context_entry *context,
252                                             unsigned long value)
253 {
254         context->lo &= ~VTD_PAGE_MASK;
255         context->lo |= value & VTD_PAGE_MASK;
256 }
257
258 static inline void context_set_address_width(struct context_entry *context,
259                                              unsigned long value)
260 {
261         context->hi |= value & 7;
262 }
263
264 static inline void context_set_domain_id(struct context_entry *context,
265                                          unsigned long value)
266 {
267         context->hi |= (value & ((1 << 16) - 1)) << 8;
268 }
269
270 static inline void context_clear_entry(struct context_entry *context)
271 {
272         context->lo = 0;
273         context->hi = 0;
274 }
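/*
 * Minimal sketch (added for illustration; the real programming sequence
 * lives in the context-mapping code later in this file) of how these
 * helpers compose a present context entry for a multi-level page table:
 *
 *	context_set_domain_id(context, id);
 *	context_set_address_width(context, domain->agaw);
 *	context_set_address_root(context, virt_to_phys(domain->pgd));
 *	context_set_translation_type(context, CONTEXT_TT_MULTI_LEVEL);
 *	context_set_fault_enable(context);
 *	context_set_present(context);
 */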
275
276 /*
277  * 0: readable
278  * 1: writable
279  * 2-6: reserved
280  * 7: super page
281  * 8-10: available
282  * 11: snoop behavior
283  * 12-63: Host physical address
284  */
285 struct dma_pte {
286         u64 val;
287 };
288
289 static inline void dma_clear_pte(struct dma_pte *pte)
290 {
291         pte->val = 0;
292 }
293
294 static inline u64 dma_pte_addr(struct dma_pte *pte)
295 {
296 #ifdef CONFIG_64BIT
297         return pte->val & VTD_PAGE_MASK;
298 #else
299         /* Must have a full atomic 64-bit read */
300         return  __cmpxchg64(&pte->val, 0ULL, 0ULL) & VTD_PAGE_MASK;
301 #endif
302 }
303
304 static inline bool dma_pte_present(struct dma_pte *pte)
305 {
306         return (pte->val & 3) != 0;
307 }
308
309 static inline bool dma_pte_superpage(struct dma_pte *pte)
310 {
311         return (pte->val & DMA_PTE_LARGE_PAGE);
312 }
313
314 static inline int first_pte_in_page(struct dma_pte *pte)
315 {
316         return !((unsigned long)pte & ~VTD_PAGE_MASK);
317 }
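/*
 * Example (added): a PTE pointing at a lower-level table is built the
 * way pfn_to_dma_pte() does below:
 *
 *	pte->val = ((u64)page_pfn << VTD_PAGE_SHIFT) |
 *		   DMA_PTE_READ | DMA_PTE_WRITE;
 *
 * with DMA_PTE_LARGE_PAGE OR'd in when a leaf entry maps a 2MiB or
 * 1GiB superpage ("page_pfn" here is just an illustrative name).
 */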
318
319 /*
320  * This domain is a static identity mapping domain.
321  *      1. This domain creates a static 1:1 mapping to all usable memory.
322  *      2. It maps to each iommu if successful.
323  *      3. Each iommu maps to this domain if successful.
324  */
325 static struct dmar_domain *si_domain;
326 static int hw_pass_through = 1;
327
328 /* The domain represents a virtual machine; more than one device
329  * across iommus may be owned by one domain, e.g. a kvm guest.
330  */
331 #define DOMAIN_FLAG_VIRTUAL_MACHINE     (1 << 0)
332
333 /* si_domain contains multiple devices */
334 #define DOMAIN_FLAG_STATIC_IDENTITY     (1 << 1)
335
336 struct dmar_domain {
337         int     id;                     /* domain id */
338         int     nid;                    /* node id */
339         DECLARE_BITMAP(iommu_bmp, DMAR_UNITS_SUPPORTED);
340                                         /* bitmap of iommus this domain uses*/
341
342         struct list_head devices;       /* all devices' list */
343         struct iova_domain iovad;       /* iova's that belong to this domain */
344
345         struct dma_pte  *pgd;           /* virtual address */
346         int             gaw;            /* max guest address width */
347
348         /* adjusted guest address width, 0 is level 2 30-bit */
349         int             agaw;
350
351         int             flags;          /* flags to find out type of domain */
352
353         int             iommu_coherency;/* indicate coherency of iommu access */
354         int             iommu_snooping; /* indicate snooping control feature*/
355         int             iommu_count;    /* reference count of iommu */
356         int             iommu_superpage;/* Level of superpages supported:
357                                            0 == 4KiB (no superpages), 1 == 2MiB,
358                                            2 == 1GiB, 3 == 512GiB, 4 == 1TiB */
359         spinlock_t      iommu_lock;     /* protect iommu set in domain */
360         u64             max_addr;       /* maximum mapped address */
361
362         struct iommu_domain domain;     /* generic domain data structure for
363                                            iommu core */
364 };
365
366 /* PCI domain-device relationship */
367 struct device_domain_info {
368         struct list_head link;  /* link to domain siblings */
369         struct list_head global; /* link to global list */
370         u8 bus;                 /* PCI bus number */
371         u8 devfn;               /* PCI devfn number */
372         struct device *dev; /* it's NULL for PCIe-to-PCI bridge */
373         struct intel_iommu *iommu; /* IOMMU used by this device */
374         struct dmar_domain *domain; /* pointer to domain */
375 };
376
377 struct dmar_rmrr_unit {
378         struct list_head list;          /* list of rmrr units   */
379         struct acpi_dmar_header *hdr;   /* ACPI header          */
380         u64     base_address;           /* reserved base address*/
381         u64     end_address;            /* reserved end address */
382         struct dmar_dev_scope *devices; /* target devices */
383         int     devices_cnt;            /* target device count */
384 };
385
386 struct dmar_atsr_unit {
387         struct list_head list;          /* list of ATSR units */
388         struct acpi_dmar_header *hdr;   /* ACPI header */
389         struct dmar_dev_scope *devices; /* target devices */
390         int devices_cnt;                /* target device count */
391         u8 include_all:1;               /* include all ports */
392 };
393
394 static LIST_HEAD(dmar_atsr_units);
395 static LIST_HEAD(dmar_rmrr_units);
396
397 #define for_each_rmrr_units(rmrr) \
398         list_for_each_entry(rmrr, &dmar_rmrr_units, list)
399
400 static void flush_unmaps_timeout(unsigned long data);
401
402 static DEFINE_TIMER(unmap_timer,  flush_unmaps_timeout, 0, 0);
403
404 #define HIGH_WATER_MARK 250
405 struct deferred_flush_tables {
406         int next;
407         struct iova *iova[HIGH_WATER_MARK];
408         struct dmar_domain *domain[HIGH_WATER_MARK];
409         struct page *freelist[HIGH_WATER_MARK];
410 };
411
412 static struct deferred_flush_tables *deferred_flush;
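/*
 * Note (added): in non-strict mode, unmapped IOVAs are parked in these
 * tables (up to HIGH_WATER_MARK entries each) and released in batches
 * by flush_unmaps_timeout() declared above; the "intel_iommu=strict"
 * option parsed below disables this batching.
 */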
413
414 /* number of iommus, used to size and index the g_iommus array */
415 static int g_num_of_iommus;
416
417 static DEFINE_SPINLOCK(async_umap_flush_lock);
418 static LIST_HEAD(unmaps_to_do);
419
420 static int timer_on;
421 static long list_size;
422
423 static void domain_exit(struct dmar_domain *domain);
424 static void domain_remove_dev_info(struct dmar_domain *domain);
425 static void domain_remove_one_dev_info(struct dmar_domain *domain,
426                                        struct device *dev);
427 static void iommu_detach_dependent_devices(struct intel_iommu *iommu,
428                                            struct device *dev);
429 static int domain_detach_iommu(struct dmar_domain *domain,
430                                struct intel_iommu *iommu);
431
432 #ifdef CONFIG_INTEL_IOMMU_DEFAULT_ON
433 int dmar_disabled = 0;
434 #else
435 int dmar_disabled = 1;
436 #endif /*CONFIG_INTEL_IOMMU_DEFAULT_ON*/
437
438 int intel_iommu_enabled = 0;
439 EXPORT_SYMBOL_GPL(intel_iommu_enabled);
440
441 static int dmar_map_gfx = 1;
442 static int dmar_forcedac;
443 static int intel_iommu_strict;
444 static int intel_iommu_superpage = 1;
445
446 int intel_iommu_gfx_mapped;
447 EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped);
448
449 #define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
450 static DEFINE_SPINLOCK(device_domain_lock);
451 static LIST_HEAD(device_domain_list);
452
453 static const struct iommu_ops intel_iommu_ops;
454
455 /* Convert a generic struct iommu_domain to the private struct dmar_domain */
456 static struct dmar_domain *to_dmar_domain(struct iommu_domain *dom)
457 {
458         return container_of(dom, struct dmar_domain, domain);
459 }
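/*
 * Example (added): callbacks in intel_iommu_ops receive the generic
 * struct iommu_domain pointer and recover the driver-private state via
 *
 *	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
 */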
460
461 static int __init intel_iommu_setup(char *str)
462 {
463         if (!str)
464                 return -EINVAL;
465         while (*str) {
466                 if (!strncmp(str, "on", 2)) {
467                         dmar_disabled = 0;
468                         printk(KERN_INFO "Intel-IOMMU: enabled\n");
469                 } else if (!strncmp(str, "off", 3)) {
470                         dmar_disabled = 1;
471                         printk(KERN_INFO "Intel-IOMMU: disabled\n");
472                 } else if (!strncmp(str, "igfx_off", 8)) {
473                         dmar_map_gfx = 0;
474                         printk(KERN_INFO
475                                 "Intel-IOMMU: disable GFX device mapping\n");
476                 } else if (!strncmp(str, "forcedac", 8)) {
477                         printk(KERN_INFO
478                                 "Intel-IOMMU: Forcing DAC for PCI devices\n");
479                         dmar_forcedac = 1;
480                 } else if (!strncmp(str, "strict", 6)) {
481                         printk(KERN_INFO
482                                 "Intel-IOMMU: disable batched IOTLB flush\n");
483                         intel_iommu_strict = 1;
484                 } else if (!strncmp(str, "sp_off", 6)) {
485                         printk(KERN_INFO
486                                 "Intel-IOMMU: disable superpage support\n");
487                         intel_iommu_superpage = 0;
488                 }
489
490                 str += strcspn(str, ",");
491                 while (*str == ',')
492                         str++;
493         }
494         return 0;
495 }
496 __setup("intel_iommu=", intel_iommu_setup);
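/*
 * Example (added): booting with "intel_iommu=on,strict,sp_off" enables
 * the IOMMU, disables batched IOTLB flushing and disables superpage
 * use; options are comma-separated, as parsed above.
 */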
497
498 static struct kmem_cache *iommu_domain_cache;
499 static struct kmem_cache *iommu_devinfo_cache;
500
501 static inline void *alloc_pgtable_page(int node)
502 {
503         struct page *page;
504         void *vaddr = NULL;
505
506         page = alloc_pages_node(node, GFP_ATOMIC | __GFP_ZERO, 0);
507         if (page)
508                 vaddr = page_address(page);
509         return vaddr;
510 }
511
512 static inline void free_pgtable_page(void *vaddr)
513 {
514         free_page((unsigned long)vaddr);
515 }
516
517 static inline void *alloc_domain_mem(void)
518 {
519         return kmem_cache_alloc(iommu_domain_cache, GFP_ATOMIC);
520 }
521
522 static void free_domain_mem(void *vaddr)
523 {
524         kmem_cache_free(iommu_domain_cache, vaddr);
525 }
526
527 static inline void *alloc_devinfo_mem(void)
528 {
529         return kmem_cache_alloc(iommu_devinfo_cache, GFP_ATOMIC);
530 }
531
532 static inline void free_devinfo_mem(void *vaddr)
533 {
534         kmem_cache_free(iommu_devinfo_cache, vaddr);
535 }
536
537 static inline int domain_type_is_vm(struct dmar_domain *domain)
538 {
539         return domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE;
540 }
541
542 static inline int domain_type_is_vm_or_si(struct dmar_domain *domain)
543 {
544         return domain->flags & (DOMAIN_FLAG_VIRTUAL_MACHINE |
545                                 DOMAIN_FLAG_STATIC_IDENTITY);
546 }
547
548 static inline int domain_pfn_supported(struct dmar_domain *domain,
549                                        unsigned long pfn)
550 {
551         int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
552
553         return !(addr_width < BITS_PER_LONG && pfn >> addr_width);
554 }
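/*
 * Example (added): with agaw == 2 the domain addresses 48 bits, so
 * addr_width == 36 and any PFN with bits set at or above bit 36 is
 * rejected.
 */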
555
556 static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw)
557 {
558         unsigned long sagaw;
559         int agaw = -1;
560
561         sagaw = cap_sagaw(iommu->cap);
562         for (agaw = width_to_agaw(max_gaw);
563              agaw >= 0; agaw--) {
564                 if (test_bit(agaw, &sagaw))
565                         break;
566         }
567
568         return agaw;
569 }
570
571 /*
572  * Calculate max SAGAW for each iommu.
573  */
574 int iommu_calculate_max_sagaw(struct intel_iommu *iommu)
575 {
576         return __iommu_calculate_agaw(iommu, MAX_AGAW_WIDTH);
577 }
578
579 /*
580  * Calculate the agaw for each iommu.
581  * "SAGAW" may differ across iommus; use a default agaw, and fall back
582  * to a smaller supported agaw for iommus that don't support the default.
583  */
584 int iommu_calculate_agaw(struct intel_iommu *iommu)
585 {
586         return __iommu_calculate_agaw(iommu, DEFAULT_DOMAIN_ADDRESS_WIDTH);
587 }
588
589 /* This function only returns a single iommu in a domain */
590 static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
591 {
592         int iommu_id;
593
594         /* si_domain and vm domain should not get here. */
595         BUG_ON(domain_type_is_vm_or_si(domain));
596         iommu_id = find_first_bit(domain->iommu_bmp, g_num_of_iommus);
597         if (iommu_id < 0 || iommu_id >= g_num_of_iommus)
598                 return NULL;
599
600         return g_iommus[iommu_id];
601 }
602
603 static void domain_update_iommu_coherency(struct dmar_domain *domain)
604 {
605         struct dmar_drhd_unit *drhd;
606         struct intel_iommu *iommu;
607         int i, found = 0;
608
609         domain->iommu_coherency = 1;
610
611         for_each_set_bit(i, domain->iommu_bmp, g_num_of_iommus) {
612                 found = 1;
613                 if (!ecap_coherent(g_iommus[i]->ecap)) {
614                         domain->iommu_coherency = 0;
615                         break;
616                 }
617         }
618         if (found)
619                 return;
620
621         /* No hardware attached; use lowest common denominator */
622         rcu_read_lock();
623         for_each_active_iommu(iommu, drhd) {
624                 if (!ecap_coherent(iommu->ecap)) {
625                         domain->iommu_coherency = 0;
626                         break;
627                 }
628         }
629         rcu_read_unlock();
630 }
631
632 static int domain_update_iommu_snooping(struct intel_iommu *skip)
633 {
634         struct dmar_drhd_unit *drhd;
635         struct intel_iommu *iommu;
636         int ret = 1;
637
638         rcu_read_lock();
639         for_each_active_iommu(iommu, drhd) {
640                 if (iommu != skip) {
641                         if (!ecap_sc_support(iommu->ecap)) {
642                                 ret = 0;
643                                 break;
644                         }
645                 }
646         }
647         rcu_read_unlock();
648
649         return ret;
650 }
651
652 static int domain_update_iommu_superpage(struct intel_iommu *skip)
653 {
654         struct dmar_drhd_unit *drhd;
655         struct intel_iommu *iommu;
656         int mask = 0xf;
657
658         if (!intel_iommu_superpage) {
659                 return 0;
660         }
661
662         /* set iommu_superpage to the smallest common denominator */
663         rcu_read_lock();
664         for_each_active_iommu(iommu, drhd) {
665                 if (iommu != skip) {
666                         mask &= cap_super_page_val(iommu->cap);
667                         if (!mask)
668                                 break;
669                 }
670         }
671         rcu_read_unlock();
672
673         return fls(mask);
674 }
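/*
 * Example (added): if every IOMMU reports both 2MiB and 1GiB superpage
 * support, cap_super_page_val() leaves mask == 0x3, so fls(mask) == 2
 * and iommu_superpage ends up as 2 (1GiB) per the encoding documented
 * in struct dmar_domain above.
 */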
675
676 /* Some capabilities may be different across iommus */
677 static void domain_update_iommu_cap(struct dmar_domain *domain)
678 {
679         domain_update_iommu_coherency(domain);
680         domain->iommu_snooping = domain_update_iommu_snooping(NULL);
681         domain->iommu_superpage = domain_update_iommu_superpage(NULL);
682 }
683
684 static struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn)
685 {
686         struct dmar_drhd_unit *drhd = NULL;
687         struct intel_iommu *iommu;
688         struct device *tmp;
689         struct pci_dev *ptmp, *pdev = NULL;
690         u16 segment = 0;
691         int i;
692
693         if (dev_is_pci(dev)) {
694                 pdev = to_pci_dev(dev);
695                 segment = pci_domain_nr(pdev->bus);
696         } else if (ACPI_COMPANION(dev))
697                 dev = &ACPI_COMPANION(dev)->dev;
698
699         rcu_read_lock();
700         for_each_active_iommu(iommu, drhd) {
701                 if (pdev && segment != drhd->segment)
702                         continue;
703
704                 for_each_active_dev_scope(drhd->devices,
705                                           drhd->devices_cnt, i, tmp) {
706                         if (tmp == dev) {
707                                 *bus = drhd->devices[i].bus;
708                                 *devfn = drhd->devices[i].devfn;
709                                 goto out;
710                         }
711
712                         if (!pdev || !dev_is_pci(tmp))
713                                 continue;
714
715                         ptmp = to_pci_dev(tmp);
716                         if (ptmp->subordinate &&
717                             ptmp->subordinate->number <= pdev->bus->number &&
718                             ptmp->subordinate->busn_res.end >= pdev->bus->number)
719                                 goto got_pdev;
720                 }
721
722                 if (pdev && drhd->include_all) {
723                 got_pdev:
724                         *bus = pdev->bus->number;
725                         *devfn = pdev->devfn;
726                         goto out;
727                 }
728         }
729         iommu = NULL;
730  out:
731         rcu_read_unlock();
732
733         return iommu;
734 }
735
736 static void domain_flush_cache(struct dmar_domain *domain,
737                                void *addr, int size)
738 {
739         if (!domain->iommu_coherency)
740                 clflush_cache_range(addr, size);
741 }
742
743 /* Gets context entry for a given bus and devfn */
744 static struct context_entry * device_to_context_entry(struct intel_iommu *iommu,
745                 u8 bus, u8 devfn)
746 {
747         struct root_entry *root;
748         struct context_entry *context;
749         unsigned long phy_addr;
750         unsigned long flags;
751
752         spin_lock_irqsave(&iommu->lock, flags);
753         root = &iommu->root_entry[bus];
754         context = get_context_addr_from_root(root);
755         if (!context) {
756                 context = (struct context_entry *)
757                                 alloc_pgtable_page(iommu->node);
758                 if (!context) {
759                         spin_unlock_irqrestore(&iommu->lock, flags);
760                         return NULL;
761                 }
762                 __iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
763                 phy_addr = virt_to_phys((void *)context);
764                 set_root_value(root, phy_addr);
765                 set_root_present(root);
766                 __iommu_flush_cache(iommu, root, sizeof(*root));
767         }
768         spin_unlock_irqrestore(&iommu->lock, flags);
769         return &context[devfn];
770 }
771
772 static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
773 {
774         struct root_entry *root;
775         struct context_entry *context;
776         int ret;
777         unsigned long flags;
778
779         spin_lock_irqsave(&iommu->lock, flags);
780         root = &iommu->root_entry[bus];
781         context = get_context_addr_from_root(root);
782         if (!context) {
783                 ret = 0;
784                 goto out;
785         }
786         ret = context_present(&context[devfn]);
787 out:
788         spin_unlock_irqrestore(&iommu->lock, flags);
789         return ret;
790 }
791
792 static void clear_context_table(struct intel_iommu *iommu, u8 bus, u8 devfn)
793 {
794         struct root_entry *root;
795         struct context_entry *context;
796         unsigned long flags;
797
798         spin_lock_irqsave(&iommu->lock, flags);
799         root = &iommu->root_entry[bus];
800         context = get_context_addr_from_root(root);
801         if (context) {
802                 context_clear_entry(&context[devfn]);
803                 __iommu_flush_cache(iommu, &context[devfn],
804                         sizeof(*context));
805         }
806         spin_unlock_irqrestore(&iommu->lock, flags);
807 }
808
809 static void free_context_table(struct intel_iommu *iommu)
810 {
811         struct root_entry *root;
812         int i;
813         unsigned long flags;
814         struct context_entry *context;
815
816         spin_lock_irqsave(&iommu->lock, flags);
817         if (!iommu->root_entry) {
818                 goto out;
819         }
820         for (i = 0; i < ROOT_ENTRY_NR; i++) {
821                 root = &iommu->root_entry[i];
822                 context = get_context_addr_from_root(root);
823                 if (context)
824                         free_pgtable_page(context);
825         }
826         free_pgtable_page(iommu->root_entry);
827         iommu->root_entry = NULL;
828 out:
829         spin_unlock_irqrestore(&iommu->lock, flags);
830 }
831
832 static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
833                                       unsigned long pfn, int *target_level)
834 {
835         struct dma_pte *parent, *pte = NULL;
836         int level = agaw_to_level(domain->agaw);
837         int offset;
838
839         BUG_ON(!domain->pgd);
840
841         if (!domain_pfn_supported(domain, pfn))
842                 /* Address beyond IOMMU's addressing capabilities. */
843                 return NULL;
844
845         parent = domain->pgd;
846
847         while (1) {
848                 void *tmp_page;
849
850                 offset = pfn_level_offset(pfn, level);
851                 pte = &parent[offset];
852                 if (!*target_level && (dma_pte_superpage(pte) || !dma_pte_present(pte)))
853                         break;
854                 if (level == *target_level)
855                         break;
856
857                 if (!dma_pte_present(pte)) {
858                         uint64_t pteval;
859
860                         tmp_page = alloc_pgtable_page(domain->nid);
861
862                         if (!tmp_page)
863                                 return NULL;
864
865                         domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE);
866                         pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE;
867                         if (cmpxchg64(&pte->val, 0ULL, pteval))
868                                 /* Someone else set it while we were thinking; use theirs. */
869                                 free_pgtable_page(tmp_page);
870                         else
871                                 domain_flush_cache(domain, pte, sizeof(*pte));
872                 }
873                 if (level == 1)
874                         break;
875
876                 parent = phys_to_virt(dma_pte_addr(pte));
877                 level--;
878         }
879
880         if (!*target_level)
881                 *target_level = level;
882
883         return pte;
884 }
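/*
 * Usage note (added): callers pass *target_level == 0 to look up
 * whatever mapping already exists (the level found is written back),
 * or a specific level to have intermediate tables allocated down to
 * that level, e.g. level 2 when a 2MiB superpage is to be installed.
 */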
885
886
887 /* return address's pte at specific level */
888 static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain,
889                                          unsigned long pfn,
890                                          int level, int *large_page)
891 {
892         struct dma_pte *parent, *pte = NULL;
893         int total = agaw_to_level(domain->agaw);
894         int offset;
895
896         parent = domain->pgd;
897         while (level <= total) {
898                 offset = pfn_level_offset(pfn, total);
899                 pte = &parent[offset];
900                 if (level == total)
901                         return pte;
902
903                 if (!dma_pte_present(pte)) {
904                         *large_page = total;
905                         break;
906                 }
907
908                 if (dma_pte_superpage(pte)) {
909                         *large_page = total;
910                         return pte;
911                 }
912
913                 parent = phys_to_virt(dma_pte_addr(pte));
914                 total--;
915         }
916         return NULL;
917 }
918
919 /* clear last level pte, a tlb flush should be followed */
920 static void dma_pte_clear_range(struct dmar_domain *domain,
921                                 unsigned long start_pfn,
922                                 unsigned long last_pfn)
923 {
924         unsigned int large_page = 1;
925         struct dma_pte *first_pte, *pte;
926
927         BUG_ON(!domain_pfn_supported(domain, start_pfn));
928         BUG_ON(!domain_pfn_supported(domain, last_pfn));
929         BUG_ON(start_pfn > last_pfn);
930
931         /* we don't need lock here; nobody else touches the iova range */
932         do {
933                 large_page = 1;
934                 first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1, &large_page);
935                 if (!pte) {
936                         start_pfn = align_to_level(start_pfn + 1, large_page + 1);
937                         continue;
938                 }
939                 do {
940                         dma_clear_pte(pte);
941                         start_pfn += lvl_to_nr_pages(large_page);
942                         pte++;
943                 } while (start_pfn <= last_pfn && !first_pte_in_page(pte));
944
945                 domain_flush_cache(domain, first_pte,
946                                    (void *)pte - (void *)first_pte);
947
948         } while (start_pfn && start_pfn <= last_pfn);
949 }
950
951 static void dma_pte_free_level(struct dmar_domain *domain, int level,
952                                struct dma_pte *pte, unsigned long pfn,
953                                unsigned long start_pfn, unsigned long last_pfn)
954 {
955         pfn = max(start_pfn, pfn);
956         pte = &pte[pfn_level_offset(pfn, level)];
957
958         do {
959                 unsigned long level_pfn;
960                 struct dma_pte *level_pte;
961
962                 if (!dma_pte_present(pte) || dma_pte_superpage(pte))
963                         goto next;
964
965                 level_pfn = pfn & level_mask(level - 1);
966                 level_pte = phys_to_virt(dma_pte_addr(pte));
967
968                 if (level > 2)
969                         dma_pte_free_level(domain, level - 1, level_pte,
970                                            level_pfn, start_pfn, last_pfn);
971
972                 /* If range covers entire pagetable, free it */
973                 if (!(start_pfn > level_pfn ||
974                       last_pfn < level_pfn + level_size(level) - 1)) {
975                         dma_clear_pte(pte);
976                         domain_flush_cache(domain, pte, sizeof(*pte));
977                         free_pgtable_page(level_pte);
978                 }
979 next:
980                 pfn += level_size(level);
981         } while (!first_pte_in_page(++pte) && pfn <= last_pfn);
982 }
983
984 /* free page table pages. last level pte should already be cleared */
985 static void dma_pte_free_pagetable(struct dmar_domain *domain,
986                                    unsigned long start_pfn,
987                                    unsigned long last_pfn)
988 {
989         BUG_ON(!domain_pfn_supported(domain, start_pfn));
990         BUG_ON(!domain_pfn_supported(domain, last_pfn));
991         BUG_ON(start_pfn > last_pfn);
992
993         dma_pte_clear_range(domain, start_pfn, last_pfn);
994
995         /* We don't need lock here; nobody else touches the iova range */
996         dma_pte_free_level(domain, agaw_to_level(domain->agaw),
997                            domain->pgd, 0, start_pfn, last_pfn);
998
999         /* free pgd */
1000         if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
1001                 free_pgtable_page(domain->pgd);
1002                 domain->pgd = NULL;
1003         }
1004 }
1005
1006 /* When a page at a given level is being unlinked from its parent, we don't
1007    need to *modify* it at all. All we need to do is make a list of all the
1008    pages which can be freed just as soon as we've flushed the IOTLB and we
1009    know the hardware page-walk will no longer touch them.
1010    The 'pte' argument is the *parent* PTE, pointing to the page that is to
1011    be freed. */
1012 static struct page *dma_pte_list_pagetables(struct dmar_domain *domain,
1013                                             int level, struct dma_pte *pte,
1014                                             struct page *freelist)
1015 {
1016         struct page *pg;
1017
1018         pg = pfn_to_page(dma_pte_addr(pte) >> PAGE_SHIFT);
1019         pg->freelist = freelist;
1020         freelist = pg;
1021
1022         if (level == 1)
1023                 return freelist;
1024
1025         pte = page_address(pg);
1026         do {
1027                 if (dma_pte_present(pte) && !dma_pte_superpage(pte))
1028                         freelist = dma_pte_list_pagetables(domain, level - 1,
1029                                                            pte, freelist);
1030                 pte++;
1031         } while (!first_pte_in_page(pte));
1032
1033         return freelist;
1034 }
1035
1036 static struct page *dma_pte_clear_level(struct dmar_domain *domain, int level,
1037                                         struct dma_pte *pte, unsigned long pfn,
1038                                         unsigned long start_pfn,
1039                                         unsigned long last_pfn,
1040                                         struct page *freelist)
1041 {
1042         struct dma_pte *first_pte = NULL, *last_pte = NULL;
1043
1044         pfn = max(start_pfn, pfn);
1045         pte = &pte[pfn_level_offset(pfn, level)];
1046
1047         do {
1048                 unsigned long level_pfn;
1049
1050                 if (!dma_pte_present(pte))
1051                         goto next;
1052
1053                 level_pfn = pfn & level_mask(level);
1054
1055                 /* If range covers entire pagetable, free it */
1056                 if (start_pfn <= level_pfn &&
1057                     last_pfn >= level_pfn + level_size(level) - 1) {
1058                         /* These subordinate page tables are going away entirely. Don't
1059                            bother to clear them; we're just going to *free* them. */
1060                         if (level > 1 && !dma_pte_superpage(pte))
1061                                 freelist = dma_pte_list_pagetables(domain, level - 1, pte, freelist);
1062
1063                         dma_clear_pte(pte);
1064                         if (!first_pte)
1065                                 first_pte = pte;
1066                         last_pte = pte;
1067                 } else if (level > 1) {
1068                         /* Recurse down into a level that isn't *entirely* obsolete */
1069                         freelist = dma_pte_clear_level(domain, level - 1,
1070                                                        phys_to_virt(dma_pte_addr(pte)),
1071                                                        level_pfn, start_pfn, last_pfn,
1072                                                        freelist);
1073                 }
1074 next:
1075                 pfn += level_size(level);
1076         } while (!first_pte_in_page(++pte) && pfn <= last_pfn);
1077
1078         if (first_pte)
1079                 domain_flush_cache(domain, first_pte,
1080                                    (void *)++last_pte - (void *)first_pte);
1081
1082         return freelist;
1083 }
1084
1085 /* We can't just free the pages because the IOMMU may still be walking
1086    the page tables, and may have cached the intermediate levels. The
1087    pages can only be freed after the IOTLB flush has been done. */
1088 struct page *domain_unmap(struct dmar_domain *domain,
1089                           unsigned long start_pfn,
1090                           unsigned long last_pfn)
1091 {
1092         struct page *freelist = NULL;
1093
1094         BUG_ON(!domain_pfn_supported(domain, start_pfn));
1095         BUG_ON(!domain_pfn_supported(domain, last_pfn));
1096         BUG_ON(start_pfn > last_pfn);
1097
1098         /* we don't need lock here; nobody else touches the iova range */
1099         freelist = dma_pte_clear_level(domain, agaw_to_level(domain->agaw),
1100                                        domain->pgd, 0, start_pfn, last_pfn, NULL);
1101
1102         /* free pgd */
1103         if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
1104                 struct page *pgd_page = virt_to_page(domain->pgd);
1105                 pgd_page->freelist = freelist;
1106                 freelist = pgd_page;
1107
1108                 domain->pgd = NULL;
1109         }
1110
1111         return freelist;
1112 }
1113
1114 void dma_free_pagelist(struct page *freelist)
1115 {
1116         struct page *pg;
1117
1118         while ((pg = freelist)) {
1119                 freelist = pg->freelist;
1120                 free_pgtable_page(page_address(pg));
1121         }
1122 }
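/*
 * Usage sketch (added; assumes a domain-selective flush is sufficient
 * and "did" is the domain id the domain is attached under):
 *
 *	freelist = domain_unmap(domain, start_pfn, last_pfn);
 *	iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);
 *	dma_free_pagelist(freelist);
 *
 * i.e. page-table pages go back to the allocator only after the IOTLB
 * flush, as the comment above domain_unmap() requires.
 */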
1123
1124 /* iommu handling */
1125 static int iommu_alloc_root_entry(struct intel_iommu *iommu)
1126 {
1127         struct root_entry *root;
1128         unsigned long flags;
1129
1130         root = (struct root_entry *)alloc_pgtable_page(iommu->node);
1131         if (!root) {
1132                 pr_err("IOMMU: allocating root entry for %s failed\n",
1133                         iommu->name);
1134                 return -ENOMEM;
1135         }
1136
1137         __iommu_flush_cache(iommu, root, ROOT_SIZE);
1138
1139         spin_lock_irqsave(&iommu->lock, flags);
1140         iommu->root_entry = root;
1141         spin_unlock_irqrestore(&iommu->lock, flags);
1142
1143         return 0;
1144 }
1145
1146 static void iommu_set_root_entry(struct intel_iommu *iommu)
1147 {
1148         void *addr;
1149         u32 sts;
1150         unsigned long flag;
1151
1152         addr = iommu->root_entry;
1153
1154         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1155         dmar_writeq(iommu->reg + DMAR_RTADDR_REG, virt_to_phys(addr));
1156
1157         writel(iommu->gcmd | DMA_GCMD_SRTP, iommu->reg + DMAR_GCMD_REG);
1158
1159         /* Make sure hardware completes it */
1160         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1161                       readl, (sts & DMA_GSTS_RTPS), sts);
1162
1163         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1164 }
1165
1166 static void iommu_flush_write_buffer(struct intel_iommu *iommu)
1167 {
1168         u32 val;
1169         unsigned long flag;
1170
1171         if (!rwbf_quirk && !cap_rwbf(iommu->cap))
1172                 return;
1173
1174         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1175         writel(iommu->gcmd | DMA_GCMD_WBF, iommu->reg + DMAR_GCMD_REG);
1176
1177         /* Make sure hardware completes it */
1178         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1179                       readl, (!(val & DMA_GSTS_WBFS)), val);
1180
1181         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1182 }
1183
1184 /* return value determines if we need a write buffer flush */
1185 static void __iommu_flush_context(struct intel_iommu *iommu,
1186                                   u16 did, u16 source_id, u8 function_mask,
1187                                   u64 type)
1188 {
1189         u64 val = 0;
1190         unsigned long flag;
1191
1192         switch (type) {
1193         case DMA_CCMD_GLOBAL_INVL:
1194                 val = DMA_CCMD_GLOBAL_INVL;
1195                 break;
1196         case DMA_CCMD_DOMAIN_INVL:
1197                 val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
1198                 break;
1199         case DMA_CCMD_DEVICE_INVL:
1200                 val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
1201                         | DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
1202                 break;
1203         default:
1204                 BUG();
1205         }
1206         val |= DMA_CCMD_ICC;
1207
1208         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1209         dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);
1210
1211         /* Make sure hardware completes it */
1212         IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
1213                 dmar_readq, (!(val & DMA_CCMD_ICC)), val);
1214
1215         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1216 }
1217
1218 /* return value determines if we need a write buffer flush */
1219 static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
1220                                 u64 addr, unsigned int size_order, u64 type)
1221 {
1222         int tlb_offset = ecap_iotlb_offset(iommu->ecap);
1223         u64 val = 0, val_iva = 0;
1224         unsigned long flag;
1225
1226         switch (type) {
1227         case DMA_TLB_GLOBAL_FLUSH:
1228                 /* global flush doesn't need to set IVA_REG */
1229                 val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
1230                 break;
1231         case DMA_TLB_DSI_FLUSH:
1232                 val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
1233                 break;
1234         case DMA_TLB_PSI_FLUSH:
1235                 val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
1236                 /* IH bit is passed in as part of address */
1237                 val_iva = size_order | addr;
1238                 break;
1239         default:
1240                 BUG();
1241         }
1242         /* Note: set drain read/write */
1243 #if 0
1244         /*
1245          * This is probably only needed to be extra safe.  It looks like
1246          * we can ignore it without any impact.
1247          */
1248         if (cap_read_drain(iommu->cap))
1249                 val |= DMA_TLB_READ_DRAIN;
1250 #endif
1251         if (cap_write_drain(iommu->cap))
1252                 val |= DMA_TLB_WRITE_DRAIN;
1253
1254         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1255         /* Note: Only uses first TLB reg currently */
1256         if (val_iva)
1257                 dmar_writeq(iommu->reg + tlb_offset, val_iva);
1258         dmar_writeq(iommu->reg + tlb_offset + 8, val);
1259
1260         /* Make sure hardware completes it */
1261         IOMMU_WAIT_OP(iommu, tlb_offset + 8,
1262                 dmar_readq, (!(val & DMA_TLB_IVT)), val);
1263
1264         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1265
1266         /* check IOTLB invalidation granularity */
1267         if (DMA_TLB_IAIG(val) == 0)
1268                 printk(KERN_ERR "IOMMU: flush IOTLB failed\n");
1269         if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
1270                 pr_debug("IOMMU: tlb flush request %Lx, actual %Lx\n",
1271                         (unsigned long long)DMA_TLB_IIRG(type),
1272                         (unsigned long long)DMA_TLB_IAIG(val));
1273 }
1274
1275 static struct device_domain_info *
1276 iommu_support_dev_iotlb(struct dmar_domain *domain, struct intel_iommu *iommu,
1277                          u8 bus, u8 devfn)
1278 {
1279         int found = 0;
1280         unsigned long flags;
1281         struct device_domain_info *info;
1282         struct pci_dev *pdev;
1283
1284         if (!ecap_dev_iotlb_support(iommu->ecap))
1285                 return NULL;
1286
1287         if (!iommu->qi)
1288                 return NULL;
1289
1290         spin_lock_irqsave(&device_domain_lock, flags);
1291         list_for_each_entry(info, &domain->devices, link)
1292                 if (info->iommu == iommu && info->bus == bus &&
1293                     info->devfn == devfn) {
1294                         found = 1;
1295                         break;
1296                 }
1297         spin_unlock_irqrestore(&device_domain_lock, flags);
1298
1299         if (!found || !info->dev || !dev_is_pci(info->dev))
1300                 return NULL;
1301
1302         pdev = to_pci_dev(info->dev);
1303
1304         if (!pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ATS))
1305                 return NULL;
1306
1307         if (!dmar_find_matched_atsr_unit(pdev))
1308                 return NULL;
1309
1310         return info;
1311 }
1312
1313 static void iommu_enable_dev_iotlb(struct device_domain_info *info)
1314 {
1315         if (!info || !dev_is_pci(info->dev))
1316                 return;
1317
1318         pci_enable_ats(to_pci_dev(info->dev), VTD_PAGE_SHIFT);
1319 }
1320
1321 static void iommu_disable_dev_iotlb(struct device_domain_info *info)
1322 {
1323         if (!info->dev || !dev_is_pci(info->dev) ||
1324             !pci_ats_enabled(to_pci_dev(info->dev)))
1325                 return;
1326
1327         pci_disable_ats(to_pci_dev(info->dev));
1328 }
1329
1330 static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
1331                                   u64 addr, unsigned mask)
1332 {
1333         u16 sid, qdep;
1334         unsigned long flags;
1335         struct device_domain_info *info;
1336
1337         spin_lock_irqsave(&device_domain_lock, flags);
1338         list_for_each_entry(info, &domain->devices, link) {
1339                 struct pci_dev *pdev;
1340                 if (!info->dev || !dev_is_pci(info->dev))
1341                         continue;
1342
1343                 pdev = to_pci_dev(info->dev);
1344                 if (!pci_ats_enabled(pdev))
1345                         continue;
1346
1347                 sid = info->bus << 8 | info->devfn;
1348                 qdep = pci_ats_queue_depth(pdev);
1349                 qi_flush_dev_iotlb(info->iommu, sid, qdep, addr, mask);
1350         }
1351         spin_unlock_irqrestore(&device_domain_lock, flags);
1352 }
1353
1354 static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
1355                                   unsigned long pfn, unsigned int pages, int ih, int map)
1356 {
1357         unsigned int mask = ilog2(__roundup_pow_of_two(pages));
1358         uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT;
1359
1360         BUG_ON(pages == 0);
1361
1362         if (ih)
1363                 ih = 1 << 6;
1364         /*
1365          * Fall back to domain-selective flush if there is no PSI support
1366          * or the size is too big.
1367          * PSI requires the page size to be 2^x, and the base address to be
1368          * naturally aligned to the size.
1369          */
1370         if (!cap_pgsel_inv(iommu->cap) || mask > cap_max_amask_val(iommu->cap))
1371                 iommu->flush.flush_iotlb(iommu, did, 0, 0,
1372                                                 DMA_TLB_DSI_FLUSH);
1373         else
1374                 iommu->flush.flush_iotlb(iommu, did, addr | ih, mask,
1375                                                 DMA_TLB_PSI_FLUSH);
1376
1377         /*
1378          * In caching mode, changes of pages from non-present to present require
1379          * flush. However, device IOTLB doesn't need to be flushed in this case.
1380          */
1381         if (!cap_caching_mode(iommu->cap) || !map)
1382                 iommu_flush_dev_iotlb(iommu->domains[did], addr, mask);
1383 }
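/*
 * Example (added): a request to flush 9 pages is rounded up to
 * __roundup_pow_of_two(9) == 16, so mask == 4 and the hardware
 * invalidates a naturally aligned 16-page (64KiB) region containing
 * the request; if mask exceeds cap_max_amask_val() the code above
 * falls back to a domain-selective flush instead.
 */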
1384
1385 static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
1386 {
1387         u32 pmen;
1388         unsigned long flags;
1389
1390         raw_spin_lock_irqsave(&iommu->register_lock, flags);
1391         pmen = readl(iommu->reg + DMAR_PMEN_REG);
1392         pmen &= ~DMA_PMEN_EPM;
1393         writel(pmen, iommu->reg + DMAR_PMEN_REG);
1394
1395         /* wait for the protected region status bit to clear */
1396         IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
1397                 readl, !(pmen & DMA_PMEN_PRS), pmen);
1398
1399         raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1400 }
1401
1402 static void iommu_enable_translation(struct intel_iommu *iommu)
1403 {
1404         u32 sts;
1405         unsigned long flags;
1406
1407         raw_spin_lock_irqsave(&iommu->register_lock, flags);
1408         iommu->gcmd |= DMA_GCMD_TE;
1409         writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1410
1411         /* Make sure hardware completes it */
1412         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1413                       readl, (sts & DMA_GSTS_TES), sts);
1414
1415         raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1416 }
1417
1418 static void iommu_disable_translation(struct intel_iommu *iommu)
1419 {
1420         u32 sts;
1421         unsigned long flag;
1422
1423         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1424         iommu->gcmd &= ~DMA_GCMD_TE;
1425         writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1426
1427         /* Make sure hardware completes it */
1428         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1429                       readl, (!(sts & DMA_GSTS_TES)), sts);
1430
1431         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1432 }
1433
1434
1435 static int iommu_init_domains(struct intel_iommu *iommu)
1436 {
1437         unsigned long ndomains;
1438         unsigned long nlongs;
1439
1440         ndomains = cap_ndoms(iommu->cap);
1441         pr_debug("IOMMU%d: Number of Domains supported <%ld>\n",
1442                  iommu->seq_id, ndomains);
1443         nlongs = BITS_TO_LONGS(ndomains);
1444
1445         spin_lock_init(&iommu->lock);
1446
1447         /* TBD: there might be 64K domains;
1448          * consider a different allocation scheme for future chips
1449          */
1450         iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
1451         if (!iommu->domain_ids) {
1452                 pr_err("IOMMU%d: allocating domain id array failed\n",
1453                        iommu->seq_id);
1454                 return -ENOMEM;
1455         }
1456         iommu->domains = kcalloc(ndomains, sizeof(struct dmar_domain *),
1457                         GFP_KERNEL);
1458         if (!iommu->domains) {
1459                 pr_err("IOMMU%d: allocating domain array failed\n",
1460                        iommu->seq_id);
1461                 kfree(iommu->domain_ids);
1462                 iommu->domain_ids = NULL;
1463                 return -ENOMEM;
1464         }
1465
1466         /*
1467          * If caching mode is set, then invalid translations are tagged
1468          * with domain id 0, hence we need to pre-allocate it.
1469          */
1470         if (cap_caching_mode(iommu->cap))
1471                 set_bit(0, iommu->domain_ids);
1472         return 0;
1473 }
1474
1475 static void disable_dmar_iommu(struct intel_iommu *iommu)
1476 {
1477         struct dmar_domain *domain;
1478         int i;
1479
1480         if ((iommu->domains) && (iommu->domain_ids)) {
1481                 for_each_set_bit(i, iommu->domain_ids, cap_ndoms(iommu->cap)) {
1482                         /*
1483                          * Domain id 0 is reserved for invalid translation
1484                          * if hardware supports caching mode.
1485                          */
1486                         if (cap_caching_mode(iommu->cap) && i == 0)
1487                                 continue;
1488
1489                         domain = iommu->domains[i];
1490                         clear_bit(i, iommu->domain_ids);
1491                         if (domain_detach_iommu(domain, iommu) == 0 &&
1492                             !domain_type_is_vm(domain))
1493                                 domain_exit(domain);
1494                 }
1495         }
1496
1497         if (iommu->gcmd & DMA_GCMD_TE)
1498                 iommu_disable_translation(iommu);
1499 }
1500
1501 static void free_dmar_iommu(struct intel_iommu *iommu)
1502 {
1503         if ((iommu->domains) && (iommu->domain_ids)) {
1504                 kfree(iommu->domains);
1505                 kfree(iommu->domain_ids);
1506                 iommu->domains = NULL;
1507                 iommu->domain_ids = NULL;
1508         }
1509
1510         g_iommus[iommu->seq_id] = NULL;
1511
1512         /* free context mapping */
1513         free_context_table(iommu);
1514 }
1515
1516 static struct dmar_domain *alloc_domain(int flags)
1517 {
1518         /* domain id for a virtual machine; it won't be set in the context entry */
1519         static atomic_t vm_domid = ATOMIC_INIT(0);
1520         struct dmar_domain *domain;
1521
1522         domain = alloc_domain_mem();
1523         if (!domain)
1524                 return NULL;
1525
1526         memset(domain, 0, sizeof(*domain));
1527         domain->nid = -1;
1528         domain->flags = flags;
1529         spin_lock_init(&domain->iommu_lock);
1530         INIT_LIST_HEAD(&domain->devices);
1531         if (flags & DOMAIN_FLAG_VIRTUAL_MACHINE)
1532                 domain->id = atomic_inc_return(&vm_domid);
1533
1534         return domain;
1535 }
1536
1537 static int __iommu_attach_domain(struct dmar_domain *domain,
1538                                  struct intel_iommu *iommu)
1539 {
1540         int num;
1541         unsigned long ndomains;
1542
1543         ndomains = cap_ndoms(iommu->cap);
1544         num = find_first_zero_bit(iommu->domain_ids, ndomains);
1545         if (num < ndomains) {
1546                 set_bit(num, iommu->domain_ids);
1547                 iommu->domains[num] = domain;
1548         } else {
1549                 num = -ENOSPC;
1550         }
1551
1552         return num;
1553 }
1554
1555 static int iommu_attach_domain(struct dmar_domain *domain,
1556                                struct intel_iommu *iommu)
1557 {
1558         int num;
1559         unsigned long flags;
1560
1561         spin_lock_irqsave(&iommu->lock, flags);
1562         num = __iommu_attach_domain(domain, iommu);
1563         spin_unlock_irqrestore(&iommu->lock, flags);
1564         if (num < 0)
1565                 pr_err("IOMMU: no free domain ids\n");
1566
1567         return num;
1568 }
1569
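/*
 * A VM domain may already hold an id on this iommu; reuse it if so,
 * otherwise allocate a fresh one.
 */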
1570 static int iommu_attach_vm_domain(struct dmar_domain *domain,
1571                                   struct intel_iommu *iommu)
1572 {
1573         int num;
1574         unsigned long ndomains;
1575
1576         ndomains = cap_ndoms(iommu->cap);
1577         for_each_set_bit(num, iommu->domain_ids, ndomains)
1578                 if (iommu->domains[num] == domain)
1579                         return num;
1580
1581         return __iommu_attach_domain(domain, iommu);
1582 }
1583
1584 static void iommu_detach_domain(struct dmar_domain *domain,
1585                                 struct intel_iommu *iommu)
1586 {
1587         unsigned long flags;
1588         int num, ndomains;
1589
1590         spin_lock_irqsave(&iommu->lock, flags);
1591         if (domain_type_is_vm_or_si(domain)) {
1592                 ndomains = cap_ndoms(iommu->cap);
1593                 for_each_set_bit(num, iommu->domain_ids, ndomains) {
1594                         if (iommu->domains[num] == domain) {
1595                                 clear_bit(num, iommu->domain_ids);
1596                                 iommu->domains[num] = NULL;
1597                                 break;
1598                         }
1599                 }
1600         } else {
1601                 clear_bit(domain->id, iommu->domain_ids);
1602                 iommu->domains[domain->id] = NULL;
1603         }
1604         spin_unlock_irqrestore(&iommu->lock, flags);
1605 }
1606
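/*
 * Per-domain bookkeeping: mark @iommu as referencing @domain in the
 * domain's iommu bitmap and refcount, and refresh the domain's
 * capabilities.  domain_detach_iommu() below undoes this and returns the
 * remaining attach count (INT_MAX if @iommu was not attached).
 */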
1607 static void domain_attach_iommu(struct dmar_domain *domain,
1608                                struct intel_iommu *iommu)
1609 {
1610         unsigned long flags;
1611
1612         spin_lock_irqsave(&domain->iommu_lock, flags);
1613         if (!test_and_set_bit(iommu->seq_id, domain->iommu_bmp)) {
1614                 domain->iommu_count++;
1615                 if (domain->iommu_count == 1)
1616                         domain->nid = iommu->node;
1617                 domain_update_iommu_cap(domain);
1618         }
1619         spin_unlock_irqrestore(&domain->iommu_lock, flags);
1620 }
1621
1622 static int domain_detach_iommu(struct dmar_domain *domain,
1623                                struct intel_iommu *iommu)
1624 {
1625         unsigned long flags;
1626         int count = INT_MAX;
1627
1628         spin_lock_irqsave(&domain->iommu_lock, flags);
1629         if (test_and_clear_bit(iommu->seq_id, domain->iommu_bmp)) {
1630                 count = --domain->iommu_count;
1631                 domain_update_iommu_cap(domain);
1632         }
1633         spin_unlock_irqrestore(&domain->iommu_lock, flags);
1634
1635         return count;
1636 }
1637
1638 static struct iova_domain reserved_iova_list;
1639 static struct lock_class_key reserved_rbtree_key;
1640
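/*
 * Build the global list of IOVA ranges that must never be handed out to
 * devices: the IOAPIC MMIO window and every PCI MMIO resource (to avoid
 * DMA hitting peer devices).  Individual domains copy these reservations
 * in domain_reserve_special_ranges().
 */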
1641 static int dmar_init_reserved_ranges(void)
1642 {
1643         struct pci_dev *pdev = NULL;
1644         struct iova *iova;
1645         int i;
1646
1647         init_iova_domain(&reserved_iova_list, VTD_PAGE_SIZE, IOVA_START_PFN,
1648                         DMA_32BIT_PFN);
1649
1650         lockdep_set_class(&reserved_iova_list.iova_rbtree_lock,
1651                 &reserved_rbtree_key);
1652
1653         /* IOAPIC ranges shouldn't be accessed by DMA */
1654         iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
1655                 IOVA_PFN(IOAPIC_RANGE_END));
1656         if (!iova) {
1657                 printk(KERN_ERR "Reserve IOAPIC range failed\n");
1658                 return -ENODEV;
1659         }
1660
1661         /* Reserve all PCI MMIO to avoid peer-to-peer access */
1662         for_each_pci_dev(pdev) {
1663                 struct resource *r;
1664
1665                 for (i = 0; i < PCI_NUM_RESOURCES; i++) {
1666                         r = &pdev->resource[i];
1667                         if (!r->flags || !(r->flags & IORESOURCE_MEM))
1668                                 continue;
1669                         iova = reserve_iova(&reserved_iova_list,
1670                                             IOVA_PFN(r->start),
1671                                             IOVA_PFN(r->end));
1672                         if (!iova) {
1673                                 printk(KERN_ERR "Reserve iova failed\n");
1674                                 return -ENODEV;
1675                         }
1676                 }
1677         }
1678         return 0;
1679 }
1680
1681 static void domain_reserve_special_ranges(struct dmar_domain *domain)
1682 {
1683         copy_reserved_iova(&reserved_iova_list, &domain->iovad);
1684 }
1685
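/*
 * Round the guest address width up to the next width the page tables can
 * actually cover, i.e. 12 + 9 * n bits, capped at 64.  For example,
 * gaw = 48 gives r = 0 and stays 48, while gaw = 40 gives
 * r = (40 - 12) % 9 = 1 and becomes 40 + 9 - 1 = 48.
 */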
1686 static inline int guestwidth_to_adjustwidth(int gaw)
1687 {
1688         int agaw;
1689         int r = (gaw - 12) % 9;
1690
1691         if (r == 0)
1692                 agaw = gaw;
1693         else
1694                 agaw = gaw + 9 - r;
1695         if (agaw > 64)
1696                 agaw = 64;
1697         return agaw;
1698 }
1699
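/*
 * Initialize a newly attached domain: set up its iova allocator and
 * reserved ranges, derive an agaw supported by the hardware from the
 * requested guest width, record coherency/snooping/superpage
 * capabilities, and allocate the top-level page directory.
 */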
1700 static int domain_init(struct dmar_domain *domain, int guest_width)
1701 {
1702         struct intel_iommu *iommu;
1703         int adjust_width, agaw;
1704         unsigned long sagaw;
1705
1706         init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN,
1707                         DMA_32BIT_PFN);
1708         domain_reserve_special_ranges(domain);
1709
1710         /* calculate AGAW */
1711         iommu = domain_get_iommu(domain);
1712         if (guest_width > cap_mgaw(iommu->cap))
1713                 guest_width = cap_mgaw(iommu->cap);
1714         domain->gaw = guest_width;
1715         adjust_width = guestwidth_to_adjustwidth(guest_width);
1716         agaw = width_to_agaw(adjust_width);
1717         sagaw = cap_sagaw(iommu->cap);
1718         if (!test_bit(agaw, &sagaw)) {
1719                 /* hardware doesn't support it, choose a bigger one */
1720                 pr_debug("IOMMU: hardware doesn't support agaw %d\n", agaw);
1721                 agaw = find_next_bit(&sagaw, 5, agaw);
1722                 if (agaw >= 5)
1723                         return -ENODEV;
1724         }
1725         domain->agaw = agaw;
1726
1727         if (ecap_coherent(iommu->ecap))
1728                 domain->iommu_coherency = 1;
1729         else
1730                 domain->iommu_coherency = 0;
1731
1732         if (ecap_sc_support(iommu->ecap))
1733                 domain->iommu_snooping = 1;
1734         else
1735                 domain->iommu_snooping = 0;
1736
1737         if (intel_iommu_superpage)
1738                 domain->iommu_superpage = fls(cap_super_page_val(iommu->cap));
1739         else
1740                 domain->iommu_superpage = 0;
1741
1742         domain->nid = iommu->node;
1743
1744         /* always allocate the top pgd */
1745         domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
1746         if (!domain->pgd)
1747                 return -ENOMEM;
1748         __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE);
1749         return 0;
1750 }
1751
1752 static void domain_exit(struct dmar_domain *domain)
1753 {
1754         struct dmar_drhd_unit *drhd;
1755         struct intel_iommu *iommu;
1756         struct page *freelist = NULL;
1757
1758         /* Domain 0 is reserved, so don't process it */
1759         if (!domain)
1760                 return;
1761
1762         /* Flush any lazy unmaps that may reference this domain */
1763         if (!intel_iommu_strict)
1764                 flush_unmaps_timeout(0);
1765
1766         /* remove associated devices */
1767         domain_remove_dev_info(domain);
1768
1769         /* destroy iovas */
1770         put_iova_domain(&domain->iovad);
1771
1772         freelist = domain_unmap(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
1773
1774         /* clear attached or cached domains */
1775         rcu_read_lock();
1776         for_each_active_iommu(iommu, drhd)
1777                 iommu_detach_domain(domain, iommu);
1778         rcu_read_unlock();
1779
1780         dma_free_pagelist(freelist);
1781
1782         free_domain_mem(domain);
1783 }
1784
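/*
 * Program the context entry for (@bus, @devfn) on @iommu to point at
 * @domain's page tables (or pass-through), then flush the context and
 * IOTLB caches as required when Caching Mode is set.  Does nothing if
 * the context entry is already present.
 */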
1785 static int domain_context_mapping_one(struct dmar_domain *domain,
1786                                       struct intel_iommu *iommu,
1787                                       u8 bus, u8 devfn, int translation)
1788 {
1789         struct context_entry *context;
1790         unsigned long flags;
1791         struct dma_pte *pgd;
1792         int id;
1793         int agaw;
1794         struct device_domain_info *info = NULL;
1795
1796         pr_debug("Set context mapping for %02x:%02x.%d\n",
1797                 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
1798
1799         BUG_ON(!domain->pgd);
1800         BUG_ON(translation != CONTEXT_TT_PASS_THROUGH &&
1801                translation != CONTEXT_TT_MULTI_LEVEL);
1802
1803         context = device_to_context_entry(iommu, bus, devfn);
1804         if (!context)
1805                 return -ENOMEM;
1806         spin_lock_irqsave(&iommu->lock, flags);
1807         if (context_present(context)) {
1808                 spin_unlock_irqrestore(&iommu->lock, flags);
1809                 return 0;
1810         }
1811
1812         id = domain->id;
1813         pgd = domain->pgd;
1814
1815         if (domain_type_is_vm_or_si(domain)) {
1816                 if (domain_type_is_vm(domain)) {
1817                         id = iommu_attach_vm_domain(domain, iommu);
1818                         if (id < 0) {
1819                                 spin_unlock_irqrestore(&iommu->lock, flags);
1820                                 pr_err("IOMMU: no free domain ids\n");
1821                                 return -EFAULT;
1822                         }
1823                 }
1824
1825                 /* Skip top levels of page tables for
1826                  * an iommu whose agaw is smaller than the default.
1827                  * Unnecessary for PT mode.
1828                  */
1829                 if (translation != CONTEXT_TT_PASS_THROUGH) {
1830                         for (agaw = domain->agaw; agaw != iommu->agaw; agaw--) {
1831                                 pgd = phys_to_virt(dma_pte_addr(pgd));
1832                                 if (!dma_pte_present(pgd)) {
1833                                         spin_unlock_irqrestore(&iommu->lock, flags);
1834                                         return -ENOMEM;
1835                                 }
1836                         }
1837                 }
1838         }
1839
1840         context_set_domain_id(context, id);
1841
1842         if (translation != CONTEXT_TT_PASS_THROUGH) {
1843                 info = iommu_support_dev_iotlb(domain, iommu, bus, devfn);
1844                 translation = info ? CONTEXT_TT_DEV_IOTLB :
1845                                      CONTEXT_TT_MULTI_LEVEL;
1846         }
1847         /*
1848          * In pass-through mode, AW must be programmed to indicate the largest
1849          * AGAW value supported by hardware, and ASR is ignored by hardware.
1850          */
1851         if (unlikely(translation == CONTEXT_TT_PASS_THROUGH))
1852                 context_set_address_width(context, iommu->msagaw);
1853         else {
1854                 context_set_address_root(context, virt_to_phys(pgd));
1855                 context_set_address_width(context, iommu->agaw);
1856         }
1857
1858         context_set_translation_type(context, translation);
1859         context_set_fault_enable(context);
1860         context_set_present(context);
1861         domain_flush_cache(domain, context, sizeof(*context));
1862
1863         /*
1864          * It's a non-present to present mapping. If hardware doesn't cache
1865          * non-present entries we only need to flush the write-buffer. If it
1866          * _does_ cache non-present entries, then it does so in the special
1867          * domain #0, which we have to flush:
1868          */
1869         if (cap_caching_mode(iommu->cap)) {
1870                 iommu->flush.flush_context(iommu, 0,
1871                                            (((u16)bus) << 8) | devfn,
1872                                            DMA_CCMD_MASK_NOBIT,
1873                                            DMA_CCMD_DEVICE_INVL);
1874                 iommu->flush.flush_iotlb(iommu, id, 0, 0, DMA_TLB_DSI_FLUSH);
1875         } else {
1876                 iommu_flush_write_buffer(iommu);
1877         }
1878         iommu_enable_dev_iotlb(info);
1879         spin_unlock_irqrestore(&iommu->lock, flags);
1880
1881         domain_attach_iommu(domain, iommu);
1882
1883         return 0;
1884 }
1885
1886 struct domain_context_mapping_data {
1887         struct dmar_domain *domain;
1888         struct intel_iommu *iommu;
1889         int translation;
1890 };
1891
1892 static int domain_context_mapping_cb(struct pci_dev *pdev,
1893                                      u16 alias, void *opaque)
1894 {
1895         struct domain_context_mapping_data *data = opaque;
1896
1897         return domain_context_mapping_one(data->domain, data->iommu,
1898                                           PCI_BUS_NUM(alias), alias & 0xff,
1899                                           data->translation);
1900 }
1901
1902 static int
1903 domain_context_mapping(struct dmar_domain *domain, struct device *dev,
1904                        int translation)
1905 {
1906         struct intel_iommu *iommu;
1907         u8 bus, devfn;
1908         struct domain_context_mapping_data data;
1909
1910         iommu = device_to_iommu(dev, &bus, &devfn);
1911         if (!iommu)
1912                 return -ENODEV;
1913
1914         if (!dev_is_pci(dev))
1915                 return domain_context_mapping_one(domain, iommu, bus, devfn,
1916                                                   translation);
1917
1918         data.domain = domain;
1919         data.iommu = iommu;
1920         data.translation = translation;
1921
1922         return pci_for_each_dma_alias(to_pci_dev(dev),
1923                                       &domain_context_mapping_cb, &data);
1924 }
1925
1926 static int domain_context_mapped_cb(struct pci_dev *pdev,
1927                                     u16 alias, void *opaque)
1928 {
1929         struct intel_iommu *iommu = opaque;
1930
1931         return !device_context_mapped(iommu, PCI_BUS_NUM(alias), alias & 0xff);
1932 }
1933
1934 static int domain_context_mapped(struct device *dev)
1935 {
1936         struct intel_iommu *iommu;
1937         u8 bus, devfn;
1938
1939         iommu = device_to_iommu(dev, &bus, &devfn);
1940         if (!iommu)
1941                 return -ENODEV;
1942
1943         if (!dev_is_pci(dev))
1944                 return device_context_mapped(iommu, bus, devfn);
1945
1946         return !pci_for_each_dma_alias(to_pci_dev(dev),
1947                                        domain_context_mapped_cb, iommu);
1948 }
1949
1950 /* Returns a number of VTD pages, but aligned to MM page size */
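/* e.g. with 4KiB pages, host_addr = 0x1234 and size = 0x2000 starts at
   offset 0x234 into its first page and so spans three pages. */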
1951 static inline unsigned long aligned_nrpages(unsigned long host_addr,
1952                                             size_t size)
1953 {
1954         host_addr &= ~PAGE_MASK;
1955         return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT;
1956 }
1957
1958 /* Return largest possible superpage level for a given mapping */
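/* Level 1 is 4KiB pages, 2 is 2MiB, 3 is 1GiB.  A 2MiB superpage (level 2)
   is only possible when both pfns are 512-page aligned, at least 512 pages
   remain, and the hardware advertises superpage support. */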
1959 static inline int hardware_largepage_caps(struct dmar_domain *domain,
1960                                           unsigned long iov_pfn,
1961                                           unsigned long phy_pfn,
1962                                           unsigned long pages)
1963 {
1964         int support, level = 1;
1965         unsigned long pfnmerge;
1966
1967         support = domain->iommu_superpage;
1968
1969         /* To use a large page, the virtual *and* physical addresses
1970            must be aligned to 2MiB/1GiB/etc. Lower bits set in either
1971            of them will mean we have to use smaller pages. So just
1972            merge them and check both at once. */
1973         pfnmerge = iov_pfn | phy_pfn;
1974
1975         while (support && !(pfnmerge & ~VTD_STRIDE_MASK)) {
1976                 pages >>= VTD_STRIDE_SHIFT;
1977                 if (!pages)
1978                         break;
1979                 pfnmerge >>= VTD_STRIDE_SHIFT;
1980                 level++;
1981                 support--;
1982         }
1983         return level;
1984 }
1985
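/*
 * Fill in PTEs for the IOVA range starting at @iov_pfn, taking the
 * physical pages either from @sg or from the contiguous range at
 * @phys_pfn.  Superpages are used where alignment and remaining length
 * allow, and the CPU cache is flushed one PTE page at a time.
 */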
1986 static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
1987                             struct scatterlist *sg, unsigned long phys_pfn,
1988                             unsigned long nr_pages, int prot)
1989 {
1990         struct dma_pte *first_pte = NULL, *pte = NULL;
1991         phys_addr_t uninitialized_var(pteval);
1992         unsigned long sg_res = 0;
1993         unsigned int largepage_lvl = 0;
1994         unsigned long lvl_pages = 0;
1995
1996         BUG_ON(!domain_pfn_supported(domain, iov_pfn + nr_pages - 1));
1997
1998         if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
1999                 return -EINVAL;
2000
2001         prot &= DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP;
2002
2003         if (!sg) {
2004                 sg_res = nr_pages;
2005                 pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | prot;
2006         }
2007
2008         while (nr_pages > 0) {
2009                 uint64_t tmp;
2010
2011                 if (!sg_res) {
2012                         sg_res = aligned_nrpages(sg->offset, sg->length);
2013                         sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + sg->offset;
2014                         sg->dma_length = sg->length;
2015                         pteval = page_to_phys(sg_page(sg)) | prot;
2016                         phys_pfn = pteval >> VTD_PAGE_SHIFT;
2017                 }
2018
2019                 if (!pte) {
2020                         largepage_lvl = hardware_largepage_caps(domain, iov_pfn, phys_pfn, sg_res);
2021
2022                         first_pte = pte = pfn_to_dma_pte(domain, iov_pfn, &largepage_lvl);
2023                         if (!pte)
2024                                 return -ENOMEM;
2025                         /* It is a large page */
2026                         if (largepage_lvl > 1) {
2027                                 pteval |= DMA_PTE_LARGE_PAGE;
2028                                 lvl_pages = lvl_to_nr_pages(largepage_lvl);
2029                                 /*
2030                                  * Ensure that old small page tables are
2031                                  * removed to make room for superpage,
2032                                  * if they exist.
2033                                  */
2034                                 dma_pte_free_pagetable(domain, iov_pfn,
2035                                                        iov_pfn + lvl_pages - 1);
2036                         } else {
2037                                 pteval &= ~(uint64_t)DMA_PTE_LARGE_PAGE;
2038                         }
2039
2040                 }
2041                 /* We don't need a lock here; nobody else
2042                  * touches this iova range
2043                  */
2044                 tmp = cmpxchg64_local(&pte->val, 0ULL, pteval);
2045                 if (tmp) {
2046                         static int dumps = 5;
2047                         printk(KERN_CRIT "ERROR: DMA PTE for vPFN 0x%lx already set (to %llx not %llx)\n",
2048                                iov_pfn, tmp, (unsigned long long)pteval);
2049                         if (dumps) {
2050                                 dumps--;
2051                                 debug_dma_dump_mappings(NULL);
2052                         }
2053                         WARN_ON(1);
2054                 }
2055
2056                 lvl_pages = lvl_to_nr_pages(largepage_lvl);
2057
2058                 BUG_ON(nr_pages < lvl_pages);
2059                 BUG_ON(sg_res < lvl_pages);
2060
2061                 nr_pages -= lvl_pages;
2062                 iov_pfn += lvl_pages;
2063                 phys_pfn += lvl_pages;
2064                 pteval += lvl_pages * VTD_PAGE_SIZE;
2065                 sg_res -= lvl_pages;
2066
2067                 /* If the next PTE would be the first in a new page, then we
2068                    need to flush the cache on the entries we've just written.
2069                    And then we'll need to recalculate 'pte', so clear it and
2070                    let it get set again in the if (!pte) block above.
2071
2072                    If we're done (!nr_pages) we need to flush the cache too.
2073
2074                    Also if we've been setting superpages, we may need to
2075                    recalculate 'pte' and switch back to smaller pages for the
2076                    end of the mapping, if the trailing size is not enough to
2077                    use another superpage (i.e. sg_res < lvl_pages). */
2078                 pte++;
2079                 if (!nr_pages || first_pte_in_page(pte) ||
2080                     (largepage_lvl > 1 && sg_res < lvl_pages)) {
2081                         domain_flush_cache(domain, first_pte,
2082                                            (void *)pte - (void *)first_pte);
2083                         pte = NULL;
2084                 }
2085
2086                 if (!sg_res && nr_pages)
2087                         sg = sg_next(sg);
2088         }
2089         return 0;
2090 }
2091
2092 static inline int domain_sg_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2093                                     struct scatterlist *sg, unsigned long nr_pages,
2094                                     int prot)
2095 {
2096         return __domain_mapping(domain, iov_pfn, sg, 0, nr_pages, prot);
2097 }
2098
2099 static inline int domain_pfn_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2100                                      unsigned long phys_pfn, unsigned long nr_pages,
2101                                      int prot)
2102 {
2103         return __domain_mapping(domain, iov_pfn, NULL, phys_pfn, nr_pages, prot);
2104 }
2105
2106 static void iommu_detach_dev(struct intel_iommu *iommu, u8 bus, u8 devfn)
2107 {
2108         if (!iommu)
2109                 return;
2110
2111         clear_context_table(iommu, bus, devfn);
2112         iommu->flush.flush_context(iommu, 0, 0, 0,
2113                                            DMA_CCMD_GLOBAL_INVL);
2114         iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
2115 }
2116
2117 static inline void unlink_domain_info(struct device_domain_info *info)
2118 {
2119         assert_spin_locked(&device_domain_lock);
2120         list_del(&info->link);
2121         list_del(&info->global);
2122         if (info->dev)
2123                 info->dev->archdata.iommu = NULL;
2124 }
2125
2126 static void domain_remove_dev_info(struct dmar_domain *domain)
2127 {
2128         struct device_domain_info *info, *tmp;
2129         unsigned long flags;
2130
2131         spin_lock_irqsave(&device_domain_lock, flags);
2132         list_for_each_entry_safe(info, tmp, &domain->devices, link) {
2133                 unlink_domain_info(info);
2134                 spin_unlock_irqrestore(&device_domain_lock, flags);
2135
2136                 iommu_disable_dev_iotlb(info);
2137                 iommu_detach_dev(info->iommu, info->bus, info->devfn);
2138
2139                 if (domain_type_is_vm(domain)) {
2140                         iommu_detach_dependent_devices(info->iommu, info->dev);
2141                         domain_detach_iommu(domain, info->iommu);
2142                 }
2143
2144                 free_devinfo_mem(info);
2145                 spin_lock_irqsave(&device_domain_lock, flags);
2146         }
2147         spin_unlock_irqrestore(&device_domain_lock, flags);
2148 }
2149
2150 /*
2151  * find_domain
2152  * Note: we use struct device->archdata.iommu to store the domain info
2153  */
2154 static struct dmar_domain *find_domain(struct device *dev)
2155 {
2156         struct device_domain_info *info;
2157
2158         /* No lock here, assumes no domain exit in normal case */
2159         info = dev->archdata.iommu;
2160         if (info)
2161                 return info->domain;
2162         return NULL;
2163 }
2164
2165 static inline struct device_domain_info *
2166 dmar_search_domain_by_dev_info(int segment, int bus, int devfn)
2167 {
2168         struct device_domain_info *info;
2169
2170         list_for_each_entry(info, &device_domain_list, global)
2171                 if (info->iommu->segment == segment && info->bus == bus &&
2172                     info->devfn == devfn)
2173                         return info;
2174
2175         return NULL;
2176 }
2177
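/*
 * Allocate a device_domain_info for (@bus, @devfn) and link it to
 * @domain.  If another thread attached a domain to the device first,
 * return that existing domain instead and let the caller free its own.
 */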
2178 static struct dmar_domain *dmar_insert_dev_info(struct intel_iommu *iommu,
2179                                                 int bus, int devfn,
2180                                                 struct device *dev,
2181                                                 struct dmar_domain *domain)
2182 {
2183         struct dmar_domain *found = NULL;
2184         struct device_domain_info *info;
2185         unsigned long flags;
2186
2187         info = alloc_devinfo_mem();
2188         if (!info)
2189                 return NULL;
2190
2191         info->bus = bus;
2192         info->devfn = devfn;
2193         info->dev = dev;
2194         info->domain = domain;
2195         info->iommu = iommu;
2196
2197         spin_lock_irqsave(&device_domain_lock, flags);
2198         if (dev)
2199                 found = find_domain(dev);
2200         else {
2201                 struct device_domain_info *info2;
2202                 info2 = dmar_search_domain_by_dev_info(iommu->segment, bus, devfn);
2203                 if (info2)
2204                         found = info2->domain;
2205         }
2206         if (found) {
2207                 spin_unlock_irqrestore(&device_domain_lock, flags);
2208                 free_devinfo_mem(info);
2209                 /* Caller must free the original domain */
2210                 return found;
2211         }
2212
2213         list_add(&info->link, &domain->devices);
2214         list_add(&info->global, &device_domain_list);
2215         if (dev)
2216                 dev->archdata.iommu = info;
2217         spin_unlock_irqrestore(&device_domain_lock, flags);
2218
2219         return domain;
2220 }
2221
2222 static int get_last_alias(struct pci_dev *pdev, u16 alias, void *opaque)
2223 {
2224         *(u16 *)opaque = alias;
2225         return 0;
2226 }
2227
2228 /* Return the domain for dev; allocate and initialize one if necessary */
2229 static struct dmar_domain *get_domain_for_dev(struct device *dev, int gaw)
2230 {
2231         struct dmar_domain *domain, *tmp;
2232         struct intel_iommu *iommu;
2233         struct device_domain_info *info;
2234         u16 dma_alias;
2235         unsigned long flags;
2236         u8 bus, devfn;
2237
2238         domain = find_domain(dev);
2239         if (domain)
2240                 return domain;
2241
2242         iommu = device_to_iommu(dev, &bus, &devfn);
2243         if (!iommu)
2244                 return NULL;
2245
2246         if (dev_is_pci(dev)) {
2247                 struct pci_dev *pdev = to_pci_dev(dev);
2248
2249                 pci_for_each_dma_alias(pdev, get_last_alias, &dma_alias);
2250
2251                 spin_lock_irqsave(&device_domain_lock, flags);
2252                 info = dmar_search_domain_by_dev_info(pci_domain_nr(pdev->bus),
2253                                                       PCI_BUS_NUM(dma_alias),
2254                                                       dma_alias & 0xff);
2255                 if (info) {
2256                         iommu = info->iommu;
2257                         domain = info->domain;
2258                 }
2259                 spin_unlock_irqrestore(&device_domain_lock, flags);
2260
2261                 /* DMA alias already has a domain, use it */
2262                 if (info)
2263                         goto found_domain;
2264         }
2265
2266         /* Allocate and initialize new domain for the device */
2267         domain = alloc_domain(0);
2268         if (!domain)
2269                 return NULL;
2270         domain->id = iommu_attach_domain(domain, iommu);
2271         if (domain->id < 0) {
2272                 free_domain_mem(domain);
2273                 return NULL;
2274         }
2275         domain_attach_iommu(domain, iommu);
2276         if (domain_init(domain, gaw)) {
2277                 domain_exit(domain);
2278                 return NULL;
2279         }
2280
2281         /* register PCI DMA alias device */
2282         if (dev_is_pci(dev)) {
2283                 tmp = dmar_insert_dev_info(iommu, PCI_BUS_NUM(dma_alias),
2284                                            dma_alias & 0xff, NULL, domain);
2285
2286                 if (!tmp || tmp != domain) {
2287                         domain_exit(domain);
2288                         domain = tmp;
2289                 }
2290
2291                 if (!domain)
2292                         return NULL;
2293         }
2294
2295 found_domain:
2296         tmp = dmar_insert_dev_info(iommu, bus, devfn, dev, domain);
2297
2298         if (!tmp || tmp != domain) {
2299                 domain_exit(domain);
2300                 domain = tmp;
2301         }
2302
2303         return domain;
2304 }
2305
2306 static int iommu_identity_mapping;
2307 #define IDENTMAP_ALL            1
2308 #define IDENTMAP_GFX            2
2309 #define IDENTMAP_AZALIA         4
2310
2311 static int iommu_domain_identity_map(struct dmar_domain *domain,
2312                                      unsigned long long start,
2313                                      unsigned long long end)
2314 {
2315         unsigned long first_vpfn = start >> VTD_PAGE_SHIFT;
2316         unsigned long last_vpfn = end >> VTD_PAGE_SHIFT;
2317
2318         if (!reserve_iova(&domain->iovad, dma_to_mm_pfn(first_vpfn),
2319                           dma_to_mm_pfn(last_vpfn))) {
2320                 printk(KERN_ERR "IOMMU: reserve iova failed\n");
2321                 return -ENOMEM;
2322         }
2323
2324         pr_debug("Mapping reserved region %llx-%llx for domain %d\n",
2325                  start, end, domain->id);
2326         /*
2327          * RMRR range might have overlap with physical memory range,
2328          * clear it first
2329          */
2330         dma_pte_clear_range(domain, first_vpfn, last_vpfn);
2331
2332         return domain_pfn_mapping(domain, first_vpfn, first_vpfn,
2333                                   last_vpfn - first_vpfn + 1,
2334                                   DMA_PTE_READ|DMA_PTE_WRITE);
2335 }
2336
2337 static int iommu_prepare_identity_map(struct device *dev,
2338                                       unsigned long long start,
2339                                       unsigned long long end)
2340 {
2341         struct dmar_domain *domain;
2342         int ret;
2343
2344         domain = get_domain_for_dev(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
2345         if (!domain)
2346                 return -ENOMEM;
2347
2348         /* For _hardware_ passthrough, don't bother. But for software
2349            passthrough, we do it anyway -- it may indicate a memory
2350            range which is reserved in E820, and so didn't get set
2351            up to start with in si_domain */
2352         if (domain == si_domain && hw_pass_through) {
2353                 printk("Ignoring identity map for HW passthrough device %s [0x%Lx - 0x%Lx]\n",
2354                        dev_name(dev), start, end);
2355                 return 0;
2356         }
2357
2358         printk(KERN_INFO
2359                "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
2360                dev_name(dev), start, end);
2361         
2362         if (end < start) {
2363                 WARN(1, "Your BIOS is broken; RMRR ends before it starts!\n"
2364                         "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2365                         dmi_get_system_info(DMI_BIOS_VENDOR),
2366                         dmi_get_system_info(DMI_BIOS_VERSION),
2367                         dmi_get_system_info(DMI_PRODUCT_VERSION));
2368                 ret = -EIO;
2369                 goto error;
2370         }
2371
2372         if (end >> agaw_to_width(domain->agaw)) {
2373                 WARN(1, "Your BIOS is broken; RMRR exceeds permitted address width (%d bits)\n"
2374                      "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2375                      agaw_to_width(domain->agaw),
2376                      dmi_get_system_info(DMI_BIOS_VENDOR),
2377                      dmi_get_system_info(DMI_BIOS_VERSION),
2378                      dmi_get_system_info(DMI_PRODUCT_VERSION));
2379                 ret = -EIO;
2380                 goto error;
2381         }
2382
2383         ret = iommu_domain_identity_map(domain, start, end);
2384         if (ret)
2385                 goto error;
2386
2387         /* context entry init */
2388         ret = domain_context_mapping(domain, dev, CONTEXT_TT_MULTI_LEVEL);
2389         if (ret)
2390                 goto error;
2391
2392         return 0;
2393
2394  error:
2395         domain_exit(domain);
2396         return ret;
2397 }
2398
2399 static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
2400                                          struct device *dev)
2401 {
2402         if (dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
2403                 return 0;
2404         return iommu_prepare_identity_map(dev, rmrr->base_address,
2405                                           rmrr->end_address);
2406 }
2407
2408 #ifdef CONFIG_INTEL_IOMMU_FLOPPY_WA
2409 static inline void iommu_prepare_isa(void)
2410 {
2411         struct pci_dev *pdev;
2412         int ret;
2413
2414         pdev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL);
2415         if (!pdev)
2416                 return;
2417
2418         printk(KERN_INFO "IOMMU: Prepare 0-16MiB unity mapping for LPC\n");
2419         ret = iommu_prepare_identity_map(&pdev->dev, 0, 16*1024*1024 - 1);
2420
2421         if (ret)
2422                 printk(KERN_ERR "IOMMU: Failed to create 0-16MiB identity map; "
2423                        "floppy might not work\n");
2424
2425         pci_dev_put(pdev);
2426 }
2427 #else
2428 static inline void iommu_prepare_isa(void)
2429 {
2430         return;
2431 }
2432 #endif /* !CONFIG_INTEL_IOMMU_FLOPPY_WA */
2433
2434 static int md_domain_init(struct dmar_domain *domain, int guest_width);
2435
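/*
 * Build the static identity (si) domain and attach it to every active
 * iommu using one common domain id.  For software passthrough (hw == 0)
 * also map every usable physical memory range 1:1 up front.
 */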
2436 static int __init si_domain_init(int hw)
2437 {
2438         struct dmar_drhd_unit *drhd;
2439         struct intel_iommu *iommu;
2440         int nid, ret = 0;
2441         bool first = true;
2442
2443         si_domain = alloc_domain(DOMAIN_FLAG_STATIC_IDENTITY);
2444         if (!si_domain)
2445                 return -EFAULT;
2446
2447         for_each_active_iommu(iommu, drhd) {
2448                 ret = iommu_attach_domain(si_domain, iommu);
2449                 if (ret < 0) {
2450                         domain_exit(si_domain);
2451                         return -EFAULT;
2452                 } else if (first) {
2453                         si_domain->id = ret;
2454                         first = false;
2455                 } else if (si_domain->id != ret) {
2456                         domain_exit(si_domain);
2457                         return -EFAULT;
2458                 }
2459                 domain_attach_iommu(si_domain, iommu);
2460         }
2461
2462         if (md_domain_init(si_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
2463                 domain_exit(si_domain);
2464                 return -EFAULT;
2465         }
2466
2467         pr_debug("IOMMU: identity mapping domain is domain %d\n",
2468                  si_domain->id);
2469
2470         if (hw)
2471                 return 0;
2472
2473         for_each_online_node(nid) {
2474                 unsigned long start_pfn, end_pfn;
2475                 int i;
2476
2477                 for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
2478                         ret = iommu_domain_identity_map(si_domain,
2479                                         PFN_PHYS(start_pfn), PFN_PHYS(end_pfn));
2480                         if (ret)
2481                                 return ret;
2482                 }
2483         }
2484
2485         return 0;
2486 }
2487
2488 static int identity_mapping(struct device *dev)
2489 {
2490         struct device_domain_info *info;
2491
2492         if (likely(!iommu_identity_mapping))
2493                 return 0;
2494
2495         info = dev->archdata.iommu;
2496         if (info && info != DUMMY_DEVICE_DOMAIN_INFO)
2497                 return (info->domain == si_domain);
2498
2499         return 0;
2500 }
2501
2502 static int domain_add_dev_info(struct dmar_domain *domain,
2503                                struct device *dev, int translation)
2504 {
2505         struct dmar_domain *ndomain;
2506         struct intel_iommu *iommu;
2507         u8 bus, devfn;
2508         int ret;
2509
2510         iommu = device_to_iommu(dev, &bus, &devfn);
2511         if (!iommu)
2512                 return -ENODEV;
2513
2514         ndomain = dmar_insert_dev_info(iommu, bus, devfn, dev, domain);
2515         if (ndomain != domain)
2516                 return -EBUSY;
2517
2518         ret = domain_context_mapping(domain, dev, translation);
2519         if (ret) {
2520                 domain_remove_one_dev_info(domain, dev);
2521                 return ret;
2522         }
2523
2524         return 0;
2525 }
2526
2527 static bool device_has_rmrr(struct device *dev)
2528 {
2529         struct dmar_rmrr_unit *rmrr;
2530         struct device *tmp;
2531         int i;
2532
2533         rcu_read_lock();
2534         for_each_rmrr_units(rmrr) {
2535                 /*
2536                  * Return TRUE if this RMRR contains the device that
2537                  * is passed in.
2538                  */
2539                 for_each_active_dev_scope(rmrr->devices,
2540                                           rmrr->devices_cnt, i, tmp)
2541                         if (tmp == dev) {
2542                                 rcu_read_unlock();
2543                                 return true;
2544                         }
2545         }
2546         rcu_read_unlock();
2547         return false;
2548 }
2549
2550 /*
2551  * There are a couple cases where we need to restrict the functionality of
2552  * devices associated with RMRRs.  The first is when evaluating a device for
2553  * identity mapping because problems exist when devices are moved in and out
2554  * of domains and their respective RMRR information is lost.  This means that
2555  * a device with associated RMRRs will never be in a "passthrough" domain.
2556  * The second is use of the device through the IOMMU API.  This interface
2557  * expects to have full control of the IOVA space for the device.  We cannot
2558  * satisfy both the requirement that RMRR access is maintained and have an
2559  * unencumbered IOVA space.  We also have no ability to quiesce the device's
2560  * use of the RMRR space or even inform the IOMMU API user of the restriction.
2561  * We therefore prevent devices associated with an RMRR from participating in
2562  * the IOMMU API, which eliminates them from device assignment.
2563  *
2564  * In both cases we assume that PCI USB devices with RMRRs have them largely
2565  * for historical reasons and that the RMRR space is not actively used post
2566  * boot.  This exclusion may change if vendors begin to abuse it.
2567  */
2568 static bool device_is_rmrr_locked(struct device *dev)
2569 {
2570         if (!device_has_rmrr(dev))
2571                 return false;
2572
2573         if (dev_is_pci(dev)) {
2574                 struct pci_dev *pdev = to_pci_dev(dev);
2575
2576                 if ((pdev->class >> 8) == PCI_CLASS_SERIAL_USB)
2577                         return false;
2578         }
2579
2580         return true;
2581 }
2582
2583 static int iommu_should_identity_map(struct device *dev, int startup)
2584 {
2585
2586         if (dev_is_pci(dev)) {
2587                 struct pci_dev *pdev = to_pci_dev(dev);
2588
2589                 if (device_is_rmrr_locked(dev))
2590                         return 0;
2591
2592                 if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev))
2593                         return 1;
2594
2595                 if ((iommu_identity_mapping & IDENTMAP_GFX) && IS_GFX_DEVICE(pdev))
2596                         return 1;
2597
2598                 if (!(iommu_identity_mapping & IDENTMAP_ALL))
2599                         return 0;
2600
2601                 /*
2602                  * We want to start off with all devices in the 1:1 domain, and
2603                  * take them out later if we find they can't access all of memory.
2604                  *
2605                  * However, we can't do this for PCI devices behind bridges,
2606                  * because all PCI devices behind the same bridge will end up
2607                  * with the same source-id on their transactions.
2608                  *
2609                  * Practically speaking, we can't change things around for these
2610                  * devices at run-time, because we can't be sure there'll be no
2611                  * DMA transactions in flight for any of their siblings.
2612                  *
2613                  * So PCI devices (unless they're on the root bus) as well as
2614                  * their parent PCI-PCI or PCIe-PCI bridges must be left _out_ of
2615                  * the 1:1 domain, just in _case_ one of their siblings turns out
2616                  * not to be able to map all of memory.
2617                  */
2618                 if (!pci_is_pcie(pdev)) {
2619                         if (!pci_is_root_bus(pdev->bus))
2620                                 return 0;
2621                         if (pdev->class >> 8 == PCI_CLASS_BRIDGE_PCI)
2622                                 return 0;
2623                 } else if (pci_pcie_type(pdev) == PCI_EXP_TYPE_PCI_BRIDGE)
2624                         return 0;
2625         } else {
2626                 if (device_has_rmrr(dev))
2627                         return 0;
2628         }
2629
2630         /*
2631          * At boot time, we don't yet know if devices will be 64-bit capable.
2632          * Assume that they will — if they turn out not to be, then we can
2633          * take them out of the 1:1 domain later.
2634          */
2635         if (!startup) {
2636                 /*
2637                  * If the device's dma_mask is less than the system's memory
2638                  * size then this is not a candidate for identity mapping.
2639                  */
2640                 u64 dma_mask = *dev->dma_mask;
2641
2642                 if (dev->coherent_dma_mask &&
2643                     dev->coherent_dma_mask < dma_mask)
2644                         dma_mask = dev->coherent_dma_mask;
2645
2646                 return dma_mask >= dma_get_required_mask(dev);
2647         }
2648
2649         return 1;
2650 }
2651
2652 static int __init dev_prepare_static_identity_mapping(struct device *dev, int hw)
2653 {
2654         int ret;
2655
2656         if (!iommu_should_identity_map(dev, 1))
2657                 return 0;
2658
2659         ret = domain_add_dev_info(si_domain, dev,
2660                                   hw ? CONTEXT_TT_PASS_THROUGH :
2661                                        CONTEXT_TT_MULTI_LEVEL);
2662         if (!ret)
2663                 pr_info("IOMMU: %s identity mapping for device %s\n",
2664                         hw ? "hardware" : "software", dev_name(dev));
2665         else if (ret == -ENODEV)
2666                 /* device not associated with an iommu */
2667                 ret = 0;
2668
2669         return ret;
2670 }
2671
2672
2673 static int __init iommu_prepare_static_identity_mapping(int hw)
2674 {
2675         struct pci_dev *pdev = NULL;
2676         struct dmar_drhd_unit *drhd;
2677         struct intel_iommu *iommu;
2678         struct device *dev;
2679         int i;
2680         int ret = 0;
2681
2682         ret = si_domain_init(hw);
2683         if (ret)
2684                 return -EFAULT;
2685
2686         for_each_pci_dev(pdev) {
2687                 ret = dev_prepare_static_identity_mapping(&pdev->dev, hw);
2688                 if (ret)
2689                         return ret;
2690         }
2691
2692         for_each_active_iommu(iommu, drhd)
2693                 for_each_active_dev_scope(drhd->devices, drhd->devices_cnt, i, dev) {
2694                         struct acpi_device_physical_node *pn;
2695                         struct acpi_device *adev;
2696
2697                         if (dev->bus != &acpi_bus_type)
2698                                 continue;
2699
2700                         adev = to_acpi_device(dev);
2701                         mutex_lock(&adev->physical_node_lock);
2702                         list_for_each_entry(pn, &adev->physical_node_list, node) {
2703                                 ret = dev_prepare_static_identity_mapping(pn->dev, hw);
2704                                 if (ret)
2705                                         break;
2706                         }
2707                         mutex_unlock(&adev->physical_node_lock);
2708                         if (ret)
2709                                 return ret;
2710                 }
2711
2712         return 0;
2713 }
2714
2715 static void intel_iommu_init_qi(struct intel_iommu *iommu)
2716 {
2717         /*
2718          * Start from a sane iommu hardware state.
2719          * If queued invalidation has already been initialized by us
2720          * (for example, while enabling interrupt-remapping) then
2721          * things are already rolling from a sane state.
2722          */
2723         if (!iommu->qi) {
2724                 /*
2725                  * Clear any previous faults.
2726                  */
2727                 dmar_fault(-1, iommu);
2728                 /*
2729                  * Disable queued invalidation if supported and already enabled
2730                  * before OS handover.
2731                  */
2732                 dmar_disable_qi(iommu);
2733         }
2734
2735         if (dmar_enable_qi(iommu)) {
2736                 /*
2737                  * Queued invalidation is not enabled; use register-based invalidation
2738                  */
2739                 iommu->flush.flush_context = __iommu_flush_context;
2740                 iommu->flush.flush_iotlb = __iommu_flush_iotlb;
2741                 pr_info("IOMMU: %s using Register based invalidation\n",
2742                         iommu->name);
2743         } else {
2744                 iommu->flush.flush_context = qi_flush_context;
2745                 iommu->flush.flush_iotlb = qi_flush_iotlb;
2746                 pr_info("IOMMU: %s using Queued invalidation\n", iommu->name);
2747         }
2748 }
2749
2750 static int __init init_dmars(void)
2751 {
2752         struct dmar_drhd_unit *drhd;
2753         struct dmar_rmrr_unit *rmrr;
2754         struct device *dev;
2755         struct intel_iommu *iommu;
2756         int i, ret;
2757
2758         /*
2759          * for each drhd
2760          *    allocate root
2761          *    initialize and program root entry to not present
2762          * endfor
2763          */
2764         for_each_drhd_unit(drhd) {
2765                 /*
2766                  * lock not needed as this is only incremented in the single
2767                  * threaded kernel __init code path; all other accesses are
2768                  * read only
2769                  */
2770                 if (g_num_of_iommus < DMAR_UNITS_SUPPORTED) {
2771                         g_num_of_iommus++;
2772                         continue;
2773                 }
2774                 printk_once(KERN_ERR "intel-iommu: exceeded %d IOMMUs\n",
2775                           DMAR_UNITS_SUPPORTED);
2776         }
2777
2778         /* Preallocate enough resources for IOMMU hot-addition */
2779         if (g_num_of_iommus < DMAR_UNITS_SUPPORTED)
2780                 g_num_of_iommus = DMAR_UNITS_SUPPORTED;
2781
2782         g_iommus = kcalloc(g_num_of_iommus, sizeof(struct intel_iommu *),
2783                         GFP_KERNEL);
2784         if (!g_iommus) {
2785                 printk(KERN_ERR "Allocating global iommu array failed\n");
2786                 ret = -ENOMEM;
2787                 goto error;
2788         }
2789
2790         deferred_flush = kzalloc(g_num_of_iommus *
2791                 sizeof(struct deferred_flush_tables), GFP_KERNEL);
2792         if (!deferred_flush) {
2793                 ret = -ENOMEM;
2794                 goto free_g_iommus;
2795         }
2796
2797         for_each_active_iommu(iommu, drhd) {
2798                 g_iommus[iommu->seq_id] = iommu;
2799
2800                 ret = iommu_init_domains(iommu);
2801                 if (ret)
2802                         goto free_iommu;
2803
2804                 /*
2805                  * TBD:
2806                  * we could share the same root & context tables
2807                  * among all IOMMUs. Need to split them later.
2808                  */
2809                 ret = iommu_alloc_root_entry(iommu);
2810                 if (ret)
2811                         goto free_iommu;
2812                 if (!ecap_pass_through(iommu->ecap))
2813                         hw_pass_through = 0;
2814         }
2815
2816         for_each_active_iommu(iommu, drhd)
2817                 intel_iommu_init_qi(iommu);
2818
2819         if (iommu_pass_through)
2820                 iommu_identity_mapping |= IDENTMAP_ALL;
2821
2822 #ifdef CONFIG_INTEL_IOMMU_BROKEN_GFX_WA
2823         iommu_identity_mapping |= IDENTMAP_GFX;
2824 #endif
2825
2826         check_tylersburg_isoch();
2827
2828         /*
2829          * If pass through is not set or not enabled, set up context entries for
2830          * identity mappings for rmrr, gfx, and isa, and possibly fall back to static
2831          * identity mapping if iommu_identity_mapping is set.
2832          */
2833         if (iommu_identity_mapping) {
2834                 ret = iommu_prepare_static_identity_mapping(hw_pass_through);
2835                 if (ret) {
2836                         printk(KERN_CRIT "Failed to setup IOMMU pass-through\n");
2837                         goto free_iommu;
2838                 }
2839         }
2840         /*
2841          * For each rmrr
2842          *   for each dev attached to rmrr
2843          *   do
2844          *     locate drhd for dev, alloc domain for dev
2845          *     allocate free domain
2846          *     allocate page table entries for rmrr
2847          *     if context not allocated for bus
2848          *           allocate and init context
2849          *           set present in root table for this bus
2850          *     init context with domain, translation etc
2851          *    endfor
2852          * endfor
2853          */
2854         printk(KERN_INFO "IOMMU: Setting RMRR:\n");
2855         for_each_rmrr_units(rmrr) {
2856                 /* some BIOSes list non-existent devices in the DMAR table. */
2857                 for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
2858                                           i, dev) {
2859                         ret = iommu_prepare_rmrr_dev(rmrr, dev);
2860                         if (ret)
2861                                 printk(KERN_ERR
2862                                        "IOMMU: mapping reserved region failed\n");
2863                 }
2864         }
2865
2866         iommu_prepare_isa();
2867
2868         /*
2869          * for each drhd
2870          *   enable fault log
2871          *   global invalidate context cache
2872          *   global invalidate iotlb
2873          *   enable translation
2874          */
2875         for_each_iommu(iommu, drhd) {
2876                 if (drhd->ignored) {
2877                         /*
2878                          * we always have to disable PMRs or DMA may fail on
2879                          * this device
2880                          */
2881                         if (force_on)
2882                                 iommu_disable_protect_mem_regions(iommu);
2883                         continue;
2884                 }
2885
2886                 iommu_flush_write_buffer(iommu);
2887
2888                 ret = dmar_set_interrupt(iommu);
2889                 if (ret)
2890                         goto free_iommu;
2891
2892                 iommu_set_root_entry(iommu);
2893
2894                 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
2895                 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
2896                 iommu_enable_translation(iommu);
2897                 iommu_disable_protect_mem_regions(iommu);
2898         }
2899
2900         return 0;
2901
2902 free_iommu:
2903         for_each_active_iommu(iommu, drhd) {
2904                 disable_dmar_iommu(iommu);
2905                 free_dmar_iommu(iommu);
2906         }
2907         kfree(deferred_flush);
2908 free_g_iommus:
2909         kfree(g_iommus);
2910 error:
2911         return ret;
2912 }
2913
2914 /* This takes a number of _MM_ pages, not VTD pages */
2915 static struct iova *intel_alloc_iova(struct device *dev,
2916                                      struct dmar_domain *domain,
2917                                      unsigned long nrpages, uint64_t dma_mask)
2918 {
2919         struct iova *iova = NULL;
2920
2921         /* Restrict dma_mask to the width that the iommu can handle */
2922         dma_mask = min_t(uint64_t, DOMAIN_MAX_ADDR(domain->gaw), dma_mask);
2923
2924         if (!dmar_forcedac && dma_mask > DMA_BIT_MASK(32)) {
2925                 /*
2926                  * First try to allocate an I/O virtual address below
2927                  * DMA_BIT_MASK(32); if that fails, then try allocating
2928                  * from the higher range
2929                  */
2930                 iova = alloc_iova(&domain->iovad, nrpages,
2931                                   IOVA_PFN(DMA_BIT_MASK(32)), 1);
2932                 if (iova)
2933                         return iova;
2934         }
2935         iova = alloc_iova(&domain->iovad, nrpages, IOVA_PFN(dma_mask), 1);
2936         if (unlikely(!iova)) {
2937                 printk(KERN_ERR "Allocating %ld-page iova for %s failed",
2938                        nrpages, dev_name(dev));
2939                 return NULL;
2940         }
2941
2942         return iova;
2943 }
2944
2945 static struct dmar_domain *__get_valid_domain_for_dev(struct device *dev)
2946 {
2947         struct dmar_domain *domain;
2948         int ret;
2949
2950         domain = get_domain_for_dev(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
2951         if (!domain) {
2952                 printk(KERN_ERR "Allocating domain for %s failed",
2953                        dev_name(dev));
2954                 return NULL;
2955         }
2956
2957         /* make sure context mapping is ok */
2958         if (unlikely(!domain_context_mapped(dev))) {
2959                 ret = domain_context_mapping(domain, dev, CONTEXT_TT_MULTI_LEVEL);
2960                 if (ret) {
2961                         printk(KERN_ERR "Domain context map for %s failed",
2962                                dev_name(dev));
2963                         return NULL;
2964                 }
2965         }
2966
2967         return domain;
2968 }
2969
2970 static inline struct dmar_domain *get_valid_domain_for_dev(struct device *dev)
2971 {
2972         struct device_domain_info *info;
2973
2974         /* No lock here, assumes no domain exit in normal case */
2975         info = dev->archdata.iommu;
2976         if (likely(info))
2977                 return info->domain;
2978
2979         return __get_valid_domain_for_dev(dev);
2980 }
2981
2982 static int iommu_dummy(struct device *dev)
2983 {
2984         return dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO;
2985 }
2986
2987 /* Check if the dev needs to go through the non-identity map and unmap process. */
2988 static int iommu_no_mapping(struct device *dev)
2989 {
2990         int found;
2991
2992         if (iommu_dummy(dev))
2993                 return 1;
2994
2995         if (!iommu_identity_mapping)
2996                 return 0;
2997
2998         found = identity_mapping(dev);
2999         if (found) {
3000                 if (iommu_should_identity_map(dev, 0))
3001                         return 1;
3002                 else {
3003                         /*
3004                          * A device restricted to 32 bit DMA is removed from
3005                          * si_domain and falls back to non-identity mapping.
3006                          */
3007                         domain_remove_one_dev_info(si_domain, dev);
3008                         printk(KERN_INFO "32bit %s uses non-identity mapping\n",
3009                                dev_name(dev));
3010                         return 0;
3011                 }
3012         } else {
3013                 /*
3014                  * In case a 64 bit DMA capable device is detached from a VM,
3015                  * the device is put back into si_domain for identity mapping.
3016                  */
3017                 if (iommu_should_identity_map(dev, 0)) {
3018                         int ret;
3019                         ret = domain_add_dev_info(si_domain, dev,
3020                                                   hw_pass_through ?
3021                                                   CONTEXT_TT_PASS_THROUGH :
3022                                                   CONTEXT_TT_MULTI_LEVEL);
3023                         if (!ret) {
3024                                 printk(KERN_INFO "64bit %s uses identity mapping\n",
3025                                        dev_name(dev));
3026                                 return 1;
3027                         }
3028                 }
3029         }
3030
3031         return 0;
3032 }
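/*
 * In short: devices marked with DUMMY_DEVICE_DOMAIN_INFO always bypass
 * translation, and with identity mapping enabled a device may migrate into
 * or out of si_domain as its effective DMA mask changes: a 32 bit limited
 * device drops back to a private domain, while a 64 bit capable device is
 * (re)added to the identity map.
 */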
3033
3034 static dma_addr_t __intel_map_single(struct device *dev, phys_addr_t paddr,
3035                                      size_t size, int dir, u64 dma_mask)
3036 {
3037         struct dmar_domain *domain;
3038         phys_addr_t start_paddr;
3039         struct iova *iova;
3040         int prot = 0;
3041         int ret;
3042         struct intel_iommu *iommu;
3043         unsigned long paddr_pfn = paddr >> PAGE_SHIFT;
3044
3045         BUG_ON(dir == DMA_NONE);
3046
3047         if (iommu_no_mapping(dev))
3048                 return paddr;
3049
3050         domain = get_valid_domain_for_dev(dev);
3051         if (!domain)
3052                 return 0;
3053
3054         iommu = domain_get_iommu(domain);
3055         size = aligned_nrpages(paddr, size);
3056
3057         iova = intel_alloc_iova(dev, domain, dma_to_mm_pfn(size), dma_mask);
3058         if (!iova)
3059                 goto error;
3060
3061         /*
3062          * Check if DMAR supports zero-length reads on write-only
3063          * mappings.
3064          */
3065         if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL ||
3066                         !cap_zlr(iommu->cap))
3067                 prot |= DMA_PTE_READ;
3068         if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3069                 prot |= DMA_PTE_WRITE;
3070         /*
3071          * The range paddr .. paddr + size may cover partial pages, so map the
3072          * whole page.  Note: if two parts of one page are mapped separately, we
3073          * might end up with two guest addresses mapping to the same host paddr,
3074          * but this is not a big problem.
3075          */
3076         ret = domain_pfn_mapping(domain, mm_to_dma_pfn(iova->pfn_lo),
3077                                  mm_to_dma_pfn(paddr_pfn), size, prot);
3078         if (ret)
3079                 goto error;
3080
3081         /* it's a non-present to present mapping. Only flush if caching mode */
3082         if (cap_caching_mode(iommu->cap))
3083                 iommu_flush_iotlb_psi(iommu, domain->id, mm_to_dma_pfn(iova->pfn_lo), size, 0, 1);
3084         else
3085                 iommu_flush_write_buffer(iommu);
3086
3087         start_paddr = (phys_addr_t)iova->pfn_lo << PAGE_SHIFT;
3088         start_paddr += paddr & ~PAGE_MASK;
3089         return start_paddr;
3090
3091 error:
3092         if (iova)
3093                 __free_iova(&domain->iovad, iova);
3094         printk(KERN_ERR "Device %s request: %zx@%llx dir %d --- failed\n",
3095                 dev_name(dev), size, (unsigned long long)paddr, dir);
3096         return 0;
3097 }
3098
3099 static dma_addr_t intel_map_page(struct device *dev, struct page *page,
3100                                  unsigned long offset, size_t size,
3101                                  enum dma_data_direction dir,
3102                                  struct dma_attrs *attrs)
3103 {
3104         return __intel_map_single(dev, page_to_phys(page) + offset, size,
3105                                   dir, *dev->dma_mask);
3106 }
3107
3108 static void flush_unmaps(void)
3109 {
3110         int i, j;
3111
3112         timer_on = 0;
3113
3114         /* just flush them all */
3115         for (i = 0; i < g_num_of_iommus; i++) {
3116                 struct intel_iommu *iommu = g_iommus[i];
3117                 if (!iommu)
3118                         continue;
3119
3120                 if (!deferred_flush[i].next)
3121                         continue;
3122
3123                 /* In caching mode, global flushes make emulation expensive */
3124                 if (!cap_caching_mode(iommu->cap))
3125                         iommu->flush.flush_iotlb(iommu, 0, 0, 0,
3126                                          DMA_TLB_GLOBAL_FLUSH);
3127                 for (j = 0; j < deferred_flush[i].next; j++) {
3128                         unsigned long mask;
3129                         struct iova *iova = deferred_flush[i].iova[j];
3130                         struct dmar_domain *domain = deferred_flush[i].domain[j];
3131
3132                         /* On real hardware multiple invalidations are expensive */
3133                         if (cap_caching_mode(iommu->cap))
3134                                 iommu_flush_iotlb_psi(iommu, domain->id,
3135                                         iova->pfn_lo, iova_size(iova),
3136                                         !deferred_flush[i].freelist[j], 0);
3137                         else {
3138                                 mask = ilog2(mm_to_dma_pfn(iova_size(iova)));
3139                                 iommu_flush_dev_iotlb(deferred_flush[i].domain[j],
3140                                                 (uint64_t)iova->pfn_lo << PAGE_SHIFT, mask);
3141                         }
3142                         __free_iova(&deferred_flush[i].domain[j]->iovad, iova);
3143                         if (deferred_flush[i].freelist[j])
3144                                 dma_free_pagelist(deferred_flush[i].freelist[j]);
3145                 }
3146                 deferred_flush[i].next = 0;
3147         }
3148
3149         list_size = 0;
3150 }
3151
3152 static void flush_unmaps_timeout(unsigned long data)
3153 {
3154         unsigned long flags;
3155
3156         spin_lock_irqsave(&async_umap_flush_lock, flags);
3157         flush_unmaps();
3158         spin_unlock_irqrestore(&async_umap_flush_lock, flags);
3159 }
3160
3161 static void add_unmap(struct dmar_domain *dom, struct iova *iova, struct page *freelist)
3162 {
3163         unsigned long flags;
3164         int next, iommu_id;
3165         struct intel_iommu *iommu;
3166
3167         spin_lock_irqsave(&async_umap_flush_lock, flags);
3168         if (list_size == HIGH_WATER_MARK)
3169                 flush_unmaps();
3170
3171         iommu = domain_get_iommu(dom);
3172         iommu_id = iommu->seq_id;
3173
3174         next = deferred_flush[iommu_id].next;
3175         deferred_flush[iommu_id].domain[next] = dom;
3176         deferred_flush[iommu_id].iova[next] = iova;
3177         deferred_flush[iommu_id].freelist[next] = freelist;
3178         deferred_flush[iommu_id].next++;
3179
3180         if (!timer_on) {
3181                 mod_timer(&unmap_timer, jiffies + msecs_to_jiffies(10));
3182                 timer_on = 1;
3183         }
3184         list_size++;
3185         spin_unlock_irqrestore(&async_umap_flush_lock, flags);
3186 }
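/*
 * flush_unmaps() and add_unmap() implement the lazy unmap path used when
 * intel_iommu_strict is not set: freed IOVAs are queued per IOMMU and
 * released in one batch, either when HIGH_WATER_MARK entries accumulate or
 * when the 10ms unmap_timer fires, so a single IOTLB flush covers many
 * unmaps at the cost of a short window of stale translations.
 */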
3187
3188 static void intel_unmap(struct device *dev, dma_addr_t dev_addr)
3189 {
3190         struct dmar_domain *domain;
3191         unsigned long start_pfn, last_pfn;
3192         struct iova *iova;
3193         struct intel_iommu *iommu;
3194         struct page *freelist;
3195
3196         if (iommu_no_mapping(dev))
3197                 return;
3198
3199         domain = find_domain(dev);
3200         BUG_ON(!domain);
3201
3202         iommu = domain_get_iommu(domain);
3203
3204         iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr));
3205         if (WARN_ONCE(!iova, "Driver unmaps unmatched page at PFN %llx\n",
3206                       (unsigned long long)dev_addr))
3207                 return;
3208
3209         start_pfn = mm_to_dma_pfn(iova->pfn_lo);
3210         last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1;
3211
3212         pr_debug("Device %s unmapping: pfn %lx-%lx\n",
3213                  dev_name(dev), start_pfn, last_pfn);
3214
3215         freelist = domain_unmap(domain, start_pfn, last_pfn);
3216
3217         if (intel_iommu_strict) {
3218                 iommu_flush_iotlb_psi(iommu, domain->id, start_pfn,
3219                                       last_pfn - start_pfn + 1, !freelist, 0);
3220                 /* free iova */
3221                 __free_iova(&domain->iovad, iova);
3222                 dma_free_pagelist(freelist);
3223         } else {
3224                 add_unmap(domain, iova, freelist);
3225                 /*
3226                  * Queue up the release of the unmap; batching the IOTLB flush
3227                  * saves roughly 1/6th of the CPU time it would otherwise use.
3228                  */
3229         }
3230 }
3231
3232 static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr,
3233                              size_t size, enum dma_data_direction dir,
3234                              struct dma_attrs *attrs)
3235 {
3236         intel_unmap(dev, dev_addr);
3237 }
3238
3239 static void *intel_alloc_coherent(struct device *dev, size_t size,
3240                                   dma_addr_t *dma_handle, gfp_t flags,
3241                                   struct dma_attrs *attrs)
3242 {
3243         struct page *page = NULL;
3244         int order;
3245
3246         size = PAGE_ALIGN(size);
3247         order = get_order(size);
3248
3249         if (!iommu_no_mapping(dev))
3250                 flags &= ~(GFP_DMA | GFP_DMA32);
3251         else if (dev->coherent_dma_mask < dma_get_required_mask(dev)) {
3252                 if (dev->coherent_dma_mask < DMA_BIT_MASK(32))
3253                         flags |= GFP_DMA;
3254                 else
3255                         flags |= GFP_DMA32;
3256         }
3257
3258         if (flags & __GFP_WAIT) {
3259                 unsigned int count = size >> PAGE_SHIFT;
3260
3261                 page = dma_alloc_from_contiguous(dev, count, order);
3262                 if (page && iommu_no_mapping(dev) &&
3263                     page_to_phys(page) + size > dev->coherent_dma_mask) {
3264                         dma_release_from_contiguous(dev, page, count);
3265                         page = NULL;
3266                 }
3267         }
3268
3269         if (!page)
3270                 page = alloc_pages(flags, order);
3271         if (!page)
3272                 return NULL;
3273         memset(page_address(page), 0, size);
3274
3275         *dma_handle = __intel_map_single(dev, page_to_phys(page), size,
3276                                          DMA_BIDIRECTIONAL,
3277                                          dev->coherent_dma_mask);
3278         if (*dma_handle)
3279                 return page_address(page);
3280         if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
3281                 __free_pages(page, order);
3282
3283         return NULL;
3284 }
3285
3286 static void intel_free_coherent(struct device *dev, size_t size, void *vaddr,
3287                                 dma_addr_t dma_handle, struct dma_attrs *attrs)
3288 {
3289         int order;
3290         struct page *page = virt_to_page(vaddr);
3291
3292         size = PAGE_ALIGN(size);
3293         order = get_order(size);
3294
3295         intel_unmap(dev, dma_handle);
3296         if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
3297                 __free_pages(page, order);
3298 }
3299
3300 static void intel_unmap_sg(struct device *dev, struct scatterlist *sglist,
3301                            int nelems, enum dma_data_direction dir,
3302                            struct dma_attrs *attrs)
3303 {
3304         intel_unmap(dev, sglist[0].dma_address);
3305 }
3306
3307 static int intel_nontranslate_map_sg(struct device *hddev,
3308         struct scatterlist *sglist, int nelems, int dir)
3309 {
3310         int i;
3311         struct scatterlist *sg;
3312
3313         for_each_sg(sglist, sg, nelems, i) {
3314                 BUG_ON(!sg_page(sg));
3315                 sg->dma_address = page_to_phys(sg_page(sg)) + sg->offset;
3316                 sg->dma_length = sg->length;
3317         }
3318         return nelems;
3319 }
3320
3321 static int intel_map_sg(struct device *dev, struct scatterlist *sglist, int nelems,
3322                         enum dma_data_direction dir, struct dma_attrs *attrs)
3323 {
3324         int i;
3325         struct dmar_domain *domain;
3326         size_t size = 0;
3327         int prot = 0;
3328         struct iova *iova = NULL;
3329         int ret;
3330         struct scatterlist *sg;
3331         unsigned long start_vpfn;
3332         struct intel_iommu *iommu;
3333
3334         BUG_ON(dir == DMA_NONE);
3335         if (iommu_no_mapping(dev))
3336                 return intel_nontranslate_map_sg(dev, sglist, nelems, dir);
3337
3338         domain = get_valid_domain_for_dev(dev);
3339         if (!domain)
3340                 return 0;
3341
3342         iommu = domain_get_iommu(domain);
3343
3344         for_each_sg(sglist, sg, nelems, i)
3345                 size += aligned_nrpages(sg->offset, sg->length);
3346
3347         iova = intel_alloc_iova(dev, domain, dma_to_mm_pfn(size),
3348                                 *dev->dma_mask);
3349         if (!iova) {
3350                 sglist->dma_length = 0;
3351                 return 0;
3352         }
3353
3354         /*
3355          * Check if DMAR supports zero-length reads on write-only
3356          * mappings.
3357          */
3358         if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL ||
3359                         !cap_zlr(iommu->cap))
3360                 prot |= DMA_PTE_READ;
3361         if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3362                 prot |= DMA_PTE_WRITE;
3363
3364         start_vpfn = mm_to_dma_pfn(iova->pfn_lo);
3365
3366         ret = domain_sg_mapping(domain, start_vpfn, sglist, size, prot);
3367         if (unlikely(ret)) {
3368                 dma_pte_free_pagetable(domain, start_vpfn,
3369                                        start_vpfn + size - 1);
3370                 __free_iova(&domain->iovad, iova);
3371                 return 0;
3372         }
3373
3374         /* it's a non-present to present mapping. Only flush if caching mode */
3375         if (cap_caching_mode(iommu->cap))
3376                 iommu_flush_iotlb_psi(iommu, domain->id, start_vpfn, size, 0, 1);
3377         else
3378                 iommu_flush_write_buffer(iommu);
3379
3380         return nelems;
3381 }
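/*
 * Illustrative sketch only (kept out of the build with #if 0): intel_map_sg()
 * and intel_unmap_sg() are reached through the generic dma_map_sg() and
 * dma_unmap_sg() helpers.  The device and scatterlist below are hypothetical;
 * note that dma_map_sg() returns 0 on failure and the number of mapped
 * entries otherwise.
 */
#if 0
static int example_sg_usage(struct device *dev, struct scatterlist *sgl,
			    int nents)
{
	struct scatterlist *sg;
	int i, mapped;

	mapped = dma_map_sg(dev, sgl, nents, DMA_FROM_DEVICE);
	if (!mapped)
		return -ENOMEM;

	for_each_sg(sgl, sg, mapped, i) {
		/* Program sg_dma_address(sg)/sg_dma_len(sg) into the device. */
	}

	dma_unmap_sg(dev, sgl, nents, DMA_FROM_DEVICE);
	return 0;
}
#endif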
3382
3383 static int intel_mapping_error(struct device *dev, dma_addr_t dma_addr)
3384 {
3385         return !dma_addr;
3386 }
3387
3388 struct dma_map_ops intel_dma_ops = {
3389         .alloc = intel_alloc_coherent,
3390         .free = intel_free_coherent,
3391         .map_sg = intel_map_sg,
3392         .unmap_sg = intel_unmap_sg,
3393         .map_page = intel_map_page,
3394         .unmap_page = intel_unmap_page,
3395         .mapping_error = intel_mapping_error,
3396 };
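/*
 * Illustrative sketch only (kept out of the build with #if 0): once
 * intel_dma_ops is installed as the global dma_ops (see intel_iommu_init()
 * below), ordinary drivers reach the routines above through the generic DMA
 * API.  The device, buffer and length here are hypothetical.
 */
#if 0
static int example_streaming_dma(struct device *dev, void *buf, size_t len)
{
	dma_addr_t handle;

	/* Lands in intel_map_page() -> __intel_map_single(). */
	handle = dma_map_single(dev, buf, len, DMA_TO_DEVICE);
	if (dma_mapping_error(dev, handle))
		return -ENOMEM;

	/* ... hand "handle" to the hardware and wait for completion ... */

	/* Lands in intel_unmap_page() -> intel_unmap(). */
	dma_unmap_single(dev, handle, len, DMA_TO_DEVICE);
	return 0;
}
#endif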
3397
3398 static inline int iommu_domain_cache_init(void)
3399 {
3400         int ret = 0;
3401
3402         iommu_domain_cache = kmem_cache_create("iommu_domain",
3403                                          sizeof(struct dmar_domain),
3404                                          0,
3405                                          SLAB_HWCACHE_ALIGN,
3406                                          NULL);
3407
3408         if (!iommu_domain_cache) {
3409                 printk(KERN_ERR "Couldn't create iommu_domain cache\n");
3410                 ret = -ENOMEM;
3411         }
3412
3413         return ret;
3414 }
3415
3416 static inline int iommu_devinfo_cache_init(void)
3417 {
3418         int ret = 0;
3419
3420         iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
3421                                          sizeof(struct device_domain_info),
3422                                          0,
3423                                          SLAB_HWCACHE_ALIGN,
3424                                          NULL);
3425         if (!iommu_devinfo_cache) {
3426                 printk(KERN_ERR "Couldn't create devinfo cache\n");
3427                 ret = -ENOMEM;
3428         }
3429
3430         return ret;
3431 }
3432
3433 static int __init iommu_init_mempool(void)
3434 {
3435         int ret;
3436         ret = iommu_iova_cache_init();
3437         if (ret)
3438                 return ret;
3439
3440         ret = iommu_domain_cache_init();
3441         if (ret)
3442                 goto domain_error;
3443
3444         ret = iommu_devinfo_cache_init();
3445         if (!ret)
3446                 return ret;
3447
3448         kmem_cache_destroy(iommu_domain_cache);
3449 domain_error:
3450         iommu_iova_cache_destroy();
3451
3452         return -ENOMEM;
3453 }
3454
3455 static void __init iommu_exit_mempool(void)
3456 {
3457         kmem_cache_destroy(iommu_devinfo_cache);
3458         kmem_cache_destroy(iommu_domain_cache);
3459         iommu_iova_cache_destroy();
3460 }
3461
3462 static void quirk_ioat_snb_local_iommu(struct pci_dev *pdev)
3463 {
3464         struct dmar_drhd_unit *drhd;
3465         u32 vtbar;
3466         int rc;
3467
3468         /* We know that this device on this chipset has its own IOMMU.
3469          * If we find it under a different IOMMU, then the BIOS is lying
3470          * to us. Hope that the IOMMU for this device is actually
3471          * disabled, and it needs no translation...
3472          */
3473         rc = pci_bus_read_config_dword(pdev->bus, PCI_DEVFN(0, 0), 0xb0, &vtbar);
3474         if (rc) {
3475                 /* "can't" happen */
3476                 dev_info(&pdev->dev, "failed to run vt-d quirk\n");
3477                 return;
3478         }
3479         vtbar &= 0xffff0000;
3480
3481         /* we know that this iommu should be at offset 0xa000 from vtbar */
3482         drhd = dmar_find_matched_drhd_unit(pdev);
3483         if (WARN_TAINT_ONCE(!drhd || drhd->reg_base_addr - vtbar != 0xa000,
3484                             TAINT_FIRMWARE_WORKAROUND,
3485                             "BIOS assigned incorrect VT-d unit for Intel(R) QuickData Technology device\n"))
3486                 pdev->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
3487 }
3488 DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB, quirk_ioat_snb_local_iommu);
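/*
 * Setting archdata.iommu to DUMMY_DEVICE_DOMAIN_INFO makes iommu_dummy()
 * return true for this device, so iommu_no_mapping() treats it as
 * untranslated and the DMA ops above pass physical addresses straight
 * through for it.
 */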
3489
3490 static void __init init_no_remapping_devices(void)
3491 {
3492         struct dmar_drhd_unit *drhd;
3493         struct device *dev;
3494         int i;
3495
3496         for_each_drhd_unit(drhd) {
3497                 if (!drhd->include_all) {
3498                         for_each_active_dev_scope(drhd->devices,
3499                                                   drhd->devices_cnt, i, dev)
3500                                 break;
3501                         /* ignore DMAR unit if no devices exist */
3502                         if (i == drhd->devices_cnt)
3503                                 drhd->ignored = 1;
3504                 }
3505         }
3506
3507         for_each_active_drhd_unit(drhd) {
3508                 if (drhd->include_all)
3509                         continue;
3510
3511                 for_each_active_dev_scope(drhd->devices,
3512                                           drhd->devices_cnt, i, dev)
3513                         if (!dev_is_pci(dev) || !IS_GFX_DEVICE(to_pci_dev(dev)))
3514                                 break;
3515                 if (i < drhd->devices_cnt)
3516                         continue;
3517
3518                 /* This IOMMU has *only* gfx devices. Either bypass it or
3519                    set the gfx_mapped flag, as appropriate */
3520                 if (dmar_map_gfx) {
3521                         intel_iommu_gfx_mapped = 1;
3522                 } else {
3523                         drhd->ignored = 1;
3524                         for_each_active_dev_scope(drhd->devices,
3525                                                   drhd->devices_cnt, i, dev)
3526                                 dev->archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
3527                 }
3528         }
3529 }
3530
3531 #ifdef CONFIG_SUSPEND
3532 static int init_iommu_hw(void)
3533 {
3534         struct dmar_drhd_unit *drhd;
3535         struct intel_iommu *iommu = NULL;
3536
3537         for_each_active_iommu(iommu, drhd)
3538                 if (iommu->qi)
3539                         dmar_reenable_qi(iommu);
3540
3541         for_each_iommu(iommu, drhd) {
3542                 if (drhd->ignored) {
3543                         /*
3544                          * we always have to disable PMRs or DMA may fail on
3545                          * this device
3546                          */
3547                         if (force_on)
3548                                 iommu_disable_protect_mem_regions(iommu);
3549                         continue;
3550                 }
3551
3552                 iommu_flush_write_buffer(iommu);
3553
3554                 iommu_set_root_entry(iommu);
3555
3556                 iommu->flush.flush_context(iommu, 0, 0, 0,
3557                                            DMA_CCMD_GLOBAL_INVL);
3558                 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
3559                 iommu_enable_translation(iommu);
3560                 iommu_disable_protect_mem_regions(iommu);
3561         }
3562
3563         return 0;
3564 }
3565
3566 static void iommu_flush_all(void)
3567 {
3568         struct dmar_drhd_unit *drhd;
3569         struct intel_iommu *iommu;
3570
3571         for_each_active_iommu(iommu, drhd) {
3572                 iommu->flush.flush_context(iommu, 0, 0, 0,
3573                                            DMA_CCMD_GLOBAL_INVL);
3574                 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
3575                                          DMA_TLB_GLOBAL_FLUSH);
3576         }
3577 }
3578
3579 static int iommu_suspend(void)
3580 {
3581         struct dmar_drhd_unit *drhd;
3582         struct intel_iommu *iommu = NULL;
3583         unsigned long flag;
3584
3585         for_each_active_iommu(iommu, drhd) {
3586                 iommu->iommu_state = kzalloc(sizeof(u32) * MAX_SR_DMAR_REGS,
3587                                                  GFP_ATOMIC);
3588                 if (!iommu->iommu_state)
3589                         goto nomem;
3590         }
3591
3592         iommu_flush_all();
3593
3594         for_each_active_iommu(iommu, drhd) {
3595                 iommu_disable_translation(iommu);
3596
3597                 raw_spin_lock_irqsave(&iommu->register_lock, flag);
3598
3599                 iommu->iommu_state[SR_DMAR_FECTL_REG] =
3600                         readl(iommu->reg + DMAR_FECTL_REG);
3601                 iommu->iommu_state[SR_DMAR_FEDATA_REG] =
3602                         readl(iommu->reg + DMAR_FEDATA_REG);
3603                 iommu->iommu_state[SR_DMAR_FEADDR_REG] =
3604                         readl(iommu->reg + DMAR_FEADDR_REG);
3605                 iommu->iommu_state[SR_DMAR_FEUADDR_REG] =
3606                         readl(iommu->reg + DMAR_FEUADDR_REG);
3607
3608                 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
3609         }
3610         return 0;
3611
3612 nomem:
3613         for_each_active_iommu(iommu, drhd)
3614                 kfree(iommu->iommu_state);
3615
3616         return -ENOMEM;
3617 }
3618
3619 static void iommu_resume(void)
3620 {
3621         struct dmar_drhd_unit *drhd;
3622         struct intel_iommu *iommu = NULL;
3623         unsigned long flag;
3624
3625         if (init_iommu_hw()) {
3626                 if (force_on)
3627                         panic("tboot: IOMMU setup failed, DMAR can not resume!\n");
3628                 else
3629                         WARN(1, "IOMMU setup failed, DMAR can not resume!\n");
3630                 return;
3631         }
3632
3633         for_each_active_iommu(iommu, drhd) {
3634
3635                 raw_spin_lock_irqsave(&iommu->register_lock, flag);
3636
3637                 writel(iommu->iommu_state[SR_DMAR_FECTL_REG],
3638                         iommu->reg + DMAR_FECTL_REG);
3639                 writel(iommu->iommu_state[SR_DMAR_FEDATA_REG],
3640                         iommu->reg + DMAR_FEDATA_REG);
3641                 writel(iommu->iommu_state[SR_DMAR_FEADDR_REG],
3642                         iommu->reg + DMAR_FEADDR_REG);
3643                 writel(iommu->iommu_state[SR_DMAR_FEUADDR_REG],
3644                         iommu->reg + DMAR_FEUADDR_REG);
3645
3646                 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
3647         }
3648
3649         for_each_active_iommu(iommu, drhd)
3650                 kfree(iommu->iommu_state);
3651 }
3652
3653 static struct syscore_ops iommu_syscore_ops = {
3654         .resume         = iommu_resume,
3655         .suspend        = iommu_suspend,
3656 };
3657
3658 static void __init init_iommu_pm_ops(void)
3659 {
3660         register_syscore_ops(&iommu_syscore_ops);
3661 }
3662
3663 #else
3664 static inline void init_iommu_pm_ops(void) {}
3665 #endif  /* CONFIG_SUSPEND */
3666
3667
3668 int __init dmar_parse_one_rmrr(struct acpi_dmar_header *header, void *arg)
3669 {
3670         struct acpi_dmar_reserved_memory *rmrr;
3671         struct dmar_rmrr_unit *rmrru;
3672
3673         rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL);
3674         if (!rmrru)
3675                 return -ENOMEM;
3676
3677         rmrru->hdr = header;
3678         rmrr = (struct acpi_dmar_reserved_memory *)header;
3679         rmrru->base_address = rmrr->base_address;
3680         rmrru->end_address = rmrr->end_address;
3681         rmrru->devices = dmar_alloc_dev_scope((void *)(rmrr + 1),
3682                                 ((void *)rmrr) + rmrr->header.length,
3683                                 &rmrru->devices_cnt);
3684         if (rmrru->devices_cnt && rmrru->devices == NULL) {
3685                 kfree(rmrru);
3686                 return -ENOMEM;
3687         }
3688
3689         list_add(&rmrru->list, &dmar_rmrr_units);
3690
3691         return 0;
3692 }
3693
3694 static struct dmar_atsr_unit *dmar_find_atsr(struct acpi_dmar_atsr *atsr)
3695 {
3696         struct dmar_atsr_unit *atsru;
3697         struct acpi_dmar_atsr *tmp;
3698
3699         list_for_each_entry_rcu(atsru, &dmar_atsr_units, list) {
3700                 tmp = (struct acpi_dmar_atsr *)atsru->hdr;
3701                 if (atsr->segment != tmp->segment)
3702                         continue;
3703                 if (atsr->header.length != tmp->header.length)
3704                         continue;
3705                 if (memcmp(atsr, tmp, atsr->header.length) == 0)
3706                         return atsru;
3707         }
3708
3709         return NULL;
3710 }
3711
3712 int dmar_parse_one_atsr(struct acpi_dmar_header *hdr, void *arg)
3713 {
3714         struct acpi_dmar_atsr *atsr;
3715         struct dmar_atsr_unit *atsru;
3716
3717         if (system_state != SYSTEM_BOOTING && !intel_iommu_enabled)
3718                 return 0;
3719
3720         atsr = container_of(hdr, struct acpi_dmar_atsr, header);
3721         atsru = dmar_find_atsr(atsr);
3722         if (atsru)
3723                 return 0;
3724
3725         atsru = kzalloc(sizeof(*atsru) + hdr->length, GFP_KERNEL);
3726         if (!atsru)
3727                 return -ENOMEM;
3728
3729         /*
3730          * If the memory was allocated from slab by the ACPI _DSM method, we
3731          * need to copy its content because the buffer will be freed on
3732          * return.
3733          */
3734         atsru->hdr = (void *)(atsru + 1);
3735         memcpy(atsru->hdr, hdr, hdr->length);
3736         atsru->include_all = atsr->flags & 0x1;
3737         if (!atsru->include_all) {
3738                 atsru->devices = dmar_alloc_dev_scope((void *)(atsr + 1),
3739                                 (void *)atsr + atsr->header.length,
3740                                 &atsru->devices_cnt);
3741                 if (atsru->devices_cnt && atsru->devices == NULL) {
3742                         kfree(atsru);
3743                         return -ENOMEM;
3744                 }
3745         }
3746
3747         list_add_rcu(&atsru->list, &dmar_atsr_units);
3748
3749         return 0;
3750 }
3751
3752 static void intel_iommu_free_atsr(struct dmar_atsr_unit *atsru)
3753 {
3754         dmar_free_dev_scope(&atsru->devices, &atsru->devices_cnt);
3755         kfree(atsru);
3756 }
3757
3758 int dmar_release_one_atsr(struct acpi_dmar_header *hdr, void *arg)
3759 {
3760         struct acpi_dmar_atsr *atsr;
3761         struct dmar_atsr_unit *atsru;
3762
3763         atsr = container_of(hdr, struct acpi_dmar_atsr, header);
3764         atsru = dmar_find_atsr(atsr);
3765         if (atsru) {
3766                 list_del_rcu(&atsru->list);
3767                 synchronize_rcu();
3768                 intel_iommu_free_atsr(atsru);
3769         }
3770
3771         return 0;
3772 }
3773
3774 int dmar_check_one_atsr(struct acpi_dmar_header *hdr, void *arg)
3775 {
3776         int i;
3777         struct device *dev;
3778         struct acpi_dmar_atsr *atsr;
3779         struct dmar_atsr_unit *atsru;
3780
3781         atsr = container_of(hdr, struct acpi_dmar_atsr, header);
3782         atsru = dmar_find_atsr(atsr);
3783         if (!atsru)
3784                 return 0;
3785
3786         if (!atsru->include_all && atsru->devices && atsru->devices_cnt)
3787                 for_each_active_dev_scope(atsru->devices, atsru->devices_cnt,
3788                                           i, dev)
3789                         return -EBUSY;
3790
3791         return 0;
3792 }
3793
3794 static int intel_iommu_add(struct dmar_drhd_unit *dmaru)
3795 {
3796         int sp, ret = 0;
3797         struct intel_iommu *iommu = dmaru->iommu;
3798
3799         if (g_iommus[iommu->seq_id])
3800                 return 0;
3801
3802         if (hw_pass_through && !ecap_pass_through(iommu->ecap)) {
3803                 pr_warn("IOMMU: %s doesn't support hardware pass through.\n",
3804                         iommu->name);
3805                 return -ENXIO;
3806         }
3807         if (!ecap_sc_support(iommu->ecap) &&
3808             domain_update_iommu_snooping(iommu)) {
3809                 pr_warn("IOMMU: %s doesn't support snooping.\n",
3810                         iommu->name);
3811                 return -ENXIO;
3812         }
3813         sp = domain_update_iommu_superpage(iommu) - 1;
3814         if (sp >= 0 && !(cap_super_page_val(iommu->cap) & (1 << sp))) {
3815                 pr_warn("IOMMU: %s doesn't support large page.\n",
3816                         iommu->name);
3817                 return -ENXIO;
3818         }
3819
3820         /*
3821          * Disable translation if already enabled prior to OS handover.
3822          */
3823         if (iommu->gcmd & DMA_GCMD_TE)
3824                 iommu_disable_translation(iommu);
3825
3826         g_iommus[iommu->seq_id] = iommu;
3827         ret = iommu_init_domains(iommu);
3828         if (ret == 0)
3829                 ret = iommu_alloc_root_entry(iommu);
3830         if (ret)
3831                 goto out;
3832
3833         if (dmaru->ignored) {
3834                 /*
3835                  * we always have to disable PMRs or DMA may fail on this device
3836                  */
3837                 if (force_on)
3838                         iommu_disable_protect_mem_regions(iommu);
3839                 return 0;
3840         }
3841
3842         intel_iommu_init_qi(iommu);
3843         iommu_flush_write_buffer(iommu);
3844         ret = dmar_set_interrupt(iommu);
3845         if (ret)
3846                 goto disable_iommu;
3847
3848         iommu_set_root_entry(iommu);
3849         iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
3850         iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
3851         iommu_enable_translation(iommu);
3852
3853         if (si_domain) {
3854                 ret = iommu_attach_domain(si_domain, iommu);
3855                 if (ret < 0 || si_domain->id != ret)
3856                         goto disable_iommu;
3857                 domain_attach_iommu(si_domain, iommu);
3858         }
3859
3860         iommu_disable_protect_mem_regions(iommu);
3861         return 0;
3862
3863 disable_iommu:
3864         disable_dmar_iommu(iommu);
3865 out:
3866         free_dmar_iommu(iommu);
3867         return ret;
3868 }
3869
3870 int dmar_iommu_hotplug(struct dmar_drhd_unit *dmaru, bool insert)
3871 {
3872         int ret = 0;
3873         struct intel_iommu *iommu = dmaru->iommu;
3874
3875         if (!intel_iommu_enabled)
3876                 return 0;
3877         if (iommu == NULL)
3878                 return -EINVAL;
3879
3880         if (insert) {
3881                 ret = intel_iommu_add(dmaru);
3882         } else {
3883                 disable_dmar_iommu(iommu);
3884                 free_dmar_iommu(iommu);
3885         }
3886
3887         return ret;
3888 }
3889
3890 static void intel_iommu_free_dmars(void)
3891 {
3892         struct dmar_rmrr_unit *rmrru, *rmrr_n;
3893         struct dmar_atsr_unit *atsru, *atsr_n;
3894
3895         list_for_each_entry_safe(rmrru, rmrr_n, &dmar_rmrr_units, list) {
3896                 list_del(&rmrru->list);
3897                 dmar_free_dev_scope(&rmrru->devices, &rmrru->devices_cnt);
3898                 kfree(rmrru);
3899         }
3900
3901         list_for_each_entry_safe(atsru, atsr_n, &dmar_atsr_units, list) {
3902                 list_del(&atsru->list);
3903                 intel_iommu_free_atsr(atsru);
3904         }
3905 }
3906
3907 int dmar_find_matched_atsr_unit(struct pci_dev *dev)
3908 {
3909         int i, ret = 1;
3910         struct pci_bus *bus;
3911         struct pci_dev *bridge = NULL;
3912         struct device *tmp;
3913         struct acpi_dmar_atsr *atsr;
3914         struct dmar_atsr_unit *atsru;
3915
3916         dev = pci_physfn(dev);
3917         for (bus = dev->bus; bus; bus = bus->parent) {
3918                 bridge = bus->self;
3919                 if (!bridge || !pci_is_pcie(bridge) ||
3920                     pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE)
3921                         return 0;
3922                 if (pci_pcie_type(bridge) == PCI_EXP_TYPE_ROOT_PORT)
3923                         break;
3924         }
3925         if (!bridge)
3926                 return 0;
3927
3928         rcu_read_lock();
3929         list_for_each_entry_rcu(atsru, &dmar_atsr_units, list) {
3930                 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
3931                 if (atsr->segment != pci_domain_nr(dev->bus))
3932                         continue;
3933
3934                 for_each_dev_scope(atsru->devices, atsru->devices_cnt, i, tmp)
3935                         if (tmp == &bridge->dev)
3936                                 goto out;
3937
3938                 if (atsru->include_all)
3939                         goto out;
3940         }
3941         ret = 0;
3942 out:
3943         rcu_read_unlock();
3944
3945         return ret;
3946 }
3947
3948 int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info)
3949 {
3950         int ret = 0;
3951         struct dmar_rmrr_unit *rmrru;
3952         struct dmar_atsr_unit *atsru;
3953         struct acpi_dmar_atsr *atsr;
3954         struct acpi_dmar_reserved_memory *rmrr;
3955
3956         if (!intel_iommu_enabled && system_state != SYSTEM_BOOTING)
3957                 return 0;
3958
3959         list_for_each_entry(rmrru, &dmar_rmrr_units, list) {
3960                 rmrr = container_of(rmrru->hdr,
3961                                     struct acpi_dmar_reserved_memory, header);
3962                 if (info->event == BUS_NOTIFY_ADD_DEVICE) {
3963                         ret = dmar_insert_dev_scope(info, (void *)(rmrr + 1),
3964                                 ((void *)rmrr) + rmrr->header.length,
3965                                 rmrr->segment, rmrru->devices,
3966                                 rmrru->devices_cnt);
3967                         if (ret < 0)
3968                                 return ret;
3969                 } else if (info->event == BUS_NOTIFY_DEL_DEVICE) {
3970                         dmar_remove_dev_scope(info, rmrr->segment,
3971                                 rmrru->devices, rmrru->devices_cnt);
3972                 }
3973         }
3974
3975         list_for_each_entry(atsru, &dmar_atsr_units, list) {
3976                 if (atsru->include_all)
3977                         continue;
3978
3979                 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
3980                 if (info->event == BUS_NOTIFY_ADD_DEVICE) {
3981                         ret = dmar_insert_dev_scope(info, (void *)(atsr + 1),
3982                                         (void *)atsr + atsr->header.length,
3983                                         atsr->segment, atsru->devices,
3984                                         atsru->devices_cnt);
3985                         if (ret > 0)
3986                                 break;
3987                         else if (ret < 0)
3988                                 return ret;
3989                 } else if (info->event == BUS_NOTIFY_DEL_DEVICE) {
3990                         if (dmar_remove_dev_scope(info, atsr->segment,
3991                                         atsru->devices, atsru->devices_cnt))
3992                                 break;
3993                 }
3994         }
3995
3996         return 0;
3997 }
3998
3999 /*
4000  * Here we only respond to a device being unbound from its driver.
4001  *
4002  * A newly added device is not attached to its DMAR domain here yet; that
4003  * happens when the device is first mapped to an iova.
4004  */
4005 static int device_notifier(struct notifier_block *nb,
4006                                   unsigned long action, void *data)
4007 {
4008         struct device *dev = data;
4009         struct dmar_domain *domain;
4010
4011         if (iommu_dummy(dev))
4012                 return 0;
4013
4014         if (action != BUS_NOTIFY_REMOVED_DEVICE)
4015                 return 0;
4016
4017         domain = find_domain(dev);
4018         if (!domain)
4019                 return 0;
4020
4021         down_read(&dmar_global_lock);
4022         domain_remove_one_dev_info(domain, dev);
4023         if (!domain_type_is_vm_or_si(domain) && list_empty(&domain->devices))
4024                 domain_exit(domain);
4025         up_read(&dmar_global_lock);
4026
4027         return 0;
4028 }
4029
4030 static struct notifier_block device_nb = {
4031         .notifier_call = device_notifier,
4032 };
4033
4034 static int intel_iommu_memory_notifier(struct notifier_block *nb,
4035                                        unsigned long val, void *v)
4036 {
4037         struct memory_notify *mhp = v;
4038         unsigned long long start, end;
4039         unsigned long start_vpfn, last_vpfn;
4040
4041         switch (val) {
4042         case MEM_GOING_ONLINE:
4043                 start = mhp->start_pfn << PAGE_SHIFT;
4044                 end = ((mhp->start_pfn + mhp->nr_pages) << PAGE_SHIFT) - 1;
4045                 if (iommu_domain_identity_map(si_domain, start, end)) {
4046                         pr_warn("dmar: failed to build identity map for [%llx-%llx]\n",
4047                                 start, end);
4048                         return NOTIFY_BAD;
4049                 }
4050                 break;
4051
4052         case MEM_OFFLINE:
4053         case MEM_CANCEL_ONLINE:
4054                 start_vpfn = mm_to_dma_pfn(mhp->start_pfn);
4055                 last_vpfn = mm_to_dma_pfn(mhp->start_pfn + mhp->nr_pages - 1);
4056                 while (start_vpfn <= last_vpfn) {
4057                         struct iova *iova;
4058                         struct dmar_drhd_unit *drhd;
4059                         struct intel_iommu *iommu;
4060                         struct page *freelist;
4061
4062                         iova = find_iova(&si_domain->iovad, start_vpfn);
4063                         if (iova == NULL) {
4064                                 pr_debug("dmar: failed to get IOVA for PFN %lx\n",
4065                                          start_vpfn);
4066                                 break;
4067                         }
4068
4069                         iova = split_and_remove_iova(&si_domain->iovad, iova,
4070                                                      start_vpfn, last_vpfn);
4071                         if (iova == NULL) {
4072                                 pr_warn("dmar: failed to split IOVA PFN [%lx-%lx]\n",
4073                                         start_vpfn, last_vpfn);
4074                                 return NOTIFY_BAD;
4075                         }
4076
4077                         freelist = domain_unmap(si_domain, iova->pfn_lo,
4078                                                iova->pfn_hi);
4079
4080                         rcu_read_lock();
4081                         for_each_active_iommu(iommu, drhd)
4082                                 iommu_flush_iotlb_psi(iommu, si_domain->id,
4083                                         iova->pfn_lo, iova_size(iova),
4084                                         !freelist, 0);
4085                         rcu_read_unlock();
4086                         dma_free_pagelist(freelist);
4087
4088                         start_vpfn = iova->pfn_hi + 1;
4089                         free_iova_mem(iova);
4090                 }
4091                 break;
4092         }
4093
4094         return NOTIFY_OK;
4095 }
4096
4097 static struct notifier_block intel_iommu_memory_nb = {
4098         .notifier_call = intel_iommu_memory_notifier,
4099         .priority = 0
4100 };
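/*
 * The notifier above is only registered from intel_iommu_init() when
 * si_domain exists and hardware pass-through is not in use: newly onlined
 * memory must be added to the software identity map, and offlined memory
 * must be unmapped from it again.
 */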
4101
4102
4103 static ssize_t intel_iommu_show_version(struct device *dev,
4104                                         struct device_attribute *attr,
4105                                         char *buf)
4106 {
4107         struct intel_iommu *iommu = dev_get_drvdata(dev);
4108         u32 ver = readl(iommu->reg + DMAR_VER_REG);
4109         return sprintf(buf, "%d:%d\n",
4110                        DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver));
4111 }
4112 static DEVICE_ATTR(version, S_IRUGO, intel_iommu_show_version, NULL);
4113
4114 static ssize_t intel_iommu_show_address(struct device *dev,
4115                                         struct device_attribute *attr,
4116                                         char *buf)
4117 {
4118         struct intel_iommu *iommu = dev_get_drvdata(dev);
4119         return sprintf(buf, "%llx\n", iommu->reg_phys);
4120 }
4121 static DEVICE_ATTR(address, S_IRUGO, intel_iommu_show_address, NULL);
4122
4123 static ssize_t intel_iommu_show_cap(struct device *dev,
4124                                     struct device_attribute *attr,
4125                                     char *buf)
4126 {
4127         struct intel_iommu *iommu = dev_get_drvdata(dev);
4128         return sprintf(buf, "%llx\n", iommu->cap);
4129 }
4130 static DEVICE_ATTR(cap, S_IRUGO, intel_iommu_show_cap, NULL);
4131
4132 static ssize_t intel_iommu_show_ecap(struct device *dev,
4133                                     struct device_attribute *attr,
4134                                     char *buf)
4135 {
4136         struct intel_iommu *iommu = dev_get_drvdata(dev);
4137         return sprintf(buf, "%llx\n", iommu->ecap);
4138 }
4139 static DEVICE_ATTR(ecap, S_IRUGO, intel_iommu_show_ecap, NULL);
4140
4141 static struct attribute *intel_iommu_attrs[] = {
4142         &dev_attr_version.attr,
4143         &dev_attr_address.attr,
4144         &dev_attr_cap.attr,
4145         &dev_attr_ecap.attr,
4146         NULL,
4147 };
4148
4149 static struct attribute_group intel_iommu_group = {
4150         .name = "intel-iommu",
4151         .attrs = intel_iommu_attrs,
4152 };
4153
4154 const struct attribute_group *intel_iommu_groups[] = {
4155         &intel_iommu_group,
4156         NULL,
4157 };
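/*
 * The groups above are passed to iommu_device_create() in intel_iommu_init()
 * below, so each DMAR unit exports these attributes in sysfs under an
 * "intel-iommu" group.
 */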
4158
4159 int __init intel_iommu_init(void)
4160 {
4161         int ret = -ENODEV;
4162         struct dmar_drhd_unit *drhd;
4163         struct intel_iommu *iommu;
4164
4165         /* VT-d is required for a TXT/tboot launch, so enforce that */
4166         force_on = tboot_force_iommu();
4167
4168         if (iommu_init_mempool()) {
4169                 if (force_on)
4170                         panic("tboot: Failed to initialize iommu memory\n");
4171                 return -ENOMEM;
4172         }
4173
4174         down_write(&dmar_global_lock);
4175         if (dmar_table_init()) {
4176                 if (force_on)
4177                         panic("tboot: Failed to initialize DMAR table\n");
4178                 goto out_free_dmar;
4179         }
4180
4181         /*
4182          * Disable translation if already enabled prior to OS handover.
4183          */
4184         for_each_active_iommu(iommu, drhd)
4185                 if (iommu->gcmd & DMA_GCMD_TE)
4186                         iommu_disable_translation(iommu);
4187
4188         if (dmar_dev_scope_init() < 0) {
4189                 if (force_on)
4190                         panic("tboot: Failed to initialize DMAR device scope\n");
4191                 goto out_free_dmar;
4192         }
4193
4194         if (no_iommu || dmar_disabled)
4195                 goto out_free_dmar;
4196
4197         if (list_empty(&dmar_rmrr_units))
4198                 printk(KERN_INFO "DMAR: No RMRR found\n");
4199
4200         if (list_empty(&dmar_atsr_units))
4201                 printk(KERN_INFO "DMAR: No ATSR found\n");
4202
4203         if (dmar_init_reserved_ranges()) {
4204                 if (force_on)
4205                         panic("tboot: Failed to reserve iommu ranges\n");
4206                 goto out_free_reserved_range;
4207         }
4208
4209         init_no_remapping_devices();
4210
4211         ret = init_dmars();
4212         if (ret) {
4213                 if (force_on)
4214                         panic("tboot: Failed to initialize DMARs\n");
4215                 printk(KERN_ERR "IOMMU: dmar init failed\n");
4216                 goto out_free_reserved_range;
4217         }
4218         up_write(&dmar_global_lock);
4219         printk(KERN_INFO
4220         "PCI-DMA: Intel(R) Virtualization Technology for Directed I/O\n");
4221
4222         init_timer(&unmap_timer);
4223 #ifdef CONFIG_SWIOTLB
4224         swiotlb = 0;
4225 #endif
4226         dma_ops = &intel_dma_ops;
4227
4228         init_iommu_pm_ops();
4229
4230         for_each_active_iommu(iommu, drhd)
4231                 iommu->iommu_dev = iommu_device_create(NULL, iommu,
4232                                                        intel_iommu_groups,
4233                                                        iommu->name);
4234
4235         bus_set_iommu(&pci_bus_type, &intel_iommu_ops);
4236         bus_register_notifier(&pci_bus_type, &device_nb);
4237         if (si_domain && !hw_pass_through)
4238                 register_memory_notifier(&intel_iommu_memory_nb);
4239
4240         intel_iommu_enabled = 1;
4241
4242         return 0;
4243
4244 out_free_reserved_range:
4245         put_iova_domain(&reserved_iova_list);
4246 out_free_dmar:
4247         intel_iommu_free_dmars();
4248         up_write(&dmar_global_lock);
4249         iommu_exit_mempool();
4250         return ret;
4251 }
4252
4253 static int iommu_detach_dev_cb(struct pci_dev *pdev, u16 alias, void *opaque)
4254 {
4255         struct intel_iommu *iommu = opaque;
4256
4257         iommu_detach_dev(iommu, PCI_BUS_NUM(alias), alias & 0xff);
4258         return 0;
4259 }
4260
4261 /*
4262  * NB - intel-iommu lacks any sort of reference counting for the users of
4263  * dependent devices.  If multiple endpoints have intersecting dependent
4264  * devices, unbinding the driver from any one of them will possibly leave
4265  * the others unable to operate.
4266  */
4267 static void iommu_detach_dependent_devices(struct intel_iommu *iommu,
4268                                            struct device *dev)
4269 {
4270         if (!iommu || !dev || !dev_is_pci(dev))
4271                 return;
4272
4273         pci_for_each_dma_alias(to_pci_dev(dev), &iommu_detach_dev_cb, iommu);
4274 }
4275
4276 static void domain_remove_one_dev_info(struct dmar_domain *domain,
4277                                        struct device *dev)
4278 {
4279         struct device_domain_info *info, *tmp;
4280         struct intel_iommu *iommu;
4281         unsigned long flags;
4282         int found = 0;
4283         u8 bus, devfn;
4284
4285         iommu = device_to_iommu(dev, &bus, &devfn);
4286         if (!iommu)
4287                 return;
4288
4289         spin_lock_irqsave(&device_domain_lock, flags);
4290         list_for_each_entry_safe(info, tmp, &domain->devices, link) {
4291                 if (info->iommu == iommu && info->bus == bus &&
4292                     info->devfn == devfn) {
4293                         unlink_domain_info(info);
4294                         spin_unlock_irqrestore(&device_domain_lock, flags);
4295
4296                         iommu_disable_dev_iotlb(info);
4297                         iommu_detach_dev(iommu, info->bus, info->devfn);
4298                         iommu_detach_dependent_devices(iommu, dev);
4299                         free_devinfo_mem(info);
4300
4301                         spin_lock_irqsave(&device_domain_lock, flags);
4302
4303                         if (found)
4304                                 break;
4305                         else
4306                                 continue;
4307                 }
4308
4309                 /* If there are no other devices under the same iommu owned by
4310                  * this domain, clear this iommu from iommu_bmp and update the
4311                  * iommu count and coherency.
4312                  */
4313                 if (info->iommu == iommu)
4314                         found = 1;
4315         }
4316
4317         spin_unlock_irqrestore(&device_domain_lock, flags);
4318
4319         if (found == 0) {
4320                 domain_detach_iommu(domain, iommu);
4321                 if (!domain_type_is_vm_or_si(domain))
4322                         iommu_detach_domain(domain, iommu);
4323         }
4324 }
4325
4326 static int md_domain_init(struct dmar_domain *domain, int guest_width)
4327 {
4328         int adjust_width;
4329
4330         init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN,
4331                         DMA_32BIT_PFN);
4332         domain_reserve_special_ranges(domain);
4333
4334         /* calculate AGAW */
4335         domain->gaw = guest_width;
4336         adjust_width = guestwidth_to_adjustwidth(guest_width);
4337         domain->agaw = width_to_agaw(adjust_width);
4338
4339         domain->iommu_coherency = 0;
4340         domain->iommu_snooping = 0;
4341         domain->iommu_superpage = 0;
4342         domain->max_addr = 0;
4343
4344         /* always allocate the top pgd */
4345         domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
4346         if (!domain->pgd)
4347                 return -ENOMEM;
4348         domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
4349         return 0;
4350 }
4351
4352 static struct iommu_domain *intel_iommu_domain_alloc(unsigned type)
4353 {
4354         struct dmar_domain *dmar_domain;
4355         struct iommu_domain *domain;
4356
4357         if (type != IOMMU_DOMAIN_UNMANAGED)
4358                 return NULL;
4359
4360         dmar_domain = alloc_domain(DOMAIN_FLAG_VIRTUAL_MACHINE);
4361         if (!dmar_domain) {
4362                 printk(KERN_ERR
4363                         "intel_iommu_domain_alloc: dmar_domain == NULL\n");
4364                 return NULL;
4365         }
4366         if (md_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
4367                 printk(KERN_ERR
4368                         "intel_iommu_domain_alloc: md_domain_init() failed\n");
4369                 domain_exit(dmar_domain);
4370                 return NULL;
4371         }
4372         domain_update_iommu_cap(dmar_domain);
4373
4374         domain = &dmar_domain->domain;
4375         domain->geometry.aperture_start = 0;
4376         domain->geometry.aperture_end   = __DOMAIN_MAX_ADDR(dmar_domain->gaw);
4377         domain->geometry.force_aperture = true;
4378
4379         return domain;
4380 }
4381
4382 static void intel_iommu_domain_free(struct iommu_domain *domain)
4383 {
4384         domain_exit(to_dmar_domain(domain));
4385 }
4386
4387 static int intel_iommu_attach_device(struct iommu_domain *domain,
4388                                      struct device *dev)
4389 {
4390         struct dmar_domain *dmar_domain = to_dmar_domain(domain);
4391         struct intel_iommu *iommu;
4392         int addr_width;
4393         u8 bus, devfn;
4394
4395         if (device_is_rmrr_locked(dev)) {
4396                 dev_warn(dev, "Device is ineligible for IOMMU domain attach due to platform RMRR requirement.  Contact your platform vendor.\n");
4397                 return -EPERM;
4398         }
4399
4400         /* normally dev is not mapped */
4401         if (unlikely(domain_context_mapped(dev))) {
4402                 struct dmar_domain *old_domain;
4403
4404                 old_domain = find_domain(dev);
4405                 if (old_domain) {
4406                         if (domain_type_is_vm_or_si(dmar_domain))
4407                                 domain_remove_one_dev_info(old_domain, dev);
4408                         else
4409                                 domain_remove_dev_info(old_domain);
4410
4411                         if (!domain_type_is_vm_or_si(old_domain) &&
4412                              list_empty(&old_domain->devices))
4413                                 domain_exit(old_domain);
4414                 }
4415         }
4416
4417         iommu = device_to_iommu(dev, &bus, &devfn);
4418         if (!iommu)
4419                 return -ENODEV;
4420
4421         /* check if this iommu agaw is sufficient for max mapped address */
4422         addr_width = agaw_to_width(iommu->agaw);
4423         if (addr_width > cap_mgaw(iommu->cap))
4424                 addr_width = cap_mgaw(iommu->cap);
4425
4426         if (dmar_domain->max_addr > (1LL << addr_width)) {
4427                 printk(KERN_ERR "%s: iommu width (%d) is not "
4428                        "sufficient for the mapped address (%llx)\n",
4429                        __func__, addr_width, dmar_domain->max_addr);
4430                 return -EFAULT;
4431         }
4432         dmar_domain->gaw = addr_width;
4433
4434         /*
4435          * Knock out extra levels of page tables if necessary
4436          */
4437         while (iommu->agaw < dmar_domain->agaw) {
4438                 struct dma_pte *pte;
4439
4440                 pte = dmar_domain->pgd;
4441                 if (dma_pte_present(pte)) {
4442                         dmar_domain->pgd = (struct dma_pte *)
4443                                 phys_to_virt(dma_pte_addr(pte));
4444                         free_pgtable_page(pte);
4445                 }
4446                 dmar_domain->agaw--;
4447         }
4448
4449         return domain_add_dev_info(dmar_domain, dev, CONTEXT_TT_MULTI_LEVEL);
4450 }
4451
4452 static void intel_iommu_detach_device(struct iommu_domain *domain,
4453                                       struct device *dev)
4454 {
4455         domain_remove_one_dev_info(to_dmar_domain(domain), dev);
4456 }
4457
4458 static int intel_iommu_map(struct iommu_domain *domain,
4459                            unsigned long iova, phys_addr_t hpa,
4460                            size_t size, int iommu_prot)
4461 {
4462         struct dmar_domain *dmar_domain = to_dmar_domain(domain);
4463         u64 max_addr;
4464         int prot = 0;
4465         int ret;
4466
4467         if (iommu_prot & IOMMU_READ)
4468                 prot |= DMA_PTE_READ;
4469         if (iommu_prot & IOMMU_WRITE)
4470                 prot |= DMA_PTE_WRITE;
4471         if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping)
4472                 prot |= DMA_PTE_SNP;
4473
4474         max_addr = iova + size;
4475         if (dmar_domain->max_addr < max_addr) {
4476                 u64 end;
4477
4478                 /* check if minimum agaw is sufficient for mapped address */
4479                 end = __DOMAIN_MAX_ADDR(dmar_domain->gaw) + 1;
4480                 if (end < max_addr) {
4481                         printk(KERN_ERR
4482                                "%s: iommu width (%d) is not sufficient for the mapped address (%llx)\n",
4483                                __func__, dmar_domain->gaw, max_addr);
4484                         return -EFAULT;
4485                 }
4486                 dmar_domain->max_addr = max_addr;
4487         }
4488         /* Round up size to next multiple of PAGE_SIZE, if it and
4489            the low bits of hpa would take us onto the next page */
4490         size = aligned_nrpages(hpa, size);
4491         ret = domain_pfn_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT,
4492                                  hpa >> VTD_PAGE_SHIFT, size, prot);
4493         return ret;
4494 }
4495
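/*
 * Unmap a range from @domain: clear the page-table entries, flush the
 * IOTLB on every IOMMU the domain is attached to, and only then free the
 * unlinked page-table pages.
 */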
4496 static size_t intel_iommu_unmap(struct iommu_domain *domain,
4497                                 unsigned long iova, size_t size)
4498 {
4499         struct dmar_domain *dmar_domain = to_dmar_domain(domain);
4500         struct page *freelist = NULL;
4501         struct intel_iommu *iommu;
4502         unsigned long start_pfn, last_pfn;
4503         unsigned int npages;
4504         int iommu_id, num, ndomains, level = 0;
4505
4506         /* Cope with horrid API which requires us to unmap more than the
4507            size argument if it happens to be a large-page mapping. */
4508         if (!pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level))
4509                 BUG();
4510
4511         if (size < VTD_PAGE_SIZE << level_to_offset_bits(level))
4512                 size = VTD_PAGE_SIZE << level_to_offset_bits(level);
4513
4514         start_pfn = iova >> VTD_PAGE_SHIFT;
4515         last_pfn = (iova + size - 1) >> VTD_PAGE_SHIFT;
4516
4517         freelist = domain_unmap(dmar_domain, start_pfn, last_pfn);
4518
4519         npages = last_pfn - start_pfn + 1;
4520
4521         for_each_set_bit(iommu_id, dmar_domain->iommu_bmp, g_num_of_iommus) {
4522                 iommu = g_iommus[iommu_id];
4523
4524                 /*
4525                  * Find the domain ID assigned to dmar_domain on this IOMMU
4526                  */
4527                 ndomains = cap_ndoms(iommu->cap);
4528                 for_each_set_bit(num, iommu->domain_ids, ndomains) {
4529                         if (iommu->domains[num] == dmar_domain)
4530                                 iommu_flush_iotlb_psi(iommu, num, start_pfn,
4531                                                       npages, !freelist, 0);
4532                 }
4533
4534         }
4535
4536         dma_free_pagelist(freelist);
4537
4538         if (dmar_domain->max_addr == iova + size)
4539                 dmar_domain->max_addr = iova;
4540
4541         return size;
4542 }
4543
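/*
 * Translate @iova by walking the domain's page tables; returns the
 * physical address stored in the leaf PTE, or 0 if nothing is mapped
 * at that address.
 */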
4544 static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
4545                                             dma_addr_t iova)
4546 {
4547         struct dmar_domain *dmar_domain = to_dmar_domain(domain);
4548         struct dma_pte *pte;
4549         int level = 0;
4550         u64 phys = 0;
4551
4552         pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level);
4553         if (pte)
4554                 phys = dma_pte_addr(pte);
4555
4556         return phys;
4557 }
4558
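/* Report which generic IOMMU capabilities the hardware configuration provides. */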
4559 static bool intel_iommu_capable(enum iommu_cap cap)
4560 {
4561         if (cap == IOMMU_CAP_CACHE_COHERENCY)
4562                 return domain_update_iommu_snooping(NULL) == 1;
4563         if (cap == IOMMU_CAP_INTR_REMAP)
4564                 return irq_remapping_enabled == 1;
4565
4566         return false;
4567 }
4568
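/*
 * ->add_device callback: link the new device to its IOMMU in sysfs and
 * put it into an IOMMU group, allocating one if necessary.
 */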
4569 static int intel_iommu_add_device(struct device *dev)
4570 {
4571         struct intel_iommu *iommu;
4572         struct iommu_group *group;
4573         u8 bus, devfn;
4574
4575         iommu = device_to_iommu(dev, &bus, &devfn);
4576         if (!iommu)
4577                 return -ENODEV;
4578
4579         iommu_device_link(iommu->iommu_dev, dev);
4580
4581         group = iommu_group_get_for_dev(dev);
4582
4583         if (IS_ERR(group))
4584                 return PTR_ERR(group);
4585
4586         iommu_group_put(group);
4587         return 0;
4588 }
4589
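/*
 * ->remove_device callback: take the device out of its IOMMU group and
 * drop the sysfs link created in intel_iommu_add_device().
 */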
4590 static void intel_iommu_remove_device(struct device *dev)
4591 {
4592         struct intel_iommu *iommu;
4593         u8 bus, devfn;
4594
4595         iommu = device_to_iommu(dev, &bus, &devfn);
4596         if (!iommu)
4597                 return;
4598
4599         iommu_group_remove_device(dev);
4600
4601         iommu_device_unlink(iommu->iommu_dev, dev);
4602 }
4603
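/* Callbacks exported to the generic IOMMU core. */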
4604 static const struct iommu_ops intel_iommu_ops = {
4605         .capable        = intel_iommu_capable,
4606         .domain_alloc   = intel_iommu_domain_alloc,
4607         .domain_free    = intel_iommu_domain_free,
4608         .attach_dev     = intel_iommu_attach_device,
4609         .detach_dev     = intel_iommu_detach_device,
4610         .map            = intel_iommu_map,
4611         .unmap          = intel_iommu_unmap,
4612         .map_sg         = default_iommu_map_sg,
4613         .iova_to_phys   = intel_iommu_iova_to_phys,
4614         .add_device     = intel_iommu_add_device,
4615         .remove_device  = intel_iommu_remove_device,
4616         .pgsize_bitmap  = INTEL_IOMMU_PGSIZES,
4617 };
4618
4619 static void quirk_iommu_g4x_gfx(struct pci_dev *dev)
4620 {
4621         /* G4x/GM45 integrated gfx dmar support is totally busted. */
4622         printk(KERN_INFO "DMAR: Disabling IOMMU for graphics on this chipset\n");
4623         dmar_map_gfx = 0;
4624 }
4625
4626 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_g4x_gfx);
4627 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_g4x_gfx);
4628 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_g4x_gfx);
4629 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_g4x_gfx);
4630 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_g4x_gfx);
4631 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_g4x_gfx);
4632 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_g4x_gfx);
4633
4634 static void quirk_iommu_rwbf(struct pci_dev *dev)
4635 {
4636         /*
4637          * Mobile 4 Series Chipset neglects to set RWBF capability,
4638          * but needs it. Same seems to hold for the desktop versions.
4639          */
4640         printk(KERN_INFO "DMAR: Forcing write-buffer flush capability\n");
4641         rwbf_quirk = 1;
4642 }
4643
4644 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_rwbf);
4645 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_rwbf);
4646 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_rwbf);
4647 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_rwbf);
4648 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_rwbf);
4649 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_rwbf);
4650 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_rwbf);
4651
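/*
 * GGC is the GMCH Graphics Control register (config offset 0x52 on these
 * chipsets); bits 11:8 describe the stolen-memory/GTT allocation made by
 * the BIOS.  The *_VT values indicate an allocation that also leaves
 * room for VT-d use, which the quirk below checks for.
 */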
4652 #define GGC 0x52
4653 #define GGC_MEMORY_SIZE_MASK    (0xf << 8)
4654 #define GGC_MEMORY_SIZE_NONE    (0x0 << 8)
4655 #define GGC_MEMORY_SIZE_1M      (0x1 << 8)
4656 #define GGC_MEMORY_SIZE_2M      (0x3 << 8)
4657 #define GGC_MEMORY_VT_ENABLED   (0x8 << 8)
4658 #define GGC_MEMORY_SIZE_2M_VT   (0x9 << 8)
4659 #define GGC_MEMORY_SIZE_3M_VT   (0xa << 8)
4660 #define GGC_MEMORY_SIZE_4M_VT   (0xb << 8)
4661
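/*
 * Integrated graphics on Calpella/Ironlake relies on the BIOS having set
 * aside GTT space for VT-d ("shadow GTT").  If it did not, disable the
 * IOMMU for graphics; if it did, force strict (unbatched) IOTLB flushing
 * so the GPU is idle before its mappings are torn down.
 */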
4662 static void quirk_calpella_no_shadow_gtt(struct pci_dev *dev)
4663 {
4664         unsigned short ggc;
4665
4666         if (pci_read_config_word(dev, GGC, &ggc))
4667                 return;
4668
4669         if (!(ggc & GGC_MEMORY_VT_ENABLED)) {
4670                 printk(KERN_INFO "DMAR: BIOS has allocated no shadow GTT; disabling IOMMU for graphics\n");
4671                 dmar_map_gfx = 0;
4672         } else if (dmar_map_gfx) {
4673                 /* we have to ensure the gfx device is idle before we flush */
4674                 printk(KERN_INFO "DMAR: Disabling batched IOTLB flush on Ironlake\n");
4675                 intel_iommu_strict = 1;
4676         }
4677 }
4678 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0040, quirk_calpella_no_shadow_gtt);
4679 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, quirk_calpella_no_shadow_gtt);
4680 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0062, quirk_calpella_no_shadow_gtt);
4681 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x006a, quirk_calpella_no_shadow_gtt);
4682
4683 /* On Tylersburg chipsets, some BIOSes have been known to enable the
4684    ISOCH DMAR unit for the Azalia sound device, but not give it any
4685    TLB entries, which causes it to deadlock. Check for that.  We do
4686    this in a function called from init_dmars(), instead of in a PCI
4687    quirk, because we don't want to print the obnoxious "BIOS broken"
4688    message if VT-d is actually disabled.
4689 */
4690 static void __init check_tylersburg_isoch(void)
4691 {
4692         struct pci_dev *pdev;
4693         uint32_t vtisochctrl;
4694
4695         /* If there's no Azalia in the system anyway, forget it. */
4696         pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x3a3e, NULL);
4697         if (!pdev)
4698                 return;
4699         pci_dev_put(pdev);
4700
4701         /* System Management Registers. Might be hidden, in which case
4702            we can't do the sanity check. But that's OK, because the
4703            known-broken BIOSes _don't_ actually hide it, so far. */
4704         pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x342e, NULL);
4705         if (!pdev)
4706                 return;
4707
4708         if (pci_read_config_dword(pdev, 0x188, &vtisochctrl)) {
4709                 pci_dev_put(pdev);
4710                 return;
4711         }
4712
4713         pci_dev_put(pdev);
4714
4715         /* If Azalia DMA is routed to the non-isoch DMAR unit, fine. */
4716         if (vtisochctrl & 1)
4717                 return;
4718
4719         /* Drop all bits other than the number of TLB entries */
4720         vtisochctrl &= 0x1c;
4721
4722         /* If we have the recommended number of TLB entries (16), fine. */
4723         if (vtisochctrl == 0x10)
4724                 return;
4725
4726         /* Zero TLB entries? You get to ride the short bus to school. */
4727         if (!vtisochctrl) {
4728                 WARN(1, "Your BIOS is broken; DMA routed to ISOCH DMAR unit but no TLB space.\n"
4729                      "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
4730                      dmi_get_system_info(DMI_BIOS_VENDOR),
4731                      dmi_get_system_info(DMI_BIOS_VERSION),
4732                      dmi_get_system_info(DMI_PRODUCT_VERSION));
4733                 iommu_identity_mapping |= IDENTMAP_AZALIA;
4734                 return;
4735         }
4736
4737         printk(KERN_WARNING "DMAR: Recommended TLB entries for ISOCH unit is 16; your BIOS set %d\n",
4738                vtisochctrl);
4739 }