0984fa58d87274302a8b9c57e8ee7f54c9c47b20
[firefly-linux-kernel-4.4.55.git] / drivers / iommu / intel-iommu.c
1 /*
2  * Copyright © 2006-2014 Intel Corporation.
3  *
4  * This program is free software; you can redistribute it and/or modify it
5  * under the terms and conditions of the GNU General Public License,
6  * version 2, as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope it will be useful, but WITHOUT
9  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
11  * more details.
12  *
13  * Authors: David Woodhouse <dwmw2@infradead.org>,
14  *          Ashok Raj <ashok.raj@intel.com>,
15  *          Shaohua Li <shaohua.li@intel.com>,
16  *          Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>,
17  *          Fenghua Yu <fenghua.yu@intel.com>
18  */
19
20 #include <linux/init.h>
21 #include <linux/bitmap.h>
22 #include <linux/debugfs.h>
23 #include <linux/export.h>
24 #include <linux/slab.h>
25 #include <linux/irq.h>
26 #include <linux/interrupt.h>
27 #include <linux/spinlock.h>
28 #include <linux/pci.h>
29 #include <linux/dmar.h>
30 #include <linux/dma-mapping.h>
31 #include <linux/mempool.h>
32 #include <linux/memory.h>
33 #include <linux/timer.h>
34 #include <linux/iova.h>
35 #include <linux/iommu.h>
36 #include <linux/intel-iommu.h>
37 #include <linux/syscore_ops.h>
38 #include <linux/tboot.h>
39 #include <linux/dmi.h>
40 #include <linux/pci-ats.h>
41 #include <linux/memblock.h>
42 #include <linux/dma-contiguous.h>
43 #include <asm/irq_remapping.h>
44 #include <asm/cacheflush.h>
45 #include <asm/iommu.h>
46
47 #include "irq_remapping.h"
48
49 #define ROOT_SIZE               VTD_PAGE_SIZE
50 #define CONTEXT_SIZE            VTD_PAGE_SIZE
51
52 #define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
53 #define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
54 #define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e)
55
56 #define IOAPIC_RANGE_START      (0xfee00000)
57 #define IOAPIC_RANGE_END        (0xfeefffff)
58 #define IOVA_START_ADDR         (0x1000)
59
60 #define DEFAULT_DOMAIN_ADDRESS_WIDTH 48
61
62 #define MAX_AGAW_WIDTH 64
63 #define MAX_AGAW_PFN_WIDTH      (MAX_AGAW_WIDTH - VTD_PAGE_SHIFT)
64
65 #define __DOMAIN_MAX_PFN(gaw)  ((((uint64_t)1) << (gaw-VTD_PAGE_SHIFT)) - 1)
66 #define __DOMAIN_MAX_ADDR(gaw) ((((uint64_t)1) << gaw) - 1)
67
68 /* We limit DOMAIN_MAX_PFN to fit in an unsigned long, and DOMAIN_MAX_ADDR
69    to match. That way, we can use 'unsigned long' for PFNs with impunity. */
70 #define DOMAIN_MAX_PFN(gaw)     ((unsigned long) min_t(uint64_t, \
71                                 __DOMAIN_MAX_PFN(gaw), (unsigned long)-1))
72 #define DOMAIN_MAX_ADDR(gaw)    (((uint64_t)__DOMAIN_MAX_PFN(gaw)) << VTD_PAGE_SHIFT)
73
74 #define IOVA_PFN(addr)          ((addr) >> PAGE_SHIFT)
75 #define DMA_32BIT_PFN           IOVA_PFN(DMA_BIT_MASK(32))
76 #define DMA_64BIT_PFN           IOVA_PFN(DMA_BIT_MASK(64))
77
78 /* page table handling */
79 #define LEVEL_STRIDE            (9)
80 #define LEVEL_MASK              (((u64)1 << LEVEL_STRIDE) - 1)
81
82 /*
83  * This bitmap is used to advertise the page sizes our hardware support
84  * to the IOMMU core, which will then use this information to split
85  * physically contiguous memory regions it is mapping into page sizes
86  * that we support.
87  *
88  * Traditionally the IOMMU core just handed us the mappings directly,
89  * after making sure the size is an order of a 4KiB page and that the
90  * mapping has natural alignment.
91  *
92  * To retain this behavior, we currently advertise that we support
93  * all page sizes that are an order of 4KiB.
94  *
95  * If at some point we'd like to utilize the IOMMU core's new behavior,
96  * we could change this to advertise the real page sizes we support.
97  */
98 #define INTEL_IOMMU_PGSIZES     (~0xFFFUL)
99
100 static inline int agaw_to_level(int agaw)
101 {
102         return agaw + 2;
103 }
104
105 static inline int agaw_to_width(int agaw)
106 {
107         return min_t(int, 30 + agaw * LEVEL_STRIDE, MAX_AGAW_WIDTH);
108 }
109
110 static inline int width_to_agaw(int width)
111 {
112         return DIV_ROUND_UP(width - 30, LEVEL_STRIDE);
113 }
114
115 static inline unsigned int level_to_offset_bits(int level)
116 {
117         return (level - 1) * LEVEL_STRIDE;
118 }
119
120 static inline int pfn_level_offset(unsigned long pfn, int level)
121 {
122         return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK;
123 }
124
125 static inline unsigned long level_mask(int level)
126 {
127         return -1UL << level_to_offset_bits(level);
128 }
129
130 static inline unsigned long level_size(int level)
131 {
132         return 1UL << level_to_offset_bits(level);
133 }
134
135 static inline unsigned long align_to_level(unsigned long pfn, int level)
136 {
137         return (pfn + level_size(level) - 1) & level_mask(level);
138 }
139
140 static inline unsigned long lvl_to_nr_pages(unsigned int lvl)
141 {
142         return  1 << min_t(int, (lvl - 1) * LEVEL_STRIDE, MAX_AGAW_PFN_WIDTH);
143 }
144
145 /* VT-d pages must always be _smaller_ than MM pages. Otherwise things
146    are never going to work. */
147 static inline unsigned long dma_to_mm_pfn(unsigned long dma_pfn)
148 {
149         return dma_pfn >> (PAGE_SHIFT - VTD_PAGE_SHIFT);
150 }
151
152 static inline unsigned long mm_to_dma_pfn(unsigned long mm_pfn)
153 {
154         return mm_pfn << (PAGE_SHIFT - VTD_PAGE_SHIFT);
155 }
156 static inline unsigned long page_to_dma_pfn(struct page *pg)
157 {
158         return mm_to_dma_pfn(page_to_pfn(pg));
159 }
160 static inline unsigned long virt_to_dma_pfn(void *p)
161 {
162         return page_to_dma_pfn(virt_to_page(p));
163 }
164
165 /* global iommu list, set NULL for ignored DMAR units */
166 static struct intel_iommu **g_iommus;
167
168 static void __init check_tylersburg_isoch(void);
169 static int rwbf_quirk;
170
171 /*
172  * set to 1 to panic kernel if can't successfully enable VT-d
173  * (used when kernel is launched w/ TXT)
174  */
175 static int force_on = 0;
176
177 /*
178  * 0: Present
179  * 1-11: Reserved
180  * 12-63: Context Ptr (12 - (haw-1))
181  * 64-127: Reserved
182  */
183 struct root_entry {
184         u64     val;
185         u64     rsvd1;
186 };
187 #define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
188 static inline bool root_present(struct root_entry *root)
189 {
190         return (root->val & 1);
191 }
192 static inline void set_root_present(struct root_entry *root)
193 {
194         root->val |= 1;
195 }
196 static inline void set_root_value(struct root_entry *root, unsigned long value)
197 {
198         root->val |= value & VTD_PAGE_MASK;
199 }
200
201 static inline struct context_entry *
202 get_context_addr_from_root(struct root_entry *root)
203 {
204         return (struct context_entry *)
205                 (root_present(root)?phys_to_virt(
206                 root->val & VTD_PAGE_MASK) :
207                 NULL);
208 }
209
210 /*
211  * low 64 bits:
212  * 0: present
213  * 1: fault processing disable
214  * 2-3: translation type
215  * 12-63: address space root
216  * high 64 bits:
217  * 0-2: address width
218  * 3-6: aval
219  * 8-23: domain id
220  */
221 struct context_entry {
222         u64 lo;
223         u64 hi;
224 };
225
226 static inline bool context_present(struct context_entry *context)
227 {
228         return (context->lo & 1);
229 }
230 static inline void context_set_present(struct context_entry *context)
231 {
232         context->lo |= 1;
233 }
234
235 static inline void context_set_fault_enable(struct context_entry *context)
236 {
237         context->lo &= (((u64)-1) << 2) | 1;
238 }
239
240 static inline void context_set_translation_type(struct context_entry *context,
241                                                 unsigned long value)
242 {
243         context->lo &= (((u64)-1) << 4) | 3;
244         context->lo |= (value & 3) << 2;
245 }
246
247 static inline void context_set_address_root(struct context_entry *context,
248                                             unsigned long value)
249 {
250         context->lo |= value & VTD_PAGE_MASK;
251 }
252
253 static inline void context_set_address_width(struct context_entry *context,
254                                              unsigned long value)
255 {
256         context->hi |= value & 7;
257 }
258
259 static inline void context_set_domain_id(struct context_entry *context,
260                                          unsigned long value)
261 {
262         context->hi |= (value & ((1 << 16) - 1)) << 8;
263 }
264
265 static inline void context_clear_entry(struct context_entry *context)
266 {
267         context->lo = 0;
268         context->hi = 0;
269 }
270
271 /*
272  * 0: readable
273  * 1: writable
274  * 2-6: reserved
275  * 7: super page
276  * 8-10: available
277  * 11: snoop behavior
278  * 12-63: Host physcial address
279  */
280 struct dma_pte {
281         u64 val;
282 };
283
284 static inline void dma_clear_pte(struct dma_pte *pte)
285 {
286         pte->val = 0;
287 }
288
289 static inline u64 dma_pte_addr(struct dma_pte *pte)
290 {
291 #ifdef CONFIG_64BIT
292         return pte->val & VTD_PAGE_MASK;
293 #else
294         /* Must have a full atomic 64-bit read */
295         return  __cmpxchg64(&pte->val, 0ULL, 0ULL) & VTD_PAGE_MASK;
296 #endif
297 }
298
299 static inline bool dma_pte_present(struct dma_pte *pte)
300 {
301         return (pte->val & 3) != 0;
302 }
303
304 static inline bool dma_pte_superpage(struct dma_pte *pte)
305 {
306         return (pte->val & DMA_PTE_LARGE_PAGE);
307 }
308
309 static inline int first_pte_in_page(struct dma_pte *pte)
310 {
311         return !((unsigned long)pte & ~VTD_PAGE_MASK);
312 }
313
314 /*
315  * This domain is a statically identity mapping domain.
316  *      1. This domain creats a static 1:1 mapping to all usable memory.
317  *      2. It maps to each iommu if successful.
318  *      3. Each iommu mapps to this domain if successful.
319  */
320 static struct dmar_domain *si_domain;
321 static int hw_pass_through = 1;
322
323 /* domain represents a virtual machine, more than one devices
324  * across iommus may be owned in one domain, e.g. kvm guest.
325  */
326 #define DOMAIN_FLAG_VIRTUAL_MACHINE     (1 << 0)
327
328 /* si_domain contains mulitple devices */
329 #define DOMAIN_FLAG_STATIC_IDENTITY     (1 << 1)
330
331 /* define the limit of IOMMUs supported in each domain */
332 #ifdef  CONFIG_X86
333 # define        IOMMU_UNITS_SUPPORTED   MAX_IO_APICS
334 #else
335 # define        IOMMU_UNITS_SUPPORTED   64
336 #endif
337
338 struct dmar_domain {
339         int     id;                     /* domain id */
340         int     nid;                    /* node id */
341         DECLARE_BITMAP(iommu_bmp, IOMMU_UNITS_SUPPORTED);
342                                         /* bitmap of iommus this domain uses*/
343
344         struct list_head devices;       /* all devices' list */
345         struct iova_domain iovad;       /* iova's that belong to this domain */
346
347         struct dma_pte  *pgd;           /* virtual address */
348         int             gaw;            /* max guest address width */
349
350         /* adjusted guest address width, 0 is level 2 30-bit */
351         int             agaw;
352
353         int             flags;          /* flags to find out type of domain */
354
355         int             iommu_coherency;/* indicate coherency of iommu access */
356         int             iommu_snooping; /* indicate snooping control feature*/
357         int             iommu_count;    /* reference count of iommu */
358         int             iommu_superpage;/* Level of superpages supported:
359                                            0 == 4KiB (no superpages), 1 == 2MiB,
360                                            2 == 1GiB, 3 == 512GiB, 4 == 1TiB */
361         spinlock_t      iommu_lock;     /* protect iommu set in domain */
362         u64             max_addr;       /* maximum mapped address */
363 };
364
365 /* PCI domain-device relationship */
366 struct device_domain_info {
367         struct list_head link;  /* link to domain siblings */
368         struct list_head global; /* link to global list */
369         u8 bus;                 /* PCI bus number */
370         u8 devfn;               /* PCI devfn number */
371         struct device *dev; /* it's NULL for PCIe-to-PCI bridge */
372         struct intel_iommu *iommu; /* IOMMU used by this device */
373         struct dmar_domain *domain; /* pointer to domain */
374 };
375
376 struct dmar_rmrr_unit {
377         struct list_head list;          /* list of rmrr units   */
378         struct acpi_dmar_header *hdr;   /* ACPI header          */
379         u64     base_address;           /* reserved base address*/
380         u64     end_address;            /* reserved end address */
381         struct dmar_dev_scope *devices; /* target devices */
382         int     devices_cnt;            /* target device count */
383 };
384
385 struct dmar_atsr_unit {
386         struct list_head list;          /* list of ATSR units */
387         struct acpi_dmar_header *hdr;   /* ACPI header */
388         struct dmar_dev_scope *devices; /* target devices */
389         int devices_cnt;                /* target device count */
390         u8 include_all:1;               /* include all ports */
391 };
392
393 static LIST_HEAD(dmar_atsr_units);
394 static LIST_HEAD(dmar_rmrr_units);
395
396 #define for_each_rmrr_units(rmrr) \
397         list_for_each_entry(rmrr, &dmar_rmrr_units, list)
398
399 static void flush_unmaps_timeout(unsigned long data);
400
401 static DEFINE_TIMER(unmap_timer,  flush_unmaps_timeout, 0, 0);
402
403 #define HIGH_WATER_MARK 250
404 struct deferred_flush_tables {
405         int next;
406         struct iova *iova[HIGH_WATER_MARK];
407         struct dmar_domain *domain[HIGH_WATER_MARK];
408         struct page *freelist[HIGH_WATER_MARK];
409 };
410
411 static struct deferred_flush_tables *deferred_flush;
412
413 /* bitmap for indexing intel_iommus */
414 static int g_num_of_iommus;
415
416 static DEFINE_SPINLOCK(async_umap_flush_lock);
417 static LIST_HEAD(unmaps_to_do);
418
419 static int timer_on;
420 static long list_size;
421
422 static void domain_exit(struct dmar_domain *domain);
423 static void domain_remove_dev_info(struct dmar_domain *domain);
424 static void domain_remove_one_dev_info(struct dmar_domain *domain,
425                                        struct device *dev);
426 static void iommu_detach_dependent_devices(struct intel_iommu *iommu,
427                                            struct device *dev);
428 static int domain_detach_iommu(struct dmar_domain *domain,
429                                struct intel_iommu *iommu);
430
431 #ifdef CONFIG_INTEL_IOMMU_DEFAULT_ON
432 int dmar_disabled = 0;
433 #else
434 int dmar_disabled = 1;
435 #endif /*CONFIG_INTEL_IOMMU_DEFAULT_ON*/
436
437 int intel_iommu_enabled = 0;
438 EXPORT_SYMBOL_GPL(intel_iommu_enabled);
439
440 static int dmar_map_gfx = 1;
441 static int dmar_forcedac;
442 static int intel_iommu_strict;
443 static int intel_iommu_superpage = 1;
444
445 int intel_iommu_gfx_mapped;
446 EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped);
447
448 #define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
449 static DEFINE_SPINLOCK(device_domain_lock);
450 static LIST_HEAD(device_domain_list);
451
452 static const struct iommu_ops intel_iommu_ops;
453
454 static int __init intel_iommu_setup(char *str)
455 {
456         if (!str)
457                 return -EINVAL;
458         while (*str) {
459                 if (!strncmp(str, "on", 2)) {
460                         dmar_disabled = 0;
461                         printk(KERN_INFO "Intel-IOMMU: enabled\n");
462                 } else if (!strncmp(str, "off", 3)) {
463                         dmar_disabled = 1;
464                         printk(KERN_INFO "Intel-IOMMU: disabled\n");
465                 } else if (!strncmp(str, "igfx_off", 8)) {
466                         dmar_map_gfx = 0;
467                         printk(KERN_INFO
468                                 "Intel-IOMMU: disable GFX device mapping\n");
469                 } else if (!strncmp(str, "forcedac", 8)) {
470                         printk(KERN_INFO
471                                 "Intel-IOMMU: Forcing DAC for PCI devices\n");
472                         dmar_forcedac = 1;
473                 } else if (!strncmp(str, "strict", 6)) {
474                         printk(KERN_INFO
475                                 "Intel-IOMMU: disable batched IOTLB flush\n");
476                         intel_iommu_strict = 1;
477                 } else if (!strncmp(str, "sp_off", 6)) {
478                         printk(KERN_INFO
479                                 "Intel-IOMMU: disable supported super page\n");
480                         intel_iommu_superpage = 0;
481                 }
482
483                 str += strcspn(str, ",");
484                 while (*str == ',')
485                         str++;
486         }
487         return 0;
488 }
489 __setup("intel_iommu=", intel_iommu_setup);
490
491 static struct kmem_cache *iommu_domain_cache;
492 static struct kmem_cache *iommu_devinfo_cache;
493 static struct kmem_cache *iommu_iova_cache;
494
495 static inline void *alloc_pgtable_page(int node)
496 {
497         struct page *page;
498         void *vaddr = NULL;
499
500         page = alloc_pages_node(node, GFP_ATOMIC | __GFP_ZERO, 0);
501         if (page)
502                 vaddr = page_address(page);
503         return vaddr;
504 }
505
506 static inline void free_pgtable_page(void *vaddr)
507 {
508         free_page((unsigned long)vaddr);
509 }
510
511 static inline void *alloc_domain_mem(void)
512 {
513         return kmem_cache_alloc(iommu_domain_cache, GFP_ATOMIC);
514 }
515
516 static void free_domain_mem(void *vaddr)
517 {
518         kmem_cache_free(iommu_domain_cache, vaddr);
519 }
520
521 static inline void * alloc_devinfo_mem(void)
522 {
523         return kmem_cache_alloc(iommu_devinfo_cache, GFP_ATOMIC);
524 }
525
526 static inline void free_devinfo_mem(void *vaddr)
527 {
528         kmem_cache_free(iommu_devinfo_cache, vaddr);
529 }
530
531 struct iova *alloc_iova_mem(void)
532 {
533         return kmem_cache_alloc(iommu_iova_cache, GFP_ATOMIC);
534 }
535
536 void free_iova_mem(struct iova *iova)
537 {
538         kmem_cache_free(iommu_iova_cache, iova);
539 }
540
541 static inline int domain_type_is_vm(struct dmar_domain *domain)
542 {
543         return domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE;
544 }
545
546 static inline int domain_type_is_vm_or_si(struct dmar_domain *domain)
547 {
548         return domain->flags & (DOMAIN_FLAG_VIRTUAL_MACHINE |
549                                 DOMAIN_FLAG_STATIC_IDENTITY);
550 }
551
552 static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw)
553 {
554         unsigned long sagaw;
555         int agaw = -1;
556
557         sagaw = cap_sagaw(iommu->cap);
558         for (agaw = width_to_agaw(max_gaw);
559              agaw >= 0; agaw--) {
560                 if (test_bit(agaw, &sagaw))
561                         break;
562         }
563
564         return agaw;
565 }
566
567 /*
568  * Calculate max SAGAW for each iommu.
569  */
570 int iommu_calculate_max_sagaw(struct intel_iommu *iommu)
571 {
572         return __iommu_calculate_agaw(iommu, MAX_AGAW_WIDTH);
573 }
574
575 /*
576  * calculate agaw for each iommu.
577  * "SAGAW" may be different across iommus, use a default agaw, and
578  * get a supported less agaw for iommus that don't support the default agaw.
579  */
580 int iommu_calculate_agaw(struct intel_iommu *iommu)
581 {
582         return __iommu_calculate_agaw(iommu, DEFAULT_DOMAIN_ADDRESS_WIDTH);
583 }
584
585 /* This functionin only returns single iommu in a domain */
586 static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
587 {
588         int iommu_id;
589
590         /* si_domain and vm domain should not get here. */
591         BUG_ON(domain_type_is_vm_or_si(domain));
592         iommu_id = find_first_bit(domain->iommu_bmp, g_num_of_iommus);
593         if (iommu_id < 0 || iommu_id >= g_num_of_iommus)
594                 return NULL;
595
596         return g_iommus[iommu_id];
597 }
598
599 static void domain_update_iommu_coherency(struct dmar_domain *domain)
600 {
601         struct dmar_drhd_unit *drhd;
602         struct intel_iommu *iommu;
603         int i, found = 0;
604
605         domain->iommu_coherency = 1;
606
607         for_each_set_bit(i, domain->iommu_bmp, g_num_of_iommus) {
608                 found = 1;
609                 if (!ecap_coherent(g_iommus[i]->ecap)) {
610                         domain->iommu_coherency = 0;
611                         break;
612                 }
613         }
614         if (found)
615                 return;
616
617         /* No hardware attached; use lowest common denominator */
618         rcu_read_lock();
619         for_each_active_iommu(iommu, drhd) {
620                 if (!ecap_coherent(iommu->ecap)) {
621                         domain->iommu_coherency = 0;
622                         break;
623                 }
624         }
625         rcu_read_unlock();
626 }
627
628 static void domain_update_iommu_snooping(struct dmar_domain *domain)
629 {
630         int i;
631
632         domain->iommu_snooping = 1;
633
634         for_each_set_bit(i, domain->iommu_bmp, g_num_of_iommus) {
635                 if (!ecap_sc_support(g_iommus[i]->ecap)) {
636                         domain->iommu_snooping = 0;
637                         break;
638                 }
639         }
640 }
641
642 static void domain_update_iommu_superpage(struct dmar_domain *domain)
643 {
644         struct dmar_drhd_unit *drhd;
645         struct intel_iommu *iommu = NULL;
646         int mask = 0xf;
647
648         if (!intel_iommu_superpage) {
649                 domain->iommu_superpage = 0;
650                 return;
651         }
652
653         /* set iommu_superpage to the smallest common denominator */
654         rcu_read_lock();
655         for_each_active_iommu(iommu, drhd) {
656                 mask &= cap_super_page_val(iommu->cap);
657                 if (!mask) {
658                         break;
659                 }
660         }
661         rcu_read_unlock();
662
663         domain->iommu_superpage = fls(mask);
664 }
665
666 /* Some capabilities may be different across iommus */
667 static void domain_update_iommu_cap(struct dmar_domain *domain)
668 {
669         domain_update_iommu_coherency(domain);
670         domain_update_iommu_snooping(domain);
671         domain_update_iommu_superpage(domain);
672 }
673
674 static struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn)
675 {
676         struct dmar_drhd_unit *drhd = NULL;
677         struct intel_iommu *iommu;
678         struct device *tmp;
679         struct pci_dev *ptmp, *pdev = NULL;
680         u16 segment = 0;
681         int i;
682
683         if (dev_is_pci(dev)) {
684                 pdev = to_pci_dev(dev);
685                 segment = pci_domain_nr(pdev->bus);
686         } else if (ACPI_COMPANION(dev))
687                 dev = &ACPI_COMPANION(dev)->dev;
688
689         rcu_read_lock();
690         for_each_active_iommu(iommu, drhd) {
691                 if (pdev && segment != drhd->segment)
692                         continue;
693
694                 for_each_active_dev_scope(drhd->devices,
695                                           drhd->devices_cnt, i, tmp) {
696                         if (tmp == dev) {
697                                 *bus = drhd->devices[i].bus;
698                                 *devfn = drhd->devices[i].devfn;
699                                 goto out;
700                         }
701
702                         if (!pdev || !dev_is_pci(tmp))
703                                 continue;
704
705                         ptmp = to_pci_dev(tmp);
706                         if (ptmp->subordinate &&
707                             ptmp->subordinate->number <= pdev->bus->number &&
708                             ptmp->subordinate->busn_res.end >= pdev->bus->number)
709                                 goto got_pdev;
710                 }
711
712                 if (pdev && drhd->include_all) {
713                 got_pdev:
714                         *bus = pdev->bus->number;
715                         *devfn = pdev->devfn;
716                         goto out;
717                 }
718         }
719         iommu = NULL;
720  out:
721         rcu_read_unlock();
722
723         return iommu;
724 }
725
726 static void domain_flush_cache(struct dmar_domain *domain,
727                                void *addr, int size)
728 {
729         if (!domain->iommu_coherency)
730                 clflush_cache_range(addr, size);
731 }
732
733 /* Gets context entry for a given bus and devfn */
734 static struct context_entry * device_to_context_entry(struct intel_iommu *iommu,
735                 u8 bus, u8 devfn)
736 {
737         struct root_entry *root;
738         struct context_entry *context;
739         unsigned long phy_addr;
740         unsigned long flags;
741
742         spin_lock_irqsave(&iommu->lock, flags);
743         root = &iommu->root_entry[bus];
744         context = get_context_addr_from_root(root);
745         if (!context) {
746                 context = (struct context_entry *)
747                                 alloc_pgtable_page(iommu->node);
748                 if (!context) {
749                         spin_unlock_irqrestore(&iommu->lock, flags);
750                         return NULL;
751                 }
752                 __iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
753                 phy_addr = virt_to_phys((void *)context);
754                 set_root_value(root, phy_addr);
755                 set_root_present(root);
756                 __iommu_flush_cache(iommu, root, sizeof(*root));
757         }
758         spin_unlock_irqrestore(&iommu->lock, flags);
759         return &context[devfn];
760 }
761
762 static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
763 {
764         struct root_entry *root;
765         struct context_entry *context;
766         int ret;
767         unsigned long flags;
768
769         spin_lock_irqsave(&iommu->lock, flags);
770         root = &iommu->root_entry[bus];
771         context = get_context_addr_from_root(root);
772         if (!context) {
773                 ret = 0;
774                 goto out;
775         }
776         ret = context_present(&context[devfn]);
777 out:
778         spin_unlock_irqrestore(&iommu->lock, flags);
779         return ret;
780 }
781
782 static void clear_context_table(struct intel_iommu *iommu, u8 bus, u8 devfn)
783 {
784         struct root_entry *root;
785         struct context_entry *context;
786         unsigned long flags;
787
788         spin_lock_irqsave(&iommu->lock, flags);
789         root = &iommu->root_entry[bus];
790         context = get_context_addr_from_root(root);
791         if (context) {
792                 context_clear_entry(&context[devfn]);
793                 __iommu_flush_cache(iommu, &context[devfn], \
794                         sizeof(*context));
795         }
796         spin_unlock_irqrestore(&iommu->lock, flags);
797 }
798
799 static void free_context_table(struct intel_iommu *iommu)
800 {
801         struct root_entry *root;
802         int i;
803         unsigned long flags;
804         struct context_entry *context;
805
806         spin_lock_irqsave(&iommu->lock, flags);
807         if (!iommu->root_entry) {
808                 goto out;
809         }
810         for (i = 0; i < ROOT_ENTRY_NR; i++) {
811                 root = &iommu->root_entry[i];
812                 context = get_context_addr_from_root(root);
813                 if (context)
814                         free_pgtable_page(context);
815         }
816         free_pgtable_page(iommu->root_entry);
817         iommu->root_entry = NULL;
818 out:
819         spin_unlock_irqrestore(&iommu->lock, flags);
820 }
821
822 static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
823                                       unsigned long pfn, int *target_level)
824 {
825         int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
826         struct dma_pte *parent, *pte = NULL;
827         int level = agaw_to_level(domain->agaw);
828         int offset;
829
830         BUG_ON(!domain->pgd);
831
832         if (addr_width < BITS_PER_LONG && pfn >> addr_width)
833                 /* Address beyond IOMMU's addressing capabilities. */
834                 return NULL;
835
836         parent = domain->pgd;
837
838         while (1) {
839                 void *tmp_page;
840
841                 offset = pfn_level_offset(pfn, level);
842                 pte = &parent[offset];
843                 if (!*target_level && (dma_pte_superpage(pte) || !dma_pte_present(pte)))
844                         break;
845                 if (level == *target_level)
846                         break;
847
848                 if (!dma_pte_present(pte)) {
849                         uint64_t pteval;
850
851                         tmp_page = alloc_pgtable_page(domain->nid);
852
853                         if (!tmp_page)
854                                 return NULL;
855
856                         domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE);
857                         pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE;
858                         if (cmpxchg64(&pte->val, 0ULL, pteval))
859                                 /* Someone else set it while we were thinking; use theirs. */
860                                 free_pgtable_page(tmp_page);
861                         else
862                                 domain_flush_cache(domain, pte, sizeof(*pte));
863                 }
864                 if (level == 1)
865                         break;
866
867                 parent = phys_to_virt(dma_pte_addr(pte));
868                 level--;
869         }
870
871         if (!*target_level)
872                 *target_level = level;
873
874         return pte;
875 }
876
877
878 /* return address's pte at specific level */
879 static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain,
880                                          unsigned long pfn,
881                                          int level, int *large_page)
882 {
883         struct dma_pte *parent, *pte = NULL;
884         int total = agaw_to_level(domain->agaw);
885         int offset;
886
887         parent = domain->pgd;
888         while (level <= total) {
889                 offset = pfn_level_offset(pfn, total);
890                 pte = &parent[offset];
891                 if (level == total)
892                         return pte;
893
894                 if (!dma_pte_present(pte)) {
895                         *large_page = total;
896                         break;
897                 }
898
899                 if (dma_pte_superpage(pte)) {
900                         *large_page = total;
901                         return pte;
902                 }
903
904                 parent = phys_to_virt(dma_pte_addr(pte));
905                 total--;
906         }
907         return NULL;
908 }
909
910 /* clear last level pte, a tlb flush should be followed */
911 static void dma_pte_clear_range(struct dmar_domain *domain,
912                                 unsigned long start_pfn,
913                                 unsigned long last_pfn)
914 {
915         int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
916         unsigned int large_page = 1;
917         struct dma_pte *first_pte, *pte;
918
919         BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
920         BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width);
921         BUG_ON(start_pfn > last_pfn);
922
923         /* we don't need lock here; nobody else touches the iova range */
924         do {
925                 large_page = 1;
926                 first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1, &large_page);
927                 if (!pte) {
928                         start_pfn = align_to_level(start_pfn + 1, large_page + 1);
929                         continue;
930                 }
931                 do {
932                         dma_clear_pte(pte);
933                         start_pfn += lvl_to_nr_pages(large_page);
934                         pte++;
935                 } while (start_pfn <= last_pfn && !first_pte_in_page(pte));
936
937                 domain_flush_cache(domain, first_pte,
938                                    (void *)pte - (void *)first_pte);
939
940         } while (start_pfn && start_pfn <= last_pfn);
941 }
942
943 static void dma_pte_free_level(struct dmar_domain *domain, int level,
944                                struct dma_pte *pte, unsigned long pfn,
945                                unsigned long start_pfn, unsigned long last_pfn)
946 {
947         pfn = max(start_pfn, pfn);
948         pte = &pte[pfn_level_offset(pfn, level)];
949
950         do {
951                 unsigned long level_pfn;
952                 struct dma_pte *level_pte;
953
954                 if (!dma_pte_present(pte) || dma_pte_superpage(pte))
955                         goto next;
956
957                 level_pfn = pfn & level_mask(level - 1);
958                 level_pte = phys_to_virt(dma_pte_addr(pte));
959
960                 if (level > 2)
961                         dma_pte_free_level(domain, level - 1, level_pte,
962                                            level_pfn, start_pfn, last_pfn);
963
964                 /* If range covers entire pagetable, free it */
965                 if (!(start_pfn > level_pfn ||
966                       last_pfn < level_pfn + level_size(level) - 1)) {
967                         dma_clear_pte(pte);
968                         domain_flush_cache(domain, pte, sizeof(*pte));
969                         free_pgtable_page(level_pte);
970                 }
971 next:
972                 pfn += level_size(level);
973         } while (!first_pte_in_page(++pte) && pfn <= last_pfn);
974 }
975
976 /* free page table pages. last level pte should already be cleared */
977 static void dma_pte_free_pagetable(struct dmar_domain *domain,
978                                    unsigned long start_pfn,
979                                    unsigned long last_pfn)
980 {
981         int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
982
983         BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
984         BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width);
985         BUG_ON(start_pfn > last_pfn);
986
987         /* We don't need lock here; nobody else touches the iova range */
988         dma_pte_free_level(domain, agaw_to_level(domain->agaw),
989                            domain->pgd, 0, start_pfn, last_pfn);
990
991         /* free pgd */
992         if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
993                 free_pgtable_page(domain->pgd);
994                 domain->pgd = NULL;
995         }
996 }
997
998 /* When a page at a given level is being unlinked from its parent, we don't
999    need to *modify* it at all. All we need to do is make a list of all the
1000    pages which can be freed just as soon as we've flushed the IOTLB and we
1001    know the hardware page-walk will no longer touch them.
1002    The 'pte' argument is the *parent* PTE, pointing to the page that is to
1003    be freed. */
1004 static struct page *dma_pte_list_pagetables(struct dmar_domain *domain,
1005                                             int level, struct dma_pte *pte,
1006                                             struct page *freelist)
1007 {
1008         struct page *pg;
1009
1010         pg = pfn_to_page(dma_pte_addr(pte) >> PAGE_SHIFT);
1011         pg->freelist = freelist;
1012         freelist = pg;
1013
1014         if (level == 1)
1015                 return freelist;
1016
1017         pte = page_address(pg);
1018         do {
1019                 if (dma_pte_present(pte) && !dma_pte_superpage(pte))
1020                         freelist = dma_pte_list_pagetables(domain, level - 1,
1021                                                            pte, freelist);
1022                 pte++;
1023         } while (!first_pte_in_page(pte));
1024
1025         return freelist;
1026 }
1027
1028 static struct page *dma_pte_clear_level(struct dmar_domain *domain, int level,
1029                                         struct dma_pte *pte, unsigned long pfn,
1030                                         unsigned long start_pfn,
1031                                         unsigned long last_pfn,
1032                                         struct page *freelist)
1033 {
1034         struct dma_pte *first_pte = NULL, *last_pte = NULL;
1035
1036         pfn = max(start_pfn, pfn);
1037         pte = &pte[pfn_level_offset(pfn, level)];
1038
1039         do {
1040                 unsigned long level_pfn;
1041
1042                 if (!dma_pte_present(pte))
1043                         goto next;
1044
1045                 level_pfn = pfn & level_mask(level);
1046
1047                 /* If range covers entire pagetable, free it */
1048                 if (start_pfn <= level_pfn &&
1049                     last_pfn >= level_pfn + level_size(level) - 1) {
1050                         /* These suborbinate page tables are going away entirely. Don't
1051                            bother to clear them; we're just going to *free* them. */
1052                         if (level > 1 && !dma_pte_superpage(pte))
1053                                 freelist = dma_pte_list_pagetables(domain, level - 1, pte, freelist);
1054
1055                         dma_clear_pte(pte);
1056                         if (!first_pte)
1057                                 first_pte = pte;
1058                         last_pte = pte;
1059                 } else if (level > 1) {
1060                         /* Recurse down into a level that isn't *entirely* obsolete */
1061                         freelist = dma_pte_clear_level(domain, level - 1,
1062                                                        phys_to_virt(dma_pte_addr(pte)),
1063                                                        level_pfn, start_pfn, last_pfn,
1064                                                        freelist);
1065                 }
1066 next:
1067                 pfn += level_size(level);
1068         } while (!first_pte_in_page(++pte) && pfn <= last_pfn);
1069
1070         if (first_pte)
1071                 domain_flush_cache(domain, first_pte,
1072                                    (void *)++last_pte - (void *)first_pte);
1073
1074         return freelist;
1075 }
1076
1077 /* We can't just free the pages because the IOMMU may still be walking
1078    the page tables, and may have cached the intermediate levels. The
1079    pages can only be freed after the IOTLB flush has been done. */
1080 struct page *domain_unmap(struct dmar_domain *domain,
1081                           unsigned long start_pfn,
1082                           unsigned long last_pfn)
1083 {
1084         int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
1085         struct page *freelist = NULL;
1086
1087         BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
1088         BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width);
1089         BUG_ON(start_pfn > last_pfn);
1090
1091         /* we don't need lock here; nobody else touches the iova range */
1092         freelist = dma_pte_clear_level(domain, agaw_to_level(domain->agaw),
1093                                        domain->pgd, 0, start_pfn, last_pfn, NULL);
1094
1095         /* free pgd */
1096         if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
1097                 struct page *pgd_page = virt_to_page(domain->pgd);
1098                 pgd_page->freelist = freelist;
1099                 freelist = pgd_page;
1100
1101                 domain->pgd = NULL;
1102         }
1103
1104         return freelist;
1105 }
1106
1107 void dma_free_pagelist(struct page *freelist)
1108 {
1109         struct page *pg;
1110
1111         while ((pg = freelist)) {
1112                 freelist = pg->freelist;
1113                 free_pgtable_page(page_address(pg));
1114         }
1115 }
1116
1117 /* iommu handling */
1118 static int iommu_alloc_root_entry(struct intel_iommu *iommu)
1119 {
1120         struct root_entry *root;
1121         unsigned long flags;
1122
1123         root = (struct root_entry *)alloc_pgtable_page(iommu->node);
1124         if (!root)
1125                 return -ENOMEM;
1126
1127         __iommu_flush_cache(iommu, root, ROOT_SIZE);
1128
1129         spin_lock_irqsave(&iommu->lock, flags);
1130         iommu->root_entry = root;
1131         spin_unlock_irqrestore(&iommu->lock, flags);
1132
1133         return 0;
1134 }
1135
1136 static void iommu_set_root_entry(struct intel_iommu *iommu)
1137 {
1138         void *addr;
1139         u32 sts;
1140         unsigned long flag;
1141
1142         addr = iommu->root_entry;
1143
1144         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1145         dmar_writeq(iommu->reg + DMAR_RTADDR_REG, virt_to_phys(addr));
1146
1147         writel(iommu->gcmd | DMA_GCMD_SRTP, iommu->reg + DMAR_GCMD_REG);
1148
1149         /* Make sure hardware complete it */
1150         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1151                       readl, (sts & DMA_GSTS_RTPS), sts);
1152
1153         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1154 }
1155
1156 static void iommu_flush_write_buffer(struct intel_iommu *iommu)
1157 {
1158         u32 val;
1159         unsigned long flag;
1160
1161         if (!rwbf_quirk && !cap_rwbf(iommu->cap))
1162                 return;
1163
1164         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1165         writel(iommu->gcmd | DMA_GCMD_WBF, iommu->reg + DMAR_GCMD_REG);
1166
1167         /* Make sure hardware complete it */
1168         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1169                       readl, (!(val & DMA_GSTS_WBFS)), val);
1170
1171         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1172 }
1173
1174 /* return value determine if we need a write buffer flush */
1175 static void __iommu_flush_context(struct intel_iommu *iommu,
1176                                   u16 did, u16 source_id, u8 function_mask,
1177                                   u64 type)
1178 {
1179         u64 val = 0;
1180         unsigned long flag;
1181
1182         switch (type) {
1183         case DMA_CCMD_GLOBAL_INVL:
1184                 val = DMA_CCMD_GLOBAL_INVL;
1185                 break;
1186         case DMA_CCMD_DOMAIN_INVL:
1187                 val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
1188                 break;
1189         case DMA_CCMD_DEVICE_INVL:
1190                 val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
1191                         | DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
1192                 break;
1193         default:
1194                 BUG();
1195         }
1196         val |= DMA_CCMD_ICC;
1197
1198         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1199         dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);
1200
1201         /* Make sure hardware complete it */
1202         IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
1203                 dmar_readq, (!(val & DMA_CCMD_ICC)), val);
1204
1205         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1206 }
1207
1208 /* return value determine if we need a write buffer flush */
1209 static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
1210                                 u64 addr, unsigned int size_order, u64 type)
1211 {
1212         int tlb_offset = ecap_iotlb_offset(iommu->ecap);
1213         u64 val = 0, val_iva = 0;
1214         unsigned long flag;
1215
1216         switch (type) {
1217         case DMA_TLB_GLOBAL_FLUSH:
1218                 /* global flush doesn't need set IVA_REG */
1219                 val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
1220                 break;
1221         case DMA_TLB_DSI_FLUSH:
1222                 val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
1223                 break;
1224         case DMA_TLB_PSI_FLUSH:
1225                 val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
1226                 /* IH bit is passed in as part of address */
1227                 val_iva = size_order | addr;
1228                 break;
1229         default:
1230                 BUG();
1231         }
1232         /* Note: set drain read/write */
1233 #if 0
1234         /*
1235          * This is probably to be super secure.. Looks like we can
1236          * ignore it without any impact.
1237          */
1238         if (cap_read_drain(iommu->cap))
1239                 val |= DMA_TLB_READ_DRAIN;
1240 #endif
1241         if (cap_write_drain(iommu->cap))
1242                 val |= DMA_TLB_WRITE_DRAIN;
1243
1244         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1245         /* Note: Only uses first TLB reg currently */
1246         if (val_iva)
1247                 dmar_writeq(iommu->reg + tlb_offset, val_iva);
1248         dmar_writeq(iommu->reg + tlb_offset + 8, val);
1249
1250         /* Make sure hardware complete it */
1251         IOMMU_WAIT_OP(iommu, tlb_offset + 8,
1252                 dmar_readq, (!(val & DMA_TLB_IVT)), val);
1253
1254         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1255
1256         /* check IOTLB invalidation granularity */
1257         if (DMA_TLB_IAIG(val) == 0)
1258                 printk(KERN_ERR"IOMMU: flush IOTLB failed\n");
1259         if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
1260                 pr_debug("IOMMU: tlb flush request %Lx, actual %Lx\n",
1261                         (unsigned long long)DMA_TLB_IIRG(type),
1262                         (unsigned long long)DMA_TLB_IAIG(val));
1263 }
1264
1265 static struct device_domain_info *
1266 iommu_support_dev_iotlb (struct dmar_domain *domain, struct intel_iommu *iommu,
1267                          u8 bus, u8 devfn)
1268 {
1269         int found = 0;
1270         unsigned long flags;
1271         struct device_domain_info *info;
1272         struct pci_dev *pdev;
1273
1274         if (!ecap_dev_iotlb_support(iommu->ecap))
1275                 return NULL;
1276
1277         if (!iommu->qi)
1278                 return NULL;
1279
1280         spin_lock_irqsave(&device_domain_lock, flags);
1281         list_for_each_entry(info, &domain->devices, link)
1282                 if (info->iommu == iommu && info->bus == bus &&
1283                     info->devfn == devfn) {
1284                         found = 1;
1285                         break;
1286                 }
1287         spin_unlock_irqrestore(&device_domain_lock, flags);
1288
1289         if (!found || !info->dev || !dev_is_pci(info->dev))
1290                 return NULL;
1291
1292         pdev = to_pci_dev(info->dev);
1293
1294         if (!pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ATS))
1295                 return NULL;
1296
1297         if (!dmar_find_matched_atsr_unit(pdev))
1298                 return NULL;
1299
1300         return info;
1301 }
1302
1303 static void iommu_enable_dev_iotlb(struct device_domain_info *info)
1304 {
1305         if (!info || !dev_is_pci(info->dev))
1306                 return;
1307
1308         pci_enable_ats(to_pci_dev(info->dev), VTD_PAGE_SHIFT);
1309 }
1310
1311 static void iommu_disable_dev_iotlb(struct device_domain_info *info)
1312 {
1313         if (!info->dev || !dev_is_pci(info->dev) ||
1314             !pci_ats_enabled(to_pci_dev(info->dev)))
1315                 return;
1316
1317         pci_disable_ats(to_pci_dev(info->dev));
1318 }
1319
1320 static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
1321                                   u64 addr, unsigned mask)
1322 {
1323         u16 sid, qdep;
1324         unsigned long flags;
1325         struct device_domain_info *info;
1326
1327         spin_lock_irqsave(&device_domain_lock, flags);
1328         list_for_each_entry(info, &domain->devices, link) {
1329                 struct pci_dev *pdev;
1330                 if (!info->dev || !dev_is_pci(info->dev))
1331                         continue;
1332
1333                 pdev = to_pci_dev(info->dev);
1334                 if (!pci_ats_enabled(pdev))
1335                         continue;
1336
1337                 sid = info->bus << 8 | info->devfn;
1338                 qdep = pci_ats_queue_depth(pdev);
1339                 qi_flush_dev_iotlb(info->iommu, sid, qdep, addr, mask);
1340         }
1341         spin_unlock_irqrestore(&device_domain_lock, flags);
1342 }
1343
1344 static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
1345                                   unsigned long pfn, unsigned int pages, int ih, int map)
1346 {
1347         unsigned int mask = ilog2(__roundup_pow_of_two(pages));
1348         uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT;
1349
1350         BUG_ON(pages == 0);
1351
1352         if (ih)
1353                 ih = 1 << 6;
1354         /*
1355          * Fallback to domain selective flush if no PSI support or the size is
1356          * too big.
1357          * PSI requires page size to be 2 ^ x, and the base address is naturally
1358          * aligned to the size
1359          */
1360         if (!cap_pgsel_inv(iommu->cap) || mask > cap_max_amask_val(iommu->cap))
1361                 iommu->flush.flush_iotlb(iommu, did, 0, 0,
1362                                                 DMA_TLB_DSI_FLUSH);
1363         else
1364                 iommu->flush.flush_iotlb(iommu, did, addr | ih, mask,
1365                                                 DMA_TLB_PSI_FLUSH);
1366
1367         /*
1368          * In caching mode, changes of pages from non-present to present require
1369          * flush. However, device IOTLB doesn't need to be flushed in this case.
1370          */
1371         if (!cap_caching_mode(iommu->cap) || !map)
1372                 iommu_flush_dev_iotlb(iommu->domains[did], addr, mask);
1373 }
1374
1375 static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
1376 {
1377         u32 pmen;
1378         unsigned long flags;
1379
1380         raw_spin_lock_irqsave(&iommu->register_lock, flags);
1381         pmen = readl(iommu->reg + DMAR_PMEN_REG);
1382         pmen &= ~DMA_PMEN_EPM;
1383         writel(pmen, iommu->reg + DMAR_PMEN_REG);
1384
1385         /* wait for the protected region status bit to clear */
1386         IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
1387                 readl, !(pmen & DMA_PMEN_PRS), pmen);
1388
1389         raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1390 }
1391
1392 static int iommu_enable_translation(struct intel_iommu *iommu)
1393 {
1394         u32 sts;
1395         unsigned long flags;
1396
1397         raw_spin_lock_irqsave(&iommu->register_lock, flags);
1398         iommu->gcmd |= DMA_GCMD_TE;
1399         writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1400
1401         /* Make sure hardware complete it */
1402         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1403                       readl, (sts & DMA_GSTS_TES), sts);
1404
1405         raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1406         return 0;
1407 }
1408
1409 static int iommu_disable_translation(struct intel_iommu *iommu)
1410 {
1411         u32 sts;
1412         unsigned long flag;
1413
1414         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1415         iommu->gcmd &= ~DMA_GCMD_TE;
1416         writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1417
1418         /* Make sure hardware complete it */
1419         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1420                       readl, (!(sts & DMA_GSTS_TES)), sts);
1421
1422         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1423         return 0;
1424 }
1425
1426
1427 static int iommu_init_domains(struct intel_iommu *iommu)
1428 {
1429         unsigned long ndomains;
1430         unsigned long nlongs;
1431
1432         ndomains = cap_ndoms(iommu->cap);
1433         pr_debug("IOMMU%d: Number of Domains supported <%ld>\n",
1434                  iommu->seq_id, ndomains);
1435         nlongs = BITS_TO_LONGS(ndomains);
1436
1437         spin_lock_init(&iommu->lock);
1438
1439         /* TBD: there might be 64K domains,
1440          * consider other allocation for future chip
1441          */
1442         iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
1443         if (!iommu->domain_ids) {
1444                 pr_err("IOMMU%d: allocating domain id array failed\n",
1445                        iommu->seq_id);
1446                 return -ENOMEM;
1447         }
1448         iommu->domains = kcalloc(ndomains, sizeof(struct dmar_domain *),
1449                         GFP_KERNEL);
1450         if (!iommu->domains) {
1451                 pr_err("IOMMU%d: allocating domain array failed\n",
1452                        iommu->seq_id);
1453                 kfree(iommu->domain_ids);
1454                 iommu->domain_ids = NULL;
1455                 return -ENOMEM;
1456         }
1457
1458         /*
1459          * if Caching mode is set, then invalid translations are tagged
1460          * with domainid 0. Hence we need to pre-allocate it.
1461          */
1462         if (cap_caching_mode(iommu->cap))
1463                 set_bit(0, iommu->domain_ids);
1464         return 0;
1465 }
1466
1467 static void free_dmar_iommu(struct intel_iommu *iommu)
1468 {
1469         struct dmar_domain *domain;
1470         int i;
1471
1472         if ((iommu->domains) && (iommu->domain_ids)) {
1473                 for_each_set_bit(i, iommu->domain_ids, cap_ndoms(iommu->cap)) {
1474                         /*
1475                          * Domain id 0 is reserved for invalid translation
1476                          * if hardware supports caching mode.
1477                          */
1478                         if (cap_caching_mode(iommu->cap) && i == 0)
1479                                 continue;
1480
1481                         domain = iommu->domains[i];
1482                         clear_bit(i, iommu->domain_ids);
1483                         if (domain_detach_iommu(domain, iommu) == 0)
1484                                 domain_exit(domain);
1485                 }
1486         }
1487
1488         if (iommu->gcmd & DMA_GCMD_TE)
1489                 iommu_disable_translation(iommu);
1490
1491         kfree(iommu->domains);
1492         kfree(iommu->domain_ids);
1493         iommu->domains = NULL;
1494         iommu->domain_ids = NULL;
1495
1496         g_iommus[iommu->seq_id] = NULL;
1497
1498         /* free context mapping */
1499         free_context_table(iommu);
1500 }
1501
1502 static struct dmar_domain *alloc_domain(int flags)
1503 {
1504         /* domain id for virtual machine, it won't be set in context */
1505         static atomic_t vm_domid = ATOMIC_INIT(0);
1506         struct dmar_domain *domain;
1507
1508         domain = alloc_domain_mem();
1509         if (!domain)
1510                 return NULL;
1511
1512         memset(domain, 0, sizeof(*domain));
1513         domain->nid = -1;
1514         domain->flags = flags;
1515         spin_lock_init(&domain->iommu_lock);
1516         INIT_LIST_HEAD(&domain->devices);
1517         if (flags & DOMAIN_FLAG_VIRTUAL_MACHINE)
1518                 domain->id = atomic_inc_return(&vm_domid);
1519
1520         return domain;
1521 }
1522
1523 static int __iommu_attach_domain(struct dmar_domain *domain,
1524                                  struct intel_iommu *iommu)
1525 {
1526         int num;
1527         unsigned long ndomains;
1528
1529         ndomains = cap_ndoms(iommu->cap);
1530         num = find_first_zero_bit(iommu->domain_ids, ndomains);
1531         if (num < ndomains) {
1532                 set_bit(num, iommu->domain_ids);
1533                 iommu->domains[num] = domain;
1534         } else {
1535                 num = -ENOSPC;
1536         }
1537
1538         return num;
1539 }
1540
1541 static int iommu_attach_domain(struct dmar_domain *domain,
1542                                struct intel_iommu *iommu)
1543 {
1544         int num;
1545         unsigned long flags;
1546
1547         spin_lock_irqsave(&iommu->lock, flags);
1548         num = __iommu_attach_domain(domain, iommu);
1549         spin_unlock_irqrestore(&iommu->lock, flags);
1550         if (num < 0)
1551                 pr_err("IOMMU: no free domain ids\n");
1552
1553         return num;
1554 }
1555
1556 static int iommu_attach_vm_domain(struct dmar_domain *domain,
1557                                   struct intel_iommu *iommu)
1558 {
1559         int num;
1560         unsigned long ndomains;
1561
1562         ndomains = cap_ndoms(iommu->cap);
1563         for_each_set_bit(num, iommu->domain_ids, ndomains)
1564                 if (iommu->domains[num] == domain)
1565                         return num;
1566
1567         return __iommu_attach_domain(domain, iommu);
1568 }
1569
1570 static void iommu_detach_domain(struct dmar_domain *domain,
1571                                 struct intel_iommu *iommu)
1572 {
1573         unsigned long flags;
1574         int num, ndomains;
1575
1576         spin_lock_irqsave(&iommu->lock, flags);
1577         if (domain_type_is_vm_or_si(domain)) {
1578                 ndomains = cap_ndoms(iommu->cap);
1579                 for_each_set_bit(num, iommu->domain_ids, ndomains) {
1580                         if (iommu->domains[num] == domain) {
1581                                 clear_bit(num, iommu->domain_ids);
1582                                 iommu->domains[num] = NULL;
1583                                 break;
1584                         }
1585                 }
1586         } else {
1587                 clear_bit(domain->id, iommu->domain_ids);
1588                 iommu->domains[domain->id] = NULL;
1589         }
1590         spin_unlock_irqrestore(&iommu->lock, flags);
1591 }
1592
1593 static void domain_attach_iommu(struct dmar_domain *domain,
1594                                struct intel_iommu *iommu)
1595 {
1596         unsigned long flags;
1597
1598         spin_lock_irqsave(&domain->iommu_lock, flags);
1599         if (!test_and_set_bit(iommu->seq_id, domain->iommu_bmp)) {
1600                 domain->iommu_count++;
1601                 if (domain->iommu_count == 1)
1602                         domain->nid = iommu->node;
1603                 domain_update_iommu_cap(domain);
1604         }
1605         spin_unlock_irqrestore(&domain->iommu_lock, flags);
1606 }
1607
1608 static int domain_detach_iommu(struct dmar_domain *domain,
1609                                struct intel_iommu *iommu)
1610 {
1611         unsigned long flags;
1612         int count = INT_MAX;
1613
1614         spin_lock_irqsave(&domain->iommu_lock, flags);
1615         if (test_and_clear_bit(iommu->seq_id, domain->iommu_bmp)) {
1616                 count = --domain->iommu_count;
1617                 domain_update_iommu_cap(domain);
1618         }
1619         spin_unlock_irqrestore(&domain->iommu_lock, flags);
1620
1621         return count;
1622 }
1623
1624 static struct iova_domain reserved_iova_list;
1625 static struct lock_class_key reserved_rbtree_key;
1626
1627 static int dmar_init_reserved_ranges(void)
1628 {
1629         struct pci_dev *pdev = NULL;
1630         struct iova *iova;
1631         int i;
1632
1633         init_iova_domain(&reserved_iova_list, DMA_32BIT_PFN);
1634
1635         lockdep_set_class(&reserved_iova_list.iova_rbtree_lock,
1636                 &reserved_rbtree_key);
1637
1638         /* IOAPIC ranges shouldn't be accessed by DMA */
1639         iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
1640                 IOVA_PFN(IOAPIC_RANGE_END));
1641         if (!iova) {
1642                 printk(KERN_ERR "Reserve IOAPIC range failed\n");
1643                 return -ENODEV;
1644         }
1645
1646         /* Reserve all PCI MMIO to avoid peer-to-peer access */
1647         for_each_pci_dev(pdev) {
1648                 struct resource *r;
1649
1650                 for (i = 0; i < PCI_NUM_RESOURCES; i++) {
1651                         r = &pdev->resource[i];
1652                         if (!r->flags || !(r->flags & IORESOURCE_MEM))
1653                                 continue;
1654                         iova = reserve_iova(&reserved_iova_list,
1655                                             IOVA_PFN(r->start),
1656                                             IOVA_PFN(r->end));
1657                         if (!iova) {
1658                                 printk(KERN_ERR "Reserve iova failed\n");
1659                                 return -ENODEV;
1660                         }
1661                 }
1662         }
1663         return 0;
1664 }
1665
1666 static void domain_reserve_special_ranges(struct dmar_domain *domain)
1667 {
1668         copy_reserved_iova(&reserved_iova_list, &domain->iovad);
1669 }
1670
1671 static inline int guestwidth_to_adjustwidth(int gaw)
1672 {
1673         int agaw;
1674         int r = (gaw - 12) % 9;
1675
1676         if (r == 0)
1677                 agaw = gaw;
1678         else
1679                 agaw = gaw + 9 - r;
1680         if (agaw > 64)
1681                 agaw = 64;
1682         return agaw;
1683 }
1684
1685 static int domain_init(struct dmar_domain *domain, int guest_width)
1686 {
1687         struct intel_iommu *iommu;
1688         int adjust_width, agaw;
1689         unsigned long sagaw;
1690
1691         init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
1692         domain_reserve_special_ranges(domain);
1693
1694         /* calculate AGAW */
1695         iommu = domain_get_iommu(domain);
1696         if (guest_width > cap_mgaw(iommu->cap))
1697                 guest_width = cap_mgaw(iommu->cap);
1698         domain->gaw = guest_width;
1699         adjust_width = guestwidth_to_adjustwidth(guest_width);
1700         agaw = width_to_agaw(adjust_width);
1701         sagaw = cap_sagaw(iommu->cap);
1702         if (!test_bit(agaw, &sagaw)) {
1703                 /* hardware doesn't support it, choose a bigger one */
1704                 pr_debug("IOMMU: hardware doesn't support agaw %d\n", agaw);
1705                 agaw = find_next_bit(&sagaw, 5, agaw);
1706                 if (agaw >= 5)
1707                         return -ENODEV;
1708         }
1709         domain->agaw = agaw;
1710
1711         if (ecap_coherent(iommu->ecap))
1712                 domain->iommu_coherency = 1;
1713         else
1714                 domain->iommu_coherency = 0;
1715
1716         if (ecap_sc_support(iommu->ecap))
1717                 domain->iommu_snooping = 1;
1718         else
1719                 domain->iommu_snooping = 0;
1720
1721         if (intel_iommu_superpage)
1722                 domain->iommu_superpage = fls(cap_super_page_val(iommu->cap));
1723         else
1724                 domain->iommu_superpage = 0;
1725
1726         domain->nid = iommu->node;
1727
1728         /* always allocate the top pgd */
1729         domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
1730         if (!domain->pgd)
1731                 return -ENOMEM;
1732         __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE);
1733         return 0;
1734 }
1735
1736 static void domain_exit(struct dmar_domain *domain)
1737 {
1738         struct dmar_drhd_unit *drhd;
1739         struct intel_iommu *iommu;
1740         struct page *freelist = NULL;
1741
1742         /* Domain 0 is reserved, so dont process it */
1743         if (!domain)
1744                 return;
1745
1746         /* Flush any lazy unmaps that may reference this domain */
1747         if (!intel_iommu_strict)
1748                 flush_unmaps_timeout(0);
1749
1750         /* remove associated devices */
1751         domain_remove_dev_info(domain);
1752
1753         /* destroy iovas */
1754         put_iova_domain(&domain->iovad);
1755
1756         freelist = domain_unmap(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
1757
1758         /* clear attached or cached domains */
1759         rcu_read_lock();
1760         for_each_active_iommu(iommu, drhd)
1761                 iommu_detach_domain(domain, iommu);
1762         rcu_read_unlock();
1763
1764         dma_free_pagelist(freelist);
1765
1766         free_domain_mem(domain);
1767 }
1768
1769 static int domain_context_mapping_one(struct dmar_domain *domain,
1770                                       struct intel_iommu *iommu,
1771                                       u8 bus, u8 devfn, int translation)
1772 {
1773         struct context_entry *context;
1774         unsigned long flags;
1775         struct dma_pte *pgd;
1776         int id;
1777         int agaw;
1778         struct device_domain_info *info = NULL;
1779
1780         pr_debug("Set context mapping for %02x:%02x.%d\n",
1781                 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
1782
1783         BUG_ON(!domain->pgd);
1784         BUG_ON(translation != CONTEXT_TT_PASS_THROUGH &&
1785                translation != CONTEXT_TT_MULTI_LEVEL);
1786
1787         context = device_to_context_entry(iommu, bus, devfn);
1788         if (!context)
1789                 return -ENOMEM;
1790         spin_lock_irqsave(&iommu->lock, flags);
1791         if (context_present(context)) {
1792                 spin_unlock_irqrestore(&iommu->lock, flags);
1793                 return 0;
1794         }
1795
1796         id = domain->id;
1797         pgd = domain->pgd;
1798
1799         if (domain_type_is_vm_or_si(domain)) {
1800                 if (domain_type_is_vm(domain)) {
1801                         id = iommu_attach_vm_domain(domain, iommu);
1802                         if (id < 0) {
1803                                 spin_unlock_irqrestore(&iommu->lock, flags);
1804                                 pr_err("IOMMU: no free domain ids\n");
1805                                 return -EFAULT;
1806                         }
1807                 }
1808
1809                 /* Skip top levels of page tables for
1810                  * iommu which has less agaw than default.
1811                  * Unnecessary for PT mode.
1812                  */
1813                 if (translation != CONTEXT_TT_PASS_THROUGH) {
1814                         for (agaw = domain->agaw; agaw != iommu->agaw; agaw--) {
1815                                 pgd = phys_to_virt(dma_pte_addr(pgd));
1816                                 if (!dma_pte_present(pgd)) {
1817                                         spin_unlock_irqrestore(&iommu->lock, flags);
1818                                         return -ENOMEM;
1819                                 }
1820                         }
1821                 }
1822         }
1823
1824         context_set_domain_id(context, id);
1825
1826         if (translation != CONTEXT_TT_PASS_THROUGH) {
1827                 info = iommu_support_dev_iotlb(domain, iommu, bus, devfn);
1828                 translation = info ? CONTEXT_TT_DEV_IOTLB :
1829                                      CONTEXT_TT_MULTI_LEVEL;
1830         }
1831         /*
1832          * In pass through mode, AW must be programmed to indicate the largest
1833          * AGAW value supported by hardware. And ASR is ignored by hardware.
1834          */
1835         if (unlikely(translation == CONTEXT_TT_PASS_THROUGH))
1836                 context_set_address_width(context, iommu->msagaw);
1837         else {
1838                 context_set_address_root(context, virt_to_phys(pgd));
1839                 context_set_address_width(context, iommu->agaw);
1840         }
1841
1842         context_set_translation_type(context, translation);
1843         context_set_fault_enable(context);
1844         context_set_present(context);
1845         domain_flush_cache(domain, context, sizeof(*context));
1846
1847         /*
1848          * It's a non-present to present mapping. If hardware doesn't cache
1849          * non-present entry we only need to flush the write-buffer. If the
1850          * _does_ cache non-present entries, then it does so in the special
1851          * domain #0, which we have to flush:
1852          */
1853         if (cap_caching_mode(iommu->cap)) {
1854                 iommu->flush.flush_context(iommu, 0,
1855                                            (((u16)bus) << 8) | devfn,
1856                                            DMA_CCMD_MASK_NOBIT,
1857                                            DMA_CCMD_DEVICE_INVL);
1858                 iommu->flush.flush_iotlb(iommu, id, 0, 0, DMA_TLB_DSI_FLUSH);
1859         } else {
1860                 iommu_flush_write_buffer(iommu);
1861         }
1862         iommu_enable_dev_iotlb(info);
1863         spin_unlock_irqrestore(&iommu->lock, flags);
1864
1865         domain_attach_iommu(domain, iommu);
1866
1867         return 0;
1868 }
1869
1870 struct domain_context_mapping_data {
1871         struct dmar_domain *domain;
1872         struct intel_iommu *iommu;
1873         int translation;
1874 };
1875
1876 static int domain_context_mapping_cb(struct pci_dev *pdev,
1877                                      u16 alias, void *opaque)
1878 {
1879         struct domain_context_mapping_data *data = opaque;
1880
1881         return domain_context_mapping_one(data->domain, data->iommu,
1882                                           PCI_BUS_NUM(alias), alias & 0xff,
1883                                           data->translation);
1884 }
1885
1886 static int
1887 domain_context_mapping(struct dmar_domain *domain, struct device *dev,
1888                        int translation)
1889 {
1890         struct intel_iommu *iommu;
1891         u8 bus, devfn;
1892         struct domain_context_mapping_data data;
1893
1894         iommu = device_to_iommu(dev, &bus, &devfn);
1895         if (!iommu)
1896                 return -ENODEV;
1897
1898         if (!dev_is_pci(dev))
1899                 return domain_context_mapping_one(domain, iommu, bus, devfn,
1900                                                   translation);
1901
1902         data.domain = domain;
1903         data.iommu = iommu;
1904         data.translation = translation;
1905
1906         return pci_for_each_dma_alias(to_pci_dev(dev),
1907                                       &domain_context_mapping_cb, &data);
1908 }
1909
1910 static int domain_context_mapped_cb(struct pci_dev *pdev,
1911                                     u16 alias, void *opaque)
1912 {
1913         struct intel_iommu *iommu = opaque;
1914
1915         return !device_context_mapped(iommu, PCI_BUS_NUM(alias), alias & 0xff);
1916 }
1917
1918 static int domain_context_mapped(struct device *dev)
1919 {
1920         struct intel_iommu *iommu;
1921         u8 bus, devfn;
1922
1923         iommu = device_to_iommu(dev, &bus, &devfn);
1924         if (!iommu)
1925                 return -ENODEV;
1926
1927         if (!dev_is_pci(dev))
1928                 return device_context_mapped(iommu, bus, devfn);
1929
1930         return !pci_for_each_dma_alias(to_pci_dev(dev),
1931                                        domain_context_mapped_cb, iommu);
1932 }
1933
1934 /* Returns a number of VTD pages, but aligned to MM page size */
1935 static inline unsigned long aligned_nrpages(unsigned long host_addr,
1936                                             size_t size)
1937 {
1938         host_addr &= ~PAGE_MASK;
1939         return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT;
1940 }
1941
1942 /* Return largest possible superpage level for a given mapping */
1943 static inline int hardware_largepage_caps(struct dmar_domain *domain,
1944                                           unsigned long iov_pfn,
1945                                           unsigned long phy_pfn,
1946                                           unsigned long pages)
1947 {
1948         int support, level = 1;
1949         unsigned long pfnmerge;
1950
1951         support = domain->iommu_superpage;
1952
1953         /* To use a large page, the virtual *and* physical addresses
1954            must be aligned to 2MiB/1GiB/etc. Lower bits set in either
1955            of them will mean we have to use smaller pages. So just
1956            merge them and check both at once. */
1957         pfnmerge = iov_pfn | phy_pfn;
1958
1959         while (support && !(pfnmerge & ~VTD_STRIDE_MASK)) {
1960                 pages >>= VTD_STRIDE_SHIFT;
1961                 if (!pages)
1962                         break;
1963                 pfnmerge >>= VTD_STRIDE_SHIFT;
1964                 level++;
1965                 support--;
1966         }
1967         return level;
1968 }
1969
1970 static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
1971                             struct scatterlist *sg, unsigned long phys_pfn,
1972                             unsigned long nr_pages, int prot)
1973 {
1974         struct dma_pte *first_pte = NULL, *pte = NULL;
1975         phys_addr_t uninitialized_var(pteval);
1976         int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
1977         unsigned long sg_res;
1978         unsigned int largepage_lvl = 0;
1979         unsigned long lvl_pages = 0;
1980
1981         BUG_ON(addr_width < BITS_PER_LONG && (iov_pfn + nr_pages - 1) >> addr_width);
1982
1983         if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
1984                 return -EINVAL;
1985
1986         prot &= DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP;
1987
1988         if (sg)
1989                 sg_res = 0;
1990         else {
1991                 sg_res = nr_pages + 1;
1992                 pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | prot;
1993         }
1994
1995         while (nr_pages > 0) {
1996                 uint64_t tmp;
1997
1998                 if (!sg_res) {
1999                         sg_res = aligned_nrpages(sg->offset, sg->length);
2000                         sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + sg->offset;
2001                         sg->dma_length = sg->length;
2002                         pteval = page_to_phys(sg_page(sg)) | prot;
2003                         phys_pfn = pteval >> VTD_PAGE_SHIFT;
2004                 }
2005
2006                 if (!pte) {
2007                         largepage_lvl = hardware_largepage_caps(domain, iov_pfn, phys_pfn, sg_res);
2008
2009                         first_pte = pte = pfn_to_dma_pte(domain, iov_pfn, &largepage_lvl);
2010                         if (!pte)
2011                                 return -ENOMEM;
2012                         /* It is large page*/
2013                         if (largepage_lvl > 1) {
2014                                 pteval |= DMA_PTE_LARGE_PAGE;
2015                                 /* Ensure that old small page tables are removed to make room
2016                                    for superpage, if they exist. */
2017                                 dma_pte_clear_range(domain, iov_pfn,
2018                                                     iov_pfn + lvl_to_nr_pages(largepage_lvl) - 1);
2019                                 dma_pte_free_pagetable(domain, iov_pfn,
2020                                                        iov_pfn + lvl_to_nr_pages(largepage_lvl) - 1);
2021                         } else {
2022                                 pteval &= ~(uint64_t)DMA_PTE_LARGE_PAGE;
2023                         }
2024
2025                 }
2026                 /* We don't need lock here, nobody else
2027                  * touches the iova range
2028                  */
2029                 tmp = cmpxchg64_local(&pte->val, 0ULL, pteval);
2030                 if (tmp) {
2031                         static int dumps = 5;
2032                         printk(KERN_CRIT "ERROR: DMA PTE for vPFN 0x%lx already set (to %llx not %llx)\n",
2033                                iov_pfn, tmp, (unsigned long long)pteval);
2034                         if (dumps) {
2035                                 dumps--;
2036                                 debug_dma_dump_mappings(NULL);
2037                         }
2038                         WARN_ON(1);
2039                 }
2040
2041                 lvl_pages = lvl_to_nr_pages(largepage_lvl);
2042
2043                 BUG_ON(nr_pages < lvl_pages);
2044                 BUG_ON(sg_res < lvl_pages);
2045
2046                 nr_pages -= lvl_pages;
2047                 iov_pfn += lvl_pages;
2048                 phys_pfn += lvl_pages;
2049                 pteval += lvl_pages * VTD_PAGE_SIZE;
2050                 sg_res -= lvl_pages;
2051
2052                 /* If the next PTE would be the first in a new page, then we
2053                    need to flush the cache on the entries we've just written.
2054                    And then we'll need to recalculate 'pte', so clear it and
2055                    let it get set again in the if (!pte) block above.
2056
2057                    If we're done (!nr_pages) we need to flush the cache too.
2058
2059                    Also if we've been setting superpages, we may need to
2060                    recalculate 'pte' and switch back to smaller pages for the
2061                    end of the mapping, if the trailing size is not enough to
2062                    use another superpage (i.e. sg_res < lvl_pages). */
2063                 pte++;
2064                 if (!nr_pages || first_pte_in_page(pte) ||
2065                     (largepage_lvl > 1 && sg_res < lvl_pages)) {
2066                         domain_flush_cache(domain, first_pte,
2067                                            (void *)pte - (void *)first_pte);
2068                         pte = NULL;
2069                 }
2070
2071                 if (!sg_res && nr_pages)
2072                         sg = sg_next(sg);
2073         }
2074         return 0;
2075 }
2076
2077 static inline int domain_sg_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2078                                     struct scatterlist *sg, unsigned long nr_pages,
2079                                     int prot)
2080 {
2081         return __domain_mapping(domain, iov_pfn, sg, 0, nr_pages, prot);
2082 }
2083
2084 static inline int domain_pfn_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2085                                      unsigned long phys_pfn, unsigned long nr_pages,
2086                                      int prot)
2087 {
2088         return __domain_mapping(domain, iov_pfn, NULL, phys_pfn, nr_pages, prot);
2089 }
2090
2091 static void iommu_detach_dev(struct intel_iommu *iommu, u8 bus, u8 devfn)
2092 {
2093         if (!iommu)
2094                 return;
2095
2096         clear_context_table(iommu, bus, devfn);
2097         iommu->flush.flush_context(iommu, 0, 0, 0,
2098                                            DMA_CCMD_GLOBAL_INVL);
2099         iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
2100 }
2101
2102 static inline void unlink_domain_info(struct device_domain_info *info)
2103 {
2104         assert_spin_locked(&device_domain_lock);
2105         list_del(&info->link);
2106         list_del(&info->global);
2107         if (info->dev)
2108                 info->dev->archdata.iommu = NULL;
2109 }
2110
2111 static void domain_remove_dev_info(struct dmar_domain *domain)
2112 {
2113         struct device_domain_info *info, *tmp;
2114         unsigned long flags;
2115
2116         spin_lock_irqsave(&device_domain_lock, flags);
2117         list_for_each_entry_safe(info, tmp, &domain->devices, link) {
2118                 unlink_domain_info(info);
2119                 spin_unlock_irqrestore(&device_domain_lock, flags);
2120
2121                 iommu_disable_dev_iotlb(info);
2122                 iommu_detach_dev(info->iommu, info->bus, info->devfn);
2123
2124                 if (domain_type_is_vm(domain)) {
2125                         iommu_detach_dependent_devices(info->iommu, info->dev);
2126                         domain_detach_iommu(domain, info->iommu);
2127                 }
2128
2129                 free_devinfo_mem(info);
2130                 spin_lock_irqsave(&device_domain_lock, flags);
2131         }
2132         spin_unlock_irqrestore(&device_domain_lock, flags);
2133 }
2134
2135 /*
2136  * find_domain
2137  * Note: we use struct device->archdata.iommu stores the info
2138  */
2139 static struct dmar_domain *find_domain(struct device *dev)
2140 {
2141         struct device_domain_info *info;
2142
2143         /* No lock here, assumes no domain exit in normal case */
2144         info = dev->archdata.iommu;
2145         if (info)
2146                 return info->domain;
2147         return NULL;
2148 }
2149
2150 static inline struct device_domain_info *
2151 dmar_search_domain_by_dev_info(int segment, int bus, int devfn)
2152 {
2153         struct device_domain_info *info;
2154
2155         list_for_each_entry(info, &device_domain_list, global)
2156                 if (info->iommu->segment == segment && info->bus == bus &&
2157                     info->devfn == devfn)
2158                         return info;
2159
2160         return NULL;
2161 }
2162
2163 static struct dmar_domain *dmar_insert_dev_info(struct intel_iommu *iommu,
2164                                                 int bus, int devfn,
2165                                                 struct device *dev,
2166                                                 struct dmar_domain *domain)
2167 {
2168         struct dmar_domain *found = NULL;
2169         struct device_domain_info *info;
2170         unsigned long flags;
2171
2172         info = alloc_devinfo_mem();
2173         if (!info)
2174                 return NULL;
2175
2176         info->bus = bus;
2177         info->devfn = devfn;
2178         info->dev = dev;
2179         info->domain = domain;
2180         info->iommu = iommu;
2181
2182         spin_lock_irqsave(&device_domain_lock, flags);
2183         if (dev)
2184                 found = find_domain(dev);
2185         else {
2186                 struct device_domain_info *info2;
2187                 info2 = dmar_search_domain_by_dev_info(iommu->segment, bus, devfn);
2188                 if (info2)
2189                         found = info2->domain;
2190         }
2191         if (found) {
2192                 spin_unlock_irqrestore(&device_domain_lock, flags);
2193                 free_devinfo_mem(info);
2194                 /* Caller must free the original domain */
2195                 return found;
2196         }
2197
2198         list_add(&info->link, &domain->devices);
2199         list_add(&info->global, &device_domain_list);
2200         if (dev)
2201                 dev->archdata.iommu = info;
2202         spin_unlock_irqrestore(&device_domain_lock, flags);
2203
2204         return domain;
2205 }
2206
2207 static int get_last_alias(struct pci_dev *pdev, u16 alias, void *opaque)
2208 {
2209         *(u16 *)opaque = alias;
2210         return 0;
2211 }
2212
2213 /* domain is initialized */
2214 static struct dmar_domain *get_domain_for_dev(struct device *dev, int gaw)
2215 {
2216         struct dmar_domain *domain, *tmp;
2217         struct intel_iommu *iommu;
2218         struct device_domain_info *info;
2219         u16 dma_alias;
2220         unsigned long flags;
2221         u8 bus, devfn;
2222
2223         domain = find_domain(dev);
2224         if (domain)
2225                 return domain;
2226
2227         iommu = device_to_iommu(dev, &bus, &devfn);
2228         if (!iommu)
2229                 return NULL;
2230
2231         if (dev_is_pci(dev)) {
2232                 struct pci_dev *pdev = to_pci_dev(dev);
2233
2234                 pci_for_each_dma_alias(pdev, get_last_alias, &dma_alias);
2235
2236                 spin_lock_irqsave(&device_domain_lock, flags);
2237                 info = dmar_search_domain_by_dev_info(pci_domain_nr(pdev->bus),
2238                                                       PCI_BUS_NUM(dma_alias),
2239                                                       dma_alias & 0xff);
2240                 if (info) {
2241                         iommu = info->iommu;
2242                         domain = info->domain;
2243                 }
2244                 spin_unlock_irqrestore(&device_domain_lock, flags);
2245
2246                 /* DMA alias already has a domain, uses it */
2247                 if (info)
2248                         goto found_domain;
2249         }
2250
2251         /* Allocate and initialize new domain for the device */
2252         domain = alloc_domain(0);
2253         if (!domain)
2254                 return NULL;
2255         domain->id = iommu_attach_domain(domain, iommu);
2256         if (domain->id < 0) {
2257                 free_domain_mem(domain);
2258                 return NULL;
2259         }
2260         domain_attach_iommu(domain, iommu);
2261         if (domain_init(domain, gaw)) {
2262                 domain_exit(domain);
2263                 return NULL;
2264         }
2265
2266         /* register PCI DMA alias device */
2267         if (dev_is_pci(dev)) {
2268                 tmp = dmar_insert_dev_info(iommu, PCI_BUS_NUM(dma_alias),
2269                                            dma_alias & 0xff, NULL, domain);
2270
2271                 if (!tmp || tmp != domain) {
2272                         domain_exit(domain);
2273                         domain = tmp;
2274                 }
2275
2276                 if (!domain)
2277                         return NULL;
2278         }
2279
2280 found_domain:
2281         tmp = dmar_insert_dev_info(iommu, bus, devfn, dev, domain);
2282
2283         if (!tmp || tmp != domain) {
2284                 domain_exit(domain);
2285                 domain = tmp;
2286         }
2287
2288         return domain;
2289 }
2290
2291 static int iommu_identity_mapping;
2292 #define IDENTMAP_ALL            1
2293 #define IDENTMAP_GFX            2
2294 #define IDENTMAP_AZALIA         4
2295
2296 static int iommu_domain_identity_map(struct dmar_domain *domain,
2297                                      unsigned long long start,
2298                                      unsigned long long end)
2299 {
2300         unsigned long first_vpfn = start >> VTD_PAGE_SHIFT;
2301         unsigned long last_vpfn = end >> VTD_PAGE_SHIFT;
2302
2303         if (!reserve_iova(&domain->iovad, dma_to_mm_pfn(first_vpfn),
2304                           dma_to_mm_pfn(last_vpfn))) {
2305                 printk(KERN_ERR "IOMMU: reserve iova failed\n");
2306                 return -ENOMEM;
2307         }
2308
2309         pr_debug("Mapping reserved region %llx-%llx for domain %d\n",
2310                  start, end, domain->id);
2311         /*
2312          * RMRR range might have overlap with physical memory range,
2313          * clear it first
2314          */
2315         dma_pte_clear_range(domain, first_vpfn, last_vpfn);
2316
2317         return domain_pfn_mapping(domain, first_vpfn, first_vpfn,
2318                                   last_vpfn - first_vpfn + 1,
2319                                   DMA_PTE_READ|DMA_PTE_WRITE);
2320 }
2321
2322 static int iommu_prepare_identity_map(struct device *dev,
2323                                       unsigned long long start,
2324                                       unsigned long long end)
2325 {
2326         struct dmar_domain *domain;
2327         int ret;
2328
2329         domain = get_domain_for_dev(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
2330         if (!domain)
2331                 return -ENOMEM;
2332
2333         /* For _hardware_ passthrough, don't bother. But for software
2334            passthrough, we do it anyway -- it may indicate a memory
2335            range which is reserved in E820, so which didn't get set
2336            up to start with in si_domain */
2337         if (domain == si_domain && hw_pass_through) {
2338                 printk("Ignoring identity map for HW passthrough device %s [0x%Lx - 0x%Lx]\n",
2339                        dev_name(dev), start, end);
2340                 return 0;
2341         }
2342
2343         printk(KERN_INFO
2344                "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
2345                dev_name(dev), start, end);
2346         
2347         if (end < start) {
2348                 WARN(1, "Your BIOS is broken; RMRR ends before it starts!\n"
2349                         "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2350                         dmi_get_system_info(DMI_BIOS_VENDOR),
2351                         dmi_get_system_info(DMI_BIOS_VERSION),
2352                      dmi_get_system_info(DMI_PRODUCT_VERSION));
2353                 ret = -EIO;
2354                 goto error;
2355         }
2356
2357         if (end >> agaw_to_width(domain->agaw)) {
2358                 WARN(1, "Your BIOS is broken; RMRR exceeds permitted address width (%d bits)\n"
2359                      "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2360                      agaw_to_width(domain->agaw),
2361                      dmi_get_system_info(DMI_BIOS_VENDOR),
2362                      dmi_get_system_info(DMI_BIOS_VERSION),
2363                      dmi_get_system_info(DMI_PRODUCT_VERSION));
2364                 ret = -EIO;
2365                 goto error;
2366         }
2367
2368         ret = iommu_domain_identity_map(domain, start, end);
2369         if (ret)
2370                 goto error;
2371
2372         /* context entry init */
2373         ret = domain_context_mapping(domain, dev, CONTEXT_TT_MULTI_LEVEL);
2374         if (ret)
2375                 goto error;
2376
2377         return 0;
2378
2379  error:
2380         domain_exit(domain);
2381         return ret;
2382 }
2383
2384 static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
2385                                          struct device *dev)
2386 {
2387         if (dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
2388                 return 0;
2389         return iommu_prepare_identity_map(dev, rmrr->base_address,
2390                                           rmrr->end_address);
2391 }
2392
2393 #ifdef CONFIG_INTEL_IOMMU_FLOPPY_WA
2394 static inline void iommu_prepare_isa(void)
2395 {
2396         struct pci_dev *pdev;
2397         int ret;
2398
2399         pdev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL);
2400         if (!pdev)
2401                 return;
2402
2403         printk(KERN_INFO "IOMMU: Prepare 0-16MiB unity mapping for LPC\n");
2404         ret = iommu_prepare_identity_map(&pdev->dev, 0, 16*1024*1024 - 1);
2405
2406         if (ret)
2407                 printk(KERN_ERR "IOMMU: Failed to create 0-16MiB identity map; "
2408                        "floppy might not work\n");
2409
2410         pci_dev_put(pdev);
2411 }
2412 #else
2413 static inline void iommu_prepare_isa(void)
2414 {
2415         return;
2416 }
2417 #endif /* !CONFIG_INTEL_IOMMU_FLPY_WA */
2418
2419 static int md_domain_init(struct dmar_domain *domain, int guest_width);
2420
2421 static int __init si_domain_init(int hw)
2422 {
2423         struct dmar_drhd_unit *drhd;
2424         struct intel_iommu *iommu;
2425         int nid, ret = 0;
2426         bool first = true;
2427
2428         si_domain = alloc_domain(DOMAIN_FLAG_STATIC_IDENTITY);
2429         if (!si_domain)
2430                 return -EFAULT;
2431
2432         for_each_active_iommu(iommu, drhd) {
2433                 ret = iommu_attach_domain(si_domain, iommu);
2434                 if (ret < 0) {
2435                         domain_exit(si_domain);
2436                         return -EFAULT;
2437                 } else if (first) {
2438                         si_domain->id = ret;
2439                         first = false;
2440                 } else if (si_domain->id != ret) {
2441                         domain_exit(si_domain);
2442                         return -EFAULT;
2443                 }
2444                 domain_attach_iommu(si_domain, iommu);
2445         }
2446
2447         if (md_domain_init(si_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
2448                 domain_exit(si_domain);
2449                 return -EFAULT;
2450         }
2451
2452         pr_debug("IOMMU: identity mapping domain is domain %d\n",
2453                  si_domain->id);
2454
2455         if (hw)
2456                 return 0;
2457
2458         for_each_online_node(nid) {
2459                 unsigned long start_pfn, end_pfn;
2460                 int i;
2461
2462                 for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
2463                         ret = iommu_domain_identity_map(si_domain,
2464                                         PFN_PHYS(start_pfn), PFN_PHYS(end_pfn));
2465                         if (ret)
2466                                 return ret;
2467                 }
2468         }
2469
2470         return 0;
2471 }
2472
2473 static int identity_mapping(struct device *dev)
2474 {
2475         struct device_domain_info *info;
2476
2477         if (likely(!iommu_identity_mapping))
2478                 return 0;
2479
2480         info = dev->archdata.iommu;
2481         if (info && info != DUMMY_DEVICE_DOMAIN_INFO)
2482                 return (info->domain == si_domain);
2483
2484         return 0;
2485 }
2486
2487 static int domain_add_dev_info(struct dmar_domain *domain,
2488                                struct device *dev, int translation)
2489 {
2490         struct dmar_domain *ndomain;
2491         struct intel_iommu *iommu;
2492         u8 bus, devfn;
2493         int ret;
2494
2495         iommu = device_to_iommu(dev, &bus, &devfn);
2496         if (!iommu)
2497                 return -ENODEV;
2498
2499         ndomain = dmar_insert_dev_info(iommu, bus, devfn, dev, domain);
2500         if (ndomain != domain)
2501                 return -EBUSY;
2502
2503         ret = domain_context_mapping(domain, dev, translation);
2504         if (ret) {
2505                 domain_remove_one_dev_info(domain, dev);
2506                 return ret;
2507         }
2508
2509         return 0;
2510 }
2511
2512 static bool device_has_rmrr(struct device *dev)
2513 {
2514         struct dmar_rmrr_unit *rmrr;
2515         struct device *tmp;
2516         int i;
2517
2518         rcu_read_lock();
2519         for_each_rmrr_units(rmrr) {
2520                 /*
2521                  * Return TRUE if this RMRR contains the device that
2522                  * is passed in.
2523                  */
2524                 for_each_active_dev_scope(rmrr->devices,
2525                                           rmrr->devices_cnt, i, tmp)
2526                         if (tmp == dev) {
2527                                 rcu_read_unlock();
2528                                 return true;
2529                         }
2530         }
2531         rcu_read_unlock();
2532         return false;
2533 }
2534
2535 static int iommu_should_identity_map(struct device *dev, int startup)
2536 {
2537
2538         if (dev_is_pci(dev)) {
2539                 struct pci_dev *pdev = to_pci_dev(dev);
2540
2541                 /*
2542                  * We want to prevent any device associated with an RMRR from
2543                  * getting placed into the SI Domain. This is done because
2544                  * problems exist when devices are moved in and out of domains
2545                  * and their respective RMRR info is lost. We exempt USB devices
2546                  * from this process due to their usage of RMRRs that are known
2547                  * to not be needed after BIOS hand-off to OS.
2548                  */
2549                 if (device_has_rmrr(dev) &&
2550                     (pdev->class >> 8) != PCI_CLASS_SERIAL_USB)
2551                         return 0;
2552
2553                 if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev))
2554                         return 1;
2555
2556                 if ((iommu_identity_mapping & IDENTMAP_GFX) && IS_GFX_DEVICE(pdev))
2557                         return 1;
2558
2559                 if (!(iommu_identity_mapping & IDENTMAP_ALL))
2560                         return 0;
2561
2562                 /*
2563                  * We want to start off with all devices in the 1:1 domain, and
2564                  * take them out later if we find they can't access all of memory.
2565                  *
2566                  * However, we can't do this for PCI devices behind bridges,
2567                  * because all PCI devices behind the same bridge will end up
2568                  * with the same source-id on their transactions.
2569                  *
2570                  * Practically speaking, we can't change things around for these
2571                  * devices at run-time, because we can't be sure there'll be no
2572                  * DMA transactions in flight for any of their siblings.
2573                  *
2574                  * So PCI devices (unless they're on the root bus) as well as
2575                  * their parent PCI-PCI or PCIe-PCI bridges must be left _out_ of
2576                  * the 1:1 domain, just in _case_ one of their siblings turns out
2577                  * not to be able to map all of memory.
2578                  */
2579                 if (!pci_is_pcie(pdev)) {
2580                         if (!pci_is_root_bus(pdev->bus))
2581                                 return 0;
2582                         if (pdev->class >> 8 == PCI_CLASS_BRIDGE_PCI)
2583                                 return 0;
2584                 } else if (pci_pcie_type(pdev) == PCI_EXP_TYPE_PCI_BRIDGE)
2585                         return 0;
2586         } else {
2587                 if (device_has_rmrr(dev))
2588                         return 0;
2589         }
2590
2591         /*
2592          * At boot time, we don't yet know if devices will be 64-bit capable.
2593          * Assume that they will — if they turn out not to be, then we can
2594          * take them out of the 1:1 domain later.
2595          */
2596         if (!startup) {
2597                 /*
2598                  * If the device's dma_mask is less than the system's memory
2599                  * size then this is not a candidate for identity mapping.
2600                  */
2601                 u64 dma_mask = *dev->dma_mask;
2602
2603                 if (dev->coherent_dma_mask &&
2604                     dev->coherent_dma_mask < dma_mask)
2605                         dma_mask = dev->coherent_dma_mask;
2606
2607                 return dma_mask >= dma_get_required_mask(dev);
2608         }
2609
2610         return 1;
2611 }
2612
2613 static int __init dev_prepare_static_identity_mapping(struct device *dev, int hw)
2614 {
2615         int ret;
2616
2617         if (!iommu_should_identity_map(dev, 1))
2618                 return 0;
2619
2620         ret = domain_add_dev_info(si_domain, dev,
2621                                   hw ? CONTEXT_TT_PASS_THROUGH :
2622                                        CONTEXT_TT_MULTI_LEVEL);
2623         if (!ret)
2624                 pr_info("IOMMU: %s identity mapping for device %s\n",
2625                         hw ? "hardware" : "software", dev_name(dev));
2626         else if (ret == -ENODEV)
2627                 /* device not associated with an iommu */
2628                 ret = 0;
2629
2630         return ret;
2631 }
2632
2633
2634 static int __init iommu_prepare_static_identity_mapping(int hw)
2635 {
2636         struct pci_dev *pdev = NULL;
2637         struct dmar_drhd_unit *drhd;
2638         struct intel_iommu *iommu;
2639         struct device *dev;
2640         int i;
2641         int ret = 0;
2642
2643         ret = si_domain_init(hw);
2644         if (ret)
2645                 return -EFAULT;
2646
2647         for_each_pci_dev(pdev) {
2648                 ret = dev_prepare_static_identity_mapping(&pdev->dev, hw);
2649                 if (ret)
2650                         return ret;
2651         }
2652
2653         for_each_active_iommu(iommu, drhd)
2654                 for_each_active_dev_scope(drhd->devices, drhd->devices_cnt, i, dev) {
2655                         struct acpi_device_physical_node *pn;
2656                         struct acpi_device *adev;
2657
2658                         if (dev->bus != &acpi_bus_type)
2659                                 continue;
2660                                 
2661                         adev= to_acpi_device(dev);
2662                         mutex_lock(&adev->physical_node_lock);
2663                         list_for_each_entry(pn, &adev->physical_node_list, node) {
2664                                 ret = dev_prepare_static_identity_mapping(pn->dev, hw);
2665                                 if (ret)
2666                                         break;
2667                         }
2668                         mutex_unlock(&adev->physical_node_lock);
2669                         if (ret)
2670                                 return ret;
2671                 }
2672
2673         return 0;
2674 }
2675
2676 static int __init init_dmars(void)
2677 {
2678         struct dmar_drhd_unit *drhd;
2679         struct dmar_rmrr_unit *rmrr;
2680         struct device *dev;
2681         struct intel_iommu *iommu;
2682         int i, ret;
2683
2684         /*
2685          * for each drhd
2686          *    allocate root
2687          *    initialize and program root entry to not present
2688          * endfor
2689          */
2690         for_each_drhd_unit(drhd) {
2691                 /*
2692                  * lock not needed as this is only incremented in the single
2693                  * threaded kernel __init code path all other access are read
2694                  * only
2695                  */
2696                 if (g_num_of_iommus < IOMMU_UNITS_SUPPORTED) {
2697                         g_num_of_iommus++;
2698                         continue;
2699                 }
2700                 printk_once(KERN_ERR "intel-iommu: exceeded %d IOMMUs\n",
2701                           IOMMU_UNITS_SUPPORTED);
2702         }
2703
2704         g_iommus = kcalloc(g_num_of_iommus, sizeof(struct intel_iommu *),
2705                         GFP_KERNEL);
2706         if (!g_iommus) {
2707                 printk(KERN_ERR "Allocating global iommu array failed\n");
2708                 ret = -ENOMEM;
2709                 goto error;
2710         }
2711
2712         deferred_flush = kzalloc(g_num_of_iommus *
2713                 sizeof(struct deferred_flush_tables), GFP_KERNEL);
2714         if (!deferred_flush) {
2715                 ret = -ENOMEM;
2716                 goto free_g_iommus;
2717         }
2718
2719         for_each_active_iommu(iommu, drhd) {
2720                 g_iommus[iommu->seq_id] = iommu;
2721
2722                 ret = iommu_init_domains(iommu);
2723                 if (ret)
2724                         goto free_iommu;
2725
2726                 /*
2727                  * TBD:
2728                  * we could share the same root & context tables
2729                  * among all IOMMU's. Need to Split it later.
2730                  */
2731                 ret = iommu_alloc_root_entry(iommu);
2732                 if (ret) {
2733                         printk(KERN_ERR "IOMMU: allocate root entry failed\n");
2734                         goto free_iommu;
2735                 }
2736                 if (!ecap_pass_through(iommu->ecap))
2737                         hw_pass_through = 0;
2738         }
2739
2740         /*
2741          * Start from the sane iommu hardware state.
2742          */
2743         for_each_active_iommu(iommu, drhd) {
2744                 /*
2745                  * If the queued invalidation is already initialized by us
2746                  * (for example, while enabling interrupt-remapping) then
2747                  * we got the things already rolling from a sane state.
2748                  */
2749                 if (iommu->qi)
2750                         continue;
2751
2752                 /*
2753                  * Clear any previous faults.
2754                  */
2755                 dmar_fault(-1, iommu);
2756                 /*
2757                  * Disable queued invalidation if supported and already enabled
2758                  * before OS handover.
2759                  */
2760                 dmar_disable_qi(iommu);
2761         }
2762
2763         for_each_active_iommu(iommu, drhd) {
2764                 if (dmar_enable_qi(iommu)) {
2765                         /*
2766                          * Queued Invalidate not enabled, use Register Based
2767                          * Invalidate
2768                          */
2769                         iommu->flush.flush_context = __iommu_flush_context;
2770                         iommu->flush.flush_iotlb = __iommu_flush_iotlb;
2771                         printk(KERN_INFO "IOMMU %d 0x%Lx: using Register based "
2772                                "invalidation\n",
2773                                 iommu->seq_id,
2774                                (unsigned long long)drhd->reg_base_addr);
2775                 } else {
2776                         iommu->flush.flush_context = qi_flush_context;
2777                         iommu->flush.flush_iotlb = qi_flush_iotlb;
2778                         printk(KERN_INFO "IOMMU %d 0x%Lx: using Queued "
2779                                "invalidation\n",
2780                                 iommu->seq_id,
2781                                (unsigned long long)drhd->reg_base_addr);
2782                 }
2783         }
2784
2785         if (iommu_pass_through)
2786                 iommu_identity_mapping |= IDENTMAP_ALL;
2787
2788 #ifdef CONFIG_INTEL_IOMMU_BROKEN_GFX_WA
2789         iommu_identity_mapping |= IDENTMAP_GFX;
2790 #endif
2791
2792         check_tylersburg_isoch();
2793
2794         /*
2795          * If pass through is not set or not enabled, setup context entries for
2796          * identity mappings for rmrr, gfx, and isa and may fall back to static
2797          * identity mapping if iommu_identity_mapping is set.
2798          */
2799         if (iommu_identity_mapping) {
2800                 ret = iommu_prepare_static_identity_mapping(hw_pass_through);
2801                 if (ret) {
2802                         printk(KERN_CRIT "Failed to setup IOMMU pass-through\n");
2803                         goto free_iommu;
2804                 }
2805         }
2806         /*
2807          * For each rmrr
2808          *   for each dev attached to rmrr
2809          *   do
2810          *     locate drhd for dev, alloc domain for dev
2811          *     allocate free domain
2812          *     allocate page table entries for rmrr
2813          *     if context not allocated for bus
2814          *           allocate and init context
2815          *           set present in root table for this bus
2816          *     init context with domain, translation etc
2817          *    endfor
2818          * endfor
2819          */
2820         printk(KERN_INFO "IOMMU: Setting RMRR:\n");
2821         for_each_rmrr_units(rmrr) {
2822                 /* some BIOS lists non-exist devices in DMAR table. */
2823                 for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
2824                                           i, dev) {
2825                         ret = iommu_prepare_rmrr_dev(rmrr, dev);
2826                         if (ret)
2827                                 printk(KERN_ERR
2828                                        "IOMMU: mapping reserved region failed\n");
2829                 }
2830         }
2831
2832         iommu_prepare_isa();
2833
2834         /*
2835          * for each drhd
2836          *   enable fault log
2837          *   global invalidate context cache
2838          *   global invalidate iotlb
2839          *   enable translation
2840          */
2841         for_each_iommu(iommu, drhd) {
2842                 if (drhd->ignored) {
2843                         /*
2844                          * we always have to disable PMRs or DMA may fail on
2845                          * this device
2846                          */
2847                         if (force_on)
2848                                 iommu_disable_protect_mem_regions(iommu);
2849                         continue;
2850                 }
2851
2852                 iommu_flush_write_buffer(iommu);
2853
2854                 ret = dmar_set_interrupt(iommu);
2855                 if (ret)
2856                         goto free_iommu;
2857
2858                 iommu_set_root_entry(iommu);
2859
2860                 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
2861                 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
2862
2863                 ret = iommu_enable_translation(iommu);
2864                 if (ret)
2865                         goto free_iommu;
2866
2867                 iommu_disable_protect_mem_regions(iommu);
2868         }
2869
2870         return 0;
2871
2872 free_iommu:
2873         for_each_active_iommu(iommu, drhd)
2874                 free_dmar_iommu(iommu);
2875         kfree(deferred_flush);
2876 free_g_iommus:
2877         kfree(g_iommus);
2878 error:
2879         return ret;
2880 }
2881
2882 /* This takes a number of _MM_ pages, not VTD pages */
2883 static struct iova *intel_alloc_iova(struct device *dev,
2884                                      struct dmar_domain *domain,
2885                                      unsigned long nrpages, uint64_t dma_mask)
2886 {
2887         struct iova *iova = NULL;
2888
2889         /* Restrict dma_mask to the width that the iommu can handle */
2890         dma_mask = min_t(uint64_t, DOMAIN_MAX_ADDR(domain->gaw), dma_mask);
2891
2892         if (!dmar_forcedac && dma_mask > DMA_BIT_MASK(32)) {
2893                 /*
2894                  * First try to allocate an io virtual address in
2895                  * DMA_BIT_MASK(32) and if that fails then try allocating
2896                  * from higher range
2897                  */
2898                 iova = alloc_iova(&domain->iovad, nrpages,
2899                                   IOVA_PFN(DMA_BIT_MASK(32)), 1);
2900                 if (iova)
2901                         return iova;
2902         }
2903         iova = alloc_iova(&domain->iovad, nrpages, IOVA_PFN(dma_mask), 1);
2904         if (unlikely(!iova)) {
2905                 printk(KERN_ERR "Allocating %ld-page iova for %s failed",
2906                        nrpages, dev_name(dev));
2907                 return NULL;
2908         }
2909
2910         return iova;
2911 }
2912
2913 static struct dmar_domain *__get_valid_domain_for_dev(struct device *dev)
2914 {
2915         struct dmar_domain *domain;
2916         int ret;
2917
2918         domain = get_domain_for_dev(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
2919         if (!domain) {
2920                 printk(KERN_ERR "Allocating domain for %s failed",
2921                        dev_name(dev));
2922                 return NULL;
2923         }
2924
2925         /* make sure context mapping is ok */
2926         if (unlikely(!domain_context_mapped(dev))) {
2927                 ret = domain_context_mapping(domain, dev, CONTEXT_TT_MULTI_LEVEL);
2928                 if (ret) {
2929                         printk(KERN_ERR "Domain context map for %s failed",
2930                                dev_name(dev));
2931                         return NULL;
2932                 }
2933         }
2934
2935         return domain;
2936 }
2937
2938 static inline struct dmar_domain *get_valid_domain_for_dev(struct device *dev)
2939 {
2940         struct device_domain_info *info;
2941
2942         /* No lock here, assumes no domain exit in normal case */
2943         info = dev->archdata.iommu;
2944         if (likely(info))
2945                 return info->domain;
2946
2947         return __get_valid_domain_for_dev(dev);
2948 }
2949
2950 static int iommu_dummy(struct device *dev)
2951 {
2952         return dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO;
2953 }
2954
2955 /* Check if the dev needs to go through non-identity map and unmap process.*/
2956 static int iommu_no_mapping(struct device *dev)
2957 {
2958         int found;
2959
2960         if (iommu_dummy(dev))
2961                 return 1;
2962
2963         if (!iommu_identity_mapping)
2964                 return 0;
2965
2966         found = identity_mapping(dev);
2967         if (found) {
2968                 if (iommu_should_identity_map(dev, 0))
2969                         return 1;
2970                 else {
2971                         /*
2972                          * 32 bit DMA is removed from si_domain and fall back
2973                          * to non-identity mapping.
2974                          */
2975                         domain_remove_one_dev_info(si_domain, dev);
2976                         printk(KERN_INFO "32bit %s uses non-identity mapping\n",
2977                                dev_name(dev));
2978                         return 0;
2979                 }
2980         } else {
2981                 /*
2982                  * In case of a detached 64 bit DMA device from vm, the device
2983                  * is put into si_domain for identity mapping.
2984                  */
2985                 if (iommu_should_identity_map(dev, 0)) {
2986                         int ret;
2987                         ret = domain_add_dev_info(si_domain, dev,
2988                                                   hw_pass_through ?
2989                                                   CONTEXT_TT_PASS_THROUGH :
2990                                                   CONTEXT_TT_MULTI_LEVEL);
2991                         if (!ret) {
2992                                 printk(KERN_INFO "64bit %s uses identity mapping\n",
2993                                        dev_name(dev));
2994                                 return 1;
2995                         }
2996                 }
2997         }
2998
2999         return 0;
3000 }
3001
3002 static dma_addr_t __intel_map_single(struct device *dev, phys_addr_t paddr,
3003                                      size_t size, int dir, u64 dma_mask)
3004 {
3005         struct dmar_domain *domain;
3006         phys_addr_t start_paddr;
3007         struct iova *iova;
3008         int prot = 0;
3009         int ret;
3010         struct intel_iommu *iommu;
3011         unsigned long paddr_pfn = paddr >> PAGE_SHIFT;
3012
3013         BUG_ON(dir == DMA_NONE);
3014
3015         if (iommu_no_mapping(dev))
3016                 return paddr;
3017
3018         domain = get_valid_domain_for_dev(dev);
3019         if (!domain)
3020                 return 0;
3021
3022         iommu = domain_get_iommu(domain);
3023         size = aligned_nrpages(paddr, size);
3024
3025         iova = intel_alloc_iova(dev, domain, dma_to_mm_pfn(size), dma_mask);
3026         if (!iova)
3027                 goto error;
3028
3029         /*
3030          * Check if DMAR supports zero-length reads on write only
3031          * mappings..
3032          */
3033         if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
3034                         !cap_zlr(iommu->cap))
3035                 prot |= DMA_PTE_READ;
3036         if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3037                 prot |= DMA_PTE_WRITE;
3038         /*
3039          * paddr - (paddr + size) might be partial page, we should map the whole
3040          * page.  Note: if two part of one page are separately mapped, we
3041          * might have two guest_addr mapping to the same host paddr, but this
3042          * is not a big problem
3043          */
3044         ret = domain_pfn_mapping(domain, mm_to_dma_pfn(iova->pfn_lo),
3045                                  mm_to_dma_pfn(paddr_pfn), size, prot);
3046         if (ret)
3047                 goto error;
3048
3049         /* it's a non-present to present mapping. Only flush if caching mode */
3050         if (cap_caching_mode(iommu->cap))
3051                 iommu_flush_iotlb_psi(iommu, domain->id, mm_to_dma_pfn(iova->pfn_lo), size, 0, 1);
3052         else
3053                 iommu_flush_write_buffer(iommu);
3054
3055         start_paddr = (phys_addr_t)iova->pfn_lo << PAGE_SHIFT;
3056         start_paddr += paddr & ~PAGE_MASK;
3057         return start_paddr;
3058
3059 error:
3060         if (iova)
3061                 __free_iova(&domain->iovad, iova);
3062         printk(KERN_ERR"Device %s request: %zx@%llx dir %d --- failed\n",
3063                 dev_name(dev), size, (unsigned long long)paddr, dir);
3064         return 0;
3065 }
3066
3067 static dma_addr_t intel_map_page(struct device *dev, struct page *page,
3068                                  unsigned long offset, size_t size,
3069                                  enum dma_data_direction dir,
3070                                  struct dma_attrs *attrs)
3071 {
3072         return __intel_map_single(dev, page_to_phys(page) + offset, size,
3073                                   dir, *dev->dma_mask);
3074 }
3075
3076 static void flush_unmaps(void)
3077 {
3078         int i, j;
3079
3080         timer_on = 0;
3081
3082         /* just flush them all */
3083         for (i = 0; i < g_num_of_iommus; i++) {
3084                 struct intel_iommu *iommu = g_iommus[i];
3085                 if (!iommu)
3086                         continue;
3087
3088                 if (!deferred_flush[i].next)
3089                         continue;
3090
3091                 /* In caching mode, global flushes turn emulation expensive */
3092                 if (!cap_caching_mode(iommu->cap))
3093                         iommu->flush.flush_iotlb(iommu, 0, 0, 0,
3094                                          DMA_TLB_GLOBAL_FLUSH);
3095                 for (j = 0; j < deferred_flush[i].next; j++) {
3096                         unsigned long mask;
3097                         struct iova *iova = deferred_flush[i].iova[j];
3098                         struct dmar_domain *domain = deferred_flush[i].domain[j];
3099
3100                         /* On real hardware multiple invalidations are expensive */
3101                         if (cap_caching_mode(iommu->cap))
3102                                 iommu_flush_iotlb_psi(iommu, domain->id,
3103                                         iova->pfn_lo, iova->pfn_hi - iova->pfn_lo + 1,
3104                                         !deferred_flush[i].freelist[j], 0);
3105                         else {
3106                                 mask = ilog2(mm_to_dma_pfn(iova->pfn_hi - iova->pfn_lo + 1));
3107                                 iommu_flush_dev_iotlb(deferred_flush[i].domain[j],
3108                                                 (uint64_t)iova->pfn_lo << PAGE_SHIFT, mask);
3109                         }
3110                         __free_iova(&deferred_flush[i].domain[j]->iovad, iova);
3111                         if (deferred_flush[i].freelist[j])
3112                                 dma_free_pagelist(deferred_flush[i].freelist[j]);
3113                 }
3114                 deferred_flush[i].next = 0;
3115         }
3116
3117         list_size = 0;
3118 }
3119
3120 static void flush_unmaps_timeout(unsigned long data)
3121 {
3122         unsigned long flags;
3123
3124         spin_lock_irqsave(&async_umap_flush_lock, flags);
3125         flush_unmaps();
3126         spin_unlock_irqrestore(&async_umap_flush_lock, flags);
3127 }
3128
3129 static void add_unmap(struct dmar_domain *dom, struct iova *iova, struct page *freelist)
3130 {
3131         unsigned long flags;
3132         int next, iommu_id;
3133         struct intel_iommu *iommu;
3134
3135         spin_lock_irqsave(&async_umap_flush_lock, flags);
3136         if (list_size == HIGH_WATER_MARK)
3137                 flush_unmaps();
3138
3139         iommu = domain_get_iommu(dom);
3140         iommu_id = iommu->seq_id;
3141
3142         next = deferred_flush[iommu_id].next;
3143         deferred_flush[iommu_id].domain[next] = dom;
3144         deferred_flush[iommu_id].iova[next] = iova;
3145         deferred_flush[iommu_id].freelist[next] = freelist;
3146         deferred_flush[iommu_id].next++;
3147
3148         if (!timer_on) {
3149                 mod_timer(&unmap_timer, jiffies + msecs_to_jiffies(10));
3150                 timer_on = 1;
3151         }
3152         list_size++;
3153         spin_unlock_irqrestore(&async_umap_flush_lock, flags);
3154 }
3155
3156 static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr,
3157                              size_t size, enum dma_data_direction dir,
3158                              struct dma_attrs *attrs)
3159 {
3160         struct dmar_domain *domain;
3161         unsigned long start_pfn, last_pfn;
3162         struct iova *iova;
3163         struct intel_iommu *iommu;
3164         struct page *freelist;
3165
3166         if (iommu_no_mapping(dev))
3167                 return;
3168
3169         domain = find_domain(dev);
3170         BUG_ON(!domain);
3171
3172         iommu = domain_get_iommu(domain);
3173
3174         iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr));
3175         if (WARN_ONCE(!iova, "Driver unmaps unmatched page at PFN %llx\n",
3176                       (unsigned long long)dev_addr))
3177                 return;
3178
3179         start_pfn = mm_to_dma_pfn(iova->pfn_lo);
3180         last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1;
3181
3182         pr_debug("Device %s unmapping: pfn %lx-%lx\n",
3183                  dev_name(dev), start_pfn, last_pfn);
3184
3185         freelist = domain_unmap(domain, start_pfn, last_pfn);
3186
3187         if (intel_iommu_strict) {
3188                 iommu_flush_iotlb_psi(iommu, domain->id, start_pfn,
3189                                       last_pfn - start_pfn + 1, !freelist, 0);
3190                 /* free iova */
3191                 __free_iova(&domain->iovad, iova);
3192                 dma_free_pagelist(freelist);
3193         } else {
3194                 add_unmap(domain, iova, freelist);
3195                 /*
3196                  * queue up the release of the unmap to save the 1/6th of the
3197                  * cpu used up by the iotlb flush operation...
3198                  */
3199         }
3200 }
3201
3202 static void *intel_alloc_coherent(struct device *dev, size_t size,
3203                                   dma_addr_t *dma_handle, gfp_t flags,
3204                                   struct dma_attrs *attrs)
3205 {
3206         struct page *page = NULL;
3207         int order;
3208
3209         size = PAGE_ALIGN(size);
3210         order = get_order(size);
3211
3212         if (!iommu_no_mapping(dev))
3213                 flags &= ~(GFP_DMA | GFP_DMA32);
3214         else if (dev->coherent_dma_mask < dma_get_required_mask(dev)) {
3215                 if (dev->coherent_dma_mask < DMA_BIT_MASK(32))
3216                         flags |= GFP_DMA;
3217                 else
3218                         flags |= GFP_DMA32;
3219         }
3220
3221         if (flags & __GFP_WAIT) {
3222                 unsigned int count = size >> PAGE_SHIFT;
3223
3224                 page = dma_alloc_from_contiguous(dev, count, order);
3225                 if (page && iommu_no_mapping(dev) &&
3226                     page_to_phys(page) + size > dev->coherent_dma_mask) {
3227                         dma_release_from_contiguous(dev, page, count);
3228                         page = NULL;
3229                 }
3230         }
3231
3232         if (!page)
3233                 page = alloc_pages(flags, order);
3234         if (!page)
3235                 return NULL;
3236         memset(page_address(page), 0, size);
3237
3238         *dma_handle = __intel_map_single(dev, page_to_phys(page), size,
3239                                          DMA_BIDIRECTIONAL,
3240                                          dev->coherent_dma_mask);
3241         if (*dma_handle)
3242                 return page_address(page);
3243         if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
3244                 __free_pages(page, order);
3245
3246         return NULL;
3247 }
3248
3249 static void intel_free_coherent(struct device *dev, size_t size, void *vaddr,
3250                                 dma_addr_t dma_handle, struct dma_attrs *attrs)
3251 {
3252         int order;
3253         struct page *page = virt_to_page(vaddr);
3254
3255         size = PAGE_ALIGN(size);
3256         order = get_order(size);
3257
3258         intel_unmap_page(dev, dma_handle, size, DMA_BIDIRECTIONAL, NULL);
3259         if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
3260                 __free_pages(page, order);
3261 }
3262
3263 static void intel_unmap_sg(struct device *dev, struct scatterlist *sglist,
3264                            int nelems, enum dma_data_direction dir,
3265                            struct dma_attrs *attrs)
3266 {
3267         struct dmar_domain *domain;
3268         unsigned long start_pfn, last_pfn;
3269         struct iova *iova;
3270         struct intel_iommu *iommu;
3271         struct page *freelist;
3272
3273         if (iommu_no_mapping(dev))
3274                 return;
3275
3276         domain = find_domain(dev);
3277         BUG_ON(!domain);
3278
3279         iommu = domain_get_iommu(domain);
3280
3281         iova = find_iova(&domain->iovad, IOVA_PFN(sglist[0].dma_address));
3282         if (WARN_ONCE(!iova, "Driver unmaps unmatched sglist at PFN %llx\n",
3283                       (unsigned long long)sglist[0].dma_address))
3284                 return;
3285
3286         start_pfn = mm_to_dma_pfn(iova->pfn_lo);
3287         last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1;
3288
3289         freelist = domain_unmap(domain, start_pfn, last_pfn);
3290
3291         if (intel_iommu_strict) {
3292                 iommu_flush_iotlb_psi(iommu, domain->id, start_pfn,
3293                                       last_pfn - start_pfn + 1, !freelist, 0);
3294                 /* free iova */
3295                 __free_iova(&domain->iovad, iova);
3296                 dma_free_pagelist(freelist);
3297         } else {
3298                 add_unmap(domain, iova, freelist);
3299                 /*
3300                  * queue up the release of the unmap to save the 1/6th of the
3301                  * cpu used up by the iotlb flush operation...
3302                  */
3303         }
3304 }
3305
3306 static int intel_nontranslate_map_sg(struct device *hddev,
3307         struct scatterlist *sglist, int nelems, int dir)
3308 {
3309         int i;
3310         struct scatterlist *sg;
3311
3312         for_each_sg(sglist, sg, nelems, i) {
3313                 BUG_ON(!sg_page(sg));
3314                 sg->dma_address = page_to_phys(sg_page(sg)) + sg->offset;
3315                 sg->dma_length = sg->length;
3316         }
3317         return nelems;
3318 }
3319
3320 static int intel_map_sg(struct device *dev, struct scatterlist *sglist, int nelems,
3321                         enum dma_data_direction dir, struct dma_attrs *attrs)
3322 {
3323         int i;
3324         struct dmar_domain *domain;
3325         size_t size = 0;
3326         int prot = 0;
3327         struct iova *iova = NULL;
3328         int ret;
3329         struct scatterlist *sg;
3330         unsigned long start_vpfn;
3331         struct intel_iommu *iommu;
3332
3333         BUG_ON(dir == DMA_NONE);
3334         if (iommu_no_mapping(dev))
3335                 return intel_nontranslate_map_sg(dev, sglist, nelems, dir);
3336
3337         domain = get_valid_domain_for_dev(dev);
3338         if (!domain)
3339                 return 0;
3340
3341         iommu = domain_get_iommu(domain);
3342
3343         for_each_sg(sglist, sg, nelems, i)
3344                 size += aligned_nrpages(sg->offset, sg->length);
3345
3346         iova = intel_alloc_iova(dev, domain, dma_to_mm_pfn(size),
3347                                 *dev->dma_mask);
3348         if (!iova) {
3349                 sglist->dma_length = 0;
3350                 return 0;
3351         }
3352
3353         /*
3354          * Check if DMAR supports zero-length reads on write only
3355          * mappings..
3356          */
3357         if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
3358                         !cap_zlr(iommu->cap))
3359                 prot |= DMA_PTE_READ;
3360         if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3361                 prot |= DMA_PTE_WRITE;
3362
3363         start_vpfn = mm_to_dma_pfn(iova->pfn_lo);
3364
3365         ret = domain_sg_mapping(domain, start_vpfn, sglist, size, prot);
3366         if (unlikely(ret)) {
3367                 /*  clear the page */
3368                 dma_pte_clear_range(domain, start_vpfn,
3369                                     start_vpfn + size - 1);
3370                 /* free page tables */
3371                 dma_pte_free_pagetable(domain, start_vpfn,
3372                                        start_vpfn + size - 1);
3373                 /* free iova */
3374                 __free_iova(&domain->iovad, iova);
3375                 return 0;
3376         }
3377
3378         /* it's a non-present to present mapping. Only flush if caching mode */
3379         if (cap_caching_mode(iommu->cap))
3380                 iommu_flush_iotlb_psi(iommu, domain->id, start_vpfn, size, 0, 1);
3381         else
3382                 iommu_flush_write_buffer(iommu);
3383
3384         return nelems;
3385 }
3386
3387 static int intel_mapping_error(struct device *dev, dma_addr_t dma_addr)
3388 {
3389         return !dma_addr;
3390 }
3391
3392 struct dma_map_ops intel_dma_ops = {
3393         .alloc = intel_alloc_coherent,
3394         .free = intel_free_coherent,
3395         .map_sg = intel_map_sg,
3396         .unmap_sg = intel_unmap_sg,
3397         .map_page = intel_map_page,
3398         .unmap_page = intel_unmap_page,
3399         .mapping_error = intel_mapping_error,
3400 };
3401
3402 static inline int iommu_domain_cache_init(void)
3403 {
3404         int ret = 0;
3405
3406         iommu_domain_cache = kmem_cache_create("iommu_domain",
3407                                          sizeof(struct dmar_domain),
3408                                          0,
3409                                          SLAB_HWCACHE_ALIGN,
3410
3411                                          NULL);
3412         if (!iommu_domain_cache) {
3413                 printk(KERN_ERR "Couldn't create iommu_domain cache\n");
3414                 ret = -ENOMEM;
3415         }
3416
3417         return ret;
3418 }
3419
3420 static inline int iommu_devinfo_cache_init(void)
3421 {
3422         int ret = 0;
3423
3424         iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
3425                                          sizeof(struct device_domain_info),
3426                                          0,
3427                                          SLAB_HWCACHE_ALIGN,
3428                                          NULL);
3429         if (!iommu_devinfo_cache) {
3430                 printk(KERN_ERR "Couldn't create devinfo cache\n");
3431                 ret = -ENOMEM;
3432         }
3433
3434         return ret;
3435 }
3436
3437 static inline int iommu_iova_cache_init(void)
3438 {
3439         int ret = 0;
3440
3441         iommu_iova_cache = kmem_cache_create("iommu_iova",
3442                                          sizeof(struct iova),
3443                                          0,
3444                                          SLAB_HWCACHE_ALIGN,
3445                                          NULL);
3446         if (!iommu_iova_cache) {
3447                 printk(KERN_ERR "Couldn't create iova cache\n");
3448                 ret = -ENOMEM;
3449         }
3450
3451         return ret;
3452 }
3453
3454 static int __init iommu_init_mempool(void)
3455 {
3456         int ret;
3457         ret = iommu_iova_cache_init();
3458         if (ret)
3459                 return ret;
3460
3461         ret = iommu_domain_cache_init();
3462         if (ret)
3463                 goto domain_error;
3464
3465         ret = iommu_devinfo_cache_init();
3466         if (!ret)
3467                 return ret;
3468
3469         kmem_cache_destroy(iommu_domain_cache);
3470 domain_error:
3471         kmem_cache_destroy(iommu_iova_cache);
3472
3473         return -ENOMEM;
3474 }
3475
3476 static void __init iommu_exit_mempool(void)
3477 {
3478         kmem_cache_destroy(iommu_devinfo_cache);
3479         kmem_cache_destroy(iommu_domain_cache);
3480         kmem_cache_destroy(iommu_iova_cache);
3481
3482 }
3483
3484 static void quirk_ioat_snb_local_iommu(struct pci_dev *pdev)
3485 {
3486         struct dmar_drhd_unit *drhd;
3487         u32 vtbar;
3488         int rc;
3489
3490         /* We know that this device on this chipset has its own IOMMU.
3491          * If we find it under a different IOMMU, then the BIOS is lying
3492          * to us. Hope that the IOMMU for this device is actually
3493          * disabled, and it needs no translation...
3494          */
3495         rc = pci_bus_read_config_dword(pdev->bus, PCI_DEVFN(0, 0), 0xb0, &vtbar);
3496         if (rc) {
3497                 /* "can't" happen */
3498                 dev_info(&pdev->dev, "failed to run vt-d quirk\n");
3499                 return;
3500         }
3501         vtbar &= 0xffff0000;
3502
3503         /* we know that the this iommu should be at offset 0xa000 from vtbar */
3504         drhd = dmar_find_matched_drhd_unit(pdev);
3505         if (WARN_TAINT_ONCE(!drhd || drhd->reg_base_addr - vtbar != 0xa000,
3506                             TAINT_FIRMWARE_WORKAROUND,
3507                             "BIOS assigned incorrect VT-d unit for Intel(R) QuickData Technology device\n"))
3508                 pdev->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
3509 }
3510 DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB, quirk_ioat_snb_local_iommu);
3511
3512 static void __init init_no_remapping_devices(void)
3513 {
3514         struct dmar_drhd_unit *drhd;
3515         struct device *dev;
3516         int i;
3517
3518         for_each_drhd_unit(drhd) {
3519                 if (!drhd->include_all) {
3520                         for_each_active_dev_scope(drhd->devices,
3521                                                   drhd->devices_cnt, i, dev)
3522                                 break;
3523                         /* ignore DMAR unit if no devices exist */
3524                         if (i == drhd->devices_cnt)
3525                                 drhd->ignored = 1;
3526                 }
3527         }
3528
3529         for_each_active_drhd_unit(drhd) {
3530                 if (drhd->include_all)
3531                         continue;
3532
3533                 for_each_active_dev_scope(drhd->devices,
3534                                           drhd->devices_cnt, i, dev)
3535                         if (!dev_is_pci(dev) || !IS_GFX_DEVICE(to_pci_dev(dev)))
3536                                 break;
3537                 if (i < drhd->devices_cnt)
3538                         continue;
3539
3540                 /* This IOMMU has *only* gfx devices. Either bypass it or
3541                    set the gfx_mapped flag, as appropriate */
3542                 if (dmar_map_gfx) {
3543                         intel_iommu_gfx_mapped = 1;
3544                 } else {
3545                         drhd->ignored = 1;
3546                         for_each_active_dev_scope(drhd->devices,
3547                                                   drhd->devices_cnt, i, dev)
3548                                 dev->archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
3549                 }
3550         }
3551 }
3552
3553 #ifdef CONFIG_SUSPEND
3554 static int init_iommu_hw(void)
3555 {
3556         struct dmar_drhd_unit *drhd;
3557         struct intel_iommu *iommu = NULL;
3558
3559         for_each_active_iommu(iommu, drhd)
3560                 if (iommu->qi)
3561                         dmar_reenable_qi(iommu);
3562
3563         for_each_iommu(iommu, drhd) {
3564                 if (drhd->ignored) {
3565                         /*
3566                          * we always have to disable PMRs or DMA may fail on
3567                          * this device
3568                          */
3569                         if (force_on)
3570                                 iommu_disable_protect_mem_regions(iommu);
3571                         continue;
3572                 }
3573         
3574                 iommu_flush_write_buffer(iommu);
3575
3576                 iommu_set_root_entry(iommu);
3577
3578                 iommu->flush.flush_context(iommu, 0, 0, 0,
3579                                            DMA_CCMD_GLOBAL_INVL);
3580                 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
3581                                          DMA_TLB_GLOBAL_FLUSH);
3582                 if (iommu_enable_translation(iommu))
3583                         return 1;
3584                 iommu_disable_protect_mem_regions(iommu);
3585         }
3586
3587         return 0;
3588 }
3589
3590 static void iommu_flush_all(void)
3591 {
3592         struct dmar_drhd_unit *drhd;
3593         struct intel_iommu *iommu;
3594
3595         for_each_active_iommu(iommu, drhd) {
3596                 iommu->flush.flush_context(iommu, 0, 0, 0,
3597                                            DMA_CCMD_GLOBAL_INVL);
3598                 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
3599                                          DMA_TLB_GLOBAL_FLUSH);
3600         }
3601 }
3602
3603 static int iommu_suspend(void)
3604 {
3605         struct dmar_drhd_unit *drhd;
3606         struct intel_iommu *iommu = NULL;
3607         unsigned long flag;
3608
3609         for_each_active_iommu(iommu, drhd) {
3610                 iommu->iommu_state = kzalloc(sizeof(u32) * MAX_SR_DMAR_REGS,
3611                                                  GFP_ATOMIC);
3612                 if (!iommu->iommu_state)
3613                         goto nomem;
3614         }
3615
3616         iommu_flush_all();
3617
3618         for_each_active_iommu(iommu, drhd) {
3619                 iommu_disable_translation(iommu);
3620
3621                 raw_spin_lock_irqsave(&iommu->register_lock, flag);
3622
3623                 iommu->iommu_state[SR_DMAR_FECTL_REG] =
3624                         readl(iommu->reg + DMAR_FECTL_REG);
3625                 iommu->iommu_state[SR_DMAR_FEDATA_REG] =
3626                         readl(iommu->reg + DMAR_FEDATA_REG);
3627                 iommu->iommu_state[SR_DMAR_FEADDR_REG] =
3628                         readl(iommu->reg + DMAR_FEADDR_REG);
3629                 iommu->iommu_state[SR_DMAR_FEUADDR_REG] =
3630                         readl(iommu->reg + DMAR_FEUADDR_REG);
3631
3632                 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
3633         }
3634         return 0;
3635
3636 nomem:
3637         for_each_active_iommu(iommu, drhd)
3638                 kfree(iommu->iommu_state);
3639
3640         return -ENOMEM;
3641 }
3642
3643 static void iommu_resume(void)
3644 {
3645         struct dmar_drhd_unit *drhd;
3646         struct intel_iommu *iommu = NULL;
3647         unsigned long flag;
3648
3649         if (init_iommu_hw()) {
3650                 if (force_on)
3651                         panic("tboot: IOMMU setup failed, DMAR can not resume!\n");
3652                 else
3653                         WARN(1, "IOMMU setup failed, DMAR can not resume!\n");
3654                 return;
3655         }
3656
3657         for_each_active_iommu(iommu, drhd) {
3658
3659                 raw_spin_lock_irqsave(&iommu->register_lock, flag);
3660
3661                 writel(iommu->iommu_state[SR_DMAR_FECTL_REG],
3662                         iommu->reg + DMAR_FECTL_REG);
3663                 writel(iommu->iommu_state[SR_DMAR_FEDATA_REG],
3664                         iommu->reg + DMAR_FEDATA_REG);
3665                 writel(iommu->iommu_state[SR_DMAR_FEADDR_REG],
3666                         iommu->reg + DMAR_FEADDR_REG);
3667                 writel(iommu->iommu_state[SR_DMAR_FEUADDR_REG],
3668                         iommu->reg + DMAR_FEUADDR_REG);
3669
3670                 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
3671         }
3672
3673         for_each_active_iommu(iommu, drhd)
3674                 kfree(iommu->iommu_state);
3675 }
3676
3677 static struct syscore_ops iommu_syscore_ops = {
3678         .resume         = iommu_resume,
3679         .suspend        = iommu_suspend,
3680 };
3681
3682 static void __init init_iommu_pm_ops(void)
3683 {
3684         register_syscore_ops(&iommu_syscore_ops);
3685 }
3686
3687 #else
3688 static inline void init_iommu_pm_ops(void) {}
3689 #endif  /* CONFIG_PM */
3690
3691
3692 int __init dmar_parse_one_rmrr(struct acpi_dmar_header *header)
3693 {
3694         struct acpi_dmar_reserved_memory *rmrr;
3695         struct dmar_rmrr_unit *rmrru;
3696
3697         rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL);
3698         if (!rmrru)
3699                 return -ENOMEM;
3700
3701         rmrru->hdr = header;
3702         rmrr = (struct acpi_dmar_reserved_memory *)header;
3703         rmrru->base_address = rmrr->base_address;
3704         rmrru->end_address = rmrr->end_address;
3705         rmrru->devices = dmar_alloc_dev_scope((void *)(rmrr + 1),
3706                                 ((void *)rmrr) + rmrr->header.length,
3707                                 &rmrru->devices_cnt);
3708         if (rmrru->devices_cnt && rmrru->devices == NULL) {
3709                 kfree(rmrru);
3710                 return -ENOMEM;
3711         }
3712
3713         list_add(&rmrru->list, &dmar_rmrr_units);
3714
3715         return 0;
3716 }
3717
3718 int __init dmar_parse_one_atsr(struct acpi_dmar_header *hdr)
3719 {
3720         struct acpi_dmar_atsr *atsr;
3721         struct dmar_atsr_unit *atsru;
3722
3723         atsr = container_of(hdr, struct acpi_dmar_atsr, header);
3724         atsru = kzalloc(sizeof(*atsru), GFP_KERNEL);
3725         if (!atsru)
3726                 return -ENOMEM;
3727
3728         atsru->hdr = hdr;
3729         atsru->include_all = atsr->flags & 0x1;
3730         if (!atsru->include_all) {
3731                 atsru->devices = dmar_alloc_dev_scope((void *)(atsr + 1),
3732                                 (void *)atsr + atsr->header.length,
3733                                 &atsru->devices_cnt);
3734                 if (atsru->devices_cnt && atsru->devices == NULL) {
3735                         kfree(atsru);
3736                         return -ENOMEM;
3737                 }
3738         }
3739
3740         list_add_rcu(&atsru->list, &dmar_atsr_units);
3741
3742         return 0;
3743 }
3744
3745 static void intel_iommu_free_atsr(struct dmar_atsr_unit *atsru)
3746 {
3747         dmar_free_dev_scope(&atsru->devices, &atsru->devices_cnt);
3748         kfree(atsru);
3749 }
3750
3751 static void intel_iommu_free_dmars(void)
3752 {
3753         struct dmar_rmrr_unit *rmrru, *rmrr_n;
3754         struct dmar_atsr_unit *atsru, *atsr_n;
3755
3756         list_for_each_entry_safe(rmrru, rmrr_n, &dmar_rmrr_units, list) {
3757                 list_del(&rmrru->list);
3758                 dmar_free_dev_scope(&rmrru->devices, &rmrru->devices_cnt);
3759                 kfree(rmrru);
3760         }
3761
3762         list_for_each_entry_safe(atsru, atsr_n, &dmar_atsr_units, list) {
3763                 list_del(&atsru->list);
3764                 intel_iommu_free_atsr(atsru);
3765         }
3766 }
3767
3768 int dmar_find_matched_atsr_unit(struct pci_dev *dev)
3769 {
3770         int i, ret = 1;
3771         struct pci_bus *bus;
3772         struct pci_dev *bridge = NULL;
3773         struct device *tmp;
3774         struct acpi_dmar_atsr *atsr;
3775         struct dmar_atsr_unit *atsru;
3776
3777         dev = pci_physfn(dev);
3778         for (bus = dev->bus; bus; bus = bus->parent) {
3779                 bridge = bus->self;
3780                 if (!bridge || !pci_is_pcie(bridge) ||
3781                     pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE)
3782                         return 0;
3783                 if (pci_pcie_type(bridge) == PCI_EXP_TYPE_ROOT_PORT)
3784                         break;
3785         }
3786         if (!bridge)
3787                 return 0;
3788
3789         rcu_read_lock();
3790         list_for_each_entry_rcu(atsru, &dmar_atsr_units, list) {
3791                 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
3792                 if (atsr->segment != pci_domain_nr(dev->bus))
3793                         continue;
3794
3795                 for_each_dev_scope(atsru->devices, atsru->devices_cnt, i, tmp)
3796                         if (tmp == &bridge->dev)
3797                                 goto out;
3798
3799                 if (atsru->include_all)
3800                         goto out;
3801         }
3802         ret = 0;
3803 out:
3804         rcu_read_unlock();
3805
3806         return ret;
3807 }
3808
3809 int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info)
3810 {
3811         int ret = 0;
3812         struct dmar_rmrr_unit *rmrru;
3813         struct dmar_atsr_unit *atsru;
3814         struct acpi_dmar_atsr *atsr;
3815         struct acpi_dmar_reserved_memory *rmrr;
3816
3817         if (!intel_iommu_enabled && system_state != SYSTEM_BOOTING)
3818                 return 0;
3819
3820         list_for_each_entry(rmrru, &dmar_rmrr_units, list) {
3821                 rmrr = container_of(rmrru->hdr,
3822                                     struct acpi_dmar_reserved_memory, header);
3823                 if (info->event == BUS_NOTIFY_ADD_DEVICE) {
3824                         ret = dmar_insert_dev_scope(info, (void *)(rmrr + 1),
3825                                 ((void *)rmrr) + rmrr->header.length,
3826                                 rmrr->segment, rmrru->devices,
3827                                 rmrru->devices_cnt);
3828                         if(ret < 0)
3829                                 return ret;
3830                 } else if (info->event == BUS_NOTIFY_DEL_DEVICE) {
3831                         dmar_remove_dev_scope(info, rmrr->segment,
3832                                 rmrru->devices, rmrru->devices_cnt);
3833                 }
3834         }
3835
3836         list_for_each_entry(atsru, &dmar_atsr_units, list) {
3837                 if (atsru->include_all)
3838                         continue;
3839
3840                 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
3841                 if (info->event == BUS_NOTIFY_ADD_DEVICE) {
3842                         ret = dmar_insert_dev_scope(info, (void *)(atsr + 1),
3843                                         (void *)atsr + atsr->header.length,
3844                                         atsr->segment, atsru->devices,
3845                                         atsru->devices_cnt);
3846                         if (ret > 0)
3847                                 break;
3848                         else if(ret < 0)
3849                                 return ret;
3850                 } else if (info->event == BUS_NOTIFY_DEL_DEVICE) {
3851                         if (dmar_remove_dev_scope(info, atsr->segment,
3852                                         atsru->devices, atsru->devices_cnt))
3853                                 break;
3854                 }
3855         }
3856
3857         return 0;
3858 }
3859
3860 /*
3861  * Here we only respond to action of unbound device from driver.
3862  *
3863  * Added device is not attached to its DMAR domain here yet. That will happen
3864  * when mapping the device to iova.
3865  */
3866 static int device_notifier(struct notifier_block *nb,
3867                                   unsigned long action, void *data)
3868 {
3869         struct device *dev = data;
3870         struct dmar_domain *domain;
3871
3872         if (iommu_dummy(dev))
3873                 return 0;
3874
3875         if (action != BUS_NOTIFY_UNBOUND_DRIVER &&
3876             action != BUS_NOTIFY_DEL_DEVICE)
3877                 return 0;
3878
3879         domain = find_domain(dev);
3880         if (!domain)
3881                 return 0;
3882
3883         down_read(&dmar_global_lock);
3884         domain_remove_one_dev_info(domain, dev);
3885         if (!domain_type_is_vm_or_si(domain) && list_empty(&domain->devices))
3886                 domain_exit(domain);
3887         up_read(&dmar_global_lock);
3888
3889         return 0;
3890 }
3891
3892 static struct notifier_block device_nb = {
3893         .notifier_call = device_notifier,
3894 };
3895
3896 static int intel_iommu_memory_notifier(struct notifier_block *nb,
3897                                        unsigned long val, void *v)
3898 {
3899         struct memory_notify *mhp = v;
3900         unsigned long long start, end;
3901         unsigned long start_vpfn, last_vpfn;
3902
3903         switch (val) {
3904         case MEM_GOING_ONLINE:
3905                 start = mhp->start_pfn << PAGE_SHIFT;
3906                 end = ((mhp->start_pfn + mhp->nr_pages) << PAGE_SHIFT) - 1;
3907                 if (iommu_domain_identity_map(si_domain, start, end)) {
3908                         pr_warn("dmar: failed to build identity map for [%llx-%llx]\n",
3909                                 start, end);
3910                         return NOTIFY_BAD;
3911                 }
3912                 break;
3913
3914         case MEM_OFFLINE:
3915         case MEM_CANCEL_ONLINE:
3916                 start_vpfn = mm_to_dma_pfn(mhp->start_pfn);
3917                 last_vpfn = mm_to_dma_pfn(mhp->start_pfn + mhp->nr_pages - 1);
3918                 while (start_vpfn <= last_vpfn) {
3919                         struct iova *iova;
3920                         struct dmar_drhd_unit *drhd;
3921                         struct intel_iommu *iommu;
3922                         struct page *freelist;
3923
3924                         iova = find_iova(&si_domain->iovad, start_vpfn);
3925                         if (iova == NULL) {
3926                                 pr_debug("dmar: failed get IOVA for PFN %lx\n",
3927                                          start_vpfn);
3928                                 break;
3929                         }
3930
3931                         iova = split_and_remove_iova(&si_domain->iovad, iova,
3932                                                      start_vpfn, last_vpfn);
3933                         if (iova == NULL) {
3934                                 pr_warn("dmar: failed to split IOVA PFN [%lx-%lx]\n",
3935                                         start_vpfn, last_vpfn);
3936                                 return NOTIFY_BAD;
3937                         }
3938
3939                         freelist = domain_unmap(si_domain, iova->pfn_lo,
3940                                                iova->pfn_hi);
3941
3942                         rcu_read_lock();
3943                         for_each_active_iommu(iommu, drhd)
3944                                 iommu_flush_iotlb_psi(iommu, si_domain->id,
3945                                         iova->pfn_lo,
3946                                         iova->pfn_hi - iova->pfn_lo + 1,
3947                                         !freelist, 0);
3948                         rcu_read_unlock();
3949                         dma_free_pagelist(freelist);
3950
3951                         start_vpfn = iova->pfn_hi + 1;
3952                         free_iova_mem(iova);
3953                 }
3954                 break;
3955         }
3956
3957         return NOTIFY_OK;
3958 }
3959
3960 static struct notifier_block intel_iommu_memory_nb = {
3961         .notifier_call = intel_iommu_memory_notifier,
3962         .priority = 0
3963 };
3964
3965
3966 static ssize_t intel_iommu_show_version(struct device *dev,
3967                                         struct device_attribute *attr,
3968                                         char *buf)
3969 {
3970         struct intel_iommu *iommu = dev_get_drvdata(dev);
3971         u32 ver = readl(iommu->reg + DMAR_VER_REG);
3972         return sprintf(buf, "%d:%d\n",
3973                        DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver));
3974 }
3975 static DEVICE_ATTR(version, S_IRUGO, intel_iommu_show_version, NULL);
3976
3977 static ssize_t intel_iommu_show_address(struct device *dev,
3978                                         struct device_attribute *attr,
3979                                         char *buf)
3980 {
3981         struct intel_iommu *iommu = dev_get_drvdata(dev);
3982         return sprintf(buf, "%llx\n", iommu->reg_phys);
3983 }
3984 static DEVICE_ATTR(address, S_IRUGO, intel_iommu_show_address, NULL);
3985
3986 static ssize_t intel_iommu_show_cap(struct device *dev,
3987                                     struct device_attribute *attr,
3988                                     char *buf)
3989 {
3990         struct intel_iommu *iommu = dev_get_drvdata(dev);
3991         return sprintf(buf, "%llx\n", iommu->cap);
3992 }
3993 static DEVICE_ATTR(cap, S_IRUGO, intel_iommu_show_cap, NULL);
3994
3995 static ssize_t intel_iommu_show_ecap(struct device *dev,
3996                                     struct device_attribute *attr,
3997                                     char *buf)
3998 {
3999         struct intel_iommu *iommu = dev_get_drvdata(dev);
4000         return sprintf(buf, "%llx\n", iommu->ecap);
4001 }
4002 static DEVICE_ATTR(ecap, S_IRUGO, intel_iommu_show_ecap, NULL);
4003
4004 static struct attribute *intel_iommu_attrs[] = {
4005         &dev_attr_version.attr,
4006         &dev_attr_address.attr,
4007         &dev_attr_cap.attr,
4008         &dev_attr_ecap.attr,
4009         NULL,
4010 };
4011
4012 static struct attribute_group intel_iommu_group = {
4013         .name = "intel-iommu",
4014         .attrs = intel_iommu_attrs,
4015 };
4016
4017 const struct attribute_group *intel_iommu_groups[] = {
4018         &intel_iommu_group,
4019         NULL,
4020 };
4021
4022 int __init intel_iommu_init(void)
4023 {
4024         int ret = -ENODEV;
4025         struct dmar_drhd_unit *drhd;
4026         struct intel_iommu *iommu;
4027
4028         /* VT-d is required for a TXT/tboot launch, so enforce that */
4029         force_on = tboot_force_iommu();
4030
4031         if (iommu_init_mempool()) {
4032                 if (force_on)
4033                         panic("tboot: Failed to initialize iommu memory\n");
4034                 return -ENOMEM;
4035         }
4036
4037         down_write(&dmar_global_lock);
4038         if (dmar_table_init()) {
4039                 if (force_on)
4040                         panic("tboot: Failed to initialize DMAR table\n");
4041                 goto out_free_dmar;
4042         }
4043
4044         /*
4045          * Disable translation if already enabled prior to OS handover.
4046          */
4047         for_each_active_iommu(iommu, drhd)
4048                 if (iommu->gcmd & DMA_GCMD_TE)
4049                         iommu_disable_translation(iommu);
4050
4051         if (dmar_dev_scope_init() < 0) {
4052                 if (force_on)
4053                         panic("tboot: Failed to initialize DMAR device scope\n");
4054                 goto out_free_dmar;
4055         }
4056
4057         if (no_iommu || dmar_disabled)
4058                 goto out_free_dmar;
4059
4060         if (list_empty(&dmar_rmrr_units))
4061                 printk(KERN_INFO "DMAR: No RMRR found\n");
4062
4063         if (list_empty(&dmar_atsr_units))
4064                 printk(KERN_INFO "DMAR: No ATSR found\n");
4065
4066         if (dmar_init_reserved_ranges()) {
4067                 if (force_on)
4068                         panic("tboot: Failed to reserve iommu ranges\n");
4069                 goto out_free_reserved_range;
4070         }
4071
4072         init_no_remapping_devices();
4073
4074         ret = init_dmars();
4075         if (ret) {
4076                 if (force_on)
4077                         panic("tboot: Failed to initialize DMARs\n");
4078                 printk(KERN_ERR "IOMMU: dmar init failed\n");
4079                 goto out_free_reserved_range;
4080         }
4081         up_write(&dmar_global_lock);
4082         printk(KERN_INFO
4083         "PCI-DMA: Intel(R) Virtualization Technology for Directed I/O\n");
4084
4085         init_timer(&unmap_timer);
4086 #ifdef CONFIG_SWIOTLB
4087         swiotlb = 0;
4088 #endif
4089         dma_ops = &intel_dma_ops;
4090
4091         init_iommu_pm_ops();
4092
4093         for_each_active_iommu(iommu, drhd)
4094                 iommu->iommu_dev = iommu_device_create(NULL, iommu,
4095                                                        intel_iommu_groups,
4096                                                        iommu->name);
4097
4098         bus_set_iommu(&pci_bus_type, &intel_iommu_ops);
4099         bus_register_notifier(&pci_bus_type, &device_nb);
4100         if (si_domain && !hw_pass_through)
4101                 register_memory_notifier(&intel_iommu_memory_nb);
4102
4103         intel_iommu_enabled = 1;
4104
4105         return 0;
4106
4107 out_free_reserved_range:
4108         put_iova_domain(&reserved_iova_list);
4109 out_free_dmar:
4110         intel_iommu_free_dmars();
4111         up_write(&dmar_global_lock);
4112         iommu_exit_mempool();
4113         return ret;
4114 }
4115
4116 static int iommu_detach_dev_cb(struct pci_dev *pdev, u16 alias, void *opaque)
4117 {
4118         struct intel_iommu *iommu = opaque;
4119
4120         iommu_detach_dev(iommu, PCI_BUS_NUM(alias), alias & 0xff);
4121         return 0;
4122 }
4123
4124 /*
4125  * NB - intel-iommu lacks any sort of reference counting for the users of
4126  * dependent devices.  If multiple endpoints have intersecting dependent
4127  * devices, unbinding the driver from any one of them will possibly leave
4128  * the others unable to operate.
4129  */
4130 static void iommu_detach_dependent_devices(struct intel_iommu *iommu,
4131                                            struct device *dev)
4132 {
4133         if (!iommu || !dev || !dev_is_pci(dev))
4134                 return;
4135
4136         pci_for_each_dma_alias(to_pci_dev(dev), &iommu_detach_dev_cb, iommu);
4137 }
4138
4139 static void domain_remove_one_dev_info(struct dmar_domain *domain,
4140                                        struct device *dev)
4141 {
4142         struct device_domain_info *info, *tmp;
4143         struct intel_iommu *iommu;
4144         unsigned long flags;
4145         int found = 0;
4146         u8 bus, devfn;
4147
4148         iommu = device_to_iommu(dev, &bus, &devfn);
4149         if (!iommu)
4150                 return;
4151
4152         spin_lock_irqsave(&device_domain_lock, flags);
4153         list_for_each_entry_safe(info, tmp, &domain->devices, link) {
4154                 if (info->iommu == iommu && info->bus == bus &&
4155                     info->devfn == devfn) {
4156                         unlink_domain_info(info);
4157                         spin_unlock_irqrestore(&device_domain_lock, flags);
4158
4159                         iommu_disable_dev_iotlb(info);
4160                         iommu_detach_dev(iommu, info->bus, info->devfn);
4161                         iommu_detach_dependent_devices(iommu, dev);
4162                         free_devinfo_mem(info);
4163
4164                         spin_lock_irqsave(&device_domain_lock, flags);
4165
4166                         if (found)
4167                                 break;
4168                         else
4169                                 continue;
4170                 }
4171
4172                 /* if there is no other devices under the same iommu
4173                  * owned by this domain, clear this iommu in iommu_bmp
4174                  * update iommu count and coherency
4175                  */
4176                 if (info->iommu == iommu)
4177                         found = 1;
4178         }
4179
4180         spin_unlock_irqrestore(&device_domain_lock, flags);
4181
4182         if (found == 0) {
4183                 domain_detach_iommu(domain, iommu);
4184                 if (!domain_type_is_vm_or_si(domain))
4185                         iommu_detach_domain(domain, iommu);
4186         }
4187 }
4188
4189 static int md_domain_init(struct dmar_domain *domain, int guest_width)
4190 {
4191         int adjust_width;
4192
4193         init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
4194         domain_reserve_special_ranges(domain);
4195
4196         /* calculate AGAW */
4197         domain->gaw = guest_width;
4198         adjust_width = guestwidth_to_adjustwidth(guest_width);
4199         domain->agaw = width_to_agaw(adjust_width);
4200
4201         domain->iommu_coherency = 0;
4202         domain->iommu_snooping = 0;
4203         domain->iommu_superpage = 0;
4204         domain->max_addr = 0;
4205
4206         /* always allocate the top pgd */
4207         domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
4208         if (!domain->pgd)
4209                 return -ENOMEM;
4210         domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
4211         return 0;
4212 }
4213
4214 static int intel_iommu_domain_init(struct iommu_domain *domain)
4215 {
4216         struct dmar_domain *dmar_domain;
4217
4218         dmar_domain = alloc_domain(DOMAIN_FLAG_VIRTUAL_MACHINE);
4219         if (!dmar_domain) {
4220                 printk(KERN_ERR
4221                         "intel_iommu_domain_init: dmar_domain == NULL\n");
4222                 return -ENOMEM;
4223         }
4224         if (md_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
4225                 printk(KERN_ERR
4226                         "intel_iommu_domain_init() failed\n");
4227                 domain_exit(dmar_domain);
4228                 return -ENOMEM;
4229         }
4230         domain_update_iommu_cap(dmar_domain);
4231         domain->priv = dmar_domain;
4232
4233         domain->geometry.aperture_start = 0;
4234         domain->geometry.aperture_end   = __DOMAIN_MAX_ADDR(dmar_domain->gaw);
4235         domain->geometry.force_aperture = true;
4236
4237         return 0;
4238 }
4239
4240 static void intel_iommu_domain_destroy(struct iommu_domain *domain)
4241 {
4242         struct dmar_domain *dmar_domain = domain->priv;
4243
4244         domain->priv = NULL;
4245         domain_exit(dmar_domain);
4246 }
4247
4248 static int intel_iommu_attach_device(struct iommu_domain *domain,
4249                                      struct device *dev)
4250 {
4251         struct dmar_domain *dmar_domain = domain->priv;
4252         struct intel_iommu *iommu;
4253         int addr_width;
4254         u8 bus, devfn;
4255
4256         /* normally dev is not mapped */
4257         if (unlikely(domain_context_mapped(dev))) {
4258                 struct dmar_domain *old_domain;
4259
4260                 old_domain = find_domain(dev);
4261                 if (old_domain) {
4262                         if (domain_type_is_vm_or_si(dmar_domain))
4263                                 domain_remove_one_dev_info(old_domain, dev);
4264                         else
4265                                 domain_remove_dev_info(old_domain);
4266                 }
4267         }
4268
4269         iommu = device_to_iommu(dev, &bus, &devfn);
4270         if (!iommu)
4271                 return -ENODEV;
4272
4273         /* check if this iommu agaw is sufficient for max mapped address */
4274         addr_width = agaw_to_width(iommu->agaw);
4275         if (addr_width > cap_mgaw(iommu->cap))
4276                 addr_width = cap_mgaw(iommu->cap);
4277
4278         if (dmar_domain->max_addr > (1LL << addr_width)) {
4279                 printk(KERN_ERR "%s: iommu width (%d) is not "
4280                        "sufficient for the mapped address (%llx)\n",
4281                        __func__, addr_width, dmar_domain->max_addr);
4282                 return -EFAULT;
4283         }
4284         dmar_domain->gaw = addr_width;
4285
4286         /*
4287          * Knock out extra levels of page tables if necessary
4288          */
4289         while (iommu->agaw < dmar_domain->agaw) {
4290                 struct dma_pte *pte;
4291
4292                 pte = dmar_domain->pgd;
4293                 if (dma_pte_present(pte)) {
4294                         dmar_domain->pgd = (struct dma_pte *)
4295                                 phys_to_virt(dma_pte_addr(pte));
4296                         free_pgtable_page(pte);
4297                 }
4298                 dmar_domain->agaw--;
4299         }
4300
4301         return domain_add_dev_info(dmar_domain, dev, CONTEXT_TT_MULTI_LEVEL);
4302 }
4303
4304 static void intel_iommu_detach_device(struct iommu_domain *domain,
4305                                       struct device *dev)
4306 {
4307         struct dmar_domain *dmar_domain = domain->priv;
4308
4309         domain_remove_one_dev_info(dmar_domain, dev);
4310 }
4311
4312 static int intel_iommu_map(struct iommu_domain *domain,
4313                            unsigned long iova, phys_addr_t hpa,
4314                            size_t size, int iommu_prot)
4315 {
4316         struct dmar_domain *dmar_domain = domain->priv;
4317         u64 max_addr;
4318         int prot = 0;
4319         int ret;
4320
4321         if (iommu_prot & IOMMU_READ)
4322                 prot |= DMA_PTE_READ;
4323         if (iommu_prot & IOMMU_WRITE)
4324                 prot |= DMA_PTE_WRITE;
4325         if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping)
4326                 prot |= DMA_PTE_SNP;
4327
4328         max_addr = iova + size;
4329         if (dmar_domain->max_addr < max_addr) {
4330                 u64 end;
4331
4332                 /* check if minimum agaw is sufficient for mapped address */
4333                 end = __DOMAIN_MAX_ADDR(dmar_domain->gaw) + 1;
4334                 if (end < max_addr) {
4335                         printk(KERN_ERR "%s: iommu width (%d) is not "
4336                                "sufficient for the mapped address (%llx)\n",
4337                                __func__, dmar_domain->gaw, max_addr);
4338                         return -EFAULT;
4339                 }
4340                 dmar_domain->max_addr = max_addr;
4341         }
4342         /* Round up size to next multiple of PAGE_SIZE, if it and
4343            the low bits of hpa would take us onto the next page */
4344         size = aligned_nrpages(hpa, size);
4345         ret = domain_pfn_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT,
4346                                  hpa >> VTD_PAGE_SHIFT, size, prot);
4347         return ret;
4348 }
4349
4350 static size_t intel_iommu_unmap(struct iommu_domain *domain,
4351                                 unsigned long iova, size_t size)
4352 {
4353         struct dmar_domain *dmar_domain = domain->priv;
4354         struct page *freelist = NULL;
4355         struct intel_iommu *iommu;
4356         unsigned long start_pfn, last_pfn;
4357         unsigned int npages;
4358         int iommu_id, num, ndomains, level = 0;
4359
4360         /* Cope with horrid API which requires us to unmap more than the
4361            size argument if it happens to be a large-page mapping. */
4362         if (!pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level))
4363                 BUG();
4364
4365         if (size < VTD_PAGE_SIZE << level_to_offset_bits(level))
4366                 size = VTD_PAGE_SIZE << level_to_offset_bits(level);
4367
4368         start_pfn = iova >> VTD_PAGE_SHIFT;
4369         last_pfn = (iova + size - 1) >> VTD_PAGE_SHIFT;
4370
4371         freelist = domain_unmap(dmar_domain, start_pfn, last_pfn);
4372
4373         npages = last_pfn - start_pfn + 1;
4374
4375         for_each_set_bit(iommu_id, dmar_domain->iommu_bmp, g_num_of_iommus) {
4376                iommu = g_iommus[iommu_id];
4377
4378                /*
4379                 * find bit position of dmar_domain
4380                 */
4381                ndomains = cap_ndoms(iommu->cap);
4382                for_each_set_bit(num, iommu->domain_ids, ndomains) {
4383                        if (iommu->domains[num] == dmar_domain)
4384                                iommu_flush_iotlb_psi(iommu, num, start_pfn,
4385                                                      npages, !freelist, 0);
4386                }
4387
4388         }
4389
4390         dma_free_pagelist(freelist);
4391
4392         if (dmar_domain->max_addr == iova + size)
4393                 dmar_domain->max_addr = iova;
4394
4395         return size;
4396 }
4397
4398 static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
4399                                             dma_addr_t iova)
4400 {
4401         struct dmar_domain *dmar_domain = domain->priv;
4402         struct dma_pte *pte;
4403         int level = 0;
4404         u64 phys = 0;
4405
4406         pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level);
4407         if (pte)
4408                 phys = dma_pte_addr(pte);
4409
4410         return phys;
4411 }
4412
4413 static int intel_iommu_domain_has_cap(struct iommu_domain *domain,
4414                                       unsigned long cap)
4415 {
4416         struct dmar_domain *dmar_domain = domain->priv;
4417
4418         if (cap == IOMMU_CAP_CACHE_COHERENCY)
4419                 return dmar_domain->iommu_snooping;
4420         if (cap == IOMMU_CAP_INTR_REMAP)
4421                 return irq_remapping_enabled;
4422
4423         return 0;
4424 }
4425
4426 static int intel_iommu_add_device(struct device *dev)
4427 {
4428         struct intel_iommu *iommu;
4429         struct iommu_group *group;
4430         u8 bus, devfn;
4431
4432         iommu = device_to_iommu(dev, &bus, &devfn);
4433         if (!iommu)
4434                 return -ENODEV;
4435
4436         iommu_device_link(iommu->iommu_dev, dev);
4437
4438         group = iommu_group_get_for_dev(dev);
4439
4440         if (IS_ERR(group))
4441                 return PTR_ERR(group);
4442
4443         iommu_group_put(group);
4444         return 0;
4445 }
4446
4447 static void intel_iommu_remove_device(struct device *dev)
4448 {
4449         struct intel_iommu *iommu;
4450         u8 bus, devfn;
4451
4452         iommu = device_to_iommu(dev, &bus, &devfn);
4453         if (!iommu)
4454                 return;
4455
4456         iommu_group_remove_device(dev);
4457
4458         iommu_device_unlink(iommu->iommu_dev, dev);
4459 }
4460
4461 static const struct iommu_ops intel_iommu_ops = {
4462         .domain_init    = intel_iommu_domain_init,
4463         .domain_destroy = intel_iommu_domain_destroy,
4464         .attach_dev     = intel_iommu_attach_device,
4465         .detach_dev     = intel_iommu_detach_device,
4466         .map            = intel_iommu_map,
4467         .unmap          = intel_iommu_unmap,
4468         .iova_to_phys   = intel_iommu_iova_to_phys,
4469         .domain_has_cap = intel_iommu_domain_has_cap,
4470         .add_device     = intel_iommu_add_device,
4471         .remove_device  = intel_iommu_remove_device,
4472         .pgsize_bitmap  = INTEL_IOMMU_PGSIZES,
4473 };
4474
4475 static void quirk_iommu_g4x_gfx(struct pci_dev *dev)
4476 {
4477         /* G4x/GM45 integrated gfx dmar support is totally busted. */
4478         printk(KERN_INFO "DMAR: Disabling IOMMU for graphics on this chipset\n");
4479         dmar_map_gfx = 0;
4480 }
4481
4482 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_g4x_gfx);
4483 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_g4x_gfx);
4484 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_g4x_gfx);
4485 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_g4x_gfx);
4486 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_g4x_gfx);
4487 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_g4x_gfx);
4488 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_g4x_gfx);
4489
4490 static void quirk_iommu_rwbf(struct pci_dev *dev)
4491 {
4492         /*
4493          * Mobile 4 Series Chipset neglects to set RWBF capability,
4494          * but needs it. Same seems to hold for the desktop versions.
4495          */
4496         printk(KERN_INFO "DMAR: Forcing write-buffer flush capability\n");
4497         rwbf_quirk = 1;
4498 }
4499
4500 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_rwbf);
4501 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_rwbf);
4502 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_rwbf);
4503 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_rwbf);
4504 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_rwbf);
4505 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_rwbf);
4506 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_rwbf);
4507
4508 #define GGC 0x52
4509 #define GGC_MEMORY_SIZE_MASK    (0xf << 8)
4510 #define GGC_MEMORY_SIZE_NONE    (0x0 << 8)
4511 #define GGC_MEMORY_SIZE_1M      (0x1 << 8)
4512 #define GGC_MEMORY_SIZE_2M      (0x3 << 8)
4513 #define GGC_MEMORY_VT_ENABLED   (0x8 << 8)
4514 #define GGC_MEMORY_SIZE_2M_VT   (0x9 << 8)
4515 #define GGC_MEMORY_SIZE_3M_VT   (0xa << 8)
4516 #define GGC_MEMORY_SIZE_4M_VT   (0xb << 8)
4517
4518 static void quirk_calpella_no_shadow_gtt(struct pci_dev *dev)
4519 {
4520         unsigned short ggc;
4521
4522         if (pci_read_config_word(dev, GGC, &ggc))
4523                 return;
4524
4525         if (!(ggc & GGC_MEMORY_VT_ENABLED)) {
4526                 printk(KERN_INFO "DMAR: BIOS has allocated no shadow GTT; disabling IOMMU for graphics\n");
4527                 dmar_map_gfx = 0;
4528         } else if (dmar_map_gfx) {
4529                 /* we have to ensure the gfx device is idle before we flush */
4530                 printk(KERN_INFO "DMAR: Disabling batched IOTLB flush on Ironlake\n");
4531                 intel_iommu_strict = 1;
4532        }
4533 }
4534 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0040, quirk_calpella_no_shadow_gtt);
4535 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, quirk_calpella_no_shadow_gtt);
4536 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0062, quirk_calpella_no_shadow_gtt);
4537 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x006a, quirk_calpella_no_shadow_gtt);
4538
4539 /* On Tylersburg chipsets, some BIOSes have been known to enable the
4540    ISOCH DMAR unit for the Azalia sound device, but not give it any
4541    TLB entries, which causes it to deadlock. Check for that.  We do
4542    this in a function called from init_dmars(), instead of in a PCI
4543    quirk, because we don't want to print the obnoxious "BIOS broken"
4544    message if VT-d is actually disabled.
4545 */
4546 static void __init check_tylersburg_isoch(void)
4547 {
4548         struct pci_dev *pdev;
4549         uint32_t vtisochctrl;
4550
4551         /* If there's no Azalia in the system anyway, forget it. */
4552         pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x3a3e, NULL);
4553         if (!pdev)
4554                 return;
4555         pci_dev_put(pdev);
4556
4557         /* System Management Registers. Might be hidden, in which case
4558            we can't do the sanity check. But that's OK, because the
4559            known-broken BIOSes _don't_ actually hide it, so far. */
4560         pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x342e, NULL);
4561         if (!pdev)
4562                 return;
4563
4564         if (pci_read_config_dword(pdev, 0x188, &vtisochctrl)) {
4565                 pci_dev_put(pdev);
4566                 return;
4567         }
4568
4569         pci_dev_put(pdev);
4570
4571         /* If Azalia DMA is routed to the non-isoch DMAR unit, fine. */
4572         if (vtisochctrl & 1)
4573                 return;
4574
4575         /* Drop all bits other than the number of TLB entries */
4576         vtisochctrl &= 0x1c;
4577
4578         /* If we have the recommended number of TLB entries (16), fine. */
4579         if (vtisochctrl == 0x10)
4580                 return;
4581
4582         /* Zero TLB entries? You get to ride the short bus to school. */
4583         if (!vtisochctrl) {
4584                 WARN(1, "Your BIOS is broken; DMA routed to ISOCH DMAR unit but no TLB space.\n"
4585                      "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
4586                      dmi_get_system_info(DMI_BIOS_VENDOR),
4587                      dmi_get_system_info(DMI_BIOS_VERSION),
4588                      dmi_get_system_info(DMI_PRODUCT_VERSION));
4589                 iommu_identity_mapping |= IDENTMAP_AZALIA;
4590                 return;
4591         }
4592         
4593         printk(KERN_WARNING "DMAR: Recommended TLB entries for ISOCH unit is 16; your BIOS set %d\n",
4594                vtisochctrl);
4595 }