drivers/iommu/intel-iommu.c
1 /*
2  * Copyright © 2006-2014 Intel Corporation.
3  *
4  * This program is free software; you can redistribute it and/or modify it
5  * under the terms and conditions of the GNU General Public License,
6  * version 2, as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope it will be useful, but WITHOUT
9  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
11  * more details.
12  *
13  * Authors: David Woodhouse <dwmw2@infradead.org>,
14  *          Ashok Raj <ashok.raj@intel.com>,
15  *          Shaohua Li <shaohua.li@intel.com>,
16  *          Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>,
17  *          Fenghua Yu <fenghua.yu@intel.com>
18  */
19
20 #include <linux/init.h>
21 #include <linux/bitmap.h>
22 #include <linux/debugfs.h>
23 #include <linux/export.h>
24 #include <linux/slab.h>
25 #include <linux/irq.h>
26 #include <linux/interrupt.h>
27 #include <linux/spinlock.h>
28 #include <linux/pci.h>
29 #include <linux/dmar.h>
30 #include <linux/dma-mapping.h>
31 #include <linux/mempool.h>
32 #include <linux/memory.h>
33 #include <linux/timer.h>
34 #include <linux/iova.h>
35 #include <linux/iommu.h>
36 #include <linux/intel-iommu.h>
37 #include <linux/syscore_ops.h>
38 #include <linux/tboot.h>
39 #include <linux/dmi.h>
40 #include <linux/pci-ats.h>
41 #include <linux/memblock.h>
42 #include <linux/dma-contiguous.h>
43 #include <asm/irq_remapping.h>
44 #include <asm/cacheflush.h>
45 #include <asm/iommu.h>
46
47 #include "irq_remapping.h"
48
49 #define ROOT_SIZE               VTD_PAGE_SIZE
50 #define CONTEXT_SIZE            VTD_PAGE_SIZE
51
52 #define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
53 #define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
54 #define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e)
55
56 #define IOAPIC_RANGE_START      (0xfee00000)
57 #define IOAPIC_RANGE_END        (0xfeefffff)
58 #define IOVA_START_ADDR         (0x1000)
59
60 #define DEFAULT_DOMAIN_ADDRESS_WIDTH 48
61
62 #define MAX_AGAW_WIDTH 64
63 #define MAX_AGAW_PFN_WIDTH      (MAX_AGAW_WIDTH - VTD_PAGE_SHIFT)
64
65 #define __DOMAIN_MAX_PFN(gaw)  ((((uint64_t)1) << (gaw-VTD_PAGE_SHIFT)) - 1)
66 #define __DOMAIN_MAX_ADDR(gaw) ((((uint64_t)1) << gaw) - 1)
67
68 /* We limit DOMAIN_MAX_PFN to fit in an unsigned long, and DOMAIN_MAX_ADDR
69    to match. That way, we can use 'unsigned long' for PFNs with impunity. */
70 #define DOMAIN_MAX_PFN(gaw)     ((unsigned long) min_t(uint64_t, \
71                                 __DOMAIN_MAX_PFN(gaw), (unsigned long)-1))
72 #define DOMAIN_MAX_ADDR(gaw)    (((uint64_t)__DOMAIN_MAX_PFN(gaw)) << VTD_PAGE_SHIFT)
73
74 #define IOVA_PFN(addr)          ((addr) >> PAGE_SHIFT)
75 #define DMA_32BIT_PFN           IOVA_PFN(DMA_BIT_MASK(32))
76 #define DMA_64BIT_PFN           IOVA_PFN(DMA_BIT_MASK(64))
77
78 /* page table handling */
79 #define LEVEL_STRIDE            (9)
80 #define LEVEL_MASK              (((u64)1 << LEVEL_STRIDE) - 1)
81
82 /*
83  * This bitmap is used to advertise the page sizes our hardware supports
84  * to the IOMMU core, which will then use this information to split
85  * physically contiguous memory regions it is mapping into page sizes
86  * that we support.
87  *
88  * Traditionally the IOMMU core just handed us the mappings directly,
89  * after making sure the size was a power-of-two multiple of a 4KiB page
90  * and that the mapping had natural alignment.
91  *
92  * To retain this behavior, we currently advertise that we support
93  * all page sizes that are a power-of-two multiple of 4KiB.
94  *
95  * If at some point we'd like to utilize the IOMMU core's new behavior,
96  * we could change this to advertise the real page sizes we support.
97  */
98 #define INTEL_IOMMU_PGSIZES     (~0xFFFUL)
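/*
 * In this bitmap, bit n set means a page size of 2^n bytes is advertised;
 * ~0xFFFUL clears bits 0-11 and sets everything above, i.e. 4KiB, 8KiB,
 * 16KiB and every larger power of two are all reported as supported.
 */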
99
100 static inline int agaw_to_level(int agaw)
101 {
102         return agaw + 2;
103 }
104
105 static inline int agaw_to_width(int agaw)
106 {
107         return min_t(int, 30 + agaw * LEVEL_STRIDE, MAX_AGAW_WIDTH);
108 }
109
110 static inline int width_to_agaw(int width)
111 {
112         return DIV_ROUND_UP(width - 30, LEVEL_STRIDE);
113 }
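/*
 * Worked example for the agaw helpers above (LEVEL_STRIDE == 9):
 *	agaw 0 -> 30-bit address width, 2-level table
 *	agaw 1 -> 39-bit address width, 3-level table
 *	agaw 2 -> 48-bit address width, 4-level table (the default domain width)
 *	agaw 3 -> 57-bit address width, 5-level table
 *	agaw 4 -> capped at MAX_AGAW_WIDTH (64 bits), 6-level table
 */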
114
115 static inline unsigned int level_to_offset_bits(int level)
116 {
117         return (level - 1) * LEVEL_STRIDE;
118 }
119
120 static inline int pfn_level_offset(unsigned long pfn, int level)
121 {
122         return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK;
123 }
124
125 static inline unsigned long level_mask(int level)
126 {
127         return -1UL << level_to_offset_bits(level);
128 }
129
130 static inline unsigned long level_size(int level)
131 {
132         return 1UL << level_to_offset_bits(level);
133 }
134
135 static inline unsigned long align_to_level(unsigned long pfn, int level)
136 {
137         return (pfn + level_size(level) - 1) & level_mask(level);
138 }
139
140 static inline unsigned long lvl_to_nr_pages(unsigned int lvl)
141 {
142         return  1 << min_t(int, (lvl - 1) * LEVEL_STRIDE, MAX_AGAW_PFN_WIDTH);
143 }
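/*
 * For example, a single entry at level 1 covers one 4KiB page
 * (lvl_to_nr_pages(1) == 1), at level 2 it covers 512 pages (2MiB) and
 * at level 3 it covers 512 * 512 pages (1GiB).
 */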
144
145 /* VT-d pages must always be _smaller_ than MM pages. Otherwise things
146    are never going to work. */
147 static inline unsigned long dma_to_mm_pfn(unsigned long dma_pfn)
148 {
149         return dma_pfn >> (PAGE_SHIFT - VTD_PAGE_SHIFT);
150 }
151
152 static inline unsigned long mm_to_dma_pfn(unsigned long mm_pfn)
153 {
154         return mm_pfn << (PAGE_SHIFT - VTD_PAGE_SHIFT);
155 }
156 static inline unsigned long page_to_dma_pfn(struct page *pg)
157 {
158         return mm_to_dma_pfn(page_to_pfn(pg));
159 }
160 static inline unsigned long virt_to_dma_pfn(void *p)
161 {
162         return page_to_dma_pfn(virt_to_page(p));
163 }
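/*
 * On x86 both PAGE_SHIFT and VTD_PAGE_SHIFT are 12, so the conversions
 * above are no-ops; the shifts only matter when the CPU page size is
 * larger than the 4KiB VT-d page.
 */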
164
165 /* global iommu list, set NULL for ignored DMAR units */
166 static struct intel_iommu **g_iommus;
167
168 static void __init check_tylersburg_isoch(void);
169 static int rwbf_quirk;
170
171 /*
172  * set to 1 to panic the kernel if VT-d can't be successfully enabled
173  * (used when the kernel is launched with TXT)
174  */
175 static int force_on = 0;
176
177 /*
178  * 0: Present
179  * 1-11: Reserved
180  * 12-63: Context Ptr (12 - (haw-1))
181  * 64-127: Reserved
182  */
183 struct root_entry {
184         u64     val;
185         u64     rsvd1;
186 };
187 #define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
188 static inline bool root_present(struct root_entry *root)
189 {
190         return (root->val & 1);
191 }
192 static inline void set_root_present(struct root_entry *root)
193 {
194         root->val |= 1;
195 }
196 static inline void set_root_value(struct root_entry *root, unsigned long value)
197 {
198         root->val |= value & VTD_PAGE_MASK;
199 }
200
201 static inline struct context_entry *
202 get_context_addr_from_root(struct root_entry *root)
203 {
204         return (struct context_entry *)
205                 (root_present(root)?phys_to_virt(
206                 root->val & VTD_PAGE_MASK) :
207                 NULL);
208 }
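/*
 * Each root entry corresponds to one PCI bus, so the 4KiB root table holds
 * ROOT_ENTRY_NR == 256 entries; the context table a root entry points to
 * again holds 256 16-byte context entries, indexed by devfn.
 */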
209
210 /*
211  * low 64 bits:
212  * 0: present
213  * 1: fault processing disable
214  * 2-3: translation type
215  * 12-63: address space root
216  * high 64 bits:
217  * 0-2: address width
218  * 3-6: avail
219  * 8-23: domain id
220  */
221 struct context_entry {
222         u64 lo;
223         u64 hi;
224 };
225
226 static inline bool context_present(struct context_entry *context)
227 {
228         return (context->lo & 1);
229 }
230 static inline void context_set_present(struct context_entry *context)
231 {
232         context->lo |= 1;
233 }
234
235 static inline void context_set_fault_enable(struct context_entry *context)
236 {
237         context->lo &= (((u64)-1) << 2) | 1;
238 }
239
240 static inline void context_set_translation_type(struct context_entry *context,
241                                                 unsigned long value)
242 {
243         context->lo &= (((u64)-1) << 4) | 3;
244         context->lo |= (value & 3) << 2;
245 }
246
247 static inline void context_set_address_root(struct context_entry *context,
248                                             unsigned long value)
249 {
250         context->lo |= value & VTD_PAGE_MASK;
251 }
252
253 static inline void context_set_address_width(struct context_entry *context,
254                                              unsigned long value)
255 {
256         context->hi |= value & 7;
257 }
258
259 static inline void context_set_domain_id(struct context_entry *context,
260                                          unsigned long value)
261 {
262         context->hi |= (value & ((1 << 16) - 1)) << 8;
263 }
264
265 static inline void context_clear_entry(struct context_entry *context)
266 {
267         context->lo = 0;
268         context->hi = 0;
269 }
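/*
 * A context entry is normally programmed with the helpers above roughly in
 * this order: domain id, address width (the domain's agaw), address root
 * (the domain's page-table pgd), translation type, fault processing and
 * finally the present bit, followed by a cache flush of the entry.
 */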
270
271 /*
272  * 0: readable
273  * 1: writable
274  * 2-6: reserved
275  * 7: super page
276  * 8-10: available
277  * 11: snoop behavior
278  * 12-63: Host physical address
279  */
280 struct dma_pte {
281         u64 val;
282 };
283
284 static inline void dma_clear_pte(struct dma_pte *pte)
285 {
286         pte->val = 0;
287 }
288
289 static inline u64 dma_pte_addr(struct dma_pte *pte)
290 {
291 #ifdef CONFIG_64BIT
292         return pte->val & VTD_PAGE_MASK;
293 #else
294         /* Must have a full atomic 64-bit read */
295         return  __cmpxchg64(&pte->val, 0ULL, 0ULL) & VTD_PAGE_MASK;
296 #endif
297 }
298
299 static inline bool dma_pte_present(struct dma_pte *pte)
300 {
301         return (pte->val & 3) != 0;
302 }
303
304 static inline bool dma_pte_superpage(struct dma_pte *pte)
305 {
306         return (pte->val & DMA_PTE_LARGE_PAGE);
307 }
308
309 static inline int first_pte_in_page(struct dma_pte *pte)
310 {
311         return !((unsigned long)pte & ~VTD_PAGE_MASK);
312 }
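/*
 * first_pte_in_page() is true when @pte is the first entry of its 4KiB
 * page-table page (512 entries per page); the walk loops below use it to
 * notice when they have stepped past the end of the current table.
 */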
313
314 /*
315  * This domain is a static identity mapping domain.
316  *      1. This domain creates a static 1:1 mapping to all usable memory.
317  *      2. It maps to each iommu if successful.
318  *      3. Each iommu maps to this domain if successful.
319  */
320 static struct dmar_domain *si_domain;
321 static int hw_pass_through = 1;
322
323 /* The domain represents a virtual machine; more than one device
324  * across iommus may be owned by one domain, e.g. a KVM guest.
325  */
326 #define DOMAIN_FLAG_VIRTUAL_MACHINE     (1 << 0)
327
328 /* si_domain contains multiple devices */
329 #define DOMAIN_FLAG_STATIC_IDENTITY     (1 << 1)
330
331 /* define the limit of IOMMUs supported in each domain */
332 #ifdef  CONFIG_X86
333 # define        IOMMU_UNITS_SUPPORTED   MAX_IO_APICS
334 #else
335 # define        IOMMU_UNITS_SUPPORTED   64
336 #endif
337
338 struct dmar_domain {
339         int     id;                     /* domain id */
340         int     nid;                    /* node id */
341         DECLARE_BITMAP(iommu_bmp, IOMMU_UNITS_SUPPORTED);
342                                         /* bitmap of iommus this domain uses*/
343
344         struct list_head devices;       /* all devices' list */
345         struct iova_domain iovad;       /* iova's that belong to this domain */
346
347         struct dma_pte  *pgd;           /* virtual address */
348         int             gaw;            /* max guest address width */
349
350         /* adjusted guest address width, 0 is level 2 30-bit */
351         int             agaw;
352
353         int             flags;          /* flags to find out type of domain */
354
355         int             iommu_coherency;/* indicate coherency of iommu access */
356         int             iommu_snooping; /* indicate snooping control feature*/
357         int             iommu_count;    /* reference count of iommu */
358         int             iommu_superpage;/* Level of superpages supported:
359                                            0 == 4KiB (no superpages), 1 == 2MiB,
360                                            2 == 1GiB, 3 == 512GiB, 4 == 256TiB */
361         spinlock_t      iommu_lock;     /* protect iommu set in domain */
362         u64             max_addr;       /* maximum mapped address */
363 };
364
365 /* PCI domain-device relationship */
366 struct device_domain_info {
367         struct list_head link;  /* link to domain siblings */
368         struct list_head global; /* link to global list */
369         u8 bus;                 /* PCI bus number */
370         u8 devfn;               /* PCI devfn number */
371         struct device *dev; /* it's NULL for PCIe-to-PCI bridge */
372         struct intel_iommu *iommu; /* IOMMU used by this device */
373         struct dmar_domain *domain; /* pointer to domain */
374 };
375
376 struct dmar_rmrr_unit {
377         struct list_head list;          /* list of rmrr units   */
378         struct acpi_dmar_header *hdr;   /* ACPI header          */
379         u64     base_address;           /* reserved base address*/
380         u64     end_address;            /* reserved end address */
381         struct dmar_dev_scope *devices; /* target devices */
382         int     devices_cnt;            /* target device count */
383 };
384
385 struct dmar_atsr_unit {
386         struct list_head list;          /* list of ATSR units */
387         struct acpi_dmar_header *hdr;   /* ACPI header */
388         struct dmar_dev_scope *devices; /* target devices */
389         int devices_cnt;                /* target device count */
390         u8 include_all:1;               /* include all ports */
391 };
392
393 static LIST_HEAD(dmar_atsr_units);
394 static LIST_HEAD(dmar_rmrr_units);
395
396 #define for_each_rmrr_units(rmrr) \
397         list_for_each_entry(rmrr, &dmar_rmrr_units, list)
398
399 static void flush_unmaps_timeout(unsigned long data);
400
401 static DEFINE_TIMER(unmap_timer,  flush_unmaps_timeout, 0, 0);
402
403 #define HIGH_WATER_MARK 250
404 struct deferred_flush_tables {
405         int next;
406         struct iova *iova[HIGH_WATER_MARK];
407         struct dmar_domain *domain[HIGH_WATER_MARK];
408         struct page *freelist[HIGH_WATER_MARK];
409 };
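/*
 * Lazy unmapping: unless "intel_iommu=strict" is used, freed IOVAs are
 * queued in these per-iommu tables (up to HIGH_WATER_MARK entries each)
 * and released in one batch once enough accumulate or unmap_timer fires,
 * amortizing the cost of the IOTLB flush.
 */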
410
411 static struct deferred_flush_tables *deferred_flush;
412
413 /* number of IOMMUs in the system; also sizes the g_iommus array */
414 static int g_num_of_iommus;
415
416 static DEFINE_SPINLOCK(async_umap_flush_lock);
417 static LIST_HEAD(unmaps_to_do);
418
419 static int timer_on;
420 static long list_size;
421
422 static void domain_exit(struct dmar_domain *domain);
423 static void domain_remove_dev_info(struct dmar_domain *domain);
424 static void domain_remove_one_dev_info(struct dmar_domain *domain,
425                                        struct device *dev);
426 static void iommu_detach_dependent_devices(struct intel_iommu *iommu,
427                                            struct device *dev);
428 static int domain_detach_iommu(struct dmar_domain *domain,
429                                struct intel_iommu *iommu);
430
431 #ifdef CONFIG_INTEL_IOMMU_DEFAULT_ON
432 int dmar_disabled = 0;
433 #else
434 int dmar_disabled = 1;
435 #endif /*CONFIG_INTEL_IOMMU_DEFAULT_ON*/
436
437 int intel_iommu_enabled = 0;
438 EXPORT_SYMBOL_GPL(intel_iommu_enabled);
439
440 static int dmar_map_gfx = 1;
441 static int dmar_forcedac;
442 static int intel_iommu_strict;
443 static int intel_iommu_superpage = 1;
444
445 int intel_iommu_gfx_mapped;
446 EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped);
447
448 #define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
449 static DEFINE_SPINLOCK(device_domain_lock);
450 static LIST_HEAD(device_domain_list);
451
452 static const struct iommu_ops intel_iommu_ops;
453
454 static int __init intel_iommu_setup(char *str)
455 {
456         if (!str)
457                 return -EINVAL;
458         while (*str) {
459                 if (!strncmp(str, "on", 2)) {
460                         dmar_disabled = 0;
461                         printk(KERN_INFO "Intel-IOMMU: enabled\n");
462                 } else if (!strncmp(str, "off", 3)) {
463                         dmar_disabled = 1;
464                         printk(KERN_INFO "Intel-IOMMU: disabled\n");
465                 } else if (!strncmp(str, "igfx_off", 8)) {
466                         dmar_map_gfx = 0;
467                         printk(KERN_INFO
468                                 "Intel-IOMMU: disable GFX device mapping\n");
469                 } else if (!strncmp(str, "forcedac", 8)) {
470                         printk(KERN_INFO
471                                 "Intel-IOMMU: Forcing DAC for PCI devices\n");
472                         dmar_forcedac = 1;
473                 } else if (!strncmp(str, "strict", 6)) {
474                         printk(KERN_INFO
475                                 "Intel-IOMMU: disable batched IOTLB flush\n");
476                         intel_iommu_strict = 1;
477                 } else if (!strncmp(str, "sp_off", 6)) {
478                         printk(KERN_INFO
479                                 "Intel-IOMMU: disable supported super page\n");
480                         intel_iommu_superpage = 0;
481                 }
482
483                 str += strcspn(str, ",");
484                 while (*str == ',')
485                         str++;
486         }
487         return 0;
488 }
489 __setup("intel_iommu=", intel_iommu_setup);
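/*
 * Example boot command line: "intel_iommu=on,strict,sp_off" enables the
 * IOMMU, disables batched IOTLB flushing and disables superpage support;
 * options are comma separated and parsed in order by the loop above.
 */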
490
491 static struct kmem_cache *iommu_domain_cache;
492 static struct kmem_cache *iommu_devinfo_cache;
493 static struct kmem_cache *iommu_iova_cache;
494
495 static inline void *alloc_pgtable_page(int node)
496 {
497         struct page *page;
498         void *vaddr = NULL;
499
500         page = alloc_pages_node(node, GFP_ATOMIC | __GFP_ZERO, 0);
501         if (page)
502                 vaddr = page_address(page);
503         return vaddr;
504 }
505
506 static inline void free_pgtable_page(void *vaddr)
507 {
508         free_page((unsigned long)vaddr);
509 }
510
511 static inline void *alloc_domain_mem(void)
512 {
513         return kmem_cache_alloc(iommu_domain_cache, GFP_ATOMIC);
514 }
515
516 static void free_domain_mem(void *vaddr)
517 {
518         kmem_cache_free(iommu_domain_cache, vaddr);
519 }
520
521 static inline void * alloc_devinfo_mem(void)
522 {
523         return kmem_cache_alloc(iommu_devinfo_cache, GFP_ATOMIC);
524 }
525
526 static inline void free_devinfo_mem(void *vaddr)
527 {
528         kmem_cache_free(iommu_devinfo_cache, vaddr);
529 }
530
531 struct iova *alloc_iova_mem(void)
532 {
533         return kmem_cache_alloc(iommu_iova_cache, GFP_ATOMIC);
534 }
535
536 void free_iova_mem(struct iova *iova)
537 {
538         kmem_cache_free(iommu_iova_cache, iova);
539 }
540
541 static inline int domain_type_is_vm(struct dmar_domain *domain)
542 {
543         return domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE;
544 }
545
546 static inline int domain_type_is_vm_or_si(struct dmar_domain *domain)
547 {
548         return domain->flags & (DOMAIN_FLAG_VIRTUAL_MACHINE |
549                                 DOMAIN_FLAG_STATIC_IDENTITY);
550 }
551
552 static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw)
553 {
554         unsigned long sagaw;
555         int agaw = -1;
556
557         sagaw = cap_sagaw(iommu->cap);
558         for (agaw = width_to_agaw(max_gaw);
559              agaw >= 0; agaw--) {
560                 if (test_bit(agaw, &sagaw))
561                         break;
562         }
563
564         return agaw;
565 }
566
567 /*
568  * Calculate max SAGAW for each iommu.
569  */
570 int iommu_calculate_max_sagaw(struct intel_iommu *iommu)
571 {
572         return __iommu_calculate_agaw(iommu, MAX_AGAW_WIDTH);
573 }
574
575 /*
576  * Calculate agaw for each iommu.
577  * "SAGAW" may differ across iommus, so use a default agaw and fall back
578  * to a smaller supported agaw for iommus that don't support the default.
579  */
580 int iommu_calculate_agaw(struct intel_iommu *iommu)
581 {
582         return __iommu_calculate_agaw(iommu, DEFAULT_DOMAIN_ADDRESS_WIDTH);
583 }
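/*
 * Example: with DEFAULT_DOMAIN_ADDRESS_WIDTH == 48, width_to_agaw() yields
 * agaw 2; if SAGAW bit 2 (4-level table) is set in the capability register
 * that agaw is used, otherwise __iommu_calculate_agaw() steps down until a
 * supported value is found (or returns -1 if there is none).
 */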
584
585 /* This function only returns a single iommu in a domain */
586 static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
587 {
588         int iommu_id;
589
590         /* si_domain and vm domain should not get here. */
591         BUG_ON(domain_type_is_vm_or_si(domain));
592         iommu_id = find_first_bit(domain->iommu_bmp, g_num_of_iommus);
593         if (iommu_id < 0 || iommu_id >= g_num_of_iommus)
594                 return NULL;
595
596         return g_iommus[iommu_id];
597 }
598
599 static void domain_update_iommu_coherency(struct dmar_domain *domain)
600 {
601         struct dmar_drhd_unit *drhd;
602         struct intel_iommu *iommu;
603         int i, found = 0;
604
605         domain->iommu_coherency = 1;
606
607         for_each_set_bit(i, domain->iommu_bmp, g_num_of_iommus) {
608                 found = 1;
609                 if (!ecap_coherent(g_iommus[i]->ecap)) {
610                         domain->iommu_coherency = 0;
611                         break;
612                 }
613         }
614         if (found)
615                 return;
616
617         /* No hardware attached; use lowest common denominator */
618         rcu_read_lock();
619         for_each_active_iommu(iommu, drhd) {
620                 if (!ecap_coherent(iommu->ecap)) {
621                         domain->iommu_coherency = 0;
622                         break;
623                 }
624         }
625         rcu_read_unlock();
626 }
627
628 static void domain_update_iommu_snooping(struct dmar_domain *domain)
629 {
630         int i;
631
632         domain->iommu_snooping = 1;
633
634         for_each_set_bit(i, domain->iommu_bmp, g_num_of_iommus) {
635                 if (!ecap_sc_support(g_iommus[i]->ecap)) {
636                         domain->iommu_snooping = 0;
637                         break;
638                 }
639         }
640 }
641
642 static void domain_update_iommu_superpage(struct dmar_domain *domain)
643 {
644         struct dmar_drhd_unit *drhd;
645         struct intel_iommu *iommu = NULL;
646         int mask = 0xf;
647
648         if (!intel_iommu_superpage) {
649                 domain->iommu_superpage = 0;
650                 return;
651         }
652
653         /* set iommu_superpage to the smallest common denominator */
654         rcu_read_lock();
655         for_each_active_iommu(iommu, drhd) {
656                 mask &= cap_super_page_val(iommu->cap);
657                 if (!mask) {
658                         break;
659                 }
660         }
661         rcu_read_unlock();
662
663         domain->iommu_superpage = fls(mask);
664 }
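/*
 * Example: in cap_super_page_val() bit 0 means 2MiB and bit 1 means 1GiB
 * pages. If every active iommu supports 2MiB but only some support 1GiB,
 * the AND above leaves mask == 0x1 and fls() sets domain->iommu_superpage
 * to 1, i.e. 2MiB superpages only.
 */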
665
666 /* Some capabilities may be different across iommus */
667 static void domain_update_iommu_cap(struct dmar_domain *domain)
668 {
669         domain_update_iommu_coherency(domain);
670         domain_update_iommu_snooping(domain);
671         domain_update_iommu_superpage(domain);
672 }
673
674 static struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn)
675 {
676         struct dmar_drhd_unit *drhd = NULL;
677         struct intel_iommu *iommu;
678         struct device *tmp;
679         struct pci_dev *ptmp, *pdev = NULL;
680         u16 segment = 0;
681         int i;
682
683         if (dev_is_pci(dev)) {
684                 pdev = to_pci_dev(dev);
685                 segment = pci_domain_nr(pdev->bus);
686         } else if (ACPI_COMPANION(dev))
687                 dev = &ACPI_COMPANION(dev)->dev;
688
689         rcu_read_lock();
690         for_each_active_iommu(iommu, drhd) {
691                 if (pdev && segment != drhd->segment)
692                         continue;
693
694                 for_each_active_dev_scope(drhd->devices,
695                                           drhd->devices_cnt, i, tmp) {
696                         if (tmp == dev) {
697                                 *bus = drhd->devices[i].bus;
698                                 *devfn = drhd->devices[i].devfn;
699                                 goto out;
700                         }
701
702                         if (!pdev || !dev_is_pci(tmp))
703                                 continue;
704
705                         ptmp = to_pci_dev(tmp);
706                         if (ptmp->subordinate &&
707                             ptmp->subordinate->number <= pdev->bus->number &&
708                             ptmp->subordinate->busn_res.end >= pdev->bus->number)
709                                 goto got_pdev;
710                 }
711
712                 if (pdev && drhd->include_all) {
713                 got_pdev:
714                         *bus = pdev->bus->number;
715                         *devfn = pdev->devfn;
716                         goto out;
717                 }
718         }
719         iommu = NULL;
720  out:
721         rcu_read_unlock();
722
723         return iommu;
724 }
725
726 static void domain_flush_cache(struct dmar_domain *domain,
727                                void *addr, int size)
728 {
729         if (!domain->iommu_coherency)
730                 clflush_cache_range(addr, size);
731 }
732
733 /* Gets context entry for a given bus and devfn */
734 static struct context_entry * device_to_context_entry(struct intel_iommu *iommu,
735                 u8 bus, u8 devfn)
736 {
737         struct root_entry *root;
738         struct context_entry *context;
739         unsigned long phy_addr;
740         unsigned long flags;
741
742         spin_lock_irqsave(&iommu->lock, flags);
743         root = &iommu->root_entry[bus];
744         context = get_context_addr_from_root(root);
745         if (!context) {
746                 context = (struct context_entry *)
747                                 alloc_pgtable_page(iommu->node);
748                 if (!context) {
749                         spin_unlock_irqrestore(&iommu->lock, flags);
750                         return NULL;
751                 }
752                 __iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
753                 phy_addr = virt_to_phys((void *)context);
754                 set_root_value(root, phy_addr);
755                 set_root_present(root);
756                 __iommu_flush_cache(iommu, root, sizeof(*root));
757         }
758         spin_unlock_irqrestore(&iommu->lock, flags);
759         return &context[devfn];
760 }
761
762 static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
763 {
764         struct root_entry *root;
765         struct context_entry *context;
766         int ret;
767         unsigned long flags;
768
769         spin_lock_irqsave(&iommu->lock, flags);
770         root = &iommu->root_entry[bus];
771         context = get_context_addr_from_root(root);
772         if (!context) {
773                 ret = 0;
774                 goto out;
775         }
776         ret = context_present(&context[devfn]);
777 out:
778         spin_unlock_irqrestore(&iommu->lock, flags);
779         return ret;
780 }
781
782 static void clear_context_table(struct intel_iommu *iommu, u8 bus, u8 devfn)
783 {
784         struct root_entry *root;
785         struct context_entry *context;
786         unsigned long flags;
787
788         spin_lock_irqsave(&iommu->lock, flags);
789         root = &iommu->root_entry[bus];
790         context = get_context_addr_from_root(root);
791         if (context) {
792                 context_clear_entry(&context[devfn]);
793                 __iommu_flush_cache(iommu, &context[devfn], \
794                         sizeof(*context));
795         }
796         spin_unlock_irqrestore(&iommu->lock, flags);
797 }
798
799 static void free_context_table(struct intel_iommu *iommu)
800 {
801         struct root_entry *root;
802         int i;
803         unsigned long flags;
804         struct context_entry *context;
805
806         spin_lock_irqsave(&iommu->lock, flags);
807         if (!iommu->root_entry) {
808                 goto out;
809         }
810         for (i = 0; i < ROOT_ENTRY_NR; i++) {
811                 root = &iommu->root_entry[i];
812                 context = get_context_addr_from_root(root);
813                 if (context)
814                         free_pgtable_page(context);
815         }
816         free_pgtable_page(iommu->root_entry);
817         iommu->root_entry = NULL;
818 out:
819         spin_unlock_irqrestore(&iommu->lock, flags);
820 }
821
822 static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
823                                       unsigned long pfn, int *target_level)
824 {
825         int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
826         struct dma_pte *parent, *pte = NULL;
827         int level = agaw_to_level(domain->agaw);
828         int offset;
829
830         BUG_ON(!domain->pgd);
831
832         if (addr_width < BITS_PER_LONG && pfn >> addr_width)
833                 /* Address beyond IOMMU's addressing capabilities. */
834                 return NULL;
835
836         parent = domain->pgd;
837
838         while (1) {
839                 void *tmp_page;
840
841                 offset = pfn_level_offset(pfn, level);
842                 pte = &parent[offset];
843                 if (!*target_level && (dma_pte_superpage(pte) || !dma_pte_present(pte)))
844                         break;
845                 if (level == *target_level)
846                         break;
847
848                 if (!dma_pte_present(pte)) {
849                         uint64_t pteval;
850
851                         tmp_page = alloc_pgtable_page(domain->nid);
852
853                         if (!tmp_page)
854                                 return NULL;
855
856                         domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE);
857                         pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE;
858                         if (cmpxchg64(&pte->val, 0ULL, pteval))
859                                 /* Someone else set it while we were thinking; use theirs. */
860                                 free_pgtable_page(tmp_page);
861                         else
862                                 domain_flush_cache(domain, pte, sizeof(*pte));
863                 }
864                 if (level == 1)
865                         break;
866
867                 parent = phys_to_virt(dma_pte_addr(pte));
868                 level--;
869         }
870
871         if (!*target_level)
872                 *target_level = level;
873
874         return pte;
875 }
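/*
 * Example walk: for a domain with agaw 2 (4-level table) and
 * *target_level == 1, pfn_to_dma_pte() descends through levels 4, 3 and 2,
 * allocating any missing intermediate tables (cmpxchg64() makes sure a
 * racing walker's allocation is reused rather than duplicated), and
 * returns the level-1 PTE that maps the 4KiB page at @pfn.
 */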
876
877
878 /* return address's pte at specific level */
879 static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain,
880                                          unsigned long pfn,
881                                          int level, int *large_page)
882 {
883         struct dma_pte *parent, *pte = NULL;
884         int total = agaw_to_level(domain->agaw);
885         int offset;
886
887         parent = domain->pgd;
888         while (level <= total) {
889                 offset = pfn_level_offset(pfn, total);
890                 pte = &parent[offset];
891                 if (level == total)
892                         return pte;
893
894                 if (!dma_pte_present(pte)) {
895                         *large_page = total;
896                         break;
897                 }
898
899                 if (dma_pte_superpage(pte)) {
900                         *large_page = total;
901                         return pte;
902                 }
903
904                 parent = phys_to_virt(dma_pte_addr(pte));
905                 total--;
906         }
907         return NULL;
908 }
909
910 /* clear last level pte, a tlb flush should follow */
911 static void dma_pte_clear_range(struct dmar_domain *domain,
912                                 unsigned long start_pfn,
913                                 unsigned long last_pfn)
914 {
915         int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
916         unsigned int large_page = 1;
917         struct dma_pte *first_pte, *pte;
918
919         BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
920         BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width);
921         BUG_ON(start_pfn > last_pfn);
922
923         /* we don't need lock here; nobody else touches the iova range */
924         do {
925                 large_page = 1;
926                 first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1, &large_page);
927                 if (!pte) {
928                         start_pfn = align_to_level(start_pfn + 1, large_page + 1);
929                         continue;
930                 }
931                 do {
932                         dma_clear_pte(pte);
933                         start_pfn += lvl_to_nr_pages(large_page);
934                         pte++;
935                 } while (start_pfn <= last_pfn && !first_pte_in_page(pte));
936
937                 domain_flush_cache(domain, first_pte,
938                                    (void *)pte - (void *)first_pte);
939
940         } while (start_pfn && start_pfn <= last_pfn);
941 }
942
943 static void dma_pte_free_level(struct dmar_domain *domain, int level,
944                                struct dma_pte *pte, unsigned long pfn,
945                                unsigned long start_pfn, unsigned long last_pfn)
946 {
947         pfn = max(start_pfn, pfn);
948         pte = &pte[pfn_level_offset(pfn, level)];
949
950         do {
951                 unsigned long level_pfn;
952                 struct dma_pte *level_pte;
953
954                 if (!dma_pte_present(pte) || dma_pte_superpage(pte))
955                         goto next;
956
957                 level_pfn = pfn & level_mask(level - 1);
958                 level_pte = phys_to_virt(dma_pte_addr(pte));
959
960                 if (level > 2)
961                         dma_pte_free_level(domain, level - 1, level_pte,
962                                            level_pfn, start_pfn, last_pfn);
963
964                 /* If range covers entire pagetable, free it */
965                 if (!(start_pfn > level_pfn ||
966                       last_pfn < level_pfn + level_size(level) - 1)) {
967                         dma_clear_pte(pte);
968                         domain_flush_cache(domain, pte, sizeof(*pte));
969                         free_pgtable_page(level_pte);
970                 }
971 next:
972                 pfn += level_size(level);
973         } while (!first_pte_in_page(++pte) && pfn <= last_pfn);
974 }
975
976 /* free page table pages. last level pte should already be cleared */
977 static void dma_pte_free_pagetable(struct dmar_domain *domain,
978                                    unsigned long start_pfn,
979                                    unsigned long last_pfn)
980 {
981         int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
982
983         BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
984         BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width);
985         BUG_ON(start_pfn > last_pfn);
986
987         /* We don't need lock here; nobody else touches the iova range */
988         dma_pte_free_level(domain, agaw_to_level(domain->agaw),
989                            domain->pgd, 0, start_pfn, last_pfn);
990
991         /* free pgd */
992         if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
993                 free_pgtable_page(domain->pgd);
994                 domain->pgd = NULL;
995         }
996 }
997
998 /* When a page at a given level is being unlinked from its parent, we don't
999    need to *modify* it at all. All we need to do is make a list of all the
1000    pages which can be freed just as soon as we've flushed the IOTLB and we
1001    know the hardware page-walk will no longer touch them.
1002    The 'pte' argument is the *parent* PTE, pointing to the page that is to
1003    be freed. */
1004 static struct page *dma_pte_list_pagetables(struct dmar_domain *domain,
1005                                             int level, struct dma_pte *pte,
1006                                             struct page *freelist)
1007 {
1008         struct page *pg;
1009
1010         pg = pfn_to_page(dma_pte_addr(pte) >> PAGE_SHIFT);
1011         pg->freelist = freelist;
1012         freelist = pg;
1013
1014         if (level == 1)
1015                 return freelist;
1016
1017         pte = page_address(pg);
1018         do {
1019                 if (dma_pte_present(pte) && !dma_pte_superpage(pte))
1020                         freelist = dma_pte_list_pagetables(domain, level - 1,
1021                                                            pte, freelist);
1022                 pte++;
1023         } while (!first_pte_in_page(pte));
1024
1025         return freelist;
1026 }
1027
1028 static struct page *dma_pte_clear_level(struct dmar_domain *domain, int level,
1029                                         struct dma_pte *pte, unsigned long pfn,
1030                                         unsigned long start_pfn,
1031                                         unsigned long last_pfn,
1032                                         struct page *freelist)
1033 {
1034         struct dma_pte *first_pte = NULL, *last_pte = NULL;
1035
1036         pfn = max(start_pfn, pfn);
1037         pte = &pte[pfn_level_offset(pfn, level)];
1038
1039         do {
1040                 unsigned long level_pfn;
1041
1042                 if (!dma_pte_present(pte))
1043                         goto next;
1044
1045                 level_pfn = pfn & level_mask(level);
1046
1047                 /* If range covers entire pagetable, free it */
1048                 if (start_pfn <= level_pfn &&
1049                     last_pfn >= level_pfn + level_size(level) - 1) {
1050                         /* These subordinate page tables are going away entirely. Don't
1051                            bother to clear them; we're just going to *free* them. */
1052                         if (level > 1 && !dma_pte_superpage(pte))
1053                                 freelist = dma_pte_list_pagetables(domain, level - 1, pte, freelist);
1054
1055                         dma_clear_pte(pte);
1056                         if (!first_pte)
1057                                 first_pte = pte;
1058                         last_pte = pte;
1059                 } else if (level > 1) {
1060                         /* Recurse down into a level that isn't *entirely* obsolete */
1061                         freelist = dma_pte_clear_level(domain, level - 1,
1062                                                        phys_to_virt(dma_pte_addr(pte)),
1063                                                        level_pfn, start_pfn, last_pfn,
1064                                                        freelist);
1065                 }
1066 next:
1067                 pfn += level_size(level);
1068         } while (!first_pte_in_page(++pte) && pfn <= last_pfn);
1069
1070         if (first_pte)
1071                 domain_flush_cache(domain, first_pte,
1072                                    (void *)++last_pte - (void *)first_pte);
1073
1074         return freelist;
1075 }
1076
1077 /* We can't just free the pages because the IOMMU may still be walking
1078    the page tables, and may have cached the intermediate levels. The
1079    pages can only be freed after the IOTLB flush has been done. */
1080 struct page *domain_unmap(struct dmar_domain *domain,
1081                           unsigned long start_pfn,
1082                           unsigned long last_pfn)
1083 {
1084         int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
1085         struct page *freelist = NULL;
1086
1087         BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
1088         BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width);
1089         BUG_ON(start_pfn > last_pfn);
1090
1091         /* we don't need lock here; nobody else touches the iova range */
1092         freelist = dma_pte_clear_level(domain, agaw_to_level(domain->agaw),
1093                                        domain->pgd, 0, start_pfn, last_pfn, NULL);
1094
1095         /* free pgd */
1096         if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
1097                 struct page *pgd_page = virt_to_page(domain->pgd);
1098                 pgd_page->freelist = freelist;
1099                 freelist = pgd_page;
1100
1101                 domain->pgd = NULL;
1102         }
1103
1104         return freelist;
1105 }
1106
1107 void dma_free_pagelist(struct page *freelist)
1108 {
1109         struct page *pg;
1110
1111         while ((pg = freelist)) {
1112                 freelist = pg->freelist;
1113                 free_pgtable_page(page_address(pg));
1114         }
1115 }
1116
1117 /* iommu handling */
1118 static int iommu_alloc_root_entry(struct intel_iommu *iommu)
1119 {
1120         struct root_entry *root;
1121         unsigned long flags;
1122
1123         root = (struct root_entry *)alloc_pgtable_page(iommu->node);
1124         if (!root)
1125                 return -ENOMEM;
1126
1127         __iommu_flush_cache(iommu, root, ROOT_SIZE);
1128
1129         spin_lock_irqsave(&iommu->lock, flags);
1130         iommu->root_entry = root;
1131         spin_unlock_irqrestore(&iommu->lock, flags);
1132
1133         return 0;
1134 }
1135
1136 static void iommu_set_root_entry(struct intel_iommu *iommu)
1137 {
1138         void *addr;
1139         u32 sts;
1140         unsigned long flag;
1141
1142         addr = iommu->root_entry;
1143
1144         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1145         dmar_writeq(iommu->reg + DMAR_RTADDR_REG, virt_to_phys(addr));
1146
1147         writel(iommu->gcmd | DMA_GCMD_SRTP, iommu->reg + DMAR_GCMD_REG);
1148
1149         /* Make sure hardware completes it */
1150         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1151                       readl, (sts & DMA_GSTS_RTPS), sts);
1152
1153         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1154 }
1155
1156 static void iommu_flush_write_buffer(struct intel_iommu *iommu)
1157 {
1158         u32 val;
1159         unsigned long flag;
1160
1161         if (!rwbf_quirk && !cap_rwbf(iommu->cap))
1162                 return;
1163
1164         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1165         writel(iommu->gcmd | DMA_GCMD_WBF, iommu->reg + DMAR_GCMD_REG);
1166
1167         /* Make sure hardware completes it */
1168         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1169                       readl, (!(val & DMA_GSTS_WBFS)), val);
1170
1171         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1172 }
1173
1174 /* return value determines if we need a write buffer flush */
1175 static void __iommu_flush_context(struct intel_iommu *iommu,
1176                                   u16 did, u16 source_id, u8 function_mask,
1177                                   u64 type)
1178 {
1179         u64 val = 0;
1180         unsigned long flag;
1181
1182         switch (type) {
1183         case DMA_CCMD_GLOBAL_INVL:
1184                 val = DMA_CCMD_GLOBAL_INVL;
1185                 break;
1186         case DMA_CCMD_DOMAIN_INVL:
1187                 val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
1188                 break;
1189         case DMA_CCMD_DEVICE_INVL:
1190                 val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
1191                         | DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
1192                 break;
1193         default:
1194                 BUG();
1195         }
1196         val |= DMA_CCMD_ICC;
1197
1198         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1199         dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);
1200
1201         /* Make sure hardware completes it */
1202         IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
1203                 dmar_readq, (!(val & DMA_CCMD_ICC)), val);
1204
1205         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1206 }
1207
1208 /* return value determines if we need a write buffer flush */
1209 static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
1210                                 u64 addr, unsigned int size_order, u64 type)
1211 {
1212         int tlb_offset = ecap_iotlb_offset(iommu->ecap);
1213         u64 val = 0, val_iva = 0;
1214         unsigned long flag;
1215
1216         switch (type) {
1217         case DMA_TLB_GLOBAL_FLUSH:
1218                 /* global flush doesn't need to set IVA_REG */
1219                 val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
1220                 break;
1221         case DMA_TLB_DSI_FLUSH:
1222                 val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
1223                 break;
1224         case DMA_TLB_PSI_FLUSH:
1225                 val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
1226                 /* IH bit is passed in as part of address */
1227                 val_iva = size_order | addr;
1228                 break;
1229         default:
1230                 BUG();
1231         }
1232         /* Note: set drain read/write */
1233 #if 0
1234         /*
1235          * This is probably just to be extra safe. It looks like we can
1236          * ignore it without any impact.
1237          */
1238         if (cap_read_drain(iommu->cap))
1239                 val |= DMA_TLB_READ_DRAIN;
1240 #endif
1241         if (cap_write_drain(iommu->cap))
1242                 val |= DMA_TLB_WRITE_DRAIN;
1243
1244         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1245         /* Note: Only uses first TLB reg currently */
1246         if (val_iva)
1247                 dmar_writeq(iommu->reg + tlb_offset, val_iva);
1248         dmar_writeq(iommu->reg + tlb_offset + 8, val);
1249
1250         /* Make sure hardware completes it */
1251         IOMMU_WAIT_OP(iommu, tlb_offset + 8,
1252                 dmar_readq, (!(val & DMA_TLB_IVT)), val);
1253
1254         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1255
1256         /* check IOTLB invalidation granularity */
1257         if (DMA_TLB_IAIG(val) == 0)
1258                 printk(KERN_ERR"IOMMU: flush IOTLB failed\n");
1259         if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
1260                 pr_debug("IOMMU: tlb flush request %Lx, actual %Lx\n",
1261                         (unsigned long long)DMA_TLB_IIRG(type),
1262                         (unsigned long long)DMA_TLB_IAIG(val));
1263 }
1264
1265 static struct device_domain_info *
1266 iommu_support_dev_iotlb(struct dmar_domain *domain, struct intel_iommu *iommu,
1267                          u8 bus, u8 devfn)
1268 {
1269         int found = 0;
1270         unsigned long flags;
1271         struct device_domain_info *info;
1272         struct pci_dev *pdev;
1273
1274         if (!ecap_dev_iotlb_support(iommu->ecap))
1275                 return NULL;
1276
1277         if (!iommu->qi)
1278                 return NULL;
1279
1280         spin_lock_irqsave(&device_domain_lock, flags);
1281         list_for_each_entry(info, &domain->devices, link)
1282                 if (info->iommu == iommu && info->bus == bus &&
1283                     info->devfn == devfn) {
1284                         found = 1;
1285                         break;
1286                 }
1287         spin_unlock_irqrestore(&device_domain_lock, flags);
1288
1289         if (!found || !info->dev || !dev_is_pci(info->dev))
1290                 return NULL;
1291
1292         pdev = to_pci_dev(info->dev);
1293
1294         if (!pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ATS))
1295                 return NULL;
1296
1297         if (!dmar_find_matched_atsr_unit(pdev))
1298                 return NULL;
1299
1300         return info;
1301 }
1302
1303 static void iommu_enable_dev_iotlb(struct device_domain_info *info)
1304 {
1305         if (!info || !dev_is_pci(info->dev))
1306                 return;
1307
1308         pci_enable_ats(to_pci_dev(info->dev), VTD_PAGE_SHIFT);
1309 }
1310
1311 static void iommu_disable_dev_iotlb(struct device_domain_info *info)
1312 {
1313         if (!info->dev || !dev_is_pci(info->dev) ||
1314             !pci_ats_enabled(to_pci_dev(info->dev)))
1315                 return;
1316
1317         pci_disable_ats(to_pci_dev(info->dev));
1318 }
1319
1320 static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
1321                                   u64 addr, unsigned mask)
1322 {
1323         u16 sid, qdep;
1324         unsigned long flags;
1325         struct device_domain_info *info;
1326
1327         spin_lock_irqsave(&device_domain_lock, flags);
1328         list_for_each_entry(info, &domain->devices, link) {
1329                 struct pci_dev *pdev;
1330                 if (!info->dev || !dev_is_pci(info->dev))
1331                         continue;
1332
1333                 pdev = to_pci_dev(info->dev);
1334                 if (!pci_ats_enabled(pdev))
1335                         continue;
1336
1337                 sid = info->bus << 8 | info->devfn;
1338                 qdep = pci_ats_queue_depth(pdev);
1339                 qi_flush_dev_iotlb(info->iommu, sid, qdep, addr, mask);
1340         }
1341         spin_unlock_irqrestore(&device_domain_lock, flags);
1342 }
1343
1344 static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
1345                                   unsigned long pfn, unsigned int pages, int ih, int map)
1346 {
1347         unsigned int mask = ilog2(__roundup_pow_of_two(pages));
1348         uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT;
1349
1350         BUG_ON(pages == 0);
1351
1352         if (ih)
1353                 ih = 1 << 6;
1354         /*
1355          * Fall back to a domain-selective flush if there is no PSI support
1356          * or the size is too big.
1357          * PSI requires the region size to be a power of two, and the base
1358          * address to be naturally aligned to that size.
1359          */
1360         if (!cap_pgsel_inv(iommu->cap) || mask > cap_max_amask_val(iommu->cap))
1361                 iommu->flush.flush_iotlb(iommu, did, 0, 0,
1362                                                 DMA_TLB_DSI_FLUSH);
1363         else
1364                 iommu->flush.flush_iotlb(iommu, did, addr | ih, mask,
1365                                                 DMA_TLB_PSI_FLUSH);
1366
1367         /*
1368          * In caching mode, changes of pages from non-present to present require
1369          * a flush. However, the device IOTLB doesn't need to be flushed in this case.
1370          */
1371         if (!cap_caching_mode(iommu->cap) || !map)
1372                 iommu_flush_dev_iotlb(iommu->domains[did], addr, mask);
1373 }
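/*
 * Example: a request to flush 9 pages is rounded up to 16, giving mask 4,
 * so the hardware invalidates a naturally aligned 16-page region; if the
 * mask exceeds cap_max_amask_val() the code above falls back to a
 * domain-selective flush instead.
 */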
1374
1375 static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
1376 {
1377         u32 pmen;
1378         unsigned long flags;
1379
1380         raw_spin_lock_irqsave(&iommu->register_lock, flags);
1381         pmen = readl(iommu->reg + DMAR_PMEN_REG);
1382         pmen &= ~DMA_PMEN_EPM;
1383         writel(pmen, iommu->reg + DMAR_PMEN_REG);
1384
1385         /* wait for the protected region status bit to clear */
1386         IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
1387                 readl, !(pmen & DMA_PMEN_PRS), pmen);
1388
1389         raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1390 }
1391
1392 static void iommu_enable_translation(struct intel_iommu *iommu)
1393 {
1394         u32 sts;
1395         unsigned long flags;
1396
1397         raw_spin_lock_irqsave(&iommu->register_lock, flags);
1398         iommu->gcmd |= DMA_GCMD_TE;
1399         writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1400
1401         /* Make sure hardware completes it */
1402         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1403                       readl, (sts & DMA_GSTS_TES), sts);
1404
1405         raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1406 }
1407
1408 static void iommu_disable_translation(struct intel_iommu *iommu)
1409 {
1410         u32 sts;
1411         unsigned long flag;
1412
1413         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1414         iommu->gcmd &= ~DMA_GCMD_TE;
1415         writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1416
1417         /* Make sure hardware completes it */
1418         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1419                       readl, (!(sts & DMA_GSTS_TES)), sts);
1420
1421         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1422 }
1423
1424
1425 static int iommu_init_domains(struct intel_iommu *iommu)
1426 {
1427         unsigned long ndomains;
1428         unsigned long nlongs;
1429
1430         ndomains = cap_ndoms(iommu->cap);
1431         pr_debug("IOMMU%d: Number of Domains supported <%ld>\n",
1432                  iommu->seq_id, ndomains);
1433         nlongs = BITS_TO_LONGS(ndomains);
1434
1435         spin_lock_init(&iommu->lock);
1436
1437         /* TBD: there might be 64K domains,
1438          * consider another allocation scheme for future chips
1439          */
1440         iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
1441         if (!iommu->domain_ids) {
1442                 pr_err("IOMMU%d: allocating domain id array failed\n",
1443                        iommu->seq_id);
1444                 return -ENOMEM;
1445         }
1446         iommu->domains = kcalloc(ndomains, sizeof(struct dmar_domain *),
1447                         GFP_KERNEL);
1448         if (!iommu->domains) {
1449                 pr_err("IOMMU%d: allocating domain array failed\n",
1450                        iommu->seq_id);
1451                 kfree(iommu->domain_ids);
1452                 iommu->domain_ids = NULL;
1453                 return -ENOMEM;
1454         }
1455
1456         /*
1457          * If caching mode is set, then invalid translations are tagged
1458          * with domain id 0. Hence we need to pre-allocate it.
1459          */
1460         if (cap_caching_mode(iommu->cap))
1461                 set_bit(0, iommu->domain_ids);
1462         return 0;
1463 }
1464
1465 static void free_dmar_iommu(struct intel_iommu *iommu)
1466 {
1467         struct dmar_domain *domain;
1468         int i;
1469
1470         if ((iommu->domains) && (iommu->domain_ids)) {
1471                 for_each_set_bit(i, iommu->domain_ids, cap_ndoms(iommu->cap)) {
1472                         /*
1473                          * Domain id 0 is reserved for invalid translation
1474                          * if hardware supports caching mode.
1475                          */
1476                         if (cap_caching_mode(iommu->cap) && i == 0)
1477                                 continue;
1478
1479                         domain = iommu->domains[i];
1480                         clear_bit(i, iommu->domain_ids);
1481                         if (domain_detach_iommu(domain, iommu) == 0 &&
1482                             !domain_type_is_vm(domain))
1483                                 domain_exit(domain);
1484                 }
1485         }
1486
1487         if (iommu->gcmd & DMA_GCMD_TE)
1488                 iommu_disable_translation(iommu);
1489
1490         kfree(iommu->domains);
1491         kfree(iommu->domain_ids);
1492         iommu->domains = NULL;
1493         iommu->domain_ids = NULL;
1494
1495         g_iommus[iommu->seq_id] = NULL;
1496
1497         /* free context mapping */
1498         free_context_table(iommu);
1499 }
1500
1501 static struct dmar_domain *alloc_domain(int flags)
1502 {
1503         /* domain id for virtual machines; it won't be set in the context entry */
1504         static atomic_t vm_domid = ATOMIC_INIT(0);
1505         struct dmar_domain *domain;
1506
1507         domain = alloc_domain_mem();
1508         if (!domain)
1509                 return NULL;
1510
1511         memset(domain, 0, sizeof(*domain));
1512         domain->nid = -1;
1513         domain->flags = flags;
1514         spin_lock_init(&domain->iommu_lock);
1515         INIT_LIST_HEAD(&domain->devices);
1516         if (flags & DOMAIN_FLAG_VIRTUAL_MACHINE)
1517                 domain->id = atomic_inc_return(&vm_domid);
1518
1519         return domain;
1520 }
1521
1522 static int __iommu_attach_domain(struct dmar_domain *domain,
1523                                  struct intel_iommu *iommu)
1524 {
1525         int num;
1526         unsigned long ndomains;
1527
1528         ndomains = cap_ndoms(iommu->cap);
1529         num = find_first_zero_bit(iommu->domain_ids, ndomains);
1530         if (num < ndomains) {
1531                 set_bit(num, iommu->domain_ids);
1532                 iommu->domains[num] = domain;
1533         } else {
1534                 num = -ENOSPC;
1535         }
1536
1537         return num;
1538 }
1539
1540 static int iommu_attach_domain(struct dmar_domain *domain,
1541                                struct intel_iommu *iommu)
1542 {
1543         int num;
1544         unsigned long flags;
1545
1546         spin_lock_irqsave(&iommu->lock, flags);
1547         num = __iommu_attach_domain(domain, iommu);
1548         spin_unlock_irqrestore(&iommu->lock, flags);
1549         if (num < 0)
1550                 pr_err("IOMMU: no free domain ids\n");
1551
1552         return num;
1553 }
1554
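/*
 * VM domains have a per-IOMMU domain id: reuse the id if @domain is already
 * attached to @iommu, otherwise allocate a new one.  Called with iommu->lock
 * held.
 */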
1555 static int iommu_attach_vm_domain(struct dmar_domain *domain,
1556                                   struct intel_iommu *iommu)
1557 {
1558         int num;
1559         unsigned long ndomains;
1560
1561         ndomains = cap_ndoms(iommu->cap);
1562         for_each_set_bit(num, iommu->domain_ids, ndomains)
1563                 if (iommu->domains[num] == domain)
1564                         return num;
1565
1566         return __iommu_attach_domain(domain, iommu);
1567 }
1568
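/*
 * Release the domain id that @domain occupies on @iommu.  VM and static
 * identity domains are looked up by pointer, since their ids differ per
 * IOMMU; other domains are indexed directly by domain->id.
 */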
1569 static void iommu_detach_domain(struct dmar_domain *domain,
1570                                 struct intel_iommu *iommu)
1571 {
1572         unsigned long flags;
1573         int num, ndomains;
1574
1575         spin_lock_irqsave(&iommu->lock, flags);
1576         if (domain_type_is_vm_or_si(domain)) {
1577                 ndomains = cap_ndoms(iommu->cap);
1578                 for_each_set_bit(num, iommu->domain_ids, ndomains) {
1579                         if (iommu->domains[num] == domain) {
1580                                 clear_bit(num, iommu->domain_ids);
1581                                 iommu->domains[num] = NULL;
1582                                 break;
1583                         }
1584                 }
1585         } else {
1586                 clear_bit(domain->id, iommu->domain_ids);
1587                 iommu->domains[domain->id] = NULL;
1588         }
1589         spin_unlock_irqrestore(&iommu->lock, flags);
1590 }
1591
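/*
 * Account @iommu as one of the units translating for @domain and refresh the
 * domain's coherency, snooping and superpage capabilities accordingly.
 */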
1592 static void domain_attach_iommu(struct dmar_domain *domain,
1593                                struct intel_iommu *iommu)
1594 {
1595         unsigned long flags;
1596
1597         spin_lock_irqsave(&domain->iommu_lock, flags);
1598         if (!test_and_set_bit(iommu->seq_id, domain->iommu_bmp)) {
1599                 domain->iommu_count++;
1600                 if (domain->iommu_count == 1)
1601                         domain->nid = iommu->node;
1602                 domain_update_iommu_cap(domain);
1603         }
1604         spin_unlock_irqrestore(&domain->iommu_lock, flags);
1605 }
1606
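/*
 * Drop @iommu from the set of units translating for @domain.  Returns the
 * number of IOMMUs still attached (0 means the domain may be destroyed), or
 * INT_MAX if @iommu was not attached in the first place.
 */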
1607 static int domain_detach_iommu(struct dmar_domain *domain,
1608                                struct intel_iommu *iommu)
1609 {
1610         unsigned long flags;
1611         int count = INT_MAX;
1612
1613         spin_lock_irqsave(&domain->iommu_lock, flags);
1614         if (test_and_clear_bit(iommu->seq_id, domain->iommu_bmp)) {
1615                 count = --domain->iommu_count;
1616                 domain_update_iommu_cap(domain);
1617         }
1618         spin_unlock_irqrestore(&domain->iommu_lock, flags);
1619
1620         return count;
1621 }
1622
1623 static struct iova_domain reserved_iova_list;
1624 static struct lock_class_key reserved_rbtree_key;
1625
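/*
 * Build the global list of IOVA ranges that must never be handed out for
 * DMA: the IOAPIC window and every PCI MMIO resource (to prevent peer-to-peer
 * accesses).  The list is copied into each newly initialized domain.
 */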
1626 static int dmar_init_reserved_ranges(void)
1627 {
1628         struct pci_dev *pdev = NULL;
1629         struct iova *iova;
1630         int i;
1631
1632         init_iova_domain(&reserved_iova_list, DMA_32BIT_PFN);
1633
1634         lockdep_set_class(&reserved_iova_list.iova_rbtree_lock,
1635                 &reserved_rbtree_key);
1636
1637         /* IOAPIC ranges shouldn't be accessed by DMA */
1638         iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
1639                 IOVA_PFN(IOAPIC_RANGE_END));
1640         if (!iova) {
1641                 printk(KERN_ERR "Reserve IOAPIC range failed\n");
1642                 return -ENODEV;
1643         }
1644
1645         /* Reserve all PCI MMIO to avoid peer-to-peer access */
1646         for_each_pci_dev(pdev) {
1647                 struct resource *r;
1648
1649                 for (i = 0; i < PCI_NUM_RESOURCES; i++) {
1650                         r = &pdev->resource[i];
1651                         if (!r->flags || !(r->flags & IORESOURCE_MEM))
1652                                 continue;
1653                         iova = reserve_iova(&reserved_iova_list,
1654                                             IOVA_PFN(r->start),
1655                                             IOVA_PFN(r->end));
1656                         if (!iova) {
1657                                 printk(KERN_ERR "Reserve iova failed\n");
1658                                 return -ENODEV;
1659                         }
1660                 }
1661         }
1662         return 0;
1663 }
1664
1665 static void domain_reserve_special_ranges(struct dmar_domain *domain)
1666 {
1667         copy_reserved_iova(&reserved_iova_list, &domain->iovad);
1668 }
1669
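/*
 * Round a guest address width up to the next width that maps onto a whole
 * number of page-table levels: 12 offset bits plus a multiple of 9 bits per
 * level, capped at 64.  For example 39, 48 and 57 are returned unchanged,
 * while 40 rounds up to 48.
 */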
1670 static inline int guestwidth_to_adjustwidth(int gaw)
1671 {
1672         int agaw;
1673         int r = (gaw - 12) % 9;
1674
1675         if (r == 0)
1676                 agaw = gaw;
1677         else
1678                 agaw = gaw + 9 - r;
1679         if (agaw > 64)
1680                 agaw = 64;
1681         return agaw;
1682 }
1683
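/*
 * Set up the iova allocator, address width, capability flags and top-level
 * page table of a freshly attached domain, based on what the domain's IOMMU
 * supports.
 */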
1684 static int domain_init(struct dmar_domain *domain, int guest_width)
1685 {
1686         struct intel_iommu *iommu;
1687         int adjust_width, agaw;
1688         unsigned long sagaw;
1689
1690         init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
1691         domain_reserve_special_ranges(domain);
1692
1693         /* calculate AGAW */
1694         iommu = domain_get_iommu(domain);
1695         if (guest_width > cap_mgaw(iommu->cap))
1696                 guest_width = cap_mgaw(iommu->cap);
1697         domain->gaw = guest_width;
1698         adjust_width = guestwidth_to_adjustwidth(guest_width);
1699         agaw = width_to_agaw(adjust_width);
1700         sagaw = cap_sagaw(iommu->cap);
1701         if (!test_bit(agaw, &sagaw)) {
1702                 /* hardware doesn't support it, choose a bigger one */
1703                 pr_debug("IOMMU: hardware doesn't support agaw %d\n", agaw);
1704                 agaw = find_next_bit(&sagaw, 5, agaw);
1705                 if (agaw >= 5)
1706                         return -ENODEV;
1707         }
1708         domain->agaw = agaw;
1709
1710         if (ecap_coherent(iommu->ecap))
1711                 domain->iommu_coherency = 1;
1712         else
1713                 domain->iommu_coherency = 0;
1714
1715         if (ecap_sc_support(iommu->ecap))
1716                 domain->iommu_snooping = 1;
1717         else
1718                 domain->iommu_snooping = 0;
1719
1720         if (intel_iommu_superpage)
1721                 domain->iommu_superpage = fls(cap_super_page_val(iommu->cap));
1722         else
1723                 domain->iommu_superpage = 0;
1724
1725         domain->nid = iommu->node;
1726
1727         /* always allocate the top pgd */
1728         domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
1729         if (!domain->pgd)
1730                 return -ENOMEM;
1731         __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE);
1732         return 0;
1733 }
1734
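/*
 * Tear down a domain: detach its devices, release its iovas, unmap and free
 * its page tables, drop it from every IOMMU it is attached to and finally
 * free the domain structure itself.
 */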
1735 static void domain_exit(struct dmar_domain *domain)
1736 {
1737         struct dmar_drhd_unit *drhd;
1738         struct intel_iommu *iommu;
1739         struct page *freelist = NULL;
1740
1741         /* Domain 0 is reserved, so don't process it */
1742         if (!domain)
1743                 return;
1744
1745         /* Flush any lazy unmaps that may reference this domain */
1746         if (!intel_iommu_strict)
1747                 flush_unmaps_timeout(0);
1748
1749         /* remove associated devices */
1750         domain_remove_dev_info(domain);
1751
1752         /* destroy iovas */
1753         put_iova_domain(&domain->iovad);
1754
1755         freelist = domain_unmap(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
1756
1757         /* clear attached or cached domains */
1758         rcu_read_lock();
1759         for_each_active_iommu(iommu, drhd)
1760                 iommu_detach_domain(domain, iommu);
1761         rcu_read_unlock();
1762
1763         dma_free_pagelist(freelist);
1764
1765         free_domain_mem(domain);
1766 }
1767
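/*
 * Program the context entry for (@bus, @devfn) on @iommu so the device is
 * translated through @domain (or passes through untranslated), then flush
 * the context cache and IOTLB as required by the caching mode.
 */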
1768 static int domain_context_mapping_one(struct dmar_domain *domain,
1769                                       struct intel_iommu *iommu,
1770                                       u8 bus, u8 devfn, int translation)
1771 {
1772         struct context_entry *context;
1773         unsigned long flags;
1774         struct dma_pte *pgd;
1775         int id;
1776         int agaw;
1777         struct device_domain_info *info = NULL;
1778
1779         pr_debug("Set context mapping for %02x:%02x.%d\n",
1780                 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
1781
1782         BUG_ON(!domain->pgd);
1783         BUG_ON(translation != CONTEXT_TT_PASS_THROUGH &&
1784                translation != CONTEXT_TT_MULTI_LEVEL);
1785
1786         context = device_to_context_entry(iommu, bus, devfn);
1787         if (!context)
1788                 return -ENOMEM;
1789         spin_lock_irqsave(&iommu->lock, flags);
1790         if (context_present(context)) {
1791                 spin_unlock_irqrestore(&iommu->lock, flags);
1792                 return 0;
1793         }
1794
1795         id = domain->id;
1796         pgd = domain->pgd;
1797
1798         if (domain_type_is_vm_or_si(domain)) {
1799                 if (domain_type_is_vm(domain)) {
1800                         id = iommu_attach_vm_domain(domain, iommu);
1801                         if (id < 0) {
1802                                 spin_unlock_irqrestore(&iommu->lock, flags);
1803                                 pr_err("IOMMU: no free domain ids\n");
1804                                 return -EFAULT;
1805                         }
1806                 }
1807
1808                 /* Skip top levels of page tables for
1809                  * iommu which has less agaw than default.
1810                  * Unnecessary for PT mode.
1811                  */
1812                 if (translation != CONTEXT_TT_PASS_THROUGH) {
1813                         for (agaw = domain->agaw; agaw != iommu->agaw; agaw--) {
1814                                 pgd = phys_to_virt(dma_pte_addr(pgd));
1815                                 if (!dma_pte_present(pgd)) {
1816                                         spin_unlock_irqrestore(&iommu->lock, flags);
1817                                         return -ENOMEM;
1818                                 }
1819                         }
1820                 }
1821         }
1822
1823         context_set_domain_id(context, id);
1824
1825         if (translation != CONTEXT_TT_PASS_THROUGH) {
1826                 info = iommu_support_dev_iotlb(domain, iommu, bus, devfn);
1827                 translation = info ? CONTEXT_TT_DEV_IOTLB :
1828                                      CONTEXT_TT_MULTI_LEVEL;
1829         }
1830         /*
1831          * In pass through mode, AW must be programmed to indicate the largest
1832          * AGAW value supported by hardware. And ASR is ignored by hardware.
1833          */
1834         if (unlikely(translation == CONTEXT_TT_PASS_THROUGH))
1835                 context_set_address_width(context, iommu->msagaw);
1836         else {
1837                 context_set_address_root(context, virt_to_phys(pgd));
1838                 context_set_address_width(context, iommu->agaw);
1839         }
1840
1841         context_set_translation_type(context, translation);
1842         context_set_fault_enable(context);
1843         context_set_present(context);
1844         domain_flush_cache(domain, context, sizeof(*context));
1845
1846         /*
1847          * It's a non-present to present mapping. If hardware doesn't cache
1848          * non-present entries we only need to flush the write-buffer. If it
1849          * _does_ cache non-present entries, then it does so in the special
1850          * domain #0, which we have to flush:
1851          */
1852         if (cap_caching_mode(iommu->cap)) {
1853                 iommu->flush.flush_context(iommu, 0,
1854                                            (((u16)bus) << 8) | devfn,
1855                                            DMA_CCMD_MASK_NOBIT,
1856                                            DMA_CCMD_DEVICE_INVL);
1857                 iommu->flush.flush_iotlb(iommu, id, 0, 0, DMA_TLB_DSI_FLUSH);
1858         } else {
1859                 iommu_flush_write_buffer(iommu);
1860         }
1861         iommu_enable_dev_iotlb(info);
1862         spin_unlock_irqrestore(&iommu->lock, flags);
1863
1864         domain_attach_iommu(domain, iommu);
1865
1866         return 0;
1867 }
1868
1869 struct domain_context_mapping_data {
1870         struct dmar_domain *domain;
1871         struct intel_iommu *iommu;
1872         int translation;
1873 };
1874
1875 static int domain_context_mapping_cb(struct pci_dev *pdev,
1876                                      u16 alias, void *opaque)
1877 {
1878         struct domain_context_mapping_data *data = opaque;
1879
1880         return domain_context_mapping_one(data->domain, data->iommu,
1881                                           PCI_BUS_NUM(alias), alias & 0xff,
1882                                           data->translation);
1883 }
1884
1885 static int
1886 domain_context_mapping(struct dmar_domain *domain, struct device *dev,
1887                        int translation)
1888 {
1889         struct intel_iommu *iommu;
1890         u8 bus, devfn;
1891         struct domain_context_mapping_data data;
1892
1893         iommu = device_to_iommu(dev, &bus, &devfn);
1894         if (!iommu)
1895                 return -ENODEV;
1896
1897         if (!dev_is_pci(dev))
1898                 return domain_context_mapping_one(domain, iommu, bus, devfn,
1899                                                   translation);
1900
1901         data.domain = domain;
1902         data.iommu = iommu;
1903         data.translation = translation;
1904
1905         return pci_for_each_dma_alias(to_pci_dev(dev),
1906                                       &domain_context_mapping_cb, &data);
1907 }
1908
1909 static int domain_context_mapped_cb(struct pci_dev *pdev,
1910                                     u16 alias, void *opaque)
1911 {
1912         struct intel_iommu *iommu = opaque;
1913
1914         return !device_context_mapped(iommu, PCI_BUS_NUM(alias), alias & 0xff);
1915 }
1916
1917 static int domain_context_mapped(struct device *dev)
1918 {
1919         struct intel_iommu *iommu;
1920         u8 bus, devfn;
1921
1922         iommu = device_to_iommu(dev, &bus, &devfn);
1923         if (!iommu)
1924                 return -ENODEV;
1925
1926         if (!dev_is_pci(dev))
1927                 return device_context_mapped(iommu, bus, devfn);
1928
1929         return !pci_for_each_dma_alias(to_pci_dev(dev),
1930                                        domain_context_mapped_cb, iommu);
1931 }
1932
1933 /* Returns a number of VTD pages, but aligned to MM page size */
1934 static inline unsigned long aligned_nrpages(unsigned long host_addr,
1935                                             size_t size)
1936 {
1937         host_addr &= ~PAGE_MASK;
1938         return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT;
1939 }
1940
1941 /* Return largest possible superpage level for a given mapping */
1942 static inline int hardware_largepage_caps(struct dmar_domain *domain,
1943                                           unsigned long iov_pfn,
1944                                           unsigned long phy_pfn,
1945                                           unsigned long pages)
1946 {
1947         int support, level = 1;
1948         unsigned long pfnmerge;
1949
1950         support = domain->iommu_superpage;
1951
1952         /* To use a large page, the virtual *and* physical addresses
1953            must be aligned to 2MiB/1GiB/etc. Lower bits set in either
1954            of them will mean we have to use smaller pages. So just
1955            merge them and check both at once. */
1956         pfnmerge = iov_pfn | phy_pfn;
1957
1958         while (support && !(pfnmerge & ~VTD_STRIDE_MASK)) {
1959                 pages >>= VTD_STRIDE_SHIFT;
1960                 if (!pages)
1961                         break;
1962                 pfnmerge >>= VTD_STRIDE_SHIFT;
1963                 level++;
1964                 support--;
1965         }
1966         return level;
1967 }
1968
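/*
 * Core mapping loop: populate @domain's page tables so that @nr_pages IO
 * virtual pages starting at @iov_pfn point either at the pages of @sg or at
 * the contiguous range starting at @phys_pfn, using superpages whenever the
 * alignment and remaining size allow it.
 */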
1969 static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
1970                             struct scatterlist *sg, unsigned long phys_pfn,
1971                             unsigned long nr_pages, int prot)
1972 {
1973         struct dma_pte *first_pte = NULL, *pte = NULL;
1974         phys_addr_t uninitialized_var(pteval);
1975         int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
1976         unsigned long sg_res;
1977         unsigned int largepage_lvl = 0;
1978         unsigned long lvl_pages = 0;
1979
1980         BUG_ON(addr_width < BITS_PER_LONG && (iov_pfn + nr_pages - 1) >> addr_width);
1981
1982         if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
1983                 return -EINVAL;
1984
1985         prot &= DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP;
1986
1987         if (sg)
1988                 sg_res = 0;
1989         else {
1990                 sg_res = nr_pages + 1;
1991                 pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | prot;
1992         }
1993
1994         while (nr_pages > 0) {
1995                 uint64_t tmp;
1996
1997                 if (!sg_res) {
1998                         sg_res = aligned_nrpages(sg->offset, sg->length);
1999                         sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + sg->offset;
2000                         sg->dma_length = sg->length;
2001                         pteval = page_to_phys(sg_page(sg)) | prot;
2002                         phys_pfn = pteval >> VTD_PAGE_SHIFT;
2003                 }
2004
2005                 if (!pte) {
2006                         largepage_lvl = hardware_largepage_caps(domain, iov_pfn, phys_pfn, sg_res);
2007
2008                         first_pte = pte = pfn_to_dma_pte(domain, iov_pfn, &largepage_lvl);
2009                         if (!pte)
2010                                 return -ENOMEM;
2011                         /* It is a large page */
2012                         if (largepage_lvl > 1) {
2013                                 pteval |= DMA_PTE_LARGE_PAGE;
2014                                 /* Ensure that old small page tables are removed to make room
2015                                    for superpage, if they exist. */
2016                                 dma_pte_clear_range(domain, iov_pfn,
2017                                                     iov_pfn + lvl_to_nr_pages(largepage_lvl) - 1);
2018                                 dma_pte_free_pagetable(domain, iov_pfn,
2019                                                        iov_pfn + lvl_to_nr_pages(largepage_lvl) - 1);
2020                         } else {
2021                                 pteval &= ~(uint64_t)DMA_PTE_LARGE_PAGE;
2022                         }
2023
2024                 }
2025                 /* We don't need a lock here; nobody else
2026                  * touches the iova range
2027                  */
2028                 tmp = cmpxchg64_local(&pte->val, 0ULL, pteval);
2029                 if (tmp) {
2030                         static int dumps = 5;
2031                         printk(KERN_CRIT "ERROR: DMA PTE for vPFN 0x%lx already set (to %llx not %llx)\n",
2032                                iov_pfn, tmp, (unsigned long long)pteval);
2033                         if (dumps) {
2034                                 dumps--;
2035                                 debug_dma_dump_mappings(NULL);
2036                         }
2037                         WARN_ON(1);
2038                 }
2039
2040                 lvl_pages = lvl_to_nr_pages(largepage_lvl);
2041
2042                 BUG_ON(nr_pages < lvl_pages);
2043                 BUG_ON(sg_res < lvl_pages);
2044
2045                 nr_pages -= lvl_pages;
2046                 iov_pfn += lvl_pages;
2047                 phys_pfn += lvl_pages;
2048                 pteval += lvl_pages * VTD_PAGE_SIZE;
2049                 sg_res -= lvl_pages;
2050
2051                 /* If the next PTE would be the first in a new page, then we
2052                    need to flush the cache on the entries we've just written.
2053                    And then we'll need to recalculate 'pte', so clear it and
2054                    let it get set again in the if (!pte) block above.
2055
2056                    If we're done (!nr_pages) we need to flush the cache too.
2057
2058                    Also if we've been setting superpages, we may need to
2059                    recalculate 'pte' and switch back to smaller pages for the
2060                    end of the mapping, if the trailing size is not enough to
2061                    use another superpage (i.e. sg_res < lvl_pages). */
2062                 pte++;
2063                 if (!nr_pages || first_pte_in_page(pte) ||
2064                     (largepage_lvl > 1 && sg_res < lvl_pages)) {
2065                         domain_flush_cache(domain, first_pte,
2066                                            (void *)pte - (void *)first_pte);
2067                         pte = NULL;
2068                 }
2069
2070                 if (!sg_res && nr_pages)
2071                         sg = sg_next(sg);
2072         }
2073         return 0;
2074 }
2075
2076 static inline int domain_sg_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2077                                     struct scatterlist *sg, unsigned long nr_pages,
2078                                     int prot)
2079 {
2080         return __domain_mapping(domain, iov_pfn, sg, 0, nr_pages, prot);
2081 }
2082
2083 static inline int domain_pfn_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2084                                      unsigned long phys_pfn, unsigned long nr_pages,
2085                                      int prot)
2086 {
2087         return __domain_mapping(domain, iov_pfn, NULL, phys_pfn, nr_pages, prot);
2088 }
2089
2090 static void iommu_detach_dev(struct intel_iommu *iommu, u8 bus, u8 devfn)
2091 {
2092         if (!iommu)
2093                 return;
2094
2095         clear_context_table(iommu, bus, devfn);
2096         iommu->flush.flush_context(iommu, 0, 0, 0,
2097                                            DMA_CCMD_GLOBAL_INVL);
2098         iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
2099 }
2100
2101 static inline void unlink_domain_info(struct device_domain_info *info)
2102 {
2103         assert_spin_locked(&device_domain_lock);
2104         list_del(&info->link);
2105         list_del(&info->global);
2106         if (info->dev)
2107                 info->dev->archdata.iommu = NULL;
2108 }
2109
2110 static void domain_remove_dev_info(struct dmar_domain *domain)
2111 {
2112         struct device_domain_info *info, *tmp;
2113         unsigned long flags;
2114
2115         spin_lock_irqsave(&device_domain_lock, flags);
2116         list_for_each_entry_safe(info, tmp, &domain->devices, link) {
2117                 unlink_domain_info(info);
2118                 spin_unlock_irqrestore(&device_domain_lock, flags);
2119
2120                 iommu_disable_dev_iotlb(info);
2121                 iommu_detach_dev(info->iommu, info->bus, info->devfn);
2122
2123                 if (domain_type_is_vm(domain)) {
2124                         iommu_detach_dependent_devices(info->iommu, info->dev);
2125                         domain_detach_iommu(domain, info->iommu);
2126                 }
2127
2128                 free_devinfo_mem(info);
2129                 spin_lock_irqsave(&device_domain_lock, flags);
2130         }
2131         spin_unlock_irqrestore(&device_domain_lock, flags);
2132 }
2133
2134 /*
2135  * find_domain
2136  * Note: we use struct device->archdata.iommu to store the info
2137  */
2138 static struct dmar_domain *find_domain(struct device *dev)
2139 {
2140         struct device_domain_info *info;
2141
2142         /* No lock here, assumes no domain exit in normal case */
2143         info = dev->archdata.iommu;
2144         if (info)
2145                 return info->domain;
2146         return NULL;
2147 }
2148
2149 static inline struct device_domain_info *
2150 dmar_search_domain_by_dev_info(int segment, int bus, int devfn)
2151 {
2152         struct device_domain_info *info;
2153
2154         list_for_each_entry(info, &device_domain_list, global)
2155                 if (info->iommu->segment == segment && info->bus == bus &&
2156                     info->devfn == devfn)
2157                         return info;
2158
2159         return NULL;
2160 }
2161
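/*
 * Record that device (@bus, @devfn) behind @iommu now belongs to @domain.
 * If the device (or, for aliases, the bus/devfn pair) already has a domain,
 * that existing domain is returned and the caller must free the one it
 * passed in.
 */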
2162 static struct dmar_domain *dmar_insert_dev_info(struct intel_iommu *iommu,
2163                                                 int bus, int devfn,
2164                                                 struct device *dev,
2165                                                 struct dmar_domain *domain)
2166 {
2167         struct dmar_domain *found = NULL;
2168         struct device_domain_info *info;
2169         unsigned long flags;
2170
2171         info = alloc_devinfo_mem();
2172         if (!info)
2173                 return NULL;
2174
2175         info->bus = bus;
2176         info->devfn = devfn;
2177         info->dev = dev;
2178         info->domain = domain;
2179         info->iommu = iommu;
2180
2181         spin_lock_irqsave(&device_domain_lock, flags);
2182         if (dev)
2183                 found = find_domain(dev);
2184         else {
2185                 struct device_domain_info *info2;
2186                 info2 = dmar_search_domain_by_dev_info(iommu->segment, bus, devfn);
2187                 if (info2)
2188                         found = info2->domain;
2189         }
2190         if (found) {
2191                 spin_unlock_irqrestore(&device_domain_lock, flags);
2192                 free_devinfo_mem(info);
2193                 /* Caller must free the original domain */
2194                 return found;
2195         }
2196
2197         list_add(&info->link, &domain->devices);
2198         list_add(&info->global, &device_domain_list);
2199         if (dev)
2200                 dev->archdata.iommu = info;
2201         spin_unlock_irqrestore(&device_domain_lock, flags);
2202
2203         return domain;
2204 }
2205
2206 static int get_last_alias(struct pci_dev *pdev, u16 alias, void *opaque)
2207 {
2208         *(u16 *)opaque = alias;
2209         return 0;
2210 }
2211
2212 /* domain is initialized */
2213 static struct dmar_domain *get_domain_for_dev(struct device *dev, int gaw)
2214 {
2215         struct dmar_domain *domain, *tmp;
2216         struct intel_iommu *iommu;
2217         struct device_domain_info *info;
2218         u16 dma_alias;
2219         unsigned long flags;
2220         u8 bus, devfn;
2221
2222         domain = find_domain(dev);
2223         if (domain)
2224                 return domain;
2225
2226         iommu = device_to_iommu(dev, &bus, &devfn);
2227         if (!iommu)
2228                 return NULL;
2229
2230         if (dev_is_pci(dev)) {
2231                 struct pci_dev *pdev = to_pci_dev(dev);
2232
2233                 pci_for_each_dma_alias(pdev, get_last_alias, &dma_alias);
2234
2235                 spin_lock_irqsave(&device_domain_lock, flags);
2236                 info = dmar_search_domain_by_dev_info(pci_domain_nr(pdev->bus),
2237                                                       PCI_BUS_NUM(dma_alias),
2238                                                       dma_alias & 0xff);
2239                 if (info) {
2240                         iommu = info->iommu;
2241                         domain = info->domain;
2242                 }
2243                 spin_unlock_irqrestore(&device_domain_lock, flags);
2244
2245                 /* DMA alias already has a domain, use it */
2246                 if (info)
2247                         goto found_domain;
2248         }
2249
2250         /* Allocate and initialize new domain for the device */
2251         domain = alloc_domain(0);
2252         if (!domain)
2253                 return NULL;
2254         domain->id = iommu_attach_domain(domain, iommu);
2255         if (domain->id < 0) {
2256                 free_domain_mem(domain);
2257                 return NULL;
2258         }
2259         domain_attach_iommu(domain, iommu);
2260         if (domain_init(domain, gaw)) {
2261                 domain_exit(domain);
2262                 return NULL;
2263         }
2264
2265         /* register PCI DMA alias device */
2266         if (dev_is_pci(dev)) {
2267                 tmp = dmar_insert_dev_info(iommu, PCI_BUS_NUM(dma_alias),
2268                                            dma_alias & 0xff, NULL, domain);
2269
2270                 if (!tmp || tmp != domain) {
2271                         domain_exit(domain);
2272                         domain = tmp;
2273                 }
2274
2275                 if (!domain)
2276                         return NULL;
2277         }
2278
2279 found_domain:
2280         tmp = dmar_insert_dev_info(iommu, bus, devfn, dev, domain);
2281
2282         if (!tmp || tmp != domain) {
2283                 domain_exit(domain);
2284                 domain = tmp;
2285         }
2286
2287         return domain;
2288 }
2289
2290 static int iommu_identity_mapping;
2291 #define IDENTMAP_ALL            1
2292 #define IDENTMAP_GFX            2
2293 #define IDENTMAP_AZALIA         4
2294
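/*
 * Install a 1:1 mapping for the physical range [start, end] in @domain and
 * reserve the matching iova range so it is never reused for dynamic DMA
 * allocations.
 */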
2295 static int iommu_domain_identity_map(struct dmar_domain *domain,
2296                                      unsigned long long start,
2297                                      unsigned long long end)
2298 {
2299         unsigned long first_vpfn = start >> VTD_PAGE_SHIFT;
2300         unsigned long last_vpfn = end >> VTD_PAGE_SHIFT;
2301
2302         if (!reserve_iova(&domain->iovad, dma_to_mm_pfn(first_vpfn),
2303                           dma_to_mm_pfn(last_vpfn))) {
2304                 printk(KERN_ERR "IOMMU: reserve iova failed\n");
2305                 return -ENOMEM;
2306         }
2307
2308         pr_debug("Mapping reserved region %llx-%llx for domain %d\n",
2309                  start, end, domain->id);
2310         /*
2311          * RMRR range might have overlap with physical memory range,
2312          * clear it first
2313          */
2314         dma_pte_clear_range(domain, first_vpfn, last_vpfn);
2315
2316         return domain_pfn_mapping(domain, first_vpfn, first_vpfn,
2317                                   last_vpfn - first_vpfn + 1,
2318                                   DMA_PTE_READ|DMA_PTE_WRITE);
2319 }
2320
2321 static int iommu_prepare_identity_map(struct device *dev,
2322                                       unsigned long long start,
2323                                       unsigned long long end)
2324 {
2325         struct dmar_domain *domain;
2326         int ret;
2327
2328         domain = get_domain_for_dev(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
2329         if (!domain)
2330                 return -ENOMEM;
2331
2332         /* For _hardware_ passthrough, don't bother. But for software
2333            passthrough, we do it anyway -- it may indicate a memory
2334            range which is reserved in E820 and so didn't get set
2335            up in si_domain to start with */
2336         if (domain == si_domain && hw_pass_through) {
2337                 printk("Ignoring identity map for HW passthrough device %s [0x%Lx - 0x%Lx]\n",
2338                        dev_name(dev), start, end);
2339                 return 0;
2340         }
2341
2342         printk(KERN_INFO
2343                "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
2344                dev_name(dev), start, end);
2345
2346         if (end < start) {
2347                 WARN(1, "Your BIOS is broken; RMRR ends before it starts!\n"
2348                         "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2349                         dmi_get_system_info(DMI_BIOS_VENDOR),
2350                         dmi_get_system_info(DMI_BIOS_VERSION),
2351                         dmi_get_system_info(DMI_PRODUCT_VERSION));
2352                 ret = -EIO;
2353                 goto error;
2354         }
2355
2356         if (end >> agaw_to_width(domain->agaw)) {
2357                 WARN(1, "Your BIOS is broken; RMRR exceeds permitted address width (%d bits)\n"
2358                      "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2359                      agaw_to_width(domain->agaw),
2360                      dmi_get_system_info(DMI_BIOS_VENDOR),
2361                      dmi_get_system_info(DMI_BIOS_VERSION),
2362                      dmi_get_system_info(DMI_PRODUCT_VERSION));
2363                 ret = -EIO;
2364                 goto error;
2365         }
2366
2367         ret = iommu_domain_identity_map(domain, start, end);
2368         if (ret)
2369                 goto error;
2370
2371         /* context entry init */
2372         ret = domain_context_mapping(domain, dev, CONTEXT_TT_MULTI_LEVEL);
2373         if (ret)
2374                 goto error;
2375
2376         return 0;
2377
2378  error:
2379         domain_exit(domain);
2380         return ret;
2381 }
2382
2383 static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
2384                                          struct device *dev)
2385 {
2386         if (dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
2387                 return 0;
2388         return iommu_prepare_identity_map(dev, rmrr->base_address,
2389                                           rmrr->end_address);
2390 }
2391
2392 #ifdef CONFIG_INTEL_IOMMU_FLOPPY_WA
2393 static inline void iommu_prepare_isa(void)
2394 {
2395         struct pci_dev *pdev;
2396         int ret;
2397
2398         pdev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL);
2399         if (!pdev)
2400                 return;
2401
2402         printk(KERN_INFO "IOMMU: Prepare 0-16MiB unity mapping for LPC\n");
2403         ret = iommu_prepare_identity_map(&pdev->dev, 0, 16*1024*1024 - 1);
2404
2405         if (ret)
2406                 printk(KERN_ERR "IOMMU: Failed to create 0-16MiB identity map; "
2407                        "floppy might not work\n");
2408
2409         pci_dev_put(pdev);
2410 }
2411 #else
2412 static inline void iommu_prepare_isa(void)
2413 {
2414         return;
2415 }
2416 #endif /* !CONFIG_INTEL_IOMMU_FLOPPY_WA */
2417
2418 static int md_domain_init(struct dmar_domain *domain, int guest_width);
2419
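/*
 * Create the static identity (si) domain shared by all IOMMUs and, unless
 * hardware pass-through is in use, pre-populate it with 1:1 mappings for
 * every usable range of physical memory.
 */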
2420 static int __init si_domain_init(int hw)
2421 {
2422         struct dmar_drhd_unit *drhd;
2423         struct intel_iommu *iommu;
2424         int nid, ret = 0;
2425         bool first = true;
2426
2427         si_domain = alloc_domain(DOMAIN_FLAG_STATIC_IDENTITY);
2428         if (!si_domain)
2429                 return -EFAULT;
2430
2431         for_each_active_iommu(iommu, drhd) {
2432                 ret = iommu_attach_domain(si_domain, iommu);
2433                 if (ret < 0) {
2434                         domain_exit(si_domain);
2435                         return -EFAULT;
2436                 } else if (first) {
2437                         si_domain->id = ret;
2438                         first = false;
2439                 } else if (si_domain->id != ret) {
2440                         domain_exit(si_domain);
2441                         return -EFAULT;
2442                 }
2443                 domain_attach_iommu(si_domain, iommu);
2444         }
2445
2446         if (md_domain_init(si_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
2447                 domain_exit(si_domain);
2448                 return -EFAULT;
2449         }
2450
2451         pr_debug("IOMMU: identity mapping domain is domain %d\n",
2452                  si_domain->id);
2453
2454         if (hw)
2455                 return 0;
2456
2457         for_each_online_node(nid) {
2458                 unsigned long start_pfn, end_pfn;
2459                 int i;
2460
2461                 for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
2462                         ret = iommu_domain_identity_map(si_domain,
2463                                         PFN_PHYS(start_pfn), PFN_PHYS(end_pfn));
2464                         if (ret)
2465                                 return ret;
2466                 }
2467         }
2468
2469         return 0;
2470 }
2471
2472 static int identity_mapping(struct device *dev)
2473 {
2474         struct device_domain_info *info;
2475
2476         if (likely(!iommu_identity_mapping))
2477                 return 0;
2478
2479         info = dev->archdata.iommu;
2480         if (info && info != DUMMY_DEVICE_DOMAIN_INFO)
2481                 return (info->domain == si_domain);
2482
2483         return 0;
2484 }
2485
2486 static int domain_add_dev_info(struct dmar_domain *domain,
2487                                struct device *dev, int translation)
2488 {
2489         struct dmar_domain *ndomain;
2490         struct intel_iommu *iommu;
2491         u8 bus, devfn;
2492         int ret;
2493
2494         iommu = device_to_iommu(dev, &bus, &devfn);
2495         if (!iommu)
2496                 return -ENODEV;
2497
2498         ndomain = dmar_insert_dev_info(iommu, bus, devfn, dev, domain);
2499         if (ndomain != domain)
2500                 return -EBUSY;
2501
2502         ret = domain_context_mapping(domain, dev, translation);
2503         if (ret) {
2504                 domain_remove_one_dev_info(domain, dev);
2505                 return ret;
2506         }
2507
2508         return 0;
2509 }
2510
2511 static bool device_has_rmrr(struct device *dev)
2512 {
2513         struct dmar_rmrr_unit *rmrr;
2514         struct device *tmp;
2515         int i;
2516
2517         rcu_read_lock();
2518         for_each_rmrr_units(rmrr) {
2519                 /*
2520                  * Return TRUE if this RMRR contains the device that
2521                  * is passed in.
2522                  */
2523                 for_each_active_dev_scope(rmrr->devices,
2524                                           rmrr->devices_cnt, i, tmp)
2525                         if (tmp == dev) {
2526                                 rcu_read_unlock();
2527                                 return true;
2528                         }
2529         }
2530         rcu_read_unlock();
2531         return false;
2532 }
2533
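/*
 * Decide whether @dev should live in the static identity domain.  Devices
 * with RMRRs (other than USB), conventional PCI devices behind bridges and,
 * once the system is up, devices whose DMA mask cannot reach all of memory
 * are kept out of the 1:1 domain.
 */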
2534 static int iommu_should_identity_map(struct device *dev, int startup)
2535 {
2536
2537         if (dev_is_pci(dev)) {
2538                 struct pci_dev *pdev = to_pci_dev(dev);
2539
2540                 /*
2541                  * We want to prevent any device associated with an RMRR from
2542                  * getting placed into the SI Domain. This is done because
2543                  * problems exist when devices are moved in and out of domains
2544                  * and their respective RMRR info is lost. We exempt USB devices
2545                  * from this process due to their usage of RMRRs that are known
2546                  * to not be needed after BIOS hand-off to OS.
2547                  */
2548                 if (device_has_rmrr(dev) &&
2549                     (pdev->class >> 8) != PCI_CLASS_SERIAL_USB)
2550                         return 0;
2551
2552                 if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev))
2553                         return 1;
2554
2555                 if ((iommu_identity_mapping & IDENTMAP_GFX) && IS_GFX_DEVICE(pdev))
2556                         return 1;
2557
2558                 if (!(iommu_identity_mapping & IDENTMAP_ALL))
2559                         return 0;
2560
2561                 /*
2562                  * We want to start off with all devices in the 1:1 domain, and
2563                  * take them out later if we find they can't access all of memory.
2564                  *
2565                  * However, we can't do this for PCI devices behind bridges,
2566                  * because all PCI devices behind the same bridge will end up
2567                  * with the same source-id on their transactions.
2568                  *
2569                  * Practically speaking, we can't change things around for these
2570                  * devices at run-time, because we can't be sure there'll be no
2571                  * DMA transactions in flight for any of their siblings.
2572                  *
2573                  * So PCI devices (unless they're on the root bus) as well as
2574                  * their parent PCI-PCI or PCIe-PCI bridges must be left _out_ of
2575                  * the 1:1 domain, just in _case_ one of their siblings turns out
2576                  * not to be able to map all of memory.
2577                  */
2578                 if (!pci_is_pcie(pdev)) {
2579                         if (!pci_is_root_bus(pdev->bus))
2580                                 return 0;
2581                         if (pdev->class >> 8 == PCI_CLASS_BRIDGE_PCI)
2582                                 return 0;
2583                 } else if (pci_pcie_type(pdev) == PCI_EXP_TYPE_PCI_BRIDGE)
2584                         return 0;
2585         } else {
2586                 if (device_has_rmrr(dev))
2587                         return 0;
2588         }
2589
2590         /*
2591          * At boot time, we don't yet know if devices will be 64-bit capable.
2592          * Assume that they will — if they turn out not to be, then we can
2593          * take them out of the 1:1 domain later.
2594          */
2595         if (!startup) {
2596                 /*
2597                  * If the device's dma_mask is less than the system's memory
2598                  * size then this is not a candidate for identity mapping.
2599                  */
2600                 u64 dma_mask = *dev->dma_mask;
2601
2602                 if (dev->coherent_dma_mask &&
2603                     dev->coherent_dma_mask < dma_mask)
2604                         dma_mask = dev->coherent_dma_mask;
2605
2606                 return dma_mask >= dma_get_required_mask(dev);
2607         }
2608
2609         return 1;
2610 }
2611
2612 static int __init dev_prepare_static_identity_mapping(struct device *dev, int hw)
2613 {
2614         int ret;
2615
2616         if (!iommu_should_identity_map(dev, 1))
2617                 return 0;
2618
2619         ret = domain_add_dev_info(si_domain, dev,
2620                                   hw ? CONTEXT_TT_PASS_THROUGH :
2621                                        CONTEXT_TT_MULTI_LEVEL);
2622         if (!ret)
2623                 pr_info("IOMMU: %s identity mapping for device %s\n",
2624                         hw ? "hardware" : "software", dev_name(dev));
2625         else if (ret == -ENODEV)
2626                 /* device not associated with an iommu */
2627                 ret = 0;
2628
2629         return ret;
2630 }
2631
2632
2633 static int __init iommu_prepare_static_identity_mapping(int hw)
2634 {
2635         struct pci_dev *pdev = NULL;
2636         struct dmar_drhd_unit *drhd;
2637         struct intel_iommu *iommu;
2638         struct device *dev;
2639         int i;
2640         int ret = 0;
2641
2642         ret = si_domain_init(hw);
2643         if (ret)
2644                 return -EFAULT;
2645
2646         for_each_pci_dev(pdev) {
2647                 ret = dev_prepare_static_identity_mapping(&pdev->dev, hw);
2648                 if (ret)
2649                         return ret;
2650         }
2651
2652         for_each_active_iommu(iommu, drhd)
2653                 for_each_active_dev_scope(drhd->devices, drhd->devices_cnt, i, dev) {
2654                         struct acpi_device_physical_node *pn;
2655                         struct acpi_device *adev;
2656
2657                         if (dev->bus != &acpi_bus_type)
2658                                 continue;
2659                                 
2660                         adev = to_acpi_device(dev);
2661                         mutex_lock(&adev->physical_node_lock);
2662                         list_for_each_entry(pn, &adev->physical_node_list, node) {
2663                                 ret = dev_prepare_static_identity_mapping(pn->dev, hw);
2664                                 if (ret)
2665                                         break;
2666                         }
2667                         mutex_unlock(&adev->physical_node_lock);
2668                         if (ret)
2669                                 return ret;
2670                 }
2671
2672         return 0;
2673 }
2674
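/*
 * One-time initialization of all DMA remapping hardware: allocate per-IOMMU
 * state, set up (queued or register based) invalidation, build the identity,
 * RMRR and ISA mappings, and finally enable translation on every unit.
 */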
2675 static int __init init_dmars(void)
2676 {
2677         struct dmar_drhd_unit *drhd;
2678         struct dmar_rmrr_unit *rmrr;
2679         struct device *dev;
2680         struct intel_iommu *iommu;
2681         int i, ret;
2682
2683         /*
2684          * for each drhd
2685          *    allocate root
2686          *    initialize and program root entry to not present
2687          * endfor
2688          */
2689         for_each_drhd_unit(drhd) {
2690                 /*
2691                  * lock not needed as this is only incremented in the single
2692                  * threaded kernel __init code path; all other accesses are
2693                  * read only
2694                  */
2695                 if (g_num_of_iommus < IOMMU_UNITS_SUPPORTED) {
2696                         g_num_of_iommus++;
2697                         continue;
2698                 }
2699                 printk_once(KERN_ERR "intel-iommu: exceeded %d IOMMUs\n",
2700                           IOMMU_UNITS_SUPPORTED);
2701         }
2702
2703         g_iommus = kcalloc(g_num_of_iommus, sizeof(struct intel_iommu *),
2704                         GFP_KERNEL);
2705         if (!g_iommus) {
2706                 printk(KERN_ERR "Allocating global iommu array failed\n");
2707                 ret = -ENOMEM;
2708                 goto error;
2709         }
2710
2711         deferred_flush = kzalloc(g_num_of_iommus *
2712                 sizeof(struct deferred_flush_tables), GFP_KERNEL);
2713         if (!deferred_flush) {
2714                 ret = -ENOMEM;
2715                 goto free_g_iommus;
2716         }
2717
2718         for_each_active_iommu(iommu, drhd) {
2719                 g_iommus[iommu->seq_id] = iommu;
2720
2721                 ret = iommu_init_domains(iommu);
2722                 if (ret)
2723                         goto free_iommu;
2724
2725                 /*
2726                  * TBD:
2727                  * we could share the same root & context tables
2728                  * among all IOMMUs. Need to split it later.
2729                  */
2730                 ret = iommu_alloc_root_entry(iommu);
2731                 if (ret) {
2732                         printk(KERN_ERR "IOMMU: allocate root entry failed\n");
2733                         goto free_iommu;
2734                 }
2735                 if (!ecap_pass_through(iommu->ecap))
2736                         hw_pass_through = 0;
2737         }
2738
2739         /*
2740          * Start from a sane IOMMU hardware state.
2741          */
2742         for_each_active_iommu(iommu, drhd) {
2743                 /*
2744                  * If the queued invalidation is already initialized by us
2745                  * (for example, while enabling interrupt-remapping) then
2746                  * things are already rolling from a sane state.
2747                  */
2748                 if (iommu->qi)
2749                         continue;
2750
2751                 /*
2752                  * Clear any previous faults.
2753                  */
2754                 dmar_fault(-1, iommu);
2755                 /*
2756                  * Disable queued invalidation if supported and already enabled
2757                  * before OS handover.
2758                  */
2759                 dmar_disable_qi(iommu);
2760         }
2761
2762         for_each_active_iommu(iommu, drhd) {
2763                 if (dmar_enable_qi(iommu)) {
2764                         /*
2765                          * Queued Invalidate not enabled, use Register Based
2766                          * Invalidate
2767                          */
2768                         iommu->flush.flush_context = __iommu_flush_context;
2769                         iommu->flush.flush_iotlb = __iommu_flush_iotlb;
2770                         printk(KERN_INFO "IOMMU %d 0x%Lx: using Register based "
2771                                "invalidation\n",
2772                                 iommu->seq_id,
2773                                (unsigned long long)drhd->reg_base_addr);
2774                 } else {
2775                         iommu->flush.flush_context = qi_flush_context;
2776                         iommu->flush.flush_iotlb = qi_flush_iotlb;
2777                         printk(KERN_INFO "IOMMU %d 0x%Lx: using Queued "
2778                                "invalidation\n",
2779                                 iommu->seq_id,
2780                                (unsigned long long)drhd->reg_base_addr);
2781                 }
2782         }
2783
2784         if (iommu_pass_through)
2785                 iommu_identity_mapping |= IDENTMAP_ALL;
2786
2787 #ifdef CONFIG_INTEL_IOMMU_BROKEN_GFX_WA
2788         iommu_identity_mapping |= IDENTMAP_GFX;
2789 #endif
2790
2791         check_tylersburg_isoch();
2792
2793         /*
2794          * If pass through is not set or not enabled, set up context entries for
2795          * identity mappings for rmrr, gfx, and isa, and may fall back to static
2796          * identity mapping if iommu_identity_mapping is set.
2797          */
2798         if (iommu_identity_mapping) {
2799                 ret = iommu_prepare_static_identity_mapping(hw_pass_through);
2800                 if (ret) {
2801                         printk(KERN_CRIT "Failed to setup IOMMU pass-through\n");
2802                         goto free_iommu;
2803                 }
2804         }
2805         /*
2806          * For each rmrr
2807          *   for each dev attached to rmrr
2808          *   do
2809          *     locate drhd for dev, alloc domain for dev
2810          *     allocate free domain
2811          *     allocate page table entries for rmrr
2812          *     if context not allocated for bus
2813          *           allocate and init context
2814          *           set present in root table for this bus
2815          *     init context with domain, translation etc
2816          *    endfor
2817          * endfor
2818          */
2819         printk(KERN_INFO "IOMMU: Setting RMRR:\n");
2820         for_each_rmrr_units(rmrr) {
2821                 /* some BIOSes list non-existent devices in the DMAR table. */
2822                 for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
2823                                           i, dev) {
2824                         ret = iommu_prepare_rmrr_dev(rmrr, dev);
2825                         if (ret)
2826                                 printk(KERN_ERR
2827                                        "IOMMU: mapping reserved region failed\n");
2828                 }
2829         }
2830
2831         iommu_prepare_isa();
2832
2833         /*
2834          * for each drhd
2835          *   enable fault log
2836          *   global invalidate context cache
2837          *   global invalidate iotlb
2838          *   enable translation
2839          */
2840         for_each_iommu(iommu, drhd) {
2841                 if (drhd->ignored) {
2842                         /*
2843                          * we always have to disable PMRs or DMA may fail on
2844                          * this device
2845                          */
2846                         if (force_on)
2847                                 iommu_disable_protect_mem_regions(iommu);
2848                         continue;
2849                 }
2850
2851                 iommu_flush_write_buffer(iommu);
2852
2853                 ret = dmar_set_interrupt(iommu);
2854                 if (ret)
2855                         goto free_iommu;
2856
2857                 iommu_set_root_entry(iommu);
2858
2859                 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
2860                 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
2861                 iommu_enable_translation(iommu);
2862                 iommu_disable_protect_mem_regions(iommu);
2863         }
2864
2865         return 0;
2866
2867 free_iommu:
2868         for_each_active_iommu(iommu, drhd)
2869                 free_dmar_iommu(iommu);
2870         kfree(deferred_flush);
2871 free_g_iommus:
2872         kfree(g_iommus);
2873 error:
2874         return ret;
2875 }
2876
2877 /* This takes a number of _MM_ pages, not VTD pages */
2878 static struct iova *intel_alloc_iova(struct device *dev,
2879                                      struct dmar_domain *domain,
2880                                      unsigned long nrpages, uint64_t dma_mask)
2881 {
2882         struct iova *iova = NULL;
2883
2884         /* Restrict dma_mask to the width that the iommu can handle */
2885         dma_mask = min_t(uint64_t, DOMAIN_MAX_ADDR(domain->gaw), dma_mask);
2886
2887         if (!dmar_forcedac && dma_mask > DMA_BIT_MASK(32)) {
2888                 /*
2889                  * First try to allocate an io virtual address in
2890                  * DMA_BIT_MASK(32) and if that fails then try allocating
2891                  * from higher range
2892                  */
2893                 iova = alloc_iova(&domain->iovad, nrpages,
2894                                   IOVA_PFN(DMA_BIT_MASK(32)), 1);
2895                 if (iova)
2896                         return iova;
2897         }
2898         iova = alloc_iova(&domain->iovad, nrpages, IOVA_PFN(dma_mask), 1);
2899         if (unlikely(!iova)) {
2900                 printk(KERN_ERR "Allocating %ld-page iova for %s failed\n",
2901                        nrpages, dev_name(dev));
2902                 return NULL;
2903         }
2904
2905         return iova;
2906 }
2907
2908 static struct dmar_domain *__get_valid_domain_for_dev(struct device *dev)
2909 {
2910         struct dmar_domain *domain;
2911         int ret;
2912
2913         domain = get_domain_for_dev(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
2914         if (!domain) {
2915                 printk(KERN_ERR "Allocating domain for %s failed\n",
2916                        dev_name(dev));
2917                 return NULL;
2918         }
2919
2920         /* make sure context mapping is ok */
2921         if (unlikely(!domain_context_mapped(dev))) {
2922                 ret = domain_context_mapping(domain, dev, CONTEXT_TT_MULTI_LEVEL);
2923                 if (ret) {
2924                         printk(KERN_ERR "Domain context map for %s failed\n",
2925                                dev_name(dev));
2926                         return NULL;
2927                 }
2928         }
2929
2930         return domain;
2931 }
2932
2933 static inline struct dmar_domain *get_valid_domain_for_dev(struct device *dev)
2934 {
2935         struct device_domain_info *info;
2936
2937         /* No lock here, assumes no domain exit in normal case */
2938         info = dev->archdata.iommu;
2939         if (likely(info))
2940                 return info->domain;
2941
2942         return __get_valid_domain_for_dev(dev);
2943 }
2944
2945 static int iommu_dummy(struct device *dev)
2946 {
2947         return dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO;
2948 }
2949
2950 /* Check if the dev needs to go through non-identity map and unmap process.*/
2951 static int iommu_no_mapping(struct device *dev)
2952 {
2953         int found;
2954
2955         if (iommu_dummy(dev))
2956                 return 1;
2957
2958         if (!iommu_identity_mapping)
2959                 return 0;
2960
2961         found = identity_mapping(dev);
2962         if (found) {
2963                 if (iommu_should_identity_map(dev, 0))
2964                         return 1;
2965                 else {
2966                         /*
2967                          * 32 bit DMA device is removed from si_domain and falls
2968                          * back to non-identity mapping.
2969                          */
2970                         domain_remove_one_dev_info(si_domain, dev);
2971                         printk(KERN_INFO "32bit %s uses non-identity mapping\n",
2972                                dev_name(dev));
2973                         return 0;
2974                 }
2975         } else {
2976                 /*
2977                          * If a 64 bit DMA device was detached from a VM, the device
2978                          * is put back into si_domain for identity mapping.
2979                  */
2980                 if (iommu_should_identity_map(dev, 0)) {
2981                         int ret;
2982                         ret = domain_add_dev_info(si_domain, dev,
2983                                                   hw_pass_through ?
2984                                                   CONTEXT_TT_PASS_THROUGH :
2985                                                   CONTEXT_TT_MULTI_LEVEL);
2986                         if (!ret) {
2987                                 printk(KERN_INFO "64bit %s uses identity mapping\n",
2988                                        dev_name(dev));
2989                                 return 1;
2990                         }
2991                 }
2992         }
2993
2994         return 0;
2995 }
2996
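/*
 * Map @size bytes at physical address @paddr for DMA by @dev.  Devices that
 * bypass translation simply get @paddr back; otherwise an IOVA range below
 * @dma_mask is allocated, the page tables are populated and the IOTLB (or
 * write buffer) is flushed as needed.  Returns the bus address, or 0 on
 * failure.
 */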
2997 static dma_addr_t __intel_map_single(struct device *dev, phys_addr_t paddr,
2998                                      size_t size, int dir, u64 dma_mask)
2999 {
3000         struct dmar_domain *domain;
3001         phys_addr_t start_paddr;
3002         struct iova *iova;
3003         int prot = 0;
3004         int ret;
3005         struct intel_iommu *iommu;
3006         unsigned long paddr_pfn = paddr >> PAGE_SHIFT;
3007
3008         BUG_ON(dir == DMA_NONE);
3009
3010         if (iommu_no_mapping(dev))
3011                 return paddr;
3012
3013         domain = get_valid_domain_for_dev(dev);
3014         if (!domain)
3015                 return 0;
3016
3017         iommu = domain_get_iommu(domain);
3018         size = aligned_nrpages(paddr, size);
3019
3020         iova = intel_alloc_iova(dev, domain, dma_to_mm_pfn(size), dma_mask);
3021         if (!iova)
3022                 goto error;
3023
3024         /*
3025          * Check if DMAR supports zero-length reads on write only
3026          * mappings.
3027          */
3028         if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
3029                         !cap_zlr(iommu->cap))
3030                 prot |= DMA_PTE_READ;
3031         if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3032                 prot |= DMA_PTE_WRITE;
3033         /*
3034          * The range paddr .. paddr + size might cover only part of a page, so
3035          * map the whole page.  Note: if two parts of one page are mapped
3036          * separately, we might end up with two guest_addr mappings to the same
3037          * host paddr, but this is not a big problem.
3038          */
3039         ret = domain_pfn_mapping(domain, mm_to_dma_pfn(iova->pfn_lo),
3040                                  mm_to_dma_pfn(paddr_pfn), size, prot);
3041         if (ret)
3042                 goto error;
3043
3044         /* It's a non-present to present mapping. Only flush if in caching mode. */
3045         if (cap_caching_mode(iommu->cap))
3046                 iommu_flush_iotlb_psi(iommu, domain->id, mm_to_dma_pfn(iova->pfn_lo), size, 0, 1);
3047         else
3048                 iommu_flush_write_buffer(iommu);
3049
3050         start_paddr = (phys_addr_t)iova->pfn_lo << PAGE_SHIFT;
3051         start_paddr += paddr & ~PAGE_MASK;
3052         return start_paddr;
3053
3054 error:
3055         if (iova)
3056                 __free_iova(&domain->iovad, iova);
3057         printk(KERN_ERR "Device %s request: %zx@%llx dir %d --- failed\n",
3058                 dev_name(dev), size, (unsigned long long)paddr, dir);
3059         return 0;
3060 }
3061
3062 static dma_addr_t intel_map_page(struct device *dev, struct page *page,
3063                                  unsigned long offset, size_t size,
3064                                  enum dma_data_direction dir,
3065                                  struct dma_attrs *attrs)
3066 {
3067         return __intel_map_single(dev, page_to_phys(page) + offset, size,
3068                                   dir, *dev->dma_mask);
3069 }
3070
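/*
 * Drain the deferred-unmap queues: invalidate the IOTLB for the queued
 * ranges (globally per IOMMU, or per range in caching mode) and release the
 * queued IOVAs and page-table freelists.  Runs with async_umap_flush_lock
 * held.
 */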
3071 static void flush_unmaps(void)
3072 {
3073         int i, j;
3074
3075         timer_on = 0;
3076
3077         /* just flush them all */
3078         for (i = 0; i < g_num_of_iommus; i++) {
3079                 struct intel_iommu *iommu = g_iommus[i];
3080                 if (!iommu)
3081                         continue;
3082
3083                 if (!deferred_flush[i].next)
3084                         continue;
3085
3086                 /* In caching mode, global flushes make emulation expensive */
3087                 if (!cap_caching_mode(iommu->cap))
3088                         iommu->flush.flush_iotlb(iommu, 0, 0, 0,
3089                                          DMA_TLB_GLOBAL_FLUSH);
3090                 for (j = 0; j < deferred_flush[i].next; j++) {
3091                         unsigned long mask;
3092                         struct iova *iova = deferred_flush[i].iova[j];
3093                         struct dmar_domain *domain = deferred_flush[i].domain[j];
3094
3095                         /* On real hardware multiple invalidations are expensive */
3096                         if (cap_caching_mode(iommu->cap))
3097                                 iommu_flush_iotlb_psi(iommu, domain->id,
3098                                         iova->pfn_lo, iova->pfn_hi - iova->pfn_lo + 1,
3099                                         !deferred_flush[i].freelist[j], 0);
3100                         else {
3101                                 mask = ilog2(mm_to_dma_pfn(iova->pfn_hi - iova->pfn_lo + 1));
3102                                 iommu_flush_dev_iotlb(deferred_flush[i].domain[j],
3103                                                 (uint64_t)iova->pfn_lo << PAGE_SHIFT, mask);
3104                         }
3105                         __free_iova(&deferred_flush[i].domain[j]->iovad, iova);
3106                         if (deferred_flush[i].freelist[j])
3107                                 dma_free_pagelist(deferred_flush[i].freelist[j]);
3108                 }
3109                 deferred_flush[i].next = 0;
3110         }
3111
3112         list_size = 0;
3113 }
3114
3115 static void flush_unmaps_timeout(unsigned long data)
3116 {
3117         unsigned long flags;
3118
3119         spin_lock_irqsave(&async_umap_flush_lock, flags);
3120         flush_unmaps();
3121         spin_unlock_irqrestore(&async_umap_flush_lock, flags);
3122 }
3123
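/*
 * Queue an IOVA (and the page-table pages freed with it) for deferred
 * release.  The queue is drained immediately once it reaches
 * HIGH_WATER_MARK, otherwise within 10ms via the unmap timer.
 */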
3124 static void add_unmap(struct dmar_domain *dom, struct iova *iova, struct page *freelist)
3125 {
3126         unsigned long flags;
3127         int next, iommu_id;
3128         struct intel_iommu *iommu;
3129
3130         spin_lock_irqsave(&async_umap_flush_lock, flags);
3131         if (list_size == HIGH_WATER_MARK)
3132                 flush_unmaps();
3133
3134         iommu = domain_get_iommu(dom);
3135         iommu_id = iommu->seq_id;
3136
3137         next = deferred_flush[iommu_id].next;
3138         deferred_flush[iommu_id].domain[next] = dom;
3139         deferred_flush[iommu_id].iova[next] = iova;
3140         deferred_flush[iommu_id].freelist[next] = freelist;
3141         deferred_flush[iommu_id].next++;
3142
3143         if (!timer_on) {
3144                 mod_timer(&unmap_timer, jiffies + msecs_to_jiffies(10));
3145                 timer_on = 1;
3146         }
3147         list_size++;
3148         spin_unlock_irqrestore(&async_umap_flush_lock, flags);
3149 }
3150
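/*
 * Unmap a previously mapped bus address range: clear the page tables and
 * either flush the IOTLB right away (intel_iommu_strict) or defer the flush
 * and the IOVA release to the batched unmap path above.
 */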
3151 static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr,
3152                              size_t size, enum dma_data_direction dir,
3153                              struct dma_attrs *attrs)
3154 {
3155         struct dmar_domain *domain;
3156         unsigned long start_pfn, last_pfn;
3157         struct iova *iova;
3158         struct intel_iommu *iommu;
3159         struct page *freelist;
3160
3161         if (iommu_no_mapping(dev))
3162                 return;
3163
3164         domain = find_domain(dev);
3165         BUG_ON(!domain);
3166
3167         iommu = domain_get_iommu(domain);
3168
3169         iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr));
3170         if (WARN_ONCE(!iova, "Driver unmaps unmatched page at PFN %llx\n",
3171                       (unsigned long long)dev_addr))
3172                 return;
3173
3174         start_pfn = mm_to_dma_pfn(iova->pfn_lo);
3175         last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1;
3176
3177         pr_debug("Device %s unmapping: pfn %lx-%lx\n",
3178                  dev_name(dev), start_pfn, last_pfn);
3179
3180         freelist = domain_unmap(domain, start_pfn, last_pfn);
3181
3182         if (intel_iommu_strict) {
3183                 iommu_flush_iotlb_psi(iommu, domain->id, start_pfn,
3184                                       last_pfn - start_pfn + 1, !freelist, 0);
3185                 /* free iova */
3186                 __free_iova(&domain->iovad, iova);
3187                 dma_free_pagelist(freelist);
3188         } else {
3189                 add_unmap(domain, iova, freelist);
3190                 /*
3191                  * Queue up the release of the unmap to save roughly 1/6th of
3192                  * the CPU time otherwise spent on the IOTLB flush operation.
3193                  */
3194         }
3195 }
3196
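/*
 * DMA-API coherent allocation: grab zeroed pages (from CMA when possible)
 * and map them bidirectionally for @dev.
 */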
3197 static void *intel_alloc_coherent(struct device *dev, size_t size,
3198                                   dma_addr_t *dma_handle, gfp_t flags,
3199                                   struct dma_attrs *attrs)
3200 {
3201         struct page *page = NULL;
3202         int order;
3203
3204         size = PAGE_ALIGN(size);
3205         order = get_order(size);
3206
3207         if (!iommu_no_mapping(dev))
3208                 flags &= ~(GFP_DMA | GFP_DMA32);
3209         else if (dev->coherent_dma_mask < dma_get_required_mask(dev)) {
3210                 if (dev->coherent_dma_mask < DMA_BIT_MASK(32))
3211                         flags |= GFP_DMA;
3212                 else
3213                         flags |= GFP_DMA32;
3214         }
3215
3216         if (flags & __GFP_WAIT) {
3217                 unsigned int count = size >> PAGE_SHIFT;
3218
3219                 page = dma_alloc_from_contiguous(dev, count, order);
3220                 if (page && iommu_no_mapping(dev) &&
3221                     page_to_phys(page) + size > dev->coherent_dma_mask) {
3222                         dma_release_from_contiguous(dev, page, count);
3223                         page = NULL;
3224                 }
3225         }
3226
3227         if (!page)
3228                 page = alloc_pages(flags, order);
3229         if (!page)
3230                 return NULL;
3231         memset(page_address(page), 0, size);
3232
3233         *dma_handle = __intel_map_single(dev, page_to_phys(page), size,
3234                                          DMA_BIDIRECTIONAL,
3235                                          dev->coherent_dma_mask);
3236         if (*dma_handle)
3237                 return page_address(page);
3238         if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
3239                 __free_pages(page, order);
3240
3241         return NULL;
3242 }
3243
3244 static void intel_free_coherent(struct device *dev, size_t size, void *vaddr,
3245                                 dma_addr_t dma_handle, struct dma_attrs *attrs)
3246 {
3247         int order;
3248         struct page *page = virt_to_page(vaddr);
3249
3250         size = PAGE_ALIGN(size);
3251         order = get_order(size);
3252
3253         intel_unmap_page(dev, dma_handle, size, DMA_BIDIRECTIONAL, NULL);
3254         if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
3255                 __free_pages(page, order);
3256 }
3257
3258 static void intel_unmap_sg(struct device *dev, struct scatterlist *sglist,
3259                            int nelems, enum dma_data_direction dir,
3260                            struct dma_attrs *attrs)
3261 {
3262         struct dmar_domain *domain;
3263         unsigned long start_pfn, last_pfn;
3264         struct iova *iova;
3265         struct intel_iommu *iommu;
3266         struct page *freelist;
3267
3268         if (iommu_no_mapping(dev))
3269                 return;
3270
3271         domain = find_domain(dev);
3272         BUG_ON(!domain);
3273
3274         iommu = domain_get_iommu(domain);
3275
3276         iova = find_iova(&domain->iovad, IOVA_PFN(sglist[0].dma_address));
3277         if (WARN_ONCE(!iova, "Driver unmaps unmatched sglist at PFN %llx\n",
3278                       (unsigned long long)sglist[0].dma_address))
3279                 return;
3280
3281         start_pfn = mm_to_dma_pfn(iova->pfn_lo);
3282         last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1;
3283
3284         freelist = domain_unmap(domain, start_pfn, last_pfn);
3285
3286         if (intel_iommu_strict) {
3287                 iommu_flush_iotlb_psi(iommu, domain->id, start_pfn,
3288                                       last_pfn - start_pfn + 1, !freelist, 0);
3289                 /* free iova */
3290                 __free_iova(&domain->iovad, iova);
3291                 dma_free_pagelist(freelist);
3292         } else {
3293                 add_unmap(domain, iova, freelist);
3294                 /*
3295                  * Queue up the release of the unmap to save roughly 1/6th of
3296                  * the CPU time otherwise spent on the IOTLB flush operation.
3297                  */
3298         }
3299 }
3300
3301 static int intel_nontranslate_map_sg(struct device *hddev,
3302         struct scatterlist *sglist, int nelems, int dir)
3303 {
3304         int i;
3305         struct scatterlist *sg;
3306
3307         for_each_sg(sglist, sg, nelems, i) {
3308                 BUG_ON(!sg_page(sg));
3309                 sg->dma_address = page_to_phys(sg_page(sg)) + sg->offset;
3310                 sg->dma_length = sg->length;
3311         }
3312         return nelems;
3313 }
3314
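/*
 * Map a scatterlist: allocate one contiguous IOVA range covering all the
 * segments and map each segment into it.  Returns the number of mapped
 * segments, or 0 on failure.
 */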
3315 static int intel_map_sg(struct device *dev, struct scatterlist *sglist, int nelems,
3316                         enum dma_data_direction dir, struct dma_attrs *attrs)
3317 {
3318         int i;
3319         struct dmar_domain *domain;
3320         size_t size = 0;
3321         int prot = 0;
3322         struct iova *iova = NULL;
3323         int ret;
3324         struct scatterlist *sg;
3325         unsigned long start_vpfn;
3326         struct intel_iommu *iommu;
3327
3328         BUG_ON(dir == DMA_NONE);
3329         if (iommu_no_mapping(dev))
3330                 return intel_nontranslate_map_sg(dev, sglist, nelems, dir);
3331
3332         domain = get_valid_domain_for_dev(dev);
3333         if (!domain)
3334                 return 0;
3335
3336         iommu = domain_get_iommu(domain);
3337
3338         for_each_sg(sglist, sg, nelems, i)
3339                 size += aligned_nrpages(sg->offset, sg->length);
3340
3341         iova = intel_alloc_iova(dev, domain, dma_to_mm_pfn(size),
3342                                 *dev->dma_mask);
3343         if (!iova) {
3344                 sglist->dma_length = 0;
3345                 return 0;
3346         }
3347
3348         /*
3349          * Check if DMAR supports zero-length reads on write only
3350          * mappings.
3351          */
3352         if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
3353                         !cap_zlr(iommu->cap))
3354                 prot |= DMA_PTE_READ;
3355         if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3356                 prot |= DMA_PTE_WRITE;
3357
3358         start_vpfn = mm_to_dma_pfn(iova->pfn_lo);
3359
3360         ret = domain_sg_mapping(domain, start_vpfn, sglist, size, prot);
3361         if (unlikely(ret)) {
3362                 /* clear the mappings */
3363                 dma_pte_clear_range(domain, start_vpfn,
3364                                     start_vpfn + size - 1);
3365                 /* free page tables */
3366                 dma_pte_free_pagetable(domain, start_vpfn,
3367                                        start_vpfn + size - 1);
3368                 /* free iova */
3369                 __free_iova(&domain->iovad, iova);
3370                 return 0;
3371         }
3372
3373         /* It's a non-present to present mapping. Only flush if in caching mode. */
3374         if (cap_caching_mode(iommu->cap))
3375                 iommu_flush_iotlb_psi(iommu, domain->id, start_vpfn, size, 0, 1);
3376         else
3377                 iommu_flush_write_buffer(iommu);
3378
3379         return nelems;
3380 }
3381
3382 static int intel_mapping_error(struct device *dev, dma_addr_t dma_addr)
3383 {
3384         return !dma_addr;
3385 }
3386
3387 struct dma_map_ops intel_dma_ops = {
3388         .alloc = intel_alloc_coherent,
3389         .free = intel_free_coherent,
3390         .map_sg = intel_map_sg,
3391         .unmap_sg = intel_unmap_sg,
3392         .map_page = intel_map_page,
3393         .unmap_page = intel_unmap_page,
3394         .mapping_error = intel_mapping_error,
3395 };
3396
3397 static inline int iommu_domain_cache_init(void)
3398 {
3399         int ret = 0;
3400
3401         iommu_domain_cache = kmem_cache_create("iommu_domain",
3402                                          sizeof(struct dmar_domain),
3403                                          0,
3404                                          SLAB_HWCACHE_ALIGN,
3406                                          NULL);
3407         if (!iommu_domain_cache) {
3408                 printk(KERN_ERR "Couldn't create iommu_domain cache\n");
3409                 ret = -ENOMEM;
3410         }
3411
3412         return ret;
3413 }
3414
3415 static inline int iommu_devinfo_cache_init(void)
3416 {
3417         int ret = 0;
3418
3419         iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
3420                                          sizeof(struct device_domain_info),
3421                                          0,
3422                                          SLAB_HWCACHE_ALIGN,
3423                                          NULL);
3424         if (!iommu_devinfo_cache) {
3425                 printk(KERN_ERR "Couldn't create devinfo cache\n");
3426                 ret = -ENOMEM;
3427         }
3428
3429         return ret;
3430 }
3431
3432 static inline int iommu_iova_cache_init(void)
3433 {
3434         int ret = 0;
3435
3436         iommu_iova_cache = kmem_cache_create("iommu_iova",
3437                                          sizeof(struct iova),
3438                                          0,
3439                                          SLAB_HWCACHE_ALIGN,
3440                                          NULL);
3441         if (!iommu_iova_cache) {
3442                 printk(KERN_ERR "Couldn't create iova cache\n");
3443                 ret = -ENOMEM;
3444         }
3445
3446         return ret;
3447 }
3448
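/* Create the kmem caches used for iova, domain and device-info structures. */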
3449 static int __init iommu_init_mempool(void)
3450 {
3451         int ret;
3452         ret = iommu_iova_cache_init();
3453         if (ret)
3454                 return ret;
3455
3456         ret = iommu_domain_cache_init();
3457         if (ret)
3458                 goto domain_error;
3459
3460         ret = iommu_devinfo_cache_init();
3461         if (!ret)
3462                 return ret;
3463
3464         kmem_cache_destroy(iommu_domain_cache);
3465 domain_error:
3466         kmem_cache_destroy(iommu_iova_cache);
3467
3468         return -ENOMEM;
3469 }
3470
3471 static void __init iommu_exit_mempool(void)
3472 {
3473         kmem_cache_destroy(iommu_devinfo_cache);
3474         kmem_cache_destroy(iommu_domain_cache);
3475         kmem_cache_destroy(iommu_iova_cache);
3476
3477 }
3478
3479 static void quirk_ioat_snb_local_iommu(struct pci_dev *pdev)
3480 {
3481         struct dmar_drhd_unit *drhd;
3482         u32 vtbar;
3483         int rc;
3484
3485         /* We know that this device on this chipset has its own IOMMU.
3486          * If we find it under a different IOMMU, then the BIOS is lying
3487          * to us. Hope that the IOMMU for this device is actually
3488          * disabled, and it needs no translation...
3489          */
3490         rc = pci_bus_read_config_dword(pdev->bus, PCI_DEVFN(0, 0), 0xb0, &vtbar);
3491         if (rc) {
3492                 /* "can't" happen */
3493                 dev_info(&pdev->dev, "failed to run vt-d quirk\n");
3494                 return;
3495         }
3496         vtbar &= 0xffff0000;
3497
3498         /* we know that this iommu should be at offset 0xa000 from vtbar */
3499         drhd = dmar_find_matched_drhd_unit(pdev);
3500         if (WARN_TAINT_ONCE(!drhd || drhd->reg_base_addr - vtbar != 0xa000,
3501                             TAINT_FIRMWARE_WORKAROUND,
3502                             "BIOS assigned incorrect VT-d unit for Intel(R) QuickData Technology device\n"))
3503                 pdev->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
3504 }
3505 DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB, quirk_ioat_snb_local_iommu);
3506
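/*
 * Mark DMAR units that cover no devices at all, or only graphics devices
 * while dmar_map_gfx is disabled, as ignored so they are never enabled.
 */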
3507 static void __init init_no_remapping_devices(void)
3508 {
3509         struct dmar_drhd_unit *drhd;
3510         struct device *dev;
3511         int i;
3512
3513         for_each_drhd_unit(drhd) {
3514                 if (!drhd->include_all) {
3515                         for_each_active_dev_scope(drhd->devices,
3516                                                   drhd->devices_cnt, i, dev)
3517                                 break;
3518                         /* ignore DMAR unit if no devices exist */
3519                         if (i == drhd->devices_cnt)
3520                                 drhd->ignored = 1;
3521                 }
3522         }
3523
3524         for_each_active_drhd_unit(drhd) {
3525                 if (drhd->include_all)
3526                         continue;
3527
3528                 for_each_active_dev_scope(drhd->devices,
3529                                           drhd->devices_cnt, i, dev)
3530                         if (!dev_is_pci(dev) || !IS_GFX_DEVICE(to_pci_dev(dev)))
3531                                 break;
3532                 if (i < drhd->devices_cnt)
3533                         continue;
3534
3535                 /* This IOMMU has *only* gfx devices. Either bypass it or
3536                    set the gfx_mapped flag, as appropriate */
3537                 if (dmar_map_gfx) {
3538                         intel_iommu_gfx_mapped = 1;
3539                 } else {
3540                         drhd->ignored = 1;
3541                         for_each_active_dev_scope(drhd->devices,
3542                                                   drhd->devices_cnt, i, dev)
3543                                 dev->archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
3544                 }
3545         }
3546 }
3547
3548 #ifdef CONFIG_SUSPEND
3549 static int init_iommu_hw(void)
3550 {
3551         struct dmar_drhd_unit *drhd;
3552         struct intel_iommu *iommu = NULL;
3553
3554         for_each_active_iommu(iommu, drhd)
3555                 if (iommu->qi)
3556                         dmar_reenable_qi(iommu);
3557
3558         for_each_iommu(iommu, drhd) {
3559                 if (drhd->ignored) {
3560                         /*
3561                          * we always have to disable PMRs or DMA may fail on
3562                          * this device
3563                          */
3564                         if (force_on)
3565                                 iommu_disable_protect_mem_regions(iommu);
3566                         continue;
3567                 }
3568
3569                 iommu_flush_write_buffer(iommu);
3570
3571                 iommu_set_root_entry(iommu);
3572
3573                 iommu->flush.flush_context(iommu, 0, 0, 0,
3574                                            DMA_CCMD_GLOBAL_INVL);
3575                 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
3576                 iommu_enable_translation(iommu);
3577                 iommu_disable_protect_mem_regions(iommu);
3578         }
3579
3580         return 0;
3581 }
3582
3583 static void iommu_flush_all(void)
3584 {
3585         struct dmar_drhd_unit *drhd;
3586         struct intel_iommu *iommu;
3587
3588         for_each_active_iommu(iommu, drhd) {
3589                 iommu->flush.flush_context(iommu, 0, 0, 0,
3590                                            DMA_CCMD_GLOBAL_INVL);
3591                 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
3592                                          DMA_TLB_GLOBAL_FLUSH);
3593         }
3594 }
3595
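/*
 * Save the fault-event registers of every active IOMMU and disable
 * translation before the system suspends; iommu_resume() restores them.
 */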
3596 static int iommu_suspend(void)
3597 {
3598         struct dmar_drhd_unit *drhd;
3599         struct intel_iommu *iommu = NULL;
3600         unsigned long flag;
3601
3602         for_each_active_iommu(iommu, drhd) {
3603                 iommu->iommu_state = kzalloc(sizeof(u32) * MAX_SR_DMAR_REGS,
3604                                                  GFP_ATOMIC);
3605                 if (!iommu->iommu_state)
3606                         goto nomem;
3607         }
3608
3609         iommu_flush_all();
3610
3611         for_each_active_iommu(iommu, drhd) {
3612                 iommu_disable_translation(iommu);
3613
3614                 raw_spin_lock_irqsave(&iommu->register_lock, flag);
3615
3616                 iommu->iommu_state[SR_DMAR_FECTL_REG] =
3617                         readl(iommu->reg + DMAR_FECTL_REG);
3618                 iommu->iommu_state[SR_DMAR_FEDATA_REG] =
3619                         readl(iommu->reg + DMAR_FEDATA_REG);
3620                 iommu->iommu_state[SR_DMAR_FEADDR_REG] =
3621                         readl(iommu->reg + DMAR_FEADDR_REG);
3622                 iommu->iommu_state[SR_DMAR_FEUADDR_REG] =
3623                         readl(iommu->reg + DMAR_FEUADDR_REG);
3624
3625                 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
3626         }
3627         return 0;
3628
3629 nomem:
3630         for_each_active_iommu(iommu, drhd)
3631                 kfree(iommu->iommu_state);
3632
3633         return -ENOMEM;
3634 }
3635
3636 static void iommu_resume(void)
3637 {
3638         struct dmar_drhd_unit *drhd;
3639         struct intel_iommu *iommu = NULL;
3640         unsigned long flag;
3641
3642         if (init_iommu_hw()) {
3643                 if (force_on)
3644                         panic("tboot: IOMMU setup failed, DMAR cannot resume!\n");
3645                 else
3646                         WARN(1, "IOMMU setup failed, DMAR cannot resume!\n");
3647                 return;
3648         }
3649
3650         for_each_active_iommu(iommu, drhd) {
3651
3652                 raw_spin_lock_irqsave(&iommu->register_lock, flag);
3653
3654                 writel(iommu->iommu_state[SR_DMAR_FECTL_REG],
3655                         iommu->reg + DMAR_FECTL_REG);
3656                 writel(iommu->iommu_state[SR_DMAR_FEDATA_REG],
3657                         iommu->reg + DMAR_FEDATA_REG);
3658                 writel(iommu->iommu_state[SR_DMAR_FEADDR_REG],
3659                         iommu->reg + DMAR_FEADDR_REG);
3660                 writel(iommu->iommu_state[SR_DMAR_FEUADDR_REG],
3661                         iommu->reg + DMAR_FEUADDR_REG);
3662
3663                 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
3664         }
3665
3666         for_each_active_iommu(iommu, drhd)
3667                 kfree(iommu->iommu_state);
3668 }
3669
3670 static struct syscore_ops iommu_syscore_ops = {
3671         .resume         = iommu_resume,
3672         .suspend        = iommu_suspend,
3673 };
3674
3675 static void __init init_iommu_pm_ops(void)
3676 {
3677         register_syscore_ops(&iommu_syscore_ops);
3678 }
3679
3680 #else
3681 static inline void init_iommu_pm_ops(void) {}
3682 #endif  /* CONFIG_SUSPEND */
3683
3684
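/*
 * Parse an ACPI RMRR entry and remember its reserved memory region and
 * device scope.
 */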
3685 int __init dmar_parse_one_rmrr(struct acpi_dmar_header *header)
3686 {
3687         struct acpi_dmar_reserved_memory *rmrr;
3688         struct dmar_rmrr_unit *rmrru;
3689
3690         rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL);
3691         if (!rmrru)
3692                 return -ENOMEM;
3693
3694         rmrru->hdr = header;
3695         rmrr = (struct acpi_dmar_reserved_memory *)header;
3696         rmrru->base_address = rmrr->base_address;
3697         rmrru->end_address = rmrr->end_address;
3698         rmrru->devices = dmar_alloc_dev_scope((void *)(rmrr + 1),
3699                                 ((void *)rmrr) + rmrr->header.length,
3700                                 &rmrru->devices_cnt);
3701         if (rmrru->devices_cnt && rmrru->devices == NULL) {
3702                 kfree(rmrru);
3703                 return -ENOMEM;
3704         }
3705
3706         list_add(&rmrru->list, &dmar_rmrr_units);
3707
3708         return 0;
3709 }
3710
3711 int __init dmar_parse_one_atsr(struct acpi_dmar_header *hdr)
3712 {
3713         struct acpi_dmar_atsr *atsr;
3714         struct dmar_atsr_unit *atsru;
3715
3716         atsr = container_of(hdr, struct acpi_dmar_atsr, header);
3717         atsru = kzalloc(sizeof(*atsru), GFP_KERNEL);
3718         if (!atsru)
3719                 return -ENOMEM;
3720
3721         atsru->hdr = hdr;
3722         atsru->include_all = atsr->flags & 0x1;
3723         if (!atsru->include_all) {
3724                 atsru->devices = dmar_alloc_dev_scope((void *)(atsr + 1),
3725                                 (void *)atsr + atsr->header.length,
3726                                 &atsru->devices_cnt);
3727                 if (atsru->devices_cnt && atsru->devices == NULL) {
3728                         kfree(atsru);
3729                         return -ENOMEM;
3730                 }
3731         }
3732
3733         list_add_rcu(&atsru->list, &dmar_atsr_units);
3734
3735         return 0;
3736 }
3737
3738 static void intel_iommu_free_atsr(struct dmar_atsr_unit *atsru)
3739 {
3740         dmar_free_dev_scope(&atsru->devices, &atsru->devices_cnt);
3741         kfree(atsru);
3742 }
3743
3744 static void intel_iommu_free_dmars(void)
3745 {
3746         struct dmar_rmrr_unit *rmrru, *rmrr_n;
3747         struct dmar_atsr_unit *atsru, *atsr_n;
3748
3749         list_for_each_entry_safe(rmrru, rmrr_n, &dmar_rmrr_units, list) {
3750                 list_del(&rmrru->list);
3751                 dmar_free_dev_scope(&rmrru->devices, &rmrru->devices_cnt);
3752                 kfree(rmrru);
3753         }
3754
3755         list_for_each_entry_safe(atsru, atsr_n, &dmar_atsr_units, list) {
3756                 list_del(&atsru->list);
3757                 intel_iommu_free_atsr(atsru);
3758         }
3759 }
3760
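/*
 * Return 1 if the PCIe root port above @dev is covered by an ATSR unit
 * (i.e. the device may use Address Translation Services), 0 otherwise.
 */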
3761 int dmar_find_matched_atsr_unit(struct pci_dev *dev)
3762 {
3763         int i, ret = 1;
3764         struct pci_bus *bus;
3765         struct pci_dev *bridge = NULL;
3766         struct device *tmp;
3767         struct acpi_dmar_atsr *atsr;
3768         struct dmar_atsr_unit *atsru;
3769
3770         dev = pci_physfn(dev);
3771         for (bus = dev->bus; bus; bus = bus->parent) {
3772                 bridge = bus->self;
3773                 if (!bridge || !pci_is_pcie(bridge) ||
3774                     pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE)
3775                         return 0;
3776                 if (pci_pcie_type(bridge) == PCI_EXP_TYPE_ROOT_PORT)
3777                         break;
3778         }
3779         if (!bridge)
3780                 return 0;
3781
3782         rcu_read_lock();
3783         list_for_each_entry_rcu(atsru, &dmar_atsr_units, list) {
3784                 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
3785                 if (atsr->segment != pci_domain_nr(dev->bus))
3786                         continue;
3787
3788                 for_each_dev_scope(atsru->devices, atsru->devices_cnt, i, tmp)
3789                         if (tmp == &bridge->dev)
3790                                 goto out;
3791
3792                 if (atsru->include_all)
3793                         goto out;
3794         }
3795         ret = 0;
3796 out:
3797         rcu_read_unlock();
3798
3799         return ret;
3800 }
3801
3802 int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info)
3803 {
3804         int ret = 0;
3805         struct dmar_rmrr_unit *rmrru;
3806         struct dmar_atsr_unit *atsru;
3807         struct acpi_dmar_atsr *atsr;
3808         struct acpi_dmar_reserved_memory *rmrr;
3809
3810         if (!intel_iommu_enabled && system_state != SYSTEM_BOOTING)
3811                 return 0;
3812
3813         list_for_each_entry(rmrru, &dmar_rmrr_units, list) {
3814                 rmrr = container_of(rmrru->hdr,
3815                                     struct acpi_dmar_reserved_memory, header);
3816                 if (info->event == BUS_NOTIFY_ADD_DEVICE) {
3817                         ret = dmar_insert_dev_scope(info, (void *)(rmrr + 1),
3818                                 ((void *)rmrr) + rmrr->header.length,
3819                                 rmrr->segment, rmrru->devices,
3820                                 rmrru->devices_cnt);
3821                         if (ret < 0)
3822                                 return ret;
3823                 } else if (info->event == BUS_NOTIFY_DEL_DEVICE) {
3824                         dmar_remove_dev_scope(info, rmrr->segment,
3825                                 rmrru->devices, rmrru->devices_cnt);
3826                 }
3827         }
3828
3829         list_for_each_entry(atsru, &dmar_atsr_units, list) {
3830                 if (atsru->include_all)
3831                         continue;
3832
3833                 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
3834                 if (info->event == BUS_NOTIFY_ADD_DEVICE) {
3835                         ret = dmar_insert_dev_scope(info, (void *)(atsr + 1),
3836                                         (void *)atsr + atsr->header.length,
3837                                         atsr->segment, atsru->devices,
3838                                         atsru->devices_cnt);
3839                         if (ret > 0)
3840                                 break;
3841                         else if (ret < 0)
3842                                 return ret;
3843                 } else if (info->event == BUS_NOTIFY_DEL_DEVICE) {
3844                         if (dmar_remove_dev_scope(info, atsr->segment,
3845                                         atsru->devices, atsru->devices_cnt))
3846                                 break;
3847                 }
3848         }
3849
3850         return 0;
3851 }
3852
3853 /*
3854  * Here we only respond to a device being unbound from its driver.
3855  *
3856  * A newly added device is not attached to its DMAR domain here yet. That
3857  * happens when the device is first mapped to an iova.
3858  */
3859 static int device_notifier(struct notifier_block *nb,
3860                                   unsigned long action, void *data)
3861 {
3862         struct device *dev = data;
3863         struct dmar_domain *domain;
3864
3865         if (iommu_dummy(dev))
3866                 return 0;
3867
3868         if (action != BUS_NOTIFY_UNBOUND_DRIVER &&
3869             action != BUS_NOTIFY_DEL_DEVICE)
3870                 return 0;
3871
3872         domain = find_domain(dev);
3873         if (!domain)
3874                 return 0;
3875
3876         down_read(&dmar_global_lock);
3877         domain_remove_one_dev_info(domain, dev);
3878         if (!domain_type_is_vm_or_si(domain) && list_empty(&domain->devices))
3879                 domain_exit(domain);
3880         up_read(&dmar_global_lock);
3881
3882         return 0;
3883 }
3884
3885 static struct notifier_block device_nb = {
3886         .notifier_call = device_notifier,
3887 };
3888
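/*
 * Keep the si_domain identity map in sync with memory hotplug: add a 1:1
 * mapping when memory goes online and tear it down again when it goes
 * offline.
 */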
3889 static int intel_iommu_memory_notifier(struct notifier_block *nb,
3890                                        unsigned long val, void *v)
3891 {
3892         struct memory_notify *mhp = v;
3893         unsigned long long start, end;
3894         unsigned long start_vpfn, last_vpfn;
3895
3896         switch (val) {
3897         case MEM_GOING_ONLINE:
3898                 start = mhp->start_pfn << PAGE_SHIFT;
3899                 end = ((mhp->start_pfn + mhp->nr_pages) << PAGE_SHIFT) - 1;
3900                 if (iommu_domain_identity_map(si_domain, start, end)) {
3901                         pr_warn("dmar: failed to build identity map for [%llx-%llx]\n",
3902                                 start, end);
3903                         return NOTIFY_BAD;
3904                 }
3905                 break;
3906
3907         case MEM_OFFLINE:
3908         case MEM_CANCEL_ONLINE:
3909                 start_vpfn = mm_to_dma_pfn(mhp->start_pfn);
3910                 last_vpfn = mm_to_dma_pfn(mhp->start_pfn + mhp->nr_pages - 1);
3911                 while (start_vpfn <= last_vpfn) {
3912                         struct iova *iova;
3913                         struct dmar_drhd_unit *drhd;
3914                         struct intel_iommu *iommu;
3915                         struct page *freelist;
3916
3917                         iova = find_iova(&si_domain->iovad, start_vpfn);
3918                         if (iova == NULL) {
3919                                 pr_debug("dmar: failed to get IOVA for PFN %lx\n",
3920                                          start_vpfn);
3921                                 break;
3922                         }
3923
3924                         iova = split_and_remove_iova(&si_domain->iovad, iova,
3925                                                      start_vpfn, last_vpfn);
3926                         if (iova == NULL) {
3927                                 pr_warn("dmar: failed to split IOVA PFN [%lx-%lx]\n",
3928                                         start_vpfn, last_vpfn);
3929                                 return NOTIFY_BAD;
3930                         }
3931
3932                         freelist = domain_unmap(si_domain, iova->pfn_lo,
3933                                                iova->pfn_hi);
3934
3935                         rcu_read_lock();
3936                         for_each_active_iommu(iommu, drhd)
3937                                 iommu_flush_iotlb_psi(iommu, si_domain->id,
3938                                         iova->pfn_lo,
3939                                         iova->pfn_hi - iova->pfn_lo + 1,
3940                                         !freelist, 0);
3941                         rcu_read_unlock();
3942                         dma_free_pagelist(freelist);
3943
3944                         start_vpfn = iova->pfn_hi + 1;
3945                         free_iova_mem(iova);
3946                 }
3947                 break;
3948         }
3949
3950         return NOTIFY_OK;
3951 }
3952
3953 static struct notifier_block intel_iommu_memory_nb = {
3954         .notifier_call = intel_iommu_memory_notifier,
3955         .priority = 0
3956 };
3957
3958
3959 static ssize_t intel_iommu_show_version(struct device *dev,
3960                                         struct device_attribute *attr,
3961                                         char *buf)
3962 {
3963         struct intel_iommu *iommu = dev_get_drvdata(dev);
3964         u32 ver = readl(iommu->reg + DMAR_VER_REG);
3965         return sprintf(buf, "%d:%d\n",
3966                        DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver));
3967 }
3968 static DEVICE_ATTR(version, S_IRUGO, intel_iommu_show_version, NULL);
3969
3970 static ssize_t intel_iommu_show_address(struct device *dev,
3971                                         struct device_attribute *attr,
3972                                         char *buf)
3973 {
3974         struct intel_iommu *iommu = dev_get_drvdata(dev);
3975         return sprintf(buf, "%llx\n", iommu->reg_phys);
3976 }
3977 static DEVICE_ATTR(address, S_IRUGO, intel_iommu_show_address, NULL);
3978
3979 static ssize_t intel_iommu_show_cap(struct device *dev,
3980                                     struct device_attribute *attr,
3981                                     char *buf)
3982 {
3983         struct intel_iommu *iommu = dev_get_drvdata(dev);
3984         return sprintf(buf, "%llx\n", iommu->cap);
3985 }
3986 static DEVICE_ATTR(cap, S_IRUGO, intel_iommu_show_cap, NULL);
3987
3988 static ssize_t intel_iommu_show_ecap(struct device *dev,
3989                                     struct device_attribute *attr,
3990                                     char *buf)
3991 {
3992         struct intel_iommu *iommu = dev_get_drvdata(dev);
3993         return sprintf(buf, "%llx\n", iommu->ecap);
3994 }
3995 static DEVICE_ATTR(ecap, S_IRUGO, intel_iommu_show_ecap, NULL);
3996
3997 static struct attribute *intel_iommu_attrs[] = {
3998         &dev_attr_version.attr,
3999         &dev_attr_address.attr,
4000         &dev_attr_cap.attr,
4001         &dev_attr_ecap.attr,
4002         NULL,
4003 };
4004
4005 static struct attribute_group intel_iommu_group = {
4006         .name = "intel-iommu",
4007         .attrs = intel_iommu_attrs,
4008 };
4009
4010 const struct attribute_group *intel_iommu_groups[] = {
4011         &intel_iommu_group,
4012         NULL,
4013 };
4014
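/*
 * Entry point for VT-d initialisation: parse the DMAR tables, program the
 * remapping hardware and install intel_dma_ops as the DMA API backend.
 */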
4015 int __init intel_iommu_init(void)
4016 {
4017         int ret = -ENODEV;
4018         struct dmar_drhd_unit *drhd;
4019         struct intel_iommu *iommu;
4020
4021         /* VT-d is required for a TXT/tboot launch, so enforce that */
4022         force_on = tboot_force_iommu();
4023
4024         if (iommu_init_mempool()) {
4025                 if (force_on)
4026                         panic("tboot: Failed to initialize iommu memory\n");
4027                 return -ENOMEM;
4028         }
4029
4030         down_write(&dmar_global_lock);
4031         if (dmar_table_init()) {
4032                 if (force_on)
4033                         panic("tboot: Failed to initialize DMAR table\n");
4034                 goto out_free_dmar;
4035         }
4036
4037         /*
4038          * Disable translation if already enabled prior to OS handover.
4039          */
4040         for_each_active_iommu(iommu, drhd)
4041                 if (iommu->gcmd & DMA_GCMD_TE)
4042                         iommu_disable_translation(iommu);
4043
4044         if (dmar_dev_scope_init() < 0) {
4045                 if (force_on)
4046                         panic("tboot: Failed to initialize DMAR device scope\n");
4047                 goto out_free_dmar;
4048         }
4049
4050         if (no_iommu || dmar_disabled)
4051                 goto out_free_dmar;
4052
4053         if (list_empty(&dmar_rmrr_units))
4054                 printk(KERN_INFO "DMAR: No RMRR found\n");
4055
4056         if (list_empty(&dmar_atsr_units))
4057                 printk(KERN_INFO "DMAR: No ATSR found\n");
4058
4059         if (dmar_init_reserved_ranges()) {
4060                 if (force_on)
4061                         panic("tboot: Failed to reserve iommu ranges\n");
4062                 goto out_free_reserved_range;
4063         }
4064
4065         init_no_remapping_devices();
4066
4067         ret = init_dmars();
4068         if (ret) {
4069                 if (force_on)
4070                         panic("tboot: Failed to initialize DMARs\n");
4071                 printk(KERN_ERR "IOMMU: dmar init failed\n");
4072                 goto out_free_reserved_range;
4073         }
4074         up_write(&dmar_global_lock);
4075         printk(KERN_INFO
4076         "PCI-DMA: Intel(R) Virtualization Technology for Directed I/O\n");
4077
4078         init_timer(&unmap_timer);
4079 #ifdef CONFIG_SWIOTLB
4080         swiotlb = 0;
4081 #endif
4082         dma_ops = &intel_dma_ops;
4083
4084         init_iommu_pm_ops();
4085
4086         for_each_active_iommu(iommu, drhd)
4087                 iommu->iommu_dev = iommu_device_create(NULL, iommu,
4088                                                        intel_iommu_groups,
4089                                                        iommu->name);
4090
4091         bus_set_iommu(&pci_bus_type, &intel_iommu_ops);
4092         bus_register_notifier(&pci_bus_type, &device_nb);
4093         if (si_domain && !hw_pass_through)
4094                 register_memory_notifier(&intel_iommu_memory_nb);
4095
4096         intel_iommu_enabled = 1;
4097
4098         return 0;
4099
4100 out_free_reserved_range:
4101         put_iova_domain(&reserved_iova_list);
4102 out_free_dmar:
4103         intel_iommu_free_dmars();
4104         up_write(&dmar_global_lock);
4105         iommu_exit_mempool();
4106         return ret;
4107 }
4108
4109 static int iommu_detach_dev_cb(struct pci_dev *pdev, u16 alias, void *opaque)
4110 {
4111         struct intel_iommu *iommu = opaque;
4112
4113         iommu_detach_dev(iommu, PCI_BUS_NUM(alias), alias & 0xff);
4114         return 0;
4115 }
4116
4117 /*
4118  * NB - intel-iommu lacks any sort of reference counting for the users of
4119  * dependent devices.  If multiple endpoints have intersecting dependent
4120  * devices, unbinding the driver from any one of them will possibly leave
4121  * the others unable to operate.
4122  */
4123 static void iommu_detach_dependent_devices(struct intel_iommu *iommu,
4124                                            struct device *dev)
4125 {
4126         if (!iommu || !dev || !dev_is_pci(dev))
4127                 return;
4128
4129         pci_for_each_dma_alias(to_pci_dev(dev), &iommu_detach_dev_cb, iommu);
4130 }
4131
4132 static void domain_remove_one_dev_info(struct dmar_domain *domain,
4133                                        struct device *dev)
4134 {
4135         struct device_domain_info *info, *tmp;
4136         struct intel_iommu *iommu;
4137         unsigned long flags;
4138         int found = 0;
4139         u8 bus, devfn;
4140
4141         iommu = device_to_iommu(dev, &bus, &devfn);
4142         if (!iommu)
4143                 return;
4144
4145         spin_lock_irqsave(&device_domain_lock, flags);
4146         list_for_each_entry_safe(info, tmp, &domain->devices, link) {
4147                 if (info->iommu == iommu && info->bus == bus &&
4148                     info->devfn == devfn) {
4149                         unlink_domain_info(info);
4150                         spin_unlock_irqrestore(&device_domain_lock, flags);
4151
4152                         iommu_disable_dev_iotlb(info);
4153                         iommu_detach_dev(iommu, info->bus, info->devfn);
4154                         iommu_detach_dependent_devices(iommu, dev);
4155                         free_devinfo_mem(info);
4156
4157                         spin_lock_irqsave(&device_domain_lock, flags);
4158
4159                         if (found)
4160                                 break;
4161                         else
4162                                 continue;
4163                 }
4164
4165                 /* if there are no other devices under the same iommu
4166                  * owned by this domain, clear this iommu in iommu_bmp,
4167                  * and update the iommu count and coherency
4168                  */
4169                 if (info->iommu == iommu)
4170                         found = 1;
4171         }
4172
4173         spin_unlock_irqrestore(&device_domain_lock, flags);
4174
4175         if (found == 0) {
4176                 domain_detach_iommu(domain, iommu);
4177                 if (!domain_type_is_vm_or_si(domain))
4178                         iommu_detach_domain(domain, iommu);
4179         }
4180 }
4181
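/*
 * Initialise a dmar_domain created through the generic IOMMU API: reserve
 * the special IOVA ranges, compute the address widths and allocate the top
 * level page table.
 */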
4182 static int md_domain_init(struct dmar_domain *domain, int guest_width)
4183 {
4184         int adjust_width;
4185
4186         init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
4187         domain_reserve_special_ranges(domain);
4188
4189         /* calculate AGAW */
4190         domain->gaw = guest_width;
4191         adjust_width = guestwidth_to_adjustwidth(guest_width);
4192         domain->agaw = width_to_agaw(adjust_width);
4193
4194         domain->iommu_coherency = 0;
4195         domain->iommu_snooping = 0;
4196         domain->iommu_superpage = 0;
4197         domain->max_addr = 0;
4198
4199         /* always allocate the top pgd */
4200         domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
4201         if (!domain->pgd)
4202                 return -ENOMEM;
4203         domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
4204         return 0;
4205 }
4206
4207 static int intel_iommu_domain_init(struct iommu_domain *domain)
4208 {
4209         struct dmar_domain *dmar_domain;
4210
4211         dmar_domain = alloc_domain(DOMAIN_FLAG_VIRTUAL_MACHINE);
4212         if (!dmar_domain) {
4213                 printk(KERN_ERR
4214                         "intel_iommu_domain_init: dmar_domain == NULL\n");
4215                 return -ENOMEM;
4216         }
4217         if (md_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
4218                 printk(KERN_ERR
4219                         "intel_iommu_domain_init() failed\n");
4220                 domain_exit(dmar_domain);
4221                 return -ENOMEM;
4222         }
4223         domain_update_iommu_cap(dmar_domain);
4224         domain->priv = dmar_domain;
4225
4226         domain->geometry.aperture_start = 0;
4227         domain->geometry.aperture_end   = __DOMAIN_MAX_ADDR(dmar_domain->gaw);
4228         domain->geometry.force_aperture = true;
4229
4230         return 0;
4231 }
4232
4233 static void intel_iommu_domain_destroy(struct iommu_domain *domain)
4234 {
4235         struct dmar_domain *dmar_domain = domain->priv;
4236
4237         domain->priv = NULL;
4238         domain_exit(dmar_domain);
4239 }
4240
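/*
 * Attach @dev to an API-managed domain: detach it from any previous domain,
 * make sure the domain fits within this IOMMU's address width, then install
 * the context entry.
 */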
4241 static int intel_iommu_attach_device(struct iommu_domain *domain,
4242                                      struct device *dev)
4243 {
4244         struct dmar_domain *dmar_domain = domain->priv;
4245         struct intel_iommu *iommu;
4246         int addr_width;
4247         u8 bus, devfn;
4248
4249         /* normally dev is not mapped */
4250         if (unlikely(domain_context_mapped(dev))) {
4251                 struct dmar_domain *old_domain;
4252
4253                 old_domain = find_domain(dev);
4254                 if (old_domain) {
4255                         if (domain_type_is_vm_or_si(dmar_domain))
4256                                 domain_remove_one_dev_info(old_domain, dev);
4257                         else
4258                                 domain_remove_dev_info(old_domain);
4259                 }
4260         }
4261
4262         iommu = device_to_iommu(dev, &bus, &devfn);
4263         if (!iommu)
4264                 return -ENODEV;
4265
4266         /* check if this iommu agaw is sufficient for max mapped address */
4267         addr_width = agaw_to_width(iommu->agaw);
4268         if (addr_width > cap_mgaw(iommu->cap))
4269                 addr_width = cap_mgaw(iommu->cap);
4270
4271         if (dmar_domain->max_addr > (1LL << addr_width)) {
4272                 printk(KERN_ERR "%s: iommu width (%d) is not "
4273                        "sufficient for the mapped address (%llx)\n",
4274                        __func__, addr_width, dmar_domain->max_addr);
4275                 return -EFAULT;
4276         }
4277         dmar_domain->gaw = addr_width;
4278
4279         /*
4280          * Knock out extra levels of page tables if necessary
4281          */
4282         while (iommu->agaw < dmar_domain->agaw) {
4283                 struct dma_pte *pte;
4284
4285                 pte = dmar_domain->pgd;
4286                 if (dma_pte_present(pte)) {
4287                         dmar_domain->pgd = (struct dma_pte *)
4288                                 phys_to_virt(dma_pte_addr(pte));
4289                         free_pgtable_page(pte);
4290                 }
4291                 dmar_domain->agaw--;
4292         }
4293
4294         return domain_add_dev_info(dmar_domain, dev, CONTEXT_TT_MULTI_LEVEL);
4295 }
4296
4297 static void intel_iommu_detach_device(struct iommu_domain *domain,
4298                                       struct device *dev)
4299 {
4300         struct dmar_domain *dmar_domain = domain->priv;
4301
4302         domain_remove_one_dev_info(dmar_domain, dev);
4303 }
4304
4305 static int intel_iommu_map(struct iommu_domain *domain,
4306                            unsigned long iova, phys_addr_t hpa,
4307                            size_t size, int iommu_prot)
4308 {
4309         struct dmar_domain *dmar_domain = domain->priv;
4310         u64 max_addr;
4311         int prot = 0;
4312         int ret;
4313
4314         if (iommu_prot & IOMMU_READ)
4315                 prot |= DMA_PTE_READ;
4316         if (iommu_prot & IOMMU_WRITE)
4317                 prot |= DMA_PTE_WRITE;
4318         if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping)
4319                 prot |= DMA_PTE_SNP;
4320
4321         max_addr = iova + size;
4322         if (dmar_domain->max_addr < max_addr) {
4323                 u64 end;
4324
4325                 /* check if minimum agaw is sufficient for mapped address */
4326                 end = __DOMAIN_MAX_ADDR(dmar_domain->gaw) + 1;
4327                 if (end < max_addr) {
4328                         printk(KERN_ERR "%s: iommu width (%d) is not "
4329                                "sufficient for the mapped address (%llx)\n",
4330                                __func__, dmar_domain->gaw, max_addr);
4331                         return -EFAULT;
4332                 }
4333                 dmar_domain->max_addr = max_addr;
4334         }
4335         /* Round up size to next multiple of PAGE_SIZE, if it and
4336            the low bits of hpa would take us onto the next page */
4337         size = aligned_nrpages(hpa, size);
4338         ret = domain_pfn_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT,
4339                                  hpa >> VTD_PAGE_SHIFT, size, prot);
4340         return ret;
4341 }
4342
4343 static size_t intel_iommu_unmap(struct iommu_domain *domain,
4344                                 unsigned long iova, size_t size)
4345 {
4346         struct dmar_domain *dmar_domain = domain->priv;
4347         struct page *freelist = NULL;
4348         struct intel_iommu *iommu;
4349         unsigned long start_pfn, last_pfn;
4350         unsigned int npages;
4351         int iommu_id, num, ndomains, level = 0;
4352
4353         /* Cope with horrid API which requires us to unmap more than the
4354            size argument if it happens to be a large-page mapping. */
4355         if (!pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level))
4356                 BUG();
4357
4358         if (size < VTD_PAGE_SIZE << level_to_offset_bits(level))
4359                 size = VTD_PAGE_SIZE << level_to_offset_bits(level);
4360
4361         start_pfn = iova >> VTD_PAGE_SHIFT;
4362         last_pfn = (iova + size - 1) >> VTD_PAGE_SHIFT;
4363
4364         freelist = domain_unmap(dmar_domain, start_pfn, last_pfn);
4365
4366         npages = last_pfn - start_pfn + 1;
4367
4368         for_each_set_bit(iommu_id, dmar_domain->iommu_bmp, g_num_of_iommus) {
4369                iommu = g_iommus[iommu_id];
4370
4371                /*
4372                 * find bit position of dmar_domain
4373                 */
4374                ndomains = cap_ndoms(iommu->cap);
4375                for_each_set_bit(num, iommu->domain_ids, ndomains) {
4376                        if (iommu->domains[num] == dmar_domain)
4377                                iommu_flush_iotlb_psi(iommu, num, start_pfn,
4378                                                      npages, !freelist, 0);
4379                }
4380
4381         }
4382
4383         dma_free_pagelist(freelist);
4384
4385         if (dmar_domain->max_addr == iova + size)
4386                 dmar_domain->max_addr = iova;
4387
4388         return size;
4389 }
4390
4391 static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
4392                                             dma_addr_t iova)
4393 {
4394         struct dmar_domain *dmar_domain = domain->priv;
4395         struct dma_pte *pte;
4396         int level = 0;
4397         u64 phys = 0;
4398
4399         pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level);
4400         if (pte)
4401                 phys = dma_pte_addr(pte);
4402
4403         return phys;
4404 }
4405
4406 static int intel_iommu_domain_has_cap(struct iommu_domain *domain,
4407                                       unsigned long cap)
4408 {
4409         struct dmar_domain *dmar_domain = domain->priv;
4410
4411         if (cap == IOMMU_CAP_CACHE_COHERENCY)
4412                 return dmar_domain->iommu_snooping;
4413         if (cap == IOMMU_CAP_INTR_REMAP)
4414                 return irq_remapping_enabled;
4415
4416         return 0;
4417 }
4418
4419 static int intel_iommu_add_device(struct device *dev)
4420 {
4421         struct intel_iommu *iommu;
4422         struct iommu_group *group;
4423         u8 bus, devfn;
4424
4425         iommu = device_to_iommu(dev, &bus, &devfn);
4426         if (!iommu)
4427                 return -ENODEV;
4428
4429         iommu_device_link(iommu->iommu_dev, dev);
4430
4431         group = iommu_group_get_for_dev(dev);
4432
4433         if (IS_ERR(group))
4434                 return PTR_ERR(group);
4435
4436         iommu_group_put(group);
4437         return 0;
4438 }

static void intel_iommu_remove_device(struct device *dev)
{
        struct intel_iommu *iommu;
        u8 bus, devfn;

        iommu = device_to_iommu(dev, &bus, &devfn);
        if (!iommu)
                return;

        iommu_group_remove_device(dev);

        iommu_device_unlink(iommu->iommu_dev, dev);
}

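/*
 * These callbacks are registered for the PCI bus from intel_iommu_init()
 * via bus_set_iommu(&pci_bus_type, &intel_iommu_ops), so calls made
 * through the generic IOMMU API end up here.  A rough sketch of how a
 * consumer reaches them (illustrative only, not code from this file):
 *
 *	domain = iommu_domain_alloc(&pci_bus_type);	-> ->domain_init
 *	iommu_attach_device(domain, &pdev->dev);	-> ->attach_dev
 *	iommu_map(domain, iova, paddr, size, prot);	-> ->map
 *	iommu_unmap(domain, iova, size);		-> ->unmap
 */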
static const struct iommu_ops intel_iommu_ops = {
        .domain_init    = intel_iommu_domain_init,
        .domain_destroy = intel_iommu_domain_destroy,
        .attach_dev     = intel_iommu_attach_device,
        .detach_dev     = intel_iommu_detach_device,
        .map            = intel_iommu_map,
        .unmap          = intel_iommu_unmap,
        .iova_to_phys   = intel_iommu_iova_to_phys,
        .domain_has_cap = intel_iommu_domain_has_cap,
        .add_device     = intel_iommu_add_device,
        .remove_device  = intel_iommu_remove_device,
        .pgsize_bitmap  = INTEL_IOMMU_PGSIZES,
};

static void quirk_iommu_g4x_gfx(struct pci_dev *dev)
{
        /* G4x/GM45 integrated gfx dmar support is totally busted. */
        printk(KERN_INFO "DMAR: Disabling IOMMU for graphics on this chipset\n");
        dmar_map_gfx = 0;
}

DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_g4x_gfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_g4x_gfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_g4x_gfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_g4x_gfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_g4x_gfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_g4x_gfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_g4x_gfx);

static void quirk_iommu_rwbf(struct pci_dev *dev)
{
        /*
         * Mobile 4 Series Chipset neglects to set RWBF capability,
         * but needs it. Same seems to hold for the desktop versions.
         */
        printk(KERN_INFO "DMAR: Forcing write-buffer flush capability\n");
        rwbf_quirk = 1;
}
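
/*
 * rwbf_quirk is consulted by iommu_flush_write_buffer() earlier in this
 * file; with the quirk set, a write-buffer flush is issued even though
 * the capability register does not advertise RWBF, roughly:
 *
 *	if (!rwbf_quirk && !cap_rwbf(iommu->cap))
 *		return;
 *	... write DMA_GCMD_WBF and wait for the flush to complete ...
 */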

DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_rwbf);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_rwbf);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_rwbf);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_rwbf);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_rwbf);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_rwbf);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_rwbf);

#define GGC 0x52
#define GGC_MEMORY_SIZE_MASK    (0xf << 8)
#define GGC_MEMORY_SIZE_NONE    (0x0 << 8)
#define GGC_MEMORY_SIZE_1M      (0x1 << 8)
#define GGC_MEMORY_SIZE_2M      (0x3 << 8)
#define GGC_MEMORY_VT_ENABLED   (0x8 << 8)
#define GGC_MEMORY_SIZE_2M_VT   (0x9 << 8)
#define GGC_MEMORY_SIZE_3M_VT   (0xa << 8)
#define GGC_MEMORY_SIZE_4M_VT   (0xb << 8)

static void quirk_calpella_no_shadow_gtt(struct pci_dev *dev)
{
        unsigned short ggc;

        if (pci_read_config_word(dev, GGC, &ggc))
                return;

        if (!(ggc & GGC_MEMORY_VT_ENABLED)) {
                printk(KERN_INFO "DMAR: BIOS has allocated no shadow GTT; disabling IOMMU for graphics\n");
                dmar_map_gfx = 0;
        } else if (dmar_map_gfx) {
                /* we have to ensure the gfx device is idle before we flush */
                printk(KERN_INFO "DMAR: Disabling batched IOTLB flush on Ironlake\n");
                intel_iommu_strict = 1;
        }
}
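
/*
 * Setting intel_iommu_strict above has the same effect as booting with
 * intel_iommu=strict: the DMA-API unmap path flushes the IOTLB
 * synchronously for every unmap instead of deferring invalidations to
 * the batched flush_unmaps() path.
 */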
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0040, quirk_calpella_no_shadow_gtt);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, quirk_calpella_no_shadow_gtt);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0062, quirk_calpella_no_shadow_gtt);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x006a, quirk_calpella_no_shadow_gtt);

/* On Tylersburg chipsets, some BIOSes have been known to enable the
   ISOCH DMAR unit for the Azalia sound device, but not give it any
   TLB entries, which causes it to deadlock. Check for that.  We do
   this in a function called from init_dmars(), instead of in a PCI
   quirk, because we don't want to print the obnoxious "BIOS broken"
   message if VT-d is actually disabled.
*/
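/*
 * When the broken configuration is detected below, IDENTMAP_AZALIA is
 * set, which (via iommu_should_identity_map() later in this file) puts
 * the Azalia device into the static identity (1:1) domain so that its
 * isochronous DMA does not depend on the TLB-less ISOCH DMAR unit
 * performing translations.
 */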
static void __init check_tylersburg_isoch(void)
{
        struct pci_dev *pdev;
        uint32_t vtisochctrl;

        /* If there's no Azalia in the system anyway, forget it. */
        pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x3a3e, NULL);
        if (!pdev)
                return;
        pci_dev_put(pdev);

        /* System Management Registers. Might be hidden, in which case
           we can't do the sanity check. But that's OK, because the
           known-broken BIOSes _don't_ actually hide it, so far. */
        pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x342e, NULL);
        if (!pdev)
                return;

        if (pci_read_config_dword(pdev, 0x188, &vtisochctrl)) {
                pci_dev_put(pdev);
                return;
        }

        pci_dev_put(pdev);

        /* If Azalia DMA is routed to the non-isoch DMAR unit, fine. */
        if (vtisochctrl & 1)
                return;

        /* Drop all bits other than the number of TLB entries */
        vtisochctrl &= 0x1c;

        /* If we have the recommended number of TLB entries (16), fine. */
        if (vtisochctrl == 0x10)
                return;

        /* Zero TLB entries? The BIOS is broken; warn and identity-map Azalia. */
        if (!vtisochctrl) {
                WARN(1, "Your BIOS is broken; DMA routed to ISOCH DMAR unit but no TLB space.\n"
                     "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
                     dmi_get_system_info(DMI_BIOS_VENDOR),
                     dmi_get_system_info(DMI_BIOS_VERSION),
                     dmi_get_system_info(DMI_PRODUCT_VERSION));
                iommu_identity_mapping |= IDENTMAP_AZALIA;
                return;
        }

        printk(KERN_WARNING "DMAR: Recommended number of TLB entries for ISOCH unit is 16; your BIOS set %d\n",
               vtisochctrl);
}