[firefly-linux-kernel-4.4.55.git] / drivers / iommu / intel-iommu.c
1 /*
2  * Copyright © 2006-2014 Intel Corporation.
3  *
4  * This program is free software; you can redistribute it and/or modify it
5  * under the terms and conditions of the GNU General Public License,
6  * version 2, as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope it will be useful, but WITHOUT
9  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
11  * more details.
12  *
13  * Authors: David Woodhouse <dwmw2@infradead.org>,
14  *          Ashok Raj <ashok.raj@intel.com>,
15  *          Shaohua Li <shaohua.li@intel.com>,
16  *          Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>,
17  *          Fenghua Yu <fenghua.yu@intel.com>
18  *          Joerg Roedel <jroedel@suse.de>
19  */
20
21 #define pr_fmt(fmt)     "DMAR: " fmt
22
23 #include <linux/init.h>
24 #include <linux/bitmap.h>
25 #include <linux/debugfs.h>
26 #include <linux/export.h>
27 #include <linux/slab.h>
28 #include <linux/irq.h>
29 #include <linux/interrupt.h>
30 #include <linux/spinlock.h>
31 #include <linux/pci.h>
32 #include <linux/dmar.h>
33 #include <linux/dma-mapping.h>
34 #include <linux/mempool.h>
35 #include <linux/memory.h>
36 #include <linux/timer.h>
37 #include <linux/iova.h>
38 #include <linux/iommu.h>
39 #include <linux/intel-iommu.h>
40 #include <linux/syscore_ops.h>
41 #include <linux/tboot.h>
42 #include <linux/dmi.h>
43 #include <linux/pci-ats.h>
44 #include <linux/memblock.h>
45 #include <linux/dma-contiguous.h>
46 #include <linux/crash_dump.h>
47 #include <asm/irq_remapping.h>
48 #include <asm/cacheflush.h>
49 #include <asm/iommu.h>
50
51 #include "irq_remapping.h"
52
53 #define ROOT_SIZE               VTD_PAGE_SIZE
54 #define CONTEXT_SIZE            VTD_PAGE_SIZE
55
56 #define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
57 #define IS_USB_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_SERIAL_USB)
58 #define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
59 #define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e)
60
61 #define IOAPIC_RANGE_START      (0xfee00000)
62 #define IOAPIC_RANGE_END        (0xfeefffff)
63 #define IOVA_START_ADDR         (0x1000)
64
65 #define DEFAULT_DOMAIN_ADDRESS_WIDTH 48
66
67 #define MAX_AGAW_WIDTH 64
68 #define MAX_AGAW_PFN_WIDTH      (MAX_AGAW_WIDTH - VTD_PAGE_SHIFT)
69
70 #define __DOMAIN_MAX_PFN(gaw)  ((((uint64_t)1) << (gaw-VTD_PAGE_SHIFT)) - 1)
71 #define __DOMAIN_MAX_ADDR(gaw) ((((uint64_t)1) << gaw) - 1)
72
73 /* We limit DOMAIN_MAX_PFN to fit in an unsigned long, and DOMAIN_MAX_ADDR
74    to match. That way, we can use 'unsigned long' for PFNs with impunity. */
75 #define DOMAIN_MAX_PFN(gaw)     ((unsigned long) min_t(uint64_t, \
76                                 __DOMAIN_MAX_PFN(gaw), (unsigned long)-1))
77 #define DOMAIN_MAX_ADDR(gaw)    (((uint64_t)__DOMAIN_MAX_PFN(gaw)) << VTD_PAGE_SHIFT)
78
79 /* IO virtual address start page frame number */
80 #define IOVA_START_PFN          (1)
81
82 #define IOVA_PFN(addr)          ((addr) >> PAGE_SHIFT)
83 #define DMA_32BIT_PFN           IOVA_PFN(DMA_BIT_MASK(32))
84 #define DMA_64BIT_PFN           IOVA_PFN(DMA_BIT_MASK(64))
85
86 /* page table handling */
87 #define LEVEL_STRIDE            (9)
88 #define LEVEL_MASK              (((u64)1 << LEVEL_STRIDE) - 1)
89
90 /*
91  * This bitmap is used to advertise the page sizes our hardware supports
92  * to the IOMMU core, which will then use this information to split
93  * physically contiguous memory regions it is mapping into page sizes
94  * that we support.
95  *
96  * Traditionally the IOMMU core just handed us the mappings directly,
97  * after making sure the size is a power-of-two multiple of 4KiB and that the
98  * mapping has natural alignment.
99  *
100  * To retain this behavior, we currently advertise that we support
101  * every page size that is a power-of-two multiple of 4KiB.
102  *
103  * If at some point we'd like to utilize the IOMMU core's new behavior,
104  * we could change this to advertise the real page sizes we support.
105  */
106 #define INTEL_IOMMU_PGSIZES     (~0xFFFUL)
107
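/*
 * For example, on a 64-bit build ~0xFFFUL is 0xfffffffffffff000: every
 * bit from bit 12 (4KiB) upward is set, so the IOMMU core is told that
 * any power-of-two size that is a multiple of 4KiB may be handed to the
 * driver's map callback in one go.
 */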
108 static inline int agaw_to_level(int agaw)
109 {
110         return agaw + 2;
111 }
112
113 static inline int agaw_to_width(int agaw)
114 {
115         return min_t(int, 30 + agaw * LEVEL_STRIDE, MAX_AGAW_WIDTH);
116 }
117
118 static inline int width_to_agaw(int width)
119 {
120         return DIV_ROUND_UP(width - 30, LEVEL_STRIDE);
121 }
122
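/*
 * A worked example of the AGAW arithmetic above: agaw 0 is a 2-level
 * table covering 30 bits of IOVA, and each additional level adds
 * LEVEL_STRIDE (9) bits:
 *
 *   agaw 1 -> 3 levels, 39-bit IOVA
 *   agaw 2 -> 4 levels, 48-bit IOVA (DEFAULT_DOMAIN_ADDRESS_WIDTH)
 *   agaw 3 -> 5 levels, 57-bit IOVA
 *
 * width_to_agaw() rounds up, so e.g. a requested width of 36 bits still
 * yields agaw 1 (a 3-level table).
 */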
123 static inline unsigned int level_to_offset_bits(int level)
124 {
125         return (level - 1) * LEVEL_STRIDE;
126 }
127
128 static inline int pfn_level_offset(unsigned long pfn, int level)
129 {
130         return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK;
131 }
132
133 static inline unsigned long level_mask(int level)
134 {
135         return -1UL << level_to_offset_bits(level);
136 }
137
138 static inline unsigned long level_size(int level)
139 {
140         return 1UL << level_to_offset_bits(level);
141 }
142
143 static inline unsigned long align_to_level(unsigned long pfn, int level)
144 {
145         return (pfn + level_size(level) - 1) & level_mask(level);
146 }
147
148 static inline unsigned long lvl_to_nr_pages(unsigned int lvl)
149 {
150         return  1 << min_t(int, (lvl - 1) * LEVEL_STRIDE, MAX_AGAW_PFN_WIDTH);
151 }
152
153 /* VT-d pages must never be _larger_ than MM pages. Otherwise things
154    are never going to work. */
155 static inline unsigned long dma_to_mm_pfn(unsigned long dma_pfn)
156 {
157         return dma_pfn >> (PAGE_SHIFT - VTD_PAGE_SHIFT);
158 }
159
160 static inline unsigned long mm_to_dma_pfn(unsigned long mm_pfn)
161 {
162         return mm_pfn << (PAGE_SHIFT - VTD_PAGE_SHIFT);
163 }
164 static inline unsigned long page_to_dma_pfn(struct page *pg)
165 {
166         return mm_to_dma_pfn(page_to_pfn(pg));
167 }
168 static inline unsigned long virt_to_dma_pfn(void *p)
169 {
170         return page_to_dma_pfn(virt_to_page(p));
171 }
172
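/*
 * On x86 PAGE_SHIFT == VTD_PAGE_SHIFT == 12, so the two conversions
 * above are identities; they only shift when the kernel uses MM pages
 * larger than 4KiB.  E.g. with hypothetical 16KiB MM pages (PAGE_SHIFT
 * of 14), mm_to_dma_pfn(1) == 4 and dma_to_mm_pfn(4) == 1.
 */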
173 /* global iommu list, set NULL for ignored DMAR units */
174 static struct intel_iommu **g_iommus;
175
176 static void __init check_tylersburg_isoch(void);
177 static int rwbf_quirk;
178
179 /*
180  * set to 1 to panic kernel if can't successfully enable VT-d
181  * (used when kernel is launched w/ TXT)
182  */
183 static int force_on = 0;
184
185 /*
186  * 0: Present
187  * 1-11: Reserved
188  * 12-63: Context Ptr (12 - (haw-1))
189  * 64-127: Reserved
190  */
191 struct root_entry {
192         u64     lo;
193         u64     hi;
194 };
195 #define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
196
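/*
 * To put numbers on this: each root_entry is 16 bytes and the root
 * table is a single 4KiB page, so ROOT_ENTRY_NR == 4096 / 16 == 256,
 * i.e. one root entry per PCI bus number.
 */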
197 /*
198  * Take a root_entry and return the Lower Context Table Pointer (LCTP)
199  * if marked present.
200  */
201 static phys_addr_t root_entry_lctp(struct root_entry *re)
202 {
203         if (!(re->lo & 1))
204                 return 0;
205
206         return re->lo & VTD_PAGE_MASK;
207 }
208
209 /*
210  * Take a root_entry and return the Upper Context Table Pointer (UCTP)
211  * if marked present.
212  */
213 static phys_addr_t root_entry_uctp(struct root_entry *re)
214 {
215         if (!(re->hi & 1))
216                 return 0;
217
218         return re->hi & VTD_PAGE_MASK;
219 }
220 /*
221  * low 64 bits:
222  * 0: present
223  * 1: fault processing disable
224  * 2-3: translation type
225  * 12-63: address space root
226  * high 64 bits:
227  * 0-2: address width
228  * 3-6: aval
229  * 8-23: domain id
230  */
231 struct context_entry {
232         u64 lo;
233         u64 hi;
234 };
235
236 static inline void context_clear_pasid_enable(struct context_entry *context)
237 {
238         context->lo &= ~(1ULL << 11);
239 }
240
241 static inline bool context_pasid_enabled(struct context_entry *context)
242 {
243         return !!(context->lo & (1ULL << 11));
244 }
245
246 static inline void context_set_copied(struct context_entry *context)
247 {
248         context->hi |= (1ull << 3);
249 }
250
251 static inline bool context_copied(struct context_entry *context)
252 {
253         return !!(context->hi & (1ULL << 3));
254 }
255
256 static inline bool __context_present(struct context_entry *context)
257 {
258         return (context->lo & 1);
259 }
260
261 static inline bool context_present(struct context_entry *context)
262 {
263         return context_pasid_enabled(context) ?
264              __context_present(context) :
265              __context_present(context) && !context_copied(context);
266 }
267
268 static inline void context_set_present(struct context_entry *context)
269 {
270         context->lo |= 1;
271 }
272
273 static inline void context_set_fault_enable(struct context_entry *context)
274 {
275         context->lo &= (((u64)-1) << 2) | 1;
276 }
277
278 static inline void context_set_translation_type(struct context_entry *context,
279                                                 unsigned long value)
280 {
281         context->lo &= (((u64)-1) << 4) | 3;
282         context->lo |= (value & 3) << 2;
283 }
284
285 static inline void context_set_address_root(struct context_entry *context,
286                                             unsigned long value)
287 {
288         context->lo &= ~VTD_PAGE_MASK;
289         context->lo |= value & VTD_PAGE_MASK;
290 }
291
292 static inline void context_set_address_width(struct context_entry *context,
293                                              unsigned long value)
294 {
295         context->hi |= value & 7;
296 }
297
298 static inline void context_set_domain_id(struct context_entry *context,
299                                          unsigned long value)
300 {
301         context->hi |= (value & ((1 << 16) - 1)) << 8;
302 }
303
304 static inline int context_domain_id(struct context_entry *c)
305 {
306         return((c->hi >> 8) & 0xffff);
307 }
308
309 static inline void context_clear_entry(struct context_entry *context)
310 {
311         context->lo = 0;
312         context->hi = 0;
313 }
314
315 /*
316  * 0: readable
317  * 1: writable
318  * 2-6: reserved
319  * 7: super page
320  * 8-10: available
321  * 11: snoop behavior
322  * 12-63: Host physical address
323  */
324 struct dma_pte {
325         u64 val;
326 };
327
328 static inline void dma_clear_pte(struct dma_pte *pte)
329 {
330         pte->val = 0;
331 }
332
333 static inline u64 dma_pte_addr(struct dma_pte *pte)
334 {
335 #ifdef CONFIG_64BIT
336         return pte->val & VTD_PAGE_MASK;
337 #else
338         /* Must have a full atomic 64-bit read */
339         return  __cmpxchg64(&pte->val, 0ULL, 0ULL) & VTD_PAGE_MASK;
340 #endif
341 }
342
343 static inline bool dma_pte_present(struct dma_pte *pte)
344 {
345         return (pte->val & 3) != 0;
346 }
347
348 static inline bool dma_pte_superpage(struct dma_pte *pte)
349 {
350         return (pte->val & DMA_PTE_LARGE_PAGE);
351 }
352
353 static inline int first_pte_in_page(struct dma_pte *pte)
354 {
355         return !((unsigned long)pte & ~VTD_PAGE_MASK);
356 }
357
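/*
 * Example: a page-table page holds VTD_PAGE_SIZE / sizeof(struct dma_pte)
 * == 512 entries, so first_pte_in_page() is true exactly when the pte
 * pointer is 4KiB aligned, i.e. it points at entry index 0 of a
 * page-table page.
 */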
358 /*
359  * This domain is a statically identity mapping domain.
360  *      1. This domain creates a static 1:1 mapping to all usable memory.
361  *      2. It maps to each iommu if successful.
362  *      3. Each iommu maps to this domain if successful.
363  */
364 static struct dmar_domain *si_domain;
365 static int hw_pass_through = 1;
366
367 /*
368  * Domain represents a virtual machine; more than one device
369  * across iommus may be owned by one domain, e.g. a kvm guest.
370  */
371 #define DOMAIN_FLAG_VIRTUAL_MACHINE     (1 << 0)
372
373 /* si_domain contains multiple devices */
374 #define DOMAIN_FLAG_STATIC_IDENTITY     (1 << 1)
375
376 #define for_each_domain_iommu(idx, domain)                      \
377         for (idx = 0; idx < g_num_of_iommus; idx++)             \
378                 if (domain->iommu_refcnt[idx])
379
380 struct dmar_domain {
381         int     nid;                    /* node id */
382
383         unsigned        iommu_refcnt[DMAR_UNITS_SUPPORTED];
384                                         /* Refcount of devices per iommu */
385
386
387         u16             iommu_did[DMAR_UNITS_SUPPORTED];
388                                         /* Domain ids per IOMMU. Use u16 since
389                                          * domain ids are 16 bit wide according
390                                          * to VT-d spec, section 9.3 */
391
392         struct list_head devices;       /* all devices' list */
393         struct iova_domain iovad;       /* iova's that belong to this domain */
394
395         struct dma_pte  *pgd;           /* virtual address */
396         int             gaw;            /* max guest address width */
397
398         /* adjusted guest address width, 0 is level 2 30-bit */
399         int             agaw;
400
401         int             flags;          /* flags to find out type of domain */
402
403         int             iommu_coherency;/* indicate coherency of iommu access */
404         int             iommu_snooping; /* indicate snooping control feature*/
405         int             iommu_count;    /* reference count of iommu */
406         int             iommu_superpage;/* Level of superpages supported:
407                                            0 == 4KiB (no superpages), 1 == 2MiB,
408                                            2 == 1GiB, 3 == 512GiB, 4 == 256TiB */
409         u64             max_addr;       /* maximum mapped address */
410
411         struct iommu_domain domain;     /* generic domain data structure for
412                                            iommu core */
413 };
414
415 /* PCI domain-device relationship */
416 struct device_domain_info {
417         struct list_head link;  /* link to domain siblings */
418         struct list_head global; /* link to global list */
419         u8 bus;                 /* PCI bus number */
420         u8 devfn;               /* PCI devfn number */
421         u8 pasid_supported:3;
422         u8 pasid_enabled:1;
423         u8 pri_supported:1;
424         u8 pri_enabled:1;
425         u8 ats_supported:1;
426         u8 ats_enabled:1;
427         u8 ats_qdep;
428         struct device *dev; /* it's NULL for PCIe-to-PCI bridge */
429         struct intel_iommu *iommu; /* IOMMU used by this device */
430         struct dmar_domain *domain; /* pointer to domain */
431 };
432
433 struct dmar_rmrr_unit {
434         struct list_head list;          /* list of rmrr units   */
435         struct acpi_dmar_header *hdr;   /* ACPI header          */
436         u64     base_address;           /* reserved base address*/
437         u64     end_address;            /* reserved end address */
438         struct dmar_dev_scope *devices; /* target devices */
439         int     devices_cnt;            /* target device count */
440 };
441
442 struct dmar_atsr_unit {
443         struct list_head list;          /* list of ATSR units */
444         struct acpi_dmar_header *hdr;   /* ACPI header */
445         struct dmar_dev_scope *devices; /* target devices */
446         int devices_cnt;                /* target device count */
447         u8 include_all:1;               /* include all ports */
448 };
449
450 static LIST_HEAD(dmar_atsr_units);
451 static LIST_HEAD(dmar_rmrr_units);
452
453 #define for_each_rmrr_units(rmrr) \
454         list_for_each_entry(rmrr, &dmar_rmrr_units, list)
455
456 static void flush_unmaps_timeout(unsigned long data);
457
458 static DEFINE_TIMER(unmap_timer,  flush_unmaps_timeout, 0, 0);
459
460 #define HIGH_WATER_MARK 250
461 struct deferred_flush_tables {
462         int next;
463         struct iova *iova[HIGH_WATER_MARK];
464         struct dmar_domain *domain[HIGH_WATER_MARK];
465         struct page *freelist[HIGH_WATER_MARK];
466 };
467
468 static struct deferred_flush_tables *deferred_flush;
469
470 /* number of IOMMUs in the system; sizes and indexes g_iommus[] */
471 static int g_num_of_iommus;
472
473 static DEFINE_SPINLOCK(async_umap_flush_lock);
474 static LIST_HEAD(unmaps_to_do);
475
476 static int timer_on;
477 static long list_size;
478
479 static void domain_exit(struct dmar_domain *domain);
480 static void domain_remove_dev_info(struct dmar_domain *domain);
481 static void dmar_remove_one_dev_info(struct dmar_domain *domain,
482                                      struct device *dev);
483 static void __dmar_remove_one_dev_info(struct device_domain_info *info);
484 static void domain_context_clear(struct intel_iommu *iommu,
485                                  struct device *dev);
486 static int domain_detach_iommu(struct dmar_domain *domain,
487                                struct intel_iommu *iommu);
488
489 #ifdef CONFIG_INTEL_IOMMU_DEFAULT_ON
490 int dmar_disabled = 0;
491 #else
492 int dmar_disabled = 1;
493 #endif /*CONFIG_INTEL_IOMMU_DEFAULT_ON*/
494
495 int intel_iommu_enabled = 0;
496 EXPORT_SYMBOL_GPL(intel_iommu_enabled);
497
498 static int dmar_map_gfx = 1;
499 static int dmar_forcedac;
500 static int intel_iommu_strict;
501 static int intel_iommu_superpage = 1;
502 static int intel_iommu_ecs = 1;
503 static int intel_iommu_pasid28;
504 static int iommu_identity_mapping;
505
506 #define IDENTMAP_ALL            1
507 #define IDENTMAP_GFX            2
508 #define IDENTMAP_AZALIA         4
509
510 /* Broadwell and Skylake have broken ECS support — normal so-called "second
511  * level" translation of DMA requests-without-PASID doesn't actually happen
512  * unless you also set the NESTE bit in an extended context-entry. Which of
513  * course means that SVM doesn't work because it's trying to do nested
514  * translation of the physical addresses it finds in the process page tables,
515  * through the IOVA->phys mapping found in the "second level" page tables.
516  *
517  * The VT-d specification was retroactively changed to change the definition
518  * of the capability bits and pretend that Broadwell/Skylake never happened...
519  * but unfortunately the wrong bit was changed. It's ECS which is broken, but
520  * for some reason it was the PASID capability bit which was redefined (from
521  * bit 28 on BDW/SKL to bit 40 in future).
522  *
523  * So our test for ECS needs to eschew those implementations which set the old
524  * PASID capability bit 28, since those are the ones on which ECS is broken.
525  * Unless we are working around the 'pasid28' limitations, that is, by putting
526  * the device into passthrough mode for normal DMA and thus masking the bug.
527  */
528 #define ecs_enabled(iommu) (intel_iommu_ecs && ecap_ecs(iommu->ecap) && \
529                             (intel_iommu_pasid28 || !ecap_broken_pasid(iommu->ecap)))
530 /* PASID support is thus enabled if ECS is enabled and *either* of the old
531  * or new capability bits are set. */
532 #define pasid_enabled(iommu) (ecs_enabled(iommu) &&                     \
533                               (ecap_pasid(iommu->ecap) || ecap_broken_pasid(iommu->ecap)))
534
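/*
 * Put differently (an illustrative summary of the two macros above, not
 * an additional check): an IOMMU that advertises ECS but also sets the
 * old bit-28 PASID capability is one of the broken BDW/SKL parts, so
 * ecs_enabled() only treats it as ECS-capable when "intel_iommu=pasid28"
 * was given on the command line to opt in to the pre-production PASID
 * behaviour.
 */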
535 int intel_iommu_gfx_mapped;
536 EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped);
537
538 #define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
539 static DEFINE_SPINLOCK(device_domain_lock);
540 static LIST_HEAD(device_domain_list);
541
542 static const struct iommu_ops intel_iommu_ops;
543
544 static bool translation_pre_enabled(struct intel_iommu *iommu)
545 {
546         return (iommu->flags & VTD_FLAG_TRANS_PRE_ENABLED);
547 }
548
549 static void clear_translation_pre_enabled(struct intel_iommu *iommu)
550 {
551         iommu->flags &= ~VTD_FLAG_TRANS_PRE_ENABLED;
552 }
553
554 static void init_translation_status(struct intel_iommu *iommu)
555 {
556         u32 gsts;
557
558         gsts = readl(iommu->reg + DMAR_GSTS_REG);
559         if (gsts & DMA_GSTS_TES)
560                 iommu->flags |= VTD_FLAG_TRANS_PRE_ENABLED;
561 }
562
563 /* Convert a generic 'struct iommu_domain' to the private 'struct dmar_domain' */
564 static struct dmar_domain *to_dmar_domain(struct iommu_domain *dom)
565 {
566         return container_of(dom, struct dmar_domain, domain);
567 }
568
569 static int __init intel_iommu_setup(char *str)
570 {
571         if (!str)
572                 return -EINVAL;
573         while (*str) {
574                 if (!strncmp(str, "on", 2)) {
575                         dmar_disabled = 0;
576                         pr_info("IOMMU enabled\n");
577                 } else if (!strncmp(str, "off", 3)) {
578                         dmar_disabled = 1;
579                         pr_info("IOMMU disabled\n");
580                 } else if (!strncmp(str, "igfx_off", 8)) {
581                         dmar_map_gfx = 0;
582                         pr_info("Disable GFX device mapping\n");
583                 } else if (!strncmp(str, "forcedac", 8)) {
584                         pr_info("Forcing DAC for PCI devices\n");
585                         dmar_forcedac = 1;
586                 } else if (!strncmp(str, "strict", 6)) {
587                         pr_info("Disable batched IOTLB flush\n");
588                         intel_iommu_strict = 1;
589                 } else if (!strncmp(str, "sp_off", 6)) {
590                         pr_info("Disable supported super page\n");
591                         intel_iommu_superpage = 0;
592                 } else if (!strncmp(str, "ecs_off", 7)) {
593                         printk(KERN_INFO
594                                 "Intel-IOMMU: disable extended context table support\n");
595                         intel_iommu_ecs = 0;
596                 } else if (!strncmp(str, "pasid28", 7)) {
597                         printk(KERN_INFO
598                                 "Intel-IOMMU: enable pre-production PASID support\n");
599                         intel_iommu_pasid28 = 1;
600                         iommu_identity_mapping |= IDENTMAP_GFX;
601                 }
602
603                 str += strcspn(str, ",");
604                 while (*str == ',')
605                         str++;
606         }
607         return 0;
608 }
609 __setup("intel_iommu=", intel_iommu_setup);
610
611 static struct kmem_cache *iommu_domain_cache;
612 static struct kmem_cache *iommu_devinfo_cache;
613
614 static struct dmar_domain* get_iommu_domain(struct intel_iommu *iommu, u16 did)
615 {
616         struct dmar_domain **domains;
617         int idx = did >> 8;
618
619         domains = iommu->domains[idx];
620         if (!domains)
621                 return NULL;
622
623         return domains[did & 0xff];
624 }
625
626 static void set_iommu_domain(struct intel_iommu *iommu, u16 did,
627                              struct dmar_domain *domain)
628 {
629         struct dmar_domain **domains;
630         int idx = did >> 8;
631
632         if (!iommu->domains[idx]) {
633                 size_t size = 256 * sizeof(struct dmar_domain *);
634                 iommu->domains[idx] = kzalloc(size, GFP_ATOMIC);
635         }
636
637         domains = iommu->domains[idx];
638         if (WARN_ON(!domains))
639                 return;
640         else
641                 domains[did & 0xff] = domain;
642 }
643
644 static inline void *alloc_pgtable_page(int node)
645 {
646         struct page *page;
647         void *vaddr = NULL;
648
649         page = alloc_pages_node(node, GFP_ATOMIC | __GFP_ZERO, 0);
650         if (page)
651                 vaddr = page_address(page);
652         return vaddr;
653 }
654
655 static inline void free_pgtable_page(void *vaddr)
656 {
657         free_page((unsigned long)vaddr);
658 }
659
660 static inline void *alloc_domain_mem(void)
661 {
662         return kmem_cache_alloc(iommu_domain_cache, GFP_ATOMIC);
663 }
664
665 static void free_domain_mem(void *vaddr)
666 {
667         kmem_cache_free(iommu_domain_cache, vaddr);
668 }
669
670 static inline void * alloc_devinfo_mem(void)
671 {
672         return kmem_cache_alloc(iommu_devinfo_cache, GFP_ATOMIC);
673 }
674
675 static inline void free_devinfo_mem(void *vaddr)
676 {
677         kmem_cache_free(iommu_devinfo_cache, vaddr);
678 }
679
680 static inline int domain_type_is_vm(struct dmar_domain *domain)
681 {
682         return domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE;
683 }
684
685 static inline int domain_type_is_si(struct dmar_domain *domain)
686 {
687         return domain->flags & DOMAIN_FLAG_STATIC_IDENTITY;
688 }
689
690 static inline int domain_type_is_vm_or_si(struct dmar_domain *domain)
691 {
692         return domain->flags & (DOMAIN_FLAG_VIRTUAL_MACHINE |
693                                 DOMAIN_FLAG_STATIC_IDENTITY);
694 }
695
696 static inline int domain_pfn_supported(struct dmar_domain *domain,
697                                        unsigned long pfn)
698 {
699         int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
700
701         return !(addr_width < BITS_PER_LONG && pfn >> addr_width);
702 }
703
704 static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw)
705 {
706         unsigned long sagaw;
707         int agaw = -1;
708
709         sagaw = cap_sagaw(iommu->cap);
710         for (agaw = width_to_agaw(max_gaw);
711              agaw >= 0; agaw--) {
712                 if (test_bit(agaw, &sagaw))
713                         break;
714         }
715
716         return agaw;
717 }
718
719 /*
720  * Calculate max SAGAW for each iommu.
721  */
722 int iommu_calculate_max_sagaw(struct intel_iommu *iommu)
723 {
724         return __iommu_calculate_agaw(iommu, MAX_AGAW_WIDTH);
725 }
726
727 /*
728  * calculate agaw for each iommu.
729  * "SAGAW" may be different across iommus, use a default agaw, and
730  * get a supported less agaw for iommus that don't support the default agaw.
731  */
732 int iommu_calculate_agaw(struct intel_iommu *iommu)
733 {
734         return __iommu_calculate_agaw(iommu, DEFAULT_DOMAIN_ADDRESS_WIDTH);
735 }
736
737 /* This function only returns a single iommu in a domain */
738 static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
739 {
740         int iommu_id;
741
742         /* si_domain and vm domain should not get here. */
743         BUG_ON(domain_type_is_vm_or_si(domain));
744         for_each_domain_iommu(iommu_id, domain)
745                 break;
746
747         if (iommu_id < 0 || iommu_id >= g_num_of_iommus)
748                 return NULL;
749
750         return g_iommus[iommu_id];
751 }
752
753 static void domain_update_iommu_coherency(struct dmar_domain *domain)
754 {
755         struct dmar_drhd_unit *drhd;
756         struct intel_iommu *iommu;
757         bool found = false;
758         int i;
759
760         domain->iommu_coherency = 1;
761
762         for_each_domain_iommu(i, domain) {
763                 found = true;
764                 if (!ecap_coherent(g_iommus[i]->ecap)) {
765                         domain->iommu_coherency = 0;
766                         break;
767                 }
768         }
769         if (found)
770                 return;
771
772         /* No hardware attached; use lowest common denominator */
773         rcu_read_lock();
774         for_each_active_iommu(iommu, drhd) {
775                 if (!ecap_coherent(iommu->ecap)) {
776                         domain->iommu_coherency = 0;
777                         break;
778                 }
779         }
780         rcu_read_unlock();
781 }
782
783 static int domain_update_iommu_snooping(struct intel_iommu *skip)
784 {
785         struct dmar_drhd_unit *drhd;
786         struct intel_iommu *iommu;
787         int ret = 1;
788
789         rcu_read_lock();
790         for_each_active_iommu(iommu, drhd) {
791                 if (iommu != skip) {
792                         if (!ecap_sc_support(iommu->ecap)) {
793                                 ret = 0;
794                                 break;
795                         }
796                 }
797         }
798         rcu_read_unlock();
799
800         return ret;
801 }
802
803 static int domain_update_iommu_superpage(struct intel_iommu *skip)
804 {
805         struct dmar_drhd_unit *drhd;
806         struct intel_iommu *iommu;
807         int mask = 0xf;
808
809         if (!intel_iommu_superpage) {
810                 return 0;
811         }
812
813         /* set iommu_superpage to the smallest common denominator */
814         rcu_read_lock();
815         for_each_active_iommu(iommu, drhd) {
816                 if (iommu != skip) {
817                         mask &= cap_super_page_val(iommu->cap);
818                         if (!mask)
819                                 break;
820                 }
821         }
822         rcu_read_unlock();
823
824         return fls(mask);
825 }
826
827 /* Some capabilities may be different across iommus */
828 static void domain_update_iommu_cap(struct dmar_domain *domain)
829 {
830         domain_update_iommu_coherency(domain);
831         domain->iommu_snooping = domain_update_iommu_snooping(NULL);
832         domain->iommu_superpage = domain_update_iommu_superpage(NULL);
833 }
834
835 static inline struct context_entry *iommu_context_addr(struct intel_iommu *iommu,
836                                                        u8 bus, u8 devfn, int alloc)
837 {
838         struct root_entry *root = &iommu->root_entry[bus];
839         struct context_entry *context;
840         u64 *entry;
841
842         entry = &root->lo;
843         if (ecs_enabled(iommu)) {
844                 if (devfn >= 0x80) {
845                         devfn -= 0x80;
846                         entry = &root->hi;
847                 }
848                 devfn *= 2;
849         }
850         if (*entry & 1)
851                 context = phys_to_virt(*entry & VTD_PAGE_MASK);
852         else {
853                 unsigned long phy_addr;
854                 if (!alloc)
855                         return NULL;
856
857                 context = alloc_pgtable_page(iommu->node);
858                 if (!context)
859                         return NULL;
860
861                 __iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
862                 phy_addr = virt_to_phys((void *)context);
863                 *entry = phy_addr | 1;
864                 __iommu_flush_cache(iommu, entry, sizeof(*entry));
865         }
866         return &context[devfn];
867 }
868
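/*
 * Layout handled by iommu_context_addr(): a legacy context table packs
 * 256 16-byte entries (one per devfn) into the 4KiB page referenced by
 * root->lo.  Extended (ECS) context entries are twice as large, so only
 * 128 fit per page; devfn 0-127 then live in the page referenced by
 * root->lo and devfn 128-255 in the page referenced by root->hi, which
 * is what the "devfn -= 0x80" and "devfn *= 2" adjustments implement.
 */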
869 static int iommu_dummy(struct device *dev)
870 {
871         return dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO;
872 }
873
874 static struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn)
875 {
876         struct dmar_drhd_unit *drhd = NULL;
877         struct intel_iommu *iommu;
878         struct device *tmp;
879         struct pci_dev *ptmp, *pdev = NULL;
880         u16 segment = 0;
881         int i;
882
883         if (iommu_dummy(dev))
884                 return NULL;
885
886         if (dev_is_pci(dev)) {
887                 pdev = to_pci_dev(dev);
888                 segment = pci_domain_nr(pdev->bus);
889         } else if (has_acpi_companion(dev))
890                 dev = &ACPI_COMPANION(dev)->dev;
891
892         rcu_read_lock();
893         for_each_active_iommu(iommu, drhd) {
894                 if (pdev && segment != drhd->segment)
895                         continue;
896
897                 for_each_active_dev_scope(drhd->devices,
898                                           drhd->devices_cnt, i, tmp) {
899                         if (tmp == dev) {
900                                 *bus = drhd->devices[i].bus;
901                                 *devfn = drhd->devices[i].devfn;
902                                 goto out;
903                         }
904
905                         if (!pdev || !dev_is_pci(tmp))
906                                 continue;
907
908                         ptmp = to_pci_dev(tmp);
909                         if (ptmp->subordinate &&
910                             ptmp->subordinate->number <= pdev->bus->number &&
911                             ptmp->subordinate->busn_res.end >= pdev->bus->number)
912                                 goto got_pdev;
913                 }
914
915                 if (pdev && drhd->include_all) {
916                 got_pdev:
917                         *bus = pdev->bus->number;
918                         *devfn = pdev->devfn;
919                         goto out;
920                 }
921         }
922         iommu = NULL;
923  out:
924         rcu_read_unlock();
925
926         return iommu;
927 }
928
929 static void domain_flush_cache(struct dmar_domain *domain,
930                                void *addr, int size)
931 {
932         if (!domain->iommu_coherency)
933                 clflush_cache_range(addr, size);
934 }
935
936 static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
937 {
938         struct context_entry *context;
939         int ret = 0;
940         unsigned long flags;
941
942         spin_lock_irqsave(&iommu->lock, flags);
943         context = iommu_context_addr(iommu, bus, devfn, 0);
944         if (context)
945                 ret = context_present(context);
946         spin_unlock_irqrestore(&iommu->lock, flags);
947         return ret;
948 }
949
950 static void clear_context_table(struct intel_iommu *iommu, u8 bus, u8 devfn)
951 {
952         struct context_entry *context;
953         unsigned long flags;
954
955         spin_lock_irqsave(&iommu->lock, flags);
956         context = iommu_context_addr(iommu, bus, devfn, 0);
957         if (context) {
958                 context_clear_entry(context);
959                 __iommu_flush_cache(iommu, context, sizeof(*context));
960         }
961         spin_unlock_irqrestore(&iommu->lock, flags);
962 }
963
964 static void free_context_table(struct intel_iommu *iommu)
965 {
966         int i;
967         unsigned long flags;
968         struct context_entry *context;
969
970         spin_lock_irqsave(&iommu->lock, flags);
971         if (!iommu->root_entry) {
972                 goto out;
973         }
974         for (i = 0; i < ROOT_ENTRY_NR; i++) {
975                 context = iommu_context_addr(iommu, i, 0, 0);
976                 if (context)
977                         free_pgtable_page(context);
978
979                 if (!ecs_enabled(iommu))
980                         continue;
981
982                 context = iommu_context_addr(iommu, i, 0x80, 0);
983                 if (context)
984                         free_pgtable_page(context);
985
986         }
987         free_pgtable_page(iommu->root_entry);
988         iommu->root_entry = NULL;
989 out:
990         spin_unlock_irqrestore(&iommu->lock, flags);
991 }
992
993 static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
994                                       unsigned long pfn, int *target_level)
995 {
996         struct dma_pte *parent, *pte = NULL;
997         int level = agaw_to_level(domain->agaw);
998         int offset;
999
1000         BUG_ON(!domain->pgd);
1001
1002         if (!domain_pfn_supported(domain, pfn))
1003                 /* Address beyond IOMMU's addressing capabilities. */
1004                 return NULL;
1005
1006         parent = domain->pgd;
1007
1008         while (1) {
1009                 void *tmp_page;
1010
1011                 offset = pfn_level_offset(pfn, level);
1012                 pte = &parent[offset];
1013                 if (!*target_level && (dma_pte_superpage(pte) || !dma_pte_present(pte)))
1014                         break;
1015                 if (level == *target_level)
1016                         break;
1017
1018                 if (!dma_pte_present(pte)) {
1019                         uint64_t pteval;
1020
1021                         tmp_page = alloc_pgtable_page(domain->nid);
1022
1023                         if (!tmp_page)
1024                                 return NULL;
1025
1026                         domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE);
1027                         pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE;
1028                         if (cmpxchg64(&pte->val, 0ULL, pteval))
1029                                 /* Someone else set it while we were thinking; use theirs. */
1030                                 free_pgtable_page(tmp_page);
1031                         else
1032                                 domain_flush_cache(domain, pte, sizeof(*pte));
1033                 }
1034                 if (level == 1)
1035                         break;
1036
1037                 parent = phys_to_virt(dma_pte_addr(pte));
1038                 level--;
1039         }
1040
1041         if (!*target_level)
1042                 *target_level = level;
1043
1044         return pte;
1045 }
1046
1047
1048 /* return address's pte at specific level */
1049 static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain,
1050                                          unsigned long pfn,
1051                                          int level, int *large_page)
1052 {
1053         struct dma_pte *parent, *pte = NULL;
1054         int total = agaw_to_level(domain->agaw);
1055         int offset;
1056
1057         parent = domain->pgd;
1058         while (level <= total) {
1059                 offset = pfn_level_offset(pfn, total);
1060                 pte = &parent[offset];
1061                 if (level == total)
1062                         return pte;
1063
1064                 if (!dma_pte_present(pte)) {
1065                         *large_page = total;
1066                         break;
1067                 }
1068
1069                 if (dma_pte_superpage(pte)) {
1070                         *large_page = total;
1071                         return pte;
1072                 }
1073
1074                 parent = phys_to_virt(dma_pte_addr(pte));
1075                 total--;
1076         }
1077         return NULL;
1078 }
1079
1080 /* clear last level pte, a tlb flush should be followed */
1081 static void dma_pte_clear_range(struct dmar_domain *domain,
1082                                 unsigned long start_pfn,
1083                                 unsigned long last_pfn)
1084 {
1085         unsigned int large_page = 1;
1086         struct dma_pte *first_pte, *pte;
1087
1088         BUG_ON(!domain_pfn_supported(domain, start_pfn));
1089         BUG_ON(!domain_pfn_supported(domain, last_pfn));
1090         BUG_ON(start_pfn > last_pfn);
1091
1092         /* we don't need lock here; nobody else touches the iova range */
1093         do {
1094                 large_page = 1;
1095                 first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1, &large_page);
1096                 if (!pte) {
1097                         start_pfn = align_to_level(start_pfn + 1, large_page + 1);
1098                         continue;
1099                 }
1100                 do {
1101                         dma_clear_pte(pte);
1102                         start_pfn += lvl_to_nr_pages(large_page);
1103                         pte++;
1104                 } while (start_pfn <= last_pfn && !first_pte_in_page(pte));
1105
1106                 domain_flush_cache(domain, first_pte,
1107                                    (void *)pte - (void *)first_pte);
1108
1109         } while (start_pfn && start_pfn <= last_pfn);
1110 }
1111
1112 static void dma_pte_free_level(struct dmar_domain *domain, int level,
1113                                struct dma_pte *pte, unsigned long pfn,
1114                                unsigned long start_pfn, unsigned long last_pfn)
1115 {
1116         pfn = max(start_pfn, pfn);
1117         pte = &pte[pfn_level_offset(pfn, level)];
1118
1119         do {
1120                 unsigned long level_pfn;
1121                 struct dma_pte *level_pte;
1122
1123                 if (!dma_pte_present(pte) || dma_pte_superpage(pte))
1124                         goto next;
1125
1126                 level_pfn = pfn & level_mask(level);
1127                 level_pte = phys_to_virt(dma_pte_addr(pte));
1128
1129                 if (level > 2)
1130                         dma_pte_free_level(domain, level - 1, level_pte,
1131                                            level_pfn, start_pfn, last_pfn);
1132
1133                 /* If range covers entire pagetable, free it */
1134                 if (!(start_pfn > level_pfn ||
1135                       last_pfn < level_pfn + level_size(level) - 1)) {
1136                         dma_clear_pte(pte);
1137                         domain_flush_cache(domain, pte, sizeof(*pte));
1138                         free_pgtable_page(level_pte);
1139                 }
1140 next:
1141                 pfn += level_size(level);
1142         } while (!first_pte_in_page(++pte) && pfn <= last_pfn);
1143 }
1144
1145 /* free page table pages. last level pte should already be cleared */
1146 static void dma_pte_free_pagetable(struct dmar_domain *domain,
1147                                    unsigned long start_pfn,
1148                                    unsigned long last_pfn)
1149 {
1150         BUG_ON(!domain_pfn_supported(domain, start_pfn));
1151         BUG_ON(!domain_pfn_supported(domain, last_pfn));
1152         BUG_ON(start_pfn > last_pfn);
1153
1154         dma_pte_clear_range(domain, start_pfn, last_pfn);
1155
1156         /* We don't need lock here; nobody else touches the iova range */
1157         dma_pte_free_level(domain, agaw_to_level(domain->agaw),
1158                            domain->pgd, 0, start_pfn, last_pfn);
1159
1160         /* free pgd */
1161         if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
1162                 free_pgtable_page(domain->pgd);
1163                 domain->pgd = NULL;
1164         }
1165 }
1166
1167 /* When a page at a given level is being unlinked from its parent, we don't
1168    need to *modify* it at all. All we need to do is make a list of all the
1169    pages which can be freed just as soon as we've flushed the IOTLB and we
1170    know the hardware page-walk will no longer touch them.
1171    The 'pte' argument is the *parent* PTE, pointing to the page that is to
1172    be freed. */
1173 static struct page *dma_pte_list_pagetables(struct dmar_domain *domain,
1174                                             int level, struct dma_pte *pte,
1175                                             struct page *freelist)
1176 {
1177         struct page *pg;
1178
1179         pg = pfn_to_page(dma_pte_addr(pte) >> PAGE_SHIFT);
1180         pg->freelist = freelist;
1181         freelist = pg;
1182
1183         if (level == 1)
1184                 return freelist;
1185
1186         pte = page_address(pg);
1187         do {
1188                 if (dma_pte_present(pte) && !dma_pte_superpage(pte))
1189                         freelist = dma_pte_list_pagetables(domain, level - 1,
1190                                                            pte, freelist);
1191                 pte++;
1192         } while (!first_pte_in_page(pte));
1193
1194         return freelist;
1195 }
1196
1197 static struct page *dma_pte_clear_level(struct dmar_domain *domain, int level,
1198                                         struct dma_pte *pte, unsigned long pfn,
1199                                         unsigned long start_pfn,
1200                                         unsigned long last_pfn,
1201                                         struct page *freelist)
1202 {
1203         struct dma_pte *first_pte = NULL, *last_pte = NULL;
1204
1205         pfn = max(start_pfn, pfn);
1206         pte = &pte[pfn_level_offset(pfn, level)];
1207
1208         do {
1209                 unsigned long level_pfn;
1210
1211                 if (!dma_pte_present(pte))
1212                         goto next;
1213
1214                 level_pfn = pfn & level_mask(level);
1215
1216                 /* If range covers entire pagetable, free it */
1217                 if (start_pfn <= level_pfn &&
1218                     last_pfn >= level_pfn + level_size(level) - 1) {
1219                         /* These subordinate page tables are going away entirely. Don't
1220                            bother to clear them; we're just going to *free* them. */
1221                         if (level > 1 && !dma_pte_superpage(pte))
1222                                 freelist = dma_pte_list_pagetables(domain, level - 1, pte, freelist);
1223
1224                         dma_clear_pte(pte);
1225                         if (!first_pte)
1226                                 first_pte = pte;
1227                         last_pte = pte;
1228                 } else if (level > 1) {
1229                         /* Recurse down into a level that isn't *entirely* obsolete */
1230                         freelist = dma_pte_clear_level(domain, level - 1,
1231                                                        phys_to_virt(dma_pte_addr(pte)),
1232                                                        level_pfn, start_pfn, last_pfn,
1233                                                        freelist);
1234                 }
1235 next:
1236                 pfn += level_size(level);
1237         } while (!first_pte_in_page(++pte) && pfn <= last_pfn);
1238
1239         if (first_pte)
1240                 domain_flush_cache(domain, first_pte,
1241                                    (void *)++last_pte - (void *)first_pte);
1242
1243         return freelist;
1244 }
1245
1246 /* We can't just free the pages because the IOMMU may still be walking
1247    the page tables, and may have cached the intermediate levels. The
1248    pages can only be freed after the IOTLB flush has been done. */
1249 static struct page *domain_unmap(struct dmar_domain *domain,
1250                                  unsigned long start_pfn,
1251                                  unsigned long last_pfn)
1252 {
1253         struct page *freelist = NULL;
1254
1255         BUG_ON(!domain_pfn_supported(domain, start_pfn));
1256         BUG_ON(!domain_pfn_supported(domain, last_pfn));
1257         BUG_ON(start_pfn > last_pfn);
1258
1259         /* we don't need lock here; nobody else touches the iova range */
1260         freelist = dma_pte_clear_level(domain, agaw_to_level(domain->agaw),
1261                                        domain->pgd, 0, start_pfn, last_pfn, NULL);
1262
1263         /* free pgd */
1264         if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
1265                 struct page *pgd_page = virt_to_page(domain->pgd);
1266                 pgd_page->freelist = freelist;
1267                 freelist = pgd_page;
1268
1269                 domain->pgd = NULL;
1270         }
1271
1272         return freelist;
1273 }
1274
1275 static void dma_free_pagelist(struct page *freelist)
1276 {
1277         struct page *pg;
1278
1279         while ((pg = freelist)) {
1280                 freelist = pg->freelist;
1281                 free_pgtable_page(page_address(pg));
1282         }
1283 }
1284
1285 /* iommu handling */
1286 static int iommu_alloc_root_entry(struct intel_iommu *iommu)
1287 {
1288         struct root_entry *root;
1289         unsigned long flags;
1290
1291         root = (struct root_entry *)alloc_pgtable_page(iommu->node);
1292         if (!root) {
1293                 pr_err("Allocating root entry for %s failed\n",
1294                         iommu->name);
1295                 return -ENOMEM;
1296         }
1297
1298         __iommu_flush_cache(iommu, root, ROOT_SIZE);
1299
1300         spin_lock_irqsave(&iommu->lock, flags);
1301         iommu->root_entry = root;
1302         spin_unlock_irqrestore(&iommu->lock, flags);
1303
1304         return 0;
1305 }
1306
1307 static void iommu_set_root_entry(struct intel_iommu *iommu)
1308 {
1309         u64 addr;
1310         u32 sts;
1311         unsigned long flag;
1312
1313         addr = virt_to_phys(iommu->root_entry);
1314         if (ecs_enabled(iommu))
1315                 addr |= DMA_RTADDR_RTT;
1316
1317         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1318         dmar_writeq(iommu->reg + DMAR_RTADDR_REG, addr);
1319
1320         writel(iommu->gcmd | DMA_GCMD_SRTP, iommu->reg + DMAR_GCMD_REG);
1321
1322         /* Make sure hardware complete it */
1323         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1324                       readl, (sts & DMA_GSTS_RTPS), sts);
1325
1326         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1327 }
1328
1329 static void iommu_flush_write_buffer(struct intel_iommu *iommu)
1330 {
1331         u32 val;
1332         unsigned long flag;
1333
1334         if (!rwbf_quirk && !cap_rwbf(iommu->cap))
1335                 return;
1336
1337         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1338         writel(iommu->gcmd | DMA_GCMD_WBF, iommu->reg + DMAR_GCMD_REG);
1339
1340         /* Make sure hardware complete it */
1341         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1342                       readl, (!(val & DMA_GSTS_WBFS)), val);
1343
1344         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1345 }
1346
1347 /* return value determine if we need a write buffer flush */
1348 static void __iommu_flush_context(struct intel_iommu *iommu,
1349                                   u16 did, u16 source_id, u8 function_mask,
1350                                   u64 type)
1351 {
1352         u64 val = 0;
1353         unsigned long flag;
1354
1355         switch (type) {
1356         case DMA_CCMD_GLOBAL_INVL:
1357                 val = DMA_CCMD_GLOBAL_INVL;
1358                 break;
1359         case DMA_CCMD_DOMAIN_INVL:
1360                 val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
1361                 break;
1362         case DMA_CCMD_DEVICE_INVL:
1363                 val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
1364                         | DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
1365                 break;
1366         default:
1367                 BUG();
1368         }
1369         val |= DMA_CCMD_ICC;
1370
1371         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1372         dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);
1373
1374         /* Make sure hardware complete it */
1375         IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
1376                 dmar_readq, (!(val & DMA_CCMD_ICC)), val);
1377
1378         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1379 }
1380
1381 /* return value determine if we need a write buffer flush */
1382 static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
1383                                 u64 addr, unsigned int size_order, u64 type)
1384 {
1385         int tlb_offset = ecap_iotlb_offset(iommu->ecap);
1386         u64 val = 0, val_iva = 0;
1387         unsigned long flag;
1388
1389         switch (type) {
1390         case DMA_TLB_GLOBAL_FLUSH:
1391                 /* global flush doesn't need set IVA_REG */
1392                 val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
1393                 break;
1394         case DMA_TLB_DSI_FLUSH:
1395                 val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
1396                 break;
1397         case DMA_TLB_PSI_FLUSH:
1398                 val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
1399                 /* IH bit is passed in as part of address */
1400                 val_iva = size_order | addr;
1401                 break;
1402         default:
1403                 BUG();
1404         }
1405         /* Note: set drain read/write */
1406 #if 0
1407         /*
1408          * This is probably to be super secure.. Looks like we can
1409          * ignore it without any impact.
1410          */
1411         if (cap_read_drain(iommu->cap))
1412                 val |= DMA_TLB_READ_DRAIN;
1413 #endif
1414         if (cap_write_drain(iommu->cap))
1415                 val |= DMA_TLB_WRITE_DRAIN;
1416
1417         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1418         /* Note: Only uses first TLB reg currently */
1419         if (val_iva)
1420                 dmar_writeq(iommu->reg + tlb_offset, val_iva);
1421         dmar_writeq(iommu->reg + tlb_offset + 8, val);
1422
1423         /* Make sure hardware complete it */
1424         IOMMU_WAIT_OP(iommu, tlb_offset + 8,
1425                 dmar_readq, (!(val & DMA_TLB_IVT)), val);
1426
1427         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1428
1429         /* check IOTLB invalidation granularity */
1430         if (DMA_TLB_IAIG(val) == 0)
1431                 pr_err("Flush IOTLB failed\n");
1432         if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
1433                 pr_debug("TLB flush request %Lx, actual %Lx\n",
1434                         (unsigned long long)DMA_TLB_IIRG(type),
1435                         (unsigned long long)DMA_TLB_IAIG(val));
1436 }
1437
1438 static struct device_domain_info *
1439 iommu_support_dev_iotlb (struct dmar_domain *domain, struct intel_iommu *iommu,
1440                          u8 bus, u8 devfn)
1441 {
1442         struct device_domain_info *info;
1443
1444         assert_spin_locked(&device_domain_lock);
1445
1446         if (!iommu->qi)
1447                 return NULL;
1448
1449         list_for_each_entry(info, &domain->devices, link)
1450                 if (info->iommu == iommu && info->bus == bus &&
1451                     info->devfn == devfn) {
1452                         if (info->ats_supported && info->dev)
1453                                 return info;
1454                         break;
1455                 }
1456
1457         return NULL;
1458 }
1459
1460 static void iommu_enable_dev_iotlb(struct device_domain_info *info)
1461 {
1462         struct pci_dev *pdev;
1463
1464         if (!info || !dev_is_pci(info->dev))
1465                 return;
1466
1467         pdev = to_pci_dev(info->dev);
1468
1469 #ifdef CONFIG_INTEL_IOMMU_SVM
1470         /* The PCIe spec, in its wisdom, declares that the behaviour of
1471            the device if you enable PASID support after ATS support is
1472            undefined. So always enable PASID support on devices which
1473            have it, even if we can't yet know if we're ever going to
1474            use it. */
1475         if (info->pasid_supported && !pci_enable_pasid(pdev, info->pasid_supported & ~1))
1476                 info->pasid_enabled = 1;
1477
1478         if (info->pri_supported && !pci_reset_pri(pdev) && !pci_enable_pri(pdev, 32))
1479                 info->pri_enabled = 1;
1480 #endif
1481         if (info->ats_supported && !pci_enable_ats(pdev, VTD_PAGE_SHIFT)) {
1482                 info->ats_enabled = 1;
1483                 info->ats_qdep = pci_ats_queue_depth(pdev);
1484         }
1485 }
1486
1487 static void iommu_disable_dev_iotlb(struct device_domain_info *info)
1488 {
1489         struct pci_dev *pdev;
1490
1491         if (!dev_is_pci(info->dev))
1492                 return;
1493
1494         pdev = to_pci_dev(info->dev);
1495
1496         if (info->ats_enabled) {
1497                 pci_disable_ats(pdev);
1498                 info->ats_enabled = 0;
1499         }
1500 #ifdef CONFIG_INTEL_IOMMU_SVM
1501         if (info->pri_enabled) {
1502                 pci_disable_pri(pdev);
1503                 info->pri_enabled = 0;
1504         }
1505         if (info->pasid_enabled) {
1506                 pci_disable_pasid(pdev);
1507                 info->pasid_enabled = 0;
1508         }
1509 #endif
1510 }
1511
1512 static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
1513                                   u64 addr, unsigned mask)
1514 {
1515         u16 sid, qdep;
1516         unsigned long flags;
1517         struct device_domain_info *info;
1518
1519         spin_lock_irqsave(&device_domain_lock, flags);
1520         list_for_each_entry(info, &domain->devices, link) {
1521                 if (!info->ats_enabled)
1522                         continue;
1523
1524                 sid = info->bus << 8 | info->devfn;
1525                 qdep = info->ats_qdep;
1526                 qi_flush_dev_iotlb(info->iommu, sid, qdep, addr, mask);
1527         }
1528         spin_unlock_irqrestore(&device_domain_lock, flags);
1529 }
1530
1531 static void iommu_flush_iotlb_psi(struct intel_iommu *iommu,
1532                                   struct dmar_domain *domain,
1533                                   unsigned long pfn, unsigned int pages,
1534                                   int ih, int map)
1535 {
1536         unsigned int mask = ilog2(__roundup_pow_of_two(pages));
1537         uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT;
1538         u16 did = domain->iommu_did[iommu->seq_id];
1539
1540         BUG_ON(pages == 0);
1541
1542         if (ih)
1543                 ih = 1 << 6;
1544         /*
1545          * Fallback to domain selective flush if no PSI support or the size is
1546          * too big.
1547          * PSI requires page size to be 2 ^ x, and the base address is naturally
1548          * aligned to the size
1549          */
1550         if (!cap_pgsel_inv(iommu->cap) || mask > cap_max_amask_val(iommu->cap))
1551                 iommu->flush.flush_iotlb(iommu, did, 0, 0,
1552                                                 DMA_TLB_DSI_FLUSH);
1553         else
1554                 iommu->flush.flush_iotlb(iommu, did, addr | ih, mask,
1555                                                 DMA_TLB_PSI_FLUSH);
1556
1557         /*
1558          * In caching mode, changes of pages from non-present to present require a
1559          * flush. However, the device IOTLB doesn't need to be flushed in this case.
1560          */
1561         if (!cap_caching_mode(iommu->cap) || !map)
1562                 iommu_flush_dev_iotlb(get_iommu_domain(iommu, did),
1563                                       addr, mask);
1564 }
1565
1566 static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
1567 {
1568         u32 pmen;
1569         unsigned long flags;
1570
1571         raw_spin_lock_irqsave(&iommu->register_lock, flags);
1572         pmen = readl(iommu->reg + DMAR_PMEN_REG);
1573         pmen &= ~DMA_PMEN_EPM;
1574         writel(pmen, iommu->reg + DMAR_PMEN_REG);
1575
1576         /* wait for the protected region status bit to clear */
1577         IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
1578                 readl, !(pmen & DMA_PMEN_PRS), pmen);
1579
1580         raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1581 }
1582
1583 static void iommu_enable_translation(struct intel_iommu *iommu)
1584 {
1585         u32 sts;
1586         unsigned long flags;
1587
1588         raw_spin_lock_irqsave(&iommu->register_lock, flags);
1589         iommu->gcmd |= DMA_GCMD_TE;
1590         writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1591
1592         /* Make sure the hardware completes it */
1593         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1594                       readl, (sts & DMA_GSTS_TES), sts);
1595
1596         raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1597 }
1598
1599 static void iommu_disable_translation(struct intel_iommu *iommu)
1600 {
1601         u32 sts;
1602         unsigned long flag;
1603
1604         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1605         iommu->gcmd &= ~DMA_GCMD_TE;
1606         writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1607
1608         /* Make sure the hardware completes it */
1609         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1610                       readl, (!(sts & DMA_GSTS_TES)), sts);
1611
1612         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1613 }
1614
1615
1616 static int iommu_init_domains(struct intel_iommu *iommu)
1617 {
1618         u32 ndomains, nlongs;
1619         size_t size;
1620
1621         ndomains = cap_ndoms(iommu->cap);
1622         pr_debug("%s: Number of Domains supported <%d>\n",
1623                  iommu->name, ndomains);
1624         nlongs = BITS_TO_LONGS(ndomains);
1625
1626         spin_lock_init(&iommu->lock);
1627
1628         iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
1629         if (!iommu->domain_ids) {
1630                 pr_err("%s: Allocating domain id array failed\n",
1631                        iommu->name);
1632                 return -ENOMEM;
1633         }
1634
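        /*
         * iommu->domains is a two-level array: a top-level array of pointers
         * to 256-entry chunks of struct dmar_domain pointers. Only the first
         * chunk is allocated here.
         */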
1635         size = ((ndomains >> 8) + 1) * sizeof(struct dmar_domain **);
1636         iommu->domains = kzalloc(size, GFP_KERNEL);
1637
1638         if (iommu->domains) {
1639                 size = 256 * sizeof(struct dmar_domain *);
1640                 iommu->domains[0] = kzalloc(size, GFP_KERNEL);
1641         }
1642
1643         if (!iommu->domains || !iommu->domains[0]) {
1644                 pr_err("%s: Allocating domain array failed\n",
1645                        iommu->name);
1646                 kfree(iommu->domain_ids);
1647                 kfree(iommu->domains);
1648                 iommu->domain_ids = NULL;
1649                 iommu->domains    = NULL;
1650                 return -ENOMEM;
1651         }
1652
1653
1654
1655         /*
1656          * If Caching mode is set, then invalid translations are tagged
1657          * with domain-id 0, hence we need to pre-allocate it. We also
1658          * use domain-id 0 as a marker for non-allocated domain-id, so
1659          * make sure it is not used for a real domain.
1660          */
1661         set_bit(0, iommu->domain_ids);
1662
1663         return 0;
1664 }
1665
1666 static void disable_dmar_iommu(struct intel_iommu *iommu)
1667 {
1668         struct device_domain_info *info, *tmp;
1669         unsigned long flags;
1670
1671         if (!iommu->domains || !iommu->domain_ids)
1672                 return;
1673
1674         spin_lock_irqsave(&device_domain_lock, flags);
1675         list_for_each_entry_safe(info, tmp, &device_domain_list, global) {
1676                 struct dmar_domain *domain;
1677
1678                 if (info->iommu != iommu)
1679                         continue;
1680
1681                 if (!info->dev || !info->domain)
1682                         continue;
1683
1684                 domain = info->domain;
1685
1686                 dmar_remove_one_dev_info(domain, info->dev);
1687
1688                 if (!domain_type_is_vm_or_si(domain))
1689                         domain_exit(domain);
1690         }
1691         spin_unlock_irqrestore(&device_domain_lock, flags);
1692
1693         if (iommu->gcmd & DMA_GCMD_TE)
1694                 iommu_disable_translation(iommu);
1695 }
1696
1697 static void free_dmar_iommu(struct intel_iommu *iommu)
1698 {
1699         if ((iommu->domains) && (iommu->domain_ids)) {
1700                 int elems = (cap_ndoms(iommu->cap) >> 8) + 1;
1701                 int i;
1702
1703                 for (i = 0; i < elems; i++)
1704                         kfree(iommu->domains[i]);
1705                 kfree(iommu->domains);
1706                 kfree(iommu->domain_ids);
1707                 iommu->domains = NULL;
1708                 iommu->domain_ids = NULL;
1709         }
1710
1711         g_iommus[iommu->seq_id] = NULL;
1712
1713         /* free context mapping */
1714         free_context_table(iommu);
1715
1716 #ifdef CONFIG_INTEL_IOMMU_SVM
1717         if (pasid_enabled(iommu)) {
1718                 if (ecap_prs(iommu->ecap))
1719                         intel_svm_finish_prq(iommu);
1720                 intel_svm_free_pasid_tables(iommu);
1721         }
1722 #endif
1723 }
1724
1725 static struct dmar_domain *alloc_domain(int flags)
1726 {
1727         struct dmar_domain *domain;
1728
1729         domain = alloc_domain_mem();
1730         if (!domain)
1731                 return NULL;
1732
1733         memset(domain, 0, sizeof(*domain));
1734         domain->nid = -1;
1735         domain->flags = flags;
1736         INIT_LIST_HEAD(&domain->devices);
1737
1738         return domain;
1739 }
1740
1741 /* Must be called with device_domain_lock and iommu->lock held */
1742 static int domain_attach_iommu(struct dmar_domain *domain,
1743                                struct intel_iommu *iommu)
1744 {
1745         unsigned long ndomains;
1746         int num;
1747
1748         assert_spin_locked(&device_domain_lock);
1749         assert_spin_locked(&iommu->lock);
1750
1751         domain->iommu_refcnt[iommu->seq_id] += 1;
1752         domain->iommu_count += 1;
1753         if (domain->iommu_refcnt[iommu->seq_id] == 1) {
1754                 ndomains = cap_ndoms(iommu->cap);
1755                 num      = find_first_zero_bit(iommu->domain_ids, ndomains);
1756
1757                 if (num >= ndomains) {
1758                         pr_err("%s: No free domain ids\n", iommu->name);
1759                         domain->iommu_refcnt[iommu->seq_id] -= 1;
1760                         domain->iommu_count -= 1;
1761                         return -ENOSPC;
1762                 }
1763
1764                 set_bit(num, iommu->domain_ids);
1765                 set_iommu_domain(iommu, num, domain);
1766
1767                 domain->iommu_did[iommu->seq_id] = num;
1768                 domain->nid                      = iommu->node;
1769
1770                 domain_update_iommu_cap(domain);
1771         }
1772
1773         return 0;
1774 }
1775
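/*
 * Must be called with device_domain_lock and iommu->lock held. Drops the
 * domain's reference on @iommu, freeing its domain id on this IOMMU when the
 * last reference goes away, and returns the remaining total iommu_count.
 */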
1776 static int domain_detach_iommu(struct dmar_domain *domain,
1777                                struct intel_iommu *iommu)
1778 {
1779         int num, count = INT_MAX;
1780
1781         assert_spin_locked(&device_domain_lock);
1782         assert_spin_locked(&iommu->lock);
1783
1784         domain->iommu_refcnt[iommu->seq_id] -= 1;
1785         count = --domain->iommu_count;
1786         if (domain->iommu_refcnt[iommu->seq_id] == 0) {
1787                 num = domain->iommu_did[iommu->seq_id];
1788                 clear_bit(num, iommu->domain_ids);
1789                 set_iommu_domain(iommu, num, NULL);
1790
1791                 domain_update_iommu_cap(domain);
1792                 domain->iommu_did[iommu->seq_id] = 0;
1793         }
1794
1795         return count;
1796 }
1797
1798 static struct iova_domain reserved_iova_list;
1799 static struct lock_class_key reserved_rbtree_key;
1800
1801 static int dmar_init_reserved_ranges(void)
1802 {
1803         struct pci_dev *pdev = NULL;
1804         struct iova *iova;
1805         int i;
1806
1807         init_iova_domain(&reserved_iova_list, VTD_PAGE_SIZE, IOVA_START_PFN,
1808                         DMA_32BIT_PFN);
1809
1810         lockdep_set_class(&reserved_iova_list.iova_rbtree_lock,
1811                 &reserved_rbtree_key);
1812
1813         /* IOAPIC ranges shouldn't be accessed by DMA */
1814         iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
1815                 IOVA_PFN(IOAPIC_RANGE_END));
1816         if (!iova) {
1817                 pr_err("Reserve IOAPIC range failed\n");
1818                 return -ENODEV;
1819         }
1820
1821         /* Reserve all PCI MMIO to avoid peer-to-peer access */
1822         for_each_pci_dev(pdev) {
1823                 struct resource *r;
1824
1825                 for (i = 0; i < PCI_NUM_RESOURCES; i++) {
1826                         r = &pdev->resource[i];
1827                         if (!r->flags || !(r->flags & IORESOURCE_MEM))
1828                                 continue;
1829                         iova = reserve_iova(&reserved_iova_list,
1830                                             IOVA_PFN(r->start),
1831                                             IOVA_PFN(r->end));
1832                         if (!iova) {
1833                                 pr_err("Reserve iova failed\n");
1834                                 return -ENODEV;
1835                         }
1836                 }
1837         }
1838         return 0;
1839 }
1840
1841 static void domain_reserve_special_ranges(struct dmar_domain *domain)
1842 {
1843         copy_reserved_iova(&reserved_iova_list, &domain->iovad);
1844 }
1845
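/*
 * Round a guest address width up to the next width the page-table levels can
 * express (12 + a multiple of 9 bits), capped at 64 bits. For example, a
 * 40-bit guest width becomes 48 bits, while 39 and 48 bits map to themselves.
 */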
1846 static inline int guestwidth_to_adjustwidth(int gaw)
1847 {
1848         int agaw;
1849         int r = (gaw - 12) % 9;
1850
1851         if (r == 0)
1852                 agaw = gaw;
1853         else
1854                 agaw = gaw + 9 - r;
1855         if (agaw > 64)
1856                 agaw = 64;
1857         return agaw;
1858 }
1859
1860 static int domain_init(struct dmar_domain *domain, struct intel_iommu *iommu,
1861                        int guest_width)
1862 {
1863         int adjust_width, agaw;
1864         unsigned long sagaw;
1865
1866         init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN,
1867                         DMA_32BIT_PFN);
1868         domain_reserve_special_ranges(domain);
1869
1870         /* calculate AGAW */
1871         if (guest_width > cap_mgaw(iommu->cap))
1872                 guest_width = cap_mgaw(iommu->cap);
1873         domain->gaw = guest_width;
1874         adjust_width = guestwidth_to_adjustwidth(guest_width);
1875         agaw = width_to_agaw(adjust_width);
1876         sagaw = cap_sagaw(iommu->cap);
1877         if (!test_bit(agaw, &sagaw)) {
1878                 /* hardware doesn't support it, choose a bigger one */
1879                 pr_debug("Hardware doesn't support agaw %d\n", agaw);
1880                 agaw = find_next_bit(&sagaw, 5, agaw);
1881                 if (agaw >= 5)
1882                         return -ENODEV;
1883         }
1884         domain->agaw = agaw;
1885
1886         if (ecap_coherent(iommu->ecap))
1887                 domain->iommu_coherency = 1;
1888         else
1889                 domain->iommu_coherency = 0;
1890
1891         if (ecap_sc_support(iommu->ecap))
1892                 domain->iommu_snooping = 1;
1893         else
1894                 domain->iommu_snooping = 0;
1895
1896         if (intel_iommu_superpage)
1897                 domain->iommu_superpage = fls(cap_super_page_val(iommu->cap));
1898         else
1899                 domain->iommu_superpage = 0;
1900
1901         domain->nid = iommu->node;
1902
1903         /* always allocate the top pgd */
1904         domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
1905         if (!domain->pgd)
1906                 return -ENOMEM;
1907         __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE);
1908         return 0;
1909 }
1910
1911 static void domain_exit(struct dmar_domain *domain)
1912 {
1913         struct page *freelist = NULL;
1914
1915         /* Domain 0 is reserved, so don't process it */
1916         if (!domain)
1917                 return;
1918
1919         /* Flush any lazy unmaps that may reference this domain */
1920         if (!intel_iommu_strict)
1921                 flush_unmaps_timeout(0);
1922
1923         /* Remove associated devices and clear attached or cached domains */
1924         rcu_read_lock();
1925         domain_remove_dev_info(domain);
1926         rcu_read_unlock();
1927
1928         /* destroy iovas */
1929         put_iova_domain(&domain->iovad);
1930
1931         freelist = domain_unmap(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
1932
1933         dma_free_pagelist(freelist);
1934
1935         free_domain_mem(domain);
1936 }
1937
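/*
 * Install a context entry for (bus, devfn) on @iommu pointing at @domain's
 * page tables (or pass-through for the hardware identity domain), then flush
 * the context and IOTLB caches as required by caching mode.
 */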
1938 static int domain_context_mapping_one(struct dmar_domain *domain,
1939                                       struct intel_iommu *iommu,
1940                                       u8 bus, u8 devfn)
1941 {
1942         u16 did = domain->iommu_did[iommu->seq_id];
1943         int translation = CONTEXT_TT_MULTI_LEVEL;
1944         struct device_domain_info *info = NULL;
1945         struct context_entry *context;
1946         unsigned long flags;
1947         struct dma_pte *pgd;
1948         int ret, agaw;
1949
1950         WARN_ON(did == 0);
1951
1952         if (hw_pass_through && domain_type_is_si(domain))
1953                 translation = CONTEXT_TT_PASS_THROUGH;
1954
1955         pr_debug("Set context mapping for %02x:%02x.%d\n",
1956                 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
1957
1958         BUG_ON(!domain->pgd);
1959
1960         spin_lock_irqsave(&device_domain_lock, flags);
1961         spin_lock(&iommu->lock);
1962
1963         ret = -ENOMEM;
1964         context = iommu_context_addr(iommu, bus, devfn, 1);
1965         if (!context)
1966                 goto out_unlock;
1967
1968         ret = 0;
1969         if (context_present(context))
1970                 goto out_unlock;
1971
1972         pgd = domain->pgd;
1973
1974         context_clear_entry(context);
1975         context_set_domain_id(context, did);
1976
1977         /*
1978          * Skip top levels of page tables for iommu which has less agaw
1979          * than default.  Unnecessary for PT mode.
1980          */
1981         if (translation != CONTEXT_TT_PASS_THROUGH) {
1982                 for (agaw = domain->agaw; agaw != iommu->agaw; agaw--) {
1983                         ret = -ENOMEM;
1984                         pgd = phys_to_virt(dma_pte_addr(pgd));
1985                         if (!dma_pte_present(pgd))
1986                                 goto out_unlock;
1987                 }
1988
1989                 info = iommu_support_dev_iotlb(domain, iommu, bus, devfn);
1990                 if (info && info->ats_supported)
1991                         translation = CONTEXT_TT_DEV_IOTLB;
1992                 else
1993                         translation = CONTEXT_TT_MULTI_LEVEL;
1994
1995                 context_set_address_root(context, virt_to_phys(pgd));
1996                 context_set_address_width(context, iommu->agaw);
1997         } else {
1998                 /*
1999                  * In pass through mode, AW must be programmed to
2000                  * indicate the largest AGAW value supported by
2001                  * hardware. And ASR is ignored by hardware.
2002                  */
2003                 context_set_address_width(context, iommu->msagaw);
2004         }
2005
2006         context_set_translation_type(context, translation);
2007         context_set_fault_enable(context);
2008         context_set_present(context);
2009         domain_flush_cache(domain, context, sizeof(*context));
2010
2011         /*
2012          * It's a non-present to present mapping. If the hardware doesn't cache
2013          * non-present entries, we only need to flush the write-buffer. If it
2014          * _does_ cache non-present entries, then it does so in the special
2015          * domain #0, which we have to flush:
2016          */
2017         if (cap_caching_mode(iommu->cap)) {
2018                 iommu->flush.flush_context(iommu, 0,
2019                                            (((u16)bus) << 8) | devfn,
2020                                            DMA_CCMD_MASK_NOBIT,
2021                                            DMA_CCMD_DEVICE_INVL);
2022                 iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);
2023         } else {
2024                 iommu_flush_write_buffer(iommu);
2025         }
2026         iommu_enable_dev_iotlb(info);
2027
2028         ret = 0;
2029
2030 out_unlock:
2031         spin_unlock(&iommu->lock);
2032         spin_unlock_irqrestore(&device_domain_lock, flags);
2033
2034         return ret;
2035 }
2036
2037 struct domain_context_mapping_data {
2038         struct dmar_domain *domain;
2039         struct intel_iommu *iommu;
2040 };
2041
2042 static int domain_context_mapping_cb(struct pci_dev *pdev,
2043                                      u16 alias, void *opaque)
2044 {
2045         struct domain_context_mapping_data *data = opaque;
2046
2047         return domain_context_mapping_one(data->domain, data->iommu,
2048                                           PCI_BUS_NUM(alias), alias & 0xff);
2049 }
2050
2051 static int
2052 domain_context_mapping(struct dmar_domain *domain, struct device *dev)
2053 {
2054         struct intel_iommu *iommu;
2055         u8 bus, devfn;
2056         struct domain_context_mapping_data data;
2057
2058         iommu = device_to_iommu(dev, &bus, &devfn);
2059         if (!iommu)
2060                 return -ENODEV;
2061
2062         if (!dev_is_pci(dev))
2063                 return domain_context_mapping_one(domain, iommu, bus, devfn);
2064
2065         data.domain = domain;
2066         data.iommu = iommu;
2067
2068         return pci_for_each_dma_alias(to_pci_dev(dev),
2069                                       &domain_context_mapping_cb, &data);
2070 }
2071
2072 static int domain_context_mapped_cb(struct pci_dev *pdev,
2073                                     u16 alias, void *opaque)
2074 {
2075         struct intel_iommu *iommu = opaque;
2076
2077         return !device_context_mapped(iommu, PCI_BUS_NUM(alias), alias & 0xff);
2078 }
2079
2080 static int domain_context_mapped(struct device *dev)
2081 {
2082         struct intel_iommu *iommu;
2083         u8 bus, devfn;
2084
2085         iommu = device_to_iommu(dev, &bus, &devfn);
2086         if (!iommu)
2087                 return -ENODEV;
2088
2089         if (!dev_is_pci(dev))
2090                 return device_context_mapped(iommu, bus, devfn);
2091
2092         return !pci_for_each_dma_alias(to_pci_dev(dev),
2093                                        domain_context_mapped_cb, iommu);
2094 }
2095
2096 /* Returns the number of VTD pages, but aligned to MM page size */
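/* e.g. with 4KiB pages, host_addr = 0x1234 and size = 0x100 fall within one
   MM page, so this returns one VTD page */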
2097 static inline unsigned long aligned_nrpages(unsigned long host_addr,
2098                                             size_t size)
2099 {
2100         host_addr &= ~PAGE_MASK;
2101         return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT;
2102 }
2103
2104 /* Return largest possible superpage level for a given mapping */
2105 static inline int hardware_largepage_caps(struct dmar_domain *domain,
2106                                           unsigned long iov_pfn,
2107                                           unsigned long phy_pfn,
2108                                           unsigned long pages)
2109 {
2110         int support, level = 1;
2111         unsigned long pfnmerge;
2112
2113         support = domain->iommu_superpage;
2114
2115         /* To use a large page, the virtual *and* physical addresses
2116            must be aligned to 2MiB/1GiB/etc. Lower bits set in either
2117            of them will mean we have to use smaller pages. So just
2118            merge them and check both at once. */
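        /*
         * For example, with iov_pfn = 0x200, phy_pfn = 0x400 and 1024 pages to
         * map, both pfns are 512-page aligned, so level 2 (2MiB pages) can be
         * used if the hardware supports it.
         */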
2119         pfnmerge = iov_pfn | phy_pfn;
2120
2121         while (support && !(pfnmerge & ~VTD_STRIDE_MASK)) {
2122                 pages >>= VTD_STRIDE_SHIFT;
2123                 if (!pages)
2124                         break;
2125                 pfnmerge >>= VTD_STRIDE_SHIFT;
2126                 level++;
2127                 support--;
2128         }
2129         return level;
2130 }
2131
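/*
 * Map @nr_pages IOVA pages starting at @iov_pfn, taking the physical pages
 * either from @sg or from the contiguous range starting at @phys_pfn, and
 * using superpages whenever alignment and remaining length allow it.
 */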
2132 static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2133                             struct scatterlist *sg, unsigned long phys_pfn,
2134                             unsigned long nr_pages, int prot)
2135 {
2136         struct dma_pte *first_pte = NULL, *pte = NULL;
2137         phys_addr_t uninitialized_var(pteval);
2138         unsigned long sg_res = 0;
2139         unsigned int largepage_lvl = 0;
2140         unsigned long lvl_pages = 0;
2141
2142         BUG_ON(!domain_pfn_supported(domain, iov_pfn + nr_pages - 1));
2143
2144         if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
2145                 return -EINVAL;
2146
2147         prot &= DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP;
2148
2149         if (!sg) {
2150                 sg_res = nr_pages;
2151                 pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | prot;
2152         }
2153
2154         while (nr_pages > 0) {
2155                 uint64_t tmp;
2156
2157                 if (!sg_res) {
2158                         sg_res = aligned_nrpages(sg->offset, sg->length);
2159                         sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + sg->offset;
2160                         sg->dma_length = sg->length;
2161                         pteval = (sg_phys(sg) & PAGE_MASK) | prot;
2162                         phys_pfn = pteval >> VTD_PAGE_SHIFT;
2163                 }
2164
2165                 if (!pte) {
2166                         largepage_lvl = hardware_largepage_caps(domain, iov_pfn, phys_pfn, sg_res);
2167
2168                         first_pte = pte = pfn_to_dma_pte(domain, iov_pfn, &largepage_lvl);
2169                         if (!pte)
2170                                 return -ENOMEM;
2171                         /* It is a large page */
2172                         if (largepage_lvl > 1) {
2173                                 unsigned long nr_superpages, end_pfn;
2174
2175                                 pteval |= DMA_PTE_LARGE_PAGE;
2176                                 lvl_pages = lvl_to_nr_pages(largepage_lvl);
2177
2178                                 nr_superpages = sg_res / lvl_pages;
2179                                 end_pfn = iov_pfn + nr_superpages * lvl_pages - 1;
2180
2181                                 /*
2182                                  * Ensure that old small page tables are
2183                                  * removed to make room for superpage(s).
2184                                  */
2185                                 dma_pte_free_pagetable(domain, iov_pfn, end_pfn);
2186                         } else {
2187                                 pteval &= ~(uint64_t)DMA_PTE_LARGE_PAGE;
2188                         }
2189
2190                 }
2191                 /* We don't need a lock here; nobody else
2192                  * touches this iova range.
2193                  */
2194                 tmp = cmpxchg64_local(&pte->val, 0ULL, pteval);
2195                 if (tmp) {
2196                         static int dumps = 5;
2197                         pr_crit("ERROR: DMA PTE for vPFN 0x%lx already set (to %llx not %llx)\n",
2198                                 iov_pfn, tmp, (unsigned long long)pteval);
2199                         if (dumps) {
2200                                 dumps--;
2201                                 debug_dma_dump_mappings(NULL);
2202                         }
2203                         WARN_ON(1);
2204                 }
2205
2206                 lvl_pages = lvl_to_nr_pages(largepage_lvl);
2207
2208                 BUG_ON(nr_pages < lvl_pages);
2209                 BUG_ON(sg_res < lvl_pages);
2210
2211                 nr_pages -= lvl_pages;
2212                 iov_pfn += lvl_pages;
2213                 phys_pfn += lvl_pages;
2214                 pteval += lvl_pages * VTD_PAGE_SIZE;
2215                 sg_res -= lvl_pages;
2216
2217                 /* If the next PTE would be the first in a new page, then we
2218                    need to flush the cache on the entries we've just written.
2219                    And then we'll need to recalculate 'pte', so clear it and
2220                    let it get set again in the if (!pte) block above.
2221
2222                    If we're done (!nr_pages) we need to flush the cache too.
2223
2224                    Also if we've been setting superpages, we may need to
2225                    recalculate 'pte' and switch back to smaller pages for the
2226                    end of the mapping, if the trailing size is not enough to
2227                    use another superpage (i.e. sg_res < lvl_pages). */
2228                 pte++;
2229                 if (!nr_pages || first_pte_in_page(pte) ||
2230                     (largepage_lvl > 1 && sg_res < lvl_pages)) {
2231                         domain_flush_cache(domain, first_pte,
2232                                            (void *)pte - (void *)first_pte);
2233                         pte = NULL;
2234                 }
2235
2236                 if (!sg_res && nr_pages)
2237                         sg = sg_next(sg);
2238         }
2239         return 0;
2240 }
2241
2242 static inline int domain_sg_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2243                                     struct scatterlist *sg, unsigned long nr_pages,
2244                                     int prot)
2245 {
2246         return __domain_mapping(domain, iov_pfn, sg, 0, nr_pages, prot);
2247 }
2248
2249 static inline int domain_pfn_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2250                                      unsigned long phys_pfn, unsigned long nr_pages,
2251                                      int prot)
2252 {
2253         return __domain_mapping(domain, iov_pfn, NULL, phys_pfn, nr_pages, prot);
2254 }
2255
2256 static void domain_context_clear_one(struct intel_iommu *iommu, u8 bus, u8 devfn)
2257 {
2258         if (!iommu)
2259                 return;
2260
2261         clear_context_table(iommu, bus, devfn);
2262         iommu->flush.flush_context(iommu, 0, 0, 0,
2263                                            DMA_CCMD_GLOBAL_INVL);
2264         iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
2265 }
2266
2267 static inline void unlink_domain_info(struct device_domain_info *info)
2268 {
2269         assert_spin_locked(&device_domain_lock);
2270         list_del(&info->link);
2271         list_del(&info->global);
2272         if (info->dev)
2273                 info->dev->archdata.iommu = NULL;
2274 }
2275
2276 static void domain_remove_dev_info(struct dmar_domain *domain)
2277 {
2278         struct device_domain_info *info, *tmp;
2279         unsigned long flags;
2280
2281         spin_lock_irqsave(&device_domain_lock, flags);
2282         list_for_each_entry_safe(info, tmp, &domain->devices, link)
2283                 __dmar_remove_one_dev_info(info);
2284         spin_unlock_irqrestore(&device_domain_lock, flags);
2285 }
2286
2287 /*
2288  * find_domain
2289  * Note: we use struct device->archdata.iommu to store the info
2290  */
2291 static struct dmar_domain *find_domain(struct device *dev)
2292 {
2293         struct device_domain_info *info;
2294
2295         /* No lock here, assumes no domain exit in normal case */
2296         info = dev->archdata.iommu;
2297         if (info)
2298                 return info->domain;
2299         return NULL;
2300 }
2301
2302 static inline struct device_domain_info *
2303 dmar_search_domain_by_dev_info(int segment, int bus, int devfn)
2304 {
2305         struct device_domain_info *info;
2306
2307         list_for_each_entry(info, &device_domain_list, global)
2308                 if (info->iommu->segment == segment && info->bus == bus &&
2309                     info->devfn == devfn)
2310                         return info;
2311
2312         return NULL;
2313 }
2314
2315 static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu,
2316                                                     int bus, int devfn,
2317                                                     struct device *dev,
2318                                                     struct dmar_domain *domain)
2319 {
2320         struct dmar_domain *found = NULL;
2321         struct device_domain_info *info;
2322         unsigned long flags;
2323         int ret;
2324
2325         info = alloc_devinfo_mem();
2326         if (!info)
2327                 return NULL;
2328
2329         info->bus = bus;
2330         info->devfn = devfn;
2331         info->ats_supported = info->pasid_supported = info->pri_supported = 0;
2332         info->ats_enabled = info->pasid_enabled = info->pri_enabled = 0;
2333         info->ats_qdep = 0;
2334         info->dev = dev;
2335         info->domain = domain;
2336         info->iommu = iommu;
2337
2338         if (dev && dev_is_pci(dev)) {
2339                 struct pci_dev *pdev = to_pci_dev(info->dev);
2340
2341                 if (ecap_dev_iotlb_support(iommu->ecap) &&
2342                     pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ATS) &&
2343                     dmar_find_matched_atsr_unit(pdev))
2344                         info->ats_supported = 1;
2345
2346                 if (ecs_enabled(iommu)) {
2347                         if (pasid_enabled(iommu)) {
2348                                 int features = pci_pasid_features(pdev);
2349                                 if (features >= 0)
2350                                         info->pasid_supported = features | 1;
2351                         }
2352
2353                         if (info->ats_supported && ecap_prs(iommu->ecap) &&
2354                             pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI))
2355                                 info->pri_supported = 1;
2356                 }
2357         }
2358
2359         spin_lock_irqsave(&device_domain_lock, flags);
2360         if (dev)
2361                 found = find_domain(dev);
2362
2363         if (!found) {
2364                 struct device_domain_info *info2;
2365                 info2 = dmar_search_domain_by_dev_info(iommu->segment, bus, devfn);
2366                 if (info2) {
2367                         found      = info2->domain;
2368                         info2->dev = dev;
2369                 }
2370         }
2371
2372         if (found) {
2373                 spin_unlock_irqrestore(&device_domain_lock, flags);
2374                 free_devinfo_mem(info);
2375                 /* Caller must free the original domain */
2376                 return found;
2377         }
2378
2379         spin_lock(&iommu->lock);
2380         ret = domain_attach_iommu(domain, iommu);
2381         spin_unlock(&iommu->lock);
2382
2383         if (ret) {
2384                 spin_unlock_irqrestore(&device_domain_lock, flags);
2385                 return NULL;
2386         }
2387
2388         list_add(&info->link, &domain->devices);
2389         list_add(&info->global, &device_domain_list);
2390         if (dev)
2391                 dev->archdata.iommu = info;
2392         spin_unlock_irqrestore(&device_domain_lock, flags);
2393
2394         if (dev && domain_context_mapping(domain, dev)) {
2395                 pr_err("Domain context map for %s failed\n", dev_name(dev));
2396                 dmar_remove_one_dev_info(domain, dev);
2397                 return NULL;
2398         }
2399
2400         return domain;
2401 }
2402
2403 static int get_last_alias(struct pci_dev *pdev, u16 alias, void *opaque)
2404 {
2405         *(u16 *)opaque = alias;
2406         return 0;
2407 }
2408
2409 /* Find or allocate a domain for the device; the returned domain is initialized */
2410 static struct dmar_domain *get_domain_for_dev(struct device *dev, int gaw)
2411 {
2412         struct device_domain_info *info = NULL;
2413         struct dmar_domain *domain, *tmp;
2414         struct intel_iommu *iommu;
2415         u16 req_id, dma_alias;
2416         unsigned long flags;
2417         u8 bus, devfn;
2418
2419         domain = find_domain(dev);
2420         if (domain)
2421                 return domain;
2422
2423         iommu = device_to_iommu(dev, &bus, &devfn);
2424         if (!iommu)
2425                 return NULL;
2426
2427         req_id = ((u16)bus << 8) | devfn;
2428
2429         if (dev_is_pci(dev)) {
2430                 struct pci_dev *pdev = to_pci_dev(dev);
2431
2432                 pci_for_each_dma_alias(pdev, get_last_alias, &dma_alias);
2433
2434                 spin_lock_irqsave(&device_domain_lock, flags);
2435                 info = dmar_search_domain_by_dev_info(pci_domain_nr(pdev->bus),
2436                                                       PCI_BUS_NUM(dma_alias),
2437                                                       dma_alias & 0xff);
2438                 if (info) {
2439                         iommu = info->iommu;
2440                         domain = info->domain;
2441                 }
2442                 spin_unlock_irqrestore(&device_domain_lock, flags);
2443
2444                 /* DMA alias already has a domain, use it */
2445                 if (info)
2446                         goto found_domain;
2447         }
2448
2449         /* Allocate and initialize new domain for the device */
2450         domain = alloc_domain(0);
2451         if (!domain)
2452                 return NULL;
2453         if (domain_init(domain, iommu, gaw)) {
2454                 domain_exit(domain);
2455                 return NULL;
2456         }
2457
2458         /* register PCI DMA alias device */
2459         if (req_id != dma_alias && dev_is_pci(dev)) {
2460                 tmp = dmar_insert_one_dev_info(iommu, PCI_BUS_NUM(dma_alias),
2461                                                dma_alias & 0xff, NULL, domain);
2462
2463                 if (!tmp || tmp != domain) {
2464                         domain_exit(domain);
2465                         domain = tmp;
2466                 }
2467
2468                 if (!domain)
2469                         return NULL;
2470         }
2471
2472 found_domain:
2473         tmp = dmar_insert_one_dev_info(iommu, bus, devfn, dev, domain);
2474
2475         if (!tmp || tmp != domain) {
2476                 domain_exit(domain);
2477                 domain = tmp;
2478         }
2479
2480         return domain;
2481 }
2482
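/*
 * Set up a 1:1 mapping for [start, end] in @domain: reserve the matching iova
 * range, clear any stale PTEs and map the pfns onto themselves.
 */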
2483 static int iommu_domain_identity_map(struct dmar_domain *domain,
2484                                      unsigned long long start,
2485                                      unsigned long long end)
2486 {
2487         unsigned long first_vpfn = start >> VTD_PAGE_SHIFT;
2488         unsigned long last_vpfn = end >> VTD_PAGE_SHIFT;
2489
2490         if (!reserve_iova(&domain->iovad, dma_to_mm_pfn(first_vpfn),
2491                           dma_to_mm_pfn(last_vpfn))) {
2492                 pr_err("Reserving iova failed\n");
2493                 return -ENOMEM;
2494         }
2495
2496         pr_debug("Mapping reserved region %llx-%llx\n", start, end);
2497         /*
2498          * RMRR range might have overlap with physical memory range,
2499          * clear it first
2500          */
2501         dma_pte_clear_range(domain, first_vpfn, last_vpfn);
2502
2503         return domain_pfn_mapping(domain, first_vpfn, first_vpfn,
2504                                   last_vpfn - first_vpfn + 1,
2505                                   DMA_PTE_READ|DMA_PTE_WRITE);
2506 }
2507
2508 static int iommu_prepare_identity_map(struct device *dev,
2509                                       unsigned long long start,
2510                                       unsigned long long end)
2511 {
2512         struct dmar_domain *domain;
2513         int ret;
2514
2515         domain = get_domain_for_dev(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
2516         if (!domain)
2517                 return -ENOMEM;
2518
2519         /* For _hardware_ passthrough, don't bother. But for software
2520            passthrough, we do it anyway -- it may indicate a memory
2521            range which is reserved in E820 and so didn't get set
2522            up in si_domain to start with */
2523         if (domain == si_domain && hw_pass_through) {
2524                 pr_warn("Ignoring identity map for HW passthrough device %s [0x%Lx - 0x%Lx]\n",
2525                         dev_name(dev), start, end);
2526                 return 0;
2527         }
2528
2529         pr_info("Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
2530                 dev_name(dev), start, end);
2531
2532         if (end < start) {
2533                 WARN(1, "Your BIOS is broken; RMRR ends before it starts!\n"
2534                         "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2535                         dmi_get_system_info(DMI_BIOS_VENDOR),
2536                         dmi_get_system_info(DMI_BIOS_VERSION),
2537                         dmi_get_system_info(DMI_PRODUCT_VERSION));
2538                 ret = -EIO;
2539                 goto error;
2540         }
2541
2542         if (end >> agaw_to_width(domain->agaw)) {
2543                 WARN(1, "Your BIOS is broken; RMRR exceeds permitted address width (%d bits)\n"
2544                      "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2545                      agaw_to_width(domain->agaw),
2546                      dmi_get_system_info(DMI_BIOS_VENDOR),
2547                      dmi_get_system_info(DMI_BIOS_VERSION),
2548                      dmi_get_system_info(DMI_PRODUCT_VERSION));
2549                 ret = -EIO;
2550                 goto error;
2551         }
2552
2553         ret = iommu_domain_identity_map(domain, start, end);
2554         if (ret)
2555                 goto error;
2556
2557         return 0;
2558
2559  error:
2560         domain_exit(domain);
2561         return ret;
2562 }
2563
2564 static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
2565                                          struct device *dev)
2566 {
2567         if (dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
2568                 return 0;
2569         return iommu_prepare_identity_map(dev, rmrr->base_address,
2570                                           rmrr->end_address);
2571 }
2572
2573 #ifdef CONFIG_INTEL_IOMMU_FLOPPY_WA
2574 static inline void iommu_prepare_isa(void)
2575 {
2576         struct pci_dev *pdev;
2577         int ret;
2578
2579         pdev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL);
2580         if (!pdev)
2581                 return;
2582
2583         pr_info("Prepare 0-16MiB unity mapping for LPC\n");
2584         ret = iommu_prepare_identity_map(&pdev->dev, 0, 16*1024*1024 - 1);
2585
2586         if (ret)
2587                 pr_err("Failed to create 0-16MiB identity map - floppy might not work\n");
2588
2589         pci_dev_put(pdev);
2590 }
2591 #else
2592 static inline void iommu_prepare_isa(void)
2593 {
2594         return;
2595 }
2596 #endif /* !CONFIG_INTEL_IOMMU_FLOPPY_WA */
2597
2598 static int md_domain_init(struct dmar_domain *domain, int guest_width);
2599
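/*
 * Build the static identity (si) domain. Unless hardware pass-through is in
 * use, every usable memory range of every online node is identity mapped
 * into it.
 */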
2600 static int __init si_domain_init(int hw)
2601 {
2602         int nid, ret = 0;
2603
2604         si_domain = alloc_domain(DOMAIN_FLAG_STATIC_IDENTITY);
2605         if (!si_domain)
2606                 return -EFAULT;
2607
2608         if (md_domain_init(si_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
2609                 domain_exit(si_domain);
2610                 return -EFAULT;
2611         }
2612
2613         pr_debug("Identity mapping domain allocated\n");
2614
2615         if (hw)
2616                 return 0;
2617
2618         for_each_online_node(nid) {
2619                 unsigned long start_pfn, end_pfn;
2620                 int i;
2621
2622                 for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
2623                         ret = iommu_domain_identity_map(si_domain,
2624                                         PFN_PHYS(start_pfn), PFN_PHYS(end_pfn));
2625                         if (ret)
2626                                 return ret;
2627                 }
2628         }
2629
2630         return 0;
2631 }
2632
2633 static int identity_mapping(struct device *dev)
2634 {
2635         struct device_domain_info *info;
2636
2637         if (likely(!iommu_identity_mapping))
2638                 return 0;
2639
2640         info = dev->archdata.iommu;
2641         if (info && info != DUMMY_DEVICE_DOMAIN_INFO)
2642                 return (info->domain == si_domain);
2643
2644         return 0;
2645 }
2646
2647 static int domain_add_dev_info(struct dmar_domain *domain, struct device *dev)
2648 {
2649         struct dmar_domain *ndomain;
2650         struct intel_iommu *iommu;
2651         u8 bus, devfn;
2652
2653         iommu = device_to_iommu(dev, &bus, &devfn);
2654         if (!iommu)
2655                 return -ENODEV;
2656
2657         ndomain = dmar_insert_one_dev_info(iommu, bus, devfn, dev, domain);
2658         if (ndomain != domain)
2659                 return -EBUSY;
2660
2661         return 0;
2662 }
2663
2664 static bool device_has_rmrr(struct device *dev)
2665 {
2666         struct dmar_rmrr_unit *rmrr;
2667         struct device *tmp;
2668         int i;
2669
2670         rcu_read_lock();
2671         for_each_rmrr_units(rmrr) {
2672                 /*
2673                  * Return TRUE if this RMRR contains the device that
2674                  * is passed in.
2675                  */
2676                 for_each_active_dev_scope(rmrr->devices,
2677                                           rmrr->devices_cnt, i, tmp)
2678                         if (tmp == dev) {
2679                                 rcu_read_unlock();
2680                                 return true;
2681                         }
2682         }
2683         rcu_read_unlock();
2684         return false;
2685 }
2686
2687 /*
2688  * There are a couple cases where we need to restrict the functionality of
2689  * devices associated with RMRRs.  The first is when evaluating a device for
2690  * identity mapping because problems exist when devices are moved in and out
2691  * of domains and their respective RMRR information is lost.  This means that
2692  * a device with associated RMRRs will never be in a "passthrough" domain.
2693  * The second is use of the device through the IOMMU API.  This interface
2694  * expects to have full control of the IOVA space for the device.  We cannot
2695  * satisfy both the requirement that RMRR access is maintained and have an
2696  * unencumbered IOVA space.  We also have no ability to quiesce the device's
2697  * use of the RMRR space or even inform the IOMMU API user of the restriction.
2698  * We therefore prevent devices associated with an RMRR from participating in
2699  * the IOMMU API, which eliminates them from device assignment.
2700  *
2701  * In both cases we assume that PCI USB devices with RMRRs have them largely
2702  * for historical reasons and that the RMRR space is not actively used post
2703  * boot.  This exclusion may change if vendors begin to abuse it.
2704  *
2705  * The same exception is made for graphics devices, with the requirement that
2706  * any use of the RMRR regions will be torn down before assigning the device
2707  * to a guest.
2708  */
2709 static bool device_is_rmrr_locked(struct device *dev)
2710 {
2711         if (!device_has_rmrr(dev))
2712                 return false;
2713
2714         if (dev_is_pci(dev)) {
2715                 struct pci_dev *pdev = to_pci_dev(dev);
2716
2717                 if (IS_USB_DEVICE(pdev) || IS_GFX_DEVICE(pdev))
2718                         return false;
2719         }
2720
2721         return true;
2722 }
2723
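/*
 * Decide whether a device should live in the static identity (1:1) domain,
 * based on the identity-mapping policy, RMRR restrictions, its place in the
 * PCI topology and, once past startup, its DMA mask.
 */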
2724 static int iommu_should_identity_map(struct device *dev, int startup)
2725 {
2726
2727         if (dev_is_pci(dev)) {
2728                 struct pci_dev *pdev = to_pci_dev(dev);
2729
2730                 if (device_is_rmrr_locked(dev))
2731                         return 0;
2732
2733                 if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev))
2734                         return 1;
2735
2736                 if ((iommu_identity_mapping & IDENTMAP_GFX) && IS_GFX_DEVICE(pdev))
2737                         return 1;
2738
2739                 if (!(iommu_identity_mapping & IDENTMAP_ALL))
2740                         return 0;
2741
2742                 /*
2743                  * We want to start off with all devices in the 1:1 domain, and
2744                  * take them out later if we find they can't access all of memory.
2745                  *
2746                  * However, we can't do this for PCI devices behind bridges,
2747                  * because all PCI devices behind the same bridge will end up
2748                  * with the same source-id on their transactions.
2749                  *
2750                  * Practically speaking, we can't change things around for these
2751                  * devices at run-time, because we can't be sure there'll be no
2752                  * DMA transactions in flight for any of their siblings.
2753                  *
2754                  * So PCI devices (unless they're on the root bus) as well as
2755                  * their parent PCI-PCI or PCIe-PCI bridges must be left _out_ of
2756                  * the 1:1 domain, just in _case_ one of their siblings turns out
2757                  * not to be able to map all of memory.
2758                  */
2759                 if (!pci_is_pcie(pdev)) {
2760                         if (!pci_is_root_bus(pdev->bus))
2761                                 return 0;
2762                         if (pdev->class >> 8 == PCI_CLASS_BRIDGE_PCI)
2763                                 return 0;
2764                 } else if (pci_pcie_type(pdev) == PCI_EXP_TYPE_PCI_BRIDGE)
2765                         return 0;
2766         } else {
2767                 if (device_has_rmrr(dev))
2768                         return 0;
2769         }
2770
2771         /*
2772          * At boot time, we don't yet know if devices will be 64-bit capable.
2773          * Assume that they will; if they turn out not to be, then we can
2774          * take them out of the 1:1 domain later.
2775          */
2776         if (!startup) {
2777                 /*
2778                  * If the device's dma_mask is less than the system's memory
2779                  * size then this is not a candidate for identity mapping.
2780                  */
2781                 u64 dma_mask = *dev->dma_mask;
2782
2783                 if (dev->coherent_dma_mask &&
2784                     dev->coherent_dma_mask < dma_mask)
2785                         dma_mask = dev->coherent_dma_mask;
2786
2787                 return dma_mask >= dma_get_required_mask(dev);
2788         }
2789
2790         return 1;
2791 }
2792
2793 static int __init dev_prepare_static_identity_mapping(struct device *dev, int hw)
2794 {
2795         int ret;
2796
2797         if (!iommu_should_identity_map(dev, 1))
2798                 return 0;
2799
2800         ret = domain_add_dev_info(si_domain, dev);
2801         if (!ret)
2802                 pr_info("%s identity mapping for device %s\n",
2803                         hw ? "Hardware" : "Software", dev_name(dev));
2804         else if (ret == -ENODEV)
2805                 /* device not associated with an iommu */
2806                 ret = 0;
2807
2808         return ret;
2809 }
2810
2811
2812 static int __init iommu_prepare_static_identity_mapping(int hw)
2813 {
2814         struct pci_dev *pdev = NULL;
2815         struct dmar_drhd_unit *drhd;
2816         struct intel_iommu *iommu;
2817         struct device *dev;
2818         int i;
2819         int ret = 0;
2820
2821         for_each_pci_dev(pdev) {
2822                 ret = dev_prepare_static_identity_mapping(&pdev->dev, hw);
2823                 if (ret)
2824                         return ret;
2825         }
2826
2827         for_each_active_iommu(iommu, drhd)
2828                 for_each_active_dev_scope(drhd->devices, drhd->devices_cnt, i, dev) {
2829                         struct acpi_device_physical_node *pn;
2830                         struct acpi_device *adev;
2831
2832                         if (dev->bus != &acpi_bus_type)
2833                                 continue;
2834
2835                         adev = to_acpi_device(dev);
2836                         mutex_lock(&adev->physical_node_lock);
2837                         list_for_each_entry(pn, &adev->physical_node_list, node) {
2838                                 ret = dev_prepare_static_identity_mapping(pn->dev, hw);
2839                                 if (ret)
2840                                         break;
2841                         }
2842                         mutex_unlock(&adev->physical_node_lock);
2843                         if (ret)
2844                                 return ret;
2845                 }
2846
2847         return 0;
2848 }
2849
2850 static void intel_iommu_init_qi(struct intel_iommu *iommu)
2851 {
2852         /*
2853          * Start from a sane IOMMU hardware state.
2854          * If queued invalidation was already initialized by us
2855          * (for example, while enabling interrupt remapping), then
2856          * things are already rolling from a sane state.
2857          */
2858         if (!iommu->qi) {
2859                 /*
2860                  * Clear any previous faults.
2861                  */
2862                 dmar_fault(-1, iommu);
2863                 /*
2864                  * Disable queued invalidation if supported and already enabled
2865                  * before OS handover.
2866                  */
2867                 dmar_disable_qi(iommu);
2868         }
2869
2870         if (dmar_enable_qi(iommu)) {
2871                 /*
2872                  * Queued invalidation is not enabled, use register based invalidation
2873                  */
2874                 iommu->flush.flush_context = __iommu_flush_context;
2875                 iommu->flush.flush_iotlb = __iommu_flush_iotlb;
2876                 pr_info("%s: Using Register based invalidation\n",
2877                         iommu->name);
2878         } else {
2879                 iommu->flush.flush_context = qi_flush_context;
2880                 iommu->flush.flush_iotlb = qi_flush_iotlb;
2881                 pr_info("%s: Using Queued invalidation\n", iommu->name);
2882         }
2883 }
2884
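/*
 * Copy one bus's worth of context entries from the tables left behind by the
 * old kernel (e.g. across a kdump kexec) into freshly allocated pages,
 * marking each copied entry and reserving its domain id.
 */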
2885 static int copy_context_table(struct intel_iommu *iommu,
2886                               struct root_entry __iomem *old_re,
2887                               struct context_entry **tbl,
2888                               int bus, bool ext)
2889 {
2890         int tbl_idx, pos = 0, idx, devfn, ret = 0, did;
2891         struct context_entry __iomem *old_ce = NULL;
2892         struct context_entry *new_ce = NULL, ce;
2893         struct root_entry re;
2894         phys_addr_t old_ce_phys;
2895
2896         tbl_idx = ext ? bus * 2 : bus;
2897         memcpy_fromio(&re, old_re, sizeof(re));
2898
2899         for (devfn = 0; devfn < 256; devfn++) {
2900                 /* First calculate the correct index */
2901                 idx = (ext ? devfn * 2 : devfn) % 256;
2902
2903                 if (idx == 0) {
2904                         /* First save what we may have and clean up */
2905                         if (new_ce) {
2906                                 tbl[tbl_idx] = new_ce;
2907                                 __iommu_flush_cache(iommu, new_ce,
2908                                                     VTD_PAGE_SIZE);
2909                                 pos = 1;
2910                         }
2911
2912                         if (old_ce)
2913                                 iounmap(old_ce);
2914
2915                         ret = 0;
2916                         if (devfn < 0x80)
2917                                 old_ce_phys = root_entry_lctp(&re);
2918                         else
2919                                 old_ce_phys = root_entry_uctp(&re);
2920
2921                         if (!old_ce_phys) {
2922                                 if (ext && devfn == 0) {
2923                                         /* No LCTP, try UCTP */
2924                                         devfn = 0x7f;
2925                                         continue;
2926                                 } else {
2927                                         goto out;
2928                                 }
2929                         }
2930
2931                         ret = -ENOMEM;
2932                         old_ce = ioremap_cache(old_ce_phys, PAGE_SIZE);
2933                         if (!old_ce)
2934                                 goto out;
2935
2936                         new_ce = alloc_pgtable_page(iommu->node);
2937                         if (!new_ce)
2938                                 goto out_unmap;
2939
2940                         ret = 0;
2941                 }
2942
2943                 /* Now copy the context entry */
2944                 memcpy_fromio(&ce, old_ce + idx, sizeof(ce));
2945
2946                 if (!__context_present(&ce))
2947                         continue;
2948
2949                 did = context_domain_id(&ce);
2950                 if (did >= 0 && did < cap_ndoms(iommu->cap))
2951                         set_bit(did, iommu->domain_ids);
2952
2953                 /*
2954                  * We need a marker for copied context entries. This
2955                  * marker needs to work for the old format as well as
2956                  * for extended context entries.
2957                  *
2958                  * Bit 67 of the context entry is used. In the old
2959                  * format this bit is available to software, in the
2960                  * extended format it is the PGE bit, but PGE is ignored
2961                  * by HW if PASIDs are disabled (and thus still
2962                  * available).
2963                  *
2964                  * So disable PASIDs first and then mark the entry
2965                  * copied. This means that we don't copy PASID
2966                  * translations from the old kernel, but this is fine as
2967                  * faults there are not fatal.
2968                  */
2969                 context_clear_pasid_enable(&ce);
2970                 context_set_copied(&ce);
2971
2972                 new_ce[idx] = ce;
2973         }
2974
2975         tbl[tbl_idx + pos] = new_ce;
2976
2977         __iommu_flush_cache(iommu, new_ce, VTD_PAGE_SIZE);
2978
2979 out_unmap:
2980         iounmap(old_ce);
2981
2982 out:
2983         return ret;
2984 }
2985
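/*
 * Take over the translation tables programmed by the old kernel: map its root
 * table, copy every per-bus context table and hook the copies into this
 * kernel's root entries. Bail out if the extended-context (RTT) setting would
 * have to change, since that cannot be done with translation enabled.
 */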
2986 static int copy_translation_tables(struct intel_iommu *iommu)
2987 {
2988         struct root_entry __iomem *old_rt;
2989         struct context_entry **ctxt_tbls;
2990         phys_addr_t old_rt_phys;
2991         int ctxt_table_entries;
2992         unsigned long flags;
2993         u64 rtaddr_reg;
2994         int bus, ret;
2995         bool new_ext, ext;
2996
2997         rtaddr_reg = dmar_readq(iommu->reg + DMAR_RTADDR_REG);
2998         ext        = !!(rtaddr_reg & DMA_RTADDR_RTT);
2999         new_ext    = !!ecap_ecs(iommu->ecap);
3000
3001         /*
3002          * The RTT bit can only be changed when translation is disabled,
3003          * but disabling translation would open a window for data
3004          * corruption. So bail out and don't copy anything if we would
3005          * have to change the bit.
3006          */
3007         if (new_ext != ext)
3008                 return -EINVAL;
3009
3010         old_rt_phys = rtaddr_reg & VTD_PAGE_MASK;
3011         if (!old_rt_phys)
3012                 return -EINVAL;
3013
3014         old_rt = ioremap_cache(old_rt_phys, PAGE_SIZE);
3015         if (!old_rt)
3016                 return -ENOMEM;
3017
3018         /* This is too big for the stack - allocate it from slab */
3019         ctxt_table_entries = ext ? 512 : 256;
3020         ret = -ENOMEM;
3021         ctxt_tbls = kzalloc(ctxt_table_entries * sizeof(void *), GFP_KERNEL);
3022         if (!ctxt_tbls)
3023                 goto out_unmap;
3024
3025         for (bus = 0; bus < 256; bus++) {
3026                 ret = copy_context_table(iommu, &old_rt[bus],
3027                                          ctxt_tbls, bus, ext);
3028                 if (ret) {
3029                         pr_err("%s: Failed to copy context table for bus %d\n",
3030                                 iommu->name, bus);
3031                         continue;
3032                 }
3033         }
3034
3035         spin_lock_irqsave(&iommu->lock, flags);
3036
3037         /* Context tables are copied, now write them to the root_entry table */
3038         for (bus = 0; bus < 256; bus++) {
3039                 int idx = ext ? bus * 2 : bus;
3040                 u64 val;
3041
3042                 if (ctxt_tbls[idx]) {
3043                         val = virt_to_phys(ctxt_tbls[idx]) | 1;
3044                         iommu->root_entry[bus].lo = val;
3045                 }
3046
3047                 if (!ext || !ctxt_tbls[idx + 1])
3048                         continue;
3049
3050                 val = virt_to_phys(ctxt_tbls[idx + 1]) | 1;
3051                 iommu->root_entry[bus].hi = val;
3052         }
3053
3054         spin_unlock_irqrestore(&iommu->lock, flags);
3055
3056         kfree(ctxt_tbls);
3057
3058         __iommu_flush_cache(iommu, iommu->root_entry, PAGE_SIZE);
3059
3060         ret = 0;
3061
3062 out_unmap:
3063         iounmap(old_rt);
3064
3065         return ret;
3066 }
3067
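/*
 * Boot-time DMAR bring-up: allocate the global iommu and deferred-flush
 * arrays, initialize domains and root entries for every active IOMMU
 * (copying old translation tables in the kdump case), set up identity,
 * RMRR and ISA mappings unless tables were copied, then enable
 * translation on all units that are not ignored.
 */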
3068 static int __init init_dmars(void)
3069 {
3070         struct dmar_drhd_unit *drhd;
3071         struct dmar_rmrr_unit *rmrr;
3072         bool copied_tables = false;
3073         struct device *dev;
3074         struct intel_iommu *iommu;
3075         int i, ret;
3076
3077         /*
3078          * for each drhd
3079          *    allocate root
3080          *    initialize and program root entry to not present
3081          * endfor
3082          */
3083         for_each_drhd_unit(drhd) {
3084                 /*
3085                  * No lock needed: this is only incremented in the single-
3086                  * threaded kernel __init code path; all other accesses are
3087                  * read-only.
3088                  */
3089                 if (g_num_of_iommus < DMAR_UNITS_SUPPORTED) {
3090                         g_num_of_iommus++;
3091                         continue;
3092                 }
3093                 pr_err_once("Exceeded %d IOMMUs\n", DMAR_UNITS_SUPPORTED);
3094         }
3095
3096         /* Preallocate enough resources for IOMMU hot-addition */
3097         if (g_num_of_iommus < DMAR_UNITS_SUPPORTED)
3098                 g_num_of_iommus = DMAR_UNITS_SUPPORTED;
3099
3100         g_iommus = kcalloc(g_num_of_iommus, sizeof(struct intel_iommu *),
3101                         GFP_KERNEL);
3102         if (!g_iommus) {
3103                 pr_err("Allocating global iommu array failed\n");
3104                 ret = -ENOMEM;
3105                 goto error;
3106         }
3107
3108         deferred_flush = kzalloc(g_num_of_iommus *
3109                 sizeof(struct deferred_flush_tables), GFP_KERNEL);
3110         if (!deferred_flush) {
3111                 ret = -ENOMEM;
3112                 goto free_g_iommus;
3113         }
3114
3115         for_each_active_iommu(iommu, drhd) {
3116                 g_iommus[iommu->seq_id] = iommu;
3117
3118                 intel_iommu_init_qi(iommu);
3119
3120                 ret = iommu_init_domains(iommu);
3121                 if (ret)
3122                         goto free_iommu;
3123
3124                 init_translation_status(iommu);
3125
3126                 if (translation_pre_enabled(iommu) && !is_kdump_kernel()) {
3127                         iommu_disable_translation(iommu);
3128                         clear_translation_pre_enabled(iommu);
3129                         pr_warn("Translation was enabled for %s but we are not in kdump mode\n",
3130                                 iommu->name);
3131                 }
3132
3133                 /*
3134                  * TBD:
3135                  * we could share the same root & context tables
3136                  * among all IOMMUs; we need to split it out later.
3137                  */
3138                 ret = iommu_alloc_root_entry(iommu);
3139                 if (ret)
3140                         goto free_iommu;
3141
3142                 if (translation_pre_enabled(iommu)) {
3143                         pr_info("Translation already enabled - trying to copy translation structures\n");
3144
3145                         ret = copy_translation_tables(iommu);
3146                         if (ret) {
3147                                 /*
3148                                  * We found the IOMMU with translation
3149                                  * enabled - but failed to copy over the
3150                                  * old root-entry table. Try to proceed
3151                                  * by disabling translation now and
3152                                  * allocating a clean root-entry table.
3153                                  * This might cause DMAR faults, but
3154                                  * probably the dump will still succeed.
3155                                  */
3156                                 pr_err("Failed to copy translation tables from previous kernel for %s\n",
3157                                        iommu->name);
3158                                 iommu_disable_translation(iommu);
3159                                 clear_translation_pre_enabled(iommu);
3160                         } else {
3161                                 pr_info("Copied translation tables from previous kernel for %s\n",
3162                                         iommu->name);
3163                                 copied_tables = true;
3164                         }
3165                 }
3166
3167                 iommu_flush_write_buffer(iommu);
3168                 iommu_set_root_entry(iommu);
3169                 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
3170                 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
3171
3172                 if (!ecap_pass_through(iommu->ecap))
3173                         hw_pass_through = 0;
3174 #ifdef CONFIG_INTEL_IOMMU_SVM
3175                 if (pasid_enabled(iommu))
3176                         intel_svm_alloc_pasid_tables(iommu);
3177 #endif
3178         }
3179
3180         if (iommu_pass_through)
3181                 iommu_identity_mapping |= IDENTMAP_ALL;
3182
3183 #ifdef CONFIG_INTEL_IOMMU_BROKEN_GFX_WA
3184         iommu_identity_mapping |= IDENTMAP_GFX;
3185 #endif
3186
3187         if (iommu_identity_mapping) {
3188                 ret = si_domain_init(hw_pass_through);
3189                 if (ret)
3190                         goto free_iommu;
3191         }
3192
3193         check_tylersburg_isoch();
3194
3195         /*
3196          * If we copied translations from a previous kernel in the kdump
3197          * case, we cannot assign the devices to domains now, as that
3198          * would eliminate the old mappings. So skip this part and defer
3199          * the assignment to device driver initialization time.
3200          */
3201         if (copied_tables)
3202                 goto domains_done;
3203
3204         /*
3205          * If pass-through is not set or not enabled, set up context entries
3206          * for identity mappings for RMRR, gfx and ISA, and fall back to the
3207          * static identity mapping if iommu_identity_mapping is set.
3208          */
3209         if (iommu_identity_mapping) {
3210                 ret = iommu_prepare_static_identity_mapping(hw_pass_through);
3211                 if (ret) {
3212                         pr_crit("Failed to setup IOMMU pass-through\n");
3213                         goto free_iommu;
3214                 }
3215         }
3216         /*
3217          * For each rmrr
3218          *   for each dev attached to rmrr
3219          *   do
3220          *     locate drhd for dev, alloc domain for dev
3221          *     allocate free domain
3222          *     allocate page table entries for rmrr
3223          *     if context not allocated for bus
3224          *           allocate and init context
3225          *           set present in root table for this bus
3226          *     init context with domain, translation etc
3227          *    endfor
3228          * endfor
3229          */
3230         pr_info("Setting RMRR:\n");
3231         for_each_rmrr_units(rmrr) {
3232                 /* Some BIOSes list non-existent devices in the DMAR table. */
3233                 for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
3234                                           i, dev) {
3235                         ret = iommu_prepare_rmrr_dev(rmrr, dev);
3236                         if (ret)
3237                                 pr_err("Mapping reserved region failed\n");
3238                 }
3239         }
3240
3241         iommu_prepare_isa();
3242
3243 domains_done:
3244
3245         /*
3246          * for each drhd
3247          *   enable fault log
3248          *   global invalidate context cache
3249          *   global invalidate iotlb
3250          *   enable translation
3251          */
3252         for_each_iommu(iommu, drhd) {
3253                 if (drhd->ignored) {
3254                         /*
3255                          * we always have to disable PMRs or DMA may fail on
3256                          * this device
3257                          */
3258                         if (force_on)
3259                                 iommu_disable_protect_mem_regions(iommu);
3260                         continue;
3261                 }
3262
3263                 iommu_flush_write_buffer(iommu);
3264
3265 #ifdef CONFIG_INTEL_IOMMU_SVM
3266                 if (pasid_enabled(iommu) && ecap_prs(iommu->ecap)) {
3267                         ret = intel_svm_enable_prq(iommu);
3268                         if (ret)
3269                                 goto free_iommu;
3270                 }
3271 #endif
3272                 ret = dmar_set_interrupt(iommu);
3273                 if (ret)
3274                         goto free_iommu;
3275
3276                 if (!translation_pre_enabled(iommu))
3277                         iommu_enable_translation(iommu);
3278
3279                 iommu_disable_protect_mem_regions(iommu);
3280         }
3281
3282         return 0;
3283
3284 free_iommu:
3285         for_each_active_iommu(iommu, drhd) {
3286                 disable_dmar_iommu(iommu);
3287                 free_dmar_iommu(iommu);
3288         }
3289         kfree(deferred_flush);
3290 free_g_iommus:
3291         kfree(g_iommus);
3292 error:
3293         return ret;
3294 }
3295
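/*
 * IOVA allocation helper for the DMA API paths below: clamp the caller's
 * DMA mask to what the domain can address, round the request up to a
 * power-of-two number of pages and, unless forcedac is set, try the
 * 32-bit range before falling back to the full mask.
 */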
3296 /* This takes a number of _MM_ pages, not VTD pages */
3297 static struct iova *intel_alloc_iova(struct device *dev,
3298                                      struct dmar_domain *domain,
3299                                      unsigned long nrpages, uint64_t dma_mask)
3300 {
3301         struct iova *iova = NULL;
3302
3303         /* Restrict dma_mask to the width that the iommu can handle */
3304         dma_mask = min_t(uint64_t, DOMAIN_MAX_ADDR(domain->gaw), dma_mask);
3305         /* Ensure we reserve the whole size-aligned region */
3306         nrpages = __roundup_pow_of_two(nrpages);
3307
3308         if (!dmar_forcedac && dma_mask > DMA_BIT_MASK(32)) {
3309                 /*
3310                  * First try to allocate an IO virtual address within
3311                  * DMA_BIT_MASK(32); if that fails, try allocating from the
3312                  * higher range.
3313                  */
3314                 iova = alloc_iova(&domain->iovad, nrpages,
3315                                   IOVA_PFN(DMA_BIT_MASK(32)), 1);
3316                 if (iova)
3317                         return iova;
3318         }
3319         iova = alloc_iova(&domain->iovad, nrpages, IOVA_PFN(dma_mask), 1);
3320         if (unlikely(!iova)) {
3321                 pr_err("Allocating %ld-page iova for %s failed\n",
3322                        nrpages, dev_name(dev));
3323                 return NULL;
3324         }
3325
3326         return iova;
3327 }
3328
3329 static struct dmar_domain *__get_valid_domain_for_dev(struct device *dev)
3330 {
3331         struct dmar_domain *domain;
3332
3333         domain = get_domain_for_dev(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
3334         if (!domain) {
3335                 pr_err("Allocating domain for %s failed\n",
3336                        dev_name(dev));
3337                 return NULL;
3338         }
3339
3340         return domain;
3341 }
3342
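/*
 * Fast path for the DMA API: if the device already has per-device iommu
 * data attached, return its domain directly; otherwise fall back to the
 * slower allocation in __get_valid_domain_for_dev().
 */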
3343 static inline struct dmar_domain *get_valid_domain_for_dev(struct device *dev)
3344 {
3345         struct device_domain_info *info;
3346
3347         /* No lock here, assumes no domain exit in normal case */
3348         info = dev->archdata.iommu;
3349         if (likely(info))
3350                 return info->domain;
3351
3352         return __get_valid_domain_for_dev(dev);
3353 }
3354
3355 /* Check if the dev needs to go through the non-identity map and unmap process. */
3356 static int iommu_no_mapping(struct device *dev)
3357 {
3358         int found;
3359
3360         if (iommu_dummy(dev))
3361                 return 1;
3362
3363         if (!iommu_identity_mapping)
3364                 return 0;
3365
3366         found = identity_mapping(dev);
3367         if (found) {
3368                 if (iommu_should_identity_map(dev, 0))
3369                         return 1;
3370                 else {
3371                         /*
3372                          * The 32-bit DMA device is removed from si_domain
3373                          * and falls back to non-identity mapping.
3374                          */
3375                         dmar_remove_one_dev_info(si_domain, dev);
3376                         pr_info("32bit %s uses non-identity mapping\n",
3377                                 dev_name(dev));
3378                         return 0;
3379                 }
3380         } else {
3381                 /*
3382                  * A 64-bit DMA device detached from a VM is put back into
3383                  * si_domain for identity mapping.
3384                  */
3385                 if (iommu_should_identity_map(dev, 0)) {
3386                         int ret;
3387                         ret = domain_add_dev_info(si_domain, dev);
3388                         if (!ret) {
3389                                 pr_info("64bit %s uses identity mapping\n",
3390                                         dev_name(dev));
3391                                 return 1;
3392                         }
3393                 }
3394         }
3395
3396         return 0;
3397 }
3398
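/*
 * Core single-range mapping: identity-mapped devices bypass translation
 * entirely; everything else gets an IOVA range, page-table entries with
 * permissions derived from the DMA direction, and an IOTLB flush (caching
 * mode) or write-buffer flush.
 */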
3399 static dma_addr_t __intel_map_single(struct device *dev, phys_addr_t paddr,
3400                                      size_t size, int dir, u64 dma_mask)
3401 {
3402         struct dmar_domain *domain;
3403         phys_addr_t start_paddr;
3404         struct iova *iova;
3405         int prot = 0;
3406         int ret;
3407         struct intel_iommu *iommu;
3408         unsigned long paddr_pfn = paddr >> PAGE_SHIFT;
3409
3410         BUG_ON(dir == DMA_NONE);
3411
3412         if (iommu_no_mapping(dev))
3413                 return paddr;
3414
3415         domain = get_valid_domain_for_dev(dev);
3416         if (!domain)
3417                 return 0;
3418
3419         iommu = domain_get_iommu(domain);
3420         size = aligned_nrpages(paddr, size);
3421
3422         iova = intel_alloc_iova(dev, domain, dma_to_mm_pfn(size), dma_mask);
3423         if (!iova)
3424                 goto error;
3425
3426         /*
3427          * Check if DMAR supports zero-length reads on write-only
3428          * mappings.
3429          */
3430         if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL ||
3431                         !cap_zlr(iommu->cap))
3432                 prot |= DMA_PTE_READ;
3433         if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3434                 prot |= DMA_PTE_WRITE;
3435         /*
3436          * paddr through (paddr + size) might span a partial page, so map the
3437          * whole page.  Note: if two parts of one page are mapped separately,
3438          * we might end up with two guest addresses mapping to the same host
3439          * paddr, but this is not a big problem.
3440          */
3441         ret = domain_pfn_mapping(domain, mm_to_dma_pfn(iova->pfn_lo),
3442                                  mm_to_dma_pfn(paddr_pfn), size, prot);
3443         if (ret)
3444                 goto error;
3445
3446         /* it's a non-present to present mapping. Only flush if caching mode */
3447         if (cap_caching_mode(iommu->cap))
3448                 iommu_flush_iotlb_psi(iommu, domain,
3449                                       mm_to_dma_pfn(iova->pfn_lo),
3450                                       size, 0, 1);
3451         else
3452                 iommu_flush_write_buffer(iommu);
3453
3454         start_paddr = (phys_addr_t)iova->pfn_lo << PAGE_SHIFT;
3455         start_paddr += paddr & ~PAGE_MASK;
3456         return start_paddr;
3457
3458 error:
3459         if (iova)
3460                 __free_iova(&domain->iovad, iova);
3461         pr_err("Device %s request: %zx@%llx dir %d --- failed\n",
3462                 dev_name(dev), size, (unsigned long long)paddr, dir);
3463         return 0;
3464 }
3465
3466 static dma_addr_t intel_map_page(struct device *dev, struct page *page,
3467                                  unsigned long offset, size_t size,
3468                                  enum dma_data_direction dir,
3469                                  struct dma_attrs *attrs)
3470 {
3471         return __intel_map_single(dev, page_to_phys(page) + offset, size,
3472                                   dir, *dev->dma_mask);
3473 }
3474
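/*
 * Deferred-unmap machinery: unmaps are batched per IOMMU in
 * deferred_flush[] and released here, either from the timer or once the
 * high-water mark is reached. Callers must hold async_umap_flush_lock.
 */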
3475 static void flush_unmaps(void)
3476 {
3477         int i, j;
3478
3479         timer_on = 0;
3480
3481         /* just flush them all */
3482         for (i = 0; i < g_num_of_iommus; i++) {
3483                 struct intel_iommu *iommu = g_iommus[i];
3484                 if (!iommu)
3485                         continue;
3486
3487                 if (!deferred_flush[i].next)
3488                         continue;
3489
3490                 /* In caching mode, global flushes make emulation expensive */
3491                 if (!cap_caching_mode(iommu->cap))
3492                         iommu->flush.flush_iotlb(iommu, 0, 0, 0,
3493                                          DMA_TLB_GLOBAL_FLUSH);
3494                 for (j = 0; j < deferred_flush[i].next; j++) {
3495                         unsigned long mask;
3496                         struct iova *iova = deferred_flush[i].iova[j];
3497                         struct dmar_domain *domain = deferred_flush[i].domain[j];
3498
3499                         /* On real hardware multiple invalidations are expensive */
3500                         if (cap_caching_mode(iommu->cap))
3501                                 iommu_flush_iotlb_psi(iommu, domain,
3502                                         iova->pfn_lo, iova_size(iova),
3503                                         !deferred_flush[i].freelist[j], 0);
3504                         else {
3505                                 mask = ilog2(mm_to_dma_pfn(iova_size(iova)));
3506                                 iommu_flush_dev_iotlb(deferred_flush[i].domain[j],
3507                                                 (uint64_t)iova->pfn_lo << PAGE_SHIFT, mask);
3508                         }
3509                         __free_iova(&deferred_flush[i].domain[j]->iovad, iova);
3510                         if (deferred_flush[i].freelist[j])
3511                                 dma_free_pagelist(deferred_flush[i].freelist[j]);
3512                 }
3513                 deferred_flush[i].next = 0;
3514         }
3515
3516         list_size = 0;
3517 }
3518
3519 static void flush_unmaps_timeout(unsigned long data)
3520 {
3521         unsigned long flags;
3522
3523         spin_lock_irqsave(&async_umap_flush_lock, flags);
3524         flush_unmaps();
3525         spin_unlock_irqrestore(&async_umap_flush_lock, flags);
3526 }
3527
3528 static void add_unmap(struct dmar_domain *dom, struct iova *iova, struct page *freelist)
3529 {
3530         unsigned long flags;
3531         int next, iommu_id;
3532         struct intel_iommu *iommu;
3533
3534         spin_lock_irqsave(&async_umap_flush_lock, flags);
3535         if (list_size == HIGH_WATER_MARK)
3536                 flush_unmaps();
3537
3538         iommu = domain_get_iommu(dom);
3539         iommu_id = iommu->seq_id;
3540
3541         next = deferred_flush[iommu_id].next;
3542         deferred_flush[iommu_id].domain[next] = dom;
3543         deferred_flush[iommu_id].iova[next] = iova;
3544         deferred_flush[iommu_id].freelist[next] = freelist;
3545         deferred_flush[iommu_id].next++;
3546
3547         if (!timer_on) {
3548                 mod_timer(&unmap_timer, jiffies + msecs_to_jiffies(10));
3549                 timer_on = 1;
3550         }
3551         list_size++;
3552         spin_unlock_irqrestore(&async_umap_flush_lock, flags);
3553 }
3554
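/*
 * Common unmap path: tear down the page tables for the IOVA range, then
 * either flush and free immediately (intel_iommu_strict) or defer the
 * IOTLB flush and IOVA release via add_unmap().
 */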
3555 static void intel_unmap(struct device *dev, dma_addr_t dev_addr)
3556 {
3557         struct dmar_domain *domain;
3558         unsigned long start_pfn, last_pfn;
3559         struct iova *iova;
3560         struct intel_iommu *iommu;
3561         struct page *freelist;
3562
3563         if (iommu_no_mapping(dev))
3564                 return;
3565
3566         domain = find_domain(dev);
3567         BUG_ON(!domain);
3568
3569         iommu = domain_get_iommu(domain);
3570
3571         iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr));
3572         if (WARN_ONCE(!iova, "Driver unmaps unmatched page at PFN %llx\n",
3573                       (unsigned long long)dev_addr))
3574                 return;
3575
3576         start_pfn = mm_to_dma_pfn(iova->pfn_lo);
3577         last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1;
3578
3579         pr_debug("Device %s unmapping: pfn %lx-%lx\n",
3580                  dev_name(dev), start_pfn, last_pfn);
3581
3582         freelist = domain_unmap(domain, start_pfn, last_pfn);
3583
3584         if (intel_iommu_strict) {
3585                 iommu_flush_iotlb_psi(iommu, domain, start_pfn,
3586                                       last_pfn - start_pfn + 1, !freelist, 0);
3587                 /* free iova */
3588                 __free_iova(&domain->iovad, iova);
3589                 dma_free_pagelist(freelist);
3590         } else {
3591                 add_unmap(domain, iova, freelist);
3592                 /*
3593                  * Queue up the release of the unmap to save roughly the 1/6th
3594                  * of the CPU time used up by the IOTLB flush operation.
3595                  */
3596         }
3597 }
3598
3599 static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr,
3600                              size_t size, enum dma_data_direction dir,
3601                              struct dma_attrs *attrs)
3602 {
3603         intel_unmap(dev, dev_addr);
3604 }
3605
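/*
 * Coherent allocations: try the contiguous (CMA) allocator for blocking
 * requests, fall back to alloc_pages(), zero the buffer and map it
 * bidirectionally through __intel_map_single() against the device's
 * coherent DMA mask.
 */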
3606 static void *intel_alloc_coherent(struct device *dev, size_t size,
3607                                   dma_addr_t *dma_handle, gfp_t flags,
3608                                   struct dma_attrs *attrs)
3609 {
3610         struct page *page = NULL;
3611         int order;
3612
3613         size = PAGE_ALIGN(size);
3614         order = get_order(size);
3615
3616         if (!iommu_no_mapping(dev))
3617                 flags &= ~(GFP_DMA | GFP_DMA32);
3618         else if (dev->coherent_dma_mask < dma_get_required_mask(dev)) {
3619                 if (dev->coherent_dma_mask < DMA_BIT_MASK(32))
3620                         flags |= GFP_DMA;
3621                 else
3622                         flags |= GFP_DMA32;
3623         }
3624
3625         if (flags & __GFP_WAIT) {
3626                 unsigned int count = size >> PAGE_SHIFT;
3627
3628                 page = dma_alloc_from_contiguous(dev, count, order);
3629                 if (page && iommu_no_mapping(dev) &&
3630                     page_to_phys(page) + size > dev->coherent_dma_mask) {
3631                         dma_release_from_contiguous(dev, page, count);
3632                         page = NULL;
3633                 }
3634         }
3635
3636         if (!page)
3637                 page = alloc_pages(flags, order);
3638         if (!page)
3639                 return NULL;
3640         memset(page_address(page), 0, size);
3641
3642         *dma_handle = __intel_map_single(dev, page_to_phys(page), size,
3643                                          DMA_BIDIRECTIONAL,
3644                                          dev->coherent_dma_mask);
3645         if (*dma_handle)
3646                 return page_address(page);
3647         if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
3648                 __free_pages(page, order);
3649
3650         return NULL;
3651 }
3652
3653 static void intel_free_coherent(struct device *dev, size_t size, void *vaddr,
3654                                 dma_addr_t dma_handle, struct dma_attrs *attrs)
3655 {
3656         int order;
3657         struct page *page = virt_to_page(vaddr);
3658
3659         size = PAGE_ALIGN(size);
3660         order = get_order(size);
3661
3662         intel_unmap(dev, dma_handle);
3663         if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
3664                 __free_pages(page, order);
3665 }
3666
3667 static void intel_unmap_sg(struct device *dev, struct scatterlist *sglist,
3668                            int nelems, enum dma_data_direction dir,
3669                            struct dma_attrs *attrs)
3670 {
3671         intel_unmap(dev, sglist[0].dma_address);
3672 }
3673
3674 static int intel_nontranslate_map_sg(struct device *hddev,
3675         struct scatterlist *sglist, int nelems, int dir)
3676 {
3677         int i;
3678         struct scatterlist *sg;
3679
3680         for_each_sg(sglist, sg, nelems, i) {
3681                 BUG_ON(!sg_page(sg));
3682                 sg->dma_address = sg_phys(sg);
3683                 sg->dma_length = sg->length;
3684         }
3685         return nelems;
3686 }
3687
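/*
 * Scatterlist mapping: identity-mapped devices get physical addresses
 * directly via intel_nontranslate_map_sg(); everyone else gets one IOVA
 * range sized for the whole list, mapped in a single domain_sg_mapping()
 * call and flushed like the single-page path.
 */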
3688 static int intel_map_sg(struct device *dev, struct scatterlist *sglist, int nelems,
3689                         enum dma_data_direction dir, struct dma_attrs *attrs)
3690 {
3691         int i;
3692         struct dmar_domain *domain;
3693         size_t size = 0;
3694         int prot = 0;
3695         struct iova *iova = NULL;
3696         int ret;
3697         struct scatterlist *sg;
3698         unsigned long start_vpfn;
3699         struct intel_iommu *iommu;
3700
3701         BUG_ON(dir == DMA_NONE);
3702         if (iommu_no_mapping(dev))
3703                 return intel_nontranslate_map_sg(dev, sglist, nelems, dir);
3704
3705         domain = get_valid_domain_for_dev(dev);
3706         if (!domain)
3707                 return 0;
3708
3709         iommu = domain_get_iommu(domain);
3710
3711         for_each_sg(sglist, sg, nelems, i)
3712                 size += aligned_nrpages(sg->offset, sg->length);
3713
3714         iova = intel_alloc_iova(dev, domain, dma_to_mm_pfn(size),
3715                                 *dev->dma_mask);
3716         if (!iova) {
3717                 sglist->dma_length = 0;
3718                 return 0;
3719         }
3720
3721         /*
3722          * Check if DMAR supports zero-length reads on write only
3723          * Check if DMAR supports zero-length reads on write-only
3724          * mappings.
3725         if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL ||
3726                         !cap_zlr(iommu->cap))
3727                 prot |= DMA_PTE_READ;
3728         if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3729                 prot |= DMA_PTE_WRITE;
3730
3731         start_vpfn = mm_to_dma_pfn(iova->pfn_lo);
3732
3733         ret = domain_sg_mapping(domain, start_vpfn, sglist, size, prot);
3734         if (unlikely(ret)) {
3735                 dma_pte_free_pagetable(domain, start_vpfn,
3736                                        start_vpfn + size - 1);
3737                 __free_iova(&domain->iovad, iova);
3738                 return 0;
3739         }
3740
3741         /* it's a non-present to present mapping. Only flush if caching mode */
3742         if (cap_caching_mode(iommu->cap))
3743                 iommu_flush_iotlb_psi(iommu, domain, start_vpfn, size, 0, 1);
3744         else
3745                 iommu_flush_write_buffer(iommu);
3746
3747         return nelems;
3748 }
3749
3750 static int intel_mapping_error(struct device *dev, dma_addr_t dma_addr)
3751 {
3752         return !dma_addr;
3753 }
3754
3755 struct dma_map_ops intel_dma_ops = {
3756         .alloc = intel_alloc_coherent,
3757         .free = intel_free_coherent,
3758         .map_sg = intel_map_sg,
3759         .unmap_sg = intel_unmap_sg,
3760         .map_page = intel_map_page,
3761         .unmap_page = intel_unmap_page,
3762         .mapping_error = intel_mapping_error,
3763 };
3764
3765 static inline int iommu_domain_cache_init(void)
3766 {
3767         int ret = 0;
3768
3769         iommu_domain_cache = kmem_cache_create("iommu_domain",
3770                                          sizeof(struct dmar_domain),
3771                                          0,
3772                                          SLAB_HWCACHE_ALIGN,
3773                                          NULL);
3775         if (!iommu_domain_cache) {
3776                 pr_err("Couldn't create iommu_domain cache\n");
3777                 ret = -ENOMEM;
3778         }
3779
3780         return ret;
3781 }
3782
3783 static inline int iommu_devinfo_cache_init(void)
3784 {
3785         int ret = 0;
3786
3787         iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
3788                                          sizeof(struct device_domain_info),
3789                                          0,
3790                                          SLAB_HWCACHE_ALIGN,
3791                                          NULL);
3792         if (!iommu_devinfo_cache) {
3793                 pr_err("Couldn't create devinfo cache\n");
3794                 ret = -ENOMEM;
3795         }
3796
3797         return ret;
3798 }
3799
3800 static int __init iommu_init_mempool(void)
3801 {
3802         int ret;
3803         ret = iova_cache_get();
3804         if (ret)
3805                 return ret;
3806
3807         ret = iommu_domain_cache_init();
3808         if (ret)
3809                 goto domain_error;
3810
3811         ret = iommu_devinfo_cache_init();
3812         if (!ret)
3813                 return ret;
3814
3815         kmem_cache_destroy(iommu_domain_cache);
3816 domain_error:
3817         iova_cache_put();
3818
3819         return -ENOMEM;
3820 }
3821
3822 static void __init iommu_exit_mempool(void)
3823 {
3824         kmem_cache_destroy(iommu_devinfo_cache);
3825         kmem_cache_destroy(iommu_domain_cache);
3826         iova_cache_put();
3827 }
3828
3829 static void quirk_ioat_snb_local_iommu(struct pci_dev *pdev)
3830 {
3831         struct dmar_drhd_unit *drhd;
3832         u32 vtbar;
3833         int rc;
3834
3835         /* We know that this device on this chipset has its own IOMMU.
3836          * If we find it under a different IOMMU, then the BIOS is lying
3837          * to us. Hope that the IOMMU for this device is actually
3838          * disabled, and it needs no translation...
3839          */
3840         rc = pci_bus_read_config_dword(pdev->bus, PCI_DEVFN(0, 0), 0xb0, &vtbar);
3841         if (rc) {
3842                 /* "can't" happen */
3843                 dev_info(&pdev->dev, "failed to run vt-d quirk\n");
3844                 return;
3845         }
3846         vtbar &= 0xffff0000;
3847
3848         /* we know that this IOMMU should be at offset 0xa000 from vtbar */
3849         drhd = dmar_find_matched_drhd_unit(pdev);
3850         if (WARN_TAINT_ONCE(!drhd || drhd->reg_base_addr - vtbar != 0xa000,
3851                             TAINT_FIRMWARE_WORKAROUND,
3852                             "BIOS assigned incorrect VT-d unit for Intel(R) QuickData Technology device\n"))
3853                 pdev->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
3854 }
3855 DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB, quirk_ioat_snb_local_iommu);
3856
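/*
 * Mark DRHD units that can be skipped: units with an empty device scope
 * are ignored outright, and graphics-only units are either ignored (when
 * dmar_map_gfx is clear) or merely noted via intel_iommu_gfx_mapped.
 */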
3857 static void __init init_no_remapping_devices(void)
3858 {
3859         struct dmar_drhd_unit *drhd;
3860         struct device *dev;
3861         int i;
3862
3863         for_each_drhd_unit(drhd) {
3864                 if (!drhd->include_all) {
3865                         for_each_active_dev_scope(drhd->devices,
3866                                                   drhd->devices_cnt, i, dev)
3867                                 break;
3868                         /* ignore DMAR unit if no devices exist */
3869                         if (i == drhd->devices_cnt)
3870                                 drhd->ignored = 1;
3871                 }
3872         }
3873
3874         for_each_active_drhd_unit(drhd) {
3875                 if (drhd->include_all)
3876                         continue;
3877
3878                 for_each_active_dev_scope(drhd->devices,
3879                                           drhd->devices_cnt, i, dev)
3880                         if (!dev_is_pci(dev) || !IS_GFX_DEVICE(to_pci_dev(dev)))
3881                                 break;
3882                 if (i < drhd->devices_cnt)
3883                         continue;
3884
3885                 /* This IOMMU has *only* gfx devices. Either bypass it or
3886                    set the gfx_mapped flag, as appropriate */
3887                 if (dmar_map_gfx) {
3888                         intel_iommu_gfx_mapped = 1;
3889                 } else {
3890                         drhd->ignored = 1;
3891                         for_each_active_dev_scope(drhd->devices,
3892                                                   drhd->devices_cnt, i, dev)
3893                                 dev->archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
3894                 }
3895         }
3896 }
3897
3898 #ifdef CONFIG_SUSPEND
3899 static int init_iommu_hw(void)
3900 {
3901         struct dmar_drhd_unit *drhd;
3902         struct intel_iommu *iommu = NULL;
3903
3904         for_each_active_iommu(iommu, drhd)
3905                 if (iommu->qi)
3906                         dmar_reenable_qi(iommu);
3907
3908         for_each_iommu(iommu, drhd) {
3909                 if (drhd->ignored) {
3910                         /*
3911                          * we always have to disable PMRs or DMA may fail on
3912                          * this device
3913                          */
3914                         if (force_on)
3915                                 iommu_disable_protect_mem_regions(iommu);
3916                         continue;
3917                 }
3918
3919                 iommu_flush_write_buffer(iommu);
3920
3921                 iommu_set_root_entry(iommu);
3922
3923                 iommu->flush.flush_context(iommu, 0, 0, 0,
3924                                            DMA_CCMD_GLOBAL_INVL);
3925                 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
3926                 iommu_enable_translation(iommu);
3927                 iommu_disable_protect_mem_regions(iommu);
3928         }
3929
3930         return 0;
3931 }
3932
3933 static void iommu_flush_all(void)
3934 {
3935         struct dmar_drhd_unit *drhd;
3936         struct intel_iommu *iommu;
3937
3938         for_each_active_iommu(iommu, drhd) {
3939                 iommu->flush.flush_context(iommu, 0, 0, 0,
3940                                            DMA_CCMD_GLOBAL_INVL);
3941                 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
3942                                          DMA_TLB_GLOBAL_FLUSH);
3943         }
3944 }
3945
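/*
 * Suspend path: flush everything, disable translation and save the fault
 * event control/data/address registers so iommu_resume() can restore them
 * after re-initializing the hardware via init_iommu_hw().
 */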
3946 static int iommu_suspend(void)
3947 {
3948         struct dmar_drhd_unit *drhd;
3949         struct intel_iommu *iommu = NULL;
3950         unsigned long flag;
3951
3952         for_each_active_iommu(iommu, drhd) {
3953                 iommu->iommu_state = kzalloc(sizeof(u32) * MAX_SR_DMAR_REGS,
3954                                                  GFP_ATOMIC);
3955                 if (!iommu->iommu_state)
3956                         goto nomem;
3957         }
3958
3959         iommu_flush_all();
3960
3961         for_each_active_iommu(iommu, drhd) {
3962                 iommu_disable_translation(iommu);
3963
3964                 raw_spin_lock_irqsave(&iommu->register_lock, flag);
3965
3966                 iommu->iommu_state[SR_DMAR_FECTL_REG] =
3967                         readl(iommu->reg + DMAR_FECTL_REG);
3968                 iommu->iommu_state[SR_DMAR_FEDATA_REG] =
3969                         readl(iommu->reg + DMAR_FEDATA_REG);
3970                 iommu->iommu_state[SR_DMAR_FEADDR_REG] =
3971                         readl(iommu->reg + DMAR_FEADDR_REG);
3972                 iommu->iommu_state[SR_DMAR_FEUADDR_REG] =
3973                         readl(iommu->reg + DMAR_FEUADDR_REG);
3974
3975                 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
3976         }
3977         return 0;
3978
3979 nomem:
3980         for_each_active_iommu(iommu, drhd)
3981                 kfree(iommu->iommu_state);
3982
3983         return -ENOMEM;
3984 }
3985
3986 static void iommu_resume(void)
3987 {
3988         struct dmar_drhd_unit *drhd;
3989         struct intel_iommu *iommu = NULL;
3990         unsigned long flag;
3991
3992         if (init_iommu_hw()) {
3993                 if (force_on)
3994                         panic("tboot: IOMMU setup failed, DMAR can not resume!\n");
3995                 else
3996                         WARN(1, "IOMMU setup failed, DMAR can not resume!\n");
3997                 return;
3998         }
3999
4000         for_each_active_iommu(iommu, drhd) {
4001
4002                 raw_spin_lock_irqsave(&iommu->register_lock, flag);
4003
4004                 writel(iommu->iommu_state[SR_DMAR_FECTL_REG],
4005                         iommu->reg + DMAR_FECTL_REG);
4006                 writel(iommu->iommu_state[SR_DMAR_FEDATA_REG],
4007                         iommu->reg + DMAR_FEDATA_REG);
4008                 writel(iommu->iommu_state[SR_DMAR_FEADDR_REG],
4009                         iommu->reg + DMAR_FEADDR_REG);
4010                 writel(iommu->iommu_state[SR_DMAR_FEUADDR_REG],
4011                         iommu->reg + DMAR_FEUADDR_REG);
4012
4013                 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
4014         }
4015
4016         for_each_active_iommu(iommu, drhd)
4017                 kfree(iommu->iommu_state);
4018 }
4019
4020 static struct syscore_ops iommu_syscore_ops = {
4021         .resume         = iommu_resume,
4022         .suspend        = iommu_suspend,
4023 };
4024
4025 static void __init init_iommu_pm_ops(void)
4026 {
4027         register_syscore_ops(&iommu_syscore_ops);
4028 }
4029
4030 #else
4031 static inline void init_iommu_pm_ops(void) {}
4032 #endif  /* CONFIG_SUSPEND */
4033
4034
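/*
 * Parse an ACPI RMRR entry into a dmar_rmrr_unit and add it to
 * dmar_rmrr_units so the reserved range can be identity-mapped for its
 * devices later.
 */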
4035 int __init dmar_parse_one_rmrr(struct acpi_dmar_header *header, void *arg)
4036 {
4037         struct acpi_dmar_reserved_memory *rmrr;
4038         struct dmar_rmrr_unit *rmrru;
4039
4040         rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL);
4041         if (!rmrru)
4042                 return -ENOMEM;
4043
4044         rmrru->hdr = header;
4045         rmrr = (struct acpi_dmar_reserved_memory *)header;
4046         rmrru->base_address = rmrr->base_address;
4047         rmrru->end_address = rmrr->end_address;
4048         rmrru->devices = dmar_alloc_dev_scope((void *)(rmrr + 1),
4049                                 ((void *)rmrr) + rmrr->header.length,
4050                                 &rmrru->devices_cnt);
4051         if (rmrru->devices_cnt && rmrru->devices == NULL) {
4052                 kfree(rmrru);
4053                 return -ENOMEM;
4054         }
4055
4056         list_add(&rmrru->list, &dmar_rmrr_units);
4057
4058         return 0;
4059 }
4060
4061 static struct dmar_atsr_unit *dmar_find_atsr(struct acpi_dmar_atsr *atsr)
4062 {
4063         struct dmar_atsr_unit *atsru;
4064         struct acpi_dmar_atsr *tmp;
4065
4066         list_for_each_entry_rcu(atsru, &dmar_atsr_units, list) {
4067                 tmp = (struct acpi_dmar_atsr *)atsru->hdr;
4068                 if (atsr->segment != tmp->segment)
4069                         continue;
4070                 if (atsr->header.length != tmp->header.length)
4071                         continue;
4072                 if (memcmp(atsr, tmp, atsr->header.length) == 0)
4073                         return atsru;
4074         }
4075
4076         return NULL;
4077 }
4078
4079 int dmar_parse_one_atsr(struct acpi_dmar_header *hdr, void *arg)
4080 {
4081         struct acpi_dmar_atsr *atsr;
4082         struct dmar_atsr_unit *atsru;
4083
4084         if (system_state != SYSTEM_BOOTING && !intel_iommu_enabled)
4085                 return 0;
4086
4087         atsr = container_of(hdr, struct acpi_dmar_atsr, header);
4088         atsru = dmar_find_atsr(atsr);
4089         if (atsru)
4090                 return 0;
4091
4092         atsru = kzalloc(sizeof(*atsru) + hdr->length, GFP_KERNEL);
4093         if (!atsru)
4094                 return -ENOMEM;
4095
4096         /*
4097          * If memory is allocated from slab by ACPI _DSM method, we need to
4098          * copy the memory content because the memory buffer will be freed
4099          * on return.
4100          */
4101         atsru->hdr = (void *)(atsru + 1);
4102         memcpy(atsru->hdr, hdr, hdr->length);
4103         atsru->include_all = atsr->flags & 0x1;
4104         if (!atsru->include_all) {
4105                 atsru->devices = dmar_alloc_dev_scope((void *)(atsr + 1),
4106                                 (void *)atsr + atsr->header.length,
4107                                 &atsru->devices_cnt);
4108                 if (atsru->devices_cnt && atsru->devices == NULL) {
4109                         kfree(atsru);
4110                         return -ENOMEM;
4111                 }
4112         }
4113
4114         list_add_rcu(&atsru->list, &dmar_atsr_units);
4115
4116         return 0;
4117 }
4118
4119 static void intel_iommu_free_atsr(struct dmar_atsr_unit *atsru)
4120 {
4121         dmar_free_dev_scope(&atsru->devices, &atsru->devices_cnt);
4122         kfree(atsru);
4123 }
4124
4125 int dmar_release_one_atsr(struct acpi_dmar_header *hdr, void *arg)
4126 {
4127         struct acpi_dmar_atsr *atsr;
4128         struct dmar_atsr_unit *atsru;
4129
4130         atsr = container_of(hdr, struct acpi_dmar_atsr, header);
4131         atsru = dmar_find_atsr(atsr);
4132         if (atsru) {
4133                 list_del_rcu(&atsru->list);
4134                 synchronize_rcu();
4135                 intel_iommu_free_atsr(atsru);
4136         }
4137
4138         return 0;
4139 }
4140
4141 int dmar_check_one_atsr(struct acpi_dmar_header *hdr, void *arg)
4142 {
4143         int i;
4144         struct device *dev;
4145         struct acpi_dmar_atsr *atsr;
4146         struct dmar_atsr_unit *atsru;
4147
4148         atsr = container_of(hdr, struct acpi_dmar_atsr, header);
4149         atsru = dmar_find_atsr(atsr);
4150         if (!atsru)
4151                 return 0;
4152
4153         if (!atsru->include_all && atsru->devices && atsru->devices_cnt)
4154                 for_each_active_dev_scope(atsru->devices, atsru->devices_cnt,
4155                                           i, dev)
4156                         return -EBUSY;
4157
4158         return 0;
4159 }
4160
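/*
 * Hot-added DMAR unit bring-up: reject units that cannot match the
 * already established pass-through, snooping or superpage configuration,
 * then initialize domains, root entry, QI and interrupts and enable
 * translation, mirroring what init_dmars() does at boot.
 */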
4161 static int intel_iommu_add(struct dmar_drhd_unit *dmaru)
4162 {
4163         int sp, ret = 0;
4164         struct intel_iommu *iommu = dmaru->iommu;
4165
4166         if (g_iommus[iommu->seq_id])
4167                 return 0;
4168
4169         if (hw_pass_through && !ecap_pass_through(iommu->ecap)) {
4170                 pr_warn("%s: Doesn't support hardware pass through.\n",
4171                         iommu->name);
4172                 return -ENXIO;
4173         }
4174         if (!ecap_sc_support(iommu->ecap) &&
4175             domain_update_iommu_snooping(iommu)) {
4176                 pr_warn("%s: Doesn't support snooping.\n",
4177                         iommu->name);
4178                 return -ENXIO;
4179         }
4180         sp = domain_update_iommu_superpage(iommu) - 1;
4181         if (sp >= 0 && !(cap_super_page_val(iommu->cap) & (1 << sp))) {
4182                 pr_warn("%s: Doesn't support large page.\n",
4183                         iommu->name);
4184                 return -ENXIO;
4185         }
4186
4187         /*
4188          * Disable translation if already enabled prior to OS handover.
4189          */
4190         if (iommu->gcmd & DMA_GCMD_TE)
4191                 iommu_disable_translation(iommu);
4192
4193         g_iommus[iommu->seq_id] = iommu;
4194         ret = iommu_init_domains(iommu);
4195         if (ret == 0)
4196                 ret = iommu_alloc_root_entry(iommu);
4197         if (ret)
4198                 goto out;
4199
4200 #ifdef CONFIG_INTEL_IOMMU_SVM
4201         if (pasid_enabled(iommu))
4202                 intel_svm_alloc_pasid_tables(iommu);
4203 #endif
4204
4205         if (dmaru->ignored) {
4206                 /*
4207                  * we always have to disable PMRs or DMA may fail on this device
4208                  */
4209                 if (force_on)
4210                         iommu_disable_protect_mem_regions(iommu);
4211                 return 0;
4212         }
4213
4214         intel_iommu_init_qi(iommu);
4215         iommu_flush_write_buffer(iommu);
4216
4217 #ifdef CONFIG_INTEL_IOMMU_SVM
4218         if (pasid_enabled(iommu) && ecap_prs(iommu->ecap)) {
4219                 ret = intel_svm_enable_prq(iommu);
4220                 if (ret)
4221                         goto disable_iommu;
4222         }
4223 #endif
4224         ret = dmar_set_interrupt(iommu);
4225         if (ret)
4226                 goto disable_iommu;
4227
4228         iommu_set_root_entry(iommu);
4229         iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
4230         iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
4231         iommu_enable_translation(iommu);
4232
4233         iommu_disable_protect_mem_regions(iommu);
4234         return 0;
4235
4236 disable_iommu:
4237         disable_dmar_iommu(iommu);
4238 out:
4239         free_dmar_iommu(iommu);
4240         return ret;
4241 }
4242
4243 int dmar_iommu_hotplug(struct dmar_drhd_unit *dmaru, bool insert)
4244 {
4245         int ret = 0;
4246         struct intel_iommu *iommu = dmaru->iommu;
4247
4248         if (!intel_iommu_enabled)
4249                 return 0;
4250         if (iommu == NULL)
4251                 return -EINVAL;
4252
4253         if (insert) {
4254                 ret = intel_iommu_add(dmaru);
4255         } else {
4256                 disable_dmar_iommu(iommu);
4257                 free_dmar_iommu(iommu);
4258         }
4259
4260         return ret;
4261 }
4262
4263 static void intel_iommu_free_dmars(void)
4264 {
4265         struct dmar_rmrr_unit *rmrru, *rmrr_n;
4266         struct dmar_atsr_unit *atsru, *atsr_n;
4267
4268         list_for_each_entry_safe(rmrru, rmrr_n, &dmar_rmrr_units, list) {
4269                 list_del(&rmrru->list);
4270                 dmar_free_dev_scope(&rmrru->devices, &rmrru->devices_cnt);
4271                 kfree(rmrru);
4272         }
4273
4274         list_for_each_entry_safe(atsru, atsr_n, &dmar_atsr_units, list) {
4275                 list_del(&atsru->list);
4276                 intel_iommu_free_atsr(atsru);
4277         }
4278 }
4279
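/*
 * Decide whether ATS may be used for a device: walk up to its root port
 * and check that port against the ATSR units (or an include_all ATSR)
 * for the device's PCI segment.
 */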
4280 int dmar_find_matched_atsr_unit(struct pci_dev *dev)
4281 {
4282         int i, ret = 1;
4283         struct pci_bus *bus;
4284         struct pci_dev *bridge = NULL;
4285         struct device *tmp;
4286         struct acpi_dmar_atsr *atsr;
4287         struct dmar_atsr_unit *atsru;
4288
4289         dev = pci_physfn(dev);
4290         for (bus = dev->bus; bus; bus = bus->parent) {
4291                 bridge = bus->self;
4292                 /* If it's an integrated device, allow ATS */
4293                 if (!bridge)
4294                         return 1;
4295                 /* Connected via non-PCIe: no ATS */
4296                 if (!pci_is_pcie(bridge) ||
4297                     pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE)
4298                         return 0;
4299                 /* If we found the root port, look it up in the ATSR */
4300                 if (pci_pcie_type(bridge) == PCI_EXP_TYPE_ROOT_PORT)
4301                         break;
4302         }
4303
4304         rcu_read_lock();
4305         list_for_each_entry_rcu(atsru, &dmar_atsr_units, list) {
4306                 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
4307                 if (atsr->segment != pci_domain_nr(dev->bus))
4308                         continue;
4309
4310                 for_each_dev_scope(atsru->devices, atsru->devices_cnt, i, tmp)
4311                         if (tmp == &bridge->dev)
4312                                 goto out;
4313
4314                 if (atsru->include_all)
4315                         goto out;
4316         }
4317         ret = 0;
4318 out:
4319         rcu_read_unlock();
4320
4321         return ret;
4322 }
4323
4324 int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info)
4325 {
4326         int ret = 0;
4327         struct dmar_rmrr_unit *rmrru;
4328         struct dmar_atsr_unit *atsru;
4329         struct acpi_dmar_atsr *atsr;
4330         struct acpi_dmar_reserved_memory *rmrr;
4331
4332         if (!intel_iommu_enabled && system_state != SYSTEM_BOOTING)
4333                 return 0;
4334
4335         list_for_each_entry(rmrru, &dmar_rmrr_units, list) {
4336                 rmrr = container_of(rmrru->hdr,
4337                                     struct acpi_dmar_reserved_memory, header);
4338                 if (info->event == BUS_NOTIFY_ADD_DEVICE) {
4339                         ret = dmar_insert_dev_scope(info, (void *)(rmrr + 1),
4340                                 ((void *)rmrr) + rmrr->header.length,
4341                                 rmrr->segment, rmrru->devices,
4342                                 rmrru->devices_cnt);
4343                         if (ret < 0)
4344                                 return ret;
4345                 } else if (info->event == BUS_NOTIFY_DEL_DEVICE) {
4346                         dmar_remove_dev_scope(info, rmrr->segment,
4347                                 rmrru->devices, rmrru->devices_cnt);
4348                 }
4349         }
4350
4351         list_for_each_entry(atsru, &dmar_atsr_units, list) {
4352                 if (atsru->include_all)
4353                         continue;
4354
4355                 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
4356                 if (info->event == BUS_NOTIFY_ADD_DEVICE) {
4357                         ret = dmar_insert_dev_scope(info, (void *)(atsr + 1),
4358                                         (void *)atsr + atsr->header.length,
4359                                         atsr->segment, atsru->devices,
4360                                         atsru->devices_cnt);
4361                         if (ret > 0)
4362                                 break;
4363                         else if (ret < 0)
4364                                 return ret;
4365                 } else if (info->event == BUS_NOTIFY_DEL_DEVICE) {
4366                         if (dmar_remove_dev_scope(info, atsr->segment,
4367                                         atsru->devices, atsru->devices_cnt))
4368                                 break;
4369                 }
4370         }
4371
4372         return 0;
4373 }
4374
4375 /*
4376  * Here we only respond to a device being unbound from its driver.
4377  *
4378  * An added device is not attached to its DMAR domain here yet. That will
4379  * happen when the device is mapped to an iova.
4380  */
4381 static int device_notifier(struct notifier_block *nb,
4382                                   unsigned long action, void *data)
4383 {
4384         struct device *dev = data;
4385         struct dmar_domain *domain;
4386
4387         if (iommu_dummy(dev))
4388                 return 0;
4389
4390         if (action != BUS_NOTIFY_REMOVED_DEVICE)
4391                 return 0;
4392
4393         domain = find_domain(dev);
4394         if (!domain)
4395                 return 0;
4396
4397         dmar_remove_one_dev_info(domain, dev);
4398         if (!domain_type_is_vm_or_si(domain) && list_empty(&domain->devices))
4399                 domain_exit(domain);
4400
4401         return 0;
4402 }
4403
4404 static struct notifier_block device_nb = {
4405         .notifier_call = device_notifier,
4406 };
4407
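/*
 * Memory hotplug handling for the static identity domain: newly onlined
 * ranges get identity mappings added to si_domain, while offlined ranges
 * have their IOVAs split out, unmapped and flushed on every active IOMMU.
 */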
4408 static int intel_iommu_memory_notifier(struct notifier_block *nb,
4409                                        unsigned long val, void *v)
4410 {
4411         struct memory_notify *mhp = v;
4412         unsigned long long start, end;
4413         unsigned long start_vpfn, last_vpfn;
4414
4415         switch (val) {
4416         case MEM_GOING_ONLINE:
4417                 start = mhp->start_pfn << PAGE_SHIFT;
4418                 end = ((mhp->start_pfn + mhp->nr_pages) << PAGE_SHIFT) - 1;
4419                 if (iommu_domain_identity_map(si_domain, start, end)) {
4420                         pr_warn("Failed to build identity map for [%llx-%llx]\n",
4421                                 start, end);
4422                         return NOTIFY_BAD;
4423                 }
4424                 break;
4425
4426         case MEM_OFFLINE:
4427         case MEM_CANCEL_ONLINE:
4428                 start_vpfn = mm_to_dma_pfn(mhp->start_pfn);
4429                 last_vpfn = mm_to_dma_pfn(mhp->start_pfn + mhp->nr_pages - 1);
4430                 while (start_vpfn <= last_vpfn) {
4431                         struct iova *iova;
4432                         struct dmar_drhd_unit *drhd;
4433                         struct intel_iommu *iommu;
4434                         struct page *freelist;
4435
4436                         iova = find_iova(&si_domain->iovad, start_vpfn);
4437                         if (iova == NULL) {
4438                                 pr_debug("Failed to get IOVA for PFN %lx\n",
4439                                          start_vpfn);
4440                                 break;
4441                         }
4442
4443                         iova = split_and_remove_iova(&si_domain->iovad, iova,
4444                                                      start_vpfn, last_vpfn);
4445                         if (iova == NULL) {
4446                                 pr_warn("Failed to split IOVA PFN [%lx-%lx]\n",
4447                                         start_vpfn, last_vpfn);
4448                                 return NOTIFY_BAD;
4449                         }
4450
4451                         freelist = domain_unmap(si_domain, iova->pfn_lo,
4452                                                iova->pfn_hi);
4453
4454                         rcu_read_lock();
4455                         for_each_active_iommu(iommu, drhd)
4456                                 iommu_flush_iotlb_psi(iommu, si_domain,
4457                                         iova->pfn_lo, iova_size(iova),
4458                                         !freelist, 0);
4459                         rcu_read_unlock();
4460                         dma_free_pagelist(freelist);
4461
4462                         start_vpfn = iova->pfn_hi + 1;
4463                         free_iova_mem(iova);
4464                 }
4465                 break;
4466         }
4467
4468         return NOTIFY_OK;
4469 }
4470
4471 static struct notifier_block intel_iommu_memory_nb = {
4472         .notifier_call = intel_iommu_memory_notifier,
4473         .priority = 0
4474 };
4475
4476
4477 static ssize_t intel_iommu_show_version(struct device *dev,
4478                                         struct device_attribute *attr,
4479                                         char *buf)
4480 {
4481         struct intel_iommu *iommu = dev_get_drvdata(dev);
4482         u32 ver = readl(iommu->reg + DMAR_VER_REG);
4483         return sprintf(buf, "%d:%d\n",
4484                        DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver));
4485 }
4486 static DEVICE_ATTR(version, S_IRUGO, intel_iommu_show_version, NULL);
4487
4488 static ssize_t intel_iommu_show_address(struct device *dev,
4489                                         struct device_attribute *attr,
4490                                         char *buf)
4491 {
4492         struct intel_iommu *iommu = dev_get_drvdata(dev);
4493         return sprintf(buf, "%llx\n", iommu->reg_phys);
4494 }
4495 static DEVICE_ATTR(address, S_IRUGO, intel_iommu_show_address, NULL);
4496
4497 static ssize_t intel_iommu_show_cap(struct device *dev,
4498                                     struct device_attribute *attr,
4499                                     char *buf)
4500 {
4501         struct intel_iommu *iommu = dev_get_drvdata(dev);
4502         return sprintf(buf, "%llx\n", iommu->cap);
4503 }
4504 static DEVICE_ATTR(cap, S_IRUGO, intel_iommu_show_cap, NULL);
4505
4506 static ssize_t intel_iommu_show_ecap(struct device *dev,
4507                                      struct device_attribute *attr,
4508                                      char *buf)
4509 {
4510         struct intel_iommu *iommu = dev_get_drvdata(dev);
4511         return sprintf(buf, "%llx\n", iommu->ecap);
4512 }
4513 static DEVICE_ATTR(ecap, S_IRUGO, intel_iommu_show_ecap, NULL);
4514
4515 static ssize_t intel_iommu_show_ndoms(struct device *dev,
4516                                       struct device_attribute *attr,
4517                                       char *buf)
4518 {
4519         struct intel_iommu *iommu = dev_get_drvdata(dev);
4520         return sprintf(buf, "%ld\n", cap_ndoms(iommu->cap));
4521 }
4522 static DEVICE_ATTR(domains_supported, S_IRUGO, intel_iommu_show_ndoms, NULL);
4523
4524 static ssize_t intel_iommu_show_ndoms_used(struct device *dev,
4525                                            struct device_attribute *attr,
4526                                            char *buf)
4527 {
4528         struct intel_iommu *iommu = dev_get_drvdata(dev);
4529         return sprintf(buf, "%d\n", bitmap_weight(iommu->domain_ids,
4530                                                   cap_ndoms(iommu->cap)));
4531 }
4532 static DEVICE_ATTR(domains_used, S_IRUGO, intel_iommu_show_ndoms_used, NULL);
4533
4534 static struct attribute *intel_iommu_attrs[] = {
4535         &dev_attr_version.attr,
4536         &dev_attr_address.attr,
4537         &dev_attr_cap.attr,
4538         &dev_attr_ecap.attr,
4539         &dev_attr_domains_supported.attr,
4540         &dev_attr_domains_used.attr,
4541         NULL,
4542 };
4543
4544 static struct attribute_group intel_iommu_group = {
4545         .name = "intel-iommu",
4546         .attrs = intel_iommu_attrs,
4547 };
4548
4549 const struct attribute_group *intel_iommu_groups[] = {
4550         &intel_iommu_group,
4551         NULL,
4552 };
4553
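/*
 * Main VT-d initialization: parse the DMAR tables and device scopes, set up
 * the remapping hardware and default domains via init_dmars(), switch
 * dma_ops to the Intel implementation, and register the IOMMU ops plus the
 * bus and memory-hotplug notifiers.  On a TXT/tboot launch (force_on) any
 * failure here is fatal.
 */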
4554 int __init intel_iommu_init(void)
4555 {
4556         int ret = -ENODEV;
4557         struct dmar_drhd_unit *drhd;
4558         struct intel_iommu *iommu;
4559
4560         /* VT-d is required for a TXT/tboot launch, so enforce that */
4561         force_on = tboot_force_iommu();
4562
4563         if (iommu_init_mempool()) {
4564                 if (force_on)
4565                         panic("tboot: Failed to initialize iommu memory\n");
4566                 return -ENOMEM;
4567         }
4568
4569         down_write(&dmar_global_lock);
4570         if (dmar_table_init()) {
4571                 if (force_on)
4572                         panic("tboot: Failed to initialize DMAR table\n");
4573                 goto out_free_dmar;
4574         }
4575
4576         if (dmar_dev_scope_init() < 0) {
4577                 if (force_on)
4578                         panic("tboot: Failed to initialize DMAR device scope\n");
4579                 goto out_free_dmar;
4580         }
4581
4582         if (no_iommu || dmar_disabled)
4583                 goto out_free_dmar;
4584
4585         if (list_empty(&dmar_rmrr_units))
4586                 pr_info("No RMRR found\n");
4587
4588         if (list_empty(&dmar_atsr_units))
4589                 pr_info("No ATSR found\n");
4590
4591         if (dmar_init_reserved_ranges()) {
4592                 if (force_on)
4593                         panic("tboot: Failed to reserve iommu ranges\n");
4594                 goto out_free_reserved_range;
4595         }
4596
4597         init_no_remapping_devices();
4598
4599         ret = init_dmars();
4600         if (ret) {
4601                 if (force_on)
4602                         panic("tboot: Failed to initialize DMARs\n");
4603                 pr_err("Initialization failed\n");
4604                 goto out_free_reserved_range;
4605         }
4606         up_write(&dmar_global_lock);
4607         pr_info("Intel(R) Virtualization Technology for Directed I/O\n");
4608
4609         init_timer(&unmap_timer);
4610 #ifdef CONFIG_SWIOTLB
4611         swiotlb = 0;
4612 #endif
4613         dma_ops = &intel_dma_ops;
4614
4615         init_iommu_pm_ops();
4616
4617         for_each_active_iommu(iommu, drhd)
4618                 iommu->iommu_dev = iommu_device_create(NULL, iommu,
4619                                                        intel_iommu_groups,
4620                                                        "%s", iommu->name);
4621
4622         bus_set_iommu(&pci_bus_type, &intel_iommu_ops);
4623         bus_register_notifier(&pci_bus_type, &device_nb);
4624         if (si_domain && !hw_pass_through)
4625                 register_memory_notifier(&intel_iommu_memory_nb);
4626
4627         intel_iommu_enabled = 1;
4628
4629         return 0;
4630
4631 out_free_reserved_range:
4632         put_iova_domain(&reserved_iova_list);
4633 out_free_dmar:
4634         intel_iommu_free_dmars();
4635         up_write(&dmar_global_lock);
4636         iommu_exit_mempool();
4637         return ret;
4638 }
4639
4640 static int domain_context_clear_one_cb(struct pci_dev *pdev, u16 alias, void *opaque)
4641 {
4642         struct intel_iommu *iommu = opaque;
4643
4644         domain_context_clear_one(iommu, PCI_BUS_NUM(alias), alias & 0xff);
4645         return 0;
4646 }
4647
4648 /*
4649  * NB - intel-iommu lacks any sort of reference counting for the users of
4650  * dependent devices.  If multiple endpoints have intersecting dependent
4651  * devices, unbinding the driver from any one of them will possibly leave
4652  * the others unable to operate.
4653  */
4654 static void domain_context_clear(struct intel_iommu *iommu, struct device *dev)
4655 {
4656         if (!iommu || !dev || !dev_is_pci(dev))
4657                 return;
4658
4659         pci_for_each_dma_alias(to_pci_dev(dev), &domain_context_clear_one_cb, iommu);
4660 }
4661
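/*
 * Tear down one device_domain_info: disable the device IOTLB, clear the
 * context entries for every DMA alias of the device, unlink the info and
 * drop the domain's reference on the IOMMU.  Caller must hold
 * device_domain_lock.
 */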
4662 static void __dmar_remove_one_dev_info(struct device_domain_info *info)
4663 {
4664         struct intel_iommu *iommu;
4665         unsigned long flags;
4666
4667         assert_spin_locked(&device_domain_lock);
4668
4669         if (WARN_ON(!info))
4670                 return;
4671
4672         iommu = info->iommu;
4673
4674         if (info->dev) {
4675                 iommu_disable_dev_iotlb(info);
4676                 domain_context_clear(iommu, info->dev);
4677         }
4678
4679         unlink_domain_info(info);
4680
4681         spin_lock_irqsave(&iommu->lock, flags);
4682         domain_detach_iommu(info->domain, iommu);
4683         spin_unlock_irqrestore(&iommu->lock, flags);
4684
4685         free_devinfo_mem(info);
4686 }
4687
4688 static void dmar_remove_one_dev_info(struct dmar_domain *domain,
4689                                      struct device *dev)
4690 {
4691         struct device_domain_info *info;
4692         unsigned long flags;
4693
4694         spin_lock_irqsave(&device_domain_lock, flags);
4695         info = dev->archdata.iommu;
4696         __dmar_remove_one_dev_info(info);
4697         spin_unlock_irqrestore(&device_domain_lock, flags);
4698 }
4699
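/*
 * Minimal domain setup for externally managed (IOMMU API) domains:
 * initialize the IOVA allocator, derive the AGAW from the requested guest
 * address width and allocate the top-level page directory.
 */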
4700 static int md_domain_init(struct dmar_domain *domain, int guest_width)
4701 {
4702         int adjust_width;
4703
4704         init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN,
4705                         DMA_32BIT_PFN);
4706         domain_reserve_special_ranges(domain);
4707
4708         /* calculate AGAW */
4709         domain->gaw = guest_width;
4710         adjust_width = guestwidth_to_adjustwidth(guest_width);
4711         domain->agaw = width_to_agaw(adjust_width);
4712
4713         domain->iommu_coherency = 0;
4714         domain->iommu_snooping = 0;
4715         domain->iommu_superpage = 0;
4716         domain->max_addr = 0;
4717
4718         /* always allocate the top pgd */
4719         domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
4720         if (!domain->pgd)
4721                 return -ENOMEM;
4722         domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
4723         return 0;
4724 }
4725
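/*
 * IOMMU API domain allocation: only IOMMU_DOMAIN_UNMANAGED is supported;
 * such domains are backed by a VM-flagged dmar_domain using the default
 * domain address width, with the usable range reported through the
 * domain geometry.
 */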
4726 static struct iommu_domain *intel_iommu_domain_alloc(unsigned type)
4727 {
4728         struct dmar_domain *dmar_domain;
4729         struct iommu_domain *domain;
4730
4731         if (type != IOMMU_DOMAIN_UNMANAGED)
4732                 return NULL;
4733
4734         dmar_domain = alloc_domain(DOMAIN_FLAG_VIRTUAL_MACHINE);
4735         if (!dmar_domain) {
4736                 pr_err("Can't allocate dmar_domain\n");
4737                 return NULL;
4738         }
4739         if (md_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
4740                 pr_err("Domain initialization failed\n");
4741                 domain_exit(dmar_domain);
4742                 return NULL;
4743         }
4744         domain_update_iommu_cap(dmar_domain);
4745
4746         domain = &dmar_domain->domain;
4747         domain->geometry.aperture_start = 0;
4748         domain->geometry.aperture_end   = __DOMAIN_MAX_ADDR(dmar_domain->gaw);
4749         domain->geometry.force_aperture = true;
4750
4751         return domain;
4752 }
4753
4754 static void intel_iommu_domain_free(struct iommu_domain *domain)
4755 {
4756         domain_exit(to_dmar_domain(domain));
4757 }
4758
4759 static int intel_iommu_attach_device(struct iommu_domain *domain,
4760                                      struct device *dev)
4761 {
4762         struct dmar_domain *dmar_domain = to_dmar_domain(domain);
4763         struct intel_iommu *iommu;
4764         int addr_width;
4765         u8 bus, devfn;
4766
4767         if (device_is_rmrr_locked(dev)) {
4768                 dev_warn(dev, "Device is ineligible for IOMMU domain attach due to platform RMRR requirement.  Contact your platform vendor.\n");
4769                 return -EPERM;
4770         }
4771
4772         /* normally dev is not mapped */
4773         if (unlikely(domain_context_mapped(dev))) {
4774                 struct dmar_domain *old_domain;
4775
4776                 old_domain = find_domain(dev);
4777                 if (old_domain) {
4778                         rcu_read_lock();
4779                         dmar_remove_one_dev_info(old_domain, dev);
4780                         rcu_read_unlock();
4781
4782                         if (!domain_type_is_vm_or_si(old_domain) &&
4783                              list_empty(&old_domain->devices))
4784                                 domain_exit(old_domain);
4785                 }
4786         }
4787
4788         iommu = device_to_iommu(dev, &bus, &devfn);
4789         if (!iommu)
4790                 return -ENODEV;
4791
4792         /* check if this iommu agaw is sufficient for max mapped address */
4793         addr_width = agaw_to_width(iommu->agaw);
4794         if (addr_width > cap_mgaw(iommu->cap))
4795                 addr_width = cap_mgaw(iommu->cap);
4796
4797         if (dmar_domain->max_addr > (1LL << addr_width)) {
4798                 pr_err("%s: iommu width (%d) is not sufficient for the mapped address (%llx)\n",
4799                        __func__, addr_width,
4800                        dmar_domain->max_addr);
4801                 return -EFAULT;
4802         }
4803         dmar_domain->gaw = addr_width;
4804
4805         /*
4806          * Knock out extra levels of page tables if necessary
4807          */
4808         while (iommu->agaw < dmar_domain->agaw) {
4809                 struct dma_pte *pte;
4810
4811                 pte = dmar_domain->pgd;
4812                 if (dma_pte_present(pte)) {
4813                         dmar_domain->pgd = (struct dma_pte *)
4814                                 phys_to_virt(dma_pte_addr(pte));
4815                         free_pgtable_page(pte);
4816                 }
4817                 dmar_domain->agaw--;
4818         }
4819
4820         return domain_add_dev_info(dmar_domain, dev);
4821 }
4822
4823 static void intel_iommu_detach_device(struct iommu_domain *domain,
4824                                       struct device *dev)
4825 {
4826         dmar_remove_one_dev_info(to_dmar_domain(domain), dev);
4827 }
4828
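/*
 * Map an IOVA range in an IOMMU API domain: translate IOMMU_READ/WRITE/
 * CACHE into DMA PTE bits (snooping only if the domain supports it), make
 * sure the mapping fits within the domain's address width, then install
 * the page table entries.
 */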
4829 static int intel_iommu_map(struct iommu_domain *domain,
4830                            unsigned long iova, phys_addr_t hpa,
4831                            size_t size, int iommu_prot)
4832 {
4833         struct dmar_domain *dmar_domain = to_dmar_domain(domain);
4834         u64 max_addr;
4835         int prot = 0;
4836         int ret;
4837
4838         if (iommu_prot & IOMMU_READ)
4839                 prot |= DMA_PTE_READ;
4840         if (iommu_prot & IOMMU_WRITE)
4841                 prot |= DMA_PTE_WRITE;
4842         if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping)
4843                 prot |= DMA_PTE_SNP;
4844
4845         max_addr = iova + size;
4846         if (dmar_domain->max_addr < max_addr) {
4847                 u64 end;
4848
4849                 /* check if minimum agaw is sufficient for mapped address */
4850                 end = __DOMAIN_MAX_ADDR(dmar_domain->gaw) + 1;
4851                 if (end < max_addr) {
4852                         pr_err("%s: iommu width (%d) is not sufficient for the mapped address (%llx)\n",
4853                                __func__, dmar_domain->gaw,
4854                                max_addr);
4855                         return -EFAULT;
4856                 }
4857                 dmar_domain->max_addr = max_addr;
4858         }
4859         /* Round up size to next multiple of PAGE_SIZE, if it and
4860            the low bits of hpa would take us onto the next page */
4861         size = aligned_nrpages(hpa, size);
4862         ret = domain_pfn_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT,
4863                                  hpa >> VTD_PAGE_SHIFT, size, prot);
4864         return ret;
4865 }
4866
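/*
 * Unmap an IOVA range: clear the page tables, flush the IOTLB on every
 * IOMMU the domain is attached to, and only then free the page-table
 * pages collected on the freelist.  Note the size actually unmapped may
 * be rounded up to a large-page boundary (see below).
 */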
4867 static size_t intel_iommu_unmap(struct iommu_domain *domain,
4868                                 unsigned long iova, size_t size)
4869 {
4870         struct dmar_domain *dmar_domain = to_dmar_domain(domain);
4871         struct page *freelist = NULL;
4872         struct intel_iommu *iommu;
4873         unsigned long start_pfn, last_pfn;
4874         unsigned int npages;
4875         int iommu_id, level = 0;
4876
4877         /* Cope with horrid API which requires us to unmap more than the
4878            size argument if it happens to be a large-page mapping. */
4879         BUG_ON(!pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level));
4880
4881         if (size < VTD_PAGE_SIZE << level_to_offset_bits(level))
4882                 size = VTD_PAGE_SIZE << level_to_offset_bits(level);
4883
4884         start_pfn = iova >> VTD_PAGE_SHIFT;
4885         last_pfn = (iova + size - 1) >> VTD_PAGE_SHIFT;
4886
4887         freelist = domain_unmap(dmar_domain, start_pfn, last_pfn);
4888
4889         npages = last_pfn - start_pfn + 1;
4890
4891         for_each_domain_iommu(iommu_id, dmar_domain) {
4892                 iommu = g_iommus[iommu_id];
4893
4894                 iommu_flush_iotlb_psi(iommu, dmar_domain,
4895                                       start_pfn, npages, !freelist, 0);
4896         }
4897
4898         dma_free_pagelist(freelist);
4899
4900         if (dmar_domain->max_addr == iova + size)
4901                 dmar_domain->max_addr = iova;
4902
4903         return size;
4904 }
4905
4906 static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
4907                                             dma_addr_t iova)
4908 {
4909         struct dmar_domain *dmar_domain = to_dmar_domain(domain);
4910         struct dma_pte *pte;
4911         int level = 0;
4912         u64 phys = 0;
4913
4914         pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level);
4915         if (pte)
4916                 phys = dma_pte_addr(pte);
4917
4918         return phys;
4919 }
4920
4921 static bool intel_iommu_capable(enum iommu_cap cap)
4922 {
4923         if (cap == IOMMU_CAP_CACHE_COHERENCY)
4924                 return domain_update_iommu_snooping(NULL) == 1;
4925         if (cap == IOMMU_CAP_INTR_REMAP)
4926                 return irq_remapping_enabled == 1;
4927
4928         return false;
4929 }
4930
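/*
 * iommu_ops add_device/remove_device callbacks: link or unlink the device
 * with its IOMMU's sysfs node and join or leave the corresponding IOMMU
 * group.
 */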
4931 static int intel_iommu_add_device(struct device *dev)
4932 {
4933         struct intel_iommu *iommu;
4934         struct iommu_group *group;
4935         u8 bus, devfn;
4936
4937         iommu = device_to_iommu(dev, &bus, &devfn);
4938         if (!iommu)
4939                 return -ENODEV;
4940
4941         iommu_device_link(iommu->iommu_dev, dev);
4942
4943         group = iommu_group_get_for_dev(dev);
4944
4945         if (IS_ERR(group))
4946                 return PTR_ERR(group);
4947
4948         iommu_group_put(group);
4949         return 0;
4950 }
4951
4952 static void intel_iommu_remove_device(struct device *dev)
4953 {
4954         struct intel_iommu *iommu;
4955         u8 bus, devfn;
4956
4957         iommu = device_to_iommu(dev, &bus, &devfn);
4958         if (!iommu)
4959                 return;
4960
4961         iommu_group_remove_device(dev);
4962
4963         iommu_device_unlink(iommu->iommu_dev, dev);
4964 }
4965
4966 #ifdef CONFIG_INTEL_IOMMU_SVM
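/*
 * Enable PASID handling (and, where supported, device IOTLB and page
 * request) in the extended context entry of an SVM-capable device,
 * converting pass-through entries to the PASID-aware translation types
 * and flushing the context cache afterwards.
 */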
4967 int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct intel_svm_dev *sdev)
4968 {
4969         struct device_domain_info *info;
4970         struct context_entry *context;
4971         struct dmar_domain *domain;
4972         unsigned long flags;
4973         u64 ctx_lo;
4974         int ret;
4975
4976         domain = get_valid_domain_for_dev(sdev->dev);
4977         if (!domain)
4978                 return -EINVAL;
4979
4980         spin_lock_irqsave(&device_domain_lock, flags);
4981         spin_lock(&iommu->lock);
4982
4983         ret = -EINVAL;
4984         info = sdev->dev->archdata.iommu;
4985         if (!info || !info->pasid_supported)
4986                 goto out;
4987
4988         context = iommu_context_addr(iommu, info->bus, info->devfn, 0);
4989         if (WARN_ON(!context))
4990                 goto out;
4991
4992         ctx_lo = context[0].lo;
4993
4994         sdev->did = domain->iommu_did[iommu->seq_id];
4995         sdev->sid = PCI_DEVID(info->bus, info->devfn);
4996
4997         if (!(ctx_lo & CONTEXT_PASIDE)) {
4998                 context[1].hi = (u64)virt_to_phys(iommu->pasid_state_table);
4999                 context[1].lo = (u64)virt_to_phys(iommu->pasid_table) | ecap_pss(iommu->ecap);
5000                 wmb();
5001                 /* CONTEXT_TT_MULTI_LEVEL and CONTEXT_TT_DEV_IOTLB are both
5002                  * extended to permit requests-with-PASID if the PASIDE bit
5003                  * is set, which makes sense. For CONTEXT_TT_PASS_THROUGH,
5004                  * however, the PASIDE bit is ignored and requests-with-PASID
5005                  * are unconditionally blocked, which makes less sense.
5006                  * So convert from CONTEXT_TT_PASS_THROUGH to one of the new
5007                  * "guest mode" translation types depending on whether ATS
5008                  * is available or not. Annoyingly, we can't use the new
5009                  * modes *unless* PASIDE is set. */
5010                 if ((ctx_lo & CONTEXT_TT_MASK) == (CONTEXT_TT_PASS_THROUGH << 2)) {
5011                         ctx_lo &= ~CONTEXT_TT_MASK;
5012                         if (info->ats_supported)
5013                                 ctx_lo |= CONTEXT_TT_PT_PASID_DEV_IOTLB << 2;
5014                         else
5015                                 ctx_lo |= CONTEXT_TT_PT_PASID << 2;
5016                 }
5017                 ctx_lo |= CONTEXT_PASIDE;
5018                 if (iommu->pasid_state_table)
5019                         ctx_lo |= CONTEXT_DINVE;
5020                 if (info->pri_supported)
5021                         ctx_lo |= CONTEXT_PRS;
5022                 context[0].lo = ctx_lo;
5023                 wmb();
5024                 iommu->flush.flush_context(iommu, sdev->did, sdev->sid,
5025                                            DMA_CCMD_MASK_NOBIT,
5026                                            DMA_CCMD_DEVICE_INVL);
5027         }
5028
5029         /* Enable PASID support in the device, if it wasn't already */
5030         if (!info->pasid_enabled)
5031                 iommu_enable_dev_iotlb(info);
5032
5033         if (info->ats_enabled) {
5034                 sdev->dev_iotlb = 1;
5035                 sdev->qdep = info->ats_qdep;
5036                 if (sdev->qdep >= QI_DEV_EIOTLB_MAX_INVS)
5037                         sdev->qdep = 0;
5038         }
5039         ret = 0;
5040
5041  out:
5042         spin_unlock(&iommu->lock);
5043         spin_unlock_irqrestore(&device_domain_lock, flags);
5044
5045         return ret;
5046 }
5047
5048 struct intel_iommu *intel_svm_device_to_iommu(struct device *dev)
5049 {
5050         struct intel_iommu *iommu;
5051         u8 bus, devfn;
5052
5053         if (iommu_dummy(dev)) {
5054                 dev_warn(dev,
5055                          "No IOMMU translation for device; cannot enable SVM\n");
5056                 return NULL;
5057         }
5058
5059         iommu = device_to_iommu(dev, &bus, &devfn);
5060         if (!iommu) {
5061                 dev_err(dev, "No IOMMU for device; cannot enable SVM\n");
5062                 return NULL;
5063         }
5064
5065         if (!iommu->pasid_table) {
5066                 dev_err(dev, "PASID not enabled on IOMMU; cannot enable SVM\n");
5067                 return NULL;
5068         }
5069
5070         return iommu;
5071 }
5072 #endif /* CONFIG_INTEL_IOMMU_SVM */
5073
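/* IOMMU API ops, registered for the PCI bus in intel_iommu_init(). */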
5074 static const struct iommu_ops intel_iommu_ops = {
5075         .capable        = intel_iommu_capable,
5076         .domain_alloc   = intel_iommu_domain_alloc,
5077         .domain_free    = intel_iommu_domain_free,
5078         .attach_dev     = intel_iommu_attach_device,
5079         .detach_dev     = intel_iommu_detach_device,
5080         .map            = intel_iommu_map,
5081         .unmap          = intel_iommu_unmap,
5082         .map_sg         = default_iommu_map_sg,
5083         .iova_to_phys   = intel_iommu_iova_to_phys,
5084         .add_device     = intel_iommu_add_device,
5085         .remove_device  = intel_iommu_remove_device,
5086         .pgsize_bitmap  = INTEL_IOMMU_PGSIZES,
5087 };
5088
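/*
 * PCI quirks for chipsets with broken or incomplete graphics DMAR
 * support: G4x/GM45 parts get graphics translation disabled outright,
 * while the Mobile 4 Series quirk below forces the write-buffer-flush
 * workaround.
 */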
5089 static void quirk_iommu_g4x_gfx(struct pci_dev *dev)
5090 {
5091         /* G4x/GM45 integrated gfx dmar support is totally busted. */
5092         pr_info("Disabling IOMMU for graphics on this chipset\n");
5093         dmar_map_gfx = 0;
5094 }
5095
5096 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_g4x_gfx);
5097 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_g4x_gfx);
5098 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_g4x_gfx);
5099 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_g4x_gfx);
5100 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_g4x_gfx);
5101 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_g4x_gfx);
5102 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_g4x_gfx);
5103
5104 static void quirk_iommu_rwbf(struct pci_dev *dev)
5105 {
5106         /*
5107          * Mobile 4 Series Chipset neglects to set RWBF capability,
5108          * but needs it. Same seems to hold for the desktop versions.
5109          */
5110         pr_info("Forcing write-buffer flush capability\n");
5111         rwbf_quirk = 1;
5112 }
5113
5114 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_rwbf);
5115 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_rwbf);
5116 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_rwbf);
5117 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_rwbf);
5118 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_rwbf);
5119 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_rwbf);
5120 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_rwbf);
5121
5122 #define GGC 0x52
5123 #define GGC_MEMORY_SIZE_MASK    (0xf << 8)
5124 #define GGC_MEMORY_SIZE_NONE    (0x0 << 8)
5125 #define GGC_MEMORY_SIZE_1M      (0x1 << 8)
5126 #define GGC_MEMORY_SIZE_2M      (0x3 << 8)
5127 #define GGC_MEMORY_VT_ENABLED   (0x8 << 8)
5128 #define GGC_MEMORY_SIZE_2M_VT   (0x9 << 8)
5129 #define GGC_MEMORY_SIZE_3M_VT   (0xa << 8)
5130 #define GGC_MEMORY_SIZE_4M_VT   (0xb << 8)
5131
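/*
 * Ironlake/Calpella: if the BIOS allocated no stolen memory for a shadow
 * GTT (GGC_MEMORY_VT_ENABLED clear), graphics translation cannot work, so
 * disable it; otherwise force strict (unbatched) IOTLB flushing, since
 * deferred flushing is not safe with this graphics hardware.
 */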
5132 static void quirk_calpella_no_shadow_gtt(struct pci_dev *dev)
5133 {
5134         unsigned short ggc;
5135
5136         if (pci_read_config_word(dev, GGC, &ggc))
5137                 return;
5138
5139         if (!(ggc & GGC_MEMORY_VT_ENABLED)) {
5140                 pr_info("BIOS has allocated no shadow GTT; disabling IOMMU for graphics\n");
5141                 dmar_map_gfx = 0;
5142         } else if (dmar_map_gfx) {
5143                 /* we have to ensure the gfx device is idle before we flush */
5144                 pr_info("Disabling batched IOTLB flush on Ironlake\n");
5145                 intel_iommu_strict = 1;
5146         }
5147 }
5148 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0040, quirk_calpella_no_shadow_gtt);
5149 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, quirk_calpella_no_shadow_gtt);
5150 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0062, quirk_calpella_no_shadow_gtt);
5151 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x006a, quirk_calpella_no_shadow_gtt);
5152
5153 /* On Tylersburg chipsets, some BIOSes have been known to enable the
5154    ISOCH DMAR unit for the Azalia sound device, but not give it any
5155    TLB entries, which causes it to deadlock. Check for that.  We do
5156    this in a function called from init_dmars(), instead of in a PCI
5157    quirk, because we don't want to print the obnoxious "BIOS broken"
5158    message if VT-d is actually disabled.
5159 */
5160 static void __init check_tylersburg_isoch(void)
5161 {
5162         struct pci_dev *pdev;
5163         uint32_t vtisochctrl;
5164
5165         /* If there's no Azalia in the system anyway, forget it. */
5166         pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x3a3e, NULL);
5167         if (!pdev)
5168                 return;
5169         pci_dev_put(pdev);
5170
5171         /* System Management Registers. Might be hidden, in which case
5172            we can't do the sanity check. But that's OK, because the
5173            known-broken BIOSes _don't_ actually hide it, so far. */
5174         pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x342e, NULL);
5175         if (!pdev)
5176                 return;
5177
5178         if (pci_read_config_dword(pdev, 0x188, &vtisochctrl)) {
5179                 pci_dev_put(pdev);
5180                 return;
5181         }
5182
5183         pci_dev_put(pdev);
5184
5185         /* If Azalia DMA is routed to the non-isoch DMAR unit, fine. */
5186         if (vtisochctrl & 1)
5187                 return;
5188
5189         /* Drop all bits other than the number of TLB entries */
5190         vtisochctrl &= 0x1c;
5191
5192         /* If we have the recommended number of TLB entries (16), fine. */
5193         if (vtisochctrl == 0x10)
5194                 return;
5195
5196         /* Zero TLB entries? You get to ride the short bus to school. */
5197         if (!vtisochctrl) {
5198                 WARN(1, "Your BIOS is broken; DMA routed to ISOCH DMAR unit but no TLB space.\n"
5199                      "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
5200                      dmi_get_system_info(DMI_BIOS_VENDOR),
5201                      dmi_get_system_info(DMI_BIOS_VERSION),
5202                      dmi_get_system_info(DMI_PRODUCT_VERSION));
5203                 iommu_identity_mapping |= IDENTMAP_AZALIA;
5204                 return;
5205         }
5206
5207         pr_warn("Recommended number of TLB entries for ISOCH unit is 16; your BIOS set %d\n",
5208                 vtisochctrl);
5209 }