intel-iommu: move DMA PTE defs out of dma_remapping.h
[firefly-linux-kernel-4.4.55.git] / drivers/pci/intel-iommu.c
1 /*
2  * Copyright (c) 2006, Intel Corporation.
3  *
4  * This program is free software; you can redistribute it and/or modify it
5  * under the terms and conditions of the GNU General Public License,
6  * version 2, as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope it will be useful, but WITHOUT
9  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
11  * more details.
12  *
13  * You should have received a copy of the GNU General Public License along with
14  * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
15  * Place - Suite 330, Boston, MA 02111-1307 USA.
16  *
17  * Copyright (C) 2006-2008 Intel Corporation
18  * Author: Ashok Raj <ashok.raj@intel.com>
19  * Author: Shaohua Li <shaohua.li@intel.com>
20  * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
21  * Author: Fenghua Yu <fenghua.yu@intel.com>
22  */
23
24 #include <linux/init.h>
25 #include <linux/bitmap.h>
26 #include <linux/debugfs.h>
27 #include <linux/slab.h>
28 #include <linux/irq.h>
29 #include <linux/interrupt.h>
30 #include <linux/spinlock.h>
31 #include <linux/pci.h>
32 #include <linux/dmar.h>
33 #include <linux/dma-mapping.h>
34 #include <linux/mempool.h>
35 #include <linux/timer.h>
36 #include <linux/iova.h>
37 #include <linux/intel-iommu.h>
38 #include <asm/cacheflush.h>
39 #include <asm/iommu.h>
40 #include "pci.h"
41
42 #define ROOT_SIZE               VTD_PAGE_SIZE
43 #define CONTEXT_SIZE            VTD_PAGE_SIZE
44
45 #define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
46 #define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
47
48 #define IOAPIC_RANGE_START      (0xfee00000)
49 #define IOAPIC_RANGE_END        (0xfeefffff)
50 #define IOVA_START_ADDR         (0x1000)
51
52 #define DEFAULT_DOMAIN_ADDRESS_WIDTH 48
53
54 #define DOMAIN_MAX_ADDR(gaw) ((((u64)1) << gaw) - 1)
55
56 #define IOVA_PFN(addr)          ((addr) >> PAGE_SHIFT)
57 #define DMA_32BIT_PFN           IOVA_PFN(DMA_32BIT_MASK)
58 #define DMA_64BIT_PFN           IOVA_PFN(DMA_64BIT_MASK)
59
60 /*
61  * 0: Present
62  * 1-11: Reserved
63  * 12-63: Context Ptr (12 - (haw-1))
64  * 64-127: Reserved
65  */
66 struct root_entry {
67         u64     val;
68         u64     rsvd1;
69 };
70 #define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
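/*
 * The root table has ROOT_ENTRY_NR entries (VTD_PAGE_SIZE / 16 bytes, i.e.
 * 256 with 4KiB pages), one per PCI bus.  A present root entry points to a
 * context table of the same size, indexed by devfn; see
 * device_to_context_entry() below.
 */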
71 static inline bool root_present(struct root_entry *root)
72 {
73         return (root->val & 1);
74 }
75 static inline void set_root_present(struct root_entry *root)
76 {
77         root->val |= 1;
78 }
79 static inline void set_root_value(struct root_entry *root, unsigned long value)
80 {
81         root->val |= value & VTD_PAGE_MASK;
82 }
83
84 static inline struct context_entry *
85 get_context_addr_from_root(struct root_entry *root)
86 {
87         return (struct context_entry *)
88                 (root_present(root)?phys_to_virt(
89                 root->val & VTD_PAGE_MASK) :
90                 NULL);
91 }
92
93 /*
94  * low 64 bits:
95  * 0: present
96  * 1: fault processing disable
97  * 2-3: translation type
98  * 12-63: address space root
99  * high 64 bits:
100  * 0-2: address width
101  * 3-6: avail
102  * 8-23: domain id
103  */
104 struct context_entry {
105         u64 lo;
106         u64 hi;
107 };
108 #define context_present(c) ((c).lo & 1)
109 #define context_fault_disable(c) (((c).lo >> 1) & 1)
110 #define context_translation_type(c) (((c).lo >> 2) & 3)
111 #define context_address_root(c) ((c).lo & VTD_PAGE_MASK)
112 #define context_address_width(c) ((c).hi &  7)
113 #define context_domain_id(c) (((c).hi >> 8) & ((1 << 16) - 1))
114
115 #define context_set_present(c) do {(c).lo |= 1;} while (0)
116 #define context_set_fault_enable(c) \
117         do {(c).lo &= (((u64)-1) << 2) | 1;} while (0)
118 #define context_set_translation_type(c, val) \
119         do { \
120                 (c).lo &= (((u64)-1) << 4) | 3; \
121                 (c).lo |= ((val) & 3) << 2; \
122         } while (0)
123 #define CONTEXT_TT_MULTI_LEVEL 0
124 #define context_set_address_root(c, val) \
125         do {(c).lo |= (val) & VTD_PAGE_MASK; } while (0)
126 #define context_set_address_width(c, val) do {(c).hi |= (val) & 7;} while (0)
127 #define context_set_domain_id(c, val) \
128         do {(c).hi |= ((val) & ((1 << 16) - 1)) << 8;} while (0)
129 #define context_clear_entry(c) do {(c).lo = 0; (c).hi = 0;} while (0)
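/*
 * Note that context_set_fault_enable() enables fault reporting by clearing
 * bit 1 (the fault processing disable bit), and CONTEXT_TT_MULTI_LEVEL
 * selects normal multi-level page-table translation in the 2-bit
 * translation type field.
 */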
130
131 /*
132  * 0: readable
133  * 1: writable
134  * 2-6: reserved
135  * 7: super page
136  * 8-11: available
137  * 12-63: Host physical address
138  */
139 struct dma_pte {
140         u64 val;
141 };
142 #define dma_clear_pte(p)        do {(p).val = 0;} while (0)
143
144 #define dma_set_pte_readable(p) do {(p).val |= DMA_PTE_READ;} while (0)
145 #define dma_set_pte_writable(p) do {(p).val |= DMA_PTE_WRITE;} while (0)
146 #define dma_set_pte_prot(p, prot) \
147                 do {(p).val = ((p).val & ~3) | ((prot) & 3); } while (0)
148 #define dma_pte_addr(p) ((p).val & VTD_PAGE_MASK)
149 #define dma_set_pte_addr(p, addr) do {\
150                 (p).val |= ((addr) & VTD_PAGE_MASK); } while (0)
151 #define dma_pte_present(p) (((p).val & 3) != 0)
152
153 static void flush_unmaps_timeout(unsigned long data);
154
155 DEFINE_TIMER(unmap_timer,  flush_unmaps_timeout, 0, 0);
156
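/*
 * Deferred-unmap bookkeeping: instead of invalidating the IOTLB on every
 * unmap, freed IOVAs are batched per IOMMU (up to HIGH_WATER_MARK entries)
 * and flushed later from flush_unmaps_timeout(), driven by unmap_timer.
 * Booting with intel_iommu=strict disables this batching; see
 * intel_iommu_setup() below.
 */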
157 #define HIGH_WATER_MARK 250
158 struct deferred_flush_tables {
159         int next;
160         struct iova *iova[HIGH_WATER_MARK];
161         struct dmar_domain *domain[HIGH_WATER_MARK];
162 };
163
164 static struct deferred_flush_tables *deferred_flush;
165
166 /* number of IOMMUs in the system */
167 static int g_num_of_iommus;
168
169 static DEFINE_SPINLOCK(async_umap_flush_lock);
170 static LIST_HEAD(unmaps_to_do);
171
172 static int timer_on;
173 static long list_size;
174
175 static void domain_remove_dev_info(struct dmar_domain *domain);
176
177 int dmar_disabled;
178 static int __initdata dmar_map_gfx = 1;
179 static int dmar_forcedac;
180 static int intel_iommu_strict;
181
182 #define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
183 static DEFINE_SPINLOCK(device_domain_lock);
184 static LIST_HEAD(device_domain_list);
185
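/*
 * Handle the "intel_iommu=" kernel command line options, a comma-separated
 * combination of off, igfx_off, forcedac and strict, e.g.
 * "intel_iommu=igfx_off,strict".
 */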
186 static int __init intel_iommu_setup(char *str)
187 {
188         if (!str)
189                 return -EINVAL;
190         while (*str) {
191                 if (!strncmp(str, "off", 3)) {
192                         dmar_disabled = 1;
193                         printk(KERN_INFO"Intel-IOMMU: disabled\n");
194                 } else if (!strncmp(str, "igfx_off", 8)) {
195                         dmar_map_gfx = 0;
196                         printk(KERN_INFO
197                                 "Intel-IOMMU: disable GFX device mapping\n");
198                 } else if (!strncmp(str, "forcedac", 8)) {
199                         printk(KERN_INFO
200                                 "Intel-IOMMU: Forcing DAC for PCI devices\n");
201                         dmar_forcedac = 1;
202                 } else if (!strncmp(str, "strict", 6)) {
203                         printk(KERN_INFO
204                                 "Intel-IOMMU: disable batched IOTLB flush\n");
205                         intel_iommu_strict = 1;
206                 }
207
208                 str += strcspn(str, ",");
209                 while (*str == ',')
210                         str++;
211         }
212         return 0;
213 }
214 __setup("intel_iommu=", intel_iommu_setup);
215
216 static struct kmem_cache *iommu_domain_cache;
217 static struct kmem_cache *iommu_devinfo_cache;
218 static struct kmem_cache *iommu_iova_cache;
219
220 static inline void *iommu_kmem_cache_alloc(struct kmem_cache *cachep)
221 {
222         unsigned int flags;
223         void *vaddr;
224
225         /* trying to avoid low memory issues */
226         flags = current->flags & PF_MEMALLOC;
227         current->flags |= PF_MEMALLOC;
228         vaddr = kmem_cache_alloc(cachep, GFP_ATOMIC);
229         current->flags &= (~PF_MEMALLOC | flags);
230         return vaddr;
231 }
232
233
234 static inline void *alloc_pgtable_page(void)
235 {
236         unsigned int flags;
237         void *vaddr;
238
239         /* trying to avoid low memory issues */
240         flags = current->flags & PF_MEMALLOC;
241         current->flags |= PF_MEMALLOC;
242         vaddr = (void *)get_zeroed_page(GFP_ATOMIC);
243         current->flags &= (~PF_MEMALLOC | flags);
244         return vaddr;
245 }
246
247 static inline void free_pgtable_page(void *vaddr)
248 {
249         free_page((unsigned long)vaddr);
250 }
251
252 static inline void *alloc_domain_mem(void)
253 {
254         return iommu_kmem_cache_alloc(iommu_domain_cache);
255 }
256
257 static void free_domain_mem(void *vaddr)
258 {
259         kmem_cache_free(iommu_domain_cache, vaddr);
260 }
261
262 static inline void * alloc_devinfo_mem(void)
263 {
264         return iommu_kmem_cache_alloc(iommu_devinfo_cache);
265 }
266
267 static inline void free_devinfo_mem(void *vaddr)
268 {
269         kmem_cache_free(iommu_devinfo_cache, vaddr);
270 }
271
272 struct iova *alloc_iova_mem(void)
273 {
274         return iommu_kmem_cache_alloc(iommu_iova_cache);
275 }
276
277 void free_iova_mem(struct iova *iova)
278 {
279         kmem_cache_free(iommu_iova_cache, iova);
280 }
281
282 /* Gets context entry for a given bus and devfn */
283 static struct context_entry * device_to_context_entry(struct intel_iommu *iommu,
284                 u8 bus, u8 devfn)
285 {
286         struct root_entry *root;
287         struct context_entry *context;
288         unsigned long phy_addr;
289         unsigned long flags;
290
291         spin_lock_irqsave(&iommu->lock, flags);
292         root = &iommu->root_entry[bus];
293         context = get_context_addr_from_root(root);
294         if (!context) {
295                 context = (struct context_entry *)alloc_pgtable_page();
296                 if (!context) {
297                         spin_unlock_irqrestore(&iommu->lock, flags);
298                         return NULL;
299                 }
300                 __iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
301                 phy_addr = virt_to_phys((void *)context);
302                 set_root_value(root, phy_addr);
303                 set_root_present(root);
304                 __iommu_flush_cache(iommu, root, sizeof(*root));
305         }
306         spin_unlock_irqrestore(&iommu->lock, flags);
307         return &context[devfn];
308 }
309
310 static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
311 {
312         struct root_entry *root;
313         struct context_entry *context;
314         int ret;
315         unsigned long flags;
316
317         spin_lock_irqsave(&iommu->lock, flags);
318         root = &iommu->root_entry[bus];
319         context = get_context_addr_from_root(root);
320         if (!context) {
321                 ret = 0;
322                 goto out;
323         }
324         ret = context_present(context[devfn]);
325 out:
326         spin_unlock_irqrestore(&iommu->lock, flags);
327         return ret;
328 }
329
330 static void clear_context_table(struct intel_iommu *iommu, u8 bus, u8 devfn)
331 {
332         struct root_entry *root;
333         struct context_entry *context;
334         unsigned long flags;
335
336         spin_lock_irqsave(&iommu->lock, flags);
337         root = &iommu->root_entry[bus];
338         context = get_context_addr_from_root(root);
339         if (context) {
340                 context_clear_entry(context[devfn]);
341                 __iommu_flush_cache(iommu, &context[devfn], \
342                         sizeof(*context));
343         }
344         spin_unlock_irqrestore(&iommu->lock, flags);
345 }
346
347 static void free_context_table(struct intel_iommu *iommu)
348 {
349         struct root_entry *root;
350         int i;
351         unsigned long flags;
352         struct context_entry *context;
353
354         spin_lock_irqsave(&iommu->lock, flags);
355         if (!iommu->root_entry) {
356                 goto out;
357         }
358         for (i = 0; i < ROOT_ENTRY_NR; i++) {
359                 root = &iommu->root_entry[i];
360                 context = get_context_addr_from_root(root);
361                 if (context)
362                         free_pgtable_page(context);
363         }
364         free_pgtable_page(iommu->root_entry);
365         iommu->root_entry = NULL;
366 out:
367         spin_unlock_irqrestore(&iommu->lock, flags);
368 }
369
370 /* page table handling */
371 #define LEVEL_STRIDE            (9)
372 #define LEVEL_MASK              (((u64)1 << LEVEL_STRIDE) - 1)
373
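/*
 * AGAW (adjusted guest address width) helpers: each page-table level decodes
 * LEVEL_STRIDE (9) address bits, so a domain with a given agaw uses
 * agaw + 2 levels and covers 30 + 9 * agaw address bits, e.g. agaw 2 means a
 * 4-level table covering a 48-bit address space.
 */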
374 static inline int agaw_to_level(int agaw)
375 {
376         return agaw + 2;
377 }
378
379 static inline int agaw_to_width(int agaw)
380 {
381         return 30 + agaw * LEVEL_STRIDE;
382
383 }
384
385 static inline int width_to_agaw(int width)
386 {
387         return (width - 30) / LEVEL_STRIDE;
388 }
389
390 static inline unsigned int level_to_offset_bits(int level)
391 {
392         return (12 + (level - 1) * LEVEL_STRIDE);
393 }
394
395 static inline int address_level_offset(u64 addr, int level)
396 {
397         return ((addr >> level_to_offset_bits(level)) & LEVEL_MASK);
398 }
399
400 static inline u64 level_mask(int level)
401 {
402         return ((u64)-1 << level_to_offset_bits(level));
403 }
404
405 static inline u64 level_size(int level)
406 {
407         return ((u64)1 << level_to_offset_bits(level));
408 }
409
410 static inline u64 align_to_level(u64 addr, int level)
411 {
412         return ((addr + level_size(level) - 1) & level_mask(level));
413 }
414
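/*
 * Walk the domain's page table down to level 1 and return the leaf PTE for
 * @addr, allocating (and cache-flushing) any missing intermediate
 * page-table pages on the way down.
 */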
415 static struct dma_pte * addr_to_dma_pte(struct dmar_domain *domain, u64 addr)
416 {
417         int addr_width = agaw_to_width(domain->agaw);
418         struct dma_pte *parent, *pte = NULL;
419         int level = agaw_to_level(domain->agaw);
420         int offset;
421         unsigned long flags;
422
423         BUG_ON(!domain->pgd);
424
425         addr &= (((u64)1) << addr_width) - 1;
426         parent = domain->pgd;
427
428         spin_lock_irqsave(&domain->mapping_lock, flags);
429         while (level > 0) {
430                 void *tmp_page;
431
432                 offset = address_level_offset(addr, level);
433                 pte = &parent[offset];
434                 if (level == 1)
435                         break;
436
437                 if (!dma_pte_present(*pte)) {
438                         tmp_page = alloc_pgtable_page();
439
440                         if (!tmp_page) {
441                                 spin_unlock_irqrestore(&domain->mapping_lock,
442                                         flags);
443                                 return NULL;
444                         }
445                         __iommu_flush_cache(domain->iommu, tmp_page,
446                                         PAGE_SIZE);
447                         dma_set_pte_addr(*pte, virt_to_phys(tmp_page));
448                         /*
449                          * higher level tables always set r/w; the last level
450                          * page table controls read/write
451                          */
452                         dma_set_pte_readable(*pte);
453                         dma_set_pte_writable(*pte);
454                         __iommu_flush_cache(domain->iommu, pte, sizeof(*pte));
455                 }
456                 parent = phys_to_virt(dma_pte_addr(*pte));
457                 level--;
458         }
459
460         spin_unlock_irqrestore(&domain->mapping_lock, flags);
461         return pte;
462 }
463
464 /* return the address's pte at a specific level */
465 static struct dma_pte *dma_addr_level_pte(struct dmar_domain *domain, u64 addr,
466                 int level)
467 {
468         struct dma_pte *parent, *pte = NULL;
469         int total = agaw_to_level(domain->agaw);
470         int offset;
471
472         parent = domain->pgd;
473         while (level <= total) {
474                 offset = address_level_offset(addr, total);
475                 pte = &parent[offset];
476                 if (level == total)
477                         return pte;
478
479                 if (!dma_pte_present(*pte))
480                         break;
481                 parent = phys_to_virt(dma_pte_addr(*pte));
482                 total--;
483         }
484         return NULL;
485 }
486
487 /* clear one page's last level pte */
488 static void dma_pte_clear_one(struct dmar_domain *domain, u64 addr)
489 {
490         struct dma_pte *pte = NULL;
491
492         /* get last level pte */
493         pte = dma_addr_level_pte(domain, addr, 1);
494
495         if (pte) {
496                 dma_clear_pte(*pte);
497                 __iommu_flush_cache(domain->iommu, pte, sizeof(*pte));
498         }
499 }
500
501 /* clear last level pte; a tlb flush should follow */
502 static void dma_pte_clear_range(struct dmar_domain *domain, u64 start, u64 end)
503 {
504         int addr_width = agaw_to_width(domain->agaw);
505
506         start &= (((u64)1) << addr_width) - 1;
507         end &= (((u64)1) << addr_width) - 1;
508         /* in case it's a partial page */
509         start = PAGE_ALIGN(start);
510         end &= PAGE_MASK;
511
512         /* we don't need a lock here; nobody else touches this iova range */
513         while (start < end) {
514                 dma_pte_clear_one(domain, start);
515                 start += VTD_PAGE_SIZE;
516         }
517 }
518
519 /* free page table pages. last level pte should already be cleared */
520 static void dma_pte_free_pagetable(struct dmar_domain *domain,
521         u64 start, u64 end)
522 {
523         int addr_width = agaw_to_width(domain->agaw);
524         struct dma_pte *pte;
525         int total = agaw_to_level(domain->agaw);
526         int level;
527         u64 tmp;
528
529         start &= (((u64)1) << addr_width) - 1;
530         end &= (((u64)1) << addr_width) - 1;
531
532         /* we don't need a lock here; nobody else touches this iova range */
533         level = 2;
534         while (level <= total) {
535                 tmp = align_to_level(start, level);
536                 if (tmp >= end || (tmp + level_size(level) > end))
537                         return;
538
539                 while (tmp < end) {
540                         pte = dma_addr_level_pte(domain, tmp, level);
541                         if (pte) {
542                                 free_pgtable_page(
543                                         phys_to_virt(dma_pte_addr(*pte)));
544                                 dma_clear_pte(*pte);
545                                 __iommu_flush_cache(domain->iommu,
546                                                 pte, sizeof(*pte));
547                         }
548                         tmp += level_size(level);
549                 }
550                 level++;
551         }
552         /* free pgd */
553         if (start == 0 && end >= ((((u64)1) << addr_width) - 1)) {
554                 free_pgtable_page(domain->pgd);
555                 domain->pgd = NULL;
556         }
557 }
558
559 /* iommu handling */
560 static int iommu_alloc_root_entry(struct intel_iommu *iommu)
561 {
562         struct root_entry *root;
563         unsigned long flags;
564
565         root = (struct root_entry *)alloc_pgtable_page();
566         if (!root)
567                 return -ENOMEM;
568
569         __iommu_flush_cache(iommu, root, ROOT_SIZE);
570
571         spin_lock_irqsave(&iommu->lock, flags);
572         iommu->root_entry = root;
573         spin_unlock_irqrestore(&iommu->lock, flags);
574
575         return 0;
576 }
577
578 static void iommu_set_root_entry(struct intel_iommu *iommu)
579 {
580         void *addr;
581         u32 cmd, sts;
582         unsigned long flag;
583
584         addr = iommu->root_entry;
585
586         spin_lock_irqsave(&iommu->register_lock, flag);
587         dmar_writeq(iommu->reg + DMAR_RTADDR_REG, virt_to_phys(addr));
588
589         cmd = iommu->gcmd | DMA_GCMD_SRTP;
590         writel(cmd, iommu->reg + DMAR_GCMD_REG);
591
592         /* Make sure hardware completes it */
593         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
594                 readl, (sts & DMA_GSTS_RTPS), sts);
595
596         spin_unlock_irqrestore(&iommu->register_lock, flag);
597 }
598
599 static void iommu_flush_write_buffer(struct intel_iommu *iommu)
600 {
601         u32 val;
602         unsigned long flag;
603
604         if (!cap_rwbf(iommu->cap))
605                 return;
606         val = iommu->gcmd | DMA_GCMD_WBF;
607
608         spin_lock_irqsave(&iommu->register_lock, flag);
609         writel(val, iommu->reg + DMAR_GCMD_REG);
610
611         /* Make sure hardware completes it */
612         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
613                         readl, (!(val & DMA_GSTS_WBFS)), val);
614
615         spin_unlock_irqrestore(&iommu->register_lock, flag);
616 }
617
618 /* return value determines whether we need a write buffer flush */
619 static int __iommu_flush_context(struct intel_iommu *iommu,
620         u16 did, u16 source_id, u8 function_mask, u64 type,
621         int non_present_entry_flush)
622 {
623         u64 val = 0;
624         unsigned long flag;
625
626         /*
627          * In the non-present entry flush case: if the hardware doesn't
628          * cache non-present entries we do nothing; if it does, we flush
629          * the entries of domain 0 (the domain id used to cache any
630          * non-present entries).
631          */
632         if (non_present_entry_flush) {
633                 if (!cap_caching_mode(iommu->cap))
634                         return 1;
635                 else
636                         did = 0;
637         }
638
639         switch (type) {
640         case DMA_CCMD_GLOBAL_INVL:
641                 val = DMA_CCMD_GLOBAL_INVL;
642                 break;
643         case DMA_CCMD_DOMAIN_INVL:
644                 val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
645                 break;
646         case DMA_CCMD_DEVICE_INVL:
647                 val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
648                         | DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
649                 break;
650         default:
651                 BUG();
652         }
653         val |= DMA_CCMD_ICC;
654
655         spin_lock_irqsave(&iommu->register_lock, flag);
656         dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);
657
658         /* Make sure hardware completes it */
659         IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
660                 dmar_readq, (!(val & DMA_CCMD_ICC)), val);
661
662         spin_unlock_irqrestore(&iommu->register_lock, flag);
663
664         /* flushing the context entry will implicitly flush the write buffer */
665         return 0;
666 }
667
668 /* return value determines whether we need a write buffer flush */
669 static int __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
670         u64 addr, unsigned int size_order, u64 type,
671         int non_present_entry_flush)
672 {
673         int tlb_offset = ecap_iotlb_offset(iommu->ecap);
674         u64 val = 0, val_iva = 0;
675         unsigned long flag;
676
677         /*
678          * In the non-present entry flush case: if the hardware doesn't
679          * cache non-present entries we do nothing; if it does, we flush
680          * the entries of domain 0 (the domain id used to cache any
681          * non-present entries).
682          */
683         if (non_present_entry_flush) {
684                 if (!cap_caching_mode(iommu->cap))
685                         return 1;
686                 else
687                         did = 0;
688         }
689
690         switch (type) {
691         case DMA_TLB_GLOBAL_FLUSH:
692                 /* a global flush doesn't need to set IVA_REG */
693                 val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
694                 break;
695         case DMA_TLB_DSI_FLUSH:
696                 val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
697                 break;
698         case DMA_TLB_PSI_FLUSH:
699                 val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
700                 /* Note: always flush non-leaf currently */
701                 val_iva = size_order | addr;
702                 break;
703         default:
704                 BUG();
705         }
706         /* Note: set drain read/write */
707 #if 0
708         /*
709          * This is probably only needed to be extra safe; it looks like
710          * we can ignore it without any impact.
711          */
712         if (cap_read_drain(iommu->cap))
713                 val |= DMA_TLB_READ_DRAIN;
714 #endif
715         if (cap_write_drain(iommu->cap))
716                 val |= DMA_TLB_WRITE_DRAIN;
717
718         spin_lock_irqsave(&iommu->register_lock, flag);
719         /* Note: Only uses first TLB reg currently */
720         if (val_iva)
721                 dmar_writeq(iommu->reg + tlb_offset, val_iva);
722         dmar_writeq(iommu->reg + tlb_offset + 8, val);
723
724         /* Make sure hardware completes it */
725         IOMMU_WAIT_OP(iommu, tlb_offset + 8,
726                 dmar_readq, (!(val & DMA_TLB_IVT)), val);
727
728         spin_unlock_irqrestore(&iommu->register_lock, flag);
729
730         /* check IOTLB invalidation granularity */
731         if (DMA_TLB_IAIG(val) == 0)
732                 printk(KERN_ERR"IOMMU: flush IOTLB failed\n");
733         if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
734                 pr_debug("IOMMU: tlb flush request %Lx, actual %Lx\n",
735                         (unsigned long long)DMA_TLB_IIRG(type),
736                         (unsigned long long)DMA_TLB_IAIG(val));
737         /* flushing the iotlb entry will implicitly flush the write buffer */
738         return 0;
739 }
740
741 static int iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
742         u64 addr, unsigned int pages, int non_present_entry_flush)
743 {
744         unsigned int mask;
745
746         BUG_ON(addr & (~VTD_PAGE_MASK));
747         BUG_ON(pages == 0);
748
749         /* Fallback to domain selective flush if no PSI support */
750         if (!cap_pgsel_inv(iommu->cap))
751                 return iommu->flush.flush_iotlb(iommu, did, 0, 0,
752                                                 DMA_TLB_DSI_FLUSH,
753                                                 non_present_entry_flush);
754
755         /*
756          * PSI requires the region size to be 2 ^ x pages, and the base
757          * address to be naturally aligned to that size
758          */
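        /*
         * e.g. pages == 3 gives mask == ilog2(__roundup_pow_of_two(3)) == 2,
         * i.e. a naturally aligned 4-page (2^2) region is invalidated.
         */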
759         mask = ilog2(__roundup_pow_of_two(pages));
760         /* Fallback to domain selective flush if size is too big */
761         if (mask > cap_max_amask_val(iommu->cap))
762                 return iommu->flush.flush_iotlb(iommu, did, 0, 0,
763                         DMA_TLB_DSI_FLUSH, non_present_entry_flush);
764
765         return iommu->flush.flush_iotlb(iommu, did, addr, mask,
766                                         DMA_TLB_PSI_FLUSH,
767                                         non_present_entry_flush);
768 }
769
770 static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
771 {
772         u32 pmen;
773         unsigned long flags;
774
775         spin_lock_irqsave(&iommu->register_lock, flags);
776         pmen = readl(iommu->reg + DMAR_PMEN_REG);
777         pmen &= ~DMA_PMEN_EPM;
778         writel(pmen, iommu->reg + DMAR_PMEN_REG);
779
780         /* wait for the protected region status bit to clear */
781         IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
782                 readl, !(pmen & DMA_PMEN_PRS), pmen);
783
784         spin_unlock_irqrestore(&iommu->register_lock, flags);
785 }
786
787 static int iommu_enable_translation(struct intel_iommu *iommu)
788 {
789         u32 sts;
790         unsigned long flags;
791
792         spin_lock_irqsave(&iommu->register_lock, flags);
793         writel(iommu->gcmd|DMA_GCMD_TE, iommu->reg + DMAR_GCMD_REG);
794
795         /* Make sure hardware completes it */
796         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
797                 readl, (sts & DMA_GSTS_TES), sts);
798
799         iommu->gcmd |= DMA_GCMD_TE;
800         spin_unlock_irqrestore(&iommu->register_lock, flags);
801         return 0;
802 }
803
804 static int iommu_disable_translation(struct intel_iommu *iommu)
805 {
806         u32 sts;
807         unsigned long flag;
808
809         spin_lock_irqsave(&iommu->register_lock, flag);
810         iommu->gcmd &= ~DMA_GCMD_TE;
811         writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
812
813         /* Make sure hardware completes it */
814         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
815                 readl, (!(sts & DMA_GSTS_TES)), sts);
816
817         spin_unlock_irqrestore(&iommu->register_lock, flag);
818         return 0;
819 }
820
821 /* iommu interrupt handling. Most of it is MSI-like. */
822
823 static const char *fault_reason_strings[] =
824 {
825         "Software",
826         "Present bit in root entry is clear",
827         "Present bit in context entry is clear",
828         "Invalid context entry",
829         "Access beyond MGAW",
830         "PTE Write access is not set",
831         "PTE Read access is not set",
832         "Next page table ptr is invalid",
833         "Root table address invalid",
834         "Context table ptr is invalid",
835         "non-zero reserved fields in RTP",
836         "non-zero reserved fields in CTP",
837         "non-zero reserved fields in PTE",
838 };
839 #define MAX_FAULT_REASON_IDX    (ARRAY_SIZE(fault_reason_strings) - 1)
840
841 const char *dmar_get_fault_reason(u8 fault_reason)
842 {
843         if (fault_reason > MAX_FAULT_REASON_IDX)
844                 return "Unknown";
845         else
846                 return fault_reason_strings[fault_reason];
847 }
848
849 void dmar_msi_unmask(unsigned int irq)
850 {
851         struct intel_iommu *iommu = get_irq_data(irq);
852         unsigned long flag;
853
854         /* unmask it */
855         spin_lock_irqsave(&iommu->register_lock, flag);
856         writel(0, iommu->reg + DMAR_FECTL_REG);
857         /* Read a reg to force-flush the posted write */
858         readl(iommu->reg + DMAR_FECTL_REG);
859         spin_unlock_irqrestore(&iommu->register_lock, flag);
860 }
861
862 void dmar_msi_mask(unsigned int irq)
863 {
864         unsigned long flag;
865         struct intel_iommu *iommu = get_irq_data(irq);
866
867         /* mask it */
868         spin_lock_irqsave(&iommu->register_lock, flag);
869         writel(DMA_FECTL_IM, iommu->reg + DMAR_FECTL_REG);
870         /* Read a reg to force-flush the posted write */
871         readl(iommu->reg + DMAR_FECTL_REG);
872         spin_unlock_irqrestore(&iommu->register_lock, flag);
873 }
874
875 void dmar_msi_write(int irq, struct msi_msg *msg)
876 {
877         struct intel_iommu *iommu = get_irq_data(irq);
878         unsigned long flag;
879
880         spin_lock_irqsave(&iommu->register_lock, flag);
881         writel(msg->data, iommu->reg + DMAR_FEDATA_REG);
882         writel(msg->address_lo, iommu->reg + DMAR_FEADDR_REG);
883         writel(msg->address_hi, iommu->reg + DMAR_FEUADDR_REG);
884         spin_unlock_irqrestore(&iommu->register_lock, flag);
885 }
886
887 void dmar_msi_read(int irq, struct msi_msg *msg)
888 {
889         struct intel_iommu *iommu = get_irq_data(irq);
890         unsigned long flag;
891
892         spin_lock_irqsave(&iommu->register_lock, flag);
893         msg->data = readl(iommu->reg + DMAR_FEDATA_REG);
894         msg->address_lo = readl(iommu->reg + DMAR_FEADDR_REG);
895         msg->address_hi = readl(iommu->reg + DMAR_FEUADDR_REG);
896         spin_unlock_irqrestore(&iommu->register_lock, flag);
897 }
898
899 static int iommu_page_fault_do_one(struct intel_iommu *iommu, int type,
900                 u8 fault_reason, u16 source_id, unsigned long long addr)
901 {
902         const char *reason;
903
904         reason = dmar_get_fault_reason(fault_reason);
905
906         printk(KERN_ERR
907                 "DMAR:[%s] Request device [%02x:%02x.%d] "
908                 "fault addr %llx \n"
909                 "DMAR:[fault reason %02d] %s\n",
910                 (type ? "DMA Read" : "DMA Write"),
911                 (source_id >> 8), PCI_SLOT(source_id & 0xFF),
912                 PCI_FUNC(source_id & 0xFF), addr, fault_reason, reason);
913         return 0;
914 }
915
916 #define PRIMARY_FAULT_REG_LEN (16)
917 static irqreturn_t iommu_page_fault(int irq, void *dev_id)
918 {
919         struct intel_iommu *iommu = dev_id;
920         int reg, fault_index;
921         u32 fault_status;
922         unsigned long flag;
923
924         spin_lock_irqsave(&iommu->register_lock, flag);
925         fault_status = readl(iommu->reg + DMAR_FSTS_REG);
926
927         /* TBD: ignore advanced fault log currently */
928         if (!(fault_status & DMA_FSTS_PPF))
929                 goto clear_overflow;
930
931         fault_index = dma_fsts_fault_record_index(fault_status);
932         reg = cap_fault_reg_offset(iommu->cap);
933         while (1) {
934                 u8 fault_reason;
935                 u16 source_id;
936                 u64 guest_addr;
937                 int type;
938                 u32 data;
939
940                 /* highest 32 bits */
941                 data = readl(iommu->reg + reg +
942                                 fault_index * PRIMARY_FAULT_REG_LEN + 12);
943                 if (!(data & DMA_FRCD_F))
944                         break;
945
946                 fault_reason = dma_frcd_fault_reason(data);
947                 type = dma_frcd_type(data);
948
949                 data = readl(iommu->reg + reg +
950                                 fault_index * PRIMARY_FAULT_REG_LEN + 8);
951                 source_id = dma_frcd_source_id(data);
952
953                 guest_addr = dmar_readq(iommu->reg + reg +
954                                 fault_index * PRIMARY_FAULT_REG_LEN);
955                 guest_addr = dma_frcd_page_addr(guest_addr);
956                 /* clear the fault */
957                 writel(DMA_FRCD_F, iommu->reg + reg +
958                         fault_index * PRIMARY_FAULT_REG_LEN + 12);
959
960                 spin_unlock_irqrestore(&iommu->register_lock, flag);
961
962                 iommu_page_fault_do_one(iommu, type, fault_reason,
963                                 source_id, guest_addr);
964
965                 fault_index++;
966                 if (fault_index >= cap_num_fault_regs(iommu->cap))
967                         fault_index = 0;
968                 spin_lock_irqsave(&iommu->register_lock, flag);
969         }
970 clear_overflow:
971         /* clear primary fault overflow */
972         fault_status = readl(iommu->reg + DMAR_FSTS_REG);
973         if (fault_status & DMA_FSTS_PFO)
974                 writel(DMA_FSTS_PFO, iommu->reg + DMAR_FSTS_REG);
975
976         spin_unlock_irqrestore(&iommu->register_lock, flag);
977         return IRQ_HANDLED;
978 }
979
980 int dmar_set_interrupt(struct intel_iommu *iommu)
981 {
982         int irq, ret;
983
984         irq = create_irq();
985         if (!irq) {
986                 printk(KERN_ERR "IOMMU: no free vectors\n");
987                 return -EINVAL;
988         }
989
990         set_irq_data(irq, iommu);
991         iommu->irq = irq;
992
993         ret = arch_setup_dmar_msi(irq);
994         if (ret) {
995                 set_irq_data(irq, NULL);
996                 iommu->irq = 0;
997                 destroy_irq(irq);
998                 return ret;
999         }
1000
1001         /* Make sure any pending faults are cleared */
1002         iommu_page_fault(irq, iommu);
1003
1004         ret = request_irq(irq, iommu_page_fault, 0, iommu->name, iommu);
1005         if (ret)
1006                 printk(KERN_ERR "IOMMU: can't request irq\n");
1007         return ret;
1008 }
1009
1010 static int iommu_init_domains(struct intel_iommu *iommu)
1011 {
1012         unsigned long ndomains;
1013         unsigned long nlongs;
1014
1015         ndomains = cap_ndoms(iommu->cap);
1016         pr_debug("Number of Domains supported <%ld>\n", ndomains);
1017         nlongs = BITS_TO_LONGS(ndomains);
1018
1019         /* TBD: there might be 64K domains,
1020          * consider other allocation schemes for future chips
1021          */
1022         iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
1023         if (!iommu->domain_ids) {
1024                 printk(KERN_ERR "Allocating domain id array failed\n");
1025                 return -ENOMEM;
1026         }
1027         iommu->domains = kcalloc(ndomains, sizeof(struct dmar_domain *),
1028                         GFP_KERNEL);
1029         if (!iommu->domains) {
1030                 printk(KERN_ERR "Allocating domain array failed\n");
1031                 kfree(iommu->domain_ids);
1032                 return -ENOMEM;
1033         }
1034
1035         spin_lock_init(&iommu->lock);
1036
1037         /*
1038          * if Caching mode is set, then invalid translations are tagged
1039          * with domain id 0, hence we need to pre-allocate it.
1040          */
1041         if (cap_caching_mode(iommu->cap))
1042                 set_bit(0, iommu->domain_ids);
1043         return 0;
1044 }
1045
1046
1047 static void domain_exit(struct dmar_domain *domain);
1048
1049 void free_dmar_iommu(struct intel_iommu *iommu)
1050 {
1051         struct dmar_domain *domain;
1052         int i;
1053
1054         i = find_first_bit(iommu->domain_ids, cap_ndoms(iommu->cap));
1055         for (; i < cap_ndoms(iommu->cap); ) {
1056                 domain = iommu->domains[i];
1057                 clear_bit(i, iommu->domain_ids);
1058                 domain_exit(domain);
1059                 i = find_next_bit(iommu->domain_ids,
1060                         cap_ndoms(iommu->cap), i+1);
1061         }
1062
1063         if (iommu->gcmd & DMA_GCMD_TE)
1064                 iommu_disable_translation(iommu);
1065
1066         if (iommu->irq) {
1067                 set_irq_data(iommu->irq, NULL);
1068                 /* This will mask the irq */
1069                 free_irq(iommu->irq, iommu);
1070                 destroy_irq(iommu->irq);
1071         }
1072
1073         kfree(iommu->domains);
1074         kfree(iommu->domain_ids);
1075
1076         /* free context mapping */
1077         free_context_table(iommu);
1078 }
1079
1080 static struct dmar_domain * iommu_alloc_domain(struct intel_iommu *iommu)
1081 {
1082         unsigned long num;
1083         unsigned long ndomains;
1084         struct dmar_domain *domain;
1085         unsigned long flags;
1086
1087         domain = alloc_domain_mem();
1088         if (!domain)
1089                 return NULL;
1090
1091         ndomains = cap_ndoms(iommu->cap);
1092
1093         spin_lock_irqsave(&iommu->lock, flags);
1094         num = find_first_zero_bit(iommu->domain_ids, ndomains);
1095         if (num >= ndomains) {
1096                 spin_unlock_irqrestore(&iommu->lock, flags);
1097                 free_domain_mem(domain);
1098                 printk(KERN_ERR "IOMMU: no free domain ids\n");
1099                 return NULL;
1100         }
1101
1102         set_bit(num, iommu->domain_ids);
1103         domain->id = num;
1104         domain->iommu = iommu;
1105         iommu->domains[num] = domain;
1106         spin_unlock_irqrestore(&iommu->lock, flags);
1107
1108         return domain;
1109 }
1110
1111 static void iommu_free_domain(struct dmar_domain *domain)
1112 {
1113         unsigned long flags;
1114
1115         spin_lock_irqsave(&domain->iommu->lock, flags);
1116         clear_bit(domain->id, domain->iommu->domain_ids);
1117         spin_unlock_irqrestore(&domain->iommu->lock, flags);
1118 }
1119
1120 static struct iova_domain reserved_iova_list;
1121 static struct lock_class_key reserved_alloc_key;
1122 static struct lock_class_key reserved_rbtree_key;
1123
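/*
 * Build the global list of IOVA ranges that must never be handed out to
 * devices: the IOAPIC MMIO window and every PCI device's MMIO resources
 * (so DMA is not decoded as peer-to-peer traffic).  Every new domain copies
 * these reservations via domain_reserve_special_ranges().
 */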
1124 static void dmar_init_reserved_ranges(void)
1125 {
1126         struct pci_dev *pdev = NULL;
1127         struct iova *iova;
1128         int i;
1129         u64 addr, size;
1130
1131         init_iova_domain(&reserved_iova_list, DMA_32BIT_PFN);
1132
1133         lockdep_set_class(&reserved_iova_list.iova_alloc_lock,
1134                 &reserved_alloc_key);
1135         lockdep_set_class(&reserved_iova_list.iova_rbtree_lock,
1136                 &reserved_rbtree_key);
1137
1138         /* IOAPIC ranges shouldn't be accessed by DMA */
1139         iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
1140                 IOVA_PFN(IOAPIC_RANGE_END));
1141         if (!iova)
1142                 printk(KERN_ERR "Reserve IOAPIC range failed\n");
1143
1144         /* Reserve all PCI MMIO to avoid peer-to-peer access */
1145         for_each_pci_dev(pdev) {
1146                 struct resource *r;
1147
1148                 for (i = 0; i < PCI_NUM_RESOURCES; i++) {
1149                         r = &pdev->resource[i];
1150                         if (!r->flags || !(r->flags & IORESOURCE_MEM))
1151                                 continue;
1152                         addr = r->start;
1153                         addr &= PAGE_MASK;
1154                         size = r->end - addr;
1155                         size = PAGE_ALIGN(size);
1156                         iova = reserve_iova(&reserved_iova_list, IOVA_PFN(addr),
1157                                 IOVA_PFN(size + addr) - 1);
1158                         if (!iova)
1159                                 printk(KERN_ERR "Reserve iova failed\n");
1160                 }
1161         }
1162
1163 }
1164
1165 static void domain_reserve_special_ranges(struct dmar_domain *domain)
1166 {
1167         copy_reserved_iova(&reserved_iova_list, &domain->iovad);
1168 }
1169
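/*
 * Round a guest address width up to the next page-table level boundary,
 * i.e. to the next width where (width - 12) is a multiple of the 9-bit
 * level stride; e.g. a 36-bit guest width becomes 39 bits.
 */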
1170 static inline int guestwidth_to_adjustwidth(int gaw)
1171 {
1172         int agaw;
1173         int r = (gaw - 12) % 9;
1174
1175         if (r == 0)
1176                 agaw = gaw;
1177         else
1178                 agaw = gaw + 9 - r;
1179         if (agaw > 64)
1180                 agaw = 64;
1181         return agaw;
1182 }
1183
1184 static int domain_init(struct dmar_domain *domain, int guest_width)
1185 {
1186         struct intel_iommu *iommu;
1187         int adjust_width, agaw;
1188         unsigned long sagaw;
1189
1190         init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
1191         spin_lock_init(&domain->mapping_lock);
1192
1193         domain_reserve_special_ranges(domain);
1194
1195         /* calculate AGAW */
1196         iommu = domain->iommu;
1197         if (guest_width > cap_mgaw(iommu->cap))
1198                 guest_width = cap_mgaw(iommu->cap);
1199         domain->gaw = guest_width;
1200         adjust_width = guestwidth_to_adjustwidth(guest_width);
1201         agaw = width_to_agaw(adjust_width);
1202         sagaw = cap_sagaw(iommu->cap);
1203         if (!test_bit(agaw, &sagaw)) {
1204                 /* hardware doesn't support it, choose a bigger one */
1205                 pr_debug("IOMMU: hardware doesn't support agaw %d\n", agaw);
1206                 agaw = find_next_bit(&sagaw, 5, agaw);
1207                 if (agaw >= 5)
1208                         return -ENODEV;
1209         }
1210         domain->agaw = agaw;
1211         INIT_LIST_HEAD(&domain->devices);
1212
1213         /* always allocate the top pgd */
1214         domain->pgd = (struct dma_pte *)alloc_pgtable_page();
1215         if (!domain->pgd)
1216                 return -ENOMEM;
1217         __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE);
1218         return 0;
1219 }
1220
1221 static void domain_exit(struct dmar_domain *domain)
1222 {
1223         u64 end;
1224
1225         /* Domain 0 is reserved, so don't process it */
1226         if (!domain)
1227                 return;
1228
1229         domain_remove_dev_info(domain);
1230         /* destroy iovas */
1231         put_iova_domain(&domain->iovad);
1232         end = DOMAIN_MAX_ADDR(domain->gaw);
1233         end = end & (~PAGE_MASK);
1234
1235         /* clear ptes */
1236         dma_pte_clear_range(domain, 0, end);
1237
1238         /* free page tables */
1239         dma_pte_free_pagetable(domain, 0, end);
1240
1241         iommu_free_domain(domain);
1242         free_domain_mem(domain);
1243 }
1244
1245 static int domain_context_mapping_one(struct dmar_domain *domain,
1246                 u8 bus, u8 devfn)
1247 {
1248         struct context_entry *context;
1249         struct intel_iommu *iommu = domain->iommu;
1250         unsigned long flags;
1251
1252         pr_debug("Set context mapping for %02x:%02x.%d\n",
1253                 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
1254         BUG_ON(!domain->pgd);
1255         context = device_to_context_entry(iommu, bus, devfn);
1256         if (!context)
1257                 return -ENOMEM;
1258         spin_lock_irqsave(&iommu->lock, flags);
1259         if (context_present(*context)) {
1260                 spin_unlock_irqrestore(&iommu->lock, flags);
1261                 return 0;
1262         }
1263
1264         context_set_domain_id(*context, domain->id);
1265         context_set_address_width(*context, domain->agaw);
1266         context_set_address_root(*context, virt_to_phys(domain->pgd));
1267         context_set_translation_type(*context, CONTEXT_TT_MULTI_LEVEL);
1268         context_set_fault_enable(*context);
1269         context_set_present(*context);
1270         __iommu_flush_cache(iommu, context, sizeof(*context));
1271
1272         /* it's a non-present to present mapping */
1273         if (iommu->flush.flush_context(iommu, domain->id,
1274                 (((u16)bus) << 8) | devfn, DMA_CCMD_MASK_NOBIT,
1275                 DMA_CCMD_DEVICE_INVL, 1))
1276                 iommu_flush_write_buffer(iommu);
1277         else
1278                 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_DSI_FLUSH, 0);
1279
1280         spin_unlock_irqrestore(&iommu->lock, flags);
1281         return 0;
1282 }
1283
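/*
 * Set up the context entry for the device itself and, if it sits behind a
 * PCIe-to-PCI bridge, for the bridges on the path as well, so that DMA
 * requests tagged with the bridge's source-id are translated into the same
 * domain.
 */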
1284 static int
1285 domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev)
1286 {
1287         int ret;
1288         struct pci_dev *tmp, *parent;
1289
1290         ret = domain_context_mapping_one(domain, pdev->bus->number,
1291                 pdev->devfn);
1292         if (ret)
1293                 return ret;
1294
1295         /* dependent device mapping */
1296         tmp = pci_find_upstream_pcie_bridge(pdev);
1297         if (!tmp)
1298                 return 0;
1299         /* Secondary interface's bus number and devfn 0 */
1300         parent = pdev->bus->self;
1301         while (parent != tmp) {
1302                 ret = domain_context_mapping_one(domain, parent->bus->number,
1303                         parent->devfn);
1304                 if (ret)
1305                         return ret;
1306                 parent = parent->bus->self;
1307         }
1308         if (tmp->is_pcie) /* this is a PCIE-to-PCI bridge */
1309                 return domain_context_mapping_one(domain,
1310                         tmp->subordinate->number, 0);
1311         else /* this is a legacy PCI bridge */
1312                 return domain_context_mapping_one(domain,
1313                         tmp->bus->number, tmp->devfn);
1314 }
1315
1316 static int domain_context_mapped(struct dmar_domain *domain,
1317         struct pci_dev *pdev)
1318 {
1319         int ret;
1320         struct pci_dev *tmp, *parent;
1321
1322         ret = device_context_mapped(domain->iommu,
1323                 pdev->bus->number, pdev->devfn);
1324         if (!ret)
1325                 return ret;
1326         /* dependent device mapping */
1327         tmp = pci_find_upstream_pcie_bridge(pdev);
1328         if (!tmp)
1329                 return ret;
1330         /* Secondary interface's bus number and devfn 0 */
1331         parent = pdev->bus->self;
1332         while (parent != tmp) {
1333                 ret = device_context_mapped(domain->iommu, parent->bus->number,
1334                         parent->devfn);
1335                 if (!ret)
1336                         return ret;
1337                 parent = parent->bus->self;
1338         }
1339         if (tmp->is_pcie)
1340                 return device_context_mapped(domain->iommu,
1341                         tmp->subordinate->number, 0);
1342         else
1343                 return device_context_mapped(domain->iommu,
1344                         tmp->bus->number, tmp->devfn);
1345 }
1346
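/*
 * Map the physical range [hpa, hpa + size) at IO virtual address iova, one
 * VTD_PAGE_SIZE page at a time, with the given DMA_PTE_READ/DMA_PTE_WRITE
 * protection bits.
 */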
1347 static int
1348 domain_page_mapping(struct dmar_domain *domain, dma_addr_t iova,
1349                         u64 hpa, size_t size, int prot)
1350 {
1351         u64 start_pfn, end_pfn;
1352         struct dma_pte *pte;
1353         int index;
1354         int addr_width = agaw_to_width(domain->agaw);
1355
1356         hpa &= (((u64)1) << addr_width) - 1;
1357
1358         if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
1359                 return -EINVAL;
1360         iova &= PAGE_MASK;
1361         start_pfn = ((u64)hpa) >> VTD_PAGE_SHIFT;
1362         end_pfn = (VTD_PAGE_ALIGN(((u64)hpa) + size)) >> VTD_PAGE_SHIFT;
1363         index = 0;
1364         while (start_pfn < end_pfn) {
1365                 pte = addr_to_dma_pte(domain, iova + VTD_PAGE_SIZE * index);
1366                 if (!pte)
1367                         return -ENOMEM;
1368                 /* We don't need a lock here; nobody else
1369                  * touches this iova range
1370                  */
1371                 BUG_ON(dma_pte_addr(*pte));
1372                 dma_set_pte_addr(*pte, start_pfn << VTD_PAGE_SHIFT);
1373                 dma_set_pte_prot(*pte, prot);
1374                 __iommu_flush_cache(domain->iommu, pte, sizeof(*pte));
1375                 start_pfn++;
1376                 index++;
1377         }
1378         return 0;
1379 }
1380
1381 static void detach_domain_for_dev(struct dmar_domain *domain, u8 bus, u8 devfn)
1382 {
1383         clear_context_table(domain->iommu, bus, devfn);
1384         domain->iommu->flush.flush_context(domain->iommu, 0, 0, 0,
1385                                            DMA_CCMD_GLOBAL_INVL, 0);
1386         domain->iommu->flush.flush_iotlb(domain->iommu, 0, 0, 0,
1387                                          DMA_TLB_GLOBAL_FLUSH, 0);
1388 }
1389
1390 static void domain_remove_dev_info(struct dmar_domain *domain)
1391 {
1392         struct device_domain_info *info;
1393         unsigned long flags;
1394
1395         spin_lock_irqsave(&device_domain_lock, flags);
1396         while (!list_empty(&domain->devices)) {
1397                 info = list_entry(domain->devices.next,
1398                         struct device_domain_info, link);
1399                 list_del(&info->link);
1400                 list_del(&info->global);
1401                 if (info->dev)
1402                         info->dev->dev.archdata.iommu = NULL;
1403                 spin_unlock_irqrestore(&device_domain_lock, flags);
1404
1405                 detach_domain_for_dev(info->domain, info->bus, info->devfn);
1406                 free_devinfo_mem(info);
1407
1408                 spin_lock_irqsave(&device_domain_lock, flags);
1409         }
1410         spin_unlock_irqrestore(&device_domain_lock, flags);
1411 }
1412
1413 /*
1414  * find_domain
1415  * Note: we use struct pci_dev->dev.archdata.iommu to store the domain info
1416  */
1417 static struct dmar_domain *
1418 find_domain(struct pci_dev *pdev)
1419 {
1420         struct device_domain_info *info;
1421
1422         /* No lock here; we assume no domain exits in the normal case */
1423         info = pdev->dev.archdata.iommu;
1424         if (info)
1425                 return info->domain;
1426         return NULL;
1427 }
1428
1429 /* domain is initialized */
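/*
 * Find or create the dmar_domain for pdev.  Devices behind a PCIe-to-PCI
 * bridge all share one domain, keyed by the bridge's secondary bus (or by
 * the bridge itself for a legacy PCI bridge), so device_domain_list is
 * searched for an existing entry before a new domain is allocated.
 */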
1430 static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw)
1431 {
1432         struct dmar_domain *domain, *found = NULL;
1433         struct intel_iommu *iommu;
1434         struct dmar_drhd_unit *drhd;
1435         struct device_domain_info *info, *tmp;
1436         struct pci_dev *dev_tmp;
1437         unsigned long flags;
1438         int bus = 0, devfn = 0;
1439
1440         domain = find_domain(pdev);
1441         if (domain)
1442                 return domain;
1443
1444         dev_tmp = pci_find_upstream_pcie_bridge(pdev);
1445         if (dev_tmp) {
1446                 if (dev_tmp->is_pcie) {
1447                         bus = dev_tmp->subordinate->number;
1448                         devfn = 0;
1449                 } else {
1450                         bus = dev_tmp->bus->number;
1451                         devfn = dev_tmp->devfn;
1452                 }
1453                 spin_lock_irqsave(&device_domain_lock, flags);
1454                 list_for_each_entry(info, &device_domain_list, global) {
1455                         if (info->bus == bus && info->devfn == devfn) {
1456                                 found = info->domain;
1457                                 break;
1458                         }
1459                 }
1460                 spin_unlock_irqrestore(&device_domain_lock, flags);
1461                 /* pcie-pci bridge already has a domain, use it */
1462                 if (found) {
1463                         domain = found;
1464                         goto found_domain;
1465                 }
1466         }
1467
1468         /* Allocate new domain for the device */
1469         drhd = dmar_find_matched_drhd_unit(pdev);
1470         if (!drhd) {
1471                 printk(KERN_ERR "IOMMU: can't find DMAR for device %s\n",
1472                         pci_name(pdev));
1473                 return NULL;
1474         }
1475         iommu = drhd->iommu;
1476
1477         domain = iommu_alloc_domain(iommu);
1478         if (!domain)
1479                 goto error;
1480
1481         if (domain_init(domain, gaw)) {
1482                 domain_exit(domain);
1483                 goto error;
1484         }
1485
1486         /* register pcie-to-pci device */
1487         if (dev_tmp) {
1488                 info = alloc_devinfo_mem();
1489                 if (!info) {
1490                         domain_exit(domain);
1491                         goto error;
1492                 }
1493                 info->bus = bus;
1494                 info->devfn = devfn;
1495                 info->dev = NULL;
1496                 info->domain = domain;
1497                 /* This domain is shared by devices under the p2p bridge */
1498                 domain->flags |= DOMAIN_FLAG_MULTIPLE_DEVICES;
1499
1500                 /* pcie-to-pci bridge already has a domain, use it */
1501                 found = NULL;
1502                 spin_lock_irqsave(&device_domain_lock, flags);
1503                 list_for_each_entry(tmp, &device_domain_list, global) {
1504                         if (tmp->bus == bus && tmp->devfn == devfn) {
1505                                 found = tmp->domain;
1506                                 break;
1507                         }
1508                 }
1509                 if (found) {
1510                         free_devinfo_mem(info);
1511                         domain_exit(domain);
1512                         domain = found;
1513                 } else {
1514                         list_add(&info->link, &domain->devices);
1515                         list_add(&info->global, &device_domain_list);
1516                 }
1517                 spin_unlock_irqrestore(&device_domain_lock, flags);
1518         }
1519
1520 found_domain:
1521         info = alloc_devinfo_mem();
1522         if (!info)
1523                 goto error;
1524         info->bus = pdev->bus->number;
1525         info->devfn = pdev->devfn;
1526         info->dev = pdev;
1527         info->domain = domain;
1528         spin_lock_irqsave(&device_domain_lock, flags);
1529         /* somebody else was faster and already set it up */
1530         found = find_domain(pdev);
1531         if (found != NULL) {
1532                 spin_unlock_irqrestore(&device_domain_lock, flags);
1533                 if (found != domain) {
1534                         domain_exit(domain);
1535                         domain = found;
1536                 }
1537                 free_devinfo_mem(info);
1538                 return domain;
1539         }
1540         list_add(&info->link, &domain->devices);
1541         list_add(&info->global, &device_domain_list);
1542         pdev->dev.archdata.iommu = info;
1543         spin_unlock_irqrestore(&device_domain_lock, flags);
1544         return domain;
1545 error:
1546         /* recheck it here, maybe somebody else has set it */
1547         return find_domain(pdev);
1548 }
1549
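/*
 * Identity-map [start, end) for pdev: the range is reserved in the domain's
 * IOVA allocator and mapped 1:1 (iova == physical address), as needed for
 * RMRR regions and for the graphics workaround below.
 */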
1550 static int iommu_prepare_identity_map(struct pci_dev *pdev,
1551                                       unsigned long long start,
1552                                       unsigned long long end)
1553 {
1554         struct dmar_domain *domain;
1555         unsigned long size;
1556         unsigned long long base;
1557         int ret;
1558
1559         printk(KERN_INFO
1560                 "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
1561                 pci_name(pdev), start, end);
1562         /* page table init */
1563         domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
1564         if (!domain)
1565                 return -ENOMEM;
1566
1567         /* The address might not be aligned */
1568         base = start & PAGE_MASK;
1569         size = end - base;
1570         size = PAGE_ALIGN(size);
1571         if (!reserve_iova(&domain->iovad, IOVA_PFN(base),
1572                         IOVA_PFN(base + size) - 1)) {
1573                 printk(KERN_ERR "IOMMU: reserve iova failed\n");
1574                 ret = -ENOMEM;
1575                 goto error;
1576         }
1577
1578         pr_debug("Mapping reserved region %lx@%llx for %s\n",
1579                 size, base, pci_name(pdev));
1580         /*
1581          * The RMRR range might overlap with a physical memory range;
1582          * clear any existing mapping first.
1583          */
1584         dma_pte_clear_range(domain, base, base + size);
1585
1586         ret = domain_page_mapping(domain, base, base, size,
1587                 DMA_PTE_READ|DMA_PTE_WRITE);
1588         if (ret)
1589                 goto error;
1590
1591         /* context entry init */
1592         ret = domain_context_mapping(domain, pdev);
1593         if (!ret)
1594                 return 0;
1595 error:
1596         domain_exit(domain);
1597         return ret;
1598
1599 }
1600
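/*
 * Apply an RMRR (Reserved Memory Region Reporting) unity mapping to one
 * device; rmrr->end_address is inclusive, hence the "+ 1" below.
 */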
1601 static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
1602         struct pci_dev *pdev)
1603 {
1604         if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
1605                 return 0;
1606         return iommu_prepare_identity_map(pdev, rmrr->base_address,
1607                 rmrr->end_address + 1);
1608 }
1609
1610 #ifdef CONFIG_DMAR_GFX_WA
1611 struct iommu_prepare_data {
1612         struct pci_dev *pdev;
1613         int ret;
1614 };
1615
1616 static int __init iommu_prepare_work_fn(unsigned long start_pfn,
1617                                          unsigned long end_pfn, void *datax)
1618 {
1619         struct iommu_prepare_data *data;
1620
1621         data = (struct iommu_prepare_data *)datax;
1622
1623         data->ret = iommu_prepare_identity_map(data->pdev,
1624                                 start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT);
1625         return data->ret;
1626
1627 }
1628
1629 static int __init iommu_prepare_with_active_regions(struct pci_dev *pdev)
1630 {
1631         int nid;
1632         struct iommu_prepare_data data;
1633
1634         data.pdev = pdev;
1635         data.ret = 0;
1636
1637         for_each_online_node(nid) {
1638                 work_with_active_regions(nid, iommu_prepare_work_fn, &data);
1639                 if (data.ret)
1640                         return data.ret;
1641         }
1642         return data.ret;
1643 }
1644
1645 static void __init iommu_prepare_gfx_mapping(void)
1646 {
1647         struct pci_dev *pdev = NULL;
1648         int ret;
1649
1650         for_each_pci_dev(pdev) {
1651                 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO ||
1652                                 !IS_GFX_DEVICE(pdev))
1653                         continue;
1654                 printk(KERN_INFO "IOMMU: gfx device %s 1-1 mapping\n",
1655                         pci_name(pdev));
1656                 ret = iommu_prepare_with_active_regions(pdev);
1657                 if (ret)
1658                         printk(KERN_ERR "IOMMU: mapping reserved region failed\n");
1659         }
1660 }
1661 #endif
1662
1663 #ifdef CONFIG_DMAR_FLOPPY_WA
1664 static inline void iommu_prepare_isa(void)
1665 {
1666         struct pci_dev *pdev;
1667         int ret;
1668
1669         pdev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL);
1670         if (!pdev)
1671                 return;
1672
1673         printk(KERN_INFO "IOMMU: Prepare 0-16M unity mapping for LPC\n");
1674         ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024);
1675
1676         if (ret)
1677                 printk(KERN_ERR "IOMMU: Failed to create 0-16M identity map, "
1678                         "floppy might not work\n");
1679
1680 }
1681 #else
1682 static inline void iommu_prepare_isa(void)
1683 {
1684         return;
1685 }
1686 #endif /* !CONFIG_DMAR_FLOPPY_WA */
1687
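/*
 * Bring every DMAR unit into service: count the IOMMUs and allocate the
 * per-IOMMU deferred-flush tables, initialize each IOMMU's domains and
 * root entry, choose queued vs. register-based invalidation, map the
 * RMRR / graphics / ISA workaround regions, and finally program the root
 * entries and enable translation.
 */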
1688 static int __init init_dmars(void)
1689 {
1690         struct dmar_drhd_unit *drhd;
1691         struct dmar_rmrr_unit *rmrr;
1692         struct pci_dev *pdev;
1693         struct intel_iommu *iommu;
1694         int i, ret, unit = 0;
1695
1696         /*
1697          * for each drhd
1698          *    allocate root
1699          *    initialize and program root entry to not present
1700          * endfor
1701          */
1702         for_each_drhd_unit(drhd) {
1703                 g_num_of_iommus++;
1704                 /*
1705                  * No lock needed: this is only incremented in the
1706                  * single-threaded kernel __init code path; all other
1707                  * accesses are read-only.
1708                  */
1709         }
1710
1711         deferred_flush = kzalloc(g_num_of_iommus *
1712                 sizeof(struct deferred_flush_tables), GFP_KERNEL);
1713         if (!deferred_flush) {
1714                 ret = -ENOMEM;
1715                 goto error;
1716         }
1717
1718         for_each_drhd_unit(drhd) {
1719                 if (drhd->ignored)
1720                         continue;
1721
1722                 iommu = drhd->iommu;
1723
1724                 ret = iommu_init_domains(iommu);
1725                 if (ret)
1726                         goto error;
1727
1728                 /*
1729                  * TBD:
1730                  * we could share the same root & context tables
1731                  * among all IOMMUs. Need to split it later.
1732                  */
1733                 ret = iommu_alloc_root_entry(iommu);
1734                 if (ret) {
1735                         printk(KERN_ERR "IOMMU: allocate root entry failed\n");
1736                         goto error;
1737                 }
1738         }
1739
1740         for_each_drhd_unit(drhd) {
1741                 if (drhd->ignored)
1742                         continue;
1743
1744                 iommu = drhd->iommu;
1745                 if (dmar_enable_qi(iommu)) {
1746                         /*
1747                          * Queued Invalidation is not available; fall back
1748                          * to register-based invalidation.
1749                          */
1750                         iommu->flush.flush_context = __iommu_flush_context;
1751                         iommu->flush.flush_iotlb = __iommu_flush_iotlb;
1752                         printk(KERN_INFO "IOMMU 0x%Lx: using Register based "
1753                                "invalidation\n",
1754                                (unsigned long long)drhd->reg_base_addr);
1755                 } else {
1756                         iommu->flush.flush_context = qi_flush_context;
1757                         iommu->flush.flush_iotlb = qi_flush_iotlb;
1758                         printk(KERN_INFO "IOMMU 0x%Lx: using Queued "
1759                                "invalidation\n",
1760                                (unsigned long long)drhd->reg_base_addr);
1761                 }
1762         }
1763
1764         /*
1765          * For each rmrr
1766          *   for each dev attached to rmrr
1767          *   do
1768          *     locate drhd for dev, alloc domain for dev
1769          *     allocate free domain
1770          *     allocate page table entries for rmrr
1771          *     if context not allocated for bus
1772          *           allocate and init context
1773          *           set present in root table for this bus
1774          *     init context with domain, translation etc
1775          *    endfor
1776          * endfor
1777          */
1778         for_each_rmrr_units(rmrr) {
1779                 for (i = 0; i < rmrr->devices_cnt; i++) {
1780                         pdev = rmrr->devices[i];
1781                         /* some BIOSes list non-existent devices in the DMAR table */
1782                         if (!pdev)
1783                                 continue;
1784                         ret = iommu_prepare_rmrr_dev(rmrr, pdev);
1785                         if (ret)
1786                                 printk(KERN_ERR
1787                                  "IOMMU: mapping reserved region failed\n");
1788                 }
1789         }
1790
1791         iommu_prepare_gfx_mapping();
1792
1793         iommu_prepare_isa();
1794
1795         /*
1796          * for each drhd
1797          *   enable fault log
1798          *   global invalidate context cache
1799          *   global invalidate iotlb
1800          *   enable translation
1801          */
1802         for_each_drhd_unit(drhd) {
1803                 if (drhd->ignored)
1804                         continue;
1805                 iommu = drhd->iommu;
1806                 sprintf(iommu->name, "dmar%d", unit++);
1807
1808                 iommu_flush_write_buffer(iommu);
1809
1810                 ret = dmar_set_interrupt(iommu);
1811                 if (ret)
1812                         goto error;
1813
1814                 iommu_set_root_entry(iommu);
1815
1816                 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL,
1817                                            0);
1818                 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH,
1819                                          0);
1820                 iommu_disable_protect_mem_regions(iommu);
1821
1822                 ret = iommu_enable_translation(iommu);
1823                 if (ret)
1824                         goto error;
1825         }
1826
1827         return 0;
1828 error:
1829         for_each_drhd_unit(drhd) {
1830                 if (drhd->ignored)
1831                         continue;
1832                 iommu = drhd->iommu;
1833                 free_iommu(iommu);
1834         }
1835         return ret;
1836 }
1837
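/*
 * Number of bytes that must be mapped to cover size bytes starting at
 * host_addr, rounded out to whole pages.  For example, with 4KiB pages,
 * aligned_size(0x1234, 0x2000) is 0x3000: the buffer touches three pages.
 */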
1838 static inline u64 aligned_size(u64 host_addr, size_t size)
1839 {
1840         u64 addr;
1841         addr = (host_addr & (~PAGE_MASK)) + size;
1842         return PAGE_ALIGN(addr);
1843 }
1844
1845 struct iova *
1846 iommu_alloc_iova(struct dmar_domain *domain, size_t size, u64 end)
1847 {
1848         struct iova *piova;
1849
1850         /* Make sure it's in range */
1851         end = min_t(u64, DOMAIN_MAX_ADDR(domain->gaw), end);
1852         if (!size || (IOVA_START_ADDR + size > end))
1853                 return NULL;
1854
1855         piova = alloc_iova(&domain->iovad,
1856                         size >> PAGE_SHIFT, IOVA_PFN(end), 1);
1857         return piova;
1858 }
1859
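/*
 * IOVA allocation policy: devices capable of more than 32-bit DMA first try
 * for an address below 4GB and fall back to their full dma_mask only if
 * that range is exhausted; with "forcedac" set, or for 32-bit-only devices,
 * allocation goes straight to the device's mask.
 */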
1860 static struct iova *
1861 __intel_alloc_iova(struct device *dev, struct dmar_domain *domain,
1862                    size_t size, u64 dma_mask)
1863 {
1864         struct pci_dev *pdev = to_pci_dev(dev);
1865         struct iova *iova = NULL;
1866
1867         if (dma_mask <= DMA_32BIT_MASK || dmar_forcedac)
1868                 iova = iommu_alloc_iova(domain, size, dma_mask);
1869         else {
1870                 /*
1871                  * First try to allocate an io virtual address in
1872                  * DMA_32BIT_MASK and if that fails then try allocating
1873                  * from higher range
1874                  */
1875                 iova = iommu_alloc_iova(domain, size, DMA_32BIT_MASK);
1876                 if (!iova)
1877                         iova = iommu_alloc_iova(domain, size, dma_mask);
1878         }
1879
1880         if (!iova) {
1881                 printk(KERN_ERR "Allocating iova for %s failed\n", pci_name(pdev));
1882                 return NULL;
1883         }
1884
1885         return iova;
1886 }
1887
1888 static struct dmar_domain *
1889 get_valid_domain_for_dev(struct pci_dev *pdev)
1890 {
1891         struct dmar_domain *domain;
1892         int ret;
1893
1894         domain = get_domain_for_dev(pdev,
1895                         DEFAULT_DOMAIN_ADDRESS_WIDTH);
1896         if (!domain) {
1897                 printk(KERN_ERR
1898                         "Allocating domain for %s failed\n", pci_name(pdev));
1899                 return NULL;
1900         }
1901
1902         /* make sure context mapping is ok */
1903         if (unlikely(!domain_context_mapped(domain, pdev))) {
1904                 ret = domain_context_mapping(domain, pdev);
1905                 if (ret) {
1906                         printk(KERN_ERR
1907                                 "Domain context map for %s failed\n",
1908                                 pci_name(pdev));
1909                         return NULL;
1910                 }
1911         }
1912
1913         return domain;
1914 }
1915
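/*
 * Common single-buffer map path: look up (or create) the device's domain,
 * allocate an IOVA big enough for the page-rounded buffer, derive the PTE
 * permissions from the DMA direction (read access is also granted when the
 * IOMMU cannot do zero-length reads), install the mapping and flush the
 * IOTLB.  The returned bus address keeps the original offset within the page.
 */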
1916 static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
1917                                      size_t size, int dir, u64 dma_mask)
1918 {
1919         struct pci_dev *pdev = to_pci_dev(hwdev);
1920         struct dmar_domain *domain;
1921         phys_addr_t start_paddr;
1922         struct iova *iova;
1923         int prot = 0;
1924         int ret;
1925
1926         BUG_ON(dir == DMA_NONE);
1927         if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
1928                 return paddr;
1929
1930         domain = get_valid_domain_for_dev(pdev);
1931         if (!domain)
1932                 return 0;
1933
1934         size = aligned_size((u64)paddr, size);
1935
1936         iova = __intel_alloc_iova(hwdev, domain, size, pdev->dma_mask);
1937         if (!iova)
1938                 goto error;
1939
1940         start_paddr = (phys_addr_t)iova->pfn_lo << PAGE_SHIFT;
1941
1942         /*
1943          * Check if the DMAR supports zero-length reads on write-only
1944          * mappings.
1945          */
1946         if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
1947                         !cap_zlr(domain->iommu->cap))
1948                 prot |= DMA_PTE_READ;
1949         if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
1950                 prot |= DMA_PTE_WRITE;
1951         /*
1952          * The range paddr .. (paddr + size) may cover only part of a page,
1953          * but we must map whole pages.  Note: if two parts of one page are
1954          * mapped separately, two guest addresses may map to the same host
1955          * paddr, but this is not a big problem.
1956          */
1957         ret = domain_page_mapping(domain, start_paddr,
1958                 ((u64)paddr) & PAGE_MASK, size, prot);
1959         if (ret)
1960                 goto error;
1961
1962         /* it's a non-present to present mapping */
1963         ret = iommu_flush_iotlb_psi(domain->iommu, domain->id,
1964                         start_paddr, size >> VTD_PAGE_SHIFT, 1);
1965         if (ret)
1966                 iommu_flush_write_buffer(domain->iommu);
1967
1968         return start_paddr + ((u64)paddr & (~PAGE_MASK));
1969
1970 error:
1971         if (iova)
1972                 __free_iova(&domain->iovad, iova);
1973         printk(KERN_ERR "Device %s request: %lx@%llx dir %d --- failed\n",
1974                 pci_name(pdev), size, (unsigned long long)paddr, dir);
1975         return 0;
1976 }
1977
1978 dma_addr_t intel_map_single(struct device *hwdev, phys_addr_t paddr,
1979                             size_t size, int dir)
1980 {
1981         return __intel_map_single(hwdev, paddr, size, dir,
1982                                   to_pci_dev(hwdev)->dma_mask);
1983 }
1984
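/*
 * Deferred-unmap machinery: instead of flushing the IOTLB on every unmap,
 * freed IOVAs are parked per IOMMU in deferred_flush[] and released in
 * batches by flush_unmaps(), which issues one global IOTLB flush per IOMMU.
 * A batch is drained either when HIGH_WATER_MARK entries have queued up or
 * when the 10ms unmap_timer fires.
 */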
1985 static void flush_unmaps(void)
1986 {
1987         int i, j;
1988
1989         timer_on = 0;
1990
1991         /* just flush them all */
1992         for (i = 0; i < g_num_of_iommus; i++) {
1993                 if (deferred_flush[i].next) {
1994                         struct intel_iommu *iommu =
1995                                 deferred_flush[i].domain[0]->iommu;
1996
1997                         iommu->flush.flush_iotlb(iommu, 0, 0, 0,
1998                                                  DMA_TLB_GLOBAL_FLUSH, 0);
1999                         for (j = 0; j < deferred_flush[i].next; j++) {
2000                                 __free_iova(&deferred_flush[i].domain[j]->iovad,
2001                                                 deferred_flush[i].iova[j]);
2002                         }
2003                         deferred_flush[i].next = 0;
2004                 }
2005         }
2006
2007         list_size = 0;
2008 }
2009
2010 static void flush_unmaps_timeout(unsigned long data)
2011 {
2012         unsigned long flags;
2013
2014         spin_lock_irqsave(&async_umap_flush_lock, flags);
2015         flush_unmaps();
2016         spin_unlock_irqrestore(&async_umap_flush_lock, flags);
2017 }
2018
2019 static void add_unmap(struct dmar_domain *dom, struct iova *iova)
2020 {
2021         unsigned long flags;
2022         int next, iommu_id;
2023
2024         spin_lock_irqsave(&async_umap_flush_lock, flags);
2025         if (list_size == HIGH_WATER_MARK)
2026                 flush_unmaps();
2027
2028         iommu_id = dom->iommu->seq_id;
2029
2030         next = deferred_flush[iommu_id].next;
2031         deferred_flush[iommu_id].domain[next] = dom;
2032         deferred_flush[iommu_id].iova[next] = iova;
2033         deferred_flush[iommu_id].next++;
2034
2035         if (!timer_on) {
2036                 mod_timer(&unmap_timer, jiffies + msecs_to_jiffies(10));
2037                 timer_on = 1;
2038         }
2039         list_size++;
2040         spin_unlock_irqrestore(&async_umap_flush_lock, flags);
2041 }
2042
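/*
 * Unmap a single buffer: clear the PTEs and free the page-table pages for
 * the whole IOVA range, then either flush the IOTLB and free the IOVA
 * immediately (intel_iommu_strict) or hand the IOVA to the deferred-flush
 * batch above.
 */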
2043 void intel_unmap_single(struct device *dev, dma_addr_t dev_addr, size_t size,
2044                         int dir)
2045 {
2046         struct pci_dev *pdev = to_pci_dev(dev);
2047         struct dmar_domain *domain;
2048         unsigned long start_addr;
2049         struct iova *iova;
2050
2051         if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
2052                 return;
2053         domain = find_domain(pdev);
2054         BUG_ON(!domain);
2055
2056         iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr));
2057         if (!iova)
2058                 return;
2059
2060         start_addr = iova->pfn_lo << PAGE_SHIFT;
2061         size = aligned_size((u64)dev_addr, size);
2062
2063         pr_debug("Device %s unmapping: %lx@%llx\n",
2064                 pci_name(pdev), size, (unsigned long long)start_addr);
2065
2066         /*  clear the whole page */
2067         dma_pte_clear_range(domain, start_addr, start_addr + size);
2068         /* free page tables */
2069         dma_pte_free_pagetable(domain, start_addr, start_addr + size);
2070         if (intel_iommu_strict) {
2071                 if (iommu_flush_iotlb_psi(domain->iommu,
2072                         domain->id, start_addr, size >> VTD_PAGE_SHIFT, 0))
2073                         iommu_flush_write_buffer(domain->iommu);
2074                 /* free iova */
2075                 __free_iova(&domain->iovad, iova);
2076         } else {
2077                 add_unmap(domain, iova);
2078                 /*
2079                  * Queue up the release of the unmap to save roughly 1/6th of
2080                  * the CPU time otherwise used up by the IOTLB flush operation.
2081                  */
2082         }
2083 }
2084
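/*
 * Coherent allocations are plain zeroed pages mapped DMA_BIDIRECTIONAL
 * through __intel_map_single() against the device's coherent_dma_mask;
 * drivers normally reach this via dma_alloc_coherent() once intel_dma_ops
 * has been installed.
 */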
2085 void *intel_alloc_coherent(struct device *hwdev, size_t size,
2086                            dma_addr_t *dma_handle, gfp_t flags)
2087 {
2088         void *vaddr;
2089         int order;
2090
2091         size = PAGE_ALIGN(size);
2092         order = get_order(size);
2093         flags &= ~(GFP_DMA | GFP_DMA32);
2094
2095         vaddr = (void *)__get_free_pages(flags, order);
2096         if (!vaddr)
2097                 return NULL;
2098         memset(vaddr, 0, size);
2099
2100         *dma_handle = __intel_map_single(hwdev, virt_to_bus(vaddr), size,
2101                                          DMA_BIDIRECTIONAL,
2102                                          hwdev->coherent_dma_mask);
2103         if (*dma_handle)
2104                 return vaddr;
2105         free_pages((unsigned long)vaddr, order);
2106         return NULL;
2107 }
2108
2109 void intel_free_coherent(struct device *hwdev, size_t size, void *vaddr,
2110                          dma_addr_t dma_handle)
2111 {
2112         int order;
2113
2114         size = PAGE_ALIGN(size);
2115         order = get_order(size);
2116
2117         intel_unmap_single(hwdev, dma_handle, size, DMA_BIDIRECTIONAL);
2118         free_pages((unsigned long)vaddr, order);
2119 }
2120
2121 #define SG_ENT_VIRT_ADDRESS(sg) (sg_virt((sg)))
2122
2123 void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
2124                     int nelems, int dir)
2125 {
2126         int i;
2127         struct pci_dev *pdev = to_pci_dev(hwdev);
2128         struct dmar_domain *domain;
2129         unsigned long start_addr;
2130         struct iova *iova;
2131         size_t size = 0;
2132         void *addr;
2133         struct scatterlist *sg;
2134
2135         if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
2136                 return;
2137
2138         domain = find_domain(pdev);
2139
2140         iova = find_iova(&domain->iovad, IOVA_PFN(sglist[0].dma_address));
2141         if (!iova)
2142                 return;
2143         for_each_sg(sglist, sg, nelems, i) {
2144                 addr = SG_ENT_VIRT_ADDRESS(sg);
2145                 size += aligned_size((u64)addr, sg->length);
2146         }
2147
2148         start_addr = iova->pfn_lo << PAGE_SHIFT;
2149
2150         /*  clear the whole page */
2151         dma_pte_clear_range(domain, start_addr, start_addr + size);
2152         /* free page tables */
2153         dma_pte_free_pagetable(domain, start_addr, start_addr + size);
2154
2155         if (iommu_flush_iotlb_psi(domain->iommu, domain->id, start_addr,
2156                         size >> VTD_PAGE_SHIFT, 0))
2157                 iommu_flush_write_buffer(domain->iommu);
2158
2159         /* free iova */
2160         __free_iova(&domain->iovad, iova);
2161 }
2162
2163 static int intel_nontranslate_map_sg(struct device *hwdev,
2164         struct scatterlist *sglist, int nelems, int dir)
2165 {
2166         int i;
2167         struct scatterlist *sg;
2168
2169         for_each_sg(sglist, sg, nelems, i) {
2170                 BUG_ON(!sg_page(sg));
2171                 sg->dma_address = virt_to_bus(SG_ENT_VIRT_ADDRESS(sg));
2172                 sg->dma_length = sg->length;
2173         }
2174         return nelems;
2175 }
2176
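/*
 * Map a scatterlist into one contiguous IOVA range: the total page-rounded
 * length is allocated up front, each segment is then mapped at its running
 * offset, and on any mapping failure everything done so far is torn down
 * and 0 is returned.
 */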
2177 int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems,
2178                  int dir)
2179 {
2180         void *addr;
2181         int i;
2182         struct pci_dev *pdev = to_pci_dev(hwdev);
2183         struct dmar_domain *domain;
2184         size_t size = 0;
2185         int prot = 0;
2186         size_t offset = 0;
2187         struct iova *iova = NULL;
2188         int ret;
2189         struct scatterlist *sg;
2190         unsigned long start_addr;
2191
2192         BUG_ON(dir == DMA_NONE);
2193         if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
2194                 return intel_nontranslate_map_sg(hwdev, sglist, nelems, dir);
2195
2196         domain = get_valid_domain_for_dev(pdev);
2197         if (!domain)
2198                 return 0;
2199
2200         for_each_sg(sglist, sg, nelems, i) {
2201                 addr = SG_ENT_VIRT_ADDRESS(sg);
2202                 addr = (void *)virt_to_phys(addr);
2203                 size += aligned_size((u64)addr, sg->length);
2204         }
2205
2206         iova = __intel_alloc_iova(hwdev, domain, size, pdev->dma_mask);
2207         if (!iova) {
2208                 sglist->dma_length = 0;
2209                 return 0;
2210         }
2211
2212         /*
2213          * Check if the DMAR supports zero-length reads on write-only
2214          * mappings.
2215          */
2216         if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
2217                         !cap_zlr(domain->iommu->cap))
2218                 prot |= DMA_PTE_READ;
2219         if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
2220                 prot |= DMA_PTE_WRITE;
2221
2222         start_addr = iova->pfn_lo << PAGE_SHIFT;
2223         offset = 0;
2224         for_each_sg(sglist, sg, nelems, i) {
2225                 addr = SG_ENT_VIRT_ADDRESS(sg);
2226                 addr = (void *)virt_to_phys(addr);
2227                 size = aligned_size((u64)addr, sg->length);
2228                 ret = domain_page_mapping(domain, start_addr + offset,
2229                         ((u64)addr) & PAGE_MASK,
2230                         size, prot);
2231                 if (ret) {
2232                         /*  clear the page */
2233                         dma_pte_clear_range(domain, start_addr,
2234                                   start_addr + offset);
2235                         /* free page tables */
2236                         dma_pte_free_pagetable(domain, start_addr,
2237                                   start_addr + offset);
2238                         /* free iova */
2239                         __free_iova(&domain->iovad, iova);
2240                         return 0;
2241                 }
2242                 sg->dma_address = start_addr + offset +
2243                                 ((u64)addr & (~PAGE_MASK));
2244                 sg->dma_length = sg->length;
2245                 offset += size;
2246         }
2247
2248         /* it's a non-present to present mapping */
2249         if (iommu_flush_iotlb_psi(domain->iommu, domain->id,
2250                         start_addr, offset >> VTD_PAGE_SHIFT, 1))
2251                 iommu_flush_write_buffer(domain->iommu);
2252         return nelems;
2253 }
2254
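/*
 * These ops are installed as the global dma_ops by intel_iommu_init(), so
 * the generic DMA API (dma_map_single(), dma_map_sg(), dma_alloc_coherent()
 * and friends) is what routes drivers into the mapping functions above.
 */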
2255 static struct dma_mapping_ops intel_dma_ops = {
2256         .alloc_coherent = intel_alloc_coherent,
2257         .free_coherent = intel_free_coherent,
2258         .map_single = intel_map_single,
2259         .unmap_single = intel_unmap_single,
2260         .map_sg = intel_map_sg,
2261         .unmap_sg = intel_unmap_sg,
2262 };
2263
2264 static inline int iommu_domain_cache_init(void)
2265 {
2266         int ret = 0;
2267
2268         iommu_domain_cache = kmem_cache_create("iommu_domain",
2269                                          sizeof(struct dmar_domain),
2270                                          0,
2271                                          SLAB_HWCACHE_ALIGN,
2272                                          NULL);
2273
2274         if (!iommu_domain_cache) {
2275                 printk(KERN_ERR "Couldn't create iommu_domain cache\n");
2276                 ret = -ENOMEM;
2277         }
2278
2279         return ret;
2280 }
2281
2282 static inline int iommu_devinfo_cache_init(void)
2283 {
2284         int ret = 0;
2285
2286         iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
2287                                          sizeof(struct device_domain_info),
2288                                          0,
2289                                          SLAB_HWCACHE_ALIGN,
2290                                          NULL);
2291         if (!iommu_devinfo_cache) {
2292                 printk(KERN_ERR "Couldn't create devinfo cache\n");
2293                 ret = -ENOMEM;
2294         }
2295
2296         return ret;
2297 }
2298
2299 static inline int iommu_iova_cache_init(void)
2300 {
2301         int ret = 0;
2302
2303         iommu_iova_cache = kmem_cache_create("iommu_iova",
2304                                          sizeof(struct iova),
2305                                          0,
2306                                          SLAB_HWCACHE_ALIGN,
2307                                          NULL);
2308         if (!iommu_iova_cache) {
2309                 printk(KERN_ERR "Couldn't create iova cache\n");
2310                 ret = -ENOMEM;
2311         }
2312
2313         return ret;
2314 }
2315
2316 static int __init iommu_init_mempool(void)
2317 {
2318         int ret;
2319         ret = iommu_iova_cache_init();
2320         if (ret)
2321                 return ret;
2322
2323         ret = iommu_domain_cache_init();
2324         if (ret)
2325                 goto domain_error;
2326
2327         ret = iommu_devinfo_cache_init();
2328         if (!ret)
2329                 return ret;
2330
2331         kmem_cache_destroy(iommu_domain_cache);
2332 domain_error:
2333         kmem_cache_destroy(iommu_iova_cache);
2334
2335         return -ENOMEM;
2336 }
2337
2338 static void __init iommu_exit_mempool(void)
2339 {
2340         kmem_cache_destroy(iommu_devinfo_cache);
2341         kmem_cache_destroy(iommu_domain_cache);
2342         kmem_cache_destroy(iommu_iova_cache);
2343
2344 }
2345
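/*
 * Mark DMAR units that can never be used: units whose device scope lists no
 * present PCI device are ignored outright, and (unless dmar_map_gfx is set)
 * units that cover only graphics devices are bypassed by tagging those
 * devices with DUMMY_DEVICE_DOMAIN_INFO.
 */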
2346 static void __init init_no_remapping_devices(void)
2347 {
2348         struct dmar_drhd_unit *drhd;
2349
2350         for_each_drhd_unit(drhd) {
2351                 if (!drhd->include_all) {
2352                         int i;
2353                         for (i = 0; i < drhd->devices_cnt; i++)
2354                                 if (drhd->devices[i] != NULL)
2355                                         break;
2356                         /* ignore the DMAR unit if no PCI devices exist under it */
2357                         if (i == drhd->devices_cnt)
2358                                 drhd->ignored = 1;
2359                 }
2360         }
2361
2362         if (dmar_map_gfx)
2363                 return;
2364
2365         for_each_drhd_unit(drhd) {
2366                 int i;
2367                 if (drhd->ignored || drhd->include_all)
2368                         continue;
2369
2370                 for (i = 0; i < drhd->devices_cnt; i++)
2371                         if (drhd->devices[i] &&
2372                                 !IS_GFX_DEVICE(drhd->devices[i]))
2373                                 break;
2374
2375                 if (i < drhd->devices_cnt)
2376                         continue;
2377
2378                 /* bypass IOMMU if it is just for gfx devices */
2379                 drhd->ignored = 1;
2380                 for (i = 0; i < drhd->devices_cnt; i++) {
2381                         if (!drhd->devices[i])
2382                                 continue;
2383                         drhd->devices[i]->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
2384                 }
2385         }
2386 }
2387
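/*
 * Driver entry point: parse the DMAR table and device scopes, bail out if
 * DMA remapping is not wanted (no_iommu, swiotlb or dmar_disabled), set up
 * the memory pools and reserved IOVA ranges, initialize the DMAR units via
 * init_dmars() and finally switch dma_ops over to the IOMMU.
 */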
2388 int __init intel_iommu_init(void)
2389 {
2390         int ret = 0;
2391
2392         if (dmar_table_init())
2393                 return  -ENODEV;
2394
2395         if (dmar_dev_scope_init())
2396                 return  -ENODEV;
2397
2398         /*
2399          * Check the need for DMA-remapping initialization now.
2400          * The initialization above is also used by interrupt remapping.
2401          */
2402         if (no_iommu || swiotlb || dmar_disabled)
2403                 return -ENODEV;
2404
2405         iommu_init_mempool();
2406         dmar_init_reserved_ranges();
2407
2408         init_no_remapping_devices();
2409
2410         ret = init_dmars();
2411         if (ret) {
2412                 printk(KERN_ERR "IOMMU: dmar init failed\n");
2413                 put_iova_domain(&reserved_iova_list);
2414                 iommu_exit_mempool();
2415                 return ret;
2416         }
2417         printk(KERN_INFO
2418         "PCI-DMA: Intel(R) Virtualization Technology for Directed I/O\n");
2419
2420         init_timer(&unmap_timer);
2421         force_iommu = 1;
2422         dma_ops = &intel_dma_ops;
2423         return 0;
2424 }
2425
2426 void intel_iommu_domain_exit(struct dmar_domain *domain)
2427 {
2428         u64 end;
2429
2430         /* Domain 0 is reserved, so don't process it */
2431         if (!domain)
2432                 return;
2433
2434         end = DOMAIN_MAX_ADDR(domain->gaw);
2435         end = end & (~VTD_PAGE_MASK);
2436
2437         /* clear ptes */
2438         dma_pte_clear_range(domain, 0, end);
2439
2440         /* free page tables */
2441         dma_pte_free_pagetable(domain, 0, end);
2442
2443         iommu_free_domain(domain);
2444         free_domain_mem(domain);
2445 }
2446 EXPORT_SYMBOL_GPL(intel_iommu_domain_exit);
2447
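/*
 * The exported intel_iommu_* helpers below give external code (e.g. a
 * device-assignment backend) direct control over DMAR domains.  A minimal,
 * purely illustrative sequence - not taken from any in-tree caller, and
 * with iova/hpa/size chosen by the caller - might look like:
 *
 *	struct dmar_domain *dom = intel_iommu_domain_alloc(pdev);
 *
 *	if (dom && !intel_iommu_context_mapping(dom, pdev))
 *		intel_iommu_page_mapping(dom, iova, hpa, size,
 *					 DMA_PTE_READ | DMA_PTE_WRITE);
 *	...
 *	intel_iommu_detach_dev(dom, pdev->bus->number, pdev->devfn);
 *	intel_iommu_domain_exit(dom);
 */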
2448 struct dmar_domain *intel_iommu_domain_alloc(struct pci_dev *pdev)
2449 {
2450         struct dmar_drhd_unit *drhd;
2451         struct dmar_domain *domain;
2452         struct intel_iommu *iommu;
2453
2454         drhd = dmar_find_matched_drhd_unit(pdev);
2455         if (!drhd) {
2456                 printk(KERN_ERR "intel_iommu_domain_alloc: drhd == NULL\n");
2457                 return NULL;
2458         }
2459
2460         iommu = drhd->iommu;
2461         if (!iommu) {
2462                 printk(KERN_ERR
2463                         "intel_iommu_domain_alloc: iommu == NULL\n");
2464                 return NULL;
2465         }
2466         domain = iommu_alloc_domain(iommu);
2467         if (!domain) {
2468                 printk(KERN_ERR
2469                         "intel_iommu_domain_alloc: domain == NULL\n");
2470                 return NULL;
2471         }
2472         if (domain_init(domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
2473                 printk(KERN_ERR
2474                         "intel_iommu_domain_alloc: domain_init() failed\n");
2475                 intel_iommu_domain_exit(domain);
2476                 return NULL;
2477         }
2478         return domain;
2479 }
2480 EXPORT_SYMBOL_GPL(intel_iommu_domain_alloc);
2481
2482 int intel_iommu_context_mapping(
2483         struct dmar_domain *domain, struct pci_dev *pdev)
2484 {
2485         int rc;
2486         rc = domain_context_mapping(domain, pdev);
2487         return rc;
2488 }
2489 EXPORT_SYMBOL_GPL(intel_iommu_context_mapping);
2490
2491 int intel_iommu_page_mapping(
2492         struct dmar_domain *domain, dma_addr_t iova,
2493         u64 hpa, size_t size, int prot)
2494 {
2495         int rc;
2496         rc = domain_page_mapping(domain, iova, hpa, size, prot);
2497         return rc;
2498 }
2499 EXPORT_SYMBOL_GPL(intel_iommu_page_mapping);
2500
2501 void intel_iommu_detach_dev(struct dmar_domain *domain, u8 bus, u8 devfn)
2502 {
2503         detach_domain_for_dev(domain, bus, devfn);
2504 }
2505 EXPORT_SYMBOL_GPL(intel_iommu_detach_dev);
2506
2507 struct dmar_domain *
2508 intel_iommu_find_domain(struct pci_dev *pdev)
2509 {
2510         return find_domain(pdev);
2511 }
2512 EXPORT_SYMBOL_GPL(intel_iommu_find_domain);
2513
2514 int intel_iommu_found(void)
2515 {
2516         return g_num_of_iommus;
2517 }
2518 EXPORT_SYMBOL_GPL(intel_iommu_found);
2519
2520 u64 intel_iommu_iova_to_pfn(struct dmar_domain *domain, u64 iova)
2521 {
2522         struct dma_pte *pte;
2523         u64 pfn;
2524
2525         pfn = 0;
2526         pte = addr_to_dma_pte(domain, iova);
2527
2528         if (pte)
2529                 pfn = dma_pte_addr(*pte);
2530
2531         return pfn >> VTD_PAGE_SHIFT;
2532 }
2533 EXPORT_SYMBOL_GPL(intel_iommu_iova_to_pfn);