Add/remove domain device info for virtual machine domain
[firefly-linux-kernel-4.4.55.git] / drivers / pci / intel-iommu.c
1 /*
2  * Copyright (c) 2006, Intel Corporation.
3  *
4  * This program is free software; you can redistribute it and/or modify it
5  * under the terms and conditions of the GNU General Public License,
6  * version 2, as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope it will be useful, but WITHOUT
9  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
11  * more details.
12  *
13  * You should have received a copy of the GNU General Public License along with
14  * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
15  * Place - Suite 330, Boston, MA 02111-1307 USA.
16  *
17  * Copyright (C) 2006-2008 Intel Corporation
18  * Author: Ashok Raj <ashok.raj@intel.com>
19  * Author: Shaohua Li <shaohua.li@intel.com>
20  * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
21  * Author: Fenghua Yu <fenghua.yu@intel.com>
22  */
23
24 #include <linux/init.h>
25 #include <linux/bitmap.h>
26 #include <linux/debugfs.h>
27 #include <linux/slab.h>
28 #include <linux/irq.h>
29 #include <linux/interrupt.h>
30 #include <linux/spinlock.h>
31 #include <linux/pci.h>
32 #include <linux/dmar.h>
33 #include <linux/dma-mapping.h>
34 #include <linux/mempool.h>
35 #include <linux/timer.h>
36 #include <linux/iova.h>
37 #include <linux/intel-iommu.h>
38 #include <asm/cacheflush.h>
39 #include <asm/iommu.h>
40 #include "pci.h"
41
42 #define ROOT_SIZE               VTD_PAGE_SIZE
43 #define CONTEXT_SIZE            VTD_PAGE_SIZE
44
45 #define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
46 #define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
47
48 #define IOAPIC_RANGE_START      (0xfee00000)
49 #define IOAPIC_RANGE_END        (0xfeefffff)
50 #define IOVA_START_ADDR         (0x1000)
51
52 #define DEFAULT_DOMAIN_ADDRESS_WIDTH 48
53
54 #define DOMAIN_MAX_ADDR(gaw) ((((u64)1) << gaw) - 1)
55
56 #define IOVA_PFN(addr)          ((addr) >> PAGE_SHIFT)
57 #define DMA_32BIT_PFN           IOVA_PFN(DMA_32BIT_MASK)
58 #define DMA_64BIT_PFN           IOVA_PFN(DMA_64BIT_MASK)
59
60 /* global iommu list, set NULL for ignored DMAR units */
61 static struct intel_iommu **g_iommus;
62
63 /*
64  * 0: Present
65  * 1-11: Reserved
66  * 12-63: Context Ptr (12 - (haw-1))
67  * 64-127: Reserved
68  */
69 struct root_entry {
70         u64     val;
71         u64     rsvd1;
72 };
73 #define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
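/* a 4KiB root table holds 4096 / 16 = 256 root entries, one per PCI bus number */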
74 static inline bool root_present(struct root_entry *root)
75 {
76         return (root->val & 1);
77 }
78 static inline void set_root_present(struct root_entry *root)
79 {
80         root->val |= 1;
81 }
82 static inline void set_root_value(struct root_entry *root, unsigned long value)
83 {
84         root->val |= value & VTD_PAGE_MASK;
85 }
86
87 static inline struct context_entry *
88 get_context_addr_from_root(struct root_entry *root)
89 {
90         return (struct context_entry *)
91                 (root_present(root)?phys_to_virt(
92                 root->val & VTD_PAGE_MASK) :
93                 NULL);
94 }
95
96 /*
97  * low 64 bits:
98  * 0: present
99  * 1: fault processing disable
100  * 2-3: translation type
101  * 12-63: address space root
102  * high 64 bits:
103  * 0-2: address width
104  * 3-6: avail
105  * 8-23: domain id
106  */
107 struct context_entry {
108         u64 lo;
109         u64 hi;
110 };
111
112 static inline bool context_present(struct context_entry *context)
113 {
114         return (context->lo & 1);
115 }
116 static inline void context_set_present(struct context_entry *context)
117 {
118         context->lo |= 1;
119 }
120
121 static inline void context_set_fault_enable(struct context_entry *context)
122 {
123         context->lo &= (((u64)-1) << 2) | 1;
124 }
125
126 #define CONTEXT_TT_MULTI_LEVEL 0
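/* translation type 0: DMA requests are translated through the multi-level
 * page table whose base is programmed into the address-space-root field */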
127
128 static inline void context_set_translation_type(struct context_entry *context,
129                                                 unsigned long value)
130 {
131         context->lo &= (((u64)-1) << 4) | 3;
132         context->lo |= (value & 3) << 2;
133 }
134
135 static inline void context_set_address_root(struct context_entry *context,
136                                             unsigned long value)
137 {
138         context->lo |= value & VTD_PAGE_MASK;
139 }
140
141 static inline void context_set_address_width(struct context_entry *context,
142                                              unsigned long value)
143 {
144         context->hi |= value & 7;
145 }
146
147 static inline void context_set_domain_id(struct context_entry *context,
148                                          unsigned long value)
149 {
150         context->hi |= (value & ((1 << 16) - 1)) << 8;
151 }
152
153 static inline void context_clear_entry(struct context_entry *context)
154 {
155         context->lo = 0;
156         context->hi = 0;
157 }
158
159 /*
160  * 0: readable
161  * 1: writable
162  * 2-6: reserved
163  * 7: super page
164  * 8-11: available
165  * 12-63: Host physical address
166  */
167 struct dma_pte {
168         u64 val;
169 };
170
171 static inline void dma_clear_pte(struct dma_pte *pte)
172 {
173         pte->val = 0;
174 }
175
176 static inline void dma_set_pte_readable(struct dma_pte *pte)
177 {
178         pte->val |= DMA_PTE_READ;
179 }
180
181 static inline void dma_set_pte_writable(struct dma_pte *pte)
182 {
183         pte->val |= DMA_PTE_WRITE;
184 }
185
186 static inline void dma_set_pte_prot(struct dma_pte *pte, unsigned long prot)
187 {
188         pte->val = (pte->val & ~3) | (prot & 3);
189 }
190
191 static inline u64 dma_pte_addr(struct dma_pte *pte)
192 {
193         return (pte->val & VTD_PAGE_MASK);
194 }
195
196 static inline void dma_set_pte_addr(struct dma_pte *pte, u64 addr)
197 {
198         pte->val |= (addr & VTD_PAGE_MASK);
199 }
200
201 static inline bool dma_pte_present(struct dma_pte *pte)
202 {
203         return (pte->val & 3) != 0;
204 }
205
206 /* devices under the same p2p bridge are owned in one domain */
207 #define DOMAIN_FLAG_P2P_MULTIPLE_DEVICES (1 << 0)
208
209 /* domain represents a virtual machine; more than one device
210  * across iommus may be owned by one domain, e.g. a kvm guest.
211  */
212 #define DOMAIN_FLAG_VIRTUAL_MACHINE     (1 << 1)
213
214 struct dmar_domain {
215         int     id;                     /* domain id */
216         unsigned long iommu_bmp;        /* bitmap of iommus this domain uses*/
217
218         struct list_head devices;       /* all devices' list */
219         struct iova_domain iovad;       /* iova's that belong to this domain */
220
221         struct dma_pte  *pgd;           /* virtual address */
222         spinlock_t      mapping_lock;   /* page table lock */
223         int             gaw;            /* max guest address width */
224
225         /* adjusted guest address width, 0 is level 2 30-bit */
226         int             agaw;
227
228         int             flags;          /* flags to find out type of domain */
229
230         int             iommu_coherency;/* indicate coherency of iommu access */
231         int             iommu_count;    /* reference count of iommu */
232         spinlock_t      iommu_lock;     /* protect iommu set in domain */
233 };
234
235 /* PCI domain-device relationship */
236 struct device_domain_info {
237         struct list_head link;  /* link to domain siblings */
238         struct list_head global; /* link to global list */
239         u8 bus;                 /* PCI bus number */
240         u8 devfn;               /* PCI devfn number */
241         struct pci_dev *dev; /* it's NULL for a PCIe-to-PCI bridge */
242         struct dmar_domain *domain; /* pointer to domain */
243 };
244
245 static void flush_unmaps_timeout(unsigned long data);
246
247 DEFINE_TIMER(unmap_timer,  flush_unmaps_timeout, 0, 0);
248
249 #define HIGH_WATER_MARK 250
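/* Unmapped iovas are queued in these tables and released in one batch by
 * flush_unmaps_timeout(), so each unmap does not pay for a synchronous
 * IOTLB flush; booting with intel_iommu=strict disables this batching. */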
250 struct deferred_flush_tables {
251         int next;
252         struct iova *iova[HIGH_WATER_MARK];
253         struct dmar_domain *domain[HIGH_WATER_MARK];
254 };
255
256 static struct deferred_flush_tables *deferred_flush;
257
258 /* number of iommus, used to size g_iommus[] and the per-domain iommu bitmaps */
259 static int g_num_of_iommus;
260
261 static DEFINE_SPINLOCK(async_umap_flush_lock);
262 static LIST_HEAD(unmaps_to_do);
263
264 static int timer_on;
265 static long list_size;
266
267 static void domain_remove_dev_info(struct dmar_domain *domain);
268
269 int dmar_disabled;
270 static int __initdata dmar_map_gfx = 1;
271 static int dmar_forcedac;
272 static int intel_iommu_strict;
273
274 #define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
275 static DEFINE_SPINLOCK(device_domain_lock);
276 static LIST_HEAD(device_domain_list);
277
278 static int __init intel_iommu_setup(char *str)
279 {
280         if (!str)
281                 return -EINVAL;
282         while (*str) {
283                 if (!strncmp(str, "off", 3)) {
284                         dmar_disabled = 1;
285                         printk(KERN_INFO"Intel-IOMMU: disabled\n");
286                 } else if (!strncmp(str, "igfx_off", 8)) {
287                         dmar_map_gfx = 0;
288                         printk(KERN_INFO
289                                 "Intel-IOMMU: disable GFX device mapping\n");
290                 } else if (!strncmp(str, "forcedac", 8)) {
291                         printk(KERN_INFO
292                                 "Intel-IOMMU: Forcing DAC for PCI devices\n");
293                         dmar_forcedac = 1;
294                 } else if (!strncmp(str, "strict", 6)) {
295                         printk(KERN_INFO
296                                 "Intel-IOMMU: disable batched IOTLB flush\n");
297                         intel_iommu_strict = 1;
298                 }
299
300                 str += strcspn(str, ",");
301                 while (*str == ',')
302                         str++;
303         }
304         return 0;
305 }
306 __setup("intel_iommu=", intel_iommu_setup);
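/* e.g. booting with "intel_iommu=igfx_off,strict" disables GFX device
 * mapping and batched IOTLB flushing; unrecognized tokens are ignored. */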
307
308 static struct kmem_cache *iommu_domain_cache;
309 static struct kmem_cache *iommu_devinfo_cache;
310 static struct kmem_cache *iommu_iova_cache;
311
312 static inline void *iommu_kmem_cache_alloc(struct kmem_cache *cachep)
313 {
314         unsigned int flags;
315         void *vaddr;
316
317         /* trying to avoid low memory issues */
318         flags = current->flags & PF_MEMALLOC;
319         current->flags |= PF_MEMALLOC;
320         vaddr = kmem_cache_alloc(cachep, GFP_ATOMIC);
321         current->flags &= (~PF_MEMALLOC | flags);
322         return vaddr;
323 }
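/* Both iommu_kmem_cache_alloc() above and alloc_pgtable_page() below briefly
 * set PF_MEMALLOC so their GFP_ATOMIC allocations may dip into the emergency
 * reserves; the caller's original PF_MEMALLOC bit is restored afterwards. */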
324
325
326 static inline void *alloc_pgtable_page(void)
327 {
328         unsigned int flags;
329         void *vaddr;
330
331         /* trying to avoid low memory issues */
332         flags = current->flags & PF_MEMALLOC;
333         current->flags |= PF_MEMALLOC;
334         vaddr = (void *)get_zeroed_page(GFP_ATOMIC);
335         current->flags &= (~PF_MEMALLOC | flags);
336         return vaddr;
337 }
338
339 static inline void free_pgtable_page(void *vaddr)
340 {
341         free_page((unsigned long)vaddr);
342 }
343
344 static inline void *alloc_domain_mem(void)
345 {
346         return iommu_kmem_cache_alloc(iommu_domain_cache);
347 }
348
349 static void free_domain_mem(void *vaddr)
350 {
351         kmem_cache_free(iommu_domain_cache, vaddr);
352 }
353
354 static inline void * alloc_devinfo_mem(void)
355 {
356         return iommu_kmem_cache_alloc(iommu_devinfo_cache);
357 }
358
359 static inline void free_devinfo_mem(void *vaddr)
360 {
361         kmem_cache_free(iommu_devinfo_cache, vaddr);
362 }
363
364 struct iova *alloc_iova_mem(void)
365 {
366         return iommu_kmem_cache_alloc(iommu_iova_cache);
367 }
368
369 void free_iova_mem(struct iova *iova)
370 {
371         kmem_cache_free(iommu_iova_cache, iova);
372 }
373
374
375 static inline int width_to_agaw(int width);
376
377 /* calculate agaw for each iommu.
378  * "SAGAW" may be different across iommus; start with a default agaw and
379  * fall back to a smaller supported agaw for iommus that don't support it.
380  */
381 int iommu_calculate_agaw(struct intel_iommu *iommu)
382 {
383         unsigned long sagaw;
384         int agaw = -1;
385
386         sagaw = cap_sagaw(iommu->cap);
387         for (agaw = width_to_agaw(DEFAULT_DOMAIN_ADDRESS_WIDTH);
388              agaw >= 0; agaw--) {
389                 if (test_bit(agaw, &sagaw))
390                         break;
391         }
392
393         return agaw;
394 }
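/* e.g. with DEFAULT_DOMAIN_ADDRESS_WIDTH == 48 the search starts at agaw 2
 * (48-bit, 4-level table) and falls back to agaw 1 (39-bit) or agaw 0
 * (30-bit) if SAGAW lacks the larger widths; -1 means nothing is supported. */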
395
396 /* in the native (non virtual-machine) case, each domain is attached to only one iommu */
397 static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
398 {
399         int iommu_id;
400
401         BUG_ON(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE);
402
403         iommu_id = find_first_bit(&domain->iommu_bmp, g_num_of_iommus);
404         if (iommu_id < 0 || iommu_id >= g_num_of_iommus)
405                 return NULL;
406
407         return g_iommus[iommu_id];
408 }
409
410 /* "Coherency" capability may be different across iommus */
411 static void domain_update_iommu_coherency(struct dmar_domain *domain)
412 {
413         int i;
414
415         domain->iommu_coherency = 1;
416
417         i = find_first_bit(&domain->iommu_bmp, g_num_of_iommus);
418         for (; i < g_num_of_iommus; ) {
419                 if (!ecap_coherent(g_iommus[i]->ecap)) {
420                         domain->iommu_coherency = 0;
421                         break;
422                 }
423                 i = find_next_bit(&domain->iommu_bmp, g_num_of_iommus, i+1);
424         }
425 }
426
427 static struct intel_iommu *device_to_iommu(u8 bus, u8 devfn)
428 {
429         struct dmar_drhd_unit *drhd = NULL;
430         int i;
431
432         for_each_drhd_unit(drhd) {
433                 if (drhd->ignored)
434                         continue;
435
436                 for (i = 0; i < drhd->devices_cnt; i++)
437                         if (drhd->devices[i]->bus->number == bus &&
438                             drhd->devices[i]->devfn == devfn)
439                                 return drhd->iommu;
440
441                 if (drhd->include_all)
442                         return drhd->iommu;
443         }
444
445         return NULL;
446 }
447
448 /* Gets context entry for a given bus and devfn */
449 static struct context_entry * device_to_context_entry(struct intel_iommu *iommu,
450                 u8 bus, u8 devfn)
451 {
452         struct root_entry *root;
453         struct context_entry *context;
454         unsigned long phy_addr;
455         unsigned long flags;
456
457         spin_lock_irqsave(&iommu->lock, flags);
458         root = &iommu->root_entry[bus];
459         context = get_context_addr_from_root(root);
460         if (!context) {
461                 context = (struct context_entry *)alloc_pgtable_page();
462                 if (!context) {
463                         spin_unlock_irqrestore(&iommu->lock, flags);
464                         return NULL;
465                 }
466                 __iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
467                 phy_addr = virt_to_phys((void *)context);
468                 set_root_value(root, phy_addr);
469                 set_root_present(root);
470                 __iommu_flush_cache(iommu, root, sizeof(*root));
471         }
472         spin_unlock_irqrestore(&iommu->lock, flags);
473         return &context[devfn];
474 }
475
476 static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
477 {
478         struct root_entry *root;
479         struct context_entry *context;
480         int ret;
481         unsigned long flags;
482
483         spin_lock_irqsave(&iommu->lock, flags);
484         root = &iommu->root_entry[bus];
485         context = get_context_addr_from_root(root);
486         if (!context) {
487                 ret = 0;
488                 goto out;
489         }
490         ret = context_present(&context[devfn]);
491 out:
492         spin_unlock_irqrestore(&iommu->lock, flags);
493         return ret;
494 }
495
496 static void clear_context_table(struct intel_iommu *iommu, u8 bus, u8 devfn)
497 {
498         struct root_entry *root;
499         struct context_entry *context;
500         unsigned long flags;
501
502         spin_lock_irqsave(&iommu->lock, flags);
503         root = &iommu->root_entry[bus];
504         context = get_context_addr_from_root(root);
505         if (context) {
506                 context_clear_entry(&context[devfn]);
507                 __iommu_flush_cache(iommu, &context[devfn], \
508                         sizeof(*context));
509         }
510         spin_unlock_irqrestore(&iommu->lock, flags);
511 }
512
513 static void free_context_table(struct intel_iommu *iommu)
514 {
515         struct root_entry *root;
516         int i;
517         unsigned long flags;
518         struct context_entry *context;
519
520         spin_lock_irqsave(&iommu->lock, flags);
521         if (!iommu->root_entry) {
522                 goto out;
523         }
524         for (i = 0; i < ROOT_ENTRY_NR; i++) {
525                 root = &iommu->root_entry[i];
526                 context = get_context_addr_from_root(root);
527                 if (context)
528                         free_pgtable_page(context);
529         }
530         free_pgtable_page(iommu->root_entry);
531         iommu->root_entry = NULL;
532 out:
533         spin_unlock_irqrestore(&iommu->lock, flags);
534 }
535
536 /* page table handling */
537 #define LEVEL_STRIDE            (9)
538 #define LEVEL_MASK              (((u64)1 << LEVEL_STRIDE) - 1)
539
540 static inline int agaw_to_level(int agaw)
541 {
542         return agaw + 2;
543 }
544
545 static inline int agaw_to_width(int agaw)
546 {
547         return 30 + agaw * LEVEL_STRIDE;
548
549 }
550
551 static inline int width_to_agaw(int width)
552 {
553         return (width - 30) / LEVEL_STRIDE;
554 }
555
556 static inline unsigned int level_to_offset_bits(int level)
557 {
558         return (12 + (level - 1) * LEVEL_STRIDE);
559 }
560
561 static inline int address_level_offset(u64 addr, int level)
562 {
563         return ((addr >> level_to_offset_bits(level)) & LEVEL_MASK);
564 }
565
566 static inline u64 level_mask(int level)
567 {
568         return ((u64)-1 << level_to_offset_bits(level));
569 }
570
571 static inline u64 level_size(int level)
572 {
573         return ((u64)1 << level_to_offset_bits(level));
574 }
575
576 static inline u64 align_to_level(u64 addr, int level)
577 {
578         return ((addr + level_size(level) - 1) & level_mask(level));
579 }
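/* For illustration, with a 4-level table (agaw 2, 48-bit width)
 * address_level_offset() selects bits 47:39 at level 4, 38:30 at level 3,
 * 29:21 at level 2 and 20:12 at level 1, i.e. LEVEL_STRIDE (9) bits per level. */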
580
581 static struct dma_pte * addr_to_dma_pte(struct dmar_domain *domain, u64 addr)
582 {
583         int addr_width = agaw_to_width(domain->agaw);
584         struct dma_pte *parent, *pte = NULL;
585         int level = agaw_to_level(domain->agaw);
586         int offset;
587         unsigned long flags;
588         struct intel_iommu *iommu = domain_get_iommu(domain);
589
590         BUG_ON(!domain->pgd);
591
592         addr &= (((u64)1) << addr_width) - 1;
593         parent = domain->pgd;
594
595         spin_lock_irqsave(&domain->mapping_lock, flags);
596         while (level > 0) {
597                 void *tmp_page;
598
599                 offset = address_level_offset(addr, level);
600                 pte = &parent[offset];
601                 if (level == 1)
602                         break;
603
604                 if (!dma_pte_present(pte)) {
605                         tmp_page = alloc_pgtable_page();
606
607                         if (!tmp_page) {
608                                 spin_unlock_irqrestore(&domain->mapping_lock,
609                                         flags);
610                                 return NULL;
611                         }
612                         __iommu_flush_cache(iommu, tmp_page,
613                                         PAGE_SIZE);
614                         dma_set_pte_addr(pte, virt_to_phys(tmp_page));
615                         /*
616                          * higher-level tables always set r/w; the last-level
617                          * page table controls read/write
618                          */
619                         dma_set_pte_readable(pte);
620                         dma_set_pte_writable(pte);
621                         __iommu_flush_cache(iommu, pte, sizeof(*pte));
622                 }
623                 parent = phys_to_virt(dma_pte_addr(pte));
624                 level--;
625         }
626
627         spin_unlock_irqrestore(&domain->mapping_lock, flags);
628         return pte;
629 }
630
631 /* return address's pte at specific level */
632 static struct dma_pte *dma_addr_level_pte(struct dmar_domain *domain, u64 addr,
633                 int level)
634 {
635         struct dma_pte *parent, *pte = NULL;
636         int total = agaw_to_level(domain->agaw);
637         int offset;
638
639         parent = domain->pgd;
640         while (level <= total) {
641                 offset = address_level_offset(addr, total);
642                 pte = &parent[offset];
643                 if (level == total)
644                         return pte;
645
646                 if (!dma_pte_present(pte))
647                         break;
648                 parent = phys_to_virt(dma_pte_addr(pte));
649                 total--;
650         }
651         return NULL;
652 }
653
654 /* clear one page's page table */
655 static void dma_pte_clear_one(struct dmar_domain *domain, u64 addr)
656 {
657         struct dma_pte *pte = NULL;
658         struct intel_iommu *iommu = domain_get_iommu(domain);
659
660         /* get last level pte */
661         pte = dma_addr_level_pte(domain, addr, 1);
662
663         if (pte) {
664                 dma_clear_pte(pte);
665                 __iommu_flush_cache(iommu, pte, sizeof(*pte));
666         }
667 }
668
669 /* clear last-level ptes; a tlb flush should follow */
670 static void dma_pte_clear_range(struct dmar_domain *domain, u64 start, u64 end)
671 {
672         int addr_width = agaw_to_width(domain->agaw);
673
674         start &= (((u64)1) << addr_width) - 1;
675         end &= (((u64)1) << addr_width) - 1;
676         /* in case it's partial page */
677         start = PAGE_ALIGN(start);
678         end &= PAGE_MASK;
679
680         /* we don't need lock here, nobody else touches the iova range */
681         while (start < end) {
682                 dma_pte_clear_one(domain, start);
683                 start += VTD_PAGE_SIZE;
684         }
685 }
686
687 /* free page table pages. last level pte should already be cleared */
688 static void dma_pte_free_pagetable(struct dmar_domain *domain,
689         u64 start, u64 end)
690 {
691         int addr_width = agaw_to_width(domain->agaw);
692         struct dma_pte *pte;
693         int total = agaw_to_level(domain->agaw);
694         int level;
695         u64 tmp;
696         struct intel_iommu *iommu = domain_get_iommu(domain);
697
698         start &= (((u64)1) << addr_width) - 1;
699         end &= (((u64)1) << addr_width) - 1;
700
701         /* we don't need lock here, nobody else touches the iova range */
702         level = 2;
703         while (level <= total) {
704                 tmp = align_to_level(start, level);
705                 if (tmp >= end || (tmp + level_size(level) > end))
706                         return;
707
708                 while (tmp < end) {
709                         pte = dma_addr_level_pte(domain, tmp, level);
710                         if (pte) {
711                                 free_pgtable_page(
712                                         phys_to_virt(dma_pte_addr(pte)));
713                                 dma_clear_pte(pte);
714                                 __iommu_flush_cache(iommu,
715                                                 pte, sizeof(*pte));
716                         }
717                         tmp += level_size(level);
718                 }
719                 level++;
720         }
721         /* free pgd */
722         if (start == 0 && end >= ((((u64)1) << addr_width) - 1)) {
723                 free_pgtable_page(domain->pgd);
724                 domain->pgd = NULL;
725         }
726 }
727
728 /* iommu handling */
729 static int iommu_alloc_root_entry(struct intel_iommu *iommu)
730 {
731         struct root_entry *root;
732         unsigned long flags;
733
734         root = (struct root_entry *)alloc_pgtable_page();
735         if (!root)
736                 return -ENOMEM;
737
738         __iommu_flush_cache(iommu, root, ROOT_SIZE);
739
740         spin_lock_irqsave(&iommu->lock, flags);
741         iommu->root_entry = root;
742         spin_unlock_irqrestore(&iommu->lock, flags);
743
744         return 0;
745 }
746
747 static void iommu_set_root_entry(struct intel_iommu *iommu)
748 {
749         void *addr;
750         u32 cmd, sts;
751         unsigned long flag;
752
753         addr = iommu->root_entry;
754
755         spin_lock_irqsave(&iommu->register_lock, flag);
756         dmar_writeq(iommu->reg + DMAR_RTADDR_REG, virt_to_phys(addr));
757
758         cmd = iommu->gcmd | DMA_GCMD_SRTP;
759         writel(cmd, iommu->reg + DMAR_GCMD_REG);
760
761         /* Make sure hardware completes it */
762         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
763                 readl, (sts & DMA_GSTS_RTPS), sts);
764
765         spin_unlock_irqrestore(&iommu->register_lock, flag);
766 }
767
768 static void iommu_flush_write_buffer(struct intel_iommu *iommu)
769 {
770         u32 val;
771         unsigned long flag;
772
773         if (!cap_rwbf(iommu->cap))
774                 return;
775         val = iommu->gcmd | DMA_GCMD_WBF;
776
777         spin_lock_irqsave(&iommu->register_lock, flag);
778         writel(val, iommu->reg + DMAR_GCMD_REG);
779
780         /* Make sure hardware completes it */
781         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
782                         readl, (!(val & DMA_GSTS_WBFS)), val);
783
784         spin_unlock_irqrestore(&iommu->register_lock, flag);
785 }
786
787 /* return value determines whether we need a write buffer flush */
788 static int __iommu_flush_context(struct intel_iommu *iommu,
789         u16 did, u16 source_id, u8 function_mask, u64 type,
790         int non_present_entry_flush)
791 {
792         u64 val = 0;
793         unsigned long flag;
794
795         /*
796          * In the non-present entry flush case: if the hardware doesn't cache
797          * non-present entries we do nothing; if it does cache them, we flush
798          * the entries of domain 0 (the domain id used to cache any
799          * non-present entries)
800          */
801         if (non_present_entry_flush) {
802                 if (!cap_caching_mode(iommu->cap))
803                         return 1;
804                 else
805                         did = 0;
806         }
807
808         switch (type) {
809         case DMA_CCMD_GLOBAL_INVL:
810                 val = DMA_CCMD_GLOBAL_INVL;
811                 break;
812         case DMA_CCMD_DOMAIN_INVL:
813                 val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
814                 break;
815         case DMA_CCMD_DEVICE_INVL:
816                 val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
817                         | DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
818                 break;
819         default:
820                 BUG();
821         }
822         val |= DMA_CCMD_ICC;
823
824         spin_lock_irqsave(&iommu->register_lock, flag);
825         dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);
826
827         /* Make sure hardware completes it */
828         IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
829                 dmar_readq, (!(val & DMA_CCMD_ICC)), val);
830
831         spin_unlock_irqrestore(&iommu->register_lock, flag);
832
833         /* flush context entry will implicitly flush write buffer */
834         return 0;
835 }
836
837 /* return value determines whether we need a write buffer flush */
838 static int __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
839         u64 addr, unsigned int size_order, u64 type,
840         int non_present_entry_flush)
841 {
842         int tlb_offset = ecap_iotlb_offset(iommu->ecap);
843         u64 val = 0, val_iva = 0;
844         unsigned long flag;
845
846         /*
847          * In the non-present entry flush case: if the hardware doesn't cache
848          * non-present entries we do nothing; if it does cache them, we flush
849          * the entries of domain 0 (the domain id used to cache any
850          * non-present entries)
851          */
852         if (non_present_entry_flush) {
853                 if (!cap_caching_mode(iommu->cap))
854                         return 1;
855                 else
856                         did = 0;
857         }
858
859         switch (type) {
860         case DMA_TLB_GLOBAL_FLUSH:
861                 /* global flush doesn't need to set IVA_REG */
862                 val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
863                 break;
864         case DMA_TLB_DSI_FLUSH:
865                 val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
866                 break;
867         case DMA_TLB_PSI_FLUSH:
868                 val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
869                 /* Note: always flush non-leaf currently */
870                 val_iva = size_order | addr;
871                 break;
872         default:
873                 BUG();
874         }
875         /* Note: set drain read/write */
876 #if 0
877         /*
878          * This is probably just there to be extra safe. It looks like we can
879          * ignore it without any impact.
880          */
881         if (cap_read_drain(iommu->cap))
882                 val |= DMA_TLB_READ_DRAIN;
883 #endif
884         if (cap_write_drain(iommu->cap))
885                 val |= DMA_TLB_WRITE_DRAIN;
886
887         spin_lock_irqsave(&iommu->register_lock, flag);
888         /* Note: Only uses first TLB reg currently */
889         if (val_iva)
890                 dmar_writeq(iommu->reg + tlb_offset, val_iva);
891         dmar_writeq(iommu->reg + tlb_offset + 8, val);
892
893         /* Make sure hardware completes it */
894         IOMMU_WAIT_OP(iommu, tlb_offset + 8,
895                 dmar_readq, (!(val & DMA_TLB_IVT)), val);
896
897         spin_unlock_irqrestore(&iommu->register_lock, flag);
898
899         /* check IOTLB invalidation granularity */
900         if (DMA_TLB_IAIG(val) == 0)
901                 printk(KERN_ERR"IOMMU: flush IOTLB failed\n");
902         if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
903                 pr_debug("IOMMU: tlb flush request %Lx, actual %Lx\n",
904                         (unsigned long long)DMA_TLB_IIRG(type),
905                         (unsigned long long)DMA_TLB_IAIG(val));
906         /* flush iotlb entry will implicitly flush write buffer */
907         return 0;
908 }
909
910 static int iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
911         u64 addr, unsigned int pages, int non_present_entry_flush)
912 {
913         unsigned int mask;
914
915         BUG_ON(addr & (~VTD_PAGE_MASK));
916         BUG_ON(pages == 0);
917
918         /* Fall back to domain-selective flush if there is no PSI support */
919         if (!cap_pgsel_inv(iommu->cap))
920                 return iommu->flush.flush_iotlb(iommu, did, 0, 0,
921                                                 DMA_TLB_DSI_FLUSH,
922                                                 non_present_entry_flush);
923
924         /*
925          * PSI requires the page size to be a power of two, and the base
926          * address to be naturally aligned to that size
927          */
928         mask = ilog2(__roundup_pow_of_two(pages));
929         /* Fall back to domain-selective flush if the size is too big */
930         if (mask > cap_max_amask_val(iommu->cap))
931                 return iommu->flush.flush_iotlb(iommu, did, 0, 0,
932                         DMA_TLB_DSI_FLUSH, non_present_entry_flush);
933
934         return iommu->flush.flush_iotlb(iommu, did, addr, mask,
935                                         DMA_TLB_PSI_FLUSH,
936                                         non_present_entry_flush);
937 }
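/* e.g. a 3-page unmap gives mask = ilog2(__roundup_pow_of_two(3)) = 2, a
 * 4-page aligned invalidation; if the mask exceeds cap_max_amask_val(iommu->cap),
 * the flush falls back to a domain-selective invalidation. */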
938
939 static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
940 {
941         u32 pmen;
942         unsigned long flags;
943
944         spin_lock_irqsave(&iommu->register_lock, flags);
945         pmen = readl(iommu->reg + DMAR_PMEN_REG);
946         pmen &= ~DMA_PMEN_EPM;
947         writel(pmen, iommu->reg + DMAR_PMEN_REG);
948
949         /* wait for the protected region status bit to clear */
950         IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
951                 readl, !(pmen & DMA_PMEN_PRS), pmen);
952
953         spin_unlock_irqrestore(&iommu->register_lock, flags);
954 }
955
956 static int iommu_enable_translation(struct intel_iommu *iommu)
957 {
958         u32 sts;
959         unsigned long flags;
960
961         spin_lock_irqsave(&iommu->register_lock, flags);
962         writel(iommu->gcmd|DMA_GCMD_TE, iommu->reg + DMAR_GCMD_REG);
963
964         /* Make sure hardware completes it */
965         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
966                 readl, (sts & DMA_GSTS_TES), sts);
967
968         iommu->gcmd |= DMA_GCMD_TE;
969         spin_unlock_irqrestore(&iommu->register_lock, flags);
970         return 0;
971 }
972
973 static int iommu_disable_translation(struct intel_iommu *iommu)
974 {
975         u32 sts;
976         unsigned long flag;
977
978         spin_lock_irqsave(&iommu->register_lock, flag);
979         iommu->gcmd &= ~DMA_GCMD_TE;
980         writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
981
982         /* Make sure hardware completes it */
983         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
984                 readl, (!(sts & DMA_GSTS_TES)), sts);
985
986         spin_unlock_irqrestore(&iommu->register_lock, flag);
987         return 0;
988 }
989
990 /* iommu interrupt handling. Most of it is MSI-like. */
991
992 static const char *fault_reason_strings[] =
993 {
994         "Software",
995         "Present bit in root entry is clear",
996         "Present bit in context entry is clear",
997         "Invalid context entry",
998         "Access beyond MGAW",
999         "PTE Write access is not set",
1000         "PTE Read access is not set",
1001         "Next page table ptr is invalid",
1002         "Root table address invalid",
1003         "Context table ptr is invalid",
1004         "non-zero reserved fields in RTP",
1005         "non-zero reserved fields in CTP",
1006         "non-zero reserved fields in PTE",
1007 };
1008 #define MAX_FAULT_REASON_IDX    (ARRAY_SIZE(fault_reason_strings) - 1)
1009
1010 const char *dmar_get_fault_reason(u8 fault_reason)
1011 {
1012         if (fault_reason > MAX_FAULT_REASON_IDX)
1013                 return "Unknown";
1014         else
1015                 return fault_reason_strings[fault_reason];
1016 }
1017
1018 void dmar_msi_unmask(unsigned int irq)
1019 {
1020         struct intel_iommu *iommu = get_irq_data(irq);
1021         unsigned long flag;
1022
1023         /* unmask it */
1024         spin_lock_irqsave(&iommu->register_lock, flag);
1025         writel(0, iommu->reg + DMAR_FECTL_REG);
1026         /* Read a reg to force flush the post write */
1027         readl(iommu->reg + DMAR_FECTL_REG);
1028         spin_unlock_irqrestore(&iommu->register_lock, flag);
1029 }
1030
1031 void dmar_msi_mask(unsigned int irq)
1032 {
1033         unsigned long flag;
1034         struct intel_iommu *iommu = get_irq_data(irq);
1035
1036         /* mask it */
1037         spin_lock_irqsave(&iommu->register_lock, flag);
1038         writel(DMA_FECTL_IM, iommu->reg + DMAR_FECTL_REG);
1039         /* Read a reg to force flush the post write */
1040         readl(iommu->reg + DMAR_FECTL_REG);
1041         spin_unlock_irqrestore(&iommu->register_lock, flag);
1042 }
1043
1044 void dmar_msi_write(int irq, struct msi_msg *msg)
1045 {
1046         struct intel_iommu *iommu = get_irq_data(irq);
1047         unsigned long flag;
1048
1049         spin_lock_irqsave(&iommu->register_lock, flag);
1050         writel(msg->data, iommu->reg + DMAR_FEDATA_REG);
1051         writel(msg->address_lo, iommu->reg + DMAR_FEADDR_REG);
1052         writel(msg->address_hi, iommu->reg + DMAR_FEUADDR_REG);
1053         spin_unlock_irqrestore(&iommu->register_lock, flag);
1054 }
1055
1056 void dmar_msi_read(int irq, struct msi_msg *msg)
1057 {
1058         struct intel_iommu *iommu = get_irq_data(irq);
1059         unsigned long flag;
1060
1061         spin_lock_irqsave(&iommu->register_lock, flag);
1062         msg->data = readl(iommu->reg + DMAR_FEDATA_REG);
1063         msg->address_lo = readl(iommu->reg + DMAR_FEADDR_REG);
1064         msg->address_hi = readl(iommu->reg + DMAR_FEUADDR_REG);
1065         spin_unlock_irqrestore(&iommu->register_lock, flag);
1066 }
1067
1068 static int iommu_page_fault_do_one(struct intel_iommu *iommu, int type,
1069                 u8 fault_reason, u16 source_id, unsigned long long addr)
1070 {
1071         const char *reason;
1072
1073         reason = dmar_get_fault_reason(fault_reason);
1074
1075         printk(KERN_ERR
1076                 "DMAR:[%s] Request device [%02x:%02x.%d] "
1077                 "fault addr %llx \n"
1078                 "DMAR:[fault reason %02d] %s\n",
1079                 (type ? "DMA Read" : "DMA Write"),
1080                 (source_id >> 8), PCI_SLOT(source_id & 0xFF),
1081                 PCI_FUNC(source_id & 0xFF), addr, fault_reason, reason);
1082         return 0;
1083 }
1084
1085 #define PRIMARY_FAULT_REG_LEN (16)
1086 static irqreturn_t iommu_page_fault(int irq, void *dev_id)
1087 {
1088         struct intel_iommu *iommu = dev_id;
1089         int reg, fault_index;
1090         u32 fault_status;
1091         unsigned long flag;
1092
1093         spin_lock_irqsave(&iommu->register_lock, flag);
1094         fault_status = readl(iommu->reg + DMAR_FSTS_REG);
1095
1096         /* TBD: ignore advanced fault log currently */
1097         if (!(fault_status & DMA_FSTS_PPF))
1098                 goto clear_overflow;
1099
1100         fault_index = dma_fsts_fault_record_index(fault_status);
1101         reg = cap_fault_reg_offset(iommu->cap);
1102         while (1) {
1103                 u8 fault_reason;
1104                 u16 source_id;
1105                 u64 guest_addr;
1106                 int type;
1107                 u32 data;
1108
1109                 /* highest 32 bits */
1110                 data = readl(iommu->reg + reg +
1111                                 fault_index * PRIMARY_FAULT_REG_LEN + 12);
1112                 if (!(data & DMA_FRCD_F))
1113                         break;
1114
1115                 fault_reason = dma_frcd_fault_reason(data);
1116                 type = dma_frcd_type(data);
1117
1118                 data = readl(iommu->reg + reg +
1119                                 fault_index * PRIMARY_FAULT_REG_LEN + 8);
1120                 source_id = dma_frcd_source_id(data);
1121
1122                 guest_addr = dmar_readq(iommu->reg + reg +
1123                                 fault_index * PRIMARY_FAULT_REG_LEN);
1124                 guest_addr = dma_frcd_page_addr(guest_addr);
1125                 /* clear the fault */
1126                 writel(DMA_FRCD_F, iommu->reg + reg +
1127                         fault_index * PRIMARY_FAULT_REG_LEN + 12);
1128
1129                 spin_unlock_irqrestore(&iommu->register_lock, flag);
1130
1131                 iommu_page_fault_do_one(iommu, type, fault_reason,
1132                                 source_id, guest_addr);
1133
1134                 fault_index++;
1135                 if (fault_index > cap_num_fault_regs(iommu->cap))
1136                         fault_index = 0;
1137                 spin_lock_irqsave(&iommu->register_lock, flag);
1138         }
1139 clear_overflow:
1140         /* clear primary fault overflow */
1141         fault_status = readl(iommu->reg + DMAR_FSTS_REG);
1142         if (fault_status & DMA_FSTS_PFO)
1143                 writel(DMA_FSTS_PFO, iommu->reg + DMAR_FSTS_REG);
1144
1145         spin_unlock_irqrestore(&iommu->register_lock, flag);
1146         return IRQ_HANDLED;
1147 }
1148
1149 int dmar_set_interrupt(struct intel_iommu *iommu)
1150 {
1151         int irq, ret;
1152
1153         irq = create_irq();
1154         if (!irq) {
1155                 printk(KERN_ERR "IOMMU: no free vectors\n");
1156                 return -EINVAL;
1157         }
1158
1159         set_irq_data(irq, iommu);
1160         iommu->irq = irq;
1161
1162         ret = arch_setup_dmar_msi(irq);
1163         if (ret) {
1164                 set_irq_data(irq, NULL);
1165                 iommu->irq = 0;
1166                 destroy_irq(irq);
1167                 return ret;
1168         }
1169
1170         /* Force the fault register to be cleared */
1171         iommu_page_fault(irq, iommu);
1172
1173         ret = request_irq(irq, iommu_page_fault, 0, iommu->name, iommu);
1174         if (ret)
1175                 printk(KERN_ERR "IOMMU: can't request irq\n");
1176         return ret;
1177 }
1178
1179 static int iommu_init_domains(struct intel_iommu *iommu)
1180 {
1181         unsigned long ndomains;
1182         unsigned long nlongs;
1183
1184         ndomains = cap_ndoms(iommu->cap);
1185         pr_debug("Number of Domains supported <%ld>\n", ndomains);
1186         nlongs = BITS_TO_LONGS(ndomains);
1187
1188         /* TBD: there might be 64K domains,
1189          * consider other allocation for future chip
1190          */
1191         iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
1192         if (!iommu->domain_ids) {
1193                 printk(KERN_ERR "Allocating domain id array failed\n");
1194                 return -ENOMEM;
1195         }
1196         iommu->domains = kcalloc(ndomains, sizeof(struct dmar_domain *),
1197                         GFP_KERNEL);
1198         if (!iommu->domains) {
1199                 printk(KERN_ERR "Allocating domain array failed\n");
1200                 kfree(iommu->domain_ids);
1201                 return -ENOMEM;
1202         }
1203
1204         spin_lock_init(&iommu->lock);
1205
1206         /*
1207          * if Caching mode is set, then invalid translations are tagged
1208          * with domainid 0. Hence we need to pre-allocate it.
1209          */
1210         if (cap_caching_mode(iommu->cap))
1211                 set_bit(0, iommu->domain_ids);
1212         return 0;
1213 }
1214
1215
1216 static void domain_exit(struct dmar_domain *domain);
1217
1218 void free_dmar_iommu(struct intel_iommu *iommu)
1219 {
1220         struct dmar_domain *domain;
1221         int i;
1222         unsigned long flags;
1223
1224         i = find_first_bit(iommu->domain_ids, cap_ndoms(iommu->cap));
1225         for (; i < cap_ndoms(iommu->cap); ) {
1226                 domain = iommu->domains[i];
1227                 clear_bit(i, iommu->domain_ids);
1228
1229                 spin_lock_irqsave(&domain->iommu_lock, flags);
1230                 if (--domain->iommu_count == 0)
1231                         domain_exit(domain);
1232                 spin_unlock_irqrestore(&domain->iommu_lock, flags);
1233
1234                 i = find_next_bit(iommu->domain_ids,
1235                         cap_ndoms(iommu->cap), i+1);
1236         }
1237
1238         if (iommu->gcmd & DMA_GCMD_TE)
1239                 iommu_disable_translation(iommu);
1240
1241         if (iommu->irq) {
1242                 set_irq_data(iommu->irq, NULL);
1243                 /* This will mask the irq */
1244                 free_irq(iommu->irq, iommu);
1245                 destroy_irq(iommu->irq);
1246         }
1247
1248         kfree(iommu->domains);
1249         kfree(iommu->domain_ids);
1250
1251         g_iommus[iommu->seq_id] = NULL;
1252
1253         /* if all iommus are freed, free g_iommus */
1254         for (i = 0; i < g_num_of_iommus; i++) {
1255                 if (g_iommus[i])
1256                         break;
1257         }
1258
1259         if (i == g_num_of_iommus)
1260                 kfree(g_iommus);
1261
1262         /* free context mapping */
1263         free_context_table(iommu);
1264 }
1265
1266 static struct dmar_domain * iommu_alloc_domain(struct intel_iommu *iommu)
1267 {
1268         unsigned long num;
1269         unsigned long ndomains;
1270         struct dmar_domain *domain;
1271         unsigned long flags;
1272
1273         domain = alloc_domain_mem();
1274         if (!domain)
1275                 return NULL;
1276
1277         ndomains = cap_ndoms(iommu->cap);
1278
1279         spin_lock_irqsave(&iommu->lock, flags);
1280         num = find_first_zero_bit(iommu->domain_ids, ndomains);
1281         if (num >= ndomains) {
1282                 spin_unlock_irqrestore(&iommu->lock, flags);
1283                 free_domain_mem(domain);
1284                 printk(KERN_ERR "IOMMU: no free domain ids\n");
1285                 return NULL;
1286         }
1287
1288         set_bit(num, iommu->domain_ids);
1289         domain->id = num;
1290         memset(&domain->iommu_bmp, 0, sizeof(unsigned long));
1291         set_bit(iommu->seq_id, &domain->iommu_bmp);
1292         domain->flags = 0;
1293         iommu->domains[num] = domain;
1294         spin_unlock_irqrestore(&iommu->lock, flags);
1295
1296         return domain;
1297 }
1298
1299 static void iommu_free_domain(struct dmar_domain *domain)
1300 {
1301         unsigned long flags;
1302         struct intel_iommu *iommu;
1303
1304         iommu = domain_get_iommu(domain);
1305
1306         spin_lock_irqsave(&iommu->lock, flags);
1307         clear_bit(domain->id, iommu->domain_ids);
1308         spin_unlock_irqrestore(&iommu->lock, flags);
1309 }
1310
1311 static struct iova_domain reserved_iova_list;
1312 static struct lock_class_key reserved_alloc_key;
1313 static struct lock_class_key reserved_rbtree_key;
1314
1315 static void dmar_init_reserved_ranges(void)
1316 {
1317         struct pci_dev *pdev = NULL;
1318         struct iova *iova;
1319         int i;
1320         u64 addr, size;
1321
1322         init_iova_domain(&reserved_iova_list, DMA_32BIT_PFN);
1323
1324         lockdep_set_class(&reserved_iova_list.iova_alloc_lock,
1325                 &reserved_alloc_key);
1326         lockdep_set_class(&reserved_iova_list.iova_rbtree_lock,
1327                 &reserved_rbtree_key);
1328
1329         /* IOAPIC ranges shouldn't be accessed by DMA */
1330         iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
1331                 IOVA_PFN(IOAPIC_RANGE_END));
1332         if (!iova)
1333                 printk(KERN_ERR "Reserve IOAPIC range failed\n");
1334
1335         /* Reserve all PCI MMIO to avoid peer-to-peer access */
1336         for_each_pci_dev(pdev) {
1337                 struct resource *r;
1338
1339                 for (i = 0; i < PCI_NUM_RESOURCES; i++) {
1340                         r = &pdev->resource[i];
1341                         if (!r->flags || !(r->flags & IORESOURCE_MEM))
1342                                 continue;
1343                         addr = r->start;
1344                         addr &= PAGE_MASK;
1345                         size = r->end - addr;
1346                         size = PAGE_ALIGN(size);
1347                         iova = reserve_iova(&reserved_iova_list, IOVA_PFN(addr),
1348                                 IOVA_PFN(size + addr) - 1);
1349                         if (!iova)
1350                                 printk(KERN_ERR "Reserve iova failed\n");
1351                 }
1352         }
1353
1354 }
1355
1356 static void domain_reserve_special_ranges(struct dmar_domain *domain)
1357 {
1358         copy_reserved_iova(&reserved_iova_list, &domain->iovad);
1359 }
1360
1361 static inline int guestwidth_to_adjustwidth(int gaw)
1362 {
1363         int agaw;
1364         int r = (gaw - 12) % 9;
1365
1366         if (r == 0)
1367                 agaw = gaw;
1368         else
1369                 agaw = gaw + 9 - r;
1370         if (agaw > 64)
1371                 agaw = 64;
1372         return agaw;
1373 }
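/* e.g. a 36-bit guest width becomes 39 ((36 - 12) % 9 == 6, so 36 + 9 - 6),
 * while 39-bit and 48-bit widths already fit the 9-bit-per-level page-table
 * layout and are returned unchanged. */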
1374
1375 static int domain_init(struct dmar_domain *domain, int guest_width)
1376 {
1377         struct intel_iommu *iommu;
1378         int adjust_width, agaw;
1379         unsigned long sagaw;
1380
1381         init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
1382         spin_lock_init(&domain->mapping_lock);
1383         spin_lock_init(&domain->iommu_lock);
1384
1385         domain_reserve_special_ranges(domain);
1386
1387         /* calculate AGAW */
1388         iommu = domain_get_iommu(domain);
1389         if (guest_width > cap_mgaw(iommu->cap))
1390                 guest_width = cap_mgaw(iommu->cap);
1391         domain->gaw = guest_width;
1392         adjust_width = guestwidth_to_adjustwidth(guest_width);
1393         agaw = width_to_agaw(adjust_width);
1394         sagaw = cap_sagaw(iommu->cap);
1395         if (!test_bit(agaw, &sagaw)) {
1396                 /* hardware doesn't support it, choose a bigger one */
1397                 pr_debug("IOMMU: hardware doesn't support agaw %d\n", agaw);
1398                 agaw = find_next_bit(&sagaw, 5, agaw);
1399                 if (agaw >= 5)
1400                         return -ENODEV;
1401         }
1402         domain->agaw = agaw;
1403         INIT_LIST_HEAD(&domain->devices);
1404
1405         if (ecap_coherent(iommu->ecap))
1406                 domain->iommu_coherency = 1;
1407         else
1408                 domain->iommu_coherency = 0;
1409
1410         domain->iommu_count = 1;
1411
1412         /* always allocate the top pgd */
1413         domain->pgd = (struct dma_pte *)alloc_pgtable_page();
1414         if (!domain->pgd)
1415                 return -ENOMEM;
1416         __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE);
1417         return 0;
1418 }
1419
1420 static void domain_exit(struct dmar_domain *domain)
1421 {
1422         u64 end;
1423
1424         /* Domain 0 is reserved, so don't process it */
1425         if (!domain)
1426                 return;
1427
1428         domain_remove_dev_info(domain);
1429         /* destroy iovas */
1430         put_iova_domain(&domain->iovad);
1431         end = DOMAIN_MAX_ADDR(domain->gaw);
1432         end = end & (~PAGE_MASK);
1433
1434         /* clear ptes */
1435         dma_pte_clear_range(domain, 0, end);
1436
1437         /* free page tables */
1438         dma_pte_free_pagetable(domain, 0, end);
1439
1440         iommu_free_domain(domain);
1441         free_domain_mem(domain);
1442 }
1443
1444 static int domain_context_mapping_one(struct dmar_domain *domain,
1445                 u8 bus, u8 devfn)
1446 {
1447         struct context_entry *context;
1448         struct intel_iommu *iommu = domain_get_iommu(domain);
1449         unsigned long flags;
1450
1451         pr_debug("Set context mapping for %02x:%02x.%d\n",
1452                 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
1453         BUG_ON(!domain->pgd);
1454         context = device_to_context_entry(iommu, bus, devfn);
1455         if (!context)
1456                 return -ENOMEM;
1457         spin_lock_irqsave(&iommu->lock, flags);
1458         if (context_present(context)) {
1459                 spin_unlock_irqrestore(&iommu->lock, flags);
1460                 return 0;
1461         }
1462
1463         context_set_domain_id(context, domain->id);
1464         context_set_address_width(context, domain->agaw);
1465         context_set_address_root(context, virt_to_phys(domain->pgd));
1466         context_set_translation_type(context, CONTEXT_TT_MULTI_LEVEL);
1467         context_set_fault_enable(context);
1468         context_set_present(context);
1469         __iommu_flush_cache(iommu, context, sizeof(*context));
1470
1471         /* it's a non-present to present mapping */
1472         if (iommu->flush.flush_context(iommu, domain->id,
1473                 (((u16)bus) << 8) | devfn, DMA_CCMD_MASK_NOBIT,
1474                 DMA_CCMD_DEVICE_INVL, 1))
1475                 iommu_flush_write_buffer(iommu);
1476         else
1477                 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_DSI_FLUSH, 0);
1478
1479         spin_unlock_irqrestore(&iommu->lock, flags);
1480
1481         spin_lock_irqsave(&domain->iommu_lock, flags);
1482         if (!test_and_set_bit(iommu->seq_id, &domain->iommu_bmp)) {
1483                 domain->iommu_count++;
1484                 domain_update_iommu_coherency(domain);
1485         }
1486         spin_unlock_irqrestore(&domain->iommu_lock, flags);
1487         return 0;
1488 }
1489
1490 static int
1491 domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev)
1492 {
1493         int ret;
1494         struct pci_dev *tmp, *parent;
1495
1496         ret = domain_context_mapping_one(domain, pdev->bus->number,
1497                 pdev->devfn);
1498         if (ret)
1499                 return ret;
1500
1501         /* dependent device mapping */
1502         tmp = pci_find_upstream_pcie_bridge(pdev);
1503         if (!tmp)
1504                 return 0;
1505         /* Secondary interface's bus number and devfn 0 */
1506         parent = pdev->bus->self;
1507         while (parent != tmp) {
1508                 ret = domain_context_mapping_one(domain, parent->bus->number,
1509                         parent->devfn);
1510                 if (ret)
1511                         return ret;
1512                 parent = parent->bus->self;
1513         }
1514         if (tmp->is_pcie) /* this is a PCIE-to-PCI bridge */
1515                 return domain_context_mapping_one(domain,
1516                         tmp->subordinate->number, 0);
1517         else /* this is a legacy PCI bridge */
1518                 return domain_context_mapping_one(domain,
1519                         tmp->bus->number, tmp->devfn);
1520 }
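/* DMA from a device behind a PCIe-to-PCI(-X) bridge is typically tagged with
 * the bridge's source-id rather than the device's, so every bridge between
 * the device and the PCIe root gets the same context mapping as the device. */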
1521
1522 static int domain_context_mapped(struct dmar_domain *domain,
1523         struct pci_dev *pdev)
1524 {
1525         int ret;
1526         struct pci_dev *tmp, *parent;
1527         struct intel_iommu *iommu = domain_get_iommu(domain);
1528
1529         ret = device_context_mapped(iommu,
1530                 pdev->bus->number, pdev->devfn);
1531         if (!ret)
1532                 return ret;
1533         /* dependent device mapping */
1534         tmp = pci_find_upstream_pcie_bridge(pdev);
1535         if (!tmp)
1536                 return ret;
1537         /* Secondary interface's bus number and devfn 0 */
1538         parent = pdev->bus->self;
1539         while (parent != tmp) {
1540                 ret = device_context_mapped(iommu, parent->bus->number,
1541                         parent->devfn);
1542                 if (!ret)
1543                         return ret;
1544                 parent = parent->bus->self;
1545         }
1546         if (tmp->is_pcie)
1547                 return device_context_mapped(iommu,
1548                         tmp->subordinate->number, 0);
1549         else
1550                 return device_context_mapped(iommu,
1551                         tmp->bus->number, tmp->devfn);
1552 }
1553
1554 static int
1555 domain_page_mapping(struct dmar_domain *domain, dma_addr_t iova,
1556                         u64 hpa, size_t size, int prot)
1557 {
1558         u64 start_pfn, end_pfn;
1559         struct dma_pte *pte;
1560         int index;
1561         int addr_width = agaw_to_width(domain->agaw);
1562         struct intel_iommu *iommu = domain_get_iommu(domain);
1563
1564         hpa &= (((u64)1) << addr_width) - 1;
1565
1566         if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
1567                 return -EINVAL;
1568         iova &= PAGE_MASK;
1569         start_pfn = ((u64)hpa) >> VTD_PAGE_SHIFT;
1570         end_pfn = (VTD_PAGE_ALIGN(((u64)hpa) + size)) >> VTD_PAGE_SHIFT;
1571         index = 0;
1572         while (start_pfn < end_pfn) {
1573                 pte = addr_to_dma_pte(domain, iova + VTD_PAGE_SIZE * index);
1574                 if (!pte)
1575                         return -ENOMEM;
1576                 /* We don't need lock here, nobody else
1577                  * touches the iova range
1578                  */
1579                 BUG_ON(dma_pte_addr(pte));
1580                 dma_set_pte_addr(pte, start_pfn << VTD_PAGE_SHIFT);
1581                 dma_set_pte_prot(pte, prot);
1582                 __iommu_flush_cache(iommu, pte, sizeof(*pte));
1583                 start_pfn++;
1584                 index++;
1585         }
1586         return 0;
1587 }
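/* For illustration (hypothetical call): domain_page_mapping(domain, 0x2000,
 * 0x5000, 2 * VTD_PAGE_SIZE, DMA_PTE_READ) writes two leaf ptes that map
 * iova pages 0x2000 and 0x3000 to host pages 0x5000 and 0x6000 read-only. */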
1588
1589 static void iommu_detach_dev(struct intel_iommu *iommu, u8 bus, u8 devfn)
1590 {
1591         if (!iommu)
1592                 return;
1593
1594         clear_context_table(iommu, bus, devfn);
1595         iommu->flush.flush_context(iommu, 0, 0, 0,
1596                                            DMA_CCMD_GLOBAL_INVL, 0);
1597         iommu->flush.flush_iotlb(iommu, 0, 0, 0,
1598                                          DMA_TLB_GLOBAL_FLUSH, 0);
1599 }
1600
1601 static void domain_remove_dev_info(struct dmar_domain *domain)
1602 {
1603         struct device_domain_info *info;
1604         unsigned long flags;
1605         struct intel_iommu *iommu;
1606
1607         spin_lock_irqsave(&device_domain_lock, flags);
1608         while (!list_empty(&domain->devices)) {
1609                 info = list_entry(domain->devices.next,
1610                         struct device_domain_info, link);
1611                 list_del(&info->link);
1612                 list_del(&info->global);
1613                 if (info->dev)
1614                         info->dev->dev.archdata.iommu = NULL;
1615                 spin_unlock_irqrestore(&device_domain_lock, flags);
1616
1617                 iommu = device_to_iommu(info->bus, info->devfn);
1618                 iommu_detach_dev(iommu, info->bus, info->devfn);
1619                 free_devinfo_mem(info);
1620
1621                 spin_lock_irqsave(&device_domain_lock, flags);
1622         }
1623         spin_unlock_irqrestore(&device_domain_lock, flags);
1624 }
1625
1626 /*
1627  * find_domain
1628  * Note: we use struct pci_dev->dev.archdata.iommu to store the domain info
1629  */
1630 static struct dmar_domain *
1631 find_domain(struct pci_dev *pdev)
1632 {
1633         struct device_domain_info *info;
1634
1635         /* No lock here, assumes no domain exit in normal case */
1636         info = pdev->dev.archdata.iommu;
1637         if (info)
1638                 return info->domain;
1639         return NULL;
1640 }
1641
1642 /* domain is initialized */
1643 static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw)
1644 {
1645         struct dmar_domain *domain, *found = NULL;
1646         struct intel_iommu *iommu;
1647         struct dmar_drhd_unit *drhd;
1648         struct device_domain_info *info, *tmp;
1649         struct pci_dev *dev_tmp;
1650         unsigned long flags;
1651         int bus = 0, devfn = 0;
1652
1653         domain = find_domain(pdev);
1654         if (domain)
1655                 return domain;
1656
1657         dev_tmp = pci_find_upstream_pcie_bridge(pdev);
1658         if (dev_tmp) {
1659                 if (dev_tmp->is_pcie) {
1660                         bus = dev_tmp->subordinate->number;
1661                         devfn = 0;
1662                 } else {
1663                         bus = dev_tmp->bus->number;
1664                         devfn = dev_tmp->devfn;
1665                 }
1666                 spin_lock_irqsave(&device_domain_lock, flags);
1667                 list_for_each_entry(info, &device_domain_list, global) {
1668                         if (info->bus == bus && info->devfn == devfn) {
1669                                 found = info->domain;
1670                                 break;
1671                         }
1672                 }
1673                 spin_unlock_irqrestore(&device_domain_lock, flags);
1674                 /* pcie-pci bridge already has a domain, use it */
1675                 if (found) {
1676                         domain = found;
1677                         goto found_domain;
1678                 }
1679         }
1680
1681         /* Allocate new domain for the device */
1682         drhd = dmar_find_matched_drhd_unit(pdev);
1683         if (!drhd) {
1684                 printk(KERN_ERR "IOMMU: can't find DMAR for device %s\n",
1685                         pci_name(pdev));
1686                 return NULL;
1687         }
1688         iommu = drhd->iommu;
1689
1690         domain = iommu_alloc_domain(iommu);
1691         if (!domain)
1692                 goto error;
1693
1694         if (domain_init(domain, gaw)) {
1695                 domain_exit(domain);
1696                 goto error;
1697         }
1698
1699         /* register pcie-to-pci device */
1700         if (dev_tmp) {
1701                 info = alloc_devinfo_mem();
1702                 if (!info) {
1703                         domain_exit(domain);
1704                         goto error;
1705                 }
1706                 info->bus = bus;
1707                 info->devfn = devfn;
1708                 info->dev = NULL;
1709                 info->domain = domain;
1710                 /* This domain is shared by devices under p2p bridge */
1711                 domain->flags |= DOMAIN_FLAG_P2P_MULTIPLE_DEVICES;
1712
1713                 /* pcie-to-pci bridge already has a domain, use it */
1714                 found = NULL;
1715                 spin_lock_irqsave(&device_domain_lock, flags);
1716                 list_for_each_entry(tmp, &device_domain_list, global) {
1717                         if (tmp->bus == bus && tmp->devfn == devfn) {
1718                                 found = tmp->domain;
1719                                 break;
1720                         }
1721                 }
1722                 if (found) {
1723                         free_devinfo_mem(info);
1724                         domain_exit(domain);
1725                         domain = found;
1726                 } else {
1727                         list_add(&info->link, &domain->devices);
1728                         list_add(&info->global, &device_domain_list);
1729                 }
1730                 spin_unlock_irqrestore(&device_domain_lock, flags);
1731         }
1732
1733 found_domain:
1734         info = alloc_devinfo_mem();
1735         if (!info)
1736                 goto error;
1737         info->bus = pdev->bus->number;
1738         info->devfn = pdev->devfn;
1739         info->dev = pdev;
1740         info->domain = domain;
1741         spin_lock_irqsave(&device_domain_lock, flags);
1742         /* somebody else may have beaten us to it */
1743         found = find_domain(pdev);
1744         if (found != NULL) {
1745                 spin_unlock_irqrestore(&device_domain_lock, flags);
1746                 if (found != domain) {
1747                         domain_exit(domain);
1748                         domain = found;
1749                 }
1750                 free_devinfo_mem(info);
1751                 return domain;
1752         }
1753         list_add(&info->link, &domain->devices);
1754         list_add(&info->global, &device_domain_list);
1755         pdev->dev.archdata.iommu = info;
1756         spin_unlock_irqrestore(&device_domain_lock, flags);
1757         return domain;
1758 error:
1759         /* recheck it here, somebody else may have set it in the meantime */
1760         return find_domain(pdev);
1761 }
1762
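/*
 * Set up a 1:1 (identity) mapping of [start, end) for @pdev: the range is
 * page aligned, reserved in the domain's IOVA allocator so the DMA API never
 * hands it out, cleared of any stale PTEs, mapped read/write, and finally
 * backed by a context entry for the device.
 */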
1763 static int iommu_prepare_identity_map(struct pci_dev *pdev,
1764                                       unsigned long long start,
1765                                       unsigned long long end)
1766 {
1767         struct dmar_domain *domain;
1768         unsigned long size;
1769         unsigned long long base;
1770         int ret;
1771
1772         printk(KERN_INFO
1773                 "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
1774                 pci_name(pdev), start, end);
1775         /* page table init */
1776         domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
1777         if (!domain)
1778                 return -ENOMEM;
1779
1780         /* The address might not be aligned */
1781         base = start & PAGE_MASK;
1782         size = end - base;
1783         size = PAGE_ALIGN(size);
1784         if (!reserve_iova(&domain->iovad, IOVA_PFN(base),
1785                         IOVA_PFN(base + size) - 1)) {
1786                 printk(KERN_ERR "IOMMU: reserve iova failed\n");
1787                 ret = -ENOMEM;
1788                 goto error;
1789         }
1790
1791         pr_debug("Mapping reserved region %lx@%llx for %s\n",
1792                 size, base, pci_name(pdev));
1793         /*
1794          * RMRR range might have overlap with physical memory range,
1795          * clear it first
1796          */
1797         dma_pte_clear_range(domain, base, base + size);
1798
1799         ret = domain_page_mapping(domain, base, base, size,
1800                 DMA_PTE_READ|DMA_PTE_WRITE);
1801         if (ret)
1802                 goto error;
1803
1804         /* context entry init */
1805         ret = domain_context_mapping(domain, pdev);
1806         if (!ret)
1807                 return 0;
1808 error:
1809         domain_exit(domain);
1810         return ret;
1811
1812 }
1813
1814 static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
1815         struct pci_dev *pdev)
1816 {
1817         if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
1818                 return 0;
1819         return iommu_prepare_identity_map(pdev, rmrr->base_address,
1820                 rmrr->end_address + 1);
1821 }
1822
1823 #ifdef CONFIG_DMAR_GFX_WA
1824 struct iommu_prepare_data {
1825         struct pci_dev *pdev;
1826         int ret;
1827 };
1828
1829 static int __init iommu_prepare_work_fn(unsigned long start_pfn,
1830                                          unsigned long end_pfn, void *datax)
1831 {
1832         struct iommu_prepare_data *data;
1833
1834         data = (struct iommu_prepare_data *)datax;
1835
1836         data->ret = iommu_prepare_identity_map(data->pdev,
1837                                 start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT);
1838         return data->ret;
1839
1840 }
1841
1842 static int __init iommu_prepare_with_active_regions(struct pci_dev *pdev)
1843 {
1844         int nid;
1845         struct iommu_prepare_data data;
1846
1847         data.pdev = pdev;
1848         data.ret = 0;
1849
1850         for_each_online_node(nid) {
1851                 work_with_active_regions(nid, iommu_prepare_work_fn, &data);
1852                 if (data.ret)
1853                         return data.ret;
1854         }
1855         return data.ret;
1856 }
1857
1858 static void __init iommu_prepare_gfx_mapping(void)
1859 {
1860         struct pci_dev *pdev = NULL;
1861         int ret;
1862
1863         for_each_pci_dev(pdev) {
1864                 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO ||
1865                                 !IS_GFX_DEVICE(pdev))
1866                         continue;
1867                 printk(KERN_INFO "IOMMU: gfx device %s 1-1 mapping\n",
1868                         pci_name(pdev));
1869                 ret = iommu_prepare_with_active_regions(pdev);
1870                 if (ret)
1871                         printk(KERN_ERR "IOMMU: mapping reserved region failed\n");
1872         }
1873 }
1874 #else /* !CONFIG_DMAR_GFX_WA */
1875 static inline void iommu_prepare_gfx_mapping(void)
1876 {
1877         return;
1878 }
1879 #endif
1880
1881 #ifdef CONFIG_DMAR_FLOPPY_WA
1882 static inline void iommu_prepare_isa(void)
1883 {
1884         struct pci_dev *pdev;
1885         int ret;
1886
1887         pdev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL);
1888         if (!pdev)
1889                 return;
1890
1891         printk(KERN_INFO "IOMMU: Prepare 0-16M unity mapping for LPC\n");
1892         ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024);
1893
1894         if (ret)
1895                 printk(KERN_ERR "IOMMU: Failed to create 0-16M identity map, "
1896                         "floppy might not work\n");
1897
1898 }
1899 #else
1900 static inline void iommu_prepare_isa(void)
1901 {
1902         return;
1903 }
1904 #endif /* !CONFIG_DMAR_FLOPPY_WA */
1905
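/*
 * One-time DMA-remapping bring-up: size and allocate the global iommu and
 * deferred-flush arrays, give every active DRHD unit its domain-id space and
 * root entry, pick queued vs. register-based invalidation, install the
 * RMRR/graphics/ISA unity mappings and finally enable translation on each
 * unit.
 */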
1906 static int __init init_dmars(void)
1907 {
1908         struct dmar_drhd_unit *drhd;
1909         struct dmar_rmrr_unit *rmrr;
1910         struct pci_dev *pdev;
1911         struct intel_iommu *iommu;
1912         int i, ret, unit = 0;
1913
1914         /*
1915          * for each drhd
1916          *    allocate root
1917          *    initialize and program root entry to not present
1918          * endfor
1919          */
1920         for_each_drhd_unit(drhd) {
1921                 g_num_of_iommus++;
1922                 /*
1923                  * lock not needed as this is only incremented in the
1924                  * single-threaded kernel __init code path; all other
1925                  * accesses are read-only
1926                  */
1927         }
1928
1929         g_iommus = kcalloc(g_num_of_iommus, sizeof(struct intel_iommu *),
1930                         GFP_KERNEL);
1931         if (!g_iommus) {
1932                 printk(KERN_ERR "Allocating global iommu array failed\n");
1933                 ret = -ENOMEM;
1934                 goto error;
1935         }
1936
1937         deferred_flush = kzalloc(g_num_of_iommus *
1938                 sizeof(struct deferred_flush_tables), GFP_KERNEL);
1939         if (!deferred_flush) {
1940                 kfree(g_iommus);
1941                 ret = -ENOMEM;
1942                 goto error;
1943         }
1944
1945         for_each_drhd_unit(drhd) {
1946                 if (drhd->ignored)
1947                         continue;
1948
1949                 iommu = drhd->iommu;
1950                 g_iommus[iommu->seq_id] = iommu;
1951
1952                 ret = iommu_init_domains(iommu);
1953                 if (ret)
1954                         goto error;
1955
1956                 /*
1957                  * TBD:
1958                  * we could share the same root & context tables
1959                  * among all IOMMUs; need to split it later.
1960                  */
1961                 ret = iommu_alloc_root_entry(iommu);
1962                 if (ret) {
1963                         printk(KERN_ERR "IOMMU: allocate root entry failed\n");
1964                         goto error;
1965                 }
1966         }
1967
1968         for_each_drhd_unit(drhd) {
1969                 if (drhd->ignored)
1970                         continue;
1971
1972                 iommu = drhd->iommu;
1973                 if (dmar_enable_qi(iommu)) {
1974                         /*
1975                          * Queued Invalidate not enabled, use Register Based
1976                          * Invalidate
1977                          */
1978                         iommu->flush.flush_context = __iommu_flush_context;
1979                         iommu->flush.flush_iotlb = __iommu_flush_iotlb;
1980                         printk(KERN_INFO "IOMMU 0x%Lx: using Register based "
1981                                "invalidation\n",
1982                                (unsigned long long)drhd->reg_base_addr);
1983                 } else {
1984                         iommu->flush.flush_context = qi_flush_context;
1985                         iommu->flush.flush_iotlb = qi_flush_iotlb;
1986                         printk(KERN_INFO "IOMMU 0x%Lx: using Queued "
1987                                "invalidation\n",
1988                                (unsigned long long)drhd->reg_base_addr);
1989                 }
1990         }
1991
1992         /*
1993          * For each rmrr
1994          *   for each dev attached to rmrr
1995          *   do
1996          *     locate drhd for dev, alloc domain for dev
1997          *     allocate free domain
1998          *     allocate page table entries for rmrr
1999          *     if context not allocated for bus
2000          *           allocate and init context
2001          *           set present in root table for this bus
2002          *     init context with domain, translation etc
2003          *    endfor
2004          * endfor
2005          */
2006         for_each_rmrr_units(rmrr) {
2007                 for (i = 0; i < rmrr->devices_cnt; i++) {
2008                         pdev = rmrr->devices[i];
2009                         /* some BIOSes list non-existent devices in the DMAR table */
2010                         if (!pdev)
2011                                 continue;
2012                         ret = iommu_prepare_rmrr_dev(rmrr, pdev);
2013                         if (ret)
2014                                 printk(KERN_ERR
2015                                  "IOMMU: mapping reserved region failed\n");
2016                 }
2017         }
2018
2019         iommu_prepare_gfx_mapping();
2020
2021         iommu_prepare_isa();
2022
2023         /*
2024          * for each drhd
2025          *   enable fault log
2026          *   global invalidate context cache
2027          *   global invalidate iotlb
2028          *   enable translation
2029          */
2030         for_each_drhd_unit(drhd) {
2031                 if (drhd->ignored)
2032                         continue;
2033                 iommu = drhd->iommu;
2034                 sprintf(iommu->name, "dmar%d", unit++);
2035
2036                 iommu_flush_write_buffer(iommu);
2037
2038                 ret = dmar_set_interrupt(iommu);
2039                 if (ret)
2040                         goto error;
2041
2042                 iommu_set_root_entry(iommu);
2043
2044                 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL,
2045                                            0);
2046                 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH,
2047                                          0);
2048                 iommu_disable_protect_mem_regions(iommu);
2049
2050                 ret = iommu_enable_translation(iommu);
2051                 if (ret)
2052                         goto error;
2053         }
2054
2055         return 0;
2056 error:
2057         for_each_drhd_unit(drhd) {
2058                 if (drhd->ignored)
2059                         continue;
2060                 iommu = drhd->iommu;
2061                 free_iommu(iommu);
2062         }
2063         kfree(g_iommus);
2064         return ret;
2065 }
2066
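/*
 * Round a (possibly unaligned) buffer up to whole pages.  Illustrative
 * example, assuming 4KiB pages: host_addr = 0x1234 and size = 0x100 give
 * (0x234 + 0x100) = 0x334, which PAGE_ALIGN() rounds up to 0x1000, so a
 * single page covers the buffer.
 */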
2067 static inline u64 aligned_size(u64 host_addr, size_t size)
2068 {
2069         u64 addr;
2070         addr = (host_addr & (~PAGE_MASK)) + size;
2071         return PAGE_ALIGN(addr);
2072 }
2073
2074 struct iova *
2075 iommu_alloc_iova(struct dmar_domain *domain, size_t size, u64 end)
2076 {
2077         struct iova *piova;
2078
2079         /* Make sure it's in range */
2080         end = min_t(u64, DOMAIN_MAX_ADDR(domain->gaw), end);
2081         if (!size || (IOVA_START_ADDR + size > end))
2082                 return NULL;
2083
2084         piova = alloc_iova(&domain->iovad,
2085                         size >> PAGE_SHIFT, IOVA_PFN(end), 1);
2086         return piova;
2087 }
2088
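/*
 * IOVA allocation policy: devices whose DMA mask fits in 32 bits (or any
 * device when dmar_forcedac is set) allocate directly against their mask;
 * 64-bit capable devices first try the space below 4GiB and only fall back
 * to their full mask if that range is exhausted.
 */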
2089 static struct iova *
2090 __intel_alloc_iova(struct device *dev, struct dmar_domain *domain,
2091                    size_t size, u64 dma_mask)
2092 {
2093         struct pci_dev *pdev = to_pci_dev(dev);
2094         struct iova *iova = NULL;
2095
2096         if (dma_mask <= DMA_32BIT_MASK || dmar_forcedac) {
2097                 iova = iommu_alloc_iova(domain, size, dma_mask);
2098         } else {
2099                 /*
2100                  * First try to allocate an I/O virtual address below
2101                  * DMA_32BIT_MASK; if that fails, fall back to allocating
2102                  * from the higher range
2103                  */
2104                 iova = iommu_alloc_iova(domain, size, DMA_32BIT_MASK);
2105                 if (!iova)
2106                         iova = iommu_alloc_iova(domain, size, dma_mask);
2107         }
2108
2109         if (!iova) {
2110                 printk(KERN_ERR "Allocating iova for %s failed\n", pci_name(pdev));
2111                 return NULL;
2112         }
2113
2114         return iova;
2115 }
2116
2117 static struct dmar_domain *
2118 get_valid_domain_for_dev(struct pci_dev *pdev)
2119 {
2120         struct dmar_domain *domain;
2121         int ret;
2122
2123         domain = get_domain_for_dev(pdev,
2124                         DEFAULT_DOMAIN_ADDRESS_WIDTH);
2125         if (!domain) {
2126                 printk(KERN_ERR
2127                         "Allocating domain for %s failed\n", pci_name(pdev));
2128                 return NULL;
2129         }
2130
2131         /* make sure context mapping is ok */
2132         if (unlikely(!domain_context_mapped(domain, pdev))) {
2133                 ret = domain_context_mapping(domain, pdev);
2134                 if (ret) {
2135                         printk(KERN_ERR
2136                                 "Domain context map for %s failed\n",
2137                                 pci_name(pdev));
2138                         return NULL;
2139                 }
2140         }
2141
2142         return domain;
2143 }
2144
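/*
 * Core single-mapping path: find (or create) the device's domain, allocate an
 * IOVA large enough for the page-aligned buffer, install the PTEs with the
 * protection implied by the DMA direction, flush as needed, and return the
 * bus address plus the offset into the first page.
 */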
2145 static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
2146                                      size_t size, int dir, u64 dma_mask)
2147 {
2148         struct pci_dev *pdev = to_pci_dev(hwdev);
2149         struct dmar_domain *domain;
2150         phys_addr_t start_paddr;
2151         struct iova *iova;
2152         int prot = 0;
2153         int ret;
2154         struct intel_iommu *iommu;
2155
2156         BUG_ON(dir == DMA_NONE);
2157         if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
2158                 return paddr;
2159
2160         domain = get_valid_domain_for_dev(pdev);
2161         if (!domain)
2162                 return 0;
2163
2164         iommu = domain_get_iommu(domain);
2165         size = aligned_size((u64)paddr, size);
2166
2167         iova = __intel_alloc_iova(hwdev, domain, size, pdev->dma_mask);
2168         if (!iova)
2169                 goto error;
2170
2171         start_paddr = (phys_addr_t)iova->pfn_lo << PAGE_SHIFT;
2172
2173         /*
2174          * Check if the DMAR unit supports zero-length reads on
2175          * write-only mappings.
2176          */
2177         if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
2178                         !cap_zlr(iommu->cap))
2179                 prot |= DMA_PTE_READ;
2180         if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
2181                 prot |= DMA_PTE_WRITE;
2182         /*
2183          * paddr to (paddr + size) might span a partial page, so we map the
2184          * whole page.  Note: if two parts of one page are mapped separately,
2185          * we might end up with two guest addresses mapping to the same host
2186          * paddr, but this is not a big problem
2187          */
2188         ret = domain_page_mapping(domain, start_paddr,
2189                 ((u64)paddr) & PAGE_MASK, size, prot);
2190         if (ret)
2191                 goto error;
2192
2193         /* it's a non-present to present mapping */
2194         ret = iommu_flush_iotlb_psi(iommu, domain->id,
2195                         start_paddr, size >> VTD_PAGE_SHIFT, 1);
2196         if (ret)
2197                 iommu_flush_write_buffer(iommu);
2198
2199         return start_paddr + ((u64)paddr & (~PAGE_MASK));
2200
2201 error:
2202         if (iova)
2203                 __free_iova(&domain->iovad, iova);
2204         printk(KERN_ERR "Device %s request: %lx@%llx dir %d --- failed\n",
2205                 pci_name(pdev), size, (unsigned long long)paddr, dir);
2206         return 0;
2207 }
2208
2209 dma_addr_t intel_map_single(struct device *hwdev, phys_addr_t paddr,
2210                             size_t size, int dir)
2211 {
2212         return __intel_map_single(hwdev, paddr, size, dir,
2213                                   to_pci_dev(hwdev)->dma_mask);
2214 }
2215
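/*
 * Deferred-unmap machinery: instead of flushing the IOTLB on every unmap,
 * freed IOVAs are queued per IOMMU in deferred_flush[] and released here in
 * one batch after a single global IOTLB flush.  This runs from unmap_timer
 * and, synchronously, when add_unmap() hits HIGH_WATER_MARK.
 */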
2216 static void flush_unmaps(void)
2217 {
2218         int i, j;
2219
2220         timer_on = 0;
2221
2222         /* just flush them all */
2223         for (i = 0; i < g_num_of_iommus; i++) {
2224                 struct intel_iommu *iommu = g_iommus[i];
2225                 if (!iommu)
2226                         continue;
2227
2228                 if (deferred_flush[i].next) {
2229                         iommu->flush.flush_iotlb(iommu, 0, 0, 0,
2230                                                  DMA_TLB_GLOBAL_FLUSH, 0);
2231                         for (j = 0; j < deferred_flush[i].next; j++) {
2232                                 __free_iova(&deferred_flush[i].domain[j]->iovad,
2233                                                 deferred_flush[i].iova[j]);
2234                         }
2235                         deferred_flush[i].next = 0;
2236                 }
2237         }
2238
2239         list_size = 0;
2240 }
2241
2242 static void flush_unmaps_timeout(unsigned long data)
2243 {
2244         unsigned long flags;
2245
2246         spin_lock_irqsave(&async_umap_flush_lock, flags);
2247         flush_unmaps();
2248         spin_unlock_irqrestore(&async_umap_flush_lock, flags);
2249 }
2250
2251 static void add_unmap(struct dmar_domain *dom, struct iova *iova)
2252 {
2253         unsigned long flags;
2254         int next, iommu_id;
2255         struct intel_iommu *iommu;
2256
2257         spin_lock_irqsave(&async_umap_flush_lock, flags);
2258         if (list_size == HIGH_WATER_MARK)
2259                 flush_unmaps();
2260
2261         iommu = domain_get_iommu(dom);
2262         iommu_id = iommu->seq_id;
2263
2264         next = deferred_flush[iommu_id].next;
2265         deferred_flush[iommu_id].domain[next] = dom;
2266         deferred_flush[iommu_id].iova[next] = iova;
2267         deferred_flush[iommu_id].next++;
2268
2269         if (!timer_on) {
2270                 mod_timer(&unmap_timer, jiffies + msecs_to_jiffies(10));
2271                 timer_on = 1;
2272         }
2273         list_size++;
2274         spin_unlock_irqrestore(&async_umap_flush_lock, flags);
2275 }
2276
2277 void intel_unmap_single(struct device *dev, dma_addr_t dev_addr, size_t size,
2278                         int dir)
2279 {
2280         struct pci_dev *pdev = to_pci_dev(dev);
2281         struct dmar_domain *domain;
2282         unsigned long start_addr;
2283         struct iova *iova;
2284         struct intel_iommu *iommu;
2285
2286         if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
2287                 return;
2288         domain = find_domain(pdev);
2289         BUG_ON(!domain);
2290
2291         iommu = domain_get_iommu(domain);
2292
2293         iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr));
2294         if (!iova)
2295                 return;
2296
2297         start_addr = iova->pfn_lo << PAGE_SHIFT;
2298         size = aligned_size((u64)dev_addr, size);
2299
2300         pr_debug("Device %s unmapping: %lx@%llx\n",
2301                 pci_name(pdev), size, (unsigned long long)start_addr);
2302
2303         /*  clear the whole page */
2304         dma_pte_clear_range(domain, start_addr, start_addr + size);
2305         /* free page tables */
2306         dma_pte_free_pagetable(domain, start_addr, start_addr + size);
2307         if (intel_iommu_strict) {
2308                 if (iommu_flush_iotlb_psi(iommu,
2309                         domain->id, start_addr, size >> VTD_PAGE_SHIFT, 0))
2310                         iommu_flush_write_buffer(iommu);
2311                 /* free iova */
2312                 __free_iova(&domain->iovad, iova);
2313         } else {
2314                 add_unmap(domain, iova);
2315                 /*
2316                  * queue up the release of the unmap to save the roughly 1/6th
2317                  * of the cpu otherwise used up by per-unmap iotlb flushes...
2318                  */
2319         }
2320 }
2321
2322 void *intel_alloc_coherent(struct device *hwdev, size_t size,
2323                            dma_addr_t *dma_handle, gfp_t flags)
2324 {
2325         void *vaddr;
2326         int order;
2327
2328         size = PAGE_ALIGN(size);
2329         order = get_order(size);
2330         flags &= ~(GFP_DMA | GFP_DMA32);
2331
2332         vaddr = (void *)__get_free_pages(flags, order);
2333         if (!vaddr)
2334                 return NULL;
2335         memset(vaddr, 0, size);
2336
2337         *dma_handle = __intel_map_single(hwdev, virt_to_bus(vaddr), size,
2338                                          DMA_BIDIRECTIONAL,
2339                                          hwdev->coherent_dma_mask);
2340         if (*dma_handle)
2341                 return vaddr;
2342         free_pages((unsigned long)vaddr, order);
2343         return NULL;
2344 }
2345
2346 void intel_free_coherent(struct device *hwdev, size_t size, void *vaddr,
2347                          dma_addr_t dma_handle)
2348 {
2349         int order;
2350
2351         size = PAGE_ALIGN(size);
2352         order = get_order(size);
2353
2354         intel_unmap_single(hwdev, dma_handle, size, DMA_BIDIRECTIONAL);
2355         free_pages((unsigned long)vaddr, order);
2356 }
2357
2358 #define SG_ENT_VIRT_ADDRESS(sg) (sg_virt((sg)))
2359
2360 void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
2361                     int nelems, int dir)
2362 {
2363         int i;
2364         struct pci_dev *pdev = to_pci_dev(hwdev);
2365         struct dmar_domain *domain;
2366         unsigned long start_addr;
2367         struct iova *iova;
2368         size_t size = 0;
2369         void *addr;
2370         struct scatterlist *sg;
2371         struct intel_iommu *iommu;
2372
2373         if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
2374                 return;
2375
2376         domain = find_domain(pdev);
2377         BUG_ON(!domain);
2378
2379         iommu = domain_get_iommu(domain);
2380
2381         iova = find_iova(&domain->iovad, IOVA_PFN(sglist[0].dma_address));
2382         if (!iova)
2383                 return;
2384         for_each_sg(sglist, sg, nelems, i) {
2385                 addr = SG_ENT_VIRT_ADDRESS(sg);
2386                 size += aligned_size((u64)addr, sg->length);
2387         }
2388
2389         start_addr = iova->pfn_lo << PAGE_SHIFT;
2390
2391         /*  clear the whole page */
2392         dma_pte_clear_range(domain, start_addr, start_addr + size);
2393         /* free page tables */
2394         dma_pte_free_pagetable(domain, start_addr, start_addr + size);
2395
2396         if (iommu_flush_iotlb_psi(iommu, domain->id, start_addr,
2397                         size >> VTD_PAGE_SHIFT, 0))
2398                 iommu_flush_write_buffer(iommu);
2399
2400         /* free iova */
2401         __free_iova(&domain->iovad, iova);
2402 }
2403
2404 static int intel_nontranslate_map_sg(struct device *hddev,
2405         struct scatterlist *sglist, int nelems, int dir)
2406 {
2407         int i;
2408         struct scatterlist *sg;
2409
2410         for_each_sg(sglist, sg, nelems, i) {
2411                 BUG_ON(!sg_page(sg));
2412                 sg->dma_address = virt_to_bus(SG_ENT_VIRT_ADDRESS(sg));
2413                 sg->dma_length = sg->length;
2414         }
2415         return nelems;
2416 }
2417
2418 int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems,
2419                  int dir)
2420 {
2421         void *addr;
2422         int i;
2423         struct pci_dev *pdev = to_pci_dev(hwdev);
2424         struct dmar_domain *domain;
2425         size_t size = 0;
2426         int prot = 0;
2427         size_t offset = 0;
2428         struct iova *iova = NULL;
2429         int ret;
2430         struct scatterlist *sg;
2431         unsigned long start_addr;
2432         struct intel_iommu *iommu;
2433
2434         BUG_ON(dir == DMA_NONE);
2435         if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
2436                 return intel_nontranslate_map_sg(hwdev, sglist, nelems, dir);
2437
2438         domain = get_valid_domain_for_dev(pdev);
2439         if (!domain)
2440                 return 0;
2441
2442         iommu = domain_get_iommu(domain);
2443
2444         for_each_sg(sglist, sg, nelems, i) {
2445                 addr = SG_ENT_VIRT_ADDRESS(sg);
2446                 addr = (void *)virt_to_phys(addr);
2447                 size += aligned_size((u64)addr, sg->length);
2448         }
2449
2450         iova = __intel_alloc_iova(hwdev, domain, size, pdev->dma_mask);
2451         if (!iova) {
2452                 sglist->dma_length = 0;
2453                 return 0;
2454         }
2455
2456         /*
2457          * Check if the DMAR unit supports zero-length reads on
2458          * write-only mappings.
2459          */
2460         if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
2461                         !cap_zlr(iommu->cap))
2462                 prot |= DMA_PTE_READ;
2463         if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
2464                 prot |= DMA_PTE_WRITE;
2465
2466         start_addr = iova->pfn_lo << PAGE_SHIFT;
2467         offset = 0;
2468         for_each_sg(sglist, sg, nelems, i) {
2469                 addr = SG_ENT_VIRT_ADDRESS(sg);
2470                 addr = (void *)virt_to_phys(addr);
2471                 size = aligned_size((u64)addr, sg->length);
2472                 ret = domain_page_mapping(domain, start_addr + offset,
2473                         ((u64)addr) & PAGE_MASK,
2474                         size, prot);
2475                 if (ret) {
2476                         /*  clear the page */
2477                         dma_pte_clear_range(domain, start_addr,
2478                                   start_addr + offset);
2479                         /* free page tables */
2480                         dma_pte_free_pagetable(domain, start_addr,
2481                                   start_addr + offset);
2482                         /* free iova */
2483                         __free_iova(&domain->iovad, iova);
2484                         return 0;
2485                 }
2486                 sg->dma_address = start_addr + offset +
2487                                 ((u64)addr & (~PAGE_MASK));
2488                 sg->dma_length = sg->length;
2489                 offset += size;
2490         }
2491
2492         /* it's a non-present to present mapping */
2493         if (iommu_flush_iotlb_psi(iommu, domain->id,
2494                         start_addr, offset >> VTD_PAGE_SHIFT, 1))
2495                 iommu_flush_write_buffer(iommu);
2496         return nelems;
2497 }
2498
2499 static struct dma_mapping_ops intel_dma_ops = {
2500         .alloc_coherent = intel_alloc_coherent,
2501         .free_coherent = intel_free_coherent,
2502         .map_single = intel_map_single,
2503         .unmap_single = intel_unmap_single,
2504         .map_sg = intel_map_sg,
2505         .unmap_sg = intel_unmap_sg,
2506 };
2507
2508 static inline int iommu_domain_cache_init(void)
2509 {
2510         int ret = 0;
2511
2512         iommu_domain_cache = kmem_cache_create("iommu_domain",
2513                                          sizeof(struct dmar_domain),
2514                                          0,
2515                                          SLAB_HWCACHE_ALIGN,
2517                                          NULL);
2518         if (!iommu_domain_cache) {
2519                 printk(KERN_ERR "Couldn't create iommu_domain cache\n");
2520                 ret = -ENOMEM;
2521         }
2522
2523         return ret;
2524 }
2525
2526 static inline int iommu_devinfo_cache_init(void)
2527 {
2528         int ret = 0;
2529
2530         iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
2531                                          sizeof(struct device_domain_info),
2532                                          0,
2533                                          SLAB_HWCACHE_ALIGN,
2534                                          NULL);
2535         if (!iommu_devinfo_cache) {
2536                 printk(KERN_ERR "Couldn't create devinfo cache\n");
2537                 ret = -ENOMEM;
2538         }
2539
2540         return ret;
2541 }
2542
2543 static inline int iommu_iova_cache_init(void)
2544 {
2545         int ret = 0;
2546
2547         iommu_iova_cache = kmem_cache_create("iommu_iova",
2548                                          sizeof(struct iova),
2549                                          0,
2550                                          SLAB_HWCACHE_ALIGN,
2551                                          NULL);
2552         if (!iommu_iova_cache) {
2553                 printk(KERN_ERR "Couldn't create iova cache\n");
2554                 ret = -ENOMEM;
2555         }
2556
2557         return ret;
2558 }
2559
2560 static int __init iommu_init_mempool(void)
2561 {
2562         int ret;
2563         ret = iommu_iova_cache_init();
2564         if (ret)
2565                 return ret;
2566
2567         ret = iommu_domain_cache_init();
2568         if (ret)
2569                 goto domain_error;
2570
2571         ret = iommu_devinfo_cache_init();
2572         if (!ret)
2573                 return ret;
2574
2575         kmem_cache_destroy(iommu_domain_cache);
2576 domain_error:
2577         kmem_cache_destroy(iommu_iova_cache);
2578
2579         return -ENOMEM;
2580 }
2581
2582 static void __init iommu_exit_mempool(void)
2583 {
2584         kmem_cache_destroy(iommu_devinfo_cache);
2585         kmem_cache_destroy(iommu_domain_cache);
2586         kmem_cache_destroy(iommu_iova_cache);
2587
2588 }
2589
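/*
 * Decide which DRHD units translation can skip entirely: units whose device
 * scope contains no surviving PCI devices are ignored, and when dmar_map_gfx
 * is clear, units covering only graphics devices are ignored too, with their
 * devices tagged DUMMY_DEVICE_DOMAIN_INFO so the DMA ops pass them through
 * untranslated.
 */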
2590 static void __init init_no_remapping_devices(void)
2591 {
2592         struct dmar_drhd_unit *drhd;
2593
2594         for_each_drhd_unit(drhd) {
2595                 if (!drhd->include_all) {
2596                         int i;
2597                         for (i = 0; i < drhd->devices_cnt; i++)
2598                                 if (drhd->devices[i] != NULL)
2599                                         break;
2600                         /* ignore DMAR unit if no pci devices exist */
2601                         if (i == drhd->devices_cnt)
2602                                 drhd->ignored = 1;
2603                 }
2604         }
2605
2606         if (dmar_map_gfx)
2607                 return;
2608
2609         for_each_drhd_unit(drhd) {
2610                 int i;
2611                 if (drhd->ignored || drhd->include_all)
2612                         continue;
2613
2614                 for (i = 0; i < drhd->devices_cnt; i++)
2615                         if (drhd->devices[i] &&
2616                                 !IS_GFX_DEVICE(drhd->devices[i]))
2617                                 break;
2618
2619                 if (i < drhd->devices_cnt)
2620                         continue;
2621
2622                 /* bypass IOMMU if it is just for gfx devices */
2623                 drhd->ignored = 1;
2624                 for (i = 0; i < drhd->devices_cnt; i++) {
2625                         if (!drhd->devices[i])
2626                                 continue;
2627                         drhd->devices[i]->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
2628                 }
2629         }
2630 }
2631
2632 int __init intel_iommu_init(void)
2633 {
2634         int ret = 0;
2635
2636         if (dmar_table_init())
2637                 return  -ENODEV;
2638
2639         if (dmar_dev_scope_init())
2640                 return  -ENODEV;
2641
2642         /*
2643          * Check the need for DMA-remapping initialization now.
2644          * The above initialization will also be used by interrupt remapping.
2645          */
2646         if (no_iommu || swiotlb || dmar_disabled)
2647                 return -ENODEV;
2648
2649         iommu_init_mempool();
2650         dmar_init_reserved_ranges();
2651
2652         init_no_remapping_devices();
2653
2654         ret = init_dmars();
2655         if (ret) {
2656                 printk(KERN_ERR "IOMMU: dmar init failed\n");
2657                 put_iova_domain(&reserved_iova_list);
2658                 iommu_exit_mempool();
2659                 return ret;
2660         }
2661         printk(KERN_INFO
2662         "PCI-DMA: Intel(R) Virtualization Technology for Directed I/O\n");
2663
2664         init_timer(&unmap_timer);
2665         force_iommu = 1;
2666         dma_ops = &intel_dma_ops;
2667         return 0;
2668 }
2669
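/*
 * Attach @pdev to a virtual machine domain: record its bus/devfn in a
 * device_domain_info, link that onto the domain's device list and the global
 * device_domain_list, and publish it via dev.archdata.iommu.
 */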
2670 static int vm_domain_add_dev_info(struct dmar_domain *domain,
2671                                   struct pci_dev *pdev)
2672 {
2673         struct device_domain_info *info;
2674         unsigned long flags;
2675
2676         info = alloc_devinfo_mem();
2677         if (!info)
2678                 return -ENOMEM;
2679
2680         info->bus = pdev->bus->number;
2681         info->devfn = pdev->devfn;
2682         info->dev = pdev;
2683         info->domain = domain;
2684
2685         spin_lock_irqsave(&device_domain_lock, flags);
2686         list_add(&info->link, &domain->devices);
2687         list_add(&info->global, &device_domain_list);
2688         pdev->dev.archdata.iommu = info;
2689         spin_unlock_irqrestore(&device_domain_lock, flags);
2690
2691         return 0;
2692 }
2693
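/*
 * Detach @pdev from a virtual machine domain: drop its device_domain_info,
 * clear its context entry and, if no other device behind the same IOMMU
 * remains in the domain, remove that IOMMU from iommu_bmp and recompute the
 * domain's coherency attribute.
 */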
2694 static void vm_domain_remove_one_dev_info(struct dmar_domain *domain,
2695                                           struct pci_dev *pdev)
2696 {
2697         struct device_domain_info *info;
2698         struct intel_iommu *iommu;
2699         unsigned long flags;
2700         int found = 0;
2701         struct list_head *entry, *tmp;
2702
2703         iommu = device_to_iommu(pdev->bus->number, pdev->devfn);
2704         if (!iommu)
2705                 return;
2706
2707         spin_lock_irqsave(&device_domain_lock, flags);
2708         list_for_each_safe(entry, tmp, &domain->devices) {
2709                 info = list_entry(entry, struct device_domain_info, link);
2710                 if (info->bus == pdev->bus->number &&
2711                     info->devfn == pdev->devfn) {
2712                         list_del(&info->link);
2713                         list_del(&info->global);
2714                         if (info->dev)
2715                                 info->dev->dev.archdata.iommu = NULL;
2716                         spin_unlock_irqrestore(&device_domain_lock, flags);
2717
2718                         iommu_detach_dev(iommu, info->bus, info->devfn);
2719                         free_devinfo_mem(info);
2720
2721                         spin_lock_irqsave(&device_domain_lock, flags);
2722
2723                         if (found)
2724                                 break;
2725                         else
2726                                 continue;
2727                 }
2728
2729                 /* if there are no other devices under the same iommu
2730                  * owned by this domain, clear this iommu in iommu_bmp,
2731                  * update iommu count and coherency
2732                  */
2733                 if (device_to_iommu(info->bus, info->devfn) == iommu)
2734                         found = 1;
2735         }
2736
2737         if (found == 0) {
2738                 unsigned long tmp_flags;
2739                 spin_lock_irqsave(&domain->iommu_lock, tmp_flags);
2740                 clear_bit(iommu->seq_id, &domain->iommu_bmp);
2741                 domain->iommu_count--;
2742                 domain_update_iommu_coherency(domain);
2743                 spin_unlock_irqrestore(&domain->iommu_lock, tmp_flags);
2744         }
2745
2746         spin_unlock_irqrestore(&device_domain_lock, flags);
2747 }
2748
2749 static void vm_domain_remove_all_dev_info(struct dmar_domain *domain)
2750 {
2751         struct device_domain_info *info;
2752         struct intel_iommu *iommu;
2753         unsigned long flags1, flags2;
2754
2755         spin_lock_irqsave(&device_domain_lock, flags1);
2756         while (!list_empty(&domain->devices)) {
2757                 info = list_entry(domain->devices.next,
2758                         struct device_domain_info, link);
2759                 list_del(&info->link);
2760                 list_del(&info->global);
2761                 if (info->dev)
2762                         info->dev->dev.archdata.iommu = NULL;
2763
2764                 spin_unlock_irqrestore(&device_domain_lock, flags1);
2765
2766                 iommu = device_to_iommu(info->bus, info->devfn);
2767                 iommu_detach_dev(iommu, info->bus, info->devfn);
2768
2769                 /* clear this iommu in iommu_bmp, update iommu count
2770                  * and coherency
2771                  */
2772                 spin_lock_irqsave(&domain->iommu_lock, flags2);
2773                 if (test_and_clear_bit(iommu->seq_id,
2774                                        &domain->iommu_bmp)) {
2775                         domain->iommu_count--;
2776                         domain_update_iommu_coherency(domain);
2777                 }
2778                 spin_unlock_irqrestore(&domain->iommu_lock, flags2);
2779
2780                 free_devinfo_mem(info);
2781                 spin_lock_irqsave(&device_domain_lock, flags1);
2782         }
2783         spin_unlock_irqrestore(&device_domain_lock, flags1);
2784 }
2785
2786 void intel_iommu_domain_exit(struct dmar_domain *domain)
2787 {
2788         u64 end;
2789
2790         /* Domain 0 is reserved, so don't process it */
2791         if (!domain)
2792                 return;
2793
2794         end = DOMAIN_MAX_ADDR(domain->gaw);
2795         end = end & (~VTD_PAGE_MASK);
2796
2797         /* clear ptes */
2798         dma_pte_clear_range(domain, 0, end);
2799
2800         /* free page tables */
2801         dma_pte_free_pagetable(domain, 0, end);
2802
2803         iommu_free_domain(domain);
2804         free_domain_mem(domain);
2805 }
2806 EXPORT_SYMBOL_GPL(intel_iommu_domain_exit);
2807
2808 struct dmar_domain *intel_iommu_domain_alloc(struct pci_dev *pdev)
2809 {
2810         struct dmar_drhd_unit *drhd;
2811         struct dmar_domain *domain;
2812         struct intel_iommu *iommu;
2813
2814         drhd = dmar_find_matched_drhd_unit(pdev);
2815         if (!drhd) {
2816                 printk(KERN_ERR "intel_iommu_domain_alloc: drhd == NULL\n");
2817                 return NULL;
2818         }
2819
2820         iommu = drhd->iommu;
2821         if (!iommu) {
2822                 printk(KERN_ERR
2823                         "intel_iommu_domain_alloc: iommu == NULL\n");
2824                 return NULL;
2825         }
2826         domain = iommu_alloc_domain(iommu);
2827         if (!domain) {
2828                 printk(KERN_ERR
2829                         "intel_iommu_domain_alloc: domain == NULL\n");
2830                 return NULL;
2831         }
2832         if (domain_init(domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
2833                 printk(KERN_ERR
2834                         "intel_iommu_domain_alloc: domain_init() failed\n");
2835                 intel_iommu_domain_exit(domain);
2836                 return NULL;
2837         }
2838         return domain;
2839 }
2840 EXPORT_SYMBOL_GPL(intel_iommu_domain_alloc);
2841
2842 int intel_iommu_context_mapping(
2843         struct dmar_domain *domain, struct pci_dev *pdev)
2844 {
2845         int rc;
2846         rc = domain_context_mapping(domain, pdev);
2847         return rc;
2848 }
2849 EXPORT_SYMBOL_GPL(intel_iommu_context_mapping);
2850
2851 int intel_iommu_page_mapping(
2852         struct dmar_domain *domain, dma_addr_t iova,
2853         u64 hpa, size_t size, int prot)
2854 {
2855         int rc;
2856         rc = domain_page_mapping(domain, iova, hpa, size, prot);
2857         return rc;
2858 }
2859 EXPORT_SYMBOL_GPL(intel_iommu_page_mapping);
2860
2861 void intel_iommu_detach_dev(struct dmar_domain *domain, u8 bus, u8 devfn)
2862 {
2863         struct intel_iommu *iommu;
2864
2865         iommu = device_to_iommu(bus, devfn);
2866         iommu_detach_dev(iommu, bus, devfn);
2867 }
2868 EXPORT_SYMBOL_GPL(intel_iommu_detach_dev);
2869
2870 struct dmar_domain *
2871 intel_iommu_find_domain(struct pci_dev *pdev)
2872 {
2873         return find_domain(pdev);
2874 }
2875 EXPORT_SYMBOL_GPL(intel_iommu_find_domain);
2876
2877 int intel_iommu_found(void)
2878 {
2879         return g_num_of_iommus;
2880 }
2881 EXPORT_SYMBOL_GPL(intel_iommu_found);
2882
2883 u64 intel_iommu_iova_to_pfn(struct dmar_domain *domain, u64 iova)
2884 {
2885         struct dma_pte *pte;
2886         u64 pfn;
2887
2888         pfn = 0;
2889         pte = addr_to_dma_pte(domain, iova);
2890
2891         if (pte)
2892                 pfn = dma_pte_addr(pte);
2893
2894         return pfn >> VTD_PAGE_SHIFT;
2895 }
2896 EXPORT_SYMBOL_GPL(intel_iommu_iova_to_pfn);
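/*
 * Illustrative use of the exported helpers above by an external caller such
 * as a device-assignment path.  This is only a sketch; "pdev", "gpa" and
 * "hpa" are assumed to come from the caller and error handling is omitted:
 *
 *	struct dmar_domain *dom = intel_iommu_domain_alloc(pdev);
 *
 *	intel_iommu_context_mapping(dom, pdev);
 *	intel_iommu_page_mapping(dom, gpa, hpa, PAGE_SIZE,
 *				 DMA_PTE_READ | DMA_PTE_WRITE);
 *	...
 *	intel_iommu_detach_dev(dom, pdev->bus->number, pdev->devfn);
 *	intel_iommu_domain_exit(dom);
 */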