2 * Copyright (c) 2006, Intel Corporation.
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * You should have received a copy of the GNU General Public License along with
14 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
15 * Place - Suite 330, Boston, MA 02111-1307 USA.
17 * Copyright (C) 2006-2008 Intel Corporation
18 * Author: Ashok Raj <ashok.raj@intel.com>
19 * Author: Shaohua Li <shaohua.li@intel.com>
20 * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
21 * Author: Fenghua Yu <fenghua.yu@intel.com>
24 #include <linux/init.h>
25 #include <linux/bitmap.h>
26 #include <linux/debugfs.h>
27 #include <linux/slab.h>
28 #include <linux/irq.h>
29 #include <linux/interrupt.h>
30 #include <linux/spinlock.h>
31 #include <linux/pci.h>
32 #include <linux/dmar.h>
33 #include <linux/dma-mapping.h>
34 #include <linux/mempool.h>
35 #include <linux/timer.h>
36 #include <linux/iova.h>
37 #include <linux/intel-iommu.h>
38 #include <asm/cacheflush.h>
39 #include <asm/iommu.h>
42 #define ROOT_SIZE VTD_PAGE_SIZE
43 #define CONTEXT_SIZE VTD_PAGE_SIZE
45 #define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
46 #define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
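/*
 * Worked example (illustrative, not from the original source): a VGA
 * controller has pdev->class == 0x030000, so (0x030000 >> 16) == 0x03 ==
 * PCI_BASE_CLASS_DISPLAY and IS_GFX_DEVICE() is true; an ISA bridge has
 * class 0x060100, so (0x060100 >> 8) == 0x0601 == PCI_CLASS_BRIDGE_ISA.
 */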
48 #define IOAPIC_RANGE_START (0xfee00000)
49 #define IOAPIC_RANGE_END (0xfeefffff)
50 #define IOVA_START_ADDR (0x1000)
52 #define DEFAULT_DOMAIN_ADDRESS_WIDTH 48
54 #define DOMAIN_MAX_ADDR(gaw) ((((u64)1) << gaw) - 1)
56 #define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT)
57 #define DMA_32BIT_PFN IOVA_PFN(DMA_32BIT_MASK)
58 #define DMA_64BIT_PFN IOVA_PFN(DMA_64BIT_MASK)
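/*
 * Quick arithmetic sketch for the macros above (illustrative): a 48-bit
 * domain has DOMAIN_MAX_ADDR(48) == 2^48 - 1 as its highest guest address.
 * With 4KiB pages, IOVA_PFN(DMA_32BIT_MASK) == 0xffffffff >> 12 == 0xfffff,
 * i.e. the last page frame below 4GiB, which is what DMA_32BIT_PFN expands to.
 */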
60 /* global iommu list, set NULL for ignored DMAR units */
61 static struct intel_iommu **g_iommus;
66 * 12-63: Context Ptr (12 - (haw-1))
73 #define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
74 static inline bool root_present(struct root_entry *root)
76 return (root->val & 1);
78 static inline void set_root_present(struct root_entry *root)
82 static inline void set_root_value(struct root_entry *root, unsigned long value)
84 root->val |= value & VTD_PAGE_MASK;
87 static inline struct context_entry *
88 get_context_addr_from_root(struct root_entry *root)
90 return (struct context_entry *)
91 (root_present(root)?phys_to_virt(
92 root->val & VTD_PAGE_MASK) :
99 * 1: fault processing disable
100 * 2-3: translation type
101 * 12-63: address space root
107 struct context_entry {
112 static inline bool context_present(struct context_entry *context)
114 return (context->lo & 1);
116 static inline void context_set_present(struct context_entry *context)
121 static inline void context_set_fault_enable(struct context_entry *context)
123 context->lo &= (((u64)-1) << 2) | 1;
126 #define CONTEXT_TT_MULTI_LEVEL 0
128 static inline void context_set_translation_type(struct context_entry *context,
131 context->lo &= (((u64)-1) << 4) | 3;
132 context->lo |= (value & 3) << 2;
135 static inline void context_set_address_root(struct context_entry *context,
138 context->lo |= value & VTD_PAGE_MASK;
141 static inline void context_set_address_width(struct context_entry *context,
144 context->hi |= value & 7;
147 static inline void context_set_domain_id(struct context_entry *context,
150 context->hi |= (value & ((1 << 16) - 1)) << 8;
153 static inline void context_clear_entry(struct context_entry *context)
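/*
 * Putting the context-entry helpers above together: programming an entry for
 * a device follows roughly the sequence used later in
 * domain_context_mapping_one() (sketch only):
 *
 *	context_set_domain_id(context, domain->id);
 *	context_set_address_width(context, domain->agaw);
 *	context_set_address_root(context, virt_to_phys(domain->pgd));
 *	context_set_translation_type(context, CONTEXT_TT_MULTI_LEVEL);
 *	context_set_fault_enable(context);
 *	context_set_present(context);
 */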
165 * 12-63: Host physical address
171 static inline void dma_clear_pte(struct dma_pte *pte)
176 static inline void dma_set_pte_readable(struct dma_pte *pte)
178 pte->val |= DMA_PTE_READ;
181 static inline void dma_set_pte_writable(struct dma_pte *pte)
183 pte->val |= DMA_PTE_WRITE;
186 static inline void dma_set_pte_prot(struct dma_pte *pte, unsigned long prot)
188 pte->val = (pte->val & ~3) | (prot & 3);
191 static inline u64 dma_pte_addr(struct dma_pte *pte)
193 return (pte->val & VTD_PAGE_MASK);
196 static inline void dma_set_pte_addr(struct dma_pte *pte, u64 addr)
198 pte->val |= (addr & VTD_PAGE_MASK);
201 static inline bool dma_pte_present(struct dma_pte *pte)
203 return (pte->val & 3) != 0;
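/*
 * A last-level PTE is built by pairing a page-aligned address with the
 * protection bits, e.g. (sketch mirroring domain_page_mapping() below):
 *
 *	dma_set_pte_addr(pte, phys_addr & VTD_PAGE_MASK);
 *	dma_set_pte_prot(pte, DMA_PTE_READ | DMA_PTE_WRITE);
 */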
206 /* devices under the same p2p bridge are owned in one domain */
207 #define DOMAIN_FLAG_P2P_MULTIPLE_DEVICES (1 << 0)
209 /* domain represents a virtual machine; more than one device
210 * across iommus may be owned by one domain, e.g. a kvm guest.
212 #define DOMAIN_FLAG_VIRTUAL_MACHINE (1 << 1)
215 int id; /* domain id */
216 unsigned long iommu_bmp; /* bitmap of iommus this domain uses*/
218 struct list_head devices; /* all devices' list */
219 struct iova_domain iovad; /* iova's that belong to this domain */
221 struct dma_pte *pgd; /* virtual address */
222 spinlock_t mapping_lock; /* page table lock */
223 int gaw; /* max guest address width */
225 /* adjusted guest address width, 0 is level 2 30-bit */
228 int flags; /* flags to find out type of domain */
230 int iommu_coherency;/* indicate coherency of iommu access */
231 int iommu_count; /* reference count of iommu */
232 spinlock_t iommu_lock; /* protect iommu set in domain */
235 /* PCI domain-device relationship */
236 struct device_domain_info {
237 struct list_head link; /* link to domain siblings */
238 struct list_head global; /* link to global list */
239 u8 bus; /* PCI bus number */
240 u8 devfn; /* PCI devfn number */
241 struct pci_dev *dev; /* it's NULL for PCIE-to-PCI bridge */
242 struct dmar_domain *domain; /* pointer to domain */
245 static void flush_unmaps_timeout(unsigned long data);
247 DEFINE_TIMER(unmap_timer, flush_unmaps_timeout, 0, 0);
249 #define HIGH_WATER_MARK 250
250 struct deferred_flush_tables {
252 struct iova *iova[HIGH_WATER_MARK];
253 struct dmar_domain *domain[HIGH_WATER_MARK];
256 static struct deferred_flush_tables *deferred_flush;
258 /* number of intel_iommus, used to size and index the per-iommu arrays */
259 static int g_num_of_iommus;
261 static DEFINE_SPINLOCK(async_umap_flush_lock);
262 static LIST_HEAD(unmaps_to_do);
265 static long list_size;
267 static void domain_remove_dev_info(struct dmar_domain *domain);
270 static int __initdata dmar_map_gfx = 1;
271 static int dmar_forcedac;
272 static int intel_iommu_strict;
274 #define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
275 static DEFINE_SPINLOCK(device_domain_lock);
276 static LIST_HEAD(device_domain_list);
278 static int __init intel_iommu_setup(char *str)
283 if (!strncmp(str, "off", 3)) {
285 printk(KERN_INFO"Intel-IOMMU: disabled\n");
286 } else if (!strncmp(str, "igfx_off", 8)) {
289 "Intel-IOMMU: disable GFX device mapping\n");
290 } else if (!strncmp(str, "forcedac", 8)) {
292 "Intel-IOMMU: Forcing DAC for PCI devices\n");
294 } else if (!strncmp(str, "strict", 6)) {
296 "Intel-IOMMU: disable batched IOTLB flush\n");
297 intel_iommu_strict = 1;
300 str += strcspn(str, ",");
306 __setup("intel_iommu=", intel_iommu_setup);
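/*
 * Example kernel command lines handled by the parser above (the options may
 * be combined with commas), e.g.:
 *
 *	intel_iommu=off
 *	intel_iommu=igfx_off,strict
 *	intel_iommu=forcedac
 */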
308 static struct kmem_cache *iommu_domain_cache;
309 static struct kmem_cache *iommu_devinfo_cache;
310 static struct kmem_cache *iommu_iova_cache;
312 static inline void *iommu_kmem_cache_alloc(struct kmem_cache *cachep)
317 /* trying to avoid low memory issues */
318 flags = current->flags & PF_MEMALLOC;
319 current->flags |= PF_MEMALLOC;
320 vaddr = kmem_cache_alloc(cachep, GFP_ATOMIC);
321 current->flags &= (~PF_MEMALLOC | flags);
326 static inline void *alloc_pgtable_page(void)
331 /* trying to avoid low memory issues */
332 flags = current->flags & PF_MEMALLOC;
333 current->flags |= PF_MEMALLOC;
334 vaddr = (void *)get_zeroed_page(GFP_ATOMIC);
335 current->flags &= (~PF_MEMALLOC | flags);
339 static inline void free_pgtable_page(void *vaddr)
341 free_page((unsigned long)vaddr);
344 static inline void *alloc_domain_mem(void)
346 return iommu_kmem_cache_alloc(iommu_domain_cache);
349 static void free_domain_mem(void *vaddr)
351 kmem_cache_free(iommu_domain_cache, vaddr);
354 static inline void * alloc_devinfo_mem(void)
356 return iommu_kmem_cache_alloc(iommu_devinfo_cache);
359 static inline void free_devinfo_mem(void *vaddr)
361 kmem_cache_free(iommu_devinfo_cache, vaddr);
364 struct iova *alloc_iova_mem(void)
366 return iommu_kmem_cache_alloc(iommu_iova_cache);
369 void free_iova_mem(struct iova *iova)
371 kmem_cache_free(iommu_iova_cache, iova);
375 static inline int width_to_agaw(int width);
377 /* calculate agaw for each iommu.
378 * "SAGAW" may be different across iommus, use a default agaw, and
379 * get a supported, smaller agaw for iommus that don't support the default agaw.
381 int iommu_calculate_agaw(struct intel_iommu *iommu)
386 sagaw = cap_sagaw(iommu->cap);
387 for (agaw = width_to_agaw(DEFAULT_DOMAIN_ADDRESS_WIDTH);
389 if (test_bit(agaw, &sagaw))
396 /* in native case, each domain is related to only one iommu */
397 static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
401 BUG_ON(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE);
403 iommu_id = find_first_bit(&domain->iommu_bmp, g_num_of_iommus);
404 if (iommu_id < 0 || iommu_id >= g_num_of_iommus)
407 return g_iommus[iommu_id];
410 /* "Coherency" capability may be different across iommus */
411 static void domain_update_iommu_coherency(struct dmar_domain *domain)
415 domain->iommu_coherency = 1;
417 i = find_first_bit(&domain->iommu_bmp, g_num_of_iommus);
418 for (; i < g_num_of_iommus; ) {
419 if (!ecap_coherent(g_iommus[i]->ecap)) {
420 domain->iommu_coherency = 0;
423 i = find_next_bit(&domain->iommu_bmp, g_num_of_iommus, i+1);
427 static struct intel_iommu *device_to_iommu(u8 bus, u8 devfn)
429 struct dmar_drhd_unit *drhd = NULL;
432 for_each_drhd_unit(drhd) {
436 for (i = 0; i < drhd->devices_cnt; i++)
437 if (drhd->devices[i]->bus->number == bus &&
438 drhd->devices[i]->devfn == devfn)
441 if (drhd->include_all)
448 /* Gets context entry for a given bus and devfn */
449 static struct context_entry * device_to_context_entry(struct intel_iommu *iommu,
452 struct root_entry *root;
453 struct context_entry *context;
454 unsigned long phy_addr;
457 spin_lock_irqsave(&iommu->lock, flags);
458 root = &iommu->root_entry[bus];
459 context = get_context_addr_from_root(root);
461 context = (struct context_entry *)alloc_pgtable_page();
463 spin_unlock_irqrestore(&iommu->lock, flags);
466 __iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
467 phy_addr = virt_to_phys((void *)context);
468 set_root_value(root, phy_addr);
469 set_root_present(root);
470 __iommu_flush_cache(iommu, root, sizeof(*root));
472 spin_unlock_irqrestore(&iommu->lock, flags);
473 return &context[devfn];
476 static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
478 struct root_entry *root;
479 struct context_entry *context;
483 spin_lock_irqsave(&iommu->lock, flags);
484 root = &iommu->root_entry[bus];
485 context = get_context_addr_from_root(root);
490 ret = context_present(&context[devfn]);
492 spin_unlock_irqrestore(&iommu->lock, flags);
496 static void clear_context_table(struct intel_iommu *iommu, u8 bus, u8 devfn)
498 struct root_entry *root;
499 struct context_entry *context;
502 spin_lock_irqsave(&iommu->lock, flags);
503 root = &iommu->root_entry[bus];
504 context = get_context_addr_from_root(root);
506 context_clear_entry(&context[devfn]);
507 __iommu_flush_cache(iommu, &context[devfn], \
510 spin_unlock_irqrestore(&iommu->lock, flags);
513 static void free_context_table(struct intel_iommu *iommu)
515 struct root_entry *root;
518 struct context_entry *context;
520 spin_lock_irqsave(&iommu->lock, flags);
521 if (!iommu->root_entry) {
524 for (i = 0; i < ROOT_ENTRY_NR; i++) {
525 root = &iommu->root_entry[i];
526 context = get_context_addr_from_root(root);
528 free_pgtable_page(context);
530 free_pgtable_page(iommu->root_entry);
531 iommu->root_entry = NULL;
533 spin_unlock_irqrestore(&iommu->lock, flags);
536 /* page table handling */
537 #define LEVEL_STRIDE (9)
538 #define LEVEL_MASK (((u64)1 << LEVEL_STRIDE) - 1)
540 static inline int agaw_to_level(int agaw)
545 static inline int agaw_to_width(int agaw)
547 return 30 + agaw * LEVEL_STRIDE;
551 static inline int width_to_agaw(int width)
553 return (width - 30) / LEVEL_STRIDE;
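/*
 * Worked example for the AGAW helpers (illustrative): a 48-bit address width
 * gives width_to_agaw(48) == (48 - 30) / 9 == 2, i.e. a 4-level page table,
 * and agaw_to_width(2) == 30 + 2 * 9 == 48 recovers the original width.
 * Likewise a 39-bit width maps to agaw 1 (3 levels) and 30-bit to agaw 0
 * (2 levels).
 */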
556 static inline unsigned int level_to_offset_bits(int level)
558 return (12 + (level - 1) * LEVEL_STRIDE);
561 static inline int address_level_offset(u64 addr, int level)
563 return ((addr >> level_to_offset_bits(level)) & LEVEL_MASK);
566 static inline u64 level_mask(int level)
568 return ((u64)-1 << level_to_offset_bits(level));
571 static inline u64 level_size(int level)
573 return ((u64)1 << level_to_offset_bits(level));
576 static inline u64 align_to_level(u64 addr, int level)
578 return ((addr + level_size(level) - 1) & level_mask(level));
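/*
 * For reference (illustrative): level 1 indexes address bits 12-20, level 2
 * bits 21-29, level 3 bits 30-38 and level 4 bits 39-47, since
 * level_to_offset_bits() is 12 + (level - 1) * 9.  For instance,
 * address_level_offset(0x40201000, 2) == (0x40201000 >> 21) & 0x1ff == 1.
 */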
581 static struct dma_pte * addr_to_dma_pte(struct dmar_domain *domain, u64 addr)
583 int addr_width = agaw_to_width(domain->agaw);
584 struct dma_pte *parent, *pte = NULL;
585 int level = agaw_to_level(domain->agaw);
588 struct intel_iommu *iommu = domain_get_iommu(domain);
590 BUG_ON(!domain->pgd);
592 addr &= (((u64)1) << addr_width) - 1;
593 parent = domain->pgd;
595 spin_lock_irqsave(&domain->mapping_lock, flags);
599 offset = address_level_offset(addr, level);
600 pte = &parent[offset];
604 if (!dma_pte_present(pte)) {
605 tmp_page = alloc_pgtable_page();
608 spin_unlock_irqrestore(&domain->mapping_lock,
612 __iommu_flush_cache(iommu, tmp_page,
614 dma_set_pte_addr(pte, virt_to_phys(tmp_page));
616 * high level table always sets r/w, last level page
617 * table controls read/write
619 dma_set_pte_readable(pte);
620 dma_set_pte_writable(pte);
621 __iommu_flush_cache(iommu, pte, sizeof(*pte));
623 parent = phys_to_virt(dma_pte_addr(pte));
627 spin_unlock_irqrestore(&domain->mapping_lock, flags);
631 /* return address's pte at specific level */
632 static struct dma_pte *dma_addr_level_pte(struct dmar_domain *domain, u64 addr,
635 struct dma_pte *parent, *pte = NULL;
636 int total = agaw_to_level(domain->agaw);
639 parent = domain->pgd;
640 while (level <= total) {
641 offset = address_level_offset(addr, total);
642 pte = &parent[offset];
646 if (!dma_pte_present(pte))
648 parent = phys_to_virt(dma_pte_addr(pte));
654 /* clear one page's page table */
655 static void dma_pte_clear_one(struct dmar_domain *domain, u64 addr)
657 struct dma_pte *pte = NULL;
658 struct intel_iommu *iommu = domain_get_iommu(domain);
660 /* get last level pte */
661 pte = dma_addr_level_pte(domain, addr, 1);
665 __iommu_flush_cache(iommu, pte, sizeof(*pte));
669 /* clear last level pte, a tlb flush should be followed */
670 static void dma_pte_clear_range(struct dmar_domain *domain, u64 start, u64 end)
672 int addr_width = agaw_to_width(domain->agaw);
674 start &= (((u64)1) << addr_width) - 1;
675 end &= (((u64)1) << addr_width) - 1;
676 /* in case it's a partial page */
677 start = PAGE_ALIGN(start);
680 /* we don't need a lock here, nobody else touches the iova range */
681 while (start < end) {
682 dma_pte_clear_one(domain, start);
683 start += VTD_PAGE_SIZE;
687 /* free page table pages. last level pte should already be cleared */
688 static void dma_pte_free_pagetable(struct dmar_domain *domain,
691 int addr_width = agaw_to_width(domain->agaw);
693 int total = agaw_to_level(domain->agaw);
696 struct intel_iommu *iommu = domain_get_iommu(domain);
698 start &= (((u64)1) << addr_width) - 1;
699 end &= (((u64)1) << addr_width) - 1;
701 /* we don't need a lock here, nobody else touches the iova range */
703 while (level <= total) {
704 tmp = align_to_level(start, level);
705 if (tmp >= end || (tmp + level_size(level) > end))
709 pte = dma_addr_level_pte(domain, tmp, level);
712 phys_to_virt(dma_pte_addr(pte)));
714 __iommu_flush_cache(iommu,
717 tmp += level_size(level);
722 if (start == 0 && end >= ((((u64)1) << addr_width) - 1)) {
723 free_pgtable_page(domain->pgd);
729 static int iommu_alloc_root_entry(struct intel_iommu *iommu)
731 struct root_entry *root;
734 root = (struct root_entry *)alloc_pgtable_page();
738 __iommu_flush_cache(iommu, root, ROOT_SIZE);
740 spin_lock_irqsave(&iommu->lock, flags);
741 iommu->root_entry = root;
742 spin_unlock_irqrestore(&iommu->lock, flags);
747 static void iommu_set_root_entry(struct intel_iommu *iommu)
753 addr = iommu->root_entry;
755 spin_lock_irqsave(&iommu->register_lock, flag);
756 dmar_writeq(iommu->reg + DMAR_RTADDR_REG, virt_to_phys(addr));
758 cmd = iommu->gcmd | DMA_GCMD_SRTP;
759 writel(cmd, iommu->reg + DMAR_GCMD_REG);
761 /* Make sure hardware completes it */
762 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
763 readl, (sts & DMA_GSTS_RTPS), sts);
765 spin_unlock_irqrestore(&iommu->register_lock, flag);
768 static void iommu_flush_write_buffer(struct intel_iommu *iommu)
773 if (!cap_rwbf(iommu->cap))
775 val = iommu->gcmd | DMA_GCMD_WBF;
777 spin_lock_irqsave(&iommu->register_lock, flag);
778 writel(val, iommu->reg + DMAR_GCMD_REG);
780 /* Make sure hardware completes it */
781 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
782 readl, (!(val & DMA_GSTS_WBFS)), val);
784 spin_unlock_irqrestore(&iommu->register_lock, flag);
787 /* return value determines if we need a write buffer flush */
788 static int __iommu_flush_context(struct intel_iommu *iommu,
789 u16 did, u16 source_id, u8 function_mask, u64 type,
790 int non_present_entry_flush)
796 * In the non-present entry flush case, if the hardware doesn't cache
797 * non-present entries we do nothing, and if it does cache non-present
798 * entries, we flush entries of domain 0 (domain id 0 is used to tag
799 * any cached non-present entries)
801 if (non_present_entry_flush) {
802 if (!cap_caching_mode(iommu->cap))
809 case DMA_CCMD_GLOBAL_INVL:
810 val = DMA_CCMD_GLOBAL_INVL;
812 case DMA_CCMD_DOMAIN_INVL:
813 val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
815 case DMA_CCMD_DEVICE_INVL:
816 val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
817 | DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
824 spin_lock_irqsave(&iommu->register_lock, flag);
825 dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);
827 /* Make sure hardware completes it */
828 IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
829 dmar_readq, (!(val & DMA_CCMD_ICC)), val);
831 spin_unlock_irqrestore(&iommu->register_lock, flag);
833 /* flushing the context entry will implicitly flush the write buffer */
837 /* return value determines if we need a write buffer flush */
838 static int __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
839 u64 addr, unsigned int size_order, u64 type,
840 int non_present_entry_flush)
842 int tlb_offset = ecap_iotlb_offset(iommu->ecap);
843 u64 val = 0, val_iva = 0;
847 * In the non-present entry flush case, if the hardware doesn't cache
848 * non-present entries we do nothing, and if it does cache non-present
849 * entries, we flush entries of domain 0 (domain id 0 is used to tag
850 * any cached non-present entries)
852 if (non_present_entry_flush) {
853 if (!cap_caching_mode(iommu->cap))
860 case DMA_TLB_GLOBAL_FLUSH:
861 /* global flush doesn't need to set IVA_REG */
862 val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
864 case DMA_TLB_DSI_FLUSH:
865 val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
867 case DMA_TLB_PSI_FLUSH:
868 val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
869 /* Note: always flush non-leaf currently */
870 val_iva = size_order | addr;
875 /* Note: set drain read/write */
878 * This is probably just to be extra safe; it looks like we can
879 * ignore it without any impact.
881 if (cap_read_drain(iommu->cap))
882 val |= DMA_TLB_READ_DRAIN;
884 if (cap_write_drain(iommu->cap))
885 val |= DMA_TLB_WRITE_DRAIN;
887 spin_lock_irqsave(&iommu->register_lock, flag);
888 /* Note: Only uses first TLB reg currently */
890 dmar_writeq(iommu->reg + tlb_offset, val_iva);
891 dmar_writeq(iommu->reg + tlb_offset + 8, val);
893 /* Make sure hardware completes it */
894 IOMMU_WAIT_OP(iommu, tlb_offset + 8,
895 dmar_readq, (!(val & DMA_TLB_IVT)), val);
897 spin_unlock_irqrestore(&iommu->register_lock, flag);
899 /* check IOTLB invalidation granularity */
900 if (DMA_TLB_IAIG(val) == 0)
901 printk(KERN_ERR"IOMMU: flush IOTLB failed\n");
902 if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
903 pr_debug("IOMMU: tlb flush request %Lx, actual %Lx\n",
904 (unsigned long long)DMA_TLB_IIRG(type),
905 (unsigned long long)DMA_TLB_IAIG(val));
906 /* flushing the iotlb entry will implicitly flush the write buffer */
910 static int iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
911 u64 addr, unsigned int pages, int non_present_entry_flush)
915 BUG_ON(addr & (~VTD_PAGE_MASK));
918 /* Fallback to domain selective flush if no PSI support */
919 if (!cap_pgsel_inv(iommu->cap))
920 return iommu->flush.flush_iotlb(iommu, did, 0, 0,
922 non_present_entry_flush);
925 * PSI requires the number of pages to be 2 ^ x, and the base address to
926 * be naturally aligned to that size
928 mask = ilog2(__roundup_pow_of_two(pages));
929 /* Fallback to domain selective flush if size is too big */
930 if (mask > cap_max_amask_val(iommu->cap))
931 return iommu->flush.flush_iotlb(iommu, did, 0, 0,
932 DMA_TLB_DSI_FLUSH, non_present_entry_flush);
934 return iommu->flush.flush_iotlb(iommu, did, addr, mask,
936 non_present_entry_flush);
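/*
 * Example of the PSI mask computation above (illustrative): flushing 9 pages
 * rounds up to 16, so mask = ilog2(16) = 4 and the hardware invalidates a
 * 16-page (64KiB) naturally aligned region containing the requested range.
 */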
939 static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
944 spin_lock_irqsave(&iommu->register_lock, flags);
945 pmen = readl(iommu->reg + DMAR_PMEN_REG);
946 pmen &= ~DMA_PMEN_EPM;
947 writel(pmen, iommu->reg + DMAR_PMEN_REG);
949 /* wait for the protected region status bit to clear */
950 IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
951 readl, !(pmen & DMA_PMEN_PRS), pmen);
953 spin_unlock_irqrestore(&iommu->register_lock, flags);
956 static int iommu_enable_translation(struct intel_iommu *iommu)
961 spin_lock_irqsave(&iommu->register_lock, flags);
962 writel(iommu->gcmd|DMA_GCMD_TE, iommu->reg + DMAR_GCMD_REG);
964 /* Make sure hardware completes it */
965 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
966 readl, (sts & DMA_GSTS_TES), sts);
968 iommu->gcmd |= DMA_GCMD_TE;
969 spin_unlock_irqrestore(&iommu->register_lock, flags);
973 static int iommu_disable_translation(struct intel_iommu *iommu)
978 spin_lock_irqsave(&iommu->register_lock, flag);
979 iommu->gcmd &= ~DMA_GCMD_TE;
980 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
982 /* Make sure hardware completes it */
983 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
984 readl, (!(sts & DMA_GSTS_TES)), sts);
986 spin_unlock_irqrestore(&iommu->register_lock, flag);
990 /* iommu interrupt handling. Most of this is MSI-like. */
992 static const char *fault_reason_strings[] =
995 "Present bit in root entry is clear",
996 "Present bit in context entry is clear",
997 "Invalid context entry",
998 "Access beyond MGAW",
999 "PTE Write access is not set",
1000 "PTE Read access is not set",
1001 "Next page table ptr is invalid",
1002 "Root table address invalid",
1003 "Context table ptr is invalid",
1004 "non-zero reserved fields in RTP",
1005 "non-zero reserved fields in CTP",
1006 "non-zero reserved fields in PTE",
1008 #define MAX_FAULT_REASON_IDX (ARRAY_SIZE(fault_reason_strings) - 1)
1010 const char *dmar_get_fault_reason(u8 fault_reason)
1012 if (fault_reason > MAX_FAULT_REASON_IDX)
1015 return fault_reason_strings[fault_reason];
1018 void dmar_msi_unmask(unsigned int irq)
1020 struct intel_iommu *iommu = get_irq_data(irq);
1024 spin_lock_irqsave(&iommu->register_lock, flag);
1025 writel(0, iommu->reg + DMAR_FECTL_REG);
1026 /* Read a reg to force flush the posted write */
1027 readl(iommu->reg + DMAR_FECTL_REG);
1028 spin_unlock_irqrestore(&iommu->register_lock, flag);
1031 void dmar_msi_mask(unsigned int irq)
1034 struct intel_iommu *iommu = get_irq_data(irq);
1037 spin_lock_irqsave(&iommu->register_lock, flag);
1038 writel(DMA_FECTL_IM, iommu->reg + DMAR_FECTL_REG);
1039 /* Read a reg to force flush the posted write */
1040 readl(iommu->reg + DMAR_FECTL_REG);
1041 spin_unlock_irqrestore(&iommu->register_lock, flag);
1044 void dmar_msi_write(int irq, struct msi_msg *msg)
1046 struct intel_iommu *iommu = get_irq_data(irq);
1049 spin_lock_irqsave(&iommu->register_lock, flag);
1050 writel(msg->data, iommu->reg + DMAR_FEDATA_REG);
1051 writel(msg->address_lo, iommu->reg + DMAR_FEADDR_REG);
1052 writel(msg->address_hi, iommu->reg + DMAR_FEUADDR_REG);
1053 spin_unlock_irqrestore(&iommu->register_lock, flag);
1056 void dmar_msi_read(int irq, struct msi_msg *msg)
1058 struct intel_iommu *iommu = get_irq_data(irq);
1061 spin_lock_irqsave(&iommu->register_lock, flag);
1062 msg->data = readl(iommu->reg + DMAR_FEDATA_REG);
1063 msg->address_lo = readl(iommu->reg + DMAR_FEADDR_REG);
1064 msg->address_hi = readl(iommu->reg + DMAR_FEUADDR_REG);
1065 spin_unlock_irqrestore(&iommu->register_lock, flag);
1068 static int iommu_page_fault_do_one(struct intel_iommu *iommu, int type,
1069 u8 fault_reason, u16 source_id, unsigned long long addr)
1073 reason = dmar_get_fault_reason(fault_reason);
1076 "DMAR:[%s] Request device [%02x:%02x.%d] "
1077 "fault addr %llx \n"
1078 "DMAR:[fault reason %02d] %s\n",
1079 (type ? "DMA Read" : "DMA Write"),
1080 (source_id >> 8), PCI_SLOT(source_id & 0xFF),
1081 PCI_FUNC(source_id & 0xFF), addr, fault_reason, reason);
1085 #define PRIMARY_FAULT_REG_LEN (16)
1086 static irqreturn_t iommu_page_fault(int irq, void *dev_id)
1088 struct intel_iommu *iommu = dev_id;
1089 int reg, fault_index;
1093 spin_lock_irqsave(&iommu->register_lock, flag);
1094 fault_status = readl(iommu->reg + DMAR_FSTS_REG);
1096 /* TBD: ignore advanced fault log currently */
1097 if (!(fault_status & DMA_FSTS_PPF))
1098 goto clear_overflow;
1100 fault_index = dma_fsts_fault_record_index(fault_status);
1101 reg = cap_fault_reg_offset(iommu->cap);
1109 /* highest 32 bits */
1110 data = readl(iommu->reg + reg +
1111 fault_index * PRIMARY_FAULT_REG_LEN + 12);
1112 if (!(data & DMA_FRCD_F))
1115 fault_reason = dma_frcd_fault_reason(data);
1116 type = dma_frcd_type(data);
1118 data = readl(iommu->reg + reg +
1119 fault_index * PRIMARY_FAULT_REG_LEN + 8);
1120 source_id = dma_frcd_source_id(data);
1122 guest_addr = dmar_readq(iommu->reg + reg +
1123 fault_index * PRIMARY_FAULT_REG_LEN);
1124 guest_addr = dma_frcd_page_addr(guest_addr);
1125 /* clear the fault */
1126 writel(DMA_FRCD_F, iommu->reg + reg +
1127 fault_index * PRIMARY_FAULT_REG_LEN + 12);
1129 spin_unlock_irqrestore(&iommu->register_lock, flag);
1131 iommu_page_fault_do_one(iommu, type, fault_reason,
1132 source_id, guest_addr);
1135 if (fault_index > cap_num_fault_regs(iommu->cap))
1137 spin_lock_irqsave(&iommu->register_lock, flag);
1140 /* clear primary fault overflow */
1141 fault_status = readl(iommu->reg + DMAR_FSTS_REG);
1142 if (fault_status & DMA_FSTS_PFO)
1143 writel(DMA_FSTS_PFO, iommu->reg + DMAR_FSTS_REG);
1145 spin_unlock_irqrestore(&iommu->register_lock, flag);
1149 int dmar_set_interrupt(struct intel_iommu *iommu)
1155 printk(KERN_ERR "IOMMU: no free vectors\n");
1159 set_irq_data(irq, iommu);
1162 ret = arch_setup_dmar_msi(irq);
1164 set_irq_data(irq, NULL);
1170 /* Make sure the fault register is cleared */
1171 iommu_page_fault(irq, iommu);
1173 ret = request_irq(irq, iommu_page_fault, 0, iommu->name, iommu);
1175 printk(KERN_ERR "IOMMU: can't request irq\n");
1179 static int iommu_init_domains(struct intel_iommu *iommu)
1181 unsigned long ndomains;
1182 unsigned long nlongs;
1184 ndomains = cap_ndoms(iommu->cap);
1185 pr_debug("Number of Domains supported <%ld>\n", ndomains);
1186 nlongs = BITS_TO_LONGS(ndomains);
1188 /* TBD: there might be 64K domains,
1189 * consider a different allocation for future chips
1191 iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
1192 if (!iommu->domain_ids) {
1193 printk(KERN_ERR "Allocating domain id array failed\n");
1196 iommu->domains = kcalloc(ndomains, sizeof(struct dmar_domain *),
1198 if (!iommu->domains) {
1199 printk(KERN_ERR "Allocating domain array failed\n");
1200 kfree(iommu->domain_ids);
1204 spin_lock_init(&iommu->lock);
1207 * if Caching mode is set, then invalid translations are tagged
1208 * with domain id 0. Hence we need to pre-allocate it.
1210 if (cap_caching_mode(iommu->cap))
1211 set_bit(0, iommu->domain_ids);
1216 static void domain_exit(struct dmar_domain *domain);
1218 void free_dmar_iommu(struct intel_iommu *iommu)
1220 struct dmar_domain *domain;
1222 unsigned long flags;
1224 i = find_first_bit(iommu->domain_ids, cap_ndoms(iommu->cap));
1225 for (; i < cap_ndoms(iommu->cap); ) {
1226 domain = iommu->domains[i];
1227 clear_bit(i, iommu->domain_ids);
1229 spin_lock_irqsave(&domain->iommu_lock, flags);
1230 if (--domain->iommu_count == 0)
1231 domain_exit(domain);
1232 spin_unlock_irqrestore(&domain->iommu_lock, flags);
1234 i = find_next_bit(iommu->domain_ids,
1235 cap_ndoms(iommu->cap), i+1);
1238 if (iommu->gcmd & DMA_GCMD_TE)
1239 iommu_disable_translation(iommu);
1242 set_irq_data(iommu->irq, NULL);
1243 /* This will mask the irq */
1244 free_irq(iommu->irq, iommu);
1245 destroy_irq(iommu->irq);
1248 kfree(iommu->domains);
1249 kfree(iommu->domain_ids);
1251 g_iommus[iommu->seq_id] = NULL;
1253 /* if all iommus are freed, free g_iommus */
1254 for (i = 0; i < g_num_of_iommus; i++) {
1259 if (i == g_num_of_iommus)
1262 /* free context mapping */
1263 free_context_table(iommu);
1266 static struct dmar_domain * iommu_alloc_domain(struct intel_iommu *iommu)
1269 unsigned long ndomains;
1270 struct dmar_domain *domain;
1271 unsigned long flags;
1273 domain = alloc_domain_mem();
1277 ndomains = cap_ndoms(iommu->cap);
1279 spin_lock_irqsave(&iommu->lock, flags);
1280 num = find_first_zero_bit(iommu->domain_ids, ndomains);
1281 if (num >= ndomains) {
1282 spin_unlock_irqrestore(&iommu->lock, flags);
1283 free_domain_mem(domain);
1284 printk(KERN_ERR "IOMMU: no free domain ids\n");
1288 set_bit(num, iommu->domain_ids);
1290 memset(&domain->iommu_bmp, 0, sizeof(unsigned long));
1291 set_bit(iommu->seq_id, &domain->iommu_bmp);
1293 iommu->domains[num] = domain;
1294 spin_unlock_irqrestore(&iommu->lock, flags);
1299 static void iommu_free_domain(struct dmar_domain *domain)
1301 unsigned long flags;
1302 struct intel_iommu *iommu;
1304 iommu = domain_get_iommu(domain);
1306 spin_lock_irqsave(&iommu->lock, flags);
1307 clear_bit(domain->id, iommu->domain_ids);
1308 spin_unlock_irqrestore(&iommu->lock, flags);
1311 static struct iova_domain reserved_iova_list;
1312 static struct lock_class_key reserved_alloc_key;
1313 static struct lock_class_key reserved_rbtree_key;
1315 static void dmar_init_reserved_ranges(void)
1317 struct pci_dev *pdev = NULL;
1322 init_iova_domain(&reserved_iova_list, DMA_32BIT_PFN);
1324 lockdep_set_class(&reserved_iova_list.iova_alloc_lock,
1325 &reserved_alloc_key);
1326 lockdep_set_class(&reserved_iova_list.iova_rbtree_lock,
1327 &reserved_rbtree_key);
1329 /* IOAPIC ranges shouldn't be accessed by DMA */
1330 iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
1331 IOVA_PFN(IOAPIC_RANGE_END));
1333 printk(KERN_ERR "Reserve IOAPIC range failed\n");
1335 /* Reserve all PCI MMIO to avoid peer-to-peer access */
1336 for_each_pci_dev(pdev) {
1339 for (i = 0; i < PCI_NUM_RESOURCES; i++) {
1340 r = &pdev->resource[i];
1341 if (!r->flags || !(r->flags & IORESOURCE_MEM))
1345 size = r->end - addr;
1346 size = PAGE_ALIGN(size);
1347 iova = reserve_iova(&reserved_iova_list, IOVA_PFN(addr),
1348 IOVA_PFN(size + addr) - 1);
1350 printk(KERN_ERR "Reserve iova failed\n");
1356 static void domain_reserve_special_ranges(struct dmar_domain *domain)
1358 copy_reserved_iova(&reserved_iova_list, &domain->iovad);
1361 static inline int guestwidth_to_adjustwidth(int gaw)
1364 int r = (gaw - 12) % 9;
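/*
 * Example of the adjustment (sketch): the adjusted width rounds the guest
 * address width up to the nearest width whole page-table levels can express,
 * i.e. 12 + a multiple of 9.  A gaw of 35 gives r = (35 - 12) % 9 = 5, so the
 * adjusted width becomes 35 + (9 - 5) = 39; a gaw of 48 has r == 0 and is
 * kept as is.
 */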
1375 static int domain_init(struct dmar_domain *domain, int guest_width)
1377 struct intel_iommu *iommu;
1378 int adjust_width, agaw;
1379 unsigned long sagaw;
1381 init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
1382 spin_lock_init(&domain->mapping_lock);
1383 spin_lock_init(&domain->iommu_lock);
1385 domain_reserve_special_ranges(domain);
1387 /* calculate AGAW */
1388 iommu = domain_get_iommu(domain);
1389 if (guest_width > cap_mgaw(iommu->cap))
1390 guest_width = cap_mgaw(iommu->cap);
1391 domain->gaw = guest_width;
1392 adjust_width = guestwidth_to_adjustwidth(guest_width);
1393 agaw = width_to_agaw(adjust_width);
1394 sagaw = cap_sagaw(iommu->cap);
1395 if (!test_bit(agaw, &sagaw)) {
1396 /* hardware doesn't support it, choose a bigger one */
1397 pr_debug("IOMMU: hardware doesn't support agaw %d\n", agaw);
1398 agaw = find_next_bit(&sagaw, 5, agaw);
1402 domain->agaw = agaw;
1403 INIT_LIST_HEAD(&domain->devices);
1405 if (ecap_coherent(iommu->ecap))
1406 domain->iommu_coherency = 1;
1408 domain->iommu_coherency = 0;
1410 domain->iommu_count = 1;
1412 /* always allocate the top pgd */
1413 domain->pgd = (struct dma_pte *)alloc_pgtable_page();
1416 __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE);
1420 static void domain_exit(struct dmar_domain *domain)
1424 /* Domain 0 is reserved, so don't process it */
1428 domain_remove_dev_info(domain);
1430 put_iova_domain(&domain->iovad);
1431 end = DOMAIN_MAX_ADDR(domain->gaw);
1432 end = end & (~PAGE_MASK);
1435 dma_pte_clear_range(domain, 0, end);
1437 /* free page tables */
1438 dma_pte_free_pagetable(domain, 0, end);
1440 iommu_free_domain(domain);
1441 free_domain_mem(domain);
1444 static int domain_context_mapping_one(struct dmar_domain *domain,
1447 struct context_entry *context;
1448 struct intel_iommu *iommu = domain_get_iommu(domain);
1449 unsigned long flags;
1451 pr_debug("Set context mapping for %02x:%02x.%d\n",
1452 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
1453 BUG_ON(!domain->pgd);
1454 context = device_to_context_entry(iommu, bus, devfn);
1457 spin_lock_irqsave(&iommu->lock, flags);
1458 if (context_present(context)) {
1459 spin_unlock_irqrestore(&iommu->lock, flags);
1463 context_set_domain_id(context, domain->id);
1464 context_set_address_width(context, domain->agaw);
1465 context_set_address_root(context, virt_to_phys(domain->pgd));
1466 context_set_translation_type(context, CONTEXT_TT_MULTI_LEVEL);
1467 context_set_fault_enable(context);
1468 context_set_present(context);
1469 __iommu_flush_cache(iommu, context, sizeof(*context));
1471 /* it's a non-present to present mapping */
1472 if (iommu->flush.flush_context(iommu, domain->id,
1473 (((u16)bus) << 8) | devfn, DMA_CCMD_MASK_NOBIT,
1474 DMA_CCMD_DEVICE_INVL, 1))
1475 iommu_flush_write_buffer(iommu);
1477 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_DSI_FLUSH, 0);
1479 spin_unlock_irqrestore(&iommu->lock, flags);
1481 spin_lock_irqsave(&domain->iommu_lock, flags);
1482 if (!test_and_set_bit(iommu->seq_id, &domain->iommu_bmp)) {
1483 domain->iommu_count++;
1484 domain_update_iommu_coherency(domain);
1486 spin_unlock_irqrestore(&domain->iommu_lock, flags);
1491 domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev)
1494 struct pci_dev *tmp, *parent;
1496 ret = domain_context_mapping_one(domain, pdev->bus->number,
1501 /* dependent device mapping */
1502 tmp = pci_find_upstream_pcie_bridge(pdev);
1505 /* Secondary interface's bus number and devfn 0 */
1506 parent = pdev->bus->self;
1507 while (parent != tmp) {
1508 ret = domain_context_mapping_one(domain, parent->bus->number,
1512 parent = parent->bus->self;
1514 if (tmp->is_pcie) /* this is a PCIE-to-PCI bridge */
1515 return domain_context_mapping_one(domain,
1516 tmp->subordinate->number, 0);
1517 else /* this is a legacy PCI bridge */
1518 return domain_context_mapping_one(domain,
1519 tmp->bus->number, tmp->devfn);
1522 static int domain_context_mapped(struct dmar_domain *domain,
1523 struct pci_dev *pdev)
1526 struct pci_dev *tmp, *parent;
1527 struct intel_iommu *iommu = domain_get_iommu(domain);
1529 ret = device_context_mapped(iommu,
1530 pdev->bus->number, pdev->devfn);
1533 /* dependent device mapping */
1534 tmp = pci_find_upstream_pcie_bridge(pdev);
1537 /* Secondary interface's bus number and devfn 0 */
1538 parent = pdev->bus->self;
1539 while (parent != tmp) {
1540 ret = device_context_mapped(iommu, parent->bus->number,
1544 parent = parent->bus->self;
1547 return device_context_mapped(iommu,
1548 tmp->subordinate->number, 0);
1550 return device_context_mapped(iommu,
1551 tmp->bus->number, tmp->devfn);
1555 domain_page_mapping(struct dmar_domain *domain, dma_addr_t iova,
1556 u64 hpa, size_t size, int prot)
1558 u64 start_pfn, end_pfn;
1559 struct dma_pte *pte;
1561 int addr_width = agaw_to_width(domain->agaw);
1562 struct intel_iommu *iommu = domain_get_iommu(domain);
1564 hpa &= (((u64)1) << addr_width) - 1;
1566 if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
1569 start_pfn = ((u64)hpa) >> VTD_PAGE_SHIFT;
1570 end_pfn = (VTD_PAGE_ALIGN(((u64)hpa) + size)) >> VTD_PAGE_SHIFT;
1572 while (start_pfn < end_pfn) {
1573 pte = addr_to_dma_pte(domain, iova + VTD_PAGE_SIZE * index);
1576 /* We don't need a lock here, nobody else
1577 * touches the iova range
1579 BUG_ON(dma_pte_addr(pte));
1580 dma_set_pte_addr(pte, start_pfn << VTD_PAGE_SHIFT);
1581 dma_set_pte_prot(pte, prot);
1582 __iommu_flush_cache(iommu, pte, sizeof(*pte));
1589 static void iommu_detach_dev(struct intel_iommu *iommu, u8 bus, u8 devfn)
1594 clear_context_table(iommu, bus, devfn);
1595 iommu->flush.flush_context(iommu, 0, 0, 0,
1596 DMA_CCMD_GLOBAL_INVL, 0);
1597 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
1598 DMA_TLB_GLOBAL_FLUSH, 0);
1601 static void domain_remove_dev_info(struct dmar_domain *domain)
1603 struct device_domain_info *info;
1604 unsigned long flags;
1605 struct intel_iommu *iommu;
1607 spin_lock_irqsave(&device_domain_lock, flags);
1608 while (!list_empty(&domain->devices)) {
1609 info = list_entry(domain->devices.next,
1610 struct device_domain_info, link);
1611 list_del(&info->link);
1612 list_del(&info->global);
1614 info->dev->dev.archdata.iommu = NULL;
1615 spin_unlock_irqrestore(&device_domain_lock, flags);
1617 iommu = device_to_iommu(info->bus, info->devfn);
1618 iommu_detach_dev(iommu, info->bus, info->devfn);
1619 free_devinfo_mem(info);
1621 spin_lock_irqsave(&device_domain_lock, flags);
1623 spin_unlock_irqrestore(&device_domain_lock, flags);
1628 * Note: we use struct pci_dev->dev.archdata.iommu to store the info
1630 static struct dmar_domain *
1631 find_domain(struct pci_dev *pdev)
1633 struct device_domain_info *info;
1635 /* No lock here, assumes no domain exit in normal case */
1636 info = pdev->dev.archdata.iommu;
1638 return info->domain;
1642 /* domain is initialized */
1643 static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw)
1645 struct dmar_domain *domain, *found = NULL;
1646 struct intel_iommu *iommu;
1647 struct dmar_drhd_unit *drhd;
1648 struct device_domain_info *info, *tmp;
1649 struct pci_dev *dev_tmp;
1650 unsigned long flags;
1651 int bus = 0, devfn = 0;
1653 domain = find_domain(pdev);
1657 dev_tmp = pci_find_upstream_pcie_bridge(pdev);
1659 if (dev_tmp->is_pcie) {
1660 bus = dev_tmp->subordinate->number;
1663 bus = dev_tmp->bus->number;
1664 devfn = dev_tmp->devfn;
1666 spin_lock_irqsave(&device_domain_lock, flags);
1667 list_for_each_entry(info, &device_domain_list, global) {
1668 if (info->bus == bus && info->devfn == devfn) {
1669 found = info->domain;
1673 spin_unlock_irqrestore(&device_domain_lock, flags);
1674 /* pcie-pci bridge already has a domain, use it */
1681 /* Allocate new domain for the device */
1682 drhd = dmar_find_matched_drhd_unit(pdev);
1684 printk(KERN_ERR "IOMMU: can't find DMAR for device %s\n",
1688 iommu = drhd->iommu;
1690 domain = iommu_alloc_domain(iommu);
1694 if (domain_init(domain, gaw)) {
1695 domain_exit(domain);
1699 /* register pcie-to-pci device */
1701 info = alloc_devinfo_mem();
1703 domain_exit(domain);
1707 info->devfn = devfn;
1709 info->domain = domain;
1710 /* This domain is shared by devices under p2p bridge */
1711 domain->flags |= DOMAIN_FLAG_P2P_MULTIPLE_DEVICES;
1713 /* pcie-to-pci bridge already has a domain, use it */
1715 spin_lock_irqsave(&device_domain_lock, flags);
1716 list_for_each_entry(tmp, &device_domain_list, global) {
1717 if (tmp->bus == bus && tmp->devfn == devfn) {
1718 found = tmp->domain;
1723 free_devinfo_mem(info);
1724 domain_exit(domain);
1727 list_add(&info->link, &domain->devices);
1728 list_add(&info->global, &device_domain_list);
1730 spin_unlock_irqrestore(&device_domain_lock, flags);
1734 info = alloc_devinfo_mem();
1737 info->bus = pdev->bus->number;
1738 info->devfn = pdev->devfn;
1740 info->domain = domain;
1741 spin_lock_irqsave(&device_domain_lock, flags);
1742 /* somebody else got there first */
1743 found = find_domain(pdev);
1744 if (found != NULL) {
1745 spin_unlock_irqrestore(&device_domain_lock, flags);
1746 if (found != domain) {
1747 domain_exit(domain);
1750 free_devinfo_mem(info);
1753 list_add(&info->link, &domain->devices);
1754 list_add(&info->global, &device_domain_list);
1755 pdev->dev.archdata.iommu = info;
1756 spin_unlock_irqrestore(&device_domain_lock, flags);
1759 /* recheck it here, maybe others set it */
1760 return find_domain(pdev);
1763 static int iommu_prepare_identity_map(struct pci_dev *pdev,
1764 unsigned long long start,
1765 unsigned long long end)
1767 struct dmar_domain *domain;
1769 unsigned long long base;
1773 "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
1774 pci_name(pdev), start, end);
1775 /* page table init */
1776 domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
1780 /* The address might not be aligned */
1781 base = start & PAGE_MASK;
1783 size = PAGE_ALIGN(size);
1784 if (!reserve_iova(&domain->iovad, IOVA_PFN(base),
1785 IOVA_PFN(base + size) - 1)) {
1786 printk(KERN_ERR "IOMMU: reserve iova failed\n");
1791 pr_debug("Mapping reserved region %lx@%llx for %s\n",
1792 size, base, pci_name(pdev));
1794 * RMRR range might overlap with the physical memory range,
1797 dma_pte_clear_range(domain, base, base + size);
1799 ret = domain_page_mapping(domain, base, base, size,
1800 DMA_PTE_READ|DMA_PTE_WRITE);
1804 /* context entry init */
1805 ret = domain_context_mapping(domain, pdev);
1809 domain_exit(domain);
1814 static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
1815 struct pci_dev *pdev)
1817 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
1819 return iommu_prepare_identity_map(pdev, rmrr->base_address,
1820 rmrr->end_address + 1);
1823 #ifdef CONFIG_DMAR_GFX_WA
1824 struct iommu_prepare_data {
1825 struct pci_dev *pdev;
1829 static int __init iommu_prepare_work_fn(unsigned long start_pfn,
1830 unsigned long end_pfn, void *datax)
1832 struct iommu_prepare_data *data;
1834 data = (struct iommu_prepare_data *)datax;
1836 data->ret = iommu_prepare_identity_map(data->pdev,
1837 start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT);
1842 static int __init iommu_prepare_with_active_regions(struct pci_dev *pdev)
1845 struct iommu_prepare_data data;
1850 for_each_online_node(nid) {
1851 work_with_active_regions(nid, iommu_prepare_work_fn, &data);
1858 static void __init iommu_prepare_gfx_mapping(void)
1860 struct pci_dev *pdev = NULL;
1863 for_each_pci_dev(pdev) {
1864 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO ||
1865 !IS_GFX_DEVICE(pdev))
1867 printk(KERN_INFO "IOMMU: gfx device %s 1-1 mapping\n",
1869 ret = iommu_prepare_with_active_regions(pdev);
1871 printk(KERN_ERR "IOMMU: mapping reserved region failed\n");
1874 #else /* !CONFIG_DMAR_GFX_WA */
1875 static inline void iommu_prepare_gfx_mapping(void)
1881 #ifdef CONFIG_DMAR_FLOPPY_WA
1882 static inline void iommu_prepare_isa(void)
1884 struct pci_dev *pdev;
1887 pdev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL);
1891 printk(KERN_INFO "IOMMU: Prepare 0-16M unity mapping for LPC\n");
1892 ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024);
1895 printk(KERN_ERR "IOMMU: Failed to create 0-16M identity map, "
1896 "floppy might not work\n");
1900 static inline void iommu_prepare_isa(void)
1904 #endif /* !CONFIG_DMAR_FLOPPY_WA */
1906 static int __init init_dmars(void)
1908 struct dmar_drhd_unit *drhd;
1909 struct dmar_rmrr_unit *rmrr;
1910 struct pci_dev *pdev;
1911 struct intel_iommu *iommu;
1912 int i, ret, unit = 0;
1917 * initialize and program root entry to not present
1920 for_each_drhd_unit(drhd) {
1923 * lock not needed as this is only incremented in the single
1924 * threaded kernel __init code path; all other accesses are read-only
1929 g_iommus = kcalloc(g_num_of_iommus, sizeof(struct intel_iommu *),
1932 printk(KERN_ERR "Allocating global iommu array failed\n");
1937 deferred_flush = kzalloc(g_num_of_iommus *
1938 sizeof(struct deferred_flush_tables), GFP_KERNEL);
1939 if (!deferred_flush) {
1945 for_each_drhd_unit(drhd) {
1949 iommu = drhd->iommu;
1950 g_iommus[iommu->seq_id] = iommu;
1952 ret = iommu_init_domains(iommu);
1958 * we could share the same root & context tables
1959 * among all IOMMUs. Need to split it later.
1961 ret = iommu_alloc_root_entry(iommu);
1963 printk(KERN_ERR "IOMMU: allocate root entry failed\n");
1968 for_each_drhd_unit(drhd) {
1972 iommu = drhd->iommu;
1973 if (dmar_enable_qi(iommu)) {
1975 * Queued Invalidate not enabled, use Register Based
1978 iommu->flush.flush_context = __iommu_flush_context;
1979 iommu->flush.flush_iotlb = __iommu_flush_iotlb;
1980 printk(KERN_INFO "IOMMU 0x%Lx: using Register based "
1982 (unsigned long long)drhd->reg_base_addr);
1984 iommu->flush.flush_context = qi_flush_context;
1985 iommu->flush.flush_iotlb = qi_flush_iotlb;
1986 printk(KERN_INFO "IOMMU 0x%Lx: using Queued "
1988 (unsigned long long)drhd->reg_base_addr);
1994 * for each dev attached to rmrr
1996 * locate drhd for dev, alloc domain for dev
1997 * allocate free domain
1998 * allocate page table entries for rmrr
1999 * if context not allocated for bus
2000 * allocate and init context
2001 * set present in root table for this bus
2002 * init context with domain, translation etc
2006 for_each_rmrr_units(rmrr) {
2007 for (i = 0; i < rmrr->devices_cnt; i++) {
2008 pdev = rmrr->devices[i];
2009 /* some BIOSes list non-existent devices in the DMAR table */
2012 ret = iommu_prepare_rmrr_dev(rmrr, pdev);
2015 "IOMMU: mapping reserved region failed\n");
2019 iommu_prepare_gfx_mapping();
2021 iommu_prepare_isa();
2026 * global invalidate context cache
2027 * global invalidate iotlb
2028 * enable translation
2030 for_each_drhd_unit(drhd) {
2033 iommu = drhd->iommu;
2034 sprintf (iommu->name, "dmar%d", unit++);
2036 iommu_flush_write_buffer(iommu);
2038 ret = dmar_set_interrupt(iommu);
2042 iommu_set_root_entry(iommu);
2044 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL,
2046 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH,
2048 iommu_disable_protect_mem_regions(iommu);
2050 ret = iommu_enable_translation(iommu);
2057 for_each_drhd_unit(drhd) {
2060 iommu = drhd->iommu;
2067 static inline u64 aligned_size(u64 host_addr, size_t size)
2070 addr = (host_addr & (~PAGE_MASK)) + size;
2071 return PAGE_ALIGN(addr);
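/*
 * Worked example for aligned_size() (illustrative): a 0x100 byte buffer at
 * host address 0x12345678 has a page offset of 0x678, so the size rounds up
 * to PAGE_ALIGN(0x778) == 0x1000, i.e. one full page must be mapped.
 */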
2075 iommu_alloc_iova(struct dmar_domain *domain, size_t size, u64 end)
2079 /* Make sure it's in range */
2080 end = min_t(u64, DOMAIN_MAX_ADDR(domain->gaw), end);
2081 if (!size || (IOVA_START_ADDR + size > end))
2084 piova = alloc_iova(&domain->iovad,
2085 size >> PAGE_SHIFT, IOVA_PFN(end), 1);
2089 static struct iova *
2090 __intel_alloc_iova(struct device *dev, struct dmar_domain *domain,
2091 size_t size, u64 dma_mask)
2093 struct pci_dev *pdev = to_pci_dev(dev);
2094 struct iova *iova = NULL;
2096 if (dma_mask <= DMA_32BIT_MASK || dmar_forcedac)
2097 iova = iommu_alloc_iova(domain, size, dma_mask);
2100 * First try to allocate an io virtual address in
2101 * DMA_32BIT_MASK and if that fails then try allocating
2104 iova = iommu_alloc_iova(domain, size, DMA_32BIT_MASK);
2106 iova = iommu_alloc_iova(domain, size, dma_mask);
2110 printk(KERN_ERR"Allocating iova for %s failed", pci_name(pdev));
2117 static struct dmar_domain *
2118 get_valid_domain_for_dev(struct pci_dev *pdev)
2120 struct dmar_domain *domain;
2123 domain = get_domain_for_dev(pdev,
2124 DEFAULT_DOMAIN_ADDRESS_WIDTH);
2127 "Allocating domain for %s failed", pci_name(pdev));
2131 /* make sure context mapping is ok */
2132 if (unlikely(!domain_context_mapped(domain, pdev))) {
2133 ret = domain_context_mapping(domain, pdev);
2136 "Domain context map for %s failed",
2145 static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
2146 size_t size, int dir, u64 dma_mask)
2148 struct pci_dev *pdev = to_pci_dev(hwdev);
2149 struct dmar_domain *domain;
2150 phys_addr_t start_paddr;
2154 struct intel_iommu *iommu;
2156 BUG_ON(dir == DMA_NONE);
2157 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
2160 domain = get_valid_domain_for_dev(pdev);
2164 iommu = domain_get_iommu(domain);
2165 size = aligned_size((u64)paddr, size);
2167 iova = __intel_alloc_iova(hwdev, domain, size, pdev->dma_mask);
2171 start_paddr = (phys_addr_t)iova->pfn_lo << PAGE_SHIFT;
2174 * Check if DMAR supports zero-length reads on write only
2177 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
2178 !cap_zlr(iommu->cap))
2179 prot |= DMA_PTE_READ;
2180 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
2181 prot |= DMA_PTE_WRITE;
2183 * paddr - (paddr + size) might be a partial page; we should map the whole
2184 * page.  Note: if two parts of one page are separately mapped, we
2185 * might have two guest_addr mappings to the same host paddr, but this
2186 * is not a big problem
2188 ret = domain_page_mapping(domain, start_paddr,
2189 ((u64)paddr) & PAGE_MASK, size, prot);
2193 /* it's a non-present to present mapping */
2194 ret = iommu_flush_iotlb_psi(iommu, domain->id,
2195 start_paddr, size >> VTD_PAGE_SHIFT, 1);
2197 iommu_flush_write_buffer(iommu);
2199 return start_paddr + ((u64)paddr & (~PAGE_MASK));
2203 __free_iova(&domain->iovad, iova);
2204 printk(KERN_ERR"Device %s request: %lx@%llx dir %d --- failed\n",
2205 pci_name(pdev), size, (unsigned long long)paddr, dir);
2209 dma_addr_t intel_map_single(struct device *hwdev, phys_addr_t paddr,
2210 size_t size, int dir)
2212 return __intel_map_single(hwdev, paddr, size, dir,
2213 to_pci_dev(hwdev)->dma_mask);
2216 static void flush_unmaps(void)
2222 /* just flush them all */
2223 for (i = 0; i < g_num_of_iommus; i++) {
2224 struct intel_iommu *iommu = g_iommus[i];
2228 if (deferred_flush[i].next) {
2229 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
2230 DMA_TLB_GLOBAL_FLUSH, 0);
2231 for (j = 0; j < deferred_flush[i].next; j++) {
2232 __free_iova(&deferred_flush[i].domain[j]->iovad,
2233 deferred_flush[i].iova[j]);
2235 deferred_flush[i].next = 0;
2242 static void flush_unmaps_timeout(unsigned long data)
2244 unsigned long flags;
2246 spin_lock_irqsave(&async_umap_flush_lock, flags);
2248 spin_unlock_irqrestore(&async_umap_flush_lock, flags);
2251 static void add_unmap(struct dmar_domain *dom, struct iova *iova)
2253 unsigned long flags;
2255 struct intel_iommu *iommu;
2257 spin_lock_irqsave(&async_umap_flush_lock, flags);
2258 if (list_size == HIGH_WATER_MARK)
2261 iommu = domain_get_iommu(dom);
2262 iommu_id = iommu->seq_id;
2264 next = deferred_flush[iommu_id].next;
2265 deferred_flush[iommu_id].domain[next] = dom;
2266 deferred_flush[iommu_id].iova[next] = iova;
2267 deferred_flush[iommu_id].next++;
2270 mod_timer(&unmap_timer, jiffies + msecs_to_jiffies(10));
2274 spin_unlock_irqrestore(&async_umap_flush_lock, flags);
2277 void intel_unmap_single(struct device *dev, dma_addr_t dev_addr, size_t size,
2280 struct pci_dev *pdev = to_pci_dev(dev);
2281 struct dmar_domain *domain;
2282 unsigned long start_addr;
2284 struct intel_iommu *iommu;
2286 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
2288 domain = find_domain(pdev);
2291 iommu = domain_get_iommu(domain);
2293 iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr));
2297 start_addr = iova->pfn_lo << PAGE_SHIFT;
2298 size = aligned_size((u64)dev_addr, size);
2300 pr_debug("Device %s unmapping: %lx@%llx\n",
2301 pci_name(pdev), size, (unsigned long long)start_addr);
2303 /* clear the whole page */
2304 dma_pte_clear_range(domain, start_addr, start_addr + size);
2305 /* free page tables */
2306 dma_pte_free_pagetable(domain, start_addr, start_addr + size);
2307 if (intel_iommu_strict) {
2308 if (iommu_flush_iotlb_psi(iommu,
2309 domain->id, start_addr, size >> VTD_PAGE_SHIFT, 0))
2310 iommu_flush_write_buffer(iommu);
2312 __free_iova(&domain->iovad, iova);
2314 add_unmap(domain, iova);
2316 * queue up the release of the unmap to save the 1/6th of the
2317 * cpu time otherwise used up by the iotlb flush operation...
2322 void *intel_alloc_coherent(struct device *hwdev, size_t size,
2323 dma_addr_t *dma_handle, gfp_t flags)
2328 size = PAGE_ALIGN(size);
2329 order = get_order(size);
2330 flags &= ~(GFP_DMA | GFP_DMA32);
2332 vaddr = (void *)__get_free_pages(flags, order);
2335 memset(vaddr, 0, size);
2337 *dma_handle = __intel_map_single(hwdev, virt_to_bus(vaddr), size,
2339 hwdev->coherent_dma_mask);
2342 free_pages((unsigned long)vaddr, order);
2346 void intel_free_coherent(struct device *hwdev, size_t size, void *vaddr,
2347 dma_addr_t dma_handle)
2351 size = PAGE_ALIGN(size);
2352 order = get_order(size);
2354 intel_unmap_single(hwdev, dma_handle, size, DMA_BIDIRECTIONAL);
2355 free_pages((unsigned long)vaddr, order);
2358 #define SG_ENT_VIRT_ADDRESS(sg) (sg_virt((sg)))
2360 void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
2361 int nelems, int dir)
2364 struct pci_dev *pdev = to_pci_dev(hwdev);
2365 struct dmar_domain *domain;
2366 unsigned long start_addr;
2370 struct scatterlist *sg;
2371 struct intel_iommu *iommu;
2373 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
2376 domain = find_domain(pdev);
2379 iommu = domain_get_iommu(domain);
2381 iova = find_iova(&domain->iovad, IOVA_PFN(sglist[0].dma_address));
2384 for_each_sg(sglist, sg, nelems, i) {
2385 addr = SG_ENT_VIRT_ADDRESS(sg);
2386 size += aligned_size((u64)addr, sg->length);
2389 start_addr = iova->pfn_lo << PAGE_SHIFT;
2391 /* clear the whole page */
2392 dma_pte_clear_range(domain, start_addr, start_addr + size);
2393 /* free page tables */
2394 dma_pte_free_pagetable(domain, start_addr, start_addr + size);
2396 if (iommu_flush_iotlb_psi(iommu, domain->id, start_addr,
2397 size >> VTD_PAGE_SHIFT, 0))
2398 iommu_flush_write_buffer(iommu);
2401 __free_iova(&domain->iovad, iova);
2404 static int intel_nontranslate_map_sg(struct device *hddev,
2405 struct scatterlist *sglist, int nelems, int dir)
2408 struct scatterlist *sg;
2410 for_each_sg(sglist, sg, nelems, i) {
2411 BUG_ON(!sg_page(sg));
2412 sg->dma_address = virt_to_bus(SG_ENT_VIRT_ADDRESS(sg));
2413 sg->dma_length = sg->length;
2418 int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems,
2423 struct pci_dev *pdev = to_pci_dev(hwdev);
2424 struct dmar_domain *domain;
2428 struct iova *iova = NULL;
2430 struct scatterlist *sg;
2431 unsigned long start_addr;
2432 struct intel_iommu *iommu;
2434 BUG_ON(dir == DMA_NONE);
2435 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
2436 return intel_nontranslate_map_sg(hwdev, sglist, nelems, dir);
2438 domain = get_valid_domain_for_dev(pdev);
2442 iommu = domain_get_iommu(domain);
2444 for_each_sg(sglist, sg, nelems, i) {
2445 addr = SG_ENT_VIRT_ADDRESS(sg);
2446 addr = (void *)virt_to_phys(addr);
2447 size += aligned_size((u64)addr, sg->length);
2450 iova = __intel_alloc_iova(hwdev, domain, size, pdev->dma_mask);
2452 sglist->dma_length = 0;
2457 * Check if DMAR supports zero-length reads on write only
2460 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
2461 !cap_zlr(iommu->cap))
2462 prot |= DMA_PTE_READ;
2463 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
2464 prot |= DMA_PTE_WRITE;
2466 start_addr = iova->pfn_lo << PAGE_SHIFT;
2468 for_each_sg(sglist, sg, nelems, i) {
2469 addr = SG_ENT_VIRT_ADDRESS(sg);
2470 addr = (void *)virt_to_phys(addr);
2471 size = aligned_size((u64)addr, sg->length);
2472 ret = domain_page_mapping(domain, start_addr + offset,
2473 ((u64)addr) & PAGE_MASK,
2476 /* clear the page */
2477 dma_pte_clear_range(domain, start_addr,
2478 start_addr + offset);
2479 /* free page tables */
2480 dma_pte_free_pagetable(domain, start_addr,
2481 start_addr + offset);
2483 __free_iova(&domain->iovad, iova);
2486 sg->dma_address = start_addr + offset +
2487 ((u64)addr & (~PAGE_MASK));
2488 sg->dma_length = sg->length;
2492 /* it's a non-present to present mapping */
2493 if (iommu_flush_iotlb_psi(iommu, domain->id,
2494 start_addr, offset >> VTD_PAGE_SHIFT, 1))
2495 iommu_flush_write_buffer(iommu);
2499 static struct dma_mapping_ops intel_dma_ops = {
2500 .alloc_coherent = intel_alloc_coherent,
2501 .free_coherent = intel_free_coherent,
2502 .map_single = intel_map_single,
2503 .unmap_single = intel_unmap_single,
2504 .map_sg = intel_map_sg,
2505 .unmap_sg = intel_unmap_sg,
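/*
 * Once dma_ops points at intel_dma_ops (see intel_iommu_init() below), the
 * generic DMA API is routed through the functions above, so a driver's
 * ordinary mapping call ends up in this file, e.g. (usage sketch with
 * hypothetical device/buffer names):
 *
 *	dma_addr_t handle = dma_map_single(&pdev->dev, buf, len, DMA_TO_DEVICE);
 *	...
 *	dma_unmap_single(&pdev->dev, handle, len, DMA_TO_DEVICE);
 */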
2508 static inline int iommu_domain_cache_init(void)
2512 iommu_domain_cache = kmem_cache_create("iommu_domain",
2513 sizeof(struct dmar_domain),
2518 if (!iommu_domain_cache) {
2519 printk(KERN_ERR "Couldn't create iommu_domain cache\n");
static inline int iommu_devinfo_cache_init(void)
{
	int ret = 0;

	iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
					sizeof(struct device_domain_info), 0,
					SLAB_HWCACHE_ALIGN, NULL);
	if (!iommu_devinfo_cache) {
		printk(KERN_ERR "Couldn't create devinfo cache\n");
		ret = -ENOMEM;
	}
	return ret;
}
static inline int iommu_iova_cache_init(void)
{
	int ret = 0;

	iommu_iova_cache = kmem_cache_create("iommu_iova",
					sizeof(struct iova), 0,
					SLAB_HWCACHE_ALIGN, NULL);
	if (!iommu_iova_cache) {
		printk(KERN_ERR "Couldn't create iova cache\n");
		ret = -ENOMEM;
	}
	return ret;
}
static int __init iommu_init_mempool(void)
{
	int ret;

	ret = iommu_iova_cache_init();
	if (ret)
		return ret;

	ret = iommu_domain_cache_init();
	if (ret)
		goto domain_error;

	ret = iommu_devinfo_cache_init();
	if (!ret)
		return ret;

	kmem_cache_destroy(iommu_domain_cache);
domain_error:
	kmem_cache_destroy(iommu_iova_cache);
	return -ENOMEM;
}
static void __init iommu_exit_mempool(void)
{
	kmem_cache_destroy(iommu_devinfo_cache);
	kmem_cache_destroy(iommu_domain_cache);
	kmem_cache_destroy(iommu_iova_cache);
}
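
/*
 * Mark DMAR units that have nothing to translate: units whose device scope
 * matched no present PCI devices, and (unless gfx mapping is requested)
 * units that cover only graphics devices, whose devices then get the dummy
 * per-device domain so DMA API calls bypass translation.
 */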
static void __init init_no_remapping_devices(void)
{
	struct dmar_drhd_unit *drhd;

	for_each_drhd_unit(drhd) {
		if (!drhd->include_all) {
			int i;
			for (i = 0; i < drhd->devices_cnt; i++)
				if (drhd->devices[i] != NULL)
					break;
			/* ignore DMAR unit if no pci devices exist */
			if (i == drhd->devices_cnt)
				drhd->ignored = 1;
		}
	}

	if (dmar_map_gfx)
		return;

	for_each_drhd_unit(drhd) {
		int i;
		if (drhd->ignored || drhd->include_all)
			continue;

		for (i = 0; i < drhd->devices_cnt; i++)
			if (drhd->devices[i] &&
			    !IS_GFX_DEVICE(drhd->devices[i]))
				break;

		if (i < drhd->devices_cnt)
			continue;

		/* bypass IOMMU if it is just for gfx devices */
		drhd->ignored = 1;
		for (i = 0; i < drhd->devices_cnt; i++) {
			if (!drhd->devices[i])
				continue;
			drhd->devices[i]->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
		}
	}
}
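
/*
 * Main entry point: parse the DMAR table, set up caches and reserved
 * ranges, initialize each DMAR unit and, on success, install the Intel
 * IOMMU DMA operations.
 */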
int __init intel_iommu_init(void)
{
	int ret = 0;

	if (dmar_table_init())
		return -ENODEV;

	if (dmar_dev_scope_init())
		return -ENODEV;

	/*
	 * Check the need for DMA-remapping initialization now.
	 * Above initialization will also be used by Interrupt-remapping.
	 */
	if (no_iommu || swiotlb || dmar_disabled)
		return -ENODEV;

	iommu_init_mempool();
	dmar_init_reserved_ranges();
	init_no_remapping_devices();

	ret = init_dmars();
	if (ret) {
		printk(KERN_ERR "IOMMU: dmar init failed\n");
		put_iova_domain(&reserved_iova_list);
		iommu_exit_mempool();
		return ret;
	}
	printk(KERN_INFO
	"PCI-DMA: Intel(R) Virtualization Technology for Directed I/O\n");

	init_timer(&unmap_timer);
	force_iommu = 1;
	dma_ops = &intel_dma_ops;
	return 0;
}
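
/*
 * Attach a PCI device to a virtual-machine domain: allocate the
 * device_domain_info, link it into the domain and global lists, and point
 * the device's archdata at it.
 */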
static int vm_domain_add_dev_info(struct dmar_domain *domain,
				  struct pci_dev *pdev)
{
	struct device_domain_info *info;
	unsigned long flags;

	info = alloc_devinfo_mem();
	if (!info)
		return -ENOMEM;

	info->bus = pdev->bus->number;
	info->devfn = pdev->devfn;
	info->dev = pdev;
	info->domain = domain;

	spin_lock_irqsave(&device_domain_lock, flags);
	list_add(&info->link, &domain->devices);
	list_add(&info->global, &device_domain_list);
	pdev->dev.archdata.iommu = info;
	spin_unlock_irqrestore(&device_domain_lock, flags);

	return 0;
}
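
/*
 * Detach one device from a virtual-machine domain.  If it was the last
 * device behind its IOMMU in this domain, the IOMMU is also cleared from
 * the domain's bitmap and the cache-coherency summary is recomputed.
 */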
static void vm_domain_remove_one_dev_info(struct dmar_domain *domain,
					  struct pci_dev *pdev)
{
	struct device_domain_info *info;
	struct intel_iommu *iommu;
	unsigned long flags;
	int found = 0;
	struct list_head *entry, *tmp;

	iommu = device_to_iommu(pdev->bus->number, pdev->devfn);
	if (!iommu)
		return;

	spin_lock_irqsave(&device_domain_lock, flags);
	list_for_each_safe(entry, tmp, &domain->devices) {
		info = list_entry(entry, struct device_domain_info, link);
		if (info->bus == pdev->bus->number &&
		    info->devfn == pdev->devfn) {
			list_del(&info->link);
			list_del(&info->global);
			if (info->dev)
				info->dev->dev.archdata.iommu = NULL;
			spin_unlock_irqrestore(&device_domain_lock, flags);

			iommu_detach_dev(iommu, info->bus, info->devfn);
			free_devinfo_mem(info);

			spin_lock_irqsave(&device_domain_lock, flags);

			if (found)
				break;
			else
				continue;
		}

		/* if there is no other devices under the same iommu
		 * owned by this domain, clear this iommu in iommu_bmp
		 * update iommu count and coherency
		 */
		if (device_to_iommu(info->bus, info->devfn) == iommu)
			found = 1;
	}

	if (found == 0) {
		unsigned long tmp_flags;
		spin_lock_irqsave(&domain->iommu_lock, tmp_flags);
		clear_bit(iommu->seq_id, &domain->iommu_bmp);
		domain->iommu_count--;
		domain_update_iommu_coherency(domain);
		spin_unlock_irqrestore(&domain->iommu_lock, tmp_flags);
	}

	spin_unlock_irqrestore(&device_domain_lock, flags);
}
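
/* Detach every device from a virtual-machine domain. */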
static void vm_domain_remove_all_dev_info(struct dmar_domain *domain)
{
	struct device_domain_info *info;
	struct intel_iommu *iommu;
	unsigned long flags1, flags2;

	spin_lock_irqsave(&device_domain_lock, flags1);
	while (!list_empty(&domain->devices)) {
		info = list_entry(domain->devices.next,
			struct device_domain_info, link);
		list_del(&info->link);
		list_del(&info->global);
		if (info->dev)
			info->dev->dev.archdata.iommu = NULL;

		spin_unlock_irqrestore(&device_domain_lock, flags1);

		iommu = device_to_iommu(info->bus, info->devfn);
		iommu_detach_dev(iommu, info->bus, info->devfn);

		/* clear this iommu in iommu_bmp, update iommu count
		 * and coherency
		 */
		spin_lock_irqsave(&domain->iommu_lock, flags2);
		if (test_and_clear_bit(iommu->seq_id,
				       &domain->iommu_bmp)) {
			domain->iommu_count--;
			domain_update_iommu_coherency(domain);
		}
		spin_unlock_irqrestore(&domain->iommu_lock, flags2);

		free_devinfo_mem(info);
		spin_lock_irqsave(&device_domain_lock, flags1);
	}
	spin_unlock_irqrestore(&device_domain_lock, flags1);
}
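
/*
 * Tear down a domain obtained through the exported
 * intel_iommu_domain_alloc() interface: clear and free its page tables,
 * then release the domain itself.
 */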
void intel_iommu_domain_exit(struct dmar_domain *domain)
{
	u64 end;

	/* Domain 0 is reserved, so don't process it */
	if (!domain)
		return;

	end = DOMAIN_MAX_ADDR(domain->gaw);
	end = end & (~VTD_PAGE_MASK);

	/* clear ptes */
	dma_pte_clear_range(domain, 0, end);
	/* free page tables */
	dma_pte_free_pagetable(domain, 0, end);

	iommu_free_domain(domain);
	free_domain_mem(domain);
}
EXPORT_SYMBOL_GPL(intel_iommu_domain_exit);
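
/*
 * Allocate a new domain on the DMAR unit that covers @pdev and initialize
 * it with the default address width.
 */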
struct dmar_domain *intel_iommu_domain_alloc(struct pci_dev *pdev)
{
	struct dmar_drhd_unit *drhd;
	struct dmar_domain *domain;
	struct intel_iommu *iommu;

	drhd = dmar_find_matched_drhd_unit(pdev);
	if (!drhd) {
		printk(KERN_ERR "intel_iommu_domain_alloc: drhd == NULL\n");
		return NULL;
	}

	iommu = drhd->iommu;
	if (!iommu) {
		printk(KERN_ERR
			"intel_iommu_domain_alloc: iommu == NULL\n");
		return NULL;
	}
	domain = iommu_alloc_domain(iommu);
	if (!domain) {
		printk(KERN_ERR
			"intel_iommu_domain_alloc: domain == NULL\n");
		return NULL;
	}
	if (domain_init(domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
		printk(KERN_ERR
			"intel_iommu_domain_alloc: domain_init() failed\n");
		intel_iommu_domain_exit(domain);
		return NULL;
	}
	return domain;
}
EXPORT_SYMBOL_GPL(intel_iommu_domain_alloc);
int intel_iommu_context_mapping(
	struct dmar_domain *domain, struct pci_dev *pdev)
{
	int rc;
	rc = domain_context_mapping(domain, pdev);
	return rc;
}
EXPORT_SYMBOL_GPL(intel_iommu_context_mapping);
int intel_iommu_page_mapping(
	struct dmar_domain *domain, dma_addr_t iova,
	u64 hpa, size_t size, int prot)
{
	int rc;
	rc = domain_page_mapping(domain, iova, hpa, size, prot);
	return rc;
}
EXPORT_SYMBOL_GPL(intel_iommu_page_mapping);
void intel_iommu_detach_dev(struct dmar_domain *domain, u8 bus, u8 devfn)
{
	struct intel_iommu *iommu;

	iommu = device_to_iommu(bus, devfn);
	iommu_detach_dev(iommu, bus, devfn);
}
EXPORT_SYMBOL_GPL(intel_iommu_detach_dev);
struct dmar_domain *
intel_iommu_find_domain(struct pci_dev *pdev)
{
	return find_domain(pdev);
}
EXPORT_SYMBOL_GPL(intel_iommu_find_domain);
int intel_iommu_found(void)
{
	return g_num_of_iommus;
}
EXPORT_SYMBOL_GPL(intel_iommu_found);
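
/*
 * Walk the domain's page table for @iova and return the physical page
 * frame number it maps to, or 0 if no PTE is present.
 */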
u64 intel_iommu_iova_to_pfn(struct dmar_domain *domain, u64 iova)
{
	struct dma_pte *pte;
	u64 pfn = 0;

	pte = addr_to_dma_pte(domain, iova);
	if (pte)
		pfn = dma_pte_addr(pte);

	return pfn >> VTD_PAGE_SHIFT;
}
EXPORT_SYMBOL_GPL(intel_iommu_iova_to_pfn);