/**************************************************************************
 * Copyright (c) 2007, Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 *
 **************************************************************************/

#include "psb_drv.h"
#include "psb_reg.h"
/*
 * Code for the SGX MMU:
 */

/*
 * clflush on one processor only:
 * clflush should apparently flush the cache line on all processors in an
 * SMP system.
 */

/*
 * kmap atomic:
 * The usage of the slots must be completely encapsulated within a spinlock,
 * and no other functions that may be using the locks for other purposes may
 * be called from within the locked region.
 * Since the slots are per processor, this will guarantee that we are the only
 * user.
 */

/*
 * TODO: Inserting ptes from an interrupt handler:
 * This may be desirable for some SGX functionality where the GPU can fault in
 * needed pages. For that, we need to make an atomic insert_pages function that
 * may fail.
 * If it fails, the caller needs to insert the page using a workqueue function,
 * but on average it should be fast.
 */
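/*
 * Rough usage sketch (illustration only, not part of this file): a caller is
 * expected to bring the MMU up once, map pages into the default page
 * directory, and tear everything down again on unload. The argument values
 * and the "pages"/"gpu_va"/"npages" variables below are placeholders, and
 * error handling is omitted:
 *
 *	struct psb_mmu_driver *mmu;
 *	struct psb_mmu_pd *pd;
 *
 *	mmu = psb_mmu_driver_init(dev, 1, 0, NULL);
 *	pd = psb_mmu_get_default_pd(mmu);
 *	psb_mmu_set_pd_context(pd, 0);
 *	psb_mmu_insert_pages(pd, pages, gpu_va, npages, 0, 0,
 *			     PSB_MMU_CACHED_MEMORY);
 *	...
 *	psb_mmu_remove_pages(pd, gpu_va, npages, 0, 0);
 *	psb_mmu_driver_takedown(mmu);
 */
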
static inline uint32_t psb_mmu_pt_index(uint32_t offset)
{
	return (offset >> PSB_PTE_SHIFT) & 0x3FF;
}

static inline uint32_t psb_mmu_pd_index(uint32_t offset)
{
	return offset >> PSB_PDE_SHIFT;
}
#if defined(CONFIG_X86)
static inline void psb_clflush(void *addr)
{
	__asm__ __volatile__("clflush (%0)\n" : : "r"(addr) : "memory");
}

static inline void psb_mmu_clflush(struct psb_mmu_driver *driver, void *addr)
{
	if (!driver->has_clflush)
		return;

	mb();
	psb_clflush(addr);
	mb();
}
#else

static inline void psb_mmu_clflush(struct psb_mmu_driver *driver, void *addr)
{
}
#endif
static void psb_mmu_flush_pd_locked(struct psb_mmu_driver *driver, int force)
{
	struct drm_device *dev = driver->dev;
	struct drm_psb_private *dev_priv = dev->dev_private;

	if (atomic_read(&driver->needs_tlbflush) || force) {
		uint32_t val = PSB_RSGX32(PSB_CR_BIF_CTRL);

		PSB_WSGX32(val | _PSB_CB_CTRL_INVALDC, PSB_CR_BIF_CTRL);

		/* Make sure data cache is turned off before enabling it */
		wmb();
		PSB_WSGX32(val & ~_PSB_CB_CTRL_INVALDC, PSB_CR_BIF_CTRL);
		(void)PSB_RSGX32(PSB_CR_BIF_CTRL);
		if (driver->msvdx_mmu_invaldc)
			atomic_set(driver->msvdx_mmu_invaldc, 1);
	}
	atomic_set(&driver->needs_tlbflush, 0);
}
static void psb_mmu_flush_pd(struct psb_mmu_driver *driver, int force)
{
	down_write(&driver->sem);
	psb_mmu_flush_pd_locked(driver, force);
	up_write(&driver->sem);
}
void psb_mmu_flush(struct psb_mmu_driver *driver)
{
	struct drm_device *dev = driver->dev;
	struct drm_psb_private *dev_priv = dev->dev_private;
	uint32_t val;

	down_write(&driver->sem);
	val = PSB_RSGX32(PSB_CR_BIF_CTRL);
	if (atomic_read(&driver->needs_tlbflush))
		PSB_WSGX32(val | _PSB_CB_CTRL_INVALDC, PSB_CR_BIF_CTRL);
	else
		PSB_WSGX32(val | _PSB_CB_CTRL_FLUSH, PSB_CR_BIF_CTRL);

	/* Make sure data cache is turned off and MMU is flushed before
	   restoring bank interface control register */
	wmb();
	PSB_WSGX32(val & ~(_PSB_CB_CTRL_FLUSH | _PSB_CB_CTRL_INVALDC),
		   PSB_CR_BIF_CTRL);
	(void)PSB_RSGX32(PSB_CR_BIF_CTRL);

	atomic_set(&driver->needs_tlbflush, 0);
	if (driver->msvdx_mmu_invaldc)
		atomic_set(driver->msvdx_mmu_invaldc, 1);
	up_write(&driver->sem);
}
void psb_mmu_set_pd_context(struct psb_mmu_pd *pd, int hw_context)
{
	struct drm_device *dev = pd->driver->dev;
	struct drm_psb_private *dev_priv = dev->dev_private;
	uint32_t offset = (hw_context == 0) ? PSB_CR_BIF_DIR_LIST_BASE0 :
			  PSB_CR_BIF_DIR_LIST_BASE1 + hw_context * 4;

	down_write(&pd->driver->sem);
	PSB_WSGX32(page_to_pfn(pd->p) << PAGE_SHIFT, offset);
	wmb();
	psb_mmu_flush_pd_locked(pd->driver, 1);
	pd->hw_context = hw_context;
	up_write(&pd->driver->sem);
}
static inline unsigned long psb_pd_addr_end(unsigned long addr,
					    unsigned long end)
{
	addr = (addr + PSB_PDE_MASK + 1) & ~PSB_PDE_MASK;
	return (addr < end) ? addr : end;
}
static inline uint32_t psb_mmu_mask_pte(uint32_t pfn, int type)
{
	uint32_t mask = PSB_PTE_VALID;

	if (type & PSB_MMU_CACHED_MEMORY)
		mask |= PSB_PTE_CACHED;
	if (type & PSB_MMU_RO_MEMORY)
		mask |= PSB_PTE_RO;
	if (type & PSB_MMU_WO_MEMORY)
		mask |= PSB_PTE_WO;

	return (pfn << PAGE_SHIFT) | mask;
}
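
/*
 * For example, psb_mmu_mask_pte(page_to_pfn(page), PSB_MMU_CACHED_MEMORY)
 * yields the page's physical address with the valid and cached bits set;
 * leaving out PSB_MMU_RO_MEMORY/PSB_MMU_WO_MEMORY gives an ordinary
 * read/write mapping.
 */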
struct psb_mmu_pd *psb_mmu_alloc_pd(struct psb_mmu_driver *driver,
				    int trap_pagefaults, int invalid_type)
{
	struct psb_mmu_pd *pd = kmalloc(sizeof(*pd), GFP_KERNEL);
	uint32_t *v;
	int i;

	if (!pd)
		return NULL;

	pd->p = alloc_page(GFP_DMA32);
	if (!pd->p)
		goto out_err1;
	pd->dummy_pt = alloc_page(GFP_DMA32);
	if (!pd->dummy_pt)
		goto out_err2;
	pd->dummy_page = alloc_page(GFP_DMA32);
	if (!pd->dummy_page)
		goto out_err3;

	if (!trap_pagefaults) {
		pd->invalid_pde = psb_mmu_mask_pte(page_to_pfn(pd->dummy_pt),
						   invalid_type);
		pd->invalid_pte = psb_mmu_mask_pte(page_to_pfn(pd->dummy_page),
						   invalid_type);
	} else {
		pd->invalid_pde = 0;
		pd->invalid_pte = 0;
	}

	v = kmap(pd->dummy_pt);
	for (i = 0; i < (PAGE_SIZE / sizeof(uint32_t)); ++i)
		v[i] = pd->invalid_pte;

	kunmap(pd->dummy_pt);

	v = kmap(pd->p);
	for (i = 0; i < (PAGE_SIZE / sizeof(uint32_t)); ++i)
		v[i] = pd->invalid_pde;

	kunmap(pd->p);

	clear_page(kmap(pd->dummy_page));
	kunmap(pd->dummy_page);

	pd->tables = vmalloc_user(sizeof(struct psb_mmu_pt *) * 1024);
	if (!pd->tables)
		goto out_err4;

	pd->hw_context = -1;
	pd->pd_mask = PSB_PTE_VALID;
	pd->driver = driver;

	return pd;

out_err4:
	__free_page(pd->dummy_page);
out_err3:
	__free_page(pd->dummy_pt);
out_err2:
	__free_page(pd->p);
out_err1:
	kfree(pd);
	return NULL;
}
static void psb_mmu_free_pt(struct psb_mmu_pt *pt)
{
	__free_page(pt->p);
	kfree(pt);
}
void psb_mmu_free_pagedir(struct psb_mmu_pd *pd)
{
	struct psb_mmu_driver *driver = pd->driver;
	struct drm_device *dev = driver->dev;
	struct drm_psb_private *dev_priv = dev->dev_private;
	struct psb_mmu_pt *pt;
	int i;

	down_write(&driver->sem);
	if (pd->hw_context != -1) {
		PSB_WSGX32(0, PSB_CR_BIF_DIR_LIST_BASE0 + pd->hw_context * 4);
		psb_mmu_flush_pd_locked(driver, 1);
	}

	/* Should take the spinlock here, but we don't need to do that
	   since we have the semaphore in write mode. */

	for (i = 0; i < 1024; ++i) {
		pt = pd->tables[i];
		if (pt)
			psb_mmu_free_pt(pt);
	}

	vfree(pd->tables);
	__free_page(pd->dummy_page);
	__free_page(pd->dummy_pt);
	__free_page(pd->p);
	kfree(pd);
	up_write(&driver->sem);
}
static struct psb_mmu_pt *psb_mmu_alloc_pt(struct psb_mmu_pd *pd)
{
	struct psb_mmu_pt *pt = kmalloc(sizeof(*pt), GFP_KERNEL);
	void *v;
	uint32_t clflush_add = pd->driver->clflush_add >> PAGE_SHIFT;
	uint32_t clflush_count = PAGE_SIZE / clflush_add;
	spinlock_t *lock = &pd->driver->lock;
	uint8_t *clf;
	uint32_t *ptes;
	int i;

	if (!pt)
		return NULL;

	pt->p = alloc_page(GFP_DMA32);
	if (!pt->p) {
		kfree(pt);
		return NULL;
	}

	spin_lock(lock);

	v = kmap_atomic(pt->p);
	clf = (uint8_t *) v;
	ptes = (uint32_t *) v;
	for (i = 0; i < (PAGE_SIZE / sizeof(uint32_t)); ++i)
		*ptes++ = pd->invalid_pte;

#if defined(CONFIG_X86)
	if (pd->driver->has_clflush && pd->hw_context != -1) {
		mb();
		for (i = 0; i < clflush_count; ++i) {
			psb_clflush(clf);
			clf += clflush_add;
		}
		mb();
	}
#endif
	kunmap_atomic(v);
	spin_unlock(lock);

	pt->count = 0;
	pt->pd = pd;
	pt->index = 0;

	return pt;
}
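
/*
 * Look up, and if necessary allocate and hook up, the page table covering
 * @addr. On success the table is returned with pt->v kmapped and the driver
 * spinlock held. Note that psb_mmu_alloc_pt() is called with the spinlock
 * dropped, so the directory slot is re-checked afterwards; if another thread
 * installed a table in the meantime, the freshly allocated one is freed and
 * the existing table is used instead.
 */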
struct psb_mmu_pt *psb_mmu_pt_alloc_map_lock(struct psb_mmu_pd *pd,
					     unsigned long addr)
{
	uint32_t index = psb_mmu_pd_index(addr);
	struct psb_mmu_pt *pt;
	uint32_t *v;
	spinlock_t *lock = &pd->driver->lock;

	spin_lock(lock);
	pt = pd->tables[index];
	while (!pt) {
		spin_unlock(lock);
		pt = psb_mmu_alloc_pt(pd);
		if (!pt)
			return NULL;
		spin_lock(lock);

		if (pd->tables[index]) {
			spin_unlock(lock);
			psb_mmu_free_pt(pt);
			spin_lock(lock);
			pt = pd->tables[index];
			continue;
		}

		v = kmap_atomic(pd->p);
		pd->tables[index] = pt;
		v[index] = (page_to_pfn(pt->p) << 12) | pd->pd_mask;
		pt->index = index;
		kunmap_atomic((void *) v);

		if (pd->hw_context != -1) {
			psb_mmu_clflush(pd->driver, (void *)&v[index]);
			atomic_set(&pd->driver->needs_tlbflush, 1);
		}
	}
	pt->v = kmap_atomic(pt->p);
	return pt;
}
static struct psb_mmu_pt *psb_mmu_pt_map_lock(struct psb_mmu_pd *pd,
					      unsigned long addr)
{
	uint32_t index = psb_mmu_pd_index(addr);
	struct psb_mmu_pt *pt;
	spinlock_t *lock = &pd->driver->lock;

	spin_lock(lock);
	pt = pd->tables[index];
	if (!pt) {
		spin_unlock(lock);
		return NULL;
	}
	pt->v = kmap_atomic(pt->p);
	return pt;
}
static void psb_mmu_pt_unmap_unlock(struct psb_mmu_pt *pt)
{
	struct psb_mmu_pd *pd = pt->pd;
	uint32_t *v;

	kunmap_atomic(pt->v);
	if (pt->count == 0) {
		/* The table is empty: mark its directory entry invalid and
		   free it. */
		v = kmap_atomic(pd->p);
		v[pt->index] = pd->invalid_pde;
		pd->tables[pt->index] = NULL;

		if (pd->hw_context != -1) {
			psb_mmu_clflush(pd->driver, (void *)&v[pt->index]);
			atomic_set(&pd->driver->needs_tlbflush, 1);
		}
		kunmap_atomic(v);
		spin_unlock(&pd->driver->lock);
		psb_mmu_free_pt(pt);
		return;
	}
	spin_unlock(&pd->driver->lock);
}
static inline void psb_mmu_set_pte(struct psb_mmu_pt *pt, unsigned long addr,
				   uint32_t pte)
{
	pt->v[psb_mmu_pt_index(addr)] = pte;
}

static inline void psb_mmu_invalidate_pte(struct psb_mmu_pt *pt,
					  unsigned long addr)
{
	pt->v[psb_mmu_pt_index(addr)] = pt->pd->invalid_pte;
}
struct psb_mmu_pd *psb_mmu_get_default_pd(struct psb_mmu_driver *driver)
{
	struct psb_mmu_pd *pd;

	down_read(&driver->sem);
	pd = driver->default_pd;
	up_read(&driver->sem);

	return pd;
}
/* Returns the physical address of the PD shared by sgx/msvdx */
uint32_t psb_get_default_pd_addr(struct psb_mmu_driver *driver)
{
	struct psb_mmu_pd *pd;

	pd = psb_mmu_get_default_pd(driver);
	return page_to_pfn(pd->p) << PAGE_SHIFT;
}
void psb_mmu_driver_takedown(struct psb_mmu_driver *driver)
{
	struct drm_device *dev = driver->dev;
	struct drm_psb_private *dev_priv = dev->dev_private;

	PSB_WSGX32(driver->bif_ctrl, PSB_CR_BIF_CTRL);
	psb_mmu_free_pagedir(driver->default_pd);
	kfree(driver);
}
struct psb_mmu_driver *psb_mmu_driver_init(struct drm_device *dev,
					   int trap_pagefaults,
					   int invalid_type,
					   atomic_t *msvdx_mmu_invaldc)
{
	struct psb_mmu_driver *driver;
	struct drm_psb_private *dev_priv = dev->dev_private;

	driver = kmalloc(sizeof(*driver), GFP_KERNEL);
	if (!driver)
		return NULL;

	driver->dev = dev;
	driver->default_pd = psb_mmu_alloc_pd(driver, trap_pagefaults,
					      invalid_type);
	if (!driver->default_pd)
		goto out_err1;

	spin_lock_init(&driver->lock);
	init_rwsem(&driver->sem);
	down_write(&driver->sem);
	atomic_set(&driver->needs_tlbflush, 1);
	driver->msvdx_mmu_invaldc = msvdx_mmu_invaldc;

	driver->bif_ctrl = PSB_RSGX32(PSB_CR_BIF_CTRL);
	PSB_WSGX32(driver->bif_ctrl | _PSB_CB_CTRL_CLEAR_FAULT,
		   PSB_CR_BIF_CTRL);
	PSB_WSGX32(driver->bif_ctrl & ~_PSB_CB_CTRL_CLEAR_FAULT,
		   PSB_CR_BIF_CTRL);

	driver->has_clflush = 0;

#if defined(CONFIG_X86)
	if (boot_cpu_has(X86_FEATURE_CLFLUSH)) {
		uint32_t tfms, misc, cap0, cap4, clflush_size;

		/*
		 * clflush size is determined at kernel setup for x86_64 but
		 * not for i386. We have to do it here.
		 */

		cpuid(0x00000001, &tfms, &misc, &cap0, &cap4);
		clflush_size = ((misc >> 8) & 0xff) * 8;
		driver->has_clflush = 1;
		driver->clflush_add =
		    PAGE_SIZE * clflush_size / sizeof(uint32_t);
		driver->clflush_mask = driver->clflush_add - 1;
		driver->clflush_mask = ~driver->clflush_mask;
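
		/*
		 * Worked example (assuming the common 64-byte cache line):
		 * the CPUID misc field reports the clflush line size in
		 * 8-byte units, so clflush_size = 8 * 8 = 64 bytes. One
		 * cache line then holds 64 / sizeof(uint32_t) = 16 PTEs,
		 * and since each PTE maps PAGE_SIZE bytes, clflush_add =
		 * 4096 * 64 / 4 = 64 KiB is the span of GPU virtual address
		 * space whose PTEs fit in one cache line. This is the stride
		 * psb_mmu_flush_ptes() advances by between clflushes.
		 */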
	}
#endif

	up_write(&driver->sem);
	return driver;

out_err1:
	kfree(driver);
	return NULL;
}
#if defined(CONFIG_X86)
static void psb_mmu_flush_ptes(struct psb_mmu_pd *pd, unsigned long address,
			       uint32_t num_pages, uint32_t desired_tile_stride,
			       uint32_t hw_tile_stride)
{
	struct psb_mmu_pt *pt;
	uint32_t rows = 1;
	uint32_t i;
	unsigned long addr;
	unsigned long end;
	unsigned long next;
	unsigned long add;
	unsigned long row_add;
	unsigned long clflush_add = pd->driver->clflush_add;
	unsigned long clflush_mask = pd->driver->clflush_mask;

	if (!pd->driver->has_clflush)
		return;

	if (hw_tile_stride)
		rows = num_pages / desired_tile_stride;
	else
		desired_tile_stride = num_pages;

	add = desired_tile_stride << PAGE_SHIFT;
	row_add = hw_tile_stride << PAGE_SHIFT;
	mb();
	for (i = 0; i < rows; ++i) {

		addr = address;
		end = addr + add;

		do {
			next = psb_pd_addr_end(addr, end);
			pt = psb_mmu_pt_map_lock(pd, addr);
			if (!pt)
				continue;
			do {
				psb_clflush(&pt->v[psb_mmu_pt_index(addr)]);
			} while (addr += clflush_add,
				 (addr & clflush_mask) < next);

			psb_mmu_pt_unmap_unlock(pt);
		} while (addr = next, next != end);
		address += row_add;
	}
	mb();
}
#else
static void psb_mmu_flush_ptes(struct psb_mmu_pd *pd, unsigned long address,
			       uint32_t num_pages, uint32_t desired_tile_stride,
			       uint32_t hw_tile_stride)
{
	drm_ttm_cache_flush();
}
#endif
void psb_mmu_remove_pfn_sequence(struct psb_mmu_pd *pd,
				 unsigned long address, uint32_t num_pages)
{
	struct psb_mmu_pt *pt;
	unsigned long addr;
	unsigned long end;
	unsigned long next;
	unsigned long f_address = address;

	down_read(&pd->driver->sem);

	addr = address;
	end = addr + (num_pages << PAGE_SHIFT);

	do {
		next = psb_pd_addr_end(addr, end);
		pt = psb_mmu_pt_alloc_map_lock(pd, addr);
		if (!pt)
			goto out;
		do {
			psb_mmu_invalidate_pte(pt, addr);
			--pt->count;
		} while (addr += PAGE_SIZE, addr < next);
		psb_mmu_pt_unmap_unlock(pt);

	} while (addr = next, next != end);

out:
	if (pd->hw_context != -1)
		psb_mmu_flush_ptes(pd, f_address, num_pages, 1, 1);

	up_read(&pd->driver->sem);

	if (pd->hw_context != -1)
		psb_mmu_flush(pd->driver);
}
void psb_mmu_remove_pages(struct psb_mmu_pd *pd, unsigned long address,
			  uint32_t num_pages, uint32_t desired_tile_stride,
			  uint32_t hw_tile_stride)
{
	struct psb_mmu_pt *pt;
	uint32_t rows = 1;
	uint32_t i;
	unsigned long addr;
	unsigned long end;
	unsigned long next;
	unsigned long add;
	unsigned long row_add;
	unsigned long f_address = address;

	if (hw_tile_stride)
		rows = num_pages / desired_tile_stride;
	else
		desired_tile_stride = num_pages;

	add = desired_tile_stride << PAGE_SHIFT;
	row_add = hw_tile_stride << PAGE_SHIFT;

	down_read(&pd->driver->sem);

	/* Make sure we only need to flush this processor's cache */

	for (i = 0; i < rows; ++i) {

		addr = address;
		end = addr + add;

		do {
			next = psb_pd_addr_end(addr, end);
			pt = psb_mmu_pt_map_lock(pd, addr);
			if (!pt)
				continue;
			do {
				psb_mmu_invalidate_pte(pt, addr);
				--pt->count;

			} while (addr += PAGE_SIZE, addr < next);
			psb_mmu_pt_unmap_unlock(pt);

		} while (addr = next, next != end);
		address += row_add;
	}
	if (pd->hw_context != -1)
		psb_mmu_flush_ptes(pd, f_address, num_pages,
				   desired_tile_stride, hw_tile_stride);

	up_read(&pd->driver->sem);

	if (pd->hw_context != -1)
		psb_mmu_flush(pd->driver);
}
int psb_mmu_insert_pfn_sequence(struct psb_mmu_pd *pd, uint32_t start_pfn,
				unsigned long address, uint32_t num_pages,
				int type)
{
	struct psb_mmu_pt *pt;
	uint32_t pte;
	unsigned long addr;
	unsigned long end;
	unsigned long next;
	unsigned long f_address = address;
	int ret = -ENOMEM;

	down_read(&pd->driver->sem);

	addr = address;
	end = addr + (num_pages << PAGE_SHIFT);

	do {
		next = psb_pd_addr_end(addr, end);
		pt = psb_mmu_pt_alloc_map_lock(pd, addr);
		if (!pt) {
			ret = -ENOMEM;
			goto out;
		}
		do {
			pte = psb_mmu_mask_pte(start_pfn++, type);
			psb_mmu_set_pte(pt, addr, pte);
			pt->count++;
		} while (addr += PAGE_SIZE, addr < next);
		psb_mmu_pt_unmap_unlock(pt);

	} while (addr = next, next != end);
	ret = 0;

out:
	if (pd->hw_context != -1)
		psb_mmu_flush_ptes(pd, f_address, num_pages, 1, 1);

	up_read(&pd->driver->sem);

	if (pd->hw_context != -1)
		psb_mmu_flush(pd->driver);

	return ret;
}
int psb_mmu_insert_pages(struct psb_mmu_pd *pd, struct page **pages,
			 unsigned long address, uint32_t num_pages,
			 uint32_t desired_tile_stride, uint32_t hw_tile_stride,
			 int type)
{
	struct psb_mmu_pt *pt;
	uint32_t rows = 1;
	uint32_t i;
	uint32_t pte;
	unsigned long addr;
	unsigned long end;
	unsigned long next;
	unsigned long add;
	unsigned long row_add;
	unsigned long f_address = address;
	int ret = -ENOMEM;

	/* A tiled mapping lays the pages out as "rows" rows of
	   desired_tile_stride pages each, with the GPU virtual address
	   advancing by hw_tile_stride pages from one row to the next. */
	if (hw_tile_stride) {
		if (num_pages % desired_tile_stride != 0)
			return -EINVAL;
		rows = num_pages / desired_tile_stride;
	} else {
		desired_tile_stride = num_pages;
	}

	add = desired_tile_stride << PAGE_SHIFT;
	row_add = hw_tile_stride << PAGE_SHIFT;

	down_read(&pd->driver->sem);

	for (i = 0; i < rows; ++i) {

		addr = address;
		end = addr + add;

		do {
			next = psb_pd_addr_end(addr, end);
			pt = psb_mmu_pt_alloc_map_lock(pd, addr);
			if (!pt)
				goto out;
			do {
				pte = psb_mmu_mask_pte(page_to_pfn(*pages++),
						       type);
				psb_mmu_set_pte(pt, addr, pte);
				pt->count++;
			} while (addr += PAGE_SIZE, addr < next);
			psb_mmu_pt_unmap_unlock(pt);

		} while (addr = next, next != end);

		address += row_add;
	}
	ret = 0;

out:
	if (pd->hw_context != -1)
		psb_mmu_flush_ptes(pd, f_address, num_pages,
				   desired_tile_stride, hw_tile_stride);

	up_read(&pd->driver->sem);

	if (pd->hw_context != -1)
		psb_mmu_flush(pd->driver);

	return ret;
}
int psb_mmu_virtual_to_pfn(struct psb_mmu_pd *pd, uint32_t virtual,
			   unsigned long *pfn)
{
	int ret;
	struct psb_mmu_pt *pt;
	uint32_t tmp;
	spinlock_t *lock = &pd->driver->lock;

	down_read(&pd->driver->sem);
	pt = psb_mmu_pt_map_lock(pd, virtual);
	if (!pt) {
		uint32_t *v;

		spin_lock(lock);
		v = kmap_atomic(pd->p);
		tmp = v[psb_mmu_pd_index(virtual)];
		kunmap_atomic(v);
		spin_unlock(lock);

		if (tmp != pd->invalid_pde || !(tmp & PSB_PTE_VALID) ||
		    !(pd->invalid_pte & PSB_PTE_VALID)) {
			ret = -EINVAL;
			goto out;
		}
		ret = 0;
		*pfn = pd->invalid_pte >> PAGE_SHIFT;
		goto out;
	}
	tmp = pt->v[psb_mmu_pt_index(virtual)];
	if (!(tmp & PSB_PTE_VALID)) {
		ret = -EINVAL;
	} else {
		ret = 0;
		*pfn = tmp >> PAGE_SHIFT;
	}
	psb_mmu_pt_unmap_unlock(pt);
out:
	up_read(&pd->driver->sem);
	return ret;
}