drm/i915: Unduplicate i915_ggtt_unbind/bind_vma
drivers/gpu/drm/i915/i915_gem_gtt.c (firefly-linux-kernel-4.4.55.git)
1 /*
2  * Copyright © 2010 Daniel Vetter
3  * Copyright © 2011-2014 Intel Corporation
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9  * and/or sell copies of the Software, and to permit persons to whom the
10  * Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the next
13  * paragraph) shall be included in all copies or substantial portions of the
14  * Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22  * IN THE SOFTWARE.
23  *
24  */
25
26 #include <linux/seq_file.h>
27 #include <drm/drmP.h>
28 #include <drm/i915_drm.h>
29 #include "i915_drv.h"
30 #include "i915_vgpu.h"
31 #include "i915_trace.h"
32 #include "intel_drv.h"
33
34 /**
35  * DOC: Global GTT views
36  *
37  * Background and previous state
38  *
39  * Historically objects could exist (be bound) in global GTT space only as
40  * singular instances with a view representing all of the object's backing pages
41  * in a linear fashion. This view will be called a normal view.
42  *
43  * To support multiple views of the same object, where the number of mapped
44  * pages is not equal to the backing store, or where the layout of the pages
45  * is not linear, the concept of a GGTT view was added.
46  *
47  * One example of an alternative view is a stereo display driven by a single
48  * image. In this case we would have a framebuffer looking like this
49  * (2x2 pages):
50  *
51  *    12
52  *    34
53  *
54  * Above would represent a normal GGTT view as normally mapped for GPU or CPU
55  * rendering. In contrast, fed to the display engine would be an alternative
56  * view which could look something like this:
57  *
58  *   1212
59  *   3434
60  *
61  * In this example both the size and layout of pages in the alternative view are
62  * different from the normal view.
63  *
64  * Implementation and usage
65  *
66  * GGTT views are implemented using VMAs and are distinguished via enum
67  * i915_ggtt_view_type and struct i915_ggtt_view.
68  *
69  * A new flavour of core GEM functions which work with GGTT bound objects was
70  * added with the _ggtt_ infix, and sometimes with a _view postfix, to avoid
71  * renaming in large amounts of code. They take the struct i915_ggtt_view
72  * parameter encapsulating all metadata required to implement a view.
73  *
74  * As a helper for callers which are only interested in the normal view, a
75  * globally const i915_ggtt_view_normal singleton instance exists. All old core
76  * GEM API functions, the ones not taking the view parameter, operate on, or
77  * with, the normal GGTT view.
78  *
79  * Code wanting to add or use a new GGTT view needs to:
80  *
81  * 1. Add a new enum with a suitable name.
82  * 2. Extend the metadata in the i915_ggtt_view structure if required.
83  * 3. Add support to i915_get_vma_pages().
84  *
85  * New views are required to build a scatter-gather table from within the
86  * i915_get_vma_pages function. This table is stored in the vma.ggtt_view and
87  * exists for the lifetime of a VMA.
88  *
89  * Core API is designed to have copy semantics which means that passed in
90  * struct i915_ggtt_view does not need to be persistent (left around after
91  * calling the core API functions).
92  *
93  */
94
95 const struct i915_ggtt_view i915_ggtt_view_normal;
96 const struct i915_ggtt_view i915_ggtt_view_rotated = {
97         .type = I915_GGTT_VIEW_ROTATED
98 };
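/*
 * Minimal usage sketch (illustrative, not code used by this file): a caller
 * that only cares about the display-rotated mapping of an object would look
 * up the VMA with the rotated singleton above, e.g.
 *
 *	struct i915_vma *vma =
 *		i915_gem_obj_to_ggtt_view(obj, &i915_ggtt_view_rotated);
 *
 * while the older helpers that take no view parameter implicitly operate on
 * &i915_ggtt_view_normal. The exact helper name and signature here are
 * assumptions about the surrounding API and vary between kernel versions.
 */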
99
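/*
 * Resolve the i915.enable_ppgtt module parameter against what the hardware
 * and configuration can actually support. Returns the effective PPGTT level:
 * 0 (disabled), 1 (aliasing PPGTT) or 2 (full PPGTT).
 */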
100 static int sanitize_enable_ppgtt(struct drm_device *dev, int enable_ppgtt)
101 {
102         bool has_aliasing_ppgtt;
103         bool has_full_ppgtt;
104
105         has_aliasing_ppgtt = INTEL_INFO(dev)->gen >= 6;
106         has_full_ppgtt = INTEL_INFO(dev)->gen >= 7;
107
108         if (intel_vgpu_active(dev))
109                 has_full_ppgtt = false; /* emulation is too hard */
110
111         /*
112          * We don't allow disabling PPGTT for gen9+ as it's a requirement for
113          * execlists, the sole mechanism available to submit work.
114          */
115         if (INTEL_INFO(dev)->gen < 9 &&
116             (enable_ppgtt == 0 || !has_aliasing_ppgtt))
117                 return 0;
118
119         if (enable_ppgtt == 1)
120                 return 1;
121
122         if (enable_ppgtt == 2 && has_full_ppgtt)
123                 return 2;
124
125 #ifdef CONFIG_INTEL_IOMMU
126         /* Disable ppgtt on SNB if VT-d is on. */
127         if (INTEL_INFO(dev)->gen == 6 && intel_iommu_gfx_mapped) {
128                 DRM_INFO("Disabling PPGTT because VT-d is on\n");
129                 return 0;
130         }
131 #endif
132
133         /* Early VLV steppings (pre-B3) don't support PPGTT */
134         if (IS_VALLEYVIEW(dev) && !IS_CHERRYVIEW(dev) &&
135             dev->pdev->revision < 0xb) {
136                 DRM_DEBUG_DRIVER("disabling PPGTT on pre-B3 step VLV\n");
137                 return 0;
138         }
139
140         if (INTEL_INFO(dev)->gen >= 8 && i915.enable_execlists)
141                 return 2;
142         else
143                 return has_aliasing_ppgtt ? 1 : 0;
144 }
145
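/*
 * Bind/unbind callbacks used by PPGTT address spaces (hooked up as
 * vm->bind_vma/unbind_vma in the gen6/gen8 init functions below): binding
 * writes PTEs for every backing page of the object at the VMA's node offset,
 * while unbinding points the same range back at the scratch page.
 */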
146 static void ppgtt_bind_vma(struct i915_vma *vma,
147                            enum i915_cache_level cache_level,
148                            u32 unused)
149 {
150         u32 pte_flags = 0;
151
152         /* Currently applicable only to VLV */
153         if (vma->obj->gt_ro)
154                 pte_flags |= PTE_READ_ONLY;
155
156         vma->vm->insert_entries(vma->vm, vma->obj->pages, vma->node.start,
157                                 cache_level, pte_flags);
158 }
159
160 static void ppgtt_unbind_vma(struct i915_vma *vma)
161 {
162         vma->vm->clear_range(vma->vm,
163                              vma->node.start,
164                              vma->obj->base.size,
165                              true);
166 }
167
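/*
 * Worked example (illustrative): gen8_pte_encode(0x12345000, I915_CACHE_LLC,
 * true) produces 0x12345000 | _PAGE_PRESENT | _PAGE_RW | PPAT_CACHED_INDEX,
 * i.e. the page address with the valid bits and the cacheable PPAT index
 * or'ed in; passing valid == false leaves the present/rw bits clear.
 */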
168 static inline gen8_pte_t gen8_pte_encode(dma_addr_t addr,
169                                          enum i915_cache_level level,
170                                          bool valid)
171 {
172         gen8_pte_t pte = valid ? _PAGE_PRESENT | _PAGE_RW : 0;
173         pte |= addr;
174
175         switch (level) {
176         case I915_CACHE_NONE:
177                 pte |= PPAT_UNCACHED_INDEX;
178                 break;
179         case I915_CACHE_WT:
180                 pte |= PPAT_DISPLAY_ELLC_INDEX;
181                 break;
182         default:
183                 pte |= PPAT_CACHED_INDEX;
184                 break;
185         }
186
187         return pte;
188 }
189
190 static inline gen8_pde_t gen8_pde_encode(struct drm_device *dev,
191                                           dma_addr_t addr,
192                                           enum i915_cache_level level)
193 {
194         gen8_pde_t pde = _PAGE_PRESENT | _PAGE_RW;
195         pde |= addr;
196         if (level != I915_CACHE_NONE)
197                 pde |= PPAT_CACHED_PDE_INDEX;
198         else
199                 pde |= PPAT_UNCACHED_INDEX;
200         return pde;
201 }
202
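/*
 * The gen6/gen7 encoders below all follow the same pattern; for instance,
 * snb_pte_encode(addr, I915_CACHE_LLC, true, 0) is
 * GEN6_PTE_VALID | GEN6_PTE_ADDR_ENCODE(addr) | GEN6_PTE_CACHE_LLC.
 * The byt and hsw variants differ in which writeable/snoop/cache-age bits
 * they set, and hsw/iris use HSW_PTE_ADDR_ENCODE() for the address bits.
 */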
203 static gen6_pte_t snb_pte_encode(dma_addr_t addr,
204                                  enum i915_cache_level level,
205                                  bool valid, u32 unused)
206 {
207         gen6_pte_t pte = valid ? GEN6_PTE_VALID : 0;
208         pte |= GEN6_PTE_ADDR_ENCODE(addr);
209
210         switch (level) {
211         case I915_CACHE_L3_LLC:
212         case I915_CACHE_LLC:
213                 pte |= GEN6_PTE_CACHE_LLC;
214                 break;
215         case I915_CACHE_NONE:
216                 pte |= GEN6_PTE_UNCACHED;
217                 break;
218         default:
219                 MISSING_CASE(level);
220         }
221
222         return pte;
223 }
224
225 static gen6_pte_t ivb_pte_encode(dma_addr_t addr,
226                                  enum i915_cache_level level,
227                                  bool valid, u32 unused)
228 {
229         gen6_pte_t pte = valid ? GEN6_PTE_VALID : 0;
230         pte |= GEN6_PTE_ADDR_ENCODE(addr);
231
232         switch (level) {
233         case I915_CACHE_L3_LLC:
234                 pte |= GEN7_PTE_CACHE_L3_LLC;
235                 break;
236         case I915_CACHE_LLC:
237                 pte |= GEN6_PTE_CACHE_LLC;
238                 break;
239         case I915_CACHE_NONE:
240                 pte |= GEN6_PTE_UNCACHED;
241                 break;
242         default:
243                 MISSING_CASE(level);
244         }
245
246         return pte;
247 }
248
249 static gen6_pte_t byt_pte_encode(dma_addr_t addr,
250                                  enum i915_cache_level level,
251                                  bool valid, u32 flags)
252 {
253         gen6_pte_t pte = valid ? GEN6_PTE_VALID : 0;
254         pte |= GEN6_PTE_ADDR_ENCODE(addr);
255
256         if (!(flags & PTE_READ_ONLY))
257                 pte |= BYT_PTE_WRITEABLE;
258
259         if (level != I915_CACHE_NONE)
260                 pte |= BYT_PTE_SNOOPED_BY_CPU_CACHES;
261
262         return pte;
263 }
264
265 static gen6_pte_t hsw_pte_encode(dma_addr_t addr,
266                                  enum i915_cache_level level,
267                                  bool valid, u32 unused)
268 {
269         gen6_pte_t pte = valid ? GEN6_PTE_VALID : 0;
270         pte |= HSW_PTE_ADDR_ENCODE(addr);
271
272         if (level != I915_CACHE_NONE)
273                 pte |= HSW_WB_LLC_AGE3;
274
275         return pte;
276 }
277
278 static gen6_pte_t iris_pte_encode(dma_addr_t addr,
279                                   enum i915_cache_level level,
280                                   bool valid, u32 unused)
281 {
282         gen6_pte_t pte = valid ? GEN6_PTE_VALID : 0;
283         pte |= HSW_PTE_ADDR_ENCODE(addr);
284
285         switch (level) {
286         case I915_CACHE_NONE:
287                 break;
288         case I915_CACHE_WT:
289                 pte |= HSW_WT_ELLC_LLC_AGE3;
290                 break;
291         default:
292                 pte |= HSW_WB_ELLC_LLC_AGE3;
293                 break;
294         }
295
296         return pte;
297 }
298
299 #define i915_dma_unmap_single(px, dev) \
300         __i915_dma_unmap_single((px)->daddr, dev)
301
302 static inline void __i915_dma_unmap_single(dma_addr_t daddr,
303                                         struct drm_device *dev)
304 {
305         struct device *device = &dev->pdev->dev;
306
307         dma_unmap_page(device, daddr, 4096, PCI_DMA_BIDIRECTIONAL);
308 }
309
310 /**
311  * i915_dma_map_single() - Create a dma mapping for a page table/dir/etc.
312  * @px: Page table/dir/etc to get a DMA map for
313  * @dev:        drm device
314  *
315  * Page table allocations are unified across all gens. They always require a
316  * single 4k allocation, as well as a DMA mapping. If we keep the structs
317  * symmetric here, the simple macro covers us for every page table type.
318  *
319  * Return: 0 if success.
320  */
321 #define i915_dma_map_single(px, dev) \
322         i915_dma_map_page_single((px)->page, (dev), &(px)->daddr)
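/*
 * For example, given a struct i915_page_table *pt, i915_dma_map_single(pt, dev)
 * expands to i915_dma_map_page_single(pt->page, dev, &pt->daddr); keeping
 * ->page and ->daddr at the same place in every page table/directory struct is
 * what lets one macro serve them all.
 */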
323
324 static inline int i915_dma_map_page_single(struct page *page,
325                                            struct drm_device *dev,
326                                            dma_addr_t *daddr)
327 {
328         struct device *device = &dev->pdev->dev;
329
330         *daddr = dma_map_page(device, page, 0, 4096, PCI_DMA_BIDIRECTIONAL);
331         if (dma_mapping_error(device, *daddr))
332                 return -ENOMEM;
333
334         return 0;
335 }
336
337 static void unmap_and_free_pt(struct i915_page_table *pt,
338                                struct drm_device *dev)
339 {
340         if (WARN_ON(!pt->page))
341                 return;
342
343         i915_dma_unmap_single(pt, dev);
344         __free_page(pt->page);
345         kfree(pt->used_ptes);
346         kfree(pt);
347 }
348
349 static void gen8_initialize_pt(struct i915_address_space *vm,
350                                struct i915_page_table *pt)
351 {
352         gen8_pte_t *pt_vaddr, scratch_pte;
353         int i;
354
355         pt_vaddr = kmap_atomic(pt->page);
356         scratch_pte = gen8_pte_encode(vm->scratch.addr,
357                                       I915_CACHE_LLC, true);
358
359         for (i = 0; i < GEN8_PTES; i++)
360                 pt_vaddr[i] = scratch_pte;
361
362         if (!HAS_LLC(vm->dev))
363                 drm_clflush_virt_range(pt_vaddr, PAGE_SIZE);
364         kunmap_atomic(pt_vaddr);
365 }
366
367 static struct i915_page_table *alloc_pt_single(struct drm_device *dev)
368 {
369         struct i915_page_table *pt;
370         const size_t count = INTEL_INFO(dev)->gen >= 8 ?
371                 GEN8_PTES : GEN6_PTES;
372         int ret = -ENOMEM;
373
374         pt = kzalloc(sizeof(*pt), GFP_KERNEL);
375         if (!pt)
376                 return ERR_PTR(-ENOMEM);
377
378         pt->used_ptes = kcalloc(BITS_TO_LONGS(count), sizeof(*pt->used_ptes),
379                                 GFP_KERNEL);
380
381         if (!pt->used_ptes)
382                 goto fail_bitmap;
383
384         pt->page = alloc_page(GFP_KERNEL);
385         if (!pt->page)
386                 goto fail_page;
387
388         ret = i915_dma_map_single(pt, dev);
389         if (ret)
390                 goto fail_dma;
391
392         return pt;
393
394 fail_dma:
395         __free_page(pt->page);
396 fail_page:
397         kfree(pt->used_ptes);
398 fail_bitmap:
399         kfree(pt);
400
401         return ERR_PTR(ret);
402 }
403
404 static void unmap_and_free_pd(struct i915_page_directory *pd,
405                               struct drm_device *dev)
406 {
407         if (pd->page) {
408                 i915_dma_unmap_single(pd, dev);
409                 __free_page(pd->page);
410                 kfree(pd->used_pdes);
411                 kfree(pd);
412         }
413 }
414
415 static struct i915_page_directory *alloc_pd_single(struct drm_device *dev)
416 {
417         struct i915_page_directory *pd;
418         int ret = -ENOMEM;
419
420         pd = kzalloc(sizeof(*pd), GFP_KERNEL);
421         if (!pd)
422                 return ERR_PTR(-ENOMEM);
423
424         pd->used_pdes = kcalloc(BITS_TO_LONGS(I915_PDES),
425                                 sizeof(*pd->used_pdes), GFP_KERNEL);
426         if (!pd->used_pdes)
427                 goto free_pd;
428
429         pd->page = alloc_page(GFP_KERNEL);
430         if (!pd->page)
431                 goto free_bitmap;
432
433         ret = i915_dma_map_single(pd, dev);
434         if (ret)
435                 goto free_page;
436
437         return pd;
438
439 free_page:
440         __free_page(pd->page);
441 free_bitmap:
442         kfree(pd->used_pdes);
443 free_pd:
444         kfree(pd);
445
446         return ERR_PTR(ret);
447 }
448
449 /* Broadwell Page Directory Pointer Descriptors */
450 static int gen8_write_pdp(struct intel_engine_cs *ring,
451                           unsigned entry,
452                           dma_addr_t addr)
453 {
454         int ret;
455
456         BUG_ON(entry >= 4);
457
458         ret = intel_ring_begin(ring, 6);
459         if (ret)
460                 return ret;
461
462         intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
463         intel_ring_emit(ring, GEN8_RING_PDP_UDW(ring, entry));
464         intel_ring_emit(ring, upper_32_bits(addr));
465         intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
466         intel_ring_emit(ring, GEN8_RING_PDP_LDW(ring, entry));
467         intel_ring_emit(ring, lower_32_bits(addr));
468         intel_ring_advance(ring);
469
470         return 0;
471 }
472
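/*
 * Point all four PDP register pairs of @ring at this ppgtt's page directories.
 * Directories that were never allocated are pointed at the scratch PD, so
 * whatever the previous context had loaded gets overwritten.
 */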
473 static int gen8_mm_switch(struct i915_hw_ppgtt *ppgtt,
474                           struct intel_engine_cs *ring)
475 {
476         int i, ret;
477
478         for (i = GEN8_LEGACY_PDPES - 1; i >= 0; i--) {
479                 struct i915_page_directory *pd = ppgtt->pdp.page_directory[i];
480                 dma_addr_t pd_daddr = pd ? pd->daddr : ppgtt->scratch_pd->daddr;
481                 /* The page directory might be NULL, but we need to clear out
482                  * whatever the previous context might have used. */
483                 ret = gen8_write_pdp(ring, i, pd_daddr);
484                 if (ret)
485                         return ret;
486         }
487
488         return 0;
489 }
490
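/*
 * A gen8 PPGTT address decomposes as pdpe:pde:pte:page-offset, which is what
 * the shift/mask arithmetic below extracts: 512 PTEs per page table, 512 PDEs
 * per page directory and up to GEN8_LEGACY_PDPES page directories, matching
 * the 4GB legacy layout described further down.
 */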
491 static void gen8_ppgtt_clear_range(struct i915_address_space *vm,
492                                    uint64_t start,
493                                    uint64_t length,
494                                    bool use_scratch)
495 {
496         struct i915_hw_ppgtt *ppgtt =
497                 container_of(vm, struct i915_hw_ppgtt, base);
498         gen8_pte_t *pt_vaddr, scratch_pte;
499         unsigned pdpe = start >> GEN8_PDPE_SHIFT & GEN8_PDPE_MASK;
500         unsigned pde = start >> GEN8_PDE_SHIFT & GEN8_PDE_MASK;
501         unsigned pte = start >> GEN8_PTE_SHIFT & GEN8_PTE_MASK;
502         unsigned num_entries = length >> PAGE_SHIFT;
503         unsigned last_pte, i;
504
505         scratch_pte = gen8_pte_encode(ppgtt->base.scratch.addr,
506                                       I915_CACHE_LLC, use_scratch);
507
508         while (num_entries) {
509                 struct i915_page_directory *pd;
510                 struct i915_page_table *pt;
511                 struct page *page_table;
512
513                 if (WARN_ON(!ppgtt->pdp.page_directory[pdpe]))
514                         continue;
515
516                 pd = ppgtt->pdp.page_directory[pdpe];
517
518                 if (WARN_ON(!pd->page_table[pde]))
519                         continue;
520
521                 pt = pd->page_table[pde];
522
523                 if (WARN_ON(!pt->page))
524                         continue;
525
526                 page_table = pt->page;
527
528                 last_pte = pte + num_entries;
529                 if (last_pte > GEN8_PTES)
530                         last_pte = GEN8_PTES;
531
532                 pt_vaddr = kmap_atomic(page_table);
533
534                 for (i = pte; i < last_pte; i++) {
535                         pt_vaddr[i] = scratch_pte;
536                         num_entries--;
537                 }
538
539                 if (!HAS_LLC(ppgtt->base.dev))
540                         drm_clflush_virt_range(pt_vaddr, PAGE_SIZE);
541                 kunmap_atomic(pt_vaddr);
542
543                 pte = 0;
544                 if (++pde == I915_PDES) {
545                         pdpe++;
546                         pde = 0;
547                 }
548         }
549 }
550
551 static void gen8_ppgtt_insert_entries(struct i915_address_space *vm,
552                                       struct sg_table *pages,
553                                       uint64_t start,
554                                       enum i915_cache_level cache_level, u32 unused)
555 {
556         struct i915_hw_ppgtt *ppgtt =
557                 container_of(vm, struct i915_hw_ppgtt, base);
558         gen8_pte_t *pt_vaddr;
559         unsigned pdpe = start >> GEN8_PDPE_SHIFT & GEN8_PDPE_MASK;
560         unsigned pde = start >> GEN8_PDE_SHIFT & GEN8_PDE_MASK;
561         unsigned pte = start >> GEN8_PTE_SHIFT & GEN8_PTE_MASK;
562         struct sg_page_iter sg_iter;
563
564         pt_vaddr = NULL;
565
566         for_each_sg_page(pages->sgl, &sg_iter, pages->nents, 0) {
567                 if (WARN_ON(pdpe >= GEN8_LEGACY_PDPES))
568                         break;
569
570                 if (pt_vaddr == NULL) {
571                         struct i915_page_directory *pd = ppgtt->pdp.page_directory[pdpe];
572                         struct i915_page_table *pt = pd->page_table[pde];
573                         struct page *page_table = pt->page;
574
575                         pt_vaddr = kmap_atomic(page_table);
576                 }
577
578                 pt_vaddr[pte] =
579                         gen8_pte_encode(sg_page_iter_dma_address(&sg_iter),
580                                         cache_level, true);
581                 if (++pte == GEN8_PTES) {
582                         if (!HAS_LLC(ppgtt->base.dev))
583                                 drm_clflush_virt_range(pt_vaddr, PAGE_SIZE);
584                         kunmap_atomic(pt_vaddr);
585                         pt_vaddr = NULL;
586                         if (++pde == I915_PDES) {
587                                 pdpe++;
588                                 pde = 0;
589                         }
590                         pte = 0;
591                 }
592         }
593         if (pt_vaddr) {
594                 if (!HAS_LLC(ppgtt->base.dev))
595                         drm_clflush_virt_range(pt_vaddr, PAGE_SIZE);
596                 kunmap_atomic(pt_vaddr);
597         }
598 }
599
600 static void __gen8_do_map_pt(gen8_pde_t * const pde,
601                              struct i915_page_table *pt,
602                              struct drm_device *dev)
603 {
604         gen8_pde_t entry =
605                 gen8_pde_encode(dev, pt->daddr, I915_CACHE_LLC);
606         *pde = entry;
607 }
608
609 static void gen8_initialize_pd(struct i915_address_space *vm,
610                                struct i915_page_directory *pd)
611 {
612         struct i915_hw_ppgtt *ppgtt =
613                         container_of(vm, struct i915_hw_ppgtt, base);
614         gen8_pde_t *page_directory;
615         struct i915_page_table *pt;
616         int i;
617
618         page_directory = kmap_atomic(pd->page);
619         pt = ppgtt->scratch_pt;
620         for (i = 0; i < I915_PDES; i++)
621                 /* Map the PDE to the page table */
622                 __gen8_do_map_pt(page_directory + i, pt, vm->dev);
623
624         if (!HAS_LLC(vm->dev))
625                 drm_clflush_virt_range(page_directory, PAGE_SIZE);
626         kunmap_atomic(page_directory);
627 }
628
629 static void gen8_free_page_tables(struct i915_page_directory *pd, struct drm_device *dev)
630 {
631         int i;
632
633         if (!pd->page)
634                 return;
635
636         for_each_set_bit(i, pd->used_pdes, I915_PDES) {
637                 if (WARN_ON(!pd->page_table[i]))
638                         continue;
639
640                 unmap_and_free_pt(pd->page_table[i], dev);
641                 pd->page_table[i] = NULL;
642         }
643 }
644
645 static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
646 {
647         struct i915_hw_ppgtt *ppgtt =
648                 container_of(vm, struct i915_hw_ppgtt, base);
649         int i;
650
651         for_each_set_bit(i, ppgtt->pdp.used_pdpes, GEN8_LEGACY_PDPES) {
652                 if (WARN_ON(!ppgtt->pdp.page_directory[i]))
653                         continue;
654
655                 gen8_free_page_tables(ppgtt->pdp.page_directory[i], ppgtt->base.dev);
656                 unmap_and_free_pd(ppgtt->pdp.page_directory[i], ppgtt->base.dev);
657         }
658
659         unmap_and_free_pd(ppgtt->scratch_pd, ppgtt->base.dev);
660         unmap_and_free_pt(ppgtt->scratch_pt, ppgtt->base.dev);
661 }
662
663 /**
664  * gen8_ppgtt_alloc_pagetabs() - Allocate page tables for VA range.
665  * @ppgtt:      Master ppgtt structure.
666  * @pd:         Page directory for this address range.
667  * @start:      Starting virtual address to begin allocations.
668  * @length:     Size of the allocations.
669  * @new_pts:    Bitmap set by function with new allocations. Likely used by the
670  *              caller to free on error.
671  *
672  * Allocate the required number of page tables. Extremely similar to
673  * gen8_ppgtt_alloc_page_directories(). The main difference is that here we are limited by
674  * the page directory boundary (instead of the page directory pointer). That
675  * boundary is 1GB virtual. Therefore, unlike gen8_ppgtt_alloc_page_directories(), it is
676  * possible, and likely, that the caller will need to use multiple calls of this
677  * function to achieve the appropriate allocation.
678  *
679  * Return: 0 if success; negative error code otherwise.
680  */
681 static int gen8_ppgtt_alloc_pagetabs(struct i915_hw_ppgtt *ppgtt,
682                                      struct i915_page_directory *pd,
683                                      uint64_t start,
684                                      uint64_t length,
685                                      unsigned long *new_pts)
686 {
687         struct drm_device *dev = ppgtt->base.dev;
688         struct i915_page_table *pt;
689         uint64_t temp;
690         uint32_t pde;
691
692         gen8_for_each_pde(pt, pd, start, length, temp, pde) {
693                 /* Don't reallocate page tables */
694                 if (pt) {
695                         /* Scratch is never allocated this way */
696                         WARN_ON(pt == ppgtt->scratch_pt);
697                         continue;
698                 }
699
700                 pt = alloc_pt_single(dev);
701                 if (IS_ERR(pt))
702                         goto unwind_out;
703
704                 gen8_initialize_pt(&ppgtt->base, pt);
705                 pd->page_table[pde] = pt;
706                 set_bit(pde, new_pts);
707         }
708
709         return 0;
710
711 unwind_out:
712         for_each_set_bit(pde, new_pts, I915_PDES)
713                 unmap_and_free_pt(pd->page_table[pde], dev);
714
715         return -ENOMEM;
716 }
717
718 /**
719  * gen8_ppgtt_alloc_page_directories() - Allocate page directories for VA range.
720  * @ppgtt:      Master ppgtt structure.
721  * @pdp:        Page directory pointer for this address range.
722  * @start:      Starting virtual address to begin allocations.
723  * @length:     Size of the allocations.
724  * @new_pds:    Bitmap set by function with new allocations. Likely used by the
725  *              caller to free on error.
726  *
727  * Allocate the required number of page directories starting at the pdpe index of
728  * @start, and ending at the pdpe index of @start + @length. This function will skip
729  * over already allocated page directories within the range, and only allocate
730  * new ones, setting the appropriate pointer within the pdp as well as the
731  * correct position in the bitmap @new_pds.
732  *
733  * The function will only allocate the pages within the range for a given page
734  * directory pointer. In other words, if @start + @length straddles a virtually
735  * addressed PDP boundary (512GB for 4k pages), there will be more allocations
736  * required by the caller. This is not currently possible, and the BUG in the
737  * code will prevent it.
738  *
739  * Return: 0 if success; negative error code otherwise.
740  */
741 static int gen8_ppgtt_alloc_page_directories(struct i915_hw_ppgtt *ppgtt,
742                                      struct i915_page_directory_pointer *pdp,
743                                      uint64_t start,
744                                      uint64_t length,
745                                      unsigned long *new_pds)
746 {
747         struct drm_device *dev = ppgtt->base.dev;
748         struct i915_page_directory *pd;
749         uint64_t temp;
750         uint32_t pdpe;
751
752         WARN_ON(!bitmap_empty(new_pds, GEN8_LEGACY_PDPES));
753
754         /* FIXME: PPGTT container_of won't work for 64b */
755         WARN_ON((start + length) > 0x800000000ULL);
756
757         gen8_for_each_pdpe(pd, pdp, start, length, temp, pdpe) {
758                 if (pd)
759                         continue;
760
761                 pd = alloc_pd_single(dev);
762                 if (IS_ERR(pd))
763                         goto unwind_out;
764
765                 gen8_initialize_pd(&ppgtt->base, pd);
766                 pdp->page_directory[pdpe] = pd;
767                 set_bit(pdpe, new_pds);
768         }
769
770         return 0;
771
772 unwind_out:
773         for_each_set_bit(pdpe, new_pds, GEN8_LEGACY_PDPES)
774                 unmap_and_free_pd(pdp->page_directory[pdpe], dev);
775
776         return -ENOMEM;
777 }
778
779 static void
780 free_gen8_temp_bitmaps(unsigned long *new_pds, unsigned long **new_pts)
781 {
782         int i;
783
784         for (i = 0; i < GEN8_LEGACY_PDPES; i++)
785                 kfree(new_pts[i]);
786         kfree(new_pts);
787         kfree(new_pds);
788 }
789
790 /* Allocates (zeroed) the page directory bitmap and the array of page table
791  * bitmaps. Both of these are sized based on the number of PDPEs in the system.
792  */
793 static
794 int __must_check alloc_gen8_temp_bitmaps(unsigned long **new_pds,
795                                          unsigned long ***new_pts)
796 {
797         int i;
798         unsigned long *pds;
799         unsigned long **pts;
800
801         pds = kcalloc(BITS_TO_LONGS(GEN8_LEGACY_PDPES), sizeof(unsigned long), GFP_KERNEL);
802         if (!pds)
803                 return -ENOMEM;
804
805         pts = kcalloc(GEN8_LEGACY_PDPES, sizeof(unsigned long *), GFP_KERNEL);
806         if (!pts) {
807                 kfree(pds);
808                 return -ENOMEM;
809         }
810
811         for (i = 0; i < GEN8_LEGACY_PDPES; i++) {
812                 pts[i] = kcalloc(BITS_TO_LONGS(I915_PDES),
813                                  sizeof(unsigned long), GFP_KERNEL);
814                 if (!pts[i])
815                         goto err_out;
816         }
817
818         *new_pds = pds;
819         *new_pts = pts;
820
821         return 0;
822
823 err_out:
824         free_gen8_temp_bitmaps(pds, pts);
825         return -ENOMEM;
826 }
827
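/*
 * gen8_alloc_va_range() works in two passes so that failure is easy to unwind:
 * first allocate any missing page directories and then any missing page tables
 * for [start, start + length), recording every new allocation in temporary
 * bitmaps; only once everything has been allocated does the second pass walk
 * the range again to mark used_pdes/used_ptes and write the PDEs. On error the
 * temporary bitmaps tell us exactly which structures to free.
 */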
828 static int gen8_alloc_va_range(struct i915_address_space *vm,
829                                uint64_t start,
830                                uint64_t length)
831 {
832         struct i915_hw_ppgtt *ppgtt =
833                 container_of(vm, struct i915_hw_ppgtt, base);
834         unsigned long *new_page_dirs, **new_page_tables;
835         struct i915_page_directory *pd;
836         const uint64_t orig_start = start;
837         const uint64_t orig_length = length;
838         uint64_t temp;
839         uint32_t pdpe;
840         int ret;
841
842 #ifndef CONFIG_64BIT
843         /* Disallow 64b address on 32b platforms. Nothing is wrong with doing
844          * this in hardware, but a lot of the drm code is not prepared to handle
845          * 64b offset on 32b platforms.
846          * This will be addressed when 48b PPGTT is added */
847         if (start + length > 0x100000000ULL)
848                 return -E2BIG;
849 #endif
850
851         /* Wrap is never okay since we can only represent 48b, and we don't
852          * actually use the other side of the canonical address space.
853          */
854         if (WARN_ON(start + length < start))
855                 return -ERANGE;
856
857         ret = alloc_gen8_temp_bitmaps(&new_page_dirs, &new_page_tables);
858         if (ret)
859                 return ret;
860
861         /* Do the allocations first so we can easily bail out */
862         ret = gen8_ppgtt_alloc_page_directories(ppgtt, &ppgtt->pdp, start, length,
863                                         new_page_dirs);
864         if (ret) {
865                 free_gen8_temp_bitmaps(new_page_dirs, new_page_tables);
866                 return ret;
867         }
868
869         /* For every page directory referenced, allocate page tables */
870         gen8_for_each_pdpe(pd, &ppgtt->pdp, start, length, temp, pdpe) {
871                 ret = gen8_ppgtt_alloc_pagetabs(ppgtt, pd, start, length,
872                                                 new_page_tables[pdpe]);
873                 if (ret)
874                         goto err_out;
875         }
876
877         start = orig_start;
878         length = orig_length;
879
880         /* Allocations have completed successfully, so set the bitmaps, and do
881          * the mappings. */
882         gen8_for_each_pdpe(pd, &ppgtt->pdp, start, length, temp, pdpe) {
883                 gen8_pde_t *const page_directory = kmap_atomic(pd->page);
884                 struct i915_page_table *pt;
885                 uint64_t pd_len = gen8_clamp_pd(start, length);
886                 uint64_t pd_start = start;
887                 uint32_t pde;
888
889                 /* Every pd should be allocated, we just did that above. */
890                 WARN_ON(!pd);
891
892                 gen8_for_each_pde(pt, pd, pd_start, pd_len, temp, pde) {
893                         /* Same reasoning as pd */
894                         WARN_ON(!pt);
895                         WARN_ON(!pd_len);
896                         WARN_ON(!gen8_pte_count(pd_start, pd_len));
897
898                         /* Set our used ptes within the page table */
899                         bitmap_set(pt->used_ptes,
900                                    gen8_pte_index(pd_start),
901                                    gen8_pte_count(pd_start, pd_len));
902
903                         /* Our pde is now pointing to the pagetable, pt */
904                         set_bit(pde, pd->used_pdes);
905
906                         /* Map the PDE to the page table */
907                         __gen8_do_map_pt(page_directory + pde, pt, vm->dev);
908
909                         /* NB: We haven't yet mapped ptes to pages. At this
910                          * point we're still relying on insert_entries() */
911                 }
912
913                 if (!HAS_LLC(vm->dev))
914                         drm_clflush_virt_range(page_directory, PAGE_SIZE);
915
916                 kunmap_atomic(page_directory);
917
918                 set_bit(pdpe, ppgtt->pdp.used_pdpes);
919         }
920
921         free_gen8_temp_bitmaps(new_page_dirs, new_page_tables);
922         return 0;
923
924 err_out:
925         while (pdpe--) {
926                 for_each_set_bit(temp, new_page_tables[pdpe], I915_PDES)
927                         unmap_and_free_pt(ppgtt->pdp.page_directory[pdpe]->page_table[temp], vm->dev);
928         }
929
930         for_each_set_bit(pdpe, new_page_dirs, GEN8_LEGACY_PDPES)
931                 unmap_and_free_pd(ppgtt->pdp.page_directory[pdpe], vm->dev);
932
933         free_gen8_temp_bitmaps(new_page_dirs, new_page_tables);
934         return ret;
935 }
936
937 /*
938  * GEN8 legacy ppgtt programming is accomplished through at most 4 PDP
939  * registers, with a net effect resembling a 2-level page table in normal x86
940  * terms. Each PDP represents 1GB of memory; 4 * 512 * 512 * 4096 = 4GB of
941  * legacy 32b address space.
942  *
943  */
944 static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
945 {
946         ppgtt->scratch_pt = alloc_pt_single(ppgtt->base.dev);
947         if (IS_ERR(ppgtt->scratch_pt))
948                 return PTR_ERR(ppgtt->scratch_pt);
949
950         ppgtt->scratch_pd = alloc_pd_single(ppgtt->base.dev);
951         if (IS_ERR(ppgtt->scratch_pd))
952                 return PTR_ERR(ppgtt->scratch_pd);
953
954         gen8_initialize_pt(&ppgtt->base, ppgtt->scratch_pt);
955         gen8_initialize_pd(&ppgtt->base, ppgtt->scratch_pd);
956
957         ppgtt->base.start = 0;
958         ppgtt->base.total = 1ULL << 32;
959         ppgtt->base.cleanup = gen8_ppgtt_cleanup;
960         ppgtt->base.allocate_va_range = gen8_alloc_va_range;
961         ppgtt->base.insert_entries = gen8_ppgtt_insert_entries;
962         ppgtt->base.clear_range = gen8_ppgtt_clear_range;
963         ppgtt->base.unbind_vma = ppgtt_unbind_vma;
964         ppgtt->base.bind_vma = ppgtt_bind_vma;
965
966         ppgtt->switch_mm = gen8_mm_switch;
967
968         return 0;
969 }
970
971 static void gen6_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m)
972 {
973         struct i915_address_space *vm = &ppgtt->base;
974         struct i915_page_table *unused;
975         gen6_pte_t scratch_pte;
976         uint32_t pd_entry;
977         uint32_t  pte, pde, temp;
978         uint32_t start = ppgtt->base.start, length = ppgtt->base.total;
979
980         scratch_pte = vm->pte_encode(vm->scratch.addr, I915_CACHE_LLC, true, 0);
981
982         gen6_for_each_pde(unused, &ppgtt->pd, start, length, temp, pde) {
983                 u32 expected;
984                 gen6_pte_t *pt_vaddr;
985                 dma_addr_t pt_addr = ppgtt->pd.page_table[pde]->daddr;
986                 pd_entry = readl(ppgtt->pd_addr + pde);
987                 expected = (GEN6_PDE_ADDR_ENCODE(pt_addr) | GEN6_PDE_VALID);
988
989                 if (pd_entry != expected)
990                         seq_printf(m, "\tPDE #%d mismatch: Actual PDE: %x Expected PDE: %x\n",
991                                    pde,
992                                    pd_entry,
993                                    expected);
994                 seq_printf(m, "\tPDE: %x\n", pd_entry);
995
996                 pt_vaddr = kmap_atomic(ppgtt->pd.page_table[pde]->page);
997                 for (pte = 0; pte < GEN6_PTES; pte+=4) {
998                         unsigned long va =
999                                 (pde * PAGE_SIZE * GEN6_PTES) +
1000                                 (pte * PAGE_SIZE);
1001                         int i;
1002                         bool found = false;
1003                         for (i = 0; i < 4; i++)
1004                                 if (pt_vaddr[pte + i] != scratch_pte)
1005                                         found = true;
1006                         if (!found)
1007                                 continue;
1008
1009                         seq_printf(m, "\t\t0x%lx [%03d,%04d]: =", va, pde, pte);
1010                         for (i = 0; i < 4; i++) {
1011                                 if (pt_vaddr[pte + i] != scratch_pte)
1012                                         seq_printf(m, " %08x", pt_vaddr[pte + i]);
1013                                 else
1014                                         seq_puts(m, "  SCRATCH ");
1015                         }
1016                         seq_puts(m, "\n");
1017                 }
1018                 kunmap_atomic(pt_vaddr);
1019         }
1020 }
1021
1022 /* Write the PDE at index @pde in page directory @pd to point at page table @pt */
1023 static void gen6_write_pde(struct i915_page_directory *pd,
1024                             const int pde, struct i915_page_table *pt)
1025 {
1026         /* Caller needs to make sure the write completes if necessary */
1027         struct i915_hw_ppgtt *ppgtt =
1028                 container_of(pd, struct i915_hw_ppgtt, pd);
1029         u32 pd_entry;
1030
1031         pd_entry = GEN6_PDE_ADDR_ENCODE(pt->daddr);
1032         pd_entry |= GEN6_PDE_VALID;
1033
1034         writel(pd_entry, ppgtt->pd_addr + pde);
1035 }
1036
1037 /* Write the PDEs for all the page tables found in @pd covering the given
1038  * address range. */
1039 static void gen6_write_page_range(struct drm_i915_private *dev_priv,
1040                                   struct i915_page_directory *pd,
1041                                   uint32_t start, uint32_t length)
1042 {
1043         struct i915_page_table *pt;
1044         uint32_t pde, temp;
1045
1046         gen6_for_each_pde(pt, pd, start, length, temp, pde)
1047                 gen6_write_pde(pd, pde, pt);
1048
1049         /* Make sure write is complete before other code can use this page
1050          * table. Also required for WC mapped PTEs */
1051         readl(dev_priv->gtt.gsm);
1052 }
1053
1054 static uint32_t get_pd_offset(struct i915_hw_ppgtt *ppgtt)
1055 {
1056         BUG_ON(ppgtt->pd.pd_offset & 0x3f);
1057
1058         return (ppgtt->pd.pd_offset / 64) << 16;
1059 }
1060
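/*
 * The hsw/gen7 switch variants below load the new page directory from the
 * ring itself: an MI_LOAD_REGISTER_IMM(2) writes PP_DIR_DCLV with
 * PP_DIR_DCLV_2G and PP_DIR_BASE with the offset returned by get_pd_offset()
 * for that ring, whereas the gen6 and vGPU variants simply poke the same
 * registers through MMIO.
 */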
1061 static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt,
1062                          struct intel_engine_cs *ring)
1063 {
1064         int ret;
1065
1066         /* NB: TLBs must be flushed and invalidated before a switch */
1067         ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
1068         if (ret)
1069                 return ret;
1070
1071         ret = intel_ring_begin(ring, 6);
1072         if (ret)
1073                 return ret;
1074
1075         intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(2));
1076         intel_ring_emit(ring, RING_PP_DIR_DCLV(ring));
1077         intel_ring_emit(ring, PP_DIR_DCLV_2G);
1078         intel_ring_emit(ring, RING_PP_DIR_BASE(ring));
1079         intel_ring_emit(ring, get_pd_offset(ppgtt));
1080         intel_ring_emit(ring, MI_NOOP);
1081         intel_ring_advance(ring);
1082
1083         return 0;
1084 }
1085
1086 static int vgpu_mm_switch(struct i915_hw_ppgtt *ppgtt,
1087                           struct intel_engine_cs *ring)
1088 {
1089         struct drm_i915_private *dev_priv = to_i915(ppgtt->base.dev);
1090
1091         I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G);
1092         I915_WRITE(RING_PP_DIR_BASE(ring), get_pd_offset(ppgtt));
1093         return 0;
1094 }
1095
1096 static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt,
1097                           struct intel_engine_cs *ring)
1098 {
1099         int ret;
1100
1101         /* NB: TLBs must be flushed and invalidated before a switch */
1102         ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
1103         if (ret)
1104                 return ret;
1105
1106         ret = intel_ring_begin(ring, 6);
1107         if (ret)
1108                 return ret;
1109
1110         intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(2));
1111         intel_ring_emit(ring, RING_PP_DIR_DCLV(ring));
1112         intel_ring_emit(ring, PP_DIR_DCLV_2G);
1113         intel_ring_emit(ring, RING_PP_DIR_BASE(ring));
1114         intel_ring_emit(ring, get_pd_offset(ppgtt));
1115         intel_ring_emit(ring, MI_NOOP);
1116         intel_ring_advance(ring);
1117
1118         /* XXX: RCS is the only one to auto invalidate the TLBs? */
1119         if (ring->id != RCS) {
1120                 ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
1121                 if (ret)
1122                         return ret;
1123         }
1124
1125         return 0;
1126 }
1127
1128 static int gen6_mm_switch(struct i915_hw_ppgtt *ppgtt,
1129                           struct intel_engine_cs *ring)
1130 {
1131         struct drm_device *dev = ppgtt->base.dev;
1132         struct drm_i915_private *dev_priv = dev->dev_private;
1133
1134
1135         I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G);
1136         I915_WRITE(RING_PP_DIR_BASE(ring), get_pd_offset(ppgtt));
1137
1138         POSTING_READ(RING_PP_DIR_DCLV(ring));
1139
1140         return 0;
1141 }
1142
1143 static void gen8_ppgtt_enable(struct drm_device *dev)
1144 {
1145         struct drm_i915_private *dev_priv = dev->dev_private;
1146         struct intel_engine_cs *ring;
1147         int j;
1148
1149         for_each_ring(ring, dev_priv, j) {
1150                 I915_WRITE(RING_MODE_GEN7(ring),
1151                            _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
1152         }
1153 }
1154
1155 static void gen7_ppgtt_enable(struct drm_device *dev)
1156 {
1157         struct drm_i915_private *dev_priv = dev->dev_private;
1158         struct intel_engine_cs *ring;
1159         uint32_t ecochk, ecobits;
1160         int i;
1161
1162         ecobits = I915_READ(GAC_ECO_BITS);
1163         I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_PPGTT_CACHE64B);
1164
1165         ecochk = I915_READ(GAM_ECOCHK);
1166         if (IS_HASWELL(dev)) {
1167                 ecochk |= ECOCHK_PPGTT_WB_HSW;
1168         } else {
1169                 ecochk |= ECOCHK_PPGTT_LLC_IVB;
1170                 ecochk &= ~ECOCHK_PPGTT_GFDT_IVB;
1171         }
1172         I915_WRITE(GAM_ECOCHK, ecochk);
1173
1174         for_each_ring(ring, dev_priv, i) {
1175                 /* GFX_MODE is per-ring on gen7+ */
1176                 I915_WRITE(RING_MODE_GEN7(ring),
1177                            _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
1178         }
1179 }
1180
1181 static void gen6_ppgtt_enable(struct drm_device *dev)
1182 {
1183         struct drm_i915_private *dev_priv = dev->dev_private;
1184         uint32_t ecochk, gab_ctl, ecobits;
1185
1186         ecobits = I915_READ(GAC_ECO_BITS);
1187         I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_SNB_BIT |
1188                    ECOBITS_PPGTT_CACHE64B);
1189
1190         gab_ctl = I915_READ(GAB_CTL);
1191         I915_WRITE(GAB_CTL, gab_ctl | GAB_CTL_CONT_AFTER_PAGEFAULT);
1192
1193         ecochk = I915_READ(GAM_ECOCHK);
1194         I915_WRITE(GAM_ECOCHK, ecochk | ECOCHK_SNB_BIT | ECOCHK_PPGTT_CACHE64B);
1195
1196         I915_WRITE(GFX_MODE, _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
1197 }
1198
1199 /* PPGTT support for Sandybridge/Gen6 and later */
1200 static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
1201                                    uint64_t start,
1202                                    uint64_t length,
1203                                    bool use_scratch)
1204 {
1205         struct i915_hw_ppgtt *ppgtt =
1206                 container_of(vm, struct i915_hw_ppgtt, base);
1207         gen6_pte_t *pt_vaddr, scratch_pte;
1208         unsigned first_entry = start >> PAGE_SHIFT;
1209         unsigned num_entries = length >> PAGE_SHIFT;
1210         unsigned act_pt = first_entry / GEN6_PTES;
1211         unsigned first_pte = first_entry % GEN6_PTES;
1212         unsigned last_pte, i;
1213
1214         scratch_pte = vm->pte_encode(vm->scratch.addr, I915_CACHE_LLC, true, 0);
1215
1216         while (num_entries) {
1217                 last_pte = first_pte + num_entries;
1218                 if (last_pte > GEN6_PTES)
1219                         last_pte = GEN6_PTES;
1220
1221                 pt_vaddr = kmap_atomic(ppgtt->pd.page_table[act_pt]->page);
1222
1223                 for (i = first_pte; i < last_pte; i++)
1224                         pt_vaddr[i] = scratch_pte;
1225
1226                 kunmap_atomic(pt_vaddr);
1227
1228                 num_entries -= last_pte - first_pte;
1229                 first_pte = 0;
1230                 act_pt++;
1231         }
1232 }
1233
1234 static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
1235                                       struct sg_table *pages,
1236                                       uint64_t start,
1237                                       enum i915_cache_level cache_level, u32 flags)
1238 {
1239         struct i915_hw_ppgtt *ppgtt =
1240                 container_of(vm, struct i915_hw_ppgtt, base);
1241         gen6_pte_t *pt_vaddr;
1242         unsigned first_entry = start >> PAGE_SHIFT;
1243         unsigned act_pt = first_entry / GEN6_PTES;
1244         unsigned act_pte = first_entry % GEN6_PTES;
1245         struct sg_page_iter sg_iter;
1246
1247         pt_vaddr = NULL;
1248         for_each_sg_page(pages->sgl, &sg_iter, pages->nents, 0) {
1249                 if (pt_vaddr == NULL)
1250                         pt_vaddr = kmap_atomic(ppgtt->pd.page_table[act_pt]->page);
1251
1252                 pt_vaddr[act_pte] =
1253                         vm->pte_encode(sg_page_iter_dma_address(&sg_iter),
1254                                        cache_level, true, flags);
1255
1256                 if (++act_pte == GEN6_PTES) {
1257                         kunmap_atomic(pt_vaddr);
1258                         pt_vaddr = NULL;
1259                         act_pt++;
1260                         act_pte = 0;
1261                 }
1262         }
1263         if (pt_vaddr)
1264                 kunmap_atomic(pt_vaddr);
1265 }
1266
1267 /* PDE TLBs are a pain to invalidate pre GEN8. It requires a context reload. If we
1268  * are switching between contexts with the same LRCA, we also must do a force
1269  * restore.
1270  */
1271 static inline void mark_tlbs_dirty(struct i915_hw_ppgtt *ppgtt)
1272 {
1273         /* If current vm != vm, */
1274         ppgtt->pd_dirty_rings = INTEL_INFO(ppgtt->base.dev)->ring_mask;
1275 }
1276
1277 static void gen6_initialize_pt(struct i915_address_space *vm,
1278                 struct i915_page_table *pt)
1279 {
1280         gen6_pte_t *pt_vaddr, scratch_pte;
1281         int i;
1282
1283         WARN_ON(vm->scratch.addr == 0);
1284
1285         scratch_pte = vm->pte_encode(vm->scratch.addr,
1286                         I915_CACHE_LLC, true, 0);
1287
1288         pt_vaddr = kmap_atomic(pt->page);
1289
1290         for (i = 0; i < GEN6_PTES; i++)
1291                 pt_vaddr[i] = scratch_pte;
1292
1293         kunmap_atomic(pt_vaddr);
1294 }
1295
1296 static int gen6_alloc_va_range(struct i915_address_space *vm,
1297                                uint64_t start, uint64_t length)
1298 {
1299         DECLARE_BITMAP(new_page_tables, I915_PDES);
1300         struct drm_device *dev = vm->dev;
1301         struct drm_i915_private *dev_priv = dev->dev_private;
1302         struct i915_hw_ppgtt *ppgtt =
1303                                 container_of(vm, struct i915_hw_ppgtt, base);
1304         struct i915_page_table *pt;
1305         const uint32_t start_save = start, length_save = length;
1306         uint32_t pde, temp;
1307         int ret;
1308
1309         WARN_ON(upper_32_bits(start));
1310
1311         bitmap_zero(new_page_tables, I915_PDES);
1312
1313         /* The allocation is done in two stages so that we can bail out with a
1314          * minimal amount of pain. The first stage finds new page tables that
1315          * need allocation. The second stage marks the used ptes within the page
1316          * tables.
1317          */
1318         gen6_for_each_pde(pt, &ppgtt->pd, start, length, temp, pde) {
1319                 if (pt != ppgtt->scratch_pt) {
1320                         WARN_ON(bitmap_empty(pt->used_ptes, GEN6_PTES));
1321                         continue;
1322                 }
1323
1324                 /* We've already allocated a page table */
1325                 WARN_ON(!bitmap_empty(pt->used_ptes, GEN6_PTES));
1326
1327                 pt = alloc_pt_single(dev);
1328                 if (IS_ERR(pt)) {
1329                         ret = PTR_ERR(pt);
1330                         goto unwind_out;
1331                 }
1332
1333                 gen6_initialize_pt(vm, pt);
1334
1335                 ppgtt->pd.page_table[pde] = pt;
1336                 set_bit(pde, new_page_tables);
1337                 trace_i915_page_table_entry_alloc(vm, pde, start, GEN6_PDE_SHIFT);
1338         }
1339
1340         start = start_save;
1341         length = length_save;
1342
1343         gen6_for_each_pde(pt, &ppgtt->pd, start, length, temp, pde) {
1344                 DECLARE_BITMAP(tmp_bitmap, GEN6_PTES);
1345
1346                 bitmap_zero(tmp_bitmap, GEN6_PTES);
1347                 bitmap_set(tmp_bitmap, gen6_pte_index(start),
1348                            gen6_pte_count(start, length));
1349
1350                 if (test_and_clear_bit(pde, new_page_tables))
1351                         gen6_write_pde(&ppgtt->pd, pde, pt);
1352
1353                 trace_i915_page_table_entry_map(vm, pde, pt,
1354                                          gen6_pte_index(start),
1355                                          gen6_pte_count(start, length),
1356                                          GEN6_PTES);
1357                 bitmap_or(pt->used_ptes, tmp_bitmap, pt->used_ptes,
1358                                 GEN6_PTES);
1359         }
1360
1361         WARN_ON(!bitmap_empty(new_page_tables, I915_PDES));
1362
1363         /* Make sure write is complete before other code can use this page
1364          * table. Also required for WC mapped PTEs */
1365         readl(dev_priv->gtt.gsm);
1366
1367         mark_tlbs_dirty(ppgtt);
1368         return 0;
1369
1370 unwind_out:
1371         for_each_set_bit(pde, new_page_tables, I915_PDES) {
1372                 struct i915_page_table *pt = ppgtt->pd.page_table[pde];
1373
1374                 ppgtt->pd.page_table[pde] = ppgtt->scratch_pt;
1375                 unmap_and_free_pt(pt, vm->dev);
1376         }
1377
1378         mark_tlbs_dirty(ppgtt);
1379         return ret;
1380 }
1381
1382 static void gen6_ppgtt_cleanup(struct i915_address_space *vm)
1383 {
1384         struct i915_hw_ppgtt *ppgtt =
1385                 container_of(vm, struct i915_hw_ppgtt, base);
1386         struct i915_page_table *pt;
1387         uint32_t pde;
1388
1389
1390         drm_mm_remove_node(&ppgtt->node);
1391
1392         gen6_for_all_pdes(pt, ppgtt, pde) {
1393                 if (pt != ppgtt->scratch_pt)
1394                         unmap_and_free_pt(pt, ppgtt->base.dev);
1395         }
1396
1397         unmap_and_free_pt(ppgtt->scratch_pt, ppgtt->base.dev);
1398         unmap_and_free_pd(&ppgtt->pd, ppgtt->base.dev);
1399 }
1400
1401 static int gen6_ppgtt_allocate_page_directories(struct i915_hw_ppgtt *ppgtt)
1402 {
1403         struct drm_device *dev = ppgtt->base.dev;
1404         struct drm_i915_private *dev_priv = dev->dev_private;
1405         bool retried = false;
1406         int ret;
1407
1408         /* PPGTT PDEs reside in the GGTT and consist of 512 entries. The
1409          * allocator works in address space sizes, so it's multiplied by page
1410          * size. We allocate at the top of the GTT to avoid fragmentation.
1411          */
1412         BUG_ON(!drm_mm_initialized(&dev_priv->gtt.base.mm));
1413         ppgtt->scratch_pt = alloc_pt_single(ppgtt->base.dev);
1414         if (IS_ERR(ppgtt->scratch_pt))
1415                 return PTR_ERR(ppgtt->scratch_pt);
1416
1417         gen6_initialize_pt(&ppgtt->base, ppgtt->scratch_pt);
1418
1419 alloc:
1420         ret = drm_mm_insert_node_in_range_generic(&dev_priv->gtt.base.mm,
1421                                                   &ppgtt->node, GEN6_PD_SIZE,
1422                                                   GEN6_PD_ALIGN, 0,
1423                                                   0, dev_priv->gtt.base.total,
1424                                                   DRM_MM_TOPDOWN);
1425         if (ret == -ENOSPC && !retried) {
1426                 ret = i915_gem_evict_something(dev, &dev_priv->gtt.base,
1427                                                GEN6_PD_SIZE, GEN6_PD_ALIGN,
1428                                                I915_CACHE_NONE,
1429                                                0, dev_priv->gtt.base.total,
1430                                                0);
1431                 if (ret)
1432                         goto err_out;
1433
1434                 retried = true;
1435                 goto alloc;
1436         }
1437
1438         if (ret)
1439                 goto err_out;
1440
1441
1442         if (ppgtt->node.start < dev_priv->gtt.mappable_end)
1443                 DRM_DEBUG("Forced to use aperture for PDEs\n");
1444
1445         return 0;
1446
1447 err_out:
1448         unmap_and_free_pt(ppgtt->scratch_pt, ppgtt->base.dev);
1449         return ret;
1450 }
1451
1452 static int gen6_ppgtt_alloc(struct i915_hw_ppgtt *ppgtt)
1453 {
1454         return gen6_ppgtt_allocate_page_directories(ppgtt);
1455 }
1456
1457 static void gen6_scratch_va_range(struct i915_hw_ppgtt *ppgtt,
1458                                   uint64_t start, uint64_t length)
1459 {
1460         struct i915_page_table *unused;
1461         uint32_t pde, temp;
1462
1463         gen6_for_each_pde(unused, &ppgtt->pd, start, length, temp, pde)
1464                 ppgtt->pd.page_table[pde] = ppgtt->scratch_pt;
1465 }
1466
1467 static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
1468 {
1469         struct drm_device *dev = ppgtt->base.dev;
1470         struct drm_i915_private *dev_priv = dev->dev_private;
1471         int ret;
1472
1473         ppgtt->base.pte_encode = dev_priv->gtt.base.pte_encode;
1474         if (IS_GEN6(dev)) {
1475                 ppgtt->switch_mm = gen6_mm_switch;
1476         } else if (IS_HASWELL(dev)) {
1477                 ppgtt->switch_mm = hsw_mm_switch;
1478         } else if (IS_GEN7(dev)) {
1479                 ppgtt->switch_mm = gen7_mm_switch;
1480         } else
1481                 BUG();
1482
1483         if (intel_vgpu_active(dev))
1484                 ppgtt->switch_mm = vgpu_mm_switch;
1485
1486         ret = gen6_ppgtt_alloc(ppgtt);
1487         if (ret)
1488                 return ret;
1489
1490         ppgtt->base.allocate_va_range = gen6_alloc_va_range;
1491         ppgtt->base.clear_range = gen6_ppgtt_clear_range;
1492         ppgtt->base.insert_entries = gen6_ppgtt_insert_entries;
1493         ppgtt->base.unbind_vma = ppgtt_unbind_vma;
1494         ppgtt->base.bind_vma = ppgtt_bind_vma;
1495         ppgtt->base.cleanup = gen6_ppgtt_cleanup;
1496         ppgtt->base.start = 0;
1497         ppgtt->base.total = I915_PDES * GEN6_PTES * PAGE_SIZE;
1498         ppgtt->debug_dump = gen6_dump_ppgtt;
1499
1500         ppgtt->pd.pd_offset =
1501                 ppgtt->node.start / PAGE_SIZE * sizeof(gen6_pte_t);
1502
1503         ppgtt->pd_addr = (gen6_pte_t __iomem *)dev_priv->gtt.gsm +
1504                 ppgtt->pd.pd_offset / sizeof(gen6_pte_t);
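        /* A quick sanity check of the arithmetic above: base.total is
         * I915_PDES page tables of GEN6_PTES entries each, times the 4K
         * page size, which works out to a 2GB address space here.
         * pd_offset is the byte offset of this PPGTT's PDEs inside the
         * GGTT page table: node.start / PAGE_SIZE is the first GTT entry
         * covered by the node, and scaling by sizeof(gen6_pte_t) turns
         * that index into bytes; pd_addr is the same location expressed
         * as a pointer into the gsm mapping.
         */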
1505
1506         gen6_scratch_va_range(ppgtt, 0, ppgtt->base.total);
1507
1508         gen6_write_page_range(dev_priv, &ppgtt->pd, 0, ppgtt->base.total);
1509
1510         DRM_DEBUG_DRIVER("Allocated pde space (%lldM) at GTT entry: %llx\n",
1511                          ppgtt->node.size >> 20,
1512                          ppgtt->node.start / PAGE_SIZE);
1513
1514         DRM_DEBUG("Adding PPGTT at offset %x\n",
1515                   ppgtt->pd.pd_offset << 10);
1516
1517         return 0;
1518 }
1519
1520 static int __hw_ppgtt_init(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt)
1521 {
1522         struct drm_i915_private *dev_priv = dev->dev_private;
1523
1524         ppgtt->base.dev = dev;
1525         ppgtt->base.scratch = dev_priv->gtt.base.scratch;
1526
1527         if (INTEL_INFO(dev)->gen < 8)
1528                 return gen6_ppgtt_init(ppgtt);
1529         else
1530                 return gen8_ppgtt_init(ppgtt);
1531 }
1532 int i915_ppgtt_init(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt)
1533 {
1534         struct drm_i915_private *dev_priv = dev->dev_private;
1535         int ret = 0;
1536
1537         ret = __hw_ppgtt_init(dev, ppgtt);
1538         if (ret == 0) {
1539                 kref_init(&ppgtt->ref);
1540                 drm_mm_init(&ppgtt->base.mm, ppgtt->base.start,
1541                             ppgtt->base.total);
1542                 i915_init_vm(dev_priv, &ppgtt->base);
1543         }
1544
1545         return ret;
1546 }
1547
1548 int i915_ppgtt_init_hw(struct drm_device *dev)
1549 {
1550         struct drm_i915_private *dev_priv = dev->dev_private;
1551         struct intel_engine_cs *ring;
1552         struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt;
1553         int i, ret = 0;
1554
1555         /* In the case of execlists, PPGTT is enabled by the context descriptor
1556          * and the PDPs are contained within the context itself.  We don't
1557          * need to do anything here. */
1558         if (i915.enable_execlists)
1559                 return 0;
1560
1561         if (!USES_PPGTT(dev))
1562                 return 0;
1563
1564         if (IS_GEN6(dev))
1565                 gen6_ppgtt_enable(dev);
1566         else if (IS_GEN7(dev))
1567                 gen7_ppgtt_enable(dev);
1568         else if (INTEL_INFO(dev)->gen >= 8)
1569                 gen8_ppgtt_enable(dev);
1570         else
1571                 MISSING_CASE(INTEL_INFO(dev)->gen);
1572
1573         if (ppgtt) {
1574                 for_each_ring(ring, dev_priv, i) {
1575                         ret = ppgtt->switch_mm(ppgtt, ring);
1576                         if (ret != 0)
1577                                 return ret;
1578                 }
1579         }
1580
1581         return ret;
1582 }
1583 struct i915_hw_ppgtt *
1584 i915_ppgtt_create(struct drm_device *dev, struct drm_i915_file_private *fpriv)
1585 {
1586         struct i915_hw_ppgtt *ppgtt;
1587         int ret;
1588
1589         ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL);
1590         if (!ppgtt)
1591                 return ERR_PTR(-ENOMEM);
1592
1593         ret = i915_ppgtt_init(dev, ppgtt);
1594         if (ret) {
1595                 kfree(ppgtt);
1596                 return ERR_PTR(ret);
1597         }
1598
1599         ppgtt->file_priv = fpriv;
1600
1601         trace_i915_ppgtt_create(&ppgtt->base);
1602
1603         return ppgtt;
1604 }
1605
1606 void i915_ppgtt_release(struct kref *kref)
1607 {
1608         struct i915_hw_ppgtt *ppgtt =
1609                 container_of(kref, struct i915_hw_ppgtt, ref);
1610
1611         trace_i915_ppgtt_release(&ppgtt->base);
1612
1613         /* vmas should already be unbound */
1614         WARN_ON(!list_empty(&ppgtt->base.active_list));
1615         WARN_ON(!list_empty(&ppgtt->base.inactive_list));
1616
1617         list_del(&ppgtt->base.global_link);
1618         drm_mm_takedown(&ppgtt->base.mm);
1619
1620         ppgtt->base.cleanup(&ppgtt->base);
1621         kfree(ppgtt);
1622 }
1623
1624 extern int intel_iommu_gfx_mapped;
1625 /* Certain Gen5 chipsets require idling the GPU before
1626  * unmapping anything from the GTT when VT-d is enabled.
1627  */
1628 static inline bool needs_idle_maps(struct drm_device *dev)
1629 {
1630 #ifdef CONFIG_INTEL_IOMMU
1631         /* Query intel_iommu to see if we need the workaround. Presumably
1632          * the IOMMU driver was loaded first.
1633          */
1634         if (IS_GEN5(dev) && IS_MOBILE(dev) && intel_iommu_gfx_mapped)
1635                 return true;
1636 #endif
1637         return false;
1638 }
1639
1640 static bool do_idling(struct drm_i915_private *dev_priv)
1641 {
1642         bool ret = dev_priv->mm.interruptible;
1643
1644         if (unlikely(dev_priv->gtt.do_idle_maps)) {
1645                 dev_priv->mm.interruptible = false;
1646                 if (i915_gpu_idle(dev_priv->dev)) {
1647                         DRM_ERROR("Couldn't idle GPU\n");
1648                         /* Wait a bit, in hopes it avoids the hang */
1649                         udelay(10);
1650                 }
1651         }
1652
1653         return ret;
1654 }
1655
1656 static void undo_idling(struct drm_i915_private *dev_priv, bool interruptible)
1657 {
1658         if (unlikely(dev_priv->gtt.do_idle_maps))
1659                 dev_priv->mm.interruptible = interruptible;
1660 }
1661
1662 void i915_check_and_clear_faults(struct drm_device *dev)
1663 {
1664         struct drm_i915_private *dev_priv = dev->dev_private;
1665         struct intel_engine_cs *ring;
1666         int i;
1667
1668         if (INTEL_INFO(dev)->gen < 6)
1669                 return;
1670
1671         for_each_ring(ring, dev_priv, i) {
1672                 u32 fault_reg;
1673                 fault_reg = I915_READ(RING_FAULT_REG(ring));
1674                 if (fault_reg & RING_FAULT_VALID) {
1675                         DRM_DEBUG_DRIVER("Unexpected fault\n"
1676                                          "\tAddr: 0x%08lx\n"
1677                                          "\tAddress space: %s\n"
1678                                          "\tSource ID: %d\n"
1679                                          "\tType: %d\n",
1680                                          fault_reg & PAGE_MASK,
1681                                          fault_reg & RING_FAULT_GTTSEL_MASK ? "GGTT" : "PPGTT",
1682                                          RING_FAULT_SRCID(fault_reg),
1683                                          RING_FAULT_FAULT_TYPE(fault_reg));
1684                         I915_WRITE(RING_FAULT_REG(ring),
1685                                    fault_reg & ~RING_FAULT_VALID);
1686                 }
1687         }
1688         POSTING_READ(RING_FAULT_REG(&dev_priv->ring[RCS]));
1689 }
1690
1691 static void i915_ggtt_flush(struct drm_i915_private *dev_priv)
1692 {
1693         if (INTEL_INFO(dev_priv->dev)->gen < 6) {
1694                 intel_gtt_chipset_flush();
1695         } else {
1696                 I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
1697                 POSTING_READ(GFX_FLSH_CNTL_GEN6);
1698         }
1699 }
1700
1701 void i915_gem_suspend_gtt_mappings(struct drm_device *dev)
1702 {
1703         struct drm_i915_private *dev_priv = dev->dev_private;
1704
1705         /* Don't bother messing with faults pre GEN6 as we have little
1706          * documentation supporting that it's a good idea.
1707          */
1708         if (INTEL_INFO(dev)->gen < 6)
1709                 return;
1710
1711         i915_check_and_clear_faults(dev);
1712
1713         dev_priv->gtt.base.clear_range(&dev_priv->gtt.base,
1714                                        dev_priv->gtt.base.start,
1715                                        dev_priv->gtt.base.total,
1716                                        true);
1717
1718         i915_ggtt_flush(dev_priv);
1719 }
1720
1721 int i915_gem_gtt_prepare_object(struct drm_i915_gem_object *obj)
1722 {
1723         if (obj->has_dma_mapping)
1724                 return 0;
1725
1726         if (!dma_map_sg(&obj->base.dev->pdev->dev,
1727                         obj->pages->sgl, obj->pages->nents,
1728                         PCI_DMA_BIDIRECTIONAL))
1729                 return -ENOSPC;
1730
1731         return 0;
1732 }
1733
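/*
 * Gen8 GGTT PTEs are 64 bits wide.  On kernels without writeq() (32-bit
 * builds) the write is split into two 32-bit iowrites, low dword first;
 * the posting read and TLB flush in the callers are what publish the
 * update to the GPU, so the non-atomic fallback is presumed safe here.
 */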
1734 static inline void gen8_set_pte(void __iomem *addr, gen8_pte_t pte)
1735 {
1736 #ifdef writeq
1737         writeq(pte, addr);
1738 #else
1739         iowrite32((u32)pte, addr);
1740         iowrite32(pte >> 32, addr + 4);
1741 #endif
1742 }
1743
1744 static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
1745                                      struct sg_table *st,
1746                                      uint64_t start,
1747                                      enum i915_cache_level level, u32 unused)
1748 {
1749         struct drm_i915_private *dev_priv = vm->dev->dev_private;
1750         unsigned first_entry = start >> PAGE_SHIFT;
1751         gen8_pte_t __iomem *gtt_entries =
1752                 (gen8_pte_t __iomem *)dev_priv->gtt.gsm + first_entry;
1753         int i = 0;
1754         struct sg_page_iter sg_iter;
1755         dma_addr_t addr = 0; /* shut up gcc */
1756
1757         for_each_sg_page(st->sgl, &sg_iter, st->nents, 0) {
1758                 addr = sg_dma_address(sg_iter.sg) +
1759                         (sg_iter.sg_pgoffset << PAGE_SHIFT);
1760                 gen8_set_pte(&gtt_entries[i],
1761                              gen8_pte_encode(addr, level, true));
1762                 i++;
1763         }
1764
1765         /*
1766          * XXX: This serves as a posting read to make sure that the PTE has
1767          * actually been updated. There is some concern that, even though the
1768          * registers and PTEs are within the same BAR, they may be subject to
1769          * different (NUMA-like) access patterns. Therefore, even with the way we
1770          * assume the hardware should work, we keep this posting read for paranoia.
1771          */
1772         if (i != 0)
1773                 WARN_ON(readq(&gtt_entries[i-1])
1774                         != gen8_pte_encode(addr, level, true));
1775
1776         /* This next bit makes the above posting read even more important. We
1777          * want to flush the TLBs only after we're certain all the PTE updates
1778          * have finished.
1779          */
1780         I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
1781         POSTING_READ(GFX_FLSH_CNTL_GEN6);
1782 }
1783
1784 /*
1785  * Binds an object into the global gtt with the specified cache level. The object
1786  * will be accessible to the GPU via commands whose operands reference offsets
1787  * within the global GTT, as well as accessible by the CPU through the GMADR
1788  * mapped BAR (dev_priv->mm.gtt->gtt).
1789  */
1790 static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
1791                                      struct sg_table *st,
1792                                      uint64_t start,
1793                                      enum i915_cache_level level, u32 flags)
1794 {
1795         struct drm_i915_private *dev_priv = vm->dev->dev_private;
1796         unsigned first_entry = start >> PAGE_SHIFT;
1797         gen6_pte_t __iomem *gtt_entries =
1798                 (gen6_pte_t __iomem *)dev_priv->gtt.gsm + first_entry;
1799         int i = 0;
1800         struct sg_page_iter sg_iter;
1801         dma_addr_t addr = 0;
1802
1803         for_each_sg_page(st->sgl, &sg_iter, st->nents, 0) {
1804                 addr = sg_page_iter_dma_address(&sg_iter);
1805                 iowrite32(vm->pte_encode(addr, level, true, flags), &gtt_entries[i]);
1806                 i++;
1807         }
1808
1809         /* XXX: This serves as a posting read to make sure that the PTE has
1810          * actually been updated. There is some concern that, even though the
1811          * registers and PTEs are within the same BAR, they may be subject to
1812          * different (NUMA-like) access patterns. Therefore, even with the way we
1813          * assume the hardware should work, we keep this posting read for paranoia.
1814          */
1815         if (i != 0) {
1816                 unsigned long gtt = readl(&gtt_entries[i-1]);
1817                 WARN_ON(gtt != vm->pte_encode(addr, level, true, flags));
1818         }
1819
1820         /* This next bit makes the above posting read even more important. We
1821          * want to flush the TLBs only after we're certain all the PTE updates
1822          * have finished.
1823          */
1824         I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
1825         POSTING_READ(GFX_FLSH_CNTL_GEN6);
1826 }
1827
1828 static void gen8_ggtt_clear_range(struct i915_address_space *vm,
1829                                   uint64_t start,
1830                                   uint64_t length,
1831                                   bool use_scratch)
1832 {
1833         struct drm_i915_private *dev_priv = vm->dev->dev_private;
1834         unsigned first_entry = start >> PAGE_SHIFT;
1835         unsigned num_entries = length >> PAGE_SHIFT;
1836         gen8_pte_t scratch_pte, __iomem *gtt_base =
1837                 (gen8_pte_t __iomem *) dev_priv->gtt.gsm + first_entry;
1838         const int max_entries = gtt_total_entries(dev_priv->gtt) - first_entry;
1839         int i;
1840
1841         if (WARN(num_entries > max_entries,
1842                  "First entry = %d; Num entries = %d (max=%d)\n",
1843                  first_entry, num_entries, max_entries))
1844                 num_entries = max_entries;
1845
1846         scratch_pte = gen8_pte_encode(vm->scratch.addr,
1847                                       I915_CACHE_LLC,
1848                                       use_scratch);
1849         for (i = 0; i < num_entries; i++)
1850                 gen8_set_pte(&gtt_base[i], scratch_pte);
1851         readl(gtt_base);
1852 }
1853
1854 static void gen6_ggtt_clear_range(struct i915_address_space *vm,
1855                                   uint64_t start,
1856                                   uint64_t length,
1857                                   bool use_scratch)
1858 {
1859         struct drm_i915_private *dev_priv = vm->dev->dev_private;
1860         unsigned first_entry = start >> PAGE_SHIFT;
1861         unsigned num_entries = length >> PAGE_SHIFT;
1862         gen6_pte_t scratch_pte, __iomem *gtt_base =
1863                 (gen6_pte_t __iomem *) dev_priv->gtt.gsm + first_entry;
1864         const int max_entries = gtt_total_entries(dev_priv->gtt) - first_entry;
1865         int i;
1866
1867         if (WARN(num_entries > max_entries,
1868                  "First entry = %d; Num entries = %d (max=%d)\n",
1869                  first_entry, num_entries, max_entries))
1870                 num_entries = max_entries;
1871
1872         scratch_pte = vm->pte_encode(vm->scratch.addr, I915_CACHE_LLC, use_scratch, 0);
1873
1874         for (i = 0; i < num_entries; i++)
1875                 iowrite32(scratch_pte, &gtt_base[i]);
1876         readl(gtt_base);
1877 }
1878
1879 static void i915_ggtt_insert_entries(struct i915_address_space *vm,
1880                                      struct sg_table *pages,
1881                                      uint64_t start,
1882                                      enum i915_cache_level cache_level, u32 unused)
1883 {
1884         unsigned int flags = (cache_level == I915_CACHE_NONE) ?
1885                 AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;
1886
1887         intel_gtt_insert_sg_entries(pages, start >> PAGE_SHIFT, flags);
1888
1889 }
1890
1891 static void i915_ggtt_clear_range(struct i915_address_space *vm,
1892                                   uint64_t start,
1893                                   uint64_t length,
1894                                   bool unused)
1895 {
1896         unsigned first_entry = start >> PAGE_SHIFT;
1897         unsigned num_entries = length >> PAGE_SHIFT;
1898         intel_gtt_clear_range(first_entry, num_entries);
1899 }
1900
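/*
 * A GGTT bind writes PTEs into the global GTT via vm->insert_entries();
 * when an aliasing PPGTT exists and LOCAL_BIND is requested, the same
 * pages are additionally mirrored into that PPGTT at the same offset.
 * Without an aliasing PPGTT every bind ends up in the global GTT.
 */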
1901 static void ggtt_bind_vma(struct i915_vma *vma,
1902                           enum i915_cache_level cache_level,
1903                           u32 flags)
1904 {
1905         struct drm_device *dev = vma->vm->dev;
1906         struct drm_i915_private *dev_priv = dev->dev_private;
1907         struct drm_i915_gem_object *obj = vma->obj;
1908         struct sg_table *pages = obj->pages;
1909         u32 pte_flags = 0;
1910
1911         /* Currently applicable only to VLV */
1912         if (obj->gt_ro)
1913                 pte_flags |= PTE_READ_ONLY;
1914
1915         if (i915_is_ggtt(vma->vm))
1916                 pages = vma->ggtt_view.pages;
1917
1918         if (!dev_priv->mm.aliasing_ppgtt || flags & GLOBAL_BIND) {
1919                 vma->vm->insert_entries(vma->vm, pages,
1920                                         vma->node.start,
1921                                         cache_level, pte_flags);
1922
1923                 vma->bound |= GLOBAL_BIND;
1924         }
1925
1926         if (dev_priv->mm.aliasing_ppgtt && flags & LOCAL_BIND) {
1927                 struct i915_hw_ppgtt *appgtt = dev_priv->mm.aliasing_ppgtt;
1928                 appgtt->base.insert_entries(&appgtt->base, pages,
1929                                             vma->node.start,
1930                                             cache_level, pte_flags);
1931         }
1932 }
1933
1934 static void ggtt_unbind_vma(struct i915_vma *vma)
1935 {
1936         struct drm_device *dev = vma->vm->dev;
1937         struct drm_i915_private *dev_priv = dev->dev_private;
1938         struct drm_i915_gem_object *obj = vma->obj;
1939
1940         if (vma->bound & GLOBAL_BIND) {
1941                 vma->vm->clear_range(vma->vm,
1942                                      vma->node.start,
1943                                      obj->base.size,
1944                                      true);
1945         }
1946
1947         if (dev_priv->mm.aliasing_ppgtt && vma->bound & LOCAL_BIND) {
1948                 struct i915_hw_ppgtt *appgtt = dev_priv->mm.aliasing_ppgtt;
1949                 appgtt->base.clear_range(&appgtt->base,
1950                                          vma->node.start,
1951                                          obj->base.size,
1952                                          true);
1953         }
1954 }
1955
1956 void i915_gem_gtt_finish_object(struct drm_i915_gem_object *obj)
1957 {
1958         struct drm_device *dev = obj->base.dev;
1959         struct drm_i915_private *dev_priv = dev->dev_private;
1960         bool interruptible;
1961
1962         interruptible = do_idling(dev_priv);
1963
1964         if (!obj->has_dma_mapping)
1965                 dma_unmap_sg(&dev->pdev->dev,
1966                              obj->pages->sgl, obj->pages->nents,
1967                              PCI_DMA_BIDIRECTIONAL);
1968
1969         undo_idling(dev_priv, interruptible);
1970 }
1971
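/*
 * Only installed on !HAS_LLC platforms (see i915_gem_setup_global_gtt),
 * where objects of differing cache levels apparently must not sit in
 * adjacent GTT pages: a 4096 byte guard is carved out of a hole next to
 * any neighbour whose "color" (cache level) differs, shrinking the
 * usable range at its start and/or end.
 */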
1972 static void i915_gtt_color_adjust(struct drm_mm_node *node,
1973                                   unsigned long color,
1974                                   u64 *start,
1975                                   u64 *end)
1976 {
1977         if (node->color != color)
1978                 *start += 4096;
1979
1980         if (!list_empty(&node->node_list)) {
1981                 node = list_entry(node->node_list.next,
1982                                   struct drm_mm_node,
1983                                   node_list);
1984                 if (node->allocated && node->color != color)
1985                         *end -= 4096;
1986         }
1987 }
1988
1989 static int i915_gem_setup_global_gtt(struct drm_device *dev,
1990                                      unsigned long start,
1991                                      unsigned long mappable_end,
1992                                      unsigned long end)
1993 {
1994         /* Let GEM manage all of the aperture.
1995          *
1996          * However, leave one page at the end still bound to the scratch page.
1997          * There are a number of places where the hardware apparently prefetches
1998          * past the end of the object, and we've seen multiple hangs with the
1999          * GPU head pointer stuck in a batchbuffer bound at the last page of the
2000          * aperture.  One page should be enough to keep any prefetching inside
2001          * of the aperture.
2002          */
2003         struct drm_i915_private *dev_priv = dev->dev_private;
2004         struct i915_address_space *ggtt_vm = &dev_priv->gtt.base;
2005         struct drm_mm_node *entry;
2006         struct drm_i915_gem_object *obj;
2007         unsigned long hole_start, hole_end;
2008         int ret;
2009
2010         BUG_ON(mappable_end > end);
2011
2012         /* Subtract the guard page ... */
2013         drm_mm_init(&ggtt_vm->mm, start, end - start - PAGE_SIZE);
2014
2015         dev_priv->gtt.base.start = start;
2016         dev_priv->gtt.base.total = end - start;
2017
2018         if (intel_vgpu_active(dev)) {
2019                 ret = intel_vgt_balloon(dev);
2020                 if (ret)
2021                         return ret;
2022         }
2023
2024         if (!HAS_LLC(dev))
2025                 dev_priv->gtt.base.mm.color_adjust = i915_gtt_color_adjust;
2026
2027         /* Mark any preallocated objects as occupied */
2028         list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
2029                 struct i915_vma *vma = i915_gem_obj_to_vma(obj, ggtt_vm);
2030
2031                 DRM_DEBUG_KMS("reserving preallocated space: %lx + %zx\n",
2032                               i915_gem_obj_ggtt_offset(obj), obj->base.size);
2033
2034                 WARN_ON(i915_gem_obj_ggtt_bound(obj));
2035                 ret = drm_mm_reserve_node(&ggtt_vm->mm, &vma->node);
2036                 if (ret) {
2037                         DRM_DEBUG_KMS("Reservation failed: %i\n", ret);
2038                         return ret;
2039                 }
2040                 vma->bound |= GLOBAL_BIND;
2041         }
2042
2043         /* Clear any non-preallocated blocks */
2044         drm_mm_for_each_hole(entry, &ggtt_vm->mm, hole_start, hole_end) {
2045                 DRM_DEBUG_KMS("clearing unused GTT space: [%lx, %lx]\n",
2046                               hole_start, hole_end);
2047                 ggtt_vm->clear_range(ggtt_vm, hole_start,
2048                                      hole_end - hole_start, true);
2049         }
2050
2051         /* And finally clear the reserved guard page */
2052         ggtt_vm->clear_range(ggtt_vm, end - PAGE_SIZE, PAGE_SIZE, true);
2053
2054         if (USES_PPGTT(dev) && !USES_FULL_PPGTT(dev)) {
2055                 struct i915_hw_ppgtt *ppgtt;
2056
2057                 ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL);
2058                 if (!ppgtt)
2059                         return -ENOMEM;
2060
2061                 ret = __hw_ppgtt_init(dev, ppgtt);
2062                 if (ret) {
2063                         ppgtt->base.cleanup(&ppgtt->base);
2064                         kfree(ppgtt);
2065                         return ret;
2066                 }
2067
2068                 if (ppgtt->base.allocate_va_range)
2069                         ret = ppgtt->base.allocate_va_range(&ppgtt->base, 0,
2070                                                             ppgtt->base.total);
2071                 if (ret) {
2072                         ppgtt->base.cleanup(&ppgtt->base);
2073                         kfree(ppgtt);
2074                         return ret;
2075                 }
2076
2077                 ppgtt->base.clear_range(&ppgtt->base,
2078                                         ppgtt->base.start,
2079                                         ppgtt->base.total,
2080                                         true);
2081
2082                 dev_priv->mm.aliasing_ppgtt = ppgtt;
2083         }
2084
2085         return 0;
2086 }
2087
2088 void i915_gem_init_global_gtt(struct drm_device *dev)
2089 {
2090         struct drm_i915_private *dev_priv = dev->dev_private;
2091         unsigned long gtt_size, mappable_size;
2092
2093         gtt_size = dev_priv->gtt.base.total;
2094         mappable_size = dev_priv->gtt.mappable_end;
2095
2096         i915_gem_setup_global_gtt(dev, 0, mappable_size, gtt_size);
2097 }
2098
2099 void i915_global_gtt_cleanup(struct drm_device *dev)
2100 {
2101         struct drm_i915_private *dev_priv = dev->dev_private;
2102         struct i915_address_space *vm = &dev_priv->gtt.base;
2103
2104         if (dev_priv->mm.aliasing_ppgtt) {
2105                 struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt;
2106
2107                 ppgtt->base.cleanup(&ppgtt->base);
2108         }
2109
2110         if (drm_mm_initialized(&vm->mm)) {
2111                 if (intel_vgpu_active(dev))
2112                         intel_vgt_deballoon();
2113
2114                 drm_mm_takedown(&vm->mm);
2115                 list_del(&vm->global_link);
2116         }
2117
2118         vm->cleanup(vm);
2119 }
2120
2121 static int setup_scratch_page(struct drm_device *dev)
2122 {
2123         struct drm_i915_private *dev_priv = dev->dev_private;
2124         struct page *page;
2125         dma_addr_t dma_addr;
2126
2127         page = alloc_page(GFP_KERNEL | GFP_DMA32 | __GFP_ZERO);
2128         if (page == NULL)
2129                 return -ENOMEM;
2130         set_pages_uc(page, 1);
2131
2132 #ifdef CONFIG_INTEL_IOMMU
2133         dma_addr = pci_map_page(dev->pdev, page, 0, PAGE_SIZE,
2134                                 PCI_DMA_BIDIRECTIONAL);
2135         if (pci_dma_mapping_error(dev->pdev, dma_addr))
2136                 return -EINVAL;
2137 #else
2138         dma_addr = page_to_phys(page);
2139 #endif
2140         dev_priv->gtt.base.scratch.page = page;
2141         dev_priv->gtt.base.scratch.addr = dma_addr;
2142
2143         return 0;
2144 }
2145
2146 static void teardown_scratch_page(struct drm_device *dev)
2147 {
2148         struct drm_i915_private *dev_priv = dev->dev_private;
2149         struct page *page = dev_priv->gtt.base.scratch.page;
2150
2151         set_pages_wb(page, 1);
2152         pci_unmap_page(dev->pdev, dev_priv->gtt.base.scratch.addr,
2153                        PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
2154         __free_page(page);
2155 }
2156
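/*
 * The GGMS field of the GMCH control word encodes the size of the GTT
 * itself; the probe code then derives the address space size as
 * (gtt_size_in_bytes / pte_size) << PAGE_SHIFT.  E.g. a gen6 GGMS value
 * of 2 means 2MB of 4-byte PTEs, i.e. 512K entries covering a 2GB GGTT.
 */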
2157 static inline unsigned int gen6_get_total_gtt_size(u16 snb_gmch_ctl)
2158 {
2159         snb_gmch_ctl >>= SNB_GMCH_GGMS_SHIFT;
2160         snb_gmch_ctl &= SNB_GMCH_GGMS_MASK;
2161         return snb_gmch_ctl << 20;
2162 }
2163
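/*
 * On gen8+ the same field is a power-of-two encoding instead: a non-zero
 * value n selects (1 << n) MB of 8-byte PTEs, so e.g. n = 3 gives 8MB of
 * PTEs and therefore a 4GB GGTT.  The clamp below caps 32-bit builds at
 * 4MB of PTEs, matching the 2GB limit described in the comment there.
 */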
2164 static inline unsigned int gen8_get_total_gtt_size(u16 bdw_gmch_ctl)
2165 {
2166         bdw_gmch_ctl >>= BDW_GMCH_GGMS_SHIFT;
2167         bdw_gmch_ctl &= BDW_GMCH_GGMS_MASK;
2168         if (bdw_gmch_ctl)
2169                 bdw_gmch_ctl = 1 << bdw_gmch_ctl;
2170
2171 #ifdef CONFIG_X86_32
2172         /* Limit 32b platforms to a 2GB GGTT: 4 << 20 / pte size * PAGE_SIZE */
2173         if (bdw_gmch_ctl > 4)
2174                 bdw_gmch_ctl = 4;
2175 #endif
2176
2177         return bdw_gmch_ctl << 20;
2178 }
2179
2180 static inline unsigned int chv_get_total_gtt_size(u16 gmch_ctrl)
2181 {
2182         gmch_ctrl >>= SNB_GMCH_GGMS_SHIFT;
2183         gmch_ctrl &= SNB_GMCH_GGMS_MASK;
2184
2185         if (gmch_ctrl)
2186                 return 1 << (20 + gmch_ctrl);
2187
2188         return 0;
2189 }
2190
2191 static inline size_t gen6_get_stolen_size(u16 snb_gmch_ctl)
2192 {
2193         snb_gmch_ctl >>= SNB_GMCH_GMS_SHIFT;
2194         snb_gmch_ctl &= SNB_GMCH_GMS_MASK;
2195         return snb_gmch_ctl << 25; /* 32 MB units */
2196 }
2197
2198 static inline size_t gen8_get_stolen_size(u16 bdw_gmch_ctl)
2199 {
2200         bdw_gmch_ctl >>= BDW_GMCH_GMS_SHIFT;
2201         bdw_gmch_ctl &= BDW_GMCH_GMS_MASK;
2202         return bdw_gmch_ctl << 25; /* 32 MB units */
2203 }
2204
2205 static size_t chv_get_stolen_size(u16 gmch_ctrl)
2206 {
2207         gmch_ctrl >>= SNB_GMCH_GMS_SHIFT;
2208         gmch_ctrl &= SNB_GMCH_GMS_MASK;
2209
2210         /*
2211          * 0x0  to 0x10: 32MB increments starting at 0MB
2212          * 0x11 to 0x16: 4MB increments starting at 8MB
2213          * 0x17 to 0x1d: 4MB increments starting at 36MB
2214          */
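        /* For instance, by the table above 0x13 decodes to
         * (0x13 - 0x11 + 2) << 22 = 16MB of stolen memory, and 0x19 to
         * (0x19 - 0x17 + 9) << 22 = 44MB.
         */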
2215         if (gmch_ctrl < 0x11)
2216                 return gmch_ctrl << 25;
2217         else if (gmch_ctrl < 0x17)
2218                 return (gmch_ctrl - 0x11 + 2) << 22;
2219         else
2220                 return (gmch_ctrl - 0x17 + 9) << 22;
2221 }
2222
2223 static size_t gen9_get_stolen_size(u16 gen9_gmch_ctl)
2224 {
2225         gen9_gmch_ctl >>= BDW_GMCH_GMS_SHIFT;
2226         gen9_gmch_ctl &= BDW_GMCH_GMS_MASK;
2227
2228         if (gen9_gmch_ctl < 0xf0)
2229                 return gen9_gmch_ctl << 25; /* 32 MB units */
2230         else
2231                 /* 0xf0 and above: 4MB increments, i.e. 0xf0 -> 4MB, 0xf1 -> 8MB, ... */
2232                 return (gen9_gmch_ctl - 0xf0 + 1) << 22;
2233 }
2234
2235 static int ggtt_probe_common(struct drm_device *dev,
2236                              size_t gtt_size)
2237 {
2238         struct drm_i915_private *dev_priv = dev->dev_private;
2239         phys_addr_t gtt_phys_addr;
2240         int ret;
2241
2242         /* For Modern GENs the PTEs and register space are split in the BAR */
2243         gtt_phys_addr = pci_resource_start(dev->pdev, 0) +
2244                 (pci_resource_len(dev->pdev, 0) / 2);
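        /*
         * E.g. with a (hypothetical) 8MB BAR 0 the registers occupy the
         * first 4MB and the GTT entries (the "GSM") start at BAR0 + 4MB,
         * which is the range ioremapped below.
         */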
2245
2246         /*
2247          * On BXT writes larger than 64 bit to the GTT pagetable range will be
2248          * dropped. For WC mappings in general we have 64 byte burst writes
2249          * when the WC buffer is flushed, so we can't use it, but have to
2250          * resort to an uncached mapping. The WC issue is easily caught by the
2251          * readback check when writing GTT PTE entries.
2252          */
2253         if (IS_BROXTON(dev))
2254                 dev_priv->gtt.gsm = ioremap_nocache(gtt_phys_addr, gtt_size);
2255         else
2256                 dev_priv->gtt.gsm = ioremap_wc(gtt_phys_addr, gtt_size);
2257         if (!dev_priv->gtt.gsm) {
2258                 DRM_ERROR("Failed to map the gtt page table\n");
2259                 return -ENOMEM;
2260         }
2261
2262         ret = setup_scratch_page(dev);
2263         if (ret) {
2264                 DRM_ERROR("Scratch setup failed\n");
2265                 /* iounmap will also get called at remove, but meh */
2266                 iounmap(dev_priv->gtt.gsm);
2267         }
2268
2269         return ret;
2270 }
2271
2272 /* The GGTT and PPGTT need a private PPAT setup in order to handle cacheability
2273  * bits. When using advanced contexts each context stores its own PAT, but
2274  * writing this data shouldn't be harmful even in those cases. */
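/* Each of the eight PAT entries is presumably packed into one byte of the
 * 64-bit value built below (GEN8_PPAT(i, x) shifting x into byte i), so
 * entries 0-3 land in GEN8_PRIVATE_PAT and entries 4-7 in the +4 register.
 */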
2275 static void bdw_setup_private_ppat(struct drm_i915_private *dev_priv)
2276 {
2277         uint64_t pat;
2278
2279         pat = GEN8_PPAT(0, GEN8_PPAT_WB | GEN8_PPAT_LLC)     | /* for normal objects, no eLLC */
2280               GEN8_PPAT(1, GEN8_PPAT_WC | GEN8_PPAT_LLCELLC) | /* for something pointing to ptes? */
2281               GEN8_PPAT(2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC) | /* for scanout with eLLC */
2282               GEN8_PPAT(3, GEN8_PPAT_UC)                     | /* Uncached objects, mostly for scanout */
2283               GEN8_PPAT(4, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0)) |
2284               GEN8_PPAT(5, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1)) |
2285               GEN8_PPAT(6, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2)) |
2286               GEN8_PPAT(7, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3));
2287
2288         if (!USES_PPGTT(dev_priv->dev))
2289                 /* Spec: "For GGTT, there is NO pat_sel[2:0] from the entry,
2290                  * so RTL will always use the value corresponding to
2291                  * pat_sel = 000".
2292                  * So let's disable cache for GGTT to avoid screen corruptions.
2293                  * MOCS still can be used though.
2294                  * - System agent ggtt writes (i.e. cpu gtt mmaps) already work
2295                  * before this patch, i.e. the same uncached + snooping access
2296                  * like on gen6/7 seems to be in effect.
2297                  * - So this just fixes blitter/render access. Again it looks
2298                  * like it's not just uncached access, but uncached + snooping.
2299                  * So we can still hold onto all our assumptions wrt cpu
2300                  * clflushing on LLC machines.
2301                  */
2302                 pat = GEN8_PPAT(0, GEN8_PPAT_UC);
2303
2304         /* XXX: spec defines this as 2 distinct registers. It's unclear if a 64b
2305          * write would work. */
2306         I915_WRITE(GEN8_PRIVATE_PAT, pat);
2307         I915_WRITE(GEN8_PRIVATE_PAT + 4, pat >> 32);
2308 }
2309
2310 static void chv_setup_private_ppat(struct drm_i915_private *dev_priv)
2311 {
2312         uint64_t pat;
2313
2314         /*
2315          * Map WB on BDW to snooped on CHV.
2316          *
2317          * Only the snoop bit has meaning for CHV, the rest is
2318          * ignored.
2319          *
2320          * The hardware will never snoop for certain types of accesses:
2321          * - CPU GTT (GMADR->GGTT->no snoop->memory)
2322          * - PPGTT page tables
2323          * - some other special cycles
2324          *
2325          * As with BDW, we also need to consider the following for GT accesses:
2326          * "For GGTT, there is NO pat_sel[2:0] from the entry,
2327          * so RTL will always use the value corresponding to
2328          * pat_sel = 000".
2329          * Which means we must set the snoop bit in PAT entry 0
2330          * in order to keep the global status page working.
2331          */
2332         pat = GEN8_PPAT(0, CHV_PPAT_SNOOP) |
2333               GEN8_PPAT(1, 0) |
2334               GEN8_PPAT(2, 0) |
2335               GEN8_PPAT(3, 0) |
2336               GEN8_PPAT(4, CHV_PPAT_SNOOP) |
2337               GEN8_PPAT(5, CHV_PPAT_SNOOP) |
2338               GEN8_PPAT(6, CHV_PPAT_SNOOP) |
2339               GEN8_PPAT(7, CHV_PPAT_SNOOP);
2340
2341         I915_WRITE(GEN8_PRIVATE_PAT, pat);
2342         I915_WRITE(GEN8_PRIVATE_PAT + 4, pat >> 32);
2343 }
2344
2345 static int gen8_gmch_probe(struct drm_device *dev,
2346                            size_t *gtt_total,
2347                            size_t *stolen,
2348                            phys_addr_t *mappable_base,
2349                            unsigned long *mappable_end)
2350 {
2351         struct drm_i915_private *dev_priv = dev->dev_private;
2352         unsigned int gtt_size;
2353         u16 snb_gmch_ctl;
2354         int ret;
2355
2356         /* TODO: We're not aware of mappable constraints on gen8 yet */
2357         *mappable_base = pci_resource_start(dev->pdev, 2);
2358         *mappable_end = pci_resource_len(dev->pdev, 2);
2359
2360         if (!pci_set_dma_mask(dev->pdev, DMA_BIT_MASK(39)))
2361                 pci_set_consistent_dma_mask(dev->pdev, DMA_BIT_MASK(39));
2362
2363         pci_read_config_word(dev->pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);
2364
2365         if (INTEL_INFO(dev)->gen >= 9) {
2366                 *stolen = gen9_get_stolen_size(snb_gmch_ctl);
2367                 gtt_size = gen8_get_total_gtt_size(snb_gmch_ctl);
2368         } else if (IS_CHERRYVIEW(dev)) {
2369                 *stolen = chv_get_stolen_size(snb_gmch_ctl);
2370                 gtt_size = chv_get_total_gtt_size(snb_gmch_ctl);
2371         } else {
2372                 *stolen = gen8_get_stolen_size(snb_gmch_ctl);
2373                 gtt_size = gen8_get_total_gtt_size(snb_gmch_ctl);
2374         }
2375
2376         *gtt_total = (gtt_size / sizeof(gen8_pte_t)) << PAGE_SHIFT;
2377
2378         if (IS_CHERRYVIEW(dev) || IS_BROXTON(dev))
2379                 chv_setup_private_ppat(dev_priv);
2380         else
2381                 bdw_setup_private_ppat(dev_priv);
2382
2383         ret = ggtt_probe_common(dev, gtt_size);
2384
2385         dev_priv->gtt.base.clear_range = gen8_ggtt_clear_range;
2386         dev_priv->gtt.base.insert_entries = gen8_ggtt_insert_entries;
2387         dev_priv->gtt.base.bind_vma = ggtt_bind_vma;
2388         dev_priv->gtt.base.unbind_vma = ggtt_unbind_vma;
2389
2390         return ret;
2391 }
2392
2393 static int gen6_gmch_probe(struct drm_device *dev,
2394                            size_t *gtt_total,
2395                            size_t *stolen,
2396                            phys_addr_t *mappable_base,
2397                            unsigned long *mappable_end)
2398 {
2399         struct drm_i915_private *dev_priv = dev->dev_private;
2400         unsigned int gtt_size;
2401         u16 snb_gmch_ctl;
2402         int ret;
2403
2404         *mappable_base = pci_resource_start(dev->pdev, 2);
2405         *mappable_end = pci_resource_len(dev->pdev, 2);
2406
2407         /* 64/512MB is the current min/max we actually know of, but this is just
2408          * a coarse sanity check.
2409          */
2410         if ((*mappable_end < (64<<20) || (*mappable_end > (512<<20)))) {
2411                 DRM_ERROR("Unknown GMADR size (%lx)\n",
2412                           dev_priv->gtt.mappable_end);
2413                 return -ENXIO;
2414         }
2415
2416         if (!pci_set_dma_mask(dev->pdev, DMA_BIT_MASK(40)))
2417                 pci_set_consistent_dma_mask(dev->pdev, DMA_BIT_MASK(40));
2418         pci_read_config_word(dev->pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);
2419
2420         *stolen = gen6_get_stolen_size(snb_gmch_ctl);
2421
2422         gtt_size = gen6_get_total_gtt_size(snb_gmch_ctl);
2423         *gtt_total = (gtt_size / sizeof(gen6_pte_t)) << PAGE_SHIFT;
2424
2425         ret = ggtt_probe_common(dev, gtt_size);
2426
2427         dev_priv->gtt.base.clear_range = gen6_ggtt_clear_range;
2428         dev_priv->gtt.base.insert_entries = gen6_ggtt_insert_entries;
2429         dev_priv->gtt.base.bind_vma = ggtt_bind_vma;
2430         dev_priv->gtt.base.unbind_vma = ggtt_unbind_vma;
2431
2432         return ret;
2433 }
2434
2435 static void gen6_gmch_remove(struct i915_address_space *vm)
2436 {
2437
2438         struct i915_gtt *gtt = container_of(vm, struct i915_gtt, base);
2439
2440         iounmap(gtt->gsm);
2441         teardown_scratch_page(vm->dev);
2442 }
2443
2444 static int i915_gmch_probe(struct drm_device *dev,
2445                            size_t *gtt_total,
2446                            size_t *stolen,
2447                            phys_addr_t *mappable_base,
2448                            unsigned long *mappable_end)
2449 {
2450         struct drm_i915_private *dev_priv = dev->dev_private;
2451         int ret;
2452
2453         ret = intel_gmch_probe(dev_priv->bridge_dev, dev_priv->dev->pdev, NULL);
2454         if (!ret) {
2455                 DRM_ERROR("failed to set up gmch\n");
2456                 return -EIO;
2457         }
2458
2459         intel_gtt_get(gtt_total, stolen, mappable_base, mappable_end);
2460
2461         dev_priv->gtt.do_idle_maps = needs_idle_maps(dev_priv->dev);
2462         dev_priv->gtt.base.insert_entries = i915_ggtt_insert_entries;
2463         dev_priv->gtt.base.clear_range = i915_ggtt_clear_range;
2464         dev_priv->gtt.base.bind_vma = ggtt_bind_vma;
2465         dev_priv->gtt.base.unbind_vma = ggtt_unbind_vma;
2466
2467         if (unlikely(dev_priv->gtt.do_idle_maps))
2468                 DRM_INFO("applying Ironlake quirks for intel_iommu\n");
2469
2470         return 0;
2471 }
2472
2473 static void i915_gmch_remove(struct i915_address_space *vm)
2474 {
2475         intel_gmch_remove();
2476 }
2477
2478 int i915_gem_gtt_init(struct drm_device *dev)
2479 {
2480         struct drm_i915_private *dev_priv = dev->dev_private;
2481         struct i915_gtt *gtt = &dev_priv->gtt;
2482         int ret;
2483
2484         if (INTEL_INFO(dev)->gen <= 5) {
2485                 gtt->gtt_probe = i915_gmch_probe;
2486                 gtt->base.cleanup = i915_gmch_remove;
2487         } else if (INTEL_INFO(dev)->gen < 8) {
2488                 gtt->gtt_probe = gen6_gmch_probe;
2489                 gtt->base.cleanup = gen6_gmch_remove;
2490                 if (IS_HASWELL(dev) && dev_priv->ellc_size)
2491                         gtt->base.pte_encode = iris_pte_encode;
2492                 else if (IS_HASWELL(dev))
2493                         gtt->base.pte_encode = hsw_pte_encode;
2494                 else if (IS_VALLEYVIEW(dev))
2495                         gtt->base.pte_encode = byt_pte_encode;
2496                 else if (INTEL_INFO(dev)->gen >= 7)
2497                         gtt->base.pte_encode = ivb_pte_encode;
2498                 else
2499                         gtt->base.pte_encode = snb_pte_encode;
2500         } else {
2501                 dev_priv->gtt.gtt_probe = gen8_gmch_probe;
2502                 dev_priv->gtt.base.cleanup = gen6_gmch_remove;
2503         }
2504
2505         ret = gtt->gtt_probe(dev, &gtt->base.total, &gtt->stolen_size,
2506                              &gtt->mappable_base, &gtt->mappable_end);
2507         if (ret)
2508                 return ret;
2509
2510         gtt->base.dev = dev;
2511
2512         /* GMADR is the PCI mmio aperture into the global GTT. */
2513         DRM_INFO("Memory usable by graphics device = %zdM\n",
2514                  gtt->base.total >> 20);
2515         DRM_DEBUG_DRIVER("GMADR size = %ldM\n", gtt->mappable_end >> 20);
2516         DRM_DEBUG_DRIVER("GTT stolen size = %zdM\n", gtt->stolen_size >> 20);
2517 #ifdef CONFIG_INTEL_IOMMU
2518         if (intel_iommu_gfx_mapped)
2519                 DRM_INFO("VT-d active for gfx access\n");
2520 #endif
2521         /*
2522          * i915.enable_ppgtt is read-only, so do an early pass to validate the
2523          * user's requested state against the hardware/driver capabilities.  We
2524          * do this now so that we can print out any log messages once rather
2525          * than every time we check intel_enable_ppgtt().
2526          */
2527         i915.enable_ppgtt = sanitize_enable_ppgtt(dev, i915.enable_ppgtt);
2528         DRM_DEBUG_DRIVER("ppgtt mode: %i\n", i915.enable_ppgtt);
2529
2530         return 0;
2531 }
2532
2533 void i915_gem_restore_gtt_mappings(struct drm_device *dev)
2534 {
2535         struct drm_i915_private *dev_priv = dev->dev_private;
2536         struct drm_i915_gem_object *obj;
2537         struct i915_address_space *vm;
2538
2539         i915_check_and_clear_faults(dev);
2540
2541         /* First fill our portion of the GTT with scratch pages */
2542         dev_priv->gtt.base.clear_range(&dev_priv->gtt.base,
2543                                        dev_priv->gtt.base.start,
2544                                        dev_priv->gtt.base.total,
2545                                        true);
2546
2547         list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
2548                 struct i915_vma *vma = i915_gem_obj_to_vma(obj,
2549                                                            &dev_priv->gtt.base);
2550                 if (!vma)
2551                         continue;
2552
2553                 i915_gem_clflush_object(obj, obj->pin_display);
2554                 WARN_ON(i915_vma_bind(vma, obj->cache_level, PIN_UPDATE));
2555         }
2556
2557
2558         if (INTEL_INFO(dev)->gen >= 8) {
2559                 if (IS_CHERRYVIEW(dev) || IS_BROXTON(dev))
2560                         chv_setup_private_ppat(dev_priv);
2561                 else
2562                         bdw_setup_private_ppat(dev_priv);
2563
2564                 return;
2565         }
2566
2567         if (USES_PPGTT(dev)) {
2568                 list_for_each_entry(vm, &dev_priv->vm_list, global_link) {
2569                         /* TODO: Perhaps it shouldn't be gen6 specific */
2570
2571                         struct i915_hw_ppgtt *ppgtt =
2572                                         container_of(vm, struct i915_hw_ppgtt,
2573                                                      base);
2574
2575                         if (i915_is_ggtt(vm))
2576                                 ppgtt = dev_priv->mm.aliasing_ppgtt;
2577
2578                         gen6_write_page_range(dev_priv, &ppgtt->pd,
2579                                               0, ppgtt->base.total);
2580                 }
2581         }
2582
2583         i915_ggtt_flush(dev_priv);
2584 }
2585
2586 static struct i915_vma *
2587 __i915_gem_vma_create(struct drm_i915_gem_object *obj,
2588                       struct i915_address_space *vm,
2589                       const struct i915_ggtt_view *ggtt_view)
2590 {
2591         struct i915_vma *vma;
2592
2593         if (WARN_ON(i915_is_ggtt(vm) != !!ggtt_view))
2594                 return ERR_PTR(-EINVAL);
2595
2596         vma = kmem_cache_zalloc(to_i915(obj->base.dev)->vmas, GFP_KERNEL);
2597         if (vma == NULL)
2598                 return ERR_PTR(-ENOMEM);
2599
2600         INIT_LIST_HEAD(&vma->vma_link);
2601         INIT_LIST_HEAD(&vma->mm_list);
2602         INIT_LIST_HEAD(&vma->exec_list);
2603         vma->vm = vm;
2604         vma->obj = obj;
2605
2606         if (i915_is_ggtt(vm))
2607                 vma->ggtt_view = *ggtt_view;
2608
2609         list_add_tail(&vma->vma_link, &obj->vma_list);
2610         if (!i915_is_ggtt(vm))
2611                 i915_ppgtt_get(i915_vm_to_ppgtt(vm));
2612
2613         return vma;
2614 }
2615
2616 struct i915_vma *
2617 i915_gem_obj_lookup_or_create_vma(struct drm_i915_gem_object *obj,
2618                                   struct i915_address_space *vm)
2619 {
2620         struct i915_vma *vma;
2621
2622         vma = i915_gem_obj_to_vma(obj, vm);
2623         if (!vma)
2624                 vma = __i915_gem_vma_create(obj, vm,
2625                                             i915_is_ggtt(vm) ? &i915_ggtt_view_normal : NULL);
2626
2627         return vma;
2628 }
2629
2630 struct i915_vma *
2631 i915_gem_obj_lookup_or_create_ggtt_vma(struct drm_i915_gem_object *obj,
2632                                        const struct i915_ggtt_view *view)
2633 {
2634         struct i915_address_space *ggtt = i915_obj_to_ggtt(obj);
2635         struct i915_vma *vma;
2636
2637         if (WARN_ON(!view))
2638                 return ERR_PTR(-EINVAL);
2639
2640         vma = i915_gem_obj_to_ggtt_view(obj, view);
2641
2642         if (IS_ERR(vma))
2643                 return vma;
2644
2645         if (!vma)
2646                 vma = __i915_gem_vma_create(obj, ggtt, view);
2647
2648         return vma;
2649
2650 }
2651
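/*
 * Walks the source pages column by column, starting from the bottom-left
 * page, to build the rotated mapping.  For a 2x2 object whose pages are
 * laid out row-major as
 *
 *    01
 *    23
 *
 * the resulting sg order is 2, 0, 3, 1.
 */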
2652 static void
2653 rotate_pages(dma_addr_t *in, unsigned int width, unsigned int height,
2654              struct sg_table *st)
2655 {
2656         unsigned int column, row;
2657         unsigned int src_idx;
2658         struct scatterlist *sg = st->sgl;
2659
2660         st->nents = 0;
2661
2662         for (column = 0; column < width; column++) {
2663                 src_idx = width * (height - 1) + column;
2664                 for (row = 0; row < height; row++) {
2665                         st->nents++;
2666                         /* We don't need the pages, but need to initialize
2667                          * the entries so the sg list can be happily traversed.
2668                          * The only thing we need are DMA addresses.
2669                          * All we actually need are the DMA addresses.
2670                         sg_set_page(sg, NULL, PAGE_SIZE, 0);
2671                         sg_dma_address(sg) = in[src_idx];
2672                         sg_dma_len(sg) = PAGE_SIZE;
2673                         sg = sg_next(sg);
2674                         src_idx -= width;
2675                 }
2676         }
2677 }
2678
2679 static struct sg_table *
2680 intel_rotate_fb_obj_pages(struct i915_ggtt_view *ggtt_view,
2681                           struct drm_i915_gem_object *obj)
2682 {
2683         struct drm_device *dev = obj->base.dev;
2684         struct intel_rotation_info *rot_info = &ggtt_view->rotation_info;
2685         unsigned long size, pages, rot_pages;
2686         struct sg_page_iter sg_iter;
2687         unsigned long i;
2688         dma_addr_t *page_addr_list;
2689         struct sg_table *st;
2690         unsigned int tile_pitch, tile_height;
2691         unsigned int width_pages, height_pages;
2692         int ret = -ENOMEM;
2693
2694         pages = obj->base.size / PAGE_SIZE;
2695
2696         /* Calculate tiling geometry. */
2697         tile_height = intel_tile_height(dev, rot_info->pixel_format,
2698                                         rot_info->fb_modifier);
2699         tile_pitch = PAGE_SIZE / tile_height;
2700         width_pages = DIV_ROUND_UP(rot_info->pitch, tile_pitch);
2701         height_pages = DIV_ROUND_UP(rot_info->height, tile_height);
2702         rot_pages = width_pages * height_pages;
2703         size = rot_pages * PAGE_SIZE;
2704
2705         /* Allocate a temporary list of source pages for random access. */
2706         page_addr_list = drm_malloc_ab(pages, sizeof(dma_addr_t));
2707         if (!page_addr_list)
2708                 return ERR_PTR(ret);
2709
2710         /* Allocate target SG list. */
2711         st = kmalloc(sizeof(*st), GFP_KERNEL);
2712         if (!st)
2713                 goto err_st_alloc;
2714
2715         ret = sg_alloc_table(st, rot_pages, GFP_KERNEL);
2716         if (ret)
2717                 goto err_sg_alloc;
2718
2719         /* Populate source page list from the object. */
2720         i = 0;
2721         for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 0) {
2722                 page_addr_list[i] = sg_page_iter_dma_address(&sg_iter);
2723                 i++;
2724         }
2725
2726         /* Rotate the pages. */
2727         rotate_pages(page_addr_list, width_pages, height_pages, st);
2728
2729         DRM_DEBUG_KMS(
2730                       "Created rotated page mapping for object size %lu (pitch=%u, height=%u, pixel_format=0x%x, %ux%u tiles, %lu pages).\n",
2731                       size, rot_info->pitch, rot_info->height,
2732                       rot_info->pixel_format, width_pages, height_pages,
2733                       rot_pages);
2734
2735         drm_free_large(page_addr_list);
2736
2737         return st;
2738
2739 err_sg_alloc:
2740         kfree(st);
2741 err_st_alloc:
2742         drm_free_large(page_addr_list);
2743
2744         DRM_DEBUG_KMS(
2745                       "Failed to create rotated mapping for object size %lu! (%d) (pitch=%u, height=%u, pixel_format=0x%x, %ux%u tiles, %lu pages)\n",
2746                       size, ret, rot_info->pitch, rot_info->height,
2747                       rot_info->pixel_format, width_pages, height_pages,
2748                       rot_pages);
2749         return ERR_PTR(ret);
2750 }
2751
2752 static inline int
2753 i915_get_ggtt_vma_pages(struct i915_vma *vma)
2754 {
2755         int ret = 0;
2756
2757         if (vma->ggtt_view.pages)
2758                 return 0;
2759
2760         if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL)
2761                 vma->ggtt_view.pages = vma->obj->pages;
2762         else if (vma->ggtt_view.type == I915_GGTT_VIEW_ROTATED)
2763                 vma->ggtt_view.pages =
2764                         intel_rotate_fb_obj_pages(&vma->ggtt_view, vma->obj);
2765         else
2766                 WARN_ONCE(1, "GGTT view %u not implemented!\n",
2767                           vma->ggtt_view.type);
2768
2769         if (!vma->ggtt_view.pages) {
2770                 DRM_ERROR("Failed to get pages for GGTT view type %u!\n",
2771                           vma->ggtt_view.type);
2772                 ret = -EINVAL;
2773         } else if (IS_ERR(vma->ggtt_view.pages)) {
2774                 ret = PTR_ERR(vma->ggtt_view.pages);
2775                 vma->ggtt_view.pages = NULL;
2776                 DRM_ERROR("Failed to get pages for VMA view type %u (%d)!\n",
2777                           vma->ggtt_view.type, ret);
2778         }
2779
2780         return ret;
2781 }
2782
2783 /**
2784  * i915_vma_bind - Sets up PTEs for a VMA in its corresponding address space.
2785  * @vma: VMA to map
2786  * @cache_level: mapping cache level
2787  * @flags: flags like global or local mapping
2788  *
2789  * DMA addresses are taken from the scatter-gather table of this object (or of
2790  * this VMA in case of non-default GGTT views) and PTE entries set up.
2791  * Note that DMA addresses are also the only part of the SG table we care about.
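 *
 * PIN_GLOBAL requests a global (GGTT) binding and PIN_USER a per-process
 * (LOCAL_BIND) one.  PIN_UPDATE re-applies whatever bindings the VMA
 * already has; without it, bind flags already present on the VMA are
 * masked out so existing bindings are not redone.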
2792  */
2793 int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level,
2794                   u32 flags)
2795 {
2796         u32 bind_flags = 0;
2797         int ret;
2798
2799         if (vma->vm->allocate_va_range) {
2800                 trace_i915_va_alloc(vma->vm, vma->node.start,
2801                                     vma->node.size,
2802                                     VM_TO_TRACE_NAME(vma->vm));
2803
2804                 ret = vma->vm->allocate_va_range(vma->vm,
2805                                                  vma->node.start,
2806                                                  vma->node.size);
2807                 if (ret)
2808                         return ret;
2809         }
2810
2811         if (i915_is_ggtt(vma->vm)) {
2812                 ret = i915_get_ggtt_vma_pages(vma);
2813                 if (ret)
2814                         return ret;
2815         }
2816
2817         if (flags & PIN_GLOBAL)
2818                 bind_flags |= GLOBAL_BIND;
2819         if (flags & PIN_USER)
2820                 bind_flags |= LOCAL_BIND;
2821
2822         if (flags & PIN_UPDATE)
2823                 bind_flags |= vma->bound;
2824         else
2825                 bind_flags &= ~vma->bound;
2826
2827         if (bind_flags)
2828                 vma->vm->bind_vma(vma, cache_level, bind_flags);
2829
2830         vma->bound |= bind_flags;
2831
2832         return 0;
2833 }