e4b233df576fe4de89ff793940b4c0330859df60
[firefly-linux-kernel-4.4.55.git] / drivers / gpu / drm / i915 / i915_gem.c
1 /*
2  * Copyright © 2008 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  * Authors:
24  *    Eric Anholt <eric@anholt.net>
25  *
26  */
27
28 #include <drm/drmP.h>
29 #include <drm/i915_drm.h>
30 #include "i915_drv.h"
31 #include "i915_trace.h"
32 #include "intel_drv.h"
33 #include <linux/shmem_fs.h>
34 #include <linux/slab.h>
35 #include <linux/swap.h>
36 #include <linux/pci.h>
37 #include <linux/dma-buf.h>
38
39 static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj);
40 static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj);
41 static __must_check int i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj,
42                                                     unsigned alignment,
43                                                     bool map_and_fenceable,
44                                                     bool nonblocking);
45 static int i915_gem_phys_pwrite(struct drm_device *dev,
46                                 struct drm_i915_gem_object *obj,
47                                 struct drm_i915_gem_pwrite *args,
48                                 struct drm_file *file);
49
50 static void i915_gem_write_fence(struct drm_device *dev, int reg,
51                                  struct drm_i915_gem_object *obj);
52 static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj,
53                                          struct drm_i915_fence_reg *fence,
54                                          bool enable);
55
56 static int i915_gem_inactive_shrink(struct shrinker *shrinker,
57                                     struct shrink_control *sc);
58 static long i915_gem_purge(struct drm_i915_private *dev_priv, long target);
59 static void i915_gem_shrink_all(struct drm_i915_private *dev_priv);
60 static void i915_gem_object_truncate(struct drm_i915_gem_object *obj);
61
62 static inline void i915_gem_object_fence_lost(struct drm_i915_gem_object *obj)
63 {
64         if (obj->tiling_mode)
65                 i915_gem_release_mmap(obj);
66
67         /* As we do not have an associated fence register, we will force
68          * a tiling change if we ever need to acquire one.
69          */
70         obj->fence_dirty = false;
71         obj->fence_reg = I915_FENCE_REG_NONE;
72 }
73
74 /* some bookkeeping */
75 static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv,
76                                   size_t size)
77 {
78         dev_priv->mm.object_count++;
79         dev_priv->mm.object_memory += size;
80 }
81
82 static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv,
83                                      size_t size)
84 {
85         dev_priv->mm.object_count--;
86         dev_priv->mm.object_memory -= size;
87 }
88
89 static int
90 i915_gem_wait_for_error(struct drm_device *dev)
91 {
92         struct drm_i915_private *dev_priv = dev->dev_private;
93         struct completion *x = &dev_priv->error_completion;
94         unsigned long flags;
95         int ret;
96
97         if (!atomic_read(&dev_priv->mm.wedged))
98                 return 0;
99
100         /*
101          * Only wait 10 seconds for the gpu reset to complete to avoid hanging
102          * userspace. If it takes that long something really bad is going on and
103          * we should simply try to bail out and fail as gracefully as possible.
104          */
105         ret = wait_for_completion_interruptible_timeout(x, 10*HZ);
106         if (ret == 0) {
107                 DRM_ERROR("Timed out waiting for the gpu reset to complete\n");
108                 return -EIO;
109         } else if (ret < 0) {
110                 return ret;
111         }
112
113         if (atomic_read(&dev_priv->mm.wedged)) {
114                 /* GPU is hung, bump the completion count to account for
115                  * the token we just consumed so that we never hit zero and
116                  * end up waiting upon a subsequent completion event that
117                  * will never happen.
118                  */
119                 spin_lock_irqsave(&x->wait.lock, flags);
120                 x->done++;
121                 spin_unlock_irqrestore(&x->wait.lock, flags);
122         }
123         return 0;
124 }
125
126 int i915_mutex_lock_interruptible(struct drm_device *dev)
127 {
128         int ret;
129
130         ret = i915_gem_wait_for_error(dev);
131         if (ret)
132                 return ret;
133
134         ret = mutex_lock_interruptible(&dev->struct_mutex);
135         if (ret)
136                 return ret;
137
138         WARN_ON(i915_verify_lists(dev));
139         return 0;
140 }
141
142 static inline bool
143 i915_gem_object_is_inactive(struct drm_i915_gem_object *obj)
144 {
145         return obj->gtt_space && !obj->active;
146 }
147
148 int
149 i915_gem_init_ioctl(struct drm_device *dev, void *data,
150                     struct drm_file *file)
151 {
152         struct drm_i915_gem_init *args = data;
153
154         if (drm_core_check_feature(dev, DRIVER_MODESET))
155                 return -ENODEV;
156
157         if (args->gtt_start >= args->gtt_end ||
158             (args->gtt_end | args->gtt_start) & (PAGE_SIZE - 1))
159                 return -EINVAL;
160
161         /* GEM with user mode setting was never supported on ilk and later. */
162         if (INTEL_INFO(dev)->gen >= 5)
163                 return -ENODEV;
164
165         mutex_lock(&dev->struct_mutex);
166         i915_gem_init_global_gtt(dev, args->gtt_start,
167                                  args->gtt_end, args->gtt_end);
168         mutex_unlock(&dev->struct_mutex);
169
170         return 0;
171 }
172
173 int
174 i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
175                             struct drm_file *file)
176 {
177         struct drm_i915_private *dev_priv = dev->dev_private;
178         struct drm_i915_gem_get_aperture *args = data;
179         struct drm_i915_gem_object *obj;
180         size_t pinned;
181
182         pinned = 0;
183         mutex_lock(&dev->struct_mutex);
184         list_for_each_entry(obj, &dev_priv->mm.bound_list, gtt_list)
185                 if (obj->pin_count)
186                         pinned += obj->gtt_space->size;
187         mutex_unlock(&dev->struct_mutex);
188
189         args->aper_size = dev_priv->mm.gtt_total;
190         args->aper_available_size = args->aper_size - pinned;
191
192         return 0;
193 }
194
195 void *i915_gem_object_alloc(struct drm_device *dev)
196 {
197         struct drm_i915_private *dev_priv = dev->dev_private;
198         return kmem_cache_alloc(dev_priv->slab, GFP_KERNEL | __GFP_ZERO);
199 }
200
201 void i915_gem_object_free(struct drm_i915_gem_object *obj)
202 {
203         struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
204         kmem_cache_free(dev_priv->slab, obj);
205 }
206
207 static int
208 i915_gem_create(struct drm_file *file,
209                 struct drm_device *dev,
210                 uint64_t size,
211                 uint32_t *handle_p)
212 {
213         struct drm_i915_gem_object *obj;
214         int ret;
215         u32 handle;
216
217         size = roundup(size, PAGE_SIZE);
218         if (size == 0)
219                 return -EINVAL;
220
221         /* Allocate the new object */
222         obj = i915_gem_alloc_object(dev, size);
223         if (obj == NULL)
224                 return -ENOMEM;
225
226         ret = drm_gem_handle_create(file, &obj->base, &handle);
227         if (ret) {
228                 drm_gem_object_release(&obj->base);
229                 i915_gem_info_remove_obj(dev->dev_private, obj->base.size);
230                 i915_gem_object_free(obj);
231                 return ret;
232         }
233
234         /* drop reference from allocate - handle holds it now */
235         drm_gem_object_unreference(&obj->base);
236         trace_i915_gem_object_create(obj);
237
238         *handle_p = handle;
239         return 0;
240 }
241
242 int
243 i915_gem_dumb_create(struct drm_file *file,
244                      struct drm_device *dev,
245                      struct drm_mode_create_dumb *args)
246 {
247         /* have to work out size/pitch and return them */
248         args->pitch = ALIGN(args->width * ((args->bpp + 7) / 8), 64);
249         args->size = args->pitch * args->height;
250         return i915_gem_create(file, dev,
251                                args->size, &args->handle);
252 }
253
254 int i915_gem_dumb_destroy(struct drm_file *file,
255                           struct drm_device *dev,
256                           uint32_t handle)
257 {
258         return drm_gem_handle_delete(file, handle);
259 }
260
261 /**
262  * Creates a new mm object and returns a handle to it.
263  */
264 int
265 i915_gem_create_ioctl(struct drm_device *dev, void *data,
266                       struct drm_file *file)
267 {
268         struct drm_i915_gem_create *args = data;
269
270         return i915_gem_create(file, dev,
271                                args->size, &args->handle);
272 }
273
274 static inline int
275 __copy_to_user_swizzled(char __user *cpu_vaddr,
276                         const char *gpu_vaddr, int gpu_offset,
277                         int length)
278 {
279         int ret, cpu_offset = 0;
280
281         while (length > 0) {
282                 int cacheline_end = ALIGN(gpu_offset + 1, 64);
283                 int this_length = min(cacheline_end - gpu_offset, length);
284                 int swizzled_gpu_offset = gpu_offset ^ 64;
285
286                 ret = __copy_to_user(cpu_vaddr + cpu_offset,
287                                      gpu_vaddr + swizzled_gpu_offset,
288                                      this_length);
289                 if (ret)
290                         return ret + length;
291
292                 cpu_offset += this_length;
293                 gpu_offset += this_length;
294                 length -= this_length;
295         }
296
297         return 0;
298 }
299
300 static inline int
301 __copy_from_user_swizzled(char *gpu_vaddr, int gpu_offset,
302                           const char __user *cpu_vaddr,
303                           int length)
304 {
305         int ret, cpu_offset = 0;
306
307         while (length > 0) {
308                 int cacheline_end = ALIGN(gpu_offset + 1, 64);
309                 int this_length = min(cacheline_end - gpu_offset, length);
310                 int swizzled_gpu_offset = gpu_offset ^ 64;
311
312                 ret = __copy_from_user(gpu_vaddr + swizzled_gpu_offset,
313                                        cpu_vaddr + cpu_offset,
314                                        this_length);
315                 if (ret)
316                         return ret + length;
317
318                 cpu_offset += this_length;
319                 gpu_offset += this_length;
320                 length -= this_length;
321         }
322
323         return 0;
324 }
325
326 /* Per-page copy function for the shmem pread fastpath.
327  * Flushes invalid cachelines before reading the target if
328  * needs_clflush is set. */
329 static int
330 shmem_pread_fast(struct page *page, int shmem_page_offset, int page_length,
331                  char __user *user_data,
332                  bool page_do_bit17_swizzling, bool needs_clflush)
333 {
334         char *vaddr;
335         int ret;
336
337         if (unlikely(page_do_bit17_swizzling))
338                 return -EINVAL;
339
340         vaddr = kmap_atomic(page);
341         if (needs_clflush)
342                 drm_clflush_virt_range(vaddr + shmem_page_offset,
343                                        page_length);
344         ret = __copy_to_user_inatomic(user_data,
345                                       vaddr + shmem_page_offset,
346                                       page_length);
347         kunmap_atomic(vaddr);
348
349         return ret ? -EFAULT : 0;
350 }
351
352 static void
353 shmem_clflush_swizzled_range(char *addr, unsigned long length,
354                              bool swizzled)
355 {
356         if (unlikely(swizzled)) {
357                 unsigned long start = (unsigned long) addr;
358                 unsigned long end = (unsigned long) addr + length;
359
360                 /* For swizzling simply ensure that we always flush both
361                  * channels. Lame, but simple and it works. Swizzled
362                  * pwrite/pread is far from a hotpath - current userspace
363                  * doesn't use it at all. */
364                 start = round_down(start, 128);
365                 end = round_up(end, 128);
366
367                 drm_clflush_virt_range((void *)start, end - start);
368         } else {
369                 drm_clflush_virt_range(addr, length);
370         }
371
372 }
373
374 /* Only difference to the fast-path function is that this can handle bit17
375  * and uses non-atomic copy and kmap functions. */
376 static int
377 shmem_pread_slow(struct page *page, int shmem_page_offset, int page_length,
378                  char __user *user_data,
379                  bool page_do_bit17_swizzling, bool needs_clflush)
380 {
381         char *vaddr;
382         int ret;
383
384         vaddr = kmap(page);
385         if (needs_clflush)
386                 shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
387                                              page_length,
388                                              page_do_bit17_swizzling);
389
390         if (page_do_bit17_swizzling)
391                 ret = __copy_to_user_swizzled(user_data,
392                                               vaddr, shmem_page_offset,
393                                               page_length);
394         else
395                 ret = __copy_to_user(user_data,
396                                      vaddr + shmem_page_offset,
397                                      page_length);
398         kunmap(page);
399
400         return ret ? - EFAULT : 0;
401 }
402
403 static int
404 i915_gem_shmem_pread(struct drm_device *dev,
405                      struct drm_i915_gem_object *obj,
406                      struct drm_i915_gem_pread *args,
407                      struct drm_file *file)
408 {
409         char __user *user_data;
410         ssize_t remain;
411         loff_t offset;
412         int shmem_page_offset, page_length, ret = 0;
413         int obj_do_bit17_swizzling, page_do_bit17_swizzling;
414         int prefaulted = 0;
415         int needs_clflush = 0;
416         struct scatterlist *sg;
417         int i;
418
419         user_data = (char __user *) (uintptr_t) args->data_ptr;
420         remain = args->size;
421
422         obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
423
424         if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)) {
425                 /* If we're not in the cpu read domain, set ourself into the gtt
426                  * read domain and manually flush cachelines (if required). This
427                  * optimizes for the case when the gpu will dirty the data
428                  * anyway again before the next pread happens. */
429                 if (obj->cache_level == I915_CACHE_NONE)
430                         needs_clflush = 1;
431                 if (obj->gtt_space) {
432                         ret = i915_gem_object_set_to_gtt_domain(obj, false);
433                         if (ret)
434                                 return ret;
435                 }
436         }
437
438         ret = i915_gem_object_get_pages(obj);
439         if (ret)
440                 return ret;
441
442         i915_gem_object_pin_pages(obj);
443
444         offset = args->offset;
445
446         for_each_sg(obj->pages->sgl, sg, obj->pages->nents, i) {
447                 struct page *page;
448
449                 if (i < offset >> PAGE_SHIFT)
450                         continue;
451
452                 if (remain <= 0)
453                         break;
454
455                 /* Operation in this page
456                  *
457                  * shmem_page_offset = offset within page in shmem file
458                  * page_length = bytes to copy for this page
459                  */
460                 shmem_page_offset = offset_in_page(offset);
461                 page_length = remain;
462                 if ((shmem_page_offset + page_length) > PAGE_SIZE)
463                         page_length = PAGE_SIZE - shmem_page_offset;
464
465                 page = sg_page(sg);
466                 page_do_bit17_swizzling = obj_do_bit17_swizzling &&
467                         (page_to_phys(page) & (1 << 17)) != 0;
468
469                 ret = shmem_pread_fast(page, shmem_page_offset, page_length,
470                                        user_data, page_do_bit17_swizzling,
471                                        needs_clflush);
472                 if (ret == 0)
473                         goto next_page;
474
475                 mutex_unlock(&dev->struct_mutex);
476
477                 if (!prefaulted) {
478                         ret = fault_in_multipages_writeable(user_data, remain);
479                         /* Userspace is tricking us, but we've already clobbered
480                          * its pages with the prefault and promised to write the
481                          * data up to the first fault. Hence ignore any errors
482                          * and just continue. */
483                         (void)ret;
484                         prefaulted = 1;
485                 }
486
487                 ret = shmem_pread_slow(page, shmem_page_offset, page_length,
488                                        user_data, page_do_bit17_swizzling,
489                                        needs_clflush);
490
491                 mutex_lock(&dev->struct_mutex);
492
493 next_page:
494                 mark_page_accessed(page);
495
496                 if (ret)
497                         goto out;
498
499                 remain -= page_length;
500                 user_data += page_length;
501                 offset += page_length;
502         }
503
504 out:
505         i915_gem_object_unpin_pages(obj);
506
507         return ret;
508 }
509
510 /**
511  * Reads data from the object referenced by handle.
512  *
513  * On error, the contents of *data are undefined.
514  */
515 int
516 i915_gem_pread_ioctl(struct drm_device *dev, void *data,
517                      struct drm_file *file)
518 {
519         struct drm_i915_gem_pread *args = data;
520         struct drm_i915_gem_object *obj;
521         int ret = 0;
522
523         if (args->size == 0)
524                 return 0;
525
526         if (!access_ok(VERIFY_WRITE,
527                        (char __user *)(uintptr_t)args->data_ptr,
528                        args->size))
529                 return -EFAULT;
530
531         ret = i915_mutex_lock_interruptible(dev);
532         if (ret)
533                 return ret;
534
535         obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
536         if (&obj->base == NULL) {
537                 ret = -ENOENT;
538                 goto unlock;
539         }
540
541         /* Bounds check source.  */
542         if (args->offset > obj->base.size ||
543             args->size > obj->base.size - args->offset) {
544                 ret = -EINVAL;
545                 goto out;
546         }
547
548         /* prime objects have no backing filp to GEM pread/pwrite
549          * pages from.
550          */
551         if (!obj->base.filp) {
552                 ret = -EINVAL;
553                 goto out;
554         }
555
556         trace_i915_gem_object_pread(obj, args->offset, args->size);
557
558         ret = i915_gem_shmem_pread(dev, obj, args, file);
559
560 out:
561         drm_gem_object_unreference(&obj->base);
562 unlock:
563         mutex_unlock(&dev->struct_mutex);
564         return ret;
565 }
566
567 /* This is the fast write path which cannot handle
568  * page faults in the source data
569  */
570
571 static inline int
572 fast_user_write(struct io_mapping *mapping,
573                 loff_t page_base, int page_offset,
574                 char __user *user_data,
575                 int length)
576 {
577         void __iomem *vaddr_atomic;
578         void *vaddr;
579         unsigned long unwritten;
580
581         vaddr_atomic = io_mapping_map_atomic_wc(mapping, page_base);
582         /* We can use the cpu mem copy function because this is X86. */
583         vaddr = (void __force*)vaddr_atomic + page_offset;
584         unwritten = __copy_from_user_inatomic_nocache(vaddr,
585                                                       user_data, length);
586         io_mapping_unmap_atomic(vaddr_atomic);
587         return unwritten;
588 }
589
590 /**
591  * This is the fast pwrite path, where we copy the data directly from the
592  * user into the GTT, uncached.
593  */
594 static int
595 i915_gem_gtt_pwrite_fast(struct drm_device *dev,
596                          struct drm_i915_gem_object *obj,
597                          struct drm_i915_gem_pwrite *args,
598                          struct drm_file *file)
599 {
600         drm_i915_private_t *dev_priv = dev->dev_private;
601         ssize_t remain;
602         loff_t offset, page_base;
603         char __user *user_data;
604         int page_offset, page_length, ret;
605
606         ret = i915_gem_object_pin(obj, 0, true, true);
607         if (ret)
608                 goto out;
609
610         ret = i915_gem_object_set_to_gtt_domain(obj, true);
611         if (ret)
612                 goto out_unpin;
613
614         ret = i915_gem_object_put_fence(obj);
615         if (ret)
616                 goto out_unpin;
617
618         user_data = (char __user *) (uintptr_t) args->data_ptr;
619         remain = args->size;
620
621         offset = obj->gtt_offset + args->offset;
622
623         while (remain > 0) {
624                 /* Operation in this page
625                  *
626                  * page_base = page offset within aperture
627                  * page_offset = offset within page
628                  * page_length = bytes to copy for this page
629                  */
630                 page_base = offset & PAGE_MASK;
631                 page_offset = offset_in_page(offset);
632                 page_length = remain;
633                 if ((page_offset + remain) > PAGE_SIZE)
634                         page_length = PAGE_SIZE - page_offset;
635
636                 /* If we get a fault while copying data, then (presumably) our
637                  * source page isn't available.  Return the error and we'll
638                  * retry in the slow path.
639                  */
640                 if (fast_user_write(dev_priv->mm.gtt_mapping, page_base,
641                                     page_offset, user_data, page_length)) {
642                         ret = -EFAULT;
643                         goto out_unpin;
644                 }
645
646                 remain -= page_length;
647                 user_data += page_length;
648                 offset += page_length;
649         }
650
651 out_unpin:
652         i915_gem_object_unpin(obj);
653 out:
654         return ret;
655 }
656
657 /* Per-page copy function for the shmem pwrite fastpath.
658  * Flushes invalid cachelines before writing to the target if
659  * needs_clflush_before is set and flushes out any written cachelines after
660  * writing if needs_clflush is set. */
661 static int
662 shmem_pwrite_fast(struct page *page, int shmem_page_offset, int page_length,
663                   char __user *user_data,
664                   bool page_do_bit17_swizzling,
665                   bool needs_clflush_before,
666                   bool needs_clflush_after)
667 {
668         char *vaddr;
669         int ret;
670
671         if (unlikely(page_do_bit17_swizzling))
672                 return -EINVAL;
673
674         vaddr = kmap_atomic(page);
675         if (needs_clflush_before)
676                 drm_clflush_virt_range(vaddr + shmem_page_offset,
677                                        page_length);
678         ret = __copy_from_user_inatomic_nocache(vaddr + shmem_page_offset,
679                                                 user_data,
680                                                 page_length);
681         if (needs_clflush_after)
682                 drm_clflush_virt_range(vaddr + shmem_page_offset,
683                                        page_length);
684         kunmap_atomic(vaddr);
685
686         return ret ? -EFAULT : 0;
687 }
688
689 /* Only difference to the fast-path function is that this can handle bit17
690  * and uses non-atomic copy and kmap functions. */
691 static int
692 shmem_pwrite_slow(struct page *page, int shmem_page_offset, int page_length,
693                   char __user *user_data,
694                   bool page_do_bit17_swizzling,
695                   bool needs_clflush_before,
696                   bool needs_clflush_after)
697 {
698         char *vaddr;
699         int ret;
700
701         vaddr = kmap(page);
702         if (unlikely(needs_clflush_before || page_do_bit17_swizzling))
703                 shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
704                                              page_length,
705                                              page_do_bit17_swizzling);
706         if (page_do_bit17_swizzling)
707                 ret = __copy_from_user_swizzled(vaddr, shmem_page_offset,
708                                                 user_data,
709                                                 page_length);
710         else
711                 ret = __copy_from_user(vaddr + shmem_page_offset,
712                                        user_data,
713                                        page_length);
714         if (needs_clflush_after)
715                 shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
716                                              page_length,
717                                              page_do_bit17_swizzling);
718         kunmap(page);
719
720         return ret ? -EFAULT : 0;
721 }
722
723 static int
724 i915_gem_shmem_pwrite(struct drm_device *dev,
725                       struct drm_i915_gem_object *obj,
726                       struct drm_i915_gem_pwrite *args,
727                       struct drm_file *file)
728 {
729         ssize_t remain;
730         loff_t offset;
731         char __user *user_data;
732         int shmem_page_offset, page_length, ret = 0;
733         int obj_do_bit17_swizzling, page_do_bit17_swizzling;
734         int hit_slowpath = 0;
735         int needs_clflush_after = 0;
736         int needs_clflush_before = 0;
737         int i;
738         struct scatterlist *sg;
739
740         user_data = (char __user *) (uintptr_t) args->data_ptr;
741         remain = args->size;
742
743         obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
744
745         if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
746                 /* If we're not in the cpu write domain, set ourself into the gtt
747                  * write domain and manually flush cachelines (if required). This
748                  * optimizes for the case when the gpu will use the data
749                  * right away and we therefore have to clflush anyway. */
750                 if (obj->cache_level == I915_CACHE_NONE)
751                         needs_clflush_after = 1;
752                 if (obj->gtt_space) {
753                         ret = i915_gem_object_set_to_gtt_domain(obj, true);
754                         if (ret)
755                                 return ret;
756                 }
757         }
758         /* Same trick applies for invalidate partially written cachelines before
759          * writing.  */
760         if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)
761             && obj->cache_level == I915_CACHE_NONE)
762                 needs_clflush_before = 1;
763
764         ret = i915_gem_object_get_pages(obj);
765         if (ret)
766                 return ret;
767
768         i915_gem_object_pin_pages(obj);
769
770         offset = args->offset;
771         obj->dirty = 1;
772
773         for_each_sg(obj->pages->sgl, sg, obj->pages->nents, i) {
774                 struct page *page;
775                 int partial_cacheline_write;
776
777                 if (i < offset >> PAGE_SHIFT)
778                         continue;
779
780                 if (remain <= 0)
781                         break;
782
783                 /* Operation in this page
784                  *
785                  * shmem_page_offset = offset within page in shmem file
786                  * page_length = bytes to copy for this page
787                  */
788                 shmem_page_offset = offset_in_page(offset);
789
790                 page_length = remain;
791                 if ((shmem_page_offset + page_length) > PAGE_SIZE)
792                         page_length = PAGE_SIZE - shmem_page_offset;
793
794                 /* If we don't overwrite a cacheline completely we need to be
795                  * careful to have up-to-date data by first clflushing. Don't
796                  * overcomplicate things and flush the entire patch. */
797                 partial_cacheline_write = needs_clflush_before &&
798                         ((shmem_page_offset | page_length)
799                                 & (boot_cpu_data.x86_clflush_size - 1));
800
801                 page = sg_page(sg);
802                 page_do_bit17_swizzling = obj_do_bit17_swizzling &&
803                         (page_to_phys(page) & (1 << 17)) != 0;
804
805                 ret = shmem_pwrite_fast(page, shmem_page_offset, page_length,
806                                         user_data, page_do_bit17_swizzling,
807                                         partial_cacheline_write,
808                                         needs_clflush_after);
809                 if (ret == 0)
810                         goto next_page;
811
812                 hit_slowpath = 1;
813                 mutex_unlock(&dev->struct_mutex);
814                 ret = shmem_pwrite_slow(page, shmem_page_offset, page_length,
815                                         user_data, page_do_bit17_swizzling,
816                                         partial_cacheline_write,
817                                         needs_clflush_after);
818
819                 mutex_lock(&dev->struct_mutex);
820
821 next_page:
822                 set_page_dirty(page);
823                 mark_page_accessed(page);
824
825                 if (ret)
826                         goto out;
827
828                 remain -= page_length;
829                 user_data += page_length;
830                 offset += page_length;
831         }
832
833 out:
834         i915_gem_object_unpin_pages(obj);
835
836         if (hit_slowpath) {
837                 /*
838                  * Fixup: Flush cpu caches in case we didn't flush the dirty
839                  * cachelines in-line while writing and the object moved
840                  * out of the cpu write domain while we've dropped the lock.
841                  */
842                 if (!needs_clflush_after &&
843                     obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
844                         i915_gem_clflush_object(obj);
845                         i915_gem_chipset_flush(dev);
846                 }
847         }
848
849         if (needs_clflush_after)
850                 i915_gem_chipset_flush(dev);
851
852         return ret;
853 }
854
855 /**
856  * Writes data to the object referenced by handle.
857  *
858  * On error, the contents of the buffer that were to be modified are undefined.
859  */
860 int
861 i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
862                       struct drm_file *file)
863 {
864         struct drm_i915_gem_pwrite *args = data;
865         struct drm_i915_gem_object *obj;
866         int ret;
867
868         if (args->size == 0)
869                 return 0;
870
871         if (!access_ok(VERIFY_READ,
872                        (char __user *)(uintptr_t)args->data_ptr,
873                        args->size))
874                 return -EFAULT;
875
876         ret = fault_in_multipages_readable((char __user *)(uintptr_t)args->data_ptr,
877                                            args->size);
878         if (ret)
879                 return -EFAULT;
880
881         ret = i915_mutex_lock_interruptible(dev);
882         if (ret)
883                 return ret;
884
885         obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
886         if (&obj->base == NULL) {
887                 ret = -ENOENT;
888                 goto unlock;
889         }
890
891         /* Bounds check destination. */
892         if (args->offset > obj->base.size ||
893             args->size > obj->base.size - args->offset) {
894                 ret = -EINVAL;
895                 goto out;
896         }
897
898         /* prime objects have no backing filp to GEM pread/pwrite
899          * pages from.
900          */
901         if (!obj->base.filp) {
902                 ret = -EINVAL;
903                 goto out;
904         }
905
906         trace_i915_gem_object_pwrite(obj, args->offset, args->size);
907
908         ret = -EFAULT;
909         /* We can only do the GTT pwrite on untiled buffers, as otherwise
910          * it would end up going through the fenced access, and we'll get
911          * different detiling behavior between reading and writing.
912          * pread/pwrite currently are reading and writing from the CPU
913          * perspective, requiring manual detiling by the client.
914          */
915         if (obj->phys_obj) {
916                 ret = i915_gem_phys_pwrite(dev, obj, args, file);
917                 goto out;
918         }
919
920         if (obj->cache_level == I915_CACHE_NONE &&
921             obj->tiling_mode == I915_TILING_NONE &&
922             obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
923                 ret = i915_gem_gtt_pwrite_fast(dev, obj, args, file);
924                 /* Note that the gtt paths might fail with non-page-backed user
925                  * pointers (e.g. gtt mappings when moving data between
926                  * textures). Fallback to the shmem path in that case. */
927         }
928
929         if (ret == -EFAULT || ret == -ENOSPC)
930                 ret = i915_gem_shmem_pwrite(dev, obj, args, file);
931
932 out:
933         drm_gem_object_unreference(&obj->base);
934 unlock:
935         mutex_unlock(&dev->struct_mutex);
936         return ret;
937 }
938
939 int
940 i915_gem_check_wedge(struct drm_i915_private *dev_priv,
941                      bool interruptible)
942 {
943         if (atomic_read(&dev_priv->mm.wedged)) {
944                 struct completion *x = &dev_priv->error_completion;
945                 bool recovery_complete;
946                 unsigned long flags;
947
948                 /* Give the error handler a chance to run. */
949                 spin_lock_irqsave(&x->wait.lock, flags);
950                 recovery_complete = x->done > 0;
951                 spin_unlock_irqrestore(&x->wait.lock, flags);
952
953                 /* Non-interruptible callers can't handle -EAGAIN, hence return
954                  * -EIO unconditionally for these. */
955                 if (!interruptible)
956                         return -EIO;
957
958                 /* Recovery complete, but still wedged means reset failure. */
959                 if (recovery_complete)
960                         return -EIO;
961
962                 return -EAGAIN;
963         }
964
965         return 0;
966 }
967
968 /*
969  * Compare seqno against outstanding lazy request. Emit a request if they are
970  * equal.
971  */
972 static int
973 i915_gem_check_olr(struct intel_ring_buffer *ring, u32 seqno)
974 {
975         int ret;
976
977         BUG_ON(!mutex_is_locked(&ring->dev->struct_mutex));
978
979         ret = 0;
980         if (seqno == ring->outstanding_lazy_request)
981                 ret = i915_add_request(ring, NULL, NULL);
982
983         return ret;
984 }
985
986 /**
987  * __wait_seqno - wait until execution of seqno has finished
988  * @ring: the ring expected to report seqno
989  * @seqno: duh!
990  * @interruptible: do an interruptible wait (normally yes)
991  * @timeout: in - how long to wait (NULL forever); out - how much time remaining
992  *
993  * Returns 0 if the seqno was found within the alloted time. Else returns the
994  * errno with remaining time filled in timeout argument.
995  */
996 static int __wait_seqno(struct intel_ring_buffer *ring, u32 seqno,
997                         bool interruptible, struct timespec *timeout)
998 {
999         drm_i915_private_t *dev_priv = ring->dev->dev_private;
1000         struct timespec before, now, wait_time={1,0};
1001         unsigned long timeout_jiffies;
1002         long end;
1003         bool wait_forever = true;
1004         int ret;
1005
1006         if (i915_seqno_passed(ring->get_seqno(ring, true), seqno))
1007                 return 0;
1008
1009         trace_i915_gem_request_wait_begin(ring, seqno);
1010
1011         if (timeout != NULL) {
1012                 wait_time = *timeout;
1013                 wait_forever = false;
1014         }
1015
1016         timeout_jiffies = timespec_to_jiffies(&wait_time);
1017
1018         if (WARN_ON(!ring->irq_get(ring)))
1019                 return -ENODEV;
1020
1021         /* Record current time in case interrupted by signal, or wedged * */
1022         getrawmonotonic(&before);
1023
1024 #define EXIT_COND \
1025         (i915_seqno_passed(ring->get_seqno(ring, false), seqno) || \
1026         atomic_read(&dev_priv->mm.wedged))
1027         do {
1028                 if (interruptible)
1029                         end = wait_event_interruptible_timeout(ring->irq_queue,
1030                                                                EXIT_COND,
1031                                                                timeout_jiffies);
1032                 else
1033                         end = wait_event_timeout(ring->irq_queue, EXIT_COND,
1034                                                  timeout_jiffies);
1035
1036                 ret = i915_gem_check_wedge(dev_priv, interruptible);
1037                 if (ret)
1038                         end = ret;
1039         } while (end == 0 && wait_forever);
1040
1041         getrawmonotonic(&now);
1042
1043         ring->irq_put(ring);
1044         trace_i915_gem_request_wait_end(ring, seqno);
1045 #undef EXIT_COND
1046
1047         if (timeout) {
1048                 struct timespec sleep_time = timespec_sub(now, before);
1049                 *timeout = timespec_sub(*timeout, sleep_time);
1050         }
1051
1052         switch (end) {
1053         case -EIO:
1054         case -EAGAIN: /* Wedged */
1055         case -ERESTARTSYS: /* Signal */
1056                 return (int)end;
1057         case 0: /* Timeout */
1058                 if (timeout)
1059                         set_normalized_timespec(timeout, 0, 0);
1060                 return -ETIME;
1061         default: /* Completed */
1062                 WARN_ON(end < 0); /* We're not aware of other errors */
1063                 return 0;
1064         }
1065 }
1066
1067 /**
1068  * Waits for a sequence number to be signaled, and cleans up the
1069  * request and object lists appropriately for that event.
1070  */
1071 int
1072 i915_wait_seqno(struct intel_ring_buffer *ring, uint32_t seqno)
1073 {
1074         struct drm_device *dev = ring->dev;
1075         struct drm_i915_private *dev_priv = dev->dev_private;
1076         bool interruptible = dev_priv->mm.interruptible;
1077         int ret;
1078
1079         BUG_ON(!mutex_is_locked(&dev->struct_mutex));
1080         BUG_ON(seqno == 0);
1081
1082         ret = i915_gem_check_wedge(dev_priv, interruptible);
1083         if (ret)
1084                 return ret;
1085
1086         ret = i915_gem_check_olr(ring, seqno);
1087         if (ret)
1088                 return ret;
1089
1090         return __wait_seqno(ring, seqno, interruptible, NULL);
1091 }
1092
1093 /**
1094  * Ensures that all rendering to the object has completed and the object is
1095  * safe to unbind from the GTT or access from the CPU.
1096  */
1097 static __must_check int
1098 i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
1099                                bool readonly)
1100 {
1101         struct intel_ring_buffer *ring = obj->ring;
1102         u32 seqno;
1103         int ret;
1104
1105         seqno = readonly ? obj->last_write_seqno : obj->last_read_seqno;
1106         if (seqno == 0)
1107                 return 0;
1108
1109         ret = i915_wait_seqno(ring, seqno);
1110         if (ret)
1111                 return ret;
1112
1113         i915_gem_retire_requests_ring(ring);
1114
1115         /* Manually manage the write flush as we may have not yet
1116          * retired the buffer.
1117          */
1118         if (obj->last_write_seqno &&
1119             i915_seqno_passed(seqno, obj->last_write_seqno)) {
1120                 obj->last_write_seqno = 0;
1121                 obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS;
1122         }
1123
1124         return 0;
1125 }
1126
1127 /* A nonblocking variant of the above wait. This is a highly dangerous routine
1128  * as the object state may change during this call.
1129  */
1130 static __must_check int
1131 i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj,
1132                                             bool readonly)
1133 {
1134         struct drm_device *dev = obj->base.dev;
1135         struct drm_i915_private *dev_priv = dev->dev_private;
1136         struct intel_ring_buffer *ring = obj->ring;
1137         u32 seqno;
1138         int ret;
1139
1140         BUG_ON(!mutex_is_locked(&dev->struct_mutex));
1141         BUG_ON(!dev_priv->mm.interruptible);
1142
1143         seqno = readonly ? obj->last_write_seqno : obj->last_read_seqno;
1144         if (seqno == 0)
1145                 return 0;
1146
1147         ret = i915_gem_check_wedge(dev_priv, true);
1148         if (ret)
1149                 return ret;
1150
1151         ret = i915_gem_check_olr(ring, seqno);
1152         if (ret)
1153                 return ret;
1154
1155         mutex_unlock(&dev->struct_mutex);
1156         ret = __wait_seqno(ring, seqno, true, NULL);
1157         mutex_lock(&dev->struct_mutex);
1158
1159         i915_gem_retire_requests_ring(ring);
1160
1161         /* Manually manage the write flush as we may have not yet
1162          * retired the buffer.
1163          */
1164         if (obj->last_write_seqno &&
1165             i915_seqno_passed(seqno, obj->last_write_seqno)) {
1166                 obj->last_write_seqno = 0;
1167                 obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS;
1168         }
1169
1170         return ret;
1171 }
1172
1173 /**
1174  * Called when user space prepares to use an object with the CPU, either
1175  * through the mmap ioctl's mapping or a GTT mapping.
1176  */
1177 int
1178 i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
1179                           struct drm_file *file)
1180 {
1181         struct drm_i915_gem_set_domain *args = data;
1182         struct drm_i915_gem_object *obj;
1183         uint32_t read_domains = args->read_domains;
1184         uint32_t write_domain = args->write_domain;
1185         int ret;
1186
1187         /* Only handle setting domains to types used by the CPU. */
1188         if (write_domain & I915_GEM_GPU_DOMAINS)
1189                 return -EINVAL;
1190
1191         if (read_domains & I915_GEM_GPU_DOMAINS)
1192                 return -EINVAL;
1193
1194         /* Having something in the write domain implies it's in the read
1195          * domain, and only that read domain.  Enforce that in the request.
1196          */
1197         if (write_domain != 0 && read_domains != write_domain)
1198                 return -EINVAL;
1199
1200         ret = i915_mutex_lock_interruptible(dev);
1201         if (ret)
1202                 return ret;
1203
1204         obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
1205         if (&obj->base == NULL) {
1206                 ret = -ENOENT;
1207                 goto unlock;
1208         }
1209
1210         /* Try to flush the object off the GPU without holding the lock.
1211          * We will repeat the flush holding the lock in the normal manner
1212          * to catch cases where we are gazumped.
1213          */
1214         ret = i915_gem_object_wait_rendering__nonblocking(obj, !write_domain);
1215         if (ret)
1216                 goto unref;
1217
1218         if (read_domains & I915_GEM_DOMAIN_GTT) {
1219                 ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0);
1220
1221                 /* Silently promote "you're not bound, there was nothing to do"
1222                  * to success, since the client was just asking us to
1223                  * make sure everything was done.
1224                  */
1225                 if (ret == -EINVAL)
1226                         ret = 0;
1227         } else {
1228                 ret = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0);
1229         }
1230
1231 unref:
1232         drm_gem_object_unreference(&obj->base);
1233 unlock:
1234         mutex_unlock(&dev->struct_mutex);
1235         return ret;
1236 }
1237
1238 /**
1239  * Called when user space has done writes to this buffer
1240  */
1241 int
1242 i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
1243                          struct drm_file *file)
1244 {
1245         struct drm_i915_gem_sw_finish *args = data;
1246         struct drm_i915_gem_object *obj;
1247         int ret = 0;
1248
1249         ret = i915_mutex_lock_interruptible(dev);
1250         if (ret)
1251                 return ret;
1252
1253         obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
1254         if (&obj->base == NULL) {
1255                 ret = -ENOENT;
1256                 goto unlock;
1257         }
1258
1259         /* Pinned buffers may be scanout, so flush the cache */
1260         if (obj->pin_count)
1261                 i915_gem_object_flush_cpu_write_domain(obj);
1262
1263         drm_gem_object_unreference(&obj->base);
1264 unlock:
1265         mutex_unlock(&dev->struct_mutex);
1266         return ret;
1267 }
1268
1269 /**
1270  * Maps the contents of an object, returning the address it is mapped
1271  * into.
1272  *
1273  * While the mapping holds a reference on the contents of the object, it doesn't
1274  * imply a ref on the object itself.
1275  */
1276 int
1277 i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
1278                     struct drm_file *file)
1279 {
1280         struct drm_i915_gem_mmap *args = data;
1281         struct drm_gem_object *obj;
1282         unsigned long addr;
1283
1284         obj = drm_gem_object_lookup(dev, file, args->handle);
1285         if (obj == NULL)
1286                 return -ENOENT;
1287
1288         /* prime objects have no backing filp to GEM mmap
1289          * pages from.
1290          */
1291         if (!obj->filp) {
1292                 drm_gem_object_unreference_unlocked(obj);
1293                 return -EINVAL;
1294         }
1295
1296         addr = vm_mmap(obj->filp, 0, args->size,
1297                        PROT_READ | PROT_WRITE, MAP_SHARED,
1298                        args->offset);
1299         drm_gem_object_unreference_unlocked(obj);
1300         if (IS_ERR((void *)addr))
1301                 return addr;
1302
1303         args->addr_ptr = (uint64_t) addr;
1304
1305         return 0;
1306 }
1307
1308 /**
1309  * i915_gem_fault - fault a page into the GTT
1310  * vma: VMA in question
1311  * vmf: fault info
1312  *
1313  * The fault handler is set up by drm_gem_mmap() when a object is GTT mapped
1314  * from userspace.  The fault handler takes care of binding the object to
1315  * the GTT (if needed), allocating and programming a fence register (again,
1316  * only if needed based on whether the old reg is still valid or the object
1317  * is tiled) and inserting a new PTE into the faulting process.
1318  *
1319  * Note that the faulting process may involve evicting existing objects
1320  * from the GTT and/or fence registers to make room.  So performance may
1321  * suffer if the GTT working set is large or there are few fence registers
1322  * left.
1323  */
1324 int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
1325 {
1326         struct drm_i915_gem_object *obj = to_intel_bo(vma->vm_private_data);
1327         struct drm_device *dev = obj->base.dev;
1328         drm_i915_private_t *dev_priv = dev->dev_private;
1329         pgoff_t page_offset;
1330         unsigned long pfn;
1331         int ret = 0;
1332         bool write = !!(vmf->flags & FAULT_FLAG_WRITE);
1333
1334         /* We don't use vmf->pgoff since that has the fake offset */
1335         page_offset = ((unsigned long)vmf->virtual_address - vma->vm_start) >>
1336                 PAGE_SHIFT;
1337
1338         ret = i915_mutex_lock_interruptible(dev);
1339         if (ret)
1340                 goto out;
1341
1342         trace_i915_gem_object_fault(obj, page_offset, true, write);
1343
1344         /* Now bind it into the GTT if needed */
1345         ret = i915_gem_object_pin(obj, 0, true, false);
1346         if (ret)
1347                 goto unlock;
1348
1349         ret = i915_gem_object_set_to_gtt_domain(obj, write);
1350         if (ret)
1351                 goto unpin;
1352
1353         ret = i915_gem_object_get_fence(obj);
1354         if (ret)
1355                 goto unpin;
1356
1357         obj->fault_mappable = true;
1358
1359         pfn = ((dev_priv->mm.gtt_base_addr + obj->gtt_offset) >> PAGE_SHIFT) +
1360                 page_offset;
1361
1362         /* Finally, remap it using the new GTT offset */
1363         ret = vm_insert_pfn(vma, (unsigned long)vmf->virtual_address, pfn);
1364 unpin:
1365         i915_gem_object_unpin(obj);
1366 unlock:
1367         mutex_unlock(&dev->struct_mutex);
1368 out:
1369         switch (ret) {
1370         case -EIO:
1371                 /* If this -EIO is due to a gpu hang, give the reset code a
1372                  * chance to clean up the mess. Otherwise return the proper
1373                  * SIGBUS. */
1374                 if (!atomic_read(&dev_priv->mm.wedged))
1375                         return VM_FAULT_SIGBUS;
1376         case -EAGAIN:
1377                 /* Give the error handler a chance to run and move the
1378                  * objects off the GPU active list. Next time we service the
1379                  * fault, we should be able to transition the page into the
1380                  * GTT without touching the GPU (and so avoid further
1381                  * EIO/EGAIN). If the GPU is wedged, then there is no issue
1382                  * with coherency, just lost writes.
1383                  */
1384                 set_need_resched();
1385         case 0:
1386         case -ERESTARTSYS:
1387         case -EINTR:
1388         case -EBUSY:
1389                 /*
1390                  * EBUSY is ok: this just means that another thread
1391                  * already did the job.
1392                  */
1393                 return VM_FAULT_NOPAGE;
1394         case -ENOMEM:
1395                 return VM_FAULT_OOM;
1396         case -ENOSPC:
1397                 return VM_FAULT_SIGBUS;
1398         default:
1399                 WARN_ONCE(ret, "unhandled error in i915_gem_fault: %i\n", ret);
1400                 return VM_FAULT_SIGBUS;
1401         }
1402 }
1403
1404 /**
1405  * i915_gem_release_mmap - remove physical page mappings
1406  * @obj: obj in question
1407  *
1408  * Preserve the reservation of the mmapping with the DRM core code, but
1409  * relinquish ownership of the pages back to the system.
1410  *
1411  * It is vital that we remove the page mapping if we have mapped a tiled
1412  * object through the GTT and then lose the fence register due to
1413  * resource pressure. Similarly if the object has been moved out of the
1414  * aperture, than pages mapped into userspace must be revoked. Removing the
1415  * mapping will then trigger a page fault on the next user access, allowing
1416  * fixup by i915_gem_fault().
1417  */
1418 void
1419 i915_gem_release_mmap(struct drm_i915_gem_object *obj)
1420 {
1421         if (!obj->fault_mappable)
1422                 return;
1423
1424         if (obj->base.dev->dev_mapping)
1425                 unmap_mapping_range(obj->base.dev->dev_mapping,
1426                                     (loff_t)obj->base.map_list.hash.key<<PAGE_SHIFT,
1427                                     obj->base.size, 1);
1428
1429         obj->fault_mappable = false;
1430 }
1431
1432 static uint32_t
1433 i915_gem_get_gtt_size(struct drm_device *dev, uint32_t size, int tiling_mode)
1434 {
1435         uint32_t gtt_size;
1436
1437         if (INTEL_INFO(dev)->gen >= 4 ||
1438             tiling_mode == I915_TILING_NONE)
1439                 return size;
1440
1441         /* Previous chips need a power-of-two fence region when tiling */
1442         if (INTEL_INFO(dev)->gen == 3)
1443                 gtt_size = 1024*1024;
1444         else
1445                 gtt_size = 512*1024;
1446
1447         while (gtt_size < size)
1448                 gtt_size <<= 1;
1449
1450         return gtt_size;
1451 }
1452
1453 /**
1454  * i915_gem_get_gtt_alignment - return required GTT alignment for an object
1455  * @obj: object to check
1456  *
1457  * Return the required GTT alignment for an object, taking into account
1458  * potential fence register mapping.
1459  */
1460 static uint32_t
1461 i915_gem_get_gtt_alignment(struct drm_device *dev,
1462                            uint32_t size,
1463                            int tiling_mode)
1464 {
1465         /*
1466          * Minimum alignment is 4k (GTT page size), but might be greater
1467          * if a fence register is needed for the object.
1468          */
1469         if (INTEL_INFO(dev)->gen >= 4 ||
1470             tiling_mode == I915_TILING_NONE)
1471                 return 4096;
1472
1473         /*
1474          * Previous chips need to be aligned to the size of the smallest
1475          * fence register that can contain the object.
1476          */
1477         return i915_gem_get_gtt_size(dev, size, tiling_mode);
1478 }
1479
1480 /**
1481  * i915_gem_get_unfenced_gtt_alignment - return required GTT alignment for an
1482  *                                       unfenced object
1483  * @dev: the device
1484  * @size: size of the object
1485  * @tiling_mode: tiling mode of the object
1486  *
1487  * Return the required GTT alignment for an object, only taking into account
1488  * unfenced tiled surface requirements.
1489  */
1490 uint32_t
1491 i915_gem_get_unfenced_gtt_alignment(struct drm_device *dev,
1492                                     uint32_t size,
1493                                     int tiling_mode)
1494 {
1495         /*
1496          * Minimum alignment is 4k (GTT page size) for sane hw.
1497          */
1498         if (INTEL_INFO(dev)->gen >= 4 || IS_G33(dev) ||
1499             tiling_mode == I915_TILING_NONE)
1500                 return 4096;
1501
1502         /* Previous hardware however needs to be aligned to a power-of-two
1503          * tile height. The simplest method for determining this is to reuse
1504          * the power-of-tile object size.
1505          */
1506         return i915_gem_get_gtt_size(dev, size, tiling_mode);
1507 }
1508
1509 static int i915_gem_object_create_mmap_offset(struct drm_i915_gem_object *obj)
1510 {
1511         struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
1512         int ret;
1513
1514         if (obj->base.map_list.map)
1515                 return 0;
1516
1517         ret = drm_gem_create_mmap_offset(&obj->base);
1518         if (ret != -ENOSPC)
1519                 return ret;
1520
1521         /* Badly fragmented mmap space? The only way we can recover
1522          * space is by destroying unwanted objects. We can't randomly release
1523          * mmap_offsets as userspace expects them to be persistent for the
1524          * lifetime of the objects. The closest we can is to release the
1525          * offsets on purgeable objects by truncating it and marking it purged,
1526          * which prevents userspace from ever using that object again.
1527          */
1528         i915_gem_purge(dev_priv, obj->base.size >> PAGE_SHIFT);
1529         ret = drm_gem_create_mmap_offset(&obj->base);
1530         if (ret != -ENOSPC)
1531                 return ret;
1532
1533         i915_gem_shrink_all(dev_priv);
1534         return drm_gem_create_mmap_offset(&obj->base);
1535 }
1536
1537 static void i915_gem_object_free_mmap_offset(struct drm_i915_gem_object *obj)
1538 {
1539         if (!obj->base.map_list.map)
1540                 return;
1541
1542         drm_gem_free_mmap_offset(&obj->base);
1543 }
1544
1545 int
1546 i915_gem_mmap_gtt(struct drm_file *file,
1547                   struct drm_device *dev,
1548                   uint32_t handle,
1549                   uint64_t *offset)
1550 {
1551         struct drm_i915_private *dev_priv = dev->dev_private;
1552         struct drm_i915_gem_object *obj;
1553         int ret;
1554
1555         ret = i915_mutex_lock_interruptible(dev);
1556         if (ret)
1557                 return ret;
1558
1559         obj = to_intel_bo(drm_gem_object_lookup(dev, file, handle));
1560         if (&obj->base == NULL) {
1561                 ret = -ENOENT;
1562                 goto unlock;
1563         }
1564
1565         if (obj->base.size > dev_priv->mm.gtt_mappable_end) {
1566                 ret = -E2BIG;
1567                 goto out;
1568         }
1569
1570         if (obj->madv != I915_MADV_WILLNEED) {
1571                 DRM_ERROR("Attempting to mmap a purgeable buffer\n");
1572                 ret = -EINVAL;
1573                 goto out;
1574         }
1575
1576         ret = i915_gem_object_create_mmap_offset(obj);
1577         if (ret)
1578                 goto out;
1579
1580         *offset = (u64)obj->base.map_list.hash.key << PAGE_SHIFT;
1581
1582 out:
1583         drm_gem_object_unreference(&obj->base);
1584 unlock:
1585         mutex_unlock(&dev->struct_mutex);
1586         return ret;
1587 }
1588
1589 /**
1590  * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing
1591  * @dev: DRM device
1592  * @data: GTT mapping ioctl data
1593  * @file: GEM object info
1594  *
1595  * Simply returns the fake offset to userspace so it can mmap it.
1596  * The mmap call will end up in drm_gem_mmap(), which will set things
1597  * up so we can get faults in the handler above.
1598  *
1599  * The fault handler will take care of binding the object into the GTT
1600  * (since it may have been evicted to make room for something), allocating
1601  * a fence register, and mapping the appropriate aperture address into
1602  * userspace.
1603  */
1604 int
1605 i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
1606                         struct drm_file *file)
1607 {
1608         struct drm_i915_gem_mmap_gtt *args = data;
1609
1610         return i915_gem_mmap_gtt(file, dev, args->handle, &args->offset);
1611 }
1612
1613 /* Immediately discard the backing storage */
1614 static void
1615 i915_gem_object_truncate(struct drm_i915_gem_object *obj)
1616 {
1617         struct inode *inode;
1618
1619         i915_gem_object_free_mmap_offset(obj);
1620
1621         if (obj->base.filp == NULL)
1622                 return;
1623
1624         /* Our goal here is to return as much of the memory as
1625          * is possible back to the system as we are called from OOM.
1626          * To do this we must instruct the shmfs to drop all of its
1627          * backing pages, *now*.
1628          */
1629         inode = obj->base.filp->f_path.dentry->d_inode;
1630         shmem_truncate_range(inode, 0, (loff_t)-1);
1631
1632         obj->madv = __I915_MADV_PURGED;
1633 }
1634
1635 static inline int
1636 i915_gem_object_is_purgeable(struct drm_i915_gem_object *obj)
1637 {
1638         return obj->madv == I915_MADV_DONTNEED;
1639 }
1640
1641 static void
1642 i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj)
1643 {
1644         int page_count = obj->base.size / PAGE_SIZE;
1645         struct scatterlist *sg;
1646         int ret, i;
1647
1648         BUG_ON(obj->madv == __I915_MADV_PURGED);
1649
1650         ret = i915_gem_object_set_to_cpu_domain(obj, true);
1651         if (ret) {
1652                 /* In the event of a disaster, abandon all caches and
1653                  * hope for the best.
1654                  */
1655                 WARN_ON(ret != -EIO);
1656                 i915_gem_clflush_object(obj);
1657                 obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU;
1658         }
1659
1660         if (i915_gem_object_needs_bit17_swizzle(obj))
1661                 i915_gem_object_save_bit_17_swizzle(obj);
1662
1663         if (obj->madv == I915_MADV_DONTNEED)
1664                 obj->dirty = 0;
1665
1666         for_each_sg(obj->pages->sgl, sg, page_count, i) {
1667                 struct page *page = sg_page(sg);
1668
1669                 if (obj->dirty)
1670                         set_page_dirty(page);
1671
1672                 if (obj->madv == I915_MADV_WILLNEED)
1673                         mark_page_accessed(page);
1674
1675                 page_cache_release(page);
1676         }
1677         obj->dirty = 0;
1678
1679         sg_free_table(obj->pages);
1680         kfree(obj->pages);
1681 }
1682
1683 static int
1684 i915_gem_object_put_pages(struct drm_i915_gem_object *obj)
1685 {
1686         const struct drm_i915_gem_object_ops *ops = obj->ops;
1687
1688         if (obj->pages == NULL)
1689                 return 0;
1690
1691         BUG_ON(obj->gtt_space);
1692
1693         if (obj->pages_pin_count)
1694                 return -EBUSY;
1695
1696         ops->put_pages(obj);
1697         obj->pages = NULL;
1698
1699         list_del(&obj->gtt_list);
1700         if (i915_gem_object_is_purgeable(obj))
1701                 i915_gem_object_truncate(obj);
1702
1703         return 0;
1704 }
1705
1706 static long
1707 i915_gem_purge(struct drm_i915_private *dev_priv, long target)
1708 {
1709         struct drm_i915_gem_object *obj, *next;
1710         long count = 0;
1711
1712         list_for_each_entry_safe(obj, next,
1713                                  &dev_priv->mm.unbound_list,
1714                                  gtt_list) {
1715                 if (i915_gem_object_is_purgeable(obj) &&
1716                     i915_gem_object_put_pages(obj) == 0) {
1717                         count += obj->base.size >> PAGE_SHIFT;
1718                         if (count >= target)
1719                                 return count;
1720                 }
1721         }
1722
1723         list_for_each_entry_safe(obj, next,
1724                                  &dev_priv->mm.inactive_list,
1725                                  mm_list) {
1726                 if (i915_gem_object_is_purgeable(obj) &&
1727                     i915_gem_object_unbind(obj) == 0 &&
1728                     i915_gem_object_put_pages(obj) == 0) {
1729                         count += obj->base.size >> PAGE_SHIFT;
1730                         if (count >= target)
1731                                 return count;
1732                 }
1733         }
1734
1735         return count;
1736 }
1737
1738 static void
1739 i915_gem_shrink_all(struct drm_i915_private *dev_priv)
1740 {
1741         struct drm_i915_gem_object *obj, *next;
1742
1743         i915_gem_evict_everything(dev_priv->dev);
1744
1745         list_for_each_entry_safe(obj, next, &dev_priv->mm.unbound_list, gtt_list)
1746                 i915_gem_object_put_pages(obj);
1747 }
1748
1749 static int
1750 i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
1751 {
1752         struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
1753         int page_count, i;
1754         struct address_space *mapping;
1755         struct sg_table *st;
1756         struct scatterlist *sg;
1757         struct page *page;
1758         gfp_t gfp;
1759
1760         /* Assert that the object is not currently in any GPU domain. As it
1761          * wasn't in the GTT, there shouldn't be any way it could have been in
1762          * a GPU cache
1763          */
1764         BUG_ON(obj->base.read_domains & I915_GEM_GPU_DOMAINS);
1765         BUG_ON(obj->base.write_domain & I915_GEM_GPU_DOMAINS);
1766
1767         st = kmalloc(sizeof(*st), GFP_KERNEL);
1768         if (st == NULL)
1769                 return -ENOMEM;
1770
1771         page_count = obj->base.size / PAGE_SIZE;
1772         if (sg_alloc_table(st, page_count, GFP_KERNEL)) {
1773                 sg_free_table(st);
1774                 kfree(st);
1775                 return -ENOMEM;
1776         }
1777
1778         /* Get the list of pages out of our struct file.  They'll be pinned
1779          * at this point until we release them.
1780          *
1781          * Fail silently without starting the shrinker
1782          */
1783         mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
1784         gfp = mapping_gfp_mask(mapping);
1785         gfp |= __GFP_NORETRY | __GFP_NOWARN;
1786         gfp &= ~(__GFP_IO | __GFP_WAIT);
1787         for_each_sg(st->sgl, sg, page_count, i) {
1788                 page = shmem_read_mapping_page_gfp(mapping, i, gfp);
1789                 if (IS_ERR(page)) {
1790                         i915_gem_purge(dev_priv, page_count);
1791                         page = shmem_read_mapping_page_gfp(mapping, i, gfp);
1792                 }
1793                 if (IS_ERR(page)) {
1794                         /* We've tried hard to allocate the memory by reaping
1795                          * our own buffer, now let the real VM do its job and
1796                          * go down in flames if truly OOM.
1797                          */
1798                         gfp &= ~(__GFP_NORETRY | __GFP_NOWARN);
1799                         gfp |= __GFP_IO | __GFP_WAIT;
1800
1801                         i915_gem_shrink_all(dev_priv);
1802                         page = shmem_read_mapping_page_gfp(mapping, i, gfp);
1803                         if (IS_ERR(page))
1804                                 goto err_pages;
1805
1806                         gfp |= __GFP_NORETRY | __GFP_NOWARN;
1807                         gfp &= ~(__GFP_IO | __GFP_WAIT);
1808                 }
1809
1810                 sg_set_page(sg, page, PAGE_SIZE, 0);
1811         }
1812
1813         obj->pages = st;
1814
1815         if (i915_gem_object_needs_bit17_swizzle(obj))
1816                 i915_gem_object_do_bit_17_swizzle(obj);
1817
1818         return 0;
1819
1820 err_pages:
1821         for_each_sg(st->sgl, sg, i, page_count)
1822                 page_cache_release(sg_page(sg));
1823         sg_free_table(st);
1824         kfree(st);
1825         return PTR_ERR(page);
1826 }
1827
1828 /* Ensure that the associated pages are gathered from the backing storage
1829  * and pinned into our object. i915_gem_object_get_pages() may be called
1830  * multiple times before they are released by a single call to
1831  * i915_gem_object_put_pages() - once the pages are no longer referenced
1832  * either as a result of memory pressure (reaping pages under the shrinker)
1833  * or as the object is itself released.
1834  */
1835 int
1836 i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
1837 {
1838         struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
1839         const struct drm_i915_gem_object_ops *ops = obj->ops;
1840         int ret;
1841
1842         if (obj->pages)
1843                 return 0;
1844
1845         BUG_ON(obj->pages_pin_count);
1846
1847         ret = ops->get_pages(obj);
1848         if (ret)
1849                 return ret;
1850
1851         list_add_tail(&obj->gtt_list, &dev_priv->mm.unbound_list);
1852         return 0;
1853 }
1854
1855 void
1856 i915_gem_object_move_to_active(struct drm_i915_gem_object *obj,
1857                                struct intel_ring_buffer *ring)
1858 {
1859         struct drm_device *dev = obj->base.dev;
1860         struct drm_i915_private *dev_priv = dev->dev_private;
1861         u32 seqno = intel_ring_get_seqno(ring);
1862
1863         BUG_ON(ring == NULL);
1864         obj->ring = ring;
1865
1866         /* Add a reference if we're newly entering the active list. */
1867         if (!obj->active) {
1868                 drm_gem_object_reference(&obj->base);
1869                 obj->active = 1;
1870         }
1871
1872         /* Move from whatever list we were on to the tail of execution. */
1873         list_move_tail(&obj->mm_list, &dev_priv->mm.active_list);
1874         list_move_tail(&obj->ring_list, &ring->active_list);
1875
1876         obj->last_read_seqno = seqno;
1877
1878         if (obj->fenced_gpu_access) {
1879                 obj->last_fenced_seqno = seqno;
1880
1881                 /* Bump MRU to take account of the delayed flush */
1882                 if (obj->fence_reg != I915_FENCE_REG_NONE) {
1883                         struct drm_i915_fence_reg *reg;
1884
1885                         reg = &dev_priv->fence_regs[obj->fence_reg];
1886                         list_move_tail(&reg->lru_list,
1887                                        &dev_priv->mm.fence_list);
1888                 }
1889         }
1890 }
1891
1892 static void
1893 i915_gem_object_move_to_inactive(struct drm_i915_gem_object *obj)
1894 {
1895         struct drm_device *dev = obj->base.dev;
1896         struct drm_i915_private *dev_priv = dev->dev_private;
1897
1898         BUG_ON(obj->base.write_domain & ~I915_GEM_GPU_DOMAINS);
1899         BUG_ON(!obj->active);
1900
1901         if (obj->pin_count) /* are we a framebuffer? */
1902                 intel_mark_fb_idle(obj);
1903
1904         list_move_tail(&obj->mm_list, &dev_priv->mm.inactive_list);
1905
1906         list_del_init(&obj->ring_list);
1907         obj->ring = NULL;
1908
1909         obj->last_read_seqno = 0;
1910         obj->last_write_seqno = 0;
1911         obj->base.write_domain = 0;
1912
1913         obj->last_fenced_seqno = 0;
1914         obj->fenced_gpu_access = false;
1915
1916         obj->active = 0;
1917         drm_gem_object_unreference(&obj->base);
1918
1919         WARN_ON(i915_verify_lists(dev));
1920 }
1921
1922 static int
1923 i915_gem_handle_seqno_wrap(struct drm_device *dev)
1924 {
1925         struct drm_i915_private *dev_priv = dev->dev_private;
1926         struct intel_ring_buffer *ring;
1927         int ret, i, j;
1928
1929         /* The hardware uses various monotonic 32-bit counters, if we
1930          * detect that they will wraparound we need to idle the GPU
1931          * and reset those counters.
1932          */
1933         ret = 0;
1934         for_each_ring(ring, dev_priv, i) {
1935                 for (j = 0; j < ARRAY_SIZE(ring->sync_seqno); j++)
1936                         ret |= ring->sync_seqno[j] != 0;
1937         }
1938         if (ret == 0)
1939                 return ret;
1940
1941         ret = i915_gpu_idle(dev);
1942         if (ret)
1943                 return ret;
1944
1945         i915_gem_retire_requests(dev);
1946         for_each_ring(ring, dev_priv, i) {
1947                 ret = intel_ring_handle_seqno_wrap(ring);
1948                 if (ret)
1949                         return ret;
1950
1951                 for (j = 0; j < ARRAY_SIZE(ring->sync_seqno); j++)
1952                         ring->sync_seqno[j] = 0;
1953         }
1954
1955         return 0;
1956 }
1957
1958 int
1959 i915_gem_get_seqno(struct drm_device *dev, u32 *seqno)
1960 {
1961         struct drm_i915_private *dev_priv = dev->dev_private;
1962
1963         /* reserve 0 for non-seqno */
1964         if (dev_priv->next_seqno == 0) {
1965                 int ret = i915_gem_handle_seqno_wrap(dev);
1966                 if (ret)
1967                         return ret;
1968
1969                 dev_priv->next_seqno = 1;
1970         }
1971
1972         *seqno = dev_priv->next_seqno++;
1973         return 0;
1974 }
1975
1976 int
1977 i915_add_request(struct intel_ring_buffer *ring,
1978                  struct drm_file *file,
1979                  u32 *out_seqno)
1980 {
1981         drm_i915_private_t *dev_priv = ring->dev->dev_private;
1982         struct drm_i915_gem_request *request;
1983         u32 request_ring_position;
1984         int was_empty;
1985         int ret;
1986
1987         /*
1988          * Emit any outstanding flushes - execbuf can fail to emit the flush
1989          * after having emitted the batchbuffer command. Hence we need to fix
1990          * things up similar to emitting the lazy request. The difference here
1991          * is that the flush _must_ happen before the next request, no matter
1992          * what.
1993          */
1994         ret = intel_ring_flush_all_caches(ring);
1995         if (ret)
1996                 return ret;
1997
1998         request = kmalloc(sizeof(*request), GFP_KERNEL);
1999         if (request == NULL)
2000                 return -ENOMEM;
2001
2002
2003         /* Record the position of the start of the request so that
2004          * should we detect the updated seqno part-way through the
2005          * GPU processing the request, we never over-estimate the
2006          * position of the head.
2007          */
2008         request_ring_position = intel_ring_get_tail(ring);
2009
2010         ret = ring->add_request(ring);
2011         if (ret) {
2012                 kfree(request);
2013                 return ret;
2014         }
2015
2016         request->seqno = intel_ring_get_seqno(ring);
2017         request->ring = ring;
2018         request->tail = request_ring_position;
2019         request->emitted_jiffies = jiffies;
2020         was_empty = list_empty(&ring->request_list);
2021         list_add_tail(&request->list, &ring->request_list);
2022         request->file_priv = NULL;
2023
2024         if (file) {
2025                 struct drm_i915_file_private *file_priv = file->driver_priv;
2026
2027                 spin_lock(&file_priv->mm.lock);
2028                 request->file_priv = file_priv;
2029                 list_add_tail(&request->client_list,
2030                               &file_priv->mm.request_list);
2031                 spin_unlock(&file_priv->mm.lock);
2032         }
2033
2034         trace_i915_gem_request_add(ring, request->seqno);
2035         ring->outstanding_lazy_request = 0;
2036
2037         if (!dev_priv->mm.suspended) {
2038                 if (i915_enable_hangcheck) {
2039                         mod_timer(&dev_priv->hangcheck_timer,
2040                                   round_jiffies_up(jiffies + DRM_I915_HANGCHECK_JIFFIES));
2041                 }
2042                 if (was_empty) {
2043                         queue_delayed_work(dev_priv->wq,
2044                                            &dev_priv->mm.retire_work,
2045                                            round_jiffies_up_relative(HZ));
2046                         intel_mark_busy(dev_priv->dev);
2047                 }
2048         }
2049
2050         if (out_seqno)
2051                 *out_seqno = request->seqno;
2052         return 0;
2053 }
2054
2055 static inline void
2056 i915_gem_request_remove_from_client(struct drm_i915_gem_request *request)
2057 {
2058         struct drm_i915_file_private *file_priv = request->file_priv;
2059
2060         if (!file_priv)
2061                 return;
2062
2063         spin_lock(&file_priv->mm.lock);
2064         if (request->file_priv) {
2065                 list_del(&request->client_list);
2066                 request->file_priv = NULL;
2067         }
2068         spin_unlock(&file_priv->mm.lock);
2069 }
2070
2071 static void i915_gem_reset_ring_lists(struct drm_i915_private *dev_priv,
2072                                       struct intel_ring_buffer *ring)
2073 {
2074         while (!list_empty(&ring->request_list)) {
2075                 struct drm_i915_gem_request *request;
2076
2077                 request = list_first_entry(&ring->request_list,
2078                                            struct drm_i915_gem_request,
2079                                            list);
2080
2081                 list_del(&request->list);
2082                 i915_gem_request_remove_from_client(request);
2083                 kfree(request);
2084         }
2085
2086         while (!list_empty(&ring->active_list)) {
2087                 struct drm_i915_gem_object *obj;
2088
2089                 obj = list_first_entry(&ring->active_list,
2090                                        struct drm_i915_gem_object,
2091                                        ring_list);
2092
2093                 i915_gem_object_move_to_inactive(obj);
2094         }
2095 }
2096
2097 static void i915_gem_reset_fences(struct drm_device *dev)
2098 {
2099         struct drm_i915_private *dev_priv = dev->dev_private;
2100         int i;
2101
2102         for (i = 0; i < dev_priv->num_fence_regs; i++) {
2103                 struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i];
2104
2105                 i915_gem_write_fence(dev, i, NULL);
2106
2107                 if (reg->obj)
2108                         i915_gem_object_fence_lost(reg->obj);
2109
2110                 reg->pin_count = 0;
2111                 reg->obj = NULL;
2112                 INIT_LIST_HEAD(&reg->lru_list);
2113         }
2114
2115         INIT_LIST_HEAD(&dev_priv->mm.fence_list);
2116 }
2117
2118 void i915_gem_reset(struct drm_device *dev)
2119 {
2120         struct drm_i915_private *dev_priv = dev->dev_private;
2121         struct drm_i915_gem_object *obj;
2122         struct intel_ring_buffer *ring;
2123         int i;
2124
2125         for_each_ring(ring, dev_priv, i)
2126                 i915_gem_reset_ring_lists(dev_priv, ring);
2127
2128         /* Move everything out of the GPU domains to ensure we do any
2129          * necessary invalidation upon reuse.
2130          */
2131         list_for_each_entry(obj,
2132                             &dev_priv->mm.inactive_list,
2133                             mm_list)
2134         {
2135                 obj->base.read_domains &= ~I915_GEM_GPU_DOMAINS;
2136         }
2137
2138         /* The fence registers are invalidated so clear them out */
2139         i915_gem_reset_fences(dev);
2140 }
2141
2142 /**
2143  * This function clears the request list as sequence numbers are passed.
2144  */
2145 void
2146 i915_gem_retire_requests_ring(struct intel_ring_buffer *ring)
2147 {
2148         uint32_t seqno;
2149
2150         if (list_empty(&ring->request_list))
2151                 return;
2152
2153         WARN_ON(i915_verify_lists(ring->dev));
2154
2155         seqno = ring->get_seqno(ring, true);
2156
2157         while (!list_empty(&ring->request_list)) {
2158                 struct drm_i915_gem_request *request;
2159
2160                 request = list_first_entry(&ring->request_list,
2161                                            struct drm_i915_gem_request,
2162                                            list);
2163
2164                 if (!i915_seqno_passed(seqno, request->seqno))
2165                         break;
2166
2167                 trace_i915_gem_request_retire(ring, request->seqno);
2168                 /* We know the GPU must have read the request to have
2169                  * sent us the seqno + interrupt, so use the position
2170                  * of tail of the request to update the last known position
2171                  * of the GPU head.
2172                  */
2173                 ring->last_retired_head = request->tail;
2174
2175                 list_del(&request->list);
2176                 i915_gem_request_remove_from_client(request);
2177                 kfree(request);
2178         }
2179
2180         /* Move any buffers on the active list that are no longer referenced
2181          * by the ringbuffer to the flushing/inactive lists as appropriate.
2182          */
2183         while (!list_empty(&ring->active_list)) {
2184                 struct drm_i915_gem_object *obj;
2185
2186                 obj = list_first_entry(&ring->active_list,
2187                                       struct drm_i915_gem_object,
2188                                       ring_list);
2189
2190                 if (!i915_seqno_passed(seqno, obj->last_read_seqno))
2191                         break;
2192
2193                 i915_gem_object_move_to_inactive(obj);
2194         }
2195
2196         if (unlikely(ring->trace_irq_seqno &&
2197                      i915_seqno_passed(seqno, ring->trace_irq_seqno))) {
2198                 ring->irq_put(ring);
2199                 ring->trace_irq_seqno = 0;
2200         }
2201
2202         WARN_ON(i915_verify_lists(ring->dev));
2203 }
2204
2205 void
2206 i915_gem_retire_requests(struct drm_device *dev)
2207 {
2208         drm_i915_private_t *dev_priv = dev->dev_private;
2209         struct intel_ring_buffer *ring;
2210         int i;
2211
2212         for_each_ring(ring, dev_priv, i)
2213                 i915_gem_retire_requests_ring(ring);
2214 }
2215
2216 static void
2217 i915_gem_retire_work_handler(struct work_struct *work)
2218 {
2219         drm_i915_private_t *dev_priv;
2220         struct drm_device *dev;
2221         struct intel_ring_buffer *ring;
2222         bool idle;
2223         int i;
2224
2225         dev_priv = container_of(work, drm_i915_private_t,
2226                                 mm.retire_work.work);
2227         dev = dev_priv->dev;
2228
2229         /* Come back later if the device is busy... */
2230         if (!mutex_trylock(&dev->struct_mutex)) {
2231                 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work,
2232                                    round_jiffies_up_relative(HZ));
2233                 return;
2234         }
2235
2236         i915_gem_retire_requests(dev);
2237
2238         /* Send a periodic flush down the ring so we don't hold onto GEM
2239          * objects indefinitely.
2240          */
2241         idle = true;
2242         for_each_ring(ring, dev_priv, i) {
2243                 if (ring->gpu_caches_dirty)
2244                         i915_add_request(ring, NULL, NULL);
2245
2246                 idle &= list_empty(&ring->request_list);
2247         }
2248
2249         if (!dev_priv->mm.suspended && !idle)
2250                 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work,
2251                                    round_jiffies_up_relative(HZ));
2252         if (idle)
2253                 intel_mark_idle(dev);
2254
2255         mutex_unlock(&dev->struct_mutex);
2256 }
2257
2258 /**
2259  * Ensures that an object will eventually get non-busy by flushing any required
2260  * write domains, emitting any outstanding lazy request and retiring and
2261  * completed requests.
2262  */
2263 static int
2264 i915_gem_object_flush_active(struct drm_i915_gem_object *obj)
2265 {
2266         int ret;
2267
2268         if (obj->active) {
2269                 ret = i915_gem_check_olr(obj->ring, obj->last_read_seqno);
2270                 if (ret)
2271                         return ret;
2272
2273                 i915_gem_retire_requests_ring(obj->ring);
2274         }
2275
2276         return 0;
2277 }
2278
2279 /**
2280  * i915_gem_wait_ioctl - implements DRM_IOCTL_I915_GEM_WAIT
2281  * @DRM_IOCTL_ARGS: standard ioctl arguments
2282  *
2283  * Returns 0 if successful, else an error is returned with the remaining time in
2284  * the timeout parameter.
2285  *  -ETIME: object is still busy after timeout
2286  *  -ERESTARTSYS: signal interrupted the wait
2287  *  -ENONENT: object doesn't exist
2288  * Also possible, but rare:
2289  *  -EAGAIN: GPU wedged
2290  *  -ENOMEM: damn
2291  *  -ENODEV: Internal IRQ fail
2292  *  -E?: The add request failed
2293  *
2294  * The wait ioctl with a timeout of 0 reimplements the busy ioctl. With any
2295  * non-zero timeout parameter the wait ioctl will wait for the given number of
2296  * nanoseconds on an object becoming unbusy. Since the wait itself does so
2297  * without holding struct_mutex the object may become re-busied before this
2298  * function completes. A similar but shorter * race condition exists in the busy
2299  * ioctl
2300  */
2301 int
2302 i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
2303 {
2304         struct drm_i915_gem_wait *args = data;
2305         struct drm_i915_gem_object *obj;
2306         struct intel_ring_buffer *ring = NULL;
2307         struct timespec timeout_stack, *timeout = NULL;
2308         u32 seqno = 0;
2309         int ret = 0;
2310
2311         if (args->timeout_ns >= 0) {
2312                 timeout_stack = ns_to_timespec(args->timeout_ns);
2313                 timeout = &timeout_stack;
2314         }
2315
2316         ret = i915_mutex_lock_interruptible(dev);
2317         if (ret)
2318                 return ret;
2319
2320         obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->bo_handle));
2321         if (&obj->base == NULL) {
2322                 mutex_unlock(&dev->struct_mutex);
2323                 return -ENOENT;
2324         }
2325
2326         /* Need to make sure the object gets inactive eventually. */
2327         ret = i915_gem_object_flush_active(obj);
2328         if (ret)
2329                 goto out;
2330
2331         if (obj->active) {
2332                 seqno = obj->last_read_seqno;
2333                 ring = obj->ring;
2334         }
2335
2336         if (seqno == 0)
2337                  goto out;
2338
2339         /* Do this after OLR check to make sure we make forward progress polling
2340          * on this IOCTL with a 0 timeout (like busy ioctl)
2341          */
2342         if (!args->timeout_ns) {
2343                 ret = -ETIME;
2344                 goto out;
2345         }
2346
2347         drm_gem_object_unreference(&obj->base);
2348         mutex_unlock(&dev->struct_mutex);
2349
2350         ret = __wait_seqno(ring, seqno, true, timeout);
2351         if (timeout) {
2352                 WARN_ON(!timespec_valid(timeout));
2353                 args->timeout_ns = timespec_to_ns(timeout);
2354         }
2355         return ret;
2356
2357 out:
2358         drm_gem_object_unreference(&obj->base);
2359         mutex_unlock(&dev->struct_mutex);
2360         return ret;
2361 }
2362
2363 /**
2364  * i915_gem_object_sync - sync an object to a ring.
2365  *
2366  * @obj: object which may be in use on another ring.
2367  * @to: ring we wish to use the object on. May be NULL.
2368  *
2369  * This code is meant to abstract object synchronization with the GPU.
2370  * Calling with NULL implies synchronizing the object with the CPU
2371  * rather than a particular GPU ring.
2372  *
2373  * Returns 0 if successful, else propagates up the lower layer error.
2374  */
2375 int
2376 i915_gem_object_sync(struct drm_i915_gem_object *obj,
2377                      struct intel_ring_buffer *to)
2378 {
2379         struct intel_ring_buffer *from = obj->ring;
2380         u32 seqno;
2381         int ret, idx;
2382
2383         if (from == NULL || to == from)
2384                 return 0;
2385
2386         if (to == NULL || !i915_semaphore_is_enabled(obj->base.dev))
2387                 return i915_gem_object_wait_rendering(obj, false);
2388
2389         idx = intel_ring_sync_index(from, to);
2390
2391         seqno = obj->last_read_seqno;
2392         if (seqno <= from->sync_seqno[idx])
2393                 return 0;
2394
2395         ret = i915_gem_check_olr(obj->ring, seqno);
2396         if (ret)
2397                 return ret;
2398
2399         ret = to->sync_to(to, from, seqno);
2400         if (!ret)
2401                 /* We use last_read_seqno because sync_to()
2402                  * might have just caused seqno wrap under
2403                  * the radar.
2404                  */
2405                 from->sync_seqno[idx] = obj->last_read_seqno;
2406
2407         return ret;
2408 }
2409
2410 static void i915_gem_object_finish_gtt(struct drm_i915_gem_object *obj)
2411 {
2412         u32 old_write_domain, old_read_domains;
2413
2414         /* Act a barrier for all accesses through the GTT */
2415         mb();
2416
2417         /* Force a pagefault for domain tracking on next user access */
2418         i915_gem_release_mmap(obj);
2419
2420         if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0)
2421                 return;
2422
2423         old_read_domains = obj->base.read_domains;
2424         old_write_domain = obj->base.write_domain;
2425
2426         obj->base.read_domains &= ~I915_GEM_DOMAIN_GTT;
2427         obj->base.write_domain &= ~I915_GEM_DOMAIN_GTT;
2428
2429         trace_i915_gem_object_change_domain(obj,
2430                                             old_read_domains,
2431                                             old_write_domain);
2432 }
2433
2434 /**
2435  * Unbinds an object from the GTT aperture.
2436  */
2437 int
2438 i915_gem_object_unbind(struct drm_i915_gem_object *obj)
2439 {
2440         drm_i915_private_t *dev_priv = obj->base.dev->dev_private;
2441         int ret = 0;
2442
2443         if (obj->gtt_space == NULL)
2444                 return 0;
2445
2446         if (obj->pin_count)
2447                 return -EBUSY;
2448
2449         BUG_ON(obj->pages == NULL);
2450
2451         ret = i915_gem_object_finish_gpu(obj);
2452         if (ret)
2453                 return ret;
2454         /* Continue on if we fail due to EIO, the GPU is hung so we
2455          * should be safe and we need to cleanup or else we might
2456          * cause memory corruption through use-after-free.
2457          */
2458
2459         i915_gem_object_finish_gtt(obj);
2460
2461         /* release the fence reg _after_ flushing */
2462         ret = i915_gem_object_put_fence(obj);
2463         if (ret)
2464                 return ret;
2465
2466         trace_i915_gem_object_unbind(obj);
2467
2468         if (obj->has_global_gtt_mapping)
2469                 i915_gem_gtt_unbind_object(obj);
2470         if (obj->has_aliasing_ppgtt_mapping) {
2471                 i915_ppgtt_unbind_object(dev_priv->mm.aliasing_ppgtt, obj);
2472                 obj->has_aliasing_ppgtt_mapping = 0;
2473         }
2474         i915_gem_gtt_finish_object(obj);
2475
2476         list_del(&obj->mm_list);
2477         list_move_tail(&obj->gtt_list, &dev_priv->mm.unbound_list);
2478         /* Avoid an unnecessary call to unbind on rebind. */
2479         obj->map_and_fenceable = true;
2480
2481         drm_mm_put_block(obj->gtt_space);
2482         obj->gtt_space = NULL;
2483         obj->gtt_offset = 0;
2484
2485         return 0;
2486 }
2487
2488 int i915_gpu_idle(struct drm_device *dev)
2489 {
2490         drm_i915_private_t *dev_priv = dev->dev_private;
2491         struct intel_ring_buffer *ring;
2492         int ret, i;
2493
2494         /* Flush everything onto the inactive list. */
2495         for_each_ring(ring, dev_priv, i) {
2496                 ret = i915_switch_context(ring, NULL, DEFAULT_CONTEXT_ID);
2497                 if (ret)
2498                         return ret;
2499
2500                 ret = intel_ring_idle(ring);
2501                 if (ret)
2502                         return ret;
2503         }
2504
2505         return 0;
2506 }
2507
2508 static void sandybridge_write_fence_reg(struct drm_device *dev, int reg,
2509                                         struct drm_i915_gem_object *obj)
2510 {
2511         drm_i915_private_t *dev_priv = dev->dev_private;
2512         uint64_t val;
2513
2514         if (obj) {
2515                 u32 size = obj->gtt_space->size;
2516
2517                 val = (uint64_t)((obj->gtt_offset + size - 4096) &
2518                                  0xfffff000) << 32;
2519                 val |= obj->gtt_offset & 0xfffff000;
2520                 val |= (uint64_t)((obj->stride / 128) - 1) <<
2521                         SANDYBRIDGE_FENCE_PITCH_SHIFT;
2522
2523                 if (obj->tiling_mode == I915_TILING_Y)
2524                         val |= 1 << I965_FENCE_TILING_Y_SHIFT;
2525                 val |= I965_FENCE_REG_VALID;
2526         } else
2527                 val = 0;
2528
2529         I915_WRITE64(FENCE_REG_SANDYBRIDGE_0 + reg * 8, val);
2530         POSTING_READ(FENCE_REG_SANDYBRIDGE_0 + reg * 8);
2531 }
2532
2533 static void i965_write_fence_reg(struct drm_device *dev, int reg,
2534                                  struct drm_i915_gem_object *obj)
2535 {
2536         drm_i915_private_t *dev_priv = dev->dev_private;
2537         uint64_t val;
2538
2539         if (obj) {
2540                 u32 size = obj->gtt_space->size;
2541
2542                 val = (uint64_t)((obj->gtt_offset + size - 4096) &
2543                                  0xfffff000) << 32;
2544                 val |= obj->gtt_offset & 0xfffff000;
2545                 val |= ((obj->stride / 128) - 1) << I965_FENCE_PITCH_SHIFT;
2546                 if (obj->tiling_mode == I915_TILING_Y)
2547                         val |= 1 << I965_FENCE_TILING_Y_SHIFT;
2548                 val |= I965_FENCE_REG_VALID;
2549         } else
2550                 val = 0;
2551
2552         I915_WRITE64(FENCE_REG_965_0 + reg * 8, val);
2553         POSTING_READ(FENCE_REG_965_0 + reg * 8);
2554 }
2555
2556 static void i915_write_fence_reg(struct drm_device *dev, int reg,
2557                                  struct drm_i915_gem_object *obj)
2558 {
2559         drm_i915_private_t *dev_priv = dev->dev_private;
2560         u32 val;
2561
2562         if (obj) {
2563                 u32 size = obj->gtt_space->size;
2564                 int pitch_val;
2565                 int tile_width;
2566
2567                 WARN((obj->gtt_offset & ~I915_FENCE_START_MASK) ||
2568                      (size & -size) != size ||
2569                      (obj->gtt_offset & (size - 1)),
2570                      "object 0x%08x [fenceable? %d] not 1M or pot-size (0x%08x) aligned\n",
2571                      obj->gtt_offset, obj->map_and_fenceable, size);
2572
2573                 if (obj->tiling_mode == I915_TILING_Y && HAS_128_BYTE_Y_TILING(dev))
2574                         tile_width = 128;
2575                 else
2576                         tile_width = 512;
2577
2578                 /* Note: pitch better be a power of two tile widths */
2579                 pitch_val = obj->stride / tile_width;
2580                 pitch_val = ffs(pitch_val) - 1;
2581
2582                 val = obj->gtt_offset;
2583                 if (obj->tiling_mode == I915_TILING_Y)
2584                         val |= 1 << I830_FENCE_TILING_Y_SHIFT;
2585                 val |= I915_FENCE_SIZE_BITS(size);
2586                 val |= pitch_val << I830_FENCE_PITCH_SHIFT;
2587                 val |= I830_FENCE_REG_VALID;
2588         } else
2589                 val = 0;
2590
2591         if (reg < 8)
2592                 reg = FENCE_REG_830_0 + reg * 4;
2593         else
2594                 reg = FENCE_REG_945_8 + (reg - 8) * 4;
2595
2596         I915_WRITE(reg, val);
2597         POSTING_READ(reg);
2598 }
2599
2600 static void i830_write_fence_reg(struct drm_device *dev, int reg,
2601                                 struct drm_i915_gem_object *obj)
2602 {
2603         drm_i915_private_t *dev_priv = dev->dev_private;
2604         uint32_t val;
2605
2606         if (obj) {
2607                 u32 size = obj->gtt_space->size;
2608                 uint32_t pitch_val;
2609
2610                 WARN((obj->gtt_offset & ~I830_FENCE_START_MASK) ||
2611                      (size & -size) != size ||
2612                      (obj->gtt_offset & (size - 1)),
2613                      "object 0x%08x not 512K or pot-size 0x%08x aligned\n",
2614                      obj->gtt_offset, size);
2615
2616                 pitch_val = obj->stride / 128;
2617                 pitch_val = ffs(pitch_val) - 1;
2618
2619                 val = obj->gtt_offset;
2620                 if (obj->tiling_mode == I915_TILING_Y)
2621                         val |= 1 << I830_FENCE_TILING_Y_SHIFT;
2622                 val |= I830_FENCE_SIZE_BITS(size);
2623                 val |= pitch_val << I830_FENCE_PITCH_SHIFT;
2624                 val |= I830_FENCE_REG_VALID;
2625         } else
2626                 val = 0;
2627
2628         I915_WRITE(FENCE_REG_830_0 + reg * 4, val);
2629         POSTING_READ(FENCE_REG_830_0 + reg * 4);
2630 }
2631
2632 static void i915_gem_write_fence(struct drm_device *dev, int reg,
2633                                  struct drm_i915_gem_object *obj)
2634 {
2635         switch (INTEL_INFO(dev)->gen) {
2636         case 7:
2637         case 6: sandybridge_write_fence_reg(dev, reg, obj); break;
2638         case 5:
2639         case 4: i965_write_fence_reg(dev, reg, obj); break;
2640         case 3: i915_write_fence_reg(dev, reg, obj); break;
2641         case 2: i830_write_fence_reg(dev, reg, obj); break;
2642         default: break;
2643         }
2644 }
2645
2646 static inline int fence_number(struct drm_i915_private *dev_priv,
2647                                struct drm_i915_fence_reg *fence)
2648 {
2649         return fence - dev_priv->fence_regs;
2650 }
2651
2652 static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj,
2653                                          struct drm_i915_fence_reg *fence,
2654                                          bool enable)
2655 {
2656         struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
2657         int reg = fence_number(dev_priv, fence);
2658
2659         i915_gem_write_fence(obj->base.dev, reg, enable ? obj : NULL);
2660
2661         if (enable) {
2662                 obj->fence_reg = reg;
2663                 fence->obj = obj;
2664                 list_move_tail(&fence->lru_list, &dev_priv->mm.fence_list);
2665         } else {
2666                 obj->fence_reg = I915_FENCE_REG_NONE;
2667                 fence->obj = NULL;
2668                 list_del_init(&fence->lru_list);
2669         }
2670 }
2671
2672 static int
2673 i915_gem_object_flush_fence(struct drm_i915_gem_object *obj)
2674 {
2675         if (obj->last_fenced_seqno) {
2676                 int ret = i915_wait_seqno(obj->ring, obj->last_fenced_seqno);
2677                 if (ret)
2678                         return ret;
2679
2680                 obj->last_fenced_seqno = 0;
2681         }
2682
2683         /* Ensure that all CPU reads are completed before installing a fence
2684          * and all writes before removing the fence.
2685          */
2686         if (obj->base.read_domains & I915_GEM_DOMAIN_GTT)
2687                 mb();
2688
2689         obj->fenced_gpu_access = false;
2690         return 0;
2691 }
2692
2693 int
2694 i915_gem_object_put_fence(struct drm_i915_gem_object *obj)
2695 {
2696         struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
2697         int ret;
2698
2699         ret = i915_gem_object_flush_fence(obj);
2700         if (ret)
2701                 return ret;
2702
2703         if (obj->fence_reg == I915_FENCE_REG_NONE)
2704                 return 0;
2705
2706         i915_gem_object_update_fence(obj,
2707                                      &dev_priv->fence_regs[obj->fence_reg],
2708                                      false);
2709         i915_gem_object_fence_lost(obj);
2710
2711         return 0;
2712 }
2713
2714 static struct drm_i915_fence_reg *
2715 i915_find_fence_reg(struct drm_device *dev)
2716 {
2717         struct drm_i915_private *dev_priv = dev->dev_private;
2718         struct drm_i915_fence_reg *reg, *avail;
2719         int i;
2720
2721         /* First try to find a free reg */
2722         avail = NULL;
2723         for (i = dev_priv->fence_reg_start; i < dev_priv->num_fence_regs; i++) {
2724                 reg = &dev_priv->fence_regs[i];
2725                 if (!reg->obj)
2726                         return reg;
2727
2728                 if (!reg->pin_count)
2729                         avail = reg;
2730         }
2731
2732         if (avail == NULL)
2733                 return NULL;
2734
2735         /* None available, try to steal one or wait for a user to finish */
2736         list_for_each_entry(reg, &dev_priv->mm.fence_list, lru_list) {
2737                 if (reg->pin_count)
2738                         continue;
2739
2740                 return reg;
2741         }
2742
2743         return NULL;
2744 }
2745
2746 /**
2747  * i915_gem_object_get_fence - set up fencing for an object
2748  * @obj: object to map through a fence reg
2749  *
2750  * When mapping objects through the GTT, userspace wants to be able to write
2751  * to them without having to worry about swizzling if the object is tiled.
2752  * This function walks the fence regs looking for a free one for @obj,
2753  * stealing one if it can't find any.
2754  *
2755  * It then sets up the reg based on the object's properties: address, pitch
2756  * and tiling format.
2757  *
2758  * For an untiled surface, this removes any existing fence.
2759  */
2760 int
2761 i915_gem_object_get_fence(struct drm_i915_gem_object *obj)
2762 {
2763         struct drm_device *dev = obj->base.dev;
2764         struct drm_i915_private *dev_priv = dev->dev_private;
2765         bool enable = obj->tiling_mode != I915_TILING_NONE;
2766         struct drm_i915_fence_reg *reg;
2767         int ret;
2768
2769         /* Have we updated the tiling parameters upon the object and so
2770          * will need to serialise the write to the associated fence register?
2771          */
2772         if (obj->fence_dirty) {
2773                 ret = i915_gem_object_flush_fence(obj);
2774                 if (ret)
2775                         return ret;
2776         }
2777
2778         /* Just update our place in the LRU if our fence is getting reused. */
2779         if (obj->fence_reg != I915_FENCE_REG_NONE) {
2780                 reg = &dev_priv->fence_regs[obj->fence_reg];
2781                 if (!obj->fence_dirty) {
2782                         list_move_tail(&reg->lru_list,
2783                                        &dev_priv->mm.fence_list);
2784                         return 0;
2785                 }
2786         } else if (enable) {
2787                 reg = i915_find_fence_reg(dev);
2788                 if (reg == NULL)
2789                         return -EDEADLK;
2790
2791                 if (reg->obj) {
2792                         struct drm_i915_gem_object *old = reg->obj;
2793
2794                         ret = i915_gem_object_flush_fence(old);
2795                         if (ret)
2796                                 return ret;
2797
2798                         i915_gem_object_fence_lost(old);
2799                 }
2800         } else
2801                 return 0;
2802
2803         i915_gem_object_update_fence(obj, reg, enable);
2804         obj->fence_dirty = false;
2805
2806         return 0;
2807 }
2808
2809 static bool i915_gem_valid_gtt_space(struct drm_device *dev,
2810                                      struct drm_mm_node *gtt_space,
2811                                      unsigned long cache_level)
2812 {
2813         struct drm_mm_node *other;
2814
2815         /* On non-LLC machines we have to be careful when putting differing
2816          * types of snoopable memory together to avoid the prefetcher
2817          * crossing memory domains and dying.
2818          */
2819         if (HAS_LLC(dev))
2820                 return true;
2821
2822         if (gtt_space == NULL)
2823                 return true;
2824
2825         if (list_empty(&gtt_space->node_list))
2826                 return true;
2827
2828         other = list_entry(gtt_space->node_list.prev, struct drm_mm_node, node_list);
2829         if (other->allocated && !other->hole_follows && other->color != cache_level)
2830                 return false;
2831
2832         other = list_entry(gtt_space->node_list.next, struct drm_mm_node, node_list);
2833         if (other->allocated && !gtt_space->hole_follows && other->color != cache_level)
2834                 return false;
2835
2836         return true;
2837 }
2838
/*
 * Debug helper, compiled to a no-op unless WATCH_GTT is set.
 *
 * Walks mm.gtt_list and complains about every object that has no GTT node,
 * whose node colour disagrees with its cache level, or whose placement fails
 * i915_gem_valid_gtt_space(). Fires a WARN_ON if anything was found.
 */
static void i915_gem_verify_gtt(struct drm_device *dev)
{
#if WATCH_GTT
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_gem_object *obj;
	int err = 0;

	list_for_each_entry(obj, &dev_priv->mm.gtt_list, gtt_list) {
		struct drm_mm_node *node = obj->gtt_space;

		if (node == NULL) {
			printk(KERN_ERR "object found on GTT list with no space reserved\n");
			err++;
		} else if (obj->cache_level != node->color) {
			printk(KERN_ERR "object reserved space [%08lx, %08lx] with wrong color, cache_level=%x, color=%lx\n",
			       node->start,
			       node->start + node->size,
			       obj->cache_level,
			       node->color);
			err++;
		} else if (!i915_gem_valid_gtt_space(dev, node,
						     obj->cache_level)) {
			printk(KERN_ERR "invalid GTT space found at [%08lx, %08lx] - color=%x\n",
			       node->start,
			       node->start + node->size,
			       obj->cache_level);
			err++;
		}
	}

	WARN_ON(err);
#endif
}
2878
2879 /**
2880  * Finds free space in the GTT aperture and binds the object there.
2881  */
2882 static int
2883 i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj,
2884                             unsigned alignment,
2885                             bool map_and_fenceable,
2886                             bool nonblocking)
2887 {
2888         struct drm_device *dev = obj->base.dev;
2889         drm_i915_private_t *dev_priv = dev->dev_private;
2890         struct drm_mm_node *free_space;
2891         u32 size, fence_size, fence_alignment, unfenced_alignment;
2892         bool mappable, fenceable;
2893         int ret;
2894
2895         if (obj->madv != I915_MADV_WILLNEED) {
2896                 DRM_ERROR("Attempting to bind a purgeable object\n");
2897                 return -EINVAL;
2898         }
2899
2900         fence_size = i915_gem_get_gtt_size(dev,
2901                                            obj->base.size,
2902                                            obj->tiling_mode);
2903         fence_alignment = i915_gem_get_gtt_alignment(dev,
2904                                                      obj->base.size,
2905                                                      obj->tiling_mode);
2906         unfenced_alignment =
2907                 i915_gem_get_unfenced_gtt_alignment(dev,
2908                                                     obj->base.size,
2909                                                     obj->tiling_mode);
2910
2911         if (alignment == 0)
2912                 alignment = map_and_fenceable ? fence_alignment :
2913                                                 unfenced_alignment;
2914         if (map_and_fenceable && alignment & (fence_alignment - 1)) {
2915                 DRM_ERROR("Invalid object alignment requested %u\n", alignment);
2916                 return -EINVAL;
2917         }
2918
2919         size = map_and_fenceable ? fence_size : obj->base.size;
2920
2921         /* If the object is bigger than the entire aperture, reject it early
2922          * before evicting everything in a vain attempt to find space.
2923          */
2924         if (obj->base.size >
2925             (map_and_fenceable ? dev_priv->mm.gtt_mappable_end : dev_priv->mm.gtt_total)) {
2926                 DRM_ERROR("Attempting to bind an object larger than the aperture\n");
2927                 return -E2BIG;
2928         }
2929
2930         ret = i915_gem_object_get_pages(obj);
2931         if (ret)
2932                 return ret;
2933
2934         i915_gem_object_pin_pages(obj);
2935
2936  search_free:
2937         if (map_and_fenceable)
2938                 free_space = drm_mm_search_free_in_range_color(&dev_priv->mm.gtt_space,
2939                                                                size, alignment, obj->cache_level,
2940                                                                0, dev_priv->mm.gtt_mappable_end,
2941                                                                false);
2942         else
2943                 free_space = drm_mm_search_free_color(&dev_priv->mm.gtt_space,
2944                                                       size, alignment, obj->cache_level,
2945                                                       false);
2946
2947         if (free_space != NULL) {
2948                 if (map_and_fenceable)
2949                         free_space =
2950                                 drm_mm_get_block_range_generic(free_space,
2951                                                                size, alignment, obj->cache_level,
2952                                                                0, dev_priv->mm.gtt_mappable_end,
2953                                                                false);
2954                 else
2955                         free_space =
2956                                 drm_mm_get_block_generic(free_space,
2957                                                          size, alignment, obj->cache_level,
2958                                                          false);
2959         }
2960         if (free_space == NULL) {
2961                 ret = i915_gem_evict_something(dev, size, alignment,
2962                                                obj->cache_level,
2963                                                map_and_fenceable,
2964                                                nonblocking);
2965                 if (ret) {
2966                         i915_gem_object_unpin_pages(obj);
2967                         return ret;
2968                 }
2969
2970                 goto search_free;
2971         }
2972         if (WARN_ON(!i915_gem_valid_gtt_space(dev,
2973                                               free_space,
2974                                               obj->cache_level))) {
2975                 i915_gem_object_unpin_pages(obj);
2976                 drm_mm_put_block(free_space);
2977                 return -EINVAL;
2978         }
2979
2980         ret = i915_gem_gtt_prepare_object(obj);
2981         if (ret) {
2982                 i915_gem_object_unpin_pages(obj);
2983                 drm_mm_put_block(free_space);
2984                 return ret;
2985         }
2986
2987         list_move_tail(&obj->gtt_list, &dev_priv->mm.bound_list);
2988         list_add_tail(&obj->mm_list, &dev_priv->mm.inactive_list);
2989
2990         obj->gtt_space = free_space;
2991         obj->gtt_offset = free_space->start;
2992
2993         fenceable =
2994                 free_space->size == fence_size &&
2995                 (free_space->start & (fence_alignment - 1)) == 0;
2996
2997         mappable =
2998                 obj->gtt_offset + obj->base.size <= dev_priv->mm.gtt_mappable_end;
2999
3000         obj->map_and_fenceable = mappable && fenceable;
3001
3002         i915_gem_object_unpin_pages(obj);
3003         trace_i915_gem_object_bind(obj, map_and_fenceable);
3004         i915_gem_verify_gtt(dev);
3005         return 0;
3006 }
3007
3008 void
3009 i915_gem_clflush_object(struct drm_i915_gem_object *obj)
3010 {
3011         /* If we don't have a page list set up, then we're not pinned
3012          * to GPU, and we can ignore the cache flush because it'll happen
3013          * again at bind time.
3014          */
3015         if (obj->pages == NULL)
3016                 return;
3017
3018         /* If the GPU is snooping the contents of the CPU cache,
3019          * we do not need to manually clear the CPU cache lines.  However,
3020          * the caches are only snooped when the render cache is
3021          * flushed/invalidated.  As we always have to emit invalidations
3022          * and flushes when moving into and out of the RENDER domain, correct
3023          * snooping behaviour occurs naturally as the result of our domain
3024          * tracking.
3025          */
3026         if (obj->cache_level != I915_CACHE_NONE)
3027                 return;
3028
3029         trace_i915_gem_object_clflush(obj);
3030
3031         drm_clflush_sg(obj->pages);
3032 }
3033
3034 /** Flushes the GTT write domain for the object if it's dirty. */
3035 static void
3036 i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj)
3037 {
3038         uint32_t old_write_domain;
3039
3040         if (obj->base.write_domain != I915_GEM_DOMAIN_GTT)
3041                 return;
3042
3043         /* No actual flushing is required for the GTT write domain.  Writes
3044          * to it immediately go to main memory as far as we know, so there's
3045          * no chipset flush.  It also doesn't land in render cache.
3046          *
3047          * However, we do have to enforce the order so that all writes through
3048          * the GTT land before any writes to the device, such as updates to
3049          * the GATT itself.
3050          */
3051         wmb();
3052
3053         old_write_domain = obj->base.write_domain;
3054         obj->base.write_domain = 0;
3055
3056         trace_i915_gem_object_change_domain(obj,
3057                                             obj->base.read_domains,
3058                                             old_write_domain);
3059 }
3060
3061 /** Flushes the CPU write domain for the object if it's dirty. */
3062 static void
3063 i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj)
3064 {
3065         uint32_t old_write_domain;
3066
3067         if (obj->base.write_domain != I915_GEM_DOMAIN_CPU)
3068                 return;
3069
3070         i915_gem_clflush_object(obj);
3071         i915_gem_chipset_flush(obj->base.dev);
3072         old_write_domain = obj->base.write_domain;
3073         obj->base.write_domain = 0;
3074
3075         trace_i915_gem_object_change_domain(obj,
3076                                             obj->base.read_domains,
3077                                             old_write_domain);
3078 }
3079
3080 /**
3081  * Moves a single object to the GTT read, and possibly write domain.
3082  *
3083  * This function returns when the move is complete, including waiting on
3084  * flushes to occur.
3085  */
3086 int
3087 i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
3088 {
3089         drm_i915_private_t *dev_priv = obj->base.dev->dev_private;
3090         uint32_t old_write_domain, old_read_domains;
3091         int ret;
3092
3093         /* Not valid to be called on unbound objects. */
3094         if (obj->gtt_space == NULL)
3095                 return -EINVAL;
3096
3097         if (obj->base.write_domain == I915_GEM_DOMAIN_GTT)
3098                 return 0;
3099
3100         ret = i915_gem_object_wait_rendering(obj, !write);
3101         if (ret)
3102                 return ret;
3103
3104         i915_gem_object_flush_cpu_write_domain(obj);
3105
3106         old_write_domain = obj->base.write_domain;
3107         old_read_domains = obj->base.read_domains;
3108
3109         /* It should now be out of any other write domains, and we can update
3110          * the domain values for our changes.
3111          */
3112         BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
3113         obj->base.read_domains |= I915_GEM_DOMAIN_GTT;
3114         if (write) {
3115                 obj->base.read_domains = I915_GEM_DOMAIN_GTT;
3116                 obj->base.write_domain = I915_GEM_DOMAIN_GTT;
3117                 obj->dirty = 1;
3118         }
3119
3120         trace_i915_gem_object_change_domain(obj,
3121                                             old_read_domains,
3122                                             old_write_domain);
3123
3124         /* And bump the LRU for this access */
3125         if (i915_gem_object_is_inactive(obj))
3126                 list_move_tail(&obj->mm_list, &dev_priv->mm.inactive_list);
3127
3128         return 0;
3129 }
3130
3131 int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
3132                                     enum i915_cache_level cache_level)
3133 {
3134         struct drm_device *dev = obj->base.dev;
3135         drm_i915_private_t *dev_priv = dev->dev_private;
3136         int ret;
3137
3138         if (obj->cache_level == cache_level)
3139                 return 0;
3140
3141         if (obj->pin_count) {
3142                 DRM_DEBUG("can not change the cache level of pinned objects\n");
3143                 return -EBUSY;
3144         }
3145
3146         if (!i915_gem_valid_gtt_space(dev, obj->gtt_space, cache_level)) {
3147                 ret = i915_gem_object_unbind(obj);
3148                 if (ret)
3149                         return ret;
3150         }
3151
3152         if (obj->gtt_space) {
3153                 ret = i915_gem_object_finish_gpu(obj);
3154                 if (ret)
3155                         return ret;
3156
3157                 i915_gem_object_finish_gtt(obj);
3158
3159                 /* Before SandyBridge, you could not use tiling or fence
3160                  * registers with snooped memory, so relinquish any fences
3161                  * currently pointing to our region in the aperture.
3162                  */
3163                 if (INTEL_INFO(dev)->gen < 6) {
3164                         ret = i915_gem_object_put_fence(obj);
3165                         if (ret)
3166                                 return ret;
3167                 }
3168
3169                 if (obj->has_global_gtt_mapping)
3170                         i915_gem_gtt_bind_object(obj, cache_level);
3171                 if (obj->has_aliasing_ppgtt_mapping)
3172                         i915_ppgtt_bind_object(dev_priv->mm.aliasing_ppgtt,
3173                                                obj, cache_level);
3174
3175                 obj->gtt_space->color = cache_level;
3176         }
3177
3178         if (cache_level == I915_CACHE_NONE) {
3179                 u32 old_read_domains, old_write_domain;
3180
3181                 /* If we're coming from LLC cached, then we haven't
3182                  * actually been tracking whether the data is in the
3183                  * CPU cache or not, since we only allow one bit set
3184                  * in obj->write_domain and have been skipping the clflushes.
3185                  * Just set it to the CPU cache for now.
3186                  */
3187                 WARN_ON(obj->base.write_domain & ~I915_GEM_DOMAIN_CPU);
3188                 WARN_ON(obj->base.read_domains & ~I915_GEM_DOMAIN_CPU);
3189
3190                 old_read_domains = obj->base.read_domains;
3191                 old_write_domain = obj->base.write_domain;
3192
3193                 obj->base.read_domains = I915_GEM_DOMAIN_CPU;
3194                 obj->base.write_domain = I915_GEM_DOMAIN_CPU;
3195
3196                 trace_i915_gem_object_change_domain(obj,
3197                                                     old_read_domains,
3198                                                     old_write_domain);
3199         }
3200
3201         obj->cache_level = cache_level;
3202         i915_gem_verify_gtt(dev);
3203         return 0;
3204 }
3205
3206 int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
3207                                struct drm_file *file)
3208 {
3209         struct drm_i915_gem_caching *args = data;
3210         struct drm_i915_gem_object *obj;
3211         int ret;
3212
3213         ret = i915_mutex_lock_interruptible(dev);
3214         if (ret)
3215                 return ret;
3216
3217         obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
3218         if (&obj->base == NULL) {
3219                 ret = -ENOENT;
3220                 goto unlock;
3221         }
3222
3223         args->caching = obj->cache_level != I915_CACHE_NONE;
3224
3225         drm_gem_object_unreference(&obj->base);
3226 unlock:
3227         mutex_unlock(&dev->struct_mutex);
3228         return ret;
3229 }
3230
3231 int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
3232                                struct drm_file *file)
3233 {
3234         struct drm_i915_gem_caching *args = data;
3235         struct drm_i915_gem_object *obj;
3236         enum i915_cache_level level;
3237         int ret;
3238
3239         switch (args->caching) {
3240         case I915_CACHING_NONE:
3241                 level = I915_CACHE_NONE;
3242                 break;
3243         case I915_CACHING_CACHED:
3244                 level = I915_CACHE_LLC;
3245                 break;
3246         default:
3247                 return -EINVAL;
3248         }
3249
3250         ret = i915_mutex_lock_interruptible(dev);
3251         if (ret)
3252                 return ret;
3253
3254         obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
3255         if (&obj->base == NULL) {
3256                 ret = -ENOENT;
3257                 goto unlock;
3258         }
3259
3260         ret = i915_gem_object_set_cache_level(obj, level);
3261
3262         drm_gem_object_unreference(&obj->base);
3263 unlock:
3264         mutex_unlock(&dev->struct_mutex);
3265         return ret;
3266 }
3267
3268 /*
3269  * Prepare buffer for display plane (scanout, cursors, etc).
3270  * Can be called from an uninterruptible phase (modesetting) and allows
3271  * any flushes to be pipelined (for pageflips).
3272  */
3273 int
3274 i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
3275                                      u32 alignment,
3276                                      struct intel_ring_buffer *pipelined)
3277 {
3278         u32 old_read_domains, old_write_domain;
3279         int ret;
3280
3281         if (pipelined != obj->ring) {
3282                 ret = i915_gem_object_sync(obj, pipelined);
3283                 if (ret)
3284                         return ret;
3285         }
3286
3287         /* The display engine is not coherent with the LLC cache on gen6.  As
3288          * a result, we make sure that the pinning that is about to occur is
3289          * done with uncached PTEs. This is lowest common denominator for all
3290          * chipsets.
3291          *
3292          * However for gen6+, we could do better by using the GFDT bit instead
3293          * of uncaching, which would allow us to flush all the LLC-cached data
3294          * with that bit in the PTE to main memory with just one PIPE_CONTROL.
3295          */
3296         ret = i915_gem_object_set_cache_level(obj, I915_CACHE_NONE);
3297         if (ret)
3298                 return ret;
3299
3300         /* As the user may map the buffer once pinned in the display plane
3301          * (e.g. libkms for the bootup splash), we have to ensure that we
3302          * always use map_and_fenceable for all scanout buffers.
3303          */
3304         ret = i915_gem_object_pin(obj, alignment, true, false);
3305         if (ret)
3306                 return ret;
3307
3308         i915_gem_object_flush_cpu_write_domain(obj);
3309
3310         old_write_domain = obj->base.write_domain;
3311         old_read_domains = obj->base.read_domains;
3312
3313         /* It should now be out of any other write domains, and we can update
3314          * the domain values for our changes.
3315          */
3316         obj->base.write_domain = 0;
3317         obj->base.read_domains |= I915_GEM_DOMAIN_GTT;
3318
3319         trace_i915_gem_object_change_domain(obj,
3320                                             old_read_domains,
3321                                             old_write_domain);
3322
3323         return 0;
3324 }
3325
3326 int
3327 i915_gem_object_finish_gpu(struct drm_i915_gem_object *obj)
3328 {
3329         int ret;
3330
3331         if ((obj->base.read_domains & I915_GEM_GPU_DOMAINS) == 0)
3332                 return 0;
3333
3334         ret = i915_gem_object_wait_rendering(obj, false);
3335         if (ret)
3336                 return ret;
3337
3338         /* Ensure that we invalidate the GPU's caches and TLBs. */
3339         obj->base.read_domains &= ~I915_GEM_GPU_DOMAINS;
3340         return 0;
3341 }
3342
3343 /**
3344  * Moves a single object to the CPU read, and possibly write domain.
3345  *
3346  * This function returns when the move is complete, including waiting on
3347  * flushes to occur.
3348  */
3349 int
3350 i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
3351 {
3352         uint32_t old_write_domain, old_read_domains;
3353         int ret;
3354
3355         if (obj->base.write_domain == I915_GEM_DOMAIN_CPU)
3356                 return 0;
3357
3358         ret = i915_gem_object_wait_rendering(obj, !write);
3359         if (ret)
3360                 return ret;
3361
3362         i915_gem_object_flush_gtt_write_domain(obj);
3363
3364         old_write_domain = obj->base.write_domain;
3365         old_read_domains = obj->base.read_domains;
3366
3367         /* Flush the CPU cache if it's still invalid. */
3368         if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) {
3369                 i915_gem_clflush_object(obj);
3370
3371                 obj->base.read_domains |= I915_GEM_DOMAIN_CPU;
3372         }
3373
3374         /* It should now be out of any other write domains, and we can update
3375          * the domain values for our changes.
3376          */
3377         BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_CPU) != 0);
3378
3379         /* If we're writing through the CPU, then the GPU read domains will
3380          * need to be invalidated at next use.
3381          */
3382         if (write) {
3383                 obj->base.read_domains = I915_GEM_DOMAIN_CPU;
3384                 obj->base.write_domain = I915_GEM_DOMAIN_CPU;
3385         }
3386
3387         trace_i915_gem_object_change_domain(obj,
3388                                             old_read_domains,
3389                                             old_write_domain);
3390
3391         return 0;
3392 }
3393
3394 /* Throttle our rendering by waiting until the ring has completed our requests
3395  * emitted over 20 msec ago.
3396  *
3397  * Note that if we were to use the current jiffies each time around the loop,
3398  * we wouldn't escape the function with any frames outstanding if the time to
3399  * render a frame was over 20ms.
3400  *
3401  * This should get us reasonable parallelism between CPU and GPU but also
3402  * relatively low latency when blocking on a particular request to finish.
3403  */
3404 static int
3405 i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
3406 {
3407         struct drm_i915_private *dev_priv = dev->dev_private;
3408         struct drm_i915_file_private *file_priv = file->driver_priv;
3409         unsigned long recent_enough = jiffies - msecs_to_jiffies(20);
3410         struct drm_i915_gem_request *request;
3411         struct intel_ring_buffer *ring = NULL;
3412         u32 seqno = 0;
3413         int ret;
3414
3415         if (atomic_read(&dev_priv->mm.wedged))
3416                 return -EIO;
3417
3418         spin_lock(&file_priv->mm.lock);
3419         list_for_each_entry(request, &file_priv->mm.request_list, client_list) {
3420                 if (time_after_eq(request->emitted_jiffies, recent_enough))
3421                         break;
3422
3423                 ring = request->ring;
3424                 seqno = request->seqno;
3425         }
3426         spin_unlock(&file_priv->mm.lock);
3427
3428         if (seqno == 0)
3429                 return 0;
3430
3431         ret = __wait_seqno(ring, seqno, true, NULL);
3432         if (ret == 0)
3433                 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, 0);
3434
3435         return ret;
3436 }
3437
3438 int
3439 i915_gem_object_pin(struct drm_i915_gem_object *obj,
3440                     uint32_t alignment,
3441                     bool map_and_fenceable,
3442                     bool nonblocking)
3443 {
3444         int ret;
3445
3446         if (WARN_ON(obj->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT))
3447                 return -EBUSY;
3448
3449         if (obj->gtt_space != NULL) {
3450                 if ((alignment && obj->gtt_offset & (alignment - 1)) ||
3451                     (map_and_fenceable && !obj->map_and_fenceable)) {
3452                         WARN(obj->pin_count,
3453                              "bo is already pinned with incorrect alignment:"
3454                              " offset=%x, req.alignment=%x, req.map_and_fenceable=%d,"
3455                              " obj->map_and_fenceable=%d\n",
3456                              obj->gtt_offset, alignment,
3457                              map_and_fenceable,
3458                              obj->map_and_fenceable);
3459                         ret = i915_gem_object_unbind(obj);
3460                         if (ret)
3461                                 return ret;
3462                 }
3463         }
3464
3465         if (obj->gtt_space == NULL) {
3466                 struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
3467
3468                 ret = i915_gem_object_bind_to_gtt(obj, alignment,
3469                                                   map_and_fenceable,
3470                                                   nonblocking);
3471                 if (ret)
3472                         return ret;
3473
3474                 if (!dev_priv->mm.aliasing_ppgtt)
3475                         i915_gem_gtt_bind_object(obj, obj->cache_level);
3476         }
3477
3478         if (!obj->has_global_gtt_mapping && map_and_fenceable)
3479                 i915_gem_gtt_bind_object(obj, obj->cache_level);
3480
3481         obj->pin_count++;
3482         obj->pin_mappable |= map_and_fenceable;
3483
3484         return 0;
3485 }
3486
3487 void
3488 i915_gem_object_unpin(struct drm_i915_gem_object *obj)
3489 {
3490         BUG_ON(obj->pin_count == 0);
3491         BUG_ON(obj->gtt_space == NULL);
3492
3493         if (--obj->pin_count == 0)
3494                 obj->pin_mappable = false;
3495 }
3496
3497 int
3498 i915_gem_pin_ioctl(struct drm_device *dev, void *data,
3499                    struct drm_file *file)
3500 {
3501         struct drm_i915_gem_pin *args = data;
3502         struct drm_i915_gem_object *obj;
3503         int ret;
3504
3505         ret = i915_mutex_lock_interruptible(dev);
3506         if (ret)
3507                 return ret;
3508
3509         obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
3510         if (&obj->base == NULL) {
3511                 ret = -ENOENT;
3512                 goto unlock;
3513         }
3514
3515         if (obj->madv != I915_MADV_WILLNEED) {
3516                 DRM_ERROR("Attempting to pin a purgeable buffer\n");
3517                 ret = -EINVAL;
3518                 goto out;
3519         }
3520
3521         if (obj->pin_filp != NULL && obj->pin_filp != file) {
3522                 DRM_ERROR("Already pinned in i915_gem_pin_ioctl(): %d\n",
3523                           args->handle);
3524                 ret = -EINVAL;
3525                 goto out;
3526         }
3527
3528         obj->user_pin_count++;
3529         obj->pin_filp = file;
3530         if (obj->user_pin_count == 1) {
3531                 ret = i915_gem_object_pin(obj, args->alignment, true, false);
3532                 if (ret)
3533                         goto out;
3534         }
3535
3536         /* XXX - flush the CPU caches for pinned objects
3537          * as the X server doesn't manage domains yet
3538          */
3539         i915_gem_object_flush_cpu_write_domain(obj);
3540         args->offset = obj->gtt_offset;
3541 out:
3542         drm_gem_object_unreference(&obj->base);
3543 unlock:
3544         mutex_unlock(&dev->struct_mutex);
3545         return ret;
3546 }
3547
3548 int
3549 i915_gem_unpin_ioctl(struct drm_device *dev, void *data,
3550                      struct drm_file *file)
3551 {
3552         struct drm_i915_gem_pin *args = data;
3553         struct drm_i915_gem_object *obj;
3554         int ret;
3555
3556         ret = i915_mutex_lock_interruptible(dev);
3557         if (ret)
3558                 return ret;
3559
3560         obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
3561         if (&obj->base == NULL) {
3562                 ret = -ENOENT;
3563                 goto unlock;
3564         }
3565
3566         if (obj->pin_filp != file) {
3567                 DRM_ERROR("Not pinned by caller in i915_gem_pin_ioctl(): %d\n",
3568                           args->handle);
3569                 ret = -EINVAL;
3570                 goto out;
3571         }
3572         obj->user_pin_count--;
3573         if (obj->user_pin_count == 0) {
3574                 obj->pin_filp = NULL;
3575                 i915_gem_object_unpin(obj);
3576         }
3577
3578 out:
3579         drm_gem_object_unreference(&obj->base);
3580 unlock:
3581         mutex_unlock(&dev->struct_mutex);
3582         return ret;
3583 }
3584
3585 int
3586 i915_gem_busy_ioctl(struct drm_device *dev, void *data,
3587                     struct drm_file *file)
3588 {
3589         struct drm_i915_gem_busy *args = data;
3590         struct drm_i915_gem_object *obj;
3591         int ret;
3592
3593         ret = i915_mutex_lock_interruptible(dev);
3594         if (ret)
3595                 return ret;
3596
3597         obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
3598         if (&obj->base == NULL) {
3599                 ret = -ENOENT;
3600                 goto unlock;
3601         }
3602
3603         /* Count all active objects as busy, even if they are currently not used
3604          * by the gpu. Users of this interface expect objects to eventually
3605          * become non-busy without any further actions, therefore emit any
3606          * necessary flushes here.
3607          */
3608         ret = i915_gem_object_flush_active(obj);
3609
3610         args->busy = obj->active;
3611         if (obj->ring) {
3612                 BUILD_BUG_ON(I915_NUM_RINGS > 16);
3613                 args->busy |= intel_ring_flag(obj->ring) << 16;
3614         }
3615
3616         drm_gem_object_unreference(&obj->base);
3617 unlock:
3618         mutex_unlock(&dev->struct_mutex);
3619         return ret;
3620 }
3621
/*
 * Throttle ioctl: thin wrapper that forwards to i915_gem_ring_throttle()
 * for the calling file.  @data is unused.
 */
int
i915_gem_throttle_ioctl(struct drm_device *dev, void *data,
			struct drm_file *file_priv)
{
	int ret;

	ret = i915_gem_ring_throttle(dev, file_priv);

	return ret;
}
3628
3629 int
3630 i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
3631                        struct drm_file *file_priv)
3632 {
3633         struct drm_i915_gem_madvise *args = data;
3634         struct drm_i915_gem_object *obj;
3635         int ret;
3636
3637         switch (args->madv) {
3638         case I915_MADV_DONTNEED:
3639         case I915_MADV_WILLNEED:
3640             break;
3641         default:
3642             return -EINVAL;
3643         }
3644
3645         ret = i915_mutex_lock_interruptible(dev);
3646         if (ret)
3647                 return ret;
3648
3649         obj = to_intel_bo(drm_gem_object_lookup(dev, file_priv, args->handle));
3650         if (&obj->base == NULL) {
3651                 ret = -ENOENT;
3652                 goto unlock;
3653         }
3654
3655         if (obj->pin_count) {
3656                 ret = -EINVAL;
3657                 goto out;
3658         }
3659
3660         if (obj->madv != __I915_MADV_PURGED)
3661                 obj->madv = args->madv;
3662
3663         /* if the object is no longer attached, discard its backing storage */
3664         if (i915_gem_object_is_purgeable(obj) && obj->pages == NULL)
3665                 i915_gem_object_truncate(obj);
3666
3667         args->retained = obj->madv != __I915_MADV_PURGED;
3668
3669 out:
3670         drm_gem_object_unreference(&obj->base);
3671 unlock:
3672         mutex_unlock(&dev->struct_mutex);
3673         return ret;
3674 }
3675
3676 void i915_gem_object_init(struct drm_i915_gem_object *obj,
3677                           const struct drm_i915_gem_object_ops *ops)
3678 {
3679         INIT_LIST_HEAD(&obj->mm_list);
3680         INIT_LIST_HEAD(&obj->gtt_list);
3681         INIT_LIST_HEAD(&obj->ring_list);
3682         INIT_LIST_HEAD(&obj->exec_list);
3683
3684         obj->ops = ops;
3685
3686         obj->fence_reg = I915_FENCE_REG_NONE;
3687         obj->madv = I915_MADV_WILLNEED;
3688         /* Avoid an unnecessary call to unbind on the first bind. */
3689         obj->map_and_fenceable = true;
3690
3691         i915_gem_info_add_obj(obj->base.dev->dev_private, obj->base.size);
3692 }
3693
3694 static const struct drm_i915_gem_object_ops i915_gem_object_ops = {
3695         .get_pages = i915_gem_object_get_pages_gtt,
3696         .put_pages = i915_gem_object_put_pages_gtt,
3697 };
3698
3699 struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev,
3700                                                   size_t size)
3701 {
3702         struct drm_i915_gem_object *obj;
3703         struct address_space *mapping;
3704         gfp_t mask;
3705
3706         obj = i915_gem_object_alloc(dev);
3707         if (obj == NULL)
3708                 return NULL;
3709
3710         if (drm_gem_object_init(dev, &obj->base, size) != 0) {
3711                 i915_gem_object_free(obj);
3712                 return NULL;
3713         }
3714
3715         mask = GFP_HIGHUSER | __GFP_RECLAIMABLE;
3716         if (IS_CRESTLINE(dev) || IS_BROADWATER(dev)) {
3717                 /* 965gm cannot relocate objects above 4GiB. */
3718                 mask &= ~__GFP_HIGHMEM;
3719                 mask |= __GFP_DMA32;
3720         }
3721
3722         mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
3723         mapping_set_gfp_mask(mapping, mask);
3724
3725         i915_gem_object_init(obj, &i915_gem_object_ops);
3726
3727         obj->base.write_domain = I915_GEM_DOMAIN_CPU;
3728         obj->base.read_domains = I915_GEM_DOMAIN_CPU;
3729
3730         if (HAS_LLC(dev)) {
3731                 /* On some devices, we can have the GPU use the LLC (the CPU
3732                  * cache) for about a 10% performance improvement
3733                  * compared to uncached.  Graphics requests other than
3734                  * display scanout are coherent with the CPU in
3735                  * accessing this cache.  This means in this mode we
3736                  * don't need to clflush on the CPU side, and on the
3737                  * GPU side we only need to flush internal caches to
3738                  * get data visible to the CPU.
3739                  *
3740                  * However, we maintain the display planes as UC, and so
3741                  * need to rebind when first used as such.
3742                  */
3743                 obj->cache_level = I915_CACHE_LLC;
3744         } else
3745                 obj->cache_level = I915_CACHE_NONE;
3746
3747         return obj;
3748 }
3749
/*
 * This entry point must never be reached: it BUG()s unconditionally.  The
 * return statement only exists to satisfy the int return type.
 */
int i915_gem_init_object(struct drm_gem_object *obj)
{
	BUG();
	return 0;
}
3756
3757 void i915_gem_free_object(struct drm_gem_object *gem_obj)
3758 {
3759         struct drm_i915_gem_object *obj = to_intel_bo(gem_obj);
3760         struct drm_device *dev = obj->base.dev;
3761         drm_i915_private_t *dev_priv = dev->dev_private;
3762
3763         trace_i915_gem_object_destroy(obj);
3764
3765         if (obj->phys_obj)
3766                 i915_gem_detach_phys_object(dev, obj);
3767
3768         obj->pin_count = 0;
3769         if (WARN_ON(i915_gem_object_unbind(obj) == -ERESTARTSYS)) {
3770                 bool was_interruptible;
3771
3772                 was_interruptible = dev_priv->mm.interruptible;
3773                 dev_priv->mm.interruptible = false;
3774
3775                 WARN_ON(i915_gem_object_unbind(obj));
3776
3777                 dev_priv->mm.interruptible = was_interruptible;
3778         }
3779
3780         obj->pages_pin_count = 0;
3781         i915_gem_object_put_pages(obj);
3782         i915_gem_object_free_mmap_offset(obj);
3783         i915_gem_object_release_stolen(obj);
3784
3785         BUG_ON(obj->pages);
3786
3787         if (obj->base.import_attach)
3788                 drm_prime_gem_destroy(&obj->base, NULL);
3789
3790         drm_gem_object_release(&obj->base);
3791         i915_gem_info_remove_obj(dev_priv, obj->base.size);
3792
3793         kfree(obj->bit_17);
3794         i915_gem_object_free(obj);
3795 }
3796
3797 int
3798 i915_gem_idle(struct drm_device *dev)
3799 {
3800         drm_i915_private_t *dev_priv = dev->dev_private;
3801         int ret;
3802
3803         mutex_lock(&dev->struct_mutex);
3804
3805         if (dev_priv->mm.suspended) {
3806                 mutex_unlock(&dev->struct_mutex);
3807                 return 0;
3808         }
3809
3810         ret = i915_gpu_idle(dev);
3811         if (ret) {
3812                 mutex_unlock(&dev->struct_mutex);
3813                 return ret;
3814         }
3815         i915_gem_retire_requests(dev);
3816
3817         /* Under UMS, be paranoid and evict. */
3818         if (!drm_core_check_feature(dev, DRIVER_MODESET))
3819                 i915_gem_evict_everything(dev);
3820
3821         i915_gem_reset_fences(dev);
3822
3823         /* Hack!  Don't let anybody do execbuf while we don't control the chip.
3824          * We need to replace this with a semaphore, or something.
3825          * And not confound mm.suspended!
3826          */
3827         dev_priv->mm.suspended = 1;
3828         del_timer_sync(&dev_priv->hangcheck_timer);
3829
3830         i915_kernel_lost_context(dev);
3831         i915_gem_cleanup_ringbuffer(dev);
3832
3833         mutex_unlock(&dev->struct_mutex);
3834
3835         /* Cancel the retire work handler, which should be idle now. */
3836         cancel_delayed_work_sync(&dev_priv->mm.retire_work);
3837
3838         return 0;
3839 }
3840
3841 void i915_gem_l3_remap(struct drm_device *dev)
3842 {
3843         drm_i915_private_t *dev_priv = dev->dev_private;
3844         u32 misccpctl;
3845         int i;
3846
3847         if (!IS_IVYBRIDGE(dev))
3848                 return;
3849
3850         if (!dev_priv->l3_parity.remap_info)
3851                 return;
3852
3853         misccpctl = I915_READ(GEN7_MISCCPCTL);
3854         I915_WRITE(GEN7_MISCCPCTL, misccpctl & ~GEN7_DOP_CLOCK_GATE_ENABLE);
3855         POSTING_READ(GEN7_MISCCPCTL);
3856
3857         for (i = 0; i < GEN7_L3LOG_SIZE; i += 4) {
3858                 u32 remap = I915_READ(GEN7_L3LOG_BASE + i);
3859                 if (remap && remap != dev_priv->l3_parity.remap_info[i/4])
3860                         DRM_DEBUG("0x%x was already programmed to %x\n",
3861                                   GEN7_L3LOG_BASE + i, remap);
3862                 if (remap && !dev_priv->l3_parity.remap_info[i/4])
3863                         DRM_DEBUG_DRIVER("Clearing remapped register\n");
3864                 I915_WRITE(GEN7_L3LOG_BASE + i, dev_priv->l3_parity.remap_info[i/4]);
3865         }
3866
3867         /* Make sure all the writes land before disabling dop clock gating */
3868         POSTING_READ(GEN7_L3LOG_BASE);
3869
3870         I915_WRITE(GEN7_MISCCPCTL, misccpctl);
3871 }
3872
3873 void i915_gem_init_swizzling(struct drm_device *dev)
3874 {
3875         drm_i915_private_t *dev_priv = dev->dev_private;
3876
3877         if (INTEL_INFO(dev)->gen < 5 ||
3878             dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE)
3879                 return;
3880
3881         I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) |
3882                                  DISP_TILE_SURFACE_SWIZZLING);
3883
3884         if (IS_GEN5(dev))
3885                 return;
3886
3887         I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_SWZCTL);
3888         if (IS_GEN6(dev))
3889                 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_SNB));
3890         else
3891                 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_IVB));
3892 }
3893
3894 static bool
3895 intel_enable_blt(struct drm_device *dev)
3896 {
3897         if (!HAS_BLT(dev))
3898                 return false;
3899
3900         /* The blitter was dysfunctional on early prototypes */
3901         if (IS_GEN6(dev) && dev->pdev->revision < 8) {
3902                 DRM_INFO("BLT not supported on this pre-production hardware;"
3903                          " graphics performance will be degraded.\n");
3904                 return false;
3905         }
3906
3907         return true;
3908 }
3909
3910 int
3911 i915_gem_init_hw(struct drm_device *dev)
3912 {
3913         drm_i915_private_t *dev_priv = dev->dev_private;
3914         int ret;
3915
3916         if (INTEL_INFO(dev)->gen < 6 && !intel_enable_gtt())
3917                 return -EIO;
3918
3919         if (IS_HASWELL(dev) && (I915_READ(0x120010) == 1))
3920                 I915_WRITE(0x9008, I915_READ(0x9008) | 0xf0000);
3921
3922         i915_gem_l3_remap(dev);
3923
3924         i915_gem_init_swizzling(dev);
3925
3926         ret = intel_init_render_ring_buffer(dev);
3927         if (ret)
3928                 return ret;
3929
3930         if (HAS_BSD(dev)) {
3931                 ret = intel_init_bsd_ring_buffer(dev);
3932                 if (ret)
3933                         goto cleanup_render_ring;
3934         }
3935
3936         if (intel_enable_blt(dev)) {
3937                 ret = intel_init_blt_ring_buffer(dev);
3938                 if (ret)
3939                         goto cleanup_bsd_ring;
3940         }
3941
3942         dev_priv->next_seqno = 1;
3943
3944         /*
3945          * XXX: There was some w/a described somewhere suggesting loading
3946          * contexts before PPGTT.
3947          */
3948         i915_gem_context_init(dev);
3949         i915_gem_init_ppgtt(dev);
3950
3951         return 0;
3952
3953 cleanup_bsd_ring:
3954         intel_cleanup_ring_buffer(&dev_priv->ring[VCS]);
3955 cleanup_render_ring:
3956         intel_cleanup_ring_buffer(&dev_priv->ring[RCS]);
3957         return ret;
3958 }
3959
3960 static bool
3961 intel_enable_ppgtt(struct drm_device *dev)
3962 {
3963         if (i915_enable_ppgtt >= 0)
3964                 return i915_enable_ppgtt;
3965
3966 #ifdef CONFIG_INTEL_IOMMU
3967         /* Disable ppgtt on SNB if VT-d is on. */
3968         if (INTEL_INFO(dev)->gen == 6 && intel_iommu_gfx_mapped)
3969                 return false;
3970 #endif
3971
3972         return true;
3973 }
3974
3975 int i915_gem_init(struct drm_device *dev)
3976 {
3977         struct drm_i915_private *dev_priv = dev->dev_private;
3978         unsigned long gtt_size, mappable_size;
3979         int ret;
3980
3981         gtt_size = dev_priv->mm.gtt->gtt_total_entries << PAGE_SHIFT;
3982         mappable_size = dev_priv->mm.gtt->gtt_mappable_entries << PAGE_SHIFT;
3983
3984         mutex_lock(&dev->struct_mutex);
3985         if (intel_enable_ppgtt(dev) && HAS_ALIASING_PPGTT(dev)) {
3986                 /* PPGTT pdes are stolen from global gtt ptes, so shrink the
3987                  * aperture accordingly when using aliasing ppgtt. */
3988                 gtt_size -= I915_PPGTT_PD_ENTRIES*PAGE_SIZE;
3989
3990                 i915_gem_init_global_gtt(dev, 0, mappable_size, gtt_size);
3991
3992                 ret = i915_gem_init_aliasing_ppgtt(dev);
3993                 if (ret) {
3994                         mutex_unlock(&dev->struct_mutex);
3995                         return ret;
3996                 }
3997         } else {
3998                 /* Let GEM Manage all of the aperture.
3999                  *
4000                  * However, leave one page at the end still bound to the scratch
4001                  * page.  There are a number of places where the hardware
4002                  * apparently prefetches past the end of the object, and we've
4003                  * seen multiple hangs with the GPU head pointer stuck in a
4004                  * batchbuffer bound at the last page of the aperture.  One page
4005                  * should be enough to keep any prefetching inside of the
4006                  * aperture.
4007                  */
4008                 i915_gem_init_global_gtt(dev, 0, mappable_size,
4009                                          gtt_size);
4010         }
4011
4012         ret = i915_gem_init_hw(dev);
4013         mutex_unlock(&dev->struct_mutex);
4014         if (ret) {
4015                 i915_gem_cleanup_aliasing_ppgtt(dev);
4016                 return ret;
4017         }
4018
4019         /* Allow hardware batchbuffers unless told otherwise, but not for KMS. */
4020         if (!drm_core_check_feature(dev, DRIVER_MODESET))
4021                 dev_priv->dri1.allow_batchbuffer = 1;
4022         return 0;
4023 }
4024
4025 void
4026 i915_gem_cleanup_ringbuffer(struct drm_device *dev)
4027 {
4028         drm_i915_private_t *dev_priv = dev->dev_private;
4029         struct intel_ring_buffer *ring;
4030         int i;
4031
4032         for_each_ring(ring, dev_priv, i)
4033                 intel_cleanup_ring_buffer(ring);
4034 }
4035
4036 int
4037 i915_gem_entervt_ioctl(struct drm_device *dev, void *data,
4038                        struct drm_file *file_priv)
4039 {
4040         drm_i915_private_t *dev_priv = dev->dev_private;
4041         int ret;
4042
4043         if (drm_core_check_feature(dev, DRIVER_MODESET))
4044                 return 0;
4045
4046         if (atomic_read(&dev_priv->mm.wedged)) {
4047                 DRM_ERROR("Reenabling wedged hardware, good luck\n");
4048                 atomic_set(&dev_priv->mm.wedged, 0);
4049         }
4050
4051         mutex_lock(&dev->struct_mutex);
4052         dev_priv->mm.suspended = 0;
4053
4054         ret = i915_gem_init_hw(dev);
4055         if (ret != 0) {
4056                 mutex_unlock(&dev->struct_mutex);
4057                 return ret;
4058         }
4059
4060         BUG_ON(!list_empty(&dev_priv->mm.active_list));
4061         mutex_unlock(&dev->struct_mutex);
4062
4063         ret = drm_irq_install(dev);
4064         if (ret)
4065                 goto cleanup_ringbuffer;
4066
4067         return 0;
4068
4069 cleanup_ringbuffer:
4070         mutex_lock(&dev->struct_mutex);
4071         i915_gem_cleanup_ringbuffer(dev);
4072         dev_priv->mm.suspended = 1;
4073         mutex_unlock(&dev->struct_mutex);
4074
4075         return ret;
4076 }
4077
4078 int
4079 i915_gem_leavevt_ioctl(struct drm_device *dev, void *data,
4080                        struct drm_file *file_priv)
4081 {
4082         if (drm_core_check_feature(dev, DRIVER_MODESET))
4083                 return 0;
4084
4085         drm_irq_uninstall(dev);
4086         return i915_gem_idle(dev);
4087 }
4088
4089 void
4090 i915_gem_lastclose(struct drm_device *dev)
4091 {
4092         int ret;
4093
4094         if (drm_core_check_feature(dev, DRIVER_MODESET))
4095                 return;
4096
4097         ret = i915_gem_idle(dev);
4098         if (ret)
4099                 DRM_ERROR("failed to idle hardware: %d\n", ret);
4100 }
4101
4102 static void
4103 init_ring_lists(struct intel_ring_buffer *ring)
4104 {
4105         INIT_LIST_HEAD(&ring->active_list);
4106         INIT_LIST_HEAD(&ring->request_list);
4107 }
4108
4109 void
4110 i915_gem_load(struct drm_device *dev)
4111 {
4112         drm_i915_private_t *dev_priv = dev->dev_private;
4113         int i;
4114
4115         dev_priv->slab =
4116                 kmem_cache_create("i915_gem_object",
4117                                   sizeof(struct drm_i915_gem_object), 0,
4118                                   SLAB_HWCACHE_ALIGN,
4119                                   NULL);
4120
4121         INIT_LIST_HEAD(&dev_priv->mm.active_list);
4122         INIT_LIST_HEAD(&dev_priv->mm.inactive_list);
4123         INIT_LIST_HEAD(&dev_priv->mm.unbound_list);
4124         INIT_LIST_HEAD(&dev_priv->mm.bound_list);
4125         INIT_LIST_HEAD(&dev_priv->mm.fence_list);
4126         for (i = 0; i < I915_NUM_RINGS; i++)
4127                 init_ring_lists(&dev_priv->ring[i]);
4128         for (i = 0; i < I915_MAX_NUM_FENCES; i++)
4129                 INIT_LIST_HEAD(&dev_priv->fence_regs[i].lru_list);
4130         INIT_DELAYED_WORK(&dev_priv->mm.retire_work,
4131                           i915_gem_retire_work_handler);
4132         init_completion(&dev_priv->error_completion);
4133
4134         /* On GEN3 we really need to make sure the ARB C3 LP bit is set */
4135         if (IS_GEN3(dev)) {
4136                 I915_WRITE(MI_ARB_STATE,
4137                            _MASKED_BIT_ENABLE(MI_ARB_C3_LP_WRITE_ENABLE));
4138         }
4139
4140         dev_priv->relative_constants_mode = I915_EXEC_CONSTANTS_REL_GENERAL;
4141
4142         /* Old X drivers will take 0-2 for front, back, depth buffers */
4143         if (!drm_core_check_feature(dev, DRIVER_MODESET))
4144                 dev_priv->fence_reg_start = 3;
4145
4146         if (INTEL_INFO(dev)->gen >= 4 || IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev))
4147                 dev_priv->num_fence_regs = 16;
4148         else
4149                 dev_priv->num_fence_regs = 8;
4150
4151         /* Initialize fence registers to zero */
4152         i915_gem_reset_fences(dev);
4153
4154         i915_gem_detect_bit_6_swizzle(dev);
4155         init_waitqueue_head(&dev_priv->pending_flip_queue);
4156
4157         dev_priv->mm.interruptible = true;
4158
4159         dev_priv->mm.inactive_shrinker.shrink = i915_gem_inactive_shrink;
4160         dev_priv->mm.inactive_shrinker.seeks = DEFAULT_SEEKS;
4161         register_shrinker(&dev_priv->mm.inactive_shrinker);
4162 }
4163
4164 /*
4165  * Create a physically contiguous memory object for this object
4166  * e.g. for cursor + overlay regs
4167  */
4168 static int i915_gem_init_phys_object(struct drm_device *dev,
4169                                      int id, int size, int align)
4170 {
4171         drm_i915_private_t *dev_priv = dev->dev_private;
4172         struct drm_i915_gem_phys_object *phys_obj;
4173         int ret;
4174
4175         if (dev_priv->mm.phys_objs[id - 1] || !size)
4176                 return 0;
4177
4178         phys_obj = kzalloc(sizeof(struct drm_i915_gem_phys_object), GFP_KERNEL);
4179         if (!phys_obj)
4180                 return -ENOMEM;
4181
4182         phys_obj->id = id;
4183
4184         phys_obj->handle = drm_pci_alloc(dev, size, align);
4185         if (!phys_obj->handle) {
4186                 ret = -ENOMEM;
4187                 goto kfree_obj;
4188         }
4189 #ifdef CONFIG_X86
4190         set_memory_wc((unsigned long)phys_obj->handle->vaddr, phys_obj->handle->size / PAGE_SIZE);
4191 #endif
4192
4193         dev_priv->mm.phys_objs[id - 1] = phys_obj;
4194
4195         return 0;
4196 kfree_obj:
4197         kfree(phys_obj);
4198         return ret;
4199 }
4200
4201 static void i915_gem_free_phys_object(struct drm_device *dev, int id)
4202 {
4203         drm_i915_private_t *dev_priv = dev->dev_private;
4204         struct drm_i915_gem_phys_object *phys_obj;
4205
4206         if (!dev_priv->mm.phys_objs[id - 1])
4207                 return;
4208
4209         phys_obj = dev_priv->mm.phys_objs[id - 1];
4210         if (phys_obj->cur_obj) {
4211                 i915_gem_detach_phys_object(dev, phys_obj->cur_obj);
4212         }
4213
4214 #ifdef CONFIG_X86
4215         set_memory_wb((unsigned long)phys_obj->handle->vaddr, phys_obj->handle->size / PAGE_SIZE);
4216 #endif
4217         drm_pci_free(dev, phys_obj->handle);
4218         kfree(phys_obj);
4219         dev_priv->mm.phys_objs[id - 1] = NULL;
4220 }
4221
4222 void i915_gem_free_all_phys_object(struct drm_device *dev)
4223 {
4224         int i;
4225
4226         for (i = I915_GEM_PHYS_CURSOR_0; i <= I915_MAX_PHYS_OBJECT; i++)
4227                 i915_gem_free_phys_object(dev, i);
4228 }
4229
4230 void i915_gem_detach_phys_object(struct drm_device *dev,
4231                                  struct drm_i915_gem_object *obj)
4232 {
4233         struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
4234         char *vaddr;
4235         int i;
4236         int page_count;
4237
4238         if (!obj->phys_obj)
4239                 return;
4240         vaddr = obj->phys_obj->handle->vaddr;
4241
4242         page_count = obj->base.size / PAGE_SIZE;
4243         for (i = 0; i < page_count; i++) {
4244                 struct page *page = shmem_read_mapping_page(mapping, i);
4245                 if (!IS_ERR(page)) {
4246                         char *dst = kmap_atomic(page);
4247                         memcpy(dst, vaddr + i*PAGE_SIZE, PAGE_SIZE);
4248                         kunmap_atomic(dst);
4249
4250                         drm_clflush_pages(&page, 1);
4251
4252                         set_page_dirty(page);
4253                         mark_page_accessed(page);
4254                         page_cache_release(page);
4255                 }
4256         }
4257         i915_gem_chipset_flush(dev);
4258
4259         obj->phys_obj->cur_obj = NULL;
4260         obj->phys_obj = NULL;
4261 }
4262
4263 int
4264 i915_gem_attach_phys_object(struct drm_device *dev,
4265                             struct drm_i915_gem_object *obj,
4266                             int id,
4267                             int align)
4268 {
4269         struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
4270         drm_i915_private_t *dev_priv = dev->dev_private;
4271         int ret = 0;
4272         int page_count;
4273         int i;
4274
4275         if (id > I915_MAX_PHYS_OBJECT)
4276                 return -EINVAL;
4277
4278         if (obj->phys_obj) {
4279                 if (obj->phys_obj->id == id)
4280                         return 0;
4281                 i915_gem_detach_phys_object(dev, obj);
4282         }
4283
4284         /* create a new object */
4285         if (!dev_priv->mm.phys_objs[id - 1]) {
4286                 ret = i915_gem_init_phys_object(dev, id,
4287                                                 obj->base.size, align);
4288                 if (ret) {
4289                         DRM_ERROR("failed to init phys object %d size: %zu\n",
4290                                   id, obj->base.size);
4291                         return ret;
4292                 }
4293         }
4294
4295         /* bind to the object */
4296         obj->phys_obj = dev_priv->mm.phys_objs[id - 1];
4297         obj->phys_obj->cur_obj = obj;
4298
4299         page_count = obj->base.size / PAGE_SIZE;
4300
4301         for (i = 0; i < page_count; i++) {
4302                 struct page *page;
4303                 char *dst, *src;
4304
4305                 page = shmem_read_mapping_page(mapping, i);
4306                 if (IS_ERR(page))
4307                         return PTR_ERR(page);
4308
4309                 src = kmap_atomic(page);
4310                 dst = obj->phys_obj->handle->vaddr + (i * PAGE_SIZE);
4311                 memcpy(dst, src, PAGE_SIZE);
4312                 kunmap_atomic(src);
4313
4314                 mark_page_accessed(page);
4315                 page_cache_release(page);
4316         }
4317
4318         return 0;
4319 }
4320
4321 static int
4322 i915_gem_phys_pwrite(struct drm_device *dev,
4323                      struct drm_i915_gem_object *obj,
4324                      struct drm_i915_gem_pwrite *args,
4325                      struct drm_file *file_priv)
4326 {
4327         void *vaddr = obj->phys_obj->handle->vaddr + args->offset;
4328         char __user *user_data = (char __user *) (uintptr_t) args->data_ptr;
4329
4330         if (__copy_from_user_inatomic_nocache(vaddr, user_data, args->size)) {
4331                 unsigned long unwritten;
4332
4333                 /* The physical object once assigned is fixed for the lifetime
4334                  * of the obj, so we can safely drop the lock and continue
4335                  * to access vaddr.
4336                  */
4337                 mutex_unlock(&dev->struct_mutex);
4338                 unwritten = copy_from_user(vaddr, user_data, args->size);
4339                 mutex_lock(&dev->struct_mutex);
4340                 if (unwritten)
4341                         return -EFAULT;
4342         }
4343
4344         i915_gem_chipset_flush(dev);
4345         return 0;
4346 }
4347
4348 void i915_gem_release(struct drm_device *dev, struct drm_file *file)
4349 {
4350         struct drm_i915_file_private *file_priv = file->driver_priv;
4351
4352         /* Clean up our request list when the client is going away, so that
4353          * later retire_requests won't dereference our soon-to-be-gone
4354          * file_priv.
4355          */
4356         spin_lock(&file_priv->mm.lock);
4357         while (!list_empty(&file_priv->mm.request_list)) {
4358                 struct drm_i915_gem_request *request;
4359
4360                 request = list_first_entry(&file_priv->mm.request_list,
4361                                            struct drm_i915_gem_request,
4362                                            client_list);
4363                 list_del(&request->client_list);
4364                 request->file_priv = NULL;
4365         }
4366         spin_unlock(&file_priv->mm.lock);
4367 }
4368
/*
 * Report whether @mutex is currently locked and owned by @task.
 *
 * The owner comparison is only compiled in when CONFIG_SMP or
 * CONFIG_DEBUG_MUTEXES is defined; in any other configuration the answer
 * is always false.
 */
static bool mutex_is_locked_by(struct mutex *mutex, struct task_struct *task)
{
	bool owned = false;

	if (mutex_is_locked(mutex)) {
#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_MUTEXES)
		owned = (mutex->owner == task);
#endif
		/*
		 * Otherwise (UP): we may have been pre-empted, so a locked
		 * mutex cannot be assumed to be ours -- leave owned false.
		 */
	}

	return owned;
}
4381
4382 static int
4383 i915_gem_inactive_shrink(struct shrinker *shrinker, struct shrink_control *sc)
4384 {
4385         struct drm_i915_private *dev_priv =
4386                 container_of(shrinker,
4387                              struct drm_i915_private,
4388                              mm.inactive_shrinker);
4389         struct drm_device *dev = dev_priv->dev;
4390         struct drm_i915_gem_object *obj;
4391         int nr_to_scan = sc->nr_to_scan;
4392         bool unlock = true;
4393         int cnt;
4394
4395         if (!mutex_trylock(&dev->struct_mutex)) {
4396                 if (!mutex_is_locked_by(&dev->struct_mutex, current))
4397                         return 0;
4398
4399                 unlock = false;
4400         }
4401
4402         if (nr_to_scan) {
4403                 nr_to_scan -= i915_gem_purge(dev_priv, nr_to_scan);
4404                 if (nr_to_scan > 0)
4405                         i915_gem_shrink_all(dev_priv);
4406         }
4407
4408         cnt = 0;
4409         list_for_each_entry(obj, &dev_priv->mm.unbound_list, gtt_list)
4410                 if (obj->pages_pin_count == 0)
4411                         cnt += obj->base.size >> PAGE_SHIFT;
4412         list_for_each_entry(obj, &dev_priv->mm.bound_list, gtt_list)
4413                 if (obj->pin_count == 0 && obj->pages_pin_count == 0)
4414                         cnt += obj->base.size >> PAGE_SHIFT;
4415
4416         if (unlock)
4417                 mutex_unlock(&dev->struct_mutex);
4418         return cnt;
4419 }