drm/i915: Maintain LRU order of inactive objects upon access by CPU (v2)
[firefly-linux-kernel-4.4.55.git] drivers/gpu/drm/i915/i915_gem.c
1 /*
2  * Copyright © 2008 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  * Authors:
24  *    Eric Anholt <eric@anholt.net>
25  *
26  */
27
28 #include "drmP.h"
29 #include "drm.h"
30 #include "i915_drm.h"
31 #include "i915_drv.h"
32 #include "i915_trace.h"
33 #include "intel_drv.h"
34 #include <linux/slab.h>
35 #include <linux/swap.h>
36 #include <linux/pci.h>
37
38 static uint32_t i915_gem_get_gtt_alignment(struct drm_gem_object *obj);
39 static int i915_gem_object_flush_gpu_write_domain(struct drm_gem_object *obj);
40 static void i915_gem_object_flush_gtt_write_domain(struct drm_gem_object *obj);
41 static void i915_gem_object_flush_cpu_write_domain(struct drm_gem_object *obj);
42 static int i915_gem_object_set_to_cpu_domain(struct drm_gem_object *obj,
43                                              int write);
44 static int i915_gem_object_set_cpu_read_domain_range(struct drm_gem_object *obj,
45                                                      uint64_t offset,
46                                                      uint64_t size);
47 static void i915_gem_object_set_to_full_cpu_read_domain(struct drm_gem_object *obj);
48 static int i915_gem_object_wait_rendering(struct drm_gem_object *obj);
49 static int i915_gem_object_bind_to_gtt(struct drm_gem_object *obj,
50                                            unsigned alignment);
51 static void i915_gem_clear_fence_reg(struct drm_gem_object *obj);
52 static int i915_gem_phys_pwrite(struct drm_device *dev, struct drm_gem_object *obj,
53                                 struct drm_i915_gem_pwrite *args,
54                                 struct drm_file *file_priv);
55 static void i915_gem_free_object_tail(struct drm_gem_object *obj);
56
57 static LIST_HEAD(shrink_list);
58 static DEFINE_SPINLOCK(shrink_list_lock);
59
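/* An object is "inactive" when it is bound into the GTT but neither busy on
 * the GPU nor pinned: such objects live on dev_priv->mm.inactive_list and
 * are the preferred candidates for eviction.
 */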
60 static inline bool
61 i915_gem_object_is_inactive(struct drm_i915_gem_object *obj_priv)
62 {
63         return obj_priv->gtt_space &&
64                 !obj_priv->active &&
65                 obj_priv->pin_count == 0;
66 }
67
68 int i915_gem_do_init(struct drm_device *dev, unsigned long start,
69                      unsigned long end)
70 {
71         drm_i915_private_t *dev_priv = dev->dev_private;
72
73         if (start >= end ||
74             (start & (PAGE_SIZE - 1)) != 0 ||
75             (end & (PAGE_SIZE - 1)) != 0) {
76                 return -EINVAL;
77         }
78
79         drm_mm_init(&dev_priv->mm.gtt_space, start,
80                     end - start);
81
82         dev->gtt_total = (uint32_t) (end - start);
83
84         return 0;
85 }
86
87 int
88 i915_gem_init_ioctl(struct drm_device *dev, void *data,
89                     struct drm_file *file_priv)
90 {
91         struct drm_i915_gem_init *args = data;
92         int ret;
93
94         mutex_lock(&dev->struct_mutex);
95         ret = i915_gem_do_init(dev, args->gtt_start, args->gtt_end);
96         mutex_unlock(&dev->struct_mutex);
97
98         return ret;
99 }
100
101 int
102 i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
103                             struct drm_file *file_priv)
104 {
105         struct drm_i915_gem_get_aperture *args = data;
106
107         if (!(dev->driver->driver_features & DRIVER_GEM))
108                 return -ENODEV;
109
110         args->aper_size = dev->gtt_total;
111         args->aper_available_size = (args->aper_size -
112                                      atomic_read(&dev->pin_memory));
113
114         return 0;
115 }
116
117
118 /**
119  * Creates a new mm object and returns a handle to it.
120  */
121 int
122 i915_gem_create_ioctl(struct drm_device *dev, void *data,
123                       struct drm_file *file_priv)
124 {
125         struct drm_i915_gem_create *args = data;
126         struct drm_gem_object *obj;
127         int ret;
128         u32 handle;
129
130         args->size = roundup(args->size, PAGE_SIZE);
131
132         /* Allocate the new object */
133         obj = i915_gem_alloc_object(dev, args->size);
134         if (obj == NULL)
135                 return -ENOMEM;
136
137         ret = drm_gem_handle_create(file_priv, obj, &handle);
138         drm_gem_object_unreference_unlocked(obj);
139         if (ret)
140                 return ret;
141
142         args->handle = handle;
143
144         return 0;
145 }
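
For orientation, a minimal userspace sketch of driving this ioctl; it assumes
the uapi header layout installed by libdrm (include paths vary), and the
helper name is purely illustrative. Real code would normally go through
drmIoctl() so the call is restarted on EINTR.

#include <string.h>
#include <sys/ioctl.h>
#include <drm/i915_drm.h>

/* Illustrative only: ask GEM for a 4 KiB buffer and get back a handle. */
static int example_create_bo(int fd, unsigned int *handle)
{
	struct drm_i915_gem_create create;

	memset(&create, 0, sizeof(create));
	create.size = 4096;	/* rounded up to PAGE_SIZE by the kernel */
	if (ioctl(fd, DRM_IOCTL_I915_GEM_CREATE, &create))
		return -1;

	*handle = create.handle;
	return 0;
}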
146
147 static inline int
148 fast_shmem_read(struct page **pages,
149                 loff_t page_base, int page_offset,
150                 char __user *data,
151                 int length)
152 {
153         char __iomem *vaddr;
154         int unwritten;
155
156         vaddr = kmap_atomic(pages[page_base >> PAGE_SHIFT], KM_USER0);
157         if (vaddr == NULL)
158                 return -ENOMEM;
159         unwritten = __copy_to_user_inatomic(data, vaddr + page_offset, length);
160         kunmap_atomic(vaddr, KM_USER0);
161
162         if (unwritten)
163                 return -EFAULT;
164
165         return 0;
166 }
167
168 static int i915_gem_object_needs_bit17_swizzle(struct drm_gem_object *obj)
169 {
170         drm_i915_private_t *dev_priv = obj->dev->dev_private;
171         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
172
173         return dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_9_10_17 &&
174                 obj_priv->tiling_mode != I915_TILING_NONE;
175 }
176
177 static inline void
178 slow_shmem_copy(struct page *dst_page,
179                 int dst_offset,
180                 struct page *src_page,
181                 int src_offset,
182                 int length)
183 {
184         char *dst_vaddr, *src_vaddr;
185
186         dst_vaddr = kmap(dst_page);
187         src_vaddr = kmap(src_page);
188
189         memcpy(dst_vaddr + dst_offset, src_vaddr + src_offset, length);
190
191         kunmap(src_page);
192         kunmap(dst_page);
193 }
194
195 static inline void
196 slow_shmem_bit17_copy(struct page *gpu_page,
197                       int gpu_offset,
198                       struct page *cpu_page,
199                       int cpu_offset,
200                       int length,
201                       int is_read)
202 {
203         char *gpu_vaddr, *cpu_vaddr;
204
205         /* Use the unswizzled path if this page isn't affected. */
206         if ((page_to_phys(gpu_page) & (1 << 17)) == 0) {
207                 if (is_read)
208                         return slow_shmem_copy(cpu_page, cpu_offset,
209                                                gpu_page, gpu_offset, length);
210                 else
211                         return slow_shmem_copy(gpu_page, gpu_offset,
212                                                cpu_page, cpu_offset, length);
213         }
214
215         gpu_vaddr = kmap(gpu_page);
216         cpu_vaddr = kmap(cpu_page);
217
218         /* Copy the data, XORing A6 with A17 (1). The user already knows he's
219          * XORing with the other bits (A9 for Y, A9 and A10 for X)
220          */
221         while (length > 0) {
222                 int cacheline_end = ALIGN(gpu_offset + 1, 64);
223                 int this_length = min(cacheline_end - gpu_offset, length);
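                /* Bit 17 of this page's physical address is set (checked
                 * above), so flip address bit 6 to reach the data the GPU
                 * actually sees; userspace has already applied the A9/A10
                 * part of the swizzle. */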
224                 int swizzled_gpu_offset = gpu_offset ^ 64;
225
226                 if (is_read) {
227                         memcpy(cpu_vaddr + cpu_offset,
228                                gpu_vaddr + swizzled_gpu_offset,
229                                this_length);
230                 } else {
231                         memcpy(gpu_vaddr + swizzled_gpu_offset,
232                                cpu_vaddr + cpu_offset,
233                                this_length);
234                 }
235                 cpu_offset += this_length;
236                 gpu_offset += this_length;
237                 length -= this_length;
238         }
239
240         kunmap(cpu_page);
241         kunmap(gpu_page);
242 }
243
244 /**
245  * This is the fast shmem pread path, which attempts to copy_to_user directly
246  * from the backing pages of the object into the user's address space.  On a
247  * fault, it fails so we can fall back to i915_gem_shmem_pread_slow().
248  */
249 static int
250 i915_gem_shmem_pread_fast(struct drm_device *dev, struct drm_gem_object *obj,
251                           struct drm_i915_gem_pread *args,
252                           struct drm_file *file_priv)
253 {
254         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
255         ssize_t remain;
256         loff_t offset, page_base;
257         char __user *user_data;
258         int page_offset, page_length;
259         int ret;
260
261         user_data = (char __user *) (uintptr_t) args->data_ptr;
262         remain = args->size;
263
264         mutex_lock(&dev->struct_mutex);
265
266         ret = i915_gem_object_get_pages(obj, 0);
267         if (ret != 0)
268                 goto fail_unlock;
269
270         ret = i915_gem_object_set_cpu_read_domain_range(obj, args->offset,
271                                                         args->size);
272         if (ret != 0)
273                 goto fail_put_pages;
274
275         obj_priv = to_intel_bo(obj);
276         offset = args->offset;
277
278         while (remain > 0) {
279                 /* Operation in this page
280                  *
281                  * page_base = page offset within aperture
282                  * page_offset = offset within page
283                  * page_length = bytes to copy for this page
284                  */
285                 page_base = (offset & ~(PAGE_SIZE-1));
286                 page_offset = offset & (PAGE_SIZE-1);
287                 page_length = remain;
288                 if ((page_offset + remain) > PAGE_SIZE)
289                         page_length = PAGE_SIZE - page_offset;
290
291                 ret = fast_shmem_read(obj_priv->pages,
292                                       page_base, page_offset,
293                                       user_data, page_length);
294                 if (ret)
295                         goto fail_put_pages;
296
297                 remain -= page_length;
298                 user_data += page_length;
299                 offset += page_length;
300         }
301
302 fail_put_pages:
303         i915_gem_object_put_pages(obj);
304 fail_unlock:
305         mutex_unlock(&dev->struct_mutex);
306
307         return ret;
308 }
309
310 static int
311 i915_gem_object_get_pages_or_evict(struct drm_gem_object *obj)
312 {
313         int ret;
314
315         ret = i915_gem_object_get_pages(obj, __GFP_NORETRY | __GFP_NOWARN);
316
317         /* If we've insufficient memory to map in the pages, attempt
318          * to make some space by throwing out some old buffers.
319          */
320         if (ret == -ENOMEM) {
321                 struct drm_device *dev = obj->dev;
322
323                 ret = i915_gem_evict_something(dev, obj->size,
324                                                i915_gem_get_gtt_alignment(obj));
325                 if (ret)
326                         return ret;
327
328                 ret = i915_gem_object_get_pages(obj, 0);
329         }
330
331         return ret;
332 }
333
334 /**
335  * This is the fallback shmem pread path, which pins the user pages with
336  * get_user_pages() up front so that we can copy out of the object's backing
337  * pages while holding the struct mutex, without taking page faults on the
338  * user's address space.
339  */
340 static int
341 i915_gem_shmem_pread_slow(struct drm_device *dev, struct drm_gem_object *obj,
342                           struct drm_i915_gem_pread *args,
343                           struct drm_file *file_priv)
344 {
345         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
346         struct mm_struct *mm = current->mm;
347         struct page **user_pages;
348         ssize_t remain;
349         loff_t offset, pinned_pages, i;
350         loff_t first_data_page, last_data_page, num_pages;
351         int shmem_page_index, shmem_page_offset;
352         int data_page_index,  data_page_offset;
353         int page_length;
354         int ret;
355         uint64_t data_ptr = args->data_ptr;
356         int do_bit17_swizzling;
357
358         remain = args->size;
359
360         /* Pin the user pages containing the data.  We can't fault while
361          * holding the struct mutex, yet we want to hold it while
362          * dereferencing the user data.
363          */
364         first_data_page = data_ptr / PAGE_SIZE;
365         last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE;
366         num_pages = last_data_page - first_data_page + 1;
367
368         user_pages = drm_calloc_large(num_pages, sizeof(struct page *));
369         if (user_pages == NULL)
370                 return -ENOMEM;
371
372         down_read(&mm->mmap_sem);
373         pinned_pages = get_user_pages(current, mm, (uintptr_t)args->data_ptr,
374                                       num_pages, 1, 0, user_pages, NULL);
375         up_read(&mm->mmap_sem);
376         if (pinned_pages < num_pages) {
377                 ret = -EFAULT;
378                 goto fail_put_user_pages;
379         }
380
381         do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
382
383         mutex_lock(&dev->struct_mutex);
384
385         ret = i915_gem_object_get_pages_or_evict(obj);
386         if (ret)
387                 goto fail_unlock;
388
389         ret = i915_gem_object_set_cpu_read_domain_range(obj, args->offset,
390                                                         args->size);
391         if (ret != 0)
392                 goto fail_put_pages;
393
394         obj_priv = to_intel_bo(obj);
395         offset = args->offset;
396
397         while (remain > 0) {
398                 /* Operation in this page
399                  *
400                  * shmem_page_index = page number within shmem file
401                  * shmem_page_offset = offset within page in shmem file
402                  * data_page_index = page number in get_user_pages return
403                  * data_page_offset = offset within the data_page_index page.
404                  * page_length = bytes to copy for this page
405                  */
406                 shmem_page_index = offset / PAGE_SIZE;
407                 shmem_page_offset = offset & ~PAGE_MASK;
408                 data_page_index = data_ptr / PAGE_SIZE - first_data_page;
409                 data_page_offset = data_ptr & ~PAGE_MASK;
410
411                 page_length = remain;
412                 if ((shmem_page_offset + page_length) > PAGE_SIZE)
413                         page_length = PAGE_SIZE - shmem_page_offset;
414                 if ((data_page_offset + page_length) > PAGE_SIZE)
415                         page_length = PAGE_SIZE - data_page_offset;
416
417                 if (do_bit17_swizzling) {
418                         slow_shmem_bit17_copy(obj_priv->pages[shmem_page_index],
419                                               shmem_page_offset,
420                                               user_pages[data_page_index],
421                                               data_page_offset,
422                                               page_length,
423                                               1);
424                 } else {
425                         slow_shmem_copy(user_pages[data_page_index],
426                                         data_page_offset,
427                                         obj_priv->pages[shmem_page_index],
428                                         shmem_page_offset,
429                                         page_length);
430                 }
431
432                 remain -= page_length;
433                 data_ptr += page_length;
434                 offset += page_length;
435         }
436
437 fail_put_pages:
438         i915_gem_object_put_pages(obj);
439 fail_unlock:
440         mutex_unlock(&dev->struct_mutex);
441 fail_put_user_pages:
442         for (i = 0; i < pinned_pages; i++) {
443                 SetPageDirty(user_pages[i]);
444                 page_cache_release(user_pages[i]);
445         }
446         drm_free_large(user_pages);
447
448         return ret;
449 }
450
451 /**
452  * Reads data from the object referenced by handle.
453  *
454  * On error, the contents of *data are undefined.
455  */
456 int
457 i915_gem_pread_ioctl(struct drm_device *dev, void *data,
458                      struct drm_file *file_priv)
459 {
460         struct drm_i915_gem_pread *args = data;
461         struct drm_gem_object *obj;
462         struct drm_i915_gem_object *obj_priv;
463         int ret;
464
465         obj = drm_gem_object_lookup(dev, file_priv, args->handle);
466         if (obj == NULL)
467                 return -EBADF;
468         obj_priv = to_intel_bo(obj);
469
470         /* Bounds check source.
471          *
472          * XXX: This could use review for overflow issues...
473          */
474         if (args->offset > obj->size || args->size > obj->size ||
475             args->offset + args->size > obj->size) {
476                 drm_gem_object_unreference_unlocked(obj);
477                 return -EINVAL;
478         }
479
480         if (i915_gem_object_needs_bit17_swizzle(obj)) {
481                 ret = i915_gem_shmem_pread_slow(dev, obj, args, file_priv);
482         } else {
483                 ret = i915_gem_shmem_pread_fast(dev, obj, args, file_priv);
484                 if (ret != 0)
485                         ret = i915_gem_shmem_pread_slow(dev, obj, args,
486                                                         file_priv);
487         }
488
489         drm_gem_object_unreference_unlocked(obj);
490
491         return ret;
492 }
493
494 /* This is the fast write path which cannot handle
495  * page faults in the source data
496  */
497
498 static inline int
499 fast_user_write(struct io_mapping *mapping,
500                 loff_t page_base, int page_offset,
501                 char __user *user_data,
502                 int length)
503 {
504         char *vaddr_atomic;
505         unsigned long unwritten;
506
507         vaddr_atomic = io_mapping_map_atomic_wc(mapping, page_base, KM_USER0);
508         unwritten = __copy_from_user_inatomic_nocache(vaddr_atomic + page_offset,
509                                                       user_data, length);
510         io_mapping_unmap_atomic(vaddr_atomic, KM_USER0);
511         if (unwritten)
512                 return -EFAULT;
513         return 0;
514 }
515
516 /* Here's the write path which can sleep for
517  * page faults
518  */
519
520 static inline void
521 slow_kernel_write(struct io_mapping *mapping,
522                   loff_t gtt_base, int gtt_offset,
523                   struct page *user_page, int user_offset,
524                   int length)
525 {
526         char __iomem *dst_vaddr;
527         char *src_vaddr;
528
529         dst_vaddr = io_mapping_map_wc(mapping, gtt_base);
530         src_vaddr = kmap(user_page);
531
532         memcpy_toio(dst_vaddr + gtt_offset,
533                     src_vaddr + user_offset,
534                     length);
535
536         kunmap(user_page);
537         io_mapping_unmap(dst_vaddr);
538 }
539
540 static inline int
541 fast_shmem_write(struct page **pages,
542                  loff_t page_base, int page_offset,
543                  char __user *data,
544                  int length)
545 {
546         char __iomem *vaddr;
547         unsigned long unwritten;
548
549         vaddr = kmap_atomic(pages[page_base >> PAGE_SHIFT], KM_USER0);
550         if (vaddr == NULL)
551                 return -ENOMEM;
552         unwritten = __copy_from_user_inatomic(vaddr + page_offset, data, length);
553         kunmap_atomic(vaddr, KM_USER0);
554
555         if (unwritten)
556                 return -EFAULT;
557         return 0;
558 }
559
560 /**
561  * This is the fast pwrite path, where we copy the data directly from the
562  * user into the GTT, uncached.
563  */
564 static int
565 i915_gem_gtt_pwrite_fast(struct drm_device *dev, struct drm_gem_object *obj,
566                          struct drm_i915_gem_pwrite *args,
567                          struct drm_file *file_priv)
568 {
569         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
570         drm_i915_private_t *dev_priv = dev->dev_private;
571         ssize_t remain;
572         loff_t offset, page_base;
573         char __user *user_data;
574         int page_offset, page_length;
575         int ret;
576
577         user_data = (char __user *) (uintptr_t) args->data_ptr;
578         remain = args->size;
579         if (!access_ok(VERIFY_READ, user_data, remain))
580                 return -EFAULT;
581
582
583         mutex_lock(&dev->struct_mutex);
584         ret = i915_gem_object_pin(obj, 0);
585         if (ret) {
586                 mutex_unlock(&dev->struct_mutex);
587                 return ret;
588         }
589         ret = i915_gem_object_set_to_gtt_domain(obj, 1);
590         if (ret)
591                 goto fail;
592
593         obj_priv = to_intel_bo(obj);
594         offset = obj_priv->gtt_offset + args->offset;
595
596         while (remain > 0) {
597                 /* Operation in this page
598                  *
599                  * page_base = page offset within aperture
600                  * page_offset = offset within page
601                  * page_length = bytes to copy for this page
602                  */
603                 page_base = (offset & ~(PAGE_SIZE-1));
604                 page_offset = offset & (PAGE_SIZE-1);
605                 page_length = remain;
606                 if ((page_offset + remain) > PAGE_SIZE)
607                         page_length = PAGE_SIZE - page_offset;
608
609                 ret = fast_user_write(dev_priv->mm.gtt_mapping, page_base,
610                                        page_offset, user_data, page_length);
611
612                 /* If we get a fault while copying data, then (presumably) our
613                  * source page isn't available.  Return the error and we'll
614                  * retry in the slow path.
615                  */
616                 if (ret)
617                         goto fail;
618
619                 remain -= page_length;
620                 user_data += page_length;
621                 offset += page_length;
622         }
623
624 fail:
625         i915_gem_object_unpin(obj);
626         mutex_unlock(&dev->struct_mutex);
627
628         return ret;
629 }
630
631 /**
632  * This is the fallback GTT pwrite path, which uses get_user_pages to pin
633  * the user memory and maps it with io_mapping_map_wc and kmap for copying.
634  *
635  * This code resulted in x11perf -rgb10text consuming about 10% more CPU
636  * than using i915_gem_gtt_pwrite_fast on a G45 (32-bit).
637  */
638 static int
639 i915_gem_gtt_pwrite_slow(struct drm_device *dev, struct drm_gem_object *obj,
640                          struct drm_i915_gem_pwrite *args,
641                          struct drm_file *file_priv)
642 {
643         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
644         drm_i915_private_t *dev_priv = dev->dev_private;
645         ssize_t remain;
646         loff_t gtt_page_base, offset;
647         loff_t first_data_page, last_data_page, num_pages;
648         loff_t pinned_pages, i;
649         struct page **user_pages;
650         struct mm_struct *mm = current->mm;
651         int gtt_page_offset, data_page_offset, data_page_index, page_length;
652         int ret;
653         uint64_t data_ptr = args->data_ptr;
654
655         remain = args->size;
656
657         /* Pin the user pages containing the data.  We can't fault while
658          * holding the struct mutex, and all of the pwrite implementations
659          * want to hold it while dereferencing the user data.
660          */
661         first_data_page = data_ptr / PAGE_SIZE;
662         last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE;
663         num_pages = last_data_page - first_data_page + 1;
664
665         user_pages = drm_calloc_large(num_pages, sizeof(struct page *));
666         if (user_pages == NULL)
667                 return -ENOMEM;
668
669         down_read(&mm->mmap_sem);
670         pinned_pages = get_user_pages(current, mm, (uintptr_t)args->data_ptr,
671                                       num_pages, 0, 0, user_pages, NULL);
672         up_read(&mm->mmap_sem);
673         if (pinned_pages < num_pages) {
674                 ret = -EFAULT;
675                 goto out_unpin_pages;
676         }
677
678         mutex_lock(&dev->struct_mutex);
679         ret = i915_gem_object_pin(obj, 0);
680         if (ret)
681                 goto out_unlock;
682
683         ret = i915_gem_object_set_to_gtt_domain(obj, 1);
684         if (ret)
685                 goto out_unpin_object;
686
687         obj_priv = to_intel_bo(obj);
688         offset = obj_priv->gtt_offset + args->offset;
689
690         while (remain > 0) {
691                 /* Operation in this page
692                  *
693                  * gtt_page_base = page offset within aperture
694                  * gtt_page_offset = offset within page in aperture
695                  * data_page_index = page number in get_user_pages return
696                  * data_page_offset = offset within the data_page_index page.
697                  * page_length = bytes to copy for this page
698                  */
699                 gtt_page_base = offset & PAGE_MASK;
700                 gtt_page_offset = offset & ~PAGE_MASK;
701                 data_page_index = data_ptr / PAGE_SIZE - first_data_page;
702                 data_page_offset = data_ptr & ~PAGE_MASK;
703
704                 page_length = remain;
705                 if ((gtt_page_offset + page_length) > PAGE_SIZE)
706                         page_length = PAGE_SIZE - gtt_page_offset;
707                 if ((data_page_offset + page_length) > PAGE_SIZE)
708                         page_length = PAGE_SIZE - data_page_offset;
709
710                 slow_kernel_write(dev_priv->mm.gtt_mapping,
711                                   gtt_page_base, gtt_page_offset,
712                                   user_pages[data_page_index],
713                                   data_page_offset,
714                                   page_length);
715
716                 remain -= page_length;
717                 offset += page_length;
718                 data_ptr += page_length;
719         }
720
721 out_unpin_object:
722         i915_gem_object_unpin(obj);
723 out_unlock:
724         mutex_unlock(&dev->struct_mutex);
725 out_unpin_pages:
726         for (i = 0; i < pinned_pages; i++)
727                 page_cache_release(user_pages[i]);
728         drm_free_large(user_pages);
729
730         return ret;
731 }
732
733 /**
734  * This is the fast shmem pwrite path, which attempts to directly
735  * copy_from_user into the kmapped pages backing the object.
736  */
737 static int
738 i915_gem_shmem_pwrite_fast(struct drm_device *dev, struct drm_gem_object *obj,
739                            struct drm_i915_gem_pwrite *args,
740                            struct drm_file *file_priv)
741 {
742         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
743         ssize_t remain;
744         loff_t offset, page_base;
745         char __user *user_data;
746         int page_offset, page_length;
747         int ret;
748
749         user_data = (char __user *) (uintptr_t) args->data_ptr;
750         remain = args->size;
751
752         mutex_lock(&dev->struct_mutex);
753
754         ret = i915_gem_object_get_pages(obj, 0);
755         if (ret != 0)
756                 goto fail_unlock;
757
758         ret = i915_gem_object_set_to_cpu_domain(obj, 1);
759         if (ret != 0)
760                 goto fail_put_pages;
761
762         obj_priv = to_intel_bo(obj);
763         offset = args->offset;
764         obj_priv->dirty = 1;
765
766         while (remain > 0) {
767                 /* Operation in this page
768                  *
769                  * page_base = page offset within aperture
770                  * page_offset = offset within page
771                  * page_length = bytes to copy for this page
772                  */
773                 page_base = (offset & ~(PAGE_SIZE-1));
774                 page_offset = offset & (PAGE_SIZE-1);
775                 page_length = remain;
776                 if ((page_offset + remain) > PAGE_SIZE)
777                         page_length = PAGE_SIZE - page_offset;
778
779                 ret = fast_shmem_write(obj_priv->pages,
780                                        page_base, page_offset,
781                                        user_data, page_length);
782                 if (ret)
783                         goto fail_put_pages;
784
785                 remain -= page_length;
786                 user_data += page_length;
787                 offset += page_length;
788         }
789
790 fail_put_pages:
791         i915_gem_object_put_pages(obj);
792 fail_unlock:
793         mutex_unlock(&dev->struct_mutex);
794
795         return ret;
796 }
797
798 /**
799  * This is the fallback shmem pwrite path, which uses get_user_pages to pin
800  * the user memory and maps it with kmap for copying.
801  *
802  * This avoids taking mmap_sem for faulting on the user's address while the
803  * struct_mutex is held.
804  */
805 static int
806 i915_gem_shmem_pwrite_slow(struct drm_device *dev, struct drm_gem_object *obj,
807                            struct drm_i915_gem_pwrite *args,
808                            struct drm_file *file_priv)
809 {
810         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
811         struct mm_struct *mm = current->mm;
812         struct page **user_pages;
813         ssize_t remain;
814         loff_t offset, pinned_pages, i;
815         loff_t first_data_page, last_data_page, num_pages;
816         int shmem_page_index, shmem_page_offset;
817         int data_page_index,  data_page_offset;
818         int page_length;
819         int ret;
820         uint64_t data_ptr = args->data_ptr;
821         int do_bit17_swizzling;
822
823         remain = args->size;
824
825         /* Pin the user pages containing the data.  We can't fault while
826          * holding the struct mutex, and all of the pwrite implementations
827          * want to hold it while dereferencing the user data.
828          */
829         first_data_page = data_ptr / PAGE_SIZE;
830         last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE;
831         num_pages = last_data_page - first_data_page + 1;
832
833         user_pages = drm_calloc_large(num_pages, sizeof(struct page *));
834         if (user_pages == NULL)
835                 return -ENOMEM;
836
837         down_read(&mm->mmap_sem);
838         pinned_pages = get_user_pages(current, mm, (uintptr_t)args->data_ptr,
839                                       num_pages, 0, 0, user_pages, NULL);
840         up_read(&mm->mmap_sem);
841         if (pinned_pages < num_pages) {
842                 ret = -EFAULT;
843                 goto fail_put_user_pages;
844         }
845
846         do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
847
848         mutex_lock(&dev->struct_mutex);
849
850         ret = i915_gem_object_get_pages_or_evict(obj);
851         if (ret)
852                 goto fail_unlock;
853
854         ret = i915_gem_object_set_to_cpu_domain(obj, 1);
855         if (ret != 0)
856                 goto fail_put_pages;
857
858         obj_priv = to_intel_bo(obj);
859         offset = args->offset;
860         obj_priv->dirty = 1;
861
862         while (remain > 0) {
863                 /* Operation in this page
864                  *
865                  * shmem_page_index = page number within shmem file
866                  * shmem_page_offset = offset within page in shmem file
867                  * data_page_index = page number in get_user_pages return
868                  * data_page_offset = offset within the data_page_index page.
869                  * page_length = bytes to copy for this page
870                  */
871                 shmem_page_index = offset / PAGE_SIZE;
872                 shmem_page_offset = offset & ~PAGE_MASK;
873                 data_page_index = data_ptr / PAGE_SIZE - first_data_page;
874                 data_page_offset = data_ptr & ~PAGE_MASK;
875
876                 page_length = remain;
877                 if ((shmem_page_offset + page_length) > PAGE_SIZE)
878                         page_length = PAGE_SIZE - shmem_page_offset;
879                 if ((data_page_offset + page_length) > PAGE_SIZE)
880                         page_length = PAGE_SIZE - data_page_offset;
881
882                 if (do_bit17_swizzling) {
883                         slow_shmem_bit17_copy(obj_priv->pages[shmem_page_index],
884                                               shmem_page_offset,
885                                               user_pages[data_page_index],
886                                               data_page_offset,
887                                               page_length,
888                                               0);
889                 } else {
890                         slow_shmem_copy(obj_priv->pages[shmem_page_index],
891                                         shmem_page_offset,
892                                         user_pages[data_page_index],
893                                         data_page_offset,
894                                         page_length);
895                 }
896
897                 remain -= page_length;
898                 data_ptr += page_length;
899                 offset += page_length;
900         }
901
902 fail_put_pages:
903         i915_gem_object_put_pages(obj);
904 fail_unlock:
905         mutex_unlock(&dev->struct_mutex);
906 fail_put_user_pages:
907         for (i = 0; i < pinned_pages; i++)
908                 page_cache_release(user_pages[i]);
909         drm_free_large(user_pages);
910
911         return ret;
912 }
913
914 /**
915  * Writes data to the object referenced by handle.
916  *
917  * On error, the contents of the buffer that were to be modified are undefined.
918  */
919 int
920 i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
921                       struct drm_file *file_priv)
922 {
923         struct drm_i915_gem_pwrite *args = data;
924         struct drm_gem_object *obj;
925         struct drm_i915_gem_object *obj_priv;
926         int ret = 0;
927
928         obj = drm_gem_object_lookup(dev, file_priv, args->handle);
929         if (obj == NULL)
930                 return -EBADF;
931         obj_priv = to_intel_bo(obj);
932
933         /* Bounds check destination.
934          *
935          * XXX: This could use review for overflow issues...
936          */
937         if (args->offset > obj->size || args->size > obj->size ||
938             args->offset + args->size > obj->size) {
939                 drm_gem_object_unreference_unlocked(obj);
940                 return -EINVAL;
941         }
942
943         /* We can only do the GTT pwrite on untiled buffers, as otherwise
944          * it would end up going through the fenced access, and we'll get
945          * different detiling behavior between reading and writing.
946          * pread/pwrite currently are reading and writing from the CPU
947          * perspective, requiring manual detiling by the client.
948          */
949         if (obj_priv->phys_obj)
950                 ret = i915_gem_phys_pwrite(dev, obj, args, file_priv);
951         else if (obj_priv->tiling_mode == I915_TILING_NONE &&
952                  dev->gtt_total != 0 &&
953                  obj->write_domain != I915_GEM_DOMAIN_CPU) {
954                 ret = i915_gem_gtt_pwrite_fast(dev, obj, args, file_priv);
955                 if (ret == -EFAULT) {
956                         ret = i915_gem_gtt_pwrite_slow(dev, obj, args,
957                                                        file_priv);
958                 }
959         } else if (i915_gem_object_needs_bit17_swizzle(obj)) {
960                 ret = i915_gem_shmem_pwrite_slow(dev, obj, args, file_priv);
961         } else {
962                 ret = i915_gem_shmem_pwrite_fast(dev, obj, args, file_priv);
963                 if (ret == -EFAULT) {
964                         ret = i915_gem_shmem_pwrite_slow(dev, obj, args,
965                                                          file_priv);
966                 }
967         }
968
969 #if WATCH_PWRITE
970         if (ret)
971                 DRM_INFO("pwrite failed %d\n", ret);
972 #endif
973
974         drm_gem_object_unreference_unlocked(obj);
975
976         return ret;
977 }
978
979 /**
980  * Called when user space prepares to use an object with the CPU, either
981  * through the mmap ioctl's mapping or a GTT mapping.
982  */
983 int
984 i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
985                           struct drm_file *file_priv)
986 {
987         struct drm_i915_private *dev_priv = dev->dev_private;
988         struct drm_i915_gem_set_domain *args = data;
989         struct drm_gem_object *obj;
990         struct drm_i915_gem_object *obj_priv;
991         uint32_t read_domains = args->read_domains;
992         uint32_t write_domain = args->write_domain;
993         int ret;
994
995         if (!(dev->driver->driver_features & DRIVER_GEM))
996                 return -ENODEV;
997
998         /* Only handle setting domains to types used by the CPU. */
999         if (write_domain & I915_GEM_GPU_DOMAINS)
1000                 return -EINVAL;
1001
1002         if (read_domains & I915_GEM_GPU_DOMAINS)
1003                 return -EINVAL;
1004
1005         /* Having something in the write domain implies it's in the read
1006          * domain, and only that read domain.  Enforce that in the request.
1007          */
1008         if (write_domain != 0 && read_domains != write_domain)
1009                 return -EINVAL;
1010
1011         obj = drm_gem_object_lookup(dev, file_priv, args->handle);
1012         if (obj == NULL)
1013                 return -EBADF;
1014         obj_priv = to_intel_bo(obj);
1015
1016         mutex_lock(&dev->struct_mutex);
1017
1018         intel_mark_busy(dev, obj);
1019
1020 #if WATCH_BUF
1021         DRM_INFO("set_domain_ioctl %p(%zd), %08x %08x\n",
1022                  obj, obj->size, read_domains, write_domain);
1023 #endif
1024         if (read_domains & I915_GEM_DOMAIN_GTT) {
1025                 ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0);
1026
1027                 /* Update the LRU on the fence for the CPU access that's
1028                  * about to occur.
1029                  */
1030                 if (obj_priv->fence_reg != I915_FENCE_REG_NONE) {
1031                         struct drm_i915_fence_reg *reg =
1032                                 &dev_priv->fence_regs[obj_priv->fence_reg];
1033                         list_move_tail(&reg->lru_list,
1034                                        &dev_priv->mm.fence_list);
1035                 }
1036
1037                 /* Silently promote "you're not bound, there was nothing to do"
1038                  * to success, since the client was just asking us to
1039                  * make sure everything was done.
1040                  */
1041                 if (ret == -EINVAL)
1042                         ret = 0;
1043         } else {
1044                 ret = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0);
1045         }
1046
1047
1048         /* Maintain LRU order of "inactive" objects */
1049         if (ret == 0 && i915_gem_object_is_inactive(obj_priv))
1050                 list_move_tail(&obj_priv->list, &dev_priv->mm.inactive_list);
1051
1052         drm_gem_object_unreference(obj);
1053         mutex_unlock(&dev->struct_mutex);
1054         return ret;
1055 }
1056
1057 /**
1058  * Called when user space has done writes to this buffer
1059  */
1060 int
1061 i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
1062                       struct drm_file *file_priv)
1063 {
1064         struct drm_i915_gem_sw_finish *args = data;
1065         struct drm_gem_object *obj;
1066         struct drm_i915_gem_object *obj_priv;
1067         int ret = 0;
1068
1069         if (!(dev->driver->driver_features & DRIVER_GEM))
1070                 return -ENODEV;
1071
1072         mutex_lock(&dev->struct_mutex);
1073         obj = drm_gem_object_lookup(dev, file_priv, args->handle);
1074         if (obj == NULL) {
1075                 mutex_unlock(&dev->struct_mutex);
1076                 return -EBADF;
1077         }
1078
1079 #if WATCH_BUF
1080         DRM_INFO("%s: sw_finish %d (%p %zd)\n",
1081                  __func__, args->handle, obj, obj->size);
1082 #endif
1083         obj_priv = to_intel_bo(obj);
1084
1085         /* Pinned buffers may be scanout, so flush the cache */
1086         if (obj_priv->pin_count)
1087                 i915_gem_object_flush_cpu_write_domain(obj);
1088
1089         drm_gem_object_unreference(obj);
1090         mutex_unlock(&dev->struct_mutex);
1091         return ret;
1092 }
1093
1094 /**
1095  * Maps the contents of an object, returning the address it is mapped
1096  * into.
1097  *
1098  * While the mapping holds a reference on the contents of the object, it doesn't
1099  * imply a ref on the object itself.
1100  */
1101 int
1102 i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
1103                    struct drm_file *file_priv)
1104 {
1105         struct drm_i915_gem_mmap *args = data;
1106         struct drm_gem_object *obj;
1107         loff_t offset;
1108         unsigned long addr;
1109
1110         if (!(dev->driver->driver_features & DRIVER_GEM))
1111                 return -ENODEV;
1112
1113         obj = drm_gem_object_lookup(dev, file_priv, args->handle);
1114         if (obj == NULL)
1115                 return -EBADF;
1116
1117         offset = args->offset;
1118
1119         down_write(&current->mm->mmap_sem);
1120         addr = do_mmap(obj->filp, 0, args->size,
1121                        PROT_READ | PROT_WRITE, MAP_SHARED,
1122                        args->offset);
1123         up_write(&current->mm->mmap_sem);
1124         drm_gem_object_unreference_unlocked(obj);
1125         if (IS_ERR((void *)addr))
1126                 return addr;
1127
1128         args->addr_ptr = (uint64_t) addr;
1129
1130         return 0;
1131 }
1132
1133 /**
1134  * i915_gem_fault - fault a page into the GTT
1135  * @vma: VMA in question
1136  * @vmf: fault info
1137  *
1138  * The fault handler is set up by drm_gem_mmap() when an object is GTT mapped
1139  * from userspace.  The fault handler takes care of binding the object to
1140  * the GTT (if needed), allocating and programming a fence register (again,
1141  * only if needed based on whether the old reg is still valid or the object
1142  * is tiled) and inserting a new PTE into the faulting process.
1143  *
1144  * Note that the faulting process may involve evicting existing objects
1145  * from the GTT and/or fence registers to make room.  So performance may
1146  * suffer if the GTT working set is large or there are few fence registers
1147  * left.
1148  */
1149 int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
1150 {
1151         struct drm_gem_object *obj = vma->vm_private_data;
1152         struct drm_device *dev = obj->dev;
1153         drm_i915_private_t *dev_priv = dev->dev_private;
1154         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
1155         pgoff_t page_offset;
1156         unsigned long pfn;
1157         int ret = 0;
1158         bool write = !!(vmf->flags & FAULT_FLAG_WRITE);
1159
1160         /* We don't use vmf->pgoff since that has the fake offset */
1161         page_offset = ((unsigned long)vmf->virtual_address - vma->vm_start) >>
1162                 PAGE_SHIFT;
1163
1164         /* Now bind it into the GTT if needed */
1165         mutex_lock(&dev->struct_mutex);
1166         if (!obj_priv->gtt_space) {
1167                 ret = i915_gem_object_bind_to_gtt(obj, 0);
1168                 if (ret)
1169                         goto unlock;
1170
1171                 ret = i915_gem_object_set_to_gtt_domain(obj, write);
1172                 if (ret)
1173                         goto unlock;
1174         }
1175
1176         /* Need a new fence register? */
1177         if (obj_priv->tiling_mode != I915_TILING_NONE) {
1178                 ret = i915_gem_object_get_fence_reg(obj);
1179                 if (ret)
1180                         goto unlock;
1181         }
1182
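        /* A CPU fault is a use of the buffer, so keep the eviction LRU in
         * access order by moving the object to the tail of the inactive
         * list. */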
1183         if (i915_gem_object_is_inactive(obj_priv))
1184                 list_move_tail(&obj_priv->list, &dev_priv->mm.inactive_list);
1185
1186         pfn = ((dev->agp->base + obj_priv->gtt_offset) >> PAGE_SHIFT) +
1187                 page_offset;
1188
1189         /* Finally, remap it using the new GTT offset */
1190         ret = vm_insert_pfn(vma, (unsigned long)vmf->virtual_address, pfn);
1191 unlock:
1192         mutex_unlock(&dev->struct_mutex);
1193
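        /* Map kernel error codes onto fault results; -ERESTARTSYS means a
         * signal interrupted us, so return NOPAGE and let the fault be
         * retried once the signal has been dealt with. */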
1194         switch (ret) {
1195         case 0:
1196         case -ERESTARTSYS:
1197                 return VM_FAULT_NOPAGE;
1198         case -ENOMEM:
1199         case -EAGAIN:
1200                 return VM_FAULT_OOM;
1201         default:
1202                 return VM_FAULT_SIGBUS;
1203         }
1204 }
1205
1206 /**
1207  * i915_gem_create_mmap_offset - create a fake mmap offset for an object
1208  * @obj: obj in question
1209  *
1210  * GEM memory mapping works by handing back to userspace a fake mmap offset
1211  * it can use in a subsequent mmap(2) call.  The DRM core code then looks
1212  * up the object based on the offset and sets up the various memory mapping
1213  * structures.
1214  *
1215  * This routine allocates and attaches a fake offset for @obj.
1216  */
1217 static int
1218 i915_gem_create_mmap_offset(struct drm_gem_object *obj)
1219 {
1220         struct drm_device *dev = obj->dev;
1221         struct drm_gem_mm *mm = dev->mm_private;
1222         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
1223         struct drm_map_list *list;
1224         struct drm_local_map *map;
1225         int ret = 0;
1226
1227         /* Set the object up for mmap'ing */
1228         list = &obj->map_list;
1229         list->map = kzalloc(sizeof(struct drm_map_list), GFP_KERNEL);
1230         if (!list->map)
1231                 return -ENOMEM;
1232
1233         map = list->map;
1234         map->type = _DRM_GEM;
1235         map->size = obj->size;
1236         map->handle = obj;
1237
1238         /* Get a DRM GEM mmap offset allocated... */
1239         list->file_offset_node = drm_mm_search_free(&mm->offset_manager,
1240                                                     obj->size / PAGE_SIZE, 0, 0);
1241         if (!list->file_offset_node) {
1242                 DRM_ERROR("failed to allocate offset for bo %d\n", obj->name);
1243                 ret = -ENOMEM;
1244                 goto out_free_list;
1245         }
1246
1247         list->file_offset_node = drm_mm_get_block(list->file_offset_node,
1248                                                   obj->size / PAGE_SIZE, 0);
1249         if (!list->file_offset_node) {
1250                 ret = -ENOMEM;
1251                 goto out_free_list;
1252         }
1253
1254         list->hash.key = list->file_offset_node->start;
1255         if (drm_ht_insert_item(&mm->offset_hash, &list->hash)) {
1256                 DRM_ERROR("failed to add to map hash\n");
1257                 ret = -ENOMEM;
1258                 goto out_free_mm;
1259         }
1260
1261         /* By now we should be all set, any drm_mmap request on the offset
1262          * below will get to our mmap & fault handler */
1263         obj_priv->mmap_offset = ((uint64_t) list->hash.key) << PAGE_SHIFT;
1264
1265         return 0;
1266
1267 out_free_mm:
1268         drm_mm_put_block(list->file_offset_node);
1269 out_free_list:
1270         kfree(list->map);
1271
1272         return ret;
1273 }
1274
1275 /**
1276  * i915_gem_release_mmap - remove physical page mappings
1277  * @obj: obj in question
1278  *
1279  * Preserve the reservation of the mmapping with the DRM core code, but
1280  * relinquish ownership of the pages back to the system.
1281  *
1282  * It is vital that we remove the page mapping if we have mapped a tiled
1283  * object through the GTT and then lose the fence register due to
1284  * resource pressure. Similarly if the object has been moved out of the
1285  * aperture, then pages mapped into userspace must be revoked. Removing the
1286  * mapping will then trigger a page fault on the next user access, allowing
1287  * fixup by i915_gem_fault().
1288  */
1289 void
1290 i915_gem_release_mmap(struct drm_gem_object *obj)
1291 {
1292         struct drm_device *dev = obj->dev;
1293         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
1294
1295         if (dev->dev_mapping)
1296                 unmap_mapping_range(dev->dev_mapping,
1297                                     obj_priv->mmap_offset, obj->size, 1);
1298 }
1299
1300 static void
1301 i915_gem_free_mmap_offset(struct drm_gem_object *obj)
1302 {
1303         struct drm_device *dev = obj->dev;
1304         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
1305         struct drm_gem_mm *mm = dev->mm_private;
1306         struct drm_map_list *list;
1307
1308         list = &obj->map_list;
1309         drm_ht_remove_item(&mm->offset_hash, &list->hash);
1310
1311         if (list->file_offset_node) {
1312                 drm_mm_put_block(list->file_offset_node);
1313                 list->file_offset_node = NULL;
1314         }
1315
1316         if (list->map) {
1317                 kfree(list->map);
1318                 list->map = NULL;
1319         }
1320
1321         obj_priv->mmap_offset = 0;
1322 }
1323
1324 /**
1325  * i915_gem_get_gtt_alignment - return required GTT alignment for an object
1326  * @obj: object to check
1327  *
1328  * Return the required GTT alignment for an object, taking into account
1329  * potential fence register mapping if needed.
1330  */
1331 static uint32_t
1332 i915_gem_get_gtt_alignment(struct drm_gem_object *obj)
1333 {
1334         struct drm_device *dev = obj->dev;
1335         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
1336         int start, i;
1337
1338         /*
1339          * Minimum alignment is 4k (GTT page size), but might be greater
1340          * if a fence register is needed for the object.
1341          */
1342         if (IS_I965G(dev) || obj_priv->tiling_mode == I915_TILING_NONE)
1343                 return 4096;
1344
1345         /*
1346          * Previous chips need to be aligned to the size of the smallest
1347          * fence register that can contain the object.
1348          */
1349         if (IS_I9XX(dev))
1350                 start = 1024*1024;
1351         else
1352                 start = 512*1024;
1353
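        /* Round up to the smallest power-of-two fence size that covers the
         * object; e.g. a 1.5 MiB tiled object on a 9xx chip comes back with
         * 2 MiB alignment. */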
1354         for (i = start; i < obj->size; i <<= 1)
1355                 ;
1356
1357         return i;
1358 }
1359
1360 /**
1361  * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing
1362  * @dev: DRM device
1363  * @data: GTT mapping ioctl data
1364  * @file_priv: GEM object info
1365  *
1366  * Simply returns the fake offset to userspace so it can mmap it.
1367  * The mmap call will end up in drm_gem_mmap(), which will set things
1368  * up so we can get faults in the handler above.
1369  *
1370  * The fault handler will take care of binding the object into the GTT
1371  * (since it may have been evicted to make room for something), allocating
1372  * a fence register, and mapping the appropriate aperture address into
1373  * userspace.
1374  */
1375 int
1376 i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
1377                         struct drm_file *file_priv)
1378 {
1379         struct drm_i915_gem_mmap_gtt *args = data;
1380         struct drm_gem_object *obj;
1381         struct drm_i915_gem_object *obj_priv;
1382         int ret;
1383
1384         if (!(dev->driver->driver_features & DRIVER_GEM))
1385                 return -ENODEV;
1386
1387         obj = drm_gem_object_lookup(dev, file_priv, args->handle);
1388         if (obj == NULL)
1389                 return -EBADF;
1390
1391         mutex_lock(&dev->struct_mutex);
1392
1393         obj_priv = to_intel_bo(obj);
1394
1395         if (obj_priv->madv != I915_MADV_WILLNEED) {
1396                 DRM_ERROR("Attempting to mmap a purgeable buffer\n");
1397                 drm_gem_object_unreference(obj);
1398                 mutex_unlock(&dev->struct_mutex);
1399                 return -EINVAL;
1400         }
1401
1402
1403         if (!obj_priv->mmap_offset) {
1404                 ret = i915_gem_create_mmap_offset(obj);
1405                 if (ret) {
1406                         drm_gem_object_unreference(obj);
1407                         mutex_unlock(&dev->struct_mutex);
1408                         return ret;
1409                 }
1410         }
1411
1412         args->offset = obj_priv->mmap_offset;
1413
1414         /*
1415          * Pull it into the GTT so that we have a page list (makes the
1416          * initial fault faster and any subsequent flushing possible).
1417          */
1418         if (!obj_priv->agp_mem) {
1419                 ret = i915_gem_object_bind_to_gtt(obj, 0);
1420                 if (ret) {
1421                         drm_gem_object_unreference(obj);
1422                         mutex_unlock(&dev->struct_mutex);
1423                         return ret;
1424                 }
1425         }
1426
1427         drm_gem_object_unreference(obj);
1428         mutex_unlock(&dev->struct_mutex);
1429
1430         return 0;
1431 }
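
A hedged userspace counterpart, showing how the fake offset returned here is
consumed; the header include path and helper name are illustrative, not part
of the driver.

#include <string.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <drm/i915_drm.h>

/* Illustrative only: map a GEM object through the GTT aperture. */
static void *example_mmap_gtt(int fd, unsigned int handle, size_t size)
{
	struct drm_i915_gem_mmap_gtt mmap_arg;
	void *ptr;

	memset(&mmap_arg, 0, sizeof(mmap_arg));
	mmap_arg.handle = handle;
	if (ioctl(fd, DRM_IOCTL_I915_GEM_MMAP_GTT, &mmap_arg))
		return NULL;

	/* The offset is fake; i915_gem_fault() above fills in real GTT
	 * pages on first access. */
	ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED,
		   fd, mmap_arg.offset);
	return ptr == MAP_FAILED ? NULL : ptr;
}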
1432
1433 void
1434 i915_gem_object_put_pages(struct drm_gem_object *obj)
1435 {
1436         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
1437         int page_count = obj->size / PAGE_SIZE;
1438         int i;
1439
1440         BUG_ON(obj_priv->pages_refcount == 0);
1441         BUG_ON(obj_priv->madv == __I915_MADV_PURGED);
1442
1443         if (--obj_priv->pages_refcount != 0)
1444                 return;
1445
1446         if (obj_priv->tiling_mode != I915_TILING_NONE)
1447                 i915_gem_object_save_bit_17_swizzle(obj);
1448
1449         if (obj_priv->madv == I915_MADV_DONTNEED)
1450                 obj_priv->dirty = 0;
1451
1452         for (i = 0; i < page_count; i++) {
1453                 if (obj_priv->dirty)
1454                         set_page_dirty(obj_priv->pages[i]);
1455
1456                 if (obj_priv->madv == I915_MADV_WILLNEED)
1457                         mark_page_accessed(obj_priv->pages[i]);
1458
1459                 page_cache_release(obj_priv->pages[i]);
1460         }
1461         obj_priv->dirty = 0;
1462
1463         drm_free_large(obj_priv->pages);
1464         obj_priv->pages = NULL;
1465 }
1466
1467 static void
1468 i915_gem_object_move_to_active(struct drm_gem_object *obj, uint32_t seqno,
1469                                struct intel_ring_buffer *ring)
1470 {
1471         struct drm_device *dev = obj->dev;
1472         drm_i915_private_t *dev_priv = dev->dev_private;
1473         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
1474         BUG_ON(ring == NULL);
1475         obj_priv->ring = ring;
1476
1477         /* Add a reference if we're newly entering the active list. */
1478         if (!obj_priv->active) {
1479                 drm_gem_object_reference(obj);
1480                 obj_priv->active = 1;
1481         }
1482         /* Move from whatever list we were on to the tail of execution. */
1483         spin_lock(&dev_priv->mm.active_list_lock);
1484         list_move_tail(&obj_priv->list, &ring->active_list);
1485         spin_unlock(&dev_priv->mm.active_list_lock);
1486         obj_priv->last_rendering_seqno = seqno;
1487 }
1488
1489 static void
1490 i915_gem_object_move_to_flushing(struct drm_gem_object *obj)
1491 {
1492         struct drm_device *dev = obj->dev;
1493         drm_i915_private_t *dev_priv = dev->dev_private;
1494         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
1495
1496         BUG_ON(!obj_priv->active);
1497         list_move_tail(&obj_priv->list, &dev_priv->mm.flushing_list);
1498         obj_priv->last_rendering_seqno = 0;
1499 }
1500
1501 /* Immediately discard the backing storage */
1502 static void
1503 i915_gem_object_truncate(struct drm_gem_object *obj)
1504 {
1505         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
1506         struct inode *inode;
1507
1508         inode = obj->filp->f_path.dentry->d_inode;
1509         if (inode->i_op->truncate)
1510                 inode->i_op->truncate(inode);
1511
1512         obj_priv->madv = __I915_MADV_PURGED;
1513 }
1514
1515 static inline int
1516 i915_gem_object_is_purgeable(struct drm_i915_gem_object *obj_priv)
1517 {
1518         return obj_priv->madv == I915_MADV_DONTNEED;
1519 }
1520
1521 static void
1522 i915_gem_object_move_to_inactive(struct drm_gem_object *obj)
1523 {
1524         struct drm_device *dev = obj->dev;
1525         drm_i915_private_t *dev_priv = dev->dev_private;
1526         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
1527
1528         i915_verify_inactive(dev, __FILE__, __LINE__);
1529         if (obj_priv->pin_count != 0)
1530                 list_del_init(&obj_priv->list);
1531         else
1532                 list_move_tail(&obj_priv->list, &dev_priv->mm.inactive_list);
1533
1534         BUG_ON(!list_empty(&obj_priv->gpu_write_list));
1535
1536         obj_priv->last_rendering_seqno = 0;
1537         obj_priv->ring = NULL;
1538         if (obj_priv->active) {
1539                 obj_priv->active = 0;
1540                 drm_gem_object_unreference(obj);
1541         }
1542         i915_verify_inactive(dev, __FILE__, __LINE__);
1543 }
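/*
 * Summary of the list lifecycle implemented by the helpers above: execution
 * moves an object onto a ring's active list (taking a reference), retirement
 * moves it to the flushing list while it still has a pending GPU write
 * domain, and otherwise to the inactive list.  move_to_inactive drops the
 * active reference, and parks pinned objects off-list rather than on the
 * inactive LRU.
 */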
1544
1545 static void
1546 i915_gem_process_flushing_list(struct drm_device *dev,
1547                                uint32_t flush_domains, uint32_t seqno,
1548                                struct intel_ring_buffer *ring)
1549 {
1550         drm_i915_private_t *dev_priv = dev->dev_private;
1551         struct drm_i915_gem_object *obj_priv, *next;
1552
1553         list_for_each_entry_safe(obj_priv, next,
1554                                  &dev_priv->mm.gpu_write_list,
1555                                  gpu_write_list) {
1556                 struct drm_gem_object *obj = &obj_priv->base;
1557
1558                 if ((obj->write_domain & flush_domains) ==
1559                     obj->write_domain &&
1560                     obj_priv->ring->ring_flag == ring->ring_flag) {
1561                         uint32_t old_write_domain = obj->write_domain;
1562
1563                         obj->write_domain = 0;
1564                         list_del_init(&obj_priv->gpu_write_list);
1565                         i915_gem_object_move_to_active(obj, seqno, ring);
1566
1567                         /* update the fence lru list */
1568                         if (obj_priv->fence_reg != I915_FENCE_REG_NONE) {
1569                                 struct drm_i915_fence_reg *reg =
1570                                         &dev_priv->fence_regs[obj_priv->fence_reg];
1571                                 list_move_tail(&reg->lru_list,
1572                                                 &dev_priv->mm.fence_list);
1573                         }
1574
1575                         trace_i915_gem_object_change_domain(obj,
1576                                                             obj->read_domains,
1577                                                             old_write_domain);
1578                 }
1579         }
1580 }
1581
1582 uint32_t
1583 i915_add_request(struct drm_device *dev, struct drm_file *file_priv,
1584                  uint32_t flush_domains, struct intel_ring_buffer *ring)
1585 {
1586         drm_i915_private_t *dev_priv = dev->dev_private;
1587         struct drm_i915_file_private *i915_file_priv = NULL;
1588         struct drm_i915_gem_request *request;
1589         uint32_t seqno;
1590         int was_empty;
1591
1592         if (file_priv != NULL)
1593                 i915_file_priv = file_priv->driver_priv;
1594
1595         request = kzalloc(sizeof(*request), GFP_KERNEL);
1596         if (request == NULL)
1597                 return 0;
1598
1599         seqno = ring->add_request(dev, ring, file_priv, flush_domains);
1600
1601         request->seqno = seqno;
1602         request->ring = ring;
1603         request->emitted_jiffies = jiffies;
1604         was_empty = list_empty(&ring->request_list);
1605         list_add_tail(&request->list, &ring->request_list);
1606
1607         if (i915_file_priv) {
1608                 list_add_tail(&request->client_list,
1609                               &i915_file_priv->mm.request_list);
1610         } else {
1611                 INIT_LIST_HEAD(&request->client_list);
1612         }
1613
1614         /* Tag any objects whose GPU write domain is covered by this
1615          * flush with the new request's seqno.
1616          */
1617         if (flush_domains != 0)
1618                 i915_gem_process_flushing_list(dev, flush_domains, seqno, ring);
1619
1620         if (!dev_priv->mm.suspended) {
1621                 mod_timer(&dev_priv->hangcheck_timer, jiffies + DRM_I915_HANGCHECK_PERIOD);
1622                 if (was_empty)
1623                         queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, HZ);
1624         }
1625         return seqno;
1626 }
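/*
 * A request is the unit of completion tracking.  The usual pattern, as in
 * i915_gpu_idle() below, is to emit any required flush, record a request
 * and then wait on its seqno, roughly:
 *
 *	i915_gem_flush(dev, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
 *	seqno = i915_add_request(dev, NULL, I915_GEM_GPU_DOMAINS, ring);
 *	if (seqno == 0)
 *		return -ENOMEM;
 *	ret = i915_wait_request(dev, seqno, ring);
 *
 * Requests that are not waited on explicitly are retired later by the
 * retire_work handler queued above.
 */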
1627
1628 /**
1629  * Command execution barrier
1630  *
1631  * Ensures that all commands in the ring are finished
1632  * before signalling the CPU
1633  */
1634 static uint32_t
1635 i915_retire_commands(struct drm_device *dev, struct intel_ring_buffer *ring)
1636 {
1637         uint32_t flush_domains = 0;
1638
1639         /* The sampler always gets flushed on i965 (sigh) */
1640         if (IS_I965G(dev))
1641                 flush_domains |= I915_GEM_DOMAIN_SAMPLER;
1642
1643         ring->flush(dev, ring,
1644                         I915_GEM_DOMAIN_COMMAND, flush_domains);
1645         return flush_domains;
1646 }
1647
1648 /**
1649  * Moves buffers associated with the given request's seqno off the active
1650  * list and onto the flushing or inactive list, potentially freeing them.
1651  */
1652 static void
1653 i915_gem_retire_request(struct drm_device *dev,
1654                         struct drm_i915_gem_request *request)
1655 {
1656         drm_i915_private_t *dev_priv = dev->dev_private;
1657
1658         trace_i915_gem_request_retire(dev, request->seqno);
1659
1660         /* Move any buffers on the active list that are no longer referenced
1661          * by the ringbuffer to the flushing/inactive lists as appropriate.
1662          */
1663         spin_lock(&dev_priv->mm.active_list_lock);
1664         while (!list_empty(&request->ring->active_list)) {
1665                 struct drm_gem_object *obj;
1666                 struct drm_i915_gem_object *obj_priv;
1667
1668                 obj_priv = list_first_entry(&request->ring->active_list,
1669                                             struct drm_i915_gem_object,
1670                                             list);
1671                 obj = &obj_priv->base;
1672
1673                 /* If the seqno being retired doesn't match the oldest in the
1674                  * list, then the oldest in the list must still be newer than
1675                  * this seqno.
1676                  */
1677                 if (obj_priv->last_rendering_seqno != request->seqno)
1678                         goto out;
1679
1680 #if WATCH_LRU
1681                 DRM_INFO("%s: retire %d moves to inactive list %p\n",
1682                          __func__, request->seqno, obj);
1683 #endif
1684
1685                 if (obj->write_domain != 0)
1686                         i915_gem_object_move_to_flushing(obj);
1687                 else {
1688                         /* Take a reference on the object so it won't be
1689                          * freed while the spinlock is held.  The list
1690                          * protection for this spinlock is safe when breaking
1691                          * the lock like this since the next thing we do
1692                          * is just get the head of the list again.
1693                          */
1694                         drm_gem_object_reference(obj);
1695                         i915_gem_object_move_to_inactive(obj);
1696                         spin_unlock(&dev_priv->mm.active_list_lock);
1697                         drm_gem_object_unreference(obj);
1698                         spin_lock(&dev_priv->mm.active_list_lock);
1699                 }
1700         }
1701 out:
1702         spin_unlock(&dev_priv->mm.active_list_lock);
1703 }
1704
1705 /**
1706  * Returns true if seq1 is at or after seq2.
1707  */
1708 bool
1709 i915_seqno_passed(uint32_t seq1, uint32_t seq2)
1710 {
1711         return (int32_t)(seq1 - seq2) >= 0;
1712 }
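/*
 * The comparison uses wrapping 32-bit arithmetic, so it stays correct
 * across seqno wraparound provided the two values are within 2^31 of each
 * other.  For example:
 *
 *	i915_seqno_passed(5, 3)                   -> true  (5 - 3 = 2)
 *	i915_seqno_passed(3, 5)                   -> false (3 - 5 = -2)
 *	i915_seqno_passed(0x00000002, 0xfffffffe) -> true  (wraps to 4)
 *
 * Equal seqnos also count as passed.
 */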
1713
1714 uint32_t
1715 i915_get_gem_seqno(struct drm_device *dev,
1716                    struct intel_ring_buffer *ring)
1717 {
1718         return ring->get_gem_seqno(dev, ring);
1719 }
1720
1721 /**
1722  * This function clears the request list as sequence numbers are passed.
1723  */
1724 static void
1725 i915_gem_retire_requests_ring(struct drm_device *dev,
1726                               struct intel_ring_buffer *ring)
1727 {
1728         drm_i915_private_t *dev_priv = dev->dev_private;
1729         uint32_t seqno;
1730
1731         if (!ring->status_page.page_addr ||
1732             list_empty(&ring->request_list))
1733                 return;
1734
1735         seqno = i915_get_gem_seqno(dev, ring);
1736
1737         while (!list_empty(&ring->request_list)) {
1738                 struct drm_i915_gem_request *request;
1739                 uint32_t retiring_seqno;
1740
1741                 request = list_first_entry(&ring->request_list,
1742                                            struct drm_i915_gem_request,
1743                                            list);
1744                 retiring_seqno = request->seqno;
1745
1746                 if (i915_seqno_passed(seqno, retiring_seqno) ||
1747                     atomic_read(&dev_priv->mm.wedged)) {
1748                         i915_gem_retire_request(dev, request);
1749
1750                         list_del(&request->list);
1751                         list_del(&request->client_list);
1752                         kfree(request);
1753                 } else
1754                         break;
1755         }
1756
1757         if (unlikely(dev_priv->trace_irq_seqno &&
1758                       i915_seqno_passed(dev_priv->trace_irq_seqno, seqno))) {
1759
1760                 ring->user_irq_put(dev, ring);
1761                 dev_priv->trace_irq_seqno = 0;
1762         }
1763 }
1764
1765 void
1766 i915_gem_retire_requests(struct drm_device *dev)
1767 {
1768         drm_i915_private_t *dev_priv = dev->dev_private;
1769
1770         if (!list_empty(&dev_priv->mm.deferred_free_list)) {
1771                 struct drm_i915_gem_object *obj_priv, *tmp;
1772
1773                 /* We must be careful that during unbind() we do not
1774                  * accidentally infinitely recurse into retire requests.
1775                  * Currently:
1776                  *   retire -> free -> unbind -> wait -> retire_ring
1777                  */
1778                 list_for_each_entry_safe(obj_priv, tmp,
1779                                          &dev_priv->mm.deferred_free_list,
1780                                          list)
1781                         i915_gem_free_object_tail(&obj_priv->base);
1782         }
1783
1784         i915_gem_retire_requests_ring(dev, &dev_priv->render_ring);
1785         if (HAS_BSD(dev))
1786                 i915_gem_retire_requests_ring(dev, &dev_priv->bsd_ring);
1787 }
1788
1789 void
1790 i915_gem_retire_work_handler(struct work_struct *work)
1791 {
1792         drm_i915_private_t *dev_priv;
1793         struct drm_device *dev;
1794
1795         dev_priv = container_of(work, drm_i915_private_t,
1796                                 mm.retire_work.work);
1797         dev = dev_priv->dev;
1798
1799         mutex_lock(&dev->struct_mutex);
1800         i915_gem_retire_requests(dev);
1801
1802         if (!dev_priv->mm.suspended &&
1803                 (!list_empty(&dev_priv->render_ring.request_list) ||
1804                         (HAS_BSD(dev) &&
1805                          !list_empty(&dev_priv->bsd_ring.request_list))))
1806                 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, HZ);
1807         mutex_unlock(&dev->struct_mutex);
1808 }
1809
1810 int
1811 i915_do_wait_request(struct drm_device *dev, uint32_t seqno,
1812                 int interruptible, struct intel_ring_buffer *ring)
1813 {
1814         drm_i915_private_t *dev_priv = dev->dev_private;
1815         u32 ier;
1816         int ret = 0;
1817
1818         BUG_ON(seqno == 0);
1819
1820         if (atomic_read(&dev_priv->mm.wedged))
1821                 return -EIO;
1822
1823         if (!i915_seqno_passed(ring->get_gem_seqno(dev, ring), seqno)) {
1824                 if (HAS_PCH_SPLIT(dev))
1825                         ier = I915_READ(DEIER) | I915_READ(GTIER);
1826                 else
1827                         ier = I915_READ(IER);
1828                 if (!ier) {
1829                         DRM_ERROR("something (likely vbetool) disabled "
1830                                   "interrupts, re-enabling\n");
1831                         i915_driver_irq_preinstall(dev);
1832                         i915_driver_irq_postinstall(dev);
1833                 }
1834
1835                 trace_i915_gem_request_wait_begin(dev, seqno);
1836
1837                 ring->waiting_gem_seqno = seqno;
1838                 ring->user_irq_get(dev, ring);
1839                 if (interruptible)
1840                         ret = wait_event_interruptible(ring->irq_queue,
1841                                 i915_seqno_passed(
1842                                         ring->get_gem_seqno(dev, ring), seqno)
1843                                 || atomic_read(&dev_priv->mm.wedged));
1844                 else
1845                         wait_event(ring->irq_queue,
1846                                 i915_seqno_passed(
1847                                         ring->get_gem_seqno(dev, ring), seqno)
1848                                 || atomic_read(&dev_priv->mm.wedged));
1849
1850                 ring->user_irq_put(dev, ring);
1851                 ring->waiting_gem_seqno = 0;
1852
1853                 trace_i915_gem_request_wait_end(dev, seqno);
1854         }
1855         if (atomic_read(&dev_priv->mm.wedged))
1856                 ret = -EIO;
1857
1858         if (ret && ret != -ERESTARTSYS)
1859                 DRM_ERROR("%s returns %d (awaiting %d at %d)\n",
1860                           __func__, ret, seqno, ring->get_gem_seqno(dev, ring));
1861
1862         /* Directly dispatch request retiring.  While we have the work queue
1863          * to handle this, the waiter on a request often wants an associated
1864          * buffer to have made it to the inactive list, and we would need
1865          * a separate wait queue to handle that.
1866          */
1867         if (ret == 0)
1868                 i915_gem_retire_requests_ring(dev, ring);
1869
1870         return ret;
1871 }
1872
1873 /**
1874  * Waits for a sequence number to be signaled, and cleans up the
1875  * request and object lists appropriately for that event.
1876  */
1877 static int
1878 i915_wait_request(struct drm_device *dev, uint32_t seqno,
1879                 struct intel_ring_buffer *ring)
1880 {
1881         return i915_do_wait_request(dev, seqno, 1, ring);
1882 }
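/*
 * This is the interruptible variant used on most paths; callers that must
 * not be interrupted, such as the modesetting path in
 * i915_gem_object_set_to_display_plane() below, call i915_do_wait_request()
 * directly with interruptible == 0.
 */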
1883
1884 static void
1885 i915_gem_flush(struct drm_device *dev,
1886                uint32_t invalidate_domains,
1887                uint32_t flush_domains)
1888 {
1889         drm_i915_private_t *dev_priv = dev->dev_private;
1890         if (flush_domains & I915_GEM_DOMAIN_CPU)
1891                 drm_agp_chipset_flush(dev);
1892         dev_priv->render_ring.flush(dev, &dev_priv->render_ring,
1893                         invalidate_domains,
1894                         flush_domains);
1895
1896         if (HAS_BSD(dev))
1897                 dev_priv->bsd_ring.flush(dev, &dev_priv->bsd_ring,
1898                                 invalidate_domains,
1899                                 flush_domains);
1900 }
1901
1902 /**
1903  * Ensures that all rendering to the object has completed and the object is
1904  * safe to unbind from the GTT or access from the CPU.
1905  */
1906 static int
1907 i915_gem_object_wait_rendering(struct drm_gem_object *obj)
1908 {
1909         struct drm_device *dev = obj->dev;
1910         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
1911         int ret;
1912
1913         /* This function only exists to support waiting for existing rendering,
1914          * not for emitting required flushes.
1915          */
1916         BUG_ON((obj->write_domain & I915_GEM_GPU_DOMAINS) != 0);
1917
1918         /* If there is rendering queued on the buffer, wait for it to
1919          * complete.
1920          */
1921         if (obj_priv->active) {
1922 #if WATCH_BUF
1923                 DRM_INFO("%s: object %p wait for seqno %08x\n",
1924                           __func__, obj, obj_priv->last_rendering_seqno);
1925 #endif
1926                 ret = i915_wait_request(dev,
1927                                 obj_priv->last_rendering_seqno, obj_priv->ring);
1928                 if (ret != 0)
1929                         return ret;
1930         }
1931
1932         return 0;
1933 }
1934
1935 /**
1936  * Unbinds an object from the GTT aperture.
1937  */
1938 int
1939 i915_gem_object_unbind(struct drm_gem_object *obj)
1940 {
1941         struct drm_device *dev = obj->dev;
1942         drm_i915_private_t *dev_priv = dev->dev_private;
1943         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
1944         int ret = 0;
1945
1946 #if WATCH_BUF
1947         DRM_INFO("%s:%d %p\n", __func__, __LINE__, obj);
1948         DRM_INFO("gtt_space %p\n", obj_priv->gtt_space);
1949 #endif
1950         if (obj_priv->gtt_space == NULL)
1951                 return 0;
1952
1953         if (obj_priv->pin_count != 0) {
1954                 DRM_ERROR("Attempting to unbind pinned buffer\n");
1955                 return -EINVAL;
1956         }
1957
1958         /* blow away mappings if mapped through GTT */
1959         i915_gem_release_mmap(obj);
1960
1961         /* Move the object to the CPU domain to ensure that
1962          * any possible CPU writes while it's not in the GTT
1963          * are flushed when we go to remap it. This will
1964          * also ensure that all pending GPU writes are finished
1965          * before we unbind.
1966          */
1967         ret = i915_gem_object_set_to_cpu_domain(obj, 1);
1968         if (ret == -ERESTARTSYS)
1969                 return ret;
1970         /* Continue on if we fail due to EIO, the GPU is hung so we
1971          * should be safe and we need to cleanup or else we might
1972          * cause memory corruption through use-after-free.
1973          */
1974
1975         BUG_ON(obj_priv->active);
1976
1977         /* release the fence reg _after_ flushing */
1978         if (obj_priv->fence_reg != I915_FENCE_REG_NONE)
1979                 i915_gem_clear_fence_reg(obj);
1980
1981         if (obj_priv->agp_mem != NULL) {
1982                 drm_unbind_agp(obj_priv->agp_mem);
1983                 drm_free_agp(obj_priv->agp_mem, obj->size / PAGE_SIZE);
1984                 obj_priv->agp_mem = NULL;
1985         }
1986
1987         i915_gem_object_put_pages(obj);
1988         BUG_ON(obj_priv->pages_refcount);
1989
1990         if (obj_priv->gtt_space) {
1991                 atomic_dec(&dev->gtt_count);
1992                 atomic_sub(obj->size, &dev->gtt_memory);
1993
1994                 drm_mm_put_block(obj_priv->gtt_space);
1995                 obj_priv->gtt_space = NULL;
1996         }
1997
1998         /* Remove ourselves from the LRU list if present. */
1999         spin_lock(&dev_priv->mm.active_list_lock);
2000         if (!list_empty(&obj_priv->list))
2001                 list_del_init(&obj_priv->list);
2002         spin_unlock(&dev_priv->mm.active_list_lock);
2003
2004         if (i915_gem_object_is_purgeable(obj_priv))
2005                 i915_gem_object_truncate(obj);
2006
2007         trace_i915_gem_object_unbind(obj);
2008
2009         return ret;
2010 }
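/*
 * The teardown order above matters: GTT mappings are zapped and the object
 * is moved to the CPU domain (flushing any pending GPU writes) before the
 * fence register is released; only then are the AGP binding, the page list
 * and the GTT node dropped and the object removed from the LRU lists.
 * Purgeable objects have their backing storage truncated at this point
 * since nothing references their contents any more.
 */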
2011
2012 int
2013 i915_gpu_idle(struct drm_device *dev)
2014 {
2015         drm_i915_private_t *dev_priv = dev->dev_private;
2016         bool lists_empty;
2017         uint32_t seqno1, seqno2;
2018         int ret;
2019
2020         spin_lock(&dev_priv->mm.active_list_lock);
2021         lists_empty = (list_empty(&dev_priv->mm.flushing_list) &&
2022                        list_empty(&dev_priv->render_ring.active_list) &&
2023                        (!HAS_BSD(dev) ||
2024                         list_empty(&dev_priv->bsd_ring.active_list)));
2025         spin_unlock(&dev_priv->mm.active_list_lock);
2026
2027         if (lists_empty)
2028                 return 0;
2029
2030         /* Flush everything onto the inactive list. */
2031         i915_gem_flush(dev, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
2032         seqno1 = i915_add_request(dev, NULL, I915_GEM_GPU_DOMAINS,
2033                         &dev_priv->render_ring);
2034         if (seqno1 == 0)
2035                 return -ENOMEM;
2036         ret = i915_wait_request(dev, seqno1, &dev_priv->render_ring);
        if (ret)
                return ret;
2037
2038         if (HAS_BSD(dev)) {
2039                 seqno2 = i915_add_request(dev, NULL, I915_GEM_GPU_DOMAINS,
2040                                 &dev_priv->bsd_ring);
2041                 if (seqno2 == 0)
2042                         return -ENOMEM;
2043
2044                 ret = i915_wait_request(dev, seqno2, &dev_priv->bsd_ring);
2045                 if (ret)
2046                         return ret;
2047         }
2048
2049
2050         return ret;
2051 }
2052
2053 int
2054 i915_gem_object_get_pages(struct drm_gem_object *obj,
2055                           gfp_t gfpmask)
2056 {
2057         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
2058         int page_count, i;
2059         struct address_space *mapping;
2060         struct inode *inode;
2061         struct page *page;
2062
2063         BUG_ON(obj_priv->pages_refcount
2064                         == DRM_I915_GEM_OBJECT_MAX_PAGES_REFCOUNT);
2065
2066         if (obj_priv->pages_refcount++ != 0)
2067                 return 0;
2068
2069         /* Get the list of pages out of our struct file.  They'll be pinned
2070          * at this point until we release them.
2071          */
2072         page_count = obj->size / PAGE_SIZE;
2073         BUG_ON(obj_priv->pages != NULL);
2074         obj_priv->pages = drm_calloc_large(page_count, sizeof(struct page *));
2075         if (obj_priv->pages == NULL) {
2076                 obj_priv->pages_refcount--;
2077                 return -ENOMEM;
2078         }
2079
2080         inode = obj->filp->f_path.dentry->d_inode;
2081         mapping = inode->i_mapping;
2082         for (i = 0; i < page_count; i++) {
2083                 page = read_cache_page_gfp(mapping, i,
2084                                            GFP_HIGHUSER |
2085                                            __GFP_COLD |
2086                                            __GFP_RECLAIMABLE |
2087                                            gfpmask);
2088                 if (IS_ERR(page))
2089                         goto err_pages;
2090
2091                 obj_priv->pages[i] = page;
2092         }
2093
2094         if (obj_priv->tiling_mode != I915_TILING_NONE)
2095                 i915_gem_object_do_bit_17_swizzle(obj);
2096
2097         return 0;
2098
2099 err_pages:
2100         while (i--)
2101                 page_cache_release(obj_priv->pages[i]);
2102
2103         drm_free_large(obj_priv->pages);
2104         obj_priv->pages = NULL;
2105         obj_priv->pages_refcount--;
2106         return PTR_ERR(page);
2107 }
2108
2109 static void sandybridge_write_fence_reg(struct drm_i915_fence_reg *reg)
2110 {
2111         struct drm_gem_object *obj = reg->obj;
2112         struct drm_device *dev = obj->dev;
2113         drm_i915_private_t *dev_priv = dev->dev_private;
2114         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
2115         int regnum = obj_priv->fence_reg;
2116         uint64_t val;
2117
2118         val = (uint64_t)((obj_priv->gtt_offset + obj->size - 4096) &
2119                     0xfffff000) << 32;
2120         val |= obj_priv->gtt_offset & 0xfffff000;
2121         val |= (uint64_t)((obj_priv->stride / 128) - 1) <<
2122                 SANDYBRIDGE_FENCE_PITCH_SHIFT;
2123
2124         if (obj_priv->tiling_mode == I915_TILING_Y)
2125                 val |= 1 << I965_FENCE_TILING_Y_SHIFT;
2126         val |= I965_FENCE_REG_VALID;
2127
2128         I915_WRITE64(FENCE_REG_SANDYBRIDGE_0 + (regnum * 8), val);
2129 }
2130
2131 static void i965_write_fence_reg(struct drm_i915_fence_reg *reg)
2132 {
2133         struct drm_gem_object *obj = reg->obj;
2134         struct drm_device *dev = obj->dev;
2135         drm_i915_private_t *dev_priv = dev->dev_private;
2136         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
2137         int regnum = obj_priv->fence_reg;
2138         uint64_t val;
2139
2140         val = (uint64_t)((obj_priv->gtt_offset + obj->size - 4096) &
2141                     0xfffff000) << 32;
2142         val |= obj_priv->gtt_offset & 0xfffff000;
2143         val |= ((obj_priv->stride / 128) - 1) << I965_FENCE_PITCH_SHIFT;
2144         if (obj_priv->tiling_mode == I915_TILING_Y)
2145                 val |= 1 << I965_FENCE_TILING_Y_SHIFT;
2146         val |= I965_FENCE_REG_VALID;
2147
2148         I915_WRITE64(FENCE_REG_965_0 + (regnum * 8), val);
2149 }
2150
2151 static void i915_write_fence_reg(struct drm_i915_fence_reg *reg)
2152 {
2153         struct drm_gem_object *obj = reg->obj;
2154         struct drm_device *dev = obj->dev;
2155         drm_i915_private_t *dev_priv = dev->dev_private;
2156         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
2157         int regnum = obj_priv->fence_reg;
2158         int tile_width;
2159         uint32_t fence_reg, val;
2160         uint32_t pitch_val;
2161
2162         if ((obj_priv->gtt_offset & ~I915_FENCE_START_MASK) ||
2163             (obj_priv->gtt_offset & (obj->size - 1))) {
2164                 WARN(1, "%s: object 0x%08x not 1M or size (0x%zx) aligned\n",
2165                      __func__, obj_priv->gtt_offset, obj->size);
2166                 return;
2167         }
2168
2169         if (obj_priv->tiling_mode == I915_TILING_Y &&
2170             HAS_128_BYTE_Y_TILING(dev))
2171                 tile_width = 128;
2172         else
2173                 tile_width = 512;
2174
2175         /* Note: the pitch must be a power-of-two number of tile widths */
2176         pitch_val = obj_priv->stride / tile_width;
2177         pitch_val = ffs(pitch_val) - 1;
2178
2179         if (obj_priv->tiling_mode == I915_TILING_Y &&
2180             HAS_128_BYTE_Y_TILING(dev))
2181                 WARN_ON(pitch_val > I830_FENCE_MAX_PITCH_VAL);
2182         else
2183                 WARN_ON(pitch_val > I915_FENCE_MAX_PITCH_VAL);
2184
2185         val = obj_priv->gtt_offset;
2186         if (obj_priv->tiling_mode == I915_TILING_Y)
2187                 val |= 1 << I830_FENCE_TILING_Y_SHIFT;
2188         val |= I915_FENCE_SIZE_BITS(obj->size);
2189         val |= pitch_val << I830_FENCE_PITCH_SHIFT;
2190         val |= I830_FENCE_REG_VALID;
2191
2192         if (regnum < 8)
2193                 fence_reg = FENCE_REG_830_0 + (regnum * 4);
2194         else
2195                 fence_reg = FENCE_REG_945_8 + ((regnum - 8) * 4);
2196         I915_WRITE(fence_reg, val);
2197 }
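/*
 * Worked example of the pre-965 pitch encoding above: an X-tiled object on
 * hardware with 512-byte tiles and a stride of 2048 bytes gives
 * pitch_val = 2048 / 512 = 4, then ffs(4) - 1 = 2, i.e. the register field
 * holds log2 of the pitch in tile widths, which is why the pitch must be a
 * power-of-two number of tiles.
 */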
2198
2199 static void i830_write_fence_reg(struct drm_i915_fence_reg *reg)
2200 {
2201         struct drm_gem_object *obj = reg->obj;
2202         struct drm_device *dev = obj->dev;
2203         drm_i915_private_t *dev_priv = dev->dev_private;
2204         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
2205         int regnum = obj_priv->fence_reg;
2206         uint32_t val;
2207         uint32_t pitch_val;
2208         uint32_t fence_size_bits;
2209
2210         if ((obj_priv->gtt_offset & ~I830_FENCE_START_MASK) ||
2211             (obj_priv->gtt_offset & (obj->size - 1))) {
2212                 WARN(1, "%s: object 0x%08x not 512K or size aligned\n",
2213                      __func__, obj_priv->gtt_offset);
2214                 return;
2215         }
2216
2217         pitch_val = obj_priv->stride / 128;
2218         pitch_val = ffs(pitch_val) - 1;
2219         WARN_ON(pitch_val > I830_FENCE_MAX_PITCH_VAL);
2220
2221         val = obj_priv->gtt_offset;
2222         if (obj_priv->tiling_mode == I915_TILING_Y)
2223                 val |= 1 << I830_FENCE_TILING_Y_SHIFT;
2224         fence_size_bits = I830_FENCE_SIZE_BITS(obj->size);
2225         WARN_ON(fence_size_bits & ~0x00000f00);
2226         val |= fence_size_bits;
2227         val |= pitch_val << I830_FENCE_PITCH_SHIFT;
2228         val |= I830_FENCE_REG_VALID;
2229
2230         I915_WRITE(FENCE_REG_830_0 + (regnum * 4), val);
2231 }
2232
2233 static int i915_find_fence_reg(struct drm_device *dev)
2234 {
2235         struct drm_i915_fence_reg *reg = NULL;
2236         struct drm_i915_gem_object *obj_priv = NULL;
2237         struct drm_i915_private *dev_priv = dev->dev_private;
2238         struct drm_gem_object *obj = NULL;
2239         int i, avail, ret;
2240
2241         /* First try to find a free reg */
2242         avail = 0;
2243         for (i = dev_priv->fence_reg_start; i < dev_priv->num_fence_regs; i++) {
2244                 reg = &dev_priv->fence_regs[i];
2245                 if (!reg->obj)
2246                         return i;
2247
2248                 obj_priv = to_intel_bo(reg->obj);
2249                 if (!obj_priv->pin_count)
2250                         avail++;
2251         }
2252
2253         if (avail == 0)
2254                 return -ENOSPC;
2255
2256         /* None available, try to steal one or wait for a user to finish */
2257         i = I915_FENCE_REG_NONE;
2258         list_for_each_entry(reg, &dev_priv->mm.fence_list,
2259                             lru_list) {
2260                 obj = reg->obj;
2261                 obj_priv = to_intel_bo(obj);
2262
2263                 if (obj_priv->pin_count)
2264                         continue;
2265
2266                 /* found one! */
2267                 i = obj_priv->fence_reg;
2268                 break;
2269         }
2270
2271         BUG_ON(i == I915_FENCE_REG_NONE);
2272
2273         /* We only have a reference on obj from the active list. put_fence_reg
2274          * might drop that one, causing a use-after-free in it. So hold a
2275          * private reference to obj like the other callers of put_fence_reg
2276          * (set_tiling ioctl) do. */
2277         drm_gem_object_reference(obj);
2278         ret = i915_gem_object_put_fence_reg(obj);
2279         drm_gem_object_unreference(obj);
2280         if (ret != 0)
2281                 return ret;
2282
2283         return i;
2284 }
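/*
 * Fence allocation policy: a free register is always preferred; failing
 * that, the least recently used unpinned fence (the head of mm.fence_list)
 * is stolen via i915_gem_object_put_fence_reg(), which waits for any
 * outstanding fenced access where the hardware requires it before the
 * register is reused.
 */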
2285
2286 /**
2287  * i915_gem_object_get_fence_reg - set up a fence reg for an object
2288  * @obj: object to map through a fence reg
2289  *
2290  * When mapping objects through the GTT, userspace wants to be able to write
2291  * to them without having to worry about swizzling if the object is tiled.
2292  *
2293  * This function walks the fence regs looking for a free one for @obj,
2294  * stealing one if it can't find any.
2295  *
2296  * It then sets up the reg based on the object's properties: address, pitch
2297  * and tiling format.
2298  */
2299 int
2300 i915_gem_object_get_fence_reg(struct drm_gem_object *obj)
2301 {
2302         struct drm_device *dev = obj->dev;
2303         struct drm_i915_private *dev_priv = dev->dev_private;
2304         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
2305         struct drm_i915_fence_reg *reg = NULL;
2306         int ret;
2307
2308         /* Just update our place in the LRU if our fence is getting used. */
2309         if (obj_priv->fence_reg != I915_FENCE_REG_NONE) {
2310                 reg = &dev_priv->fence_regs[obj_priv->fence_reg];
2311                 list_move_tail(&reg->lru_list, &dev_priv->mm.fence_list);
2312                 return 0;
2313         }
2314
2315         switch (obj_priv->tiling_mode) {
2316         case I915_TILING_NONE:
2317                 WARN(1, "allocating a fence for non-tiled object?\n");
2318                 break;
2319         case I915_TILING_X:
2320                 if (!obj_priv->stride)
2321                         return -EINVAL;
2322                 WARN((obj_priv->stride & (512 - 1)),
2323                      "object 0x%08x is X tiled but has non-512B pitch\n",
2324                      obj_priv->gtt_offset);
2325                 break;
2326         case I915_TILING_Y:
2327                 if (!obj_priv->stride)
2328                         return -EINVAL;
2329                 WARN((obj_priv->stride & (128 - 1)),
2330                      "object 0x%08x is Y tiled but has non-128B pitch\n",
2331                      obj_priv->gtt_offset);
2332                 break;
2333         }
2334
2335         ret = i915_find_fence_reg(dev);
2336         if (ret < 0)
2337                 return ret;
2338
2339         obj_priv->fence_reg = ret;
2340         reg = &dev_priv->fence_regs[obj_priv->fence_reg];
2341         list_add_tail(&reg->lru_list, &dev_priv->mm.fence_list);
2342
2343         reg->obj = obj;
2344
2345         if (IS_GEN6(dev))
2346                 sandybridge_write_fence_reg(reg);
2347         else if (IS_I965G(dev))
2348                 i965_write_fence_reg(reg);
2349         else if (IS_I9XX(dev))
2350                 i915_write_fence_reg(reg);
2351         else
2352                 i830_write_fence_reg(reg);
2353
2354         trace_i915_gem_object_get_fence(obj, obj_priv->fence_reg,
2355                         obj_priv->tiling_mode);
2356
2357         return 0;
2358 }
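/*
 * For illustration, a typical caller (e.g. the GTT fault path) only needs
 * a fence while a tiled object is accessed through the aperture, roughly:
 *
 *	if (obj_priv->tiling_mode != I915_TILING_NONE) {
 *		ret = i915_gem_object_get_fence_reg(obj);
 *		if (ret)
 *			return ret;
 *	}
 *
 * Calling this again while a fence is already held just refreshes the
 * fence's position in the LRU consulted by i915_find_fence_reg().
 */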
2359
2360 /**
2361  * i915_gem_clear_fence_reg - clear out fence register info
2362  * @obj: object to clear
2363  *
2364  * Zeroes out the fence register itself and clears out the associated
2365  * data structures in dev_priv and obj_priv.
2366  */
2367 static void
2368 i915_gem_clear_fence_reg(struct drm_gem_object *obj)
2369 {
2370         struct drm_device *dev = obj->dev;
2371         drm_i915_private_t *dev_priv = dev->dev_private;
2372         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
2373         struct drm_i915_fence_reg *reg =
2374                 &dev_priv->fence_regs[obj_priv->fence_reg];
2375
2376         if (IS_GEN6(dev)) {
2377                 I915_WRITE64(FENCE_REG_SANDYBRIDGE_0 +
2378                              (obj_priv->fence_reg * 8), 0);
2379         } else if (IS_I965G(dev)) {
2380                 I915_WRITE64(FENCE_REG_965_0 + (obj_priv->fence_reg * 8), 0);
2381         } else {
2382                 uint32_t fence_reg;
2383
2384                 if (obj_priv->fence_reg < 8)
2385                         fence_reg = FENCE_REG_830_0 + obj_priv->fence_reg * 4;
2386                 else
2387                         fence_reg = FENCE_REG_945_8 + (obj_priv->fence_reg -
2388                                                        8) * 4;
2389
2390                 I915_WRITE(fence_reg, 0);
2391         }
2392
2393         reg->obj = NULL;
2394         obj_priv->fence_reg = I915_FENCE_REG_NONE;
2395         list_del_init(&reg->lru_list);
2396 }
2397
2398 /**
2399  * i915_gem_object_put_fence_reg - waits on outstanding fenced access
2400  * to the buffer to finish, and then resets the fence register.
2401  * @obj: tiled object holding a fence register.
2402  *
2403  * Zeroes out the fence register itself and clears out the associated
2404  * data structures in dev_priv and obj_priv.
2405  */
2406 int
2407 i915_gem_object_put_fence_reg(struct drm_gem_object *obj)
2408 {
2409         struct drm_device *dev = obj->dev;
2410         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
2411
2412         if (obj_priv->fence_reg == I915_FENCE_REG_NONE)
2413                 return 0;
2414
2415         /* If we've changed tiling, GTT-mappings of the object
2416          * need to re-fault to ensure that the correct fence register
2417          * setup is in place.
2418          */
2419         i915_gem_release_mmap(obj);
2420
2421         /* On the i915, GPU access to tiled buffers is via a fence,
2422          * therefore we must wait for any outstanding access to complete
2423          * before clearing the fence.
2424          */
2425         if (!IS_I965G(dev)) {
2426                 int ret;
2427
2428                 ret = i915_gem_object_flush_gpu_write_domain(obj);
2429                 if (ret != 0)
2430                         return ret;
2431
2432                 ret = i915_gem_object_wait_rendering(obj);
2433                 if (ret != 0)
2434                         return ret;
2435         }
2436
2437         i915_gem_object_flush_gtt_write_domain(obj);
2438         i915_gem_clear_fence_reg(obj);
2439
2440         return 0;
2441 }
2442
2443 /**
2444  * Finds free space in the GTT aperture and binds the object there.
2445  */
2446 static int
2447 i915_gem_object_bind_to_gtt(struct drm_gem_object *obj, unsigned alignment)
2448 {
2449         struct drm_device *dev = obj->dev;
2450         drm_i915_private_t *dev_priv = dev->dev_private;
2451         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
2452         struct drm_mm_node *free_space;
2453         gfp_t gfpmask = __GFP_NORETRY | __GFP_NOWARN;
2454         int ret;
2455
2456         if (obj_priv->madv != I915_MADV_WILLNEED) {
2457                 DRM_ERROR("Attempting to bind a purgeable object\n");
2458                 return -EINVAL;
2459         }
2460
2461         if (alignment == 0)
2462                 alignment = i915_gem_get_gtt_alignment(obj);
2463         if (alignment & (i915_gem_get_gtt_alignment(obj) - 1)) {
2464                 DRM_ERROR("Invalid object alignment requested %u\n", alignment);
2465                 return -EINVAL;
2466         }
2467
2468         /* If the object is bigger than the entire aperture, reject it early
2469          * before evicting everything in a vain attempt to find space.
2470          */
2471         if (obj->size > dev->gtt_total) {
2472                 DRM_ERROR("Attempting to bind an object larger than the aperture\n");
2473                 return -E2BIG;
2474         }
2475
2476  search_free:
2477         free_space = drm_mm_search_free(&dev_priv->mm.gtt_space,
2478                                         obj->size, alignment, 0);
2479         if (free_space != NULL) {
2480                 obj_priv->gtt_space = drm_mm_get_block(free_space, obj->size,
2481                                                        alignment);
2482                 if (obj_priv->gtt_space != NULL)
2483                         obj_priv->gtt_offset = obj_priv->gtt_space->start;
2484         }
2485         if (obj_priv->gtt_space == NULL) {
2486                 /* No suitable free GTT node was found; evict something
2487                  * and retry, bailing out if even eviction fails.
2488                  */
2489 #if WATCH_LRU
2490                 DRM_INFO("%s: GTT full, evicting something\n", __func__);
2491 #endif
2492                 ret = i915_gem_evict_something(dev, obj->size, alignment);
2493                 if (ret)
2494                         return ret;
2495
2496                 goto search_free;
2497         }
2498
2499 #if WATCH_BUF
2500         DRM_INFO("Binding object of size %zd at 0x%08x\n",
2501                  obj->size, obj_priv->gtt_offset);
2502 #endif
2503         ret = i915_gem_object_get_pages(obj, gfpmask);
2504         if (ret) {
2505                 drm_mm_put_block(obj_priv->gtt_space);
2506                 obj_priv->gtt_space = NULL;
2507
2508                 if (ret == -ENOMEM) {
2509                         /* first try to clear up some space from the GTT */
2510                         ret = i915_gem_evict_something(dev, obj->size,
2511                                                        alignment);
2512                         if (ret) {
2513                                 /* now try to shrink everyone else */
2514                                 if (gfpmask) {
2515                                         gfpmask = 0;
2516                                         goto search_free;
2517                                 }
2518
2519                                 return ret;
2520                         }
2521
2522                         goto search_free;
2523                 }
2524
2525                 return ret;
2526         }
2527
2528         /* Create an AGP memory structure pointing at our pages, and bind it
2529          * into the GTT.
2530          */
2531         obj_priv->agp_mem = drm_agp_bind_pages(dev,
2532                                                obj_priv->pages,
2533                                                obj->size >> PAGE_SHIFT,
2534                                                obj_priv->gtt_offset,
2535                                                obj_priv->agp_type);
2536         if (obj_priv->agp_mem == NULL) {
2537                 i915_gem_object_put_pages(obj);
2538                 drm_mm_put_block(obj_priv->gtt_space);
2539                 obj_priv->gtt_space = NULL;
2540
2541                 ret = i915_gem_evict_something(dev, obj->size, alignment);
2542                 if (ret)
2543                         return ret;
2544
2545                 goto search_free;
2546         }
2547         atomic_inc(&dev->gtt_count);
2548         atomic_add(obj->size, &dev->gtt_memory);
2549
2550         /* keep track of the bound object by adding it to the inactive list */
2551         list_add_tail(&obj_priv->list, &dev_priv->mm.inactive_list);
2552
2553         /* Assert that the object is not currently in any GPU domain. As it
2554          * wasn't in the GTT, there shouldn't be any way it could have been in
2555          * a GPU cache
2556          */
2557         BUG_ON(obj->read_domains & I915_GEM_GPU_DOMAINS);
2558         BUG_ON(obj->write_domain & I915_GEM_GPU_DOMAINS);
2559
2560         trace_i915_gem_object_bind(obj, obj_priv->gtt_offset);
2561
2562         return 0;
2563 }
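/*
 * The binding loop above degrades gracefully under memory pressure: if no
 * suitable GTT node is found we evict and retry; if page allocation fails
 * with the initial __GFP_NORETRY | __GFP_NOWARN mask we try evicting from
 * the GTT and retry, falling back to gfpmask == 0 (normal reclaim, with
 * warnings) once there is nothing left to evict.
 */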
2564
2565 void
2566 i915_gem_clflush_object(struct drm_gem_object *obj)
2567 {
2568         struct drm_i915_gem_object      *obj_priv = to_intel_bo(obj);
2569
2570         /* If we don't have a page list set up, then we're not pinned
2571          * to GPU, and we can ignore the cache flush because it'll happen
2572          * again at bind time.
2573          */
2574         if (obj_priv->pages == NULL)
2575                 return;
2576
2577         trace_i915_gem_object_clflush(obj);
2578
2579         drm_clflush_pages(obj_priv->pages, obj->size / PAGE_SIZE);
2580 }
2581
2582 /** Flushes any GPU write domain for the object if it's dirty. */
2583 static int
2584 i915_gem_object_flush_gpu_write_domain(struct drm_gem_object *obj)
2585 {
2586         struct drm_device *dev = obj->dev;
2587         uint32_t old_write_domain;
2588         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
2589
2590         if ((obj->write_domain & I915_GEM_GPU_DOMAINS) == 0)
2591                 return 0;
2592
2593         /* Queue the GPU write cache flushing we need. */
2594         old_write_domain = obj->write_domain;
2595         i915_gem_flush(dev, 0, obj->write_domain);
2596         if (i915_add_request(dev, NULL, obj->write_domain, obj_priv->ring) == 0)
2597                 return -ENOMEM;
2598
2599         trace_i915_gem_object_change_domain(obj,
2600                                             obj->read_domains,
2601                                             old_write_domain);
2602         return 0;
2603 }
2604
2605 /** Flushes the GTT write domain for the object if it's dirty. */
2606 static void
2607 i915_gem_object_flush_gtt_write_domain(struct drm_gem_object *obj)
2608 {
2609         uint32_t old_write_domain;
2610
2611         if (obj->write_domain != I915_GEM_DOMAIN_GTT)
2612                 return;
2613
2614         /* No actual flushing is required for the GTT write domain.   Writes
2615          * to it immediately go to main memory as far as we know, so there's
2616          * no chipset flush.  It also doesn't land in render cache.
2617          */
2618         old_write_domain = obj->write_domain;
2619         obj->write_domain = 0;
2620
2621         trace_i915_gem_object_change_domain(obj,
2622                                             obj->read_domains,
2623                                             old_write_domain);
2624 }
2625
2626 /** Flushes the CPU write domain for the object if it's dirty. */
2627 static void
2628 i915_gem_object_flush_cpu_write_domain(struct drm_gem_object *obj)
2629 {
2630         struct drm_device *dev = obj->dev;
2631         uint32_t old_write_domain;
2632
2633         if (obj->write_domain != I915_GEM_DOMAIN_CPU)
2634                 return;
2635
2636         i915_gem_clflush_object(obj);
2637         drm_agp_chipset_flush(dev);
2638         old_write_domain = obj->write_domain;
2639         obj->write_domain = 0;
2640
2641         trace_i915_gem_object_change_domain(obj,
2642                                             obj->read_domains,
2643                                             old_write_domain);
2644 }
2645
2646 int
2647 i915_gem_object_flush_write_domain(struct drm_gem_object *obj)
2648 {
2649         int ret = 0;
2650
2651         switch (obj->write_domain) {
2652         case I915_GEM_DOMAIN_GTT:
2653                 i915_gem_object_flush_gtt_write_domain(obj);
2654                 break;
2655         case I915_GEM_DOMAIN_CPU:
2656                 i915_gem_object_flush_cpu_write_domain(obj);
2657                 break;
2658         default:
2659                 ret = i915_gem_object_flush_gpu_write_domain(obj);
2660                 break;
2661         }
2662
2663         return ret;
2664 }
2665
2666 /**
2667  * Moves a single object to the GTT read, and possibly write domain.
2668  *
2669  * This function returns when the move is complete, including waiting on
2670  * flushes to occur.
2671  */
2672 int
2673 i915_gem_object_set_to_gtt_domain(struct drm_gem_object *obj, int write)
2674 {
2675         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
2676         uint32_t old_write_domain, old_read_domains;
2677         int ret;
2678
2679         /* Not valid to be called on unbound objects. */
2680         if (obj_priv->gtt_space == NULL)
2681                 return -EINVAL;
2682
2683         ret = i915_gem_object_flush_gpu_write_domain(obj);
2684         if (ret != 0)
2685                 return ret;
2686
2687         /* Wait on any GPU rendering and flushing to occur. */
2688         ret = i915_gem_object_wait_rendering(obj);
2689         if (ret != 0)
2690                 return ret;
2691
2692         old_write_domain = obj->write_domain;
2693         old_read_domains = obj->read_domains;
2694
2695         /* If we're writing through the GTT domain, then CPU and GPU caches
2696          * will need to be invalidated at next use.
2697          */
2698         if (write)
2699                 obj->read_domains &= I915_GEM_DOMAIN_GTT;
2700
2701         i915_gem_object_flush_cpu_write_domain(obj);
2702
2703         /* It should now be out of any other write domains, and we can update
2704          * the domain values for our changes.
2705          */
2706         BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
2707         obj->read_domains |= I915_GEM_DOMAIN_GTT;
2708         if (write) {
2709                 obj->write_domain = I915_GEM_DOMAIN_GTT;
2710                 obj_priv->dirty = 1;
2711         }
2712
2713         trace_i915_gem_object_change_domain(obj,
2714                                             old_read_domains,
2715                                             old_write_domain);
2716
2717         return 0;
2718 }
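/*
 * For illustration, callers that are about to write through a GTT mapping
 * (e.g. the pwrite fast path) typically do roughly:
 *
 *	ret = i915_gem_object_set_to_gtt_domain(obj, 1);
 *	if (ret)
 *		return ret;
 *	... write through the GTT mapping ...
 *
 * with write == 1 so the object is marked dirty and stale read domains are
 * invalidated for the next user.
 */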
2719
2720 /*
2721  * Prepare a buffer for use as a display plane. Any flush wait is done
2722  * uninterruptibly, since the modesetting path must not be interrupted.
2723  */
2724 int
2725 i915_gem_object_set_to_display_plane(struct drm_gem_object *obj)
2726 {
2727         struct drm_device *dev = obj->dev;
2728         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
2729         uint32_t old_write_domain, old_read_domains;
2730         int ret;
2731
2732         /* Not valid to be called on unbound objects. */
2733         if (obj_priv->gtt_space == NULL)
2734                 return -EINVAL;
2735
2736         ret = i915_gem_object_flush_gpu_write_domain(obj);
2737         if (ret)
2738                 return ret;
2739
2740         /* Wait on any GPU rendering and flushing to occur. */
2741         if (obj_priv->active) {
2742 #if WATCH_BUF
2743                 DRM_INFO("%s: object %p wait for seqno %08x\n",
2744                           __func__, obj, obj_priv->last_rendering_seqno);
2745 #endif
2746                 ret = i915_do_wait_request(dev,
2747                                 obj_priv->last_rendering_seqno,
2748                                 0,
2749                                 obj_priv->ring);
2750                 if (ret != 0)
2751                         return ret;
2752         }
2753
2754         i915_gem_object_flush_cpu_write_domain(obj);
2755
2756         old_write_domain = obj->write_domain;
2757         old_read_domains = obj->read_domains;
2758
2759         /* It should now be out of any other write domains, and we can update
2760          * the domain values for our changes.
2761          */
2762         BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
2763         obj->read_domains = I915_GEM_DOMAIN_GTT;
2764         obj->write_domain = I915_GEM_DOMAIN_GTT;
2765         obj_priv->dirty = 1;
2766
2767         trace_i915_gem_object_change_domain(obj,
2768                                             old_read_domains,
2769                                             old_write_domain);
2770
2771         return 0;
2772 }
2773
2774 /**
2775  * Moves a single object to the CPU read, and possibly write domain.
2776  *
2777  * This function returns when the move is complete, including waiting on
2778  * flushes to occur.
2779  */
2780 static int
2781 i915_gem_object_set_to_cpu_domain(struct drm_gem_object *obj, int write)
2782 {
2783         uint32_t old_write_domain, old_read_domains;
2784         int ret;
2785
2786         ret = i915_gem_object_flush_gpu_write_domain(obj);
2787         if (ret)
2788                 return ret;
2789
2790         /* Wait on any GPU rendering and flushing to occur. */
2791         ret = i915_gem_object_wait_rendering(obj);
2792         if (ret != 0)
2793                 return ret;
2794
2795         i915_gem_object_flush_gtt_write_domain(obj);
2796
2797         /* If we have a partially-valid cache of the object in the CPU,
2798          * finish invalidating it and free the per-page flags.
2799          */
2800         i915_gem_object_set_to_full_cpu_read_domain(obj);
2801
2802         old_write_domain = obj->write_domain;
2803         old_read_domains = obj->read_domains;
2804
2805         /* Flush the CPU cache if it's still invalid. */
2806         if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0) {
2807                 i915_gem_clflush_object(obj);
2808
2809                 obj->read_domains |= I915_GEM_DOMAIN_CPU;
2810         }
2811
2812         /* It should now be out of any other write domains, and we can update
2813          * the domain values for our changes.
2814          */
2815         BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_CPU) != 0);
2816
2817         /* If we're writing through the CPU, then the GPU read domains will
2818          * need to be invalidated at next use.
2819          */
2820         if (write) {
2821                 obj->read_domains &= I915_GEM_DOMAIN_CPU;
2822                 obj->write_domain = I915_GEM_DOMAIN_CPU;
2823         }
2824
2825         trace_i915_gem_object_change_domain(obj,
2826                                             old_read_domains,
2827                                             old_write_domain);
2828
2829         return 0;
2830 }
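/*
 * Within this file the write variant is used by i915_gem_object_unbind()
 * above, which calls this with write == 1 so that CPU-visible state is
 * coherent and all pending GPU writes have landed in memory before the
 * backing pages are released.
 */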
2831
2832 /*
2833  * Set the next domain for the specified object. This
2834  * may not actually perform the necessary flushing/invalidating though,
2835  * as that may want to be batched with other set_domain operations
2836  *
2837  * This is (we hope) the only really tricky part of gem. The goal
2838  * is fairly simple -- track which caches hold bits of the object
2839  * and make sure they remain coherent. A few concrete examples may
2840  * help to explain how it works. For shorthand, we use the notation
2841  * (read_domains, write_domain), e.g. (CPU, CPU), to indicate a pair of
2842  * read and write domain masks.
2843  *
2844  * Case 1: the batch buffer
2845  *
2846  *      1. Allocated
2847  *      2. Written by CPU
2848  *      3. Mapped to GTT
2849  *      4. Read by GPU
2850  *      5. Unmapped from GTT
2851  *      6. Freed
2852  *
2853  *      Let's take these a step at a time
2854  *
2855  *      1. Allocated
2856  *              Pages allocated from the kernel may still have
2857  *              cache contents, so we set them to (CPU, CPU) always.
2858  *      2. Written by CPU (using pwrite)
2859  *              The pwrite function calls set_domain (CPU, CPU) and
2860  *              this function does nothing (as nothing changes)
2861  *      3. Mapped to GTT
2862  *              This function asserts that the object is not
2863  *              currently in any GPU-based read or write domains
2864  *      4. Read by GPU
2865  *              i915_gem_execbuffer calls set_domain (COMMAND, 0).
2866  *              As write_domain is zero, this function adds in the
2867  *              current read domains (CPU+COMMAND, 0).
2868  *              flush_domains is set to CPU.
2869  *              invalidate_domains is set to COMMAND
2870  *              clflush is run to get data out of the CPU caches
2871  *              then i915_dev_set_domain calls i915_gem_flush to
2872  *              emit an MI_FLUSH and drm_agp_chipset_flush
2873  *      5. Unmapped from GTT
2874  *              i915_gem_object_unbind calls set_domain (CPU, CPU)
2875  *              flush_domains and invalidate_domains end up both zero
2876  *              so no flushing/invalidating happens
2877  *      6. Freed
2878  *              yay, done
2879  *
2880  * Case 2: The shared render buffer
2881  *
2882  *      1. Allocated
2883  *      2. Mapped to GTT
2884  *      3. Read/written by GPU
2885  *      4. set_domain to (CPU,CPU)
2886  *      5. Read/written by CPU
2887  *      6. Read/written by GPU
2888  *
2889  *      1. Allocated
2890  *              Same as last example, (CPU, CPU)
2891  *      2. Mapped to GTT
2892  *              Nothing changes (assertions find that it is not in the GPU)
2893  *      3. Read/written by GPU
2894  *              execbuffer calls set_domain (RENDER, RENDER)
2895  *              flush_domains gets CPU
2896  *              invalidate_domains gets GPU
2897  *              clflush (obj)
2898  *              MI_FLUSH and drm_agp_chipset_flush
2899  *      4. set_domain (CPU, CPU)
2900  *              flush_domains gets GPU
2901  *              invalidate_domains gets CPU
2902  *              wait_rendering (obj) to make sure all drawing is complete.
2903  *              This will include an MI_FLUSH to get the data from GPU
2904  *              to memory
2905  *              clflush (obj) to invalidate the CPU cache
2906  *              Another MI_FLUSH in i915_gem_flush (eliminate this somehow?)
2907  *      5. Read/written by CPU
2908  *              cache lines are loaded and dirtied
2909  *      6. Read/written by GPU
2910  *              Same as last GPU access
2911  *
2912  * Case 3: The constant buffer
2913  *
2914  *      1. Allocated
2915  *      2. Written by CPU
2916  *      3. Read by GPU
2917  *      4. Updated (written) by CPU again
2918  *      5. Read by GPU
2919  *
2920  *      1. Allocated
2921  *              (CPU, CPU)
2922  *      2. Written by CPU
2923  *              (CPU, CPU)
2924  *      3. Read by GPU
2925  *              (CPU+RENDER, 0)
2926  *              flush_domains = CPU
2927  *              invalidate_domains = RENDER
2928  *              clflush (obj)
2929  *              MI_FLUSH
2930  *              drm_agp_chipset_flush
2931  *      4. Updated (written) by CPU again
2932  *              (CPU, CPU)
2933  *              flush_domains = 0 (no previous write domain)
2934  *              invalidate_domains = 0 (no new read domains)
2935  *      5. Read by GPU
2936  *              (CPU+RENDER, 0)
2937  *              flush_domains = CPU
2938  *              invalidate_domains = RENDER
2939  *              clflush (obj)
2940  *              MI_FLUSH
2941  *              drm_agp_chipset_flush
2942  */
2943 static void
2944 i915_gem_object_set_to_gpu_domain(struct drm_gem_object *obj)
2945 {
2946         struct drm_device               *dev = obj->dev;
2947         drm_i915_private_t              *dev_priv = dev->dev_private;
2948         struct drm_i915_gem_object      *obj_priv = to_intel_bo(obj);
2949         uint32_t                        invalidate_domains = 0;
2950         uint32_t                        flush_domains = 0;
2951         uint32_t                        old_read_domains;
2952
2953         BUG_ON(obj->pending_read_domains & I915_GEM_DOMAIN_CPU);
2954         BUG_ON(obj->pending_write_domain == I915_GEM_DOMAIN_CPU);
2955
2956         intel_mark_busy(dev, obj);
2957
2958 #if WATCH_BUF
2959         DRM_INFO("%s: object %p read %08x -> %08x write %08x -> %08x\n",
2960                  __func__, obj,
2961                  obj->read_domains, obj->pending_read_domains,
2962                  obj->write_domain, obj->pending_write_domain);
2963 #endif
2964         /*
2965          * If the object isn't moving to a new write domain,
2966          * let the object stay in multiple read domains
2967          */
2968         if (obj->pending_write_domain == 0)
2969                 obj->pending_read_domains |= obj->read_domains;
2970         else
2971                 obj_priv->dirty = 1;
2972
2973         /*
2974          * Flush the current write domain if
2975          * the new read domains don't match. Invalidate
2976          * any read domains which differ from the old
2977          * write domain
2978          */
2979         if (obj->write_domain &&
2980             obj->write_domain != obj->pending_read_domains) {
2981                 flush_domains |= obj->write_domain;
2982                 invalidate_domains |=
2983                         obj->pending_read_domains & ~obj->write_domain;
2984         }
2985         /*
2986          * Invalidate any read caches which may have
2987          * stale data. That is, any new read domains.
2988          */
2989         invalidate_domains |= obj->pending_read_domains & ~obj->read_domains;
2990         if ((flush_domains | invalidate_domains) & I915_GEM_DOMAIN_CPU) {
2991 #if WATCH_BUF
2992                 DRM_INFO("%s: CPU domain flush %08x invalidate %08x\n",
2993                          __func__, flush_domains, invalidate_domains);
2994 #endif
2995                 i915_gem_clflush_object(obj);
2996         }
2997
2998         old_read_domains = obj->read_domains;
2999
3000         /* The actual obj->write_domain will be updated with
3001          * pending_write_domain after we emit the accumulated flush for all
3002          * of our domain changes in execbuffers (which clears objects'
3003          * write_domains).  So if we have a current write domain that we
3004          * aren't changing, set pending_write_domain to that.
3005          */
3006         if (flush_domains == 0 && obj->pending_write_domain == 0)
3007                 obj->pending_write_domain = obj->write_domain;
3008         obj->read_domains = obj->pending_read_domains;
3009
3010         if (flush_domains & I915_GEM_GPU_DOMAINS) {
3011                 if (obj_priv->ring == &dev_priv->render_ring)
3012                         dev_priv->flush_rings |= FLUSH_RENDER_RING;
3013                 else if (obj_priv->ring == &dev_priv->bsd_ring)
3014                         dev_priv->flush_rings |= FLUSH_BSD_RING;
3015         }
3016
3017         dev->invalidate_domains |= invalidate_domains;
3018         dev->flush_domains |= flush_domains;
3019 #if WATCH_BUF
3020         DRM_INFO("%s: read %08x write %08x invalidate %08x flush %08x\n",
3021                  __func__,
3022                  obj->read_domains, obj->write_domain,
3023                  dev->invalidate_domains, dev->flush_domains);
3024 #endif
3025
3026         trace_i915_gem_object_change_domain(obj,
3027                                             old_read_domains,
3028                                             obj->write_domain);
3029 }
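
/*
 * Illustrative sketch (not driver code, hence the #if 0): the domain
 * bookkeeping rule that i915_gem_object_set_to_gpu_domain() applies,
 * worked through for "Case 1, step 4" of the comment block above.  The
 * helper name and the standalone variables are made up for this example.
 */
#if 0
static void domain_bookkeeping_example(void)
{
        /* After the pwrite of step 2 the batch buffer sits at (CPU, CPU). */
        uint32_t read_domains  = I915_GEM_DOMAIN_CPU;
        uint32_t write_domain  = I915_GEM_DOMAIN_CPU;
        /* Step 4: execbuffer asks for (COMMAND, 0). */
        uint32_t pending_read  = I915_GEM_DOMAIN_COMMAND;
        uint32_t pending_write = 0;
        uint32_t invalidate = 0, flush = 0;

        /* No new write domain, so the old read domains are kept as well. */
        if (pending_write == 0)
                pending_read |= read_domains;           /* CPU | COMMAND */

        /* The old write domain differs from the new readers: flush it. */
        if (write_domain && write_domain != pending_read) {
                flush |= write_domain;                  /* CPU */
                invalidate |= pending_read & ~write_domain;
        }

        /* Any newly added read domain starts out stale. */
        invalidate |= pending_read & ~read_domains;     /* COMMAND */

        /*
         * Result: flush == CPU and invalidate == COMMAND, so the object is
         * clflushed and the accumulated domains are later handed to
         * i915_gem_flush(), exactly as Case 1, step 4 describes.
         */
}
#endif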
3030
3031 /**
3032  * Moves the object from a partial CPU read domain to a full one.
3033  *
3034  * Note that this only resolves i915_gem_object_set_cpu_read_domain_range(),
3035  * and doesn't handle transitioning from !(read_domains & I915_GEM_DOMAIN_CPU).
3036  */
3037 static void
3038 i915_gem_object_set_to_full_cpu_read_domain(struct drm_gem_object *obj)
3039 {
3040         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
3041
3042         if (!obj_priv->page_cpu_valid)
3043                 return;
3044
3045         /* If we're partially in the CPU read domain, finish moving it in.
3046          */
3047         if (obj->read_domains & I915_GEM_DOMAIN_CPU) {
3048                 int i;
3049
3050                 for (i = 0; i <= (obj->size - 1) / PAGE_SIZE; i++) {
3051                         if (obj_priv->page_cpu_valid[i])
3052                                 continue;
3053                         drm_clflush_pages(obj_priv->pages + i, 1);
3054                 }
3055         }
3056
3057         /* Free the page_cpu_valid mappings which are now stale, whether
3058          * or not we've got I915_GEM_DOMAIN_CPU.
3059          */
3060         kfree(obj_priv->page_cpu_valid);
3061         obj_priv->page_cpu_valid = NULL;
3062 }
3063
3064 /**
3065  * Set the CPU read domain on a range of the object.
3066  *
3067  * The object ends up with I915_GEM_DOMAIN_CPU in its read flags although it's
3068  * not entirely valid.  The page_cpu_valid member of the object tracks which
3069  * pages have already been flushed, and is respected by
3070  * i915_gem_object_set_to_cpu_domain() if it is later called to get a valid mapping
3071  * of the whole object.
3072  *
3073  * This function returns when the move is complete, including waiting on
3074  * flushes to occur.
3075  */
3076 static int
3077 i915_gem_object_set_cpu_read_domain_range(struct drm_gem_object *obj,
3078                                           uint64_t offset, uint64_t size)
3079 {
3080         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
3081         uint32_t old_read_domains;
3082         int i, ret;
3083
3084         if (offset == 0 && size == obj->size)
3085                 return i915_gem_object_set_to_cpu_domain(obj, 0);
3086
3087         ret = i915_gem_object_flush_gpu_write_domain(obj);
3088         if (ret)
3089                 return ret;
3090
3091         /* Wait on any GPU rendering and flushing to occur. */
3092         ret = i915_gem_object_wait_rendering(obj);
3093         if (ret != 0)
3094                 return ret;
3095         i915_gem_object_flush_gtt_write_domain(obj);
3096
3097         /* If we're already fully in the CPU read domain, we're done. */
3098         if (obj_priv->page_cpu_valid == NULL &&
3099             (obj->read_domains & I915_GEM_DOMAIN_CPU) != 0)
3100                 return 0;
3101
3102         /* Otherwise, create/clear the per-page CPU read domain flag if we're
3103          * newly adding I915_GEM_DOMAIN_CPU
3104          */
3105         if (obj_priv->page_cpu_valid == NULL) {
3106                 obj_priv->page_cpu_valid = kzalloc(obj->size / PAGE_SIZE,
3107                                                    GFP_KERNEL);
3108                 if (obj_priv->page_cpu_valid == NULL)
3109                         return -ENOMEM;
3110         } else if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0)
3111                 memset(obj_priv->page_cpu_valid, 0, obj->size / PAGE_SIZE);
3112
3113         /* Flush the cache on any pages that are still invalid from the CPU's
3114          * perspective.
3115          */
3116         for (i = offset / PAGE_SIZE; i <= (offset + size - 1) / PAGE_SIZE;
3117              i++) {
3118                 if (obj_priv->page_cpu_valid[i])
3119                         continue;
3120
3121                 drm_clflush_pages(obj_priv->pages + i, 1);
3122
3123                 obj_priv->page_cpu_valid[i] = 1;
3124         }
3125
3126         /* It should now be out of any other write domains, and we can update
3127          * the domain values for our changes.
3128          */
3129         BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_CPU) != 0);
3130
3131         old_read_domains = obj->read_domains;
3132         obj->read_domains |= I915_GEM_DOMAIN_CPU;
3133
3134         trace_i915_gem_object_change_domain(obj,
3135                                             old_read_domains,
3136                                             obj->write_domain);
3137
3138         return 0;
3139 }
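
/*
 * Illustrative sketch (not driver code): the page-range arithmetic used by
 * the function above; the helper name and values are made up.  With
 * PAGE_SIZE == 4096, a pread of 5000 bytes at offset 5000 touches bytes
 * 5000..9999, i.e. pages 1 and 2; only pages whose page_cpu_valid[] slot is
 * still zero are clflushed and then marked valid.
 */
#if 0
static void cpu_read_range_pages_example(uint64_t offset, uint64_t size,
                                         uint64_t *first, uint64_t *last)
{
        *first = offset / PAGE_SIZE;                    /* 5000 / 4096 == 1 */
        *last  = (offset + size - 1) / PAGE_SIZE;       /* 9999 / 4096 == 2 */
}
#endif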
3140
3141 /**
3142  * Pin an object to the GTT and evaluate the relocations landing in it.
3143  */
3144 static int
3145 i915_gem_object_pin_and_relocate(struct drm_gem_object *obj,
3146                                  struct drm_file *file_priv,
3147                                  struct drm_i915_gem_exec_object2 *entry,
3148                                  struct drm_i915_gem_relocation_entry *relocs)
3149 {
3150         struct drm_device *dev = obj->dev;
3151         drm_i915_private_t *dev_priv = dev->dev_private;
3152         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
3153         int i, ret;
3154         void __iomem *reloc_page;
3155         bool need_fence;
3156
3157         need_fence = entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
3158                      obj_priv->tiling_mode != I915_TILING_NONE;
3159
3160         /* Check fence reg constraints and rebind if necessary */
3161         if (need_fence &&
3162             !i915_gem_object_fence_offset_ok(obj,
3163                                              obj_priv->tiling_mode)) {
3164                 ret = i915_gem_object_unbind(obj);
3165                 if (ret)
3166                         return ret;
3167         }
3168
3169         /* Choose the GTT offset for our buffer and put it there. */
3170         ret = i915_gem_object_pin(obj, (uint32_t) entry->alignment);
3171         if (ret)
3172                 return ret;
3173
3174         /*
3175          * Pre-965 chips need a fence register set up in order to
3176          * properly handle blits to/from tiled surfaces.
3177          */
3178         if (need_fence) {
3179                 ret = i915_gem_object_get_fence_reg(obj);
3180                 if (ret != 0) {
3181                         i915_gem_object_unpin(obj);
3182                         return ret;
3183                 }
3184         }
3185
3186         entry->offset = obj_priv->gtt_offset;
3187
3188         /* Apply the relocations, using the GTT aperture to avoid cache
3189          * flushing requirements.
3190          */
3191         for (i = 0; i < entry->relocation_count; i++) {
3192                 struct drm_i915_gem_relocation_entry *reloc = &relocs[i];
3193                 struct drm_gem_object *target_obj;
3194                 struct drm_i915_gem_object *target_obj_priv;
3195                 uint32_t reloc_val, reloc_offset;
3196                 uint32_t __iomem *reloc_entry;
3197
3198                 target_obj = drm_gem_object_lookup(obj->dev, file_priv,
3199                                                    reloc->target_handle);
3200                 if (target_obj == NULL) {
3201                         i915_gem_object_unpin(obj);
3202                         return -EBADF;
3203                 }
3204                 target_obj_priv = to_intel_bo(target_obj);
3205
3206 #if WATCH_RELOC
3207                 DRM_INFO("%s: obj %p offset %08x target %d "
3208                          "read %08x write %08x gtt %08x "
3209                          "presumed %08x delta %08x\n",
3210                          __func__,
3211                          obj,
3212                          (int) reloc->offset,
3213                          (int) reloc->target_handle,
3214                          (int) reloc->read_domains,
3215                          (int) reloc->write_domain,
3216                          (int) target_obj_priv->gtt_offset,
3217                          (int) reloc->presumed_offset,
3218                          reloc->delta);
3219 #endif
3220
3221                 /* The target buffer should have appeared before us in the
3222                  * exec_object list, so it should have a GTT space bound by now.
3223                  */
3224                 if (target_obj_priv->gtt_space == NULL) {
3225                         DRM_ERROR("No GTT space found for object %d\n",
3226                                   reloc->target_handle);
3227                         drm_gem_object_unreference(target_obj);
3228                         i915_gem_object_unpin(obj);
3229                         return -EINVAL;
3230                 }
3231
3232                 /* Validate that the target is in a valid r/w GPU domain */
3233                 if (reloc->write_domain & (reloc->write_domain - 1)) {
3234                         DRM_ERROR("reloc with multiple write domains: "
3235                                   "obj %p target %d offset %d "
3236                                   "read %08x write %08x",
3237                                   obj, reloc->target_handle,
3238                                   (int) reloc->offset,
3239                                   reloc->read_domains,
3240                                   reloc->write_domain);
                             drm_gem_object_unreference(target_obj);
                             i915_gem_object_unpin(obj);
3241                         return -EINVAL;
3242                 }
3243                 if (reloc->write_domain & I915_GEM_DOMAIN_CPU ||
3244                     reloc->read_domains & I915_GEM_DOMAIN_CPU) {
3245                         DRM_ERROR("reloc with read/write CPU domains: "
3246                                   "obj %p target %d offset %d "
3247                                   "read %08x write %08x",
3248                                   obj, reloc->target_handle,
3249                                   (int) reloc->offset,
3250                                   reloc->read_domains,
3251                                   reloc->write_domain);
3252                         drm_gem_object_unreference(target_obj);
3253                         i915_gem_object_unpin(obj);
3254                         return -EINVAL;
3255                 }
3256                 if (reloc->write_domain && target_obj->pending_write_domain &&
3257                     reloc->write_domain != target_obj->pending_write_domain) {
3258                         DRM_ERROR("Write domain conflict: "
3259                                   "obj %p target %d offset %d "
3260                                   "new %08x old %08x\n",
3261                                   obj, reloc->target_handle,
3262                                   (int) reloc->offset,
3263                                   reloc->write_domain,
3264                                   target_obj->pending_write_domain);
3265                         drm_gem_object_unreference(target_obj);
3266                         i915_gem_object_unpin(obj);
3267                         return -EINVAL;
3268                 }
3269
3270                 target_obj->pending_read_domains |= reloc->read_domains;
3271                 target_obj->pending_write_domain |= reloc->write_domain;
3272
3273                 /* If the relocation already has the right value in it, no
3274                  * more work needs to be done.
3275                  */
3276                 if (target_obj_priv->gtt_offset == reloc->presumed_offset) {
3277                         drm_gem_object_unreference(target_obj);
3278                         continue;
3279                 }
3280
3281                 /* Check that the relocation address is valid... */
3282                 if (reloc->offset > obj->size - 4) {
3283                         DRM_ERROR("Relocation beyond object bounds: "
3284                                   "obj %p target %d offset %d size %d.\n",
3285                                   obj, reloc->target_handle,
3286                                   (int) reloc->offset, (int) obj->size);
3287                         drm_gem_object_unreference(target_obj);
3288                         i915_gem_object_unpin(obj);
3289                         return -EINVAL;
3290                 }
3291                 if (reloc->offset & 3) {
3292                         DRM_ERROR("Relocation not 4-byte aligned: "
3293                                   "obj %p target %d offset %d.\n",
3294                                   obj, reloc->target_handle,
3295                                   (int) reloc->offset);
3296                         drm_gem_object_unreference(target_obj);
3297                         i915_gem_object_unpin(obj);
3298                         return -EINVAL;
3299                 }
3300
3301                 /* and points to somewhere within the target object. */
3302                 if (reloc->delta >= target_obj->size) {
3303                         DRM_ERROR("Relocation beyond target object bounds: "
3304                                   "obj %p target %d delta %d size %d.\n",
3305                                   obj, reloc->target_handle,
3306                                   (int) reloc->delta, (int) target_obj->size);
3307                         drm_gem_object_unreference(target_obj);
3308                         i915_gem_object_unpin(obj);
3309                         return -EINVAL;
3310                 }
3311
3312                 ret = i915_gem_object_set_to_gtt_domain(obj, 1);
3313                 if (ret != 0) {
3314                         drm_gem_object_unreference(target_obj);
3315                         i915_gem_object_unpin(obj);
3316                         return ret;
3317                 }
3318
3319                 /* Map the page containing the relocation we're going to
3320                  * perform.
3321                  */
3322                 reloc_offset = obj_priv->gtt_offset + reloc->offset;
3323                 reloc_page = io_mapping_map_atomic_wc(dev_priv->mm.gtt_mapping,
3324                                                       (reloc_offset &
3325                                                        ~(PAGE_SIZE - 1)),
3326                                                       KM_USER0);
3327                 reloc_entry = (uint32_t __iomem *)(reloc_page +
3328                                                    (reloc_offset & (PAGE_SIZE - 1)));
3329                 reloc_val = target_obj_priv->gtt_offset + reloc->delta;
3330
3331 #if WATCH_BUF
3332                 DRM_INFO("Applied relocation: %p@0x%08x %08x -> %08x\n",
3333                           obj, (unsigned int) reloc->offset,
3334                           readl(reloc_entry), reloc_val);
3335 #endif
3336                 writel(reloc_val, reloc_entry);
3337                 io_mapping_unmap_atomic(reloc_page, KM_USER0);
3338
3339                 /* The updated presumed offset for this entry will be
3340                  * copied back out to the user.
3341                  */
3342                 reloc->presumed_offset = target_obj_priv->gtt_offset;
3343
3344                 drm_gem_object_unreference(target_obj);
3345         }
3346
3347 #if WATCH_BUF
3348         if (0)
3349                 i915_gem_dump_object(obj, 128, __func__, ~0);
3350 #endif
3351         return 0;
3352 }
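
/*
 * Illustrative userspace-side sketch (not driver code): what one relocation
 * entry handed to the loop above typically looks like.  The helper name,
 * handles and offsets are made up; the struct and domain flags come from
 * i915_drm.h, and the target buffer must appear earlier in the exec list.
 */
#if 0
static void fill_reloc_example(struct drm_i915_gem_relocation_entry *reloc)
{
        reloc->target_handle = 2;       /* GEM handle of the buffer pointed at */
        reloc->offset = 0x40;           /* 4-byte aligned spot in the batch to patch */
        reloc->delta = 0x100;           /* offset within the target buffer */
        reloc->presumed_offset = 0;     /* kernel writes the real GTT offset back */
        reloc->read_domains = I915_GEM_DOMAIN_RENDER;
        reloc->write_domain = 0;        /* read-only reference; CPU domains not allowed */

        /*
         * If presumed_offset already matches the target's current GTT offset,
         * the kernel skips the writel() above entirely.
         */
}
#endif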
3353
3354 /* Throttle our rendering by waiting until the ring has completed our requests
3355  * emitted over 20 msec ago.
3356  *
3357  * Note that if we were to use the current jiffies each time around the loop,
3358  * we wouldn't escape the function with any frames outstanding if the time to
3359  * render a frame was over 20ms.
3360  *
3361  * This should get us reasonable parallelism between CPU and GPU but also
3362  * relatively low latency when blocking on a particular request to finish.
3363  */
3364 static int
3365 i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file_priv)
3366 {
3367         struct drm_i915_file_private *i915_file_priv = file_priv->driver_priv;
3368         int ret = 0;
3369         unsigned long recent_enough = jiffies - msecs_to_jiffies(20);
3370
3371         mutex_lock(&dev->struct_mutex);
3372         while (!list_empty(&i915_file_priv->mm.request_list)) {
3373                 struct drm_i915_gem_request *request;
3374
3375                 request = list_first_entry(&i915_file_priv->mm.request_list,
3376                                            struct drm_i915_gem_request,
3377                                            client_list);
3378
3379                 if (time_after_eq(request->emitted_jiffies, recent_enough))
3380                         break;
3381
3382                 ret = i915_wait_request(dev, request->seqno, request->ring);
3383                 if (ret != 0)
3384                         break;
3385         }
3386         mutex_unlock(&dev->struct_mutex);
3387
3388         return ret;
3389 }
3390
3391 static int
3392 i915_gem_get_relocs_from_user(struct drm_i915_gem_exec_object2 *exec_list,
3393                               uint32_t buffer_count,
3394                               struct drm_i915_gem_relocation_entry **relocs)
3395 {
3396         uint32_t reloc_count = 0, reloc_index = 0, i;
3397         int ret;
3398
3399         *relocs = NULL;
3400         for (i = 0; i < buffer_count; i++) {
3401                 if (reloc_count + exec_list[i].relocation_count < reloc_count)
3402                         return -EINVAL;
3403                 reloc_count += exec_list[i].relocation_count;
3404         }
3405
3406         *relocs = drm_calloc_large(reloc_count, sizeof(**relocs));
3407         if (*relocs == NULL) {
3408                 DRM_ERROR("failed to alloc relocs, count %d\n", reloc_count);
3409                 return -ENOMEM;
3410         }
3411
3412         for (i = 0; i < buffer_count; i++) {
3413                 struct drm_i915_gem_relocation_entry __user *user_relocs;
3414
3415                 user_relocs = (void __user *)(uintptr_t)exec_list[i].relocs_ptr;
3416
3417                 ret = copy_from_user(&(*relocs)[reloc_index],
3418                                      user_relocs,
3419                                      exec_list[i].relocation_count *
3420                                      sizeof(**relocs));
3421                 if (ret != 0) {
3422                         drm_free_large(*relocs);
3423                         *relocs = NULL;
3424                         return -EFAULT;
3425                 }
3426
3427                 reloc_index += exec_list[i].relocation_count;
3428         }
3429
3430         return 0;
3431 }
3432
3433 static int
3434 i915_gem_put_relocs_to_user(struct drm_i915_gem_exec_object2 *exec_list,
3435                             uint32_t buffer_count,
3436                             struct drm_i915_gem_relocation_entry *relocs)
3437 {
3438         uint32_t reloc_count = 0, i;
3439         int ret = 0;
3440
3441         if (relocs == NULL)
3442                 return 0;
3443
3444         for (i = 0; i < buffer_count; i++) {
3445                 struct drm_i915_gem_relocation_entry __user *user_relocs;
3446                 int unwritten;
3447
3448                 user_relocs = (void __user *)(uintptr_t)exec_list[i].relocs_ptr;
3449
3450                 unwritten = copy_to_user(user_relocs,
3451                                          &relocs[reloc_count],
3452                                          exec_list[i].relocation_count *
3453                                          sizeof(*relocs));
3454
3455                 if (unwritten) {
3456                         ret = -EFAULT;
3457                         goto err;
3458                 }
3459
3460                 reloc_count += exec_list[i].relocation_count;
3461         }
3462
3463 err:
3464         drm_free_large(relocs);
3465
3466         return ret;
3467 }
3468
3469 static int
3470 i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec,
3471                           uint64_t exec_offset)
3472 {
3473         uint32_t exec_start, exec_len;
3474
3475         exec_start = (uint32_t) exec_offset + exec->batch_start_offset;
3476         exec_len = (uint32_t) exec->batch_len;
3477
3478         if ((exec_start | exec_len) & 0x7)
3479                 return -EINVAL;
3480
3481         if (!exec_start)
3482                 return -EINVAL;
3483
3484         return 0;
3485 }
3486
3487 static int
3488 i915_gem_wait_for_pending_flip(struct drm_device *dev,
3489                                struct drm_gem_object **object_list,
3490                                int count)
3491 {
3492         drm_i915_private_t *dev_priv = dev->dev_private;
3493         struct drm_i915_gem_object *obj_priv;
3494         DEFINE_WAIT(wait);
3495         int i, ret = 0;
3496
3497         for (;;) {
3498                 prepare_to_wait(&dev_priv->pending_flip_queue,
3499                                 &wait, TASK_INTERRUPTIBLE);
3500                 for (i = 0; i < count; i++) {
3501                         obj_priv = to_intel_bo(object_list[i]);
3502                         if (atomic_read(&obj_priv->pending_flip) > 0)
3503                                 break;
3504                 }
3505                 if (i == count)
3506                         break;
3507
3508                 if (!signal_pending(current)) {
3509                         mutex_unlock(&dev->struct_mutex);
3510                         schedule();
3511                         mutex_lock(&dev->struct_mutex);
3512                         continue;
3513                 }
3514                 ret = -ERESTARTSYS;
3515                 break;
3516         }
3517         finish_wait(&dev_priv->pending_flip_queue, &wait);
3518
3519         return ret;
3520 }
3521
3522
3523 int
3524 i915_gem_do_execbuffer(struct drm_device *dev, void *data,
3525                        struct drm_file *file_priv,
3526                        struct drm_i915_gem_execbuffer2 *args,
3527                        struct drm_i915_gem_exec_object2 *exec_list)
3528 {
3529         drm_i915_private_t *dev_priv = dev->dev_private;
3530         struct drm_gem_object **object_list = NULL;
3531         struct drm_gem_object *batch_obj;
3532         struct drm_i915_gem_object *obj_priv;
3533         struct drm_clip_rect *cliprects = NULL;
3534         struct drm_i915_gem_relocation_entry *relocs = NULL;
3535         int ret = 0, ret2, i, pinned = 0;
3536         uint64_t exec_offset;
3537         uint32_t seqno, flush_domains, reloc_index;
3538         int pin_tries, flips;
3539
3540         struct intel_ring_buffer *ring = NULL;
3541
3542 #if WATCH_EXEC
3543         DRM_INFO("buffers_ptr %d buffer_count %d len %08x\n",
3544                   (int) args->buffers_ptr, args->buffer_count, args->batch_len);
3545 #endif
3546         if (args->flags & I915_EXEC_BSD) {
3547                 if (!HAS_BSD(dev)) {
3548                         DRM_ERROR("execbuf with wrong flag\n");
3549                         return -EINVAL;
3550                 }
3551                 ring = &dev_priv->bsd_ring;
3552         } else {
3553                 ring = &dev_priv->render_ring;
3554         }
3555
3556         if (args->buffer_count < 1) {
3557                 DRM_ERROR("execbuf with %d buffers\n", args->buffer_count);
3558                 return -EINVAL;
3559         }
3560         object_list = drm_malloc_ab(sizeof(*object_list), args->buffer_count);
3561         if (object_list == NULL) {
3562                 DRM_ERROR("Failed to allocate object list for %d buffers\n",
3563                           args->buffer_count);
3564                 ret = -ENOMEM;
3565                 goto pre_mutex_err;
3566         }
3567
3568         if (args->num_cliprects != 0) {
3569                 cliprects = kcalloc(args->num_cliprects, sizeof(*cliprects),
3570                                     GFP_KERNEL);
3571                 if (cliprects == NULL) {
3572                         ret = -ENOMEM;
3573                         goto pre_mutex_err;
3574                 }
3575
3576                 ret = copy_from_user(cliprects,
3577                                      (struct drm_clip_rect __user *)
3578                                      (uintptr_t) args->cliprects_ptr,
3579                                      sizeof(*cliprects) * args->num_cliprects);
3580                 if (ret != 0) {
3581                         DRM_ERROR("copy %d cliprects failed: %d\n",
3582                                   args->num_cliprects, ret);
3583                         goto pre_mutex_err;
3584                 }
3585         }
3586
3587         ret = i915_gem_get_relocs_from_user(exec_list, args->buffer_count,
3588                                             &relocs);
3589         if (ret != 0)
3590                 goto pre_mutex_err;
3591
3592         mutex_lock(&dev->struct_mutex);
3593
3594         i915_verify_inactive(dev, __FILE__, __LINE__);
3595
3596         if (atomic_read(&dev_priv->mm.wedged)) {
3597                 mutex_unlock(&dev->struct_mutex);
3598                 ret = -EIO;
3599                 goto pre_mutex_err;
3600         }
3601
3602         if (dev_priv->mm.suspended) {
3603                 mutex_unlock(&dev->struct_mutex);
3604                 ret = -EBUSY;
3605                 goto pre_mutex_err;
3606         }
3607
3608         /* Look up object handles */
3609         flips = 0;
3610         for (i = 0; i < args->buffer_count; i++) {
3611                 object_list[i] = drm_gem_object_lookup(dev, file_priv,
3612                                                        exec_list[i].handle);
3613                 if (object_list[i] == NULL) {
3614                         DRM_ERROR("Invalid object handle %d at index %d\n",
3615                                    exec_list[i].handle, i);
3616                         /* prevent error path from reading uninitialized data */
3617                         args->buffer_count = i + 1;
3618                         ret = -EBADF;
3619                         goto err;
3620                 }
3621
3622                 obj_priv = to_intel_bo(object_list[i]);
3623                 if (obj_priv->in_execbuffer) {
3624                         DRM_ERROR("Object %p appears more than once in object list\n",
3625                                    object_list[i]);
3626                         /* prevent error path from reading uninitialized data */
3627                         args->buffer_count = i + 1;
3628                         ret = -EBADF;
3629                         goto err;
3630                 }
3631                 obj_priv->in_execbuffer = true;
3632                 flips += atomic_read(&obj_priv->pending_flip);
3633         }
3634
3635         if (flips > 0) {
3636                 ret = i915_gem_wait_for_pending_flip(dev, object_list,
3637                                                      args->buffer_count);
3638                 if (ret)
3639                         goto err;
3640         }
3641
3642         /* Pin and relocate */
3643         for (pin_tries = 0; ; pin_tries++) {
3644                 ret = 0;
3645                 reloc_index = 0;
3646
3647                 for (i = 0; i < args->buffer_count; i++) {
3648                         object_list[i]->pending_read_domains = 0;
3649                         object_list[i]->pending_write_domain = 0;
3650                         ret = i915_gem_object_pin_and_relocate(object_list[i],
3651                                                                file_priv,
3652                                                                &exec_list[i],
3653                                                                &relocs[reloc_index]);
3654                         if (ret)
3655                                 break;
3656                         pinned = i + 1;
3657                         reloc_index += exec_list[i].relocation_count;
3658                 }
3659                 /* success */
3660                 if (ret == 0)
3661                         break;
3662
3663                 /* error other than GTT full, or we've already tried again */
3664                 if (ret != -ENOSPC || pin_tries >= 1) {
3665                         if (ret != -ERESTARTSYS) {
3666                                 unsigned long long total_size = 0;
3667                                 int num_fences = 0;
3668                                 for (i = 0; i < args->buffer_count; i++) {
3669                                         obj_priv = to_intel_bo(object_list[i]);
3670
3671                                         total_size += object_list[i]->size;
3672                                         num_fences +=
3673                                                 exec_list[i].flags & EXEC_OBJECT_NEEDS_FENCE &&
3674                                                 obj_priv->tiling_mode != I915_TILING_NONE;
3675                                 }
3676                                 DRM_ERROR("Failed to pin buffer %d of %d, total %llu bytes, %d fences: %d\n",
3677                                           pinned+1, args->buffer_count,
3678                                           total_size, num_fences,
3679                                           ret);
3680                                 DRM_ERROR("%d objects [%d pinned], "
3681                                           "%d object bytes [%d pinned], "
3682                                           "%d/%d gtt bytes\n",
3683                                           atomic_read(&dev->object_count),
3684                                           atomic_read(&dev->pin_count),
3685                                           atomic_read(&dev->object_memory),
3686                                           atomic_read(&dev->pin_memory),
3687                                           atomic_read(&dev->gtt_memory),
3688                                           dev->gtt_total);
3689                         }
3690                         goto err;
3691                 }
3692
3693                 /* unpin all of our buffers */
3694                 for (i = 0; i < pinned; i++)
3695                         i915_gem_object_unpin(object_list[i]);
3696                 pinned = 0;
3697
3698                 /* evict everyone we can from the aperture */
3699                 ret = i915_gem_evict_everything(dev);
3700                 if (ret && ret != -ENOSPC)
3701                         goto err;
3702         }
3703
3704         /* Set the pending read domains for the batch buffer to COMMAND */
3705         batch_obj = object_list[args->buffer_count-1];
3706         if (batch_obj->pending_write_domain) {
3707                 DRM_ERROR("Attempting to use self-modifying batch buffer\n");
3708                 ret = -EINVAL;
3709                 goto err;
3710         }
3711         batch_obj->pending_read_domains |= I915_GEM_DOMAIN_COMMAND;
3712
3713         /* Sanity check the batch buffer, prior to moving objects */
3714         exec_offset = exec_list[args->buffer_count - 1].offset;
3715         ret = i915_gem_check_execbuffer(args, exec_offset);
3716         if (ret != 0) {
3717                 DRM_ERROR("execbuf with invalid offset/length\n");
3718                 goto err;
3719         }
3720
3721         i915_verify_inactive(dev, __FILE__, __LINE__);
3722
3723         /* Zero the global flush/invalidate flags. These
3724          * will be modified as new domains are computed
3725          * for each object
3726          */
3727         dev->invalidate_domains = 0;
3728         dev->flush_domains = 0;
3729         dev_priv->flush_rings = 0;
3730
3731         for (i = 0; i < args->buffer_count; i++) {
3732                 struct drm_gem_object *obj = object_list[i];
3733
3734                 /* Compute new gpu domains and update invalidate/flush */
3735                 i915_gem_object_set_to_gpu_domain(obj);
3736         }
3737
3738         i915_verify_inactive(dev, __FILE__, __LINE__);
3739
3740         if (dev->invalidate_domains | dev->flush_domains) {
3741 #if WATCH_EXEC
3742                 DRM_INFO("%s: invalidate_domains %08x flush_domains %08x\n",
3743                           __func__,
3744                          dev->invalidate_domains,
3745                          dev->flush_domains);
3746 #endif
3747                 i915_gem_flush(dev,
3748                                dev->invalidate_domains,
3749                                dev->flush_domains);
3750                 if (dev_priv->flush_rings & FLUSH_RENDER_RING)
3751                         (void)i915_add_request(dev, file_priv,
3752                                                dev->flush_domains,
3753                                                &dev_priv->render_ring);
3754                 if (dev_priv->flush_rings & FLUSH_BSD_RING)
3755                         (void)i915_add_request(dev, file_priv,
3756                                                dev->flush_domains,
3757                                                &dev_priv->bsd_ring);
3758         }
3759
3760         for (i = 0; i < args->buffer_count; i++) {
3761                 struct drm_gem_object *obj = object_list[i];
3762                 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
3763                 uint32_t old_write_domain = obj->write_domain;
3764
3765                 obj->write_domain = obj->pending_write_domain;
3766                 if (obj->write_domain)
3767                         list_move_tail(&obj_priv->gpu_write_list,
3768                                        &dev_priv->mm.gpu_write_list);
3769                 else
3770                         list_del_init(&obj_priv->gpu_write_list);
3771
3772                 trace_i915_gem_object_change_domain(obj,
3773                                                     obj->read_domains,
3774                                                     old_write_domain);
3775         }
3776
3777         i915_verify_inactive(dev, __FILE__, __LINE__);
3778
3779 #if WATCH_COHERENCY
3780         for (i = 0; i < args->buffer_count; i++) {
3781                 i915_gem_object_check_coherency(object_list[i],
3782                                                 exec_list[i].handle);
3783         }
3784 #endif
3785
3786 #if WATCH_EXEC
3787         i915_gem_dump_object(batch_obj,
3788                               args->batch_len,
3789                               __func__,
3790                               ~0);
3791 #endif
3792
3793         /* Exec the batchbuffer */
3794         ret = ring->dispatch_gem_execbuffer(dev, ring, args,
3795                         cliprects, exec_offset);
3796         if (ret) {
3797                 DRM_ERROR("dispatch failed %d\n", ret);
3798                 goto err;
3799         }
3800
3801         /*
3802          * Ensure that the commands in the batch buffer are
3803          * finished before the interrupt fires
3804          */
3805         flush_domains = i915_retire_commands(dev, ring);
3806
3807         i915_verify_inactive(dev, __FILE__, __LINE__);
3808
3809         /*
3810          * Get a seqno representing the execution of the current buffer,
3811          * which we can wait on.  We would like to mitigate these interrupts,
3812          * likely by only creating seqnos occasionally (so that we have
3813          * *some* interrupts representing completion of buffers that we can
3814          * wait on when trying to clear up gtt space).
3815          */
3816         seqno = i915_add_request(dev, file_priv, flush_domains, ring);
3817         BUG_ON(seqno == 0);
3818         for (i = 0; i < args->buffer_count; i++) {
3819                 struct drm_gem_object *obj = object_list[i];
3820                 obj_priv = to_intel_bo(obj);
3821
3822                 i915_gem_object_move_to_active(obj, seqno, ring);
3823 #if WATCH_LRU
3824                 DRM_INFO("%s: move to exec list %p\n", __func__, obj);
3825 #endif
3826         }
3827 #if WATCH_LRU
3828         i915_dump_lru(dev, __func__);
3829 #endif
3830
3831         i915_verify_inactive(dev, __FILE__, __LINE__);
3832
3833 err:
3834         for (i = 0; i < pinned; i++)
3835                 i915_gem_object_unpin(object_list[i]);
3836
3837         for (i = 0; i < args->buffer_count; i++) {
3838                 if (object_list[i]) {
3839                         obj_priv = to_intel_bo(object_list[i]);
3840                         obj_priv->in_execbuffer = false;
3841                 }
3842                 drm_gem_object_unreference(object_list[i]);
3843         }
3844
3845         mutex_unlock(&dev->struct_mutex);
3846
3847 pre_mutex_err:
3848         /* Copy the updated relocations out regardless of current error
3849          * state.  Failure to update the relocs would mean that the next
3850          * time userland calls execbuf, it would do so with presumed offset
3851          * state that didn't match the actual object state.
3852          */
3853         ret2 = i915_gem_put_relocs_to_user(exec_list, args->buffer_count,
3854                                            relocs);
3855         if (ret2 != 0) {
3856                 DRM_ERROR("Failed to copy relocations back out: %d\n", ret2);
3857
3858                 if (ret == 0)
3859                         ret = ret2;
3860         }
3861
3862         drm_free_large(object_list);
3863         kfree(cliprects);
3864
3865         return ret;
3866 }
3867
3868 /*
3869  * Legacy execbuffer just creates an exec2 list from the original exec object
3870  * list array and passes it to the real function.
3871  */
3872 int
3873 i915_gem_execbuffer(struct drm_device *dev, void *data,
3874                     struct drm_file *file_priv)
3875 {
3876         struct drm_i915_gem_execbuffer *args = data;
3877         struct drm_i915_gem_execbuffer2 exec2;
3878         struct drm_i915_gem_exec_object *exec_list = NULL;
3879         struct drm_i915_gem_exec_object2 *exec2_list = NULL;
3880         int ret, i;
3881
3882 #if WATCH_EXEC
3883         DRM_INFO("buffers_ptr %d buffer_count %d len %08x\n",
3884                   (int) args->buffers_ptr, args->buffer_count, args->batch_len);
3885 #endif
3886
3887         if (args->buffer_count < 1) {
3888                 DRM_ERROR("execbuf with %d buffers\n", args->buffer_count);
3889                 return -EINVAL;
3890         }
3891
3892         /* Copy in the exec list from userland */
3893         exec_list = drm_malloc_ab(sizeof(*exec_list), args->buffer_count);
3894         exec2_list = drm_malloc_ab(sizeof(*exec2_list), args->buffer_count);
3895         if (exec_list == NULL || exec2_list == NULL) {
3896                 DRM_ERROR("Failed to allocate exec list for %d buffers\n",
3897                           args->buffer_count);
3898                 drm_free_large(exec_list);
3899                 drm_free_large(exec2_list);
3900                 return -ENOMEM;
3901         }
3902         ret = copy_from_user(exec_list,
3903                              (struct drm_i915_gem_exec_object __user *)
3904                              (uintptr_t) args->buffers_ptr,
3905                              sizeof(*exec_list) * args->buffer_count);
3906         if (ret != 0) {
3907                 DRM_ERROR("copy %d exec entries failed %d\n",
3908                           args->buffer_count, ret);
3909                 drm_free_large(exec_list);
3910                 drm_free_large(exec2_list);
3911                 return -EFAULT;
3912         }
3913
3914         for (i = 0; i < args->buffer_count; i++) {
3915                 exec2_list[i].handle = exec_list[i].handle;
3916                 exec2_list[i].relocation_count = exec_list[i].relocation_count;
3917                 exec2_list[i].relocs_ptr = exec_list[i].relocs_ptr;
3918                 exec2_list[i].alignment = exec_list[i].alignment;
3919                 exec2_list[i].offset = exec_list[i].offset;
3920                 if (!IS_I965G(dev))
3921                         exec2_list[i].flags = EXEC_OBJECT_NEEDS_FENCE;
3922                 else
3923                         exec2_list[i].flags = 0;
3924         }
3925
3926         exec2.buffers_ptr = args->buffers_ptr;
3927         exec2.buffer_count = args->buffer_count;
3928         exec2.batch_start_offset = args->batch_start_offset;
3929         exec2.batch_len = args->batch_len;
3930         exec2.DR1 = args->DR1;
3931         exec2.DR4 = args->DR4;
3932         exec2.num_cliprects = args->num_cliprects;
3933         exec2.cliprects_ptr = args->cliprects_ptr;
3934         exec2.flags = I915_EXEC_RENDER;
3935
3936         ret = i915_gem_do_execbuffer(dev, data, file_priv, &exec2, exec2_list);
3937         if (!ret) {
3938                 /* Copy the new buffer offsets back to the user's exec list. */
3939                 for (i = 0; i < args->buffer_count; i++)
3940                         exec_list[i].offset = exec2_list[i].offset;
3941                 /* ... and back out to userspace */
3942                 ret = copy_to_user((struct drm_i915_gem_exec_object __user *)
3943                                    (uintptr_t) args->buffers_ptr,
3944                                    exec_list,
3945                                    sizeof(*exec_list) * args->buffer_count);
3946                 if (ret) {
3947                         ret = -EFAULT;
3948                         DRM_ERROR("failed to copy %d exec entries "
3949                                   "back to user (%d)\n",
3950                                   args->buffer_count, ret);
3951                 }
3952         }
3953
3954         drm_free_large(exec_list);
3955         drm_free_large(exec2_list);
3956         return ret;
3957 }
3958
3959 int
3960 i915_gem_execbuffer2(struct drm_device *dev, void *data,
3961                      struct drm_file *file_priv)
3962 {
3963         struct drm_i915_gem_execbuffer2 *args = data;
3964         struct drm_i915_gem_exec_object2 *exec2_list = NULL;
3965         int ret;
3966
3967 #if WATCH_EXEC
3968         DRM_INFO("buffers_ptr %d buffer_count %d len %08x\n",
3969                   (int) args->buffers_ptr, args->buffer_count, args->batch_len);
3970 #endif
3971
3972         if (args->buffer_count < 1) {
3973                 DRM_ERROR("execbuf2 with %d buffers\n", args->buffer_count);
3974                 return -EINVAL;
3975         }
3976
3977         exec2_list = drm_malloc_ab(sizeof(*exec2_list), args->buffer_count);
3978         if (exec2_list == NULL) {
3979                 DRM_ERROR("Failed to allocate exec list for %d buffers\n",
3980                           args->buffer_count);
3981                 return -ENOMEM;
3982         }
3983         ret = copy_from_user(exec2_list,
3984                              (struct drm_i915_gem_exec_object2 __user *)
3985                              (uintptr_t) args->buffers_ptr,
3986                              sizeof(*exec2_list) * args->buffer_count);
3987         if (ret != 0) {
3988                 DRM_ERROR("copy %d exec entries failed %d\n",
3989                           args->buffer_count, ret);
3990                 drm_free_large(exec2_list);
3991                 return -EFAULT;
3992         }
3993
3994         ret = i915_gem_do_execbuffer(dev, data, file_priv, args, exec2_list);
3995         if (!ret) {
3996                 /* Copy the new buffer offsets back to the user's exec list. */
3997                 ret = copy_to_user((struct drm_i915_gem_exec_object2 __user *)
3998                                    (uintptr_t) args->buffers_ptr,
3999                                    exec2_list,
4000                                    sizeof(*exec2_list) * args->buffer_count);
4001                 if (ret) {
4002                         ret = -EFAULT;
4003                         DRM_ERROR("failed to copy %d exec entries "
4004                                   "back to user (%d)\n",
4005                                   args->buffer_count, ret);
4006                 }
4007         }
4008
4009         drm_free_large(exec2_list);
4010         return ret;
4011 }
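
/*
 * Illustrative userspace sketch (not driver code): a minimal execbuffer2
 * submission that ends up in i915_gem_do_execbuffer() above.  The fd and
 * GEM handle are assumed to exist, EINTR handling is omitted, and the batch
 * buffer must be the last entry in the object array, as required above.
 */
#if 0
#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <drm/i915_drm.h>

static int submit_batch_example(int fd, uint32_t batch_handle, uint32_t batch_len)
{
        struct drm_i915_gem_exec_object2 obj[1];
        struct drm_i915_gem_execbuffer2 execbuf;

        memset(obj, 0, sizeof(obj));
        obj[0].handle = batch_handle;           /* sole (and thus last) buffer */

        memset(&execbuf, 0, sizeof(execbuf));
        execbuf.buffers_ptr = (uintptr_t)obj;
        execbuf.buffer_count = 1;
        execbuf.batch_start_offset = 0;
        execbuf.batch_len = batch_len;          /* must be 8-byte aligned */
        execbuf.flags = I915_EXEC_RENDER;       /* or I915_EXEC_BSD where HAS_BSD */

        return ioctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &execbuf);
}
#endif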
4012
4013 int
4014 i915_gem_object_pin(struct drm_gem_object *obj, uint32_t alignment)
4015 {
4016         struct drm_device *dev = obj->dev;
4017         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
4018         int ret;
4019
4020         BUG_ON(obj_priv->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT);
4021
4022         i915_verify_inactive(dev, __FILE__, __LINE__);
4023
4024         if (obj_priv->gtt_space != NULL) {
4025                 if (alignment == 0)
4026                         alignment = i915_gem_get_gtt_alignment(obj);
4027                 if (obj_priv->gtt_offset & (alignment - 1)) {
4028                         WARN(obj_priv->pin_count,
4029                              "bo is already pinned with incorrect alignment:"
4030                              " offset=%x, req.alignment=%x\n",
4031                              obj_priv->gtt_offset, alignment);
4032                         ret = i915_gem_object_unbind(obj);
4033                         if (ret)
4034                                 return ret;
4035                 }
4036         }
4037
4038         if (obj_priv->gtt_space == NULL) {
4039                 ret = i915_gem_object_bind_to_gtt(obj, alignment);
4040                 if (ret)
4041                         return ret;
4042         }
4043
4044         obj_priv->pin_count++;
4045
4046         /* If the object is not active and not pending a flush,
4047          * remove it from the inactive list
4048          */
4049         if (obj_priv->pin_count == 1) {
4050                 atomic_inc(&dev->pin_count);
4051                 atomic_add(obj->size, &dev->pin_memory);
4052                 if (!obj_priv->active &&
4053                     (obj->write_domain & I915_GEM_GPU_DOMAINS) == 0)
4054                         list_del_init(&obj_priv->list);
4055         }
4056         i915_verify_inactive(dev, __FILE__, __LINE__);
4057
4058         return 0;
4059 }
4060
4061 void
4062 i915_gem_object_unpin(struct drm_gem_object *obj)
4063 {
4064         struct drm_device *dev = obj->dev;
4065         drm_i915_private_t *dev_priv = dev->dev_private;
4066         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
4067
4068         i915_verify_inactive(dev, __FILE__, __LINE__);
4069         obj_priv->pin_count--;
4070         BUG_ON(obj_priv->pin_count < 0);
4071         BUG_ON(obj_priv->gtt_space == NULL);
4072
4073         /* If the object is no longer pinned, and is
4074          * neither active nor being flushed, then stick it on
4075          * the inactive list
4076          */
4077         if (obj_priv->pin_count == 0) {
4078                 if (!obj_priv->active &&
4079                     (obj->write_domain & I915_GEM_GPU_DOMAINS) == 0)
4080                         list_move_tail(&obj_priv->list,
4081                                        &dev_priv->mm.inactive_list);
4082                 atomic_dec(&dev->pin_count);
4083                 atomic_sub(obj->size, &dev->pin_memory);
4084         }
4085         i915_verify_inactive(dev, __FILE__, __LINE__);
4086 }
4087
4088 int
4089 i915_gem_pin_ioctl(struct drm_device *dev, void *data,
4090                    struct drm_file *file_priv)
4091 {
4092         struct drm_i915_gem_pin *args = data;
4093         struct drm_gem_object *obj;
4094         struct drm_i915_gem_object *obj_priv;
4095         int ret;
4096
4097         mutex_lock(&dev->struct_mutex);
4098
4099         obj = drm_gem_object_lookup(dev, file_priv, args->handle);
4100         if (obj == NULL) {
4101                 DRM_ERROR("Bad handle in i915_gem_pin_ioctl(): %d\n",
4102                           args->handle);
4103                 mutex_unlock(&dev->struct_mutex);
4104                 return -EBADF;
4105         }
4106         obj_priv = to_intel_bo(obj);
4107
4108         if (obj_priv->madv != I915_MADV_WILLNEED) {
4109                 DRM_ERROR("Attempting to pin a purgeable buffer\n");
4110                 drm_gem_object_unreference(obj);
4111                 mutex_unlock(&dev->struct_mutex);
4112                 return -EINVAL;
4113         }
4114
4115         if (obj_priv->pin_filp != NULL && obj_priv->pin_filp != file_priv) {
4116                 DRM_ERROR("Already pinned in i915_gem_pin_ioctl(): %d\n",
4117                           args->handle);
4118                 drm_gem_object_unreference(obj);
4119                 mutex_unlock(&dev->struct_mutex);
4120                 return -EINVAL;
4121         }
4122
4123         obj_priv->user_pin_count++;
4124         obj_priv->pin_filp = file_priv;
4125         if (obj_priv->user_pin_count == 1) {
4126                 ret = i915_gem_object_pin(obj, args->alignment);
4127                 if (ret != 0) {
4128                         drm_gem_object_unreference(obj);
4129                         mutex_unlock(&dev->struct_mutex);
4130                         return ret;
4131                 }
4132         }
4133
4134         /* XXX - flush the CPU caches for pinned objects
4135          * as the X server doesn't manage domains yet
4136          */
4137         i915_gem_object_flush_cpu_write_domain(obj);
4138         args->offset = obj_priv->gtt_offset;
4139         drm_gem_object_unreference(obj);
4140         mutex_unlock(&dev->struct_mutex);
4141
4142         return 0;
4143 }
4144
4145 int
4146 i915_gem_unpin_ioctl(struct drm_device *dev, void *data,
4147                      struct drm_file *file_priv)
4148 {
4149         struct drm_i915_gem_pin *args = data;
4150         struct drm_gem_object *obj;
4151         struct drm_i915_gem_object *obj_priv;
4152
4153         mutex_lock(&dev->struct_mutex);
4154
4155         obj = drm_gem_object_lookup(dev, file_priv, args->handle);
4156         if (obj == NULL) {
4157                 DRM_ERROR("Bad handle in i915_gem_unpin_ioctl(): %d\n",
4158                           args->handle);
4159                 mutex_unlock(&dev->struct_mutex);
4160                 return -EBADF;
4161         }
4162
4163         obj_priv = to_intel_bo(obj);
4164         if (obj_priv->pin_filp != file_priv) {
4165                 DRM_ERROR("Not pinned by caller in i915_gem_unpin_ioctl(): %d\n",
4166                           args->handle);
4167                 drm_gem_object_unreference(obj);
4168                 mutex_unlock(&dev->struct_mutex);
4169                 return -EINVAL;
4170         }
4171         obj_priv->user_pin_count--;
4172         if (obj_priv->user_pin_count == 0) {
4173                 obj_priv->pin_filp = NULL;
4174                 i915_gem_object_unpin(obj);
4175         }
4176
4177         drm_gem_object_unreference(obj);
4178         mutex_unlock(&dev->struct_mutex);
4179         return 0;
4180 }
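
/*
 * Illustrative userspace sketch (not driver code): how a display server
 * might use the two ioctls above to pin a scanout buffer and learn its
 * fixed GTT offset.  The fd, handle and helper name are assumptions, and
 * pinning is normally reserved for the privileged DRM client (the X server).
 */
#if 0
#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <drm/i915_drm.h>

static int pin_scanout_example(int fd, uint32_t handle, uint64_t *gtt_offset)
{
        struct drm_i915_gem_pin pin;
        struct drm_i915_gem_unpin unpin;
        int ret;

        memset(&pin, 0, sizeof(pin));
        pin.handle = handle;
        pin.alignment = 0;              /* 0 lets the kernel choose the alignment */

        ret = ioctl(fd, DRM_IOCTL_I915_GEM_PIN, &pin);
        if (ret)
                return ret;
        *gtt_offset = pin.offset;       /* stable for as long as the pin is held */

        /* ... program the scanout registers, then much later ... */
        memset(&unpin, 0, sizeof(unpin));
        unpin.handle = handle;
        return ioctl(fd, DRM_IOCTL_I915_GEM_UNPIN, &unpin);
}
#endif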
4181
4182 int
4183 i915_gem_busy_ioctl(struct drm_device *dev, void *data,
4184                     struct drm_file *file_priv)
4185 {
4186         struct drm_i915_gem_busy *args = data;
4187         struct drm_gem_object *obj;
4188         struct drm_i915_gem_object *obj_priv;
4189
4190         obj = drm_gem_object_lookup(dev, file_priv, args->handle);
4191         if (obj == NULL) {
4192                 DRM_ERROR("Bad handle in i915_gem_busy_ioctl(): %d\n",
4193                           args->handle);
4194                 return -EBADF;
4195         }
4196
4197         mutex_lock(&dev->struct_mutex);
4198
4199         /* Count all active objects as busy, even if they are currently not used
4200          * by the gpu. Users of this interface expect objects to eventually
4201          * become non-busy without any further actions, therefore emit any
4202          * necessary flushes here.
4203          */
4204         obj_priv = to_intel_bo(obj);
4205         args->busy = obj_priv->active;
4206         if (args->busy) {
4207                 /* Unconditionally flush objects, even when the gpu still uses this
4208                  * object. Userspace calling this function indicates that it wants to
4209                  * use this buffer rather sooner than later, so issuing the required
4210                  * flush earlier is beneficial.
4211                  */
4212                 if (obj->write_domain) {
4213                         i915_gem_flush(dev, 0, obj->write_domain);
4214                         (void)i915_add_request(dev, file_priv, obj->write_domain, obj_priv->ring);
4215                 }
4216
4217                 /* Update the active list for the hardware's current position.
4218                  * Otherwise this only updates on a delayed timer or when irqs
4219                  * are actually unmasked, and our working set ends up being
4220                  * larger than required.
4221                  */
4222                 i915_gem_retire_requests_ring(dev, obj_priv->ring);
4223
4224                 args->busy = obj_priv->active;
4225         }
4226
4227         drm_gem_object_unreference(obj);
4228         mutex_unlock(&dev->struct_mutex);
4229         return 0;
4230 }
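     /*
      * Illustrative only: userspace normally reaches this ioctl through
      * libdrm.  A minimal, hedged sketch of polling a buffer for idleness
      * (fd, handle and reuse_buffer() are hypothetical names):
      *
      *         struct drm_i915_gem_busy busy = { .handle = handle };
      *
      *         if (drmIoctl(fd, DRM_IOCTL_I915_GEM_BUSY, &busy) == 0 && !busy.busy)
      *                 reuse_buffer(handle);   // idle: safe to touch without stalling
      */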
4231
4232 int
4233 i915_gem_throttle_ioctl(struct drm_device *dev, void *data,
4234                         struct drm_file *file_priv)
4235 {
4236         return i915_gem_ring_throttle(dev, file_priv);
4237 }
4238
4239 int
4240 i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
4241                        struct drm_file *file_priv)
4242 {
4243         struct drm_i915_gem_madvise *args = data;
4244         struct drm_gem_object *obj;
4245         struct drm_i915_gem_object *obj_priv;
4246
4247         switch (args->madv) {
4248         case I915_MADV_DONTNEED:
4249         case I915_MADV_WILLNEED:
4250                 break;
4251         default:
4252                 return -EINVAL;
4253         }
4254
4255         obj = drm_gem_object_lookup(dev, file_priv, args->handle);
4256         if (obj == NULL) {
4257                 DRM_ERROR("Bad handle in i915_gem_madvise_ioctl(): %d\n",
4258                           args->handle);
4259                 return -EBADF;
4260         }
4261
4262         mutex_lock(&dev->struct_mutex);
4263         obj_priv = to_intel_bo(obj);
4264
4265         if (obj_priv->pin_count) {
4266                 drm_gem_object_unreference(obj);
4267                 mutex_unlock(&dev->struct_mutex);
4268
4269                 DRM_ERROR("Attempted i915_gem_madvise_ioctl() on a pinned object\n");
4270                 return -EINVAL;
4271         }
4272
4273         if (obj_priv->madv != __I915_MADV_PURGED)
4274                 obj_priv->madv = args->madv;
4275
4276         /* if the object is no longer bound, discard its backing storage */
4277         if (i915_gem_object_is_purgeable(obj_priv) &&
4278             obj_priv->gtt_space == NULL)
4279                 i915_gem_object_truncate(obj);
4280
4281         args->retained = obj_priv->madv != __I915_MADV_PURGED;
4282
4283         drm_gem_object_unreference(obj);
4284         mutex_unlock(&dev->struct_mutex);
4285
4286         return 0;
4287 }
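
     /*
      * Illustrative only: a userspace buffer cache might mark idle buffers
      * purgeable and reclaim them before reuse.  A hedged sketch (fd, handle
      * and allocate_fresh_buffer() are hypothetical names):
      *
      *         struct drm_i915_gem_madvise madv = {
      *                 .handle = handle,
      *                 .madv = I915_MADV_DONTNEED,
      *         };
      *
      *         drmIoctl(fd, DRM_IOCTL_I915_GEM_MADVISE, &madv);
      *         ...
      *         madv.madv = I915_MADV_WILLNEED;
      *         drmIoctl(fd, DRM_IOCTL_I915_GEM_MADVISE, &madv);
      *         if (!madv.retained)
      *                 allocate_fresh_buffer();        // contents were purged meanwhile
      */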
4288
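     /**
      * i915_gem_alloc_object - allocate and initialise a GEM object
      * @dev: DRM device
      * @size: requested size of the object's backing storage, in bytes
      *
      * The new object starts out unbound, in the CPU read/write domains and
      * with no fence register assigned.  Returns NULL on allocation failure.
      */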
4289 struct drm_gem_object * i915_gem_alloc_object(struct drm_device *dev,
4290                                               size_t size)
4291 {
4292         struct drm_i915_gem_object *obj;
4293
4294         obj = kzalloc(sizeof(*obj), GFP_KERNEL);
4295         if (obj == NULL)
4296                 return NULL;
4297
4298         if (drm_gem_object_init(dev, &obj->base, size) != 0) {
4299                 kfree(obj);
4300                 return NULL;
4301         }
4302
4303         obj->base.write_domain = I915_GEM_DOMAIN_CPU;
4304         obj->base.read_domains = I915_GEM_DOMAIN_CPU;
4305
4306         obj->agp_type = AGP_USER_MEMORY;
4307         obj->base.driver_private = NULL;
4308         obj->fence_reg = I915_FENCE_REG_NONE;
4309         INIT_LIST_HEAD(&obj->list);
4310         INIT_LIST_HEAD(&obj->gpu_write_list);
4311         obj->madv = I915_MADV_WILLNEED;
4312
4313         trace_i915_gem_object_create(&obj->base);
4314
4315         return &obj->base;
4316 }
4317
4318 int i915_gem_init_object(struct drm_gem_object *obj)
4319 {
4320         BUG();
4321
4322         return 0;
4323 }
4324
4325 static void i915_gem_free_object_tail(struct drm_gem_object *obj)
4326 {
4327         struct drm_device *dev = obj->dev;
4328         drm_i915_private_t *dev_priv = dev->dev_private;
4329         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
4330         int ret;
4331
4332         ret = i915_gem_object_unbind(obj);
4333         if (ret == -ERESTARTSYS) {
4334                 list_move(&obj_priv->list,
4335                           &dev_priv->mm.deferred_free_list);
4336                 return;
4337         }
4338
4339         if (obj_priv->mmap_offset)
4340                 i915_gem_free_mmap_offset(obj);
4341
4342         drm_gem_object_release(obj);
4343
4344         kfree(obj_priv->page_cpu_valid);
4345         kfree(obj_priv->bit_17);
4346         kfree(obj_priv);
4347 }
4348
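     /**
      * i915_gem_free_object - final unreference callback for a GEM object
      * @obj: object being destroyed
      *
      * Drops any leftover pins and physical-object attachment, then hands off
      * to i915_gem_free_object_tail(), which unbinds the object (deferring
      * the free if the unbind is interrupted) and releases its storage.
      */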
4349 void i915_gem_free_object(struct drm_gem_object *obj)
4350 {
4351         struct drm_device *dev = obj->dev;
4352         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
4353
4354         trace_i915_gem_object_destroy(obj);
4355
4356         while (obj_priv->pin_count > 0)
4357                 i915_gem_object_unpin(obj);
4358
4359         if (obj_priv->phys_obj)
4360                 i915_gem_detach_phys_object(dev, obj);
4361
4362         i915_gem_free_object_tail(obj);
4363 }
4364
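     /**
      * i915_gem_idle - quiesce the GPU before suspend or VT switch
      * @dev: DRM device
      *
      * Waits for outstanding rendering to complete, evicts inactive objects
      * under UMS, marks the device suspended and tears down the ring buffers.
      * Returns 0 on success or a negative error code from the idle or evict
      * paths.
      */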
4365 int
4366 i915_gem_idle(struct drm_device *dev)
4367 {
4368         drm_i915_private_t *dev_priv = dev->dev_private;
4369         int ret;
4370
4371         mutex_lock(&dev->struct_mutex);
4372
4373         if (dev_priv->mm.suspended ||
4374                         (dev_priv->render_ring.gem_object == NULL) ||
4375                         (HAS_BSD(dev) &&
4376                          dev_priv->bsd_ring.gem_object == NULL)) {
4377                 mutex_unlock(&dev->struct_mutex);
4378                 return 0;
4379         }
4380
4381         ret = i915_gpu_idle(dev);
4382         if (ret) {
4383                 mutex_unlock(&dev->struct_mutex);
4384                 return ret;
4385         }
4386
4387         /* Under UMS, be paranoid and evict. */
4388         if (!drm_core_check_feature(dev, DRIVER_MODESET)) {
4389                 ret = i915_gem_evict_inactive(dev);
4390                 if (ret) {
4391                         mutex_unlock(&dev->struct_mutex);
4392                         return ret;
4393                 }
4394         }
4395
4396         /* Hack!  Don't let anybody do execbuf while we don't control the chip.
4397          * We need to replace this with a semaphore, or something.
4398          * And not confound mm.suspended!
4399          */
4400         dev_priv->mm.suspended = 1;
4401         del_timer(&dev_priv->hangcheck_timer);
4402
4403         i915_kernel_lost_context(dev);
4404         i915_gem_cleanup_ringbuffer(dev);
4405
4406         mutex_unlock(&dev->struct_mutex);
4407
4408         /* Cancel the retire work handler, which should be idle now. */
4409         cancel_delayed_work_sync(&dev_priv->mm.retire_work);
4410
4411         return 0;
4412 }
4413
4414 /*
4415  * 965+ support PIPE_CONTROL commands, which provide finer grained control
4416  * over cache flushing.
4417  */
4418 static int
4419 i915_gem_init_pipe_control(struct drm_device *dev)
4420 {
4421         drm_i915_private_t *dev_priv = dev->dev_private;
4422         struct drm_gem_object *obj;
4423         struct drm_i915_gem_object *obj_priv;
4424         int ret;
4425
4426         obj = i915_gem_alloc_object(dev, 4096);
4427         if (obj == NULL) {
4428                 DRM_ERROR("Failed to allocate seqno page\n");
4429                 ret = -ENOMEM;
4430                 goto err;
4431         }
4432         obj_priv = to_intel_bo(obj);
4433         obj_priv->agp_type = AGP_USER_CACHED_MEMORY;
4434
4435         ret = i915_gem_object_pin(obj, 4096);
4436         if (ret)
4437                 goto err_unref;
4438
4439         dev_priv->seqno_gfx_addr = obj_priv->gtt_offset;
4440         dev_priv->seqno_page = kmap(obj_priv->pages[0]);
4441         if (dev_priv->seqno_page == NULL) {
4442                 ret = -ENOMEM;
                     goto err_unpin;
             }
4443
4444         dev_priv->seqno_obj = obj;
4445         memset(dev_priv->seqno_page, 0, PAGE_SIZE);
4446
4447         return 0;
4448
4449 err_unpin:
4450         i915_gem_object_unpin(obj);
4451 err_unref:
4452         drm_gem_object_unreference(obj);
4453 err:
4454         return ret;
4455 }
4456
4457
4458 static void
4459 i915_gem_cleanup_pipe_control(struct drm_device *dev)
4460 {
4461         drm_i915_private_t *dev_priv = dev->dev_private;
4462         struct drm_gem_object *obj;
4463         struct drm_i915_gem_object *obj_priv;
4464
4465         obj = dev_priv->seqno_obj;
4466         obj_priv = to_intel_bo(obj);
4467         kunmap(obj_priv->pages[0]);
4468         i915_gem_object_unpin(obj);
4469         drm_gem_object_unreference(obj);
4470         dev_priv->seqno_obj = NULL;
4471
4472         dev_priv->seqno_page = NULL;
4473 }
4474
4475 int
4476 i915_gem_init_ringbuffer(struct drm_device *dev)
4477 {
4478         drm_i915_private_t *dev_priv = dev->dev_private;
4479         int ret;
4480
4481         dev_priv->render_ring = render_ring;
4482
4483         if (!I915_NEED_GFX_HWS(dev)) {
4484                 dev_priv->render_ring.status_page.page_addr
4485                         = dev_priv->status_page_dmah->vaddr;
4486                 memset(dev_priv->render_ring.status_page.page_addr,
4487                                 0, PAGE_SIZE);
4488         }
4489
4490         if (HAS_PIPE_CONTROL(dev)) {
4491                 ret = i915_gem_init_pipe_control(dev);
4492                 if (ret)
4493                         return ret;
4494         }
4495
4496         ret = intel_init_ring_buffer(dev, &dev_priv->render_ring);
4497         if (ret)
4498                 goto cleanup_pipe_control;
4499
4500         if (HAS_BSD(dev)) {
4501                 dev_priv->bsd_ring = bsd_ring;
4502                 ret = intel_init_ring_buffer(dev, &dev_priv->bsd_ring);
4503                 if (ret)
4504                         goto cleanup_render_ring;
4505         }
4506
4507         dev_priv->next_seqno = 1;
4508
4509         return 0;
4510
4511 cleanup_render_ring:
4512         intel_cleanup_ring_buffer(dev, &dev_priv->render_ring);
4513 cleanup_pipe_control:
4514         if (HAS_PIPE_CONTROL(dev))
4515                 i915_gem_cleanup_pipe_control(dev);
4516         return ret;
4517 }
4518
4519 void
4520 i915_gem_cleanup_ringbuffer(struct drm_device *dev)
4521 {
4522         drm_i915_private_t *dev_priv = dev->dev_private;
4523
4524         intel_cleanup_ring_buffer(dev, &dev_priv->render_ring);
4525         if (HAS_BSD(dev))
4526                 intel_cleanup_ring_buffer(dev, &dev_priv->bsd_ring);
4527         if (HAS_PIPE_CONTROL(dev))
4528                 i915_gem_cleanup_pipe_control(dev);
4529 }
4530
4531 int
4532 i915_gem_entervt_ioctl(struct drm_device *dev, void *data,
4533                        struct drm_file *file_priv)
4534 {
4535         drm_i915_private_t *dev_priv = dev->dev_private;
4536         int ret;
4537
4538         if (drm_core_check_feature(dev, DRIVER_MODESET))
4539                 return 0;
4540
4541         if (atomic_read(&dev_priv->mm.wedged)) {
4542                 DRM_ERROR("Reenabling wedged hardware, good luck\n");
4543                 atomic_set(&dev_priv->mm.wedged, 0);
4544         }
4545
4546         mutex_lock(&dev->struct_mutex);
4547         dev_priv->mm.suspended = 0;
4548
4549         ret = i915_gem_init_ringbuffer(dev);
4550         if (ret != 0) {
4551                 mutex_unlock(&dev->struct_mutex);
4552                 return ret;
4553         }
4554
4555         spin_lock(&dev_priv->mm.active_list_lock);
4556         BUG_ON(!list_empty(&dev_priv->render_ring.active_list));
4557         BUG_ON(HAS_BSD(dev) && !list_empty(&dev_priv->bsd_ring.active_list));
4558         spin_unlock(&dev_priv->mm.active_list_lock);
4559
4560         BUG_ON(!list_empty(&dev_priv->mm.flushing_list));
4561         BUG_ON(!list_empty(&dev_priv->mm.inactive_list));
4562         BUG_ON(!list_empty(&dev_priv->render_ring.request_list));
4563         BUG_ON(HAS_BSD(dev) && !list_empty(&dev_priv->bsd_ring.request_list));
4564         mutex_unlock(&dev->struct_mutex);
4565
4566         ret = drm_irq_install(dev);
4567         if (ret)
4568                 goto cleanup_ringbuffer;
4569
4570         return 0;
4571
4572 cleanup_ringbuffer:
4573         mutex_lock(&dev->struct_mutex);
4574         i915_gem_cleanup_ringbuffer(dev);
4575         dev_priv->mm.suspended = 1;
4576         mutex_unlock(&dev->struct_mutex);
4577
4578         return ret;
4579 }
4580
4581 int
4582 i915_gem_leavevt_ioctl(struct drm_device *dev, void *data,
4583                        struct drm_file *file_priv)
4584 {
4585         if (drm_core_check_feature(dev, DRIVER_MODESET))
4586                 return 0;
4587
4588         drm_irq_uninstall(dev);
4589         return i915_gem_idle(dev);
4590 }
4591
4592 void
4593 i915_gem_lastclose(struct drm_device *dev)
4594 {
4595         int ret;
4596
4597         if (drm_core_check_feature(dev, DRIVER_MODESET))
4598                 return;
4599
4600         ret = i915_gem_idle(dev);
4601         if (ret)
4602                 DRM_ERROR("failed to idle hardware: %d\n", ret);
4603 }
4604
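     /**
      * i915_gem_load - one-time GEM state initialisation at driver load
      * @dev: DRM device
      *
      * Sets up the memory-manager lists, the retire work handler, fence
      * register bookkeeping, chipset workarounds such as the GEN3 ARB C3 LP
      * bit, and adds the device to the global shrinker list.
      */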
4605 void
4606 i915_gem_load(struct drm_device *dev)
4607 {
4608         int i;
4609         drm_i915_private_t *dev_priv = dev->dev_private;
4610
4611         spin_lock_init(&dev_priv->mm.active_list_lock);
4612         INIT_LIST_HEAD(&dev_priv->mm.flushing_list);
4613         INIT_LIST_HEAD(&dev_priv->mm.gpu_write_list);
4614         INIT_LIST_HEAD(&dev_priv->mm.inactive_list);
4615         INIT_LIST_HEAD(&dev_priv->mm.fence_list);
4616         INIT_LIST_HEAD(&dev_priv->mm.deferred_free_list);
4617         INIT_LIST_HEAD(&dev_priv->render_ring.active_list);
4618         INIT_LIST_HEAD(&dev_priv->render_ring.request_list);
4619         if (HAS_BSD(dev)) {
4620                 INIT_LIST_HEAD(&dev_priv->bsd_ring.active_list);
4621                 INIT_LIST_HEAD(&dev_priv->bsd_ring.request_list);
4622         }
4623         for (i = 0; i < 16; i++)
4624                 INIT_LIST_HEAD(&dev_priv->fence_regs[i].lru_list);
4625         INIT_DELAYED_WORK(&dev_priv->mm.retire_work,
4626                           i915_gem_retire_work_handler);
4627         spin_lock(&shrink_list_lock);
4628         list_add(&dev_priv->mm.shrink_list, &shrink_list);
4629         spin_unlock(&shrink_list_lock);
4630
4631         /* On GEN3 we really need to make sure the ARB C3 LP bit is set */
4632         if (IS_GEN3(dev)) {
4633                 u32 tmp = I915_READ(MI_ARB_STATE);
4634                 if (!(tmp & MI_ARB_C3_LP_WRITE_ENABLE)) {
4635                         /* arb state is a masked write: set the bit plus its write-enable bit in the mask */
4636                         tmp = MI_ARB_C3_LP_WRITE_ENABLE | (MI_ARB_C3_LP_WRITE_ENABLE << MI_ARB_MASK_SHIFT);
4637                         I915_WRITE(MI_ARB_STATE, tmp);
4638                 }
4639         }
4640
4641         /* Old X drivers will take 0-2 for front, back, depth buffers */
4642         if (!drm_core_check_feature(dev, DRIVER_MODESET))
4643                 dev_priv->fence_reg_start = 3;
4644
4645         if (IS_I965G(dev) || IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev))
4646                 dev_priv->num_fence_regs = 16;
4647         else
4648                 dev_priv->num_fence_regs = 8;
4649
4650         /* Initialize fence registers to zero */
4651         if (IS_I965G(dev)) {
4652                 for (i = 0; i < 16; i++)
4653                         I915_WRITE64(FENCE_REG_965_0 + (i * 8), 0);
4654         } else {
4655                 for (i = 0; i < 8; i++)
4656                         I915_WRITE(FENCE_REG_830_0 + (i * 4), 0);
4657                 if (IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev))
4658                         for (i = 0; i < 8; i++)
4659                                 I915_WRITE(FENCE_REG_945_8 + (i * 4), 0);
4660         }
4661         i915_gem_detect_bit_6_swizzle(dev);
4662         init_waitqueue_head(&dev_priv->pending_flip_queue);
4663 }
4664
4665 /*
4666  * Create a physically contiguous memory object for this object
4667  * e.g. for cursor + overlay regs
4668  */
4669 int i915_gem_init_phys_object(struct drm_device *dev,
4670                               int id, int size)
4671 {
4672         drm_i915_private_t *dev_priv = dev->dev_private;
4673         struct drm_i915_gem_phys_object *phys_obj;
4674         int ret;
4675
4676         if (dev_priv->mm.phys_objs[id - 1] || !size)
4677                 return 0;
4678
4679         phys_obj = kzalloc(sizeof(struct drm_i915_gem_phys_object), GFP_KERNEL);
4680         if (!phys_obj)
4681                 return -ENOMEM;
4682
4683         phys_obj->id = id;
4684
4685         phys_obj->handle = drm_pci_alloc(dev, size, 0);
4686         if (!phys_obj->handle) {
4687                 ret = -ENOMEM;
4688                 goto kfree_obj;
4689         }
4690 #ifdef CONFIG_X86
4691         set_memory_wc((unsigned long)phys_obj->handle->vaddr, phys_obj->handle->size / PAGE_SIZE);
4692 #endif
4693
4694         dev_priv->mm.phys_objs[id - 1] = phys_obj;
4695
4696         return 0;
4697 kfree_obj:
4698         kfree(phys_obj);
4699         return ret;
4700 }
4701
4702 void i915_gem_free_phys_object(struct drm_device *dev, int id)
4703 {
4704         drm_i915_private_t *dev_priv = dev->dev_private;
4705         struct drm_i915_gem_phys_object *phys_obj;
4706
4707         if (!dev_priv->mm.phys_objs[id - 1])
4708                 return;
4709
4710         phys_obj = dev_priv->mm.phys_objs[id - 1];
4711         if (phys_obj->cur_obj) {
4712                 i915_gem_detach_phys_object(dev, phys_obj->cur_obj);
4713         }
4714
4715 #ifdef CONFIG_X86
4716         set_memory_wb((unsigned long)phys_obj->handle->vaddr, phys_obj->handle->size / PAGE_SIZE);
4717 #endif
4718         drm_pci_free(dev, phys_obj->handle);
4719         kfree(phys_obj);
4720         dev_priv->mm.phys_objs[id - 1] = NULL;
4721 }
4722
4723 void i915_gem_free_all_phys_object(struct drm_device *dev)
4724 {
4725         int i;
4726
4727         for (i = I915_GEM_PHYS_CURSOR_0; i <= I915_MAX_PHYS_OBJECT; i++)
4728                 i915_gem_free_phys_object(dev, i);
4729 }
4730
4731 void i915_gem_detach_phys_object(struct drm_device *dev,
4732                                  struct drm_gem_object *obj)
4733 {
4734         struct drm_i915_gem_object *obj_priv;
4735         int i;
4736         int ret;
4737         int page_count;
4738
4739         obj_priv = to_intel_bo(obj);
4740         if (!obj_priv->phys_obj)
4741                 return;
4742
4743         ret = i915_gem_object_get_pages(obj, 0);
4744         if (ret)
4745                 goto out;
4746
4747         page_count = obj->size / PAGE_SIZE;
4748
4749         for (i = 0; i < page_count; i++) {
4750                 char *dst = kmap_atomic(obj_priv->pages[i], KM_USER0);
4751                 char *src = obj_priv->phys_obj->handle->vaddr + (i * PAGE_SIZE);
4752
4753                 memcpy(dst, src, PAGE_SIZE);
4754                 kunmap_atomic(dst, KM_USER0);
4755         }
4756         drm_clflush_pages(obj_priv->pages, page_count);
4757         drm_agp_chipset_flush(dev);
4758
4759         i915_gem_object_put_pages(obj);
4760 out:
4761         obj_priv->phys_obj->cur_obj = NULL;
4762         obj_priv->phys_obj = NULL;
4763 }
4764
4765 int
4766 i915_gem_attach_phys_object(struct drm_device *dev,
4767                             struct drm_gem_object *obj, int id)
4768 {
4769         drm_i915_private_t *dev_priv = dev->dev_private;
4770         struct drm_i915_gem_object *obj_priv;
4771         int ret = 0;
4772         int page_count;
4773         int i;
4774
4775         if (id > I915_MAX_PHYS_OBJECT)
4776                 return -EINVAL;
4777
4778         obj_priv = to_intel_bo(obj);
4779
4780         if (obj_priv->phys_obj) {
4781                 if (obj_priv->phys_obj->id == id)
4782                         return 0;
4783                 i915_gem_detach_phys_object(dev, obj);
4784         }
4785
4786
4787         /* create a new object */
4788         if (!dev_priv->mm.phys_objs[id - 1]) {
4789                 ret = i915_gem_init_phys_object(dev, id,
4790                                                 obj->size);
4791                 if (ret) {
4792                         DRM_ERROR("failed to init phys object %d size: %zu\n", id, obj->size);
4793                         goto out;
4794                 }
4795         }
4796
4797         /* bind to the object */
4798         obj_priv->phys_obj = dev_priv->mm.phys_objs[id - 1];
4799         obj_priv->phys_obj->cur_obj = obj;
4800
4801         ret = i915_gem_object_get_pages(obj, 0);
4802         if (ret) {
4803                 DRM_ERROR("failed to get page list\n");
4804                 goto out;
4805         }
4806
4807         page_count = obj->size / PAGE_SIZE;
4808
4809         for (i = 0; i < page_count; i++) {
4810                 char *src = kmap_atomic(obj_priv->pages[i], KM_USER0);
4811                 char *dst = obj_priv->phys_obj->handle->vaddr + (i * PAGE_SIZE);
4812
4813                 memcpy(dst, src, PAGE_SIZE);
4814                 kunmap_atomic(src, KM_USER0);
4815         }
4816
4817         i915_gem_object_put_pages(obj);
4818
4819         return 0;
4820 out:
4821         return ret;
4822 }
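
     /*
      * Illustrative only: the main in-kernel user of the phys-object helpers
      * is the hardware cursor path on chipsets that need physically
      * contiguous cursor memory.  A hedged sketch (cursor_bo, pipe and addr
      * are hypothetical names, error handling elided):
      *
      *         ret = i915_gem_attach_phys_object(dev, cursor_bo,
      *                                           I915_GEM_PHYS_CURSOR_0 + pipe);
      *         if (ret == 0)
      *                 addr = to_intel_bo(cursor_bo)->phys_obj->handle->busaddr;
      */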
4823
4824 static int
4825 i915_gem_phys_pwrite(struct drm_device *dev, struct drm_gem_object *obj,
4826                      struct drm_i915_gem_pwrite *args,
4827                      struct drm_file *file_priv)
4828 {
4829         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
4830         void *obj_addr;
4831         int ret;
4832         char __user *user_data;
4833
4834         user_data = (char __user *) (uintptr_t) args->data_ptr;
4835         obj_addr = obj_priv->phys_obj->handle->vaddr + args->offset;
4836
4837         DRM_DEBUG_DRIVER("obj_addr %p, %lld\n", obj_addr, args->size);
4838         ret = copy_from_user(obj_addr, user_data, args->size);
4839         if (ret)
4840                 return -EFAULT;
4841
4842         drm_agp_chipset_flush(dev);
4843         return 0;
4844 }
4845
4846 void i915_gem_release(struct drm_device * dev, struct drm_file *file_priv)
4847 {
4848         struct drm_i915_file_private *i915_file_priv = file_priv->driver_priv;
4849
4850         /* Clean up our request list when the client is going away, so that
4851          * later retire_requests won't dereference our soon-to-be-gone
4852          * file_priv.
4853          */
4854         mutex_lock(&dev->struct_mutex);
4855         while (!list_empty(&i915_file_priv->mm.request_list))
4856                 list_del_init(i915_file_priv->mm.request_list.next);
4857         mutex_unlock(&dev->struct_mutex);
4858 }
4859
4860 static int
4861 i915_gpu_is_active(struct drm_device *dev)
4862 {
4863         drm_i915_private_t *dev_priv = dev->dev_private;
4864         int lists_empty;
4865
4866         spin_lock(&dev_priv->mm.active_list_lock);
4867         lists_empty = list_empty(&dev_priv->mm.flushing_list) &&
4868                       list_empty(&dev_priv->render_ring.active_list);
4869         if (HAS_BSD(dev))
4870                 lists_empty &= list_empty(&dev_priv->bsd_ring.active_list);
4871         spin_unlock(&dev_priv->mm.active_list_lock);
4872
4873         return !lists_empty;
4874 }
4875
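     /*
      * Memory-pressure callback for the VM shrinker.  A nr_to_scan of zero is
      * a query: report (scaled by vfs_cache_pressure) how many inactive
      * objects could be reclaimed.  Otherwise unbind purgeable buffers first,
      * then anything else on the inactive lists, and as a last resort idle
      * the GPU and rescan.  Returns -1 when every struct_mutex was contended
      * and shrinking now would risk deadlock.
      */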
4876 static int
4877 i915_gem_shrink(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask)
4878 {
4879         drm_i915_private_t *dev_priv, *next_dev;
4880         struct drm_i915_gem_object *obj_priv, *next_obj;
4881         int cnt = 0;
4882         int would_deadlock = 1;
4883
4884         /* "fast-path" to count number of available objects */
4885         if (nr_to_scan == 0) {
4886                 spin_lock(&shrink_list_lock);
4887                 list_for_each_entry(dev_priv, &shrink_list, mm.shrink_list) {
4888                         struct drm_device *dev = dev_priv->dev;
4889
4890                         if (mutex_trylock(&dev->struct_mutex)) {
4891                                 list_for_each_entry(obj_priv,
4892                                                     &dev_priv->mm.inactive_list,
4893                                                     list)
4894                                         cnt++;
4895                                 mutex_unlock(&dev->struct_mutex);
4896                         }
4897                 }
4898                 spin_unlock(&shrink_list_lock);
4899
4900                 return (cnt / 100) * sysctl_vfs_cache_pressure;
4901         }
4902
4903         spin_lock(&shrink_list_lock);
4904
4905 rescan:
4906         /* first scan for clean buffers */
4907         list_for_each_entry_safe(dev_priv, next_dev,
4908                                  &shrink_list, mm.shrink_list) {
4909                 struct drm_device *dev = dev_priv->dev;
4910
4911                 if (!mutex_trylock(&dev->struct_mutex))
4912                         continue;
4913
4914                 spin_unlock(&shrink_list_lock);
4915                 i915_gem_retire_requests(dev);
4916
4917                 list_for_each_entry_safe(obj_priv, next_obj,
4918                                          &dev_priv->mm.inactive_list,
4919                                          list) {
4920                         if (i915_gem_object_is_purgeable(obj_priv)) {
4921                                 i915_gem_object_unbind(&obj_priv->base);
4922                                 if (--nr_to_scan <= 0)
4923                                         break;
4924                         }
4925                 }
4926
4927                 spin_lock(&shrink_list_lock);
4928                 mutex_unlock(&dev->struct_mutex);
4929
4930                 would_deadlock = 0;
4931
4932                 if (nr_to_scan <= 0)
4933                         break;
4934         }
4935
4936         /* second pass, evict/count anything still on the inactive list */
4937         list_for_each_entry_safe(dev_priv, next_dev,
4938                                  &shrink_list, mm.shrink_list) {
4939                 struct drm_device *dev = dev_priv->dev;
4940
4941                 if (!mutex_trylock(&dev->struct_mutex))
4942                         continue;
4943
4944                 spin_unlock(&shrink_list_lock);
4945
4946                 list_for_each_entry_safe(obj_priv, next_obj,
4947                                          &dev_priv->mm.inactive_list,
4948                                          list) {
4949                         if (nr_to_scan > 0) {
4950                                 i915_gem_object_unbind(&obj_priv->base);
4951                                 nr_to_scan--;
4952                         } else
4953                                 cnt++;
4954                 }
4955
4956                 spin_lock(&shrink_list_lock);
4957                 mutex_unlock(&dev->struct_mutex);
4958
4959                 would_deadlock = 0;
4960         }
4961
4962         if (nr_to_scan) {
4963                 int active = 0;
4964
4965                 /*
4966                  * We are desperate for pages, so as a last resort, wait
4967                  * for the GPU to finish and discard whatever we can.
4968                  * This dramatically reduces the number of OOM-killer
4969                  * events whilst running the GPU aggressively.
4970                  */
4971                 list_for_each_entry(dev_priv, &shrink_list, mm.shrink_list) {
4972                         struct drm_device *dev = dev_priv->dev;
4973
4974                         if (!mutex_trylock(&dev->struct_mutex))
4975                                 continue;
4976
4977                         spin_unlock(&shrink_list_lock);
4978
4979                         if (i915_gpu_is_active(dev)) {
4980                                 i915_gpu_idle(dev);
4981                                 active++;
4982                         }
4983
4984                         spin_lock(&shrink_list_lock);
4985                         mutex_unlock(&dev->struct_mutex);
4986                 }
4987
4988                 if (active)
4989                         goto rescan;
4990         }
4991
4992         spin_unlock(&shrink_list_lock);
4993
4994         if (would_deadlock)
4995                 return -1;
4996         else if (cnt > 0)
4997                 return (cnt / 100) * sysctl_vfs_cache_pressure;
4998         else
4999                 return 0;
5000 }
5001
5002 static struct shrinker shrinker = {
5003         .shrink = i915_gem_shrink,
5004         .seeks = DEFAULT_SEEKS,
5005 };
5006
5007 __init void
5008 i915_gem_shrinker_init(void)
5009 {
5010         register_shrinker(&shrinker);
5011 }
5012
5013 __exit void
5014 i915_gem_shrinker_exit(void)
5015 {
5016         unregister_shrinker(&shrinker);
5017 }