firefly-linux-kernel-4.4.55.git: drivers/gpu/drm/radeon/radeon_fence.c
1 /*
2  * Copyright 2009 Jerome Glisse.
3  * All Rights Reserved.
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the
7  * "Software"), to deal in the Software without restriction, including
8  * without limitation the rights to use, copy, modify, merge, publish,
9  * distribute, sub license, and/or sell copies of the Software, and to
10  * permit persons to whom the Software is furnished to do so, subject to
11  * the following conditions:
12  *
13  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
16  * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
17  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
18  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
19  * USE OR OTHER DEALINGS IN THE SOFTWARE.
20  *
21  * The above copyright notice and this permission notice (including the
22  * next paragraph) shall be included in all copies or substantial portions
23  * of the Software.
24  *
25  */
26 /*
27  * Authors:
28  *    Jerome Glisse <glisse@freedesktop.org>
29  *    Dave Airlie
30  */
31 #include <linux/seq_file.h>
32 #include <linux/atomic.h>
33 #include <linux/wait.h>
34 #include <linux/kref.h>
35 #include <linux/slab.h>
36 #include <linux/firmware.h>
37 #include <drm/drmP.h>
38 #include "radeon_reg.h"
39 #include "radeon.h"
40 #include "radeon_trace.h"
41
42 /*
43  * Fences
44  * Fences mark an event in the GPU's pipeline and are used
45  * for GPU/CPU synchronization.  When the fence is written,
46  * it is expected that all buffers associated with that fence
47  * are no longer in use by the associated ring on the GPU and
48  * that the relevant GPU caches have been flushed.  Whether
49  * we use a scratch register or memory location depends on the asic
50  * and whether writeback is enabled.
51  */
52
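/*
 * Editor's note: a minimal usage sketch (illustrative only, not part of
 * this file). A caller that has submitted work on a ring typically emits
 * a fence, waits on it, and drops its reference; the ring index below is
 * just an example:
 *
 *     struct radeon_fence *fence;
 *     int r;
 *
 *     r = radeon_fence_emit(rdev, &fence, RADEON_RING_TYPE_GFX_INDEX);
 *     if (r)
 *             return r;
 *     r = radeon_fence_wait(fence, true);
 *     radeon_fence_unref(&fence);
 */
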
53 /**
54  * radeon_fence_write - write a fence value
55  *
56  * @rdev: radeon_device pointer
57  * @seq: sequence number to write
58  * @ring: ring index the fence is associated with
59  *
60  * Writes a fence value to memory or a scratch register (all asics).
61  */
62 static void radeon_fence_write(struct radeon_device *rdev, u32 seq, int ring)
63 {
64         struct radeon_fence_driver *drv = &rdev->fence_drv[ring];
65         if (likely(rdev->wb.enabled || !drv->scratch_reg)) {
66                 if (drv->cpu_addr) {
67                         *drv->cpu_addr = cpu_to_le32(seq);
68                 }
69         } else {
70                 WREG32(drv->scratch_reg, seq);
71         }
72 }
73
74 /**
75  * radeon_fence_read - read a fence value
76  *
77  * @rdev: radeon_device pointer
78  * @ring: ring index the fence is associated with
79  *
80  * Reads a fence value from memory or a scratch register (all asics).
81  * Returns the value of the fence read from memory or register.
82  */
83 static u32 radeon_fence_read(struct radeon_device *rdev, int ring)
84 {
85         struct radeon_fence_driver *drv = &rdev->fence_drv[ring];
86         u32 seq = 0;
87
88         if (likely(rdev->wb.enabled || !drv->scratch_reg)) {
89                 if (drv->cpu_addr) {
90                         seq = le32_to_cpu(*drv->cpu_addr);
91                 } else {
92                         seq = lower_32_bits(atomic64_read(&drv->last_seq));
93                 }
94         } else {
95                 seq = RREG32(drv->scratch_reg);
96         }
97         return seq;
98 }
99
100 /**
101  * radeon_fence_schedule_check - schedule lockup check
102  *
103  * @rdev: radeon_device pointer
104  * @ring: ring index we should work with
105  *
106  * Queues a delayed work item to check for lockups.
107  */
108 static void radeon_fence_schedule_check(struct radeon_device *rdev, int ring)
109 {
110         /*
111          * Do not reset the timer here with mod_delayed_work,
112          * this can livelock in an interaction with TTM delayed destroy.
113          */
114         queue_delayed_work(system_power_efficient_wq,
115                            &rdev->fence_drv[ring].lockup_work,
116                            RADEON_FENCE_JIFFIES_TIMEOUT);
117 }
118
119 /**
120  * radeon_fence_emit - emit a fence on the requested ring
121  *
122  * @rdev: radeon_device pointer
123  * @fence: radeon fence object
124  * @ring: ring index the fence is associated with
125  *
126  * Emits a fence command on the requested ring (all asics).
127  * Returns 0 on success, -ENOMEM on failure.
128  */
129 int radeon_fence_emit(struct radeon_device *rdev,
130                       struct radeon_fence **fence,
131                       int ring)
132 {
133         u64 seq = ++rdev->fence_drv[ring].sync_seq[ring];
134
135         /* we are protected by the ring emission mutex */
136         *fence = kmalloc(sizeof(struct radeon_fence), GFP_KERNEL);
137         if ((*fence) == NULL) {
138                 return -ENOMEM;
139         }
140         (*fence)->rdev = rdev;
141         (*fence)->seq = seq;
142         (*fence)->ring = ring;
143         fence_init(&(*fence)->base, &radeon_fence_ops,
144                    &rdev->fence_queue.lock, rdev->fence_context + ring, seq);
145         radeon_fence_ring_emit(rdev, ring, *fence);
146         trace_radeon_fence_emit(rdev->ddev, ring, (*fence)->seq);
147         radeon_fence_schedule_check(rdev, ring);
148         return 0;
149 }
150
151 /**
152  * radeon_fence_check_signaled - callback from fence_queue
153  *
154  * This function is called with the fence_queue lock held, which is also
155  * used for the fence locking itself, so the variants that do not take
156  * the lock (fence_signal_locked, __remove_wait_queue) are used.
157  */
158 static int radeon_fence_check_signaled(wait_queue_t *wait, unsigned mode, int flags, void *key)
159 {
160         struct radeon_fence *fence;
161         u64 seq;
162
163         fence = container_of(wait, struct radeon_fence, fence_wake);
164
165         /*
166          * We cannot use radeon_fence_process here because we're already
167          * in the waitqueue, in a call from wake_up_all.
168          */
169         seq = atomic64_read(&fence->rdev->fence_drv[fence->ring].last_seq);
170         if (seq >= fence->seq) {
171                 int ret = fence_signal_locked(&fence->base);
172
173                 if (!ret)
174                         FENCE_TRACE(&fence->base, "signaled from irq context\n");
175                 else
176                         FENCE_TRACE(&fence->base, "was already signaled\n");
177
178                 radeon_irq_kms_sw_irq_put(fence->rdev, fence->ring);
179                 __remove_wait_queue(&fence->rdev->fence_queue, &fence->fence_wake);
180                 fence_put(&fence->base);
181         } else
182                 FENCE_TRACE(&fence->base, "pending\n");
183         return 0;
184 }
185
186 /**
187  * radeon_fence_activity - check for fence activity
188  *
189  * @rdev: radeon_device pointer
190  * @ring: ring index the fence is associated with
191  *
192  * Checks the current fence value and calculates the last
193  * signaled fence value. Returns true if activity occurred
194  * on the ring, in which case the fence_queue should be woken up.
195  */
196 static bool radeon_fence_activity(struct radeon_device *rdev, int ring)
197 {
198         uint64_t seq, last_seq, last_emitted;
199         unsigned count_loop = 0;
200         bool wake = false;
201
202         /* Note that there is a scenario here for an infinite loop, but
203          * it is very unlikely to happen. For it to happen, the current
204          * polling process needs to be interrupted by another process, and
205          * that other process needs to update last_seq between the atomic
206          * read and the xchg of the current process.
207          *
208          * Moreover, for this to turn into an infinite loop, new fences
209          * must be signaled continuously, i.e. radeon_fence_read needs to
210          * return a different value each time, for both the currently
211          * polling process and the other process that updates last_seq
212          * between the atomic read and the xchg of the current process.
213          * In addition, the value the other process sets as last_seq must
214          * be higher than the seq value we just read, which means the
215          * current process must be interrupted after radeon_fence_read and
216          * before the atomic xchg.
217          *
218          * To be even safer, we count the number of times we loop and bail
219          * out after 10 iterations, accepting the fact that we might have
220          * temporarily set last_seq not to the true last signaled seq but
221          * to an older one.
222          */
223         last_seq = atomic64_read(&rdev->fence_drv[ring].last_seq);
224         do {
225                 last_emitted = rdev->fence_drv[ring].sync_seq[ring];
226                 seq = radeon_fence_read(rdev, ring);
227                 seq |= last_seq & 0xffffffff00000000LL;
228                 if (seq < last_seq) {
229                         seq &= 0xffffffff;
230                         seq |= last_emitted & 0xffffffff00000000LL;
231                 }
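                /* Editor's note, a worked example of the 32->64 bit extension
                 * above (illustrative values): if last_seq is
                 * 0x00000001fffffff0 and the hw returns 0x00000005, then seq
                 * becomes 0x0000000100000005 < last_seq, so the wrap is
                 * detected and the upper bits are taken from last_emitted
                 * (e.g. 0x0000000200000010), giving 0x0000000200000005.
                 */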
232
233                 if (seq <= last_seq || seq > last_emitted) {
234                         break;
235                 }
236                 /* If we loop again we don't want to return without
237                  * checking if a fence is signaled, as it means that the
238                  * seq we just read is different from the previous one.
239                  */
240                 wake = true;
241                 last_seq = seq;
242                 if ((count_loop++) > 10) {
243                         /* We have looped too many times; bail out and
244                          * accept the fact that we might have set an older
245                          * fence seq than the current real last seq as
246                          * signaled by the hw.
247                          */
248                         break;
249                 }
250         } while (atomic64_xchg(&rdev->fence_drv[ring].last_seq, seq) > seq);
251
252         if (seq < last_emitted)
253                 radeon_fence_schedule_check(rdev, ring);
254
255         return wake;
256 }
257
258 /**
259  * radeon_fence_check_lockup - check for hardware lockup
260  *
261  * @work: delayed work item
262  *
263  * Checks for fence activity and, if there is none, probes
264  * the hardware to see if a lockup occurred.
265  */
266 static void radeon_fence_check_lockup(struct work_struct *work)
267 {
268         struct radeon_fence_driver *fence_drv;
269         struct radeon_device *rdev;
270         int ring;
271
272         fence_drv = container_of(work, struct radeon_fence_driver,
273                                  lockup_work.work);
274         rdev = fence_drv->rdev;
275         ring = fence_drv - &rdev->fence_drv[0];
276
277         if (!down_read_trylock(&rdev->exclusive_lock)) {
278                 /* just reschedule the check if a reset is going on */
279                 radeon_fence_schedule_check(rdev, ring);
280                 return;
281         }
282
283         if (fence_drv->delayed_irq && rdev->ddev->irq_enabled) {
284                 unsigned long irqflags;
285
286                 fence_drv->delayed_irq = false;
287                 spin_lock_irqsave(&rdev->irq.lock, irqflags);
288                 radeon_irq_set(rdev);
289                 spin_unlock_irqrestore(&rdev->irq.lock, irqflags);
290         }
291
292         if (radeon_fence_activity(rdev, ring))
293                 wake_up_all(&rdev->fence_queue);
294
295         else if (radeon_ring_is_lockup(rdev, ring, &rdev->ring[ring])) {
296
297                 /* good news we believe it's a lockup */
298                 dev_warn(rdev->dev, "GPU lockup (current fence id "
299                          "0x%016llx last fence id 0x%016llx on ring %d)\n",
300                          (uint64_t)atomic64_read(&fence_drv->last_seq),
301                          fence_drv->sync_seq[ring], ring);
302
303         /* remember that we need a reset */
304                 rdev->needs_reset = true;
305                 wake_up_all(&rdev->fence_queue);
306         }
307         up_read(&rdev->exclusive_lock);
308 }
309
310 /**
311  * radeon_fence_process - process a fence
312  *
313  * @rdev: radeon_device pointer
314  * @ring: ring index the fence is associated with
315  *
316  * Checks the current fence value and wakes the fence queue
317  * if the sequence number has increased (all asics).
318  */
319 void radeon_fence_process(struct radeon_device *rdev, int ring)
320 {
321         if (radeon_fence_activity(rdev, ring))
322                 wake_up_all(&rdev->fence_queue);
323 }
324
325 /**
326  * radeon_fence_seq_signaled - check if a fence sequence number has signaled
327  *
328  * @rdev: radeon device pointer
329  * @seq: sequence number
330  * @ring: ring index the fence is associated with
331  *
332  * Check if the last signaled fence sequence number is >= the requested
333  * sequence number (all asics).
334  * Returns true if the fence has signaled (current fence value
335  * is >= requested value) or false if it has not (current fence
336  * value is < the requested value).  Helper function for
337  * radeon_fence_signaled().
338  */
339 static bool radeon_fence_seq_signaled(struct radeon_device *rdev,
340                                       u64 seq, unsigned ring)
341 {
342         if (atomic64_read(&rdev->fence_drv[ring].last_seq) >= seq) {
343                 return true;
344         }
345         /* poll new last sequence at least once */
346         radeon_fence_process(rdev, ring);
347         if (atomic64_read(&rdev->fence_drv[ring].last_seq) >= seq) {
348                 return true;
349         }
350         return false;
351 }
352
353 static bool radeon_fence_is_signaled(struct fence *f)
354 {
355         struct radeon_fence *fence = to_radeon_fence(f);
356         struct radeon_device *rdev = fence->rdev;
357         unsigned ring = fence->ring;
358         u64 seq = fence->seq;
359
360         if (atomic64_read(&rdev->fence_drv[ring].last_seq) >= seq) {
361                 return true;
362         }
363
364         if (down_read_trylock(&rdev->exclusive_lock)) {
365                 radeon_fence_process(rdev, ring);
366                 up_read(&rdev->exclusive_lock);
367
368                 if (atomic64_read(&rdev->fence_drv[ring].last_seq) >= seq) {
369                         return true;
370                 }
371         }
372         return false;
373 }
374
375 /**
376  * radeon_fence_enable_signaling - enable signaling on fence
377  * @fence: fence
378  *
379  * This function is called with fence_queue lock held, and adds a callback
380  * to fence_queue that checks if this fence is signaled, and if so it
381  * signals the fence and removes itself.
382  */
383 static bool radeon_fence_enable_signaling(struct fence *f)
384 {
385         struct radeon_fence *fence = to_radeon_fence(f);
386         struct radeon_device *rdev = fence->rdev;
387
388         if (atomic64_read(&rdev->fence_drv[fence->ring].last_seq) >= fence->seq)
389                 return false;
390
391         if (down_read_trylock(&rdev->exclusive_lock)) {
392                 radeon_irq_kms_sw_irq_get(rdev, fence->ring);
393
394                 if (radeon_fence_activity(rdev, fence->ring))
395                         wake_up_all_locked(&rdev->fence_queue);
396
397                 /* did fence get signaled after we enabled the sw irq? */
398                 if (atomic64_read(&rdev->fence_drv[fence->ring].last_seq) >= fence->seq) {
399                         radeon_irq_kms_sw_irq_put(rdev, fence->ring);
400                         up_read(&rdev->exclusive_lock);
401                         return false;
402                 }
403
404                 up_read(&rdev->exclusive_lock);
405         } else {
406                 /* we're probably in a lockup, let's not fiddle too much */
407                 if (radeon_irq_kms_sw_irq_get_delayed(rdev, fence->ring))
408                         rdev->fence_drv[fence->ring].delayed_irq = true;
409                 radeon_fence_schedule_check(rdev, fence->ring);
410         }
411
412         fence->fence_wake.flags = 0;
413         fence->fence_wake.private = NULL;
414         fence->fence_wake.func = radeon_fence_check_signaled;
415         __add_wait_queue(&rdev->fence_queue, &fence->fence_wake);
416         fence_get(f);
417
418         FENCE_TRACE(&fence->base, "armed on ring %i!\n", fence->ring);
419         return true;
420 }
421
422 /**
423  * radeon_fence_signaled - check if a fence has signaled
424  *
425  * @fence: radeon fence object
426  *
427  * Check if the requested fence has signaled (all asics).
428  * Returns true if the fence has signaled or false if it has not.
429  */
430 bool radeon_fence_signaled(struct radeon_fence *fence)
431 {
432         if (!fence)
433                 return true;
434
435         if (radeon_fence_seq_signaled(fence->rdev, fence->seq, fence->ring)) {
436                 int ret;
437
438                 ret = fence_signal(&fence->base);
439                 if (!ret)
440                         FENCE_TRACE(&fence->base, "signaled from radeon_fence_signaled\n");
441                 return true;
442         }
443         return false;
444 }
445
446 /**
447  * radeon_fence_any_seq_signaled - check if any sequence number is signaled
448  *
449  * @rdev: radeon device pointer
450  * @seq: sequence numbers
451  *
452  * Check if the last signaled fence sequence number is >= the requested
453  * sequence number (all asics).
454  * Returns true if any has signaled (current value is >= requested value)
455  * or false if it has not. Helper function for radeon_fence_wait_seq_timeout.
456  */
457 static bool radeon_fence_any_seq_signaled(struct radeon_device *rdev, u64 *seq)
458 {
459         unsigned i;
460
461         for (i = 0; i < RADEON_NUM_RINGS; ++i) {
462                 if (seq[i] && radeon_fence_seq_signaled(rdev, seq[i], i))
463                         return true;
464         }
465         return false;
466 }
467
468 /**
469  * radeon_fence_wait_seq_timeout - wait for specific sequence number(s)
470  *
471  * @rdev: radeon device pointer
472  * @target_seq: sequence number(s) we want to wait for
473  * @intr: use interruptible sleep
474  * @timeout: maximum time to wait, or MAX_SCHEDULE_TIMEOUT for infinite wait
475  *
476  * Wait for the requested sequence number(s) to be written by any ring
477  * (all asics).  Sequence number array is indexed by ring id.
478  * @intr selects whether to use interruptible (true) or non-interruptible
479  * (false) sleep when waiting for the sequence number.  Helper function
480  * for radeon_fence_wait_*().
481  * Returns the remaining time if the sequence number has passed, 0 if
482  * the wait timed out, or an error for all other cases.
483  * -EDEADLK is returned when a GPU lockup has been detected.
484  */
485 static long radeon_fence_wait_seq_timeout(struct radeon_device *rdev,
486                                           u64 *target_seq, bool intr,
487                                           long timeout)
488 {
489         long r;
490         int i;
491
492         if (radeon_fence_any_seq_signaled(rdev, target_seq))
493                 return timeout;
494
495         /* enable IRQs and tracing */
496         for (i = 0; i < RADEON_NUM_RINGS; ++i) {
497                 if (!target_seq[i])
498                         continue;
499
500                 trace_radeon_fence_wait_begin(rdev->ddev, i, target_seq[i]);
501                 radeon_irq_kms_sw_irq_get(rdev, i);
502         }
503
504         if (intr) {
505                 r = wait_event_interruptible_timeout(rdev->fence_queue, (
506                         radeon_fence_any_seq_signaled(rdev, target_seq)
507                          || rdev->needs_reset), timeout);
508         } else {
509                 r = wait_event_timeout(rdev->fence_queue, (
510                         radeon_fence_any_seq_signaled(rdev, target_seq)
511                          || rdev->needs_reset), timeout);
512         }
513
514         if (rdev->needs_reset)
515                 r = -EDEADLK;
516
517         for (i = 0; i < RADEON_NUM_RINGS; ++i) {
518                 if (!target_seq[i])
519                         continue;
520
521                 radeon_irq_kms_sw_irq_put(rdev, i);
522                 trace_radeon_fence_wait_end(rdev->ddev, i, target_seq[i]);
523         }
524
525         return r;
526 }
527
528 /**
529  * radeon_fence_wait - wait for a fence to signal
530  *
531  * @fence: radeon fence object
532  * @intr: use interruptible sleep
533  *
534  * Wait for the requested fence to signal (all asics).
535  * @intr selects whether to use interruptible (true) or non-interruptible
536  * (false) sleep when waiting for the fence.
537  * Returns 0 if the fence has passed, error for all other cases.
538  */
539 int radeon_fence_wait(struct radeon_fence *fence, bool intr)
540 {
541         uint64_t seq[RADEON_NUM_RINGS] = {};
542         long r;
543
544         /*
545          * This function should not be called on !radeon fences.
546          * If it were, it would mean this function could also be
547          * called on radeon fences belonging to another card, for
548          * which exclusive_lock is not held.
549          */
550         if (WARN_ON_ONCE(!to_radeon_fence(&fence->base)))
551                 return fence_wait(&fence->base, intr);
552
553         seq[fence->ring] = fence->seq;
554         r = radeon_fence_wait_seq_timeout(fence->rdev, seq, intr, MAX_SCHEDULE_TIMEOUT);
555         if (r < 0) {
556                 return r;
557         }
558
559         r = fence_signal(&fence->base);
560         if (!r)
561                 FENCE_TRACE(&fence->base, "signaled from fence_wait\n");
562         return 0;
563 }
564
565 /**
566  * radeon_fence_wait_any - wait for a fence to signal on any ring
567  *
568  * @rdev: radeon device pointer
569  * @fences: radeon fence object(s)
570  * @intr: use interruptible sleep
571  *
572  * Wait for any requested fence to signal (all asics).  Fence
573  * array is indexed by ring id.  @intr selects whether to use
574  * interruptible (true) or non-interruptible (false) sleep when
575  * waiting for the fences. Used by the suballocator.
576  * Returns 0 if any fence has passed, error for all other cases.
577  */
578 int radeon_fence_wait_any(struct radeon_device *rdev,
579                           struct radeon_fence **fences,
580                           bool intr)
581 {
582         uint64_t seq[RADEON_NUM_RINGS];
583         unsigned i, num_rings = 0;
584         long r;
585
586         for (i = 0; i < RADEON_NUM_RINGS; ++i) {
587                 seq[i] = 0;
588
589                 if (!fences[i]) {
590                         continue;
591                 }
592
593                 seq[i] = fences[i]->seq;
594                 ++num_rings;
595         }
596
597         /* nothing to wait for ? */
598         if (num_rings == 0)
599                 return -ENOENT;
600
601         r = radeon_fence_wait_seq_timeout(rdev, seq, intr, MAX_SCHEDULE_TIMEOUT);
602         if (r < 0) {
603                 return r;
604         }
605         return 0;
606 }
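
/*
 * Editor's note: an illustrative caller pattern for radeon_fence_wait_any
 * (not part of this file). The fence array is indexed by ring id, so a
 * caller such as the suballocator fills in one slot per ring it cares
 * about and leaves the rest NULL; "gfx_fence" below is hypothetical:
 *
 *     struct radeon_fence *fences[RADEON_NUM_RINGS] = { NULL };
 *     int r;
 *
 *     fences[RADEON_RING_TYPE_GFX_INDEX] = gfx_fence;
 *     r = radeon_fence_wait_any(rdev, fences, false);
 */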
607
608 /**
609  * radeon_fence_wait_next - wait for the next fence to signal
610  *
611  * @rdev: radeon device pointer
612  * @ring: ring index the fence is associated with
613  *
614  * Wait for the next fence on the requested ring to signal (all asics).
615  * Returns 0 if the next fence has passed, error for all other cases.
616  * Caller must hold ring lock.
617  */
618 int radeon_fence_wait_next(struct radeon_device *rdev, int ring)
619 {
620         uint64_t seq[RADEON_NUM_RINGS] = {};
621         long r;
622
623         seq[ring] = atomic64_read(&rdev->fence_drv[ring].last_seq) + 1ULL;
624         if (seq[ring] >= rdev->fence_drv[ring].sync_seq[ring]) {
625                 /* nothing to wait for, last_seq is
626                    already the last emitted fence */
627                 return -ENOENT;
628         }
629         r = radeon_fence_wait_seq_timeout(rdev, seq, false, MAX_SCHEDULE_TIMEOUT);
630         if (r < 0)
631                 return r;
632         return 0;
633 }
634
635 /**
636  * radeon_fence_wait_empty - wait for all fences to signal
637  *
638  * @rdev: radeon device pointer
639  * @ring: ring index the fence is associated with
640  *
641  * Wait for all fences on the requested ring to signal (all asics).
642  * Returns 0 if the fences have passed, error for all other cases.
643  * Caller must hold ring lock.
644  */
645 int radeon_fence_wait_empty(struct radeon_device *rdev, int ring)
646 {
647         uint64_t seq[RADEON_NUM_RINGS] = {};
648         long r;
649
650         seq[ring] = rdev->fence_drv[ring].sync_seq[ring];
651         if (!seq[ring])
652                 return 0;
653
654         r = radeon_fence_wait_seq_timeout(rdev, seq, false, MAX_SCHEDULE_TIMEOUT);
655         if (r < 0) {
656                 if (r == -EDEADLK)
657                         return -EDEADLK;
658
659                 dev_err(rdev->dev, "error waiting for ring[%d] to become idle (%ld)\n",
660                         ring, r);
661         }
662         return 0;
663 }
664
665 /**
666  * radeon_fence_ref - take a ref on a fence
667  *
668  * @fence: radeon fence object
669  *
670  * Take a reference on a fence (all asics).
671  * Returns the fence.
672  */
673 struct radeon_fence *radeon_fence_ref(struct radeon_fence *fence)
674 {
675         fence_get(&fence->base);
676         return fence;
677 }
678
679 /**
680  * radeon_fence_unref - remove a ref on a fence
681  *
682  * @fence: radeon fence object
683  *
684  * Remove a reference on a fence (all asics).
685  */
686 void radeon_fence_unref(struct radeon_fence **fence)
687 {
688         struct radeon_fence *tmp = *fence;
689
690         *fence = NULL;
691         if (tmp) {
692                 fence_put(&tmp->base);
693         }
694 }
695
696 /**
697  * radeon_fence_count_emitted - get the count of emitted fences
698  *
699  * @rdev: radeon device pointer
700  * @ring: ring index the fence is associated with
701  *
702  * Get the number of fences emitted on the requested ring (all asics).
703  * Returns the number of emitted fences on the ring.  Used by the
704  * dynpm code to track ring activity.
705  */
706 unsigned radeon_fence_count_emitted(struct radeon_device *rdev, int ring)
707 {
708         uint64_t emitted;
709
710         /* We are not protected by ring lock when reading the last sequence
711          * but it's ok to report slightly wrong fence count here.
712          */
713         radeon_fence_process(rdev, ring);
714         emitted = rdev->fence_drv[ring].sync_seq[ring]
715                 - atomic64_read(&rdev->fence_drv[ring].last_seq);
716         /* to avoid a 32-bit wrap around */
717         if (emitted > 0x10000000) {
718                 emitted = 0x10000000;
719         }
720         return (unsigned)emitted;
721 }
722
723 /**
724  * radeon_fence_need_sync - do we need a semaphore
725  *
726  * @fence: radeon fence object
727  * @dst_ring: which ring to check against
728  *
729  * Check if the fence needs to be synced against another ring
730  * (all asics).  If so, we need to emit a semaphore.
731  * Returns true if we need to sync with another ring, false if
732  * not.
733  */
734 bool radeon_fence_need_sync(struct radeon_fence *fence, int dst_ring)
735 {
736         struct radeon_fence_driver *fdrv;
737
738         if (!fence) {
739                 return false;
740         }
741
742         if (fence->ring == dst_ring) {
743                 return false;
744         }
745
746         /* we are protected by the ring mutex */
747         fdrv = &fence->rdev->fence_drv[dst_ring];
748         if (fence->seq <= fdrv->sync_seq[fence->ring]) {
749                 return false;
750         }
751
752         return true;
753 }
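
/*
 * Editor's note: radeon_fence_need_sync() and radeon_fence_note_sync()
 * are used as a pair (illustrative sketch, not part of this file). If
 * need_sync reports true, the caller emits a semaphore wait on dst_ring
 * via the radeon_semaphore helpers and then records the sync point:
 *
 *     if (radeon_fence_need_sync(fence, dst_ring)) {
 *             ... emit the semaphore signal/wait between the rings ...
 *             radeon_fence_note_sync(fence, dst_ring);
 *     }
 */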
754
755 /**
756  * radeon_fence_note_sync - record the sync point
757  *
758  * @fence: radeon fence object
759  * @dst_ring: which ring to check against
760  *
761  * Note the sequence number at which the fence will be
762  * synced with the requested ring (all asics).
763  */
764 void radeon_fence_note_sync(struct radeon_fence *fence, int dst_ring)
765 {
766         struct radeon_fence_driver *dst, *src;
767         unsigned i;
768
769         if (!fence) {
770                 return;
771         }
772
773         if (fence->ring == dst_ring) {
774                 return;
775         }
776
777         /* we are protected by the ring mutex */
778         src = &fence->rdev->fence_drv[fence->ring];
779         dst = &fence->rdev->fence_drv[dst_ring];
780         for (i = 0; i < RADEON_NUM_RINGS; ++i) {
781                 if (i == dst_ring) {
782                         continue;
783                 }
784                 dst->sync_seq[i] = max(dst->sync_seq[i], src->sync_seq[i]);
785         }
786 }
787
788 /**
789  * radeon_fence_driver_start_ring - make the fence driver
790  * ready for use on the requested ring.
791  *
792  * @rdev: radeon device pointer
793  * @ring: ring index to start the fence driver on
794  *
795  * Make the fence driver ready for processing (all asics).
796  * Not all asics have all rings, so each asic will only
797  * start the fence driver on the rings it has.
798  * Returns 0 for success, errors for failure.
799  */
800 int radeon_fence_driver_start_ring(struct radeon_device *rdev, int ring)
801 {
802         uint64_t index;
803         int r;
804
805         radeon_scratch_free(rdev, rdev->fence_drv[ring].scratch_reg);
806         if (rdev->wb.use_event || !radeon_ring_supports_scratch_reg(rdev, &rdev->ring[ring])) {
807                 rdev->fence_drv[ring].scratch_reg = 0;
808                 if (ring != R600_RING_TYPE_UVD_INDEX) {
809                         index = R600_WB_EVENT_OFFSET + ring * 4;
810                         rdev->fence_drv[ring].cpu_addr = &rdev->wb.wb[index/4];
811                         rdev->fence_drv[ring].gpu_addr = rdev->wb.gpu_addr +
812                                                          index;
813
814                 } else {
815                         /* put fence directly behind firmware */
816                         index = ALIGN(rdev->uvd_fw->size, 8);
817                         rdev->fence_drv[ring].cpu_addr = rdev->uvd.cpu_addr + index;
818                         rdev->fence_drv[ring].gpu_addr = rdev->uvd.gpu_addr + index;
819                 }
820
821         } else {
822                 r = radeon_scratch_get(rdev, &rdev->fence_drv[ring].scratch_reg);
823                 if (r) {
824                         dev_err(rdev->dev, "fence failed to get scratch register\n");
825                         return r;
826                 }
827                 index = RADEON_WB_SCRATCH_OFFSET +
828                         rdev->fence_drv[ring].scratch_reg -
829                         rdev->scratch.reg_base;
830                 rdev->fence_drv[ring].cpu_addr = &rdev->wb.wb[index/4];
831                 rdev->fence_drv[ring].gpu_addr = rdev->wb.gpu_addr + index;
832         }
833         radeon_fence_write(rdev, atomic64_read(&rdev->fence_drv[ring].last_seq), ring);
834         rdev->fence_drv[ring].initialized = true;
835         dev_info(rdev->dev, "fence driver on ring %d use gpu addr 0x%016llx and cpu addr 0x%p\n",
836                  ring, rdev->fence_drv[ring].gpu_addr, rdev->fence_drv[ring].cpu_addr);
837         return 0;
838 }
839
840 /**
841  * radeon_fence_driver_init_ring - init the fence driver
842  * for the requested ring.
843  *
844  * @rdev: radeon device pointer
845  * @ring: ring index to start the fence driver on
846  *
847  * Init the fence driver for the requested ring (all asics).
848  * Helper function for radeon_fence_driver_init().
849  */
850 static void radeon_fence_driver_init_ring(struct radeon_device *rdev, int ring)
851 {
852         int i;
853
854         rdev->fence_drv[ring].scratch_reg = -1;
855         rdev->fence_drv[ring].cpu_addr = NULL;
856         rdev->fence_drv[ring].gpu_addr = 0;
857         for (i = 0; i < RADEON_NUM_RINGS; ++i)
858                 rdev->fence_drv[ring].sync_seq[i] = 0;
859         atomic64_set(&rdev->fence_drv[ring].last_seq, 0);
860         rdev->fence_drv[ring].initialized = false;
861         INIT_DELAYED_WORK(&rdev->fence_drv[ring].lockup_work,
862                           radeon_fence_check_lockup);
863         rdev->fence_drv[ring].rdev = rdev;
864 }
865
866 /**
867  * radeon_fence_driver_init - init the fence driver
868  * for all possible rings.
869  *
870  * @rdev: radeon device pointer
871  *
872  * Init the fence driver for all possible rings (all asics).
873  * Not all asics have all rings, so each asic will only
874  * start the fence driver on the rings it has using
875  * radeon_fence_driver_start_ring().
876  * Returns 0 for success.
877  */
878 int radeon_fence_driver_init(struct radeon_device *rdev)
879 {
880         int ring;
881
882         init_waitqueue_head(&rdev->fence_queue);
883         for (ring = 0; ring < RADEON_NUM_RINGS; ring++) {
884                 radeon_fence_driver_init_ring(rdev, ring);
885         }
886         if (radeon_debugfs_fence_init(rdev)) {
887                 dev_err(rdev->dev, "fence debugfs file creation failed\n");
888         }
889         return 0;
890 }
891
892 /**
893  * radeon_fence_driver_fini - tear down the fence driver
894  * for all possible rings.
895  *
896  * @rdev: radeon device pointer
897  *
898  * Tear down the fence driver for all possible rings (all asics).
899  */
900 void radeon_fence_driver_fini(struct radeon_device *rdev)
901 {
902         int ring, r;
903
904         mutex_lock(&rdev->ring_lock);
905         for (ring = 0; ring < RADEON_NUM_RINGS; ring++) {
906                 if (!rdev->fence_drv[ring].initialized)
907                         continue;
908                 r = radeon_fence_wait_empty(rdev, ring);
909                 if (r) {
910                         /* no need to trigger GPU reset as we are unloading */
911                         radeon_fence_driver_force_completion(rdev, ring);
912                 }
913                 cancel_delayed_work_sync(&rdev->fence_drv[ring].lockup_work);
914                 wake_up_all(&rdev->fence_queue);
915                 radeon_scratch_free(rdev, rdev->fence_drv[ring].scratch_reg);
916                 rdev->fence_drv[ring].initialized = false;
917         }
918         mutex_unlock(&rdev->ring_lock);
919 }
920
921 /**
922  * radeon_fence_driver_force_completion - force all fence waiters to complete
923  *
924  * @rdev: radeon device pointer
925  * @ring: the ring to complete
926  *
927  * In case of a GPU reset failure, make sure no process keeps waiting on a
928  * fence that will never complete.
929  */
930 void radeon_fence_driver_force_completion(struct radeon_device *rdev, int ring)
931 {
932         if (rdev->fence_drv[ring].initialized) {
933                 radeon_fence_write(rdev, rdev->fence_drv[ring].sync_seq[ring], ring);
934                 cancel_delayed_work_sync(&rdev->fence_drv[ring].lockup_work);
935         }
936 }
937
938
939 /*
940  * Fence debugfs
941  */
942 #if defined(CONFIG_DEBUG_FS)
943 static int radeon_debugfs_fence_info(struct seq_file *m, void *data)
944 {
945         struct drm_info_node *node = (struct drm_info_node *)m->private;
946         struct drm_device *dev = node->minor->dev;
947         struct radeon_device *rdev = dev->dev_private;
948         int i, j;
949
950         for (i = 0; i < RADEON_NUM_RINGS; ++i) {
951                 if (!rdev->fence_drv[i].initialized)
952                         continue;
953
954                 radeon_fence_process(rdev, i);
955
956                 seq_printf(m, "--- ring %d ---\n", i);
957                 seq_printf(m, "Last signaled fence 0x%016llx\n",
958                            (unsigned long long)atomic64_read(&rdev->fence_drv[i].last_seq));
959                 seq_printf(m, "Last emitted        0x%016llx\n",
960                            rdev->fence_drv[i].sync_seq[i]);
961
962                 for (j = 0; j < RADEON_NUM_RINGS; ++j) {
963                         if (i != j && rdev->fence_drv[j].initialized)
964                                 seq_printf(m, "Last sync to ring %d 0x%016llx\n",
965                                            j, rdev->fence_drv[i].sync_seq[j]);
966                 }
967         }
968         return 0;
969 }
970
971 /**
972  * radeon_debugfs_gpu_reset - manually trigger a gpu reset
973  *
974  * Manually trigger a gpu reset at the next fence wait.
975  */
976 static int radeon_debugfs_gpu_reset(struct seq_file *m, void *data)
977 {
978         struct drm_info_node *node = (struct drm_info_node *) m->private;
979         struct drm_device *dev = node->minor->dev;
980         struct radeon_device *rdev = dev->dev_private;
981
982         down_read(&rdev->exclusive_lock);
983         seq_printf(m, "%d\n", rdev->needs_reset);
984         rdev->needs_reset = true;
985         wake_up_all(&rdev->fence_queue);
986         up_read(&rdev->exclusive_lock);
987
988         return 0;
989 }
990
991 static struct drm_info_list radeon_debugfs_fence_list[] = {
992         {"radeon_fence_info", &radeon_debugfs_fence_info, 0, NULL},
993         {"radeon_gpu_reset", &radeon_debugfs_gpu_reset, 0, NULL}
994 };
995 #endif
996
997 int radeon_debugfs_fence_init(struct radeon_device *rdev)
998 {
999 #if defined(CONFIG_DEBUG_FS)
1000         return radeon_debugfs_add_files(rdev, radeon_debugfs_fence_list, 2);
1001 #else
1002         return 0;
1003 #endif
1004 }
1005
1006 static const char *radeon_fence_get_driver_name(struct fence *fence)
1007 {
1008         return "radeon";
1009 }
1010
1011 static const char *radeon_fence_get_timeline_name(struct fence *f)
1012 {
1013         struct radeon_fence *fence = to_radeon_fence(f);
1014         switch (fence->ring) {
1015         case RADEON_RING_TYPE_GFX_INDEX: return "radeon.gfx";
1016         case CAYMAN_RING_TYPE_CP1_INDEX: return "radeon.cp1";
1017         case CAYMAN_RING_TYPE_CP2_INDEX: return "radeon.cp2";
1018         case R600_RING_TYPE_DMA_INDEX: return "radeon.dma";
1019         case CAYMAN_RING_TYPE_DMA1_INDEX: return "radeon.dma1";
1020         case R600_RING_TYPE_UVD_INDEX: return "radeon.uvd";
1021         case TN_RING_TYPE_VCE1_INDEX: return "radeon.vce1";
1022         case TN_RING_TYPE_VCE2_INDEX: return "radeon.vce2";
1023         default: WARN_ON_ONCE(1); return "radeon.unk";
1024         }
1025 }
1026
1027 static inline bool radeon_test_signaled(struct radeon_fence *fence)
1028 {
1029         return test_bit(FENCE_FLAG_SIGNALED_BIT, &fence->base.flags);
1030 }
1031
1032 static signed long radeon_fence_default_wait(struct fence *f, bool intr,
1033                                              signed long t)
1034 {
1035         struct radeon_fence *fence = to_radeon_fence(f);
1036         struct radeon_device *rdev = fence->rdev;
1037         bool signaled;
1038
1039         fence_enable_sw_signaling(&fence->base);
1040
1041         /*
1042          * This function has to return -EDEADLK, but cannot hold
1043          * exclusive_lock during the wait because some callers
1044          * may already hold it. This means checking needs_reset without
1045          * lock, and not fiddling with any gpu internals.
1046          *
1047          * The callback installed with fence_enable_sw_signaling will
1048          * run before our wait_event_*timeout call, so we will see
1049          * both the signaled fence and the changes to needs_reset.
1050          */
1051
1052         if (intr)
1053                 t = wait_event_interruptible_timeout(rdev->fence_queue,
1054                         ((signaled = radeon_test_signaled(fence)) ||
1055                          rdev->needs_reset), t);
1056         else
1057                 t = wait_event_timeout(rdev->fence_queue,
1058                         ((signaled = radeon_test_signaled(fence)) ||
1059                          rdev->needs_reset), t);
1060
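        /* Editor's note: if the wait returned early (t > 0) but the fence
         * did not signal, we were woken up by needs_reset; report -EDEADLK
         * so the caller can trigger a GPU reset.
         */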
1061         if (t > 0 && !signaled)
1062                 return -EDEADLK;
1063         return t;
1064 }
1065
1066 const struct fence_ops radeon_fence_ops = {
1067         .get_driver_name = radeon_fence_get_driver_name,
1068         .get_timeline_name = radeon_fence_get_timeline_name,
1069         .enable_signaling = radeon_fence_enable_signaling,
1070         .signaled = radeon_fence_is_signaled,
1071         .wait = radeon_fence_default_wait,
1072         .release = NULL,
1073 };