rtmutex: Turn the plist into an rb-tree
[firefly-linux-kernel-4.4.55.git] / kernel / locking / rtmutex.c
1 /*
2  * RT-Mutexes: simple blocking mutual exclusion locks with PI support
3  *
4  * started by Ingo Molnar and Thomas Gleixner.
5  *
6  *  Copyright (C) 2004-2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
7  *  Copyright (C) 2005-2006 Timesys Corp., Thomas Gleixner <tglx@timesys.com>
8  *  Copyright (C) 2005 Kihon Technologies Inc., Steven Rostedt
9  *  Copyright (C) 2006 Esben Nielsen
10  *
11  *  See Documentation/rt-mutex-design.txt for details.
12  */
13 #include <linux/spinlock.h>
14 #include <linux/export.h>
15 #include <linux/sched.h>
16 #include <linux/sched/rt.h>
17 #include <linux/sched/deadline.h>
18 #include <linux/timer.h>
19
20 #include "rtmutex_common.h"
21
22 /*
23  * lock->owner state tracking:
24  *
25  * lock->owner holds the task_struct pointer of the owner. Bit 0
26  * is used to keep track of the "lock has waiters" state.
27  *
28  * owner        bit0
29  * NULL         0       lock is free (fast acquire possible)
30  * NULL         1       lock is free and has waiters and the top waiter
31  *                              is going to take the lock*
32  * taskpointer  0       lock is held (fast release possible)
33  * taskpointer  1       lock is held and has waiters**
34  *
35  * The fast atomic compare exchange based acquire and release is only
36  * possible when bit 0 of lock->owner is 0.
37  *
38  * (*) It also can be a transitional state when grabbing the lock
39  * with ->wait_lock held. To prevent any fast path cmpxchg of the lock,
40  * we need to set bit 0 before looking at the lock, and the owner may be
41  * NULL during this small window, hence this can be a transitional state.
42  *
43  * (**) There is a small window when bit 0 is set but there are no
44  * waiters. This can happen when grabbing the lock in the slow path.
45  * To prevent a cmpxchg of the owner releasing the lock, we need to
46  * set this bit before looking at the lock.
47  */
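
/*
 * A concrete illustration of the encoding above (the address is hypothetical
 * and for this comment only): with a task_struct at 0xffff880012345600, which
 * is always at least word aligned, lock->owner can hold:
 *
 *	(void *)0xffff880012345600	lock held, no waiters
 *	(void *)0xffff880012345601	lock held, has waiters
 *	(void *)0x0000000000000001	lock free, waiters present
 *					(the transitional state (*) above)
 */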
48
49 static void
50 rt_mutex_set_owner(struct rt_mutex *lock, struct task_struct *owner)
51 {
52         unsigned long val = (unsigned long)owner;
53
54         if (rt_mutex_has_waiters(lock))
55                 val |= RT_MUTEX_HAS_WAITERS;
56
57         lock->owner = (struct task_struct *)val;
58 }
59
60 static inline void clear_rt_mutex_waiters(struct rt_mutex *lock)
61 {
62         lock->owner = (struct task_struct *)
63                         ((unsigned long)lock->owner & ~RT_MUTEX_HAS_WAITERS);
64 }
65
66 static void fixup_rt_mutex_waiters(struct rt_mutex *lock)
67 {
68         if (!rt_mutex_has_waiters(lock))
69                 clear_rt_mutex_waiters(lock);
70 }
71
72 /*
73  * We can speed up the acquire/release if the architecture
74  * supports cmpxchg and there's no debugging state to be set up.
75  */
76 #if defined(__HAVE_ARCH_CMPXCHG) && !defined(CONFIG_DEBUG_RT_MUTEXES)
77 # define rt_mutex_cmpxchg(l,c,n)        (cmpxchg(&l->owner, c, n) == c)
78 static inline void mark_rt_mutex_waiters(struct rt_mutex *lock)
79 {
80         unsigned long owner, *p = (unsigned long *) &lock->owner;
81
82         do {
83                 owner = *p;
84         } while (cmpxchg(p, owner, owner | RT_MUTEX_HAS_WAITERS) != owner);
85 }
86 #else
87 # define rt_mutex_cmpxchg(l,c,n)        (0)
88 static inline void mark_rt_mutex_waiters(struct rt_mutex *lock)
89 {
90         lock->owner = (struct task_struct *)
91                         ((unsigned long)lock->owner | RT_MUTEX_HAS_WAITERS);
92 }
93 #endif
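
/*
 * A minimal sketch of how the cmpxchg variant above enables the lock-free
 * fast path (the real fast paths are rt_mutex_fastlock()/rt_mutex_fastunlock()
 * further down in this file):
 *
 *	if (rt_mutex_cmpxchg(lock, NULL, current))
 *		return;			(owner was NULL, lock acquired)
 *	rt_mutex_slowlock(...);		(contended, take the slow path)
 *
 * Acquire is NULL -> current, release is current -> NULL. Both fail and fall
 * back to the slow path as soon as bit 0 (RT_MUTEX_HAS_WAITERS) is set.
 */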
94
95 static inline int
96 rt_mutex_waiter_less(struct rt_mutex_waiter *left,
97                      struct rt_mutex_waiter *right)
98 {
99         if (left->task->prio < right->task->prio)
100                 return 1;
101
102         /*
103          * If both tasks are dl_task(), we check their deadlines.
104          */
105         if (dl_prio(left->task->prio) && dl_prio(right->task->prio))
106                 return (left->task->dl.deadline < right->task->dl.deadline);
107
108         return 0;
109 }
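
/*
 * Ordering example (illustrative priority values): a waiter whose task has
 * prio 89 (SCHED_FIFO) sorts before one with prio 120 (SCHED_NORMAL), since a
 * lower numerical prio means a higher priority. Two SCHED_DEADLINE waiters
 * are not differentiated by ->prio, so the one with the earlier absolute
 * dl.deadline sorts first.
 */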
110
111 static void
112 rt_mutex_enqueue(struct rt_mutex *lock, struct rt_mutex_waiter *waiter)
113 {
114         struct rb_node **link = &lock->waiters.rb_node;
115         struct rb_node *parent = NULL;
116         struct rt_mutex_waiter *entry;
117         int leftmost = 1;
118
119         while (*link) {
120                 parent = *link;
121                 entry = rb_entry(parent, struct rt_mutex_waiter, tree_entry);
122                 if (rt_mutex_waiter_less(waiter, entry)) {
123                         link = &parent->rb_left;
124                 } else {
125                         link = &parent->rb_right;
126                         leftmost = 0;
127                 }
128         }
129
130         if (leftmost)
131                 lock->waiters_leftmost = &waiter->tree_entry;
132
133         rb_link_node(&waiter->tree_entry, parent, link);
134         rb_insert_color(&waiter->tree_entry, &lock->waiters);
135 }
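
/*
 * Caching the leftmost node means the highest priority waiter, which is
 * always the leftmost node of the rb-tree, can be found in O(1) by
 * rt_mutex_top_waiter(); only enqueue/dequeue pay the O(log n) rebalancing
 * cost. This replaces the O(1) head lookup of the former plist.
 */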
136
137 static void
138 rt_mutex_dequeue(struct rt_mutex *lock, struct rt_mutex_waiter *waiter)
139 {
140         if (RB_EMPTY_NODE(&waiter->tree_entry))
141                 return;
142
143         if (lock->waiters_leftmost == &waiter->tree_entry)
144                 lock->waiters_leftmost = rb_next(&waiter->tree_entry);
145
146         rb_erase(&waiter->tree_entry, &lock->waiters);
147         RB_CLEAR_NODE(&waiter->tree_entry);
148 }
149
150 static void
151 rt_mutex_enqueue_pi(struct task_struct *task, struct rt_mutex_waiter *waiter)
152 {
153         struct rb_node **link = &task->pi_waiters.rb_node;
154         struct rb_node *parent = NULL;
155         struct rt_mutex_waiter *entry;
156         int leftmost = 1;
157
158         while (*link) {
159                 parent = *link;
160                 entry = rb_entry(parent, struct rt_mutex_waiter, pi_tree_entry);
161                 if (rt_mutex_waiter_less(waiter, entry)) {
162                         link = &parent->rb_left;
163                 } else {
164                         link = &parent->rb_right;
165                         leftmost = 0;
166                 }
167         }
168
169         if (leftmost)
170                 task->pi_waiters_leftmost = &waiter->pi_tree_entry;
171
172         rb_link_node(&waiter->pi_tree_entry, parent, link);
173         rb_insert_color(&waiter->pi_tree_entry, &task->pi_waiters);
174 }
175
176 static void
177 rt_mutex_dequeue_pi(struct task_struct *task, struct rt_mutex_waiter *waiter)
178 {
179         if (RB_EMPTY_NODE(&waiter->pi_tree_entry))
180                 return;
181
182         if (task->pi_waiters_leftmost == &waiter->pi_tree_entry)
183                 task->pi_waiters_leftmost = rb_next(&waiter->pi_tree_entry);
184
185         rb_erase(&waiter->pi_tree_entry, &task->pi_waiters);
186         RB_CLEAR_NODE(&waiter->pi_tree_entry);
187 }
188
189 /*
190  * Calculate task priority from the waiter tree priority
191  *
192  * Return task->normal_prio when the waiter tree is empty or when
193  * the waiter is not allowed to do priority boosting
194  */
195 int rt_mutex_getprio(struct task_struct *task)
196 {
197         if (likely(!task_has_pi_waiters(task)))
198                 return task->normal_prio;
199
200         return min(task_top_pi_waiter(task)->task->prio,
201                    task->normal_prio);
202 }
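
/*
 * Example with illustrative values: a SCHED_NORMAL owner with normal_prio 120
 * whose top pi waiter is a SCHED_FIFO task of prio 89 reports prio 89 here,
 * i.e. it is boosted; once that waiter goes away it falls back to 120.
 */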
203
204 /*
205  * Adjust the priority of a task after its pi_waiters tree has been modified.
206  *
207  * This can be both boosting and unboosting. task->pi_lock must be held.
208  */
209 static void __rt_mutex_adjust_prio(struct task_struct *task)
210 {
211         int prio = rt_mutex_getprio(task);
212
213         if (task->prio != prio)
214                 rt_mutex_setprio(task, prio);
215 }
216
217 /*
218  * Adjust task priority (undo boosting). Called from the exit path of
219  * rt_mutex_slowunlock() and rt_mutex_slowlock().
220  *
221  * (Note: We do this outside of the protection of lock->wait_lock to
222  * allow the lock to be taken while or before we readjust the priority
223  * of the task. We do not use the spin_xx_mutex() variants here as we are
224  * outside of the debug path.)
225  */
226 static void rt_mutex_adjust_prio(struct task_struct *task)
227 {
228         unsigned long flags;
229
230         raw_spin_lock_irqsave(&task->pi_lock, flags);
231         __rt_mutex_adjust_prio(task);
232         raw_spin_unlock_irqrestore(&task->pi_lock, flags);
233 }
234
235 /*
236  * Max number of times we'll walk the boosting chain:
237  */
238 int max_lock_depth = 1024;
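
/*
 * This limit is runtime tunable by the admin via the max_lock_depth sysctl
 * (/proc/sys/kernel/max_lock_depth), which is what the "admin changes the
 * limit" case in the chain walk below refers to.
 */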
239
240 /*
241  * Adjust the priority chain. Also used for deadlock detection.
242  * Decreases the task's usage count by one and may thus free the task.
243  *
244  * @task: the task owning the mutex (owner) for which a chain walk is probably
245  *        needed
246  * @deadlock_detect: do we have to carry out deadlock detection?
247  * @orig_lock: the mutex (can be NULL if we are walking the chain to recheck
248  *             things for a task that has just got its priority adjusted, and
249  *             is waiting on a mutex)
250  * @orig_waiter: rt_mutex_waiter struct for the task that has just donated
251  *               its priority to the mutex owner (can be NULL in the case
252  *               depicted above or if the top waiter has gone away and we are
253  *               actually deboosting the owner)
254  * @top_task: the current top waiter
255  *
256  * Returns 0 or -EDEADLK.
257  */
258 static int rt_mutex_adjust_prio_chain(struct task_struct *task,
259                                       int deadlock_detect,
260                                       struct rt_mutex *orig_lock,
261                                       struct rt_mutex_waiter *orig_waiter,
262                                       struct task_struct *top_task)
263 {
264         struct rt_mutex *lock;
265         struct rt_mutex_waiter *waiter, *top_waiter = orig_waiter;
266         int detect_deadlock, ret = 0, depth = 0;
267         unsigned long flags;
268
269         detect_deadlock = debug_rt_mutex_detect_deadlock(orig_waiter,
270                                                          deadlock_detect);
271
272         /*
273          * The (de)boosting is a step-by-step approach with a lot of
274          * pitfalls. We want this to be preemptible and we want to hold
275          * a maximum of two locks per step. So we have to check
276          * carefully whether things have changed under us.
277          */
278  again:
279         if (++depth > max_lock_depth) {
280                 static int prev_max;
281
282                 /*
283                  * Print this only once. If the admin changes the limit,
284                  * print a new message when reaching the limit again.
285                  */
286                 if (prev_max != max_lock_depth) {
287                         prev_max = max_lock_depth;
288                         printk(KERN_WARNING "Maximum lock depth %d reached "
289                                "task: %s (%d)\n", max_lock_depth,
290                                top_task->comm, task_pid_nr(top_task));
291                 }
292                 put_task_struct(task);
293
294                 return deadlock_detect ? -EDEADLK : 0;
295         }
296  retry:
297         /*
298          * The task cannot go away as we did a get_task_struct() before!
299          */
300         raw_spin_lock_irqsave(&task->pi_lock, flags);
301
302         waiter = task->pi_blocked_on;
303         /*
304          * Check whether the end of the boosting chain has been
305          * reached or the state of the chain has changed while we
306          * dropped the locks.
307          */
308         if (!waiter)
309                 goto out_unlock_pi;
310
311         /*
312          * Check the orig_waiter state. After we dropped the locks,
313          * the previous owner of the lock might have released the lock.
314          */
315         if (orig_waiter && !rt_mutex_owner(orig_lock))
316                 goto out_unlock_pi;
317
318         /*
319          * Drop out when the task has no waiters. Note that
320          * top_waiter can be NULL when we are in deboosting
321          * mode!
322          */
323         if (top_waiter && (!task_has_pi_waiters(task) ||
324                            top_waiter != task_top_pi_waiter(task)))
325                 goto out_unlock_pi;
326
327         /*
328          * When deadlock detection is off, we check whether further
329          * priority adjustment is necessary.
330          */
331         if (!detect_deadlock && waiter->task->prio == task->prio)
332                 goto out_unlock_pi;
333
334         lock = waiter->lock;
335         if (!raw_spin_trylock(&lock->wait_lock)) {
336                 raw_spin_unlock_irqrestore(&task->pi_lock, flags);
337                 cpu_relax();
338                 goto retry;
339         }
340
341         /* Deadlock detection */
342         if (lock == orig_lock || rt_mutex_owner(lock) == top_task) {
343                 debug_rt_mutex_deadlock(deadlock_detect, orig_waiter, lock);
344                 raw_spin_unlock(&lock->wait_lock);
345                 ret = deadlock_detect ? -EDEADLK : 0;
346                 goto out_unlock_pi;
347         }
348
349         top_waiter = rt_mutex_top_waiter(lock);
350
351         /* Requeue the waiter */
352         rt_mutex_dequeue(lock, waiter);
353         waiter->task->prio = task->prio;
354         rt_mutex_enqueue(lock, waiter);
355
356         /* Release the task */
357         raw_spin_unlock_irqrestore(&task->pi_lock, flags);
358         if (!rt_mutex_owner(lock)) {
359                 /*
360                  * If the requeue above changed the top waiter, then we need
361                  * to wake the new top waiter up to try to get the lock.
362                  */
363
364                 if (top_waiter != rt_mutex_top_waiter(lock))
365                         wake_up_process(rt_mutex_top_waiter(lock)->task);
366                 raw_spin_unlock(&lock->wait_lock);
367                 goto out_put_task;
368         }
369         put_task_struct(task);
370
371         /* Grab the next task */
372         task = rt_mutex_owner(lock);
373         get_task_struct(task);
374         raw_spin_lock_irqsave(&task->pi_lock, flags);
375
376         if (waiter == rt_mutex_top_waiter(lock)) {
377                 /* Boost the owner */
378                 rt_mutex_dequeue_pi(task, top_waiter);
379                 rt_mutex_enqueue_pi(task, waiter);
380                 __rt_mutex_adjust_prio(task);
381
382         } else if (top_waiter == waiter) {
383                 /* Deboost the owner */
384                 rt_mutex_dequeue_pi(task, waiter);
385                 waiter = rt_mutex_top_waiter(lock);
386                 rt_mutex_enqueue_pi(task, waiter);
387                 __rt_mutex_adjust_prio(task);
388         }
389
390         raw_spin_unlock_irqrestore(&task->pi_lock, flags);
391
392         top_waiter = rt_mutex_top_waiter(lock);
393         raw_spin_unlock(&lock->wait_lock);
394
395         if (!detect_deadlock && waiter != top_waiter)
396                 goto out_put_task;
397
398         goto again;
399
400  out_unlock_pi:
401         raw_spin_unlock_irqrestore(&task->pi_lock, flags);
402  out_put_task:
403         put_task_struct(task);
404
405         return ret;
406 }
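
/*
 * In short: each round of the loop above holds at most one task->pi_lock and
 * one lock->wait_lock at a time, requeues a single waiter, and then advances
 * to the owner of that lock. A chain like "A blocked on L1, owned by B,
 * blocked on L2, owned by C" is thus walked one hop per iteration, with the
 * depth capped by max_lock_depth.
 */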
407
408 /*
409  * Try to take an rt-mutex
410  *
411  * Must be called with lock->wait_lock held.
412  *
413  * @lock:   the lock to be acquired.
414  * @task:   the task which wants to acquire the lock
415  * @waiter: the waiter that is queued to the lock's wait list. (could be NULL)
416  */
417 static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task,
418                 struct rt_mutex_waiter *waiter)
419 {
420         /*
421          * We have to be careful here if the atomic speedups are
422          * enabled, such that, when
423          *  - no other waiter is on the lock
424          *  - the lock has been released since we did the cmpxchg
425          * the lock can be released or taken while we are doing the
426          * checks and marking the lock with RT_MUTEX_HAS_WAITERS.
427          *
428          * The atomic acquire/release aware variant of
429          * mark_rt_mutex_waiters uses a cmpxchg loop. After setting
430          * the WAITERS bit, the atomic release / acquire can not
431          * happen anymore and lock->wait_lock protects us from the
432          * non-atomic case.
433          *
434          * Note that this might set lock->owner =
435          * RT_MUTEX_HAS_WAITERS in the case the lock is no longer
436          * contended. This is fixed up when we take the ownership.
437          * This is the transitional state explained at the top of this file.
438          */
439         mark_rt_mutex_waiters(lock);
440
441         if (rt_mutex_owner(lock))
442                 return 0;
443
444         /*
445          * The task will get the lock because one of these conditions holds:
446          * 1) there is no other waiter
447          * 2) the task has a strictly higher priority than the top waiter
448          * 3) the task is the top waiter itself
449          */
450         if (rt_mutex_has_waiters(lock)) {
451                 if (task->prio >= rt_mutex_top_waiter(lock)->task->prio) {
452                         if (!waiter || waiter != rt_mutex_top_waiter(lock))
453                                 return 0;
454                 }
455         }
456
457         if (waiter || rt_mutex_has_waiters(lock)) {
458                 unsigned long flags;
459                 struct rt_mutex_waiter *top;
460
461                 raw_spin_lock_irqsave(&task->pi_lock, flags);
462
463                 /* remove the queued waiter. */
464                 if (waiter) {
465                         rt_mutex_dequeue(lock, waiter);
466                         task->pi_blocked_on = NULL;
467                 }
468
469                 /*
470                  * We have to enqueue the top waiter (if it exists) into
471                  * the task->pi_waiters tree.
472                  */
473                 if (rt_mutex_has_waiters(lock)) {
474                         top = rt_mutex_top_waiter(lock);
475                         rt_mutex_enqueue_pi(task, top);
476                 }
477                 raw_spin_unlock_irqrestore(&task->pi_lock, flags);
478         }
479
480         /* We got the lock. */
481         debug_rt_mutex_lock(lock);
482
483         rt_mutex_set_owner(lock, task);
484
485         rt_mutex_deadlock_account_lock(lock, task);
486
487         return 1;
488 }
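
/*
 * Example outcome: if @task is the top waiter (condition 3 above), its waiter
 * is dequeued from the lock, the remaining top waiter (if any) is moved onto
 * @task's pi_waiters tree, lock->owner is set to @task (keeping the waiters
 * bit when waiters remain) and 1 is returned.
 */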
489
490 /*
491  * Task blocks on lock.
492  *
493  * Prepare waiter and propagate pi chain
494  *
495  * This must be called with lock->wait_lock held.
496  */
497 static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
498                                    struct rt_mutex_waiter *waiter,
499                                    struct task_struct *task,
500                                    int detect_deadlock)
501 {
502         struct task_struct *owner = rt_mutex_owner(lock);
503         struct rt_mutex_waiter *top_waiter = waiter;
504         unsigned long flags;
505         int chain_walk = 0, res;
506
507         raw_spin_lock_irqsave(&task->pi_lock, flags);
508         __rt_mutex_adjust_prio(task);
509         waiter->task = task;
510         waiter->lock = lock;
511
512         /* Get the top priority waiter on the lock */
513         if (rt_mutex_has_waiters(lock))
514                 top_waiter = rt_mutex_top_waiter(lock);
515         rt_mutex_enqueue(lock, waiter);
516
517         task->pi_blocked_on = waiter;
518
519         raw_spin_unlock_irqrestore(&task->pi_lock, flags);
520
521         if (!owner)
522                 return 0;
523
524         if (waiter == rt_mutex_top_waiter(lock)) {
525                 raw_spin_lock_irqsave(&owner->pi_lock, flags);
526                 rt_mutex_dequeue_pi(owner, top_waiter);
527                 rt_mutex_enqueue_pi(owner, waiter);
528
529                 __rt_mutex_adjust_prio(owner);
530                 if (owner->pi_blocked_on)
531                         chain_walk = 1;
532                 raw_spin_unlock_irqrestore(&owner->pi_lock, flags);
533         }
534         else if (debug_rt_mutex_detect_deadlock(waiter, detect_deadlock))
535                 chain_walk = 1;
536
537         if (!chain_walk)
538                 return 0;
539
540         /*
541          * The owner can't disappear while holding a lock,
542          * so the owner struct is protected by wait_lock.
543          * The reference gets dropped in rt_mutex_adjust_prio_chain()!
544          */
545         get_task_struct(owner);
546
547         raw_spin_unlock(&lock->wait_lock);
548
549         res = rt_mutex_adjust_prio_chain(owner, detect_deadlock, lock, waiter,
550                                          task);
551
552         raw_spin_lock(&lock->wait_lock);
553
554         return res;
555 }
556
557 /*
558  * Wake up the next waiter on the lock.
559  *
560  * Remove the top waiter from the current task's pi waiter tree and wake it up.
561  *
562  * Called with lock->wait_lock held.
563  */
564 static void wakeup_next_waiter(struct rt_mutex *lock)
565 {
566         struct rt_mutex_waiter *waiter;
567         unsigned long flags;
568
569         raw_spin_lock_irqsave(&current->pi_lock, flags);
570
571         waiter = rt_mutex_top_waiter(lock);
572
573         /*
574          * Remove it from current->pi_waiters. We do not adjust a
575          * possible priority boost right now. We execute wakeup in the
576          * boosted mode and go back to normal after releasing
577          * lock->wait_lock.
578          */
579         rt_mutex_dequeue_pi(current, waiter);
580
581         rt_mutex_set_owner(lock, NULL);
582
583         raw_spin_unlock_irqrestore(&current->pi_lock, flags);
584
585         wake_up_process(waiter->task);
586 }
587
588 /*
589  * Remove a waiter from a lock and give up
590  *
591  * Must be called with lock->wait_lock held, after the caller
592  * has just failed in try_to_take_rt_mutex().
593  */
594 static void remove_waiter(struct rt_mutex *lock,
595                           struct rt_mutex_waiter *waiter)
596 {
597         int first = (waiter == rt_mutex_top_waiter(lock));
598         struct task_struct *owner = rt_mutex_owner(lock);
599         unsigned long flags;
600         int chain_walk = 0;
601
602         raw_spin_lock_irqsave(&current->pi_lock, flags);
603         rt_mutex_dequeue(lock, waiter);
604         current->pi_blocked_on = NULL;
605         raw_spin_unlock_irqrestore(&current->pi_lock, flags);
606
607         if (!owner)
608                 return;
609
610         if (first) {
611
612                 raw_spin_lock_irqsave(&owner->pi_lock, flags);
613
614                 rt_mutex_dequeue_pi(owner, waiter);
615
616                 if (rt_mutex_has_waiters(lock)) {
617                         struct rt_mutex_waiter *next;
618
619                         next = rt_mutex_top_waiter(lock);
620                         rt_mutex_enqueue_pi(owner, next);
621                 }
622                 __rt_mutex_adjust_prio(owner);
623
624                 if (owner->pi_blocked_on)
625                         chain_walk = 1;
626
627                 raw_spin_unlock_irqrestore(&owner->pi_lock, flags);
628         }
629
630         if (!chain_walk)
631                 return;
632
633         /* gets dropped in rt_mutex_adjust_prio_chain()! */
634         get_task_struct(owner);
635
636         raw_spin_unlock(&lock->wait_lock);
637
638         rt_mutex_adjust_prio_chain(owner, 0, lock, NULL, current);
639
640         raw_spin_lock(&lock->wait_lock);
641 }
642
643 /*
644  * Recheck the pi chain, in case the task's priority has been changed
645  *
646  * Called from sched_setscheduler()
647  */
648 void rt_mutex_adjust_pi(struct task_struct *task)
649 {
650         struct rt_mutex_waiter *waiter;
651         unsigned long flags;
652
653         raw_spin_lock_irqsave(&task->pi_lock, flags);
654
655         waiter = task->pi_blocked_on;
656         if (!waiter || waiter->task->prio == task->prio) {
657                 raw_spin_unlock_irqrestore(&task->pi_lock, flags);
658                 return;
659         }
660
661         raw_spin_unlock_irqrestore(&task->pi_lock, flags);
662
663         /* gets dropped in rt_mutex_adjust_prio_chain()! */
664         get_task_struct(task);
665         rt_mutex_adjust_prio_chain(task, 0, NULL, NULL, task);
666 }
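
/*
 * Note the early return above: if the task is not blocked on a lock, or no
 * priority change needs to be propagated, there is nothing to do. Otherwise
 * the chain walk starts at the task itself (orig_lock == NULL) and re-sorts
 * its waiter entry on the lock it is blocked on.
 */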
667
668 /**
669  * __rt_mutex_slowlock() - Perform the wait-wake-try-to-take loop
670  * @lock:                the rt_mutex to take
671  * @state:               the state the task should block in (TASK_INTERRUPTIBLE
672  *                       or TASK_UNINTERRUPTIBLE)
673  * @timeout:             the pre-initialized and started timer, or NULL for none
674  * @waiter:              the pre-initialized rt_mutex_waiter
675  *
676  * lock->wait_lock must be held by the caller.
677  */
678 static int __sched
679 __rt_mutex_slowlock(struct rt_mutex *lock, int state,
680                     struct hrtimer_sleeper *timeout,
681                     struct rt_mutex_waiter *waiter)
682 {
683         int ret = 0;
684
685         for (;;) {
686                 /* Try to acquire the lock: */
687                 if (try_to_take_rt_mutex(lock, current, waiter))
688                         break;
689
690                 /*
691                  * In TASK_INTERRUPTIBLE state we check for signals and
692                  * timeout expiry. Both are ignored otherwise.
693                  */
694                 if (unlikely(state == TASK_INTERRUPTIBLE)) {
695                         /* Signal pending? */
696                         if (signal_pending(current))
697                                 ret = -EINTR;
698                         if (timeout && !timeout->task)
699                                 ret = -ETIMEDOUT;
700                         if (ret)
701                                 break;
702                 }
703
704                 raw_spin_unlock(&lock->wait_lock);
705
706                 debug_rt_mutex_print_deadlock(waiter);
707
708                 schedule_rt_mutex(lock);
709
710                 raw_spin_lock(&lock->wait_lock);
711                 set_current_state(state);
712         }
713
714         return ret;
715 }
716
717 /*
718  * Slow path lock function:
719  */
720 static int __sched
721 rt_mutex_slowlock(struct rt_mutex *lock, int state,
722                   struct hrtimer_sleeper *timeout,
723                   int detect_deadlock)
724 {
725         struct rt_mutex_waiter waiter;
726         int ret = 0;
727
728         debug_rt_mutex_init_waiter(&waiter);
729         RB_CLEAR_NODE(&waiter.pi_tree_entry);
730         RB_CLEAR_NODE(&waiter.tree_entry);
731
732         raw_spin_lock(&lock->wait_lock);
733
734         /* Try to acquire the lock again: */
735         if (try_to_take_rt_mutex(lock, current, NULL)) {
736                 raw_spin_unlock(&lock->wait_lock);
737                 return 0;
738         }
739
740         set_current_state(state);
741
742         /* Set up the timer when timeout != NULL */
743         if (unlikely(timeout)) {
744                 hrtimer_start_expires(&timeout->timer, HRTIMER_MODE_ABS);
745                 if (!hrtimer_active(&timeout->timer))
746                         timeout->task = NULL;
747         }
748
749         ret = task_blocks_on_rt_mutex(lock, &waiter, current, detect_deadlock);
750
751         if (likely(!ret))
752                 ret = __rt_mutex_slowlock(lock, state, timeout, &waiter);
753
754         set_current_state(TASK_RUNNING);
755
756         if (unlikely(ret))
757                 remove_waiter(lock, &waiter);
758
759         /*
760          * try_to_take_rt_mutex() sets the waiter bit
761          * unconditionally. We might have to fix that up.
762          */
763         fixup_rt_mutex_waiters(lock);
764
765         raw_spin_unlock(&lock->wait_lock);
766
767         /* Remove pending timer: */
768         if (unlikely(timeout))
769                 hrtimer_cancel(&timeout->timer);
770
771         debug_rt_mutex_free_waiter(&waiter);
772
773         return ret;
774 }
775
776 /*
777  * Slow path try-lock function:
778  */
779 static inline int
780 rt_mutex_slowtrylock(struct rt_mutex *lock)
781 {
782         int ret = 0;
783
784         raw_spin_lock(&lock->wait_lock);
785
786         if (likely(rt_mutex_owner(lock) != current)) {
787
788                 ret = try_to_take_rt_mutex(lock, current, NULL);
789                 /*
790                  * try_to_take_rt_mutex() sets the lock waiters
791                  * bit unconditionally. Clean this up.
792                  */
793                 fixup_rt_mutex_waiters(lock);
794         }
795
796         raw_spin_unlock(&lock->wait_lock);
797
798         return ret;
799 }
800
801 /*
802  * Slow path to release a rt-mutex:
803  */
804 static void __sched
805 rt_mutex_slowunlock(struct rt_mutex *lock)
806 {
807         raw_spin_lock(&lock->wait_lock);
808
809         debug_rt_mutex_unlock(lock);
810
811         rt_mutex_deadlock_account_unlock(current);
812
813         if (!rt_mutex_has_waiters(lock)) {
814                 lock->owner = NULL;
815                 raw_spin_unlock(&lock->wait_lock);
816                 return;
817         }
818
819         wakeup_next_waiter(lock);
820
821         raw_spin_unlock(&lock->wait_lock);
822
823         /* Undo pi boosting if necessary: */
824         rt_mutex_adjust_prio(current);
825 }
826
827 /*
828  * Debug aware fast/slowpath lock, trylock and unlock
829  *
830  * The atomic acquire/release ops are compiled away when either the
831  * architecture does not support cmpxchg or when debugging is enabled.
832  */
833 static inline int
834 rt_mutex_fastlock(struct rt_mutex *lock, int state,
835                   int detect_deadlock,
836                   int (*slowfn)(struct rt_mutex *lock, int state,
837                                 struct hrtimer_sleeper *timeout,
838                                 int detect_deadlock))
839 {
840         if (!detect_deadlock && likely(rt_mutex_cmpxchg(lock, NULL, current))) {
841                 rt_mutex_deadlock_account_lock(lock, current);
842                 return 0;
843         } else
844                 return slowfn(lock, state, NULL, detect_deadlock);
845 }
846
847 static inline int
848 rt_mutex_timed_fastlock(struct rt_mutex *lock, int state,
849                         struct hrtimer_sleeper *timeout, int detect_deadlock,
850                         int (*slowfn)(struct rt_mutex *lock, int state,
851                                       struct hrtimer_sleeper *timeout,
852                                       int detect_deadlock))
853 {
854         if (!detect_deadlock && likely(rt_mutex_cmpxchg(lock, NULL, current))) {
855                 rt_mutex_deadlock_account_lock(lock, current);
856                 return 0;
857         } else
858                 return slowfn(lock, state, timeout, detect_deadlock);
859 }
860
861 static inline int
862 rt_mutex_fasttrylock(struct rt_mutex *lock,
863                      int (*slowfn)(struct rt_mutex *lock))
864 {
865         if (likely(rt_mutex_cmpxchg(lock, NULL, current))) {
866                 rt_mutex_deadlock_account_lock(lock, current);
867                 return 1;
868         }
869         return slowfn(lock);
870 }
871
872 static inline void
873 rt_mutex_fastunlock(struct rt_mutex *lock,
874                     void (*slowfn)(struct rt_mutex *lock))
875 {
876         if (likely(rt_mutex_cmpxchg(lock, current, NULL)))
877                 rt_mutex_deadlock_account_unlock(current);
878         else
879                 slowfn(lock);
880 }
881
882 /**
883  * rt_mutex_lock - lock a rt_mutex
884  *
885  * @lock: the rt_mutex to be locked
886  */
887 void __sched rt_mutex_lock(struct rt_mutex *lock)
888 {
889         might_sleep();
890
891         rt_mutex_fastlock(lock, TASK_UNINTERRUPTIBLE, 0, rt_mutex_slowlock);
892 }
893 EXPORT_SYMBOL_GPL(rt_mutex_lock);
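
/*
 * A minimal usage sketch (DEFINE_RT_MUTEX() comes from <linux/rtmutex.h>,
 * my_lock is just an example name):
 *
 *	static DEFINE_RT_MUTEX(my_lock);
 *
 *	rt_mutex_lock(&my_lock);
 *	... critical section, priority inheritance applies to the owner ...
 *	rt_mutex_unlock(&my_lock);
 */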
894
895 /**
896  * rt_mutex_lock_interruptible - lock a rt_mutex interruptible
897  *
898  * @lock:               the rt_mutex to be locked
899  * @detect_deadlock:    deadlock detection on/off
900  *
901  * Returns:
902  *  0           on success
903  * -EINTR       when interrupted by a signal
904  * -EDEADLK     when the lock would deadlock (when deadlock detection is on)
905  */
906 int __sched rt_mutex_lock_interruptible(struct rt_mutex *lock,
907                                                  int detect_deadlock)
908 {
909         might_sleep();
910
911         return rt_mutex_fastlock(lock, TASK_INTERRUPTIBLE,
912                                  detect_deadlock, rt_mutex_slowlock);
913 }
914 EXPORT_SYMBOL_GPL(rt_mutex_lock_interruptible);
915
916 /**
917  * rt_mutex_timed_lock - lock a rt_mutex interruptible,
918  *                      with the timeout structure provided
919  *                      by the caller
920  *
921  * @lock:               the rt_mutex to be locked
922  * @timeout:            timeout structure or NULL (no timeout)
923  * @detect_deadlock:    deadlock detection on/off
924  *
925  * Returns:
926  *  0           on success
927  * -EINTR       when interrupted by a signal
928  * -ETIMEDOUT   when the timeout expired
929  * -EDEADLK     when the lock would deadlock (when deadlock detection is on)
930  */
931 int
932 rt_mutex_timed_lock(struct rt_mutex *lock, struct hrtimer_sleeper *timeout,
933                     int detect_deadlock)
934 {
935         might_sleep();
936
937         return rt_mutex_timed_fastlock(lock, TASK_INTERRUPTIBLE, timeout,
938                                        detect_deadlock, rt_mutex_slowlock);
939 }
940 EXPORT_SYMBOL_GPL(rt_mutex_timed_lock);
941
942 /**
943  * rt_mutex_trylock - try to lock a rt_mutex
944  *
945  * @lock:       the rt_mutex to be locked
946  *
947  * Returns 1 on success and 0 on contention
948  */
949 int __sched rt_mutex_trylock(struct rt_mutex *lock)
950 {
951         return rt_mutex_fasttrylock(lock, rt_mutex_slowtrylock);
952 }
953 EXPORT_SYMBOL_GPL(rt_mutex_trylock);
954
955 /**
956  * rt_mutex_unlock - unlock a rt_mutex
957  *
958  * @lock: the rt_mutex to be unlocked
959  */
960 void __sched rt_mutex_unlock(struct rt_mutex *lock)
961 {
962         rt_mutex_fastunlock(lock, rt_mutex_slowunlock);
963 }
964 EXPORT_SYMBOL_GPL(rt_mutex_unlock);
965
966 /**
967  * rt_mutex_destroy - mark a mutex unusable
968  * @lock: the mutex to be destroyed
969  *
970  * This function marks the mutex uninitialized, and any subsequent
971  * use of the mutex is forbidden. The mutex must not be locked when
972  * this function is called.
973  */
974 void rt_mutex_destroy(struct rt_mutex *lock)
975 {
976         WARN_ON(rt_mutex_is_locked(lock));
977 #ifdef CONFIG_DEBUG_RT_MUTEXES
978         lock->magic = NULL;
979 #endif
980 }
981
982 EXPORT_SYMBOL_GPL(rt_mutex_destroy);
983
984 /**
985  * __rt_mutex_init - initialize the rt lock
986  *
987  * @lock: the rt lock to be initialized
988  *
989  * Initialize the rt lock to unlocked state.
990  *
991  * Initializing of a locked rt lock is not allowed
992  */
993 void __rt_mutex_init(struct rt_mutex *lock, const char *name)
994 {
995         lock->owner = NULL;
996         raw_spin_lock_init(&lock->wait_lock);
997         lock->waiters = RB_ROOT;
998         lock->waiters_leftmost = NULL;
999
1000         debug_rt_mutex_init(lock, name);
1001 }
1002 EXPORT_SYMBOL_GPL(__rt_mutex_init);
1003
1004 /**
1005  * rt_mutex_init_proxy_locked - initialize and lock a rt_mutex on behalf of a
1006  *                              proxy owner
1007  *
1008  * @lock:       the rt_mutex to be locked
1009  * @proxy_owner: the task to set as owner
1010  *
1011  * No locking. Caller has to do serializing itself
1012  * Special API call for PI-futex support
1013  */
1014 void rt_mutex_init_proxy_locked(struct rt_mutex *lock,
1015                                 struct task_struct *proxy_owner)
1016 {
1017         __rt_mutex_init(lock, NULL);
1018         debug_rt_mutex_proxy_lock(lock, proxy_owner);
1019         rt_mutex_set_owner(lock, proxy_owner);
1020         rt_mutex_deadlock_account_lock(lock, proxy_owner);
1021 }
1022
1023 /**
1024  * rt_mutex_proxy_unlock - release a lock on behalf of owner
1025  *
1026  * @lock:       the rt_mutex to be unlocked
1027  *
1028  * No locking. Caller has to do serializing itself
1029  * Special API call for PI-futex support
1030  */
1031 void rt_mutex_proxy_unlock(struct rt_mutex *lock,
1032                            struct task_struct *proxy_owner)
1033 {
1034         debug_rt_mutex_proxy_unlock(lock);
1035         rt_mutex_set_owner(lock, NULL);
1036         rt_mutex_deadlock_account_unlock(proxy_owner);
1037 }
1038
1039 /**
1040  * rt_mutex_start_proxy_lock() - Start lock acquisition for another task
1041  * @lock:               the rt_mutex to take
1042  * @waiter:             the pre-initialized rt_mutex_waiter
1043  * @task:               the task to prepare
1044  * @detect_deadlock:    perform deadlock detection (1) or not (0)
1045  *
1046  * Returns:
1047  *  0 - task blocked on lock
1048  *  1 - acquired the lock for task, caller should wake it up
1049  * <0 - error
1050  *
1051  * Special API call for FUTEX_REQUEUE_PI support.
1052  */
1053 int rt_mutex_start_proxy_lock(struct rt_mutex *lock,
1054                               struct rt_mutex_waiter *waiter,
1055                               struct task_struct *task, int detect_deadlock)
1056 {
1057         int ret;
1058
1059         raw_spin_lock(&lock->wait_lock);
1060
1061         if (try_to_take_rt_mutex(lock, task, NULL)) {
1062                 raw_spin_unlock(&lock->wait_lock);
1063                 return 1;
1064         }
1065
1066         ret = task_blocks_on_rt_mutex(lock, waiter, task, detect_deadlock);
1067
1068         if (ret && !rt_mutex_owner(lock)) {
1069                 /*
1070                  * Reset the return value. We might have
1071                  * returned with -EDEADLK and the owner
1072                  * released the lock while we were walking the
1073                  * pi chain.  Let the waiter sort it out.
1074                  */
1075                 ret = 0;
1076         }
1077
1078         if (unlikely(ret))
1079                 remove_waiter(lock, waiter);
1080
1081         raw_spin_unlock(&lock->wait_lock);
1082
1083         debug_rt_mutex_print_deadlock(waiter);
1084
1085         return ret;
1086 }
1087
1088 /**
1089  * rt_mutex_next_owner - return the next owner of the lock
1090  *
1091  * @lock: the rt lock to query
1092  *
1093  * Returns the next owner of the lock or NULL
1094  *
1095  * Caller has to serialize against other accessors to the lock
1096  * itself.
1097  *
1098  * Special API call for PI-futex support
1099  */
1100 struct task_struct *rt_mutex_next_owner(struct rt_mutex *lock)
1101 {
1102         if (!rt_mutex_has_waiters(lock))
1103                 return NULL;
1104
1105         return rt_mutex_top_waiter(lock)->task;
1106 }
1107
1108 /**
1109  * rt_mutex_finish_proxy_lock() - Complete lock acquisition
1110  * @lock:               the rt_mutex we were woken on
1111  * @to:                 the timeout, NULL if none. The hrtimer should already have
1112  *                      been started.
1113  * @waiter:             the pre-initialized rt_mutex_waiter
1114  * @detect_deadlock:    perform deadlock detection (1) or not (0)
1115  *
1116  * Complete the lock acquisition started on our behalf by another thread.
1117  *
1118  * Returns:
1119  *  0 - success
1120  * <0 - error, one of -EINTR, -ETIMEDOUT, or -EDEADLK
1121  *
1122  * Special API call for PI-futex requeue support
1123  */
1124 int rt_mutex_finish_proxy_lock(struct rt_mutex *lock,
1125                                struct hrtimer_sleeper *to,
1126                                struct rt_mutex_waiter *waiter,
1127                                int detect_deadlock)
1128 {
1129         int ret;
1130
1131         raw_spin_lock(&lock->wait_lock);
1132
1133         set_current_state(TASK_INTERRUPTIBLE);
1134
1135         ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter);
1136
1137         set_current_state(TASK_RUNNING);
1138
1139         if (unlikely(ret))
1140                 remove_waiter(lock, waiter);
1141
1142         /*
1143          * try_to_take_rt_mutex() sets the waiter bit unconditionally. We might
1144          * have to fix that up.
1145          */
1146         fixup_rt_mutex_waiters(lock);
1147
1148         raw_spin_unlock(&lock->wait_lock);
1149
1150         return ret;
1151 }