// SPDX-License-Identifier: GPL-2.0-only
/*
 * RT-Mutexes: simple blocking mutual exclusion locks with PI support
 *
 * started by Ingo Molnar and Thomas Gleixner.
 *
 * Copyright (C) 2004-2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
 * Copyright (C) 2005-2006 Timesys Corp., Thomas Gleixner <tglx@timesys.com>
 * Copyright (C) 2005 Kihon Technologies Inc., Steven Rostedt
 * Copyright (C) 2006 Esben Nielsen
 * Adaptive Spinlocks:
 *  Copyright (C) 2008 Novell, Inc., Gregory Haskins, Sven Dietrich,
 *  and Peter Morreale,
 * Adaptive Spinlocks simplification:
 *  Copyright (C) 2008 Red Hat, Inc., Steven Rostedt <srostedt@redhat.com>
 *
 * See Documentation/locking/rt-mutex-design.rst for details.
 */
#include <linux/sched.h>
#include <linux/sched/debug.h>
#include <linux/sched/deadline.h>
#include <linux/sched/signal.h>
#include <linux/sched/rt.h>
#include <linux/sched/wake_q.h>
#include <linux/ww_mutex.h>

#include "rtmutex_common.h"

#ifndef WW_RT
# define build_ww_mutex()       (false)
# define ww_container_of(rtm)   NULL

static inline int __ww_mutex_add_waiter(struct rt_mutex_waiter *waiter,
                                        struct rt_mutex *lock,
                                        struct ww_acquire_ctx *ww_ctx)
{
        return 0;
}

static inline void __ww_mutex_check_waiters(struct rt_mutex *lock,
                                            struct ww_acquire_ctx *ww_ctx)
{
}

static inline void ww_mutex_lock_acquired(struct ww_mutex *lock,
                                          struct ww_acquire_ctx *ww_ctx)
{
}

static inline int __ww_mutex_check_kill(struct rt_mutex *lock,
                                        struct rt_mutex_waiter *waiter,
                                        struct ww_acquire_ctx *ww_ctx)
{
        return 0;
}

#else
# define build_ww_mutex()       (true)
# define ww_container_of(rtm)   container_of(rtm, struct ww_mutex, base)
# include "ww_mutex.h"
#endif

/*
 * lock->owner state tracking:
 *
 * lock->owner holds the task_struct pointer of the owner. Bit 0
 * is used to keep track of the "lock has waiters" state.
 *
 * owner        bit0
 * NULL         0       lock is free (fast acquire possible)
 * NULL         1       lock is free and has waiters and the top waiter
 *                      is going to take the lock*
 * taskpointer  0       lock is held (fast release possible)
 * taskpointer  1       lock is held and has waiters**
 *
 * The fast atomic compare exchange based acquire and release is only
 * possible when bit 0 of lock->owner is 0.
 *
 * (*) It also can be a transitional state when grabbing the lock
 * while ->wait_lock is held. To prevent any fast path cmpxchg to the lock,
 * we need to set bit0 before looking at the lock, and the owner may be
 * NULL in this small window, hence this can be a transitional state.
 *
 * (**) There is a small time when bit 0 is set but there are no
 * waiters. This can happen when grabbing the lock in the slow path.
 * To prevent a cmpxchg of the owner releasing the lock, we need to
 * set this bit before looking at the lock.
 */
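
/*
 * Illustrative sketch (editor's addition, not part of the build): a
 * minimal userspace model of how the owner pointer and the
 * RT_MUTEX_HAS_WAITERS bit share a single word. All names below are
 * hypothetical stand-ins, assuming bit 0 of a task pointer is free
 * due to alignment.
 */
#if 0
#include <stdint.h>
#include <stdio.h>

#define HAS_WAITERS     1UL

struct task { int pid; };

static uintptr_t encode_owner(struct task *t, int has_waiters)
{
        /* Stuff the flag into bit 0 of the (aligned) pointer. */
        return (uintptr_t)t | (has_waiters ? HAS_WAITERS : 0);
}

static struct task *decode_owner(uintptr_t val)
{
        /* Mask off bit 0 to recover the real task pointer. */
        return (struct task *)(val & ~HAS_WAITERS);
}

int main(void)
{
        struct task t = { .pid = 42 };
        uintptr_t owner = encode_owner(&t, 1);

        printf("owner pid=%d, has_waiters=%lu\n",
               decode_owner(owner)->pid,
               (unsigned long)(owner & HAS_WAITERS));
        return 0;
}
#endif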

static __always_inline void
rt_mutex_set_owner(struct rt_mutex_base *lock, struct task_struct *owner)
{
        unsigned long val = (unsigned long)owner;

        if (rt_mutex_has_waiters(lock))
                val |= RT_MUTEX_HAS_WAITERS;

        WRITE_ONCE(lock->owner, (struct task_struct *)val);
}

static __always_inline void clear_rt_mutex_waiters(struct rt_mutex_base *lock)
{
        lock->owner = (struct task_struct *)
                        ((unsigned long)lock->owner & ~RT_MUTEX_HAS_WAITERS);
}

static __always_inline void fixup_rt_mutex_waiters(struct rt_mutex_base *lock)
{
        unsigned long owner, *p = (unsigned long *) &lock->owner;

        if (rt_mutex_has_waiters(lock))
                return;

        /*
         * The rbtree has no waiters enqueued, now make sure that the
         * lock->owner still has the waiters bit set, otherwise the
         * following can happen:
         *
         * CPU 0        CPU 1           CPU2
         * l->owner=T1
         *              rt_mutex_lock(l)
         *              lock(l->lock)
         *              l->owner = T1 | HAS_WAITERS;
         *              enqueue(T2)
         *              boost()
         *                unlock(l->lock)
         *              block()
         *
         *                              rt_mutex_lock(l)
         *                              lock(l->lock)
         *                              l->owner = T1 | HAS_WAITERS;
         *                              enqueue(T3)
         *                              boost()
         *                                unlock(l->lock)
         *                              block()
         *              signal(->T2)    signal(->T3)
         *              lock(l->lock)
         *              dequeue(T2)
         *              deboost()
         *                unlock(l->lock)
         *                              lock(l->lock)
         *                              dequeue(T3)
         *                               ==> wait list is empty
         *                              deboost()
         *                               unlock(l->lock)
         *              lock(l->lock)
         *              fixup_rt_mutex_waiters()
         *                if (wait_list_empty(l)) {
         *                  owner = l->owner & ~HAS_WAITERS;
         *                  l->owner = owner
         *                    ==> l->owner = T1
         *                }
         *                              lock(l->lock)
         * rt_mutex_unlock(l)           fixup_rt_mutex_waiters()
         *                                if (wait_list_empty(l)) {
         *                                  owner = l->owner & ~HAS_WAITERS;
         * cmpxchg(l->owner, T1, NULL)
         *  ===> Success (l->owner = NULL)
         *
         *                                  l->owner = owner
         *                                    ==> l->owner = T1
         *                                }
         *
         * With the check for the waiter bit in place T3 on CPU2 will not
         * overwrite. All tasks fiddling with the waiters bit are
         * serialized by l->lock, so nothing else can modify the waiters
         * bit. If the bit is set then nothing can change l->owner either
         * so the simple RMW is safe. The cmpxchg() will simply fail if it
         * happens in the middle of the RMW because the waiters bit is
         * still set.
         */
        owner = READ_ONCE(*p);
        if (owner & RT_MUTEX_HAS_WAITERS)
                WRITE_ONCE(*p, owner & ~RT_MUTEX_HAS_WAITERS);
}

/*
 * We can speed up the acquire/release, if there's no debugging state to be
 * set up.
 */
#ifndef CONFIG_DEBUG_RT_MUTEXES
static __always_inline bool rt_mutex_cmpxchg_acquire(struct rt_mutex_base *lock,
                                                     struct task_struct *old,
                                                     struct task_struct *new)
{
        return try_cmpxchg_acquire(&lock->owner, &old, new);
}

static __always_inline bool rt_mutex_cmpxchg_release(struct rt_mutex_base *lock,
                                                     struct task_struct *old,
                                                     struct task_struct *new)
{
        return try_cmpxchg_release(&lock->owner, &old, new);
}

/*
 * Callers must hold the ->wait_lock -- which is the whole purpose as we force
 * all future threads that attempt to [Rmw] the lock to the slowpath. As such
 * relaxed semantics suffice.
 */
static __always_inline void mark_rt_mutex_waiters(struct rt_mutex_base *lock)
{
        unsigned long owner, *p = (unsigned long *) &lock->owner;

        do {
                owner = *p;
        } while (cmpxchg_relaxed(p, owner,
                                 owner | RT_MUTEX_HAS_WAITERS) != owner);
}

/*
 * Safe fastpath aware unlock:
 * 1) Clear the waiters bit
 * 2) Drop lock->wait_lock
 * 3) Try to unlock the lock with cmpxchg
 */
static __always_inline bool unlock_rt_mutex_safe(struct rt_mutex_base *lock,
                                                 unsigned long flags)
        __releases(lock->wait_lock)
{
        struct task_struct *owner = rt_mutex_owner(lock);

        clear_rt_mutex_waiters(lock);
        raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
        /*
         * If a new waiter comes in between the unlock and the cmpxchg
         * we have two situations:
         *
         * unlock(wait_lock);
         *                                      lock(wait_lock);
         * cmpxchg(p, owner, 0) == owner
         *                                      mark_rt_mutex_waiters(lock);
         *                                      acquire(lock);
         * or:
         *
         * unlock(wait_lock);
         *                                      lock(wait_lock);
         *                                      mark_rt_mutex_waiters(lock);
         *
         * cmpxchg(p, owner, 0) != owner
         *                                      enqueue_waiter();
         *                                      unlock(wait_lock);
         * lock(wait_lock);
         * wake waiter();
         * unlock(wait_lock);
         *                                      lock(wait_lock);
         *                                      acquire(lock);
         */
        return rt_mutex_cmpxchg_release(lock, owner, NULL);
}
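
/*
 * Illustrative sketch (editor's addition, not part of the build): a
 * single-threaded C11 model of the unlock sequence above -- clear the
 * waiters bit while holding the wait lock, drop the lock, then try to
 * release ownership with a compare-and-swap. All names below are
 * hypothetical stand-ins.
 */
#if 0
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define HAS_WAITERS     1UL

struct model_lock {
        _Atomic unsigned long owner;    /* task pointer | HAS_WAITERS */
};

static bool model_unlock_safe(struct model_lock *l, unsigned long me)
{
        /* Step 1: clear the waiters bit (wait_lock would be held here). */
        atomic_fetch_and(&l->owner, ~HAS_WAITERS);
        /* Step 2: wait_lock is dropped here in the real code. */
        /* Step 3: release only if no waiter snuck in and re-set the bit. */
        unsigned long expected = me;
        return atomic_compare_exchange_strong(&l->owner, &expected, 0UL);
}

int main(void)
{
        struct model_lock l = { .owner = 0xdead0000UL | HAS_WAITERS };

        /* Succeeds: no new waiter set the bit between steps 2 and 3. */
        printf("released: %d\n", model_unlock_safe(&l, 0xdead0000UL));
        return 0;
}
#endif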

#else
static __always_inline bool rt_mutex_cmpxchg_acquire(struct rt_mutex_base *lock,
                                                     struct task_struct *old,
                                                     struct task_struct *new)
{
        return false;
}

static __always_inline bool rt_mutex_cmpxchg_release(struct rt_mutex_base *lock,
                                                     struct task_struct *old,
                                                     struct task_struct *new)
{
        return false;
}

static __always_inline void mark_rt_mutex_waiters(struct rt_mutex_base *lock)
{
        lock->owner = (struct task_struct *)
                        ((unsigned long)lock->owner | RT_MUTEX_HAS_WAITERS);
}

/*
 * Simple slow path only version: lock->owner is protected by lock->wait_lock.
 */
static __always_inline bool unlock_rt_mutex_safe(struct rt_mutex_base *lock,
                                                 unsigned long flags)
        __releases(lock->wait_lock)
{
        lock->owner = NULL;
        raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
        return true;
}
#endif

static __always_inline int __waiter_prio(struct task_struct *task)
{
        int prio = task->prio;

        if (!rt_prio(prio))
                return DEFAULT_PRIO;

        return prio;
}

static __always_inline void
waiter_update_prio(struct rt_mutex_waiter *waiter, struct task_struct *task)
{
        waiter->prio = __waiter_prio(task);
        waiter->deadline = task->dl.deadline;
}

/*
 * Only use with rt_mutex_waiter_{less,equal}()
 */
#define task_to_waiter(p)       \
        &(struct rt_mutex_waiter){ .prio = __waiter_prio(p), .deadline = (p)->dl.deadline }

static __always_inline int rt_mutex_waiter_less(struct rt_mutex_waiter *left,
                                                struct rt_mutex_waiter *right)
{
        if (left->prio < right->prio)
                return 1;

        /*
         * If both waiters have dl_prio(), we check the deadlines of the
         * associated tasks.
         * If left waiter has a dl_prio(), and we didn't return 1 above,
         * then right waiter has a dl_prio() too.
         */
        if (dl_prio(left->prio))
                return dl_time_before(left->deadline, right->deadline);

        return 0;
}

static __always_inline int rt_mutex_waiter_equal(struct rt_mutex_waiter *left,
                                                 struct rt_mutex_waiter *right)
{
        if (left->prio != right->prio)
                return 0;

        /*
         * If both waiters have dl_prio(), we check the deadlines of the
         * associated tasks.
         * If left waiter has a dl_prio(), and we didn't return 0 above,
         * then right waiter has a dl_prio() too.
         */
        if (dl_prio(left->prio))
                return left->deadline == right->deadline;

        return 1;
}
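
/*
 * Illustrative sketch (editor's addition, not part of the build): the
 * ordering above in miniature -- a numerically lower ->prio wins, and
 * equal-priority deadline waiters fall back to the earlier absolute
 * deadline, mirroring dl_time_before()'s signed subtraction. The
 * struct and helper below are hypothetical stand-ins.
 */
#if 0
#include <stdio.h>

struct mini_waiter { int prio; unsigned long long deadline; };

static int mini_waiter_less(struct mini_waiter *l, struct mini_waiter *r)
{
        if (l->prio < r->prio)
                return 1;
        /* Model of the dl_prio() tie-break on absolute deadlines. */
        if (l->prio == r->prio)
                return (long long)(l->deadline - r->deadline) < 0;
        return 0;
}

int main(void)
{
        struct mini_waiter a = { .prio = 10, .deadline = 100 };
        struct mini_waiter b = { .prio = 10, .deadline = 200 };

        /* Same priority: the earlier deadline orders first. */
        printf("a before b: %d\n", mini_waiter_less(&a, &b));
        return 0;
}
#endif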

static inline bool rt_mutex_steal(struct rt_mutex_waiter *waiter,
                                  struct rt_mutex_waiter *top_waiter)
{
        if (rt_mutex_waiter_less(waiter, top_waiter))
                return true;

#ifdef RT_MUTEX_BUILD_SPINLOCKS
        /*
         * Note that RT tasks are excluded from same priority (lateral)
         * steals to prevent the introduction of an unbounded latency.
         */
        if (rt_prio(waiter->prio) || dl_prio(waiter->prio))
                return false;

        return rt_mutex_waiter_equal(waiter, top_waiter);
#else
        return false;
#endif
}

#define __node_2_waiter(node) \
        rb_entry((node), struct rt_mutex_waiter, tree_entry)

static __always_inline bool __waiter_less(struct rb_node *a, const struct rb_node *b)
{
        struct rt_mutex_waiter *aw = __node_2_waiter(a);
        struct rt_mutex_waiter *bw = __node_2_waiter(b);

        if (rt_mutex_waiter_less(aw, bw))
                return 1;

        if (!build_ww_mutex())
                return 0;

        if (rt_mutex_waiter_less(bw, aw))
                return 0;

        /* NOTE: relies on waiter->ww_ctx being set before insertion */
        if (aw->ww_ctx) {
                if (!bw->ww_ctx)
                        return 1;

                return (signed long)(aw->ww_ctx->stamp -
                                     bw->ww_ctx->stamp) < 0;
        }

        return 0;
}
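
/*
 * Illustrative sketch (editor's addition, not part of the build): the
 * stamp comparison above uses subtraction cast to a signed type so the
 * "older" test stays correct even when the counter wraps. A minimal
 * model with hypothetical names:
 */
#if 0
#include <limits.h>
#include <stdio.h>

static int stamp_older(unsigned long a, unsigned long b)
{
        /*
         * Wrap-safe: correct as long as the two stamps are less than
         * half the counter range apart.
         */
        return (signed long)(a - b) < 0;
}

int main(void)
{
        unsigned long near_wrap = ULONG_MAX - 1;

        /*
         * near_wrap was issued before 2, despite 2 being numerically
         * smaller after the counter wrapped.
         */
        printf("older across wrap: %d\n", stamp_older(near_wrap, 2));
        return 0;
}
#endif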

static __always_inline void
rt_mutex_enqueue(struct rt_mutex_base *lock, struct rt_mutex_waiter *waiter)
{
        rb_add_cached(&waiter->tree_entry, &lock->waiters, __waiter_less);
}

static __always_inline void
rt_mutex_dequeue(struct rt_mutex_base *lock, struct rt_mutex_waiter *waiter)
{
        if (RB_EMPTY_NODE(&waiter->tree_entry))
                return;

        rb_erase_cached(&waiter->tree_entry, &lock->waiters);
        RB_CLEAR_NODE(&waiter->tree_entry);
}

#define __node_2_pi_waiter(node) \
        rb_entry((node), struct rt_mutex_waiter, pi_tree_entry)

static __always_inline bool
__pi_waiter_less(struct rb_node *a, const struct rb_node *b)
{
        return rt_mutex_waiter_less(__node_2_pi_waiter(a), __node_2_pi_waiter(b));
}

static __always_inline void
rt_mutex_enqueue_pi(struct task_struct *task, struct rt_mutex_waiter *waiter)
{
        rb_add_cached(&waiter->pi_tree_entry, &task->pi_waiters, __pi_waiter_less);
}

static __always_inline void
rt_mutex_dequeue_pi(struct task_struct *task, struct rt_mutex_waiter *waiter)
{
        if (RB_EMPTY_NODE(&waiter->pi_tree_entry))
                return;

        rb_erase_cached(&waiter->pi_tree_entry, &task->pi_waiters);
        RB_CLEAR_NODE(&waiter->pi_tree_entry);
}

static __always_inline void rt_mutex_adjust_prio(struct task_struct *p)
{
        struct task_struct *pi_task = NULL;

        lockdep_assert_held(&p->pi_lock);

        if (task_has_pi_waiters(p))
                pi_task = task_top_pi_waiter(p)->task;

        rt_mutex_setprio(p, pi_task);
}

/* RT mutex specific wake_q wrappers */
static __always_inline void rt_mutex_wake_q_add_task(struct rt_wake_q_head *wqh,
                                                     struct task_struct *task,
                                                     unsigned int wake_state)
{
        if (IS_ENABLED(CONFIG_PREEMPT_RT) && wake_state == TASK_RTLOCK_WAIT) {
                if (IS_ENABLED(CONFIG_PROVE_LOCKING))
                        WARN_ON_ONCE(wqh->rtlock_task);
                get_task_struct(task);
                wqh->rtlock_task = task;
        } else {
                wake_q_add(&wqh->head, task);
        }
}

static __always_inline void rt_mutex_wake_q_add(struct rt_wake_q_head *wqh,
                                                struct rt_mutex_waiter *w)
{
        rt_mutex_wake_q_add_task(wqh, w->task, w->wake_state);
}

static __always_inline void rt_mutex_wake_up_q(struct rt_wake_q_head *wqh)
{
        if (IS_ENABLED(CONFIG_PREEMPT_RT) && wqh->rtlock_task) {
                wake_up_state(wqh->rtlock_task, TASK_RTLOCK_WAIT);
                put_task_struct(wqh->rtlock_task);
                wqh->rtlock_task = NULL;
        }

        if (!wake_q_empty(&wqh->head))
                wake_up_q(&wqh->head);

        /* Pairs with preempt_disable() in mark_wakeup_next_waiter() */
        preempt_enable();
}

/*
 * Deadlock detection is conditional:
 *
 * If CONFIG_DEBUG_RT_MUTEXES=n, deadlock detection is only conducted
 * if the detect argument is == RT_MUTEX_FULL_CHAINWALK.
 *
 * If CONFIG_DEBUG_RT_MUTEXES=y, deadlock detection is always
 * conducted independent of the detect argument.
 *
 * If the waiter argument is NULL this indicates the deboost path and
 * deadlock detection is disabled independent of the detect argument
 * and the config settings.
 */
static __always_inline bool
rt_mutex_cond_detect_deadlock(struct rt_mutex_waiter *waiter,
                              enum rtmutex_chainwalk chwalk)
{
        if (IS_ENABLED(CONFIG_DEBUG_RT_MUTEXES))
                return waiter != NULL;
        return chwalk == RT_MUTEX_FULL_CHAINWALK;
}

static __always_inline struct rt_mutex_base *task_blocked_on_lock(struct task_struct *p)
{
        return p->pi_blocked_on ? p->pi_blocked_on->lock : NULL;
}

/*
 * Adjust the priority chain. Also used for deadlock detection.
 * Decreases task's usage by one - may thus free the task.
 *
 * @task:       the task owning the mutex (owner) for which a chain walk is
 *              probably needed
 * @chwalk:     do we have to carry out deadlock detection?
 * @orig_lock:  the mutex (can be NULL if we are walking the chain to recheck
 *              things for a task that has just got its priority adjusted, and
 *              is waiting on a mutex)
 * @next_lock:  the mutex on which the owner of @orig_lock was blocked before
 *              we dropped its pi_lock. Is never dereferenced, only used for
 *              comparison to detect lock chain changes.
 * @orig_waiter: rt_mutex_waiter struct for the task that has just donated
 *              its priority to the mutex owner (can be NULL in the case
 *              depicted above or if the top waiter has gone away and we are
 *              actually deboosting the owner)
 * @top_task:   the current top waiter
 *
 * Returns 0 or -EDEADLK.
 *
 * Chain walk basics and protection scope
 *
 * [R] refcount on task
 * [P] task->pi_lock held
 * [L] rtmutex->wait_lock held
 *
 * Step Description                             Protected by
 *      function arguments:
 *      @task                                   [R]
 *      @orig_lock if != NULL                   @top_task is blocked on it
 *      @next_lock                              Unprotected. Cannot be
 *                                              dereferenced. Only used for
 *                                              comparison.
 *      @orig_waiter if != NULL                 @top_task is blocked on it
 *      @top_task                               current, or in case of proxy
 *                                              locking protected by calling
 *                                              code
 *
 * again:
 *      loop_sanity_check();
 * retry:
 * [1]    lock(task->pi_lock);                  [R] acquire [P]
 * [2]    waiter = task->pi_blocked_on;         [P]
 * [3]    check_exit_conditions_1();            [P]
 * [4]    lock = waiter->lock;                  [P]
 * [5]    if (!try_lock(lock->wait_lock)) {     [P] try to acquire [L]
 *          unlock(task->pi_lock);              release [P]
 *          goto retry;
 *        }
 * [6]    check_exit_conditions_2();            [P] + [L]
 * [7]    requeue_lock_waiter(lock, waiter);    [P] + [L]
 * [8]    unlock(task->pi_lock);                release [P]
 *        put_task_struct(task);                release [R]
 * [9]    check_exit_conditions_3();            [L]
 * [10]   task = owner(lock);                   [L]
 *        get_task_struct(task);                [L] acquire [R]
 *        lock(task->pi_lock);                  [L] acquire [P]
 * [11]   requeue_pi_waiter(tsk, waiters(lock));[P] + [L]
 * [12]   check_exit_conditions_4();            [P] + [L]
 * [13]   unlock(task->pi_lock);                release [P]
 *        unlock(lock->wait_lock);              release [L]
 *        goto again;
 */
static int __sched rt_mutex_adjust_prio_chain(struct task_struct *task,
                                              enum rtmutex_chainwalk chwalk,
                                              struct rt_mutex_base *orig_lock,
                                              struct rt_mutex_base *next_lock,
                                              struct rt_mutex_waiter *orig_waiter,
                                              struct task_struct *top_task)
{
        struct rt_mutex_waiter *waiter, *top_waiter = orig_waiter;
        struct rt_mutex_waiter *prerequeue_top_waiter;
        int ret = 0, depth = 0;
        struct rt_mutex_base *lock;
        bool detect_deadlock;
        bool requeue = true;

        detect_deadlock = rt_mutex_cond_detect_deadlock(orig_waiter, chwalk);

        /*
         * The (de)boosting is a step by step approach with a lot of
         * pitfalls. We want this to be preemptible and we want to hold a
         * maximum of two locks per step. So we have to check
         * carefully whether things change under us.
         */
again:
        /*
         * We limit the lock chain length for each invocation.
         */
        if (++depth > max_lock_depth) {
                static int prev_max;

                /*
                 * Print this only once. If the admin changes the limit,
                 * print a new message when reaching the limit again.
                 */
                if (prev_max != max_lock_depth) {
                        prev_max = max_lock_depth;
                        printk(KERN_WARNING "Maximum lock depth %d reached "
                               "task: %s (%d)\n", max_lock_depth,
                               top_task->comm, task_pid_nr(top_task));
                }
                put_task_struct(task);

                return -EDEADLK;
        }

        /*
         * We are fully preemptible here and only hold the refcount on
         * @task. So everything can have changed under us since the
         * caller or our own code below (goto retry/again) dropped all
         * locks.
         */
retry:
        /*
         * [1] Task cannot go away as we did a get_task() before!
         */
        raw_spin_lock_irq(&task->pi_lock);

        /*
         * [2] Get the waiter on which @task is blocked.
         */
        waiter = task->pi_blocked_on;

        /*
         * [3] check_exit_conditions_1() protected by task->pi_lock.
         */

        /*
         * Check whether the end of the boosting chain has been
         * reached or the state of the chain has changed while we
         * dropped the locks.
         */
        if (!waiter)
                goto out_unlock_pi;

        /*
         * Check the orig_waiter state. After we dropped the locks,
         * the previous owner of the lock might have released the lock.
         */
        if (orig_waiter && !rt_mutex_owner(orig_lock))
                goto out_unlock_pi;

        /*
         * We dropped all locks after taking a refcount on @task, so
         * the task might have moved on in the lock chain or even left
         * the chain completely and blocks now on an unrelated lock or
         * on @orig_lock.
         *
         * We stored the lock on which @task was blocked in @next_lock,
         * so we can detect the chain change.
         */
        if (next_lock != waiter->lock)
                goto out_unlock_pi;

        /*
         * There could be 'spurious' loops in the lock graph due to ww_mutex,
         * consider:
         *
         *   P1: A, ww_A, ww_B
         *   P2: ww_B, ww_A
         *   P3: A
         *
         * P3 should not return -EDEADLK because it gets trapped in the cycle
         * created by P1 and P2 (which will resolve -- and runs into
         * max_lock_depth above). Therefore disable detect_deadlock such that
         * the below termination condition can trigger once all relevant tasks
         * are boosted.
         *
         * Even when we start with ww_mutex we can disable deadlock detection,
         * since we would suppress a ww_mutex induced deadlock at [6] anyway.
         * Suppressing it here however is not sufficient since we might still
         * hit [6] due to adjustment driven iteration.
         *
         * NOTE: if someone were to create a deadlock between 2 ww_classes we'd
         * utterly fail to report it; lockdep should.
         */
        if (IS_ENABLED(CONFIG_PREEMPT_RT) && waiter->ww_ctx && detect_deadlock)
                detect_deadlock = false;

        /*
         * Drop out when the task has no waiters. Note that top_waiter
         * can be NULL when we are in deboost mode!
         */
        if (top_waiter) {
                if (!task_has_pi_waiters(task))
                        goto out_unlock_pi;
                /*
                 * If deadlock detection is off, we stop here if we
                 * are not the top pi waiter of the task. If deadlock
                 * detection is enabled we continue, but stop the
                 * requeueing in the chain walk.
                 */
                if (top_waiter != task_top_pi_waiter(task)) {
                        if (!detect_deadlock)
                                goto out_unlock_pi;
                        else
                                requeue = false;
                }
        }

        /*
         * If the waiter priority is the same as the task priority
         * then there is no further priority adjustment necessary. If
         * deadlock detection is off, we stop the chain walk. If it's
         * enabled we continue, but stop the requeueing in the chain
         * walk.
         */
        if (rt_mutex_waiter_equal(waiter, task_to_waiter(task))) {
                if (!detect_deadlock)
                        goto out_unlock_pi;
                else
                        requeue = false;
        }

        /*
         * [4] Get the next lock
         */
        lock = waiter->lock;
        /*
         * [5] We need to trylock here as we are holding task->pi_lock,
         * which is the reverse lock order versus the other rtmutex
         * operations.
         */
        if (!raw_spin_trylock(&lock->wait_lock)) {
                raw_spin_unlock_irq(&task->pi_lock);
                cpu_relax();
                goto retry;
        }

        /*
         * [6] check_exit_conditions_2() protected by task->pi_lock and
         * lock->wait_lock.
         *
         * Deadlock detection. If the lock is the same as the original
         * lock which caused us to walk the lock chain or if the
         * current lock is owned by the task which initiated the chain
         * walk, we detected a deadlock.
         */
        if (lock == orig_lock || rt_mutex_owner(lock) == top_task) {
                ret = -EDEADLK;

                /*
                 * When the deadlock is due to ww_mutex, also see above. Don't
                 * report the deadlock and instead let the ww_mutex wound/die
                 * logic pick which of the contending threads gets -EDEADLK.
                 *
                 * NOTE: assumes the cycle only contains a single ww_class; any
                 * other configuration and we fail to report; also, see
                 * lockdep.
                 */
                if (IS_ENABLED(CONFIG_PREEMPT_RT) && orig_waiter && orig_waiter->ww_ctx)
                        ret = 0;

                raw_spin_unlock(&lock->wait_lock);
                goto out_unlock_pi;
        }

        /*
         * If we just follow the lock chain for deadlock detection, no
         * need to do all the requeue operations. To avoid a truckload
         * of conditionals around the various places below, just do the
         * minimum chain walk checks.
         */
        if (!requeue) {
                /*
                 * No requeue [7] here. Just release @task [8]
                 */
                raw_spin_unlock(&task->pi_lock);
                put_task_struct(task);

                /*
                 * [9] check_exit_conditions_3 protected by lock->wait_lock.
                 * If there is no owner of the lock, end of chain.
                 */
                if (!rt_mutex_owner(lock)) {
                        raw_spin_unlock_irq(&lock->wait_lock);
                        return 0;
                }

                /* [10] Grab the next task, i.e. owner of @lock */
                task = get_task_struct(rt_mutex_owner(lock));
                raw_spin_lock(&task->pi_lock);

                /*
                 * No requeue [11] here. We just do deadlock detection.
                 *
                 * [12] Store whether owner is blocked
                 * itself. Decision is made after dropping the locks
                 */
                next_lock = task_blocked_on_lock(task);
                /*
                 * Get the top waiter for the next iteration
                 */
                top_waiter = rt_mutex_top_waiter(lock);

                /* [13] Drop locks */
                raw_spin_unlock(&task->pi_lock);
                raw_spin_unlock_irq(&lock->wait_lock);

                /* If owner is not blocked, end of chain. */
                if (!next_lock)
                        goto out_put_task;
                goto again;
        }

        /*
         * Store the current top waiter before doing the requeue
         * operation on @lock. We need it for the boost/deboost
         * decision below.
         */
        prerequeue_top_waiter = rt_mutex_top_waiter(lock);

        /* [7] Requeue the waiter in the lock waiter tree. */
        rt_mutex_dequeue(lock, waiter);

        /*
         * Update the waiter prio fields now that we're dequeued.
         *
         * These values can have changed through either:
         *
         *   sys_sched_set_scheduler() / sys_sched_setattr()
         *
         * or
         *
         *   DL CBS enforcement advancing the effective deadline.
         *
         * Even though pi_waiters also uses these fields, and that tree is only
         * updated in [11], we can do this here, since we hold [L], which
         * serializes all pi_waiters access and rb_erase() does not care about
         * the values of the node being removed.
         */
        waiter_update_prio(waiter, task);

        rt_mutex_enqueue(lock, waiter);

        /* [8] Release the task */
        raw_spin_unlock(&task->pi_lock);
        put_task_struct(task);

        /*
         * [9] check_exit_conditions_3 protected by lock->wait_lock.
         *
         * We must abort the chain walk if there is no lock owner even
         * in the dead lock detection case, as we have nothing to
         * follow here. This is the end of the chain we are walking.
         */
        if (!rt_mutex_owner(lock)) {
                /*
                 * If the requeue [7] above changed the top waiter,
                 * then we need to wake the new top waiter up to try
                 * to get the lock.
                 */
                if (prerequeue_top_waiter != rt_mutex_top_waiter(lock))
                        wake_up_state(waiter->task, waiter->wake_state);
                raw_spin_unlock_irq(&lock->wait_lock);
                return 0;
        }

        /* [10] Grab the next task, i.e. the owner of @lock */
        task = get_task_struct(rt_mutex_owner(lock));
        raw_spin_lock(&task->pi_lock);

        /* [11] requeue the pi waiters if necessary */
        if (waiter == rt_mutex_top_waiter(lock)) {
                /*
                 * The waiter became the new top (highest priority)
                 * waiter on the lock. Replace the previous top waiter
                 * in the owner task's pi waiters tree with this waiter
                 * and adjust the priority of the owner.
                 */
                rt_mutex_dequeue_pi(task, prerequeue_top_waiter);
                rt_mutex_enqueue_pi(task, waiter);
                rt_mutex_adjust_prio(task);

        } else if (prerequeue_top_waiter == waiter) {
                /*
                 * The waiter was the top waiter on the lock, but is
                 * no longer the top priority waiter. Replace waiter in
                 * the owner task's pi waiters tree with the new top
                 * (highest priority) waiter and adjust the priority
                 * of the owner.
                 * The new top waiter is stored in @waiter so that
                 * @waiter == @top_waiter evaluates to true below and
                 * we continue to deboost the rest of the chain.
                 */
                rt_mutex_dequeue_pi(task, waiter);
                waiter = rt_mutex_top_waiter(lock);
                rt_mutex_enqueue_pi(task, waiter);
                rt_mutex_adjust_prio(task);
        } else {
                /*
                 * Nothing changed. No need to do any priority
                 * adjustment.
                 */
        }

        /*
         * [12] check_exit_conditions_4() protected by task->pi_lock
         * and lock->wait_lock. The actual decisions are made after we
         * dropped the locks.
         *
         * Check whether the task which owns the current lock is pi
         * blocked itself. If yes we store a pointer to the lock for
         * the lock chain change detection above. After we dropped
         * task->pi_lock next_lock cannot be dereferenced anymore.
         */
        next_lock = task_blocked_on_lock(task);
        /*
         * Store the top waiter of @lock for the end of chain walk
         * decision below.
         */
        top_waiter = rt_mutex_top_waiter(lock);

        /* [13] Drop the locks */
        raw_spin_unlock(&task->pi_lock);
        raw_spin_unlock_irq(&lock->wait_lock);

        /*
         * Make the actual exit decisions [12], based on the stored
         * values.
         *
         * We reached the end of the lock chain. Stop right here. No
         * point to go back just to figure that out.
         */
        if (!next_lock)
                goto out_put_task;

        /*
         * If the current waiter is not the top waiter on the lock,
         * then we can stop the chain walk here if we are not in full
         * deadlock detection mode.
         */
        if (!detect_deadlock && waiter != top_waiter)
                goto out_put_task;

        goto again;

out_unlock_pi:
        raw_spin_unlock_irq(&task->pi_lock);
out_put_task:
        put_task_struct(task);

        return ret;
}

/*
 * Try to take an rt-mutex
 *
 * Must be called with lock->wait_lock held and interrupts disabled
 *
 * @lock:   The lock to be acquired.
 * @task:   The task which wants to acquire the lock
 * @waiter: The waiter that is queued to the lock's wait tree if the
 *          callsite called task_blocked_on_lock(), otherwise NULL
 */
static int __sched
try_to_take_rt_mutex(struct rt_mutex_base *lock, struct task_struct *task,
                     struct rt_mutex_waiter *waiter)
{
        lockdep_assert_held(&lock->wait_lock);

        /*
         * Before testing whether we can acquire @lock, we set the
         * RT_MUTEX_HAS_WAITERS bit in @lock->owner. This forces all
         * other tasks which try to modify @lock into the slow path
         * and they serialize on @lock->wait_lock.
         *
         * The RT_MUTEX_HAS_WAITERS bit can have a transitional state
         * as explained at the top of this file if and only if:
         *
         * - There is a lock owner. The caller must fixup the
         *   transient state if it does a trylock or leaves the lock
         *   function due to a signal or timeout.
         *
         * - @task acquires the lock and there are no other
         *   waiters. This is undone in rt_mutex_set_owner(@task) at
         *   the end of this function.
         */
        mark_rt_mutex_waiters(lock);

        /*
         * If @lock has an owner, give up.
         */
        if (rt_mutex_owner(lock))
                return 0;

        /*
         * If @waiter != NULL, @task has already enqueued the waiter
         * into @lock waiter tree. If @waiter == NULL then this is a
         * trylock attempt.
         */
        if (waiter) {
                struct rt_mutex_waiter *top_waiter = rt_mutex_top_waiter(lock);

                /*
                 * If waiter is the highest priority waiter of @lock,
                 * or allowed to steal it, take it over.
                 */
                if (waiter == top_waiter || rt_mutex_steal(waiter, top_waiter)) {
                        /*
                         * We can acquire the lock. Remove the waiter from the
                         * lock waiters tree.
                         */
                        rt_mutex_dequeue(lock, waiter);
                } else {
                        return 0;
                }
        } else {
                /*
                 * If the lock has waiters already we check whether @task is
                 * eligible to take over the lock.
                 *
                 * If there are no other waiters, @task can acquire
                 * the lock. @task->pi_blocked_on is NULL, so it does
                 * not need to be dequeued.
                 */
                if (rt_mutex_has_waiters(lock)) {
                        /* Check whether the trylock can steal it. */
                        if (!rt_mutex_steal(task_to_waiter(task),
                                            rt_mutex_top_waiter(lock)))
                                return 0;

                        /*
                         * The current top waiter stays enqueued. We
                         * don't have to change anything in the lock
                         * waiters order.
                         */
                } else {
                        /*
                         * No waiters. Take the lock without the
                         * pi_lock dance. @task->pi_blocked_on is NULL
                         * and we have no waiters to enqueue in @task
                         * pi waiters tree.
                         */
                        goto takeit;
                }
        }

        /*
         * Clear @task->pi_blocked_on. Requires protection by
         * @task->pi_lock. Redundant operation for the @waiter == NULL
         * case, but conditionals are more expensive than a redundant
         * store.
         */
        raw_spin_lock(&task->pi_lock);
        task->pi_blocked_on = NULL;
        /*
         * Finish the lock acquisition. @task is the new owner. If
         * other waiters exist we have to insert the highest priority
         * waiter into @task->pi_waiters tree.
         */
        if (rt_mutex_has_waiters(lock))
                rt_mutex_enqueue_pi(task, rt_mutex_top_waiter(lock));
        raw_spin_unlock(&task->pi_lock);

takeit:
        /*
         * This either preserves the RT_MUTEX_HAS_WAITERS bit if there
         * are still waiters or clears it.
         */
        rt_mutex_set_owner(lock, task);

        return 1;
}

/*
 * Task blocks on lock.
 *
 * Prepare waiter and propagate pi chain
 *
 * This must be called with lock->wait_lock held and interrupts disabled
 */
static int __sched task_blocks_on_rt_mutex(struct rt_mutex_base *lock,
                                           struct rt_mutex_waiter *waiter,
                                           struct task_struct *task,
                                           struct ww_acquire_ctx *ww_ctx,
                                           enum rtmutex_chainwalk chwalk)
{
        struct task_struct *owner = rt_mutex_owner(lock);
        struct rt_mutex_waiter *top_waiter = waiter;
        struct rt_mutex_base *next_lock;
        int chain_walk = 0, res;

        lockdep_assert_held(&lock->wait_lock);

        /*
         * Early deadlock detection. We really don't want the task to
         * enqueue on itself just to untangle the mess later. It's not
         * only an optimization. We drop the locks, so another waiter
         * can come in before the chain walk detects the deadlock. So
         * the other will detect the deadlock and return -EDEADLOCK,
         * which is wrong, as the other waiter is not in a deadlock
         * situation.
         *
         * Except for ww_mutex, in that case the chain walk must already deal
         * with spurious cycles, see the comments at [3] and [6].
         */
        if (owner == task && !(build_ww_mutex() && ww_ctx))
                return -EDEADLK;

        raw_spin_lock(&task->pi_lock);
        waiter->task = task;
        waiter->lock = lock;
        waiter_update_prio(waiter, task);

        /* Get the top priority waiter on the lock */
        if (rt_mutex_has_waiters(lock))
                top_waiter = rt_mutex_top_waiter(lock);
        rt_mutex_enqueue(lock, waiter);

        task->pi_blocked_on = waiter;

        raw_spin_unlock(&task->pi_lock);

        if (build_ww_mutex() && ww_ctx) {
                struct rt_mutex *rtm;

                /* Check whether the waiter should back out immediately */
                rtm = container_of(lock, struct rt_mutex, rtmutex);
                res = __ww_mutex_add_waiter(waiter, rtm, ww_ctx);
                if (res) {
                        raw_spin_lock(&task->pi_lock);
                        rt_mutex_dequeue(lock, waiter);
                        task->pi_blocked_on = NULL;
                        raw_spin_unlock(&task->pi_lock);
                        return res;
                }
        }

        if (!owner)
                return 0;

        raw_spin_lock(&owner->pi_lock);
        if (waiter == rt_mutex_top_waiter(lock)) {
                rt_mutex_dequeue_pi(owner, top_waiter);
                rt_mutex_enqueue_pi(owner, waiter);

                rt_mutex_adjust_prio(owner);
                if (owner->pi_blocked_on)
                        chain_walk = 1;
        } else if (rt_mutex_cond_detect_deadlock(waiter, chwalk)) {
                chain_walk = 1;
        }

        /* Store the lock on which owner is blocked or NULL */
        next_lock = task_blocked_on_lock(owner);

        raw_spin_unlock(&owner->pi_lock);
        /*
         * Even if full deadlock detection is on, if the owner is not
         * blocked itself, we can avoid finding this out in the chain
         * walk.
         */
        if (!chain_walk || !next_lock)
                return 0;

        /*
         * The owner can't disappear while holding a lock,
         * so the owner struct is protected by wait_lock.
         * Gets dropped in rt_mutex_adjust_prio_chain()!
         */
        get_task_struct(owner);

        raw_spin_unlock_irq(&lock->wait_lock);

        res = rt_mutex_adjust_prio_chain(owner, chwalk, lock,
                                         next_lock, waiter, task);

        raw_spin_lock_irq(&lock->wait_lock);

        return res;
}

/*
 * Remove the top waiter from the current task's pi waiter tree and
 * queue it up.
 *
 * Called with lock->wait_lock held and interrupts disabled.
 */
static void __sched mark_wakeup_next_waiter(struct rt_wake_q_head *wqh,
                                            struct rt_mutex_base *lock)
{
        struct rt_mutex_waiter *waiter;

        raw_spin_lock(&current->pi_lock);

        waiter = rt_mutex_top_waiter(lock);

        /*
         * Remove it from current->pi_waiters and deboost.
         *
         * We must in fact deboost here in order to ensure we call
         * rt_mutex_setprio() to update p->pi_top_task before the
         * task unblocks.
         */
        rt_mutex_dequeue_pi(current, waiter);
        rt_mutex_adjust_prio(current);

        /*
         * As we are waking up the top waiter, and the waiter stays
         * queued on the lock until it gets the lock, this lock
         * obviously has waiters. Just set the bit here and this has
         * the added benefit of forcing all new tasks into the
         * slow path making sure no task of lower priority than
         * the top waiter can steal this lock.
         */
        lock->owner = (void *) RT_MUTEX_HAS_WAITERS;

        /*
         * We deboosted before waking the top waiter task such that we don't
         * run two tasks with the 'same' priority (and ensure the
         * p->pi_top_task pointer points to a blocked task). This however can
         * lead to priority inversion if we would get preempted after the
         * deboost but before waking our donor task, hence the preempt_disable()
         * before unlock.
         *
         * Pairs with preempt_enable() in rt_mutex_wake_up_q();
         */
        preempt_disable();
        rt_mutex_wake_q_add(wqh, waiter);
        raw_spin_unlock(&current->pi_lock);
}

static int __sched __rt_mutex_slowtrylock(struct rt_mutex_base *lock)
{
        int ret = try_to_take_rt_mutex(lock, current, NULL);

        /*
         * try_to_take_rt_mutex() sets the lock waiters bit
         * unconditionally. Clean this up.
         */
        fixup_rt_mutex_waiters(lock);

        return ret;
}

/*
 * Slow path try-lock function:
 */
static int __sched rt_mutex_slowtrylock(struct rt_mutex_base *lock)
{
        unsigned long flags;
        int ret;

        /*
         * If the lock already has an owner we fail to get the lock.
         * This can be done without taking the @lock->wait_lock as
         * it is only being read, and this is a trylock anyway.
         */
        if (rt_mutex_owner(lock))
                return 0;

        /*
         * The mutex has currently no owner. Lock the wait lock and try to
         * acquire the lock. We use irqsave here to support early boot calls.
         */
        raw_spin_lock_irqsave(&lock->wait_lock, flags);

        ret = __rt_mutex_slowtrylock(lock);

        raw_spin_unlock_irqrestore(&lock->wait_lock, flags);

        return ret;
}

static __always_inline int __rt_mutex_trylock(struct rt_mutex_base *lock)
{
        if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current)))
                return 1;

        return rt_mutex_slowtrylock(lock);
}
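
/*
 * Illustrative sketch (editor's addition, not part of the build): the
 * fastpath/slowpath split above in miniature -- one acquire-ordered
 * compare-and-swap on the owner word, falling back to a slow function
 * when it fails. Hypothetical stand-in names throughout.
 */
#if 0
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static _Atomic unsigned long demo_owner;        /* 0 == lock is free */

static bool demo_slowtrylock(void)
{
        return false;   /* stand-in for rt_mutex_slowtrylock() */
}

static bool demo_trylock(unsigned long me)
{
        unsigned long expected = 0;

        /* Fast path: free -> owned by me, with acquire ordering. */
        if (atomic_compare_exchange_strong_explicit(&demo_owner, &expected,
                                                    me, memory_order_acquire,
                                                    memory_order_relaxed))
                return true;

        return demo_slowtrylock();
}

int main(void)
{
        printf("got it: %d\n", demo_trylock(0x1000));
        printf("again:  %d\n", demo_trylock(0x2000));
        return 0;
}
#endif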

/*
 * Slow path to release a rt-mutex.
 */
static void __sched rt_mutex_slowunlock(struct rt_mutex_base *lock)
{
        DEFINE_RT_WAKE_Q(wqh);
        unsigned long flags;

        /* irqsave required to support early boot calls */
        raw_spin_lock_irqsave(&lock->wait_lock, flags);

        debug_rt_mutex_unlock(lock);

        /*
         * We must be careful here if the fast path is enabled. If we
         * have no waiters queued we cannot set owner to NULL here
         * because of:
         *
         * foo->lock->owner = NULL;
         *                      rtmutex_lock(foo->lock); <- fast path
         *                      free = atomic_dec_and_test(foo->refcnt);
         *                      rtmutex_unlock(foo->lock); <- fast path
         *                      if (free)
         *                              kfree(foo);
         * raw_spin_unlock(foo->lock->wait_lock);
         *
         * So for the fastpath enabled kernel:
         *
         * Nothing can set the waiters bit as long as we hold
         * lock->wait_lock. So we do the following sequence:
         *
         *      owner = rt_mutex_owner(lock);
         *      clear_rt_mutex_waiters(lock);
         *      raw_spin_unlock(&lock->wait_lock);
         *      if (cmpxchg(&lock->owner, owner, 0) == owner)
         *              return;
         *      goto retry;
         *
         * The fastpath disabled variant is simple as all access to
         * lock->owner is serialized by lock->wait_lock:
         *
         *      lock->owner = NULL;
         *      raw_spin_unlock(&lock->wait_lock);
         */
        while (!rt_mutex_has_waiters(lock)) {
                /* Drops lock->wait_lock ! */
                if (unlock_rt_mutex_safe(lock, flags) == true)
                        return;
                /* Relock the rtmutex and try again */
                raw_spin_lock_irqsave(&lock->wait_lock, flags);
        }

        /*
         * The wakeup next waiter path does not suffer from the above
         * race. See the comments there.
         *
         * Queue the next waiter for wakeup once we release the wait_lock.
         */
        mark_wakeup_next_waiter(&wqh, lock);
        raw_spin_unlock_irqrestore(&lock->wait_lock, flags);

        rt_mutex_wake_up_q(&wqh);
}

static __always_inline void __rt_mutex_unlock(struct rt_mutex_base *lock)
{
        if (likely(rt_mutex_cmpxchg_release(lock, current, NULL)))
                return;

        rt_mutex_slowunlock(lock);
}

#ifdef CONFIG_SMP
static bool rtmutex_spin_on_owner(struct rt_mutex_base *lock,
                                  struct rt_mutex_waiter *waiter,
                                  struct task_struct *owner)
{
        bool res = true;

        rcu_read_lock();
        for (;;) {
                /* If owner changed, trylock again. */
                if (owner != rt_mutex_owner(lock))
                        break;
                /*
                 * Ensure that @owner is dereferenced after checking that
                 * the lock owner still matches @owner. If that fails,
                 * @owner might point to freed memory. If it still matches,
                 * the rcu_read_lock() ensures the memory stays valid.
                 */
                barrier();
                /*
                 * Stop spinning when:
                 *  - the lock owner has been scheduled out
                 *  - current is no longer the top waiter
                 *  - current is requested to reschedule (redundant
                 *    for CONFIG_PREEMPT_RCU=y)
                 *  - the VCPU on which owner runs is preempted
                 */
                if (!owner_on_cpu(owner) || need_resched() ||
                    !rt_mutex_waiter_is_top_waiter(lock, waiter)) {
                        res = false;
                        break;
                }
                cpu_relax();
        }
        rcu_read_unlock();
        return res;
}
#else
static bool rtmutex_spin_on_owner(struct rt_mutex_base *lock,
                                  struct rt_mutex_waiter *waiter,
                                  struct task_struct *owner)
{
        return false;
}
#endif

#ifdef RT_MUTEX_BUILD_MUTEX
/*
 * Functions required for:
 *      - rtmutex, futex on all kernels
 *      - mutex and rwsem substitutions on RT kernels
 */

/*
 * Remove a waiter from a lock and give up
 *
 * Must be called with lock->wait_lock held and interrupts disabled. It must
 * have just failed to try_to_take_rt_mutex().
 */
static void __sched remove_waiter(struct rt_mutex_base *lock,
                                  struct rt_mutex_waiter *waiter)
{
        bool is_top_waiter = (waiter == rt_mutex_top_waiter(lock));
        struct task_struct *owner = rt_mutex_owner(lock);
        struct rt_mutex_base *next_lock;

        lockdep_assert_held(&lock->wait_lock);

        raw_spin_lock(&current->pi_lock);
        rt_mutex_dequeue(lock, waiter);
        current->pi_blocked_on = NULL;
        raw_spin_unlock(&current->pi_lock);

        /*
         * Only update priority if the waiter was the highest priority
         * waiter of the lock and there is an owner to update.
         */
        if (!owner || !is_top_waiter)
                return;

        raw_spin_lock(&owner->pi_lock);

        rt_mutex_dequeue_pi(owner, waiter);

        if (rt_mutex_has_waiters(lock))
                rt_mutex_enqueue_pi(owner, rt_mutex_top_waiter(lock));

        rt_mutex_adjust_prio(owner);

        /* Store the lock on which owner is blocked or NULL */
        next_lock = task_blocked_on_lock(owner);

        raw_spin_unlock(&owner->pi_lock);

        /*
         * Don't walk the chain, if the owner task is not blocked
         * itself.
         */
        if (!next_lock)
                return;

        /* gets dropped in rt_mutex_adjust_prio_chain()! */
        get_task_struct(owner);

        raw_spin_unlock_irq(&lock->wait_lock);

        rt_mutex_adjust_prio_chain(owner, RT_MUTEX_MIN_CHAINWALK, lock,
                                   next_lock, NULL, current);

        raw_spin_lock_irq(&lock->wait_lock);
}

/**
 * rt_mutex_slowlock_block() - Perform the wait-wake-try-to-take loop
 * @lock:       the rt_mutex to take
 * @ww_ctx:     WW mutex context pointer
 * @state:      the state the task should block in (TASK_INTERRUPTIBLE
 *              or TASK_UNINTERRUPTIBLE)
 * @timeout:    the pre-initialized and started timer, or NULL for none
 * @waiter:     the pre-initialized rt_mutex_waiter
 *
 * Must be called with lock->wait_lock held and interrupts disabled
 */
static int __sched rt_mutex_slowlock_block(struct rt_mutex_base *lock,
                                           struct ww_acquire_ctx *ww_ctx,
                                           unsigned int state,
                                           struct hrtimer_sleeper *timeout,
                                           struct rt_mutex_waiter *waiter)
{
        struct rt_mutex *rtm = container_of(lock, struct rt_mutex, rtmutex);
        struct task_struct *owner;
        int ret = 0;

        for (;;) {
                /* Try to acquire the lock: */
                if (try_to_take_rt_mutex(lock, current, waiter))
                        break;

                if (timeout && !timeout->task) {
                        ret = -ETIMEDOUT;
                        break;
                }
                if (signal_pending_state(state, current)) {
                        ret = -EINTR;
                        break;
                }

                if (build_ww_mutex() && ww_ctx) {
                        ret = __ww_mutex_check_kill(rtm, waiter, ww_ctx);
                        if (ret)
                                break;
                }

                if (waiter == rt_mutex_top_waiter(lock))
                        owner = rt_mutex_owner(lock);
                else
                        owner = NULL;
                raw_spin_unlock_irq(&lock->wait_lock);

                if (!owner || !rtmutex_spin_on_owner(lock, waiter, owner))
                        schedule();

                raw_spin_lock_irq(&lock->wait_lock);
                set_current_state(state);
        }

        __set_current_state(TASK_RUNNING);
        return ret;
}

static void __sched rt_mutex_handle_deadlock(int res, int detect_deadlock,
                                             struct rt_mutex_waiter *w)
{
        /*
         * If the result is not -EDEADLOCK or the caller requested
         * deadlock detection, nothing to do here.
         */
        if (res != -EDEADLOCK || detect_deadlock)
                return;

        if (build_ww_mutex() && w->ww_ctx)
                return;

        /*
         * Yell loudly and stop the task right here.
         */
        WARN(1, "rtmutex deadlock detected\n");
        while (1) {
                set_current_state(TASK_INTERRUPTIBLE);
                schedule();
        }
}

/**
 * __rt_mutex_slowlock - Locking slowpath invoked with lock::wait_lock held
 * @lock:       The rtmutex to block lock
 * @ww_ctx:     WW mutex context pointer
 * @state:      The task state for sleeping
 * @chwalk:     Indicator whether full or partial chainwalk is requested
 * @waiter:     Initialized waiter for blocking
 */
static int __sched __rt_mutex_slowlock(struct rt_mutex_base *lock,
                                       struct ww_acquire_ctx *ww_ctx,
                                       unsigned int state,
                                       enum rtmutex_chainwalk chwalk,
                                       struct rt_mutex_waiter *waiter)
{
        struct rt_mutex *rtm = container_of(lock, struct rt_mutex, rtmutex);
        struct ww_mutex *ww = ww_container_of(rtm);
        int ret;

        lockdep_assert_held(&lock->wait_lock);

        /* Try to acquire the lock again: */
        if (try_to_take_rt_mutex(lock, current, NULL)) {
                if (build_ww_mutex() && ww_ctx) {
                        __ww_mutex_check_waiters(rtm, ww_ctx);
                        ww_mutex_lock_acquired(ww, ww_ctx);
                }
                return 0;
        }

        set_current_state(state);

        ret = task_blocks_on_rt_mutex(lock, waiter, current, ww_ctx, chwalk);
        if (likely(!ret))
                ret = rt_mutex_slowlock_block(lock, ww_ctx, state, NULL, waiter);

        if (likely(!ret)) {
                /* acquired the lock */
                if (build_ww_mutex() && ww_ctx) {
                        if (!ww_ctx->is_wait_die)
                                __ww_mutex_check_waiters(rtm, ww_ctx);
                        ww_mutex_lock_acquired(ww, ww_ctx);
                }
        } else {
                __set_current_state(TASK_RUNNING);
                remove_waiter(lock, waiter);
                rt_mutex_handle_deadlock(ret, chwalk, waiter);
        }

        /*
         * try_to_take_rt_mutex() sets the waiter bit
         * unconditionally. We might have to fix that up.
         */
        fixup_rt_mutex_waiters(lock);
        return ret;
}

static inline int __rt_mutex_slowlock_locked(struct rt_mutex_base *lock,
                                             struct ww_acquire_ctx *ww_ctx,
                                             unsigned int state)
{
        struct rt_mutex_waiter waiter;
        int ret;

        rt_mutex_init_waiter(&waiter);
        waiter.ww_ctx = ww_ctx;

        ret = __rt_mutex_slowlock(lock, ww_ctx, state, RT_MUTEX_MIN_CHAINWALK,
                                  &waiter);

        debug_rt_mutex_free_waiter(&waiter);
        return ret;
}

/*
 * rt_mutex_slowlock - Locking slowpath invoked when fast path fails
 * @lock:       The rtmutex to block lock
 * @ww_ctx:     WW mutex context pointer
 * @state:      The task state for sleeping
 */
static int __sched rt_mutex_slowlock(struct rt_mutex_base *lock,
                                     struct ww_acquire_ctx *ww_ctx,
                                     unsigned int state)
{
        unsigned long flags;
        int ret;

        /*
         * Technically we could use raw_spin_[un]lock_irq() here, but this can
         * be called in early boot if the cmpxchg() fast path is disabled
         * (debug, no architecture support). In this case we will acquire the
         * rtmutex with lock->wait_lock held. But we cannot unconditionally
         * enable interrupts in that early boot case. So we need to use the
         * irqsave/restore variants.
         */
        raw_spin_lock_irqsave(&lock->wait_lock, flags);
        ret = __rt_mutex_slowlock_locked(lock, ww_ctx, state);
        raw_spin_unlock_irqrestore(&lock->wait_lock, flags);

        return ret;
}

static __always_inline int __rt_mutex_lock(struct rt_mutex_base *lock,
                                           unsigned int state)
{
        if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current)))
                return 0;

        return rt_mutex_slowlock(lock, NULL, state);
}
#endif /* RT_MUTEX_BUILD_MUTEX */

#ifdef RT_MUTEX_BUILD_SPINLOCKS
/*
 * Functions required for spin/rw_lock substitution on RT kernels
 */

/**
 * rtlock_slowlock_locked - Slow path lock acquisition for RT locks
 * @lock:       The underlying RT mutex
 */
static void __sched rtlock_slowlock_locked(struct rt_mutex_base *lock)
{
        struct rt_mutex_waiter waiter;
        struct task_struct *owner;

        lockdep_assert_held(&lock->wait_lock);

        if (try_to_take_rt_mutex(lock, current, NULL))
                return;

        rt_mutex_init_rtlock_waiter(&waiter);

        /* Save current state and set state to TASK_RTLOCK_WAIT */
        current_save_and_set_rtlock_wait_state();

        task_blocks_on_rt_mutex(lock, &waiter, current, NULL, RT_MUTEX_MIN_CHAINWALK);

        for (;;) {
                /* Try to acquire the lock again */
                if (try_to_take_rt_mutex(lock, current, &waiter))
                        break;

                if (&waiter == rt_mutex_top_waiter(lock))
                        owner = rt_mutex_owner(lock);
                else
                        owner = NULL;
                raw_spin_unlock_irq(&lock->wait_lock);

                if (!owner || !rtmutex_spin_on_owner(lock, &waiter, owner))
                        schedule_rtlock();

                raw_spin_lock_irq(&lock->wait_lock);
                set_current_state(TASK_RTLOCK_WAIT);
        }

        /* Restore the task state */
        current_restore_rtlock_saved_state();

        /*
         * try_to_take_rt_mutex() sets the waiter bit unconditionally.
         * We might have to fix that up:
         */
        fixup_rt_mutex_waiters(lock);
        debug_rt_mutex_free_waiter(&waiter);
}

static __always_inline void __sched rtlock_slowlock(struct rt_mutex_base *lock)
{
        unsigned long flags;

        raw_spin_lock_irqsave(&lock->wait_lock, flags);
        rtlock_slowlock_locked(lock);
        raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
}

#endif /* RT_MUTEX_BUILD_SPINLOCKS */