LCOV - code coverage report
Current view: top level - kernel/sched - deadline.c (source / functions)
Test: coverage.info                      Date: 2022-12-09 01:23:36
                 Hit     Total   Coverage
Lines:            42       547      7.7 %
Functions:         6        49     12.2 %

          Line data    Source code
       1             : // SPDX-License-Identifier: GPL-2.0
       2             : /*
       3             :  * Deadline Scheduling Class (SCHED_DEADLINE)
       4             :  *
       5             :  * Earliest Deadline First (EDF) + Constant Bandwidth Server (CBS).
       6             :  *
       7             :  * Tasks that periodically execute their instances for less than their
       8             :  * runtime won't miss any of their deadlines.
       9             :  * Tasks that are not periodic or sporadic, or that try to execute more
      10             :  * than their reserved bandwidth will be slowed down (and may potentially
      11             :  * miss some of their deadlines), and won't affect any other task.
      12             :  *
      13             :  * Copyright (C) 2012 Dario Faggioli <raistlin@linux.it>,
      14             :  *                    Juri Lelli <juri.lelli@gmail.com>,
      15             :  *                    Michael Trimarchi <michael@amarulasolutions.com>,
      16             :  *                    Fabio Checconi <fchecconi@gmail.com>
      17             :  */
      18             : 
      19             : static inline struct task_struct *dl_task_of(struct sched_dl_entity *dl_se)
      20             : {
      21           0 :         return container_of(dl_se, struct task_struct, dl);
      22             : }
      23             : 
      24             : static inline struct rq *rq_of_dl_rq(struct dl_rq *dl_rq)
      25             : {
      26           0 :         return container_of(dl_rq, struct rq, dl);
      27             : }
      28             : 
      29             : static inline struct dl_rq *dl_rq_of_se(struct sched_dl_entity *dl_se)
      30             : {
      31           0 :         struct task_struct *p = dl_task_of(dl_se);
      32           0 :         struct rq *rq = task_rq(p);
      33             : 
      34             :         return &rq->dl;
      35             : }
      36             : 
      37             : static inline int on_dl_rq(struct sched_dl_entity *dl_se)
      38             : {
      39           0 :         return !RB_EMPTY_NODE(&dl_se->rb_node);
      40             : }
      41             : 
      42             : #ifdef CONFIG_RT_MUTEXES
      43             : static inline struct sched_dl_entity *pi_of(struct sched_dl_entity *dl_se)
      44             : {
      45             :         return dl_se->pi_se;
      46             : }
      47             : 
      48             : static inline bool is_dl_boosted(struct sched_dl_entity *dl_se)
      49             : {
      50           0 :         return pi_of(dl_se) != dl_se;
      51             : }
      52             : #else
      53             : static inline struct sched_dl_entity *pi_of(struct sched_dl_entity *dl_se)
      54             : {
      55             :         return dl_se;
      56             : }
      57             : 
      58             : static inline bool is_dl_boosted(struct sched_dl_entity *dl_se)
      59             : {
      60             :         return false;
      61             : }
      62             : #endif
      63             : 
      64             : #ifdef CONFIG_SMP
      65             : static inline struct dl_bw *dl_bw_of(int i)
      66             : {
      67             :         RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held(),
      68             :                          "sched RCU must be held");
      69             :         return &cpu_rq(i)->rd->dl_bw;
      70             : }
      71             : 
      72             : static inline int dl_bw_cpus(int i)
      73             : {
      74             :         struct root_domain *rd = cpu_rq(i)->rd;
      75             :         int cpus;
      76             : 
      77             :         RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held(),
      78             :                          "sched RCU must be held");
      79             : 
      80             :         if (cpumask_subset(rd->span, cpu_active_mask))
      81             :                 return cpumask_weight(rd->span);
      82             : 
      83             :         cpus = 0;
      84             : 
      85             :         for_each_cpu_and(i, rd->span, cpu_active_mask)
      86             :                 cpus++;
      87             : 
      88             :         return cpus;
      89             : }
      90             : 
      91             : static inline unsigned long __dl_bw_capacity(int i)
      92             : {
      93             :         struct root_domain *rd = cpu_rq(i)->rd;
      94             :         unsigned long cap = 0;
      95             : 
      96             :         RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held(),
      97             :                          "sched RCU must be held");
      98             : 
      99             :         for_each_cpu_and(i, rd->span, cpu_active_mask)
     100             :                 cap += capacity_orig_of(i);
     101             : 
     102             :         return cap;
     103             : }
     104             : 
     105             : /*
     106             :  * XXX Fix: If 'rq->rd == def_root_domain' perform AC against capacity
      107             :  * of the CPU the task is running on rather than rd's \Sum CPU capacity.
     108             :  */
     109             : static inline unsigned long dl_bw_capacity(int i)
     110             : {
     111             :         if (!static_branch_unlikely(&sched_asym_cpucapacity) &&
     112             :             capacity_orig_of(i) == SCHED_CAPACITY_SCALE) {
     113             :                 return dl_bw_cpus(i) << SCHED_CAPACITY_SHIFT;
     114             :         } else {
     115             :                 return __dl_bw_capacity(i);
     116             :         }
     117             : }
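
/*
 * Editor's sketch (userspace, assumed values, not kernel code): the fast
 * path above is an optimization of the per-CPU sum in __dl_bw_capacity().
 * When every CPU runs at full capacity, summing capacity_orig_of() over
 * the root domain degenerates into a shift.
 */
#include <assert.h>

#define SCHED_CAPACITY_SHIFT	10
#define SCHED_CAPACITY_SCALE	(1L << SCHED_CAPACITY_SHIFT)

int main(void)
{
	int cpus = 4;
	long sum = 0;

	for (int i = 0; i < cpus; i++)
		sum += SCHED_CAPACITY_SCALE;	/* capacity_orig_of(i) */

	assert(sum == (long)cpus << SCHED_CAPACITY_SHIFT);	/* 4096 */
	return 0;
}
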
     118             : 
     119             : static inline bool dl_bw_visited(int cpu, u64 gen)
     120             : {
     121             :         struct root_domain *rd = cpu_rq(cpu)->rd;
     122             : 
     123             :         if (rd->visit_gen == gen)
     124             :                 return true;
     125             : 
     126             :         rd->visit_gen = gen;
     127             :         return false;
     128             : }
     129             : 
     130             : static inline
     131             : void __dl_update(struct dl_bw *dl_b, s64 bw)
     132             : {
     133             :         struct root_domain *rd = container_of(dl_b, struct root_domain, dl_bw);
     134             :         int i;
     135             : 
     136             :         RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held(),
     137             :                          "sched RCU must be held");
     138             :         for_each_cpu_and(i, rd->span, cpu_active_mask) {
     139             :                 struct rq *rq = cpu_rq(i);
     140             : 
     141             :                 rq->dl.extra_bw += bw;
     142             :         }
     143             : }
     144             : #else
     145             : static inline struct dl_bw *dl_bw_of(int i)
     146             : {
     147           0 :         return &cpu_rq(i)->dl.dl_bw;
     148             : }
     149             : 
     150             : static inline int dl_bw_cpus(int i)
     151             : {
     152             :         return 1;
     153             : }
     154             : 
     155             : static inline unsigned long dl_bw_capacity(int i)
     156             : {
     157             :         return SCHED_CAPACITY_SCALE;
     158             : }
     159             : 
     160             : static inline bool dl_bw_visited(int cpu, u64 gen)
     161             : {
     162             :         return false;
     163             : }
     164             : 
     165             : static inline
     166             : void __dl_update(struct dl_bw *dl_b, s64 bw)
     167             : {
     168           0 :         struct dl_rq *dl = container_of(dl_b, struct dl_rq, dl_bw);
     169             : 
     170           0 :         dl->extra_bw += bw;
     171             : }
     172             : #endif
     173             : 
     174             : static inline
     175             : void __dl_sub(struct dl_bw *dl_b, u64 tsk_bw, int cpus)
     176             : {
     177           0 :         dl_b->total_bw -= tsk_bw;
     178           0 :         __dl_update(dl_b, (s32)tsk_bw / cpus);
     179             : }
     180             : 
     181             : static inline
     182             : void __dl_add(struct dl_bw *dl_b, u64 tsk_bw, int cpus)
     183             : {
     184           0 :         dl_b->total_bw += tsk_bw;
     185           0 :         __dl_update(dl_b, -((s32)tsk_bw / cpus));
     186             : }
     187             : 
     188             : static inline bool
     189             : __dl_overflow(struct dl_bw *dl_b, unsigned long cap, u64 old_bw, u64 new_bw)
     190             : {
     191           0 :         return dl_b->bw != -1 &&
     192           0 :                cap_scale(dl_b->bw, cap) < dl_b->total_bw - old_bw + new_bw;
     193             : }
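
/*
 * Editor's sketch (userspace, not kernel code): how the fixed-point
 * bandwidth in __dl_overflow() behaves. Bandwidths are runtime/period
 * ratios scaled by BW_SHIFT (20) bits, mirroring to_ratio() elsewhere
 * in the scheduler; cap is a capacity out of SCHED_CAPACITY_SCALE
 * (1024). All concrete values below are assumptions for demonstration.
 */
#include <stdint.h>
#include <stdio.h>

#define BW_SHIFT		20
#define SCHED_CAPACITY_SHIFT	10

static uint64_t to_ratio(uint64_t period, uint64_t runtime)
{
	return (runtime << BW_SHIFT) / period;	/* 1.0 == 1 << 20 */
}

int main(void)
{
	/* each task: 10ms runtime every 100ms period -> 10% bandwidth */
	uint64_t tsk_bw   = to_ratio(100000000ULL, 10000000ULL);
	uint64_t total_bw = 9 * tsk_bw;		/* nine tasks admitted */
	uint64_t limit    = to_ratio(100, 95);	/* global 95% cap */
	uint64_t cap      = 2048;		/* two full-capacity CPUs */

	/* the __dl_overflow() test: scaled limit vs. would-be total */
	int overflow = (limit * cap >> SCHED_CAPACITY_SHIFT) <
		       total_bw - 0 + tsk_bw;

	printf("a 10th 10%% task %s\n", overflow ? "overflows" : "fits");
	return 0;
}
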
     194             : 
     195             : static inline
     196           0 : void __add_running_bw(u64 dl_bw, struct dl_rq *dl_rq)
     197             : {
     198           0 :         u64 old = dl_rq->running_bw;
     199             : 
     200           0 :         lockdep_assert_rq_held(rq_of_dl_rq(dl_rq));
     201           0 :         dl_rq->running_bw += dl_bw;
     202           0 :         SCHED_WARN_ON(dl_rq->running_bw < old); /* overflow */
     203           0 :         SCHED_WARN_ON(dl_rq->running_bw > dl_rq->this_bw);
     204             :         /* kick cpufreq (see the comment in kernel/sched/sched.h). */
     205           0 :         cpufreq_update_util(rq_of_dl_rq(dl_rq), 0);
     206           0 : }
     207             : 
     208             : static inline
     209           0 : void __sub_running_bw(u64 dl_bw, struct dl_rq *dl_rq)
     210             : {
     211           0 :         u64 old = dl_rq->running_bw;
     212             : 
     213           0 :         lockdep_assert_rq_held(rq_of_dl_rq(dl_rq));
     214           0 :         dl_rq->running_bw -= dl_bw;
     215           0 :         SCHED_WARN_ON(dl_rq->running_bw > old); /* underflow */
     216           0 :         if (dl_rq->running_bw > old)
     217           0 :                 dl_rq->running_bw = 0;
     218             :         /* kick cpufreq (see the comment in kernel/sched/sched.h). */
     219           0 :         cpufreq_update_util(rq_of_dl_rq(dl_rq), 0);
     220           0 : }
     221             : 
     222             : static inline
     223           0 : void __add_rq_bw(u64 dl_bw, struct dl_rq *dl_rq)
     224             : {
     225           0 :         u64 old = dl_rq->this_bw;
     226             : 
     227           0 :         lockdep_assert_rq_held(rq_of_dl_rq(dl_rq));
     228           0 :         dl_rq->this_bw += dl_bw;
     229           0 :         SCHED_WARN_ON(dl_rq->this_bw < old); /* overflow */
     230           0 : }
     231             : 
     232             : static inline
     233           0 : void __sub_rq_bw(u64 dl_bw, struct dl_rq *dl_rq)
     234             : {
     235           0 :         u64 old = dl_rq->this_bw;
     236             : 
     237           0 :         lockdep_assert_rq_held(rq_of_dl_rq(dl_rq));
     238           0 :         dl_rq->this_bw -= dl_bw;
     239           0 :         SCHED_WARN_ON(dl_rq->this_bw > old); /* underflow */
     240           0 :         if (dl_rq->this_bw > old)
     241           0 :                 dl_rq->this_bw = 0;
     242           0 :         SCHED_WARN_ON(dl_rq->running_bw > dl_rq->this_bw);
     243           0 : }
     244             : 
     245             : static inline
     246             : void add_rq_bw(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
     247             : {
     248           0 :         if (!dl_entity_is_special(dl_se))
     249           0 :                 __add_rq_bw(dl_se->dl_bw, dl_rq);
     250             : }
     251             : 
     252             : static inline
     253             : void sub_rq_bw(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
     254             : {
     255           0 :         if (!dl_entity_is_special(dl_se))
     256           0 :                 __sub_rq_bw(dl_se->dl_bw, dl_rq);
     257             : }
     258             : 
     259             : static inline
     260             : void add_running_bw(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
     261             : {
     262           0 :         if (!dl_entity_is_special(dl_se))
     263           0 :                 __add_running_bw(dl_se->dl_bw, dl_rq);
     264             : }
     265             : 
     266             : static inline
     267             : void sub_running_bw(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
     268             : {
     269           0 :         if (!dl_entity_is_special(dl_se))
     270           0 :                 __sub_running_bw(dl_se->dl_bw, dl_rq);
     271             : }
     272             : 
     273           0 : static void dl_change_utilization(struct task_struct *p, u64 new_bw)
     274             : {
     275             :         struct rq *rq;
     276             : 
     277           0 :         BUG_ON(p->dl.flags & SCHED_FLAG_SUGOV);
     278             : 
     279           0 :         if (task_on_rq_queued(p))
     280             :                 return;
     281             : 
     282           0 :         rq = task_rq(p);
     283           0 :         if (p->dl.dl_non_contending) {
     284           0 :                 sub_running_bw(&p->dl, &rq->dl);
     285           0 :                 p->dl.dl_non_contending = 0;
     286             :                 /*
     287             :                  * If the timer handler is currently running and the
     288             :                  * timer cannot be canceled, inactive_task_timer()
      289             :          * will see that dl_non_contending is not set, and
     290             :                  * will not touch the rq's active utilization,
     291             :                  * so we are still safe.
     292             :                  */
     293           0 :                 if (hrtimer_try_to_cancel(&p->dl.inactive_timer) == 1)
     294           0 :                         put_task_struct(p);
     295             :         }
     296           0 :         __sub_rq_bw(p->dl.dl_bw, &rq->dl);
     297           0 :         __add_rq_bw(new_bw, &rq->dl);
     298             : }
     299             : 
     300             : /*
     301             :  * The utilization of a task cannot be immediately removed from
     302             :  * the rq active utilization (running_bw) when the task blocks.
      303             :  * Instead, we have to wait for the so-called "0-lag time".
     304             :  *
     305             :  * If a task blocks before the "0-lag time", a timer (the inactive
     306             :  * timer) is armed, and running_bw is decreased when the timer
     307             :  * fires.
     308             :  *
     309             :  * If the task wakes up again before the inactive timer fires,
     310             :  * the timer is canceled, whereas if the task wakes up after the
     311             :  * inactive timer fired (and running_bw has been decreased) the
     312             :  * task's utilization has to be added to running_bw again.
     313             :  * A flag in the deadline scheduling entity (dl_non_contending)
     314             :  * is used to avoid race conditions between the inactive timer handler
     315             :  * and task wakeups.
     316             :  *
     317             :  * The following diagram shows how running_bw is updated. A task is
     318             :  * "ACTIVE" when its utilization contributes to running_bw; an
     319             :  * "ACTIVE contending" task is in the TASK_RUNNING state, while an
     320             :  * "ACTIVE non contending" task is a blocked task for which the "0-lag time"
     321             :  * has not passed yet. An "INACTIVE" task is a task for which the "0-lag"
      322             :  * time has already passed; it does not contribute to running_bw anymore.
     323             :  *                              +------------------+
     324             :  *             wakeup           |    ACTIVE        |
     325             :  *          +------------------>+   contending     |
     326             :  *          | add_running_bw    |                  |
     327             :  *          |                   +----+------+------+
     328             :  *          |                        |      ^
     329             :  *          |                dequeue |      |
     330             :  * +--------+-------+                |      |
     331             :  * |                |   t >= 0-lag   |      | wakeup
     332             :  * |    INACTIVE    |<---------------+      |
     333             :  * |                | sub_running_bw |      |
     334             :  * +--------+-------+                |      |
     335             :  *          ^                        |      |
     336             :  *          |              t < 0-lag |      |
     337             :  *          |                        |      |
     338             :  *          |                        V      |
     339             :  *          |                   +----+------+------+
     340             :  *          | sub_running_bw    |    ACTIVE        |
     341             :  *          +-------------------+                  |
     342             :  *            inactive timer    |  non contending  |
     343             :  *            fired             +------------------+
     344             :  *
     345             :  * The task_non_contending() function is invoked when a task
     346             :  * blocks, and checks if the 0-lag time already passed or
     347             :  * not (in the first case, it directly updates running_bw;
     348             :  * in the second case, it arms the inactive timer).
     349             :  *
     350             :  * The task_contending() function is invoked when a task wakes
     351             :  * up, and checks if the task is still in the "ACTIVE non contending"
     352             :  * state or not (in the second case, it updates running_bw).
     353             :  */
     354           0 : static void task_non_contending(struct task_struct *p)
     355             : {
     356           0 :         struct sched_dl_entity *dl_se = &p->dl;
     357           0 :         struct hrtimer *timer = &dl_se->inactive_timer;
     358           0 :         struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
     359           0 :         struct rq *rq = rq_of_dl_rq(dl_rq);
     360             :         s64 zerolag_time;
     361             : 
     362             :         /*
     363             :          * If this is a non-deadline task that has been boosted,
     364             :          * do nothing
     365             :          */
     366           0 :         if (dl_se->dl_runtime == 0)
     367             :                 return;
     368             : 
     369           0 :         if (dl_entity_is_special(dl_se))
     370             :                 return;
     371             : 
     372           0 :         WARN_ON(dl_se->dl_non_contending);
     373             : 
     374           0 :         zerolag_time = dl_se->deadline -
     375           0 :                  div64_long((dl_se->runtime * dl_se->dl_period),
     376             :                         dl_se->dl_runtime);
     377             : 
     378             :         /*
     379             :          * Using relative times instead of the absolute "0-lag time"
      380             :  * allows us to simplify the code.
     381             :          */
     382           0 :         zerolag_time -= rq_clock(rq);
     383             : 
     384             :         /*
     385             :          * If the "0-lag time" already passed, decrease the active
     386             :          * utilization now, instead of starting a timer
     387             :          */
     388           0 :         if ((zerolag_time < 0) || hrtimer_active(&dl_se->inactive_timer)) {
     389           0 :                 if (dl_task(p))
     390           0 :                         sub_running_bw(dl_se, dl_rq);
     391           0 :                 if (!dl_task(p) || READ_ONCE(p->__state) == TASK_DEAD) {
     392           0 :                         struct dl_bw *dl_b = dl_bw_of(task_cpu(p));
     393             : 
     394           0 :                         if (READ_ONCE(p->__state) == TASK_DEAD)
     395           0 :                                 sub_rq_bw(&p->dl, &rq->dl);
     396           0 :                         raw_spin_lock(&dl_b->lock);
     397           0 :                         __dl_sub(dl_b, p->dl.dl_bw, dl_bw_cpus(task_cpu(p)));
     398           0 :                         __dl_clear_params(p);
     399           0 :                         raw_spin_unlock(&dl_b->lock);
     400             :                 }
     401             : 
     402             :                 return;
     403             :         }
     404             : 
     405           0 :         dl_se->dl_non_contending = 1;
     406           0 :         get_task_struct(p);
     407           0 :         hrtimer_start(timer, ns_to_ktime(zerolag_time), HRTIMER_MODE_REL_HARD);
     408             : }
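
/*
 * Editor's sketch (assumed values, not kernel code): the 0-lag instant
 * computed by task_non_contending() above. A task with dl_runtime = 5ms
 * per dl_period = 10ms that blocks with 2ms of runtime left is still
 * "owed" 2 * 10 / 5 = 4ms of service at its reserved bandwidth, so the
 * 0-lag time falls 4ms before the absolute deadline.
 */
#include <stdint.h>

static inline int64_t zerolag_time_ns(int64_t deadline, int64_t runtime,
				      int64_t dl_period, int64_t dl_runtime)
{
	return deadline - runtime * dl_period / dl_runtime;
}
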
     409             : 
     410           0 : static void task_contending(struct sched_dl_entity *dl_se, int flags)
     411             : {
     412           0 :         struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
     413             : 
     414             :         /*
     415             :          * If this is a non-deadline task that has been boosted,
     416             :          * do nothing
     417             :          */
     418           0 :         if (dl_se->dl_runtime == 0)
     419             :                 return;
     420             : 
     421             :         if (flags & ENQUEUE_MIGRATED)
     422             :                 add_rq_bw(dl_se, dl_rq);
     423             : 
     424           0 :         if (dl_se->dl_non_contending) {
     425           0 :                 dl_se->dl_non_contending = 0;
     426             :                 /*
     427             :                  * If the timer handler is currently running and the
     428             :                  * timer cannot be canceled, inactive_task_timer()
      429             :          * will see that dl_non_contending is not set, and
     430             :                  * will not touch the rq's active utilization,
     431             :                  * so we are still safe.
     432             :                  */
     433           0 :                 if (hrtimer_try_to_cancel(&dl_se->inactive_timer) == 1)
     434           0 :                         put_task_struct(dl_task_of(dl_se));
     435             :         } else {
     436             :                 /*
     437             :                  * Since "dl_non_contending" is not set, the
     438             :                  * task's utilization has already been removed from
     439             :                  * active utilization (either when the task blocked,
      440             :                  * or when the "inactive timer" fired).
     441             :                  * So, add it back.
     442             :                  */
     443           0 :                 add_running_bw(dl_se, dl_rq);
     444             :         }
     445             : }
     446             : 
     447             : static inline int is_leftmost(struct task_struct *p, struct dl_rq *dl_rq)
     448             : {
     449           0 :         struct sched_dl_entity *dl_se = &p->dl;
     450             : 
     451             :         return rb_first_cached(&dl_rq->root) == &dl_se->rb_node;
     452             : }
     453             : 
     454             : static void init_dl_rq_bw_ratio(struct dl_rq *dl_rq);
     455             : 
     456           0 : void init_dl_bandwidth(struct dl_bandwidth *dl_b, u64 period, u64 runtime)
     457             : {
     458             :         raw_spin_lock_init(&dl_b->dl_runtime_lock);
     459           0 :         dl_b->dl_period = period;
     460           0 :         dl_b->dl_runtime = runtime;
     461           0 : }
     462             : 
     463           1 : void init_dl_bw(struct dl_bw *dl_b)
     464             : {
     465             :         raw_spin_lock_init(&dl_b->lock);
     466           1 :         if (global_rt_runtime() == RUNTIME_INF)
     467           0 :                 dl_b->bw = -1;
     468             :         else
     469           1 :                 dl_b->bw = to_ratio(global_rt_period(), global_rt_runtime());
     470           1 :         dl_b->total_bw = 0;
     471           1 : }
     472             : 
     473           1 : void init_dl_rq(struct dl_rq *dl_rq)
     474             : {
     475           1 :         dl_rq->root = RB_ROOT_CACHED;
     476             : 
     477             : #ifdef CONFIG_SMP
     478             :         /* zero means no -deadline tasks */
     479             :         dl_rq->earliest_dl.curr = dl_rq->earliest_dl.next = 0;
     480             : 
     481             :         dl_rq->dl_nr_migratory = 0;
     482             :         dl_rq->overloaded = 0;
     483             :         dl_rq->pushable_dl_tasks_root = RB_ROOT_CACHED;
     484             : #else
     485           1 :         init_dl_bw(&dl_rq->dl_bw);
     486             : #endif
     487             : 
     488           1 :         dl_rq->running_bw = 0;
     489           1 :         dl_rq->this_bw = 0;
     490           1 :         init_dl_rq_bw_ratio(dl_rq);
     491           1 : }
     492             : 
     493             : #ifdef CONFIG_SMP
     494             : 
     495             : static inline int dl_overloaded(struct rq *rq)
     496             : {
     497             :         return atomic_read(&rq->rd->dlo_count);
     498             : }
     499             : 
     500             : static inline void dl_set_overload(struct rq *rq)
     501             : {
     502             :         if (!rq->online)
     503             :                 return;
     504             : 
     505             :         cpumask_set_cpu(rq->cpu, rq->rd->dlo_mask);
     506             :         /*
     507             :          * Must be visible before the overload count is
     508             :          * set (as in sched_rt.c).
     509             :          *
     510             :          * Matched by the barrier in pull_dl_task().
     511             :          */
     512             :         smp_wmb();
     513             :         atomic_inc(&rq->rd->dlo_count);
     514             : }
     515             : 
     516             : static inline void dl_clear_overload(struct rq *rq)
     517             : {
     518             :         if (!rq->online)
     519             :                 return;
     520             : 
     521             :         atomic_dec(&rq->rd->dlo_count);
     522             :         cpumask_clear_cpu(rq->cpu, rq->rd->dlo_mask);
     523             : }
     524             : 
     525             : static void update_dl_migration(struct dl_rq *dl_rq)
     526             : {
     527             :         if (dl_rq->dl_nr_migratory && dl_rq->dl_nr_running > 1) {
     528             :                 if (!dl_rq->overloaded) {
     529             :                         dl_set_overload(rq_of_dl_rq(dl_rq));
     530             :                         dl_rq->overloaded = 1;
     531             :                 }
     532             :         } else if (dl_rq->overloaded) {
     533             :                 dl_clear_overload(rq_of_dl_rq(dl_rq));
     534             :                 dl_rq->overloaded = 0;
     535             :         }
     536             : }
     537             : 
     538             : static void inc_dl_migration(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
     539             : {
     540             :         struct task_struct *p = dl_task_of(dl_se);
     541             : 
     542             :         if (p->nr_cpus_allowed > 1)
     543             :                 dl_rq->dl_nr_migratory++;
     544             : 
     545             :         update_dl_migration(dl_rq);
     546             : }
     547             : 
     548             : static void dec_dl_migration(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
     549             : {
     550             :         struct task_struct *p = dl_task_of(dl_se);
     551             : 
     552             :         if (p->nr_cpus_allowed > 1)
     553             :                 dl_rq->dl_nr_migratory--;
     554             : 
     555             :         update_dl_migration(dl_rq);
     556             : }
     557             : 
     558             : #define __node_2_pdl(node) \
     559             :         rb_entry((node), struct task_struct, pushable_dl_tasks)
     560             : 
     561             : static inline bool __pushable_less(struct rb_node *a, const struct rb_node *b)
     562             : {
     563             :         return dl_entity_preempt(&__node_2_pdl(a)->dl, &__node_2_pdl(b)->dl);
     564             : }
     565             : 
     566             : /*
     567             :  * The list of pushable -deadline task is not a plist, like in
     568             :  * sched_rt.c, it is an rb-tree with tasks ordered by deadline.
     569             :  */
     570             : static void enqueue_pushable_dl_task(struct rq *rq, struct task_struct *p)
     571             : {
     572             :         struct rb_node *leftmost;
     573             : 
     574             :         BUG_ON(!RB_EMPTY_NODE(&p->pushable_dl_tasks));
     575             : 
     576             :         leftmost = rb_add_cached(&p->pushable_dl_tasks,
     577             :                                  &rq->dl.pushable_dl_tasks_root,
     578             :                                  __pushable_less);
     579             :         if (leftmost)
     580             :                 rq->dl.earliest_dl.next = p->dl.deadline;
     581             : }
     582             : 
     583             : static void dequeue_pushable_dl_task(struct rq *rq, struct task_struct *p)
     584             : {
     585             :         struct dl_rq *dl_rq = &rq->dl;
     586             :         struct rb_root_cached *root = &dl_rq->pushable_dl_tasks_root;
     587             :         struct rb_node *leftmost;
     588             : 
     589             :         if (RB_EMPTY_NODE(&p->pushable_dl_tasks))
     590             :                 return;
     591             : 
     592             :         leftmost = rb_erase_cached(&p->pushable_dl_tasks, root);
     593             :         if (leftmost)
     594             :                 dl_rq->earliest_dl.next = __node_2_pdl(leftmost)->dl.deadline;
     595             : 
     596             :         RB_CLEAR_NODE(&p->pushable_dl_tasks);
     597             : }
     598             : 
     599             : static inline int has_pushable_dl_tasks(struct rq *rq)
     600             : {
     601             :         return !RB_EMPTY_ROOT(&rq->dl.pushable_dl_tasks_root.rb_root);
     602             : }
     603             : 
     604             : static int push_dl_task(struct rq *rq);
     605             : 
     606             : static inline bool need_pull_dl_task(struct rq *rq, struct task_struct *prev)
     607             : {
     608             :         return rq->online && dl_task(prev);
     609             : }
     610             : 
     611             : static DEFINE_PER_CPU(struct callback_head, dl_push_head);
     612             : static DEFINE_PER_CPU(struct callback_head, dl_pull_head);
     613             : 
     614             : static void push_dl_tasks(struct rq *);
     615             : static void pull_dl_task(struct rq *);
     616             : 
     617             : static inline void deadline_queue_push_tasks(struct rq *rq)
     618             : {
     619             :         if (!has_pushable_dl_tasks(rq))
     620             :                 return;
     621             : 
     622             :         queue_balance_callback(rq, &per_cpu(dl_push_head, rq->cpu), push_dl_tasks);
     623             : }
     624             : 
     625             : static inline void deadline_queue_pull_task(struct rq *rq)
     626             : {
     627             :         queue_balance_callback(rq, &per_cpu(dl_pull_head, rq->cpu), pull_dl_task);
     628             : }
     629             : 
     630             : static struct rq *find_lock_later_rq(struct task_struct *task, struct rq *rq);
     631             : 
     632             : static struct rq *dl_task_offline_migration(struct rq *rq, struct task_struct *p)
     633             : {
     634             :         struct rq *later_rq = NULL;
     635             :         struct dl_bw *dl_b;
     636             : 
     637             :         later_rq = find_lock_later_rq(p, rq);
     638             :         if (!later_rq) {
     639             :                 int cpu;
     640             : 
     641             :                 /*
     642             :                  * If we cannot preempt any rq, fall back to pick any
     643             :                  * online CPU:
     644             :                  */
     645             :                 cpu = cpumask_any_and(cpu_active_mask, p->cpus_ptr);
     646             :                 if (cpu >= nr_cpu_ids) {
     647             :                         /*
     648             :                          * Failed to find any suitable CPU.
     649             :                          * The task will never come back!
     650             :                          */
     651             :                         BUG_ON(dl_bandwidth_enabled());
     652             : 
     653             :                         /*
     654             :                          * If admission control is disabled we
     655             :                          * try a little harder to let the task
     656             :                          * run.
     657             :                          */
     658             :                         cpu = cpumask_any(cpu_active_mask);
     659             :                 }
     660             :                 later_rq = cpu_rq(cpu);
     661             :                 double_lock_balance(rq, later_rq);
     662             :         }
     663             : 
     664             :         if (p->dl.dl_non_contending || p->dl.dl_throttled) {
     665             :                 /*
     666             :                  * Inactive timer is armed (or callback is running, but
     667             :                  * waiting for us to release rq locks). In any case, when it
     668             :                  * will fire (or continue), it will see running_bw of this
     669             :                  * task migrated to later_rq (and correctly handle it).
     670             :                  */
     671             :                 sub_running_bw(&p->dl, &rq->dl);
     672             :                 sub_rq_bw(&p->dl, &rq->dl);
     673             : 
     674             :                 add_rq_bw(&p->dl, &later_rq->dl);
     675             :                 add_running_bw(&p->dl, &later_rq->dl);
     676             :         } else {
     677             :                 sub_rq_bw(&p->dl, &rq->dl);
     678             :                 add_rq_bw(&p->dl, &later_rq->dl);
     679             :         }
     680             : 
     681             :         /*
     682             :          * And we finally need to fixup root_domain(s) bandwidth accounting,
     683             :          * since p is still hanging out in the old (now moved to default) root
     684             :          * domain.
     685             :          */
     686             :         dl_b = &rq->rd->dl_bw;
     687             :         raw_spin_lock(&dl_b->lock);
     688             :         __dl_sub(dl_b, p->dl.dl_bw, cpumask_weight(rq->rd->span));
     689             :         raw_spin_unlock(&dl_b->lock);
     690             : 
     691             :         dl_b = &later_rq->rd->dl_bw;
     692             :         raw_spin_lock(&dl_b->lock);
     693             :         __dl_add(dl_b, p->dl.dl_bw, cpumask_weight(later_rq->rd->span));
     694             :         raw_spin_unlock(&dl_b->lock);
     695             : 
     696             :         set_task_cpu(p, later_rq->cpu);
     697             :         double_unlock_balance(later_rq, rq);
     698             : 
     699             :         return later_rq;
     700             : }
     701             : 
     702             : #else
     703             : 
     704             : static inline
     705             : void enqueue_pushable_dl_task(struct rq *rq, struct task_struct *p)
     706             : {
     707             : }
     708             : 
     709             : static inline
     710             : void dequeue_pushable_dl_task(struct rq *rq, struct task_struct *p)
     711             : {
     712             : }
     713             : 
     714             : static inline
     715             : void inc_dl_migration(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
     716             : {
     717             : }
     718             : 
     719             : static inline
     720             : void dec_dl_migration(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
     721             : {
     722             : }
     723             : 
     724             : static inline void deadline_queue_push_tasks(struct rq *rq)
     725             : {
     726             : }
     727             : 
     728             : static inline void deadline_queue_pull_task(struct rq *rq)
     729             : {
     730             : }
     731             : #endif /* CONFIG_SMP */
     732             : 
     733             : static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags);
     734             : static void __dequeue_task_dl(struct rq *rq, struct task_struct *p, int flags);
     735             : static void check_preempt_curr_dl(struct rq *rq, struct task_struct *p, int flags);
     736             : 
     737             : /*
     738             :  * We are being explicitly informed that a new instance is starting,
     739             :  * and this means that:
     740             :  *  - the absolute deadline of the entity has to be placed at
     741             :  *    current time + relative deadline;
     742             :  *  - the runtime of the entity has to be set to the maximum value.
     743             :  *
      744             :  * The capability of specifying such an event is useful whenever a -deadline
     745             :  * entity wants to (try to!) synchronize its behaviour with the scheduler's
     746             :  * one, and to (try to!) reconcile itself with its own scheduling
     747             :  * parameters.
     748             :  */
     749           0 : static inline void setup_new_dl_entity(struct sched_dl_entity *dl_se)
     750             : {
     751           0 :         struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
     752           0 :         struct rq *rq = rq_of_dl_rq(dl_rq);
     753             : 
     754           0 :         WARN_ON(is_dl_boosted(dl_se));
     755           0 :         WARN_ON(dl_time_before(rq_clock(rq), dl_se->deadline));
     756             : 
     757             :         /*
     758             :          * We are racing with the deadline timer. So, do nothing because
     759             :          * the deadline timer handler will take care of properly recharging
     760             :          * the runtime and postponing the deadline
     761             :          */
     762           0 :         if (dl_se->dl_throttled)
     763             :                 return;
     764             : 
     765             :         /*
     766             :          * We use the regular wall clock time to set deadlines in the
     767             :          * future; in fact, we must consider execution overheads (time
     768             :          * spent on hardirq context, etc.).
     769             :          */
     770           0 :         dl_se->deadline = rq_clock(rq) + dl_se->dl_deadline;
     771           0 :         dl_se->runtime = dl_se->dl_runtime;
     772             : }
     773             : 
     774             : /*
     775             :  * Pure Earliest Deadline First (EDF) scheduling does not deal with the
      776             :  * possibility of an entity lasting longer than it declared, and thus
      777             :  * exhausting its runtime.
      778             :  *
      779             :  * Here we are interested in making runtime overrun possible, but we do
      780             :  * not want a misbehaving entity to affect the scheduling of all
     781             :  * other entities.
     782             :  * Therefore, a budgeting strategy called Constant Bandwidth Server (CBS)
     783             :  * is used, in order to confine each entity within its own bandwidth.
     784             :  *
     785             :  * This function deals exactly with that, and ensures that when the runtime
      786             :  * of an entity is replenished, its deadline is also postponed. That ensures
      787             :  * the overrunning entity can't interfere with other entities in the system and
      788             :  * can't make them miss their deadlines. Such overruns typically happen when
      789             :  * an entity voluntarily tries to exceed its runtime, or when it simply
      790             :  * underestimated it during sched_setattr().
     791             :  */
     792           0 : static void replenish_dl_entity(struct sched_dl_entity *dl_se)
     793             : {
     794           0 :         struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
     795           0 :         struct rq *rq = rq_of_dl_rq(dl_rq);
     796             : 
     797           0 :         BUG_ON(pi_of(dl_se)->dl_runtime <= 0);
     798             : 
     799             :         /*
     800             :          * This could be the case for a !-dl task that is boosted.
     801             :          * Just go with full inherited parameters.
     802             :          */
     803           0 :         if (dl_se->dl_deadline == 0) {
     804           0 :                 dl_se->deadline = rq_clock(rq) + pi_of(dl_se)->dl_deadline;
     805           0 :                 dl_se->runtime = pi_of(dl_se)->dl_runtime;
     806             :         }
     807             : 
     808           0 :         if (dl_se->dl_yielded && dl_se->runtime > 0)
     809           0 :                 dl_se->runtime = 0;
     810             : 
     811             :         /*
     812             :          * We keep moving the deadline away until we get some
     813             :          * available runtime for the entity. This ensures correct
     814             :          * handling of situations where the runtime overrun is
      815             :  * arbitrarily large.
     816             :          */
     817           0 :         while (dl_se->runtime <= 0) {
     818           0 :                 dl_se->deadline += pi_of(dl_se)->dl_period;
     819           0 :                 dl_se->runtime += pi_of(dl_se)->dl_runtime;
     820             :         }
     821             : 
     822             :         /*
     823             :          * At this point, the deadline really should be "in
     824             :          * the future" with respect to rq->clock. If it's
     825             :          * not, we are, for some reason, lagging too much!
      826             :  * Anyway, after having warned userspace about that,
      827             :  * we still try to keep things running by
     828             :          * resetting the deadline and the budget of the
     829             :          * entity.
     830             :          */
     831           0 :         if (dl_time_before(dl_se->deadline, rq_clock(rq))) {
     832           0 :                 printk_deferred_once("sched: DL replenish lagged too much\n");
     833           0 :                 dl_se->deadline = rq_clock(rq) + pi_of(dl_se)->dl_deadline;
     834           0 :                 dl_se->runtime = pi_of(dl_se)->dl_runtime;
     835             :         }
     836             : 
     837           0 :         if (dl_se->dl_yielded)
     838           0 :                 dl_se->dl_yielded = 0;
     839           0 :         if (dl_se->dl_throttled)
     840           0 :                 dl_se->dl_throttled = 0;
     841           0 : }
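
/*
 * Editor's sketch (assumed values, not kernel code): the replenishment
 * loop above for an arbitrarily large overrun. With dl_runtime = 10ms,
 * dl_period = 100ms and runtime = -25ms, the loop iterates three times:
 * the deadline is pushed 300ms into the future and the entity is left
 * with 5ms of budget.
 */
#include <stdint.h>

static void cbs_replenish(int64_t *runtime, uint64_t *deadline,
			  uint64_t dl_runtime, uint64_t dl_period)
{
	while (*runtime <= 0) {
		*deadline += dl_period;
		*runtime += (int64_t)dl_runtime;
	}
}
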
     842             : 
     843             : /*
     844             :  * Here we check if --at time t-- an entity (which is probably being
     845             :  * [re]activated or, in general, enqueued) can use its remaining runtime
     846             :  * and its current deadline _without_ exceeding the bandwidth it is
     847             :  * assigned (function returns true if it can't). We are in fact applying
     848             :  * one of the CBS rules: when a task wakes up, if the residual runtime
     849             :  * over residual deadline fits within the allocated bandwidth, then we
     850             :  * can keep the current (absolute) deadline and residual budget without
     851             :  * disrupting the schedulability of the system. Otherwise, we should
     852             :  * refill the runtime and set the deadline a period in the future,
     853             :  * because keeping the current (absolute) deadline of the task would
     854             :  * result in breaking guarantees promised to other tasks (refer to
     855             :  * Documentation/scheduler/sched-deadline.rst for more information).
     856             :  *
     857             :  * This function returns true if:
     858             :  *
     859             :  *   runtime / (deadline - t) > dl_runtime / dl_deadline ,
     860             :  *
     861             :  * IOW we can't recycle current parameters.
     862             :  *
     863             :  * Notice that the bandwidth check is done against the deadline. For
      864             :  * tasks with deadline equal to period this is the same as using
     865             :  * dl_period instead of dl_deadline in the equation above.
     866             :  */
     867             : static bool dl_entity_overflow(struct sched_dl_entity *dl_se, u64 t)
     868             : {
     869             :         u64 left, right;
     870             : 
     871             :         /*
     872             :          * left and right are the two sides of the equation above,
     873             :          * after a bit of shuffling to use multiplications instead
     874             :          * of divisions.
     875             :          *
     876             :          * Note that none of the time values involved in the two
     877             :          * multiplications are absolute: dl_deadline and dl_runtime
     878             :          * are the relative deadline and the maximum runtime of each
     879             :          * instance, runtime is the runtime left for the last instance
     880             :          * and (deadline - t), since t is rq->clock, is the time left
     881             :          * to the (absolute) deadline. Even if overflowing the u64 type
     882             :          * is very unlikely to occur in both cases, here we scale down
      883             :  * as we want to rule that risk out entirely. Scaling down by 10
      884             :  * means that we reduce granularity to 1us. We are fine with it,
      885             :  * since this is only a true/false check and, anyway, thinking
      886             :  * of anything below microsecond resolution is actually fiction
     887             :          * (but still we want to give the user that illusion >;).
     888             :          */
     889           0 :         left = (pi_of(dl_se)->dl_deadline >> DL_SCALE) * (dl_se->runtime >> DL_SCALE);
     890           0 :         right = ((dl_se->deadline - t) >> DL_SCALE) *
     891           0 :                 (pi_of(dl_se)->dl_runtime >> DL_SCALE);
     892             : 
     893           0 :         return dl_time_before(right, left);
     894             : }
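
/*
 * Editor's sketch (assumed values, not kernel code): the cross-multiplied
 * check above at DL_SCALE (10 bit, ~1us) granularity, without the
 * wraparound handling of dl_time_before(). A task with dl_runtime = 10ms
 * per dl_deadline = 100ms (10%) that wakes with 8ms of runtime left and
 * 20ms to its old deadline would need 8/20 = 40% of the CPU, so the old
 * parameters can't be recycled and the check returns true.
 */
#include <stdbool.h>
#include <stdint.h>

#define DL_SCALE 10

static bool entity_overflows(uint64_t dl_deadline, uint64_t dl_runtime,
			     uint64_t runtime, uint64_t time_to_deadline)
{
	uint64_t left  = (dl_deadline >> DL_SCALE) * (runtime >> DL_SCALE);
	uint64_t right = (time_to_deadline >> DL_SCALE) *
			 (dl_runtime >> DL_SCALE);

	return right < left;	/* true: refill and push the deadline */
}
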
     895             : 
     896             : /*
      897             :  * Revised wakeup rule [1]: For self-suspending tasks, rather than
      898             :  * re-initializing the task's runtime and deadline, the revised wakeup
      899             :  * rule adjusts the task's runtime so that the task does not overrun its
      900             :  * density.
     901             :  *
     902             :  * Reasoning: a task may overrun the density if:
     903             :  *    runtime / (deadline - t) > dl_runtime / dl_deadline
     904             :  *
     905             :  * Therefore, runtime can be adjusted to:
     906             :  *     runtime = (dl_runtime / dl_deadline) * (deadline - t)
     907             :  *
      908             :  * This is the maximum runtime the task can use without breaking
      909             :  * its density bound.
     910             :  *
     911             :  * [1] Luca Abeni, Giuseppe Lipari, and Juri Lelli. 2015. Constant
     912             :  * bandwidth server revisited. SIGBED Rev. 11, 4 (January 2015), 19-24.
     913             :  */
     914             : static void
     915           0 : update_dl_revised_wakeup(struct sched_dl_entity *dl_se, struct rq *rq)
     916             : {
     917           0 :         u64 laxity = dl_se->deadline - rq_clock(rq);
     918             : 
     919             :         /*
     920             :          * If the task has deadline < period, and the deadline is in the past,
     921             :          * it should already be throttled before this check.
     922             :          *
     923             :          * See update_dl_entity() comments for further details.
     924             :          */
     925           0 :         WARN_ON(dl_time_before(dl_se->deadline, rq_clock(rq)));
     926             : 
     927           0 :         dl_se->runtime = (dl_se->dl_density * laxity) >> BW_SHIFT;
     928           0 : }
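
/*
 * Editor's sketch (assumed values, not kernel code): the fixed-point
 * math in update_dl_revised_wakeup() above. dl_density holds
 * dl_runtime/dl_deadline scaled by BW_SHIFT (20) bits, computed at
 * sched_setattr() time. For dl_runtime = 10ms, dl_deadline = 100ms and
 * 40ms of laxity, the task wakes with ~4ms of budget: exactly what its
 * density allows over the time left to its deadline.
 */
#include <stdint.h>

#define BW_SHIFT 20

static uint64_t revised_runtime(uint64_t dl_runtime, uint64_t dl_deadline,
				uint64_t laxity)
{
	uint64_t density = (dl_runtime << BW_SHIFT) / dl_deadline;

	return (density * laxity) >> BW_SHIFT;
}
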
     929             : 
     930             : /*
     931             :  * Regarding the deadline, a task with implicit deadline has a relative
     932             :  * deadline == relative period. A task with constrained deadline has a
     933             :  * relative deadline <= relative period.
     934             :  *
     935             :  * We support constrained deadline tasks. However, there are some restrictions
     936             :  * applied only for tasks which do not have an implicit deadline. See
     937             :  * update_dl_entity() to know more about such restrictions.
     938             :  *
      939             :  * dl_is_implicit() returns true if the task has an implicit deadline.
     940             :  */
     941             : static inline bool dl_is_implicit(struct sched_dl_entity *dl_se)
     942             : {
     943             :         return dl_se->dl_deadline == dl_se->dl_period;
     944             : }
     945             : 
     946             : /*
     947             :  * When a deadline entity is placed in the runqueue, its runtime and deadline
     948             :  * might need to be updated. This is done by a CBS wake up rule. There are two
      949             :  * different rules: 1) the original CBS; and 2) the Revised CBS.
     950             :  *
     951             :  * When the task is starting a new period, the Original CBS is used. In this
     952             :  * case, the runtime is replenished and a new absolute deadline is set.
     953             :  *
      954             :  * When a task is queued before the beginning of the next period, using the
      955             :  * remaining runtime and deadline could make the entity overflow; see
      956             :  * dl_entity_overflow() to find more about runtime overflow. When such a case
      957             :  * is detected, the runtime and deadline need to be updated.
     958             :  *
     959             :  * If the task has an implicit deadline, i.e., deadline == period, the Original
      960             :  * CBS is applied. The runtime is replenished and a new absolute deadline is
     961             :  * set, as in the previous cases.
     962             :  *
     963             :  * However, the Original CBS does not work properly for tasks with
     964             :  * deadline < period, which are said to have a constrained deadline. By
     965             :  * applying the Original CBS, a constrained deadline task would be able to run
     966             :  * runtime/deadline in a period. With deadline < period, the task would
     967             :  * overrun the runtime/period allowed bandwidth, breaking the admission test.
     968             :  *
      969             :  * In order to prevent this misbehavior, the Revised CBS is used for
      970             :  * constrained deadline tasks when a runtime overflow is detected. In the
      971             :  * Revised CBS, rather than replenishing & setting a new absolute deadline,
      972             :  * the remaining runtime of the task is reduced to avoid runtime overflow.
      973             :  * Please refer to the comments in the update_dl_revised_wakeup() function
      974             :  * to find more about the Revised CBS rule.
     975             :  */
     976           0 : static void update_dl_entity(struct sched_dl_entity *dl_se)
     977             : {
     978           0 :         struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
     979           0 :         struct rq *rq = rq_of_dl_rq(dl_rq);
     980             : 
     981           0 :         if (dl_time_before(dl_se->deadline, rq_clock(rq)) ||
     982           0 :             dl_entity_overflow(dl_se, rq_clock(rq))) {
     983             : 
     984           0 :                 if (unlikely(!dl_is_implicit(dl_se) &&
     985             :                              !dl_time_before(dl_se->deadline, rq_clock(rq)) &&
     986             :                              !is_dl_boosted(dl_se))) {
     987           0 :                         update_dl_revised_wakeup(dl_se, rq);
     988           0 :                         return;
     989             :                 }
     990             : 
     991           0 :                 dl_se->deadline = rq_clock(rq) + pi_of(dl_se)->dl_deadline;
     992           0 :                 dl_se->runtime = pi_of(dl_se)->dl_runtime;
     993             :         }
     994             : }
     995             : 
     996             : static inline u64 dl_next_period(struct sched_dl_entity *dl_se)
     997             : {
     998           0 :         return dl_se->deadline - dl_se->dl_deadline + dl_se->dl_period;
     999             : }
    1000             : 
    1001             : /*
    1002             :  * If the entity depleted all its runtime, and if we want it to sleep
    1003             :  * while waiting for some new execution time to become available, we
    1004             :  * set the bandwidth replenishment timer to the replenishment instant
    1005             :  * and try to activate it.
    1006             :  *
     1007             :  * Notice that it is important for the caller to know whether the timer
     1008             :  * actually started (i.e., whether the replenishment instant is in
     1009             :  * the future or in the past).
    1010             :  */
    1011           0 : static int start_dl_timer(struct task_struct *p)
    1012             : {
    1013           0 :         struct sched_dl_entity *dl_se = &p->dl;
    1014           0 :         struct hrtimer *timer = &dl_se->dl_timer;
    1015           0 :         struct rq *rq = task_rq(p);
    1016             :         ktime_t now, act;
    1017             :         s64 delta;
    1018             : 
    1019           0 :         lockdep_assert_rq_held(rq);
    1020             : 
    1021             :         /*
     1022             :          * We want the timer to fire at the deadline, accounting for the
     1023             :          * fact that the deadline is expressed in rq->clock time rather
     1024             :          * than in the hrtimer's time base.
    1025             :          */
    1026           0 :         act = ns_to_ktime(dl_next_period(dl_se));
    1027           0 :         now = hrtimer_cb_get_time(timer);
    1028           0 :         delta = ktime_to_ns(now) - rq_clock(rq);
    1029           0 :         act = ktime_add_ns(act, delta);
    1030             : 
    1031             :         /*
    1032             :          * If the expiry time already passed, e.g., because the value
    1033             :          * chosen as the deadline is too small, don't even try to
    1034             :          * start the timer in the past!
    1035             :          */
    1036           0 :         if (ktime_us_delta(act, now) < 0)
    1037             :                 return 0;
    1038             : 
    1039             :         /*
    1040             :          * !enqueued will guarantee another callback; even if one is already in
    1041             :          * progress. This ensures a balanced {get,put}_task_struct().
    1042             :          *
    1043             :          * The race against __run_timer() clearing the enqueued state is
    1044             :          * harmless because we're holding task_rq()->lock, therefore the timer
    1045             :          * expiring after we've done the check will wait on its task_rq_lock()
    1046             :          * and observe our state.
    1047             :          */
    1048           0 :         if (!hrtimer_is_queued(timer)) {
    1049           0 :                 get_task_struct(p);
    1050             :                 hrtimer_start(timer, act, HRTIMER_MODE_ABS_HARD);
    1051             :         }
    1052             : 
    1053             :         return 1;
    1054             : }
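
The act/delta computation above is a clock-domain translation. A hedged sketch with invented numbers (the real code works on ktime_t values; plain integers are used here):

/* The replenishment instant is known in rq_clock time, but the hrtimer
 * expires against its own time base. delta = hrtimer_now - rq_clock_now
 * is the current offset between the two domains, so adding it converts
 * the instant into the hrtimer's base before arming the timer. */
uint64_t repl_rq = 1000000000;			/* instant, rq_clock ns */
uint64_t now_hrt =  999100000;			/* hrtimer base, ns */
uint64_t now_rq  =  998900000;			/* rq_clock, ns */
int64_t  delta   = (int64_t)(now_hrt - now_rq);	/* +200000 ns */
uint64_t expiry  = repl_rq + delta;	/* same instant, hrtimer base */
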
    1055             : 
    1056             : /*
     1057             :  * This is the bandwidth enforcement timer callback. If we get here, we know
    1058             :  * a task is not on its dl_rq, since the fact that the timer was running
    1059             :  * means the task is throttled and needs a runtime replenishment.
    1060             :  *
     1061             :  * However, what we actually do depends on whether the task is active
     1062             :  * (it is on its rq) or has been removed from there by a call to
     1063             :  * dequeue_task_dl(). In the former case we must issue the runtime
     1064             :  * replenishment and add the task back to the dl_rq; in the latter, we
     1065             :  * do nothing but clear dl_throttled, so that the runtime and deadline
     1066             :  * update (and the queueing back to the dl_rq) will be done by the
     1067             :  * next call to enqueue_task_dl().
    1068             :  */
    1069           0 : static enum hrtimer_restart dl_task_timer(struct hrtimer *timer)
    1070             : {
    1071           0 :         struct sched_dl_entity *dl_se = container_of(timer,
    1072             :                                                      struct sched_dl_entity,
    1073             :                                                      dl_timer);
    1074           0 :         struct task_struct *p = dl_task_of(dl_se);
    1075             :         struct rq_flags rf;
    1076             :         struct rq *rq;
    1077             : 
    1078           0 :         rq = task_rq_lock(p, &rf);
    1079             : 
    1080             :         /*
    1081             :          * The task might have changed its scheduling policy to something
    1082             :          * different than SCHED_DEADLINE (through switched_from_dl()).
    1083             :          */
    1084           0 :         if (!dl_task(p))
    1085             :                 goto unlock;
    1086             : 
    1087             :         /*
     1088             :          * The task might have been boosted by someone else and might be in the
     1089             :          * boosting/deboosting path; in that case it's not throttled.
    1090             :          */
    1091           0 :         if (is_dl_boosted(dl_se))
    1092             :                 goto unlock;
    1093             : 
    1094             :         /*
    1095             :          * Spurious timer due to start_dl_timer() race; or we already received
    1096             :          * a replenishment from rt_mutex_setprio().
    1097             :          */
    1098           0 :         if (!dl_se->dl_throttled)
    1099             :                 goto unlock;
    1100             : 
    1101             :         sched_clock_tick();
    1102           0 :         update_rq_clock(rq);
    1103             : 
    1104             :         /*
    1105             :          * If the throttle happened during sched-out; like:
    1106             :          *
    1107             :          *   schedule()
    1108             :          *     deactivate_task()
    1109             :          *       dequeue_task_dl()
    1110             :          *         update_curr_dl()
    1111             :          *           start_dl_timer()
    1112             :          *         __dequeue_task_dl()
    1113             :          *     prev->on_rq = 0;
    1114             :          *
    1115             :          * We can be both throttled and !queued. Replenish the counter
    1116             :          * but do not enqueue -- wait for our wakeup to do that.
    1117             :          */
    1118           0 :         if (!task_on_rq_queued(p)) {
    1119           0 :                 replenish_dl_entity(dl_se);
    1120           0 :                 goto unlock;
    1121             :         }
    1122             : 
    1123             : #ifdef CONFIG_SMP
    1124             :         if (unlikely(!rq->online)) {
    1125             :                 /*
    1126             :                  * If the runqueue is no longer available, migrate the
    1127             :                  * task elsewhere. This necessarily changes rq.
    1128             :                  */
    1129             :                 lockdep_unpin_lock(__rq_lockp(rq), rf.cookie);
    1130             :                 rq = dl_task_offline_migration(rq, p);
    1131             :                 rf.cookie = lockdep_pin_lock(__rq_lockp(rq));
    1132             :                 update_rq_clock(rq);
    1133             : 
    1134             :                 /*
    1135             :                  * Now that the task has been migrated to the new RQ and we
    1136             :                  * have that locked, proceed as normal and enqueue the task
    1137             :                  * there.
    1138             :                  */
    1139             :         }
    1140             : #endif
    1141             : 
    1142           0 :         enqueue_task_dl(rq, p, ENQUEUE_REPLENISH);
    1143           0 :         if (dl_task(rq->curr))
    1144             :                 check_preempt_curr_dl(rq, p, 0);
    1145             :         else
    1146           0 :                 resched_curr(rq);
    1147             : 
    1148             : #ifdef CONFIG_SMP
    1149             :         /*
    1150             :          * Queueing this task back might have overloaded rq, check if we need
    1151             :          * to kick someone away.
    1152             :          */
    1153             :         if (has_pushable_dl_tasks(rq)) {
     1154             :                  * Nothing relies on rq->lock after this, so it's safe to drop
    1155             :                  * Nothing relies on rq->lock after this, so its safe to drop
    1156             :                  * rq->lock.
    1157             :                  */
    1158             :                 rq_unpin_lock(rq, &rf);
    1159             :                 push_dl_task(rq);
    1160             :                 rq_repin_lock(rq, &rf);
    1161             :         }
    1162             : #endif
    1163             : 
    1164             : unlock:
    1165           0 :         task_rq_unlock(rq, p, &rf);
    1166             : 
    1167             :         /*
    1168             :          * This can free the task_struct, including this hrtimer, do not touch
    1169             :          * anything related to that after this.
    1170             :          */
    1171           0 :         put_task_struct(p);
    1172             : 
    1173           0 :         return HRTIMER_NORESTART;
    1174             : }
    1175             : 
    1176         108 : void init_dl_task_timer(struct sched_dl_entity *dl_se)
    1177             : {
    1178         108 :         struct hrtimer *timer = &dl_se->dl_timer;
    1179             : 
    1180         108 :         hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD);
    1181         108 :         timer->function = dl_task_timer;
    1182         108 : }
    1183             : 
    1184             : /*
     1185             :  * During activation, CBS checks whether it can reuse the current task's
    1186             :  * runtime and period. If the deadline of the task is in the past, CBS
    1187             :  * cannot use the runtime, and so it replenishes the task. This rule
    1188             :  * works fine for implicit deadline tasks (deadline == period), and the
    1189             :  * CBS was designed for implicit deadline tasks. However, a task with
    1190             :  * constrained deadline (deadline < period) might be awakened after the
    1191             :  * deadline, but before the next period. In this case, replenishing the
     1192             :  * task would allow it to consume bandwidth runtime / deadline. Since in
     1193             :  * this case deadline < period, CBS would let the task consume more than
     1194             :  * its runtime / period bandwidth. In a very loaded system, this can cause
     1195             :  * a domino effect, making other tasks miss their deadlines.
    1196             :  *
    1197             :  * To avoid this problem, in the activation of a constrained deadline
    1198             :  * task after the deadline but before the next period, throttle the
     1199             :  * task and set the replenishment timer to the beginning of the next period,
    1200             :  * unless it is boosted.
    1201             :  */
    1202           0 : static inline void dl_check_constrained_dl(struct sched_dl_entity *dl_se)
    1203             : {
    1204           0 :         struct task_struct *p = dl_task_of(dl_se);
    1205           0 :         struct rq *rq = rq_of_dl_rq(dl_rq_of_se(dl_se));
    1206             : 
    1207           0 :         if (dl_time_before(dl_se->deadline, rq_clock(rq)) &&
    1208           0 :             dl_time_before(rq_clock(rq), dl_next_period(dl_se))) {
    1209           0 :                 if (unlikely(is_dl_boosted(dl_se) || !start_dl_timer(p)))
    1210             :                         return;
    1211           0 :                 dl_se->dl_throttled = 1;
    1212           0 :                 if (dl_se->runtime > 0)
    1213           0 :                         dl_se->runtime = 0;
    1214             :         }
    1215             : }
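
To make the throttling window concrete, here is a small predicate with a worked example (invented numbers; field names mirror sched_dl_entity, but the code is a sketch, not the kernel's):

#include <stdint.h>
#include <stdbool.h>

/* True when a constrained task wakes after its deadline but before its
 * next period, i.e. in the window where it must be throttled. */
static bool in_constrained_window(uint64_t now, uint64_t deadline,
				  uint64_t dl_deadline, uint64_t dl_period)
{
	uint64_t next_period = deadline - dl_deadline + dl_period;

	return deadline < now && now < next_period;
}

/* dl_deadline = 4 ms, dl_period = 10 ms, absolute deadline at 14 ms:
 * a wakeup at 16 ms falls inside (14, 20), so the task is throttled and
 * the replenishment timer fires at t = 20 ms; a wakeup at 21 ms is past
 * the next period and takes the normal update_dl_entity() path instead. */
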
    1216             : 
    1217             : static
    1218             : int dl_runtime_exceeded(struct sched_dl_entity *dl_se)
    1219             : {
    1220             :         return (dl_se->runtime <= 0);
    1221             : }
    1222             : 
    1223             : extern bool sched_rt_bandwidth_account(struct rt_rq *rt_rq);
    1224             : 
    1225             : /*
    1226             :  * This function implements the GRUB accounting rule:
    1227             :  * according to the GRUB reclaiming algorithm, the runtime is
    1228             :  * not decreased as "dq = -dt", but as
    1229             :  * "dq = -max{u / Umax, (1 - Uinact - Uextra)} dt",
    1230             :  * where u is the utilization of the task, Umax is the maximum reclaimable
    1231             :  * utilization, Uinact is the (per-runqueue) inactive utilization, computed
    1232             :  * as the difference between the "total runqueue utilization" and the
    1233             :  * runqueue active utilization, and Uextra is the (per runqueue) extra
    1234             :  * reclaimable utilization.
    1235             :  * Since rq->dl.running_bw and rq->dl.this_bw contain utilizations
    1236             :  * multiplied by 2^BW_SHIFT, the result has to be shifted right by
    1237             :  * BW_SHIFT.
    1238             :  * Since rq->dl.bw_ratio contains 1 / Umax multiplied by 2^RATIO_SHIFT,
     1239             :  * dl_bw is multiplied by rq->dl.bw_ratio and shifted right by RATIO_SHIFT.
    1240             :  * Since delta is a 64 bit variable, to have an overflow its value
    1241             :  * should be larger than 2^(64 - 20 - 8), which is more than 64 seconds.
    1242             :  * So, overflow is not an issue here.
    1243             :  */
    1244             : static u64 grub_reclaim(u64 delta, struct rq *rq, struct sched_dl_entity *dl_se)
    1245             : {
    1246           0 :         u64 u_inact = rq->dl.this_bw - rq->dl.running_bw; /* Utot - Uact */
    1247             :         u64 u_act;
    1248           0 :         u64 u_act_min = (dl_se->dl_bw * rq->dl.bw_ratio) >> RATIO_SHIFT;
    1249             : 
    1250             :         /*
    1251             :          * Instead of computing max{u * bw_ratio, (1 - u_inact - u_extra)},
    1252             :          * we compare u_inact + rq->dl.extra_bw with
    1253             :          * 1 - (u * rq->dl.bw_ratio >> RATIO_SHIFT), because
     1254             :          * u_inact + rq->dl.extra_bw can be larger than
     1255             :          * 1 (so, 1 - u_inact - rq->dl.extra_bw would be negative,
     1256             :          * leading to wrong results).
    1257             :          */
    1258           0 :         if (u_inact + rq->dl.extra_bw > BW_UNIT - u_act_min)
    1259             :                 u_act = u_act_min;
    1260             :         else
    1261           0 :                 u_act = BW_UNIT - u_inact - rq->dl.extra_bw;
    1262             : 
    1263           0 :         return (delta * u_act) >> BW_SHIFT;
    1264             : }
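
A worked example of the fixed-point arithmetic may help. The numbers below are invented, and BW_SHIFT = 20 / RATIO_SHIFT = 8 are assumed from the "64 - 20 - 8" remark above:

#define BW_SHIFT	20
#define BW_UNIT		(1ULL << BW_SHIFT)	/* 1.0 in fixed point */
#define RATIO_SHIFT	8

/* Task: u = 0.25; Umax = 0.95, so bw_ratio = (1/0.95) * 2^8 ~= 269.
 * Runqueue: this_bw = 0.5, running_bw = 0.3, extra_bw = 0.45. */
uint64_t dl_bw    = BW_UNIT / 4;			/* 262144 */
uint64_t bw_ratio = 269;
uint64_t u_inact  = BW_UNIT / 2 - 3 * BW_UNIT / 10;	/* ~0.2  */
uint64_t extra_bw = 45 * BW_UNIT / 100;			/* ~0.45 */

uint64_t u_act_min = (dl_bw * bw_ratio) >> RATIO_SHIFT;	/* ~0.263 */
uint64_t u_act;

if (u_inact + extra_bw > BW_UNIT - u_act_min)	/* 0.65 > 0.737? no */
	u_act = u_act_min;
else
	u_act = BW_UNIT - u_inact - extra_bw;	/* ~0.35 */

/* For delta = 1 ms of consumed time, the budget actually charged is
 * (1000000 * u_act) >> BW_SHIFT ~= 350000 ns: the task burns only ~35%
 * of its budget per unit of wall time, reclaiming unused bandwidth. */
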
    1265             : 
    1266             : /*
    1267             :  * Update the current task's runtime statistics (provided it is still
    1268             :  * a -deadline task and has not been removed from the dl_rq).
    1269             :  */
    1270           0 : static void update_curr_dl(struct rq *rq)
    1271             : {
    1272           0 :         struct task_struct *curr = rq->curr;
    1273           0 :         struct sched_dl_entity *dl_se = &curr->dl;
    1274             :         u64 delta_exec, scaled_delta_exec;
    1275           0 :         int cpu = cpu_of(rq);
    1276             :         u64 now;
    1277             : 
    1278           0 :         if (!dl_task(curr) || !on_dl_rq(dl_se))
    1279             :                 return;
    1280             : 
    1281             :         /*
    1282             :          * Consumed budget is computed considering the time as
    1283             :          * observed by schedulable tasks (excluding time spent
    1284             :          * in hardirq context, etc.). Deadlines are instead
    1285             :          * computed using hard walltime. This seems to be the more
    1286             :          * natural solution, but the full ramifications of this
    1287             :          * approach need further study.
    1288             :          */
    1289           0 :         now = rq_clock_task(rq);
    1290           0 :         delta_exec = now - curr->se.exec_start;
    1291           0 :         if (unlikely((s64)delta_exec <= 0)) {
    1292           0 :                 if (unlikely(dl_se->dl_yielded))
    1293             :                         goto throttle;
    1294             :                 return;
    1295             :         }
    1296             : 
    1297             :         schedstat_set(curr->stats.exec_max,
    1298             :                       max(curr->stats.exec_max, delta_exec));
    1299             : 
    1300           0 :         trace_sched_stat_runtime(curr, delta_exec, 0);
    1301             : 
    1302           0 :         curr->se.sum_exec_runtime += delta_exec;
    1303           0 :         account_group_exec_runtime(curr, delta_exec);
    1304             : 
    1305           0 :         curr->se.exec_start = now;
    1306           0 :         cgroup_account_cputime(curr, delta_exec);
    1307             : 
    1308           0 :         if (dl_entity_is_special(dl_se))
    1309             :                 return;
    1310             : 
    1311             :         /*
    1312             :          * For tasks that participate in GRUB, we implement GRUB-PA: the
    1313             :          * spare reclaimed bandwidth is used to clock down frequency.
    1314             :          *
    1315             :          * For the others, we still need to scale reservation parameters
    1316             :          * according to current frequency and CPU maximum capacity.
    1317             :          */
    1318           0 :         if (unlikely(dl_se->flags & SCHED_FLAG_RECLAIM)) {
    1319           0 :                 scaled_delta_exec = grub_reclaim(delta_exec,
    1320             :                                                  rq,
    1321             :                                                  &curr->dl);
    1322             :         } else {
    1323           0 :                 unsigned long scale_freq = arch_scale_freq_capacity(cpu);
    1324           0 :                 unsigned long scale_cpu = arch_scale_cpu_capacity(cpu);
    1325             : 
    1326           0 :                 scaled_delta_exec = cap_scale(delta_exec, scale_freq);
    1327           0 :                 scaled_delta_exec = cap_scale(scaled_delta_exec, scale_cpu);
    1328             :         }
    1329             : 
    1330           0 :         dl_se->runtime -= scaled_delta_exec;
    1331             : 
    1332             : throttle:
    1333           0 :         if (dl_runtime_exceeded(dl_se) || dl_se->dl_yielded) {
    1334           0 :                 dl_se->dl_throttled = 1;
    1335             : 
    1336             :                 /* If requested, inform the user about runtime overruns. */
    1337           0 :                 if (dl_runtime_exceeded(dl_se) &&
    1338           0 :                     (dl_se->flags & SCHED_FLAG_DL_OVERRUN))
    1339           0 :                         dl_se->dl_overrun = 1;
    1340             : 
    1341           0 :                 __dequeue_task_dl(rq, curr, 0);
    1342           0 :                 if (unlikely(is_dl_boosted(dl_se) || !start_dl_timer(curr)))
    1343           0 :                         enqueue_task_dl(rq, curr, ENQUEUE_REPLENISH);
    1344             : 
    1345           0 :                 if (!is_leftmost(curr, &rq->dl))
    1346           0 :                         resched_curr(rq);
    1347             :         }
    1348             : 
    1349             :         /*
    1350             :          * Because -- for now -- we share the rt bandwidth, we need to
    1351             :          * account our runtime there too, otherwise actual rt tasks
    1352             :          * would be able to exceed the shared quota.
    1353             :          *
    1354             :          * Account to the root rt group for now.
    1355             :          *
    1356             :          * The solution we're working towards is having the RT groups scheduled
     1357             :          * using deadline servers -- however there are a few nasties to figure
    1358             :          * out before that can happen.
    1359             :          */
    1360           0 :         if (rt_bandwidth_enabled()) {
    1361           0 :                 struct rt_rq *rt_rq = &rq->rt;
    1362             : 
    1363           0 :                 raw_spin_lock(&rt_rq->rt_runtime_lock);
    1364             :                 /*
     1365             :                  * We'll let actual RT tasks worry about the overflow here; we
     1366             :                  * have our own CBS to keep us in line. Only account when RT
    1367             :                  * bandwidth is relevant.
    1368             :                  */
    1369           0 :                 if (sched_rt_bandwidth_account(rt_rq))
    1370           0 :                         rt_rq->rt_time += delta_exec;
    1371           0 :                 raw_spin_unlock(&rt_rq->rt_runtime_lock);
    1372             :         }
    1373             : }
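
For the non-reclaiming branch above, cap_scale() shrinks the charged runtime in proportion to current frequency and CPU capacity. A sketch assuming the usual kernel convention of 1024 as full scale, i.e. cap_scale(v, s) = (v * s) >> 10 (numbers invented):

#define SCHED_CAPACITY_SHIFT	10
#define cap_scale(v, s)		(((v) * (s)) >> SCHED_CAPACITY_SHIFT)

uint64_t delta_exec = 1000000;		/* 1 ms of task clock */
unsigned long scale_freq = 512;		/* CPU at half its max frequency */
unsigned long scale_cpu  = 768;		/* core with 75% of max capacity */

uint64_t scaled = cap_scale(delta_exec, scale_freq);	/* 500000 ns */
scaled = cap_scale(scaled, scale_cpu);			/* 375000 ns */
/* Running at half speed on a 75%-capacity core, only 0.375 ms of the
 * reservation is charged for 1 ms of wall time. */
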
    1374             : 
    1375           0 : static enum hrtimer_restart inactive_task_timer(struct hrtimer *timer)
    1376             : {
    1377           0 :         struct sched_dl_entity *dl_se = container_of(timer,
    1378             :                                                      struct sched_dl_entity,
    1379             :                                                      inactive_timer);
    1380           0 :         struct task_struct *p = dl_task_of(dl_se);
    1381             :         struct rq_flags rf;
    1382             :         struct rq *rq;
    1383             : 
    1384           0 :         rq = task_rq_lock(p, &rf);
    1385             : 
    1386             :         sched_clock_tick();
    1387           0 :         update_rq_clock(rq);
    1388             : 
    1389           0 :         if (!dl_task(p) || READ_ONCE(p->__state) == TASK_DEAD) {
    1390           0 :                 struct dl_bw *dl_b = dl_bw_of(task_cpu(p));
    1391             : 
    1392           0 :                 if (READ_ONCE(p->__state) == TASK_DEAD && dl_se->dl_non_contending) {
    1393           0 :                         sub_running_bw(&p->dl, dl_rq_of_se(&p->dl));
    1394           0 :                         sub_rq_bw(&p->dl, dl_rq_of_se(&p->dl));
    1395           0 :                         dl_se->dl_non_contending = 0;
    1396             :                 }
    1397             : 
    1398           0 :                 raw_spin_lock(&dl_b->lock);
    1399           0 :                 __dl_sub(dl_b, p->dl.dl_bw, dl_bw_cpus(task_cpu(p)));
    1400           0 :                 raw_spin_unlock(&dl_b->lock);
    1401             :                 __dl_clear_params(p);
    1402             : 
    1403             :                 goto unlock;
    1404             :         }
    1405           0 :         if (dl_se->dl_non_contending == 0)
    1406             :                 goto unlock;
    1407             : 
    1408           0 :         sub_running_bw(dl_se, &rq->dl);
    1409           0 :         dl_se->dl_non_contending = 0;
    1410             : unlock:
    1411           0 :         task_rq_unlock(rq, p, &rf);
    1412           0 :         put_task_struct(p);
    1413             : 
    1414           0 :         return HRTIMER_NORESTART;
    1415             : }
    1416             : 
    1417         108 : void init_dl_inactive_task_timer(struct sched_dl_entity *dl_se)
    1418             : {
    1419         108 :         struct hrtimer *timer = &dl_se->inactive_timer;
    1420             : 
    1421         108 :         hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD);
    1422         108 :         timer->function = inactive_task_timer;
    1423         108 : }
    1424             : 
    1425             : #define __node_2_dle(node) \
    1426             :         rb_entry((node), struct sched_dl_entity, rb_node)
    1427             : 
    1428             : #ifdef CONFIG_SMP
    1429             : 
    1430             : static void inc_dl_deadline(struct dl_rq *dl_rq, u64 deadline)
    1431             : {
    1432             :         struct rq *rq = rq_of_dl_rq(dl_rq);
    1433             : 
    1434             :         if (dl_rq->earliest_dl.curr == 0 ||
    1435             :             dl_time_before(deadline, dl_rq->earliest_dl.curr)) {
    1436             :                 if (dl_rq->earliest_dl.curr == 0)
    1437             :                         cpupri_set(&rq->rd->cpupri, rq->cpu, CPUPRI_HIGHER);
    1438             :                 dl_rq->earliest_dl.curr = deadline;
    1439             :                 cpudl_set(&rq->rd->cpudl, rq->cpu, deadline);
    1440             :         }
    1441             : }
    1442             : 
    1443             : static void dec_dl_deadline(struct dl_rq *dl_rq, u64 deadline)
    1444             : {
    1445             :         struct rq *rq = rq_of_dl_rq(dl_rq);
    1446             : 
    1447             :         /*
    1448             :          * Since we may have removed our earliest (and/or next earliest)
     1449             :          * task, we must recompute them.
    1450             :          */
    1451             :         if (!dl_rq->dl_nr_running) {
    1452             :                 dl_rq->earliest_dl.curr = 0;
    1453             :                 dl_rq->earliest_dl.next = 0;
    1454             :                 cpudl_clear(&rq->rd->cpudl, rq->cpu);
    1455             :                 cpupri_set(&rq->rd->cpupri, rq->cpu, rq->rt.highest_prio.curr);
    1456             :         } else {
    1457             :                 struct rb_node *leftmost = rb_first_cached(&dl_rq->root);
    1458             :                 struct sched_dl_entity *entry = __node_2_dle(leftmost);
    1459             : 
    1460             :                 dl_rq->earliest_dl.curr = entry->deadline;
    1461             :                 cpudl_set(&rq->rd->cpudl, rq->cpu, entry->deadline);
    1462             :         }
    1463             : }
    1464             : 
    1465             : #else
    1466             : 
    1467             : static inline void inc_dl_deadline(struct dl_rq *dl_rq, u64 deadline) {}
    1468             : static inline void dec_dl_deadline(struct dl_rq *dl_rq, u64 deadline) {}
    1469             : 
    1470             : #endif /* CONFIG_SMP */
    1471             : 
    1472             : static inline
    1473           0 : void inc_dl_tasks(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
    1474             : {
    1475           0 :         int prio = dl_task_of(dl_se)->prio;
    1476           0 :         u64 deadline = dl_se->deadline;
    1477             : 
    1478           0 :         WARN_ON(!dl_prio(prio));
    1479           0 :         dl_rq->dl_nr_running++;
    1480           0 :         add_nr_running(rq_of_dl_rq(dl_rq), 1);
    1481             : 
    1482           0 :         inc_dl_deadline(dl_rq, deadline);
    1483           0 :         inc_dl_migration(dl_se, dl_rq);
    1484           0 : }
    1485             : 
    1486             : static inline
    1487           0 : void dec_dl_tasks(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
    1488             : {
    1489           0 :         int prio = dl_task_of(dl_se)->prio;
    1490             : 
    1491           0 :         WARN_ON(!dl_prio(prio));
    1492           0 :         WARN_ON(!dl_rq->dl_nr_running);
    1493           0 :         dl_rq->dl_nr_running--;
    1494           0 :         sub_nr_running(rq_of_dl_rq(dl_rq), 1);
    1495             : 
    1496           0 :         dec_dl_deadline(dl_rq, dl_se->deadline);
    1497           0 :         dec_dl_migration(dl_se, dl_rq);
    1498           0 : }
    1499             : 
    1500             : static inline bool __dl_less(struct rb_node *a, const struct rb_node *b)
    1501             : {
    1502           0 :         return dl_time_before(__node_2_dle(a)->deadline, __node_2_dle(b)->deadline);
    1503             : }
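
__dl_less() orders entities through dl_time_before(), which, like the other kernel time helpers, compares via a signed difference so the ordering survives u64 wraparound. A sketch of the idiom (assuming the usual (s64)(a - b) < 0 form):

#include <stdint.h>
#include <stdbool.h>

static bool time_before64(uint64_t a, uint64_t b)
{
	return (int64_t)(a - b) < 0;	/* signed difference, not magnitude */
}

/* Even if 'a' has wrapped past 2^64 while 'b' has not, the signed
 * difference still orders the two instants correctly, as long as they
 * are less than 2^63 ns (~292 years) apart. */
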
    1504             : 
    1505             : static inline struct sched_statistics *
    1506             : __schedstats_from_dl_se(struct sched_dl_entity *dl_se)
    1507             : {
    1508             :         return &dl_task_of(dl_se)->stats;
    1509             : }
    1510             : 
    1511             : static inline void
    1512             : update_stats_wait_start_dl(struct dl_rq *dl_rq, struct sched_dl_entity *dl_se)
    1513             : {
    1514             :         struct sched_statistics *stats;
    1515             : 
    1516             :         if (!schedstat_enabled())
    1517             :                 return;
    1518             : 
    1519             :         stats = __schedstats_from_dl_se(dl_se);
    1520             :         __update_stats_wait_start(rq_of_dl_rq(dl_rq), dl_task_of(dl_se), stats);
    1521             : }
    1522             : 
    1523             : static inline void
    1524             : update_stats_wait_end_dl(struct dl_rq *dl_rq, struct sched_dl_entity *dl_se)
    1525             : {
    1526             :         struct sched_statistics *stats;
    1527             : 
    1528             :         if (!schedstat_enabled())
    1529             :                 return;
    1530             : 
    1531             :         stats = __schedstats_from_dl_se(dl_se);
    1532             :         __update_stats_wait_end(rq_of_dl_rq(dl_rq), dl_task_of(dl_se), stats);
    1533             : }
    1534             : 
    1535             : static inline void
    1536             : update_stats_enqueue_sleeper_dl(struct dl_rq *dl_rq, struct sched_dl_entity *dl_se)
    1537             : {
    1538             :         struct sched_statistics *stats;
    1539             : 
    1540             :         if (!schedstat_enabled())
    1541             :                 return;
    1542             : 
    1543             :         stats = __schedstats_from_dl_se(dl_se);
    1544             :         __update_stats_enqueue_sleeper(rq_of_dl_rq(dl_rq), dl_task_of(dl_se), stats);
    1545             : }
    1546             : 
    1547             : static inline void
    1548             : update_stats_enqueue_dl(struct dl_rq *dl_rq, struct sched_dl_entity *dl_se,
    1549             :                         int flags)
    1550             : {
    1551             :         if (!schedstat_enabled())
    1552             :                 return;
    1553             : 
    1554             :         if (flags & ENQUEUE_WAKEUP)
    1555             :                 update_stats_enqueue_sleeper_dl(dl_rq, dl_se);
    1556             : }
    1557             : 
    1558             : static inline void
    1559             : update_stats_dequeue_dl(struct dl_rq *dl_rq, struct sched_dl_entity *dl_se,
    1560             :                         int flags)
    1561             : {
    1562           0 :         struct task_struct *p = dl_task_of(dl_se);
    1563             : 
    1564             :         if (!schedstat_enabled())
    1565             :                 return;
    1566             : 
    1567             :         if ((flags & DEQUEUE_SLEEP)) {
    1568             :                 unsigned int state;
    1569             : 
    1570             :                 state = READ_ONCE(p->__state);
    1571             :                 if (state & TASK_INTERRUPTIBLE)
    1572             :                         __schedstat_set(p->stats.sleep_start,
    1573             :                                         rq_clock(rq_of_dl_rq(dl_rq)));
    1574             : 
    1575             :                 if (state & TASK_UNINTERRUPTIBLE)
    1576             :                         __schedstat_set(p->stats.block_start,
    1577             :                                         rq_clock(rq_of_dl_rq(dl_rq)));
    1578             :         }
    1579             : }
    1580             : 
    1581           0 : static void __enqueue_dl_entity(struct sched_dl_entity *dl_se)
    1582             : {
    1583           0 :         struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
    1584             : 
    1585           0 :         BUG_ON(!RB_EMPTY_NODE(&dl_se->rb_node));
    1586             : 
    1587           0 :         rb_add_cached(&dl_se->rb_node, &dl_rq->root, __dl_less);
    1588             : 
    1589           0 :         inc_dl_tasks(dl_se, dl_rq);
    1590           0 : }
    1591             : 
    1592           0 : static void __dequeue_dl_entity(struct sched_dl_entity *dl_se)
    1593             : {
    1594           0 :         struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
    1595             : 
    1596           0 :         if (RB_EMPTY_NODE(&dl_se->rb_node))
    1597             :                 return;
    1598             : 
    1599           0 :         rb_erase_cached(&dl_se->rb_node, &dl_rq->root);
    1600             : 
    1601           0 :         RB_CLEAR_NODE(&dl_se->rb_node);
    1602             : 
    1603           0 :         dec_dl_tasks(dl_se, dl_rq);
    1604             : }
    1605             : 
    1606             : static void
    1607           0 : enqueue_dl_entity(struct sched_dl_entity *dl_se, int flags)
    1608             : {
    1609           0 :         BUG_ON(on_dl_rq(dl_se));
    1610             : 
    1611           0 :         update_stats_enqueue_dl(dl_rq_of_se(dl_se), dl_se, flags);
    1612             : 
    1613             :         /*
    1614             :          * If this is a wakeup or a new instance, the scheduling
    1615             :          * parameters of the task might need updating. Otherwise,
    1616             :          * we want a replenishment of its runtime.
    1617             :          */
    1618           0 :         if (flags & ENQUEUE_WAKEUP) {
    1619           0 :                 task_contending(dl_se, flags);
    1620           0 :                 update_dl_entity(dl_se);
    1621           0 :         } else if (flags & ENQUEUE_REPLENISH) {
    1622           0 :                 replenish_dl_entity(dl_se);
    1623           0 :         } else if ((flags & ENQUEUE_RESTORE) &&
    1624           0 :                   dl_time_before(dl_se->deadline,
    1625             :                                  rq_clock(rq_of_dl_rq(dl_rq_of_se(dl_se))))) {
    1626           0 :                 setup_new_dl_entity(dl_se);
    1627             :         }
    1628             : 
    1629           0 :         __enqueue_dl_entity(dl_se);
    1630           0 : }
    1631             : 
    1632             : static void dequeue_dl_entity(struct sched_dl_entity *dl_se)
    1633             : {
    1634           0 :         __dequeue_dl_entity(dl_se);
    1635             : }
    1636             : 
    1637           0 : static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags)
    1638             : {
    1639           0 :         if (is_dl_boosted(&p->dl)) {
    1640             :                 /*
    1641             :                  * Because of delays in the detection of the overrun of a
    1642             :                  * thread's runtime, it might be the case that a thread
     1643             :                  * goes to sleep in an rt mutex with negative runtime. As
    1644             :                  * a consequence, the thread will be throttled.
    1645             :                  *
    1646             :                  * While waiting for the mutex, this thread can also be
    1647             :                  * boosted via PI, resulting in a thread that is throttled
    1648             :                  * and boosted at the same time.
    1649             :                  *
    1650             :                  * In this case, the boost overrides the throttle.
    1651             :                  */
    1652           0 :                 if (p->dl.dl_throttled) {
    1653             :                         /*
    1654             :                          * The replenish timer needs to be canceled. No
    1655             :                          * problem if it fires concurrently: boosted threads
    1656             :                          * are ignored in dl_task_timer().
    1657             :                          */
    1658           0 :                         hrtimer_try_to_cancel(&p->dl.dl_timer);
    1659           0 :                         p->dl.dl_throttled = 0;
    1660             :                 }
    1661           0 :         } else if (!dl_prio(p->normal_prio)) {
    1662             :                 /*
    1663             :                  * Special case in which we have a !SCHED_DEADLINE task that is going
    1664             :                  * to be deboosted, but exceeds its runtime while doing so. No point in
     1665             :                  * replenishing it, as it's going to return to its original
    1666             :                  * scheduling class after this. If it has been throttled, we need to
    1667             :                  * clear the flag, otherwise the task may wake up as throttled after
    1668             :                  * being boosted again with no means to replenish the runtime and clear
    1669             :                  * the throttle.
    1670             :                  */
    1671           0 :                 p->dl.dl_throttled = 0;
    1672           0 :                 BUG_ON(!is_dl_boosted(&p->dl) || flags != ENQUEUE_REPLENISH);
    1673             :                 return;
    1674             :         }
    1675             : 
    1676             :         /*
    1677             :          * Check if a constrained deadline task was activated
    1678             :          * after the deadline but before the next period.
    1679             :          * If that is the case, the task will be throttled and
    1680             :          * the replenishment timer will be set to the next period.
    1681             :          */
    1682           0 :         if (!p->dl.dl_throttled && !dl_is_implicit(&p->dl))
    1683           0 :                 dl_check_constrained_dl(&p->dl);
    1684             : 
    1685           0 :         if (p->on_rq == TASK_ON_RQ_MIGRATING || flags & ENQUEUE_RESTORE) {
    1686           0 :                 add_rq_bw(&p->dl, &rq->dl);
    1687           0 :                 add_running_bw(&p->dl, &rq->dl);
    1688             :         }
    1689             : 
    1690             :         /*
    1691             :          * If p is throttled, we do not enqueue it. In fact, if it exhausted
     1692             :          * its budget it needs a replenishment and, since it is now on
    1693             :          * its rq, the bandwidth timer callback (which clearly has not
    1694             :          * run yet) will take care of this.
     1695             :          * However, the active utilization does not depend on whether
     1696             :          * the task is on the runqueue; it depends on the
     1697             :          * task's state - in GRUB parlance, "inactive" vs "active contending".
    1698             :          * In other words, even if a task is throttled its utilization must
    1699             :          * be counted in the active utilization; hence, we need to call
    1700             :          * add_running_bw().
    1701             :          */
    1702           0 :         if (p->dl.dl_throttled && !(flags & ENQUEUE_REPLENISH)) {
    1703           0 :                 if (flags & ENQUEUE_WAKEUP)
    1704           0 :                         task_contending(&p->dl, flags);
    1705             : 
    1706             :                 return;
    1707             :         }
    1708             : 
    1709             :         check_schedstat_required();
    1710           0 :         update_stats_wait_start_dl(dl_rq_of_se(&p->dl), &p->dl);
    1711             : 
    1712           0 :         enqueue_dl_entity(&p->dl, flags);
    1713             : 
    1714           0 :         if (!task_current(rq, p) && p->nr_cpus_allowed > 1)
    1715             :                 enqueue_pushable_dl_task(rq, p);
    1716             : }
    1717             : 
    1718             : static void __dequeue_task_dl(struct rq *rq, struct task_struct *p, int flags)
    1719             : {
    1720           0 :         update_stats_dequeue_dl(&rq->dl, &p->dl, flags);
    1721           0 :         dequeue_dl_entity(&p->dl);
    1722           0 :         dequeue_pushable_dl_task(rq, p);
    1723             : }
    1724             : 
    1725           0 : static void dequeue_task_dl(struct rq *rq, struct task_struct *p, int flags)
    1726             : {
    1727           0 :         update_curr_dl(rq);
    1728           0 :         __dequeue_task_dl(rq, p, flags);
    1729             : 
    1730           0 :         if (p->on_rq == TASK_ON_RQ_MIGRATING || flags & DEQUEUE_SAVE) {
    1731           0 :                 sub_running_bw(&p->dl, &rq->dl);
    1732           0 :                 sub_rq_bw(&p->dl, &rq->dl);
    1733             :         }
    1734             : 
    1735             :         /*
     1736             :          * This check allows us to start the inactive timer (or to immediately
    1737             :          * decrease the active utilization, if needed) in two cases:
    1738             :          * when the task blocks and when it is terminating
    1739             :          * (p->state == TASK_DEAD). We can handle the two cases in the same
    1740             :          * way, because from GRUB's point of view the same thing is happening
    1741             :          * (the task moves from "active contending" to "active non contending"
     1742             :          * or "inactive").
    1743             :          */
    1744           0 :         if (flags & DEQUEUE_SLEEP)
    1745           0 :                 task_non_contending(p);
    1746           0 : }
    1747             : 
    1748             : /*
     1749             :  * The yield semantic for -deadline tasks is:
     1750             :  *
     1751             :  *   get off the CPU until our next instance arrives, with
     1752             :  *   a new runtime. This is of little use now, since we
     1753             :  *   don't have a bandwidth reclaiming mechanism. Anyway,
     1754             :  *   bandwidth reclaiming is planned for the future, and
     1755             :  *   yield_task_dl will indicate that some spare budget
     1756             :  *   is available for other task instances to use.
    1757             :  */
    1758           0 : static void yield_task_dl(struct rq *rq)
    1759             : {
    1760             :         /*
    1761             :          * We make the task go to sleep until its current deadline by
    1762             :          * forcing its runtime to zero. This way, update_curr_dl() stops
    1763             :          * it and the bandwidth timer will wake it up and will give it
    1764             :          * new scheduling parameters (thanks to dl_yielded=1).
    1765             :          */
    1766           0 :         rq->curr->dl.dl_yielded = 1;
    1767             : 
    1768           0 :         update_rq_clock(rq);
    1769           0 :         update_curr_dl(rq);
    1770             :         /*
    1771             :          * Tell update_rq_clock() that we've just updated,
    1772             :          * so we don't do microscopic update in schedule()
    1773             :          * and double the fastpath cost.
    1774             :          */
    1775           0 :         rq_clock_skip_update(rq);
    1776           0 : }
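
From userspace the yield semantic looks like the hedged sketch below: a SCHED_DEADLINE task that finishes its work early calls sched_yield() to give up the rest of its runtime until the next period. The struct layout follows sched_setattr(2); glibc has no wrapper, so the raw syscall is used, and do_periodic_work() is a hypothetical workload:

#define _GNU_SOURCE
#include <sched.h>
#include <stdint.h>
#include <unistd.h>
#include <sys/syscall.h>

struct sched_attr {
	uint32_t size, sched_policy;
	uint64_t sched_flags;
	int32_t  sched_nice;
	uint32_t sched_priority;
	uint64_t sched_runtime, sched_deadline, sched_period;
};

int main(void)
{
	struct sched_attr attr = {
		.size           = sizeof(attr),
		.sched_policy   = 6,			/* SCHED_DEADLINE */
		.sched_runtime  =  2 * 1000 * 1000,	/*  2 ms */
		.sched_deadline = 10 * 1000 * 1000,	/* 10 ms */
		.sched_period   = 10 * 1000 * 1000,	/* 10 ms */
	};

	if (syscall(SYS_sched_setattr, 0, &attr, 0))
		return 1;

	for (;;) {
		/* do_periodic_work();  -- hypothetical */
		sched_yield();	/* sleep until the next period, new runtime */
	}
}
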
    1777             : 
    1778             : #ifdef CONFIG_SMP
    1779             : 
    1780             : static int find_later_rq(struct task_struct *task);
    1781             : 
    1782             : static int
    1783             : select_task_rq_dl(struct task_struct *p, int cpu, int flags)
    1784             : {
    1785             :         struct task_struct *curr;
    1786             :         bool select_rq;
    1787             :         struct rq *rq;
    1788             : 
    1789             :         if (!(flags & WF_TTWU))
    1790             :                 goto out;
    1791             : 
    1792             :         rq = cpu_rq(cpu);
    1793             : 
    1794             :         rcu_read_lock();
    1795             :         curr = READ_ONCE(rq->curr); /* unlocked access */
    1796             : 
    1797             :         /*
    1798             :          * If we are dealing with a -deadline task, we must
    1799             :          * decide where to wake it up.
    1800             :          * If it has a later deadline and the current task
    1801             :          * on this rq can't move (provided the waking task
    1802             :          * can!) we prefer to send it somewhere else. On the
    1803             :          * other hand, if it has a shorter deadline, we
     1804             :          * try to make it stay here; it might be important.
    1805             :          */
    1806             :         select_rq = unlikely(dl_task(curr)) &&
    1807             :                     (curr->nr_cpus_allowed < 2 ||
    1808             :                      !dl_entity_preempt(&p->dl, &curr->dl)) &&
    1809             :                     p->nr_cpus_allowed > 1;
    1810             : 
    1811             :         /*
    1812             :          * Take the capacity of the CPU into account to
    1813             :          * ensure it fits the requirement of the task.
    1814             :          */
    1815             :         if (static_branch_unlikely(&sched_asym_cpucapacity))
    1816             :                 select_rq |= !dl_task_fits_capacity(p, cpu);
    1817             : 
    1818             :         if (select_rq) {
    1819             :                 int target = find_later_rq(p);
    1820             : 
    1821             :                 if (target != -1 &&
    1822             :                                 (dl_time_before(p->dl.deadline,
    1823             :                                         cpu_rq(target)->dl.earliest_dl.curr) ||
    1824             :                                 (cpu_rq(target)->dl.dl_nr_running == 0)))
    1825             :                         cpu = target;
    1826             :         }
    1827             :         rcu_read_unlock();
    1828             : 
    1829             : out:
    1830             :         return cpu;
    1831             : }
    1832             : 
    1833             : static void migrate_task_rq_dl(struct task_struct *p, int new_cpu __maybe_unused)
    1834             : {
    1835             :         struct rq *rq;
    1836             : 
    1837             :         if (READ_ONCE(p->__state) != TASK_WAKING)
    1838             :                 return;
    1839             : 
    1840             :         rq = task_rq(p);
    1841             :         /*
    1842             :          * Since p->state == TASK_WAKING, set_task_cpu() has been called
    1843             :          * from try_to_wake_up(). Hence, p->pi_lock is locked, but
     1844             :          * rq->lock is not. So, lock it.
    1845             :          */
    1846             :         raw_spin_rq_lock(rq);
    1847             :         if (p->dl.dl_non_contending) {
    1848             :                 update_rq_clock(rq);
    1849             :                 sub_running_bw(&p->dl, &rq->dl);
    1850             :                 p->dl.dl_non_contending = 0;
    1851             :                 /*
    1852             :                  * If the timer handler is currently running and the
    1853             :                  * timer cannot be canceled, inactive_task_timer()
    1854             :                  * will see that dl_not_contending is not set, and
    1855             :                  * will not touch the rq's active utilization,
    1856             :                  * so we are still safe.
    1857             :                  */
    1858             :                 if (hrtimer_try_to_cancel(&p->dl.inactive_timer) == 1)
    1859             :                         put_task_struct(p);
    1860             :         }
    1861             :         sub_rq_bw(&p->dl, &rq->dl);
    1862             :         raw_spin_rq_unlock(rq);
    1863             : }
    1864             : 
    1865             : static void check_preempt_equal_dl(struct rq *rq, struct task_struct *p)
    1866             : {
    1867             :         /*
     1868             :          * Current can't be migrated, so rescheduling is useless;
    1869             :          * let's hope p can move out.
    1870             :          */
    1871             :         if (rq->curr->nr_cpus_allowed == 1 ||
    1872             :             !cpudl_find(&rq->rd->cpudl, rq->curr, NULL))
    1873             :                 return;
    1874             : 
    1875             :         /*
    1876             :          * p is migratable, so let's not schedule it and
    1877             :          * see if it is pushed or pulled somewhere else.
    1878             :          */
    1879             :         if (p->nr_cpus_allowed != 1 &&
    1880             :             cpudl_find(&rq->rd->cpudl, p, NULL))
    1881             :                 return;
    1882             : 
    1883             :         resched_curr(rq);
    1884             : }
    1885             : 
    1886             : static int balance_dl(struct rq *rq, struct task_struct *p, struct rq_flags *rf)
    1887             : {
    1888             :         if (!on_dl_rq(&p->dl) && need_pull_dl_task(rq, p)) {
    1889             :                 /*
     1890             :                  * This is OK, because current is on_cpu, which avoids it
     1891             :                  * being picked for load-balance; preemption/IRQs are still
     1892             :                  * disabled, avoiding further scheduler activity on it, and
     1893             :                  * we have not yet started the picking loop.
    1894             :                  */
    1895             :                 rq_unpin_lock(rq, rf);
    1896             :                 pull_dl_task(rq);
    1897             :                 rq_repin_lock(rq, rf);
    1898             :         }
    1899             : 
    1900             :         return sched_stop_runnable(rq) || sched_dl_runnable(rq);
    1901             : }
    1902             : #endif /* CONFIG_SMP */
    1903             : 
    1904             : /*
    1905             :  * Only called when both the current and waking task are -deadline
    1906             :  * tasks.
    1907             :  */
    1908           0 : static void check_preempt_curr_dl(struct rq *rq, struct task_struct *p,
    1909             :                                   int flags)
    1910             : {
    1911           0 :         if (dl_entity_preempt(&p->dl, &rq->curr->dl)) {
    1912           0 :                 resched_curr(rq);
    1913           0 :                 return;
    1914             :         }
    1915             : 
    1916             : #ifdef CONFIG_SMP
    1917             :         /*
     1918             :          * In the unlikely case that current and p have the same deadline,
     1919             :          * let us try to decide what's the best thing to do...
    1920             :          */
    1921             :         if ((p->dl.deadline == rq->curr->dl.deadline) &&
    1922             :             !test_tsk_need_resched(rq->curr))
    1923             :                 check_preempt_equal_dl(rq, p);
    1924             : #endif /* CONFIG_SMP */
    1925             : }
    1926             : 
    1927             : #ifdef CONFIG_SCHED_HRTICK
    1928             : static void start_hrtick_dl(struct rq *rq, struct task_struct *p)
    1929             : {
    1930             :         hrtick_start(rq, p->dl.runtime);
    1931             : }
    1932             : #else /* !CONFIG_SCHED_HRTICK */
    1933             : static void start_hrtick_dl(struct rq *rq, struct task_struct *p)
    1934             : {
    1935             : }
    1936             : #endif
    1937             : 
    1938           0 : static void set_next_task_dl(struct rq *rq, struct task_struct *p, bool first)
    1939             : {
    1940           0 :         struct sched_dl_entity *dl_se = &p->dl;
    1941           0 :         struct dl_rq *dl_rq = &rq->dl;
    1942             : 
    1943           0 :         p->se.exec_start = rq_clock_task(rq);
    1944           0 :         if (on_dl_rq(&p->dl))
    1945             :                 update_stats_wait_end_dl(dl_rq, dl_se);
    1946             : 
    1947             :         /* You can't push away the running task */
    1948           0 :         dequeue_pushable_dl_task(rq, p);
    1949             : 
    1950           0 :         if (!first)
    1951             :                 return;
    1952             : 
    1953           0 :         if (hrtick_enabled_dl(rq))
    1954             :                 start_hrtick_dl(rq, p);
    1955             : 
    1956           0 :         if (rq->curr->sched_class != &dl_sched_class)
    1957           0 :                 update_dl_rq_load_avg(rq_clock_pelt(rq), rq, 0);
    1958             : 
    1959             :         deadline_queue_push_tasks(rq);
    1960             : }
    1961             : 
    1962             : static struct sched_dl_entity *pick_next_dl_entity(struct dl_rq *dl_rq)
    1963             : {
    1964           0 :         struct rb_node *left = rb_first_cached(&dl_rq->root);
    1965             : 
    1966           0 :         if (!left)
    1967             :                 return NULL;
    1968             : 
    1969           0 :         return __node_2_dle(left);
    1970             : }
    1971             : 
    1972           0 : static struct task_struct *pick_task_dl(struct rq *rq)
    1973             : {
    1974             :         struct sched_dl_entity *dl_se;
    1975           0 :         struct dl_rq *dl_rq = &rq->dl;
    1976             :         struct task_struct *p;
    1977             : 
    1978           0 :         if (!sched_dl_runnable(rq))
    1979             :                 return NULL;
    1980             : 
    1981           0 :         dl_se = pick_next_dl_entity(dl_rq);
    1982           0 :         BUG_ON(!dl_se);
    1983           0 :         p = dl_task_of(dl_se);
    1984             : 
    1985           0 :         return p;
    1986             : }
    1987             : 
    1988           0 : static struct task_struct *pick_next_task_dl(struct rq *rq)
    1989             : {
    1990             :         struct task_struct *p;
    1991             : 
    1992           0 :         p = pick_task_dl(rq);
    1993           0 :         if (p)
    1994           0 :                 set_next_task_dl(rq, p, true);
    1995             : 
    1996           0 :         return p;
    1997             : }
    1998             : 
    1999           0 : static void put_prev_task_dl(struct rq *rq, struct task_struct *p)
    2000             : {
    2001           0 :         struct sched_dl_entity *dl_se = &p->dl;
    2002           0 :         struct dl_rq *dl_rq = &rq->dl;
    2003             : 
    2004           0 :         if (on_dl_rq(&p->dl))
    2005             :                 update_stats_wait_start_dl(dl_rq, dl_se);
    2006             : 
    2007           0 :         update_curr_dl(rq);
    2008             : 
    2009           0 :         update_dl_rq_load_avg(rq_clock_pelt(rq), rq, 1);
    2010           0 :         if (on_dl_rq(&p->dl) && p->nr_cpus_allowed > 1)
    2011             :                 enqueue_pushable_dl_task(rq, p);
    2012           0 : }
    2013             : 
    2014             : /*
    2015             :  * scheduler tick hitting a task of our scheduling class.
    2016             :  *
    2017             :  * NOTE: This function can be called remotely by the tick offload that
    2018             :  * goes along with full dynticks. Therefore no local assumption can be made
    2019             :  * and everything must be accessed through the @rq and @curr passed in
    2020             :  * parameters.
    2021             :  */
    2022           0 : static void task_tick_dl(struct rq *rq, struct task_struct *p, int queued)
    2023             : {
    2024           0 :         update_curr_dl(rq);
    2025             : 
    2026           0 :         update_dl_rq_load_avg(rq_clock_pelt(rq), rq, 1);
    2027             :         /*
    2028             :          * Even when we have runtime, update_curr_dl() might have resulted in us
    2029             :          * not being the leftmost task anymore. In that case NEED_RESCHED will
    2030             :          * be set and schedule() will start a new hrtick for the next task.
    2031             :          */
    2032           0 :         if (hrtick_enabled_dl(rq) && queued && p->dl.runtime > 0 &&
    2033             :             is_leftmost(p, &rq->dl))
    2034             :                 start_hrtick_dl(rq, p);
    2035           0 : }
    2036             : 
    2037           0 : static void task_fork_dl(struct task_struct *p)
    2038             : {
    2039             :         /*
    2040             :          * SCHED_DEADLINE tasks cannot fork; this is enforced in
    2041             :          * sched_fork().
    2042             :          */
    2043           0 : }
    2044             : 
    2045             : #ifdef CONFIG_SMP
    2046             : 
    2047             : /* Only try algorithms three times */
    2048             : #define DL_MAX_TRIES 3
    2049             : 
    2050             : static int pick_dl_task(struct rq *rq, struct task_struct *p, int cpu)
    2051             : {
    2052             :         if (!task_running(rq, p) &&
    2053             :             cpumask_test_cpu(cpu, &p->cpus_mask))
    2054             :                 return 1;
    2055             :         return 0;
    2056             : }
    2057             : 
    2058             : /*
    2059             :  * Return the earliest pushable task on this rq that is suitable to be
    2060             :  * executed on the given CPU, or NULL if there is none:
    2061             :  */
    2062             : static struct task_struct *pick_earliest_pushable_dl_task(struct rq *rq, int cpu)
    2063             : {
    2064             :         struct task_struct *p = NULL;
    2065             :         struct rb_node *next_node;
    2066             : 
    2067             :         if (!has_pushable_dl_tasks(rq))
    2068             :                 return NULL;
    2069             : 
    2070             :         next_node = rb_first_cached(&rq->dl.pushable_dl_tasks_root);
    2071             : 
    2072             : next_node:
    2073             :         if (next_node) {
    2074             :                 p = __node_2_pdl(next_node);
    2075             : 
    2076             :                 if (pick_dl_task(rq, p, cpu))
    2077             :                         return p;
    2078             : 
    2079             :                 next_node = rb_next(next_node);
    2080             :                 goto next_node;
    2081             :         }
    2082             : 
    2083             :         return NULL;
    2084             : }
    2085             : 
    2086             : static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask_dl);
    2087             : 
    2088             : static int find_later_rq(struct task_struct *task)
    2089             : {
    2090             :         struct sched_domain *sd;
    2091             :         struct cpumask *later_mask = this_cpu_cpumask_var_ptr(local_cpu_mask_dl);
    2092             :         int this_cpu = smp_processor_id();
    2093             :         int cpu = task_cpu(task);
    2094             : 
    2095             :         /* Make sure the mask is initialized first */
    2096             :         if (unlikely(!later_mask))
    2097             :                 return -1;
    2098             : 
    2099             :         if (task->nr_cpus_allowed == 1)
    2100             :                 return -1;
    2101             : 
    2102             :         /*
    2103             :          * We have to consider system topology and task affinity
    2104             :          * first, then we can look for a suitable CPU.
    2105             :          */
    2106             :         if (!cpudl_find(&task_rq(task)->rd->cpudl, task, later_mask))
    2107             :                 return -1;
    2108             : 
    2109             :         /*
    2110             :          * If we are here, some targets have been found: the most
    2111             :          * suitable of them is, among the runqueues whose current
    2112             :          * tasks have a later deadline than our task's, the one
    2113             :          * whose deadline is the latest.
    2114             :          *
    2115             :          * Now we check how well this matches the task's
    2116             :          * affinity and the system topology.
    2117             :          *
    2118             :          * The last CPU where the task ran is our first
    2119             :          * guess, since it is most likely cache-hot there.
    2120             :          */
    2121             :         if (cpumask_test_cpu(cpu, later_mask))
    2122             :                 return cpu;
    2123             :         /*
    2124             :          * Check whether this_cpu is to be skipped (i.e., it is
    2125             :          * not in the mask).
    2126             :          */
    2127             :         if (!cpumask_test_cpu(this_cpu, later_mask))
    2128             :                 this_cpu = -1;
    2129             : 
    2130             :         rcu_read_lock();
    2131             :         for_each_domain(cpu, sd) {
    2132             :                 if (sd->flags & SD_WAKE_AFFINE) {
    2133             :                         int best_cpu;
    2134             : 
    2135             :                         /*
    2136             :                          * If possible, preempting this_cpu is
    2137             :                          * cheaper than migrating.
    2138             :                          */
    2139             :                         if (this_cpu != -1 &&
    2140             :                             cpumask_test_cpu(this_cpu, sched_domain_span(sd))) {
    2141             :                                 rcu_read_unlock();
    2142             :                                 return this_cpu;
    2143             :                         }
    2144             : 
    2145             :                         best_cpu = cpumask_any_and_distribute(later_mask,
    2146             :                                                               sched_domain_span(sd));
    2147             :                         /*
    2148             :                          * Last chance: if a CPU in both later_mask
    2149             :                          * and the current sd span is valid, it becomes our
    2150             :                          * choice. Of course, the latest possible CPU is
    2151             :                          * already under consideration through later_mask.
    2152             :                          */
    2153             :                         if (best_cpu < nr_cpu_ids) {
    2154             :                                 rcu_read_unlock();
    2155             :                                 return best_cpu;
    2156             :                         }
    2157             :                 }
    2158             :         }
    2159             :         rcu_read_unlock();
    2160             : 
    2161             :         /*
    2162             :          * At this point, all our guesses failed; we just return
    2163             :          * 'something', and let the caller sort things out.
    2164             :          */
    2165             :         if (this_cpu != -1)
    2166             :                 return this_cpu;
    2167             : 
    2168             :         cpu = cpumask_any_distribute(later_mask);
    2169             :         if (cpu < nr_cpu_ids)
    2170             :                 return cpu;
    2171             : 
    2172             :         return -1;
    2173             : }
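
/*
 * A minimal userspace sketch of the preference order above, under stated
 * assumptions: cpumasks are collapsed into plain bitmasks, all the sched
 * domains into a single hypothetical domain_span, and the distributed
 * cpumask_any*() picks into linear scans. Illustration only, not the
 * kernel's implementation. As in the code above, this_cpu is assumed to
 * be -1 unless it sits in later_mask.
 */
#include <stdio.h>

static int pick_preferred_cpu(unsigned long later_mask, int prev_cpu,
                              int this_cpu, unsigned long domain_span)
{
	int nbits = 8 * sizeof(unsigned long);
	int cpu;

	if (!later_mask)
		return -1;		/* no suitable target at all */

	if (later_mask & (1UL << prev_cpu))
		return prev_cpu;	/* cache-hot last CPU */

	if (this_cpu >= 0 && (domain_span & (1UL << this_cpu)))
		return this_cpu;	/* preempting beats migrating */

	for (cpu = 0; cpu < nbits; cpu++)	/* a CPU in both mask and span */
		if ((later_mask & (1UL << cpu)) && (domain_span & (1UL << cpu)))
			return cpu;

	if (this_cpu >= 0)
		return this_cpu;	/* all guesses failed: prefer home */

	for (cpu = 0; cpu < nbits; cpu++)	/* return 'something' */
		if (later_mask & (1UL << cpu))
			return cpu;

	return -1;
}

int main(void)
{
	/* later_mask = {1, 2, 5}, task last ran on CPU 2: prints 2 */
	printf("%d\n", pick_preferred_cpu(0x26UL, 2, -1, 0xffUL));
	return 0;
}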
    2174             : 
    2175             : /* Locks the rq it finds */
    2176             : static struct rq *find_lock_later_rq(struct task_struct *task, struct rq *rq)
    2177             : {
    2178             :         struct rq *later_rq = NULL;
    2179             :         int tries;
    2180             :         int cpu;
    2181             : 
    2182             :         for (tries = 0; tries < DL_MAX_TRIES; tries++) {
    2183             :                 cpu = find_later_rq(task);
    2184             : 
    2185             :                 if ((cpu == -1) || (cpu == rq->cpu))
    2186             :                         break;
    2187             : 
    2188             :                 later_rq = cpu_rq(cpu);
    2189             : 
    2190             :                 if (later_rq->dl.dl_nr_running &&
    2191             :                     !dl_time_before(task->dl.deadline,
    2192             :                                         later_rq->dl.earliest_dl.curr)) {
    2193             :                          * Target rq has tasks of equal or earlier deadline;
    2194             :                          * Target rq has tasks of equal or earlier deadline,
    2195             :                          * retrying does not release any lock and is unlikely
    2196             :                          * to yield a different result.
    2197             :                          */
    2198             :                         later_rq = NULL;
    2199             :                         break;
    2200             :                 }
    2201             : 
    2202             :                 /* Retry if something changed. */
    2203             :                 if (double_lock_balance(rq, later_rq)) {
    2204             :                         if (unlikely(task_rq(task) != rq ||
    2205             :                                      !cpumask_test_cpu(later_rq->cpu, &task->cpus_mask) ||
    2206             :                                      task_running(rq, task) ||
    2207             :                                      !dl_task(task) ||
    2208             :                                      !task_on_rq_queued(task))) {
    2209             :                                 double_unlock_balance(rq, later_rq);
    2210             :                                 later_rq = NULL;
    2211             :                                 break;
    2212             :                         }
    2213             :                 }
    2214             : 
    2215             :                 /*
    2216             :                  * If the rq we found has no -deadline task, or
    2217             :                  * its earliest one has a later deadline than our
    2218             :                  * task, the rq is a good one.
    2219             :                  */
    2220             :                 if (!later_rq->dl.dl_nr_running ||
    2221             :                     dl_time_before(task->dl.deadline,
    2222             :                                    later_rq->dl.earliest_dl.curr))
    2223             :                         break;
    2224             : 
    2225             :                 /* Otherwise we try again. */
    2226             :                 double_unlock_balance(rq, later_rq);
    2227             :                 later_rq = NULL;
    2228             :         }
    2229             : 
    2230             :         return later_rq;
    2231             : }
    2232             : 
    2233             : static struct task_struct *pick_next_pushable_dl_task(struct rq *rq)
    2234             : {
    2235             :         struct task_struct *p;
    2236             : 
    2237             :         if (!has_pushable_dl_tasks(rq))
    2238             :                 return NULL;
    2239             : 
    2240             :         p = __node_2_pdl(rb_first_cached(&rq->dl.pushable_dl_tasks_root));
    2241             : 
    2242             :         BUG_ON(rq->cpu != task_cpu(p));
    2243             :         BUG_ON(task_current(rq, p));
    2244             :         BUG_ON(p->nr_cpus_allowed <= 1);
    2245             : 
    2246             :         BUG_ON(!task_on_rq_queued(p));
    2247             :         BUG_ON(!dl_task(p));
    2248             : 
    2249             :         return p;
    2250             : }
    2251             : 
    2252             : /*
    2253             :  * See if the non running -deadline tasks on this rq
    2254             :  * can be sent to some other CPU where they can preempt
    2255             :  * and start executing.
    2256             :  */
    2257             : static int push_dl_task(struct rq *rq)
    2258             : {
    2259             :         struct task_struct *next_task;
    2260             :         struct rq *later_rq;
    2261             :         int ret = 0;
    2262             : 
    2263             :         if (!rq->dl.overloaded)
    2264             :                 return 0;
    2265             : 
    2266             :         next_task = pick_next_pushable_dl_task(rq);
    2267             :         if (!next_task)
    2268             :                 return 0;
    2269             : 
    2270             : retry:
    2271             :         /*
    2272             :          * If next_task preempts rq->curr, and rq->curr
    2273             :          * can move away, it makes sense to just reschedule
    2274             :          * without going further in pushing next_task.
    2275             :          */
    2276             :         if (dl_task(rq->curr) &&
    2277             :             dl_time_before(next_task->dl.deadline, rq->curr->dl.deadline) &&
    2278             :             rq->curr->nr_cpus_allowed > 1) {
    2279             :                 resched_curr(rq);
    2280             :                 return 0;
    2281             :         }
    2282             : 
    2283             :         if (is_migration_disabled(next_task))
    2284             :                 return 0;
    2285             : 
    2286             :         if (WARN_ON(next_task == rq->curr))
    2287             :                 return 0;
    2288             : 
    2289             :         /* We might release rq lock */
    2290             :         get_task_struct(next_task);
    2291             : 
    2292             :         /* Will lock the rq it'll find */
    2293             :         later_rq = find_lock_later_rq(next_task, rq);
    2294             :         if (!later_rq) {
    2295             :                 struct task_struct *task;
    2296             : 
    2297             :                 /*
    2298             :                  * We must check all this again, since
    2299             :                  * find_lock_later_rq releases rq->lock and it is
    2300             :                  * then possible that next_task has migrated.
    2301             :                  */
    2302             :                 task = pick_next_pushable_dl_task(rq);
    2303             :                 if (task == next_task) {
    2304             :                         /*
    2305             :                          * The task is still there. We don't try
    2306             :                          * again; some other CPU will pull it when ready.
    2307             :                          */
    2308             :                         goto out;
    2309             :                 }
    2310             : 
    2311             :                 if (!task)
    2312             :                         /* No more tasks */
    2313             :                         goto out;
    2314             : 
    2315             :                 put_task_struct(next_task);
    2316             :                 next_task = task;
    2317             :                 goto retry;
    2318             :         }
    2319             : 
    2320             :         deactivate_task(rq, next_task, 0);
    2321             :         set_task_cpu(next_task, later_rq->cpu);
    2322             : 
    2323             :         /*
    2324             :          * Update the later_rq clock here, because the clock is used
    2325             :          * by the cpufreq_update_util() inside __add_running_bw().
    2326             :          */
    2327             :         update_rq_clock(later_rq);
    2328             :         activate_task(later_rq, next_task, ENQUEUE_NOCLOCK);
    2329             :         ret = 1;
    2330             : 
    2331             :         resched_curr(later_rq);
    2332             : 
    2333             :         double_unlock_balance(rq, later_rq);
    2334             : 
    2335             : out:
    2336             :         put_task_struct(next_task);
    2337             : 
    2338             :         return ret;
    2339             : }
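
/*
 * Push example with assumed absolute deadlines (smaller = earlier): rq
 * runs curr with deadline 200 and queues pushable next_task with deadline
 * 150. next_task preempts curr (150 < 200); if curr can itself move away
 * (nr_cpus_allowed > 1) we only resched and let curr be pushed instead,
 * otherwise we go on and search a later rq for next_task.
 */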
    2340             : 
    2341             : static void push_dl_tasks(struct rq *rq)
    2342             : {
    2343             :         /* push_dl_task() will return true if it moved a -deadline task */
    2344             :         while (push_dl_task(rq))
    2345             :                 ;
    2346             : }
    2347             : 
    2348             : static void pull_dl_task(struct rq *this_rq)
    2349             : {
    2350             :         int this_cpu = this_rq->cpu, cpu;
    2351             :         struct task_struct *p, *push_task;
    2352             :         bool resched = false;
    2353             :         struct rq *src_rq;
    2354             :         u64 dmin = LONG_MAX;
    2355             : 
    2356             :         if (likely(!dl_overloaded(this_rq)))
    2357             :                 return;
    2358             : 
    2359             :         /*
    2360             :          * Match the barrier from dl_set_overload(); this guarantees that if we
    2361             :          * see overloaded we must also see the dlo_mask bit.
    2362             :          */
    2363             :         smp_rmb();
    2364             : 
    2365             :         for_each_cpu(cpu, this_rq->rd->dlo_mask) {
    2366             :                 if (this_cpu == cpu)
    2367             :                         continue;
    2368             : 
    2369             :                 src_rq = cpu_rq(cpu);
    2370             : 
    2371             :                 /*
    2372             :                  * It looks racy, and it is! However, as in sched_rt.c,
    2373             :                  * we are fine with this.
    2374             :                  */
    2375             :                 if (this_rq->dl.dl_nr_running &&
    2376             :                     dl_time_before(this_rq->dl.earliest_dl.curr,
    2377             :                                    src_rq->dl.earliest_dl.next))
    2378             :                         continue;
    2379             : 
    2380             :                 /* Might drop this_rq->lock */
    2381             :                 push_task = NULL;
    2382             :                 double_lock_balance(this_rq, src_rq);
    2383             : 
    2384             :                 /*
    2385             :                  * If there are no more pullable tasks on the
    2386             :                  * rq, we're done with it.
    2387             :                  */
    2388             :                 if (src_rq->dl.dl_nr_running <= 1)
    2389             :                         goto skip;
    2390             : 
    2391             :                 p = pick_earliest_pushable_dl_task(src_rq, this_cpu);
    2392             : 
    2393             :                 /*
    2394             :                  * We found a task to be pulled if:
    2395             :                  *  - it preempts our current task (if there is one),
    2396             :                  *  - it will preempt the last one we pulled (if any).
    2397             :                  */
    2398             :                 if (p && dl_time_before(p->dl.deadline, dmin) &&
    2399             :                     (!this_rq->dl.dl_nr_running ||
    2400             :                      dl_time_before(p->dl.deadline,
    2401             :                                     this_rq->dl.earliest_dl.curr))) {
    2402             :                         WARN_ON(p == src_rq->curr);
    2403             :                         WARN_ON(!task_on_rq_queued(p));
    2404             : 
    2405             :                         /*
    2406             :                          * We skip, though, if p appears to have an earlier
    2407             :                          * deadline than the running task of its runqueue.
    2408             :                          */
    2409             :                         if (dl_time_before(p->dl.deadline,
    2410             :                                            src_rq->curr->dl.deadline))
    2411             :                                 goto skip;
    2412             : 
    2413             :                         if (is_migration_disabled(p)) {
    2414             :                                 push_task = get_push_task(src_rq);
    2415             :                         } else {
    2416             :                                 deactivate_task(src_rq, p, 0);
    2417             :                                 set_task_cpu(p, this_cpu);
    2418             :                                 activate_task(this_rq, p, 0);
    2419             :                                 dmin = p->dl.deadline;
    2420             :                                 resched = true;
    2421             :                         }
    2422             : 
    2423             :                         /* Is there any other task even earlier? */
    2424             :                 }
    2425             : skip:
    2426             :                 double_unlock_balance(this_rq, src_rq);
    2427             : 
    2428             :                 if (push_task) {
    2429             :                         raw_spin_rq_unlock(this_rq);
    2430             :                         stop_one_cpu_nowait(src_rq->cpu, push_cpu_stop,
    2431             :                                             push_task, &src_rq->push_work);
    2432             :                         raw_spin_rq_lock(this_rq);
    2433             :                 }
    2434             :         }
    2435             : 
    2436             :         if (resched)
    2437             :                 resched_curr(this_rq);
    2438             : }
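
/*
 * Pull example with assumed absolute deadlines (smaller = earlier):
 * this_rq's earliest queued deadline is 500; src_rq runs curr with
 * deadline 100 and also queues pushable p with deadline 300. p passes
 * the dmin and earliest_dl.curr checks (300 < 500), is not earlier than
 * src_rq's running task (300 > 100), and is therefore pulled, becoming
 * the earliest -deadline task here.
 */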
    2439             : 
    2440             : /*
    2441             :  * Since the task is not running and a reschedule is not going to happen
    2442             :  * anytime soon on its runqueue, we try pushing it away now.
    2443             :  */
    2444             : static void task_woken_dl(struct rq *rq, struct task_struct *p)
    2445             : {
    2446             :         if (!task_running(rq, p) &&
    2447             :             !test_tsk_need_resched(rq->curr) &&
    2448             :             p->nr_cpus_allowed > 1 &&
    2449             :             dl_task(rq->curr) &&
    2450             :             (rq->curr->nr_cpus_allowed < 2 ||
    2451             :              !dl_entity_preempt(&p->dl, &rq->curr->dl))) {
    2452             :                 push_dl_tasks(rq);
    2453             :         }
    2454             : }
    2455             : 
    2456             : static void set_cpus_allowed_dl(struct task_struct *p,
    2457             :                                 const struct cpumask *new_mask,
    2458             :                                 u32 flags)
    2459             : {
    2460             :         struct root_domain *src_rd;
    2461             :         struct rq *rq;
    2462             : 
    2463             :         BUG_ON(!dl_task(p));
    2464             : 
    2465             :         rq = task_rq(p);
    2466             :         src_rd = rq->rd;
    2467             :         /*
    2468             :          * Migrating a SCHED_DEADLINE task between exclusive
    2469             :          * cpusets (different root_domains) entails a bandwidth
    2470             :          * update. We already made space for us in the destination
    2471             :          * domain (see cpuset_can_attach()).
    2472             :          */
    2473             :         if (!cpumask_intersects(src_rd->span, new_mask)) {
    2474             :                 struct dl_bw *src_dl_b;
    2475             : 
    2476             :                 src_dl_b = dl_bw_of(cpu_of(rq));
    2477             :                 /*
    2478             :                  * We now free resources of the root_domain we are migrating
    2479             :  * off. In the worst case, sched_setattr() may temporarily fail
    2480             :                  * until we complete the update.
    2481             :                  */
    2482             :                 raw_spin_lock(&src_dl_b->lock);
    2483             :                 __dl_sub(src_dl_b, p->dl.dl_bw, dl_bw_cpus(task_cpu(p)));
    2484             :                 raw_spin_unlock(&src_dl_b->lock);
    2485             :         }
    2486             : 
    2487             :         set_cpus_allowed_common(p, new_mask, flags);
    2488             : }
    2489             : 
    2490             : /* Assumes rq->lock is held */
    2491             : static void rq_online_dl(struct rq *rq)
    2492             : {
    2493             :         if (rq->dl.overloaded)
    2494             :                 dl_set_overload(rq);
    2495             : 
    2496             :         cpudl_set_freecpu(&rq->rd->cpudl, rq->cpu);
    2497             :         if (rq->dl.dl_nr_running > 0)
    2498             :                 cpudl_set(&rq->rd->cpudl, rq->cpu, rq->dl.earliest_dl.curr);
    2499             : }
    2500             : 
    2501             : /* Assumes rq->lock is held */
    2502             : static void rq_offline_dl(struct rq *rq)
    2503             : {
    2504             :         if (rq->dl.overloaded)
    2505             :                 dl_clear_overload(rq);
    2506             : 
    2507             :         cpudl_clear(&rq->rd->cpudl, rq->cpu);
    2508             :         cpudl_clear_freecpu(&rq->rd->cpudl, rq->cpu);
    2509             : }
    2510             : 
    2511             : void __init init_sched_dl_class(void)
    2512             : {
    2513             :         unsigned int i;
    2514             : 
    2515             :         for_each_possible_cpu(i)
    2516             :                 zalloc_cpumask_var_node(&per_cpu(local_cpu_mask_dl, i),
    2517             :                                         GFP_KERNEL, cpu_to_node(i));
    2518             : }
    2519             : 
    2520             : void dl_add_task_root_domain(struct task_struct *p)
    2521             : {
    2522             :         struct rq_flags rf;
    2523             :         struct rq *rq;
    2524             :         struct dl_bw *dl_b;
    2525             : 
    2526             :         raw_spin_lock_irqsave(&p->pi_lock, rf.flags);
    2527             :         if (!dl_task(p)) {
    2528             :                 raw_spin_unlock_irqrestore(&p->pi_lock, rf.flags);
    2529             :                 return;
    2530             :         }
    2531             : 
    2532             :         rq = __task_rq_lock(p, &rf);
    2533             : 
    2534             :         dl_b = &rq->rd->dl_bw;
    2535             :         raw_spin_lock(&dl_b->lock);
    2536             : 
    2537             :         __dl_add(dl_b, p->dl.dl_bw, cpumask_weight(rq->rd->span));
    2538             : 
    2539             :         raw_spin_unlock(&dl_b->lock);
    2540             : 
    2541             :         task_rq_unlock(rq, p, &rf);
    2542             : }
    2543             : 
    2544             : void dl_clear_root_domain(struct root_domain *rd)
    2545             : {
    2546             :         unsigned long flags;
    2547             : 
    2548             :         raw_spin_lock_irqsave(&rd->dl_bw.lock, flags);
    2549             :         rd->dl_bw.total_bw = 0;
    2550             :         raw_spin_unlock_irqrestore(&rd->dl_bw.lock, flags);
    2551             : }
    2552             : 
    2553             : #endif /* CONFIG_SMP */
    2554             : 
    2555           0 : static void switched_from_dl(struct rq *rq, struct task_struct *p)
    2556             : {
    2557             :         /*
    2558             :          * task_non_contending() can start the "inactive timer" (if the 0-lag
    2559             :          * time is in the future). If the task switches back to dl before
    2560             :          * the "inactive timer" fires, it can continue to consume its current
    2561             :          * runtime using its current deadline. If it stays outside of
    2562             :          * SCHED_DEADLINE until the 0-lag time passes, inactive_task_timer()
    2563             :          * will reset the task parameters.
    2564             :          */
    2565           0 :         if (task_on_rq_queued(p) && p->dl.dl_runtime)
    2566           0 :                 task_non_contending(p);
    2567             : 
    2568           0 :         if (!task_on_rq_queued(p)) {
    2569             :                 /*
    2570             :                  * Inactive timer is armed. However, p is leaving DEADLINE and
    2571             :          * might migrate away from this rq while continuing to run in
    2572             :                  * some other class. We need to remove its contribution from
    2573             :                  * this rq running_bw now, or sub_rq_bw (below) will complain.
    2574             :                  */
    2575           0 :                 if (p->dl.dl_non_contending)
    2576           0 :                         sub_running_bw(&p->dl, &rq->dl);
    2577           0 :                 sub_rq_bw(&p->dl, &rq->dl);
    2578             :         }
    2579             : 
    2580             :         /*
    2581             :          * We cannot use inactive_task_timer() to invoke sub_running_bw()
    2582             :  * at the 0-lag time, because the task could meanwhile have been
    2583             :  * migrated while running as SCHED_OTHER.
    2584             :          */
    2585           0 :         if (p->dl.dl_non_contending)
    2586           0 :                 p->dl.dl_non_contending = 0;
    2587             : 
    2588             :         /*
    2589             :          * Since this might be the only -deadline task on the rq,
    2590             :          * this is the right place to try to pull some other one
    2591             :          * from an overloaded CPU, if any.
    2592             :          */
    2593           0 :         if (!task_on_rq_queued(p) || rq->dl.dl_nr_running)
    2594             :                 return;
    2595             : 
    2596             :         deadline_queue_pull_task(rq);
    2597             : }
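
/*
 * For reference, the "0-lag time" mentioned above is the instant at which
 * the task's remaining runtime, consumed at its reserved bandwidth
 * dl_runtime/dl_period, would reach zero. A sketch of the computation
 * done by task_non_contending() earlier in this file, assuming a positive
 * remaining runtime and plain integer types standing in for u64/s64:
 */
static inline unsigned long long
dl_zerolag_time(unsigned long long deadline, unsigned long long runtime,
		unsigned long long dl_period, unsigned long long dl_runtime)
{
	/* zerolag = deadline - runtime * dl_period / dl_runtime */
	return deadline - (runtime * dl_period) / dl_runtime;
}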
    2598             : 
    2599             : /*
    2600             :  * When switching to -deadline, we may overload the rq, so
    2601             :  * we try to push someone off, if possible.
    2602             :  */
    2603           0 : static void switched_to_dl(struct rq *rq, struct task_struct *p)
    2604             : {
    2605           0 :         if (hrtimer_try_to_cancel(&p->dl.inactive_timer) == 1)
    2606           0 :                 put_task_struct(p);
    2607             : 
    2608             :         /* If p is not queued we will update its parameters at next wakeup. */
    2609           0 :         if (!task_on_rq_queued(p)) {
    2610           0 :                 add_rq_bw(&p->dl, &rq->dl);
    2611             : 
    2612             :                 return;
    2613             :         }
    2614             : 
    2615           0 :         if (rq->curr != p) {
    2616             : #ifdef CONFIG_SMP
    2617             :                 if (p->nr_cpus_allowed > 1 && rq->dl.overloaded)
    2618             :                         deadline_queue_push_tasks(rq);
    2619             : #endif
    2620           0 :                 if (dl_task(rq->curr))
    2621             :                         check_preempt_curr_dl(rq, p, 0);
    2622             :                 else
    2623           0 :                         resched_curr(rq);
    2624             :         } else {
    2625           0 :                 update_dl_rq_load_avg(rq_clock_pelt(rq), rq, 0);
    2626             :         }
    2627             : }
    2628             : 
    2629             : /*
    2630             :  * If the scheduling parameters of a -deadline task changed,
    2631             :  * a push or pull operation might be needed.
    2632             :  */
    2633           0 : static void prio_changed_dl(struct rq *rq, struct task_struct *p,
    2634             :                             int oldprio)
    2635             : {
    2636           0 :         if (task_on_rq_queued(p) || task_current(rq, p)) {
    2637             : #ifdef CONFIG_SMP
    2638             :                 /*
    2639             :                  * This might be too much, but unfortunately
    2640             :                  * we don't have the old deadline value, and
    2641             :                  * we can't tell whether the task is raising
    2642             :                  * or lowering its prio, so...
    2643             :                  */
    2644             :                 if (!rq->dl.overloaded)
    2645             :                         deadline_queue_pull_task(rq);
    2646             : 
    2647             :                 /*
    2648             :                  * If we now have an earlier deadline task than p,
    2649             :                  * then reschedule, provided p is still on this
    2650             :                  * runqueue.
    2651             :                  */
    2652             :                 if (dl_time_before(rq->dl.earliest_dl.curr, p->dl.deadline))
    2653             :                         resched_curr(rq);
    2654             : #else
    2655             :                 /*
    2656             :                  * Again, we don't know if p has an earlier
    2657             :                  * or later deadline, so let's blindly set a
    2658             :                  * (maybe not needed) rescheduling point.
    2659             :                  */
    2660           0 :                 resched_curr(rq);
    2661             : #endif /* CONFIG_SMP */
    2662             :         }
    2663           0 : }
    2664             : 
    2665             : DEFINE_SCHED_CLASS(dl) = {
    2666             : 
    2667             :         .enqueue_task           = enqueue_task_dl,
    2668             :         .dequeue_task           = dequeue_task_dl,
    2669             :         .yield_task             = yield_task_dl,
    2670             : 
    2671             :         .check_preempt_curr     = check_preempt_curr_dl,
    2672             : 
    2673             :         .pick_next_task         = pick_next_task_dl,
    2674             :         .put_prev_task          = put_prev_task_dl,
    2675             :         .set_next_task          = set_next_task_dl,
    2676             : 
    2677             : #ifdef CONFIG_SMP
    2678             :         .balance                = balance_dl,
    2679             :         .pick_task              = pick_task_dl,
    2680             :         .select_task_rq         = select_task_rq_dl,
    2681             :         .migrate_task_rq        = migrate_task_rq_dl,
    2682             :         .set_cpus_allowed       = set_cpus_allowed_dl,
    2683             :         .rq_online              = rq_online_dl,
    2684             :         .rq_offline             = rq_offline_dl,
    2685             :         .task_woken             = task_woken_dl,
    2686             :         .find_lock_rq           = find_lock_later_rq,
    2687             : #endif
    2688             : 
    2689             :         .task_tick              = task_tick_dl,
    2690             :         .task_fork              = task_fork_dl,
    2691             : 
    2692             :         .prio_changed           = prio_changed_dl,
    2693             :         .switched_from          = switched_from_dl,
    2694             :         .switched_to            = switched_to_dl,
    2695             : 
    2696             :         .update_curr            = update_curr_dl,
    2697             : };
    2698             : 
    2699             : /* Used for dl_bw checks and updates, under sched_rt_handler()::mutex */
    2700             : static u64 dl_generation;
    2701             : 
    2702           0 : int sched_dl_global_validate(void)
    2703             : {
    2704           0 :         u64 runtime = global_rt_runtime();
    2705           0 :         u64 period = global_rt_period();
    2706           0 :         u64 new_bw = to_ratio(period, runtime);
    2707           0 :         u64 gen = ++dl_generation;
    2708             :         struct dl_bw *dl_b;
    2709           0 :         int cpu, cpus, ret = 0;
    2710             :         unsigned long flags;
    2711             : 
    2712             :         /*
    2713             :          * Here we want to check that the bandwidth is not being set to
    2714             :          * some value smaller than the currently allocated bandwidth in
    2715             :          * any of the root_domains.
    2716             :          */
    2717           0 :         for_each_possible_cpu(cpu) {
    2718             :                 rcu_read_lock_sched();
    2719             : 
    2720           0 :                 if (dl_bw_visited(cpu, gen))
    2721             :                         goto next;
    2722             : 
    2723           0 :                 dl_b = dl_bw_of(cpu);
    2724           0 :                 cpus = dl_bw_cpus(cpu);
    2725             : 
    2726           0 :                 raw_spin_lock_irqsave(&dl_b->lock, flags);
    2727           0 :                 if (new_bw * cpus < dl_b->total_bw)
    2728           0 :                         ret = -EBUSY;
    2729           0 :                 raw_spin_unlock_irqrestore(&dl_b->lock, flags);
    2730             : 
    2731             : next:
    2732             :                 rcu_read_unlock_sched();
    2733             : 
    2734           0 :                 if (ret)
    2735             :                         break;
    2736             :         }
    2737             : 
    2738           0 :         return ret;
    2739             : }
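
/*
 * Example of the check above, with assumed numbers: lowering the global
 * rt share to runtime/period = 1/2 gives new_bw = 1 << 19 (0.5 in Q20
 * fixed point). On a 4-CPU root domain whose -deadline tasks already
 * reserve total_bw = 3 << 20, (1 << 19) * 4 = 1 << 21 < 3 << 20, so the
 * new setting is rejected with -EBUSY.
 */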
    2740             : 
    2741           1 : static void init_dl_rq_bw_ratio(struct dl_rq *dl_rq)
    2742             : {
    2743           1 :         if (global_rt_runtime() == RUNTIME_INF) {
    2744           0 :                 dl_rq->bw_ratio = 1 << RATIO_SHIFT;
    2745           0 :                 dl_rq->extra_bw = 1 << BW_SHIFT;
    2746             :         } else {
    2747           2 :                 dl_rq->bw_ratio = to_ratio(global_rt_runtime(),
    2748           1 :                           global_rt_period()) >> (BW_SHIFT - RATIO_SHIFT);
    2749           1 :                 dl_rq->extra_bw = to_ratio(global_rt_period(),
    2750             :                                                     global_rt_runtime());
    2751             :         }
    2752           1 : }
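
/*
 * Worked example for the branch above, assuming the default rt sysctls
 * (sched_rt_runtime_us = 950000, sched_rt_period_us = 1000000) and the
 * usual BW_SHIFT = 20, RATIO_SHIFT = 8:
 *
 *   bw_ratio = ((1000000 << 20) / 950000) >> 12 = 269     (~1.05 in Q8)
 *   extra_bw =  (950000 << 20) / 1000000        = 996147  (~0.95 in Q20)
 *
 * i.e. bw_ratio is the reciprocal of the rt fraction in 8-bit fixed
 * point, and extra_bw is the rt fraction itself in 20-bit fixed point.
 */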
    2753             : 
    2754           0 : void sched_dl_do_global(void)
    2755             : {
    2756           0 :         u64 new_bw = -1;
    2757           0 :         u64 gen = ++dl_generation;
    2758             :         struct dl_bw *dl_b;
    2759             :         int cpu;
    2760             :         unsigned long flags;
    2761             : 
    2762           0 :         if (global_rt_runtime() != RUNTIME_INF)
    2763           0 :                 new_bw = to_ratio(global_rt_period(), global_rt_runtime());
    2764             : 
    2765           0 :         for_each_possible_cpu(cpu) {
    2766             :                 rcu_read_lock_sched();
    2767             : 
    2768           0 :                 if (dl_bw_visited(cpu, gen)) {
    2769             :                         rcu_read_unlock_sched();
    2770             :                         continue;
    2771             :                 }
    2772             : 
    2773           0 :                 dl_b = dl_bw_of(cpu);
    2774             : 
    2775           0 :                 raw_spin_lock_irqsave(&dl_b->lock, flags);
    2776           0 :                 dl_b->bw = new_bw;
    2777           0 :                 raw_spin_unlock_irqrestore(&dl_b->lock, flags);
    2778             : 
    2779             :                 rcu_read_unlock_sched();
    2780           0 :                 init_dl_rq_bw_ratio(&cpu_rq(cpu)->dl);
    2781             :         }
    2782           0 : }
    2783             : 
    2784             : /*
    2785             :  * We must be sure that accepting a new task (or allowing changing the
    2786             :  * parameters of an existing one) is consistent with the bandwidth
    2787             :  * constraints. If it is, this function also updates the currently
    2788             :  * allocated bandwidth to reflect the new situation.
    2789             :  *
    2790             :  * This function is called while holding p's rq->lock.
    2791             :  */
    2792           0 : int sched_dl_overflow(struct task_struct *p, int policy,
    2793             :                       const struct sched_attr *attr)
    2794             : {
    2795           0 :         u64 period = attr->sched_period ?: attr->sched_deadline;
    2796           0 :         u64 runtime = attr->sched_runtime;
    2797           0 :         u64 new_bw = dl_policy(policy) ? to_ratio(period, runtime) : 0;
    2798           0 :         int cpus, err = -1, cpu = task_cpu(p);
    2799           0 :         struct dl_bw *dl_b = dl_bw_of(cpu);
    2800             :         unsigned long cap;
    2801             : 
    2802           0 :         if (attr->sched_flags & SCHED_FLAG_SUGOV)
    2803             :                 return 0;
    2804             : 
    2805             :         /* !deadline task may carry old deadline bandwidth */
    2806           0 :         if (new_bw == p->dl.dl_bw && task_has_dl_policy(p))
    2807             :                 return 0;
    2808             : 
    2809             :         /*
    2810             :          * Whether a task enters, leaves, or stays -deadline but changes
    2811             :          * its parameters, we may need to update the total allocated
    2812             :          * bandwidth of the container accordingly.
    2813             :          */
    2814           0 :         raw_spin_lock(&dl_b->lock);
    2815           0 :         cpus = dl_bw_cpus(cpu);
    2816           0 :         cap = dl_bw_capacity(cpu);
    2817             : 
    2818           0 :         if (dl_policy(policy) && !task_has_dl_policy(p) &&
    2819           0 :             !__dl_overflow(dl_b, cap, 0, new_bw)) {
    2820           0 :                 if (hrtimer_active(&p->dl.inactive_timer))
    2821           0 :                         __dl_sub(dl_b, p->dl.dl_bw, cpus);
    2822           0 :                 __dl_add(dl_b, new_bw, cpus);
    2823           0 :                 err = 0;
    2824           0 :         } else if (dl_policy(policy) && task_has_dl_policy(p) &&
    2825           0 :                    !__dl_overflow(dl_b, cap, p->dl.dl_bw, new_bw)) {
    2826             :                 /*
    2827             :                  * XXX this is slightly incorrect: when the task
    2828             :                  * utilization decreases, we should delay the total
    2829             :                  * utilization change until the task's 0-lag point.
    2830             :                  * But this would require to set the task's "inactive
    2831             :                  * timer" when the task is not inactive.
    2832             :                  */
    2833           0 :                 __dl_sub(dl_b, p->dl.dl_bw, cpus);
    2834           0 :                 __dl_add(dl_b, new_bw, cpus);
    2835           0 :                 dl_change_utilization(p, new_bw);
    2836           0 :                 err = 0;
    2837           0 :         } else if (!dl_policy(policy) && task_has_dl_policy(p)) {
    2838             :                 /*
    2839             :                  * Do not decrease the total deadline utilization here,
    2840             :                  * switched_from_dl() will take care to do it at the correct
    2841             :                  * (0-lag) time.
    2842             :                  */
    2843           0 :                 err = 0;
    2844             :         }
    2845           0 :         raw_spin_unlock(&dl_b->lock);
    2846             : 
    2847           0 :         return err;
    2848             : }
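
/*
 * Admission example with assumed numbers: a task requesting 10 ms of
 * runtime every 100 ms carries new_bw = to_ratio(100000000, 10000000) =
 * (10000000 << 20) / 100000000 = 104857, i.e. ~0.1 in Q20 fixed point.
 * It is admitted iff adding 104857 to the root domain's allocation does
 * not overflow the domain's allowed bandwidth; otherwise the caller's
 * sched_setattr() fails with -EBUSY.
 */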
    2849             : 
    2850             : /*
    2851             :  * This function initializes the sched_dl_entity of a task that is
    2852             :  * becoming SCHED_DEADLINE.
    2853             :  *
    2854             :  * Only the static values are considered here, the actual runtime and the
    2855             :  * absolute deadline will be properly calculated when the task is enqueued
    2856             :  * for the first time with its new policy.
    2857             :  */
    2858           0 : void __setparam_dl(struct task_struct *p, const struct sched_attr *attr)
    2859             : {
    2860           0 :         struct sched_dl_entity *dl_se = &p->dl;
    2861             : 
    2862           0 :         dl_se->dl_runtime = attr->sched_runtime;
    2863           0 :         dl_se->dl_deadline = attr->sched_deadline;
    2864           0 :         dl_se->dl_period = attr->sched_period ?: dl_se->dl_deadline;
    2865           0 :         dl_se->flags = attr->sched_flags & SCHED_DL_FLAGS;
    2866           0 :         dl_se->dl_bw = to_ratio(dl_se->dl_period, dl_se->dl_runtime);
    2867           0 :         dl_se->dl_density = to_ratio(dl_se->dl_deadline, dl_se->dl_runtime);
    2868           0 : }
    2869             : 
    2870           0 : void __getparam_dl(struct task_struct *p, struct sched_attr *attr)
    2871             : {
    2872           0 :         struct sched_dl_entity *dl_se = &p->dl;
    2873             : 
    2874           0 :         attr->sched_priority = p->rt_priority;
    2875           0 :         attr->sched_runtime = dl_se->dl_runtime;
    2876           0 :         attr->sched_deadline = dl_se->dl_deadline;
    2877           0 :         attr->sched_period = dl_se->dl_period;
    2878           0 :         attr->sched_flags &= ~SCHED_DL_FLAGS;
    2879           0 :         attr->sched_flags |= dl_se->flags;
    2880           0 : }
    2881             : 
    2882             : /*
    2883             :  * Default limits for DL period; on the top end we guard against small util
    2884             :  * tasks still getting ridiculously long effective runtimes; on the bottom end we
    2885             :  * guard against timer DoS.
    2886             :  */
    2887             : unsigned int sysctl_sched_dl_period_max = 1 << 22; /* ~4 seconds */
    2888             : unsigned int sysctl_sched_dl_period_min = 100;     /* 100 us */
    2889             : 
    2890             : /*
    2891             :  * This function validates the new parameters of a -deadline task.
    2892             :  * We require the deadline to be non-zero and greater than or equal
    2893             :  * to the runtime, and the period to be either zero or greater than
    2894             :  * or equal to the deadline. Furthermore, we have to be sure that
    2895             :  * user parameters are above the internal resolution of 1 us (we
    2896             :  * check sched_runtime only, since it is always the smaller one) and
    2897             :  * below 2^63 ns (we have to check both sched_deadline and
    2898             :  * sched_period, as the latter can be zero).
    2899             :  */
    2900           0 : bool __checkparam_dl(const struct sched_attr *attr)
    2901             : {
    2902             :         u64 period, max, min;
    2903             : 
    2904             :         /* special dl tasks don't actually use any parameter */
    2905           0 :         if (attr->sched_flags & SCHED_FLAG_SUGOV)
    2906             :                 return true;
    2907             : 
    2908             :         /* deadline != 0 */
    2909           0 :         if (attr->sched_deadline == 0)
    2910             :                 return false;
    2911             : 
    2912             :         /*
    2913             :          * Since we truncate DL_SCALE bits, make sure we're at least
    2914             :          * that big.
    2915             :          */
    2916           0 :         if (attr->sched_runtime < (1ULL << DL_SCALE))
    2917             :                 return false;
    2918             : 
    2919             :         /*
    2920             :          * Since we use the MSB for wrap-around and sign issues, make
    2921             :          * sure it's not set (mind that period can be equal to zero).
    2922             :          */
    2923           0 :         if (attr->sched_deadline & (1ULL << 63) ||
    2924           0 :             attr->sched_period & (1ULL << 63))
    2925             :                 return false;
    2926             : 
    2927           0 :         period = attr->sched_period;
    2928           0 :         if (!period)
    2929           0 :                 period = attr->sched_deadline;
    2930             : 
    2931             :         /* runtime <= deadline <= period (if period != 0) */
    2932           0 :         if (period < attr->sched_deadline ||
    2933             :             attr->sched_deadline < attr->sched_runtime)
    2934             :                 return false;
    2935             : 
    2936           0 :         max = (u64)READ_ONCE(sysctl_sched_dl_period_max) * NSEC_PER_USEC;
    2937           0 :         min = (u64)READ_ONCE(sysctl_sched_dl_period_min) * NSEC_PER_USEC;
    2938             : 
    2939           0 :         if (period < min || period > max)
    2940             :                 return false;
    2941             : 
    2942           0 :         return true;
    2943             : }
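
/*
 * A minimal userspace sketch of parameters that satisfy the checks above
 * (runtime <= deadline <= period, period within the min/max limits).
 * sched_setattr(2) has no glibc wrapper, so it is invoked via syscall();
 * the structure layout follows sched(7). Illustration only.
 */
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>

#ifndef SCHED_DEADLINE
#define SCHED_DEADLINE	6
#endif

struct sched_attr {
	uint32_t size;
	uint32_t sched_policy;
	uint64_t sched_flags;
	int32_t  sched_nice;		/* unused for -deadline */
	uint32_t sched_priority;	/* unused for -deadline */
	uint64_t sched_runtime;		/* ns */
	uint64_t sched_deadline;	/* ns */
	uint64_t sched_period;		/* ns */
};

int main(void)
{
	struct sched_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.size	    = sizeof(attr);
	attr.sched_policy   = SCHED_DEADLINE;
	attr.sched_runtime  =  10 * 1000 * 1000;	/* 10 ms  */
	attr.sched_deadline =  30 * 1000 * 1000;	/* 30 ms  */
	attr.sched_period   = 100 * 1000 * 1000;	/* 100 ms */

	/* Needs CAP_SYS_NICE; fails with EBUSY if bandwidth would overflow. */
	if (syscall(SYS_sched_setattr, 0, &attr, 0)) {
		perror("sched_setattr");
		return 1;
	}
	puts("admitted as SCHED_DEADLINE");
	return 0;
}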
    2944             : 
    2945             : /*
    2946             :  * This function clears the sched_dl_entity static params.
    2947             :  */
    2948         108 : void __dl_clear_params(struct task_struct *p)
    2949             : {
    2950         108 :         struct sched_dl_entity *dl_se = &p->dl;
    2951             : 
    2952         108 :         dl_se->dl_runtime            = 0;
    2953         108 :         dl_se->dl_deadline           = 0;
    2954         108 :         dl_se->dl_period             = 0;
    2955         108 :         dl_se->flags                 = 0;
    2956         108 :         dl_se->dl_bw                 = 0;
    2957         108 :         dl_se->dl_density            = 0;
    2958             : 
    2959         108 :         dl_se->dl_throttled          = 0;
    2960         108 :         dl_se->dl_yielded            = 0;
    2961         108 :         dl_se->dl_non_contending     = 0;
    2962         108 :         dl_se->dl_overrun            = 0;
    2963             : 
    2964             : #ifdef CONFIG_RT_MUTEXES
    2965         108 :         dl_se->pi_se                 = dl_se;
    2966             : #endif
    2967         108 : }
    2968             : 
    2969           0 : bool dl_param_changed(struct task_struct *p, const struct sched_attr *attr)
    2970             : {
    2971           0 :         struct sched_dl_entity *dl_se = &p->dl;
    2972             : 
    2973           0 :         if (dl_se->dl_runtime != attr->sched_runtime ||
    2974           0 :             dl_se->dl_deadline != attr->sched_deadline ||
    2975           0 :             dl_se->dl_period != attr->sched_period ||
    2976           0 :             dl_se->flags != (attr->sched_flags & SCHED_DL_FLAGS))
    2977             :                 return true;
    2978             : 
    2979           0 :         return false;
    2980             : }
    2981             : 
    2982             : #ifdef CONFIG_SMP
    2983             : int dl_cpuset_cpumask_can_shrink(const struct cpumask *cur,
    2984             :                                  const struct cpumask *trial)
    2985             : {
    2986             :         int ret = 1, trial_cpus;
    2987             :         struct dl_bw *cur_dl_b;
    2988             :         unsigned long flags;
    2989             : 
    2990             :         rcu_read_lock_sched();
    2991             :         cur_dl_b = dl_bw_of(cpumask_any(cur));
    2992             :         trial_cpus = cpumask_weight(trial);
    2993             : 
    2994             :         raw_spin_lock_irqsave(&cur_dl_b->lock, flags);
    2995             :         if (cur_dl_b->bw != -1 &&
    2996             :             cur_dl_b->bw * trial_cpus < cur_dl_b->total_bw)
    2997             :                 ret = 0;
    2998             :         raw_spin_unlock_irqrestore(&cur_dl_b->lock, flags);
    2999             :         rcu_read_unlock_sched();
    3000             : 
    3001             :         return ret;
    3002             : }
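
/*
 * Shrink example with assumed numbers: with the per-CPU limit bw = 1 << 20
 * (100% in Q20) and -deadline tasks reserving total_bw = 2 << 20 across a
 * 4-CPU cpuset, shrinking to 2 CPUs is allowed (2 * (1 << 20) is not
 * smaller than total_bw), while shrinking to 1 CPU is rejected, since
 * 1 << 20 < 2 << 20.
 */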
    3003             : 
    3004             : int dl_cpu_busy(int cpu, struct task_struct *p)
    3005             : {
    3006             :         unsigned long flags, cap;
    3007             :         struct dl_bw *dl_b;
    3008             :         bool overflow;
    3009             : 
    3010             :         rcu_read_lock_sched();
    3011             :         dl_b = dl_bw_of(cpu);
    3012             :         raw_spin_lock_irqsave(&dl_b->lock, flags);
    3013             :         cap = dl_bw_capacity(cpu);
    3014             :         overflow = __dl_overflow(dl_b, cap, 0, p ? p->dl.dl_bw : 0);
    3015             : 
    3016             :         if (!overflow && p) {
    3017             :                 /*
    3018             :                  * We reserve space for this task in the destination
    3019             :                  * root_domain, as we can't fail after this point.
    3020             :                  * We will free resources in the source root_domain
    3021             :                  * later on (see set_cpus_allowed_dl()).
    3022             :                  */
    3023             :                 __dl_add(dl_b, p->dl.dl_bw, dl_bw_cpus(cpu));
    3024             :         }
    3025             : 
    3026             :         raw_spin_unlock_irqrestore(&dl_b->lock, flags);
    3027             :         rcu_read_unlock_sched();
    3028             : 
    3029             :         return overflow ? -EBUSY : 0;
    3030             : }
    3031             : #endif
    3032             : 
    3033             : #ifdef CONFIG_SCHED_DEBUG
    3034           0 : void print_dl_stats(struct seq_file *m, int cpu)
    3035             : {
    3036           0 :         print_dl_rq(m, cpu, &cpu_rq(cpu)->dl);
    3037           0 : }
    3038             : #endif /* CONFIG_SCHED_DEBUG */

Generated by: LCOV version 1.14