Line data Source code
1 : // SPDX-License-Identifier: GPL-2.0
2 : /*
3 : * Real-Time Scheduling Class (mapped to the SCHED_FIFO and SCHED_RR
4 : * policies)
5 : */
6 :
7 : int sched_rr_timeslice = RR_TIMESLICE;
8 : int sysctl_sched_rr_timeslice = (MSEC_PER_SEC / HZ) * RR_TIMESLICE;
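          : /*
          :  * sched_rr_timeslice is kept in jiffies (RR_TIMESLICE is typically defined
          :  * as 100 msec worth of jiffies), while the sysctl mirror is exposed in
          :  * milliseconds, hence the (MSEC_PER_SEC / HZ) scaling above.
          :  */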
9 : /* More than 4 hours if BW_SHIFT equals 20. */
10 : static const u64 max_rt_runtime = MAX_BW;
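          : /*
          :  * Assuming the usual definitions (BW_SHIFT == 20 and MAX_BW being roughly
          :  * (1ULL << (64 - BW_SHIFT)) - 1 nanoseconds), this limit works out to about
          :  * 2^44 ns, i.e. ~17592 seconds or ~4.9 hours, which is where the "more than
          :  * 4 hours" figure above comes from.
          :  */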
11 :
12 : static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun);
13 :
14 : struct rt_bandwidth def_rt_bandwidth;
15 :
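          : /*
          :  * Fires once per rt_period: hrtimer_forward_now() returns how many whole
          :  * periods elapsed (the overrun count), do_sched_rt_period_timer() replenishes
          :  * runtime for each of them, and the timer is stopped (HRTIMER_NORESTART)
          :  * only once every rt_rq it services has gone idle.
          :  */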
16 0 : static enum hrtimer_restart sched_rt_period_timer(struct hrtimer *timer)
17 : {
18 0 : struct rt_bandwidth *rt_b =
19 0 : container_of(timer, struct rt_bandwidth, rt_period_timer);
20 0 : int idle = 0;
21 : int overrun;
22 :
23 0 : raw_spin_lock(&rt_b->rt_runtime_lock);
24 : for (;;) {
25 0 : overrun = hrtimer_forward_now(timer, rt_b->rt_period);
26 0 : if (!overrun)
27 : break;
28 :
29 0 : raw_spin_unlock(&rt_b->rt_runtime_lock);
30 0 : idle = do_sched_rt_period_timer(rt_b, overrun);
31 0 : raw_spin_lock(&rt_b->rt_runtime_lock);
32 : }
33 0 : if (idle)
34 0 : rt_b->rt_period_active = 0;
35 0 : raw_spin_unlock(&rt_b->rt_runtime_lock);
36 :
37 0 : return idle ? HRTIMER_NORESTART : HRTIMER_RESTART;
38 : }
39 :
40 1 : void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime)
41 : {
42 1 : rt_b->rt_period = ns_to_ktime(period);
43 1 : rt_b->rt_runtime = runtime;
44 :
45 : raw_spin_lock_init(&rt_b->rt_runtime_lock);
46 :
47 1 : hrtimer_init(&rt_b->rt_period_timer, CLOCK_MONOTONIC,
48 : HRTIMER_MODE_REL_HARD);
49 1 : rt_b->rt_period_timer.function = sched_rt_period_timer;
50 1 : }
51 :
52 0 : static inline void do_start_rt_bandwidth(struct rt_bandwidth *rt_b)
53 : {
54 0 : raw_spin_lock(&rt_b->rt_runtime_lock);
55 0 : if (!rt_b->rt_period_active) {
56 0 : rt_b->rt_period_active = 1;
57 : /*
58 : * SCHED_DEADLINE updates the bandwidth, as a runaway
59 : * RT task with a DL task could hog a CPU. But DL does
60 : * not reset the period. If a deadline task was running
61 : * without an RT task running, it can cause RT tasks to
62 : * throttle when they start up. Kick the timer right away
63 : * to update the period.
64 : */
65 0 : hrtimer_forward_now(&rt_b->rt_period_timer, ns_to_ktime(0));
66 0 : hrtimer_start_expires(&rt_b->rt_period_timer,
67 : HRTIMER_MODE_ABS_PINNED_HARD);
68 : }
69 0 : raw_spin_unlock(&rt_b->rt_runtime_lock);
70 0 : }
71 :
72 : static void start_rt_bandwidth(struct rt_bandwidth *rt_b)
73 : {
74 0 : if (!rt_bandwidth_enabled() || rt_b->rt_runtime == RUNTIME_INF)
75 : return;
76 :
77 0 : do_start_rt_bandwidth(rt_b);
78 : }
79 :
80 1 : void init_rt_rq(struct rt_rq *rt_rq)
81 : {
82 : struct rt_prio_array *array;
83 : int i;
84 :
85 1 : array = &rt_rq->active;
86 101 : for (i = 0; i < MAX_RT_PRIO; i++) {
87 200 : INIT_LIST_HEAD(array->queue + i);
88 200 : __clear_bit(i, array->bitmap);
89 : }
90 : /* delimiter for bitsearch: */
91 2 : __set_bit(MAX_RT_PRIO, array->bitmap);
92 :
93 : #if defined CONFIG_SMP
94 : rt_rq->highest_prio.curr = MAX_RT_PRIO-1;
95 : rt_rq->highest_prio.next = MAX_RT_PRIO-1;
96 : rt_rq->rt_nr_migratory = 0;
97 : rt_rq->overloaded = 0;
98 : plist_head_init(&rt_rq->pushable_tasks);
99 : #endif /* CONFIG_SMP */
100 : /* We start in dequeued state, because no RT tasks are queued */
101 1 : rt_rq->rt_queued = 0;
102 :
103 1 : rt_rq->rt_time = 0;
104 1 : rt_rq->rt_throttled = 0;
105 1 : rt_rq->rt_runtime = 0;
106 : raw_spin_lock_init(&rt_rq->rt_runtime_lock);
107 1 : }
108 :
109 : #ifdef CONFIG_RT_GROUP_SCHED
110 : static void destroy_rt_bandwidth(struct rt_bandwidth *rt_b)
111 : {
112 : hrtimer_cancel(&rt_b->rt_period_timer);
113 : }
114 :
115 : #define rt_entity_is_task(rt_se) (!(rt_se)->my_q)
116 :
117 : static inline struct task_struct *rt_task_of(struct sched_rt_entity *rt_se)
118 : {
119 : #ifdef CONFIG_SCHED_DEBUG
120 : WARN_ON_ONCE(!rt_entity_is_task(rt_se));
121 : #endif
122 : return container_of(rt_se, struct task_struct, rt);
123 : }
124 :
125 : static inline struct rq *rq_of_rt_rq(struct rt_rq *rt_rq)
126 : {
127 : return rt_rq->rq;
128 : }
129 :
130 : static inline struct rt_rq *rt_rq_of_se(struct sched_rt_entity *rt_se)
131 : {
132 : return rt_se->rt_rq;
133 : }
134 :
135 : static inline struct rq *rq_of_rt_se(struct sched_rt_entity *rt_se)
136 : {
137 : struct rt_rq *rt_rq = rt_se->rt_rq;
138 :
139 : return rt_rq->rq;
140 : }
141 :
142 : void unregister_rt_sched_group(struct task_group *tg)
143 : {
144 : if (tg->rt_se)
145 : destroy_rt_bandwidth(&tg->rt_bandwidth);
146 :
147 : }
148 :
149 : void free_rt_sched_group(struct task_group *tg)
150 : {
151 : int i;
152 :
153 : for_each_possible_cpu(i) {
154 : if (tg->rt_rq)
155 : kfree(tg->rt_rq[i]);
156 : if (tg->rt_se)
157 : kfree(tg->rt_se[i]);
158 : }
159 :
160 : kfree(tg->rt_rq);
161 : kfree(tg->rt_se);
162 : }
163 :
164 : void init_tg_rt_entry(struct task_group *tg, struct rt_rq *rt_rq,
165 : struct sched_rt_entity *rt_se, int cpu,
166 : struct sched_rt_entity *parent)
167 : {
168 : struct rq *rq = cpu_rq(cpu);
169 :
170 : rt_rq->highest_prio.curr = MAX_RT_PRIO-1;
171 : rt_rq->rt_nr_boosted = 0;
172 : rt_rq->rq = rq;
173 : rt_rq->tg = tg;
174 :
175 : tg->rt_rq[cpu] = rt_rq;
176 : tg->rt_se[cpu] = rt_se;
177 :
178 : if (!rt_se)
179 : return;
180 :
181 : if (!parent)
182 : rt_se->rt_rq = &rq->rt;
183 : else
184 : rt_se->rt_rq = parent->my_q;
185 :
186 : rt_se->my_q = rt_rq;
187 : rt_se->parent = parent;
188 : INIT_LIST_HEAD(&rt_se->run_list);
189 : }
190 :
191 : int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent)
192 : {
193 : struct rt_rq *rt_rq;
194 : struct sched_rt_entity *rt_se;
195 : int i;
196 :
197 : tg->rt_rq = kcalloc(nr_cpu_ids, sizeof(rt_rq), GFP_KERNEL);
198 : if (!tg->rt_rq)
199 : goto err;
200 : tg->rt_se = kcalloc(nr_cpu_ids, sizeof(rt_se), GFP_KERNEL);
201 : if (!tg->rt_se)
202 : goto err;
203 :
204 : init_rt_bandwidth(&tg->rt_bandwidth,
205 : ktime_to_ns(def_rt_bandwidth.rt_period), 0);
206 :
207 : for_each_possible_cpu(i) {
208 : rt_rq = kzalloc_node(sizeof(struct rt_rq),
209 : GFP_KERNEL, cpu_to_node(i));
210 : if (!rt_rq)
211 : goto err;
212 :
213 : rt_se = kzalloc_node(sizeof(struct sched_rt_entity),
214 : GFP_KERNEL, cpu_to_node(i));
215 : if (!rt_se)
216 : goto err_free_rq;
217 :
218 : init_rt_rq(rt_rq);
219 : rt_rq->rt_runtime = tg->rt_bandwidth.rt_runtime;
220 : init_tg_rt_entry(tg, rt_rq, rt_se, i, parent->rt_se[i]);
221 : }
222 :
223 : return 1;
224 :
225 : err_free_rq:
226 : kfree(rt_rq);
227 : err:
228 : return 0;
229 : }
230 :
231 : #else /* CONFIG_RT_GROUP_SCHED */
232 :
233 : #define rt_entity_is_task(rt_se) (1)
234 :
235 : static inline struct task_struct *rt_task_of(struct sched_rt_entity *rt_se)
236 : {
237 0 : return container_of(rt_se, struct task_struct, rt);
238 : }
239 :
240 : static inline struct rq *rq_of_rt_rq(struct rt_rq *rt_rq)
241 : {
242 0 : return container_of(rt_rq, struct rq, rt);
243 : }
244 :
245 : static inline struct rq *rq_of_rt_se(struct sched_rt_entity *rt_se)
246 : {
247 0 : struct task_struct *p = rt_task_of(rt_se);
248 :
249 0 : return task_rq(p);
250 : }
251 :
252 : static inline struct rt_rq *rt_rq_of_se(struct sched_rt_entity *rt_se)
253 : {
254 0 : struct rq *rq = rq_of_rt_se(rt_se);
255 :
256 : return &rq->rt;
257 : }
258 :
259 0 : void unregister_rt_sched_group(struct task_group *tg) { }
260 :
261 0 : void free_rt_sched_group(struct task_group *tg) { }
262 :
263 0 : int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent)
264 : {
265 0 : return 1;
266 : }
267 : #endif /* CONFIG_RT_GROUP_SCHED */
268 :
269 : #ifdef CONFIG_SMP
270 :
271 : static inline bool need_pull_rt_task(struct rq *rq, struct task_struct *prev)
272 : {
273 : /* Try to pull RT tasks here if we lower this rq's prio */
274 : return rq->online && rq->rt.highest_prio.curr > prev->prio;
275 : }
276 :
277 : static inline int rt_overloaded(struct rq *rq)
278 : {
279 : return atomic_read(&rq->rd->rto_count);
280 : }
281 :
282 : static inline void rt_set_overload(struct rq *rq)
283 : {
284 : if (!rq->online)
285 : return;
286 :
287 : cpumask_set_cpu(rq->cpu, rq->rd->rto_mask);
288 : /*
289 : * Make sure the mask is visible before we set
290 : * the overload count. That is checked to determine
291 : * if we should look at the mask. It would be a shame
292 : * if we looked at the mask, but the mask was not
293 : * updated yet.
294 : *
295 : * Matched by the barrier in pull_rt_task().
296 : */
297 : smp_wmb();
298 : atomic_inc(&rq->rd->rto_count);
299 : }
300 :
301 : static inline void rt_clear_overload(struct rq *rq)
302 : {
303 : if (!rq->online)
304 : return;
305 :
306 : /* the order here really doesn't matter */
307 : atomic_dec(&rq->rd->rto_count);
308 : cpumask_clear_cpu(rq->cpu, rq->rd->rto_mask);
309 : }
310 :
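          : /*
          :  * An rq counts as RT-overloaded when it has more than one queued RT task
          :  * and at least one of them is allowed to migrate; only then is it worth
          :  * other CPUs trying to pull from it.
          :  */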
311 : static void update_rt_migration(struct rt_rq *rt_rq)
312 : {
313 : if (rt_rq->rt_nr_migratory && rt_rq->rt_nr_total > 1) {
314 : if (!rt_rq->overloaded) {
315 : rt_set_overload(rq_of_rt_rq(rt_rq));
316 : rt_rq->overloaded = 1;
317 : }
318 : } else if (rt_rq->overloaded) {
319 : rt_clear_overload(rq_of_rt_rq(rt_rq));
320 : rt_rq->overloaded = 0;
321 : }
322 : }
323 :
324 : static void inc_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
325 : {
326 : struct task_struct *p;
327 :
328 : if (!rt_entity_is_task(rt_se))
329 : return;
330 :
331 : p = rt_task_of(rt_se);
332 : rt_rq = &rq_of_rt_rq(rt_rq)->rt;
333 :
334 : rt_rq->rt_nr_total++;
335 : if (p->nr_cpus_allowed > 1)
336 : rt_rq->rt_nr_migratory++;
337 :
338 : update_rt_migration(rt_rq);
339 : }
340 :
341 : static void dec_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
342 : {
343 : struct task_struct *p;
344 :
345 : if (!rt_entity_is_task(rt_se))
346 : return;
347 :
348 : p = rt_task_of(rt_se);
349 : rt_rq = &rq_of_rt_rq(rt_rq)->rt;
350 :
351 : rt_rq->rt_nr_total--;
352 : if (p->nr_cpus_allowed > 1)
353 : rt_rq->rt_nr_migratory--;
354 :
355 : update_rt_migration(rt_rq);
356 : }
357 :
358 : static inline int has_pushable_tasks(struct rq *rq)
359 : {
360 : return !plist_head_empty(&rq->rt.pushable_tasks);
361 : }
362 :
363 : static DEFINE_PER_CPU(struct callback_head, rt_push_head);
364 : static DEFINE_PER_CPU(struct callback_head, rt_pull_head);
365 :
366 : static void push_rt_tasks(struct rq *);
367 : static void pull_rt_task(struct rq *);
368 :
369 : static inline void rt_queue_push_tasks(struct rq *rq)
370 : {
371 : if (!has_pushable_tasks(rq))
372 : return;
373 :
374 : queue_balance_callback(rq, &per_cpu(rt_push_head, rq->cpu), push_rt_tasks);
375 : }
376 :
377 : static inline void rt_queue_pull_task(struct rq *rq)
378 : {
379 : queue_balance_callback(rq, &per_cpu(rt_pull_head, rq->cpu), pull_rt_task);
380 : }
381 :
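          : /*
          :  * rt.pushable_tasks is a plist sorted by priority, so its first entry is
          :  * always the highest-priority (numerically lowest prio) task that could be
          :  * pushed away; plist_del() below runs first in case @p is already queued.
          :  */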
382 : static void enqueue_pushable_task(struct rq *rq, struct task_struct *p)
383 : {
384 : plist_del(&p->pushable_tasks, &rq->rt.pushable_tasks);
385 : plist_node_init(&p->pushable_tasks, p->prio);
386 : plist_add(&p->pushable_tasks, &rq->rt.pushable_tasks);
387 :
388 : /* Update the highest prio pushable task */
389 : if (p->prio < rq->rt.highest_prio.next)
390 : rq->rt.highest_prio.next = p->prio;
391 : }
392 :
393 : static void dequeue_pushable_task(struct rq *rq, struct task_struct *p)
394 : {
395 : plist_del(&p->pushable_tasks, &rq->rt.pushable_tasks);
396 :
397 : /* Update the new highest prio pushable task */
398 : if (has_pushable_tasks(rq)) {
399 : p = plist_first_entry(&rq->rt.pushable_tasks,
400 : struct task_struct, pushable_tasks);
401 : rq->rt.highest_prio.next = p->prio;
402 : } else {
403 : rq->rt.highest_prio.next = MAX_RT_PRIO-1;
404 : }
405 : }
406 :
407 : #else
408 :
409 : static inline void enqueue_pushable_task(struct rq *rq, struct task_struct *p)
410 : {
411 : }
412 :
413 : static inline void dequeue_pushable_task(struct rq *rq, struct task_struct *p)
414 : {
415 : }
416 :
417 : static inline
418 : void inc_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
419 : {
420 : }
421 :
422 : static inline
423 : void dec_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
424 : {
425 : }
426 :
427 : static inline void rt_queue_push_tasks(struct rq *rq)
428 : {
429 : }
430 : #endif /* CONFIG_SMP */
431 :
432 : static void enqueue_top_rt_rq(struct rt_rq *rt_rq);
433 : static void dequeue_top_rt_rq(struct rt_rq *rt_rq);
434 :
435 : static inline int on_rt_rq(struct sched_rt_entity *rt_se)
436 : {
437 0 : return rt_se->on_rq;
438 : }
439 :
440 : #ifdef CONFIG_UCLAMP_TASK
441 : /*
442 : * Verify the fitness of task @p to run on @cpu taking into account the uclamp
443 : * settings.
444 : *
445 : * This check is only important for heterogeneous systems where the uclamp_min value
446 : * is higher than the capacity of the @cpu. For non-heterogeneous systems this
447 : * function will always return true.
448 : *
449 : * The function will return true if the capacity of the @cpu is >= the
450 : * uclamp_min and false otherwise.
451 : *
452 : * Note that uclamp_min will be clamped to uclamp_max if uclamp_min
453 : * > uclamp_max.
454 : */
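          : /*
          :  * Illustrative numbers: with uclamp_min == 512 and uclamp_max == 256 the
          :  * effective requirement is min(512, 256) == 256, so a CPU whose original
          :  * capacity is 300 is still considered a fit.
          :  */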
455 : static inline bool rt_task_fits_capacity(struct task_struct *p, int cpu)
456 : {
457 : unsigned int min_cap;
458 : unsigned int max_cap;
459 : unsigned int cpu_cap;
460 :
461 : /* Only heterogeneous systems can benefit from this check */
462 : if (!static_branch_unlikely(&sched_asym_cpucapacity))
463 : return true;
464 :
465 : min_cap = uclamp_eff_value(p, UCLAMP_MIN);
466 : max_cap = uclamp_eff_value(p, UCLAMP_MAX);
467 :
468 : cpu_cap = capacity_orig_of(cpu);
469 :
470 : return cpu_cap >= min(min_cap, max_cap);
471 : }
472 : #else
473 : static inline bool rt_task_fits_capacity(struct task_struct *p, int cpu)
474 : {
475 : return true;
476 : }
477 : #endif
478 :
479 : #ifdef CONFIG_RT_GROUP_SCHED
480 :
481 : static inline u64 sched_rt_runtime(struct rt_rq *rt_rq)
482 : {
483 : if (!rt_rq->tg)
484 : return RUNTIME_INF;
485 :
486 : return rt_rq->rt_runtime;
487 : }
488 :
489 : static inline u64 sched_rt_period(struct rt_rq *rt_rq)
490 : {
491 : return ktime_to_ns(rt_rq->tg->rt_bandwidth.rt_period);
492 : }
493 :
494 : typedef struct task_group *rt_rq_iter_t;
495 :
496 : static inline struct task_group *next_task_group(struct task_group *tg)
497 : {
498 : do {
499 : tg = list_entry_rcu(tg->list.next,
500 : typeof(struct task_group), list);
501 : } while (&tg->list != &task_groups && task_group_is_autogroup(tg));
502 :
503 : if (&tg->list == &task_groups)
504 : tg = NULL;
505 :
506 : return tg;
507 : }
508 :
509 : #define for_each_rt_rq(rt_rq, iter, rq) \
510 : for (iter = container_of(&task_groups, typeof(*iter), list); \
511 : (iter = next_task_group(iter)) && \
512 : (rt_rq = iter->rt_rq[cpu_of(rq)]);)
513 :
514 : #define for_each_sched_rt_entity(rt_se) \
515 : for (; rt_se; rt_se = rt_se->parent)
516 :
517 : static inline struct rt_rq *group_rt_rq(struct sched_rt_entity *rt_se)
518 : {
519 : return rt_se->my_q;
520 : }
521 :
522 : static void enqueue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags);
523 : static void dequeue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags);
524 :
525 : static void sched_rt_rq_enqueue(struct rt_rq *rt_rq)
526 : {
527 : struct task_struct *curr = rq_of_rt_rq(rt_rq)->curr;
528 : struct rq *rq = rq_of_rt_rq(rt_rq);
529 : struct sched_rt_entity *rt_se;
530 :
531 : int cpu = cpu_of(rq);
532 :
533 : rt_se = rt_rq->tg->rt_se[cpu];
534 :
535 : if (rt_rq->rt_nr_running) {
536 : if (!rt_se)
537 : enqueue_top_rt_rq(rt_rq);
538 : else if (!on_rt_rq(rt_se))
539 : enqueue_rt_entity(rt_se, 0);
540 :
541 : if (rt_rq->highest_prio.curr < curr->prio)
542 : resched_curr(rq);
543 : }
544 : }
545 :
546 : static void sched_rt_rq_dequeue(struct rt_rq *rt_rq)
547 : {
548 : struct sched_rt_entity *rt_se;
549 : int cpu = cpu_of(rq_of_rt_rq(rt_rq));
550 :
551 : rt_se = rt_rq->tg->rt_se[cpu];
552 :
553 : if (!rt_se) {
554 : dequeue_top_rt_rq(rt_rq);
555 : /* Kick cpufreq (see the comment in kernel/sched/sched.h). */
556 : cpufreq_update_util(rq_of_rt_rq(rt_rq), 0);
557 : }
558 : else if (on_rt_rq(rt_se))
559 : dequeue_rt_entity(rt_se, 0);
560 : }
561 :
562 : static inline int rt_rq_throttled(struct rt_rq *rt_rq)
563 : {
564 : return rt_rq->rt_throttled && !rt_rq->rt_nr_boosted;
565 : }
566 :
567 : static int rt_se_boosted(struct sched_rt_entity *rt_se)
568 : {
569 : struct rt_rq *rt_rq = group_rt_rq(rt_se);
570 : struct task_struct *p;
571 :
572 : if (rt_rq)
573 : return !!rt_rq->rt_nr_boosted;
574 :
575 : p = rt_task_of(rt_se);
576 : return p->prio != p->normal_prio;
577 : }
578 :
579 : #ifdef CONFIG_SMP
580 : static inline const struct cpumask *sched_rt_period_mask(void)
581 : {
582 : return this_rq()->rd->span;
583 : }
584 : #else
585 : static inline const struct cpumask *sched_rt_period_mask(void)
586 : {
587 : return cpu_online_mask;
588 : }
589 : #endif
590 :
591 : static inline
592 : struct rt_rq *sched_rt_period_rt_rq(struct rt_bandwidth *rt_b, int cpu)
593 : {
594 : return container_of(rt_b, struct task_group, rt_bandwidth)->rt_rq[cpu];
595 : }
596 :
597 : static inline struct rt_bandwidth *sched_rt_bandwidth(struct rt_rq *rt_rq)
598 : {
599 : return &rt_rq->tg->rt_bandwidth;
600 : }
601 :
602 : #else /* !CONFIG_RT_GROUP_SCHED */
603 :
604 : static inline u64 sched_rt_runtime(struct rt_rq *rt_rq)
605 : {
606 : return rt_rq->rt_runtime;
607 : }
608 :
609 : static inline u64 sched_rt_period(struct rt_rq *rt_rq)
610 : {
611 0 : return ktime_to_ns(def_rt_bandwidth.rt_period);
612 : }
613 :
614 : typedef struct rt_rq *rt_rq_iter_t;
615 :
616 : #define for_each_rt_rq(rt_rq, iter, rq) \
617 : for ((void) iter, rt_rq = &rq->rt; rt_rq; rt_rq = NULL)
618 :
619 : #define for_each_sched_rt_entity(rt_se) \
620 : for (; rt_se; rt_se = NULL)
621 :
622 : static inline struct rt_rq *group_rt_rq(struct sched_rt_entity *rt_se)
623 : {
624 : return NULL;
625 : }
626 :
627 0 : static inline void sched_rt_rq_enqueue(struct rt_rq *rt_rq)
628 : {
629 0 : struct rq *rq = rq_of_rt_rq(rt_rq);
630 :
631 0 : if (!rt_rq->rt_nr_running)
632 : return;
633 :
634 0 : enqueue_top_rt_rq(rt_rq);
635 0 : resched_curr(rq);
636 : }
637 :
638 : static inline void sched_rt_rq_dequeue(struct rt_rq *rt_rq)
639 : {
640 0 : dequeue_top_rt_rq(rt_rq);
641 : }
642 :
643 : static inline int rt_rq_throttled(struct rt_rq *rt_rq)
644 : {
645 : return rt_rq->rt_throttled;
646 : }
647 :
648 : static inline const struct cpumask *sched_rt_period_mask(void)
649 : {
650 : return cpu_online_mask;
651 : }
652 :
653 : static inline
654 : struct rt_rq *sched_rt_period_rt_rq(struct rt_bandwidth *rt_b, int cpu)
655 : {
656 0 : return &cpu_rq(cpu)->rt;
657 : }
658 :
659 : static inline struct rt_bandwidth *sched_rt_bandwidth(struct rt_rq *rt_rq)
660 : {
661 : return &def_rt_bandwidth;
662 : }
663 :
664 : #endif /* CONFIG_RT_GROUP_SCHED */
665 :
666 0 : bool sched_rt_bandwidth_account(struct rt_rq *rt_rq)
667 : {
668 0 : struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
669 :
670 0 : return (hrtimer_active(&rt_b->rt_period_timer) ||
671 0 : rt_rq->rt_time < rt_b->rt_runtime);
672 : }
673 :
674 : #ifdef CONFIG_SMP
675 : /*
676 : * We ran out of runtime; see if we can borrow some from our neighbours.
677 : */
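          : /*
          :  * Illustrative example: with 4 CPUs in the root domain (weight == 4) and a
          :  * neighbour that has 40ms of spare runtime, we take 40ms / 4 == 10ms from
          :  * it, and we never let our own rt_runtime grow beyond rt_period.
          :  */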
678 : static void do_balance_runtime(struct rt_rq *rt_rq)
679 : {
680 : struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
681 : struct root_domain *rd = rq_of_rt_rq(rt_rq)->rd;
682 : int i, weight;
683 : u64 rt_period;
684 :
685 : weight = cpumask_weight(rd->span);
686 :
687 : raw_spin_lock(&rt_b->rt_runtime_lock);
688 : rt_period = ktime_to_ns(rt_b->rt_period);
689 : for_each_cpu(i, rd->span) {
690 : struct rt_rq *iter = sched_rt_period_rt_rq(rt_b, i);
691 : s64 diff;
692 :
693 : if (iter == rt_rq)
694 : continue;
695 :
696 : raw_spin_lock(&iter->rt_runtime_lock);
697 : /*
698 : * Either all rqs have inf runtime and there's nothing to steal
699 : * or __disable_runtime() below sets a specific rq to inf to
700 : * indicate it's been disabled and disallow stealing.
701 : */
702 : if (iter->rt_runtime == RUNTIME_INF)
703 : goto next;
704 :
705 : /*
706 : * From runqueues with spare time, take 1/n part of their
707 : * spare time, but no more than our period.
708 : */
709 : diff = iter->rt_runtime - iter->rt_time;
710 : if (diff > 0) {
711 : diff = div_u64((u64)diff, weight);
712 : if (rt_rq->rt_runtime + diff > rt_period)
713 : diff = rt_period - rt_rq->rt_runtime;
714 : iter->rt_runtime -= diff;
715 : rt_rq->rt_runtime += diff;
716 : if (rt_rq->rt_runtime == rt_period) {
717 : raw_spin_unlock(&iter->rt_runtime_lock);
718 : break;
719 : }
720 : }
721 : next:
722 : raw_spin_unlock(&iter->rt_runtime_lock);
723 : }
724 : raw_spin_unlock(&rt_b->rt_runtime_lock);
725 : }
726 :
727 : /*
728 : * Ensure this RQ takes back all the runtime it lent to its neighbours.
729 : */
730 : static void __disable_runtime(struct rq *rq)
731 : {
732 : struct root_domain *rd = rq->rd;
733 : rt_rq_iter_t iter;
734 : struct rt_rq *rt_rq;
735 :
736 : if (unlikely(!scheduler_running))
737 : return;
738 :
739 : for_each_rt_rq(rt_rq, iter, rq) {
740 : struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
741 : s64 want;
742 : int i;
743 :
744 : raw_spin_lock(&rt_b->rt_runtime_lock);
745 : raw_spin_lock(&rt_rq->rt_runtime_lock);
746 : /*
747 : * Either we're all inf and nobody needs to borrow, or we're
748 : * already disabled and thus have nothing to do, or we have
749 : * exactly the right amount of runtime to take out.
750 : */
751 : if (rt_rq->rt_runtime == RUNTIME_INF ||
752 : rt_rq->rt_runtime == rt_b->rt_runtime)
753 : goto balanced;
754 : raw_spin_unlock(&rt_rq->rt_runtime_lock);
755 :
756 : /*
757 : * Calculate the difference between what we started out with
758 : * and what we currently have; that's the amount of runtime
759 : * we lent out and now have to reclaim.
760 : */
761 : want = rt_b->rt_runtime - rt_rq->rt_runtime;
762 :
763 : /*
764 : * Greedy reclaim, take back as much as we can.
765 : */
766 : for_each_cpu(i, rd->span) {
767 : struct rt_rq *iter = sched_rt_period_rt_rq(rt_b, i);
768 : s64 diff;
769 :
770 : /*
771 : * Can't reclaim from ourselves or disabled runqueues.
772 : */
773 : if (iter == rt_rq || iter->rt_runtime == RUNTIME_INF)
774 : continue;
775 :
776 : raw_spin_lock(&iter->rt_runtime_lock);
777 : if (want > 0) {
778 : diff = min_t(s64, iter->rt_runtime, want);
779 : iter->rt_runtime -= diff;
780 : want -= diff;
781 : } else {
782 : iter->rt_runtime -= want;
783 : want -= want;
784 : }
785 : raw_spin_unlock(&iter->rt_runtime_lock);
786 :
787 : if (!want)
788 : break;
789 : }
790 :
791 : raw_spin_lock(&rt_rq->rt_runtime_lock);
792 : /*
793 : * We cannot be left wanting - that would mean some runtime
794 : * leaked out of the system.
795 : */
796 : BUG_ON(want);
797 : balanced:
798 : /*
799 : * Disable all the borrow logic by pretending we have inf
800 : * runtime - in which case borrowing doesn't make sense.
801 : */
802 : rt_rq->rt_runtime = RUNTIME_INF;
803 : rt_rq->rt_throttled = 0;
804 : raw_spin_unlock(&rt_rq->rt_runtime_lock);
805 : raw_spin_unlock(&rt_b->rt_runtime_lock);
806 :
807 : /* Make rt_rq available for pick_next_task() */
808 : sched_rt_rq_enqueue(rt_rq);
809 : }
810 : }
811 :
812 : static void __enable_runtime(struct rq *rq)
813 : {
814 : rt_rq_iter_t iter;
815 : struct rt_rq *rt_rq;
816 :
817 : if (unlikely(!scheduler_running))
818 : return;
819 :
820 : /*
821 : * Reset each runqueue's bandwidth settings
822 : */
823 : for_each_rt_rq(rt_rq, iter, rq) {
824 : struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
825 :
826 : raw_spin_lock(&rt_b->rt_runtime_lock);
827 : raw_spin_lock(&rt_rq->rt_runtime_lock);
828 : rt_rq->rt_runtime = rt_b->rt_runtime;
829 : rt_rq->rt_time = 0;
830 : rt_rq->rt_throttled = 0;
831 : raw_spin_unlock(&rt_rq->rt_runtime_lock);
832 : raw_spin_unlock(&rt_b->rt_runtime_lock);
833 : }
834 : }
835 :
836 : static void balance_runtime(struct rt_rq *rt_rq)
837 : {
838 : if (!sched_feat(RT_RUNTIME_SHARE))
839 : return;
840 :
841 : if (rt_rq->rt_time > rt_rq->rt_runtime) {
842 : raw_spin_unlock(&rt_rq->rt_runtime_lock);
843 : do_balance_runtime(rt_rq);
844 : raw_spin_lock(&rt_rq->rt_runtime_lock);
845 : }
846 : }
847 : #else /* !CONFIG_SMP */
848 : static inline void balance_runtime(struct rt_rq *rt_rq) {}
849 : #endif /* CONFIG_SMP */
850 :
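          : /*
          :  * Runs from the rt_period hrtimer: for every rt_rq in the period mask,
          :  * refill runtime by subtracting up to overrun * rt_runtime from the
          :  * accumulated rt_time, and unthrottle/re-enqueue the rt_rq once its
          :  * rt_time drops back below its runtime.
          :  */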
851 0 : static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun)
852 : {
853 0 : int i, idle = 1, throttled = 0;
854 : const struct cpumask *span;
855 :
856 0 : span = sched_rt_period_mask();
857 : #ifdef CONFIG_RT_GROUP_SCHED
858 : /*
859 : * FIXME: isolated CPUs should really leave the root task group,
860 : * whether they are isolcpus or were isolated via cpusets, lest
861 : * the timer run on a CPU which does not service all runqueues,
862 : * potentially leaving other CPUs indefinitely throttled. If
863 : * isolation is really required, the user will turn the throttle
864 : * off to kill the perturbations it causes anyway. Meanwhile,
865 : * this maintains functionality for boot and/or troubleshooting.
866 : */
867 : if (rt_b == &root_task_group.rt_bandwidth)
868 : span = cpu_online_mask;
869 : #endif
870 0 : for_each_cpu(i, span) {
871 0 : int enqueue = 0;
872 0 : struct rt_rq *rt_rq = sched_rt_period_rt_rq(rt_b, i);
873 0 : struct rq *rq = rq_of_rt_rq(rt_rq);
874 : int skip;
875 :
876 : /*
877 : * When span == cpu_online_mask, taking each rq->lock
878 : * can be time-consuming. Try to avoid it when possible.
879 : */
880 0 : raw_spin_lock(&rt_rq->rt_runtime_lock);
881 0 : if (!sched_feat(RT_RUNTIME_SHARE) && rt_rq->rt_runtime != RUNTIME_INF)
882 0 : rt_rq->rt_runtime = rt_b->rt_runtime;
883 0 : skip = !rt_rq->rt_time && !rt_rq->rt_nr_running;
884 0 : raw_spin_unlock(&rt_rq->rt_runtime_lock);
885 0 : if (skip)
886 0 : continue;
887 :
888 0 : raw_spin_rq_lock(rq);
889 0 : update_rq_clock(rq);
890 :
891 0 : if (rt_rq->rt_time) {
892 : u64 runtime;
893 :
894 0 : raw_spin_lock(&rt_rq->rt_runtime_lock);
895 0 : if (rt_rq->rt_throttled)
896 : balance_runtime(rt_rq);
897 0 : runtime = rt_rq->rt_runtime;
898 0 : rt_rq->rt_time -= min(rt_rq->rt_time, overrun*runtime);
899 0 : if (rt_rq->rt_throttled && rt_rq->rt_time < runtime) {
900 0 : rt_rq->rt_throttled = 0;
901 0 : enqueue = 1;
902 :
903 : /*
904 : * When we're idle and a woken (rt) task is
905 : * throttled, check_preempt_curr() will set
906 : * skip_update, and the time between the wakeup
907 : * and this unthrottle will get accounted as
908 : * 'runtime'.
909 : */
910 0 : if (rt_rq->rt_nr_running && rq->curr == rq->idle)
911 : rq_clock_cancel_skipupdate(rq);
912 : }
913 0 : if (rt_rq->rt_time || rt_rq->rt_nr_running)
914 0 : idle = 0;
915 0 : raw_spin_unlock(&rt_rq->rt_runtime_lock);
916 0 : } else if (rt_rq->rt_nr_running) {
917 0 : idle = 0;
918 0 : if (!rt_rq_throttled(rt_rq))
919 0 : enqueue = 1;
920 : }
921 0 : if (rt_rq->rt_throttled)
922 0 : throttled = 1;
923 :
924 0 : if (enqueue)
925 0 : sched_rt_rq_enqueue(rt_rq);
926 0 : raw_spin_rq_unlock(rq);
927 : }
928 :
929 0 : if (!throttled && (!rt_bandwidth_enabled() || rt_b->rt_runtime == RUNTIME_INF))
930 : return 1;
931 :
932 : return idle;
933 : }
934 :
935 : static inline int rt_se_prio(struct sched_rt_entity *rt_se)
936 : {
937 : #ifdef CONFIG_RT_GROUP_SCHED
938 : struct rt_rq *rt_rq = group_rt_rq(rt_se);
939 :
940 : if (rt_rq)
941 : return rt_rq->highest_prio.curr;
942 : #endif
943 :
944 0 : return rt_task_of(rt_se)->prio;
945 : }
946 :
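          : /*
          :  * With the usual defaults (typically 950000us of runtime per 1000000us
          :  * period, i.e. 95%), an rt_rq that accumulates more rt_time than its
          :  * runtime within one period is marked throttled and dequeued here until
          :  * the period timer replenishes it.
          :  */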
947 0 : static int sched_rt_runtime_exceeded(struct rt_rq *rt_rq)
948 : {
949 0 : u64 runtime = sched_rt_runtime(rt_rq);
950 :
951 0 : if (rt_rq->rt_throttled)
952 : return rt_rq_throttled(rt_rq);
953 :
954 0 : if (runtime >= sched_rt_period(rt_rq))
955 : return 0;
956 :
957 0 : balance_runtime(rt_rq);
958 0 : runtime = sched_rt_runtime(rt_rq);
959 0 : if (runtime == RUNTIME_INF)
960 : return 0;
961 :
962 0 : if (rt_rq->rt_time > runtime) {
963 0 : struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
964 :
965 : /*
966 : * Don't actually throttle groups that have no runtime assigned
967 : * but accrue some time due to boosting.
968 : */
969 0 : if (likely(rt_b->rt_runtime)) {
970 0 : rt_rq->rt_throttled = 1;
971 0 : printk_deferred_once("sched: RT throttling activated\n");
972 : } else {
973 : /*
974 : * In case we did anyway, make it go away,
975 : * replenishment is a joke, since it will replenish us
976 : * with exactly 0 ns.
977 : */
978 0 : rt_rq->rt_time = 0;
979 : }
980 :
981 0 : if (rt_rq_throttled(rt_rq)) {
982 0 : sched_rt_rq_dequeue(rt_rq);
983 0 : return 1;
984 : }
985 : }
986 :
987 : return 0;
988 : }
989 :
990 : /*
991 : * Update the current task's runtime statistics. Skip current tasks that
992 : * are not in our scheduling class.
993 : */
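          : /*
          :  * delta_exec is measured against rq_clock_task() and charged both to the
          :  * task (sum_exec_runtime) and, when RT bandwidth is enabled, to the
          :  * rt_time of every rt_rq up the task's group hierarchy.
          :  */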
994 0 : static void update_curr_rt(struct rq *rq)
995 : {
996 0 : struct task_struct *curr = rq->curr;
997 0 : struct sched_rt_entity *rt_se = &curr->rt;
998 : u64 delta_exec;
999 : u64 now;
1000 :
1001 0 : if (curr->sched_class != &rt_sched_class)
1002 : return;
1003 :
1004 0 : now = rq_clock_task(rq);
1005 0 : delta_exec = now - curr->se.exec_start;
1006 0 : if (unlikely((s64)delta_exec <= 0))
1007 : return;
1008 :
1009 : schedstat_set(curr->stats.exec_max,
1010 : max(curr->stats.exec_max, delta_exec));
1011 :
1012 0 : trace_sched_stat_runtime(curr, delta_exec, 0);
1013 :
1014 0 : curr->se.sum_exec_runtime += delta_exec;
1015 0 : account_group_exec_runtime(curr, delta_exec);
1016 :
1017 0 : curr->se.exec_start = now;
1018 0 : cgroup_account_cputime(curr, delta_exec);
1019 :
1020 0 : if (!rt_bandwidth_enabled())
1021 : return;
1022 :
1023 0 : for_each_sched_rt_entity(rt_se) {
1024 0 : struct rt_rq *rt_rq = rt_rq_of_se(rt_se);
1025 : int exceeded;
1026 :
1027 0 : if (sched_rt_runtime(rt_rq) != RUNTIME_INF) {
1028 0 : raw_spin_lock(&rt_rq->rt_runtime_lock);
1029 0 : rt_rq->rt_time += delta_exec;
1030 0 : exceeded = sched_rt_runtime_exceeded(rt_rq);
1031 0 : if (exceeded)
1032 0 : resched_curr(rq);
1033 0 : raw_spin_unlock(&rt_rq->rt_runtime_lock);
1034 0 : if (exceeded)
1035 0 : do_start_rt_bandwidth(sched_rt_bandwidth(rt_rq));
1036 : }
1037 : }
1038 : }
1039 :
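          : /*
          :  * {de,en}queue_top_rt_rq() remove/add the root rt_rq's task count from/to
          :  * rq->nr_running as the whole RT hierarchy gets throttled or replenished,
          :  * so the rq looks (non-)idle to the core scheduler accordingly.
          :  */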
1040 : static void
1041 0 : dequeue_top_rt_rq(struct rt_rq *rt_rq)
1042 : {
1043 0 : struct rq *rq = rq_of_rt_rq(rt_rq);
1044 :
1045 0 : BUG_ON(&rq->rt != rt_rq);
1046 :
1047 0 : if (!rt_rq->rt_queued)
1048 : return;
1049 :
1050 0 : BUG_ON(!rq->nr_running);
1051 :
1052 0 : sub_nr_running(rq, rt_rq->rt_nr_running);
1053 0 : rt_rq->rt_queued = 0;
1054 :
1055 : }
1056 :
1057 : static void
1058 0 : enqueue_top_rt_rq(struct rt_rq *rt_rq)
1059 : {
1060 0 : struct rq *rq = rq_of_rt_rq(rt_rq);
1061 :
1062 0 : BUG_ON(&rq->rt != rt_rq);
1063 :
1064 0 : if (rt_rq->rt_queued)
1065 : return;
1066 :
1067 0 : if (rt_rq_throttled(rt_rq))
1068 : return;
1069 :
1070 0 : if (rt_rq->rt_nr_running) {
1071 0 : add_nr_running(rq, rt_rq->rt_nr_running);
1072 0 : rt_rq->rt_queued = 1;
1073 : }
1074 :
1075 : /* Kick cpufreq (see the comment in kernel/sched/sched.h). */
1076 : cpufreq_update_util(rq, 0);
1077 : }
1078 :
1079 : #if defined CONFIG_SMP
1080 :
1081 : static void
1082 : inc_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio)
1083 : {
1084 : struct rq *rq = rq_of_rt_rq(rt_rq);
1085 :
1086 : #ifdef CONFIG_RT_GROUP_SCHED
1087 : /*
1088 : * Change rq's cpupri only if rt_rq is the top queue.
1089 : */
1090 : if (&rq->rt != rt_rq)
1091 : return;
1092 : #endif
1093 : if (rq->online && prio < prev_prio)
1094 : cpupri_set(&rq->rd->cpupri, rq->cpu, prio);
1095 : }
1096 :
1097 : static void
1098 : dec_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio)
1099 : {
1100 : struct rq *rq = rq_of_rt_rq(rt_rq);
1101 :
1102 : #ifdef CONFIG_RT_GROUP_SCHED
1103 : /*
1104 : * Change rq's cpupri only if rt_rq is the top queue.
1105 : */
1106 : if (&rq->rt != rt_rq)
1107 : return;
1108 : #endif
1109 : if (rq->online && rt_rq->highest_prio.curr != prev_prio)
1110 : cpupri_set(&rq->rd->cpupri, rq->cpu, rt_rq->highest_prio.curr);
1111 : }
1112 :
1113 : #else /* CONFIG_SMP */
1114 :
1115 : static inline
1116 : void inc_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio) {}
1117 : static inline
1118 : void dec_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio) {}
1119 :
1120 : #endif /* CONFIG_SMP */
1121 :
1122 : #if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
1123 : static void
1124 : inc_rt_prio(struct rt_rq *rt_rq, int prio)
1125 : {
1126 : int prev_prio = rt_rq->highest_prio.curr;
1127 :
1128 : if (prio < prev_prio)
1129 : rt_rq->highest_prio.curr = prio;
1130 :
1131 : inc_rt_prio_smp(rt_rq, prio, prev_prio);
1132 : }
1133 :
1134 : static void
1135 : dec_rt_prio(struct rt_rq *rt_rq, int prio)
1136 : {
1137 : int prev_prio = rt_rq->highest_prio.curr;
1138 :
1139 : if (rt_rq->rt_nr_running) {
1140 :
1141 : WARN_ON(prio < prev_prio);
1142 :
1143 : /*
1144 : * This may have been our highest task, and therefore
1145 : * we may have some recomputation to do
1146 : */
1147 : if (prio == prev_prio) {
1148 : struct rt_prio_array *array = &rt_rq->active;
1149 :
1150 : rt_rq->highest_prio.curr =
1151 : sched_find_first_bit(array->bitmap);
1152 : }
1153 :
1154 : } else {
1155 : rt_rq->highest_prio.curr = MAX_RT_PRIO-1;
1156 : }
1157 :
1158 : dec_rt_prio_smp(rt_rq, prio, prev_prio);
1159 : }
1160 :
1161 : #else
1162 :
1163 : static inline void inc_rt_prio(struct rt_rq *rt_rq, int prio) {}
1164 : static inline void dec_rt_prio(struct rt_rq *rt_rq, int prio) {}
1165 :
1166 : #endif /* CONFIG_SMP || CONFIG_RT_GROUP_SCHED */
1167 :
1168 : #ifdef CONFIG_RT_GROUP_SCHED
1169 :
1170 : static void
1171 : inc_rt_group(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
1172 : {
1173 : if (rt_se_boosted(rt_se))
1174 : rt_rq->rt_nr_boosted++;
1175 :
1176 : if (rt_rq->tg)
1177 : start_rt_bandwidth(&rt_rq->tg->rt_bandwidth);
1178 : }
1179 :
1180 : static void
1181 : dec_rt_group(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
1182 : {
1183 : if (rt_se_boosted(rt_se))
1184 : rt_rq->rt_nr_boosted--;
1185 :
1186 : WARN_ON(!rt_rq->rt_nr_running && rt_rq->rt_nr_boosted);
1187 : }
1188 :
1189 : #else /* CONFIG_RT_GROUP_SCHED */
1190 :
1191 : static void
1192 : inc_rt_group(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
1193 : {
1194 0 : start_rt_bandwidth(&def_rt_bandwidth);
1195 : }
1196 :
1197 : static inline
1198 : void dec_rt_group(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) {}
1199 :
1200 : #endif /* CONFIG_RT_GROUP_SCHED */
1201 :
1202 : static inline
1203 : unsigned int rt_se_nr_running(struct sched_rt_entity *rt_se)
1204 : {
1205 0 : struct rt_rq *group_rq = group_rt_rq(rt_se);
1206 :
1207 : if (group_rq)
1208 : return group_rq->rt_nr_running;
1209 : else
1210 : return 1;
1211 : }
1212 :
1213 : static inline
1214 : unsigned int rt_se_rr_nr_running(struct sched_rt_entity *rt_se)
1215 : {
1216 0 : struct rt_rq *group_rq = group_rt_rq(rt_se);
1217 : struct task_struct *tsk;
1218 :
1219 : if (group_rq)
1220 : return group_rq->rr_nr_running;
1221 :
1222 0 : tsk = rt_task_of(rt_se);
1223 :
1224 0 : return (tsk->policy == SCHED_RR) ? 1 : 0;
1225 : }
1226 :
1227 : static inline
1228 0 : void inc_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
1229 : {
1230 0 : int prio = rt_se_prio(rt_se);
1231 :
1232 0 : WARN_ON(!rt_prio(prio));
1233 0 : rt_rq->rt_nr_running += rt_se_nr_running(rt_se);
1234 0 : rt_rq->rr_nr_running += rt_se_rr_nr_running(rt_se);
1235 :
1236 0 : inc_rt_prio(rt_rq, prio);
1237 0 : inc_rt_migration(rt_se, rt_rq);
1238 0 : inc_rt_group(rt_se, rt_rq);
1239 0 : }
1240 :
1241 : static inline
1242 0 : void dec_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
1243 : {
1244 0 : WARN_ON(!rt_prio(rt_se_prio(rt_se)));
1245 0 : WARN_ON(!rt_rq->rt_nr_running);
1246 0 : rt_rq->rt_nr_running -= rt_se_nr_running(rt_se);
1247 0 : rt_rq->rr_nr_running -= rt_se_rr_nr_running(rt_se);
1248 :
1249 0 : dec_rt_prio(rt_rq, rt_se_prio(rt_se));
1250 0 : dec_rt_migration(rt_se, rt_rq);
1251 0 : dec_rt_group(rt_se, rt_rq);
1252 0 : }
1253 :
1254 : /*
1255 : * Change rt_se->run_list location unless SAVE && !MOVE
1256 : *
1257 : * assumes ENQUEUE/DEQUEUE flags match
1258 : */
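          : /*
          :  * In other words: a SAVE-only (de)queue, e.g. one done transiently around an
          :  * attribute change, keeps the entity's position in its priority list, while
          :  * everything else (plain enqueue/dequeue or SAVE together with MOVE)
          :  * relocates it.
          :  */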
1259 : static inline bool move_entity(unsigned int flags)
1260 : {
1261 0 : if ((flags & (DEQUEUE_SAVE | DEQUEUE_MOVE)) == DEQUEUE_SAVE)
1262 : return false;
1263 :
1264 : return true;
1265 : }
1266 :
1267 : static void __delist_rt_entity(struct sched_rt_entity *rt_se, struct rt_prio_array *array)
1268 : {
1269 0 : list_del_init(&rt_se->run_list);
1270 :
1271 0 : if (list_empty(array->queue + rt_se_prio(rt_se)))
1272 0 : __clear_bit(rt_se_prio(rt_se), array->bitmap);
1273 :
1274 0 : rt_se->on_list = 0;
1275 : }
1276 :
1277 : static inline struct sched_statistics *
1278 : __schedstats_from_rt_se(struct sched_rt_entity *rt_se)
1279 : {
1280 : #ifdef CONFIG_RT_GROUP_SCHED
1281 : /* schedstats is not supported for rt group. */
1282 : if (!rt_entity_is_task(rt_se))
1283 : return NULL;
1284 : #endif
1285 :
1286 : return &rt_task_of(rt_se)->stats;
1287 : }
1288 :
1289 : static inline void
1290 : update_stats_wait_start_rt(struct rt_rq *rt_rq, struct sched_rt_entity *rt_se)
1291 : {
1292 : struct sched_statistics *stats;
1293 0 : struct task_struct *p = NULL;
1294 :
1295 : if (!schedstat_enabled())
1296 : return;
1297 :
1298 : if (rt_entity_is_task(rt_se))
1299 : p = rt_task_of(rt_se);
1300 :
1301 : stats = __schedstats_from_rt_se(rt_se);
1302 : if (!stats)
1303 : return;
1304 :
1305 : __update_stats_wait_start(rq_of_rt_rq(rt_rq), p, stats);
1306 : }
1307 :
1308 : static inline void
1309 : update_stats_enqueue_sleeper_rt(struct rt_rq *rt_rq, struct sched_rt_entity *rt_se)
1310 : {
1311 : struct sched_statistics *stats;
1312 : struct task_struct *p = NULL;
1313 :
1314 : if (!schedstat_enabled())
1315 : return;
1316 :
1317 : if (rt_entity_is_task(rt_se))
1318 : p = rt_task_of(rt_se);
1319 :
1320 : stats = __schedstats_from_rt_se(rt_se);
1321 : if (!stats)
1322 : return;
1323 :
1324 : __update_stats_enqueue_sleeper(rq_of_rt_rq(rt_rq), p, stats);
1325 : }
1326 :
1327 : static inline void
1328 : update_stats_enqueue_rt(struct rt_rq *rt_rq, struct sched_rt_entity *rt_se,
1329 : int flags)
1330 : {
1331 : if (!schedstat_enabled())
1332 : return;
1333 :
1334 : if (flags & ENQUEUE_WAKEUP)
1335 : update_stats_enqueue_sleeper_rt(rt_rq, rt_se);
1336 : }
1337 :
1338 : static inline void
1339 : update_stats_wait_end_rt(struct rt_rq *rt_rq, struct sched_rt_entity *rt_se)
1340 : {
1341 : struct sched_statistics *stats;
1342 : struct task_struct *p = NULL;
1343 :
1344 : if (!schedstat_enabled())
1345 : return;
1346 :
1347 : if (rt_entity_is_task(rt_se))
1348 : p = rt_task_of(rt_se);
1349 :
1350 : stats = __schedstats_from_rt_se(rt_se);
1351 : if (!stats)
1352 : return;
1353 :
1354 : __update_stats_wait_end(rq_of_rt_rq(rt_rq), p, stats);
1355 : }
1356 :
1357 : static inline void
1358 : update_stats_dequeue_rt(struct rt_rq *rt_rq, struct sched_rt_entity *rt_se,
1359 : int flags)
1360 : {
1361 0 : struct task_struct *p = NULL;
1362 :
1363 : if (!schedstat_enabled())
1364 : return;
1365 :
1366 : if (rt_entity_is_task(rt_se))
1367 : p = rt_task_of(rt_se);
1368 :
1369 : if ((flags & DEQUEUE_SLEEP) && p) {
1370 : unsigned int state;
1371 :
1372 : state = READ_ONCE(p->__state);
1373 : if (state & TASK_INTERRUPTIBLE)
1374 : __schedstat_set(p->stats.sleep_start,
1375 : rq_clock(rq_of_rt_rq(rt_rq)));
1376 :
1377 : if (state & TASK_UNINTERRUPTIBLE)
1378 : __schedstat_set(p->stats.block_start,
1379 : rq_clock(rq_of_rt_rq(rt_rq)));
1380 : }
1381 : }
1382 :
1383 0 : static void __enqueue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags)
1384 : {
1385 0 : struct rt_rq *rt_rq = rt_rq_of_se(rt_se);
1386 0 : struct rt_prio_array *array = &rt_rq->active;
1387 0 : struct rt_rq *group_rq = group_rt_rq(rt_se);
1388 0 : struct list_head *queue = array->queue + rt_se_prio(rt_se);
1389 :
1390 : /*
1391 : * Don't enqueue the group if it's throttled, or when empty.
1392 : * The latter is a consequence of the former when a child group
1393 : * gets throttled and the current group doesn't have any other
1394 : * active members.
1395 : */
1396 : if (group_rq && (rt_rq_throttled(group_rq) || !group_rq->rt_nr_running)) {
1397 : if (rt_se->on_list)
1398 : __delist_rt_entity(rt_se, array);
1399 : return;
1400 : }
1401 :
1402 0 : if (move_entity(flags)) {
1403 0 : WARN_ON_ONCE(rt_se->on_list);
1404 0 : if (flags & ENQUEUE_HEAD)
1405 0 : list_add(&rt_se->run_list, queue);
1406 : else
1407 0 : list_add_tail(&rt_se->run_list, queue);
1408 :
1409 0 : __set_bit(rt_se_prio(rt_se), array->bitmap);
1410 0 : rt_se->on_list = 1;
1411 : }
1412 0 : rt_se->on_rq = 1;
1413 :
1414 0 : inc_rt_tasks(rt_se, rt_rq);
1415 : }
1416 :
1417 0 : static void __dequeue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags)
1418 : {
1419 0 : struct rt_rq *rt_rq = rt_rq_of_se(rt_se);
1420 0 : struct rt_prio_array *array = &rt_rq->active;
1421 :
1422 0 : if (move_entity(flags)) {
1423 0 : WARN_ON_ONCE(!rt_se->on_list);
1424 : __delist_rt_entity(rt_se, array);
1425 : }
1426 0 : rt_se->on_rq = 0;
1427 :
1428 0 : dec_rt_tasks(rt_se, rt_rq);
1429 0 : }
1430 :
1431 : /*
1432 : * Because the prio of an upper entry depends on the lower
1433 : * entries, we must remove entries top-down.
1434 : */
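          : /*
          :  * The first loop below records the chain bottom-up through rt_se->back; the
          :  * second loop then walks that chain from the top-most entity back down
          :  * towards the task, dequeueing each level in turn.
          :  */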
1435 0 : static void dequeue_rt_stack(struct sched_rt_entity *rt_se, unsigned int flags)
1436 : {
1437 0 : struct sched_rt_entity *back = NULL;
1438 :
1439 0 : for_each_sched_rt_entity(rt_se) {
1440 0 : rt_se->back = back;
1441 0 : back = rt_se;
1442 : }
1443 :
1444 0 : dequeue_top_rt_rq(rt_rq_of_se(back));
1445 :
1446 0 : for (rt_se = back; rt_se; rt_se = rt_se->back) {
1447 0 : if (on_rt_rq(rt_se))
1448 0 : __dequeue_rt_entity(rt_se, flags);
1449 : }
1450 0 : }
1451 :
1452 0 : static void enqueue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags)
1453 : {
1454 0 : struct rq *rq = rq_of_rt_se(rt_se);
1455 :
1456 0 : update_stats_enqueue_rt(rt_rq_of_se(rt_se), rt_se, flags);
1457 :
1458 0 : dequeue_rt_stack(rt_se, flags);
1459 0 : for_each_sched_rt_entity(rt_se)
1460 0 : __enqueue_rt_entity(rt_se, flags);
1461 0 : enqueue_top_rt_rq(&rq->rt);
1462 0 : }
1463 :
1464 : static void dequeue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags)
1465 : {
1466 0 : struct rq *rq = rq_of_rt_se(rt_se);
1467 :
1468 0 : update_stats_dequeue_rt(rt_rq_of_se(rt_se), rt_se, flags);
1469 :
1470 0 : dequeue_rt_stack(rt_se, flags);
1471 :
1472 0 : for_each_sched_rt_entity(rt_se) {
1473 : struct rt_rq *rt_rq = group_rt_rq(rt_se);
1474 :
1475 : if (rt_rq && rt_rq->rt_nr_running)
1476 : __enqueue_rt_entity(rt_se, flags);
1477 : }
1478 0 : enqueue_top_rt_rq(&rq->rt);
1479 : }
1480 :
1481 : /*
1482 : * Adding/removing a task to/from a priority array:
1483 : */
1484 : static void
1485 0 : enqueue_task_rt(struct rq *rq, struct task_struct *p, int flags)
1486 : {
1487 0 : struct sched_rt_entity *rt_se = &p->rt;
1488 :
1489 0 : if (flags & ENQUEUE_WAKEUP)
1490 0 : rt_se->timeout = 0;
1491 :
1492 : check_schedstat_required();
1493 0 : update_stats_wait_start_rt(rt_rq_of_se(rt_se), rt_se);
1494 :
1495 0 : enqueue_rt_entity(rt_se, flags);
1496 :
1497 0 : if (!task_current(rq, p) && p->nr_cpus_allowed > 1)
1498 : enqueue_pushable_task(rq, p);
1499 0 : }
1500 :
1501 0 : static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int flags)
1502 : {
1503 0 : struct sched_rt_entity *rt_se = &p->rt;
1504 :
1505 0 : update_curr_rt(rq);
1506 0 : dequeue_rt_entity(rt_se, flags);
1507 :
1508 0 : dequeue_pushable_task(rq, p);
1509 0 : }
1510 :
1511 : /*
1512 : * Put the task at the head or the tail of the run list without the overhead of
1513 : * dequeue followed by enqueue.
1514 : */
1515 : static void
1516 0 : requeue_rt_entity(struct rt_rq *rt_rq, struct sched_rt_entity *rt_se, int head)
1517 : {
1518 0 : if (on_rt_rq(rt_se)) {
1519 0 : struct rt_prio_array *array = &rt_rq->active;
1520 0 : struct list_head *queue = array->queue + rt_se_prio(rt_se);
1521 :
1522 0 : if (head)
1523 0 : list_move(&rt_se->run_list, queue);
1524 : else
1525 0 : list_move_tail(&rt_se->run_list, queue);
1526 : }
1527 0 : }
1528 :
1529 : static void requeue_task_rt(struct rq *rq, struct task_struct *p, int head)
1530 : {
1531 0 : struct sched_rt_entity *rt_se = &p->rt;
1532 : struct rt_rq *rt_rq;
1533 :
1534 0 : for_each_sched_rt_entity(rt_se) {
1535 0 : rt_rq = rt_rq_of_se(rt_se);
1536 0 : requeue_rt_entity(rt_rq, rt_se, head);
1537 : }
1538 : }
1539 :
1540 0 : static void yield_task_rt(struct rq *rq)
1541 : {
1542 0 : requeue_task_rt(rq, rq->curr, 0);
1543 0 : }
1544 :
1545 : #ifdef CONFIG_SMP
1546 : static int find_lowest_rq(struct task_struct *task);
1547 :
1548 : static int
1549 : select_task_rq_rt(struct task_struct *p, int cpu, int flags)
1550 : {
1551 : struct task_struct *curr;
1552 : struct rq *rq;
1553 : bool test;
1554 :
1555 : /* For anything but wake ups, just return the task_cpu */
1556 : if (!(flags & (WF_TTWU | WF_FORK)))
1557 : goto out;
1558 :
1559 : rq = cpu_rq(cpu);
1560 :
1561 : rcu_read_lock();
1562 : curr = READ_ONCE(rq->curr); /* unlocked access */
1563 :
1564 : /*
1565 : * If the current task on @p's runqueue is an RT task, then
1566 : * try to see if we can wake this RT task up on another
1567 : * runqueue. Otherwise simply start this RT task
1568 : * on its current runqueue.
1569 : *
1570 : * We want to avoid overloading runqueues. If the woken
1571 : * task is a higher priority, then it will stay on this CPU
1572 : * and the lower prio task should be moved to another CPU.
1573 : * Even though this will probably make the lower prio task
1574 : * lose its cache, we do not want to bounce a higher task
1575 : * around just because it gave up its CPU, perhaps for a
1576 : * lock?
1577 : *
1578 : * For equal prio tasks, we just let the scheduler sort it out.
1579 : *
1580 : * Otherwise, just let it ride on the affined RQ and the
1581 : * post-schedule router will push the preempted task away
1582 : *
1583 : * This test is optimistic, if we get it wrong the load-balancer
1584 : * will have to sort it out.
1585 : *
1586 : * We take into account the capacity of the CPU to ensure it fits the
1587 : * requirement of the task - which is only important on heterogeneous
1588 : * systems like big.LITTLE.
1589 : */
1590 : test = curr &&
1591 : unlikely(rt_task(curr)) &&
1592 : (curr->nr_cpus_allowed < 2 || curr->prio <= p->prio);
1593 :
1594 : if (test || !rt_task_fits_capacity(p, cpu)) {
1595 : int target = find_lowest_rq(p);
1596 :
1597 : /*
1598 : * Bail out if we were forcing a migration to find a better
1599 : * fitting CPU but our search failed.
1600 : */
1601 : if (!test && target != -1 && !rt_task_fits_capacity(p, target))
1602 : goto out_unlock;
1603 :
1604 : /*
1605 : * Don't bother moving it if the destination CPU is
1606 : * not running a lower priority task.
1607 : */
1608 : if (target != -1 &&
1609 : p->prio < cpu_rq(target)->rt.highest_prio.curr)
1610 : cpu = target;
1611 : }
1612 :
1613 : out_unlock:
1614 : rcu_read_unlock();
1615 :
1616 : out:
1617 : return cpu;
1618 : }
1619 :
1620 : static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p)
1621 : {
1622 : /*
1623 : * Current can't be migrated, useless to reschedule,
1624 : * let's hope p can move out.
1625 : */
1626 : if (rq->curr->nr_cpus_allowed == 1 ||
1627 : !cpupri_find(&rq->rd->cpupri, rq->curr, NULL))
1628 : return;
1629 :
1630 : /*
1631 : * p is migratable, so let's not schedule it and
1632 : * see if it is pushed or pulled somewhere else.
1633 : */
1634 : if (p->nr_cpus_allowed != 1 &&
1635 : cpupri_find(&rq->rd->cpupri, p, NULL))
1636 : return;
1637 :
1638 : /*
1639 : * There appear to be other CPUs that can accept
1640 : * the current task but none can run 'p', so let's reschedule
1641 : * to try and push the current task away:
1642 : */
1643 : requeue_task_rt(rq, p, 1);
1644 : resched_curr(rq);
1645 : }
1646 :
1647 : static int balance_rt(struct rq *rq, struct task_struct *p, struct rq_flags *rf)
1648 : {
1649 : if (!on_rt_rq(&p->rt) && need_pull_rt_task(rq, p)) {
1650 : /*
1651 : * This is OK, because current is on_cpu, which avoids it being
1652 : * picked for load-balance and preemption/IRQs are still
1653 : * disabled avoiding further scheduler activity on it and we've
1654 : * not yet started the picking loop.
1655 : */
1656 : rq_unpin_lock(rq, rf);
1657 : pull_rt_task(rq);
1658 : rq_repin_lock(rq, rf);
1659 : }
1660 :
1661 : return sched_stop_runnable(rq) || sched_dl_runnable(rq) || sched_rt_runnable(rq);
1662 : }
1663 : #endif /* CONFIG_SMP */
1664 :
1665 : /*
1666 : * Preempt the current task with a newly woken task if needed:
1667 : */
1668 0 : static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p, int flags)
1669 : {
1670 0 : if (p->prio < rq->curr->prio) {
1671 0 : resched_curr(rq);
1672 0 : return;
1673 : }
1674 :
1675 : #ifdef CONFIG_SMP
1676 : /*
1677 : * If:
1678 : *
1679 : * - the newly woken task is of equal priority to the current task
1680 : * - the newly woken task is non-migratable while current is migratable
1681 : * - current will be preempted on the next reschedule
1682 : *
1683 : * we should check to see if current can readily move to a different
1684 : * cpu. If so, we will reschedule to allow the push logic to try
1685 : * to move current somewhere else, making room for our non-migratable
1686 : * task.
1687 : */
1688 : if (p->prio == rq->curr->prio && !test_tsk_need_resched(rq->curr))
1689 : check_preempt_equal_prio(rq, p);
1690 : #endif
1691 : }
1692 :
1693 0 : static inline void set_next_task_rt(struct rq *rq, struct task_struct *p, bool first)
1694 : {
1695 0 : struct sched_rt_entity *rt_se = &p->rt;
1696 0 : struct rt_rq *rt_rq = &rq->rt;
1697 :
1698 0 : p->se.exec_start = rq_clock_task(rq);
1699 0 : if (on_rt_rq(&p->rt))
1700 : update_stats_wait_end_rt(rt_rq, rt_se);
1701 :
1702 : /* The running task is never eligible for pushing */
1703 0 : dequeue_pushable_task(rq, p);
1704 :
1705 0 : if (!first)
1706 : return;
1707 :
1708 : /*
1709 : * If prev task was rt, put_prev_task() has already updated the
1710 : * utilization. We only care about the case where we start to schedule an
1711 : * rt task
1712 : */
1713 0 : if (rq->curr->sched_class != &rt_sched_class)
1714 0 : update_rt_rq_load_avg(rq_clock_pelt(rq), rq, 0);
1715 :
1716 : rt_queue_push_tasks(rq);
1717 : }
1718 :
1719 0 : static struct sched_rt_entity *pick_next_rt_entity(struct rt_rq *rt_rq)
1720 : {
1721 0 : struct rt_prio_array *array = &rt_rq->active;
1722 0 : struct sched_rt_entity *next = NULL;
1723 : struct list_head *queue;
1724 : int idx;
1725 :
1726 0 : idx = sched_find_first_bit(array->bitmap);
1727 0 : BUG_ON(idx >= MAX_RT_PRIO);
1728 :
1729 0 : queue = array->queue + idx;
1730 0 : next = list_entry(queue->next, struct sched_rt_entity, run_list);
1731 :
1732 0 : return next;
1733 : }
1734 :
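          : /*
          :  * With CONFIG_RT_GROUP_SCHED the picked entity may itself be a group; keep
          :  * descending into its rt_rq until a task-level entity is reached.
          :  */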
1735 0 : static struct task_struct *_pick_next_task_rt(struct rq *rq)
1736 : {
1737 : struct sched_rt_entity *rt_se;
1738 0 : struct rt_rq *rt_rq = &rq->rt;
1739 :
1740 : do {
1741 0 : rt_se = pick_next_rt_entity(rt_rq);
1742 0 : BUG_ON(!rt_se);
1743 0 : rt_rq = group_rt_rq(rt_se);
1744 : } while (rt_rq);
1745 :
1746 0 : return rt_task_of(rt_se);
1747 : }
1748 :
1749 : static struct task_struct *pick_task_rt(struct rq *rq)
1750 : {
1751 : struct task_struct *p;
1752 :
1753 0 : if (!sched_rt_runnable(rq))
1754 : return NULL;
1755 :
1756 0 : p = _pick_next_task_rt(rq);
1757 :
1758 : return p;
1759 : }
1760 :
1761 0 : static struct task_struct *pick_next_task_rt(struct rq *rq)
1762 : {
1763 0 : struct task_struct *p = pick_task_rt(rq);
1764 :
1765 0 : if (p)
1766 0 : set_next_task_rt(rq, p, true);
1767 :
1768 0 : return p;
1769 : }
1770 :
1771 0 : static void put_prev_task_rt(struct rq *rq, struct task_struct *p)
1772 : {
1773 0 : struct sched_rt_entity *rt_se = &p->rt;
1774 0 : struct rt_rq *rt_rq = &rq->rt;
1775 :
1776 0 : if (on_rt_rq(&p->rt))
1777 : update_stats_wait_start_rt(rt_rq, rt_se);
1778 :
1779 0 : update_curr_rt(rq);
1780 :
1781 0 : update_rt_rq_load_avg(rq_clock_pelt(rq), rq, 1);
1782 :
1783 : /*
1784 : * The previous task needs to be made eligible for pushing
1785 : * if it is still active
1786 : */
1787 0 : if (on_rt_rq(&p->rt) && p->nr_cpus_allowed > 1)
1788 : enqueue_pushable_task(rq, p);
1789 0 : }
1790 :
1791 : #ifdef CONFIG_SMP
1792 :
1793 : /* Only try algorithms three times */
1794 : #define RT_MAX_TRIES 3
1795 :
1796 : static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu)
1797 : {
1798 : if (!task_running(rq, p) &&
1799 : cpumask_test_cpu(cpu, &p->cpus_mask))
1800 : return 1;
1801 :
1802 : return 0;
1803 : }
1804 :
1805 : /*
1806 : * Return the highest-priority pushable task on this rq that is suitable to
1807 : * be executed on the given CPU, or NULL if there is none
1808 : */
1809 : static struct task_struct *pick_highest_pushable_task(struct rq *rq, int cpu)
1810 : {
1811 : struct plist_head *head = &rq->rt.pushable_tasks;
1812 : struct task_struct *p;
1813 :
1814 : if (!has_pushable_tasks(rq))
1815 : return NULL;
1816 :
1817 : plist_for_each_entry(p, head, pushable_tasks) {
1818 : if (pick_rt_task(rq, p, cpu))
1819 : return p;
1820 : }
1821 :
1822 : return NULL;
1823 : }
1824 :
1825 : static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask);
1826 :
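          : /*
          :  * Ask cpupri for the set of CPUs running lower-priority work than @task,
          :  * then prefer, in order: the task's previous CPU, this CPU or a CPU close
          :  * to it within an SD_WAKE_AFFINE domain, and finally any CPU in the mask.
          :  */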
1827 : static int find_lowest_rq(struct task_struct *task)
1828 : {
1829 : struct sched_domain *sd;
1830 : struct cpumask *lowest_mask = this_cpu_cpumask_var_ptr(local_cpu_mask);
1831 : int this_cpu = smp_processor_id();
1832 : int cpu = task_cpu(task);
1833 : int ret;
1834 :
1835 : /* Make sure the mask is initialized first */
1836 : if (unlikely(!lowest_mask))
1837 : return -1;
1838 :
1839 : if (task->nr_cpus_allowed == 1)
1840 : return -1; /* No other targets possible */
1841 :
1842 : /*
1843 : * If we're on an asym system, ensure we consider the different capacities
1844 : * of the CPUs when searching for the lowest_mask.
1845 : */
1846 : if (static_branch_unlikely(&sched_asym_cpucapacity)) {
1847 :
1848 : ret = cpupri_find_fitness(&task_rq(task)->rd->cpupri,
1849 : task, lowest_mask,
1850 : rt_task_fits_capacity);
1851 : } else {
1852 :
1853 : ret = cpupri_find(&task_rq(task)->rd->cpupri,
1854 : task, lowest_mask);
1855 : }
1856 :
1857 : if (!ret)
1858 : return -1; /* No targets found */
1859 :
1860 : /*
1861 : * At this point we have built a mask of CPUs representing the
1862 : * lowest priority tasks in the system. Now we want to elect
1863 : * the best one based on our affinity and topology.
1864 : *
1865 : * We prioritize the last CPU that the task executed on since
1866 : * it is most likely cache-hot in that location.
1867 : */
1868 : if (cpumask_test_cpu(cpu, lowest_mask))
1869 : return cpu;
1870 :
1871 : /*
1872 : * Otherwise, we consult the sched_domains span maps to figure
1873 : * out which CPU is logically closest to our hot cache data.
1874 : */
1875 : if (!cpumask_test_cpu(this_cpu, lowest_mask))
1876 : this_cpu = -1; /* Skip this_cpu opt if not among lowest */
1877 :
1878 : rcu_read_lock();
1879 : for_each_domain(cpu, sd) {
1880 : if (sd->flags & SD_WAKE_AFFINE) {
1881 : int best_cpu;
1882 :
1883 : /*
1884 : * "this_cpu" is cheaper to preempt than a
1885 : * remote processor.
1886 : */
1887 : if (this_cpu != -1 &&
1888 : cpumask_test_cpu(this_cpu, sched_domain_span(sd))) {
1889 : rcu_read_unlock();
1890 : return this_cpu;
1891 : }
1892 :
1893 : best_cpu = cpumask_any_and_distribute(lowest_mask,
1894 : sched_domain_span(sd));
1895 : if (best_cpu < nr_cpu_ids) {
1896 : rcu_read_unlock();
1897 : return best_cpu;
1898 : }
1899 : }
1900 : }
1901 : rcu_read_unlock();
1902 :
1903 : /*
1904 : * And finally, if there were no matches within the domains
1905 : * just give the caller *something* to work with from the compatible
1906 : * locations.
1907 : */
1908 : if (this_cpu != -1)
1909 : return this_cpu;
1910 :
1911 : cpu = cpumask_any_distribute(lowest_mask);
1912 : if (cpu < nr_cpu_ids)
1913 : return cpu;
1914 :
1915 : return -1;
1916 : }
1917 :
1918 : /* Will lock the rq it finds */
1919 : static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq)
1920 : {
1921 : struct rq *lowest_rq = NULL;
1922 : int tries;
1923 : int cpu;
1924 :
1925 : for (tries = 0; tries < RT_MAX_TRIES; tries++) {
1926 : cpu = find_lowest_rq(task);
1927 :
1928 : if ((cpu == -1) || (cpu == rq->cpu))
1929 : break;
1930 :
1931 : lowest_rq = cpu_rq(cpu);
1932 :
1933 : if (lowest_rq->rt.highest_prio.curr <= task->prio) {
1934 : /*
1935 : * Target rq has tasks of equal or higher priority,
1936 : * retrying does not release any lock and is unlikely
1937 : * to yield a different result.
1938 : */
1939 : lowest_rq = NULL;
1940 : break;
1941 : }
1942 :
1943 : /* if the prio of this runqueue changed, try again */
1944 : if (double_lock_balance(rq, lowest_rq)) {
1945 : /*
1946 : * We had to unlock the run queue. In
1947 : * the mean time, task could have
1948 : * migrated already or had its affinity changed.
1949 : * Also make sure that it wasn't scheduled on its rq.
1950 : */
1951 : if (unlikely(task_rq(task) != rq ||
1952 : !cpumask_test_cpu(lowest_rq->cpu, &task->cpus_mask) ||
1953 : task_running(rq, task) ||
1954 : !rt_task(task) ||
1955 : !task_on_rq_queued(task))) {
1956 :
1957 : double_unlock_balance(rq, lowest_rq);
1958 : lowest_rq = NULL;
1959 : break;
1960 : }
1961 : }
1962 :
1963 : /* If this rq is still suitable use it. */
1964 : if (lowest_rq->rt.highest_prio.curr > task->prio)
1965 : break;
1966 :
1967 : /* try again */
1968 : double_unlock_balance(rq, lowest_rq);
1969 : lowest_rq = NULL;
1970 : }
1971 :
1972 : return lowest_rq;
1973 : }
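
find_lock_lowest_rq() relies on double_lock_balance(), which must take two runqueue locks without risking an ABBA deadlock and may therefore drop rq->lock first, which is why the task is re-validated above. Below is a minimal userspace sketch of the underlying lock-ordering idea only (not the kernel implementation, which additionally reports whether the first lock had to be dropped):

/* Minimal sketch: take a pair of locks in a fixed (address) order so two
 * threads locking the same pair can never deadlock (ABBA). */
#include <pthread.h>
#include <stdint.h>

static void lock_pair(pthread_mutex_t *a, pthread_mutex_t *b)
{
	if (a == b) {
		pthread_mutex_lock(a);
	} else if ((uintptr_t)a < (uintptr_t)b) {
		pthread_mutex_lock(a);
		pthread_mutex_lock(b);
	} else {
		pthread_mutex_lock(b);
		pthread_mutex_lock(a);
	}
}

static void unlock_pair(pthread_mutex_t *a, pthread_mutex_t *b)
{
	pthread_mutex_unlock(a);
	if (a != b)
		pthread_mutex_unlock(b);
}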
1974 :
1975 : static struct task_struct *pick_next_pushable_task(struct rq *rq)
1976 : {
1977 : struct task_struct *p;
1978 :
1979 : if (!has_pushable_tasks(rq))
1980 : return NULL;
1981 :
1982 : p = plist_first_entry(&rq->rt.pushable_tasks,
1983 : struct task_struct, pushable_tasks);
1984 :
1985 : BUG_ON(rq->cpu != task_cpu(p));
1986 : BUG_ON(task_current(rq, p));
1987 : BUG_ON(p->nr_cpus_allowed <= 1);
1988 :
1989 : BUG_ON(!task_on_rq_queued(p));
1990 : BUG_ON(!rt_task(p));
1991 :
1992 : return p;
1993 : }
1994 :
1995 : /*
1996 : * If the current CPU has more than one RT task, see if the non-running
1997 : * task can migrate over to a CPU that is running a task
1998 : * of lesser priority.
1999 : */
2000 : static int push_rt_task(struct rq *rq, bool pull)
2001 : {
2002 : struct task_struct *next_task;
2003 : struct rq *lowest_rq;
2004 : int ret = 0;
2005 :
2006 : if (!rq->rt.overloaded)
2007 : return 0;
2008 :
2009 : next_task = pick_next_pushable_task(rq);
2010 : if (!next_task)
2011 : return 0;
2012 :
2013 : retry:
2014 : /*
2015 : * It's possible that the next_task slipped in with a
2016 : * higher priority than current. If that's the case
2017 : * just reschedule current.
2018 : */
2019 : if (unlikely(next_task->prio < rq->curr->prio)) {
2020 : resched_curr(rq);
2021 : return 0;
2022 : }
2023 :
2024 : if (is_migration_disabled(next_task)) {
2025 : struct task_struct *push_task = NULL;
2026 : int cpu;
2027 :
2028 : if (!pull || rq->push_busy)
2029 : return 0;
2030 :
2031 : /*
2032 : * Invoking find_lowest_rq() on anything but an RT task doesn't
2033 : * make sense. Per the above priority check, curr has to
2034 : * be of higher priority than next_task, so no need to
2035 : * reschedule when bailing out.
2036 : *
2037 : * Note that the stoppers are masqueraded as SCHED_FIFO
2038 : * (cf. sched_set_stop_task()), so we can't rely on rt_task().
2039 : */
2040 : if (rq->curr->sched_class != &rt_sched_class)
2041 : return 0;
2042 :
2043 : cpu = find_lowest_rq(rq->curr);
2044 : if (cpu == -1 || cpu == rq->cpu)
2045 : return 0;
2046 :
2047 : /*
2048 : * We found a CPU with a lower priority than @next_task, so
2049 : * @next_task ought to be running there. However, @next_task is
2050 : * migration-disabled and cannot be moved; instead, attempt to
2051 : * push away the task currently running on this CPU.
2052 : */
2053 : push_task = get_push_task(rq);
2054 : if (push_task) {
2055 : raw_spin_rq_unlock(rq);
2056 : stop_one_cpu_nowait(rq->cpu, push_cpu_stop,
2057 : push_task, &rq->push_work);
2058 : raw_spin_rq_lock(rq);
2059 : }
2060 :
2061 : return 0;
2062 : }
2063 :
2064 : if (WARN_ON(next_task == rq->curr))
2065 : return 0;
2066 :
2067 : /* We might release rq lock */
2068 : get_task_struct(next_task);
2069 :
2070 : /* find_lock_lowest_rq locks the rq if found */
2071 : lowest_rq = find_lock_lowest_rq(next_task, rq);
2072 : if (!lowest_rq) {
2073 : struct task_struct *task;
2074 : /*
2075 : * find_lock_lowest_rq releases rq->lock
2076 : * so it is possible that next_task has migrated.
2077 : *
2078 : * We need to make sure that the task is still on the same
2079 : * run-queue and is also still the next task eligible for
2080 : * pushing.
2081 : */
2082 : task = pick_next_pushable_task(rq);
2083 : if (task == next_task) {
2084 : /*
2085 : * The task hasn't migrated, and is still the next
2086 : * eligible task, but we failed to find a run-queue
2087 : * to push it to. Do not retry in this case, since
2088 : * other CPUs will pull from us when ready.
2089 : */
2090 : goto out;
2091 : }
2092 :
2093 : if (!task)
2094 : /* No more tasks, just exit */
2095 : goto out;
2096 :
2097 : /*
2098 : * Something has shifted, try again.
2099 : */
2100 : put_task_struct(next_task);
2101 : next_task = task;
2102 : goto retry;
2103 : }
2104 :
2105 : deactivate_task(rq, next_task, 0);
2106 : set_task_cpu(next_task, lowest_rq->cpu);
2107 : activate_task(lowest_rq, next_task, 0);
2108 : resched_curr(lowest_rq);
2109 : ret = 1;
2110 :
2111 : double_unlock_balance(rq, lowest_rq);
2112 : out:
2113 : put_task_struct(next_task);
2114 :
2115 : return ret;
2116 : }
2117 :
2118 : static void push_rt_tasks(struct rq *rq)
2119 : {
2120 : /* push_rt_task will return true if it moved an RT */
2121 : while (push_rt_task(rq, false))
2122 : ;
2123 : }
2124 :
2125 : #ifdef HAVE_RT_PUSH_IPI
2126 :
2127 : /*
2128 : * When a high priority task schedules out from a CPU and a lower priority
2129 : * task is scheduled in, a check is made to see if there are any RT tasks
2130 : * on other CPUs that are waiting to run because a higher priority RT task
2131 : * is currently running on its CPU. In this case, the CPU with multiple RT
2132 : * tasks queued on it (overloaded) needs to be notified that a CPU has opened
2133 : * up that may be able to run one of its non-running queued RT tasks.
2134 : *
2135 : * All CPUs with overloaded RT tasks need to be notified as there is currently
2136 : * no way to know which of these CPUs have the highest priority task waiting
2137 : * to run. Instead of trying to take a spinlock on each of these CPUs,
2138 : * which has been shown to cause large latency on machines with many
2139 : * CPUs, an IPI is sent to the CPUs to have them push off their
2140 : * overloaded RT tasks waiting to run.
2141 : *
2142 : * Just sending an IPI to each of the CPUs is also an issue: on machines
2143 : * with a large CPU count this can cause an IPI storm on a single CPU,
2144 : * especially if it's the only CPU with multiple RT tasks queued and a
2145 : * large number of CPUs schedule a lower priority task at the same time.
2146 : *
2147 : * Each root domain has its own irq work function that can iterate over
2148 : * all CPUs with RT overloaded tasks. Since every RT overloaded CPU
2149 : * must be checked whenever one or many CPUs lower their priority,
2150 : * a single irq work iterator walks them and tries to push off the
2151 : * RT tasks that are waiting to run.
2152 : *
2153 : * When a CPU schedules a lower priority task, it will kick off the
2154 : * irq work iterator that will jump to each CPU with overloaded RT tasks.
2155 : * As it only takes the first CPU that schedules a lower priority task
2156 : * to start the process, the rto_loop_start flag is set atomically and
2157 : * only the CPU that wins that race goes on to take the rto_lock.
2158 : * This prevents high contention on the lock as the process handles all
2159 : * CPUs scheduling lower priority tasks.
2160 : *
2161 : * All CPUs that are scheduling a lower priority task will increment the
2162 : * rto_loop_next variable. This makes sure that the irq work iterator
2163 : * checks all RT overloaded CPUs whenever a CPU schedules a new lower
2164 : * priority task, even if the iterator is in the middle of a scan.
2165 : * Incrementing rto_loop_next causes the iterator to perform another scan.
2166 : *
2167 : */
2168 : static int rto_next_cpu(struct root_domain *rd)
2169 : {
2170 : int next;
2171 : int cpu;
2172 :
2173 : /*
2174 : * When starting the IPI RT pushing, rto_cpu is set to -1, so
2175 : * rto_next_cpu() will simply return the first CPU found in
2176 : * the rto_mask.
2177 : *
2178 : * If rto_next_cpu() is called with rto_cpu set to a valid CPU, it
2179 : * will return the next CPU found in the rto_mask.
2180 : *
2181 : * If there are no more CPUs left in the rto_mask, then a check is made
2182 : * against rto_loop and rto_loop_next. rto_loop is only updated with
2183 : * the rto_lock held, but any CPU may increment the rto_loop_next
2184 : * without any locking.
2185 : */
2186 : for (;;) {
2187 :
2188 : /* When rto_cpu is -1 this acts like cpumask_first() */
2189 : cpu = cpumask_next(rd->rto_cpu, rd->rto_mask);
2190 :
2191 : rd->rto_cpu = cpu;
2192 :
2193 : if (cpu < nr_cpu_ids)
2194 : return cpu;
2195 :
2196 : rd->rto_cpu = -1;
2197 :
2198 : /*
2199 : * ACQUIRE ensures we see the @rto_mask changes
2200 : * made prior to the @next value observed.
2201 : *
2202 : * Matches WMB in rt_set_overload().
2203 : */
2204 : next = atomic_read_acquire(&rd->rto_loop_next);
2205 :
2206 : if (rd->rto_loop == next)
2207 : break;
2208 :
2209 : rd->rto_loop = next;
2210 : }
2211 :
2212 : return -1;
2213 : }
2214 :
2215 : static inline bool rto_start_trylock(atomic_t *v)
2216 : {
2217 : return !atomic_cmpxchg_acquire(v, 0, 1);
2218 : }
2219 :
2220 : static inline void rto_start_unlock(atomic_t *v)
2221 : {
2222 : atomic_set_release(v, 0);
2223 : }
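
rto_start_trylock()/rto_start_unlock() form a tiny one-initiator latch: only the first CPU to flip the flag from 0 to 1 proceeds. Below is a hedged userspace analogue using C11 atomics; it is an illustration only and the names are made up.

/* Userspace analogue of the rto_loop_start latch (illustration only). */
#include <stdatomic.h>
#include <stdbool.h>

static bool start_trylock(atomic_int *v)
{
	int expected = 0;

	/* Only the first caller that still sees 0 wins; the acquire on
	 * success pairs with the release in start_unlock(). */
	return atomic_compare_exchange_strong_explicit(v, &expected, 1,
						       memory_order_acquire,
						       memory_order_relaxed);
}

static void start_unlock(atomic_int *v)
{
	atomic_store_explicit(v, 0, memory_order_release);
}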
2224 :
2225 : static void tell_cpu_to_push(struct rq *rq)
2226 : {
2227 : int cpu = -1;
2228 :
2229 : /* Keep the loop going if the IPI is currently active */
2230 : atomic_inc(&rq->rd->rto_loop_next);
2231 :
2232 : /* Only one CPU can initiate a loop at a time */
2233 : if (!rto_start_trylock(&rq->rd->rto_loop_start))
2234 : return;
2235 :
2236 : raw_spin_lock(&rq->rd->rto_lock);
2237 :
2238 : /*
2239 : * rto_cpu is updated under the lock. If it holds a valid CPU,
2240 : * then the IPI is still running and will continue due to the
2241 : * update to loop_next, and nothing needs to be done here.
2242 : * Otherwise it is finishing up and an IPI needs to be sent.
2243 : */
2244 : if (rq->rd->rto_cpu < 0)
2245 : cpu = rto_next_cpu(rq->rd);
2246 :
2247 : raw_spin_unlock(&rq->rd->rto_lock);
2248 :
2249 : rto_start_unlock(&rq->rd->rto_loop_start);
2250 :
2251 : if (cpu >= 0) {
2252 : /* Make sure the rd does not get freed while pushing */
2253 : sched_get_rd(rq->rd);
2254 : irq_work_queue_on(&rq->rd->rto_push_work, cpu);
2255 : }
2256 : }
2257 :
2258 : /* Called from hardirq context */
2259 : void rto_push_irq_work_func(struct irq_work *work)
2260 : {
2261 : struct root_domain *rd =
2262 : container_of(work, struct root_domain, rto_push_work);
2263 : struct rq *rq;
2264 : int cpu;
2265 :
2266 : rq = this_rq();
2267 :
2268 : /*
2269 : * We do not need to grab the lock to check for has_pushable_tasks.
2270 : * When it gets updated, a check is made if a push is possible.
2271 : */
2272 : if (has_pushable_tasks(rq)) {
2273 : raw_spin_rq_lock(rq);
2274 : while (push_rt_task(rq, true))
2275 : ;
2276 : raw_spin_rq_unlock(rq);
2277 : }
2278 :
2279 : raw_spin_lock(&rd->rto_lock);
2280 :
2281 : /* Pass the IPI to the next rt overloaded queue */
2282 : cpu = rto_next_cpu(rd);
2283 :
2284 : raw_spin_unlock(&rd->rto_lock);
2285 :
2286 : if (cpu < 0) {
2287 : sched_put_rd(rd);
2288 : return;
2289 : }
2290 :
2291 : /* Try the next RT overloaded CPU */
2292 : irq_work_queue_on(&rd->rto_push_work, cpu);
2293 : }
2294 : #endif /* HAVE_RT_PUSH_IPI */
2295 :
2296 : static void pull_rt_task(struct rq *this_rq)
2297 : {
2298 : int this_cpu = this_rq->cpu, cpu;
2299 : bool resched = false;
2300 : struct task_struct *p, *push_task;
2301 : struct rq *src_rq;
2302 : int rt_overload_count = rt_overloaded(this_rq);
2303 :
2304 : if (likely(!rt_overload_count))
2305 : return;
2306 :
2307 : /*
2308 : * Match the barrier from rt_set_overload(); this guarantees that if we
2309 : * see overloaded we must also see the rto_mask bit.
2310 : */
2311 : smp_rmb();
2312 :
2313 : /* If we are the only overloaded CPU do nothing */
2314 : if (rt_overload_count == 1 &&
2315 : cpumask_test_cpu(this_rq->cpu, this_rq->rd->rto_mask))
2316 : return;
2317 :
2318 : #ifdef HAVE_RT_PUSH_IPI
2319 : if (sched_feat(RT_PUSH_IPI)) {
2320 : tell_cpu_to_push(this_rq);
2321 : return;
2322 : }
2323 : #endif
2324 :
2325 : for_each_cpu(cpu, this_rq->rd->rto_mask) {
2326 : if (this_cpu == cpu)
2327 : continue;
2328 :
2329 : src_rq = cpu_rq(cpu);
2330 :
2331 : /*
2332 : * Don't bother taking the src_rq->lock if the next highest
2333 : * task is known to be lower-priority than our current task.
2334 : * This may look racy, but if this value is about to go
2335 : * logically higher, the src_rq will push this task away.
2336 : * And if it's going logically lower, we do not care.
2337 : */
2338 : if (src_rq->rt.highest_prio.next >=
2339 : this_rq->rt.highest_prio.curr)
2340 : continue;
2341 :
2342 : /*
2343 : * We can potentially drop this_rq's lock in
2344 : * double_lock_balance, and another CPU could
2345 : * alter this_rq
2346 : */
2347 : push_task = NULL;
2348 : double_lock_balance(this_rq, src_rq);
2349 :
2350 : /*
2351 : * We can only pull a task that is pushable
2352 : * on its rq, and no others.
2353 : */
2354 : p = pick_highest_pushable_task(src_rq, this_cpu);
2355 :
2356 : /*
2357 : * Do we have an RT task that preempts
2358 : * the to-be-scheduled task?
2359 : */
2360 : if (p && (p->prio < this_rq->rt.highest_prio.curr)) {
2361 : WARN_ON(p == src_rq->curr);
2362 : WARN_ON(!task_on_rq_queued(p));
2363 :
2364 : /*
2365 : * There's a chance that p is higher in priority
2366 : * than what's currently running on its CPU.
2367 : * This happens when p is just waking up and hasn't
2368 : * had a chance to schedule yet. We only pull
2369 : * p if it is lower in priority than the
2370 : * task currently running on that run queue.
2371 : */
2372 : if (p->prio < src_rq->curr->prio)
2373 : goto skip;
2374 :
2375 : if (is_migration_disabled(p)) {
2376 : push_task = get_push_task(src_rq);
2377 : } else {
2378 : deactivate_task(src_rq, p, 0);
2379 : set_task_cpu(p, this_cpu);
2380 : activate_task(this_rq, p, 0);
2381 : resched = true;
2382 : }
2383 : /*
2384 : * We continue with the search, just in
2385 : * case there's an even higher prio task
2386 : * in another runqueue. (low likelihood
2387 : * but possible)
2388 : */
2389 : }
2390 : skip:
2391 : double_unlock_balance(this_rq, src_rq);
2392 :
2393 : if (push_task) {
2394 : raw_spin_rq_unlock(this_rq);
2395 : stop_one_cpu_nowait(src_rq->cpu, push_cpu_stop,
2396 : push_task, &src_rq->push_work);
2397 : raw_spin_rq_lock(this_rq);
2398 : }
2399 : }
2400 :
2401 : if (resched)
2402 : resched_curr(this_rq);
2403 : }
2404 :
2405 : /*
2406 : * If we are not running and we are not going to reschedule soon, we should
2407 : * try to push tasks away now
2408 : */
2409 : static void task_woken_rt(struct rq *rq, struct task_struct *p)
2410 : {
2411 : bool need_to_push = !task_running(rq, p) &&
2412 : !test_tsk_need_resched(rq->curr) &&
2413 : p->nr_cpus_allowed > 1 &&
2414 : (dl_task(rq->curr) || rt_task(rq->curr)) &&
2415 : (rq->curr->nr_cpus_allowed < 2 ||
2416 : rq->curr->prio <= p->prio);
2417 :
2418 : if (need_to_push)
2419 : push_rt_tasks(rq);
2420 : }
2421 :
2422 : /* Assumes rq->lock is held */
2423 : static void rq_online_rt(struct rq *rq)
2424 : {
2425 : if (rq->rt.overloaded)
2426 : rt_set_overload(rq);
2427 :
2428 : __enable_runtime(rq);
2429 :
2430 : cpupri_set(&rq->rd->cpupri, rq->cpu, rq->rt.highest_prio.curr);
2431 : }
2432 :
2433 : /* Assumes rq->lock is held */
2434 : static void rq_offline_rt(struct rq *rq)
2435 : {
2436 : if (rq->rt.overloaded)
2437 : rt_clear_overload(rq);
2438 :
2439 : __disable_runtime(rq);
2440 :
2441 : cpupri_set(&rq->rd->cpupri, rq->cpu, CPUPRI_INVALID);
2442 : }
2443 :
2444 : /*
2445 : * When switching away from the RT queue, we bring ourselves to a
2446 : * position where we might want to pull RT tasks from other runqueues.
2447 : */
2448 : static void switched_from_rt(struct rq *rq, struct task_struct *p)
2449 : {
2450 : /*
2451 : * If there are other RT tasks then we will reschedule
2452 : * and the scheduling of the other RT tasks will handle
2453 : * the balancing. But if we are the last RT task
2454 : * we may need to handle the pulling of RT tasks
2455 : * now.
2456 : */
2457 : if (!task_on_rq_queued(p) || rq->rt.rt_nr_running)
2458 : return;
2459 :
2460 : rt_queue_pull_task(rq);
2461 : }
2462 :
2463 : void __init init_sched_rt_class(void)
2464 : {
2465 : unsigned int i;
2466 :
2467 : for_each_possible_cpu(i) {
2468 : zalloc_cpumask_var_node(&per_cpu(local_cpu_mask, i),
2469 : GFP_KERNEL, cpu_to_node(i));
2470 : }
2471 : }
2472 : #endif /* CONFIG_SMP */
2473 :
2474 : /*
2475 : * When switching a task to RT, we may overload the runqueue
2476 : * with RT tasks. In this case we try to push them off to
2477 : * other runqueues.
2478 : */
2479 0 : static void switched_to_rt(struct rq *rq, struct task_struct *p)
2480 : {
2481 : /*
2482 : * If we are running, update the avg_rt tracking, as the running time
2483 : * will from now on be accounted to the rt average.
2484 : */
2485 0 : if (task_current(rq, p)) {
2486 0 : update_rt_rq_load_avg(rq_clock_pelt(rq), rq, 0);
2487 : return;
2488 : }
2489 :
2490 : /*
2491 : * If we are not running we may need to preempt the currently
2492 : * running task. If that task is also an RT task
2493 : * then see if we can move to another run queue.
2494 : */
2495 0 : if (task_on_rq_queued(p)) {
2496 : #ifdef CONFIG_SMP
2497 : if (p->nr_cpus_allowed > 1 && rq->rt.overloaded)
2498 : rt_queue_push_tasks(rq);
2499 : #endif /* CONFIG_SMP */
2500 0 : if (p->prio < rq->curr->prio && cpu_online(cpu_of(rq)))
2501 0 : resched_curr(rq);
2502 : }
2503 : }
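
switched_to_rt() runs when a task's policy becomes SCHED_FIFO or SCHED_RR, most commonly via the sched_setscheduler() system call. A small userspace example of such a switch (illustrative only; it needs an RT priority rlimit or CAP_SYS_NICE):

/* Userspace trigger for the switched_to_rt() path (illustration). */
#include <sched.h>
#include <stdio.h>

int main(void)
{
	struct sched_param sp = { .sched_priority = 50 };

	/* Switching the calling task to SCHED_RR changes its scheduling
	 * class; inside the kernel this ends up in switched_to_rt(). */
	if (sched_setscheduler(0, SCHED_RR, &sp)) {
		perror("sched_setscheduler (CAP_SYS_NICE needed)");
		return 1;
	}
	return 0;
}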
2504 :
2505 : /*
2506 : * Priority of the task has changed. This may cause
2507 : * us to initiate a push or pull.
2508 : */
2509 : static void
2510 0 : prio_changed_rt(struct rq *rq, struct task_struct *p, int oldprio)
2511 : {
2512 0 : if (!task_on_rq_queued(p))
2513 : return;
2514 :
2515 0 : if (task_current(rq, p)) {
2516 : #ifdef CONFIG_SMP
2517 : /*
2518 : * If our priority decreases while running, we
2519 : * may need to pull tasks to this runqueue.
2520 : */
2521 : if (oldprio < p->prio)
2522 : rt_queue_pull_task(rq);
2523 :
2524 : /*
2525 : * If there's a higher priority task waiting to run
2526 : * then reschedule.
2527 : */
2528 : if (p->prio > rq->rt.highest_prio.curr)
2529 : resched_curr(rq);
2530 : #else
2531 : /* For UP simply resched on drop of prio */
2532 0 : if (oldprio < p->prio)
2533 0 : resched_curr(rq);
2534 : #endif /* CONFIG_SMP */
2535 : } else {
2536 : /*
2537 : * This task is not running, but if its priority
2538 : * is higher than that of the currently running task,
2539 : * then reschedule.
2540 : */
2541 0 : if (p->prio < rq->curr->prio)
2542 0 : resched_curr(rq);
2543 : }
2544 : }
2545 :
2546 : #ifdef CONFIG_POSIX_TIMERS
2547 0 : static void watchdog(struct rq *rq, struct task_struct *p)
2548 : {
2549 : unsigned long soft, hard;
2550 :
2551 : /* The hard limit may change after the soft one is read; this will be fixed up on the next tick */
2552 0 : soft = task_rlimit(p, RLIMIT_RTTIME);
2553 0 : hard = task_rlimit_max(p, RLIMIT_RTTIME);
2554 :
2555 0 : if (soft != RLIM_INFINITY) {
2556 : unsigned long next;
2557 :
2558 0 : if (p->rt.watchdog_stamp != jiffies) {
2559 0 : p->rt.timeout++;
2560 0 : p->rt.watchdog_stamp = jiffies;
2561 : }
2562 :
2563 0 : next = DIV_ROUND_UP(min(soft, hard), USEC_PER_SEC/HZ);
2564 0 : if (p->rt.timeout > next) {
2565 0 : posix_cputimers_rt_watchdog(&p->posix_cputimers,
2566 : p->se.sum_exec_runtime);
2567 : }
2568 : }
2569 0 : }
2570 : #else
2571 : static inline void watchdog(struct rq *rq, struct task_struct *p) { }
2572 : #endif
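
The watchdog above enforces RLIMIT_RTTIME, the cap on CPU time an RT task may consume without sleeping. A hedged userspace example arming that limit (the values are arbitrary):

/* Arm RLIMIT_RTTIME so the watchdog above can fire (illustration). */
#define _GNU_SOURCE
#include <sys/resource.h>
#include <stdio.h>

int main(void)
{
	/* Limits are in microseconds of RT CPU time without sleeping. */
	struct rlimit rl = {
		.rlim_cur = 500000,	/* soft: SIGXCPU after 0.5 s  */
		.rlim_max = 1000000,	/* hard: SIGKILL after 1 s    */
	};

	if (setrlimit(RLIMIT_RTTIME, &rl)) {
		perror("setrlimit(RLIMIT_RTTIME)");
		return 1;
	}
	return 0;
}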
2573 :
2574 : /*
2575 : * scheduler tick hitting a task of our scheduling class.
2576 : *
2577 : * NOTE: This function can be called remotely by the tick offload that
2578 : * goes along full dynticks. Therefore no local assumption can be made
2579 : * and everything must be accessed through the @rq and @curr passed in
2580 : * parameters.
2581 : */
2582 0 : static void task_tick_rt(struct rq *rq, struct task_struct *p, int queued)
2583 : {
2584 0 : struct sched_rt_entity *rt_se = &p->rt;
2585 :
2586 0 : update_curr_rt(rq);
2587 0 : update_rt_rq_load_avg(rq_clock_pelt(rq), rq, 1);
2588 :
2589 0 : watchdog(rq, p);
2590 :
2591 : /*
2592 : * RR tasks need a special form of timeslice management.
2593 : * FIFO tasks have no timeslices.
2594 : */
2595 0 : if (p->policy != SCHED_RR)
2596 : return;
2597 :
2598 0 : if (--p->rt.time_slice)
2599 : return;
2600 :
2601 0 : p->rt.time_slice = sched_rr_timeslice;
2602 :
2603 : /*
2604 : * Requeue to the end of queue if we (and all of our ancestors) are not
2605 : * the only element on the queue
2606 : */
2607 0 : for_each_sched_rt_entity(rt_se) {
2608 0 : if (rt_se->run_list.prev != rt_se->run_list.next) {
2609 0 : requeue_task_rt(rq, p, 0);
2610 0 : resched_curr(rq);
2611 0 : return;
2612 : }
2613 : }
2614 : }
2615 :
2616 0 : static unsigned int get_rr_interval_rt(struct rq *rq, struct task_struct *task)
2617 : {
2618 : /*
2619 : * Time slice is 0 for SCHED_FIFO tasks
2620 : */
2621 0 : if (task->policy == SCHED_RR)
2622 0 : return sched_rr_timeslice;
2623 : else
2624 : return 0;
2625 : }
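
get_rr_interval_rt() is what the sched_rr_get_interval() system call reports for RT tasks: the round-robin timeslice (100 ms on default configs) for SCHED_RR, and zero for SCHED_FIFO. A small illustrative query:

/* Query the RR timeslice via sched_rr_get_interval() (illustration). */
#include <sched.h>
#include <stdio.h>
#include <time.h>

int main(void)
{
	struct timespec ts;

	if (sched_rr_get_interval(0, &ts)) {
		perror("sched_rr_get_interval");
		return 1;
	}
	/* For a SCHED_RR task this is sched_rr_timeslice converted to a
	 * timespec; for SCHED_FIFO it is 0. */
	printf("timeslice: %ld.%09ld s\n", (long)ts.tv_sec, ts.tv_nsec);
	return 0;
}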
2626 :
2627 : DEFINE_SCHED_CLASS(rt) = {
2628 :
2629 : .enqueue_task = enqueue_task_rt,
2630 : .dequeue_task = dequeue_task_rt,
2631 : .yield_task = yield_task_rt,
2632 :
2633 : .check_preempt_curr = check_preempt_curr_rt,
2634 :
2635 : .pick_next_task = pick_next_task_rt,
2636 : .put_prev_task = put_prev_task_rt,
2637 : .set_next_task = set_next_task_rt,
2638 :
2639 : #ifdef CONFIG_SMP
2640 : .balance = balance_rt,
2641 : .pick_task = pick_task_rt,
2642 : .select_task_rq = select_task_rq_rt,
2643 : .set_cpus_allowed = set_cpus_allowed_common,
2644 : .rq_online = rq_online_rt,
2645 : .rq_offline = rq_offline_rt,
2646 : .task_woken = task_woken_rt,
2647 : .switched_from = switched_from_rt,
2648 : .find_lock_rq = find_lock_lowest_rq,
2649 : #endif
2650 :
2651 : .task_tick = task_tick_rt,
2652 :
2653 : .get_rr_interval = get_rr_interval_rt,
2654 :
2655 : .prio_changed = prio_changed_rt,
2656 : .switched_to = switched_to_rt,
2657 :
2658 : .update_curr = update_curr_rt,
2659 :
2660 : #ifdef CONFIG_UCLAMP_TASK
2661 : .uclamp_enabled = 1,
2662 : #endif
2663 : };
2664 :
2665 : #ifdef CONFIG_RT_GROUP_SCHED
2666 : /*
2667 : * Ensure that the real time constraints are schedulable.
2668 : */
2669 : static DEFINE_MUTEX(rt_constraints_mutex);
2670 :
2671 : static inline int tg_has_rt_tasks(struct task_group *tg)
2672 : {
2673 : struct task_struct *task;
2674 : struct css_task_iter it;
2675 : int ret = 0;
2676 :
2677 : /*
2678 : * Autogroups do not have RT tasks; see autogroup_create().
2679 : */
2680 : if (task_group_is_autogroup(tg))
2681 : return 0;
2682 :
2683 : css_task_iter_start(&tg->css, 0, &it);
2684 : while (!ret && (task = css_task_iter_next(&it)))
2685 : ret |= rt_task(task);
2686 : css_task_iter_end(&it);
2687 :
2688 : return ret;
2689 : }
2690 :
2691 : struct rt_schedulable_data {
2692 : struct task_group *tg;
2693 : u64 rt_period;
2694 : u64 rt_runtime;
2695 : };
2696 :
2697 : static int tg_rt_schedulable(struct task_group *tg, void *data)
2698 : {
2699 : struct rt_schedulable_data *d = data;
2700 : struct task_group *child;
2701 : unsigned long total, sum = 0;
2702 : u64 period, runtime;
2703 :
2704 : period = ktime_to_ns(tg->rt_bandwidth.rt_period);
2705 : runtime = tg->rt_bandwidth.rt_runtime;
2706 :
2707 : if (tg == d->tg) {
2708 : period = d->rt_period;
2709 : runtime = d->rt_runtime;
2710 : }
2711 :
2712 : /*
2713 : * Cannot have more runtime than the period.
2714 : */
2715 : if (runtime > period && runtime != RUNTIME_INF)
2716 : return -EINVAL;
2717 :
2718 : /*
2719 : * Ensure we don't starve existing RT tasks if runtime turns zero.
2720 : */
2721 : if (rt_bandwidth_enabled() && !runtime &&
2722 : tg->rt_bandwidth.rt_runtime && tg_has_rt_tasks(tg))
2723 : return -EBUSY;
2724 :
2725 : total = to_ratio(period, runtime);
2726 :
2727 : /*
2728 : * Nobody can have more than the global setting allows.
2729 : */
2730 : if (total > to_ratio(global_rt_period(), global_rt_runtime()))
2731 : return -EINVAL;
2732 :
2733 : /*
2734 : * The sum of our children's runtime should not exceed our own.
2735 : */
2736 : list_for_each_entry_rcu(child, &tg->children, siblings) {
2737 : period = ktime_to_ns(child->rt_bandwidth.rt_period);
2738 : runtime = child->rt_bandwidth.rt_runtime;
2739 :
2740 : if (child == d->tg) {
2741 : period = d->rt_period;
2742 : runtime = d->rt_runtime;
2743 : }
2744 :
2745 : sum += to_ratio(period, runtime);
2746 : }
2747 :
2748 : if (sum > total)
2749 : return -EINVAL;
2750 :
2751 : return 0;
2752 : }
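
A worked example of the admission check, assuming to_ratio(period, runtime) scales runtime/period by 2^20 as elsewhere in the scheduler (illustrative arithmetic only): with the default global limits of a 1 s period and 950 ms runtime the cap is about 996147, so a group asking for 960 ms per 1 s exceeds it and gets -EINVAL, as do children whose ratios sum past their parent's.

/* Back-of-the-envelope version of the check above; assumes
 * to_ratio(period, runtime) == (runtime << 20) / period. */
#include <stdint.h>
#include <stdio.h>

static uint64_t ratio(uint64_t period_ns, uint64_t runtime_ns)
{
	return (runtime_ns << 20) / period_ns;
}

int main(void)
{
	uint64_t global = ratio(1000000000ULL, 950000000ULL); /* defaults */
	uint64_t group  = ratio(1000000000ULL, 960000000ULL); /* request  */

	/* 960 ms / 1 s exceeds the global 95% cap, so tg_rt_schedulable()
	 * would reject such a group with -EINVAL. */
	printf("global=%llu group=%llu exceeds=%d\n",
	       (unsigned long long)global, (unsigned long long)group,
	       group > global);
	return 0;
}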
2753 :
2754 : static int __rt_schedulable(struct task_group *tg, u64 period, u64 runtime)
2755 : {
2756 : int ret;
2757 :
2758 : struct rt_schedulable_data data = {
2759 : .tg = tg,
2760 : .rt_period = period,
2761 : .rt_runtime = runtime,
2762 : };
2763 :
2764 : rcu_read_lock();
2765 : ret = walk_tg_tree(tg_rt_schedulable, tg_nop, &data);
2766 : rcu_read_unlock();
2767 :
2768 : return ret;
2769 : }
2770 :
2771 : static int tg_set_rt_bandwidth(struct task_group *tg,
2772 : u64 rt_period, u64 rt_runtime)
2773 : {
2774 : int i, err = 0;
2775 :
2776 : /*
2777 : * Disallowing RT runtime for the root group is BAD; it would prevent the
2778 : * kernel from creating (and/or operating) RT threads.
2779 : */
2780 : if (tg == &root_task_group && rt_runtime == 0)
2781 : return -EINVAL;
2782 :
2783 : /* A zero period doesn't make any sense. */
2784 : if (rt_period == 0)
2785 : return -EINVAL;
2786 :
2787 : /*
2788 : * Bound the quota to guard against overflow during the bandwidth shift.
2789 : */
2790 : if (rt_runtime != RUNTIME_INF && rt_runtime > max_rt_runtime)
2791 : return -EINVAL;
2792 :
2793 : mutex_lock(&rt_constraints_mutex);
2794 : err = __rt_schedulable(tg, rt_period, rt_runtime);
2795 : if (err)
2796 : goto unlock;
2797 :
2798 : raw_spin_lock_irq(&tg->rt_bandwidth.rt_runtime_lock);
2799 : tg->rt_bandwidth.rt_period = ns_to_ktime(rt_period);
2800 : tg->rt_bandwidth.rt_runtime = rt_runtime;
2801 :
2802 : for_each_possible_cpu(i) {
2803 : struct rt_rq *rt_rq = tg->rt_rq[i];
2804 :
2805 : raw_spin_lock(&rt_rq->rt_runtime_lock);
2806 : rt_rq->rt_runtime = rt_runtime;
2807 : raw_spin_unlock(&rt_rq->rt_runtime_lock);
2808 : }
2809 : raw_spin_unlock_irq(&tg->rt_bandwidth.rt_runtime_lock);
2810 : unlock:
2811 : mutex_unlock(&rt_constraints_mutex);
2812 :
2813 : return err;
2814 : }
2815 :
2816 : int sched_group_set_rt_runtime(struct task_group *tg, long rt_runtime_us)
2817 : {
2818 : u64 rt_runtime, rt_period;
2819 :
2820 : rt_period = ktime_to_ns(tg->rt_bandwidth.rt_period);
2821 : rt_runtime = (u64)rt_runtime_us * NSEC_PER_USEC;
2822 : if (rt_runtime_us < 0)
2823 : rt_runtime = RUNTIME_INF;
2824 : else if ((u64)rt_runtime_us > U64_MAX / NSEC_PER_USEC)
2825 : return -EINVAL;
2826 :
2827 : return tg_set_rt_bandwidth(tg, rt_period, rt_runtime);
2828 : }
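
With CONFIG_RT_GROUP_SCHED these setters are normally driven from the cgroup-v1 cpu controller files cpu.rt_runtime_us and cpu.rt_period_us. A hedged sketch that grants a group 100 ms of RT runtime per 1 s period; the mount point and the group name "rtgroup" are hypothetical:

/* Illustrative only: write cgroup-v1 RT bandwidth knobs for a group.
 * Assumes the cpu controller is mounted at /sys/fs/cgroup/cpu and the
 * group "rtgroup" already exists. */
#include <stdio.h>

static int write_knob(const char *path, const char *val)
{
	FILE *f = fopen(path, "w");

	if (!f)
		return -1;
	fputs(val, f);
	return fclose(f);
}

int main(void)
{
	/* 100 ms of RT runtime every 1 s for this group. */
	write_knob("/sys/fs/cgroup/cpu/rtgroup/cpu.rt_period_us", "1000000");
	write_knob("/sys/fs/cgroup/cpu/rtgroup/cpu.rt_runtime_us", "100000");
	return 0;
}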
2829 :
2830 : long sched_group_rt_runtime(struct task_group *tg)
2831 : {
2832 : u64 rt_runtime_us;
2833 :
2834 : if (tg->rt_bandwidth.rt_runtime == RUNTIME_INF)
2835 : return -1;
2836 :
2837 : rt_runtime_us = tg->rt_bandwidth.rt_runtime;
2838 : do_div(rt_runtime_us, NSEC_PER_USEC);
2839 : return rt_runtime_us;
2840 : }
2841 :
2842 : int sched_group_set_rt_period(struct task_group *tg, u64 rt_period_us)
2843 : {
2844 : u64 rt_runtime, rt_period;
2845 :
2846 : if (rt_period_us > U64_MAX / NSEC_PER_USEC)
2847 : return -EINVAL;
2848 :
2849 : rt_period = rt_period_us * NSEC_PER_USEC;
2850 : rt_runtime = tg->rt_bandwidth.rt_runtime;
2851 :
2852 : return tg_set_rt_bandwidth(tg, rt_period, rt_runtime);
2853 : }
2854 :
2855 : long sched_group_rt_period(struct task_group *tg)
2856 : {
2857 : u64 rt_period_us;
2858 :
2859 : rt_period_us = ktime_to_ns(tg->rt_bandwidth.rt_period);
2860 : do_div(rt_period_us, NSEC_PER_USEC);
2861 : return rt_period_us;
2862 : }
2863 :
2864 : static int sched_rt_global_constraints(void)
2865 : {
2866 : int ret = 0;
2867 :
2868 : mutex_lock(&rt_constraints_mutex);
2869 : ret = __rt_schedulable(NULL, 0, 0);
2870 : mutex_unlock(&rt_constraints_mutex);
2871 :
2872 : return ret;
2873 : }
2874 :
2875 : int sched_rt_can_attach(struct task_group *tg, struct task_struct *tsk)
2876 : {
2877 : /* Don't accept realtime tasks when there is no way for them to run */
2878 : if (rt_task(tsk) && tg->rt_bandwidth.rt_runtime == 0)
2879 : return 0;
2880 :
2881 : return 1;
2882 : }
2883 :
2884 : #else /* !CONFIG_RT_GROUP_SCHED */
2885 0 : static int sched_rt_global_constraints(void)
2886 : {
2887 : unsigned long flags;
2888 : int i;
2889 :
2890 0 : raw_spin_lock_irqsave(&def_rt_bandwidth.rt_runtime_lock, flags);
2891 0 : for_each_possible_cpu(i) {
2892 0 : struct rt_rq *rt_rq = &cpu_rq(i)->rt;
2893 :
2894 0 : raw_spin_lock(&rt_rq->rt_runtime_lock);
2895 0 : rt_rq->rt_runtime = global_rt_runtime();
2896 0 : raw_spin_unlock(&rt_rq->rt_runtime_lock);
2897 : }
2898 0 : raw_spin_unlock_irqrestore(&def_rt_bandwidth.rt_runtime_lock, flags);
2899 :
2900 0 : return 0;
2901 : }
2902 : #endif /* CONFIG_RT_GROUP_SCHED */
2903 :
2904 : static int sched_rt_global_validate(void)
2905 : {
2906 0 : if (sysctl_sched_rt_period <= 0)
2907 : return -EINVAL;
2908 :
2909 0 : if ((sysctl_sched_rt_runtime != RUNTIME_INF) &&
2910 0 : ((sysctl_sched_rt_runtime > sysctl_sched_rt_period) ||
2911 0 : ((u64)sysctl_sched_rt_runtime *
2912 : NSEC_PER_USEC > max_rt_runtime)))
2913 : return -EINVAL;
2914 :
2915 : return 0;
2916 : }
2917 :
2918 0 : static void sched_rt_do_global(void)
2919 : {
2920 : unsigned long flags;
2921 :
2922 0 : raw_spin_lock_irqsave(&def_rt_bandwidth.rt_runtime_lock, flags);
2923 0 : def_rt_bandwidth.rt_runtime = global_rt_runtime();
2924 0 : def_rt_bandwidth.rt_period = ns_to_ktime(global_rt_period());
2925 0 : raw_spin_unlock_irqrestore(&def_rt_bandwidth.rt_runtime_lock, flags);
2926 0 : }
2927 :
2928 0 : int sched_rt_handler(struct ctl_table *table, int write, void *buffer,
2929 : size_t *lenp, loff_t *ppos)
2930 : {
2931 : int old_period, old_runtime;
2932 : static DEFINE_MUTEX(mutex);
2933 : int ret;
2934 :
2935 0 : mutex_lock(&mutex);
2936 0 : old_period = sysctl_sched_rt_period;
2937 0 : old_runtime = sysctl_sched_rt_runtime;
2938 :
2939 0 : ret = proc_dointvec(table, write, buffer, lenp, ppos);
2940 :
2941 0 : if (!ret && write) {
2942 0 : ret = sched_rt_global_validate();
2943 0 : if (ret)
2944 : goto undo;
2945 :
2946 0 : ret = sched_dl_global_validate();
2947 0 : if (ret)
2948 : goto undo;
2949 :
2950 0 : ret = sched_rt_global_constraints();
2951 0 : if (ret)
2952 : goto undo;
2953 :
2954 0 : sched_rt_do_global();
2955 0 : sched_dl_do_global();
2956 : }
2957 : if (0) {
2958 : undo:
2959 0 : sysctl_sched_rt_period = old_period;
2960 0 : sysctl_sched_rt_runtime = old_runtime;
2961 : }
2962 0 : mutex_unlock(&mutex);
2963 :
2964 0 : return ret;
2965 : }
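
sched_rt_handler() is reached through the /proc/sys/kernel/sched_rt_period_us and /proc/sys/kernel/sched_rt_runtime_us sysctls. An illustrative userspace tweak (root required); writing -1 to the runtime disables RT throttling (RUNTIME_INF), while a runtime larger than the period is rejected by sched_rt_global_validate():

/* Illustration: tune the global RT throttling sysctls from userspace. */
#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/proc/sys/kernel/sched_rt_runtime_us", "w");

	if (!f) {
		perror("fopen (root required)");
		return 1;
	}
	/* Default is 950000 us of RT time per 1000000 us period;
	 * "-1" would disable throttling entirely. */
	fputs("980000", f);	/* allow RT tasks 98% of each period */
	return fclose(f);
}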
2966 :
2967 0 : int sched_rr_handler(struct ctl_table *table, int write, void *buffer,
2968 : size_t *lenp, loff_t *ppos)
2969 : {
2970 : int ret;
2971 : static DEFINE_MUTEX(mutex);
2972 :
2973 0 : mutex_lock(&mutex);
2974 0 : ret = proc_dointvec(table, write, buffer, lenp, ppos);
2975 : /*
2976 : * Make sure that internally we keep the timeslice in jiffies.
2977 : * Also, writing zero resets the timeslice to the default:
2978 : */
2979 0 : if (!ret && write) {
2980 0 : sched_rr_timeslice =
2981 0 : sysctl_sched_rr_timeslice <= 0 ? RR_TIMESLICE :
2982 0 : msecs_to_jiffies(sysctl_sched_rr_timeslice);
2983 : }
2984 0 : mutex_unlock(&mutex);
2985 :
2986 0 : return ret;
2987 : }
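
sched_rr_handler() similarly backs /proc/sys/kernel/sched_rr_timeslice_ms; the value is kept internally in jiffies, and writing zero (or a negative number) restores the RR_TIMESLICE default, as the code above shows. Illustrative example:

/* Illustration: adjust the SCHED_RR timeslice, or reset it to default. */
#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/proc/sys/kernel/sched_rr_timeslice_ms", "w");

	if (!f) {
		perror("fopen (root required)");
		return 1;
	}
	fputs("50", f);		/* 50 ms slices; "0" restores the default */
	return fclose(f);
}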
2988 :
2989 : #ifdef CONFIG_SCHED_DEBUG
2990 0 : void print_rt_stats(struct seq_file *m, int cpu)
2991 : {
2992 : rt_rq_iter_t iter;
2993 : struct rt_rq *rt_rq;
2994 :
2995 : rcu_read_lock();
2996 0 : for_each_rt_rq(rt_rq, iter, cpu_rq(cpu))
2997 0 : print_rt_rq(m, cpu, rt_rq);
2998 : rcu_read_unlock();
2999 0 : }
3000 : #endif /* CONFIG_SCHED_DEBUG */