Line data Source code
1 : /* SPDX-License-Identifier: GPL-2.0 */
2 : /*
3 : * Scheduler internal types and methods:
4 : */
5 : #ifndef _KERNEL_SCHED_SCHED_H
6 : #define _KERNEL_SCHED_SCHED_H
7 :
8 : #include <linux/sched/affinity.h>
9 : #include <linux/sched/autogroup.h>
10 : #include <linux/sched/cpufreq.h>
11 : #include <linux/sched/deadline.h>
12 : #include <linux/sched.h>
13 : #include <linux/sched/loadavg.h>
14 : #include <linux/sched/mm.h>
15 : #include <linux/sched/rseq_api.h>
16 : #include <linux/sched/signal.h>
17 : #include <linux/sched/smt.h>
18 : #include <linux/sched/stat.h>
19 : #include <linux/sched/sysctl.h>
20 : #include <linux/sched/task_flags.h>
21 : #include <linux/sched/task.h>
22 : #include <linux/sched/topology.h>
23 :
24 : #include <linux/atomic.h>
25 : #include <linux/bitmap.h>
26 : #include <linux/bug.h>
27 : #include <linux/capability.h>
28 : #include <linux/cgroup_api.h>
29 : #include <linux/cgroup.h>
30 : #include <linux/cpufreq.h>
31 : #include <linux/cpumask_api.h>
32 : #include <linux/ctype.h>
33 : #include <linux/file.h>
34 : #include <linux/fs_api.h>
35 : #include <linux/hrtimer_api.h>
36 : #include <linux/interrupt.h>
37 : #include <linux/irq_work.h>
38 : #include <linux/jiffies.h>
39 : #include <linux/kref_api.h>
40 : #include <linux/kthread.h>
41 : #include <linux/ktime_api.h>
42 : #include <linux/lockdep_api.h>
43 : #include <linux/lockdep.h>
44 : #include <linux/minmax.h>
45 : #include <linux/mm.h>
46 : #include <linux/module.h>
47 : #include <linux/mutex_api.h>
48 : #include <linux/plist.h>
49 : #include <linux/poll.h>
50 : #include <linux/proc_fs.h>
51 : #include <linux/profile.h>
52 : #include <linux/psi.h>
53 : #include <linux/rcupdate.h>
54 : #include <linux/seq_file.h>
55 : #include <linux/seqlock.h>
56 : #include <linux/softirq.h>
57 : #include <linux/spinlock_api.h>
58 : #include <linux/static_key.h>
59 : #include <linux/stop_machine.h>
60 : #include <linux/syscalls_api.h>
61 : #include <linux/syscalls.h>
62 : #include <linux/tick.h>
63 : #include <linux/topology.h>
64 : #include <linux/types.h>
65 : #include <linux/u64_stats_sync_api.h>
66 : #include <linux/uaccess.h>
67 : #include <linux/wait_api.h>
68 : #include <linux/wait_bit.h>
69 : #include <linux/workqueue_api.h>
70 :
71 : #include <trace/events/power.h>
72 : #include <trace/events/sched.h>
73 :
74 : #include "../workqueue_internal.h"
75 :
76 : #ifdef CONFIG_CGROUP_SCHED
77 : #include <linux/cgroup.h>
78 : #include <linux/psi.h>
79 : #endif
80 :
81 : #ifdef CONFIG_SCHED_DEBUG
82 : # include <linux/static_key.h>
83 : #endif
84 :
85 : #ifdef CONFIG_PARAVIRT
86 : # include <asm/paravirt.h>
87 : # include <asm/paravirt_api_clock.h>
88 : #endif
89 :
90 : #include "cpupri.h"
91 : #include "cpudeadline.h"
92 :
93 : #ifdef CONFIG_SCHED_DEBUG
94 : # define SCHED_WARN_ON(x) WARN_ONCE(x, #x)
95 : #else
96 : # define SCHED_WARN_ON(x) ({ (void)(x), 0; })
97 : #endif
98 :
99 : struct rq;
100 : struct cpuidle_state;
101 :
102 : /* task_struct::on_rq states: */
103 : #define TASK_ON_RQ_QUEUED 1
104 : #define TASK_ON_RQ_MIGRATING 2
105 :
106 : extern __read_mostly int scheduler_running;
107 :
108 : extern unsigned long calc_load_update;
109 : extern atomic_long_t calc_load_tasks;
110 :
111 : extern void calc_global_load_tick(struct rq *this_rq);
112 : extern long calc_load_fold_active(struct rq *this_rq, long adjust);
113 :
114 : extern void call_trace_sched_update_nr_running(struct rq *rq, int count);
115 : /*
116 : * Helpers for converting nanosecond timing to jiffy resolution
117 : */
118 : #define NS_TO_JIFFIES(TIME) ((unsigned long)(TIME) / (NSEC_PER_SEC / HZ))
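/*
 * Editor's illustrative example (not part of the upstream header): with
 * HZ == 1000, NSEC_PER_SEC / HZ == 1000000, so NS_TO_JIFFIES(4000000) == 4,
 * i.e. a 4 ms interval maps to 4 jiffies; any sub-jiffy remainder is
 * truncated by the integer division.
 */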
119 :
120 : /*
121 : * Increase resolution of nice-level calculations for 64-bit architectures.
122 : * The extra resolution improves shares distribution and load balancing of
123 : * low-weight task groups (e.g. nice +19 on an autogroup), deeper taskgroup
124 : * hierarchies, especially on larger systems. This is not a user-visible change
125 : * and does not change the user-interface for setting shares/weights.
126 : *
127 : * We increase resolution only if we have enough bits to allow this increased
128 : * resolution (i.e. 64-bit). The costs for increasing resolution when 32-bit
129 : * are pretty high and the returns do not justify the increased costs.
130 : *
131 : * Really only required when CONFIG_FAIR_GROUP_SCHED=y is also set, but to
132 : * increase coverage and consistency always enable it on 64-bit platforms.
133 : */
134 : #ifdef CONFIG_64BIT
135 : # define NICE_0_LOAD_SHIFT (SCHED_FIXEDPOINT_SHIFT + SCHED_FIXEDPOINT_SHIFT)
136 : # define scale_load(w) ((w) << SCHED_FIXEDPOINT_SHIFT)
137 : # define scale_load_down(w) \
138 : ({ \
139 : unsigned long __w = (w); \
140 : if (__w) \
141 : __w = max(2UL, __w >> SCHED_FIXEDPOINT_SHIFT); \
142 : __w; \
143 : })
144 : #else
145 : # define NICE_0_LOAD_SHIFT (SCHED_FIXEDPOINT_SHIFT)
146 : # define scale_load(w) (w)
147 : # define scale_load_down(w) (w)
148 : #endif
149 :
150 : /*
151 : * Task weight (visible to users) and its load (invisible to users) have
152 : * independent resolution, but they should be well calibrated. We use
153 : * scale_load() and scale_load_down() to convert between them. The
154 : * following must be true:
155 : *
156 : * scale_load(sched_prio_to_weight[NICE_TO_PRIO(0)-MAX_RT_PRIO]) == NICE_0_LOAD
157 : *
158 : */
159 : #define NICE_0_LOAD (1L << NICE_0_LOAD_SHIFT)
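/*
 * Editor's sketch (hypothetical helper, not part of the upstream header):
 * demonstrates the calibration stated above. For the nice-0 weight of 1024
 * (sched_prio_to_weight[20]), scale_load() must produce NICE_0_LOAD and
 * scale_load_down() must recover the user-visible weight, on both 32-bit
 * (where both macros are the identity) and 64-bit.
 */
static inline bool sched_doc_nice0_load_roundtrip(void)
{
	unsigned long w = 1024;			/* nice-0 weight */
	unsigned long load = scale_load(w);	/* == NICE_0_LOAD */

	return load == NICE_0_LOAD && scale_load_down(load) == w;
}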
160 :
161 : /*
162 : * Single value that decides SCHED_DEADLINE internal math precision.
163 : * 10 -> just above 1us
164 : * 9 -> just above 0.5us
165 : */
166 : #define DL_SCALE 10
167 :
168 : /*
169 : * Single value that denotes runtime == period, i.e. unlimited time.
170 : */
171 : #define RUNTIME_INF ((u64)~0ULL)
172 :
173 : static inline int idle_policy(int policy)
174 : {
175 1754 : return policy == SCHED_IDLE;
176 : }
177 : static inline int fair_policy(int policy)
178 : {
179 210 : return policy == SCHED_NORMAL || policy == SCHED_BATCH;
180 : }
181 :
182 : static inline int rt_policy(int policy)
183 : {
184 113 : return policy == SCHED_FIFO || policy == SCHED_RR;
185 : }
186 :
187 : static inline int dl_policy(int policy)
188 : {
189 : return policy == SCHED_DEADLINE;
190 : }
191 : static inline bool valid_policy(int policy)
192 : {
193 210 : return idle_policy(policy) || fair_policy(policy) ||
194 105 : rt_policy(policy) || dl_policy(policy);
195 : }
196 :
197 : static inline int task_has_idle_policy(struct task_struct *p)
198 : {
199 3513 : return idle_policy(p->policy);
200 : }
201 :
202 : static inline int task_has_rt_policy(struct task_struct *p)
203 : {
204 8 : return rt_policy(p->policy);
205 : }
206 :
207 : static inline int task_has_dl_policy(struct task_struct *p)
208 : {
209 4 : return dl_policy(p->policy);
210 : }
211 :
212 : #define cap_scale(v, s) ((v)*(s) >> SCHED_CAPACITY_SHIFT)
213 :
214 : static inline void update_avg(u64 *avg, u64 sample)
215 : {
216 : s64 diff = sample - *avg;
217 : *avg += diff / 8;
218 : }
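/*
 * Editor's note: update_avg() is an exponentially weighted moving average
 * with a 1/8 weight for each new sample, i.e. avg' = avg + (sample - avg)/8.
 * Illustrative run (integer arithmetic): starting from *avg == 0 and feeding
 * sample == 800 repeatedly yields 100, 187, 263, ... converging towards 800.
 */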
219 :
220 : /*
221 : * Shifting a value by an exponent greater than *or equal to* the size of said
222 : * value is UB; cap the shift at size-1.
223 : */
224 : #define shr_bound(val, shift) \
225 : (val >> min_t(typeof(shift), shift, BITS_PER_TYPE(typeof(val)) - 1))
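/*
 * Editor's illustrative example: for a u32 value, BITS_PER_TYPE(u32) == 32,
 * so shr_bound(val, 40) clamps the shift to min(40, 31) == 31 rather than
 * performing an undefined 40-bit shift.
 */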
226 :
227 : /*
228 : * !! For sched_setattr_nocheck() (kernel) only !!
229 : *
230 : * This is actually gross. :(
231 : *
232 : * It is used to give schedutil kworker(s) a higher priority than SCHED_DEADLINE
233 : * tasks while still allowing them to sleep. We need this on platforms that
234 : * cannot atomically change the clock frequency. Remove once fast switching is
235 : * available on such platforms.
236 : *
237 : * SUGOV stands for SchedUtil GOVernor.
238 : */
239 : #define SCHED_FLAG_SUGOV 0x10000000
240 :
241 : #define SCHED_DL_FLAGS (SCHED_FLAG_RECLAIM | SCHED_FLAG_DL_OVERRUN | SCHED_FLAG_SUGOV)
242 :
243 : static inline bool dl_entity_is_special(struct sched_dl_entity *dl_se)
244 : {
245 : #ifdef CONFIG_CPU_FREQ_GOV_SCHEDUTIL
246 : return unlikely(dl_se->flags & SCHED_FLAG_SUGOV);
247 : #else
248 : return false;
249 : #endif
250 : }
251 :
252 : /*
253 : * Tells if entity @a should preempt entity @b.
254 : */
255 : static inline bool
256 : dl_entity_preempt(struct sched_dl_entity *a, struct sched_dl_entity *b)
257 : {
258 0 : return dl_entity_is_special(a) ||
259 0 : dl_time_before(a->deadline, b->deadline);
260 : }
261 :
262 : /*
263 : * This is the priority-queue data structure of the RT scheduling class:
264 : */
265 : struct rt_prio_array {
266 : DECLARE_BITMAP(bitmap, MAX_RT_PRIO+1); /* include 1 bit for delimiter */
267 : struct list_head queue[MAX_RT_PRIO];
268 : };
269 :
270 : struct rt_bandwidth {
271 : /* nests inside the rq lock: */
272 : raw_spinlock_t rt_runtime_lock;
273 : ktime_t rt_period;
274 : u64 rt_runtime;
275 : struct hrtimer rt_period_timer;
276 : unsigned int rt_period_active;
277 : };
278 :
279 : void __dl_clear_params(struct task_struct *p);
280 :
281 : struct dl_bandwidth {
282 : raw_spinlock_t dl_runtime_lock;
283 : u64 dl_runtime;
284 : u64 dl_period;
285 : };
286 :
287 : static inline int dl_bandwidth_enabled(void)
288 : {
289 : return sysctl_sched_rt_runtime >= 0;
290 : }
291 :
292 : /*
293 : * To keep the bandwidth of -deadline tasks under control
294 : * we need some place where we:
295 : * - store the maximum -deadline bandwidth of each CPU;
296 : * - cache the fraction of bandwidth that is currently allocated in
297 : * each root domain;
298 : *
299 : * This is all done in the data structure below. It is similar to the
300 : * one used for RT-throttling (rt_bandwidth), with the main difference
301 : * that, since here we are only interested in admission control, we
302 : * do not decrease any runtime while the group "executes", nor do we
303 : * need a timer to replenish it.
304 : *
305 : * With respect to SMP, bandwidth is given on a per root domain basis,
306 : * meaning that:
307 : * - bw (< 100%) is the deadline bandwidth of each CPU;
308 : * - total_bw is the currently allocated bandwidth in each root domain;
309 : */
310 : struct dl_bw {
311 : raw_spinlock_t lock;
312 : u64 bw;
313 : u64 total_bw;
314 : };
315 :
316 : /*
317 : * Verify the fitness of task @p to run on @cpu, taking into account the
318 : * original capacity of the CPU and the runtime/deadline ratio of the task.
319 : *
320 : * The function returns true if the task's runtime/deadline ratio does not
321 : * exceed the original capacity of @cpu (expressed as a fraction of
322 : * SCHED_CAPACITY_SCALE), and false otherwise.
323 : */
324 : static inline bool dl_task_fits_capacity(struct task_struct *p, int cpu)
325 : {
326 : unsigned long cap = arch_scale_cpu_capacity(cpu);
327 :
328 : return cap_scale(p->dl.dl_deadline, cap) >= p->dl.dl_runtime;
329 : }
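/*
 * Editor's worked example: a task with dl_runtime == 10 ms and
 * dl_deadline == 100 ms needs roughly 10% of a full-capacity CPU. On a CPU
 * with arch_scale_cpu_capacity() == 512 (half of SCHED_CAPACITY_SCALE),
 * cap_scale(100 ms, 512) == 50 ms >= 10 ms, so the task fits; a task
 * needing dl_runtime == 60 ms over the same deadline would not.
 */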
330 :
331 : extern void init_dl_bw(struct dl_bw *dl_b);
332 : extern int sched_dl_global_validate(void);
333 : extern void sched_dl_do_global(void);
334 : extern int sched_dl_overflow(struct task_struct *p, int policy, const struct sched_attr *attr);
335 : extern void __setparam_dl(struct task_struct *p, const struct sched_attr *attr);
336 : extern void __getparam_dl(struct task_struct *p, struct sched_attr *attr);
337 : extern bool __checkparam_dl(const struct sched_attr *attr);
338 : extern bool dl_param_changed(struct task_struct *p, const struct sched_attr *attr);
339 : extern int dl_cpuset_cpumask_can_shrink(const struct cpumask *cur, const struct cpumask *trial);
340 : extern int dl_cpu_busy(int cpu, struct task_struct *p);
341 :
342 : #ifdef CONFIG_CGROUP_SCHED
343 :
344 : struct cfs_rq;
345 : struct rt_rq;
346 :
347 : extern struct list_head task_groups;
348 :
349 : struct cfs_bandwidth {
350 : #ifdef CONFIG_CFS_BANDWIDTH
351 : raw_spinlock_t lock;
352 : ktime_t period;
353 : u64 quota;
354 : u64 runtime;
355 : u64 burst;
356 : u64 runtime_snap;
357 : s64 hierarchical_quota;
358 :
359 : u8 idle;
360 : u8 period_active;
361 : u8 slack_started;
362 : struct hrtimer period_timer;
363 : struct hrtimer slack_timer;
364 : struct list_head throttled_cfs_rq;
365 :
366 : /* Statistics: */
367 : int nr_periods;
368 : int nr_throttled;
369 : int nr_burst;
370 : u64 throttled_time;
371 : u64 burst_time;
372 : #endif
373 : };
374 :
375 : /* Task group related information */
376 : struct task_group {
377 : struct cgroup_subsys_state css;
378 :
379 : #ifdef CONFIG_FAIR_GROUP_SCHED
380 : /* schedulable entities of this group on each CPU */
381 : struct sched_entity **se;
382 : /* runqueue "owned" by this group on each CPU */
383 : struct cfs_rq **cfs_rq;
384 : unsigned long shares;
385 :
386 : /* A positive value indicates that this is a SCHED_IDLE group. */
387 : int idle;
388 :
389 : #ifdef CONFIG_SMP
390 : /*
391 : * load_avg can be heavily contended at clock tick time, so put
392 : * it in its own cacheline separated from the fields above which
393 : * will also be accessed at each tick.
394 : */
395 : atomic_long_t load_avg ____cacheline_aligned;
396 : #endif
397 : #endif
398 :
399 : #ifdef CONFIG_RT_GROUP_SCHED
400 : struct sched_rt_entity **rt_se;
401 : struct rt_rq **rt_rq;
402 :
403 : struct rt_bandwidth rt_bandwidth;
404 : #endif
405 :
406 : struct rcu_head rcu;
407 : struct list_head list;
408 :
409 : struct task_group *parent;
410 : struct list_head siblings;
411 : struct list_head children;
412 :
413 : #ifdef CONFIG_SCHED_AUTOGROUP
414 : struct autogroup *autogroup;
415 : #endif
416 :
417 : struct cfs_bandwidth cfs_bandwidth;
418 :
419 : #ifdef CONFIG_UCLAMP_TASK_GROUP
420 : /* The two decimal precision [%] value requested from user-space */
421 : unsigned int uclamp_pct[UCLAMP_CNT];
422 : /* Clamp values requested for a task group */
423 : struct uclamp_se uclamp_req[UCLAMP_CNT];
424 : /* Effective clamp values used for a task group */
425 : struct uclamp_se uclamp[UCLAMP_CNT];
426 : #endif
427 :
428 : };
429 :
430 : #ifdef CONFIG_FAIR_GROUP_SCHED
431 : #define ROOT_TASK_GROUP_LOAD NICE_0_LOAD
432 :
433 : /*
434 : * A weight of 0 or 1 can cause arithmetic problems.
435 : * The weight of a cfs_rq is the sum of the weights of the entities
436 : * queued on it, so the weight of an entity should not be too large,
437 : * and neither should the shares value of a task group.
438 : * (The default weight is 1024 - so there's no practical
439 : * limitation from this.)
440 : */
441 : #define MIN_SHARES (1UL << 1)
442 : #define MAX_SHARES (1UL << 18)
443 : #endif
444 :
445 : typedef int (*tg_visitor)(struct task_group *, void *);
446 :
447 : extern int walk_tg_tree_from(struct task_group *from,
448 : tg_visitor down, tg_visitor up, void *data);
449 :
450 : /*
451 : * Iterate the full tree, calling @down when first entering a node and @up when
452 : * leaving it for the final time.
453 : *
454 : * Caller must hold rcu_lock or sufficient equivalent.
455 : */
456 : static inline int walk_tg_tree(tg_visitor down, tg_visitor up, void *data)
457 : {
458 : return walk_tg_tree_from(&root_task_group, down, up, data);
459 : }
460 :
461 : extern int tg_nop(struct task_group *tg, void *data);
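/*
 * Editor's sketch (hypothetical helpers, not part of the upstream header):
 * a minimal walk_tg_tree() user that counts task groups. tg_nop() serves as
 * the no-op @up callback; the caller must hold rcu_read_lock() as required
 * by walk_tg_tree_from().
 */
static inline int sched_doc_count_tg(struct task_group *tg, void *data)
{
	(*(int *)data)++;
	return 0;		/* a non-zero return would abort the walk */
}

static inline int sched_doc_nr_task_groups(void)
{
	int nr = 0;

	rcu_read_lock();
	walk_tg_tree(sched_doc_count_tg, tg_nop, &nr);
	rcu_read_unlock();

	return nr;
}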
462 :
463 : extern void free_fair_sched_group(struct task_group *tg);
464 : extern int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent);
465 : extern void online_fair_sched_group(struct task_group *tg);
466 : extern void unregister_fair_sched_group(struct task_group *tg);
467 : extern void init_tg_cfs_entry(struct task_group *tg, struct cfs_rq *cfs_rq,
468 : struct sched_entity *se, int cpu,
469 : struct sched_entity *parent);
470 : extern void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b);
471 :
472 : extern void __refill_cfs_bandwidth_runtime(struct cfs_bandwidth *cfs_b);
473 : extern void start_cfs_bandwidth(struct cfs_bandwidth *cfs_b);
474 : extern void unthrottle_cfs_rq(struct cfs_rq *cfs_rq);
475 :
476 : extern void unregister_rt_sched_group(struct task_group *tg);
477 : extern void free_rt_sched_group(struct task_group *tg);
478 : extern int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent);
479 : extern void init_tg_rt_entry(struct task_group *tg, struct rt_rq *rt_rq,
480 : struct sched_rt_entity *rt_se, int cpu,
481 : struct sched_rt_entity *parent);
482 : extern int sched_group_set_rt_runtime(struct task_group *tg, long rt_runtime_us);
483 : extern int sched_group_set_rt_period(struct task_group *tg, u64 rt_period_us);
484 : extern long sched_group_rt_runtime(struct task_group *tg);
485 : extern long sched_group_rt_period(struct task_group *tg);
486 : extern int sched_rt_can_attach(struct task_group *tg, struct task_struct *tsk);
487 :
488 : extern struct task_group *sched_create_group(struct task_group *parent);
489 : extern void sched_online_group(struct task_group *tg,
490 : struct task_group *parent);
491 : extern void sched_destroy_group(struct task_group *tg);
492 : extern void sched_release_group(struct task_group *tg);
493 :
494 : extern void sched_move_task(struct task_struct *tsk);
495 :
496 : #ifdef CONFIG_FAIR_GROUP_SCHED
497 : extern int sched_group_set_shares(struct task_group *tg, unsigned long shares);
498 :
499 : extern int sched_group_set_idle(struct task_group *tg, long idle);
500 :
501 : #ifdef CONFIG_SMP
502 : extern void set_task_rq_fair(struct sched_entity *se,
503 : struct cfs_rq *prev, struct cfs_rq *next);
504 : #else /* !CONFIG_SMP */
505 : static inline void set_task_rq_fair(struct sched_entity *se,
506 : struct cfs_rq *prev, struct cfs_rq *next) { }
507 : #endif /* CONFIG_SMP */
508 : #endif /* CONFIG_FAIR_GROUP_SCHED */
509 :
510 : #else /* CONFIG_CGROUP_SCHED */
511 :
512 : struct cfs_bandwidth { };
513 :
514 : #endif /* CONFIG_CGROUP_SCHED */
515 :
516 : /* CFS-related fields in a runqueue */
517 : struct cfs_rq {
518 : struct load_weight load;
519 : unsigned int nr_running;
520 : unsigned int h_nr_running; /* SCHED_{NORMAL,BATCH,IDLE} */
521 : unsigned int idle_nr_running; /* SCHED_IDLE */
522 : unsigned int idle_h_nr_running; /* SCHED_IDLE */
523 :
524 : u64 exec_clock;
525 : u64 min_vruntime;
526 : #ifdef CONFIG_SCHED_CORE
527 : unsigned int forceidle_seq;
528 : u64 min_vruntime_fi;
529 : #endif
530 :
531 : #ifndef CONFIG_64BIT
532 : u64 min_vruntime_copy;
533 : #endif
534 :
535 : struct rb_root_cached tasks_timeline;
536 :
537 : /*
538 : * 'curr' points to the currently running entity on this cfs_rq.
539 : * It is set to NULL otherwise (i.e. when none is currently running).
540 : */
541 : struct sched_entity *curr;
542 : struct sched_entity *next;
543 : struct sched_entity *last;
544 : struct sched_entity *skip;
545 :
546 : #ifdef CONFIG_SCHED_DEBUG
547 : unsigned int nr_spread_over;
548 : #endif
549 :
550 : #ifdef CONFIG_SMP
551 : /*
552 : * CFS load tracking
553 : */
554 : struct sched_avg avg;
555 : #ifndef CONFIG_64BIT
556 : u64 load_last_update_time_copy;
557 : #endif
558 : struct {
559 : raw_spinlock_t lock ____cacheline_aligned;
560 : int nr;
561 : unsigned long load_avg;
562 : unsigned long util_avg;
563 : unsigned long runnable_avg;
564 : } removed;
565 :
566 : #ifdef CONFIG_FAIR_GROUP_SCHED
567 : unsigned long tg_load_avg_contrib;
568 : long propagate;
569 : long prop_runnable_sum;
570 :
571 : /*
572 : * h_load = weight * f(tg)
573 : *
574 : * Where f(tg) is the recursive weight fraction assigned to
575 : * this group.
576 : */
577 : unsigned long h_load;
578 : u64 last_h_load_update;
579 : struct sched_entity *h_load_next;
580 : #endif /* CONFIG_FAIR_GROUP_SCHED */
581 : #endif /* CONFIG_SMP */
582 :
583 : #ifdef CONFIG_FAIR_GROUP_SCHED
584 : struct rq *rq; /* CPU runqueue to which this cfs_rq is attached */
585 :
586 : /*
587 : * leaf cfs_rqs are those that hold tasks (lowest schedulable entity in
588 : * a hierarchy). Non-leaf cfs_rqs hold other, higher-level schedulable
589 : * entities (like users, containers, etc.)
590 : *
591 : * leaf_cfs_rq_list ties together list of leaf cfs_rq's in a CPU.
592 : * This list is used during load balance.
593 : */
594 : int on_list;
595 : struct list_head leaf_cfs_rq_list;
596 : struct task_group *tg; /* group that "owns" this runqueue */
597 :
598 : /* Locally cached copy of our task_group's idle value */
599 : int idle;
600 :
601 : #ifdef CONFIG_CFS_BANDWIDTH
602 : int runtime_enabled;
603 : s64 runtime_remaining;
604 :
605 : u64 throttled_clock;
606 : u64 throttled_clock_task;
607 : u64 throttled_clock_task_time;
608 : int throttled;
609 : int throttle_count;
610 : struct list_head throttled_list;
611 : #endif /* CONFIG_CFS_BANDWIDTH */
612 : #endif /* CONFIG_FAIR_GROUP_SCHED */
613 : };
614 :
615 : static inline int rt_bandwidth_enabled(void)
616 : {
617 0 : return sysctl_sched_rt_runtime >= 0;
618 : }
619 :
620 : /* RT IPI pull logic requires IRQ_WORK */
621 : #if defined(CONFIG_IRQ_WORK) && defined(CONFIG_SMP)
622 : # define HAVE_RT_PUSH_IPI
623 : #endif
624 :
625 : /* Real-Time classes' related field in a runqueue: */
626 : struct rt_rq {
627 : struct rt_prio_array active;
628 : unsigned int rt_nr_running;
629 : unsigned int rr_nr_running;
630 : #if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
631 : struct {
632 : int curr; /* highest queued rt task prio */
633 : #ifdef CONFIG_SMP
634 : int next; /* next highest */
635 : #endif
636 : } highest_prio;
637 : #endif
638 : #ifdef CONFIG_SMP
639 : unsigned int rt_nr_migratory;
640 : unsigned int rt_nr_total;
641 : int overloaded;
642 : struct plist_head pushable_tasks;
643 :
644 : #endif /* CONFIG_SMP */
645 : int rt_queued;
646 :
647 : int rt_throttled;
648 : u64 rt_time;
649 : u64 rt_runtime;
650 : /* Nests inside the rq lock: */
651 : raw_spinlock_t rt_runtime_lock;
652 :
653 : #ifdef CONFIG_RT_GROUP_SCHED
654 : unsigned int rt_nr_boosted;
655 :
656 : struct rq *rq;
657 : struct task_group *tg;
658 : #endif
659 : };
660 :
661 : static inline bool rt_rq_is_runnable(struct rt_rq *rt_rq)
662 : {
663 : return rt_rq->rt_queued && rt_rq->rt_nr_running;
664 : }
665 :
666 : /* Deadline class' related fields in a runqueue */
667 : struct dl_rq {
668 : /* runqueue is an rbtree, ordered by deadline */
669 : struct rb_root_cached root;
670 :
671 : unsigned int dl_nr_running;
672 :
673 : #ifdef CONFIG_SMP
674 : /*
675 : * Deadline values of the currently executing and the
676 : * earliest ready task on this rq. Caching these facilitates
677 : * the decision whether or not a ready but not running task
678 : * should migrate somewhere else.
679 : */
680 : struct {
681 : u64 curr;
682 : u64 next;
683 : } earliest_dl;
684 :
685 : unsigned int dl_nr_migratory;
686 : int overloaded;
687 :
688 : /*
689 : * Tasks on this rq that can be pushed away. They are kept in
690 : * an rb-tree, ordered by tasks' deadlines, with caching
691 : * of the leftmost (earliest deadline) element.
692 : */
693 : struct rb_root_cached pushable_dl_tasks_root;
694 : #else
695 : struct dl_bw dl_bw;
696 : #endif
697 : /*
698 : * "Active utilization" for this runqueue: increased when a
699 : * task wakes up (becomes TASK_RUNNING) and decreased when a
700 : * task blocks
701 : */
702 : u64 running_bw;
703 :
704 : /*
705 : * Utilization of the tasks "assigned" to this runqueue (including
706 : * the tasks that are in runqueue and the tasks that executed on this
707 : * CPU and blocked). Increased when a task moves to this runqueue, and
708 : * decreased when the task moves away (migrates, changes scheduling
709 : * policy, or terminates).
710 : * This is needed to compute the "inactive utilization" for the
711 : * runqueue (inactive utilization = this_bw - running_bw).
712 : */
713 : u64 this_bw;
714 : u64 extra_bw;
715 :
716 : /*
717 : * Inverse of the fraction of CPU utilization that can be reclaimed
718 : * by the GRUB algorithm.
719 : */
720 : u64 bw_ratio;
721 : };
722 :
723 : #ifdef CONFIG_FAIR_GROUP_SCHED
724 : /* An entity is a task if it doesn't "own" a runqueue */
725 : #define entity_is_task(se) (!se->my_q)
726 :
727 : static inline void se_update_runnable(struct sched_entity *se)
728 : {
729 : if (!entity_is_task(se))
730 : se->runnable_weight = se->my_q->h_nr_running;
731 : }
732 :
733 : static inline long se_runnable(struct sched_entity *se)
734 : {
735 : if (entity_is_task(se))
736 : return !!se->on_rq;
737 : else
738 : return se->runnable_weight;
739 : }
740 :
741 : #else
742 : #define entity_is_task(se) 1
743 :
744 : static inline void se_update_runnable(struct sched_entity *se) {}
745 :
746 : static inline long se_runnable(struct sched_entity *se)
747 : {
748 : return !!se->on_rq;
749 : }
750 : #endif
751 :
752 : #ifdef CONFIG_SMP
753 : /*
754 : * XXX we want to get rid of these helpers and use the full load resolution.
755 : */
756 : static inline long se_weight(struct sched_entity *se)
757 : {
758 : return scale_load_down(se->load.weight);
759 : }
760 :
761 :
762 : static inline bool sched_asym_prefer(int a, int b)
763 : {
764 : return arch_asym_cpu_priority(a) > arch_asym_cpu_priority(b);
765 : }
766 :
767 : struct perf_domain {
768 : struct em_perf_domain *em_pd;
769 : struct perf_domain *next;
770 : struct rcu_head rcu;
771 : };
772 :
773 : /* Scheduling group status flags */
774 : #define SG_OVERLOAD 0x1 /* More than one runnable task on a CPU. */
775 : #define SG_OVERUTILIZED 0x2 /* One or more CPUs are over-utilized. */
776 :
777 : /*
778 : * We add the notion of a root-domain which will be used to define per-domain
779 : * variables. Each exclusive cpuset essentially defines an island domain by
780 : * fully partitioning the member CPUs from any other cpuset. Whenever a new
781 : * exclusive cpuset is created, we also create and attach a new root-domain
782 : * object.
783 : *
784 : */
785 : struct root_domain {
786 : atomic_t refcount;
787 : atomic_t rto_count;
788 : struct rcu_head rcu;
789 : cpumask_var_t span;
790 : cpumask_var_t online;
791 :
792 : /*
793 : * Indicate pullable load on at least one CPU, e.g:
794 : * - More than one runnable task
795 : * - Running task is misfit
796 : */
797 : int overload;
798 :
799 : /* Indicate one or more cpus over-utilized (tipping point) */
800 : int overutilized;
801 :
802 : /*
803 : * The bit corresponding to a CPU gets set here if such CPU has more
804 : * than one runnable -deadline task (as it is below for RT tasks).
805 : */
806 : cpumask_var_t dlo_mask;
807 : atomic_t dlo_count;
808 : struct dl_bw dl_bw;
809 : struct cpudl cpudl;
810 :
811 : /*
812 : * Indicates whether a root_domain's dl_bw has been checked or
813 : * updated. It is a monotonically increasing value.
814 : *
815 : * Wrap-around of the counter is in principle a corner case, but since
816 : * it is a u64 it is 'big enough' for that not to be a concern.
817 : */
818 : u64 visit_gen;
819 :
820 : #ifdef HAVE_RT_PUSH_IPI
821 : /*
822 : * For IPI pull requests, loop across the rto_mask.
823 : */
824 : struct irq_work rto_push_work;
825 : raw_spinlock_t rto_lock;
826 : /* These are only updated and read within rto_lock */
827 : int rto_loop;
828 : int rto_cpu;
829 : /* These atomics are updated outside of a lock */
830 : atomic_t rto_loop_next;
831 : atomic_t rto_loop_start;
832 : #endif
833 : /*
834 : * The "RT overload" flag: it gets set if a CPU has more than
835 : * one runnable RT task.
836 : */
837 : cpumask_var_t rto_mask;
838 : struct cpupri cpupri;
839 :
840 : unsigned long max_cpu_capacity;
841 :
842 : /*
843 : * NULL-terminated list of performance domains intersecting with the
844 : * CPUs of the rd. Protected by RCU.
845 : */
846 : struct perf_domain __rcu *pd;
847 : };
848 :
849 : extern void init_defrootdomain(void);
850 : extern int sched_init_domains(const struct cpumask *cpu_map);
851 : extern void rq_attach_root(struct rq *rq, struct root_domain *rd);
852 : extern void sched_get_rd(struct root_domain *rd);
853 : extern void sched_put_rd(struct root_domain *rd);
854 :
855 : #ifdef HAVE_RT_PUSH_IPI
856 : extern void rto_push_irq_work_func(struct irq_work *work);
857 : #endif
858 : #endif /* CONFIG_SMP */
859 :
860 : #ifdef CONFIG_UCLAMP_TASK
861 : /*
862 : * struct uclamp_bucket - Utilization clamp bucket
863 : * @value: utilization clamp value for tasks on this clamp bucket
864 : * @tasks: number of RUNNABLE tasks on this clamp bucket
865 : *
866 : * Keep track of how many tasks are RUNNABLE for a given utilization
867 : * clamp value.
868 : */
869 : struct uclamp_bucket {
870 : unsigned long value : bits_per(SCHED_CAPACITY_SCALE);
871 : unsigned long tasks : BITS_PER_LONG - bits_per(SCHED_CAPACITY_SCALE);
872 : };
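/*
 * Editor's note: with SCHED_CAPACITY_SCALE == 1024, bits_per(SCHED_CAPACITY_SCALE)
 * is 11, so on a 64-bit kernel each bucket packs an 11-bit clamp value and a
 * 53-bit RUNNABLE-task count into a single unsigned long.
 */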
873 :
874 : /*
875 : * struct uclamp_rq - rq's utilization clamp
876 : * @value: currently active clamp values for a rq
877 : * @bucket: utilization clamp buckets affecting a rq
878 : *
879 : * Keep track of RUNNABLE tasks on a rq to aggregate their clamp values.
880 : * A clamp value is affecting a rq when there is at least one task RUNNABLE
881 : * (or actually running) with that value.
882 : *
883 : * There are up to UCLAMP_CNT possible different clamp values, currently there
884 : * are only two: minimum utilization and maximum utilization.
885 : *
886 : * All utilization clamping values are MAX aggregated, since:
887 : * - for util_min: we want to run the CPU at least at the max of the minimum
888 : * utilization required by its currently RUNNABLE tasks.
889 : * - for util_max: we want to allow the CPU to run up to the max of the
890 : * maximum utilization allowed by its currently RUNNABLE tasks.
891 : *
892 : * Since on each system we expect only a limited number of different
893 : * utilization clamp values (UCLAMP_BUCKETS), use a simple array to track
894 : * the metrics required to compute all the per-rq utilization clamp values.
895 : */
896 : struct uclamp_rq {
897 : unsigned int value;
898 : struct uclamp_bucket bucket[UCLAMP_BUCKETS];
899 : };
900 :
901 : DECLARE_STATIC_KEY_FALSE(sched_uclamp_used);
902 : #endif /* CONFIG_UCLAMP_TASK */
903 :
904 : /*
905 : * This is the main, per-CPU runqueue data structure.
906 : *
907 : * Locking rule: code that needs to lock multiple runqueues (such as the
908 : * load balancing or the thread migration code) must acquire the locks
909 : * in ascending &runqueue order.
910 : */
911 : struct rq {
912 : /* runqueue lock: */
913 : raw_spinlock_t __lock;
914 :
915 : /*
916 : * nr_running and cpu_load should be in the same cacheline because
917 : * remote CPUs use both these fields when doing load calculation.
918 : */
919 : unsigned int nr_running;
920 : #ifdef CONFIG_NUMA_BALANCING
921 : unsigned int nr_numa_running;
922 : unsigned int nr_preferred_running;
923 : unsigned int numa_migrate_on;
924 : #endif
925 : #ifdef CONFIG_NO_HZ_COMMON
926 : #ifdef CONFIG_SMP
927 : unsigned long last_blocked_load_update_tick;
928 : unsigned int has_blocked_load;
929 : call_single_data_t nohz_csd;
930 : #endif /* CONFIG_SMP */
931 : unsigned int nohz_tick_stopped;
932 : atomic_t nohz_flags;
933 : #endif /* CONFIG_NO_HZ_COMMON */
934 :
935 : #ifdef CONFIG_SMP
936 : unsigned int ttwu_pending;
937 : #endif
938 : u64 nr_switches;
939 :
940 : #ifdef CONFIG_UCLAMP_TASK
941 : /* Utilization clamp values based on CPU's RUNNABLE tasks */
942 : struct uclamp_rq uclamp[UCLAMP_CNT] ____cacheline_aligned;
943 : unsigned int uclamp_flags;
944 : #define UCLAMP_FLAG_IDLE 0x01
945 : #endif
946 :
947 : struct cfs_rq cfs;
948 : struct rt_rq rt;
949 : struct dl_rq dl;
950 :
951 : #ifdef CONFIG_FAIR_GROUP_SCHED
952 : /* list of leaf cfs_rq on this CPU: */
953 : struct list_head leaf_cfs_rq_list;
954 : struct list_head *tmp_alone_branch;
955 : #endif /* CONFIG_FAIR_GROUP_SCHED */
956 :
957 : /*
958 : * This is part of a global counter where only the total sum
959 : * over all CPUs matters. A task can increase this counter on
960 : * one CPU and if it got migrated afterwards it may decrease
961 : * it on another CPU. Always updated under the runqueue lock:
962 : */
963 : unsigned int nr_uninterruptible;
964 :
965 : struct task_struct __rcu *curr;
966 : struct task_struct *idle;
967 : struct task_struct *stop;
968 : unsigned long next_balance;
969 : struct mm_struct *prev_mm;
970 :
971 : unsigned int clock_update_flags;
972 : u64 clock;
973 : /* Ensure that all clocks are in the same cache line */
974 : u64 clock_task ____cacheline_aligned;
975 : u64 clock_pelt;
976 : unsigned long lost_idle_time;
977 :
978 : atomic_t nr_iowait;
979 :
980 : #ifdef CONFIG_SCHED_DEBUG
981 : u64 last_seen_need_resched_ns;
982 : int ticks_without_resched;
983 : #endif
984 :
985 : #ifdef CONFIG_MEMBARRIER
986 : int membarrier_state;
987 : #endif
988 :
989 : #ifdef CONFIG_SMP
990 : struct root_domain *rd;
991 : struct sched_domain __rcu *sd;
992 :
993 : unsigned long cpu_capacity;
994 : unsigned long cpu_capacity_orig;
995 :
996 : struct callback_head *balance_callback;
997 :
998 : unsigned char nohz_idle_balance;
999 : unsigned char idle_balance;
1000 :
1001 : unsigned long misfit_task_load;
1002 :
1003 : /* For active balancing */
1004 : int active_balance;
1005 : int push_cpu;
1006 : struct cpu_stop_work active_balance_work;
1007 :
1008 : /* CPU of this runqueue: */
1009 : int cpu;
1010 : int online;
1011 :
1012 : struct list_head cfs_tasks;
1013 :
1014 : struct sched_avg avg_rt;
1015 : struct sched_avg avg_dl;
1016 : #ifdef CONFIG_HAVE_SCHED_AVG_IRQ
1017 : struct sched_avg avg_irq;
1018 : #endif
1019 : #ifdef CONFIG_SCHED_THERMAL_PRESSURE
1020 : struct sched_avg avg_thermal;
1021 : #endif
1022 : u64 idle_stamp;
1023 : u64 avg_idle;
1024 :
1025 : unsigned long wake_stamp;
1026 : u64 wake_avg_idle;
1027 :
1028 : /* This is used to determine avg_idle's max value */
1029 : u64 max_idle_balance_cost;
1030 :
1031 : #ifdef CONFIG_HOTPLUG_CPU
1032 : struct rcuwait hotplug_wait;
1033 : #endif
1034 : #endif /* CONFIG_SMP */
1035 :
1036 : #ifdef CONFIG_IRQ_TIME_ACCOUNTING
1037 : u64 prev_irq_time;
1038 : #endif
1039 : #ifdef CONFIG_PARAVIRT
1040 : u64 prev_steal_time;
1041 : #endif
1042 : #ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING
1043 : u64 prev_steal_time_rq;
1044 : #endif
1045 :
1046 : /* calc_load related fields */
1047 : unsigned long calc_load_update;
1048 : long calc_load_active;
1049 :
1050 : #ifdef CONFIG_SCHED_HRTICK
1051 : #ifdef CONFIG_SMP
1052 : call_single_data_t hrtick_csd;
1053 : #endif
1054 : struct hrtimer hrtick_timer;
1055 : ktime_t hrtick_time;
1056 : #endif
1057 :
1058 : #ifdef CONFIG_SCHEDSTATS
1059 : /* latency stats */
1060 : struct sched_info rq_sched_info;
1061 : unsigned long long rq_cpu_time;
1062 : /* could above be rq->cfs_rq.exec_clock + rq->rt_rq.rt_runtime ? */
1063 :
1064 : /* sys_sched_yield() stats */
1065 : unsigned int yld_count;
1066 :
1067 : /* schedule() stats */
1068 : unsigned int sched_count;
1069 : unsigned int sched_goidle;
1070 :
1071 : /* try_to_wake_up() stats */
1072 : unsigned int ttwu_count;
1073 : unsigned int ttwu_local;
1074 : #endif
1075 :
1076 : #ifdef CONFIG_CPU_IDLE
1077 : /* Must be inspected within a rcu lock section */
1078 : struct cpuidle_state *idle_state;
1079 : #endif
1080 :
1081 : #ifdef CONFIG_SMP
1082 : unsigned int nr_pinned;
1083 : #endif
1084 : unsigned int push_busy;
1085 : struct cpu_stop_work push_work;
1086 :
1087 : #ifdef CONFIG_SCHED_CORE
1088 : /* per rq */
1089 : struct rq *core;
1090 : struct task_struct *core_pick;
1091 : unsigned int core_enabled;
1092 : unsigned int core_sched_seq;
1093 : struct rb_root core_tree;
1094 :
1095 : /* shared state -- careful with sched_core_cpu_deactivate() */
1096 : unsigned int core_task_seq;
1097 : unsigned int core_pick_seq;
1098 : unsigned long core_cookie;
1099 : unsigned int core_forceidle_count;
1100 : unsigned int core_forceidle_seq;
1101 : unsigned int core_forceidle_occupation;
1102 : u64 core_forceidle_start;
1103 : #endif
1104 : };
1105 :
1106 : #ifdef CONFIG_FAIR_GROUP_SCHED
1107 :
1108 : /* CPU runqueue to which this cfs_rq is attached */
1109 : static inline struct rq *rq_of(struct cfs_rq *cfs_rq)
1110 : {
1111 : return cfs_rq->rq;
1112 : }
1113 :
1114 : #else
1115 :
1116 : static inline struct rq *rq_of(struct cfs_rq *cfs_rq)
1117 : {
1118 3837 : return container_of(cfs_rq, struct rq, cfs);
1119 : }
1120 : #endif
1121 :
1122 : static inline int cpu_of(struct rq *rq)
1123 : {
1124 : #ifdef CONFIG_SMP
1125 : return rq->cpu;
1126 : #else
1127 : return 0;
1128 : #endif
1129 : }
1130 :
1131 : #define MDF_PUSH 0x01
1132 :
1133 : static inline bool is_migration_disabled(struct task_struct *p)
1134 : {
1135 : #ifdef CONFIG_SMP
1136 : return p->migration_disabled;
1137 : #else
1138 : return false;
1139 : #endif
1140 : }
1141 :
1142 : struct sched_group;
1143 : #ifdef CONFIG_SCHED_CORE
1144 : static inline struct cpumask *sched_group_span(struct sched_group *sg);
1145 :
1146 : DECLARE_STATIC_KEY_FALSE(__sched_core_enabled);
1147 :
1148 : static inline bool sched_core_enabled(struct rq *rq)
1149 : {
1150 : return static_branch_unlikely(&__sched_core_enabled) && rq->core_enabled;
1151 : }
1152 :
1153 : static inline bool sched_core_disabled(void)
1154 : {
1155 : return !static_branch_unlikely(&__sched_core_enabled);
1156 : }
1157 :
1158 : /*
1159 : * Be careful with this function; not for general use. The return value isn't
1160 : * stable unless you actually hold a relevant rq->__lock.
1161 : */
1162 : static inline raw_spinlock_t *rq_lockp(struct rq *rq)
1163 : {
1164 : if (sched_core_enabled(rq))
1165 : return &rq->core->__lock;
1166 :
1167 : return &rq->__lock;
1168 : }
1169 :
1170 : static inline raw_spinlock_t *__rq_lockp(struct rq *rq)
1171 : {
1172 : if (rq->core_enabled)
1173 : return &rq->core->__lock;
1174 :
1175 : return &rq->__lock;
1176 : }
1177 :
1178 : bool cfs_prio_less(struct task_struct *a, struct task_struct *b, bool fi);
1179 :
1180 : /*
1181 : * Helpers to check if the CPU's core cookie matches with the task's cookie
1182 : * when core scheduling is enabled.
1183 : * A special case is that the task's cookie always matches with CPU's core
1184 : * cookie if the CPU is in an idle core.
1185 : */
1186 : static inline bool sched_cpu_cookie_match(struct rq *rq, struct task_struct *p)
1187 : {
1188 : /* Ignore cookie match if core scheduler is not enabled on the CPU. */
1189 : if (!sched_core_enabled(rq))
1190 : return true;
1191 :
1192 : return rq->core->core_cookie == p->core_cookie;
1193 : }
1194 :
1195 : static inline bool sched_core_cookie_match(struct rq *rq, struct task_struct *p)
1196 : {
1197 : bool idle_core = true;
1198 : int cpu;
1199 :
1200 : /* Ignore cookie match if core scheduler is not enabled on the CPU. */
1201 : if (!sched_core_enabled(rq))
1202 : return true;
1203 :
1204 : for_each_cpu(cpu, cpu_smt_mask(cpu_of(rq))) {
1205 : if (!available_idle_cpu(cpu)) {
1206 : idle_core = false;
1207 : break;
1208 : }
1209 : }
1210 :
1211 : /*
1212 : * A CPU in an idle core is always the best choice for tasks with
1213 : * cookies.
1214 : */
1215 : return idle_core || rq->core->core_cookie == p->core_cookie;
1216 : }
1217 :
1218 : static inline bool sched_group_cookie_match(struct rq *rq,
1219 : struct task_struct *p,
1220 : struct sched_group *group)
1221 : {
1222 : int cpu;
1223 :
1224 : /* Ignore cookie match if core scheduler is not enabled on the CPU. */
1225 : if (!sched_core_enabled(rq))
1226 : return true;
1227 :
1228 : for_each_cpu_and(cpu, sched_group_span(group), p->cpus_ptr) {
1229 : if (sched_core_cookie_match(rq, p))
1230 : return true;
1231 : }
1232 : return false;
1233 : }
1234 :
1235 : static inline bool sched_core_enqueued(struct task_struct *p)
1236 : {
1237 : return !RB_EMPTY_NODE(&p->core_node);
1238 : }
1239 :
1240 : extern void sched_core_enqueue(struct rq *rq, struct task_struct *p);
1241 : extern void sched_core_dequeue(struct rq *rq, struct task_struct *p, int flags);
1242 :
1243 : extern void sched_core_get(void);
1244 : extern void sched_core_put(void);
1245 :
1246 : #else /* !CONFIG_SCHED_CORE */
1247 :
1248 : static inline bool sched_core_enabled(struct rq *rq)
1249 : {
1250 : return false;
1251 : }
1252 :
1253 : static inline bool sched_core_disabled(void)
1254 : {
1255 : return true;
1256 : }
1257 :
1258 : static inline raw_spinlock_t *rq_lockp(struct rq *rq)
1259 : {
1260 : return &rq->__lock;
1261 : }
1262 :
1263 : static inline raw_spinlock_t *__rq_lockp(struct rq *rq)
1264 : {
1265 : return &rq->__lock;
1266 : }
1267 :
1268 : static inline bool sched_cpu_cookie_match(struct rq *rq, struct task_struct *p)
1269 : {
1270 : return true;
1271 : }
1272 :
1273 : static inline bool sched_core_cookie_match(struct rq *rq, struct task_struct *p)
1274 : {
1275 : return true;
1276 : }
1277 :
1278 : static inline bool sched_group_cookie_match(struct rq *rq,
1279 : struct task_struct *p,
1280 : struct sched_group *group)
1281 : {
1282 : return true;
1283 : }
1284 : #endif /* CONFIG_SCHED_CORE */
1285 :
1286 : static inline void lockdep_assert_rq_held(struct rq *rq)
1287 : {
1288 4977 : lockdep_assert_held(__rq_lockp(rq));
1289 : }
1290 :
1291 : extern void raw_spin_rq_lock_nested(struct rq *rq, int subclass);
1292 : extern bool raw_spin_rq_trylock(struct rq *rq);
1293 : extern void raw_spin_rq_unlock(struct rq *rq);
1294 :
1295 : static inline void raw_spin_rq_lock(struct rq *rq)
1296 : {
1297 1465 : raw_spin_rq_lock_nested(rq, 0);
1298 : }
1299 :
1300 : static inline void raw_spin_rq_lock_irq(struct rq *rq)
1301 : {
1302 0 : local_irq_disable();
1303 0 : raw_spin_rq_lock(rq);
1304 : }
1305 :
1306 : static inline void raw_spin_rq_unlock_irq(struct rq *rq)
1307 : {
1308 618 : raw_spin_rq_unlock(rq);
1309 : local_irq_enable();
1310 : }
1311 :
1312 : static inline unsigned long _raw_spin_rq_lock_irqsave(struct rq *rq)
1313 : {
1314 : unsigned long flags;
1315 0 : local_irq_save(flags);
1316 0 : raw_spin_rq_lock(rq);
1317 : return flags;
1318 : }
1319 :
1320 : static inline void raw_spin_rq_unlock_irqrestore(struct rq *rq, unsigned long flags)
1321 : {
1322 0 : raw_spin_rq_unlock(rq);
1323 0 : local_irq_restore(flags);
1324 : }
1325 :
1326 : #define raw_spin_rq_lock_irqsave(rq, flags) \
1327 : do { \
1328 : flags = _raw_spin_rq_lock_irqsave(rq); \
1329 : } while (0)
1330 :
1331 : #ifdef CONFIG_SCHED_SMT
1332 : extern void __update_idle_core(struct rq *rq);
1333 :
1334 : static inline void update_idle_core(struct rq *rq)
1335 : {
1336 : if (static_branch_unlikely(&sched_smt_present))
1337 : __update_idle_core(rq);
1338 : }
1339 :
1340 : #else
1341 : static inline void update_idle_core(struct rq *rq) { }
1342 : #endif
1343 :
1344 : DECLARE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
1345 :
1346 : #define cpu_rq(cpu) (&per_cpu(runqueues, (cpu)))
1347 : #define this_rq() this_cpu_ptr(&runqueues)
1348 : #define task_rq(p) cpu_rq(task_cpu(p))
1349 : #define cpu_curr(cpu) (cpu_rq(cpu)->curr)
1350 : #define raw_rq() raw_cpu_ptr(&runqueues)
1351 :
1352 : #ifdef CONFIG_FAIR_GROUP_SCHED
1353 : static inline struct task_struct *task_of(struct sched_entity *se)
1354 : {
1355 : SCHED_WARN_ON(!entity_is_task(se));
1356 : return container_of(se, struct task_struct, se);
1357 : }
1358 :
1359 : static inline struct cfs_rq *task_cfs_rq(struct task_struct *p)
1360 : {
1361 : return p->se.cfs_rq;
1362 : }
1363 :
1364 : /* runqueue on which this entity is (to be) queued */
1365 : static inline struct cfs_rq *cfs_rq_of(struct sched_entity *se)
1366 : {
1367 : return se->cfs_rq;
1368 : }
1369 :
1370 : /* runqueue "owned" by this group */
1371 : static inline struct cfs_rq *group_cfs_rq(struct sched_entity *grp)
1372 : {
1373 : return grp->my_q;
1374 : }
1375 :
1376 : #else
1377 :
1378 : static inline struct task_struct *task_of(struct sched_entity *se)
1379 : {
1380 4127 : return container_of(se, struct task_struct, se);
1381 : }
1382 :
1383 : static inline struct cfs_rq *task_cfs_rq(struct task_struct *p)
1384 : {
1385 721 : return &task_rq(p)->cfs;
1386 : }
1387 :
1388 : static inline struct cfs_rq *cfs_rq_of(struct sched_entity *se)
1389 : {
1390 3507 : struct task_struct *p = task_of(se);
1391 3507 : struct rq *rq = task_rq(p);
1392 :
1393 : return &rq->cfs;
1394 : }
1395 :
1396 : /* runqueue "owned" by this group */
1397 : static inline struct cfs_rq *group_cfs_rq(struct sched_entity *grp)
1398 : {
1399 : return NULL;
1400 : }
1401 : #endif
1402 :
1403 : extern void update_rq_clock(struct rq *rq);
1404 :
1405 : /*
1406 : * rq::clock_update_flags bits
1407 : *
1408 : * %RQCF_REQ_SKIP - will request skipping of clock update on the next
1409 : * call to __schedule(). This is an optimisation to avoid
1410 : * neighbouring rq clock updates.
1411 : *
1412 : * %RQCF_ACT_SKIP - is set from inside of __schedule() when skipping is
1413 : * in effect and calls to update_rq_clock() are being ignored.
1414 : *
1415 : * %RQCF_UPDATED - is a debug flag that indicates whether a call has been
1416 : * made to update_rq_clock() since the last time rq::lock was pinned.
1417 : *
1418 : * If inside of __schedule(), clock_update_flags will have been
1419 : * shifted left (a left shift is a cheap operation for the fast path
1420 : * to promote %RQCF_REQ_SKIP to %RQCF_ACT_SKIP), so you must use,
1421 : *
1422 : * if (rq->clock_update_flags >= RQCF_UPDATED)
1423 : *
1424 : * to check if %RQCF_UPDATED is set. It'll never be shifted more than
1425 : * one position though, because the next rq_unpin_lock() will shift it
1426 : * back.
1427 : */
1428 : #define RQCF_REQ_SKIP 0x01
1429 : #define RQCF_ACT_SKIP 0x02
1430 : #define RQCF_UPDATED 0x04
1431 :
1432 2496 : static inline void assert_clock_updated(struct rq *rq)
1433 : {
1434 : /*
1435 : * The only reason for not seeing a clock update since the
1436 : * last rq_pin_lock() is if we're currently skipping updates.
1437 : */
1438 2496 : SCHED_WARN_ON(rq->clock_update_flags < RQCF_ACT_SKIP);
1439 2496 : }
1440 :
1441 : static inline u64 rq_clock(struct rq *rq)
1442 : {
1443 0 : lockdep_assert_rq_held(rq);
1444 0 : assert_clock_updated(rq);
1445 :
1446 0 : return rq->clock;
1447 : }
1448 :
1449 : static inline u64 rq_clock_task(struct rq *rq)
1450 : {
1451 13 : lockdep_assert_rq_held(rq);
1452 2496 : assert_clock_updated(rq);
1453 :
1454 2483 : return rq->clock_task;
1455 : }
1456 :
1457 : /**
1458 : * By default the decay is the default pelt decay period.
1459 : * The decay shift can change the decay period in
1460 : * multiples of 32.
1461 : * Decay shift Decay period(ms)
1462 : * 0 32
1463 : * 1 64
1464 : * 2 128
1465 : * 3 256
1466 : * 4 512
1467 : */
1468 : extern int sched_thermal_decay_shift;
1469 :
1470 : static inline u64 rq_clock_thermal(struct rq *rq)
1471 : {
1472 13 : return rq_clock_task(rq) >> sched_thermal_decay_shift;
1473 : }
1474 :
1475 : static inline void rq_clock_skip_update(struct rq *rq)
1476 : {
1477 301 : lockdep_assert_rq_held(rq);
1478 301 : rq->clock_update_flags |= RQCF_REQ_SKIP;
1479 : }
1480 :
1481 : /*
1482 : * See rt task throttling, which is the only time a skip
1483 : * request is canceled.
1484 : */
1485 : static inline void rq_clock_cancel_skipupdate(struct rq *rq)
1486 : {
1487 0 : lockdep_assert_rq_held(rq);
1488 0 : rq->clock_update_flags &= ~RQCF_REQ_SKIP;
1489 : }
1490 :
1491 : struct rq_flags {
1492 : unsigned long flags;
1493 : struct pin_cookie cookie;
1494 : #ifdef CONFIG_SCHED_DEBUG
1495 : /*
1496 : * A copy of (rq::clock_update_flags & RQCF_UPDATED) for the
1497 : * current pin context is stashed here in case it needs to be
1498 : * restored in rq_repin_lock().
1499 : */
1500 : unsigned int clock_update_flags;
1501 : #endif
1502 : };
1503 :
1504 : extern struct callback_head balance_push_callback;
1505 :
1506 : /*
1507 : * Lockdep annotation that avoids accidental unlocks; it's like a
1508 : * sticky/continuous lockdep_assert_held().
1509 : *
1510 : * This avoids code that has access to 'struct rq *rq' (basically everything in
1511 : * the scheduler) from accidentally unlocking the rq if they do not also have a
1512 : * copy of the (on-stack) 'struct rq_flags rf'.
1513 : *
1514 : * Also see Documentation/locking/lockdep-design.rst.
1515 : */
1516 : static inline void rq_pin_lock(struct rq *rq, struct rq_flags *rf)
1517 : {
1518 : rf->cookie = lockdep_pin_lock(__rq_lockp(rq));
1519 :
1520 : #ifdef CONFIG_SCHED_DEBUG
1521 1464 : rq->clock_update_flags &= (RQCF_REQ_SKIP|RQCF_ACT_SKIP);
1522 1464 : rf->clock_update_flags = 0;
1523 : #ifdef CONFIG_SMP
1524 : SCHED_WARN_ON(rq->balance_callback && rq->balance_callback != &balance_push_callback);
1525 : #endif
1526 : #endif
1527 : }
1528 :
1529 : static inline void rq_unpin_lock(struct rq *rq, struct rq_flags *rf)
1530 : {
1531 : #ifdef CONFIG_SCHED_DEBUG
1532 727 : if (rq->clock_update_flags > RQCF_ACT_SKIP)
1533 522 : rf->clock_update_flags = RQCF_UPDATED;
1534 : #endif
1535 :
1536 1464 : lockdep_unpin_lock(__rq_lockp(rq), rf->cookie);
1537 : }
1538 :
1539 : static inline void rq_repin_lock(struct rq *rq, struct rq_flags *rf)
1540 : {
1541 : lockdep_repin_lock(__rq_lockp(rq), rf->cookie);
1542 :
1543 : #ifdef CONFIG_SCHED_DEBUG
1544 : /*
1545 : * Restore the value we stashed in @rf for this pin context.
1546 : */
1547 : rq->clock_update_flags |= rf->clock_update_flags;
1548 : #endif
1549 : }
1550 :
1551 : struct rq *__task_rq_lock(struct task_struct *p, struct rq_flags *rf)
1552 : __acquires(rq->lock);
1553 :
1554 : struct rq *task_rq_lock(struct task_struct *p, struct rq_flags *rf)
1555 : __acquires(p->pi_lock)
1556 : __acquires(rq->lock);
1557 :
1558 : static inline void __task_rq_unlock(struct rq *rq, struct rq_flags *rf)
1559 : __releases(rq->lock)
1560 : {
1561 0 : rq_unpin_lock(rq, rf);
1562 0 : raw_spin_rq_unlock(rq);
1563 : }
1564 :
1565 : static inline void
1566 0 : task_rq_unlock(struct rq *rq, struct task_struct *p, struct rq_flags *rf)
1567 : __releases(rq->lock)
1568 : __releases(p->pi_lock)
1569 : {
1570 432 : rq_unpin_lock(rq, rf);
1571 216 : raw_spin_rq_unlock(rq);
1572 432 : raw_spin_unlock_irqrestore(&p->pi_lock, rf->flags);
1573 0 : }
1574 :
1575 : static inline void
1576 : rq_lock_irqsave(struct rq *rq, struct rq_flags *rf)
1577 : __acquires(rq->lock)
1578 : {
1579 : raw_spin_rq_lock_irqsave(rq, rf->flags);
1580 : rq_pin_lock(rq, rf);
1581 : }
1582 :
1583 : static inline void
1584 : rq_lock_irq(struct rq *rq, struct rq_flags *rf)
1585 : __acquires(rq->lock)
1586 : {
1587 : raw_spin_rq_lock_irq(rq);
1588 : rq_pin_lock(rq, rf);
1589 : }
1590 :
1591 : static inline void
1592 : rq_lock(struct rq *rq, struct rq_flags *rf)
1593 : __acquires(rq->lock)
1594 : {
1595 1248 : raw_spin_rq_lock(rq);
1596 1248 : rq_pin_lock(rq, rf);
1597 : }
1598 :
1599 : static inline void
1600 : rq_unlock_irqrestore(struct rq *rq, struct rq_flags *rf)
1601 : __releases(rq->lock)
1602 : {
1603 : rq_unpin_lock(rq, rf);
1604 : raw_spin_rq_unlock_irqrestore(rq, rf->flags);
1605 : }
1606 :
1607 : static inline void
1608 : rq_unlock_irq(struct rq *rq, struct rq_flags *rf)
1609 : __releases(rq->lock)
1610 : {
1611 0 : rq_unpin_lock(rq, rf);
1612 : raw_spin_rq_unlock_irq(rq);
1613 : }
1614 :
1615 : static inline void
1616 : rq_unlock(struct rq *rq, struct rq_flags *rf)
1617 : __releases(rq->lock)
1618 : {
1619 1153 : rq_unpin_lock(rq, rf);
1620 630 : raw_spin_rq_unlock(rq);
1621 : }
1622 :
1623 : static inline struct rq *
1624 : this_rq_lock_irq(struct rq_flags *rf)
1625 : __acquires(rq->lock)
1626 : {
1627 : struct rq *rq;
1628 :
1629 : local_irq_disable();
1630 0 : rq = this_rq();
1631 0 : rq_lock(rq, rf);
1632 : return rq;
1633 : }
1634 :
1635 : #ifdef CONFIG_NUMA
1636 : enum numa_topology_type {
1637 : NUMA_DIRECT,
1638 : NUMA_GLUELESS_MESH,
1639 : NUMA_BACKPLANE,
1640 : };
1641 : extern enum numa_topology_type sched_numa_topology_type;
1642 : extern int sched_max_numa_distance;
1643 : extern bool find_numa_distance(int distance);
1644 : extern void sched_init_numa(int offline_node);
1645 : extern void sched_update_numa(int cpu, bool online);
1646 : extern void sched_domains_numa_masks_set(unsigned int cpu);
1647 : extern void sched_domains_numa_masks_clear(unsigned int cpu);
1648 : extern int sched_numa_find_closest(const struct cpumask *cpus, int cpu);
1649 : #else
1650 : static inline void sched_init_numa(int offline_node) { }
1651 : static inline void sched_update_numa(int cpu, bool online) { }
1652 : static inline void sched_domains_numa_masks_set(unsigned int cpu) { }
1653 : static inline void sched_domains_numa_masks_clear(unsigned int cpu) { }
1654 : static inline int sched_numa_find_closest(const struct cpumask *cpus, int cpu)
1655 : {
1656 : return nr_cpu_ids;
1657 : }
1658 : #endif
1659 :
1660 : #ifdef CONFIG_NUMA_BALANCING
1661 : /* The regions in numa_faults array from task_struct */
1662 : enum numa_faults_stats {
1663 : NUMA_MEM = 0,
1664 : NUMA_CPU,
1665 : NUMA_MEMBUF,
1666 : NUMA_CPUBUF
1667 : };
1668 : extern void sched_setnuma(struct task_struct *p, int node);
1669 : extern int migrate_task_to(struct task_struct *p, int cpu);
1670 : extern int migrate_swap(struct task_struct *p, struct task_struct *t,
1671 : int cpu, int scpu);
1672 : extern void init_numa_balancing(unsigned long clone_flags, struct task_struct *p);
1673 : #else
1674 : static inline void
1675 : init_numa_balancing(unsigned long clone_flags, struct task_struct *p)
1676 : {
1677 : }
1678 : #endif /* CONFIG_NUMA_BALANCING */
1679 :
1680 : #ifdef CONFIG_SMP
1681 :
1682 : static inline void
1683 : queue_balance_callback(struct rq *rq,
1684 : struct callback_head *head,
1685 : void (*func)(struct rq *rq))
1686 : {
1687 : lockdep_assert_rq_held(rq);
1688 :
1689 : if (unlikely(head->next || rq->balance_callback == &balance_push_callback))
1690 : return;
1691 :
1692 : head->func = (void (*)(struct callback_head *))func;
1693 : head->next = rq->balance_callback;
1694 : rq->balance_callback = head;
1695 : }
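/*
 * Editor's sketch (hypothetical names): a scheduling class typically queues a
 * deferred balancing pass while holding the rq lock, to be run after the lock
 * is released, e.g.:
 *
 *	static DEFINE_PER_CPU(struct callback_head, doc_push_head);
 *
 *	queue_balance_callback(rq, &per_cpu(doc_push_head, rq->cpu),
 *			       doc_push_tasks);
 *
 * where doc_push_tasks(struct rq *rq) does the actual push/pull work.
 */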
1696 :
1697 : #define rcu_dereference_check_sched_domain(p) \
1698 : rcu_dereference_check((p), \
1699 : lockdep_is_held(&sched_domains_mutex))
1700 :
1701 : /*
1702 : * The domain tree (rq->sd) is protected by RCU's quiescent state transition.
1703 : * See destroy_sched_domains: call_rcu for details.
1704 : *
1705 : * The domain tree of any CPU may only be accessed from within
1706 : * preempt-disabled sections.
1707 : */
1708 : #define for_each_domain(cpu, __sd) \
1709 : for (__sd = rcu_dereference_check_sched_domain(cpu_rq(cpu)->sd); \
1710 : __sd; __sd = __sd->parent)
1711 :
1712 : /**
1713 : * highest_flag_domain - Return highest sched_domain containing flag.
1714 : * @cpu: The CPU whose highest level of sched domain is to
1715 : * be returned.
1716 : * @flag: The flag to check for the highest sched_domain
1717 : * for the given CPU.
1718 : *
1719 : * Returns the highest sched_domain of a CPU which contains the given flag.
1720 : */
1721 : static inline struct sched_domain *highest_flag_domain(int cpu, int flag)
1722 : {
1723 : struct sched_domain *sd, *hsd = NULL;
1724 :
1725 : for_each_domain(cpu, sd) {
1726 : if (!(sd->flags & flag))
1727 : break;
1728 : hsd = sd;
1729 : }
1730 :
1731 : return hsd;
1732 : }
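/*
 * Editor's note: a typical lookup is
 * highest_flag_domain(cpu, SD_SHARE_PKG_RESOURCES), which yields the
 * last-level-cache domain used to populate the per-CPU sd_llc pointer below.
 */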
1733 :
1734 : static inline struct sched_domain *lowest_flag_domain(int cpu, int flag)
1735 : {
1736 : struct sched_domain *sd;
1737 :
1738 : for_each_domain(cpu, sd) {
1739 : if (sd->flags & flag)
1740 : break;
1741 : }
1742 :
1743 : return sd;
1744 : }
1745 :
1746 : DECLARE_PER_CPU(struct sched_domain __rcu *, sd_llc);
1747 : DECLARE_PER_CPU(int, sd_llc_size);
1748 : DECLARE_PER_CPU(int, sd_llc_id);
1749 : DECLARE_PER_CPU(struct sched_domain_shared __rcu *, sd_llc_shared);
1750 : DECLARE_PER_CPU(struct sched_domain __rcu *, sd_numa);
1751 : DECLARE_PER_CPU(struct sched_domain __rcu *, sd_asym_packing);
1752 : DECLARE_PER_CPU(struct sched_domain __rcu *, sd_asym_cpucapacity);
1753 : extern struct static_key_false sched_asym_cpucapacity;
1754 :
1755 : struct sched_group_capacity {
1756 : atomic_t ref;
1757 : /*
1758 : * CPU capacity of this group, SCHED_CAPACITY_SCALE being max capacity
1759 : * for a single CPU.
1760 : */
1761 : unsigned long capacity;
1762 : unsigned long min_capacity; /* Min per-CPU capacity in group */
1763 : unsigned long max_capacity; /* Max per-CPU capacity in group */
1764 : unsigned long next_update;
1765 : int imbalance; /* XXX unrelated to capacity but shared group state */
1766 :
1767 : #ifdef CONFIG_SCHED_DEBUG
1768 : int id;
1769 : #endif
1770 :
1771 : unsigned long cpumask[]; /* Balance mask */
1772 : };
1773 :
1774 : struct sched_group {
1775 : struct sched_group *next; /* Must be a circular list */
1776 : atomic_t ref;
1777 :
1778 : unsigned int group_weight;
1779 : struct sched_group_capacity *sgc;
1780 : int asym_prefer_cpu; /* CPU of highest priority in group */
1781 : int flags;
1782 :
1783 : /*
1784 : * The CPUs this group covers.
1785 : *
1786 : * NOTE: this field is variable length. (Allocated dynamically
1787 : * by attaching extra space to the end of the structure,
1788 : * depending on how many CPUs the kernel has booted up with)
1789 : */
1790 : unsigned long cpumask[];
1791 : };
1792 :
1793 : static inline struct cpumask *sched_group_span(struct sched_group *sg)
1794 : {
1795 : return to_cpumask(sg->cpumask);
1796 : }
1797 :
1798 : /*
1799 : * See build_balance_mask().
1800 : */
1801 : static inline struct cpumask *group_balance_mask(struct sched_group *sg)
1802 : {
1803 : return to_cpumask(sg->sgc->cpumask);
1804 : }
1805 :
1806 : /**
1807 : * group_first_cpu - Returns the first CPU in the cpumask of a sched_group.
1808 : * @group: The group whose first CPU is to be returned.
1809 : */
1810 : static inline unsigned int group_first_cpu(struct sched_group *group)
1811 : {
1812 : return cpumask_first(sched_group_span(group));
1813 : }
1814 :
1815 : extern int group_balance_cpu(struct sched_group *sg);
1816 :
1817 : #ifdef CONFIG_SCHED_DEBUG
1818 : void update_sched_domain_debugfs(void);
1819 : void dirty_sched_domain_sysctl(int cpu);
1820 : #else
1821 : static inline void update_sched_domain_debugfs(void)
1822 : {
1823 : }
1824 : static inline void dirty_sched_domain_sysctl(int cpu)
1825 : {
1826 : }
1827 : #endif
1828 :
1829 : extern int sched_update_scaling(void);
1830 :
1831 : extern void flush_smp_call_function_from_idle(void);
1832 :
1833 : #else /* !CONFIG_SMP: */
1834 : static inline void flush_smp_call_function_from_idle(void) { }
1835 : #endif
1836 :
1837 : #include "stats.h"
1838 :
1839 : #if defined(CONFIG_SCHED_CORE) && defined(CONFIG_SCHEDSTATS)
1840 :
1841 : extern void __sched_core_account_forceidle(struct rq *rq);
1842 :
1843 : static inline void sched_core_account_forceidle(struct rq *rq)
1844 : {
1845 : if (schedstat_enabled())
1846 : __sched_core_account_forceidle(rq);
1847 : }
1848 :
1849 : extern void __sched_core_tick(struct rq *rq);
1850 :
1851 : static inline void sched_core_tick(struct rq *rq)
1852 : {
1853 : if (sched_core_enabled(rq) && schedstat_enabled())
1854 : __sched_core_tick(rq);
1855 : }
1856 :
1857 : #else
1858 :
1859 : static inline void sched_core_account_forceidle(struct rq *rq) {}
1860 :
1861 : static inline void sched_core_tick(struct rq *rq) {}
1862 :
1863 : #endif /* CONFIG_SCHED_CORE && CONFIG_SCHEDSTATS */
1864 :
1865 : #ifdef CONFIG_CGROUP_SCHED
1866 :
1867 : /*
1868 : * Return the group to which this task belongs.
1869 : *
1870 : * We cannot use task_css() and friends because the cgroup subsystem
1871 : * changes that value before the cgroup_subsys::attach() method is called,
1872 : * therefore we cannot pin it and might observe the wrong value.
1873 : *
1874 : * The same is true for autogroup's p->signal->autogroup->tg, the autogroup
1875 : * core changes this before calling sched_move_task().
1876 : *
1877 : * Instead we use a 'copy' which is updated from sched_move_task() while
1878 : * holding both task_struct::pi_lock and rq::lock.
1879 : */
1880 : static inline struct task_group *task_group(struct task_struct *p)
1881 : {
1882 : return p->sched_task_group;
1883 : }
1884 :
1885 : /* Change a task's cfs_rq and parent entity if it moves across CPUs/groups */
1886 : static inline void set_task_rq(struct task_struct *p, unsigned int cpu)
1887 : {
1888 : #if defined(CONFIG_FAIR_GROUP_SCHED) || defined(CONFIG_RT_GROUP_SCHED)
1889 : struct task_group *tg = task_group(p);
1890 : #endif
1891 :
1892 : #ifdef CONFIG_FAIR_GROUP_SCHED
1893 : set_task_rq_fair(&p->se, p->se.cfs_rq, tg->cfs_rq[cpu]);
1894 : p->se.cfs_rq = tg->cfs_rq[cpu];
1895 : p->se.parent = tg->se[cpu];
1896 : #endif
1897 :
1898 : #ifdef CONFIG_RT_GROUP_SCHED
1899 : p->rt.rt_rq = tg->rt_rq[cpu];
1900 : p->rt.parent = tg->rt_se[cpu];
1901 : #endif
1902 : }
1903 :
1904 : #else /* CONFIG_CGROUP_SCHED */
1905 :
1906 : static inline void set_task_rq(struct task_struct *p, unsigned int cpu) { }
1907 : static inline struct task_group *task_group(struct task_struct *p)
1908 : {
1909 : return NULL;
1910 : }
1911 :
1912 : #endif /* CONFIG_CGROUP_SCHED */
1913 :
1914 : static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu)
1915 : {
1916 108 : set_task_rq(p, cpu);
1917 : #ifdef CONFIG_SMP
1918 : /*
1919 : * After ->cpu is set up to a new value, task_rq_lock(p, ...) can be
1920 : * successfully executed on another CPU. We must ensure that updates of
1921 : * per-task data have been completed by this moment.
1922 : */
1923 : smp_wmb();
1924 : WRITE_ONCE(task_thread_info(p)->cpu, cpu);
1925 : p->wake_cpu = cpu;
1926 : #endif
1927 : }
1928 :
1929 : /*
1930 : * Tunables that become constants when CONFIG_SCHED_DEBUG is off:
1931 : */
1932 : #ifdef CONFIG_SCHED_DEBUG
1933 : # define const_debug __read_mostly
1934 : #else
1935 : # define const_debug const
1936 : #endif
1937 :
1938 : #define SCHED_FEAT(name, enabled) \
1939 : __SCHED_FEAT_##name ,
1940 :
1941 : enum {
1942 : #include "features.h"
1943 : __SCHED_FEAT_NR,
1944 : };
1945 :
1946 : #undef SCHED_FEAT
1947 :
1948 : #ifdef CONFIG_SCHED_DEBUG
1949 :
1950 : /*
1951 : * To support run-time toggling of sched features, all the translation units
1952 : * (except core.c) reference the sysctl_sched_features defined in core.c.
1953 : */
1954 : extern const_debug unsigned int sysctl_sched_features;
1955 :
1956 : #ifdef CONFIG_JUMP_LABEL
1957 : #define SCHED_FEAT(name, enabled) \
1958 : static __always_inline bool static_branch_##name(struct static_key *key) \
1959 : { \
1960 : return static_key_##enabled(key); \
1961 : }
1962 :
1963 : #include "features.h"
1964 : #undef SCHED_FEAT
1965 :
1966 : extern struct static_key sched_feat_keys[__SCHED_FEAT_NR];
1967 : #define sched_feat(x) (static_branch_##x(&sched_feat_keys[__SCHED_FEAT_##x]))
1968 :
1969 : #else /* !CONFIG_JUMP_LABEL */
1970 :
1971 : #define sched_feat(x) (sysctl_sched_features & (1UL << __SCHED_FEAT_##x))
1972 :
1973 : #endif /* CONFIG_JUMP_LABEL */
1974 :
1975 : #else /* !SCHED_DEBUG */
1976 :
1977 : /*
1978 : * Each translation unit has its own copy of sysctl_sched_features to allow
1979 : * constant propagation at compile time and compiler optimization based on
1980 : * the feature defaults.
1981 : */
1982 : #define SCHED_FEAT(name, enabled) \
1983 : (1UL << __SCHED_FEAT_##name) * enabled |
1984 : static const_debug __maybe_unused unsigned int sysctl_sched_features =
1985 : #include "features.h"
1986 : 0;
1987 : #undef SCHED_FEAT
1988 :
1989 : #define sched_feat(x) !!(sysctl_sched_features & (1UL << __SCHED_FEAT_##x))
1990 :
1991 : #endif /* SCHED_DEBUG */
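
/*
 * Usage note (illustrative): whichever of the definitions above is active,
 * sched_feat() reads as a boolean predicate on a feature bit from features.h,
 * e.g. hrtick_enabled_fair() below gates on sched_feat(HRTICK). With
 * CONFIG_SCHED_DEBUG and CONFIG_JUMP_LABEL each test compiles to a static
 * branch that can be toggled at run time; without CONFIG_SCHED_DEBUG it folds
 * to a compile-time constant and disabled-feature code can be dropped entirely
 * by the compiler.
 */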
1992 :
1993 : extern struct static_key_false sched_numa_balancing;
1994 : extern struct static_key_false sched_schedstats;
1995 :
1996 : static inline u64 global_rt_period(void)
1997 : {
1998 4 : return (u64)sysctl_sched_rt_period * NSEC_PER_USEC;
1999 : }
2000 :
2001 : static inline u64 global_rt_runtime(void)
2002 : {
2003 4 : if (sysctl_sched_rt_runtime < 0)
2004 : return RUNTIME_INF;
2005 :
2006 4 : return (u64)sysctl_sched_rt_runtime * NSEC_PER_USEC;
2007 : }
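
/*
 * Worked example (illustrative, assuming the usual defaults): with
 * sysctl_sched_rt_period = 1000000us and sysctl_sched_rt_runtime = 950000us,
 * the helpers above yield a 1s period and 0.95s of runtime, i.e. RT throttling
 * lets realtime tasks consume at most 95% of every second. A negative
 * sysctl_sched_rt_runtime means no limit at all (RUNTIME_INF).
 */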
2008 :
2009 : static inline int task_current(struct rq *rq, struct task_struct *p)
2010 : {
2011 : return rq->curr == p;
2012 : }
2013 :
2014 : static inline int task_running(struct rq *rq, struct task_struct *p)
2015 : {
2016 : #ifdef CONFIG_SMP
2017 : return p->on_cpu;
2018 : #else
2019 0 : return task_current(rq, p);
2020 : #endif
2021 : }
2022 :
2023 : static inline int task_on_rq_queued(struct task_struct *p)
2024 : {
2025 0 : return p->on_rq == TASK_ON_RQ_QUEUED;
2026 : }
2027 :
2028 : static inline int task_on_rq_migrating(struct task_struct *p)
2029 : {
2030 216 : return READ_ONCE(p->on_rq) == TASK_ON_RQ_MIGRATING;
2031 : }
2032 :
2033 : /* Wake flags. The first three directly map to some SD flag value */
2034 : #define WF_EXEC 0x02 /* Wakeup after exec; maps to SD_BALANCE_EXEC */
2035 : #define WF_FORK 0x04 /* Wakeup after fork; maps to SD_BALANCE_FORK */
2036 : #define WF_TTWU 0x08 /* Wakeup; maps to SD_BALANCE_WAKE */
2037 :
2038 : #define WF_SYNC 0x10 /* Waker goes to sleep after wakeup */
2039 : #define WF_MIGRATED 0x20 /* Internal use, task got migrated */
2040 : #define WF_ON_CPU 0x40 /* Wakee is on_cpu */
2041 :
2042 : #ifdef CONFIG_SMP
2043 : static_assert(WF_EXEC == SD_BALANCE_EXEC);
2044 : static_assert(WF_FORK == SD_BALANCE_FORK);
2045 : static_assert(WF_TTWU == SD_BALANCE_WAKE);
2046 : #endif
2047 :
2048 : /*
2049 : * To aid in avoiding the subversion of "niceness" due to uneven distribution
2050 : * of tasks with abnormal "nice" values across CPUs, the contribution that
2051 : * each task makes to its run queue's load is weighted according to its
2052 : * scheduling class and "nice" value. For SCHED_NORMAL tasks this is just a
2053 : * scaled version of the new time slice allocation that they receive on time
2054 : * slice expiry etc.
2055 : */
2056 :
2057 : #define WEIGHT_IDLEPRIO 3
2058 : #define WMULT_IDLEPRIO 1431655765
2059 :
2060 : extern const int sched_prio_to_weight[40];
2061 : extern const u32 sched_prio_to_wmult[40];
2062 :
2063 : /*
2064 : * {de,en}queue flags:
2065 : *
2066 : * DEQUEUE_SLEEP - task is no longer runnable
2067 : * ENQUEUE_WAKEUP - task just became runnable
2068 : *
2069 : * SAVE/RESTORE - an otherwise spurious dequeue/enqueue, done to ensure tasks
2070 : * are in a known state which allows modification. Such pairs
2071 : * should preserve as much state as possible.
2072 : *
2073 : * MOVE - paired with SAVE/RESTORE, explicitly does not preserve the location
2074 : * in the runqueue.
2075 : *
2076 : * ENQUEUE_HEAD - place at front of runqueue (tail if not specified)
2077 : * ENQUEUE_REPLENISH - CBS (replenish runtime and postpone deadline)
2078 : * ENQUEUE_MIGRATED - the task was migrated during wakeup
2079 : *
2080 : */
2081 :
2082 : #define DEQUEUE_SLEEP 0x01
2083 : #define DEQUEUE_SAVE 0x02 /* Matches ENQUEUE_RESTORE */
2084 : #define DEQUEUE_MOVE 0x04 /* Matches ENQUEUE_MOVE */
2085 : #define DEQUEUE_NOCLOCK 0x08 /* Matches ENQUEUE_NOCLOCK */
2086 :
2087 : #define ENQUEUE_WAKEUP 0x01
2088 : #define ENQUEUE_RESTORE 0x02
2089 : #define ENQUEUE_MOVE 0x04
2090 : #define ENQUEUE_NOCLOCK 0x08
2091 :
2092 : #define ENQUEUE_HEAD 0x10
2093 : #define ENQUEUE_REPLENISH 0x20
2094 : #ifdef CONFIG_SMP
2095 : #define ENQUEUE_MIGRATED 0x40
2096 : #else
2097 : #define ENQUEUE_MIGRATED 0x00
2098 : #endif
2099 :
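/*
 * Illustrative sketch (not part of the upstream header): a condensed view of
 * how callers in core.c (e.g. __sched_setscheduler()) pair the SAVE/RESTORE
 * flags around an attribute change. dequeue_task()/enqueue_task() are local to
 * core.c and appear here only to show the flag usage:
 *
 *	queued  = task_on_rq_queued(p);
 *	running = task_current(rq, p);
 *	if (queued)
 *		dequeue_task(rq, p, DEQUEUE_SAVE | DEQUEUE_MOVE | DEQUEUE_NOCLOCK);
 *	if (running)
 *		put_prev_task(rq, p);
 *
 *	... change p's scheduling attributes ...
 *
 *	if (queued)
 *		enqueue_task(rq, p, ENQUEUE_RESTORE | ENQUEUE_MOVE | ENQUEUE_NOCLOCK);
 *	if (running)
 *		set_next_task(rq, p);
 */
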
2100 : #define RETRY_TASK ((void *)-1UL)
2101 :
2102 : struct sched_class {
2103 :
2104 : #ifdef CONFIG_UCLAMP_TASK
2105 : int uclamp_enabled;
2106 : #endif
2107 :
2108 : void (*enqueue_task) (struct rq *rq, struct task_struct *p, int flags);
2109 : void (*dequeue_task) (struct rq *rq, struct task_struct *p, int flags);
2110 : void (*yield_task) (struct rq *rq);
2111 : bool (*yield_to_task)(struct rq *rq, struct task_struct *p);
2112 :
2113 : void (*check_preempt_curr)(struct rq *rq, struct task_struct *p, int flags);
2114 :
2115 : struct task_struct *(*pick_next_task)(struct rq *rq);
2116 :
2117 : void (*put_prev_task)(struct rq *rq, struct task_struct *p);
2118 : void (*set_next_task)(struct rq *rq, struct task_struct *p, bool first);
2119 :
2120 : #ifdef CONFIG_SMP
2121 : int (*balance)(struct rq *rq, struct task_struct *prev, struct rq_flags *rf);
2122 : int (*select_task_rq)(struct task_struct *p, int task_cpu, int flags);
2123 :
2124 : struct task_struct * (*pick_task)(struct rq *rq);
2125 :
2126 : void (*migrate_task_rq)(struct task_struct *p, int new_cpu);
2127 :
2128 : void (*task_woken)(struct rq *this_rq, struct task_struct *task);
2129 :
2130 : void (*set_cpus_allowed)(struct task_struct *p,
2131 : const struct cpumask *newmask,
2132 : u32 flags);
2133 :
2134 : void (*rq_online)(struct rq *rq);
2135 : void (*rq_offline)(struct rq *rq);
2136 :
2137 : struct rq *(*find_lock_rq)(struct task_struct *p, struct rq *rq);
2138 : #endif
2139 :
2140 : void (*task_tick)(struct rq *rq, struct task_struct *p, int queued);
2141 : void (*task_fork)(struct task_struct *p);
2142 : void (*task_dead)(struct task_struct *p);
2143 :
2144 : /*
2145 : * The switched_from() call is allowed to drop rq->lock, therefore we
2146 : * cannot assume the switched_from/switched_to pair is serialized by
2147 : * rq->lock. They are however serialized by p->pi_lock.
2148 : */
2149 : void (*switched_from)(struct rq *this_rq, struct task_struct *task);
2150 : void (*switched_to) (struct rq *this_rq, struct task_struct *task);
2151 : void (*prio_changed) (struct rq *this_rq, struct task_struct *task,
2152 : int oldprio);
2153 :
2154 : unsigned int (*get_rr_interval)(struct rq *rq,
2155 : struct task_struct *task);
2156 :
2157 : void (*update_curr)(struct rq *rq);
2158 :
2159 : #define TASK_SET_GROUP 0
2160 : #define TASK_MOVE_GROUP 1
2161 :
2162 : #ifdef CONFIG_FAIR_GROUP_SCHED
2163 : void (*task_change_group)(struct task_struct *p, int type);
2164 : #endif
2165 : };
2166 :
2167 621 : static inline void put_prev_task(struct rq *rq, struct task_struct *prev)
2168 : {
2169 621 : WARN_ON_ONCE(rq->curr != prev);
2170 621 : prev->sched_class->put_prev_task(rq, prev);
2171 621 : }
2172 :
2173 : static inline void set_next_task(struct rq *rq, struct task_struct *next)
2174 : {
2175 3 : next->sched_class->set_next_task(rq, next, false);
2176 : }
2177 :
2178 :
2179 : /*
2180 : * Helper to define a sched_class instance; each one is placed in a separate
2181 : * section which is ordered by the linker script:
2182 : *
2183 : * include/asm-generic/vmlinux.lds.h
2184 : *
2185 : * Also enforce alignment on the instance, not the type, to guarantee layout.
2186 : */
2187 : #define DEFINE_SCHED_CLASS(name) \
2188 : const struct sched_class name##_sched_class \
2189 : __aligned(__alignof__(struct sched_class)) \
2190 : __section("__" #name "_sched_class")
2191 :
2192 : /* Defined in include/asm-generic/vmlinux.lds.h */
2193 : extern struct sched_class __begin_sched_classes[];
2194 : extern struct sched_class __end_sched_classes[];
2195 :
2196 : #define sched_class_highest (__end_sched_classes - 1)
2197 : #define sched_class_lowest (__begin_sched_classes - 1)
2198 :
2199 : #define for_class_range(class, _from, _to) \
2200 : for (class = (_from); class != (_to); class--)
2201 :
2202 : #define for_each_class(class) \
2203 : for_class_range(class, sched_class_highest, sched_class_lowest)
2204 :
2205 : extern const struct sched_class stop_sched_class;
2206 : extern const struct sched_class dl_sched_class;
2207 : extern const struct sched_class rt_sched_class;
2208 : extern const struct sched_class fair_sched_class;
2209 : extern const struct sched_class idle_sched_class;
2210 :
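/*
 * Illustrative sketch (not part of the upstream header): roughly how the core
 * pick path walks the class sections laid out by DEFINE_SCHED_CLASS(), from
 * highest to lowest priority. The helper name example_pick_task() is
 * hypothetical; the real loop lives in __pick_next_task() in core.c.
 */
static inline struct task_struct *example_pick_task(struct rq *rq)
{
	const struct sched_class *class;
	struct task_struct *p;

	for_each_class(class) {
		p = class->pick_next_task(rq);
		if (p)
			return p;
	}

	BUG(); /* The idle class should always have a runnable task. */
}
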
2211 : static inline bool sched_stop_runnable(struct rq *rq)
2212 : {
2213 : return rq->stop && task_on_rq_queued(rq->stop);
2214 : }
2215 :
2216 : static inline bool sched_dl_runnable(struct rq *rq)
2217 : {
2218 : return rq->dl.dl_nr_running > 0;
2219 : }
2220 :
2221 : static inline bool sched_rt_runnable(struct rq *rq)
2222 : {
2223 : return rq->rt.rt_queued > 0;
2224 : }
2225 :
2226 : static inline bool sched_fair_runnable(struct rq *rq)
2227 : {
2228 : return rq->cfs.nr_running > 0;
2229 : }
2230 :
2231 : extern struct task_struct *pick_next_task_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf);
2232 : extern struct task_struct *pick_next_task_idle(struct rq *rq);
2233 :
2234 : #define SCA_CHECK 0x01
2235 : #define SCA_MIGRATE_DISABLE 0x02
2236 : #define SCA_MIGRATE_ENABLE 0x04
2237 : #define SCA_USER 0x08
2238 :
2239 : #ifdef CONFIG_SMP
2240 :
2241 : extern void update_group_capacity(struct sched_domain *sd, int cpu);
2242 :
2243 : extern void trigger_load_balance(struct rq *rq);
2244 :
2245 : extern void set_cpus_allowed_common(struct task_struct *p, const struct cpumask *new_mask, u32 flags);
2246 :
2247 : static inline struct task_struct *get_push_task(struct rq *rq)
2248 : {
2249 : struct task_struct *p = rq->curr;
2250 :
2251 : lockdep_assert_rq_held(rq);
2252 :
2253 : if (rq->push_busy)
2254 : return NULL;
2255 :
2256 : if (p->nr_cpus_allowed == 1)
2257 : return NULL;
2258 :
2259 : if (p->migration_disabled)
2260 : return NULL;
2261 :
2262 : rq->push_busy = true;
2263 : return get_task_struct(p);
2264 : }
2265 :
2266 : extern int push_cpu_stop(void *arg);
2267 :
2268 : #endif
2269 :
2270 : #ifdef CONFIG_CPU_IDLE
2271 : static inline void idle_set_state(struct rq *rq,
2272 : struct cpuidle_state *idle_state)
2273 : {
2274 : rq->idle_state = idle_state;
2275 : }
2276 :
2277 : static inline struct cpuidle_state *idle_get_state(struct rq *rq)
2278 : {
2279 : SCHED_WARN_ON(!rcu_read_lock_held());
2280 :
2281 : return rq->idle_state;
2282 : }
2283 : #else
2284 : static inline void idle_set_state(struct rq *rq,
2285 : struct cpuidle_state *idle_state)
2286 : {
2287 : }
2288 :
2289 : static inline struct cpuidle_state *idle_get_state(struct rq *rq)
2290 : {
2291 : return NULL;
2292 : }
2293 : #endif
2294 :
2295 : extern void schedule_idle(void);
2296 :
2297 : extern void sysrq_sched_debug_show(void);
2298 : extern void sched_init_granularity(void);
2299 : extern void update_max_interval(void);
2300 :
2301 : extern void init_sched_dl_class(void);
2302 : extern void init_sched_rt_class(void);
2303 : extern void init_sched_fair_class(void);
2304 :
2305 : extern void reweight_task(struct task_struct *p, int prio);
2306 :
2307 : extern void resched_curr(struct rq *rq);
2308 : extern void resched_cpu(int cpu);
2309 :
2310 : extern struct rt_bandwidth def_rt_bandwidth;
2311 : extern void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime);
2312 :
2313 : extern void init_dl_bandwidth(struct dl_bandwidth *dl_b, u64 period, u64 runtime);
2314 : extern void init_dl_task_timer(struct sched_dl_entity *dl_se);
2315 : extern void init_dl_inactive_task_timer(struct sched_dl_entity *dl_se);
2316 :
2317 : #define BW_SHIFT 20
2318 : #define BW_UNIT (1 << BW_SHIFT)
2319 : #define RATIO_SHIFT 8
2320 : #define MAX_BW_BITS (64 - BW_SHIFT)
2321 : #define MAX_BW ((1ULL << MAX_BW_BITS) - 1)
2322 : unsigned long to_ratio(u64 period, u64 runtime);
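
/*
 * Worked example (illustrative): to_ratio() returns runtime/period in BW_SHIFT
 * fixed point, i.e. (runtime << BW_SHIFT) / period. A deadline reservation of
 * runtime = 10ms out of period = 100ms therefore maps to
 * (10 << 20) / 100 = 104857, roughly 0.1 * BW_UNIT, and admission control
 * compares sums of such ratios against the available bandwidth.
 */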
2323 :
2324 : extern void init_entity_runnable_average(struct sched_entity *se);
2325 : extern void post_init_entity_util_avg(struct task_struct *p);
2326 :
2327 : #ifdef CONFIG_NO_HZ_FULL
2328 : extern bool sched_can_stop_tick(struct rq *rq);
2329 : extern int __init sched_tick_offload_init(void);
2330 :
2331 : /*
2332 : * Tick may be needed by tasks in the runqueue depending on their policy and
2333 : * requirements. If the tick is needed, send the target CPU an IPI to kick it
2334 : * out of nohz mode if necessary.
2335 : */
2336 : static inline void sched_update_tick_dependency(struct rq *rq)
2337 : {
2338 : int cpu = cpu_of(rq);
2339 :
2340 : if (!tick_nohz_full_cpu(cpu))
2341 : return;
2342 :
2343 : if (sched_can_stop_tick(rq))
2344 : tick_nohz_dep_clear_cpu(cpu, TICK_DEP_BIT_SCHED);
2345 : else
2346 : tick_nohz_dep_set_cpu(cpu, TICK_DEP_BIT_SCHED);
2347 : }
2348 : #else
2349 : static inline int sched_tick_offload_init(void) { return 0; }
2350 : static inline void sched_update_tick_dependency(struct rq *rq) { }
2351 : #endif
2352 :
2353 : static inline void add_nr_running(struct rq *rq, unsigned count)
2354 : {
2355 620 : unsigned prev_nr = rq->nr_running;
2356 :
2357 620 : rq->nr_running = prev_nr + count;
2358 : if (trace_sched_update_nr_running_tp_enabled()) {
2359 : call_trace_sched_update_nr_running(rq, count);
2360 : }
2361 :
2362 : #ifdef CONFIG_SMP
2363 : if (prev_nr < 2 && rq->nr_running >= 2) {
2364 : if (!READ_ONCE(rq->rd->overload))
2365 : WRITE_ONCE(rq->rd->overload, 1);
2366 : }
2367 : #endif
2368 :
2369 620 : sched_update_tick_dependency(rq);
2370 : }
2371 :
2372 : static inline void sub_nr_running(struct rq *rq, unsigned count)
2373 : {
2374 618 : rq->nr_running -= count;
2375 : if (trace_sched_update_nr_running_tp_enabled()) {
2376 : call_trace_sched_update_nr_running(rq, -count);
2377 : }
2378 :
2379 : /* Check if we still need preemption */
2380 618 : sched_update_tick_dependency(rq);
2381 : }
2382 :
2383 : extern void activate_task(struct rq *rq, struct task_struct *p, int flags);
2384 : extern void deactivate_task(struct rq *rq, struct task_struct *p, int flags);
2385 :
2386 : extern void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags);
2387 :
2388 : extern const_debug unsigned int sysctl_sched_nr_migrate;
2389 : extern const_debug unsigned int sysctl_sched_migration_cost;
2390 :
2391 : #ifdef CONFIG_SCHED_DEBUG
2392 : extern unsigned int sysctl_sched_latency;
2393 : extern unsigned int sysctl_sched_min_granularity;
2394 : extern unsigned int sysctl_sched_idle_min_granularity;
2395 : extern unsigned int sysctl_sched_wakeup_granularity;
2396 : extern int sysctl_resched_latency_warn_ms;
2397 : extern int sysctl_resched_latency_warn_once;
2398 :
2399 : extern unsigned int sysctl_sched_tunable_scaling;
2400 :
2401 : extern unsigned int sysctl_numa_balancing_scan_delay;
2402 : extern unsigned int sysctl_numa_balancing_scan_period_min;
2403 : extern unsigned int sysctl_numa_balancing_scan_period_max;
2404 : extern unsigned int sysctl_numa_balancing_scan_size;
2405 : #endif
2406 :
2407 : #ifdef CONFIG_SCHED_HRTICK
2408 :
2409 : /*
2410 : * Use hrtick when:
2411 : * - enabled by features
2412 : * - hrtimer is actually high res
2413 : */
2414 : static inline int hrtick_enabled(struct rq *rq)
2415 : {
2416 : if (!cpu_active(cpu_of(rq)))
2417 : return 0;
2418 : return hrtimer_is_hres_active(&rq->hrtick_timer);
2419 : }
2420 :
2421 : static inline int hrtick_enabled_fair(struct rq *rq)
2422 : {
2423 : if (!sched_feat(HRTICK))
2424 : return 0;
2425 : return hrtick_enabled(rq);
2426 : }
2427 :
2428 : static inline int hrtick_enabled_dl(struct rq *rq)
2429 : {
2430 : if (!sched_feat(HRTICK_DL))
2431 : return 0;
2432 : return hrtick_enabled(rq);
2433 : }
2434 :
2435 : void hrtick_start(struct rq *rq, u64 delay);
2436 :
2437 : #else
2438 :
2439 : static inline int hrtick_enabled_fair(struct rq *rq)
2440 : {
2441 : return 0;
2442 : }
2443 :
2444 : static inline int hrtick_enabled_dl(struct rq *rq)
2445 : {
2446 : return 0;
2447 : }
2448 :
2449 : static inline int hrtick_enabled(struct rq *rq)
2450 : {
2451 : return 0;
2452 : }
2453 :
2454 : #endif /* CONFIG_SCHED_HRTICK */
2455 :
2456 : #ifndef arch_scale_freq_tick
2457 : static __always_inline
2458 : void arch_scale_freq_tick(void)
2459 : {
2460 : }
2461 : #endif
2462 :
2463 : #ifndef arch_scale_freq_capacity
2464 : /**
2465 : * arch_scale_freq_capacity - get the frequency scale factor of a given CPU.
2466 : * @cpu: the CPU in question.
2467 : *
2468 : * Return: the frequency scale factor normalized against SCHED_CAPACITY_SCALE, i.e.
2469 : *
2470 : * f_curr
2471 : * ------ * SCHED_CAPACITY_SCALE
2472 : * f_max
2473 : */
2474 : static __always_inline
2475 : unsigned long arch_scale_freq_capacity(int cpu)
2476 : {
2477 : return SCHED_CAPACITY_SCALE;
2478 : }
2479 : #endif
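
/*
 * Worked example (illustrative): an architecture that implements this hook for
 * a CPU currently running at f_curr = 1.2GHz with f_max = 2.4GHz would return
 * (1.2 / 2.4) * SCHED_CAPACITY_SCALE = 512. The generic fallback above always
 * reports SCHED_CAPACITY_SCALE (1024), i.e. it assumes the CPU runs at f_max.
 */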
2480 :
2481 :
2482 : #ifdef CONFIG_SMP
2483 :
2484 : static inline bool rq_order_less(struct rq *rq1, struct rq *rq2)
2485 : {
2486 : #ifdef CONFIG_SCHED_CORE
2487 : /*
2488 : * In order to not have {0,2},{1,3} turn into an AB-BA,
2489 : * order by core-id first and cpu-id second.
2490 : *
2491 : * Notably:
2492 : *
2493 : * double_rq_lock(0,3); will take core-0, core-1 lock
2494 : * double_rq_lock(1,2); will take core-1, core-0 lock
2495 : *
2496 : * when only cpu-id is considered.
2497 : */
2498 : if (rq1->core->cpu < rq2->core->cpu)
2499 : return true;
2500 : if (rq1->core->cpu > rq2->core->cpu)
2501 : return false;
2502 :
2503 : /*
2504 : * __sched_core_flip() relies on SMT having cpu-id lock order.
2505 : */
2506 : #endif
2507 : return rq1->cpu < rq2->cpu;
2508 : }
2509 :
2510 : extern void double_rq_lock(struct rq *rq1, struct rq *rq2);
2511 :
2512 : #ifdef CONFIG_PREEMPTION
2513 :
2514 : /*
2515 : * fair double_lock_balance: Safely acquires both rq->locks in a fair
2516 : * way at the expense of forcing extra atomic operations in all
2517 : * invocations. This assures that the double_lock is acquired using the
2518 : * same underlying policy as the spinlock_t on this architecture, which
2519 : * reduces latency compared to the unfair variant below. However, it
2520 : * also adds more overhead and therefore may reduce throughput.
2521 : */
2522 : static inline int _double_lock_balance(struct rq *this_rq, struct rq *busiest)
2523 : __releases(this_rq->lock)
2524 : __acquires(busiest->lock)
2525 : __acquires(this_rq->lock)
2526 : {
2527 : raw_spin_rq_unlock(this_rq);
2528 : double_rq_lock(this_rq, busiest);
2529 :
2530 : return 1;
2531 : }
2532 :
2533 : #else
2534 : /*
2535 : * Unfair double_lock_balance: Optimizes throughput at the expense of
2536 : * latency by eliminating extra atomic operations when the locks are
2537 : * already in proper order on entry. This favors lower CPU-ids and will
2538 : * grant the double lock to lower CPUs over higher ids under contention,
2539 : * regardless of entry order into the function.
2540 : */
2541 : static inline int _double_lock_balance(struct rq *this_rq, struct rq *busiest)
2542 : __releases(this_rq->lock)
2543 : __acquires(busiest->lock)
2544 : __acquires(this_rq->lock)
2545 : {
2546 : if (__rq_lockp(this_rq) == __rq_lockp(busiest))
2547 : return 0;
2548 :
2549 : if (likely(raw_spin_rq_trylock(busiest)))
2550 : return 0;
2551 :
2552 : if (rq_order_less(this_rq, busiest)) {
2553 : raw_spin_rq_lock_nested(busiest, SINGLE_DEPTH_NESTING);
2554 : return 0;
2555 : }
2556 :
2557 : raw_spin_rq_unlock(this_rq);
2558 : double_rq_lock(this_rq, busiest);
2559 :
2560 : return 1;
2561 : }
2562 :
2563 : #endif /* CONFIG_PREEMPTION */
2564 :
2565 : /*
2566 : * double_lock_balance - lock the busiest runqueue, this_rq is locked already.
2567 : */
2568 : static inline int double_lock_balance(struct rq *this_rq, struct rq *busiest)
2569 : {
2570 : lockdep_assert_irqs_disabled();
2571 :
2572 : return _double_lock_balance(this_rq, busiest);
2573 : }
2574 :
2575 : static inline void double_unlock_balance(struct rq *this_rq, struct rq *busiest)
2576 : __releases(busiest->lock)
2577 : {
2578 : if (__rq_lockp(this_rq) != __rq_lockp(busiest))
2579 : raw_spin_rq_unlock(busiest);
2580 : lock_set_subclass(&__rq_lockp(this_rq)->dep_map, 0, _RET_IP_);
2581 : }
2582 :
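/*
 * Illustrative sketch (not part of the upstream header): the RT/DL push paths
 * (e.g. find_lock_lowest_rq() in rt.c) use the pair above roughly like this.
 * A non-zero return from double_lock_balance() means this_rq->lock was dropped
 * and re-acquired, so any state derived under it must be revalidated:
 *
 *	if (double_lock_balance(this_rq, busiest)) {
 *		if (unlikely(task_rq(task) != this_rq)) {
 *			double_unlock_balance(this_rq, busiest);
 *			goto retry;	// the task moved while unlocked
 *		}
 *	}
 *	... push or pull tasks between the two runqueues ...
 *	double_unlock_balance(this_rq, busiest);
 */
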
2583 : static inline void double_lock(spinlock_t *l1, spinlock_t *l2)
2584 : {
2585 : if (l1 > l2)
2586 : swap(l1, l2);
2587 :
2588 : spin_lock(l1);
2589 : spin_lock_nested(l2, SINGLE_DEPTH_NESTING);
2590 : }
2591 :
2592 : static inline void double_lock_irq(spinlock_t *l1, spinlock_t *l2)
2593 : {
2594 : if (l1 > l2)
2595 : swap(l1, l2);
2596 :
2597 : spin_lock_irq(l1);
2598 : spin_lock_nested(l2, SINGLE_DEPTH_NESTING);
2599 : }
2600 :
2601 : static inline void double_raw_lock(raw_spinlock_t *l1, raw_spinlock_t *l2)
2602 : {
2603 : if (l1 > l2)
2604 : swap(l1, l2);
2605 :
2606 : raw_spin_lock(l1);
2607 : raw_spin_lock_nested(l2, SINGLE_DEPTH_NESTING);
2608 : }
2609 :
2610 : /*
2611 : * double_rq_unlock - safely unlock two runqueues
2612 : *
2613 : * Note this does not restore interrupts like task_rq_unlock,
2614 : * Note this does not restore interrupts like task_rq_unlock;
2615 : */
2616 : static inline void double_rq_unlock(struct rq *rq1, struct rq *rq2)
2617 : __releases(rq1->lock)
2618 : __releases(rq2->lock)
2619 : {
2620 : if (__rq_lockp(rq1) != __rq_lockp(rq2))
2621 : raw_spin_rq_unlock(rq2);
2622 : else
2623 : __release(rq2->lock);
2624 : raw_spin_rq_unlock(rq1);
2625 : }
2626 :
2627 : extern void set_rq_online (struct rq *rq);
2628 : extern void set_rq_offline(struct rq *rq);
2629 : extern bool sched_smp_initialized;
2630 :
2631 : #else /* CONFIG_SMP */
2632 :
2633 : /*
2634 : * double_rq_lock - safely lock two runqueues
2635 : *
2636 : * Note this does not disable interrupts like task_rq_lock,
2637 : * Note this does not disable interrupts like task_rq_lock;
2638 : */
2639 0 : static inline void double_rq_lock(struct rq *rq1, struct rq *rq2)
2640 : __acquires(rq1->lock)
2641 : __acquires(rq2->lock)
2642 : {
2643 0 : BUG_ON(!irqs_disabled());
2644 0 : BUG_ON(rq1 != rq2);
2645 0 : raw_spin_rq_lock(rq1);
2646 : __acquire(rq2->lock); /* Fake it out ;) */
2647 0 : }
2648 :
2649 : /*
2650 : * double_rq_unlock - safely unlock two runqueues
2651 : *
2652 : * Note this does not restore interrupts like task_rq_unlock,
2653 : * Note this does not restore interrupts like task_rq_unlock;
2654 : */
2655 0 : static inline void double_rq_unlock(struct rq *rq1, struct rq *rq2)
2656 : __releases(rq1->lock)
2657 : __releases(rq2->lock)
2658 : {
2659 0 : BUG_ON(rq1 != rq2);
2660 0 : raw_spin_rq_unlock(rq1);
2661 : __release(rq2->lock);
2662 0 : }
2663 :
2664 : #endif
2665 :
2666 : extern struct sched_entity *__pick_first_entity(struct cfs_rq *cfs_rq);
2667 : extern struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq);
2668 :
2669 : #ifdef CONFIG_SCHED_DEBUG
2670 : extern bool sched_debug_verbose;
2671 :
2672 : extern void print_cfs_stats(struct seq_file *m, int cpu);
2673 : extern void print_rt_stats(struct seq_file *m, int cpu);
2674 : extern void print_dl_stats(struct seq_file *m, int cpu);
2675 : extern void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq);
2676 : extern void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq);
2677 : extern void print_dl_rq(struct seq_file *m, int cpu, struct dl_rq *dl_rq);
2678 :
2679 : extern void resched_latency_warn(int cpu, u64 latency);
2680 : #ifdef CONFIG_NUMA_BALANCING
2681 : extern void
2682 : show_numa_stats(struct task_struct *p, struct seq_file *m);
2683 : extern void
2684 : print_numa_stats(struct seq_file *m, int node, unsigned long tsf,
2685 : unsigned long tpf, unsigned long gsf, unsigned long gpf);
2686 : #endif /* CONFIG_NUMA_BALANCING */
2687 : #else
2688 : static inline void resched_latency_warn(int cpu, u64 latency) {}
2689 : #endif /* CONFIG_SCHED_DEBUG */
2690 :
2691 : extern void init_cfs_rq(struct cfs_rq *cfs_rq);
2692 : extern void init_rt_rq(struct rt_rq *rt_rq);
2693 : extern void init_dl_rq(struct dl_rq *dl_rq);
2694 :
2695 : extern void cfs_bandwidth_usage_inc(void);
2696 : extern void cfs_bandwidth_usage_dec(void);
2697 :
2698 : #ifdef CONFIG_NO_HZ_COMMON
2699 : #define NOHZ_BALANCE_KICK_BIT 0
2700 : #define NOHZ_STATS_KICK_BIT 1
2701 : #define NOHZ_NEWILB_KICK_BIT 2
2702 : #define NOHZ_NEXT_KICK_BIT 3
2703 :
2704 : /* Run rebalance_domains() */
2705 : #define NOHZ_BALANCE_KICK BIT(NOHZ_BALANCE_KICK_BIT)
2706 : /* Update blocked load */
2707 : #define NOHZ_STATS_KICK BIT(NOHZ_STATS_KICK_BIT)
2708 : /* Update blocked load when entering idle */
2709 : #define NOHZ_NEWILB_KICK BIT(NOHZ_NEWILB_KICK_BIT)
2710 : /* Update nohz.next_balance */
2711 : #define NOHZ_NEXT_KICK BIT(NOHZ_NEXT_KICK_BIT)
2712 :
2713 : #define NOHZ_KICK_MASK (NOHZ_BALANCE_KICK | NOHZ_STATS_KICK | NOHZ_NEXT_KICK)
2714 :
2715 : #define nohz_flags(cpu) (&cpu_rq(cpu)->nohz_flags)
2716 :
2717 : extern void nohz_balance_exit_idle(struct rq *rq);
2718 : #else
2719 : static inline void nohz_balance_exit_idle(struct rq *rq) { }
2720 : #endif
2721 :
2722 : #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
2723 : extern void nohz_run_idle_balance(int cpu);
2724 : #else
2725 : static inline void nohz_run_idle_balance(int cpu) { }
2726 : #endif
2727 :
2728 : #ifdef CONFIG_IRQ_TIME_ACCOUNTING
2729 : struct irqtime {
2730 : u64 total;
2731 : u64 tick_delta;
2732 : u64 irq_start_time;
2733 : struct u64_stats_sync sync;
2734 : };
2735 :
2736 : DECLARE_PER_CPU(struct irqtime, cpu_irqtime);
2737 :
2738 : /*
2739 : * Returns the irqtime minus the softirq time computed by ksoftirqd.
2740 : * Otherwise ksoftirqd's sum_exec_runtime would have its own runtime
2741 : * subtracted from it and would never move forward.
2742 : */
2743 : static inline u64 irq_time_read(int cpu)
2744 : {
2745 : struct irqtime *irqtime = &per_cpu(cpu_irqtime, cpu);
2746 : unsigned int seq;
2747 : u64 total;
2748 :
2749 : do {
2750 : seq = __u64_stats_fetch_begin(&irqtime->sync);
2751 : total = irqtime->total;
2752 : } while (__u64_stats_fetch_retry(&irqtime->sync, seq));
2753 :
2754 : return total;
2755 : }
2756 : #endif /* CONFIG_IRQ_TIME_ACCOUNTING */
2757 :
2758 : #ifdef CONFIG_CPU_FREQ
2759 : DECLARE_PER_CPU(struct update_util_data __rcu *, cpufreq_update_util_data);
2760 :
2761 : /**
2762 : * cpufreq_update_util - Take a note about CPU utilization changes.
2763 : * @rq: Runqueue to carry out the update for.
2764 : * @flags: Update reason flags.
2765 : *
2766 : * This function is called by the scheduler on the CPU whose utilization is
2767 : * being updated.
2768 : *
2769 : * It can only be called from RCU-sched read-side critical sections.
2770 : *
2771 : * The way cpufreq is currently arranged requires it to evaluate the CPU
2772 : * performance state (frequency/voltage) on a regular basis to prevent it from
2773 : * being stuck in a completely inadequate performance level for too long.
2774 : * That is not guaranteed to happen if the updates are only triggered from CFS
2775 : * and DL, though, because those updates may not be coming in if RT tasks
2776 : * are the only ones active all the time.
2777 : *
2778 : * As a workaround for that issue, this function is called periodically by the
2779 : * RT sched class to trigger extra cpufreq updates to prevent it from stalling,
2780 : * but that really is a band-aid. Going forward it should be replaced with
2781 : * solutions targeted more specifically at RT tasks.
2782 : */
2783 : static inline void cpufreq_update_util(struct rq *rq, unsigned int flags)
2784 : {
2785 : struct update_util_data *data;
2786 :
2787 : data = rcu_dereference_sched(*per_cpu_ptr(&cpufreq_update_util_data,
2788 : cpu_of(rq)));
2789 : if (data)
2790 : data->func(data, rq_clock(rq), flags);
2791 : }
2792 : #else
2793 : static inline void cpufreq_update_util(struct rq *rq, unsigned int flags) {}
2794 : #endif /* CONFIG_CPU_FREQ */
2795 :
2796 : #ifdef arch_scale_freq_capacity
2797 : # ifndef arch_scale_freq_invariant
2798 : # define arch_scale_freq_invariant() true
2799 : # endif
2800 : #else
2801 : # define arch_scale_freq_invariant() false
2802 : #endif
2803 :
2804 : #ifdef CONFIG_SMP
2805 : static inline unsigned long capacity_orig_of(int cpu)
2806 : {
2807 : return cpu_rq(cpu)->cpu_capacity_orig;
2808 : }
2809 :
2810 : /**
2811 : * enum cpu_util_type - CPU utilization type
2812 : * @FREQUENCY_UTIL: Utilization used to select frequency
2813 : * @ENERGY_UTIL: Utilization used during energy calculation
2814 : *
2815 : * The utilization signals of all scheduling classes (CFS/RT/DL) and IRQ time
2816 : * need to be aggregated differently depending on the usage made of them. This
2817 : * enum is used within effective_cpu_util() to differentiate the types of
2818 : * utilization expected by the callers, and adjust the aggregation accordingly.
2819 : */
2820 : enum cpu_util_type {
2821 : FREQUENCY_UTIL,
2822 : ENERGY_UTIL,
2823 : };
2824 :
2825 : unsigned long effective_cpu_util(int cpu, unsigned long util_cfs,
2826 : unsigned long max, enum cpu_util_type type,
2827 : struct task_struct *p);
2828 :
2829 : static inline unsigned long cpu_bw_dl(struct rq *rq)
2830 : {
2831 : return (rq->dl.running_bw * SCHED_CAPACITY_SCALE) >> BW_SHIFT;
2832 : }
2833 :
2834 : static inline unsigned long cpu_util_dl(struct rq *rq)
2835 : {
2836 : return READ_ONCE(rq->avg_dl.util_avg);
2837 : }
2838 :
2839 : /**
2840 : * cpu_util_cfs() - Estimates the amount of CPU capacity used by CFS tasks.
2841 : * @cpu: the CPU to get the utilization for.
2842 : *
2843 : * The unit of the return value must be the same as the one of CPU capacity
2844 : * so that CPU utilization can be compared with CPU capacity.
2845 : *
2846 : * CPU utilization is the sum of running time of runnable tasks plus the
2847 : * recent utilization of currently non-runnable tasks on that CPU.
2848 : * It represents the amount of CPU capacity currently used by CFS tasks in
2849 : * the range [0..max CPU capacity] with max CPU capacity being the CPU
2850 : * capacity at f_max.
2851 : *
2852 : * The estimated CPU utilization is defined as the maximum between CPU
2853 : * utilization and sum of the estimated utilization of the currently
2854 : * runnable tasks on that CPU. It preserves a utilization "snapshot" of
2855 : * previously-executed tasks, which helps better deduce how busy a CPU will
2856 : * be when a long-sleeping task wakes up. The contribution to CPU utilization
2857 : * of such a task would be significantly decayed at this point of time.
2858 : *
2859 : * CPU utilization can be higher than the current CPU capacity
2860 : * (f_curr/f_max * max CPU capacity) or even the max CPU capacity because
2861 : * of rounding errors as well as task migrations or wakeups of new tasks.
2862 : * CPU utilization has to be capped to fit into the [0..max CPU capacity]
2863 : * range. Otherwise a group of CPUs (CPU0 util = 121% + CPU1 util = 80%)
2864 : * could be seen as over-utilized even though CPU1 has 20% of spare CPU
2865 : * capacity. CPU utilization is allowed to overshoot current CPU capacity
2866 : * though since this is useful for predicting the CPU capacity required
2867 : * after task migrations (scheduler-driven DVFS).
2868 : *
2869 : * Return: (Estimated) utilization for the specified CPU.
2870 : */
2871 : static inline unsigned long cpu_util_cfs(int cpu)
2872 : {
2873 : struct cfs_rq *cfs_rq;
2874 : unsigned long util;
2875 :
2876 : cfs_rq = &cpu_rq(cpu)->cfs;
2877 : util = READ_ONCE(cfs_rq->avg.util_avg);
2878 :
2879 : if (sched_feat(UTIL_EST)) {
2880 : util = max_t(unsigned long, util,
2881 : READ_ONCE(cfs_rq->avg.util_est.enqueued));
2882 : }
2883 :
2884 : return min(util, capacity_orig_of(cpu));
2885 : }
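
/*
 * Worked example (illustrative): suppose cfs_rq->avg.util_avg has decayed to
 * 100 while util_est.enqueued still reads 300 because a recently busy task has
 * just woken up again. With UTIL_EST enabled, cpu_util_cfs() reports
 * max(100, 300) = 300, clamped to capacity_orig_of(cpu), so frequency
 * selection keeps treating the CPU as busy instead of trusting the decayed
 * average.
 */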
2886 :
2887 : static inline unsigned long cpu_util_rt(struct rq *rq)
2888 : {
2889 : return READ_ONCE(rq->avg_rt.util_avg);
2890 : }
2891 : #endif
2892 :
2893 : #ifdef CONFIG_UCLAMP_TASK
2894 : unsigned long uclamp_eff_value(struct task_struct *p, enum uclamp_id clamp_id);
2895 :
2896 : /**
2897 : * uclamp_rq_util_with - clamp @util with @rq and @p effective uclamp values.
2898 : * @rq: The rq to clamp against. Must not be NULL.
2899 : * @util: The util value to clamp.
2900 : * @p: The task to clamp against. Can be NULL if you want to clamp
2901 : * against @rq only.
2902 : *
2903 : * Clamps the passed @util to the max(@rq, @p) effective uclamp values.
2904 : *
2905 : * If sched_uclamp_used static key is disabled, then just return the util
2906 : * without any clamping since uclamp aggregation at the rq level in the fast
2907 : * path is disabled, rendering this operation a NOP.
2908 : *
2909 : * Use uclamp_eff_value() if you don't care about uclamp values at rq level. It
2910 : * will return the correct effective uclamp value of the task even if the
2911 : * static key is disabled.
2912 : */
2913 : static __always_inline
2914 : unsigned long uclamp_rq_util_with(struct rq *rq, unsigned long util,
2915 : struct task_struct *p)
2916 : {
2917 : unsigned long min_util = 0;
2918 : unsigned long max_util = 0;
2919 :
2920 : if (!static_branch_likely(&sched_uclamp_used))
2921 : return util;
2922 :
2923 : if (p) {
2924 : min_util = uclamp_eff_value(p, UCLAMP_MIN);
2925 : max_util = uclamp_eff_value(p, UCLAMP_MAX);
2926 :
2927 : /*
2928 : * Ignore last runnable task's max clamp, as this task will
2929 : * reset it. Similarly, no need to read the rq's min clamp.
2930 : */
2931 : if (rq->uclamp_flags & UCLAMP_FLAG_IDLE)
2932 : goto out;
2933 : }
2934 :
2935 : min_util = max_t(unsigned long, min_util, READ_ONCE(rq->uclamp[UCLAMP_MIN].value));
2936 : max_util = max_t(unsigned long, max_util, READ_ONCE(rq->uclamp[UCLAMP_MAX].value));
2937 : out:
2938 : /*
2939 : * Since CPU's {min,max}_util clamps are MAX aggregated considering
2940 : * RUNNABLE tasks with _different_ clamps, we can end up with an
2941 : * inversion. Fix it now when the clamps are applied.
2942 : */
2943 : if (unlikely(min_util >= max_util))
2944 : return min_util;
2945 :
2946 : return clamp(util, min_util, max_util);
2947 : }
2948 :
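/*
 * Worked example (illustrative): with an effective rq-level UCLAMP_MIN of 0,
 * an effective UCLAMP_MAX of 512 and util = 800, the helper above returns
 * clamp(800, 0, 512) = 512: the runnable tasks asked to be capped at half
 * capacity, so the frequency request is capped accordingly. Conversely,
 * util = 100 with UCLAMP_MIN = 256 is boosted to 256, and when min >= max the
 * min value wins (the inversion fixup above).
 */
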
2949 : /* Is the rq being capped/throttled by uclamp_max? */
2950 : static inline bool uclamp_rq_is_capped(struct rq *rq)
2951 : {
2952 : unsigned long rq_util;
2953 : unsigned long max_util;
2954 :
2955 : if (!static_branch_likely(&sched_uclamp_used))
2956 : return false;
2957 :
2958 : rq_util = cpu_util_cfs(cpu_of(rq)) + cpu_util_rt(rq);
2959 : max_util = READ_ONCE(rq->uclamp[UCLAMP_MAX].value);
2960 :
2961 : return max_util != SCHED_CAPACITY_SCALE && rq_util >= max_util;
2962 : }
2963 :
2964 : /*
2965 : * When uclamp is compiled in, the aggregation at rq level is 'turned off'
2966 : * by default in the fast path and only gets turned on once userspace performs
2967 : * an operation that requires it.
2968 : *
2969 : * Returns true if userspace has opted in to using uclamp, and rq-level
2970 : * aggregation is therefore active.
2971 : */
2972 : static inline bool uclamp_is_used(void)
2973 : {
2974 : return static_branch_likely(&sched_uclamp_used);
2975 : }
2976 : #else /* CONFIG_UCLAMP_TASK */
2977 : static inline
2978 : unsigned long uclamp_rq_util_with(struct rq *rq, unsigned long util,
2979 : struct task_struct *p)
2980 : {
2981 : return util;
2982 : }
2983 :
2984 : static inline bool uclamp_rq_is_capped(struct rq *rq) { return false; }
2985 :
2986 : static inline bool uclamp_is_used(void)
2987 : {
2988 : return false;
2989 : }
2990 : #endif /* CONFIG_UCLAMP_TASK */
2991 :
2992 : #ifdef CONFIG_HAVE_SCHED_AVG_IRQ
2993 : static inline unsigned long cpu_util_irq(struct rq *rq)
2994 : {
2995 : return rq->avg_irq.util_avg;
2996 : }
2997 :
2998 : static inline
2999 : unsigned long scale_irq_capacity(unsigned long util, unsigned long irq, unsigned long max)
3000 : {
3001 : util *= (max - irq);
3002 : util /= max;
3003 :
3004 : return util;
3005 :
3006 : }
3007 : #else
3008 : static inline unsigned long cpu_util_irq(struct rq *rq)
3009 : {
3010 : return 0;
3011 : }
3012 :
3013 : static inline
3014 : unsigned long scale_irq_capacity(unsigned long util, unsigned long irq, unsigned long max)
3015 : {
3016 : return util;
3017 : }
3018 : #endif
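
/*
 * Worked example (illustrative): with max = 1024, irq = 256 (one quarter of
 * the CPU consumed by IRQ time) and util = 800, scale_irq_capacity() returns
 * 800 * (1024 - 256) / 1024 = 600: the CFS/RT utilization is scaled down to
 * the capacity actually left over after interrupt work.
 */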
3019 :
3020 : #if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL)
3021 :
3022 : #define perf_domain_span(pd) (to_cpumask(((pd)->em_pd->cpus)))
3023 :
3024 : DECLARE_STATIC_KEY_FALSE(sched_energy_present);
3025 :
3026 : static inline bool sched_energy_enabled(void)
3027 : {
3028 : return static_branch_unlikely(&sched_energy_present);
3029 : }
3030 :
3031 : #else /* ! (CONFIG_ENERGY_MODEL && CONFIG_CPU_FREQ_GOV_SCHEDUTIL) */
3032 :
3033 : #define perf_domain_span(pd) NULL
3034 : static inline bool sched_energy_enabled(void) { return false; }
3035 :
3036 : #endif /* CONFIG_ENERGY_MODEL && CONFIG_CPU_FREQ_GOV_SCHEDUTIL */
3037 :
3038 : #ifdef CONFIG_MEMBARRIER
3039 : /*
3040 : * The scheduler provides memory barriers required by membarrier between:
3041 : * - prior user-space memory accesses and store to rq->membarrier_state,
3042 : * - store to rq->membarrier_state and following user-space memory accesses.
3043 : * In the same way it provides those guarantees around store to rq->curr.
3044 : */
3045 : static inline void membarrier_switch_mm(struct rq *rq,
3046 : struct mm_struct *prev_mm,
3047 : struct mm_struct *next_mm)
3048 : {
3049 : int membarrier_state;
3050 :
3051 0 : if (prev_mm == next_mm)
3052 : return;
3053 :
3054 0 : membarrier_state = atomic_read(&next_mm->membarrier_state);
3055 0 : if (READ_ONCE(rq->membarrier_state) == membarrier_state)
3056 : return;
3057 :
3058 0 : WRITE_ONCE(rq->membarrier_state, membarrier_state);
3059 : }
3060 : #else
3061 : static inline void membarrier_switch_mm(struct rq *rq,
3062 : struct mm_struct *prev_mm,
3063 : struct mm_struct *next_mm)
3064 : {
3065 : }
3066 : #endif
3067 :
3068 : #ifdef CONFIG_SMP
3069 : static inline bool is_per_cpu_kthread(struct task_struct *p)
3070 : {
3071 : if (!(p->flags & PF_KTHREAD))
3072 : return false;
3073 :
3074 : if (p->nr_cpus_allowed != 1)
3075 : return false;
3076 :
3077 : return true;
3078 : }
3079 : #endif
3080 :
3081 : extern void swake_up_all_locked(struct swait_queue_head *q);
3082 : extern void __prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait);
3083 :
3084 : #ifdef CONFIG_PREEMPT_DYNAMIC
3085 : extern int preempt_dynamic_mode;
3086 : extern int sched_dynamic_mode(const char *str);
3087 : extern void sched_dynamic_update(int mode);
3088 : #endif
3089 :
3090 : #endif /* _KERNEL_SCHED_SCHED_H */