LCOV - code coverage report
Current view: top level - kernel/time - clocksource.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 94 250 37.6 %
Date: 2022-12-09 01:23:36 Functions: 7 27 25.9 %

          Line data    Source code
       1             : // SPDX-License-Identifier: GPL-2.0+
       2             : /*
       3             :  * This file contains the functions which manage clocksource drivers.
       4             :  *
       5             :  * Copyright (C) 2004, 2005 IBM, John Stultz (johnstul@us.ibm.com)
       6             :  */
       7             : 
       8             : #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
       9             : 
      10             : #include <linux/device.h>
      11             : #include <linux/clocksource.h>
      12             : #include <linux/init.h>
      13             : #include <linux/module.h>
      14             : #include <linux/sched.h> /* for spin_unlock_irq() using preempt_count() m68k */
      15             : #include <linux/tick.h>
      16             : #include <linux/kthread.h>
      17             : #include <linux/prandom.h>
      18             : #include <linux/cpu.h>
      19             : 
      20             : #include "tick-internal.h"
      21             : #include "timekeeping_internal.h"
      22             : 
      23             : /**
      24             :  * clocks_calc_mult_shift - calculate mult/shift factors for scaled math of clocks
      25             :  * @mult:       pointer to mult variable
      26             :  * @shift:      pointer to shift variable
      27             :  * @from:       frequency to convert from
      28             :  * @to:         frequency to convert to
      29             :  * @maxsec:     guaranteed runtime conversion range in seconds
      30             :  *
      31             :  * The function evaluates the shift/mult pair for the scaled math
      32             :  * operations of clocksources and clockevents.
      33             :  *
      34             :  * @to and @from are frequency values in Hz. For clock sources @to is
      35             :  * NSEC_PER_SEC == 1GHz and @from is the counter frequency. For clock
      36             :  * events @to is the counter frequency and @from is NSEC_PER_SEC.
      37             :  *
      38             :  * The @maxsec conversion range argument controls the time frame in
      39             :  * seconds which must be covered by the runtime conversion with the
      40             :  * calculated mult and shift factors. This guarantees that no 64bit
      41             :  * overflow happens when the input value of the conversion is
      42             :  * multiplied with the calculated mult factor. Larger ranges may
      43             :  * reduce the conversion accuracy by choosing smaller mult and shift
      44             :  * factors.
      45             :  */
      46             : void
      47           0 : clocks_calc_mult_shift(u32 *mult, u32 *shift, u32 from, u32 to, u32 maxsec)
      48             : {
      49             :         u64 tmp;
      50           1 :         u32 sft, sftacc= 32;
      51             : 
      52             :         /*
      53             :          * Calculate the shift factor which is limiting the conversion
      54             :          * range:
      55             :          */
      56           1 :         tmp = ((u64)maxsec * from) >> 32;
      57           1 :         while (tmp) {
      58           0 :                 tmp >>=1;
      59           0 :                 sftacc--;
      60             :         }
      61             : 
      62             :         /*
      63             :          * Find the conversion shift/mult pair which has the best
      64             :          * accuracy and fits the maxsec conversion range:
      65             :          */
      66           9 :         for (sft = 32; sft > 0; sft--) {
      67          10 :                 tmp = (u64) to << sft;
      68          10 :                 tmp += from / 2;
      69          10 :                 do_div(tmp, from);
      70          10 :                 if ((tmp >> sftacc) == 0)
      71             :                         break;
      72             :         }
      73           1 :         *mult = tmp;
      74           1 :         *shift = sft;
      75           0 : }
      76             : EXPORT_SYMBOL_GPL(clocks_calc_mult_shift);
      77             : 
      78             : /*[Clocksource internal variables]---------
      79             :  * curr_clocksource:
      80             :  *      currently selected clocksource.
      81             :  * suspend_clocksource:
      82             :  *      used to calculate the suspend time.
      83             :  * clocksource_list:
      84             :  *      linked list with the registered clocksources
      85             :  * clocksource_mutex:
      86             :  *      protects manipulations to curr_clocksource and the clocksource_list
      87             :  * override_name:
      88             :  *      Name of the user-specified clocksource.
      89             :  */
      90             : static struct clocksource *curr_clocksource;
      91             : static struct clocksource *suspend_clocksource;
      92             : static LIST_HEAD(clocksource_list);
      93             : static DEFINE_MUTEX(clocksource_mutex);
      94             : static char override_name[CS_NAME_LEN];
      95             : static int finished_booting;
      96             : static u64 suspend_start;
      97             : 
      98             : /*
      99             :  * Threshold: 0.0312s, when doubled: 0.0625s.
     100             :  * Also a default for cs->uncertainty_margin when registering clocks.
     101             :  */
     102             : #define WATCHDOG_THRESHOLD (NSEC_PER_SEC >> 5)
     103             : 
     104             : /*
     105             :  * Maximum permissible delay between two readouts of the watchdog
     106             :  * clocksource surrounding a read of the clocksource being validated.
     107             :  * This delay could be due to SMIs, NMIs, or to VCPU preemptions.  Used as
     108             :  * a lower bound for cs->uncertainty_margin values when registering clocks.
     109             :  */
     110             : #ifdef CONFIG_CLOCKSOURCE_WATCHDOG_MAX_SKEW_US
     111             : #define MAX_SKEW_USEC   CONFIG_CLOCKSOURCE_WATCHDOG_MAX_SKEW_US
     112             : #else
     113             : #define MAX_SKEW_USEC   100
     114             : #endif
     115             : 
     116             : #define WATCHDOG_MAX_SKEW (MAX_SKEW_USEC * NSEC_PER_USEC)
     117             : 
     118             : #ifdef CONFIG_CLOCKSOURCE_WATCHDOG
     119             : static void clocksource_watchdog_work(struct work_struct *work);
     120             : static void clocksource_select(void);
     121             : 
     122             : static LIST_HEAD(watchdog_list);
     123             : static struct clocksource *watchdog;
     124             : static struct timer_list watchdog_timer;
     125             : static DECLARE_WORK(watchdog_work, clocksource_watchdog_work);
     126             : static DEFINE_SPINLOCK(watchdog_lock);
     127             : static int watchdog_running;
     128             : static atomic_t watchdog_reset_pending;
     129             : 
     130             : static inline void clocksource_watchdog_lock(unsigned long *flags)
     131             : {
     132             :         spin_lock_irqsave(&watchdog_lock, *flags);
     133             : }
     134             : 
     135             : static inline void clocksource_watchdog_unlock(unsigned long *flags)
     136             : {
     137             :         spin_unlock_irqrestore(&watchdog_lock, *flags);
     138             : }
     139             : 
     140             : static int clocksource_watchdog_kthread(void *data);
     141             : static void __clocksource_change_rating(struct clocksource *cs, int rating);
     142             : 
     143             : /*
     144             :  * Interval: 0.5sec.
     145             :  */
     146             : #define WATCHDOG_INTERVAL (HZ >> 1)
     147             : 
     148             : static void clocksource_watchdog_work(struct work_struct *work)
     149             : {
     150             :         /*
     151             :          * We cannot directly run clocksource_watchdog_kthread() here, because
     152             :          * clocksource_select() calls timekeeping_notify() which uses
     153             :          * stop_machine(). One cannot use stop_machine() from a workqueue due
     154             :          * to lock inversions wrt CPU hotplug.
     155             :          *
     156             :          * Also, we only ever run this work once or twice during the lifetime
     157             :          * of the kernel, so there is no point in creating a more permanent
     158             :          * kthread for this.
     159             :          *
     160             :          * If kthread_run() fails, the next watchdog scan over the
     161             :          * watchdog_list will find the unstable clock again.
     162             :          */
     163             :         kthread_run(clocksource_watchdog_kthread, NULL, "kwatchdog");
     164             : }
     165             : 
     166             : static void __clocksource_unstable(struct clocksource *cs)
     167             : {
     168             :         cs->flags &= ~(CLOCK_SOURCE_VALID_FOR_HRES | CLOCK_SOURCE_WATCHDOG);
     169             :         cs->flags |= CLOCK_SOURCE_UNSTABLE;
     170             : 
     171             :         /*
     172             :          * If the clocksource is registered clocksource_watchdog_kthread() will
     173             :          * re-rate and re-select.
     174             :          */
     175             :         if (list_empty(&cs->list)) {
     176             :                 cs->rating = 0;
     177             :                 return;
     178             :         }
     179             : 
     180             :         if (cs->mark_unstable)
     181             :                 cs->mark_unstable(cs);
     182             : 
     183             :         /* kick clocksource_watchdog_kthread() */
     184             :         if (finished_booting)
     185             :                 schedule_work(&watchdog_work);
     186             : }
     187             : 
     188             : /**
     189             :  * clocksource_mark_unstable - mark clocksource unstable via watchdog
     190             :  * @cs:         clocksource to be marked unstable
     191             :  *
     192             :  * This function is called by the x86 TSC code to mark clocksources as unstable;
     193             :  * it defers demotion and re-selection to a kthread.
     194             :  */
     195             : void clocksource_mark_unstable(struct clocksource *cs)
     196             : {
     197             :         unsigned long flags;
     198             : 
     199             :         spin_lock_irqsave(&watchdog_lock, flags);
     200             :         if (!(cs->flags & CLOCK_SOURCE_UNSTABLE)) {
     201             :                 if (!list_empty(&cs->list) && list_empty(&cs->wd_list))
     202             :                         list_add(&cs->wd_list, &watchdog_list);
     203             :                 __clocksource_unstable(cs);
     204             :         }
     205             :         spin_unlock_irqrestore(&watchdog_lock, flags);
     206             : }
     207             : 
     208             : ulong max_cswd_read_retries = 2;
     209             : module_param(max_cswd_read_retries, ulong, 0644);
     210             : EXPORT_SYMBOL_GPL(max_cswd_read_retries);
     211             : static int verify_n_cpus = 8;
     212             : module_param(verify_n_cpus, int, 0644);
     213             : 
     214             : enum wd_read_status {
     215             :         WD_READ_SUCCESS,
     216             :         WD_READ_UNSTABLE,
     217             :         WD_READ_SKIP
     218             : };
     219             : 
     220             : static enum wd_read_status cs_watchdog_read(struct clocksource *cs, u64 *csnow, u64 *wdnow)
     221             : {
     222             :         unsigned int nretries;
     223             :         u64 wd_end, wd_end2, wd_delta;
     224             :         int64_t wd_delay, wd_seq_delay;
     225             : 
     226             :         for (nretries = 0; nretries <= max_cswd_read_retries; nretries++) {
     227             :                 local_irq_disable();
     228             :                 *wdnow = watchdog->read(watchdog);
     229             :                 *csnow = cs->read(cs);
     230             :                 wd_end = watchdog->read(watchdog);
     231             :                 wd_end2 = watchdog->read(watchdog);
     232             :                 local_irq_enable();
     233             : 
     234             :                 wd_delta = clocksource_delta(wd_end, *wdnow, watchdog->mask);
     235             :                 wd_delay = clocksource_cyc2ns(wd_delta, watchdog->mult,
     236             :                                               watchdog->shift);
     237             :                 if (wd_delay <= WATCHDOG_MAX_SKEW) {
     238             :                         if (nretries > 1 || nretries >= max_cswd_read_retries) {
     239             :                                 pr_warn("timekeeping watchdog on CPU%d: %s retried %d times before success\n",
     240             :                                         smp_processor_id(), watchdog->name, nretries);
     241             :                         }
     242             :                         return WD_READ_SUCCESS;
     243             :                 }
     244             : 
     245             :                 /*
     246             :                  * Now compute the delay between consecutive watchdog reads to
     247             :                  * see if there is too much external interference, causing a
     248             :                  * significant delay in reading both clocksource and watchdog.
     249             :                  *
     250             :                  * If consecutive WD read-back delay > WATCHDOG_MAX_SKEW/2,
     251             :                  * report system busy, reinit the watchdog and skip the current
     252             :                  * watchdog test.
     253             :                  */
     254             :                 wd_delta = clocksource_delta(wd_end2, wd_end, watchdog->mask);
     255             :                 wd_seq_delay = clocksource_cyc2ns(wd_delta, watchdog->mult, watchdog->shift);
     256             :                 if (wd_seq_delay > WATCHDOG_MAX_SKEW/2)
     257             :                         goto skip_test;
     258             :         }
     259             : 
     260             :         pr_warn("timekeeping watchdog on CPU%d: %s read-back delay of %lldns, attempt %d, marking unstable\n",
     261             :                 smp_processor_id(), watchdog->name, wd_delay, nretries);
     262             :         return WD_READ_UNSTABLE;
     263             : 
     264             : skip_test:
     265             :         pr_info("timekeeping watchdog on CPU%d: %s wd-wd read-back delay of %lldns\n",
     266             :                 smp_processor_id(), watchdog->name, wd_seq_delay);
     267             :         pr_info("wd-%s-wd read-back delay of %lldns, clock-skew test skipped!\n",
     268             :                 cs->name, wd_delay);
     269             :         return WD_READ_SKIP;
     270             : }
     271             : 
     272             : static u64 csnow_mid;
     273             : static cpumask_t cpus_ahead;
     274             : static cpumask_t cpus_behind;
     275             : static cpumask_t cpus_chosen;
     276             : 
     277             : static void clocksource_verify_choose_cpus(void)
     278             : {
     279             :         int cpu, i, n = verify_n_cpus;
     280             : 
     281             :         if (n < 0) {
     282             :                 /* Check all of the CPUs. */
     283             :                 cpumask_copy(&cpus_chosen, cpu_online_mask);
     284             :                 cpumask_clear_cpu(smp_processor_id(), &cpus_chosen);
     285             :                 return;
     286             :         }
     287             : 
     288             :         /* If no checking desired, or no other CPU to check, leave. */
     289             :         cpumask_clear(&cpus_chosen);
     290             :         if (n == 0 || num_online_cpus() <= 1)
     291             :                 return;
     292             : 
     293             :         /* Make sure to select at least one CPU other than the current CPU. */
     294             :         cpu = cpumask_first(cpu_online_mask);
     295             :         if (cpu == smp_processor_id())
     296             :                 cpu = cpumask_next(cpu, cpu_online_mask);
     297             :         if (WARN_ON_ONCE(cpu >= nr_cpu_ids))
     298             :                 return;
     299             :         cpumask_set_cpu(cpu, &cpus_chosen);
     300             : 
     301             :         /* Force a sane value for the boot parameter. */
     302             :         if (n > nr_cpu_ids)
     303             :                 n = nr_cpu_ids;
     304             : 
     305             :         /*
     306             :          * Randomly select the specified number of CPUs.  If the same
     307             :          * CPU is selected multiple times, that CPU is checked only once,
     308             :          * and no replacement CPU is selected.  This gracefully handles
     309             :          * situations where verify_n_cpus is greater than the number of
     310             :          * CPUs that are currently online.
     311             :          */
     312             :         for (i = 1; i < n; i++) {
     313             :                 cpu = prandom_u32() % nr_cpu_ids;
     314             :                 cpu = cpumask_next(cpu - 1, cpu_online_mask);
     315             :                 if (cpu >= nr_cpu_ids)
     316             :                         cpu = cpumask_first(cpu_online_mask);
     317             :                 if (!WARN_ON_ONCE(cpu >= nr_cpu_ids))
     318             :                         cpumask_set_cpu(cpu, &cpus_chosen);
     319             :         }
     320             : 
     321             :         /* Don't verify ourselves. */
     322             :         cpumask_clear_cpu(smp_processor_id(), &cpus_chosen);
     323             : }
     324             : 
     325             : static void clocksource_verify_one_cpu(void *csin)
     326             : {
     327             :         struct clocksource *cs = (struct clocksource *)csin;
     328             : 
     329             :         csnow_mid = cs->read(cs);
     330             : }
     331             : 
     332             : void clocksource_verify_percpu(struct clocksource *cs)
     333             : {
     334             :         int64_t cs_nsec, cs_nsec_max = 0, cs_nsec_min = LLONG_MAX;
     335             :         u64 csnow_begin, csnow_end;
     336             :         int cpu, testcpu;
     337             :         s64 delta;
     338             : 
     339             :         if (verify_n_cpus == 0)
     340             :                 return;
     341             :         cpumask_clear(&cpus_ahead);
     342             :         cpumask_clear(&cpus_behind);
     343             :         cpus_read_lock();
     344             :         preempt_disable();
     345             :         clocksource_verify_choose_cpus();
     346             :         if (cpumask_weight(&cpus_chosen) == 0) {
     347             :                 preempt_enable();
     348             :                 cpus_read_unlock();
     349             :                 pr_warn("Not enough CPUs to check clocksource '%s'.\n", cs->name);
     350             :                 return;
     351             :         }
     352             :         testcpu = smp_processor_id();
     353             :         pr_warn("Checking clocksource %s synchronization from CPU %d to CPUs %*pbl.\n", cs->name, testcpu, cpumask_pr_args(&cpus_chosen));
     354             :         for_each_cpu(cpu, &cpus_chosen) {
     355             :                 if (cpu == testcpu)
     356             :                         continue;
     357             :                 csnow_begin = cs->read(cs);
     358             :                 smp_call_function_single(cpu, clocksource_verify_one_cpu, cs, 1);
     359             :                 csnow_end = cs->read(cs);
     360             :                 delta = (s64)((csnow_mid - csnow_begin) & cs->mask);
     361             :                 if (delta < 0)
     362             :                         cpumask_set_cpu(cpu, &cpus_behind);
     363             :                 delta = (csnow_end - csnow_mid) & cs->mask;
     364             :                 if (delta < 0)
     365             :                         cpumask_set_cpu(cpu, &cpus_ahead);
     366             :                 delta = clocksource_delta(csnow_end, csnow_begin, cs->mask);
     367             :                 cs_nsec = clocksource_cyc2ns(delta, cs->mult, cs->shift);
     368             :                 if (cs_nsec > cs_nsec_max)
     369             :                         cs_nsec_max = cs_nsec;
     370             :                 if (cs_nsec < cs_nsec_min)
     371             :                         cs_nsec_min = cs_nsec;
     372             :         }
     373             :         preempt_enable();
     374             :         cpus_read_unlock();
     375             :         if (!cpumask_empty(&cpus_ahead))
     376             :                 pr_warn("        CPUs %*pbl ahead of CPU %d for clocksource %s.\n",
     377             :                         cpumask_pr_args(&cpus_ahead), testcpu, cs->name);
     378             :         if (!cpumask_empty(&cpus_behind))
     379             :                 pr_warn("        CPUs %*pbl behind CPU %d for clocksource %s.\n",
     380             :                         cpumask_pr_args(&cpus_behind), testcpu, cs->name);
     381             :         if (!cpumask_empty(&cpus_ahead) || !cpumask_empty(&cpus_behind))
     382             :                 pr_warn("        CPU %d check durations %lldns - %lldns for clocksource %s.\n",
     383             :                         testcpu, cs_nsec_min, cs_nsec_max, cs->name);
     384             : }
     385             : EXPORT_SYMBOL_GPL(clocksource_verify_percpu);
     386             : 
     387             : static void clocksource_watchdog(struct timer_list *unused)
     388             : {
     389             :         u64 csnow, wdnow, cslast, wdlast, delta;
     390             :         int next_cpu, reset_pending;
     391             :         int64_t wd_nsec, cs_nsec;
     392             :         struct clocksource *cs;
     393             :         enum wd_read_status read_ret;
     394             :         u32 md;
     395             : 
     396             :         spin_lock(&watchdog_lock);
     397             :         if (!watchdog_running)
     398             :                 goto out;
     399             : 
     400             :         reset_pending = atomic_read(&watchdog_reset_pending);
     401             : 
     402             :         list_for_each_entry(cs, &watchdog_list, wd_list) {
     403             : 
     404             :                 /* Clocksource already marked unstable? */
     405             :                 if (cs->flags & CLOCK_SOURCE_UNSTABLE) {
     406             :                         if (finished_booting)
     407             :                                 schedule_work(&watchdog_work);
     408             :                         continue;
     409             :                 }
     410             : 
     411             :                 read_ret = cs_watchdog_read(cs, &csnow, &wdnow);
     412             : 
     413             :                 if (read_ret != WD_READ_SUCCESS) {
     414             :                         if (read_ret == WD_READ_UNSTABLE)
     415             :                                 /* Clock readout unreliable, so give it up. */
     416             :                                 __clocksource_unstable(cs);
     417             :                         continue;
     418             :                 }
     419             : 
     420             :                 /* Clocksource initialized ? */
     421             :                 if (!(cs->flags & CLOCK_SOURCE_WATCHDOG) ||
     422             :                     atomic_read(&watchdog_reset_pending)) {
     423             :                         cs->flags |= CLOCK_SOURCE_WATCHDOG;
     424             :                         cs->wd_last = wdnow;
     425             :                         cs->cs_last = csnow;
     426             :                         continue;
     427             :                 }
     428             : 
     429             :                 delta = clocksource_delta(wdnow, cs->wd_last, watchdog->mask);
     430             :                 wd_nsec = clocksource_cyc2ns(delta, watchdog->mult,
     431             :                                              watchdog->shift);
     432             : 
     433             :                 delta = clocksource_delta(csnow, cs->cs_last, cs->mask);
     434             :                 cs_nsec = clocksource_cyc2ns(delta, cs->mult, cs->shift);
     435             :                 wdlast = cs->wd_last; /* save these in case we print them */
     436             :                 cslast = cs->cs_last;
     437             :                 cs->cs_last = csnow;
     438             :                 cs->wd_last = wdnow;
     439             : 
     440             :                 if (atomic_read(&watchdog_reset_pending))
     441             :                         continue;
     442             : 
     443             :                 /* Check the deviation from the watchdog clocksource. */
     444             :                 md = cs->uncertainty_margin + watchdog->uncertainty_margin;
     445             :                 if (abs(cs_nsec - wd_nsec) > md) {
     446             :                         pr_warn("timekeeping watchdog on CPU%d: Marking clocksource '%s' as unstable because the skew is too large:\n",
     447             :                                 smp_processor_id(), cs->name);
     448             :                         pr_warn("                      '%s' wd_nsec: %lld wd_now: %llx wd_last: %llx mask: %llx\n",
     449             :                                 watchdog->name, wd_nsec, wdnow, wdlast, watchdog->mask);
     450             :                         pr_warn("                      '%s' cs_nsec: %lld cs_now: %llx cs_last: %llx mask: %llx\n",
     451             :                                 cs->name, cs_nsec, csnow, cslast, cs->mask);
     452             :                         if (curr_clocksource == cs)
     453             :                                 pr_warn("                      '%s' is current clocksource.\n", cs->name);
     454             :                         else if (curr_clocksource)
     455             :                                 pr_warn("                      '%s' (not '%s') is current clocksource.\n", curr_clocksource->name, cs->name);
     456             :                         else
     457             :                                 pr_warn("                      No current clocksource.\n");
     458             :                         __clocksource_unstable(cs);
     459             :                         continue;
     460             :                 }
     461             : 
     462             :                 if (cs == curr_clocksource && cs->tick_stable)
     463             :                         cs->tick_stable(cs);
     464             : 
     465             :                 if (!(cs->flags & CLOCK_SOURCE_VALID_FOR_HRES) &&
     466             :                     (cs->flags & CLOCK_SOURCE_IS_CONTINUOUS) &&
     467             :                     (watchdog->flags & CLOCK_SOURCE_IS_CONTINUOUS)) {
     468             :                         /* Mark it valid for high-res. */
     469             :                         cs->flags |= CLOCK_SOURCE_VALID_FOR_HRES;
     470             : 
     471             :                         /*
     472             :                          * clocksource_done_booting() will sort it if
     473             :                          * finished_booting is not set yet.
     474             :                          */
     475             :                         if (!finished_booting)
     476             :                                 continue;
     477             : 
     478             :                         /*
     479             :                          * If this is not the current clocksource let
     480             :                          * the watchdog thread reselect it. Due to the
     481             :                          * change to high res this clocksource might
     482             :                          * be preferred now. If it is the current
     483             :                          * clocksource let the tick code know about
     484             :                          * that change.
     485             :                          */
     486             :                         if (cs != curr_clocksource) {
     487             :                                 cs->flags |= CLOCK_SOURCE_RESELECT;
     488             :                                 schedule_work(&watchdog_work);
     489             :                         } else {
     490             :                                 tick_clock_notify();
     491             :                         }
     492             :                 }
     493             :         }
     494             : 
     495             :         /*
     496             :          * We only decrement watchdog_reset_pending once we have done a
     497             :          * full cycle through all clocksources.
     498             :          */
     499             :         if (reset_pending)
     500             :                 atomic_dec(&watchdog_reset_pending);
     501             : 
     502             :         /*
     503             :          * Cycle through CPUs to check if the CPUs stay synchronized
     504             :          * to each other.
     505             :          */
     506             :         next_cpu = cpumask_next(raw_smp_processor_id(), cpu_online_mask);
     507             :         if (next_cpu >= nr_cpu_ids)
     508             :                 next_cpu = cpumask_first(cpu_online_mask);
     509             : 
     510             :         /*
     511             :          * Arm timer if not already pending: could race with a concurrent
     512             :          * clocksource_stop_watchdog() / clocksource_start_watchdog() pair.
     513             :          */
     514             :         if (!timer_pending(&watchdog_timer)) {
     515             :                 watchdog_timer.expires += WATCHDOG_INTERVAL;
     516             :                 add_timer_on(&watchdog_timer, next_cpu);
     517             :         }
     518             : out:
     519             :         spin_unlock(&watchdog_lock);
     520             : }
     521             : 
     522             : static inline void clocksource_start_watchdog(void)
     523             : {
     524             :         if (watchdog_running || !watchdog || list_empty(&watchdog_list))
     525             :                 return;
     526             :         timer_setup(&watchdog_timer, clocksource_watchdog, 0);
     527             :         watchdog_timer.expires = jiffies + WATCHDOG_INTERVAL;
     528             :         add_timer_on(&watchdog_timer, cpumask_first(cpu_online_mask));
     529             :         watchdog_running = 1;
     530             : }
     531             : 
     532             : static inline void clocksource_stop_watchdog(void)
     533             : {
     534             :         if (!watchdog_running || (watchdog && !list_empty(&watchdog_list)))
     535             :                 return;
     536             :         del_timer(&watchdog_timer);
     537             :         watchdog_running = 0;
     538             : }
     539             : 
     540             : static inline void clocksource_reset_watchdog(void)
     541             : {
     542             :         struct clocksource *cs;
     543             : 
     544             :         list_for_each_entry(cs, &watchdog_list, wd_list)
     545             :                 cs->flags &= ~CLOCK_SOURCE_WATCHDOG;
     546             : }
     547             : 
     548             : static void clocksource_resume_watchdog(void)
     549             : {
     550             :         atomic_inc(&watchdog_reset_pending);
     551             : }
     552             : 
     553             : static void clocksource_enqueue_watchdog(struct clocksource *cs)
     554             : {
     555             :         INIT_LIST_HEAD(&cs->wd_list);
     556             : 
     557             :         if (cs->flags & CLOCK_SOURCE_MUST_VERIFY) {
     558             :                 /* cs is a clocksource to be watched. */
     559             :                 list_add(&cs->wd_list, &watchdog_list);
     560             :                 cs->flags &= ~CLOCK_SOURCE_WATCHDOG;
     561             :         } else {
     562             :                 /* cs is a watchdog. */
     563             :                 if (cs->flags & CLOCK_SOURCE_IS_CONTINUOUS)
     564             :                         cs->flags |= CLOCK_SOURCE_VALID_FOR_HRES;
     565             :         }
     566             : }
     567             : 
     568             : static void clocksource_select_watchdog(bool fallback)
     569             : {
     570             :         struct clocksource *cs, *old_wd;
     571             :         unsigned long flags;
     572             : 
     573             :         spin_lock_irqsave(&watchdog_lock, flags);
     574             :         /* save current watchdog */
     575             :         old_wd = watchdog;
     576             :         if (fallback)
     577             :                 watchdog = NULL;
     578             : 
     579             :         list_for_each_entry(cs, &clocksource_list, list) {
     580             :                 /* cs is a clocksource to be watched. */
     581             :                 if (cs->flags & CLOCK_SOURCE_MUST_VERIFY)
     582             :                         continue;
     583             : 
     584             :                 /* Skip current if we were requested for a fallback. */
     585             :                 if (fallback && cs == old_wd)
     586             :                         continue;
     587             : 
     588             :                 /* Pick the best watchdog. */
     589             :                 if (!watchdog || cs->rating > watchdog->rating)
     590             :                         watchdog = cs;
     591             :         }
     592             :         /* If we failed to find a fallback restore the old one. */
     593             :         if (!watchdog)
     594             :                 watchdog = old_wd;
     595             : 
     596             :         /* If we changed the watchdog we need to reset cycles. */
     597             :         if (watchdog != old_wd)
     598             :                 clocksource_reset_watchdog();
     599             : 
     600             :         /* Check if the watchdog timer needs to be started. */
     601             :         clocksource_start_watchdog();
     602             :         spin_unlock_irqrestore(&watchdog_lock, flags);
     603             : }
     604             : 
     605             : static void clocksource_dequeue_watchdog(struct clocksource *cs)
     606             : {
     607             :         if (cs != watchdog) {
     608             :                 if (cs->flags & CLOCK_SOURCE_MUST_VERIFY) {
     609             :                         /* cs is a watched clocksource. */
     610             :                         list_del_init(&cs->wd_list);
     611             :                         /* Check if the watchdog timer needs to be stopped. */
     612             :                         clocksource_stop_watchdog();
     613             :                 }
     614             :         }
     615             : }
     616             : 
     617             : static int __clocksource_watchdog_kthread(void)
     618             : {
     619             :         struct clocksource *cs, *tmp;
     620             :         unsigned long flags;
     621             :         int select = 0;
     622             : 
     623             :         /* Do any required per-CPU skew verification. */
     624             :         if (curr_clocksource &&
     625             :             curr_clocksource->flags & CLOCK_SOURCE_UNSTABLE &&
     626             :             curr_clocksource->flags & CLOCK_SOURCE_VERIFY_PERCPU)
     627             :                 clocksource_verify_percpu(curr_clocksource);
     628             : 
     629             :         spin_lock_irqsave(&watchdog_lock, flags);
     630             :         list_for_each_entry_safe(cs, tmp, &watchdog_list, wd_list) {
     631             :                 if (cs->flags & CLOCK_SOURCE_UNSTABLE) {
     632             :                         list_del_init(&cs->wd_list);
     633             :                         __clocksource_change_rating(cs, 0);
     634             :                         select = 1;
     635             :                 }
     636             :                 if (cs->flags & CLOCK_SOURCE_RESELECT) {
     637             :                         cs->flags &= ~CLOCK_SOURCE_RESELECT;
     638             :                         select = 1;
     639             :                 }
     640             :         }
     641             :         /* Check if the watchdog timer needs to be stopped. */
     642             :         clocksource_stop_watchdog();
     643             :         spin_unlock_irqrestore(&watchdog_lock, flags);
     644             : 
     645             :         return select;
     646             : }
     647             : 
     648             : static int clocksource_watchdog_kthread(void *data)
     649             : {
     650             :         mutex_lock(&clocksource_mutex);
     651             :         if (__clocksource_watchdog_kthread())
     652             :                 clocksource_select();
     653             :         mutex_unlock(&clocksource_mutex);
     654             :         return 0;
     655             : }
     656             : 
     657             : static bool clocksource_is_watchdog(struct clocksource *cs)
     658             : {
     659             :         return cs == watchdog;
     660             : }
     661             : 
     662             : #else /* CONFIG_CLOCKSOURCE_WATCHDOG */
     663             : 
     664             : static void clocksource_enqueue_watchdog(struct clocksource *cs)
     665             : {
     666           2 :         if (cs->flags & CLOCK_SOURCE_IS_CONTINUOUS)
     667           1 :                 cs->flags |= CLOCK_SOURCE_VALID_FOR_HRES;
     668             : }
     669             : 
     670             : static void clocksource_select_watchdog(bool fallback) { }
     671             : static inline void clocksource_dequeue_watchdog(struct clocksource *cs) { }
     672             : static inline void clocksource_resume_watchdog(void) { }
     673             : static inline int __clocksource_watchdog_kthread(void) { return 0; }
     674             : static bool clocksource_is_watchdog(struct clocksource *cs) { return false; }
     675           0 : void clocksource_mark_unstable(struct clocksource *cs) { }
     676             : 
     677             : static inline void clocksource_watchdog_lock(unsigned long *flags) { }
     678             : static inline void clocksource_watchdog_unlock(unsigned long *flags) { }
     679             : 
     680             : #endif /* CONFIG_CLOCKSOURCE_WATCHDOG */
     681             : 
     682             : static bool clocksource_is_suspend(struct clocksource *cs)
     683             : {
     684           0 :         return cs == suspend_clocksource;
     685             : }
     686             : 
     687           2 : static void __clocksource_suspend_select(struct clocksource *cs)
     688             : {
     689             :         /*
     690             :          * Skip the clocksource which will be stopped in suspend state.
     691             :          */
     692           2 :         if (!(cs->flags & CLOCK_SOURCE_SUSPEND_NONSTOP))
     693             :                 return;
     694             : 
     695             :         /*
     696             :          * The nonstop clocksource can be selected as the suspend clocksource to
     697             :          * calculate the suspend time, so it should not supply suspend/resume
     698             :          * interfaces to suspend the nonstop clocksource when system suspends.
     699             :          */
     700           0 :         if (cs->suspend || cs->resume) {
     701           0 :                 pr_warn("Nonstop clocksource %s should not supply suspend/resume interfaces\n",
     702             :                         cs->name);
     703             :         }
     704             : 
     705             :         /* Pick the best rating. */
     706           0 :         if (!suspend_clocksource || cs->rating > suspend_clocksource->rating)
     707           0 :                 suspend_clocksource = cs;
     708             : }
     709             : 
     710             : /**
     711             :  * clocksource_suspend_select - Select the best clocksource for suspend timing
     712             :  * @fallback:   true to select a fallback, skipping the current suspend clocksource
     713             :  */
     714           0 : static void clocksource_suspend_select(bool fallback)
     715             : {
     716             :         struct clocksource *cs, *old_suspend;
     717             : 
     718           0 :         old_suspend = suspend_clocksource;
     719           0 :         if (fallback)
     720           0 :                 suspend_clocksource = NULL;
     721             : 
     722           0 :         list_for_each_entry(cs, &clocksource_list, list) {
     723             :                 /* Skip current if we were requested for a fallback. */
     724           0 :                 if (fallback && cs == old_suspend)
     725           0 :                         continue;
     726             : 
     727           0 :                 __clocksource_suspend_select(cs);
     728             :         }
     729           0 : }
     730             : 
     731             : /**
     732             :  * clocksource_start_suspend_timing - Start measuring the suspend timing
     733             :  * @cs:                 current clocksource from timekeeping
     734             :  * @start_cycles:       current cycles from timekeeping
     735             :  *
     736             :  * This function will save the start cycle values of suspend timer to calculate
     737             :  * the suspend time when resuming system.
     738             :  *
     739             :  * This function is called late in the suspend process from timekeeping_suspend(),
     740             :  * that means processes are frozen, non-boot cpus and interrupts are disabled
     741             :  * now. It is therefore possible to start the suspend timer without taking the
     742             :  * clocksource mutex.
     743             :  */
     744           0 : void clocksource_start_suspend_timing(struct clocksource *cs, u64 start_cycles)
     745             : {
     746           0 :         if (!suspend_clocksource)
     747             :                 return;
     748             : 
     749             :         /*
     750             :          * If current clocksource is the suspend timer, we should use the
     751             :          * tkr_mono.cycle_last value as suspend_start to avoid same reading
     752             :          * from suspend timer.
     753             :          */
     754           0 :         if (clocksource_is_suspend(cs)) {
     755           0 :                 suspend_start = start_cycles;
     756           0 :                 return;
     757             :         }
     758             : 
     759           0 :         if (suspend_clocksource->enable &&
     760           0 :             suspend_clocksource->enable(suspend_clocksource)) {
     761           0 :                 pr_warn_once("Failed to enable the non-suspend-able clocksource.\n");
     762             :                 return;
     763             :         }
     764             : 
     765           0 :         suspend_start = suspend_clocksource->read(suspend_clocksource);
     766             : }
     767             : 
     768             : /**
     769             :  * clocksource_stop_suspend_timing - Stop measuring the suspend timing
     770             :  * @cs:         current clocksource from timekeeping
     771             :  * @cycle_now:  current cycles from timekeeping
     772             :  *
     773             :  * This function will calculate the suspend time from suspend timer.
     774             :  *
     775             :  * Returns nanoseconds since suspend started, 0 if no usable suspend clocksource.
     776             :  *
     777             :  * This function is called early in the resume process from timekeeping_resume(),
     778             :  * that means there is only one cpu, no processes are running and the interrupts
     779             :  * are disabled. It is therefore possible to stop the suspend timer without
     780             :  * taking the clocksource mutex.
     781             :  */
     782           0 : u64 clocksource_stop_suspend_timing(struct clocksource *cs, u64 cycle_now)
     783             : {
     784           0 :         u64 now, delta, nsec = 0;
     785             : 
     786           0 :         if (!suspend_clocksource)
     787             :                 return 0;
     788             : 
     789             :         /*
     790             :          * If current clocksource is the suspend timer, we should use the
     791             :          * tkr_mono.cycle_last value from timekeeping as current cycle to
     792             :          * avoid same reading from suspend timer.
     793             :          */
     794           0 :         if (clocksource_is_suspend(cs))
     795             :                 now = cycle_now;
     796             :         else
     797           0 :                 now = suspend_clocksource->read(suspend_clocksource);
     798             : 
     799           0 :         if (now > suspend_start) {
     800           0 :                 delta = clocksource_delta(now, suspend_start,
     801           0 :                                           suspend_clocksource->mask);
     802           0 :                 nsec = mul_u64_u32_shr(delta, suspend_clocksource->mult,
     803             :                                        suspend_clocksource->shift);
     804             :         }
     805             : 
     806             :         /*
     807             :          * Disable the suspend timer to save power if current clocksource is
     808             :          * not the suspend timer.
     809             :          */
     810           0 :         if (!clocksource_is_suspend(cs) && suspend_clocksource->disable)
     811           0 :                 suspend_clocksource->disable(suspend_clocksource);
     812             : 
     813             :         return nsec;
     814             : }
     815             : 
     816             : /**
     817             :  * clocksource_suspend - suspend the clocksource(s)
     818             :  */
     819           0 : void clocksource_suspend(void)
     820             : {
     821             :         struct clocksource *cs;
     822             : 
     823           0 :         list_for_each_entry_reverse(cs, &clocksource_list, list)
     824           0 :                 if (cs->suspend)
     825           0 :                         cs->suspend(cs);
     826           0 : }
     827             : 
     828             : /**
     829             :  * clocksource_resume - resume the clocksource(s)
     830             :  */
     831           0 : void clocksource_resume(void)
     832             : {
     833             :         struct clocksource *cs;
     834             : 
     835           0 :         list_for_each_entry(cs, &clocksource_list, list)
     836           0 :                 if (cs->resume)
     837           0 :                         cs->resume(cs);
     838             : 
     839             :         clocksource_resume_watchdog();
     840           0 : }
     841             : 
     842             : /**
     843             :  * clocksource_touch_watchdog - Update watchdog
     844             :  *
     845             :  * Update the watchdog after exception contexts such as kgdb so as not
     846             :  * to incorrectly trip the watchdog. This might fail when the kernel
     847             :  * was stopped in code which holds watchdog_lock.
     848             :  */
     849           0 : void clocksource_touch_watchdog(void)
     850             : {
     851             :         clocksource_resume_watchdog();
     852           0 : }
     853             : 
     854             : /**
     855             :  * clocksource_max_adjustment - Returns max adjustment amount
     856             :  * @cs:         Pointer to clocksource
     857             :  *
     858             :  */
     859             : static u32 clocksource_max_adjustment(struct clocksource *cs)
     860             : {
     861             :         u64 ret;
     862             :         /*
     863             :          * We won't try to correct for more than 11% adjustments (110,000 ppm).
     864             :          */
     865           2 :         ret = (u64)cs->mult * 11;
     866           2 :         do_div(ret,100);
     867           2 :         return (u32)ret;
     868             : }
     869             : 
     870             : /**
     871             :  * clocks_calc_max_nsecs - Returns maximum nanoseconds that can be converted
     872             :  * @mult:       cycle to nanosecond multiplier
     873             :  * @shift:      cycle to nanosecond divisor (power of two)
     874             :  * @maxadj:     maximum adjustment value to mult (~11%)
     875             :  * @mask:       bitmask for two's complement subtraction of non 64 bit counters
     876             :  * @max_cyc:    maximum cycle value before potential overflow (does not include
     877             :  *              any safety margin)
     878             :  *
     879             :  * NOTE: This function includes a safety margin of 50%, in other words, we
     880             :  * return half the number of nanoseconds the hardware counter can technically
     881             :  * cover. This is done so that we can potentially detect problems caused by
     882             :  * delayed timers or bad hardware, which might result in time intervals that
     883             :  * are larger than what the math used can handle without overflows.
     884             :  */
     885           0 : u64 clocks_calc_max_nsecs(u32 mult, u32 shift, u32 maxadj, u64 mask, u64 *max_cyc)
     886             : {
     887             :         u64 max_nsecs, max_cycles;
     888             : 
     889             :         /*
     890             :          * Calculate the maximum number of cycles that we can pass to the
     891             :          * cyc2ns() function without overflowing a 64-bit result.
     892             :          */
     893           2 :         max_cycles = ULLONG_MAX;
     894           2 :         do_div(max_cycles, mult+maxadj);
     895             : 
     896             :         /*
     897             :          * The actual maximum number of cycles we can defer the clocksource is
     898             :          * determined by the minimum of max_cycles and mask.
     899             :          * Note: Here we subtract the maxadj to make sure we don't sleep for
     900             :          * too long if there's a large negative adjustment.
     901             :          */
     902           2 :         max_cycles = min(max_cycles, mask);
     903           4 :         max_nsecs = clocksource_cyc2ns(max_cycles, mult - maxadj, shift);
     904             : 
     905             :         /* return the max_cycles value as well if requested */
     906           2 :         if (max_cyc)
     907           2 :                 *max_cyc = max_cycles;
     908             : 
     909             :         /* Return 50% of the actual maximum, so we can detect bad values */
     910           2 :         max_nsecs >>= 1;
     911             : 
     912           0 :         return max_nsecs;
     913             : }
     914             : 
     915             : /**
     916             :  * clocksource_update_max_deferment - Updates the clocksource max_idle_ns & max_cycles
     917             :  * @cs:         Pointer to clocksource to be updated
     918             :  *
     919             :  */
     920             : static inline void clocksource_update_max_deferment(struct clocksource *cs)
     921             : {
     922           4 :         cs->max_idle_ns = clocks_calc_max_nsecs(cs->mult, cs->shift,
     923             :                                                 cs->maxadj, cs->mask,
     924             :                                                 &cs->max_cycles);
     925             : }
     926             : 
     927           3 : static struct clocksource *clocksource_find_best(bool oneshot, bool skipcur)
     928             : {
     929             :         struct clocksource *cs;
     930             : 
     931           4 :         if (!finished_booting || list_empty(&clocksource_list))
     932             :                 return NULL;
     933             : 
     934             :         /*
     935             :          * We pick the clocksource with the highest rating. If oneshot
     936             :          * mode is active, we pick the highres valid clocksource with
     937             :          * the best rating.
     938             :          */
     939           1 :         list_for_each_entry(cs, &clocksource_list, list) {
     940           1 :                 if (skipcur && cs == curr_clocksource)
     941           0 :                         continue;
     942           1 :                 if (oneshot && !(cs->flags & CLOCK_SOURCE_VALID_FOR_HRES))
     943           0 :                         continue;
     944             :                 return cs;
     945             :         }
     946             :         return NULL;
     947             : }
     948             : 
     949           3 : static void __clocksource_select(bool skipcur)
     950             : {
     951           3 :         bool oneshot = tick_oneshot_mode_active();
     952             :         struct clocksource *best, *cs;
     953             : 
     954             :         /* Find the best suitable clocksource */
     955           3 :         best = clocksource_find_best(oneshot, skipcur);
     956           3 :         if (!best)
     957             :                 return;
     958             : 
     959           1 :         if (!strlen(override_name))
     960             :                 goto found;
     961             : 
     962             :         /* Check for the override clocksource. */
     963           0 :         list_for_each_entry(cs, &clocksource_list, list) {
     964           0 :                 if (skipcur && cs == curr_clocksource)
     965           0 :                         continue;
     966           0 :                 if (strcmp(cs->name, override_name) != 0)
     967           0 :                         continue;
     968             :                 /*
     969             :                  * Check to make sure we don't switch to a non-highres
     970             :                  * capable clocksource if the tick code is in oneshot
     971             :                  * mode (highres or nohz)
     972             :                  */
     973             :                 if (!(cs->flags & CLOCK_SOURCE_VALID_FOR_HRES) && oneshot) {
     974             :                         /* Override clocksource cannot be used. */
     975             :                         if (cs->flags & CLOCK_SOURCE_UNSTABLE) {
     976             :                                 pr_warn("Override clocksource %s is unstable and not HRT compatible - cannot switch while in HRT/NOHZ mode\n",
     977             :                                         cs->name);
     978             :                                 override_name[0] = 0;
     979             :                         } else {
     980             :                                 /*
     981             :                                  * The override cannot be currently verified.
     982             :                                  * Deferring to let the watchdog check.
     983             :                                  */
     984             :                                 pr_info("Override clocksource %s is not currently HRT compatible - deferring\n",
     985             :                                         cs->name);
     986             :                         }
     987             :                 } else
     988             :                         /* Override clocksource can be used. */
     989             :                         best = cs;
     990             :                 break;
     991             :         }
     992             : 
     993             : found:
     994           1 :         if (curr_clocksource != best && !timekeeping_notify(best)) {
     995           1 :                 pr_info("Switched to clocksource %s\n", best->name);
     996           1 :                 curr_clocksource = best;
     997             :         }
     998             : }
     999             : 
    1000             : /**
    1001             :  * clocksource_select - Select the best clocksource available
    1002             :  *
    1003             :  * Private function. Must hold clocksource_mutex when called.
    1004             :  *
    1005             :  * Select the clocksource with the best rating, or the clocksource,
    1006             :  * which is selected by userspace override.
    1007             :  */
    1008             : static void clocksource_select(void)
    1009             : {
    1010           3 :         __clocksource_select(false);
    1011             : }
    1012             : 
    1013             : static void clocksource_select_fallback(void)
    1014             : {
    1015           0 :         __clocksource_select(true);
    1016             : }
    1017             : 
    1018             : /*
    1019             :  * clocksource_done_booting - Called near the end of core bootup
    1020             :  *
    1021             :  * Hack to avoid lots of clocksource churn at boot time.
    1022             :  * We use fs_initcall because we want this to start before
    1023             :  * device_initcall but after subsys_initcall.
    1024             :  */
    1025           1 : static int __init clocksource_done_booting(void)
    1026             : {
    1027           1 :         mutex_lock(&clocksource_mutex);
    1028           1 :         curr_clocksource = clocksource_default_clock();
    1029           1 :         finished_booting = 1;
    1030             :         /*
    1031             :          * Run the watchdog first to eliminate unstable clock sources
    1032             :          */
    1033             :         __clocksource_watchdog_kthread();
    1034             :         clocksource_select();
    1035           1 :         mutex_unlock(&clocksource_mutex);
    1036           1 :         return 0;
    1037             : }
    1038             : fs_initcall(clocksource_done_booting);
    1039             : 
    1040             : /*
    1041             :  * Enqueue the clocksource sorted by rating
    1042             :  */
    1043             : static void clocksource_enqueue(struct clocksource *cs)
    1044             : {
    1045           2 :         struct list_head *entry = &clocksource_list;
    1046             :         struct clocksource *tmp;
    1047             : 
    1048           3 :         list_for_each_entry(tmp, &clocksource_list, list) {
    1049             :                 /* Keep track of the place, where to insert */
    1050           1 :                 if (tmp->rating < cs->rating)
    1051             :                         break;
    1052           1 :                 entry = &tmp->list;
    1053             :         }
    1054           4 :         list_add(&cs->list, entry);
    1055             : }
    1056             : 
    1057             : /**
    1058             :  * __clocksource_update_freq_scale - Used to update clocksource with new freq
    1059             :  * @cs:         clocksource to be registered
    1060             :  * @scale:      Scale factor multiplied against freq to get clocksource hz
    1061             :  * @freq:       clocksource frequency (cycles per second) divided by scale
    1062             :  *
    1063             :  * This should only be called from the clocksource->enable() method.
    1064             :  *
    1065             :  * This *SHOULD NOT* be called directly! Please use the
    1066             :  * __clocksource_update_freq_hz() or __clocksource_update_freq_khz() helper
    1067             :  * functions.
    1068             :  */
    1069           2 : void __clocksource_update_freq_scale(struct clocksource *cs, u32 scale, u32 freq)
    1070             : {
    1071             :         u64 sec;
    1072             : 
    1073             :         /*
    1074             :          * Default clocksources are *special* and self-define their mult/shift.
    1075             :          * But, you're not special, so you should specify a freq value.
    1076             :          */
    1077           2 :         if (freq) {
    1078             :                 /*
    1079             :                  * Calc the maximum number of seconds which we can run before
    1080             :                  * wrapping around. For clocksources which have a mask > 32-bit
    1081             :                  * we need to limit the max sleep time to have a good
    1082             :                  * conversion precision. 10 minutes is still a reasonable
    1083             :                  * amount. That results in a shift value of 24 for a
    1084             :                  * clocksource with mask >= 40-bit and f >= 4GHz. That maps to
    1085             :                  * ~ 0.06ppm granularity for NTP.
    1086             :                  */
    1087           1 :                 sec = cs->mask;
    1088           1 :                 do_div(sec, freq);
    1089           1 :                 do_div(sec, scale);
    1090           1 :                 if (!sec)
    1091             :                         sec = 1;
    1092           1 :                 else if (sec > 600 && cs->mask > UINT_MAX)
    1093           1 :                         sec = 600;
    1094             : 
    1095           2 :                 clocks_calc_mult_shift(&cs->mult, &cs->shift, freq,
    1096           1 :                                        NSEC_PER_SEC / scale, sec * scale);
    1097             :         }
    1098             : 
    1099             :         /*
    1100             :          * If the uncertainty margin is not specified, calculate it.
    1101             :          * If both scale and freq are non-zero, calculate the clock
    1102             :          * period, but bound below at 2*WATCHDOG_MAX_SKEW.  However,
    1103             :          * if either of scale or freq is zero, be very conservative and
    1104             :          * take the tens-of-milliseconds WATCHDOG_THRESHOLD value for the
    1105             :          * uncertainty margin.  Allow stupidly small uncertainty margins
    1106             :          * to be specified by the caller for testing purposes, but warn
    1107             :          * to discourage production use of this capability.
    1108             :          */
    1109           2 :         if (scale && freq && !cs->uncertainty_margin) {
    1110           1 :                 cs->uncertainty_margin = NSEC_PER_SEC / (scale * freq);
    1111           1 :                 if (cs->uncertainty_margin < 2 * WATCHDOG_MAX_SKEW)
    1112           1 :                         cs->uncertainty_margin = 2 * WATCHDOG_MAX_SKEW;
    1113           1 :         } else if (!cs->uncertainty_margin) {
    1114           0 :                 cs->uncertainty_margin = WATCHDOG_THRESHOLD;
    1115             :         }
    1116           2 :         WARN_ON_ONCE(cs->uncertainty_margin < 2 * WATCHDOG_MAX_SKEW);
    1117             : 
    1118             :         /*
    1119             :          * Ensure clocksources that have large 'mult' values don't overflow
    1120             :          * when adjusted.
    1121             :          */
    1122           4 :         cs->maxadj = clocksource_max_adjustment(cs);
    1123           4 :         while (freq && ((cs->mult + cs->maxadj < cs->mult)
    1124           1 :                 || (cs->mult - cs->maxadj > cs->mult))) {
    1125           0 :                 cs->mult >>= 1;
    1126           0 :                 cs->shift--;
    1127           0 :                 cs->maxadj = clocksource_max_adjustment(cs);
    1128             :         }
    1129             : 
    1130             :         /*
    1131             :          * Only warn for *special* clocksources that self-define
    1132             :          * their mult/shift values and don't specify a freq.
    1133             :          */
    1134           2 :         WARN_ONCE(cs->mult + cs->maxadj < cs->mult,
    1135             :                 "timekeeping: Clocksource %s might overflow on 11%% adjustment\n",
    1136             :                 cs->name);
    1137             : 
    1138           2 :         clocksource_update_max_deferment(cs);
    1139             : 
    1140           2 :         pr_info("%s: mask: 0x%llx max_cycles: 0x%llx, max_idle_ns: %lld ns\n",
    1141             :                 cs->name, cs->mask, cs->max_cycles, cs->max_idle_ns);
    1142           2 : }
    1143             : EXPORT_SYMBOL_GPL(__clocksource_update_freq_scale);
    1144             : 
    1145             : /**
    1146             :  * __clocksource_register_scale - Used to install new clocksources
    1147             :  * @cs:         clocksource to be registered
    1148             :  * @scale:      Scale factor multiplied against freq to get clocksource hz
    1149             :  * @freq:       clocksource frequency (cycles per second) divided by scale
    1150             :  *
    1151             :  * Returns zero; no path in this function returns an error.
    1152             :  *
    1153             :  * This *SHOULD NOT* be called directly! Please use the
    1154             :  * clocksource_register_hz() or clocksource_register_khz() helper functions.
    1155             :  */
    1156           2 : int __clocksource_register_scale(struct clocksource *cs, u32 scale, u32 freq)
    1157             : {
    1158             :         unsigned long flags;
    1159             : 
    1160           2 :         clocksource_arch_init(cs);
    1161             : 
    1162           2 :         if (WARN_ON_ONCE((unsigned int)cs->id >= CSID_MAX))
    1163           0 :                 cs->id = CSID_GENERIC; /* out-of-range id: warn once and fall back to the generic id */
    1164           2 :         if (cs->vdso_clock_mode < 0 ||
    1165           2 :             cs->vdso_clock_mode >= VDSO_CLOCKMODE_MAX) {
    1166           0 :                 pr_warn("clocksource %s registered with invalid VDSO mode %d. Disabling VDSO support.\n",
    1167             :                         cs->name, cs->vdso_clock_mode);
    1168           0 :                 cs->vdso_clock_mode = VDSO_CLOCKMODE_NONE;
    1169             :         }
    1170             : 
    1171             :         /* Initialize mult/shift and max_idle_ns */
    1172           2 :         __clocksource_update_freq_scale(cs, scale, freq);
    1173             : 
    1174             :         /* Add clocksource to the clocksource list */
    1175           2 :         mutex_lock(&clocksource_mutex);
    1176             : 
    1177           2 :         clocksource_watchdog_lock(&flags); /* both enqueue steps run under the watchdog lock */
    1178           2 :         clocksource_enqueue(cs);
    1179           2 :         clocksource_enqueue_watchdog(cs);
    1180           2 :         clocksource_watchdog_unlock(&flags);
    1181             : 
    1182           2 :         clocksource_select(); /* re-run the selections now that @cs is on the list */
    1183           2 :         clocksource_select_watchdog(false);
    1184           2 :         __clocksource_suspend_select(cs);
    1185           2 :         mutex_unlock(&clocksource_mutex);
    1186           2 :         return 0;
    1187             : }
    1188             : EXPORT_SYMBOL_GPL(__clocksource_register_scale);
    1189             : 
    1190           0 : static void __clocksource_change_rating(struct clocksource *cs, int rating)
    1191             : {
    1192           0 :         list_del(&cs->list); /* unlink first so the enqueue below can re-insert by rating */
    1193           0 :         cs->rating = rating;
    1194           0 :         clocksource_enqueue(cs); /* re-insert @cs at the position matching its new rating */
    1195           0 : }
    1196             : 
    1197             : /**
    1198             :  * clocksource_change_rating - Change the rating of a registered clocksource
    1199             :  * @cs:         clocksource to be changed
    1200             :  * @rating:     new rating
    1201             :  */
    1202           0 : void clocksource_change_rating(struct clocksource *cs, int rating)
    1203             : {
    1204             :         unsigned long flags;
    1205             : 
    1206           0 :         mutex_lock(&clocksource_mutex);
    1207           0 :         clocksource_watchdog_lock(&flags); /* the list update itself runs under the watchdog lock */
    1208           0 :         __clocksource_change_rating(cs, rating);
    1209           0 :         clocksource_watchdog_unlock(&flags);
    1210             : 
    1211           0 :         clocksource_select(); /* rating changed: re-run clocksource, watchdog and suspend selection */
    1212           0 :         clocksource_select_watchdog(false);
    1213           0 :         clocksource_suspend_select(false);
    1214           0 :         mutex_unlock(&clocksource_mutex);
    1215           0 : }
    1216             : EXPORT_SYMBOL(clocksource_change_rating);
    1217             : 
    1218             : /*
    1219             :  * Unbind clocksource @cs. Called with clocksource_mutex held. Returns -EBUSY if @cs is still the watchdog or the current clocksource after a replacement attempt, otherwise 0.
    1220             :  */
    1221           0 : static int clocksource_unbind(struct clocksource *cs)
    1222             : {
    1223             :         unsigned long flags;
    1224             : 
    1225           0 :         if (clocksource_is_watchdog(cs)) {
    1226             :                 /* Select and try to install a replacement watchdog. */
    1227             :                 clocksource_select_watchdog(true);
    1228             :                 if (clocksource_is_watchdog(cs))
    1229             :                         return -EBUSY;
    1230             :         }
    1231             : 
    1232           0 :         if (cs == curr_clocksource) {
    1233             :                 /* Select and try to install a replacement clock source */
    1234             :                 clocksource_select_fallback();
    1235           0 :                 if (curr_clocksource == cs)
    1236             :                         return -EBUSY;
    1237             :         }
    1238             : 
    1239           0 :         if (clocksource_is_suspend(cs)) {
    1240             :                 /*
    1241             :                  * Select and try to install a replacement suspend clocksource.
    1242             :                  * If no replacement suspend clocksource, we will just let the
    1243             :                  * clocksource go and have no suspend clocksource.
    1244             :                  */
    1245           0 :                 clocksource_suspend_select(true);
    1246             :         }
    1247             : 
    1248           0 :         clocksource_watchdog_lock(&flags);
    1249           0 :         clocksource_dequeue_watchdog(cs);
    1250           0 :         list_del_init(&cs->list); /* leaves cs->list empty, which clocksource_unregister() tests via list_empty() */
    1251           0 :         clocksource_watchdog_unlock(&flags);
    1252             : 
    1253           0 :         return 0;
    1254             : }
    1255             : 
    1256             : /**
    1257             :  * clocksource_unregister - remove a registered clocksource
    1258             :  * @cs: clocksource to be unregistered; returns 0, or the error from clocksource_unbind()
    1259             :  */
    1260           0 : int clocksource_unregister(struct clocksource *cs)
    1261             : {
    1262           0 :         int ret = 0;
    1263             : 
    1264           0 :         mutex_lock(&clocksource_mutex);
    1265           0 :         if (!list_empty(&cs->list)) /* nothing to unbind if @cs is not linked; ret stays 0 */
    1266           0 :                 ret = clocksource_unbind(cs);
    1267           0 :         mutex_unlock(&clocksource_mutex);
    1268           0 :         return ret;
    1269             : }
    1270             : EXPORT_SYMBOL(clocksource_unregister);
    1271             : 
    1272             : #ifdef CONFIG_SYSFS
    1273             : /**
    1274             :  * current_clocksource_show - sysfs interface for current clocksource
    1275             :  * @dev:        unused
    1276             :  * @attr:       unused
    1277             :  * @buf:        char buffer to be filled with the current clocksource name
    1278             :  *
    1279             :  * Provides sysfs interface for showing the current clocksource.
    1280             :  */
    1281           0 : static ssize_t current_clocksource_show(struct device *dev,
    1282             :                                         struct device_attribute *attr,
    1283             :                                         char *buf)
    1284             : {
    1285           0 :         ssize_t count = 0;
    1286             : 
    1287           0 :         mutex_lock(&clocksource_mutex);
    1288           0 :         count = snprintf(buf, PAGE_SIZE, "%s\n", curr_clocksource->name); /* NOTE(review): kernel sysfs documentation recommends sysfs_emit() in show() callbacks */
    1289           0 :         mutex_unlock(&clocksource_mutex);
    1290             : 
    1291           0 :         return count;
    1292             : }
    1293             : 
    1294           0 : ssize_t sysfs_get_uname(const char *buf, char *dst, size_t cnt)
    1295             : {
    1296           0 :         size_t ret = cnt; /* the original length is the success return value */
    1297             : 
    1298             :         /* strings from sysfs write are not 0 terminated! */
    1299           0 :         if (!cnt || cnt >= CS_NAME_LEN)
    1300             :                 return -EINVAL;
    1301             : 
    1302             :         /* strip off one trailing \n: */
    1303           0 :         if (buf[cnt-1] == '\n')
    1304           0 :                 cnt--;
    1305           0 :         if (cnt > 0)
    1306           0 :                 memcpy(dst, buf, cnt);
    1307           0 :         dst[cnt] = 0; /* cnt < CS_NAME_LEN here; assumes dst holds at least CS_NAME_LEN bytes - confirm for all callers */
    1308           0 :         return ret;
    1309             : }
    1310             : 
    1311             : /**
    1312             :  * current_clocksource_store - interface for manually overriding clocksource
    1313             :  * @dev:        unused
    1314             :  * @attr:       unused
    1315             :  * @buf:        name of override clocksource
    1316             :  * @count:      length of buffer
    1317             :  *
    1318             :  * Takes input from sysfs interface for manually overriding the default
    1319             :  * clocksource selection. Returns @count, or -EINVAL from sysfs_get_uname().
    1320             :  */
    1321           0 : static ssize_t current_clocksource_store(struct device *dev,
    1322             :                                          struct device_attribute *attr,
    1323             :                                          const char *buf, size_t count)
    1324             : {
    1325             :         ssize_t ret;
    1326             : 
    1327           0 :         mutex_lock(&clocksource_mutex);
    1328             : 
    1329           0 :         ret = sysfs_get_uname(buf, override_name, count); /* writes the new name into override_name under the mutex */
    1330           0 :         if (ret >= 0)
    1331             :                 clocksource_select(); /* re-run selection now that override_name changed */
    1332             : 
    1333           0 :         mutex_unlock(&clocksource_mutex);
    1334             : 
    1335           0 :         return ret;
    1336             : }
    1337             : static DEVICE_ATTR_RW(current_clocksource);
    1338             : 
    1339             : /**
    1340             :  * unbind_clocksource_store - interface for manually unbinding clocksource
    1341             :  * @dev:        unused
    1342             :  * @attr:       unused
    1343             :  * @buf:        name of the clocksource to unbind
    1344             :  * @count:      length of buffer
    1345             :  *
    1346             :  * Takes input from sysfs interface for manually unbinding a clocksource.
    1347             :  */
    1348           0 : static ssize_t unbind_clocksource_store(struct device *dev,
    1349             :                                         struct device_attribute *attr,
    1350             :                                         const char *buf, size_t count)
    1351             : {
    1352             :         struct clocksource *cs;
    1353             :         char name[CS_NAME_LEN];
    1354             :         ssize_t ret;
    1355             : 
    1356           0 :         ret = sysfs_get_uname(buf, name, count);
    1357           0 :         if (ret < 0)
    1358             :                 return ret;
    1359             : 
    1360           0 :         ret = -ENODEV; /* stays -ENODEV if no registered clocksource matches the name */
    1361           0 :         mutex_lock(&clocksource_mutex);
    1362           0 :         list_for_each_entry(cs, &clocksource_list, list) {
    1363           0 :                 if (strcmp(cs->name, name))
    1364           0 :                         continue;
    1365           0 :                 ret = clocksource_unbind(cs);
    1366           0 :                 break;
    1367             :         }
    1368           0 :         mutex_unlock(&clocksource_mutex);
    1369             : 
    1370           0 :         return ret ? ret : count; /* error code from lookup/unbind, else report the whole write as consumed */
    1371             : }
    1372             : static DEVICE_ATTR_WO(unbind_clocksource);
    1373             : 
    1374             : /**
    1375             :  * available_clocksource_show - sysfs interface for listing clocksources
    1376             :  * @dev:        unused
    1377             :  * @attr:       unused
    1378             :  * @buf:        char buffer to be filled with clocksource list
    1379             :  *
    1380             :  * Provides sysfs interface for listing registered clocksources
    1381             :  */
    1382           0 : static ssize_t available_clocksource_show(struct device *dev,
    1383             :                                           struct device_attribute *attr,
    1384             :                                           char *buf)
    1385             : {
    1386             :         struct clocksource *src;
    1387           0 :         ssize_t count = 0;
    1388             : 
    1389           0 :         mutex_lock(&clocksource_mutex);
    1390           0 :         list_for_each_entry(src, &clocksource_list, list) {
    1391             :                 /*
    1392             :                  * Don't show non-HRES clocksource if the tick code is
    1393             :                  * in one shot mode (highres=on or nohz=on)
    1394             :                  */
    1395             :                 if (!tick_oneshot_mode_active() ||
    1396             :                     (src->flags & CLOCK_SOURCE_VALID_FOR_HRES))
    1397           0 :                         count += snprintf(buf + count,
    1398           0 :                                   max((ssize_t)PAGE_SIZE - count, (ssize_t)0), /* never pass a negative size */
    1399             :                                   "%s ", src->name);
    1400             :         }
    1401           0 :         mutex_unlock(&clocksource_mutex);
    1402             : 
    1403           0 :         count += snprintf(buf + count,
    1404           0 :                           max((ssize_t)PAGE_SIZE - count, (ssize_t)0), "\n");
    1405             : 
    1406           0 :         return count; /* NOTE(review): snprintf() returns the untruncated length, so count may exceed PAGE_SIZE - confirm */
    1407             : }
    1408             : static DEVICE_ATTR_RO(available_clocksource);
    1409             : 
    1410             : static struct attribute *clocksource_attrs[] = {
    1411             :         &dev_attr_current_clocksource.attr,
    1412             :         &dev_attr_unbind_clocksource.attr,
    1413             :         &dev_attr_available_clocksource.attr,
    1414             :         NULL /* list terminator */
    1415             : };
    1416             : ATTRIBUTE_GROUPS(clocksource); /* provides clocksource_groups, referenced by device_clocksource below */
    1417             : 
    1418             : static struct bus_type clocksource_subsys = {
    1419             :         .name = "clocksource",
    1420             :         .dev_name = "clocksource",
    1421             : };
    1422             : 
    1423             : static struct device device_clocksource = {
    1424             :         .id     = 0,
    1425             :         .bus    = &clocksource_subsys,
    1426             :         .groups = clocksource_groups,
    1427             : };
    1428             : 
    1429           1 : static int __init init_clocksource_sysfs(void)
    1430             : {
    1431           1 :         int error = subsys_system_register(&clocksource_subsys, NULL);
    1432             : 
    1433           1 :         if (!error) /* register the device only if the subsystem registered */
    1434           1 :                 error = device_register(&device_clocksource);
    1435             : 
    1436           1 :         return error;
    1437             : }
    1438             : 
    1439             : device_initcall(init_clocksource_sysfs);
    1440             : #endif /* CONFIG_SYSFS */
    1441             : 
    1442             : /**
    1443             :  * boot_override_clocksource - boot clock override
    1444             :  * @str:        override name
    1445             :  *
    1446             :  * Takes a clocksource= boot argument and uses it
    1447             :  * as the clocksource override name. A NULL @str leaves override_name unchanged.
    1448             :  */
    1449           0 : static int __init boot_override_clocksource(char* str)
    1450             : {
    1451           0 :         mutex_lock(&clocksource_mutex);
    1452           0 :         if (str)
    1453           0 :                 strlcpy(override_name, str, sizeof(override_name)); /* NOTE(review): strlcpy() is listed as deprecated in the kernel in favour of strscpy() */
    1454           0 :         mutex_unlock(&clocksource_mutex);
    1455           0 :         return 1; /* presumably marks the option as handled for __setup() - confirm */
    1456             : }
    1457             : 
    1458             : __setup("clocksource=", boot_override_clocksource);
    1459             : 
    1460             : /**
    1461             :  * boot_override_clock - Compatibility layer for deprecated boot option
    1462             :  * @str:        override name
    1463             :  *
    1464             :  * DEPRECATED! Takes a clock= boot argument and uses it
    1465             :  * as the clocksource override name; the legacy name "pmtmr" is mapped to "acpi_pm".
    1466             :  */
    1467           0 : static int __init boot_override_clock(char* str)
    1468             : {
    1469           0 :         if (!strcmp(str, "pmtmr")) { /* NOTE(review): unlike boot_override_clocksource(), str is not NULL-checked before use - confirm it cannot be NULL */
    1470           0 :                 pr_warn("clock=pmtmr is deprecated - use clocksource=acpi_pm\n");
    1471           0 :                 return boot_override_clocksource("acpi_pm");
    1472             :         }
    1473           0 :         pr_warn("clock= boot option is deprecated - use clocksource=xyz\n");
    1474           0 :         return boot_override_clocksource(str);
    1475             : }
    1476             : 
    1477             : __setup("clock=", boot_override_clock);

Generated by: LCOV version 1.14