LCOV - code coverage report
Current view: top level - kernel/locking - percpu-rwsem.c (source / functions)
Test: coverage.info                          Date: 2022-12-09 01:23:36
                   Hit     Total    Coverage
Lines:               8        70      11.4 %
Functions:           1         8      12.5 %

          Line data    Source code
       1             : // SPDX-License-Identifier: GPL-2.0-only
       2             : #include <linux/atomic.h>
       3             : #include <linux/percpu.h>
       4             : #include <linux/wait.h>
       5             : #include <linux/lockdep.h>
       6             : #include <linux/percpu-rwsem.h>
       7             : #include <linux/rcupdate.h>
       8             : #include <linux/sched.h>
       9             : #include <linux/sched/task.h>
      10             : #include <linux/sched/debug.h>
      11             : #include <linux/errno.h>
      12             : 
      13          30 : int __percpu_init_rwsem(struct percpu_rw_semaphore *sem,
      14             :                         const char *name, struct lock_class_key *key)
      15             : {
      16          30 :         sem->read_count = alloc_percpu(int);
      17          30 :         if (unlikely(!sem->read_count))
      18             :                 return -ENOMEM;
      19             : 
      20          30 :         rcu_sync_init(&sem->rss);
      21          60 :         rcuwait_init(&sem->writer);
      22          30 :         init_waitqueue_head(&sem->waiters);
      23          60 :         atomic_set(&sem->block, 0);
      24             : #ifdef CONFIG_DEBUG_LOCK_ALLOC
      25             :         debug_check_no_locks_freed((void *)sem, sizeof(*sem));
      26             :         lockdep_init_map(&sem->dep_map, name, key, 0);
      27             : #endif
      28          30 :         return 0;
      29             : }
      30             : EXPORT_SYMBOL_GPL(__percpu_init_rwsem);
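
/*
 * A minimal usage sketch of the init/teardown pair (not part of this file).
 * percpu_init_rwsem() is assumed to be the <linux/percpu-rwsem.h> wrapper that
 * supplies the lockdep class and name before calling __percpu_init_rwsem()
 * above; my_object is a hypothetical embedder.
 */
struct my_object {
	struct percpu_rw_semaphore rwsem;
	/* ... data protected by rwsem ... */
};

static int my_object_setup(struct my_object *obj)
{
	/* Allocates the per-CPU read_count; -ENOMEM is the only failure. */
	return percpu_init_rwsem(&obj->rwsem);
}

static void my_object_teardown(struct my_object *obj)
{
	/*
	 * Frees the per-CPU counter. Per the kludge in percpu_free_rwsem()
	 * below, this is also safe on a kzalloc()ed object whose init failed.
	 */
	percpu_free_rwsem(&obj->rwsem);
}
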
      31             : 
      32           0 : void percpu_free_rwsem(struct percpu_rw_semaphore *sem)
      33             : {
      34             :         /*
      35             :          * XXX: temporary kludge. The error path in alloc_super()
      36             :          * assumes that percpu_free_rwsem() is safe after kzalloc().
      37             :          */
      38           0 :         if (!sem->read_count)
      39             :                 return;
      40             : 
      41           0 :         rcu_sync_dtor(&sem->rss);
      42           0 :         free_percpu(sem->read_count);
      43           0 :         sem->read_count = NULL; /* catch use after free bugs */
      44             : }
      45             : EXPORT_SYMBOL_GPL(percpu_free_rwsem);
      46             : 
      47           0 : static bool __percpu_down_read_trylock(struct percpu_rw_semaphore *sem)
      48             : {
      49           0 :         this_cpu_inc(*sem->read_count);
      50             : 
      51             :         /*
      52             :          * Due to having preemption disabled the decrement happens on
      53             :          * the same CPU as the increment, avoiding the
      54             :          * increment-on-one-CPU-and-decrement-on-another problem.
      55             :          *
      56             :          * If the reader misses the writer's assignment of sem->block, then the
      57             :          * writer is guaranteed to see the reader's increment.
      58             :          *
      59             :          * Conversely, any readers that increment their sem->read_count after
      60             :          * the writer looks are guaranteed to see the sem->block value, which
      61             :          * in turn means that they are guaranteed to immediately decrement
      62             :          * their sem->read_count, so that it doesn't matter that the writer
      63             :          * missed them.
      64             :          */
      65             : 
      66           0 :         smp_mb(); /* A matches D */
      67             : 
      68             :         /*
      69             :          * If !sem->block the critical section starts here, matched by the
      70             :          * release in percpu_up_write().
      71             :          */
      72           0 :         if (likely(!atomic_read_acquire(&sem->block)))
      73             :                 return true;
      74             : 
      75           0 :         this_cpu_dec(*sem->read_count);
      76             : 
      77             :         /* Prod writer to re-evaluate readers_active_check() */
      78           0 :         rcuwait_wake_up(&sem->writer);
      79             : 
      80           0 :         return false;
      81             : }
      82             : 
      83             : static inline bool __percpu_down_write_trylock(struct percpu_rw_semaphore *sem)
      84             : {
      85           0 :         if (atomic_read(&sem->block))
      86             :                 return false;
      87             : 
      88           0 :         return atomic_xchg(&sem->block, 1) == 0;
      89             : }
      90             : 
      91             : static bool __percpu_rwsem_trylock(struct percpu_rw_semaphore *sem, bool reader)
      92             : {
      93           0 :         if (reader) {
      94             :                 bool ret;
      95             : 
      96           0 :                 preempt_disable();
      97           0 :                 ret = __percpu_down_read_trylock(sem);
      98           0 :                 preempt_enable();
      99             : 
     100             :                 return ret;
     101             :         }
     102             :         return __percpu_down_write_trylock(sem);
     103             : }
     104             : 
     105             : /*
     106             :  * The return value of wait_queue_entry::func means:
     107             :  *
     108             :  *  <0 - error, wakeup is terminated and the error is returned
     109             :  *   0 - no wakeup, a next waiter is tried
     110             :  *  >0 - woken, if EXCLUSIVE, counted towards @nr_exclusive.
     111             :  *
     112             :  * We use EXCLUSIVE for both readers and writers to preserve FIFO order,
     113             :  * and play games with the return value to allow waking multiple readers.
     114             :  *
     115             :  * Specifically, we wake readers until we've woken a single writer, or until a
     116             :  * trylock fails.
     117             :  */
     118           0 : static int percpu_rwsem_wake_function(struct wait_queue_entry *wq_entry,
     119             :                                       unsigned int mode, int wake_flags,
     120             :                                       void *key)
     121             : {
     122           0 :         bool reader = wq_entry->flags & WQ_FLAG_CUSTOM;
     123           0 :         struct percpu_rw_semaphore *sem = key;
     124             :         struct task_struct *p;
     125             : 
     126             :         /* concurrent against percpu_down_write(), can get stolen */
     127           0 :         if (!__percpu_rwsem_trylock(sem, reader))
     128             :                 return 1;
     129             : 
     130           0 :         p = get_task_struct(wq_entry->private);
     131           0 :         list_del_init(&wq_entry->entry);
     132           0 :         smp_store_release(&wq_entry->private, NULL);
     133             : 
     134           0 :         wake_up_process(p);
     135           0 :         put_task_struct(p);
     136             : 
     137           0 :         return !reader; /* wake (readers until) 1 writer */
     138             : }
     139             : 
     140           0 : static void percpu_rwsem_wait(struct percpu_rw_semaphore *sem, bool reader)
     141             : {
     142           0 :         DEFINE_WAIT_FUNC(wq_entry, percpu_rwsem_wake_function);
     143             :         bool wait;
     144             : 
     145           0 :         spin_lock_irq(&sem->waiters.lock);
     146             :         /*
      147             :          * Serialize against the wakeup in percpu_up_write(); if we fail
     148             :          * the trylock, the wakeup must see us on the list.
     149             :          */
     150           0 :         wait = !__percpu_rwsem_trylock(sem, reader);
     151           0 :         if (wait) {
     152           0 :                 wq_entry.flags |= WQ_FLAG_EXCLUSIVE | reader * WQ_FLAG_CUSTOM;
     153           0 :                 __add_wait_queue_entry_tail(&sem->waiters, &wq_entry);
     154             :         }
     155           0 :         spin_unlock_irq(&sem->waiters.lock);
     156             : 
     157           0 :         while (wait) {
     158           0 :                 set_current_state(TASK_UNINTERRUPTIBLE);
     159           0 :                 if (!smp_load_acquire(&wq_entry.private))
     160             :                         break;
     161           0 :                 schedule();
     162             :         }
     163           0 :         __set_current_state(TASK_RUNNING);
     164           0 : }
     165             : 
     166           0 : bool __sched __percpu_down_read(struct percpu_rw_semaphore *sem, bool try)
     167             : {
     168           0 :         if (__percpu_down_read_trylock(sem))
     169             :                 return true;
     170             : 
     171           0 :         if (try)
     172             :                 return false;
     173             : 
     174           0 :         preempt_enable();
     175           0 :         percpu_rwsem_wait(sem, /* .reader = */ true);
     176           0 :         preempt_disable();
     177             : 
     178           0 :         return true;
     179             : }
     180             : EXPORT_SYMBOL_GPL(__percpu_down_read);
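
/*
 * How the slow path above is reached (a sketch paraphrasing the
 * percpu_down_read() inline from <linux/percpu-rwsem.h>, not a verbatim
 * copy): while the rcu_sync state is idle no writer can be evaluating the
 * counters, so a plain per-CPU increment is enough; otherwise fall back to
 * __percpu_down_read(), which issues the full barrier and may sleep.
 */
static inline void percpu_down_read_sketch(struct percpu_rw_semaphore *sem)
{
	preempt_disable();
	if (likely(rcu_sync_is_idle(&sem->rss)))
		this_cpu_inc(*sem->read_count);	/* fast path, no barrier needed */
	else
		__percpu_down_read(sem, false);	/* slow path, unconditional smp_mb() */
	preempt_enable();
}
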
     181             : 
     182             : #define per_cpu_sum(var)                                                \
     183             : ({                                                                      \
     184             :         typeof(var) __sum = 0;                                          \
     185             :         int cpu;                                                        \
     186             :         compiletime_assert_atomic_type(__sum);                          \
     187             :         for_each_possible_cpu(cpu)                                      \
     188             :                 __sum += per_cpu(var, cpu);                             \
     189             :         __sum;                                                          \
     190             : })
     191             : 
     192             : /*
     193             :  * Return true if the modular sum of the sem->read_count per-CPU variable is
     194             :  * zero.  If this sum is zero, then it is stable due to the fact that if any
     195             :  * newly arriving readers increment a given counter, they will immediately
     196             :  * decrement that same counter.
     197             :  *
     198             :  * Assumes sem->block is set.
     199             :  */
     200             : static bool readers_active_check(struct percpu_rw_semaphore *sem)
     201             : {
     202           0 :         if (per_cpu_sum(*sem->read_count) != 0)
     203             :                 return false;
     204             : 
     205             :         /*
      206             :          * If we observed the decrement, ensure we see the entire critical
     207             :          * section.
     208             :          */
     209             : 
     210           0 :         smp_mb(); /* C matches B */
     211             : 
     212             :         return true;
     213             : }
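
/*
 * Why per_cpu_sum() looks at the total rather than any single counter
 * (illustrative model in plain C, not kernel code): up_read() can run on a
 * different CPU than the matching down_read(), so individual counters may be
 * positive or negative long after a reader is gone; only the sum tracks the
 * number of active readers.
 */
#include <assert.h>

int main(void)
{
	int read_count[4] = { 0, 0, 0, 0 };	/* one counter per CPU */

	read_count[0]++;	/* reader A: down_read() while running on CPU0 */
	read_count[2]++;	/* reader B: down_read() on CPU2 */
	read_count[1]--;	/* reader A migrated: up_read() on CPU1 */
	read_count[2]--;	/* reader B: up_read() on CPU2 */

	/* CPU0 holds +1 and CPU1 holds -1, yet no reader is active. */
	assert(read_count[0] + read_count[1] + read_count[2] + read_count[3] == 0);
	return 0;
}
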
     214             : 
     215           0 : void __sched percpu_down_write(struct percpu_rw_semaphore *sem)
     216             : {
     217             :         might_sleep();
     218             :         rwsem_acquire(&sem->dep_map, 0, 0, _RET_IP_);
     219             : 
     220             :         /* Notify readers to take the slow path. */
     221           0 :         rcu_sync_enter(&sem->rss);
     222             : 
     223             :         /*
      224             :          * Try to set sem->block; this provides writer-writer exclusion.
     225             :          * Having sem->block set makes new readers block.
     226             :          */
     227           0 :         if (!__percpu_down_write_trylock(sem))
     228           0 :                 percpu_rwsem_wait(sem, /* .reader = */ false);
     229             : 
     230             :         /* smp_mb() implied by __percpu_down_write_trylock() on success -- D matches A */
     231             : 
     232             :         /*
     233             :          * If they don't see our store of sem->block, then we are guaranteed to
     234             :          * see their sem->read_count increment, and therefore will wait for
     235             :          * them.
     236             :          */
     237             : 
     238             :         /* Wait for all active readers to complete. */
     239           0 :         rcuwait_wait_event(&sem->writer, readers_active_check(sem), TASK_UNINTERRUPTIBLE);
     240           0 : }
     241             : EXPORT_SYMBOL_GPL(percpu_down_write);
     242             : 
     243           0 : void percpu_up_write(struct percpu_rw_semaphore *sem)
     244             : {
     245             :         rwsem_release(&sem->dep_map, _RET_IP_);
     246             : 
     247             :         /*
     248             :          * Signal the writer is done, no fast path yet.
     249             :          *
     250             :          * One reason that we cannot just immediately flip to readers_fast is
     251             :          * that new readers might fail to see the results of this writer's
     252             :          * critical section.
     253             :          *
     254             :          * Therefore we force it through the slow path which guarantees an
     255             :          * acquire and thereby guarantees the critical section's consistency.
     256             :          */
     257           0 :         atomic_set_release(&sem->block, 0);
     258             : 
     259             :         /*
     260             :          * Prod any pending reader/writer to make progress.
     261             :          */
     262           0 :         __wake_up(&sem->waiters, TASK_NORMAL, 1, sem);
     263             : 
     264             :         /*
     265             :          * Once this completes (at least one RCU-sched grace period hence) the
     266             :          * reader fast path will be available again. Safe to use outside the
      267             :          * exclusive write lock because it's counting.
     268             :          */
     269           0 :         rcu_sync_exit(&sem->rss);
     270           0 : }
     271             : EXPORT_SYMBOL_GPL(percpu_up_write);
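
/*
 * Putting the public API together (illustrative only; my_mode and my_lock are
 * hypothetical, while percpu_down_read()/percpu_up_read(),
 * percpu_down_write()/percpu_up_write() and DEFINE_STATIC_PERCPU_RWSEM() are
 * assumed from <linux/percpu-rwsem.h>). Readers pay a per-CPU increment in the
 * common case; writers pay rcu_sync_enter() plus waiting out every active
 * reader, so the primitive suits read-mostly, rarely-written state.
 */
DEFINE_STATIC_PERCPU_RWSEM(my_lock);
static int my_mode;

static int read_mode(void)
{
	int mode;

	percpu_down_read(&my_lock);	/* cheap: per-CPU increment on the fast path */
	mode = my_mode;
	percpu_up_read(&my_lock);
	return mode;
}

static void set_mode(int mode)
{
	percpu_down_write(&my_lock);	/* blocks new readers, waits out active ones */
	my_mode = mode;
	percpu_up_write(&my_lock);	/* reader fast path returns after a grace period */
}
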

Generated by: LCOV version 1.14