LCOV - code coverage report
Current view: top level - mm - mlock.c (source / functions)
Test:         coverage.info
Date:         2022-12-09 01:23:36
                              Hit    Total    Coverage
Lines:                          0      321       0.0 %
Functions:                      0       27       0.0 %

          Line data    Source code
       1             : // SPDX-License-Identifier: GPL-2.0
       2             : /*
       3             :  *      linux/mm/mlock.c
       4             :  *
       5             :  *  (C) Copyright 1995 Linus Torvalds
       6             :  *  (C) Copyright 2002 Christoph Hellwig
       7             :  */
       8             : 
       9             : #include <linux/capability.h>
      10             : #include <linux/mman.h>
      11             : #include <linux/mm.h>
      12             : #include <linux/sched/user.h>
      13             : #include <linux/swap.h>
      14             : #include <linux/swapops.h>
      15             : #include <linux/pagemap.h>
      16             : #include <linux/pagevec.h>
      17             : #include <linux/pagewalk.h>
      18             : #include <linux/mempolicy.h>
      19             : #include <linux/syscalls.h>
      20             : #include <linux/sched.h>
      21             : #include <linux/export.h>
      22             : #include <linux/rmap.h>
      23             : #include <linux/mmzone.h>
      24             : #include <linux/hugetlb.h>
      25             : #include <linux/memcontrol.h>
      26             : #include <linux/mm_inline.h>
      27             : #include <linux/secretmem.h>
      28             : 
      29             : #include "internal.h"
      30             : 
      31             : struct mlock_pvec {
      32             :         local_lock_t lock;
      33             :         struct pagevec vec;
      34             : };
      35             : 
      36             : static DEFINE_PER_CPU(struct mlock_pvec, mlock_pvec) = {
      37             :         .lock = INIT_LOCAL_LOCK(lock),
      38             : };
      39             : 
      40           0 : bool can_do_mlock(void)
      41             : {
      42           0 :         if (rlimit(RLIMIT_MEMLOCK) != 0)
      43             :                 return true;
      44           0 :         if (capable(CAP_IPC_LOCK))
      45             :                 return true;
      46           0 :         return false;
      47             : }
      48             : EXPORT_SYMBOL(can_do_mlock);
      49             : 
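/*
 * Illustrative userspace sketch (not part of mlock.c): can_do_mlock()
 * above permits locking when RLIMIT_MEMLOCK is non-zero or the caller
 * has CAP_IPC_LOCK.  The caller-side counterpart is to consult the
 * rlimit before asking the kernel to pin pages.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/resource.h>

int main(void)
{
        struct rlimit rl;
        size_t len = 4096;
        void *buf;

        if (getrlimit(RLIMIT_MEMLOCK, &rl) == 0)
                printf("RLIMIT_MEMLOCK: soft=%llu hard=%llu\n",
                       (unsigned long long)rl.rlim_cur,
                       (unsigned long long)rl.rlim_max);

        buf = malloc(len);
        if (!buf)
                return 1;
        memset(buf, 0, len);

        /* Fails with EPERM when can_do_mlock() above returns false. */
        if (mlock(buf, len) != 0) {
                perror("mlock");
                free(buf);
                return 1;
        }
        munlock(buf, len);
        free(buf);
        return 0;
}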
      50             : /*
      51             :  * Mlocked pages are marked with PageMlocked() flag for efficient testing
      52             :  * in vmscan and, possibly, the fault path; and to support semi-accurate
      53             :  * statistics.
      54             :  *
      55             :  * An mlocked page [PageMlocked(page)] is unevictable.  As such, it will
      56             :  * be placed on the LRU "unevictable" list, rather than the [in]active lists.
      57             :  * The unevictable list is an LRU sibling list to the [in]active lists.
      58             :  * PageUnevictable is set to indicate the unevictable state.
      59             :  */
      60             : 
      61           0 : static struct lruvec *__mlock_page(struct page *page, struct lruvec *lruvec)
      62             : {
      63             :         /* There is nothing more we can do while it's off LRU */
      64           0 :         if (!TestClearPageLRU(page))
      65             :                 return lruvec;
      66             : 
      67           0 :         lruvec = folio_lruvec_relock_irq(page_folio(page), lruvec);
      68             : 
      69           0 :         if (unlikely(page_evictable(page))) {
      70             :                 /*
      71             :                  * This is a little surprising, but quite possible:
      72             :                  * PageMlocked must have got cleared already by another CPU.
      73             :                  * Could this page be on the Unevictable LRU?  I'm not sure,
      74             :                  * but move it now if so.
      75             :                  */
      76           0 :                 if (PageUnevictable(page)) {
      77           0 :                         del_page_from_lru_list(page, lruvec);
      78           0 :                         ClearPageUnevictable(page);
      79           0 :                         add_page_to_lru_list(page, lruvec);
      80           0 :                         __count_vm_events(UNEVICTABLE_PGRESCUED,
      81           0 :                                           thp_nr_pages(page));
      82             :                 }
      83             :                 goto out;
      84             :         }
      85             : 
      86           0 :         if (PageUnevictable(page)) {
      87           0 :                 if (PageMlocked(page))
      88           0 :                         page->mlock_count++;
      89             :                 goto out;
      90             :         }
      91             : 
      92           0 :         del_page_from_lru_list(page, lruvec);
      93           0 :         ClearPageActive(page);
      94           0 :         SetPageUnevictable(page);
      95           0 :         page->mlock_count = !!PageMlocked(page);
      96           0 :         add_page_to_lru_list(page, lruvec);
      97           0 :         __count_vm_events(UNEVICTABLE_PGCULLED, thp_nr_pages(page));
      98             : out:
      99           0 :         SetPageLRU(page);
     100           0 :         return lruvec;
     101             : }
     102             : 
     103           0 : static struct lruvec *__mlock_new_page(struct page *page, struct lruvec *lruvec)
     104             : {
     105             :         VM_BUG_ON_PAGE(PageLRU(page), page);
     106             : 
     107           0 :         lruvec = folio_lruvec_relock_irq(page_folio(page), lruvec);
     108             : 
     109             :         /* As above, this is a little surprising, but possible */
     110           0 :         if (unlikely(page_evictable(page)))
     111             :                 goto out;
     112             : 
     113           0 :         SetPageUnevictable(page);
     114           0 :         page->mlock_count = !!PageMlocked(page);
     115           0 :         __count_vm_events(UNEVICTABLE_PGCULLED, thp_nr_pages(page));
     116             : out:
     117           0 :         add_page_to_lru_list(page, lruvec);
     118           0 :         SetPageLRU(page);
     119           0 :         return lruvec;
     120             : }
     121             : 
     122           0 : static struct lruvec *__munlock_page(struct page *page, struct lruvec *lruvec)
     123             : {
     124           0 :         int nr_pages = thp_nr_pages(page);
     125           0 :         bool isolated = false;
     126             : 
     127           0 :         if (!TestClearPageLRU(page))
     128             :                 goto munlock;
     129             : 
     130           0 :         isolated = true;
     131           0 :         lruvec = folio_lruvec_relock_irq(page_folio(page), lruvec);
     132             : 
     133           0 :         if (PageUnevictable(page)) {
     134             :                 /* Then mlock_count is maintained, but might undercount */
     135           0 :                 if (page->mlock_count)
     136           0 :                         page->mlock_count--;
     137           0 :                 if (page->mlock_count)
     138             :                         goto out;
     139             :         }
     140             :         /* else assume that was the last mlock: reclaim will fix it if not */
     141             : 
     142             : munlock:
     143           0 :         if (TestClearPageMlocked(page)) {
     144           0 :                 __mod_zone_page_state(page_zone(page), NR_MLOCK, -nr_pages);
     145           0 :                 if (isolated || !PageUnevictable(page))
     146           0 :                         __count_vm_events(UNEVICTABLE_PGMUNLOCKED, nr_pages);
     147             :                 else
     148           0 :                         __count_vm_events(UNEVICTABLE_PGSTRANDED, nr_pages);
     149             :         }
     150             : 
     151             :         /* page_evictable() has to be checked *after* clearing Mlocked */
     152           0 :         if (isolated && PageUnevictable(page) && page_evictable(page)) {
     153           0 :                 del_page_from_lru_list(page, lruvec);
     154           0 :                 ClearPageUnevictable(page);
     155           0 :                 add_page_to_lru_list(page, lruvec);
     156           0 :                 __count_vm_events(UNEVICTABLE_PGRESCUED, nr_pages);
     157             :         }
     158             : out:
     159           0 :         if (isolated)
     160             :                 SetPageLRU(page);
     161           0 :         return lruvec;
     162             : }
     163             : 
     164             : /*
     165             :  * Flags held in the low bits of a struct page pointer on the mlock_pvec.
     166             :  */
     167             : #define LRU_PAGE 0x1
     168             : #define NEW_PAGE 0x2
     169             : static inline struct page *mlock_lru(struct page *page)
     170             : {
     171           0 :         return (struct page *)((unsigned long)page + LRU_PAGE);
     172             : }
     173             : 
     174             : static inline struct page *mlock_new(struct page *page)
     175             : {
     176           0 :         return (struct page *)((unsigned long)page + NEW_PAGE);
     177             : }
     178             : 
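/*
 * Illustrative standalone sketch (not part of mlock.c): the same
 * low-bit pointer tagging used by mlock_lru()/mlock_new() above.
 * Because the tagged objects are at least 4-byte aligned, the two
 * lowest pointer bits are always zero and can carry flags, which the
 * consumer (mlock_pagevec() below) masks off again.  All names here
 * are made up for the example.
 */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define TAG_LRU  0x1UL
#define TAG_NEW  0x2UL
#define TAG_MASK (TAG_LRU | TAG_NEW)

struct item { int payload; } __attribute__((aligned(4)));

static void *tag_ptr(struct item *p, unsigned long tag)
{
        return (void *)((uintptr_t)p | tag);
}

static struct item *untag_ptr(void *tagged, unsigned long *tag)
{
        *tag = (uintptr_t)tagged & TAG_MASK;
        return (struct item *)((uintptr_t)tagged & ~(uintptr_t)TAG_MASK);
}

int main(void)
{
        struct item it = { .payload = 42 };
        unsigned long tag;
        void *enc = tag_ptr(&it, TAG_NEW);
        struct item *dec = untag_ptr(enc, &tag);

        assert(dec == &it && tag == TAG_NEW);
        printf("payload=%d tag=%lu\n", dec->payload, tag);
        return 0;
}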
     179             : /*
     180             :  * mlock_pagevec() is derived from pagevec_lru_move_fn():
     181             :  * perhaps that can make use of such page pointer flags in future,
     182             :  * but for now just keep it for mlock.  We could use three separate
     183             :  * pagevecs instead, but one feels better (munlocking a full pagevec
     184             :  * does not need to drain mlocking pagevecs first).
     185             :  */
     186           0 : static void mlock_pagevec(struct pagevec *pvec)
     187             : {
     188           0 :         struct lruvec *lruvec = NULL;
     189             :         unsigned long mlock;
     190             :         struct page *page;
     191             :         int i;
     192             : 
     193           0 :         for (i = 0; i < pagevec_count(pvec); i++) {
     194           0 :                 page = pvec->pages[i];
     195           0 :                 mlock = (unsigned long)page & (LRU_PAGE | NEW_PAGE);
     196           0 :                 page = (struct page *)((unsigned long)page - mlock);
     197           0 :                 pvec->pages[i] = page;
     198             : 
     199           0 :                 if (mlock & LRU_PAGE)
     200           0 :                         lruvec = __mlock_page(page, lruvec);
     201           0 :                 else if (mlock & NEW_PAGE)
     202           0 :                         lruvec = __mlock_new_page(page, lruvec);
     203             :                 else
     204           0 :                         lruvec = __munlock_page(page, lruvec);
     205             :         }
     206             : 
     207           0 :         if (lruvec)
     208           0 :                 unlock_page_lruvec_irq(lruvec);
     209           0 :         release_pages(pvec->pages, pvec->nr);
     210           0 :         pagevec_reinit(pvec);
     211           0 : }
     212             : 
     213           0 : void mlock_page_drain_local(void)
     214             : {
     215             :         struct pagevec *pvec;
     216             : 
     217           0 :         local_lock(&mlock_pvec.lock);
     218           0 :         pvec = this_cpu_ptr(&mlock_pvec.vec);
     219           0 :         if (pagevec_count(pvec))
     220           0 :                 mlock_pagevec(pvec);
     221           0 :         local_unlock(&mlock_pvec.lock);
     222           0 : }
     223             : 
     224           0 : void mlock_page_drain_remote(int cpu)
     225             : {
     226             :         struct pagevec *pvec;
     227             : 
     228           0 :         WARN_ON_ONCE(cpu_online(cpu));
     229           0 :         pvec = &per_cpu(mlock_pvec.vec, cpu);
     230           0 :         if (pagevec_count(pvec))
     231           0 :                 mlock_pagevec(pvec);
     232           0 : }
     233             : 
     234           0 : bool need_mlock_page_drain(int cpu)
     235             : {
     236           0 :         return pagevec_count(&per_cpu(mlock_pvec.vec, cpu));
     237             : }
     238             : 
     239             : /**
     240             :  * mlock_folio - mlock a folio already on (or temporarily off) LRU
     241             :  * @folio: folio to be mlocked.
     242             :  */
     243           0 : void mlock_folio(struct folio *folio)
     244             : {
     245             :         struct pagevec *pvec;
     246             : 
     247           0 :         local_lock(&mlock_pvec.lock);
     248           0 :         pvec = this_cpu_ptr(&mlock_pvec.vec);
     249             : 
     250           0 :         if (!folio_test_set_mlocked(folio)) {
     251           0 :                 int nr_pages = folio_nr_pages(folio);
     252             : 
     253           0 :                 zone_stat_mod_folio(folio, NR_MLOCK, nr_pages);
     254           0 :                 __count_vm_events(UNEVICTABLE_PGMLOCKED, nr_pages);
     255             :         }
     256             : 
     257           0 :         folio_get(folio);
     258           0 :         if (!pagevec_add(pvec, mlock_lru(&folio->page)) ||
     259           0 :             folio_test_large(folio) || lru_cache_disabled())
     260           0 :                 mlock_pagevec(pvec);
     261           0 :         local_unlock(&mlock_pvec.lock);
     262           0 : }
     263             : 
     264             : /**
     265             :  * mlock_new_page - mlock a newly allocated page not yet on LRU
     266             :  * @page: page to be mlocked, either a normal page or a THP head.
     267             :  */
     268           0 : void mlock_new_page(struct page *page)
     269             : {
     270             :         struct pagevec *pvec;
     271           0 :         int nr_pages = thp_nr_pages(page);
     272             : 
     273           0 :         local_lock(&mlock_pvec.lock);
     274           0 :         pvec = this_cpu_ptr(&mlock_pvec.vec);
     275           0 :         SetPageMlocked(page);
     276           0 :         mod_zone_page_state(page_zone(page), NR_MLOCK, nr_pages);
     277           0 :         __count_vm_events(UNEVICTABLE_PGMLOCKED, nr_pages);
     278             : 
     279           0 :         get_page(page);
     280           0 :         if (!pagevec_add(pvec, mlock_new(page)) ||
     281           0 :             PageHead(page) || lru_cache_disabled())
     282           0 :                 mlock_pagevec(pvec);
     283           0 :         local_unlock(&mlock_pvec.lock);
     284           0 : }
     285             : 
     286             : /**
     287             :  * munlock_page - munlock a page
     288             :  * @page: page to be munlocked, either a normal page or a THP head.
     289             :  */
     290           0 : void munlock_page(struct page *page)
     291             : {
     292             :         struct pagevec *pvec;
     293             : 
     294           0 :         local_lock(&mlock_pvec.lock);
     295           0 :         pvec = this_cpu_ptr(&mlock_pvec.vec);
     296             :         /*
     297             :          * TestClearPageMlocked(page) must be left to __munlock_page(),
     298             :          * which will check whether the page is multiply mlocked.
     299             :          */
     300             : 
     301           0 :         get_page(page);
     302           0 :         if (!pagevec_add(pvec, page) ||
     303           0 :             PageHead(page) || lru_cache_disabled())
     304           0 :                 mlock_pagevec(pvec);
     305           0 :         local_unlock(&mlock_pvec.lock);
     306           0 : }
     307             : 
     308           0 : static int mlock_pte_range(pmd_t *pmd, unsigned long addr,
     309             :                            unsigned long end, struct mm_walk *walk)
     310             : 
     311             : {
     312           0 :         struct vm_area_struct *vma = walk->vma;
     313             :         spinlock_t *ptl;
     314             :         pte_t *start_pte, *pte;
     315             :         struct page *page;
     316             : 
     317           0 :         ptl = pmd_trans_huge_lock(pmd, vma);
     318             :         if (ptl) {
     319             :                 if (!pmd_present(*pmd))
     320             :                         goto out;
     321             :                 if (is_huge_zero_pmd(*pmd))
     322             :                         goto out;
     323             :                 page = pmd_page(*pmd);
     324             :                 if (vma->vm_flags & VM_LOCKED)
     325             :                         mlock_folio(page_folio(page));
     326             :                 else
     327             :                         munlock_page(page);
     328             :                 goto out;
     329             :         }
     330             : 
     331           0 :         start_pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
     332           0 :         for (pte = start_pte; addr != end; pte++, addr += PAGE_SIZE) {
     333           0 :                 if (!pte_present(*pte))
     334           0 :                         continue;
     335           0 :                 page = vm_normal_page(vma, addr, *pte);
     336           0 :                 if (!page)
     337           0 :                         continue;
     338           0 :                 if (PageTransCompound(page))
     339             :                         continue;
     340           0 :                 if (vma->vm_flags & VM_LOCKED)
     341           0 :                         mlock_folio(page_folio(page));
     342             :                 else
     343           0 :                         munlock_page(page);
     344             :         }
     345             :         pte_unmap(start_pte);
     346             : out:
     347           0 :         spin_unlock(ptl);
     348           0 :         cond_resched();
     349           0 :         return 0;
     350             : }
     351             : 
     352             : /*
     353             :  * mlock_vma_pages_range() - mlock any pages already in the range,
     354             :  *                           or munlock all pages in the range.
     355             :  * @vma - vma containing range to be mlock()ed or munlock()ed
     356             :  * @start - start address in @vma of the range
     357             :  * @end - end of range in @vma
     358             :  * @newflags - the new set of flags for @vma.
     359             :  *
     360             :  * Called for mlock(), mlock2() and mlockall(), to set @vma VM_LOCKED;
     361             :  * called for munlock() and munlockall(), to clear VM_LOCKED from @vma.
     362             :  */
     363           0 : static void mlock_vma_pages_range(struct vm_area_struct *vma,
     364             :         unsigned long start, unsigned long end, vm_flags_t newflags)
     365             : {
     366             :         static const struct mm_walk_ops mlock_walk_ops = {
     367             :                 .pmd_entry = mlock_pte_range,
     368             :         };
     369             : 
     370             :         /*
     371             :          * There is a slight chance that concurrent page migration,
     372             :          * or page reclaim finding a page of this now-VM_LOCKED vma,
     373             :          * will call mlock_vma_page() and raise page's mlock_count:
     374             :          * double counting, leaving the page unevictable indefinitely.
     375             :          * Communicate this danger to mlock_vma_page() with VM_IO,
     376             :          * which is a VM_SPECIAL flag not allowed on VM_LOCKED vmas.
     377             :          * mmap_lock is held in write mode here, so this weird
     378             :          * combination should not be visible to other mmap_lock users;
     379             :          * but WRITE_ONCE so rmap walkers must see VM_IO if VM_LOCKED.
     380             :          */
     381           0 :         if (newflags & VM_LOCKED)
     382           0 :                 newflags |= VM_IO;
     383           0 :         WRITE_ONCE(vma->vm_flags, newflags);
     384             : 
     385           0 :         lru_add_drain();
     386           0 :         walk_page_range(vma->vm_mm, start, end, &mlock_walk_ops, NULL);
     387           0 :         lru_add_drain();
     388             : 
     389           0 :         if (newflags & VM_IO) {
     390           0 :                 newflags &= ~VM_IO;
     391           0 :                 WRITE_ONCE(vma->vm_flags, newflags);
     392             :         }
     393           0 : }
     394             : 
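/*
 * Illustrative standalone sketch (not part of mlock.c) of the pattern
 * used above: while the walk rewrites pages under the exclusive lock,
 * the writer temporarily publishes a sentinel bit alongside the real
 * flag, so lockless readers that race with the walk can tell "in
 * flux" apart from "stably locked" and skip raising their counters.
 * The flag names below are made up for the example.
 */
#include <stdatomic.h>
#include <stdio.h>

#define FLAG_LOCKED  0x1u
#define FLAG_IN_FLUX 0x2u        /* plays the role of VM_IO above */

static _Atomic unsigned int flags_word;

static void writer_set_locked(void)
{
        /* Publish LOCKED together with the sentinel, as WRITE_ONCE() does. */
        atomic_store_explicit(&flags_word, FLAG_LOCKED | FLAG_IN_FLUX,
                              memory_order_relaxed);

        /* ... walk the range and lock the pages here ... */

        /* Drop the sentinel once the walk is complete. */
        atomic_store_explicit(&flags_word, FLAG_LOCKED, memory_order_relaxed);
}

static int reader_may_count(void)
{
        unsigned int f = atomic_load_explicit(&flags_word,
                                              memory_order_relaxed);

        /* A racing reader must not raise its count while the walk runs. */
        return (f & FLAG_LOCKED) && !(f & FLAG_IN_FLUX);
}

int main(void)
{
        writer_set_locked();
        printf("reader may count: %d\n", reader_may_count());
        return 0;
}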
     395             : /*
     396             :  * mlock_fixup  - handle mlock[all]/munlock[all] requests.
     397             :  *
     398             :  * Filters out "special" vmas -- VM_LOCKED never gets set for these, and
     399             :  * munlock is a no-op.  However, for some special vmas, we go ahead and
     400             :  * populate the ptes.
     401             :  *
     402             :  * For vmas that pass the filters, merge/split as appropriate.
     403             :  */
     404           0 : static int mlock_fixup(struct vm_area_struct *vma, struct vm_area_struct **prev,
     405             :         unsigned long start, unsigned long end, vm_flags_t newflags)
     406             : {
     407           0 :         struct mm_struct *mm = vma->vm_mm;
     408             :         pgoff_t pgoff;
     409             :         int nr_pages;
     410           0 :         int ret = 0;
     411           0 :         vm_flags_t oldflags = vma->vm_flags;
     412             : 
     413           0 :         if (newflags == oldflags || (oldflags & VM_SPECIAL) ||
     414           0 :             is_vm_hugetlb_page(vma) || vma == get_gate_vma(current->mm) ||
     415           0 :             vma_is_dax(vma) || vma_is_secretmem(vma))
     416             :                 /* don't set VM_LOCKED or VM_LOCKONFAULT and don't count */
     417             :                 goto out;
     418             : 
     419           0 :         pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
     420           0 :         *prev = vma_merge(mm, *prev, start, end, newflags, vma->anon_vma,
     421             :                           vma->vm_file, pgoff, vma_policy(vma),
     422             :                           vma->vm_userfaultfd_ctx, anon_vma_name(vma));
     423           0 :         if (*prev) {
     424             :                 vma = *prev;
     425             :                 goto success;
     426             :         }
     427             : 
     428           0 :         if (start != vma->vm_start) {
     429           0 :                 ret = split_vma(mm, vma, start, 1);
     430           0 :                 if (ret)
     431             :                         goto out;
     432             :         }
     433             : 
     434           0 :         if (end != vma->vm_end) {
     435           0 :                 ret = split_vma(mm, vma, end, 0);
     436           0 :                 if (ret)
     437             :                         goto out;
     438             :         }
     439             : 
     440             : success:
     441             :         /*
     442             :          * Keep track of amount of locked VM.
     443             :          */
     444           0 :         nr_pages = (end - start) >> PAGE_SHIFT;
     445           0 :         if (!(newflags & VM_LOCKED))
     446           0 :                 nr_pages = -nr_pages;
     447           0 :         else if (oldflags & VM_LOCKED)
     448           0 :                 nr_pages = 0;
     449           0 :         mm->locked_vm += nr_pages;
     450             : 
     451             :         /*
     452             :          * vm_flags is protected by the mmap_lock held in write mode.
     453             :          * It's okay if try_to_unmap_one unmaps a page just after we
     454             :          * set VM_LOCKED, populate_vma_page_range will bring it back.
     455             :          */
     456             : 
     457           0 :         if ((newflags & VM_LOCKED) && (oldflags & VM_LOCKED)) {
     458             :                 /* No work to do, and mlocking twice would be wrong */
     459           0 :                 vma->vm_flags = newflags;
     460             :         } else {
     461           0 :                 mlock_vma_pages_range(vma, start, end, newflags);
     462             :         }
     463             : out:
     464           0 :         *prev = vma;
     465           0 :         return ret;
     466             : }
     467             : 
     468           0 : static int apply_vma_lock_flags(unsigned long start, size_t len,
     469             :                                 vm_flags_t flags)
     470             : {
     471             :         unsigned long nstart, end, tmp;
     472             :         struct vm_area_struct *vma, *prev;
     473             :         int error;
     474             : 
     475             :         VM_BUG_ON(offset_in_page(start));
     476             :         VM_BUG_ON(len != PAGE_ALIGN(len));
     477           0 :         end = start + len;
     478           0 :         if (end < start)
     479             :                 return -EINVAL;
     480           0 :         if (end == start)
     481             :                 return 0;
     482           0 :         vma = find_vma(current->mm, start);
     483           0 :         if (!vma || vma->vm_start > start)
     484             :                 return -ENOMEM;
     485             : 
     486           0 :         prev = vma->vm_prev;
     487           0 :         if (start > vma->vm_start)
     488           0 :                 prev = vma;
     489             : 
     490             :         for (nstart = start ; ; ) {
     491           0 :                 vm_flags_t newflags = vma->vm_flags & VM_LOCKED_CLEAR_MASK;
     492             : 
     493           0 :                 newflags |= flags;
     494             : 
     495             :                 /* Here we know that  vma->vm_start <= nstart < vma->vm_end. */
     496           0 :                 tmp = vma->vm_end;
     497           0 :                 if (tmp > end)
     498           0 :                         tmp = end;
     499           0 :                 error = mlock_fixup(vma, &prev, nstart, tmp, newflags);
     500           0 :                 if (error)
     501             :                         break;
     502           0 :                 nstart = tmp;
     503           0 :                 if (nstart < prev->vm_end)
     504           0 :                         nstart = prev->vm_end;
     505           0 :                 if (nstart >= end)
     506             :                         break;
     507             : 
     508           0 :                 vma = prev->vm_next;
     509           0 :                 if (!vma || vma->vm_start != nstart) {
     510             :                         error = -ENOMEM;
     511             :                         break;
     512             :                 }
     513             :         }
     514             :         return error;
     515             : }
     516             : 
     517             : /*
       518             :  * Go through the vma areas and sum the size of the mlocked
       519             :  * vma pages; that sum is the return value.
       520             :  * Note that the deferred memory locking case (mlock2() with
       521             :  * MLOCK_ONFAULT) is also counted.
       522             :  * Return value: the number of previously mlocked pages.
     523             :  */
     524           0 : static unsigned long count_mm_mlocked_page_nr(struct mm_struct *mm,
     525             :                 unsigned long start, size_t len)
     526             : {
     527             :         struct vm_area_struct *vma;
     528           0 :         unsigned long count = 0;
     529             : 
     530           0 :         if (mm == NULL)
     531           0 :                 mm = current->mm;
     532             : 
     533           0 :         vma = find_vma(mm, start);
     534           0 :         if (vma == NULL)
     535             :                 return 0;
     536             : 
     537           0 :         for (; vma ; vma = vma->vm_next) {
     538           0 :                 if (start >= vma->vm_end)
     539           0 :                         continue;
     540           0 :                 if (start + len <=  vma->vm_start)
     541             :                         break;
     542           0 :                 if (vma->vm_flags & VM_LOCKED) {
     543           0 :                         if (start > vma->vm_start)
     544           0 :                                 count -= (start - vma->vm_start);
     545           0 :                         if (start + len < vma->vm_end) {
     546           0 :                                 count += start + len - vma->vm_start;
     547           0 :                                 break;
     548             :                         }
     549           0 :                         count += vma->vm_end - vma->vm_start;
     550             :                 }
     551             :         }
     552             : 
     553           0 :         return count >> PAGE_SHIFT;
     554             : }
     555             : 
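/*
 * Illustrative standalone sketch (not part of mlock.c) of the idea
 * behind count_mm_mlocked_page_nr(): sum how much of a request
 * [start, start + len) already lies inside a set of locked ranges,
 * so the caller does not charge those bytes against the limit twice.
 * The types and helpers here are made up for the example.
 */
#include <stdio.h>

struct range { unsigned long start, end; };        /* [start, end) */

static unsigned long count_already_locked(const struct range *locked,
                                          int nr, unsigned long start,
                                          unsigned long len)
{
        unsigned long end = start + len, count = 0;
        int i;

        for (i = 0; i < nr; i++) {
                unsigned long lo = locked[i].start > start ?
                                   locked[i].start : start;
                unsigned long hi = locked[i].end < end ?
                                   locked[i].end : end;

                if (lo < hi)
                        count += hi - lo;
        }
        return count;
}

int main(void)
{
        struct range locked[] = { { 0x1000, 0x3000 }, { 0x5000, 0x6000 } };

        /*
         * Request 0x2000..0x5800: 0x1000 bytes overlap the first range
         * and 0x800 bytes overlap the second, so this prints 0x1800.
         */
        printf("overlap = 0x%lx\n",
               count_already_locked(locked, 2, 0x2000, 0x3800));
        return 0;
}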
     556             : /*
     557             :  * convert get_user_pages() return value to posix mlock() error
     558             :  */
     559             : static int __mlock_posix_error_return(long retval)
     560             : {
     561           0 :         if (retval == -EFAULT)
     562             :                 retval = -ENOMEM;
     563           0 :         else if (retval == -ENOMEM)
     564           0 :                 retval = -EAGAIN;
     565           0 :         return retval;
     566             : }
     567             : 
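/*
 * Illustrative userspace sketch (not part of mlock.c): the caller-side
 * view of the error mapping above.  Per POSIX, mlock() reports
 * unmapped addresses as ENOMEM, and "some or all of the memory could
 * not be locked" as EAGAIN, which a caller may choose to retry.
 */
#include <errno.h>
#include <stdio.h>
#include <sys/mman.h>

static int lock_with_retry(void *addr, size_t len, int max_tries)
{
        int i;

        for (i = 0; i < max_tries; i++) {
                if (mlock(addr, len) == 0)
                        return 0;
                if (errno != EAGAIN)        /* ENOMEM, EPERM, EINVAL: give up */
                        return -1;
        }
        return -1;
}

int main(void)
{
        static char buf[1 << 16];

        if (lock_with_retry(buf, sizeof(buf), 3) != 0)
                perror("mlock");
        else
                munlock(buf, sizeof(buf));
        return 0;
}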
     568           0 : static __must_check int do_mlock(unsigned long start, size_t len, vm_flags_t flags)
     569             : {
     570             :         unsigned long locked;
     571             :         unsigned long lock_limit;
     572           0 :         int error = -ENOMEM;
     573             : 
     574           0 :         start = untagged_addr(start);
     575             : 
     576           0 :         if (!can_do_mlock())
     577             :                 return -EPERM;
     578             : 
     579           0 :         len = PAGE_ALIGN(len + (offset_in_page(start)));
     580           0 :         start &= PAGE_MASK;
     581             : 
     582           0 :         lock_limit = rlimit(RLIMIT_MEMLOCK);
     583           0 :         lock_limit >>= PAGE_SHIFT;
     584           0 :         locked = len >> PAGE_SHIFT;
     585             : 
     586           0 :         if (mmap_write_lock_killable(current->mm))
     587             :                 return -EINTR;
     588             : 
     589           0 :         locked += current->mm->locked_vm;
     590           0 :         if ((locked > lock_limit) && (!capable(CAP_IPC_LOCK))) {
     591             :                 /*
     592             :                  * It is possible that the regions requested intersect with
     593             :                  * previously mlocked areas, that part area in "mm->locked_vm"
     594             :                  * should not be counted to new mlock increment count. So check
     595             :                  * and adjust locked count if necessary.
     596             :                  */
     597           0 :                 locked -= count_mm_mlocked_page_nr(current->mm,
     598             :                                 start, len);
     599             :         }
     600             : 
     601             :         /* check against resource limits */
     602           0 :         if ((locked <= lock_limit) || capable(CAP_IPC_LOCK))
     603           0 :                 error = apply_vma_lock_flags(start, len, flags);
     604             : 
     605           0 :         mmap_write_unlock(current->mm);
     606           0 :         if (error)
     607             :                 return error;
     608             : 
     609           0 :         error = __mm_populate(start, len, 0);
     610           0 :         if (error)
     611           0 :                 return __mlock_posix_error_return(error);
     612             :         return 0;
     613             : }
     614             : 
     615           0 : SYSCALL_DEFINE2(mlock, unsigned long, start, size_t, len)
     616             : {
     617           0 :         return do_mlock(start, len, VM_LOCKED);
     618             : }
     619             : 
     620           0 : SYSCALL_DEFINE3(mlock2, unsigned long, start, size_t, len, int, flags)
     621             : {
     622           0 :         vm_flags_t vm_flags = VM_LOCKED;
     623             : 
     624           0 :         if (flags & ~MLOCK_ONFAULT)
     625             :                 return -EINVAL;
     626             : 
     627           0 :         if (flags & MLOCK_ONFAULT)
     628           0 :                 vm_flags |= VM_LOCKONFAULT;
     629             : 
     630           0 :         return do_mlock(start, len, vm_flags);
     631             : }
     632             : 
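/*
 * Illustrative userspace sketch (not part of mlock.c): mlock2() with
 * MLOCK_ONFAULT sets VM_LOCKONFAULT as above, so pages are locked
 * only once they have been faulted in.  This assumes a glibc that
 * provides the mlock2() wrapper and MLOCK_ONFAULT (glibc 2.27+);
 * otherwise the raw syscall(SYS_mlock2, ...) form would be needed.
 */
#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <sys/mman.h>

int main(void)
{
        size_t len = 1 << 20;
        char *buf = malloc(len);

        if (!buf)
                return 1;

        /* Lock on fault: the whole range is not populated up front. */
        if (mlock2(buf, len, MLOCK_ONFAULT) != 0) {
                perror("mlock2");
                free(buf);
                return 1;
        }

        buf[0] = 1;        /* this page is now faulted in and locked */

        munlock(buf, len);
        free(buf);
        return 0;
}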
     633           0 : SYSCALL_DEFINE2(munlock, unsigned long, start, size_t, len)
     634             : {
     635             :         int ret;
     636             : 
     637           0 :         start = untagged_addr(start);
     638             : 
     639           0 :         len = PAGE_ALIGN(len + (offset_in_page(start)));
     640           0 :         start &= PAGE_MASK;
     641             : 
     642           0 :         if (mmap_write_lock_killable(current->mm))
     643             :                 return -EINTR;
     644           0 :         ret = apply_vma_lock_flags(start, len, 0);
     645           0 :         mmap_write_unlock(current->mm);
     646             : 
     647           0 :         return ret;
     648             : }
     649             : 
     650             : /*
     651             :  * Take the MCL_* flags passed into mlockall (or 0 if called from munlockall)
     652             :  * and translate into the appropriate modifications to mm->def_flags and/or the
     653             :  * flags for all current VMAs.
     654             :  *
     655             :  * There are a couple of subtleties with this.  If mlockall() is called multiple
     656             :  * times with different flags, the values do not necessarily stack.  If mlockall
     657             :  * is called once including the MCL_FUTURE flag and then a second time without
     658             :  * it, VM_LOCKED and VM_LOCKONFAULT will be cleared from mm->def_flags.
     659             :  */
     660           0 : static int apply_mlockall_flags(int flags)
     661             : {
     662           0 :         struct vm_area_struct *vma, *prev = NULL;
     663           0 :         vm_flags_t to_add = 0;
     664             : 
     665           0 :         current->mm->def_flags &= VM_LOCKED_CLEAR_MASK;
     666           0 :         if (flags & MCL_FUTURE) {
     667           0 :                 current->mm->def_flags |= VM_LOCKED;
     668             : 
     669           0 :                 if (flags & MCL_ONFAULT)
     670           0 :                         current->mm->def_flags |= VM_LOCKONFAULT;
     671             : 
     672           0 :                 if (!(flags & MCL_CURRENT))
     673             :                         goto out;
     674             :         }
     675             : 
     676           0 :         if (flags & MCL_CURRENT) {
     677           0 :                 to_add |= VM_LOCKED;
     678           0 :                 if (flags & MCL_ONFAULT)
     679           0 :                         to_add |= VM_LOCKONFAULT;
     680             :         }
     681             : 
     682           0 :         for (vma = current->mm->mmap; vma ; vma = prev->vm_next) {
     683             :                 vm_flags_t newflags;
     684             : 
     685           0 :                 newflags = vma->vm_flags & VM_LOCKED_CLEAR_MASK;
     686           0 :                 newflags |= to_add;
     687             : 
     688             :                 /* Ignore errors */
     689           0 :                 mlock_fixup(vma, &prev, vma->vm_start, vma->vm_end, newflags);
     690           0 :                 cond_resched();
     691             :         }
     692             : out:
     693           0 :         return 0;
     694             : }
     695             : 
     696           0 : SYSCALL_DEFINE1(mlockall, int, flags)
     697             : {
     698             :         unsigned long lock_limit;
     699             :         int ret;
     700             : 
     701           0 :         if (!flags || (flags & ~(MCL_CURRENT | MCL_FUTURE | MCL_ONFAULT)) ||
     702             :             flags == MCL_ONFAULT)
     703             :                 return -EINVAL;
     704             : 
     705           0 :         if (!can_do_mlock())
     706             :                 return -EPERM;
     707             : 
     708           0 :         lock_limit = rlimit(RLIMIT_MEMLOCK);
     709           0 :         lock_limit >>= PAGE_SHIFT;
     710             : 
     711           0 :         if (mmap_write_lock_killable(current->mm))
     712             :                 return -EINTR;
     713             : 
     714           0 :         ret = -ENOMEM;
     715           0 :         if (!(flags & MCL_CURRENT) || (current->mm->total_vm <= lock_limit) ||
     716           0 :             capable(CAP_IPC_LOCK))
     717           0 :                 ret = apply_mlockall_flags(flags);
     718           0 :         mmap_write_unlock(current->mm);
     719           0 :         if (!ret && (flags & MCL_CURRENT))
     720           0 :                 mm_populate(0, TASK_SIZE);
     721             : 
     722           0 :         return ret;
     723             : }
     724             : 
     725           0 : SYSCALL_DEFINE0(munlockall)
     726             : {
     727             :         int ret;
     728             : 
     729           0 :         if (mmap_write_lock_killable(current->mm))
     730             :                 return -EINTR;
     731           0 :         ret = apply_mlockall_flags(0);
     732           0 :         mmap_write_unlock(current->mm);
     733           0 :         return ret;
     734             : }
     735             : 
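/*
 * Illustrative userspace sketch (not part of mlock.c): the flag
 * combinations accepted by mlockall() above.  MCL_ONFAULT on its own
 * is rejected with -EINVAL; it must accompany MCL_CURRENT and/or
 * MCL_FUTURE.  munlockall() clears the effect of both.  Assumes a
 * glibc/kernel recent enough to define MCL_ONFAULT.
 */
#include <stdio.h>
#include <sys/mman.h>

int main(void)
{
        /* Lock what is mapped now, and future mappings as they fault in. */
        if (mlockall(MCL_CURRENT | MCL_FUTURE | MCL_ONFAULT) != 0) {
                perror("mlockall");
                return 1;
        }

        /* ... run with the address space pinned ... */

        if (munlockall() != 0) {
                perror("munlockall");
                return 1;
        }
        return 0;
}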
     736             : /*
      737             :  * Objects with a lifetime different from processes (SHM_LOCK and SHM_HUGETLB
      738             :  * shm segments) get accounted against the user's ucounts instead.
     739             :  */
     740             : static DEFINE_SPINLOCK(shmlock_user_lock);
     741             : 
     742           0 : int user_shm_lock(size_t size, struct ucounts *ucounts)
     743             : {
     744             :         unsigned long lock_limit, locked;
     745             :         long memlock;
     746           0 :         int allowed = 0;
     747             : 
     748           0 :         locked = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
     749           0 :         lock_limit = rlimit(RLIMIT_MEMLOCK);
     750           0 :         if (lock_limit != RLIM_INFINITY)
     751           0 :                 lock_limit >>= PAGE_SHIFT;
     752           0 :         spin_lock(&shmlock_user_lock);
     753           0 :         memlock = inc_rlimit_ucounts(ucounts, UCOUNT_RLIMIT_MEMLOCK, locked);
     754             : 
     755           0 :         if ((memlock == LONG_MAX || memlock > lock_limit) && !capable(CAP_IPC_LOCK)) {
     756           0 :                 dec_rlimit_ucounts(ucounts, UCOUNT_RLIMIT_MEMLOCK, locked);
     757           0 :                 goto out;
     758             :         }
     759           0 :         if (!get_ucounts(ucounts)) {
     760           0 :                 dec_rlimit_ucounts(ucounts, UCOUNT_RLIMIT_MEMLOCK, locked);
     761           0 :                 allowed = 0;
     762           0 :                 goto out;
     763             :         }
     764             :         allowed = 1;
     765             : out:
     766           0 :         spin_unlock(&shmlock_user_lock);
     767           0 :         return allowed;
     768             : }
     769             : 
     770           0 : void user_shm_unlock(size_t size, struct ucounts *ucounts)
     771             : {
     772           0 :         spin_lock(&shmlock_user_lock);
     773           0 :         dec_rlimit_ucounts(ucounts, UCOUNT_RLIMIT_MEMLOCK, (size + PAGE_SIZE - 1) >> PAGE_SHIFT);
     774           0 :         spin_unlock(&shmlock_user_lock);
     775           0 :         put_ucounts(ucounts);
     776           0 : }
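/*
 * Illustrative userspace sketch (not part of mlock.c): the caller-side
 * operation that reaches user_shm_lock()/user_shm_unlock() above is
 * shmctl() with SHM_LOCK/SHM_UNLOCK on a SysV shm segment, whose
 * locked size is charged to the owning user rather than to a process.
 */
#include <stdio.h>
#include <sys/ipc.h>
#include <sys/shm.h>

int main(void)
{
        int shmid = shmget(IPC_PRIVATE, 1 << 20, IPC_CREAT | 0600);

        if (shmid < 0) {
                perror("shmget");
                return 1;
        }

        if (shmctl(shmid, SHM_LOCK, NULL) != 0)        /* charged to RLIMIT_MEMLOCK */
                perror("shmctl(SHM_LOCK)");
        else if (shmctl(shmid, SHM_UNLOCK, NULL) != 0)
                perror("shmctl(SHM_UNLOCK)");

        shmctl(shmid, IPC_RMID, NULL);                 /* destroy the segment */
        return 0;
}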

Generated by: LCOV version 1.14