LCOV - code coverage report
Current view: top level - mm - mprotect.c (source / functions)
Test:         coverage.info
Date:         2022-12-09 01:23:36

                     Hit     Total    Coverage
Lines:                 0       201       0.0 %
Functions:             0         8       0.0 %

          Line data    Source code
       1             : // SPDX-License-Identifier: GPL-2.0
       2             : /*
       3             :  *  mm/mprotect.c
       4             :  *
       5             :  *  (C) Copyright 1994 Linus Torvalds
       6             :  *  (C) Copyright 2002 Christoph Hellwig
       7             :  *
       8             :  *  Address space accounting code       <alan@lxorguk.ukuu.org.uk>
       9             :  *  (C) Copyright 2002 Red Hat Inc, All Rights Reserved
      10             :  */
      11             : 
      12             : #include <linux/pagewalk.h>
      13             : #include <linux/hugetlb.h>
      14             : #include <linux/shm.h>
      15             : #include <linux/mman.h>
      16             : #include <linux/fs.h>
      17             : #include <linux/highmem.h>
      18             : #include <linux/security.h>
      19             : #include <linux/mempolicy.h>
      20             : #include <linux/personality.h>
      21             : #include <linux/syscalls.h>
      22             : #include <linux/swap.h>
      23             : #include <linux/swapops.h>
      24             : #include <linux/mmu_notifier.h>
      25             : #include <linux/migrate.h>
      26             : #include <linux/perf_event.h>
      27             : #include <linux/pkeys.h>
      28             : #include <linux/ksm.h>
      29             : #include <linux/uaccess.h>
      30             : #include <linux/mm_inline.h>
      31             : #include <linux/pgtable.h>
      32             : #include <linux/sched/sysctl.h>
      33             : #include <asm/cacheflush.h>
      34             : #include <asm/mmu_context.h>
      35             : #include <asm/tlbflush.h>
      36             : 
      37             : #include "internal.h"
      38             : 
      39           0 : static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
      40             :                 unsigned long addr, unsigned long end, pgprot_t newprot,
      41             :                 unsigned long cp_flags)
      42             : {
      43             :         pte_t *pte, oldpte;
      44             :         spinlock_t *ptl;
      45           0 :         unsigned long pages = 0;
      46           0 :         int target_node = NUMA_NO_NODE;
      47           0 :         bool dirty_accountable = cp_flags & MM_CP_DIRTY_ACCT;
      48           0 :         bool prot_numa = cp_flags & MM_CP_PROT_NUMA;
      49           0 :         bool uffd_wp = cp_flags & MM_CP_UFFD_WP;
      50           0 :         bool uffd_wp_resolve = cp_flags & MM_CP_UFFD_WP_RESOLVE;
      51             : 
      52             :         /*
      53             :          * Can be called with only the mmap_lock held for reading by
      54             :          * prot_numa, so we must check that the pmd isn't concurrently
      55             :          * changing under us from pmd_none to pmd_trans_huge
      56             :          * and/or the other way around.
      57             :          */
      58           0 :         if (pmd_trans_unstable(pmd))
      59             :                 return 0;
      60             : 
      61             :         /*
      62             :          * The pmd points to a regular pte so the pmd can't change
      63             :          * from under us even if the mmap_lock is only held for
      64             :          * reading.
      65             :          */
      66           0 :         pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
      67             : 
      68             :         /* Get target node for single threaded private VMAs */
      69           0 :         if (prot_numa && !(vma->vm_flags & VM_SHARED) &&
      70           0 :             atomic_read(&vma->vm_mm->mm_users) == 1)
      71           0 :                 target_node = numa_node_id();
      72             : 
      73           0 :         flush_tlb_batched_pending(vma->vm_mm);
      74             :         arch_enter_lazy_mmu_mode();
      75             :         do {
      76           0 :                 oldpte = *pte;
      77           0 :                 if (pte_present(oldpte)) {
      78             :                         pte_t ptent;
      79           0 :                         bool preserve_write = prot_numa && pte_write(oldpte);
      80             : 
      81             :                         /*
      82             :                          * Avoid trapping faults against the zero or KSM
      83             :                          * pages. See similar comment in change_huge_pmd.
      84             :                          */
      85           0 :                         if (prot_numa) {
      86             :                                 struct page *page;
      87             :                                 int nid;
      88             : 
      89             :                                 /* Avoid TLB flush if possible */
      90           0 :                                 if (pte_protnone(oldpte))
      91             :                                         continue;
      92             : 
      93           0 :                                 page = vm_normal_page(vma, addr, oldpte);
      94           0 :                                 if (!page || PageKsm(page))
      95           0 :                                         continue;
      96             : 
      97             :                                 /* Also skip shared copy-on-write pages */
      98           0 :                                 if (is_cow_mapping(vma->vm_flags) &&
      99           0 :                                     page_count(page) != 1)
     100           0 :                                         continue;
     101             : 
     102             :                                 /*
     103             :                                  * While migration can move some dirty pages,
     104             :                                  * it cannot move them all from MIGRATE_ASYNC
     105             :                                  * context.
     106             :                                  */
     107           0 :                                 if (page_is_file_lru(page) && PageDirty(page))
     108           0 :                                         continue;
     109             : 
     110             :                                 /*
     111             :                                  * Don't mess with PTEs if page is already on the node
     112             :                                  * a single-threaded process is running on.
     113             :                                  */
     114           0 :                                 nid = page_to_nid(page);
     115           0 :                                 if (target_node == nid)
     116           0 :                                         continue;
     117             : 
     118             :                                 /*
     119             :                                  * Skip scanning top tier node if normal numa
     120             :                                  * balancing is disabled
     121             :                                  */
     122             :                                 if (!(sysctl_numa_balancing_mode & NUMA_BALANCING_NORMAL) &&
     123           0 :                                     node_is_toptier(nid))
     124           0 :                                         continue;
     125             :                         }
     126             : 
     127           0 :                         oldpte = ptep_modify_prot_start(vma, addr, pte);
     128           0 :                         ptent = pte_modify(oldpte, newprot);
     129           0 :                         if (preserve_write)
     130             :                                 ptent = pte_mk_savedwrite(ptent);
     131             : 
     132           0 :                         if (uffd_wp) {
     133             :                                 ptent = pte_wrprotect(ptent);
     134             :                                 ptent = pte_mkuffd_wp(ptent);
     135             :                         } else if (uffd_wp_resolve) {
     136             :                                 /*
     137             :                                  * Leave the write bit to be handled
     138             :                                  * by the page fault handler, so that
     139             :                                  * things like COW can be handled
     140             :                                  * properly.
     141             :                                  */
     142             :                                 ptent = pte_clear_uffd_wp(ptent);
     143             :                         }
     144             : 
     145             :                         /* Avoid taking write faults for known dirty pages */
     146           0 :                         if (dirty_accountable && pte_dirty(ptent) &&
     147           0 :                                         (pte_soft_dirty(ptent) ||
     148             :                                          !(vma->vm_flags & VM_SOFTDIRTY))) {
     149             :                                 ptent = pte_mkwrite(ptent);
     150             :                         }
     151           0 :                         ptep_modify_prot_commit(vma, addr, pte, oldpte, ptent);
     152           0 :                         pages++;
     153           0 :                 } else if (is_swap_pte(oldpte)) {
     154           0 :                         swp_entry_t entry = pte_to_swp_entry(oldpte);
     155             :                         pte_t newpte;
     156             : 
     157           0 :                         if (is_writable_migration_entry(entry)) {
     158             :                                 /*
     159             :                                  * A protection check is difficult so
     160             :                                  * just be safe and disable write
     161             :                                  */
     162           0 :                                 entry = make_readable_migration_entry(
     163             :                                                         swp_offset(entry));
     164           0 :                                 newpte = swp_entry_to_pte(entry);
     165           0 :                                 if (pte_swp_soft_dirty(oldpte))
     166             :                                         newpte = pte_swp_mksoft_dirty(newpte);
     167             :                                 if (pte_swp_uffd_wp(oldpte))
     168             :                                         newpte = pte_swp_mkuffd_wp(newpte);
     169             :                         } else if (is_writable_device_private_entry(entry)) {
     170             :                                 /*
     171             :                                  * We do not preserve soft-dirtiness. See
     172             :                                  * copy_one_pte() for explanation.
     173             :                                  */
     174             :                                 entry = make_readable_device_private_entry(
     175             :                                                         swp_offset(entry));
     176             :                                 newpte = swp_entry_to_pte(entry);
     177             :                                 if (pte_swp_uffd_wp(oldpte))
     178             :                                         newpte = pte_swp_mkuffd_wp(newpte);
     179             :                         } else if (is_writable_device_exclusive_entry(entry)) {
     180             :                                 entry = make_readable_device_exclusive_entry(
     181             :                                                         swp_offset(entry));
     182             :                                 newpte = swp_entry_to_pte(entry);
     183             :                                 if (pte_swp_soft_dirty(oldpte))
     184             :                                         newpte = pte_swp_mksoft_dirty(newpte);
     185             :                                 if (pte_swp_uffd_wp(oldpte))
     186             :                                         newpte = pte_swp_mkuffd_wp(newpte);
     187             :                         } else {
     188             :                                 newpte = oldpte;
     189             :                         }
     190             : 
     191             :                         if (uffd_wp)
     192             :                                 newpte = pte_swp_mkuffd_wp(newpte);
     193             :                         else if (uffd_wp_resolve)
     194             :                                 newpte = pte_swp_clear_uffd_wp(newpte);
     195             : 
     196           0 :                         if (!pte_same(oldpte, newpte)) {
     197           0 :                                 set_pte_at(vma->vm_mm, addr, pte, newpte);
     198           0 :                                 pages++;
     199             :                         }
     200             :                 }
     201           0 :         } while (pte++, addr += PAGE_SIZE, addr != end);
     202             :         arch_leave_lazy_mmu_mode();
     203           0 :         pte_unmap_unlock(pte - 1, ptl);
     204             : 
     205             :         return pages;
     206             : }
     207             : 
     208             : /*
     209             :  * Used when setting automatic NUMA hinting protection where it is
     210             :  * critical that a numa hinting PMD is not confused with a bad PMD.
     211             :  */
     212             : static inline int pmd_none_or_clear_bad_unless_trans_huge(pmd_t *pmd)
     213             : {
     214           0 :         pmd_t pmdval = pmd_read_atomic(pmd);
     215             : 
     216             :         /* See pmd_none_or_trans_huge_or_clear_bad for info on barrier */
     217             : #ifdef CONFIG_TRANSPARENT_HUGEPAGE
     218             :         barrier();
     219             : #endif
     220             : 
     221           0 :         if (pmd_none(pmdval))
     222             :                 return 1;
     223           0 :         if (pmd_trans_huge(pmdval))
     224             :                 return 0;
     225           0 :         if (unlikely(pmd_bad(pmdval))) {
     226           0 :                 pmd_clear_bad(pmd);
     227             :                 return 1;
     228             :         }
     229             : 
     230             :         return 0;
     231             : }
     232             : 
     233           0 : static inline unsigned long change_pmd_range(struct vm_area_struct *vma,
     234             :                 pud_t *pud, unsigned long addr, unsigned long end,
     235             :                 pgprot_t newprot, unsigned long cp_flags)
     236             : {
     237             :         pmd_t *pmd;
     238             :         unsigned long next;
     239           0 :         unsigned long pages = 0;
     240           0 :         unsigned long nr_huge_updates = 0;
     241             :         struct mmu_notifier_range range;
     242             : 
     243           0 :         range.start = 0;
     244             : 
     245           0 :         pmd = pmd_offset(pud, addr);
     246             :         do {
     247             :                 unsigned long this_pages;
     248             : 
     249           0 :                 next = pmd_addr_end(addr, end);
     250             : 
     251             :                 /*
     252             :                  * Automatic NUMA balancing walks the tables with mmap_lock
     253             :                  * held for read. It's possible for a parallel update to occur
     254             :                  * between pmd_trans_huge() and a pmd_none_or_clear_bad()
     255             :                  * check, leading to a false positive and clearing.
     256             :                  * Hence, it's necessary to atomically read the PMD value
     257             :                  * for all the checks.
     258             :                  */
     259           0 :                 if (!is_swap_pmd(*pmd) && !pmd_devmap(*pmd) &&
     260           0 :                      pmd_none_or_clear_bad_unless_trans_huge(pmd))
     261             :                         goto next;
     262             : 
     263             :                 /* invoke the mmu notifier if the pmd is populated */
     264             :                 if (!range.start) {
     265             :                         mmu_notifier_range_init(&range,
     266             :                                 MMU_NOTIFY_PROTECTION_VMA, 0,
     267             :                                 vma, vma->vm_mm, addr, end);
     268             :                         mmu_notifier_invalidate_range_start(&range);
     269             :                 }
     270             : 
     271           0 :                 if (is_swap_pmd(*pmd) || pmd_trans_huge(*pmd) || pmd_devmap(*pmd)) {
     272             :                         if (next - addr != HPAGE_PMD_SIZE) {
     273             :                                 __split_huge_pmd(vma, pmd, addr, false, NULL);
     274             :                         } else {
     275             :                                 int nr_ptes = change_huge_pmd(vma, pmd, addr,
     276             :                                                               newprot, cp_flags);
     277             : 
     278             :                                 if (nr_ptes) {
     279             :                                         if (nr_ptes == HPAGE_PMD_NR) {
     280             :                                                 pages += HPAGE_PMD_NR;
     281             :                                                 nr_huge_updates++;
     282             :                                         }
     283             : 
     284             :                                         /* huge pmd was handled */
     285             :                                         goto next;
     286             :                                 }
     287             :                         }
     288             :                         /* fall through, the trans huge pmd just split */
     289             :                 }
     290           0 :                 this_pages = change_pte_range(vma, pmd, addr, next, newprot,
     291             :                                               cp_flags);
     292           0 :                 pages += this_pages;
     293             : next:
     294           0 :                 cond_resched();
     295           0 :         } while (pmd++, addr = next, addr != end);
     296             : 
     297             :         if (range.start)
     298             :                 mmu_notifier_invalidate_range_end(&range);
     299             : 
     300             :         if (nr_huge_updates)
     301             :                 count_vm_numa_events(NUMA_HUGE_PTE_UPDATES, nr_huge_updates);
     302           0 :         return pages;
     303             : }
     304             : 
     305           0 : static inline unsigned long change_pud_range(struct vm_area_struct *vma,
     306             :                 p4d_t *p4d, unsigned long addr, unsigned long end,
     307             :                 pgprot_t newprot, unsigned long cp_flags)
     308             : {
     309             :         pud_t *pud;
     310             :         unsigned long next;
     311           0 :         unsigned long pages = 0;
     312             : 
     313           0 :         pud = pud_offset(p4d, addr);
     314             :         do {
     315           0 :                 next = pud_addr_end(addr, end);
     316           0 :                 if (pud_none_or_clear_bad(pud))
     317           0 :                         continue;
     318           0 :                 pages += change_pmd_range(vma, pud, addr, next, newprot,
     319             :                                           cp_flags);
     320           0 :         } while (pud++, addr = next, addr != end);
     321             : 
     322           0 :         return pages;
     323             : }
     324             : 
     325             : static inline unsigned long change_p4d_range(struct vm_area_struct *vma,
     326             :                 pgd_t *pgd, unsigned long addr, unsigned long end,
     327             :                 pgprot_t newprot, unsigned long cp_flags)
     328             : {
     329             :         p4d_t *p4d;
     330             :         unsigned long next;
     331           0 :         unsigned long pages = 0;
     332             : 
     333           0 :         p4d = p4d_offset(pgd, addr);
     334             :         do {
     335           0 :                 next = p4d_addr_end(addr, end);
     336           0 :                 if (p4d_none_or_clear_bad(p4d))
     337             :                         continue;
     338           0 :                 pages += change_pud_range(vma, p4d, addr, next, newprot,
     339             :                                           cp_flags);
     340           0 :         } while (p4d++, addr = next, addr != end);
     341             : 
     342             :         return pages;
     343             : }
     344             : 
     345           0 : static unsigned long change_protection_range(struct vm_area_struct *vma,
     346             :                 unsigned long addr, unsigned long end, pgprot_t newprot,
     347             :                 unsigned long cp_flags)
     348             : {
     349           0 :         struct mm_struct *mm = vma->vm_mm;
     350             :         pgd_t *pgd;
     351             :         unsigned long next;
     352           0 :         unsigned long start = addr;
     353           0 :         unsigned long pages = 0;
     354             : 
     355           0 :         BUG_ON(addr >= end);
     356           0 :         pgd = pgd_offset(mm, addr);
     357           0 :         flush_cache_range(vma, addr, end);
     358             :         inc_tlb_flush_pending(mm);
     359             :         do {
     360           0 :                 next = pgd_addr_end(addr, end);
     361           0 :                 if (pgd_none_or_clear_bad(pgd))
     362             :                         continue;
     363           0 :                 pages += change_p4d_range(vma, pgd, addr, next, newprot,
     364             :                                           cp_flags);
     365           0 :         } while (pgd++, addr = next, addr != end);
     366             : 
     367             :         /* Only flush the TLB if we actually modified any entries: */
     368           0 :         if (pages)
     369           0 :                 flush_tlb_range(vma, start, end);
     370           0 :         dec_tlb_flush_pending(mm);
     371             : 
     372           0 :         return pages;
     373             : }
     374             : 
     375           0 : unsigned long change_protection(struct vm_area_struct *vma, unsigned long start,
     376             :                        unsigned long end, pgprot_t newprot,
     377             :                        unsigned long cp_flags)
     378             : {
     379             :         unsigned long pages;
     380             : 
     381           0 :         BUG_ON((cp_flags & MM_CP_UFFD_WP_ALL) == MM_CP_UFFD_WP_ALL);
     382             : 
     383           0 :         if (is_vm_hugetlb_page(vma))
     384             :                 pages = hugetlb_change_protection(vma, start, end, newprot);
     385             :         else
     386           0 :                 pages = change_protection_range(vma, start, end, newprot,
     387             :                                                 cp_flags);
     388             : 
     389           0 :         return pages;
     390             : }
     391             : 
     392             : static int prot_none_pte_entry(pte_t *pte, unsigned long addr,
     393             :                                unsigned long next, struct mm_walk *walk)
     394             : {
     395             :         return pfn_modify_allowed(pte_pfn(*pte), *(pgprot_t *)(walk->private)) ?
     396             :                 0 : -EACCES;
     397             : }
     398             : 
     399             : static int prot_none_hugetlb_entry(pte_t *pte, unsigned long hmask,
     400             :                                    unsigned long addr, unsigned long next,
     401             :                                    struct mm_walk *walk)
     402             : {
     403             :         return pfn_modify_allowed(pte_pfn(*pte), *(pgprot_t *)(walk->private)) ?
     404             :                 0 : -EACCES;
     405             : }
     406             : 
     407             : static int prot_none_test(unsigned long addr, unsigned long next,
     408             :                           struct mm_walk *walk)
     409             : {
     410             :         return 0;
     411             : }
     412             : 
     413             : static const struct mm_walk_ops prot_none_walk_ops = {
     414             :         .pte_entry              = prot_none_pte_entry,
     415             :         .hugetlb_entry          = prot_none_hugetlb_entry,
     416             :         .test_walk              = prot_none_test,
     417             : };
     418             : 
     419             : int
     420           0 : mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev,
     421             :         unsigned long start, unsigned long end, unsigned long newflags)
     422             : {
     423           0 :         struct mm_struct *mm = vma->vm_mm;
     424           0 :         unsigned long oldflags = vma->vm_flags;
     425           0 :         long nrpages = (end - start) >> PAGE_SHIFT;
     426           0 :         unsigned long charged = 0;
     427             :         pgoff_t pgoff;
     428             :         int error;
     429           0 :         int dirty_accountable = 0;
     430             : 
     431           0 :         if (newflags == oldflags) {
     432           0 :                 *pprev = vma;
     433           0 :                 return 0;
     434             :         }
     435             : 
     436             :         /*
     437             :          * Do PROT_NONE PFN permission checks here when we can still
     438             :          * bail out without undoing a lot of state. This is a rather
     439             :          * uncommon case, so doesn't need to be very optimized.
     440             :          */
     441             :         if (arch_has_pfn_modify_check() &&
     442             :             (vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP)) &&
     443             :             (newflags & VM_ACCESS_FLAGS) == 0) {
     444             :                 pgprot_t new_pgprot = vm_get_page_prot(newflags);
     445             : 
     446             :                 error = walk_page_range(current->mm, start, end,
     447             :                                 &prot_none_walk_ops, &new_pgprot);
     448             :                 if (error)
     449             :                         return error;
     450             :         }
     451             : 
     452             :         /*
     453             :          * If we make a private mapping writable we increase our commit;
     454             :          * but (without finer accounting) cannot reduce our commit if we
     455             :          * make it unwritable again. hugetlb mappings were accounted for
     456             :          * even if read-only, so there is no need to account for them here.
     457             :          */
     458           0 :         if (newflags & VM_WRITE) {
     459             :                 /* Check space limits when area turns into data. */
     460           0 :                 if (!may_expand_vm(mm, newflags, nrpages) &&
     461           0 :                                 may_expand_vm(mm, oldflags, nrpages))
     462             :                         return -ENOMEM;
     463           0 :                 if (!(oldflags & (VM_ACCOUNT|VM_WRITE|VM_HUGETLB|
     464             :                                                 VM_SHARED|VM_NORESERVE))) {
     465           0 :                         charged = nrpages;
     466           0 :                         if (security_vm_enough_memory_mm(mm, charged))
     467             :                                 return -ENOMEM;
     468           0 :                         newflags |= VM_ACCOUNT;
     469             :                 }
     470             :         }
     471             : 
     472             :         /*
     473             :          * First try to merge with previous and/or next vma.
     474             :          */
     475           0 :         pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
     476           0 :         *pprev = vma_merge(mm, *pprev, start, end, newflags,
     477             :                            vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma),
     478             :                            vma->vm_userfaultfd_ctx, anon_vma_name(vma));
     479           0 :         if (*pprev) {
     480             :                 vma = *pprev;
     481             :                 VM_WARN_ON((vma->vm_flags ^ newflags) & ~VM_SOFTDIRTY);
     482             :                 goto success;
     483             :         }
     484             : 
     485           0 :         *pprev = vma;
     486             : 
     487           0 :         if (start != vma->vm_start) {
     488           0 :                 error = split_vma(mm, vma, start, 1);
     489           0 :                 if (error)
     490             :                         goto fail;
     491             :         }
     492             : 
     493           0 :         if (end != vma->vm_end) {
     494           0 :                 error = split_vma(mm, vma, end, 0);
     495           0 :                 if (error)
     496             :                         goto fail;
     497             :         }
     498             : 
     499             : success:
     500             :         /*
     501             :          * vm_flags and vm_page_prot are protected by the mmap_lock
     502             :          * held in write mode.
     503             :          */
     504           0 :         vma->vm_flags = newflags;
     505           0 :         dirty_accountable = vma_wants_writenotify(vma, vma->vm_page_prot);
     506           0 :         vma_set_page_prot(vma);
     507             : 
     508           0 :         change_protection(vma, start, end, vma->vm_page_prot,
     509             :                           dirty_accountable ? MM_CP_DIRTY_ACCT : 0);
     510             : 
     511             :         /*
     512             :          * Private VM_LOCKED VMA becoming writable: trigger COW to avoid major
     513             :          * fault on access.
     514             :          */
     515           0 :         if ((oldflags & (VM_WRITE | VM_SHARED | VM_LOCKED)) == VM_LOCKED &&
     516           0 :                         (newflags & VM_WRITE)) {
     517           0 :                 populate_vma_page_range(vma, start, end, NULL);
     518             :         }
     519             : 
     520           0 :         vm_stat_account(mm, oldflags, -nrpages);
     521           0 :         vm_stat_account(mm, newflags, nrpages);
     522           0 :         perf_event_mmap(vma);
     523           0 :         return 0;
     524             : 
     525             : fail:
     526           0 :         vm_unacct_memory(charged);
     527           0 :         return error;
     528             : }
     529             : 
     530             : /*
     531             :  * pkey==-1 when doing a legacy mprotect()
     532             :  */
     533           0 : static int do_mprotect_pkey(unsigned long start, size_t len,
     534             :                 unsigned long prot, int pkey)
     535             : {
     536             :         unsigned long nstart, end, tmp, reqprot;
     537             :         struct vm_area_struct *vma, *prev;
     538           0 :         int error = -EINVAL;
     539           0 :         const int grows = prot & (PROT_GROWSDOWN|PROT_GROWSUP);
     540           0 :         const bool rier = (current->personality & READ_IMPLIES_EXEC) &&
     541           0 :                                 (prot & PROT_READ);
     542             : 
     543           0 :         start = untagged_addr(start);
     544             : 
     545           0 :         prot &= ~(PROT_GROWSDOWN|PROT_GROWSUP);
     546           0 :         if (grows == (PROT_GROWSDOWN|PROT_GROWSUP)) /* can't be both */
     547             :                 return -EINVAL;
     548             : 
     549           0 :         if (start & ~PAGE_MASK)
     550             :                 return -EINVAL;
     551           0 :         if (!len)
     552             :                 return 0;
     553           0 :         len = PAGE_ALIGN(len);
     554           0 :         end = start + len;
     555           0 :         if (end <= start)
     556             :                 return -ENOMEM;
     557           0 :         if (!arch_validate_prot(prot, start))
     558             :                 return -EINVAL;
     559             : 
     560           0 :         reqprot = prot;
     561             : 
     562           0 :         if (mmap_write_lock_killable(current->mm))
     563             :                 return -EINTR;
     564             : 
     565             :         /*
     566             :          * If userspace did not allocate the pkey, do not let
     567             :          * them use it here.
     568             :          */
     569           0 :         error = -EINVAL;
     570           0 :         if ((pkey != -1) && !mm_pkey_is_allocated(current->mm, pkey))
     571             :                 goto out;
     572             : 
     573           0 :         vma = find_vma(current->mm, start);
     574           0 :         error = -ENOMEM;
     575           0 :         if (!vma)
     576             :                 goto out;
     577             : 
     578           0 :         if (unlikely(grows & PROT_GROWSDOWN)) {
     579           0 :                 if (vma->vm_start >= end)
     580             :                         goto out;
     581           0 :                 start = vma->vm_start;
     582           0 :                 error = -EINVAL;
     583           0 :                 if (!(vma->vm_flags & VM_GROWSDOWN))
     584             :                         goto out;
     585             :         } else {
     586           0 :                 if (vma->vm_start > start)
     587             :                         goto out;
     588           0 :                 if (unlikely(grows & PROT_GROWSUP)) {
     589             :                         end = vma->vm_end;
     590             :                         error = -EINVAL;
     591             :                         if (!(vma->vm_flags & VM_GROWSUP))
     592             :                                 goto out;
     593             :                 }
     594             :         }
     595             : 
     596           0 :         if (start > vma->vm_start)
     597           0 :                 prev = vma;
     598             :         else
     599           0 :                 prev = vma->vm_prev;
     600             : 
     601             :         for (nstart = start ; ; ) {
     602             :                 unsigned long mask_off_old_flags;
     603             :                 unsigned long newflags;
     604             :                 int new_vma_pkey;
     605             : 
     606             :                 /* Here we know that vma->vm_start <= nstart < vma->vm_end. */
     607             : 
     608             :                 /* Does the application expect PROT_READ to imply PROT_EXEC */
     609           0 :                 if (rier && (vma->vm_flags & VM_MAYEXEC))
     610           0 :                         prot |= PROT_EXEC;
     611             : 
     612             :                 /*
     613             :                  * Each mprotect() call explicitly passes r/w/x permissions.
     614             :                  * If a permission is not passed to mprotect(), it must be
     615             :                  * cleared from the VMA.
     616             :                  */
     617           0 :                 mask_off_old_flags = VM_READ | VM_WRITE | VM_EXEC |
     618             :                                         VM_FLAGS_CLEAR;
     619             : 
     620           0 :                 new_vma_pkey = arch_override_mprotect_pkey(vma, prot, pkey);
     621           0 :                 newflags = calc_vm_prot_bits(prot, new_vma_pkey);
     622           0 :                 newflags |= (vma->vm_flags & ~mask_off_old_flags);
     623             : 
     624             :                 /* newflags >> 4 shifts VM_MAY% into the place of VM_% */
     625           0 :                 if ((newflags & ~(newflags >> 4)) & VM_ACCESS_FLAGS) {
     626             :                         error = -EACCES;
     627             :                         goto out;
     628             :                 }
     629             : 
     630             :                 /* Allow architectures to sanity-check the new flags */
     631           0 :                 if (!arch_validate_flags(newflags)) {
     632             :                         error = -EINVAL;
     633             :                         goto out;
     634             :                 }
     635             : 
     636           0 :                 error = security_file_mprotect(vma, reqprot, prot);
     637             :                 if (error)
     638             :                         goto out;
     639             : 
     640           0 :                 tmp = vma->vm_end;
     641           0 :                 if (tmp > end)
     642           0 :                         tmp = end;
     643             : 
     644           0 :                 if (vma->vm_ops && vma->vm_ops->mprotect) {
     645           0 :                         error = vma->vm_ops->mprotect(vma, nstart, tmp, newflags);
     646           0 :                         if (error)
     647             :                                 goto out;
     648             :                 }
     649             : 
     650           0 :                 error = mprotect_fixup(vma, &prev, nstart, tmp, newflags);
     651           0 :                 if (error)
     652             :                         goto out;
     653             : 
     654           0 :                 nstart = tmp;
     655             : 
     656           0 :                 if (nstart < prev->vm_end)
     657           0 :                         nstart = prev->vm_end;
     658           0 :                 if (nstart >= end)
     659             :                         goto out;
     660             : 
     661           0 :                 vma = prev->vm_next;
     662           0 :                 if (!vma || vma->vm_start != nstart) {
     663             :                         error = -ENOMEM;
     664             :                         goto out;
     665             :                 }
     666             :                 prot = reqprot;
     667             :         }
     668             : out:
     669           0 :         mmap_write_unlock(current->mm);
     670           0 :         return error;
     671             : }
     672             : 
     673           0 : SYSCALL_DEFINE3(mprotect, unsigned long, start, size_t, len,
     674             :                 unsigned long, prot)
     675             : {
     676           0 :         return do_mprotect_pkey(start, len, prot, -1);
     677             : }
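
SYSCALL_DEFINE3(mprotect, ...) above is the kernel entry point behind the userspace mprotect(2) wrapper; it simply forwards to do_mprotect_pkey() with pkey == -1. Below is a minimal, illustrative userspace sketch (not part of mprotect.c) of how this path is typically exercised; error handling is kept deliberately simple.

    /*
     * Hypothetical userspace demo: map one anonymous read/write page,
     * then drop write permission with mprotect(2).  The mprotect() call
     * lands in do_mprotect_pkey(start, len, prot, -1) shown above.
     */
    #include <stdio.h>
    #include <string.h>
    #include <sys/mman.h>
    #include <unistd.h>

    int main(void)
    {
            long page = sysconf(_SC_PAGESIZE);
            char *p = mmap(NULL, page, PROT_READ | PROT_WRITE,
                           MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

            if (p == MAP_FAILED) {
                    perror("mmap");
                    return 1;
            }
            strcpy(p, "hello");                  /* page is writable here */

            if (mprotect(p, page, PROT_READ)) {  /* make it read-only */
                    perror("mprotect");
                    return 1;
            }
            printf("%s\n", p);                   /* reads still work */
            /* *p = 'x'; at this point would raise SIGSEGV */

            munmap(p, page);
            return 0;
    }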
     678             : 
     679             : #ifdef CONFIG_ARCH_HAS_PKEYS
     680             : 
     681             : SYSCALL_DEFINE4(pkey_mprotect, unsigned long, start, size_t, len,
     682             :                 unsigned long, prot, int, pkey)
     683             : {
     684             :         return do_mprotect_pkey(start, len, prot, pkey);
     685             : }
     686             : 
     687             : SYSCALL_DEFINE2(pkey_alloc, unsigned long, flags, unsigned long, init_val)
     688             : {
     689             :         int pkey;
     690             :         int ret;
     691             : 
     692             :         /* No flags supported yet. */
     693             :         if (flags)
     694             :                 return -EINVAL;
     695             :         /* check for unsupported init values */
     696             :         if (init_val & ~PKEY_ACCESS_MASK)
     697             :                 return -EINVAL;
     698             : 
     699             :         mmap_write_lock(current->mm);
     700             :         pkey = mm_pkey_alloc(current->mm);
     701             : 
     702             :         ret = -ENOSPC;
     703             :         if (pkey == -1)
     704             :                 goto out;
     705             : 
     706             :         ret = arch_set_user_pkey_access(current, pkey, init_val);
     707             :         if (ret) {
     708             :                 mm_pkey_free(current->mm, pkey);
     709             :                 goto out;
     710             :         }
     711             :         ret = pkey;
     712             : out:
     713             :         mmap_write_unlock(current->mm);
     714             :         return ret;
     715             : }
     716             : 
     717             : SYSCALL_DEFINE1(pkey_free, int, pkey)
     718             : {
     719             :         int ret;
     720             : 
     721             :         mmap_write_lock(current->mm);
     722             :         ret = mm_pkey_free(current->mm, pkey);
     723             :         mmap_write_unlock(current->mm);
     724             : 
     725             :         /*
     726             :          * We could provide warnings or errors if any VMA still
     727             :          * has the pkey set here.
     728             :          */
     729             :         return ret;
     730             : }
     731             : 
     732             : #endif /* CONFIG_ARCH_HAS_PKEYS */
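
A similar illustrative sketch (again not part of mprotect.c) for the CONFIG_ARCH_HAS_PKEYS path above, written against the glibc >= 2.27 wrappers for pkey_alloc(2), pkey_mprotect(2) and pkey_free(2). It assumes hardware with memory protection keys (x86 pkeys); elsewhere pkey_alloc() simply fails and the program exits.

    /*
     * Hypothetical userspace demo: allocate a protection key, attach it to
     * a mapping via pkey_mprotect(2), and toggle access rights per thread
     * with pkey_set() without further mprotect() calls.
     */
    #define _GNU_SOURCE
    #include <stdio.h>
    #include <sys/mman.h>
    #include <unistd.h>

    int main(void)
    {
            long page = sysconf(_SC_PAGESIZE);
            char *p = mmap(NULL, page, PROT_READ | PROT_WRITE,
                           MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
            if (p == MAP_FAILED) {
                    perror("mmap");
                    return 1;
            }

            int pkey = pkey_alloc(0, 0);         /* no flags, no initial restrictions */
            if (pkey < 0) {
                    perror("pkey_alloc");        /* unsupported hardware or ENOSPC */
                    return 1;
            }

            /* Tag the page with the key; the r/w/x bits still come from prot. */
            if (pkey_mprotect(p, page, PROT_READ | PROT_WRITE, pkey)) {
                    perror("pkey_mprotect");
                    return 1;
            }

            pkey_set(pkey, PKEY_DISABLE_WRITE);  /* writes to p would now fault */
            pkey_set(pkey, 0);                   /* writes allowed again */
            *p = 'x';

            pkey_free(pkey);
            munmap(p, page);
            return 0;
    }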

Generated by: LCOV version 1.14